Rat Carcinogenicity Distribution Comparison Dashboard

DrugBank database
MolPort database
Python script no. 74: builds the frequency-distribution graph of the Rat_carcinogenicity_c parameter for DrugBank molecules.
import matplotlib.pyplot as plt
from matplotlib.patches import Patch
import numpy as np

# 1. Observed histogram (categorical rat-carcinogenicity parameter)
# Centers of the 17 histogram bins along the x axis.
bin_centers = np.array(
    [
        0.1, 0.15, 0.2, 0.25, 0.3, 0.35,
        0.4, 0.45, 0.5, 0.55, 0.6, 0.65,
        0.7, 0.75, 0.8, 0.85, 0.9,
    ],
    dtype=float,
)

# Bar heights, one per bin center (plotted on the '% Frequency' axis).
frequencies = np.array(
    [
        0.47, 4.84, 10.57, 13.55, 13.55, 10.32,
        10.96, 8.92, 7.86, 6.50, 4.20, 2.93,
        2.89, 1.61, 0.42, 0.30, 0.13,
    ],
    dtype=float,
)

# 2. Gaussian parameters, fixed by hand rather than fitted in this script
amp, mean, std_dev = 12.51, 0.3438, 0.1650

def gauss_exact(x, a, mu, sigma):
    """Evaluate an un-normalized Gaussian curve at ``x``.

    Computes ``a * exp(-(x - mu)**2 / (2 * sigma**2))``, so the value at
    ``x == mu`` is exactly ``a`` (``a`` is the peak height, not an area).

    Args:
        x: Scalar or array-like of evaluation points. Lists and tuples are
            accepted and converted with ``np.asarray``.
        a: Peak amplitude.
        mu: Center of the curve.
        sigma: Width of the curve; must be non-zero.

    Returns:
        The curve values, with the same shape as ``x``.

    Raises:
        ValueError: If ``sigma`` is zero, which would divide by zero.
    """
    # A zero width would otherwise produce NaN/inf (or ZeroDivisionError for
    # pure-Python scalars) instead of a clear error.
    if np.any(np.equal(sigma, 0)):
        raise ValueError("sigma must be non-zero")
    # Coerce so that plain Python sequences support the arithmetic below.
    x = np.asarray(x)
    return a * np.exp(-((x - mu) ** 2) / (2 * sigma**2))

# Sample the Gaussian on a dense grid so the fit line renders smoothly
x_fit = np.linspace(0.05, 0.95, 500)
y_fit = gauss_exact(x_fit, amp, mean, std_dev)

# 3. Zone colors (hex) and the thresholds that separate the zones
color_safe, color_warn, color_danger = '#008000', '#FFD700', '#B22222'

cutoff_safe = mean + std_dev        # upper edge of the normal zone: mu + 1 sigma
cutoff_danger = mean + 3 * std_dev  # upper edge of the warning zone: mu + 3 sigma

# One color per bar, chosen by which zone the bin center falls into
colors = [
    color_safe if center <= cutoff_safe
    else color_warn if center <= cutoff_danger
    else color_danger
    for center in bin_centers
]

# 4. Build the figure with an explicit Figure/Axes pair
fig, ax = plt.subplots(figsize=(7, 6))

# Histogram bars, colored per zone
ax.bar(
    bin_centers,
    frequencies,
    width=0.04,
    color=colors,
    edgecolor='black',
    alpha=0.7,
    label='Observed Data',
)

# Gaussian trend line drawn over the bars
fit_label = f'Gaussian Fit\n$\\mu={mean:.4f}, \\sigma={std_dev:.4f}$'
ax.plot(x_fit, y_fit, color='orange', linewidth=3, label=fit_label)

# 5. Axis labels and title
ax.set_xlabel('Probability of Rat Carcinogenicity', fontsize=12)
ax.set_ylabel('% Frequency', fontsize=12)
ax.set_title('Rat Carcinogenicity Distribution', fontsize=14)

# 6. Legend, built from explicit proxy handles: one patch per color zone plus
# a line entry for the fit. Because handles are passed explicitly, the
# `label=` values given to the bar/plot artists above are not displayed.
zone_entries = [
    (color_safe, f'Normal Zone ($\\leq {cutoff_safe:.2f}$)'),
    (color_warn, f'Warning Zone ($\\leq {cutoff_danger:.2f}$)'),
    (color_danger, f'Danger Zone ($> {cutoff_danger:.2f}$)'),
]
legend_elements = [
    Patch(facecolor=zone_color, edgecolor='black', label=zone_label)
    for zone_color, zone_label in zone_entries
]
legend_elements.append(
    plt.Line2D([0], [0], color='orange', lw=3, label=f'Fit (Mean={mean}, SD={std_dev})')
)

ax.legend(handles=legend_elements, loc='upper right', framealpha=0.95, fontsize=10)

# Horizontal guide lines only, then let matplotlib fit the layout
ax.grid(axis='y', linestyle='--', alpha=0.5)
fig.tight_layout()

plt.show()