Rat Carcinogenic Potency Distribution Comparison Dashboard

DrugBank database
MolPort database
Python script No. 75: builds the frequency-distribution graph of the Rat_carcinogenicity parameter for DrugBank molecules.
import matplotlib.pyplot as plt
from matplotlib.patches import Patch
import numpy as np

# 1. Original Data (Rat Carcinogenicity - Quantitative TD50)
# Sixteen histogram bins whose centers run from -2.0 to 5.5 in steps of 0.5.
bin_centers = np.linspace(-2.0, 5.5, 16)
# Height of each bar, one entry per bin center and in the same order.
frequencies = np.array([
    0.04, 0.21, 1.44, 4.88,
    9.26, 13.42, 18.39, 17.62,
    13.25, 8.24, 6.75, 3.23,
    1.61, 0.85, 0.59, 0.21,
])

# 2. Exact Parameters
# Peak height, center and width handed to gauss_exact() for the trend line.
amp, mean, std_dev = 17.73, 1.270, 1.107

def gauss_exact(x, a, mu, sigma):
    """Evaluate an unnormalised Gaussian curve at ``x``.

    Computes ``a * exp(-(x - mu)**2 / (2 * sigma**2))``.

    Args:
        x: Point(s) at which to evaluate the curve; a scalar or a NumPy array.
        a: Height of the curve at its peak (the value returned at ``x == mu``).
        mu: Position of the peak.
        sigma: Width parameter (standard deviation) of the curve.

    Returns:
        The curve value(s), as produced by ``np.exp`` for the given ``x``.
    """
    squared_offset = (x - mu) ** 2
    two_variance = 2 * sigma ** 2
    return a * np.exp(-squared_offset / two_variance)

# Dense grid on which the Gaussian trend line is evaluated; it reaches 0.5
# beyond the outermost bin centers so the curve spans every bar.
x_fit = np.linspace(-2.5, 6.0, 500)
y_fit = gauss_exact(x_fit, amp, mean, std_dev)

# 3. Hexadecimal Colors and Biological Thresholds
color_safe = '#008000'
color_warn = '#FFD700'
color_danger = '#B22222'

# Risk-band limits on the x axis. They are defined once so that the bar
# colors and the legend text cannot drift apart.
very_low_risk_max = 1.0  # centers strictly below this are "very low risk"
high_risk_min = 3.0      # centers strictly above this are "high risk"


def _risk_color(center):
    """Return the bar color of the risk band that contains ``center``."""
    if center < very_low_risk_max:
        return color_safe
    if center <= high_risk_min:
        return color_warn
    return color_danger


colors = [_risk_color(center) for center in bin_centers]

# 4. Create the graph
fig, ax = plt.subplots(figsize=(7, 6))

# Bars. The label is kept on the artist, but the legend below is built from
# explicit handles, so it is not what the legend displays.
ax.bar(bin_centers, frequencies, width=0.4, color=colors, edgecolor='black',
       alpha=0.7, label='Observed Data')

# Trend Line (Gaussian)
ax.plot(x_fit, y_fit, color='orange', linewidth=3,
        label=f'Gaussian Fit\n$\\mu={mean:.3f}, \\sigma={std_dev:.3f}$')

# 5. Tags and Titles
ax.set_xlabel(r'Rat Carcinogenic Potency ($-\log_{10} TD_{50}$)', fontsize=12)
ax.set_ylabel('% Frequency', fontsize=12)
ax.set_title('Rat Carcinogenic Potency Distribution', fontsize=14)

# 6. Legend
# Proxy artists: one patch per risk band plus a line for the fit. The fit
# parameters use the same three-decimal format as the trend-line label so the
# two never disagree (a bare ``{mean}`` would print 1.27 instead of 1.270).
legend_elements = [
    Patch(facecolor=color_safe, edgecolor='black',
          label=f'Very Low Risk ($< {very_low_risk_max:.1f}$)'),
    Patch(facecolor=color_warn, edgecolor='black',
          label=f'Low-Moderate Risk (${very_low_risk_max:.1f} - {high_risk_min:.1f}$)'),
    Patch(facecolor=color_danger, edgecolor='black',
          label=f'High Risk Elimination ($> {high_risk_min:.1f}$)'),
    plt.Line2D([0], [0], color='orange', lw=3,
               label=f'Fit (Mean={mean:.3f}, SD={std_dev:.3f})'),
]

ax.legend(handles=legend_elements, loc='upper right', framealpha=0.95, fontsize=10)

ax.grid(axis='y', linestyle='--', alpha=0.5)
fig.tight_layout()

plt.show()