Rodents Carcinogenicity (Combined Model) Distribution Dashboard

DrugBank database
MolPort database
Python script No. 76: builds the frequency-distribution graph of the Rodents_carcinogenicity parameter for DrugBank molecules.
import matplotlib.pyplot as plt
import matplotlib.colors as mcolors
from matplotlib.patches import Patch
import numpy as np

# 1. Source histogram (Rodents Carcinogenicity)
# X positions of the bars on the 0-1 score axis: 19 values spaced 0.05 apart.
bins_rc = np.array(
    [
        0.05, 0.1, 0.15, 0.2, 0.25,
        0.3, 0.35, 0.4, 0.45, 0.5,
        0.55, 0.6, 0.65, 0.7, 0.75,
        0.8, 0.85, 0.9, 0.95,
    ]
)
# Frequency (%) of each bar, in the same order as bins_rc; the values total 100.
freq_rc = np.array(
    [
        0.04, 1.53, 4.12, 7.30, 12.10,
        13.84, 11.46, 11.25, 8.58, 7.52,
        6.03, 3.95, 3.31, 3.44, 2.68,
        1.87, 0.64, 0.30, 0.04,
    ]
)

# Fixed parameters of the Gaussian trend curve: amplitude, centre, and width.
amp, mean, std = 12.35, 0.3567, 0.1570

def gauss_exact(x, a, mu, sigma):
    """Evaluate an un-normalised Gaussian at ``x``.

    Computes ``a * exp(-(x - mu)**2 / (2 * sigma**2))``.

    Args:
        x: Point(s) at which to evaluate; a scalar or a NumPy array.
        a: Peak height, i.e. the value returned when ``x == mu``.
        mu: Location of the peak.
        sigma: Width parameter of the bell curve.

    Returns:
        The curve value(s), with the same shape as ``x``.
    """
    squared_offset = (x - mu) ** 2
    two_variance = 2 * sigma ** 2
    return a * np.exp(-squared_offset / two_variance)

# Sample the trend curve on 500 evenly spaced points spanning the 0-1 score
# axis so that the plotted line looks smooth.
x_fit = np.linspace(0.0, 1.0, num=500)
y_fit = gauss_exact(x_fit, a=amp, mu=mean, sigma=std)

# 2. Function for Colors (Adapted to scale 0-1)
def get_colors(bins):
    """Map each score in ``bins`` to the hex colour of its band.

    Bands:
        * below 0.4           -> ``'#008000'`` (green)
        * 0.4 to 0.7 inclusive -> ``'#FFD700'`` (gold)
        * above 0.7           -> ``'#B22222'`` (firebrick red)

    Args:
        bins: Iterable of numeric scores.

    Returns:
        A list of hex colour strings, one per input score, in input order.
    """
    palette = []
    for score in bins:
        if score < 0.4:
            shade = '#008000'
        elif score <= 0.7:
            shade = '#FFD700'
        else:
            shade = '#B22222'
        palette.append(shade)
    return palette

# Build and display the histogram: one bar per score bin, coloured by band,
# overlaid with the Gaussian trend curve and a legend describing both.
colors_hex = get_colors(bins_rc)

# Opacities shared by the bars and the legend swatches, so that the legend
# shows exactly the colours that are drawn in the chart.
FILL_ALPHA = 0.60
EDGE_ALPHA = 0.90

# Apply separate transparencies (fill at 60%, border at 90%)
face_colors = [mcolors.to_rgba(c, alpha=FILL_ALPHA) for c in colors_hex]
edge_colors = [mcolors.to_rgba(c, alpha=EDGE_ALPHA) for c in colors_hex]

# 3. Create the chart
plt.figure(figsize=(7, 6))

# Draw bars and trend line. The handle of the plotted line is kept so the
# legend can reuse the real artist instead of a hand-copied proxy.
plt.bar(bins_rc, freq_rc, width=0.04, color=face_colors, edgecolor=edge_colors, linewidth=1.5, zorder=2)
fit_line, = plt.plot(
    x_fit, y_fit,
    color='orange', linewidth=2.5, linestyle='-', alpha=0.8, zorder=3,
    label=f'Fit (Mean={mean}, SD={std})',
)

# 4. Tags and Titles
plt.xlabel('Carcinogenicity Potential Index (Score 0-1)', fontsize=12)
plt.ylabel('% Frequency', fontsize=12)
plt.title('Rodents Carcinogenicity (Combined Model)', fontsize=14)

# 5. Legend
# One swatch per colour band, styled with the same fill/edge opacity as the
# bars, followed by the trend line itself.
band_labels = [
    ('#008000', 'Low Probability (< 0.4)'),
    ('#FFD700', 'Moderate Probability (0.4 - 0.7)'),
    ('#B22222', 'High Probability (> 0.7)'),
]
legend_elements = [
    Patch(
        facecolor=mcolors.to_rgba(band_hex, FILL_ALPHA),
        edgecolor=mcolors.to_rgba(band_hex, EDGE_ALPHA),
        label=band_label,
    )
    for band_hex, band_label in band_labels
]
legend_elements.append(fit_line)
plt.legend(handles=legend_elements, loc='upper right', framealpha=0.95, fontsize=10)

# Dotted horizontal grid drawn behind the bars (zorder=0).
plt.grid(axis='y', linestyle=':', alpha=0.7, zorder=0)
plt.xlim(0.0, 1.0)
plt.ylim(0, 15)
plt.tight_layout()

plt.show()