nHet (Number of Heteroatoms) Distribution Dashboard

DrugBank database
MolPort database
Python script number 4 to build the frequency distribution graph of the nHet parameter on DrugBank molecules.
import matplotlib.pyplot as plt
from matplotlib.patches import Patch
from matplotlib.lines import Line2D
import numpy as np

# 1. nHet data: one frequency value per heteroatom count (integer bins 0..25)
bin_centers = list(range(26))
frequencies = [
    # nHet 0-4
    0.197, 5.472, 7.638, 10.787, 11.299,
    # nHet 5-9
    11.339, 9.370, 8.307, 7.244, 6.260,
    # nHet 10-14
    5.591, 3.740, 2.717, 2.126, 2.520,
    # nHet 15-19
    1.457, 1.181, 0.512, 0.276, 0.591,
    # nHet 20-25
    0.276, 0.118, 0.157, 0.315, 0.197, 0.315,
]

# 2. Gaussian fit parameters (hard-coded constants; the fitting step itself
#    is not part of this script)
amplitude, mean, sd = 10.65, 5.331, 3.756

# Dense x grid so the fitted curve is drawn as a smooth line
x_smooth = np.linspace(-1, 26, num=300)

# Gaussian: y = amplitude * exp(-z^2 / 2), where z is the distance from the
# mean expressed in standard deviations
z_scores = (x_smooth - mean) / sd
y_smooth = amplitude * np.exp(-0.5 * z_scores ** 2)


# 3. Define colors (nHet traffic-light system)
def _traffic_light_color(n_het):
    """Return the bar colour for a bin holding ``n_het`` heteroatoms."""
    # Optimal range: 2 to 9 heteroatoms
    if 2 <= n_het <= 9:
        return 'green'
    # Caution range: 1, or 10 to 15 heteroatoms
    if n_het == 1 or 10 <= n_het <= 15:
        return 'gold'
    # Risk range: 0, or more than 15 heteroatoms
    return 'firebrick'


colors = [_traffic_light_color(n_het) for n_het in bin_centers]

# 4. Create the plot on an explicit Figure/Axes pair
fig, ax = plt.subplots(figsize=(7, 6))

# A. Bars: one per nHet bin, filled with that bin's traffic-light colour
ax.bar(
    bin_centers,
    frequencies,
    width=0.8,
    color=colors,
    edgecolor='black',
    alpha=0.7,
    label='Data Frequency',
)

# B. Trend line: the Gaussian curve drawn over the bars
ax.plot(x_smooth, y_smooth, color='darkorange', linewidth=2.5, label='Gaussian Fit')

# 5. Labels and title
ax.set_xlabel('nHet (Number of Heteroatoms)', fontsize=12)
ax.set_ylabel('% Frequency', fontsize=12)
ax.set_title('nHet Distribution', fontsize=14)

# X axis: a tick at every integer from 0 to 26, small labels so they all fit,
# and a view window of [-1, 26]
ax.set_xticks(np.arange(0, 27, 1))
for tick_label in ax.get_xticklabels():
    tick_label.set_fontsize(9)
ax.set_xlim(-1, 26)

# 6. Custom legend: the fit line first, then one swatch per colour zone.
# These handles replace the automatic legend entries, so the `label=` values
# given to bar()/plot() above are not what the legend displays.
zone_swatches = [
    ('green', 'Optimal (2 - 9 Heteroatoms)'),
    ('gold', 'Caution (1, 10-15 Heteroatoms)'),
    ('firebrick', 'Risk / Atypical (0, >15 Heteroatoms)'),
]
legend_elements = [
    Line2D([0], [0], color='darkorange', lw=2, label=f'Fit (Mean={mean}, SD={sd})'),
]
legend_elements.extend(
    Patch(facecolor=zone_color, edgecolor='black', alpha=0.7, label=zone_label)
    for zone_color, zone_label in zone_swatches
)

ax.legend(handles=legend_elements, loc='upper right')
ax.grid(axis='y', linestyle='--', alpha=0.5)
fig.tight_layout()

plt.show()