pKa Distribution Dashboard

DrugBank database
MolPort database
Python script no. 14: builds the frequency-distribution graph of the pKa parameter for DrugBank molecules.
import matplotlib.pyplot as plt
from matplotlib.patches import Patch
from matplotlib.lines import Line2D
import numpy as np

# 1. pKa data
# One integer pKa bin centre per bar (-2 .. 14) and, in the same order,
# the bar height recorded for that bin.
bin_centers = list(range(-2, 15))
counts = [
    0.038, 0.154, 0.922, 1.384, 3.459, 6.726,
    13.528, 17.487, 13.374, 13.182, 12.875, 9.877,
    4.035, 1.806, 0.692, 0.346, 0.115,
]

# 2. Gaussian Fit Parameters (peak height, centre, standard deviation)
amplitude, mean, sd = 16.08, 6.017, 2.522


def _gaussian(x, peak, center, width):
    """Return peak * exp(-0.5 * ((x - center) / width) ** 2) for x."""
    return peak * np.exp(-0.5 * ((x - center) / width) ** 2)


# Dense, evenly spaced X grid so the fitted curve is drawn smoothly,
# and the fitted curve evaluated on that grid.
x_smooth = np.linspace(-7, 16, 300)
y_smooth = _gaussian(x_smooth, amplitude, mean, sd)


# 3. Define colors (Traffic light pKa - Physiological)
def _zone_color(pka):
    """Map a pKa bin centre to its traffic-light bar colour.

    'green'     -> optimal range, 3 to 8 inclusive (close to physiological
                   pH 7.4 and intestinal pH)
    'gold'      -> caution range, 0 up to (not including) 3, or above 8
                   up to and including 11
    'firebrick' -> everything else (below 0 or above 11)
    """
    if 3 <= pka <= 8:
        return 'green'
    in_low_band = 0 <= pka < 3
    in_high_band = 8 < pka <= 11
    return 'gold' if (in_low_band or in_high_band) else 'firebrick'


# One colour per bar, in the same order as bin_centers.
colors = [_zone_color(center) for center in bin_centers]

# 4. Create the chart
# Draws the binned pKa distribution as colour-coded bars with the Gaussian
# fit overlaid, then shows the figure. Uses bin_centers, counts, colors,
# x_smooth, y_smooth, mean and sd defined earlier in this script.
plt.figure(figsize=(7, 6))

# A. Draw Bars
# The bar heights in `counts` are fractional and sum to 100, i.e. they are
# percentages of drugs per bin rather than raw counts, so label them as such.
plt.bar(bin_centers, counts, width=0.8, color=colors, edgecolor='black', alpha=0.7, label='Drug Frequency (%)')

# B. Draw Trend Line
plt.plot(x_smooth, y_smooth, color='darkorange', linewidth=2.5, label='Gaussian Fit')

# 5. Tags and Titles
plt.xlabel('pKa (Logarithmic Acid Dissociation Constant)', fontsize=12)
plt.ylabel('Relative Frequency (% of Drugs)', fontsize=12)
plt.title('pKa Distribution', fontsize=14)

# Adjust X axis
# Bars are centred on the bin value, so leave one unit of margin on each side
# of the outermost bins. Fixed limits of (-1, 14) hid the bar at -2 entirely
# and cut the bars at -1 and 14 in half.
plt.xticks(bin_centers)
plt.xlim(min(bin_centers) - 1, max(bin_centers) + 1)

# 6. Custom Legend
# Explicit handles are used so the legend explains the colour bands; the
# per-artist labels passed to bar()/plot() above are therefore not shown.
legend_elements = [
    Line2D([0], [0], color='darkorange', lw=2, label=f'Fit (Mean={mean}, SD={sd})'),
    Patch(facecolor='green', edgecolor='black', alpha=0.7, label='Physiological Window (3 - 8)'),
    Patch(facecolor='gold', edgecolor='black', alpha=0.7, label='Strong Acid/Base (0-3, 8-11)'),
    Patch(facecolor='firebrick', edgecolor='black', alpha=0.7, label='Extreme Ionization (<0, >11)')
]

plt.legend(handles=legend_elements, loc='upper right')
plt.grid(axis='y', linestyle='--', alpha=0.5)
plt.tight_layout()

plt.show()