import matplotlib.pyplot as plt
from matplotlib.patches import Patch
from matplotlib.lines import Line2D
import numpy as np
# 1. pKa data
# Histogram of drug pKa values: one entry per integer pKa bin, in ascending
# order.
bin_centers = [-2, -1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14]
# Bar height for each bin, in the same order as bin_centers.
# NOTE(review): the values are fractional and sum to 100, so they look like
# percentages rather than raw counts -- confirm against the data source.
counts = [
    0.038, 0.154, 0.922, 1.384, 3.459, 6.726, 13.528, 17.487, 13.374,
    13.182, 12.875, 9.877, 4.035, 1.806, 0.692, 0.346, 0.115,
]

# 2. Gaussian Fit Parameters
# Hard-coded parameters of the Gaussian curve that is overlaid on the bars.
amplitude = 16.08  # peak height of the curve (same units as counts)
mean = 6.017       # pKa at which the curve peaks
sd = 2.522         # standard deviation of the curve, in pKa units

# Dense, evenly spaced x grid so the overlaid curve is drawn smoothly.
x_smooth = np.linspace(-7, 16, 300)
# Un-normalised Gaussian: amplitude * exp(-(x - mean)^2 / (2 * sd^2)).
y_smooth = amplitude * np.exp(-0.5 * ((x_smooth - mean) / sd) ** 2)


# 3. Define colors (Traffic light pKa - Physiological)
def _pka_zone_color(pka):
    """Return the traffic-light colour name for a pKa value.

    - 'green':     3 <= pKa <= 8 (optimal range, close to physiological pH
                   7.4 and intestinal pH)
    - 'gold':      0 <= pKa < 3 (strong acids) or 8 < pKa <= 11 (strong bases)
    - 'firebrick': everything else (pKa < 0 or pKa > 11)
    """
    if 3 <= pka <= 8:
        return 'green'
    if 0 <= pka < 3 or 8 < pka <= 11:
        return 'gold'
    return 'firebrick'


# One colour per bin, aligned index-for-index with bin_centers and counts.
colors = [_pka_zone_color(x) for x in bin_centers]
# 4. Create the chart
# Use the explicit Figure/Axes interface so every call below targets a named
# axes object instead of pyplot's implicit "current axes".
fig, ax = plt.subplots(figsize=(7, 6))

# A. Draw Bars: one bar per pKa bin, coloured by the per-bin colour list.
# The label= values on the bar/line artists are not displayed, because the
# legend further down is built from explicit handles.
ax.bar(
    bin_centers,
    counts,
    width=0.8,
    color=colors,
    edgecolor='black',
    alpha=0.7,
    label='Drug Percentage',
)

# B. Draw Trend Line: the Gaussian curve overlaid on the bars.
ax.plot(x_smooth, y_smooth, color='darkorange', linewidth=2.5, label='Gaussian Fit')

# 5. Labels and Title
ax.set_xlabel('pKa (Logarithmic Acid Dissociation Constant)', fontsize=12)
# The bar heights are fractional and sum to 100, i.e. they are percentages of
# drugs per bin, not raw counts; the axis label states that unit.
ax.set_ylabel('Percentage of Drugs (%)', fontsize=12)
ax.set_title('pKa Distribution', fontsize=14)

# Adjust X axis: one tick per integer pKa.
ax.set_xticks(np.arange(-6, 16, 1))
# Derive the limits from the data with one unit of padding on each side so
# that every bin is fully visible. Fixed limits of (-1, 14) would hide the
# bin centred at -2 and cut the bars centred at -1 and 14 in half.
ax.set_xlim(min(bin_centers) - 1, max(bin_centers) + 1)

# 6. Custom Legend
# Proxy artists: one line for the fit and one patch per colour zone, so the
# legend explains the colour coding rather than listing the plotted artists.
legend_elements = [
    Line2D([0], [0], color='darkorange', lw=2, label=f'Fit (Mean={mean}, SD={sd})'),
    Patch(facecolor='green', edgecolor='black', alpha=0.7, label='Physiological Window (3 - 8)'),
    Patch(facecolor='gold', edgecolor='black', alpha=0.7, label='Strong Acid/Base (0-3, 8-11)'),
    Patch(facecolor='firebrick', edgecolor='black', alpha=0.7, label='Extreme Ionization (<0, >11)'),
]
ax.legend(handles=legend_elements, loc='upper right')

# Horizontal guide lines only, to help read bar heights.
ax.grid(axis='y', linestyle='--', alpha=0.5)
fig.tight_layout()
plt.show()