logS (Aqueous Solubility) Distribution Dashboard

DrugBank database
MolPort database
Python script No. 11: builds the frequency-distribution graph of the logS parameter for DrugBank molecules.
import matplotlib.pyplot as plt
from matplotlib.patches import Patch
from matplotlib.lines import Line2D
import numpy as np

# 1. logS histogram
# Each entry pairs a bin centre (logS, spaced 0.5 apart) with its frequency
# in percent. Keeping the pairs together makes it impossible for the two
# derived lists below to drift out of step.
_LOGS_HISTOGRAM = [
    (-9, 0.04), (-8.5, 0.12), (-8, 0.24), (-7.5, 0.44),
    (-7, 1.50), (-6.5, 1.62), (-6, 3.11), (-5.5, 5.02),
    (-5, 7.52), (-4.5, 9.79), (-4, 9.51), (-3.5, 9.55),
    (-3, 8.29), (-2.5, 9.59), (-2, 6.15), (-1.5, 6.15),
    (-1, 4.53), (-0.5, 5.46), (0, 4.49), (0.5, 4.25),
    (1, 2.02), (1.5, 0.57), (2, 0.04),
]
bin_centers = [center for center, _ in _LOGS_HISTOGRAM]
frequencies = [percent for _, percent in _LOGS_HISTOGRAM]

# 2. Gaussian fit parameters
amplitude = 9.421
mean = -3.172
sd = 2.158

# Sample the fitted curve on 300 evenly spaced points from -9.5 to 2.5,
# i.e. half a unit beyond the outermost bin centres on each side.
x_smooth = np.linspace(-9.5, 2.5, num=300)
_z_scores = (x_smooth - mean) / sd
y_smooth = amplitude * np.exp(-0.5 * _z_scores**2)


# 3. Bar colours (logS traffic light)
def _traffic_light(logs_value):
    """Return the bar colour for a single logS bin centre.

    green     -- optimal range, -4 to 0.5 inclusive
    gold      -- caution range, -6 up to (not including) -4,
                 or above 0.5 up to and including 1
    firebrick -- everything else (below -6 or above 1)
    """
    if -4 <= logs_value <= 0.5:
        return 'green'
    in_low_caution = -6 <= logs_value < -4
    in_high_caution = 0.5 < logs_value <= 1.0
    return 'gold' if (in_low_caution or in_high_caution) else 'firebrick'


colors = [_traffic_light(center) for center in bin_centers]

# 4. Build the figure: a single square axes holding bars plus fitted curve
fig, ax = plt.subplots(figsize=(7, 7))

# A. Histogram bars. Bars are 0.4 wide, so neighbouring bars (centres 0.5
#    apart) stay visually separated; each bar takes its traffic-light colour.
ax.bar(
    bin_centers,
    frequencies,
    width=0.4,
    color=colors,
    edgecolor='black',
    alpha=0.7,
)

# B. Gaussian trend line drawn over the bars
ax.plot(
    x_smooth,
    y_smooth,
    color='darkorange',
    linewidth=2.5,
    label='Gaussian Fit',
)

# 5. Axis labels, title and one tick per bin centre
ax.set_xlabel('logS (Aqueous Solubility)', fontsize=12)
ax.set_ylabel('% Frequency', fontsize=12)
ax.set_title('logS Distribution', fontsize=14)
ax.set_xticks(bin_centers)
for tick_label in ax.get_xticklabels():
    # Rotated, smaller labels so that all bin centres fit on the axis
    tick_label.set(rotation=45, fontsize=9)

# 6. Custom legend. The handles are built by hand so that the legend can
#    explain the bar colours; because explicit handles are passed, only
#    these entries are shown.
_range_swatches = [
    ('green', 'Optimal Soluble (-4 to 0.5)'),
    ('gold', 'Poor/High Sol. (-6 to -4, 0.5 to 1)'),
    ('firebrick', 'Insoluble/Too Polar (< -6, > 1)'),
]
legend_elements = [
    Line2D([0], [0], color='darkorange', lw=2.5, label=f'Fit (Mean={mean}, SD={sd})'),
    *(
        Patch(facecolor=shade, edgecolor='black', alpha=0.7, label=text)
        for shade, text in _range_swatches
    ),
]

ax.legend(handles=legend_elements, loc='upper left')
ax.grid(axis='y', linestyle='--', alpha=0.5)
fig.tight_layout()

plt.show()