logP Distribution Dashboard

DrugBank database
MolPort database
Python script number 13: builds the frequency distribution graph of the logP parameter for DrugBank molecules.
import matplotlib.pyplot as plt
from matplotlib.patches import Patch
import numpy as np

# 1. Input data for the bars.
# Each row pairs a logP bin center with the frequency plotted for that bin;
# the rows are then unpacked into the two parallel lists used further down.
_histogram_rows = [
    (-5, 0.040453074433657),
    (-4, 1.73948220064725),
    (-3, 4.28802588996764),
    (-2, 4.81391585760518),
    (-1, 8.05016181229773),
    (0, 12.2168284789644),
    (1, 10.4773462783172),
    (2, 15.9385113268608),
    (3, 17.6779935275081),
    (4, 13.6326860841424),
    (5, 6.59385113268608),
    (6, 2.79126213592233),
    (7, 1.13268608414239),
    (8, 0.606796116504854),
]
bin_centers = [center for center, _ in _histogram_rows]
frequencies = [frequency for _, frequency in _histogram_rows]

# 2. Gaussian Fit Parameters (User provided)
# These values are taken as given; no fitting is performed in this script.
amplitude = 15.98  # peak height of the curve (value of Y at X == mean)
mean = 2.052       # X position of the peak
sd = 2.503         # standard deviation, i.e. the width of the curve

# Smooth X grid for the curve. The outermost bin centers are -5 and 8 and the
# bins are 1 unit apart, so the grid is padded by half a bin (0.5) on each
# side. This makes the curve start a bit before and end a bit after the bars
# instead of stopping at the centers of the two edge bars.
x_smooth = np.linspace(-5.5, 8.5, 200)

# Evaluate the provided equation on the grid:
#   Y = Amplitude * exp(-0.5 * ((X - Mean) / SD)^2)
y_smooth = amplitude * np.exp(-0.5 * ((x_smooth - mean) / sd) ** 2)

# 3. "Traffic Light" colour for every bar, decided from its bin center
def _traffic_light_color(center):
    """Return the bar colour for one bin center.

    'green' for 2..3 (Optimal), 'gold' for 0 up to (but excluding) 2 and for
    4..5 (Acceptable), 'firebrick' for everything else (Risk).

    NOTE: a center strictly between 3 and 4 matches neither range and is
    therefore coloured 'firebrick'; the integer centers used here never hit
    that case.
    """
    if 2 <= center <= 3:
        return 'green'
    if 0 <= center < 2 or 4 <= center <= 5:
        return 'gold'
    return 'firebrick'


colors = [_traffic_light_color(center) for center in bin_centers]

# 4. Create the figure and its single Axes (object-oriented Matplotlib API)
fig, ax = plt.subplots(figsize=(7, 6))

# A. Bars: one per bin center, coloured by the traffic-light list
ax.bar(
    bin_centers,
    frequencies,
    color=colors,
    edgecolor='black',
    alpha=0.7,
    label='Data Frequency',
)

# B. Trend line: the Gaussian curve drawn over the bars
ax.plot(
    x_smooth,
    y_smooth,
    color='darkorange',
    linewidth=2,
    label='Gaussian Fit',
)

# 5. Axis labels, title, and one x tick per bin center
ax.set_xlabel('logP (Bin Center)', fontsize=12)
ax.set_ylabel('% Frequency', fontsize=12)
ax.set_title('logP Distribution', fontsize=14)
ax.set_xticks(bin_centers)

# 6. Custom legend
# The legend is built from hand-made handles: a Line2D standing in for the
# fitted curve and one Patch per traffic-light colour.
from matplotlib.lines import Line2D

fit_handle = Line2D(
    [0], [0],
    color='darkorange',
    lw=2.5,
    label=f'Fit (Mean={mean}, SD={sd})',
)

# (face colour, legend text) for each colour category, in display order
zone_entries = (
    ('green', 'Optimal (2 to 3)'),
    ('gold', 'Acceptable (0-1, 4-5)'),
    ('firebrick', 'Risk / Outlier (<0, >5)'),
)
zone_handles = [
    Patch(facecolor=face, edgecolor='black', alpha=0.7, label=text)
    for face, text in zone_entries
]

legend_elements = [fit_handle, *zone_handles]

# Legend and horizontal grid lines go on the current Axes
current_axes = plt.gca()
current_axes.legend(handles=legend_elements, loc='upper left')
current_axes.grid(axis='y', linestyle='--', alpha=0.5)

# Display the finished figure
plt.show()