Quantitative Aquatic Bioconcentration (logBCF) Dashboard

DrugBank database
MolPort database
Python script number 114: builds the frequency-distribution graph of the logBCF parameter for DrugBank molecules.
import matplotlib.pyplot as plt
import matplotlib.colors as mcolors
from matplotlib.patches import Patch
import numpy as np
from scipy.interpolate import PchipInterpolator

# 1. PASTE YOUR DATA HERE
# Whitespace-separated table: one header row, then one
# "<bin center> <% frequency>" pair per line.
datos_crudos = """Bin Center	% Frequency
-1	0.0424628450106157
-0.8	0.0424628450106157
-0.6	0.212314225053079
-0.4	1.01910828025478
-0.2	2.92993630573248
0	6.62420382165605
0.2	9.42675159235669
0.4	12.8662420382166
0.6	13.8428874734607
0.8	9.42675159235669
1	8.49256900212314
1.2	7.04883227176221
1.4	5.77494692144374
1.6	5.98726114649681
1.8	3.56687898089172
2	3.60934182590234
2.2	3.43949044585987
2.4	2.50530785562633
2.6	1.48619957537155
2.8	0.764331210191083
3	0.424628450106157
3.2	0.212314225053079
3.4	0.0849256900212314
3.6	0.0849256900212314
3.8	0.0424628450106157
4	0.0424628450106157"""


# 2. AUTOMATIC PROCESSING
def parse_histogram(raw_table):
    """Parse a pasted two-column table into bin centers and frequencies.

    The text is stripped, its first line is discarded as the header row,
    and every remaining non-blank line must hold exactly two
    whitespace-separated numbers.

    Args:
        raw_table: The table text, header row included.

    Returns:
        A ``(centers, frequencies)`` tuple of two equally long float lists.

    Raises:
        ValueError: If a non-blank data line does not contain exactly two
            numeric fields. The message gives the line number (counted in
            the stripped text, header = line 1) and the line itself.
    """
    centers = []
    frequencies = []
    data_rows = raw_table.strip().split('\n')[1:]
    for row_number, row in enumerate(data_rows, start=2):
        fields = row.split()
        if not fields:
            # Blank lines slip in easily when pasting; ignore them instead
            # of failing on the two-value unpack below.
            continue
        try:
            # Convert both fields before storing either, so a bad row
            # never leaves the two lists with different lengths.
            center, frequency = (float(field) for field in fields)
        except ValueError as err:
            raise ValueError(
                f"line {row_number} of the pasted data is not "
                f"'<bin center> <% frequency>': {row!r}"
            ) from err
        centers.append(center)
        frequencies.append(frequency)
    return centers, frequencies


bins_array, freq_array = parse_histogram(datos_crudos)

bins = np.array(bins_array)
freq = np.array(freq_array)
# Mean of the bin centers, weighted by each bin's % frequency.
mean_val = np.average(bins, weights=freq)

# PCHIP interpolant through the (bin center, frequency) points, sampled
# on 500 evenly spaced x values spanning the first to last bin center.
interpolator = PchipInterpolator(bins, freq)
x_fit = np.linspace(min(bins), max(bins), 500)
y_fit = interpolator(x_fit)
# Frequencies cannot be negative, so floor the sampled curve at zero.
y_fit = np.clip(y_fit, 0, None)

# Colour bands on the logBCF logarithmic scale:
#   below 2.0 -> green, 2.0 up to and including 3.0 -> yellow,
#   above 3.0 -> red (bioconcentration)
def get_colors(b_array):
    """Return one hex colour string per value in *b_array*, in order."""
    palette = []
    for center in b_array:
        if center < 2.0:
            shade = '#008000'
        elif center <= 3.0:
            shade = '#FFD700'
        else:
            shade = '#B22222'
        palette.append(shade)
    return palette

# One hex colour per bar, then two RGBA variants of that list:
# a more transparent one for the bar faces and a more opaque one for the edges.
colors_hex = get_colors(bins)
face_colors, edge_colors = (
    [mcolors.to_rgba(hex_code, alpha=opacity) for hex_code in colors_hex]
    for opacity in (0.60, 0.90)
)

# 3. CREATION OF THE GRAPH AND THE GAUSSIAN
plt.figure(figsize=(7, 6))

# Histogram bars, one per bin center, coloured by logBCF band.
plt.bar(
    bins,
    freq,
    width=0.18,
    color=face_colors,
    edgecolor=edge_colors,
    linewidth=1.5,
    zorder=2,
)

# 4. Gaussian Fit Parameters
# Fixed values entered by hand; they are not computed from the data here.
amplitude, mean, sd = 11.48, 0.7058, 0.6631
gauss_y = amplitude * np.exp(
    -((x_fit - mean) ** 2) / (2 * sd ** 2)
)
# Gaussian curve drawn above the bars (higher zorder).
plt.plot(
    x_fit,
    gauss_y,
    color='orange',
    linewidth=2.5,
    linestyle='-',
    alpha=0.7,
    zorder=4,
)


# 5. LABELS AND TITLES
plt.title('Quantitative Aquatic Bioconcentration (logBCF)', fontsize=14)
plt.xlabel('logBCF (Logarithm of Bioconcentration Factor)', fontsize=12)
plt.ylabel('% Frequency', fontsize=12)

# Hand-built legend: one swatch per colour band, followed by the fit line.
legend_elements = [
    Patch(facecolor=mcolors.to_rgba(hex_code, 0.6), edgecolor=hex_code, label=caption)
    for hex_code, caption in (
        ('#008000', 'Low Bioaccumulation (< 2.0)'),
        ('#FFD700', 'Moderate Range (2.0 - 3.0)'),
        ('#B22222', 'High Bioconcentration (> 3.0)'),
    )
]
legend_elements.append(
    plt.Line2D(
        [0],
        [0],
        color='orange',
        lw=2,
        linestyle='-',
        alpha=0.7,
        label=f'Fit (Mean={mean}, SD={sd})',
    )
)
plt.legend(
    handles=legend_elements,
    loc='upper right',
    framealpha=0.95,
    fontsize=10,
)

# Horizontal guide lines only, drawn beneath the bars.
plt.grid(axis='y', linestyle=':', alpha=0.7, zorder=0)
# Pad the x range by 0.2 on each side and leave 15% headroom above the tallest bar.
plt.xlim(left=min(bins) - 0.2, right=max(bins) + 0.2)
plt.ylim(bottom=0, top=max(freq) * 1.15)
plt.tight_layout()

plt.show()