Environmental Hazard: Aquatic Toxicity (Bluegill Sunfish)

DrugBank database
MolPort database
Python script no. 105: builds the frequency-distribution graph of the Bluegill_sunfish_toxicity parameter for DrugBank molecules.
import matplotlib.pyplot as plt
import matplotlib.colors as mcolors
from matplotlib.patches import Patch
import numpy as np
from scipy.interpolate import PchipInterpolator

# 1. PASTE YOUR DATA HERE Bluegill_sunfish_toxicity
# Two whitespace/tab-separated columns (bin center, % frequency); the first
# line is a header and is skipped by the parser below.
datos_crudos = """Bin Center	% Frequency
0.05	5.81740976645435
0.1	10.3609341825902
0.15	7.60084925690021
0.2	6.24203821656051
0.25	5.09554140127389
0.3	4.3312101910828
0.35	4.11889596602972
0.4	4.11889596602972
0.45	4.41613588110403
0.5	3.60934182590234
0.55	3.43949044585987
0.6	4.11889596602972
0.65	4.62845010615711
0.7	5.22292993630573
0.75	5.52016985138004
0.8	5.77494692144374
0.85	5.56263269639066
0.9	5.60509554140127
0.95	3.69426751592357
1	0.721868365180467"""

# 2. AUTOMATIC PROCESSING
# splitlines() copes with both "\n" and "\r\n" pastes; [1:] drops the header.
lineas = datos_crudos.strip().splitlines()[1:]
bins_array = []
freq_array = []

for linea in lineas:
    campos = linea.split()
    if not campos:
        # Blank lines are common in pasted spreadsheet data; skip them
        # instead of failing on the two-value unpack.
        continue
    if len(campos) != 2:
        raise ValueError(
            f"Expected 2 columns (bin center, % frequency), "
            f"got {len(campos)}: {linea!r}"
        )
    bins_array.append(float(campos[0]))
    freq_array.append(float(campos[1]))

if not bins_array:
    # Fail here with a clear message rather than later in the numeric code.
    raise ValueError("datos_crudos contains no data rows below the header")

bins = np.array(bins_array)
freq = np.array(freq_array)

# Statistical Calculations
# Mean of the bin centers, each weighted by its % frequency.
mean_val = np.average(bins, weights=freq)

# PCHIP interpolation
# Interpolant through the (bin center, % frequency) points, sampled on
# 500 evenly spaced x values spanning the smallest to the largest bin center.
interpolator = PchipInterpolator(bins, freq)
x_fit = np.linspace(bins.min(), bins.max(), num=500)
# Negative interpolated values are floored at zero.
y_fit = np.clip(interpolator(x_fit), 0, None)

# Color Function
def get_colors(b_array):
    """Return one hex color string per value in ``b_array``, in order.

    Values below 0.4 map to '#008000', values from 0.4 up to and including
    0.7 map to '#FFD700', and every other value maps to '#B22222'.
    """
    def band_color(value):
        # Thresholds are checked in ascending order; the last return is the
        # fallback for anything that satisfies neither comparison.
        if value < 0.4:
            return '#008000'
        if value <= 0.7:
            return '#FFD700'
        return '#B22222'

    return [band_color(value) for value in b_array]

def _with_alpha(hex_codes, alpha):
    """Convert each hex color string to an RGBA tuple with the given alpha."""
    return [mcolors.to_rgba(code, alpha=alpha) for code in hex_codes]


# One hex color per bin, then two RGBA variants of it: a more transparent
# one for the bar fill and a more opaque one for the bar outline.
colors_hex = get_colors(bins)
face_colors = _with_alpha(colors_hex, 0.60)
edge_colors = _with_alpha(colors_hex, 0.90)

# 3. CREATION OF THE GRAPH
fig, ax = plt.subplots(figsize=(7, 6))

# Bars use zorder=2 so they are drawn above the grid lines (zorder=0).
ax.bar(
    bins,
    freq,
    width=0.04,
    color=face_colors,
    edgecolor=edge_colors,
    linewidth=1.5,
    zorder=2,
)

# 4. LABELS AND TITLES
ax.set_xlabel('Bluegill Sunfish Toxicity Probability (LC50 < 100 ppm)', fontsize=12)
ax.set_ylabel('% Frequency', fontsize=12)
ax.set_title('Environmental Hazard: Aquatic Toxicity (Bluegill Sunfish)', fontsize=14)

# One legend swatch per color band: (hex color, caption).
legend_specs = (
    ('#008000', 'Low Toxicity / Inert (< 0.4)'),
    ('#FFD700', 'Moderate Hazard (0.4 - 0.7)'),
    ('#B22222', 'High Lethality Risk (> 0.7)'),
)
legend_elements = [
    Patch(facecolor=mcolors.to_rgba(hex_code, 0.6), edgecolor=hex_code, label=caption)
    for hex_code, caption in legend_specs
]
ax.legend(handles=legend_elements, loc='upper right', framealpha=0.95, fontsize=10)

ax.grid(axis='y', linestyle=':', alpha=0.7, zorder=0)
ax.set_xlim(0, 1.05)
# Upper y limit is 20% above the tallest bar.
ax.set_ylim(0, freq.max() * 1.2)
fig.tight_layout()

plt.show()