Aquatic Environmental Hazard: Toxicity in Crustaceans (D. magna) Dashboard

DrugBank database
MolPort database
Python script number 102 to build the frequency distribution graph of the magna_toxicity parameter on DrugBank molecules.
import matplotlib.pyplot as plt
import matplotlib.colors as mcolors
from matplotlib.patches import Patch
import numpy as np
from scipy.interpolate import PchipInterpolator

# 1. PASTE YOUR DATA HERE
# Two whitespace-separated columns (tabs or spaces): bin center, % frequency.
# The first line is a header row and is discarded by the parser below.
datos_crudos = """Bin Center	% Frequency
0	0.212314225053079
0.05	7.5583864118896
0.1	8.6624203821656
0.15	7.13375796178344
0.2	5.56263269639066
0.25	4.45859872611465
0.3	3.94904458598726
0.35	3.82165605095541
0.4	3.43949044585987
0.45	3.69426751592357
0.5	3.90658174097665
0.55	3.56687898089172
0.6	4.07643312101911
0.65	4.28874734607219
0.7	4.71337579617834
0.75	4.37367303609342
0.8	4.96815286624204
0.85	6.4968152866242
0.9	7.77070063694268
0.95	6.62420382165605
1	0.721868365180467"""


# 2. AUTOMATIC PROCESSING
def parse_frequency_table(raw_text):
    """Parse a pasted two-column table into ``(bins, freq)`` float arrays.

    The first line of the stripped text is treated as a header and skipped.
    Every remaining line is split on any run of whitespace, so tabs, single
    spaces, repeated separators and Windows line endings are all accepted.
    Blank lines are ignored instead of aborting the script.

    Args:
        raw_text: The table as text; column 1 is the bin center and
            column 2 the frequency.

    Returns:
        A tuple ``(bins, freq)`` of 1-D NumPy float arrays of equal length.

    Raises:
        ValueError: If a non-blank data line has fewer than two fields, or
            a field cannot be converted to ``float``.
    """
    centers = []
    percents = []
    data_rows = raw_text.strip().splitlines()[1:]
    # Numbering starts at 2 because line 1 is the header that was dropped.
    for line_no, row in enumerate(data_rows, start=2):
        fields = row.split()
        if not fields:
            # Blank line left over from copy/paste: nothing to parse.
            continue
        if len(fields) < 2:
            raise ValueError(
                f"line {line_no}: expected two columns, got {row!r}"
            )
        centers.append(float(fields[0]))
        percents.append(float(fields[1]))
    return np.array(centers), np.array(percents)


bins, freq = parse_frequency_table(datos_crudos)
# Frequency-weighted mean of the bin centers.
mean_val = np.average(bins, weights=freq)

# Smooth interpolation for data trend
# NOTE(review): mean_val, x_fit and y_fit are not referenced by the plotting
# code visible in this file; they are kept so the module-level names remain.
interpolator = PchipInterpolator(bins, freq)
x_fit = np.linspace(bins.min(), bins.max(), 500)
y_fit = interpolator(x_fit)
# A frequency cannot be negative, so floor the fitted curve at zero.
y_fit = np.clip(y_fit, 0, None)

# Ecological Traffic Light
def get_colors(b_array):
    """Return one traffic-light hex color per value in ``b_array``.

    Values below 0.4 map to green (``#008000``), values from 0.4 up to and
    including 0.7 map to gold (``#FFD700``), and every other value maps to
    red (``#B22222``).

    Args:
        b_array: Iterable of numeric values.

    Returns:
        A list of hex color strings, in the same order as the input.
    """
    palette = []
    for value in b_array:
        if value < 0.4:
            shade = '#008000'
        elif value <= 0.7:
            shade = '#FFD700'
        else:
            shade = '#B22222'
        palette.append(shade)
    return palette

# Per-bar colors: a translucent fill with a slightly more opaque outline.
colors_hex = get_colors(bins)
face_colors = [mcolors.to_rgba(hex_code, alpha=0.60) for hex_code in colors_hex]
edge_colors = [mcolors.to_rgba(hex_code, alpha=0.90) for hex_code in colors_hex]

# 3. CREATION OF THE GRAPH
fig, ax = plt.subplots(figsize=(7, 6))

# Bars are drawn above the grid (zorder 2 vs 0) so grid lines do not cross them.
ax.bar(
    bins,
    freq,
    width=0.04,
    color=face_colors,
    edgecolor=edge_colors,
    linewidth=1.5,
    zorder=2,
)

# 4. LABELS AND TITLES
ax.set_xlabel('Aquatic Toxicity Probability (EC50 < 100 ppm)', fontsize=12)
ax.set_ylabel('% Frequency', fontsize=12)
ax.set_title('Aquatic Environmental Hazard: Toxicity in Crustaceans (D. magna)', fontsize=14)

# One legend swatch per traffic-light category: (hex color, label text).
legend_spec = (
    ('#008000', 'Non-Toxic / Safe (< 0.4)'),
    ('#FFD700', 'Moderate Risk (0.4 - 0.7)'),
    ('#B22222', 'High Toxicity (> 0.7)'),
)
legend_elements = [
    Patch(facecolor=mcolors.to_rgba(hex_code, 0.6), edgecolor=hex_code, label=text)
    for hex_code, text in legend_spec
]
ax.legend(handles=legend_elements, loc='upper center', framealpha=0.95, fontsize=10)

ax.grid(axis='y', linestyle=':', alpha=0.7, zorder=0)
ax.set_xlim(-0.05, 1.05)
# Leave 15% headroom above the tallest bar.
ax.set_ylim(0, max(freq) * 1.15)
fig.tight_layout()

plt.show()