# To run this script, first download the two Excel files referenced below
# (path_validation and path_prospectivo) and place them in the folder set as base_path.
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import roc_curve, roc_auc_score
# --- PLOT COLORS ---
# COLOR_GOOD is used for class 1 and for negative PC1 loadings;
# COLOR_BAD is used for class 0 and for positive PC1 loadings.
COLOR_GOOD, COLOR_BAD = '#63C28D', '#C14E4E'

# --- FOLDER PATHS ---
# Windows-style folder that holds the input workbooks; the output figure is
# written to the same folder. Kept as a plain string because the rest of the
# script builds paths by string concatenation.
base_path = r"D:\my-path"  # running on Windows OS
path_validation = rf"{base_path}\49-bardox-isotioc-KEAP1-final-score.xlsx"
path_prospectivo = rf"{base_path}\CHEMBL3038498.xlsx"
def _add_admet_score(dataset, weights, inverted):
    """Add an 'ADMET_Score' column to `dataset` in place.

    The score is the weighted sum of every descriptor in `weights` that exists
    as a column of `dataset`. Descriptors listed in `inverted` contribute
    ``weight * (1 - value)`` instead of ``weight * value``. Descriptors that
    are missing from `dataset` are skipped.
    """
    dataset['ADMET_Score'] = 0.0
    for name, weight in weights.items():
        if name not in dataset.columns:
            continue
        value = dataset[name]
        dataset['ADMET_Score'] += weight * ((1 - value) if name in inverted else value)


def _top_fraction_enrichment(ranked_actives, fraction):
    """Return enrichment statistics for the top `fraction` of a ranked list.

    `ranked_actives` is a 0/1 Series already sorted best-score-first.

    Returns ``(pct_screened, pct_actives_found, enrichment_factor)``, or
    ``None`` when the list is empty or holds no actives, because the
    enrichment factor is undefined in that case.
    """
    n_total = len(ranked_actives)
    n_actives = ranked_actives.sum()
    if n_total == 0 or n_actives == 0:
        return None
    # Round up so at least one compound is selected; the small epsilon keeps
    # float noise (e.g. 3.0000000000000004) from selecting one compound extra.
    n_top = min(n_total, max(1, int(np.ceil(fraction * n_total - 1e-9))))
    hits = ranked_actives.iloc[:n_top].sum()
    pct_screened = n_top / n_total * 100
    pct_found = hits / n_actives * 100
    enrichment_factor = (hits / n_top) / (n_actives / n_total)
    return pct_screened, pct_found, enrichment_factor


def generate_figure_11_final():
    """Build, save and show the four-panel Figure 11.

    Reads the 'final-score' sheet of `path_validation` and the first sheet of
    `path_prospectivo`, computes a weighted ADMET score for both, and draws:

    A) ROC curve of the score against 'Clase_Binaria' (validation set).
    B) PCA scatter (PC1 vs PC2) of the standardized score descriptors.
    C) PC1 loadings of those descriptors.
    D) Cumulative enrichment of actives (pChEMBL >= 6.0) in the prospective
       set when ranked by the score, with the top-5% point highlighted.

    The figure is written to ``base_path + r"\\Figure_11.png"`` at 300 dpi and
    then displayed. Takes no arguments and returns None.
    """
    print("Loading and clearing data...")

    # 1. Load both workbooks and drop rows lacking the column each analysis needs.
    #    .copy() so the score column is added to an independent frame.
    df_val = pd.read_excel(path_validation, sheet_name='final-score')
    df_val = df_val.dropna(subset=['Clase_Binaria']).copy()
    df_pros = pd.read_excel(path_prospectivo).dropna(subset=['pChEMBL']).copy()

    # Weights and inversion logic: descriptors in `invert` enter the score as
    # (1 - value). NOTE(review): presumably all descriptors are scaled to
    # [0, 1] -- confirm against the input workbooks.
    pesos = {'Ames': 0.163, 'Caco_2': 0.163, 'HIA': 0.154, 'BBB': 0.147, 'PPB': 0.142,
             'Pgp_inhibitor': 0.141, 'hERG_1uM': 0.113, 'ARE': 0.047,
             'CYP3A4_inhibitor': 0.009, 'DILI': 0.005}
    invert = ['Caco_2', 'BBB', 'HIA', 'Pgp_inhibitor', 'DILI']

    # 2. Score calculation
    for dataset in (df_val, df_pros):
        _add_admet_score(dataset, pesos, invert)
    df_pros['Activo_Real'] = (df_pros['pChEMBL'] >= 6.0).astype(int)

    # 3. Figure configuration
    fig = plt.figure(figsize=(18, 14))
    gs = fig.add_gridspec(2, 2, hspace=0.35, wspace=0.25)

    # --- PANEL A: ROC (Validation) ---
    ax_a = fig.add_subplot(gs[0, 0])
    fpr, tpr, _ = roc_curve(df_val['Clase_Binaria'], df_val['ADMET_Score'])
    # Report the AUC actually obtained from the data instead of a fixed value.
    auc_val = roc_auc_score(df_val['Clase_Binaria'], df_val['ADMET_Score'])
    ax_a.plot(fpr, tpr, color=COLOR_GOOD, lw=4, label=f'ADMET-Score (AUC={auc_val:.2f})')
    ax_a.plot([0, 1], [0, 1], 'k--', alpha=0.3)
    ax_a.set_title('A) Method Validation (Docking-based)', fontweight='bold', size=16, loc='left')
    ax_a.set_xlabel('False Positive Rate')
    ax_a.set_ylabel('True Positive Rate')
    ax_a.legend()

    # --- PANEL B: PCA (Validation) ---
    ax_b = fig.add_subplot(gs[0, 1])
    params = [p for p in pesos if p in df_val.columns]
    X_scaled = StandardScaler().fit_transform(df_val[params])
    # One PCA fit serves panels B and C, so the loadings in C describe exactly
    # the PC1 axis drawn in B.
    pca = PCA(n_components=2)
    pca_coords = pca.fit_transform(X_scaled)
    # Name the classes before plotting so each legend entry is tied to its own
    # colour rather than to the order in which seaborn emits the handles.
    # NOTE(review): assumes Clase_Binaria == 1 is the high-affinity class,
    # consistent with it being drawn in COLOR_GOOD -- confirm.
    affinity = np.where(df_val['Clase_Binaria'] == 1, 'High Affinity', 'Low Affinity')
    sns.scatterplot(ax=ax_b, x=pca_coords[:, 0], y=pca_coords[:, 1], hue=affinity,
                    hue_order=['High Affinity', 'Low Affinity'],
                    palette={'High Affinity': COLOR_GOOD, 'Low Affinity': COLOR_BAD},
                    s=180, edgecolor='k', alpha=0.8)
    ax_b.set_title('B) ADMET Chemical Space Mapping', fontweight='bold', size=16, loc='left')
    ax_b.set_xlabel('PC1')
    ax_b.set_ylabel('PC2')
    ax_b.get_legend().set_title('Affinity Class')

    # --- PANEL C: LOADINGS ---
    ax_c = fig.add_subplot(gs[1, 0])
    loadings = pd.DataFrame({'Param': params, 'PC1': pca.components_[0]}).sort_values('PC1')
    loadings['Color'] = [COLOR_BAD if x > 0 else COLOR_GOOD for x in loadings['PC1']]
    sns.barplot(ax=ax_c, x='PC1', y='Param', data=loadings, hue='Param',
                palette=dict(zip(loadings['Param'], loadings['Color'])),
                edgecolor='k', legend=False)
    ax_c.set_title('C) Navigability Drivers (PC1 Loadings)', fontweight='bold', size=16, loc='left')
    ax_c.axvline(0, color='k', lw=1.5)

    # --- PANEL D: ENRICHMENT (ChEMBL) ---
    ax_d = fig.add_subplot(gs[1, 1])
    df_p_sorted = df_pros.sort_values('ADMET_Score', ascending=False).reset_index(drop=True)
    df_p_sorted['Cum_Actives'] = df_p_sorted['Activo_Real'].cumsum() / df_p_sorted['Activo_Real'].sum() * 100
    df_p_sorted['Percentile'] = (df_p_sorted.index + 1) / len(df_p_sorted) * 100
    ax_d.plot(df_p_sorted['Percentile'], df_p_sorted['Cum_Actives'], color='teal', lw=4,
              label='ADMET-Score Ranking')
    ax_d.plot([0, 100], [0, 100], 'k--', alpha=0.5, label='Random Selection')

    # Top 5% point: computed from the ranking so the marker sits on the curve
    # and the printed enrichment factor matches the data.
    top_fraction = 0.05
    top_stats = _top_fraction_enrichment(df_p_sorted['Activo_Real'], top_fraction)
    if top_stats is not None:
        pct_screened, pct_found, enrichment_factor = top_stats
        ax_d.scatter(pct_screened, pct_found, color='orange', s=250, zorder=5, edgecolor='k')
        ax_d.annotate(f"Top {top_fraction:.0%}\nEF={enrichment_factor:.2f}x",
                      xy=(pct_screened, pct_found), xytext=(15, -35),
                      textcoords='offset points',
                      fontweight='bold', color='darkorange', size=13)

    ax_d.set_title('D) Prospective Enrichment (ChEMBL Library)', fontweight='bold', size=16, loc='left')
    ax_d.set_xlabel('% of Compounds Screened')
    ax_d.set_ylabel('% of Total Actives Found')
    ax_d.legend(loc='lower right')
    ax_d.grid(alpha=0.2)

    plt.subplots_adjust(top=0.90, bottom=0.08, left=0.10, right=0.95, hspace=0.4, wspace=0.3)
    save_name = base_path + r"\Figure_11.png"
    plt.savefig(save_name, dpi=300, bbox_inches='tight')
    plt.show()
    print(f"Success! Figure saved as: {save_name}")
# Script entry point: build the figure only when this file is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    generate_figure_11_final()