"""
ADMETsar 3.0-derived chemical navigability rules from DrugBank-approved drugs.

Python script to generate Figure 11.

To run this script, you need to download the following two Excel files and
place them in the folder set as `base_path`:
"49-bardox-isotioc-KEAP1-final-score.xlsx" (sheet "final-score") and "CHEMBL3038498.xlsx".
"script-number-132": Multi-parametric validation of the ADMET-first screening strategy for KEAP1 modulators.
"""
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import roc_curve, roc_auc_score

# --- INPUT LOCATIONS ---
# Root folder that holds the two input workbooks (Windows-style path);
# the figure-building function also writes its output image here.
base_path = r"D:\my-path"

# Full paths of the validation workbook and the prospective workbook,
# built by appending each file name to the root folder.
path_validation, path_prospectivo = (
    base_path + file_suffix
    for file_suffix in (
        r"\49-bardox-isotioc-KEAP1-final-score.xlsx",
        r"\CHEMBL3038498.xlsx",
    )
)

# Hex colours shared by the figure panels.
COLOR_GOOD, COLOR_BAD = '#63C28D', '#C14E4E'

def _add_admet_score(dataset, pesos, invert):
    """Add an 'ADMET_Score' column to `dataset` in place.

    The score is the weighted sum of the endpoint columns named in `pesos`;
    endpoints listed in `invert` contribute as ``1 - value``. Endpoints that
    are missing from `dataset` are skipped, so the score is then built from
    the remaining weights only.
    """
    dataset['ADMET_Score'] = 0.0
    for endpoint, weight in pesos.items():
        if endpoint not in dataset.columns:
            continue
        column = dataset[endpoint]
        # NOTE(review): the (1 - value) inversion assumes endpoint columns are
        # scaled to [0, 1] -- confirm against the input workbooks.
        dataset['ADMET_Score'] += weight * ((1 - column) if endpoint in invert else column)


def _top_fraction_enrichment(ranked_actives, top_pct):
    """Summarise early enrichment for a best-first ranked 0/1 activity series.

    Returns ``(screened_pct, recall_pct, enrichment_factor)`` for the first
    ``ceil(top_pct % of N)`` rows, or ``None`` when the series is empty or
    contains no actives (both ratios would be undefined).
    """
    n_total = len(ranked_actives)
    n_actives = ranked_actives.sum()
    if n_total == 0 or n_actives == 0:
        return None
    n_top = max(1, int(np.ceil(n_total * top_pct / 100)))
    hits = ranked_actives.iloc[:n_top].sum()
    screened_pct = n_top / n_total * 100
    recall_pct = hits / n_actives * 100
    # EF = hit rate inside the selection / hit rate of the whole library.
    enrichment_factor = (hits / n_top) / (n_actives / n_total)
    return screened_pct, recall_pct, enrichment_factor


def generate_figure_11_final():
    """Build the four-panel Figure 11, save it as a PNG and display it.

    Reads the validation workbook (`path_validation`, sheet 'final-score')
    and the prospective workbook (`path_prospectivo`), computes a weighted
    'ADMET_Score' for both, and draws:

    A) ROC curve of the score against 'Clase_Binaria' (validation set);
    B) 2-component PCA scatter of the standardised ADMET endpoints;
    C) PC1 loadings of that same PCA;
    D) cumulative enrichment curve on the prospective set, where a compound
       is labelled active when ``pChEMBL >= 6.0``.

    The image is written to ``base_path`` as 'Figure_11.png'. Takes no
    arguments and returns nothing.
    """
    print("Loading and clearing data...")

    # 1. Load both workbooks and drop rows lacking the column each analysis needs
    #    ('Clase_Binaria' for validation, 'pChEMBL' for the prospective set).
    df_val = pd.read_excel(path_validation, sheet_name='final-score')
    df_val = df_val.dropna(subset=['Clase_Binaria']).copy()

    df_pros = pd.read_excel(path_prospectivo).dropna(subset=['pChEMBL']).copy()

    # Weights and inversion logic: endpoints in `invert` enter the score as (1 - value).
    pesos = {'Ames': 0.163, 'Caco_2': 0.163, 'HIA': 0.154, 'BBB': 0.147, 'PPB': 0.142,
             'Pgp_inhibitor': 0.141, 'hERG_1uM': 0.113, 'ARE': 0.047, 'CYP3A4_inhibitor': 0.009, 'DILI': 0.005}
    invert = ['Caco_2', 'BBB', 'HIA', 'Pgp_inhibitor', 'DILI']

    # 2. Score calculation (same rule for both datasets)
    for dataset in (df_val, df_pros):
        _add_admet_score(dataset, pesos, invert)

    df_pros['Activo_Real'] = (df_pros['pChEMBL'] >= 6.0).astype(int)

    # 3. Figure configuration
    fig = plt.figure(figsize=(18, 14))
    gs = fig.add_gridspec(2, 2, hspace=0.35, wspace=0.25)

    # --- PANEL A: ROC (Validation) ---
    ax_a = fig.add_subplot(gs[0, 0])
    fpr, tpr, _ = roc_curve(df_val['Clase_Binaria'], df_val['ADMET_Score'])
    # Report the AUC actually obtained from the data instead of a fixed literal.
    auc_value = roc_auc_score(df_val['Clase_Binaria'], df_val['ADMET_Score'])
    ax_a.plot(fpr, tpr, color=COLOR_GOOD, lw=4, label=f'ADMET-Score (AUC={auc_value:.2f})')
    ax_a.plot([0, 1], [0, 1], 'k--', alpha=0.3)
    ax_a.set_title('A) Method Validation (Docking-based)', fontweight='bold', size=16, loc='left')
    ax_a.set_xlabel('False Positive Rate')
    ax_a.set_ylabel('True Positive Rate')
    ax_a.legend()

    # --- PANEL B: PCA (Validation) ---
    ax_b = fig.add_subplot(gs[0, 1])
    params = [p for p in pesos if p in df_val.columns]
    X_scaled = StandardScaler().fit_transform(df_val[params])
    pca = PCA(n_components=2)
    pca_coords = pca.fit_transform(X_scaled)
    # Name the classes before plotting so each legend entry is tied to its own
    # colour; relabelling afterwards with legend(labels=...) is positional and
    # can attach a label to the wrong class.
    # NOTE(review): class 1 is taken to be the high-affinity class, as it is
    # the one drawn in COLOR_GOOD -- confirm against the workbook.
    affinity = np.where(df_val['Clase_Binaria'] == 1, 'High Affinity', 'Low Affinity')
    sns.scatterplot(ax=ax_b, x=pca_coords[:, 0], y=pca_coords[:, 1], hue=affinity,
                    hue_order=['High Affinity', 'Low Affinity'],
                    palette={'High Affinity': COLOR_GOOD, 'Low Affinity': COLOR_BAD},
                    s=180, edgecolor='k', alpha=0.8)
    ax_b.set_title('B) ADMET Chemical Space Mapping', fontweight='bold', size=16, loc='left')
    ax_b.set_xlabel('PC1')
    ax_b.set_ylabel('PC2')
    ax_b.get_legend().set_title('Affinity Class')

    # --- PANEL C: LOADINGS ---
    ax_c = fig.add_subplot(gs[1, 0])
    # Reuse the PCA fitted for panel B so both panels describe the same model.
    loadings = pd.DataFrame({'Param': params, 'PC1': pca.components_[0]}).sort_values('PC1')
    loadings['Color'] = np.where(loadings['PC1'] > 0, COLOR_BAD, COLOR_GOOD)
    sns.barplot(ax=ax_c, x='PC1', y='Param', data=loadings, hue='Param',
                palette=dict(zip(loadings['Param'], loadings['Color'])),
                edgecolor='k', legend=False)
    ax_c.set_title('C) Navigability Drivers (PC1 Loadings)', fontweight='bold', size=16, loc='left')
    ax_c.axvline(0, color='k', lw=1.5)

    # --- PANEL D: ENRICHMENT (ChEMBL) ---
    ax_d = fig.add_subplot(gs[1, 1])
    # drop=True: the old index is not needed, and keeping it would fail if the
    # workbook already had a column named 'index'.
    df_p_sorted = df_pros.sort_values('ADMET_Score', ascending=False).reset_index(drop=True)
    df_p_sorted['Cum_Actives'] = df_p_sorted['Activo_Real'].cumsum() / df_p_sorted['Activo_Real'].sum() * 100
    df_p_sorted['Percentile'] = (df_p_sorted.index + 1) / len(df_p_sorted) * 100

    ax_d.plot(df_p_sorted['Percentile'], df_p_sorted['Cum_Actives'], color='teal', lw=4, label='ADMET-Score Ranking')
    ax_d.plot([0, 100], [0, 100], 'k--', alpha=0.5, label='Random Selection')

    # Top 5% point: computed from the ranking so the marker sits on the curve
    # and the enrichment factor matches the plotted data.
    top_pct = 5
    enrichment = _top_fraction_enrichment(df_p_sorted['Activo_Real'], top_pct)
    if enrichment is not None:
        screened_pct, recall_pct, enrichment_factor = enrichment
        ax_d.scatter(screened_pct, recall_pct, color='orange', s=250, zorder=5, edgecolor='k')
        ax_d.annotate(f"Top {top_pct}%\nEF={enrichment_factor:.2f}x", (screened_pct, recall_pct),
                      xytext=(15, 10), textcoords='offset points',
                      fontweight='bold', color='darkorange', size=13)

    ax_d.set_title('D) Prospective Enrichment (ChEMBL Library)', fontweight='bold', size=16, loc='left')
    ax_d.set_xlabel('% of Compounds Screened')
    ax_d.set_ylabel('% of Total Actives Found')
    ax_d.legend(loc='lower right')
    ax_d.grid(alpha=0.2)

    plt.subplots_adjust(top=0.90, bottom=0.08, left=0.10, right=0.95, hspace=0.4, wspace=0.3)
    save_name = base_path + r"\Figure_11.png"
    plt.savefig(save_name, dpi=300, bbox_inches='tight')
    plt.show()
    print(f"Success! Figure saved as: {save_name}")

# Build and save the figure only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    generate_figure_11_final()