Python script to build Figure 1

This script uses the ADMETsar3-vs-DrugBank.xlsm file, which you can download from this link.

"script-number-119": Python script for Figure 1.
import matplotlib.pyplot as plt
import matplotlib.patches as patches
import pandas as pd
import os

# Continuous parameters. Every entry maps a parameter name to the breakpoints
# of its axis ("bounds") and to the fill color of each interval between two
# consecutive breakpoints ("colors"), so an entry always lists one more bound
# than it lists colors.
_RED = "#C14E4E"
_YELLOW = "#FFDF33"
_GREEN = "#63C28D"

admet_data = {
    "MW": {
        "bounds": [0, 100, 200, 500, 600, 800],
        "colors": [_RED, _YELLOW, _GREEN, _YELLOW, _RED],
    },
    "nAtom": {
        "bounds": [0, 4, 14, 35, 50, 100],
        "colors": [_RED, _YELLOW, _GREEN, _YELLOW, _RED],
    },
    "nHet": {
        "bounds": [0, 1, 2, 10, 16, 30],
        "colors": [_RED, _YELLOW, _GREEN, _YELLOW, _RED],
    },
    "nRing": {
        "bounds": [0, 1, 5, 7, 10],
        "colors": [_YELLOW, _GREEN, _YELLOW, _RED],
    },
    "nRot": {
        "bounds": [0, 10, 16, 30],
        "colors": [_GREEN, _YELLOW, _RED],
    },
    "HBA": {
        "bounds": [0, 1, 11, 13, 20],
        "colors": [_YELLOW, _GREEN, _YELLOW, _RED],
    },
    "HBD": {
        "bounds": [0, 4, 6, 10],
        "colors": [_GREEN, _YELLOW, _RED],
    },
    "TPSA": {
        "bounds": [0, 20, 140, 160, 200],
        "colors": [_YELLOW, _GREEN, _YELLOW, _RED],
    },
    "SlogP": {
        "bounds": [-5, 0, 1, 3, 5, 10],
        "colors": [_RED, _YELLOW, _GREEN, _YELLOW, _RED],
    },
    "logS": {
        "bounds": [-8, -6, -4, 0.5, 1, 3],
        "colors": [_RED, _YELLOW, _GREEN, _YELLOW, _RED],
    },
    "QED": {
        "bounds": [0, 0.35, 0.6, 1.0],
        "colors": [_RED, _YELLOW, _GREEN],
    },
    "logP": {
        "bounds": [-5, 0, 1, 3, 5, 10],
        "colors": [_RED, _YELLOW, _GREEN, _YELLOW, _RED],
    },
    "pKa": {
        "bounds": [-5, 0, 3, 8, 11, 16],
        "colors": [_RED, _YELLOW, _GREEN, _YELLOW, _RED],
    },
}

def _accept_reject_split(percentages):
    """
    Split value percentages into accepted and not-accepted shares.

    Args:
        percentages: Mapping-like object with ``.items()`` yielding
            (cell value, percentage) pairs.

    Returns:
        ``(accept_pct, not_accept_pct)`` rescaled so the two sum to 100, or
        ``None`` when no value is recognised as either category.
    """
    accept_pct = 0.0
    not_accept_pct = 0.0

    for value, pct in percentages.items():
        text = str(value).lower()
        # The negative keywords are tested first because a string such as
        # "Not accepted" also contains "accept".
        if 'not' in text or 'reject' in text:
            not_accept_pct += pct
        elif 'accept' in text:
            accept_pct += pct

    total = accept_pct + not_accept_pct
    if total <= 0:
        return None

    # Unrecognised values are left out, so rescale the two shares to 100%.
    return (accept_pct / total) * 100, (not_accept_pct / total) * 100


def load_categorical_rules_from_excel(excel_path):
    """
    Add accept/not-accept percentage bars for the categorical rules to admet_data.

    Reads the workbook at ``excel_path`` with pandas (openpyxl engine) and, for
    each of the columns "Lipinski_rule", "Pfizer_rule" and "GSK_rule",
    computes the share of values that read as accepted and the share that read
    as not accepted. Every rule that can be summarised is stored in the
    module-level ``admet_data`` dictionary as a two-segment bar on a 0-100
    scale, with the percentages written into its tick labels.

    Args:
        excel_path: Path of the Excel workbook to read.

    Returns:
        None. Results are written into ``admet_data``. If the workbook cannot
        be read, an error is printed and nothing is added. A rule whose column
        is missing, or whose column holds no recognisable accept/not-accept
        value, is reported with a warning and skipped, so that no invented
        percentages end up in the figure.
    """
    print(f"Reading data from {excel_path}...")
    try:
        df = pd.read_excel(excel_path, engine='openpyxl')
    except Exception as e:
        # Deliberately broad: whatever prevents the workbook from being read
        # is reported and the rules are simply not added.
        print(f"Error reading the Excel file: {e}")
        return

    for rule in ("Lipinski_rule", "Pfizer_rule", "GSK_rule"):
        if rule not in df.columns:
            print(f"Warning: The column '{rule}' was not found in the Excel file.")
            continue

        # Frequency of each distinct value, expressed as a percentage.
        counts = df[rule].value_counts(normalize=True) * 100

        split = _accept_reject_split(counts)
        if split is None:
            print(
                f"Warning: The column '{rule}' has no recognizable "
                "Accept/Not Accept values; the rule was skipped."
            )
            continue
        accept_pct, not_accept_pct = split

        # The bar runs from 0 to 100; the "Not Accept" segment comes first,
        # so its width is the boundary between the two segments.
        admet_data[rule] = {
            "bounds": [0, not_accept_pct, 100],
            "labels": ["", f"Not Accept\n({not_accept_pct:.1f}%)", f"Accept\n({accept_pct:.1f}%)"],
            "colors": ["#C14E4E", "#63C28D"]  # Red for Not Accept, Green for Accept
        }
        print(f"[{rule}] Processed -> Not Accept: {not_accept_pct:.1f}%, Accept: {accept_pct:.1f}%")

def create_proportional_admet_bars(output_dir="Figure-1_ADMET_Proportional_Bars"):
    """
    Generates and saves one proportional color bar per entry of admet_data.

    For every parameter a small figure is drawn in which each interval between
    two consecutive bounds is filled with its color, at a width proportional
    to the interval. The bounds become the x-axis ticks (using the entry's
    "labels" when present, otherwise the bound values) and the parameter name
    is written below the axis. Each figure is saved as
    ``<output_dir>/<parameter>_bar.png`` at 300 dpi.

    Args:
        output_dir: Directory that receives the PNG files. It is created when
            it does not exist yet.

    Raises:
        ValueError: If an entry does not have exactly one more bound than it
            has colors.
    """
    # Figure size converted from centimetres to inches for matplotlib:
    # 8 cm wide and 1.3 cm high.
    fig_width = 8 / 2.54
    fig_height = 1.3 / 2.54

    os.makedirs(output_dir, exist_ok=True)

    # Plain white background for both the figure and the axes
    bg_color = '#ffffff'

    for param, info in admet_data.items():
        bounds = info["bounds"]
        colors = info["colors"]
        if len(bounds) != len(colors) + 1:
            raise ValueError(
                f"'{param}' needs exactly one more bound than colors "
                f"(got {len(bounds)} bounds and {len(colors)} colors)."
            )
        labels = info.get("labels", [str(b) for b in bounds])

        fig, ax = plt.subplots(figsize=(fig_width, fig_height))
        try:
            fig.patch.set_facecolor(bg_color)
            ax.set_facecolor(bg_color)

            # Draw each colored segment proportionally
            for start, end, color in zip(bounds, bounds[1:], colors):
                ax.add_patch(patches.Rectangle(
                    (start, 0), end - start, 1,
                    facecolor=color, edgecolor='none'
                ))

            # The axis spans exactly the first to the last bound
            ax.set_xlim(bounds[0], bounds[-1])
            ax.set_ylim(0, 1)

            # One tick per bound
            ax.set_xticks(bounds)
            ax.set_xticklabels(labels, rotation=45, ha='right', rotation_mode='anchor', fontsize=9)

            # Only the bottom spine stays visible, pushed slightly downwards
            for side in ('top', 'right', 'left'):
                ax.spines[side].set_visible(False)
            ax.spines['bottom'].set_position(('outward', 5))
            ax.spines['bottom'].set_linewidth(1)

            ax.tick_params(axis='x', direction='out', length=4, width=1, colors='black')

            # The y axis carries no information for a single bar
            ax.get_yaxis().set_visible(False)

            # Parameter label below the X-axis
            ax.set_xlabel(param, fontsize=11, labelpad=5, weight='bold')

            # Start the axes at 45% of the figure height, leaving the space
            # below for the tick labels and the parameter name.
            fig.subplots_adjust(bottom=0.45)

            filename = os.path.join(output_dir, f"{param}_bar.png")
            fig.savefig(filename, dpi=300, bbox_inches='tight', facecolor=fig.get_facecolor())
        finally:
            # Always release the figure, even when drawing or saving fails.
            plt.close(fig)

if __name__ == "__main__":
    # Workbook with the categorical rule columns, looked up relative to the
    # current working directory.
    workbook = "ADMETsar3-vs-DrugBank.xlsm"

    # Without the workbook only the continuous parameters can be drawn, so
    # say so and carry on; otherwise add the categorical rules first.
    if not os.path.exists(workbook):
        print(f"Error: The file '{workbook}' was not found. Ensure it is in the same directory.")
        print("Continuous plots will be generated, but categorical rules will be missing.")
    else:
        load_categorical_rules_from_excel(workbook)

    # Draw and save one bar chart per parameter collected so far.
    create_proportional_admet_bars()
    print("Process completed successfully! Check the 'Figure-1_ADMET_Proportional_Bars' folder.")