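# This script reads the ADMETsar3-vs-DrugBank.xlsm workbook, which must be
# downloaded separately (the download link is not included in this file) and
# placed in the directory the script is run from.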
import matplotlib.pyplot as plt
import matplotlib.patches as patches
import pandas as pd
import os
# Hex codes of the three segment colors reused by every parameter below.
_RED = "#C14E4E"
_YELLOW = "#FFDF33"
_GREEN = "#63C28D"

# Continuous parameters. Each entry maps a parameter name to:
#   "bounds" - ascending segment boundaries along the x-axis
#   "colors" - one fill color per segment, i.e. len(bounds) - 1 entries
admet_data = {
    "MW": {
        "bounds": [0, 100, 200, 500, 600, 800],
        "colors": [_RED, _YELLOW, _GREEN, _YELLOW, _RED],
    },
    "nAtom": {
        "bounds": [0, 4, 14, 35, 50, 100],
        "colors": [_RED, _YELLOW, _GREEN, _YELLOW, _RED],
    },
    "nHet": {
        "bounds": [0, 1, 2, 10, 16, 30],
        "colors": [_RED, _YELLOW, _GREEN, _YELLOW, _RED],
    },
    "nRing": {
        "bounds": [0, 1, 5, 7, 10],
        "colors": [_YELLOW, _GREEN, _YELLOW, _RED],
    },
    "nRot": {
        "bounds": [0, 10, 16, 30],
        "colors": [_GREEN, _YELLOW, _RED],
    },
    "HBA": {
        "bounds": [0, 1, 11, 13, 20],
        "colors": [_YELLOW, _GREEN, _YELLOW, _RED],
    },
    "HBD": {
        "bounds": [0, 4, 6, 10],
        "colors": [_GREEN, _YELLOW, _RED],
    },
    "TPSA": {
        "bounds": [0, 20, 140, 160, 200],
        "colors": [_YELLOW, _GREEN, _YELLOW, _RED],
    },
    "SlogP": {
        "bounds": [-5, 0, 1, 3, 5, 10],
        "colors": [_RED, _YELLOW, _GREEN, _YELLOW, _RED],
    },
    "logS": {
        "bounds": [-8, -6, -4, 0.5, 1, 3],
        "colors": [_RED, _YELLOW, _GREEN, _YELLOW, _RED],
    },
    "QED": {
        "bounds": [0, 0.35, 0.6, 1.0],
        "colors": [_RED, _YELLOW, _GREEN],
    },
    "logP": {
        "bounds": [-5, 0, 1, 3, 5, 10],
        "colors": [_RED, _YELLOW, _GREEN, _YELLOW, _RED],
    },
    "pKa": {
        "bounds": [-5, 0, 3, 8, 11, 16],
        "colors": [_RED, _YELLOW, _GREEN, _YELLOW, _RED],
    },
}
def _split_accept_percentages(counts):
    """Collapse per-label percentages into an (accept, not_accept) pair.

    *counts* is any mapping-like object with ``.items()`` yielding
    ``(label, percentage)`` pairs. Labels are matched case-insensitively by
    keyword: anything containing "not" or "reject" counts as Not Accept,
    anything else containing "accept" counts as Accept, and every other label
    is ignored. The two sums are rescaled so they add up to 100.

    Returns ``None`` when no label could be classified.
    """
    accept_pct = 0.0
    not_accept_pct = 0.0
    for label, pct in counts.items():
        text = str(label).lower()
        # Negative keywords must be tested first: "not accepted" also
        # contains the substring "accept".
        if 'not' in text or 'reject' in text:
            not_accept_pct += pct
        elif 'accept' in text:
            accept_pct += pct

    total = accept_pct + not_accept_pct
    if not total > 0:
        return None
    # Rescale so unrecognised labels do not leave a gap in the bar.
    return (accept_pct / total) * 100, (not_accept_pct / total) * 100


def load_categorical_rules_from_excel(excel_path):
    """
    Read the Excel workbook and register the categorical rules for plotting.

    For each of the columns "Lipinski_rule", "Pfizer_rule" and "GSK_rule"
    that exists in the first sheet, the share of "Accept" versus
    "Not Accept" values is computed and stored in the module-level
    ``admet_data`` dictionary as a two-segment bar scaled 0-100
    (keys "bounds", "labels", "colors").

    Args:
        excel_path: Path to the workbook; read with the openpyxl engine.

    Returns:
        None. The result is the mutation of ``admet_data``. Read failures and
        missing columns are reported on stdout instead of being raised.
    """
    print(f"Reading data from {excel_path}...")
    try:
        df = pd.read_excel(excel_path, engine='openpyxl')
    except Exception as e:
        # Deliberately broad: this loader is best-effort, and the caller still
        # draws the continuous bars when the workbook cannot be read.
        print(f"Error reading the Excel file: {e}")
        return

    for rule in ("Lipinski_rule", "Pfizer_rule", "GSK_rule"):
        if rule not in df.columns:
            print(f"Warning: The column '{rule}' was not found in the Excel file.")
            continue

        # Value frequencies as percentages of the counted cells.
        counts = df[rule].value_counts(normalize=True) * 100
        split = _split_accept_percentages(counts)
        if split is None:
            # Keep the historical 50/50 placeholder so a bar is still drawn,
            # but say so: these numbers do not come from the data.
            print(
                f"Warning: No 'Accept'/'Not Accept' values were recognised in "
                f"column '{rule}'; using a 50/50 placeholder."
            )
            accept_pct, not_accept_pct = 50.0, 50.0
        else:
            accept_pct, not_accept_pct = split

        # The Not Accept segment spans 0..not_accept_pct, Accept the rest.
        admet_data[rule] = {
            "bounds": [0, not_accept_pct, 100],
            "labels": ["", f"Not Accept\n({not_accept_pct:.1f}%)", f"Accept\n({accept_pct:.1f}%)"],
            "colors": ["#C14E4E", "#63C28D"]  # Red for Not Accept, Green for Accept
        }
        print(f"[{rule}] Processed -> Not Accept: {not_accept_pct:.1f}%, Accept: {accept_pct:.1f}%")
def create_proportional_admet_bars(output_dir="Figure-1_ADMET_Proportional_Bars", data=None):
    """
    Generate and save one proportional, color-segmented bar per parameter.

    Each bar is written to ``<output_dir>/<param>_bar.png`` at 300 dpi.

    Args:
        output_dir: Folder receiving the PNG files; created if missing.
        data: Mapping of parameter name to a spec dict with "bounds"
            (segment boundaries), "colors" (one per segment) and optional
            "labels" (tick labels; defaults to the bounds as strings).
            When None, the module-level ``admet_data`` is used, looked up at
            call time so entries added after import are included.

    Returns:
        None.
    """
    specs = admet_data if data is None else data

    # Canvas size: 8 cm wide x 1.3 cm tall, converted to inches for
    # matplotlib. The saved image is cropped/expanded by bbox_inches='tight'.
    fig_width = 8 / 2.54
    fig_height = 1.3 / 2.54
    os.makedirs(output_dir, exist_ok=True)
    bg_color = '#ffffff'

    for param, info in specs.items():
        bounds = info["bounds"]
        colors = info["colors"]
        labels = info.get("labels", [str(b) for b in bounds])

        fig, ax = plt.subplots(figsize=(fig_width, fig_height))
        # try/finally guarantees the figure is released even if drawing or
        # saving fails, so a bad entry cannot leak open figures.
        try:
            fig.patch.set_facecolor(bg_color)
            ax.set_facecolor(bg_color)

            # Draw each colored segment with a width proportional to its range.
            for i, color in enumerate(colors):
                start = bounds[i]
                width = bounds[i + 1] - start
                ax.add_patch(
                    patches.Rectangle(
                        (start, 0), width, 1,
                        facecolor=color, edgecolor='none'
                    )
                )

            # Axes span exactly the first to the last boundary.
            ax.set_xlim(bounds[0], bounds[-1])
            ax.set_ylim(0, 1)

            # One tick per boundary, with slanted labels.
            ax.set_xticks(bounds)
            ax.set_xticklabels(labels, rotation=45, ha='right', rotation_mode='anchor', fontsize=9)

            # Only the bottom spine stays visible, pushed slightly downwards.
            for side in ('top', 'right', 'left'):
                ax.spines[side].set_visible(False)
            ax.spines['bottom'].set_position(('outward', 5))
            ax.spines['bottom'].set_linewidth(1)

            ax.tick_params(axis='x', direction='out', length=4, width=1, colors='black')
            ax.get_yaxis().set_visible(False)

            # Parameter name below the x-axis.
            ax.set_xlabel(param, fontsize=11, labelpad=5, weight='bold')

            # Manual bottom margin instead of tight_layout, which warns on
            # a canvas this small.
            fig.subplots_adjust(bottom=0.45)

            filename = os.path.join(output_dir, f"{param}_bar.png")
            fig.savefig(filename, dpi=300, bbox_inches='tight', facecolor=fig.get_facecolor())
        finally:
            plt.close(fig)
if __name__ == "__main__":
    # Workbook holding the categorical rule columns; the relative path is
    # resolved against the current working directory.
    workbook_name = "ADMETsar3-vs-DrugBank.xlsm"

    # Without the workbook only the continuous bars can be drawn, so report
    # the problem and carry on instead of aborting.
    if not os.path.exists(workbook_name):
        print(f"Error: The file '{workbook_name}' was not found. Ensure it is in the same directory.")
        print("Continuous plots will be generated, but categorical rules will be missing.")
    else:
        load_categorical_rules_from_excel(workbook_name)

    # Render every bar currently registered, then point the user at the output.
    create_proportional_admet_bars()
    print("Process completed successfully! Check the 'Figure-1_ADMET_Proportional_Bars' folder.")