import matplotlib.pyplot as plt
from matplotlib.patches import Patch
import numpy as np
from scipy.interpolate import make_interp_spline
# 1. UGT substrate data (UGT = UDP-glucuronosyltransferase)
# Predicted-probability bin positions, 0 to 1 in steps of 0.05 (21 bins).
x = [
    0, 0.05, 0.1, 0.15, 0.2, 0.25, 0.3, 0.35, 0.4, 0.45, 0.5,
    0.55, 0.6, 0.65, 0.7, 0.75, 0.8, 0.85, 0.9, 0.95, 1,
]
# Frequency of each bin in % (the 21 values sum to 100).
y = [
    0.08, 1.42, 3.92, 4.69, 5.83, 5.06, 6.55, 5.78, 4.73, 4.89, 4.57,
    4.41, 4.37, 5.66, 6.51, 6.80, 8.50, 8.50, 5.91, 1.78, 0.04,
]

# 2. Curve smoothing (cubic interpolating B-spline through the bin values)
# NOTE(review): x_smooth / y_smooth are not referenced by any later statement
# visible in this file -- confirm whether the trend line is still wanted.
x_smooth = np.linspace(min(x), max(x), 300)
spl = make_interp_spline(x, y, k=3)
# An interpolating spline can undershoot below zero between samples; a
# negative frequency is meaningless, so clamp at 0. np.clip does this in one
# vectorized step and keeps the result a NumPy array.
y_smooth = np.clip(spl(x_smooth), 0, None)
# 3. Bar colours (UGT "traffic light"), one entry per probability bin in x
colors = [
    'green' if prob >= 0.7            # high probability of being a UGT substrate
    else 'firebrick' if prob <= 0.3   # low probability
    else 'gold'                       # intermediate zone
    for prob in x
]
# 4. Create the chart
# Explicit Figure/Axes objects are used instead of the implicit pyplot
# "current axes" state, so every call names the axes it modifies.
fig, ax = plt.subplots(figsize=(7, 6))

# A. Bars: one bar per probability bin, coloured by zone.
ax.bar(x, y, width=0.04, color=colors, edgecolor='black', alpha=0.7,
       label='Data Frequency')

# 5. Labels and title
ax.set_xlabel('Probability of being UGT Substrate (Phase II Metabolism)', fontsize=12)
ax.set_ylabel('% Frequency', fontsize=12)
ax.set_title('UGT Substrate Prediction', fontsize=14)

# Axis settings
# linspace fixes the tick count at 11 (0.0, 0.1, ..., 1.0) without relying on
# floating-point rounding of an arange stop value.
ax.set_xticks(np.linspace(0, 1, 11))
ax.set_xlim(-0.05, 1.05)
ax.set_ylim(0, 10)

# 6. Vertical legend (upper LEFT, because the peaks are on the right)
# The bars are coloured with inclusive thresholds (>= 0.7 green, <= 0.3 red),
# so the legend text states the same inclusive bounds. The comparison signs
# are written as mathtext to keep this source file pure ASCII.
legend_elements = [
    Patch(facecolor='green', edgecolor='black', alpha=0.7,
          label=r'Likely UGT Substrate ($\geq$ 0.7)'),
    Patch(facecolor='gold', edgecolor='black', alpha=0.7,
          label='Uncertain / Moderate (0.3 - 0.7)'),
    Patch(facecolor='firebrick', edgecolor='black', alpha=0.7,
          label=r'Unlikely UGT Substrate ($\leq$ 0.3)'),
]
ax.legend(handles=legend_elements, loc='upper left', ncol=1, framealpha=0.9)

ax.grid(axis='y', linestyle='--', alpha=0.5)
fig.tight_layout()
plt.show()