from __future__ import annotations from pathlib import Path import time from biotite.application.autodock import VinaApp import gradio as gr from gradio_molecule3d import Molecule3D from gradio_molecule2d import molecule2d import numpy as np from rdkit import Chem from rdkit.Chem import AllChem import pandas as pd from biotite.structure import centroid, from_template from biotite.structure.io import load_structure from biotite.structure.io.mol import MOLFile, SDFile from plinder.eval.docking.write_scores import evaluate EVAL_METRICS = ["system_id", "LDDT-PLI", "LDDT-LP", "BISY-RMSD"] def vina( ligand, receptor, pocket_center, output_folder: Path, size=10, max_num_poses=5 ): app = VinaApp( ligand, receptor, center=pocket_center, size=[size, size, size], ) app.set_max_number_of_models(max_num_poses) app.start() app.join() docked_ligand = from_template(ligand, app.get_ligand_coord()) docked_ligand = docked_ligand[..., ~np.isnan(docked_ligand.coord[0]).any(axis=-1)] output_files = [] for i in range(max_num_poses): sdf_file = MOLFile() sdf_file.set_structure(docked_ligand[i]) sdf_file.write(output_folder / f"docked_ligand_{i}.sdf") output_files.append(sdf_file) return output_files def predict( input_sequence: str, input_ligand: str, input_msa: gr.File | None = None, input_protein: gr.File | None = None, max_num_poses: int = 1, ): """ Main prediction function that calls ligsite and smina Parameters ---------- input_sequence: str monomer sequence input_ligand: str ligand as SMILES string input_msa: gradio.File | None Gradio file object to MSA a3m file input_protein: gradio.File | None Gradio file object to monomer protein structure as CIF file max_num_poses: int Number of poses to generate Returns ------- output_structures: tuple (output_protein, output_ligand_sdf) run_time: float run time of the program """ start_time = time.time() if input_protein is None: raise gr.Error("need input_protein") ligand_file = "ligand.sdf" conformer = Chem.AddHs(Chem.MolFromSmiles(input_ligand)) AllChem.EmbedMolecule(conformer) AllChem.MMFFOptimizeMolecule(conformer) Chem.SDWriter(ligand_file).write(conformer) ligand = SDFile.read(ligand_file).record.get_structure() receptor = load_structure(input_protein, include_bonds=True) docking_poses = vina( ligand, receptor, centroid(receptor), Path(input_protein).parent, max_num_poses=max_num_poses, ) end_time = time.time() run_time = end_time - start_time return [input_protein.name, docking_poses[0]], run_time def get_metrics( system_id: str, receptor_file: Path, ligand_file: Path, ) -> tuple[pd.DataFrame, float]: start_time = time.time() metrics = pd.DataFrame( [ evaluate( model_system_id=system_id, reference_system_id=system_id, receptor_file=receptor_file, ligand_files=[ligand_file], flexible=False, posebusters=False, posebusters_full=False, ) ] ) metrics = metrics[ ["system_id", "lddt_pli_ave", "lddt_lp_ave", "bisy_rmsd_ave"] ].copy() metrics.rename( columns={ "lddt_pli_ave": "LDDT-PLI", "lddt_lp_ave": "LDDT-LP", "bisy_rmsd_ave": "BISY-RMSD", }, inplace=True, ) end_time = time.time() run_time = end_time - start_time return metrics, run_time with gr.Blocks() as app: gr.Markdown("# Vina") gr.Markdown( "Example model using Vina to dock the ligand with the pocket center defined by the centroid of the input protein." ) with gr.Row(): input_sequence = gr.Textbox(lines=3, label="Input Protein sequence (FASTA)") input_ligand = gr.Textbox(lines=3, label="Input ligand SMILES") input_msa = gr.File(label="Input MSA (a3m)") input_protein = gr.File(label="Input protein monomer (CIF)") # define any options here # for automated inference the default options are used max_num_poses = gr.Slider(1, 10, value=1, label="Max number of poses to generate") # checkbox_option = gr.Checkbox(label="Checkbox Option") # dropdown_option = gr.Dropdown(["Option 1", "Option 2", "Option 3"], label="Radio Option") btn = gr.Button("Run Inference") gr.Examples( [ [ "QECTKFKVSSCRECIESGPGCTWCQKLNFTGPGDPDSIRCDTRPQLLMRGCAADDIMDPTSLAETQEDHNGGQKQLSPQKVTLYLRPGQAAAFNVTFRRAKGYPIDLYYLMDLSYSMLDDLRNVKKLGGDLLRALNEITESGRIGFGSFVDKTVLPFVNTHPDKLRNPCPNKEKECQPPFAFRHVLKLTDNSNQFQTEVGKQLISGNLDAPEGGLDAMMQVAACPEEIGWRKVTRLLVFATDDGFHFAGDGKLGAILTPNDGRCHLEDNLYKRSNEFDYPSVGQLAHKLAENNIQPIFAVTSRMVKTYEKLTEIIPKSAVGELSEDSSNVVQLIKNAYNKLSSRVFLDHNALPDTLKVTYDSFCSNGVTHRNQPRGDCDGVQINVPITFQVKVTATECIQEQSFVIRALGFTDIVTVQVLPQCECRCRDQSRDRSLCHGKGFLECGICRCDTGYIGKNCECQTQGRSSQELEGSCRKDNNSIICSGLGDCVCGQCLCHTSDVPGKLIYGQYCECDTINCERYNGQVCGGPGRGLCFCGKCRCHPGFEGSACQCERTTEGCLNPRRVECSGRGRCRCNVCECHSGYQLPLCQECPGCPSPCGKYISCAECLKFEKGPFGKNCSAACPGLQLSNNPVKGRTCKERDSEGCWVAYTLEQQDGMDRYLIYVDESRECCGGPAALQTLFQG", "CC(=O)N[C@H]1[C@H](O[C@H]2[C@H](O)[C@@H](NC(C)=O)CO[C@@H]2CO)O[C@H](CO)[C@@H](O)[C@@H]1O", None, "input_test.cif", ], ], [input_sequence, input_ligand, input_msa, input_protein], ) reps = [ { "model": 0, "style": "cartoon", "color": "whiteCarbon", }, { "model": 0, "resname": "UNK", "style": "stick", "color": "greenCarbon", }, { "model": 0, "resname": "LIG", "style": "stick", "color": "greenCarbon", }, { "model": 1, "style": "stick", "color": "greenCarbon", }, ] smiles = molecule2d(input_ligand) out = Molecule3D(reps=reps) run_time = gr.Textbox(label="Runtime") btn.click( predict, inputs=[input_sequence, input_ligand, input_msa, input_protein, max_num_poses], outputs=[out, run_time], ) app.launch() with gr.Blocks() as app: with gr.Tab("🧬 Vina"): gr.Markdown( "Example model using Vina to dock the ligand with the pocket center defined by the centroid of the input protein." ) with gr.Row(): input_sequence = gr.Textbox(lines=3, label="Input Protein sequence (FASTA)") input_ligand = gr.Textbox(lines=3, label="Input ligand SMILES") input_msa = gr.File(label="Input MSA (a3m)") input_protein = gr.File(label="Input protein monomer (CIF)") max_num_poses = gr.Slider( 1, 10, value=1, label="Max number of poses to generate" ) btn = gr.Button("Run Inference") gr.Examples( [ [ "QECTKFKVSSCRECIESGPGCTWCQKLNFTGPGDPDSIRCDTRPQLLMRGCAADDIMDPTSLAETQEDHNGGQKQLSPQKVTLYLRPGQAAAFNVTFRRAKGYPIDLYYLMDLSYSMLDDLRNVKKLGGDLLRALNEITESGRIGFGSFVDKTVLPFVNTHPDKLRNPCPNKEKECQPPFAFRHVLKLTDNSNQFQTEVGKQLISGNLDAPEGGLDAMMQVAACPEEIGWRKVTRLLVFATDDGFHFAGDGKLGAILTPNDGRCHLEDNLYKRSNEFDYPSVGQLAHKLAENNIQPIFAVTSRMVKTYEKLTEIIPKSAVGELSEDSSNVVQLIKNAYNKLSSRVFLDHNALPDTLKVTYDSFCSNGVTHRNQPRGDCDGVQINVPITFQVKVTATECIQEQSFVIRALGFTDIVTVQVLPQCECRCRDQSRDRSLCHGKGFLECGICRCDTGYIGKNCECQTQGRSSQELEGSCRKDNNSIICSGLGDCVCGQCLCHTSDVPGKLIYGQYCECDTINCERYNGQVCGGPGRGLCFCGKCRCHPGFEGSACQCERTTEGCLNPRRVECSGRGRCRCNVCECHSGYQLPLCQECPGCPSPCGKYISCAECLKFEKGPFGKNCSAACPGLQLSNNPVKGRTCKERDSEGCWVAYTLEQQDGMDRYLIYVDESRECCGGPAALQTLFQG", "CC(=O)N[C@H]1[C@H](O[C@H]2[C@H](O)[C@@H](NC(C)=O)CO[C@@H]2CO)O[C@H](CO)[C@@H](O)[C@@H]1O", None, "input_test.cif", ], ], [input_sequence, input_ligand, input_msa, input_protein], ) reps = [ { "model": 0, "style": "cartoon", "color": "whiteCarbon", }, { "model": 0, "resname": "UNK", "style": "stick", "color": "greenCarbon", }, { "model": 0, "resname": "LIG", "style": "stick", "color": "greenCarbon", }, { "model": 1, "style": "stick", "color": "greenCarbon", }, ] smiles = molecule2d(input_ligand) out = Molecule3D(reps=reps) run_time = gr.Textbox(label="Runtime") btn.click( predict, inputs=[ input_sequence, input_ligand, input_msa, input_protein, max_num_poses, ], outputs=[out, run_time], ) with gr.Tab("⚖️ PLINDER evaluation template"): with gr.Row(): with gr.Column(): input_system_id = gr.Textbox(label="PLINDER system ID") input_receptor_file = gr.File(label="Receptor file (CIF)") input_ligand_file = gr.File(label="Ligand file (SDF)") eval_btn = gr.Button("Run Evaluation") gr.Examples( [ [ "4neh__1__1.B__1.H", "input_protein_test.cif", "input_ligand_test.sdf", ], ], [input_system_id, input_receptor_file, input_ligand_file], ) reps = [ { "model": 0, "style": "cartoon", "color": "whiteCarbon", }, { "model": 0, "resname": "UNK", "style": "stick", "color": "greenCarbon", }, { "model": 0, "resname": "LIG", "style": "stick", "color": "greenCarbon", }, { "model": 1, "style": "stick", "color": "greenCarbon", }, ] # pred_native = Molecule3D(reps=reps, config={"backgroundColor": "black"}) eval_run_time = gr.Textbox(label="Evaluation runtime") metric_table = gr.DataFrame( pd.DataFrame([], columns=EVAL_METRICS), label="Evaluation metrics" ) eval_btn.click( evaluate, inputs=[input_system_id, input_receptor_file, input_ligand_file], outputs=[metric_table, eval_run_time], ) app.launch()