Source code for refinegems.utility.connections

#!/usr/bin/env python
"""Provides functions / connections to other tools for easier access and usage."""

__author__ = "Famke Baeuerle und Carolin Brune"

################################################################################
# requirements
################################################################################

import cobra
import json
import memote
import pandas as pd
import shutil
import subprocess
import tempfile
import time
import warnings

from BOFdat import step1
from BOFdat import step2
from BOFdat.util import update
from BOFdat.util.update import determine_coefficients

from importlib.resources import files
from libsbml import Model as libModel
from MCC import MassChargeCuration
from pathlib import Path
from libsbml import readSBML
from sboannotator.SBOannotator import sbo_annotator
from typing import Literal, Union

from memote.support import consistency

# needed by memote.support.consistency
from memote.support import consistency_helpers as con_helpers

from .util import test_biomass_presence, is_stoichiometric_factor
from .io import write_model_to_file

# note:
#    for BOFdat to run correctly, one needs to change 'solution.f' to 'solution.objective_value'
#    in the coenzymes_and_ions.py file of BOFdat
#    -> see forked version of it in the Draeger-lab github

################################################################################
# variables
################################################################################

################################################################################
# functions
################################################################################

# BOFdat
# ------


[docs] def adjust_BOF( genome: str, model_file: str, model: cobra.Model, dna_weight_fraction: float, weight_frac: float, ) -> str: """Adjust the model's BOF using BOFdat. Currently implemented are step 1 DNA coefficients and step 2. Args: - genome (str): Path to the genome (e.g. .fna) FASTA file. - model_file (str): Path to the sbml (.xml) file of the model. - model (cobra.Model): The genome-scale metabolic model (from the string above), loaded with COBRApy. - dna_weight_fraction (float): DNA weight fraction for BOF step 1. - weight_frac (float): Weight fraction for the second step of BOFdat (enzymes and ions) Returns: str: The updated BOF reaction as a reaction string. """ # BOFdat step 1: # -------------- # dna coefficients dna_coefficients = step1.generate_dna_coefficients( genome, model_file, DNA_WEIGHT_FRACTION=dna_weight_fraction ) bd_step1 = {} for m in dna_coefficients: bd_step1[m.id] = dna_coefficients[m] # BOFdat step 2: # -------------- # find inorganic ions selected_metabolites = step2.find_coenzymes_and_ions(model_file) # determine coefficients bd_step2 = determine_coefficients(selected_metabolites, model, weight_frac) bd_step2.update(bd_step1) # update BOF # ---------- # retrieve previously used BOF growth_func_list = test_biomass_presence(model) if len(growth_func_list) == 1: objective_list = model.reactions.get_by_id(growth_func_list[0]).reaction.split( " " ) elif len(growth_func_list) > 1: mes = f"Multiple BOFs found. Using {growth_func_list[0]} for BOF adjustment." warnings.warn(mes, category=UserWarning) objective_list = model.reactions.get_by_id(growth_func_list[0]).reaction.split( " " ) # else not needed, as if there is no BOF, a new one will be created # ............................................................... objective_reactant = {} objective_product = {} product = False # get reactants, product and factors from equation for s in objective_list: if s == "+": continue elif ">" in s: product = True elif is_stoichiometric_factor(s): factor = s else: if product: objective_product[s] = factor else: objective_reactant[s] = factor # update BOF information with data from BOFdat for m in bd_step2: if bd_step2[m] < 0: objective_reactant[m] = bd_step2[m] * (-1) else: objective_product[m] = bd_step2[m] # create new objective function new_objective = " + ".join( "{} {}".format(value, key) for key, value in objective_reactant.items() ) new_objective += " --> " new_objective += " + ".join( "{} {}".format(value, key) for key, value in objective_product.items() ) return new_objective
# DIAMOND # -------
[docs] def run_DIAMOND_blastp( fasta: str, db: str, sensitivity: Literal[ "sensitive", "more-sensitive", "very-sensitive", "ultra-sensitive" ] = "more-sensitive", coverage: float = 95.0, threads: int = 2, outdir: str = None, outname: str = "DIAMOND_blastp_res.tsv", ) -> str: """Run DIAMOND in BLASTp mode. Args: - fasta (str): The FASTA file to BLAST for. - db (str): The DIAMOND database file to BLAST against - sensitivity (Literal['sensitive', 'more-sensitive', 'very-sensitive','ultra-sensitive'], optional): Sensitivity mode for DIAMOND. Defaults to 'more-sensitive'. - coverage (float, optional): A parameter for DIAMOND Coverage theshold for the hits. Defaults to 95.0. - threads (int, optional): A parameter for DIAMOND. Number of threds to be used while BLASTing. Defaults to 2. - outdir (str, optional): Path to a directory to write the output files to. Defaults to None. - outname (str, optional): Name of the result file (name only, not a path). Defaults to 'DIAMOND_blastp_res.tsv'. Returns: str: Path to the results of the DIAMOND BLASTp run. """ if outdir: outname = str(Path(outdir, "DIAMOND_blastp_res.tsv")) logfile = Path(outdir, "log_DIAMOND_blastp.txt") else: outname = str(Path(outname)) logfile = Path("log_DIAMOND_blastp.txt") completed_blast = subprocess.run( [ "diamond", "blastp", "-d", db, "-q", fasta, "--" + sensitivity, "--query-cover", str(coverage), "-p", str(threads), "-o", outname, "--outfmt", str(6), "qseqid", "sseqid", "pident", "length", "mismatch", "gapopen", "qstart", "qend", "sstart", "send", "evalue", "bitscore", ], shell=False, stderr=subprocess.PIPE, text=True, ) with open(logfile, "a") as f: f.write(completed_blast.stderr) return outname
[docs] def filter_DIAMOND_blastp_results( blasttsv: str, pid_theshold: float = 90.0 ) -> pd.DataFrame: """Filter the results of a DIAMOND BLASTp run (see :py:func:`~refinegems.utility.connections.run_DIAMOND_blastp`) by percentage identity value (PID) and extract the matching pairs of query and subject IDs. Args: - blasttsv (str): Path to the DIAMOND BLASTp result file. - pid_theshold (float, optional): Threshold value for the PID. Given in percent. Defaults to 90.0. Raises: - ValueError: PID threshold has to be between 0.0 and 100.0 Returns: pd.DataFrame: A table with the columns query_ID and subject_ID containing hits from BLAST run with s PID higher than the given threshold value. """ if pid_theshold > 100.0 or pid_theshold < 0.0: raise ValueError("PID threshold has to be between 0.0 and 100.0") # load diamond results diamond_results = pd.read_csv(blasttsv, sep="\t", header=None) diamond_results.columns = [ "query_ID", "subject_ID", "PID", "align_len", "no_mismatch", "no_gapopen", "query_start", "query_end", "subject_start", "subject_end", "E-value", "bitscore", ] # filter by PID diamond_results = diamond_results[diamond_results["PID"] >= pid_theshold] # trim cols diamond_results = diamond_results[["query_ID", "subject_ID"]] return diamond_results
# MCC - MassChargeCuration # ------------------------
[docs] def perform_mcc(model: cobra.Model, dir: str, apply: bool = True) -> cobra.Model: """Run the MassChargeCuration toll on the model and optionally directly apply the solution. Args: - model (cobra.Model): The model to use the tool on. - dir (str): Path of the directory to save MCC output in. - apply (bool, optional): If True, model is directly updated with the results. Defaults to True. Returns: cobra.Model: The model (updated or not) """ # make temporary directory to save files for MCC in with tempfile.TemporaryDirectory() as temp: # use MCC if apply: # update model balancer = MassChargeCuration(model, update_ids=False, data_path=temp) else: # do not change original model model_copy = model.copy() balancer = MassChargeCuration(model_copy, update_ids=False, data_path=temp) # save reports balancer.generate_reaction_report(Path(dir, model.id + "_mcc_reactions")) balancer.generate_metabolite_report(Path(dir, model.id + "_mcc_metabolites")) balancer.generate_visual_report(Path(dir, model.id + "_mcc_visual")) return model
# Memote # -------
[docs] def run_memote( model: cobra.Model, type: Literal["json", "html"] = "html", return_res: bool = False, save_res: Union[str, None] = None, verbose: bool = False, ) -> Union[dict, str, None]: """Run the memote snapshot function on a given model loaded with COBRApy. Args: - model (cobra.Model): The model loaded with COBRApy. - type (Literal['json','html'], optional): Type of report to produce. Can be 'html' or 'json'. Defaults to 'html'. - return_res (bool, optional): Option to return the result. Defaults to False. - save_res (str | None, optional): If given a path string, saves the report under the given path. Defaults to None. - verbose (bool, optional): Produce a more verbose ouput. Defaults to False. Raises: - ValueError: Unknown input for parameter type Returns: (1) Case ``return_res = True`` and ``type = json``: dict: The json dictionary. (2) Case ``return_res = True`` and ``type = html``: str: The html string. (3) Case ``return_res = False``: None: no return """ # verbose output I if verbose: print("\n# -------------------\n# Analyse with MEMOTE\n# -------------------") start = time.time() # run memote ret, res = memote.suite.api.test_model( model, sbml_version=None, results=True, pytest_args=None, exclusive=None, skip=None, experimental=None, solver_timeout=10, ) # load depending on type match type: case "html": snap = memote.suite.api.snapshot_report(res, html=True) result = snap case "json": snap = memote.suite.api.snapshot_report(res, html=False) result = json.loads(snap) case _: message = f"Unknown input for parameter type: {type} " raise ValueError(message) # option to save report if save_res: with open(save_res, "w", encoding="utf-8") as f: f.write(result) # verbose output II if verbose: end = time.time() print(f"\ttotal time: {end - start}s") # option to return report if return_res: return result
[docs] def get_memote_score(memote_report: dict) -> float: """Extracts MEMOTE score from report Args: - memote_report (dict): Output from :py:func:`~refinegems.utility.connections.run_memote`. Returns: float: MEMOTE score """ return memote_report["score"]["total_score"]
# SBOannotator # ------------
[docs] def run_SBOannotator(model: libModel) -> libModel: """Run SBOannotator on a model to annotate the SBO terms. Args: - model (libModel): The model loaded with libSBML. Returns: libModel: The model with corrected / added SBO terms. """ dbs_scheme = files("sboannotator").joinpath("create_dbs.sql") with tempfile.TemporaryDirectory() as tempdir: write_model_to_file(model, str(Path(tempdir, "tempmodel.xml"))) # run SBOannotator doc = readSBML(str(Path(tempdir, "tempmodel.xml"))) model = doc.getModel() copy_scheme = shutil.copy(dbs_scheme, Path(tempdir, "dbs.sql")) model = sbo_annotator( doc, model, "constrained-based", str(Path(tempdir, "dbs")), str(Path(tempdir, "dud.xml")), ) return model