Source code for refinegems.analysis.core_pan

"""Handling, creating and working with pan-core models."""

__author__ = "Carolin Brune"

################################################################################
# requirements
################################################################################

import cobra

from typing import Literal

from ..utility.io import load_model
from ..classes.reports import CorePanAnalysisReport
from ..utility.entities import resolve_compartment_names

################################################################################
# functions
################################################################################

# core-pan modelling
# ------------------



[docs]
def extract_reactions_ids(
    model: cobra.Model, based_on: Literal["id"] = "id"
) -> list[str]:
    """Collect the identifiers of all reactions of a model.

    Supported modes for ``based_on``:

    - id: returns the reaction IDs exactly as they are set in the model.

    Args:
        - model (cobra.Model):
            The model whose reactions are inspected.
            Loaded with COBRApy.
        - based_on (Literal['id'], optional):
            Selects how and which identifiers are extracted. Defaults to 'id'.

    Raises:
        - ValueError: Unknown input for parameter based_on if not in given options.

    Returns:
        list[str]:
            The extracted identifiers, in the order of ``model.reactions``.
    """

    if based_on == "id":
        reaction_ids = []
        for reaction in model.reactions:
            reaction_ids.append(reaction.id)
        return reaction_ids

    # every other mode is not implemented
    raise ValueError(f"Unknown input for parameter based_on: {based_on}")




[docs]
def find_core_reaction_ids(all_reactions: dict[str, list[str]]) -> set[str]:
    """Helper function for :py:func:`~refinegems.analysis.core_pan.generate_core_pan_model`.
    Identify the core reactions from a set of reactions from different models.
    Core reactions are reactions that occur in ALL the models.

    Args:
        - all_reactions (dict[str, list[str]]):
            Mapping of a model key to the reaction IDs of that model,
            for all models to be part of the core-pan model.

    Returns:
        set[str]:
            Set of the IDs of reactions that are defined as core.
            Empty if no models are given.
    """

    id_sets = [set(reacs) for reacs in all_reactions.values()]

    # set.intersection needs at least one operand; without any model there
    # is no core, and the result keeps the same type as in the regular case
    if not id_sets:
        return set()

    return set.intersection(*id_sets)




[docs]
def find_pan_reactions(
    all_reactions: dict[str, list[str]], core: list[str]
) -> dict[str, set[str]]:
    """Helper function for :py:func:`~refinegems.analysis.core_pan.generate_core_pan_model`. Identify the pan reactions
    for a set of reactions of different models. Pan reactions are reactions that are found
    in AT LEAST one model but NOT in all.

    Args:
        - all_reactions (dict[str, list[str]]):
            Mapping of a model key to the reaction IDs of that model,
            for all models to be part of the core-pan model.
        - core (list[str]):
            Core reaction IDs, output of :py:func:`~refinegems.analysis.core_pan.find_core_reaction_ids`.
            Any iterable of IDs (list, set, ...) is accepted.

    Returns:
        dict[str, set[str]]:
            For every key of ``all_reactions``, the set of reaction IDs of that
            model that are not part of ``core``.
    """

    # materialise once: constant-time membership tests, and the core IDs can
    # be reused for every model even if an iterator was passed in
    core_ids = set(core)

    return {model: set(reacs) - core_ids for model, reacs in all_reactions.items()}




[docs]
def collect_reacs_from_model(
    model: cobra.Model,
    reac_id_list: list[str],
    based_on: Literal["id"] = "id",
    notes: tuple[str] = ("core-pan", "core"),
) -> list[cobra.Reaction]:
    """Based on a model and a list of reactions IDs, collects the corresponding reactions.

    Args:
        - model (cobra.Model):
            The model.
        - reac_id_list (list[str]):
            List of reactions IDs. are treated as actual cobra ID or not depending on 'based_on'.
        - based_on (Literal['id'], optional):
            Defines, if the IDs are to be treated literal ('id') or not.
            Defaults to 'id'.
        - notes (tuple, optional):
            What kind of reactions have been collected. Expects a tuple of two strings.
            Uses the tuple to create a notes entry in the reaction object.
            Defaults to ('core-pan','core').

    Raises:
        - ValueError: Unknown input for parameter based_on.

    Returns:
        list[cobra.Reaction]:
            List of the extracted reactions.
    """

    match based_on:
        case "id":

            reac_list = []
            for id in reac_id_list:
                new_reac = model.reactions.get_by_id(id)
                new_reac.notes[notes[0]] = notes[1]
                reac_list.append(new_reac)

            return reac_list

        case _:
            raise ValueError(f"Unknown input for parameter based_on: {based_on}")




[docs]
def generate_core_pan_model(
    model_list: list[str],
    based_on: Literal["id"] = "id",
    name: str = "core_pan_model",
    remove_genes: bool = True,
) -> cobra.Model:
    """Generate a core-pan model from a set of models.

    Generation is based on:

        - id: uses the IDs to compare reactions

    Core reactions (present in every input model) are taken from the first
    loaded model. Each pan reaction is taken from the first model, in loading
    order, that contains it. Every collected reaction is tagged with a
    ``notes['core-pan']`` entry of either 'core' or 'pan'.

    Args:
        - model_list (list[str]):
            List of paths to models.
        - based_on (Literal['id'], optional):
            How to decide which reactions are considered the same.
            Defaults to 'id'.
        - name (str, optional):
            Name of the new model. Passed as the first argument
            to :py:class:`cobra.Model`.
            Defaults to 'core_pan_model'.
        - remove_genes (bool, optional):
            Flag to remove all genes from the model.
            Defaults to True.

    Returns:
        cobra.Model:
            The generated core-pan model.
    """

    # load all models
    all_models = load_model(model_list, "cobra")

    # resolve compartment issue
    for model in all_models:
        resolve_compartment_names(model)

    # key every model by position AND id: model IDs alone are not guaranteed
    # to be unique (or set at all), and colliding keys would silently drop
    # models from the core/pan computation
    models_by_key = {
        f"{idx}:{model.id}": model for idx, model in enumerate(all_models)
    }

    # extract reactions
    all_reactions = {
        key: extract_reactions_ids(model, based_on)
        for key, model in models_by_key.items()
    }

    # define core-pan
    core = find_core_reaction_ids(all_reactions)
    pan = find_pan_reactions(all_reactions, core)

    # extract corresponding reactions from input models
    # (sorted: the ID collections may be sets, whose iteration order is not
    # reproducible between runs)
    core_reacs = collect_reacs_from_model(
        all_models[0], sorted(core), based_on, notes=("core-pan", "core")
    )
    pan_reacs = []
    collected = set()  # IDs of pan reactions already taken from an earlier model
    for key, reacs in pan.items():
        to_add = sorted(set(reacs) - collected)
        new_reacs = collect_reacs_from_model(
            models_by_key[key], to_add, based_on, notes=("core-pan", "pan")
        )
        pan_reacs.extend(new_reacs)
        collected.update(to_add)

    # construct model
    cp_model = cobra.Model(name)
    cp_model.add_reactions(core_reacs)
    cp_model.add_reactions(pan_reacs)

    # remove genes (optional); the reactions themselves are kept
    if remove_genes:
        cobra.manipulation.delete.remove_genes(
            cp_model, cp_model.genes, remove_reactions=False
        )

    return cp_model



# core-pan comparison
# -------------------


[docs]
def compare_to_core_pan(
    model: cobra.Model, cp_model: cobra.Model, based_on: Literal["id"] = "id"
) -> CorePanAnalysisReport:
    """Compare a model to a pan-core model.

    Comparison can be done based on:

        - id: uses the reaction IDs for a simple and direct comparison.
        
        .. note:: 
            Currently, this requires the model reactions to be annotated with 'core-pan' notes.
            This function however, is object to change and will be extended in the future.

    Args:
        - model (cobra.Model):
            The input model.
        - cp_model (cobra.Model):
            The core-pan model
        - based_on (Literal['id'], optional):
            How to perform the comparison.
            Defaults to 'id'.

    Raises:
        - ValueError: Unknown input for parameter based_on.

    Returns:
        CorePanAnalysisReport:
            The analysis results in form of a report object.
    """

    results = CorePanAnalysisReport(model)

    match based_on:
        # compare models solely based on their reactions IDs
        case "id":

            # separate cp_model reactions into core and pan list
            core_reac_list = [
                _.id for _ in cp_model.reactions if "core-pan" in _.notes and _.notes["core-pan"] == "core"
            ]
            pan_reac_list = [
                _.id for _ in cp_model.reactions if "core-pan" in _.notes and _.notes["core-pan"] == "pan"
            ]

            # compare model to the core and pan reaction list
            results.core_reac = [
                _.id for _ in model.reactions if _.id in core_reac_list
            ]
            results.pan_reac = [_.id for _ in model.reactions if _.id in pan_reac_list]
            results.novel_reac = [
                _.id
                for _ in model.reactions
                if _.id not in results.pan_reac and _.id not in results.core_reac
            ]

        case _:
            raise ValueError(f"Unknown input for parameter based_on: {based_on}")

    return results