
API

ADToolbox resolves the paths to the files and configurations that its methods require through the configs module. The entire toolbox relies on this module: objects of the different classes in ADToolbox are instantiated with an instance of the corresponding class in the configs module. For instance, if you want to use the methods of the Metagenomics class in the core module, you do the following:

from adtoolbox import configs, core

metag_conf=configs.Metagenomics() 
metag_object=core.Metagenomics(metag_conf)

Doing this means that any core.Metagenomics method will refer to the default configurations defined in the configs module. If you want to override a default configuration, pass the desired argument to the configs.Metagenomics constructor. For example, if you want to change the docker repository for VSEARCH you can:


metag_conf=configs.Metagenomics(vsearch_docker="mydocker") 
metag_object=core.Metagenomics(metag_conf)

Now when you execute the corresponding method in core.Metagenomics, it will use mydocker instead of the default. To learn more about the default configs, see the configs API below.

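Because config objects are plain Python objects, you can inspect the resolved defaults directly. A minimal sketch:

from adtoolbox import configs

metag_conf=configs.Metagenomics()
for key,value in vars(metag_conf).items(): # vars() exposes every configured attribute
    print(key,"=",value)
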
configs

You can access this module by:

from adtoolbox import configs 

This module contains configurations that are required by other classes in the package and also links to remote databases. The following classes are included in this module:

1. Database

An instance of this class will hold all the configuration information for core.Database functionalities.

Source code in adtoolbox/configs.py
class Database:
	"An instance of this class will hold all the configuration information for core.Database functionalities."

	def __init__(self,
		compound_db:str=Seed_COMPOUNDS_DB,
		reaction_db:str=Seed_RXN_DB,
		local_compound_db:str=os.path.join(Main_Dir, "Database", 'Local_compounds.json'),
		local_reaction_db:str=os.path.join(Main_Dir, "Database", 'Local_reactions.json'),
		csv_reaction_db:str=os.path.join(Main_Dir, "Database", 'Reaction_Metadata.csv'),
		feed_db=os.path.join(Main_Dir, "Database", 'feed_db.tsv'),
		amplicon_to_genome_db=os.path.join(Main_Dir,'Database','Amplicon2GenomeDBs'),
		cazy_links:str=EXTERNAL_LINKS["cazy_links"],
		amplicon_to_genome_urls:dict=EXTERNAL_LINKS["amplicon2genome"],
		adm_parameters_urls:dict=E_ADM_REMOTE,
		adm_parameters:dict=E_ADM_LOCAL,
		seed_rxn_url:str =EXTERNAL_LINKS["seed_rxn_url"],
		seed_compound_url:str =EXTERNAL_LINKS["seed_compound_url"],
		protein_db_url:str =INTERNAL_LINKS["protein_db_url"],
		adtoolbox_rxn_db_url:str =INTERNAL_LINKS["adtoolbox_rxn_db_url"],
		feed_db_url:str =INTERNAL_LINKS["feed_db_url"],
		qiime_classifier_db:str=os.path.join(Main_Dir, "Database","qiime2_classifier_db" ,'qiime2_classifier_db.qza'),
		qiime_classifier_db_url:str=INTERNAL_LINKS["qiime_classifier_db_url"],
  		adtoolbox_singularity=ADTOOLBOX_CONTAINERS["singularity_x86"],
		adtoolbox_docker=ADTOOLBOX_CONTAINERS["docker_x86"],
    	protein_db=os.path.join(Main_Dir, "Database", 'Protein_DB.fasta'),
		adm_microbial_groups_mapping=E_ADM_MICROBIAL_GROUPS_MAPPING,
        metagenomics_studies_db=os.path.join(Main_Dir,"Database","Studies","metagenomics_studies.tsv"),
        experimental_data_db=os.path.join(Main_Dir,"Database","Studies","experimental_data_references.json"),
        studies_remote=STUDIES_REMOTE,
        studies_local=STUDIES_LOCAL,
        check_sanity:bool=False
		):
		self.compound_db = compound_db
		self.reaction_db = reaction_db
		self.local_compound_db = local_compound_db
		self.local_reaction_db = local_reaction_db
		self.csv_reaction_db = csv_reaction_db
		self.feed_db = feed_db
		self.amplicon_to_genome_db = amplicon_to_genome_db
		self.cazy_links = cazy_links
		self.amplicon_to_genome_urls = amplicon_to_genome_urls
		self.adm_parameters_urls = adm_parameters_urls
		self.adm_parameters = adm_parameters
		self.seed_rxn_url = seed_rxn_url
		self.seed_compound_url = seed_compound_url
		self.protein_db_url = protein_db_url
		self.adtoolbox_rxn_db_url = adtoolbox_rxn_db_url
		self.feed_db_url = feed_db_url
		self.qiime_classifier_db = qiime_classifier_db
		self.qiime_classifier_db_url = qiime_classifier_db_url
		self.adtoolbox_singularity=adtoolbox_singularity
		self.adtoolbox_docker=adtoolbox_docker
		self.protein_db=protein_db
		self.adm_microbial_groups_mapping=adm_microbial_groups_mapping
		self.metagenomics_studies_db=metagenomics_studies_db
		self.experimental_data_db=experimental_data_db
		self.studies_remote=studies_remote
		self.studies_local=studies_local
		self.protein_db_mmseqs=pathlib.Path(protein_db).parent.joinpath("protein_db_mmseqs")
		if check_sanity:
			self.check_adm_parameters()

	def check_adm_parameters(self):
		branches=all([pathlib.Path(x).parent==pathlib.Path(self.adm_parameters["model_parameters"]).parent for x in self.adm_parameters.values()])
		if not branches:
			warnings.warn(f"The ADM parameters are not in the same directory!")

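Any keyword accepted by the constructor above can be overridden at instantiation time. A minimal sketch, using a hypothetical local path for the feed database:

import os
from adtoolbox import configs, core

# "my_project" is a hypothetical directory; any keyword from the constructor can be overridden.
db_conf=configs.Database(feed_db=os.path.join("my_project","feed_db.tsv"),check_sanity=False)
db_object=core.Database(config=db_conf)
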
2. Metagenomics

An instance of this class will hold all the configuration information for core.Metagenomics functionalities.

Source code in adtoolbox/configs.py
class Metagenomics:
	"""	
	An instance of this class will hold all the configuration information for core.Metagenomics functionalities.
	"""
	### Here we have some class variables that are used in the class
	gtdb_dir="ssu_all_*.fna"
	def __init__(self, 
            amplicon2genome_k=10,
            vsearch_similarity=0.97,
            genomes_base_dir=os.path.join(Main_Dir,"Genomes"),
            align_to_gtdb_outputs_dir=os.path.join(Main_Dir,"Genomes"),
            amplicon2genome_db=Database().amplicon_to_genome_db,
            qiime_outputs_dir=os.path.join(Main_Dir,'Metagenomics_Data','QIIME_Outputs'),
			genome_alignment_script=os.path.join(Main_Dir,"Metagenomics_Data","QIIME_Outputs","genome_alignment_script.sh"),
			vsearch_threads:int=4,
			rsync_download_dir=os.path.join(Main_Dir,"Genomes","rsync_download.sh"),
			adtoolbox_singularity=ADTOOLBOX_CONTAINERS["singularity_x86"],
			adtoolbox_docker=ADTOOLBOX_CONTAINERS["docker_x86"],
            genome_alignment_output=os.path.join(Main_Dir,"Outputs"),
            csv_reaction_db=Database().csv_reaction_db,
            sra=os.path.join(Main_Dir,"Metagenomics_Analysis","SRA"),
            bit_score=40,
            e_value=10**-5,
            qiime2_docker_image="quay.io/qiime2/core:2022.2",
            qiime2_singularity_image="docker://quay.io/qiime2/core:2022.2",
            qiime2_paired_end_bash_str=os.path.join(PKG_DATA,"qiime_template_paired.txt"),
            qiime2_single_end_bash_str=os.path.join(PKG_DATA,"qiime_template_single.txt"),
			qiime_classifier_db=Database().qiime_classifier_db,
			adm_mapping=Database().adm_microbial_groups_mapping,
             ):
		self.k = amplicon2genome_k
		self.vsearch_similarity = vsearch_similarity
		self.align_to_gtdb_outputs_dir = align_to_gtdb_outputs_dir
		self.amplicon2genome_db = amplicon2genome_db
		self.qiime_outputs_dir = qiime_outputs_dir
		self.protein_db=Database().protein_db
		self.protein_db_mmseqs=Database().protein_db_mmseqs
		self.seed_rxn_db=Seed_RXN_DB
		self.genome_alignment_output = genome_alignment_output
		self.bit_score = bit_score
		self.e_value = e_value
		self.vsearch_threads=vsearch_threads
		self.csv_reaction_db=csv_reaction_db
		self.sra=sra
		self.qiime2_singularity_image=qiime2_singularity_image
		self.qiime2_docker_image=qiime2_docker_image
		self.qiime2_paired_end_bash_str=qiime2_paired_end_bash_str
		self.qiime2_single_end_bash_str=qiime2_single_end_bash_str 
		self.qiime_classifier_db=qiime_classifier_db
		if list(pathlib.Path(self.amplicon2genome_db).rglob(Metagenomics.gtdb_dir)):
			self.gtdb_dir_fasta=str(list(pathlib.Path(self.amplicon2genome_db).rglob(Metagenomics.gtdb_dir))[0])
		else:
			self.gtdb_dir_fasta=None
		self.genome_alignment_script=genome_alignment_script	
		self.adtoolbox_singularity=adtoolbox_singularity
		self.adtoolbox_docker=adtoolbox_docker
		self.rsync_download_dir=rsync_download_dir
		self.genomes_base_dir=genomes_base_dir
		self.adm_mapping=adm_mapping

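As with the other config classes, individual defaults can be overridden per run. A minimal sketch that tightens the VSEARCH identity cutoff and raises the thread count (both keywords appear in the constructor above):

from adtoolbox import configs, core

metag_conf=configs.Metagenomics(vsearch_similarity=0.99,vsearch_threads=8)
metag_object=core.Metagenomics(metag_conf)
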
3. Utils

An instance of this class will hold all the configuration information for utils module functionalities.

Source code in adtoolbox/configs.py
class Utils:
	"""
	An instance of this class will hold all the configuration information for utils module functionalities."""
	def __init__(self,
	slurm_template:str=os.path.join(PKG_DATA,"slurm_template.txt"),
	docker_template_qiime:str=None,
	singularity_template_qiime:str=None,
	slurm_executer:str='',
	slurm_wall_time:str='24:00:00',
	slurm_job_name:str='ADToolbox',
	slurm_outlog:str='ADToolbox.log',
    slurm_cpus:str="12",
	slurm_memory:str="100G",
	slurm_save_dir:str=os.getcwd(),
	adtoolbox_singularity:str=ADTOOLBOX_CONTAINERS["singularity_x86"],
	adtoolbox_docker:str=ADTOOLBOX_CONTAINERS["docker_x86"]
	) -> None:
		self.slurm_template = slurm_template
		self.docker_template_qiime = docker_template_qiime
		self.singularity_template_qiime = singularity_template_qiime
		self.slurm_executer = slurm_executer
		self.slurm_wall_time = slurm_wall_time
		self.slurm_job_name = slurm_job_name
		self.slurm_outlog=slurm_outlog
		self.slurm_cpus = slurm_cpus
		self.slurm_save_dir = slurm_save_dir
		self.slurm_memory = slurm_memory
		self.adtoolbox_singularity=adtoolbox_singularity
		self.adtoolbox_docker=adtoolbox_docker

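A minimal sketch of a configs.Utils object for dispatching jobs to SLURM; the partition name and resource values are hypothetical placeholders:

from adtoolbox import configs

utils_conf=configs.Utils(slurm_executer="my_partition", # hypothetical partition name
                         slurm_wall_time="12:00:00",
                         slurm_cpus="24",
                         slurm_memory="50G")
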
core

You can access this module by:

from adtoolbox import core 

This module includes the following classes:

1. Experiment

This class creates an interface for the experimental data to be used in different places in ADToolbox. First you should give each experiment a name. time must be a list of time points in days, and there must be a time 0 point assigned to each experiment. variables must be a list of integers: the indices of the ADM species for which concentration data are available. data must be a list of lists, where each inner list holds the concentrations of one species at each time point. IMPORTANT: The order of the species in the data list must match the order of the species in the variables list. If there are specific initial concentrations for the ADM species, they can be passed as a dictionary to the initial_concentrations argument. reference is an optional argument that can be used to provide a reference for the experimental data. When using the database module to query for Experiment objects, you can query by name, reference, or model_type, so having a descriptive reference is useful for querying. The default model name is "e_adm"; this can be changed through the model_name argument, which also helps with querying.

Parameters:

  • name (str): A unique name for the experiment. [required]

  • time (list): A list of time points in days. [required]

  • variables (list): A list of integers: the indices of the ADM species for which concentration data are available. [required]

  • data (list): A list of lists; each inner list holds the concentrations of one species at each time point. [required]

  • initial_concentrations (dict): A dictionary of initial concentrations for the ADM species. Defaults to {}.

  • reference (str): A reference for the experimental data. Defaults to ''.

  • model_name (str): The name of the model that the experimental data is for. Defaults to "e_adm".

Examples:

>>> from adtoolbox import configs
>>> import json
>>> with open(configs.Database().species,"r") as f:
...     species=json.load(f)
>>> S_su_index=species.index("S_su")
>>> S_aa_index=species.index("S_aa")
>>> exp=Experiment(name="Test",time=[0,1,2],variables=[S_su_index,S_aa_index],data=[[1,2,3],[4,5,6]],reference="Test reference")
Source code in adtoolbox/core.py
@dataclass
class Experiment:
    """
    This class creates an interface for the experimental data to be used in different places in ADToolbox.
    First you should give each experiment a name. Time must be a list of time points in days, and there must be a time 0 point assigned to each experiment.
    variables must be a list of integers that represent the variables that are the index of the ADM species that we have concentration data for.
    data must be a list of lists. Each list in the list must be a list of concentrations for each species at each time point.
    IMPORTANT: The order of the species in the data list must match the order of the species in the variables list.
    if there are specific initial concentrations for the ADM species, they can be passed as a dictionary to the initial_concentrations argument.
    reference is an optional argument that can be used to provide a reference for the experimental data. If using the database module 
    to query for Experiment objects you can query by name or reference or model_type. So, having a descriptive reference can be useful for querying as well.
    The default model name is "e_adm". This can be changed by passing a different model name to the model_name argument. This also helps with querying.

    Args:
        name (str): A unique name for the experiment.
        time (list): A list of time points in days.
        variables (list): A list of integers that represent the variables that are the index of the ADM species that we have concentration data for.
        data (list): A list of lists. Each list in the list must be a list of concentrations for each species at each time point.
        initial_concentrations (dict, optional): A dictionary of initial concentrations for the ADM species. Defaults to {}.
        reference (str, optional): A reference for the experimental data. Defaults to ''.
        model_name (str, optional): The name of the model that the experimental data is for. Defaults to "e_adm".

    Examples:
        >>> from adtoolbox import configs
        >>> import json
        >>> with open(configs.Database().species,"r") as f:
        ...     species=json.load(f)
        >>> S_su_index=species.index("S_su")
        >>> S_aa_index=species.index("S_aa")
        >>> exp=Experiment(name="Test",time=[0,1,2],variables=[S_su_index,S_aa_index],data=[[1,2,3],[4,5,6]],reference="Test reference")

    """
    name:str
    time: list[float]
    variables: list[int]
    data: list[list[float]]
    initial_concentrations: dict[str,float] = dataclasses.field(default_factory=dict)
    reference: str = ""
    model_name: str = "e_adm"


    def __post_init__(self):
        self.data=np.array(self.data).T
        self.validate()

    def validate(self):
        assert len(self.time)==self.data.shape[0], "Number of time points must match number of rows in data."
        assert len(self.variables)==self.data.shape[1] , "Number of variables must match number of columns in data."
        assert self.time[0]==0, "Time must start at 0."
        return "successful"

    def to_dict(self):
        return {"name":self.name,
                "time":self.time,
                "variables":self.variables,
                "data":self.data.T.tolist(),
                "initial_concentrations":self.initial_concentrations,
                "reference":self.reference,
                "model_name":self.model_name}

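A self-contained sketch of constructing and serializing an Experiment; the variable indices 0 and 1 are hypothetical stand-ins for real ADM species indices, which would normally be looked up as in the example above:

from adtoolbox import core

exp=core.Experiment(name="batch_1",
                    time=[0,1,2],                       # must start at 0
                    variables=[0,1],                    # hypothetical species indices
                    data=[[1.0,2.0,3.0],[4.0,5.0,6.0]], # one inner list per variable
                    reference="hypothetical reference")
print(exp.to_dict()["data"]) # round-trips back to one row per variable
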
2. Feed

The Feed class is used to store the feed information and later use it in the e_adm model. All the entered numbers must be in percentages. Carbohydrates, lipids, proteins, and si must sum up to 100; they form the total dissolved solids. Carbohydrates, lipids, proteins, and xi must sum up to 100; they form the total suspended solids.

IMPORTANT: It is assumed that lipids, proteins, and carbohydrates have the same fraction in the soluble and insoluble fractions.

Parameters:

  • name (str): A unique name for the feed. [required]

  • carbohydrates (float): Percentage of carbohydrates in the feed. [required]

  • lipids (float): Percentage of lipids in the feed. [required]

  • proteins (float): Percentage of proteins in the feed. [required]

  • tss (float): Percentage of total COD in the form of suspended solids. [required]

  • si (float): Percentage of soluble inorganics in the TDS. [required]

  • xi (float): Percentage of insoluble inorganics in the TSS. [required]

  • reference (str): A reference for the feed data. Defaults to ''.

Examples:

>>> feed=Feed(name="Test",carbohydrates=20,lipids=20,proteins=20,si=20,xi=20,tss=70)
>>> assert feed.ch_tss==feed.lip_tss==feed.prot_tss==feed.xi_tss==0.25
Source code in adtoolbox/core.py
@dataclass
class Feed:

    """
    The Feed class is used to store the feed information, and later use it in the e_adm model.
    All the entered numbers must be in percentages. Carbohydrates, lipids, proteins, and si must sum up to 100,
    and they form the total dissolved solids. Carbohydrates, lipids, proteins, and xi must sum up to 100, and they form the total suspended solids.

    IMPORTANT: It is assumed that lipids, proteins, and carbohydrates have the same fraction in the soluble and insoluble fractions.

    Args:
        name (str): A unique name for the feed.
        carbohydrates (float): percentage of carbohydrates in the feed.
        lipids (float): percentage of lipids in the feed.
        proteins (float): percentage of proteins in the feed.
        tss (float): percentage of total COD in the form of suspended solids.
        si (float): percentage of soluble inorganics in the TDS.
        xi (float): percentage of insoluble inorganics in the TSS.
        reference (str, optional): A reference for the feed data. Defaults to ''.    

    Examples:
        >>> feed=Feed(name="Test",carbohydrates=20,lipids=20,proteins=20,si=20,xi=20,tss=70)
        >>> assert feed.ch_tss==feed.lip_tss==feed.prot_tss==feed.xi_tss==0.25

    """
    # total_cod:float Transfer to base parameters
    name:str            # A unique name for the feed
    carbohydrates:float # percentage of carbohydrates in the feed
    lipids:float        # percentage of lipids in the feed
    proteins:float      # percentage of proteins in the feed
    tss:float           # percentage of total COD in the form of suspended solids
    si:float            # percentage of soluble inorganics in the TDS
    xi:float            # percentage of insoluble inorganics in the TSS
    reference:str=''    # A reference for the feed data

    def __post_init__(self):
        if self.carbohydrates+self.lipids+self.proteins>100:
            raise ValueError("The sum of the percentages must be less than 100")
        if self.carbohydrates+self.lipids+self.proteins+self.si<1:
            warn("The sum of lipids, carbohydrates, and proteins is suspiciously low! Make sure you have input the numbers in percentages!")
        li_ch_pr=self.carbohydrates+self.lipids+self.proteins
        without_xi=100-self.xi
        self.ch_tss=self.carbohydrates/li_ch_pr*without_xi/100
        self.lip_tss=self.lipids/li_ch_pr*without_xi/100
        self.prot_tss=self.proteins/li_ch_pr*without_xi/100
        self.xi_tss=self.xi/100
        without_si=100-self.si
        self.ch_tds=self.carbohydrates/li_ch_pr*without_si/100
        self.lip_tds=self.lipids/li_ch_pr*without_si/100
        self.prot_tds=self.proteins/li_ch_pr*without_si/100
        self.si_tds=self.si/100

    def to_dict(self)->dict:
        return {"name":self.name,
                "carbohydrates":self.carbohydrates,
                "lipids":self.lipids,
                "proteins":self.proteins,
                "tss":self.tss,
                "si":self.si,
                "xi":self.xi,
                "reference":self.reference}

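A minimal sketch showing the derived fractions that become available after construction, reusing the composition from the docstring example:

from adtoolbox import core

feed=core.Feed(name="test_feed",carbohydrates=20,lipids=20,proteins=20,si=20,xi=20,tss=70)
print(feed.to_dict())          # the raw percentages as entered
print(feed.ch_tss,feed.ch_tds) # TSS/TDS fractions computed in __post_init__
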
3. MetagenomicsStudy

This class is used to communicate between the metagenomics studies database and the ADM model.

Parameters:

  • name (str): The name of the metagenomics study. It's okay if it is not unique. [required]

  • study_type (str): The type of the metagenomics study. It can be "amplicon" or "WGS". [required]

  • microbiome (str): The microbiome that the metagenomics study is about. [required]

  • sample_accession (str): The SRA sample accession number of the metagenomics study. This must be unique. [required]

  • comments (str): Any comments that you want to add to the metagenomics study. [required]

  • study_accession (str): The SRA study accession number of the metagenomics study. [required]

Examples:

>>> study=MetagenomicsStudy(name="Test",study_type="WGS",microbiome="test_microbiome",sample_accession="test_accession",comments="test_comments",study_accession="test_study_accession")
>>> assert study.name=="Test"
Source code in adtoolbox/core.py
@dataclass
class MetagenomicsStudy:
    """
    This class is used to communicate between the metagenomics studies database and the ADM model.

    Args:
        name (str): The name of the metagenomics study. It's okay if it is not unique.
        study_type (str): The type of the metagenomics study. It can be "amplicon" or "WGS".
        microbiome (str): The microbiome that the metagenomics study is about.
        sample_accession (str): The SRA sample accession number of the metagenomics study. This must be unique.
        comments (str): Any comments that you want to add to the metagenomics study.
        study_accession (str): The SRA study accession number of the metagenomics study.   

    Examples:
        >>> study=MetagenomicsStudy(name="Test",study_type="WGS",microbiome="test_microbiome",sample_accession="test_accession",comments="test_comments",study_accession="test_study_accession")
        >>> assert study.name=="Test"

    """
    name:str
    study_type:str
    microbiome:str
    sample_accession:str
    comments:str
    study_accession:str

    def to_dict(self)->dict:
        return {"name":self.name,
                "study_type":self.study_type,
                "microbiome":self.microbiome,
                "sample_accession":self.sample_accession,
                "comments":self.comments,
                "study_accession":self.study_accession}

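Because to_dict returns the same column layout used by the metagenomics studies database, a list of studies converts directly into a table. A minimal sketch with hypothetical accession numbers:

import pandas as pd
from adtoolbox import core

study=core.MetagenomicsStudy(name="digester_a",              # hypothetical study
                             study_type="WGS",
                             microbiome="anaerobic digester",
                             sample_accession="SAMN00000001", # hypothetical accession
                             comments="",
                             study_accession="SRP000001")    # hypothetical accession
df=pd.DataFrame([study.to_dict()]) # one row per study, same columns as the studies database
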
4. Reaction

This class provides a simple interface between information about biochemical reactions and multiple functionalities of ADToolbox. In order to instantiate a reaction object, you need to pass a dictionary of the reaction information. This dictionary must include the 'name' and 'stoichiometry' keys, and it follows the format of the seed database. stoichiometry must be formatted like the seed database, as follows: stoichiometry: '-1:cpd00079:0:0:"D-glucose-6-phosphate";1:cpd00072:0:0:"D-fructose-6-phosphate"'

Parameters:

  • data (dict): A dictionary containing the reaction information. This follows the format of the seed database. [required]

Examples:

>>> A={"name":'D-glucose-6-phosphate aldose-ketose-isomerase',"stoichiometry":'-1:cpd00079:0:0:"D-glucose-6-phosphate";1:cpd00072:0:0:"D-fructose-6-phosphate"'}
>>> a=Reaction(A)
>>> print(a)
D-glucose-6-phosphate aldose-ketose-isomerase
Source code in adtoolbox/core.py
class Reaction:
    """
    This class provides a simple interface between information about biochemical reactions and multiple functionalities of ADToolbox.
    In order to instantiate a reaction object, you need to pass a dictionary of the reaction information.
    This dictionary must include 'name','stoichiometry' keys. This follows the format of the seed database.
    stoichiometry must be formatted like seed database. The seed database format is as follows:
    stoichiometry: '-1:cpd00079:0:0:\"D-glucose-6-phosphate\";1:cpd00072:0:0:\"D-fructose-6-phosphate\"'

    Args:
        data (dict): A dictionary containing the reaction information. This follows the format of the seed database.


    Examples:
        >>> A={"name":'D-glucose-6-phosphate aldose-ketose-isomerase',"stoichiometry":'-1:cpd00079:0:0:\"D-glucose-6-phosphate\";1:cpd00072:0:0:\"D-fructose-6-phosphate\"'}
        >>> a=Reaction(A)
        >>> print(a)
        D-glucose-6-phosphate aldose-ketose-isomerase

    """
    def __init__(self, data:dict)->None:
        self.data = data

    def __str__(self)->str:
        return self.data['name']

    @property
    def stoichiometry(self)->dict:
        """
        Returns the stoichiometry of the reaction by the seed id of the compounds as key and the
        stoichiometric coefficient as value.
        Examples:
            >>> A={"name":'D-glucose-6-phosphate aldose-ketose-isomerase',"stoichiometry":'-1:cpd00079:0:0:\"D-glucose-6-phosphate\";1:cpd00072:0:0:\"D-fructose-6-phosphate\"'}
            >>> a=Reaction(A)
            >>> a.stoichiometry=={'cpd00079': -1, 'cpd00072': 1}
            True

        Args:
            self (Reaction): An instance of the Reaction.

        Returns:
            dict: The stoichiometry of the reaction 
        """
        return {compound.split(':')[1]:float(compound.split(':')[0]) for compound in self.data['stoichiometry'].split(';') }

stoichiometry: dict property

Returns the stoichiometry of the reaction by the seed id of the compounds as key and the stoichiometric coefficient as value.

Examples:

>>> A={"name":'D-glucose-6-phosphate aldose-ketose-isomerase',"stoichiometry":'-1:cpd00079:0:0:"D-glucose-6-phosphate";1:cpd00072:0:0:"D-fructose-6-phosphate"'}
>>> a=Reaction(A)
>>> a.stoichiometry=={'cpd00079': -1, 'cpd00072': 1}
True

Parameters:

  • self (Reaction): An instance of the Reaction. [required]

Returns:

  • dict: The stoichiometry of the reaction.

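The property simply splits the seed stoichiometry string on ';' (one compound per entry) and then on ':' (coefficient first, seed id second). A standalone sketch of the same parsing, with no ADToolbox imports needed:

stoich='-1:cpd00079:0:0:"D-glucose-6-phosphate";1:cpd00072:0:0:"D-fructose-6-phosphate"'
parsed={entry.split(':')[1]:float(entry.split(':')[0]) for entry in stoich.split(';')}
print(parsed) # {'cpd00079': -1.0, 'cpd00072': 1.0}
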
5. Metabolite

This class provides a simple interface between information about metabolites and multiple functionalities of ADToolbox. In order to instantiate a metabolite object, you need to pass a dictionary of the metabolite information. This dictionary must include the 'name', 'mass', and 'formula' keys, and it follows the format of the seed database, for example: formula: 'C6H12O6'. Possibly the main advantage of instantiating a metabolite object is that it provides a COD attribute that can be used to convert the concentration of the metabolite from g/l to gCOD/l. This is useful for comparing the experimental data with the model outputs.

Parameters:

  • data (dict): A dictionary containing the metabolite information. This follows the format of the seed database. [required]

Examples:

>>> A={"name":"methane","mass":16,"formula":"CH4"}
>>> a=Metabolite(A)
>>> print(a)
methane
Source code in adtoolbox/core.py
class Metabolite:
    """
    This class provides a simple interface between information about metabolites and multiple functionalities of ADToolbox.
    In order to instantiate a metabolite object, you need to pass a dictionary of the metabolite information.
    This dictionary must include 'name','mass','formula' keys. This follows the format of the seed database.
    formula must be formatted like seed database. The seed database format is as follows:
    formula: 'C6H12O6'
    Possibly the main advantage of instantiating a metabolite object is that it provides a COD attribute that can be used to convert
    the concentration of the metabolite from g/l to gCOD/l. This is useful for comparing the experimental data with the model outputs.

    Args:
        data (dict): A dictionary containing the metabolite information. This follows the format of the seed database.


    Examples:
        >>> A={"name":"methane","mass":16,"formula":"CH4"}
        >>> a=Metabolite(A)
        >>> print(a)
        methane

    """

    def __init__(self, data):
        self.data = data
        self.cod = self.cod_calc()
        self.mw= self.data.get('mass',None)

    def __str__(self) -> str:
        return self.data['name']

    def cod_calc(self,add_h:float=0,add_c:float=0,add_o:float=0)->float:
        """
        Calculates the conversion rates for g/l -> gCOD/l
        In some cases we would like to add extra atoms for COD calculations
        For example, model seed biochemistry database only uses acetate instead of acetic acid.
        The 1 hydrogen difference changes the COD conversion rate. For this reason we can add extra atoms to the formula
        to calculate the COD conversion rate without changing anything else.

        Args:
            add_h (float): The number of extra hydrogen atoms to add to the formula for COD calculation.
            add_c (float): The number of extra carbon atoms to add to the formula for COD calculation.
            add_o (float): The number of extra oxygen atoms to add to the formula for COD calculation.

        Examples:
            >>> A={"name":"methane","mass":16,"formula":"CH4"}
            >>> a=Metabolite(A)
            >>> a.cod
            4.0

        Args:
            self (Metabolite): An instance of the Metabolite class.

        Returns:
            float: COD conversion from g/l to gCOD/l

        """
        if self.data['formula'] and self.data['mass']:
            contents = {}
            atoms = ["H", "C", "O"]
            mw = self.data['mass']+add_h*1+add_c*12+add_o*16
            for atom in atoms:
                if re.search(atom+r'\d*', self.data['formula']):
                    if len(re.search(atom+r'\d*', self.data['formula']).group()[1:]) == 0:
                        contents[atom] = 1
                    else:
                        contents[atom] = int(
                            re.search(atom+r'\d*', self.data['formula']).group()[1:])
                else:
                    contents[atom] = 0
            contents['H']+=add_h
            contents['C']+=add_c
            contents['O']+=add_o
            cod_conv=1/mw*(contents['H']+4*contents['C']-2*contents['O'])/4*32
            return cod_conv

        else:
            return 'None'

cod_calc(add_h=0, add_c=0, add_o=0)

Calculates the conversion rates for g/l -> gCOD/l. In some cases we would like to add extra atoms for COD calculations. For example, the model seed biochemistry database only uses acetate instead of acetic acid. The 1 hydrogen difference changes the COD conversion rate. For this reason we can add extra atoms to the formula to calculate the COD conversion rate without changing anything else.

Parameters:

  • add_h (float): The number of extra hydrogen atoms to add to the formula for COD calculation. Defaults to 0.

  • add_c (float): The number of extra carbon atoms to add to the formula for COD calculation. Defaults to 0.

  • add_o (float): The number of extra oxygen atoms to add to the formula for COD calculation. Defaults to 0.

Examples:

>>> A={"name":"methane","mass":16,"formula":"CH4"}
>>> a=Metabolite(A)
>>> a.cod
4.0

Parameters:

  • self (Metabolite): An instance of the Metabolite class. [required]

Returns:

  • float: COD conversion from g/l to gCOD/l.

Source code in adtoolbox/core.py
def cod_calc(self,add_h:float=0,add_c:float=0,add_o:float=0)->float:
    """
    Calculates the conversion rates for g/l -> gCOD/l
    In some cases we would like to add extra atoms for COD calculations
    For example, model seed biochemistry database only uses acetate instead of acetic acid.
    The 1 hydrogen difference changes the COD conversion rate. For this reason we can add extra atoms to the formula
    to calculate the COD conversion rate without changing anything else.

    Args:
        add_h (float): The number of extra hydrogen atoms to add to the formula for COD calculation.
        add_c (float): The number of extra carbon atoms to add to the formula for COD calculation.
        add_o (float): The number of extra oxygen atoms to add to the formula for COD calculation.

    Examples:
        >>> A={"name":"methane","mass":16,"formula":"CH4"}
        >>> a=Metabolite(A)
        >>> a.cod
        4.0

    Args:
        self (Metabolite): An instance of the Metabolite class.

    Returns:
        float: COD conversion from g/l to gCOD/l

    """
    if self.data['formula'] and self.data['mass']:
        contents = {}
        atoms = ["H", "C", "O"]
        mw = self.data['mass']+add_h*1+add_c*12+add_o*16
        for atom in atoms:
            if re.search(atom+r'\d*', self.data['formula']):
                if len(re.search(atom+r'\d*', self.data['formula']).group()[1:]) == 0:
                    contents[atom] = 1
                else:
                    contents[atom] = int(
                        re.search(atom+r'\d*', self.data['formula']).group()[1:])
            else:
                contents[atom] = 0
        contents['H']+=add_h
        contents['C']+=add_c
        contents['O']+=add_o
        cod_conv=1/mw*(contents['H']+4*contents['C']-2*contents['O'])/4*32
        return cod_conv

    else:
        return 'None'

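A sketch of the acetate adjustment described above, using a hand-written (hypothetical) seed-style entry for the acetate ion; adding one hydrogen reproduces the COD conversion rate of acetic acid:

from adtoolbox import core

acetate=core.Metabolite({"name":"acetate","mass":59,"formula":"C2H3O2"})
print(acetate.cod)               # conversion rate computed from the ion formula
print(acetate.cod_calc(add_h=1)) # corrected toward acetic acid (C2H4O2), ~1.07 gCOD/g
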
6. SeedDB

This class is designed to interact with the seed database. The main advantage of using this class is that it can be used to instantiate reaction and metabolite objects, and it provides extra functionalities that rely on information in the seed database. For example, if there is a chemical formula assigned to a metabolite in the seed database, then the information about the COD of that metabolite can be computed using the chemical formula.

Parameters:

  • config (configs.SeedDB): An instance of the SeedDB class in the configs module. This class contains the information about the seed database. [required]

Examples:

>>> seed_db=SeedDB(configs.SeedDB())
>>> assert seed_db.compound_db==configs.SeedDB().compound_db
>>> assert seed_db.reaction_db==configs.SeedDB().reaction_db
Source code in adtoolbox/core.py
class SeedDB:

    """
    This class is designed to interact with seed database. The main advantage of using this class is that it can be used to instantiate
    a reaction and metabolite object, and it provides extra functionalities that rely on information in the seed database. For example, 
    If there is a chemical formula assigned to a metabolite in the seed database, then the information about the COD of that metabolite
    can be computed using the chemical formula. 

    Args:
        config (configs.SeedDB): An instance of the SeedDB class in the configs module. This class contains the information about the seed database.

    Examples:
        >>> seed_db=SeedDB(configs.SeedDB())
        >>> assert seed_db.compound_db==configs.SeedDB().compound_db
        >>> assert seed_db.reaction_db==configs.SeedDB().reaction_db

    """

    def __init__(self, config:configs.Database) -> None:

        self.reaction_db = config.reaction_db
        self.compound_db = config.compound_db

    def instantiate_rxns(self, seed_id:str)->Reaction:
        """
        This method is used to instantiate reaction objects from the seed database.
        In order to instantiate a reaction object, you need to pass the seed identifier for that reaction.

        Args:
            seed_id (str): The seed identifier for the reaction.

        Returns:
            Reaction: An instance of the Reaction class.

        Required Configs:
            - config.reaction_db

        Examples:
            >>> seed_db=SeedDB()
            >>> rxn=seed_db.instantiate_rxns("rxn00558")
            >>> assert rxn.data["name"]=="D-glucose-6-phosphate aldose-ketose-isomerase"
        """
        db=pd.read_json(self.reaction_db)
        return Reaction(data=db[db["id"]==seed_id].to_dict(orient="records")[0])

    def instantiate_metabs(self, seed_id:str)->Metabolite:
        """
        This method is used to instantiate metabolite objects from the seed database.
        In order to instantiate a metabolite object, you need to pass the seed identifier for that metabolite.

        Args:
            seed_id (str): The seed identifier for the metabolite.

        Returns:
            Metabolite: An instance of the Metabolite class. 

        Required Configs:
            - config.compound_db

        Examples:
            >>> seed_db=SeedDB()
            >>> metab=seed_db.instantiate_metabs("cpd01024")
            >>> assert metab.cod==4.0
        """
        db=pd.read_json(self.compound_db)
        return Metabolite(data=db[db["id"]==seed_id].to_dict(orient="records")[0])

    def get_seed_rxn_from_ec(self, ec_number:str)->list:
        """
        This method is used to get the seed reaction identifiers for a given EC number.

        Args:
            ec_number (str): The EC number.

        Returns:
            list: A list of seed reaction identifiers.

        Required Configs:
            - config.reaction_db

        Examples:
            >>> seed_db=SeedDB()
            >>> seed_rxn_list=seed_db.get_seed_rxn_from_ec("1.1.1.1")
            >>> assert len(seed_rxn_list)>0

        """
        db=pd.read_json(self.reaction_db)
        db=db[db["ec_numbers"].apply(lambda x: ec_number in x if x else False)]
        db.drop_duplicates("id",inplace=True,keep="first")
        return db.to_dict(orient="records")

get_seed_rxn_from_ec(ec_number)

This method is used to get the seed reaction identifiers for a given EC number.

Parameters:

  • ec_number (str): The EC number. [required]

Returns:

  • list: A list of seed reaction identifiers.

Required Configs
  • config.reaction_db

Examples:

>>> seed_db=SeedDB()
>>> seed_rxn_list=seed_db.get_seed_rxn_from_ec("1.1.1.1")
>>> assert len(seed_rxn_list)>0
Source code in adtoolbox/core.py
def get_seed_rxn_from_ec(self, ec_number:str)->list:
    """
    This method is used to get the seed reaction identifiers for a given EC number.

    Args:
        ec_number (str): The EC number.

    Returns:
        list: A list of seed reaction identifiers.

    Required Configs:
        - config.reaction_db

    Examples:
        >>> seed_db=SeedDB()
        >>> seed_rxn_list=seed_db.get_seed_rxn_from_ec("1.1.1.1")
        >>> assert len(seed_rxn_list)>0

    """
    db=pd.read_json(self.reaction_db)
    db=db[db["ec_numbers"].apply(lambda x: ec_number in x if x else False)]
    db.drop_duplicates("id",inplace=True,keep="first")
    return db.to_dict(orient="records")

instantiate_metabs(seed_id)

This method is used to instantiate metabolite objects from the seed database. In order to instantiate a metabolite object, you need to pass the seed identifier for that metabolite.

Parameters:

  • seed_id (str): The seed identifier for the metabolite. [required]

Returns:

  • Metabolite: An instance of the Metabolite class.

Required Configs
  • config.compound_db

Examples:

>>> seed_db=SeedDB()
>>> metab=seed_db.instantiate_metabs("cpd01024")
>>> assert metab.cod==4.0
Source code in adtoolbox/core.py
def instantiate_metabs(self, seed_id:str)->Metabolite:
    """
    This method is used to instantiate metabolite objects from the seed database.
    In order to instantiate a metabolite object, you need to pass the seed identifier for that metabolite.

    Args:
        seed_id (str): The seed identifier for the metabolite.

    Returns:
        Metabolite: An instance of the Metabolite class. 

    Required Configs:
        - config.compound_db

    Examples:
        >>> seed_db=SeedDB()
        >>> metab=seed_db.instantiate_metabs("cpd01024")
        >>> assert metab.cod==4.0
    """
    db=pd.read_json(self.compound_db)
    return Metabolite(data=db[db["id"]==seed_id].to_dict(orient="records")[0])

instantiate_rxns(seed_id)

This method is used to instantiate reaction objects from the seed database. In order to instantiate a reaction object, you need to pass the seed identifier for that reaction.

Parameters:

  • seed_id (str): The seed identifier for the reaction. [required]

Returns:

  • Reaction: An instance of the Reaction class.
Required Configs
  • config.reaction_db

Examples:

>>> seed_db=SeedDB()
>>> rxn=seed_db.instantiate_rxns("rxn00558")
>>> assert rxn.data["name"]=="D-glucose-6-phosphate aldose-ketose-isomerase"
Source code in adtoolbox/core.py
def instantiate_rxns(self, seed_id:str)->Reaction:
    """
    This method is used to instantiate reaction objects from the seed database.
    In order to instantiate a reaction object, you need to pass the seed identifier for that reaction.

    Args:
        seed_id (str): The seed identifier for the reaction.

    Returns:
        Reaction: An instance of the Reaction class.

    Required Configs:
        - config.reaction_db

    Examples:
        >>> seed_db=SeedDB()
        >>> rxn=seed_db.instantiate_rxns("rxn00558")
        >>> assert rxn.data["name"]=="D-glucose-6-phosphate aldose-ketose-isomerase"
    """
    db=pd.read_json(self.reaction_db)
    return Reaction(data=db[db["id"]==seed_id].to_dict(orient="records")[0])

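A sketch chaining the methods above, assuming the configured seed reaction database is reachable. The records returned for an EC number carry the 'name' and 'stoichiometry' keys, so they can back Reaction objects directly:

from adtoolbox import configs, core

seed_db=core.SeedDB(configs.Database())
adh_rxns=seed_db.get_seed_rxn_from_ec("1.1.1.1") # alcohol dehydrogenase
rxn=core.Reaction(adh_rxns[0])
print(rxn,rxn.stoichiometry)
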
7. Database

Here is a schematic of the Database module:

[Figure: database module schematic]

This class is designed to supply any data requirement for ADToolbox. All functionalities for saving, loading, and querying data are implemented here. ADToolbox in general contains the following databases:

  • The seed reaction database

  • The seed compound database

  • ADToolbox's Feed database

  • ADToolbox's Metagenomics studies database

  • ADToolbox's Experimental data database

  • ADToolbox's Protein database

  • ADToolbox's Reaction database

  • GTDB-tk database for bacterial and archaeal 16S rRNA sequences

  • ADM and e_adm model parameters

This class is instantiated with a configs.Database object. This object contains the paths to all the databases that ADToolbox uses. Please refer to the documentation of each method for more information on the required configurations.

Parameters:

  • config (configs.Database, optional): A configs.Database object. Defaults to configs.Database().

Examples:

>>> db=Database(config=configs.Database())
>>> assert type(db)==Database and type(db.config)==configs.Database
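
A minimal sketch that initializes the flat-file databases at their configured paths. Note that the initialize_* methods shown below overwrite existing files, so point the config at scratch paths if in doubt:

from adtoolbox import configs, core

db=core.Database(config=configs.Database())
db.initialize_feed_db()                 # empty feed tsv
db.initialize_metagenomics_studies_db() # empty metagenomics studies tsv
db.initialize_experimental_data_db()    # empty experimental data json
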
Source code in adtoolbox/core.py
class Database:

    '''
    This class is designed to supply any data requirement for ADToolbox. All functionalities for saving, loading, and querying data are implemented here.
    ADToolbox in general contains the following databases:

    - The seed reaction database

    - The seed compound database

    - ADToolbox's Feed database

    - ADToolbox's Metagenomics studies database

    - ADToolbox's Experimental data database

    - ADToolbox's Protein database

    - ADToolbox's Reaction database

    - GTDB-tk database for bacterial and archaeal 16S rRNA sequences

    - ADM and e_adm model parameters

    This class is instantiated with a configs.Database object. This object contains the paths to all the databases that ADToolbox uses.
    Please refer to the documentation of each method for more information on the required configurations.

    Args:
        config (configs.Database, optional): A configs.Database object. Defaults to configs.Database().

    Examples:
        >>> db=Database(config=configs.Database())
        >>> assert type(db)==Database and type(db.config)==configs.Database

    '''
    def __init__(self, config:configs.Database=configs.Database())->None:
        self.config = config


    def initialize_protein_db(self)->None:
        """This function intializes ADToolbox's protein database by creating an empty fasta file.
        Be careful, this will overwrite any existing file with the same name.
        Logically, this needs method needs config.protein_db to be defined.

        Required Configs:
            - config.protein_db

        Examples:
            >>> import os
            >>> assert os.path.exists(os.path.join(Main_Dir,"protein_test_db.fasta"))==False # This is just to make sure that the following lines create the file
            >>> db=Database(config=configs.Database(protein_db=os.path.join(Main_Dir,"protein_test_db.fasta"))) # point to a test non-existing file
            >>> db.initialize_protein_db() # initialize the protein database
            >>> assert os.path.exists(os.path.join(Main_Dir,"protein_test_db.fasta"))==True # check if the file is created
            >>> os.remove(os.path.join(Main_Dir,"protein_test_db.fasta")) # remove the file to clean up
        """

        if not (pathlib.Path(self.config.protein_db).parent).exists():
            pathlib.Path(self.config.protein_db).parent.mkdir(parents=True)
        with open(self.config.protein_db, 'w') as f:
            pass

    def initialize_reaction_db(self)->None:
        r"""This function intializes ADToolbox's reaction database by creating an empty tsv file.
        Be careful, this will overwrite any existing file with the same name.

        Required Configs:
            - config.reaction_db

        Examples:
            >>> import os
            >>> assert os.path.exists(os.path.join(Main_Dir,"reaction_test_db.tsv"))==False
            >>> db=Database(config=configs.Database(reaction_db=os.path.join(Main_Dir,"reaction_test_db.tsv")))
            >>> db.initialize_reaction_db()
            >>> assert pd.read_table(os.path.join(Main_Dir,"reaction_test_db.tsv"),delimiter="\t").shape[0]==0
            >>> assert set(pd.read_csv(os.path.join(Main_Dir,"reaction_test_db.tsv"),delimiter="\t").columns)==set(["ec_numbers","seed_ids","reaction_names","adm1_reaction","e_adm_reactions","pathways"])
            >>> os.remove(os.path.join(Main_Dir,"reaction_test_db.tsv"))

        """
        pd.DataFrame(columns=["ec_numbers","seed_ids","reaction_names","adm1_reaction","e_adm_reactions","pathways"]).to_csv(self.config.reaction_db,index=False,sep="\t")

    def initialize_feed_db(self)->None:
        r"""This function intializes ADToolbox's Feed database by creating an empty tsv file.
        Be careful, this will overwrite any existing file with the same name.

        Required Configs:
            - config.feed_db

        Examples:
            >>> import os
            >>> assert os.path.exists(os.path.join(Main_Dir,"feed_test_db.tsv"))==False
            >>> db=Database(config=configs.Database(feed_db=os.path.join(Main_Dir,"feed_test_db.tsv")))
            >>> db.initialize_feed_db()
            >>> assert pd.read_table(os.path.join(Main_Dir,"feed_test_db.tsv"),delimiter='\t').shape[0]==0
            >>> assert set(pd.read_table(os.path.join(Main_Dir,"feed_test_db.tsv"),delimiter='\t').columns)==set(["name","carbohydrates","lipids","proteins","tss","si","xi","reference"])
            >>> os.remove(os.path.join(Main_Dir,"feed_test_db.tsv"))

        """
        pd.DataFrame(columns=["name","carbohydrates","lipids","proteins","tss","si","xi","reference"]).to_csv(self.config.feed_db,index=False,sep="\t")

    def initialize_metagenomics_studies_db(self)->None:
        r"""This function intializes ADToolbox's Metagenomics studies database by creating an empty tsv file.
        Be careful, this will overwrite any existing file with the same name.

        Required Configs:
            - config.metagenomics_studies_db

        Examples:
            >>> import os
            >>> assert os.path.exists(os.path.join(Main_Dir,"metagenomics_studies_test_db.tsv"))==False
            >>> db=Database(config=configs.Database(metagenomics_studies_db=os.path.join(Main_Dir,"metagenomics_studies_test_db.tsv")))
            >>> db.initialize_metagenomics_studies_db()
            >>> assert pd.read_table(os.path.join(Main_Dir,"metagenomics_studies_test_db.tsv"),delimiter="\t").shape[0]==0
            >>> assert set(pd.read_table(os.path.join(Main_Dir,"metagenomics_studies_test_db.tsv"),delimiter="\t").columns)==set(["name","study_type","microbiome","sample_accession","comments","study_accession"])
            >>> os.remove(os.path.join(Main_Dir,"metagenomics_studies_test_db.tsv"))

        """
        pd.DataFrame(columns=["name","study_type","microbiome","sample_accession","comments","study_accession"]).to_csv(self.config.metagenomics_studies_db,index=False,sep="\t")

    def initialize_experimental_data_db(self)->None:
        """This function intializes ADToolbox's experimental data database by creating an empty json file.
        Be careful, this will overwrite any existing file with the same name.

        Required Configs:
            - config.experimental_data_db

        Examples:
            >>> import os
            >>> assert os.path.exists(os.path.join(Main_Dir,"experimental_data_test_db.json"))==False
            >>> db=Database(config=configs.Database(experimental_data_db=os.path.join(Main_Dir,"experimental_data_test_db.json")))
            >>> db.initialize_experimental_data_db()
            >>> assert pd.read_json(os.path.join(Main_Dir,"experimental_data_test_db.json")).shape[0]==0
            >>> with open(os.path.join(Main_Dir,"experimental_data_test_db.json"),"r") as f:
            ...     assert json.load(f)==[]
            >>> os.remove(os.path.join(Main_Dir,"experimental_data_test_db.json"))

        """
        pd.DataFrame(columns=["name","initial_conditions","time","variables","data","reference"]).to_json(self.config.experimental_data_db,orient="records")


    def filter_seed_from_ec(self, 
                            ec_list:list[str],
                            save:bool=False) -> tuple:
        """
        This function takes a list of EC numbers and filters the seed database to find the seed reactions that have the EC numbers in their EC number list.
        This will help to trim the large seed database to a smaller one that only contains the reactions that are relevant to the AD process.

        Args:
            ec_list (list[str]): A list of EC numbers.
            save (bool, optional): Whether to save the filtered seed database or not. Defaults to False.

        Returns:
            tuple: A tuple containing the filtered seed reaction database and the seed compound database, respectively.

        Required Configs:

            - config.reaction_db
            - config.compound_db
            - config.local_reaction_db
            - config.local_compound_db


        Examples:
            >>> db=Database()
            >>> seed_rxn_db,seed_compound_db=db.filter_seed_from_ec(["1.1.1.1","1.1.1.2"])
            >>> assert len(seed_rxn_db)>0 and len(seed_compound_db)>0
            >>> assert pd.read_json(configs.Database().reaction_db).shape[0]>pd.DataFrame(seed_rxn_db).shape[0]
        """
        seed_rxn_db=pd.read_json(self.config.reaction_db)
        seed_compound_db=pd.read_json(self.config.compound_db)
        seed_rxn_db=seed_rxn_db[seed_rxn_db["ec_numbers"].apply(lambda x: any(ec in x for ec in ec_list) if x else False)]
        seed_compound_db=seed_compound_db[seed_compound_db["id"].apply(lambda x: True if x in seed_rxn_db["stoichiometry"].sum() else False)]
        if save:
            seed_rxn_db.to_json(self.config.local_reaction_db)
            seed_compound_db.to_json(self.config.local_compound_db)
        return seed_rxn_db.to_dict(orient="record"),seed_compound_db.to_dict(orient="record")



    def get_protein_seqs_from_uniprot(self, uniprot_id:str) -> str:
        """
        This function takes a uniprot id and fetches the protein sequence from Uniprot.

        Args:
            uniprot_id (str): The uniprot id of the protein.


        Returns:
            str: The protein sequence.

        Examples:
            >>> db=Database()
            >>> seq=db.get_protein_seqs_from_uniprot("P0A9P0")
            >>> assert type(seq)==str and len(seq)>0
        """
        Base_URL = "https://rest.uniprot.org/uniprotkb/"
        session = requests.Session()
        retry = Retry(connect=3, backoff_factor=0.5)
        adapter = HTTPAdapter(max_retries=retry)
        session.mount('http://', adapter)
        try:
            file = session.get(
                f"https://rest.uniprot.org/uniprotkb/{uniprot_id}.fasta", timeout=10)
        except:
            print("Could not fetch the sequence! Trying again ...")
            while True:
                time.sleep(5)
                file = session.get(Base_URL+uniprot_id+".fasta", timeout=10)
                if file.ok:
                    break

        return ''.join(file.text.split('\n')[1:-1])

    def proteins_from_ec(self,ec_number:str) -> dict:
        """
        This function returns a dictionary of protein sequences for a given EC number.
        The keys are UniProt IDs joined with the EC number (the header format used by
        the ADToolbox protein database) and the values are the protein sequences. Since
        ADToolbox deals with microbial processes, only bacterial and archaeal proteins
        are considered.

        Args:
            ec_number (str): The EC number.

        Returns:
            dict: A dictionary of protein sequences.

        Examples:
            >>> db=Database()
            >>> protein_seqs=db.proteins_from_ec("1.1.1.1")
            >>> assert len(protein_seqs)>0
            >>> assert list(protein_seqs.keys())[0].split("|")[1]=="1.1.1.1"
        """
        session = requests.Session()
        retry = Retry(connect=3, backoff_factor=0.5)
        adapter = HTTPAdapter(max_retries=retry)
        session.mount('https://', adapter)
        protein_seqs={}
        url=f"https://rest.uniprot.org/uniprotkb/stream?format=fasta&query=%28%28ec%3A{ec_number}%29%20AND%20%28reviewed%3Atrue%29%20NOT%20%28taxonomy_id%3A2759%29%29"
        try:
            file = session.get(url, timeout=30)
        except (requests.exceptions.HTTPError, requests.exceptions.ConnectionError):
            print("Request Error! Trying again ...")
            time.sleep(30)
            file = session.get(url, timeout=30)
        text = file.text
        # The truthiness check below also doubles as a sanity check on the response.
        if text:
            text=text.split('>')
            text.remove("")
            for seq in text:
                protein_seqs.update([(seq.split("\n")[0].split("|")[1]+"|"+ec_number, "".join(seq.split("\n")[1:]))])


        return protein_seqs
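
    # Note: entries come back keyed as "<uniprot_id>|<ec_number>" (for example
    # "<uniprot_id>|1.1.1.1"), which is the header format the ADToolbox protein
    # database expects downstream.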


    def build_protein_db_from_reactions_db(self):
        r"""
        This function builds the protein database from the reaction database.
        It takes the reaction database and finds the protein sequences for each EC number in the reaction database.
        Then it saves the protein sequences in a fasta file.

        Required Configs:
            - config.reaction_db
            --------
            - config.protein_db
            --------

        Examples:
            >>> import os
            >>> assert os.path.exists(os.path.join(Main_Dir,"protein_test_db.fasta"))==False
            >>> assert os.path.exists(os.path.join(Main_Dir,"reaction_test_db.tsv"))==False
            >>> db=Database(config=configs.Database(protein_db=os.path.join(Main_Dir,"protein_test_db.fasta"),reaction_db=os.path.join(Main_Dir,"reaction_test_db.tsv")))
            >>> reaction_db=pd.DataFrame(columns=["EC_Numbers","Seed Ids","Reaction Names","ADM1_Reaction","e_adm_Reactions","Pathways"])
            >>> reaction_db.loc[0,"EC_Numbers"]="1.1.1.1"
            >>> reaction_db.to_csv(os.path.join(Main_Dir,"reaction_test_db.tsv"),index=False,sep="\t")
            >>> db.build_protein_db_from_reactions_db()
            >>> assert os.path.exists(os.path.join(Main_Dir,"protein_test_db.fasta"))==True
            >>> assert os.path.exists(os.path.join(Main_Dir,"reaction_test_db.tsv"))==True
            >>> assert os.path.getsize(os.path.join(Main_Dir,"protein_test_db.fasta"))>0
            >>> os.remove(os.path.join(Main_Dir,"protein_test_db.fasta"))
            >>> os.remove(os.path.join(Main_Dir,"reaction_test_db.tsv"))
        """
        rxn_db=pd.read_table(self.config.reaction_db,delimiter="\t")
        ec_numbers=rxn_db["EC_Numbers"]
        ec_numbers=list(set(ec_numbers))
        protein_seqs={}
        for ec in ec_numbers:
            protein_seqs.update(self.proteins_from_ec(ec))
        with open(self.config.protein_db,"w") as f:
            for key,value in protein_seqs.items():
                f.write(">"+key+"\n")
                f.write(value+"\n")

    def cazy_ec(self)->list:
        """
        This method returns a list of EC numbers that are extracted from the Cazy website.
        This method is useful for adding more carbohydrate metabolism reactions to the reaction database.

        Returns:
            list: A list of EC numbers for carbohydrate metabolism found on CAZy database.

        Examples:
            >>> db=Database()
            >>> ec_list=db.cazy_ec()
            >>> assert len(ec_list)>0
        """

        ec_list = []
        for link in self.config.cazy_links:
            page = requests.get(link)
            soup = BeautifulSoup(page.content, "html.parser")
            results = soup.find("div", class_="cadre_principal").find_all(
                "th", class_="thec")
            for ec_number in results:
                if '-' not in ec_number.text.strip() and '.' in ec_number.text.strip():
                    ec_list.append(ec_number.text.strip())

        return ec_list
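
    # A hedged pipeline sketch: the CAZy EC numbers can feed straight into the
    # protein database helpers defined below.
    #   db = Database()
    #   db.add_proteins_from_ecnumbers_to_protein_db(db.cazy_ec())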

    def add_protein_to_protein_db(self, protein_id:str, header_tail:str)->None:
        """
        This function adds a protein sequence to the protein database. It takes a uniprot id and an EC number it is assigned to
        and adds the corresponding protein sequence to the protein database.

        Required Configs:
            - config.protein_db

        Args:
            protein_id (str): The uniprot id of the protein.
            header_tail (str): A text to append to the header of the entry in the database;
                in ADToolbox it is better to use the EC number for compatibility with downstream functions.


        Examples:
            >>> import os
            >>> assert os.path.exists(os.path.join(Main_Dir,"protein_test_db.fasta"))==False
            >>> db=Database(config=configs.Database(protein_db=os.path.join(Main_Dir,"protein_test_db.fasta")))
            >>> db.add_protein_to_protein_db("P0A9P0","1.2.3.4")
            >>> assert os.path.exists(os.path.join(Main_Dir,"protein_test_db.fasta"))==True
            >>> assert os.path.getsize(os.path.join(Main_Dir,"protein_test_db.fasta"))>0
            >>> import utils
            >>> assert len(utils.fasta_to_dict(os.path.join(Main_Dir,"protein_test_db.fasta")))>0
            >>> os.remove(os.path.join(Main_Dir,"protein_test_db.fasta"))
        """
        if not os.path.exists(self.config.protein_db):
            self.initialize_protein_db()
        with open(self.config.protein_db,"a") as f:
            f.write(">"+protein_id+"|"+header_tail+"\n")
            f.write(self.get_protein_seqs_from_uniprot(protein_id)+"\n")

    def add_proteins_from_ecnumbers_to_protein_db(self, ec_numbers:list)->None:
        """
        This function adds protein sequences to the protein database from a list of EC numbers.
        It takes a list of EC numbers and finds the protein sequences for each EC number in the list.
        Then it saves the protein sequences in a fasta file.

        Required Configs:
            - config.protein_db

        Args:
            ec_numbers (list): A list of EC numbers.

        Examples:
            >>> import os
            >>> assert os.path.exists(os.path.join(Main_Dir,"protein_test_db.fasta"))==False
            >>> db=Database(config=configs.Database(protein_db=os.path.join(Main_Dir,"protein_test_db.fasta")))
            >>> db.add_proteins_from_ecnumbers_to_protein_db(["1.1.1.1","1.1.1.2"])
            >>> assert os.path.exists(os.path.join(Main_Dir,"protein_test_db.fasta"))==True
            >>> import utils
            >>> assert len(utils.fasta_to_dict(os.path.join(Main_Dir,"protein_test_db.fasta")))>0
            >>> os.remove(os.path.join(Main_Dir,"protein_test_db.fasta"))
        """
        if not os.path.exists(self.config.protein_db):
            self.initialize_protein_db()

        protein_seqs={}
        for ec in ec_numbers:
            protein_seqs.update(self.proteins_from_ec(ec))

        with open(self.config.protein_db,"a") as f:
            for key,value in protein_seqs.items():
                f.write(">"+key+"\n")
                f.write(value+"\n")

    def add_feed_to_feed_db(self,feed:Feed)->None:
        r"""
        This function adds a feed to the feed database. It takes the feed name and the feed composition and adds them to the feed database.

        Required Configs:
            - config.feed_db

        Args:
            feed (Feed): An instance of the Feed class.

        Examples:
            >>> import os
            >>> assert os.path.exists(os.path.join(Main_Dir,"feed_test_db.tsv"))==False
            >>> db=Database(config=configs.Database(feed_db=os.path.join(Main_Dir,"feed_test_db.tsv")))
            >>> feed=Feed(name="test_feed",carbohydrates=10,lipids=20,proteins=30,tss=80,si=10,xi=30,reference="test")
            >>> db.add_feed_to_feed_db(feed)
            >>> assert os.path.exists(os.path.join(Main_Dir,"feed_test_db.tsv"))==True
            >>> assert pd.read_table(os.path.join(Main_Dir,"feed_test_db.tsv"),delimiter="\t").shape[0]>0
            >>> os.remove(os.path.join(Main_Dir,"feed_test_db.tsv"))

        """
        if not os.path.exists(self.config.feed_db):
            self.initialize_feed_db()

        if feed.name in pd.read_table(self.config.feed_db,delimiter="\t")["name"].values:
            raise ValueError("Feed already exists in the database.")
        feed_db=pd.read_table(self.config.feed_db,delimiter="\t")
        feed_db=pd.concat([feed_db,pd.DataFrame([feed.to_dict()])],ignore_index=True,axis=0)
        feed_db.to_csv(self.config.feed_db,index=False,sep="\t")

    def remove_feed_from_feed_db(self,field_name:str,query:str)->None:
        r"""
        This function removes feeds that contain the query in the given column, field_name, from the feed database.

        Required Configs:
            - config.feed_db

        Args:
            field_name (str): The name of the column to query.
            query (str): The query string.

        Examples:
            >>> import os
            >>> assert os.path.exists(os.path.join(Main_Dir,"feed_test_db.tsv"))==False
            >>> db=Database(config=configs.Database(feed_db=os.path.join(Main_Dir,"feed_test_db.tsv")))
            >>> feed=Feed(name="test_feed",carbohydrates=10,lipids=20,proteins=30,tss=80,si=10,xi=30,reference="test")
            >>> db.add_feed_to_feed_db(feed)
            >>> assert os.path.exists(os.path.join(Main_Dir,"feed_test_db.tsv"))==True
            >>> assert pd.read_table(os.path.join(Main_Dir,"feed_test_db.tsv"),delimiter="\t").shape[0]>0
            >>> db.remove_feed_from_feed_db("name","test_feed")
            >>> assert pd.read_table(os.path.join(Main_Dir,"feed_test_db.tsv"),delimiter="\t").shape[0]==0
            >>> os.remove(os.path.join(Main_Dir,"feed_test_db.tsv"))

        """
        if not os.path.exists(self.config.feed_db):
            raise FileNotFoundError("Feed database does not exist!")


        feed_db=pd.read_table(self.config.feed_db,delimiter="\t")
        feed_db=feed_db[feed_db[field_name].str.contains(query)==False]
        feed_db.to_csv(self.config.feed_db,index=False,sep="\t")

    def get_feed_from_feed_db(self,field_name:str,query:str)->list[Feed]:
        r"""
        This function returns feeds from the feed database. It takes the query string and the column name to query and returns every feed whose given column contains the query string.

        Required Configs:
            - config.feed_db

        Args:
            field_name (str): The name of the column to query.
            query (str): The query string.

        Returns:
            list[Feed]: A list of Feed instances that match the query.

        Examples:
            >>> import os
            >>> assert os.path.exists(os.path.join(Main_Dir,"feed_test_db.tsv"))==False
            >>> db=Database(config=configs.Database(feed_db=os.path.join(Main_Dir,"feed_test_db.tsv")))
            >>> feed=Feed(name="test_feed",carbohydrates=10,lipids=20,proteins=30,tss=80,si=10,xi=30,reference="test")
            >>> db.add_feed_to_feed_db(feed)
            >>> assert os.path.exists(os.path.join(Main_Dir,"feed_test_db.tsv"))==True
            >>> assert pd.read_table(os.path.join(Main_Dir,"feed_test_db.tsv"),delimiter="\t").shape[0]>0
            >>> feed=db.get_feed_from_feed_db("name","test_feed")
            >>> assert feed[0].name=="test_feed"
            >>> os.remove(os.path.join(Main_Dir,"feed_test_db.tsv"))

        """
        if not os.path.exists(self.config.feed_db):
            raise FileNotFoundError("Feed database does not exist!")

        feed_db=pd.read_table(self.config.feed_db,delimiter="\t")
        feed_db=feed_db[feed_db[field_name].str.contains(query)]
        return [Feed(**feed.to_dict()) for _,feed in feed_db.iterrows()]

    def add_metagenomics_study_to_metagenomics_studies_db(self,metagenomics_study:MetagenomicsStudy)->None:
        r"""
        This function adds a metagenomics study to the metagenomics studies database. It takes a metagenomics study and adds it to the metagenomics studies database.

        Required Configs:
            - config.metagenomics_studies_db

        Args:
            metagenomics_study (MetagenomicsStudy): An instance of the MetagenomicsStudy class.

        Examples:
            >>> import os
            >>> assert os.path.exists(os.path.join(Main_Dir,"metagenomics_studies_test_db.tsv"))==False
            >>> db=Database(config=configs.Database(metagenomics_studies_db=os.path.join(Main_Dir,"metagenomics_studies_test_db.tsv")))
            >>> metagenomics_study=MetagenomicsStudy(name="test_study",study_type="metagenomics",microbiome="anaerobic digester",sample_accession="test",comments="test",study_accession="test")
            >>> db.add_metagenomics_study_to_metagenomics_studies_db(metagenomics_study)
            >>> assert os.path.exists(os.path.join(Main_Dir,"metagenomics_studies_test_db.tsv"))==True
            >>> assert pd.read_table(os.path.join(Main_Dir,"metagenomics_studies_test_db.tsv"),delimiter="\t").shape[0]>0
            >>> os.remove(os.path.join(Main_Dir,"metagenomics_studies_test_db.tsv"))
        """
        if not os.path.exists(self.config.metagenomics_studies_db):
            self.initialize_metagenomics_studies_db()
        metagenomics_studies_db=pd.read_table(self.config.metagenomics_studies_db,delimiter="\t")
        metagenomics_studies_db=pd.concat([metagenomics_studies_db,pd.DataFrame([metagenomics_study.to_dict()])],ignore_index=True,axis=0)
        metagenomics_studies_db.to_csv(self.config.metagenomics_studies_db,index=False,sep="\t")

    def remove_metagenomics_study_from_metagenomics_studies_db(self,field_name:str,query:str)->None:
        r"""
        This function removes studies that contain the query in the given column, field name, from the metagenomics studies database.

        Required Configs:
            - config.metagenomics_studies_db

        Args:
            field_name (str): The name of the column to query.
            query (str): The query string.

        Examples:
            >>> import os
            >>> assert os.path.exists(os.path.join(Main_Dir,"metagenomics_studies_test_db.tsv"))==False
            >>> db=Database(config=configs.Database(metagenomics_studies_db=os.path.join(Main_Dir,"metagenomics_studies_test_db.tsv")))
            >>> metagenomics_study=MetagenomicsStudy(name="test_study",study_type="metagenomics",microbiome="anaerobic digester",sample_accession="test",comments="test",study_accession="test")
            >>> db.add_metagenomics_study_to_metagenomics_studies_db(metagenomics_study)
            >>> assert os.path.exists(os.path.join(Main_Dir,"metagenomics_studies_test_db.tsv"))==True
            >>> assert pd.read_table(os.path.join(Main_Dir,"metagenomics_studies_test_db.tsv"),delimiter="\t").shape[0]>0
            >>> db.remove_metagenomics_study_from_metagenomics_studies_db("name","test_study")
            >>> assert pd.read_table(os.path.join(Main_Dir,"metagenomics_studies_test_db.tsv"),delimiter="\t").shape[0]==0
            >>> os.remove(os.path.join(Main_Dir,"metagenomics_studies_test_db.tsv"))
        """
        if not os.path.exists(self.config.metagenomics_studies_db):
            raise FileNotFoundError("Metagenomics studies database does not exist!")

        metagenomics_studies_db=pd.read_table(self.config.metagenomics_studies_db,delimiter="\t")
        metagenomics_studies_db=metagenomics_studies_db[metagenomics_studies_db[field_name].str.contains(query)==False]
        metagenomics_studies_db.to_csv(self.config.metagenomics_studies_db,index=False,sep="\t")

    def get_metagenomics_study_from_metagenomics_studies_db(self,field_name:str,query:str)->list[MetagenomicsStudy]:
        r"""
        This function returns metagenomics studies from the metagenomics studies database. It takes the query string and the column name to query and returns every study whose given column contains the query string.

        Required Configs:
            - config.metagenomics_studies_db

        Args:
            field_name (str): The name of the column to query.
            query (str): The query string.

        Returns:
            list[MetagenomicsStudy]: A list of MetagenomicsStudy instances that match the query.

        Examples:
            >>> import os
            >>> assert os.path.exists(os.path.join(Main_Dir,"metagenomics_studies_test_db.tsv"))==False
            >>> db=Database(config=configs.Database(metagenomics_studies_db=os.path.join(Main_Dir,"metagenomics_studies_test_db.tsv")))
            >>> metagenomics_study=MetagenomicsStudy(name="test_study",study_type="metagenomics",microbiome="anaerobic digester",sample_accession="test",comments="test",study_accession="test")
            >>> db.add_metagenomics_study_to_metagenomics_studies_db(metagenomics_study)
            >>> assert os.path.exists(os.path.join(Main_Dir,"metagenomics_studies_test_db.tsv"))==True
            >>> assert pd.read_table(os.path.join(Main_Dir,"metagenomics_studies_test_db.tsv"),delimiter="\t").shape[0]>0
            >>> metagenomics_study=db.get_metagenomics_study_from_metagenomics_studies_db("name","test_study")
            >>> assert metagenomics_study[0].name=="test_study"
            >>> os.remove(os.path.join(Main_Dir,"metagenomics_studies_test_db.tsv"))
        """
        if not os.path.exists(self.config.metagenomics_studies_db):
            raise FileNotFoundError("Metagenomics studies database does not exist!")

        metagenomics_studies_db=pd.read_table(self.config.metagenomics_studies_db,delimiter="\t")
        metagenomics_studies_db=metagenomics_studies_db[metagenomics_studies_db[field_name].str.contains(query)]
        return [MetagenomicsStudy(**metagenomics_study.to_dict()) for _,metagenomics_study in metagenomics_studies_db.iterrows()]

    def add_experiment_to_experiments_db(self,experiment:Experiment)->None:
        r"""
        This function adds an experiment to the experiments database. It takes an experiment and adds it to the experiments database.

        Required Configs:
            - config.experimental_data_db

        Args:
            experiment (Experiment): An instance of the Experiment class.

        Examples:
            >>> import os,json
            >>> assert os.path.exists(os.path.join(Main_Dir,"experiments_test_db.tsv"))==False
            >>> db=Database(config=configs.Database(experimental_data_db=os.path.join(Main_Dir,"experiments_test_db.json")))
            >>> experiment=Experiment(name="test_study",time=[0,1,2],variables=[2,6],data= [[1,2,3],[4,5,6]],reference="test")
            >>> db.add_experiment_to_experiments_db(experiment)
            >>> assert os.path.exists(os.path.join(Main_Dir,"experiments_test_db.json"))==True
            >>> assert os.path.getsize(os.path.join(Main_Dir,"experiments_test_db.json"))>0
            >>> os.remove(os.path.join(Main_Dir,"experiments_test_db.json"))
        """
        if not os.path.exists(self.config.experimental_data_db):
            self.initialize_experimental_data_db()

        if experiment.name in [experiment.name for experiment in self.get_experiment_from_experiments_db("name",experiment.name)]: 
            raise ValueError("Experiment already exists in the database!")

        with open(self.config.experimental_data_db,"r") as f:
            experiments_db=json.load(f)
        experiments_db.append(experiment.to_dict())
        with open(self.config.experimental_data_db,"w") as f:
            json.dump(experiments_db,f)

    def remove_experiment_from_experiments_db(self,field_name:str,query:str)->None:
        r"""
        This function removes experiments that contain the query in the given column, field name, from the experiments database.

        Required Configs:
            - config.experimental_data_db

        Args:
            field_name (str): The name of the column to query.
            query (str): The query string.

        Examples:
            >>> import os,json
            >>> assert os.path.exists(os.path.join(Main_Dir,"experiments_test_db.tsv"))==False
            >>> db=Database(config=configs.Database(experimental_data_db=os.path.join(Main_Dir,"experiments_test_db.json")))
            >>> experiment=Experiment(name="test_study",time=[0,1,2],variables=[2,6],data= [[1,2,3],[4,5,6]],reference="test")
            >>> db.add_experiment_to_experiments_db(experiment)
            >>> assert os.path.exists(os.path.join(Main_Dir,"experiments_test_db.json"))==True
            >>> assert os.path.getsize(os.path.join(Main_Dir,"experiments_test_db.json"))>0
            >>> db.remove_experiment_from_experiments_db("name","test_study")
            >>> assert pd.read_json(os.path.join(Main_Dir,"experiments_test_db.json")).shape[0]==0
            >>> os.remove(os.path.join(Main_Dir,"experiments_test_db.json"))
        """
        if not os.path.exists(self.config.experimental_data_db):
            raise FileNotFoundError("Experimental data database does not exist!")

        with open(self.config.experimental_data_db,"r") as f:
            experiments_db=json.load(f)
        experiments_db=[experiment for experiment in experiments_db if query not in experiment[field_name]]
        with open(self.config.experimental_data_db,"w") as f:
            json.dump(experiments_db,f)

    def get_experiment_from_experiments_db(self,field_name:str,query:str)->list[Experiment]:
        r"""
        This function returns experiments from the experiments database. It takes the query string and the column name to query and returns every experiment whose given field contains the query string.

        Required Configs:
            - config.experimental_data_db

        Args:
            field_name (str): The name of the column to query.
            query (str): The query string.

        Returns:
            list[Experiment]: A list of Experiment instances that match the query.

        Examples:
            >>> import os,json
            >>> assert os.path.exists(os.path.join(Main_Dir,"experiments_test_db.tsv"))==False
            >>> db=Database(config=configs.Database(experimental_data_db=os.path.join(Main_Dir,"experiments_test_db.json")))
            >>> experiment=Experiment(name="test_study",time=[0,1,2],variables=[2,6],data= [[1,2,3],[4,5,6]],reference="test")
            >>> db.add_experiment_to_experiments_db(experiment)
            >>> assert os.path.exists(os.path.join(Main_Dir,"experiments_test_db.json"))==True
            >>> assert os.path.getsize(os.path.join(Main_Dir,"experiments_test_db.json"))>0
            >>> experiment=db.get_experiment_from_experiments_db("name","test_study")
            >>> assert experiment[0].name=="test_study"
            >>> os.remove(os.path.join(Main_Dir,"experiments_test_db.json"))
        """
        if not os.path.exists(self.config.experimental_data_db):
            raise FileNotFoundError("Experimental data database does not exist!")

        with open(self.config.experimental_data_db,"r") as f:
            experiments_db=json.load(f)
        experiments_db=[experiment for experiment in experiments_db if query in experiment[field_name]]
        return [Experiment(**experiment) for experiment in experiments_db]

    def build_mmseqs_database(self,container:str="None")->str:
        """Builds an indexed mmseqs database from the ADToolbox's fasta protein database.

        Required Configs:
            - config.protein_db
            - config.adtoolbox_singularity
            - config.adtoolbox_docker

        Args:
            container (str, optional): The container to run the script with. Defaults to "None".
        Returns:
            str: The script to build the mmseqs database.

        Examples:
            >>> import os
            >>> assert os.path.exists(os.path.join(Main_Dir,"protein_test_db.fasta"))==False
            >>> db=Database(config=configs.Database(protein_db=os.path.join(Main_Dir,"protein_test_db.fasta")))
            >>> db.add_protein_to_protein_db("P0A9P0","x,x,x,x")
            >>> assert os.path.exists(os.path.join(Main_Dir,"protein_test_db.fasta"))==True
            >>> assert os.path.getsize(os.path.join(Main_Dir,"protein_test_db.fasta"))>0
            >>> script=db.build_mmseqs_database()
            >>> assert script=="mmseqs createdb "+str(os.path.join(Main_Dir,"protein_test_db.fasta"))+" "+str(db.config.protein_db_mmseqs)
            >>> os.remove(os.path.join(Main_Dir,"protein_test_db.fasta"))

        """
        script=create_mmseqs_database(self.config.protein_db,
                                      self.config.protein_db_mmseqs,
                                      container=container,
                                      run=False,
                                      config=self.config)

        if container=="None":
            pass

        elif container=="singularity":
            script=f"singularity exec --bind {self.config.protein_db}:{self.config.protein_db},{self.config.protein_db_mmseqs}:{self.config.protein_db_mmseqs} {self.config.adtoolbox_singularity} {script}"

        elif container=="docker":
            script=f"docker run -v {self.config.protein_db}:{self.config.protein_db} -v {self.config.protein_db_mmseqs}:{self.config.protein_db_mmseqs} {self.config.adtoolbox_docker} {script}"

        else:
            raise ValueError("Container should be either None, singularity or docker!")

        return script
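
    # For reference, with container="docker" the returned script is the same
    # mmseqs command wrapped in a docker run that bind-mounts both database
    # paths, i.e. "docker run -v <protein_db>:<protein_db> -v <mmseqs_db>:<mmseqs_db> <image> mmseqs createdb ...".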


    def download_adm_parameters(self,verbose:bool=True)->None:
        """
        Downloads the parameters needed for running ADM models in ADToolbox.

        Required Configs:
            - config.adm_parameters_base_dir
            - config.adm_parameters_urls

        Args:
            verbose (bool, optional): Whether to print the progress or not. Defaults to True.

        Examples:
            >>> import os
            >>> assert os.path.exists(os.path.join(Main_Dir,"adm_parameters_test"))==False
            >>> db=Database(config=configs.Database(adm_parameters_base_dir=os.path.join(Main_Dir,"adm_parameters_test")))
            >>> db.download_adm_parameters(verbose=False)
            >>> assert os.path.exists(os.path.join(Main_Dir,"adm_parameters_test"))==True
            >>> assert len(os.listdir(os.path.join(Main_Dir,"adm_parameters_test")))==12
            >>> os.system("rm -r "+os.path.join(Main_Dir,"adm_parameters_test"))
            0

        """
        for param in self.config.adm_parameters.keys():
            if not pathlib.Path(self.config.adm_parameters[param]).parent.exists():
                os.makedirs(pathlib.Path(self.config.adm_parameters[param]).parent)
            r = requests.get(self.config.adm_parameters_urls[param], allow_redirects=True)
            with open(self.config.adm_parameters[param], 'wb') as f:
                f.write(r.content)
            if verbose:
                rich.print(f"[green]{param} downloaded to {self.config.adm_parameters[param]}")

    def download_seed_databases(self,verbose:bool=True) -> None :
        """This function will download the seed databases, both compound and reaction databases.

        Required Configs:
            - config.seed_rxn_url
            - config.seed_compound_url
            - config.reaction_db
            - config.compound_db

        Args:
            verbose (bool, optional): Whether to print the progress or not. Defaults to True.

        Examples:
            >>> import os
            >>> assert os.path.exists(os.path.join(Main_Dir,"seed_rxn.json"))==False
            >>> assert os.path.exists(os.path.join(Main_Dir,"seed_compound.json"))==False
            >>> db=Database(config=configs.Database(reaction_db=os.path.join(Main_Dir,"seed_rxn.json"),compound_db=os.path.join(Main_Dir,"seed_compound.json")))
            >>> db.download_seed_databases(verbose=False)
            >>> assert os.path.exists(os.path.join(Main_Dir,"seed_rxn.json"))==True
            >>> assert os.path.exists(os.path.join(Main_Dir,"seed_compound.json"))==True
            >>> os.remove(os.path.join(Main_Dir,"seed_rxn.json"))
            >>> os.remove(os.path.join(Main_Dir,"seed_compound.json"))
        """
        r = requests.get(self.config.seed_rxn_url, allow_redirects=True,stream=True)
        if not os.path.exists(Path(self.config.reaction_db).parent):
            os.makedirs(Path(self.config.reaction_db).parent)
        with open(self.config.reaction_db, 'wb') as f:
            f.write(r.content)
        if verbose:
            rich.print(f"[green]Reaction database downloaded to {self.config.reaction_db}")
        r=requests.get(self.config.seed_compound_url,allow_redirects=True,stream=True)
        with open(self.config.compound_db, 'wb') as f:
            f.write(r.content)
        if verbose:
            rich.print(f"[green]Compound database downloaded to {self.config.compound_db}")

    def download_protein_database(self, verbose:bool=True) -> None:
        """
        Downloads the prebuilt protein database from the remote repository.

        Required Configs:
            - config.protein_db_url
            - config.protein_db

        Args:
            verbose (bool, optional): Whether to print the progress or not. Defaults to True.

        Examples:
            >>> import os
            >>> assert os.path.exists(os.path.join(Main_Dir,"protein_test_db.fasta"))==False
            >>> db=Database(config=configs.Database(protein_db=os.path.join(Main_Dir,"protein_test_db.fasta")))
            >>> db.download_protein_database(verbose=False)
            >>> assert os.path.exists(os.path.join(Main_Dir,"protein_test_db.fasta"))==True
            >>> assert os.path.getsize(os.path.join(Main_Dir,"protein_test_db.fasta"))>0
            >>> os.remove(os.path.join(Main_Dir,"protein_test_db.fasta"))
        """
        r = requests.get(self.config.protein_db_url, allow_redirects=True)

        if not os.path.exists(Path(self.config.protein_db).parent):
            os.makedirs(Path(self.config.protein_db).parent)

        with open(self.config.protein_db, 'wb') as f:
            f.write(r.content)
        if verbose:
            rich.print(f"[green]Protein database downloaded to {self.config.protein_db}")

    def download_reaction_database(self,verbose:bool=True)->None:
        """
        This function will download the reaction database from the remote repository.

        Required Configs:
            - config.adtoolbox_rxn_db_url
            - config.csv_reaction_db

        Args:
            verbose (bool, optional): Whether to print the progress or not. Defaults to True.

        Examples:
            >>> import os
            >>> assert os.path.exists(os.path.join(Main_Dir,"reaction_test_db.csv"))==False
            >>> db=Database(config=configs.Database(csv_reaction_db=os.path.join(Main_Dir,"reaction_test_db.csv")))
            >>> db.download_reaction_database(verbose=False)
            >>> assert os.path.exists(os.path.join(Main_Dir,"reaction_test_db.csv"))==True
            >>> assert os.path.getsize(os.path.join(Main_Dir,"reaction_test_db.csv"))>0
            >>> os.remove(os.path.join(Main_Dir,"reaction_test_db.csv"))
        """

        r = requests.get(self.config.adtoolbox_rxn_db_url, allow_redirects=True)

        if not os.path.exists(Path(self.config.csv_reaction_db).parent):
            os.makedirs(Path(self.config.csv_reaction_db).parent)

        with open(self.config.csv_reaction_db, 'wb') as f:
            f.write(r.content)
        if verbose:
            rich.print(f"[green]Reaction database downloaded to {self.config.csv_reaction_db}")


    def download_feed_database(self,verbose:bool=True)-> None:
        """
        This function will download the feed database from the remote repository.

        Required Configs:
            - config.feed_db_url
            - config.feed_db

        Args:
            verbose (bool, optional): Whether to print the progress or not. Defaults to True.

        Examples:
            >>> import os
            >>> assert os.path.exists(os.path.join(Main_Dir,"feed_test_db.tsv"))==False
            >>> db=Database(config=configs.Database(feed_db=os.path.join(Main_Dir,"feed_test_db.tsv")))
            >>> db.download_feed_database(verbose=False)
            >>> assert os.path.exists(os.path.join(Main_Dir,"feed_test_db.tsv"))==True
            >>> assert os.path.getsize(os.path.join(Main_Dir,"feed_test_db.tsv"))>0
            >>> os.remove(os.path.join(Main_Dir,"feed_test_db.tsv"))
        """
        r = requests.get(self.config.feed_db_url, allow_redirects=True)

        if not os.path.exists(Path(self.config.feed_db).parent):
            os.makedirs(Path(self.config.feed_db).parent)

        with open(self.config.feed_db, 'wb') as f:
            f.write(r.content)
        if verbose:
            rich.print(f"[green]Feed database downloaded to {self.config.feed_db}")

    def download_qiime_classifier_db(self,verbose:bool=True)->None:
        """Downloads the prebuilt QIIME2 classifier database from the remote repository.

        Required Configs:
            - config.qiime_classifier_db_url
            - config.qiime_classifier_db

        Args:
            verbose (bool, optional): Whether to print the progress or not. Defaults to True.
        """
        r = requests.get(self.config.qiime_classifier_db_url, allow_redirects=True,stream=True)
        block_size = 1024
        total_size = int(r.headers.get('content-length', 0))
        if not os.path.exists(Path(self.config.qiime_classifier_db).parent):
            os.makedirs(Path(self.config.qiime_classifier_db).parent)
        with open(self.config.qiime_classifier_db, 'wb') as f:
            with Progress() as progress:
                task = progress.add_task("Downloading the qiime's classifier database:", total=total_size)
                for data in r.iter_content(block_size):
                    progress.update(task, advance=len(data))
                    f.write(data)
        if verbose:
            rich.print(f"[green]Qiime's classifier database downloaded to {self.config.qiime_classifier_db}")

    def download_studies_database(self,verbose:bool=True)->None:
        """
        This function will download the required files for the studies functionality.

        Required Configs:
            - config.studies_remote
            - config.studies_local

        Args:
            verbose (bool, optional): Whether to print the progress or not. Defaults to True.

        Examples:
            >>> import os
            >>> assert os.path.exists(os.path.join(Main_Dir,"studies_test_db.tsv"))==False
            >>> db=Database(config=configs.Database(studies_db=os.path.join(Main_Dir,"studies_test_db.tsv")))
            >>> db.download_studies_database(verbose=False)
            >>> assert os.path.exists(os.path.join(Main_Dir,"studies_test_db.tsv"))==True
            >>> assert os.path.getsize(os.path.join(Main_Dir,"studies_test_db.tsv"))>0
            >>> os.remove(os.path.join(Main_Dir,"studies_test_db.tsv"))
        """
        for i in self.config.studies_remote:
            r = requests.get(self.config.studies_remote[i], allow_redirects=True)
            if not os.path.exists(Path(self.config.studies_local[i]).parent):
                os.makedirs(Path(self.config.studies_local[i]).parent)
            with open(self.config.studies_local[i], 'wb') as f:
                f.write(r.content)

            if verbose:
                rich.print(f"[bold green]Downloaded {self.config.studies_remote[i]}[/bold green]")

    def download_amplicon_to_genome_db(self,verbose:bool=True):
        """
        This function will automatically download the GTDB-tk database for genome assignment.

        Required Configs:
            - config.amplicon_to_genome_db
            - config.amplicon_to_genome_urls

        Args:
            verbose (bool, optional): Whether to print the progress or not. Defaults to True.

        Examples:
            >>> import os
            >>> assert os.path.exists(os.path.join(Main_Dir,"amplicon_to_genome_test_db"))==False
            >>> db=Database(config=configs.Database(amplicon_to_genome_db=os.path.join(Main_Dir,"amplicon_to_genome_test_db")))
            >>> db.download_amplicon_to_genome_db(verbose=False)
            >>> assert os.path.exists(os.path.join(Main_Dir,"amplicon_to_genome_test_db"))==True
            >>> assert len(os.listdir(os.path.join(Main_Dir,"amplicon_to_genome_test_db")))>0
            >>> os.system("rm -r "+os.path.join(Main_Dir,"amplicon_to_genome_test_db"))
            0
        """
        if not os.path.exists(self.config.amplicon_to_genome_db):
            os.mkdir(self.config.amplicon_to_genome_db)

        url = self.config.amplicon_to_genome_urls
        if verbose:
            for keys in ['Version', 'MD5SUM', 'FILE_DESCRIPTIONS']:
                with requests.get(url[keys], allow_redirects=True, stream=True) as r:
                    total_size = int(r.headers.get('content-length', 0))
                    block_size = 1024
                    with Progress() as progress:
                        task1 = progress.add_task("Downloading " + keys, total=total_size)
                        with open(os.path.join(self.config.amplicon_to_genome_db, keys), 'wb') as f:
                            for data in r.iter_content(block_size):
                                progress.update(task1, advance=len(data))
                                f.write(data)
            with requests.get(url['metadata_field_desc'], allow_redirects=True, stream=True) as r:
                total_size = int(r.headers.get('content-length', 0))
                block_size = 1024
                with Progress() as progress:
                    task1 = progress.add_task("Downloading metadata_field_desc.tsv", total=total_size)
                    with open(os.path.join(self.config.amplicon_to_genome_db, 'metadata_field_desc.tsv'), 'wb') as f:
                        for data in r.iter_content(block_size):
                            progress.update(task1, advance=len(data))
                            f.write(data)

            for keys in ['bac120_ssu']:
                with requests.get(url[keys], allow_redirects=True, stream=True) as r:
                    total_size = int(r.headers.get('content-length', 0))
                    block_size = 1024
                    with Progress() as progress:
                        task1 = progress.add_task("Downloading " + keys, total=total_size)
                        with open(os.path.join(self.config.amplicon_to_genome_db, url[keys].split("/")[-1]), 'wb') as f:
                            for data in r.iter_content(block_size):
                                progress.update(task1, advance=len(data))
                                f.write(data)
                with tarfile.open(os.path.join(self.config.amplicon_to_genome_db, url[keys].split("/")[-1])) as f_in:
                    f_in.extractall(self.config.amplicon_to_genome_db)


                os.remove(os.path.join(self.config.amplicon_to_genome_db, url[keys].split("/")[-1]))
        else:
            for keys in ['Version', 'MD5SUM', 'FILE_DESCRIPTIONS']:
                with requests.get(url[keys], allow_redirects=True, stream=False) as r:
                    with open(os.path.join(self.config.amplicon_to_genome_db, keys), 'wb') as f:
                        f.write(r.content)
            with requests.get(url['metadata_field_desc'], allow_redirects=True, stream=False) as r:
                with open(os.path.join(self.config.amplicon_to_genome_db, 'metadata_field_desc.tsv'), 'wb') as f:
                    f.write(r.content)
            for keys in [ 'bac120_ssu']:
                with requests.get(url[keys], allow_redirects=True, stream=False) as r:
                    with open(os.path.join(self.config.amplicon_to_genome_db, url[keys].split("/")[-1]), 'wb') as f:
                        f.write(r.content)
                with tarfile.open(os.path.join(self.config.amplicon_to_genome_db, url[keys].split("/")[-1])) as f_in:
                    f_in.extractall(self.config.amplicon_to_genome_db)
        if verbose:
            rich.print("[bold green]Downloaded all the required files for Amplicon to Genome functionality.[/bold green]")




    def download_all_databases(self,verbose:bool=True)->None:
        """
        This function will download all the required databases for all the functionalities of ADToolbox.
        NOTE: Each method that this function calls is tested individually, so this method is skipped in doctests.

        Args:
            verbose (bool, optional): Whether to print the progress or not. Defaults to True.

        Required Configs:
            - config.adm_parameters_base_dir
            - config.adm_parameters_urls
            - config.seed_rxn_url
            - config.seed_compound_url
            - config.reaction_db
            - config.compound_db
            - config.protein_db_url
            - config.protein_db
            - config.adtoolbox_rxn_db_url
            - config.csv_reaction_db
            - config.feed_db_url
            - config.feed_db
            - config.amplicon_to_genome_db
            - config.amplicon_to_genome_urls
            - config.qiime_classifier_db_url
            - config.qiime_classifier_db
            - config.studies_remote
            - config.studies_local

        Examples:
            >>> import os # doctest: +SKIP
            >>> db=Database(config=configs.Database()) # doctest: +SKIP
            >>> db.download_all_databases(verbose=False) # doctest: +SKIP

        """

        self.download_seed_databases(verbose=verbose)
        self.download_adm_parameters(verbose=verbose)
        self.download_protein_database(verbose=verbose)
        self.download_reaction_database(verbose=verbose)
        self.download_feed_database(verbose=verbose)
        self.download_studies_database(verbose=verbose)
        self.download_amplicon_to_genome_db(verbose=verbose)
        self.download_qiime_classifier_db(verbose=verbose)
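
A minimal end-to-end sketch of the workflow above, assuming the default configuration paths: point a Database instance at a config, fetch every remote database, and then query the feed database.

from adtoolbox import configs, core

db = core.Database(config=configs.Database())
db.download_all_databases(verbose=True)
feeds = db.get_feed_from_feed_db("reference", "test")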

add_experiment_to_experiments_db(experiment)

This function adds an experiment to the experiments database. It takes an experiment and adds it to the experiments database.

Required Configs
  • config.experimental_data_db

Parameters:

    Name         Type         Description                             Default
    experiment   Experiment   An instance of the Experiment class.    required

Examples:

>>> import os,json
>>> assert os.path.exists(os.path.join(Main_Dir,"experiments_test_db.tsv"))==False
>>> db=Database(config=configs.Database(experimental_data_db=os.path.join(Main_Dir,"experiments_test_db.json")))
>>> experiment=Experiment(name="test_study",time=[0,1,2],variables=[2,6],data= [[1,2,3],[4,5,6]],reference="test")
>>> db.add_experiment_to_experiments_db(experiment)
>>> assert os.path.exists(os.path.join(Main_Dir,"experiments_test_db.json"))==True
>>> assert os.path.getsize(os.path.join(Main_Dir,"experiments_test_db.json"))>0
>>> os.remove(os.path.join(Main_Dir,"experiments_test_db.json"))
Source code in adtoolbox/core.py
def add_experiment_to_experiments_db(self,experiment:Experiment)->None:
    r"""
    This function adds an experiment to the experiments database. It takes an experiment and adds it to the experiments database.

    Required Configs:
        - config.experimental_data_db

    Args:
        experiment (Experiment): An instance of the Experiment class.

    Examples:
        >>> import os,json
        >>> assert os.path.exists(os.path.join(Main_Dir,"experiments_test_db.tsv"))==False
        >>> db=Database(config=configs.Database(experimental_data_db=os.path.join(Main_Dir,"experiments_test_db.json")))
        >>> experiment=Experiment(name="test_study",time=[0,1,2],variables=[2,6],data= [[1,2,3],[4,5,6]],reference="test")
        >>> db.add_experiment_to_experiments_db(experiment)
        >>> assert os.path.exists(os.path.join(Main_Dir,"experiments_test_db.json"))==True
        >>> assert os.path.getsize(os.path.join(Main_Dir,"experiments_test_db.json"))>0
        >>> os.remove(os.path.join(Main_Dir,"experiments_test_db.json"))
    """
    if not os.path.exists(self.config.experimental_data_db):
        self.initialize_experimental_data_db()

    if experiment.name in [experiment.name for experiment in self.get_experiment_from_experiments_db("name",experiment.name)]: 
        raise ValueError("Experiment already exists in the database!")

    with open(self.config.experimental_data_db,"r") as f:
        experiments_db=json.load(f)
    experiments_db.append(experiment.to_dict())
    with open(self.config.experimental_data_db,"w") as f:
        json.dump(experiments_db,f)
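
A small follow-up sketch: once an experiment has been added, it can be read back with get_experiment_from_experiments_db, which returns a list of Experiment instances (the names here are illustrative).

experiment = Experiment(name="my_study", time=[0, 1, 2], variables=[2, 6], data=[[1, 2, 3], [4, 5, 6]], reference="doi")
db.add_experiment_to_experiments_db(experiment)
assert db.get_experiment_from_experiments_db("name", "my_study")[0].name == "my_study"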

add_feed_to_feed_db(feed)

This function adds a feed to the feed database. It takes the feed name and the feed composition and adds them to the feed database.

Required Configs
  • config.feed_db

Parameters:

    Name   Type   Description                      Default
    feed   Feed   An instance of the Feed class.   required

Examples:

>>> import os
>>> assert os.path.exists(os.path.join(Main_Dir,"feed_test_db.tsv"))==False
>>> db=Database(config=configs.Database(feed_db=os.path.join(Main_Dir,"feed_test_db.tsv")))
>>> feed=Feed(name="test_feed",carbohydrates=10,lipids=20,proteins=30,tss=80,si=10,xi=30,reference="test")
>>> db.add_feed_to_feed_db(feed)
>>> assert os.path.exists(os.path.join(Main_Dir,"feed_test_db.tsv"))==True
>>> assert pd.read_table(os.path.join(Main_Dir,"feed_test_db.tsv"),delimiter="\t").shape[0]>0
>>> os.remove(os.path.join(Main_Dir,"feed_test_db.tsv"))
Source code in adtoolbox/core.py
def add_feed_to_feed_db(self,feed:Feed)->None:
    r"""
    This function adds a feed to the feed database. It takes the feed name and the feed composition and adds them to the feed database.

    Required Configs:
        - config.feed_db

    Args:
        feed (Feed): An instance of the Feed class.

    Examples:
        >>> import os
        >>> assert os.path.exists(os.path.join(Main_Dir,"feed_test_db.tsv"))==False
        >>> db=Database(config=configs.Database(feed_db=os.path.join(Main_Dir,"feed_test_db.tsv")))
        >>> feed=Feed(name="test_feed",carbohydrates=10,lipids=20,proteins=30,tss=80,si=10,xi=30,reference="test")
        >>> db.add_feed_to_feed_db(feed)
        >>> assert os.path.exists(os.path.join(Main_Dir,"feed_test_db.tsv"))==True
        >>> assert pd.read_table(os.path.join(Main_Dir,"feed_test_db.tsv"),delimiter="\t").shape[0]>0
        >>> os.remove(os.path.join(Main_Dir,"feed_test_db.tsv"))

    """
    if not os.path.exists(self.config.feed_db):
        self.initialize_feed_db()

    if feed.name in pd.read_table(self.config.feed_db,delimiter="\t")["name"].values:
        raise ValueError("Feed already exists in the database.")
    feed_db=pd.read_table(self.config.feed_db,delimiter="\t")
    feed_db=pd.concat([feed_db,pd.DataFrame([feed.to_dict()])],ignore_index=True,axis=0)
    feed_db.to_csv(self.config.feed_db,index=False,sep="\t")

add_metagenomics_study_to_metagenomics_studies_db(metagenomics_study)

This function adds a metagenomics study to the metagenomics studies database. It takes a metagenomics study and adds it to the metagenomics studies database.

Required Configs
  • config.metagenomics_studies_db

Parameters:

    Name                 Type                Description                                   Default
    metagenomics_study   MetagenomicsStudy   An instance of the MetagenomicsStudy class.   required

Examples:

>>> import os
>>> assert os.path.exists(os.path.join(Main_Dir,"metagenomics_studies_test_db.tsv"))==False
>>> db=Database(config=configs.Database(metagenomics_studies_db=os.path.join(Main_Dir,"metagenomics_studies_test_db.tsv")))
>>> metagenomics_study=MetagenomicsStudy(name="test_study",study_type="metagenomics",microbiome="anaerobic digester",sample_accession="test",comments="test",study_accession="test")
>>> db.add_metagenomics_study_to_metagenomics_studies_db(metagenomics_study)
>>> assert os.path.exists(os.path.join(Main_Dir,"metagenomics_studies_test_db.tsv"))==True
>>> assert pd.read_table(os.path.join(Main_Dir,"metagenomics_studies_test_db.tsv"),delimiter="\t").shape[0]>0
>>> os.remove(os.path.join(Main_Dir,"metagenomics_studies_test_db.tsv"))
Source code in adtoolbox/core.py
def add_metagenomics_study_to_metagenomics_studies_db(self,metagenomics_study:MetagenomicsStudy)->None:
    r"""
    This function adds a metagenomics study to the metagenomics studies database. It takes a metagenomics study and adds it to the metagenomics studies database.

    Required Configs:
        - config.metagenomics_studies_db

    Args:
        metagenomics_study (MetagenomicsStudy): An instance of the MetagenomicsStudy class.

    Examples:
        >>> import os
        >>> assert os.path.exists(os.path.join(Main_Dir,"metagenomics_studies_test_db.tsv"))==False
        >>> db=Database(config=configs.Database(metagenomics_studies_db=os.path.join(Main_Dir,"metagenomics_studies_test_db.tsv")))
        >>> metagenomics_study=MetagenomicsStudy(name="test_study",study_type="metagenomics",microbiome="anaerobic digester",sample_accession="test",comments="test",study_accession="test")
        >>> db.add_metagenomics_study_to_metagenomics_studies_db(metagenomics_study)
        >>> assert os.path.exists(os.path.join(Main_Dir,"metagenomics_studies_test_db.tsv"))==True
        >>> assert pd.read_table(os.path.join(Main_Dir,"metagenomics_studies_test_db.tsv"),delimiter="\t").shape[0]>0
        >>> os.remove(os.path.join(Main_Dir,"metagenomics_studies_test_db.tsv"))
    """
    if not os.path.exists(self.config.metagenomics_studies_db):
        self.initialize_metagenomics_studies_db()
    metagenomics_studies_db=pd.read_table(self.config.metagenomics_studies_db,delimiter="\t")
    metagenomics_studies_db=pd.concat([metagenomics_studies_db,pd.DataFrame([metagenomics_study.to_dict()])],ignore_index=True,axis=0)
    metagenomics_studies_db.to_csv(self.config.metagenomics_studies_db,index=False,sep="\t")

add_protein_to_protein_db(protein_id, header_tail)

This function adds a protein sequence to the protein database. It takes a uniprot id and an EC number it is assigned to and adds the corresponding protein sequence to the protein database.

Required Configs
  • config.protein_db

Parameters:

    Name          Type   Description                                                     Default
    protein_id    str    The uniprot id of the protein.                                  required
    header_tail   str    A text to append to the header of the entry in the database;   required
                         in ADToolbox it is better to use the EC number for
                         compatibility with downstream functions.

Examples:

>>> import os
>>> assert os.path.exists(os.path.join(Main_Dir,"protein_test_db.fasta"))==False
>>> db=Database(config=configs.Database(protein_db=os.path.join(Main_Dir,"protein_test_db.fasta")))
>>> db.add_protein_to_protein_db("P0A9P0","1.2.3.4")
>>> assert os.path.exists(os.path.join(Main_Dir,"protein_test_db.fasta"))==True
>>> assert os.path.getsize(os.path.join(Main_Dir,"protein_test_db.fasta"))>0
>>> import utils
>>> assert len(utils.fasta_to_dict(os.path.join(Main_Dir,"protein_test_db.fasta")))>0
>>> os.remove(os.path.join(Main_Dir,"protein_test_db.fasta"))
Source code in adtoolbox/core.py
def add_protein_to_protein_db(self, protein_id:str, header_tail:str)->None:
    """
    This function adds a protein sequence to the protein database. It takes a uniprot id and an EC number it is assigned to
    and adds the corresponding protein sequence to the protein database.

    Required Configs:
        - config.protein_db

    Args:
        protein_id (str): The uniprot id of the protein.
        header_tail (str): A text to append to the header of the entry in the database;
            in ADToolbox it is better to use the EC number for compatibility with downstream functions.


    Examples:
        >>> import os
        >>> assert os.path.exists(os.path.join(Main_Dir,"protein_test_db.fasta"))==False
        >>> db=Database(config=configs.Database(protein_db=os.path.join(Main_Dir,"protein_test_db.fasta")))
        >>> db.add_protein_to_protein_db("P0A9P0","1.2.3.4")
        >>> assert os.path.exists(os.path.join(Main_Dir,"protein_test_db.fasta"))==True
        >>> assert os.path.getsize(os.path.join(Main_Dir,"protein_test_db.fasta"))>0
        >>> import utils
        >>> assert len(utils.fasta_to_dict(os.path.join(Main_Dir,"protein_test_db.fasta")))>0
        >>> os.remove(os.path.join(Main_Dir,"protein_test_db.fasta"))
    """
    if not os.path.exists(self.config.protein_db):
        self.initialize_protein_db()
    with open(self.config.protein_db,"a") as f:
        f.write(">"+protein_id+"|"+header_tail+"\n")
        f.write(self.get_protein_seqs_from_uniprot(protein_id)+"\n")

add_proteins_from_ecnumbers_to_protein_db(ec_numbers)

This function adds protein sequences to the protein database from a list of EC numbers. It takes a list of EC numbers and finds the protein sequences for each EC number in the list. Then it saves the protein sequences in a fasta file.

Required Configs
  • config.protein_db

Parameters:

    Name         Type   Description             Default
    ec_numbers   list   A list of EC numbers.   required

Examples:

>>> import os
>>> assert os.path.exists(os.path.join(Main_Dir,"protein_test_db.fasta"))==False
>>> db=Database(config=configs.Database(protein_db=os.path.join(Main_Dir,"protein_test_db.fasta")))
>>> db.add_proteins_from_ecnumbers_to_protein_db(["1.1.1.1","1.1.1.2"])
>>> assert os.path.exists(os.path.join(Main_Dir,"protein_test_db.fasta"))==True
>>> import utils
>>> assert len(utils.fasta_to_dict(os.path.join(Main_Dir,"protein_test_db.fasta")))>0
>>> os.remove(os.path.join(Main_Dir,"protein_test_db.fasta"))
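
A common way to obtain the EC list is from cazy_ec (documented below); a sketch of that pairing, assuming the default configuration is acceptable:

from adtoolbox import configs, core

db = core.Database(config=configs.Database())

# CAZy-derived EC numbers for carbohydrate metabolism, then fetch and
# append the reviewed bacterial/archaeal sequences for each EC number.
db.add_proteins_from_ecnumbers_to_protein_db(db.cazy_ec())
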
Source code in adtoolbox/core.py
def add_proteins_from_ecnumbers_to_protein_db(self, ec_numbers:list)->None:
    """
    This function adds protein sequences to the protein database from a list of EC numbers.
    It takes a list of EC numbers and finds the protein sequences for each EC number in the list.
    Then it saves the protein sequences in a fasta file.

    Required Configs:
        - config.protein_db

    Args:
        ec_numbers (list): A list of EC numbers.

    Examples:
        >>> import os
        >>> assert os.path.exists(os.path.join(Main_Dir,"protein_test_db.fasta"))==False
        >>> db=Database(config=configs.Database(protein_db=os.path.join(Main_Dir,"protein_test_db.fasta")))
        >>> db.add_proteins_from_ecnumbers_to_protein_db(["1.1.1.1","1.1.1.2"])
        >>> assert os.path.exists(os.path.join(Main_Dir,"protein_test_db.fasta"))==True
        >>> import utils
        >>> assert len(utils.fasta_to_dict(os.path.join(Main_Dir,"protein_test_db.fasta")))>0
        >>> os.remove(os.path.join(Main_Dir,"protein_test_db.fasta"))
    """
    if not os.path.exists(self.config.protein_db):
        self.initialize_protein_db()

    protein_seqs={}
    for ec in ec_numbers:
        protein_seqs.update(self.proteins_from_ec(ec))

    with open(self.config.protein_db,"a") as f:
        for key,value in protein_seqs.items():
            f.write(">"+key+"\n")
            f.write(value+"\n")

build_mmseqs_database(container='None')

Builds an indexed mmseqs database from the ADToolbox's fasta protein database.

Required Configs
  • config.protein_db
  • config.adtoolbox_singularity
  • config.adtoolbox_docker

Parameters:

  • container (str): The container to run the script with. Defaults to "None".

Returns:

  • str: The script to build the mmseqs database.

Examples:

>>> import os
>>> assert os.path.exists(os.path.join(Main_Dir,"protein_test_db.fasta"))==False
>>> db=Database(config=configs.Database(protein_db=os.path.join(Main_Dir,"protein_test_db.fasta")))
>>> db.add_protein_to_protein_db("P0A9P0","x,x,x,x")
>>> assert os.path.exists(os.path.join(Main_Dir,"protein_test_db.fasta"))==True
>>> assert os.path.getsize(os.path.join(Main_Dir,"protein_test_db.fasta"))>0
>>> script=db.build_mmseqs_database()
>>> assert script=="mmseqs createdb "+str(os.path.join(Main_Dir,"protein_test_db.fasta"))+" "+str(db.config.protein_db_mmseqs)
>>> os.remove(os.path.join(Main_Dir,"protein_test_db.fasta"))
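
Since the method only returns the script, running it is left to the caller; a sketch using subprocess, assuming mmseqs (or the chosen container runtime) is on PATH:

import subprocess
from adtoolbox import configs, core

db = core.Database(config=configs.Database())

# container="None" returns a bare "mmseqs createdb ..." command;
# "docker" or "singularity" wrap it in the corresponding invocation.
script = db.build_mmseqs_database(container="None")
subprocess.run(script, shell=True, check=True)
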
Source code in adtoolbox/core.py
def build_mmseqs_database(self,container:str="None")->str:
    """Builds an indexed mmseqs database from the ADToolbox's fasta protein database.

    Required Configs:
        - config.protein_db
        - config.adtoolbox_singularity
        - config.adtoolbox_docker

    Args:
        container (str, optional): The container to run the script with. Defaults to "None".
    Returns:
        str: The script to build the mmseqs database.

    Examples:
        >>> import os
        >>> assert os.path.exists(os.path.join(Main_Dir,"protein_test_db.fasta"))==False
        >>> db=Database(config=configs.Database(protein_db=os.path.join(Main_Dir,"protein_test_db.fasta")))
        >>> db.add_protein_to_protein_db("P0A9P0","x,x,x,x")
        >>> assert os.path.exists(os.path.join(Main_Dir,"protein_test_db.fasta"))==True
        >>> assert os.path.getsize(os.path.join(Main_Dir,"protein_test_db.fasta"))>0
        >>> script=db.build_mmseqs_database()
        >>> assert script=="mmseqs createdb "+str(os.path.join(Main_Dir,"protein_test_db.fasta"))+" "+str(db.config.protein_db_mmseqs)
        >>> os.remove(os.path.join(Main_Dir,"protein_test_db.fasta"))

    """
    script=create_mmseqs_database(self.config.protein_db,
                                  self.config.protein_db_mmseqs,
                                  container=container,
                                  run=False,
                                  config=self.config)

    if container=="None":
        pass

    elif container=="singularity":
        script=f"singularity exec --bind {self.config.protein_db}:{self.config.protein_db},{self.config.protein_db_mmseqs}:{self.config.protein_db_mmseqs} {self.config.adtoolbox_singularity} {script}"

    elif container=="docker":
        script=f"docker run -v {self.config.protein_db}:{self.config.protein_db} -v {self.config.protein_db_mmseqs}:{self.config.protein_db_mmseqs} {self.config.adtoolbox_docker} {script}"

    else:
        raise ValueError("Container should be either None, singularity or docker!")

    return script

build_protein_db_from_reactions_db()

This function builds the protein database from the reaction database. It takes the reaction database and finds the protein sequences for each EC number in the reaction database. Then it saves the protein sequences in a fasta file.

Required Configs
  • config.reaction_db
  • config.protein_db

Examples:

>>> import os
>>> assert os.path.exists(os.path.join(Main_Dir,"protein_test_db.fasta"))==False
>>> assert os.path.exists(os.path.join(Main_Dir,"reaction_test_db.tsv"))==False
>>> db=Database(config=configs.Database(protein_db=os.path.join(Main_Dir,"protein_test_db.fasta"),reaction_db=os.path.join(Main_Dir,"reaction_test_db.tsv")))
>>> reaction_db=pd.DataFrame(columns=["EC_Numbers","Seed Ids","Reaction Names","ADM1_Reaction","e_adm_Reactions","Pathways"])
>>> reaction_db.loc[0,"EC_Numbers"]="1.1.1.1"
>>> reaction_db.to_csv(os.path.join(Main_Dir,"reaction_test_db.tsv"),index=False,sep="\t")
>>> db.build_protein_db_from_reactions_db()
>>> assert os.path.exists(os.path.join(Main_Dir,"protein_test_db.fasta"))==True
>>> assert os.path.exists(os.path.join(Main_Dir,"reaction_test_db.tsv"))==True
>>> assert os.path.getsize(os.path.join(Main_Dir,"protein_test_db.fasta"))>0
>>> os.remove(os.path.join(Main_Dir,"protein_test_db.fasta"))
>>> os.remove(os.path.join(Main_Dir,"reaction_test_db.tsv"))
Source code in adtoolbox/core.py
def build_protein_db_from_reactions_db(self):
    r"""
    This function builds the protein database from the reaction database.
    It takes the reaction database and finds the protein sequences for each EC number in the reaction database.
    Then it saves the protein sequences in a fasta file.

    Required Configs:
        - config.reaction_db
        - config.protein_db

    Examples:
        >>> import os
        >>> assert os.path.exists(os.path.join(Main_Dir,"protein_test_db.fasta"))==False
        >>> assert os.path.exists(os.path.join(Main_Dir,"reaction_test_db.tsv"))==False
        >>> db=Database(config=configs.Database(protein_db=os.path.join(Main_Dir,"protein_test_db.fasta"),reaction_db=os.path.join(Main_Dir,"reaction_test_db.tsv")))
        >>> reaction_db=pd.DataFrame(columns=["EC_Numbers","Seed Ids","Reaction Names","ADM1_Reaction","e_adm_Reactions","Pathways"])
        >>> reaction_db.loc[0,"EC_Numbers"]="1.1.1.1"
        >>> reaction_db.to_csv(os.path.join(Main_Dir,"reaction_test_db.tsv"),index=False,sep="\t")
        >>> db.build_protein_db_from_reactions_db()
        >>> assert os.path.exists(os.path.join(Main_Dir,"protein_test_db.fasta"))==True
        >>> assert os.path.exists(os.path.join(Main_Dir,"reaction_test_db.tsv"))==True
        >>> assert os.path.getsize(os.path.join(Main_Dir,"protein_test_db.fasta"))>0
        >>> os.remove(os.path.join(Main_Dir,"protein_test_db.fasta"))
        >>> os.remove(os.path.join(Main_Dir,"reaction_test_db.tsv"))
    """
    rxn_db=pd.read_table(self.config.reaction_db,delimiter="\t")
    ec_numbers=rxn_db["EC_Numbers"]
    ec_numbers=list(set(ec_numbers))
    protein_seqs={}
    for ec in ec_numbers:
        protein_seqs.update(self.proteins_from_ec(ec))
    with open(self.config.protein_db,"w") as f:
        for key,value in protein_seqs.items():
            f.write(">"+key+"\n")
            f.write(value+"\n")

cazy_ec()

This method returns a list of EC numbers extracted from the CAZy website. It is useful for adding more carbohydrate metabolism reactions to the reaction database.

Returns:

  • list: A list of EC numbers for carbohydrate metabolism found in the CAZy database.

Examples:

>>> db=Database()
>>> ec_list=db.cazy_ec()
>>> assert len(ec_list)>0
Source code in adtoolbox/core.py
def cazy_ec(self)->list:
    """
    This method returns a list of EC numbers that are extracted from the Cazy website.
    This method is useful for adding more carbohydrate metabolism reactions to the reaction database.

    Returns:
        list: A list of EC numbers for carbohydrate metabolism found on CAZy database.

    Examples:
        >>> db=Database()
        >>> ec_list=db.cazy_ec()
        >>> assert len(ec_list)>0
    """

    ec_list = []
    for link in self.config.cazy_links:
        page = requests.get(link)
        soup = BeautifulSoup(page.content, "html.parser")
        results = soup.find("div", class_="cadre_principal").find_all(
            "th", class_="thec")
        for ec_number in results:
            if '-' not in ec_number.text.strip() and '.' in ec_number.text.strip():
                ec_list.append(ec_number.text.strip())

    return ec_list

download_adm_parameters(verbose=True)

Downloads the parameters needed for running ADM models in ADToolbox.

Required Configs
  • config.adm_parameters_base_dir
  • config.adm_parameters_urls

Examples:

>>> import os
>>> assert os.path.exists(os.path.join(Main_Dir,"adm_parameters_test"))==False
>>> db=Database(config=configs.Database(adm_parameters_base_dir=os.path.join(Main_Dir,"adm_parameters_test")))
>>> db.download_adm_parameters(verbose=False) 
>>> assert os.path.exists(os.path.join(Main_Dir,"adm_parameters_test"))==True
>>> assert len(os.listdir(os.path.join(Main_Dir,"adm_parameters_test")))==12
>>> os.system("rm -r "+os.path.join(Main_Dir,"adm_parameters_test"))
0

Parameters:

  • verbose (bool): Whether to print the progress or not. Defaults to True.
Source code in adtoolbox/core.py
def download_adm_parameters(self,verbose:bool=True)->None:
    """
    Downloads the parameters needed for running ADM models in ADToolbox.

    Required Configs:
        - config.adm_parameters_base_dir
        - config.adm_parameters_urls

    Examples:
        >>> import os
        >>> assert os.path.exists(os.path.join(Main_Dir,"adm_parameters_test"))==False
        >>> db=Database(config=configs.Database(adm_parameters_base_dir=os.path.join(Main_Dir,"adm_parameters_test")))
        >>> db.download_adm_parameters(verbose=False) 
        >>> assert os.path.exists(os.path.join(Main_Dir,"adm_parameters_test"))==True
        >>> assert len(os.listdir(os.path.join(Main_Dir,"adm_parameters_test")))==12
        >>> os.system("rm -r "+os.path.join(Main_Dir,"adm_parameters_test"))
        0

    Args:

        verbose (bool, optional): Whether to print the progress or not. Defaults to True.


    """
    for param in self.config.adm_parameters.keys():
        if not pathlib.Path(self.config.adm_parameters[param]).parent.exists():
            os.makedirs(pathlib.Path(self.config.adm_parameters[param]).parent)
        r = requests.get(self.config.adm_parameters_urls[param], allow_redirects=True)
        with open(self.config.adm_parameters[param], 'wb') as f:
            f.write(r.content)
        if verbose:
            rich.print(f"[green]{param} downloaded to {self.config.adm_parameters[param]}")

download_all_databases(verbose=True)

This function will download all the required databases for all the functionalities of ADToolbox. NOTE: each method that this function calls is individually tested so it is skipped from testing!

Parameters:

  • verbose (bool): Whether to print the progress or not. Defaults to True.
Required Configs
  • config.adm_parameters_base_dir
  • config.adm_parameters_urls
  • config.seed_rxn_url
  • config.seed_compound_url
  • config.reaction_db
  • config.compound_db
  • config.protein_db_url
  • config.protein_db
  • config.adtoolbox_rxn_db_url
  • config.csv_reaction_db
  • config.feed_db_url
  • config.feed_db
  • config.amplicon_to_genome_db
  • config.amplicon_to_genome_urls
  • config.qiime_classifier_db_url
  • config.qiime_classifier_db
  • config.studies_db
  • config.studies_urls

Examples:

>>> import os
>>> db=Database(config=configs.Database())
>>> db.download_all_databases(verbose=False)
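
For a first-time setup this is typically the only call needed; a minimal sketch, assuming the default paths under Main_Dir are acceptable:

from adtoolbox import configs, core

# Downloads seed, protein, reaction, feed, studies, amplicon-to-genome,
# and QIIME classifier databases plus the ADM parameter files.
core.Database(config=configs.Database()).download_all_databases(verbose=True)
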
Source code in adtoolbox/core.py
def download_all_databases(self,verbose:bool=True)->None:
    """
    This function will download all the required databases for all the functionalities of ADToolbox.
    NOTE: each method that this function calls is individually tested so it is skipped from testing!

    Args:
        verbose (bool, optional): Whether to print the progress or not. Defaults to True.

    Required Configs:
        - config.adm_parameters_base_dir
        - config.adm_parameters_urls
        - config.seed_rxn_url
        - config.seed_compound_url
        - config.reaction_db
        - config.compound_db
        - config.protein_db_url
        - config.protein_db
        - config.adtoolbox_rxn_db_url
        - config.csv_reaction_db
        - config.feed_db_url
        - config.feed_db
        - config.amplicon_to_genome_db
        - config.amplicon_to_genome_urls
        - config.qiime_classifier_db_url
        - config.qiime_classifier_db
        - config.studies_db
        - config.studies_urls

    Examples:
        >>> import os # doctest: +SKIP
        >>> db=Database(config=configs.Database()) # doctest: +SKIP
        >>> db.download_all_databases(verbose=False) # doctest: +SKIP

    """

    self.download_seed_databases(verbose=verbose)
    self.download_adm_parameters(verbose=verbose)
    self.download_protein_database(verbose=verbose)
    self.download_reaction_database(verbose=verbose)
    self.download_feed_database(verbose=verbose)
    self.download_studies_database(verbose=verbose)
    self.download_amplicon_to_genome_db(verbose=verbose)
    self.download_qiime_classifier_db(verbose=verbose)

download_amplicon_to_genome_db(verbose=True)

This function will automatically download the GTDB-tk database for genome assignment.

Required Configs
  • config.amplicon_to_genome_db
  • config.amplicon_to_genome_urls

Parameters:

  • verbose (bool): Whether to print the progress or not. Defaults to True.

Examples:

>>> import os
>>> assert os.path.exists(os.path.join(Main_Dir,"amplicon_to_genome_test_db"))==False
>>> db=Database(config=configs.Database(amplicon_to_genome_db=os.path.join(Main_Dir,"amplicon_to_genome_test_db")))
>>> db.download_amplicon_to_genome_db(verbose=False)
>>> assert os.path.exists(os.path.join(Main_Dir,"amplicon_to_genome_test_db"))==True
>>> assert len(os.listdir(os.path.join(Main_Dir,"amplicon_to_genome_test_db")))>0
>>> os.system("rm -r "+os.path.join(Main_Dir,"amplicon_to_genome_test_db"))
0
Source code in adtoolbox/core.py
def download_amplicon_to_genome_db(self,verbose:bool=True):
    """
    This function will automatically download the GTDB-tk database for genome assignment.

    Required Configs:
        - config.amplicon_to_genome_db
        - config.amplicon_to_genome_urls

    Args:
        verbose (bool, optional): Whether to print the progress or not. Defaults to True.

    Examples:
        >>> import os
        >>> assert os.path.exists(os.path.join(Main_Dir,"amplicon_to_genome_test_db"))==False
        >>> db=Database(config=configs.Database(amplicon_to_genome_db=os.path.join(Main_Dir,"amplicon_to_genome_test_db")))
        >>> db.download_amplicon_to_genome_db(verbose=False)
        >>> assert os.path.exists(os.path.join(Main_Dir,"amplicon_to_genome_test_db"))==True
        >>> assert len(os.listdir(os.path.join(Main_Dir,"amplicon_to_genome_test_db")))>0
        >>> os.system("rm -r "+os.path.join(Main_Dir,"amplicon_to_genome_test_db"))
        0
    """
    if not os.path.exists(self.config.amplicon_to_genome_db):
        os.mkdir(self.config.amplicon_to_genome_db)

    url = self.config.amplicon_to_genome_urls
    if verbose:
        for keys in ['Version', 'MD5SUM', 'FILE_DESCRIPTIONS']:
            with requests.get(url[keys], allow_redirects=True, stream=True) as r:
                total_size = int(r.headers.get('content-length', 0))
                block_size = 1024
                with Progress() as progress:
                    task1 = progress.add_task("Downloading " + keys, total=total_size)
                    with open(os.path.join(self.config.amplicon_to_genome_db, keys), 'wb') as f:
                        for data in r.iter_content(block_size):
                            progress.update(task1, advance=len(data))
                            f.write(data)
        with requests.get(url['metadata_field_desc'], allow_redirects=True, stream=True) as r:
            total_size = int(r.headers.get('content-length', 0))
            block_size = 1024
            with Progress() as progress:
                task1 = progress.add_task("Downloading metadata_field_desc.tsv", total=total_size)
                with open(os.path.join(self.config.amplicon_to_genome_db, 'metadata_field_desc.tsv'), 'wb') as f:
                    for data in r.iter_content(block_size):
                        progress.update(task1, advance=len(data))
                        f.write(data)

        for keys in ['bac120_ssu']:
            with requests.get(url[keys], allow_redirects=True, stream=True) as r:
                total_size = int(r.headers.get('content-length', 0))
                block_size = 1024
                with Progress() as progress:
                    task1 = progress.add_task("Downloading " + keys, total=total_size)
                    with open(os.path.join(self.config.amplicon_to_genome_db, url[keys].split("/")[-1]), 'wb') as f:
                        for data in r.iter_content(block_size):
                            progress.update(task1, advance=len(data))
                            f.write(data)
            with tarfile.open(os.path.join(self.config.amplicon_to_genome_db, url[keys].split("/")[-1])) as f_in:
                f_in.extractall(self.config.amplicon_to_genome_db)


            os.remove(os.path.join(self.config.amplicon_to_genome_db, url[keys].split("/")[-1]))
    else:
        for keys in ['Version', 'MD5SUM', 'FILE_DESCRIPTIONS']:
            with requests.get(url[keys], allow_redirects=True, stream=False) as r:
                with open(os.path.join(self.config.amplicon_to_genome_db, keys), 'wb') as f:
                    f.write(r.content)
        with requests.get(url['metadata_field_desc'], allow_redirects=True, stream=False) as r:
            with open(os.path.join(self.config.amplicon_to_genome_db, 'metadata_field_desc.tsv'), 'wb') as f:
                f.write(r.content)
        for keys in [ 'bac120_ssu']:
            with requests.get(url[keys], allow_redirects=True, stream=False) as r:
                with open(os.path.join(self.config.amplicon_to_genome_db, url[keys].split("/")[-1]), 'wb') as f:
                    f.write(r.content)
            with tarfile.open(os.path.join(self.config.amplicon_to_genome_db, url[keys].split("/")[-1])) as f_in:
                f_in.extractall(self.config.amplicon_to_genome_db)
    if verbose:
        rich.print("[bold green]Downloaded all the required files for Amplicon to Genome functionality.[/bold green]")

download_feed_database(verbose=True)

This function will download the feed database from the remote repository.

Required Configs
  • config.feed_db_url
  • config.feed_db

Parameters:

  • verbose (bool): Whether to print the progress or not. Defaults to True.

Examples:

>>> import os
>>> assert os.path.exists(os.path.join(Main_Dir,"feed_test_db.tsv"))==False
>>> db=Database(config=configs.Database(feed_db=os.path.join(Main_Dir,"feed_test_db.tsv")))
>>> db.download_feed_database(verbose=False)
>>> assert os.path.exists(os.path.join(Main_Dir,"feed_test_db.tsv"))==True
>>> assert os.path.getsize(os.path.join(Main_Dir,"feed_test_db.tsv"))>0
>>> os.remove(os.path.join(Main_Dir,"feed_test_db.tsv"))
Source code in adtoolbox/core.py
def download_feed_database(self,verbose:bool=True)-> None:
    """
    This function will download the feed database from the remote repository.

    Required Configs:
        - config.feed_db_url
        - config.feed_db

    Args:
        verbose (bool, optional): Whether to print the progress or not. Defaults to True.

    Examples:
        >>> import os
        >>> assert os.path.exists(os.path.join(Main_Dir,"feed_test_db.tsv"))==False
        >>> db=Database(config=configs.Database(feed_db=os.path.join(Main_Dir,"feed_test_db.tsv")))
        >>> db.download_feed_database(verbose=False)
        >>> assert os.path.exists(os.path.join(Main_Dir,"feed_test_db.tsv"))==True
        >>> assert os.path.getsize(os.path.join(Main_Dir,"feed_test_db.tsv"))>0
        >>> os.remove(os.path.join(Main_Dir,"feed_test_db.tsv"))
    """
    r = requests.get(self.config.feed_db_url, allow_redirects=True)

    if not os.path.exists(Path(self.config.feed_db).parent):
        os.makedirs(Path(self.config.feed_db).parent)

    with open(self.config.feed_db, 'wb') as f:
        f.write(r.content)
    if verbose:
        rich.print(f"[green]Feed database downloaded to {self.config.feed_db}")

download_protein_database(verbose=True)

Downloads the prebuilt protein database from the remote repository.

Required Configs
  • config.protein_db_url
  • config.protein_db

Parameters:

  • verbose (bool): Whether to print the progress or not. Defaults to True.

Examples:

>>> import os
>>> assert os.path.exists(os.path.join(Main_Dir,"protein_test_db.fasta"))==False
>>> db=Database(config=configs.Database(protein_db=os.path.join(Main_Dir,"protein_test_db.fasta")))
>>> db.download_protein_database(verbose=False)
>>> assert os.path.exists(os.path.join(Main_Dir,"protein_test_db.fasta"))==True
>>> assert os.path.getsize(os.path.join(Main_Dir,"protein_test_db.fasta"))>0
>>> os.remove(os.path.join(Main_Dir,"protein_test_db.fasta"))
Source code in adtoolbox/core.py
def download_protein_database(self, verbose:bool=True) -> None:
    """
    Downloads the prebuilt protein database from the remote repository.

    Required Configs:
        - config.protein_db_url
        - config.protein_db

    Args:
        verbose (bool, optional): Whether to print the progress or not. Defaults to True.

    Examples:
        >>> import os
        >>> assert os.path.exists(os.path.join(Main_Dir,"protein_test_db.fasta"))==False
        >>> db=Database(config=configs.Database(protein_db=os.path.join(Main_Dir,"protein_test_db.fasta")))
        >>> db.download_protein_database(verbose=False)
        >>> assert os.path.exists(os.path.join(Main_Dir,"protein_test_db.fasta"))==True
        >>> assert os.path.getsize(os.path.join(Main_Dir,"protein_test_db.fasta"))>0
        >>> os.remove(os.path.join(Main_Dir,"protein_test_db.fasta"))
    """
    r = requests.get(self.config.protein_db_url, allow_redirects=True)

    if not os.path.exists(Path(self.config.protein_db).parent):
        os.makedirs(Path(self.config.protein_db).parent)

    with open(self.config.protein_db, 'wb') as f:
        f.write(r.content)
    if verbose:
        rich.print(f"[green]Protein database downloaded to {self.config.protein_db}")

download_reaction_database(verbose=True)

This function will download the reaction database from the remote repository.

Required Configs
  • config.adtoolbox_rxn_db_url
  • config.csv_reaction_db

Parameters:

  • verbose (bool): Whether to print the progress or not. Defaults to True.

Examples:

>>> import os
>>> assert os.path.exists(os.path.join(Main_Dir,"reaction_test_db.csv"))==False
>>> db=Database(config=configs.Database(csv_reaction_db=os.path.join(Main_Dir,"reaction_test_db.csv")))
>>> db.download_reaction_database(verbose=False)
>>> assert os.path.exists(os.path.join(Main_Dir,"reaction_test_db.csv"))==True
>>> assert os.path.getsize(os.path.join(Main_Dir,"reaction_test_db.csv"))>0
>>> os.remove(os.path.join(Main_Dir,"reaction_test_db.csv"))
Source code in adtoolbox/core.py
def download_reaction_database(self,verbose:bool=True)->None:
    """
    This function will download the reaction database from the remote repository.

    Required Configs:
        - config.adtoolbox_rxn_db_url
        - config.csv_reaction_db

    Args:
        verbose (bool, optional): Whether to print the progress or not. Defaults to True.

    Examples:
        >>> import os
        >>> assert os.path.exists(os.path.join(Main_Dir,"reaction_test_db.csv"))==False
        >>> db=Database(config=configs.Database(csv_reaction_db=os.path.join(Main_Dir,"reaction_test_db.csv")))
        >>> db.download_reaction_database(verbose=False)
        >>> assert os.path.exists(os.path.join(Main_Dir,"reaction_test_db.csv"))==True
        >>> assert os.path.getsize(os.path.join(Main_Dir,"reaction_test_db.csv"))>0
        >>> os.remove(os.path.join(Main_Dir,"reaction_test_db.csv"))
    """

    r = requests.get(self.config.adtoolbox_rxn_db_url, allow_redirects=True)

    if not os.path.exists(Path(self.config.csv_reaction_db).parent):
        os.makedirs(Path(self.config.csv_reaction_db).parent)

    with open(self.config.csv_reaction_db, 'wb') as f:
        f.write(r.content)
    if verbose:
        rich.print(f"[green]Reaction database downloaded to {self.config.csv_reaction_db}")

download_seed_databases(verbose=True)

This function will download the seed databases, both compound and reaction databases.

Required Configs
  • config.seed_rxn_url
  • config.seed_compound_url
  • config.reaction_db
  • config.compound_db

Parameters:

  • verbose (bool): Whether to print the progress or not. Defaults to True.

Examples:

>>> import os
>>> assert os.path.exists(os.path.join(Main_Dir,"seed_rxn.json"))==False
>>> assert os.path.exists(os.path.join(Main_Dir,"seed_compound.json"))==False
>>> db=Database(config=configs.Database(reaction_db=os.path.join(Main_Dir,"seed_rxn.json"),compound_db=os.path.join(Main_Dir,"seed_compound.json")))
>>> db.download_seed_databases(verbose=False)
>>> assert os.path.exists(os.path.join(Main_Dir,"seed_rxn.json"))==True
>>> assert os.path.exists(os.path.join(Main_Dir,"seed_compound.json"))==True
>>> os.remove(os.path.join(Main_Dir,"seed_rxn.json"))
>>> os.remove(os.path.join(Main_Dir,"seed_compound.json"))
Source code in adtoolbox/core.py
def download_seed_databases(self,verbose:bool=True) -> None :
    """This function will download the seed databases, both compound and reaction databases.

    Required Configs:
        - config.seed_rxn_url
        - config.seed_compound_url
        - config.reaction_db
        - config.compound_db

    Args:
        verbose (bool, optional): Whether to print the progress or not. Defaults to True.

    Examples:
        >>> import os
        >>> assert os.path.exists(os.path.join(Main_Dir,"seed_rxn.json"))==False
        >>> assert os.path.exists(os.path.join(Main_Dir,"seed_compound.json"))==False
        >>> db=Database(config=configs.Database(reaction_db=os.path.join(Main_Dir,"seed_rxn.json"),compound_db=os.path.join(Main_Dir,"seed_compound.json")))
        >>> db.download_seed_databases(verbose=False)
        >>> assert os.path.exists(os.path.join(Main_Dir,"seed_rxn.json"))==True
        >>> assert os.path.exists(os.path.join(Main_Dir,"seed_compound.json"))==True
        >>> os.remove(os.path.join(Main_Dir,"seed_rxn.json"))
        >>> os.remove(os.path.join(Main_Dir,"seed_compound.json"))
    """
    r = requests.get(self.config.seed_rxn_url, allow_redirects=True,stream=True)
    if not os.path.exists(Path(self.config.reaction_db).parent):
        os.makedirs(Path(self.config.reaction_db).parent)
    with open(self.config.reaction_db, 'wb') as f:
        f.write(r.content)
    if verbose:
        rich.print(f"[green]Reaction database downloaded to {self.config.reaction_db}")
    r=requests.get(self.config.seed_compound_url,allow_redirects=True,stream=True)
    with open(self.config.compound_db, 'wb') as f:
        f.write(r.content)
    if verbose:
        rich.print(f"[green]Compound database downloaded to {self.config.compound_db}")

download_studies_database(verbose=True)

This function will download the required files for studies functionality.

Parameters:

  • verbose (bool): Whether to print the progress or not. Defaults to True.

Examples:

>>> import os
>>> assert os.path.exists(os.path.join(Main_Dir,"studies_test_db.tsv"))==False
>>> db=Database(config=configs.Database(studies_db=os.path.join(Main_Dir,"studies_test_db.tsv")))
>>> db.download_studies_database(verbose=False)
>>> assert os.path.exists(os.path.join(Main_Dir,"studies_test_db.tsv"))==True
>>> assert os.path.getsize(os.path.join(Main_Dir,"studies_test_db.tsv"))>0
>>> os.remove(os.path.join(Main_Dir,"studies_test_db.tsv"))
Source code in adtoolbox/core.py
def download_studies_database(self,verbose:bool=True)->None:
    """
    This function will download the required files for studies functionality.

    Args:
        verbose (bool, optional): Whether to print the progress or not. Defaults to True.

    Examples:
        >>> import os
        >>> assert os.path.exists(os.path.join(Main_Dir,"studies_test_db.tsv"))==False
        >>> db=Database(config=configs.Database(studies_db=os.path.join(Main_Dir,"studies_test_db.tsv")))
        >>> db.download_studies_database(verbose=False)
        >>> assert os.path.exists(os.path.join(Main_Dir,"studies_test_db.tsv"))==True
        >>> assert os.path.getsize(os.path.join(Main_Dir,"studies_test_db.tsv"))>0
        >>> os.remove(os.path.join(Main_Dir,"studies_test_db.tsv"))
    """
    for i in self.config.studies_remote:
        r = requests.get(self.config.studies_remote[i], allow_redirects=True)
        if not os.path.exists(Path(self.config.studies_local[i]).parent):
            os.makedirs(Path(self.config.studies_local[i]).parent)
        with open(self.config.studies_local[i], 'wb') as f:
            f.write(r.content)

        if verbose:
            rich.print(f"[bold green]Downloaded {self.config.studies_remote[i]}[/bold green]")

filter_seed_from_ec(ec_list, save=False)

This function takes a list of EC numbers and filters the seed database to find the seed reactions that have the EC numbers in their EC number list. This will help to trim the large seed database to a smaller one that only contains the reactions that are relevant to the AD process.

Parameters:

  • ec_list (list[str]): A list of EC numbers. (required)
  • save (bool): Whether to save the filtered seed database or not. Defaults to False.

Returns:

  • tuple: A tuple containing the filtered seed reaction database and the seed compound database, respectively.

Required Configs
  • config.reaction_db
  • config.compound_db
  • config.local_reaction_db
  • config.local_compound_db

Examples:

>>> db=Database()
>>> seed_rxn_db,seed_compound_db=db.filter_seed_from_ec(["1.1.1.1","1.1.1.2"])
>>> assert len(seed_rxn_db)>0 and len(seed_compound_db)>0
>>> assert pd.read_json(configs.Database().reaction_db).shape[0]>pd.DataFrame(seed_rxn_db).shape[0]
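
With save=True the trimmed databases are also written to config.local_reaction_db and config.local_compound_db; a sketch of that variant:

from adtoolbox import configs, core

db = core.Database(config=configs.Database())

# Keep only seed reactions whose EC annotations intersect the list,
# plus the compounds those reactions reference; persist both locally.
rxns, compounds = db.filter_seed_from_ec(["1.1.1.1", "1.1.1.2"], save=True)
print(len(rxns), len(compounds))
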
Source code in adtoolbox/core.py
def filter_seed_from_ec(self, 
                        ec_list:list[str],
                        save:bool=False) -> tuple:
    """
    This function takes a list of EC numbers and filters the seed database to find the seed reactions that have the EC numbers in their EC number list.
    This will help to trim the large seed database to a smaller one that only contains the reactions that are relevant to the AD process.

    Args:
        ec_list (list[str]): A list of EC numbers.
        save (bool, optional): Whether to save the filtered seed database or not. Defaults to False.

    Returns:
        tuple: A tuple containing the filtered seed reaction database and the seed compound database, respectively.

    Required Configs:
        - config.reaction_db
        - config.compound_db
        - config.local_reaction_db
        - config.local_compound_db


    Examples:
        >>> db=Database()
        >>> seed_rxn_db,seed_compound_db=db.filter_seed_from_ec(["1.1.1.1","1.1.1.2"])
        >>> assert len(seed_rxn_db)>0 and len(seed_compound_db)>0
        >>> assert pd.read_json(configs.Database().reaction_db).shape[0]>pd.DataFrame(seed_rxn_db).shape[0]
    """
    seed_rxn_db=pd.read_json(self.config.reaction_db)
    seed_compound_db=pd.read_json(self.config.compound_db)
    seed_rxn_db=seed_rxn_db[seed_rxn_db["ec_numbers"].apply(lambda x: any(ec in x for ec in ec_list) if x else False)]
    seed_compound_db=seed_compound_db[seed_compound_db["id"].apply(lambda x: True if x in seed_rxn_db["stoichiometry"].sum() else False)]
    if save:
        seed_rxn_db.to_json(self.config.local_reaction_db)
        seed_compound_db.to_json(self.config.local_compound_db)
    return seed_rxn_db.to_dict(orient="records"),seed_compound_db.to_dict(orient="records")

get_experiment_from_experiments_db(field_name, query)

This function returns an experiment from the experiments database. It takes the query string and the column name to query and returns the experiment that contains the query string in the given column.

Required Configs
  • config.experimental_data_db

Parameters:

  • field_name (str): The name of the column to query. (required)
  • query (str): The query string. (required)

Returns:

  • list[Experiment]: A list of Experiment instances matching the query.

Examples:

>>> import os,json
>>> assert os.path.exists(os.path.join(Main_Dir,"experiments_test_db.json"))==False
>>> db=Database(config=configs.Database(experimental_data_db=os.path.join(Main_Dir,"experiments_test_db.json")))
>>> experiment=Experiment(name="test_study",time=[0,1,2],variables=[2,6],data= [[1,2,3],[4,5,6]],reference="test")
>>> db.add_experiment_to_experiments_db(experiment)
>>> assert os.path.exists(os.path.join(Main_Dir,"experiments_test_db.json"))==True
>>> assert os.path.getsize(os.path.join(Main_Dir,"experiments_test_db.json"))>0
>>> experiment=db.get_experiment_from_experiments_db("name","test_study")
>>> assert experiment[0].name=="test_study"
>>> os.remove(os.path.join(Main_Dir,"experiments_test_db.json"))
Source code in adtoolbox/core.py
def get_experiment_from_experiments_db(self,field_name:str,query:str)->list[Experiment]:
    r"""
    This function returns an experiment from the experiments database. It takes the query string and the column name to query and returns the experiment that contains the query string in the given column.

    Required Configs:
        - config.experimental_data_db

    Args:
        field_name (str): The name of the column to query.
        query (str): The query string.

    Returns:
        list[Experiment]: A list of Experiment instances matching the query.

    Examples:
        >>> import os,json
        >>> assert os.path.exists(os.path.join(Main_Dir,"experiments_test_db.json"))==False
        >>> db=Database(config=configs.Database(experimental_data_db=os.path.join(Main_Dir,"experiments_test_db.json")))
        >>> experiment=Experiment(name="test_study",time=[0,1,2],variables=[2,6],data= [[1,2,3],[4,5,6]],reference="test")
        >>> db.add_experiment_to_experiments_db(experiment)
        >>> assert os.path.exists(os.path.join(Main_Dir,"experiments_test_db.json"))==True
        >>> assert os.path.getsize(os.path.join(Main_Dir,"experiments_test_db.json"))>0
        >>> experiment=db.get_experiment_from_experiments_db("name","test_study")
        >>> assert experiment[0].name=="test_study"
        >>> os.remove(os.path.join(Main_Dir,"experiments_test_db.json"))
    """
    if not os.path.exists(self.config.experimental_data_db):
        raise FileNotFoundError("Experimental data database does not exist!")

    with open(self.config.experimental_data_db,"r") as f:
        experiments_db=json.load(f)
    experiments_db=[experiment for experiment in experiments_db if query in experiment[field_name]]
    return [Experiment(**experiment) for experiment in experiments_db]

get_feed_from_feed_db(field_name, query)

This function returns a feed from the feed database. It takes the query string and the column name to query and returns the feed that contains the query string in the given column.

Required Configs
  • config.feed_db

Parameters:

  • field_name (str): The name of the column to query. (required)
  • query (str): The query string. (required)

Returns:

  • list[Feed]: A list of Feed instances matching the query.

Examples:

>>> import os
>>> assert os.path.exists(os.path.join(Main_Dir,"feed_test_db.tsv"))==False
>>> db=Database(config=configs.Database(feed_db=os.path.join(Main_Dir,"feed_test_db.tsv")))
>>> feed=Feed(name="test_feed",carbohydrates=10,lipids=20,proteins=30,tss=80,si=10,xi=30,reference="test")
>>> db.add_feed_to_feed_db(feed)
>>> assert os.path.exists(os.path.join(Main_Dir,"feed_test_db.tsv"))==True
>>> assert pd.read_table(os.path.join(Main_Dir,"feed_test_db.tsv"),delimiter="\t").shape[0]>0
>>> feed=db.get_feed_from_feed_db("name","test_feed")
>>> assert feed[0].name=="test_feed"
>>> os.remove(os.path.join(Main_Dir,"feed_test_db.tsv"))
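
Note that the match is a substring match on the queried column (pandas str.contains under the hood), so partial names also hit; a sketch with a hypothetical query string:

from adtoolbox import configs, core

db = core.Database(config=configs.Database())

# Substring match: any feed whose name contains "manure" is returned
# (hypothetical query; depends on what the feed database contains).
for feed in db.get_feed_from_feed_db("name", "manure"):
    print(feed.name, feed.carbohydrates, feed.lipids, feed.proteins)
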
Source code in adtoolbox/core.py
def get_feed_from_feed_db(self,field_name:str,query:str)->list[Feed]:
    r"""
    This function returns a feed from the feed database. It takes the query string and the column name to query and returns the feed that contains the query string in the given column.

    Required Configs:
        - config.feed_db

    Args:
        field_name (str): The name of the column to query.
        query (str): The query string.

    Returns:
        list[Feed]: A list of Feed instances matching the query.

    Examples:
        >>> import os
        >>> assert os.path.exists(os.path.join(Main_Dir,"feed_test_db.tsv"))==False
        >>> db=Database(config=configs.Database(feed_db=os.path.join(Main_Dir,"feed_test_db.tsv")))
        >>> feed=Feed(name="test_feed",carbohydrates=10,lipids=20,proteins=30,tss=80,si=10,xi=30,reference="test")
        >>> db.add_feed_to_feed_db(feed)
        >>> assert os.path.exists(os.path.join(Main_Dir,"feed_test_db.tsv"))==True
        >>> assert pd.read_table(os.path.join(Main_Dir,"feed_test_db.tsv"),delimiter="\t").shape[0]>0
        >>> feed=db.get_feed_from_feed_db("name","test_feed")
        >>> assert feed[0].name=="test_feed"
        >>> os.remove(os.path.join(Main_Dir,"feed_test_db.tsv"))

    """
    if not os.path.exists(self.config.feed_db):
        raise FileNotFoundError("Feed database does not exist!")

    feed_db=pd.read_table(self.config.feed_db,delimiter="\t")
    feed_db=feed_db[feed_db[field_name].str.contains(query)]
    return [Feed(**feed.to_dict()) for _,feed in feed_db.iterrows()]

get_metagenomics_study_from_metagenomics_studies_db(field_name, query)

This function returns a metagenomics study from the metagenomics studies database. It takes the query string and the column name to query and returns the metagenomics study that contains the query string in the given column.

Required Configs
  • config.metagenomics_studies_db

Parameters:

  • field_name (str): The name of the column to query. (required)
  • query (str): The query string. (required)

Returns:

  • list[MetagenomicsStudy]: A list of MetagenomicsStudy instances matching the query.

Examples:

>>> import os
>>> assert os.path.exists(os.path.join(Main_Dir,"metagenomics_studies_test_db.tsv"))==False
>>> db=Database(config=configs.Database(metagenomics_studies_db=os.path.join(Main_Dir,"metagenomics_studies_test_db.tsv")))
>>> metagenomics_study=MetagenomicsStudy(name="test_study",study_type="metagenomics",microbiome="anaerobic digester",sample_accession="test",comments="test",study_accession="test")
>>> db.add_metagenomics_study_to_metagenomics_studies_db(metagenomics_study)
>>> assert os.path.exists(os.path.join(Main_Dir,"metagenomics_studies_test_db.tsv"))==True
>>> assert pd.read_table(os.path.join(Main_Dir,"metagenomics_studies_test_db.tsv"),delimiter="\t").shape[0]>0
>>> metagenomics_study=db.get_metagenomics_study_from_metagenomics_studies_db("name","test_study")
>>> assert metagenomics_study[0].name=="test_study"
>>> os.remove(os.path.join(Main_Dir,"metagenomics_studies_test_db.tsv"))
Source code in adtoolbox/core.py
def get_metagenomics_study_from_metagenomics_studies_db(self,field_name:str,query:str)->list[MetagenomicsStudy]:
    r"""
    This function returns a metagenomics study from the metagenomics studies database. It takes the query string and the column name to query and returns the metagenomics study that contains the query string in the given column.

    Required Configs:
        - config.metagenomics_studies_db

    Args:
        field_name (str): The name of the column to query.
        query (str): The query string.

    Returns:
        list[MetagenomicsStudy]: A list of MetagenomicsStudy instances matching the query.

    Examples:
        >>> import os
        >>> assert os.path.exists(os.path.join(Main_Dir,"metagenomics_studies_test_db.tsv"))==False
        >>> db=Database(config=configs.Database(metagenomics_studies_db=os.path.join(Main_Dir,"metagenomics_studies_test_db.tsv")))
        >>> metagenomics_study=MetagenomicsStudy(name="test_study",study_type="metagenomics",microbiome="anaerobic digester",sample_accession="test",comments="test",study_accession="test")
        >>> db.add_metagenomics_study_to_metagenomics_studies_db(metagenomics_study)
        >>> assert os.path.exists(os.path.join(Main_Dir,"metagenomics_studies_test_db.tsv"))==True
        >>> assert pd.read_table(os.path.join(Main_Dir,"metagenomics_studies_test_db.tsv"),delimiter="\t").shape[0]>0
        >>> metagenomics_study=db.get_metagenomics_study_from_metagenomics_studies_db("name","test_study")
        >>> assert metagenomics_study[0].name=="test_study"
        >>> os.remove(os.path.join(Main_Dir,"metagenomics_studies_test_db.tsv"))
    """
    if not os.path.exists(self.config.metagenomics_studies_db):
        raise FileNotFoundError("Metagenomics studies database does not exist!")

    metagenomics_studies_db=pd.read_table(self.config.metagenomics_studies_db,delimiter="\t")
    metagenomics_studies_db=metagenomics_studies_db[metagenomics_studies_db[field_name].str.contains(query)]
    return [MetagenomicsStudy(**metagenomics_study.to_dict()) for _,metagenomics_study in metagenomics_studies_db.iterrows()]

get_protein_seqs_from_uniprot(uniprot_id)

This function takes a uniprot id and fetches the protein sequence from Uniprot.

Parameters:

  • uniprot_id (str): The UniProt ID of the protein. (required)

Returns:

  • str: The protein sequence.

Examples:

>>> db=Database()
>>> seq=db.get_protein_seqs_from_uniprot("P0A9P0")
>>> assert type(seq)==str and len(seq)>0
Source code in adtoolbox/core.py
def get_protein_seqs_from_uniprot(self, uniprot_id:str) -> str:
    """
    This function takes a uniprot id and fetches the protein sequence from Uniprot.

    Args:
        uniprot_id (str): The uniprot id of the protein.


    Returns:
        str: The protein sequence.

    Examples:
        >>> db=Database()
        >>> seq=db.get_protein_seqs_from_uniprot("P0A9P0")
        >>> assert type(seq)==str and len(seq)>0
    """
    Base_URL = "https://rest.uniprot.org/uniprotkb/"
    session = requests.Session()
    retry = Retry(connect=3, backoff_factor=0.5)
    adapter = HTTPAdapter(max_retries=retry)
    session.mount('http://', adapter)
    try:
        file = session.get(
            f"https://rest.uniprot.org/uniprotkb/{uniprot_id}.fasta", timeout=10)
    except requests.exceptions.RequestException:
        print("Could not fetch the sequence! Trying again ...")
        while True:
            time.sleep(5)
            file = session.get(Base_URL+uniprot_id+".fasta", timeout=10)
            if file.ok:
                break

    return ''.join(file.text.split('\n')[1:-1])

initialize_experimental_data_db()

This function initializes ADToolbox's experimental data database by creating an empty json file. Be careful, this will overwrite any existing file with the same name.

Required Configs
  • config.experimental_data_db

Examples:

>>> import os
>>> assert os.path.exists(os.path.join(Main_Dir,"experimental_data_test_db.json"))==False
>>> db=Database(config=configs.Database(experimental_data_db=os.path.join(Main_Dir,"experimental_data_test_db.json")))
>>> db.initialize_experimental_data_db()
>>> assert pd.read_json(os.path.join(Main_Dir,"experimental_data_test_db.json")).shape[0]==0
>>> with open(os.path.join(Main_Dir,"experimental_data_test_db.json"),"r") as f:
...     assert json.load(f)==[]
>>> os.remove(os.path.join(Main_Dir,"experimental_data_test_db.json"))
Source code in adtoolbox/core.py
def initialize_experimental_data_db(self)->None:
    """This function intializes ADToolbox's experimental data database by creating an empty json file.
    Be careful, this will overwrite any existing file with the same name.

    Required Configs:
        - config.experimental_data_db

    Examples:
        >>> import os
        >>> assert os.path.exists(os.path.join(Main_Dir,"experimental_data_test_db.json"))==False
        >>> db=Database(config=configs.Database(experimental_data_db=os.path.join(Main_Dir,"experimental_data_test_db.json")))
        >>> db.initialize_experimental_data_db()
        >>> assert pd.read_json(os.path.join(Main_Dir,"experimental_data_test_db.json")).shape[0]==0
        >>> with open(os.path.join(Main_Dir,"experimental_data_test_db.json"),"r") as f:
        ...     assert json.load(f)==[]
        >>> os.remove(os.path.join(Main_Dir,"experimental_data_test_db.json"))

    """
    pd.DataFrame(columns=["name","initial_conditions","time","variables","data","reference"]).to_json(self.config.experimental_data_db,orient="records")

initialize_feed_db()

This function initializes ADToolbox's Feed database by creating an empty tsv file. Be careful, this will overwrite any existing file with the same name.

Required Configs
  • config.feed_db

Examples:

>>> import os
>>> assert os.path.exists(os.path.join(Main_Dir,"feed_test_db.tsv"))==False
>>> db=Database(config=configs.Database(feed_db=os.path.join(Main_Dir,"feed_test_db.tsv")))
>>> db.initialize_feed_db()
>>> assert pd.read_table(os.path.join(Main_Dir,"feed_test_db.tsv"),delimiter='\t').shape[0]==0
>>> assert set(pd.read_table(os.path.join(Main_Dir,"feed_test_db.tsv"),delimiter='\t').columns)==set(["name","carbohydrates","lipids","proteins","tss","si","xi","reference"])
>>> os.remove(os.path.join(Main_Dir,"feed_test_db.tsv"))
Source code in adtoolbox/core.py
def initialize_feed_db(self)->None:
    r"""This function intializes ADToolbox's Feed database by creating an empty tsv file.
    Be careful, this will overwrite any existing file with the same name.

    Required Configs:
        - config.feed_db

    Examples:
        >>> import os
        >>> assert os.path.exists(os.path.join(Main_Dir,"feed_test_db.tsv"))==False
        >>> db=Database(config=configs.Database(feed_db=os.path.join(Main_Dir,"feed_test_db.tsv")))
        >>> db.initialize_feed_db()
        >>> assert pd.read_table(os.path.join(Main_Dir,"feed_test_db.tsv"),delimiter='\t').shape[0]==0
        >>> assert set(pd.read_table(os.path.join(Main_Dir,"feed_test_db.tsv"),delimiter='\t').columns)==set(["name","carbohydrates","lipids","proteins","tss","si","xi","reference"])
        >>> os.remove(os.path.join(Main_Dir,"feed_test_db.tsv"))

    """
    pd.DataFrame(columns=["name","carbohydrates","lipids","proteins","tss","si","xi","reference"]).to_csv(self.config.feed_db,index=False,sep="\t")

initialize_metagenomics_studies_db()

This function initializes ADToolbox's Metagenomics studies database by creating an empty tsv file. Be careful, this will overwrite any existing file with the same name.

Required Configs
  • config.metagenomics_studies_db

Examples:

>>> import os
>>> assert os.path.exists(os.path.join(Main_Dir,"metagenomics_studies_test_db.tsv"))==False
>>> db=Database(config=configs.Database(metagenomics_studies_db=os.path.join(Main_Dir,"metagenomics_studies_test_db.tsv")))
>>> db.initialize_metagenomics_studies_db()
>>> assert pd.read_table(os.path.join(Main_Dir,"metagenomics_studies_test_db.tsv"),delimiter="\t").shape[0]==0
>>> assert set(pd.read_table(os.path.join(Main_Dir,"metagenomics_studies_test_db.tsv"),delimiter="\t").columns)==set(["name","study_type","microbiome","sample_accession","comments","study_accession"])
>>> os.remove(os.path.join(Main_Dir,"metagenomics_studies_test_db.tsv"))
Source code in adtoolbox/core.py
def initialize_metagenomics_studies_db(self)->None:
    r"""This function intializes ADToolbox's Metagenomics studies database by creating an empty tsv file.
    Be careful, this will overwrite any existing file with the same name.

    Required Configs:
        - config.metagenomics_studies_db

    Examples:
        >>> import os
        >>> assert os.path.exists(os.path.join(Main_Dir,"metagenomics_studies_test_db.tsv"))==False
        >>> db=Database(config=configs.Database(metagenomics_studies_db=os.path.join(Main_Dir,"metagenomics_studies_test_db.tsv")))
        >>> db.initialize_metagenomics_studies_db()
        >>> assert pd.read_table(os.path.join(Main_Dir,"metagenomics_studies_test_db.tsv"),delimiter="\t").shape[0]==0
        >>> assert set(pd.read_table(os.path.join(Main_Dir,"metagenomics_studies_test_db.tsv"),delimiter="\t").columns)==set(["name","study_type","microbiome","sample_accession","comments","study_accession"])
        >>> os.remove(os.path.join(Main_Dir,"metagenomics_studies_test_db.tsv"))

    """
    pd.DataFrame(columns=["name","study_type","microbiome","sample_accession","comments","study_accession"]).to_csv(self.config.metagenomics_studies_db,index=False,sep="\t")

initialize_protein_db()

This function initializes ADToolbox's protein database by creating an empty fasta file. Be careful, this will overwrite any existing file with the same name. Logically, this method needs config.protein_db to be defined.

Required Configs
  • config.protein_db

Examples:

>>> import os
>>> assert os.path.exists(os.path.join(Main_Dir,"protein_test_db.fasta"))==False # This is just to make sure that the following lines create the file
>>> db=Database(config=configs.Database(protein_db=os.path.join(Main_Dir,"protein_test_db.fasta"))) # point to a test non-existing file
>>> db.initialize_protein_db() # initialize the protein database
>>> assert os.path.exists(os.path.join(Main_Dir,"protein_test_db.fasta"))==True # check if the file is created
>>> os.remove(os.path.join(Main_Dir,"protein_test_db.fasta")) # remove the file to clean up
Source code in adtoolbox/core.py
def initialize_protein_db(self)->None:
    """This function intializes ADToolbox's protein database by creating an empty fasta file.
    Be careful, this will overwrite any existing file with the same name.
    Logically, this needs method needs config.protein_db to be defined.

    Required Configs:
        - config.protein_db
        --------

    Examples:
        >>> import os
        >>> assert os.path.exists(os.path.join(Main_Dir,"protein_test_db.fasta"))==False # This is just to make sure that the following lines create the file
        >>> db=Database(config=configs.Database(protein_db=os.path.join(Main_Dir,"protein_test_db.fasta"))) # point to a test non-existing file
        >>> db.initialize_protein_db() # initialize the protein database
        >>> assert os.path.exists(os.path.join(Main_Dir,"protein_test_db.fasta"))==True # check if the file is created
        >>> os.remove(os.path.join(Main_Dir,"protein_test_db.fasta")) # remove the file to clean up
    """

    if not (pathlib.Path(self.config.protein_db).parent).exists():
        pathlib.Path(self.config.protein_db).parent.mkdir(parents=True)
    with open(self.config.protein_db, 'w') as f:
        pass

initialize_reaction_db()

This function initializes ADToolbox's reaction database by creating an empty tsv file. Be careful, this will overwrite any existing file with the same name.

Required Configs
  • config.reaction_db

Examples:

>>> import os
>>> assert os.path.exists(os.path.join(Main_Dir,"reaction_test_db.tsv"))==False
>>> db=Database(config=configs.Database(reaction_db=os.path.join(Main_Dir,"reaction_test_db.tsv")))
>>> db.initialize_reaction_db()
>>> assert pd.read_table(os.path.join(Main_Dir,"reaction_test_db.tsv"),delimiter="\t").shape[0]==0
>>> assert set(pd.read_csv(os.path.join(Main_Dir,"reaction_test_db.tsv"),delimiter="\t").columns)==set(["ec_numbers","seed_ids","reaction_names","adm1_reaction","e_adm_reactions","pathways"])
>>> os.remove(os.path.join(Main_Dir,"reaction_test_db.tsv"))
Source code in adtoolbox/core.py
def initialize_reaction_db(self)->None:
    r"""This function intializes ADToolbox's reaction database by creating an empty tsv file.
    Be careful, this will overwrite any existing file with the same name.

    Required Configs:
        - config.reaction_db

    Examples:
        >>> import os
        >>> assert os.path.exists(os.path.join(Main_Dir,"reaction_test_db.tsv"))==False
        >>> db=Database(config=configs.Database(reaction_db=os.path.join(Main_Dir,"reaction_test_db.tsv")))
        >>> db.initialize_reaction_db()
        >>> assert pd.read_table(os.path.join(Main_Dir,"reaction_test_db.tsv"),delimiter="\t").shape[0]==0
        >>> assert set(pd.read_csv(os.path.join(Main_Dir,"reaction_test_db.tsv"),delimiter="\t").columns)==set(["ec_numbers","seed_ids","reaction_names","adm1_reaction","e_adm_reactions","pathways"])
        >>> os.remove(os.path.join(Main_Dir,"reaction_test_db.tsv"))

    """
    pd.DataFrame(columns=["ec_numbers","seed_ids","reaction_names","adm1_reaction","e_adm_reactions","pathways"]).to_csv(self.config.reaction_db,index=False,sep="\t")

proteins_from_ec(ec_number)

This function returns a dictionary of protein sequences for a given EC number. The keys are UniProt IDs joined with the EC number (the header convention of the ADToolbox protein database) and the values are the protein sequences. Since ADToolbox deals with microbial processes, only bacterial and archaeal proteins are considered.

Parameters:

- ec_number (str): The EC number. [required]

Returns:

- dict: A dictionary of protein sequences.

Examples:

>>> db=Database()
>>> protein_seqs=db.proteins_from_ec("1.1.1.1")
>>> assert len(protein_seqs)>0
>>> assert list(protein_seqs.keys())[0].split("|")[1]=="1.1.1.1"
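Since the keys already follow the uniprot_id|ec_number header convention of the protein database, the returned mapping can, for example, be appended to the database fasta file. A minimal sketch; appending directly to config.protein_db is an assumption here, not part of the method:

>>> db=Database()
>>> protein_seqs=db.proteins_from_ec("1.1.1.1")
>>> with open(db.config.protein_db,"a") as f:
...     for header,seq in protein_seqs.items():
...         _=f.write(">"+header+"\n"+seq+"\n")  # header is 'uniprot_id|ec_number'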
Source code in adtoolbox/core.py
def proteins_from_ec(self,ec_number:str) -> dict:
    """
    This function returns a dictionary of protein sequences for a given EC number.
    The keys are UniProt IDs joined with the EC number (the header convention of the
    ADToolbox protein database) and the values are the protein sequences. Since ADToolbox
    deals with microbial processes, only bacterial and archaeal proteins are considered.

    Args:
        ec_number (str): The EC number.

    Returns:
        dict: A dictionary of protein sequences.

    Examples:
        >>> db=Database()
        >>> protein_seqs=db.proteins_from_ec("1.1.1.1")
        >>> assert len(protein_seqs)>0
        >>> assert list(protein_seqs.keys())[0].split("|")[1]=="1.1.1.1"
    """
    session = requests.Session()
    retry = Retry(connect=3, backoff_factor=0.5)
    adapter = HTTPAdapter(max_retries=retry)
    session.mount('http://', adapter)
    protein_seqs={}
    try:
        file = session.get(
            f"https://rest.uniprot.org/uniprotkb/stream?format=fasta&query=%28%28ec%3A{ec_number}%29%20AND%20%28reviewed%3Atrue%29%20NOT%20%28taxonomy_id%3A2759%29%29", timeout=30)
    except (requests.exceptions.HTTPError, requests.exceptions.ConnectionError):
        print("Request Error! Trying again ...")
        time.sleep(30)
        file = session.get(
            f"https://rest.uniprot.org/uniprotkb/stream?format=fasta&query=%28%28ec%3A{ec_number}%29%20AND%20%28reviewed%3Atrue%29%20NOT%20%28taxonomy_id%3A2759%29%29", timeout=30)
    # Catch any other failure; if the request never succeeded there is nothing to parse
    except Exception:
        print('Something went wrong!')
        return protein_seqs
    text = file.text
    if text:
        text=text.split('>')
        text.remove("")
        for seq in text:
            protein_seqs.update([(seq.split("\n")[0].split("|")[1]+"|"+ec_number, "".join(seq.split("\n")[1:]))])


    return protein_seqs

remove_experiment_from_experiments_db(field_name, query)

This function removes from the experiments database any experiment whose value in the given column (field_name) contains the query string.

Required Configs
  • config.experimental_data_db

Parameters:

- field_name (str): The name of the column to query. [required]
- query (str): The query string. [required]

Examples:

>>> import os,json
>>> assert os.path.exists(os.path.join(Main_Dir,"experiments_test_db.json"))==False
>>> db=Database(config=configs.Database(experimental_data_db=os.path.join(Main_Dir,"experiments_test_db.json")))
>>> experiment=Experiment(name="test_study",time=[0,1,2],variables=[2,6],data= [[1,2,3],[4,5,6]],reference="test")
>>> db.add_experiment_to_experiments_db(experiment)
>>> assert os.path.exists(os.path.join(Main_Dir,"experiments_test_db.json"))==True
>>> assert os.path.getsize(os.path.join(Main_Dir,"experiments_test_db.json"))>0
>>> db.remove_experiment_from_experiments_db("name","test_study")
>>> assert pd.read_json(os.path.join(Main_Dir,"experiments_test_db.json")).shape[0]==0
>>> os.remove(os.path.join(Main_Dir,"experiments_test_db.json"))
Source code in adtoolbox/core.py
def remove_experiment_from_experiments_db(self,field_name:str,query:str)->None:
    r"""
    This function removes from the experiments database any experiment whose value in the given column (field_name) contains the query string.

    Required Configs:
        - config.experimental_data_db

    Args:
        field_name (str): The name of the column to query.
        query (str): The query string.

    Examples:
        >>> import os,json
        >>> assert os.path.exists(os.path.join(Main_Dir,"experiments_test_db.json"))==False
        >>> db=Database(config=configs.Database(experimental_data_db=os.path.join(Main_Dir,"experiments_test_db.json")))
        >>> experiment=Experiment(name="test_study",time=[0,1,2],variables=[2,6],data= [[1,2,3],[4,5,6]],reference="test")
        >>> db.add_experiment_to_experiments_db(experiment)
        >>> assert os.path.exists(os.path.join(Main_Dir,"experiments_test_db.json"))==True
        >>> assert os.path.getsize(os.path.join(Main_Dir,"experiments_test_db.json"))>0
        >>> db.remove_experiment_from_experiments_db("name","test_study")
        >>> assert pd.read_json(os.path.join(Main_Dir,"experiments_test_db.json")).shape[0]==0
        >>> os.remove(os.path.join(Main_Dir,"experiments_test_db.json"))
    """
    if not os.path.exists(self.config.experimental_data_db):
        raise FileNotFoundError("Experimental data database does not exist!")

    with open(self.config.experimental_data_db,"r") as f:
        experiments_db=json.load(f)
    experiments_db=[experiment for experiment in experiments_db if query not in experiment[field_name]]
    with open(self.config.experimental_data_db,"w") as f:
        json.dump(experiments_db,f)

remove_feed_from_feed_db(field_name, query)

This function removes from the feed database any feed whose value in the given column (field_name) contains the query string.

Required Configs
  • config.feed_db

Parameters:

- field_name (str): The name of the column to query. [required]
- query (str): The query string. [required]

Examples:

>>> import os
>>> assert os.path.exists(os.path.join(Main_Dir,"feed_test_db.tsv"))==False
>>> db=Database(config=configs.Database(feed_db=os.path.join(Main_Dir,"feed_test_db.tsv")))
>>> feed=Feed(name="test_feed",carbohydrates=10,lipids=20,proteins=30,tss=80,si=10,xi=30,reference="test")
>>> db.add_feed_to_feed_db(feed)
>>> assert os.path.exists(os.path.join(Main_Dir,"feed_test_db.tsv"))==True
>>> assert pd.read_table(os.path.join(Main_Dir,"feed_test_db.tsv"),delimiter="\t").shape[0]>0
>>> db.remove_feed_from_feed_db("name","test_feed")
>>> assert pd.read_table(os.path.join(Main_Dir,"feed_test_db.tsv"),delimiter="\t").shape[0]==0
>>> os.remove(os.path.join(Main_Dir,"feed_test_db.tsv"))
Source code in adtoolbox/core.py
def remove_feed_from_feed_db(self,field_name:str,query:str)->None:
    r"""
    This function removes from the feed database any feed whose value in the given column (field_name) contains the query string.

    Required Configs:
        - config.feed_db

    Args:
        field_name (str): The name of the column to query.
        query (str): The query string.

    Examples:
        >>> import os
        >>> assert os.path.exists(os.path.join(Main_Dir,"feed_test_db.tsv"))==False
        >>> db=Database(config=configs.Database(feed_db=os.path.join(Main_Dir,"feed_test_db.tsv")))
        >>> feed=Feed(name="test_feed",carbohydrates=10,lipids=20,proteins=30,tss=80,si=10,xi=30,reference="test")
        >>> db.add_feed_to_feed_db(feed)
        >>> assert os.path.exists(os.path.join(Main_Dir,"feed_test_db.tsv"))==True
        >>> assert pd.read_table(os.path.join(Main_Dir,"feed_test_db.tsv"),delimiter="\t").shape[0]>0
        >>> db.remove_feed_from_feed_db("name","test_feed")
        >>> assert pd.read_table(os.path.join(Main_Dir,"feed_test_db.tsv"),delimiter="\t").shape[0]==0
        >>> os.remove(os.path.join(Main_Dir,"feed_test_db.tsv"))

    """
    if not os.path.exists(self.config.feed_db):
        raise FileNotFoundError("Feed database does not exist!")


    feed_db=pd.read_table(self.config.feed_db,delimiter="\t")
    feed_db=feed_db[feed_db[field_name].str.contains(query)==False]
    feed_db.to_csv(self.config.feed_db,index=False,sep="\t")

remove_metagenomics_study_from_metagenomics_studies_db(field_name, query)

This function removes from the metagenomics studies database any study whose value in the given column (field_name) contains the query string.

Required Configs
  • config.metagenomics_studies_db

Parameters:

- field_name (str): The name of the column to query. [required]
- query (str): The query string. [required]

Examples:

>>> import os
>>> assert os.path.exists(os.path.join(Main_Dir,"metagenomics_studies_test_db.tsv"))==False
>>> db=Database(config=configs.Database(metagenomics_studies_db=os.path.join(Main_Dir,"metagenomics_studies_test_db.tsv")))
>>> metagenomics_study=MetagenomicsStudy(name="test_study",study_type="metagenomics",microbiome="anaerobic digester",sample_accession="test",comments="test",study_accession="test")
>>> db.add_metagenomics_study_to_metagenomics_studies_db(metagenomics_study)
>>> assert os.path.exists(os.path.join(Main_Dir,"metagenomics_studies_test_db.tsv"))==True
>>> assert pd.read_table(os.path.join(Main_Dir,"metagenomics_studies_test_db.tsv"),delimiter="\t").shape[0]>0
>>> db.remove_metagenomics_study_from_metagenomics_studies_db("name","test_study")
>>> assert pd.read_table(os.path.join(Main_Dir,"metagenomics_studies_test_db.tsv"),delimiter="\t").shape[0]==0
>>> os.remove(os.path.join(Main_Dir,"metagenomics_studies_test_db.tsv"))
Source code in adtoolbox/core.py
def remove_metagenomics_study_from_metagenomics_studies_db(self,field_name:str,query:str)->None:
    r"""
    This function removes from the metagenomics studies database any study whose value in the given column (field_name) contains the query string.

    Required Configs:
        - config.metagenomics_studies_db

    Args:
        field_name (str): The name of the column to query.
        query (str): The query string.

    Examples:
        >>> import os
        >>> assert os.path.exists(os.path.join(Main_Dir,"metagenomics_studies_test_db.tsv"))==False
        >>> db=Database(config=configs.Database(metagenomics_studies_db=os.path.join(Main_Dir,"metagenomics_studies_test_db.tsv")))
        >>> metagenomics_study=MetagenomicsStudy(name="test_study",study_type="metagenomics",microbiome="anaerobic digester",sample_accession="test",comments="test",study_accession="test")
        >>> db.add_metagenomics_study_to_metagenomics_studies_db(metagenomics_study)
        >>> assert os.path.exists(os.path.join(Main_Dir,"metagenomics_studies_test_db.tsv"))==True
        >>> assert pd.read_table(os.path.join(Main_Dir,"metagenomics_studies_test_db.tsv"),delimiter="\t").shape[0]>0
        >>> db.remove_metagenomics_study_from_metagenomics_studies_db("name","test_study")
        >>> assert pd.read_table(os.path.join(Main_Dir,"metagenomics_studies_test_db.tsv"),delimiter="\t").shape[0]==0
        >>> os.remove(os.path.join(Main_Dir,"metagenomics_studies_test_db.tsv"))
    """
    if not os.path.exists(self.config.metagenomics_studies_db):
        raise FileNotFoundError("Metagenomics studies database does not exist!")

    metagenomics_studies_db=pd.read_table(self.config.metagenomics_studies_db,delimiter="\t")
    metagenomics_studies_db=metagenomics_studies_db[metagenomics_studies_db[field_name].str.contains(query)==False]
    metagenomics_studies_db.to_csv(self.config.metagenomics_studies_db,index=False,sep="\t")

8. Metagenomics

Here is a schematic view of core.Metagenomics API:

(Figure: schematic of the core.Metagenomics API)

This is the main class for the metagenomics functionality of ADToolbox. It contains all the methods required for the metagenomics analyses that ADToolbox offers.
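A typical workflow derives the dominant features of a sample from a QIIME2 feature table, maps them to representative GTDB genomes, and then aligns those genomes to the ADToolbox protein database. A minimal sketch, assuming the default configs point at valid files; the sample name and paths below are hypothetical:

from adtoolbox import core, configs

metag=core.Metagenomics(configs.Metagenomics())
top=metag.find_top_taxa("sample_1",10,mode="top_k")       # top 10 features of the sample
script=metag.align_to_gtdb("rep_seqs.fasta","gtdb_out")   # returns a bash script; run it yourself
# after running the script:
# feature_to_genome=metag.get_genomes_from_gtdb_alignment("gtdb_out")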

Source code in adtoolbox/core.py
class Metagenomics:

    """
    This is the main class for Metagenomics functionality of ADToolbox. This class contains all the methods required for metagenomics analysis 
    that ADToolbox offers.
    """
    def __init__(self,config:configs.Metagenomics)->None:
        """In order to instntiate an object from this class, you need to provide a metagenomics configs object from the configs module : configs.Metagenomics.
        Information for inputs and of each method is then obtained from the corresponding configs object. The following example shows how to instantiate an object from this class
        using the default configs object:

        Examples:
            >>> from adtoolbox import core, configs
            >>> config=configs.Metagenomics() ### This uses default arguments. Refer to configs module for more information.
            >>> metagenomics=core.Metagenomics(config)
            >>> assert type(metagenomics)==core.Metagenomics

        Args:
            config (configs.Metagenomics): A metagenomics configs object from configs module.

        Returns:
            None
        """
        self.config=config

    def find_top_taxa(
        self,
        sample_name:str,
        threshold:Union[int,float],
        mode:str='top_k',
        )->dict:
        """
        This function needs three inputs from qiime:
        1. feature table: This is the abundance of each feature in each sample (TSV).
        2. taxonomy table: This is the taxonomy of each feature (TSV). 
        3. rep seqs: This is the representative sequence of each feature (fasta).
        It then finds the top k features, or the features that form a specific percentile of the community of the sample.

        Required Configs:

            config.feature_table_dir: The path to the feature table tsv file.
            ---------
            config.taxonomy_table_dir: The path to the taxonomy table tsv file.
            ---------
            config.rep_seq_fasta: The path to the representative sequence fasta file.
            ---------

        Args:
            sample_name (str): The name of the sample.
            threshold (int, float): The threshold for the top k or the percentile.
            mode (str, optional): Whether to find the top k features or features that form specific percentile of the community of the sample. Defaults to 'top_k'. Options: 'top_k', 'percentile'.

        Returns:
            dict: A dictionary of the top k features and their taxonomy.
        """
        ### Load all the required files
        feature_table = pd.read_table(self.config.feature_table_dir, sep='\t',skiprows=1)
        taxonomy_table = pd.read_table(self.config.taxonomy_table_dir, delimiter='\t')
        repseqs=fasta_to_dict(self.config.rep_seq_fasta)
        ### End Loading
        if mode == 'top_k':
            sorted_df=feature_table.sort_values(sample_name, ascending=False)
            top_featureids=list(sorted_df['#OTU ID'].head(threshold))
            top_taxa=[taxonomy_table[taxonomy_table['Feature ID']==featureid]['Taxon'].values[0] for featureid in top_featureids]
            top_repseqs=[repseqs[featureid] for featureid in top_featureids]
            top_abundances=list(sorted_df[sample_name].head(threshold)/sorted_df[sample_name].sum())

        elif mode == 'percentile':
            feature_table[sample_name]=feature_table[sample_name]/feature_table[sample_name].sum()
            sorted_df=feature_table.sort_values(sample_name, ascending=False)
            sorted_df['cumsum']=sorted_df[sample_name].cumsum()*100
            sorted_df_filtered=sorted_df[sorted_df['cumsum']<=threshold]
            top_featureids=list(sorted_df_filtered['#OTU ID'])
            top_taxa=[taxonomy_table[taxonomy_table['Feature ID']==featureid]['Taxon'].values[0] for featureid in top_featureids]
            top_repseqs=[repseqs[featureid] for featureid in top_featureids]
            top_abundances=sorted_df.loc[sorted_df_filtered.index][sample_name].values.tolist()
        else:
            raise ValueError("mode must be either 'top_k' or 'percentile'")

        return {'top_featureids':top_featureids,'top_taxa':top_taxa,'top_repseqs':top_repseqs,'top_abundances':top_abundances}    


    def align_to_gtdb(self,
                      query_dir:str,
                      output_dir:str,
                      container:str="None")->str:
        """This function takes the representative sequences of the top k features and generates the script to
        align these feature sequences to gtdb using VSEARCH. If you intend to run this you either
        need to have VSEARCH installed or run it with a container option. You can use either the docker or singularity
        as container options. Otherwise you can use None and run it with the assumption that VSEARCH is installed.
        If you only want the script and not to run it, set run to False.

        Required Configs:

            ---------
            config.gtdb_dir_fasta: The path to the gtdb fasta database.
            ---------
            config.vsearch_similarity: The similarity threshold for the alignment to be used by VSEARCH.
            ---------
            config.vsearch_threads: The number of threads to be used by VSEARCH.
            ---------
            config.adtoolbox_docker: The name of the docker image to be used by ADToolbox (Only if using Docker as container).
            ---------
            config.adtoolbox_singularity: The name of the singularity image to be used by ADToolbox (Only if using Singularity as container).
            ---------

        Args:
            query_dir (str): The path to the query fasta file (the representative sequences).
            output_dir (str): The directory where the alignment outputs will be saved.
            container (str, optional): The container to use. Defaults to "None".

        Returns:
            str: The bash script to be run later.
        """
        ### Load all the required files
        alignment_dir = os.path.join(output_dir,'Alignments')
        match_table=os.path.join(output_dir,'matches.blast')
        gtdb_dir_fasta=self.config.gtdb_dir_fasta
        ### End Loading
        query=query_dir
        dirs=[output_dir,
            gtdb_dir_fasta,
            query
            ]
        for dir in dirs:
            if not pathlib.Path(dir).exists():
                os.mkdir(dir)
        if container=="None":
            bash_script=('vsearch --top_hits_only --blast6out '+
                        match_table+
                        ' --usearch_global '+ query +
                        ' --db '+ gtdb_dir_fasta +
                        ' --id ' +str(self.config.vsearch_similarity) +
                        ' --threads '+str(self.config.vsearch_threads)+
                        ' --alnout '+ alignment_dir +
                        ' --top_hits_only'+'\n')

        if container=="docker":
            bash_script='docker run'
            for dir in dirs:
                bash_script+=('-v '+dir+':'+dir+' ')

            bash_script += (self.config.adtoolbox_docker+' vsearch --top_hits_only --blast6out '+
                        match_table+
                        ' --usearch_global '+ query +
                        ' --db '+ gtdb_dir_fasta +
                        ' --id ' +str(self.config.vsearch_similarity) +
                        ' --threads '+str(self.config.vsearch_threads)+
                        ' --alnout '+ alignment_dir +
                        ' --top_hits_only'+'\n')

        if container=="singularity":
            bash_script='singularity exec '
            for dir in dirs:
                bash_script+=('-B '+str(dir)+':'+str(dir)+' ')

            bash_script += (self.config.adtoolbox_singularity+' vsearch --top_hits_only --blast6out '+
                        match_table+
                        ' --usearch_global '+ str(query) +
                        ' --db '+ gtdb_dir_fasta +
                        ' --id ' +str(self.config.vsearch_similarity) +
                        ' --threads '+str(self.config.vsearch_threads)+
                        ' --alnout '+ alignment_dir +
                        ' --top_hits_only'+'\n')
        return bash_script



    def get_genomes_from_gtdb_alignment(self,alignment_dir:str)->dict:
        """This function takes the alignment file generated from the align_to_gtdb function and generates the the genome information
        using the GTDB-Tk. In the outputted dictionary, the keys are feature ids and the values are the representative genomes.

        Required Configs:
            config.align_to_gtdb_outputs_dir: The path to the directory where the outputs of the align_to_gtdb function are saved.
            ---------
            config.feature_to_taxa: The path to the json file where the json file including feature ids and the representative genomes will be saved.

        Args:
            save (bool, optional): Whether to save the json file or not. Defaults to True.
        """
        matches = os.path.join(alignment_dir,'matches.blast')
        aligned=pd.read_table(matches,header=None,delimiter='\t')
        aligned.drop_duplicates(0,inplace=True)
        aligned[1]=aligned[1].apply(lambda x: ("".join(x.split('_')[1:])).split("~")[0])
        alignment_dict=dict(zip(aligned[0],aligned[1]))


        return alignment_dict


    def download_genome(self,identifier:str,output_dir:str,container:str="None")-> str:
        """This function downloads the genomes from NCBI using the refseq/genbank identifiers.
        Note that this function uses rsync to download the genomes. 

        Required Configs:
            config.genomes_base_dir: The path to the base directory where the genomes will be saved.
            ---------
            config.adtoolbox_docker: The name of the docker image to be used by ADToolbox (Only if using Docker as container).
            ---------
            config.adtoolbox_singularity: The name of the singularity image to be used by ADToolbox (Only if using Singularity as container).
            ---------
        Args:
            identifier (str): The RefSeq/GenBank identifier of the genome.
            output_dir (str): The directory where the genome will be downloaded.
            container (str, optional): The container to use. Defaults to "None". You may select from "None", "docker", "singularity".

        Returns:
            str: The bash script that is used to download the genomes or to be used to download the genomes.

        """
        base_ncbi_dir = 'rsync://ftp.ncbi.nlm.nih.gov/genomes/all/'
        bash_script=""

        specific_ncbi_dir = identifier[0:3]+'/'+\
                            identifier[3:6]+'/'+\
                            identifier[6:9]+'/'+\
                            identifier[9:].split('.')[0]

        genome_dir=pathlib.Path(output_dir)

        if container=="None":
            bash_script+=('rsync -avz --progress '+base_ncbi_dir+specific_ncbi_dir+' '+str(genome_dir))

        if container=="docker":
            bash_script+=('docker run -it -v '+str(genome_dir.parent)+':'+str(genome_dir.parent)+ f' {self.config.adtoolbox_docker} rsync -avz --progress '+' '+base_ncbi_dir+specific_ncbi_dir+' '+str(genome_dir))

        if container=="singularity":
            bash_script+=('singularity exec -B '+str(genome_dir.parent)+':'+str(genome_dir.parent)+ f' {self.config.adtoolbox_singularity} rsync -avz --progress '+' '+base_ncbi_dir+specific_ncbi_dir+' '+str(genome_dir))

        return bash_script

    def async_genome_downloader(self,identifiers:Iterable[str],batch_size:float=10,container:str="None"):
        sem=asyncio.Semaphore(batch_size)
        asyncio.run(self._collect_coros(identifiers=identifiers,semaphore=sem,container=container))

    async def _collect_coros(self,identifiers:Iterable[str],semaphore:asyncio.Semaphore,container:str="None"):
        await asyncio.gather(*[self._genome_dl_coro(identifier=i,semaphore=semaphore,container=container) for i in identifiers])

    async def _genome_dl_coro(self,identifier:str,semaphore:asyncio.Semaphore,container:str="None")->None:
        async with semaphore:
            # download into the configured genomes base directory
            await asyncio.create_subprocess_exec(*self.download_genome(identifier=identifier,output_dir=self.config.genomes_base_dir,container=container).split(" "))

    def extract_genome_info(self,
                            endpattern:str="genomic.fna.gz",
                            filters:dict={
                                          "INCLUDE":[],
                                          "EXCLUDE":["cds","rna"],
                                            })->dict[str,str]:
        """This function extracts the genome information from the genomes base directory. The output
        is a dictionary where the keys are the genome IDs and the values are the paths to the genome files.

        Required Configs:
            config.genomes_base_dir: The path to the base directory where the genomes are saved.
            ---------
        Args:
            endpattern (str, optional): The end pattern of the genome files. Defaults to "genomic.fna.gz".
            filters (dict, optional): The filters to be applied to the genome file names. This filter must be a
                dictionary with two keys: INCLUDE and EXCLUDE. The values of these keys must be lists of strings.
                Defaults to {"INCLUDE":[],"EXCLUDE":["cds","rna"]}. This default is compatible with the genomes downloaded
                from NCBI, i.e. only change this if you are providing your own genomes with different file naming conventions.
        Returns:
            dict[str,str]: A dictionary containing the address of the genomes that are downloaded or to be downloaded.
        """
        base_dir = pathlib.Path(self.config.genomes_base_dir)
        genome_info = {}
        for genome_dir in base_dir.iterdir():
            if genome_dir.is_dir():
                candids=list(genome_dir.rglob(f'*{endpattern}'))
                for candid in candids:
                    if all([i in candid.name for i in filters["INCLUDE"]]) and all([i not in candid.name for i in filters["EXCLUDE"]]):
                        genome_info[genome_dir.name]=str(candid.absolute())           
        return genome_info

    def align_genome_to_protein_db(
            self,
            address:str,
            outdir:str,
            name:str,
            container:str="None",
            )->tuple[str,str]:
        """
        This function aligns a genome to the protein database of ADToolbox using mmseqs2 and returns the
        alignment script together with the path of the alignment table. To run the script without a container
        you need mmseqs2 installed on your system; otherwise you may select "docker" or "singularity" as the
        container option (the corresponding container runtime must be installed).

        Requires:
            config.genome_alignment_output: The path to the directory where the alignment results will be saved.
            ---------
            config.protein_db: The path to the ADToolbox protein database in fasta.
            ---------
            config.adtoolbox_docker: The name of the docker image to be used by ADToolbox (Only if using Docker as container).
            ---------
            config.adtoolbox_singularity: The name of the singularity image to be used by ADToolbox (Only if using Singularity as container).
            ---------
        Args:
            address (str): The address of the genome fasta file. The file must be in fasta format.
            outdir (str): The directory where the alignment table will be saved.
            name (str): A name used to label the alignment output file.
            container (str, optional): The container to use. Defaults to "None". You may select from "None", "docker", "singularity".

        Returns:
            str: The bash script that runs the alignment.
            str: The path of the alignment table file.
        """

        if container=="None":
            bash_script = ""
            alignment_file=os.path.join(outdir,"Alignment_Results_mmseq_"+name+".tsv")
            bash_script += "mmseqs easy-search " + \
                address + " " + \
                self.config.protein_db + " " + \
                alignment_file+ ' tmp --format-mode 4 '+"\n\n"

        if container=="docker":
            bash_script = ""
            alignment_file=os.path.join(outdir,"Alignment_Results_mmseq_"+name+".tsv")
            bash_script +="docker run -it "+ \
            " -v "+address+":"+address+ \
            " -v "+self.config.protein_db+":"+self.config.protein_db+ \
            " -v "+outdir+":"+outdir+ \
            f" {self.config.adtoolbox_docker}  mmseqs easy-search " + \
                address + " " + \
                self.config.protein_db + " " + \
                alignment_file+' tmpfiles --format-mode 4 '+"\n\n"

        if container=="singularity":
            bash_script = ""
            alignment_file=os.path.join(outdir,"Alignment_Results_mmseq_"+name+".tsv")
            bash_script +="singularity exec "+ \
            " -B "+address+":"+address+ \
            " -B "+self.config.protein_db+":"+self.config.protein_db+ \
            " -B "+outdir+":"+outdir+ \
            f" {self.config.adtoolbox_singularity}  mmseqs easy-search " + \
                address + " " + \
                self.config.protein_db + " " + \
                alignment_file+' tmpfiles --format-mode 4 '+"\n\n"

        return  bash_script,alignment_file

    def align_short_reads_to_protein_db(self,query_seq:str,
                                        alignment_file_name:str,
                                        container:str="None",
                                        )->tuple[str,str]:
        """This function aligns shotgun short reads to the protein database of the ADToolbox using mmseqs2.
        mmseqs wrappers in utils are used to perform this task. The result of this task is an alignment table.

        Required Configs:

            protein_db_mmseqs (str): The address of the existing/to be created protein database of the ADToolbox for mmseqs.
            --------
        Args:
            query_seq (str): The address of the query sequence.
            alignment_file_name (str): The name of the alignment file.
            container (str, optional): The container to use. Defaults to "None". You may select from "None", "docker", "singularity".


        Returns:
            str: The bash script that is used to align the genomes or to be used to align the genomes.
            str: The address of the alignment file.
        """
        if not pathlib.Path(self.config.protein_db_mmseqs).exists():
            raise FileNotFoundError("""The protein database of the ADToolbox for mmseqs is not found. Please build it first
                                    using Database.build_mmseqs_database method.""")
        path_query=pathlib.Path(query_seq)
        script = ""
        script += create_mmseqs_database(query_seq,str(path_query.parent/path_query.name.split(".")[0]),container=container,save=None,run=False)+"\n"
        script += mmseqs_search(
            query_db=str(path_query.parent/path_query.name.split(".")[0]),
            target_db=self.config.protein_db_mmseqs,
            results_db=path_query.parent/alignment_file_name,
            run=False,
            save=None,
            container=container,
        )+"\n"
        script += mmseqs_result_db_to_tsv(
            query_db=str(path_query.parent/path_query.name.split(".")[0]),
            target_db=self.config.protein_db_mmseqs,
            results_db=path_query.parent/alignment_file_name,
            tsv_file=path_query.parent/(alignment_file_name+".tsv"),
            container=container,
            save=None,
            run=False,)+"\n"
        return script,path_query.parent/(alignment_file_name+".tsv")

    def extract_ec_from_alignment(self,alignment_file:str)->dict[str,int]:
        """
        This function extracts the number of times an EC number is found in the alignment file when aligned to ADToolbox protein database.

        Required Configs:
            config.e_value: The e-value threshold for filtering the alignment table.
            ---------
            config.bit_score: The bit score threshold for filtering the alignment table.
            ---------
        Args:
            alignment_file (str): The address of the alignment file.

        Returns:
            dict: A dictionary of EC numbers and their counts.

        """
        alignment_table = pd.read_table(alignment_file,sep='\t')
        alignment_table = alignment_table[(alignment_table['evalue']<self.config.e_value)&(alignment_table['bits']>self.config.bit_score)]
        alignment_table["target"]=alignment_table["target"].apply(lambda x:x.split("|")[1])
        ec_counts=alignment_table["target"].value_counts().to_dict()
        return ec_counts

    def get_cod_from_ec_counts(self,ec_counts:dict)->dict:
        """This function takes a json file that comtains ec counts and converts it to ADM microbial agents counts.
        Required Configs:
            config.adm_mapping : A dictionary that maps ADM reactions to ADM microbial agents.
            ---------
            config.csv_reaction_db : The address of the reaction database of ADToolbox.
            ---------
            config.adm_cod_from_ec  : The address of the json file that the results will be saved in.
            ---------
        Args:
            ec_counts (dict): A dictionary containing the counts for each ec number.  
        Returns:
            dict: A dictionary containing the ADM microbial agents counts.
        """
        reaction_db = pd.read_table(self.config.csv_reaction_db, sep=',').drop_duplicates("EC_Numbers")
        reaction_db.set_index("EC_Numbers",inplace=True)
        adm_reactions_agents = {k:0 for k in self.config.adm_mapping.keys()}
        for ec in ec_counts.keys():
            l=reaction_db.loc[ec,"e_adm_Reactions"].split("|")
            for adm_rxn in l: 
                adm_reactions_agents[adm_rxn]+=ec_counts[ec]
        adm_microbial_agents={}
        for k,v in self.config.adm_mapping.items():
            adm_microbial_agents[v]=adm_reactions_agents[k]
        return adm_microbial_agents

    def calculate_group_abundances(self,elements_feature_abundances:dict[str,dict],rel_abund:dict[str,dict])->dict[str,dict[str,float]]:
        """
        This method calculates the group abundances of features for each sample given:
        1) The relative abundances of the genomes in each sample:
            - In this dictionary the keys are the sample names and the values are dictionaries where the keys are the genome names and the values are the relative abundances of the genomes in the sample.
        2) The relative abundances of the elements in each genome.
            - In this dictionary the keys are the genome names and the values are dictionaries where the keys are the element names and the values are the relative abundances of the elements in the genome.

        Required Configs:
            None

        Args:
            elements_feature_abundances (dict[str,dict]): A dictionary containing the relative abundances of the elements in each genome.
            rel_abund (dict[str,dict]): A dictionary containing the relative abundances of the genomes in each sample.

        Returns:
            dict[str,dict[str,float]]: A dictionary containing the relative abundances of the elements in each sample.
        """
        out={}
        df=pd.DataFrame(elements_feature_abundances).T.fillna(0)
        for sample,abunds in rel_abund.items():
            out[sample]=scaler(pd.DataFrame(df.loc[abunds.keys(),:].multiply(list(abunds.values()),axis=0).sum(axis=0)).T).to_dict(orient="records")[0]
        return out

    def extract_relative_abundances(self,feature_table_dir:str,sample_names:Union[list[str],None]=None,top_k:int=-1)->dict:

        """
        This method extracts the relative abundances of the features in each sample from the feature table. The feature table must follow the qiime2 feature-table format.
        NOTE: The final feature abundances sum to 1 for each sample.
        Required Configs:
            None
        Args:
            feature_table_dir (str): The path to the feature table.
            sample_names (Union[list[str],None], optional): The list of sample names to be considered. If None, all the samples will be considered. Defaults to None.
            top_k (int, optional): The number of top features to be used. If -1, all the features will be used. Defaults to -1.

        Returns:
            dict: A dictionary containing the relative abundances of the features in each sample.
        """
        feature_table = pd.read_table(feature_table_dir,sep='\t',skiprows=1)
        if sample_names is None:
            sample_names = feature_table.columns[1:]
        relative_abundances={sample:[] for sample in sample_names}
        if top_k == -1:
            top_k = feature_table.shape[0]
        for sample in sample_names:
            relative_abundances[sample]=(feature_table.sort_values(sample,ascending=False).head(top_k)[sample]/(feature_table.sort_values(sample,ascending=False).head(top_k)[sample].sum())).to_dict()
        return relative_abundances

    def assign_ec_to_genome(self,alignment_file:str)->dict:
        """
        This function takes an alignment file and assigns EC numbers to the genome based on the alignment file
        and the e-adm groupings of the EC numbers. The output is a dictionary where the keys are e-adm reactions
        and the values are the EC numbers found in the genome that are grouped under that reaction.

        Args:
            alignment_file (str): The address of the alignment file.

        Returns:
            dict: A dictionary containing the e-adm reactions and the EC numbers that are found in the genome and are grouped under the e-adm reaction.
        """

        aligntable = pd.read_table(alignment_file,delimiter="\t")
        aligntable = aligntable[(aligntable["bits"]>self.config.bit_score) & (aligntable["evalue"]<self.config.e_value)]

        ec_align_list = aligntable["target"].str.split("|",expand=True)
        ec_align_list = list(ec_align_list[1].unique()) 

        metadatatable = pd.read_table(self.config.csv_reaction_db, sep=',').drop_duplicates("EC_Numbers")[(['EC_Numbers','Modified_ADM_Reactions'])].dropna(axis=0)
        metadatatable=metadatatable[metadatatable["EC_Numbers"].isin(ec_align_list)]
        adm_reactions=list(set(metadatatable["Modified_ADM_Reactions"].str.split("|").sum()))
        adm_to_ecs={}
        for reaction in adm_reactions:
            adm_to_ecs[reaction]=list(metadatatable[metadatatable["Modified_ADM_Reactions"].str.contains(reaction)]["EC_Numbers"])

        return adm_to_ecs




    def seqs_from_sra(self,accession:str,target_dir:str,container:str="None")-> tuple[str,dict]:
        """ 
        This method downloads the fastq files from the SRA database using the accession number of the sample or run (sample accessions only, NOT project accessions).
        The method uses the fasterq-dump tool to download the fastq files. This method also extracts the sample metadata from the SRA database for future use.
        #NOTE In order for this method to work without any container, you need to have the SRA toolkit installed on your system or
        at least have prefetch and fasterq-dump installed on your system. For more information on how to install the SRA toolkit, please refer to the following link:
        https://github.com/ncbi/sra-tools

        Required Configs:
            None


        Args:
            accession (str): The SRA sample or run accession number (not a project accession)
            target_dir (str): The directory where the fastq files will be downloaded
            container (str, optional): The containerization tool that will be used to run the bash scripts. Defaults to "None". Options are "None","docker","singularity"

        Returns:
            prefetch_script (str): The bash script that will be used to download the SRA files in python string format
            sample_metadata (dict): A dictionary that contains the sample metadata

        """   
        if container=="None":
            prefetch_script=f"""#!/bin/bash\nprefetch {accession} -O {target_dir}"""
            acc_folder=pathlib.Path(target_dir)/accession
            fasterq_dump_script=""
            sra_file=acc_folder/(accession+".sra")
            fasterq_dump_script+=f"\nfasterq-dump {sra_file} -O {acc_folder} --split-files"
            fasterq_dump_script+=f"\nrm {sra_file}"

            prefetch_script+=fasterq_dump_script


        elif container=="docker":
            warn("Docker is not supported yet")

        sample_metadata=utils.get_sample_metadata_from_accession(accession)      


        return prefetch_script,sample_metadata     




    def run_qiime2_from_sra(self,
                            read_1:str,
                            read_2:str|None,
                            sample_name:str|None=None,
                            manifest_dir:str|None=None,
                            workings_dir:str|None=None,
                            save_manifest:bool=True,
                            container:str='None') -> tuple[str,str]:
        """
        This method uses the input fastq files to run qiime2. The method uses the qiime2 template scripts that are provided in pkg_data module.
        The method also creates a manifest file for qiime2. The manifest file is created based on the input fastq files.
        Required Configs:
            config.qiime2_single_end_bash_str: The path to the qiime2 bash script for single end reads.
            ---------
            config.qiime2_paired_end_bash_str: The path to the qiime2 bash script for paired end reads.
            ---------
            config.qiime_classifier_db: The path to the qiime2 classifier database.
            ---------
            config.qiime2_docker_image: The name of the docker image to be used by ADToolbox (Only if using Docker as container).
            ---------
            config.qiime2_singularity_image: The name of the singularity image to be used by ADToolbox (Only if using Singularity as container).
            ---------
        Args:
            read_1 (str): directory of the forward reads file
            read_2 (str): directory of the reverse reads file. This is provided only if the reads are paired end; pass None otherwise.
            sample_name (str, optional): The name of the sample. If None, the name of the sample will be the name of the directory where the fastq files are located. Defaults to None.
            manifest_dir (str, optional): The directory where the manifest file will be saved. If None, the manifest file will be saved in the same directory as the fastq files. Defaults to None.
            workings_dir (str, optional): The directory where the qiime2 outputs will be saved. If None, the outputs will be saved in the same directory as the fastq files. Defaults to None.
            container (str, optional): If you want to run the qiime2 commands in a container, specify the container name here. Defaults to 'None'.
        Returns:
            qiime2_bash_str (str): The bash script that will be used to run qiime2 in python string format
            manifest (dict): The manifest file that will be used to run qiime2 in python dictionary format


        """

        if sample_name is None:
            sample_name=str(pathlib.Path(read_1).parent.name)
        if manifest_dir is None:
            manifest_dir=pathlib.Path(read_1).parent
        else:
            manifest_dir=pathlib.Path(manifest_dir)

        if workings_dir is None:
            workings_dir=pathlib.Path(read_1).parent
        else:
            workings_dir=pathlib.Path(workings_dir)


        manifest_single={'sample-id':[],'absolute-filepath':[]}
        manifest_paired={'sample-id':[],'forward-absolute-filepath':[],'reverse-absolute-filepath':[]}  
        if read_2 is not None:
            manifest_paired['sample-id'].append(sample_name)
            manifest_paired['forward-absolute-filepath'].append(read_1)
            manifest_paired['reverse-absolute-filepath'].append(read_2)
            paired_end=True
        else:
            manifest_single['sample-id'].append(sample_name)
            manifest_single['absolute-filepath'].append(read_1)
            paired_end=False

        manifest=pd.DataFrame(manifest_single) if not paired_end else pd.DataFrame(manifest_paired)

        if paired_end:
            with open(self.config.qiime2_paired_end_bash_str,"r") as f:
                qiime2_bash_str=f.read()
        else:
            with open(self.config.qiime2_single_end_bash_str,"r") as f:
                qiime2_bash_str=f.read()

        if container=="None":
            qiime2_bash_str=qiime2_bash_str.replace("<manifest>",str(manifest_dir))
            qiime2_bash_str=qiime2_bash_str.replace("<qiime2_work_dir>",str(workings_dir))
            qiime2_bash_str=qiime2_bash_str.replace("<classifier>",str(self.config.qiime_classifier_db))

        elif container=="docker":
            qiime2_bash_str=qiime2_bash_str.splitlines()
            for idx,line in enumerate(qiime2_bash_str):
                line=line.lstrip()
                if line.startswith("qiime") or line.startswith("biom"):
                    if not paired_end:
                        pec=""
                    else:
                        pec="-v "+read_2+":"+read_2+" "
                    qiime2_bash_str[idx]=f"docker run --env TMPDIR=/data/tmp -v {str(manifest_dir)}:{str(manifest_dir)} -v {read_1}:{read_1} -v {read_2}:{read_2} {pec} -v {self.config.qiime_classifier_db}:{self.config.qiime_classifier_db} -w /data  {self.config.qiime2_docker_image}"+" "+line
            qiime2_bash_str="\n".join(qiime2_bash_str)
            qiime2_bash_str=qiime2_bash_str.replace("<manifest>",os.path.join(str(manifest_dir),"manifest.tsv"))
            qiime2_bash_str=qiime2_bash_str.replace("<qiime2_work_dir>",str(workings_dir))
            qiime2_bash_str=qiime2_bash_str.replace("<classifier>",self.config.qiime_classifier_db)
            if not paired_end:
                manifest['absolute-filepath']=[x for x in manifest['absolute-filepath']]

            else:
                manifest['forward-absolute-filepath']=[x for x in manifest['forward-absolute-filepath']]
                manifest['reverse-absolute-filepath']=[x for x in manifest['reverse-absolute-filepath']]

        elif container=="singularity":
            qiime2_bash_str=qiime2_bash_str.splitlines()
            for idx,line in enumerate(qiime2_bash_str):
                line=line.lstrip()
                if line.startswith("qiime") or line.startswith("biom"):
                    qiime2_bash_str[idx]=f"singularity exec --bind  {str(seqs)}:{str(seqs)},$PWD:$PWD,{str(Path(self.config.qiime_classifier_db))}:{str(Path(self.config.qiime_classifier_db))},$SINGULARITY_TMPDIR:/tmp  {self.config.qiime2_singularity_image} " +line
            qiime2_bash_str="\n".join(qiime2_bash_str)
            qiime2_bash_str=qiime2_bash_str.replace("<manifest>",str(manifest_dir))
            qiime2_bash_str=qiime2_bash_str.replace("<qiime2_work_dir>",str(seqs))
            qiime2_bash_str=qiime2_bash_str.replace("<classifier>",str(Path(self.config.qiime_classifier_db)))

        else:
            raise ValueError("Container must be None, singularity or docker")

        if save_manifest:
            manifest.to_csv(os.path.join(manifest_dir,"manifest.tsv"),sep="\t",index=False)
        return qiime2_bash_str,manifest

__init__(config)

In order to instantiate an object from this class, you need to provide a metagenomics configs object from the configs module: configs.Metagenomics. Information for the inputs of each method is then obtained from the corresponding configs object. The following example shows how to instantiate an object from this class using the default configs object:

Examples:

>>> from adtoolbox import core, configs
>>> config=configs.Metagenomics() ### This uses default arguments. Refer to configs module for more information.
>>> metagenomics=core.Metagenomics(config)
>>> assert type(metagenomics)==core.Metagenomics

Parameters:

- config (configs.Metagenomics): A metagenomics configs object from the configs module. [required]

Returns:

- None

Source code in adtoolbox/core.py
def __init__(self,config:configs.Metagenomics)->None:
    """In order to instntiate an object from this class, you need to provide a metagenomics configs object from the configs module : configs.Metagenomics.
    Information for inputs and of each method is then obtained from the corresponding configs object. The following example shows how to instantiate an object from this class
    using the default configs object:

    Examples:
        >>> from adtoolbox import core, configs
        >>> config=configs.Metagenomics() ### This uses default arguments. Refer to configs module for more information.
        >>> metagenomics=core.Metagenomics(config)
        >>> assert type(metagenomics)==core.Metagenomics

    Args:
        config (configs.Metagenomics): A metagenomics configs object from configs module.

    Returns:
        None
    """
    self.config=config

align_genome_to_protein_db(address, outdir, name, container='None')

This function aligns a genome to the protein database of ADToolbox using mmseqs2 and returns the alignment script together with the path of the alignment table. To run the script without a container you need mmseqs2 installed on your system; otherwise you may select "docker" or "singularity" as the container option (the corresponding container runtime must be installed).

Requires

config.genome_alignment_output: The path to the directory where the alignment results will be saved.

config.protein_db: The path to the ADToolbox protein database in fasta.

config.adtoolbox_docker: The name of the docker image to be used by ADToolbox (Only if using Docker as container).

config.adtoolbox_singularity: The name of the singularity image to be used by ADToolbox (Only if using Singularity as container).

Parameters:

- address (str): The address of the genome fasta file. The file must be in fasta format. [required]
- outdir (str): The directory where the alignment table will be saved. [required]
- name (str): A name used to label the alignment output file. [required]
- container (str, optional): The container to use. You may select from "None", "docker", "singularity". [default: 'None']

Returns:

- str: The bash script that runs the alignment.
- str: The path of the alignment table file.
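In practice you generate the script first and execute it yourself, for example with subprocess. A minimal sketch; the genome path and output directory below are hypothetical, and container="None" assumes mmseqs2 is on your PATH:

>>> from adtoolbox import core, configs
>>> import subprocess
>>> metag=core.Metagenomics(configs.Metagenomics())
>>> script,alignment_file=metag.align_genome_to_protein_db("/path/to/genome.fna","/path/to/out","genome_1")
>>> # subprocess.run(script,shell=True)  # run the alignment, then parse alignment_file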

Source code in adtoolbox/core.py
def align_genome_to_protein_db(
        self,
        address:str,
        outdir:str,
        name:str,
        container:str="None",
        )->tuple[str,str]:
    """
    This function aligns a genome to the protein database of ADToolbox using mmseqs2 and returns the
    alignment script together with the path of the alignment table. To run the script without a container
    you need mmseqs2 installed on your system; otherwise you may select "docker" or "singularity" as the
    container option (the corresponding container runtime must be installed).

    Requires:
        config.genome_alignment_output: The path to the directory where the alignment results will be saved.
        ---------
        config.protein_db: The path to the ADToolbox protein database in fasta.
        ---------
        config.adtoolbox_docker: The name of the docker image to be used by ADToolbox (Only if using Docker as container).
        ---------
        config.adtoolbox_singularity: The name of the singularity image to be used by ADToolbox (Only if using Singularity as container).
        ---------
    Args:
        address (str): The address of the genome fasta file. The file must be in fasta format.
        run (bool, optional): Whether to run the alignment. Defaults to True.
        save (bool, optional): Whether to save the alignment scripts. Defaults to True.
        container (str, optional): The container to use. Defaults to "None". You may select from "None", "docker", "singularity".

    Returns:
        str: A dictionary containing the alignment files.
        str: The bash script that is used to align the genomes or to be used to align the genomes.
    """

    if container=="None":
        bash_script = ""
        alignment_file=os.path.join(outdir,"Alignment_Results_mmseq_"+name+".tsv")
        bash_script += "mmseqs easy-search " + \
            address + " " + \
            self.config.protein_db + " " + \
            alignment_file+ ' tmp --format-mode 4 '+"\n\n"

    if container=="docker":
        bash_script = ""
        alignment_file=os.path.join(outdir,"Alignment_Results_mmseq_"+name+".tsv")
        bash_script +="docker run -it "+ \
        " -v "+address+":"+address+ \
        " -v "+self.config.protein_db+":"+self.config.protein_db+ \
        " -v "+outdir+":"+outdir+ \
        f" {self.config.adtoolbox_docker}  mmseqs easy-search " + \
            address + " " + \
            self.config.protein_db + " " + \
            alignment_file+' tmpfiles --format-mode 4 '+"\n\n"

    if container=="singularity":
        bash_script = ""
        alignment_file=os.path.join(outdir,"Alignment_Results_mmseq_"+name+".tsv")
        bash_script +="singularity exec "+ \
        " -B "+address+":"+address+ \
        " -B "+self.config.protein_db+":"+self.config.protein_db+ \
        " -B "+outdir+":"+outdir+ \
        f" {self.config.adtoolbox_singularity}  mmseqs easy-search " + \
            address + " " + \
            self.config.protein_db + " " + \
            alignment_file+' tmpfiles --format-mode 4 '+"\n\n"

    return  bash_script,alignment_file
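
A minimal usage sketch for this method; the genome path, output directory, and name below are hypothetical:

from adtoolbox import core, configs
import subprocess

metag = core.Metagenomics(configs.Metagenomics())
# Build the mmseqs2 command for a hypothetical genome file.
script, alignment_tsv = metag.align_genome_to_protein_db(
    address="/data/genomes/my_genome.fna",
    outdir="/data/alignments",
    name="my_genome",
    container="None",  # assumes mmseqs2 is on PATH
)
# The method only builds the script; execute it yourself:
subprocess.run(script, shell=True, check=True)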

align_short_reads_to_protein_db(query_seq, alignment_file_name, container='None')

This function aligns shotgun short reads to the protein database of the ADToolbox using mmseqs2. mmseqs wrappers in utils are used to perform this task. The result of this task is an alignment table.

Required Configs

protein_db_mmseqs (str): The address of the existing/to be created protein database of the ADToolbox for mmseqs.

Parameters:

query_seq (str): The address of the query sequence. Required.

alignment_file_name (str): The name of the alignment file. Required.

container (str): The container to use. You may select from "None", "docker", "singularity". Defaults to 'None'.

Returns:

str: The bash script that is used, or to be used, to align the reads.

str: The address of the alignment file.

Source code in adtoolbox/core.py
def align_short_reads_to_protein_db(self,query_seq:str,
                                    alignment_file_name:str,
                                    container:str="None",
                                    )->tuple[str,str]:
    """This function aligns shotgun short reads to the protein database of the ADToolbox using mmseqs2.
    mmseqs wrappers in utils are used to perform this task. The result of this task is an alignment table.

    Required Configs:

        protein_db_mmseqs (str): The address of the existing/to be created protein database of the ADToolbox for mmseqs.
        --------
    Args:
        query_seq (str): The address of the query sequence.
        alignment_file_name (str): The name of the alignment file.
        container (str, optional): The container to use. Defaults to "None". You may select from "None", "docker", "singularity".


    Returns:
        str: The bash script that is used, or to be used, to align the reads.
        str: The address of the alignment file.
    """
    if not pathlib.Path(self.config.protein_db_mmseqs).exists():
        raise FileNotFoundError("""The protein database of the ADToolbox for mmseqs is not found. Please build it first
                                using Database.build_mmseqs_database method.""")
    path_query=pathlib.Path(query_seq)
    script = ""
    script += create_mmseqs_database(query_seq,str(path_query.parent/path_query.name.split(".")[0]),container=container,save=None,run=False)+"\n"
    script += mmseqs_search(
        query_db=str(path_query.parent/path_query.name.split(".")[0]),
        target_db=self.config.protein_db_mmseqs,
        results_db=path_query.parent/alignment_file_name,
        run=False,
        save=None,
        container=container,
    )+"\n"
    script += mmseqs_result_db_to_tsv(
        query_db=str(path_query.parent/path_query.name.split(".")[0]),
        target_db=self.config.protein_db_mmseqs,
        results_db=path_query.parent/alignment_file_name,
        tsv_file=path_query.parent/(alignment_file_name+".tsv"),
        container=container,
        save=None,
        run=False,)+"\n"
    return script,path_query.parent/(alignment_file_name+".tsv")
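
A minimal usage sketch; the query path is hypothetical, and config.protein_db_mmseqs must already exist (built with Database.build_mmseqs_database):

from adtoolbox import core, configs
import subprocess

metag = core.Metagenomics(configs.Metagenomics())
script, alignment_tsv = metag.align_short_reads_to_protein_db(
    query_seq="/data/reads/sample1.fastq",   # hypothetical shotgun reads
    alignment_file_name="sample1_alignment",
    container="None",                        # assumes mmseqs2 is on PATH
)
subprocess.run(script, shell=True, check=True)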

align_to_gtdb(query_dir, output_dir, container='None')

This function takes the representative sequences of the top k features and generates the script to align these feature sequences to GTDB using VSEARCH. To run the generated script you either need VSEARCH installed, or you can use "docker" or "singularity" as the container option; with "None" the script assumes VSEARCH is installed. The function only generates the script; it does not run it.

Required Configs

config.gtdb_dir_fasta: The path to the gtdb fasta database.

config.vsearch_similarity: The similarity threshold for the alignment to be used by VSEARCH.

config.vsearch_threads: The number of threads to be used by VSEARCH.

config.adtoolbox_docker: The name of the docker image to be used by ADToolbox (Only if using Docker as container).

config.adtoolbox_singularity: The name of the singularity image to be used by ADToolbox (Only if using Singularity as container).

Parameters:

query_dir (str): The path to the query fasta file containing the representative sequences. Required.

output_dir (str): The directory where the alignment outputs will be saved. Required.

container (str): The container to use. Defaults to "None".

Returns:

str: The script that is supposed to be run later.

Source code in adtoolbox/core.py
def align_to_gtdb(self,
                  query_dir:str,
                  output_dir:str,
                  container:str="None")->str:
    """This function takes the representative sequences of the top k features and generates the script to
    align these feature sequences to gtdb using VSEARCH. If you intend to run this you either
    need to have VSEARCH installed or run it with a container option. You can use either the docker or singularity
    as container options. Otherwise you can use None and run it with the assumption that VSEARCH is installed.
    The function only generates the script; it does not run it.

    Required Configs:

        ---------
        config.gtdb_dir_fasta: The path to the gtdb fasta database.
        ---------
        config.vsearch_similarity: The similarity threshold for the alignment to be used by VSEARCH.
        ---------
        config.vsearch_threads: The number of threads to be used by VSEARCH.
        ---------
        config.adtoolbox_docker: The name of the docker image to be used by ADToolbox (Only if using Docker as container).
        ---------
        config.adtoolbox_singularity: The name of the singularity image to be used by ADToolbox (Only if using Singularity as container).
        ---------

    Args:
        query_dir (str): The path to the query fasta file containing the representative sequences.
        output_dir (str): The directory where the alignment outputs will be saved.
        container (str, optional): The container to use. Defaults to "None".

    Returns:
        str: The script that is supposed to be running later.
    """
    ### Load all the required files
    alignment_dir = os.path.join(output_dir,'Alignments')
    match_table=os.path.join(output_dir,'matches.blast')
    gtdb_dir_fasta=self.config.gtdb_dir_fasta
    ### End Loading
    query=query_dir
    dirs=[output_dir,
        gtdb_dir_fasta,
        query
        ]
    for dir in dirs:
        if not pathlib.Path(dir).exists():
            os.mkdir(dir)
    if container=="None":
        bash_script=('vsearch --top_hits_only --blast6out '+
                    match_table+
                    ' --usearch_global '+ query +
                    ' --db '+ gtdb_dir_fasta +
                    ' --id ' +str(self.config.vsearch_similarity) +
                    ' --threads '+str(self.config.vsearch_threads)+
                    ' --alnout '+ alignment_dir +
                    ' --top_hits_only'+'\n')

    if container=="docker":
        bash_script='docker run '
        for dir in dirs:
            bash_script+=('-v '+dir+':'+dir+' ')

        bash_script += (self.config.adtoolbox_docker+' vsearch --top_hits_only --blast6out '+
                    match_table+
                    ' --usearch_global '+ query +
                    ' --db '+ gtdb_dir_fasta +
                    ' --id ' +str(self.config.vsearch_similarity) +
                    ' --threads '+str(self.config.vsearch_threads)+
                    ' --alnout '+ alignment_dir +
                    ' --top_hits_only'+'\n')

    if container=="singularity":
        bash_script='singularity exec '
        for dir in dirs:
            bash_script+=('-B '+str(dir)+':'+str(dir)+' ')

        bash_script += (self.config.adtoolbox_singularity+' vsearch --top_hits_only --blast6out '+
                    match_table+
                    ' --usearch_global '+ str(query) +
                    ' --db '+ gtdb_dir_fasta +
                    ' --id ' +str(self.config.vsearch_similarity) +
                    ' --threads '+str(self.config.vsearch_threads)+
                    ' --alnout '+ alignment_dir +
                    ' --top_hits_only'+'\n')
    return bash_script
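
A minimal usage sketch; the paths are hypothetical and config.gtdb_dir_fasta must point to the GTDB fasta database:

from adtoolbox import core, configs
import subprocess

metag = core.Metagenomics(configs.Metagenomics())
script = metag.align_to_gtdb(
    query_dir="/data/qiime_outputs/dna-sequences.fasta",  # hypothetical representative sequences
    output_dir="/data/gtdb_alignment",
    container="docker",
)
subprocess.run(script, shell=True, check=True)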

assign_ec_to_genome(alignment_file)

This function takes an alignment file and assigns EC numbers to the genome based on the alignment file and the e-adm groupings of the EC numbers. The output is a dictionary where the keys are e-adm reactions and the values are the EC numbers that are found in the genome and are grouped under that e-adm reaction.

Parameters:

alignment_file (str): The address of the alignment file. Required.

Returns:

dict: A dictionary containing the e-adm reactions and the EC numbers that are found in the genome and are grouped under each e-adm reaction.

Source code in adtoolbox/core.py
def assign_ec_to_genome(self,alignment_file:str)->dict:
    """
    This function takes an alignment file and assigns EC numbers to the genome based on the alignment file
    and the e-adm groupings of the EC numbers. The output is a dictionary where the keys are e-adm reactions and the
    values are the EC numbers that are found in the genome and are grouped under that e-adm reaction.

    Args:
        alignment_file (str): The address of the alignment file.

    Returns:
        dict: A dictionary containing the e-adm reactions and the EC numbers that are found in the genome and are grouped under the e-adm reaction.
    """

    aligntable = pd.read_table(alignment_file,delimiter="\t")
    aligntable = aligntable[(aligntable["bits"]>self.config.bit_score) & (aligntable["evalue"]<self.config.e_value)]

    ec_align_list = aligntable["target"].str.split("|",expand=True)
    ec_align_list = list(ec_align_list[1].unique()) 

    metadatatable = pd.read_table(self.config.csv_reaction_db, sep=',').drop_duplicates("EC_Numbers")[(['EC_Numbers','Modified_ADM_Reactions'])].dropna(axis=0)
    metadatatable=metadatatable[metadatatable["EC_Numbers"].isin(ec_align_list)]
    adm_reactions=list(set(metadatatable["Modified_ADM_Reactions"].str.split("|").sum()))
    adm_to_ecs={}
    for reaction in adm_reactions:
        adm_to_ecs[reaction]=list(metadatatable[metadatatable["Modified_ADM_Reactions"].str.contains(reaction)]["EC_Numbers"])

    return adm_to_ecs
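
A minimal usage sketch; the alignment table below is the (hypothetical) output of align_genome_to_protein_db, and config.csv_reaction_db, config.bit_score, and config.e_value come from the configs object:

from adtoolbox import core, configs

metag = core.Metagenomics(configs.Metagenomics())
adm_to_ecs = metag.assign_ec_to_genome("/data/alignments/Alignment_Results_mmseq_my_genome.tsv")
for reaction, ecs in adm_to_ecs.items():
    print(reaction, ecs)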

calculate_group_abundances(elements_feature_abundances, rel_abund)

This method calculates the features for each sample given:

1. The relative abundances of the genomes in each sample: in this dictionary the keys are the sample names and the values are dictionaries where the keys are the genome names and the values are the relative abundances of the genomes in the sample.

2. The relative abundances of the elements in each genome: in this dictionary the keys are the genome names and the values are dictionaries where the keys are the element names and the values are the relative abundances of the elements in the genome.

Required Configs

None

Parameters:

elements_feature_abundances (dict[str, dict]): A dictionary containing the relative abundances of the elements in each genome. Required.

rel_abund (dict[str, dict]): A dictionary containing the relative abundances of the genomes in each sample. Required.

Returns:

dict[str, dict[str, float]]: A dictionary containing the relative abundances of the elements in each sample.

Source code in adtoolbox/core.py
def calculate_group_abundances(self,elements_feature_abundances:dict[str,dict],rel_abund:dict[str,dict])->dict[str,dict[str,float]]:
    """
    This method is defined to calculate the features for each sample given:
    1) The relative abundances of the genomes in each sample:
        - In this dictionary the keys are the sample names and the values are dictionaries where the keys are the genome names and the values are the relative abundances of the genomes in the sample.
    2) The relative abundances of the elements in each genome.
        - In this dictionary the keys are the genome names and the values are dictionaries where the keys are the element names and the values are the relative abundances of the elements in the genome.

    Required Configs:
        None

    Args:
        elements_feature_abundances (dict[str,dict]): A dictionary containing the relative abundances of the elements in each genome.
        rel_abund (dict[str,dict]): A dictionary containing the relative abundances of the genomes in each sample.

    Returns:
        dict[str,dict[str,float]]: A dictionary containing the relative abundances of the elements in each sample.
    """
    out={}
    df=pd.DataFrame(elements_feature_abundances).T.fillna(0)
    for sample,abunds in rel_abund.items():
        out[sample]=scaler(pd.DataFrame(df.loc[abunds.keys(),:].multiply(list(abunds.values()),axis=0).sum(axis=0)).T).to_dict(orient="records")[0]
    return out
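
A toy worked example (genome and element names are hypothetical); each sample's result is the abundance-weighted sum of its genomes' element profiles, rescaled by the module's scaler:

from adtoolbox import core, configs

metag = core.Metagenomics(configs.Metagenomics())
element_abund = {
    "genome_A": {"X_su": 0.7, "X_aa": 0.3},
    "genome_B": {"X_su": 0.2, "X_aa": 0.8},
}
rel_abund = {
    "sample_1": {"genome_A": 0.6, "genome_B": 0.4},
    "sample_2": {"genome_A": 0.1, "genome_B": 0.9},
}
group_abund = metag.calculate_group_abundances(element_abund, rel_abund)
# group_abund["sample_1"] maps each element name to its weighted, rescaled abundance.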

download_genome(identifier, output_dir, container='None')

This function downloads a genome from NCBI using its refseq/genbank identifier. Note that this function uses rsync to download the genome.

Required Configs

config.genomes_base_dir: The path to the base directory where the genomes will be saved.

config.adtoolbox_docker: The name of the docker image to be used by ADToolbox (Only if using Docker as container).

config.adtoolbox_singularity: The name of the singularity image to be used by ADToolbox (Only if using Singularity as container).

Parameters:

identifier (str): The identifier of the genome. It can be either refseq or genbank. Required.

output_dir (str): The directory where the genome will be downloaded. Required.

container (str): The container to use. You may select from "None", "docker", "singularity". Defaults to 'None'.

Returns:

str: The bash script that is used, or to be used, to download the genome.

Source code in adtoolbox/core.py
def download_genome(self,identifier:str,output_dir:str,container:str="None")-> str:
    """This function downloads the genomes from NCBI using the refseq/genbank identifiers.
    Note that this function uses rsync to download the genomes. 

    Required Configs:
        config.genomes_base_dir: The path to the base directory where the genomes will be saved.
        ---------
        config.adtoolbox_docker: The name of the docker image to be used by ADToolbox (Only if using Docker as container).
        ---------
        config.adtoolbox_singularity: The name of the singularity image to be used by ADToolbox (Only if using Singularity as container).
        ---------
    Args:
        identifier (str): The identifier of the genome. It can be either refseq or genbank.
        output_dir (str): The directory where the genome will be downloaded.
        container (str, optional): The container to use. Defaults to "None". You may select from "None", "docker", "singularity".

    Returns:
        str: The bash script that is used to download the genomes or to be used to download the genomes.

    """
    base_ncbi_dir = 'rsync://ftp.ncbi.nlm.nih.gov/genomes/all/'
    bash_script=""

    specific_ncbi_dir = identifier[0:3]+'/'+\
                        identifier[3:6]+'/'+\
                        identifier[6:9]+'/'+\
                        identifier[9:].split('.')[0]

    genome_dir=pathlib.Path(output_dir)

    if container=="None":
        bash_script+=('rsync -avz --progress '+base_ncbi_dir+specific_ncbi_dir+' '+str(genome_dir))

    if container=="docker":
        bash_script+=('docker run -it -v '+str(genome_dir.parent)+':'+str(genome_dir.parent)+ f' {self.config.adtoolbox_docker} rsync -avz --progress '+' '+base_ncbi_dir+specific_ncbi_dir+' '+str(genome_dir))

    if container=="singularity":
        bash_script+=('singularity exec -B '+str(genome_dir.parent)+':'+str(genome_dir.parent)+ f' {self.config.adtoolbox_singularity} rsync -avz --progress '+' '+base_ncbi_dir+specific_ncbi_dir+' '+str(genome_dir))

    return bash_script
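
A minimal usage sketch; the accession and output directory are hypothetical:

from adtoolbox import core, configs
import subprocess

metag = core.Metagenomics(configs.Metagenomics())
script = metag.download_genome(
    identifier="GCA_000008085.1",             # hypothetical accession
    output_dir="/data/genomes/GCA_000008085.1",
    container="None",                         # assumes rsync is installed
)
subprocess.run(script, shell=True, check=True)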

extract_ec_from_alignment(alignment_file)

This function extracts the number of times an EC number is found in the alignment file when aligned to ADToolbox protein database.

Required Configs

config.e_value: The e-value threshold for filtering the alignment table.

config.bit_score: The bit score threshold for filtering the alignment table.

config.ec_counts_from_alignment: The address of the json file that the results will be saved in.

Parameters:

alignment_file (str): The address of the alignment file. Required.

Returns:

dict[str, int]: A dictionary of EC numbers and their counts.

Source code in adtoolbox/core.py
def extract_ec_from_alignment(self,alignment_file:str)->dict[str,int]:
    """
    This function extracts the number of times an EC number is found in the alignment file when aligned to ADToolbox protein database.

    Required Configs:
        config.e_value: The e-value threshold for filtering the alignment table.
        ---------
        config.bit_score: The bit score threshold for filtering the alignment table.
        ---------
        config.ec_counts_from_alignment: The address of the json file that the results will be saved in.
        ---------
    Args:
        alignment_file (str): The address of the alignment file.

    Returns:
        dict: A dictionary of EC numbers and their counts.

    """
    alignment_table = pd.read_table(alignment_file,sep='\t')
    alignment_table = alignment_table[(alignment_table['evalue']<self.config.e_value)&(alignment_table['bits']>self.config.bit_score)]
    alignment_table["target"]=alignment_table["target"].apply(lambda x:x.split("|")[1])
    ec_counts=alignment_table["target"].value_counts().to_dict()
    return ec_counts
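
A minimal usage sketch; the alignment table path is hypothetical:

from adtoolbox import core, configs

metag = core.Metagenomics(configs.Metagenomics())
ec_counts = metag.extract_ec_from_alignment("/data/alignments/Alignment_Results_mmseq_my_genome.tsv")
# ec_counts maps EC numbers to their hit counts after the e-value and bit-score filters,
# e.g. {"1.1.1.1": 12, "2.7.1.1": 3}.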

extract_genome_info(endpattern='genomic.fna.gz', filters={'INCLUDE': [], 'EXCLUDE': ['cds', 'rna']})

This function extracts the genome information from the genomes base directory. The output is a dictionary where the keys are the genome IDs and the values are the paths to the genome files.

Required Configs

config.genomes_base_dir: The path to the base directory where the genomes are saved.

Parameters:

endpattern (str): The end pattern of the genome files. Defaults to "genomic.fna.gz".

filters (dict): The filters to be applied to the genome file names. This must be a dictionary with two keys, INCLUDE and EXCLUDE, whose values are lists of strings. Defaults to {"INCLUDE": [], "EXCLUDE": ["cds", "rna"]}. This default is compatible with the genomes downloaded from NCBI; only change it if you provide your own genomes with different file name conventions.

Returns:

dict[str, str]: A dictionary containing the addresses of the genomes that are downloaded or to be downloaded.

Source code in adtoolbox/core.py
def extract_genome_info(self,
                        endpattern:str="genomic.fna.gz",
                        filters:dict={
                                      "INCLUDE":[],
                                      "EXCLUDE":["cds","rna"],
                                        })->dict[str,str]:
    """This function extracts the genome information from the genomes base directory. The output
    is a dictionary where the keys are the genome IDs and the values are the paths to the genome files.

    Required Configs:
        config.genomes_base_dir: The path to the base directory where the genomes are saved.
        ---------
    Args:
        endpattern (str, optional): The end pattern of the genome files. Defaults to "genomic.fna.gz".
        filters (dict, optional): The filters to be applied to the genome file names. This must be a
            dictionary with two keys, INCLUDE and EXCLUDE, whose values are lists of strings.
            Defaults to {"INCLUDE":[],"EXCLUDE":["cds","rna"]}. This default is compatible with the genomes downloaded
            from NCBI, i.e. only change this if you are providing your own genomes with different file name conventions.
    Returns:
        dict[str,str]: A dictionary containing the address of the genomes that are downloaded or to be downloaded.
    """
    base_dir = pathlib.Path(self.config.genomes_base_dir)
    genome_info = {}
    for genome_dir in base_dir.iterdir():
        if genome_dir.is_dir():
            candids=list(genome_dir.rglob(f'*{endpattern}'))
            for candid in candids:
                if all([i in candid.name for i in filters["INCLUDE"]]) and all([i not in candid.name for i in filters["EXCLUDE"]]):
                    genome_info[genome_dir.name]=str(candid.absolute())           
    return genome_info
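
A minimal usage sketch; config.genomes_base_dir must point to the directory populated by download_genome:

from adtoolbox import core, configs

metag = core.Metagenomics(configs.Metagenomics())
genome_info = metag.extract_genome_info()
# genome_info maps each genome directory name to the absolute path of its fasta file,
# e.g. {"GCA_000008085.1": "/data/genomes/GCA_000008085.1/..._genomic.fna.gz"} (hypothetical).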

extract_relative_abundances(feature_table_dir, sample_names=None, top_k=-1)

This method extracts the relative abundances of the features in each sample from the feature table. The feature table must follow the qiime2 feature-table format. NOTE: The final feature abundances sum to 1 for each sample.

Required Configs

None

Parameters:

feature_table_dir (str): The path to the feature table. Required.

sample_names (Union[list[str], None]): The list of sample names to be considered. If None, all the samples will be considered. Defaults to None.

top_k (int): The number of top features to be used. If -1, all the features will be used. Defaults to -1.

Returns:

dict: A dictionary containing the relative abundances of the features in each sample.

Source code in adtoolbox/core.py
def extract_relative_abundances(self,feature_table_dir:str,sample_names:Union[list[str],None]=None,top_k:int=-1)->dict:

    """
    This method extracts the relative abundances of the features in each sample from the feature table. The feature table must follow the qiime2 feature-table format.
    NOTE: The final feature abundances sum to 1 for each sample.
    Required Configs:
        None
    Args:
        feature_table_dir (str): The path to the feature table.
        sample_names (Union[list[str],None], optional): The list of sample names to be considered. If None, all the samples will be considered. Defaults to None.
        top_k (int, optional): The number of top features to be used. If -1, all the features will be used. Defaults to -1.

    Returns:
        dict: A dictionary containing the relative abundances of the features in each sample.
    """
    feature_table = pd.read_table(feature_table_dir,sep='\t',skiprows=1)
    if sample_names is None:
        sample_names = feature_table.columns[1:]
    relative_abundances={sample:[] for sample in sample_names}
    if top_k == -1:
        top_k = feature_table.shape[0]
    for sample in sample_names:
        top=feature_table.sort_values(sample,ascending=False).head(top_k)[sample]
        relative_abundances[sample]=(top/top.sum()).to_dict()
    return relative_abundances
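
A minimal usage sketch; the feature table path is hypothetical and must follow the qiime2 feature-table TSV format:

from adtoolbox import core, configs

metag = core.Metagenomics(configs.Metagenomics())
rel_abund = metag.extract_relative_abundances(
    feature_table_dir="/data/qiime_outputs/feature-table.tsv",
    sample_names=["sample_1"],  # None would include all samples
    top_k=10,
)
# rel_abund["sample_1"] maps the top 10 feature ids to abundances that sum to 1.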

find_top_taxa(sample_name, treshold, mode='top_k')

This function needs three inputs from qiime:

1. feature table: the abundance of each feature in each sample (TSV).
2. taxonomy table: the taxonomy of each feature (TSV).
3. rep seqs: the representative sequence of each feature (fasta).

It then finds the top k features, or the features that form a specific percentile of the community of the sample.

Required Configs

config.feature_table_dir: The path to the feature table tsv file.

config.taxonomy_table_dir: The path to the taxonomy table tsv file.

config.rep_seq_fasta: The path to the representative sequence fasta file.

Parameters:

sample_name (str): The name of the sample. Required.

treshold (int | float): The threshold for the top k or the percentile. Required.

mode (str): Whether to find the top k features ('top_k') or the features that form a specific percentile of the community of the sample ('percentile'). Defaults to 'top_k'.

Returns:

dict: A dictionary of the top k features and their taxonomy.

Source code in adtoolbox/core.py
def find_top_taxa(
    self,
    sample_name:str,
    treshold:Union[int,float],
    mode:str='top_k',
    )->dict:
    """
    This function needs three inputs from qiime:
    1. feature table: This is the abundance of each feature in each sample (TSV).
    2. taxonomy table: This is the taxonomy of each feature (TSV). 
    3. rep seqs: This is the representative sequence of each feature (fasta).
    It then finds the top k features, or the features that form a specific percentile of the community of the sample.

    Required Configs:

        config.feature_table_dir: The path to the feature table tsv file.
        ---------
        config.taxonomy_table_dir: The path to the taxonomy table tsv file.
        ---------
        config.rep_seq_fasta: The path to the representative sequence fasta file.
        ---------

    Args:
        sample_name (str): The name of the sample.
        treshold (int | float): The threshold for the top k or the percentile.
        mode (str, optional): Whether to find the top k features ('top_k') or the features that form a specific percentile of the community of the sample ('percentile'). Defaults to 'top_k'.

    Returns:
        dict: A dictionary of the top k features and their taxonomy.
    """
    ### Load all the required files
    feature_table = pd.read_table(self.config.feature_table_dir, sep='\t',skiprows=1)
    taxonomy_table = pd.read_table(self.config.taxonomy_table_dir, delimiter='\t')
    repseqs=fasta_to_dict(self.config.rep_seq_fasta)
    ### End Loading
    if mode == 'top_k':
        sorted_df=feature_table.sort_values(sample_name, ascending=False)
        top_featureids=list(sorted_df['#OTU ID'].head(treshold))
        top_taxa=[taxonomy_table[taxonomy_table['Feature ID']==featureid]['Taxon'].values[0] for featureid in top_featureids]
        top_repseqs=[repseqs[featureid] for featureid in top_featureids]
        top_abundances=list(sorted_df[sample_name].head(treshold)/sorted_df[sample_name].sum())

    elif mode == 'percentile':
        feature_table[sample_name]=feature_table[sample_name]/feature_table[sample_name].sum()
        sorted_df=feature_table.sort_values(sample_name, ascending=False)
        sorted_df['cumsum']=sorted_df[sample_name].cumsum()*100
        sorted_df_filtered=sorted_df[sorted_df['cumsum']<=treshold]
        top_featureids=list(sorted_df_filtered['#OTU ID'])
        top_taxa=[taxonomy_table[taxonomy_table['Feature ID']==featureid]['Taxon'].values[0] for featureid in top_featureids]
        top_repseqs=[repseqs[featureid] for featureid in top_featureids]
        top_abundances=sorted_df.loc[sorted_df_filtered.index][sample_name].values.tolist()
    else:
        raise ValueError("mode must be either 'top_k' or 'percentile'")

    return {'top_featureids':top_featureids,'top_taxa':top_taxa,'top_repseqs':top_repseqs,'top_abundances':top_abundances}    
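
A minimal usage sketch; the qiime2 output paths are hypothetical, and this assumes the configs.Metagenomics constructor accepts these keyword arguments (mirroring the config attributes the method reads):

from adtoolbox import core, configs

config = configs.Metagenomics(
    feature_table_dir="/data/qiime_outputs/feature-table.tsv",
    taxonomy_table_dir="/data/qiime_outputs/taxonomy.tsv",
    rep_seq_fasta="/data/qiime_outputs/dna-sequences.fasta",
)
metag = core.Metagenomics(config)
top = metag.find_top_taxa("sample_1", treshold=10, mode="top_k")
# top["top_featureids"], top["top_taxa"], top["top_repseqs"], top["top_abundances"]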

get_cod_from_ec_counts(ec_counts)

This function takes a dictionary of EC counts and converts it to ADM microbial agent counts.

Required Configs

config.adm_mapping : A dictionary that maps ADM reactions to ADM microbial agents.

config.csv_reaction_db : The address of the reaction database of ADToolbox.

config.adm_cod_from_ec : The address of the json file that the results will be saved in.

Parameters:

ec_counts (dict): A dictionary containing the counts for each EC number. Required.

Returns:

dict: A dictionary containing the ADM microbial agent counts.

Source code in adtoolbox/core.py
def get_cod_from_ec_counts(self,ec_counts:dict)->dict:
    """This function takes a json file that comtains ec counts and converts it to ADM microbial agents counts.
    Required Configs:
        config.adm_mapping : A dictionary that maps ADM reactions to ADM microbial agents.
        ---------
        config.csv_reaction_db : The address of the reaction database of ADToolbox.
        ---------
        config.adm_cod_from_ec  : The address of the json file that the results will be saved in.
        ---------
    Args:
        ec_counts (dict): A dictionary containing the counts for each ec number.  
    Returns:
        dict: A dictionary containing the ADM microbial agents counts.
    """
    reaction_db = pd.read_table(self.config.csv_reaction_db, sep=',').drop_duplicates("EC_Numbers")
    reaction_db.set_index("EC_Numbers",inplace=True)
    adm_reactions_agents = {k:0 for k in self.config.adm_mapping.keys()}
    for ec in ec_counts.keys():
        l=reaction_db.loc[ec,"e_adm_Reactions"].split("|")
        for adm_rxn in l: 
            adm_reactions_agents[adm_rxn]+=ec_counts[ec]
    adm_microbial_agents={}
    for k,v in self.config.adm_mapping.items():
        adm_microbial_agents[v]=adm_reactions_agents[k]
    return adm_microbial_agents
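
A minimal usage sketch, chaining from extract_ec_from_alignment; config.csv_reaction_db and config.adm_mapping come from the configs object, and the alignment path is hypothetical:

from adtoolbox import core, configs

metag = core.Metagenomics(configs.Metagenomics())
ec_counts = metag.extract_ec_from_alignment("/data/alignments/Alignment_Results_mmseq_my_genome.tsv")
cod = metag.get_cod_from_ec_counts(ec_counts)
# cod maps each ADM microbial agent to the summed counts of its associated EC numbers.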

get_genomes_from_gtdb_alignment(alignment_dir)

This function takes the alignment directory produced by the align_to_gtdb function and extracts the genome information from it. In the output dictionary, the keys are feature ids and the values are the representative genomes.

Required Configs

config.align_to_gtdb_outputs_dir: The path to the directory where the outputs of the align_to_gtdb function are saved.

config.feature_to_taxa: The path to the json file where the feature ids and their representative genomes will be saved.

Parameters:

alignment_dir (str): The directory containing the outputs of the align_to_gtdb function. Required.

Returns:

dict: A dictionary where the keys are feature ids and the values are the representative genomes.
Source code in adtoolbox/core.py
def get_genomes_from_gtdb_alignment(self,alignment_dir:str)->dict:
    """This function takes the alignment file generated from the align_to_gtdb function and generates the the genome information
    using the GTDB-Tk. In the outputted dictionary, the keys are feature ids and the values are the representative genomes.

    Required Configs:
        config.align_to_gtdb_outputs_dir: The path to the directory where the outputs of the align_to_gtdb function are saved.
        ---------
        config.feature_to_taxa: The path to the json file where the json file including feature ids and the representative genomes will be saved.

    Args:
        save (bool, optional): Whether to save the json file or not. Defaults to True.
    """
    matches = os.path.join(alignment_dir,'matches.blast')
    aligned=pd.read_table(matches,header=None,delimiter='\t')
    aligned.drop_duplicates(0,inplace=True)
    aligned[1]=aligned[1].apply(lambda x: ("".join(x.split('_')[1:])).split("~")[0])
    alignment_dict=dict(zip(aligned[0],aligned[1]))


    return alignment_dict
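
A minimal usage sketch; the directory below is the (hypothetical) output_dir passed earlier to align_to_gtdb:

from adtoolbox import core, configs

metag = core.Metagenomics(configs.Metagenomics())
feature_to_genome = metag.get_genomes_from_gtdb_alignment("/data/gtdb_alignment")
# feature_to_genome maps feature ids to representative genome accessions.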

run_qiime2_from_sra(read_1, read_2, sample_name=None, manifest_dir=None, workings_dir=None, save_manifest=True, container='None')

This method uses the input fastq files to run qiime2. The method uses the qiime2 template scripts that are provided in pkg_data module. The method also creates a manifest file for qiime2. The manifest file is created based on the input fastq files.

Required Configs

config.qiime2_single_end_bash_str: The path to the qiime2 bash script for single end reads.

config.qiime2_paired_end_bash_str: The path to the qiime2 bash script for paired end reads.

config.qiime_classifier_db: The path to the qiime2 classifier database.

config.qiime2_docker_image: The name of the docker image to be used by ADToolbox (Only if using Docker as container).

config.qiime2_singularity_image: The name of the singularity image to be used by ADToolbox (Only if using Singularity as container).

Parameters:

read_1 (str): The directory of the forward reads file. Required.

read_2 (str or None): The directory of the reverse reads file. Provide this only if the reads are paired end; otherwise pass None. Required.

sample_name (str): The name of the sample. If None, the name of the sample will be the name of the directory where the fastq files are located. Defaults to None.

manifest_dir (str): The directory where the manifest file will be saved. If None, the manifest file will be saved in the same directory as the fastq files. Defaults to None.

workings_dir (str): The directory where the qiime2 outputs will be saved. If None, the outputs will be saved in the same directory as the fastq files. Defaults to None.

save_manifest (bool): Whether to save the manifest file to disk. Defaults to True.

container (str): If you want to run the qiime2 commands in a container, specify the container name here. Defaults to 'None'.

Returns:

qiime2_bash_str (str): The bash script that will be used to run qiime2, as a Python string.

manifest (DataFrame): The manifest that will be used to run qiime2, as a pandas DataFrame.

Source code in adtoolbox/core.py
def run_qiime2_from_sra(self,
                        read_1:str,
                        read_2:str|None,
                        sample_name:str|None=None,
                        manifest_dir:str|None=None,
                        workings_dir:str|None=None,
                        save_manifest:bool=True,
                        container:str='None') -> tuple[str,str]:
    """
    This method uses the input fastq files to run qiime2. The method uses the qiime2 template scripts that are provided in pkg_data module.
    The method also creates a manifest file for qiime2. The manifest file is created based on the input fastq files.
    Required Configs:
        config.qiime2_single_end_bash_str: The path to the qiime2 bash script for single end reads.
        ---------
        config.qiime2_paired_end_bash_str: The path to the qiime2 bash script for paired end reads.
        ---------
        config.qiime_classifier_db: The path to the qiime2 classifier database.
        ---------
        config.qiime2_docker_image: The name of the docker image to be used by ADToolbox (Only if using Docker as container).
        ---------
        config.qiime2_singularity_image: The name of the singularity image to be used by ADToolbox (Only if using Singularity as container).
        ---------
    Args:
        read_1 (str): directory of the forward reads file
        read_2 (str): directory of the reverse reads file. Provide this only if the reads are paired end; otherwise pass None.
        sample_name (str, optional): The name of the sample. If None, the name of the sample will be the name of the directory where the fastq files are located. Defaults to None.
        manifest_dir (str, optional): The directory where the manifest file will be saved. If None, the manifest file will be saved in the same directory as the fastq files. Defaults to None.
        workings_dir (str, optional): The directory where the qiime2 outputs will be saved. If None, the outputs will be saved in the same directory as the fastq files. Defaults to None.
        save_manifest (bool, optional): Whether to save the manifest file to disk. Defaults to True.
        container (str, optional): If you want to run the qiime2 commands in a container, specify the container name here. Defaults to 'None'.
    Returns:
        qiime2_bash_str (str): The bash script that will be used to run qiime2, as a Python string
        manifest (DataFrame): The manifest that will be used to run qiime2, as a pandas DataFrame


    """

    if sample_name is None:
        sample_name=str(pathlib.Path(read_1).parent.name)
    if manifest_dir is None:
        manifest_dir=pathlib.Path(read_1).parent
    else:
        manifest_dir=pathlib.Path(manifest_dir)

    if workings_dir is None:
        workings_dir=pathlib.Path(read_1).parent
    else:
        workings_dir=pathlib.Path(workings_dir)


    manifest_single={'sample-id':[],'absolute-filepath':[]}
    manifest_paired={'sample-id':[],'forward-absolute-filepath':[],'reverse-absolute-filepath':[]}  
    if read_2 is not None:
        manifest_paired['sample-id'].append(sample_name)
        manifest_paired['forward-absolute-filepath'].append(read_1)
        manifest_paired['reverse-absolute-filepath'].append(read_2)
        paired_end=True
    else:
        manifest_single['sample-id'].append(sample_name)
        manifest_single['absolute-filepath'].append(read_1)
        paired_end=False

    manifest=pd.DataFrame(manifest_single) if not paired_end else pd.DataFrame(manifest_paired)

    if paired_end:
        with open(self.config.qiime2_paired_end_bash_str,"r") as f:
            qiime2_bash_str=f.read()
    else:
        with open(self.config.qiime2_single_end_bash_str,"r") as f:
            qiime2_bash_str=f.read()

    if container=="None":
        qiime2_bash_str=qiime2_bash_str.replace("<manifest>",str(manifest_dir))
        qiime2_bash_str=qiime2_bash_str.replace("<qiime2_work_dir>",str(workings_dir))
        qiime2_bash_str=qiime2_bash_str.replace("<classifier>",str(self.config.qiime_classifier_db))

    elif container=="docker":
        qiime2_bash_str=qiime2_bash_str.splitlines()
        for idx,line in enumerate(qiime2_bash_str):
            line=line.lstrip()
            if line.startswith("qiime") or line.startswith("biom"):
                if not paired_end:
                    pec=""
                else:
                    pec="-v "+read_2+":"+read_2+" "
                qiime2_bash_str[idx]=f"docker run --env TMPDIR=/data/tmp -v {str(manifest_dir)}:{str(manifest_dir)} -v {read_1}:{read_1} -v {read_2}:{read_2} {pec} -v {self.config.qiime_classifier_db}:{self.config.qiime_classifier_db} -w /data  {self.config.qiime2_docker_image}"+" "+line
        qiime2_bash_str="\n".join(qiime2_bash_str)
        qiime2_bash_str=qiime2_bash_str.replace("<manifest>",os.path.join(str(manifest_dir),"manifest.tsv"))
        qiime2_bash_str=qiime2_bash_str.replace("<qiime2_work_dir>",str(workings_dir))
        qiime2_bash_str=qiime2_bash_str.replace("<classifier>",self.config.qiime_classifier_db)

    elif container=="singularity":
        qiime2_bash_str=qiime2_bash_str.splitlines()
        for idx,line in enumerate(qiime2_bash_str):
            line=line.lstrip()
            if line.startswith("qiime") or line.startswith("biom"):
                qiime2_bash_str[idx]=f"singularity exec --bind  {str(seqs)}:{str(seqs)},$PWD:$PWD,{str(Path(self.config.qiime_classifier_db))}:{str(Path(self.config.qiime_classifier_db))},$SINGULARITY_TMPDIR:/tmp  {self.config.qiime2_singularity_image} " +line
        qiime2_bash_str="\n".join(qiime2_bash_str)
        qiime2_bash_str=qiime2_bash_str.replace("<manifest>",str(manifest_dir))
        qiime2_bash_str=qiime2_bash_str.replace("<qiime2_work_dir>",str(seqs))
        qiime2_bash_str=qiime2_bash_str.replace("<classifier>",str(Path(self.config.qiime_classifier_db)))

    else:
        raise ValueError("Container must be None, singularity or docker")

    if save_manifest:
        manifest.to_csv(os.path.join(manifest_dir,"manifest.tsv"),sep="\t",index=False)
    return qiime2_bash_str,manifest
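
A minimal usage sketch; the fastq paths are hypothetical and the reads here are paired end:

from adtoolbox import core, configs
import subprocess

metag = core.Metagenomics(configs.Metagenomics())
script, manifest = metag.run_qiime2_from_sra(
    read_1="/data/SRR0000000/SRR0000000_1.fastq",
    read_2="/data/SRR0000000/SRR0000000_2.fastq",  # pass None for single-end reads
    container="None",  # assumes qiime2 is installed
)
subprocess.run(script, shell=True, check=True)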

seqs_from_sra(accession, target_dir, container='None')

This method downloads the fastq files from the SRA database using the accession number of the sample or run (ONLY SAMPLE ACCESSION AND NOT PROJECT ACCESSION). The method uses the fasterq-dump tool to download the fastq files. It also extracts the sample metadata from the SRA database for future use.

NOTE: In order for this method to work without any container, you need to have the SRA toolkit installed on your system, or at least have prefetch and fasterq-dump installed. For more information on how to install the SRA toolkit, please refer to the following link: https://github.com/ncbi/sra-tools

Required Configs

None

Parameters:

accession (str): The accession number of the SRA sample or run. Required.

target_dir (str): The directory where the fastq files will be downloaded. Required.

container (str): The containerization tool that will be used to run the bash scripts. Options are "None", "docker", "singularity". Defaults to "None".

Returns:

prefetch_script (str): The bash script that will be used to download the SRA files, as a Python string.

sample_metadata (dict): A dictionary that contains the sample metadata.

Source code in adtoolbox/core.py
def seqs_from_sra(self,accession:str,target_dir:str,container:str="None")-> tuple[str,dict]:
    """ 
    This method downloads the fastq files from the SRA database using the accession number of the sample or run (ONLY SAMPLE ACCESSION AND NOT PROJECT ACCESSION).
    The method uses the fasterq-dump tool to download the fastq files. This method also extracts the sample metadata from the SRA database for future use.
    #NOTE In order for this method to work without any container, you need to have the SRA toolkit installed on your system or
    at least have prefetch and fasterq-dump installed on your system. For more information on how to install the SRA toolkit, please refer to the following link:
    https://github.com/ncbi/sra-tools

    Required Configs:
        None


    Args:
        accession (str): The accession number of the SRA sample or run
        target_dir (str): The directory where the fastq files will be downloaded
        container (str, optional): The containerization tool that will be used to run the bash scripts. Defaults to "None". Options are "None","docker","singularity"

    Returns:
        prefetch_script (str): The bash script that will be used to download the SRA files in python string format
        sample_metadata (dict): A dictionary that contains the sample metadata

    """   
    if container=="None":
        prefetch_script=f"""#!/bin/bash\nprefetch {accession} -O {target_dir}"""
        acc_folder=pathlib.Path(target_dir)/accession
        fasterq_dump_script=""
        sra_file=acc_folder/(accession+".sra")
        fasterq_dump_script+=f"\nfasterq-dump {sra_file} -O {acc_folder} --split-files"
        fasterq_dump_script+=f"\nrm {sra_file}"

        prefetch_script+=fasterq_dump_script


    elif container=="docker":
        warn("Docker is not supported yet")

    sample_metadata=utils.get_sample_metadata_from_accession(accession)      


    return prefetch_script,sample_metadata     
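
A minimal usage sketch; the accession and target directory are hypothetical:

from adtoolbox import core, configs
import subprocess

metag = core.Metagenomics(configs.Metagenomics())
script, metadata = metag.seqs_from_sra(
    accession="SRR0000000",
    target_dir="/data/sra",
    container="None",  # assumes prefetch and fasterq-dump are installed
)
subprocess.run(script, shell=True, check=True)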

adm

Here is a schematic view of the adm API:

[Figure: schematic of the adm module]

You can access this module by:

from adtoolbox import adm 

This module includes the following classes:

Model

Any kinetic model could be an instance of this class.

Parameters:

model_parameters (dict): A dictionary which contains model parameters. Required.

base_parameters (dict): A dictionary which contains base parameters. Required.

initial_conditions (dict): A dictionary containing initial conditions for all species. Required.

inlet_conditions (dict): A dictionary containing inlet conditions for all species. Required.

feed (Feed): A Feed instance which contains the feed information. Required.

reactions (list): A list containing all types of reactions. Required.

species (list): A list containing all species. Required.

ode_system (Callable): A callable which outputs the ODE system compatible with scipy.integrate.solve_ivp. Required.

build_stoichiometric_matrix (Callable): A callable which builds the stoichiometric matrix. Required.

control_state (dict): A dictionary containing the states that are desired to be constant. Defaults to {}.

Returns:

Model: A model instance for downstream purposes.

Source code in adtoolbox/adm.py
class Model:

    """Any kinetic model could be an instance of this class.
    Args:
        model_parameters (dict): a dictionary which contains model parameters
        base_parameters (dict): a dictionary which contains base parameters
        initial_conditions (dict): a dictionary containing initial conditions for all species
        inlet_conditions (dict): a dictionary containing inlet conditions for all species
        feed (Feed): a Feed instance which contains the feed information
        reactions (list): a list containing all types of reactions
        species (list): a list containing all species
        ode_system (Callable): a callable which outputs the ODE system compatible with Scipy.integrate.solve_ivp
        build_stoichiometric_matrix (Callable): a callable which builds the stoichiometric matrix
        control_state (dict, optional): a dictionary containing the states that are desired to be constant. Defaults to {}.



    Returns:
        Model: returns a model instance for downstream purposes.
    """
    def __init__(self, 
                 model_parameters: dict,
                 base_parameters: dict,
                 initial_conditions: dict,
                 inlet_conditions:dict,
                 feed:Feed,
                 reactions: list, 
                 species: list, 
                 ode_system:Callable, 
                 build_stoichiometric_matrix:Callable,
                 control_state:dict={},
                 name:str="ADM", 
                 switch:str="DAE",
                 simulation_time:float=30,
                 time_limit:float=-1):

        self.model_parameters = model_parameters
        self.base_parameters = base_parameters
        self.feed=feed
        for state in control_state.keys():
            # Controlled states must already exist among the initial conditions.
            if state not in initial_conditions:
                raise KeyError(state)
            initial_conditions[state]=control_state[state]
        self.control_state=control_state
        self.inlet_conditions = np.array(
            [inlet_conditions[i+"_in"] for i in species])[:, np.newaxis]
        self.reactions = reactions
        self.species = species
        self.initial_conditions = np.array(
            [initial_conditions[i] for i in species])[:, np.newaxis]
        self._ic=initial_conditions
        self._inc=inlet_conditions
        self.switch = switch
        self.name = name
        self.build_stoichiometric_matrix = build_stoichiometric_matrix
        self.ode_system = ode_system
        self.sim_time=simulation_time
        self.time_limit=time_limit
        self.nitrogen_limited=False

    @property
    def s(self):
        """Returns the stoichiometric matrix of a model"""
        return self.build_stoichiometric_matrix(
            base_parameters=self.base_parameters,model_parameters= self.model_parameters,reactions= self.reactions,species= self.species,feed=self.feed, nitrogen_limited=self.nitrogen_limited)

    def update_parameters(self, 
                        model_parameters: dict|None=None,
                        base_parameters:  dict|None=None,
                        initial_conditions: dict|None=None,
                        inlet_conditions: dict|None=None)->None:
        """
        This method updates the parameters of the model. Each argument can be a dictionary containing the parameters to be updated.
        NOTE: It is important to note that you have to separate the different kinds of parameters.
        Args:
            model_parameters (dict): a dictionary which contains the model parameters to be updated as keys and their values as values.
            base_parameters (dict): a dictionary which contains the base parameters to be updated as keys and their values as values.
            initial_conditions (dict): a dictionary containing the initial conditions to be updated as keys and their values as values.
            inlet_conditions (dict): a dictionary containing the inlet conditions to be updated as keys and their values as values.

        Returns:
            None: This method does not return anything.
        """
        if model_parameters is not None:
            self.model_parameters.update(model_parameters)
        if base_parameters is not None:
            self.base_parameters.update(base_parameters)
        if initial_conditions is not None:
            for k,v in initial_conditions.items():
                self.initial_conditions[self.species.index(k)]=v
        if inlet_conditions is not None:
            for k,v in inlet_conditions.items():
                self.inlet_conditions[self.species.index(k)]=v




    def solve_model(self, t_eval: np.ndarray, method="BDF")->scipy.integrate._ivp.ivp.OdeResult:
        """
        Function to solve the model. 
        Examples:
            >>> import numpy as np
            >>> reactions=['rxn1','rxn2']
            >>> species=['a','b','c']
            >>> initial_conditions={'a':.001,'b':.002,'c':.003}
            >>> inlet_conditions={'a_in':.001,'b_in':.002,'c_in':.003}
            >>> model_parameters={'k1':0.001,'k2':0.002}
            >>> base_parameters={'T':0.1}
            >>> feed=Feed(10,20,20,20)
            >>> def build_stoiciometric_matrix(base_parameters,model_parameters,reactions,species,feed,nitrogen_limited=False):
            ...    s = np.zeros((len(species), len(reactions)))
            ...    s[[0,1],0]=[-1,0.001]
            ...    s[[1,2],1]=[-5,1]
            ...    return s
            >>> def ode_system(t,c,Model1):
            ...    v = np.zeros((len(Model1.reactions), 1))
            ...    v[0]=Model1.model_parameters['k1']*c[0]*Model1.base_parameters['T']/1000
            ...    v[1]=Model1.model_parameters['k2']*c[1]/1000
            ...    dCdt=np.matmul(Model1.s,v)
            ...    return dCdt[:, 0]
            >>> m= Model(model_parameters,base_parameters,initial_conditions,inlet_conditions,feed,reactions,species,ode_system,build_stoiciometric_matrix)
            >>> m.solve_model(np.linspace(0,0.1,10),method='RK45')['status']==0
            True

        Args:
            t_eval (np.ndarray): Time points at which the solution is reported
            method (str, optional): The method used to solve the ODE. Defaults to "BDF".

        Returns:
            scipy.integrate._ivp.ivp.OdeResult: The result of integrating the ODE system over the simulation time.
        """
        self.info={"Fluxes":[]}
        y0=self.initial_conditions[:, 0]
        try:
            self._be_time=time.time()
            c = scipy.integrate.solve_ivp(self.ode_system, (0,self.sim_time), y0, t_eval=t_eval, method=method, args=[self],rtol=1e-6)
            if not c.success:
                raise Exception
        except Exception as e:
            print("Could not solve model, setting C to a very large value")
            c=_Fake_Sol(np.ones((y0.shape[0],len(t_eval)))*1e10,t_eval)

        return c





    def plot(self, Sol: scipy.integrate._ivp.ivp.OdeResult, type: str = "Line")-> go.Figure:
        """ A function which returns a plot of the solution from the ODE
        """
        solution = {
            't': Sol.t,
        }
        for i in range(len(self.species)):
            solution[self.species[i]] = Sol.y[i, :]
        sol_df = pd.DataFrame(solution)

        if type == "Line":
            fig = px.line(sol_df, x="t", y=sol_df.columns,
                          title="Concentration of species")
            fig.update_layout(
                title={
                    'y': 0.95,
                    'x': 0.5,

                    "font_size": 30,
                    'xanchor': 'center',
                    'yanchor': 'top'}

            )
            fig.update_xaxes(
                title={
                    "text": "Time (Days)",
                    "font_size": 25,
                }
            )
            fig.update_yaxes(
                title={
                    "text": "Concentrations (kg COD/m^3)",
                    "font_size": 25,
                }
            )

        elif type == "Sankey":
            ### Maybe add a sankey plot here later
            pass

        return fig



    def dash_app(self, sol: scipy.integrate._ivp.ivp.OdeResult,
                 escher_map:str|None=os.path.join(PKG_DATA,"Modified_ADM_Map.json"),
                 cobra_model:str|None=os.path.join(PKG_DATA,"Modified_ADM_Model.json"),
                 **kwargs)->None:
        """A method that creates the dash web app for a model based on an ODE solution.

        Examples:
            >>> import numpy as np
            >>> reactions=['rxn1','rxn2']
            >>> species=['a','b','c']
            >>> initial_conditions={'a':.001,'b':.002,'c':.003}
            >>> inlet_conditions={'a_in':.001,'b_in':.002,'c_in':.003}
            >>> model_parameters={'k1':0.001,'k2':0.002}
            >>> base_parameters={'T':0.1}
            >>> feed=Feed(10,20,20,20)
            >>> def build_stoichiometric_matrix(base_parameters,model_parameters,reactions,species,feed,nitrogen_limited=False):
            ...    s = np.zeros((len(species), len(reactions)))
            ...    s[[0,1],0]=[-1,0.001]
            ...    s[[1,2],1]=[-5,1]
            ...    return s
            >>> def ode_system(t,c,model):
            ...    v = np.zeros((len(model.reactions), 1))
            ...    v[0]=model.model_parameters['k1']*c[0]*model.base_parameters['T']/1000
            ...    v[1]=model.model_parameters['k2']*c[1]/1000
            ...    dCdt=np.matmul(model.s,v)
            ...    return dCdt[:, 0]
            >>> m=Model(model_parameters=model_parameters,base_parameters=base_parameters,initial_conditions=initial_conditions,inlet_conditions=inlet_conditions,feed=feed,reactions=reactions,species=species,ode_system=ode_system,build_stoichiometric_matrix=build_stoichiometric_matrix)
            >>> m.solve_model(np.linspace(0,0.1,10),method='RK45')['status']==0
            True
            >>> m.dash_app(m.solve_model(np.linspace(0,30,1000)))

        Args:
            sol (scipy.integrate._ivp.ivp.OdeResult): The solution of the ODE system. This should be the output of the solve_model method.

        Returns:
            None: This method does not return anything.


        """
        if escher_map is not None:
            with open(escher_map,'rb') as f:
                escher_map=json.load(f)
        if cobra_model is not None:
            with open(cobra_model,'rb') as f:
                cobra_model=json.load(f)

        app = Dash(__name__, external_stylesheets=[dbc.themes.FLATLY])
        colors = {
            'background': '#659dbd',
            'text': '#3e4444'
        }


        solution = {
            't': sol.t,
        }
        for i in range(len(self.species)):
            solution[self.species[i]] = sol.y[i, :]
        sol_df = pd.DataFrame(solution)


        fig = px.line(sol_df, x="t", y=sol_df.columns,
                      title="Concentration of species")
        fig.update_layout(
        title={
        'y': 0.95,
        'x': 0.5,
        "font_size": 30,
        'xanchor': 'center',
        'yanchor': 'top'},
        legend=dict(font=dict(size= 20),),
        plot_bgcolor="rgba(0,0,0,0)",
        paper_bgcolor="rgba(0,0,0,0)",
            )
        fig.update_xaxes(
        title={
        "text": "Time (Days)",
        "font_size": 25,
            },
             tickfont_size=20,
        linecolor='grey',
        gridcolor='grey',
            )
        fig.update_yaxes(
        title={
        "text": "Concentrations (kg COD/m^3)",
        "font_size": 25,
         },
        tickfont_size=20,
        linecolor='grey',
        gridcolor='grey',

            )
        fig.update_traces(line=dict(width=3))

        styles={
            'table_width': '95%',
            'padding-left': '20px',
            'container_width': '85%'
        }
        page=[dbc.Container(
                        html.H1("ADToolbox Web Interface",style={"font-size":"70px", "padding-top":"50px"}),className="text-white bg-primary",style={"height":"300px","text-align": "center"}, fluid=True),
                        dbc.Container([dbc.Row(
                                    [dbc.Card([
                                        html.H2(f"{self.name} Concentration Plot", style={
                                            'textAlign': 'left',
                                            'color': colors['text'],
                                            'font-size': '15',
                                            'padding-top': '50px',
                                            'padding-bottom': '20px',
                                            'padding-left': styles['padding-left'] },
                                             className="card-title"),
                                        dcc.Graph(figure=fig, id='Concentrations_Line_Plot',
                                                style={
                                                "height":"600px",
                                                "padding-left": styles['padding-left'],
                                                'background-color': 'rgba(0,0,0,0)'}
                                                ),],className='bg-light'),

                                    dbc.Card([html.H3("Base Parameters", style={
                                        'textAlign': 'left',
                                        'color': colors['text'],
                                        'font-size': '15',
                                        'padding-top': '50px',
                                        'padding-bottom': '20px',
                                        'padding-left': styles['padding-left']
                                        }),
                                        dash_table.DataTable(
                                        id='base_parameters',
                                        columns=[{"name": i, "id": i,"type":"numeric"} for i in list(self.base_parameters.keys())],
                                        data=pd.DataFrame(self.base_parameters,index=[0]).to_dict('records'),
                                        editable=True,
                                        style_table={'overflowX': 'scroll', 'padding-left': '20px','padding-bottom':'30px', 'width': styles['table_width']},
                                        style_header={
                                        'color': 'black',
                                        'font-size': '30px',
                                            },
                                        style_data={
                                        'backgroundColor': 'rgb(250, 250, 250)',
                                        'color': 'black',
                                        'font-size': '25px'}),],className="bg-light"),

                                    dbc.Card([html.H3("Model Parameters", style={
                                        'textAlign': 'left',
                                        'color': colors['text'],
                                        'font-size': '15',
                                        'padding-top': '50px',
                                        'padding-bottom': '20px',
                                        'padding-left': styles['padding-left']
                                        }),
                                        dash_table.DataTable(
                                        id='model_parameters',
                                        columns=[{"name": i, "id": i,"type":"numeric"} for i in list(self.model_parameters.keys())],
                                        data=pd.DataFrame(self.model_parameters,index=[0]).to_dict('records'),
                                        editable=True,
                                        style_table={'overflowX': 'scroll', 'padding-left': '20px','padding-bottom':'30px', 'width': styles['table_width']},
                                        style_header={
                                        'color': 'black',
                                        'font-size': '30px',
                                            },
                                        style_data={
                                        'backgroundColor': 'rgb(250, 250, 250)',
                                        'color': 'black',
                                        'font-size': '25px'}),],className="bg-light"),

                                    dbc.Card([html.H3("Initial Conditions", style={
                                        'textAlign': 'left',
                                        'color': colors['text'],
                                        'font-size': '15',
                                        'padding-top': '50px',
                                        'padding-bottom': '20px',
                                        'padding-left': styles['padding-left']
                                        }),
                                        dash_table.DataTable(
                                        id='initial_conditions',
                                        columns=[{"name": i, "id": i,"type":"numeric"} for i in list(self._ic.keys())],
                                        data=pd.DataFrame(self._ic,index=[0]).to_dict('records'),
                                        editable=True,
                                        style_table={'overflowX': 'scroll', 'padding-left': '20px','padding-bottom':'30px', 'width': styles['table_width']},
                                        style_header={
                                        'color': 'black',
                                        'font-size': '30px',
                                            },
                                        style_data={
                                        'backgroundColor': 'rgb(250, 250, 250)',
                                        'color': 'black',
                                        'font-size': '25px'}),],className="bg-light"),

                                    dbc.Card([html.H3("Inlet Conditions", style={
                                        'textAlign': 'left',
                                        'color': colors['text'],
                                        'font-size': '15',
                                        'padding-top': '50px',
                                        'padding-bottom': '20px',
                                        'padding-left': styles['padding-left']
                                        }),
                                        dash_table.DataTable(
                                        id='inlet_conditions',
                                        columns=[{"name": i, "id": i,"type":"numeric"} for i in list(self._inc.keys())],
                                        data=pd.DataFrame(self._inc,index=[0]).to_dict('records'),
                                        editable=True,
                                        style_table={'overflowX': 'scroll', 'padding-left': '20px','padding-bottom':'30px', 'width': styles['table_width']},
                                        style_header={
                                        'color': 'black',
                                        'font-size': '30px',
                                            },
                                        style_data={
                                        'backgroundColor': 'rgb(250, 250, 250)',
                                        'color': 'black',
                                        'font-size': '25px'}),],className="bg-light"),
                                        ],className="bg-light")],fluid=True,className="bg-light",style={"width": styles['container_width']}),
                                    dbc.Container([dbc.Row(
                                    [
                                    html.H2("Escher Map", style={
                                    'textAlign': 'left',
                                    'color': colors['text'],
                                    'font-size': '15',
                                    'padding-top': '20px',
                                    'padding-bottom': '20px',
                                    'padding-left': styles['padding-left']
                                    }) ,

                                    dcc.Dropdown(["Show Map","Hide Map"],
                                     self.reactions[0], style={"width": "300px","font-size":25,'padding-left':'20px'}, id="Drop_Down_Escher"),
                                    html.Div(children=None,id="Escher_",style={"height": "100px",'padding-bottom':'20px'}),
                                    ])], fluid=True,className="bg-light pb-3",style={"width": styles['container_width']}),
            dbc.Container(html.Div(children=None,id="Escher",style={'align':'center'}),fluid=True,className="bg-light pb-3",style={"width": styles['container_width']}),
        ]
        if escher_map is None:
            # Drop the two Escher-related containers when no map is provided.
            page.pop(-1)
            page.pop(-1)


        app.layout = html.Div(page)

        @app.callback(Output(component_id="Escher_", component_property='children'), Input(component_id="Drop_Down_Escher", component_property='value'))
        def escher_wrapper(drop_down_escher):
            print("drop_down_escher")
            if drop_down_escher=="Show Map":
                Labels={}
                for i in range(0,self.sim_time,int(self.sim_time/20)):
                    Labels[i]={'label':str(i),'style':{'color': '#77b0b1'}}
                Labels[self.sim_time]=self.sim_time
                return [html.H2("Time (Day)",style={'textAlign': 'center'}),dcc.Slider(0,self.sim_time,int(self.sim_time/20),value=0,id="Escher_Slider",marks=None,tooltip={"placement": "bottom", "always_visible": True})]

        @app.callback(Output(component_id="Escher", component_property='children'), Input(component_id="Drop_Down_Escher", component_property='value'),
        Input(component_id="Escher_Slider", component_property='value'),prevent_initial_call=True)        
        def draw_escher(drop_down_escher,escher_slider):
            rxn_data={}
            self.ode_system(0,sol.y[:,int(sol.y.shape[1]/self.sim_time*escher_slider)],self)
            fluxes=self.info["Fluxes"]
            for ind,i in enumerate(self.reactions):
                rxn_data[i.replace(" ","_")]= fluxes[ind]
            if kwargs.get('min_flux',None):
                min_scale={ 'type': 'value','value':kwargs.get('min_flux') , 'color': 'red','size':10 }
            else:
                min_scale={ 'type': 'min' , 'color': 'red','size':10 }
            if kwargs.get('max_flux',None):
                max_scale={ 'type': 'value','value':kwargs.get('max_flux') , 'color': 'green','size':10 }
            else:
                max_scale={ 'type': 'max', 'color': 'green','size':10 }

            if drop_down_escher=="Show Map":
                return [dash_escher.DashEscher(mapData=escher_map,modelData=cobra_model,
            options={
             'reaction_data':rxn_data,
             'enable_keys':False,
             'reaction_scale':[min_scale,max_scale],
            }
            ,height='1000px',
        width='100%')
             ]
        @app.callback(Output(component_id='Concentrations_Line_Plot', component_property='figure'),
                    Input(component_id='base_parameters', component_property='data'),
                    Input(component_id='model_parameters', component_property='data'),
                    Input(component_id='initial_conditions', component_property='data'),
                    Input(component_id='inlet_conditions', component_property='data'),
                    prevent_initial_call=True
                    )
        def update_graph_fig(base_parameters: dict, model_parameters:dict, initial_conditions: dict, inlet_conditions: dict)->plotly.graph_objects.Figure:

            if len(self.control_state.keys()):
                for i in self.control_state.keys():
                    self.control_state[i]=initial_conditions[0][i]
            if len(base_parameters):
                self.base_parameters = base_parameters[0]
            if len(model_parameters):
                self.model_parameters = model_parameters[0]
            self.initial_conditions = np.array(
            [initial_conditions[0][i] for i in self.species])[:, np.newaxis]
            self.inlet_conditions = np.array(
            [inlet_conditions[0][i+"_in"] for i in self.species])[:, np.newaxis]
            update_sol = self.solve_model(np.linspace(0, self.sim_time, 10000))

            sol=update_sol
            solution = {
                    't': update_sol.t,
                        }
            for i in range(len(self.species)):
                solution[self.species[i]] = update_sol.y[i, :]
            sol_df = pd.DataFrame(solution)

            fig = px.line(sol_df, x="t", y=sol_df.columns,
                          title="Concentration of species")
            fig.update_layout(
            title={
            'y': 0.95,
            'x': 0.5,
            "font_size": 30,
            'xanchor': 'center',
            'yanchor': 'top'},
            legend=dict(font=dict(size= 20),),
            plot_bgcolor="rgba(0,0,0,0)",
            paper_bgcolor="rgba(0,0,0,0)",

                )
            fig.update_xaxes(
            title={
            "text": "Time (Days)",
            "font_size": 25,
                },
                 tickfont_size=20,
            linecolor='grey',
            gridcolor='grey',
                )
            fig.update_yaxes(
            title={
            "text": "Concentrations (kg COD/m^3)",
            "font_size": 25,
             },
            tickfont_size=20,
            linecolor='grey',
            gridcolor='grey',


                )
            fig.update_traces(line=dict(width=3))
            return fig



        app.run_server(port=8000, host='127.0.0.1')

    def csv_report(self,sol: scipy.integrate._ivp.ivp.OdeResult ,address: str)->None:
        """Converts the results to a pandas data frame then to a csv"""
        df = pd.DataFrame(sol.y, columns=sol.t, index=self.species)
        df.to_csv(os.path.join(address,self.name+"_Report.csv"), header=True,
                  index=True)

    def copy(self):
        """Returns a copy of the model"""
        return type(self)(model_parameters=self.model_parameters.copy(),
                          base_parameters=self.base_parameters.copy(),
                          initial_conditions=self._ic.copy(),
                          inlet_conditions=self._inc.copy(),
                          feed=self.feed,
                          reactions=self.reactions.copy(),
                          species=self.species.copy(),
                          ode_system=self.ode_system,
                          build_stoichiometric_matrix=self.build_stoichiometric_matrix,
                          control_state=self.control_state.copy(),
                          name=self.name,
                          switch=self.switch,
                          time_limit=self.time_limit,
                          simulation_time=self.sim_time)

    def build_cobra_model(self,address:str=None):
        """This method builds a cobra model from an instance of Model. One particular use
        of such models is to build an escher map from the model.
        Args:
            address (str, optional): The address to save the model. Defaults to None.
        """
        try:
            import cobra
        except ImportError:
            raise ImportError("CobraPy is not installed, please install it to use this function")
        model = cobra.Model(self.name)
        for reaction in self.reactions:
            temp_reaction = cobra.Reaction(reaction.replace(" ", "_"), name=reaction.replace(" ", "_"))
            temp_mets = np.where(self.s[:, self.reactions.index(reaction)] != 0)
            met_dict = {}
            for met in temp_mets[0]:
                metabolite = cobra.Metabolite(self.species[met].replace(" ", "_"),
                                              name=self.species[met].replace(" ", "_"), compartment="Model")
                met_dict[metabolite] = self.s[met, self.reactions.index(reaction)]
            temp_reaction.add_metabolites(met_dict)
            model.add_reactions([temp_reaction])
        if address:
            cobra.io.save_json_model(model, address)
        return model

s property

Returns the stoichiometric matrix of a model
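
A quick sanity check (a sketch, assuming an existing Model instance m): the matrix is rebuilt on every access from the current parameters and feed, with one row per species and one column per reaction.

s = m.s
assert s.shape == (len(m.species), len(m.reactions))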

build_cobra_model(address=None)

This method builds a cobra model from an instance of Model. One particular use of such models is to build an escher map from the model.

Parameters:

    address (str, optional): The address to save the model. Defaults to None.
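
A minimal usage sketch, assuming an existing Model instance m and CobraPy installed; the file name is hypothetical:

cobra_model = m.build_cobra_model(address="my_adm_model.json")  # address is optional
print(len(cobra_model.reactions), len(cobra_model.metabolites))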

copy()

Returns a copy of the model
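
A short sketch, assuming an existing Model instance m (the parameter name is illustrative): the copy owns its own parameter dictionaries, so it can be tuned without touching the original.

m2 = m.copy()
m2.update_parameters(model_parameters={"k1": 0.01})  # m is unchanged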


csv_report(sol, address)

Converts the results to a pandas DataFrame and writes them to a CSV file in the given directory.
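
Usage sketch, assuming a Model instance m; the output directory is illustrative:

import numpy as np
sol = m.solve_model(np.linspace(0, m.sim_time, 100))
m.csv_report(sol, address=".")  # writes ./<model name>_Report.csv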


dash_app(sol, escher_map=os.path.join(PKG_DATA, 'Modified_ADM_Map.json'), cobra_model=os.path.join(PKG_DATA, 'Modified_ADM_Model.json'), **kwargs)

A method that creates the dash web app for a model based on an ODE solution.

Examples:

>>> import numpy as np
>>> reactions=['rxn1','rxn2']
>>> species=['a','b','c']
>>> initial_conditions={'a':.001,'b':.002,'c':.003}
>>> inlet_conditions={'a_in':.001,'b_in':.002,'c_in':.003}
>>> model_parameters={'k1':0.001,'k2':0.002}
>>> base_parameters={'T':0.1}
>>> feed=Feed(10,20,20,20)
>>> def build_stoichiometric_matrix(base_parameters,model_parameters,reactions,species,feed,nitrogen_limited=False):
...    s = np.zeros((len(species), len(reactions)))
...    s[[0,1],0]=[-1,0.001]
...    s[[1,2],1]=[-5,1]
...    return s
>>> def ode_system(t,c,model):
...    v = np.zeros((len(model.reactions), 1))
...    v[0]=model.model_parameters['k1']*c[0]*model.base_parameters['T']/1000
...    v[1]=model.model_parameters['k2']*c[1]/1000
...    dCdt=np.matmul(model.s,v)
...    return dCdt[:, 0]
>>> m=Model(model_parameters=model_parameters,base_parameters=base_parameters,initial_conditions=initial_conditions,inlet_conditions=inlet_conditions,feed=feed,reactions=reactions,species=species,ode_system=ode_system,build_stoichiometric_matrix=build_stoichiometric_matrix)
>>> m.solve_model(np.linspace(0,0.1,10),method='RK45')['status']==0
True
>>> m.dash_app(m.solve_model(np.linspace(0,30,1000)))

Parameters:

    sol (scipy.integrate._ivp.ivp.OdeResult, required): The solution of the ODE system. This should be the output of the solve_model method.

Returns:

    None: This method does not return anything.
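
Beyond the doctest above, a sketch of launching the app without the Escher section (both map arguments default to the bundled Modified-ADM files):

sol = m.solve_model(np.linspace(0, m.sim_time, 1000))
m.dash_app(sol, escher_map=None, cobra_model=None)  # serves on 127.0.0.1:8000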


plot(Sol, type='Line')

Returns a plotly figure of the species concentration profiles from an ODE solution.
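
Usage sketch, assuming a Model instance m:

import numpy as np
fig = m.plot(m.solve_model(np.linspace(0, m.sim_time, 200)), type="Line")
fig.show()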


solve_model(t_eval, method='BDF')

Function to solve the model.

Examples:

>>> import numpy as np
>>> reactions=['rxn1','rxn2']
>>> species=['a','b','c']
>>> initial_conditions={'a':.001,'b':.002,'c':.003}
>>> inlet_conditions={'a_in':.001,'b_in':.002,'c_in':.003}
>>> model_parameters={'k1':0.001,'k2':0.002}
>>> base_parameters={'T':0.1}
>>> feed=Feed(10,20,20,20)
>>> def build_stoichiometric_matrix(base_parameters,model_parameters,reactions,species,feed,nitrogen_limited=False):
...    s = np.zeros((len(species), len(reactions)))
...    s[[0,1],0]=[-1,0.001]
...    s[[1,2],1]=[-5,1]
...    return s
>>> def ode_system(t,c,model):
...    v = np.zeros((len(model.reactions), 1))
...    v[0]=model.model_parameters['k1']*c[0]*model.base_parameters['T']/1000
...    v[1]=model.model_parameters['k2']*c[1]/1000
...    dCdt=np.matmul(model.s,v)
...    return dCdt[:, 0]
>>> m=Model(model_parameters=model_parameters,base_parameters=base_parameters,initial_conditions=initial_conditions,inlet_conditions=inlet_conditions,feed=feed,reactions=reactions,species=species,ode_system=ode_system,build_stoichiometric_matrix=build_stoichiometric_matrix)
>>> m.solve_model(np.linspace(0,0.1,10),method='RK45')['status']==0
True

Parameters:

    t_eval (np.ndarray, required): Time points at which the solution is reported.
    method (str, optional): The method used to solve the ODE. Defaults to "BDF".

Returns:

    scipy.integrate._ivp.ivp.OdeResult: The result of integrating the ODE system over the simulation time.
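
A defensive-usage sketch: when integration fails, solve_model prints a warning and returns a placeholder solution filled with 1e10 rather than raising, so unusually large values signal failure.

sol = m.solve_model(np.linspace(0, m.sim_time, 500))
if (sol.y > 1e9).any():
    print("Integration failed; check parameters and initial conditions.")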


update_parameters(model_parameters=None, base_parameters=None, initial_conditions=None, inlet_conditions=None)

This method updates the parameters of the model. Each argument can be a dictionary containing the parameters to be updated. NOTE: Each kind of parameter must be passed through its corresponding argument; they are not interchangeable.

Parameters:

    model_parameters (dict, optional): a dictionary which contains the model parameters to be updated as keys and their values as values. Defaults to None.
    base_parameters (dict, optional): a dictionary which contains the base parameters to be updated as keys and their values as values. Defaults to None.
    initial_conditions (dict, optional): a dictionary containing the initial conditions to be updated as keys and their values as values. Defaults to None.
    inlet_conditions (dict, optional): a dictionary containing the inlet conditions to be updated as keys and their values as values. Defaults to None.

Returns:

    None: This method does not return anything.
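
Usage sketch, assuming an ADM-style Model instance m; the parameter and species names are illustrative:

m.update_parameters(model_parameters={"k_dis": 0.5},
                    initial_conditions={"S_su": 0.01})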


adm1_ode_sys(t, c, model)

The ODE system for the original ADM1 model. No input validation is performed.

Parameters:

    t (float, required): the current time point of the integration
    c (np.ndarray, required): the current array of species concentrations
    model (Model, required): the instance of Model used to evaluate the ODE

Returns:

    np.ndarray: The output is dCdt, the change of concentration with respect to time.
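
This function is normally passed to Model as its ode_system callback and called internally as ode_system(t, c, model). A sketch of a direct call, assuming an ADM1 Model instance m (note that the function mutates c, hence the copy):

dcdt = adm1_ode_sys(0.0, m.initial_conditions[:, 0].copy(), m)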

Source code in adtoolbox/adm.py
def adm1_ode_sys(t: float, c: np.ndarray, model:Model)-> np.ndarray:
    """ The ODE system for the original ADM.
        No testing is done.

        Args:
            t (float):a matrix of zeros to be filled
            c (np.ndarray): an array of concentrations to be filled
            Model (Model): The an instance of Model to calculate ODE with

        Returns:
            np.ndarray: The output is dCdt, the change of concentration with respect to time.
    """
    c[34] = c[10] - c[33]
    c[32] = c[9] - c[31]
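    # pH, nitrogen-limitation, hydrogen and ammonia inhibition factors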
    I_pH_aa = (model.model_parameters["K_pH_aa"] ** model.model_parameters['nn_aa'])/(np.power(
        c[26], model.model_parameters['nn_aa']) + np.power(model.model_parameters["K_pH_aa"], model.model_parameters['nn_aa']))
    I_pH_ac = (model.model_parameters['K_pH_ac'] ** model.model_parameters["n_ac"])/(
        c[26] ** model.model_parameters['n_ac'] + model.model_parameters['K_pH_ac'] ** model.model_parameters['n_ac'])
    I_pH_h2 = (model.model_parameters['K_pH_h2']**model.model_parameters['n_h2'])/(
        c[26] ** model.model_parameters['n_h2'] + model.model_parameters['K_pH_h2']**model.model_parameters['n_h2'])
    I_IN_lim = 1 / (1+(model.model_parameters['K_S_IN'] / c[10]))
    I_h2_fa = 1 / (1+(c[7] / model.model_parameters['K_I_h2_fa']))
    I_h2_c4 = 1 / (1+(c[7]/model.model_parameters['K_I_h2_c4']))
    I_h2_pro = (1/(1+(c[7]/model.model_parameters['K_I_h2_pro'])))
    I_nh3 = 1/(1+(c[33]/model.model_parameters['K_I_nh3']))
    I5 = (I_pH_aa * I_IN_lim)
    I6 = np.copy(I5)
    I7 = (I_pH_aa * I_IN_lim * I_h2_fa)
    I8 = (I_pH_aa * I_IN_lim * I_h2_c4)
    I9 = np.copy(I8)
    I10 = (I_pH_aa * I_IN_lim * I_h2_pro)
    I11 = (I_pH_ac * I_IN_lim * I_nh3)
    I12 = (I_pH_h2 * I_IN_lim)
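    # process rates: disintegration, hydrolysis, substrate uptake, and biomass decay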
    v = np.zeros((len(model.reactions), 1))
    v[0] = model.model_parameters["k_dis"]*c[12]

    v[1] = model.model_parameters['k_hyd_ch']*c[13]
    v[2] = model.model_parameters['k_hyd_pr']*c[14]
    v[3] = model.model_parameters['k_hyd_li']*c[15]

    v[4] = model.model_parameters['k_m_su']*c[0] / \
(model.model_parameters['K_S_su']+c[0])*c[16]*I5
    v[5] = model.model_parameters['k_m_aa']*c[1] / \
        (model.model_parameters['K_S_aa']+c[1])*c[17]*I6
    v[6] = model.model_parameters['k_m_fa']*c[2] / \
        (model.model_parameters['K_S_fa']+c[2])*c[18]*I7
    v[7] = model.model_parameters['k_m_c4']*c[3] / \
        (model.model_parameters['K_S_c4']+c[3]) * \
        c[19]*c[3]/(c[3]+c[4]+10 ** (-6))*I8
    v[8] = model.model_parameters['k_m_c4']*c[4] / \
        (model.model_parameters['K_S_c4']+c[4]) * \
        c[19]*c[4]/(c[4]+c[3]+10 ** (-6))*I9
    v[9] = model.model_parameters['k_m_pr']*c[5] / \
        (model.model_parameters['K_S_pro']+c[5])*c[20]*I10
    v[10] = model.model_parameters['k_m_ac']*c[6] / \
        (model.model_parameters['K_S_ac']+c[6])*c[21]*I11
    v[11] = model.model_parameters['k_m_h2']*c[7] / \
        (model.model_parameters['K_S_h2']+c[7])*c[22]*I12
    v[12] = model.model_parameters['k_dec_X_su']*c[16]
    v[13] = model.model_parameters['k_dec_X_aa']*c[17]
    v[14] = model.model_parameters['k_dec_X_fa']*c[18]
    v[15] = model.model_parameters['k_dec_X_c4']*c[19]
    v[16] = model.model_parameters['k_dec_X_pro']*c[20]
    v[17] = model.model_parameters['k_dec_X_ac']*c[21]
    v[18] = model.model_parameters['k_dec_X_h2']*c[22]
    v[19] = model.model_parameters['k_A_B_va'] * \
        (c[27] * (model.model_parameters['K_a_va'] + c[26]) -
         model.model_parameters['K_a_va'] * c[3])
    v[20] = model.model_parameters['k_A_B_bu'] * \
        (c[28] * (model.model_parameters['K_a_bu'] + c[26]) -
         model.model_parameters['K_a_bu'] * c[4])
    v[21] = model.model_parameters['k_A_B_pro'] * \
        (c[29] * (model.model_parameters['K_a_pro'] + c[26]) -
         model.model_parameters['K_a_pro'] * c[5])
    v[22] = model.model_parameters['k_A_B_ac'] * \
        (c[30] * (model.model_parameters['K_a_ac'] + c[26]) -
         model.model_parameters['K_a_ac'] * c[6])
    v[23] = model.model_parameters['k_A_B_co2'] * \
        (c[31] * (model.model_parameters['K_a_co2'] + c[26]) -
         model.model_parameters['K_a_co2'] * c[9])
    v[24] = model.model_parameters['k_A_B_IN'] * \
        (c[33] * (model.model_parameters['K_a_IN'] + c[26]) -
         model.model_parameters['K_a_IN'] * c[10])
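    # Gas-phase partial pressures: COD-based gas concentrations are converted to
    # a molar basis (16 gCOD/mol for H2, 64 gCOD/mol for CH4); CO2 is already molar.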
    p_gas_h2 = c[35] * model.base_parameters["R"] * \
        model.base_parameters["T_op"] / 16
    p_gas_ch4 = c[36] * model.base_parameters["R"] * \
        model.base_parameters["T_op"] / 64
    p_gas_co2 = c[37] * model.base_parameters["R"] * \
        model.base_parameters["T_op"]
    p_gas_h2o = 0.0313 * \
        np.exp(5290 *
               (1 / model.base_parameters["T_base"] - 1 / model.base_parameters["T_op"]))
    P_gas = p_gas_h2 + p_gas_ch4 + p_gas_co2 + p_gas_h2o
    q_gas = max(
        0, (model.model_parameters['k_p'] * (P_gas - model.base_parameters['P_atm'])))
    v[25] = model.model_parameters['k_L_a'] * \
        (c[7] - 16 * model.model_parameters['K_H_h2'] * p_gas_h2)
    v[26] = model.model_parameters['k_L_a'] * \
        (c[8] - 64 * model.model_parameters['K_H_ch4'] * p_gas_ch4)
    v[27] = model.model_parameters['k_L_a'] * \
        (c[32] - model.model_parameters['K_H_co2'] * p_gas_co2)
    dCdt = np.matmul(model.s, v)

    if c[model.species.index('S_IN')]<0.01:
        model.nitrogen_limited=True
    else:
        model.nitrogen_limited=False

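    # Electroneutrality: phi sums cations minus anions; the proton concentration
    # c[26] then follows from the charge balance and the water constant K_w.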
    phi = c[24]+c[34]-c[31] - (c[30] / 64) - (c[29] / 112) - (c[28] / 160) - (c[27] / 208) - c[25]
    c[26] = (-1 * phi / 2) + (0.5 * np.sqrt(phi**2 + 4 * model.model_parameters['K_w']))

    dCdt[0: 35] = dCdt[0: 35]+model.base_parameters['q_in'] / model.base_parameters["V_liq"] * \
        (model.inlet_conditions[0: 35]-c[0:35].reshape(-1, 1))


    dCdt[35:] = dCdt[35:]+q_gas/model.base_parameters["V_gas"] * (model.inlet_conditions[35:]-c[35:].reshape(-1, 1))
    dCdt[[26, 32, 34], 0] = 0
    if model.switch == "DAE":
        dCdt[7] = 0
        dCdt[27: 32] = 0
        dCdt[33] = 0

    if model.control_state.keys():
        for state in model.control_state.keys():
            c[model.species.index(state)]=model.control_state[state]
            dCdt[model.species.index(state)]=0


    return dCdt[:, 0]
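
A minimal usage sketch (not part of ADToolbox; the helper name integrate_adm1 and its defaults are assumptions for illustration): because adm1_ode_sys follows the (t, c, model) signature, it can be integrated with scipy.integrate.solve_ivp by binding the model argument.

import numpy as np
from scipy.integrate import solve_ivp

def integrate_adm1(model, c0, t_span=(0.0, 30.0)):
    # Bind the Model instance so the solver sees the standard f(t, y) signature.
    return solve_ivp(
        fun=lambda t, c: adm1_ode_sys(t, c, model),
        t_span=t_span,
        y0=np.asarray(c0, dtype=float),
        method="LSODA",  # a stiff-capable solver; AD kinetics are typically stiff
    )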

build_adm1_stoiciometric_matrix(base_parameters, model_parameters, reactons, species, feed, nitrogen_limited=False)

This function builds the stoichiometric matrix for the ADM1 Model.

Parameters:

Name | Type | Description | Default
base_parameters | dict | a dictionary containing the base parameters | required
model_parameters | dict | a dictionary containing the model parameters | required
reactons | list | a list containing all reactions | required
species | list | a list containing all species | required
feed | Feed | a Feed instance which contains the feed information | required
nitrogen_limited | bool | whether the model is nitrogen limited | False

Returns:

Type | Description
np.ndarray | the stoichiometric matrix of the ADM1 model

Source code in adtoolbox/adm.py
def build_adm1_stoiciometric_matrix(base_parameters: dict, model_parameters: dict, reactons: list, species:list,feed:Feed,nitrogen_limited:bool=False)-> np.ndarray:
    """This function builds the stoichiometric matrix for the ADM1 Model.
    Args:
        base_parameters (dict): a dictionary containing the base parameters
        model_parameters (dict): a dictionary containing the model parameters
        reactons (list): a list containing all reactions
        species (list): a list containing all species
        feed (Feed): a Feed instance which contains the feed information
        nitrogen_limited (bool, optional): A boolean which indicates whether the model is nitrogen limited. Defaults to False.

    Returns:
        np.ndarray: Returns the stoichiometric matrix of the ADM1 model.
    """

    S = np.zeros((len(species), len(reactons)))
    S[0, [1, 3, 4]] = [1, (1-model_parameters["f_fa_li"]), - 1]
    S[1, [2, 5]] = [1, -1]
    S[2, [3, 6]] = [(model_parameters["f_fa_li"]), - 1]
    Y_aa=0 if nitrogen_limited else model_parameters['Y_aa']
    S[3, [5, 7]] = [(1-Y_aa) *
                    model_parameters['f_va_aa'], - 1]
    Y_su=0 if nitrogen_limited else model_parameters['Y_su']
    S[4, [4, 5, 8]] = [(1-Y_su)*model_parameters['f_bu_su'],
                       (1-Y_aa)*model_parameters["f_bu_aa"], - 1]
    S[5, [4, 5, 7, 9]] = [(1-Y_su)*model_parameters['f_pro_su'],
                          (1-Y_aa)*model_parameters["f_pro_aa"], (1 - model_parameters['Y_c4'])*0.54, -1]

    Y_fa=0 if nitrogen_limited else model_parameters['Y_fa'] 
    S[6, [4, 5, 6, 7, 8, 9, 10]] = [(1-Y_su)*model_parameters['f_ac_su'],
                                    (1-Y_aa) *
                                    model_parameters['f_ac_aa'],
                                    (1-Y_fa)*0.7,
                                    (1-model_parameters['Y_c4'])*0.31,
                                    (1-model_parameters['Y_c4'])*0.8,
                                    (1-model_parameters['Y_pro'])*0.57,
                                    -1]
    S[7, [4, 5, 6, 7, 8, 9, 11, 25]] = [(1-Y_su)*model_parameters['f_h2_su'],
                                        (1-Y_aa) *
                                        model_parameters['f_h2_aa'],
                                        (1-Y_fa)*0.3,
                                        (1-model_parameters['Y_c4'])*0.15,
                                        (1-model_parameters['Y_c4'])*0.2,
                                        (1-model_parameters['Y_pro'])*0.43,
                                        -1,
                                        -1]
    S[8, [10, 11, 26]] = [(1-model_parameters['Y_ac']),
                          (1-model_parameters['Y_h2']),
                          -1]
    s_1 = (-1 * model_parameters['C_xc'] + model_parameters['f_sI_xc'] * model_parameters['C_sI'] + model_parameters['f_ch_xc'] * model_parameters['C_ch'] +
           model_parameters['f_pr_xc'] * model_parameters['C_pr'] + model_parameters['f_li_xc'] * model_parameters['C_li'] + model_parameters['f_xI_xc'] * model_parameters['C_xI'])
    s_2 = (-1 * model_parameters['C_ch'] + model_parameters['C_su'])
    s_3 = (-1 * model_parameters['C_pr'] + model_parameters['C_aa'])
    s_4 = (-1 * model_parameters['C_li'] + (1 - model_parameters['f_fa_li']) *
           model_parameters['C_su'] + model_parameters['f_fa_li'] * model_parameters['C_fa'])
    s_5 = (-1 * model_parameters['C_su'] + (1 - Y_su) * (model_parameters['f_bu_su'] * model_parameters['C_bu'] + model_parameters['f_pro_su']
                                                                             * model_parameters['C_pro'] + model_parameters['f_ac_su'] * model_parameters['C_ac']) + Y_su * model_parameters['C_bac'])
    s_6 = (-1 * model_parameters['C_aa'] + (1 - Y_aa) * (model_parameters['f_va_aa'] * model_parameters['C_va'] + model_parameters['f_bu_aa'] * model_parameters['C_bu'] +
                                                                             model_parameters['f_pro_aa'] * model_parameters['C_pro'] + model_parameters['f_ac_aa'] * model_parameters['C_ac']) + Y_aa * model_parameters['C_bac'])
    s_7 = (-1 * model_parameters['C_fa'] + (1 - Y_fa) * 0.7 *
           model_parameters['C_ac'] + Y_fa * model_parameters['C_bac'])
    s_8 = (-1 * model_parameters['C_va'] + (1 - model_parameters['Y_c4']) * 0.54 * model_parameters['C_pro'] + (
        1 - model_parameters['Y_c4']) * 0.31 * model_parameters['C_ac'] + model_parameters['Y_c4'] * model_parameters['C_bac'])
    s_9 = (-1 * model_parameters['C_bu'] + (1 - model_parameters['Y_c4']) * 0.8 *
           model_parameters['C_ac'] + model_parameters['Y_c4'] * model_parameters['C_bac'])
    s_10 = (-1 * model_parameters['C_pro'] + (1 - model_parameters['Y_pro']) * 0.57 *
            model_parameters['C_ac'] + model_parameters['Y_pro'] * model_parameters['C_bac'])
    s_11 = (-1 * model_parameters['C_ac'] + (1 - model_parameters['Y_ac']) *
            model_parameters['C_ch4'] + model_parameters['Y_ac'] * model_parameters['C_bac'])
    s_12 = ((1 - model_parameters['Y_h2']) * model_parameters['C_ch4'] +
            model_parameters['Y_h2'] * model_parameters['C_bac'])
    s_13 = (-1 * model_parameters['C_bac'] + model_parameters['C_xc'])
    S[9, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 27]] = [-s_1, -s_2, -s_3, -s_4, -
                                                                                    s_5, -s_6, -s_7, -s_8, -s_9, -s_10, -s_11, -s_12, -s_13, -s_13, -s_13, -s_13, -s_13, -s_13, -s_13, -1]
    S[10, [0, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18]] = [model_parameters['N_xc']-model_parameters['f_xI_xc']*model_parameters['N_I']-model_parameters['f_sI_xc']*model_parameters['N_I']-model_parameters['f_pr_xc']*model_parameters['N_aa'],
                                                                        -Y_su*model_parameters['N_bac'],
                                                                        model_parameters['N_aa']-Y_aa *
                                                                        model_parameters['N_bac'],
                                                                        -Y_fa*model_parameters['N_bac'],
                                                                        -model_parameters['Y_c4']*model_parameters['N_bac'],
                                                                        -model_parameters['Y_c4']*model_parameters['N_bac'],
                                                                        -model_parameters['Y_pro']*model_parameters['N_bac'],
                                                                        -model_parameters['Y_ac']*model_parameters['N_bac'],
                                                                        -model_parameters['Y_h2']*model_parameters['N_bac'],
                                                                        model_parameters['N_bac'] -
                                                                        model_parameters['N_xc'],
                                                                        model_parameters['N_bac'] -
                                                                        model_parameters['N_xc'],
                                                                        model_parameters['N_bac'] -
                                                                        model_parameters['N_xc'],
                                                                        model_parameters['N_bac'] -
                                                                        model_parameters['N_xc'],
                                                                        model_parameters['N_bac'] -
                                                                        model_parameters['N_xc'],
                                                                        model_parameters['N_bac'] -
                                                                        model_parameters['N_xc'],
                                                                        model_parameters['N_bac']-model_parameters['N_xc']]
    S[11, 0] = model_parameters['f_sI_xc']
    S[12, [0, 12, 13, 14, 15, 16, 17, 18]] = [-1, 1, 1, 1, 1, 1, 1, 1]
    S[13, [0, 1]] = [model_parameters['f_ch_xc'], -1]
    S[14, [0, 2]] = [model_parameters['f_pr_xc'], -1]
    S[15, [0, 3]] = [model_parameters['f_li_xc'], -1]
    S[16, [4, 12]] = [Y_su, -1]
    S[17, [5, 13]] = [Y_aa, -1]
    S[18, [6, 14]] = [Y_fa, -1]
    S[19, [7, 8, 15]] = [model_parameters['Y_c4'], model_parameters['Y_c4'], -1]
    S[20, [9, 16]] = [model_parameters['Y_pro'], -1]
    S[21, [10, 17]] = [model_parameters['Y_ac'], -1]
    S[22, [11, 18]] = [model_parameters['Y_h2'], -1]
    S[23, 0] = model_parameters['f_xI_xc']
    S[24, :] = 0
    S[25, :] = 0
    S[26, :] = 0
    S[27, 19] = -1
    S[28, 20] = -1
    S[29, 21] = -1
    S[30, 22] = -1
    S[31, 23] = -1
    S[32, :] = 0
    S[33, 24] = -1
    S[34, :] = 0
    S[35, 25] = base_parameters['V_liq']/base_parameters['V_gas']
    S[36, 26] = base_parameters['V_liq']/base_parameters['V_gas']
    S[37, 27] = base_parameters['V_liq']/base_parameters['V_gas']
    return S
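
For orientation, a short self-contained sketch of the shape conventions (the toy sizes below match the ADM1 code above; the stand-in arrays are illustrative, not ADToolbox API): with n species and m reactions, S is (n, m), and the reaction term of the ODE is the matrix product of S with the (m, 1) rate vector v, exactly as np.matmul(model.s, v) in adm1_ode_sys.

import numpy as np

n_species, n_reactions = 38, 28          # sizes used by adm1_ode_sys above
S = np.zeros((n_species, n_reactions))   # stand-in for the matrix built here
v = np.zeros((n_reactions, 1))           # per-reaction rate vector
dCdt = np.matmul(S, v)                   # reaction contribution to dC/dt
assert dCdt.shape == (n_species, 1)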

build_e_adm_2_stoichiometric_matrix(base_parameters, model_parameters, reactions, species, feed, nitrogen_limited=False)

This function builds the stoichiometric matrix for the e-ADM2 model.

Parameters:

Name | Type | Description | Default
base_parameters | dict | a dictionary which contains the base parameters | required
model_parameters | dict | a dictionary which contains the model parameters | required
reactions | list | a list containing all of the reaction names | required
species | list | a list containing all species | required
feed | Feed | a Feed instance which contains the feed information | required
nitrogen_limited | bool | whether the model is nitrogen limited | False

Returns:

Type | Description
np.ndarray | the stoichiometric matrix of the e-ADM2 model

Source code in adtoolbox/adm.py
def build_e_adm_2_stoichiometric_matrix(base_parameters: dict,
                                             model_parameters: dict,
                                             reactions: list,
                                             species: list,
                                             feed:Feed,
                                             nitrogen_limited:bool=False)->np.ndarray:
    """ 
    This function builds the stoichiometric matrix for e-ADM2 Model.

        Model Parameters (dict): a dictionary which contains model parameters
        base_parameters (dict): a dictionary which contains base paramters
        Initial Conditions (dict): a dictionary containing inlet conditions for all species
        Inlet Conditions (dict): a dictionary containing inlet conditions for all species
        reactions (list): a list containing all of the reaction names
        species (list): a list containing all species

    Returns:
        np.ndarray: Returns an matrix of stochiometic values.
    """
    S = np.zeros((len(species), len(reactions)))
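    # Disintegration splits TSS/TDS into carbohydrate, protein, lipid, and
    # inert/soluble fractions supplied by the Feed instance.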
    S[list(map(species.index, ["TSS", "X_ch", "X_pr", "X_li", "X_I"])),
      reactions.index('TSS_Disintegration')] = [-1,feed.ch_tss, feed.prot_tss, feed.lip_tss, feed.xi_tss]
    S[list(map(species.index, ["TDS", "X_ch", "X_pr", "X_li", "S_I"])), reactions.index('TDS_Disintegration')] = [-1,
                                                                                                                  feed.ch_tds, feed.prot_tds, feed.lip_tds, feed.si_tds]
    S[list(map(species.index, ["X_ch", "S_su"])),
      reactions.index('Hydrolysis carbohydrates')] = [-1, 1]
    S[list(map(species.index, ["X_pr", "S_aa"])),
      reactions.index('Hydrolysis proteins')] = [-1, 1]
    S[list(map(species.index, ["X_li", "S_fa"])),
      reactions.index('Hydrolysis lipids')] = [-1, 1]

    Y_su=0 if nitrogen_limited else model_parameters['Y_su']
    f_ac_su=1-model_parameters['f_pro_su']-model_parameters['f_et_su']-model_parameters['f_lac_su']
    f_IC_su = -(-model_parameters['C_su'] +
                (1-Y_su)*model_parameters['f_pro_su']*model_parameters['C_pro'] +
                (1-Y_su)*model_parameters['f_et_su']*model_parameters['C_et'] +
                (1-Y_su)*model_parameters['f_lac_su']*model_parameters['C_lac'] +
                (1-Y_su)*f_ac_su*model_parameters['C_ac'] +
                Y_su*model_parameters['C_bac'])


    S[list(map(species.index, ["S_su", "S_pro", "S_et", "S_lac", "S_ac", "S_IN", "S_IC", "X_su"])),
      reactions.index('Uptake of sugars')] = [-1,
                                              (1-Y_su) * model_parameters['f_pro_su'],
                                              (1-Y_su) * model_parameters['f_et_su'],
                                              (1-Y_su) * model_parameters['f_lac_su'],
                                              (1-Y_su) * f_ac_su,
                                              -model_parameters['N_bac']*Y_su,
                                              f_IC_su,
                                              Y_su]

    Y_aa=0 if nitrogen_limited else model_parameters['Y_aa']
    f_ac_aa=1-model_parameters['f_pro_aa']-model_parameters['f_et_aa']-model_parameters['f_lac_aa']
    f_IC_aa = -(-model_parameters['C_aa'] +
                (1-Y_aa)*model_parameters['f_pro_aa']*model_parameters['C_pro'] +
                (1-Y_aa)*model_parameters['f_et_aa']*model_parameters['C_et'] +
                (1-Y_aa)*model_parameters['f_lac_aa']*model_parameters['C_lac'] +
                (1-Y_aa)*f_ac_aa*model_parameters['C_ac'] +
                Y_aa*model_parameters['C_bac'])



    S[list(map(species.index, ["S_aa", "S_pro", "S_et", "S_lac", "S_ac", "S_IN", "S_IC", "X_aa"])),
      reactions.index('Uptake of amino acids')] = [-1,
                                                   (1-Y_aa) * model_parameters['f_pro_aa'],
                                                   (1-Y_aa) * model_parameters['f_et_aa'],
                                                   (1-Y_aa) * model_parameters['f_lac_aa'],
                                                   (1-Y_aa) * f_ac_aa,
                                                   model_parameters['N_aa']-Y_aa * model_parameters['N_bac'],
                                                   f_IC_aa,
                                                   Y_aa]

    Y_fa=0 if nitrogen_limited else model_parameters['Y_fa']
    f_ac_fa=1-model_parameters['f_pro_fa']-model_parameters['f_et_fa']-model_parameters['f_lac_fa']
    f_IC_fa = -(-model_parameters['C_fa']+
                (1-Y_fa)*model_parameters['f_pro_fa']*model_parameters['C_pro'] +
                (1-Y_fa)*model_parameters['f_et_fa']*model_parameters['C_et'] +
                (1-Y_fa)*model_parameters['f_lac_fa']*model_parameters['C_lac'] +
                (1-Y_fa)*f_ac_fa*model_parameters['C_ac'] +
                Y_fa*model_parameters['C_bac'])
    # if f_IC_fa<0:
    #     raise ValueError("f_IC_fa is negative") 

    S[list(map(species.index, ["S_fa", "S_pro", "S_et", "S_lac", "S_ac", "S_IN", "S_IC", "X_fa"])),
      reactions.index('Uptake of LCFA')] = [-1,
                                            (1-Y_fa) * model_parameters['f_pro_fa'],
                                            (1-Y_fa) * model_parameters['f_et_fa'],
                                            (1-Y_fa) * model_parameters['f_lac_fa'],
                                            (1-Y_fa) * f_ac_fa,
                                            -Y_fa * model_parameters['N_bac'],
                                            f_IC_fa,
                                            Y_fa]
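    # f_ac_su/f_ac_aa/f_ac_fa are 1 minus the other product fractions, so a
    # negative value means the supplied fractions sum to more than one.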
    if any([f_ac_fa<0,f_ac_aa<0,f_ac_su<0]):
        raise ValueError("f_ac is negative")
    Y_ac_et=0 if nitrogen_limited else model_parameters['Y_ac_et']
    Y_ac_lac=0 if nitrogen_limited else model_parameters['Y_ac_lac']
    f_IC_ac_et = -(-model_parameters['C_ac'] +
                    model_parameters['f_et_ac']*model_parameters['C_et'] +
                   (1-model_parameters['f_et_ac']-Y_ac_et) * model_parameters['f_bu_ac']*model_parameters['C_bu'] +
                   Y_ac_et*model_parameters['C_bac'])

    f_IC_ac_lac = -(-model_parameters['C_ac'] +
                    model_parameters['f_lac_ac']*model_parameters['C_lac'] +
                    (1-model_parameters['f_lac_ac']-Y_ac_lac) * model_parameters['f_bu_ac']*model_parameters['C_bu'] +
                    Y_ac_lac*model_parameters['C_bac'])


    S[list(map(species.index, ["S_ac", "S_et", "S_bu", "S_IN", "S_IC", "S_h2", "X_ac_et"])),
      reactions.index('Uptake of acetate_et')] = [-1,
                                                  model_parameters['f_et_ac'],
                                                  (1- model_parameters['f_et_ac']-Y_ac_et) * model_parameters['f_bu_ac'],
                                                  -Y_ac_et * model_parameters['N_bac'],
                                                  f_IC_ac_et,
                                                  (1- model_parameters['f_et_ac']-Y_ac_et) * (1-model_parameters['f_bu_ac']),
                                                  Y_ac_et]

    S[list(map(species.index, ["S_ac", "S_lac", "S_bu", "S_IN", "S_IC", "S_h2", "X_ac_lac"])),
        reactions.index('Uptake of acetate_lac')] = [-1,
                                                    model_parameters['f_lac_ac'],
                                                     (1-model_parameters['f_lac_ac']-Y_ac_lac) * model_parameters['f_bu_ac'],
                                                     -Y_ac_lac * model_parameters['N_bac'],
                                                     f_IC_ac_lac,
                                                     (1-model_parameters['f_lac_ac']-Y_ac_lac) * (1-model_parameters['f_bu_ac']),
                                                     Y_ac_lac]

    Y_pro_et=0 if nitrogen_limited else model_parameters['Y_pro_et']
    Y_pro_lac=0 if nitrogen_limited else model_parameters['Y_pro_lac']

    f_IC_pro_et = -(-model_parameters['C_pro'] +
                    model_parameters['f_et_pro']*model_parameters['C_et'] +
                    (1-model_parameters['f_et_pro']-Y_pro_et)*model_parameters['f_va_pro']*model_parameters['C_va'] +
                    (Y_pro_et)*model_parameters['C_bac'])

    f_IC_pro_lac = -(-model_parameters['C_pro'] +
                     model_parameters['f_lac_pro']*model_parameters['C_lac'] +
                     (1-model_parameters['f_lac_pro']-Y_pro_lac)*model_parameters['f_va_pro']*model_parameters['C_va'] +
                     (Y_pro_lac)*model_parameters['C_bac'])




    S[list(map(species.index, ["S_pro", "S_et", "S_va", "S_IN", "S_IC", "S_h2", "X_chain_et"])),
      reactions.index('Uptake of propionate_et')] = [-1,
                                                    model_parameters['f_et_pro'],
                                                     (1-model_parameters['f_et_pro']-Y_pro_et) * model_parameters['f_va_pro'],
                                                     -Y_pro_et *  model_parameters['N_bac'],
                                                     f_IC_pro_et,
                                                     (1-model_parameters['f_et_pro']-Y_pro_et) * (1-model_parameters['f_va_pro']),
                                                     model_parameters['Y_chain_et_pro']]

    S[list(map(species.index, ["S_pro", "S_lac", "S_va", "S_IN", "S_IC", "S_h2", "X_chain_lac"])),
        reactions.index('Uptake of propionate_lac')] = [-1,
                                                        model_parameters['f_lac_pro'],
                                                        (1-model_parameters['f_lac_pro']-Y_pro_lac) * model_parameters['f_va_pro'],
                                                        -Y_pro_lac * model_parameters['N_bac'],
                                                        f_IC_pro_lac,
                                                        (1-model_parameters['f_lac_pro']-Y_pro_lac) * (1-model_parameters['f_va_pro']),
                                                        model_parameters['Y_chain_lac_pro']]

    Y_bu_et=0 if nitrogen_limited else model_parameters['Y_bu_et']
    Y_bu_lac=0 if nitrogen_limited else model_parameters['Y_bu_lac']
    f_IC_bu_et = -(-model_parameters['C_bu'] +
                    model_parameters['f_et_bu']*model_parameters['C_et'] +
                   (1-model_parameters['f_et_bu']-Y_bu_et)*model_parameters['f_cap_bu']*model_parameters['C_cap'] +
                   (Y_bu_et)*model_parameters['C_bac'])

    f_IC_bu_lac = -(-model_parameters['C_bu'] +
                    model_parameters['f_lac_bu']*model_parameters['C_lac'] +
                    (1-model_parameters['f_lac_bu']-Y_bu_lac)*model_parameters['f_cap_bu']*model_parameters['C_cap'] +
                    (Y_bu_lac)*model_parameters['C_bac'])


    S[list(map(species.index, ["S_bu", "S_et", "S_cap", "S_IN", "S_IC", "S_h2", "X_chain_et"])),
        reactions.index('Uptake of butyrate_et')] = [-1,
                                                     model_parameters['f_et_bu'],
                                                     (1-model_parameters['f_et_bu']-Y_bu_et) * model_parameters['f_cap_bu'],
                                                     -Y_bu_et * model_parameters['N_bac'],
                                                     f_IC_bu_et,
                                                     (1-model_parameters['f_et_bu']-Y_bu_et)*(1-model_parameters['f_cap_bu']),
                                                     Y_bu_et]

    S[list(map(species.index, ["S_bu", "S_lac", "S_cap", "S_IN", "S_IC", "S_h2", "X_chain_lac"])),
        reactions.index('Uptake of butyrate_lac')] = [-1,
                                                      model_parameters['f_lac_bu'],
                                                      (1- model_parameters['f_lac_bu']-Y_bu_lac) * model_parameters['f_cap_bu'],
                                                      -Y_bu_lac *model_parameters['N_bac'],
                                                      f_IC_bu_lac,
                                                      (1- model_parameters['f_lac_bu']-Y_bu_lac)*(1-model_parameters['f_cap_bu']),
                                                      Y_bu_lac]


    Y_va=0 if nitrogen_limited else model_parameters['Y_va']

    S[list(map(species.index, ["S_va", "S_pro", "X_VFA_deg"])),
        reactions.index('Uptake of valerate')] = [-1,
                                                  (1-Y_va),
                                                  Y_va,
                                                  ]

    Y_cap=0 if nitrogen_limited else model_parameters['Y_cap']
    S[list(map(species.index, ["S_cap", "S_ac", "X_VFA_deg"])),
        reactions.index('Uptake of caproate')] = [-1,
                                                  (1 - Y_cap),
                                                  Y_cap]

    Y_bu=0 if nitrogen_limited else model_parameters['Y_bu']
    S[list(map(species.index, ["S_bu", "S_ac", "X_VFA_deg"])),
        reactions.index('Uptake of butyrate')] = [-1,
                                                  (1 - Y_bu),
                                                  Y_bu]



    Y_Me_ac=0 if nitrogen_limited else model_parameters["Y_Me_ac"]
    f_IC_Me_ach2 =0
    S[list(map(species.index, ["S_gas_h2", "S_ac", "S_ch4", "X_Me_ac", 'S_IC', 'S_IN'])),
        reactions.index('Methanogenessis from acetate and h2')] = [-1,
                                                                   model_parameters['f_ac_h2'],
                                                                   (1 +model_parameters['f_ac_h2']- Y_Me_ac),
                                                                   Y_Me_ac,
                                                                   f_IC_Me_ach2,
                                                                    -Y_Me_ac *model_parameters['N_bac']
                                                                   ]

    Y_Me_CO2=0 if nitrogen_limited else model_parameters["Y_Me_CO2"]


    S[list(map(species.index, ["S_gas_h2", "S_gas_ch4", "X_Me_CO2", 'S_gas_co2',"S_IN"])),
        reactions.index('Methanogenessis from CO2 and h2')] = [-1,
                                                               (1 -model_parameters['f_co2_ch4']- Y_Me_CO2),
                                                               (Y_Me_CO2),
                                                               model_parameters['f_co2_ch4'],
                                                                -Y_Me_CO2 *model_parameters['N_bac']
                                                                ]



    Y_ac_et_ox=0 if nitrogen_limited else model_parameters["Y_ac_et_ox"]
    f_IC_et_ox=-(-model_parameters['C_et'] +
                    (1-Y_ac_et_ox)*model_parameters['C_bac']
                    +Y_ac_et_ox*model_parameters['C_ac'])

    S[list(map(species.index, ["S_et", "X_et","S_ac","S_IC"])),
        reactions.index('Uptake of ethanol')] = [-1,1-Y_ac_et_ox,Y_ac_et_ox,f_IC_et_ox]


    Y_pro_lac_ox=0 if nitrogen_limited else model_parameters['Y_pro_lac_ox']
    f_IC_lac_ox=-(-model_parameters['C_lac'] +
                (1-Y_pro_lac_ox)*model_parameters['C_bac']
                +Y_pro_lac_ox*model_parameters['C_pro'])

    S[list(map(species.index, ["S_lac" ,"S_pro","X_lac","S_IC"])),
        reactions.index('Uptake of lactate')] = [-1, 1-Y_pro_lac_ox,Y_pro_lac_ox,f_IC_lac_ox]

    S[list(map(species.index, ["X_su", "TSS","S_IN","S_IC"])),
        reactions.index('Decay of Xsu')] = [-1, 1,model_parameters['N_bac'],model_parameters['C_bac']]

    S[list(map(species.index, ["X_aa", "TSS","S_IN","S_IC"])),
        reactions.index('Decay of Xaa')] = [-1, 1,model_parameters['N_bac'],model_parameters['C_bac']]

    S[list(map(species.index, ["X_fa", "TSS","S_IN","S_IC"])),
        reactions.index('Decay of Xfa')] = [-1, 1,model_parameters['N_bac'],model_parameters['C_bac']]

    S[list(map(species.index, ["X_ac_et", "TSS","S_IN","S_IC"])),
        reactions.index('Decay of X_ac_et')] = [-1, 1,model_parameters['N_bac'],model_parameters['C_bac']]

    S[list(map(species.index, ["X_ac_lac", "TSS","S_IN","S_IC"])),
        reactions.index('Decay of X_ac_lac')] = [-1, 1,model_parameters['N_bac'],model_parameters['C_bac']]

    S[list(map(species.index, ["X_chain_et", "TSS", "S_IN","S_IC"])),
        reactions.index('Decay of X_chain_et')] = [-1, 1,model_parameters['N_bac'],model_parameters['C_bac']]

    S[list(map(species.index, ["X_chain_lac", "TSS", "S_IN","S_IC"])),
        reactions.index('Decay of X_chain_lac')] = [-1, 1,model_parameters['N_bac'],model_parameters['C_bac']]

    S[list(map(species.index, ["X_VFA_deg", "TSS", "S_IN","S_IC"])),
        reactions.index('Decay of X_VFA_deg')] = [-1, 1,model_parameters['N_bac'],model_parameters['C_bac']]

    S[list(map(species.index, ["X_Me_ac", "TSS", "S_IN","S_IC"])),
        reactions.index('Decay of X_Me_ac')] = [-1, 1,model_parameters['N_bac'],model_parameters['C_bac']]

    S[list(map(species.index, ["X_Me_CO2", "TSS", "S_IN","S_IC"])),
        reactions.index('Decay of X_Me_CO2')] = [-1, 1,model_parameters['N_bac'],model_parameters['C_bac']]

    S[list(map(species.index, ["S_va_ion"])),
        reactions.index('Acid Base Equilibrium (Va)')] = [-1]

    S[list(map(species.index, ["S_bu_ion"])),
        reactions.index('Acid Base Equilibrium (Bu)')] = [-1]

    S[list(map(species.index, ["S_pro_ion"])),
        reactions.index('Acid Base Equilibrium (Pro)')] = [-1]

    S[list(map(species.index, ["S_cap_ion"])),
        reactions.index('Acid Base Equilibrium (Cap)')] = [-1]

    S[list(map(species.index, ["S_lac_ion"])),
        reactions.index('Acid Base Equilibrium (Lac)')] = [-1]

    S[list(map(species.index, ["S_ac_ion"])),
        reactions.index('Acid Base Equilibrium (Ac)')] = [-1]

    S[list(map(species.index, ["S_hco3_ion"])),  # I don't think this is right; should look at the reaction in ADM1
        reactions.index('Acid Base Equilibrium (CO2)')] = [-1]

    S[list(map(species.index, ["S_nh3", "S_nh4_ion"])),
        reactions.index('Acid Base Equilibrium (In)')] = [-1, 1]  # I don't think this is right; should look at the reaction in ADM1

    S[list(map(species.index, ["S_h2", "S_gas_h2"])),
        reactions.index('Gas Transfer H2')] = [-base_parameters['V_liq']/base_parameters['V_gas'], 1]
    S[list(map(species.index, ["S_ch4", "S_gas_ch4"])),
        reactions.index('Gas Transfer CH4')] = [-base_parameters['V_liq']/base_parameters['V_gas'], 1]
    S[list(map(species.index, ["S_co2", "S_gas_co2"])),
        reactions.index('Gas Transfer CO2')] = [-base_parameters['V_liq']/base_parameters['V_gas'], 1]

    return S
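
A small runnable sketch of the NumPy fancy-indexing idiom used throughout this builder (toy species and reaction names, for illustration only): one column of S, i.e. one reaction, is assigned at several species rows in a single statement.

import numpy as np

species = ["S_su", "S_ac", "X_su"]        # toy names, illustration only
reactions = ["Uptake of sugars"]
S = np.zeros((len(species), len(reactions)))

rows = list(map(species.index, ["S_su", "S_ac", "X_su"]))
col = reactions.index("Uptake of sugars")
S[rows, col] = [-1, 0.9, 0.1]             # substrate consumed, products formed
print(S[:, col])                          # [-1.   0.9  0.1]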

build_e_adm_stoiciometric_matrix(base_parameters, model_parameters, reactions, species, feed, nitrogen_limited=False)

This function builds the stoichiometric matrix for the e-ADM model.

Parameters:

Name | Type | Description | Default
base_parameters | dict | a dictionary which contains the base parameters | required
model_parameters | dict | a dictionary which contains the model parameters | required
reactions | list | a list containing all of the reaction names | required
species | list | a list containing all species | required
feed | Feed | a Feed instance which contains the feed information | required
nitrogen_limited | bool | whether the model is nitrogen limited | False

Returns:

Type | Description
np.ndarray | the stoichiometric matrix of the e-ADM model

Source code in adtoolbox/adm.py
def build_e_adm_stoiciometric_matrix(base_parameters: dict,
                                     model_parameters: dict,
                                     reactions: list,
                                     species: list,
                                     feed:Feed,
                                     nitrogen_limited:bool=False)->np.ndarray:
    """ 
    This function builds the stoichiometric matrix for the e_ADM Model.

        Model Parameters (dict): a dictionary which contains model parameters
        base_parameters (dict): a dictionary which contains base paramters
        Initial Conditions (dict): a dictionary containing inlet conditions for all species
        Inlet Conditions (dict): a dictionary containing inlet conditions for all species
        reactions (list): a list containing all of the reaction names
        species (list): a list containing all species

    Returns:
        np.ndarray: Returns an matrix of stochiometic values.
    """
    S = np.zeros((len(species), len(reactions)))
    S[list(map(species.index, ["TSS", "X_ch", "X_pr", "X_li", "X_I"])),
      reactions.index('TSS_Disintegration')] = [-1,feed.ch_tss, feed.prot_tss, feed.lip_tss,feed.xi_tss]
    S[list(map(species.index, ["TDS", "X_ch", "X_pr", "X_li", "S_I"])), reactions.index('TDS_Disintegration')] = [-1,
                                                                                                            feed.ch_tds, feed.prot_tds, feed.lip_tds, feed.si_tds]
    S[list(map(species.index, ["X_ch", "S_su"])),reactions.index('Hydrolysis carbohydrates')] = [-1, 1]
    S[list(map(species.index, ["X_pr", "S_aa"])),reactions.index('Hydrolysis proteins')] = [-1, 1]
    S[list(map(species.index, ["X_li", "S_fa"])),reactions.index('Hydrolysis lipids')] = [-1, 1]

    f_IC_su_et=-(-model_parameters['C_su']+
               (1-model_parameters['Y_su_et']) * model_parameters['C_et']+
               (1-model_parameters['Y_su_et']) * model_parameters['C_bac']
              )

    f_IC_su_lac=-(-model_parameters['C_su']+
              (1-model_parameters['Y_su_lac']) * model_parameters['C_lac']+
              (1-model_parameters['Y_su_lac']) * model_parameters['C_bac']
              )

    f_IC_su_ac=-(-model_parameters['C_su']+
               (1-model_parameters['Y_su_ac']) * model_parameters['C_ac']+
               (1-model_parameters['Y_su_ac']) * model_parameters['C_bac']
              )
    f_IC_su_pro=-(-model_parameters['C_su']+
                (1-model_parameters['Y_su_pro']) * model_parameters['C_pro']+
                (1-model_parameters['Y_su_pro']) * model_parameters['C_bac']
              )

    S[list(map(species.index, ["S_su","S_et","S_IN","S_IC","X_su"])),
     reactions.index('Su_to_et')] = [-1,
                                         (1-model_parameters['Y_su_et']),
                                           -model_parameters['N_bac']* model_parameters['Y_su_et'],
                                            f_IC_su_et,
                                            model_parameters['Y_su_et']]

    S[list(map(species.index, ["S_su","S_lac","S_IN","S_IC","X_su"])),
     reactions.index('Su_to_lac')] = [-1,
                                                (1-model_parameters['Y_su_lac']),
                                                -model_parameters['N_bac']* model_parameters['Y_su_lac'],
                                                f_IC_su_lac,
                                                model_parameters['Y_su_lac']]

    S[list(map(species.index, ["S_su","S_ac","S_IN","S_IC","X_su"])),
      reactions.index('Su_to_ac')] = [-1,
                                    (1-model_parameters['Y_su_ac']),
                                    -model_parameters['N_bac']* model_parameters['Y_su_ac'],
                                    f_IC_su_ac,
                                    model_parameters['Y_su_ac']]

    S[list(map(species.index, ["S_su","S_pro","S_IN","S_IC","X_su"])),
            reactions.index('Su_to_pro')] = [-1,
                                            (1-model_parameters['Y_su_pro']),
                                              -model_parameters['N_bac']* model_parameters['Y_su_pro'],
                                              f_IC_su_pro,
                                              model_parameters['Y_su_pro']]

    f_IC_aa_lac=-(-model_parameters['C_aa']+
              (1-model_parameters['Y_aa_lac']) * model_parameters['C_lac']+
              (1-model_parameters['Y_aa_lac']) * model_parameters['C_bac']
              )

    f_IC_aa_ac=-(-model_parameters['C_aa']+
              (1-model_parameters['Y_aa_ac']) * model_parameters['C_ac']+
              (1-model_parameters['Y_aa_ac']) * model_parameters['C_bac']
              )

    f_IC_aa_pro=-(-model_parameters['C_aa']+
              (1-model_parameters['Y_aa_pro']) * model_parameters['C_pro']+
              (1-model_parameters['Y_aa_pro']) * model_parameters['C_bac']
              )


    S[list(map(species.index, ["S_aa","S_lac","S_IN", "S_IC", "X_aa"])),
      reactions.index('aas_to_lac')] = [-1,
                                             (1-model_parameters['Y_aa_lac']),
                                             model_parameters['N_aa']- model_parameters['Y_aa_lac'] * model_parameters['N_bac'],
                                             f_IC_aa_lac,
                                             model_parameters['Y_aa_lac']]

    S[list(map(species.index, ["S_aa","S_pro","S_IN", "S_IC", "X_aa"])),
      reactions.index('aas_to_pro')] = [-1,
                                             (1-model_parameters['Y_aa_pro']),
                                             model_parameters['N_aa']- model_parameters['Y_aa_pro'] * model_parameters['N_bac'],
                                             f_IC_aa_pro,
                                             model_parameters['Y_aa_pro']]


    S[list(map(species.index, ["S_aa","S_ac","S_IN", "S_IC", "X_aa"])),
      reactions.index('aas_to_ac')] = [-1,
                                             (1-model_parameters['Y_aa_ac']),
                                             model_parameters['N_aa']- model_parameters['Y_aa_ac'] * model_parameters['N_bac'],
                                             f_IC_aa_ac,
                                             model_parameters['Y_aa_ac']]

    Y_fa=0 if nitrogen_limited else model_parameters['Y_fa']
    f_IC_fa = -(-model_parameters['C_fa'] +
                (1-Y_fa)*model_parameters['f_pro_fa']*model_parameters['C_pro'] +
                (1-Y_fa)*model_parameters['f_ac_fa']*model_parameters['C_ac'] +
                (1-Y_fa)*model_parameters['C_bac'])

    S[list(map(species.index, ["S_fa", "S_pro", "S_ac", "S_IN", "S_IC", "X_fa"])),
      reactions.index('Uptake of LCFA')] = [-1,
                                            (1-Y_fa) * model_parameters['f_pro_fa'],
                                            (1-Y_fa) * model_parameters['f_ac_fa'],
                                              -Y_fa * model_parameters['N_bac'],
                                              f_IC_fa,
                                              Y_fa]
    Y_ac_et=0 if nitrogen_limited else model_parameters['Y_ac_et']
    Y_ac_lac=0 if nitrogen_limited else model_parameters['Y_ac_lac']
    f_IC_ac_et = -((-1-(1-Y_ac_et) * model_parameters['f_et_ac'])*model_parameters['C_ac'] +
                   (1-Y_ac_et)* model_parameters['f_et_ac']*model_parameters['C_et'] +
                   (1-Y_ac_et) * model_parameters['f_bu_ac']*model_parameters['C_bu'] +
                   (1-Y_ac_et)* model_parameters['C_bac'])  

    f_IC_ac_lac = -((-1-(1-Y_ac_lac) * model_parameters['f_lac_ac'])*model_parameters['C_ac'] +
                    (1-Y_ac_lac)* model_parameters['f_lac_ac']* model_parameters['C_lac'] +
                    (1-Y_ac_lac)* model_parameters['f_bu_ac']* model_parameters['C_bu'] +
                    (1-Y_ac_lac)* model_parameters['C_bac'])

    S[list(map(species.index, ["S_ac", "S_et", "S_bu", "S_IN", "S_IC", "S_h2", "X_ac_et"])),
      reactions.index('Uptake of acetate_et')] = [-1-(1-Y_ac_et) * model_parameters['f_et_ac'],
                                                  (1-Y_ac_et) * model_parameters['f_et_ac'],
                                                  (1-Y_ac_et) * model_parameters['f_bu_ac'],
                                                  -Y_ac_et * model_parameters['N_bac'],
                                                  f_IC_ac_et,
                                                  (1-Y_ac_et) * (1-model_parameters['f_bu_ac']),
                                                  Y_ac_et]

    S[list(map(species.index, ["S_ac", "S_lac", "S_bu", "S_IN", "S_IC", "S_h2", "X_ac_lac"])),
        reactions.index('Uptake of acetate_lac')] = [-1-(1-Y_ac_lac) * model_parameters['f_lac_ac'],
                                                     (1-Y_ac_lac) * model_parameters['f_lac_ac'],
                                                     (1-Y_ac_lac) * model_parameters['f_bu_ac'],
                                                     -Y_ac_lac * model_parameters['N_bac'], 
                                                     f_IC_ac_lac,
                                                     (1-Y_ac_lac) * (1-model_parameters['f_bu_ac']),
                                                     Y_ac_lac]

    Y_pro_et=0 if nitrogen_limited else model_parameters['Y_pro_et']
    Y_pro_lac=0 if nitrogen_limited else model_parameters['Y_pro_lac']

    f_IC_pro_et = -((-1-(1-Y_pro_et) * model_parameters['f_et_pro'])*model_parameters['C_pro'] +
                    (1-Y_pro_et)*model_parameters['f_et_pro']*model_parameters['C_et'] +
                    (1-Y_pro_et)*model_parameters['f_va_pro']*model_parameters['C_va'] +
                    (1-Y_pro_et)*model_parameters['C_bac'])

    f_IC_pro_lac = -((-1-(1-Y_pro_lac) * model_parameters['f_lac_pro'])*model_parameters['C_pro'] +
                     (1-Y_pro_lac)*model_parameters['f_lac_pro']*model_parameters['C_lac'] +
                     (1-Y_pro_lac)*model_parameters['f_va_pro']*model_parameters['C_va'] +
                     (1-Y_pro_lac)*model_parameters['C_bac'])

    S[list(map(species.index, ["S_pro", "S_et", "S_va","S_IC","S_IN","S_h2", "X_chain_et"])),
      reactions.index('Uptake of propionate_et')] = [-1-(1-model_parameters['Y_chain_et_pro']) * model_parameters['f_et_pro'],
                                                     (1-model_parameters['Y_chain_et_pro']) * model_parameters['f_et_pro'],
                                                     (1-model_parameters['Y_chain_et_pro']) * model_parameters['f_va_pro'],
                                                     f_IC_pro_et,
                                                     -model_parameters['Y_chain_et_pro'] * model_parameters['N_bac'],
                                                     (1-model_parameters['Y_chain_et_pro']) * (1-model_parameters['f_va_pro']),
                                                     model_parameters['Y_chain_et_pro']]

    S[list(map(species.index, ["S_pro", "S_lac", "S_va", "S_IC", "S_IN", "S_h2", "X_chain_lac"])),
        reactions.index('Uptake of propionate_lac')] = [-1-(1-model_parameters['Y_chain_lac_pro']) * model_parameters['f_lac_pro'],
                                                        (1-model_parameters['Y_chain_lac_pro']) * model_parameters['f_lac_pro'],
                                                        (1-model_parameters['Y_chain_lac_pro']) * model_parameters['f_va_pro'],
                                                        f_IC_pro_lac,
                                                        -model_parameters['Y_chain_lac_pro'] * model_parameters['N_bac'],
                                                        (1-model_parameters['Y_chain_lac_pro']) * (1-model_parameters['f_va_pro']),
                                                        model_parameters['Y_chain_lac_pro']]

    Y_bu_et=0 if nitrogen_limited else model_parameters['Y_bu_et']
    Y_bu_lac=0 if nitrogen_limited else model_parameters['Y_bu_lac']

    f_IC_bu_et = -((-1-(1-Y_bu_et) * model_parameters['f_et_bu'])*model_parameters['C_bu'] +
                   (1-Y_bu_et)*model_parameters['f_et_bu']*model_parameters['C_et'] +
                   (1-Y_bu_et)*model_parameters['f_cap_bu']*model_parameters['C_cap'] +
                   (1-Y_bu_et)*model_parameters['C_bac'])

    f_IC_bu_lac = -((-1-(1-Y_bu_lac) * model_parameters['f_lac_bu'])*model_parameters['C_bu'] +
                    (1-Y_bu_lac)*model_parameters['f_lac_bu']*model_parameters['C_lac'] +
                    (1-Y_bu_lac)*model_parameters['f_cap_bu']*model_parameters['C_cap'] +
                    (1-Y_bu_lac)*model_parameters['C_bac'])

    S[list(map(species.index, ["S_bu", "S_et", "S_cap", "S_IC", "S_IN", "S_h2", "X_chain_et"])),
        reactions.index('Uptake of butyrate_et')] = [-1-(1-Y_bu_et) * model_parameters['f_et_bu'],
                                                     (1-Y_bu_et) * model_parameters['f_et_bu'],
                                                     (1-Y_bu_et) * model_parameters['f_cap_bu'],
                                                     f_IC_bu_et,
                                                     -Y_bu_et * model_parameters['N_bac'],
                                                     (1-Y_bu_et)*(1-model_parameters['f_cap_bu']),
                                                     Y_bu_et]

    S[list(map(species.index, ["S_bu", "S_lac", "S_cap", "S_IC", "S_IN", "S_h2", "X_chain_lac"])),
        reactions.index('Uptake of butyrate_lac')] = [-1-(1-Y_bu_lac) * model_parameters['f_lac_bu'],
                                                      (1-Y_bu_lac) * model_parameters['f_lac_bu'],
                                                      (1-Y_bu_lac) * model_parameters['f_cap_bu'],
                                                      f_IC_bu_lac,
                                                      -Y_bu_lac * model_parameters['N_bac'],
                                                      (1-Y_bu_lac)*(1-model_parameters['f_cap_bu']),
                                                      Y_bu_lac]

    Y_va=0 if nitrogen_limited else model_parameters['Y_va']
    Y_cap=0 if nitrogen_limited else model_parameters['Y_cap']
    S[list(map(species.index, ["S_va", "S_pro", "X_VFA_deg"])),
        reactions.index('Uptake of valerate')] = [-1,
                                                  (1-Y_va),
                                                  Y_va]

    S[list(map(species.index, ["S_cap", "S_ac", "X_VFA_deg"])),
        reactions.index('Uptake of caproate')] = [-1,
                                                  (1 - Y_cap),
                                                  Y_cap]

    S[list(map(species.index, ["S_bu", "S_ac", "X_VFA_deg"])),
        reactions.index('Uptake of butyrate')] = [-1,
                                                  (1 - model_parameters['Y_bu']),
                                                  model_parameters['Y_bu']]

    Y_Me_ac=0 if nitrogen_limited else model_parameters["Y_Me_ac"]
    f_IC_Me_ach2 = -((1 - model_parameters['Y_h2_ac'])*model_parameters['f_ac_h2']*model_parameters['C_ac']+
                     (1 -Y_Me_ac)*model_parameters['C_ch4']+
                     Y_Me_ac*model_parameters['C_bac'])



    S[list(map(species.index, ["S_h2", "S_ac", "S_ch4", "X_Me_ac", 'S_IC'])),
        reactions.index('Methanogenessis from acetate and h2')] = [-1-(1 - model_parameters['Y_h2_ac'])*model_parameters['f_ac_h2'],
                    (1 - model_parameters['Y_h2_ac'])*model_parameters['f_ac_h2'],
                    (1 -model_parameters['Y_h2_ac']),
                    model_parameters['Y_h2_ac'],
                    f_IC_Me_ach2]

    f_IC_Me_CO2h2 = -(model_parameters['Y_h2_CO2']*model_parameters['C_ch4'] +
                      model_parameters['Y_h2_CO2']*model_parameters['C_bac'])

    S[list(map(species.index, ["S_h2", "S_ch4", "X_Me_CO2", 'S_IC'])),
        reactions.index('Methanogenessis from CO2 and h2')] = [-1,
                                                               (1 - model_parameters['Y_h2_CO2']),
                                                               (model_parameters['Y_h2_CO2']),
                                                               f_IC_Me_CO2h2]

    Y_ac_et_ox=0 if nitrogen_limited else model_parameters["Y_ac_et_ox"]

    f_IC_et_ox=-(-model_parameters['C_et'] +
                    (1-Y_ac_et_ox)*model_parameters['C_bac']
                    +Y_ac_et_ox*model_parameters['C_ac'])

    S[list(map(species.index, ["S_et", "X_et","S_ac","S_IC"])),
        reactions.index('Uptake of ethanol')] = [-1,Y_ac_et_ox,(1-Y_ac_et_ox),f_IC_et_ox]


    Y_pro_lac_ox=0 if nitrogen_limited else model_parameters['Y_pro_lac_ox']
    f_IC_lac_ox=-(-model_parameters['C_lac'] +
                (1-Y_pro_lac_ox)*model_parameters['C_bac']
                +Y_pro_lac_ox*model_parameters['C_pro'])

    S[list(map(species.index, ["S_lac", "X_lac","S_pro","S_IC"])),
        reactions.index('Uptake of lactate')] = [-1, Y_pro_lac_ox,(1-Y_pro_lac_ox),f_IC_lac_ox]

    S[list(map(species.index, ["X_su", "TSS"])),
        reactions.index('Decay of Xsu')] = [-1, 1]

    S[list(map(species.index, ["X_aa", "TSS"])),
        reactions.index('Decay of Xaa')] = [-1, 1]

    S[list(map(species.index, ["X_fa", "TSS"])),
        reactions.index('Decay of Xfa')] = [-1, 1]

    S[list(map(species.index, ["X_ac_et", "TSS"])),
        reactions.index('Decay of X_ac_et')] = [-1, 1]

    S[list(map(species.index, ["X_ac_lac", "TSS"])),
        reactions.index('Decay of X_ac_lac')] = [-1, 1]

    S[list(map(species.index, ["X_chain_et", "TSS"])),
        reactions.index('Decay of X_chain_et')] = [-1, 1]

    S[list(map(species.index, ["X_chain_lac", "TSS"])),
        reactions.index('Decay of X_chain_lac')] = [-1, 1]

    S[list(map(species.index, ["X_VFA_deg", "TSS"])),
        reactions.index('Decay of X_VFA_deg')] = [-1, 1]

    S[list(map(species.index, ["X_Me_ac", "TSS"])),
        reactions.index('Decay of X_Me_ac')] = [-1, 1]

    S[list(map(species.index, ["X_Me_CO2", "TSS"])),
        reactions.index('Decay of X_Me_CO2')] = [-1, 1]

    S[list(map(species.index, ["S_va_ion","S_va"])),
        reactions.index('Acid Base Equilibrium (Va)')] = [-1,1]

    S[list(map(species.index, ["S_bu_ion","S_bu"])),
        reactions.index('Acid Base Equilibrium (Bu)')] = [-1,1]

    S[list(map(species.index, ["S_pro_ion","S_pro"])),
        reactions.index('Acid Base Equilibrium (Pro)')] = [-1,1]

    S[list(map(species.index, ["S_cap_ion","S_cap"])),
        reactions.index('Acid Base Equilibrium (Cap)')] = [-1,1]

    S[list(map(species.index, ["S_lac_ion","S_lac"])),
        reactions.index('Acid Base Equilibrium (Lac)')] = [-1,1]

    S[list(map(species.index, ["S_ac_ion","S_ac"])),
        reactions.index('Acid Base Equilibrium (Ac)')] = [-1,1]

    S[list(map(species.index, ["S_co2", "S_hco3_ion"])),  # I don't think this is right; should look at the reaction in ADM1
        reactions.index('Acid Base Equilibrium (CO2)')] = [-1, 1]

    S[list(map(species.index, ["S_nh3", "S_nh4_ion"])),
        reactions.index('Acid Base Equilibrium (In)')] = [-1, 1]  # I don't think this is right; should look at the reaction in ADM1

    S[list(map(species.index, ["S_h2", "S_gas_h2"])),
        reactions.index('Gas Transfer H2')] = [-base_parameters['V_liq']/base_parameters['V_gas'], 1]
    S[list(map(species.index, ["S_ch4", "S_gas_ch4"])),
        reactions.index('Gas Transfer CH4')] = [-base_parameters['V_liq']/base_parameters['V_gas'], 1]
    S[list(map(species.index, ["S_co2", "S_gas_co2"])),
        reactions.index('Gas Transfer CO2')] = [-base_parameters['V_liq']/base_parameters['V_gas'], 1]
    return S
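All of the assignments above follow the same fancy-indexing pattern: a list of species row indices (via species.index) is paired with a single reaction column index (via reactions.index), and the matching stoichiometric coefficients are written in one statement. A minimal, self-contained sketch of that pattern, using a toy species/reaction list rather than the real e-adm2 state vector:

import numpy as np

# Toy example only: two species and one reaction.
species = ["S_h2", "S_gas_h2"]
reactions = ["Gas Transfer H2"]

S = np.zeros((len(species), len(reactions)))

# Rows come from species.index, the column from reactions.index,
# exactly as in the function above. The volumes are hypothetical.
V_liq, V_gas = 100.0, 10.0
S[list(map(species.index, ["S_h2", "S_gas_h2"])),
  reactions.index("Gas Transfer H2")] = [-V_liq / V_gas, 1]

print(S)  # [[-10.]
          #  [  1.]]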

e_adm_2_ode_sys(t, c, model)

This function builds the system of ODEs for the e-adm2 model.

Parameters:

Name   Type        Description                                  Default
t      float       Current integration time.                    required
c      np.ndarray  Array of current species concentrations.     required
model  Model       The model object to compute the ODEs with.   required

Returns:

Type        Description
np.ndarray  dCdt, the change of each concentration with respect to time.
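Because e_adm_2_ode_sys has the standard f(t, y, *args) signature, it can be handed directly to scipy.integrate.solve_ivp. A minimal sketch, assuming model is an already-configured adm.Model instance and c0 a NumPy array of initial concentrations matching model.species (both names are placeholders here, not objects defined in this documentation):

from scipy.integrate import solve_ivp
from adtoolbox import adm

def simulate_e_adm_2(model, c0, t_end=30.0):
    # AD models are typically stiff, so a BDF solver is a reasonable default;
    # `model` is forwarded to the ODE function through `args`.
    return solve_ivp(adm.e_adm_2_ode_sys,
                     t_span=(0.0, t_end),
                     y0=c0,
                     args=(model,),
                     method="BDF")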

Source code in adtoolbox/adm.py
def e_adm_2_ode_sys(t: float, c: np.ndarray, model: Model)-> np.ndarray:
    """
    This function builds the system of ODEs for the e-adm2 model.

    Args:
        t (float): current integration time.
        c (np.ndarray): array of current species concentrations.
        model (Model): the model object to compute the ODEs with.

    Returns:
        np.ndarray: The output is dCdt, the change of concentration with respect to time. 
    """
    ### Initialize the ion concentrations
    if t==0:
        c[model.species.index('S_va_ion')]=model.model_parameters['K_a_va']/(model.model_parameters['K_a_va']+c[model.species.index('S_H_ion')])*c[model.species.index('S_va')]
        c[model.species.index('S_bu_ion')]=model.model_parameters['K_a_bu']/(model.model_parameters['K_a_bu']+c[model.species.index('S_H_ion')])*c[model.species.index('S_bu')]
        c[model.species.index('S_pro_ion')]=model.model_parameters['K_a_pro']/(model.model_parameters['K_a_pro']+c[model.species.index('S_H_ion')])*c[model.species.index('S_pro')]
        c[model.species.index('S_cap_ion')]=model.model_parameters['K_a_cap']/(model.model_parameters['K_a_cap']+c[model.species.index('S_H_ion')])*c[model.species.index('S_cap')]
        c[model.species.index('S_ac_ion')]=model.model_parameters['K_a_ac']/(model.model_parameters['K_a_ac']+c[model.species.index('S_H_ion')])*c[model.species.index('S_ac')]
        c[model.species.index('S_lac_ion')]=model.model_parameters['K_a_lac']/(model.model_parameters['K_a_lac']+c[model.species.index('S_H_ion')])*c[model.species.index('S_lac')]    
        c[model.species.index('S_hco3_ion')] = c[model.species.index('S_IC')] - c[model.species.index('S_co2')]
        phi=(model.model_parameters['K_w']/c[model.species.index('S_H_ion')]-c[model.species.index('S_H_ion')])
        c[model.species.index('S_anion')] = c[model.species.index('S_cation')]+c[model.species.index('S_nh4_ion')]-c[model.species.index('S_hco3_ion')]-(c[model.species.index('S_lac_ion')] / 88) - (c[model.species.index('S_ac_ion')] / 64) - (c[model.species.index('S_pro_ion')] /
                                                                                                                                                                     112) - (c[model.species.index('S_bu_ion')] / 160)-(c[model.species.index('S_cap_ion')] / 230) - (c[model.species.index('S_va_ion')] / 208)-phi

    c[model.species.index('S_hco3_ion')] = model.model_parameters['K_a_co2'] * c[model.species.index('S_IC')]/(model.model_parameters['K_a_co2'] + c[model.species.index('S_H_ion')])
    c[model.species.index('S_nh4_ion')]=  model.model_parameters['K_b_nh3'] * c[model.species.index('S_IN')]/(model.model_parameters['K_b_nh3'] + model.base_parameters['K_W'] / c[model.species.index('S_H_ion')])

    c[model.species.index('S_co2')]= c[model.species.index('S_IC')] -  c[model.species.index('S_hco3_ion')]
    c[model.species.index('S_nh3')]= c[model.species.index('S_IN')] - c[model.species.index('S_nh4_ion')]

    if (time.time()-model._be_time )>model.time_limit and model.time_limit!=-1:
        raise Exception("Time limit exceeded")


    I_pH_aa = (model.model_parameters["K_pH_aa"] ** model.model_parameters['nn_aa'])/(np.power(
        c[model.species.index('S_H_ion')], model.model_parameters['nn_aa']) + np.power(model.model_parameters["K_pH_aa"], model.model_parameters['nn_aa']))
    I_pH_ac = (model.model_parameters['K_pH_ac'] ** model.model_parameters["n_ac"])/(
        c[model.species.index('S_H_ion')] ** model.model_parameters['n_ac'] + model.model_parameters['K_pH_ac'] ** model.model_parameters['n_ac'])
    I_pH_pro = (model.model_parameters['K_pH_pro'] ** model.model_parameters["n_pro"])/(
        c[model.species.index('S_H_ion')] ** model.model_parameters['n_pro'] + model.model_parameters['K_pH_pro'] ** model.model_parameters['n_pro'])
    I_pH_bu = (model.model_parameters['K_pH_bu'] ** model.model_parameters["n_bu"])/(
        c[model.species.index('S_H_ion')] ** model.model_parameters['n_bu'] + model.model_parameters['K_pH_bu'] ** model.model_parameters['n_bu'])
    I_pH_va = (model.model_parameters['K_pH_va'] ** model.model_parameters["n_va"])/(
        c[model.species.index('S_H_ion')] ** model.model_parameters['n_va'] + model.model_parameters['K_pH_va'] ** model.model_parameters['n_va'])
    I_pH_cap = (model.model_parameters['K_pH_cap'] ** model.model_parameters["n_cap"])/(
        c[model.species.index('S_H_ion')] ** model.model_parameters['n_cap'] + model.model_parameters['K_pH_cap'] ** model.model_parameters['n_cap'])
    I_pH_h2 = (model.model_parameters['K_pH_h2']**model.model_parameters['n_h2'])/(
        c[model.species.index('S_H_ion')] ** model.model_parameters['n_h2'] + model.model_parameters['K_pH_h2']**model.model_parameters['n_h2'])

    I_IN_lim = 1 / (1+(c[model.species.index('S_IN')] / (model.model_parameters['K_S_IN']+10**-9)))  # NOTE: e_adm_ode_sys uses 1/(1+K_S_IN/S_IN) for this term; the inverted form here may be unintentional

    I_h2_fa = 1 /  (1+(c[model.species.index('S_h2')] /(model.model_parameters['K_I_h2_fa']+10**-9)))

    I_h2_c4 = 1 /  (1+(c[model.species.index('S_h2')] / (model.model_parameters['K_I_h2_c4']+10**-9)))

    I_h2_pro = 1/  (1+(c[model.species.index('S_h2')] / (model.model_parameters['K_I_h2_pro']+10**-9)))

    I_nh3 =    1/  (1+(c[model.species.index('S_nh3')] / (model.model_parameters['K_I_nh3']+10**-9)))

    I_h2_oxidation=1/(1+(c[model.species.index('S_h2')] / (model.model_parameters['K_I_h2_ox']+10**-9)))

    I5 =    max(0,(I_pH_aa * I_IN_lim))
    I6 =    max(0,I5)
    I7 =    max(0,(I_pH_aa * I_IN_lim * I_h2_fa))
    I8 =    max(0,(I_pH_aa * I_IN_lim * I_h2_c4))
    I9 =    max(0,I8)
    I10 =   max(0,(I_pH_pro * I_IN_lim * I_h2_pro))
    I11 =   max(0,(I_pH_ac * I_IN_lim * I_nh3))
    I12 =   max(0,(I_pH_h2 * I_IN_lim))
    I13 =   max(0,(I_pH_cap * I_IN_lim * I_h2_c4))
    I14 =   max(0,(I_pH_bu * I_IN_lim * I_h2_c4))
    I15 =   max(0,(I_pH_va * I_IN_lim * I_h2_c4))
    I16 =   max(0,I_IN_lim * I_nh3*I_pH_aa*I_h2_oxidation)

    v = np.zeros((len(model.reactions), 1))

    v[model.reactions.index('TSS_Disintegration')] = model.model_parameters["k_dis_TSS"]*c[model.species.index('TSS')]

    v[model.reactions.index('TDS_Disintegration')] = model.model_parameters["k_dis_TDS"]*c[model.species.index('TDS')]

    v[model.reactions.index('Hydrolysis carbohydrates')] = model.model_parameters['k_hyd_ch']*c[model.species.index('X_ch')]

    v[model.reactions.index('Hydrolysis proteins')] = model.model_parameters['k_hyd_pr']*c[model.species.index('X_pr')]

    v[model.reactions.index('Hydrolysis lipids')] = model.model_parameters['k_hyd_li']*c[model.species.index('X_li')]

    v[model.reactions.index('Uptake of sugars')] = model.model_parameters['k_m_su']*c[model.species.index('S_su')] / \
        (model.model_parameters['K_S_su']+c[model.species.index('S_su')])*c[model.species.index('X_su')]*I5

    v[model.reactions.index('Uptake of amino acids')] = model.model_parameters['k_m_aa']*c[model.species.index('S_aa')] / \
        (model.model_parameters['K_S_aa']+c[model.species.index('S_aa')]
         )*c[model.species.index('X_aa')]*I6

    v[model.reactions.index('Uptake of LCFA')] = model.model_parameters['k_m_fa']*c[model.species.index('S_fa')] / \
        (model.model_parameters['K_S_fa'] +
         c[model.species.index('S_fa')])*c[model.species.index('X_fa')]*I7

    v[model.reactions.index('Uptake of acetate_et')] = model.model_parameters['k_m_ac']*c[model.species.index('S_ac')]*c[model.species.index('S_et')] / \
        (model.model_parameters['K_S_ac']*c[model.species.index('S_ac')]+model.model_parameters['K_S_ac_et']*c[model.species.index('S_et')]+c[model.species.index('S_ac')]*c[model.species.index('S_et')]+10**-9
         )*c[model.species.index('X_ac_et')]*I11

    v[model.reactions.index('Uptake of acetate_lac')] = model.model_parameters['k_m_ac']*c[model.species.index('S_ac')]*c[model.species.index('S_lac')] / \
        (model.model_parameters['K_S_ac']*c[model.species.index('S_ac')]+model.model_parameters['K_S_ac_lac']*c[model.species.index('S_lac')]+c[model.species.index('S_ac')]*c[model.species.index('S_lac')]+10**-9
         )*c[model.species.index('X_ac_lac')]*I11

    v[model.reactions.index('Uptake of propionate_et')] = model.model_parameters['k_m_pro']*c[model.species.index('S_pro')]*c[model.species.index('S_et')] / \
        (model.model_parameters['K_S_pro']*c[model.species.index('S_pro')]+model.model_parameters['K_S_pro_et']*c[model.species.index('S_et')]+c[model.species.index('S_pro')]*c[model.species.index('S_et')]+10**-9
         )*c[model.species.index('X_chain_et')]*I10

    v[model.reactions.index('Uptake of propionate_lac')] = model.model_parameters['k_m_pro']*c[model.species.index('S_pro')]*c[model.species.index('S_lac')] / \
        (model.model_parameters['K_S_pro']*c[model.species.index('S_pro')]+model.model_parameters['K_S_pro_lac']*c[model.species.index('S_lac')]+c[model.species.index('S_pro')]*c[model.species.index('S_lac')]+10**-9
         )*c[model.species.index('X_chain_lac')]*I10

    v[model.reactions.index('Uptake of butyrate_et')] = model.model_parameters['k_m_bu']*c[model.species.index('S_bu')]*c[model.species.index('S_et')] / \
        (model.model_parameters['K_S_bu']*c[model.species.index('S_bu')]+model.model_parameters['K_S_bu_et']*c[model.species.index('S_et')]+c[model.species.index('S_bu')]*c[model.species.index('S_et')]+10**-9
         )*c[model.species.index('X_chain_et')]*I14

    v[model.reactions.index('Uptake of butyrate_lac')] = model.model_parameters['k_m_bu']*c[model.species.index('S_bu')]*c[model.species.index('S_lac')] / \
        (model.model_parameters['K_S_bu']*c[model.species.index('S_bu')]+model.model_parameters['K_S_bu_lac']*c[model.species.index('S_lac')]+c[model.species.index('S_bu')]*c[model.species.index('S_lac')]+10**-9
         )*c[model.species.index('X_chain_lac')]*I14

    v[model.reactions.index('Uptake of valerate')] = model.model_parameters['k_m_va']*c[model.species.index('S_va')] / \
        (model.model_parameters['K_S_va']+c[model.species.index('S_va')]
         )*c[model.species.index('X_VFA_deg')]*I15

    v[model.reactions.index('Uptake of caproate')] = model.model_parameters['k_m_cap']*c[model.species.index('S_cap')] / \
        (model.model_parameters['K_S_cap']+c[model.species.index('S_cap')]
         )*c[model.species.index('X_VFA_deg')]*I13

    v[model.reactions.index('Uptake of butyrate')] = model.model_parameters['k_m_bu_deg']*c[model.species.index('S_bu')] / \
        (model.model_parameters['K_S_bu']+c[model.species.index('S_bu')]
         )*c[model.species.index('X_VFA_deg')]*I13

    v[model.reactions.index('Methanogenessis from acetate and h2')] = model.model_parameters['k_m_h2_Me_ac']*c[model.species.index('S_gas_h2')]*c[model.species.index('S_ac')] / \
        (model.model_parameters['K_S_h2_Me_ac']*c[model.species.index('S_gas_h2')]+model.model_parameters['K_S_ac_Me']*c[model.species.index(
            'S_ac')]+c[model.species.index('S_ac')]*c[model.species.index('S_gas_h2')]+10**-9)*c[model.species.index('X_Me_ac')]*I12

    v[model.reactions.index('Methanogenessis from CO2 and h2')] = model.model_parameters['k_m_h2_Me_CO2']*c[model.species.index('S_gas_h2')]*c[model.species.index('S_gas_co2')] / \
        (model.model_parameters['K_S_h2_Me_CO2']*c[model.species.index('S_gas_h2')]+model.model_parameters['K_S_CO2_Me']*c[model.species.index(
            'S_gas_co2')]+c[model.species.index('S_gas_co2')]*c[model.species.index('S_gas_h2')]+10**-9)*c[model.species.index('X_Me_CO2')]*I12


    v[model.reactions.index('Uptake of ethanol')] = model.model_parameters['k_m_et']*c[model.species.index('S_et')] / \
        (model.model_parameters['K_S_et']+c[model.species.index('S_et')]
         )*c[model.species.index("X_et")]*I16

    v[model.reactions.index('Uptake of lactate')] = model.model_parameters['k_m_lac']*c[model.species.index('S_lac')] / \
        (model.model_parameters['K_S_lac']+c[model.species.index('S_lac')]
         )*c[model.species.index('X_lac')]*I16

    v[model.reactions.index('Decay of Xsu')] = model.model_parameters['k_dec_X_su']*c[model.species.index('X_su')]
    v[model.reactions.index('Decay of Xaa')] = model.model_parameters['k_dec_X_aa']*c[model.species.index('X_aa')]
    v[model.reactions.index('Decay of Xfa')] = model.model_parameters['k_dec_X_fa']*c[model.species.index('X_fa')]
    v[model.reactions.index('Decay of X_ac_et')] = model.model_parameters['k_dec_X_ac']*c[model.species.index('X_ac_et')]
    v[model.reactions.index('Decay of X_ac_lac')] = model.model_parameters['k_dec_X_ac']*c[model.species.index('X_ac_lac')]
    v[model.reactions.index('Decay of X_chain_et')] = model.model_parameters['k_dec_X_chain_et']*c[model.species.index('X_chain_et')]
    v[model.reactions.index('Decay of X_chain_lac')] = model.model_parameters['k_dec_X_chain_lac']*c[model.species.index('X_chain_lac')]
    v[model.reactions.index('Decay of X_VFA_deg')] = model.model_parameters['k_dec_X_VFA_deg']*c[model.species.index('X_VFA_deg')]
    v[model.reactions.index('Decay of X_Me_ac')] = model.model_parameters['k_dec_X_Me_ac']*c[model.species.index('X_Me_ac')]
    v[model.reactions.index('Decay of X_Me_CO2')] = model.model_parameters['k_dec_X_Me_CO2']*c[model.species.index('X_Me_CO2')]
    v[model.reactions.index('Decay of Xet')] = model.model_parameters['k_dec_X_et']*c[model.species.index('X_et')]
    v[model.reactions.index('Decay of Xlac')] = model.model_parameters['k_dec_X_lac']*c[model.species.index('X_lac')]


    v[model.reactions.index('Acid Base Equilibrium (Va)')] = model.model_parameters['k_A_B_va'] * \
        (c[model.species.index('S_va_ion')] * (model.model_parameters['K_a_va'] + c[model.species.index('S_H_ion')]) -
         model.model_parameters['K_a_va'] * c[model.species.index('S_va')])
    v[model.reactions.index('Acid Base Equilibrium (Bu)')] = model.model_parameters['k_A_B_bu'] * \
        (c[model.species.index('S_bu_ion')] * (model.model_parameters['K_a_bu'] + c[model.species.index('S_H_ion')]) -
         model.model_parameters['K_a_bu'] * c[model.species.index('S_bu')])
    v[model.reactions.index('Acid Base Equilibrium (Pro)')] = model.model_parameters['k_A_B_pro'] * \
        (c[model.species.index('S_pro_ion')] * (model.model_parameters['K_a_pro'] + c[model.species.index('S_H_ion')]) -
         model.model_parameters['K_a_pro'] * c[model.species.index('S_pro')])
    v[model.reactions.index('Acid Base Equilibrium (Cap)')] = model.model_parameters['k_A_B_cap'] * \
        (c[model.species.index('S_cap_ion')] * (model.model_parameters['K_a_cap'] + c[model.species.index('S_H_ion')]) -
         model.model_parameters['K_a_cap'] * c[model.species.index('S_cap')])
    v[model.reactions.index('Acid Base Equilibrium (Lac)')] = model.model_parameters['k_A_B_lac'] * \
        (c[model.species.index('S_lac_ion')] * (model.model_parameters['K_a_lac'] + c[model.species.index('S_H_ion')]) -
         model.model_parameters['K_a_lac'] * c[model.species.index('S_lac')])
    v[model.reactions.index('Acid Base Equilibrium (Ac)')] = model.model_parameters['k_A_B_ac'] * \
        (c[model.species.index('S_ac_ion')] * (model.model_parameters['K_a_ac'] + c[model.species.index('S_H_ion')]) -
         model.model_parameters['K_a_ac'] * c[model.species.index('S_ac')])
    v[model.reactions.index('Acid Base Equilibrium (CO2)')] = model.model_parameters['k_A_B_co2'] * \
        (c[model.species.index('S_hco3_ion')] * (model.model_parameters['K_a_co2'] + c[model.species.index('S_H_ion')]) -
         model.model_parameters['K_a_co2'] * c[model.species.index('S_IC')])
    v[model.reactions.index('Acid Base Equilibrium (In)')] = model.model_parameters['k_A_B_IN'] * \
        (c[model.species.index('S_nh3')] * (model.model_parameters['K_a_IN'] + c[model.species.index('S_H_ion')]) -
         model.model_parameters['K_a_IN'] * c[model.species.index('S_IC')])


    p_gas_h2 = c[model.species.index('S_gas_h2')] * model.base_parameters["R"] * model.base_parameters["T_op"] / 16
    p_gas_ch4 = c[model.species.index('S_gas_ch4')] * model.base_parameters["R"] * model.base_parameters["T_op"] / 64
    p_gas_co2 = c[model.species.index('S_gas_co2')] * model.base_parameters["R"] * model.base_parameters["T_op"]
    p_gas_h2o = 0.0313 * np.exp(5290 *(1 / model.base_parameters["T_base"] - 1 / model.base_parameters["T_op"]))

    P_gas = p_gas_h2 + p_gas_ch4 + p_gas_co2 + p_gas_h2o

    q_gas = max(0, (model.model_parameters['k_p'] * (P_gas - model.base_parameters['P_atm'])))

    v[model.reactions.index('Gas Transfer H2')] = max(0,model.model_parameters['k_L_a'] * (c[model.species.index('S_h2')] - 16 *model.model_parameters['K_H_h2'] * p_gas_h2))
    v[model.reactions.index('Gas Transfer CH4')] = max(0,model.model_parameters['k_L_a'] * (c[model.species.index('S_ch4')] - 64 * model.model_parameters['K_H_ch4'] * p_gas_ch4))
    v[model.reactions.index('Gas Transfer CO2')] = max(0,model.model_parameters['k_L_a'] * (c[model.species.index('S_co2')] - model.model_parameters['K_H_co2'] * p_gas_co2))

    if c[model.species.index('S_IN')]<0.01:
        model.nitrogen_limited=True
    else:
        model.nitrogen_limited=False

    dCdt = np.matmul(model.s, v)
    phi = c[model.species.index('S_cation')]+c[model.species.index('S_nh4_ion')]-c[model.species.index('S_hco3_ion')]-(c[model.species.index('S_lac_ion')] / 88) - \
    (c[model.species.index('S_ac_ion')] / 64) - (c[model.species.index('S_pro_ion')] / 112) - (c[model.species.index('S_bu_ion')] / 160)-(c[model.species.index('S_cap_ion')] / 230) - (c[model.species.index('S_va_ion')] / 208) - c[model.species.index('S_anion')]

    if 'S_H_ion' in model.control_state.keys():
        c[model.species.index('S_H_ion')]=model.control_state['S_H_ion']
    else:
        c[model.species.index('S_H_ion')] = (-1 * phi / 2) + (0.5 * np.sqrt(phi**2 + 4 * model.model_parameters['K_w']))

    dCdt[0: len(model.species)-3] = dCdt[0: len(model.species)-3]+model.base_parameters['q_in'] / model.base_parameters["V_liq"] * (model.inlet_conditions[0: len(model.species)-3]-c[0: len(model.species)-3].reshape(-1, 1))

    dCdt[len(model.species)-3:] = dCdt[len(model.species)-3:]+q_gas/model.base_parameters["V_gas"] * (model.inlet_conditions[len(model.species)-3:]-c[len(model.species)-3:].reshape(-1, 1))

    dCdt[[model.species.index('S_H_ion'), model.species.index('S_co2'), model.species.index('S_nh4_ion')], 0] = 0

    if model.switch == "DAE":
        dCdt[model.species.index('S_va_ion'):model.species.index('S_co2')] = 0
        dCdt[model.species.index('S_nh3')] = 0
        c[model.species.index('S_va_ion')]=model.model_parameters['K_a_va']/(model.model_parameters['K_a_va']+c[model.species.index('S_H_ion')])*c[model.species.index('S_va')]
        c[model.species.index('S_bu_ion')]=model.model_parameters['K_a_bu']/(model.model_parameters['K_a_bu']+c[model.species.index('S_H_ion')])*c[model.species.index('S_bu')]
        c[model.species.index('S_pro_ion')]=model.model_parameters['K_a_pro']/(model.model_parameters['K_a_pro']+c[model.species.index('S_H_ion')])*c[model.species.index('S_pro')]
        c[model.species.index('S_cap_ion')]=model.model_parameters['K_a_cap']/(model.model_parameters['K_a_cap']+c[model.species.index('S_H_ion')])*c[model.species.index('S_cap')]
        c[model.species.index('S_ac_ion')]=model.model_parameters['K_a_ac']/(model.model_parameters['K_a_ac']+c[model.species.index('S_H_ion')])*c[model.species.index('S_ac')]
        c[model.species.index('S_lac_ion')]=model.model_parameters['K_a_lac']/(model.model_parameters['K_a_lac']+c[model.species.index('S_H_ion')])*c[model.species.index('S_lac')]    
        c[model.species.index('S_hco3_ion')] = c[model.species.index('S_IC')] - c[model.species.index('S_co2')]




    if model.control_state.keys():
        for state in model.control_state.keys():
            c[model.species.index(state)]=model.control_state[state]
            dCdt[model.species.index(state)]=0

    model.info["Fluxes"]=v
    return dCdt[:, 0]
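The pH update near the end of the function is the closed-form solution of the charge balance: with phi the net charge of all ions other than H+ and OH-, S_H_ion must satisfy S_H_ion**2 + phi*S_H_ion - K_w = 0, and the code takes the positive root of that quadratic. A small numeric illustration with made-up values:

import numpy as np

K_w = 1e-14    # water ion product (illustrative)
phi = 2.0e-5   # hypothetical net charge of the remaining ionic species

# Positive root of S_H**2 + phi*S_H - K_w = 0, as computed in the source above.
S_H_ion = (-phi / 2) + 0.5 * np.sqrt(phi**2 + 4 * K_w)

print(f"S_H_ion = {S_H_ion:.3e}, pH = {-np.log10(S_H_ion):.2f}")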

e_adm_ode_sys(t, c, model)

This function builds the system of ODEs for the e_adm model.

Parameters:

Name   Type        Description                                  Default
t      float       Current integration time.                    required
c      np.ndarray  Array of current species concentrations.     required
model  Model       The model object to compute the ODEs with.   required

Returns:

Type        Description
np.ndarray  dCdt, the change of each concentration with respect to time.
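Both ODE builders also consult model.control_state on every evaluation: any species listed there is clamped to the given concentration and its derivative is zeroed, which is how a controlled quantity such as pH can be held fixed during a simulation. A hedged sketch (assigning control_state directly is an assumption about the API; the value 10**-7, i.e. pH 7, is illustrative):

# Hold the proton concentration constant to mimic a pH-controlled digester.
model.control_state = {"S_H_ion": 10**-7}

# Inside the ODE function this results, at every step, in:
#   c[model.species.index("S_H_ion")] = model.control_state["S_H_ion"]
#   dCdt[model.species.index("S_H_ion")] = 0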

Source code in adtoolbox/adm.py
def e_adm_ode_sys(t: float, c: np.ndarray, model: Model)-> np.ndarray:
    """
    This function builds the system of ODEs for the e_adm model.

    Args:
        t (float): current integration time.
        c (np.ndarray): array of current species concentrations.
        model (Model): the model object to compute the ODEs with.

    Returns:
        np.ndarray: The output is dCdt, the change of concentration with respect to time. 
    """
    c[c<0]=0
    c[model.species.index('S_H_ion')]=0.000001
    if model.switch == "DAE":

        c[model.species.index('S_va_ion')]=model.model_parameters['K_a_va']/(model.model_parameters['K_a_va']+c[model.species.index('S_H_ion')])*c[model.species.index('S_va')]

        c[model.species.index('S_bu_ion')]=model.model_parameters['K_a_bu']/(model.model_parameters['K_a_bu']+c[model.species.index('S_H_ion')])*c[model.species.index('S_bu')]

        c[model.species.index('S_pro_ion')]=model.model_parameters['K_a_pro']/(model.model_parameters['K_a_pro']+c[model.species.index('S_H_ion')])*c[model.species.index('S_pro')]

        c[model.species.index('S_cap_ion')]=model.model_parameters['K_a_cap']/(model.model_parameters['K_a_cap']+c[model.species.index('S_H_ion')])*c[model.species.index('S_cap')]

        c[model.species.index('S_ac_ion')]=model.model_parameters['K_a_ac']/(model.model_parameters['K_a_ac']+c[model.species.index('S_H_ion')])*c[model.species.index('S_ac')]

        c[model.species.index('S_lac_ion')]=model.model_parameters['K_a_lac']/(model.model_parameters['K_a_lac']+c[model.species.index('S_H_ion')])*c[model.species.index('S_lac')]
    else: 
        v[model.reactions.index('Acid Base Equilibrium (Va)')] = model.model_parameters['k_A_B_va'] * \
            (c[model.species.index('S_va_ion')] * (model.model_parameters['K_a_va'] + c[model.species.index('S_H_ion')]) -
             model.model_parameters['K_a_va'] * c[model.species.index('S_va')])

        v[model.reactions.index('Acid Base Equilibrium (Bu)')] = model.model_parameters['k_A_B_bu'] * \
            (c[model.species.index('S_bu_ion')] * (model.model_parameters['K_a_bu'] + c[model.species.index('S_H_ion')]) -
             model.model_parameters['K_a_bu'] * c[model.species.index('S_bu')])

        v[model.reactions.index('Acid Base Equilibrium (Pro)')] = model.model_parameters['k_A_B_pro'] * \
            (c[model.species.index('S_pro_ion')] * (model.model_parameters['K_a_pro'] + c[model.species.index('S_H_ion')]) -
             model.model_parameters['K_a_pro'] * c[model.species.index('S_pro')])

        v[model.reactions.index('Acid Base Equilibrium (Cap)')] = model.model_parameters['k_A_B_cap'] * \
            (c[model.species.index('S_cap_ion')] * (model.model_parameters['K_a_cap'] + c[model.species.index('S_H_ion')]) -
             model.model_parameters['K_a_cap'] * c[model.species.index('S_cap')])

        v[model.reactions.index('Acid Base Equilibrium (Lac)')] = model.model_parameters['k_A_B_lac'] * \
            (c[model.species.index('S_lac_ion')] * (model.model_parameters['K_a_lac'] + c[model.species.index('S_H_ion')]) -
             model.model_parameters['K_a_lac'] * c[model.species.index('S_lac')])

        v[model.reactions.index('Acid Base Equilibrium (Ac)')] = model.model_parameters['k_A_B_ac'] * \
            (c[model.species.index('S_ac_ion')] * (model.model_parameters['K_a_ac'] + c[model.species.index('S_H_ion')]) -
             model.model_parameters['K_a_ac'] * c[model.species.index('S_ac')])

        v[model.reactions.index('Acid Base Equilibrium (CO2)')] = model.model_parameters['k_A_B_co2'] * \
            (c[model.species.index('S_hco3_ion')] * (model.model_parameters['K_a_co2'] + c[model.species.index('S_H_ion')]) -
             model.model_parameters['K_a_co2'] * c[model.species.index('S_IC')])

        v[model.reactions.index('Acid Base Equilibrium (In)')] = model.model_parameters['k_A_B_IN'] * \
            (c[model.species.index('S_nh3')] * (model.model_parameters['K_a_IN'] + c[model.species.index('S_H_ion')]) -
             model.model_parameters['K_a_IN'] * c[model.species.index('S_IC')])

    c[model.species.index('S_nh4_ion')] = c[model.species.index(
        'S_IN')] - c[model.species.index('S_nh3')]
    c[model.species.index('S_co2')] = c[model.species.index(
        'S_IC')] - c[model.species.index('S_hco3_ion')]
    I_pH_aa = (model.model_parameters["K_pH_aa"] ** model.model_parameters['nn_aa'])/(np.power(
        c[model.species.index('S_H_ion')], model.model_parameters['nn_aa']) + np.power(model.model_parameters["K_pH_aa"], model.model_parameters['nn_aa']))

    I_pH_ac = (model.model_parameters['K_pH_ac'] ** model.model_parameters["n_ac"])/(
        c[model.species.index('S_H_ion')] ** model.model_parameters['n_ac'] + model.model_parameters['K_pH_ac'] ** model.model_parameters['n_ac'])

    I_pH_pro = (model.model_parameters['K_pH_pro'] ** model.model_parameters["n_pro"])/(
        c[model.species.index('S_H_ion')] ** model.model_parameters['n_pro'] + model.model_parameters['K_pH_pro'] ** model.model_parameters['n_pro'])

    I_pH_bu = (model.model_parameters['K_pH_bu'] ** model.model_parameters["n_bu"])/(
        c[model.species.index('S_H_ion')] ** model.model_parameters['n_bu'] + model.model_parameters['K_pH_bu'] ** model.model_parameters['n_bu'])

    I_pH_va = (model.model_parameters['K_pH_va'] ** model.model_parameters["n_va"])/(
        c[model.species.index('S_H_ion')] ** model.model_parameters['n_va'] + model.model_parameters['K_pH_va'] ** model.model_parameters['n_va'])

    I_pH_cap = (model.model_parameters['K_pH_cap'] ** model.model_parameters["n_cap"])/(
        c[model.species.index('S_H_ion')] ** model.model_parameters['n_cap'] + model.model_parameters['K_pH_cap'] ** model.model_parameters['n_cap'])

    I_pH_h2 = (model.model_parameters['K_pH_h2']**model.model_parameters['n_h2'])/(
        c[model.species.index('S_H_ion')] ** model.model_parameters['n_h2'] + model.model_parameters['K_pH_h2']**model.model_parameters['n_h2'])

    I_IN_lim = 1 / \
        (1+(model.model_parameters['K_S_IN'] / (c[model.species.index('S_IN')]+10**-9)))

    I_h2_fa = 1 / (1+(c[model.species.index('S_h2')] /
                   (model.model_parameters['K_I_h2_fa']+10**-9)))

    I_h2_c4 = 1 / (1+(c[model.species.index('S_h2')] /
                   (model.model_parameters['K_I_h2_c4']+10**-9)))

    I_h2_pro = (1/(1+(c[model.species.index('S_h2')] /
                (model.model_parameters['K_I_h2_pro']+10**-9))))

    I_nh3 = 1/(1+(c[model.species.index('S_nh3')] /
               (model.model_parameters['K_I_nh3']+10**-9)))

    I_h2_oxidation=(1/(1+(c[model.species.index('S_h2')] /
                (model.model_parameters['K_I_h2_ox']+10**-9))))

    # I5 = (I_pH_aa * I_IN_lim)
    # I6 = I5.copy()
    # I7 = (I_pH_aa * I_IN_lim * I_h2_fa)
    # I8 = (I_pH_aa * I_IN_lim * I_h2_c4)
    # I9 = I8.copy()
    # I10 = (I_pH_pro * I_IN_lim * I_h2_pro)
    # I11 = (I_pH_ac * I_IN_lim * I_nh3)
    # I12 = (I_pH_h2 * I_IN_lim)
    # I13 = (I_pH_cap * I_IN_lim * I_h2_c4)
    # I14 = (I_pH_bu * I_IN_lim * I_h2_c4)
    # I15 = (I_pH_va * I_IN_lim * I_h2_c4)
    # I16 = I_IN_lim * I_nh3*I_pH_aa*I_h2_oxidation
    I5  = 1   # inhibition terms are disabled (all set to 1) in this variant
    I6  = 1
    I7  = 1
    I8  = 1
    I9  = 1
    I10 = 1
    I11 = 1
    I12 = 1
    I13 = 1
    I14 = 1
    I15 = 1
    I16 = 1


    v[model.reactions.index(
        'TSS_Disintegration')] = model.model_parameters["k_dis_TSS"]*c[model.species.index('TSS')]

    v[model.reactions.index(
        'TDS_Disintegration')] = model.model_parameters["k_dis_TDS"]*c[model.species.index('TDS')]

    v[model.reactions.index('Hydrolysis carbohydrates')
      ] = model.model_parameters['k_hyd_ch']*c[model.species.index('X_ch')]

    v[model.reactions.index('Hydrolysis proteins')
      ] = model.model_parameters['k_hyd_pr']*c[model.species.index('X_pr')]

    v[model.reactions.index('Hydrolysis lipids')
      ] = model.model_parameters['k_hyd_li']*c[model.species.index('X_li')]

    v[model.reactions.index('Su_to_et')] = model.model_parameters['k_m_su_et']*c[model.species.index('S_su')] / \
        (model.model_parameters['K_S_su_et']+c[model.species.index('S_su')])*c[model.species.index('X_su')]*I5

    v[model.reactions.index('Su_to_lac')] = model.model_parameters['k_m_su_lac']*c[model.species.index('S_su')] / \
        (model.model_parameters['K_S_su_lac']+c[model.species.index('S_su')]
         )*c[model.species.index('X_su')]/(c[model.species.index('X_su')]+model.model_parameters['K_S_X_su_lac'])*I5    

    v[model.reactions.index('Su_to_ac')] = model.model_parameters['k_m_su_ac']*c[model.species.index('S_su')] / \
        (model.model_parameters['K_S_su_ac']+c[model.species.index('S_su')]
         )*c[model.species.index('X_su')]*I5

    v[model.reactions.index('Su_to_pro')] = model.model_parameters['k_m_su_pro']*c[model.species.index('S_su')] / \
        (model.model_parameters['K_S_su_pro']+c[model.species.index('S_su')]
         )*c[model.species.index('X_su')]/(c[model.species.index('X_su')]+model.model_parameters['K_S_X_su_pro'])*I5        


    v[model.reactions.index('aas_to_lac')] = model.model_parameters['k_m_aa_lac']*c[model.species.index('S_aa')] / \
        (model.model_parameters['K_S_aa_lac']+c[model.species.index('S_aa')]
         )*c[model.species.index('X_aa')]/(c[model.species.index('X_aa')]+model.model_parameters['K_S_X_aa_lac'])*I6

    v[model.reactions.index('aas_to_pro')] = model.model_parameters['k_m_aa_pro']*c[model.species.index('S_aa')] / \
        (model.model_parameters['K_S_aa_pro']+c[model.species.index('S_aa')]
         )*c[model.species.index('X_aa')]/(c[model.species.index('X_aa')]+model.model_parameters['K_S_X_aa_pro'])*I6

    v[model.reactions.index('aas_to_ac')] = model.model_parameters['k_m_aa_ac']*c[model.species.index('S_aa')] / \
        (model.model_parameters['K_S_aa_ac']+c[model.species.index('S_aa')]
         )*c[model.species.index('X_aa')]/(c[model.species.index('X_aa')]+model.model_parameters['K_S_X_aa_ac'])*I6

    v[model.reactions.index('Uptake of LCFA')] = model.model_parameters['k_m_fa']*c[model.species.index('S_fa')] / \
        (model.model_parameters['K_S_fa'] +
         c[model.species.index('S_fa')])*c[model.species.index('X_fa')]/(c[model.species.index('X_fa')]+model.model_parameters['K_S_X_fa'])*I7

    v[model.reactions.index('Uptake of acetate_et')] = model.model_parameters['k_m_ac_et']*c[model.species.index('S_ac')]*c[model.species.index('S_et')] / \
        (model.model_parameters['K_S_ac']*c[model.species.index('S_ac')]+model.model_parameters['K_S_ac_et']*c[model.species.index('S_et')]+c[model.species.index('S_ac')]*c[model.species.index('S_et')]+10**-9
         )*c[model.species.index('X_ac_et')]/(c[model.species.index('X_ac_et')]+model.model_parameters['K_S_X_ac_et'])*I11

    v[model.reactions.index('Uptake of acetate_lac')] = model.model_parameters['k_m_ac_lac']*c[model.species.index('S_ac')]*c[model.species.index('S_lac')] / \
        (model.model_parameters['K_S_ac']*c[model.species.index('S_ac')]+model.model_parameters['K_S_ac_lac']*c[model.species.index('S_lac')]+c[model.species.index('S_ac')]*c[model.species.index('S_lac')]+10**-9
         )*c[model.species.index('X_ac_lac')]/(c[model.species.index('X_ac_lac')]+model.model_parameters['K_S_X_ac_lac'])*I11

    v[model.reactions.index('Uptake of propionate_et')] = model.model_parameters['k_m_pro_et']*c[model.species.index('S_pro')]*c[model.species.index('S_et')] / \
        (model.model_parameters['K_S_pro']*c[model.species.index('S_pro')]+model.model_parameters['K_S_pro_et']*c[model.species.index('S_et')]+c[model.species.index('S_pro')]*c[model.species.index('S_et')]+10**-9
         )*c[model.species.index('X_chain_et')]/(c[model.species.index('X_chain_et')]+model.model_parameters['K_S_X_chain_et'])*I10

    v[model.reactions.index('Uptake of propionate_lac')] = model.model_parameters['k_m_pro_lac']*c[model.species.index('S_pro')]*c[model.species.index('S_lac')] / \
        (model.model_parameters['K_S_pro']*c[model.species.index('S_pro')]+model.model_parameters['K_S_pro_lac']*c[model.species.index('S_lac')]+c[model.species.index('S_pro')]*c[model.species.index('S_lac')]+10**-9
         )*c[model.species.index('X_chain_lac')]/(c[model.species.index('X_chain_lac')]+model.model_parameters['K_S_X_chain_lac'])*I10

    v[model.reactions.index('Uptake of butyrate_et')] = model.model_parameters['k_m_bu_et']*c[model.species.index('S_bu')]*c[model.species.index('S_et')] / \
        (model.model_parameters['K_S_bu']*c[model.species.index('S_bu')]+model.model_parameters['K_S_bu_et']*c[model.species.index('S_et')]+c[model.species.index('S_bu')]*c[model.species.index('S_et')]+10**-9
         )*c[model.species.index('X_chain_et')]*I14

    v[model.reactions.index('Uptake of butyrate_lac')] = model.model_parameters['k_m_bu_lac']*c[model.species.index('S_bu')]*c[model.species.index('S_lac')] / \
        (model.model_parameters['K_S_bu']*c[model.species.index('S_bu')]+model.model_parameters['K_S_bu_lac']*c[model.species.index('S_lac')]+c[model.species.index('S_bu')]*c[model.species.index('S_lac')]+10**-9
         )*c[model.species.index('X_chain_lac')]/(c[model.species.index('X_chain_lac')]+model.model_parameters['K_S_X_chain_lac'])*I14

    v[model.reactions.index('Uptake of butyrate')] = model.model_parameters['k_m_bu']*c[model.species.index('S_bu')]/ \
        (model.model_parameters['K_S_bu']+c[model.species.index('S_bu')])*c[model.species.index('X_VFA_deg')]/(c[model.species.index('X_VFA_deg')]+model.model_parameters['K_S_X_VFA_deg'])*I14

    v[model.reactions.index('Uptake of valerate')] = model.model_parameters['k_m_va']*c[model.species.index('S_va')] / \
        (model.model_parameters['K_S_va']+c[model.species.index('S_va')])*c[model.species.index('X_VFA_deg')]/(c[model.species.index('X_VFA_deg')]+model.model_parameters['K_S_X_VFA_deg'])*I15

    v[model.reactions.index('Uptake of caproate')] = model.model_parameters['k_m_cap']*c[model.species.index('S_cap')] / \
        (model.model_parameters['K_S_cap']+c[model.species.index('S_cap')])*c[model.species.index('X_VFA_deg')]/(c[model.species.index('X_VFA_deg')]+model.model_parameters['K_S_X_VFA_deg'])*I13

    v[model.reactions.index('Methanogenessis from acetate and h2')] = model.model_parameters['k_m_h2_Me_ac']*c[model.species.index('S_h2')]*c[model.species.index('S_ac')] / \
        (model.model_parameters['K_S_h2_Me_ac']*c[model.species.index('S_h2')]+model.model_parameters['K_S_ac_Me']*c[model.species.index(
            'S_ac')]+c[model.species.index('S_ac')]*c[model.species.index('S_h2')]+10**-9)*c[model.species.index('X_Me_ac')]*I12

    v[model.reactions.index('Methanogenessis from CO2 and h2')] = model.model_parameters['k_m_h2_Me_CO2']*c[model.species.index('S_h2')]*c[model.species.index('S_co2')] / \
        (model.model_parameters['K_S_h2_Me_CO2']*c[model.species.index('S_h2')]+model.model_parameters['K_S_CO2_Me']*c[model.species.index(
            'S_co2')]+c[model.species.index('S_co2')]*c[model.species.index('S_h2')]+10**-9)*c[model.species.index('X_Me_CO2')]*I12


    v[model.reactions.index('Uptake of ethanol')] = model.model_parameters['k_m_et']*c[model.species.index('S_et')] / \
        (model.model_parameters['K_S_et']+c[model.species.index('S_et')]
         )*c[model.species.index("X_et")]/(c[model.species.index("X_et")]+model.model_parameters['K_S_X_et'])*I16

    v[model.reactions.index('Uptake of lactate')] = model.model_parameters['k_m_lac']*c[model.species.index('S_lac')] / \
        (model.model_parameters['K_S_lac']+c[model.species.index('S_lac')]
         )*c[model.species.index('X_lac')]/(c[model.species.index('X_lac')]+model.model_parameters['K_S_X_lac'])*I16

    v[model.reactions.index(
        'Decay of Xsu')] = model.model_parameters['k_dec_X_su']*c[model.species.index('X_su')]

    v[model.reactions.index(
        'Decay of Xaa')] = model.model_parameters['k_dec_X_aa']*c[model.species.index('X_aa')]

    v[model.reactions.index(
        'Decay of Xfa')] = model.model_parameters['k_dec_X_fa']*c[model.species.index('X_fa')]

    v[model.reactions.index(
        'Decay of X_ac_et')] = model.model_parameters['k_dec_X_ac']*c[model.species.index('X_ac_et')]

    v[model.reactions.index(
        'Decay of X_ac_lac')] = model.model_parameters['k_dec_X_ac']*c[model.species.index('X_ac_lac')]

    v[model.reactions.index(
        'Decay of X_chain_et')] = model.model_parameters['k_dec_X_chain_et']*c[model.species.index('X_chain_et')]

    v[model.reactions.index('Decay of X_chain_lac')
      ] = model.model_parameters['k_dec_X_chain_lac']*c[model.species.index('X_chain_lac')]

    v[model.reactions.index(
        'Decay of X_VFA_deg')] = model.model_parameters['k_dec_X_VFA_deg']*c[model.species.index('X_VFA_deg')]

    v[model.reactions.index(
        'Decay of X_Me_ac')] = model.model_parameters['k_dec_X_Me_ac']*c[model.species.index('X_Me_ac')]

    v[model.reactions.index(
        'Decay of X_Me_CO2')] = model.model_parameters['k_dec_X_Me_CO2']*c[model.species.index('X_Me_CO2')]

    v[model.reactions.index(
        'Decay of Xet')] = model.model_parameters['k_dec_X_et']*c[model.species.index('X_et')]

    v[model.reactions.index(
        'Decay of Xlac')] = model.model_parameters['k_dec_X_lac']*c[model.species.index('X_lac')]



    p_gas_h2 = c[model.species.index('S_gas_h2')] * model.base_parameters["R"] * \
        model.base_parameters["T_op"] / 16
    p_gas_ch4 = c[model.species.index('S_gas_ch4')] * model.base_parameters["R"] * \
        model.base_parameters["T_op"] / 64
    p_gas_co2 = c[model.species.index('S_gas_co2')] * model.base_parameters["R"] * \
        model.base_parameters["T_op"]
    p_gas_h2o = 0.0313 * \
        np.exp(5290 *
               (1 / model.base_parameters["T_base"] - 1 / model.base_parameters["T_op"]))
    P_gas = p_gas_h2 + p_gas_ch4 + p_gas_co2 + p_gas_h2o
    q_gas = max(
        0, (model.model_parameters['k_p'] * (P_gas - model.base_parameters['P_atm'])))
    v[model.reactions.index('Gas Transfer H2')] = model.model_parameters['k_L_a'] * \
        (c[model.species.index('S_h2')] - 16 *
         model.model_parameters['K_H_h2'] * p_gas_h2)

    v[model.reactions.index('Gas Transfer CH4')] = max(0,model.model_parameters['k_L_a'] * \
        (c[model.species.index('S_ch4')] - 64 *
         model.model_parameters['K_H_ch4'] * p_gas_ch4))
    v[model.reactions.index('Gas Transfer CO2')] = max(0,model.model_parameters['k_L_a'] * \
        (c[model.species.index('S_co2')] -
         model.model_parameters['K_H_co2'] * p_gas_co2))

    dCdt = np.matmul(model.s, v)

    phi = c[model.species.index('S_cation')]+c[model.species.index('S_nh4_ion')]-c[model.species.index('S_hco3_ion')]-(c[model.species.index('S_lac_ion')] / 88) - (c[model.species.index('S_ac_ion')] / 64) - (c[model.species.index('S_pro_ion')] /
                                                                                                                                                                     112) - (c[model.species.index('S_bu_ion')] / 160)-(c[model.species.index('S_cap_ion')] / 230) - (c[model.species.index('S_va_ion')] / 208) - c[model.species.index('S_anion')]
    if 'S_H_ion' in model.control_state.keys():
        c[model.species.index('S_H_ion')]=model.control_state['S_H_ion']
    else:
        c[model.species.index('S_H_ion')] = (-1 * phi / 2) + \
        (0.5 * np.sqrt(phi**2 + 4 * model.model_parameters['K_w']))

    dCdt[0: len(model.species)-3] = dCdt[0: len(model.species)-3]+model.base_parameters['q_in'] / \
        model.base_parameters["V_liq"] * \
        (model.inlet_conditions[0: len(model.species)-3]-c[0: len(model.species)-3].reshape(-1, 1))

    dCdt[len(model.species)-3:] = dCdt[len(model.species)-3:]+q_gas/model.base_parameters["V_gas"] * \
        (model.inlet_conditions[len(model.species)-3:]-c[len(model.species)-3:].reshape(-1, 1))

    dCdt[[model.species.index('S_H_ion'), model.species.index(
        'S_co2'), model.species.index('S_nh4_ion')], 0] = 0

    if c[model.species.index('S_IN')]<0.01:
        model.nitrogen_limited=True
    else:
        model.nitrogen_limited=False

    if model.switch == "DAE":
        # dCdt[model.species.index('S_h2')] = 0

        dCdt[model.species.index('S_va_ion'):model.species.index('S_co2')] = 0

        dCdt[model.species.index('S_nh3')] = 0

    if model.control_state.keys():
        for state in model.control_state.keys():
            c[model.species.index(state)]=model.control_state[state]
            dCdt[model.species.index(state)]=0
    model.info["Fluxes"]=v
    return dCdt[:, 0]
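The gas transfer rates at the end of both functions implement ADM1's two-film model: the flux is k_L_a times the difference between the dissolved concentration and its Henry's-law equilibrium with the headspace partial pressure, where the factors 16 and 64 convert between moles and kgCOD for H2 and CH4 respectively. A numeric sketch with made-up parameter values (the real ones come from model.model_parameters and model.base_parameters):

R, T_op = 0.083145, 308.15   # bar*L/(mol*K) and K; illustrative operating point
k_L_a   = 200.0              # 1/d, gas-liquid transfer coefficient (illustrative)
K_H_h2  = 7.8e-4             # mol/(L*bar), Henry constant for H2 (illustrative)

S_h2     = 2.0e-7            # dissolved H2 (made up)
S_gas_h2 = 1.0e-5            # headspace H2 (made up)

# Same expressions as in the source above:
p_gas_h2 = S_gas_h2 * R * T_op / 16
rate_h2  = k_L_a * (S_h2 - 16 * K_H_h2 * p_gas_h2)

print(f"p_gas_h2 = {p_gas_h2:.3e} bar, transfer rate = {rate_h2:.3e}")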