
API

ADToolbox resolves the paths to the files and configurations that its methods require through the configs module. The entire toolbox relies on this module: objects of the different classes in ADToolbox are instantiated with an instance of the corresponding class in the configs module. For instance, if you want to use the methods of the Metagenomics class in the core module, you do the following:

from adtoolbox import configs, core

metag_conf=configs.Metagenomics() 
metag_object=core.Metagenomics(metag_conf)

Doing this means that any core.Metagenomics method will refer to the default configurations defined in the configs module. If you want to override a default configuration, pass the desired argument to the configs.Metagenomics constructor. For example, if you want to change the docker repository for VSEARCH you can:


metag_conf=configs.Metagenomics(vsearch_docker="mydocker") 
metag_object=core.Metagenomics(metag_conf)

Now when you execute the corresponding method in core.Metagenomics, it will use mydocker instead of the default. To learn more about the default configs, see the configs API below.

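Because config objects are plain Python objects, you can inspect the resolved defaults directly. A minimal sketch:

from adtoolbox import configs

metag_conf=configs.Metagenomics()
for key,value in vars(metag_conf).items(): # vars() exposes every configured attribute
    print(key,"=",value)
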
configs

You can access this module by:

from adtoolbox import configs 

This module contains configurations that are required by other classes in the package and also links to remote databases. The following classes are included in this module:

1. Database

An instance of this class will hold all the configuration information for core.Database functionalities.

Source code in adtoolbox/configs.py
class Database:
	"An instance of this class will hold all the configuration information for core.Database functionalities."

	def __init__(self,
		compound_db:str=Seed_COMPOUNDS_DB,
		reaction_db:str=Seed_RXN_DB,
		local_compound_db:str=os.path.join(Main_Dir, "Database", 'Local_compounds.json'),
		local_reaction_db:str=os.path.join(Main_Dir, "Database", 'Local_reactions.json'),
		csv_reaction_db:str=os.path.join(Main_Dir, "Database", 'Reaction_Metadata.csv'),
		feed_db=os.path.join(Main_Dir, "Database", 'feed_db.tsv'),
		amplicon_to_genome_db=os.path.join(Main_Dir,'Database','Amplicon2GenomeDBs'),
		cazy_links:str=EXTERNAL_LINKS["cazy_links"],
		amplicon_to_genome_urls:dict=EXTERNAL_LINKS["amplicon2genome"],
		adm_parameters_urls:dict=E_ADM_REMOTE,
		adm_parameters:dict=E_ADM_LOCAL,
		seed_rxn_url:str =EXTERNAL_LINKS["seed_rxn_url"],
		seed_compound_url:str =EXTERNAL_LINKS["seed_compound_url"],
		protein_db_url:str =INTERNAL_LINKS["protein_db_url"],
		adtoolbox_rxn_db_url:str =INTERNAL_LINKS["adtoolbox_rxn_db_url"],
		feed_db_url:str =INTERNAL_LINKS["feed_db_url"],
		qiime_classifier_db:str=os.path.join(Main_Dir, "Database","qiime2_classifier_db" ,'qiime2_classifier_db.qza'),
		qiime_classifier_db_url:str=INTERNAL_LINKS["qiime_classifier_db_url"],
  		adtoolbox_singularity=ADTOOLBOX_CONTAINERS["singularity_x86"],
		adtoolbox_docker=ADTOOLBOX_CONTAINERS["docker_x86"],
    	protein_db=os.path.join(Main_Dir, "Database", 'Protein_DB.fasta'),
		adm_microbial_groups_mapping=E_ADM_MICROBIAL_GROUPS_MAPPING,
        metagenomics_studies_db=os.path.join(Main_Dir,"Database","Studies","metagenomics_studies.tsv"),
        experimental_data_db=os.path.join(Main_Dir,"Database","Studies","experimental_data_references.json"),
        studies_remote=STUDIES_REMOTE,
        studies_local=STUDIES_LOCAL,
        check_sanity:bool=False
		):
		self.compound_db = compound_db
		self.reaction_db = reaction_db
		self.local_compound_db = local_compound_db
		self.local_reaction_db = local_reaction_db
		self.csv_reaction_db = csv_reaction_db
		self.feed_db = feed_db
		self.amplicon_to_genome_db = amplicon_to_genome_db
		self.cazy_links = cazy_links
		self.amplicon_to_genome_urls = amplicon_to_genome_urls
		self.adm_parameters_urls = adm_parameters_urls
		self.adm_parameters = adm_parameters
		self.seed_rxn_url = seed_rxn_url
		self.seed_compound_url = seed_compound_url
		self.protein_db_url = protein_db_url
		self.adtoolbox_rxn_db_url = adtoolbox_rxn_db_url
		self.feed_db_url = feed_db_url
		self.qiime_classifier_db = qiime_classifier_db
		self.qiime_classifier_db_url = qiime_classifier_db_url
		self.adtoolbox_singularity=adtoolbox_singularity
		self.adtoolbox_docker=adtoolbox_docker
		self.protein_db=protein_db
		self.adm_microbial_groups_mapping=adm_microbial_groups_mapping
		self.metagenomics_studies_db=metagenomics_studies_db
		self.experimental_data_db=experimental_data_db
		self.studies_remote=studies_remote
		self.studies_local=studies_local
		self.protein_db_mmseqs=pathlib.Path(protein_db).parent.joinpath("protein_db_mmseqs")
		if check_sanity:
			self.check_adm_parameters()

	def check_adm_parameters(self):
		branches=all([pathlib.Path(x).parent==pathlib.Path(self.adm_parameters["model_parameters"]).parent for x in self.adm_parameters.values()])
		if not branches:
			warnings.warn(f"The ADM parameters are not in the same directory!")

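Any keyword accepted by the constructor above can be overridden at instantiation time. A minimal sketch, using a hypothetical local path for the feed database:

import os
from adtoolbox import configs, core

# "my_project" is a hypothetical directory; any keyword from the constructor can be overridden.
db_conf=configs.Database(feed_db=os.path.join("my_project","feed_db.tsv"),check_sanity=False)
db_object=core.Database(config=db_conf)
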
2. Metagenomics

An instance of this class will hold all the configuration information for core.Metagenomics functionalities.

Source code in adtoolbox/configs.py
class Metagenomics:
	"""	
	An instance of this class will hold all the configuration information for core.Metagenomics functionalities.
	"""
	### Here we have some class variables that are used in the class
	gtdb_dir="ssu_all_*.fna"
	def __init__(self, 
            amplicon2genome_k=10,
            vsearch_similarity=0.97,
            genomes_base_dir=os.path.join(Main_Dir,"Genomes"),
            align_to_gtdb_outputs_dir=os.path.join(Main_Dir,"Genomes"),
            amplicon2genome_db=Database().amplicon_to_genome_db,
            qiime_outputs_dir=os.path.join(Main_Dir,'Metagenomics_Data','QIIME_Outputs'),
			genome_alignment_script=os.path.join(Main_Dir,"Metagenomics_Data","QIIME_Outputs","genome_alignment_script.sh"),
			vsearch_threads:int=4,
			rsync_download_dir=os.path.join(Main_Dir,"Genomes","rsync_download.sh"),
			adtoolbox_singularity=ADTOOLBOX_CONTAINERS["singularity_x86"],
			adtoolbox_docker=ADTOOLBOX_CONTAINERS["docker_x86"],
            genome_alignment_output=os.path.join(Main_Dir,"Outputs"),
            csv_reaction_db=Database().csv_reaction_db,
            sra=os.path.join(Main_Dir,"Metagenomics_Analysis","SRA"),
            bit_score=40,
            e_value=10**-5,
            qiime2_docker_image="quay.io/qiime2/core:2022.2",
            qiime2_singularity_image="docker://quay.io/qiime2/core:2022.2",
            qiime2_paired_end_bash_str=os.path.join(PKG_DATA,"qiime_template_paired.txt"),
            qiime2_single_end_bash_str=os.path.join(PKG_DATA,"qiime_template_single.txt"),
			qiime_classifier_db=Database().qiime_classifier_db,
			adm_mapping=Database().adm_microbial_groups_mapping,
             ):
		self.k = amplicon2genome_k
		self.vsearch_similarity = vsearch_similarity
		self.align_to_gtdb_outputs_dir = align_to_gtdb_outputs_dir
		self.amplicon2genome_db = amplicon2genome_db
		self.qiime_outputs_dir = qiime_outputs_dir
		self.protein_db=Database().protein_db
		self.protein_db_mmseqs=Database().protein_db_mmseqs
		self.seed_rxn_db=Seed_RXN_DB
		self.genome_alignment_output = genome_alignment_output
		self.bit_score = bit_score
		self.e_value = e_value
		self.vsearch_threads=vsearch_threads
		self.csv_reaction_db=csv_reaction_db
		self.sra=sra
		self.qiime2_singularity_image=qiime2_singularity_image
		self.qiime2_docker_image=qiime2_docker_image
		self.qiime2_paired_end_bash_str=qiime2_paired_end_bash_str
		self.qiime2_single_end_bash_str=qiime2_single_end_bash_str 
		self.qiime_classifier_db=qiime_classifier_db
		if list(pathlib.Path(self.amplicon2genome_db).rglob(Metagenomics.gtdb_dir)):
			self.gtdb_dir_fasta=str(list(pathlib.Path(self.amplicon2genome_db).rglob(Metagenomics.gtdb_dir))[0])
		else:
			self.gtdb_dir_fasta=None
		self.genome_alignment_script=genome_alignment_script	
		self.adtoolbox_singularity=adtoolbox_singularity
		self.adtoolbox_docker=adtoolbox_docker
		self.rsync_download_dir=rsync_download_dir
		self.genomes_base_dir=genomes_base_dir
		self.adm_mapping=adm_mapping

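As with the other config classes, individual defaults can be overridden per run. A minimal sketch that tightens the VSEARCH identity cutoff and raises the thread count (both keywords appear in the constructor above):

from adtoolbox import configs, core

metag_conf=configs.Metagenomics(vsearch_similarity=0.99,vsearch_threads=8)
metag_object=core.Metagenomics(metag_conf)
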
3. Utils

An instance of this class will hold all the configuration information for utils module functionalities.

Source code in adtoolbox/configs.py
class Utils:
	"""
	An instance of this class will hold all the configuration information for utils module functionalities."""
	def __init__(self,
	slurm_template:str=os.path.join(PKG_DATA,"slurm_template.txt"),
	docker_template_qiime:str=None,
	singularity_template_qiime:str=None,
	slurm_executer:str='',
	slurm_wall_time:str='24:00:00',
	slurm_job_name:str='ADToolbox',
	slurm_outlog:str='ADToolbox.log',
    slurm_cpus:str="12",
	slurm_memory:str="100G",
	slurm_save_dir:str=os.getcwd(),
	adtoolbox_singularity:str=ADTOOLBOX_CONTAINERS["singularity_x86"],
	adtoolbox_docker:str=ADTOOLBOX_CONTAINERS["docker_x86"]
	) -> None:
		self.slurm_template = slurm_template
		self.docker_template_qiime = docker_template_qiime
		self.singularity_template_qiime = singularity_template_qiime
		self.slurm_executer = slurm_executer
		self.slurm_wall_time = slurm_wall_time
		self.slurm_job_name = slurm_job_name
		self.slurm_outlog=slurm_outlog
		self.slurm_cpus = slurm_cpus
		self.slurm_save_dir = slurm_save_dir
		self.slurm_memory = slurm_memory
		self.adtoolbox_singularity=adtoolbox_singularity
		self.adtoolbox_docker=adtoolbox_docker

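A minimal sketch of a configs.Utils object for dispatching jobs to SLURM; the partition name and resource values are hypothetical placeholders:

from adtoolbox import configs

utils_conf=configs.Utils(slurm_executer="my_partition", # hypothetical partition name
                         slurm_wall_time="12:00:00",
                         slurm_cpus="24",
                         slurm_memory="50G")
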
core

You can access this module by:

from adtoolbox import core 

This module includes the following classes:

1. Experiment

This class creates an interface for the experimental data to be used in different places in ADToolbox. First you should give each experiment a name. time must be a list of time points in days, and there must be a time 0 point assigned to each experiment. variables must be a list of integers: the indices of the ADM species for which concentration data are available. data must be a list of lists, where each inner list holds the concentrations of one species at each time point. IMPORTANT: The order of the species in the data list must match the order of the species in the variables list. If there are specific initial concentrations for the ADM species, they can be passed as a dictionary to the initial_concentrations argument. reference is an optional argument that can be used to provide a reference for the experimental data. When using the database module to query for Experiment objects, you can query by name, reference, or model_type, so having a descriptive reference is useful for querying. The default model name is "e_adm"; this can be changed through the model_name argument, which also helps with querying.

Parameters:

  • name (str): A unique name for the experiment. [required]

  • time (list): A list of time points in days. [required]

  • variables (list): A list of integers: the indices of the ADM species for which concentration data are available. [required]

  • data (list): A list of lists; each inner list holds the concentrations of one species at each time point. [required]

  • initial_concentrations (dict): A dictionary of initial concentrations for the ADM species. Defaults to {}.

  • reference (str): A reference for the experimental data. Defaults to ''.

  • model_name (str): The name of the model that the experimental data is for. Defaults to "e_adm".

Examples:

>>> from adtoolbox import configs
>>> import json
>>> with open(configs.Database().species,"r") as f:
...     species=json.load(f)
>>> S_su_index=species.index("S_su")
>>> S_aa_index=species.index("S_aa")
>>> exp=Experiment(name="Test",time=[0,1,2],variables=[S_su_index,S_aa_index],data=[[1,2,3],[4,5,6]],reference="Test reference")
Source code in adtoolbox/core.py
@dataclass
class Experiment:
    """
    This class creates an interface for the experimental data to be used in different places in ADToolbox.
    First you should give each experiment a name. Time must be a list of time points in days, and there must be a time 0 point assigned to each experiment.
    variables must be a list of integers that represent the variables that are the index of the ADM species that we have concentration data for.
    data must be a list of lists. Each list in the list must be a list of concentrations for each species at each time point.
    IMPORTANT: The order of the species in the data list must match the order of the species in the variables list.
    if there are specific initial concentrations for the ADM species, they can be passed as a dictionary to the initial_concentrations argument.
    reference is an optional argument that can be used to provide a reference for the experimental data. If using the database module 
    to query for Experiment objects you can query by name or reference or model_type. So, having a descriptive reference can be useful for querying as well.
    The default model name is "e_adm". This can be changed by passing a different model name to the model_name argument. This also helps with querying.

    Args:
        name (str): A unique name for the experiment.
        time (list): A list of time points in days.
        variables (list): A list of integers that represent the variables that are the index of the ADM species that we have concentration data for.
        data (list): A list of lists. Each list in the list must be a list of concentrations for each species at each time point.
        initial_concentrations (dict, optional): A dictionary of initial concentrations for the ADM species. Defaults to {}.
        reference (str, optional): A reference for the experimental data. Defaults to ''.
        model_name (str, optional): The name of the model that the experimental data is for. Defaults to "e_adm".

    Examples:
        >>> from adtoolbox import configs
        >>> import json
        >>> with open(configs.Database().species,"r") as f:
        ...     species=json.load(f)
        >>> S_su_index=species.index("S_su")
        >>> S_aa_index=species.index("S_aa")
        >>> exp=Experiment(name="Test",time=[0,1,2],variables=[S_su_index,S_aa_index],data=[[1,2,3],[4,5,6]],reference="Test reference")

    """
    name:str
    time: list[float]
    variables: list[int]
    data: list[list[float]]
    initial_concentrations: dict[str,float] = dataclasses.field(default_factory=dict)
    reference: str = ""
    model_name: str = "e_adm"


    def __post_init__(self):
        self.data=np.array(self.data).T
        self.validate()

    def validate(self):
        assert len(self.time)==self.data.shape[0], "Number of time points must match number of rows in data."
        assert len(self.variables)==self.data.shape[1] , "Number of variables must match number of columns in data."
        assert self.time[0]==0, "Time must start at 0."
        return "successful"

    def to_dict(self):
        return {"name":self.name,
                "time":self.time,
                "variables":self.variables,
                "data":self.data.T.tolist(),
                "initial_concentrations":self.initial_concentrations,
                "reference":self.reference,
                "model_name":self.model_name}

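A self-contained sketch of constructing and serializing an Experiment; the variable indices 0 and 1 are hypothetical stand-ins for real ADM species indices, which would normally be looked up as in the example above:

from adtoolbox import core

exp=core.Experiment(name="batch_1",
                    time=[0,1,2],                       # must start at 0
                    variables=[0,1],                    # hypothetical species indices
                    data=[[1.0,2.0,3.0],[4.0,5.0,6.0]], # one inner list per variable
                    reference="hypothetical reference")
print(exp.to_dict()["data"]) # round-trips back to one row per variable
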
2. Feed

The Feed class is used to store the feed information and later use it in the e_adm model. All the entered numbers must be in percentages. Carbohydrates, lipids, proteins, and si must sum up to 100; they form the total dissolved solids. Carbohydrates, lipids, proteins, and xi must sum up to 100; they form the total suspended solids.

IMPORTANT: It is assumed that lipids, proteins, and carbohydrates have the same fraction in the soluble and insoluble fractions.

Parameters:

  • name (str): A unique name for the feed. [required]

  • carbohydrates (float): Percentage of carbohydrates in the feed. [required]

  • lipids (float): Percentage of lipids in the feed. [required]

  • proteins (float): Percentage of proteins in the feed. [required]

  • tss (float): Percentage of total COD in the form of suspended solids. [required]

  • si (float): Percentage of soluble inorganics in the TDS. [required]

  • xi (float): Percentage of insoluble inorganics in the TSS. [required]

  • reference (str): A reference for the feed data. Defaults to ''.

Examples:

>>> feed=Feed(name="Test",carbohydrates=20,lipids=20,proteins=20,si=20,xi=20,tss=70)
>>> assert feed.ch_tss==feed.lip_tss==feed.prot_tss==feed.xi_tss==0.25
Source code in adtoolbox/core.py
@dataclass
class Feed:

    """
    The Feed class is used to store the feed information, and later use it in the e_adm model.
    All the entered numbers must be in percentages. Carbohydrates, lipids, proteins, and si must sum up to 100,
    and they form the total dissolved solids. Carbohydrates, lipids, proteins, and xi must sum up to 100, and they form the total suspended solids.

    IMPORTANT: It is assumed that lipids, proteins, and carbohydrates have the same fraction in the soluble and insoluble fractions.

    Args:
        name (str): A unique name for the feed.
        carbohydrates (float): percentage of carbohydrates in the feed.
        lipids (float): percentage of lipids in the feed.
        proteins (float): percentage of proteins in the feed.
        tss (float): percentage of total COD in the form of suspended solids.
        si (float): percentage of soluble inorganics in the TDS.
        xi (float): percentage of insoluble inorganics in the TSS.
        reference (str, optional): A reference for the feed data. Defaults to ''.    

    Examples:
        >>> feed=Feed(name="Test",carbohydrates=20,lipids=20,proteins=20,si=20,xi=20,tss=70)
        >>> assert feed.ch_tss==feed.lip_tss==feed.prot_tss==feed.xi_tss==0.25

    """
    # total_cod:float Transfer to base parameters
    name:str            # A unique name for the feed
    carbohydrates:float # percentage of carbohydrates in the feed
    lipids:float        # percentage of lipids in the feed
    proteins:float      # percentage of proteins in the feed
    tss:float           # percentage of total COD in the form of suspended solids
    si:float            # percentage of soluble inorganics in the TDS
    xi:float            # percentage of insoluble inorganics in the TSS
    reference:str=''    # A reference for the feed data

    def __post_init__(self):
        if self.carbohydrates+self.lipids+self.proteins>100:
            raise ValueError("The sum of the percentages must be less than 100")
        if self.carbohydrates+self.lipids+self.proteins+self.si<1:
            warn("The sum of lipids, carbohydrates, and proteins is suspiciously low! Make sure you have input the numbers in percentages!")
        li_ch_pr=self.carbohydrates+self.lipids+self.proteins
        without_xi=100-self.xi
        self.ch_tss=self.carbohydrates/li_ch_pr*without_xi/100
        self.lip_tss=self.lipids/li_ch_pr*without_xi/100
        self.prot_tss=self.proteins/li_ch_pr*without_xi/100
        self.xi_tss=self.xi/100
        without_si=100-self.si
        self.ch_tds=self.carbohydrates/li_ch_pr*without_si/100
        self.lip_tds=self.lipids/li_ch_pr*without_si/100
        self.prot_tds=self.proteins/li_ch_pr*without_si/100
        self.si_tds=self.si/100

    def to_dict(self)->dict:
        return {"name":self.name,
                "carbohydrates":self.carbohydrates,
                "lipids":self.lipids,
                "proteins":self.proteins,
                "tss":self.tss,
                "si":self.si,
                "xi":self.xi,
                "reference":self.reference}

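A minimal sketch showing the derived fractions that become available after construction, reusing the composition from the docstring example:

from adtoolbox import core

feed=core.Feed(name="test_feed",carbohydrates=20,lipids=20,proteins=20,si=20,xi=20,tss=70)
print(feed.to_dict())          # the raw percentages as entered
print(feed.ch_tss,feed.ch_tds) # TSS/TDS fractions computed in __post_init__
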
3. MetagenomicsStudy

This class is used to communicate between the metagenomics studies database and the ADM model.

Parameters:

  • name (str): The name of the metagenomics study. It's okay if it is not unique. [required]

  • study_type (str): The type of the metagenomics study. It can be "amplicon" or "WGS". [required]

  • microbiome (str): The microbiome that the metagenomics study is about. [required]

  • sample_accession (str): The SRA sample accession number of the metagenomics study. This must be unique. [required]

  • comments (str): Any comments that you want to add to the metagenomics study. [required]

  • study_accession (str): The SRA study accession number of the metagenomics study. [required]

Examples:

>>> study=MetagenomicsStudy(name="Test",study_type="WGS",microbiome="test_microbiome",sample_accession="test_accession",comments="test_comments",study_accession="test_study_accession")
>>> assert study.name=="Test"
Source code in adtoolbox/core.py
@dataclass
class MetagenomicsStudy:
    """
    This class is used to communicate between the metagenomics studies database and the ADM model.

    Args:
        name (str): The name of the metagenomics study. It's okay if it is not unique.
        study_type (str): The type of the metagenomics study. It can be "amplicon" or "WGS".
        microbiome (str): The microbiome that the metagenomics study is about.
        sample_accession (str): The SRA sample accession number of the metagenomics study. This must be unique.
        comments (str): Any comments that you want to add to the metagenomics study.
        study_accession (str): The SRA study accession number of the metagenomics study.   

    Examples:
        >>> study=MetagenomicsStudy(name="Test",study_type="WGS",microbiome="test_microbiome",sample_accession="test_accession",comments="test_comments",study_accession="test_study_accession")
        >>> assert study.name=="Test"

    """
    name:str
    study_type:str
    microbiome:str
    sample_accession:str
    comments:str
    study_accession:str

    def to_dict(self)->dict:
        return {"name":self.name,
                "study_type":self.study_type,
                "microbiome":self.microbiome,
                "sample_accession":self.sample_accession,
                "comments":self.comments,
                "study_accession":self.study_accession}

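Because to_dict returns the same column layout used by the metagenomics studies database, a list of studies converts directly into a table. A minimal sketch with hypothetical accession numbers:

import pandas as pd
from adtoolbox import core

study=core.MetagenomicsStudy(name="digester_a",              # hypothetical study
                             study_type="WGS",
                             microbiome="anaerobic digester",
                             sample_accession="SAMN00000001", # hypothetical accession
                             comments="",
                             study_accession="SRP000001")    # hypothetical accession
df=pd.DataFrame([study.to_dict()]) # one row per study, same columns as the studies database
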
4. Reaction

This class provides a simple interface between information about biochemical reactions and multiple functionalities of ADToolbox. In order to instantiate a reaction object, you need to pass a dictionary of the reaction information. This dictionary must include the 'name' and 'stoichiometry' keys, and it follows the format of the seed database. stoichiometry must be formatted like the seed database, as follows: stoichiometry: '-1:cpd00079:0:0:"D-glucose-6-phosphate";1:cpd00072:0:0:"D-fructose-6-phosphate"'

Parameters:

  • data (dict): A dictionary containing the reaction information. This follows the format of the seed database. [required]

Examples:

>>> A={"name":'D-glucose-6-phosphate aldose-ketose-isomerase',"stoichiometry":'-1:cpd00079:0:0:"D-glucose-6-phosphate";1:cpd00072:0:0:"D-fructose-6-phosphate"'}
>>> a=Reaction(A)
>>> print(a)
D-glucose-6-phosphate aldose-ketose-isomerase
Source code in adtoolbox/core.py
class Reaction:
    """
    This class provides a simple interface between information about biochemical reactions and multiple functionalities of ADToolbox.
    In order to instantiate a reaction object, you need to pass a dictionary of the reaction information.
    This dictionary must include 'name','stoichiometry' keys. This follows the format of the seed database.
    stoichiometry must be formatted like seed database. The seed database format is as follows:
    stoichiometry: '-1:cpd00079:0:0:\"D-glucose-6-phosphate\";1:cpd00072:0:0:\"D-fructose-6-phosphate\"'

    Args:
        data (dict): A dictionary containing the reaction information. This follows the format of the seed database.


    Examples:
        >>> A={"name":'D-glucose-6-phosphate aldose-ketose-isomerase',"stoichiometry":'-1:cpd00079:0:0:\"D-glucose-6-phosphate\";1:cpd00072:0:0:\"D-fructose-6-phosphate\"'}
        >>> a=Reaction(A)
        >>> print(a)
        D-glucose-6-phosphate aldose-ketose-isomerase

    """
    def __init__(self, data:dict)->None:
        self.data = data

    def __str__(self)->str:
        return self.data['name']

    @property
    def stoichiometry(self)->dict:
        """
        Returns the stoichiometry of the reaction by the seed id of the compounds as key and the
        stoichiometric coefficient as value.
        Examples:
            >>> A={"name":'D-glucose-6-phosphate aldose-ketose-isomerase',"stoichiometry":'-1:cpd00079:0:0:\"D-glucose-6-phosphate\";1:cpd00072:0:0:\"D-fructose-6-phosphate\"'}
            >>> a=Reaction(A)
            >>> a.stoichiometry=={'cpd00079': -1, 'cpd00072': 1}
            True

        Args:
            self (Reaction): An instance of the Reaction.

        Returns:
            dict: The stoichiometry of the reaction 
        """
        return {compound.split(':')[1]:float(compound.split(':')[0]) for compound in self.data['stoichiometry'].split(';') }

stoichiometry: dict property

Returns the stoichiometry of the reaction by the seed id of the compounds as key and the stoichiometric coefficient as value.

Examples:

>>> A={"name":'D-glucose-6-phosphate aldose-ketose-isomerase',"stoichiometry":'-1:cpd00079:0:0:"D-glucose-6-phosphate";1:cpd00072:0:0:"D-fructose-6-phosphate"'}
>>> a=Reaction(A)
>>> a.stoichiometry=={'cpd00079': -1, 'cpd00072': 1}
True

Parameters:

  • self (Reaction): An instance of the Reaction. [required]

Returns:

  • dict: The stoichiometry of the reaction.

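The property simply splits the seed stoichiometry string on ';' (one compound per entry) and then on ':' (coefficient first, seed id second). A standalone sketch of the same parsing, with no ADToolbox imports needed:

stoich='-1:cpd00079:0:0:"D-glucose-6-phosphate";1:cpd00072:0:0:"D-fructose-6-phosphate"'
parsed={entry.split(':')[1]:float(entry.split(':')[0]) for entry in stoich.split(';')}
print(parsed) # {'cpd00079': -1.0, 'cpd00072': 1.0}
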
5. Metabolite

This class provides a simple interface between information about metabolites and multiple functionalities of ADToolbox. In order to instantiate a metabolite object, you need to pass a dictionary of the metabolite information. This dictionary must include the 'name', 'mass', and 'formula' keys, and it follows the format of the seed database, for example: formula: 'C6H12O6'. Possibly the main advantage of instantiating a metabolite object is that it provides a COD attribute that can be used to convert the concentration of the metabolite from g/l to gCOD/l. This is useful for comparing the experimental data with the model outputs.

Parameters:

  • data (dict): A dictionary containing the metabolite information. This follows the format of the seed database. [required]

Examples:

>>> A={"name":"methane","mass":16,"formula":"CH4"}
>>> a=Metabolite(A)
>>> print(a)
methane
Source code in adtoolbox/core.py
class Metabolite:
    """
    This class provides a simple interface between information about metabolites and multiple functionalities of ADToolbox.
    In order to instantiate a metabolite object, you need to pass a dictionary of the metabolite information.
    This dictionary must include 'name','mass','formula' keys. This follows the format of the seed database.
    formula must be formatted like seed database. The seed database format is as follows:
    formula: 'C6H12O6'
    Possibly the main advantage of instantiating a metabolite object is that it provides a COD attribute that can be used to convert
    the concentration of the metabolite from g/l to gCOD/l. This is useful for comparing the experimental data with the model outputs.

    Args:
        data (dict): A dictionary containing the metabolite information. This follows the format of the seed database.


    Examples:
        >>> A={"name":"methane","mass":16,"formula":"CH4"}
        >>> a=Metabolite(A)
        >>> print(a)
        methane

    """

    def __init__(self, data):
        self.data = data
        self.cod = self.cod_calc()
        self.mw= self.data.get('mass',None)

    def __str__(self) -> str:
        return self.data['name']

    def cod_calc(self,add_h:float=0,add_c:float=0,add_o:float=0)->float:
        """
        Calculates the conversion rates for g/l -> gCOD/l
        In some cases we would like to add extra atoms for COD calculations
        For example, model seed biochemistry database only uses acetate instead of acetic acid.
        The 1 hydrogen difference changes the COD conversion rate. For this reason we can add extra atoms to the formula
        to calculate the COD conversion rate without changing anything else.

        Args:
            add_h (float): The number of extra hydrogen atoms to add to the formula for COD calculation.
            add_c (float): The number of extra carbon atoms to add to the formula for COD calculation.
            add_o (float): The number of extra oxygen atoms to add to the formula for COD calculation.

        Examples:
            >>> A={"name":"methane","mass":16,"formula":"CH4"}
            >>> a=Metabolite(A)
            >>> a.cod
            4.0

        Args:
            self (Metabolite): An instance of the Metabolite class.

        Returns:
            float: COD conversion from g/l to gCOD/l

        """
        if self.data['formula'] and self.data['mass']:
            contents = {}
            atoms = ["H", "C", "O"]
            mw = self.data['mass']+add_h*1+add_c*12+add_o*16
            for atom in atoms:
                if re.search(atom+r'\d*', self.data['formula']):
                    if len(re.search(atom+r'\d*', self.data['formula']).group()[1:]) == 0:
                        contents[atom] = 1
                    else:
                        contents[atom] = int(
                            re.search(atom+r'\d*', self.data['formula']).group()[1:])
                else:
                    contents[atom] = 0
            contents['H']+=add_h
            contents['C']+=add_c
            contents['O']+=add_o
            cod_conv=1/mw*(contents['H']+4*contents['C']-2*contents['O'])/4*32
            return cod_conv

        else:
            return 'None'

cod_calc(add_h=0, add_c=0, add_o=0)

Calculates the conversion rates for g/l -> gCOD/l. In some cases we would like to add extra atoms for COD calculations. For example, the model seed biochemistry database only uses acetate instead of acetic acid. The 1 hydrogen difference changes the COD conversion rate. For this reason we can add extra atoms to the formula to calculate the COD conversion rate without changing anything else.

Parameters:

  • add_h (float): The number of extra hydrogen atoms to add to the formula for COD calculation. Defaults to 0.

  • add_c (float): The number of extra carbon atoms to add to the formula for COD calculation. Defaults to 0.

  • add_o (float): The number of extra oxygen atoms to add to the formula for COD calculation. Defaults to 0.

Examples:

>>> A={"name":"methane","mass":16,"formula":"CH4"}
>>> a=Metabolite(A)
>>> a.cod
4.0

Parameters:

  • self (Metabolite): An instance of the Metabolite class. [required]

Returns:

  • float: COD conversion from g/l to gCOD/l.

Source code in adtoolbox/core.py
def cod_calc(self,add_h:float=0,add_c:float=0,add_o:float=0)->float:
    """
    Calculates the conversion rates for g/l -> gCOD/l
    In some cases we would like to add extra atoms for COD calculations
    For example, model seed biochemistry database only uses acetate instead of acetic acid.
    The 1 hydrogen difference changes the COD conversion rate. For this reason we can add extra atoms to the formula
    to calculate the COD conversion rate without changing anything else.

    Args:
        add_h (float): The number of extra hydrogen atoms to add to the formula for COD calculation.
        add_c (float): The number of extra carbon atoms to add to the formula for COD calculation.
        add_o (float): The number of extra oxygen atoms to add to the formula for COD calculation.

    Examples:
        >>> A={"name":"methane","mass":16,"formula":"CH4"}
        >>> a=Metabolite(A)
        >>> a.cod
        4.0

    Args:
        self (Metabolite): An instance of the Metabolite class.

    Returns:
        float: COD conversion from g/l to gCOD/l

    """
    if self.data['formula'] and self.data['mass']:
        contents = {}
        atoms = ["H", "C", "O"]
        mw = self.data['mass']+add_h*1+add_c*12+add_o*16
        for atom in atoms:
            if re.search(atom+r'\d*', self.data['formula']):
                if len(re.search(atom+r'\d*', self.data['formula']).group()[1:]) == 0:
                    contents[atom] = 1
                else:
                    contents[atom] = int(
                        re.search(atom+r'\d*', self.data['formula']).group()[1:])
            else:
                contents[atom] = 0
        contents['H']+=add_h
        contents['C']+=add_c
        contents['O']+=add_o
        cod_conv=1/mw*(contents['H']+4*contents['C']-2*contents['O'])/4*32
        return cod_conv

    else:
        return 'None'

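A sketch of the acetate adjustment described above, using a hand-written (hypothetical) seed-style entry for the acetate ion; adding one hydrogen reproduces the COD conversion rate of acetic acid:

from adtoolbox import core

acetate=core.Metabolite({"name":"acetate","mass":59,"formula":"C2H3O2"})
print(acetate.cod)               # conversion rate computed from the ion formula
print(acetate.cod_calc(add_h=1)) # corrected toward acetic acid (C2H4O2), ~1.07 gCOD/g
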
6. SeedDB

This class is designed to interact with the seed database. The main advantage of using this class is that it can be used to instantiate reaction and metabolite objects, and it provides extra functionalities that rely on information in the seed database. For example, if there is a chemical formula assigned to a metabolite in the seed database, then the information about the COD of that metabolite can be computed using the chemical formula.

Parameters:

  • config (configs.SeedDB): An instance of the SeedDB class in the configs module. This class contains the information about the seed database. [required]

Examples:

>>> seed_db=SeedDB(configs.SeedDB())
>>> assert seed_db.compound_db==configs.SeedDB().compound_db
>>> assert seed_db.reaction_db==configs.SeedDB().reaction_db
Source code in adtoolbox/core.py
class SeedDB:

    """
    This class is designed to interact with seed database. The main advantage of using this class is that it can be used to instantiate
    a reaction and metabolite object, and it provides extra functionalities that rely on information in the seed database. For example, 
    If there is a chemical formula assigned to a metabolite in the seed database, then the information about the COD of that metabolite
    can be computed using the chemical formula. 

    Args:
        config (configs.SeedDB): An instance of the SeedDB class in the configs module. This class contains the information about the seed database.

    Examples:
        >>> seed_db=SeedDB(configs.SeedDB())
        >>> assert seed_db.compound_db==configs.SeedDB().compound_db
        >>> assert seed_db.reaction_db==configs.SeedDB().reaction_db

    """

    def __init__(self, config:configs.Database) -> None:

        self.reaction_db = config.reaction_db
        self.compound_db = config.compound_db

    def instantiate_rxns(self, seed_id:str)->Reaction:
        """
        This method is used to instantiate reaction objects from the seed database.
        In order to instantiate a reaction object, you need to pass the seed identifier for that reaction.

        Args:
            seed_id (str): The seed identifier for the reaction.

        Returns:
            Reaction: An instance of the Reaction class.

        Required Configs:
            - config.reaction_db

        Examples:
            >>> seed_db=SeedDB()
            >>> rxn=seed_db.instantiate_rxns("rxn00558")
            >>> assert rxn.data["name"]=="D-glucose-6-phosphate aldose-ketose-isomerase"
        """
        db=pd.read_json(self.reaction_db)
        return Reaction(data=db[db["id"]==seed_id].to_dict(orient="records")[0])

    def instantiate_metabs(self, seed_id:str)->Metabolite:
        """
        This method is used to instantiate metabolite objects from the seed database.
        In order to instantiate a metabolite object, you need to pass the seed identifier for that metabolite.

        Args:
            seed_id (str): The seed identifier for the metabolite.

        Returns:
            Metabolite: An instance of the Metabolite class. 

        Required Configs:
            - config.compound_db

        Examples:
            >>> seed_db=SeedDB()
            >>> metab=seed_db.instantiate_metabs("cpd01024")
            >>> assert metab.cod==4.0
        """
        db=pd.read_json(self.compound_db)
        return Metabolite(data=db[db["id"]==seed_id].to_dict(orient="records")[0])

    def get_seed_rxn_from_ec(self, ec_number:str)->list:
        """
        This method is used to get the seed reaction identifiers for a given EC number.

        Args:
            ec_number (str): The EC number.

        Returns:
            list: A list of seed reaction identifiers.

        Required Configs:
            - config.reaction_db

        Examples:
            >>> seed_db=SeedDB()
            >>> seed_rxn_list=seed_db.get_seed_rxn_from_ec("1.1.1.1")
            >>> assert len(seed_rxn_list)>0

        """
        db=pd.read_json(self.reaction_db)
        db=db[db["ec_numbers"].apply(lambda x: ec_number in x if x else False)]
        db.drop_duplicates("id",inplace=True,keep="first")
        return db.to_dict(orient="records")

get_seed_rxn_from_ec(ec_number)

This method is used to get the seed reaction identifiers for a given EC number.

Parameters:

  • ec_number (str): The EC number. [required]

Returns:

  • list: A list of seed reaction identifiers.

Required Configs
  • config.reaction_db

Examples:

>>> seed_db=SeedDB()
>>> seed_rxn_list=seed_db.get_seed_rxn_from_ec("1.1.1.1")
>>> assert len(seed_rxn_list)>0
Source code in adtoolbox/core.py
def get_seed_rxn_from_ec(self, ec_number:str)->list:
    """
    This method is used to get the seed reaction identifiers for a given EC number.

    Args:
        ec_number (str): The EC number.

    Returns:
        list: A list of seed reaction identifiers.

    Required Configs:
        - config.reaction_db

    Examples:
        >>> seed_db=SeedDB()
        >>> seed_rxn_list=seed_db.get_seed_rxn_from_ec("1.1.1.1")
        >>> assert len(seed_rxn_list)>0

    """
    db=pd.read_json(self.reaction_db)
    db=db[db["ec_numbers"].apply(lambda x: ec_number in x if x else False)]
    db.drop_duplicates("id",inplace=True,keep="first")
    return db.to_dict(orient="records")

instantiate_metabs(seed_id)

This method is used to instantiate metabolite objects from the seed database. In order to instantiate a metabolite object, you need to pass the seed identifier for that metabolite.

Parameters:

  • seed_id (str): The seed identifier for the metabolite. [required]

Returns:

  • Metabolite: An instance of the Metabolite class.

Required Configs
  • config.compound_db

Examples:

>>> seed_db=SeedDB()
>>> metab=seed_db.instantiate_metabs("cpd01024")
>>> assert metab.cod==4.0
Source code in adtoolbox/core.py
def instantiate_metabs(self, seed_id:str)->Metabolite:
    """
    This method is used to instantiate metabolite objects from the seed database.
    In order to instantiate a metabolite object, you need to pass the seed identifier for that metabolite.

    Args:
        seed_id (str): The seed identifier for the metabolite.

    Returns:
        Metabolite: An instance of the Metabolite class. 

    Required Configs:
        - config.compound_db

    Examples:
        >>> seed_db=SeedDB()
        >>> metab=seed_db.instantiate_metabs("cpd01024")
        >>> assert metab.cod==4.0
    """
    db=pd.read_json(self.compound_db)
    return Metabolite(data=db[db["id"]==seed_id].to_dict(orient="records")[0])

instantiate_rxns(seed_id)

This method is used to instantiate reaction objects from the seed database. In order to instantiate a reaction object, you need to pass the seed identifier for that reaction.

Parameters:

  • seed_id (str): The seed identifier for the reaction. [required]

Returns:

  • Reaction: An instance of the Reaction class.
Required Configs
  • config.reaction_db

Examples:

>>> seed_db=SeedDB()
>>> rxn=seed_db.instantiate_rxns("rxn00558")
>>> assert rxn.data["name"]=="D-glucose-6-phosphate aldose-ketose-isomerase"
Source code in adtoolbox/core.py
def instantiate_rxns(self, seed_id:str)->Reaction:
    """
    This method is used to instantiate reaction objects from the seed database.
    In order to instantiate a reaction object, you need to pass the seed identifier for that reaction.

    Args:
        seed_id (str): The seed identifier for the reaction.

    Returns:
        Reaction: An instance of the Reaction class.

    Required Configs:
        - config.reaction_db

    Examples:
        >>> seed_db=SeedDB()
        >>> rxn=seed_db.instantiate_rxns("rxn00558")
        >>> assert rxn.data["name"]=="D-glucose-6-phosphate aldose-ketose-isomerase"
    """
    db=pd.read_json(self.reaction_db)
    return Reaction(data=db[db["id"]==seed_id].to_dict(orient="records")[0])

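A sketch chaining the methods above, assuming the configured seed reaction database is reachable. The records returned for an EC number carry the 'name' and 'stoichiometry' keys, so they can back Reaction objects directly:

from adtoolbox import configs, core

seed_db=core.SeedDB(configs.Database())
adh_rxns=seed_db.get_seed_rxn_from_ec("1.1.1.1") # alcohol dehydrogenase
rxn=core.Reaction(adh_rxns[0])
print(rxn,rxn.stoichiometry)
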
7. Database

Here is a schematic of the Database module:

[Figure: database module schematic]

This class is designed to supply any data requirement for ADToolbox. All functionalities for saving, loading, and querying data are implemented here. ADToolbox in general contains the following databases:

  • The seed reaction database

  • The seed compound database

  • ADToolbox's Feed database

  • ADToolbox's Metagenomics studies database

  • ADToolbox's Experimental data database

  • ADToolbox's Protein database

  • ADToolbox's Reaction database

  • GTDB-tk database for bacterial and archaeal 16S rRNA sequences

  • ADM and e_adm model parameters

This class is instantiated with a configs.Database object. This object contains the paths to all the databases that ADToolbox uses. Please refer to the documentation of each method for more information on the required configurations.

Parameters:

  • config (configs.Database, optional): A configs.Database object. Defaults to configs.Database().

Examples:

>>> db=Database(config=configs.Database())
>>> assert type(db)==Database and type(db.config)==configs.Database
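
A minimal sketch that initializes the flat-file databases at their configured paths. Note that the initialize_* methods shown below overwrite existing files, so point the config at scratch paths if in doubt:

from adtoolbox import configs, core

db=core.Database(config=configs.Database())
db.initialize_feed_db()                 # empty feed tsv
db.initialize_metagenomics_studies_db() # empty metagenomics studies tsv
db.initialize_experimental_data_db()    # empty experimental data json
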
Source code in adtoolbox/core.py
class Database:

    '''
    This class is designed to supply any data requirement for ADToolbox. All functionalities for saving, loading, and querying data are implemented here.
    ADToolbox in general contains the following databases:

    - The seed reaction database

    - The seed compound database

    - ADToolbox's Feed database

    - ADToolbox's Metagenomics studies database

    - ADToolbox's Experimental data database

    - ADToolbox's Protein database

    - ADToolbox's Reaction database

    - GTDB-tk database for bacterial and archaeal 16S rRNA sequences

    - ADM and e_adm model parameters

    This class is instantiated with a configs.Database object. This object contains the paths to all the databases that ADToolbox uses.
    Please refer to the documentation of each method for more information on the required configurations.

    Args:
        config (configs.Database, optional): A configs.Database object. Defaults to configs.Database().

    Examples:
        >>> db=Database(config=configs.Database())
        >>> assert type(db)==Database and type(db.config)==configs.Database

    '''
    def __init__(self, config:configs.Database=configs.Database())->None:
        self.config = config


    def initialize_protein_db(self)->None:
        """This function intializes ADToolbox's protein database by creating an empty fasta file.
        Be careful, this will overwrite any existing file with the same name.
        Logically, this needs method needs config.protein_db to be defined.

        Required Configs:
            - config.protein_db

        Examples:
            >>> import os
            >>> assert os.path.exists(os.path.join(Main_Dir,"protein_test_db.fasta"))==False # This is just to make sure that the following lines create the file
            >>> db=Database(config=configs.Database(protein_db=os.path.join(Main_Dir,"protein_test_db.fasta"))) # point to a test non-existing file
            >>> db.initialize_protein_db() # initialize the protein database
            >>> assert os.path.exists(os.path.join(Main_Dir,"protein_test_db.fasta"))==True # check if the file is created
            >>> os.remove(os.path.join(Main_Dir,"protein_test_db.fasta")) # remove the file to clean up
        """

        if not (pathlib.Path(self.config.protein_db).parent).exists():
            pathlib.Path(self.config.protein_db).parent.mkdir(parents=True)
        with open(self.config.protein_db, 'w') as f:
            pass

    def initialize_reaction_db(self)->None:
        r"""This function intializes ADToolbox's reaction database by creating an empty tsv file.
        Be careful, this will overwrite any existing file with the same name.

        Required Configs:
            - config.reaction_db

        Examples:
            >>> import os
            >>> assert os.path.exists(os.path.join(Main_Dir,"reaction_test_db.tsv"))==False
            >>> db=Database(config=configs.Database(reaction_db=os.path.join(Main_Dir,"reaction_test_db.tsv")))
            >>> db.initialize_reaction_db()
            >>> assert pd.read_table(os.path.join(Main_Dir,"reaction_test_db.tsv"),delimiter="\t").shape[0]==0
            >>> assert set(pd.read_csv(os.path.join(Main_Dir,"reaction_test_db.tsv"),delimiter="\t").columns)==set(["ec_numbers","seed_ids","reaction_names","adm1_reaction","e_adm_reactions","pathways"])
            >>> os.remove(os.path.join(Main_Dir,"reaction_test_db.tsv"))

        """
        pd.DataFrame(columns=["ec_numbers","seed_ids","reaction_names","adm1_reaction","e_adm_reactions","pathways"]).to_csv(self.config.reaction_db,index=False,sep="\t")

    def initialize_feed_db(self)->None:
        r"""This function intializes ADToolbox's Feed database by creating an empty tsv file.
        Be careful, this will overwrite any existing file with the same name.

        Required Configs:
            - config.feed_db

        Examples:
            >>> import os
            >>> assert os.path.exists(os.path.join(Main_Dir,"feed_test_db.tsv"))==False
            >>> db=Database(config=configs.Database(feed_db=os.path.join(Main_Dir,"feed_test_db.tsv")))
            >>> db.initialize_feed_db()
            >>> assert pd.read_table(os.path.join(Main_Dir,"feed_test_db.tsv"),delimiter='\t').shape[0]==0
            >>> assert set(pd.read_table(os.path.join(Main_Dir,"feed_test_db.tsv"),delimiter='\t').columns)==set(["name","carbohydrates","lipids","proteins","tss","si","xi","reference"])
            >>> os.remove(os.path.join(Main_Dir,"feed_test_db.tsv"))

        """
        pd.DataFrame(columns=["name","carbohydrates","lipids","proteins","tss","si","xi","reference"]).to_csv(self.config.feed_db,index=False,sep="\t")

    def initialize_metagenomics_studies_db(self)->None:
        r"""This function intializes ADToolbox's Metagenomics studies database by creating an empty tsv file.
        Be careful, this will overwrite any existing file with the same name.

        Required Configs:
            - config.metagenomics_studies_db

        Examples:
            >>> import os
            >>> assert os.path.exists(os.path.join(Main_Dir,"metagenomics_studies_test_db.tsv"))==False
            >>> db=Database(config=configs.Database(metagenomics_studies_db=os.path.join(Main_Dir,"metagenomics_studies_test_db.tsv")))
            >>> db.initialize_metagenomics_studies_db()
            >>> assert pd.read_table(os.path.join(Main_Dir,"metagenomics_studies_test_db.tsv"),delimiter="\t").shape[0]==0
            >>> assert set(pd.read_table(os.path.join(Main_Dir,"metagenomics_studies_test_db.tsv"),delimiter="\t").columns)==set(["name","study_type","microbiome","sample_accession","comments","study_accession"])
            >>> os.remove(os.path.join(Main_Dir,"metagenomics_studies_test_db.tsv"))

        """
        pd.DataFrame(columns=["name","study_type","microbiome","sample_accession","comments","study_accession"]).to_csv(self.config.metagenomics_studies_db,index=False,sep="\t")

    def initialize_experimental_data_db(self)->None:
        """This function intializes ADToolbox's experimental data database by creating an empty json file.
        Be careful, this will overwrite any existing file with the same name.

        Required Configs:
            - config.experimental_data_db

        Examples:
            >>> import os
            >>> assert os.path.exists(os.path.join(Main_Dir,"experimental_data_test_db.json"))==False
            >>> db=Database(config=configs.Database(experimental_data_db=os.path.join(Main_Dir,"experimental_data_test_db.json")))
            >>> db.initialize_experimental_data_db()
            >>> assert pd.read_json(os.path.join(Main_Dir,"experimental_data_test_db.json")).shape[0]==0
            >>> with open(os.path.join(Main_Dir,"experimental_data_test_db.json"),"r") as f:
            ...     assert json.load(f)==[]
            >>> os.remove(os.path.join(Main_Dir,"experimental_data_test_db.json"))

        """
        pd.DataFrame(columns=["name","initial_conditions","time","variables","data","reference"]).to_json(self.config.experimental_data_db,orient="records")


    def filter_seed_from_ec(self, 
                            ec_list:list[str],
                            save:bool=False) -> tuple:
        """
        This function takes a list of EC numbers and filters the seed database to find the seed reactions that have the EC numbers in their EC number list.
        This will help to trim the large seed database to a smaller one that only contains the reactions that are relevant to the AD process.

        Args:
            ec_list (list[str]): A list of EC numbers.
            save (bool, optional): Whether to save the filtered seed database or not. Defaults to False.

        Returns:
            tuple: A tuple containing the filtered seed reaction database and the seed compound database, respectively.

        Required Configs:

            - config.reaction_db
            - config.compound_db
            - config.local_reaction_db
            - config.local_compound_db


        Examples:
            >>> db=Database()
            >>> seed_rxn_db,seed_compound_db=db.filter_seed_from_ec(["1.1.1.1","1.1.1.2"])
            >>> assert len(seed_rxn_db)>0 and len(seed_compound_db)>0
            >>> assert pd.read_json(configs.Database().reaction_db).shape[0]>pd.DataFrame(seed_rxn_db).shape[0]
        """
        seed_rxn_db=pd.read_json(self.config.reaction_db)
        seed_compound_db=pd.read_json(self.config.compound_db)
        seed_rxn_db=seed_rxn_db[seed_rxn_db["ec_numbers"].apply(lambda x: any(ec in x for ec in ec_list) if x else False)]
        seed_compound_db=seed_compound_db[seed_compound_db["id"].apply(lambda x: True if x in seed_rxn_db["stoichiometry"].sum() else False)]
        if save:
            seed_rxn_db.to_json(self.config.local_reaction_db)
            seed_compound_db.to_json(self.config.local_compound_db)
        return seed_rxn_db.to_dict(orient="record"),seed_compound_db.to_dict(orient="record")



    def get_protein_seqs_from_uniprot(self, uniprot_id:str) -> str:
        """
        This function takes a uniprot id and fetches the protein sequence from Uniprot.

        Args:
            uniprot_id (str): The uniprot id of the protein.


        Returns:
            str: The protein sequence.

        Examples:
            >>> db=Database()
            >>> seq=db.get_protein_seqs_from_uniprot("P0A9P0")
            >>> assert type(seq)==str and len(seq)>0
        """
        Base_URL = "https://rest.uniprot.org/uniprotkb/"
        session = requests.Session()
        retry = Retry(connect=3, backoff_factor=0.5)
        adapter = HTTPAdapter(max_retries=retry)
        session.mount('http://', adapter)
        try:
            file = session.get(
                f"https://rest.uniprot.org/uniprotkb/{uniprot_id}.fasta", timeout=10)
        except:
            print("Could not fetch the sequence! Trying again ...")
            while True:
                time.sleep(5)
                file = session.get(Base_URL+uniprot_id+".fasta", timeout=10)
                if file.ok:
                    break

        return ''.join(file.text.split('\n')[1:-1])

    def proteins_from_ec(self,ec_number:str) -> dict:
        """
        This function returns a dictionary of protein sequences for a given EC number.
        The keys are UniProt IDs joined with the EC number (the header format used by
        the ADToolbox protein database) and the values are the protein sequences. Since
        ADToolbox deals with microbial processes, only bacterial and archaeal proteins
        are considered.

        Args:
            ec_number (str): The EC number.

        Returns:
            dict: A dictionary of protein sequences.

        Examples:
            >>> db=Database()
            >>> protein_seqs=db.proteins_from_ec("1.1.1.1")
            >>> assert len(protein_seqs)>0
            >>> assert list(protein_seqs.keys())[0].split("|")[1]=="1.1.1.1"
        """
        session = requests.Session()
        retry = Retry(connect=3, backoff_factor=0.5)
        adapter = HTTPAdapter(max_retries=retry)
        session.mount('https://', adapter)
        protein_seqs={}
        url=f"https://rest.uniprot.org/uniprotkb/stream?format=fasta&query=%28%28ec%3A{ec_number}%29%20AND%20%28reviewed%3Atrue%29%20NOT%20%28taxonomy_id%3A2759%29%29"
        try:
            file = session.get(url, timeout=30)
        except (requests.exceptions.HTTPError, requests.exceptions.ConnectionError):
            print("Request Error! Trying again ...")
            time.sleep(30)
            file = session.get(url, timeout=30)
        text = file.text
        # The truthiness check below also doubles as a sanity check on the response.
        if text:
            text=text.split('>')
            text.remove("")
            for seq in text:
                protein_seqs.update([(seq.split("\n")[0].split("|")[1]+"|"+ec_number, "".join(seq.split("\n")[1:]))])


        return protein_seqs
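
    # Note: entries come back keyed as "<uniprot_id>|<ec_number>" (for example
    # "<uniprot_id>|1.1.1.1"), which is the header format the ADToolbox protein
    # database expects downstream.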


    def build_protein_db_from_reactions_db(self):
        r"""
        This function builds the protein database from the reaction database.
        It takes the reaction database and finds the protein sequences for each EC number in the reaction database.
        Then it saves the protein sequences in a fasta file.

        Required Configs:
            - config.reaction_db
            --------
            - config.protein_db
            --------

        Examples:
            >>> import os
            >>> assert os.path.exists(os.path.join(Main_Dir,"protein_test_db.fasta"))==False
            >>> assert os.path.exists(os.path.join(Main_Dir,"reaction_test_db.tsv"))==False
            >>> db=Database(config=configs.Database(protein_db=os.path.join(Main_Dir,"protein_test_db.fasta"),reaction_db=os.path.join(Main_Dir,"reaction_test_db.tsv")))
            >>> reaction_db=pd.DataFrame(columns=["EC_Numbers","Seed Ids","Reaction Names","ADM1_Reaction","e_adm_Reactions","Pathways"])
            >>> reaction_db.loc[0,"EC_Numbers"]="1.1.1.1"
            >>> reaction_db.to_csv(os.path.join(Main_Dir,"reaction_test_db.tsv"),index=False,sep="\t")
            >>> db.build_protein_db_from_reactions_db()
            >>> assert os.path.exists(os.path.join(Main_Dir,"protein_test_db.fasta"))==True
            >>> assert os.path.exists(os.path.join(Main_Dir,"reaction_test_db.tsv"))==True
            >>> assert os.path.getsize(os.path.join(Main_Dir,"protein_test_db.fasta"))>0
            >>> os.remove(os.path.join(Main_Dir,"protein_test_db.fasta"))
            >>> os.remove(os.path.join(Main_Dir,"reaction_test_db.tsv"))
        """
        rxn_db=pd.read_table(self.config.reaction_db,delimiter="\t")
        ec_numbers=rxn_db["EC_Numbers"]
        ec_numbers=list(set(ec_numbers))
        protein_seqs={}
        for ec in ec_numbers:
            protein_seqs.update(self.proteins_from_ec(ec))
        with open(self.config.protein_db,"w") as f:
            for key,value in protein_seqs.items():
                f.write(">"+key+"\n")
                f.write(value+"\n")

    def cazy_ec(self)->list:
        """
        This method returns a list of EC numbers that are extracted from the Cazy website.
        This method is useful for adding more carbohydrate metabolism reactions to the reaction database.

        Returns:
            list: A list of EC numbers for carbohydrate metabolism found on CAZy database.

        Examples:
            >>> db=Database()
            >>> ec_list=db.cazy_ec()
            >>> assert len(ec_list)>0
        """

        ec_list = []
        for link in self.config.cazy_links:
            page = requests.get(link)
            soup = BeautifulSoup(page.content, "html.parser")
            results = soup.find("div", class_="cadre_principal").find_all(
                "th", class_="thec")
            for ec_number in results:
                if '-' not in ec_number.text.strip() and '.' in ec_number.text.strip():
                    ec_list.append(ec_number.text.strip())

        return ec_list
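
    # A hedged pipeline sketch: the CAZy EC numbers can feed straight into the
    # protein database helpers defined below.
    #   db = Database()
    #   db.add_proteins_from_ecnumbers_to_protein_db(db.cazy_ec())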

    def add_protein_to_protein_db(self, protein_id:str, header_tail:str)->None:
        """
        This function adds a protein sequence to the protein database. It takes a uniprot id and an EC number it is assigned to
        and adds the corresponding protein sequence to the protein database.

        Required Configs:
            - config.protein_db

        Args:
            protein_id (str): The uniprot id of the protein.
            header_tail (str): A text to append to the header of the entry in the database;
                in ADToolbox it is better to use the EC number for compatibility with downstream functions.


        Examples:
            >>> import os
            >>> assert os.path.exists(os.path.join(Main_Dir,"protein_test_db.fasta"))==False
            >>> db=Database(config=configs.Database(protein_db=os.path.join(Main_Dir,"protein_test_db.fasta")))
            >>> db.add_protein_to_protein_db("P0A9P0","1.2.3.4")
            >>> assert os.path.exists(os.path.join(Main_Dir,"protein_test_db.fasta"))==True
            >>> assert os.path.getsize(os.path.join(Main_Dir,"protein_test_db.fasta"))>0
            >>> import utils
            >>> assert len(utils.fasta_to_dict(os.path.join(Main_Dir,"protein_test_db.fasta")))>0
            >>> os.remove(os.path.join(Main_Dir,"protein_test_db.fasta"))
        """
        if not os.path.exists(self.config.protein_db):
            self.initialize_protein_db()
        with open(self.config.protein_db,"a") as f:
            f.write(">"+protein_id+"|"+header_tail+"\n")
            f.write(self.get_protein_seqs_from_uniprot(protein_id)+"\n")

    def add_proteins_from_ecnumbers_to_protein_db(self, ec_numbers:list)->None:
        """
        This function adds protein sequences to the protein database from a list of EC numbers.
        It takes a list of EC numbers and finds the protein sequences for each EC number in the list.
        Then it saves the protein sequences in a fasta file.

        Required Configs:
            - config.protein_db

        Args:
            ec_numbers (list): A list of EC numbers.

        Examples:
            >>> import os
            >>> assert os.path.exists(os.path.join(Main_Dir,"protein_test_db.fasta"))==False
            >>> db=Database(config=configs.Database(protein_db=os.path.join(Main_Dir,"protein_test_db.fasta")))
            >>> db.add_proteins_from_ecnumbers_to_protein_db(["1.1.1.1","1.1.1.2"])
            >>> assert os.path.exists(os.path.join(Main_Dir,"protein_test_db.fasta"))==True
            >>> import utils
            >>> assert len(utils.fasta_to_dict(os.path.join(Main_Dir,"protein_test_db.fasta")))>0
            >>> os.remove(os.path.join(Main_Dir,"protein_test_db.fasta"))
        """
        if not os.path.exists(self.config.protein_db):
            self.initialize_protein_db()

        protein_seqs={}
        for ec in ec_numbers:
            protein_seqs.update(self.proteins_from_ec(ec))

        with open(self.config.protein_db,"a") as f:
            for key,value in protein_seqs.items():
                f.write(">"+key+"\n")
                f.write(value+"\n")

    def add_feed_to_feed_db(self,feed:Feed)->None:
        r"""
        This function adds a feed to the feed database. It takes the feed name and the feed composition and adds them to the feed database.

        Required Configs:
            - config.feed_db

        Args:
            feed (Feed): An instance of the Feed class.

        Examples:
            >>> import os
            >>> assert os.path.exists(os.path.join(Main_Dir,"feed_test_db.tsv"))==False
            >>> db=Database(config=configs.Database(feed_db=os.path.join(Main_Dir,"feed_test_db.tsv")))
            >>> feed=Feed(name="test_feed",carbohydrates=10,lipids=20,proteins=30,tss=80,si=10,xi=30,reference="test")
            >>> db.add_feed_to_feed_db(feed)
            >>> assert os.path.exists(os.path.join(Main_Dir,"feed_test_db.tsv"))==True
            >>> assert pd.read_table(os.path.join(Main_Dir,"feed_test_db.tsv"),delimiter="\t").shape[0]>0
            >>> os.remove(os.path.join(Main_Dir,"feed_test_db.tsv"))

        """
        if not os.path.exists(self.config.feed_db):
            self.initialize_feed_db()

        if feed.name in pd.read_table(self.config.feed_db,delimiter="\t")["name"].values:
            raise ValueError("Feed already exists in the database.")
        feed_db=pd.read_table(self.config.feed_db,delimiter="\t")
        feed_db=pd.concat([feed_db,pd.DataFrame([feed.to_dict()])],ignore_index=True,axis=0)
        feed_db.to_csv(self.config.feed_db,index=False,sep="\t")

    def remove_feed_from_feed_db(self,field_name:str,query:str)->None:
        r"""
        This function removes feeds that contain the query in the given column, field_name, from the feed database.

        Required Configs:
            - config.feed_db

        Args:
            field_name (str): The name of the column to query.
            query (str): The query string.

        Examples:
            >>> import os
            >>> assert os.path.exists(os.path.join(Main_Dir,"feed_test_db.tsv"))==False
            >>> db=Database(config=configs.Database(feed_db=os.path.join(Main_Dir,"feed_test_db.tsv")))
            >>> feed=Feed(name="test_feed",carbohydrates=10,lipids=20,proteins=30,tss=80,si=10,xi=30,reference="test")
            >>> db.add_feed_to_feed_db(feed)
            >>> assert os.path.exists(os.path.join(Main_Dir,"feed_test_db.tsv"))==True
            >>> assert pd.read_table(os.path.join(Main_Dir,"feed_test_db.tsv"),delimiter="\t").shape[0]>0
            >>> db.remove_feed_from_feed_db("name","test_feed")
            >>> assert pd.read_table(os.path.join(Main_Dir,"feed_test_db.tsv"),delimiter="\t").shape[0]==0
            >>> os.remove(os.path.join(Main_Dir,"feed_test_db.tsv"))

        """
        if not os.path.exists(self.config.feed_db):
            raise FileNotFoundError("Feed database does not exist!")


        feed_db=pd.read_table(self.config.feed_db,delimiter="\t")
        feed_db=feed_db[feed_db[field_name].str.contains(query)==False]
        feed_db.to_csv(self.config.feed_db,index=False,sep="\t")

    def get_feed_from_feed_db(self,field_name:str,query:str)->list[Feed]:
        r"""
        This function returns feeds from the feed database. It takes the query string and the column name to query and returns every feed whose given column contains the query string.

        Required Configs:
            - config.feed_db

        Args:
            field_name (str): The name of the column to query.
            query (str): The query string.

        Returns:
            list[Feed]: A list of Feed instances that match the query.

        Examples:
            >>> import os
            >>> assert os.path.exists(os.path.join(Main_Dir,"feed_test_db.tsv"))==False
            >>> db=Database(config=configs.Database(feed_db=os.path.join(Main_Dir,"feed_test_db.tsv")))
            >>> feed=Feed(name="test_feed",carbohydrates=10,lipids=20,proteins=30,tss=80,si=10,xi=30,reference="test")
            >>> db.add_feed_to_feed_db(feed)
            >>> assert os.path.exists(os.path.join(Main_Dir,"feed_test_db.tsv"))==True
            >>> assert pd.read_table(os.path.join(Main_Dir,"feed_test_db.tsv"),delimiter="\t").shape[0]>0
            >>> feed=db.get_feed_from_feed_db("name","test_feed")
            >>> assert feed[0].name=="test_feed"
            >>> os.remove(os.path.join(Main_Dir,"feed_test_db.tsv"))

        """
        if not os.path.exists(self.config.feed_db):
            raise FileNotFoundError("Feed database does not exist!")

        feed_db=pd.read_table(self.config.feed_db,delimiter="\t")
        feed_db=feed_db[feed_db[field_name].str.contains(query)]
        return [Feed(**feed.to_dict()) for _,feed in feed_db.iterrows()]

    def add_metagenomics_study_to_metagenomics_studies_db(self,metagenomics_study:MetagenomicsStudy)->None:
        r"""
        This function adds a metagenomics study to the metagenomics studies database. It takes a metagenomics study and adds it to the metagenomics studies database.

        Required Configs:
            - config.metagenomics_studies_db

        Args:
            metagenomics_study (MetagenomicsStudy): An instance of the MetagenomicsStudy class.

        Examples:
            >>> import os
            >>> assert os.path.exists(os.path.join(Main_Dir,"metagenomics_studies_test_db.tsv"))==False
            >>> db=Database(config=configs.Database(metagenomics_studies_db=os.path.join(Main_Dir,"metagenomics_studies_test_db.tsv")))
            >>> metagenomics_study=MetagenomicsStudy(name="test_study",study_type="metagenomics",microbiome="anaerobic digester",sample_accession="test",comments="test",study_accession="test")
            >>> db.add_metagenomics_study_to_metagenomics_studies_db(metagenomics_study)
            >>> assert os.path.exists(os.path.join(Main_Dir,"metagenomics_studies_test_db.tsv"))==True
            >>> assert pd.read_table(os.path.join(Main_Dir,"metagenomics_studies_test_db.tsv"),delimiter="\t").shape[0]>0
            >>> os.remove(os.path.join(Main_Dir,"metagenomics_studies_test_db.tsv"))
        """
        if not os.path.exists(self.config.metagenomics_studies_db):
            self.initialize_metagenomics_studies_db()
        metagenomics_studies_db=pd.read_table(self.config.metagenomics_studies_db,delimiter="\t")
        metagenomics_studies_db=pd.concat([metagenomics_studies_db,pd.DataFrame([metagenomics_study.to_dict()])],ignore_index=True,axis=0)
        metagenomics_studies_db.to_csv(self.config.metagenomics_studies_db,index=False,sep="\t")

    def remove_metagenomics_study_from_metagenomics_studies_db(self,field_name:str,query:str)->None:
        r"""
        This function removes studies that contain the query in the given column, field name, from the metagenomics studies database.

        Required Configs:
            - config.metagenomics_studies_db

        Args:
            field_name (str): The name of the column to query.
            query (str): The query string.

        Examples:
            >>> import os
            >>> assert os.path.exists(os.path.join(Main_Dir,"metagenomics_studies_test_db.tsv"))==False
            >>> db=Database(config=configs.Database(metagenomics_studies_db=os.path.join(Main_Dir,"metagenomics_studies_test_db.tsv")))
            >>> metagenomics_study=MetagenomicsStudy(name="test_study",study_type="metagenomics",microbiome="anaerobic digester",sample_accession="test",comments="test",study_accession="test")
            >>> db.add_metagenomics_study_to_metagenomics_studies_db(metagenomics_study)
            >>> assert os.path.exists(os.path.join(Main_Dir,"metagenomics_studies_test_db.tsv"))==True
            >>> assert pd.read_table(os.path.join(Main_Dir,"metagenomics_studies_test_db.tsv"),delimiter="\t").shape[0]>0
            >>> db.remove_metagenomics_study_from_metagenomics_studies_db("name","test_study")
            >>> assert pd.read_table(os.path.join(Main_Dir,"metagenomics_studies_test_db.tsv"),delimiter="\t").shape[0]==0
            >>> os.remove(os.path.join(Main_Dir,"metagenomics_studies_test_db.tsv"))
        """
        if not os.path.exists(self.config.metagenomics_studies_db):
            raise FileNotFoundError("Metagenomics studies database does not exist!")

        metagenomics_studies_db=pd.read_table(self.config.metagenomics_studies_db,delimiter="\t")
        metagenomics_studies_db=metagenomics_studies_db[metagenomics_studies_db[field_name].str.contains(query)==False]
        metagenomics_studies_db.to_csv(self.config.metagenomics_studies_db,index=False,sep="\t")

    def get_metagenomics_study_from_metagenomics_studies_db(self,field_name:str,query:str)->list[MetagenomicsStudy]:
        r"""
        This function returns metagenomics studies from the metagenomics studies database. It takes the query string and the column name to query and returns every study whose given column contains the query string.

        Required Configs:
            - config.metagenomics_studies_db

        Args:
            field_name (str): The name of the column to query.
            query (str): The query string.

        Returns:
            list[MetagenomicsStudy]: A list of MetagenomicsStudy instances that match the query.

        Examples:
            >>> import os
            >>> assert os.path.exists(os.path.join(Main_Dir,"metagenomics_studies_test_db.tsv"))==False
            >>> db=Database(config=configs.Database(metagenomics_studies_db=os.path.join(Main_Dir,"metagenomics_studies_test_db.tsv")))
            >>> metagenomics_study=MetagenomicsStudy(name="test_study",study_type="metagenomics",microbiome="anaerobic digester",sample_accession="test",comments="test",study_accession="test")
            >>> db.add_metagenomics_study_to_metagenomics_studies_db(metagenomics_study)
            >>> assert os.path.exists(os.path.join(Main_Dir,"metagenomics_studies_test_db.tsv"))==True
            >>> assert pd.read_table(os.path.join(Main_Dir,"metagenomics_studies_test_db.tsv"),delimiter="\t").shape[0]>0
            >>> metagenomics_study=db.get_metagenomics_study_from_metagenomics_studies_db("name","test_study")
            >>> assert metagenomics_study[0].name=="test_study"
            >>> os.remove(os.path.join(Main_Dir,"metagenomics_studies_test_db.tsv"))
        """
        if not os.path.exists(self.config.metagenomics_studies_db):
            raise FileNotFoundError("Metagenomics studies database does not exist!")

        metagenomics_studies_db=pd.read_table(self.config.metagenomics_studies_db,delimiter="\t")
        metagenomics_studies_db=metagenomics_studies_db[metagenomics_studies_db[field_name].str.contains(query)]
        return [MetagenomicsStudy(**metagenomics_study.to_dict()) for _,metagenomics_study in metagenomics_studies_db.iterrows()]

    def add_experiment_to_experiments_db(self,experiment:Experiment)->None:
        r"""
        This function adds an experiment to the experiments database. It takes an experiment and adds it to the experiments database.

        Required Configs:
            - config.experimental_data_db

        Args:
            experiment (Experiment): An instance of the Experiment class.

        Examples:
            >>> import os,json
            >>> assert os.path.exists(os.path.join(Main_Dir,"experiments_test_db.tsv"))==False
            >>> db=Database(config=configs.Database(experimental_data_db=os.path.join(Main_Dir,"experiments_test_db.json")))
            >>> experiment=Experiment(name="test_study",time=[0,1,2],variables=[2,6],data= [[1,2,3],[4,5,6]],reference="test")
            >>> db.add_experiment_to_experiments_db(experiment)
            >>> assert os.path.exists(os.path.join(Main_Dir,"experiments_test_db.json"))==True
            >>> assert os.path.getsize(os.path.join(Main_Dir,"experiments_test_db.json"))>0
            >>> os.remove(os.path.join(Main_Dir,"experiments_test_db.json"))
        """
        if not os.path.exists(self.config.experimental_data_db):
            self.initialize_experimental_data_db()

        if experiment.name in [experiment.name for experiment in self.get_experiment_from_experiments_db("name",experiment.name)]: 
            raise ValueError("Experiment already exists in the database!")

        with open(self.config.experimental_data_db,"r") as f:
            experiments_db=json.load(f)
        experiments_db.append(experiment.to_dict())
        with open(self.config.experimental_data_db,"w") as f:
            json.dump(experiments_db,f)

    def remove_experiment_from_experiments_db(self,field_name:str,query:str)->None:
        r"""
        This function removes experiments that contain the query in the given column, field name, from the experiments database.

        Required Configs:
            - config.experimental_data_db

        Args:
            field_name (str): The name of the column to query.
            query (str): The query string.

        Examples:
            >>> import os,json
            >>> assert os.path.exists(os.path.join(Main_Dir,"experiments_test_db.tsv"))==False
            >>> db=Database(config=configs.Database(experimental_data_db=os.path.join(Main_Dir,"experiments_test_db.json")))
            >>> experiment=Experiment(name="test_study",time=[0,1,2],variables=[2,6],data= [[1,2,3],[4,5,6]],reference="test")
            >>> db.add_experiment_to_experiments_db(experiment)
            >>> assert os.path.exists(os.path.join(Main_Dir,"experiments_test_db.json"))==True
            >>> assert os.path.getsize(os.path.join(Main_Dir,"experiments_test_db.json"))>0
            >>> db.remove_experiment_from_experiments_db("name","test_study")
            >>> assert pd.read_json(os.path.join(Main_Dir,"experiments_test_db.json")).shape[0]==0
            >>> os.remove(os.path.join(Main_Dir,"experiments_test_db.json"))
        """
        if not os.path.exists(self.config.experimental_data_db):
            raise FileNotFoundError("Experimental data database does not exist!")

        with open(self.config.experimental_data_db,"r") as f:
            experiments_db=json.load(f)
        experiments_db=[experiment for experiment in experiments_db if query not in experiment[field_name]]
        with open(self.config.experimental_data_db,"w") as f:
            json.dump(experiments_db,f)

    def get_experiment_from_experiments_db(self,field_name:str,query:str)->list[Experiment]:
        r"""
        This function returns experiments from the experiments database. It takes the query string and the column name to query and returns every experiment whose given field contains the query string.

        Required Configs:
            - config.experimental_data_db

        Args:
            field_name (str): The name of the column to query.
            query (str): The query string.

        Returns:
            list[Experiment]: A list of Experiment instances that match the query.

        Examples:
            >>> import os,json
            >>> assert os.path.exists(os.path.join(Main_Dir,"experiments_test_db.tsv"))==False
            >>> db=Database(config=configs.Database(experimental_data_db=os.path.join(Main_Dir,"experiments_test_db.json")))
            >>> experiment=Experiment(name="test_study",time=[0,1,2],variables=[2,6],data= [[1,2,3],[4,5,6]],reference="test")
            >>> db.add_experiment_to_experiments_db(experiment)
            >>> assert os.path.exists(os.path.join(Main_Dir,"experiments_test_db.json"))==True
            >>> assert os.path.getsize(os.path.join(Main_Dir,"experiments_test_db.json"))>0
            >>> experiment=db.get_experiment_from_experiments_db("name","test_study")
            >>> assert experiment[0].name=="test_study"
            >>> os.remove(os.path.join(Main_Dir,"experiments_test_db.json"))
        """
        if not os.path.exists(self.config.experimental_data_db):
            raise FileNotFoundError("Experimental data database does not exist!")

        with open(self.config.experimental_data_db,"r") as f:
            experiments_db=json.load(f)
        experiments_db=[experiment for experiment in experiments_db if query in experiment[field_name]]
        return [Experiment(**experiment) for experiment in experiments_db]

    def build_mmseqs_database(self,container:str="None")->str:
        """Builds an indexed mmseqs database from the ADToolbox's fasta protein database.

        Required Configs:
            - config.protein_db
            - config.adtoolbox_singularity
            - config.adtoolbox_docker

        Args:
            container (str, optional): The container to run the script with. Defaults to "None".
        Returns:
            str: The script to build the mmseqs database.

        Examples:
            >>> import os
            >>> assert os.path.exists(os.path.join(Main_Dir,"protein_test_db.fasta"))==False
            >>> db=Database(config=configs.Database(protein_db=os.path.join(Main_Dir,"protein_test_db.fasta")))
            >>> db.add_protein_to_protein_db("P0A9P0","x,x,x,x")
            >>> assert os.path.exists(os.path.join(Main_Dir,"protein_test_db.fasta"))==True
            >>> assert os.path.getsize(os.path.join(Main_Dir,"protein_test_db.fasta"))>0
            >>> script=db.build_mmseqs_database()
            >>> assert script=="mmseqs createdb "+str(os.path.join(Main_Dir,"protein_test_db.fasta"))+" "+str(db.config.protein_db_mmseqs)
            >>> os.remove(os.path.join(Main_Dir,"protein_test_db.fasta"))

        """
        script=create_mmseqs_database(self.config.protein_db,
                                      self.config.protein_db_mmseqs,
                                      container=container,
                                      run=False,
                                      config=self.config)

        if container=="None":
            pass

        elif container=="singularity":
            script=f"singularity exec --bind {self.config.protein_db}:{self.config.protein_db},{self.config.protein_db_mmseqs}:{self.config.protein_db_mmseqs} {self.config.adtoolbox_singularity} {script}"

        elif container=="docker":
            script=f"docker run -v {self.config.protein_db}:{self.config.protein_db} -v {self.config.protein_db_mmseqs}:{self.config.protein_db_mmseqs} {self.config.adtoolbox_docker} {script}"

        else:
            raise ValueError("Container should be either None, singularity or docker!")

        return script
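
    # For reference, with container="docker" the returned script is the same
    # mmseqs command wrapped in a docker run that bind-mounts both database
    # paths, i.e. "docker run -v <protein_db>:<protein_db> -v <mmseqs_db>:<mmseqs_db> <image> mmseqs createdb ...".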


    def download_adm_parameters(self,verbose:bool=True)->None:
        """
        Downloads the parameters needed for running ADM models in ADToolbox.

        Required Configs:
            - config.adm_parameters_base_dir
            - config.adm_parameters_urls

        Args:
            verbose (bool, optional): Whether to print the progress or not. Defaults to True.

        Examples:
            >>> import os
            >>> assert os.path.exists(os.path.join(Main_Dir,"adm_parameters_test"))==False
            >>> db=Database(config=configs.Database(adm_parameters_base_dir=os.path.join(Main_Dir,"adm_parameters_test")))
            >>> db.download_adm_parameters(verbose=False)
            >>> assert os.path.exists(os.path.join(Main_Dir,"adm_parameters_test"))==True
            >>> assert len(os.listdir(os.path.join(Main_Dir,"adm_parameters_test")))==12
            >>> os.system("rm -r "+os.path.join(Main_Dir,"adm_parameters_test"))
            0

        """
        for param in self.config.adm_parameters.keys():
            if not pathlib.Path(self.config.adm_parameters[param]).parent.exists():
                os.makedirs(pathlib.Path(self.config.adm_parameters[param]).parent)
            r = requests.get(self.config.adm_parameters_urls[param], allow_redirects=True)
            with open(self.config.adm_parameters[param], 'wb') as f:
                f.write(r.content)
            if verbose:
                rich.print(f"[green]{param} downloaded to {self.config.adm_parameters[param]}")

    def download_seed_databases(self,verbose:bool=True) -> None :
        """This function will download the seed databases, both compound and reaction databases.

        Required Configs:
            - config.seed_rxn_url
            - config.seed_compound_url
            - config.reaction_db
            - config.compound_db

        Args:
            verbose (bool, optional): Whether to print the progress or not. Defaults to True.

        Examples:
            >>> import os
            >>> assert os.path.exists(os.path.join(Main_Dir,"seed_rxn.json"))==False
            >>> assert os.path.exists(os.path.join(Main_Dir,"seed_compound.json"))==False
            >>> db=Database(config=configs.Database(reaction_db=os.path.join(Main_Dir,"seed_rxn.json"),compound_db=os.path.join(Main_Dir,"seed_compound.json")))
            >>> db.download_seed_databases(verbose=False)
            >>> assert os.path.exists(os.path.join(Main_Dir,"seed_rxn.json"))==True
            >>> assert os.path.exists(os.path.join(Main_Dir,"seed_compound.json"))==True
            >>> os.remove(os.path.join(Main_Dir,"seed_rxn.json"))
            >>> os.remove(os.path.join(Main_Dir,"seed_compound.json"))
        """
        r = requests.get(self.config.seed_rxn_url, allow_redirects=True,stream=True)
        if not os.path.exists(Path(self.config.reaction_db).parent):
            os.makedirs(Path(self.config.reaction_db).parent)
        with open(self.config.reaction_db, 'wb') as f:
            f.write(r.content)
        if verbose:
            rich.print(f"[green]Reaction database downloaded to {self.config.reaction_db}")
        r=requests.get(self.config.seed_compound_url,allow_redirects=True,stream=True)
        with open(self.config.compound_db, 'wb') as f:
            f.write(r.content)
        if verbose:
            rich.print(f"[green]Compound database downloaded to {self.config.compound_db}")

    def download_protein_database(self, verbose:bool=True) -> None:
        """
        Downloads the prebuilt protein database from the remote repository.

        Required Configs:
            - config.protein_db_url
            - config.protein_db

        Args:
            verbose (bool, optional): Whether to print the progress or not. Defaults to True.

        Examples:
            >>> import os
            >>> assert os.path.exists(os.path.join(Main_Dir,"protein_test_db.fasta"))==False
            >>> db=Database(config=configs.Database(protein_db=os.path.join(Main_Dir,"protein_test_db.fasta")))
            >>> db.download_protein_database(verbose=False)
            >>> assert os.path.exists(os.path.join(Main_Dir,"protein_test_db.fasta"))==True
            >>> assert os.path.getsize(os.path.join(Main_Dir,"protein_test_db.fasta"))>0
            >>> os.remove(os.path.join(Main_Dir,"protein_test_db.fasta"))
        """
        r = requests.get(self.config.protein_db_url, allow_redirects=True)

        if not os.path.exists(Path(self.config.protein_db).parent):
            os.makedirs(Path(self.config.protein_db).parent)

        with open(self.config.protein_db, 'wb') as f:
            f.write(r.content)
        if verbose:
            rich.print(f"[green]Protein database downloaded to {self.config.protein_db}")

    def download_reaction_database(self,verbose:bool=True)->None:
        """
        This function will download the reaction database from the remote repository.

        Required Configs:
            - config.adtoolbox_rxn_db_url
            - config.csv_reaction_db

        Args:
            verbose (bool, optional): Whether to print the progress or not. Defaults to True.

        Examples:
            >>> import os
            >>> assert os.path.exists(os.path.join(Main_Dir,"reaction_test_db.csv"))==False
            >>> db=Database(config=configs.Database(csv_reaction_db=os.path.join(Main_Dir,"reaction_test_db.csv")))
            >>> db.download_reaction_database(verbose=False)
            >>> assert os.path.exists(os.path.join(Main_Dir,"reaction_test_db.csv"))==True
            >>> assert os.path.getsize(os.path.join(Main_Dir,"reaction_test_db.csv"))>0
            >>> os.remove(os.path.join(Main_Dir,"reaction_test_db.csv"))
        """

        r = requests.get(self.config.adtoolbox_rxn_db_url, allow_redirects=True)

        if not os.path.exists(Path(self.config.csv_reaction_db).parent):
            os.makedirs(Path(self.config.csv_reaction_db).parent)

        with open(self.config.csv_reaction_db, 'wb') as f:
            f.write(r.content)
        if verbose:
            rich.print(f"[green]Reaction database downloaded to {self.config.csv_reaction_db}")


    def download_feed_database(self,verbose:bool=True)-> None:
        """
        This function will download the feed database from the remote repository.

        Required Configs:
            - config.feed_db_url
            - config.feed_db

        Args:
            verbose (bool, optional): Whether to print the progress or not. Defaults to True.

        Examples:
            >>> import os
            >>> assert os.path.exists(os.path.join(Main_Dir,"feed_test_db.tsv"))==False
            >>> db=Database(config=configs.Database(feed_db=os.path.join(Main_Dir,"feed_test_db.tsv")))
            >>> db.download_feed_database(verbose=False)
            >>> assert os.path.exists(os.path.join(Main_Dir,"feed_test_db.tsv"))==True
            >>> assert os.path.getsize(os.path.join(Main_Dir,"feed_test_db.tsv"))>0
            >>> os.remove(os.path.join(Main_Dir,"feed_test_db.tsv"))
        """
        r = requests.get(self.config.feed_db_url, allow_redirects=True)

        if not os.path.exists(Path(self.config.feed_db).parent):
            os.makedirs(Path(self.config.feed_db).parent)

        with open(self.config.feed_db, 'wb') as f:
            f.write(r.content)
        if verbose:
            rich.print(f"[green]Feed database downloaded to {self.config.feed_db}")

    def download_qiime_classifier_db(self,verbose:bool=True)->None:
        """Downloads the prebuilt QIIME2 classifier database from the remote repository.

        Required Configs:
            - config.qiime_classifier_db_url
            - config.qiime_classifier_db

        Args:
            verbose (bool, optional): Whether to print the progress or not. Defaults to True.
        """
        r = requests.get(self.config.qiime_classifier_db_url, allow_redirects=True,stream=True)
        block_size = 1024
        total_size = int(r.headers.get('content-length', 0))
        if not os.path.exists(Path(self.config.qiime_classifier_db).parent):
            os.makedirs(Path(self.config.qiime_classifier_db).parent)
        with open(self.config.qiime_classifier_db, 'wb') as f:
            with Progress() as progress:
                task = progress.add_task("Downloading the qiime's classifier database:", total=total_size)
                for data in r.iter_content(block_size):
                    progress.update(task, advance=len(data))
                    f.write(data)
        if verbose:
            rich.print(f"[green]Qiime's classifier database downloaded to {self.config.qiime_classifier_db}")

    def download_studies_database(self,verbose:bool=True)->None:
        """
        This function will download the required files for the studies functionality.

        Required Configs:
            - config.studies_remote
            - config.studies_local

        Args:
            verbose (bool, optional): Whether to print the progress or not. Defaults to True.

        Examples:
            >>> import os
            >>> assert os.path.exists(os.path.join(Main_Dir,"studies_test_db.tsv"))==False
            >>> db=Database(config=configs.Database(studies_db=os.path.join(Main_Dir,"studies_test_db.tsv")))
            >>> db.download_studies_database(verbose=False)
            >>> assert os.path.exists(os.path.join(Main_Dir,"studies_test_db.tsv"))==True
            >>> assert os.path.getsize(os.path.join(Main_Dir,"studies_test_db.tsv"))>0
            >>> os.remove(os.path.join(Main_Dir,"studies_test_db.tsv"))
        """
        for i in self.config.studies_remote:
            r = requests.get(self.config.studies_remote[i], allow_redirects=True)
            if not os.path.exists(Path(self.config.studies_local[i]).parent):
                os.makedirs(Path(self.config.studies_local[i]).parent)
            with open(self.config.studies_local[i], 'wb') as f:
                f.write(r.content)

            if verbose:
                rich.print(f"[bold green]Downloaded {self.config.studies_remote[i]}[/bold green]")

    def download_amplicon_to_genome_db(self,verbose:bool=True):
        """
        This function will automatically download the GTDB-tk database for genome assignment.

        Required Configs:
            - config.amplicon_to_genome_db
            - config.amplicon_to_genome_urls

        Args:
            verbose (bool, optional): Whether to print the progress or not. Defaults to True.

        Examples:
            >>> import os
            >>> assert os.path.exists(os.path.join(Main_Dir,"amplicon_to_genome_test_db"))==False
            >>> db=Database(config=configs.Database(amplicon_to_genome_db=os.path.join(Main_Dir,"amplicon_to_genome_test_db")))
            >>> db.download_amplicon_to_genome_db(verbose=False)
            >>> assert os.path.exists(os.path.join(Main_Dir,"amplicon_to_genome_test_db"))==True
            >>> assert len(os.listdir(os.path.join(Main_Dir,"amplicon_to_genome_test_db")))>0
            >>> os.system("rm -r "+os.path.join(Main_Dir,"amplicon_to_genome_test_db"))
            0
        """
        if not os.path.exists(self.config.amplicon_to_genome_db):
            os.mkdir(self.config.amplicon_to_genome_db)

        url = self.config.amplicon_to_genome_urls
        if verbose:
            for keys in ['Version', 'MD5SUM', 'FILE_DESCRIPTIONS']:
                with requests.get(url[keys], allow_redirects=True, stream=True) as r:
                    total_size = int(r.headers.get('content-length', 0))
                    block_size = 1024
                    with Progress() as progress:
                        task1 = progress.add_task("Downloading " + keys, total=total_size)
                        with open(os.path.join(self.config.amplicon_to_genome_db, keys), 'wb') as f:
                            for data in r.iter_content(block_size):
                                progress.update(task1, advance=len(data))
                                f.write(data)
            with requests.get(url['metadata_field_desc'], allow_redirects=True, stream=True) as r:
                total_size = int(r.headers.get('content-length', 0))
                block_size = 1024
                with Progress() as progress:
                    task1 = progress.add_task("Downloading metadata_field_desc.tsv", total=total_size)
                    with open(os.path.join(self.config.amplicon_to_genome_db, 'metadata_field_desc.tsv'), 'wb') as f:
                        for data in r.iter_content(block_size):
                            progress.update(task1, advance=len(data))
                            f.write(data)

            for keys in ['bac120_ssu']:
                with requests.get(url[keys], allow_redirects=True, stream=True) as r:
                    total_size = int(r.headers.get('content-length', 0))
                    block_size = 1024
                    with Progress() as progress:
                        task1 = progress.add_task("Downloading " + keys, total=total_size)
                        with open(os.path.join(self.config.amplicon_to_genome_db, url[keys].split("/")[-1]), 'wb') as f:
                            for data in r.iter_content(block_size):
                                progress.update(task1, advance=len(data))
                                f.write(data)
                with tarfile.open(os.path.join(self.config.amplicon_to_genome_db, url[keys].split("/")[-1])) as f_in:
                    f_in.extractall(self.config.amplicon_to_genome_db)


                os.remove(os.path.join(self.config.amplicon_to_genome_db, url[keys].split("/")[-1]))
        else:
            for keys in ['Version', 'MD5SUM', 'FILE_DESCRIPTIONS']:
                with requests.get(url[keys], allow_redirects=True, stream=False) as r:
                    with open(os.path.join(self.config.amplicon_to_genome_db, keys), 'wb') as f:
                        f.write(r.content)
            with requests.get(url['metadata_field_desc'], allow_redirects=True, stream=False) as r:
                with open(os.path.join(self.config.amplicon_to_genome_db, 'metadata_field_desc.tsv'), 'wb') as f:
                    f.write(r.content)
            for keys in [ 'bac120_ssu']:
                with requests.get(url[keys], allow_redirects=True, stream=False) as r:
                    with open(os.path.join(self.config.amplicon_to_genome_db, url[keys].split("/")[-1]), 'wb') as f:
                        f.write(r.content)
                with tarfile.open(os.path.join(self.config.amplicon_to_genome_db, url[keys].split("/")[-1])) as f_in:
                    f_in.extractall(self.config.amplicon_to_genome_db)
        if verbose:
            rich.print("[bold green]Downloaded all the required files for Amplicon to Genome functionality.[/bold green]")




    def download_all_databases(self,verbose:bool=True)->None:
        """
        This function will download all the required databases for all the functionalities of ADToolbox.
        NOTE: Each method that this function calls is tested individually, so this method is skipped in doctests.

        Args:
            verbose (bool, optional): Whether to print the progress or not. Defaults to True.

        Required Configs:
            - config.adm_parameters_base_dir
            - config.adm_parameters_urls
            - config.seed_rxn_url
            - config.seed_compound_url
            - config.reaction_db
            - config.compound_db
            - config.protein_db_url
            - config.protein_db
            - config.adtoolbox_rxn_db_url
            - config.csv_reaction_db
            - config.feed_db_url
            - config.feed_db
            - config.amplicon_to_genome_db
            - config.amplicon_to_genome_urls
            - config.qiime_classifier_db_url
            - config.qiime_classifier_db
            - config.studies_remote
            - config.studies_local

        Examples:
            >>> import os # doctest: +SKIP
            >>> db=Database(config=configs.Database()) # doctest: +SKIP
            >>> db.download_all_databases(verbose=False) # doctest: +SKIP

        """

        self.download_seed_databases(verbose=verbose)
        self.download_adm_parameters(verbose=verbose)
        self.download_protein_database(verbose=verbose)
        self.download_reaction_database(verbose=verbose)
        self.download_feed_database(verbose=verbose)
        self.download_studies_database(verbose=verbose)
        self.download_amplicon_to_genome_db(verbose=verbose)
        self.download_qiime_classifier_db(verbose=verbose)
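
A minimal end-to-end sketch of the workflow above, assuming the default configuration paths: point a Database instance at a config, fetch every remote database, and then query the feed database.

from adtoolbox import configs, core

db = core.Database(config=configs.Database())
db.download_all_databases(verbose=True)
feeds = db.get_feed_from_feed_db("reference", "test")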

add_experiment_to_experiments_db(experiment)

This function adds an experiment to the experiments database. It takes an experiment and adds it to the experiments database.

Required Configs
  • config.experimental_data_db

Parameters:

    Name         Type         Description                             Default
    experiment   Experiment   An instance of the Experiment class.    required

Examples:

>>> import os,json
>>> assert os.path.exists(os.path.join(Main_Dir,"experiments_test_db.tsv"))==False
>>> db=Database(config=configs.Database(experimental_data_db=os.path.join(Main_Dir,"experiments_test_db.json")))
>>> experiment=Experiment(name="test_study",time=[0,1,2],variables=[2,6],data= [[1,2,3],[4,5,6]],reference="test")
>>> db.add_experiment_to_experiments_db(experiment)
>>> assert os.path.exists(os.path.join(Main_Dir,"experiments_test_db.json"))==True
>>> assert os.path.getsize(os.path.join(Main_Dir,"experiments_test_db.json"))>0
>>> os.remove(os.path.join(Main_Dir,"experiments_test_db.json"))
Source code in adtoolbox/core.py
def add_experiment_to_experiments_db(self,experiment:Experiment)->None:
    r"""
    This function adds an experiment to the experiments database. It takes an experiment and adds it to the experiments database.

    Required Configs:
        - config.experimental_data_db

    Args:
        experiment (Experiment): An instance of the Experiment class.

    Examples:
        >>> import os,json
        >>> assert os.path.exists(os.path.join(Main_Dir,"experiments_test_db.tsv"))==False
        >>> db=Database(config=configs.Database(experimental_data_db=os.path.join(Main_Dir,"experiments_test_db.json")))
        >>> experiment=Experiment(name="test_study",time=[0,1,2],variables=[2,6],data= [[1,2,3],[4,5,6]],reference="test")
        >>> db.add_experiment_to_experiments_db(experiment)
        >>> assert os.path.exists(os.path.join(Main_Dir,"experiments_test_db.json"))==True
        >>> assert os.path.getsize(os.path.join(Main_Dir,"experiments_test_db.json"))>0
        >>> os.remove(os.path.join(Main_Dir,"experiments_test_db.json"))
    """
    if not os.path.exists(self.config.experimental_data_db):
        self.initialize_experimental_data_db()

    if experiment.name in [experiment.name for experiment in self.get_experiment_from_experiments_db("name",experiment.name)]: 
        raise ValueError("Experiment already exists in the database!")

    with open(self.config.experimental_data_db,"r") as f:
        experiments_db=json.load(f)
    experiments_db.append(experiment.to_dict())
    with open(self.config.experimental_data_db,"w") as f:
        json.dump(experiments_db,f)
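
A small follow-up sketch: once an experiment has been added, it can be read back with get_experiment_from_experiments_db, which returns a list of Experiment instances (the names here are illustrative).

experiment = Experiment(name="my_study", time=[0, 1, 2], variables=[2, 6], data=[[1, 2, 3], [4, 5, 6]], reference="doi")
db.add_experiment_to_experiments_db(experiment)
assert db.get_experiment_from_experiments_db("name", "my_study")[0].name == "my_study"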

add_feed_to_feed_db(feed)

This function adds a feed to the feed database. It takes the feed name and the feed composition and adds them to the feed database.

Required Configs
  • config.feed_db

Parameters:

    Name   Type   Description                      Default
    feed   Feed   An instance of the Feed class.   required

Examples:

>>> import os
>>> assert os.path.exists(os.path.join(Main_Dir,"feed_test_db.tsv"))==False
>>> db=Database(config=configs.Database(feed_db=os.path.join(Main_Dir,"feed_test_db.tsv")))
>>> feed=Feed(name="test_feed",carbohydrates=10,lipids=20,proteins=30,tss=80,si=10,xi=30,reference="test")
>>> db.add_feed_to_feed_db(feed)
>>> assert os.path.exists(os.path.join(Main_Dir,"feed_test_db.tsv"))==True
>>> assert pd.read_table(os.path.join(Main_Dir,"feed_test_db.tsv"),delimiter="\t").shape[0]>0
>>> os.remove(os.path.join(Main_Dir,"feed_test_db.tsv"))
Source code in adtoolbox/core.py
def add_feed_to_feed_db(self,feed:Feed)->None:
    r"""
    This function adds a feed to the feed database. It takes the feed name and the feed composition and adds them to the feed database.

    Required Configs:
        - config.feed_db

    Args:
        feed (Feed): An instance of the Feed class.

    Examples:
        >>> import os
        >>> assert os.path.exists(os.path.join(Main_Dir,"feed_test_db.tsv"))==False
        >>> db=Database(config=configs.Database(feed_db=os.path.join(Main_Dir,"feed_test_db.tsv")))
        >>> feed=Feed(name="test_feed",carbohydrates=10,lipids=20,proteins=30,tss=80,si=10,xi=30,reference="test")
        >>> db.add_feed_to_feed_db(feed)
        >>> assert os.path.exists(os.path.join(Main_Dir,"feed_test_db.tsv"))==True
        >>> assert pd.read_table(os.path.join(Main_Dir,"feed_test_db.tsv"),delimiter="\t").shape[0]>0
        >>> os.remove(os.path.join(Main_Dir,"feed_test_db.tsv"))

    """
    if not os.path.exists(self.config.feed_db):
        self.initialize_feed_db()

    if feed.name in pd.read_table(self.config.feed_db,delimiter="\t")["name"].values:
        raise ValueError("Feed already exists in the database.")
    feed_db=pd.read_table(self.config.feed_db,delimiter="\t")
    feed_db=pd.concat([feed_db,pd.DataFrame([feed.to_dict()])],ignore_index=True,axis=0)
    feed_db.to_csv(self.config.feed_db,index=False,sep="\t")

add_metagenomics_study_to_metagenomics_studies_db(metagenomics_study)

This function adds a metagenomics study to the metagenomics studies database. It takes a metagenomics study and adds it to the metagenomics studies database.

Required Configs
  • config.metagenomics_studies_db

Parameters:

    Name                 Type                Description                                   Default
    metagenomics_study   MetagenomicsStudy   An instance of the MetagenomicsStudy class.   required

Examples:

>>> import os
>>> assert os.path.exists(os.path.join(Main_Dir,"metagenomics_studies_test_db.tsv"))==False
>>> db=Database(config=configs.Database(metagenomics_studies_db=os.path.join(Main_Dir,"metagenomics_studies_test_db.tsv")))
>>> metagenomics_study=MetagenomicsStudy(name="test_study",study_type="metagenomics",microbiome="anaerobic digester",sample_accession="test",comments="test",study_accession="test")
>>> db.add_metagenomics_study_to_metagenomics_studies_db(metagenomics_study)
>>> assert os.path.exists(os.path.join(Main_Dir,"metagenomics_studies_test_db.tsv"))==True
>>> assert pd.read_table(os.path.join(Main_Dir,"metagenomics_studies_test_db.tsv"),delimiter="\t").shape[0]>0
>>> os.remove(os.path.join(Main_Dir,"metagenomics_studies_test_db.tsv"))
Source code in adtoolbox/core.py
def add_metagenomics_study_to_metagenomics_studies_db(self,metagenomics_study:MetagenomicsStudy)->None:
    r"""
    This function adds a metagenomics study to the metagenomics studies database. It takes a metagenomics study and adds it to the metagenomics studies database.

    Required Configs:
        - config.metagenomics_studies_db

    Args:
        metagenomics_study (MetagenomicsStudy): An instance of the MetagenomicsStudy class.

    Examples:
        >>> import os
        >>> assert os.path.exists(os.path.join(Main_Dir,"metagenomics_studies_test_db.tsv"))==False
        >>> db=Database(config=configs.Database(metagenomics_studies_db=os.path.join(Main_Dir,"metagenomics_studies_test_db.tsv")))
        >>> metagenomics_study=MetagenomicsStudy(name="test_study",study_type="metagenomics",microbiome="anaerobic digester",sample_accession="test",comments="test",study_accession="test")
        >>> db.add_metagenomics_study_to_metagenomics_studies_db(metagenomics_study)
        >>> assert os.path.exists(os.path.join(Main_Dir,"metagenomics_studies_test_db.tsv"))==True
        >>> assert pd.read_table(os.path.join(Main_Dir,"metagenomics_studies_test_db.tsv"),delimiter="\t").shape[0]>0
        >>> os.remove(os.path.join(Main_Dir,"metagenomics_studies_test_db.tsv"))
    """
    if not os.path.exists(self.config.metagenomics_studies_db):
        self.initialize_metagenomics_studies_db()
    metagenomics_studies_db=pd.read_table(self.config.metagenomics_studies_db,delimiter="\t")
    metagenomics_studies_db=pd.concat([metagenomics_studies_db,pd.DataFrame([metagenomics_study.to_dict()])],ignore_index=True,axis=0)
    metagenomics_studies_db.to_csv(self.config.metagenomics_studies_db,index=False,sep="\t")

add_protein_to_protein_db(protein_id, header_tail)

This function adds a protein sequence to the protein database. It takes a uniprot id and an EC number it is assigned to and adds the corresponding protein sequence to the protein database.

Required Configs
  • config.protein_db

Parameters:

    Name          Type   Description                                                     Default
    protein_id    str    The uniprot id of the protein.                                  required
    header_tail   str    A text to append to the header of the entry in the database;   required
                         in ADToolbox it is better to use the EC number for
                         compatibility with downstream functions.

Examples:

>>> import os
>>> assert os.path.exists(os.path.join(Main_Dir,"protein_test_db.fasta"))==False
>>> db=Database(config=configs.Database(protein_db=os.path.join(Main_Dir,"protein_test_db.fasta")))
>>> db.add_protein_to_protein_db("P0A9P0","1.2.3.4")
>>> assert os.path.exists(os.path.join(Main_Dir,"protein_test_db.fasta"))==True
>>> assert os.path.getsize(os.path.join(Main_Dir,"protein_test_db.fasta"))>0
>>> import utils
>>> assert len(utils.fasta_to_dict(os.path.join(Main_Dir,"protein_test_db.fasta")))>0
>>> os.remove(os.path.join(Main_Dir,"protein_test_db.fasta"))
Source code in adtoolbox/core.py
def add_protein_to_protein_db(self, protein_id:str, header_tail:str)->None:
    """
    This function adds a protein sequence to the protein database. It takes a uniprot id and an EC number it is assigned to
    and adds the corresponding protein sequence to the protein database.

    Required Configs:
        - config.protein_db

    Args:
        protein_id (str): The uniprot id of the protein.
        header_tail (str): A text to append to the header of the entry in the database;
            in ADToolbox it is better to use the EC number for compatibility with downstream functions.


    Examples:
        >>> import os
        >>> assert os.path.exists(os.path.join(Main_Dir,"protein_test_db.fasta"))==False
        >>> db=Database(config=configs.Database(protein_db=os.path.join(Main_Dir,"protein_test_db.fasta")))
        >>> db.add_protein_to_protein_db("P0A9P0","1.2.3.4")
        >>> assert os.path.exists(os.path.join(Main_Dir,"protein_test_db.fasta"))==True
        >>> assert os.path.getsize(os.path.join(Main_Dir,"protein_test_db.fasta"))>0
        >>> import utils
        >>> assert len(utils.fasta_to_dict(os.path.join(Main_Dir,"protein_test_db.fasta")))>0
        >>> os.remove(os.path.join(Main_Dir,"protein_test_db.fasta"))
    """
    if not os.path.exists(self.config.protein_db):
        self.initialize_protein_db()
    with open(self.config.protein_db,"a") as f:
        f.write(">"+protein_id+"|"+header_tail+"\n")
        f.write(self.get_protein_seqs_from_uniprot(protein_id)+"\n")

add_proteins_from_ecnumbers_to_protein_db(ec_numbers)

This function adds protein sequences to the protein database from a list of EC numbers. It takes a list of EC numbers and finds the protein sequences for each EC number in the list. Then it saves the protein sequences in a fasta file.

Required Configs
  • config.protein_db

Parameters:

    Name         Type   Description             Default
    ec_numbers   list   A list of EC numbers.   required

Examples:

>>> import os
>>> assert os.path.exists(os.path.join(Main_Dir,"protein_test_db.fasta"))==False
>>> db=Database(config=configs.Database(protein_db=os.path.join(Main_Dir,"protein_test_db.fasta")))
>>> db.add_proteins_from_ecnumbers_to_protein_db(["1.1.1.1","1.1.1.2"])
>>> assert os.path.exists(os.path.join(Main_Dir,"protein_test_db.fasta"))==True
>>> import utils
>>> assert len(utils.fasta_to_dict(os.path.join(Main_Dir,"protein_test_db.fasta")))>0
>>> os.remove(os.path.join(Main_Dir,"protein_test_db.fasta"))
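
A common way to obtain the EC list is from cazy_ec (documented below); a sketch of that pairing, assuming the default configuration is acceptable:

from adtoolbox import configs, core

db = core.Database(config=configs.Database())

# CAZy-derived EC numbers for carbohydrate metabolism, then fetch and
# append the reviewed bacterial/archaeal sequences for each EC number.
db.add_proteins_from_ecnumbers_to_protein_db(db.cazy_ec())
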
Source code in adtoolbox/core.py
def add_proteins_from_ecnumbers_to_protein_db(self, ec_numbers:list)->None:
    """
    This function adds protein sequences to the protein database from a list of EC numbers.
    It takes a list of EC numbers and finds the protein sequences for each EC number in the list.
    Then it saves the protein sequences in a fasta file.

    Required Configs:
        - config.protein_db

    Args:
        ec_numbers (list): A list of EC numbers.

    Examples:
        >>> import os
        >>> assert os.path.exists(os.path.join(Main_Dir,"protein_test_db.fasta"))==False
        >>> db=Database(config=configs.Database(protein_db=os.path.join(Main_Dir,"protein_test_db.fasta")))
        >>> db.add_proteins_from_ecnumbers_to_protein_db(["1.1.1.1","1.1.1.2"])
        >>> assert os.path.exists(os.path.join(Main_Dir,"protein_test_db.fasta"))==True
        >>> import utils
        >>> assert len(utils.fasta_to_dict(os.path.join(Main_Dir,"protein_test_db.fasta")))>0
        >>> os.remove(os.path.join(Main_Dir,"protein_test_db.fasta"))
    """
    if not os.path.exists(self.config.protein_db):
        self.initialize_protein_db()

    protein_seqs={}
    for ec in ec_numbers:
        protein_seqs.update(self.proteins_from_ec(ec))

    with open(self.config.protein_db,"a") as f:
        for key,value in protein_seqs.items():
            f.write(">"+key+"\n")
            f.write(value+"\n")

build_mmseqs_database(container='None')

Builds an indexed mmseqs database from the ADToolbox's fasta protein database.

Required Configs
  • config.protein_db
  • config.adtoolbox_singularity
  • config.adtoolbox_docker

Parameters:

  • container (str): The container to run the script with. Defaults to "None".

Returns:

  • str: The script to build the mmseqs database.

Examples:

>>> import os
>>> assert os.path.exists(os.path.join(Main_Dir,"protein_test_db.fasta"))==False
>>> db=Database(config=configs.Database(protein_db=os.path.join(Main_Dir,"protein_test_db.fasta")))
>>> db.add_protein_to_protein_db("P0A9P0","x,x,x,x")
>>> assert os.path.exists(os.path.join(Main_Dir,"protein_test_db.fasta"))==True
>>> assert os.path.getsize(os.path.join(Main_Dir,"protein_test_db.fasta"))>0
>>> script=db.build_mmseqs_database()
>>> assert script=="mmseqs createdb "+str(os.path.join(Main_Dir,"protein_test_db.fasta"))+" "+str(db.config.protein_db_mmseqs)
>>> os.remove(os.path.join(Main_Dir,"protein_test_db.fasta"))
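
Since the method only returns the script, running it is left to the caller; a sketch using subprocess, assuming mmseqs (or the chosen container runtime) is on PATH:

import subprocess
from adtoolbox import configs, core

db = core.Database(config=configs.Database())

# container="None" returns a bare "mmseqs createdb ..." command;
# "docker" or "singularity" wrap it in the corresponding invocation.
script = db.build_mmseqs_database(container="None")
subprocess.run(script, shell=True, check=True)
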
Source code in adtoolbox/core.py
def build_mmseqs_database(self,container:str="None")->str:
    """Builds an indexed mmseqs database from the ADToolbox's fasta protein database.

    Required Configs:
        - config.protein_db
        - config.adtoolbox_singularity
        - config.adtoolbox_docker

    Args:
        container (str, optional): The container to run the script with. Defaults to "None".
    Returns:
        str: The script to build the mmseqs database.

    Examples:
        >>> import os
        >>> assert os.path.exists(os.path.join(Main_Dir,"protein_test_db.fasta"))==False
        >>> db=Database(config=configs.Database(protein_db=os.path.join(Main_Dir,"protein_test_db.fasta")))
        >>> db.add_protein_to_protein_db("P0A9P0","x,x,x,x")
        >>> assert os.path.exists(os.path.join(Main_Dir,"protein_test_db.fasta"))==True
        >>> assert os.path.getsize(os.path.join(Main_Dir,"protein_test_db.fasta"))>0
        >>> script=db.build_mmseqs_database()
        >>> assert script=="mmseqs createdb "+str(os.path.join(Main_Dir,"protein_test_db.fasta"))+" "+str(db.config.protein_db_mmseqs)
        >>> os.remove(os.path.join(Main_Dir,"protein_test_db.fasta"))

    """
    script=create_mmseqs_database(self.config.protein_db,
                                  self.config.protein_db_mmseqs,
                                  container=container,
                                  run=False,
                                  config=self.config)

    if container=="None":
        pass

    elif container=="singularity":
        script=f"singularity exec --bind {self.config.protein_db}:{self.config.protein_db},{self.config.protein_db_mmseqs}:{self.config.protein_db_mmseqs} {self.config.adtoolbox_singularity} {script}"

    elif container=="docker":
        script=f"docker run -v {self.config.protein_db}:{self.config.protein_db} -v {self.config.protein_db_mmseqs}:{self.config.protein_db_mmseqs} {self.config.adtoolbox_docker} {script}"

    else:
        raise ValueError("Container should be either None, singularity or docker!")

    return script

build_protein_db_from_reactions_db()

This function builds the protein database from the reaction database. It takes the reaction database and finds the protein sequences for each EC number in the reaction database. Then it saves the protein sequences in a fasta file.

Required Configs
  • config.reaction_db
  • config.protein_db

Examples:

>>> import os
>>> assert os.path.exists(os.path.join(Main_Dir,"protein_test_db.fasta"))==False
>>> assert os.path.exists(os.path.join(Main_Dir,"reaction_test_db.tsv"))==False
>>> db=Database(config=configs.Database(protein_db=os.path.join(Main_Dir,"protein_test_db.fasta"),reaction_db=os.path.join(Main_Dir,"reaction_test_db.tsv")))
>>> reaction_db=pd.DataFrame(columns=["EC_Numbers","Seed Ids","Reaction Names","ADM1_Reaction","e_adm_Reactions","Pathways"])
>>> reaction_db.loc[0,"EC_Numbers"]="1.1.1.1"
>>> reaction_db.to_csv(os.path.join(Main_Dir,"reaction_test_db.tsv"),index=False,sep="\t")
>>> db.build_protein_db_from_reactions_db()
>>> assert os.path.exists(os.path.join(Main_Dir,"protein_test_db.fasta"))==True
>>> assert os.path.exists(os.path.join(Main_Dir,"reaction_test_db.tsv"))==True
>>> assert os.path.getsize(os.path.join(Main_Dir,"protein_test_db.fasta"))>0
>>> os.remove(os.path.join(Main_Dir,"protein_test_db.fasta"))
>>> os.remove(os.path.join(Main_Dir,"reaction_test_db.tsv"))
Source code in adtoolbox/core.py
def build_protein_db_from_reactions_db(self):
    r"""
    This function builds the protein database from the reaction database.
    It takes the reaction database and finds the protein sequences for each EC number in the reaction database.
    Then it saves the protein sequences in a fasta file.

    Required Configs:
        - config.reaction_db
        - config.protein_db

    Examples:
        >>> import os
        >>> assert os.path.exists(os.path.join(Main_Dir,"protein_test_db.fasta"))==False
        >>> assert os.path.exists(os.path.join(Main_Dir,"reaction_test_db.tsv"))==False
        >>> db=Database(config=configs.Database(protein_db=os.path.join(Main_Dir,"protein_test_db.fasta"),reaction_db=os.path.join(Main_Dir,"reaction_test_db.tsv")))
        >>> reaction_db=pd.DataFrame(columns=["EC_Numbers","Seed Ids","Reaction Names","ADM1_Reaction","e_adm_Reactions","Pathways"])
        >>> reaction_db.loc[0,"EC_Numbers"]="1.1.1.1"
        >>> reaction_db.to_csv(os.path.join(Main_Dir,"reaction_test_db.tsv"),index=False,sep="\t")
        >>> db.build_protein_db_from_reactions_db()
        >>> assert os.path.exists(os.path.join(Main_Dir,"protein_test_db.fasta"))==True
        >>> assert os.path.exists(os.path.join(Main_Dir,"reaction_test_db.tsv"))==True
        >>> assert os.path.getsize(os.path.join(Main_Dir,"protein_test_db.fasta"))>0
        >>> os.remove(os.path.join(Main_Dir,"protein_test_db.fasta"))
        >>> os.remove(os.path.join(Main_Dir,"reaction_test_db.tsv"))
    """
    rxn_db=pd.read_table(self.config.reaction_db,delimiter="\t")
    ec_numbers=rxn_db["EC_Numbers"]
    ec_numbers=list(set(ec_numbers))
    protein_seqs={}
    for ec in ec_numbers:
        protein_seqs.update(self.proteins_from_ec(ec))
    with open(self.config.protein_db,"w") as f:
        for key,value in protein_seqs.items():
            f.write(">"+key+"\n")
            f.write(value+"\n")

cazy_ec()

This method returns a list of EC numbers extracted from the CAZy website. It is useful for adding more carbohydrate metabolism reactions to the reaction database.

Returns:

  • list: A list of EC numbers for carbohydrate metabolism found in the CAZy database.

Examples:

>>> db=Database()
>>> ec_list=db.cazy_ec()
>>> assert len(ec_list)>0
Source code in adtoolbox/core.py
def cazy_ec(self)->list:
    """
    This method returns a list of EC numbers that are extracted from the Cazy website.
    This method is useful for adding more carbohydrate metabolism reactions to the reaction database.

    Returns:
        list: A list of EC numbers for carbohydrate metabolism found on CAZy database.

    Examples:
        >>> db=Database()
        >>> ec_list=db.cazy_ec()
        >>> assert len(ec_list)>0
    """

    ec_list = []
    for link in self.config.cazy_links:
        page = requests.get(link)
        soup = BeautifulSoup(page.content, "html.parser")
        results = soup.find("div", class_="cadre_principal").find_all(
            "th", class_="thec")
        for ec_number in results:
            if '-' not in ec_number.text.strip() and '.' in ec_number.text.strip():
                ec_list.append(ec_number.text.strip())

    return ec_list

download_adm_parameters(verbose=True)

Downloads the parameters needed for running ADM models in ADToolbox.

Required Configs
  • config.adm_parameters_base_dir
  • config.adm_parameters_urls

Examples:

>>> import os
>>> assert os.path.exists(os.path.join(Main_Dir,"adm_parameters_test"))==False
>>> db=Database(config=configs.Database(adm_parameters_base_dir=os.path.join(Main_Dir,"adm_parameters_test")))
>>> db.download_adm_parameters(verbose=False) 
>>> assert os.path.exists(os.path.join(Main_Dir,"adm_parameters_test"))==True
>>> assert len(os.listdir(os.path.join(Main_Dir,"adm_parameters_test")))==12
>>> os.system("rm -r "+os.path.join(Main_Dir,"adm_parameters_test"))
0

Parameters:

  • verbose (bool): Whether to print the progress or not. Defaults to True.
Source code in adtoolbox/core.py
def download_adm_parameters(self,verbose:bool=True)->None:
    """
    Downloads the parameters needed for running ADM models in ADToolbox.

    Required Configs:
        - config.adm_parameters_base_dir
        - config.adm_parameters_urls

    Examples:
        >>> import os
        >>> assert os.path.exists(os.path.join(Main_Dir,"adm_parameters_test"))==False
        >>> db=Database(config=configs.Database(adm_parameters_base_dir=os.path.join(Main_Dir,"adm_parameters_test")))
        >>> db.download_adm_parameters(verbose=False) 
        >>> assert os.path.exists(os.path.join(Main_Dir,"adm_parameters_test"))==True
        >>> assert len(os.listdir(os.path.join(Main_Dir,"adm_parameters_test")))==12
        >>> os.system("rm -r "+os.path.join(Main_Dir,"adm_parameters_test"))
        0

    Args:

        verbose (bool, optional): Whether to print the progress or not. Defaults to True.


    """
    for param in self.config.adm_parameters.keys():
        if not pathlib.Path(self.config.adm_parameters[param]).parent.exists():
            os.makedirs(pathlib.Path(self.config.adm_parameters[param]).parent)
        r = requests.get(self.config.adm_parameters_urls[param], allow_redirects=True)
        with open(self.config.adm_parameters[param], 'wb') as f:
            f.write(r.content)
        if verbose:
            rich.print(f"[green]{param} downloaded to {self.config.adm_parameters[param]}")

download_all_databases(verbose=True)

This function will download all the required databases for all the functionalities of ADToolbox. NOTE: each method that this function calls is individually tested so it is skipped from testing!

Parameters:

  • verbose (bool): Whether to print the progress or not. Defaults to True.
Required Configs
  • config.adm_parameters_base_dir
  • config.adm_parameters_urls
  • config.seed_rxn_url
  • config.seed_compound_url
  • config.reaction_db
  • config.compound_db
  • config.protein_db_url
  • config.protein_db
  • config.adtoolbox_rxn_db_url
  • config.csv_reaction_db
  • config.feed_db_url
  • config.feed_db
  • config.amplicon_to_genome_db
  • config.amplicon_to_genome_urls
  • config.qiime_classifier_db_url
  • config.qiime_classifier_db
  • config.studies_db
  • config.studies_urls

Examples:

>>> import os
>>> db=Database(config=configs.Database())
>>> db.download_all_databases(verbose=False)
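
For a first-time setup this is typically the only call needed; a minimal sketch, assuming the default paths under Main_Dir are acceptable:

from adtoolbox import configs, core

# Downloads seed, protein, reaction, feed, studies, amplicon-to-genome,
# and QIIME classifier databases plus the ADM parameter files.
core.Database(config=configs.Database()).download_all_databases(verbose=True)
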
Source code in adtoolbox/core.py
def download_all_databases(self,verbose:bool=True)->None:
    """
    This function will download all the required databases for all the functionalities of ADToolbox.
    NOTE: each method that this function calls is individually tested so it is skipped from testing!

    Args:
        verbose (bool, optional): Whether to print the progress or not. Defaults to True.

    Required Configs:
        - config.adm_parameters_base_dir
        - config.adm_parameters_urls
        - config.seed_rxn_url
        - config.seed_compound_url
        - config.reaction_db
        - config.compound_db
        - config.protein_db_url
        - config.protein_db
        - config.adtoolbox_rxn_db_url
        - config.csv_reaction_db
        - config.feed_db_url
        - config.feed_db
        - config.amplicon_to_genome_db
        - config.amplicon_to_genome_urls
        - config.qiime_classifier_db_url
        - config.qiime_classifier_db
        - config.studies_db
        - config.studies_urls

    Examples:
        >>> import os # doctest: +SKIP
        >>> db=Database(config=configs.Database()) # doctest: +SKIP
        >>> db.download_all_databases(verbose=False) # doctest: +SKIP

    """

    self.download_seed_databases(verbose=verbose)
    self.download_adm_parameters(verbose=verbose)
    self.download_protein_database(verbose=verbose)
    self.download_reaction_database(verbose=verbose)
    self.download_feed_database(verbose=verbose)
    self.download_studies_database(verbose=verbose)
    self.download_amplicon_to_genome_db(verbose=verbose)
    self.download_qiime_classifier_db(verbose=verbose)

download_amplicon_to_genome_db(verbose=True)

This function will automatically download the GTDB-tk database for genome assignment.

Required Configs
  • config.amplicon_to_genome_db
  • config.amplicon_to_genome_urls

Parameters:

  • verbose (bool): Whether to print the progress or not. Defaults to True.

Examples:

>>> import os
>>> assert os.path.exists(os.path.join(Main_Dir,"amplicon_to_genome_test_db"))==False
>>> db=Database(config=configs.Database(amplicon_to_genome_db=os.path.join(Main_Dir,"amplicon_to_genome_test_db")))
>>> db.download_amplicon_to_genome_db(verbose=False)
>>> assert os.path.exists(os.path.join(Main_Dir,"amplicon_to_genome_test_db"))==True
>>> assert len(os.listdir(os.path.join(Main_Dir,"amplicon_to_genome_test_db")))>0
>>> os.system("rm -r "+os.path.join(Main_Dir,"amplicon_to_genome_test_db"))
0
Source code in adtoolbox/core.py
def download_amplicon_to_genome_db(self,verbose:bool=True):
    """
    This function will automatically download the GTDB-tk database for genome assignment.

    Required Configs:
        - config.amplicon_to_genome_db
        - config.amplicon_to_genome_urls

    Args:
        verbose (bool, optional): Whether to print the progress or not. Defaults to True.

    Examples:
        >>> import os
        >>> assert os.path.exists(os.path.join(Main_Dir,"amplicon_to_genome_test_db"))==False
        >>> db=Database(config=configs.Database(amplicon_to_genome_db=os.path.join(Main_Dir,"amplicon_to_genome_test_db")))
        >>> db.download_amplicon_to_genome_db(verbose=False)
        >>> assert os.path.exists(os.path.join(Main_Dir,"amplicon_to_genome_test_db"))==True
        >>> assert len(os.listdir(os.path.join(Main_Dir,"amplicon_to_genome_test_db")))>0
        >>> os.system("rm -r "+os.path.join(Main_Dir,"amplicon_to_genome_test_db"))
        0
    """
    if not os.path.exists(self.config.amplicon_to_genome_db):
        os.mkdir(self.config.amplicon_to_genome_db)

    url = self.config.amplicon_to_genome_urls
    if verbose:
        for keys in ['Version', 'MD5SUM', 'FILE_DESCRIPTIONS']:
            with requests.get(url[keys], allow_redirects=True, stream=True) as r:
                total_size = int(r.headers.get('content-length', 0))
                block_size = 1024
                with Progress() as progress:
                    task1 = progress.add_task("Downloading " + keys, total=total_size)
                    with open(os.path.join(self.config.amplicon_to_genome_db, keys), 'wb') as f:
                        for data in r.iter_content(block_size):
                            progress.update(task1, advance=len(data))
                            f.write(data)
        with requests.get(url['metadata_field_desc'], allow_redirects=True, stream=True) as r:
            total_size = int(r.headers.get('content-length', 0))
            block_size = 1024
            with Progress() as progress:
                task1 = progress.add_task("Downloading metadata_field_desc.tsv", total=total_size)
                with open(os.path.join(self.config.amplicon_to_genome_db, 'metadata_field_desc.tsv'), 'wb') as f:
                    for data in r.iter_content(block_size):
                        progress.update(task1, advance=len(data))
                        f.write(data)

        for keys in ['bac120_ssu']:
            with requests.get(url[keys], allow_redirects=True, stream=True) as r:
                total_size = int(r.headers.get('content-length', 0))
                block_size = 1024
                with Progress() as progress:
                    task1 = progress.add_task("Downloading " + keys, total=total_size)
                    with open(os.path.join(self.config.amplicon_to_genome_db, url[keys].split("/")[-1]), 'wb') as f:
                        for data in r.iter_content(block_size):
                            progress.update(task1, advance=len(data))
                            f.write(data)
            with tarfile.open(os.path.join(self.config.amplicon_to_genome_db, url[keys].split("/")[-1])) as f_in:
                f_in.extractall(self.config.amplicon_to_genome_db)


            os.remove(os.path.join(self.config.amplicon_to_genome_db, url[keys].split("/")[-1]))
    else:
        for keys in ['Version', 'MD5SUM', 'FILE_DESCRIPTIONS']:
            with requests.get(url[keys], allow_redirects=True, stream=False) as r:
                with open(os.path.join(self.config.amplicon_to_genome_db, keys), 'wb') as f:
                    f.write(r.content)
        with requests.get(url['metadata_field_desc'], allow_redirects=True, stream=False) as r:
            with open(os.path.join(self.config.amplicon_to_genome_db, 'metadata_field_desc.tsv'), 'wb') as f:
                f.write(r.content)
        for keys in [ 'bac120_ssu']:
            with requests.get(url[keys], allow_redirects=True, stream=False) as r:
                with open(os.path.join(self.config.amplicon_to_genome_db, url[keys].split("/")[-1]), 'wb') as f:
                    f.write(r.content)
            with tarfile.open(os.path.join(self.config.amplicon_to_genome_db, url[keys].split("/")[-1])) as f_in:
                f_in.extractall(self.config.amplicon_to_genome_db)
    if verbose:
        rich.print("[bold green]Downloaded all the required files for Amplicon to Genome functionality.[/bold green]")

download_feed_database(verbose=True)

This function will download the feed database from the remote repository.

Required Configs
  • config.feed_db_url
  • config.feed_db

Parameters:

  • verbose (bool): Whether to print the progress or not. Defaults to True.

Examples:

>>> import os
>>> assert os.path.exists(os.path.join(Main_Dir,"feed_test_db.tsv"))==False
>>> db=Database(config=configs.Database(feed_db=os.path.join(Main_Dir,"feed_test_db.tsv")))
>>> db.download_feed_database(verbose=False)
>>> assert os.path.exists(os.path.join(Main_Dir,"feed_test_db.tsv"))==True
>>> assert os.path.getsize(os.path.join(Main_Dir,"feed_test_db.tsv"))>0
>>> os.remove(os.path.join(Main_Dir,"feed_test_db.tsv"))
Source code in adtoolbox/core.py
def download_feed_database(self,verbose:bool=True)-> None:
    """
    This function will download the feed database from the remote repository.

    Required Configs:
        - config.feed_db_url
        - config.feed_db

    Args:
        verbose (bool, optional): Whether to print the progress or not. Defaults to True.

    Examples:
        >>> import os
        >>> assert os.path.exists(os.path.join(Main_Dir,"feed_test_db.tsv"))==False
        >>> db=Database(config=configs.Database(feed_db=os.path.join(Main_Dir,"feed_test_db.tsv")))
        >>> db.download_feed_database(verbose=False)
        >>> assert os.path.exists(os.path.join(Main_Dir,"feed_test_db.tsv"))==True
        >>> assert os.path.getsize(os.path.join(Main_Dir,"feed_test_db.tsv"))>0
        >>> os.remove(os.path.join(Main_Dir,"feed_test_db.tsv"))
    """
    r = requests.get(self.config.feed_db_url, allow_redirects=True)

    if not os.path.exists(Path(self.config.feed_db).parent):
        os.makedirs(Path(self.config.feed_db).parent)

    with open(self.config.feed_db, 'wb') as f:
        f.write(r.content)
    if verbose:
        rich.print(f"[green]Feed database downloaded to {self.config.feed_db}")

download_protein_database(verbose=True)

Downloads the prebuilt protein database from the remote repository.

Required Configs
  • config.protein_db_url
  • config.protein_db

Parameters:

  • verbose (bool): Whether to print the progress or not. Defaults to True.

Examples:

>>> import os
>>> assert os.path.exists(os.path.join(Main_Dir,"protein_test_db.fasta"))==False
>>> db=Database(config=configs.Database(protein_db=os.path.join(Main_Dir,"protein_test_db.fasta")))
>>> db.download_protein_database(verbose=False)
>>> assert os.path.exists(os.path.join(Main_Dir,"protein_test_db.fasta"))==True
>>> assert os.path.getsize(os.path.join(Main_Dir,"protein_test_db.fasta"))>0
>>> os.remove(os.path.join(Main_Dir,"protein_test_db.fasta"))
Source code in adtoolbox/core.py
def download_protein_database(self, verbose:bool=True) -> None:
    """
    Downloads the prebuilt protein database from the remote repository.

    Required Configs:
        - config.protein_db_url
        - config.protein_db

    Args:
        verbose (bool, optional): Whether to print the progress or not. Defaults to True.

    Examples:
        >>> import os
        >>> assert os.path.exists(os.path.join(Main_Dir,"protein_test_db.fasta"))==False
        >>> db=Database(config=configs.Database(protein_db=os.path.join(Main_Dir,"protein_test_db.fasta")))
        >>> db.download_protein_database(verbose=False)
        >>> assert os.path.exists(os.path.join(Main_Dir,"protein_test_db.fasta"))==True
        >>> assert os.path.getsize(os.path.join(Main_Dir,"protein_test_db.fasta"))>0
        >>> os.remove(os.path.join(Main_Dir,"protein_test_db.fasta"))
    """
    r = requests.get(self.config.protein_db_url, allow_redirects=True)

    if not os.path.exists(Path(self.config.protein_db).parent):
        os.makedirs(Path(self.config.protein_db).parent)

    with open(self.config.protein_db, 'wb') as f:
        f.write(r.content)
    if verbose:
        rich.print(f"[green]Protein database downloaded to {self.config.protein_db}")

download_reaction_database(verbose=True)

This function will download the reaction database from the remote repository.

Required Configs
  • config.adtoolbox_rxn_db_url
  • config.csv_reaction_db

Parameters:

  • verbose (bool): Whether to print the progress or not. Defaults to True.

Examples:

>>> import os
>>> assert os.path.exists(os.path.join(Main_Dir,"reaction_test_db.csv"))==False
>>> db=Database(config=configs.Database(csv_reaction_db=os.path.join(Main_Dir,"reaction_test_db.csv")))
>>> db.download_reaction_database(verbose=False)
>>> assert os.path.exists(os.path.join(Main_Dir,"reaction_test_db.csv"))==True
>>> assert os.path.getsize(os.path.join(Main_Dir,"reaction_test_db.csv"))>0
>>> os.remove(os.path.join(Main_Dir,"reaction_test_db.csv"))
Source code in adtoolbox/core.py
def download_reaction_database(self,verbose:bool=True)->None:
    """
    This function will download the reaction database from the remote repository.

    Required Configs:
        - config.adtoolbox_rxn_db_url
        - config.csv_reaction_db

    Args:
        verbose (bool, optional): Whether to print the progress or not. Defaults to True.

    Examples:
        >>> import os
        >>> assert os.path.exists(os.path.join(Main_Dir,"reaction_test_db.csv"))==False
        >>> db=Database(config=configs.Database(csv_reaction_db=os.path.join(Main_Dir,"reaction_test_db.csv")))
        >>> db.download_reaction_database(verbose=False)
        >>> assert os.path.exists(os.path.join(Main_Dir,"reaction_test_db.csv"))==True
        >>> assert os.path.getsize(os.path.join(Main_Dir,"reaction_test_db.csv"))>0
        >>> os.remove(os.path.join(Main_Dir,"reaction_test_db.csv"))
    """

    r = requests.get(self.config.adtoolbox_rxn_db_url, allow_redirects=True)

    if not os.path.exists(Path(self.config.csv_reaction_db).parent):
        os.makedirs(Path(self.config.csv_reaction_db).parent)

    with open(self.config.csv_reaction_db, 'wb') as f:
        f.write(r.content)
    if verbose:
        rich.print(f"[green]Reaction database downloaded to {self.config.csv_reaction_db}")

download_seed_databases(verbose=True)

This function will download the seed databases, both compound and reaction databases.

Required Configs
  • config.seed_rxn_url
  • config.seed_compound_url
  • config.reaction_db
  • config.compound_db

Parameters:

  • verbose (bool): Whether to print the progress or not. Defaults to True.

Examples:

>>> import os
>>> assert os.path.exists(os.path.join(Main_Dir,"seed_rxn.json"))==False
>>> assert os.path.exists(os.path.join(Main_Dir,"seed_compound.json"))==False
>>> db=Database(config=configs.Database(reaction_db=os.path.join(Main_Dir,"seed_rxn.json"),compound_db=os.path.join(Main_Dir,"seed_compound.json")))
>>> db.download_seed_databases(verbose=False)
>>> assert os.path.exists(os.path.join(Main_Dir,"seed_rxn.json"))==True
>>> assert os.path.exists(os.path.join(Main_Dir,"seed_compound.json"))==True
>>> os.remove(os.path.join(Main_Dir,"seed_rxn.json"))
>>> os.remove(os.path.join(Main_Dir,"seed_compound.json"))
Source code in adtoolbox/core.py
def download_seed_databases(self,verbose:bool=True) -> None :
    """This function will download the seed databases, both compound and reaction databases.

    Required Configs:
        - config.seed_rxn_url
        - config.seed_compound_url
        - config.reaction_db
        - config.compound_db

    Args:
        verbose (bool, optional): Whether to print the progress or not. Defaults to True.

    Examples:
        >>> import os
        >>> assert os.path.exists(os.path.join(Main_Dir,"seed_rxn.json"))==False
        >>> assert os.path.exists(os.path.join(Main_Dir,"seed_compound.json"))==False
        >>> db=Database(config=configs.Database(reaction_db=os.path.join(Main_Dir,"seed_rxn.json"),compound_db=os.path.join(Main_Dir,"seed_compound.json")))
        >>> db.download_seed_databases(verbose=False)
        >>> assert os.path.exists(os.path.join(Main_Dir,"seed_rxn.json"))==True
        >>> assert os.path.exists(os.path.join(Main_Dir,"seed_compound.json"))==True
        >>> os.remove(os.path.join(Main_Dir,"seed_rxn.json"))
        >>> os.remove(os.path.join(Main_Dir,"seed_compound.json"))
    """
    r = requests.get(self.config.seed_rxn_url, allow_redirects=True,stream=True)
    if not os.path.exists(Path(self.config.reaction_db).parent):
        os.makedirs(Path(self.config.reaction_db).parent)
    with open(self.config.reaction_db, 'wb') as f:
        f.write(r.content)
    if verbose:
        rich.print(f"[green]Reaction database downloaded to {self.config.reaction_db}")
    r=requests.get(self.config.seed_compound_url,allow_redirects=True,stream=True)
    with open(self.config.compound_db, 'wb') as f:
        f.write(r.content)
    if verbose:
        rich.print(f"[green]Compound database downloaded to {self.config.compound_db}")

download_studies_database(verbose=True)

This function will download the required files for studies functionality.

Parameters:

  • verbose (bool): Whether to print the progress or not. Defaults to True.

Examples:

>>> import os
>>> assert os.path.exists(os.path.join(Main_Dir,"studies_test_db.tsv"))==False
>>> db=Database(config=configs.Database(studies_db=os.path.join(Main_Dir,"studies_test_db.tsv")))
>>> db.download_studies_database(verbose=False)
>>> assert os.path.exists(os.path.join(Main_Dir,"studies_test_db.tsv"))==True
>>> assert os.path.getsize(os.path.join(Main_Dir,"studies_test_db.tsv"))>0
>>> os.remove(os.path.join(Main_Dir,"studies_test_db.tsv"))
Source code in adtoolbox/core.py
def download_studies_database(self,verbose:bool=True)->None:
    """
    This function will download the required files for studies functionality.

    Args:
        verbose (bool, optional): Whether to print the progress or not. Defaults to True.

    Examples:
        >>> import os
        >>> assert os.path.exists(os.path.join(Main_Dir,"studies_test_db.tsv"))==False
        >>> db=Database(config=configs.Database(studies_db=os.path.join(Main_Dir,"studies_test_db.tsv")))
        >>> db.download_studies_database(verbose=False)
        >>> assert os.path.exists(os.path.join(Main_Dir,"studies_test_db.tsv"))==True
        >>> assert os.path.getsize(os.path.join(Main_Dir,"studies_test_db.tsv"))>0
        >>> os.remove(os.path.join(Main_Dir,"studies_test_db.tsv"))
    """
    for i in self.config.studies_remote:
        r = requests.get(self.config.studies_remote[i], allow_redirects=True)
        if not os.path.exists(Path(self.config.studies_local[i]).parent):
            os.makedirs(Path(self.config.studies_local[i]).parent)
        with open(self.config.studies_local[i], 'wb') as f:
            f.write(r.content)

        if verbose:
            rich.print(f"[bold green]Downloaded {self.config.studies_remote[i]}[/bold green]")

filter_seed_from_ec(ec_list, save=False)

This function takes a list of EC numbers and filters the seed database to find the seed reactions that have the EC numbers in their EC number list. This will help to trim the large seed database to a smaller one that only contains the reactions that are relevant to the AD process.

Parameters:

  • ec_list (list[str]): A list of EC numbers. (required)
  • save (bool): Whether to save the filtered seed database or not. Defaults to False.

Returns:

  • tuple: A tuple containing the filtered seed reaction database and the seed compound database, respectively.

Required Configs
  • config.reaction_db
  • config.compound_db
  • config.local_reaction_db
  • config.local_compound_db

Examples:

>>> db=Database()
>>> seed_rxn_db,seed_compound_db=db.filter_seed_from_ec(["1.1.1.1","1.1.1.2"])
>>> assert len(seed_rxn_db)>0 and len(seed_compound_db)>0
>>> assert pd.read_json(configs.Database().reaction_db).shape[0]>pd.DataFrame(seed_rxn_db).shape[0]
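
With save=True the trimmed databases are also written to config.local_reaction_db and config.local_compound_db; a sketch of that variant:

from adtoolbox import configs, core

db = core.Database(config=configs.Database())

# Keep only seed reactions whose EC annotations intersect the list,
# plus the compounds those reactions reference; persist both locally.
rxns, compounds = db.filter_seed_from_ec(["1.1.1.1", "1.1.1.2"], save=True)
print(len(rxns), len(compounds))
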
Source code in adtoolbox/core.py
def filter_seed_from_ec(self, 
                        ec_list:list[str],
                        save:bool=False) -> tuple:
    """
    This function takes a list of EC numbers and filters the seed database to find the seed reactions that have the EC numbers in their EC number list.
    This will help to trim the large seed database to a smaller one that only contains the reactions that are relevant to the AD process.

    Args:
        ec_list (list[str]): A list of EC numbers.
        save (bool, optional): Whether to save the filtered seed database or not. Defaults to False.

    Returns:
        tuple: A tuple containing the filtered seed reaction database and the seed compound database, respectively.

    Required Configs:
        - config.reaction_db
        - config.compound_db
        - config.local_reaction_db
        - config.local_compound_db


    Examples:
        >>> db=Database()
        >>> seed_rxn_db,seed_compound_db=db.filter_seed_from_ec(["1.1.1.1","1.1.1.2"])
        >>> assert len(seed_rxn_db)>0 and len(seed_compound_db)>0
        >>> assert pd.read_json(configs.Database().reaction_db).shape[0]>pd.DataFrame(seed_rxn_db).shape[0]
    """
    seed_rxn_db=pd.read_json(self.config.reaction_db)
    seed_compound_db=pd.read_json(self.config.compound_db)
    seed_rxn_db=seed_rxn_db[seed_rxn_db["ec_numbers"].apply(lambda x: any(ec in x for ec in ec_list) if x else False)]
    seed_compound_db=seed_compound_db[seed_compound_db["id"].apply(lambda x: True if x in seed_rxn_db["stoichiometry"].sum() else False)]
    if save:
        seed_rxn_db.to_json(self.config.local_reaction_db)
        seed_compound_db.to_json(self.config.local_compound_db)
    return seed_rxn_db.to_dict(orient="records"),seed_compound_db.to_dict(orient="records")

get_experiment_from_experiments_db(field_name, query)

This function returns an experiment from the experiments database. It takes the query string and the column name to query and returns the experiment that contains the query string in the given column.

Required Configs
  • config.experimental_data_db

Parameters:

  • field_name (str): The name of the column to query. (required)
  • query (str): The query string. (required)

Returns:

  • list[Experiment]: A list of Experiment instances matching the query.

Examples:

>>> import os,json
>>> assert os.path.exists(os.path.join(Main_Dir,"experiments_test_db.json"))==False
>>> db=Database(config=configs.Database(experimental_data_db=os.path.join(Main_Dir,"experiments_test_db.json")))
>>> experiment=Experiment(name="test_study",time=[0,1,2],variables=[2,6],data= [[1,2,3],[4,5,6]],reference="test")
>>> db.add_experiment_to_experiments_db(experiment)
>>> assert os.path.exists(os.path.join(Main_Dir,"experiments_test_db.json"))==True
>>> assert os.path.getsize(os.path.join(Main_Dir,"experiments_test_db.json"))>0
>>> experiment=db.get_experiment_from_experiments_db("name","test_study")
>>> assert experiment[0].name=="test_study"
>>> os.remove(os.path.join(Main_Dir,"experiments_test_db.json"))
Source code in adtoolbox/core.py
def get_experiment_from_experiments_db(self,field_name:str,query:str)->list[Experiment]:
    r"""
    This function returns an experiment from the experiments database. It takes the query string and the column name to query and returns the experiment that contains the query string in the given column.

    Required Configs:
        - config.experimental_data_db

    Args:
        field_name (str): The name of the column to query.
        query (str): The query string.

    Returns:
        list[Experiment]: A list of Experiment instances matching the query.

    Examples:
        >>> import os,json
        >>> assert os.path.exists(os.path.join(Main_Dir,"experiments_test_db.json"))==False
        >>> db=Database(config=configs.Database(experimental_data_db=os.path.join(Main_Dir,"experiments_test_db.json")))
        >>> experiment=Experiment(name="test_study",time=[0,1,2],variables=[2,6],data= [[1,2,3],[4,5,6]],reference="test")
        >>> db.add_experiment_to_experiments_db(experiment)
        >>> assert os.path.exists(os.path.join(Main_Dir,"experiments_test_db.json"))==True
        >>> assert os.path.getsize(os.path.join(Main_Dir,"experiments_test_db.json"))>0
        >>> experiment=db.get_experiment_from_experiments_db("name","test_study")
        >>> assert experiment[0].name=="test_study"
        >>> os.remove(os.path.join(Main_Dir,"experiments_test_db.json"))
    """
    if not os.path.exists(self.config.experimental_data_db):
        raise FileNotFoundError("Experimental data database does not exist!")

    with open(self.config.experimental_data_db,"r") as f:
        experiments_db=json.load(f)
    experiments_db=[experiment for experiment in experiments_db if query in experiment[field_name]]
    return [Experiment(**experiment) for experiment in experiments_db]

get_feed_from_feed_db(field_name, query)

This function returns a feed from the feed database. It takes the query string and the column name to query and returns the feed that contains the query string in the given column.

Required Configs
  • config.feed_db

Parameters:

  • field_name (str): The name of the column to query. (required)
  • query (str): The query string. (required)

Returns:

  • list[Feed]: A list of Feed instances matching the query.

Examples:

>>> import os
>>> assert os.path.exists(os.path.join(Main_Dir,"feed_test_db.tsv"))==False
>>> db=Database(config=configs.Database(feed_db=os.path.join(Main_Dir,"feed_test_db.tsv")))
>>> feed=Feed(name="test_feed",carbohydrates=10,lipids=20,proteins=30,tss=80,si=10,xi=30,reference="test")
>>> db.add_feed_to_feed_db(feed)
>>> assert os.path.exists(os.path.join(Main_Dir,"feed_test_db.tsv"))==True
>>> assert pd.read_table(os.path.join(Main_Dir,"feed_test_db.tsv"),delimiter="\t").shape[0]>0
>>> feed=db.get_feed_from_feed_db("name","test_feed")
>>> assert feed[0].name=="test_feed"
>>> os.remove(os.path.join(Main_Dir,"feed_test_db.tsv"))
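
Note that the match is a substring match on the queried column (pandas str.contains under the hood), so partial names also hit; a sketch with a hypothetical query string:

from adtoolbox import configs, core

db = core.Database(config=configs.Database())

# Substring match: any feed whose name contains "manure" is returned
# (hypothetical query; depends on what the feed database contains).
for feed in db.get_feed_from_feed_db("name", "manure"):
    print(feed.name, feed.carbohydrates, feed.lipids, feed.proteins)
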
Source code in adtoolbox/core.py
def get_feed_from_feed_db(self,field_name:str,query:str)->list[Feed]:
    r"""
    This function returns a feed from the feed database. It takes the query string and the column name to query and returns the feed that contains the query string in the given column.

    Required Configs:
        - config.feed_db

    Args:
        field_name (str): The name of the column to query.
        query (str): The query string.

    Returns:
        list[Feed]: A list of Feed instances matching the query.

    Examples:
        >>> import os
        >>> assert os.path.exists(os.path.join(Main_Dir,"feed_test_db.tsv"))==False
        >>> db=Database(config=configs.Database(feed_db=os.path.join(Main_Dir,"feed_test_db.tsv")))
        >>> feed=Feed(name="test_feed",carbohydrates=10,lipids=20,proteins=30,tss=80,si=10,xi=30,reference="test")
        >>> db.add_feed_to_feed_db(feed)
        >>> assert os.path.exists(os.path.join(Main_Dir,"feed_test_db.tsv"))==True
        >>> assert pd.read_table(os.path.join(Main_Dir,"feed_test_db.tsv"),delimiter="\t").shape[0]>0
        >>> feed=db.get_feed_from_feed_db("name","test_feed")
        >>> assert feed[0].name=="test_feed"
        >>> os.remove(os.path.join(Main_Dir,"feed_test_db.tsv"))

    """
    if not os.path.exists(self.config.feed_db):
        raise FileNotFoundError("Feed database does not exist!")

    feed_db=pd.read_table(self.config.feed_db,delimiter="\t")
    feed_db=feed_db[feed_db[field_name].str.contains(query)]
    return [Feed(**feed.to_dict()) for _,feed in feed_db.iterrows()]

get_metagenomics_study_from_metagenomics_studies_db(field_name, query)

This function returns a metagenomics study from the metagenomics studies database. It takes the query string and the column name to query and returns the metagenomics study that contains the query string in the given column.

Required Configs
  • config.metagenomics_studies_db

Parameters:

  • field_name (str): The name of the column to query. (required)
  • query (str): The query string. (required)

Returns:

  • list[MetagenomicsStudy]: A list of MetagenomicsStudy instances matching the query.

Examples:

>>> import os
>>> assert os.path.exists(os.path.join(Main_Dir,"metagenomics_studies_test_db.tsv"))==False
>>> db=Database(config=configs.Database(metagenomics_studies_db=os.path.join(Main_Dir,"metagenomics_studies_test_db.tsv")))
>>> metagenomics_study=MetagenomicsStudy(name="test_study",study_type="metagenomics",microbiome="anaerobic digester",sample_accession="test",comments="test",study_accession="test")
>>> db.add_metagenomics_study_to_metagenomics_studies_db(metagenomics_study)
>>> assert os.path.exists(os.path.join(Main_Dir,"metagenomics_studies_test_db.tsv"))==True
>>> assert pd.read_table(os.path.join(Main_Dir,"metagenomics_studies_test_db.tsv"),delimiter="\t").shape[0]>0
>>> metagenomics_study=db.get_metagenomics_study_from_metagenomics_studies_db("name","test_study")
>>> assert metagenomics_study[0].name=="test_study"
>>> os.remove(os.path.join(Main_Dir,"metagenomics_studies_test_db.tsv"))
Source code in adtoolbox/core.py
def get_metagenomics_study_from_metagenomics_studies_db(self,field_name:str,query:str)->list[MetagenomicsStudy]:
    r"""
    This function returns a metagenomics study from the metagenomics studies database. It takes the query string and the column name to query and returns the metagenomics study that contains the query string in the given column.

    Required Configs:
        - config.metagenomics_studies_db

    Args:
        field_name (str): The name of the column to query.
        query (str): The query string.

    Returns:
        list[MetagenomicsStudy]: A list of MetagenomicsStudy instances matching the query.

    Examples:
        >>> import os
        >>> assert os.path.exists(os.path.join(Main_Dir,"metagenomics_studies_test_db.tsv"))==False
        >>> db=Database(config=configs.Database(metagenomics_studies_db=os.path.join(Main_Dir,"metagenomics_studies_test_db.tsv")))
        >>> metagenomics_study=MetagenomicsStudy(name="test_study",study_type="metagenomics",microbiome="anaerobic digester",sample_accession="test",comments="test",study_accession="test")
        >>> db.add_metagenomics_study_to_metagenomics_studies_db(metagenomics_study)
        >>> assert os.path.exists(os.path.join(Main_Dir,"metagenomics_studies_test_db.tsv"))==True
        >>> assert pd.read_table(os.path.join(Main_Dir,"metagenomics_studies_test_db.tsv"),delimiter="\t").shape[0]>0
        >>> metagenomics_study=db.get_metagenomics_study_from_metagenomics_studies_db("name","test_study")
        >>> assert metagenomics_study[0].name=="test_study"
        >>> os.remove(os.path.join(Main_Dir,"metagenomics_studies_test_db.tsv"))
    """
    if not os.path.exists(self.config.metagenomics_studies_db):
        raise FileNotFoundError("Metagenomics studies database does not exist!")

    metagenomics_studies_db=pd.read_table(self.config.metagenomics_studies_db,delimiter="\t")
    metagenomics_studies_db=metagenomics_studies_db[metagenomics_studies_db[field_name].str.contains(query)]
    return [MetagenomicsStudy(**metagenomics_study.to_dict()) for _,metagenomics_study in metagenomics_studies_db.iterrows()]

get_protein_seqs_from_uniprot(uniprot_id)

This function takes a uniprot id and fetches the protein sequence from Uniprot.

Parameters:

  • uniprot_id (str): The UniProt ID of the protein. (required)

Returns:

  • str: The protein sequence.

Examples:

>>> db=Database()
>>> seq=db.get_protein_seqs_from_uniprot("P0A9P0")
>>> assert type(seq)==str and len(seq)>0
Source code in adtoolbox/core.py
def get_protein_seqs_from_uniprot(self, uniprot_id:str) -> str:
    """
    This function takes a uniprot id and fetches the protein sequence from Uniprot.

    Args:
        uniprot_id (str): The uniprot id of the protein.


    Returns:
        str: The protein sequence.

    Examples:
        >>> db=Database()
        >>> seq=db.get_protein_seqs_from_uniprot("P0A9P0")
        >>> assert type(seq)==str and len(seq)>0
    """
    Base_URL = "https://rest.uniprot.org/uniprotkb/"
    session = requests.Session()
    retry = Retry(connect=3, backoff_factor=0.5)
    adapter = HTTPAdapter(max_retries=retry)
    session.mount('http://', adapter)
    try:
        file = session.get(
            f"https://rest.uniprot.org/uniprotkb/{uniprot_id}.fasta", timeout=10)
    except requests.exceptions.RequestException:
        print("Could not fetch the sequence! Trying again ...")
        while True:
            time.sleep(5)
            file = session.get(Base_URL+uniprot_id+".fasta", timeout=10)
            if file.ok:
                break

    return ''.join(file.text.split('\n')[1:-1])

initialize_experimental_data_db()

This function initializes ADToolbox's experimental data database by creating an empty json file. Be careful, this will overwrite any existing file with the same name.

Required Configs
  • config.experimental_data_db

Examples:

>>> import os
>>> assert os.path.exists(os.path.join(Main_Dir,"experimental_data_test_db.json"))==False
>>> db=Database(config=configs.Database(experimental_data_db=os.path.join(Main_Dir,"experimental_data_test_db.json")))
>>> db.initialize_experimental_data_db()
>>> assert pd.read_json(os.path.join(Main_Dir,"experimental_data_test_db.json")).shape[0]==0
>>> with open(os.path.join(Main_Dir,"experimental_data_test_db.json"),"r") as f:
...     assert json.load(f)==[]
>>> os.remove(os.path.join(Main_Dir,"experimental_data_test_db.json"))
Source code in adtoolbox/core.py
def initialize_experimental_data_db(self)->None:
    """This function intializes ADToolbox's experimental data database by creating an empty json file.
    Be careful, this will overwrite any existing file with the same name.

    Required Configs:
        - config.experimental_data_db

    Examples:
        >>> import os
        >>> assert os.path.exists(os.path.join(Main_Dir,"experimental_data_test_db.json"))==False
        >>> db=Database(config=configs.Database(experimental_data_db=os.path.join(Main_Dir,"experimental_data_test_db.json")))
        >>> db.initialize_experimental_data_db()
        >>> assert pd.read_json(os.path.join(Main_Dir,"experimental_data_test_db.json")).shape[0]==0
        >>> with open(os.path.join(Main_Dir,"experimental_data_test_db.json"),"r") as f:
        ...     assert json.load(f)==[]
        >>> os.remove(os.path.join(Main_Dir,"experimental_data_test_db.json"))

    """
    pd.DataFrame(columns=["name","initial_conditions","time","variables","data","reference"]).to_json(self.config.experimental_data_db,orient="records")

initialize_feed_db()

This function initializes ADToolbox's Feed database by creating an empty tsv file. Be careful, this will overwrite any existing file with the same name.

Required Configs
  • config.feed_db

Examples:

>>> import os
>>> assert os.path.exists(os.path.join(Main_Dir,"feed_test_db.tsv"))==False
>>> db=Database(config=configs.Database(feed_db=os.path.join(Main_Dir,"feed_test_db.tsv")))
>>> db.initialize_feed_db()
>>> assert pd.read_table(os.path.join(Main_Dir,"feed_test_db.tsv"),delimiter='\t').shape[0]==0
>>> assert set(pd.read_table(os.path.join(Main_Dir,"feed_test_db.tsv"),delimiter='\t').columns)==set(["name","carbohydrates","lipids","proteins","tss","si","xi","reference"])
>>> os.remove(os.path.join(Main_Dir,"feed_test_db.tsv"))
Source code in adtoolbox/core.py
def initialize_feed_db(self)->None:
    r"""This function intializes ADToolbox's Feed database by creating an empty tsv file.
    Be careful, this will overwrite any existing file with the same name.

    Required Configs:
        - config.feed_db

    Examples:
        >>> import os
        >>> assert os.path.exists(os.path.join(Main_Dir,"feed_test_db.tsv"))==False
        >>> db=Database(config=configs.Database(feed_db=os.path.join(Main_Dir,"feed_test_db.tsv")))
        >>> db.initialize_feed_db()
        >>> assert pd.read_table(os.path.join(Main_Dir,"feed_test_db.tsv"),delimiter='\t').shape[0]==0
        >>> assert set(pd.read_table(os.path.join(Main_Dir,"feed_test_db.tsv"),delimiter='\t').columns)==set(["name","carbohydrates","lipids","proteins","tss","si","xi","reference"])
        >>> os.remove(os.path.join(Main_Dir,"feed_test_db.tsv"))

    """
    pd.DataFrame(columns=["name","carbohydrates","lipids","proteins","tss","si","xi","reference"]).to_csv(self.config.feed_db,index=False,sep="\t")

initialize_metagenomics_studies_db()

This function initializes ADToolbox's Metagenomics studies database by creating an empty tsv file. Be careful, this will overwrite any existing file with the same name.

Required Configs
  • config.metagenomics_studies_db

Examples:

>>> import os
>>> assert os.path.exists(os.path.join(Main_Dir,"metagenomics_studies_test_db.tsv"))==False
>>> db=Database(config=configs.Database(metagenomics_studies_db=os.path.join(Main_Dir,"metagenomics_studies_test_db.tsv")))
>>> db.initialize_metagenomics_studies_db()
>>> assert pd.read_table(os.path.join(Main_Dir,"metagenomics_studies_test_db.tsv"),delimiter="\t").shape[0]==0
>>> assert set(pd.read_table(os.path.join(Main_Dir,"metagenomics_studies_test_db.tsv"),delimiter="\t").columns)==set(["name","study_type","microbiome","sample_accession","comments","study_accession"])
>>> os.remove(os.path.join(Main_Dir,"metagenomics_studies_test_db.tsv"))
Source code in adtoolbox/core.py
def initialize_metagenomics_studies_db(self)->None:
    r"""This function intializes ADToolbox's Metagenomics studies database by creating an empty tsv file.
    Be careful, this will overwrite any existing file with the same name.

    Required Configs:
        - config.metagenomics_studies_db

    Examples:
        >>> import os
        >>> assert os.path.exists(os.path.join(Main_Dir,"metagenomics_studies_test_db.tsv"))==False
        >>> db=Database(config=configs.Database(metagenomics_studies_db=os.path.join(Main_Dir,"metagenomics_studies_test_db.tsv")))
        >>> db.initialize_metagenomics_studies_db()
        >>> assert pd.read_table(os.path.join(Main_Dir,"metagenomics_studies_test_db.tsv"),delimiter="\t").shape[0]==0
        >>> assert set(pd.read_table(os.path.join(Main_Dir,"metagenomics_studies_test_db.tsv"),delimiter="\t").columns)==set(["name","study_type","microbiome","sample_accession","comments","study_accession"])
        >>> os.remove(os.path.join(Main_Dir,"metagenomics_studies_test_db.tsv"))

    """
    pd.DataFrame(columns=["name","study_type","microbiome","sample_accession","comments","study_accession"]).to_csv(self.config.metagenomics_studies_db,index=False,sep="\t")

initialize_protein_db()

This function initializes ADToolbox's protein database by creating an empty fasta file. Be careful, this will overwrite any existing file with the same name. Logically, this method needs config.protein_db to be defined.

Required Configs
  • config.protein_db

Examples:

>>> import os
>>> assert os.path.exists(os.path.join(Main_Dir,"protein_test_db.fasta"))==False # This is just to make sure that the following lines create the file
>>> db=Database(config=configs.Database(protein_db=os.path.join(Main_Dir,"protein_test_db.fasta"))) # point to a test non-existing file
>>> db.initialize_protein_db() # initialize the protein database
>>> assert os.path.exists(os.path.join(Main_Dir,"protein_test_db.fasta"))==True # check if the file is created
>>> os.remove(os.path.join(Main_Dir,"protein_test_db.fasta")) # remove the file to clean up
Source code in adtoolbox/core.py
def initialize_protein_db(self)->None:
    """This function intializes ADToolbox's protein database by creating an empty fasta file.
    Be careful, this will overwrite any existing file with the same name.
    Logically, this needs method needs config.protein_db to be defined.

    Required Configs:
        - config.protein_db
        --------

    Examples:
        >>> import os
        >>> assert os.path.exists(os.path.join(Main_Dir,"protein_test_db.fasta"))==False # This is just to make sure that the following lines create the file
        >>> db=Database(config=configs.Database(protein_db=os.path.join(Main_Dir,"protein_test_db.fasta"))) # point to a test non-existing file
        >>> db.initialize_protein_db() # initialize the protein database
        >>> assert os.path.exists(os.path.join(Main_Dir,"protein_test_db.fasta"))==True # check if the file is created
        >>> os.remove(os.path.join(Main_Dir,"protein_test_db.fasta")) # remove the file to clean up
    """

    if not (pathlib.Path(self.config.protein_db).parent).exists():
        pathlib.Path(self.config.protein_db).parent.mkdir(parents=True)
    with open(self.config.protein_db, 'w') as f:
        pass

initialize_reaction_db()

This function initializes ADToolbox's reaction database by creating an empty tsv file. Be careful, this will overwrite any existing file with the same name.

Required Configs
  • config.reaction_db

Examples:

>>> import os
>>> assert os.path.exists(os.path.join(Main_Dir,"reaction_test_db.tsv"))==False
>>> db=Database(config=configs.Database(reaction_db=os.path.join(Main_Dir,"reaction_test_db.tsv")))
>>> db.initialize_reaction_db()
>>> assert pd.read_table(os.path.join(Main_Dir,"reaction_test_db.tsv"),delimiter="\t").shape[0]==0
>>> assert set(pd.read_csv(os.path.join(Main_Dir,"reaction_test_db.tsv"),delimiter="\t").columns)==set(["ec_numbers","seed_ids","reaction_names","adm1_reaction","e_adm_reactions","pathways"])
>>> os.remove(os.path.join(Main_Dir,"reaction_test_db.tsv"))
Source code in adtoolbox/core.py
def initialize_reaction_db(self)->None:
    r"""This function intializes ADToolbox's reaction database by creating an empty tsv file.
    Be careful, this will overwrite any existing file with the same name.

    Required Configs:
        - config.reaction_db

    Examples:
        >>> import os
        >>> assert os.path.exists(os.path.join(Main_Dir,"reaction_test_db.tsv"))==False
        >>> db=Database(config=configs.Database(reaction_db=os.path.join(Main_Dir,"reaction_test_db.tsv")))
        >>> db.initialize_reaction_db()
        >>> assert pd.read_table(os.path.join(Main_Dir,"reaction_test_db.tsv"),delimiter="\t").shape[0]==0
        >>> assert set(pd.read_csv(os.path.join(Main_Dir,"reaction_test_db.tsv"),delimiter="\t").columns)==set(["ec_numbers","seed_ids","reaction_names","adm1_reaction","e_adm_reactions","pathways"])
        >>> os.remove(os.path.join(Main_Dir,"reaction_test_db.tsv"))

    """
    pd.DataFrame(columns=["ec_numbers","seed_ids","reaction_names","adm1_reaction","e_adm_reactions","pathways"]).to_csv(self.config.reaction_db,index=False,sep="\t")

proteins_from_ec(ec_number)

This function returns a dictionary of protein sequences for a given EC number. The keys are UniProt IDs joined with the EC number (the header convention of the ADToolbox protein database) and the values are the protein sequences. Since ADToolbox deals with microbial processes, only bacterial and archaeal proteins are considered.

Parameters:

- ec_number (str): The EC number. [required]

Returns:

- dict: A dictionary of protein sequences.

Examples:

>>> db=Database()
>>> protein_seqs=db.proteins_from_ec("1.1.1.1")
>>> assert len(protein_seqs)>0
>>> assert list(protein_seqs.keys())[0].split("|")[1]=="1.1.1.1"
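Since the keys already follow the uniprot_id|ec_number header convention of the protein database, the returned mapping can, for example, be appended to the database fasta file. A minimal sketch; appending directly to config.protein_db is an assumption here, not part of the method:

>>> db=Database()
>>> protein_seqs=db.proteins_from_ec("1.1.1.1")
>>> with open(db.config.protein_db,"a") as f:
...     for header,seq in protein_seqs.items():
...         _=f.write(">"+header+"\n"+seq+"\n")  # header is 'uniprot_id|ec_number'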
Source code in adtoolbox/core.py
def proteins_from_ec(self,ec_number:str) -> dict:
    """
    This function returns a dictionary of protein sequences for a given EC number.
    The keys are UniProt IDs joined with the EC number (the header convention of the
    ADToolbox protein database) and the values are the protein sequences. Since ADToolbox
    deals with microbial processes, only bacterial and archaeal proteins are considered.

    Args:
        ec_number (str): The EC number.

    Returns:
        dict: A dictionary of protein sequences.

    Examples:
        >>> db=Database()
        >>> protein_seqs=db.proteins_from_ec("1.1.1.1")
        >>> assert len(protein_seqs)>0
        >>> assert list(protein_seqs.keys())[0].split("|")[1]=="1.1.1.1"
    """
    session = requests.Session()
    retry = Retry(connect=3, backoff_factor=0.5)
    adapter = HTTPAdapter(max_retries=retry)
    session.mount('http://', adapter)
    protein_seqs={}
    try:
        file = session.get(
            f"https://rest.uniprot.org/uniprotkb/stream?format=fasta&query=%28%28ec%3A{ec_number}%29%20AND%20%28reviewed%3Atrue%29%20NOT%20%28taxonomy_id%3A2759%29%29", timeout=30)
    except (requests.exceptions.HTTPError, requests.exceptions.ConnectionError):
        print("Request Error! Trying again ...")
        time.sleep(30)
        file = session.get(
            f"https://rest.uniprot.org/uniprotkb/stream?format=fasta&query=%28%28ec%3A{ec_number}%29%20AND%20%28reviewed%3Atrue%29%20NOT%20%28taxonomy_id%3A2759%29%29", timeout=30)
    # Catch any other failure; if the request never succeeded there is nothing to parse
    except Exception:
        print('Something went wrong!')
        return protein_seqs
    text = file.text
    if text:
        text=text.split('>')
        text.remove("")
        for seq in text:
            protein_seqs.update([(seq.split("\n")[0].split("|")[1]+"|"+ec_number, "".join(seq.split("\n")[1:]))])


    return protein_seqs

remove_experiment_from_experiments_db(field_name, query)

This function removes from the experiments database any experiment whose value in the given column (field_name) contains the query string.

Required Configs
  • config.experimental_data_db

Parameters:

- field_name (str): The name of the column to query. [required]
- query (str): The query string. [required]

Examples:

>>> import os,json
>>> assert os.path.exists(os.path.join(Main_Dir,"experiments_test_db.json"))==False
>>> db=Database(config=configs.Database(experimental_data_db=os.path.join(Main_Dir,"experiments_test_db.json")))
>>> experiment=Experiment(name="test_study",time=[0,1,2],variables=[2,6],data= [[1,2,3],[4,5,6]],reference="test")
>>> db.add_experiment_to_experiments_db(experiment)
>>> assert os.path.exists(os.path.join(Main_Dir,"experiments_test_db.json"))==True
>>> assert os.path.getsize(os.path.join(Main_Dir,"experiments_test_db.json"))>0
>>> db.remove_experiment_from_experiments_db("name","test_study")
>>> assert pd.read_json(os.path.join(Main_Dir,"experiments_test_db.json")).shape[0]==0
>>> os.remove(os.path.join(Main_Dir,"experiments_test_db.json"))
Source code in adtoolbox/core.py
def remove_experiment_from_experiments_db(self,field_name:str,query:str)->None:
    r"""
    This function removes from the experiments database any experiment whose value in the given column (field_name) contains the query string.

    Required Configs:
        - config.experimental_data_db

    Args:
        field_name (str): The name of the column to query.
        query (str): The query string.

    Examples:
        >>> import os,json
        >>> assert os.path.exists(os.path.join(Main_Dir,"experiments_test_db.json"))==False
        >>> db=Database(config=configs.Database(experimental_data_db=os.path.join(Main_Dir,"experiments_test_db.json")))
        >>> experiment=Experiment(name="test_study",time=[0,1,2],variables=[2,6],data= [[1,2,3],[4,5,6]],reference="test")
        >>> db.add_experiment_to_experiments_db(experiment)
        >>> assert os.path.exists(os.path.join(Main_Dir,"experiments_test_db.json"))==True
        >>> assert os.path.getsize(os.path.join(Main_Dir,"experiments_test_db.json"))>0
        >>> db.remove_experiment_from_experiments_db("name","test_study")
        >>> assert pd.read_json(os.path.join(Main_Dir,"experiments_test_db.json")).shape[0]==0
        >>> os.remove(os.path.join(Main_Dir,"experiments_test_db.json"))
    """
    if not os.path.exists(self.config.experimental_data_db):
        raise FileNotFoundError("Experimental data database does not exist!")

    with open(self.config.experimental_data_db,"r") as f:
        experiments_db=json.load(f)
    experiments_db=[experiment for experiment in experiments_db if query not in experiment[field_name]]
    with open(self.config.experimental_data_db,"w") as f:
        json.dump(experiments_db,f)

remove_feed_from_feed_db(field_name, query)

This function removes from the feed database any feed whose value in the given column (field_name) contains the query string.

Required Configs
  • config.feed_db

Parameters:

- field_name (str): The name of the column to query. [required]
- query (str): The query string. [required]

Examples:

>>> import os
>>> assert os.path.exists(os.path.join(Main_Dir,"feed_test_db.tsv"))==False
>>> db=Database(config=configs.Database(feed_db=os.path.join(Main_Dir,"feed_test_db.tsv")))
>>> feed=Feed(name="test_feed",carbohydrates=10,lipids=20,proteins=30,tss=80,si=10,xi=30,reference="test")
>>> db.add_feed_to_feed_db(feed)
>>> assert os.path.exists(os.path.join(Main_Dir,"feed_test_db.tsv"))==True
>>> assert pd.read_table(os.path.join(Main_Dir,"feed_test_db.tsv"),delimiter="\t").shape[0]>0
>>> db.remove_feed_from_feed_db("name","test_feed")
>>> assert pd.read_table(os.path.join(Main_Dir,"feed_test_db.tsv"),delimiter="\t").shape[0]==0
>>> os.remove(os.path.join(Main_Dir,"feed_test_db.tsv"))
Source code in adtoolbox/core.py
def remove_feed_from_feed_db(self,field_name:str,query:str)->None:
    r"""
    This function removes from the feed database any feed whose value in the given column (field_name) contains the query string.

    Required Configs:
        - config.feed_db

    Args:
        field_name (str): The name of the column to query.
        query (str): The query string.

    Examples:
        >>> import os
        >>> assert os.path.exists(os.path.join(Main_Dir,"feed_test_db.tsv"))==False
        >>> db=Database(config=configs.Database(feed_db=os.path.join(Main_Dir,"feed_test_db.tsv")))
        >>> feed=Feed(name="test_feed",carbohydrates=10,lipids=20,proteins=30,tss=80,si=10,xi=30,reference="test")
        >>> db.add_feed_to_feed_db(feed)
        >>> assert os.path.exists(os.path.join(Main_Dir,"feed_test_db.tsv"))==True
        >>> assert pd.read_table(os.path.join(Main_Dir,"feed_test_db.tsv"),delimiter="\t").shape[0]>0
        >>> db.remove_feed_from_feed_db("name","test_feed")
        >>> assert pd.read_table(os.path.join(Main_Dir,"feed_test_db.tsv"),delimiter="\t").shape[0]==0
        >>> os.remove(os.path.join(Main_Dir,"feed_test_db.tsv"))

    """
    if not os.path.exists(self.config.feed_db):
        raise FileNotFoundError("Feed database does not exist!")


    feed_db=pd.read_table(self.config.feed_db,delimiter="\t")
    feed_db=feed_db[feed_db[field_name].str.contains(query)==False]
    feed_db.to_csv(self.config.feed_db,index=False,sep="\t")

remove_metagenomics_study_from_metagenomics_studies_db(field_name, query)

This function removes from the metagenomics studies database any study whose value in the given column (field_name) contains the query string.

Required Configs
  • config.metagenomics_studies_db

Parameters:

- field_name (str): The name of the column to query. [required]
- query (str): The query string. [required]

Examples:

>>> import os
>>> assert os.path.exists(os.path.join(Main_Dir,"metagenomics_studies_test_db.tsv"))==False
>>> db=Database(config=configs.Database(metagenomics_studies_db=os.path.join(Main_Dir,"metagenomics_studies_test_db.tsv")))
>>> metagenomics_study=MetagenomicsStudy(name="test_study",study_type="metagenomics",microbiome="anaerobic digester",sample_accession="test",comments="test",study_accession="test")
>>> db.add_metagenomics_study_to_metagenomics_studies_db(metagenomics_study)
>>> assert os.path.exists(os.path.join(Main_Dir,"metagenomics_studies_test_db.tsv"))==True
>>> assert pd.read_table(os.path.join(Main_Dir,"metagenomics_studies_test_db.tsv"),delimiter="\t").shape[0]>0
>>> db.remove_metagenomics_study_from_metagenomics_studies_db("name","test_study")
>>> assert pd.read_table(os.path.join(Main_Dir,"metagenomics_studies_test_db.tsv"),delimiter="\t").shape[0]==0
>>> os.remove(os.path.join(Main_Dir,"metagenomics_studies_test_db.tsv"))
Source code in adtoolbox/core.py
def remove_metagenomics_study_from_metagenomics_studies_db(self,field_name:str,query:str)->None:
    r"""
    This function removes from the metagenomics studies database any study whose value in the given column (field_name) contains the query string.

    Required Configs:
        - config.metagenomics_studies_db

    Args:
        field_name (str): The name of the column to query.
        query (str): The query string.

    Examples:
        >>> import os
        >>> assert os.path.exists(os.path.join(Main_Dir,"metagenomics_studies_test_db.tsv"))==False
        >>> db=Database(config=configs.Database(metagenomics_studies_db=os.path.join(Main_Dir,"metagenomics_studies_test_db.tsv")))
        >>> metagenomics_study=MetagenomicsStudy(name="test_study",study_type="metagenomics",microbiome="anaerobic digester",sample_accession="test",comments="test",study_accession="test")
        >>> db.add_metagenomics_study_to_metagenomics_studies_db(metagenomics_study)
        >>> assert os.path.exists(os.path.join(Main_Dir,"metagenomics_studies_test_db.tsv"))==True
        >>> assert pd.read_table(os.path.join(Main_Dir,"metagenomics_studies_test_db.tsv"),delimiter="\t").shape[0]>0
        >>> db.remove_metagenomics_study_from_metagenomics_studies_db("name","test_study")
        >>> assert pd.read_table(os.path.join(Main_Dir,"metagenomics_studies_test_db.tsv"),delimiter="\t").shape[0]==0
        >>> os.remove(os.path.join(Main_Dir,"metagenomics_studies_test_db.tsv"))
    """
    if not os.path.exists(self.config.metagenomics_studies_db):
        raise FileNotFoundError("Metagenomics studies database does not exist!")

    metagenomics_studies_db=pd.read_table(self.config.metagenomics_studies_db,delimiter="\t")
    metagenomics_studies_db=metagenomics_studies_db[metagenomics_studies_db[field_name].str.contains(query)==False]
    metagenomics_studies_db.to_csv(self.config.metagenomics_studies_db,index=False,sep="\t")

8. Metagenomics

Here is a schematic view of core.Metagenomics API:

(Figure: schematic of the core.Metagenomics API)

This is the main class for the metagenomics functionality of ADToolbox. It contains all the methods required for the metagenomics analyses that ADToolbox offers.
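A typical workflow derives the dominant features of a sample from a QIIME2 feature table, maps them to representative GTDB genomes, and then aligns those genomes to the ADToolbox protein database. A minimal sketch, assuming the default configs point at valid files; the sample name and paths below are hypothetical:

from adtoolbox import core, configs

metag=core.Metagenomics(configs.Metagenomics())
top=metag.find_top_taxa("sample_1",10,mode="top_k")       # top 10 features of the sample
script=metag.align_to_gtdb("rep_seqs.fasta","gtdb_out")   # returns a bash script; run it yourself
# after running the script:
# feature_to_genome=metag.get_genomes_from_gtdb_alignment("gtdb_out")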

Source code in adtoolbox/core.py
class Metagenomics:

    """
    This is the main class for Metagenomics functionality of ADToolbox. This class contains all the methods required for metagenomics analysis 
    that ADToolbox offers.
    """
    def __init__(self,config:configs.Metagenomics)->None:
        """In order to instntiate an object from this class, you need to provide a metagenomics configs object from the configs module : configs.Metagenomics.
        Information for inputs and of each method is then obtained from the corresponding configs object. The following example shows how to instantiate an object from this class
        using the default configs object:

        Examples:
            >>> from adtoolbox import core, configs
            >>> config=configs.Metagenomics() ### This uses default arguments. Refer to configs module for more information.
            >>> metagenomics=core.Metagenomics(config)
            >>> assert type(metagenomics)==core.Metagenomics

        Args:
            config (configs.Metagenomics): A metagenomics configs object from configs module.

        Returns:
            None
        """
        self.config=config

    def find_top_taxa(
        self,
        sample_name:str,
        threshold:Union[int,float],
        mode:str='top_k',
        )->dict:
        """
        This function needs three inputs from qiime:
        1. feature table: This is the abundance of each feature in each sample (TSV).
        2. taxonomy table: This is the taxonomy of each feature (TSV). 
        3. rep seqs: This is the representative sequence of each feature (fasta).
        It then finds the top k features, or the features that form a specific percentile of the community of the sample.

        Required Configs:

            config.feature_table_dir: The path to the feature table tsv file.
            ---------
            config.taxonomy_table_dir: The path to the taxonomy table tsv file.
            ---------
            config.rep_seq_fasta: The path to the representative sequence fasta file.
            ---------

        Args:
            sample_name (str): The name of the sample.
            threshold (int, float): The threshold for the top k or the percentile.
            mode (str, optional): Whether to find the top k features or features that form specific percentile of the community of the sample. Defaults to 'top_k'. Options: 'top_k', 'percentile'.

        Returns:
            dict: A dictionary of the top k features and their taxonomy.
        """
        ### Load all the required files
        feature_table = pd.read_table(self.config.feature_table_dir, sep='\t',skiprows=1)
        taxonomy_table = pd.read_table(self.config.taxonomy_table_dir, delimiter='\t')
        repseqs=fasta_to_dict(self.config.rep_seq_fasta)
        ### End Loading
        if mode == 'top_k':
            sorted_df=feature_table.sort_values(sample_name, ascending=False)
            top_featureids=list(sorted_df['#OTU ID'].head(threshold))
            top_taxa=[taxonomy_table[taxonomy_table['Feature ID']==featureid]['Taxon'].values[0] for featureid in top_featureids]
            top_repseqs=[repseqs[featureid] for featureid in top_featureids]
            top_abundances=list(sorted_df[sample_name].head(threshold)/sorted_df[sample_name].sum())

        elif mode == 'percentile':
            feature_table[sample_name]=feature_table[sample_name]/feature_table[sample_name].sum()
            sorted_df=feature_table.sort_values(sample_name, ascending=False)
            sorted_df['cumsum']=sorted_df[sample_name].cumsum()*100
            sorted_df_filtered=sorted_df[sorted_df['cumsum']<=threshold]
            top_featureids=list(sorted_df_filtered['#OTU ID'])
            top_taxa=[taxonomy_table[taxonomy_table['Feature ID']==featureid]['Taxon'].values[0] for featureid in top_featureids]
            top_repseqs=[repseqs[featureid] for featureid in top_featureids]
            top_abundances=sorted_df.loc[sorted_df_filtered.index][sample_name].values.tolist()
        else:
            raise ValueError("mode must be either 'top_k' or 'percentile'")

        return {'top_featureids':top_featureids,'top_taxa':top_taxa,'top_repseqs':top_repseqs,'top_abundances':top_abundances}    


    def align_to_gtdb(self,
                      query_dir:str,
                      output_dir:str,
                      container:str="None")->str:
        """This function takes the representative sequences of the top k features and generates the script to
        align these feature sequences to gtdb using VSEARCH. If you intend to run this you either
        need to have VSEARCH installed or run it with a container option. You can use either the docker or singularity
        as container options. Otherwise you can use None and run it with the assumption that VSEARCH is installed.
        If you only want the script and not to run it, set run to False.

        Required Configs:

            ---------
            config.gtdb_dir_fasta: The path to the gtdb fasta database.
            ---------
            config.vsearch_similarity: The similarity threshold for the alignment to be used by VSEARCH.
            ---------
            config.vsearch_threads: The number of threads to be used by VSEARCH.
            ---------
            config.adtoolbox_docker: The name of the docker image to be used by ADToolbox (Only if using Docker as container).
            ---------
            config.adtoolbox_singularity: The name of the singularity image to be used by ADToolbox (Only if using Singularity as container).
            ---------

        Args:
            query_dir (str): The path to the query fasta file (the representative sequences).
            output_dir (str): The directory where the alignment outputs will be saved.
            container (str, optional): The container to use. Defaults to "None".

        Returns:
            str: The bash script to be run later.
        """
        ### Load all the required files
        alignment_dir = os.path.join(output_dir,'Alignments')
        match_table=os.path.join(output_dir,'matches.blast')
        gtdb_dir_fasta=self.config.gtdb_dir_fasta
        ### End Loading
        query=query_dir
        dirs=[output_dir,
            gtdb_dir_fasta,
            query
            ]
        for dir in dirs:
            if not pathlib.Path(dir).exists():
                os.mkdir(dir)
        if container=="None":
            bash_script=('vsearch --top_hits_only --blast6out '+
                        match_table+
                        ' --usearch_global '+ query +
                        ' --db '+ gtdb_dir_fasta +
                        ' --id ' +str(self.config.vsearch_similarity) +
                        ' --threads '+str(self.config.vsearch_threads)+
                        ' --alnout '+ alignment_dir +
                        ' --top_hits_only'+'\n')

        if container=="docker":
            bash_script='docker run'
            for dir in dirs:
                bash_script+=('-v '+dir+':'+dir+' ')

            bash_script += (self.config.adtoolbox_docker+' vsearch --top_hits_only --blast6out '+
                        match_table+
                        ' --usearch_global '+ query +
                        ' --db '+ gtdb_dir_fasta +
                        ' --id ' +str(self.config.vsearch_similarity) +
                        ' --threads '+str(self.config.vsearch_threads)+
                        ' --alnout '+ alignment_dir +
                        ' --top_hits_only'+'\n')

        if container=="singularity":
            bash_script='singularity exec '
            for dir in dirs:
                bash_script+=('-B '+str(dir)+':'+str(dir)+' ')

            bash_script += (self.config.adtoolbox_singularity+' vsearch --top_hits_only --blast6out '+
                        match_table+
                        ' --usearch_global '+ str(query) +
                        ' --db '+ gtdb_dir_fasta +
                        ' --id ' +str(self.config.vsearch_similarity) +
                        ' --threads '+str(self.config.vsearch_threads)+
                        ' --alnout '+ alignment_dir +
                        ' --top_hits_only'+'\n')
        return bash_script



    def get_genomes_from_gtdb_alignment(self,alignment_dir:str)->dict:
        """This function takes the alignment file generated from the align_to_gtdb function and generates the the genome information
        using the GTDB-Tk. In the outputted dictionary, the keys are feature ids and the values are the representative genomes.

        Required Configs:
            config.align_to_gtdb_outputs_dir: The path to the directory where the outputs of the align_to_gtdb function are saved.
            ---------
            config.feature_to_taxa: The path to the json file where the json file including feature ids and the representative genomes will be saved.

        Args:
            save (bool, optional): Whether to save the json file or not. Defaults to True.
        """
        matches = os.path.join(alignment_dir,'matches.blast')
        aligned=pd.read_table(matches,header=None,delimiter='\t')
        aligned.drop_duplicates(0,inplace=True)
        aligned[1]=aligned[1].apply(lambda x: ("".join(x.split('_')[1:])).split("~")[0])
        alignment_dict=dict(zip(aligned[0],aligned[1]))


        return alignment_dict


    def download_genome(self,identifier:str,output_dir:str,container:str="None")-> str:
        """This function downloads the genomes from NCBI using the refseq/genbank identifiers.
        Note that this function uses rsync to download the genomes. 

        Required Configs:
            config.genomes_base_dir: The path to the base directory where the genomes will be saved.
            ---------
            config.adtoolbox_docker: The name of the docker image to be used by ADToolbox (Only if using Docker as container).
            ---------
            config.adtoolbox_singularity: The name of the singularity image to be used by ADToolbox (Only if using Singularity as container).
            ---------
        Args:
            identifier (str): The RefSeq/GenBank identifier of the genome.
            output_dir (str): The directory where the genome will be downloaded.
            container (str, optional): The container to use. Defaults to "None". You may select from "None", "docker", "singularity".

        Returns:
            str: The bash script that is used to download the genomes or to be used to download the genomes.

        """
        base_ncbi_dir = 'rsync://ftp.ncbi.nlm.nih.gov/genomes/all/'
        bash_script=""

        specific_ncbi_dir = identifier[0:3]+'/'+\
                            identifier[3:6]+'/'+\
                            identifier[6:9]+'/'+\
                            identifier[9:].split('.')[0]

        genome_dir=pathlib.Path(output_dir)

        if container=="None":
            bash_script+=('rsync -avz --progress '+base_ncbi_dir+specific_ncbi_dir+' '+str(genome_dir))

        if container=="docker":
            bash_script+=('docker run -it -v '+str(genome_dir.parent)+':'+str(genome_dir.parent)+ f' {self.config.adtoolbox_docker} rsync -avz --progress '+' '+base_ncbi_dir+specific_ncbi_dir+' '+str(genome_dir))

        if container=="singularity":
            bash_script+=('singularity exec -B '+str(genome_dir.parent)+':'+str(genome_dir.parent)+ f' {self.config.adtoolbox_singularity} rsync -avz --progress '+' '+base_ncbi_dir+specific_ncbi_dir+' '+str(genome_dir))

        return bash_script

    def async_genome_downloader(self,identifiers:Iterable[str],batch_size:float=10,container:str="None"):
        sem=asyncio.Semaphore(batch_size)
        asyncio.run(self._collect_coros(identifiers=identifiers,semaphore=sem,container=container))

    async def _collect_coros(self,identifiers:Iterable[str],semaphore:asyncio.Semaphore,container:str="None"):
        await asyncio.gather(*[self._genome_dl_coro(identifier=i,semaphore=semaphore,container=container) for i in identifiers])

    async def _genome_dl_coro(self,identifier:str,semaphore:asyncio.Semaphore,container:str="None")->None:
        async with semaphore:
            # download into the configured genomes base directory
            await asyncio.create_subprocess_exec(*self.download_genome(identifier=identifier,output_dir=self.config.genomes_base_dir,container=container).split(" "))

    def extract_genome_info(self,
                            endpattern:str="genomic.fna.gz",
                            filters:dict={
                                          "INCLUDE":[],
                                          "EXCLUDE":["cds","rna"],
                                            })->dict[str,str]:
        """This function extracts the genome information from the genomes base directory. The output
        is a dictionary where the keys are the genome IDs and the values are the paths to the genome files.

        Required Configs:
            config.genomes_base_dir: The path to the base directory where the genomes are saved.
            ---------
        Args:
            endpattern (str, optional): The end pattern of the genome files. Defaults to "genomic.fna.gz".
            filters (dict, optional): The filters to be applied to the genome file names. This filter must be a
                dictionary with two keys: INCLUDE and EXCLUDE. The values of these keys must be lists of strings.
                Defaults to {"INCLUDE":[],"EXCLUDE":["cds","rna"]}. This default is compatible with the genomes downloaded
                from NCBI, i.e. only change this if you are providing your own genomes with different file naming conventions.
        Returns:
            dict[str,str]: A dictionary containing the address of the genomes that are downloaded or to be downloaded.
        """
        base_dir = pathlib.Path(self.config.genomes_base_dir)
        genome_info = {}
        for genome_dir in base_dir.iterdir():
            if genome_dir.is_dir():
                candids=list(genome_dir.rglob(f'*{endpattern}'))
                for candid in candids:
                    if all([i in candid.name for i in filters["INCLUDE"]]) and all([i not in candid.name for i in filters["EXCLUDE"]]):
                        genome_info[genome_dir.name]=str(candid.absolute())           
        return genome_info

    def align_genome_to_protein_db(
            self,
            address:str,
            outdir:str,
            name:str,
            container:str="None",
            )->tuple[str,str]:
        """
        This function aligns a genome to the protein database of ADToolbox using mmseqs2 and returns the
        alignment script together with the path of the alignment table. To run the script without a container
        you need mmseqs2 installed on your system; otherwise you may select "docker" or "singularity" as the
        container option (the corresponding container runtime must be installed).

        Requires:
            config.genome_alignment_output: The path to the directory where the alignment results will be saved.
            ---------
            config.protein_db: The path to the ADToolbox protein database in fasta.
            ---------
            config.adtoolbox_docker: The name of the docker image to be used by ADToolbox (Only if using Docker as container).
            ---------
            config.adtoolbox_singularity: The name of the singularity image to be used by ADToolbox (Only if using Singularity as container).
            ---------
        Args:
            address (str): The address of the genome fasta file. The file must be in fasta format.
            outdir (str): The directory where the alignment table will be saved.
            name (str): A name used to label the alignment output file.
            container (str, optional): The container to use. Defaults to "None". You may select from "None", "docker", "singularity".

        Returns:
            str: The bash script that runs the alignment.
            str: The path of the alignment table file.
        """

        if container=="None":
            bash_script = ""
            alignment_file=os.path.join(outdir,"Alignment_Results_mmseq_"+name+".tsv")
            bash_script += "mmseqs easy-search " + \
                address + " " + \
                self.config.protein_db + " " + \
                alignment_file+ ' tmp --format-mode 4 '+"\n\n"

        if container=="docker":
            bash_script = ""
            alignment_file=os.path.join(outdir,"Alignment_Results_mmseq_"+name+".tsv")
            bash_script +="docker run -it "+ \
            " -v "+address+":"+address+ \
            " -v "+self.config.protein_db+":"+self.config.protein_db+ \
            " -v "+outdir+":"+outdir+ \
            f" {self.config.adtoolbox_docker}  mmseqs easy-search " + \
                address + " " + \
                self.config.protein_db + " " + \
                alignment_file+' tmpfiles --format-mode 4 '+"\n\n"

        if container=="singularity":
            bash_script = ""
            alignment_file=os.path.join(outdir,"Alignment_Results_mmseq_"+name+".tsv")
            bash_script +="singularity exec "+ \
            " -B "+address+":"+address+ \
            " -B "+self.config.protein_db+":"+self.config.protein_db+ \
            " -B "+outdir+":"+outdir+ \
            f" {self.config.adtoolbox_singularity}  mmseqs easy-search " + \
                address + " " + \
                self.config.protein_db + " " + \
                alignment_file+' tmpfiles --format-mode 4 '+"\n\n"

        return  bash_script,alignment_file

    def align_short_reads_to_protein_db(self,query_seq:str,
                                        alignment_file_name:str,
                                        container:str="None",
                                        )->tuple[str,str]:
        """This function aligns shotgun short reads to the protein database of the ADToolbox using mmseqs2.
        mmseqs wrappers in utils are used to perform this task. The result of this task is an alignment table.

        Required Configs:

            protein_db_mmseqs (str): The address of the existing/to be created protein database of the ADToolbox for mmseqs.
            --------
        Args:
            query_seq (str): The address of the query sequence.
            alignment_file_name (str): The name of the alignment file.
            container (str, optional): The container to use. Defaults to "None". You may select from "None", "docker", "singularity".


        Returns:
            str: The bash script that is used to align the genomes or to be used to align the genomes.
            str: The address of the alignment file.
        """
        if not pathlib.Path(self.config.protein_db_mmseqs).exists():
            raise FileNotFoundError("""The protein database of the ADToolbox for mmseqs is not found. Please build it first
                                    using Database.build_mmseqs_database method.""")
        path_query=pathlib.Path(query_seq)
        script = ""
        script += create_mmseqs_database(query_seq,str(path_query.parent/path_query.name.split(".")[0]),container=container,save=None,run=False)+"\n"
        script += mmseqs_search(
            query_db=str(path_query.parent/path_query.name.split(".")[0]),
            target_db=self.config.protein_db_mmseqs,
            results_db=path_query.parent/alignment_file_name,
            run=False,
            save=None,
            container=container,
        )+"\n"
        script += mmseqs_result_db_to_tsv(
            query_db=str(path_query.parent/path_query.name.split(".")[0]),
            target_db=self.config.protein_db_mmseqs,
            results_db=path_query.parent/alignment_file_name,
            tsv_file=path_query.parent/(alignment_file_name+".tsv"),
            container=container,
            save=None,
            run=False,)+"\n"
        return script,path_query.parent/(alignment_file_name+".tsv")

    def extract_ec_from_alignment(self,alignment_file:str)->dict[str,int]:
        """
        This function extracts the number of times an EC number is found in the alignment file when aligned to ADToolbox protein database.

        Required Configs:
            config.e_value: The e-value threshold for filtering the alignment table.
            ---------
            config.bit_score: The bit score threshold for filtering the alignment table.
            ---------
        Args:
            alignment_file (str): The address of the alignment file.

        Returns:
            dict: A dictionary of EC numbers and their counts.

        """
        alignment_table = pd.read_table(alignment_file,sep='\t')
        alignment_table = alignment_table[(alignment_table['evalue']<self.config.e_value)&(alignment_table['bits']>self.config.bit_score)]
        alignment_table["target"]=alignment_table["target"].apply(lambda x:x.split("|")[1])
        ec_counts=alignment_table["target"].value_counts().to_dict()
        return ec_counts

    def get_cod_from_ec_counts(self,ec_counts:dict)->dict:
        """This function takes a json file that comtains ec counts and converts it to ADM microbial agents counts.
        Required Configs:
            config.adm_mapping : A dictionary that maps ADM reactions to ADM microbial agents.
            ---------
            config.csv_reaction_db : The address of the reaction database of ADToolbox.
            ---------
            config.adm_cod_from_ec  : The address of the json file that the results will be saved in.
            ---------
        Args:
            ec_counts (dict): A dictionary containing the counts for each ec number.  
        Returns:
            dict: A dictionary containing the ADM microbial agents counts.
        """
        reaction_db = pd.read_table(self.config.csv_reaction_db, sep=',').drop_duplicates("EC_Numbers")
        reaction_db.set_index("EC_Numbers",inplace=True)
        adm_reactions_agents = {k:0 for k in self.config.adm_mapping.keys()}
        for ec in ec_counts.keys():
            l=reaction_db.loc[ec,"e_adm_Reactions"].split("|")
            for adm_rxn in l: 
                adm_reactions_agents[adm_rxn]+=ec_counts[ec]
        adm_microbial_agents={}
        for k,v in self.config.adm_mapping.items():
            adm_microbial_agents[v]=adm_reactions_agents[k]
        return adm_microbial_agents

    def calculate_group_abundances(self,elements_feature_abundances:dict[str,dict],rel_abund:dict[str,dict])->dict[str,dict[str,float]]:
        """
        This method calculates the group abundances of features for each sample given:
        1) The relative abundances of the genomes in each sample:
            - In this dictionary the keys are the sample names and the values are dictionaries where the keys are the genome names and the values are the relative abundances of the genomes in the sample.
        2) The relative abundances of the elements in each genome.
            - In this dictionary the keys are the genome names and the values are dictionaries where the keys are the element names and the values are the relative abundances of the elements in the genome.

        Required Configs:
            None

        Args:
            elements_feature_abundances (dict[str,dict]): A dictionary containing the relative abundances of the elements in each genome.
            rel_abund (dict[str,dict]): A dictionary containing the relative abundances of the genomes in each sample.

        Returns:
            dict[str,dict[str,float]]: A dictionary containing the relative abundances of the elements in each sample.
        """
        out={}
        df=pd.DataFrame(elements_feature_abundances).T.fillna(0)
        for sample,abunds in rel_abund.items():
            out[sample]=scaler(pd.DataFrame(df.loc[abunds.keys(),:].multiply(list(abunds.values()),axis=0).sum(axis=0)).T).to_dict(orient="records")[0]
        return out

    def extract_relative_abundances(self,feature_table_dir:str,sample_names:Union[list[str],None]=None,top_k:int=-1)->dict:

        """
        This method extracts the relative abundances of the features in each sample from the feature table. The feature table must follow the qiime2 feature-table format.
        NOTE: The final feature abundances sum to 1 for each sample.
        Required Configs:
            None
        Args:
            feature_table_dir (str): The path to the feature table.
            sample_names (Union[list[str],None], optional): The list of sample names to be considered. If None, all the samples will be considered. Defaults to None.
            top_k (int, optional): The number of top features to be used. If -1, all the features will be used. Defaults to -1.

        Returns:
            dict: A dictionary containing the relative abundances of the features in each sample.
        """
        feature_table = pd.read_table(feature_table_dir,sep='\t',skiprows=1)
        if sample_names is None:
            sample_names = feature_table.columns[1:]
        relative_abundances={sample:[] for sample in sample_names}
        if top_k == -1:
            top_k = feature_table.shape[0]
        for sample in sample_names:
            relative_abundances[sample]=(feature_table.sort_values(sample,ascending=False).head(top_k)[sample]/(feature_table.sort_values(sample,ascending=False).head(top_k)[sample].sum())).to_dict()
        return relative_abundances

    def assign_ec_to_genome(self,alignment_file:str)->dict:
        """
        This function takes an alignment file and assigns EC numbers to the genome based on the alignment file
        and the e-adm groupings of the EC numbers. The output is a dictionary where the keys are e-adm reactions
        and the values are the EC numbers found in the genome that are grouped under that reaction.

        Args:
            alignment_file (str): The address of the alignment file.

        Returns:
            dict: A dictionary containing the e-adm reactions and the EC numbers that are found in the genome and are grouped under the e-adm reaction.
        """

        aligntable = pd.read_table(alignment_file,delimiter="\t")
        aligntable = aligntable[(aligntable["bits"]>self.config.bit_score) & (aligntable["evalue"]<self.config.e_value)]

        ec_align_list = aligntable["target"].str.split("|",expand=True)
        ec_align_list = list(ec_align_list[1].unique()) 

        metadatatable = pd.read_table(self.config.csv_reaction_db, sep=',').drop_duplicates("EC_Numbers")[(['EC_Numbers','Modified_ADM_Reactions'])].dropna(axis=0)
        metadatatable=metadatatable[metadatatable["EC_Numbers"].isin(ec_align_list)]
        adm_reactions=list(set(metadatatable["Modified_ADM_Reactions"].str.split("|").sum()))
        adm_to_ecs={}
        for reaction in adm_reactions:
            adm_to_ecs[reaction]=list(metadatatable[metadatatable["Modified_ADM_Reactions"].str.contains(reaction)]["EC_Numbers"])

        return adm_to_ecs




    def seqs_from_sra(self,accession:str,target_dir:str,container:str="None")-> tuple[str,dict]:
        """ 
        This method downloads the fastq files from the SRA database using the accession number of the sample or run (sample accessions only, NOT project accessions).
        The method uses the fasterq-dump tool to download the fastq files. This method also extracts the sample metadata from the SRA database for future use.
        #NOTE In order for this method to work without any container, you need to have the SRA toolkit installed on your system or
        at least have prefetch and fasterq-dump installed on your system. For more information on how to install the SRA toolkit, please refer to the following link:
        https://github.com/ncbi/sra-tools

        Required Configs:
            None


        Args:
            accession (str): The SRA sample or run accession number (not a project accession)
            target_dir (str): The directory where the fastq files will be downloaded
            container (str, optional): The containerization tool that will be used to run the bash scripts. Defaults to "None". Options are "None","docker","singularity"

        Returns:
            prefetch_script (str): The bash script that will be used to download the SRA files in python string format
            sample_metadata (dict): A dictionary that contains the sample metadata

        """   
        if container=="None":
            prefetch_script=f"""#!/bin/bash\nprefetch {accession} -O {target_dir}"""
            acc_folder=pathlib.Path(target_dir)/accession
            fasterq_dump_script=""
            sra_file=acc_folder/(accession+".sra")
            fasterq_dump_script+=f"\nfasterq-dump {sra_file} -O {acc_folder} --split-files"
            fasterq_dump_script+=f"\nrm {sra_file}"

            prefetch_script+=fasterq_dump_script


        elif container=="docker":
            warn("Docker is not supported yet")

        sample_metadata=utils.get_sample_metadata_from_accession(accession)      


        return prefetch_script,sample_metadata     




    def run_qiime2_from_sra(self,
                            read_1:str,
                            read_2:str|None,
                            sample_name:str|None=None,
                            manifest_dir:str|None=None,
                            workings_dir:str|None=None,
                            save_manifest:bool=True,
                            container:str='None') -> tuple[str,str]:
        """
        This method uses the input fastq files to run qiime2. The method uses the qiime2 template scripts that are provided in pkg_data module.
        The method also creates a manifest file for qiime2. The manifest file is created based on the input fastq files.
        Required Configs:
            config.qiime2_single_end_bash_str: The path to the qiime2 bash script for single end reads.
            ---------
            config.qiime2_paired_end_bash_str: The path to the qiime2 bash script for paired end reads.
            ---------
            config.qiime_classifier_db: The path to the qiime2 classifier database.
            ---------
            config.qiime2_docker_image: The name of the docker image to be used by ADToolbox (Only if using Docker as container).
            ---------
            config.qiime2_singularity_image: The name of the singularity image to be used by ADToolbox (Only if using Singularity as container).
            ---------
        Args:
            read_1 (str): directory of the forward reads file
            read_2 (str): directory of the reverse reads file. This is provided only if the reads are paired end; pass None otherwise.
            sample_name (str, optional): The name of the sample. If None, the name of the sample will be the name of the directory where the fastq files are located. Defaults to None.
            manifest_dir (str, optional): The directory where the manifest file will be saved. If None, the manifest file will be saved in the same directory as the fastq files. Defaults to None.
            workings_dir (str, optional): The directory where the qiime2 outputs will be saved. If None, the outputs will be saved in the same directory as the fastq files. Defaults to None.
            container (str, optional): If you want to run the qiime2 commands in a container, specify the container name here. Defaults to 'None'.
        Returns:
            qiime2_bash_str (str): The bash script that will be used to run qiime2 in python string format
            manifest (dict): The manifest file that will be used to run qiime2 in python dictionary format


        """

        if sample_name is None:
            sample_name=str(pathlib.Path(read_1).parent.name)
        if manifest_dir is None:
            manifest_dir=pathlib.Path(read_1).parent
        else:
            manifest_dir=pathlib.Path(manifest_dir)

        if workings_dir is None:
            workings_dir=pathlib.Path(read_1).parent
        else:
            workings_dir=pathlib.Path(workings_dir)


        manifest_single={'sample-id':[],'absolute-filepath':[]}
        manifest_paired={'sample-id':[],'forward-absolute-filepath':[],'reverse-absolute-filepath':[]}  
        if read_2 is not None:
            manifest_paired['sample-id'].append(sample_name)
            manifest_paired['forward-absolute-filepath'].append(read_1)
            manifest_paired['reverse-absolute-filepath'].append(read_2)
            paired_end=True
        else:
            manifest_single['sample-id'].append(sample_name)
            manifest_single['absolute-filepath'].append(read_1)
            paired_end=False

        manifest=pd.DataFrame(manifest_single) if not paired_end else pd.DataFrame(manifest_paired)

        if paired_end:
            with open(self.config.qiime2_paired_end_bash_str,"r") as f:
                qiime2_bash_str=f.read()
        else:
            with open(self.config.qiime2_single_end_bash_str,"r") as f:
                qiime2_bash_str=f.read()

        if container=="None":
            qiime2_bash_str=qiime2_bash_str.replace("<manifest>",str(manifest_dir))
            qiime2_bash_str=qiime2_bash_str.replace("<qiime2_work_dir>",str(workings_dir))
            qiime2_bash_str=qiime2_bash_str.replace("<classifier>",str(self.config.qiime_classifier_db))

        elif container=="docker":
            qiime2_bash_str=qiime2_bash_str.splitlines()
            for idx,line in enumerate(qiime2_bash_str):
                line=line.lstrip()
                if line.startswith("qiime") or line.startswith("biom"):
                    if not paired_end:
                        pec=""
                    else:
                        pec="-v "+read_2+":"+read_2+" "
                    qiime2_bash_str[idx]=f"docker run --env TMPDIR=/data/tmp -v {str(manifest_dir)}:{str(manifest_dir)} -v {read_1}:{read_1} -v {read_2}:{read_2} {pec} -v {self.config.qiime_classifier_db}:{self.config.qiime_classifier_db} -w /data  {self.config.qiime2_docker_image}"+" "+line
            qiime2_bash_str="\n".join(qiime2_bash_str)
            qiime2_bash_str=qiime2_bash_str.replace("<manifest>",os.path.join(str(manifest_dir),"manifest.tsv"))
            qiime2_bash_str=qiime2_bash_str.replace("<qiime2_work_dir>",str(workings_dir))
            qiime2_bash_str=qiime2_bash_str.replace("<classifier>",self.config.qiime_classifier_db)
            if not paired_end:
                manifest['absolute-filepath']=[x for x in manifest['absolute-filepath']]

            else:
                manifest['forward-absolute-filepath']=[x for x in manifest['forward-absolute-filepath']]
                manifest['reverse-absolute-filepath']=[x for x in manifest['reverse-absolute-filepath']]

        elif container=="singularity":
            qiime2_bash_str=qiime2_bash_str.splitlines()
            for idx,line in enumerate(qiime2_bash_str):
                line=line.lstrip()
                if line.startswith("qiime") or line.startswith("biom"):
                    qiime2_bash_str[idx]=f"singularity exec --bind  {str(seqs)}:{str(seqs)},$PWD:$PWD,{str(Path(self.config.qiime_classifier_db))}:{str(Path(self.config.qiime_classifier_db))},$SINGULARITY_TMPDIR:/tmp  {self.config.qiime2_singularity_image} " +line
            qiime2_bash_str="\n".join(qiime2_bash_str)
            qiime2_bash_str=qiime2_bash_str.replace("<manifest>",str(manifest_dir))
            qiime2_bash_str=qiime2_bash_str.replace("<qiime2_work_dir>",str(seqs))
            qiime2_bash_str=qiime2_bash_str.replace("<classifier>",str(Path(self.config.qiime_classifier_db)))

        else:
            raise ValueError("Container must be None, singularity or docker")

        if save_manifest:
            manifest.to_csv(os.path.join(manifest_dir,"manifest.tsv"),sep="\t",index=False)
        return qiime2_bash_str,manifest

__init__(config)

In order to instantiate an object from this class, you need to provide a metagenomics configs object from the configs module: configs.Metagenomics. Information for the inputs of each method is then obtained from the corresponding configs object. The following example shows how to instantiate an object from this class using the default configs object:

Examples:

>>> from adtoolbox import core, configs
>>> config=configs.Metagenomics() ### This uses default arguments. Refer to configs module for more information.
>>> metagenomics=core.Metagenomics(config)
>>> assert type(metagenomics)==core.Metagenomics

Parameters:

- config (configs.Metagenomics): A metagenomics configs object from the configs module. [required]

Returns:

- None

Source code in adtoolbox/core.py
def __init__(self,config:configs.Metagenomics)->None:
    """In order to instntiate an object from this class, you need to provide a metagenomics configs object from the configs module : configs.Metagenomics.
    Information for inputs and of each method is then obtained from the corresponding configs object. The following example shows how to instantiate an object from this class
    using the default configs object:

    Examples:
        >>> from adtoolbox import core, configs
        >>> config=configs.Metagenomics() ### This uses default arguments. Refer to configs module for more information.
        >>> metagenomics=core.Metagenomics(config)
        >>> assert type(metagenomics)==core.Metagenomics

    Args:
        config (configs.Metagenomics): A metagenomics configs object from configs module.

    Returns:
        None
    """
    self.config=config

align_genome_to_protein_db(address, outdir, name, container='None')

This function aligns a genome to the protein database of ADToolbox using mmseqs2 and returns the alignment script together with the path of the alignment table. To run the script without a container you need mmseqs2 installed on your system; otherwise you may select "docker" or "singularity" as the container option (the corresponding container runtime must be installed).

Requires

config.genome_alignment_output: The path to the directory where the alignment results will be saved.

config.protein_db: The path to the ADToolbox protein database in fasta.

config.adtoolbox_docker: The name of the docker image to be used by ADToolbox (Only if using Docker as container).

config.adtoolbox_singularity: The name of the singularity image to be used by ADToolbox (Only if using Singularity as container).

Parameters:

- address (str): The address of the genome fasta file. The file must be in fasta format. [required]
- outdir (str): The directory where the alignment table will be saved. [required]
- name (str): A name used to label the alignment output file. [required]
- container (str, optional): The container to use. You may select from "None", "docker", "singularity". [default: 'None']

Returns:

- str: The bash script that runs the alignment.
- str: The path of the alignment table file.
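In practice you generate the script first and execute it yourself, for example with subprocess. A minimal sketch; the genome path and output directory below are hypothetical, and container="None" assumes mmseqs2 is on your PATH:

>>> from adtoolbox import core, configs
>>> import subprocess
>>> metag=core.Metagenomics(configs.Metagenomics())
>>> script,alignment_file=metag.align_genome_to_protein_db("/path/to/genome.fna","/path/to/out","genome_1")
>>> # subprocess.run(script,shell=True)  # run the alignment, then parse alignment_file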

Source code in adtoolbox/core.py
def align_genome_to_protein_db(
        self,
        address:str,
        outdir:str,
        name:str,
        container:str="None",
        )->tuple[str,str]:
    """
    This function aligns a genome to the protein database of ADToolbox using mmseqs2 and returns the
    alignment script together with the path of the alignment table. To run the script without a container
    you need mmseqs2 installed on your system; otherwise you may select "docker" or "singularity" as the
    container option (the corresponding container runtime must be installed).

    Requires:
        config.genome_alignment_output: The path to the directory where the alignment results will be saved.
        ---------
        config.protein_db: The path to the ADToolbox protein database in fasta.
        ---------
        config.adtoolbox_docker: The name of the docker image to be used by ADToolbox (Only if using Docker as container).
        ---------
        config.adtoolbox_singularity: The name of the singularity image to be used by ADToolbox (Only if using Singularity as container).
        ---------
    Args:
        address (str): The address of the genome fasta file. The file must be in fasta format.
        run (bool, optional): Whether to run the alignment. Defaults to True.
        save (bool, optional): Whether to save the alignment scripts. Defaults to True.
        container (str, optional): The container to use. Defaults to "None". You may select from "None", "docker", "singularity".

    Returns:
        str: A dictionary containing the alignment files.
        str: The bash script that is used to align the genomes or to be used to align the genomes.
    """

    if container=="None":
        bash_script = ""
        alignment_file=os.path.join(outdir,"Alignment_Results_mmseq_"+name+".tsv")
        bash_script += "mmseqs easy-search " + \
            address + " " + \
            self.config.protein_db + " " + \
            alignment_file+ ' tmp --format-mode 4 '+"\n\n"

    if container=="docker":
        bash_script = ""
        alignment_file=os.path.join(outdir,"Alignment_Results_mmseq_"+name+".tsv")
        bash_script +="docker run -it "+ \
        " -v "+address+":"+address+ \
        " -v "+self.config.protein_db+":"+self.config.protein_db+ \
        " -v "+outdir+":"+outdir+ \
        f" {self.config.adtoolbox_docker}  mmseqs easy-search " + \
            address + " " + \
            self.config.protein_db + " " + \
            alignment_file+' tmpfiles --format-mode 4 '+"\n\n"

    if container=="singularity":
        bash_script = ""
        alignment_file=os.path.join(outdir,"Alignment_Results_mmseq_"+name+".tsv")
        bash_script +="singularity exec "+ \
        " -B "+address+":"+address+ \
        " -B "+self.config.protein_db+":"+self.config.protein_db+ \
        " -B "+outdir+":"+outdir+ \
        f" {self.config.adtoolbox_singularity}  mmseqs easy-search " + \
            address + " " + \
            self.config.protein_db + " " + \
            alignment_file+' tmpfiles --format-mode 4 '+"\n\n"

    return  bash_script,alignment_file
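
A minimal usage sketch for this method; the genome path, output directory, and name below are hypothetical:

from adtoolbox import core, configs
import subprocess

metag = core.Metagenomics(configs.Metagenomics())
# Build the mmseqs2 command for a hypothetical genome file.
script, alignment_tsv = metag.align_genome_to_protein_db(
    address="/data/genomes/my_genome.fna",
    outdir="/data/alignments",
    name="my_genome",
    container="None",  # assumes mmseqs2 is on PATH
)
# The method only builds the script; execute it yourself:
subprocess.run(script, shell=True, check=True)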

align_short_reads_to_protein_db(query_seq, alignment_file_name, container='None')

This function aligns shotgun short reads to the protein database of the ADToolbox using mmseqs2. mmseqs wrappers in utils are used to perform this task. The result of this task is an alignment table.

Required Configs

protein_db_mmseqs (str): The address of the existing/to be created protein database of the ADToolbox for mmseqs.

Parameters:

query_seq (str): The address of the query sequence. Required.

alignment_file_name (str): The name of the alignment file. Required.

container (str): The container to use. You may select from "None", "docker", "singularity". Defaults to 'None'.

Returns:

str: The bash script that is used, or to be used, to align the reads.

str: The address of the alignment file.

Source code in adtoolbox/core.py
def align_short_reads_to_protein_db(self,query_seq:str,
                                    alignment_file_name:str,
                                    container:str="None",
                                    )->tuple[str,str]:
    """This function aligns shotgun short reads to the protein database of the ADToolbox using mmseqs2.
    mmseqs wrappers in utils are used to perform this task. The result of this task is an alignment table.

    Required Configs:

        protein_db_mmseqs (str): The address of the existing/to be created protein database of the ADToolbox for mmseqs.
        --------
    Args:
        query_seq (str): The address of the query sequence.
        alignment_file_name (str): The name of the alignment file.
        container (str, optional): The container to use. Defaults to "None". You may select from "None", "docker", "singularity".


    Returns:
        str: The bash script that is used, or to be used, to align the reads.
        str: The address of the alignment file.
    """
    if not pathlib.Path(self.config.protein_db_mmseqs).exists():
        raise FileNotFoundError("""The protein database of the ADToolbox for mmseqs is not found. Please build it first
                                using Database.build_mmseqs_database method.""")
    path_query=pathlib.Path(query_seq)
    script = ""
    script += create_mmseqs_database(query_seq,str(path_query.parent/path_query.name.split(".")[0]),container=container,save=None,run=False)+"\n"
    script += mmseqs_search(
        query_db=str(path_query.parent/path_query.name.split(".")[0]),
        target_db=self.config.protein_db_mmseqs,
        results_db=path_query.parent/alignment_file_name,
        run=False,
        save=None,
        container=container,
    )+"\n"
    script += mmseqs_result_db_to_tsv(
        query_db=str(path_query.parent/path_query.name.split(".")[0]),
        target_db=self.config.protein_db_mmseqs,
        results_db=path_query.parent/alignment_file_name,
        tsv_file=path_query.parent/(alignment_file_name+".tsv"),
        container=container,
        save=None,
        run=False,)+"\n"
    return script,path_query.parent/(alignment_file_name+".tsv")
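
A minimal usage sketch; the query path is hypothetical, and config.protein_db_mmseqs must already exist (built with Database.build_mmseqs_database):

from adtoolbox import core, configs
import subprocess

metag = core.Metagenomics(configs.Metagenomics())
script, alignment_tsv = metag.align_short_reads_to_protein_db(
    query_seq="/data/reads/sample1.fastq",   # hypothetical shotgun reads
    alignment_file_name="sample1_alignment",
    container="None",                        # assumes mmseqs2 is on PATH
)
subprocess.run(script, shell=True, check=True)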

align_to_gtdb(query_dir, output_dir, container='None')

This function takes the representative sequences of the top k features and generates the script to align these feature sequences to GTDB using VSEARCH. To run the generated script you either need VSEARCH installed, or you can use "docker" or "singularity" as the container option; with "None" the script assumes VSEARCH is installed. The function only generates the script; it does not run it.

Required Configs

config.gtdb_dir_fasta: The path to the gtdb fasta database.

config.vsearch_similarity: The similarity threshold for the alignment to be used by VSEARCH.

config.vsearch_threads: The number of threads to be used by VSEARCH.

config.adtoolbox_docker: The name of the docker image to be used by ADToolbox (Only if using Docker as container).

config.adtoolbox_singularity: The name of the singularity image to be used by ADToolbox (Only if using Singularity as container).

Parameters:

query_dir (str): The path to the query fasta file containing the representative sequences. Required.

output_dir (str): The directory where the alignment outputs will be saved. Required.

container (str): The container to use. Defaults to "None".

Returns:

str: The script that is supposed to be run later.

Source code in adtoolbox/core.py
def align_to_gtdb(self,
                  query_dir:str,
                  output_dir:str,
                  container:str="None")->str:
    """This function takes the representative sequences of the top k features and generates the script to
    align these feature sequences to gtdb using VSEARCH. If you intend to run this you either
    need to have VSEARCH installed or run it with a container option. You can use either the docker or singularity
    as container options. Otherwise you can use None and run it with the assumption that VSEARCH is installed.
    The function only generates the script; it does not run it.

    Required Configs:

        ---------
        config.gtdb_dir_fasta: The path to the gtdb fasta database.
        ---------
        config.vsearch_similarity: The similarity threshold for the alignment to be used by VSEARCH.
        ---------
        config.vsearch_threads: The number of threads to be used by VSEARCH.
        ---------
        config.adtoolbox_docker: The name of the docker image to be used by ADToolbox (Only if using Docker as container).
        ---------
        config.adtoolbox_singularity: The name of the singularity image to be used by ADToolbox (Only if using Singularity as container).
        ---------

    Args:
        query_dir (str): The path to the query fasta file containing the representative sequences.
        output_dir (str): The directory where the alignment outputs will be saved.
        container (str, optional): The container to use. Defaults to "None".

    Returns:
        str: The script that is supposed to be running later.
    """
    ### Load all the required files
    alignment_dir = os.path.join(output_dir,'Alignments')
    match_table=os.path.join(output_dir,'matches.blast')
    gtdb_dir_fasta=self.config.gtdb_dir_fasta
    ### End Loading
    query=query_dir
    dirs=[output_dir,
        gtdb_dir_fasta,
        query
        ]
    for dir in dirs:
        if not pathlib.Path(dir).exists():
            os.mkdir(dir)
    if container=="None":
        bash_script=('vsearch --top_hits_only --blast6out '+
                    match_table+
                    ' --usearch_global '+ query +
                    ' --db '+ gtdb_dir_fasta +
                    ' --id ' +str(self.config.vsearch_similarity) +
                    ' --threads '+str(self.config.vsearch_threads)+
                    ' --alnout '+ alignment_dir +
                    ' --top_hits_only'+'\n')

    if container=="docker":
        bash_script='docker run '
        for dir in dirs:
            bash_script+=('-v '+dir+':'+dir+' ')

        bash_script += (self.config.adtoolbox_docker+' vsearch --top_hits_only --blast6out '+
                    match_table+
                    ' --usearch_global '+ query +
                    ' --db '+ gtdb_dir_fasta +
                    ' --id ' +str(self.config.vsearch_similarity) +
                    ' --threads '+str(self.config.vsearch_threads)+
                    ' --alnout '+ alignment_dir +
                    ' --top_hits_only'+'\n')

    if container=="singularity":
        bash_script='singularity exec '
        for dir in dirs:
            bash_script+=('-B '+str(dir)+':'+str(dir)+' ')

        bash_script += (self.config.adtoolbox_singularity+' vsearch --top_hits_only --blast6out '+
                    match_table+
                    ' --usearch_global '+ str(query) +
                    ' --db '+ gtdb_dir_fasta +
                    ' --id ' +str(self.config.vsearch_similarity) +
                    ' --threads '+str(self.config.vsearch_threads)+
                    ' --alnout '+ alignment_dir +
                    ' --top_hits_only'+'\n')
    return bash_script
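
A minimal usage sketch; the paths are hypothetical and config.gtdb_dir_fasta must point to the GTDB fasta database:

from adtoolbox import core, configs
import subprocess

metag = core.Metagenomics(configs.Metagenomics())
script = metag.align_to_gtdb(
    query_dir="/data/qiime_outputs/dna-sequences.fasta",  # hypothetical representative sequences
    output_dir="/data/gtdb_alignment",
    container="docker",
)
subprocess.run(script, shell=True, check=True)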

assign_ec_to_genome(alignment_file)

This function takes an alignment file and assigns EC numbers to the genome based on the alignment file and the e-adm groupings of the EC numbers. The output is a dictionary where the keys are e-adm reactions and the values are the EC numbers that are found in the genome and are grouped under that e-adm reaction.

Parameters:

alignment_file (str): The address of the alignment file. Required.

Returns:

dict: A dictionary containing the e-adm reactions and the EC numbers that are found in the genome and are grouped under each e-adm reaction.

Source code in adtoolbox/core.py
def assign_ec_to_genome(self,alignment_file:str)->dict:
    """
    This function takes an alignment file and assigns EC numbers to the genome based on the alignment file
    and the e-adm groupings of the EC numbers. The output is a dictionary where the keys are e-adm reactions and the
    values are the EC numbers that are found in the genome and are grouped under that e-adm reaction.

    Args:
        alignment_file (str): The address of the alignment file.

    Returns:
        dict: A dictionary containing the e-adm reactions and the EC numbers that are found in the genome and are grouped under the e-adm reaction.
    """

    aligntable = pd.read_table(alignment_file,delimiter="\t")
    aligntable = aligntable[(aligntable["bits"]>self.config.bit_score) & (aligntable["evalue"]<self.config.e_value)]

    ec_align_list = aligntable["target"].str.split("|",expand=True)
    ec_align_list = list(ec_align_list[1].unique()) 

    metadatatable = pd.read_table(self.config.csv_reaction_db, sep=',').drop_duplicates("EC_Numbers")[(['EC_Numbers','Modified_ADM_Reactions'])].dropna(axis=0)
    metadatatable=metadatatable[metadatatable["EC_Numbers"].isin(ec_align_list)]
    adm_reactions=list(set(metadatatable["Modified_ADM_Reactions"].str.split("|").sum()))
    adm_to_ecs={}
    for reaction in adm_reactions:
        adm_to_ecs[reaction]=list(metadatatable[metadatatable["Modified_ADM_Reactions"].str.contains(reaction)]["EC_Numbers"])

    return adm_to_ecs
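
A minimal usage sketch; the alignment table below is the (hypothetical) output of align_genome_to_protein_db, and config.csv_reaction_db, config.bit_score, and config.e_value come from the configs object:

from adtoolbox import core, configs

metag = core.Metagenomics(configs.Metagenomics())
adm_to_ecs = metag.assign_ec_to_genome("/data/alignments/Alignment_Results_mmseq_my_genome.tsv")
for reaction, ecs in adm_to_ecs.items():
    print(reaction, ecs)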

calculate_group_abundances(elements_feature_abundances, rel_abund)

This method calculates the features for each sample given:

1. The relative abundances of the genomes in each sample: in this dictionary the keys are the sample names and the values are dictionaries where the keys are the genome names and the values are the relative abundances of the genomes in the sample.

2. The relative abundances of the elements in each genome: in this dictionary the keys are the genome names and the values are dictionaries where the keys are the element names and the values are the relative abundances of the elements in the genome.

Required Configs

None

Parameters:

elements_feature_abundances (dict[str, dict]): A dictionary containing the relative abundances of the elements in each genome. Required.

rel_abund (dict[str, dict]): A dictionary containing the relative abundances of the genomes in each sample. Required.

Returns:

dict[str, dict[str, float]]: A dictionary containing the relative abundances of the elements in each sample.

Source code in adtoolbox/core.py
def calculate_group_abundances(self,elements_feature_abundances:dict[str,dict],rel_abund:dict[str,dict])->dict[str,dict[str,float]]:
    """
    This method is defined to calculate the features for each sample given:
    1) The relative abundances of the genomes in each sample:
        - In this dictionary the keys are the sample names and the values are dictionaries where the keys are the genome names and the values are the relative abundances of the genomes in the sample.
    2) The relative abundances of the elements in each genome.
        - In this dictionary the keys are the genome names and the values are dictionaries where the keys are the element names and the values are the relative abundances of the elements in the genome.

    Required Configs:
        None

    Args:
        elements_feature_abundances (dict[str,dict]): A dictionary containing the relative abundances of the elements in each genome.
        rel_abund (dict[str,dict]): A dictionary containing the relative abundances of the genomes in each sample.

    Returns:
        dict[str,dict[str,float]]: A dictionary containing the relative abundances of the elements in each sample.
    """
    out={}
    df=pd.DataFrame(elements_feature_abundances).T.fillna(0)
    for sample,abunds in rel_abund.items():
        out[sample]=scaler(pd.DataFrame(df.loc[abunds.keys(),:].multiply(list(abunds.values()),axis=0).sum(axis=0)).T).to_dict(orient="records")[0]
    return out
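
A toy worked example (genome and element names are hypothetical); each sample's result is the abundance-weighted sum of its genomes' element profiles, rescaled by the module's scaler:

from adtoolbox import core, configs

metag = core.Metagenomics(configs.Metagenomics())
element_abund = {
    "genome_A": {"X_su": 0.7, "X_aa": 0.3},
    "genome_B": {"X_su": 0.2, "X_aa": 0.8},
}
rel_abund = {
    "sample_1": {"genome_A": 0.6, "genome_B": 0.4},
    "sample_2": {"genome_A": 0.1, "genome_B": 0.9},
}
group_abund = metag.calculate_group_abundances(element_abund, rel_abund)
# group_abund["sample_1"] maps each element name to its weighted, rescaled abundance.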

download_genome(identifier, output_dir, container='None')

This function downloads a genome from NCBI using its refseq/genbank identifier. Note that this function uses rsync to download the genome.

Required Configs

config.genomes_base_dir: The path to the base directory where the genomes will be saved.

config.adtoolbox_docker: The name of the docker image to be used by ADToolbox (Only if using Docker as container).

config.adtoolbox_singularity: The name of the singularity image to be used by ADToolbox (Only if using Singularity as container).

Parameters:

identifier (str): The identifier of the genome. It can be either refseq or genbank. Required.

output_dir (str): The directory where the genome will be downloaded. Required.

container (str): The container to use. You may select from "None", "docker", "singularity". Defaults to 'None'.

Returns:

str: The bash script that is used, or to be used, to download the genome.

Source code in adtoolbox/core.py
def download_genome(self,identifier:str,output_dir:str,container:str="None")-> str:
    """This function downloads the genomes from NCBI using the refseq/genbank identifiers.
    Note that this function uses rsync to download the genomes. 

    Required Configs:
        config.genomes_base_dir: The path to the base directory where the genomes will be saved.
        ---------
        config.adtoolbox_docker: The name of the docker image to be used by ADToolbox (Only if using Docker as container).
        ---------
        config.adtoolbox_singularity: The name of the singularity image to be used by ADToolbox (Only if using Singularity as container).
        ---------
    Args:
        identifier (str): The identifier of the genome. It can be either refseq or genbank.
        output_dir (str): The directory where the genome will be downloaded.
        container (str, optional): The container to use. Defaults to "None". You may select from "None", "docker", "singularity".

    Returns:
        str: The bash script that is used to download the genomes or to be used to download the genomes.

    """
    base_ncbi_dir = 'rsync://ftp.ncbi.nlm.nih.gov/genomes/all/'
    bash_script=""

    specific_ncbi_dir = identifier[0:3]+'/'+\
                        identifier[3:6]+'/'+\
                        identifier[6:9]+'/'+\
                        identifier[9:].split('.')[0]

    genome_dir=pathlib.Path(output_dir)

    if container=="None":
        bash_script+=('rsync -avz --progress '+base_ncbi_dir+specific_ncbi_dir+' '+str(genome_dir))

    if container=="docker":
        bash_script+=('docker run -it -v '+str(genome_dir.parent)+':'+str(genome_dir.parent)+ f' {self.config.adtoolbox_docker} rsync -avz --progress '+' '+base_ncbi_dir+specific_ncbi_dir+' '+str(genome_dir))

    if container=="singularity":
        bash_script+=('singularity exec -B '+str(genome_dir.parent)+':'+str(genome_dir.parent)+ f' {self.config.adtoolbox_singularity} rsync -avz --progress '+' '+base_ncbi_dir+specific_ncbi_dir+' '+str(genome_dir))

    return bash_script
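
A minimal usage sketch; the accession and output directory are hypothetical:

from adtoolbox import core, configs
import subprocess

metag = core.Metagenomics(configs.Metagenomics())
script = metag.download_genome(
    identifier="GCA_000008085.1",             # hypothetical accession
    output_dir="/data/genomes/GCA_000008085.1",
    container="None",                         # assumes rsync is installed
)
subprocess.run(script, shell=True, check=True)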

extract_ec_from_alignment(alignment_file)

This function extracts the number of times an EC number is found in the alignment file when aligned to ADToolbox protein database.

Required Configs

config.e_value: The e-value threshold for filtering the alignment table.

config.bit_score: The bit score threshold for filtering the alignment table.

config.ec_counts_from_alignment: The address of the json file that the results will be saved in.

Parameters:

alignment_file (str): The address of the alignment file. Required.

Returns:

dict[str, int]: A dictionary of EC numbers and their counts.

Source code in adtoolbox/core.py
def extract_ec_from_alignment(self,alignment_file:str)->dict[str,int]:
    """
    This function extracts the number of times an EC number is found in the alignment file when aligned to ADToolbox protein database.

    Required Configs:
        config.e_value: The e-value threshold for filtering the alignment table.
        ---------
        config.bit_score: The bit score threshold for filtering the alignment table.
        ---------
        config.ec_counts_from_alignment: The address of the json file that the results will be saved in.
        ---------
    Args:
        alignment_file (str): The address of the alignment file.

    Returns:
        dict: A dictionary of EC numbers and their counts.

    """
    alignment_table = pd.read_table(alignment_file,sep='\t')
    alignment_table = alignment_table[(alignment_table['evalue']<self.config.e_value)&(alignment_table['bits']>self.config.bit_score)]
    alignment_table["target"]=alignment_table["target"].apply(lambda x:x.split("|")[1])
    ec_counts=alignment_table["target"].value_counts().to_dict()
    return ec_counts
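
A minimal usage sketch; the alignment table path is hypothetical:

from adtoolbox import core, configs

metag = core.Metagenomics(configs.Metagenomics())
ec_counts = metag.extract_ec_from_alignment("/data/alignments/Alignment_Results_mmseq_my_genome.tsv")
# ec_counts maps EC numbers to their hit counts after the e-value and bit-score filters,
# e.g. {"1.1.1.1": 12, "2.7.1.1": 3}.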

extract_genome_info(endpattern='genomic.fna.gz', filters={'INCLUDE': [], 'EXCLUDE': ['cds', 'rna']})

This function extracts the genome information from the genomes base directory. The output is a dictionary where the keys are the genome IDs and the values are the paths to the genome files.

Required Configs

config.genomes_base_dir: The path to the base directory where the genomes are saved.

Parameters:

endpattern (str): The end pattern of the genome files. Defaults to "genomic.fna.gz".

filters (dict): The filters to be applied to the genome file names. This must be a dictionary with two keys, INCLUDE and EXCLUDE, whose values are lists of strings. Defaults to {"INCLUDE": [], "EXCLUDE": ["cds", "rna"]}. This default is compatible with the genomes downloaded from NCBI; only change it if you provide your own genomes with different file name conventions.

Returns:

dict[str, str]: A dictionary containing the addresses of the genomes that are downloaded or to be downloaded.

Source code in adtoolbox/core.py
def extract_genome_info(self,
                        endpattern:str="genomic.fna.gz",
                        filters:dict={
                                      "INCLUDE":[],
                                      "EXCLUDE":["cds","rna"],
                                        })->dict[str,str]:
    """This function extracts the genome information from the genomes base directory. The output
    is a dictionary where the keys are the genome IDs and the values are the paths to the genome files.

    Required Configs:
        config.genomes_base_dir: The path to the base directory where the genomes are saved.
        ---------
    Args:
        endpattern (str, optional): The end pattern of the genome files. Defaults to "genomic.fna.gz".
        filters (dict, optional): The filters to be applied to the genome file names. This must be a
            dictionary with two keys, INCLUDE and EXCLUDE, whose values are lists of strings.
            Defaults to {"INCLUDE":[],"EXCLUDE":["cds","rna"]}. This default is compatible with the genomes downloaded
            from NCBI, i.e. only change this if you are providing your own genomes with different file name conventions.
    Returns:
        dict[str,str]: A dictionary containing the address of the genomes that are downloaded or to be downloaded.
    """
    base_dir = pathlib.Path(self.config.genomes_base_dir)
    genome_info = {}
    for genome_dir in base_dir.iterdir():
        if genome_dir.is_dir():
            candids=list(genome_dir.rglob(f'*{endpattern}'))
            for candid in candids:
                if all([i in candid.name for i in filters["INCLUDE"]]) and all([i not in candid.name for i in filters["EXCLUDE"]]):
                    genome_info[genome_dir.name]=str(candid.absolute())           
    return genome_info
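
A minimal usage sketch; config.genomes_base_dir must point to the directory populated by download_genome:

from adtoolbox import core, configs

metag = core.Metagenomics(configs.Metagenomics())
genome_info = metag.extract_genome_info()
# genome_info maps each genome directory name to the absolute path of its fasta file,
# e.g. {"GCA_000008085.1": "/data/genomes/GCA_000008085.1/..._genomic.fna.gz"} (hypothetical).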

extract_relative_abundances(feature_table_dir, sample_names=None, top_k=-1)

This method extracts the relative abundances of the features in each sample from the feature table. The feature table must follow the qiime2 feature-table format. NOTE: The final feature abundances sum to 1 for each sample.

Required Configs

None

Parameters:

feature_table_dir (str): The path to the feature table. Required.

sample_names (Union[list[str], None]): The list of sample names to be considered. If None, all the samples will be considered. Defaults to None.

top_k (int): The number of top features to be used. If -1, all the features will be used. Defaults to -1.

Returns:

dict: A dictionary containing the relative abundances of the features in each sample.

Source code in adtoolbox/core.py
def extract_relative_abundances(self,feature_table_dir:str,sample_names:Union[list[str],None]=None,top_k:int=-1)->dict:

    """
    This method extracts the relative abundances of the features in each sample from the feature table. The feature table must follow the qiime2 feature-table format.
    NOTE: The final feature abundances sum to 1 for each sample.
    Required Configs:
        None
    Args:
        feature_table_dir (str): The path to the feature table.
        sample_names (Union[list[str],None], optional): The list of sample names to be considered. If None, all the samples will be considered. Defaults to None.
        top_k (int, optional): The number of top features to be used. If -1, all the features will be used. Defaults to -1.

    Returns:
        dict: A dictionary containing the relative abundances of the features in each sample.
    """
    feature_table = pd.read_table(feature_table_dir,sep='\t',skiprows=1)
    if sample_names is None:
        sample_names = feature_table.columns[1:]
    relative_abundances={sample:[] for sample in sample_names}
    if top_k == -1:
        top_k = feature_table.shape[0]
    for sample in sample_names:
        top=feature_table.sort_values(sample,ascending=False).head(top_k)[sample]
        relative_abundances[sample]=(top/top.sum()).to_dict()
    return relative_abundances
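
A minimal usage sketch; the feature table path is hypothetical and must follow the qiime2 feature-table TSV format:

from adtoolbox import core, configs

metag = core.Metagenomics(configs.Metagenomics())
rel_abund = metag.extract_relative_abundances(
    feature_table_dir="/data/qiime_outputs/feature-table.tsv",
    sample_names=["sample_1"],  # None would include all samples
    top_k=10,
)
# rel_abund["sample_1"] maps the top 10 feature ids to abundances that sum to 1.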

find_top_taxa(sample_name, treshold, mode='top_k')

This function needs three inputs from qiime:

1. feature table: the abundance of each feature in each sample (TSV).
2. taxonomy table: the taxonomy of each feature (TSV).
3. rep seqs: the representative sequence of each feature (fasta).

It then finds the top k features, or the features that form a specific percentile of the community of the sample.

Required Configs

config.feature_table_dir: The path to the feature table tsv file.

config.taxonomy_table_dir: The path to the taxonomy table tsv file.

config.rep_seq_fasta: The path to the representative sequence fasta file.

Parameters:

sample_name (str): The name of the sample. Required.

treshold (int | float): The threshold for the top k or the percentile. Required.

mode (str): Whether to find the top k features ('top_k') or the features that form a specific percentile of the community of the sample ('percentile'). Defaults to 'top_k'.

Returns:

dict: A dictionary of the top k features and their taxonomy.

Source code in adtoolbox/core.py
def find_top_taxa(
    self,
    sample_name:str,
    treshold:Union[int,float],
    mode:str='top_k',
    )->dict:
    """
    This function needs three inputs from qiime:
    1. feature table: This is the abundance of each feature in each sample (TSV).
    2. taxonomy table: This is the taxonomy of each feature (TSV). 
    3. rep seqs: This is the representative sequence of each feature (fasta).
    It then finds the top k features, or the features that form a specific percentile of the community of the sample.

    Required Configs:

        config.feature_table_dir: The path to the feature table tsv file.
        ---------
        config.taxonomy_table_dir: The path to the taxonomy table tsv file.
        ---------
        config.rep_seq_fasta: The path to the representative sequence fasta file.
        ---------

    Args:
        sample_name (str): The name of the sample.
        treshold (int | float): The threshold for the top k or the percentile.
        mode (str, optional): Whether to find the top k features ('top_k') or the features that form a specific percentile of the community of the sample ('percentile'). Defaults to 'top_k'.

    Returns:
        dict: A dictionary of the top k features and their taxonomy.
    """
    ### Load all the required files
    feature_table = pd.read_table(self.config.feature_table_dir, sep='\t',skiprows=1)
    taxonomy_table = pd.read_table(self.config.taxonomy_table_dir, delimiter='\t')
    repseqs=fasta_to_dict(self.config.rep_seq_fasta)
    ### End Loading
    if mode == 'top_k':
        sorted_df=feature_table.sort_values(sample_name, ascending=False)
        top_featureids=list(sorted_df['#OTU ID'].head(treshold))
        top_taxa=[taxonomy_table[taxonomy_table['Feature ID']==featureid]['Taxon'].values[0] for featureid in top_featureids]
        top_repseqs=[repseqs[featureid] for featureid in top_featureids]
        top_abundances=list(sorted_df[sample_name].head(treshold)/sorted_df[sample_name].sum())

    elif mode == 'percentile':
        feature_table[sample_name]=feature_table[sample_name]/feature_table[sample_name].sum()
        sorted_df=feature_table.sort_values(sample_name, ascending=False)
        sorted_df['cumsum']=sorted_df[sample_name].cumsum()*100
        sorted_df_filtered=sorted_df[sorted_df['cumsum']<=treshold]
        top_featureids=list(sorted_df_filtered['#OTU ID'])
        top_taxa=[taxonomy_table[taxonomy_table['Feature ID']==featureid]['Taxon'].values[0] for featureid in top_featureids]
        top_repseqs=[repseqs[featureid] for featureid in top_featureids]
        top_abundances=sorted_df.loc[sorted_df_filtered.index][sample_name].values.tolist()
    else:
        raise ValueError("mode must be either 'top_k' or 'percentile'")

    return {'top_featureids':top_featureids,'top_taxa':top_taxa,'top_repseqs':top_repseqs,'top_abundances':top_abundances}    
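
A minimal usage sketch; the qiime2 output paths are hypothetical, and this assumes the configs.Metagenomics constructor accepts these keyword arguments (mirroring the config attributes the method reads):

from adtoolbox import core, configs

config = configs.Metagenomics(
    feature_table_dir="/data/qiime_outputs/feature-table.tsv",
    taxonomy_table_dir="/data/qiime_outputs/taxonomy.tsv",
    rep_seq_fasta="/data/qiime_outputs/dna-sequences.fasta",
)
metag = core.Metagenomics(config)
top = metag.find_top_taxa("sample_1", treshold=10, mode="top_k")
# top["top_featureids"], top["top_taxa"], top["top_repseqs"], top["top_abundances"]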

get_cod_from_ec_counts(ec_counts)

This function takes a dictionary of EC counts and converts it to ADM microbial agent counts.

Required Configs

config.adm_mapping : A dictionary that maps ADM reactions to ADM microbial agents.

config.csv_reaction_db : The address of the reaction database of ADToolbox.

config.adm_cod_from_ec : The address of the json file that the results will be saved in.

Parameters:

ec_counts (dict): A dictionary containing the counts for each EC number. Required.

Returns:

dict: A dictionary containing the ADM microbial agent counts.

Source code in adtoolbox/core.py
def get_cod_from_ec_counts(self,ec_counts:dict)->dict:
    """This function takes a json file that comtains ec counts and converts it to ADM microbial agents counts.
    Required Configs:
        config.adm_mapping : A dictionary that maps ADM reactions to ADM microbial agents.
        ---------
        config.csv_reaction_db : The address of the reaction database of ADToolbox.
        ---------
        config.adm_cod_from_ec  : The address of the json file that the results will be saved in.
        ---------
    Args:
        ec_counts (dict): A dictionary containing the counts for each ec number.  
    Returns:
        dict: A dictionary containing the ADM microbial agents counts.
    """
    reaction_db = pd.read_table(self.config.csv_reaction_db, sep=',').drop_duplicates("EC_Numbers")
    reaction_db.set_index("EC_Numbers",inplace=True)
    adm_reactions_agents = {k:0 for k in self.config.adm_mapping.keys()}
    for ec in ec_counts.keys():
        l=reaction_db.loc[ec,"e_adm_Reactions"].split("|")
        for adm_rxn in l: 
            adm_reactions_agents[adm_rxn]+=ec_counts[ec]
    adm_microbial_agents={}
    for k,v in self.config.adm_mapping.items():
        adm_microbial_agents[v]=adm_reactions_agents[k]
    return adm_microbial_agents
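
A minimal usage sketch, chaining from extract_ec_from_alignment; config.csv_reaction_db and config.adm_mapping come from the configs object, and the alignment path is hypothetical:

from adtoolbox import core, configs

metag = core.Metagenomics(configs.Metagenomics())
ec_counts = metag.extract_ec_from_alignment("/data/alignments/Alignment_Results_mmseq_my_genome.tsv")
cod = metag.get_cod_from_ec_counts(ec_counts)
# cod maps each ADM microbial agent to the summed counts of its associated EC numbers.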

get_genomes_from_gtdb_alignment(alignment_dir)

This function takes the alignment directory produced by the align_to_gtdb function and extracts the genome information from it. In the output dictionary, the keys are feature ids and the values are the representative genomes.

Required Configs

config.align_to_gtdb_outputs_dir: The path to the directory where the outputs of the align_to_gtdb function are saved.

config.feature_to_taxa: The path to the json file where the feature ids and their representative genomes will be saved.

Parameters:

alignment_dir (str): The directory containing the outputs of the align_to_gtdb function. Required.

Returns:

dict: A dictionary where the keys are feature ids and the values are the representative genomes.
Source code in adtoolbox/core.py
def get_genomes_from_gtdb_alignment(self,alignment_dir:str)->dict:
    """This function takes the alignment file generated from the align_to_gtdb function and generates the the genome information
    using the GTDB-Tk. In the outputted dictionary, the keys are feature ids and the values are the representative genomes.

    Required Configs:
        config.align_to_gtdb_outputs_dir: The path to the directory where the outputs of the align_to_gtdb function are saved.
        ---------
        config.feature_to_taxa: The path to the json file where the json file including feature ids and the representative genomes will be saved.

    Args:
        save (bool, optional): Whether to save the json file or not. Defaults to True.
    """
    matches = os.path.join(alignment_dir,'matches.blast')
    aligned=pd.read_table(matches,header=None,delimiter='\t')
    aligned.drop_duplicates(0,inplace=True)
    aligned[1]=aligned[1].apply(lambda x: ("".join(x.split('_')[1:])).split("~")[0])
    alignment_dict=dict(zip(aligned[0],aligned[1]))


    return alignment_dict
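
A minimal usage sketch; the directory below is the (hypothetical) output_dir passed earlier to align_to_gtdb:

from adtoolbox import core, configs

metag = core.Metagenomics(configs.Metagenomics())
feature_to_genome = metag.get_genomes_from_gtdb_alignment("/data/gtdb_alignment")
# feature_to_genome maps feature ids to representative genome accessions.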

run_qiime2_from_sra(read_1, read_2, sample_name=None, manifest_dir=None, workings_dir=None, save_manifest=True, container='None')

This method uses the input fastq files to run qiime2. The method uses the qiime2 template scripts that are provided in pkg_data module. The method also creates a manifest file for qiime2. The manifest file is created based on the input fastq files.

Required Configs

config.qiime2_single_end_bash_str: The path to the qiime2 bash script for single end reads.

config.qiime2_paired_end_bash_str: The path to the qiime2 bash script for paired end reads.

config.qiime_classifier_db: The path to the qiime2 classifier database.

config.qiime2_docker_image: The name of the docker image to be used by ADToolbox (Only if using Docker as container).

config.qiime2_singularity_image: The name of the singularity image to be used by ADToolbox (Only if using Singularity as container).

Parameters:

read_1 (str): The directory of the forward reads file. Required.

read_2 (str or None): The directory of the reverse reads file. Provide this only if the reads are paired end; otherwise pass None. Required.

sample_name (str): The name of the sample. If None, the name of the sample will be the name of the directory where the fastq files are located. Defaults to None.

manifest_dir (str): The directory where the manifest file will be saved. If None, the manifest file will be saved in the same directory as the fastq files. Defaults to None.

workings_dir (str): The directory where the qiime2 outputs will be saved. If None, the outputs will be saved in the same directory as the fastq files. Defaults to None.

save_manifest (bool): Whether to save the manifest file to disk. Defaults to True.

container (str): If you want to run the qiime2 commands in a container, specify the container name here. Defaults to 'None'.

Returns:

qiime2_bash_str (str): The bash script that will be used to run qiime2, as a Python string.

manifest (DataFrame): The manifest that will be used to run qiime2, as a pandas DataFrame.

Source code in adtoolbox/core.py
def run_qiime2_from_sra(self,
                        read_1:str,
                        read_2:str|None,
                        sample_name:str|None=None,
                        manifest_dir:str|None=None,
                        workings_dir:str|None=None,
                        save_manifest:bool=True,
                        container:str='None') -> tuple[str,str]:
    """
    This method uses the input fastq files to run qiime2. The method uses the qiime2 template scripts that are provided in pkg_data module.
    The method also creates a manifest file for qiime2. The manifest file is created based on the input fastq files.
    Required Configs:
        config.qiime2_single_end_bash_str: The path to the qiime2 bash script for single end reads.
        ---------
        config.qiime2_paired_end_bash_str: The path to the qiime2 bash script for paired end reads.
        ---------
        config.qiime_classifier_db: The path to the qiime2 classifier database.
        ---------
        config.qiime2_docker_image: The name of the docker image to be used by ADToolbox (Only if using Docker as container).
        ---------
        config.qiime2_singularity_image: The name of the singularity image to be used by ADToolbox (Only if using Singularity as container).
        ---------
    Args:
        read_1 (str): directory of the forward reads file
        read_2 (str): directory of the reverse reads file. Provide this only if the reads are paired end; otherwise pass None.
        sample_name (str, optional): The name of the sample. If None, the name of the sample will be the name of the directory where the fastq files are located. Defaults to None.
        manifest_dir (str, optional): The directory where the manifest file will be saved. If None, the manifest file will be saved in the same directory as the fastq files. Defaults to None.
        workings_dir (str, optional): The directory where the qiime2 outputs will be saved. If None, the outputs will be saved in the same directory as the fastq files. Defaults to None.
        save_manifest (bool, optional): Whether to save the manifest file to disk. Defaults to True.
        container (str, optional): If you want to run the qiime2 commands in a container, specify the container name here. Defaults to 'None'.
    Returns:
        qiime2_bash_str (str): The bash script that will be used to run qiime2, as a Python string
        manifest (DataFrame): The manifest that will be used to run qiime2, as a pandas DataFrame


    """

    if sample_name is None:
        sample_name=str(pathlib.Path(read_1).parent.name)
    if manifest_dir is None:
        manifest_dir=pathlib.Path(read_1).parent
    else:
        manifest_dir=pathlib.Path(manifest_dir)

    if workings_dir is None:
        workings_dir=pathlib.Path(read_1).parent
    else:
        workings_dir=pathlib.Path(workings_dir)


    manifest_single={'sample-id':[],'absolute-filepath':[]}
    manifest_paired={'sample-id':[],'forward-absolute-filepath':[],'reverse-absolute-filepath':[]}  
    if read_2 is not None:
        manifest_paired['sample-id'].append(sample_name)
        manifest_paired['forward-absolute-filepath'].append(read_1)
        manifest_paired['reverse-absolute-filepath'].append(read_2)
        paired_end=True
    else:
        manifest_single['sample-id'].append(sample_name)
        manifest_single['absolute-filepath'].append(read_1)
        paired_end=False

    manifest=pd.DataFrame(manifest_single) if not paired_end else pd.DataFrame(manifest_paired)

    if paired_end:
        with open(self.config.qiime2_paired_end_bash_str,"r") as f:
            qiime2_bash_str=f.read()
    else:
        with open(self.config.qiime2_single_end_bash_str,"r") as f:
            qiime2_bash_str=f.read()

    if container=="None":
        qiime2_bash_str=qiime2_bash_str.replace("<manifest>",str(manifest_dir))
        qiime2_bash_str=qiime2_bash_str.replace("<qiime2_work_dir>",str(workings_dir))
        qiime2_bash_str=qiime2_bash_str.replace("<classifier>",str(self.config.qiime_classifier_db))

    elif container=="docker":
        qiime2_bash_str=qiime2_bash_str.splitlines()
        for idx,line in enumerate(qiime2_bash_str):
            line=line.lstrip()
            if line.startswith("qiime") or line.startswith("biom"):
                if not paired_end:
                    pec=""
                else:
                    pec="-v "+read_2+":"+read_2+" "
                qiime2_bash_str[idx]=f"docker run --env TMPDIR=/data/tmp -v {str(manifest_dir)}:{str(manifest_dir)} -v {read_1}:{read_1} -v {read_2}:{read_2} {pec} -v {self.config.qiime_classifier_db}:{self.config.qiime_classifier_db} -w /data  {self.config.qiime2_docker_image}"+" "+line
        qiime2_bash_str="\n".join(qiime2_bash_str)
        qiime2_bash_str=qiime2_bash_str.replace("<manifest>",os.path.join(str(manifest_dir),"manifest.tsv"))
        qiime2_bash_str=qiime2_bash_str.replace("<qiime2_work_dir>",str(workings_dir))
        qiime2_bash_str=qiime2_bash_str.replace("<classifier>",self.config.qiime_classifier_db)

    elif container=="singularity":
        qiime2_bash_str=qiime2_bash_str.splitlines()
        for idx,line in enumerate(qiime2_bash_str):
            line=line.lstrip()
            if line.startswith("qiime") or line.startswith("biom"):
                qiime2_bash_str[idx]=f"singularity exec --bind  {str(seqs)}:{str(seqs)},$PWD:$PWD,{str(Path(self.config.qiime_classifier_db))}:{str(Path(self.config.qiime_classifier_db))},$SINGULARITY_TMPDIR:/tmp  {self.config.qiime2_singularity_image} " +line
        qiime2_bash_str="\n".join(qiime2_bash_str)
        qiime2_bash_str=qiime2_bash_str.replace("<manifest>",str(manifest_dir))
        qiime2_bash_str=qiime2_bash_str.replace("<qiime2_work_dir>",str(seqs))
        qiime2_bash_str=qiime2_bash_str.replace("<classifier>",str(Path(self.config.qiime_classifier_db)))

    else:
        raise ValueError("Container must be None, singularity or docker")

    if save_manifest:
        manifest.to_csv(os.path.join(manifest_dir,"manifest.tsv"),sep="\t",index=False)
    return qiime2_bash_str,manifest
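
A minimal usage sketch; the fastq paths are hypothetical and the reads here are paired end:

from adtoolbox import core, configs
import subprocess

metag = core.Metagenomics(configs.Metagenomics())
script, manifest = metag.run_qiime2_from_sra(
    read_1="/data/SRR0000000/SRR0000000_1.fastq",
    read_2="/data/SRR0000000/SRR0000000_2.fastq",  # pass None for single-end reads
    container="None",  # assumes qiime2 is installed
)
subprocess.run(script, shell=True, check=True)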

seqs_from_sra(accession, target_dir, container='None')

This method downloads the fastq files from the SRA database using the accession number of the sample or run (ONLY SAMPLE ACCESSION AND NOT PROJECT ACCESSION). The method uses the fasterq-dump tool to download the fastq files. It also extracts the sample metadata from the SRA database for future use.

NOTE: In order for this method to work without any container, you need to have the SRA toolkit installed on your system, or at least have prefetch and fasterq-dump installed. For more information on how to install the SRA toolkit, please refer to the following link: https://github.com/ncbi/sra-tools

Required Configs

None

Parameters:

accession (str): The accession number of the SRA sample or run. Required.

target_dir (str): The directory where the fastq files will be downloaded. Required.

container (str): The containerization tool that will be used to run the bash scripts. Options are "None", "docker", "singularity". Defaults to "None".

Returns:

prefetch_script (str): The bash script that will be used to download the SRA files, as a Python string.

sample_metadata (dict): A dictionary that contains the sample metadata.

Source code in adtoolbox/core.py
def seqs_from_sra(self,accession:str,target_dir:str,container:str="None")-> tuple[str,dict]:
    """ 
    This method downloads the fastq files from the SRA database using the accession number of the sample or run (ONLY SAMPLE ACCESSION AND NOT PROJECT ACCESSION).
    The method uses the fasterq-dump tool to download the fastq files. This method also extracts the sample metadata from the SRA database for future use.
    #NOTE In order for this method to work without any container, you need to have the SRA toolkit installed on your system or
    at least have prefetch and fasterq-dump installed on your system. For more information on how to install the SRA toolkit, please refer to the following link:
    https://github.com/ncbi/sra-tools

    Required Configs:
        None


    Args:
        accession (str): The accession number of the SRA sample or run
        target_dir (str): The directory where the fastq files will be downloaded
        container (str, optional): The containerization tool that will be used to run the bash scripts. Defaults to "None". Options are "None","docker","singularity"

    Returns:
        prefetch_script (str): The bash script that will be used to download the SRA files in python string format
        sample_metadata (dict): A dictionary that contains the sample metadata

    """   
    if container=="None":
        prefetch_script=f"""#!/bin/bash\nprefetch {accession} -O {target_dir}"""
        acc_folder=pathlib.Path(target_dir)/accession
        fasterq_dump_script=""
        sra_file=acc_folder/(accession+".sra")
        fasterq_dump_script+=f"\nfasterq-dump {sra_file} -O {acc_folder} --split-files"
        fasterq_dump_script+=f"\nrm {sra_file}"

        prefetch_script+=fasterq_dump_script


    elif container=="docker":
        warn("Docker is not supported yet")

    sample_metadata=utils.get_sample_metadata_from_accession(accession)      


    return prefetch_script,sample_metadata     
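
A minimal usage sketch; the accession and target directory are hypothetical:

from adtoolbox import core, configs
import subprocess

metag = core.Metagenomics(configs.Metagenomics())
script, metadata = metag.seqs_from_sra(
    accession="SRR0000000",
    target_dir="/data/sra",
    container="None",  # assumes prefetch and fasterq-dump are installed
)
subprocess.run(script, shell=True, check=True)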

adm

Here is a schematic view of the adm API:

[Figure: schematic of the adm module]

You can access this module by:

from adtoolbox import adm 

This module includes the following classes:

Model

Any kinetic model could be an instance of this class.

Parameters:

model_parameters (dict): A dictionary which contains model parameters. Required.

base_parameters (dict): A dictionary which contains base parameters. Required.

initial_conditions (dict): A dictionary containing initial conditions for all species. Required.

inlet_conditions (dict): A dictionary containing inlet conditions for all species. Required.

feed (Feed): A Feed instance which contains the feed information. Required.

reactions (list): A list containing all types of reactions. Required.

species (list): A list containing all species. Required.

ode_system (Callable): A callable which outputs the ODE system compatible with scipy.integrate.solve_ivp. Required.

build_stoichiometric_matrix (Callable): A callable which builds the stoichiometric matrix. Required.

control_state (dict): A dictionary containing the states that are desired to be constant. Defaults to {}.

Returns:

Model: A model instance for downstream purposes.

Source code in adtoolbox/adm.py
class Model:

    """Any kinetic model could be an instance of this class.
    Args:
        model_parameters (dict): a dictionary which contains model parameters
        base_parameters (dict): a dictionary which contains base parameters
        initial_conditions (dict): a dictionary containing initial conditions for all species
        inlet_conditions (dict): a dictionary containing inlet conditions for all species
        feed (Feed): a Feed instance which contains the feed information
        reactions (list): a list containing all types of reactions
        species (list): a list containing all species
        ode_system (Callable): a callable which outputs the ODE system compatible with Scipy.integrate.solve_ivp
        build_stoichiometric_matrix (Callable): a callable which builds the stoichiometric matrix
        control_state (dict, optional): a dictionary containing the states that are desired to be constant. Defaults to {}.



    Returns:
        Model: returns a model instance for downstream purposes.
    """
    def __init__(self, 
                 model_parameters: dict,
                 base_parameters: dict,
                 initial_conditions: dict,
                 inlet_conditions:dict,
                 feed:Feed,
                 reactions: list, 
                 species: list, 
                 ode_system:Callable, 
                 build_stoichiometric_matrix:Callable,
                 control_state:dict={},
                 name:str="ADM", 
                 switch:str="DAE",
                 simulation_time:float=30,
                 time_limit:float=-1):

        self.model_parameters = model_parameters
        self.base_parameters = base_parameters
        self.feed=feed
        for state in control_state.keys():
            # Controlled states must already exist among the initial conditions.
            if state not in initial_conditions:
                raise KeyError(state)
            initial_conditions[state]=control_state[state]
        self.control_state=control_state
        self.inlet_conditions = np.array(
            [inlet_conditions[i+"_in"] for i in species])[:, np.newaxis]
        self.reactions = reactions
        self.species = species
        self.initial_conditions = np.array(
            [initial_conditions[i] for i in species])[:, np.newaxis]
        self._ic=initial_conditions
        self._inc=inlet_conditions
        self.switch = switch
        self.name = name
        self.build_stoichiometric_matrix = build_stoichiometric_matrix
        self.ode_system = ode_system
        self.sim_time=simulation_time
        self.time_limit=time_limit
        self.nitrogen_limited=False

    @property
    def s(self):
        """Returns the stoichiometric matrix of a model"""
        return self.build_stoichiometric_matrix(
            base_parameters=self.base_parameters,model_parameters= self.model_parameters,reactions= self.reactions,species= self.species,feed=self.feed, nitrogen_limited=self.nitrogen_limited)

    def update_parameters(self, 
                        model_parameters: dict|None=None,
                        base_parameters:  dict|None=None,
                        initial_conditions: dict|None=None,
                        inlet_conditions: dict|None=None)->None:
        """
        This method updates the parameters of the model. Each argument can be a dictionary containing the parameters to be updated.
        NOTE: It is important to note that you have to separate the different kinds of parameters.
        Args:
            model_parameters (dict): a dictionary which contains the model parameters to be updated as keys and their values as values.
            base_parameters (dict): a dictionary which contains the base parameters to be updated as keys and their values as values.
            initial_conditions (dict): a dictionary containing the initial conditions to be updated as keys and their values as values.
            inlet_conditions (dict): a dictionary containing the inlet conditions to be updated as keys and their values as values.

        Returns:
            None: This method does not return anything.
        """
        if model_parameters is not None:
            self.model_parameters.update(model_parameters)
        if base_parameters is not None:
            self.base_parameters.update(base_parameters)
        if initial_conditions is not None:
            for k,v in initial_conditions.items():
                self.initial_conditions[self.species.index(k)]=v
        if inlet_conditions is not None:
            for k,v in inlet_conditions.items():
                self.inlet_conditions[self.species.index(k)]=v




    def solve_model(self, t_eval: np.ndarray, method="BDF")->scipy.integrate._ivp.ivp.OdeResult:
        """
        Function to solve the model. 
        Examples:
            >>> import numpy as np
            >>> reactions=['rxn1','rxn2']
            >>> species=['a','b','c']
            >>> initial_conditions={'a':.001,'b':.002,'c':.003}
            >>> inlet_conditions={'a_in':.001,'b_in':.002,'c_in':.003}
            >>> model_parameters={'k1':0.001,'k2':0.002}
            >>> base_parameters={'T':0.1}
            >>> feed=Feed(10,20,20,20)
            >>> def build_stoiciometric_matrix(base_parameters,model_parameters,reactions,species,feed,nitrogen_limited=False):
            ...    s = np.zeros((len(species), len(reactions)))
            ...    s[[0,1],0]=[-1,0.001]
            ...    s[[1,2],1]=[-5,1]
            ...    return s
            >>> def ode_system(t,c,Model1):
            ...    v = np.zeros((len(Model1.reactions), 1))
            ...    v[0]=Model1.model_parameters['k1']*c[0]*Model1.base_parameters['T']/1000
            ...    v[1]=Model1.model_parameters['k2']*c[1]/1000
            ...    dCdt=np.matmul(Model1.s,v)
            ...    return dCdt[:, 0]
            >>> m= Model(model_parameters,base_parameters,initial_conditions,inlet_conditions,feed,reactions,species,ode_system,build_stoiciometric_matrix)
            >>> m.solve_model(np.linspace(0,0.1,10),method='RK45')['status']==0
            True

        Args:
            t_eval (np.ndarray): Time points at which the solution is reported
            method (str, optional): The method used to solve the ODE. Defaults to "BDF".

        Returns:
            scipy.integrate._ivp.ivp.OdeResult: The result of integrating the ODE system over the simulation time.
        """
        self.info={"Fluxes":[]}
        y0=self.initial_conditions[:, 0]
        try:
            self._be_time=time.time()
            c = scipy.integrate.solve_ivp(self.ode_system, (0,self.sim_time), y0, t_eval=t_eval, method=method, args=[self],rtol=1e-6)
            if not c.success:
                raise Exception
        except Exception as e:
            print("Could not solve model, setting C to a very large value")
            c=_Fake_Sol(np.ones((y0.shape[0],len(t_eval)))*1e10,t_eval)

        return c





    def plot(self, Sol: scipy.integrate._ivp.ivp.OdeResult, type: str = "Line")-> go.Figure:
        """ A function which returns a plot of the solution from the ODE
        """
        solution = {
            't': Sol.t,
        }
        for i in range(len(self.species)):
            solution[self.species[i]] = Sol.y[i, :]
        sol_df = pd.DataFrame(solution)

        if type == "Line":
            fig = px.line(sol_df, x="t", y=sol_df.columns,
                          title="Concentration of species")
            fig.update_layout(
                title={
                    'y': 0.95,
                    'x': 0.5,

                    "font_size": 30,
                    'xanchor': 'center',
                    'yanchor': 'top'}

            )
            fig.update_xaxes(
                title={
                    "text": "Time (Days)",
                    "font_size": 25,
                }
            )
            fig.update_yaxes(
                title={
                    "text": "Concentrations (kg COD/m^3)",
                    "font_size": 25,
                }
            )

        elif type == "Sankey":
            ### Maybe add a sankey plot here later
            pass

        return fig



    def dash_app(self, sol: scipy.integrate._ivp.ivp.OdeResult,
                 escher_map:str|None=os.path.join(PKG_DATA,"Modified_ADM_Map.json"),
                 cobra_model:str|None=os.path.join(PKG_DATA,"Modified_ADM_Model.json"),
                 **kwargs)->None:
        """A method that creates the dash web app for a model based on an ODE solution.

        Examples:
            >>> import numpy as np
            >>> reactions=['rxn1','rxn2']
            >>> species=['a','b','c']
            >>> initial_conditions={'a':.001,'b':.002,'c':.003}
            >>> inlet_conditions={'a_in':.001,'b_in':.002,'c_in':.003}
            >>> model_parameters={'k1':0.001,'k2':0.002}
            >>> base_parameters={'T':0.1}
            >>> feed=Feed(10,20,20,20)
            >>> def build_stoichiometric_matrix(base_parameters,model_parameters,reactions,species,feed,nitrogen_limited=False):
            ...    s = np.zeros((len(species), len(reactions)))
            ...    s[[0,1],0]=[-1,0.001]
            ...    s[[1,2],1]=[-5,1]
            ...    return s
            >>> def ode_system(t,c,model):
            ...    v = np.zeros((len(model.reactions), 1))
            ...    v[0]=model.model_parameters['k1']*c[0]*model.base_parameters['T']/1000
            ...    v[1]=model.model_parameters['k2']*c[1]/1000
            ...    dCdt=np.matmul(model.s,v)
            ...    return dCdt[:, 0]
            >>> m=Model(model_parameters=model_parameters,base_parameters=base_parameters,initial_conditions=initial_conditions,inlet_conditions=inlet_conditions,feed=feed,reactions=reactions,species=species,ode_system=ode_system,build_stoichiometric_matrix=build_stoichiometric_matrix)
            >>> m.solve_model(np.linspace(0,0.1,10),method='RK45')['status']==0
            True
            >>> m.dash_app(m.solve_model(np.linspace(0,30,1000)))

        Args:
            sol (scipy.integrate._ivp.ivp.OdeResult): The solution of the ODE system. This should be the output of the solve_model method.

        Returns:
            None: This method does not return anything.


        """
        if escher_map is not None:
            with open(escher_map,'rb') as f:
                escher_map=json.load(f)
        if cobra_model is not None:
            with open(cobra_model,'rb') as f:
                cobra_model=json.load(f)

        app = Dash(__name__, external_stylesheets=[dbc.themes.FLATLY])
        colors = {
            'background': '#659dbd',
            'text': '#3e4444'
        }


        solution = {
            't': sol.t,
        }
        for i in range(len(self.species)):
            solution[self.species[i]] = sol.y[i, :]
        sol_df = pd.DataFrame(solution)


        fig = px.line(sol_df, x="t", y=sol_df.columns,
                      title="Concentration of species")
        fig.update_layout(
        title={
        'y': 0.95,
        'x': 0.5,
        "font_size": 30,
        'xanchor': 'center',
        'yanchor': 'top'},
        legend=dict(font=dict(size= 20),),
        plot_bgcolor="rgba(0,0,0,0)",
        paper_bgcolor="rgba(0,0,0,0)",
            )
        fig.update_xaxes(
        title={
        "text": "Time (Days)",
        "font_size": 25,
            },
             tickfont_size=20,
        linecolor='grey',
        gridcolor='grey',
            )
        fig.update_yaxes(
        title={
        "text": "Concentrations (kg COD/m^3)",
        "font_size": 25,
         },
        tickfont_size=20,
        linecolor='grey',
        gridcolor='grey',

            )
        fig.update_traces(line=dict(width=3))

        styles={
            'table_width': '95%',
            'padding-left': '20px',
            'container_width': '85%'
        }
        page=[dbc.Container(
                        html.H1("ADToolbox Web Interface",style={"font-size":"70px", "padding-top":"50px"}),className="text-white bg-primary",style={"height":"300px","text-align": "center"}, fluid=True),
                        dbc.Container([dbc.Row(
                                    [dbc.Card([
                                        html.H2(f"{self.name} Concentration Plot", style={
                                            'textAlign': 'left',
                                            'color': colors['text'],
                                            'font-size': '15',
                                            'padding-top': '50px',
                                            'padding-bottom': '20px',
                                            'padding-left': styles['padding-left'] },
                                             className="card-title"),
                                        dcc.Graph(figure=fig, id='Concentrations_Line_Plot',
                                                style={
                                                "height":"600px",
                                                "padding-left": styles['padding-left'],
                                                'background-color': 'rgba(0,0,0,0)'}
                                                ),],className='bg-light'),

                                    dbc.Card([html.H3("Base Parameters", style={
                                        'textAlign': 'left',
                                        'color': colors['text'],
                                        'font-size': '15',
                                        'padding-top': '50px',
                                        'padding-bottom': '20px',
                                        'padding-left': styles['padding-left']
                                        }),
                                        dash_table.DataTable(
                                        id='base_parameters',
                                        columns=[{"name": i, "id": i,"type":"numeric"} for i in list(self.base_parameters.keys())],
                                        data=pd.DataFrame(self.base_parameters,index=[0]).to_dict('records'),
                                        editable=True,
                                        style_table={'overflowX': 'scroll', 'padding-left': '20px','padding-bottom':'30px', 'width': styles['table_width']},
                                        style_header={
                                        'color': 'black',
                                        'font-size': '30px',
                                            },
                                        style_data={
                                        'backgroundColor': 'rgb(250, 250, 250)',
                                        'color': 'black',
                                        'font-size': '25px'}),],className="bg-light"),

                                    dbc.Card([html.H3("Model Parameters", style={
                                        'textAlign': 'left',
                                        'color': colors['text'],
                                        'font-size': '15',
                                        'padding-top': '50px',
                                        'padding-bottom': '20px',
                                        'padding-left': styles['padding-left']
                                        }),
                                        dash_table.DataTable(
                                        id='model_parameters',
                                        columns=[{"name": i, "id": i,"type":"numeric"} for i in list(self.model_parameters.keys())],
                                        data=pd.DataFrame(self.model_parameters,index=[0]).to_dict('records'),
                                        editable=True,
                                        style_table={'overflowX': 'scroll', 'padding-left': '20px','padding-bottom':'30px', 'width': styles['table_width']},
                                        style_header={
                                        'color': 'black',
                                        'font-size': '30px',
                                            },
                                        style_data={
                                        'backgroundColor': 'rgb(250, 250, 250)',
                                        'color': 'black',
                                        'font-size': '25px'}),],className="bg-light"),

                                    dbc.Card([html.H3("Initial Conditions", style={
                                        'textAlign': 'left',
                                        'color': colors['text'],
                                        'font-size': '15',
                                        'padding-top': '50px',
                                        'padding-bottom': '20px',
                                        'padding-left': styles['padding-left']
                                        }),
                                        dash_table.DataTable(
                                        id='initial_conditions',
                                        columns=[{"name": i, "id": i,"type":"numeric"} for i in list(self._ic.keys())],
                                        data=pd.DataFrame(self._ic,index=[0]).to_dict('records'),
                                        editable=True,
                                        style_table={'overflowX': 'scroll', 'padding-left': '20px','padding-bottom':'30px', 'width': styles['table_width']},
                                        style_header={
                                        'color': 'black',
                                        'font-size': '30px',
                                            },
                                        style_data={
                                        'backgroundColor': 'rgb(250, 250, 250)',
                                        'color': 'black',
                                        'font-size': '25px'}),],className="bg-light"),

                                    dbc.Card([html.H3("Inlet Conditions", style={
                                        'textAlign': 'left',
                                        'color': colors['text'],
                                        'font-size': '15',
                                        'padding-top': '50px',
                                        'padding-bottom': '20px',
                                        'padding-left': styles['padding-left']
                                        }),
                                        dash_table.DataTable(
                                        id='inlet_conditions',
                                        columns=[{"name": i, "id": i,"type":"numeric"} for i in list(self._inc.keys())],
                                        data=pd.DataFrame(self._inc,index=[0]).to_dict('records'),
                                        editable=True,
                                        style_table={'overflowX': 'scroll', 'padding-left': '20px','padding-bottom':'30px', 'width': styles['table_width']},
                                        style_header={
                                        'color': 'black',
                                        'font-size': '30px',
                                            },
                                        style_data={
                                        'backgroundColor': 'rgb(250, 250, 250)',
                                        'color': 'black',
                                        'font-size': '25px'}),],className="bg-light"),
                                        ],className="bg-light")],fluid=True,className="bg-light",style={"width": styles['container_width']}),
                                    dbc.Container([dbc.Row(
                                    [
                                    html.H2("Escher Map", style={
                                    'textAlign': 'left',
                                    'color': colors['text'],
                                    'font-size': '15',
                                    'padding-top': '20px',
                                    'padding-bottom': '20px',
                                    'padding-left': styles['padding-left']
                                    }) ,

                                    dcc.Dropdown(["Show Map","Hide Map"],
                                     self.reactions[0], style={"width": "300px","font-size":25,'padding-left':'20px'}, id="Drop_Down_Escher"),
                                    html.Div(children=None,id="Escher_",style={"height": "100px",'padding-bottom':'20px'}),
                                    ])], fluid=True,className="bg-light pb-3",style={"width": styles['container_width']}),
            dbc.Container(html.Div(children=None,id="Escher",style={'align':'center'}),fluid=True,className="bg-light pb-3",style={"width": styles['container_width']}),
        ]
        if escher_map is None:
            # Drop the two Escher-related containers when no map is provided.
            page.pop(-1)
            page.pop(-1)


        app.layout = html.Div(page)

        @app.callback(Output(component_id="Escher_", component_property='children'), Input(component_id="Drop_Down_Escher", component_property='value'))
        def escher_wrapper(drop_down_escher):
            print("drop_down_escher")
            if drop_down_escher=="Show Map":
                Labels={}
                for i in range(0,self.sim_time,int(self.sim_time/20)):
                    Labels[i]={'label':str(i),'style':{'color': '#77b0b1'}}
                Labels[self.sim_time]=self.sim_time
                return [html.H2("Time (Day)",style={'textAlign': 'center'}),dcc.Slider(0,self.sim_time,int(self.sim_time/20),value=0,id="Escher_Slider",marks=None,tooltip={"placement": "bottom", "always_visible": True})]

        @app.callback(Output(component_id="Escher", component_property='children'), Input(component_id="Drop_Down_Escher", component_property='value'),
        Input(component_id="Escher_Slider", component_property='value'),prevent_initial_call=True)        
        def draw_escher(drop_down_escher,escher_slider):
            rxn_data={}
            self.ode_system(0,sol.y[:,int(sol.y.shape[1]/self.sim_time*escher_slider)],self)
            fluxes=self.info["Fluxes"]
            for ind,i in enumerate(self.reactions):
                rxn_data[i.replace(" ","_")]= fluxes[ind]
            if kwargs.get('min_flux',None):
                min_scale={ 'type': 'value','value':kwargs.get('min_flux') , 'color': 'red','size':10 }
            else:
                min_scale={ 'type': 'min' , 'color': 'red','size':10 }
            if kwargs.get('max_flux',None):
                max_scale={ 'type': 'value','value':kwargs.get('max_flux') , 'color': 'green','size':10 }
            else:
                max_scale={ 'type': 'max', 'color': 'green','size':10 }

            if drop_down_escher=="Show Map":
                return [dash_escher.DashEscher(mapData=escher_map,modelData=cobra_model,
            options={
             'reaction_data':rxn_data,
             'enable_keys':False,
             'reaction_scale':[min_scale,max_scale],
            }
            ,height='1000px',
        width='100%')
             ]
        @app.callback(Output(component_id='Concentrations_Line_Plot', component_property='figure'),
                    Input(component_id='base_parameters', component_property='data'),
                    Input(component_id='model_parameters', component_property='data'),
                    Input(component_id='initial_conditions', component_property='data'),
                    Input(component_id='inlet_conditions', component_property='data'),
                    prevent_initial_call=True
                    )
        def update_graph_fig(base_parameters: dict, model_parameters:dict, initial_conditions: dict, inlet_conditions: dict)->plotly.graph_objects.Figure:

            if len(self.control_state.keys()):
                for i in self.control_state.keys():
                    self.control_state[i]=initial_conditions[0][i]
            if len(base_parameters):
                self.base_parameters = base_parameters[0]
            if len(model_parameters):
                self.model_parameters = model_parameters[0]
            self.initial_conditions = np.array(
            [initial_conditions[0][i] for i in self.species])[:, np.newaxis]
            self.inlet_conditions = np.array(
            [inlet_conditions[0][i+"_in"] for i in self.species])[:, np.newaxis]
            update_sol = self.solve_model(np.linspace(0, self.sim_time, 10000))

            sol=update_sol
            solution = {
                    't': update_sol.t,
                        }
            for i in range(len(self.species)):
                solution[self.species[i]] = update_sol.y[i, :]
            sol_df = pd.DataFrame(solution)

            fig = px.line(sol_df, x="t", y=sol_df.columns,
                          title="Concentration of species")
            fig.update_layout(
            title={
            'y': 0.95,
            'x': 0.5,
            "font_size": 30,
            'xanchor': 'center',
            'yanchor': 'top'},
            legend=dict(font=dict(size= 20),),
            plot_bgcolor="rgba(0,0,0,0)",
            paper_bgcolor="rgba(0,0,0,0)",

                )
            fig.update_xaxes(
            title={
            "text": "Time (Days)",
            "font_size": 25,
                },
                 tickfont_size=20,
            linecolor='grey',
            gridcolor='grey',
                )
            fig.update_yaxes(
            title={
            "text": "Concentrations (kg COD/m^3)",
            "font_size": 25,
             },
            tickfont_size=20,
            linecolor='grey',
            gridcolor='grey',


                )
            fig.update_traces(line=dict(width=3))
            return fig



        app.run_server(port=8000, host='127.0.0.1')

    def csv_report(self,sol: scipy.integrate._ivp.ivp.OdeResult ,address: str)->None:
        """Converts the results to a pandas data frame then to a csv"""
        df = pd.DataFrame(sol.y, columns=sol.t, index=self.species)
        df.to_csv(os.path.join(address,self.name+"_Report.csv"), header=True,
                  index=True)

    def copy(self):
        """Returns a copy of the model"""
        return type(self)(model_parameters=self.model_parameters.copy(),
                          base_parameters=self.base_parameters.copy(),
                          initial_conditions=self._ic.copy(),
                          inlet_conditions=self._inc.copy(),
                          feed=self.feed,
                          reactions=self.reactions.copy(),
                          species=self.species.copy(),
                          ode_system=self.ode_system,
                          build_stoichiometric_matrix=self.build_stoichiometric_matrix,
                          control_state=self.control_state.copy(),
                          name=self.name,
                          switch=self.switch,
                          time_limit=self.time_limit,
                          simulation_time=self.sim_time)

    def build_cobra_model(self,address:str=None):
        """This method builds a cobra model from an instance of Model. One particular use
        of such models is to build an escher map from the model.
        Args:
            address (str, optional): The address to save the model. Defaults to None.
        """
        try:
            import cobra
        except ImportError:
            raise ImportError("CobraPy is not installed, please install it to use this function")
        model = cobra.Model(self.name)
        for reaction in self.reactions:
            temp_reaction = cobra.Reaction(reaction.replace(" ", "_"), name=reaction.replace(" ", "_"))
            temp_mets = np.where(self.s[:, self.reactions.index(reaction)] != 0)
            met_dict = {}
            for met in temp_mets[0]:
                metabolite = cobra.Metabolite(self.species[met].replace(" ", "_"),
                                              name=self.species[met].replace(" ", "_"), compartment="Model")
                met_dict[metabolite] = self.s[met, self.reactions.index(reaction)]
            temp_reaction.add_metabolites(met_dict)
            model.add_reactions([temp_reaction])
        if address:
            cobra.io.save_json_model(model, address)
        return model

s property

Returns the stoichiometric matrix of a model
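
A quick sanity check (a sketch, assuming an existing Model instance m): the matrix is rebuilt on every access from the current parameters and feed, with one row per species and one column per reaction.

s = m.s
assert s.shape == (len(m.species), len(m.reactions))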

build_cobra_model(address=None)

This method builds a cobra model from an instance of Model. One particular use of such models is to build an escher map from the model.

Parameters:

    address (str, optional): The address to save the model. Defaults to None.
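
A minimal usage sketch, assuming an existing Model instance m and CobraPy installed; the file name is hypothetical:

cobra_model = m.build_cobra_model(address="my_adm_model.json")  # address is optional
print(len(cobra_model.reactions), len(cobra_model.metabolites))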

copy()

Returns a copy of the model
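
A short sketch, assuming an existing Model instance m (the parameter name is illustrative): the copy owns its own parameter dictionaries, so it can be tuned without touching the original.

m2 = m.copy()
m2.update_parameters(model_parameters={"k1": 0.01})  # m is unchanged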


csv_report(sol, address)

Converts the results to a pandas DataFrame and writes them to a CSV file in the given directory.
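
Usage sketch, assuming a Model instance m; the output directory is illustrative:

import numpy as np
sol = m.solve_model(np.linspace(0, m.sim_time, 100))
m.csv_report(sol, address=".")  # writes ./<model name>_Report.csv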


dash_app(sol, escher_map=os.path.join(PKG_DATA, 'Modified_ADM_Map.json'), cobra_model=os.path.join(PKG_DATA, 'Modified_ADM_Model.json'), **kwargs)

A method that creates the dash web app for a model based on an ODE solution.

Examples:

>>> import numpy as np
>>> reactions=['rxn1','rxn2']
>>> species=['a','b','c']
>>> initial_conditions={'a':.001,'b':.002,'c':.003}
>>> inlet_conditions={'a_in':.001,'b_in':.002,'c_in':.003}
>>> model_parameters={'k1':0.001,'k2':0.002}
>>> base_parameters={'T':0.1}
>>> feed=Feed(10,20,20,20)
>>> def build_stoichiometric_matrix(base_parameters,model_parameters,reactions,species,feed,nitrogen_limited=False):
...    s = np.zeros((len(species), len(reactions)))
...    s[[0,1],0]=[-1,0.001]
...    s[[1,2],1]=[-5,1]
...    return s
>>> def ode_system(t,c,model):
...    v = np.zeros((len(model.reactions), 1))
...    v[0]=model.model_parameters['k1']*c[0]*model.base_parameters['T']/1000
...    v[1]=model.model_parameters['k2']*c[1]/1000
...    dCdt=np.matmul(model.s,v)
...    return dCdt[:, 0]
>>> m=Model(model_parameters=model_parameters,base_parameters=base_parameters,initial_conditions=initial_conditions,inlet_conditions=inlet_conditions,feed=feed,reactions=reactions,species=species,ode_system=ode_system,build_stoichiometric_matrix=build_stoichiometric_matrix)
>>> m.solve_model(np.linspace(0,0.1,10),method='RK45')['status']==0
True
>>> m.dash_app(m.solve_model(np.linspace(0,30,1000)))

Parameters:

    sol (scipy.integrate._ivp.ivp.OdeResult, required): The solution of the ODE system. This should be the output of the solve_model method.

Returns:

    None: This method does not return anything.
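
Beyond the doctest above, a sketch of launching the app without the Escher section (both map arguments default to the bundled Modified-ADM files):

sol = m.solve_model(np.linspace(0, m.sim_time, 1000))
m.dash_app(sol, escher_map=None, cobra_model=None)  # serves on 127.0.0.1:8000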


plot(Sol, type='Line')

Returns a plotly figure of the species concentration profiles from an ODE solution.
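
Usage sketch, assuming a Model instance m:

import numpy as np
fig = m.plot(m.solve_model(np.linspace(0, m.sim_time, 200)), type="Line")
fig.show()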


solve_model(t_eval, method='BDF')

Function to solve the model.

Examples:

>>> import numpy as np
>>> reactions=['rxn1','rxn2']
>>> species=['a','b','c']
>>> initial_conditions={'a':.001,'b':.002,'c':.003}
>>> inlet_conditions={'a_in':.001,'b_in':.002,'c_in':.003}
>>> model_parameters={'k1':0.001,'k2':0.002}
>>> base_parameters={'T':0.1}
>>> feed=Feed(10,20,20,20)
>>> def build_stoichiometric_matrix(base_parameters,model_parameters,reactions,species,feed,nitrogen_limited=False):
...    s = np.zeros((len(species), len(reactions)))
...    s[[0,1],0]=[-1,0.001]
...    s[[1,2],1]=[-5,1]
...    return s
>>> def ode_system(t,c,model):
...    v = np.zeros((len(model.reactions), 1))
...    v[0]=model.model_parameters['k1']*c[0]*model.base_parameters['T']/1000
...    v[1]=model.model_parameters['k2']*c[1]/1000
...    dCdt=np.matmul(model.s,v)
...    return dCdt[:, 0]
>>> m=Model(model_parameters=model_parameters,base_parameters=base_parameters,initial_conditions=initial_conditions,inlet_conditions=inlet_conditions,feed=feed,reactions=reactions,species=species,ode_system=ode_system,build_stoichiometric_matrix=build_stoichiometric_matrix)
>>> m.solve_model(np.linspace(0,0.1,10),method='RK45')['status']==0
True

Parameters:

    t_eval (np.ndarray, required): Time points at which the solution is reported.
    method (str, optional): The method used to solve the ODE. Defaults to "BDF".

Returns:

    scipy.integrate._ivp.ivp.OdeResult: The result of integrating the ODE system over the simulation time.
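
A defensive-usage sketch: when integration fails, solve_model prints a warning and returns a placeholder solution filled with 1e10 rather than raising, so unusually large values signal failure.

sol = m.solve_model(np.linspace(0, m.sim_time, 500))
if (sol.y > 1e9).any():
    print("Integration failed; check parameters and initial conditions.")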


update_parameters(model_parameters=None, base_parameters=None, initial_conditions=None, inlet_conditions=None)

This method updates the parameters of the model. Each argument can be a dictionary containing the parameters to be updated. NOTE: Each kind of parameter must be passed through its corresponding argument; they are not interchangeable.

Parameters:

    model_parameters (dict, optional): a dictionary which contains the model parameters to be updated as keys and their values as values. Defaults to None.
    base_parameters (dict, optional): a dictionary which contains the base parameters to be updated as keys and their values as values. Defaults to None.
    initial_conditions (dict, optional): a dictionary containing the initial conditions to be updated as keys and their values as values. Defaults to None.
    inlet_conditions (dict, optional): a dictionary containing the inlet conditions to be updated as keys and their values as values. Defaults to None.

Returns:

    None: This method does not return anything.
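
Usage sketch, assuming an ADM-style Model instance m; the parameter and species names are illustrative:

m.update_parameters(model_parameters={"k_dis": 0.5},
                    initial_conditions={"S_su": 0.01})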


adm1_ode_sys(t, c, model)

The ODE system for the original ADM1 model. No input validation is performed.

Parameters:

    t (float, required): the current time point of the integration
    c (np.ndarray, required): the current array of species concentrations
    model (Model, required): the instance of Model used to evaluate the ODE

Returns:

    np.ndarray: The output is dCdt, the change of concentration with respect to time.
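
This function is normally passed to Model as its ode_system callback and called internally as ode_system(t, c, model). A sketch of a direct call, assuming an ADM1 Model instance m (note that the function mutates c, hence the copy):

dcdt = adm1_ode_sys(0.0, m.initial_conditions[:, 0].copy(), m)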

Source code in adtoolbox/adm.py
def adm1_ode_sys(t: float, c: np.ndarray, model:Model)-> np.ndarray:
    """ The ODE system for the original ADM.
        No testing is done.

        Args:
            t (float):a matrix of zeros to be filled
            c (np.ndarray): an array of concentrations to be filled
            Model (Model): The an instance of Model to calculate ODE with

        Returns:
            np.ndarray: The output is dCdt, the change of concentration with respect to time.
    """
    c[34] = c[10] - c[33]
    c[32] = c[9] - c[31]
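    # pH, nitrogen-limitation, hydrogen and ammonia inhibition factors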
    I_pH_aa = (model.model_parameters["K_pH_aa"] ** model.model_parameters['nn_aa'])/(np.power(
        c[26], model.model_parameters['nn_aa']) + np.power(model.model_parameters["K_pH_aa"], model.model_parameters['nn_aa']))
    I_pH_ac = (model.model_parameters['K_pH_ac'] ** model.model_parameters["n_ac"])/(
        c[26] ** model.model_parameters['n_ac'] + model.model_parameters['K_pH_ac'] ** model.model_parameters['n_ac'])
    I_pH_h2 = (model.model_parameters['K_pH_h2']**model.model_parameters['n_h2'])/(
        c[26] ** model.model_parameters['n_h2'] + model.model_parameters['K_pH_h2']**model.model_parameters['n_h2'])
    I_IN_lim = 1 / (1+(model.model_parameters['K_S_IN'] / c[10]))
    I_h2_fa = 1 / (1+(c[7] / model.model_parameters['K_I_h2_fa']))
    I_h2_c4 = 1 / (1+(c[7]/model.model_parameters['K_I_h2_c4']))
    I_h2_pro = (1/(1+(c[7]/model.model_parameters['K_I_h2_pro'])))
    I_nh3 = 1/(1+(c[33]/model.model_parameters['K_I_nh3']))
    I5 = (I_pH_aa * I_IN_lim)
    I6 = np.copy(I5)
    I7 = (I_pH_aa * I_IN_lim * I_h2_fa)
    I8 = (I_pH_aa * I_IN_lim * I_h2_c4)
    I9 = np.copy(I8)
    I10 = (I_pH_aa * I_IN_lim * I_h2_pro)
    I11 = (I_pH_ac * I_IN_lim * I_nh3)
    I12 = (I_pH_h2 * I_IN_lim)
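    # process rates: disintegration, hydrolysis, substrate uptake, and biomass decay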
    v = np.zeros((len(model.reactions), 1))
    v[0] = model.model_parameters["k_dis"]*c[12]

    v[1] = model.model_parameters['k_hyd_ch']*c[13]
    v[2] = model.model_parameters['k_hyd_pr']*c[14]
    v[3] = model.model_parameters['k_hyd_li']*c[15]

    v[4] = model.model_parameters['k_m_su']*c[0] / \
(model.model_parameters['K_S_su']+c[0])*c[16]*I5
    v[5] = model.model_parameters['k_m_aa']*c[1] / \
        (model.model_parameters['K_S_aa']+c[1])*c[17]*I6
    v[6] = model.model_parameters['k_m_fa']*c[2] / \
        (model.model_parameters['K_S_fa']+c[2])*c[18]*I7
    v[7] = model.model_parameters['k_m_c4']*c[3] / \
        (model.model_parameters['K_S_c4']+c[3]) * \
        c[19]*c[3]/(c[3]+c[4]+10 ** (-6))*I8
    v[8] = model.model_parameters['k_m_c4']*c[4] / \
        (model.model_parameters['K_S_c4']+c[4]) * \
        c[19]*c[4]/(c[4]+c[3]+10 ** (-6))*I9
    v[9] = model.model_parameters['k_m_pr']*c[5] / \
        (model.model_parameters['K_S_pro']+c[5])*c[20]*I10
    v[10] = model.model_parameters['k_m_ac']*c[6] / \
        (model.model_parameters['K_S_ac']+c[6])*c[21]*I11
    v[11] = model.model_parameters['k_m_h2']*c[7] / \
        (model.model_parameters['K_S_h2']+c[7])*c[22]*I12
    v[12] = model.model_parameters['k_dec_X_su']*c[16]
    v[13] = model.model_parameters['k_dec_X_aa']*c[17]
    v[14] = model.model_parameters['k_dec_X_fa']*c[18]
    v[15] = model.model_parameters['k_dec_X_c4']*c[19]
    v[16] = model.model_parameters['k_dec_X_pro']*c[20]
    v[17] = model.model_parameters['k_dec_X_ac']*c[21]
    v[18] = model.model_parameters['k_dec_X_h2']*c[22]
    v[19] = model.model_parameters['k_A_B_va'] * \
        (c[27] * (model.model_parameters['K_a_va'] + c[26]) -
         model.model_parameters['K_a_va'] * c[3])
    v[20] = model.model_parameters['k_A_B_bu'] * \
        (c[28] * (model.model_parameters['K_a_bu'] + c[26]) -
         model.model_parameters['K_a_bu'] * c[4])
    v[21] = model.model_parameters['k_A_B_pro'] * \
        (c[29] * (model.model_parameters['K_a_pro'] + c[26]) -
         model.model_parameters['K_a_pro'] * c[5])
    v[22] = model.model_parameters['k_A_B_ac'] * \
        (c[30] * (model.model_parameters['K_a_ac'] + c[26]) -
         model.model_parameters['K_a_ac'] * c[6])
    v[23] = model.model_parameters['k_A_B_co2'] * \
        (c[31] * (model.model_parameters['K_a_co2'] + c[26]) -
         model.model_parameters['K_a_co2'] * c[9])
    v[24] = model.model_parameters['k_A_B_IN'] * \
        (c[33] * (model.model_parameters['K_a_IN'] + c[26]) -
         model.model_parameters['K_a_IN'] * c[10])
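    # Gas-phase partial pressures: COD-based gas concentrations are converted to
    # a molar basis (16 gCOD/mol for H2, 64 gCOD/mol for CH4); CO2 is already molar.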
    p_gas_h2 = c[35] * model.base_parameters["R"] * \
        model.base_parameters["T_op"] / 16
    p_gas_ch4 = c[36] * model.base_parameters["R"] * \
        model.base_parameters["T_op"] / 64
    p_gas_co2 = c[37] * model.base_parameters["R"] * \
        model.base_parameters["T_op"]
    p_gas_h2o = 0.0313 * \
        np.exp(5290 *
               (1 / model.base_parameters["T_base"] - 1 / model.base_parameters["T_op"]))
    P_gas = p_gas_h2 + p_gas_ch4 + p_gas_co2 + p_gas_h2o
    q_gas = max(
        0, (model.model_parameters['k_p'] * (P_gas - model.base_parameters['P_atm'])))
    v[25] = model.model_parameters['k_L_a'] * \
        (c[7] - 16 * model.model_parameters['K_H_h2'] * p_gas_h2)
    v[26] = model.model_parameters['k_L_a'] * \
        (c[8] - 64 * model.model_parameters['K_H_ch4'] * p_gas_ch4)
    v[27] = model.model_parameters['k_L_a'] * \
        (c[32] - model.model_parameters['K_H_co2'] * p_gas_co2)
    dCdt = np.matmul(model.s, v)

    if c[model.species.index('S_IN')]<0.01:
        model.nitrogen_limited=True
    else:
        model.nitrogen_limited=False

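    # Electroneutrality: phi sums cations minus anions; the proton concentration
    # c[26] then follows from the charge balance and the water constant K_w.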
    phi = c[24]+c[34]-c[31] - (c[30] / 64) - (c[29] / 112) - (c[28] / 160) - (c[27] / 208) - c[25]
    c[26] = (-1 * phi / 2) + (0.5 * np.sqrt(phi**2 + 4 * model.model_parameters['K_w']))

    dCdt[0: 35] = dCdt[0: 35]+model.base_parameters['q_in'] / model.base_parameters["V_liq"] * \
        (model.inlet_conditions[0: 35]-c[0:35].reshape(-1, 1))


    dCdt[35:] = dCdt[35:]+q_gas/model.base_parameters["V_gas"] * (model.inlet_conditions[35:]-c[35:].reshape(-1, 1))
    dCdt[[26, 32, 34], 0] = 0
    if model.switch == "DAE":
        dCdt[7] = 0
        dCdt[27: 32] = 0
        dCdt[33] = 0

    if model.control_state.keys():
        for state in model.control_state.keys():
            c[model.species.index(state)]=model.control_state[state]
            dCdt[model.species.index(state)]=0


    return dCdt[:, 0]
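
A minimal usage sketch (not part of ADToolbox; the helper name integrate_adm1 and its defaults are assumptions for illustration): because adm1_ode_sys follows the (t, c, model) signature, it can be integrated with scipy.integrate.solve_ivp by binding the model argument.

import numpy as np
from scipy.integrate import solve_ivp

def integrate_adm1(model, c0, t_span=(0.0, 30.0)):
    # Bind the Model instance so the solver sees the standard f(t, y) signature.
    return solve_ivp(
        fun=lambda t, c: adm1_ode_sys(t, c, model),
        t_span=t_span,
        y0=np.asarray(c0, dtype=float),
        method="LSODA",  # a stiff-capable solver; AD kinetics are typically stiff
    )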

build_adm1_stoiciometric_matrix(base_parameters, model_parameters, reactons, species, feed, nitrogen_limited=False)

This function builds the stoichiometric matrix for the ADM1 Model.

Parameters:

Name | Type | Description | Default
base_parameters | dict | a dictionary containing the base parameters | required
model_parameters | dict | a dictionary containing the model parameters | required
reactons | list | a list containing all reactions | required
species | list | a list containing all species | required
feed | Feed | a Feed instance which contains the feed information | required
nitrogen_limited | bool | whether the model is nitrogen limited | False

Returns:

Type | Description
np.ndarray | the stoichiometric matrix of the ADM1 model

Source code in adtoolbox/adm.py
def build_adm1_stoiciometric_matrix(base_parameters: dict, model_parameters: dict, reactons: list, species:list,feed:Feed,nitrogen_limited:bool=False)-> np.ndarray:
    """This function builds the stoichiometric matrix for the ADM1 Model.
    Args:
        base_parameters (dict): a dictionary containing the base parameters
        model_parameters (dict): a dictionary containing the model parameters
        reactons (list): a list containing all reactions
        species (list): a list containing all species
        feed (Feed): a Feed instance which contains the feed information
        nitrogen_limited (bool, optional): A boolean which indicates whether the model is nitrogen limited. Defaults to False.

    Returns:
        np.ndarray: Returns the stoichiometric matrix of the ADM1 model.
    """

    S = np.zeros((len(species), len(reactons)))
    S[0, [1, 3, 4]] = [1, (1-model_parameters["f_fa_li"]), - 1]
    S[1, [2, 5]] = [1, -1]
    S[2, [3, 6]] = [(model_parameters["f_fa_li"]), - 1]
    Y_aa=0 if nitrogen_limited else model_parameters['Y_aa']
    S[3, [5, 7]] = [(1-Y_aa) *
                    model_parameters['f_va_aa'], - 1]
    Y_su=0 if nitrogen_limited else model_parameters['Y_su']
    S[4, [4, 5, 8]] = [(1-Y_su)*model_parameters['f_bu_su'],
                       (1-Y_aa)*model_parameters["f_bu_aa"], - 1]
    S[5, [4, 5, 7, 9]] = [(1-Y_su)*model_parameters['f_pro_su'],
                          (1-Y_aa)*model_parameters["f_pro_aa"], (1 - model_parameters['Y_c4'])*0.54, -1]

    Y_fa=0 if nitrogen_limited else model_parameters['Y_fa'] 
    S[6, [4, 5, 6, 7, 8, 9, 10]] = [(1-Y_su)*model_parameters['f_ac_su'],
                                    (1-Y_aa) *
                                    model_parameters['f_ac_aa'],
                                    (1-Y_fa)*0.7,
                                    (1-model_parameters['Y_c4'])*0.31,
                                    (1-model_parameters['Y_c4'])*0.8,
                                    (1-model_parameters['Y_pro'])*0.57,
                                    -1]
    S[7, [4, 5, 6, 7, 8, 9, 11, 25]] = [(1-Y_su)*model_parameters['f_h2_su'],
                                        (1-Y_aa) *
                                        model_parameters['f_h2_aa'],
                                        (1-Y_fa)*0.3,
                                        (1-model_parameters['Y_c4'])*0.15,
                                        (1-model_parameters['Y_c4'])*0.2,
                                        (1-model_parameters['Y_pro'])*0.43,
                                        -1,
                                        -1]
    S[8, [10, 11, 26]] = [(1-model_parameters['Y_ac']),
                          (1-model_parameters['Y_h2']),
                          -1]
    s_1 = (-1 * model_parameters['C_xc'] + model_parameters['f_sI_xc'] * model_parameters['C_sI'] + model_parameters['f_ch_xc'] * model_parameters['C_ch'] +
           model_parameters['f_pr_xc'] * model_parameters['C_pr'] + model_parameters['f_li_xc'] * model_parameters['C_li'] + model_parameters['f_xI_xc'] * model_parameters['C_xI'])
    s_2 = (-1 * model_parameters['C_ch'] + model_parameters['C_su'])
    s_3 = (-1 * model_parameters['C_pr'] + model_parameters['C_aa'])
    s_4 = (-1 * model_parameters['C_li'] + (1 - model_parameters['f_fa_li']) *
           model_parameters['C_su'] + model_parameters['f_fa_li'] * model_parameters['C_fa'])
    s_5 = (-1 * model_parameters['C_su'] + (1 - Y_su) * (model_parameters['f_bu_su'] * model_parameters['C_bu'] + model_parameters['f_pro_su']
                                                                             * model_parameters['C_pro'] + model_parameters['f_ac_su'] * model_parameters['C_ac']) + Y_su * model_parameters['C_bac'])
    s_6 = (-1 * model_parameters['C_aa'] + (1 - Y_aa) * (model_parameters['f_va_aa'] * model_parameters['C_va'] + model_parameters['f_bu_aa'] * model_parameters['C_bu'] +
                                                                             model_parameters['f_pro_aa'] * model_parameters['C_pro'] + model_parameters['f_ac_aa'] * model_parameters['C_ac']) + Y_aa * model_parameters['C_bac'])
    s_7 = (-1 * model_parameters['C_fa'] + (1 - Y_fa) * 0.7 *
           model_parameters['C_ac'] + Y_fa * model_parameters['C_bac'])
    s_8 = (-1 * model_parameters['C_va'] + (1 - model_parameters['Y_c4']) * 0.54 * model_parameters['C_pro'] + (
        1 - model_parameters['Y_c4']) * 0.31 * model_parameters['C_ac'] + model_parameters['Y_c4'] * model_parameters['C_bac'])
    s_9 = (-1 * model_parameters['C_bu'] + (1 - model_parameters['Y_c4']) * 0.8 *
           model_parameters['C_ac'] + model_parameters['Y_c4'] * model_parameters['C_bac'])
    s_10 = (-1 * model_parameters['C_pro'] + (1 - model_parameters['Y_pro']) * 0.57 *
            model_parameters['C_ac'] + model_parameters['Y_pro'] * model_parameters['C_bac'])
    s_11 = (-1 * model_parameters['C_ac'] + (1 - model_parameters['Y_ac']) *
            model_parameters['C_ch4'] + model_parameters['Y_ac'] * model_parameters['C_bac'])
    s_12 = ((1 - model_parameters['Y_h2']) * model_parameters['C_ch4'] +
            model_parameters['Y_h2'] * model_parameters['C_bac'])
    s_13 = (-1 * model_parameters['C_bac'] + model_parameters['C_xc'])
    S[9, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 27]] = [-s_1, -s_2, -s_3, -s_4, -
                                                                                    s_5, -s_6, -s_7, -s_8, -s_9, -s_10, -s_11, -s_12, -s_13, -s_13, -s_13, -s_13, -s_13, -s_13, -s_13, -1]
    S[10, [0, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18]] = [model_parameters['N_xc']-model_parameters['f_xI_xc']*model_parameters['N_I']-model_parameters['f_sI_xc']*model_parameters['N_I']-model_parameters['f_pr_xc']*model_parameters['N_aa'],
                                                                        -Y_su*model_parameters['N_bac'],
                                                                        model_parameters['N_aa']-Y_aa *
                                                                        model_parameters['N_bac'],
                                                                        -Y_fa*model_parameters['N_bac'],
                                                                        -model_parameters['Y_c4']*model_parameters['N_bac'],
                                                                        -model_parameters['Y_c4']*model_parameters['N_bac'],
                                                                        -model_parameters['Y_pro']*model_parameters['N_bac'],
                                                                        -model_parameters['Y_ac']*model_parameters['N_bac'],
                                                                        -model_parameters['Y_h2']*model_parameters['N_bac'],
                                                                        model_parameters['N_bac'] -
                                                                        model_parameters['N_xc'],
                                                                        model_parameters['N_bac'] -
                                                                        model_parameters['N_xc'],
                                                                        model_parameters['N_bac'] -
                                                                        model_parameters['N_xc'],
                                                                        model_parameters['N_bac'] -
                                                                        model_parameters['N_xc'],
                                                                        model_parameters['N_bac'] -
                                                                        model_parameters['N_xc'],
                                                                        model_parameters['N_bac'] -
                                                                        model_parameters['N_xc'],
                                                                        model_parameters['N_bac']-model_parameters['N_xc']]
    S[11, 0] = model_parameters['f_sI_xc']
    S[12, [0, 12, 13, 14, 15, 16, 17, 18]] = [-1, 1, 1, 1, 1, 1, 1, 1]
    S[13, [0, 1]] = [model_parameters['f_ch_xc'], -1]
    S[14, [0, 2]] = [model_parameters['f_pr_xc'], -1]
    S[15, [0, 3]] = [model_parameters['f_li_xc'], -1]
    S[16, [4, 12]] = [Y_su, -1]
    S[17, [5, 13]] = [Y_aa, -1]
    S[18, [6, 14]] = [Y_fa, -1]
    S[19, [7, 8, 15]] = [model_parameters['Y_c4'], model_parameters['Y_c4'], -1]
    S[20, [9, 16]] = [model_parameters['Y_pro'], -1]
    S[21, [10, 17]] = [model_parameters['Y_ac'], -1]
    S[22, [11, 18]] = [model_parameters['Y_h2'], -1]
    S[23, 0] = model_parameters['f_xI_xc']
    S[24, :] = 0
    S[25, :] = 0
    S[26, :] = 0
    S[27, 19] = -1
    S[28, 20] = -1
    S[29, 21] = -1
    S[30, 22] = -1
    S[31, 23] = -1
    S[32, :] = 0
    S[33, 24] = -1
    S[34, :] = 0
    S[35, 25] = base_parameters['V_liq']/base_parameters['V_gas']
    S[36, 26] = base_parameters['V_liq']/base_parameters['V_gas']
    S[37, 27] = base_parameters['V_liq']/base_parameters['V_gas']
    return S
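
For orientation, a short self-contained sketch of the shape conventions (the toy sizes below match the ADM1 code above; the stand-in arrays are illustrative, not ADToolbox API): with n species and m reactions, S is (n, m), and the reaction term of the ODE is the matrix product of S with the (m, 1) rate vector v, exactly as np.matmul(model.s, v) in adm1_ode_sys.

import numpy as np

n_species, n_reactions = 38, 28          # sizes used by adm1_ode_sys above
S = np.zeros((n_species, n_reactions))   # stand-in for the matrix built here
v = np.zeros((n_reactions, 1))           # per-reaction rate vector
dCdt = np.matmul(S, v)                   # reaction contribution to dC/dt
assert dCdt.shape == (n_species, 1)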

build_e_adm_2_stoichiometric_matrix(base_parameters, model_parameters, reactions, species, feed, nitrogen_limited=False)

This function builds the stoichiometric matrix for the e-ADM2 model.

Parameters:

Name | Type | Description | Default
base_parameters | dict | a dictionary which contains the base parameters | required
model_parameters | dict | a dictionary which contains the model parameters | required
reactions | list | a list containing all of the reaction names | required
species | list | a list containing all species | required
feed | Feed | a Feed instance which contains the feed information | required
nitrogen_limited | bool | whether the model is nitrogen limited | False

Returns:

Type | Description
np.ndarray | the stoichiometric matrix of the e-ADM2 model

Source code in adtoolbox/adm.py
def build_e_adm_2_stoichiometric_matrix(base_parameters: dict,
                                             model_parameters: dict,
                                             reactions: list,
                                             species: list,
                                             feed:Feed,
                                             nitrogen_limited:bool=False)->np.ndarray:
    """ 
    This function builds the stoichiometric matrix for e-ADM2 Model.

        Model Parameters (dict): a dictionary which contains model parameters
        base_parameters (dict): a dictionary which contains base paramters
        Initial Conditions (dict): a dictionary containing inlet conditions for all species
        Inlet Conditions (dict): a dictionary containing inlet conditions for all species
        reactions (list): a list containing all of the reaction names
        species (list): a list containing all species

    Returns:
        np.ndarray: Returns an matrix of stochiometic values.
    """
    S = np.zeros((len(species), len(reactions)))
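    # Disintegration splits TSS/TDS into carbohydrate, protein, lipid, and
    # inert/soluble fractions supplied by the Feed instance.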
    S[list(map(species.index, ["TSS", "X_ch", "X_pr", "X_li", "X_I"])),
      reactions.index('TSS_Disintegration')] = [-1,feed.ch_tss, feed.prot_tss, feed.lip_tss, feed.xi_tss]
    S[list(map(species.index, ["TDS", "X_ch", "X_pr", "X_li", "S_I"])), reactions.index('TDS_Disintegration')] = [-1,
                                                                                                                  feed.ch_tds, feed.prot_tds, feed.lip_tds, feed.si_tds]
    S[list(map(species.index, ["X_ch", "S_su"])),
      reactions.index('Hydrolysis carbohydrates')] = [-1, 1]
    S[list(map(species.index, ["X_pr", "S_aa"])),
      reactions.index('Hydrolysis proteins')] = [-1, 1]
    S[list(map(species.index, ["X_li", "S_fa"])),
      reactions.index('Hydrolysis lipids')] = [-1, 1]

    Y_su=0 if nitrogen_limited else model_parameters['Y_su']
    f_ac_su=1-model_parameters['f_pro_su']-model_parameters['f_et_su']-model_parameters['f_lac_su']
    f_IC_su = -(-model_parameters['C_su'] +
                (1-Y_su)*model_parameters['f_pro_su']*model_parameters['C_pro'] +
                (1-Y_su)*model_parameters['f_et_su']*model_parameters['C_et'] +
                (1-Y_su)*model_parameters['f_lac_su']*model_parameters['C_lac'] +
                (1-Y_su)*f_ac_su*model_parameters['C_ac'] +
                Y_su*model_parameters['C_bac'])


    S[list(map(species.index, ["S_su", "S_pro", "S_et", "S_lac", "S_ac", "S_IN", "S_IC", "X_su"])),
      reactions.index('Uptake of sugars')] = [-1,
                                              (1-Y_su) * model_parameters['f_pro_su'],
                                              (1-Y_su) * model_parameters['f_et_su'],
                                              (1-Y_su) * model_parameters['f_lac_su'],
                                              (1-Y_su) * f_ac_su,
                                              -model_parameters['N_bac']*Y_su,
                                              f_IC_su,
                                              Y_su]

    Y_aa=0 if nitrogen_limited else model_parameters['Y_aa']
    f_ac_aa=1-model_parameters['f_pro_aa']-model_parameters['f_et_aa']-model_parameters['f_lac_aa']
    f_IC_aa = -(-model_parameters['C_aa'] +
                (1-Y_aa)*model_parameters['f_pro_aa']*model_parameters['C_pro'] +
                (1-Y_aa)*model_parameters['f_et_aa']*model_parameters['C_et'] +
                (1-Y_aa)*model_parameters['f_lac_aa']*model_parameters['C_lac'] +
                (1-Y_aa)*f_ac_aa*model_parameters['C_ac'] +
                Y_aa*model_parameters['C_bac'])



    S[list(map(species.index, ["S_aa", "S_pro", "S_et", "S_lac", "S_ac", "S_IN", "S_IC", "X_aa"])),
      reactions.index('Uptake of amino acids')] = [-1,
                                                   (1-Y_aa) * model_parameters['f_pro_aa'],
                                                   (1-Y_aa) * model_parameters['f_et_aa'],
                                                   (1-Y_aa) * model_parameters['f_lac_aa'],
                                                   (1-Y_aa) * f_ac_aa,
                                                   model_parameters['N_aa']-Y_aa * model_parameters['N_bac'],
                                                   f_IC_aa,
                                                   Y_aa]

    Y_fa=0 if nitrogen_limited else model_parameters['Y_fa']
    f_ac_fa=1-model_parameters['f_pro_fa']-model_parameters['f_et_fa']-model_parameters['f_lac_fa']
    f_IC_fa = -(-model_parameters['C_fa']+
                (1-Y_fa)*model_parameters['f_pro_fa']*model_parameters['C_pro'] +
                (1-Y_fa)*model_parameters['f_et_fa']*model_parameters['C_et'] +
                (1-Y_fa)*model_parameters['f_lac_fa']*model_parameters['C_lac'] +
                (1-Y_fa)*f_ac_fa*model_parameters['C_ac'] +
                Y_fa*model_parameters['C_bac'])
    # if f_IC_fa<0:
    #     raise ValueError("f_IC_fa is negative") 

    S[list(map(species.index, ["S_fa", "S_pro", "S_et", "S_lac", "S_ac", "S_IN", "S_IC", "X_fa"])),
      reactions.index('Uptake of LCFA')] = [-1,
                                            (1-Y_fa) * model_parameters['f_pro_fa'],
                                            (1-Y_fa) * model_parameters['f_et_fa'],
                                            (1-Y_fa) * model_parameters['f_lac_fa'],
                                            (1-Y_fa) * f_ac_fa,
                                            -Y_fa * model_parameters['N_bac'],
                                            f_IC_fa,
                                            Y_fa]
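    # f_ac_su/f_ac_aa/f_ac_fa are 1 minus the other product fractions, so a
    # negative value means the supplied fractions sum to more than one.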
    if any([f_ac_fa<0,f_ac_aa<0,f_ac_su<0]):
        raise ValueError("f_ac is negative")
    Y_ac_et=0 if nitrogen_limited else model_parameters['Y_ac_et']
    Y_ac_lac=0 if nitrogen_limited else model_parameters['Y_ac_lac']
    f_IC_ac_et = -(-model_parameters['C_ac'] +
                    model_parameters['f_et_ac']*model_parameters['C_et'] +
                   (1-model_parameters['f_et_ac']-Y_ac_et) * model_parameters['f_bu_ac']*model_parameters['C_bu'] +
                   Y_ac_et*model_parameters['C_bac'])

    f_IC_ac_lac = -(-model_parameters['C_ac'] +
                    model_parameters['f_lac_ac']*model_parameters['C_lac'] +
                    (1-model_parameters['f_lac_ac']-Y_ac_lac) * model_parameters['f_bu_ac']*model_parameters['C_bu'] +
                    Y_ac_lac*model_parameters['C_bac'])


    S[list(map(species.index, ["S_ac", "S_et", "S_bu", "S_IN", "S_IC", "S_h2", "X_ac_et"])),
      reactions.index('Uptake of acetate_et')] = [-1,
                                                  model_parameters['f_et_ac'],
                                                  (1- model_parameters['f_et_ac']-Y_ac_et) * model_parameters['f_bu_ac'],
                                                  -Y_ac_et * model_parameters['N_bac'],
                                                  f_IC_ac_et,
                                                  (1- model_parameters['f_et_ac']-Y_ac_et) * (1-model_parameters['f_bu_ac']),
                                                  Y_ac_et]

    S[list(map(species.index, ["S_ac", "S_lac", "S_bu", "S_IN", "S_IC", "S_h2", "X_ac_lac"])),
        reactions.index('Uptake of acetate_lac')] = [-1,
                                                    model_parameters['f_lac_ac'],
                                                     (1-model_parameters['f_lac_ac']-Y_ac_lac) * model_parameters['f_bu_ac'],
                                                     -Y_ac_lac * model_parameters['N_bac'],
                                                     f_IC_ac_lac,
                                                     (1-model_parameters['f_lac_ac']-Y_ac_lac) * (1-model_parameters['f_bu_ac']),
                                                     Y_ac_lac]

    Y_pro_et=0 if nitrogen_limited else model_parameters['Y_pro_et']
    Y_pro_lac=0 if nitrogen_limited else model_parameters['Y_pro_lac']

    f_IC_pro_et = -(-model_parameters['C_pro'] +
                    model_parameters['f_et_pro']*model_parameters['C_et'] +
                    (1-model_parameters['f_et_pro']-Y_pro_et)*model_parameters['f_va_pro']*model_parameters['C_va'] +
                    (Y_pro_et)*model_parameters['C_bac'])

    f_IC_pro_lac = -(-model_parameters['C_pro'] +
                     model_parameters['f_lac_pro']*model_parameters['C_lac'] +
                     (1-model_parameters['f_lac_pro']-Y_pro_lac)*model_parameters['f_va_pro']*model_parameters['C_va'] +
                     (Y_pro_lac)*model_parameters['C_bac'])




    S[list(map(species.index, ["S_pro", "S_et", "S_va", "S_IN", "S_IC", "S_h2", "X_chain_et"])),
      reactions.index('Uptake of propionate_et')] = [-1,
                                                    model_parameters['f_et_pro'],
                                                     (1-model_parameters['f_et_pro']-Y_pro_et) * model_parameters['f_va_pro'],
                                                     -Y_pro_et *  model_parameters['N_bac'],
                                                     f_IC_pro_et,
                                                     (1-model_parameters['f_et_pro']-Y_pro_et) * (1-model_parameters['f_va_pro']),
                                                     model_parameters['Y_chain_et_pro']]

    S[list(map(species.index, ["S_pro", "S_lac", "S_va", "S_IN", "S_IC", "S_h2", "X_chain_lac"])),
        reactions.index('Uptake of propionate_lac')] = [-1,
                                                        model_parameters['f_lac_pro'],
                                                        (1-model_parameters['f_lac_pro']-Y_pro_lac) * model_parameters['f_va_pro'],
                                                        -Y_pro_lac * model_parameters['N_bac'],
                                                        f_IC_pro_lac,
                                                        (1-model_parameters['f_lac_pro']-Y_pro_lac) * (1-model_parameters['f_va_pro']),
                                                        model_parameters['Y_chain_lac_pro']]

    Y_bu_et=0 if nitrogen_limited else model_parameters['Y_bu_et']
    Y_bu_lac=0 if nitrogen_limited else model_parameters['Y_bu_lac']
    f_IC_bu_et = -(-model_parameters['C_bu'] +
                    model_parameters['f_et_bu']*model_parameters['C_et'] +
                   (1-model_parameters['f_et_bu']-Y_bu_et)*model_parameters['f_cap_bu']*model_parameters['C_cap'] +
                   (Y_bu_et)*model_parameters['C_bac'])

    f_IC_bu_lac = -(-model_parameters['C_bu'] +
                    model_parameters['f_lac_bu']*model_parameters['C_lac'] +
                    (1-model_parameters['f_lac_bu']-Y_bu_lac)*model_parameters['f_cap_bu']*model_parameters['C_cap'] +
                    (Y_bu_lac)*model_parameters['C_bac'])


    S[list(map(species.index, ["S_bu", "S_et", "S_cap", "S_IN", "S_IC", "S_h2", "X_chain_et"])),
        reactions.index('Uptake of butyrate_et')] = [-1,
                                                     model_parameters['f_et_bu'],
                                                     (1-model_parameters['f_et_bu']-Y_bu_et) * model_parameters['f_cap_bu'],
                                                     -Y_bu_et * model_parameters['N_bac'],
                                                     f_IC_bu_et,
                                                     (1-model_parameters['f_et_bu']-Y_bu_et)*(1-model_parameters['f_cap_bu']),
                                                     Y_bu_et]

    S[list(map(species.index, ["S_bu", "S_lac", "S_cap", "S_IN", "S_IC", "S_h2", "X_chain_lac"])),
        reactions.index('Uptake of butyrate_lac')] = [-1,
                                                      model_parameters['f_lac_bu'],
                                                      (1- model_parameters['f_lac_bu']-Y_bu_lac) * model_parameters['f_cap_bu'],
                                                      -Y_bu_lac *model_parameters['N_bac'],
                                                      f_IC_bu_lac,
                                                      (1- model_parameters['f_lac_bu']-Y_bu_lac)*(1-model_parameters['f_cap_bu']),
                                                      Y_bu_lac]


    Y_va=0 if nitrogen_limited else model_parameters['Y_va']

    S[list(map(species.index, ["S_va", "S_pro", "X_VFA_deg"])),
        reactions.index('Uptake of valerate')] = [-1,
                                                  (1-Y_va),
                                                  Y_va,
                                                  ]

    Y_cap=0 if nitrogen_limited else model_parameters['Y_cap']
    S[list(map(species.index, ["S_cap", "S_ac", "X_VFA_deg"])),
        reactions.index('Uptake of caproate')] = [-1,
                                                  (1 - Y_cap),
                                                  Y_cap]

    Y_bu=0 if nitrogen_limited else model_parameters['Y_bu']
    S[list(map(species.index, ["S_bu", "S_ac", "X_VFA_deg"])),
        reactions.index('Uptake of butyrate')] = [-1,
                                                  (1 - Y_bu),
                                                  Y_bu]



    Y_Me_ac=0 if nitrogen_limited else model_parameters["Y_Me_ac"]
    f_IC_Me_ach2 =0
    S[list(map(species.index, ["S_gas_h2", "S_ac", "S_ch4", "X_Me_ac", 'S_IC', 'S_IN'])),
        reactions.index('Methanogenessis from acetate and h2')] = [-1,
                                                                   model_parameters['f_ac_h2'],
                                                                   (1 +model_parameters['f_ac_h2']- Y_Me_ac),
                                                                   Y_Me_ac,
                                                                   f_IC_Me_ach2,
                                                                    -Y_Me_ac *model_parameters['N_bac']
                                                                   ]

    Y_Me_CO2=0 if nitrogen_limited else model_parameters["Y_Me_CO2"]


    S[list(map(species.index, ["S_gas_h2", "S_gas_ch4", "X_Me_CO2", 'S_gas_co2',"S_IN"])),
        reactions.index('Methanogenessis from CO2 and h2')] = [-1,
                                                               (1 -model_parameters['f_co2_ch4']- Y_Me_CO2),
                                                               (Y_Me_CO2),
                                                               model_parameters['f_co2_ch4'],
                                                                -Y_Me_CO2 *model_parameters['N_bac']
                                                                ]



    Y_ac_et_ox=0 if nitrogen_limited else model_parameters["Y_ac_et_ox"]
    f_IC_et_ox=-(-model_parameters['C_et'] +
                    (1-Y_ac_et_ox)*model_parameters['C_bac']
                    +Y_ac_et_ox*model_parameters['C_ac'])

    S[list(map(species.index, ["S_et", "X_et","S_ac","S_IC"])),
        reactions.index('Uptake of ethanol')] = [-1,1-Y_ac_et_ox,Y_ac_et_ox,f_IC_et_ox]


    Y_pro_lac_ox=0 if nitrogen_limited else model_parameters['Y_pro_lac_ox']
    f_IC_lac_ox=-(-model_parameters['C_lac'] +
                (1-Y_pro_lac_ox)*model_parameters['C_bac']
                +Y_pro_lac_ox*model_parameters['C_pro'])

    S[list(map(species.index, ["S_lac" ,"S_pro","X_lac","S_IC"])),
        reactions.index('Uptake of lactate')] = [-1, 1-Y_pro_lac_ox,Y_pro_lac_ox,f_IC_lac_ox]

    S[list(map(species.index, ["X_su", "TSS","S_IN","S_IC"])),
        reactions.index('Decay of Xsu')] = [-1, 1,model_parameters['N_bac'],model_parameters['C_bac']]

    S[list(map(species.index, ["X_aa", "TSS","S_IN","S_IC"])),
        reactions.index('Decay of Xaa')] = [-1, 1,model_parameters['N_bac'],model_parameters['C_bac']]

    S[list(map(species.index, ["X_fa", "TSS","S_IN","S_IC"])),
        reactions.index('Decay of Xfa')] = [-1, 1,model_parameters['N_bac'],model_parameters['C_bac']]

    S[list(map(species.index, ["X_ac_et", "TSS","S_IN","S_IC"])),
        reactions.index('Decay of X_ac_et')] = [-1, 1,model_parameters['N_bac'],model_parameters['C_bac']]

    S[list(map(species.index, ["X_ac_lac", "TSS","S_IN","S_IC"])),
        reactions.index('Decay of X_ac_lac')] = [-1, 1,model_parameters['N_bac'],model_parameters['C_bac']]

    S[list(map(species.index, ["X_chain_et", "TSS", "S_IN","S_IC"])),
        reactions.index('Decay of X_chain_et')] = [-1, 1,model_parameters['N_bac'],model_parameters['C_bac']]

    S[list(map(species.index, ["X_chain_lac", "TSS", "S_IN","S_IC"])),
        reactions.index('Decay of X_chain_lac')] = [-1, 1,model_parameters['N_bac'],model_parameters['C_bac']]

    S[list(map(species.index, ["X_VFA_deg", "TSS", "S_IN","S_IC"])),
        reactions.index('Decay of X_VFA_deg')] = [-1, 1,model_parameters['N_bac'],model_parameters['C_bac']]

    S[list(map(species.index, ["X_Me_ac", "TSS", "S_IN","S_IC"])),
        reactions.index('Decay of X_Me_ac')] = [-1, 1,model_parameters['N_bac'],model_parameters['C_bac']]

    S[list(map(species.index, ["X_Me_CO2", "TSS", "S_IN","S_IC"])),
        reactions.index('Decay of X_Me_CO2')] = [-1, 1,model_parameters['N_bac'],model_parameters['C_bac']]

    S[list(map(species.index, ["S_va_ion"])),
        reactions.index('Acid Base Equilibrium (Va)')] = [-1]

    S[list(map(species.index, ["S_bu_ion"])),
        reactions.index('Acid Base Equilibrium (Bu)')] = [-1]

    S[list(map(species.index, ["S_pro_ion"])),
        reactions.index('Acid Base Equilibrium (Pro)')] = [-1]

    S[list(map(species.index, ["S_cap_ion"])),
        reactions.index('Acid Base Equilibrium (Cap)')] = [-1]

    S[list(map(species.index, ["S_lac_ion"])),
        reactions.index('Acid Base Equilibrium (Lac)')] = [-1]

    S[list(map(species.index, ["S_ac_ion"])),
        reactions.index('Acid Base Equilibrium (Ac)')] = [-1]

    S[list(map(species.index, ["S_hco3_ion"])),  # I don't think this is right; should look at the reaction in ADM1
        reactions.index('Acid Base Equilibrium (CO2)')] = [-1]

    S[list(map(species.index, ["S_nh3", "S_nh4_ion"])),
        reactions.index('Acid Base Equilibrium (In)')] = [-1, 1]  # I don't think this is right; should look at the reaction in ADM1

    S[list(map(species.index, ["S_h2", "S_gas_h2"])),
        reactions.index('Gas Transfer H2')] = [-base_parameters['V_liq']/base_parameters['V_gas'], 1]
    S[list(map(species.index, ["S_ch4", "S_gas_ch4"])),
        reactions.index('Gas Transfer CH4')] = [-base_parameters['V_liq']/base_parameters['V_gas'], 1]
    S[list(map(species.index, ["S_co2", "S_gas_co2"])),
        reactions.index('Gas Transfer CO2')] = [-base_parameters['V_liq']/base_parameters['V_gas'], 1]

    return S
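
A small runnable sketch of the NumPy fancy-indexing idiom used throughout this builder (toy species and reaction names, for illustration only): one column of S, i.e. one reaction, is assigned at several species rows in a single statement.

import numpy as np

species = ["S_su", "S_ac", "X_su"]        # toy names, illustration only
reactions = ["Uptake of sugars"]
S = np.zeros((len(species), len(reactions)))

rows = list(map(species.index, ["S_su", "S_ac", "X_su"]))
col = reactions.index("Uptake of sugars")
S[rows, col] = [-1, 0.9, 0.1]             # substrate consumed, products formed
print(S[:, col])                          # [-1.   0.9  0.1]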

build_e_adm_stoiciometric_matrix(base_parameters, model_parameters, reactions, species, feed, nitrogen_limited=False)

This function builds the stoichiometric matrix for the e-ADM model.

Parameters:

Name | Type | Description | Default
base_parameters | dict | a dictionary which contains the base parameters | required
model_parameters | dict | a dictionary which contains the model parameters | required
reactions | list | a list containing all of the reaction names | required
species | list | a list containing all species | required
feed | Feed | a Feed instance which contains the feed information | required
nitrogen_limited | bool | whether the model is nitrogen limited | False

Returns:

Type | Description
np.ndarray | the stoichiometric matrix of the e-ADM model

Source code in adtoolbox/adm.py
def build_e_adm_stoiciometric_matrix(base_parameters: dict,
                                     model_parameters: dict,
                                     reactions: list,
                                     species: list,
                                     feed:Feed,
                                     nitrogen_limited:bool=False)->np.ndarray:
    """ 
    This function builds the stoichiometric matrix for the e_ADM Model.

        Model Parameters (dict): a dictionary which contains model parameters
        base_parameters (dict): a dictionary which contains base paramters
        Initial Conditions (dict): a dictionary containing inlet conditions for all species
        Inlet Conditions (dict): a dictionary containing inlet conditions for all species
        reactions (list): a list containing all of the reaction names
        species (list): a list containing all species

    Returns:
        np.ndarray: Returns an matrix of stochiometic values.
    """
    S = np.zeros((len(species), len(reactions)))
    S[list(map(species.index, ["TSS", "X_ch", "X_pr", "X_li", "X_I"])),
      reactions.index('TSS_Disintegration')] = [-1,feed.ch_tss, feed.prot_tss, feed.lip_tss,feed.xi_tss]
    S[list(map(species.index, ["TDS", "X_ch", "X_pr", "X_li", "S_I"])), reactions.index('TDS_Disintegration')] = [-1,
                                                                                                            feed.ch_tds, feed.prot_tds, feed.lip_tds, feed.si_tds]
    S[list(map(species.index, ["X_ch", "S_su"])),reactions.index('Hydrolysis carbohydrates')] = [-1, 1]
    S[list(map(species.index, ["X_pr", "S_aa"])),reactions.index('Hydrolysis proteins')] = [-1, 1]
    S[list(map(species.index, ["X_li", "S_fa"])),reactions.index('Hydrolysis lipids')] = [-1, 1]

    f_IC_su_et=-(-model_parameters['C_su']+
               (1-model_parameters['Y_su_et']) * model_parameters['C_et']+
               (1-model_parameters['Y_su_et']) * model_parameters['C_bac']
              )

    f_IC_su_lac=-(-model_parameters['C_su']+
              (1-model_parameters['Y_su_lac']) * model_parameters['C_lac']+
              (1-model_parameters['Y_su_lac']) * model_parameters['C_bac']
              )

    f_IC_su_ac=-(-model_parameters['C_su']+
               (1-model_parameters['Y_su_ac']) * model_parameters['C_ac']+
               (1-model_parameters['Y_su_ac']) * model_parameters['C_bac']
              )
    f_IC_su_pro=-(-model_parameters['C_su']+
                (1-model_parameters['Y_su_pro']) * model_parameters['C_pro']+
                (1-model_parameters['Y_su_pro']) * model_parameters['C_bac']
              )

    S[list(map(species.index, ["S_su","S_et","S_IN","S_IC","X_su"])),
     reactions.index('Su_to_et')] = [-1,
                                         (1-model_parameters['Y_su_et']),
                                           -model_parameters['N_bac']* model_parameters['Y_su_et'],
                                            f_IC_su_et,
                                            model_parameters['Y_su_et']]

    S[list(map(species.index, ["S_su","S_lac","S_IN","S_IC","X_su"])),
     reactions.index('Su_to_lac')] = [-1,
                                                (1-model_parameters['Y_su_lac']),
                                                -model_parameters['N_bac']* model_parameters['Y_su_lac'],
                                                f_IC_su_lac,
                                                model_parameters['Y_su_lac']]

    S[list(map(species.index, ["S_su","S_ac","S_IN","S_IC","X_su"])),
      reactions.index('Su_to_ac')] = [-1,
                                    (1-model_parameters['Y_su_ac']),
                                    -model_parameters['N_bac']* model_parameters['Y_su_ac'],
                                    f_IC_su_ac,
                                    model_parameters['Y_su_ac']]

    S[list(map(species.index, ["S_su","S_pro","S_IN","S_IC","X_su"])),
            reactions.index('Su_to_pro')] = [-1,
                                            (1-model_parameters['Y_su_pro']),
                                              -model_parameters['N_bac']* model_parameters['Y_su_pro'],
                                              f_IC_su_pro,
                                              model_parameters['Y_su_pro']]

    f_IC_aa_lac=-(-model_parameters['C_aa']+
              (1-model_parameters['Y_aa_lac']) * model_parameters['C_lac']+
              (1-model_parameters['Y_aa_lac']) * model_parameters['C_bac']
              )

    f_IC_aa_ac=-(-model_parameters['C_aa']+
              (1-model_parameters['Y_aa_ac']) * model_parameters['C_ac']+
              (1-model_parameters['Y_aa_ac']) * model_parameters['C_bac']
              )

    f_IC_aa_pro=-(-model_parameters['C_aa']+
              (1-model_parameters['Y_aa_pro']) * model_parameters['C_pro']+
              (1-model_parameters['Y_aa_pro']) * model_parameters['C_bac']
              )


    S[list(map(species.index, ["S_aa","S_lac","S_IN", "S_IC", "X_aa"])),
      reactions.index('aas_to_lac')] = [-1,
                                             (1-model_parameters['Y_aa_lac']),
                                             model_parameters['N_aa']- model_parameters['Y_aa_lac'] * model_parameters['N_bac'],
                                             f_IC_aa_lac,
                                             model_parameters['Y_aa_lac']]

    S[list(map(species.index, ["S_aa","S_pro","S_IN", "S_IC", "X_aa"])),
      reactions.index('aas_to_pro')] = [-1,
                                             (1-model_parameters['Y_aa_pro']),
                                             model_parameters['N_aa']- model_parameters['Y_aa_pro'] * model_parameters['N_bac'],
                                             f_IC_aa_pro,
                                             model_parameters['Y_aa_pro']]


    S[list(map(species.index, ["S_aa","S_ac","S_IN", "S_IC", "X_aa"])),
      reactions.index('aas_to_ac')] = [-1,
                                             (1-model_parameters['Y_aa_ac']),
                                             model_parameters['N_aa']- model_parameters['Y_aa_ac'] * model_parameters['N_bac'],
                                             f_IC_aa_ac,
                                             model_parameters['Y_aa_ac']]

    Y_fa=0 if nitrogen_limited else model_parameters['Y_fa']
    f_IC_fa = -(-model_parameters['C_fa'] +
                (1-Y_fa)*model_parameters['f_pro_fa']*model_parameters['C_pro'] +
                (1-Y_fa)*model_parameters['f_ac_fa']*model_parameters['C_ac'] +
                (1-Y_fa)*model_parameters['C_bac'])

    S[list(map(species.index, ["S_fa", "S_pro", "S_ac", "S_IN", "S_IC", "X_fa"])),
      reactions.index('Uptake of LCFA')] = [-1,
                                            (1-Y_fa) * model_parameters['f_pro_fa'],
                                            (1-Y_fa) * model_parameters['f_ac_fa'],
                                              -Y_fa * model_parameters['N_bac'],
                                              f_IC_fa,
                                              Y_fa]
    Y_ac_et=0 if nitrogen_limited else model_parameters['Y_ac_et']
    Y_ac_lac=0 if nitrogen_limited else model_parameters['Y_ac_lac']
    f_IC_ac_et = -((-1-(1-Y_ac_et) * model_parameters['f_et_ac'])*model_parameters['C_ac'] +
                   (1-Y_ac_et)* model_parameters['f_et_ac']*model_parameters['C_et'] +
                   (1-Y_ac_et) * model_parameters['f_bu_ac']*model_parameters['C_bu'] +
                   (1-Y_ac_et)* model_parameters['C_bac'])  

    f_IC_ac_lac = -((-1-(1-Y_ac_lac) * model_parameters['f_lac_ac'])*model_parameters['C_ac'] +
                    (1-Y_ac_lac)* model_parameters['f_lac_ac']* model_parameters['C_lac'] +
                    (1-Y_ac_lac)* model_parameters['f_bu_ac']* model_parameters['C_bu'] +
                    (1-Y_ac_lac)* model_parameters['C_bac'])

    S[list(map(species.index, ["S_ac", "S_et", "S_bu", "S_IN", "S_IC", "S_h2", "X_ac_et"])),
      reactions.index('Uptake of acetate_et')] = [-1-(1-Y_ac_et) * model_parameters['f_et_ac'],
                                                  (1-Y_ac_et) * model_parameters['f_et_ac'],
                                                  (1-Y_ac_et) * model_parameters['f_bu_ac'],
                                                  -Y_ac_et * model_parameters['N_bac'],
                                                  f_IC_ac_et,
                                                  (1-Y_ac_et) * (1-model_parameters['f_bu_ac']),
                                                  Y_ac_et]

    S[list(map(species.index, ["S_ac", "S_lac", "S_bu", "S_IN", "S_IC", "S_h2", "X_ac_lac"])),
        reactions.index('Uptake of acetate_lac')] = [-1-(1-Y_ac_lac) * model_parameters['f_lac_ac'],
                                                     (1-Y_ac_lac) * model_parameters['f_lac_ac'],
                                                     (1-Y_ac_lac) * model_parameters['f_bu_ac'],
                                                     -Y_ac_lac * model_parameters['N_bac'], 
                                                     f_IC_ac_lac,
                                                     (1-Y_ac_lac) * (1-model_parameters['f_bu_ac']),
                                                     Y_ac_lac]

    Y_pro_et=0 if nitrogen_limited else model_parameters['Y_pro_et']
    Y_pro_lac=0 if nitrogen_limited else model_parameters['Y_pro_lac']

    f_IC_pro_et = -((-1-(1-Y_pro_et) * model_parameters['f_et_pro'])*model_parameters['C_pro'] +
                    (1-Y_pro_et)*model_parameters['f_et_pro']*model_parameters['C_et'] +
                    (1-Y_pro_et)*model_parameters['f_va_pro']*model_parameters['C_va'] +
                    (1-Y_pro_et)*model_parameters['C_bac'])

    f_IC_pro_lac = -((-1-(1-Y_pro_lac) * model_parameters['f_lac_pro'])*model_parameters['C_pro'] +
                     (1-Y_pro_lac)*model_parameters['f_lac_pro']*model_parameters['C_lac'] +
                     (1-Y_pro_lac)*model_parameters['f_va_pro']*model_parameters['C_va'] +
                     (1-Y_pro_lac)*model_parameters['C_bac'])

    S[list(map(species.index, ["S_pro", "S_et", "S_va","S_IC","S_IN","S_h2", "X_chain_et"])),
      reactions.index('Uptake of propionate_et')] = [-1-(1-model_parameters['Y_chain_et_pro']) * model_parameters['f_et_pro'],
                                                     (1-model_parameters['Y_chain_et_pro']) * model_parameters['f_et_pro'],
                                                     (1-model_parameters['Y_chain_et_pro']) * model_parameters['f_va_pro'],
                                                     f_IC_pro_et,
                                                     -model_parameters['Y_chain_et_pro'] * model_parameters['N_bac'],
                                                     (1-model_parameters['Y_chain_et_pro']) * (1-model_parameters['f_va_pro']),
                                                     model_parameters['Y_chain_et_pro']]

    S[list(map(species.index, ["S_pro", "S_lac", "S_va", "S_IC", "S_IN", "S_h2", "X_chain_lac"])),
        reactions.index('Uptake of propionate_lac')] = [-1-(1-model_parameters['Y_chain_lac_pro']) * model_parameters['f_lac_pro'],
                                                        (1-model_parameters['Y_chain_lac_pro']) * model_parameters['f_lac_pro'],
                                                        (1-model_parameters['Y_chain_lac_pro']) * model_parameters['f_va_pro'],
                                                        f_IC_pro_lac,
                                                        -model_parameters['Y_chain_lac_pro'] * model_parameters['N_bac'],
                                                        (1-model_parameters['Y_chain_lac_pro']) * (1-model_parameters['f_va_pro']),
                                                        model_parameters['Y_chain_lac_pro']]

    Y_bu_et=0 if nitrogen_limited else model_parameters['Y_bu_et']
    Y_bu_lac=0 if nitrogen_limited else model_parameters['Y_bu_lac']

    f_IC_bu_et = -((-1-(1-Y_bu_et) * model_parameters['f_et_bu'])*model_parameters['C_bu'] +
                   (1-Y_bu_et)*model_parameters['f_et_bu']*model_parameters['C_et'] +
                   (1-Y_bu_et)*model_parameters['f_cap_bu']*model_parameters['C_cap'] +
                   (1-Y_bu_et)*model_parameters['C_bac'])

    f_IC_bu_lac = -((-1-(1-Y_bu_lac) * model_parameters['f_lac_bu'])*model_parameters['C_bu'] +
                    (1-Y_bu_lac)*model_parameters['f_lac_bu']*model_parameters['C_lac'] +
                    (1-Y_bu_lac)*model_parameters['f_cap_bu']*model_parameters['C_cap'] +
                    (1-Y_bu_lac)*model_parameters['C_bac'])

    S[list(map(species.index, ["S_bu", "S_et", "S_cap", "S_IC", "S_IN", "S_h2", "X_chain_et"])),
        reactions.index('Uptake of butyrate_et')] = [-1-(1-Y_bu_et) * model_parameters['f_et_bu'],
                                                     (1-Y_bu_et) * model_parameters['f_et_bu'],
                                                     (1-Y_bu_et) * model_parameters['f_cap_bu'],
                                                     f_IC_bu_et,
                                                     -Y_bu_et * model_parameters['N_bac'],
                                                     (1-Y_bu_et)*(1-model_parameters['f_cap_bu']),
                                                     Y_bu_et]

    S[list(map(species.index, ["S_bu", "S_lac", "S_cap", "S_IC", "S_IN", "S_h2", "X_chain_lac"])),
        reactions.index('Uptake of butyrate_lac')] = [-1-(1-Y_bu_lac) * model_parameters['f_lac_bu'],
                                                      (1-Y_bu_lac) * model_parameters['f_lac_bu'],
                                                      (1-Y_bu_lac) * model_parameters['f_cap_bu'],
                                                      f_IC_bu_lac,
                                                      -Y_bu_lac * model_parameters['N_bac'],
                                                      (1-Y_bu_lac)*(1-model_parameters['f_cap_bu']),
                                                      Y_bu_lac]

    Y_va=0 if nitrogen_limited else model_parameters['Y_va']
    Y_cap=0 if nitrogen_limited else model_parameters['Y_cap']
    S[list(map(species.index, ["S_va", "S_pro", "X_VFA_deg"])),
        reactions.index('Uptake of valerate')] = [-1,
                                                  (1-Y_va),
                                                  Y_va]

    S[list(map(species.index, ["S_cap", "S_ac", "X_VFA_deg"])),
        reactions.index('Uptake of caproate')] = [-1,
                                                  (1 - Y_cap),
                                                  Y_cap]

    S[list(map(species.index, ["S_bu", "S_ac", "X_VFA_deg"])),
        reactions.index('Uptake of butyrate')] = [-1,
                                                  (1 - model_parameters['Y_bu']),
                                                  model_parameters['Y_bu']]

    Y_Me_ac=0 if nitrogen_limited else model_parameters["Y_Me_ac"]
    f_IC_Me_ach2 = -((1 - model_parameters['Y_h2_ac'])*model_parameters['f_ac_h2']*model_parameters['C_ac']+
                     (1 -Y_Me_ac)*model_parameters['C_ch4']+
                     Y_Me_ac*model_parameters['C_bac'])



    S[list(map(species.index, ["S_h2", "S_ac", "S_ch4", "X_Me_ac", 'S_IC'])),
        reactions.index('Methanogenessis from acetate and h2')] = [-1-(1 - model_parameters['Y_h2_ac'])*model_parameters['f_ac_h2'],
                    (1 - model_parameters['Y_h2_ac'])*model_parameters['f_ac_h2'],
                    (1 -model_parameters['Y_h2_ac']),
                    model_parameters['Y_h2_ac'],
                    f_IC_Me_ach2]

    f_IC_Me_CO2h2 = -(model_parameters['Y_h2_CO2']*model_parameters['C_ch4'] +
                      model_parameters['Y_h2_CO2']*model_parameters['C_bac'])

    S[list(map(species.index, ["S_h2", "S_ch4", "X_Me_CO2", 'S_IC'])),
        reactions.index('Methanogenessis from CO2 and h2')] = [-1,
                                                               (1 - model_parameters['Y_h2_CO2']),
                                                               (model_parameters['Y_h2_CO2']),
                                                               f_IC_Me_CO2h2]

    Y_ac_et_ox=0 if nitrogen_limited else model_parameters["Y_ac_et_ox"]

    f_IC_et_ox=-(-model_parameters['C_et'] +
                    (1-Y_ac_et_ox)*model_parameters['C_bac']
                    +Y_ac_et_ox*model_parameters['C_ac'])

    S[list(map(species.index, ["S_et", "X_et","S_ac","S_IC"])),
        reactions.index('Uptake of ethanol')] = [-1,Y_ac_et_ox,(1-Y_ac_et_ox),f_IC_et_ox]


    Y_pro_lac_ox=0 if nitrogen_limited else model_parameters['Y_pro_lac_ox']
    f_IC_lac_ox=-(-model_parameters['C_lac'] +
                (1-Y_pro_lac_ox)*model_parameters['C_bac']
                +Y_pro_lac_ox*model_parameters['C_pro'])

    S[list(map(species.index, ["S_lac", "X_lac","S_pro","S_IC"])),
        reactions.index('Uptake of lactate')] = [-1, Y_pro_lac_ox,(1-Y_pro_lac_ox),f_IC_lac_ox]

    S[list(map(species.index, ["X_su", "TSS"])),
        reactions.index('Decay of Xsu')] = [-1, 1]

    S[list(map(species.index, ["X_aa", "TSS"])),
        reactions.index('Decay of Xaa')] = [-1, 1]

    S[list(map(species.index, ["X_fa", "TSS"])),
        reactions.index('Decay of Xfa')] = [-1, 1]

    S[list(map(species.index, ["X_ac_et", "TSS"])),
        reactions.index('Decay of X_ac_et')] = [-1, 1]

    S[list(map(species.index, ["X_ac_lac", "TSS"])),
        reactions.index('Decay of X_ac_lac')] = [-1, 1]

    S[list(map(species.index, ["X_chain_et", "TSS"])),
        reactions.index('Decay of X_chain_et')] = [-1, 1]

    S[list(map(species.index, ["X_chain_lac", "TSS"])),
        reactions.index('Decay of X_chain_lac')] = [-1, 1]

    S[list(map(species.index, ["X_VFA_deg", "TSS"])),
        reactions.index('Decay of X_VFA_deg')] = [-1, 1]

    S[list(map(species.index, ["X_Me_ac", "TSS"])),
        reactions.index('Decay of X_Me_ac')] = [-1, 1]

    S[list(map(species.index, ["X_Me_CO2", "TSS"])),
        reactions.index('Decay of X_Me_CO2')] = [-1, 1]

    S[list(map(species.index, ["S_va_ion","S_va"])),
        reactions.index('Acid Base Equilibrium (Va)')] = [-1,1]

    S[list(map(species.index, ["S_bu_ion","S_bu"])),
        reactions.index('Acid Base Equilibrium (Bu)')] = [-1,1]

    S[list(map(species.index, ["S_pro_ion","S_pro"])),
        reactions.index('Acid Base Equilibrium (Pro)')] = [-1,1]

    S[list(map(species.index, ["S_cap_ion","S_cap"])),
        reactions.index('Acid Base Equilibrium (Cap)')] = [-1,1]

    S[list(map(species.index, ["S_lac_ion","S_lac"])),
        reactions.index('Acid Base Equilibrium (Lac)')] = [-1,1]

    S[list(map(species.index, ["S_ac_ion","S_ac"])),
        reactions.index('Acid Base Equilibrium (Ac)')] = [-1,1]

    S[list(map(species.index, ["S_co2", "S_hco3_ion"])),  # I don't think this is right; should look at the reaction in ADM1
        reactions.index('Acid Base Equilibrium (CO2)')] = [-1, 1]

    S[list(map(species.index, ["S_nh3", "S_nh4_ion"])),
        reactions.index('Acid Base Equilibrium (In)')] = [-1, 1]  # I don't think this is right; should look at the reaction in ADM1

    S[list(map(species.index, ["S_h2", "S_gas_h2"])),
        reactions.index('Gas Transfer H2')] = [-base_parameters['V_liq']/base_parameters['V_gas'], 1]
    S[list(map(species.index, ["S_ch4", "S_gas_ch4"])),
        reactions.index('Gas Transfer CH4')] = [-base_parameters['V_liq']/base_parameters['V_gas'], 1]
    S[list(map(species.index, ["S_co2", "S_gas_co2"])),
        reactions.index('Gas Transfer CO2')] = [-base_parameters['V_liq']/base_parameters['V_gas'], 1]
    return S
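All of the assignments above follow the same fancy-indexing pattern: a list of species row indices (via species.index) is paired with a single reaction column index (via reactions.index), and the matching stoichiometric coefficients are written in one statement. A minimal, self-contained sketch of that pattern, using a toy species/reaction list rather than the real e-adm2 state vector:

import numpy as np

# Toy example only: two species and one reaction.
species = ["S_h2", "S_gas_h2"]
reactions = ["Gas Transfer H2"]

S = np.zeros((len(species), len(reactions)))

# Rows come from species.index, the column from reactions.index,
# exactly as in the function above. The volumes are hypothetical.
V_liq, V_gas = 100.0, 10.0
S[list(map(species.index, ["S_h2", "S_gas_h2"])),
  reactions.index("Gas Transfer H2")] = [-V_liq / V_gas, 1]

print(S)  # [[-10.]
          #  [  1.]]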

e_adm_2_ode_sys(t, c, model)

This function builds the system of ODEs for the e-adm2 model.

Parameters:

Name   Type        Description                                  Default
t      float       Current integration time.                    required
c      np.ndarray  Array of current species concentrations.     required
model  Model       The model object to compute the ODEs with.   required

Returns:

Type        Description
np.ndarray  dCdt, the change of each concentration with respect to time.
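Because e_adm_2_ode_sys has the standard f(t, y, *args) signature, it can be handed directly to scipy.integrate.solve_ivp. A minimal sketch, assuming model is an already-configured adm.Model instance and c0 a NumPy array of initial concentrations matching model.species (both names are placeholders here, not objects defined in this documentation):

from scipy.integrate import solve_ivp
from adtoolbox import adm

def simulate_e_adm_2(model, c0, t_end=30.0):
    # AD models are typically stiff, so a BDF solver is a reasonable default;
    # `model` is forwarded to the ODE function through `args`.
    return solve_ivp(adm.e_adm_2_ode_sys,
                     t_span=(0.0, t_end),
                     y0=c0,
                     args=(model,),
                     method="BDF")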

Source code in adtoolbox/adm.py
def e_adm_2_ode_sys(t: float, c: np.ndarray, model: Model)-> np.ndarray:
    """
    This function builds the system of ODEs for the e-adm2 model.

    Args:
        t (float): current integration time.
        c (np.ndarray): array of current species concentrations.
        model (Model): the model object to compute the ODEs with.

    Returns:
        np.ndarray: The output is dCdt, the change of concentration with respect to time. 
    """
    ### Initialize the ion concentrations
    if t==0:
        c[model.species.index('S_va_ion')]=model.model_parameters['K_a_va']/(model.model_parameters['K_a_va']+c[model.species.index('S_H_ion')])*c[model.species.index('S_va')]
        c[model.species.index('S_bu_ion')]=model.model_parameters['K_a_bu']/(model.model_parameters['K_a_bu']+c[model.species.index('S_H_ion')])*c[model.species.index('S_bu')]
        c[model.species.index('S_pro_ion')]=model.model_parameters['K_a_pro']/(model.model_parameters['K_a_pro']+c[model.species.index('S_H_ion')])*c[model.species.index('S_pro')]
        c[model.species.index('S_cap_ion')]=model.model_parameters['K_a_cap']/(model.model_parameters['K_a_cap']+c[model.species.index('S_H_ion')])*c[model.species.index('S_cap')]
        c[model.species.index('S_ac_ion')]=model.model_parameters['K_a_ac']/(model.model_parameters['K_a_ac']+c[model.species.index('S_H_ion')])*c[model.species.index('S_ac')]
        c[model.species.index('S_lac_ion')]=model.model_parameters['K_a_lac']/(model.model_parameters['K_a_lac']+c[model.species.index('S_H_ion')])*c[model.species.index('S_lac')]    
        c[model.species.index('S_hco3_ion')] = c[model.species.index('S_IC')] - c[model.species.index('S_co2')]
        phi=(model.model_parameters['K_w']/c[model.species.index('S_H_ion')]-c[model.species.index('S_H_ion')])
        c[model.species.index('S_anion')] = c[model.species.index('S_cation')]+c[model.species.index('S_nh4_ion')]-c[model.species.index('S_hco3_ion')]-(c[model.species.index('S_lac_ion')] / 88) - (c[model.species.index('S_ac_ion')] / 64) - (c[model.species.index('S_pro_ion')] /
                                                                                                                                                                     112) - (c[model.species.index('S_bu_ion')] / 160)-(c[model.species.index('S_cap_ion')] / 230) - (c[model.species.index('S_va_ion')] / 208)-phi

    c[model.species.index('S_hco3_ion')] = model.model_parameters['K_a_co2'] * c[model.species.index('S_IC')]/(model.model_parameters['K_a_co2'] + c[model.species.index('S_H_ion')])
    c[model.species.index('S_nh4_ion')]=  model.model_parameters['K_b_nh3'] * c[model.species.index('S_IN')]/(model.model_parameters['K_b_nh3'] + model.base_parameters['K_W'] / c[model.species.index('S_H_ion')])

    c[model.species.index('S_co2')]= c[model.species.index('S_IC')] -  c[model.species.index('S_hco3_ion')]
    c[model.species.index('S_nh3')]= c[model.species.index('S_IN')] - c[model.species.index('S_nh4_ion')]

    if (time.time()-model._be_time )>model.time_limit and model.time_limit!=-1:
        raise Exception("Time limit exceeded")


    I_pH_aa = (model.model_parameters["K_pH_aa"] ** model.model_parameters['nn_aa'])/(np.power(
        c[model.species.index('S_H_ion')], model.model_parameters['nn_aa']) + np.power(model.model_parameters["K_pH_aa"], model.model_parameters['nn_aa']))
    I_pH_ac = (model.model_parameters['K_pH_ac'] ** model.model_parameters["n_ac"])/(
        c[model.species.index('S_H_ion')] ** model.model_parameters['n_ac'] + model.model_parameters['K_pH_ac'] ** model.model_parameters['n_ac'])
    I_pH_pro = (model.model_parameters['K_pH_pro'] ** model.model_parameters["n_pro"])/(
        c[model.species.index('S_H_ion')] ** model.model_parameters['n_pro'] + model.model_parameters['K_pH_pro'] ** model.model_parameters['n_pro'])
    I_pH_bu = (model.model_parameters['K_pH_bu'] ** model.model_parameters["n_bu"])/(
        c[model.species.index('S_H_ion')] ** model.model_parameters['n_bu'] + model.model_parameters['K_pH_bu'] ** model.model_parameters['n_bu'])
    I_pH_va = (model.model_parameters['K_pH_va'] ** model.model_parameters["n_va"])/(
        c[model.species.index('S_H_ion')] ** model.model_parameters['n_va'] + model.model_parameters['K_pH_va'] ** model.model_parameters['n_va'])
    I_pH_cap = (model.model_parameters['K_pH_cap'] ** model.model_parameters["n_cap"])/(
        c[model.species.index('S_H_ion')] ** model.model_parameters['n_cap'] + model.model_parameters['K_pH_cap'] ** model.model_parameters['n_cap'])
    I_pH_h2 = (model.model_parameters['K_pH_h2']**model.model_parameters['n_h2'])/(
        c[model.species.index('S_H_ion')] ** model.model_parameters['n_h2'] + model.model_parameters['K_pH_h2']**model.model_parameters['n_h2'])

    I_IN_lim = 1 / (1+(c[model.species.index('S_IN')] / (model.model_parameters['K_S_IN']+10**-9)))  # NOTE: e_adm_ode_sys uses 1/(1+K_S_IN/S_IN) for this term; the inverted form here may be unintentional

    I_h2_fa = 1 /  (1+(c[model.species.index('S_h2')] /(model.model_parameters['K_I_h2_fa']+10**-9)))

    I_h2_c4 = 1 /  (1+(c[model.species.index('S_h2')] / (model.model_parameters['K_I_h2_c4']+10**-9)))

    I_h2_pro = 1/  (1+(c[model.species.index('S_h2')] / (model.model_parameters['K_I_h2_pro']+10**-9)))

    I_nh3 =    1/  (1+(c[model.species.index('S_nh3')] / (model.model_parameters['K_I_nh3']+10**-9)))

    I_h2_oxidation=1/(1+(c[model.species.index('S_h2')] / (model.model_parameters['K_I_h2_ox']+10**-9)))

    I5 =    max(0,(I_pH_aa * I_IN_lim))
    I6 =    max(0,I5)
    I7 =    max(0,(I_pH_aa * I_IN_lim * I_h2_fa))
    I8 =    max(0,(I_pH_aa * I_IN_lim * I_h2_c4))
    I9 =    max(0,I8)
    I10 =   max(0,(I_pH_pro * I_IN_lim * I_h2_pro))
    I11 =   max(0,(I_pH_ac * I_IN_lim * I_nh3))
    I12 =   max(0,(I_pH_h2 * I_IN_lim))
    I13 =   max(0,(I_pH_cap * I_IN_lim * I_h2_c4))
    I14 =   max(0,(I_pH_bu * I_IN_lim * I_h2_c4))
    I15 =   max(0,(I_pH_va * I_IN_lim * I_h2_c4))
    I16 =   max(0,I_IN_lim * I_nh3*I_pH_aa*I_h2_oxidation)

    v = np.zeros((len(model.reactions), 1))

    v[model.reactions.index('TSS_Disintegration')] = model.model_parameters["k_dis_TSS"]*c[model.species.index('TSS')]

    v[model.reactions.index('TDS_Disintegration')] = model.model_parameters["k_dis_TDS"]*c[model.species.index('TDS')]

    v[model.reactions.index('Hydrolysis carbohydrates')] = model.model_parameters['k_hyd_ch']*c[model.species.index('X_ch')]

    v[model.reactions.index('Hydrolysis proteins')] = model.model_parameters['k_hyd_pr']*c[model.species.index('X_pr')]

    v[model.reactions.index('Hydrolysis lipids')] = model.model_parameters['k_hyd_li']*c[model.species.index('X_li')]

    v[model.reactions.index('Uptake of sugars')] = model.model_parameters['k_m_su']*c[model.species.index('S_su')] / \
        (model.model_parameters['K_S_su']+c[model.species.index('S_su')])*c[model.species.index('X_su')]*I5

    v[model.reactions.index('Uptake of amino acids')] = model.model_parameters['k_m_aa']*c[model.species.index('S_aa')] / \
        (model.model_parameters['K_S_aa']+c[model.species.index('S_aa')]
         )*c[model.species.index('X_aa')]*I6

    v[model.reactions.index('Uptake of LCFA')] = model.model_parameters['k_m_fa']*c[model.species.index('S_fa')] / \
        (model.model_parameters['K_S_fa'] +
         c[model.species.index('S_fa')])*c[model.species.index('X_fa')]*I7

    v[model.reactions.index('Uptake of acetate_et')] = model.model_parameters['k_m_ac']*c[model.species.index('S_ac')]*c[model.species.index('S_et')] / \
        (model.model_parameters['K_S_ac']*c[model.species.index('S_ac')]+model.model_parameters['K_S_ac_et']*c[model.species.index('S_et')]+c[model.species.index('S_ac')]*c[model.species.index('S_et')]+10**-9
         )*c[model.species.index('X_ac_et')]*I11

    v[model.reactions.index('Uptake of acetate_lac')] = model.model_parameters['k_m_ac']*c[model.species.index('S_ac')]*c[model.species.index('S_lac')] / \
        (model.model_parameters['K_S_ac']*c[model.species.index('S_ac')]+model.model_parameters['K_S_ac_lac']*c[model.species.index('S_lac')]+c[model.species.index('S_ac')]*c[model.species.index('S_lac')]+10**-9
         )*c[model.species.index('X_ac_lac')]*I11

    v[model.reactions.index('Uptake of propionate_et')] = model.model_parameters['k_m_pro']*c[model.species.index('S_pro')]*c[model.species.index('S_et')] / \
        (model.model_parameters['K_S_pro']*c[model.species.index('S_pro')]+model.model_parameters['K_S_pro_et']*c[model.species.index('S_et')]+c[model.species.index('S_pro')]*c[model.species.index('S_et')]+10**-9
         )*c[model.species.index('X_chain_et')]*I10

    v[model.reactions.index('Uptake of propionate_lac')] = model.model_parameters['k_m_pro']*c[model.species.index('S_pro')]*c[model.species.index('S_lac')] / \
        (model.model_parameters['K_S_pro']*c[model.species.index('S_pro')]+model.model_parameters['K_S_pro_lac']*c[model.species.index('S_lac')]+c[model.species.index('S_pro')]*c[model.species.index('S_lac')]+10**-9
         )*c[model.species.index('X_chain_lac')]*I10

    v[model.reactions.index('Uptake of butyrate_et')] = model.model_parameters['k_m_bu']*c[model.species.index('S_bu')]*c[model.species.index('S_et')] / \
        (model.model_parameters['K_S_bu']*c[model.species.index('S_bu')]+model.model_parameters['K_S_bu_et']*c[model.species.index('S_et')]+c[model.species.index('S_bu')]*c[model.species.index('S_et')]+10**-9
         )*c[model.species.index('X_chain_et')]*I14

    v[model.reactions.index('Uptake of butyrate_lac')] = model.model_parameters['k_m_bu']*c[model.species.index('S_bu')]*c[model.species.index('S_lac')] / \
        (model.model_parameters['K_S_bu']*c[model.species.index('S_bu')]+model.model_parameters['K_S_bu_lac']*c[model.species.index('S_lac')]+c[model.species.index('S_bu')]*c[model.species.index('S_lac')]+10**-9
         )*c[model.species.index('X_chain_lac')]*I14

    v[model.reactions.index('Uptake of valerate')] = model.model_parameters['k_m_va']*c[model.species.index('S_va')] / \
        (model.model_parameters['K_S_va']+c[model.species.index('S_va')]
         )*c[model.species.index('X_VFA_deg')]*I15

    v[model.reactions.index('Uptake of caproate')] = model.model_parameters['k_m_cap']*c[model.species.index('S_cap')] / \
        (model.model_parameters['K_S_cap']+c[model.species.index('S_cap')]
         )*c[model.species.index('X_VFA_deg')]*I13

    v[model.reactions.index('Uptake of butyrate')] = model.model_parameters['k_m_bu_deg']*c[model.species.index('S_bu')] / \
        (model.model_parameters['K_S_bu']+c[model.species.index('S_bu')]
         )*c[model.species.index('X_VFA_deg')]*I13

    v[model.reactions.index('Methanogenessis from acetate and h2')] = model.model_parameters['k_m_h2_Me_ac']*c[model.species.index('S_gas_h2')]*c[model.species.index('S_ac')] / \
        (model.model_parameters['K_S_h2_Me_ac']*c[model.species.index('S_gas_h2')]+model.model_parameters['K_S_ac_Me']*c[model.species.index(
            'S_ac')]+c[model.species.index('S_ac')]*c[model.species.index('S_gas_h2')]+10**-9)*c[model.species.index('X_Me_ac')]*I12

    v[model.reactions.index('Methanogenessis from CO2 and h2')] = model.model_parameters['k_m_h2_Me_CO2']*c[model.species.index('S_gas_h2')]*c[model.species.index('S_gas_co2')] / \
        (model.model_parameters['K_S_h2_Me_CO2']*c[model.species.index('S_gas_h2')]+model.model_parameters['K_S_CO2_Me']*c[model.species.index(
            'S_gas_co2')]+c[model.species.index('S_gas_co2')]*c[model.species.index('S_gas_h2')]+10**-9)*c[model.species.index('X_Me_CO2')]*I12


    v[model.reactions.index('Uptake of ethanol')] = model.model_parameters['k_m_et']*c[model.species.index('S_et')] / \
        (model.model_parameters['K_S_et']+c[model.species.index('S_et')]
         )*c[model.species.index("X_et")]*I16

    v[model.reactions.index('Uptake of lactate')] = model.model_parameters['k_m_lac']*c[model.species.index('S_lac')] / \
        (model.model_parameters['K_S_lac']+c[model.species.index('S_lac')]
         )*c[model.species.index('X_lac')]*I16

    v[model.reactions.index('Decay of Xsu')] = model.model_parameters['k_dec_X_su']*c[model.species.index('X_su')]
    v[model.reactions.index('Decay of Xaa')] = model.model_parameters['k_dec_X_aa']*c[model.species.index('X_aa')]
    v[model.reactions.index('Decay of Xfa')] = model.model_parameters['k_dec_X_fa']*c[model.species.index('X_fa')]
    v[model.reactions.index('Decay of X_ac_et')] = model.model_parameters['k_dec_X_ac']*c[model.species.index('X_ac_et')]
    v[model.reactions.index('Decay of X_ac_lac')] = model.model_parameters['k_dec_X_ac']*c[model.species.index('X_ac_lac')]
    v[model.reactions.index('Decay of X_chain_et')] = model.model_parameters['k_dec_X_chain_et']*c[model.species.index('X_chain_et')]
    v[model.reactions.index('Decay of X_chain_lac')] = model.model_parameters['k_dec_X_chain_lac']*c[model.species.index('X_chain_lac')]
    v[model.reactions.index('Decay of X_VFA_deg')] = model.model_parameters['k_dec_X_VFA_deg']*c[model.species.index('X_VFA_deg')]
    v[model.reactions.index('Decay of X_Me_ac')] = model.model_parameters['k_dec_X_Me_ac']*c[model.species.index('X_Me_ac')]
    v[model.reactions.index('Decay of X_Me_CO2')] = model.model_parameters['k_dec_X_Me_CO2']*c[model.species.index('X_Me_CO2')]
    v[model.reactions.index('Decay of Xet')] = model.model_parameters['k_dec_X_et']*c[model.species.index('X_et')]
    v[model.reactions.index('Decay of Xlac')] = model.model_parameters['k_dec_X_lac']*c[model.species.index('X_lac')]


    v[model.reactions.index('Acid Base Equilibrium (Va)')] = model.model_parameters['k_A_B_va'] * \
        (c[model.species.index('S_va_ion')] * (model.model_parameters['K_a_va'] + c[model.species.index('S_H_ion')]) -
         model.model_parameters['K_a_va'] * c[model.species.index('S_va')])
    v[model.reactions.index('Acid Base Equilibrium (Bu)')] = model.model_parameters['k_A_B_bu'] * \
        (c[model.species.index('S_bu_ion')] * (model.model_parameters['K_a_bu'] + c[model.species.index('S_H_ion')]) -
         model.model_parameters['K_a_bu'] * c[model.species.index('S_bu')])
    v[model.reactions.index('Acid Base Equilibrium (Pro)')] = model.model_parameters['k_A_B_pro'] * \
        (c[model.species.index('S_pro_ion')] * (model.model_parameters['K_a_pro'] + c[model.species.index('S_H_ion')]) -
         model.model_parameters['K_a_pro'] * c[model.species.index('S_pro')])
    v[model.reactions.index('Acid Base Equilibrium (Cap)')] = model.model_parameters['k_A_B_cap'] * \
        (c[model.species.index('S_cap_ion')] * (model.model_parameters['K_a_cap'] + c[model.species.index('S_H_ion')]) -
         model.model_parameters['K_a_cap'] * c[model.species.index('S_cap')])
    v[model.reactions.index('Acid Base Equilibrium (Lac)')] = model.model_parameters['k_A_B_lac'] * \
        (c[model.species.index('S_lac_ion')] * (model.model_parameters['K_a_lac'] + c[model.species.index('S_H_ion')]) -
         model.model_parameters['K_a_lac'] * c[model.species.index('S_lac')])
    v[model.reactions.index('Acid Base Equilibrium (Ac)')] = model.model_parameters['k_A_B_ac'] * \
        (c[model.species.index('S_ac_ion')] * (model.model_parameters['K_a_ac'] + c[model.species.index('S_H_ion')]) -
         model.model_parameters['K_a_ac'] * c[model.species.index('S_ac')])
    v[model.reactions.index('Acid Base Equilibrium (CO2)')] = model.model_parameters['k_A_B_co2'] * \
        (c[model.species.index('S_hco3_ion')] * (model.model_parameters['K_a_co2'] + c[model.species.index('S_H_ion')]) -
         model.model_parameters['K_a_co2'] * c[model.species.index('S_IC')])
    v[model.reactions.index('Acid Base Equilibrium (In)')] = model.model_parameters['k_A_B_IN'] * \
        (c[model.species.index('S_nh3')] * (model.model_parameters['K_a_IN'] + c[model.species.index('S_H_ion')]) -
         model.model_parameters['K_a_IN'] * c[model.species.index('S_IC')])


    p_gas_h2 = c[model.species.index('S_gas_h2')] * model.base_parameters["R"] * model.base_parameters["T_op"] / 16
    p_gas_ch4 = c[model.species.index('S_gas_ch4')] * model.base_parameters["R"] * model.base_parameters["T_op"] / 64
    p_gas_co2 = c[model.species.index('S_gas_co2')] * model.base_parameters["R"] * model.base_parameters["T_op"]
    p_gas_h2o = 0.0313 * np.exp(5290 *(1 / model.base_parameters["T_base"] - 1 / model.base_parameters["T_op"]))

    P_gas = p_gas_h2 + p_gas_ch4 + p_gas_co2 + p_gas_h2o

    q_gas = max(0, (model.model_parameters['k_p'] * (P_gas - model.base_parameters['P_atm'])))

    v[model.reactions.index('Gas Transfer H2')] = max(0,model.model_parameters['k_L_a'] * (c[model.species.index('S_h2')] - 16 *model.model_parameters['K_H_h2'] * p_gas_h2))
    v[model.reactions.index('Gas Transfer CH4')] = max(0,model.model_parameters['k_L_a'] * (c[model.species.index('S_ch4')] - 64 * model.model_parameters['K_H_ch4'] * p_gas_ch4))
    v[model.reactions.index('Gas Transfer CO2')] = max(0,model.model_parameters['k_L_a'] * (c[model.species.index('S_co2')] - model.model_parameters['K_H_co2'] * p_gas_co2))

    if c[model.species.index('S_IN')]<0.01:
        model.nitrogen_limited=True
    else:
        model.nitrogen_limited=False

    dCdt = np.matmul(model.s, v)
    phi = c[model.species.index('S_cation')]+c[model.species.index('S_nh4_ion')]-c[model.species.index('S_hco3_ion')]-(c[model.species.index('S_lac_ion')] / 88) - \
    (c[model.species.index('S_ac_ion')] / 64) - (c[model.species.index('S_pro_ion')] / 112) - (c[model.species.index('S_bu_ion')] / 160)-(c[model.species.index('S_cap_ion')] / 230) - (c[model.species.index('S_va_ion')] / 208) - c[model.species.index('S_anion')]

    if 'S_H_ion' in model.control_state.keys():
        c[model.species.index('S_H_ion')]=model.control_state['S_H_ion']
    else:
        c[model.species.index('S_H_ion')] = (-1 * phi / 2) + (0.5 * np.sqrt(phi**2 + 4 * model.model_parameters['K_w']))

    dCdt[0: len(model.species)-3] = dCdt[0: len(model.species)-3]+model.base_parameters['q_in'] / model.base_parameters["V_liq"] * (model.inlet_conditions[0: len(model.species)-3]-c[0: len(model.species)-3].reshape(-1, 1))

    dCdt[len(model.species)-3:] = dCdt[len(model.species)-3:]+q_gas/model.base_parameters["V_gas"] * (model.inlet_conditions[len(model.species)-3:]-c[len(model.species)-3:].reshape(-1, 1))

    dCdt[[model.species.index('S_H_ion'), model.species.index('S_co2'), model.species.index('S_nh4_ion')], 0] = 0

    if model.switch == "DAE":
        dCdt[model.species.index('S_va_ion'):model.species.index('S_co2')] = 0
        dCdt[model.species.index('S_nh3')] = 0
        c[model.species.index('S_va_ion')]=model.model_parameters['K_a_va']/(model.model_parameters['K_a_va']+c[model.species.index('S_H_ion')])*c[model.species.index('S_va')]
        c[model.species.index('S_bu_ion')]=model.model_parameters['K_a_bu']/(model.model_parameters['K_a_bu']+c[model.species.index('S_H_ion')])*c[model.species.index('S_bu')]
        c[model.species.index('S_pro_ion')]=model.model_parameters['K_a_pro']/(model.model_parameters['K_a_pro']+c[model.species.index('S_H_ion')])*c[model.species.index('S_pro')]
        c[model.species.index('S_cap_ion')]=model.model_parameters['K_a_cap']/(model.model_parameters['K_a_cap']+c[model.species.index('S_H_ion')])*c[model.species.index('S_cap')]
        c[model.species.index('S_ac_ion')]=model.model_parameters['K_a_ac']/(model.model_parameters['K_a_ac']+c[model.species.index('S_H_ion')])*c[model.species.index('S_ac')]
        c[model.species.index('S_lac_ion')]=model.model_parameters['K_a_lac']/(model.model_parameters['K_a_lac']+c[model.species.index('S_H_ion')])*c[model.species.index('S_lac')]    
        c[model.species.index('S_hco3_ion')] = c[model.species.index('S_IC')] - c[model.species.index('S_co2')]




    if model.control_state.keys():
        for state in model.control_state.keys():
            c[model.species.index(state)]=model.control_state[state]
            dCdt[model.species.index(state)]=0

    model.info["Fluxes"]=v
    return dCdt[:, 0]
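The pH update near the end of the function is the closed-form solution of the charge balance: with phi the net charge of all ions other than H+ and OH-, S_H_ion must satisfy S_H_ion**2 + phi*S_H_ion - K_w = 0, and the code takes the positive root of that quadratic. A small numeric illustration with made-up values:

import numpy as np

K_w = 1e-14    # water ion product (illustrative)
phi = 2.0e-5   # hypothetical net charge of the remaining ionic species

# Positive root of S_H**2 + phi*S_H - K_w = 0, as computed in the source above.
S_H_ion = (-phi / 2) + 0.5 * np.sqrt(phi**2 + 4 * K_w)

print(f"S_H_ion = {S_H_ion:.3e}, pH = {-np.log10(S_H_ion):.2f}")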

e_adm_ode_sys(t, c, model)

This function builds the system of ODEs for the e_adm model.

Parameters:

Name   Type        Description                                  Default
t      float       Current integration time.                    required
c      np.ndarray  Array of current species concentrations.     required
model  Model       The model object to compute the ODEs with.   required

Returns:

Type        Description
np.ndarray  dCdt, the change of each concentration with respect to time.
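Both ODE builders also consult model.control_state on every evaluation: any species listed there is clamped to the given concentration and its derivative is zeroed, which is how a controlled quantity such as pH can be held fixed during a simulation. A hedged sketch (assigning control_state directly is an assumption about the API; the value 10**-7, i.e. pH 7, is illustrative):

# Hold the proton concentration constant to mimic a pH-controlled digester.
model.control_state = {"S_H_ion": 10**-7}

# Inside the ODE function this results, at every step, in:
#   c[model.species.index("S_H_ion")] = model.control_state["S_H_ion"]
#   dCdt[model.species.index("S_H_ion")] = 0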

Source code in adtoolbox/adm.py
def e_adm_ode_sys(t: float, c: np.ndarray, model: Model)-> np.ndarray:
    """
    This function builds the system of ODEs for the e_adm model.

    Args:
        t (float): current integration time.
        c (np.ndarray): array of current species concentrations.
        model (Model): the model object to compute the ODEs with.

    Returns:
        np.ndarray: The output is dCdt, the change of concentration with respect to time. 
    """
    c[c<0]=0
    c[model.species.index('S_H_ion')]=0.000001
    if model.switch == "DAE":

        c[model.species.index('S_va_ion')]=model.model_parameters['K_a_va']/(model.model_parameters['K_a_va']+c[model.species.index('S_H_ion')])*c[model.species.index('S_va')]

        c[model.species.index('S_bu_ion')]=model.model_parameters['K_a_bu']/(model.model_parameters['K_a_bu']+c[model.species.index('S_H_ion')])*c[model.species.index('S_bu')]

        c[model.species.index('S_pro_ion')]=model.model_parameters['K_a_pro']/(model.model_parameters['K_a_pro']+c[model.species.index('S_H_ion')])*c[model.species.index('S_pro')]

        c[model.species.index('S_cap_ion')]=model.model_parameters['K_a_cap']/(model.model_parameters['K_a_cap']+c[model.species.index('S_H_ion')])*c[model.species.index('S_cap')]

        c[model.species.index('S_ac_ion')]=model.model_parameters['K_a_ac']/(model.model_parameters['K_a_ac']+c[model.species.index('S_H_ion')])*c[model.species.index('S_ac')]

        c[model.species.index('S_lac_ion')]=model.model_parameters['K_a_lac']/(model.model_parameters['K_a_lac']+c[model.species.index('S_H_ion')])*c[model.species.index('S_lac')]
    else: 
        v[model.reactions.index('Acid Base Equilibrium (Va)')] = model.model_parameters['k_A_B_va'] * \
            (c[model.species.index('S_va_ion')] * (model.model_parameters['K_a_va'] + c[model.species.index('S_H_ion')]) -
             model.model_parameters['K_a_va'] * c[model.species.index('S_va')])

        v[model.reactions.index('Acid Base Equilibrium (Bu)')] = model.model_parameters['k_A_B_bu'] * \
            (c[model.species.index('S_bu_ion')] * (model.model_parameters['K_a_bu'] + c[model.species.index('S_H_ion')]) -
             model.model_parameters['K_a_bu'] * c[model.species.index('S_bu')])

        v[model.reactions.index('Acid Base Equilibrium (Pro)')] = model.model_parameters['k_A_B_pro'] * \
            (c[model.species.index('S_pro_ion')] * (model.model_parameters['K_a_pro'] + c[model.species.index('S_H_ion')]) -
             model.model_parameters['K_a_pro'] * c[model.species.index('S_pro')])

        v[model.reactions.index('Acid Base Equilibrium (Cap)')] = model.model_parameters['k_A_B_cap'] * \
            (c[model.species.index('S_cap_ion')] * (model.model_parameters['K_a_cap'] + c[model.species.index('S_H_ion')]) -
             model.model_parameters['K_a_cap'] * c[model.species.index('S_cap')])

        v[model.reactions.index('Acid Base Equilibrium (Lac)')] = model.model_parameters['k_A_B_lac'] * \
            (c[model.species.index('S_lac_ion')] * (model.model_parameters['K_a_lac'] + c[model.species.index('S_H_ion')]) -
             model.model_parameters['K_a_lac'] * c[model.species.index('S_lac')])

        v[model.reactions.index('Acid Base Equilibrium (Ac)')] = model.model_parameters['k_A_B_ac'] * \
            (c[model.species.index('S_ac_ion')] * (model.model_parameters['K_a_ac'] + c[model.species.index('S_H_ion')]) -
             model.model_parameters['K_a_ac'] * c[model.species.index('S_ac')])

        v[model.reactions.index('Acid Base Equilibrium (CO2)')] = model.model_parameters['k_A_B_co2'] * \
            (c[model.species.index('S_hco3_ion')] * (model.model_parameters['K_a_co2'] + c[model.species.index('S_H_ion')]) -
             model.model_parameters['K_a_co2'] * c[model.species.index('S_IC')])

        v[model.reactions.index('Acid Base Equilibrium (In)')] = model.model_parameters['k_A_B_IN'] * \
            (c[model.species.index('S_nh3')] * (model.model_parameters['K_a_IN'] + c[model.species.index('S_H_ion')]) -
             model.model_parameters['K_a_IN'] * c[model.species.index('S_IC')])

    c[model.species.index('S_nh4_ion')] = c[model.species.index(
        'S_IN')] - c[model.species.index('S_nh3')]
    c[model.species.index('S_co2')] = c[model.species.index(
        'S_IC')] - c[model.species.index('S_hco3_ion')]
    I_pH_aa = (model.model_parameters["K_pH_aa"] ** model.model_parameters['nn_aa'])/(np.power(
        c[model.species.index('S_H_ion')], model.model_parameters['nn_aa']) + np.power(model.model_parameters["K_pH_aa"], model.model_parameters['nn_aa']))

    I_pH_ac = (model.model_parameters['K_pH_ac'] ** model.model_parameters["n_ac"])/(
        c[model.species.index('S_H_ion')] ** model.model_parameters['n_ac'] + model.model_parameters['K_pH_ac'] ** model.model_parameters['n_ac'])

    I_pH_pro = (model.model_parameters['K_pH_pro'] ** model.model_parameters["n_pro"])/(
        c[model.species.index('S_H_ion')] ** model.model_parameters['n_pro'] + model.model_parameters['K_pH_pro'] ** model.model_parameters['n_pro'])

    I_pH_bu = (model.model_parameters['K_pH_bu'] ** model.model_parameters["n_bu"])/(
        c[model.species.index('S_H_ion')] ** model.model_parameters['n_bu'] + model.model_parameters['K_pH_bu'] ** model.model_parameters['n_bu'])

    I_pH_va = (model.model_parameters['K_pH_va'] ** model.model_parameters["n_va"])/(
        c[model.species.index('S_H_ion')] ** model.model_parameters['n_va'] + model.model_parameters['K_pH_va'] ** model.model_parameters['n_va'])

    I_pH_cap = (model.model_parameters['K_pH_cap'] ** model.model_parameters["n_cap"])/(
        c[model.species.index('S_H_ion')] ** model.model_parameters['n_cap'] + model.model_parameters['K_pH_cap'] ** model.model_parameters['n_cap'])

    I_pH_h2 = (model.model_parameters['K_pH_h2']**model.model_parameters['n_h2'])/(
        c[model.species.index('S_H_ion')] ** model.model_parameters['n_h2'] + model.model_parameters['K_pH_h2']**model.model_parameters['n_h2'])

    I_IN_lim = 1 / \
        (1+(model.model_parameters['K_S_IN'] / (c[model.species.index('S_IN')]+10**-9)))

    I_h2_fa = 1 / (1+(c[model.species.index('S_h2')] /
                   (model.model_parameters['K_I_h2_fa']+10**-9)))

    I_h2_c4 = 1 / (1+(c[model.species.index('S_h2')] /
                   (model.model_parameters['K_I_h2_c4']+10**-9)))

    I_h2_pro = (1/(1+(c[model.species.index('S_h2')] /
                (model.model_parameters['K_I_h2_pro']+10**-9))))

    I_nh3 = 1/(1+(c[model.species.index('S_nh3')] /
               (model.model_parameters['K_I_nh3']+10**-9)))

    I_h2_oxidation=(1/(1+(c[model.species.index('S_h2')] /
                (model.model_parameters['K_I_h2_ox']+10**-9))))

    # I5 = (I_pH_aa * I_IN_lim)
    # I6 = I5.copy()
    # I7 = (I_pH_aa * I_IN_lim * I_h2_fa)
    # I8 = (I_pH_aa * I_IN_lim * I_h2_c4)
    # I9 = I8.copy()
    # I10 = (I_pH_pro * I_IN_lim * I_h2_pro)
    # I11 = (I_pH_ac * I_IN_lim * I_nh3)
    # I12 = (I_pH_h2 * I_IN_lim)
    # I13 = (I_pH_cap * I_IN_lim * I_h2_c4)
    # I14 = (I_pH_bu * I_IN_lim * I_h2_c4)
    # I15 = (I_pH_va * I_IN_lim * I_h2_c4)
    # I16 = I_IN_lim * I_nh3*I_pH_aa*I_h2_oxidation
    I5  = 1   # inhibition terms are disabled (all set to 1) in this variant
    I6  = 1
    I7  = 1
    I8  = 1
    I9  = 1
    I10 = 1
    I11 = 1
    I12 = 1
    I13 = 1
    I14 = 1
    I15 = 1
    I16 = 1


    v[model.reactions.index(
        'TSS_Disintegration')] = model.model_parameters["k_dis_TSS"]*c[model.species.index('TSS')]

    v[model.reactions.index(
        'TDS_Disintegration')] = model.model_parameters["k_dis_TDS"]*c[model.species.index('TDS')]

    v[model.reactions.index('Hydrolysis carbohydrates')
      ] = model.model_parameters['k_hyd_ch']*c[model.species.index('X_ch')]

    v[model.reactions.index('Hydrolysis proteins')
      ] = model.model_parameters['k_hyd_pr']*c[model.species.index('X_pr')]

    v[model.reactions.index('Hydrolysis lipids')
      ] = model.model_parameters['k_hyd_li']*c[model.species.index('X_li')]

    v[model.reactions.index('Su_to_et')] = model.model_parameters['k_m_su_et']*c[model.species.index('S_su')] / \
        (model.model_parameters['K_S_su_et']+c[model.species.index('S_su')])*c[model.species.index('X_su')]*I5

    v[model.reactions.index('Su_to_lac')] = model.model_parameters['k_m_su_lac']*c[model.species.index('S_su')] / \
        (model.model_parameters['K_S_su_lac']+c[model.species.index('S_su')]
         )*c[model.species.index('X_su')]/(c[model.species.index('X_su')]+model.model_parameters['K_S_X_su_lac'])*I5    

    v[model.reactions.index('Su_to_ac')] = model.model_parameters['k_m_su_ac']*c[model.species.index('S_su')] / \
        (model.model_parameters['K_S_su_ac']+c[model.species.index('S_su')]
         )*c[model.species.index('X_su')]*I5

    v[model.reactions.index('Su_to_pro')] = model.model_parameters['k_m_su_pro']*c[model.species.index('S_su')] / \
        (model.model_parameters['K_S_su_pro']+c[model.species.index('S_su')]
         )*c[model.species.index('X_su')]/(c[model.species.index('X_su')]+model.model_parameters['K_S_X_su_pro'])*I5        


    v[model.reactions.index('aas_to_lac')] = model.model_parameters['k_m_aa_lac']*c[model.species.index('S_aa')] / \
        (model.model_parameters['K_S_aa_lac']+c[model.species.index('S_aa')]
         )*c[model.species.index('X_aa')]/(c[model.species.index('X_aa')]+model.model_parameters['K_S_X_aa_lac'])*I6

    v[model.reactions.index('aas_to_pro')] = model.model_parameters['k_m_aa_pro']*c[model.species.index('S_aa')] / \
        (model.model_parameters['K_S_aa_pro']+c[model.species.index('S_aa')]
         )*c[model.species.index('X_aa')]/(c[model.species.index('X_aa')]+model.model_parameters['K_S_X_aa_pro'])*I6

    v[model.reactions.index('aas_to_ac')] = model.model_parameters['k_m_aa_ac']*c[model.species.index('S_aa')] / \
        (model.model_parameters['K_S_aa_ac']+c[model.species.index('S_aa')]
         )*c[model.species.index('X_aa')]/(c[model.species.index('X_aa')]+model.model_parameters['K_S_X_aa_ac'])*I6

    v[model.reactions.index('Uptake of LCFA')] = model.model_parameters['k_m_fa']*c[model.species.index('S_fa')] / \
        (model.model_parameters['K_S_fa'] +
         c[model.species.index('S_fa')])*c[model.species.index('X_fa')]/(c[model.species.index('X_fa')]+model.model_parameters['K_S_X_fa'])*I7

    v[model.reactions.index('Uptake of acetate_et')] = model.model_parameters['k_m_ac_et']*c[model.species.index('S_ac')]*c[model.species.index('S_et')] / \
        (model.model_parameters['K_S_ac']*c[model.species.index('S_ac')]+model.model_parameters['K_S_ac_et']*c[model.species.index('S_et')]+c[model.species.index('S_ac')]*c[model.species.index('S_et')]+10**-9
         )*c[model.species.index('X_ac_et')]/(c[model.species.index('X_ac_et')]+model.model_parameters['K_S_X_ac_et'])*I11

    v[model.reactions.index('Uptake of acetate_lac')] = model.model_parameters['k_m_ac_lac']*c[model.species.index('S_ac')]*c[model.species.index('S_lac')] / \
        (model.model_parameters['K_S_ac']*c[model.species.index('S_ac')]+model.model_parameters['K_S_ac_lac']*c[model.species.index('S_lac')]+c[model.species.index('S_ac')]*c[model.species.index('S_lac')]+10**-9
         )*c[model.species.index('X_ac_lac')]/(c[model.species.index('X_ac_lac')]+model.model_parameters['K_S_X_ac_lac'])*I11

    v[model.reactions.index('Uptake of propionate_et')] = model.model_parameters['k_m_pro_et']*c[model.species.index('S_pro')]*c[model.species.index('S_et')] / \
        (model.model_parameters['K_S_pro']*c[model.species.index('S_pro')]+model.model_parameters['K_S_pro_et']*c[model.species.index('S_et')]+c[model.species.index('S_pro')]*c[model.species.index('S_et')]+10**-9
         )*c[model.species.index('X_chain_et')]/(c[model.species.index('X_chain_et')]+model.model_parameters['K_S_X_chain_et'])*I10

    v[model.reactions.index('Uptake of propionate_lac')] = model.model_parameters['k_m_pro_lac']*c[model.species.index('S_pro')]*c[model.species.index('S_lac')] / \
        (model.model_parameters['K_S_pro']*c[model.species.index('S_pro')]+model.model_parameters['K_S_pro_lac']*c[model.species.index('S_lac')]+c[model.species.index('S_pro')]*c[model.species.index('S_lac')]+10**-9
         )*c[model.species.index('X_chain_lac')]/(c[model.species.index('X_chain_lac')]+model.model_parameters['K_S_X_chain_lac'])*I10

    v[model.reactions.index('Uptake of butyrate_et')] = model.model_parameters['k_m_bu_et']*c[model.species.index('S_bu')]*c[model.species.index('S_et')] / \
        (model.model_parameters['K_S_bu']*c[model.species.index('S_bu')]+model.model_parameters['K_S_bu_et']*c[model.species.index('S_et')]+c[model.species.index('S_bu')]*c[model.species.index('S_et')]+10**-9
         )*c[model.species.index('X_chain_et')]*I14

    v[model.reactions.index('Uptake of butyrate_lac')] = model.model_parameters['k_m_bu_lac']*c[model.species.index('S_bu')]*c[model.species.index('S_lac')] / \
        (model.model_parameters['K_S_bu']*c[model.species.index('S_bu')]+model.model_parameters['K_S_bu_lac']*c[model.species.index('S_lac')]+c[model.species.index('S_bu')]*c[model.species.index('S_lac')]+10**-9
         )*c[model.species.index('X_chain_lac')]/(c[model.species.index('X_chain_lac')]+model.model_parameters['K_S_X_chain_lac'])*I14

    v[model.reactions.index('Uptake of butyrate')] = model.model_parameters['k_m_bu']*c[model.species.index('S_bu')]/ \
        (model.model_parameters['K_S_bu']+c[model.species.index('S_bu')])*c[model.species.index('X_VFA_deg')]/(c[model.species.index('X_VFA_deg')]+model.model_parameters['K_S_X_VFA_deg'])*I14

    v[model.reactions.index('Uptake of valerate')] = model.model_parameters['k_m_va']*c[model.species.index('S_va')] / \
        (model.model_parameters['K_S_va']+c[model.species.index('S_va')])*c[model.species.index('X_VFA_deg')]/(c[model.species.index('X_VFA_deg')]+model.model_parameters['K_S_X_VFA_deg'])*I15

    v[model.reactions.index('Uptake of caproate')] = model.model_parameters['k_m_cap']*c[model.species.index('S_cap')] / \
        (model.model_parameters['K_S_cap']+c[model.species.index('S_cap')])*c[model.species.index('X_VFA_deg')]/(c[model.species.index('X_VFA_deg')]+model.model_parameters['K_S_X_VFA_deg'])*I13

    v[model.reactions.index('Methanogenessis from acetate and h2')] = model.model_parameters['k_m_h2_Me_ac']*c[model.species.index('S_h2')]*c[model.species.index('S_ac')] / \
        (model.model_parameters['K_S_h2_Me_ac']*c[model.species.index('S_h2')]+model.model_parameters['K_S_ac_Me']*c[model.species.index(
            'S_ac')]+c[model.species.index('S_ac')]*c[model.species.index('S_h2')]+10**-9)*c[model.species.index('X_Me_ac')]*I12

    v[model.reactions.index('Methanogenessis from CO2 and h2')] = model.model_parameters['k_m_h2_Me_CO2']*c[model.species.index('S_h2')]*c[model.species.index('S_co2')] / \
        (model.model_parameters['K_S_h2_Me_CO2']*c[model.species.index('S_h2')]+model.model_parameters['K_S_CO2_Me']*c[model.species.index(
            'S_co2')]+c[model.species.index('S_co2')]*c[model.species.index('S_h2')]+10**-9)*c[model.species.index('X_Me_CO2')]*I12


    v[model.reactions.index('Uptake of ethanol')] = model.model_parameters['k_m_et']*c[model.species.index('S_et')] / \
        (model.model_parameters['K_S_et']+c[model.species.index('S_et')]
         )*c[model.species.index("X_et")]/(c[model.species.index("X_et")]+model.model_parameters['K_S_X_et'])*I16

    v[model.reactions.index('Uptake of lactate')] = model.model_parameters['k_m_lac']*c[model.species.index('S_lac')] / \
        (model.model_parameters['K_S_lac']+c[model.species.index('S_lac')]
         )*c[model.species.index('X_lac')]/(c[model.species.index('X_lac')]+model.model_parameters['K_S_X_lac'])*I16

    v[model.reactions.index(
        'Decay of Xsu')] = model.model_parameters['k_dec_X_su']*c[model.species.index('X_su')]

    v[model.reactions.index(
        'Decay of Xaa')] = model.model_parameters['k_dec_X_aa']*c[model.species.index('X_aa')]

    v[model.reactions.index(
        'Decay of Xfa')] = model.model_parameters['k_dec_X_fa']*c[model.species.index('X_fa')]

    v[model.reactions.index(
        'Decay of X_ac_et')] = model.model_parameters['k_dec_X_ac']*c[model.species.index('X_ac_et')]

    v[model.reactions.index(
        'Decay of X_ac_lac')] = model.model_parameters['k_dec_X_ac']*c[model.species.index('X_ac_lac')]

    v[model.reactions.index(
        'Decay of X_chain_et')] = model.model_parameters['k_dec_X_chain_et']*c[model.species.index('X_chain_et')]

    v[model.reactions.index('Decay of X_chain_lac')
      ] = model.model_parameters['k_dec_X_chain_lac']*c[model.species.index('X_chain_lac')]

    v[model.reactions.index(
        'Decay of X_VFA_deg')] = model.model_parameters['k_dec_X_VFA_deg']*c[model.species.index('X_VFA_deg')]

    v[model.reactions.index(
        'Decay of X_Me_ac')] = model.model_parameters['k_dec_X_Me_ac']*c[model.species.index('X_Me_ac')]

    v[model.reactions.index(
        'Decay of X_Me_CO2')] = model.model_parameters['k_dec_X_Me_CO2']*c[model.species.index('X_Me_CO2')]

    v[model.reactions.index(
        'Decay of Xet')] = model.model_parameters['k_dec_X_et']*c[model.species.index('X_et')]

    v[model.reactions.index(
        'Decay of Xlac')] = model.model_parameters['k_dec_X_lac']*c[model.species.index('X_lac')]



    p_gas_h2 = c[model.species.index('S_gas_h2')] * model.base_parameters["R"] * \
        model.base_parameters["T_op"] / 16
    p_gas_ch4 = c[model.species.index('S_gas_ch4')] * model.base_parameters["R"] * \
        model.base_parameters["T_op"] / 64
    p_gas_co2 = c[model.species.index('S_gas_co2')] * model.base_parameters["R"] * \
        model.base_parameters["T_op"]
    p_gas_h2o = 0.0313 * \
        np.exp(5290 *
               (1 / model.base_parameters["T_base"] - 1 / model.base_parameters["T_op"]))
    P_gas = p_gas_h2 + p_gas_ch4 + p_gas_co2 + p_gas_h2o
    q_gas = max(
        0, (model.model_parameters['k_p'] * (P_gas - model.base_parameters['P_atm'])))
    v[model.reactions.index('Gas Transfer H2')] = model.model_parameters['k_L_a'] * \
        (c[model.species.index('S_h2')] - 16 *
         model.model_parameters['K_H_h2'] * p_gas_h2)

    v[model.reactions.index('Gas Transfer CH4')] = max(0,model.model_parameters['k_L_a'] * \
        (c[model.species.index('S_ch4')] - 64 *
         model.model_parameters['K_H_ch4'] * p_gas_ch4))
    v[model.reactions.index('Gas Transfer CO2')] = max(0,model.model_parameters['k_L_a'] * \
        (c[model.species.index('S_co2')] -
         model.model_parameters['K_H_co2'] * p_gas_co2))

    dCdt = np.matmul(model.s, v)

    phi = c[model.species.index('S_cation')]+c[model.species.index('S_nh4_ion')]-c[model.species.index('S_hco3_ion')]-(c[model.species.index('S_lac_ion')] / 88) - (c[model.species.index('S_ac_ion')] / 64) - (c[model.species.index('S_pro_ion')] /
                                                                                                                                                                     112) - (c[model.species.index('S_bu_ion')] / 160)-(c[model.species.index('S_cap_ion')] / 230) - (c[model.species.index('S_va_ion')] / 208) - c[model.species.index('S_anion')]
    if 'S_H_ion' in model.control_state.keys():
        c[model.species.index('S_H_ion')]=model.control_state['S_H_ion']
    else:
        c[model.species.index('S_H_ion')] = (-1 * phi / 2) + \
        (0.5 * np.sqrt(phi**2 + 4 * model.model_parameters['K_w']))

    dCdt[0: len(model.species)-3] = dCdt[0: len(model.species)-3]+model.base_parameters['q_in'] / \
        model.base_parameters["V_liq"] * \
        (model.inlet_conditions[0: len(model.species)-3]-c[0: len(model.species)-3].reshape(-1, 1))

    dCdt[len(model.species)-3:] = dCdt[len(model.species)-3:]+q_gas/model.base_parameters["V_gas"] * \
        (model.inlet_conditions[len(model.species)-3:]-c[len(model.species)-3:].reshape(-1, 1))

    dCdt[[model.species.index('S_H_ion'), model.species.index(
        'S_co2'), model.species.index('S_nh4_ion')], 0] = 0

    if c[model.species.index('S_IN')]<0.01:
        model.nitrogen_limited=True
    else:
        model.nitrogen_limited=False

    if model.switch == "DAE":
        # dCdt[model.species.index('S_h2')] = 0

        dCdt[model.species.index('S_va_ion'):model.species.index('S_co2')] = 0

        dCdt[model.species.index('S_nh3')] = 0

    if model.control_state.keys():
        for state in model.control_state.keys():
            c[model.species.index(state)]=model.control_state[state]
            dCdt[model.species.index(state)]=0
    model.info["Fluxes"]=v
    return dCdt[:, 0]
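The gas transfer rates at the end of both functions implement ADM1's two-film model: the flux is k_L_a times the difference between the dissolved concentration and its Henry's-law equilibrium with the headspace partial pressure, where the factors 16 and 64 convert between moles and kgCOD for H2 and CH4 respectively. A numeric sketch with made-up parameter values (the real ones come from model.model_parameters and model.base_parameters):

R, T_op = 0.083145, 308.15   # bar*L/(mol*K) and K; illustrative operating point
k_L_a   = 200.0              # 1/d, gas-liquid transfer coefficient (illustrative)
K_H_h2  = 7.8e-4             # mol/(L*bar), Henry constant for H2 (illustrative)

S_h2     = 2.0e-7            # dissolved H2 (made up)
S_gas_h2 = 1.0e-5            # headspace H2 (made up)

# Same expressions as in the source above:
p_gas_h2 = S_gas_h2 * R * T_op / 16
rate_h2  = k_L_a * (S_h2 - 16 * K_H_h2 * p_gas_h2)

print(f"p_gas_h2 = {p_gas_h2:.3e} bar, transfer rate = {rate_h2:.3e}")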