Commit 9c48d7ed authored by Oriol Guitart's avatar Oriol Guitart
Browse files

Improve notebook and remove unused steps

parent 982b8af9
Pipeline #787 passed with stages
in 7 minutes and 9 seconds
%% Cell type:markdown id: tags: %% Cell type:markdown id: tags:
# How to load new libraries to Chemical Checker infrastructure # How to load new libraries to Chemical Checker infrastructure
%% Cell type:markdown id: tags: %% Cell type:markdown id: tags:
## Load library files to datasource table ## Load library files to datasource table
%% Cell type:code id: tags: %% Cell type:code id: tags:
``` python ``` python
# Register the two new libraries ("biur_real" and "biur_virtual") as
# entries of the datasource table.
import os
import sys

# Make the local chemicalchecker checkout importable and point it to its
# configuration file. Both are set before the chemicalchecker imports below
# (presumably the config is read at import/first use -- TODO confirm).
sys.path.append("/aloy/home/oguitart/projects/source/chemical_checker/chemicalchecker")
os.environ["CC_CONFIG"] = '/aloy/home/oguitart/projects/source/chemical_checker/chemicalchecker/tests/data/config.json'

from chemicalchecker.util import Config
from chemicalchecker.database import Datasource

# One dictionary per library; the keys are the fields handed to
# Datasource.add (presumably the datasource table columns -- TODO confirm).
biur_real = {
    'name': "biur_real",
    'url': "file:///aloy/scratch/sbnb-adm/checker/local_downloads/BIUR_20100326.sdf",
    'permanent': True,
    'enabled': False,
    'user': "",
    'password': "",
    'description': "ChemistryX Biur real library",
    'molrepo_name': "biur_real",
    'molrepo_file': "BIUR_20100326.sdf",
    'is_db': False,
}

biur_virtual = {
    'name': "biur_virtual",
    'url': "file:///aloy/scratch/sbnb-adm/checker/local_downloads/VIRTUAL_BIUR_POR_MW.tar.gz",
    'permanent': True,
    'enabled': False,
    'user': "",
    'password': "",
    'description': "ChemistryX Biur virtual library",
    'molrepo_name': "biur_virtual",
    'molrepo_file': "VIRTUAL_BIUR_POR_MW",
    'is_db': False,
}

Datasource.add(biur_real)
Datasource.add(biur_virtual)
``` ```
%% Cell type:markdown id: tags: %% Cell type:markdown id: tags:
## Add Library parser
Open the *package/chemicalchecker/util/parser/parser.py* file and add one parsing method for each of the new libraries.
Each new method must be named exactly after the **molrepo_name** used in the datasource table, and its output must have the same format as the output of the other methods in the **parser.py** file.
%% Cell type:markdown id: tags:
## Download & parse datasource associated to library "biur_real" ## Download & parse datasource associated to library "biur_real"
1. Download the datasource files 1. Download the datasource files
2. Parse downloaded files and load it to the molrepo table 2. Parse downloaded files and load it to the molrepo table
%% Cell type:code id: tags: %% Cell type:code id: tags:
``` python ``` python
# Download and parse the datasource of the "biur_real" library.
import os
import sys

# Make the local chemicalchecker checkout importable and select its
# configuration file before importing the package.
sys.path.append("/aloy/home/oguitart/projects/source/chemical_checker/chemicalchecker")
os.environ["CC_CONFIG"] = '/aloy/home/oguitart/projects/source/chemical_checker/chemicalchecker/tests/data/config.json'

from chemicalchecker.util import Config
from chemicalchecker.database import Datasource, Molrepo

# This method downloads all datasources related to this library
# and parses the files to load the new data into the molrepo table.
Molrepo.from_molrepo_name("biur_real")
``` ```
%% Cell type:markdown id: tags: %% Cell type:markdown id: tags:
## Download datasource associated to library "biur_virtual" ## Download datasource associated to library "biur_virtual"
1. Download the datasource files 1. Download the datasource files
2. Parse downloaded files and load it to the molrepo table 2. Parse downloaded files and load it to the molrepo table
%% Cell type:code id: tags: %% Cell type:code id: tags:
``` python ``` python
# Download and parse the datasource of the "biur_virtual" library.
import os
import sys

# Make the local chemicalchecker checkout importable and select its
# configuration file before importing the package.
sys.path.append("/aloy/home/oguitart/projects/source/chemical_checker/chemicalchecker")
os.environ["CC_CONFIG"] = '/aloy/home/oguitart/projects/source/chemical_checker/chemicalchecker/tests/data/config.json'

from chemicalchecker.util import Config
from chemicalchecker.database import Datasource, Molrepo

# This method downloads all datasources related to this library
# and parses the files to load the new data into the molrepo table.
Molrepo.from_molrepo_name("biur_virtual")
``` ```
%% Cell type:markdown id: tags: %% Cell type:markdown id: tags:
## Add libraries to the structure table
Get the inchikey-inchi pairs of the new libraries and add them to the structure table, but only for the molecules that are not already present in that table.
%% Cell type:code id: tags:
``` python
# Add the molecules of both new libraries to the structure table.
import os
import sys
import shutil

# Make the local chemicalchecker checkout importable and select its
# configuration file before importing the package.
sys.path.append("/aloy/home/oguitart/projects/source/chemical_checker/chemicalchecker")
os.environ["CC_CONFIG"] = '/aloy/home/oguitart/projects/source/chemical_checker/chemicalchecker/tests/data/config.json'

from chemicalchecker.database import Structure, Molrepo

# Same two steps for each library, in the original order
# ("biur_real" first, then "biur_virtual").
for library_name in ("biur_real", "biur_virtual"):
    # Query the molrepo table for the fields we want for this molrepo.
    molrepo_ik_inchi = Molrepo.get_fields_by_molrepo_name(library_name, ["inchikey", "inchi"])
    # Add the inchikey-inchi pairs only for the molecules that are not
    # present in the table yet.
    Structure.add_missing_only(molrepo_ik_inchi)
```
%% Cell type:markdown id: tags:
## Calculate the molecular properties and store them for library "biur_real" ## Calculate the molecular properties and store them for library "biur_real"
It will also add the missing molecules to the structure table.
It will create jobs to do this task in the cluster. It will create jobs to do this task in the cluster.
It waits for a molecular property calculation to finish before submitting another one. It waits for a molecular property calculation to finish before submitting another one.
%% Cell type:code id: tags: %% Cell type:code id: tags:
``` python ``` python
# Calculate every molecular property for the molecules of the "biur_real"
# library, one property at a time.
import os
import sys
import shutil

# Make the local chemicalchecker checkout importable and select its
# configuration file before importing the package.
sys.path.append("/aloy/home/oguitart/projects/source/chemical_checker/chemicalchecker")
os.environ["CC_CONFIG"] = '/aloy/home/oguitart/projects/source/chemical_checker/chemicalchecker/tests/data/config.json'

from chemicalchecker.database import Molprop, Structure
from chemicalchecker.database import Molrepo
from chemicalchecker.util import HPC
from chemicalchecker.util import Config

molprops = ['fp2d', 'fp3d', 'subskeys', 'scaffolds', 'physchem']

# inchikey-inchi pairs of the library, queried once and reused for every
# property.
molrepo_ik_inchi = Molrepo.get_fields_by_molrepo_name("biur_real", ["inchikey", "inchi"])

for mol in molprops:
    job_path = "/aloy/scratch/oguitart/jobs_molprop_" + mol

    # Start from an empty job directory: anything left there by a previous
    # run is deleted before the directory is recreated.
    if os.path.isdir(job_path):
        shutil.rmtree(job_path)
    os.mkdir(job_path)

    molprop = Molprop(mol)
    # This method sends the job and waits for the job to finish
    cluster = molprop.molprop_hpc(job_path, molrepo_ik_inchi)
``` ```
%% Cell type:markdown id: tags: %% Cell type:markdown id: tags:
## Calculate the molecular properties and store them for library "biur_virtual" ## Calculate the molecular properties and store them for library "biur_virtual"
It will create jobs to do this task in the cluster. It will create jobs to do this task in the cluster.
It waits for a molecular property calculation to finish before submitting another one. It waits for a molecular property calculation to finish before submitting another one.
%% Cell type:code id: tags: %% Cell type:code id: tags:
``` python ``` python
# Calculate every molecular property for the molecules of the "biur_virtual"
# library, one property at a time.
import os
import sys
import shutil

# Make the local chemicalchecker checkout importable and select its
# configuration file before importing the package.
sys.path.append("/aloy/home/oguitart/projects/source/chemical_checker/chemicalchecker")
os.environ["CC_CONFIG"] = '/aloy/home/oguitart/projects/source/chemical_checker/chemicalchecker/tests/data/config.json'
# NOTE(review): cc_config is not referenced again in this cell; it is kept
# because other notebook cells may read it -- confirm before removing.
cc_config = '/aloy/home/oguitart/projects/source/chemical_checker/chemicalchecker/tests/data/config.json'

from chemicalchecker.database import Molprop, Structure
from chemicalchecker.database import Molrepo
from chemicalchecker.util import HPC
from chemicalchecker.util import Config

molprops = ['fp2d', 'fp3d', 'subskeys', 'scaffolds', 'physchem']

# inchikey-inchi pairs of the library, queried once and reused for every
# property.
molrepo_ik_inchi = Molrepo.get_fields_by_molrepo_name("biur_virtual", ["inchikey", "inchi"])

for mol in molprops:
    job_path = "/aloy/scratch/oguitart/jobs_molprop_" + mol

    # Start from an empty job directory: anything left there by a previous
    # run is deleted before the directory is recreated.
    if os.path.isdir(job_path):
        shutil.rmtree(job_path)
    os.mkdir(job_path)

    molprop = Molprop(mol)
    # This method sends the job and waits for the job to finish
    cluster = molprop.molprop_hpc(job_path, molrepo_ik_inchi)
``` ```
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment