Commit 9c48d7ed authored by Oriol Guitart's avatar Oriol Guitart
Browse files

Improve notebook and remove unused steps

parent 982b8af9
Pipeline #787 passed with stages
in 7 minutes and 9 seconds
%% Cell type:markdown id: tags:
# How to load new libraries into the Chemical Checker infrastructure
%% Cell type:markdown id: tags:
## Load library files to datasource table
%% Cell type:code id: tags:
``` python
import os
import sys

# Put the local chemical_checker checkout on the import path and point
# CC_CONFIG at the test configuration. Both are set before the
# chemicalchecker imports below (presumably the package reads CC_CONFIG
# when it is imported or first used -- confirm).
sys.path.append("/aloy/home/oguitart/projects/source/chemical_checker/chemicalchecker")
os.environ["CC_CONFIG"] = '/aloy/home/oguitart/projects/source/chemical_checker/chemicalchecker/tests/data/config.json'

from chemicalchecker.util import Config
from chemicalchecker.database import Datasource

# One datasource record per library. Each record points at a local file
# (file:// URL) and names the molrepo that the file belongs to.
biur_real = dict(
    name="biur_real",
    url="file:///aloy/scratch/sbnb-adm/checker/local_downloads/BIUR_20100326.sdf",
    permanent=True,
    enabled=False,
    user="",
    password="",
    description="ChemistryX Biur real library",
    molrepo_name="biur_real",
    molrepo_file="BIUR_20100326.sdf",
    is_db=False,
)
biur_virtual = dict(
    name="biur_virtual",
    url="file:///aloy/scratch/sbnb-adm/checker/local_downloads/VIRTUAL_BIUR_POR_MW.tar.gz",
    permanent=True,
    enabled=False,
    user="",
    password="",
    description="ChemistryX Biur virtual library",
    molrepo_name="biur_virtual",
    molrepo_file="VIRTUAL_BIUR_POR_MW",
    is_db=False,
)

# Register both records in the datasource table, real library first.
for datasource in (biur_real, biur_virtual):
    Datasource.add(datasource)
```
%% Cell type:markdown id: tags:
## Add Library parser
Open the *package/chemicalchecker/util/parser/parser.py* file and add one parsing method for each of the new libraries.
Each new method must have the same name as the **molrepo_name** used in the datasource table, and its output must have the same format as the output of the other methods in the **parser.py** file.
%% Cell type:markdown id: tags:
## Download & parse datasource associated to library "biur_real"
1. Download the datasource files
2. Parse the downloaded files and load them into the molrepo table
%% Cell type:code id: tags:
``` python
import os
import sys
# Make the local chemical_checker checkout importable and select the test
# configuration file; both are set before the chemicalchecker imports below.
sys.path.append("/aloy/home/oguitart/projects/source/chemical_checker/chemicalchecker")
os.environ["CC_CONFIG"] = '/aloy/home/oguitart/projects/source/chemical_checker/chemicalchecker/tests/data/config.json'
from chemicalchecker.util import Config
from chemicalchecker.database import Datasource, Molrepo
# Per the notebook text, this call downloads every datasource associated
# with the "biur_real" molrepo and parses the downloaded files to load the
# new data into the molrepo table.
Molrepo.from_molrepo_name("biur_real")
```
%% Cell type:markdown id: tags:
## Download & parse datasource associated to library "biur_virtual"
1. Download the datasource files
2. Parse the downloaded files and load them into the molrepo table
%% Cell type:code id: tags:
``` python
import os
import sys
# Make the local chemical_checker checkout importable and select the test
# configuration file; both are set before the chemicalchecker imports below.
sys.path.append("/aloy/home/oguitart/projects/source/chemical_checker/chemicalchecker")
os.environ["CC_CONFIG"] = '/aloy/home/oguitart/projects/source/chemical_checker/chemicalchecker/tests/data/config.json'
from chemicalchecker.util import Config
from chemicalchecker.database import Datasource, Molrepo
# Per the notebook text, this call downloads every datasource associated
# with the "biur_virtual" molrepo and parses the downloaded files to load
# the new data into the molrepo table.
Molrepo.from_molrepo_name("biur_virtual")
```
%% Cell type:markdown id: tags:
## Add libraries to the structure table
Get the inchikey-inchi pairs of the new libraries and add them to the structure table. Only the molecules that are not already present in that table are added.
%% Cell type:code id: tags:
``` python
import os
import sys
import shutil

# Make the local chemical_checker checkout importable and select the test
# configuration file; both are set before the chemicalchecker import below.
sys.path.append("/aloy/home/oguitart/projects/source/chemical_checker/chemicalchecker")
os.environ["CC_CONFIG"] = '/aloy/home/oguitart/projects/source/chemical_checker/chemicalchecker/tests/data/config.json'

from chemicalchecker.database import Structure, Molrepo

# Handle the real library first, then the virtual one.
for library in ("biur_real", "biur_virtual"):
    # Query the molrepo table for the inchikey and inchi fields of this molrepo.
    molrepo_ik_inchi = Molrepo.get_fields_by_molrepo_name(library, ["inchikey", "inchi"])
    # Insert the inchikey-inchi pairs only for molecules missing from the
    # structure table.
    Structure.add_missing_only(molrepo_ik_inchi)
```
%% Cell type:markdown id: tags:
## Calculate the molecular properties and store them for library "biur_real"
It will also add the missing molecules to the structure table
It will create jobs to do this task in the cluster.
It waits for a molecular property calculation to finish before submitting another one.
%% Cell type:code id: tags:
``` python
import os
import sys
import shutil

# Make the local chemical_checker checkout importable and select the test
# configuration file; both are set before the chemicalchecker imports below.
sys.path.append("/aloy/home/oguitart/projects/source/chemical_checker/chemicalchecker")
os.environ["CC_CONFIG"] = '/aloy/home/oguitart/projects/source/chemical_checker/chemicalchecker/tests/data/config.json'

from chemicalchecker.database import Molprop, Structure
from chemicalchecker.database import Molrepo
from chemicalchecker.util import HPC
from chemicalchecker.util import Config

# Molecular properties to calculate, processed one after the other.
molprops = ['fp2d','fp3d','subskeys','scaffolds','physchem']

# inchikey/inchi fields of every molecule in the "biur_real" molrepo.
molrepo_ik_inchi = Molrepo.get_fields_by_molrepo_name("biur_real",["inchikey","inchi"])

for mol in molprops:
    # Each property gets its own job directory; start from an empty one so
    # files left over from a previous run cannot be picked up.
    job_path = "/aloy/scratch/oguitart/jobs_molprop_" + mol
    if os.path.isdir(job_path):
        shutil.rmtree(job_path)
    os.mkdir(job_path)

    molprop = Molprop(mol)
    # This method sends the job and waits for the job to finish
    cluster = molprop.molprop_hpc(job_path,molrepo_ik_inchi)
```
%% Cell type:markdown id: tags:
## Calculate the molecular properties and store them for library "biur_virtual"
It will create jobs to do this task in the cluster.
It waits for a molecular property calculation to finish before submitting another one.
%% Cell type:code id: tags:
``` python
import os
import sys
import shutil

# Make the local chemical_checker checkout importable and select the test
# configuration file; both are set before the chemicalchecker imports below.
sys.path.append("/aloy/home/oguitart/projects/source/chemical_checker/chemicalchecker")
# Define the configuration path once and reuse it for the environment variable.
cc_config = '/aloy/home/oguitart/projects/source/chemical_checker/chemicalchecker/tests/data/config.json'
os.environ["CC_CONFIG"] = cc_config

from chemicalchecker.database import Molprop, Structure
from chemicalchecker.database import Molrepo
from chemicalchecker.util import HPC
from chemicalchecker.util import Config

# Molecular properties to calculate, processed one after the other.
molprops = ['fp2d','fp3d','subskeys','scaffolds','physchem']

# inchikey/inchi fields of every molecule in the "biur_virtual" molrepo.
molrepo_ik_inchi = Molrepo.get_fields_by_molrepo_name("biur_virtual",["inchikey","inchi"])

for mol in molprops:
    # Each property gets its own job directory; start from an empty one so
    # files left over from a previous run cannot be picked up.
    job_path = "/aloy/scratch/oguitart/jobs_molprop_" + mol
    if os.path.isdir(job_path):
        shutil.rmtree(job_path)
    os.mkdir(job_path)

    molprop = Molprop(mol)
    # This method sends the job and waits for the job to finish
    cluster = molprop.molprop_hpc(job_path,molrepo_ik_inchi)
```
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment