Commit e584e519 authored by nsoler's avatar nsoler
Browse files

admin cc scripts

parent 2d7769ec
...@@ -62,7 +62,7 @@ from chemicalchecker.util.decorator import cached_property ...@@ -62,7 +62,7 @@ from chemicalchecker.util.decorator import cached_property
from chemicalchecker.util.models import import_models, import_sign0_features # import models for predicting sign1, sign2 from chemicalchecker.util.models import import_models, import_sign0_features # import models for predicting sign1, sign2
CURRENT_VERSION= '2020_01' CURRENT_VERSION= '2020_02'
@logged @logged
class ChemicalChecker(): class ChemicalChecker():
......
...@@ -9,12 +9,28 @@ ...@@ -9,12 +9,28 @@
import os, shutil import os, shutil
import h5py import h5py
def add_metadata(version="2020_01",signatures='0123', pathrepo="/aloy/web_checker/package_cc/"): #VERSION= "2020_02"
from get_repo_version import cc_repo_version
def add_metadata(cc_repo=None,signatures='0123', backup=True):
""" """
version: (str), version of the cc package cc_repo: (str) path to a cc sign repo i.e /aloy/web_checker/package_cc/2020_02
signature: (str or int), number refering to the signature. ex: '012' for sign0, sign1, sign2. signature: (str or int), number refering to the signature. ex: '012' for sign0, sign1, sign2.
pathrepo: (str), path to the cc signature repo backup (bool): make a backup copy of the signature first and add metadata to the backup instead
""" """
if cc_repo is None:
cc_repo = cc_repo_version()
if cc_repo is None:
print("ERROR, cannot guess the latest cc repository path")
print("Please provide it as an argument")
print("ex: cc_repo='/aloy/web_checker/package_cc/2020_02'")
return
else:
print("Working with cc_repo:",cc_repo)
signatures=str(signatures) # in case we have an int. signatures=str(signatures) # in case we have an int.
for molset in ('full','reference'): for molset in ('full','reference'):
...@@ -23,17 +39,25 @@ def add_metadata(version="2020_01",signatures='0123', pathrepo="/aloy/web_checke ...@@ -23,17 +39,25 @@ def add_metadata(version="2020_01",signatures='0123', pathrepo="/aloy/web_checke
for sign in signatures: for sign in signatures:
signature= 'sign'+sign signature= 'sign'+sign
data_code= space+num+'.001' data_code= space+num+'.001'
fichero= os.path.join(pathrepo,version,molset,space,space+num, data_code, signature, signature+'.h5')
fichero= os.path.join(cc_repo,molset,space,space+num, data_code, signature, signature+'.h5')
if os.path.exists(fichero): if os.path.exists(fichero):
print("Making backup which will contain metadata")
backup_file= os.path.join(os.path.dirname(fichero),os.path.basename(fichero).split('.')[0]+'_BACKUP.h5')
shutil.copyfile(fichero,backup_file)
print("Adding metadata to", backup_file) if backup:
print("Making backup which will contain metadata")
backup_file= os.path.join(os.path.dirname(fichero),os.path.basename(fichero).split('.')[0]+'_BACKUP.h5')
if not os.path.exists(backup_file):
shutil.copyfile(fichero,backup_file)
else:
print("Backup file", backup_file,"already exists, just adding metadata to it.")
fichero=backup_file
print("Adding metadata to", fichero)
dico= dict(cctype=signature, dataset_code=data_code, molset=molset) dico= dict(cctype=signature, dataset_code=data_code, molset=molset)
with h5py.File(backup_file,'a') as f: with h5py.File(fichero,'a') as f:
for k,v in dico.items(): for k,v in dico.items():
if k not in f.attrs: if k not in f.attrs:
f.attrs.create(name=k,data=v) f.attrs.create(name=k,data=v)
...@@ -42,21 +66,33 @@ def add_metadata(version="2020_01",signatures='0123', pathrepo="/aloy/web_checke ...@@ -42,21 +66,33 @@ def add_metadata(version="2020_01",signatures='0123', pathrepo="/aloy/web_checke
else: else:
print(fichero, "doesn't exist, skipping") print(fichero, "doesn't exist, skipping")
print("\n____")
def export_sign(target_dir, version="2020_01", signatures='2', pathrepo="/aloy/web_checker/package_cc/",molsets=('full',),copy_backup=False, add_metadata=True): def export_sign(target_dir, cc_repo=None, signatures='2',molsets=('full'),copy_backup=False, add_metadata=True):
""" """
Export all signatures from a given cctype (ex: sign2) in a single folder Export all signatures from a given cctype (ex: sign2) in a single folder
Add Metadata to the output files if not present in the original h5 file Add Metadata to the output files if not present in the original h5 file
target_dir (str): target directory where the signatures will be copied target_dir (str): target directory where the signatures will be copied
version (str): version of the checker cc_repo: (str) path to a cc sign repo i.e /aloy/web_checker/package_cc/2020_02
signature: (str or int), number refering to the signature. ex: '012' for sign0, sign1, sign2. signature: (str or int), number refering to the signature. ex: '012' for sign0, sign1, sign2.
pathrepo: (str), path to the cc signature repo
molsets: (list or tuple), either 'full' or 'reference' molsets: (list or tuple), either 'full' or 'reference'
copy_backup (Bool): copy signx_BACKUP.h5 instead of signx.h5 if present copy_backup (Bool): copy signx_BACKUP.h5 instead of signx.h5 if present
add_metadata (Bool): Add metadata to the copied file add_metadata (Bool): Add metadata to the copied file
""" """
if cc_repo is None:
cc_repo = cc_repo_version()
if cc_repo is None:
print("ERROR, cannot guess the latest cc repository path")
print("Please provide it as an argument")
print("ex: cc_repo='/aloy/web_checker/package_cc/2020_02'")
return
else:
print("Working with cc_repo:",cc_repo)
signatures=str(signatures) # in case we have an int. signatures=str(signatures) # in case we have an int.
for sign in signatures: for sign in signatures:
...@@ -78,9 +114,9 @@ def export_sign(target_dir, version="2020_01", signatures='2', pathrepo="/aloy/w ...@@ -78,9 +114,9 @@ def export_sign(target_dir, version="2020_01", signatures='2', pathrepo="/aloy/w
data_code= space+num+'.001' data_code= space+num+'.001'
if copy_backup: if copy_backup:
fichero= os.path.join(pathrepo,version,molset,space,space+num, data_code, signature, signature+'_BACKUP.h5') fichero= os.path.join(cc_repo,molset,space,space+num, data_code, signature, signature+'_BACKUP.h5')
else: else:
fichero= os.path.join(pathrepo,version,molset,space,space+num, data_code, signature, signature+'.h5') fichero= os.path.join(cc_repo,molset,space,space+num, data_code, signature, signature+'.h5')
target_file = os.path.join(sign_dir, cctype+'_'+space+num+'_'+molset+'.h5') target_file = os.path.join(sign_dir, cctype+'_'+space+num+'_'+molset+'.h5')
...@@ -108,13 +144,12 @@ def export_sign(target_dir, version="2020_01", signatures='2', pathrepo="/aloy/w ...@@ -108,13 +144,12 @@ def export_sign(target_dir, version="2020_01", signatures='2', pathrepo="/aloy/w
if __name__=='__main__': if __name__=='__main__':
current_version="2020_01"
target_directory= "/aloy/scratch/nsoler/CC_related/EXPORT_SIGN" target_directory= "/aloy/scratch/nsoler/CC_related/EXPORT_SIGN"
# Backup all h5 files and add metadata to the backups: # Backup all h5 files and add metadata to the backups:
#add_metadata(version=current_version) add_metadata()
export_sign(target_directory,version="2020_01", signatures='2',molsets=['full'], copy_backup=True) #export_sign(target_directory, signatures='2',molsets=['full'], copy_backup=True)
...@@ -3,14 +3,26 @@ ...@@ -3,14 +3,26 @@
## in A spaces ## in A spaces
from chemicalchecker.core.chemcheck import ChemicalChecker from chemicalchecker.core.chemcheck import ChemicalChecker
from get_repo_version import cc_repo_version
#VERSION= "2020_02"
repo= "/aloy/web_checker/package_cc/2020_01/" def export_features_sign0(cc_repo=None, outDir="/aloy/scratch/nsoler/CC_related/EXPORT_SIGN/sign0"):
outDir= "/aloy/scratch/nsoler/CC_related/EXPORT_SIGN/sign0"
cc = ChemicalChecker(repo) if cc_repo is None:
cc_repo = cc_repo_version()
for space in "ABCDE": if cc_repo is None:
for num in (1, 2, 3, 4, 5): print("ERROR, cannot guess the latest cc repository path")
ds= space+str(num)+'.001' print("Please provide it as an argument")
sign0tmp = cc.get_signature('sign0', 'full', ds) print("ex: cc_repo='/aloy/web_checker/package_cc/2020_02'")
sign0tmp.export_features(outDir) return
\ No newline at end of file else:
print("Working with cc_repo:",cc_repo)
cc = ChemicalChecker(cc_repo)
for space in "ABCDE":
for num in (1, 2, 3, 4, 5):
ds= space+str(num)+'.001'
sign0tmp = cc.get_signature('sign0', 'full', ds)
sign0tmp.export_features(outDir)
\ No newline at end of file
# Nico 10 Feb 2021 # Nico 10 Feb 2021
# Create symlinks for sign0 and 1 into a single destination directory # Create symlinks for sign0 and 1 into a single destination directory
import os import os, sys
from get_repo_version import cc_repo_version
# Location of the cc signature repo and of the folder receiving the links.
version = "2020_01"
root="/aloy/web_checker/package_cc/"
destination = "/aloy/scratch/nsoler/CC_related/EXPORT_SIGN"

# Visit every sub-space (A1..E5) of the 'full' molset and expose the sign0 and
# sign1 backup files under a flat "<sign>_<subspace>_<molset>.h5" link name.
for molset in ('full',):
    for space in "ABCDE":
        for num in (1, 2, 3, 4, 5):
            for sign in ('sign0', 'sign1'):
                subsp = space + str(num)
                ds = subsp + '.001'
                signFile = os.path.join(
                    root, version, molset, space, subsp, ds, sign,
                    sign + '_BACKUP.h5'
                )

                # Nothing to link when the backup file is absent.
                if not os.path.exists(signFile):
                    print("File not found: ", signFile)
                    continue

                # Make a symlink into the destination
                symlink = os.path.join(
                    destination, '_'.join((sign, subsp, molset)) + '.h5'
                )
                try:
                    os.symlink(signFile, symlink)
                except Exception as e:
                    print("Error for creating", symlink)
                    print(e)
                else:
                    print("Created symlink:", symlink)
def make_symlinks(destination="/aloy/scratch/nsoler/CC_related/EXPORT_SIGN", cc_repo=None):
    """
    Create symlinks to the signature backup files of a cc repo in a single folder.

    For every molset ('full', 'reference'), space (A to E), sub-space (1 to 5)
    and cctype (sign0 to sign3), the file
        <cc_repo>/<molset>/<space>/<space><num>/<space><num>.001/<sign>/<sign>_BACKUP.h5
    is linked, when it exists, as
        <destination>/<sign>_<space><num>_<molset>.h5
    Only the '_BACKUP.h5' files are linked, not the plain '<sign>.h5' ones.

    destination: (str) folder receiving the symlinks, created if missing
    cc_repo: (str) path to a cc sign repo i.e /aloy/web_checker/package_cc/2020_02
             When None, the path returned by cc_repo_version() is used.

    Returns None. Problems are reported on stdout rather than raised.
    """
    if cc_repo is None:
        cc_repo = cc_repo_version()

    if cc_repo is None:
        print("ERROR, cannot guess the latest cc repository path")
        print("Please provide it as an argument")
        print("ex: cc_repo='/aloy/web_checker/package_cc/2020_02'")
        return

    print("Working with cc_repo:",cc_repo)

    if not os.path.isdir(destination):
        try:
            os.makedirs(destination)
        except OSError as e:
            print("ERROR while attempting to create destination folder", destination)
            print(e)
            # Without a destination folder every symlink below would fail too.
            return
        print("Created directory", destination)

    for molset in ('full', 'reference'):
        for space in "ABCDE":
            for num in (1, 2, 3, 4, 5):
                subsp = space + str(num)
                ds = subsp + '.001'

                for sign in ('sign0', 'sign1', 'sign2', 'sign3'):
                    signFile = os.path.join(cc_repo, molset, space, subsp, ds, sign, sign + '_BACKUP.h5')

                    if not os.path.exists(signFile):
                        print("File not found: ",signFile)
                        continue

                    # Make a symlink into the destination
                    symlink = os.path.join(destination, sign + '_' + subsp + '_' + molset + '.h5')

                    # Re-running the script must not report correct links as errors.
                    if os.path.islink(symlink) and os.readlink(symlink) == signFile:
                        print("Symlink already up to date:", symlink)
                        continue

                    try:
                        os.symlink(signFile, symlink)
                    except OSError as e:
                        # e.g. a different file or a stale link already uses this name
                        print("Error for creating", symlink)
                        print(e)
                    else:
                        print("Created symlink:",symlink)
# Script entry point.
# NOTE(review): this only assigns an empty `destination`; nothing visible here
# calls the symlink-creation function, so the guard looks unfinished -- confirm
# the intended call and destination path.
if __name__== '__main__':
destination = ""
\ No newline at end of file
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment