# add_metadata.py
# Nico (25 Jan 2021)

# Add metadata to the attrs dictionary of every reference signature's h5 file
# (on backup copies, since an error can occur if a file is being read by
# someone else while we try to access it).

# Metadata to add, with example values:
#   cctype: 'sign2'
#   dataset_code: 'A1.001'
#   molset: 'full'

import os, shutil
10
import h5py
11

12
def add_metadata(version="2020_01", signatures='0123', pathrepo="/aloy/web_checker/package_cc/"):
    """Add identifying metadata to backup copies of the signature h5 files.

    For every molset ('full', 'reference'), space ('A'-'E'), dataset number
    ('1'-'5') and requested signature, the file

        <pathrepo>/<version>/<molset>/<space>/<space><num>/<space><num>.001/sign<N>/sign<N>.h5

    is copied to ``sign<N>_BACKUP.h5`` in the same directory, and the
    attributes ``cctype``, ``dataset_code`` and ``molset`` are added to the
    attrs of the copy. Attributes already present in the copy are left
    untouched and reported. Files that do not exist are reported and skipped.

    Args:
        version: (str) version of the cc package, used as a path component.
        signatures: (str or int) digits of the signatures to process,
            e.g. '012' (or 12) for sign0, sign1, sign2 (or sign1, sign2).
        pathrepo: (str) path to the cc signature repo.

    Returns:
        None. Progress is reported through ``print``.
    """
    signatures = str(signatures)  # in case we were given an int

    for molset in ('full', 'reference'):
        for space in "ABCDE":
            for num in "12345":
                data_code = space + num + '.001'
                # Same for every signature of this dataset, so build it once.
                dataset_dir = os.path.join(pathrepo, version, molset, space, space + num, data_code)

                for sign in signatures:
                    signature = 'sign' + sign
                    sign_dir = os.path.join(dataset_dir, signature)
                    fichero = os.path.join(sign_dir, signature + '.h5')

                    if not os.path.exists(fichero):
                        print(fichero, "doesn't exist, skipping")
                        continue

                    # Only the copy is opened for writing: the original may be
                    # in use by another reader at the same time.
                    print("Making backup which will contain metadata")
                    backup_file = os.path.join(sign_dir, signature + '_BACKUP.h5')
                    shutil.copyfile(fichero, backup_file)

                    print("Adding metadata to", backup_file)
                    dico = dict(cctype=signature, dataset_code=data_code, molset=molset)

                    with h5py.File(backup_file, 'a') as f:
                        for k, v in dico.items():
                            if k not in f.attrs:
                                f.attrs.create(name=k, data=v)
                            else:
                                print(k, "already in f.attrs")

if __name__ == "__main__":
    # Script entry point: process the package version currently in use.
    current_version = "2020_01"
    add_metadata(version=current_version)