# add_metadata.py
# Nico (25 Jan 2021)
# Add metadata to the attrs dictionary of every signature's h5 file
# (both the 'full' and 'reference' molsets).
# Metadata to add and example:
#   cctype: 'sign2'
#   dataset_code: 'A1.001'
#   molset: 'full'

8
import os, shutil
9
import h5py
10

11
def add_metadata(version="2020_01", signatures='0123', pathrepo="/aloy/web_checker/package_cc/"):
    """
    Add identifying metadata to the attrs of every signature h5 file found.

    For each molset ('full', 'reference'), each space A-E, each number 1-5
    and each requested signature, the file
    <pathrepo>/<version>/<molset>/<space>/<space><num>/<space><num>.001/sign<X>/sign<X>.h5
    is looked up. If it exists, it is first copied next to itself as
    sign<X>_BACKUP.h5 and then the following attributes are added when missing:
        cctype:       e.g. 'sign2'
        dataset_code: e.g. 'A1.001'
        molset:       e.g. 'full'
    Attributes already present are left untouched. Missing files are reported
    and skipped.

    version: (str), version of the cc package
    signatures: (str or int), digits refering to the signatures. ex: '012' for sign0, sign1, sign2.
    pathrepo: (str), path to the cc signature repo
    """
    import itertools  # local import so the file's import block needs no change

    signatures = str(signatures)  # in case we have an int.

    # Same iteration order as nested loops: molset -> space -> num -> sign.
    combos = itertools.product(('full', 'reference'), "ABCDE", "12345", signatures)
    for molset, space, num, sign in combos:
        signature = 'sign' + sign
        data_code = space + num + '.001'
        fichero = os.path.join(pathrepo, version, molset, space, space + num,
                               data_code, signature, signature + '.h5')

        if not os.path.exists(fichero):
            print(fichero, "doesn't exist, skipping")
            continue

        # NOTE: shutil.copyfile replaces an existing destination, so running
        # this again overwrites the previous backup.
        print("Making backup")
        backup = os.path.join(os.path.dirname(fichero),
                              os.path.basename(fichero).split('.')[0] + '_BACKUP.h5')
        shutil.copyfile(fichero, backup)

        print("Adding metadata to", fichero)
        dico = dict(cctype=signature, dataset_code=data_code, molset=molset)

        # Append mode: keep the file's content and only add missing attrs.
        with h5py.File(fichero, 'a') as f:
            for k, v in dico.items():
                if k not in f.attrs:
                    f.attrs.create(name=k, data=v)
                else:
                    print(k, "already in f.attrs")
43

44
45
46
47

if __name__ == '__main__':
    # Script entry point: tag the signature files of the 2020_01 release.
    add_metadata(version="2020_01")