add_metadata.py 7.16 KB
Newer Older
1
# Nico (25 Jan 2021)
2
3
# Add metadata to the attrs dictionary of every signature's h5 file ('full' and 'reference' molsets).
# (By default this is done on backup copies, since an error can be raised if a file is being read by someone else while we try to access it.)
4
5
6
7
8
# Metadata to add and example:
#   cctype: 'sign2'
#   dataset_code: 'A1.001'
#   molset: 'full'

9
import os, shutil
10
import h5py
11

nsoler's avatar
nsoler committed
12
13
14
#VERSION= "2020_02"
from get_repo_version import cc_repo_version

nsoler's avatar
nsoler committed
15
def remove_backups(cc_repo="2020_02"):
    """
    Removes the previous signx_BACKUP.h5 so that add_metadata can generate them again.
    DANGEROUS script! Files are deleted without confirmation. Be careful.

    cc_repo: (str) version folder of the cc repository (ex: '2020_02'), looked up under
             /aloy/web_checker/package_cc/. An absolute path is used as is.

    Returns None. Failures to delete a file are reported with a WARNING and skipped.
    """
    root = "/aloy/web_checker/package_cc/"
    # os.path.join drops `root` when cc_repo is already an absolute path
    cc_repo = os.path.join(root, cc_repo)
    signatures = '0123'

    for molset in ('full', 'reference'):
        for space in "ABCDE":
            for num in "12345":
                data_code = space + num + '.001'

                for sign in signatures:
                    signature = 'sign' + sign
                    fichero = os.path.join(cc_repo, molset, space, space + num, data_code, signature, signature + '_BACKUP.h5')

                    if not os.path.exists(fichero):
                        continue

                    try:
                        # The backups are regular files: shutil.rmtree() only works on
                        # directories and would always fail here, so use os.remove().
                        # rmtree is kept for the unlikely case of a real directory.
                        if os.path.isdir(fichero) and not os.path.islink(fichero):
                            shutil.rmtree(fichero)
                        else:
                            os.remove(fichero)
                    except OSError as e:
                        print("WARNING", e)
                        continue
                    else:
                        print("Deleted:", fichero)



nsoler's avatar
nsoler committed
45
def add_metadata(cc_repo=None,signatures='0123', backup=True):
    """
    Write the cctype, dataset_code and molset attributes into the h5 file of every
    signature found in a cc repository ('full' and 'reference' molsets, spaces A1 to E5).
    Attributes that are already present are left untouched.

    cc_repo: (str) path to a cc sign repo i.e /aloy/web_checker/package_cc/2020_02.
             When None, the value returned by cc_repo_version() is used.
    signatures: (str or int), digits refering to the signatures. ex: '012' for sign0, sign1, sign2.
    backup (bool): copy signx.h5 to signx_BACKUP.h5 first (unless that copy already exists)
                   and add the metadata to the copy instead of the original.
    """
    if cc_repo is None:
        cc_repo = cc_repo_version()

        if cc_repo is None:
            print("ERROR, cannot guess the latest cc repository path")
            print("Please provide it as an argument")
            print("ex: cc_repo='/aloy/web_checker/package_cc/2020_02'")
            return

        print("Working with cc_repo:", cc_repo)

    # in case we have an int.
    sign_digits = str(signatures)

    for molset in ('full', 'reference'):
        for space in "ABCDE":
            for num in "12345":
                for digit in sign_digits:
                    cctype = 'sign' + digit
                    dataset = space + num + '.001'
                    h5_path = os.path.join(cc_repo, molset, space, space + num, dataset, cctype, cctype + '.h5')

                    if not os.path.exists(h5_path):
                        print(h5_path, "doesn't exist, skipping")
                    else:
                        if backup:
                            print("Making backup which will contain metadata")
                            folder = os.path.dirname(h5_path)
                            stem = os.path.basename(h5_path).split('.')[0]
                            backup_file = os.path.join(folder, stem + '_BACKUP.h5')

                            if os.path.exists(backup_file):
                                print("Backup file", backup_file, "already exists, just adding metadata to it.")
                            else:
                                shutil.copyfile(h5_path, backup_file)

                            # From here on, work on the copy only
                            h5_path = backup_file

                        print("Adding metadata to", h5_path)
                        metadata = (
                            ('cctype', cctype),
                            ('dataset_code', dataset),
                            ('molset', molset),
                        )

                        with h5py.File(h5_path, 'r+') as h5:
                            for key, value in metadata:
                                if key in h5.attrs:
                                    print(key, "already in attrs")
                                else:
                                    h5.attrs.create(name=key, data=value)

                    # Separator printed after every signature, found or not
                    print("\n____")
101

nsoler's avatar
nsoler committed
102
def export_sign(target_dir, cc_repo=None, signatures='2',molsets=('full',),copy_backup=False, add_metadata=True):
    """
    Export all signatures from a given cctype (ex: sign2) in a single folder
    Add Metadata to the output files if not present in the original h5 file

    Each file is copied to <target_dir>/<cctype>/<cctype>_<space><num>_<molset>.h5.
    Existing target files are never overwritten (a WARNING is printed instead).

    target_dir (str): target directory where the signatures will be copied
    cc_repo: (str) path to a cc sign repo i.e /aloy/web_checker/package_cc/2020_02.
             When None, the value returned by cc_repo_version() is used.
    signatures: (str or int), number refering to the signature. ex: '012' for sign0, sign1, sign2.
    molsets: (list or tuple), containing 'full' and/or 'reference'. A single string is also accepted.
    copy_backup (Bool): copy signx_BACKUP.h5 instead of signx.h5
    add_metadata (Bool): Add metadata to the copied file
                         (NB: inside this function the name hides the module-level add_metadata function)
    """
    if cc_repo is None:
        cc_repo = cc_repo_version()

        if cc_repo is None:
            print("ERROR, cannot guess the latest cc repository path")
            print("Please provide it as an argument")
            print("ex: cc_repo='/aloy/web_checker/package_cc/2020_02'")
            return

        print("Working with cc_repo:", cc_repo)

    signatures = str(signatures)  # in case we have an int.

    # A bare string would otherwise be iterated character by character
    # ('f', 'u', 'l', 'l'), which is what the old default ('full') did.
    if isinstance(molsets, str):
        molsets = (molsets,)

    for sign in signatures:
        cctype = 'sign' + sign
        sign_dir = os.path.join(target_dir, cctype)

        try:
            os.makedirs(sign_dir, exist_ok=True)
        except OSError as e:
            print("WARNING", e)
            continue

        source_name = cctype + '_BACKUP.h5' if copy_backup else cctype + '.h5'

        for molset in molsets:
            for space in "ABCDE":
                for num in "12345":
                    data_code = space + num + '.001'
                    fichero = os.path.join(cc_repo, molset, space, space + num, data_code, cctype, source_name)
                    target_file = os.path.join(sign_dir, cctype + '_' + space + num + '_' + molset + '.h5')

                    if not os.path.exists(fichero):
                        print(fichero, "does not exist, skipping!")
                        continue

                    if os.path.exists(target_file):
                        print("WARNING: file", target_file, "already exists!")
                        print("Please delete it first")
                        continue

                    print("Copying", fichero, "to", target_file)
                    shutil.copyfile(fichero, target_file)

                    # Adding metadata
                    if add_metadata:
                        print("Adding metadata to", target_file)
                        dico = dict(cctype=cctype, dataset_code=data_code, molset=molset)

                        with h5py.File(target_file, 'a') as f:
                            for k, v in dico.items():
                                if k not in f.attrs:
                                    f.attrs.create(name=k, data=v)
                                else:
                                    print(k, "already in attrs")

if __name__ == '__main__':

    # Destination folder for the optional export step (currently disabled, see below)
    target_directory = "/aloy/scratch/nsoler/CC_related/EXPORT_SIGN"

    # Default run: backup all h5 files and add the metadata to the backups
    add_metadata()

    # Optional: export the sign2 backups of the 'full' molset into target_directory
    #export_sign(target_directory, signatures='2',molsets=['full'], copy_backup=True)
nsoler's avatar
nsoler committed
184
185