Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
Packages
chemical_checker
Commits
7265cb47
Commit
7265cb47
authored
Feb 07, 2021
by
nsoler
Browse files
progress on predicting sign1 for A spaces
parent
75f8e5f4
Pipeline
#2374
failed with stages
in 2 minutes and 5 seconds
Changes
5
Pipelines
1
Hide whitespace changes
Inline
Side-by-side
package/chemicalchecker/core/chemcheck.py
View file @
7265cb47
...
...
@@ -477,11 +477,7 @@ class ChemicalChecker():
version (str) : ex: 2020_01
"""
signObj
=
sign
if
sign
.
molset
!=
'reference'
:
signObj
=
sign
.
get_molset
(
"reference"
)
import_models
(
signObj
,
version
=
version
)
import_models
(
sign
,
version
=
version
)
def
import_h5
(
self
):
...
...
package/chemicalchecker/core/sign1.py
View file @
7265cb47
...
...
@@ -120,8 +120,6 @@ class sign1(BaseSignature, DataSignature):
def
load_model
(
self
,
name
):
fn
=
os
.
path
.
join
(
self
.
get_molset
(
"reference"
).
model_path
,
"%s.pkl"
%
name
)
#debug
print
(
"
\n
SHERLOCK fn"
,
fn
)
with
open
(
fn
,
"rb"
)
as
f
:
mod
=
pickle
.
load
(
f
)
...
...
@@ -277,6 +275,9 @@ class sign1(BaseSignature, DataSignature):
else
:
mod
=
None
if
mod
is
not
None
:
mod
.
model_path
=
self
.
model_path
# avoid taking the info from pickle
print
(
"
\n
SHERLOCK s1.model_path"
,
s1
.
model_path
)
print
(
"
\n
SHERLOCK mod.model_path"
,
mod
.
model_path
)
mod
.
predict
(
s1
)
self
.
__log
.
debug
(
"Prediction done!"
)
if
destination
is
None
:
...
...
package/chemicalchecker/util/models/import_models.py
View file @
7265cb47
...
...
@@ -6,27 +6,49 @@ import os, shutil
def
import_models
(
sign_object
,
version
=
'2020_01'
):
"""
Imports the models for predicting sign_objects
i.e. copies the models in the model_path of the reference signature object and
creates symbolic links to those model files in the full signature model_path
"""
fileDir
=
os
.
path
.
abspath
(
os
.
path
.
dirname
(
__file__
))
signRef
=
sign_object
.
get_molset
(
"reference"
)
signFull
=
sign_object
.
get_molset
(
"full"
)
cctype
=
sign_object
.
cctype
dataset
=
sign_object
.
dataset
molset
=
sign_object
.
molset
destination
=
sign_object
.
model_path
if
molset
!=
'reference'
:
print
(
"Please use a reference signature (not full)"
)
return
None
cctype
=
signRef
.
cctype
dataset
=
signRef
.
dataset
destination
=
signRef
.
model_path
destinationLink
=
signFull
.
model_path
data
=
os
.
path
.
join
(
fileDir
,
version
,
dataset
,
cctype
)
if
not
os
.
path
.
exists
(
data
):
print
(
"Sorry, no model to import for this signature"
)
return
None
for
fichero
in
os
.
listdir
(
data
):
if
fichero
==
"fit.ready"
:
continue
source
=
os
.
path
.
join
(
data
,
fichero
)
target
=
os
.
path
.
join
(
destination
,
fichero
)
print
(
"Importing"
,
source
,
"to"
,
target
)
shutil
.
copyfile
(
source
,
target
)
symlink
=
os
.
path
.
join
(
destinationLink
,
fichero
)
if
not
os
.
path
.
exists
(
target
):
print
(
"Importing"
,
source
,
"to"
,
target
)
shutil
.
copyfile
(
source
,
target
)
# Symlinks
if
not
os
.
path
.
islink
(
symlink
):
print
(
"Creating symlink"
,
symlink
,
"from"
,
target
)
os
.
symlink
(
target
,
symlink
)
package/chemicalchecker/util/parser/calculate_A_spaces.py
View file @
7265cb47
...
...
@@ -149,7 +149,7 @@ class Aspaces_prop_calculator(object):
return
None
def
calculate_mol_properties
(
self
):
def
calculate_mol_properties
(
self
,
outputfiles
):
"""
Calls calculate_data_fn for all spaces
...
...
@@ -157,12 +157,15 @@ class Aspaces_prop_calculator(object):
Arguments:
- space (str): either A1, A2, A3, A4 or A5
-
dict_inchikey_inchi
(dict):
mapping
of the molecules to calculate properties from
-
outputfiles
(dict):mapping
space : outputfile path
"""
result
=
dict
()
for
space
in
self
.
Aspaces
:
result
[
space
]
=
self
.
calculate_data_fn
(
space
)
for
space
in
outputfiles
:
if
not
os
.
path
.
exists
(
outputfiles
[
space
]):
result
[
space
]
=
self
.
calculate_data_fn
(
space
)
else
:
print
(
"File"
,
outputfiles
[
space
],
"already present, nothing to do"
)
# dictionary {'A1': [{'inchikey': 'ASXBYYWOLISCLQ-UHFFFAOYSA-N', 'raw': ..raw_string}, {}...]}
return
result
...
...
@@ -188,8 +191,8 @@ class Aspaces_prop_calculator(object):
# Compute the raw properties
all_properties
=
self
.
calculate_mol_properties
()
all_features
=
fetch_features_A
()
all_properties
=
self
.
calculate_mol_properties
(
outputfiles
)
all_features
=
fetch_features_A
()
# features from the fit() method
print
(
'all_properties'
,
all_properties
)
print
(
'all_features'
,
all_features
)
...
...
@@ -263,8 +266,10 @@ class Aspaces_prop_calculator(object):
for
space
,
fp
in
dict_of_Aspaces_h5
.
items
():
print
(
"
\n
Calculating sign0 for space"
,
space
)
sign0
=
self
.
cc
.
get_signature
(
'sign0'
,
'full'
,
space
+
'.001'
)
sign0
.
clear
()
sign0
.
fit
(
data_file
=
fp
,
do_triplets
=
False
,
overwrite
=
True
,
sanitize
=
sanitize
)
if
not
sign0
.
is_fit
():
sign0
.
fit
(
data_file
=
fp
,
do_triplets
=
False
,
overwrite
=
True
,
sanitize
=
sanitize
)
else
:
print
(
"Sign0 for space"
,
space
+
'.001'
,
"already fit, nothing to do"
)
# Then we can use this cc instance to predict sign1
return
self
.
cc
...
...
@@ -284,7 +289,7 @@ class Aspaces_prop_calculator(object):
for
space
in
self
.
Aspaces
:
assert
space
in
dictSpaces
.
keys
(),
print
(
"Sign0 for space"
,
space
,
"not fit!!"
)
assert
space
+
'.001'
in
dictSpaces
.
keys
(),
print
(
"Sign0 for space"
,
space
,
"not fit!!"
)
sign0
=
self
.
cc
.
get_signature
(
'sign0'
,
'full'
,
space
+
'.001'
)
# already fitted
sign1
=
self
.
cc
.
get_signature
(
'sign1'
,
'full'
,
space
+
'.001'
)
# will get converted to reference by the next fct
sign1
.
clear
()
...
...
package/chemicalchecker/util/transform/lsi.py
View file @
7265cb47
...
...
@@ -197,6 +197,7 @@ class Lsi(BaseTransform):
for
x
in
mask
[
0
]])
f
.
write
(
"%s %s
\n
"
%
(
ks
[
i
],
val
))
# load dictionary
print
(
"
\n
SHERLOCK self.model_path"
,
self
.
model_path
)
dictionary
=
corpora
.
Dictionary
.
load
(
os
.
path
.
join
(
self
.
model_path
,
self
.
name
+
".dict.pkl"
))
# corpus
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment