Memory error while trying to predict targets (molprop) on a large molrepo
When I run:
# Reproduction script: submit target-prediction (molprop) HPC jobs for every
# molecule in the "chembl" molrepo. NOTE(review): indentation was lost in the
# original paste; restored here so the loop body is actually inside the loop.
import shutil
import sys, os
from chemicalchecker.database import Molprop, Structure
from chemicalchecker.database import Molrepo
from chemicalchecker.util import HPC
from chemicalchecker.util import Config

molprops = ['chembl_target_predictions']
# (inchikey, inchi) pairs for the whole ChEMBL molrepo -- presumably a very
# large list, which is what later triggers the MemoryError in np.array_split
molrepo_ik_inchi = Molrepo.get_fields_by_molrepo_name("chembl", ["inchikey", "inchi"])

for mol in molprops:
    job_path = "/aloy/scratch/mduran/jobs_molprop_chembl_" + mol
    # start each run from a clean job directory
    if os.path.isdir(job_path):
        shutil.rmtree(job_path)
    os.mkdir(job_path)
    molprop = Molprop(mol)
    # This method sends the job and waits for the job to finish
    cluster = molprop.molprop_hpc(job_path, molrepo_ik_inchi)
I get the following memory error:
---------------------------------------------------------------------------
MemoryError Traceback (most recent call last)
<ipython-input-2-6ed1f126b830> in <module>()
20
21 # This method sends the job and waits for the job to finish
---> 22 cluster = molprop.molprop_hpc(job_path,molrepo_ik_inchi)
/aloy/home/mduran/myscripts/chemical_checker/package/chemicalchecker/database/molprop.pyc in molprop_hpc(job_path, inchikey_inchi, **kwargs)
228 # submit jobs
229 cluster = HPC(Config())
--> 230 cluster.submitMultiJob(command, **params)
231 return cluster
232
/aloy/home/mduran/myscripts/chemical_checker/package/chemicalchecker/util/hpc/hpc.pyc in submitMultiJob(self, command, **kwargs)
58
59 if self.job_id is None:
---> 60 self.job_id = self.hpc.submitMultiJob(command, **kwargs)
61 else:
62 raise Exception("HPC instance already in use")
/aloy/home/mduran/myscripts/chemical_checker/package/chemicalchecker/util/hpc/sge.pyc in submitMultiJob(self, command, **kwargs)
165
166 input_dict = dict()
--> 167 for cid, chunk in enumerate(self._chunks(elements, num_jobs), 1):
168 input_dict[str(cid)] = chunk
169 input_path = os.path.join(self.jobdir, str(uuid.uuid4()))
/aloy/home/mduran/myscripts/chemical_checker/package/chemicalchecker/util/hpc/sge.pyc in _chunks(self, l, n)
81 """Yield successive n-sized chunks from l."""
82 if isinstance(l, list):
---> 83 for i in np.array_split(l, n):
84 yield i
85 elif isinstance(l, dict):
/opt/miniconda2/envs/py27/lib/python2.7/site-packages/numpy/lib/shape_base.pyc in array_split(ary, indices_or_sections, axis)
760
761 sub_arys = []
--> 762 sary = _nx.swapaxes(ary, axis, 0)
763 for i in range(Nsections):
764 st = div_points[i]
/opt/miniconda2/envs/py27/lib/python2.7/site-packages/numpy/core/fromnumeric.pyc in swapaxes(a, axis1, axis2)
583
584 """
--> 585 return _wrapfunc(a, 'swapaxes', axis1, axis2)
586
587
/opt/miniconda2/envs/py27/lib/python2.7/site-packages/numpy/core/fromnumeric.pyc in _wrapfunc(obj, method, *args, **kwds)
64 # a downstream library like 'pandas'.
65 except (AttributeError, TypeError):
---> 66 return _wrapit(obj, method, *args, **kwds)
67
68
/opt/miniconda2/envs/py27/lib/python2.7/site-packages/numpy/core/fromnumeric.pyc in _wrapit(obj, method, *args, **kwds)
44 except AttributeError:
45 wrap = None
---> 46 result = getattr(asarray(obj), method)(*args, **kwds)
47 if wrap:
48 if not isinstance(result, mu.ndarray):
/opt/miniconda2/envs/py27/lib/python2.7/site-packages/numpy/core/numeric.pyc in asarray(a, dtype, order)
536
537 """
--> 538 return array(a, dtype, copy=False, order=order)
539
540
MemoryError: