# Source code for xenonpy.contrib.extend_descriptors.descriptor.mordred_descriptor

#  Copyright (c) 2021. stewu5. All rights reserved.
#  Use of this source code is governed by a BSD-style
#  license that can be found in the LICENSE file.

import pandas as pd
from mordred import Calculator, descriptors
from rdkit import Chem
from xenonpy.descriptor.base import BaseFeaturizer


class Mordred2DDescriptor(BaseFeaturizer):
    """Featurizer that computes mordred descriptors with 3D descriptors ignored.

    Input molecules may be given as RDKit ``Mol`` objects or as SMILES
    strings; SMILES are converted with ``Chem.MolFromSmiles`` before the
    descriptors are calculated.

    Parameters
    ----------
    on_errors
        Forwarded unchanged to ``BaseFeaturizer.__init__``.
    return_type
        Forwarded unchanged to ``BaseFeaturizer.__init__``.

    Attributes
    ----------
    output
        Result of the most recent ``featurize`` call, ``None`` until then.
    """

    def __init__(self, *, on_errors='raise', return_type='any'):
        # fix n_jobs to be 0 to skip automatic wrapper in XenonPy BaseFeaturizer class
        super().__init__(n_jobs=0, on_errors=on_errors, return_type=return_type)
        self.output = None
        self.__authors__ = ['Stephen Wu', 'TsumiNa']

    def featurize(self, x):
        """Calculate mordred descriptors for one molecule or a batch of them.

        Parameters
        ----------
        x
            A single molecule, a ``list`` of molecules, or a ``pandas.Series``
            of molecules. Each molecule is either an RDKit ``Mol`` or a
            SMILES string. Any other non-list value is treated as a single
            molecule.

        Returns
        -------
        The table produced by ``Calculator.pandas`` for the input molecules.
        The same object is stored in ``self.output``.

        Raises
        ------
        ValueError
            If a SMILES string cannot be converted to an RDKit ``Mol``.
        """
        # Normalise the input to a plain list of molecules.
        if isinstance(x, pd.Series):
            x = x.tolist()
        if not isinstance(x, list):
            x = [x]

        # Decide Mol-vs-SMILES per element instead of looking at x[0] only:
        # this no longer raises IndexError on an empty list (the empty batch
        # is handed to mordred as is) and lets Mol objects and SMILES strings
        # be mixed in one batch.
        x_mol = []
        for z in x:
            if isinstance(z, Chem.rdchem.Mol):
                x_mol.append(z)
                continue
            mol = Chem.MolFromSmiles(z)
            if mol is None:
                raise ValueError('can not convert Mol from SMILES %s' % z)
            x_mol.append(mol)

        calc = Calculator(descriptors, ignore_3D=True)
        self.output = calc.pandas(x_mol)
        return self.output

    @property
    def feature_labels(self):
        """Column labels of the most recent ``featurize`` result.

        Reads ``self.output.columns``, so it is only usable after
        ``featurize`` has been called at least once.
        """
        return self.output.columns