# Source code for xenonpy.descriptor.cgcnn

#  Copyright (c) 2021. yoshida-lab. All rights reserved.
#  Use of this source code is governed by a BSD-style
#  license that can be found in the LICENSE file.

import warnings

import numpy as np
import torch
from pymatgen.core.structure import Structure

from xenonpy.datatools import preset
from xenonpy.descriptor.base import BaseFeaturizer

__all__ = ['CrystalGraphFeaturizer']


class CrystalGraphFeaturizer(BaseFeaturizer):
    """Turn a crystal structure into the graph tensors consumed by CGCNN models.

    For each structure the featurizer produces three tensors: per-atom
    features, Gaussian-expanded neighbor distances and neighbor indices
    (see :meth:`featurize` and :attr:`feature_labels`).
    """

    def __init__(self,
                 *,
                 max_num_nbr=12,
                 radius=8,
                 atom_feature='origin',
                 n_jobs=-1,
                 on_errors='raise',
                 return_type='any'):
        """
        This featurizer is a port of the original paper [CGCNN]_.

        .. [CGCNN] `Crystal Graph Convolutional Neural Networks for an Accurate and
            Interpretable Prediction of Material Properties`__

        __ https://doi.org/10.1103/PhysRevLett.120.145301

        Parameters
        ----------
        max_num_nbr: int
            Number of neighbors kept for every atom. Atoms with fewer neighbors
            inside ``radius`` are padded (see :meth:`edge_features`).
        radius: float
            Cutoff passed to ``structure.get_all_neighbors``. It is also the
            upper end of the Gaussian distance grid.
        atom_feature: str or callable
            Source of the per-atom feature vector.
            ``'origin'`` looks the atom symbol up in ``preset.atom_init``,
            ``'elements'`` looks it up in ``preset.elements_completed``.
            A callable is called with the atom symbol and must return the
            feature vector itself.
            Any other value raises ``TypeError`` when features are requested.
        n_jobs: int
            The number of jobs to run in parallel for both fit and predict.
            Set -1 to use all cpu cores (default).
            Inputs ``X`` will be split into some blocks then run on each cpu cores.
        on_errors: string
            How to handle exceptions in feature calculations. Can be 'nan', 'keep', 'raise'.
            When 'nan', return a column with ``np.nan``.
            The length of column corresponding to the number of feature labs.
            When 'keep', return a column with exception objects.
            The default is 'raise' which will raise up the exception.
        return_type: str
            Specific the return type.
            Can be ``any``, ``array`` and ``df``.
            ``array`` and ``df`` force return type to ``numpy.ndarray`` and ``pandas.DataFrame`` respectively.
            If ``any``, the return type dependent on the input type.
            Default is ``any``
        """
        super().__init__(n_jobs=n_jobs, on_errors=on_errors, return_type=return_type)
        self.atom_feature = atom_feature
        self.radius = radius
        self.max_num_nbr = max_num_nbr
        self.__implement__ = ['TsumiNa']

    def _atom_feature(self, atom_symbol: str):
        """Return the feature vector of one atom, selected by ``self.atom_feature``.

        Raises
        ------
        TypeError
            If ``self.atom_feature`` is neither ``'origin'``, ``'elements'``
            nor a callable.
        """
        if self.atom_feature == 'origin':
            return preset.atom_init.loc[atom_symbol]
        elif self.atom_feature == 'elements':
            return preset.elements_completed.loc[atom_symbol]
        elif callable(self.atom_feature):
            return self.atom_feature(atom_symbol)
        else:
            raise TypeError('bad `atom feature` parameter')

    def edge_features(self, structure: Structure, **kwargs):
        """Build the neighbor (edge) tensors of a structure.

        Neighbors of every site within ``self.radius`` are sorted by distance
        and the closest ``self.max_num_nbr`` are kept. If a site has fewer
        neighbors, a warning is issued and the row is padded with index ``0``
        and distance ``self.radius + 1``.

        Parameters
        ----------
        structure: Structure
            Structure whose neighbor lists are queried through
            ``get_all_neighbors(self.radius, include_index=True)``.
        **kwargs
            Accepted for interface compatibility; not used.

        Returns
        -------
        nbr_fea: torch.Tensor
            Gaussian-expanded distances with shape
            ``(n_sites, max_num_nbr, n_filters)``.
        nbr_fea_idx: torch.LongTensor
            Neighbor site indices with shape ``(n_sites, max_num_nbr)``.
        """

        def expand_distance(distances, dmin=0, step=0.2, var=None):
            """Expand distances on a grid of Gaussian filters.

            The filter centers run from ``dmin`` up to ``self.radius``
            (``np.arange(dmin, self.radius + step, step)``).

            Parameters
            ----------
            distances: numpy.ndarray
                Interatomic distances; a new trailing axis holding the filter
                responses is appended.
            dmin: float
                Minimum interatomic distance
            step: float
                Step size for the Gaussian filter
            var: float
                Width used in the Gaussian exponent. Defaults to ``step``.
            """
            filter_ = np.arange(dmin, self.radius + step, step)
            if var is None:
                var = step
            return np.exp(-(distances[..., np.newaxis] - filter_) ** 2 / var ** 2)

        all_nbrs = structure.get_all_neighbors(self.radius, include_index=True)
        # Entry 1 of a neighbor record is used as the distance and entry 2 as the
        # neighbor's site index; sort so the closest neighbors come first.
        all_nbrs = [sorted(nbrs, key=lambda x: x[1]) for nbrs in all_nbrs]

        nbr_fea_idx, nbr_fea = [], []
        for nbr in all_nbrs:
            nearest = nbr[:self.max_num_nbr]
            n_missing = self.max_num_nbr - len(nearest)
            if n_missing > 0:
                warnings.warn('can not find enough neighbors to build graph. '
                              'If it happens frequently, consider increase '
                              'radius.')
            # Padding is empty when enough neighbors were found, so every row
            # ends up with exactly `max_num_nbr` entries.
            nbr_fea_idx.append([x[2] for x in nearest] + [0] * n_missing)
            nbr_fea.append([x[1] for x in nearest] + [self.radius + 1.] * n_missing)

        nbr_fea = np.array(nbr_fea)
        nbr_fea = expand_distance(nbr_fea)
        nbr_fea = torch.Tensor(nbr_fea)
        nbr_fea_idx = torch.LongTensor(nbr_fea_idx)

        return nbr_fea, nbr_fea_idx

    def node_features(self, structure: Structure):
        """Stack the feature vector of every entry in ``structure.species`` into one tensor.

        Returns
        -------
        torch.Tensor
            One row per species entry; the row width depends on the chosen
            ``atom_feature`` source.
        """
        atom_features = np.vstack([self._atom_feature(s.name) for s in structure.species])
        return torch.Tensor(atom_features)

    def featurize(self, structure: Structure):
        """Return ``[node features, neighbor features, neighbor indices]`` for one structure.

        The order matches :attr:`feature_labels`.
        """
        return [self.node_features(structure), *self.edge_features(structure)]

    @property
    def feature_labels(self):
        """Labels of the three outputs of :meth:`featurize`, in order."""
        return ['atom_feature', 'neighbor_feature', 'neighbor_idx']