Source code for kripodb.pharmacophores

from itertools import groupby
from os import path, walk

import tables
import gzip
from rdkit.Chem import ForwardSDMolSupplier

from .hdf5 import AbstractSimpleTable

FEATURE_TYPES = [{
    'key': 'LIPO',
    'label': 'Hydrophobe',
    'color': 'ff33cc',
    'element': 'He',
}, {
    'key': 'POSC',
    'label': 'Positive charge',
    'color': 'ff9933',
    'element': 'P'
}, {
    'key': 'NEGC',
    'label': 'Negative charge',
    'color': '376092',
    'element': 'Ne'
}, {
    'key': 'HACC',
    'label': 'H-bond acceptor',
    'color': 'bfbfbf',
    'element': 'As'
}, {
    'key': 'HDON',
    'label': 'H-bond donor',
    'color': '00ff00',
    'element': 'O'
}, {
    'key': 'AROM',
    'label': 'Aromatic',
    'color': '00ffff',
    'element': 'Rn'
}]
"""
Types of pharmacophore feature types. List of dictionaries with the following keys:
    * key, short identifier of type
    * label, human readable label
    * color, hex rrggbb color
    * element, Element used in kripo pharmacophore sdfile for this type
"""
FEATURE_TYPE_KEYS = [r['key'] for r in FEATURE_TYPES]
FEATURE_TYPE_ATOM2KEY = {r['element']: r['key'] for r in FEATURE_TYPES}

PYTABLE_FILTERS = tables.Filters(complevel=6, complib='blosc')


class PharmacophoreRow(tables.IsDescription):
    """Table description for similarity pair

    """
    frag_id = tables.StringCol(16)
    type = tables.EnumCol(FEATURE_TYPE_KEYS, FEATURE_TYPE_KEYS[0], base='uint8')
    x = tables.Float32Col()
    y = tables.Float32Col()
    z = tables.Float32Col()


[docs]class PharmacophoresDb(object): """Database for pharmacophores of fragments aka sub-pockets. Args: filename (str): File name of hdf5 file to write or read pharmacophores to/from mode (str): Can be 'r' for reading or 'w' for writing or 'a' for appending expectedrows (int): Expected number of pharmacophores. Required when hdf5 file is created, helps optimize compression **kwargs: Passed to tables.open_file Pharmacophore points of a fragment can be retrieved using:: points = db['frag_id1'] `points` is a list of points, each point is a tuple with following columns feature type key, x, y and z coordinate. The feature type key is defined in FEATURE_TYPES. Attributes: h5file (tables.File): Object representing an open hdf5 file points (PharmacophorePointsTable): HDF5 table that contains pharmacophore points """ def __init__(self, filename, mode='r', expectedrows=0, **kwargs): self.h5file = tables.open_file(filename, mode, filters=PYTABLE_FILTERS, **kwargs) self.points = PharmacophorePointsTable(self.h5file, expectedrows)
[docs] def close(self): """Closes the hdf5file Instead of calling close() explicitly, use context manager:: with PharmacophoresDb('data/pharmacophores.h5') as db: points = db['frag_id1'] """ self.h5file.close()
def __enter__(self): return self def __exit__(self, exc_type, exc_val, exc_tb): self.close()
[docs] def add_dir(self, startdir): """Find \*_pphore.sd.gz \*_pphores.txt file pairs recursively in start directory and add them. Args: startdir (str): Path to a start directory """ self.points.add_dir(startdir)
def __getitem__(self, item): return self.points[item]
[docs] def write_phar(self, outfile, frag_id=None): """Write pharmacophore of frag_id as phar format to outfile Args: outfile (file): File object to write to frag_id (str): Fragment identifier, if None all pharmacophores are written """ if frag_id: outfile.write(as_phar(frag_id, self[frag_id])) else: for frag_id, points in self.points: outfile.write(as_phar(frag_id, points))
[docs] def read_phar(self, infile): """Read phar formatted file and add pharmacophore to self Args: infile: File object of phar formatted file """ self.points.read_phar(infile)
def __iter__(self): return iter(self.points) def __len__(self): """Returns number of points Not the number of pharmacophores """ return len(self.points)
[docs] def append(self, other): """Append pharmacophores in other db to self Args: other (PharmacophoresDb): The other pharmacophores database """ self.points.append(other.points)
[docs]def read_pphore_gzipped_sdfile(sdfile): """Read a gzipped sdfile which contains pharmacophore points as atoms Args: sdfile (string): Path to filename Returns: list: List of Pharmacophore points """ with gzip.open(sdfile) as gzfile: return read_pphore_sdfile(gzfile)
[docs]def read_pphore_sdfile(sdfile): """Read a sdfile which contains pharmacophore points as atoms Args: sdfile (file): File object with sdfile contents Returns: list: List of pharmacophore points """ mols = list(ForwardSDMolSupplier(sdfile)) mol = mols[0] conf = mol.GetConformer(0) points = [] for atom in mol.GetAtoms(): pos = conf.GetAtomPosition(atom.GetIdx()) point = ( FEATURE_TYPE_ATOM2KEY[atom.GetSymbol()], float(pos.x), float(pos.y), float(pos.z), ) points.append(point) return points
[docs]def read_fragtxtfile(fragtxtfile): """Read a fragment text file Args: fragtxtfile: Filename of fragment text file Returns: dict: Dictionary where key is fragment identifier and value is a list of pharmacophore point indexes. """ with open(fragtxtfile) as f: return read_fragtxtfile_as_file(f)
[docs]def read_fragtxtfile_as_file(fileobject): """Read a fragment text file object which contains the pharmacophore point indexes for each fragment identifier. File format is a fragment on each line, the line is space separated with fragment_identifier followed by the pharmacophore point indexes. Args: fileobject (file): File object to read Returns: dict: Dictionary where key is fragment identifier and value is a list of pharmacophore point indexes. """ frags = {} for line in fileobject: point_ids = line.strip().split(' ') frag_id = point_ids.pop(0) point_ids = [int(r) - 1 for r in point_ids] frags[frag_id] = point_ids return frags
[docs]def as_phar(frag_id, points): """Return pharmacophore in \*.phar format. See `align-it <http://silicos-it.be.s3-website-eu-west-1.amazonaws.com/software/align-it/1.0.4/align-it.html#format>`_ for format description. Args: frag_id (str): Fragment identifier points (list): List of points where each point is (key,x,y,z) Returns: str: Pharmacophore is \*.phar format """ lines = [frag_id] for point in points: # Kripo only has code and x/y/z position, # it has no alpha and no normal position # so use zeros line = '{0} {1:.4f} {2:.4f} {3:.4f} 0 0 0 0 0'.format(*point) lines.append(line) lines.append('$$$$') return '\n'.join(lines) + '\n'
[docs]class PharmacophorePointsTable(AbstractSimpleTable): """Wrapper around pytables table to store pharmacohpore points Args: h5file (tables.File): Pytables hdf5 file object which contains the pharmacophores table expectedrows (int): Expected number of pharmacophores. Required when hdf5 file is created, helps optimize compression Pharmacophore points of a fragment can be retrieved using:: points = table['frag_id1'] `points` is a list of points, each point is a tuple with following columns feature type key, x, y and z coordinate. The feature type key is defined in FEATURE_TYPES. Number of pharmacophore points can be requested using:: nr_points = len(table) To check whether fragment identifier is contained use:: 'frag_id1' in table """ table_name = 'pharmacophores' def __init__(self, h5file, expectedrows=0): if self.table_name in h5file.root: table = h5file.root.__getattr__(self.table_name) else: table = h5file.create_table('/', self.table_name, PharmacophoreRow, 'Pharmacophore points of Kripo sub-pockets', expectedrows=expectedrows) table.cols.frag_id.create_index(filters=PYTABLE_FILTERS) super(PharmacophorePointsTable, self).__init__(table)
[docs] def add_dir(self, startdir): """Find \*_pphore.sd.gz \*_pphores.txt file pairs recursively in start directory and add them. Args: startdir (str): Path to a start directory """ for root, _dirs, files in walk(startdir): sdfiles = [path.join(root, file) for file in files if file.endswith('_pphore.sd.gz')] for sdfile in sdfiles: fragtxtfile = sdfile.replace('_pphore.sd.gz', '_pphores.txt') self.add_pocket(sdfile, fragtxtfile)
def add_pocket(self, sdfile, fragtxtfile): points = read_pphore_gzipped_sdfile(sdfile) for frag_id, point_ids in read_fragtxtfile(fragtxtfile): self.add_fragment(frag_id, point_ids, points) self.table.flush() def add_fragment(self, frag_id, points_ids, points): if frag_id in self: raise ValueError("Duplicate key '{0}' found".format(frag_id)) row = self.table.row types = self.table.get_enum('type') for i in points_ids: row['frag_id'] = frag_id point = points[i] row['type'] = types[point[0]] row['x'] = point[1] row['y'] = point[2] row['z'] = point[3] row.append()
[docs] def read_phar(self, infile): """Read phar formatted file and add pharmacophore to self Args: infile: File object of phar formatted file """ record_sep = '$$$$' points = [] frag_id = None for line in infile: line = line.strip() if line == record_sep and points and frag_id: for point in points: self.add_point(frag_id, point) points = [] frag_id = None else: point = line.split(' ') if len(point) != 9: frag_id = line else: points.append(point) self.table.flush()
def add_point(self, frag_id, point): row = self.table.row types = self.table.get_enum('type') row['frag_id'] = frag_id row['type'] = types[point[0]] row['x'] = point[1] row['y'] = point[2] row['z'] = point[3] row.append() def __contains__(self, item): query = 'frag_id == z' binds = {'z': item} nr_hits = len(self.table.get_where_list(query, binds)) return nr_hits > 0 def __getitem__(self, key): points = [] types = self.table.get_enum('type') query = 'frag_id == z' binds = {'z': key} for row in self.table.where(query, binds): points.append(( types(row['type']), row['x'], row['y'], row['z'], )) if len(points) == 0: raise KeyError(key) return points def __iter__(self): types = self.table.get_enum('type') for frag_id, raw_points in groupby(self.table, lambda r: r['frag_id']): points = [( types(row['type']), row['x'], row['y'], row['z'], ) for row in raw_points] yield frag_id.decode(), points