Source code for kripodb.script.fragments

import argparse
import logging
import shelve

from rdkit.Chem.rdmolfiles import SDMolSupplier, MolToMolBlock

from ..db import FragmentsDb
from ..hdf5 import SimilarityMatrix
from ..pdb import PdbReport


[docs]def make_fragments_parser(subparsers): """Creates a parser for fragments sub commands Args: subparsers (argparse.ArgumentParser): Parser to which to add sub commands to """ sc = subparsers.add_parser('fragments', help='Fragments').add_subparsers() shelve2fragmentsdb_sc(sc) sdf2fragmentsdb_sc(sc) pdb2fragmentsdb_sc(sc) fragmentsdb_filter_sc(sc) merge_fragmentsdb_sc(sc) export_sdf_sc(sc)
def shelve2fragmentsdb_sc(subparsers): sc = subparsers.add_parser('shelve', help='Add fragments from shelve to sqlite') sc.add_argument('--skipdups', action='store_true', help='Skip duplicates, instead of dieing one first duplicate') sc.add_argument('shelvefn', type=str) sc.add_argument('fragmentsdb', default='fragments.db', help='Name of fragments db file (default: %(default)s)') sc.set_defaults(func=shelve2fragmentsdb_run) def shelve2fragmentsdb_run(shelvefn, fragmentsdb, skipdups): myshelve = shelve.open(shelvefn, 'r') frags = FragmentsDb(fragmentsdb) frags.add_fragments_from_shelve(myshelve, skipdups) def sdf2fragmentsdb_sc(subparsers): sc = subparsers.add_parser('sdf', help='Add fragments sdf to sqlite') sc.add_argument('sdffns', help='SDF filename', nargs='+') sc.add_argument('fragmentsdb', default='fragments.db', help='Name of fragments db file (default: %(default)s)') sc.set_defaults(func=sdf2fragmentsdb_run) def sdf2fragmentsdb_run(sdffns, fragmentsdb): frags = FragmentsDb(fragmentsdb) for sdffn in sdffns: logging.warning('Parsing {}'.format(sdffn)) suppl = SDMolSupplier(sdffn) frags.add_molecules(suppl) def pdb2fragmentsdb_sc(subparsers): sc = subparsers.add_parser('pdb', help='Add pdb metadata from RCSB PDB website to fragment sqlite db') sc.add_argument('fragmentsdb', default='fragments.db', help='Name of fragments db file (default: %(default)s)') sc.set_defaults(func=pdb2fragmentsdb_run) def pdb2fragmentsdb_run(fragmentsdb): pdb_report = PdbReport() pdbs = pdb_report.fetch() frags = FragmentsDb(fragmentsdb) frags.add_pdbs(pdbs) def fragmentsdb_filter_sc(subparsers): sc = subparsers.add_parser('filter', help='Filter fragments database') sc.add_argument('input', type=str, help='Name of fragments db input file') sc.add_argument('output', type=str, help='Name of fragments db output file, will overwrite file if it exists') sc.add_argument('--pdbs', type=argparse.FileType('r'), help='Keep fragments from any of the supplied pdb codes, one pdb code per line, use - for stdin') sc.add_argument('--matrix', type=str, help='Keep fragments which are in similarity matrix file') sc.set_defaults(func=fragmentsdb_filter) def fragmentsdb_filter(input, output, pdbs, matrix): if matrix: fragmentsdb_filter_matrix(input, output, matrix) else: fragmentsdb_filter_pdbs(input, output, pdbs) def fragmentsdb_filter_matrix(input, output, matrix): output_db = FragmentsDb(output) # mount input into output db print('Reading: ' + input) output_db.cursor.execute('ATTACH DATABASE ? AS orig', (input,)) # create temp table with pdbs output_db.cursor.execute('CREATE TEMPORARY TABLE filter (frag_id TEXT PRIMARY KEY)') sql = 'INSERT OR REPLACE INTO filter (frag_id) VALUES (?)' print('Matrix labels') simmatrix = SimilarityMatrix(matrix) for frag_id in simmatrix.labels.label2ids().keys(): output_db.cursor.execute(sql, (frag_id,)) simmatrix.close() # insert select output_db.cursor.execute('INSERT INTO fragments SELECT * FROM orig.fragments JOIN filter USING (frag_id)') output_db.cursor.execute( 'INSERT INTO pdbs SELECT * FROM orig.pdbs WHERE pdb_code IN (SELECT pdb_code FROM fragments)') output_db.cursor.execute( 'INSERT INTO molecules SELECT * FROM orig.molecules WHERE frag_ID IN (SELECT frag_id FROM fragments)') # drop temp table with pdbs output_db.cursor.execute('DROP TABLE filter') # vacuum output_db.cursor.execute('VACUUM') print('Wrote: ' + output) def fragmentsdb_filter_pdbs(input, output, pdbs): output_db = FragmentsDb(output) # mount input into output db print('Reading: ' + input) output_db.cursor.execute('ATTACH DATABASE ? AS orig', (input,)) # create temp table with pdbs output_db.cursor.execute('CREATE TEMPORARY TABLE filter (pdb_code TEXT PRIMARY KEY)') sql = 'INSERT OR REPLACE INTO filter (pdb_code) VALUES (?)' for pdb in pdbs: output_db.cursor.execute(sql, (pdb.rstrip().lower(),)) # insert select output_db.cursor.execute('INSERT INTO pdbs SELECT * FROM orig.pdbs JOIN filter USING (pdb_code)') output_db.cursor.execute('INSERT INTO fragments SELECT * FROM orig.fragments JOIN filter USING (pdb_code)') output_db.cursor.execute('INSERT INTO molecules SELECT * FROM orig.molecules WHERE frag_ID IN (SELECT frag_id FROM fragments)') # drop temp table with pdbs output_db.cursor.execute('DROP TABLE filter') # vacuum output_db.cursor.execute('VACUUM') print('Wrote: ' + output) def merge_fragmentsdb_sc(subparsers): sc = subparsers.add_parser('merge', help='Combine fragments databases into a single new one') sc.add_argument('ins', nargs='+', help='Input fragments database files') sc.add_argument('out', help='Output fragments database file') sc.set_defaults(func=merge_fragmentsdb) def merge_fragmentsdb(ins, out): with FragmentsDb(out) as output_db: c = output_db.cursor c.execute('SELECT name FROM sqlite_master WHERE type="table"') tables = [table[0] for table in c.fetchall()] for input_fn in ins: c.execute('ATTACH DATABASE ? AS other', (input_fn,)) for table in tables: c.execute('INSERT INTO {0} SELECT * FROM other.{0}'.format(table)) c.execute('DETACH DATABASE other') def export_sdf_sc(subparsers): sc = subparsers.add_parser('export_sd', help='Export molblocks of all fragments as SDF file') sc.add_argument('fragmentsdb', help='Input fragments database file') sc.add_argument('sdfile', type=argparse.FileType('w'), help='Output SDF file') sc.set_defaults(func=export_sdf) def export_sdf(fragmentsdb, sdfile): with FragmentsDb(fragmentsdb) as db: for fragment in db: molblock = MolToMolBlock(fragment['mol']) sdfile.write(molblock) sdfile.write('$$$$\n')