Source code for CDTK.gmx2cdtk

#!/usr/bin/env python
#*  **************************************************************************
#*
#*  CDTK, Chemical Dynamics Toolkit
#*  A modular system for chemical dynamics applications and more
#*
#*  Copyright (C) 2011, 2012, 2013, 2014, 2015, 2016
#*  Oriol Vendrell, DESY, <oriol.vendrell@desy.de>
#*
#*  Copyright (C) 2017, 2018, 2019
#*  Ralph Welsch, DESY, <ralph.welsch@desy.de>
#*
#*  Copyright (C) 2020, 2021, 2022, 2023
#*  Ludger Inhester, DESY, ludger.inhester@cfel.de
#*
#*  This file is part of CDTK.
#*
#*  CDTK is free software: you can redistribute it and/or modify
#*  it under the terms of the GNU General Public License as published by
#*  the Free Software Foundation, either version 3 of the License, or
#*  (at your option) any later version.
#*
#*  This program is distributed in the hope that it will be useful,
#*  but WITHOUT ANY WARRANTY; without even the implied warranty of
#*  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#*  GNU General Public License for more details.
#*
#*  You should have received a copy of the GNU General Public License
#*  along with this program.  If not, see <http://www.gnu.org/licenses/>.
#*
#*  **************************************************************************
"""
This module has several functions to transform coordinates and velocities obtained
from Gromacs calculation into suitable input files for CDTK. Use ``gmx2cdtk -h`` 
to find out how to use this program.


Basic units in GMX, please look at the following web.
https://manual.gromacs.org/documentation/2019/reference-manual/definitions.html#table-basicunits
  
1 Picoseconds = 41341.374575751 Atomic Unit Of Time
1 nm = 18.8973 Bohr
1 nm/ps = 18.8973/41341.374575751 Bohr/a.u. = 0.00045710381413114194
1 nm/ps = 4.571038E-04 Bohr/a.u.
"""

import subprocess
from optparse import OptionParser

import numpy as np
import CDTK.Tools.Conversion as cv


nm2bohr = cv.an2au * 10  # 18.8973
nm_ps2bohr_au = cv.an2au * 10 / (cv.fs2au * 1000)  # 4.571038e-04


[docs] def get_parser(): parser = OptionParser() parser = OptionParser(usage="usage: %prog -g filename.gro [further options]") parser.add_option( "-g", "--GRO_file", dest="gmxFile", type="str", default=None, help="GMX .gro input file.", ) parser.add_option( "-t", "--trr_file", dest="trrFile", type="str", default=None, help="GMX .trr input file (optional).", ) parser.add_option( "-n", "--cen_mol", dest="centeredMolecule", type="int", default=13, help="Please enter the index of the molecule that you want to center. It is an integer number and not exceeding the number of water molecules in the box.", ) parser.add_option( "-s", "--skip", dest="skip", type="float", default=None, help="Skip molecules (residues) with center of mass distance from the center large than this value. Value is given in atomic units", ) parser.add_option( "--centerMethod", type="choice", dest="centerMethod", default="first", choices=["first", "com"], help="How to center: according to first atom of residue (first) or center of mass of residue (com)", ) parser.add_option( "-o", type="str", dest="gmxOut", default=None, help="GMX .gro output file (optional)", ) parser.add_option( "-r", "--output_trr", type="str", dest="trrOut", default=None, help="GMX .trr output file (optional)", ) parser.add_option( "--noreorder", action='store_false', dest="doReorder", default=True, help="do not reorder the molecules according to distance (optional, default: do sort the molecules)", ) return parser
oxygenNames = ['O', 'OW', 'OD', 'O_', 'OS'] hydrogenNames = ['H', 'HW', 'HW1', 'HW2', 'HD', 'HC'] carbonNames = ['C', 'CA', 'CR', 'CW', 'C_', 'CS']
[docs] def atomName2Element(atomName): """ converts atomName into element name Parameters ---------- atomName : string Returns ------- element name: string """ atomName = atomName.strip() atomName = ''.join(c for c in atomName if (not c.isnumeric() and c != ' ')) if atomName in oxygenNames: return "O" elif atomName in hydrogenNames: return "H" elif atomName in carbonNames: return "C" elif atomName in cv.periodicTable.keys(): return atomName else: raise ValueError("AtomName {:s} not recognized".format(atomName))
[docs] def atomName2atomNum(atomName): """ converts atomName into element number Parameters ---------- atomName : string Returns ------- element number: index """ atomName = atomName.strip() atomName = ''.join(c for c in atomName if (not c.isnumeric() and c != ' ')) if atomName in oxygenNames: return cv.periodicTable['O']['atomic_number'] elif atomName in hydrogenNames: return cv.periodicTable['H']['atomic_number'] elif atomName in carbonNames: return cv.periodicTable['C']['atomic_number'] elif atomName in cv.periodicTable.keys(): return cv.periodicTable[atomName]['atomic_number'] else: raise ValueError("AtomName {:s} not recognized".format(atomName))
[docs] def atomName2atomMass(atomName): """ converts atomName into element mass Parameters ---------- atomName : string Returns ------- element mass in atomic units: float """ atomName = atomName.strip() atomName = ''.join(c for c in atomName if (not c.isnumeric() and c != ' ')) if atomName in oxygenNames: return cv.periodicTable['O']['atomic_mass'] elif atomName in hydrogenNames: return cv.periodicTable['H']['atomic_mass'] elif atomName in carbonNames: return cv.periodicTable['C']['atomic_mass'] elif atomName in cv.periodicTable.keys(): return cv.periodicTable[atomName]['atomic_mass'] else: raise ValueError("AtomName {:s} not recognized".format(atomName))
[docs] def readtrr(xtcfilename, gmxfilename): try: import MDAnalysis as mda except ModuleNotFoundError: raise ModuleNotFoundError("Please install module MDAnalysis for reading or writing gromacs trr files") u = mda.Universe(gmxfilename, xtcfilename) t = u.trajectory[-1] residueIndex = u.atoms.resids residueName = u.atoms.resnames atomName = u.atoms.names atomName = np.array([a.replace('_','') for a in atomName]) topol = np.array([str(ri) + rn.strip() + "_" + aN.strip() for ri, rn, aN in zip(residueIndex, residueName, atomName)]) # topology with cdtk format atomnums = np.array(list(map(atomName2atomNum, atomName))) atomlist = np.array(list(map(atomName2Element, atomName))) atommass = np.array(list(map(atomName2atomMass, atomName))) v_box = u.dimensions[0:3] / 10. * nm2bohr molec = { "geom": t.positions / 10. * nm2bohr, "vel": t.velocities / 10. * nm_ps2bohr_au, "topol": topol, "atomlist": atomlist, "atomnums": atomnums, "atommass": atommass, "boxsize": v_box, "residueIndex": residueIndex, "residueName": residueName, "atomName": atomName, } return molec
[docs] def writetrr(trrFilename, molec): """write a gromacs .trr file based on the data from molec uses the MDAnalysis python package Args: trrFilename (string): filename molec (dictionary): data """ try: import MDAnalysis as mda except ModuleNotFoundError: raise ModuleNotFoundError("Please install module MDAnalysis for reading or writing gromacs trr files") n_atoms = molec["geom"].shape[0] resid, atResIdx = np.unique(molec['residueIndex'], return_index=True) resnames = np.array([ a.strip()+b for a,b in zip(molec['residueName'][atResIdx], resid.astype(str))]) n_residues = resid.shape[0] atom_resindex = molec['residueIndex'] u = mda.Universe.empty(n_atoms, n_residues=n_residues, n_segments=1, atom_resindex=atom_resindex-1, velocities=True ) u.add_TopologyAttr('name', [a.strip() for a in molec["atomName"]]) u.add_TopologyAttr('types', [a.strip() for a in molec["atomlist"]]) u.add_TopologyAttr('masses', molec['atommass'] ) u.add_TopologyAttr('segid', ['SYSTEM']) u.add_TopologyAttr('resname', resnames) u.add_TopologyAttr('resid', resid) u.load_new(molec['geom'] * 10 / nm2bohr, order='fac') u.trajectory[0].velocities = molec['vel'] * 10. / nm_ps2bohr_au dimensions = np.zeros(6) dimensions[0:3] = molec["boxsize"] * 10. / nm2bohr dimensions[3:6] = np.array([90.,90.,90.]) u.trajectory[0].dimensions = dimensions u.atoms.write(trrFilename)
[docs] def readgmx(filename, check_water=False): """ Read gromacs file and return the topolgy, element types, coordinates, velocities of all atoms and the box size. Parameters ---------- filename : string Name of gromacs file. Returns ------- molec : dict of ndarray. Dictionary containing the following arrays: - geom : (n_atoms, 3) ndarray of float. Coordinates of all atoms. - vel : (n_atoms, 3) ndarray of float. Velocities of all atoms. - residueIndex: (n_atoms) ndarray of ints - residueName: (n_atoms) ndarray of str - topol : (n_atoms) ndarray of str. String identifiers for the topology of the system (atoms in molecules). - atomlist : (n_atoms) ndarray of str. String identifiers of all atoms types (e.g., "O", "H"). - atomnums : (n_atoms) ndarray of int. Intigers with atomic numners of each element in atomloist. - atommass : (n_atoms) ndarray of float. Atomic masses of each element in atomlist. - boxsize : (3) ndarray of float. Box size (len_x, len_y, len_z). - residueName : (n_atoms) ndarray of str Name of residue for each atom - residueIndex : (n_atoms) ndarray of int Index of residue for each atom - atomName : (n_atoms) ndarray of str Name of atoms (e.g. "OW", "HW1") """ # the .gro file format has fixed column positions as delimiters ("%5d%-5s%5s%5d%8.3f%8.3f%8.3f%8.4f%8.4f%8.4f\n") delimiter = [5, 5, 5, 5, 8, 8, 8, 8, 8, 8] residueIndex = np.genfromtxt(filename, skip_header=2, skip_footer=1, usecols=( 0), dtype=int, delimiter=delimiter) residueName = np.genfromtxt(filename, skip_header=2, skip_footer=1, usecols=( 1), dtype='<U5', delimiter=delimiter) atomName = np.genfromtxt(filename, skip_header=2, skip_footer=1, usecols=( 2), dtype='<U5', delimiter=delimiter) atomName = np.array([a.replace('_','') for a in atomName]) topol = np.array([str(ri) + rn.strip() + "_" + aN.strip() for ri, rn, aN in zip(residueIndex, residueName, atomName)]) # topology with cdtk format # Get position and velocities of all atoms posvel = np.genfromtxt( filename, skip_header=2, skip_footer=1, usecols=(range(4, 10)), dtype=float, delimiter=delimiter ) xyz = posvel[:, :3] * nm2bohr vxyz = posvel[:, 3:] * nm_ps2bohr_au # Get box size from last line of file. last_line = subprocess.check_output(["tail", "-1", filename]) v_box = np.array(last_line.split(), dtype=float) * nm2bohr # Get atomic numbers, atomic masses, and element names atomnums = np.array(list(map(atomName2atomNum, atomName))) atommass = np.array(list(map(atomName2atomMass, atomName))) atomlist = np.array(list(map(atomName2Element, atomName))) molec = { "geom": xyz, "vel": vxyz, "topol": topol, "atomlist": atomlist, "atomnums": atomnums, "atommass": atommass, "boxsize": v_box, "residueIndex": residueIndex, "residueName": residueName, "atomName": np.array([a.replace('_','') for a in atomName]), } return molec
[docs] def writegmx(filename, molec, reIndexResidues=True): """ Writes gromacs .gro file from molec stricture Parameters ---------- filename : string Name of gromacs file. molec : dict of ndarray. Dictionary containing the following arrays: - geom : (n_atoms, 3) ndarray of float. Coordinates of all atoms. - vel : (n_atoms, 3) ndarray of float. Velocities of all atoms. - topol : (n_atoms) ndarray of str. String identifiers for the topology of the system (atoms in molecules). - atomlist : (n_atoms) ndarray of str. String identifiers of all atoms types. - atomnums : (n_atoms) ndarray of int. Intigers with atomic numners of each element in atomloist. - atommass : (n_atoms) ndarray of float. Atomic masses of each element in atomlist. - boxsize : (3) ndarray of float. Box size (len_x, len_y, len_z). - residueName : (n_atoms) ndarray of str Name of residue for each atom - residueIndex : (n_atoms) ndarray of int Index of residue for each atom - atomName : (n_atoms) ndarray of str Name of atoms (e.g. "OW", "HW1") """ geom = molec['geom'] vel = molec['vel'] topol = molec['topol'] boxsize = molec['boxsize'] resN = 0 prevResIdx = 0 with open(filename, 'w') as f: f.write(" generated by gmx2cdtk\n") f.write(" {:d}\n".format(geom.shape[0])) for i in range(geom.shape[0]): residx = molec['residueIndex'][i] resname = molec['residueName'][i] atomname = molec['atomName'][i] if prevResIdx == 0 or prevResIdx != residx: resN = resN + 1 prevResIdx = residx if reIndexResidues: printResIdx = resN else: printResIdx = residx if (np.any(np.isnan(vel))): f.write("{:5d}{:5s}{:5s}{:5d}{:8.3f}{:8.3f}{:8.3f}\n".format( printResIdx, resname, atomname, i+1, geom[i, 0] / nm2bohr, geom[i, 1] / nm2bohr, geom[i, 2] / nm2bohr) ) else: f.write("{:5d}{:5s}{:5s}{:5d}{:8.3f}{:8.3f}{:8.3f}{:8.4f}{:8.4f}{:8.4f}\n".format( printResIdx, resname, atomname, i+1, geom[i, 0] / nm2bohr, geom[i, 1] / nm2bohr, geom[i, 2] / nm2bohr, vel[i, 0] / nm_ps2bohr_au, vel[i, 1] / nm_ps2bohr_au, vel[i, 2] / nm_ps2bohr_au) ) f.write("{:f} {:f} {:f}\n".format( boxsize[0]/nm2bohr, boxsize[1]/nm2bohr, boxsize[2]/nm2bohr)) f.close()
[docs] def get_coord_center(n_center, molec, centerMethod='first'): """ This will return the coordinate center of residue index n_center Parameters ---------- Ncenter : int Index of molecule (residue index) that you want to be the center. molec : dict of ndarrays. centerMethod: either "com" or "first" Returns ------- ndarray of float. X, Y, Z coordinates of center. """ atomIdx = np.flatnonzero(molec['residueIndex'] == n_center) if centerMethod == "first": center = molec['geom'][atomIdx[0]] elif centerMethod == "com": g = molec['geom'][atomIdx] m = molec['atommass'][atomIdx] center = np.sum(g * m[:, np.newaxis], axis=0) / np.sum(m) else: raise ValueError("unknown centerMethod={:s} ".format(centerMethod)) print("Centering {:s} {:d} at position {:f} {:f} {:f}\n". format(molec['residueName'][n_center], molec['residueIndex'][n_center], center[0], center[1], center[2])) return center
[docs] def shift_box(shift, xyz, v_box): """ Shift all molecules by shift and move atoms that fall outside the box according to periodic boundary conditions Parameters ---------- shift : (3) ndarray of float. X, Y, Z coordinates of new center (oxygen atom). xyz : (n_atoms, 3) ndarray of float. Original coordinates of all atoms. Returns ------- (n_atoms, 3) ndarray of float. New coordinates centered arund center_new in the box. """ new_xyz = xyz.copy() - shift[np.newaxis, :] new_xyz = new_xyz % v_box new_xyz[new_xyz[:, 0] < -v_box[0]/2, 0] += v_box[0] new_xyz[new_xyz[:, 0] > v_box[0]/2, 0] -= v_box[0] new_xyz[new_xyz[:, 1] < -v_box[1]/2, 1] += v_box[1] new_xyz[new_xyz[:, 1] > v_box[1]/2, 1] -= v_box[1] new_xyz[new_xyz[:, 2] < -v_box[2]/2, 2] += v_box[2] new_xyz[new_xyz[:, 2] > v_box[2]/2, 2] -= v_box[2] return new_xyz
[docs] def makeResiduesWhole(molec): """ Rearranges the atoms by multiples of boxsize such that the residues are closest together Parameters ---------- molec : dict of ndarray. Returns ------- xyz : (n_atoms, 3) ndarray of float. new coordinates. """ box = molec['boxsize'] residues = np.unique(molec['residueIndex']) newGeom = molec['geom'].copy() for r in residues: while True: # iterate until residue is whole residueIsWhole = True atomIdx = np.flatnonzero(molec['residueIndex'] == r) g = newGeom[atomIdx] m = molec['atommass'][atomIdx] resCenter = np.sum(g * m[:, np.newaxis], axis=0) / np.sum(m) for a in atomIdx: for i in range(3): # if an atom is shifted, reiterate the whole residue, because center of mass needs to be recalculated if(newGeom[a, i] - resCenter[i] > box[i]/2): newGeom[a, i] -= box[i] residueIsWhole = False break if(newGeom[a, i] - resCenter[i] < -box[i]/2): newGeom[a, i] += box[i] residueIsWhole = False break if residueIsWhole: # if nothing has been shifted, we can procceed with the next residue break return newGeom
[docs] def sort_molec(molec, skipDistance=False): """ Sorts the molecules (residues) according to their center-of-mass positions (distance to the origin) Parameters ---------- molec : dict of ndarray. Dictionary built from reading gromax file (check readgmx documentation). Returns ------- molec : dict of ndarray Updated dictionary with sorted molecules. """ # calculate the center of mass distance for all residues residues = np.unique(molec['residueIndex']) resCOM = np.zeros((residues.max() + 1, 3)) # residue index starts with 1 resCOMdist = np.zeros((residues.max() + 1)) for r in residues: atomIdx = np.flatnonzero(molec['residueIndex'] == r) g = molec['geom'][atomIdx] m = molec['atommass'][atomIdx] resCOM[r] = np.sum(g * m[:, np.newaxis], axis=0) / np.sum(m) resCOMdist = np.linalg.norm(resCOM, axis=1) # sort the residues according to distance resIndSorted = np.argsort(resCOMdist) # form the atom index that follows the order of the residues ind_sort = [] i = 0 for r in resIndSorted: atomIdx = np.flatnonzero(molec['residueIndex'] == r) # residue 0 should not appear (residue index starts with 1) if atomIdx.shape[0] == 0: continue if skipDistance is not None: if resCOMdist[r] > skipDistance: continue ind_sort = ind_sort + atomIdx[:].tolist() i += len(atomIdx) # reorder all quantities according to reordered atom index molec['geom'] = molec['geom'][ind_sort] molec['vel'] = molec['vel'][ind_sort] molec['topol'] = molec['topol'][ind_sort] molec['atomlist'] = molec['atomlist'][ind_sort] molec['atomnums'] = molec['atomnums'][ind_sort] molec['atommass'] = molec['atommass'][ind_sort] molec['residueIndex'] = molec['residueIndex'][ind_sort] molec['residueName'] = molec['residueName'][ind_sort] molec['atomName'] = molec['atomName'][ind_sort] return molec
[docs] def start(): parser = get_parser() args, _ = parser.parse_args() if args.gmxFile is None: parser.error("You need to provide a .gro file!") if args.trrFile is not None: molec_data = readtrr(args.trrFile, args.gmxFile) else: molec_data = readgmx(args.gmxFile) # Get center of box and shift coordinates. boxCenter = get_coord_center( args.centeredMolecule, molec_data, centerMethod=args.centerMethod) shifted_geom = shift_box( boxCenter, molec_data["geom"], molec_data["boxsize"]) molec_data['geom'] = shifted_geom # make residues whole geom2 = makeResiduesWhole(molec_data) molec_data['geom'] = geom2 # sort molecules according to distance if args.doReorder: molec_data = sort_molec(molec_data, skipDistance=args.skip) ekin = 0.5 * np.sum(molec_data["vel"][:,:]**2 * cv.am2au * molec_data["atommass"][:,np.newaxis]) print(f"kinetic Energy: {ekin} a.u.") #print(f"removing overall momentum") #totalMomentum = np.sum(molec_data["vel"][:,:] * cv.am2au * molec_data["atommass"][:,np.newaxis], axis=0) #comVel = totalMomentum / (cv.am2au * molec_data["atommass"].sum()) #molec_data["vel"][:,:] = molec_data["vel"][:,:] - comVel[np.newaxis,:] #ekin = 0.5 * np.sum(molec_data["vel"][:,:]**2 * cv.am2au * molec_data["atommass"][:,np.newaxis]) #print(f"kinetic Energy: {ekin} a.u.") # create cdtk files print_geom_files(molec_data) if args.gmxOut is not None: writegmx(args.gmxOut, molec_data) if args.trrOut is not None: writetrr(args.trrOut, molec_data)
if __name__ == "__main__": start()