Source code for CDTK.gmx2cdtk

#!/usr/bin/env python
#*  **************************************************************************
#*
#*  CDTK, Chemical Dynamics Toolkit
#*  A modular system for chemical dynamics applications and more
#*
#*  Copyright (C) 2011, 2012, 2013, 2014, 2015, 2016
#*  Oriol Vendrell, DESY, <oriol.vendrell@desy.de>
#*
#*  Copyright (C) 2017, 2018, 2019
#*  Ralph Welsch, DESY, <ralph.welsch@desy.de>
#*
#*  Copyright (C) 2020, 2021, 2022, 2023
#*  Ludger Inhester, DESY, ludger.inhester@cfel.de
#*
#*  This file is part of CDTK.
#*
#*  CDTK is free software: you can redistribute it and/or modify
#*  it under the terms of the GNU General Public License as published by
#*  the Free Software Foundation, either version 3 of the License, or
#*  (at your option) any later version.
#*
#*  This program is distributed in the hope that it will be useful,
#*  but WITHOUT ANY WARRANTY; without even the implied warranty of
#*  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#*  GNU General Public License for more details.
#*
#*  You should have received a copy of the GNU General Public License
#*  along with this program.  If not, see <http://www.gnu.org/licenses/>.
#*
#*  **************************************************************************
"""
This module has several functions to transform coordinates and velocities obtained
from Gromacs calculation into suitable input files for CDTK. Use ``gmx2cdtk -h`` 
to find out how to use this program.


Basic units in GMX, please look at the following web.
https://manual.gromacs.org/documentation/2019/reference-manual/definitions.html#table-basicunits
  
1 Picoseconds = 41341.374575751 Atomic Unit Of Time
1 nm = 18.8973 Bohr
1 nm/ps = 18.8973/41341.374575751 Bohr/a.u. = 0.00045710381413114194
1 nm/ps = 4.571038E-04 Bohr/a.u.
"""

import subprocess
from optparse import OptionParser

import numpy as np
import CDTK.Tools.Conversion as cv


nm2bohr = cv.an2au * 10  # 18.8973
nm_ps2bohr_au = cv.an2au * 10 / (cv.fs2au * 1000)  # 4.571038e-04



[docs]
def get_parser():
    parser = OptionParser()
    parser = OptionParser(usage="usage: %prog -g filename.gro [further options]")
    parser.add_option(
        "-g",
        "--GRO_file",
        dest="gmxFile",
        type="str",
        default=None,
        help="GMX .gro input file.",
    )

    parser.add_option(
        "-t",
        "--trr_file",
        dest="trrFile",
        type="str",
        default=None,
        help="GMX .trr input file (optional).",
    )

    parser.add_option(
        "-n",
        "--cen_mol",
        dest="centeredMolecule",
        type="int",
        default=13,
        help="Please enter the index of the molecule that you want to center. It is an integer number and not exceeding the number of water molecules in the box.",
    )

    parser.add_option(
        "-s",
        "--skip",
        dest="skip",
        type="float",
        default=None,
        help="Skip molecules (residues) with center of mass distance from the center large than this value. Value is given in atomic units",
    )

    parser.add_option(
        "--centerMethod",
        type="choice",
        dest="centerMethod",
        default="first",
        choices=["first", "com"],
        help="How to center: according to first atom of residue (first) or center of mass of residue (com)",
    )

    parser.add_option(
        "-o",
        type="str",
        dest="gmxOut",
        default=None,
        help="GMX .gro output file (optional)",
    )

    parser.add_option(
        "-r",
        "--output_trr",
        type="str",
        dest="trrOut",
        default=None,
        help="GMX .trr output file (optional)",
    )

    parser.add_option(
        "--noreorder",
        action='store_false',
        dest="doReorder",
        default=True,
        help="do not reorder the molecules according to distance (optional, default: do sort the molecules)",
    )

    return parser



oxygenNames = ['O', 'OW', 'OD', 'O_', 'OS']
hydrogenNames = ['H', 'HW', 'HW1', 'HW2', 'HD', 'HC']
carbonNames = ['C', 'CA', 'CR', 'CW', 'C_', 'CS']




[docs]
def atomName2Element(atomName):
    """
    converts atomName into element name
    Parameters
    ----------
    atomName : string

    Returns
    -------
    element name: string

    """
    atomName = atomName.strip()
    atomName = ''.join(c for c in atomName if (not c.isnumeric() and c != ' '))
    if atomName in oxygenNames:
        return "O"
    elif atomName in hydrogenNames:
        return "H"
    elif atomName in carbonNames:
        return "C"
    elif atomName in cv.periodicTable.keys():
        return atomName
    else:
        raise ValueError("AtomName {:s} not recognized".format(atomName))




[docs]
def atomName2atomNum(atomName):
    """
    converts atomName into element number
    Parameters
    ----------
    atomName : string

    Returns
    -------
    element number: index

    """
    atomName = atomName.strip()
    atomName = ''.join(c for c in atomName if (not c.isnumeric() and c != ' '))
    if atomName in oxygenNames:
        return cv.periodicTable['O']['atomic_number']
    elif atomName in hydrogenNames:
        return cv.periodicTable['H']['atomic_number']
    elif atomName in carbonNames:
        return cv.periodicTable['C']['atomic_number']
    elif atomName in cv.periodicTable.keys():
        return cv.periodicTable[atomName]['atomic_number']
    else:
        raise ValueError("AtomName {:s} not recognized".format(atomName))




[docs]
def atomName2atomMass(atomName):
    """
    converts atomName into element mass
    Parameters
    ----------
    atomName : string

    Returns
    -------
    element mass in atomic units: float

    """
    atomName = atomName.strip()
    atomName = ''.join(c for c in atomName if (not c.isnumeric() and c != ' '))
    if atomName in oxygenNames:
        return cv.periodicTable['O']['atomic_mass']
    elif atomName in hydrogenNames:
        return cv.periodicTable['H']['atomic_mass']
    elif atomName in carbonNames:
        return cv.periodicTable['C']['atomic_mass']
    elif atomName in cv.periodicTable.keys():
        return cv.periodicTable[atomName]['atomic_mass']
    else:
        raise ValueError("AtomName {:s} not recognized".format(atomName))



[docs]
def readtrr(xtcfilename, gmxfilename):
    try:
        import MDAnalysis as mda
    except ModuleNotFoundError:
        raise ModuleNotFoundError("Please install module MDAnalysis for reading or writing gromacs trr files")
    u = mda.Universe(gmxfilename, xtcfilename)
    t = u.trajectory[-1]
    residueIndex = u.atoms.resids
    residueName = u.atoms.resnames
    atomName = u.atoms.names
    atomName = np.array([a.replace('_','') for a in atomName])
    topol = np.array([str(ri) + rn.strip() + "_" + aN.strip()
                      for ri, rn, aN in zip(residueIndex, residueName, atomName)])  # topology with cdtk format
    atomnums = np.array(list(map(atomName2atomNum, atomName)))
    atomlist = np.array(list(map(atomName2Element, atomName)))
    atommass = np.array(list(map(atomName2atomMass, atomName)))
    v_box = u.dimensions[0:3] / 10. * nm2bohr
    molec = {
        "geom": t.positions / 10. * nm2bohr,
        "vel": t.velocities / 10. * nm_ps2bohr_au,
        "topol": topol,
        "atomlist": atomlist,
        "atomnums": atomnums,
        "atommass": atommass,
        "boxsize": v_box,
        "residueIndex": residueIndex,
        "residueName": residueName,
        "atomName": atomName,
    }
    return molec



[docs]
def writetrr(trrFilename, molec):
    """write a gromacs .trr file based on the data from molec
    uses the MDAnalysis python package

    Args:
        trrFilename (string): filename
        molec (dictionary): data
    """
    try:
        import MDAnalysis as mda
    except ModuleNotFoundError:
        raise ModuleNotFoundError("Please install module MDAnalysis for reading or writing gromacs trr files")

    n_atoms = molec["geom"].shape[0]
    resid, atResIdx = np.unique(molec['residueIndex'], return_index=True)
    resnames = np.array([ a.strip()+b for a,b in zip(molec['residueName'][atResIdx], resid.astype(str))])
    n_residues = resid.shape[0]
    atom_resindex = molec['residueIndex']
    u = mda.Universe.empty(n_atoms,
                        n_residues=n_residues,
                        n_segments=1,
                        atom_resindex=atom_resindex-1,
                        velocities=True
                        )
    u.add_TopologyAttr('name', [a.strip() for a in molec["atomName"]])
    u.add_TopologyAttr('types', [a.strip() for a in molec["atomlist"]])
    u.add_TopologyAttr('masses', molec['atommass'] )
    u.add_TopologyAttr('segid', ['SYSTEM'])
    u.add_TopologyAttr('resname', resnames)
    u.add_TopologyAttr('resid', resid)
    u.load_new(molec['geom'] * 10 / nm2bohr, order='fac')
    u.trajectory[0].velocities = molec['vel'] * 10. / nm_ps2bohr_au

    dimensions = np.zeros(6)
    dimensions[0:3] = molec["boxsize"] * 10. / nm2bohr
    dimensions[3:6] = np.array([90.,90.,90.])
    u.trajectory[0].dimensions = dimensions
    u.atoms.write(trrFilename)



[docs]
def readgmx(filename, check_water=False):
    """
    Read gromacs file and return the topolgy, element types, coordinates,
    velocities of all atoms and the box size.

    Parameters
    ----------
    filename : string
        Name of gromacs file.

    Returns
    -------
    molec : dict of ndarray.
        Dictionary containing the following arrays:
        - geom : (n_atoms, 3) ndarray of float.
            Coordinates of all atoms.
        - vel : (n_atoms, 3) ndarray of float.
            Velocities of all atoms.
        - residueIndex: (n_atoms) ndarray of ints
        - residueName: (n_atoms) ndarray of str
        - topol : (n_atoms) ndarray of str.
            String identifiers for the topology of the system (atoms in molecules).
        - atomlist : (n_atoms) ndarray of str.
            String identifiers of all atoms types (e.g., "O", "H").
        - atomnums : (n_atoms) ndarray of int.
            Intigers with atomic numners of each element in atomloist.
        - atommass : (n_atoms) ndarray of float.
            Atomic masses of each element in atomlist.
        - boxsize : (3) ndarray of float.
            Box size (len_x, len_y, len_z).
        - residueName : (n_atoms) ndarray of str
            Name of residue for each atom 
        - residueIndex : (n_atoms) ndarray of int
            Index of residue for each atom
        - atomName : (n_atoms) ndarray of str
            Name of atoms (e.g. "OW", "HW1")
   """
    # the .gro file format has fixed column positions as delimiters ("%5d%-5s%5s%5d%8.3f%8.3f%8.3f%8.4f%8.4f%8.4f\n")
    delimiter = [5, 5, 5, 5, 8, 8, 8, 8, 8, 8]

    residueIndex = np.genfromtxt(filename, skip_header=2, skip_footer=1, usecols=(
        0), dtype=int, delimiter=delimiter)
    residueName = np.genfromtxt(filename, skip_header=2, skip_footer=1, usecols=(
        1), dtype='<U5', delimiter=delimiter)
    atomName = np.genfromtxt(filename, skip_header=2, skip_footer=1, usecols=(
        2), dtype='<U5', delimiter=delimiter)
    atomName = np.array([a.replace('_','') for a in atomName])
    topol = np.array([str(ri) + rn.strip() + "_" + aN.strip()
                      for ri, rn, aN in zip(residueIndex, residueName, atomName)])  # topology with cdtk format
    # Get position and velocities of all atoms
    posvel = np.genfromtxt(
        filename, skip_header=2, skip_footer=1, usecols=(range(4, 10)), dtype=float, delimiter=delimiter
    )
    xyz = posvel[:, :3] * nm2bohr
    vxyz = posvel[:, 3:] * nm_ps2bohr_au
    # Get box size from last line of file.
    last_line = subprocess.check_output(["tail", "-1", filename])
    v_box = np.array(last_line.split(), dtype=float) * nm2bohr
    # Get atomic numbers, atomic masses, and element names
    atomnums = np.array(list(map(atomName2atomNum, atomName)))
    atommass = np.array(list(map(atomName2atomMass, atomName)))
    atomlist = np.array(list(map(atomName2Element, atomName)))
    molec = {
        "geom": xyz,
        "vel": vxyz,
        "topol": topol,
        "atomlist": atomlist,
        "atomnums": atomnums,
        "atommass": atommass,
        "boxsize": v_box,
        "residueIndex": residueIndex,
        "residueName": residueName,
        "atomName": np.array([a.replace('_','') for a in atomName]),
    }
    return molec




[docs]
def writegmx(filename, molec, reIndexResidues=True):
    """
    Writes gromacs .gro file from molec stricture

    Parameters
    ----------
    filename : string
        Name of gromacs file.

    molec : dict of ndarray.
        Dictionary containing the following arrays:
        - geom : (n_atoms, 3) ndarray of float.
            Coordinates of all atoms.
        - vel : (n_atoms, 3) ndarray of float.
            Velocities of all atoms.
        - topol : (n_atoms) ndarray of str.
            String identifiers for the topology of the system (atoms in molecules).
        - atomlist : (n_atoms) ndarray of str.
            String identifiers of all atoms types.
        - atomnums : (n_atoms) ndarray of int.
            Intigers with atomic numners of each element in atomloist.
        - atommass : (n_atoms) ndarray of float.
            Atomic masses of each element in atomlist.
        - boxsize : (3) ndarray of float.
            Box size (len_x, len_y, len_z).
        - residueName : (n_atoms) ndarray of str
            Name of residue for each atom 
        - residueIndex : (n_atoms) ndarray of int
            Index of residue for each atom
        - atomName : (n_atoms) ndarray of str
            Name of atoms (e.g. "OW", "HW1")
    """
    geom = molec['geom']
    vel = molec['vel']
    topol = molec['topol']
    boxsize = molec['boxsize']

    resN = 0
    prevResIdx = 0
    with open(filename, 'w') as f:
        f.write(" generated by gmx2cdtk\n")
        f.write(" {:d}\n".format(geom.shape[0]))
        for i in range(geom.shape[0]):
            residx = molec['residueIndex'][i]
            resname = molec['residueName'][i]
            atomname = molec['atomName'][i]

            if prevResIdx == 0 or prevResIdx != residx:
                resN = resN + 1
                prevResIdx = residx
            if reIndexResidues:
                printResIdx = resN
            else:
                printResIdx = residx

            if (np.any(np.isnan(vel))):
                f.write("{:5d}{:5s}{:5s}{:5d}{:8.3f}{:8.3f}{:8.3f}\n".format(
                    printResIdx, resname, atomname, i+1,
                    geom[i, 0] / nm2bohr,
                    geom[i, 1] / nm2bohr,
                    geom[i, 2] / nm2bohr)
                )
            else:
                f.write("{:5d}{:5s}{:5s}{:5d}{:8.3f}{:8.3f}{:8.3f}{:8.4f}{:8.4f}{:8.4f}\n".format(
                    printResIdx, resname, atomname, i+1,
                    geom[i, 0] / nm2bohr,
                    geom[i, 1] / nm2bohr,
                    geom[i, 2] / nm2bohr,
                    vel[i, 0] / nm_ps2bohr_au,
                    vel[i, 1] / nm_ps2bohr_au,
                    vel[i, 2] / nm_ps2bohr_au)
                )
        f.write("{:f} {:f} {:f}\n".format(
            boxsize[0]/nm2bohr, boxsize[1]/nm2bohr, boxsize[2]/nm2bohr))
        f.close()




[docs]
def get_coord_center(n_center, molec, centerMethod='first'):
    """
    This will return the coordinate center of residue index n_center

    Parameters
    ----------
    Ncenter : int
        Index of molecule (residue index) that you want to be the center.
    molec : dict of ndarrays.
    centerMethod: either "com" or "first"

    Returns
    -------
    ndarray of float.
        X, Y, Z coordinates of center.

    """
    atomIdx = np.flatnonzero(molec['residueIndex'] == n_center)
    if centerMethod == "first":
        center = molec['geom'][atomIdx[0]]
    elif centerMethod == "com":
        g = molec['geom'][atomIdx]
        m = molec['atommass'][atomIdx]
        center = np.sum(g * m[:, np.newaxis], axis=0) / np.sum(m)
    else:
        raise ValueError("unknown centerMethod={:s} ".format(centerMethod))
    print("Centering {:s} {:d} at position {:f} {:f} {:f}\n".
          format(molec['residueName'][n_center], molec['residueIndex'][n_center], center[0], center[1], center[2]))

    return center




[docs]
def shift_box(shift, xyz, v_box):
    """
    Shift all molecules by shift and move atoms that fall outside the box 
    according to periodic boundary conditions
    Parameters
    ----------
    shift : (3) ndarray of float.
        X, Y, Z coordinates of new center (oxygen atom).
    xyz : (n_atoms, 3) ndarray of float.
        Original coordinates of all atoms.

    Returns
    -------
    (n_atoms, 3) ndarray of float.
        New coordinates centered arund center_new in the box.

    """

    new_xyz = xyz.copy() - shift[np.newaxis, :]
    new_xyz = new_xyz % v_box
    new_xyz[new_xyz[:, 0] < -v_box[0]/2, 0] += v_box[0]
    new_xyz[new_xyz[:, 0] > v_box[0]/2, 0] -= v_box[0]
    new_xyz[new_xyz[:, 1] < -v_box[1]/2, 1] += v_box[1]
    new_xyz[new_xyz[:, 1] > v_box[1]/2, 1] -= v_box[1]
    new_xyz[new_xyz[:, 2] < -v_box[2]/2, 2] += v_box[2]
    new_xyz[new_xyz[:, 2] > v_box[2]/2, 2] -= v_box[2]

    return new_xyz




[docs]
def makeResiduesWhole(molec):
    """
    Rearranges the atoms by multiples of boxsize such that the residues are closest together

    Parameters
    ----------
    molec : dict of ndarray.

    Returns
    -------
    xyz : (n_atoms, 3) ndarray of float.
        new coordinates.    
    """
    box = molec['boxsize']
    residues = np.unique(molec['residueIndex'])
    newGeom = molec['geom'].copy()
    for r in residues:
        while True:  # iterate until residue is whole
            residueIsWhole = True
            atomIdx = np.flatnonzero(molec['residueIndex'] == r)
            g = newGeom[atomIdx]
            m = molec['atommass'][atomIdx]
            resCenter = np.sum(g * m[:, np.newaxis], axis=0) / np.sum(m)
            for a in atomIdx:
                for i in range(3):
                    # if an atom is shifted, reiterate the whole residue, because center of mass needs to be recalculated
                    if(newGeom[a, i] - resCenter[i] > box[i]/2):
                        newGeom[a, i] -= box[i]
                        residueIsWhole = False
                        break
                    if(newGeom[a, i] - resCenter[i] < -box[i]/2):
                        newGeom[a, i] += box[i]
                        residueIsWhole = False
                        break
            if residueIsWhole:  # if nothing has been shifted, we can procceed with the next residue
                break

    return newGeom




[docs]
def sort_molec(molec, skipDistance=False):
    """
    Sorts the molecules (residues) according to their center-of-mass positions (distance to the origin)

    Parameters
    ----------
    molec : dict of ndarray.
        Dictionary built from reading gromax file (check readgmx documentation).
    Returns
    -------
    molec : dict of ndarray
        Updated dictionary with sorted molecules.

    """
    # calculate the center of mass distance for all residues
    residues = np.unique(molec['residueIndex'])
    resCOM = np.zeros((residues.max() + 1, 3))  # residue index starts with 1
    resCOMdist = np.zeros((residues.max() + 1))
    for r in residues:
        atomIdx = np.flatnonzero(molec['residueIndex'] == r)
        g = molec['geom'][atomIdx]
        m = molec['atommass'][atomIdx]
        resCOM[r] = np.sum(g * m[:, np.newaxis], axis=0) / np.sum(m)
    resCOMdist = np.linalg.norm(resCOM, axis=1)
    # sort the residues according to distance
    resIndSorted = np.argsort(resCOMdist)
    # form the atom index that follows the order of the residues
    ind_sort = []
    i = 0
    for r in resIndSorted:
        atomIdx = np.flatnonzero(molec['residueIndex'] == r)
        # residue 0 should not appear (residue index starts with 1)
        if atomIdx.shape[0] == 0:
            continue
        if skipDistance is not None:
            if resCOMdist[r] > skipDistance:
                continue
        ind_sort = ind_sort + atomIdx[:].tolist()
        i += len(atomIdx)
    # reorder all quantities according to reordered atom index
    molec['geom'] = molec['geom'][ind_sort]
    molec['vel'] = molec['vel'][ind_sort]
    molec['topol'] = molec['topol'][ind_sort]
    molec['atomlist'] = molec['atomlist'][ind_sort]
    molec['atomnums'] = molec['atomnums'][ind_sort]
    molec['atommass'] = molec['atommass'][ind_sort]
    molec['residueIndex'] = molec['residueIndex'][ind_sort]
    molec['residueName'] = molec['residueName'][ind_sort]
    molec['atomName'] = molec['atomName'][ind_sort]
    return molec




[docs]
def print_geom_files(molec):
    """
    Prints input files needed for `xsample` first sorting all
    atoms from closest to the center of the box to furthest.
    The files printed are .in, .xyz and .vel.

    Parameters
    ----------
    molec : dict of ndarray.
        Dictionary built from reading gromax file (check readgmx documentation).


    """
    f_pos = "atompos"
    f_vel = "atomvel"
    f_topol = "topol"
    f_elem = "atomlist"
    f_num = "atomnums"
    f_mass = "atommass"

    np.savetxt(f_pos, molec["geom"].ravel(), fmt="%-10.6f")
    np.savetxt(f_vel, molec["vel"].ravel(), fmt="%-10.6f")
    np.savetxt(f_topol, molec["topol"], fmt="%s")
    np.savetxt(f_elem, molec["atomlist"], fmt="%s")
    np.savetxt(f_num, molec["atomnums"], fmt="%d")
    np.savetxt(f_mass, molec["atommass"], fmt="%-10.6f")

    return




[docs]
def print_xsample(filename, center_new, xyz, vxyz, elems):

    raise NotImplementedError




[docs]
def print_xpyder(filename, center_new, xyz, vxyz, elems):

    raise NotImplementedError





[docs]
def start():
    parser = get_parser()
    args, _ = parser.parse_args()
    if args.gmxFile is None:
        parser.error("You need to provide a .gro file!")
    if args.trrFile is not None:
        molec_data = readtrr(args.trrFile, args.gmxFile)
    else:
        molec_data = readgmx(args.gmxFile)

    # Get center of box and shift coordinates.
    boxCenter = get_coord_center(
        args.centeredMolecule, molec_data, centerMethod=args.centerMethod)
    shifted_geom = shift_box(
        boxCenter, molec_data["geom"], molec_data["boxsize"])
    molec_data['geom'] = shifted_geom

    # make residues whole
    geom2 = makeResiduesWhole(molec_data)
    molec_data['geom'] = geom2

    # sort molecules according to distance
    if args.doReorder:
        molec_data = sort_molec(molec_data, skipDistance=args.skip)
    ekin = 0.5 * np.sum(molec_data["vel"][:,:]**2 * cv.am2au * molec_data["atommass"][:,np.newaxis])
    print(f"kinetic Energy: {ekin} a.u.")
    #print(f"removing overall momentum")
    #totalMomentum = np.sum(molec_data["vel"][:,:] * cv.am2au * molec_data["atommass"][:,np.newaxis], axis=0)
    #comVel = totalMomentum / (cv.am2au * molec_data["atommass"].sum())
    #molec_data["vel"][:,:] = molec_data["vel"][:,:] - comVel[np.newaxis,:]
    #ekin = 0.5 * np.sum(molec_data["vel"][:,:]**2 * cv.am2au * molec_data["atommass"][:,np.newaxis])
    #print(f"kinetic Energy: {ekin} a.u.")
    # create cdtk files
    print_geom_files(molec_data)
    if args.gmxOut is not None:
        writegmx(args.gmxOut, molec_data)
    if args.trrOut is not None:
        writetrr(args.trrOut, molec_data)


if __name__ == "__main__":
    start()