Source code for CDTK.Tools.XPYDERCreator

#!/usr/bin/env/python

# Caroline Arnold 2017,2018 at CFEL
# 
# Generate XPYDER template from CP2K trajectories

import numpy as np
import sys
import os

from CDTK.Tools import Geometry as geom
from CDTK.Tools import Conversion as cv


MAX_BOND_LENGTH=2.0 # Max allowed intramolecular O-H distance [An]



[docs]
class XPYDERCreator(object):
    """
    Class for creating XPYDER input files

    How to use it:

    (1) read and sort for given ic
    (2) create cluster geometries for different n_QM, c_ix, embedding status

    """

    def __init__(self,**opts):
        """
        Initialize values

        **************** Parameter-Value Pairs
        self.n_QM        -- Target cluster size
        self.c_ix        -- Target cluster center ix
        self.ic          -- Initial condition
        self.yes_embed   -- If True, use embedding for MM region around QM cluster
        self.target_dir  -- Target dir appended by param-dependent path
        self.t           -- Trajectory time step
        self.thz         -- Pump frequency
        self.intensity   -- Pump intensity
        self.water_model -- Water model to be applied (CDTK:Tools:WaterModel)
        self.gbs         -- Gaussian basis set to be used
        self.md_type     -- CP2K, LAMMPS or GROMACS

        **************** Fixed Values
        traj_path -- Path to original trajectory snapshots
        pbc_dims  -- Box dimensions
        """

        # Parameter-Value Pairs
        self.n_QM        = opts.get('n_QM',10)
        self.c_ix        = opts.get('c_ix',0)
        self.ic          = opts.get('ic',1)
        self.yes_embed   = opts.get('yes_embed',False)
        self.target_dir  = opts.get('target_dir','./01_FSSH_QM/')
        self.t           = opts.get('t',0)
        self.thz         = opts.get('thz',19)
        self.intensity   = opts.get('intensity','5-12')
        self.water_model = opts.get('water_model', 'SPC')
        self.gbs         = opts.get('gbs','6-31G')
        self.md          = opts.get('md', 'CP2K')
        self.box         = opts.get('box', 15.96455)
        self.nn          = opts.get('n', 128)

        # Trajectory components
        self.r_xyz    = None    # Snapshot coords
        self.v_xyz    = None    # Snapshot velocities
        self.n_tot    = None    # Snapshot total number of molecules
        self.types    = None    # Snapshot types
        self.molCOMs  = None    # Snapshot molecular COMs
        self.r_xyz_QM = None    # Coords in QM part
        self.v_xyz_QM = None    # Velocities in QM part
        self.types_QM = None    # Types in QM part
        self.r_xyz_MM = None    # Coords in MM part
        self.v_xyz_MM = None    # Velocities in MM part
        self.types_MM = None    # Types in MM part

        self._traj_path = '/work/carnold/ClusterJobs/03_THz-H2O/trajectories_PKM/'
        
    #### GETTER / SETTER

    # BEGIN Fixed Values
    @property
    def md_type(self):
        return self.md
    @property
    def pbc_dims(self):
        return np.array([self.box] * 3)
    # END Fixed Values

    @property
    def traj_path(self):
        return self._traj_path
    @traj_path.setter
    def traj_path(self, s):
        self._traj_path = s

    @property
    def gbs(self):
        return self.__gbs
    @gbs.setter
    def gbs(self,v):
        if v in ['STO-3G','6-31G','6-31G_star_star']:
            self.__gbs = v
        else:
            raise ValueError("Not implemented for basis set: " + v)

    @property
    def water_model(self):
        return self.__water_model
    @water_model.setter
    def water_model(self,v):
        self.__water_model = v

    @property
    def intensity(self):
        return self.__intensity
    @intensity.setter
    def intensity(self,v):
        self.__intensity = v

    @property
    def thz(self):
        return self.__thz
    @thz.setter
    def thz(self,v):
        self.__thz = int(v)

    @property
    def t(self):
        return self.__t
    @t.setter
    def t(self,v):
        self.__t = int(v)

    @property
    def n_QM(self):
        return self.__n_QM
    @n_QM.setter
    def n_QM(self,v):
        self.__n_QM = int(v)

    @property
    def c_ix(self):
        return self.__c_ix
    @c_ix.setter
    def c_ix(self,v):
        self.__c_ix = int(v)

    @property
    def ic(self):
        return self.__ic
    @ic.setter
    def ic(self,v):
        self.__ic = int(v)

    @property
    def yes_embed(self):
        return self.__yes_embed
    @yes_embed.setter
    def yes_embed(self,v):
        self.__yes_embed = int(v)

    @property
    def target_dir(self):
        return self.__target_dir
    @target_dir.setter
    def target_dir(self,v):
        self.__target_dir = v

    @property
    def r_xyz(self):
        return self.__r_xyz
    @r_xyz.setter
    def r_xyz(self,v):
        self.__r_xyz = np.asarray(v)

    @property
    def v_xyz(self):
        return self.__v_xyz
    @v_xyz.setter
    def v_xyz(self,v):
        self.__v_xyz = np.asarray(v)

    @property
    def n_tot(self):
        return self.__n_tot
    @n_tot.setter
    def n_tot(self,v):
        if v is None:
            self.__n_tot = None
        else:
            self.__n_tot = int(v)

    @property
    def types(self):
        return self.__types
    @types.setter
    def types(self,v):
        self.__types = np.asarray(v)

    @property
    def molCOMs(self):
        return self.__molCOMs
    @molCOMs.setter
    def molCOMs(self,v):
        self.__molCOMs = np.asarray(v)

    @property
    def r_xyz_QM(self):
        return self.__r_xyz_QM
    @r_xyz_QM.setter
    def r_xyz_QM(self,v):
        self.__r_xyz_QM = np.asarray(v)

    @property
    def v_xyz_QM(self):
        return self.__v_xyz_QM
    @v_xyz_QM.setter
    def v_xyz_QM(self,v):
        self.__v_xyz_QM = np.asarray(v)

    @property
    def types_QM(self):
        return self.__types_QM
    @types_QM.setter
    def types_QM(self,v):
        self.__types_QM = np.asarray(v)

    @property
    def r_xyz_MM(self):
        return self.__r_xyz_MM
    @r_xyz_MM.setter
    def r_xyz_MM(self,v):
        self.__r_xyz_MM = np.asarray(v)

    @property
    def v_xyz_MM(self):
        return self.__v_xyz_MM
    @v_xyz_MM.setter
    def v_xyz_MM(self,v):
        self.__v_xyz_MM = np.asarray(v)

    @property
    def types_MM(self):
        return self.__types_MM
    @types_MM.setter
    def types_MM(self,v):
        self.__types_MM = np.asarray(v)


    #### END of GETTER / SETTER


[docs]
    def read_cp2k_trajectory(self):
        """
        Read coordinates and velocities from a CP2K trajectory snapshot
        """

        # Format-field strings
        fs1 = '{m:s}/Intensity-{it:s}/I-{ic:d}/THz-{thz:d}/'
        fs2 = 'traj_t_split/t_{t:d}'
        fs3 = 'vel_t_split/t_{t:d}'

        # Process coordinate file
        f = self.traj_path \
                + fs1.format(m=self.md_type,it=self.intensity,ic=self.ic,thz=self.thz) \
                + fs2.format(t=self.t)

        tmp = np.genfromtxt( f, skip_header=1 )

        self.types = ['O' if t==1 else 'H' for t in tmp[:,0]]
        self.r_xyz = tmp[:,1:]
        self.n_tot = len(self.types) / 3

        # Process velocity file
        f = self.traj_path \
                + fs1.format(m=self.md_type,it=self.intensity,ic=self.ic,thz=self.thz) \
                + fs3.format(t=self.t)

        tmp = np.genfromtxt( f, skip_header=1 )

        self.v_xyz = tmp[:,1:]



[docs]
    def read_lammps_trajectory(self):
        """
        Read coordinates and velocities from a LAMMPS trajectory snapshot
        """

        # Format-field strings
        fs1 = '{m:s}/Intensity-{it:s}/I-{ic:d}/THz-{thz:d}/'
        fs2 = 'traj_t_split/t_{t:d}'
        fs3 = 'vel_t_split/t_{t:d}'

        # Process coordinate file
        f = self.traj_path \
                + fs1.format(m=self.md_type,it=self.intensity,ic=self.ic,thz=self.thz) \
                + fs2.format(t=self.t)

        tmp = np.genfromtxt( f, skip_header=9 )

        self.types = ['O' if t==1 else 'H' for t in tmp[:,1]]
        self.r_xyz = tmp[:,2:]
        self.n_tot = len(self.types) / 3

        # Process velocity file
        f = self.traj_path \
                + fs1.format(m=self.md_type,it=self.intensity,ic=self.ic,thz=self.thz) \
                + fs3.format(t=self.t)

        tmp = np.genfromtxt( f, skip_header=9 )

        self.v_xyz = np.array(tmp[:,2:]) * cv.an2au / cv.fs2au 



[docs]
    def read_gromacs_trajectory(self):
        """
        Read coordinates and velocities from a GROMACS trajectory snapshot
        """

        f = self.traj_path + "I-" + str(self.ic) + "/spce.gro"
#        print "In read: ", f
        
#        muh = np.genfromtxt( f, skip_header=2, skip_footer=1 )
        tmp = np.genfromtxt( f, skip_header=2, skip_footer=1 )[:, 3:]

        self.n_tot = len(tmp) / 3
        
        self.types = ['O', 'H', 'H'] * self.n_tot

        self.r_xyz = np.array(tmp[:,0:3]) * 10.0  # convert to angstroem
        self.v_xyz = np.array(tmp[:,3:6]) * 10.0 * cv.an2au / (1000.0 * cv.fs2au) # convert to au


#        print self.r_xyz
#        exit(-1)
        

[docs]
    def sort_trajectory(self):
        """
        Sort trajectory to molecules obeying periodic boundary conditions

        Final order: OHH OHH ...
        """

        T_xyz_3D = geom.pbc_translation_matrix(self.pbc_dims)

        sorted_types = np.zeros_like(self.types)
        sorted_r_xyz = np.zeros_like(self.r_xyz)
        sorted_v_xyz = np.zeros_like(self.v_xyz)

        remain_h_ix = np.where(self.types=='H')[0]
        used_h_ix = []

        for i,o_ix in enumerate( np.where(self.types=='O')[0] ):
            # Put O coords / vels in right positions
            sorted_r_xyz[3*i] = self.r_xyz[o_ix]
            sorted_v_xyz[3*i] = self.v_xyz[o_ix]

            # Find the two nearest H atoms to current O atom w/ PBC
            h_dist_pbc = np.zeros(len(remain_h_ix))
            tmp_xyz = np.copy(self.r_xyz)

            if len(remain_h_ix)==2:
                nearest_h_ix = remain_h_ix
            else:
                # minimum distance across PBC copies
                for j,h_ix in enumerate( remain_h_ix ):
                    d = np.linalg.norm( self.r_xyz[h_ix] - self.r_xyz[o_ix] + T_xyz_3D, axis=-1 )
                    mdi = np.unravel_index( np.argmin(d), (3,3,3) )
                    tmp_xyz[h_ix] = self.r_xyz[h_ix] + T_xyz_3D[mdi]
                    h_dist_pbc[j] = d[mdi]

                nearest_h_ix = remain_h_ix[np.argpartition(h_dist_pbc,2)[:2]]
                # check that max bond length not exceeded
                assert np.linalg.norm(self.r_xyz[o_ix] - tmp_xyz[nearest_h_ix[-1]]) < MAX_BOND_LENGTH

            # Put H coords / vels in right positions
            sorted_r_xyz[3*i+1:3*i+3] = tmp_xyz[nearest_h_ix]
            sorted_v_xyz[3*i+1:3*i+3] = self.v_xyz[nearest_h_ix]

            used_h_ix.append(nearest_h_ix[:])

            # remove nearest from remain_h_ix
            remain_h_ix = remain_h_ix[remain_h_ix != nearest_h_ix[0]]
            remain_h_ix = remain_h_ix[remain_h_ix != nearest_h_ix[1]]

        # New sorted types
        sorted_types[::3]  = 'O'
        sorted_types[1::3] = 'H'
        sorted_types[2::3] = 'H'

        used_h_ix = np.sort(np.asarray(used_h_ix).flatten())

        # check that we did not lose any atoms in the process
        assert np.allclose(np.sort(np.where(self.types=='H')[0]),used_h_ix)

        # Put sorted to class attributes
        self.types = sorted_types
        self.r_xyz = sorted_r_xyz
        self.v_xyz = sorted_v_xyz



[docs]
    def create_cluster_geometry(self):
        """
        Create a cluster geometry centered around the specified molecule.

        Selects QM and MM region
        """

        self._calc_mol_COMs()

        # Center to specified molecule
        center_COMdists = np.zeros(self.n_tot)
        center_molCOMs = np.copy(self.molCOMs) - self.molCOMs[self.c_ix]
        center_r_xyz   = np.copy(self.r_xyz) - self.molCOMs[self.c_ix]

        # QM and MM region to be divided
        target_r_xyz_QM = np.zeros([3*self.n_QM, 3])
        target_v_xyz_QM = np.zeros([3*self.n_QM, 3])
        target_r_xyz_MM = np.zeros([3*(self.n_tot-self.n_QM), 3])
        target_v_xyz_MM = np.zeros([3*(self.n_tot-self.n_QM), 3])

        T_xyz_3D = geom.pbc_translation_matrix(self.pbc_dims)

        # Center the snapshot to specified molecule
        for i in range(self.n_tot):
            d = np.linalg.norm(center_molCOMs[self.c_ix] - center_molCOMs[i] - T_xyz_3D,axis=-1)
            mdi = np.unravel_index(np.argmin(d), (3,3,3))

            center_molCOMs[i] = center_molCOMs[i] + T_xyz_3D[mdi]
            center_r_xyz[3*i:3*i+3] += T_xyz_3D[mdi]
            center_COMdists[i] = np.linalg.norm(center_molCOMs[self.c_ix] - center_molCOMs[i])

        ix_QM = np.argsort(center_COMdists)[:self.n_QM]
        ix_MM = np.argsort(center_COMdists)[self.n_QM:]

        for i,ix in enumerate(ix_QM):
            target_r_xyz_QM[3*i:3*i+3] = center_r_xyz[3*ix:3*ix+3]
            target_v_xyz_QM[3*i:3*i+3] = self.v_xyz[3*ix:3*ix+3]

        for i,ix in enumerate(ix_MM):
            target_r_xyz_MM[3*i:3*i+3] = center_r_xyz[3*ix:3*ix+3]
            target_v_xyz_MM[3*i:3*i+3] = self.v_xyz[3*ix:3*ix+3]

        # Set QM class attributes
        self.r_xyz_QM = target_r_xyz_QM
        self.v_xyz_QM = target_v_xyz_QM
        self.types_QM = np.tile(['O','H','H'],self.n_QM)

        # Set MM class attributes
        self.r_xyz_MM = target_r_xyz_MM
        self.v_xyz_MM = target_v_xyz_MM
        self.types_MM = np.tile(['O','H','H'],self.n_tot - self.n_QM)



[docs]
    def get_xuvpes_targetpath(self):
        """
        Construct the target path for XUV / PES input files
        """

        s1 = 'Intensity-{intensity}/I-{ic:d}/THz-{thz:d}/'
        s2 = 't_{t:04d}_N_mol_{n_QM:03d}_center_ix_{c_ix:04d}/'

        # Construct path to output file
        target_path = self.target_dir
        target_path += s1.format(intensity=self.intensity,ic=self.ic,thz=self.thz)
        target_path += s2.format(t=self.t,n_QM=self.n_QM,c_ix=self.c_ix)

        return target_path



[docs]
    def write_xuvpes_inputfile(self):
        """
        Fill in the template for XUV photoelectron spectra (XMOLECULE)

        Angstrom units throughout
        """

        # QM part, files needed in projecting tdipole
        lt1 = '  {t:s}  {x:+.8f}  {y:+.8f}  {z:+.8f}  {eln:8s}  {nao:s}\n'
        # QM part, embedded
        lt2 = '  {t:s}  {x:+.8f}  {y:+.8f}  {z:+.8f}\n'
        # MM part, embedded
        lt3 = '  {t:s}  {x:+.8f}  {y:+.8f}  {z:+.8f}  {pc:+.2f}\n'

        target_path = self.get_xuvpes_targetpath()

        # Create directory if needed
        if not os.path.exists( target_path ):
            os.makedirs( target_path )

        # Create target files

        # H2O cluster
        atom_xyz = ''
        for i in range(len(self.types_QM)):
            r = self.r_xyz_QM[i]
            eln = el_string( self.types_QM[i], self.gbs )
            nao = self.types_QM[i] + '_' + self.gbs + '.nao'
            s = lt1.format(t=self.types_QM[i],x=r[0],y=r[1],z=r[2],eln=eln,nao=nao)
            atom_xyz += s

        with open(target_path + 'H2O_cluster_' + self.gbs + '.in','w') as f:
            f.write(open('H2O_cluster.in.template').read().format(atom_xyz=atom_xyz,gbs=self.gbs))

        # H2O X cluster -- takes same atom_xyz
        ptsc_xyz = ''
        for i in range(len(self.types_MM)):
            r = self.r_xyz_MM[i]
            if self.types_MM[i] == 'H':
                pc=0.41
            elif self.types_MM[i] == 'O':
                pc=-0.82
            s = lt3.format(t='X',x=r[0],y=r[1],z=r[2],pc=pc)
            ptsc_xyz += s

        with open(target_path + 'H2O_X_cluster_' + self.gbs + '.in','w') as f:
            f.write(open('H2O_X_cluster.in.template').read().format(atom_xyz=atom_xyz,\
                    ptsc_xyz=ptsc_xyz,gbs=self.gbs))

        # H2O min cluster for projecting tdipole -- takes new atom_xyz
        atom_xyz = ''
        for i in range(len(self.types_QM)):
            r = self.r_xyz_QM[i]
            eln = el_string( self.types_QM[i], self.gbs )
            nao = self.types_QM[i] + '_min.nao'
            s = lt1.format(t=self.types_QM[i],x=r[0],y=r[1],z=r[2],eln=eln,nao=nao)
            atom_xyz += s
        with open(target_path + 'H2O_cluster_min_' + self.gbs + '.in','w') as f:
            f.write(open('H2O_cluster_min.in.template').read().format(atom_xyz=atom_xyz))

        print('============================================================')
        print('The target files were created at')
        print('    ' + target_path)





[docs]
    def write_xpyder_inputfile(self):
        """
        Fill in the xpyder template

        Atomic units throughout

        """

        fs = 'N_mol_{n:03d}/I-{i:d}/center_ix_{c:04d}/'
        line_template='  {t:s}    {x:.8f}    {y:.8f}    {z:.8f}\n'

        # Path to output file
        target_param_path = fs.format(n=self.n_QM,i=self.ic,c=self.c_ix)
        target_file = self.target_dir + target_param_path + 'input_istate_00.in'

        cartPOS = ''
        cartVEL = ''
        cartPOS_MM = ''
        cartVEL_MM = ''

        # QM region
        for i in range(len(self.types_QM)):
            r = self.r_xyz_QM[i] * cv.an2au
            v = self.v_xyz_QM[i]
            s = line_template.format(t=self.types_QM[i],x=r[0],y=r[1],z=r[2])
            cartPOS += s
            s = line_template.format(t=self.types_QM[i],x=v[0],y=v[1],z=v[2])
            cartVEL += s

        # MM region
        if self.yes_embed:
            for i in range(min(len(self.types_MM), self.nn)):
                r = self.r_xyz_MM[i] * cv.an2au
                v = self.v_xyz_MM[i]
                s = line_template.format(t=self.types_MM[i],x=r[0],y=r[1],z=r[2])
                cartPOS_MM += s
                s = line_template.format(t=self.types_MM[i],x=v[0],y=v[1],z=v[2])
                cartVEL_MM += s

        # occupation string for istate 0
        occ = '2'*self.n_QM*5
        occ = occ[:-1] + '1'
        nstates = 4 * self.n_QM

        if not os.path.exists( self.target_dir + target_param_path ):
            os.makedirs(self.target_dir + target_param_path)

        with open(target_file, 'w') as f:
            f.write(open('XPYDER_TEMPLATE').read().format(occ=occ,\
                    nstates=nstates,cartPOS=cartPOS,cartVEL=cartVEL,\
                    cartPOS_MM=cartPOS_MM,cartVEL_MM=cartVEL_MM))
            if self.yes_embed:
                f.write('\n')
                f.write('$mmregion\n')
                f.write('    mode=' + self.water_model + '\n')
                f.write('$end\n')

        print('============================================================')
        print('The target file was created at')
        print('    ' + target_file)


    def _calc_mol_COMs(self):
        """
        Create molecular centers of mass (COMs)

        Requires sorted geometry
        """

        if not np.all(self.types[::3]=='O'):
            raise ValueError('Call sort_trajectory() first')

        tmp = np.zeros([self.n_tot,3])

        mo = cv.periodicTable['O']['atomic_mass']
        mh = cv.periodicTable['H']['atomic_mass']

        for i in range(self.n_tot):
            mol_xyz=geom.XYZGeometry()
            mol_xyz.setAtomPositions(self.r_xyz[3*i:3*i+3])
            mol_xyz.setMasses([mo, mh, mh])

            tmp[i] = mol_xyz.centerOfMass()

        self.molCOMs = tmp



[docs]
def el_string(a_type, gbs):
    """
    Constructs the nao-style electron string

    e.g. O 2s2p

    Parameters:
    a_type - atom type
    gbs    - gaussian basis set
    """

    if a_type not in ['O','H']:
        raise ValueError("Not implemented for atom type: " + a_type)

    if gbs == '6-31G':
        if a_type == 'O':
            return '2s2p1d'
        elif a_type == 'H':
            return '2s1p'
    elif gbs == '6-31G_star_star':
        if a_type == 'O':
            return '2s2p2d'
        elif a_type == 'H':
            return '2s1p1d'
    else:
        raise ValueError("Not implemented for basis set: " + gbs)