Source code for CDTK.Interfaces.MolcasInterface

#*  **************************************************************************
#*
#*  CDTK, Chemical Dynamics Toolkit
#*  A modular system for chemical dynamics applications and more
#*
#*  Copyright (C) 2011, 2012, 2013, 2014, 2015, 2016
#*  Oriol Vendrell, DESY, <oriol.vendrell@desy.de>
#*
#*  Copyright (C) 2017, 2018, 2019
#*  Ralph Welsch, DESY, <ralph.welsch@desy.de>
#*
#*  Copyright (C) 2020, 2021, 2022, 2023
#*  Ludger Inhester, DESY, ludger.inhester@cfel.de
#*
#*  This file is part of CDTK.
#*
#*  CDTK is free software: you can redistribute it and/or modify
#*  it under the terms of the GNU General Public License as published by
#*  the Free Software Foundation, either version 3 of the License, or
#*  (at your option) any later version.
#*
#*  This program is distributed in the hope that it will be useful,
#*  but WITHOUT ANY WARRANTY; without even the implied warranty of
#*  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#*  GNU General Public License for more details.
#*
#*  You should have received a copy of the GNU General Public License
#*  along with this program.  If not, see <http://www.gnu.org/licenses/>.
#*
#*  **************************************************************************

import os
import shutil
import sys
import re
import numpy as np

import CDTK
from CDTK.Tools.Utils import getUniqueID
import CDTK.Tools.Utils as uti
import CDTK.Tools.EField as efi
import CDTK.Tools.Conversion as conv

TINY = 1.e-8

# - Default definitions
#  These are just an example of values for methane cation
#  described at the HK-Koopmans level. For any other system
#  they are nonsense.
BASIS    = 'sto-3g'
CHARGE0  = 0
CHARGE1  = 1
SPIN     = 2
NACTEL   = 7
INACTIVE = 1
RAS2     = 4
CIROOT   = 4
NROFJOBIPHS = '2 4 4; 1 2 3 4; 1 2 3 4'



[docs]
class molcas(object):
    def __init__(self,**opts):
        self.engine      = 'molcas' # Used in the wrapper class
        self.nstates     = opts.get('nstates',1)
        self._ID         = opts.get('ID', getUniqueID(12))
        self._inp        = self._ID+'.inp'
        self._log        = self._ID+'.log'
        self._counter    = 0                          # Execution counter
        self.itemplate   = opts.get('inputTemplate','')
        self.is_storeinp = opts.get('is_storeinp',True)
        self.is_storelog = opts.get('is_storelog',True)
        self.is_storeerr = opts.get('is_storeerr',True)
        self.keepdir     = opts.get('keepdir',None)
        # -------------------------------------------------
        self.atomicSymbols = opts.get('atomicSymbols',[])
        self.atomicNumbers = opts.get('atomicNumbers',[])
        self.efields       = opts.get('efields',[])
        # -------------------------------------------------
        self.basis       = opts.get('basis',BASIS)     # Gaussian basis type
        self.charge0     = opts.get('charge0',CHARGE0)
        self.charge1     = opts.get('charge1',CHARGE1)
        self.spin        = opts.get('spin',SPIN)
        self.nactel      = opts.get('nactel',NACTEL)
        self.inactive    = opts.get('inactive',INACTIVE)
        self.ras2        = opts.get('ras2',RAS2)
        self.ciroot      = opts.get('ciroot',CIROOT)
        # -------------------------------------------------
        self._oldD = None



[docs]
    def init_input_options(self,iopts):
        for k in iopts.keys():
            setattr(self,k,iopts[k][0])




[docs]
    def rasci(self, X, Time=0.0, root_g=1, **opts):
        """
        Perform a rasci calculation and return the gradient for one root

        Input
            X       --  3N array with geometry in bohr
            Time    --  Simulation time
            root_g  -- root for which gradient is returned

        Output:
        e  --- energy in au
        g  --- 3N vector for gradient in state root_g
        """
        t = _input_template_rasci()
        natoms = len(X)/3
        xcart = _format_coordinates(X * conv.au2an, self.atomicSymbols)
        if self.efields:
            efieldtxt = _format_efields(self.efields,Time)
        else:
            efieldtxt = '\n'
        rlxroot = root_g
        inputtxt = t.format(
                            title   = self._ID,
                            natoms  = natoms,
                            xcart   = xcart,
                            basis   = self.basis,
                            charge0 = self.charge0,
                            charge1 = self.charge1,
                            spin    = self.spin,
                            nactel  = self.nactel,
                            inactive= self.inactive,
                            ras2    = self.ras2,
                            ciroot  = self.ciroot,
                            rlxroot = rlxroot,
                            efield  = efieldtxt
                           )
        self._run_molcas(inputtxt)
        e = self._parse_rasci_energy()
        g = self._parse_rasci_gradient()
        self._clean()
        return e,g




[docs]
    def rasci_D(self, X, V, Time=0.0, root_g=1, dt=5.0, **opts):
        """
        Perform a rasci calculation and return the gradient for one root

        Also compute the matrix of time derivative couplings D by
            D_ij = (1/2dt) (<P_i(t)|P_j(t+dt)> - <P_i(t+dt)|P_j(t)>)

        Input
            X       --  3N array with geometry in bohr
            V       --  3N array with atomic velocities in bohr/au
            Time    --  Simulation time
            root_g  -- root for which gradient is returned

        The symmetric forward 1st order expression

            D_ij = (1/2dt) (<P_i(t)|P_j(t+dt)> - <P_i(t+dt)|P_j(t)>)

        is implemented using the nuclear velocities as

            D_ij = (1/2dt) (<P_i(X)|P_j(X+V*dt)> - <P_i(X+V*dt)|P_j(X)>)

        where dt is the internal parameter controlling the finite
        differentiation step.
        """
        t = _input_template_rasci_D()
        natoms = len(X)/3
        X1 = X + V*dt
        xcart = _format_coordinates(X * conv.au2an, self.atomicSymbols)
        xcart1 = _format_coordinates(X1 * conv.au2an, self.atomicSymbols)
        if self.efields:
            efieldtxt = _format_efields(self.efields,Time)
        else:
            efieldtxt = '\n'
        rlxroot = root_g
        nrofjobiphs = _format_nrjobiphs(self.ciroot)
        inputtxt = t.format(
                            title   = self._ID,
                            natoms  = natoms,
                            xcart   = xcart,
                            xcart1  = xcart1,
                            basis   = self.basis,
                            charge0 = self.charge0,
                            charge1 = self.charge1,
                            spin    = self.spin,
                            nactel  = self.nactel,
                            inactive= self.inactive,
                            ras2    = self.ras2,
                            ciroot  = self.ciroot,
                            rlxroot = rlxroot,
                            nrofjobiphs = nrofjobiphs,
                            efield  = efieldtxt
                           )
        self._run_molcas(inputtxt)
        e = self._parse_rasci_energy()
        g = self._parse_rasci_gradient()
        D = self._parse_rasci_D(dt)
        self._clean()
        return e,g,D




[docs]
    def rasci_D_num(self, X, V, Time=0.0, root_g=1, dt=5.0, dx=0.01, **opts):
        """
        Perform a rasci calculation and return the gradient for one root

        Use numerical gradients - There is a lot of things problematic (e.g. dependence on initial orbital guess!) 
        with this and it should not be used!!

        Also compute the matrix of time derivative couplings D by
            D_ij = (1/2dt) (<P_i(t)|P_j(t+dt)> - <P_i(t+dt)|P_j(t)>)

        Input
            X       --  3N array with geometry in bohr
            V       --  3N array with atomic velocities in bohr/au
            Time    --  Simulation time
            root_g  -- root for which gradient is returned

        The symmetric forward 1st order expression

            D_ij = (1/2dt) (<P_i(t)|P_j(t+dt)> - <P_i(t+dt)|P_j(t)>)

        is implemented using the nuclear velocities as

            D_ij = (1/2dt) (<P_i(X)|P_j(X+V*dt)> - <P_i(X+V*dt)|P_j(X)>)

        where dt is the internal parameter controlling the finite
        differentiation step.
        """
        ncoord = len(X)
        g = np.zeros(ncoord,float)
        t = _input_template_rasci_E_D()
        natoms = ncoord/3
        X1 = X + V*dt
        xcart = _format_coordinates(X * conv.au2an, self.atomicSymbols)
        xcart1 = _format_coordinates(X1 * conv.au2an, self.atomicSymbols)
        if self.efields:
            efieldtxt = _format_efields(self.efields,Time)
        else:
            efieldtxt = '\n'
        rlxroot = root_g
        nrofjobiphs = _format_nrjobiphs(self.ciroot)
        inputtxt = t.format(
                            title   = self._ID,
                            natoms  = natoms,
                            xcart   = xcart,
                            xcart1  = xcart1,
                            basis   = self.basis,
                            charge0 = self.charge0,
                            charge1 = self.charge1,
                            spin    = self.spin,
                            nactel  = self.nactel,
                            inactive= self.inactive,
                            ras2    = self.ras2,
                            ciroot  = self.ciroot,
                            rlxroot = rlxroot,
                            nrofjobiphs = nrofjobiphs,
                            efield  = efieldtxt
                           )
        self._run_molcas(inputtxt)
        e = self._parse_rasci_energy()
        D = self._parse_rasci_D(dt)
        # Calculate numerical gradient
        t = _input_template_rasci_E()
        save_storeinp = self.is_storeinp
        save_storelog = self.is_storelog
        self.is_storelog = False
        self.is_storeinp = False
        for i in range(len(X)):
            X1 = X.copy()
            X1[i] = X1[i] + dx
            xcart = _format_coordinates(X1 * conv.au2an, self.atomicSymbols)
            if self.efields:
                efieldtxt = _format_efields(self.efields,Time)
            else:
                efieldtxt = '\n'
            inputtxt = t.format(
                                title   = self._ID,
                                natoms  = natoms,
                                xcart   = xcart,
                                basis   = self.basis,
                                charge0 = self.charge0,
                                charge1 = self.charge1,
                                spin    = self.spin,
                                nactel  = self.nactel,
                                inactive= self.inactive,
                                ras2    = self.ras2,
                                ciroot  = self.ciroot,
                                rlxroot = rlxroot,
                                efield  = efieldtxt
                               )

            self._run_molcas(inputtxt)

            e1 = self._parse_rasci_energy()
            g[i] = ( e1[root_g-1] - e[root_g-1] )/dx
        self._clean()
        self.is_storeinp = save_storeinp
        self.is_storelog = save_storelog
        self._counter = self._counter - len(X) # set the counter back
        return e,g,D



    def _run_molcas(self,inputtxt):
        with open(self._inp,'w') as finp:
            finp.write(inputtxt)
        command = 'molcas {0} -o {1}'.format(self._inp,self._log)
        os.system(command)
        self._store_inp()
        self._store_log()
        self._counter += 1


    def _store_log(self):
        if self.is_storelog:
            cstr = '{0:0>7d}'.format(self._counter)
            if self.keepdir:
                dst = self.keepdir + '/' + cstr + '_' + self._log
            else:
                dst = cstr + '_' + self._log
            shutil.copy(self._log,dst)


    def _store_err(self):
        if self.is_storeerr:
            cstr = '{0:0>7d}'.format(self._counter)
            if self.keepdir:
                dst = self.keepdir + '/' + cstr + '_' + self._err
            else:
                dst = cstr + '_' + self._err
            shutil.copy(self._err,dst)


    def _store_inp(self):
        if self.is_storeinp:
            cstr = '{0:0>7d}'.format(self._counter)
            if self.keepdir:
                dst = self.keepdir + '/' + cstr + '_' + self._inp
            else:
                dst = cstr + '_' + self._inp
            shutil.copy(self._inp,dst)


    def _clean(self):
        # clean scratch area from old calculations
        pass


    def _parse_rasci_energy(self):
        regexp = r'^::\s+RASSCF root number\s*([0-9]+) Total energy: \s+([-+]?[0-9.]+)'
        matches = uti.fileLineParser(self._log,regexp,[int,float])
        energies = []
        for m in matches:
            energies.append( m[1] )
        return energies[:self.nstates] # necessary in case there are two rasscf calc in the log file


    def _parse_rasci_gradient(self):
        g = []
        patt1 = 'Molecular gradients'
        with open(self._log) as flog:
            line = flog.readline()
            while line:
                match = re.search(patt1,line)
                if match:
                    flog.readline()
                    flog.readline()
                    flog.readline()
                    flog.readline()
                    flog.readline()
                    flog.readline()
                    flog.readline()
                    line = flog.readline()
                    while line[0:2] != ' -':
                        sline = line.split()
                        g.append(float(sline[1]))
                        g.append(float(sline[2]))
                        g.append(float(sline[3]))
                        line = flog.readline()
                    break
                line = flog.readline()
        return np.asarray(g)


    def _parse_rasci_D(self,dt):
        O = readOverlapMatrix(self._log,2*self.nstates)
        S = SmatrixFromOverlapMatrix(O)
        D = (S - np.transpose(S))/2
        D = self._trackPhases(D)
        self._oldD = D
        return D/dt


    def _trackPhases(self,D):
        """
        Fix the phases of the D matrix according to the previous geometry

        The S matrix returned by SmatrixFromOverlapMatrix has the relative
        phases between the reference geometry and the small displacement
        correctly set. The corresponding D matrix is therefore also correct in
        this respect. However, between one geometry and the previous or next one
        there is still an arbitrarieness that has to be fixed by ensuring that
        the sign of the D_ij(X0) element is the same as the phase of the
        D_ij(X1) element.
        """
        if self._oldD is None:
            return D
        else:
            p = D*self._oldD
            mask = np.sign(p)
            return D*mask


    def _parse_execution_error(self):
        pass

#       patt = r'EXECUTION OF GAMESS TERMINATED -ABNORMALLY-'
#       with open(self._log) as flog:
#           line = flog.readline()
#           while line:
#               match = re.search(patt,line)
#               if match: # execution error
#                   sys.stdout.write(line)
#                   sys.stdout.write(' Inspect file '+self._log + '\n')
#                   sys.exit(1)
#               line = flog.readline()



#############################################################################
#                             MODULE  FUNCTIONS
#############################################################################

#############################################################################
# Output (log) file parsers
#############################################################################

[docs]
def readOverlapMatrix(logfile,nstates):
    """
    Read the overlap matrix generated by rassi
    Input
    logfile     --  string, path to log file
    nstates     --  number of overlaped states
    NOTE: in calculations of time derivative couplings,
    nstates will often be 2*N, where N is the number of
    physical states of interest. This is because rassi
    calculates all overlaps between both sets of states at the
    two displaced geometries.
    """
    patt1 = r'      OVERLAP MATRIX FOR THE ORIGINAL STATES:'
    ovlm = []
    with open(logfile,'r') as flog:
        line = flog.readline()
        while line:
            match = re.search(patt1,line)
            if match: # start of OVL matrix found
                flog.readline()
                line = flog.readline()
                while line[0:3] != '---':
                    sline = list ( map( float, line.split() ) )
                    ovlm.append(sline)
                    line = flog.readline()
                break
            line = flog.readline()
    ovlm = [ val for sublist in ovlm for val in sublist ]
    OVL = np.zeros((nstates,nstates),float)
    for i in range(nstates):
        for j in range(i+1):
            OVL[i,j] = ovlm.pop(0)
            OVL[j,i] = OVL[i,j]
    return OVL




[docs]
def SmatrixFromOverlapMatrix(ovl):
    """
    Extract the useful matrix elements from the overlap matrix of rassi

    The overlap matrix provided by rassi contains useless blocs with overlaps
    between eigenstates states at the same geometry.
    This The S_ij matrix has elements

    <Psi_i(x_a) | Psi_j(x_b)>

    where x_a and x_b are different nuclear configurations
    """
    N = ovl.shape[0]//2
    S = np.zeros((N,N),float)
    for i in range(N):
        for j in range(N,2*N):
            S[i,j-N] = ovl[i,j]
    # Fix phases of S matrix:
    # S_ii must be positive
    # In case it is negative, the ket wave function has changed phase with
    # respect to the corresponding bra. We fix it by multiplying the
    # corresponding column by -1
    for i,d in enumerate(S.diagonal()):
        if d<0:
            S[:,i] = -S[:,i]
    return S






#############################################################################
# String formaters for different parts of the input file
#############################################################################
def _format_coordinates(x,smb):
    """
    return string with formated Cartesian coordinates for $DATA card

    x   : 3N array in Angstrom
    smb : List of atomic symbols
    """
    n = len(x)
    xlines = []
    line = ' {0} {1[0]:12.7f} {1[1]:12.7f} {1[2]:12.7f}'
    x.shape = (n/3,3)
    for atomnum,atomsym in enumerate(smb):
        xlines.append( line.format(atomsym,x[atomnum]) )
    xtxt = '\n'.join(xlines)
    x.shape = (n,)
    return xtxt


def _format_efields(eflist,t):
    pass

def _format_nrjobiphs(n):
    nn = int(n)
    s = ' '
    ss = s.join( list( map(str,range(1,nn+1)) ) )
    return ss+'; '+ss


#############################################################################
# Input templates for different types of calculations
#############################################################################
def _input_template_rasci():
    """
    Return template (string) for Molcas RASCI energy and gradient calculation
    """
    t="""
>>EXPORT MOLCAS_NEW_WORKDIR=YES

&SEWARD
    Title = {title}
    coord
    {natoms}
    {title}
{xcart}
    basis = {basis}
    group = c1

&SCF
    charge   = {charge0}

&RASSCF
    title    = {title}
    charge   = {charge1}
    spin     = {spin}
    nactel   = {nactel} 0 0
    inactive = {inactive}
    ras2     = {ras2}
    ciroot   = {ciroot} {ciroot} 1
    cionly
    rlxroot  = {rlxroot}

&MCLR
    iter = 6000
    SALA = 1
    threshold = 1.e-4

&ALASKA
   PNEW
 """
    return t


def _input_template_rasci_D():
    """
    Return template (string) for Molcas RASCI energy and gradient calculation
    and time derivative overlaps based on rassi
    """
    t="""
>>EXPORT MOLCAS_NEW_WORKDIR=YES

&SEWARD
    Title = {title}
    coord
    {natoms}
    {title}
{xcart}
    basis = {basis}
    group = c1

&SCF
    charge   = {charge0}

&RASSCF
    title    = {title}
    charge   = {charge1}
    spin     = {spin}
    nactel   = {nactel} 0 0
    inactive = {inactive}
    ras2     = {ras2}
    ciroot   = {ciroot} {ciroot} 1
    cionly
    rlxroot  = {rlxroot}

&MCLR
    iter = 6000
    SALA = 1
    threshold = 1.e-4

&ALASKA
   PNEW

>>COPY $Project.JobIph JOB001
>>RM $Project.RunFile

&SEWARD
    Title = {title}
    coord
    {natoms}
    {title}
{xcart1}
    basis = {basis}
    group = c1

&SCF
    charge   = {charge0}

&RASSCF
    title    = {title}
    charge   = {charge1}
    spin     = {spin}
    nactel   = {nactel} 0 0
    inactive = {inactive}
    ras2     = {ras2}
    ciroot   = {ciroot} {ciroot} 1
    cionly
    rlxroot  = {rlxroot}

>>COPY $Project.JobIph JOB002

&RASSI
    Nrofjobiphs
    2 {ciroot} {ciroot};
    {nrofjobiphs}
    overlaps
    ONEL

 """
    return t

def _input_template_rasci_E():
    """
    Return template (string) for Molcas RASCI energy
    """
    t="""
>>EXPORT MOLCAS_NEW_WORKDIR=YES

&SEWARD
    Title = {title}
    coord
    {natoms}
    {title}
{xcart}
    basis = {basis}
    group = c1

&SCF
    charge   = {charge0}

&RASSCF
    title    = {title}
    charge   = {charge1}
    spin     = {spin}
    nactel   = {nactel} 0 0
    inactive = {inactive}
    ras2     = {ras2}
    ciroot   = {ciroot} {ciroot} 1
    cionly
    rlxroot  = {rlxroot}

 """
    return t


def _input_template_rasci_E_D():
    """
    Return template (string) for Molcas RASCI energy
    and time derivative overlaps based on rassi
    """
    t="""
>>EXPORT MOLCAS_NEW_WORKDIR=YES

&SEWARD
    Title = {title}
    coord
    {natoms}
    {title}
{xcart}
    basis = {basis}
    group = c1

&SCF
    charge   = {charge0}

&RASSCF
    title    = {title}
    charge   = {charge1}
    spin     = {spin}
    nactel   = {nactel} 0 0
    inactive = {inactive}
    ras2     = {ras2}
    ciroot   = {ciroot} {ciroot} 1
    cionly
    rlxroot  = {rlxroot}

>>COPY $Project.JobIph JOB001
>>RM $Project.RunFile

&SEWARD
    Title = {title}
    coord
    {natoms}
    {title}
{xcart1}
    basis = {basis}
    group = c1

&SCF
    charge   = {charge0}

&RASSCF
    title    = {title}
    charge   = {charge1}
    spin     = {spin}
    nactel   = {nactel} 0 0
    inactive = {inactive}
    ras2     = {ras2}
    ciroot   = {ciroot} {ciroot} 1
    cionly
    rlxroot  = {rlxroot}

>>COPY $Project.JobIph JOB002

&RASSI
    Nrofjobiphs
    2 {ciroot} {ciroot};
    {nrofjobiphs}
    overlaps
    ONEL

 """
    return t



#############################################################################
# Quick standalone test code
#############################################################################
if __name__ == '__main__':
    mo = molcas()
    mo.nstates = 4
    mo.is_storeinp = True
    mo.is_storelog = True
    mo.is_storeerr = True
    mo.charge0   = 0
    mo.charge1   = 1
    mo.spin      = 2
    mo.nactel    = 7
    mo.inactive  = 1
    mo.ras2      = 4
    mo.ciroot    = 4
    mo.nrofjobiphs = '2 4 4; 1 2 3 4; 1 2 3 4'
    mo.atomicSymbols = ['C','H','H','H','H']
    mo.atomicNumbers = ['6.0','1.0','1.0','1.0','1.0']
    x=np.array([
      0.000000,  0.000000,  0.000000,
      0.000000,  0.000000,  1.050000,
      1.037090,  0.000000, -0.366667,
     -0.542115, -0.938971, -0.383333,
     -0.565685,  0.979796, -0.400000]) * conv.an2au
    v=np.array([
      0.000000,  0.000000,  0.010000,
      0.000000,  0.000000,  0.000000,
      0.000000,  0.000000,  0.000000,
      0.000000,  0.000000,  0.000000,
      0.000000,  0.000000,  0.000000])
    ef = efi.EField()
    ef.P = np.array([1,0,0],float)
    def a(t): return 0.01
    ef.A = a
    mo.efields = [ef]
    e,g,D = mo.rasci_D(x,v,Time=0.0,root_g=2)
#   sys.stdout.write('\n'+str(e)+'\n')
#   sys.stdout.write(str(g)+'\n')