Source code for CDTK.Interfaces.GamessUSInterface

#*  **************************************************************************
#*
#*  CDTK, Chemical Dynamics Toolkit
#*  A modular system for chemical dynamics applications and more
#*
#*  Copyright (C) 2011, 2012, 2013, 2014, 2015, 2016
#*  Oriol Vendrell, DESY, <oriol.vendrell@desy.de>
#*
#*  Copyright (C) 2017, 2018, 2019
#*  Ralph Welsch, DESY, <ralph.welsch@desy.de>
#*
#*  Copyright (C) 2020, 2021, 2022, 2023
#*  Ludger Inhester, DESY, ludger.inhester@cfel.de
#*
#*  This file is part of CDTK.
#*
#*  CDTK is free software: you can redistribute it and/or modify
#*  it under the terms of the GNU General Public License as published by
#*  the Free Software Foundation, either version 3 of the License, or
#*  (at your option) any later version.
#*
#*  This program is distributed in the hope that it will be useful,
#*  but WITHOUT ANY WARRANTY; without even the implied warranty of
#*  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#*  GNU General Public License for more details.
#*
#*  You should have received a copy of the GNU General Public License
#*  along with this program.  If not, see <http://www.gnu.org/licenses/>.
#*
#*  **************************************************************************

import os
import shutil
import sys
import re
import numpy as np

import CDTK
from CDTK.Tools.Utils import getUniqueID
import CDTK.Tools.Utils as uti
import CDTK.Tools.EField as efi
import CDTK.Tools.Conversion as conv

TINY = 1.e-8
PATHGMS = os.environ.get('CDTK_GAMESSUS',None)

# - Default definitions
#   !! alteration might lead to unittest failures !!
GUESS = 'HUCKEL'
MEMDDI = 2
MWORDS = 2
GBASIS = "TZV"
NGAUSS = 3
ICHARG = 0
MULT   = 1
SZ     = 0
IROOT  = 1
NSTATE = 1
WSTATE = 1.0
MXXPAN = 2*NSTATE
FULLNR = '.TRUE.' # mcscf second order, with an exact hessian
CPTOL = 1.0e-6    # Tolerance for CPMCHF equations
MAXIT = 800       # Maximum iterations for CPMCHF equations



[docs]
class gamess(object):
    def __init__(self,**opts):
        self.engine      = 'GamessUS' # Used in the wrapper class
        self._ID         = opts.get('ID', getUniqueID(12))
        self._inp        = self._ID+'.inp'
        self._log        = self._ID+'.log'
        self._dat        = self._ID+'.dat'
        self._counter    = 0                          # Execution counter
        self._norb       = 1
        self._vec        = ''   
        self.pathgms     = opts.get('pathgms',PATHGMS)
        self.scratch     = opts.get('scratch','/tmp')
        self.itemplate   = opts.get('inputTemplate','')
        self.guess       = opts.get('guess',GUESS)
        self.is_storelog = opts.get('is_storelog',True)
        self.is_storedat = opts.get('is_storedat',True)
        self.is_storeinp = opts.get('is_storeinp',True)
        self.keepdir     = opts.get('keepdir',None)
        # -------------------------------------------------
        self.atomicSymbols = opts.get('atomicSymbols',[])
        self.atomicNumbers = opts.get('atomicNumbers',[])
        self.efields       = opts.get('efields',[])
        # -------------------------------------------------
        self.memddi      = opts.get('memddi',MEMDDI)  # distributed memory (GB)
        self.mwords      = opts.get('mwords',MWORDS)
        self.gbasis      = opts.get('gbasis',GBASIS)  # Gaussian basis type
        self.ngauss      = opts.get('ngauss',NGAUSS)  # Number of contracted Gaussians
        self.icharg      = opts.get('icharg',ICHARG)
        self.mult        = opts.get('mult',MULT)
        self.sz          = opts.get('sz',SZ)
        self.iroot       = opts.get('iroot',IROOT)
        self.nstate      = opts.get('nstate',NSTATE)
        self.wstate      = opts.get('wstate',WSTATE)
        self.mxxpan      = opts.get('mxxpan',MXXPAN) # Eigenvecs in Dav
        self.fullnr      = opts.get('fullnr',FULLNR)
        # -------------------------------------------------

        if not self.pathgms:
            raise ValueError('Gamess-US path is not set. Please set CDTK_GAMESSUS to the path of your GAMESSUS directory')
        if self.pathgms[-1] != '/': self.pathgms = self.pathgms + '/'



[docs]
    def init_input_options(self,iopts):
        """
        This functions sets input options.

        Input: 
        iopts  --- Input options as a dictonary, where the key defines the variable 
        and the value is a list with the value of the variable as first entry.
        """
        for k in iopts.keys():
            setattr(self,k,iopts[k][0])




[docs]
    def energy_HF(self, X, Time=0.0, **opts):
        """
        Hartree-Fock energy calculation. Optionally with electric fields.

        Input:
        X    -- 3N array of atomic positions, in Bohr (au).
        Time -- current time in au.

        Output:
        e    -- energy obtained from Gamess  (in Hartree).
        """
        t = _input_template_HF_E_EF()
        xcart = _format_coordinates(X * conv.au2an, self.atomicSymbols, self.atomicNumbers)
        if self.efields:
            efieldtxt = _format_efields(self.efields,Time)
        else:
            efieldtxt = '\n'
        if self._counter > 0:
            self.guess = 'MOREAD'
        inputtxt = t.format(efield=efieldtxt,
                            memddi=self.memddi,
                            mwords=self.mwords,
                            xcart=xcart,
                            guess=self.guess,
                            norb=self._norb,
                            vec=self._vec,
                            mult   = self.mult,
                            icharg = self.icharg,
                            gbasis=self.gbasis,
                            ngauss=self.ngauss)
        self._run_gms(inputtxt)
        e = self._parse_energy_HF()
        self._clean()
        return e



[docs]
    def energy_HF_HOMO(self, X, Time=0.0, **opts):
        """
        Hartree-Fock energy calculation. Returns the HF energy and the energy of the HOMO. 
        Optionally with electric fields.
        TODO not working correctly as orbitals are not correctly printed - needs to be fixed

        X   -- 3N array of atomic positions, in Bohr (au)
        """
        t = _input_template_HF_E_EF_HOMO()
        xcart = _format_coordinates(X * conv.au2an, self.atomicSymbols, self.atomicNumbers)
        if self.efields:
            efieldtxt = _format_efields(self.efields,Time)
        else:
            efieldtxt = '\n'
        if self._counter > 0:
            self.guess = 'MOREAD'
        inputtxt = t.format(efield=efieldtxt,
                            memddi=self.memddi,
                            mwords=self.mwords,
                            xcart=xcart,
                            guess=self.guess,
                            norb=self._norb,
                            vec=self._vec,
                            mult   = self.mult,
                            icharg = self.icharg,
                            gbasis=self.gbasis,
                            ngauss=self.ngauss)
        self._run_gms(inputtxt)
        e = self._parse_energy_HF()
        homo = self._parse_homo_energy()
        self._clean()
        return e,homo



[docs]
    def energy_HF_KOOPMANS(self, X, Time=0.0, **opts):
        """
        Hartree-Fock energy calculation. 
        Returns the HF energy plus energy of the excited states by Koopmans'
        theorem. 

        E(1) = -E_HOMO
        E(2) = -E_HOMO-1 etc.

        X   -- 3N array of atomic positions, in Bohr (au)
        """
        t = _input_template_HF_KOOPMANS()
        xcart = _format_coordinates(X * conv.au2an, self.atomicSymbols, self.atomicNumbers)
#        if self._counter > 0:
#            self.guess = 'MOREAD'
        inputtxt = t.format(memddi=self.memddi,
                            mwords=self.mwords,
                            xcart=xcart,
                            guess=self.guess,
                            norb=self._norb,
                            vec=self._vec,
                            mult   = self.mult,
                            icharg = self.icharg,
                            gbasis=self.gbasis,
                            ngauss=self.ngauss)
        self._run_gms(inputtxt)
        moe = self._parse_mo_energy()
        moe = -moe[::-1]
        moe = moe[:self.nstate]
        self._clean()
        return moe



[docs]
    def energy_gradient_HF(self, X, Time=0.0, **opts):
        """
        Hartree-Fock energy and gradient calculation. Optionally with electric fields.

        This function is useful for ground state dynamics in the presence of electric
        fields of (in principle) arbitrary strength that are non-ressonant with
        electronic transitions but which affect nuclear motion. The electronic structure
        is solved with explicit inclusion of the electric field (if present) by
        gamess-us.

        Input:
        X    -- 3N array of atomic positions, in Bohr (au).
        Time -- current time in au.

        Output:
        e    -- energy obtained from Gamess (in Hartree).
        g    -- 3N array of gradient obtained from Gamess (in Hartree / Bohr).
        """
        t = _input_template_HF_EGR_EF()
        xcart = _format_coordinates(X * conv.au2an, self.atomicSymbols, self.atomicNumbers)
        if self.efields:
            efieldtxt = _format_efields(self.efields,Time)
        else:
            efieldtxt = '\n'
        if self._counter > 0:
            self.guess = 'MOREAD'
        inputtxt = t.format(efield  = efieldtxt,
                            memddi  = self.memddi,
                            mwords  = self.mwords,
                            xcart   = xcart,
                            guess   = self.guess,
                            norb    = self._norb,
                            vec     = self._vec,
                            mult    = self.mult,
                            icharg  = self.icharg,
                            gbasis  = self.gbasis,
                            ngauss  = self.ngauss)
        self._run_gms(inputtxt)
        e = self._parse_energy_HF()
        g = self._parse_gradient_HF()
        self._clean()
        return e,g



[docs]
    def energy_CIS(self, X, **opts):
        """
        Calculate energy of excited states at the configuration interaction 
        singles (CIS) level of theory (zero-th order accuracy)

        X - nuclear coordinates (AU)
        """
        t = _input_template_CIS_E()
        xcart = _format_coordinates(X * conv.au2an, self.atomicSymbols, self.atomicNumbers)
        inputtxt = t.format(memddi = self.memddi,
                            mwords=self.mwords,
                            xcart=xcart,
                            guess=self.guess,
                            norb=self._norb,
                            vec=self._vec,
                            mult=self.mult,
                            icharg=self.icharg,
                            gbasis=self.gbasis,
                            ngauss=self.ngauss)
        self._run_gms(inputtxt)
        e = self._parse_energy_CIS()
        self._clean()
        return e




[docs]
    def mcscf(self, X, Time=0.0, **opts):
        """
        MCSCF field calculation

        It provides the energy and gradient of the ground or an excited state
        at the mcscf level of theory
        TODO OV: parsing of the energy does not work properly if only one state (the
        GS) is calculated. This should be eventually corrected.

        Input:
        X    -- 3N array of atomic positions, in Bohr (au).
        Time -- current time in au.

        Output:
        e    -- array of M energies obtained from Gamess (in Hartree).
        g    -- Array of M 3N arrays of gradient obtained from Gamess (in Hartree / Bohr).
        """
        t = _input_template_MCSCF()
        xcart = _format_coordinates(X * conv.au2an, self.atomicSymbols, self.atomicNumbers)
        if self.efields:
            efieldtxt = _format_efields(self.efields,Time)
        else:
            efieldtxt = '\n'
        if self._counter > 0:
            self.guess = 'MOREAD'
        inputtxt = t.format(
                            runtyp = 'gradient',
                            guess  = self.guess,
                            norb=self._norb,
                            vec=self._vec,
                            memddi = self.memddi,
                            mwords = self.mwords,
                            xcart  = xcart,
                            gbasis = self.gbasis,
                            ngauss = self.ngauss,
                            mult   = self.mult,
                            icharg = self.icharg,
                            ncore  = self.ncore,
                            nact   = self.nact,
                            nels   = self.nels,
                            iroot  = self.iroot,
                            nstate = self.nstate,
                            mxxpan = self.mxxpan,
                            fullnr = self.fullnr,
                            sz     = self.sz,
                            wstate = self.wstate,
                            efield = efieldtxt,
                            cptol  = CPTOL,
                            maxit  = MAXIT
                           )
        self._run_gms(inputtxt)
        e = self._parse_mcscf_energy()
        g = self._parse_nacme_gradient()
        self._clean()
        return e,g




[docs]
    def mcqdpt(self, X, Time=0.0, **opts):
        t = _input_template_MCQDPT()
        xcart = _format_coordinates(X * conv.au2an, self.atomicSymbols, self.atomicNumbers)
        if self.efields:
            efieldtxt = _format_efields(self.efields,Time)
        else:
            efieldtxt = '\n'
        if self._counter > 0:
            self.guess = 'MOREAD'
        inputtxt = t.format(
                            runtyp = 'energy',
                            guess  = self.guess,
                            norb=self._norb,
                            vec=self._vec,
                            memddi = self.memddi,
                            mwords = self.mwords,
                            xcart  = xcart,
                            gbasis = self.gbasis,
                            ngauss = self.ngauss,
                            mult   = self.mult,
                            icharg = self.icharg,
                            ncore  = self.ncore,
                            nact   = self.nact,
                            nels   = self.nels,
                            iroot  = self.iroot,
                            nstate = self.nstate,
                            mxxpan = self.mxxpan,
                            sz     = self.sz,
                            wstate = self.wstate,
                            nel     = self.nel,
                            nmoact  = self.nmoact,
                            nmofzc  = self.nmofzc,
                            nmodoc  = self.nmodoc,
                            nmofzv  = self.nmofzv,
                            kstate  = self.kstate,
                            edshft = self.edshft,
                            efield = efieldtxt,
                            cptol  = CPTOL,
                            maxit  = self.maxit
                           )
        
        self._run_gms(inputtxt)
        e = self._parse_mcqdpt_energy()
        self._clean()
        return e

                            


[docs]
    def nacme(self, X, Time=0.0, **opts):
        """
        MCSCF field calculation followed by a calculation of the NAC vector

        It provides the energy, gradient on each state, nacme vector between
        states and dipola matrix elements.

        Input:
        X    -- 3N array of atomic positions, in Bohr (au).
        Time -- current time in au.

        Output:
        e    -- array of M energies obtained from Gamess (in Hartree).
        g    -- Array of M 3N arrays of gradient obtained from Gamess (in Hartree / Bohr).
        d    -- Array of MxM 3N arrays of non-adiabatic couplings in au.
        m    -- Array of MxMx3 transition dipole matrix elements in Debye.
        """
        t = _input_template_MCSCF()
        xcart = _format_coordinates(X * conv.au2an, self.atomicSymbols, self.atomicNumbers)
        if self.efields:
            efieldtxt = _format_efields(self.efields,Time)
        else:
            efieldtxt = '\n'
        if self._counter > 0:
            self.guess = 'MOREAD'
        inputtxt = t.format(
                            runtyp = 'nacme',
                            guess  = self.guess,
                            norb=self._norb,
                            vec=self._vec,
                            memddi = self.memddi,
                            mwords = self.mwords,
                            xcart  = xcart,
                            gbasis = self.gbasis,
                            ngauss = self.ngauss,
                            mult   = self.mult,
                            icharg = self.icharg,
                            ncore  = self.ncore,
                            nact   = self.nact,
                            nels   = self.nels,
                            iroot  = self.iroot,
                            nstate = self.nstate,
                            mxxpan = self.mxxpan,
                            fullnr = self.fullnr,
                            sz     = self.sz,
                            wstate = self.wstate,
                            efield = efieldtxt,
                            cptol  = CPTOL,
                            maxit  = MAXIT
                           )
        
        self._run_gms(inputtxt)
        e = self._parse_nacme_energy()
        g = self._parse_nacme_gradient()
        d = self._parse_nacme_nacme()
        m = self._parse_nacme_dipole()
        nac = np.zeros((g.shape[0], g.shape[0], g.shape[1]))
        for key,value in d.items():
            nac[key[0], key[1], :] = value
        self._clean()
        return e,g,nac,m



[docs]
    def hessian(self, X, **opts):
        """
        Calculation of the Hessian matrix

        Input:
        X - 3N array of atomic positions [a.u.]

        Output:
        H - 3Nx3N array of Hessian in a.u.
        """
        t = _input_template_HESSIAN()
        xcart = _format_coordinates(X * conv.au2an, self.atomicSymbols, self.atomicNumbers)
        inputtxt = t.format(
                            icharg = self.icharg,
                            mult = self.mult,
                            memddi = self.memddi,
                            mwords = self.mwords,
                            gbasis = self.gbasis,
                            ngauss = self.ngauss,
                            guess = self.guess,
                            norb = self._norb,
                            xcart = xcart,
                            vec = self._vec
                           )
        self._run_gms(inputtxt)
        H = self._parse_hessian()
        self._clean()
        return H



[docs]
    def optimize(self, X, **opts):
        """
        Returns the optimized geometry [XYZ, Angstrom]

        Input:
        X - 3N array of initial atomic positions [a.u.]

        Output:
        X0 - 3N array of optimized atomic positions [a.u.]
        """
        t = _input_template_OPTIMIZE()
        xcart = _format_coordinates(X * conv.au2an, self.atomicSymbols, self.atomicNumbers)
        inputtxt = t.format(
                            memddi = self.memddi,
                            mwords = self.mwords,
                            gbasis = self.gbasis,
                            ngauss = self.ngauss,
                            guess = self.guess,
                            norb = self._norb,
                            icharg = self.icharg,
                            mult = self.mult,
                            xcart = xcart
                        )
        self._run_gms(inputtxt)
        X0 = self._parse_equilibrium_geometry()
        self._clean()
        return X0



[docs]
    def read_input_template(self,fname):
        """
        This function reads in a template for setting up a Gamess calculation.

        Input:
        fname --- Name of the file (including path) that contains the template.
        """
        ftmpl = open(fname,'r')
        self.itemplate = ftmpl.read()
        ftmpl.close()




[docs]
    def efield(self,t=0.0):
        """
        Return array with x,y,z components of the electric fields.

        Input:
        t  --- current time in au.
        
        Output:
        E  --- 3 vector of the current value of the electric field in au.
        """
        E = np.zeros(3,float)
        for ef in self.efields:
            E = E + ef.P*ef.A(t)
        return E




[docs]
    def dt_efield(self,t=0.0,Dt=0.1):
        """
        Return array with x,y,z components of dE/dt (field time derivative)
        obtained from a finite difference calculation.

        Input:
        t  --- current time in au.
        Dt --- step length for finite differences in au.
        
        Output:
        dEdt  --- 3 vector of the current value of the derivative of the electric field in au.
        """
        dEdt = np.zeros(3, float)
        for ef in self.efields:
            Em = ef.P*ef.A(t-0.5*Dt)
            Ep = ef.P*ef.A(t+0.5*Dt)
            dEdt = dEdt + (Ep-Em)/Dt
        return dEdt




[docs]
    def run(self, X, **opts):
        """
        Run a Gamess-US calculation

        gamess.itemplate must be assigned
        """
        if not self.itemplate:
            raise ValueError('gamess.itemplate not defined')
        vec = opts.get('vec','') # vec data from a previous run
        xcart = _format_coordinates(X * conv.au2an, self.atomicSymbols, self.atomicNumbers)
        inputtxt = self.itemplate.format(xcart=xcart,
                                           vec=vec)
        self._run_gms(inputtxt)



    def _run_gms(self,inputtxt):
        self._clean()
        finp = open(self._inp,'w')
        finp.write(inputtxt)
        finp.close()
        rungms = self.pathgms + '/rungms'
        command = '{0} {1} >{2} 2>&1'.format(rungms,self._inp,self._log)
        os.system(command)

        self._vec,self._norb = self._parse_vec()
        self._store_log()
        self._store_dat()
        self._store_inp()
        self._counter += 1
        self._parse_execution_error()


    def _store_log(self):
        """ 
        Function to save log files from a run.
        """
        if self.is_storelog:
            cstr = '{0:0>7d}'.format(self._counter)
            if self.keepdir:
                dst = self.keepdir + '/' + cstr + '_' + self._log
            else:
                dst = cstr + '_' + self._log
            shutil.copy(self._log,dst)


    def _store_dat(self):
        """ 
        Function to save dat files from a run.
        """
        if self.is_storedat:
            cstr = '{0:0>7d}'.format(self._counter)
            if self.keepdir:
                dst = self.keepdir + '/' + cstr + '_' + self._dat
            else:
                dst = cstr + '_' + self._dat
            shutil.copy(self._dat,dst)


    def _store_inp(self):
        """ 
        Function to save input files from a run.
        """

        if self.is_storeinp:
            cstr = '{0:0>7d}'.format(self._counter)
            if self.keepdir:
                dst = self.keepdir + '/' + cstr + '_' + self._inp
            else:
                dst = cstr + '_' + self._inp
            shutil.copy(self._inp,dst)



    def _clean(self):
        """ 
        Function to remove input, dat and log files.
        """

        # clean scratch area from old calculations
        extensions = ['.inp','.dat','.log']
        for e in extensions:
            fname = self._ID + e
            if os.path.isfile(fname):
                try:
                    os.unlink(fname)
                except:
                    sys.stderr.write(fname+' could not be deleted')


    def _parse_energy_HF(self):
        """ 
        Function to parse energy of a Hartree-Fock calculation from a log file.

        Output:
        e  --- parsed energy in au.
        """

        e = None
        try:
            flog = open(self._log,'r')
        except:
            sys.stderr.write(flog+' file could not be opened')
        patt = r'\s+TOTAL ENERGY =\s+([-+]?[0-9.]+)$'
        for l in flog.readlines():
            match = re.search(patt,l)
            if match:
                e = float(match.groups()[0])
                break
        flog.close()
        if e:
            return e
        else:
            raise ValueError('TOTAL ENERGY not found in '+self._log+' file')

    def _parse_homo_energy(self):
        patt1 = r'^\s*EIGENVALUES\s*$'
        patt2 = r'^\s*...... END OF RHF CALCULATION ......\s*$'
        patt3 = '(-[0-9]+\.[0-9]+)'

        evals = [] # HOMO is highest non-positive eigenvalue

        with open(self._log) as f:
            line = f.readline()
            while line:
                if re.search(patt1,line):
                    line = f.readline()
                    while not re.search(patt2,line):
                        m = re.findall(patt3,line)
                        if m:
                            evals.append(m)
                        line = f.readline()
                line = f.readline()
            f.close()
        if evals:
            evals = list(map(float, np.asarray(evals).flatten()))
            return evals[-1]
        else:
            raise ValueError('HOMO energy not found in ' + self._log)

    def _parse_mo_energy(self):
        patt1 = r'^\s*EIGENVALUES\s*$'
        patt2 = r'^\s*...... END OF RHF CALCULATION ......\s*$'
        patt3 = '(-[0-9]+\.[0-9]+)'
        patt4 = r'^\s*(ENERGY|DENSITY) CONVERGED\s*$'

        evals = [] 
        density_converged = False

        with open(self._log) as f:
            line = f.readline()
            while line:
                if re.search(patt4, line):
                    density_converged = True
                elif re.search(patt1,line):
                    line = f.readline()
                    while not re.search(patt2,line):
                        m = re.findall(patt3,line)
                        for mm in m:
                            evals.append(float(mm))
                        line = f.readline()
                line = f.readline()
            f.close()
        if density_converged:
            if evals:
                return np.asarray(evals)
            else:
                raise ValueError('MO energy not found in ' + self._log)
        else:
            raise ValueError('Density not converged in ' + self._log)

    def _parse_energy_CI(self):
        """ 
        Function to parse energoes of a CI calculation from a log file.

        Output:
        e  --- array of parsed energies in au.
        """

        e = []
        try:
            flog = open(self._log,'r')
        except:
            sys.stderr.write(flog+' file could not be opened')
        patt = r'\s+STATE\s+[0-9]+\s+ENERGY=\s+([-+]?[0-9.]+)'
        for l in flog.readlines():
            match = re.search(patt,l)
            if match:
                e.append( float(match.groups()[0]) )
        flog.close()
        if e:
            return e
        else:
            raise ValueError('TOTAL ENERGY not found in '+self._log+' file')

    def _parse_mcqdpt_energy(self):
        e = []
        try:
            flog = open(self._log,'r')
        except:
            sys.stderr.write(flog+' file could not be opened')
        patt = r'\s+STATE #\s+[0-9]+\s+ENERGY =\s+([-+]?[0-9.]+)'
        for l in flog.readlines():
            match = re.search(patt,l)
            if match:
                e.append( float(match.groups()[0]) )
        flog.close()
        if e:
            return e
        else:
            raise ValueError('TOTAL ENERGY not found in '+self._log+' file')



    def _parse_mcscf_energy(self):
        """
        Function to parse energies of a MCSCF calculation from a log file.

        Output:
        e --- array of parsed energies in au.
        """
        regexp = r'^ ALL STATES AVERAGED IN THE MCSCF ARE DEGENERATE'
        matches = uti.fileLineParser(self._log,regexp,[float])
        if len(matches)>0:
            regexp = r'^ FINAL MCSCF ENERGY IS\s+([-+]?[0-9.]+)'
            matches = uti.fileLineParser(self._log,regexp,[float])
            mcscf_energy = matches[0][0]
            energies = [mcscf_energy] * int(self.nstate)
            print(self.nstate)
            #for i in range(self.nstate):
            #    energies.append(mcscf_energy)
        else:
            regexp = r'^ ENERGY=\s+([-+]?[0-9.]+)'
            matches = uti.fileLineParser(self._log,regexp,[float])
            energies = []
            for m in matches:
                energies.append( m[0] )
        return energies

    def _parse_nacme_energy(self):
        """ 
        Function to parse energy of a non-adiabatic coupling calculation from a log file.

        Output:
        e  --- array of parsed energies in au.
        """

        regexp = r' COMPUTING NACME FOR STATE WITH E=\s+([-+]?[0-9.]+)'
        matches = uti.fileLineParser(self._log,regexp,[float])
        energies = []
        for m in matches:
            energies.append( m[0] )

        return energies


    def _parse_gradient_HF(self):
        """ 
        Function to parse gradient of a Hartree-Fock calculation from a log file.

        Output:
        g  --- 3N array of parsed gradient in au.
        """

        g = []
        try:
            flog = open(self._log,'r')
        except:
            sys.stderr.write(flog+' file could not be opened')
        patt1 = r'(\sUNITS ARE HARTREE/BOHR)'
        patt2 = r'\s+[0-9]+\s+[A-Za-z]+\s+([-+]?[0-9.]+)\s+([-+]?[0-9.]+)\s+([-+]?[0-9.]+)$'
        line = flog.readline()
        while line:
            match1 = re.search(patt1,line)
            if match1: # start of gradients found
                line = flog.readline()
                while line != '\n':
                    match2 = re.search(patt2,line)
                    g.append( np.array( list( map(float,match2.groups()) ) ) )
                    line = flog.readline()
                g = np.asarray(g).flatten()  # 3N array with 1st derivatives
                break
            line = flog.readline()
        flog.close()
        return g


    def _parse_nacme_gradient(self):
        """ 
        Function to parse gradient of a non-adiabatic coupling calculation from a log file.

        Output:
        g  --- Array of M 3N array of parsed gradient in au.
        """

        g = []
        patt1 = r'(\sUNITS ARE HARTREE/BOHR)'
        patt2 = r'\s+[0-9]+\s+[A-Za-z]+\s+([-+]?[0-9.]+)\s+([-+]?[0-9.]+)\s+([-+]?[0-9.]+)$'
        with open(self._log) as flog:
            line = flog.readline()
            while line:
                match1 = re.search(patt1,line)
                if match1: # start of gradients block found
                    gradS = _parse_gradient_block(flog)
                    g.append( gradS )
                line = flog.readline()

        regexp = r'^ ALL STATES AVERAGED IN THE MCSCF ARE DEGENERATE'
        matches = uti.fileLineParser(self._log,regexp)
        if len(matches)>0 and len(g)>0:
            g = [g[0]] * int(self.nstate)
        g = np.asarray(g)
        flog.close()
        return g


    def _parse_nacme_nacme(self):
        """ 
        Function to parse non-adiabatic couplings from a log file.

        Output:
        g  --- Dictonary with tuple of coupled states as keys and 3N array of non-adiabatic couplings as values in au.
        """

        g = {} # dictionary indexed by tuples to store NACS
        patt1 = r'<STATE\s+([0-9]+), WEIGHT=\s+[0-9.]+ | D/DQ | STATE\s+([0-9]+), WEIGHT=\s+[0-9.]+>'
        with open(self._log) as flog:
            line = flog.readline()
            while line:
                match1 = re.search(patt1,line)
                if match1: # start of gradients block found
                    sI = int( line.split()[1][:-1] ) - 1
                    sJ = int( line.split()[8][:-1] ) - 1
                    flog.readline()
                    nacIJ = _parse_gradient_block(flog)
                    g[(sI,sJ)] = nacIJ
                    if sI != sJ:
                        g[(sJ,sI)] = -nacIJ
                line = flog.readline()
            flog.close()
        return g


    def _parse_nacme_dipole(self):
        """ 
        Function to parse transition dipole couplings from a log file.

        Output:
        g  --- Dictonary with tuple of coupled states as keys and 3 array of transition diple couplings in au.
        """

        d = {} # dictionary indexed by tuples to store transition dipoles
        patt1 = r'^  --- DIPOLE MOMENTS \(DEBYE\) ---'
        patt2 = r'^\s+([0-9]+),\s+([0-9]+)\s+:\s+[A-Z]+\s+([0-9.]+)\s+([0-9.]+)\s+([0-9.]+)'
        with open(self._log,'r') as flog:
            line = flog.readline()
            while line:
                match1 = re.search(patt1,line)
                if match1: # start of dipoles block
                    line = flog.readline()
                    while line:
                        lspl = line.split()
                        if not lspl: break  # break on empty line after dipoles
                        sI = int( lspl[0][:-1] ) - 1
                        sJ = int( lspl[1] ) - 1
                        dvec = np.array( list(map(float,lspl[4:7])) )
                        d[(sI,sJ)] = dvec * conv.de2au
                        if sI != sJ:
                            d[(sJ,sI)] = dvec * conv.de2au
                        line = flog.readline()
                    break
                line = flog.readline()
            flog.close()
        return d


    def _parse_execution_error(self):
        """ 
        Function to check for execution errors in log file.
        """

        patt = r'EXECUTION OF GAMESS TERMINATED -ABNORMALLY-'
        with open(self._log) as flog:
            line = flog.readline()
            while line:
                match = re.search(patt,line)
                if match: # execution error
                    sys.stdout.write(line)
                    sys.stdout.write(' Inspect file '+self._log + '\n')
                    sys.exit(1)
                line = flog.readline()
            flog.close()


    def _parse_vec(self):
        """ 
        Function to parse vec data (containing molecular orbitals) from dat file.

        Output:
        strvec  -- string containing the molecular orbital coefficients as formatted in the dat file.
        norb    -- number of orbitals
        """

        vec = []
        norb = 0
        isSaveLine = False
        with open(self._dat) as fdat:
            line = fdat.readline()
            while line:
                if line[0:5] == ' $VEC':
                    vec = []
                    isSaveLine = True
                if isSaveLine:
                    vec.append(line)
                if line[0:5] == ' $END':
                    isSaveLine = False
                line = fdat.readline()
            fdat.close()
        if vec:
            norb = int( vec[-2].split()[0] )
        strvec = ''.join(vec)
        return strvec,norb

    def _parse_hessian(self):
        """
        Wrapper function to parse Hessian.
        """
        return parse_hessian(self._dat)

    def _parse_energy_CIS(self):
        patt = r'^\s*CONVERGED STATE\s*(\d+) ENERGY\s*=\s*([+-]?[0-9.]+)'
        e = []
        with open(self._log) as flog:
            line = flog.readline()
            while line:
                match = re.search(patt, line)
                if match:
                    e.append(list(map(float, match.groups()))[-1])
                line = flog.readline()
            flog.close()
        e = np.asarray(e)
        return e

    def _parse_equilibrium_geometry(self):
        """
        Funtion to parse optimized geometry from log file.

        Output:
        X0  --- 3N array of optimized geometry in au.
        """
        X0 = []
        patt1 = r'^\s*\*\*\*\*\* EQUILIBRIUM GEOMETRY LOCATED'
        patt2 = r'^ [A-Z]\s+[0-9\.]+\s+([+-]?[0-9\.]+)\s+([+-]?[0-9\.]+)\s+([+-]?[0-9\.]+)\s*$'

        with open(self._log) as flog:
            line = flog.readline()
            while line:
                match1 = re.search(patt1,line)
                if match1: # Enter equilibrium geometry block
                    line = flog.readline()
                    while line.strip():
                        match2 = re.search(patt2,line)
                        if match2:
                            X0.append(list(map(float, match2.groups())))
                        line = flog.readline()
                line = flog.readline()
            flog.close()
        return np.asarray(X0).flatten()


#   def _parse_vec(self):
#       try:
#           fdat = open(self._dat,'r')
#       except:
#           sys.stderr.write(fdat+' file could not be opened')
#       dattxt = fdat.read()
#       fdat.close()
#       p=re.compile(r'( \$VEC\s+$.+\$END)',re.MULTILINE | re.DOTALL)
#       m = re.search(p,dattxt)
#       return m.groups()[0]


#############################################################################
#                             MODULE  FUNCTIONS
#############################################################################

#############################################################################
# Output (log) file parsers
#############################################################################
def _parse_energy_CI_surf(path,ssz=None):
    """
    produce a table of energies and coordinates of a MCSCF/CI surface scan

    path -- path to log file
    ssz --  list with two floats corresponding to S (total spin) and SZ
            (spin projection on the z-axis).
            If given, only states matching this pair of
            values will be extracted
    """
    # Get coordinates and energy values for each state
    regexp0 = r' COORD 1=\s*([-+]?[0-9.]+)'
    regexp1 = r'\s+STATE\s+([0-9]+)\s+ENERGY=\s+([-+]?[0-9.]+)'
    regexp2 = r'\s+STATE\s+([0-9]+)\s+ENERGY=\s+([-+]?[0-9.]+)\s+S=\s+([0-9.]+)\s+SZ=\s+([0-9.]+)'
    xgrid = uti.fileLineParser(path,regexp0,[float])
    energ2 = uti.fileLineParser(path,regexp2,[int,float,float,float])
    # Determine number of scanned points and electronic states
    npoints = len(xgrid)
    nstates = len(energ2)//(npoints+1)
    # Put coordinate and energy values in a table [[x0, e0, e1,...],...]
    table = []
    for i,x in enumerate(xgrid):
        row = []
        row.append( x[0] )
        for j in range((i+1)*nstates,(i+2)*nstates):
            # Check for spin and spin projection restrictions
            if ssz:
                if energ2[j][2] == ssz[0] and energ2[j][3] == ssz[1]:
                    row.append( energ2[j][1] )
            else:
                row.append( energ2[j][1] )
        table.append( row )
    return table

def _parse_gradient_block(fileobj):
    """
    Function to parse a gradient block which is formatted as follows.
    First, the number of the atom.
    Then, the symbol of the atom.
    Then 3 Numbers containing the gradient in x, y, z direction in au.

    The block ends at a empty line (line = '\n') or a line starting with a 
    capital F.

    Input:
    fileobj --- already opened and correctly positioned file object.

    Output:
    g   --- 3N array of the parsed gradient in au.
    """

    g = []
    patt = r'\s+[0-9]+\s+[A-Za-z]+\s+([-+]?[0-9.]+)\s+([-+]?[0-9.]+)\s+([-+]?[0-9.]+)$'
    line = fileobj.readline()
    while line != '\n' and line[1] != 'F':
        match = re.search(patt,line)
        g.append( np.array( list(map(float,match.groups())) ) )
        line = fileobj.readline()
    g = np.asarray(g).flatten()
    return g


[docs]
def parse_hessian(fdatname):
    """ 
    Function to parse a hessian from a dat file. The block containing the Hessian data starts
    with a line containing one whitespace and $HES. Then an irrelevant line follows, then the
    Hessian data (TODO describe format) and the block ends with a line containing one whitespace 
    and $END.

    Input:
    fdatname --- Filename (including path) of the dat file to parse.
    
    Output:
    H        --- 3Nx3N array containing the Hessian in au.
    """


    H = []
    try:
        fdat = open(fdatname,'r')
    except:
        sys.stderr.write(fdat+' file could not be opened')
    patt1 = r'^\s\$HESS'
    patt2 = r'(-?\d\.\d+E[+-]\d{2})'
    patt3 = r'^\s\$END'

    line = fdat.readline()
    while line:
        match1 = re.search(patt1,line)
        if match1:
            line = fdat.readline()
            while not re.search(patt3, line):
                match2 = re.findall(patt2,line)
                if match2:
                    H.extend(list(map(float, match2)))
                line = fdat.readline()
            H = np.asarray(H)
            H = H.reshape(int(np.sqrt(H.size)), int(np.sqrt(H.size)))
            break
        line = fdat.readline()
    fdat.close()
    return H


#############################################################################
# String formaters for different parts of the input file
#############################################################################
def _format_coordinates(x,smb,num):
    """
    return string with formated Cartesian coordinates for $DATA card

    Input:
    x   : 3N array in Angstrom
    smb : List of atomic symbols
    num : List of atomic numbers

    Output:
    xtxt : Correctly formatted string.
    """
    n = len(x)
    xlines = []
    line = ' {0} {1} {2[0]:12.7f} {2[1]:12.7f} {2[2]:12.7f}'
    x.shape = (n//3,3)
    for i,s in enumerate(smb):
        xlines.append( line.format(s,num[i],x[i]) )
    xtxt = '\n'.join(xlines)
    x.shape = (n,)
    return xtxt


def _format_efields(eflist,t):
    """
    return string with formated electric filed input.

    Input:
    eflist --- list containing the electric fields
    t      --- time in au

    Output:
    txt : Correctly formatted string.
    """
    E = np.zeros(3,float)
    tmplt = ' $EFIELD\n   EVEC(1)={0[0]:14.9f} {0[1]:14.9f} {0[2]:14.9f}\n $END\n'
    for ef in eflist:
        E = E + np.asarray(ef.P)*ef.A(t)
    if np.dot(E,E)**0.5 > TINY:
        txt = tmplt.format(E)
    else:
        txt='\n'
    return txt


def _format_wstate(n):
    """
    return string with formated weights string for n states.

    Input:
    n   : number of states

    Output:
    xtxt : Correctly formatted string.
    """
    s=''
    for i in range(n):
        s = s+'1.0,'
    s = s[:-1]
    return s


#############################################################################
# Input templates for different types of calculations
#############################################################################
def _input_template_HF_E_EF():
    """
    Return template (string) for Gamess-US input file
    Templates are filled using its string.format function

    Hartree-Fock Energy/Gradient + EField
    """
    path = CDTK.CDTKPATH + '/Interfaces/'
    tname = path + 'template.gamess_HF_E_EF'
    with open (tname, "r") as myfile:
        t=myfile.read()
        myfile.close()
    return t

def _input_template_HF_E_EF_HOMO():
    """
    Return template (string) for Gamess-US input file
    Templates are filled using its string.format function

    Hartree-Fock Energy/Gradient + HOMO energy + EField
    """
    path = CDTK.CDTKPATH + '/Interfaces/'
    tname = path + 'template.gamess_HF_E_EF_HOMO'
    with open (tname, "r") as myfile:
        t=myfile.read()
        myfile.close()
    return t

def _input_template_HF_KOOPMANS():
    """
    Return template (string) for Gamess-US input file
    Templates are filled using its string.format function

    HF Energy plus Koopmans' excited state energies
    """
    path = CDTK.CDTKPATH + '/Interfaces/'
    tname = path + 'template.gamess_HF_KOOPMANS'
    with open (tname, "r") as myfile:
        t=myfile.read()
        myfile.close()
    return t

def _input_template_HF_EGR_EF():
    """
    Return template (string) for Gamess-US input file
    Templates are filled using its string.format function

    Hartree-Fock Energy/Gradient + EField
    """
    path = CDTK.CDTKPATH + '/Interfaces/'
    tname = path + 'template.gamess_HF_EGR_EF'
    with open (tname, "r") as myfile:
        t=myfile.read()
        myfile.close()
    return t


def _input_template_MCSCF():
    """
    Return template (string) for Gamess-US input file
    Templates are filled using its string.format function

    MCSCF Energy + Gradient + NACME + Dipole
    """
    path = CDTK.CDTKPATH + '/Interfaces/'
    tname = path + 'template.gamess_MCSCF'
    with open (tname, "r") as myfile:
        t=myfile.read()
        myfile.close()
    return t


def _input_template_MCQDPT():
    path = CDTK.CDTKPATH + '/Interfaces/'
    tname = path + 'template.gamess_MCQDPT'
    with open (tname, "r") as myfile:
        t=myfile.read()
        myfile.close()
    return t


def _input_template_HESSIAN():
    """
    Return template (string) for Gamess-US input file
    Templates are filled using its string.format function

    HESSIAN
    """
    path = CDTK.CDTKPATH + '/Interfaces/'
    tname = path + 'template.gamess_HESSIAN'
    with open(tname, 'r') as myfile:
        t = myfile.read()
        myfile.close()
    return t

def _input_template_OPTIMIZE():
    """
    Return template (string) for Gamess-US input file
    Templates are filled using its string.format function

    OPTIMIZE
    """
    path = CDTK.CDTKPATH + '/Interfaces/'
    tname = path + 'template.gamess_OPTIMIZE'
    with open(tname, 'r') as myfile:
        t = myfile.read()
        myfile.close()
    return t

def _input_template_CIS_E():
    """
    Return template (string) for Gamess-US input file
    Templates are filled using its string.format function

    CIS_E
    """
    path = CDTK.CDTKPATH + '/Interfaces/'
    tname = path + 'template.gamess_CIS_E'
    with open(tname, 'r') as myfile:
        t = myfile.read()
        myfile.close()
    return t


#############################################################################
# Quick standalone test code
#############################################################################
if __name__ == '__main__':
   gi = gamess()
   gi.atomicSymbols = ['O','H','H']
   gi.atomicNumbers = ['8.0','1.0','1.0']
   x=np.array([1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0])
   ef = efi.EField()
   ef.P = np.array([1,0,0],float)
   def a(t): return 0.01
   ef.A = a
   gi.efields = [ef]
   e,g = gi.energy_gradient_HF(x,Time=10.0)
   sys.stdout.write('\n'+str(e)+'\n')
   sys.stdout.write(str(g)+'\n')