#!/usr/bin/env python
#* **************************************************************************
#*
#* CDTK, Chemical Dynamics Toolkit
#* A modular system for chemical dynamics applications and more
#*
#* Copyright (C) 2011, 2012, 2013, 2014, 2015, 2016
#* Oriol Vendrell, DESY, <oriol.vendrell@desy.de>
#*
#* Copyright (C) 2017, 2018, 2019
#* Ralph Welsch, DESY, <ralph.welsch@desy.de>
#*
#* Copyright (C) 2020, 2021, 2022, 2023
#* Ludger Inhester, DESY, ludger.inhester@cfel.de
#*
#* This file is part of CDTK.
#*
#* CDTK is free software: you can redistribute it and/or modify
#* it under the terms of the GNU General Public License as published by
#* the Free Software Foundation, either version 3 of the License, or
#* (at your option) any later version.
#*
#* This program is distributed in the hope that it will be useful,
#* but WITHOUT ANY WARRANTY; without even the implied warranty of
#* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
#* GNU General Public License for more details.
#*
#* You should have received a copy of the GNU General Public License
#* along with this program. If not, see <http://www.gnu.org/licenses/>.
#*
#* **************************************************************************
import sys
import itertools
import re
import pickle
import shlex
from scipy import interpolate
from optparse import OptionParser
import numpy as np
import numpy.linalg as la
import CDTK.Tools.Utils as uti
import CDTK.Tools.Conversion as conv
# This program does several different analysis schemes for a given set of trajectories
# and also calculated the associated fourier spectrum (TODO: fourier trafor to be implemented!).
#
# The trajectories need to be saved with the -p command from xpyder (pickle).
#
# The analysis is based on the calculation of correlation functions of the type
# <A B(t)>, where A and B correspond to two 'observables'. Setting 'A' to be the
# identity also expectation values can be calculated. One can also request the
# calculation of said correlation function at a given timestep (i.e. the first one, the
# last one, ...). More details in the help text.
#
# The final data can be analysed in several ways. The mean value averaged over all
# trajectories can be calculated as well as other functions (standard deviation, percentile, ...).
# Alternatively, the quantity calculated can be histogrammed with different ways to define the
# histograms. More details in the help text.
# For all values obtained the standard error of the respective quantity is calculated as
# the standard deviation of that quantity obtained from bootstrapping!
# The 'commands' tuple holds most of the definitions needed:
# 0: string defining observable A
# 1: string defining observable B
# 2: string defining the value used for projection
# 3: string defining the operation for combining A and B
# 4: string requesting histograms and defining the bins to be used
# 5: string defining the function to be calculated of the given values (mean, std, ...)
# 6: string defining the output file
[docs]
def getBins(doHis, nvalues, correl):
"""
This function creates a set of bins for histogramming the data calculated.
Input:
TODO ---
doHis --- either an integer number that defines the number of bins to be used,
0 for integer steps (in both cases upper and lower bounds are chosen as highest and
lowest value), or three numbers defining lower bound, upper bound as well as step
or a valid command for numpy.histogram
nvalues --- number of values calculated
correl --- correlation function values calculated
Output:
bins --- array holding the definition of the bins
"""
# arrays to hold the lower and upper bounds
minb = np.zeros(nvalues)
maxb = np.zeros(nvalues)
bins = []
# Iterate over all values calculated
for k in range(nvalues):
# check the string defining the type of bins requested
vals = doHis.split(',')
# 1 part -> get number of bins requested
if len(vals) == 1:
try:
n = int(vals[0])
except ValueError:
n = None
# do integer bins; step of bins = 1
if n == 0:
### TODO get these numbers
minb[k] = int(np.floor(np.nanmin(correl[:, :, k])))-0.5
maxb[k] = int(np.ceil(np.nanmax(correl[:, :, k])))+0.51
bins.append(np.arange(minb[k], maxb[k]))
# do certain number of bins with upper and lower bound given by the data
elif n > 0:
# do not take zeros into account
if nozero == True:
try:
minb[k] = np.nanmin(np.take(correl[:, :, k], correl[:, :, k].flatten().nonzero())) - 1e-7
except ValueError:
minb[k] = -1e-7
try:
maxb[k] = np.nanmax(np.take(correl[:, :, k], correl[:, :, k].flatten().nonzero())) + 1e-7
except ValueError:
maxb[k] = 1e-7
# do take zeros into account
else:
minb[k] = np.nanmin(correl[:, :, k]) - 1e-7
maxb[k] = np.nanmax(correl[:, :, k]) + 1e-7
# create set of bins
bins.append(np.linspace(minb[k], maxb[k], int(doHis)+1))
# this should be a reasonable string for np.histogram
else:
bins.append(vals[0])
# 3 parts -> minimum value, maximum value and step size
elif len(vals) == 3:
minb[k] = float(vals[0])
maxb[k] = float(vals[1]) + float(vals[2])
bins.append(np.arange(minb[k], maxb[k], float(vals[2])))
return bins
[docs]
def bootstrap(correl, doHis, func=np.nanmean, nbootstrap=1000, nozero=False):
"""
This function performs bootstraping to obtain meaningful variances for the calculated
quantities. Either a histogram or a certain quantity (i.e. mean, std) given as a
function.
Input:
correl --- an array of values to perform bootstrapping on; NaN values are ignored
doHis --- either an integer number that defines the number of bins to be used,
0 for integer steps (in both cases upper and lower bounds are chosen as highest and
lowest value), or three numbers defining lower bound, upper bound as well as step
or a valid command for numpy.histogram
func --- function defining the value to be calculated (i.e. mean, std, etc.)
nbootstrap --- number of bootstrapping steps
nzero --- if zero values should be omitted for bin calculation
Output:
mean --- mean values of the quantity requested (i.e. mean or histogram)
sigma --- standard errors for the quantities (i.e. standard deviations of the mean or bin value)
bins --- definition of bins if histogramming is used
ntraj --- number of trajectories used for each value; i.e. number of values not NaN or Inf
"""
# reshape for better processing
if len(correl.shape) == 2:
nsamples, nsteps = correl.shape
nvalues = 1
else:
nsamples, nsteps, nvalues = correl.shape
correl = correl.reshape(nsamples, nsteps, nvalues)
# calculate the number of trajectories that do not have NaN or Inf for each timestep and values
ntraj = np.zeros((nsteps, nvalues), dtype=int)
for j in range(nsteps):
for k in range(nvalues):
if type(correl[:, j, k][0]) == np.ndarray:
ntraj[j, k] = len(correl[:, j, k])
else:
ntraj[j, k] = len(correl[np.isfinite(correl[:, j, k])])
# set up structure for means
values = np.zeros((nsteps, nbootstrap, nvalues)).tolist()
# create bins for histogram
bins = []
if doHis != None:
bins = getBins(doHis, nvalues, correl)
# iterate as often as given
for i in range(nbootstrap):
# get indices for bootstrap resampling; 1 bootstrap iteration = no random sampling!
if nbootstrap == 1:
resample_i = np.array(list(range(nsamples)))
else:
resample_i = np.random.randint(nsamples, size=nsamples)
# for each time-step calculate quantities for all values
for j in range(nsteps):
if doHis != None:
values[j][i] = []
# iterate over all calculated values
for k in range(nvalues):
# do histogram
if doHis != None:
r_correl = correl[resample_i, j, k]
if type(r_correl[0]) == np.ndarray:
r_correl = np.concatenate( r_correl, axis=0 )
else:
r_correl = r_correl[np.isfinite(r_correl)]
nt = float(len(r_correl))
val, bi = np.histogram(r_correl, bins=bins[k], density=False)
values[j][i] += (val/nt).tolist()
bins[k] = bi
# call given function (mean, std, ...)
else:
values[j][i][k] = func(correl[resample_i, j, k])
values = np.array(values).reshape((nsteps, nbootstrap, -1))
nvalues = values.shape[2]
# calculate bootstraped mean of quantity requested and corresponding std. deviation
mean = np.zeros((nsteps, nvalues)).tolist()
sigma = np.zeros((nsteps, nvalues)).tolist()
mean = np.mean(values, axis=1)
sigma = np.std(values, axis=1)
return np.array(mean), np.array(sigma), bins, ntraj
[docs]
def red(c0, c, redString):
"""
This function calculates the correlation between two values at time 0 and t.
The correlation might be calculated for a single value by multiplication, for
a set of values in an array as an elemnt-wise multiplication (i.e. [c0[0]*c[0], c0[1]*c[1], ...)
or for vector-valued quantities by a dot product. Other options might be needed
here.
Input:
c0 --- value or values at time 0
c --- value or values at time t
redString --- definition on how to combine the values.
'dot' does a dot-product of both values
'mult' does a multiplication; element-wise (i.e. [c0[0]*c[0], c0[1]*c[1], ...)
if arrays are given.
Output:
The connected value or values.
"""
if redString == 'dot':
return np.dot(c0, c)
elif redString == 'mult':
return c0 * c
else:
print('Reduction string invalid. redString = ', redStrin, ' not implemented.')
exit(-1)
[docs]
def angle(optionsString, pValue, traj, i, val):
"""
Does perform operations related to angles. If no options given it will not work.
Valid options are as follows:
-1 <a>, -2 <b>, -3 <b> given: calculate the angle between the the sites. A site can be defined as an
atom by its number or as a center of mass by giving m and a comma separated
list of atoms, i.e., -1 m,0,3,4. Optionally, the third argument can also be
a space fixed axis given as -3 s,0,0,1 and then the angle between the vector
given by the first two sites and that axis is calculated.
-p <a> given: this option projects it onto a certain range. Valid are below a value (<,A) or above a value
(>,A) or fully bound ranges (A,<,B) - also several ranges can be given as A,<,B,C,<,D, .. .
If within the given value the function returns 1.0, else the given pValue or NaN if -v is given.
-v given: Requests NaN values for projection.
-n given: Normalization for angle projections required.
Input:
optionsString --- Input flags given
pValue --- value used for projections
traj --- trajectory object to analyze
i --- time step given as integer
val --- value from previous operation
Output:
values obtained from the position operation combined with old value.
"""
obsparser = OptionParser(usage="Options for +angle or +ang", add_help_option=False)
obsparser.add_option('-h', '--help',
dest='help',
action='store_true',
default=False,
help='Prints this help page.')
obsparser.add_option('-1', '--angle1',
dest='ang1',
type='str',
default=None,
help='An angle between the three given (d1, d2 and d3) sites should be calculated. A site can be defined as an atom by its number or as a center of mass by giving m and a set of atoms by their numbers with comma sepatation.')
obsparser.add_option('-2', '--angle2',
dest='ang2',
type='str',
default=None,
help='An angle between the three given (d1, d2 and d3) sites should be calculated. A site can be defined as an atom by its number or as a center of mass by giving m and a set of atoms by their numbers with comma sepatation.')
obsparser.add_option('-3', '--angle3',
dest='ang3',
type='str',
default=None,
help='An angle between the three given (d1, d2 and d3) sites should be calculated. A site can be defined as an atom by its number or as a center of mass by giving m and a set of atoms by their numbers with comma sepatation. The third site can also be given by a space fixed vector given by an s and then the three components.')
obsparser.add_option('-p', '--project',
dest='proj',
type='str',
default=None,
help='Take the angle given and project onto a certain range. Valid are fully bound ranges (A,<,B), or below a value (<,A) or above a value (>,A).')
obsparser.add_option('-n', '--normalize',
dest='doNorm',
action='store_true',
default=False,
help='If normalization is required')
obsparser.add_option('-v', '--nanvalue',
dest='doNan',
action='store_true',
default=False,
help='If only this projection is supposed to give Nans.')
opts, args = obsparser.parse_args(optionsString.split())
if opts.help == True:
obsparser.print_help()
return 0.0
cval = None
# obtain first site:
vals = opts.ang1.split(',')
# center of mass
if vals[0] == 'm':
coord1 = traj.getCOM([int(v) for v in vals[1:]], i)
else:
n1 = int(vals[0])
coord1 = traj.logR[i][n1*3:(n1+1)*3]
# obtain second site:
vals = opts.ang2.split(',')
# center of mass
if vals[0] == 'm':
coord2 = traj.getCOM([int(v) for v in vals[1:]], i)
else:
n2 = int(vals[0])
coord2 = traj.logR[i][n2*3:(n2+1)*3]
# obtain third site:
vals = opts.ang3.split(',')
# center of mass
if vals[0] == 'm':
coord3 = traj.getCOM([int(v) for v in vals[1:]], i)
# second vector given as third site!
elif vals[0] == 's':
coord3 = None
vec2 = np.array(vals[1:4], dtype=float)
else:
n2 = int(vals[0])
coord2 = traj.logR[i][n2*3:(n2+1)*3]
# get first vector:
vec1 = coord1 - coord2
norm1 = np.linalg.norm( vec1 )
# get second vector
if coord3 != None:
vec2 = coord3 - coord2
norm2 = np.linalg.norm( vec2 )
# get angle
dp = np.dot(vec1, vec2)/ norm1 / norm2
angle = np.arccos( np.clip(dp, -1, 1) )
cval = angle
# if we want to project the angle onto a certain interval
if opts.proj != None:
vals = opts.proj.split(',')
# case: above a value; > A
if vals[0] == '>':
if cval > float(vals[1]):
cval = 1.0
else:
if opts.doNan == True:
cval = np.NaN
else:
cval = pValue
# case: below a value; < A
if vals[0] == '<':
if cval < float(vals[1]):
cval = 1.0
else:
if opts.doNan == True:
cval = np.NaN
else:
cval = pValue
if len(vals) == 3:
# case: between value; < A <
if vals[1] == '<':
if cval > float(vals[0]) and cval < float(vals[2]):
cval = 1.0
if opts.doNorm == True:
cval /= (2.0 * np.pi * (np.cos(float(vals[0])) - np.cos(float(vals[2]))) )
else:
if opts.doNan == True:
cval = np.NaN
else:
cval = pValue
# several cases: between value; < A <
elif len(vals) > 3:
for i in range(0, len(vals), 3):
vals2 = vals[i:i+3]
if vals2[1] == '<':
if angle > float(vals2[0]) and angle < float(vals2[2]):
cval = 1.0
if opts.doNorm == True:
cval /= (2.0 * np.pi * (np.cos(float(vals2[0])) - np.cos(float(vals2[2]))) )
break
else:
if opts.doNan == True:
cval = np.NaN
else:
cval = pValue
## Combine with old values
try:
if cval == None or val == None:
return pValue
else:
return cval*val
except:
return cval*val
[docs]
def position(optionsString, pValue, traj, i, val):
"""
Does perform operations related to positions. If no options given it will return None.
Valid options are as follows:
-1 <a>, -2 <b> given: calculate the distance between the two sites. A site can be defined as an
atom by its number or as a center of mass by giving m and a comma separated
list of atoms, i.e., -1 m,0,3,4.
-x <a> given: coordinate value of a given coordinates, e.g., -x 0, gives the x coordinate of the
first atom, or -x 0,3,7 gives the x coordinate of the first and second atom and the
y coordinate of the third atom. Alternatively, also the 3D coordinate of a center
of mass can be obtained by giving m and a comma separated list of atoms. Single atoms
can be obtained as center of mass of that atom, i.e., -x m,1.
-p <a> given: if a single value is calculated (i.e. a distance or single coordinate) this option
projects it onto a certain range. Valid are fully bound ranges (A,<,B), or below a
value (<,A) or above a value (>,A). If within the given value the function returns 1.0,
else the given pValue.
-d <a> given: checks whether a given site is dissociated, i.e., if its distance to all other atoms is
larger than a certain number. The number to compare is given first and then the atom or
site to look at is given, e.g., -d 7.0,2 or -d 7.0,m,0,3,4.
Input:
optionsString --- Input flags given
pValue --- value used for projections
traj --- trajectory object to analyze
i --- time step given as integer
val --- value from previous operation
Output:
values obtained from the position operation combined with old value.
"""
obsparser = OptionParser(usage="Options for +position or +pos", add_help_option=False)
obsparser.add_option('-h', '--help',
dest='help',
action='store_true',
default=False,
help='Prints this help page.')
obsparser.add_option('-1', '--distance1',
dest='dis1',
type='str',
default=None,
help='A distance between the two given (d1 and d2) sites should be calculated. A site can be defined as an atom by its number or as a center of mass by giving m and a set of atoms by their numbers with comma sepatation.')
obsparser.add_option('-2', '--distance2',
dest='dis2',
type='str',
default=None,
help='A distance between the two given (d1 and d2) sites should be calculated. A site can be defined as an atom by its number or as a center of mass by giving m and a set of atoms by their numbers with comma sepatation.')
obsparser.add_option('-p', '--project',
dest='proj',
type='str',
default=None,
help='Take the distance or coordinate value given and project onto a certain range. Valid are fully bound ranges (A,<,B), or below a value (<,A) or above a value (>,A).')
obsparser.add_option('-x', '--x',
dest='coord',
type='str',
default=None,
help='A specific coordinate or a list of coorinates. Numbered starting with 0, i.e., 0: x-coordinate of first atom, 1: y-coordinated of first atom, etc. If we want the center of mass of the list we have to give a m first.')
obsparser.add_option('-d', '--dissociated',
dest='diss',
type='str',
default=None,
help='Check wheather the given atom or list of atoms is beyond a certain distance d to all other atoms. Format -d d,n or -d d,m,n0,n1,n2')
opts, args = obsparser.parse_args(optionsString.split())
if opts.help == True:
obsparser.print_help()
return 0.0
cval = None
# if we want to look at dissociation
if opts.diss != None:
# obtain dissociation parameter
vals = opts.diss.split(',')
diss_distance = float(vals[0])
# center of mass
if vals[1] == 'm':
atoms = [int(v) for v in vals[2:]]
coord = traj.getCOM(atoms, i)
# or regular coordinates
else:
atoms = [int(vals[1])]
coord = traj.logR[i][atoms[0]]
cval = 1.0
# check distance to all other atoms
n = len(traj.logR[i]) // 3
for j in range(n):
# if atom not in site definition, get distance
if not j in atoms:
d = np.linalg.norm(coord - traj.logR[i][j*3:(j+1)*3])
# if not dissociated in one case, break; i.e. lazy evaluation
if d < diss_distance:
cval = pValue
break
# if we want to look at coordinates
if opts.coord != None:
# obtain coordinate list
vals = opts.coord.split(',')
# center of mass
if vals[0] == 'm':
cval = traj.getCOM([int(v) for v in vals[1:]], i)
# certain coordinate
else:
cval = np.zeros(len(vals))
for j, v in enumerate(vals):
cval[j] = traj.logR[i][int(v)]
# if we want to look at distances
if opts.dis1 != None and opts.dis2 != None:
# obtain first site:
vals = opts.dis1.split(',')
# center of mass
if vals[0] == 'm':
coord1 = traj.getCOM([int(v) for v in vals[1:]], i)
else:
n1 = int(vals[0])
coord1 = traj.logR[i][n1*3:(n1+1)*3]
# obtain second site:
vals = opts.dis2.split(',')
# center of mass
if vals[0] == 'm':
coord2 = traj.getCOM([int(v) for v in vals[1:]], i)
else:
n2 = int(vals[0])
coord2 = traj.logR[i][n2*3:(n2+1)*3]
# calculate distance
cval = np.linalg.norm( coord1 - coord2 )
# if we want to project the distance/coordinate onto a certain interval
if opts.proj != None:
vals = opts.proj.split(',')
# case: above a value; > A
if vals[0] == '>':
if cval > float(vals[1]):
cval = 1.0
else:
cval = pValue
# case: below a value; < A
if vals[0] == '<':
if cval < float(vals[1]):
cval = 1.0
else:
cval = pValue
# case: between values < A <
if vals[1] == '<':
if cval > float(vals[0]) and cval < float(vals[2]):
cval = 1.0
else:
cval = pValue
## Combine with old values
try:
if cval == None or val == None:
return pValue
else:
return cval*val
except:
return cval*val
[docs]
def velocity(optionsString, pValue, traj, i, val):
"""
Does perform operations related to velocities. If no options given it will return None.
Note: adapted from RPMD branch.
Valid options are as follows:
-x <a> given: velocity value of a given coordinates, e.g., -x 0, gives the x velocity of the
first atom, or -x 0,3,7 gives the x velocity of the first and second atom and the
y velocity of the third atom. Alternatively, also the 3D velocity of a center
of mass can be obtained by giving m and a comma separated list of atoms. Single atoms
can be obtained as center of mass of that atom, i.e., -x m,1.
Input:
optionsString --- Input flags given
pValue --- value used for projections
traj --- trajectory object to analyze
i --- time step given as integer
val --- value from previous operation
Output:
values obtained from the velocity operation combined with old value.
"""
obsparser = OptionParser(usage="Options for +velocity or +vel", add_help_option=False)
obsparser.add_option('-h', '--help',
dest='help',
action='store_true',
default=False,
help='Prints this help page.')
obsparser.add_option('-A','--A',
dest='obsA',
type='str',
default=None,
help='First observable')
obsparser.add_option('-x', '--x',
dest='coord',
type='str',
default=None,
help='A specific coordinate or a list of coorinates. Numbered starting with 0, i.e., 0: x-coordinate of first atom, 1: y-coordinated of first atom, etc. If we want the center of mass of the list we have to give a m first.')
opts, args = obsparser.parse_args(optionsString.split())
if opts.help == True:
obsparser.print_help()
return 0.0
cval = None
# if we want to look at coordinates
if opts.coord != None:
# obtain coordinate list
vals = opts.coord.split(',')
# center of mass
if vals[0] == 'm':
print("Center of mass velocity not implemented!")
exit(-1)
# certain coordinate
else:
cval = np.zeros(len(vals))
for j, v in enumerate(vals):
cval[j] = traj.logV[i][int(v)]
return cval*val
[docs]
def charge(optionsString, pValue, traj, i, val):
"""
Does perform operations related to charges. If no options given it will return the
charge of the system.
Valid options are as follows:
-t given: give back list of partial charges.
-n <a> given: if -t is given, only consider the atoms given here as a list
-p <a>: project the calculated (partial) charge rounded to an integer to the given
value. Gives 1.0 for matches to the given value, or pValue if not.
Input:
optionsString --- Input flags given
pValue --- value used for projections
traj --- trajectory object to analyze
i --- time step given as integer
val --- value from previous operation
Output:
values obtained from the charge operation combined with old value.
"""
obsparser = OptionParser(usage="Options for +charge or +cha", add_help_option=False)
obsparser.add_option('-h', '--help',
dest='help',
action='store_true',
default=False,
help='Prints this help page.')
obsparser.add_option('-p', '--project',
dest='project',
type='int',
action='store',
default=None,
help='Project only onto given charge, partial charges are rounded to integers')
obsparser.add_option('-t', '--partial',
dest='doPartial',
action='store_true',
default=False,
help='Do analysis for partial charges.')
obsparser.add_option('-n', '--numbers',
dest='atoms',
type='str',
default=None,
help='Atoms to look at given as comma separated list.')
opts, args = obsparser.parse_args(optionsString.split())
if opts.help == True:
obsparser.print_help()
return 0.0
cval = 1.0
# get partial charges for either all atoms or a single site
if opts.doPartial == True:
if opts.atoms == None:
cval = np.array(traj.ES.log['partial'][i][1:])
else:
vals = opts.atoms.split(',')
cval = np.array([traj.ES.log['partial'][i][int(j)+1] for j in vals])
else:
cval = np.array([traj.ES.log['charge'][i][1]], dtype=float)
# project onto certain charge value
if opts.project != None:
proj_q = int(opts.project)
for j, v in enumerate(cval):
if round(v) != proj_q:
cval[j] = pValue
else:
cval[j] = 1.0
## Combine with old values
return cval*val
[docs]
def state(optionsString, pValue, traj, i, val):
"""
Does perform operations related to the electronic state in surface hopping.
Valid options are as follows:
-p <a>: project results only for given state.
Input:
optionsString --- Input flags given
pValue --- value used for projections
traj --- trajectory object to analyze
i --- time step given as integer
val --- value from previous operation
Output:
values obtained from the state operation combined with old value.
"""
obsparser = OptionParser(usage="Options for +state", add_help_option=False)
obsparser.add_option('-h', '--help',
dest='help',
action='store_true',
default=False,
help='Prints this help page.')
obsparser.add_option('-p', '--project',
dest='project',
type='int',
action='store',
default=None,
help='Project only onto given state.')
opts, args = obsparser.parse_args(optionsString.split())
if opts.help == True:
obsparser.print_help()
return 0.0
# make sure the trajectory has a state
hasS = hasattr(traj, 'logS')
# get either the state from the variable or from an logged event
if hasS == True:
cval = traj.logS[i]
else:
cval = int(traj.ES.log['event'][-1][2])
# project onto certain state
if opts.project != None:
if cval == opts.project:
cval = 1.0
else:
cval = pValue
## Combine with old values
return cval*val
[docs]
def spec(optionsString, pValue, traj, i, val):
"""
Does perform operations related to electronic spectra (Auger, Photoionisation, Flouresence, ...).
TODO check if this really works!!
Valid options are as follows:
-a: Auger spectrum
-i: Photoionization spectrum
-f: Flouresence spectrum
Input:
optionsString --- Input flags given
pValue --- value used for projections
traj --- trajectory object to analyze
i --- time step given as integer
val --- value from previous operation
Output:
values obtained from the spec operation combined with old value.
"""
obsparser = OptionParser(usage="Options for +spectrum or +spec", add_help_option=False)
obsparser.add_option('-h', '--help',
dest='help',
action='store_true',
default=False,
help='Prints this help page.')
obsparser.add_option('-a', '--auger',
dest='doAuger',
action='store_true',
default=False,
help='Do analysis for auger decays.')
obsparser.add_option('-i', '--photoion',
dest='doPhoto',
action='store_true',
default=False,
help='Do analysis for photoionization.')
obsparser.add_option('-f', '--flouresence',
dest='doFlouro',
action='store_true',
default=False,
help='Do analysis for fouresence.')
opts, args = obsparser.parse_args(optionsString.split())
if opts.help == True:
obsparser.print_help()
return 0.0
cval = 1.0
if opts.doAuger == True:
cval = [np.NaN]
for p in traj.ES.log['event']:
if p[1].lower() == 'a':
cval[0] = float(p[4])
elif opts.doPhoto == True:
cval = []
for p in traj.ES.log['event']:
if p[1].lower() == 'p':
cval.append(float(p[4]))
elif opts.doFlouro == True:
cval = []
for p in traj.ES.log['event']:
if p[1].lower() == 'f':
cval.append(float(p[4]))
cval = np.array(cval)
## Combine with old values
return cval*val
[docs]
def kinetic(optionsString, pValue, traj, i, val):
"""
Does perform operations related to kinetic energies. If no options given it will return the
full kinetic energy of the system.
Valid options are as follows:
-n <a> given: Comma separated list of atoms to be taken into account.
-s given: if given, the kinetic energy for each atom is calculated separatly and returned as an array
-k <a> given: The remaining KER (TODO what is actually done here?) is calculated for each atom
(currently -n is ignored by this option, but should be implemented); implies -s.
Input:
optionsString --- Input flags given
pValue --- value used for projections
traj --- trajectory object to analyze
i --- time step given as integer
val --- value from previous operation
Output:
values obtained from the kinetic operation combined with old value.
"""
obsparser = OptionParser(usage="Options for +kinetic or +kin", add_help_option=False)
obsparser.add_option('-h', '--help',
dest='help',
action='store_true',
default=False,
help='Prints this help page.')
obsparser.add_option('-n', '--numbers',
dest='atoms',
type='str',
default=None,
help='Atoms to look at given as comma separated list.')
obsparser.add_option('-s', '--separate',
dest='sep',
action='store_true',
default=False,
help='Consider atoms separately.')
obsparser.add_option('-k', '--ker',
dest='ker',
action='store_true',
default=False,
help='Calculate KER, implies -s.')
opts, args = obsparser.parse_args(optionsString.split())
if opts.help == True:
obsparser.print_help()
return 0.0
cval = 1.0
# TODO implement -n option here
# KER to be calculated
if opts.ker == True:
n = traj.NDof // 3
cval = np.zeros(n)
# get kinetic energy first
for j in range(n):
cval[j] = traj.kinetic_energy(step=i, atomlist=[j])
# get remaining contribution by coulomb repulsion
for k in range(n):
if k == j:
continue
else:
red_mass = traj.M[k*3] / (traj.M[k*3] + traj.M[j*3])
qq = traj.ES.log['partial'][i][k+1] * traj.ES.log['partial'][i][j+1]
d = np.linalg.norm( traj.logR[i][j*3:(j+1)*3] - traj.logR[i][k*3:(k+1)*3])
cval[j] += red_mass * qq / d
return cval*val
# take all atoms into account
if opts.atoms == None:
# separately
if opts.sep == True:
n = traj.NDof // 3
cval = np.zeros(n)
for j in range(n):
cval[j] = traj.kinetic_energy(step=i, atomlist=[j])
# full kinetic energy
else:
cval = traj.kinetic_energy(step=i)
# only a subset of atoms
else:
vals = opts.atoms.split(',')
n = len(vals)
# separately
if opts.sep == True:
cval = np.zeros(n)
for j in range(n):
cval[j] = traj.kinetic_energy(step=i, atomlist=[int(vals[j])])
# combined
else:
cval = traj.kinetic_energy(step=i, atomlist=[int(v) for v in vals])
## Combine with old values
return cval*val
[docs]
def operate(opString, optionsString, projValue, traj, i, val):
"""
Perform an operation to obtain a quantity from a trajectory at time step i. Results
are combined with results from previous operations given in 'val'.
The operation is given by the 'opString' with options given by the 'optionsString'.
Input:
opString --- string defining the operation to be performed.
Valid: 'pos': position related things
'vel': velocity related things
'kin': kinetic energy related things
'cha': charge related things
'spec': auger, flouressence or photo-electron related things
'state' : state in surface hopping related
'id' : idendity
optionsString --- string with additional options given to the operation
projValue --- projection value
traj --- Trajectory data
i --- time-step as integer
val --- value obtained from previous operations
Return:
quantity obtained by this operation combined with given older value.
"""
if projValue != 'None':
pValue = float(projValue)
else:
pValue = float('nan')
if opString == 'id' or opString == 'identity':
return 1.0
elif opString == 'pos' or opString == 'position' :
return position(optionsString, pValue, traj, i, val)
elif opString == 'ang' or opString == 'angle':
return angle(optionsString, pValue, traj, i, val)
elif opString == 'vel' or opString == 'velocity':
return velocity(optionsString, pValue, traj, i, val)
elif opString == 'cha' or opString == 'charge':
return charge(optionsString, pValue, traj, i, val)
elif opString == 'kin' or opString == 'kinetic':
return kinetic(optionsString, pValue, traj, i, val)
elif opString == 'spec' or opString == 'spectrum':
return spec(optionsString, pValue, traj, i, val)
elif opString == 'state' or opString == 'state':
return state(optionsString, pValue, traj, i, val)
else:
print('operation invalid. opString = ', opString, ' not implemented.')
exit(-1)
[docs]
def observable(obsString, projValue, traj, i):
"""
Function that calculates a quantity as defined in the 'obsString' from
a trajectory at time-step 'i'.
Input:
obsString --- String defining the quantity to be calculated.
projValue --- Value used for any projections
traj --- Trajectory data
i --- time-step as integer
"""
# number of operations to be performed
# pattern = r'([0-9]+);.*'
# n2 = int(re.findall(pattern, obsString)[0])
# print n
n = obsString.count('+')
# if n != n2:
# print "Wrong! n=", n, "n2=", n2
# stop
# list of operations to be performed
pattern = r'\+(\S+)'
operations = re.findall(pattern, obsString)
# list of options for each operation
options = []
# iterate over all, but last
for j in range(len(operations) - 1):
offset = len(operations[j])
start = obsString.find(operations[j])
end = obsString.find('+' + operations[j+1])
options.append(obsString[start+offset:end])
# last one separate
offset = len(operations[-1])
start = obsString.find(operations[-1])
options.append(obsString[start+offset:])
# go through all operatons
val = 1.0
for j, op in enumerate(operations):
val = operate(op, options[j], projValue, traj, i, val)
# do lazy evaluation
try:
if val == None:
return val
except:
continue
return val
[docs]
def doFourrier():
"""
TODO implement!
"""
return
[docs]
def outputBins(fout, form, bins):
"""
Funtion to write bin definition if histogram was requested
Input:
fout --- output file handle
form --- If given, prints results in an alternative formats. Possible values:
"time": standard format with one line per time and all values along the columns.
"values" one line per value and time along the columns.
"single" with values first and empty line and then std errors.
bins --- Bin edges if histograms were requested.
Output:
The bin data is written.
"""
fout.write("#### A histogram was requested, the center of the histograms are given below: \n")
# standard format - time and then value stderror
if form.lower() == 'time':
fout.write("# 1: Time ")
shift = 0
# iterate over bins
for subs in bins:
fout.write(" ||| ")
# get bin center and write it as well as mentioning the stderror
for i in range(len(subs) - 1):
binc = (subs[i] + subs[i+1]) / 2.0
fout.write(str(2*i + 2 + shift) + ": " + str(binc) + " " + str(2*i + 3 + shift) + ": stderr ")
shift += (2*len(subs) - 2)
fout.write("\n")
elif form.lower() == 'single':
fout.write("# ")
shift = 0
# iterate over bins
for subs in bins:
# get bin center and write it as well as mentioning the stderror
for i in range(len(subs) - 1):
binc = (subs[i] + subs[i+1]) / 2.0
fout.write(str(i + 1 + shift) + ": " + str(binc) + " ")
shift += (len(subs))
fout.write("\n")
elif form.lower() == 'values':
shift = 0
# iterate over bins
for subs in bins:
fout.write("# ||| ")
# get bin center and write it as well as mentioning the stderror
for i in range(len(subs) - 1):
binc = (subs[i] + subs[i+1]) / 2.0
fout.write(str(2*i + 2 + shift) + ": " + str(binc) + " " + str(2*i + 3 + shift) + ": stderr ")
shift += (2*len(subs) - 2)
fout.write("\n")
return
[docs]
def outputNtraj(fout, time, ntraj):
"""
Funtion to write the number of trajectories contributing.
Input:
fout --- output file handle.
time --- array giving the timestamps.
ntraj --- array giving the number of trajectories.
Output:
The number of trajectories are written.
"""
fout.write("### Number of trajectories used for this file is: \n")
for t, nl in zip(time, ntraj):
fout.write("# " + str(t) + " ")
for n in nl:
fout.write(str(n) + " ")
fout.write(" \n")
return
[docs]
def outputData(m, commands, nbootstrap, step, form, bins, time, mean, sigma, ntraj):
"""
Funtion to write data to files.
Input:
m --- index of current command.
commands --- dictonary of commands as tuples. They are essentially the input flags
for the main program with the following structure:
(opA, opB, projection, red, his, output)
opA and opB define the operations for <A(0) B(t)>
red defines how to combined A and B, i.e. multiplication, dot product, ...
his defines if histogram should be produced and number of bins (0=automatic)
output defines output file
nbootstrap --- number of bootstrap resamplings to be performed
step --- Only give results for one timestep. -1 is last, 0 is first, None is all.
form --- If given, prints results in an alternative formats. Possible values:
"time": standard format with one line per time and all values along the columns.
"values" one line per value and time along the columns.
"single" with values first and empty line and then std errors.
bins --- Bin edges if histograms were requested.
time --- array giving the timestamps.
mean --- Mean value of the requested data as calculated by the bootstrapping.
sigma --- Standard deviations of the mean of the requested data as calculated by the bootstrapping.
ntraj --- array giving the number of trajectories.
Output:
The data is written to files with names given in the command. The first column is always
time, and then there is a column for each value directly followed by its standard error.
"""
# output file
sout = commands[m][6]
with open(sout, 'w') as fout:
# write out some header information
fout.write("#### Output file generated by analyze.py for the following commands: \n")
fout.write("#### -A " + str(commands[m][0]) + " \n")
fout.write("#### -B " + str(commands[m][1]) + " \n")
fout.write("#### -p " + str(commands[m][2]) + " \n")
fout.write("#### -r " + str(commands[m][3]) + " \n")
fout.write("#### -s " + str(commands[m][4]) + " \n")
fout.write("#### -v " + str(commands[m][5]) + " \n")
fout.write("#### -t " + str(step) + " \n")
fout.write("#### -c " + str(form) + " \n")
fout.write("#### Standard deviations of the quantities calculated are calculated employing a bootstrapping approach with " + str(nbootstrap) + " samples. \n")
# output bins if histogram was requested
if (commands[m][4] != None):
outputBins(fout, form, bins)
# iterate over data and output
for t, me, si in zip(time, mean, sigma):
# standard format - time and then value stderror
if form.lower() == 'time':
fout.write('{0: >12.3f} '.format(t) )
for valm, vals in zip(np.array(me).flatten(), np.array(si).flatten()):
fout.write('{0: >12.3e} {1: >12.3e} '.format(valm, vals))
fout.write('\n')
# alternative format - all values in one line and then all stderrors
elif form.lower() == 'single':
for valm in np.array(me).flatten():
fout.write('{0: >12.3f} '.format(valm))
fout.write('\n \n')
for vals in np.array(si).flatten():
fout.write('{0: >12.3f} '.format(vals))
fout.write('\n')
# value format - each value has a row and times are for each column
if form.lower() == 'values':
# write times to the header line too
fout.write("# 1: Values ")
for t in time:
fout.write(str(t) + " ")
nvalues = len(bins)
ntime = len(time)
# reshape for better plotting
mean = mean.reshape(ntime, -1, nvalues, order='F')
sigma = sigma.reshape(ntime, -1, nvalues, order='F')
shift = 0
# iterate over bins
for k, subs in enumerate(bins):
fout.write("\n")
# get bin center and write respective values
for i in range(len(subs) - 1):
binc = (subs[i] + subs[i+1]) / 2.0
fout.write(str(binc) + " ")
for valm, vals in zip(np.array(mean)[:, i, k], np.array(sigma)[:, i, k]):
fout.write('{0: >12.3f} {1: >12.3f} '.format(valm, vals))
fout.write('\n')
fout.write('#### \n')
## print number of trajectories that contribute if None given as projection
if (commands[m][2] == 'None'):
outputNtraj(fout, time, ntraj)
return
[docs]
def correlation_function(commands, dirs, nbootstrap=1000, step=None, form='time', nozero=False):
"""
Function to calculate a set of correlation functions or expectation values given by
the command string and for the trajectories given in the dirs folder. Standard errors
of the calculated quantities are calculated using bootstrapping.
Input:
commands --- dictonary of commands as tuples. They are essentially the input flags
for the main program with the following structure:
(opA, opB, projection, red, his, output)
opA and opB define the operations for <A(0) B(t)>
red defines how to combined A and B, i.e. multiplication, dot product, ...
his defines if histogram should be produced and number of bins (0=automatic)
output defines output file
dirs --- list of directories containing the pickled trajectory objects
nbootstrap --- number of bootstrap resamplings to be performed
step --- Only give results for one timestep. -1 is last, 0 is first, None is all.
form --- If given, prints results in an alternative formats. Possible values:
"time": standard format with one line per time and all values along the columns.
"values" one line per value and time along the columns.
"single" with values first and empty line and then std errors.
nozero --- If true ignore zero values in bin calculation.
Output:
The data is written to files with names given in the command. The first column is always
time, and then there is a column for each value directly followed by its standard error.
"""
ncom = len(commands)
c0 = np.zeros(ncom).tolist()
correl = [[] for m in range(ncom)]
time = []
# Loop over directories, Read data and add to corrlation function
for j, d in enumerate(dirs):
# load pickled file
picklefile = open(d + '/pickle.dat', "rb" )
traj = pickle.load(picklefile)
picklefile.close()
# Initialize corrlation function array
# all times in the trajectory
if step == None:
n = len(traj.logR)
# only one timestep requested
else:
n = 1
time = np.zeros(n)
for m in range(ncom):
correl[m].append([])
correl[m][j] = np.zeros(n).tolist()
## Value at t=0
c0[m] = observable(commands[m][0], commands[m][2], traj, 0)
## Iterate over trajectory
for i in range(n):
s = i
if step != None:
s = step
time[i] = traj.logt[s]
## and all commands
for m in range(ncom):
c = observable(commands[m][1], commands[m][2], traj, s)
# update correlation function
correl[m][j][i] = red(c0[m], c, commands[m][3])
for m in range(ncom):
## do statistic here
## obtain mean/std/percentile/... and corresponding sigma
if commands[m][5] == 'mean':
func = np.nanmean
elif commands[m][5] == 'std':
func = np.nanstd
elif "percentile" in commands[m][5]:
pe = float(commands[m][5].split()[1])
func = (lambda x: np.nanpercentile(x, pe))
else:
print('Value function string invalid. ', m, '-th valString = ', commands[m][5], ' not implemented.')
exit(-1)
mean, sigma, bins, ntraj = bootstrap(np.array(correl[m]), commands[m][4], func=func, nbootstrap=nbootstrap, nozero=nozero)
# TODO also implement fourier trafo here!
# Write correlation function and std error to file
outputData(m, commands, nbootstrap, step, form, bins, time, mean, sigma, ntraj)
return
[docs]
def doParse(filename, directory, parser):
"""
Parse a file of commands. For documentation of commands see documentation for input flags
of the whole program. The commands need to be given in the same way. Different commands can be
given separated by lines starting with #.
Input:
filename --- Name of the file to be parsed as string
directory --- Directory for input/output
parser --- Set up parser to interpret commands
Output:
commands --- dictonary of commands
"""
i = 0
commands = {}
cline = ""
with open(filename, 'r') as inf:
for line in inf:
# separator for different commands; line starting with #
if line[0] == '#':
# already read command needs to be parsed
if cline != "":
opts, args = parser.parse_args(shlex.split(cline))
commands[i] = (opts.obsA, opts.obsB, opts.projection, opts.red, opts.his, opts.func, directory + opts.outfile)
i += 1
cline = ""
else:
cline += line.rstrip()
cline += ' '
# already read command needs to be parsed
if cline != "":
opts, args = parser.parse_args(shlex.split(cline))
commands[i] = (opts.obsA, opts.obsB, opts.projection, opts.red, opts.his, opts.func, directory + opts.outfile)
return commands
[docs]
def getParser():
"""
Function defining the input options parser.
Output:
parser --- option parser object.
"""
usage = "\n \n This program does several different analysis schemes for a given set of trajectories and also calculated the associated fourier spectrum (TODO: fourier trafor to be implemented!). \n \n The trajectories need to be saved with the -p command from xpyder (pickle). \n \n The analysis is based on the calculation of correlation functions of the type <A B(t)>, where A and B correspond to two 'observables'. Setting 'A' to be the identity also expectation values can be calculated. One can also request the calculation of said correlation function at a given timestep (i.e. the first one, the last one, ...). More details in the help text below. \n \n The final data can be analysed in several ways. The mean value averaged over all trajectories can be calculated as well as other functions (standard deviation, percentile, ...). Alternatively, the quantity calculated can be histogrammed with different ways to define the histograms. Several ways to format the output file are also possible. More details in the help text. \n \n For all values obtained the standard error of the respective quantity is calculated as the standard deviation of that quantity obtained from bootstrapping! \n \n The type of correlation function is given as follows. These options can be given as command line options or in a file given with -f <file>. In the file more than one quantity can be defined. Each quantity is terminated by a line starting with ###. \n -A <Operations> and -B <operations> define the observable. Each observable can be comprised of different operations. Each single operation is given with a + sign and and identifier (e.g., +position, +velocity, +kinetic, ...) and apropriate options. The options are detailed below for each possible operation. \n Examples: \n -A '+charge' -B '1; +charge' would calculate a charge-charge correlation function. \n -A '+identity' -B '+posision -1 0 -2 1' would calculate the time-dependent expectation value of the distance between atom 0 and 1. \n -A '+identity' -B '+charge -t -p 0 +kinetic -k' would calculate the time-dependent expectation value of the released kinetic energy for each atom that has a partial charge between 0.5 and 1.5 \n \n -r <red> defines how to interpret A B(t), i.e., which operation to perform to combine A and B. \n \n -p <value> the value to use for unsuccesful projections (usually 0.0 or None). \n \n -v <value> the property to calculate for AB, i.e., the mean or standard deviation. \n \n -o <file> defines the output file, which always contains time as first column, then each of the values calculated (bin center values are given as comments) and the corresponding error directly after each value. \n \n -s <options> to produce a histogram of the calculated data. Details below. Other general options are: \n \n Several other options can be given only as command line options: -d <directory>, -c <format>, -n <Nbootstrap>, -t <timestep>, -z; which are detailed below."
# Parse command line options
parser=OptionParser(usage=usage, add_help_option=False)
parser.add_option('-d','--dir',
dest='dir',
type='str',
default='.',
help='Name of directory contraining trj folders. This option cannot be given to individual commands and has to be given as a command line option. ')
parser.add_option('-f', '--file',
dest='File',
action='store',
default=None,
help='Use inputfile instead of command line input. Valid commands in the input file are: -A, -B, -p, -r, -s, -v, -o')
parser.add_option('-n', '--nbootstrap',
dest='nbootstrap',
type='int',
action='store',
default=200,
help='Number of bootstrapping steps. This option cannot be given to individual commands and has to be given as a command line option.')
parser.add_option('-t', '--timestep',
dest='step',
type='int',
action='store',
default=None,
help='Only calculate results for a given timestep; -1 is last timestep. 0 is first timestep. Default is None - give results for all times. This option cannot be given to individual commands and has to be given as a command line option.')
parser.add_option('-c', '--changeformat',
dest='form',
default='time',
help='If given, prints results in an alternative formats. Possible values: "time": standard format with one line per time and all values along the columns. "values" one line per value and time along the columns. "single" with values first and empty line and then std errors. This option cannot be given to individual commands and has to be given as a command line option.')
parser.add_option('-z', '--nozero',
dest='nozero',
action='store_true',
default=False,
help='Ignore zero values for bin calculation. This option cannot be given to individual commands and has to be given as a command line option.')
parser.add_option('-h', '--help',
dest='help',
action='store_true',
default=False,
help='Prints this help page.')
parser.add_option('-A','--A',
dest='obsA',
type='str',
default=None,
help='First observable. TODO help on how to set up.')
parser.add_option('-B','--B',
dest='obsB',
type='str',
default=None,
help='Second observable. TODO help on how to set up.')
parser.add_option('-p', '--projection',
dest='projection',
type='str',
action='store',
default='0.0',
help='Value for unsuccessful projection.')
parser.add_option('-r','--reduce',
dest='red',
type='str',
default='mult',
help='String describing the reduction process, i.e., how to combine observable A and B. Valid: \'mult\' for multiplication and \'dot\' for dot product.')
parser.add_option('-s', '--spread',
dest='his',
action='store',
type='str',
default=None,
help='Do analysis for distribution using a histogram with given either the number of bins, 0 for integer step bins, 3 floats defining minimum, maximum and step or a string which is a valid option for numpy.histogram.')
parser.add_option('-v', '--value',
dest='func',
type='str',
action='store',
default='mean',
help='Function to calculate, i.e. mean, std, percentile x.')
parser.add_option('-o','--output',
dest='outfile',
type='str',
default='correl.dat',
help='Output file. Written in to the given directory.')
return parser
[docs]
def start():
"""
Wrapper for starting the program. Needed for good unit test coding.
"""
parser = getParser()
opts, args = parser.parse_args(sys.argv[1:])
directory = opts.dir + '/'
commands = {}
if opts.File != None:
commands = doParse(opts.File, directory, parser)
else:
commands[0] = (opts.obsA, opts.obsB, opts.projection, opts.red, opts.his, opts.func, directory + opts.outfile)
# called without any option or -h set -> print help for everything
if not sys.argv[1:] or opts.help == True:
parser.print_help()
print("")
ostring = "-h"
position(ostring, None, None, None, None)
print("")
ostring = "-h"
angle(ostring, None, None, None, None)
print("")
ostring = "-h"
charge(ostring, None, None, None, None)
print("")
ostring = "-h"
velocity(ostring, None, None, None, None)
print("")
ostring = "-h"
kinetic(ostring, None, None, None, None)
print("")
ostring = "-h"
state(ostring, None, None, None, None)
print("")
ostring = "-h"
spec(ostring, None, None, None, None)
print("")
sys.exit(0)
# Obtain list of trj directories
dirs = uti.getDirectoryList(directory, "pickle.dat")
# calulate correlation function
correlation_function(commands, dirs, opts.nbootstrap, opts.step, opts.form, opts.nozero)
if __name__ == "__main__":
start()