"""
Created on Sunday, May 24th
@author: Logan Lang
"""
from re import findall, search, match, DOTALL, MULTILINE, finditer, compile
from numpy import array, dot, linspace, sum, where, zeros, pi
import logging
[docs]class QEParser:
def __init__(
self, bandsin="bands.in", kpdosin="kpdos.in", outfile="scf.out", kdirect=True
):
# Qe inputs
self.bandsin = bandsin
self.kfin = kpdosin
self.outfile = outfile
# This is not used since the extra files are useless for the parser
# self.file_names = []
# Not used
self.kdirect = kdirect
self.kticks = None
self.discontinuities = []
############################
self.high_symmetry_points = None
self.nhigh_sym = None
self.nspecies = None
self.composition = {}
self.kticks = None
self.knames = None
self.kpoints = None
self.kpointsCount = None
self.bands = None
self.bandsCount = None
self.ionsCount = None
# Used to store atomic states at the top of the kpdos.out file
self.states = None
self.spd = None
""" for l=1:
1 pz (m=0)
2 px (real combination of m=+/-1 with cosine)
3 py (real combination of m=+/-1 with sine)
for l=2:
1 dz2 (m=0)
2 dzx (real combination of m=+/-1 with cosine)
3 dzy (real combination of m=+/-1 with sine)
4 dx2-y2 (real combination of m=+/-2 with cosine)
5 dxy (real combination of m=+/-2 with sine)"""
# Oribital order. This is the way it goes into the spd Array. index 0 is resered for totals
self.orbitals = [
{"l": 0, "m": 1},
{"l": 1, "m": 3},
{"l": 1, "m": 1},
{"l": 1, "m": 2},
{"l": 2, "m": 5},
{"l": 2, "m": 3},
{"l": 2, "m": 1},
{"l": 2, "m": 2},
{"l": 2, "m": 4},
]
# These are not used
self.orbitalName_short = ["s", "p", "d", "tot"]
self.orbitalCount = None
# Opens the needed files
rf = open(self.bandsin, "r")
self.bandsIn = rf.read()
rf.close()
rf = open(self.kfin, "r")
self.kpdosIn = rf.read()
rf.close()
self.test = None
# The only method this parser takes. I could make more methods to increase its modularity
self._readQEin()
return
def _readQEin(self):
###############################################
spinCalc = False
if len(findall("\s*nspin=(.*)", self.bandsIn)) != 0:
spinCalc = True
#######################################################################
# Dealing with kpoints and labels
#######################################################################
kmethod = findall("K_POINTS[\s\{]*([a-z_]*)[\s\{]*",self.bandsIn)[0]
if kmethod == 'crystal':
numK = int(findall("K_POINTS.*\n([0-9]*)", self.bandsIn)[0])
raw_khigh_sym = findall("K_POINTS.*\n\s*[0-9]*.*\n" + numK * "(.*)\n", self.bandsIn)[0]
tickCountIndex = 0
raw_high_symmetry = []
self.knames = []
self.kticks = []
for x in raw_khigh_sym:
if len(x.split()) == 5:
raw_high_symmetry.append(
(float(x.split()[0]), float(x.split()[1]), float(x.split()[2]))
)
self.knames.append("$%s$" % x.split()[4].replace("!", ""))
self.kticks.append(tickCountIndex)
tickCountIndex += 1
self.high_symmetry_points = array(raw_high_symmetry)
self.nhigh_sym = len(self.knames)
elif kmethod == 'crystal_b':
self.nhigh_sym = int(findall("K_POINTS.*\n([0-9]*)", self.bandsIn)[0])
raw_khigh_sym = findall(
"K_POINTS.*\n\s*[0-9]*.*\n" + self.nhigh_sym * "([0-9\s\.-]*).*\n",
self.bandsIn,
)[0]
# self.kpointsCount = 0
self.kticks = []
self.high_symmetry_points = zeros(shape=(self.nhigh_sym, 3))
tick_Count = 1
for ihs in range(self.nhigh_sym):
self.high_symmetry_points[ihs, :] = [
float(x) for x in raw_khigh_sym[ihs].split()[0:3]
]
self.kticks.append(tick_Count - 1)
tick_Count += int(raw_khigh_sym[ihs].split()[3])
raw_ticks = findall(
"K_POINTS.*\n\s*[0-9]*\s*[0-9]*.*\n" + self.nhigh_sym * ".*!(.*)\n",
self.bandsIn,)[0]
if len(raw_ticks) != self.nhigh_sym:
self.knames = [str(x) for x in range(self.nhigh_sym)]
else:
self.knames = [
"$%s$" % (x.replace(",", "").replace("vlvp1d", "").replace(" ", ""))
for x in raw_ticks
]
# finds discontinuities
for i in range(len(self.kticks)):
if(i < len(self.kticks)-1):
diff = self.kticks[ i+1 ] - self.kticks[i]
if diff == 1 :
self.discontinuities.append(self.kticks[i])
discon_name = "$" + self.knames[i].replace("$","") +"|"+ self.knames[i+1].replace("$","") + "$"
self.knames.pop(i+1)
self.knames[i] = discon_name
self.kticks.pop(i+1)
#######################################################################
# Finding composition and specie data
#######################################################################
self.nspecies = int(findall("ntyp\s*=\s*([0-9]*)", self.bandsIn)[0])
self.ionsCount = int(findall("nat\s*=\s*([0-9]*)", self.bandsIn)[0])
raw_species = findall(
"ATOMIC_SPECIES.*\n" + self.nspecies * "(.*).*\n", self.bandsIn
)[0]
species_list = []
if self.nspecies == 1:
self.composition[raw_species.split()[0]] = 0
species_list.append(raw_species.split()[0])
else:
for nspec in range(self.nspecies):
species_list.append(raw_species[nspec].split()[0])
self.composition[raw_species[nspec].split()[0]] = 0
raw_ions = findall(
"ATOMIC_POSITIONS.*\n" + self.ionsCount * "(.*).*\n", self.bandsIn
)[0]
if self.ionsCount == 1:
self.composition[raw_species.split()[0]] = 1
else:
for ions in range(self.ionsCount):
for species in range(len(species_list)):
if raw_ions[ions].split()[0] == species_list[species]:
self.composition[raw_ions[ions].split()[0]] += 1
#######################################################################
# Reading the kpdos.out for outputfile labels
#######################################################################
rf = open(self.kfin.split(".")[0] + ".out", "r")
kpdosout = rf.read()
rf.close()
# The following lines get the number of states in kpdos.out and stores them in a list, in which their information is stored in a dictionary
raw_natomwfc = findall(
r"(?<=\(read from pseudopotential files\)\:)[\s\S]*?(?=k)", kpdosout
)[0]
natomwfc = len(findall("state[\s#]*(\d*)", raw_natomwfc))
raw_wfc = findall("\(read from pseudopotential files\)\:.*\n\n\s*" + natomwfc * "(.*)\n",
kpdosout,
)[0]
raw_states = []
states_list = []
state_dict = {}
# Read in raw states
for state in raw_wfc:
#print(state)
state_index = int(state.split('#')[1].split(':')[0])
atom_index = int(state.split('atom')[1].split('(')[0])
atom_name = str(state.split('(')[1].split(')')[0])
wfc_index = int(state.split('wfc')[1].split('(')[0])
l_index = int(state.split('wfc')[1].split('l=')[1].split('m=')[0])
m_index = int(state.split('wfc')[1].split('m=')[1].split(')')[0])
append = [state_index, atom_index, atom_name, wfc_index, l_index, m_index]
#append = (findall("state #\s*([0-9]+):\s*atom\s*([0-9]+)\s*\((.*)\s\),\s*wfc\s*([0-9]+)\s\(l=\s*([0-9]+)\s*m=\s*([0-9]+)\)", state)[0])
#print(append)
raw_states.append(append)
#int(state.split('#')[1].split(':')[0])
#findall("state #\s*([0-9]*):\s*atom\s*([0-9]*)\s*\((.*)\s\),\s*wfc\s*([0-9]*)\s\(l=\s*([0-9])\s*m=\s*([0-9])\)", state)[0])
#)
#print(raw_states)
for state in raw_states:
state_dict = {}
state_dict = {
"state_num": int(state[0]),
"species_num": int(state[1]),
"specie": state[2],
"atm_wfc": int(state[3]),
"l": int(state[4]),
"m": int(state[5]),
}
states_list.append(state_dict)
self.states = states_list
"""
This section was used to parse those extra files we discussed. It is not needed, I kept it just incase
output_prefix = findall("filpdos\s*=\s*'(.*)'", self.kpdosIn)[0]
kpdos_files=[]
atmNum = list(range(1, self.ionsCount+1))
wfc_file_label = []
atm_file_label = []
states_dict ={}
#Find unique raw file labels
for state1 in raw_states:
wfc_file_label.append((state1[3],state1[4])) if (state1[3],state1[4]) not in wfc_file_label else None
atm_file_label.append((state1[1],state1[2])) if (state1[1],state1[2]) not in atm_file_label else None
#Combine raw file labels inside a tuple
for atm in atm_file_label:
for wfc in wfc_file_label:
file = (int(atm[0]),atm[1],int(wfc[0]),int(wfc[1]))
kpdos_files.append(file)
file = None
#Convert raw file labels to final file string
for file in kpdos_files:
if(file[3] == 0 ):
self.file_names.append(output_prefix + ".pdos_atm#" + str(file[0]) + "(" + file[1] + ")_wfc#" + str(file[2]) + "(s)")
elif (file[3] == 1 ):
self.file_names.append(output_prefix + ".pdos_atm#" + str(file[0]) + "(" + file[1] + ")_wfc#" + str(file[2]) + "(p)")
elif (file[3] == 2 ):
self.file_names.append(output_prefix + ".pdos_atm#" + str(file[0]) + "(" + file[1] + ")_wfc#" + str(file[2]) + "(d)")
elif (file[3] == 3 ):
self.file_names.append(output_prefix + ".pdos_atm#" + str(file[0]) + "(" + file[1] + ")_wfc#" + str(file[2]) + "(f)")"""
#######################################################################
# Finds kpoints in kpdosout file. They are the k points in the reciprocal space
#######################################################################
raw_kpoints = []
if spinCalc == True:
self.kpointsCount = int(
len(findall("k\s*=\s*(.*)\s*(.*)\s*(.*)", kpdosout)) / 2
)
for k in range(len(findall("k\s*=\s*(.*)\s*(.*)\s*(.*)", kpdosout))):
if k < self.kpointsCount:
raw_kpoints.append(
findall("k\s*=\s*(.*)\s*(.*)\s*(.*)", kpdosout)[k][0]
)
totK = len(findall("k\s*=\s*(.*)\s*(.*)\s*(.*)", kpdosout))
else:
for k in range(len(findall("k\s*=\s*(.*)\s*(.*)\s*(.*)", kpdosout))):
raw_kpoints.append(
findall("k\s*=\s*(.*)\s*(.*)\s*(.*)", kpdosout)[k][0]
)
self.kpointsCount = len(raw_kpoints)
self.kpoints = zeros(shape=(self.kpointsCount, 3))
for ik in range(len(raw_kpoints)):
for coord in range(3):
self.kpoints[ik][coord] = raw_kpoints[ik].split()[coord]
# If kdirect=False, then the kgrid will be in cartesian coordinates.
# Requires the reciprocal lattice vectors to be parsed from the output.
if not self.kdirect:
self.kpoints = dot(self.kpoints, self.reclat)
#######################################################################
# Finds the band count and makes a band array
#######################################################################
band_info = findall(r"====\se\(\s*(\d+)\)\s=\s*([-.\d]+)", kpdosout)
if spinCalc == True:
self.bandsCount = int(len(band_info) / totK)
self.bands = zeros(shape=(self.kpointsCount, self.bandsCount, 2))
ik = 0
for band in range(len(band_info)):
if ik < self.kpointsCount:
self.bands[ik, int(band_info[band][0]) - 1, 0] = float(
band_info[band][1]
)
else:
self.bands[
ik - self.kpointsCount, int(band_info[band][0]) - 1, 1
] = float(band_info[band][1])
if int(band_info[band][0]) == self.bandsCount:
ik += 1
else:
self.bandsCount = int(len(band_info) / self.kpointsCount)
self.bands = zeros(shape=(self.kpointsCount, self.bandsCount))
ik = 0
for band in range(len(band_info)):
self.bands[ik, int(band_info[band][0]) - 1] = float(band_info[band][1])
if int(band_info[band][0]) == self.bandsCount:
ik += 1
#######################################################################
# Filling the spd array
#######################################################################
if spinCalc == True:
spinCount = 2
else:
spinCount = 1
k_info = None
self.orbitalCount = len(self.orbitals)
self.spd = zeros(
shape=(
self.kpointsCount,
self.bandsCount,
spinCount,
self.ionsCount + 1,
len(self.orbitals) + 2,
)
)
k_string = findall(r"\s?k =[\s-]{3}?.+", kpdosout)
"""First loop goes through the kpoints and matches band information that follows the specific k point. The if else statment is used to catch unique cases, namely the end of the kpoints
Also sometimes there is a duplicate final k point so findall is used twice to deal with this case
Second loop goes through band information of a kpoint.The if else statments here again catches unique cases. This is when it hits the last the band or when it is the last band and the last kpoint
The other if else statement here catches the cases when there are no contributions at all and would return nothing
Third loop parses the projections of a band
The fourth loop then goes through the known possible states. The if statment ensures the there is a projection and matchs a projection to a known state.
The fifth loop then goes throguh the known possible orbitals. The if statment then matches the projection with a specific orbital.
Finally the projection is put into the spd array
"""
for kp in range(len(k_string)):
if kp == len(k_string) - 1:
expression = "(?<=" + k_string[kp] + ")[\s\S]*?(?=Lowdin Charges:)"
expression_final_k = "(?<=" + k_string[kp - 1] + ")[\s\S]*?(?=$)"
k_info1 = findall(expression_final_k, kpdosout)[0]
k_info = findall(expression, k_info1)[0]
else:
expression = (
"(?<=" + k_string[kp] + ")[\s\S]*?(?=" + k_string[kp + 1] + ")"
)
# print(expression)
k_info = findall(expression, kpdosout)[0]
# print(k_info)
# print(k_info)
# print(self.bandsCount)
for iband in range(self.bandsCount):
if iband == self.bandsCount - 1 and kp == len(k_string) - 1:
# print("hi")
expression2 = "==== e\(\s*" + str(iband + 1) + "\)[\s\S]*?(?=$)"
# print(k_info)
iband_proj = findall(expression2, k_info)[0]
# print(iband_proj)
elif iband == self.bandsCount - 1:
expression2 = "==== e\(\s*" + str(iband + 1) + "\).*\n([\s\S]*)"
iband_proj = findall(expression2, k_info)[0]
else:
# print("hi")
expression2 = (
"==== e\(\s*"
+ str(iband + 1)
+ "\)[\s\S]*?(?===== e\(\s*"
+ str(iband + 2)
+ "\))"
)
iband_proj = findall(expression2, k_info)[0]
# print(iband_proj)
expression3 = "(?<=psi = )[\s\S]*?(?=\s*\|psi\|\^2)"
wfc = None
if len(findall(expression3, iband_proj)) == 0:
pass
else:
wfc = findall(expression3, iband_proj)[0]
# print(wfc)
state_proj = wfc.split("+")
for proj in state_proj:
for known_states in self.states:
if (
len(findall("#\s*([0-9]*)", proj)) != 0
and int(findall("#\s*([0-9]*)", proj)[0])
== known_states["state_num"]
):
for iorbitals in range(len(self.orbitals)):
if (
known_states["l"]
== self.orbitals[iorbitals]["l"]
and known_states["m"]
== self.orbitals[iorbitals]["m"]
):
if spinCalc == True:
if kp < self.kpointsCount:
# print(known_states["species_num"])
self.spd[
kp,
iband,
0,
known_states["species_num"] - 1,
iorbitals + 1,
] += float(proj.split("*")[0])
self.spd[
kp,
iband,
0,
known_states["species_num"] - 1,
0,
] = known_states["species_num"]
else:
self.spd[
kp - self.kpointsCount,
iband,
1,
known_states["species_num"] - 1,
iorbitals + 1,
] += float(proj.split("*")[0])
self.spd[
kp - self.kpointsCount,
iband,
1,
known_states["species_num"] - 1,
0,
] = known_states["species_num"]
else:
self.spd[
kp,
iband,
0,
known_states["species_num"] - 1,
iorbitals + 1,
] += float(proj.split("*")[0])
self.spd[
kp,
iband,
0,
known_states["species_num"] - 1,
0,
] = known_states["species_num"]
for ions in range(self.ionsCount):
self.spd[:, :, :, ions, 0] = ions + 1
# The following fills the totals for the spd array
self.spd[:, :, :, :, -1] = sum(self.spd[:, :, :, :, 1:-1], axis=4)
self.spd[:, :, :, -1, :] = self.spd.sum(axis=3)
self.spd[:, :, :, -1, 0] = 0
# colinear spin polarized case
# manipulating spd array for spin polarized calculations.
# The shape is (nkpoints,2*nbands,2,natoms,norbitals)
# The third dimension is for spin.
# When this is zero, the bands*2 (all spin up and down bands) have positive projections.
# When this is one, the the first half of bands (spin up) will have positive projections
# and the second half (spin down) will have negative projections. This is to adhere to
# the convention used in PyProcar to obtain spin density and spin magnetization.
if spinCalc:
print("\nQuantum Espresso colinear spin calculation detected.\n")
self.spd2 = zeros(
shape=(
self.kpointsCount,
self.bandsCount * 2,
spinCount,
self.ionsCount + 1,
len(self.orbitals) + 2,
)
)
# spin up block for spin=0
self.spd2[:, : self.bandsCount, 0, :, :] = self.spd[:, :, 0, :, :]
# spin down block for spin=0
self.spd2[:, self.bandsCount :, 0, :, :] = self.spd[:, :, 1, :, :]
# spin up block for spin=1
self.spd2[:, : self.bandsCount, 1, :, :] = self.spd[:, :, 0, :, :]
# spin down block for spin=1
self.spd2[:, self.bandsCount :, 1, :, :] = -1 * self.spd[:, :, 1, :, :]
self.spd = self.spd2
# Reshaping bands array to inlcude all bands (spin up and down)
self.bands = self.bands.reshape(
self.kpointsCount, self.bandsCount * 2, order="F"
)
@property
def fermi(self):
"""
Returns the fermi energy read from .out
"""
fi = open(self.outfile, "r")
data = fi.read()
fi.close()
data = data.split('the Fermi energy is')[1].split('ev')[0]
fermi = float(data)
#print((findall(r"the\s*Fermi\s*energy\s*is\s*([\s\d.]*)ev", data)))
#fermi = float(findall(r"the\s*Fermi\s*energy\s*is\s*([\s\d.]*)ev", data)[0])
return fermi
@property
def reclat(self):
"""
Returns the reciprocal lattice read from .out
"""
rf = open(self.outfile, "r")
data = rf.read()
rf.close()
alat = float(findall(r"alat\)\s*=\s*([\d.e+-]*)", data)[0])
b1 = array(
findall(r"b\(1\)\s*=\s*\(([\d.\s+-e]*)", data)[0].split(), dtype="float64"
)
b2 = array(
findall(r"b\(2\)\s*=\s*\(([\d.\s+-e]*)", data)[0].split(), dtype="float64"
)
b3 = array(
findall(r"b\(3\)\s*=\s*\(([\d.\s+-e]*)", data)[0].split(), dtype="float64"
)
reclat = (2 * pi / alat) * (array((b1, b2, b3)))
# Transposing to get the correct format
reclat = reclat.T
return reclat