Source code for dfttools.parsers.openmx

"""
Parsing `OpenMX <http://openmx-square.org/>`_ files.
"""
import re
import warnings
import math
import json
import os
import os.path

import numpy
import numericalunits

from .generic import parse, cre_varName, cre_word, cre_nonspace, re_int, cre_int, cre_float, AbstractParser, AbstractJSONParser, ParseError
from .structure import cube
from .native_openmx import openmx_bands_bands
from ..simple import band_structure, unit_cell, guess_parser, parse, tag_method
from ..types import UnitCell, Basis
from . import default_real_space_basis

[docs]def populations(s):
    """
    Parses JSON with Lowdin populations. Replaces corresponding arrays
    with numpy objects. Adds units where needed.
    
    Args:
    
        s (str): file contents.
    
    Returns:
    
        A dict with JSON data.
    """
    result = json.loads(s)
    for field in ("k", "bands", "energies", "weights"):
        result[field] = numpy.array(result[field])
        
    result["energies"] *= numericalunits.Hartree
        
    for field in result["basis"].keys():
        result["basis"][field] = numpy.array(result["basis"][field])
        
    return result
    
[docs]def joint_populations(files):
    """
    Collects several files with Lowdin populations and parses them into
    a single object.
    
    Args:
    
        files (list of str): file contents in an array.
    
    Returns:
    
        A dict with joint JSON data.
    """
    if len(files) == 0:
        raise ValueError("Empty array passed")
        
    parsed = {}
    for f in files:
        p = populations(f)
        parsed[p["k-id"]] = p

    collected = {
        "k": [],
        "energies": [],
        "weights": [],
    }
    for i in range(len(parsed)):
        
        if not i in parsed:
            raise ValueError("Missing data at k-point #{:d}".format(i))
        p = parsed[i]
        p2 = parsed[0]
        
        if i>0:
            if not numpy.all(p["bands"] == p2["bands"]):
                raise ValueError("Different bands reported at k #0 and k #{:d}".format(i))
            for k in p["basis"].keys() + p2["basis"].keys():
                if not k in p["basis"] or not k in p2["basis"]:
                    raise ValueError("The basis description '{}' is missing for k #0 or k #{:d}".format(k,i))
                if not numpy.all(p["basis"][k] == p2["basis"][k]):
                    raise ValueError("The basis description '{}' is different for k #0 and k #{:d}".format(k,i))
        
        for c in collected.keys():
            collected[c].append(p[c])
        
    result = parsed[0]
    del result["k-id"]
    for c in collected.keys():
        result[c] = numpy.array(collected[c])
        
    return result
    
[docs]class JSON_DOS(AbstractJSONParser):
    """
    Parses JSON with OpenMX density of states.
    
    Args:
    
        data (str): contents of OpenMX JSON DoS file.
    """
    
    @staticmethod
    def valid_header(header):
        return "openmx-dos-negf" in header
        
    @staticmethod
    def valid_filename(name):
        return name.endswith(".Dos.json")
        
    def __init__(self, data):
        super(JSON_DOS, self).__init__(data)
        self.__set_units__("energy",numericalunits.Hartree)
        self.__set_units__("DOS",1./numericalunits.Hartree)
        
        for field in self.json["basis"].keys():
            self.json["basis"][field] = numpy.array(self.json["basis"][field])
    
[docs]    def basis(self):
        """
        Retrieves the basis set for density weights.
        
        Returns:
        
            A dict contatining basis description.
        """
        return self.json["basis"]
        
[docs]    def weights(self):
        """
        Retrieves the densities.
        
        Returns:
        
            Densities in a 4D array with the following index order:
            * ky
            * kz
            * energy
            * state
        """
        return self.json["DOS"]
        
[docs]    def energies(self):
        """
        Retrieves corresponding energies.
        
        Returns:
        
            A 1D array with energy values.
        """
        return numpy.linspace(self.json["energy"][0], self.json["energy"][1], self.json["DOS"].shape[2])
        
    def __k__(self, index):
        n = self.json["DOS"].shape[index]
        k = numpy.linspace(0, 1, n, endpoint = False)
        k -= k[-1]/2
        return k
        
[docs]    def ky(self):
        """
        Retrieves values of ky.
        
        Returns:
        
            Values of ky.
        """
        return self.__k__(0)
        
[docs]    def kz(self):
        """
        Retrieves values of kz.
        
        Returns:
        
            Values of kz.
        """
        return self.__k__(1)
    
[docs]class Input(AbstractParser):
    """
    Class for parsing parameter values from OpenMX input files.
    
    Args:
    
        data (str): contents of OpenMX input file
    """
    
    @staticmethod
    def valid_header(header):
        l = header.lower()
        return "definition.of.atomic.species" in l
        
    def systemName(self):
        return self.getNonSpaced("system.name")
        
[docs]    def getWord(self, parameter):
        """
        A shortcut to parser.StringParser.matchAfter designed to obtain
        word-like parameter values from textual configuration files.
        
        Args:
        
            parameter (str): parameter name
            
        Returns:
        
            parameter value
        """
        self.parser.reset()
        return self.parser.matchAfter(parameter,cre_word)
        
[docs]    def getNonSpaced(self, parameter):
        """
        A shortcut to parser.StringParser.matchAfter designed to obtain
        parameter values without spaces from textual configuration files.
        
        Args:
        
            parameter (str): parameter name
            
        Returns:
        
            parameter value
        """
        self.parser.reset()
        return self.parser.matchAfter(parameter,cre_nonspace)

[docs]    def getFloat(self, parameter):
        """
        A shortcut to parser.StringParser.matchAfter designed to obtain
        float parameter values from textual configuration files.
        
        Args:
        
            parameter (str): parameter name
            
        Returns:
        
            parameter value
        """
        self.parser.reset()
        return self.parser.floatAfter(parameter)

[docs]    def getInt(self, parameter):
        """
        A shortcut to parser.StringParser.matchAfter designed to obtain
        integer parameter values from textual configuration files.
        
        Args:
        
            parameter (str): parameter name
            
        Returns:
        
            parameter value
        """
        self.parser.reset()
        return self.parser.intAfter(parameter)

    @unit_cell
[docs]    def unitCell(self, l = None, r = None, tolerance = 1e-12):
        """
        Retrieves atomic position data.
        
        Kwargs:
        
            l,r (UnitCell): left lead and right lead cells.
            This information is required for parsing the cell from NEGF
            calculation input file;
            
            tolerance (float): a tolerance for comparing atomic position
            data from the keywords and from the file itself in ``aBohr``.
        
        Returns:
        
            An input unit cell.
            
        Raises:
        
            ValueError: left and right lead cells are not specified for
            NEGF input file.
        """
       
        self.parser.reset()

        if not self.parser.present("<Atoms.UnitVectors"):
            if l is None or r is None:
                raise ValueError("The input file does not specify unit cell dimensions (NEGF calculation) but the left and right leads are not specified")
            no_vectors = True
            
        else:
            no_vectors = False
            
        n = self.getInt("Atoms.Number")
        units = self.getWord("Atoms.SpeciesAndCoordinates.Unit")

        self.parser.save()
        self.parser.skip("<Atoms.SpeciesAndCoordinates")
        coordinates = numpy.zeros((n,3))
        values = []
        for i in range(n):
            self.parser.nextInt()
            values.append(self.parser.nextMatch(cre_word))
            coordinates[i,:] = self.parser.nextFloat(3)
            self.parser.nextLine()
        self.parser.pop()
        
        if units.lower() == "ang":
            coordinates *= numericalunits.angstrom
        elif units.lower() == "au":
            coordinates *= numericalunits.aBohr
            
        if no_vectors:
            
            nl = self.parser.intAfter("LeftLeadAtoms.Number")
            nr = self.parser.intAfter("RightLeadAtoms.Number")

            self.parser.skip("<LeftLeadAtoms.SpeciesAndCoordinates")
            coordinatesl = numpy.zeros((nl,3))
            valuesl = []
            
            for i in range(nl):
                
                self.parser.nextInt()
                valuesl.append(self.parser.nextMatch(cre_word))
                coordinatesl[i,:] = self.parser.nextFloat(3)
                self.parser.nextLine()
                
            self.parser.reset()
    
            self.parser.skip("<RightLeadAtoms.SpeciesAndCoordinates")
            coordinatesr = numpy.zeros((nr,3))
            valuesr = []
            
            for i in range(nr):
                
                self.parser.nextInt()
                valuesr.append(self.parser.nextMatch(cre_word))
                coordinatesr[i,:] = self.parser.nextFloat(3)
                self.parser.nextLine()
                
            self.parser.reset()
                
            if units.lower() == "ang":
                coordinatesl *= numericalunits.angstrom
                coordinatesr *= numericalunits.angstrom
            elif units.lower() == "au":
                coordinatesl *= numericalunits.aBohr
                coordinatesr *= numericalunits.aBohr

            dl = (l.cartesian()-coordinatesl).sum(axis = 0)/l.size()
            delta_l = abs(l.cartesian()-coordinatesl-dl).max()
            
            dr = (r.cartesian()-coordinatesr).sum(axis = 0)/r.size()
            delta_r = abs(r.cartesian()-coordinatesr-dr).max()
            
            if delta_l/numericalunits.aBohr > tolerance:
                raise ValueError("The atomic coordinates given by 'l' keyword and those present in the input file are different. Set the 'tolerance' keyword to at least {:e} to avoid this error.".format(delta_l/numericalunits.aBohr))
            if delta_r/numericalunits.aBohr > tolerance:
                raise ValueError("The atomic coordinates given by 'r' keyword and those present in the input file are different. Set the 'tolerance' keyword to at least {:e} to avoid this error.".format(delta_r/numericalunits.aBohr))
                
            shape = l.vectors.copy()
            shape[0,:] = dl-dr-l.vectors[0,:]
            coordinates = coordinates + dl - l.vectors[0,:]
            
        else:
            
            units_cell = self.parser.matchAfter("Atoms.UnitVectors.Unit",cre_word).lower()
            shape = self.parser.floatAfter("<Atoms.UnitVectors",n = (3,3))
    
            if units_cell == "ang":
                shape *= numericalunits.angstrom
            elif units_cell == "au":
                shape *= numericalunits.aBohr

        return UnitCell(
            default_real_space_basis(shape),
            coordinates,
            values,
            c_basis = None if units.lower() == "frac" else "cartesian"
        )

[docs]class Output(AbstractParser):
    """
    Class for parsing parameter values from OpenMX output files.
    
    Args:
    
        data (string): contents of OpenMX output file
    """
        
    @staticmethod
    def valid_header(header):
        return "Welcome to OpenMX" in header and "T. Ozaki" in header
    
[docs]    def version(self):
        """
        Retrieves OpenMX version as reported in the output.
        
        Returns:
        
            OpenMX program version as string.
        """
        self.parser.reset()
        self.parser.skip(re.compile("Welcome to OpenMX\s+Ver\."))
        return self.parser.nextMatch(cre_varName,n = "\n")[0]
        
[docs]    def total(self):
        """
        Retrieves total energy calculated.
        
        Returns:
        
            An array of floats with total energy per each SCF cycle.
        """
        self.parser.reset()
        result = []
        
        while self.parser.present("Utot  ="):
            self.parser.skip("Utot  =")
            result.append(self.parser.nextFloat()*numericalunits.Hartree)
            
        return numpy.array(result)
        
[docs]    def nat(self):
        """
        Retrieves number of atoms.
        
        Returns:
        
            Number of atoms for the first relaxation step.
        """
        self.parser.reset()
        self.parser.skip("MD or geometry opt. at MD")
        self.parser.skip("maximum force")
        
        n = 0
        while self.parser.closest(("XYZ(ang)","***")) == 0:
            self.parser.skip("XYZ(ang)")
            n += 1
            
        return n
        
[docs]    def unitCells(self, startingCell, noraise = False):
        """
        Retrieves atomic positions data for relax calculation.
        
        Args:
        
            startingCell (qetools.cell.Cell): a unit cell from the input
            file. It is required since no chemical captions are written
            in the output.
            
        Kwargs:
        
            noraise (bool): retirieves as much structures as possible
            without raising exceptions.
        
        Returns:
        
            A set of all unit cells found.
        """
        self.parser.reset()
        cells = []
        
        while self.parser.present("lattice vectors (bohr)"):
            
            try:
                
                self.parser.skip("lattice vectors (bohr)")
                shape = self.parser.nextFloat((3,3))*numericalunits.aBohr
                    
                self.parser.skip("MD or geometry opt. at MD")
                self.parser.skip("maximum force")
                coordinates = []
                
                while self.parser.closest(("XYZ(ang)","***")) == 0:
                    
                    self.parser.skip("XYZ(ang)")
                    coordinates.append(self.parser.nextFloat(3)*numericalunits.angstrom)
                    
                cells.append(UnitCell(
                    default_real_space_basis(shape),
                    coordinates,
                    startingCell.values,
                    c_basis = "cartesian",
                ))
                
            except:
                if not noraise:
                    raise
                else:
                    return cells
        
        return cells
    
    @tag_method("unit-cell", take_file = True)
    def __unit_cells_silent__(self, f):
        # Search for an input file
        directory = os.path.dirname(f.name)
        file_names = list(os.path.join(directory,i) for i in os.listdir(directory))
        for name in file_names:
            if os.path.isfile(name):
                with open(name, "r") as f:
                    if Input in guess_parser(f):
                        try:
                            c = Input(f.read()).unitCell()
                            if c.size() == self.nat():
                                return self.unitCells(c, noraise = True)
                        except:
                            pass
        
        raise ParseError("Could not locate corresponding input file")
                        
[docs]    def populations(self):
        """
        Retrieves Mulliken populations during scf process.
        
        Returns:
        
            A numpy array where the first index corresponds to
            iteration number and the second one is atomic ID. The
            populations are renormalized to reproduce the total charge.
        """
        self.parser.reset()
        result = []
        
        while self.parser.present("NormRD"):
            
            self.parser.skip(re.compile(r"\*{19} MD=\s*\d*\s*SCF=\s*\d*\s*\*{19}"))
            self.parser.goto(re.compile(r"\n\s*"+re_int))
            self.parser.nextLine()
            
            c = []
            
            while self.parser.closest(("Sum of MulP",cre_int)) == 1:
                
                self.parser.skip("sum")
                c.append(self.parser.nextFloat())
                self.parser.nextLine()
            
            self.parser.skip("total=")
            total = self.parser.nextFloat()
            
            self.parser.skip("NormRD")
            c = numpy.array(c)
            result.append(c*total/sum(c))
            
        return numpy.array(result)
        
[docs]    def neutral_charge(self):
        """
        Retrieves the number of valence electrons in the calculation for
        the charge neutral system.
        
        Returns:
        
            The number of electrons.
        """
        self.parser.reset()
        return self.parser.floatAfter("ideal(neutral)=")

[docs]    def solvers(self):
        """
        Retrieves the solver used for each iteration.
        
        Returns:
        
            A list of solver names.
        """
        self.parser.reset()
        result = []
        
        while self.parser.present("NormRD"):
            
            self.parser.skip(re.compile(r"\*{19} MD=\s*\d*\s*SCF=\s*\d*\s*\*{19}"))
            self.parser.goto("_DFT>")
            self.parser.startOfLine()
            result.append(self.parser.nextMatch(cre_word))
            self.parser.skip("NormRD")
            
        return result
        
[docs]    def convergence(self):
        """
        Retrieves convergence error values.
        
        Returns:
        
            A numpy array of convergence errors.
        """
        self.parser.reset()
        result = []
        
        while self.parser.present("NormRD"):
            
            self.parser.skip("NormRD")
            result.append(self.parser.nextFloat())
            
        return numpy.array(result)

[docs]class Bands(AbstractParser):
    """
    Class for parsing band structure from openmx.Band file.
    
    Args:
    
        data (string): contents of OpenMX Band file
    """
        
    @staticmethod
    def valid_filename(name):
        return name.endswith(".Band")
        
[docs]    def fermi(self):
        """
        Retrieves Fermi energy.
        
        Returns:
        
            Fermi energy.
        """
        self.parser.reset()
        p = self.parser
        
        p.nextInt()
        p.nextInt()
        return p.nextFloat()*numericalunits.Hartree
        
[docs]    def captions(self):
        """
        Retrieves user-specified K-point captions.
        
        Returns:
        
            A dict with k-point number - k-point caption pairs.
        """
        self.parser.reset()
        p = self.parser
        
        p.nextLine(2)

        # nk = number of K points
        npath = p.nextInt()
        nk = 0
        result = {}
        
        for i in range(npath):
            
            n = p.nextInt()
            fr = p.nextFloat(3)
            to = p.nextFloat(3)
            fr_c = p.nextMatch(cre_nonspace)
            to_c = p.nextMatch(cre_nonspace)
            result[nk] = fr_c
            nk += n
            result[nk-1] = to_c
            p.nextLine()
            
        return result

    @band_structure
[docs]    def bands(self):
        """
        Retrieves bands.
        
        Returns:
        
            A UnitCell object with band energies.
        """
        fermi = self.fermi()
        
        self.parser.reset()
        p = self.parser
        
        p.nextLine()
        shape = p.nextFloat((3,3))/numericalunits.aBohr
        
        data = openmx_bands_bands(self.data)
        
        return UnitCell(
            Basis(shape, meta = {"Fermi":self.fermi(), "special-points":self.captions()}),
            data[:,:3],
            data[:,3:]*numericalunits.Hartree,
        )

[docs]class Transmission(AbstractParser):
    """
    Class for parsing transmission from openmx.tran file.
    
    Args:
    
        data (string): contents of openmx.tran file
    """
        
    @staticmethod
    def valid_filename(name):
        l = name.rfind(".tran")
        
        if l == -1:
            return False
        print name[l+6:]
        return not (re.match(r"[\d+]_[\d+]\Z",name[l+5:]) is None)
        
    
    @staticmethod
    def valid_header(header):
        return "The unit of current is given by eEh/bar{h}" in header
        
    def __table__(self):
        """
        Reads the data as a table.
        
        Returns:
        
            All numbers presented in a numpy 2D array.
        """
        self.parser.reset()
        
        # Skip comments
        while self.parser.present("#"):
            self.parser.nextLine()
        
        result = []
        while self.parser.present(cre_float):
            result.append(self.parser.nextFloat("\n"))
            self.parser.nextLine()
            
        return numpy.array(result)
        
    def __spin__(self):
        """
        Retrieves integer corresponding to spin treatment.
        
        Returns:
        
            SpinP_switch as reported in the file.
        """
        self.parser.reset()
        return self.parser.intAfter("spinp_switch")

[docs]    def total(self):
        """
        Retrieves total transmission 1D array.
        
        Returns:
        
            A numpy array containing total transmission values with
            imaginary part discarded.
        """
        s = self.__spin__()
        table = self.__table__()
        if s == 0:
            return table[:,5]+table[:,7]
        elif s == 3:
            return table[:,5]
        else:
            raise ValueError("Unrecognized spinp_switch: {:d}".format(s))
        
[docs]    def energy(self):
        """
        Retrieves energy points of computed transmission.
        
        Returns:
        
            A numpy array containing energies with imaginary part discarded.
        """
        return self.__table__()[:,3]*numericalunits.eV
        
# Lower case versions
input = Input
output = Output
bands = Bands
transmission = Transmission