# Source code for aiida_gromacs.data.plumed_input

"""Sub class of `Data` to handle inputs used and outputs that will be produced
from commands in the plumed input file.
"""
import re
import os
import sys
from aiida.orm import SinglefileData, FolderData, List
from aiida_gromacs.utils import inputFile_utils

# Plumed actions that READ files, mapped to the option names whose values are
# the paths of those files.
INPUT_KEYWORDS = {
    "READ_DISSIMILARITY_MATRIX": ["FILE"],
    "EXTERNAL": ["FILE"],
    "METAD": ["GRID_RFILE", "ACCELERATION_RFILE"],
    # GRID_RFILES is the option PBMETAD reads grids from; GRID_WFILES is the
    # one it writes to and is therefore listed under the output keywords.
    "PBMETAD": ["GRID_RFILES"],
}

# Plumed actions that WRITE files, mapped to the option names whose values are
# the paths of those files. Most actions only use FILE, so those entries are
# generated; insertion order is kept the same as the explicit listing.
OUTPUT_KEYWORDS = {
    **{
        action: ["FILE"]
        for action in (
            "COMMITTOR",
            "DUMPATOMS",
            "DUMPCUBE",
            "DUMPDERIVATIVES",
            "DUMPFORCES",
            "DUMPGRID",
            "DUMPMASSCHARGE",
            "DUMPMULTICOLVAR",
            "DUMPPROJECTIONS",
            "PRINT",
            "GRID_TO_XYZ",  # default=density
            "FIND_CONTOUR",
            "PRINT_DISSIMILARITY_MATRIX",
            "OUTPUT_PCA_PROJECTION",
            "OUTPUT_ANALYSIS_DATA_TO_COLVAR",
            "OUTPUT_ANALYSIS_DATA_TO_PDB",
            "MAXENT",  # default=label name followed by the string .LAGMULT
        )
    },
    "METAD": ["FILE", "GRID_WFILE"],
    "PBMETAD": ["FILE", "GRID_WFILES"],
}



[docs]
class PlumedInputData(SinglefileData):
    """Plumed input script stored as a single-file node.

    When the file is set, the script is parsed and the names of the files it
    reads and writes are stored on the node as the ``input_files`` and
    ``output_files`` attributes.
    """

    def set_file(self, file, filename=None, **kwargs):
        """Add a file to the node, parse it and set the attributes found.

        :param file: absolute path to the file or a filelike object
        :param filename: specify filename to use (defaults to name of provided file).
        """
        super().set_file(file, filename, **kwargs)

        # Parse the stored plumed script line by line.
        script_lines = self.get_content().splitlines()
        parsed_info = parse_plumed_input_file(script_lines)

        # Store every entry of the parsed dictionary as a node attribute.
        for key, value in parsed_info.items():
            self.base.attributes.set(key, value)

    @property
    def inpfile_list(self):
        """Return the list of input files used in the plumed script."""
        return self.base.attributes.get('input_files')

    @property
    def outfile_list(self):
        """Return the list of output files to be produced by the plumed script."""
        return self.base.attributes.get('output_files')

    @property
    def calculation_inputs_outputs(self):
        """Return the inputs and outputs for the plumed calculation job.

        :returns: tuple ``(calc_inputs, calc_outputs)`` as built by
            ``add_calculation_inputs`` and ``add_calculation_outputs``
        :rtype: tuple
        """
        # check_filepath splits the referenced paths into two groups that are
        # handed to add_calculation_inputs as (subdirs, files).
        subdirs, files = inputFile_utils.check_filepath(self.inpfile_list)
        calc_inputs = add_calculation_inputs(subdirs, files)
        calc_outputs = add_calculation_outputs(self.outfile_list)
        return calc_inputs, calc_outputs




[docs]
def find_filename_from_string(head, values, filenames):
    """
    Find any filenames that are in a line of the plumed input file. Some keyword
    arguments allow for multiple filenames, which are comma separated.
    Each argument has an '=' after it.

    The line is split on '='. In every piece except the last, the final
    whitespace-separated word is the option name and the following piece
    starts with that option's value. Only the option name is compared with
    ``values``, so a value that happens to contain e.g. "file" is never
    mistaken for an option.

    :param head: line of plumed input file that doesn't start with '#'
    :param values: list of arguments for a plumed keyword that would produce a file;
        each one is used as a case-insensitive ``re.search`` pattern
    :param filenames: list of input/output filenames to append to, that are found in the plumed input
    :returns: list of filenames found in parsed plumed input file (the same
        list object that was passed in)
    :rtype: list
    """
    segments = head.split('=')
    # The piece after the last '=' can only hold a value, never an option
    # name, so it is excluded here; this also keeps index + 1 in range.
    for index, segment in enumerate(segments[:-1]):
        words = segment.split()
        if not words:
            continue
        option = words[-1]
        if not any(re.search(value, option, re.IGNORECASE) for value in values):
            continue
        # Filenames may be comma separated with optional whitespace. Anything
        # after the first word of an item belongs to the next option.
        for item in segments[index + 1].split(','):
            item_words = item.split()
            if item_words:
                filenames.append(item_words[0])

    return filenames




[docs]
def find_plumed_filenames(keywords, i, line, lines):
    """
    Find lines that contain a plumed keyword that would require an input/output
    file

    The action name is the first word of the line, or the second word when
    the line starts with a ``label:`` prefix. Each key of ``keywords`` is
    searched for (case-insensitively) in that action name. When the line opens
    a multi-line block (it contains '...'), the options on the opening line
    and on every following line up to the closing '...' line are scanned.
    Text after '#' is treated as a comment and ignored throughout.

    :param keywords: dictionary of plumed keywords and their arguments that require
        an input/output file
    :param i: the parsed line number
    :param line: the currently parsed line
    :param lines: all the lines in the plumed input file
    :returns: list of filenames found in parsed plumed input file
    :rtype: list
    """
    filenames = []
    # Drop the comment part of the line; blank/comment-only lines have no action.
    head = line.partition("#")[0]
    words = head.split()
    if not words:
        return filenames

    action = words[0]
    # "label: ACTION ..." puts the action name in the second word.
    if action.endswith(":") and len(words) > 1:
        action = words[1]
    # A word containing '=' is an option (e.g. a continuation line inside a
    # multi-line block such as "FILE=print.dat"), not an action name.
    if "=" in action:
        return filenames

    opens_block = "..." in head
    for keyword, values in keywords.items():
        if not re.search(keyword, action, re.IGNORECASE):
            continue
        # Options may sit on the same line as the action, also when that
        # line opens a multi-line block.
        find_filename_from_string(head, values, filenames)
        if opens_block:
            # Scan the continuation lines until the closing '...' marker. The
            # marker is looked for outside comments only.
            for following in lines[i + 1:]:
                following_head = following.partition("#")[0]
                if "..." in following_head:
                    break
                find_filename_from_string(following_head, values, filenames)

    return filenames



[docs]
def parse_plumed_input_file(lines):
    """Collect the files a plumed input script reads and writes.

    Every line is checked against both keyword tables (``INPUT_KEYWORDS`` and
    ``OUTPUT_KEYWORDS``) and the filenames reported for it are accumulated in
    script order.

    :param lines: parsed lines from the plumed input file
    :returns: dictionary with the keys ``input_files`` and ``output_files``,
        each holding a list of filenames
    :rtype: dict
    """
    found_inputs = []
    found_outputs = []
    for index, current_line in enumerate(lines):
        found_inputs.extend(
            find_plumed_filenames(INPUT_KEYWORDS, index, current_line, lines)
        )
        found_outputs.extend(
            find_plumed_filenames(OUTPUT_KEYWORDS, index, current_line, lines)
        )

    return {"input_files": found_inputs, "output_files": found_outputs}




[docs]
def add_calculation_inputs(subdirs, files):
    """Build the calcjob input entries for files referenced by a plumed script.

    Plain files are wrapped as ``SinglefileData`` under ``plumed_inpfiles``;
    files that live in sub-directories are collected into one ``FolderData``
    per top-level directory under ``plumed_dirs``. When running under pytest
    (``PYTEST_CURRENT_TEST`` is set) missing files are also looked up below
    the ``tests`` directory of the current working directory.

    :param subdirs: list of subdirectories that contain input files
    :param files: list of input files
    :returns: dictionary of calcjob inputs; empty when nothing was passed in
    :rtype: dict
    """
    calc_inputs = {}
    input_list = []

    # Plain input files: one SinglefileData node per file.
    if files:
        single_files = {}
        calc_inputs["plumed_inpfiles"] = single_files
        for fname in files:
            link_label = inputFile_utils.format_link_label(fname)
            if os.path.isfile(fname):
                input_list.append(fname)
                single_files[link_label] = SinglefileData(
                    file=os.path.join(os.getcwd(), fname)
                )
                continue
            # Outside of a test run a missing file is fatal.
            if "PYTEST_CURRENT_TEST" not in os.environ:
                sys.exit(f"Error: Input file {fname} referenced in plumed file does not exist")
            # Test runs keep their fixtures under tests/input_files.
            test_path = os.path.join(os.getcwd(), 'tests/input_files', fname)
            if not os.path.isfile(test_path):
                sys.exit(f"Error: Input file {fname} referenced in plumed file does not exist")
            single_files[link_label] = SinglefileData(file=test_path)

    # Files inside sub-directories: one FolderData per leading directory.
    if subdirs:
        folders = {}
        calc_inputs["plumed_dirs"] = folders
        for subdir in subdirs:
            if os.path.isfile(subdir):
                path_parts = subdir.split("/")
                input_list.append(path_parts[-1])
                top_dir = path_parts[0]
                # Start with an empty folder the first time a directory is seen.
                if top_dir not in folders:
                    folders[top_dir] = FolderData()
                # Keep any nested directories, but drop the leading one since
                # it is already the name of the folder node.
                folders[top_dir].put_object_from_file(
                    os.path.join(os.getcwd(), subdir),
                    path="/".join(path_parts[1:]),
                )
            elif "PYTEST_CURRENT_TEST" in os.environ:
                # For tests: look below ./tests; a file missing there is
                # skipped without an error.
                test_file = os.path.join(os.getcwd(), "tests", subdir)
                if os.path.isfile(test_file):
                    if "tests" not in folders:
                        folders["tests"] = FolderData()
                    folders["tests"].put_object_from_file(test_file, path=subdir)
            else:
                sys.exit(f"Error: subdir {subdir} referenced in plumed file does not exist")

    # NOTE: this list is not used at the moment, might use for searchprevious
    # calc_inputs["input_list"] = List(input_list)

    return calc_inputs




[docs]
def add_calculation_outputs(files):
    """Build the calcjob output entry for files written by a plumed script.

    Only the final path component of each file is kept.

    :param files: list of output files
    :returns: dictionary holding a ``List`` of file names under
        ``plumed_outfiles``, or an empty dictionary when there are no files
    :rtype: dict
    """
    if not files:
        return {}
    # Splitting on "/" and taking the last part leaves names without a
    # directory untouched.
    basenames = [path.split("/")[-1] for path in files]
    return {"plumed_outfiles": List(basenames)}




[docs]
def populate_plumed_files_to_inputs(inputs, plumed_filename):
    """Populate the plumed input files and directories into the inputs

    The plumed script is stored under ``plumed_file`` as a ``PlumedInputData``
    node, and the input/output entries derived from it are merged into
    ``inputs``.

    :param inputs: dictionary of inputs for the calculation
    :param plumed_filename: name of the plumed input file, relative to the
        current working directory
    :returns: the updated ``inputs``
    """
    # Wrap the plumed script in its dedicated node type.
    script_path = os.path.join(os.getcwd(), plumed_filename)
    inputs["plumed_file"] = PlumedInputData(file=script_path)

    # Merge in whatever the script reads first, then what it writes.
    referenced = inputs["plumed_file"].calculation_inputs_outputs
    for extra_entries in referenced:
        inputs.update(extra_entries)
    return inputs