Source code for aiida_gromacs.parsers.genericMD
"""
Parsers provided by aiida_gromacs.
This parser stores the output files produced by a generic command.
"""
import os
import re
from aiida.common import exceptions
from aiida.engine import ExitCode
from aiida.orm import SinglefileData
from aiida.parsers.parser import Parser
from aiida.plugins import CalculationFactory
# Load the calculation class registered under the "gromacs.genericMD" entry
# point; GenericParser checks incoming nodes against this class.
GenericCalculation = CalculationFactory("gromacs.genericMD")
[docs]
class GenericParser(Parser):
    """
    Parser for the output of a genericMD calculation.

    Exposes the retrieved stdout file and the user-declared output files of
    a finished calcjob as ``SinglefileData`` output nodes, and copies every
    retrieved file into the directory given by the ``output_dir`` option.
    """

    # Number of bytes read per iteration when copying a retrieved file.
    _COPY_CHUNK_SIZE = 1024 * 1024

    def __init__(self, node):
        """
        Initialize Parser instance.

        Checks that the ProcessNode being passed was produced by a
        GenericCalculation.

        :param node: ProcessNode of calculation
        :param type node: :class:`aiida.orm.nodes.process.process.ProcessNode`
        :raises aiida.common.exceptions.ParsingError: if ``node`` was not
            produced by a GenericCalculation
        """
        super().__init__(node)
        if not issubclass(node.process_class, GenericCalculation):
            raise exceptions.ParsingError("Can only parse GenericCalculation")

    def parse(self, **kwargs):
        """
        Parse outputs, store results in the AiiDA database.

        The stdout file is attached under the ``log`` link, every file named
        in the ``output_files`` input is attached under a label derived from
        its name, and all retrieved files are copied into ``output_dir``.

        :returns: ``ERROR_UNTRACKED_OUTPUT_FILES`` if a user defined output
            file was not retrieved, otherwise ``ExitCode(0)``
        """
        # Name of the stdout file and the directory the retrieved files are
        # copied to, both taken from the calculation options.
        output_filename = self.node.get_option("output_filename")
        output_dir = self.node.get_option("output_dir")

        files_retrieved = self.retrieved.list_object_names()
        files_expected = []
        if "output_files" in self.node.inputs:
            files_expected = [
                str(name) for name in self.node.inputs.output_files
            ]

        # Every output file requested by the user must have been retrieved;
        # stop at the first one that is missing.
        for expected in files_expected:
            if expected not in files_retrieved:
                self.logger.error(
                    f"User defined output file '{expected}' not in "
                    f"list of retrieved files '{files_retrieved}'"
                )
                return self.exit_codes.ERROR_UNTRACKED_OUTPUT_FILES

        # Pass along the stdout file as a SinglefileData node.
        self.logger.info(f"Parsing '{output_filename}'")
        with self.retrieved.open(output_filename, "rb") as handle:
            output_node = SinglefileData(file=handle)
        self.out("log", output_node)

        # Pass along all expected output files as SinglefileData nodes.
        # NOTE(review): format_link_label is not defined in this class in
        # the visible source; presumably it is provided elsewhere - confirm.
        for expected in files_expected:
            self.logger.info(f"Parsing '{expected}'")
            with self.retrieved.open(expected, "rb") as handle:
                output_node = SinglefileData(file=handle, filename=expected)
            self.out(self.format_link_label(expected), output_node)

        # Write every retrieved file to the directory the command was run
        # from.
        # TODO: parse the GROMACS .log file and expose the result as a Dict
        # output node (see _parse_gromacs_top).
        for retrieved_name in files_retrieved:
            self.logger.info(f"Parsing '{retrieved_name}'")
            self._copy_retrieved_file(
                retrieved_name, os.path.join(output_dir, retrieved_name)
            )

        return ExitCode(0)

    def _copy_retrieved_file(self, name, destination):
        """
        Copy one retrieved file byte-for-byte to a path on disk.

        Both ends are opened in binary mode, so no text decoding takes place
        and no encoding fallback is required.

        :param name: name of the file inside the retrieved folder
        :param destination: path of the file to write
        """
        with self.retrieved.open(name, "rb") as source:
            with open(destination, "wb") as target:
                # iter() with a sentinel stops once read() returns b"" (EOF).
                for chunk in iter(
                    lambda: source.read(self._COPY_CHUNK_SIZE), b""
                ):
                    target.write(chunk)

    def _parse_gromacs_top(self, file_path):
        """
        Not used yet, test for parsing the GROMACS log file.

        Collects the text found between the ``A V E R A G E S`` marker and
        the ``M E G A - F L O P S`` marker of a retrieved file.

        :param file_path: name of the GROMACS .log file in the retrieved
            folder
        :returns: list of the text captured between each pair of markers
        """
        # The markers are regular expressions ("<space>?" makes each space
        # optional), so they are deliberately not passed through re.escape.
        start_string = 'A ?V ?E ?R ?A ?G ?E ?S'
        end_string = 'M ?E ?G ?A ?- ?F ?L ?O ?P ?S'

        with self.retrieved.open(file_path, "r") as handle:
            file_content = handle.read()

        # re.DOTALL lets "." match newlines so a match can span many lines.
        pattern = rf"{start_string}(.*?){end_string}"
        return re.findall(pattern, file_content, re.DOTALL)