# MIT License
# Copyright (c) 2021, 2022 All authors listed in the file AUTHORS.rst
"""
Python functions related to molecular dynamics (MD) simulations
performed with |Gromacs|.
"""
# Standard libraries
import bz2
import gzip
import lzma
import os
[docs]
def get_box_from_gro(fname):
    """
    Read the simulation box dimensions from a |gro_file|.

    The box is taken from the last line of the file: that line is split
    on whitespace and every field is converted to a float.

    Parameters
    ----------
    fname : str or bytes or os.PathLike
        Name of the |gro_file|.

    Returns
    -------
    box : list
        The whitespace-separated fields of the last line of the input
        file, each converted to :class:`float`.
    """
    # Only the very last line is needed, so avoid reading the whole
    # file.
    last_line = tail(fname, 1)[0]
    return [float(field) for field in last_line.split()]
[docs]
def get_compressed_file(fname):
    """
    Check if the input file or a compressed version of it exists.

    Check if the input file exists.  If it does not exist, check
    whether a file with the same name but with one of the following
    extensions exists:

        1. .gz
        2. .bz2
        3. .xz
        4. .lzma

    Files are checked in the given order.  The name of the first file
    found will be returned.  If none of the files exists, an exception
    is raised.

    Parameters
    ----------
    fname : str or bytes or os.PathLike
        Name of the input file.

    Returns
    -------
    found_file : str or bytes
        Name of the first file found.  The type (str or bytes) is the
        one returned by :func:`os.fspath` for `fname`.

    Raises
    ------
    FileNotFoundError :
        If neither the input file itself nor the input file with one of
        the above mentioned extensions exists.
    """
    fname = os.fspath(fname)
    # The empty extension comes first so that the uncompressed file is
    # preferred over any compressed variant.
    extensions = ["", ".gz", ".bz2", ".xz", ".lzma"]
    if isinstance(fname, bytes):
        # bytes and str cannot be concatenated -> match the type of the
        # file name.
        extensions = [ext.encode() for ext in extensions]
    candidates = [fname + ext for ext in extensions]
    for candidate in candidates:
        if os.path.isfile(candidate):
            return candidate
    # Decode bytes names for the message.  Joining bytes objects with a
    # str separator would raise a TypeError and mask the documented
    # FileNotFoundError.  `os.fsdecode` leaves str names unchanged.
    shown = "' '".join(os.fsdecode(candidate) for candidate in candidates)
    raise FileNotFoundError("No such files: '{}'".format(shown))
[docs]
def get_last_time_from_log(fname):
    """
    Extract the time of the last frame of a |Gromacs| MD simulation
    from the |log_file|.

    The last 300 lines of the file are searched backwards for a header
    line that contains both "Step" and "Time".  The time is read from
    the second whitespace-separated field of the line that directly
    follows this header.

    Parameters
    ----------
    fname : str or bytes or os.PathLike
        Name of the |log_file|.

    Returns
    -------
    time : float or None
        The time of the last frame in the |log_file|.  ``None`` if no
        header line containing "Step" and "Time" was found within the
        last 300 lines of the file.

    Raises
    ------
    ValueError
        If the line following the header has fewer than two fields or
        if its second field cannot be converted to a float.
    """
    lines = tail(fname, 300)
    # The lines are visited from the end of the file to its start, so
    # `line_below` is always the line that follows `line` in the file.
    line_below = ""
    for line in reversed(lines):
        if "Step" in line and "Time" in line:
            fields = line_below.split()
            if len(fields) < 2:
                raise ValueError(
                    "The line following the 'Step Time' header in file '{}'"
                    " does not contain a step and a time value:"
                    " '{}'".format(fname, line_below.rstrip())
                )
            # Take the second column instead of unpacking exactly two
            # values, so that headers with additional columns after
            # "Time" (e.g. a "Lambda" column) are handled as well.
            return float(fields[1])
        line_below = line
    # No header found in the inspected part of the file.
    return None
[docs]
def get_nbins(fname, binwidth):
    """
    Get the number of bins.

    Determine how many bins of the given width fit into the z dimension
    of the simulation box stored in the provided |gro_file|.

    Parameters
    ----------
    fname : str or bytes or os.PathLike
        Name of the |gro_file| that holds the box dimensions.
    binwidth : float
        The desired bin width.

    Returns
    -------
    num_bins : int
        The ratio of the box length in z direction to `binwidth`,
        rounded with the built-in :func:`round` function.
    """
    box = get_box_from_gro(fname)
    # The third box entry is the box length in z direction.
    ratio = box[2] / binwidth
    return round(ratio)
[docs]
def get_nsteps_from_mdp(fname):
    """
    Extract the maximum number of simulation steps of a |Gromacs| MD
    simulation from the |mdp_file|.

    Parameters
    ----------
    fname : str or bytes or os.PathLike
        Name of the |mdp_file|.

    Returns
    -------
    nsteps : int
        Maximum number of simulation steps specified in the |mdp_file|.
        If "nsteps" is defined multiple times, the last definition is
        used.

    Raises
    ------
    FileNotFoundError
        If the input file does not exist.
    ValueError
        If the input file does not contain a line that starts with
        "nsteps" or if "nsteps" is not followed by an equal (=) sign.
    """
    line_nsteps = None
    line_num = None
    with xopen(fname, "r") as file:
        for i, line in enumerate(file, start=1):
            line = line.strip()
            if line.startswith("nsteps"):
                # nsteps can be defined multiple times in an .mdp file.
                # From
                # https://manual.gromacs.org/documentation/current/reference-manual/file-formats.html#mdp  # noqa: W505,E501
                #   "The ordering of the items is not important, but if
                #   you enter the same thing twice, the last is used."
                # => Do not break the loop after the first occurrence
                # of 'nsteps'.
                line_nsteps = line
                line_num = i
    if line_nsteps is None:
        raise ValueError(
            "Could not find a line in file '{}' that starts with"
            " 'nsteps'".format(fname)
        )
    # Remove a potential trailing comment *before* looking for the
    # equal sign, so that an '=' inside the comment is not mistaken for
    # the assignment.
    definition = line_nsteps.split(";")[0]
    if "=" not in definition:
        raise ValueError(
            "Line {} in file '{}' starts with 'nsteps' but does not contain an"
            " equal (=) sign".format(line_num, fname)
        )
    value = definition.split("=")[1]
    return int(value)
[docs]
def tail(fname, n):
    """
    Read the last n lines from a file.

    The file is read backwards in growing chunks until enough lines
    have been collected or the start of the file is reached.

    Parameters
    ----------
    fname : str or bytes or os.PathLike
        Name of the input file.
    n : int
        The number of lines to read from the end of the input file.

    Returns
    -------
    lines : list
        List containing the last `n` lines of the input file.  Each list
        item represents one line of the file.  An empty list is
        returned if `n` is not positive.
    """
    if n <= 0:
        return []
    # Number of positions to move the cursor backwards per iteration
    # (empirical value).
    chunk = max(10 * n, 1)
    with xopen(fname, "r") as handle:
        # NOTE(review): the file is opened in text mode and the cursor
        # is moved to computed offsets.  Confirm this behaves as
        # intended for multi-byte encodings and compressed files.
        handle.seek(0, 2)  # Jump to the end of the file.
        offset = handle.tell()
        while True:
            # Never move before the start of the file.
            offset -= min(chunk, offset)
            handle.seek(offset, 0)
            collected = handle.readlines()
            # n+1 lines are required to be sure that the n-th line from
            # the end is complete and not only its ending.
            if offset == 0 or len(collected) >= n + 1:
                break
    return collected[-n:]
[docs]
def xopen(fname, mode="rt", fformat=None, **kwargs):
    """
    Open a (compressed) file and return a corresponding
    `file-like_object
    <https://docs.python.org/3/glossary.html#term-file-like-object>`__.

    This function is a replacement for the built-in :func:`open`
    function that can additionally read and write compressed files.
    Supported compression formats:

        * gzip (.gz)
        * bzip2 (.bz2)
        * XZ/LZMA2 (.xz)
        * LZMA (.lzma)

    Parameters
    ----------
    fname : str or bytes or os.PathLike
        Name of the file to open.
    mode : {'r', 'rt', 'rb', 'w', 'wt', 'wb', 'x', 'xt', 'xb', 'a', \
'at', 'ab'}, optional
        Opening mode.  See the built-in :func:`open` function for more
        details.
    fformat : {None, 'gz', 'bz2', 'xz', 'lzma', 'uncompressed'}, \
optional
        Explicitly specify the file format.  If ``None``, the file
        format is guessed from the file name extension if present and
        otherwise from the file signature.  If ``'uncompressed'``, the
        file is treated as uncompressed file.
    kwargs : dict, optional
        Additional keyword arguments to pass to the function that is
        used for opening the file.  See there for possible arguments and
        their description.

    Returns
    -------
    file : file-like object
        The opened `file
        <https://docs.python.org/3/glossary.html#term-file-object>`__.

    Raises
    ------
    ValueError
        If `fformat` is not one of the supported values.

    See Also
    --------
    :func:`open` :
        Function used to open uncompressed files
    :func:`gzip.open` :
        Function used to open gzip-compressed files
    :func:`bz2.open` :
        Function used to open bzip2-compressed files
    :func:`lzma.open` :
        Function used to open XZ- and LZMA-compressed files

    Notes
    -----
    When writing and `fformat` is ``None``, the compression algorithm is
    chosen based on the extension of the given file:

        * ``'.gz'`` uses gzip compression.
        * ``'.bz2'`` uses bzip2 compression.
        * ``'.xz'`` uses XZ/LZMA2 compression.
        * ``'.lzma'`` uses legacy LZMA compression.
        * otherwise, no compression is done.

    When writing (modes 'w', 'x' and 'a') in the legacy LZMA format,
    ``format=lzma.FORMAT_ALONE`` is passed to :func:`lzma.open` unless
    the caller provides an own `format` keyword argument.

    When reading and `fformat` is ``None``, the file format is detected
    from the file name extension if present.  If no extension is present
    or the extension is unknown, the format is detected from the file
    signature, i.e. the first few bytes of the file also known as
    "`magic numbers
    <https://www.garykessler.net/library/file_sigs.html>`__".

    References
    ----------
    Inspired by `xopen <https://github.com/pycompression/xopen>`__ by
    Marcel Martin, Ruben Vorderman et al.

    .. _file-like_object:
        https://docs.python.org/3/glossary.html#term-file-like-object
    """
    fname = os.fspath(fname)
    signatures = {
        # https://datatracker.ietf.org/doc/html/rfc1952#page-6
        "gz": b"\x1f\x8b",
        # https://en.wikipedia.org/wiki/List_of_file_signatures
        "bz2": b"\x42\x5a\x68",
        # https://tukaani.org/xz/xz-file-format.txt
        "xz": b"\xfd\x37\x7a\x58\x5a\x00",
        # https://zenhax.com/viewtopic.php?t=27
        "lzma": b"\x5d\x00",
    }
    if fformat not in [None, "uncompressed"] + list(signatures):
        raise ValueError("Invalid value for 'fformat': {}".format(fformat))

    # Use text mode by default, like the built-in `open` function, also
    # when opening compressed files.
    if mode in ("r", "w", "x", "a"):
        mode += "t"

    # Detect file format from extension.
    if fformat is None:
        for extension in signatures:
            suffix = "." + extension
            if isinstance(fname, bytes):
                suffix = suffix.encode()
            if fname.endswith(suffix):
                fformat = extension
                break

    # Detect file format from file signature.  This is only possible if
    # an existing file is read or appended to, not if a file is newly
    # created.
    if fformat is None and "w" not in mode and "x" not in mode:
        max_len = max(len(signature) for signature in signatures.values())
        try:
            with open(fname, "rb") as fh:
                file_start = fh.read(max_len)
        except OSError:
            # File could not be opened.  Let the final call to `open`
            # raise the appropriate exception.
            file_start = b""
        for extension, signature in signatures.items():
            if file_start.startswith(signature):
                fformat = extension
                break

    if fformat == "gz":
        return gzip.open(fname, mode, **kwargs)
    if fformat == "bz2":
        return bz2.open(fname, mode, **kwargs)
    if fformat in ("xz", "lzma"):
        is_writing = any(flag in mode for flag in "wxa")
        if fformat == "lzma" and is_writing:
            # `lzma.open` writes the XZ container format by default.
            # The legacy LZMA format must be requested explicitly,
            # otherwise '.lzma' files would contain XZ data.  When
            # reading, `lzma.open` detects the format automatically.
            kwargs.setdefault("format", lzma.FORMAT_ALONE)
        return lzma.open(fname, mode, **kwargs)
    # fformat is "uncompressed" or could not be detected.
    return open(fname, mode, **kwargs)