Python API
Documentation for the core functions in gpuma.
Public high-level Python API for common geometry optimization workflows.
This module provides convenience functions built on top of the lower-level
I/O and optimization utilities. It allows users to easily optimize molecular
structures starting from SMILES strings or XYZ files, as well as optimizing
ensembles of conformers.
Single Structure Optimization
Methods for optimizing individual molecules provided as SMILES or XYZ files.
optimize_single_smiles(smiles, output_file=None, config=None)
Optimize a single molecule from a SMILES string.
This function uses the provided SMILES string to generate an initial 3D structure
using the Morfeus library. It then optimizes the structure using the specified
optimization pipeline.
Parameters:
| Name |
Type |
Description |
Default |
smiles
|
str
|
SMILES string of the molecule to optimize.
|
required
|
output_file
|
str
|
Path to an output XYZ file where the optimized structure
will be written. If None, the optimized structure is not saved to a file.
|
None
|
config
|
Config
|
Config object to control the optimization pipeline.
Highly recommended to specify. If None, the configuration will be loaded
from the default file.
|
None
|
Returns:
| Name | Type |
Description |
Structure |
Structure
|
The optimized molecular structure as a Structure object.
|
Raises:
| Type |
Description |
ValueError
|
If the generated structure is not valid.
|
Source code in src/gpuma/api.py
def optimize_single_smiles(
    smiles: str,
    output_file: str | None = None,
    config: Config | None = None,
) -> Structure:
    """Build a 3D structure from a SMILES string and optimize it.

    The starting geometry is produced by ``smiles_to_xyz``; it is then handed
    to ``optimize_single_structure`` and, if requested, written to disk with
    ``save_xyz_file``.

    Args:
        smiles (str): SMILES string of the molecule to optimize.
        output_file (str, optional): Path of the XYZ file to write the
            optimized structure to. Nothing is written when this is None
            (or empty).
        config (Config, optional): Configuration for the optimization
            pipeline. When None, ``load_config_from_file()`` supplies it.

    Returns:
        Structure: The optimized molecular structure.

    Raises:
        ValueError: If ``smiles_to_xyz`` does not return a ``Structure``.
    """
    cfg = load_config_from_file() if config is None else config

    initial = smiles_to_xyz(smiles, multiplicity=cfg.optimization.multiplicity)
    if not isinstance(initial, Structure):
        raise ValueError("smiles_to_xyz did not return a Structure")

    # Replace the generator's comment so the output records its provenance.
    initial.comment = f"Optimized from SMILES: {smiles}"

    optimized = optimize_single_structure(initial, cfg)
    if output_file:
        save_xyz_file(optimized, output_file)
    return optimized
|
optimize_single_xyz_file(input_file, output_file=None, config=None)
Optimize a single structure from an XYZ file.
This function reads a molecular structure from the specified XYZ file,
optimizes it using the provided optimization pipeline, and optionally
writes the optimized structure to an output XYZ file.
Parameters:
| Name |
Type |
Description |
Default |
input_file
|
str
|
Path to an input XYZ file from which to read the initial structure.
|
required
|
output_file
|
str
|
Path to an output XYZ file where the optimized structure will be written.
If None, the optimized structure will not be saved to a file.
|
None
|
config
|
Config
|
Config object to control the optimization pipeline.
Highly recommended to specify. If None, the configuration will be loaded from
the default file.
|
None
|
Returns:
| Name | Type |
Description |
Structure |
Structure
|
The optimized molecular structure as a Structure object.
|
Raises:
| Type |
Description |
ValueError
|
If the input file does not exist or if the read structure is not valid.
|
Source code in src/gpuma/api.py
def optimize_single_xyz_file(
    input_file: str,
    output_file: str | None = None,
    config: Config | None = None,
) -> Structure:
    """Read one structure from an XYZ file and optimize it.

    Charge and multiplicity are taken from ``config.optimization`` and passed
    to ``read_xyz``. The optimized structure is returned and, if requested,
    written with ``save_xyz_file``.

    Args:
        input_file (str): Path of the XYZ file holding the initial structure.
        output_file (str, optional): Path of the XYZ file to write the
            optimized structure to. Nothing is written when this is None
            (or empty).
        config (Config, optional): Configuration for the optimization
            pipeline. When None, ``load_config_from_file()`` supplies it.

    Returns:
        Structure: The optimized molecular structure.

    Raises:
        ValueError: If ``input_file`` does not exist, or if ``read_xyz`` does
            not return a ``Structure``.
    """
    # The existence check runs before the configuration is loaded.
    if not file_exists(input_file):
        raise ValueError(f"Input file {input_file} does not exist.")

    cfg = load_config_from_file() if config is None else config

    initial = read_xyz(
        input_file,
        charge=int(cfg.optimization.charge),
        multiplicity=int(cfg.optimization.multiplicity),
    )
    if not isinstance(initial, Structure):
        raise ValueError("read_xyz did not return a Structure")

    # Replace the file's own comment so the output records its provenance.
    initial.comment = f"Optimized from: {input_file}"

    optimized = optimize_single_structure(initial, cfg)
    if output_file:
        save_xyz_file(optimized, output_file)
    return optimized
|
Batch & Ensemble Optimization
Methods for processing multiple structures, ensembles, or entire directories.
optimize_ensemble_smiles(smiles, output_file=None, config=None)
Optimize a conformer ensemble generated from a SMILES string.
This function generates a specified number of conformers from the provided
SMILES string using the Morfeus library. It then optimizes each conformer
using the specified optimization pipeline. Optionally, the optimized ensemble
can be saved to a multi-structure XYZ file.
Parameters:
| Name |
Type |
Description |
Default |
smiles
|
str
|
SMILES string of the molecule for which to generate conformers.
|
required
|
output_file
|
str
|
Path to an output multi-structure XYZ file where
the optimized ensemble will be written. If None, the ensemble is not saved to a file.
|
None
|
config
|
Config
|
Config object to control the optimization pipeline.
Highly recommended to specify. If None, the configuration will be loaded from
the default file.
|
None
|
Returns:
| Type |
Description |
list[Structure]
|
A list of optimized molecular structures as Structure objects.
|
Raises:
| Type |
Description |
ValueError
|
If the generated conformers are not returned as a list of Structure
objects. (output_file is optional; omitting it never raises.)
|
Source code in src/gpuma/api.py
def optimize_ensemble_smiles(
    smiles: str,
    output_file: str | None = None,
    config: Config | None = None,
) -> list[Structure]:
    """Optimize a conformer ensemble generated from a SMILES string.

    Conformers are generated with ``smiles_to_ensemble`` using the maximum
    conformer count and seed from ``config.conformer_generation`` and the
    multiplicity from ``config.optimization``. They are then optimized with
    ``optimize_structure_batch`` and, if requested, written to a
    multi-structure XYZ file.

    Args:
        smiles (str): SMILES string of the molecule for which to generate
            conformers.
        output_file (str, optional): Path of the multi-structure XYZ file to
            write the optimized ensemble to. Nothing is written when this is
            None (or empty).
        config (Config, optional): Configuration for the optimization
            pipeline. When None, ``load_config_from_file()`` supplies it.

    Returns:
        list[Structure]: The optimized conformers.

    Raises:
        ValueError: If ``smiles_to_ensemble`` does not return a list made up
            exclusively of ``Structure`` objects.
    """
    if config is None:
        config = load_config_from_file()

    multiplicity = int(config.optimization.multiplicity)
    num_conformers = int(config.conformer_generation.max_num_conformers)
    seed = int(config.conformer_generation.conformer_seed)

    conformers = smiles_to_ensemble(smiles, num_conformers, multiplicity, seed=seed)

    # Validate every element, not just the first: a stray non-Structure entry
    # later in the list would otherwise only fail deep inside the optimizer.
    if not isinstance(conformers, list) or not all(
        isinstance(conformer, Structure) for conformer in conformers
    ):
        raise ValueError("smiles_to_ensemble did not return a list of Structure")

    # Make sure each conformer carries the configured multiplicity.
    for conformer in conformers:
        conformer.multiplicity = multiplicity

    results = optimize_structure_batch(conformers, config)

    if output_file:
        comments = [
            f"Optimized conformer {number} from SMILES: {smiles}"
            for number in range(1, len(results) + 1)
        ]
        save_multi_xyz(results, output_file, comments)
    return results
|
optimize_batch_multi_xyz_file(input_file, output_file=None, config=None)
Optimize a batch of structures from a multi-structure XYZ file.
This function reads multiple molecular structures from the specified multi-structure
XYZ file, optimizes each structure using the provided optimization pipeline, and
optionally writes the optimized structures to an output multi-structure XYZ file.
Parameters:
| Name |
Type |
Description |
Default |
input_file
|
str
|
Path to an input multi-structure XYZ file from which to
read the initial structures.
|
required
|
output_file
|
str
|
Path to an output multi-structure XYZ file where the optimized
structures will be written.
If None, the optimized structures will not be saved to a file.
|
None
|
config
|
Config
|
Config object to control the optimization pipeline.
Highly recommended to specify. If None, the configuration will be loaded from
the default file.
|
None
|
Returns:
| Type |
Description |
list[Structure]
|
A list of optimized molecular structures as Structure objects.
|
Raises:
| Type |
Description |
ValueError
|
If the input file does not exist or if the read structures are not valid.
|
Source code in src/gpuma/api.py
def optimize_batch_multi_xyz_file(
    input_file: str,
    output_file: str | None = None,
    config: Config | None = None,
) -> list[Structure]:
    """Optimize every structure found in a multi-structure XYZ file.

    Structures are read with ``read_multi_xyz`` using the charge and
    multiplicity from ``config.optimization``. They are optimized with
    ``optimize_structure_batch`` and, if requested, written to a
    multi-structure XYZ file.

    Args:
        input_file (str): Path of the multi-structure XYZ file holding the
            initial structures.
        output_file (str, optional): Path of the multi-structure XYZ file to
            write the optimized structures to. Nothing is written when this
            is None (or empty).
        config (Config, optional): Configuration for the optimization
            pipeline. When None, ``load_config_from_file()`` supplies it.

    Returns:
        list[Structure]: The optimized structures.

    Raises:
        ValueError: If ``input_file`` does not exist. Errors raised by
            ``read_multi_xyz`` or ``optimize_structure_batch`` propagate
            unchanged.
    """
    # The existence check runs before the configuration is loaded.
    if not file_exists(input_file):
        raise ValueError(f"Input file {input_file} does not exist.")

    cfg = load_config_from_file() if config is None else config

    loaded = read_multi_xyz(
        input_file,
        charge=int(cfg.optimization.charge),
        multiplicity=int(cfg.optimization.multiplicity),
    )
    optimized = optimize_structure_batch(loaded, cfg)

    if output_file:
        # One provenance comment per result, numbered from 1.
        labels = [
            f"Optimized structure {number} from: {input_file}"
            for number in range(1, len(optimized) + 1)
        ]
        save_multi_xyz(optimized, output_file, labels)
    return optimized
|
optimize_batch_xyz_directory(input_directory, output_file, config=None)
Optimize a batch of structures from XYZ files in a directory.
This function reads multiple molecular structures from XYZ files in the specified input
directory, optimizes each structure using the provided optimization pipeline,
and writes the optimized structures to a multi-structure XYZ output file.
Parameters:
| Name |
Type |
Description |
Default |
input_directory
|
str
|
Path to an input directory containing XYZ files.
|
required
|
output_file
|
str
|
Path to an output multi-structure XYZ file where the
optimized structures will be written.
|
required
|
config
|
Config
|
Config object to control the optimization pipeline.
Highly recommended to specify. If None, the configuration will be loaded from the
default file.
|
None
|
Returns:
| Type |
Description |
list[Structure]
|
A list of optimized molecular structures as Structure objects.
|
Raises:
| Type |
Description |
ValueError
|
If the input directory contains no XYZ files, or none of them can be read.
(A missing directory is reported by read_xyz_directory as FileNotFoundError.)
|
Source code in src/gpuma/api.py
def optimize_batch_xyz_directory(
    input_directory: str,
    output_file: str,
    config: Config | None = None,
) -> list[Structure]:
    """Optimize every structure read from the XYZ files of a directory.

    Structures are collected with ``read_xyz_directory`` using the charge and
    multiplicity from ``config.optimization``, optimized with
    ``optimize_structure_batch`` and written to a multi-structure XYZ file.

    Args:
        input_directory (str): Path of the directory containing XYZ files.
        output_file (str): Path of the multi-structure XYZ file to write the
            optimized structures to. A falsy value skips writing.
        config (Config, optional): Configuration for the optimization
            pipeline. When None, ``load_config_from_file()`` supplies it.

    Returns:
        list[Structure]: The optimized structures.

    Raises:
        FileNotFoundError: Propagated from ``read_xyz_directory`` when the
            directory does not exist.
        ValueError: Propagated from ``read_xyz_directory`` when the directory
            holds no XYZ files or none of them can be read.
    """
    cfg = load_config_from_file() if config is None else config

    loaded = read_xyz_directory(
        input_directory,
        charge=int(cfg.optimization.charge),
        multiplicity=int(cfg.optimization.multiplicity),
    )
    optimized = optimize_structure_batch(loaded, cfg)

    if output_file:
        # One generic comment per result, numbered from 1.
        labels = [
            f"Optimized structure {number} from batch input"
            for number in range(1, len(optimized) + 1)
        ]
        save_multi_xyz(optimized, output_file, labels)
    return optimized
|
Model Loading
Functions for directly loading model calculators and torch-sim wrappers.
load_calculator(config)
Load an ASE-compatible calculator for single-structure optimization.
Dispatches to the Fairchem or ORB-v3 backend based on
config.model.model_type.
Parameters
config : Config
GPUMA configuration object.
Returns
calculator
An ASE calculator (FAIRChemCalculator or ORBCalculator).
Raises
ImportError
If the required backend package is not installed.
ValueError
If the model name is unknown or missing.
Source code in src/gpuma/models.py
@time_it
def load_calculator(config: Config):
    """Return an ASE-compatible calculator for single-structure optimization.

    The backend is chosen from the value returned by
    ``resolve_model_type(config)``: ``"orb"`` selects the ORB loader, any
    other value selects the Fairchem loader.

    Parameters
    ----------
    config : Config
        GPUMA configuration object.

    Returns
    -------
    calculator
        Whatever the selected backend loader returns (documented upstream as
        ``FAIRChemCalculator`` or ``ORBCalculator``).

    Raises
    ------
    ImportError
        If the required backend package is not installed (raised by the
        backend loader; not visible in this function).
    ValueError
        If the model name is unknown or missing (raised by the helpers; not
        visible in this function).
    """
    if resolve_model_type(config) == "orb":
        loader = _load_orb_calculator
    else:
        loader = _load_fairchem_calculator
    return loader(config)
|
load_torchsim_model(config)
Load a torch-sim model wrapper for GPU-accelerated batch optimization.
Dispatches to the Fairchem or ORB-v3 backend based on
config.model.model_type.
Parameters
config : Config
GPUMA configuration object.
Returns
model
A torch-sim model (FairChemModel or OrbTorchSimModel).
Raises
ImportError
If the required backend package is not installed.
ValueError
If the model name is unknown or missing.
Source code in src/gpuma/models.py
@time_it
def load_torchsim_model(config: Config):
    """Return a torch-sim model wrapper for batch optimization.

    The backend is chosen from the value returned by
    ``resolve_model_type(config)``: ``"orb"`` selects the ORB loader, any
    other value selects the Fairchem loader.

    Parameters
    ----------
    config : Config
        GPUMA configuration object.

    Returns
    -------
    model
        Whatever the selected backend loader returns (documented upstream as
        ``FairChemModel`` or ``OrbTorchSimModel``).

    Raises
    ------
    ImportError
        If the required backend package is not installed (raised by the
        backend loader; not visible in this function).
    ValueError
        If the model name is unknown or missing (raised by the helpers; not
        visible in this function).
    """
    if resolve_model_type(config) == "orb":
        loader = _load_orb_torchsim
    else:
        loader = _load_fairchem_torchsim
    return loader(config)
|
I/O & Structure Conversion
Functions for reading, writing, and converting molecular structures.
read_xyz(file_path, charge=0, multiplicity=1)
Read an XYZ file and return a :class:Structure instance.
Parameters
file_path:
Path to the XYZ file to read.
charge:
Optional total charge to set on the structure (default: 0).
multiplicity:
Optional spin multiplicity to set (default: 1).
Returns
Structure
Object with symbols, coordinates, and an optional comment.
Raises
FileNotFoundError
If the specified file does not exist.
ValueError
If the file format is invalid.
Source code in src/gpuma/io_handler.py
def read_xyz(file_path: str, charge: int = 0, multiplicity: int = 1) -> Structure:
    """Read an XYZ file and return a :class:`Structure` instance.

    The file is expected to hold the atom count on the first line, a comment
    on the second line and one ``symbol x y z`` record per atom afterwards.
    Extra columns on an atom line and any lines after the last atom are
    ignored.

    Parameters
    ----------
    file_path:
        Path to the XYZ file to read.
    charge:
        Optional total charge to set on the structure (default: ``0``).
    multiplicity:
        Optional spin multiplicity to set (default: ``1``).

    Returns
    -------
    Structure
        Object with symbols, coordinates, and the comment line (without its
        trailing newline).

    Raises
    ------
    FileNotFoundError
        If the specified file does not exist.
    ValueError
        If the file format is invalid: non-integer or negative atom count,
        missing comment line, too few atom lines, an atom line with fewer
        than four fields, or non-numeric coordinates. Any other error raised
        while reading is also re-raised as ``ValueError``.
    """
    if not os.path.exists(file_path):
        raise FileNotFoundError(f"File {file_path} not found")

    symbols: list[str] = []
    coordinates: list[tuple[float, float, float]] = []

    try:
        with open(file_path, encoding="utf-8") as infile:
            # First line: number of atoms.
            header = infile.readline()
            try:
                num_atoms = int(header.strip())
            except ValueError as exc:
                raise ValueError(
                    "First line must contain the number of atoms as an integer"
                ) from exc

            # A negative count would make the atom loop a no-op and silently
            # produce an empty structure, so reject it explicitly.
            if num_atoms < 0:
                raise ValueError(f"Number of atoms must be non-negative, got {num_atoms}")

            # Second line: comment. readline() returns "" only at EOF; a blank
            # comment line is "\n" and therefore accepted.
            comment_line = infile.readline()
            if not comment_line:
                raise ValueError(f"Expected {num_atoms} atom lines, but found 0")
            comment = comment_line.rstrip("\n")

            for i in range(num_atoms):
                line = infile.readline()
                if not line:
                    raise ValueError(f"Expected {num_atoms} atom lines, but found {i}")

                parts = line.split()
                if len(parts) < 4:
                    # +3: two header lines plus 1-based line numbering.
                    raise ValueError(
                        f"Line {i + 3} must contain at least 4 elements: symbol x y z"
                    )

                try:
                    x, y, z = float(parts[1]), float(parts[2]), float(parts[3])
                except ValueError as exc:
                    raise ValueError(
                        f"Invalid coordinates in line {i + 3}: {parts[1:4]}"
                    ) from exc

                symbols.append(parts[0])
                coordinates.append((x, y, z))
    except (FileNotFoundError, ValueError):
        # Already one of the documented exception types; propagate unchanged.
        raise
    except Exception as exc:
        raise ValueError(f"Error reading XYZ file: {exc}") from exc

    return Structure(
        symbols=symbols,
        coordinates=coordinates,
        comment=comment,
        charge=charge,
        multiplicity=multiplicity,
    )
|
read_multi_xyz(file_path, charge=0, multiplicity=1)
Read an XYZ file containing multiple structures.
Parameters
file_path:
Path to the multi-structure XYZ file.
charge:
Optional total charge to set on all returned structures (default: 0).
multiplicity:
Optional spin multiplicity to set (default: 1).
Returns
list[Structure]
List of structures read from the file.
Raises
FileNotFoundError
If the specified file does not exist.
ValueError
If the file format is invalid.
Source code in src/gpuma/io_handler.py
def read_multi_xyz(file_path: str, charge: int = 0, multiplicity: int = 1) -> list[Structure]:
    """Read an XYZ file containing multiple structures.

    The file is scanned line by line. Blank lines and lines that are not an
    integer are skipped while looking for the next atom-count header. After a
    header, the following line is taken as the comment and the next
    ``num_atoms`` lines as atom records. A block that is truncated or has a
    malformed atom line is logged as a warning and dropped; parsing then
    continues with the next line.

    Parameters
    ----------
    file_path:
        Path to the multi-structure XYZ file.
    charge:
        Optional total charge to set on all returned structures (default: ``0``).
    multiplicity:
        Optional spin multiplicity to set (default: ``1``).

    Returns
    -------
    list[Structure]
        Structures that could be parsed completely, in file order.

    Raises
    ------
    FileNotFoundError
        If the specified file does not exist.
    ValueError
        If any exception occurs while opening or parsing the file.
    """
    if not os.path.exists(file_path):
        raise FileNotFoundError(f"File {file_path} not found")

    parsed: list[Structure] = []

    try:
        with open(file_path, encoding="utf-8") as handle:
            # A single shared iterator: the inner next() calls consume the
            # same stream the outer loop advances.
            lines = iter(handle)
            for header in lines:
                token = header.strip()
                if not token:
                    continue
                try:
                    num_atoms = int(token)
                except ValueError:
                    # Not an atom-count line; keep scanning.
                    continue

                comment_line = next(lines, None)
                if comment_line is None:
                    break
                comment = comment_line.rstrip("\n")

                symbols: list[str] = []
                coordinates: list[tuple[float, float, float]] = []
                complete = True

                for atom_idx in range(num_atoms):
                    atom_line = next(lines, None)
                    if atom_line is None:
                        logger.warning(
                            "Structure '%s': unexpected end of file at atom %d/%d, skipping",
                            comment, atom_idx + 1, num_atoms,
                        )
                        complete = False
                        break

                    fields = atom_line.split()
                    if len(fields) < 4:
                        logger.warning(
                            "Structure '%s': malformed atom line %d, skipping structure",
                            comment, atom_idx + 1,
                        )
                        complete = False
                        break

                    try:
                        position = (float(fields[1]), float(fields[2]), float(fields[3]))
                    except ValueError:
                        logger.warning(
                            "Structure '%s': invalid coordinates at atom %d, skipping structure",
                            comment, atom_idx + 1,
                        )
                        complete = False
                        break

                    symbols.append(fields[0])
                    coordinates.append(position)

                # The length check also rejects a negative atom count, for
                # which the atom loop never runs.
                if not complete or len(symbols) != num_atoms:
                    continue

                parsed.append(
                    Structure(
                        symbols=symbols,
                        coordinates=coordinates,
                        comment=comment,
                        charge=charge,
                        multiplicity=multiplicity,
                    )
                )
    except Exception as exc:
        raise ValueError(f"Error reading multi-XYZ file: {exc}") from exc

    return parsed
|
read_xyz_directory(directory_path, charge=0, multiplicity=1)
Read all XYZ files from a directory.
Parameters
directory_path:
Path to directory containing XYZ files.
charge:
Optional total charge to set on all returned structures (default: 0).
multiplicity:
Optional spin multiplicity to set (default: 1).
Returns
list[Structure]
List of structures from all XYZ files in the directory.
Raises
FileNotFoundError
If the directory does not exist.
ValueError
If no valid XYZ files are found.
Source code in src/gpuma/io_handler.py
def read_xyz_directory(
    directory_path: str, charge: int = 0, multiplicity: int = 1
) -> list[Structure]:
    """Read all ``*.xyz`` files directly inside a directory.

    Files are processed in sorted path order so that the position of each
    structure in the returned list is reproducible across platforms. Files
    that fail to parse are logged as warnings and skipped.

    Parameters
    ----------
    directory_path:
        Path to directory containing XYZ files. The path is taken literally;
        glob metacharacters in it (``*``, ``?``, ``[``) are escaped.
    charge:
        Optional total charge to set on all returned structures (default: ``0``).
    multiplicity:
        Optional spin multiplicity to set (default: ``1``).

    Returns
    -------
    list[Structure]
        Structures from all readable XYZ files, ordered by file path.

    Raises
    ------
    FileNotFoundError
        If the directory does not exist.
    ValueError
        If the directory contains no ``*.xyz`` files, or none of them could
        be read.
    """
    if not os.path.exists(directory_path):
        raise FileNotFoundError(f"Directory {directory_path} not found")

    # Escape the directory part so names such as "run[1]" are not interpreted
    # as glob patterns, and sort because glob returns filesystem order.
    pattern = os.path.join(glob.escape(directory_path), "*.xyz")
    xyz_files = sorted(glob.glob(pattern))
    if not xyz_files:
        raise ValueError(f"No XYZ files found in directory {directory_path}")

    structures: list[Structure] = []
    for xyz_file in xyz_files:
        try:
            structures.append(read_xyz(xyz_file, charge=charge, multiplicity=multiplicity))
        except Exception as exc:  # pragma: no cover - logged and skipped
            # Deliberate best-effort: one bad file must not abort the batch.
            logger.warning("Failed to read %s: %s", xyz_file, exc)

    if not structures:
        raise ValueError("No valid structures could be read from any XYZ files")

    return structures
|
smiles_to_xyz(smiles_string, return_full_xyz_str=False, multiplicity=None)
Convert a SMILES string to a :class:Structure or an XYZ string.
Parameters
smiles_string:
Valid SMILES string representing the molecular structure.
return_full_xyz_str:
If True, return an XYZ-format string instead of a
:class:Structure instance.
multiplicity:
Optional spin multiplicity to set on the structure (default: None).
Returns
Structure | str
Either a :class:Structure or an XYZ string depending on
return_full_xyz_str.
Source code in src/gpuma/io_handler.py
def smiles_to_xyz(
    smiles_string: str, return_full_xyz_str: bool = False, multiplicity: int | None = None
) -> Structure | str:
    """Turn a SMILES string into a :class:`Structure` or an XYZ-format string.

    The stripped SMILES string is passed to ``_smiles_to_structure_util``.

    Parameters
    ----------
    smiles_string:
        SMILES string representing the molecular structure. Must not be
        empty or whitespace-only.
    return_full_xyz_str:
        If ``True``, return the structure rendered as XYZ text (atom count,
        comment line, one line per atom, no trailing newline) instead of a
        :class:`Structure` instance.
    multiplicity:
        Optional spin multiplicity to set on the structure (default: ``None``,
        which leaves the generated structure's multiplicity untouched).

    Returns
    -------
    Structure | str
        Either a :class:`Structure` (with its comment set to a summary of
        the SMILES, charge and multiplicity) or an XYZ string, depending on
        ``return_full_xyz_str``.

    Raises
    ------
    ValueError
        If ``smiles_string`` is empty, ``None`` or whitespace-only.
    """
    cleaned = smiles_string.strip() if smiles_string else ""
    if not cleaned:
        raise ValueError("SMILES string cannot be empty or None")

    struct = _smiles_to_structure_util(cleaned)
    if multiplicity is not None:
        struct.multiplicity = int(multiplicity)

    if not return_full_xyz_str:
        # The comment quotes the SMILES exactly as passed in (unstripped).
        struct.comment = (
            f"Generated from SMILES: {smiles_string} | "
            f"Charge: {struct.charge} | "
            f"Multiplicity: {struct.multiplicity}"
        )
        return struct

    atom_count = str(struct.n_atoms)
    header = (
        "Generated from SMILES using MORFEUS | "
        f"Charge: {struct.charge} | "
        f"Multiplicity: {struct.multiplicity}"
    )
    atom_lines = [
        f"{atom} {coord[0]:.6f} {coord[1]:.6f} {coord[2]:.6f}"
        for atom, coord in zip(struct.symbols, struct.coordinates, strict=True)
    ]
    return "\n".join([atom_count, header, *atom_lines])
|
smiles_to_ensemble(smiles_string, max_num_confs, multiplicity=None, seed=None)
Generate conformer ensemble from SMILES.
Parameters
smiles_string:
Valid SMILES string representing the molecular structure.
max_num_confs:
Maximum number of conformers to generate.
multiplicity:
Optional spin multiplicity to set on the structures (default: None).
seed:
Optional random seed for reproducible conformer generation.
Returns
list[Structure]
A list of :class:Structure instances representing the conformers.
Source code in src/gpuma/io_handler.py
def smiles_to_ensemble(
    smiles_string: str,
    max_num_confs: int,
    multiplicity: int | None = None,
    seed: int | None = None,
) -> list[Structure]:
    """Generate a conformer ensemble from a SMILES string.

    The stripped SMILES string is forwarded to ``_smiles_to_ensemble_util``
    together with the conformer limit, multiplicity and seed.

    Parameters
    ----------
    smiles_string:
        SMILES string representing the molecular structure. Must not be
        empty or whitespace-only.
    max_num_confs:
        Maximum number of conformers to generate.
    multiplicity:
        Optional spin multiplicity for the structures. ``None`` is passed
        on as ``1``.
    seed:
        Optional random seed, forwarded unchanged to the generator.

    Returns
    -------
    list[Structure]
        The conformers returned by ``_smiles_to_ensemble_util``.

    Raises
    ------
    ValueError
        If ``smiles_string`` is empty, ``None`` or whitespace-only.
    """
    cleaned = smiles_string.strip() if smiles_string else ""
    if not cleaned:
        raise ValueError("SMILES string cannot be empty or None")

    if multiplicity is None:
        spin_multiplicity = 1
    else:
        spin_multiplicity = int(multiplicity)

    return _smiles_to_ensemble_util(
        cleaned, max_num_confs, multiplicity=spin_multiplicity, seed=seed,
    )
|
save_xyz_file(structure, file_path)
Save a single :class:Structure to an XYZ file.
The comment line includes the energy (if set), charge, and multiplicity.
Parameters
structure:
Structure to write.
file_path:
Destination file path.
Source code in src/gpuma/io_handler.py
| def save_xyz_file(structure: Structure, file_path: str) -> None:
"""Save a single :class:`Structure` to an XYZ file.
The comment line includes the energy (if set), charge, and multiplicity.
Parameters
----------
structure:
Structure to write.
file_path:
Destination file path.
"""
lines: list[str] = [str(structure.n_atoms)]
# include existing comment and ensure energy/charge/multiplicity are visible
base_comment = structure.comment or ""
energy_part = ""
if structure.energy is not None:
energy_part = f" | Energy: {structure.energy:.6f} eV"
state_part = f" | Charge: {structure.charge} | Multiplicity: {structure.multiplicity}"
comment = (base_comment + energy_part + state_part).strip() or "Structure"
lines.append(comment)
for symbol, coord in zip(structure.symbols, structure.coordinates, strict=True):
lines.append(f"{symbol} {coord[0]:.6f} {coord[1]:.6f} {coord[2]:.6f}")
with open(file_path, "w", encoding="utf-8") as fh:
fh.write("\n".join(lines))
fh.write("\n")
|
save_multi_xyz(structures, file_path, comments=None)
Save multiple structures to a single multi-structure XYZ file.
Each structure block includes the energy (if set), charge, and
multiplicity in the comment line.
Parameters
structures:
List of structures to write.
file_path:
Destination file path.
comments:
Optional per-structure comment strings. Falls back to each
structure's own comment if not provided.
Source code in src/gpuma/io_handler.py
| def save_multi_xyz(
structures: list[Structure], file_path: str, comments: list[str] | None = None
) -> None:
"""Save multiple structures to a single multi-structure XYZ file.
Each structure block includes the energy (if set), charge, and
multiplicity in the comment line.
Parameters
----------
structures:
List of structures to write.
file_path:
Destination file path.
comments:
Optional per-structure comment strings. Falls back to each
structure's own comment if not provided.
"""
lines: list[str] = []
for idx, struct in enumerate(structures):
lines.append(str(struct.n_atoms))
base_comment = ""
if comments and idx < len(comments):
base_comment = comments[idx]
elif struct.comment:
base_comment = struct.comment
energy_part = ""
if struct.energy is not None:
energy_part = f" | Energy: {struct.energy:.6f} eV"
state_part = f" | Charge: {struct.charge} | Multiplicity: {struct.multiplicity}"
comment = (base_comment + energy_part + state_part).strip() or f"Structure {idx + 1}"
lines.append(comment)
for symbol, coord in zip(struct.symbols, struct.coordinates, strict=True):
lines.append(f"{symbol} {coord[0]:.6f} {coord[1]:.6f} {coord[2]:.6f}")
with open(file_path, "w", encoding="utf-8") as fh:
fh.write("\n".join(lines))
fh.write("\n")
|
save_as_single_xyz_files(structures, output_dir, comments=None)
Save each structure to its own XYZ file in a directory.
Files are zero-padded to sort naturally, e.g. structure_01.xyz for
up to 99 structures, structure_0001.xyz for up to 9999, etc.
Parameters
structures:
List of structures to save.
output_dir:
Directory where files will be written. Created if it does not exist.
comments:
Optional per-structure comment strings.
Source code in src/gpuma/io_handler.py
def save_as_single_xyz_files(
    structures: list[Structure], output_dir: str, comments: list[str] | None = None
) -> None:
    """Write every structure to its own XYZ file inside ``output_dir``.

    Files are named ``structure_<n>.xyz`` with ``n`` starting at 1 and
    zero-padded to the number of digits in ``len(structures)``, e.g.
    ``structure_01.xyz`` for up to 99 structures and ``structure_0001.xyz``
    for up to 9999. Each file is written with ``save_xyz_file``.

    Parameters
    ----------
    structures:
        List of structures to save.
    output_dir:
        Directory where files will be written. Created if it does not exist.
    comments:
        Optional per-structure comment strings. Where a comment exists for
        an index, a new ``Structure`` carrying that comment is written; the
        input structure is left unmodified.
    """
    os.makedirs(output_dir, exist_ok=True)
    pad = len(str(len(structures)))

    for position, original in enumerate(structures):
        has_override = bool(comments) and position < len(comments)
        if has_override:
            # Build a copy rather than mutating the caller's structure.
            to_write = Structure(
                symbols=original.symbols,
                coordinates=original.coordinates,
                energy=original.energy,
                charge=original.charge,
                multiplicity=original.multiplicity,
                comment=comments[position],
            )
        else:
            to_write = original

        target = os.path.join(output_dir, f"structure_{position + 1:0{pad}d}.xyz")
        save_xyz_file(to_write, target)
|
Low-Level Optimization
Lower-level functions used by the high-level API.
optimize_single_structure(structure, config=None, calculator=None)
Optimize a single :class:Structure using an ASE optimizer.
The same structure instance is returned with updated coordinates and
energy.
Parameters
structure : Structure
Molecular structure to optimize.
config : Config, optional
Configuration controlling the model and convergence settings.
Defaults to :func:load_config_from_file if not provided.
calculator : optional
Pre-loaded ASE calculator. If None, one is loaded (and cached)
from the configuration.
Returns
Structure
The input structure with optimized coordinates and energy set.
Raises
RuntimeError
If the optimization fails for any reason.
Source code in src/gpuma/optimizer.py
def optimize_single_structure(
    structure: Structure,
    config: Config | None = None,
    calculator: Any | None = None,
) -> Structure:
    """Optimize one :class:`Structure` in place with an ASE optimizer.

    An ASE ``Atoms`` object is built from the structure's symbols and
    coordinates, the calculator is attached, and charge and multiplicity
    are stored in ``atoms.info`` under ``"charge"`` and ``"spin"``. After
    the optimizer has run, the positions and potential energy are written
    back to the same ``structure`` instance.

    Parameters
    ----------
    structure : Structure
        Molecular structure to optimize. It is modified and returned.
    config : Config, optional
        Configuration used to resolve the calculator, force criterion and
        optimizer. Defaults to :func:`load_config_from_file` if not provided.
    calculator : optional
        Pre-loaded ASE calculator. If ``None``, one is obtained from
        ``_get_cached_calculator(config)``.

    Returns
    -------
    Structure
        The input structure with optimized coordinates and energy set.

    Raises
    ------
    RuntimeError
        If any step after loading the configuration raises; the original
        exception is chained as the cause.
    """
    cfg = load_config_from_file() if config is None else config

    try:
        calc = _get_cached_calculator(cfg) if calculator is None else calculator

        atoms = Atoms(symbols=structure.symbols, positions=structure.coordinates)
        atoms.calc = calc
        atoms.info = {"charge": structure.charge, "spin": structure.multiplicity}

        force_threshold = _resolve_force_criterion(cfg)
        optimizer_cls, optimizer_name = _resolve_ase_optimizer(cfg)

        logger.info(
            "Starting single geometry optimization for structure with %d atoms "
            "(optimizer=%s)",
            structure.n_atoms,
            optimizer_name,
        )
        # logfile=None is passed so the optimizer is given no log file.
        driver = optimizer_cls(atoms, logfile=None)
        driver.run(fmax=force_threshold)
        logger.info("Optimization completed after %d steps", driver.get_number_of_steps())

        # Write the results back onto the caller's structure.
        structure.coordinates = atoms.get_positions().tolist()
        structure.energy = float(atoms.get_potential_energy())
    except Exception as exc:  # pragma: no cover - defensive logging
        raise RuntimeError(f"Optimization failed: {exc}") from exc

    return structure
|
optimize_structure_batch(structures, config=None)
Optimize a list of structures and return them with updated coordinates.
The optimization mode is controlled by
config.optimization.batch_optimization_mode:
"sequential": Each structure is optimized individually with
ASE using a shared calculator.
"batch": All structures are optimized together using torch-sim
GPU-accelerated batch optimization (requires GPU).
Parameters
structures : list[Structure]
Structures to optimize.
config : Config, optional
Configuration object. Defaults to the result of `load_config_from_file()`.
Returns
list[Structure]
Optimized structures with coordinates and energies set.
Raises
ValueError
If structures have mismatched symbols/coordinates or are empty,
or if the optimization mode is unknown.
Source code in src/gpuma/optimizer.py
def optimize_structure_batch(
    structures: list[Structure],
    config: Config | None = None,
) -> list[Structure]:
    """Optimize a list of structures and return them with updated coordinates.

    The optimization mode is controlled by
    ``config.optimization.batch_optimization_mode``:

    - ``"sequential"``: Each structure is optimized individually with
      ASE using a shared calculator.
    - ``"batch"``: All structures are optimized together using torch-sim
      GPU-accelerated batch optimization (requires GPU).

    When the configured device resolves to CPU, the structures are always
    optimized sequentially, whatever mode is configured. A warning is logged
    if ``"batch"`` was requested in that situation.

    Parameters
    ----------
    structures : list[Structure]
        Structures to optimize.
    config : Config, optional
        Configuration object. Defaults to :func:`load_config_from_file`.

    Returns
    -------
    list[Structure]
        Optimized structures with coordinates and energies set. An empty
        input list yields an empty list.

    Raises
    ------
    ValueError
        If a structure has mismatched symbols/coordinates or contains no
        atoms, or if the optimization mode is unknown on a non-CPU device.
    """
    if config is None:
        config = load_config_from_file()
    if not structures:
        return []

    # Validate every structure up front so nothing is optimized when one is bad.
    for i, struct in enumerate(structures):
        if struct.n_atoms != len(struct.coordinates):
            raise ValueError(f"Structure {i}: symbols/coords length mismatch")
        if struct.n_atoms == 0:
            raise ValueError(f"Structure {i}: empty structure")

    on_cpu = _parse_device_string(config.technical.device) == "cpu"
    mode = str(config.optimization.batch_optimization_mode).lower()
    logger.info("Optimization device: %s", "CPU" if on_cpu else "GPU")

    with timed_block("Total optimization") as tb:
        if mode == "sequential" or on_cpu:
            # Inside this branch, mode == "batch" can only occur together with
            # on_cpu. The previous guard (`not on_cpu and mode == "batch"`) was
            # therefore unreachable and the fallback warning was never logged.
            if on_cpu and mode == "batch":
                logger.warning(
                    "Batch optimization mode requires GPU, falling back to sequential mode on CPU.",
                )
            results = _optimize_sequential(structures, config)
        elif mode == "batch":
            results = _optimize_batch(structures, config)
        else:
            raise ValueError(
                f"Unknown optimization mode: {mode!r}. Use 'sequential' or 'batch' "
                "(batch requires GPU)."
            )

    log_optimization_summary(structures, results, tb.elapsed, mode, config)
    return results
|
Utilities
Timing decorators and context managers for profiling.
time_it(func)
Measure the execution time of a function and log the result.
Parameters
func:
Callable to be wrapped.
Returns
callable
Wrapped function that logs its runtime at `logging.INFO` level.
Source code in src/gpuma/decorators.py
def time_it(func):
    """Decorate ``func`` so that each call's wall-clock duration is logged.

    Parameters
    ----------
    func:
        Callable to be wrapped.

    Returns
    -------
    callable
        Wrapper that forwards all arguments to ``func``, logs the elapsed
        time through the module logger at INFO level, and returns the
        result of ``func`` unchanged.
    """

    @wraps(func)
    def timed(*args, **kwargs):
        began = perf_counter()
        outcome = func(*args, **kwargs)
        # Reached only when ``func`` returned normally; a raised exception
        # propagates without a timing entry.
        logger.info("Function: %r took: %.2f sec", func.__name__, perf_counter() - began)
        return outcome

    return timed
|
timed_block
Context manager that measures and logs a named code block.
The elapsed time (in seconds) is available via the `elapsed`
attribute after the block exits.
Example
>>> with timed_block("model loading") as tb:
...     model = load_model()
>>> print(tb.elapsed)
Source code in src/gpuma/decorators.py
class timed_block:
    """Time a named ``with`` block and log how long it took.

    Once the block has exited, the duration in seconds can be read from
    the :attr:`elapsed` attribute.

    Example
    -------
    >>> with timed_block("model loading") as tb:
    ...     model = load_model()
    >>> print(tb.elapsed)
    """

    def __init__(self, name: str, *, level: int = logging.INFO):
        # ``elapsed`` stays at 0.0 until ``__exit__`` has run.
        self._level = level
        self.elapsed: float = 0.0
        self.name = name

    def __enter__(self):
        """Record the start time and hand back this instance."""
        self._start = perf_counter()
        return self

    def __exit__(self, *exc_info):
        """Compute the elapsed time, keep it on the instance, and log it."""
        finished = perf_counter()
        self.elapsed = finished - self._start
        # Nothing is returned, so an exception raised inside the block
        # continues to propagate.
        logger.log(self._level, "%s took %.2f sec", self.name, self.elapsed)
|
__enter__()
Start the timer and return self for attribute access.
Source code in src/gpuma/decorators.py
| def __enter__(self):
"""Start the timer and return ``self`` for attribute access."""
self._start = perf_counter()
return self
|
__exit__(*exc_info)
Stop the timer, store elapsed time, and log the result.
Source code in src/gpuma/decorators.py
def __exit__(self, *exc_info):
    """Compute the elapsed time, keep it on the instance, and log it."""
    finished = perf_counter()
    self.elapsed = finished - self._start
    # Nothing is returned, so an exception raised inside the block
    # continues to propagate.
    logger.log(self._level, "%s took %.2f sec", self.name, self.elapsed)
|