Source code for pisces.particles.gadget

"""
Gadget-style particle dataset support.

This module provides classes and methods for handling particle datasets
formatted in a Gadget-like manner. It includes functionality for reading,
writing, and manipulating such datasets, with a focus on compatibility
with the Gadget simulation code and its derivatives. The particle dataset
classes in this module maintain the Pisces standard of descending from :class:`~particles.base.ParticleDataset`
and therefore support all of the standard particle dataset operations.
"""

from abc import ABC, abstractmethod
from collections.abc import Sequence
from datetime import datetime
from pathlib import Path
from typing import Union, overload

import h5py
import numpy as np
import unyt

from pisces.particles import ParticleDataset
from pisces.utilities.io_tools import NullHDF5Serializer


[docs] class GadgetLikeParticleDataset(ParticleDataset, ABC): """ Base class for Gadget/AREPO–style particle datasets. This class implements the conventions and file layout used by Gadget-like particle-based codes (e.g., Gadget-2, Gadget-4, AREPO). It provides standard field names, default units, and header structures so that simulation initial conditions and snapshots can be generated, inspected, or modified in a consistent way. """ metadata_serializer = NullHDF5Serializer """Serializer for Gadget particle metadata. Gadget particle datasets do not require any special metadata serialization, so a no-op (null) serializer is used. This means that no specialized metadata classes are permissible. Additionally, we now manually handle unit string conversion on dataset attributes because it is not handled by the serializer. """ # --------------------------------- # # Class Constants / Flags # # --------------------------------- # # These flags provide standard names for built-in access to specific # fields in the dataset. This ensures that those areas of the code are # not hardcoded with string literals. These can be overridden in # subclasses if the dataset uses different naming conventions. # --- Field Name Conventions --- # _POSITION_FIELD_NAME = "Coordinates" _VELOCITY_FIELD_NAME = "Velocities" _MASS_FIELD_NAME = "Masses" _ID_FIELD_NAME = "ParticleIDs" _INTERNAL_ENERGY_FIELD_NAME = "InternalEnergy" _DENSITY_FIELD_NAME = "Density" # --- Class Settings --- # _default_ntypes: int = 6 """int: Default number of Gadget-like particle types. By default, this is set to 6, corresponding to the standard Gadget particle types; however, some simulations may have a different number. This can also be specified by the user as a kwarg when building a dataset. """ _default_unit_system = unyt.unit_systems.galactic_unit_system """Default unit system for interpreting Gadget particle fields. Gadget itself does not enforce units; this is provided for Pisces consistency and metadata export. """ _header_group_name: str = "Header" """str: Name of the group containing the Gadget-like file header. This can be overridden in subclasses if the header group has a different name. """ _particle_type_name_prefix: str = "PartType" """str: Prefix for particle type groups in the HDF5 file. This can be overridden in subclasses if the particle type groups have a different prefix. """ # ------------------------------------ # # Properties # # ------------------------------------ # @property def header_handle(self) -> h5py.Group: """Access the ``Header`` group of the Gadget particle dataset. Returns ------- h5py.Group The HDF5 group object corresponding to the ``Header`` group. Raises ------ KeyError If the ``Header`` group does not exist in the file. """ return self.__handle__[self.__class__._header_group_name] @property def header(self) -> dict: """Retrieve the attributes of the ``Header`` group as a dictionary. Returns ------- dict A dictionary containing the attributes of the ``Header`` group. Raises ------ KeyError If the ``Header`` group does not exist in the file. """ return dict(self.header_handle.attrs) # ------------------------------------ # # Modification Methods # # ------------------------------------ #
[docs] def add_particle_field( self, group_name: str, field_name: str, data: unyt.unyt_array | np.ndarray, metadata: dict = None, overwrite: bool = False, ): """Add a new field (dataset) to an existing particle group. This method creates and writes a new field to the specified particle group within the dataset. The data can be a :class:`numpy.ndarray` or a :class:`~unyt.array.unyt_array`. Units (if present) will be stored in the dataset metadata. Additional metadata may also be included via the `metadata` argument. Parameters ---------- group_name : str The name of the particle group to which the field will be added. This must be a pre-existing group in the particle dataset. field_name : str The name of the new field to create (e.g., ``"particle_mass"``). If the `field_name` is already specified in the group, it will raise an error unless `overwrite` is True. data : ~unyt.array.unyt_array or ~numpy.ndarray The data to write to the new field. The leading dimension must match the number of particles in the group. metadata : dict, optional Optional dictionary of metadata to attach to the dataset. This is generally used by subclasses of the base class to add type specific metadata. overwrite : bool, optional Whether to overwrite an existing dataset with the same name. Defaults to False. Raises ------ KeyError If the group does not exist, or if the field exists and `overwrite` is False. ValueError If the field's leading dimension does not match the group's particle count. """ # Ensure that the group exists and that the field name is valid / # correctly handle the overwrite behavior. group = self.get_particle_group_handle(group_name) group_attrs = self.get_group_metadata(group_name) if field_name in group.keys(): # The field already exists. Our behavior depends on the `overwrite` flag. if not overwrite: raise ValueError("Field already exists. Set `overwrite=True` to replace it.") else: del group[field_name] # Determine the number of particles expected and # ensure that the data matches this shape. num_particles = group_attrs["NUMBER_OF_PARTICLES"] data = np.atleast_1d(data) if data.shape[0] != num_particles: raise ValueError( f"Number of particles in field {field_name} does not match number of particles in group {group_name}" ) # Validation has been completed and we can therefore now # proceed with writing the field to the group. units = getattr(data, "units", "") if isinstance(data, unyt.unyt_array): dset = group.create_dataset(field_name, data=data.d, dtype=data.dtype) else: dset = group.create_dataset(field_name, data=data, dtype=data.dtype) # Handle the metadata. dset.attrs["UNITS"] = str(units) if metadata is not None: dset.attrs.update(self.metadata_serializer.serialize_dict(metadata))
# ------------------------------------- # # Generation Methods # # ------------------------------------- #
[docs] @classmethod def generate_file_skeleton( cls, path: Path, number_of_particles: np.ndarray, box_size: unyt.unyt_quantity, unit_system: unyt.UnitSystem, **kwargs, ): """ Generate the basic file skeleton for a Gadget-like particle dataset. This method creates the HDF5 file and populates it with the necessary groups and datasets to conform to the Gadget-like format. It handles writing the required metadata, file header, and particle type groups. Parameters ---------- path: ~pathlib.Path The file path where the HDF5 file will be created. There is no overwrite safety in this method, so the caller must ensure that the path is valid. If it already exists, then the data will be overwritten. number_of_particles: numpy.ndarray The number of particles of each type to include in the file. This should be an array of length equal to the number of particle types (default is 6 for Gadget-like files). If a particle type has zero particles, its group will not be created. box_size: unyt.unyt_quantity The size of the simulation box. This should be a unyt quantity with units of length. unit_system: unyt.unit_systems.UnitSystem The unit system in which to write the particle fields. This is used to set up the units of each of the fields in the file. kwargs: Additional keyword arguments to pass to the header and particle field generation methods. This can include any parameters that are needed to customize the file header or particle fields, such as cosmological parameters, time information, etc. Notes ----- This is a 3 step process: 1. Create the HDF5 file and write the Pisces metadata. 2. Write the Gadget-like file header. 3. Create the particle type groups and populate them with the required fields. """ # Construct the HDF5 file object. Each of the steps in # the process of creating / modifying the file is handled by # a separate submethod to keep things clean. These can be # modified in subclasses as needed. Additionally, new processes # can be added by overriding this method, calling super() and then # re-opening the file to add new features. with h5py.File(path, "w") as stream: # Write the required pisces metadata into the file's # base directory attributes so that it can be recognized as # a valid pisces dataset. cls._generate_pisces_metadata(stream) # Write the Gadget-like header. This section of the code may require # considerable modification in subclasses to ensure that the header # maintains the correct structures. See notes in `_generate_file_header`. header_group = stream.require_group(cls._header_group_name) header_group.attrs["NOT_PARTICLE_GROUP"] = True cls._generate_file_header(header_group, number_of_particles, box_size, unit_system, **kwargs) # With the basic skeleton in place, we can now iterate through # all of the particle types and generate the necessary fields for # each type. This ensures that the file is fully compliant with # the Gadget-like format. for _type, _count in enumerate(number_of_particles): # If the count is zero or lower, we just skip and don't do anything. if _count <= 0: continue # We do have particles of this type, so we ensure that the group # for these particles exists. _particle_group = stream.require_group(f"{cls._particle_type_name_prefix}{_type}") _particle_group.attrs["NUMBER_OF_PARTICLES"] = np.uint32(_count) # We now offload to ``_generate_particle_fields`` to handle generating # the fields individually. This permits subclasses to minimally modify the # the fields being generated, including taking options and manipulating the fields # that are present. cls._generate_particle_fields(_particle_group, _type, _count, unit_system, **kwargs) # The file skeleton is now complete save for any additional features # which can be added by subclasses. return
@classmethod def _generate_pisces_metadata(cls, stream: h5py.File): """Write required Pisces metadata to the HDF5 file. In order for the file to be recognized as a valid Pisces dataset, it requires that some metadata be stored in the root attributes of the HDF5 file. This method handles writing that metadata. By default, we only write the GEN_TIME and the CLASS_NAME attributes, which are the minimal requirements for particle datasets in Pisces. This also ensures that users can read these particle files as datasets later. Parameters ---------- stream : h5py.File The HDF5 file object to write metadata to. Notes ----- In principle, this method can be extended in order to include additional required metadata, but it is generally not necessary. """ # At least in this base class, we only need to keep track of the generation # time and the class name for the particle dataset. This is then used # to ensure that we can read from disk correctly later. initial_metadata = { "GEN_TIME": datetime.now().strftime("%Y-%m-%d %H:%M:%S"), "CLASS_NAME": cls.__name__, } for k, v in cls.metadata_serializer.serialize_dict(initial_metadata).items(): stream.attrs[k] = v @classmethod @abstractmethod def _generate_file_header( cls, stream: h5py.Group, number_of_particles: np.ndarray, box_size: unyt.unyt_quantity, unit_system: unyt.UnitSystem, **kwargs, ): """Write the Gadget-like file header to the HDF5 file. In Gadget-like particle datasets, there is an additional group called with a specific name (generally "Header") that contains metadata about the simulation snapshot. This method handles writing that header group and populating it with the required attributes and datasets. Because the structure of the header can vary so much with the structure of the simulation code, this method is almost always overridden in subclasses. We provide a template here to illustrate the general structure, but it is not intended to be used directly. """ return @classmethod @abstractmethod def _generate_particle_fields( cls, particle_group: h5py.Group, particle_type: int, number_of_particles: int, unit_system: unyt.UnitSystem, **kwargs, ): """Generate the required fields for a given particle type. This method is responsible for creating the datasets within a particle type group (e.g., "PartType0") that store the actual particle data. The specific fields and their structures can vary significantly between different simulation codes, so this method is abstract and must be implemented in subclasses. Parameters ---------- particle_group : h5py.Group The HDF5 group corresponding to the particle type (e.g., "PartType0"). particle_type : int The integer index of the particle type (e.g., 0 for gas, 1 for dark matter). number_of_particles : int The number of particles of this type. unit_system : unyt.UnitSystem The unit system to use for interpreting the fields. Notes ----- This method should create datasets within `particle_group` for each of the required fields, using the appropriate data types and shapes. It should also handle any necessary unit conversions or metadata attributes. """ return @classmethod def _add_particle_field_to_file( cls, particle_group: h5py.Group, field_name: str, number_of_particles: int, field_shape: tuple, field_dtype: np.dtype, field_units: Union[str, unyt.Unit], **kwargs, ) -> h5py.Dataset: """ Add a new particle field dataset to a particle group. This is a low level operation which abstracts out elements of dataset creation that may lead to code duplication and / or errors when subclassing. Parameters ---------- particle_group : h5py.Group The HDF5 group representing the particle type. field_name : str Name of the dataset to create (e.g., ``"Coordinates"``). number_of_particles : int Number of particles in the group (size of the leading dimension). field_shape : tuple of int Shape of a single particle entry (e.g., ``(3,)`` for vectors). field_dtype : numpy.dtype Data type of the dataset (e.g., ``np.float64``). field_units : str or ~unyt.Unit Units associated with the dataset. Stored in the ``UNITS`` attribute. **kwargs : Extra keyword arguments passed to :func:`h5py.Group.create_dataset`. Returns ------- h5py.Dataset The newly created dataset. Raises ------ ValueError If the dataset already exists and ``overwrite`` is False. """ if field_name in particle_group: raise ValueError(f"Field '{field_name}' already exists in group '{particle_group.name}'.") # Create dataset dset = particle_group.create_dataset( field_name, shape=(number_of_particles, *field_shape), dtype=field_dtype, **kwargs, ) # Attach required units attribute dset.attrs["UNITS"] = str(field_units) return dset @overload @classmethod def build_particle_dataset( cls, path: Union[str, Path], number_of_particles: Sequence, box_size: unyt.unyt_quantity, *, ntypes: int = 6, unit_system: unyt.UnitSystem = None, overwrite: bool = False, fields: dict = None, ) -> "GadgetLikeParticleDataset": ...
[docs] @classmethod def build_particle_dataset(cls, *args, **kwargs): """ Create a new Gadget-like particle dataset on disk. This public factory method initializes a new HDF5 file containing the header, particle groups, and required fields in the Gadget/AREPO format. It is the main entry point for generating initial condition files or empty snapshot skeletons. Parameters ---------- path : str or pathlib.Path Destination path for the new HDF5 file. If the file already exists and ``overwrite`` is False, a :class:`FileExistsError` is raised. number_of_particles : sequence of int The number of particles of each type to include in the particle dataset. This should be a sequence (e.g., list or tuple) of length ``ntypes``, each element of which is a non-negative integer. If a particle type has zero particles, its group will not be created. box_size : unyt.unyt_quantity The physical size of the simulation box. Must carry length units. ntypes : int, default=6 Number of particle types to include. Gadget-like codes use six by default (0=gas, 1=DM, 2=disk, 3=bulge, 4=stars, 5=BH). unit_system : unyt.UnitSystem, optional Unit system for tagging dataset attributes. If not provided, the class default is used. overwrite : bool, default=False If True, overwrite any existing file at ``path``. If False, raise :class:`FileExistsError` if the file exists. fields : dict, optional Extra particle fields to populate. Keys must be of the form ``"PartTypeX.FieldName"`` where X is the particle type index. Values must be :class:`unyt.array.unyt_array` or :class:`numpy.ndarray` with the correct leading dimension. Returns ------- GadgetLikeParticleDataset A dataset object representing the newly created HDF5 file. Raises ------ FileExistsError If the target file exists and ``overwrite`` is False. ValueError If the number of particles does not match ``ntypes`` or if a field shape is inconsistent with its particle group. TypeError If ``unit_system`` is not a :class:`unyt.UnitSystem` instance. Notes ----- - This overload describes the base API. Subclasses may add further keyword arguments for physics-specific fields (e.g. MHD, cooling). - At minimum, each populated particle type will include the required datasets: ``Coordinates``, ``Velocities``, ``ParticleIDs``, and ``Masses``. Gas cells (PartType0) also include ``Density`` and ``InternalEnergy``. """ # --- Input Validation --- # # To begin, we need to sort out the various args and kwargs that get passed through. We do all of # this to permit a flexible / self-consistent interface for subclasses, but it comes at the cost # of some boiler plate code here. try: path, number_of_particles, box_size = args except ValueError as err: raise ValueError( "`path`, `number_of_particles`, and `box_size` must be provided as positional arguments." ) from err # Handle the path and overwrite management first. We attempt to # coerce the path to a Path object, then check if it exists. path = Path(path) overwrite = kwargs.pop("overwrite", False) if path.exists() and not overwrite: raise FileExistsError(f"File already exists: {path}. Use overwrite=True to replace it.") elif path.exists() and overwrite: path.unlink() elif path.is_dir(): raise IsADirectoryError(f"Path is a directory: {path}") path.parent.mkdir(parents=True, exist_ok=True) # We now ensure that the number of particles is valid and of the # right length. We do permit users to provide us with a count of the number of # particle types that is different from the default, but we do need to ensure # that the provided number of particles matches the expected number of particle types. _expected_ntypes = kwargs.get("number_of_particle_types", cls._default_ntypes) # coerce the number of particles to a list if it is not already number_of_particles = np.asarray(number_of_particles, dtype=np.uint32) if number_of_particles.shape != (_expected_ntypes,): raise ValueError( f"`number_of_particles` must be of length {_expected_ntypes} to" f" match the expected number of particle types." ) # Configure the unit system so that we have one available at all # times. This is taken from the class defaults if it is not provided. unit_system = kwargs.pop("unit_system", cls._default_unit_system) if not isinstance(unit_system, unyt.UnitSystem): raise TypeError("`unit_system` must be a unyt.UnitSystem instance.") # We now process the box size to ensure that it is a correctly specified unyt # quantity with units of length. elif isinstance(box_size, unyt.unyt_quantity): box_size = unyt.unyt_array(box_size.in_base(unit_system), dtype=np.float64) else: box_size = unyt.unyt_array(box_size * unit_system["length"], dtype=np.float64) # Pop out fields so its not contaminant in the kwargs. fields = kwargs.pop("fields", None) # --- File Generation --- # # At this stage, we can proceed to generate the file skeleton. In doing # so, we will first create the file, then write the Pisces metadata, then # the Gadget header, then the file skeleton, and finally any fields that # are provided to us. cls.generate_file_skeleton( path, number_of_particles, box_size, unit_system, **kwargs, # pass through any additional kwargs to the generation methods. ) # --- Field Population --- # # If fields are provided, we can now populate them. # These are provided as `<particle_type_name>.<field_name>`, so # we can just split the names, then dump them to the correct dataset # after some validation checks. if fields is not None: fields = dict(fields) with h5py.File(path, "r+") as stream: # Cycle through all of the provided fields and # populate them. for field_name, field_data in fields.items(): # Split the field name into particle type and # actual field name. try: ptype_name, f_name = field_name.split(".") except ValueError as exp: raise ValueError( f"Field name '{field_name}' is not in the correct format." " It must be of the form '<particle_type>.<field_name>'" ) from exp # Validate that the particle type group exists, and that # the field also exists as expected. if ptype_name not in stream: raise ValueError(f"Particle type group '{ptype_name}' does not exist in the file.") if f_name not in stream[ptype_name]: raise ValueError(f"Field '{f_name}' does not exist in particle type group '{ptype_name}'.") # Validate that the shape of the provided data matches # the shape of the dataset. dset = stream[ptype_name][f_name] if dset.shape != field_data.shape: raise ValueError( f"Shape of provided data for field '{field_name}' does not match dataset shape." f" Provided shape: {field_data.shape}, Dataset shape: {dset.shape}" ) # If all checks pass, we can write the data to # the dataset. We still need to ensure that the data gets # cast to the correct units from the dataset. _units = unyt.Unit(dset.attrs.get("UNITS", "dimensionless")) dset[...] = unyt.unyt_array(field_data, units=_units).to_value(_units) # Return a validated ParticleDataset instance output = cls(path) # Add the particle ids output.add_particle_ids(policy="global", overwrite=True) return output
[docs] class Gadget4ParticleDataset(GadgetLikeParticleDataset): """ Representation of a Gadget-4 style particle dataset. This class specializes :class:`GadgetLikeParticleDataset` for the conventions of Gadget-4 HDF5 initial conditions and snapshots. It defines the expected header structure, field names, and dataset generation logic for creating valid Gadget-4–compatible files. """ # --------------------------------- # # Class Constants / Flags # # --------------------------------- # # These flags provide standard names for built-in access to specific # fields in the dataset. This ensures that those areas of the code are # not hardcoded with string literals. These can be overridden in # subclasses if the dataset uses different naming conventions. # --- Field Name Conventions --- # _POSITION_FIELD_NAME = "Coordinates" _VELOCITY_FIELD_NAME = "Velocities" _MASS_FIELD_NAME = "Masses" _ID_FIELD_NAME = "ParticleIDs" _INTERNAL_ENERGY_FIELD_NAME = "InternalEnergy" _ELECTRON_ABUNDANCE_FIELD_NAME = "ElectronAbundance" # --- Class Settings --- # _default_ntypes: int = 6 """int: Default number of Gadget-like particle types. By default, this is set to 6, corresponding to the standard Gadget particle types; however, some simulations may have a different number. This can also be specified by the user as a kwarg when building a dataset. """ _default_unit_system = unyt.unit_systems.galactic_unit_system """Default unit system for interpreting Gadget particle fields. Gadget itself does not enforce units; this is provided for Pisces consistency and metadata export. """ _header_group_name: str = "Header" """str: Name of the group containing the Gadget-like file header. This can be overridden in subclasses if the header group has a different name. """ _particle_type_name_prefix: str = "ParticleType" """str: Prefix for particle type groups in the HDF5 file. This can be overridden in subclasses if the particle type groups have a different prefix. """ # ------------------------------------- # # Generation Methods # # ------------------------------------- # @classmethod def _generate_file_header( cls, stream: h5py.File, number_of_particles: np.ndarray, box_size: unyt.unyt_quantity, unit_system: unyt.UnitSystem, **kwargs, ): """ Write Gadget-4 style header attributes to the HDF5 file. This method writes the standard Gadget-4 header attributes into the file’s ``Header`` group. These attributes describe particle counts, box size, and basic cosmology metadata, and must be present for Gadget-4 to interpret the file as valid. Parameters ---------- stream : h5py.File The open HDF5 file handle to which attributes will be written. Attributes are stored at the root or within the ``Header`` group. number_of_particles : numpy.ndarray Array of length ``ntypes`` giving the number of particles per type. Values must fit into 32-bit integers since only single-file snapshots are supported here. box_size : unyt.unyt_quantity Simulation box size with length units. Converted into the provided unit system before being written. unit_system : unyt.UnitSystem Unit system used for box size conversion. **kwargs Optional keyword arguments. Recognized: - ``double_precision`` : bool (default True) Indicates whether floating-point particle data is stored in double precision. This does not change the header layout, but is included for API consistency. Raises ------ ValueError If any particle count exceeds the 32-bit integer limit. Notes ----- The following attributes are written: - ``NumPart_ThisFile`` (int32[ntypes]) — per-file particle counts. - ``NumPart_Total`` (uint64[ntypes]) — total particle counts. - ``MassTable`` (float64[ntypes]) — per-type constant masses (always zeros here, since individual masses are assumed). - ``BoxSize`` (float64) — simulation box size. - ``Time`` (float64) — simulation time (0.0 for ICs). - ``Redshift`` (float64) — redshift (0.0 for ICs). """ # --- Particle Count Management --- # # Manage the particle count writing segment. We ensure that the counts we have # from the user are 64 bit unsigned integers so that we can handle large counts. # We then need to cast that down to 32 to ensure that we have the correct types. Because # we don't support multi-file snapshots here, we need to ensure that the counts # do not exceed the 32-bit limit. number_of_particles = np.asarray(number_of_particles, dtype=np.uint64) # Check that we don't surpass the 32-bit limit. int32_max = np.iinfo(np.int32).max if np.any(number_of_particles > int32_max): raise ValueError( "Particle count exceeds 32-bit limit for single-file snapshots. " "Either reduce particles per type or split snapshot into multiple files." ) # Because we are under the 32-bit limit, we can safely cast to the # expected C-int. low_word = (number_of_particles & 0xFFFFFFFF).astype(np.int32) stream.attrs["NumPart_ThisFile"] = low_word stream.attrs["NumPart_Total"] = low_word.astype(np.uint64) stream.attrs["NumFilesPerSnapshot"] = np.int32(1) # --- Cosmology Parameters --- # # Like the physics flags, the cosmological parameters are not actually # used by AREPO, but are ready nonetheless. As such, we write them but # they are set to zero values. stream.attrs["Redshift"] = np.float64(0.0) stream.attrs["Time"] = np.float64(0.0) # --- Writing the Mass Table and the Box Size --- # # The final two relevant elements of the header are the mass table # and the box size. In AREPO, both of these are required. The mass table # is (by our convention) zeros because we ALWAYS specify particle masses. # The boxsize requires conversion. stream.attrs["MassTable"] = np.zeros_like(number_of_particles, dtype=np.float64) _native_boxsize = box_size.to_value(unit_system["length"]) stream.attrs["BoxSize"] = np.float64(_native_boxsize) @classmethod def _generate_particle_fields( cls, particle_group: h5py.Group, particle_type: int, number_of_particles: int, unit_system: unyt.UnitSystem, **kwargs, ): """ Create required Gadget-4 particle fields for a particle group. This method defines and initializes the standard datasets that must exist for a given particle type in a Gadget-4 dataset. Datasets are created with the correct shape, dtype, and unit attributes. Precision and optional fields are controlled via keyword arguments. Parameters ---------- particle_group : h5py.Group HDF5 group corresponding to the particle type (e.g., ``ParticleType0`` for gas). particle_type : int Particle type index (0=gas, 1=DM, 4=stars, 5=BH, etc.). number_of_particles : int Number of particles of this type. unit_system : unyt.UnitSystem Unit system used for unit annotations. **kwargs Configuration flags: - ``double_precision`` : bool, default=True Store float fields as float64 instead of float32. - ``coordinates_precision`` : {32, 64}, default=32 Precision of ``Coordinates`` dataset. - ``id_precision`` : {32, 64}, default=32 Bit-width of ``ParticleIDs`` dataset. - ``enable_cooling`` : bool, default=False If True, add ``ElectronAbundance`` field to gas. Notes ----- **Always created for all particle types**: - ``Coordinates`` (N,3) float32/float64 - ``Velocities`` (N,3) float32/float64 - ``Masses`` (N,) float32/float64 - ``ParticleIDs`` (N,) uint32/uint64 **Created for gas (ParticleType0)**: - ``InternalEnergy`` (N,) float32/float64 - ``ElectronAbundance`` (N,) float32/float64 if ``enable_cooling=True`` Units are stored in the ``UNITS`` attribute of each dataset. """ # --- Extract Basic Information --- # # Handle the coordinate dtype based on the double precision flag. _coordinate_precision = kwargs.get("coordinates_precision", 32) _id_precision = kwargs.get("id_precision", 32) _double_precision = kwargs.get("double_precision", True) if _coordinate_precision not in [32, 64]: raise ValueError("`coordinates_precision` must be either 32 or 64.") if _id_precision not in [32, 64]: raise ValueError("`id_precision` must be either 32 or 64.") coordinates_dtype = np.float64 if _double_precision or (_coordinate_precision == 64) else np.float32 float_dtype = np.float32 if not _double_precision else np.float64 id_dtype = np.uint64 if _id_precision == 64 else np.uint32 # --- Specify Field Parameters --- # # In order to make things reasonably clean, we specify each # of the required fields in a dictionary, then iterate through # that dictionary to create the datasets. This permits subclasses # to minimally modify the fields that are created. We also provide # access to hooks here so that users can configure the additional fields # for particular flags. _required_field_data = { cls._POSITION_FIELD_NAME: { "types": "all", "shape": (3,), "dtype": coordinates_dtype, "units": unit_system["length"], }, cls._VELOCITY_FIELD_NAME: { "types": "all", "shape": (3,), "dtype": float_dtype, "units": unit_system["velocity"], }, cls._MASS_FIELD_NAME: { "types": "all", "shape": (), "dtype": float_dtype, "units": unit_system["mass"], }, cls._ID_FIELD_NAME: { "types": "all", "shape": (), "dtype": id_dtype, "units": "dimensionless", }, cls._INTERNAL_ENERGY_FIELD_NAME: { "types": [0], "shape": (), "dtype": float_dtype, "units": unit_system["specific_energy"], }, } if kwargs.get("enable_cooling", False): _required_field_data = { **_required_field_data, cls._ELECTRON_ABUNDANCE_FIELD_NAME: { "types": [0], "shape": (), "dtype": float_dtype, "units": "dimensionless", }, } # --- Create Fields --- # # We now iterate through each required fields, check the particle type, # and create the dataset if it is required for this particle type. for field_name, field_info in _required_field_data.items(): if (field_info["types"] == "all") or (particle_type in field_info["types"]): cls._add_particle_field_to_file( particle_group, field_name, number_of_particles, field_shape=field_info["shape"], field_dtype=field_info["dtype"], field_units=field_info["units"], ) @overload @classmethod def build_particle_dataset( cls, path: Union[str, Path], number_of_particles: Sequence, box_size: unyt.unyt_quantity, *, ntypes: int = 6, unit_system: unyt.UnitSystem = None, overwrite: bool = False, fields: dict = None, double_precision: bool = True, coordinates_precision: int = 32, id_precision: int = 32, enable_cooling: bool = False, ) -> "Gadget4ParticleDataset": ...
[docs] @classmethod def build_particle_dataset(cls, *args, **kwargs): """ Build a new Gadget-4 particle dataset. This is the primary public constructor for Gadget-4 style initial condition files. It writes the file header, particle groups, and required fields according to Gadget-4 conventions. Parameters ---------- path : str or pathlib.Path Destination path for the new dataset. number_of_particles : sequence of int Particle counts per type. Length must equal ``ntypes``. box_size : unyt.unyt_quantity Simulation box size with length units. ntypes : int, default=6 Number of particle types (Gadget standard). unit_system : unyt.UnitSystem, optional Unit system for annotating dataset units. Defaults to galactic unit system. overwrite : bool, default=False Overwrite the file if it exists. If False, raise an error instead. fields : dict, optional Additional fields to populate after dataset creation. Keys must be of the form ``"ParticleTypeX.FieldName"``. double_precision : bool, default=True Use float64 for floating-point fields (except where overridden). coordinates_precision : {32, 64}, default=32 Precision for ``Coordinates``. id_precision : {32, 64}, default=32 Precision for ``ParticleIDs``. enable_cooling : bool, default=False If True, add ``ElectronAbundance`` to gas. Returns ------- Gadget4ParticleDataset A dataset object pointing to the newly created file. Raises ------ FileExistsError If the file exists and ``overwrite=False``. ValueError If particle counts or field shapes are inconsistent. TypeError If units or parameters are of invalid types. Examples -------- Create a simple Gadget-4 IC file with gas and DM:: import unyt as u ds = ( Gadget4ParticleDataset.build_particle_dataset( "ics_gadget4.hdf5", number_of_particles=[ 1000, 2000, 0, 0, 0, 0, ], box_size=1.0 * u.Mpc, overwrite=True, ) ) Create a Gadget-4 IC file with cooling enabled:: ds = ( Gadget4ParticleDataset.build_particle_dataset( "ics_cooling.hdf5", number_of_particles=[ 500, 500, 0, 0, 0, 0, ], box_size=500.0 * u.kpc, enable_cooling=True, ) ) """ return super().build_particle_dataset(*args, **kwargs)
[docs] class AREPOParticleDataset(GadgetLikeParticleDataset): """ Representation of an AREPO-style particle dataset. This class specializes :class:`GadgetLikeParticleDataset` for AREPO’s HDF5 initial condition format. It implements the header layout, field conventions, and optional physics extensions expected by the AREPO code. See Also -------- :class:`GadgetLikeParticleDataset` Base class for all Gadget/AREPO-compatible formats. :class:`Gadget4ParticleDataset` Implementation for Gadget-4 format. """ # --------------------------------- # # Class Constants / Flags # # --------------------------------- # # --- Field Name Conventions --- # _POSITION_FIELD_NAME = "Coordinates" # Required for all Ptypes _VELOCITY_FIELD_NAME = "Velocities" # Required for all Ptypes _MASS_FIELD_NAME = "Masses" # Required for all Ptypes _ID_FIELD_NAME = "ParticleIDs" # Required for all Ptypes _INTERNAL_ENERGY_FIELD_NAME = "InternalEnergy" # Required for gas only. _ELECTRON_ABUNDANCE_FIELD_NAME = "ElectronAbundance" # Required for gas if cooling enabled. _MAGNETIC_FIELD_NAME = "MagneticField" # Required for gas if MHD enabled. _PASSIVE_SCALARS_FIELD_NAME = "PassiveScalars" # Required for gas if scalars enabled. # --- Class Settings --- # _default_ntypes: int = 6 _default_unit_system = unyt.unit_systems.galactic_unit_system _header_group_name: str = "Header" _particle_type_name_prefix: str = "PartType" # ------------------------------------- # # Generation Methods # # ------------------------------------- # @classmethod def _generate_file_header( cls, stream: h5py.File, number_of_particles: np.ndarray, box_size: unyt.unyt_quantity, unit_system: unyt.UnitSystem, **kwargs, ): """ Generate and write the AREPO-style header attributes to the HDF5 file. This method creates the ``Header`` attributes required for AREPO initial conditions. The structure is modeled on the Gadget-2 snapshot header format, with several additional attributes retained for compatibility. While many of these fields are not used directly by AREPO when reading ICs (e.g., star formation and cooling flags), they are still written to maintain full Gadget-format compliance. Parameters ---------- stream : h5py.File The open HDF5 group handle into which the header attributes will be written. Attributes are stored at the root level (``stream.attrs``). number_of_particles : numpy.ndarray Array of shape (6,) giving the particle counts per type. Must be convertible to 32-bit integers for single-file snapshots. box_size : unyt.unyt_quantity The size of the simulation box, with units of length. unit_system : ~unyt.UnitSystem The unit system into which the box size will be converted before writing. **kwargs Additional options for header customization. Recognized keys: - ``double_precision`` : bool, optional Whether particle positions/velocities are stored in double precision. Default is True. Raises ------ ValueError If any particle count exceeds the 32-bit signed integer limit, which is not permitted in single-file snapshots. Notes ----- The following attributes are written: - ``NumPart_ThisFile`` (int32[6]): Per-file particle counts (must fit in 32-bit). - ``NumPart_Total`` (uint32[6]): Total particle counts (low word). - ``NumPart_Total_HighWord`` (uint32[6]): High word of total counts (all zero in single-file mode). - ``MassTable`` (float64[6]): Particle type masses. Always zeros here, since variable masses are used. - ``Time`` (float64): Simulation time (written as 0.0 for ICs). - ``Redshift`` (float64): Redshift of the snapshot (0.0 for ICs). - ``BoxSize`` (float64): Physical size of the simulation box, converted to the specified unit system. - ``NumFilesPerSnapshot`` (int32): Not written here (implicit = 1). - ``Omega0``, ``OmegaLambda``: Cosmology parameters. - ``HubbleParam`` (float64): Hubble parameter (written as 1.0). - ``Flag_Sfr``, ``Flag_Cooling``, ``Flag_StellarAge``, ``Flag_Metals``, ``Flag_Feedback`` (int32): Physics flags. Always 0 for ICs. - ``Flag_DoublePrecision`` (int32): Precision flag for particle positions/velocities. Although AREPO does not actually use the physics or cosmology flags when reading ICs, they are included for full Gadget-format compliance. """ # --- Particle Count Management --- # # Manage the particle count writing segment. We ensure that the counts we have # from the user are 64 bit unsigned integers so that we can handle large counts. # We then need to cast that down to 32 to ensure that we have the correct types. Because # we don't support multi-file snapshots here, we need to ensure that the counts # do not exceed the 32-bit limit. number_of_particles = np.asarray(number_of_particles, dtype=np.uint64) # Check that we don't surpass the 32-bit limit. int32_max = np.iinfo(np.int32).max if np.any(number_of_particles > int32_max): raise ValueError( "Particle count exceeds 32-bit limit for single-file snapshots. " "Either reduce particles per type or split snapshot into multiple files." ) # Because we are under the 32-bit limit, we can safely cast to the # expected C-int. low_word = (number_of_particles & 0xFFFFFFFF).astype(np.int32) stream.attrs["NumPart_ThisFile"] = low_word stream.attrs["NumPart_Total"] = low_word.astype(np.uint32) stream.attrs["NumPart_Total_HighWord"] = np.zeros_like(low_word).astype(np.uint32) # --- IC File Flags --- # # An inspection of the AREPO base code indicates that for IC files, these # header flags are effectively ignored, so we just write them unilaterally # to 0 even if they are actually enabled in the parameter file / configuration. # # The double-precision flag is important, however, as it indicates to the # code whether the positions and velocities are stored as float32 or float64. stream.attrs["Flag_Sfr"] = np.int32(0) stream.attrs["Flag_Cooling"] = np.int32(0) stream.attrs["Flag_StellarAge"] = np.int32(0) stream.attrs["Flag_Metals"] = np.int32(0) stream.attrs["Flag_Feedback"] = np.int32(0) stream.attrs["Flag_DoublePrecision"] = np.int32(kwargs.get("double_precision", True)) # --- Cosmology Parameters --- # # Like the physics flags, the cosmological parameters are not actually # used by AREPO, but are ready nonetheless. As such, we write them but # they are set to zero values. stream.attrs["Omega0"] = np.float64(0.0) stream.attrs["OmegaLambda"] = np.float64(0.0) stream.attrs["OmegaBaryon"] = np.float64(0.0) stream.attrs["HubbleParam"] = np.float64(1.0) stream.attrs["Redshift"] = np.float64(0.0) stream.attrs["Time"] = np.float64(0.0) # --- Writing the Mass Table and the Box Size --- # # The final two relevant elements of the header are the mass table # and the box size. In AREPO, both of these are required. The mass table # is (by our convention) zeros because we ALWAYS specify particle masses. # The boxsize requires conversion. stream.attrs["MassTable"] = np.zeros_like(number_of_particles, dtype=np.float64) _native_boxsize = box_size.to_value(unit_system["length"]) stream.attrs["BoxSize"] = np.float64(_native_boxsize) @classmethod def _generate_particle_fields( cls, particle_group: h5py.Group, particle_type: int, number_of_particles: int, unit_system: unyt.UnitSystem, **kwargs, ): """ Create the HDF5 datasets for a given particle type. This method defines and initializes the particle field datasets that must exist in an AREPO–compatible IC file. The base implementation guarantees creation of all *required* fields, and may conditionally add fields depending on enabled physics modules (e.g., MHD, cooling, passive scalars). Subclasses may extend or override this behavior to support additional physics or code–specific conventions. Parameters ---------- particle_group : h5py.Group The open HDF5 group corresponding to the particle type (e.g., ``"PartType0"`` for gas). particle_type : int Particle type index (0 = gas, 1 = dark matter, 4 = stars, 5 = BHs, etc.). number_of_particles : int Number of particles of this type to allocate datasets for. unit_system : unyt.UnitSystem Unit system used to tag dataset attributes. **kwargs Configuration flags that control optional fields: - ``coordinates_in_double_precision`` : bool, default=True Store coordinates as float64 instead of float32. - ``double_precision`` : bool, default=True Store all other float fields as float64 instead of float32. - ``long_ids`` : bool, default=True Store ``ParticleIDs`` as uint64 instead of uint32. - ``enable_mhd`` : bool, default=False Add ``MagneticField`` vector (3-component) for gas. - ``enable_cooling`` : bool, default=False Add ``ElectronAbundance`` scalar field for gas. - ``passive_scalars`` : int, default=0 If >0, add a ``PassiveScalars`` field with this many components. Notes ----- **Always created for all particle types:** - ``Coordinates`` : float32[3] or float64[3] - ``Velocities`` : float32[3] or float64[3] - ``ParticleIDs`` : uint32 or uint64 - ``Masses`` : float32 or float64 **Always created for gas (PartType0):** - ``InternalEnergy`` : float32 or float64 - ``Density`` : float32 or float64 **Conditionally created (if flags set):** - ``MagneticField`` : float32[3] or float64[3] (gas, if ``enable_mhd``) - ``ElectronAbundance`` : float32 or float64 (gas, if ``enable_cooling``) - ``PassiveScalars`` : float32[N] or float64[N] (gas, if ``passive_scalars`` > 0) Returns ------- None Datasets are created directly in ``particle_group``. Attributes describing units are also attached. Raises ------ ValueError If required arguments are missing or inconsistent. """ # --- Extract Basic Information --- # # Handle the coordinate dtype based on the double precision flag. _coords_dp = kwargs.get("coordinates_in_double_precision", True) _dp = kwargs.get("double_precision", True) _id_dp = kwargs.get("long_ids", True) coordinates_dtype = np.float64 if (_coords_dp or _dp) else np.float32 float_dtype = np.float64 if _dp else np.float32 id_dtype = np.uint64 if _id_dp else np.uint32 # --- Specify Field Parameters --- # # In order to make things reasonably clean, we specify each # of the required fields in a dictionary, then iterate through # that dictionary to create the datasets. This permits subclasses # to minimally modify the fields that are created. We also provide # access to hooks here so that users can configure the additional fields # for particular flags. _required_field_data = { cls._POSITION_FIELD_NAME: { "types": "all", "shape": (3,), "dtype": coordinates_dtype, "units": unit_system["length"], }, cls._VELOCITY_FIELD_NAME: { "types": "all", "shape": (3,), "dtype": float_dtype, "units": unit_system["velocity"], }, cls._MASS_FIELD_NAME: { "types": "all", "shape": (), "dtype": float_dtype, "units": unit_system["mass"], }, cls._ID_FIELD_NAME: { "types": "all", "shape": (), "dtype": id_dtype, "units": "dimensionless", }, cls._INTERNAL_ENERGY_FIELD_NAME: { "types": [0], "shape": (), "dtype": float_dtype, "units": unit_system["specific_energy"], }, cls._DENSITY_FIELD_NAME: {"types": [0], "shape": (), "dtype": float_dtype, "units": unit_system["density"]}, } # Add MHD fields if necessary. if kwargs.get("enable_mhd", False): _required_field_data = { **_required_field_data, cls._MAGNETIC_FIELD_NAME: { "types": [0], "shape": (3,), "dtype": float_dtype, "units": unit_system["magnetic_field"], }, } if kwargs.get("enable_cooling", False): _required_field_data = { **_required_field_data, cls._ELECTRON_ABUNDANCE_FIELD_NAME: { "types": [0], "shape": (), "dtype": float_dtype, "units": "dimensionless", }, } if kwargs.get("passive_scalars", 0) > 0: n_scalars = kwargs.get("passive_scalars", 0) _required_field_data = { **_required_field_data, cls._PASSIVE_SCALARS_FIELD_NAME: { "types": [0], "shape": (n_scalars,), "dtype": float_dtype, "units": "dimensionless", }, } # --- Create Fields --- # # We now iterate through each required fields, check the particle type, # and create the dataset if it is required for this particle type. for field_name, field_info in _required_field_data.items(): if (field_info["types"] == "all") or (particle_type in field_info["types"]): cls._add_particle_field_to_file( particle_group, field_name, number_of_particles, field_shape=field_info["shape"], field_dtype=field_info["dtype"], field_units=field_info["units"], ) @overload @classmethod def build_particle_dataset( cls, path: Union[str, Path], number_of_particles: Sequence, box_size: unyt.unyt_quantity, *, ntypes: int = 6, unit_system: unyt.UnitSystem = None, overwrite: bool = False, fields: dict = None, double_precision: bool = True, coordinates_in_double_precision: bool = True, long_ids: bool = True, enable_mhd: bool = False, enable_cooling: bool = False, passive_scalars: int = 0, ) -> "AREPOParticleDataset": ...
[docs] @classmethod def build_particle_dataset(cls, *args, **kwargs) -> "AREPOParticleDataset": """ Create a new HDF5 particle dataset in AREPO IC format. This is the **primary factory method** for generating particle datasets that can be used as initial conditions in simulation codes. It writes a complete, self-contained HDF5 file containing the particle fields required by AREPO (and optional fields depending on physics modules). Parameters ---------- path : str or pathlib.Path Destination file path for the new dataset. If the file exists and ``overwrite=False``, a :class:`FileExistsError` is raised. number_of_particles : sequence of int Number of particles for each particle type. The length must match ``ntypes``. For example, ``[1000, 2000, 0, 0, 500, 0]`` specifies 1000 gas cells, 2000 dark matter particles, 500 stars, and no others. box_size : unyt.unyt_quantity Physical size of the simulation box (must have length units). This is written into the dataset metadata and header. ntypes : int, default=6 Number of particle types to include. Gadget/AREPO use 6 by default (0=gas, 1=DM, 2=disk, 3=bulge, 4=stars, 5=BHs), but this may be larger in customized builds. unit_system : unyt.UnitSystem, optional Unit system for tagging dataset attributes (e.g., cgs or astrophysical). If omitted, a default astrophysical unit system is used. overwrite : bool, default=False If True, overwrite an existing file at ``path``. If False, raise :class:`FileExistsError` if the file exists. fields : dict[str, unyt.unyt_array], optional Optional dictionary of fields to write into the dataset at creation. Keys must be of the form ``"<particle_type>.<field_name>"``, and each value must be a :class:`unyt.array.unyt_array` instance with the correct shape and units for the specified field. Field may also be added, removed, and manipulated after creation. This option simply permits populating fields at creation time. double_precision : bool, default=True Enable double precision initial conditions. This flag is equivalent to AREPO's ``INPUT_IN_DOUBLEPRECISION`` compile-time option. If True, all floating-point fields are stored as double precision floats as defined by the local system. If False, single precision (float32) is used. coordinates_in_double_precision : bool, default=True If True, ``Coordinates`` are stored in double precision. If False, they are stored as float32. Useful when positions need higher accuracy than velocities or internal energy. This corresponds to the AREPO flag ``INPUT_COORDINATES_IN_DOUBLEPRECISION``. long_ids : bool, default=True If True, ``ParticleIDs`` are stored as 64-bit unsigned integers. If False, 32-bit IDs are used. 64-bit IDs are required for runs with >4 billion particles. This corresponds to the AREPO flag ``LONGIDS``. enable_mhd : bool, default=False If True, adds ``MagneticField`` (3-component vector) to gas particles (PartType0). Required if running with MHD enabled. enable_cooling : bool, default=False If True, adds ``ElectronAbundance`` scalar field to gas particles. This initializes the ionization fraction for cooling modules. Defaults to zero if not supplied. passive_scalars : int, default=0 If >0, adds a ``PassiveScalars`` field with the given number of components for gas particles. These are advected tracers that do not affect dynamics unless used by custom physics modules. Returns ------- AREPOParticleDataset The newly created particle dataset object pointing to the file at ``path``. Raises ------ FileExistsError If the file exists and ``overwrite=False``. ValueError If particle counts are inconsistent with field shapes or box size. TypeError If provided fields are not :class:`unyt.unyt_array` instances. """ return super().build_particle_dataset(*args, **kwargs)