diff --git a/docs/changes/2928.feature.rst b/docs/changes/2928.feature.rst new file mode 100644 index 00000000000..c3742044879 --- /dev/null +++ b/docs/changes/2928.feature.rst @@ -0,0 +1,41 @@ +Introduces the `~ctapipe.io.EventPreprocessor` class that can generically +transform an event table by applying the following steps: + +* Generate new or rename existing columns with a `~ctapipe.core.FeatureGenerator` +* Select "good" event rows with a `~ctapipe.core.QualityQuery` +* Select which columns to output (by setting the ``features`` configuration + attribute of the `~ctapipe.io.EventPreprocessor`) + +This is useful for doing the final steps of DL2 processing, and will eventually +replace what is in `~ctapipe.io.DL2EventPreprocessor` and `~ctapipe.io.DL2EventLoader`, which will be deprecated in a future release. + +The `~ctapipe.io.EventPreprocessor` also includes the ability to pre-configure +itself for specific use cases by setting the ``feature_set`` option. Currently +only two are implemented: ``feature_set=dl2_irf``, which defines the transforms, +event selection, and output features for processing simulated DL2 events, and +``feature_set=custom``, which has no pre-configuration and requires all +parameters to be set by the user in a config file. More can be added by adding +to the registry. + +The functionality of `~ctapipe.io.DL2EventLoader` can be mimicked with the following: + +.. code-block:: python + + from ctapipe.io import TableLoader, EventPreprocessor + from astropy.table import vstack + + DL2FILE = "some_dl2_file.h5" + with TableLoader(DL2FILE, dl2=True, simulated=True, observation_info=True) as loader: + preprocess = EventPreprocessor(feature_set="dl2_irf") + events = vstack( + [ + preprocess(QTable(c.data)) + for c in loader.read_subarray_events_chunked(chunk_size=100_000) + ] + ) + + +This also introduces a helper function `~ctapipe.coordinates.altaz_to_nominal` +to convert columns of alt/az coordinates to FOV coordinates in the +`~ctapipe.coordinates.NominalFrame`, which works with the +`~ctapipe.core.FeatureGenerator`. diff --git a/src/ctapipe/coordinates/__init__.py b/src/ctapipe/coordinates/__init__.py index 910ab215edd..7d6ed4ea864 100644 --- a/src/ctapipe/coordinates/__init__.py +++ b/src/ctapipe/coordinates/__init__.py @@ -21,7 +21,11 @@ from .impact_distance import impact_distance, shower_impact_distance from .nominal_frame import NominalFrame from .telescope_frame import TelescopeFrame -from .utils import altaz_to_righthanded_cartesian, get_point_on_shower_axis +from .utils import ( + altaz_to_nominal, + altaz_to_righthanded_cartesian, + get_point_on_shower_axis, +) __all__ = [ "TelescopeFrame", @@ -37,6 +41,7 @@ "impact_distance", "shower_impact_distance", "get_point_on_shower_axis", + "altaz_to_nominal", ] diff --git a/src/ctapipe/coordinates/tests/test_utils.py b/src/ctapipe/coordinates/tests/test_utils.py index 1ebd419c679..6d41dfda4f0 100644 --- a/src/ctapipe/coordinates/tests/test_utils.py +++ b/src/ctapipe/coordinates/tests/test_utils.py @@ -53,3 +53,18 @@ def test_single_telescope(subarray_prod5_paranal): # 10 km is around the shower maximum, should be around 1 degree from the source with pytest.warns(MissingFrameAttributeWarning): assert u.isclose(source.separation(point), 1.0 * u.deg, atol=0.1 * u.deg) + + +def test_altaz_to_nominal(): + from ctapipe.coordinates import altaz_to_nominal + + column = altaz_to_nominal( + az=[220.0, 220.2] * u.deg, + alt=[80.0, 79.2] * u.deg, + pointing_az=[220.0, 220.0] * u.deg, + pointing_alt=[80.0, 80.0] * u.deg, + ) + + assert column.unit == u.deg + assert np.allclose(column[0].value, 0) + assert np.allclose(column[1].value, [0.03747984, -0.79993558]) diff --git a/src/ctapipe/coordinates/utils.py b/src/ctapipe/coordinates/utils.py index e8c2310b776..52a993357ff 100644 --- a/src/ctapipe/coordinates/utils.py +++ b/src/ctapipe/coordinates/utils.py @@ -1,14 +1,16 @@ import astropy.units as u import numpy as np -from astropy.coordinates import AltAz +from astropy.coordinates import AltAz, SkyCoord from erfa.ufunc import p2s as cartesian_to_spherical from erfa.ufunc import s2p as spherical_to_cartesian from .ground_frames import _get_xyz +from .nominal_frame import NominalFrame __all__ = [ "altaz_to_righthanded_cartesian", "get_point_on_shower_axis", + "altaz_to_nominal", ] @@ -80,3 +82,24 @@ def get_point_on_shower_axis(core_x, core_y, alt, az, telescope_position, distan cartesian = point[np.newaxis, :] - _get_xyz(telescope_position).T lon, lat, _ = cartesian_to_spherical(cartesian) return AltAz(alt=lat, az=-lon, copy=False) + + +def altaz_to_nominal(az, alt, pointing_az, pointing_alt) -> u.Quantity: + """ + Compute nominal (FOV) coordinates from alt/az coordinates. + + This can be used in a FeatureGenerator or ExpressionEngine to get a single + column with fov_lon, fov_lat coordinates. + + Returns + ------- + u.Quantity: + 2D array of coordinates with 2 columns: fov_lon, fov_lat + """ + pointing_coord = SkyCoord(az=pointing_az, alt=pointing_alt, frame="altaz") + event_coord = SkyCoord(az=az, alt=alt, frame="altaz", origin=pointing_coord) + nominal_coord = event_coord.transform_to(NominalFrame) + + return u.Quantity( + np.column_stack((nominal_coord.fov_lon.deg, nominal_coord.fov_lat.deg)), u.deg + ) diff --git a/src/ctapipe/io/__init__.py b/src/ctapipe/io/__init__.py index ef85cd7ddfc..c9f4b041ffc 100644 --- a/src/ctapipe/io/__init__.py +++ b/src/ctapipe/io/__init__.py @@ -8,6 +8,7 @@ from .astropy_helpers import read_table, write_table # noqa: I001 from .datalevels import DataLevel from .dl2_tables_preprocessing import DL2EventPreprocessor, DL2EventLoader +from .event_preprocessor import EventPreprocessor from .eventsource import EventSource from .eventseeker import EventSeeker from .tableio import TableReader, TableWriter @@ -46,4 +47,5 @@ "DL2EventPreprocessor", "DL2EventLoader", "get_hdf5_monitoring_types", + "EventPreprocessor", ] diff --git a/src/ctapipe/io/event_preprocessor.py b/src/ctapipe/io/event_preprocessor.py new file mode 100644 index 00000000000..d311035815d --- /dev/null +++ b/src/ctapipe/io/event_preprocessor.py @@ -0,0 +1,217 @@ +"""Module containing classes related to event loading and preprocessing""" + +from astropy.coordinates import angular_separation + +from ..coordinates import altaz_to_nominal +from ..core import ( + Component, + FeatureGenerator, + QualityQuery, + ToolConfigurationError, + traits, +) + +__all__ = ["EventPreprocessor"] + + +from typing import Callable + + +class FeatureSetRegistry: + """Registry for custom feature set configurations.""" + + _registry = {} + + @classmethod + def register(cls, name: str): + """Register a feature set configuration. + + Examples + -------- + >>> @FeatureSetRegistry.register("my_analysis") + ... def my_config(preprocessor): + ... return { + ... "features_to_generate": [("custom", "col_a / col_b")], + ... "quality_criteria": [("cut", "custom > 0.5")], + ... "output_features": ["event_id", "custom"] + ... } + """ + + def decorator(func: Callable): + cls._registry[name] = func + return func + + return decorator + + @classmethod + def get(cls, name: str): + """Get a registered configuration function.""" + return cls._registry.get(name) + + @classmethod + def list_available(cls): + """List all registered feature set names.""" + return list(cls._registry.keys()) + + +@FeatureSetRegistry.register("dl2_irf") +def _dl2_irf_config(preprocessor): + """Built-in configuration for DL2 IRF generation.""" + return { + "features_to_generate": [ + ("reco_energy", f"{preprocessor.energy_reconstructor}_energy"), + ("reco_alt", f"{preprocessor.geometry_reconstructor}_alt"), + ("reco_az", f"{preprocessor.geometry_reconstructor}_az"), + ("gh_score", f"{preprocessor.gammaness_reconstructor}_prediction"), + ("theta", "angular_separation(reco_az, reco_alt, true_az, true_alt)"), + ( + "reco_fov_coord", + "altaz_to_nominal(reco_az, reco_alt, subarray_pointing_lon, subarray_pointing_lat)", + ), + ( + "reco_fov_lon", + "reco_fov_coord[:,0]", + ), # note: GADF IRFs use the negative of this + ("reco_fov_lat", "reco_fov_coord[:,1]"), + ( + "true_fov_coord", + "altaz_to_nominal(true_az, true_alt, subarray_pointing_lon, subarray_pointing_lat)", + ), + ( + "true_fov_lon", + "true_fov_coord[:,0]", + ), # note: GADF IRFs use the negative of this + ("true_fov_lat", "true_fov_coord[:,1]"), + ( + "true_fov_offset", + "angular_separation(true_fov_lon, true_fov_lat, 0*u.deg, 0*u.deg)", + ), + ( + "reco_fov_offset", + "angular_separation(reco_fov_lon, reco_fov_lat, 0*u.deg, 0*u.deg)", + ), + ( + "multiplicity", + f"np.count_nonzero({preprocessor.gammaness_reconstructor}_telescopes,axis=1)", + ), + ], + "quality_criteria": [ + ("Valid geometry", f"{preprocessor.geometry_reconstructor}_is_valid"), + ("valid energy", f"{preprocessor.energy_reconstructor}_is_valid"), + ("valid gammaness", f"{preprocessor.gammaness_reconstructor}_is_valid"), + ("sufficient multiplicity", "multiplicity >= 4"), + ], + "output_features": [ + "event_id", + "obs_id", + "reco_energy", + "reco_alt", + "reco_az", + "gh_score", + "true_energy", + "true_alt", + "true_az", + "true_fov_offset", + "reco_fov_offset", + "theta", + "reco_fov_lat", + "true_fov_lat", + "reco_fov_lon", + "true_fov_lon", + "multiplicity", + ], + } + + +class EventPreprocessor(Component): + """ + Selects or generates features and filters tables of events. + + In normal use, one only has to specify the ``feature_set`` option, which + will generate features supports standard use cases. For advanced usage, you + can set ``feature_set=custom`` and pass in a configured + `~ctapipe.core.FeatureGenerator` and set the ``features`` property of this + class with the columns you to retain in the output table. + + In the `~ctapipe.core.FeatureGenerator` used internally, you have access to + several additional functions useful for DL2 processing: + + - `~astropy.coordinates.angular_separation` + - `~ctapipe.coordinates.altaz_to_nominal` + """ + + energy_reconstructor = traits.Unicode( + default_value="RandomForestRegressor", + help="Prefix of the reco `_energy` column", + ).tag(config=True) + + geometry_reconstructor = traits.Unicode( + default_value="HillasReconstructor", + help="Prefix of the `_alt` and `_az` reco geometry columns", + ).tag(config=True) + + gammaness_reconstructor = traits.Unicode( + default_value="RandomForestClassifier", + help="Prefix of the classifier `_prediction` column", + ).tag(config=True) + + feature_set = traits.CaselessStrEnum( + ["custom"] + FeatureSetRegistry.list_available(), + default_value="custom", + help=( + "Set up the FeatureGenerator.features, output features, and quality criteria " + "based on standard use cases." + "Specify 'custom' if you want to set your own in your config file. If this is set to " + "any value other than 'custom', the feature properties of the configuration " + "file you pass in will be overridden." + ), + ).tag(config=True) + + features = traits.List( + traits.Unicode(), + help=( + "Features (columns) to retain in the output. " + "These can include columns generated by the FeatureGenerator. " + "If you set these, make sure feature_set=custom." + ), + ).tag(config=True) + + def __init__(self, config=None, parent=None, **kwargs): + super().__init__(config=config, parent=parent, **kwargs) + if self.feature_set == "custom": + self.feature_generator = FeatureGenerator(parent=self) + self.quality_query = QualityQuery(parent=self) + else: # use a pre-registered feature set + feature_set = FeatureSetRegistry.get(self.feature_set)(self) + self.feature_generator = FeatureGenerator( + parent=self, features=feature_set["features_to_generate"] + ) + self.quality_query = QualityQuery( + parent=self, quality_criteria=feature_set["quality_criteria"] + ) + self.features = feature_set["output_features"] + # sanity checks: + if len(self.features) == 0: + raise ToolConfigurationError( + "DL2EventPreprocessor has no output features configured." + "You have set `feature_set=custom`, but did not provide the list " + "of features in the configuration (DL2EventPreprocessor.features)." + ) + + def __call__(self, table): + """Return new table with only the columns in features.""" + + # generate new features, which includes renaming columns: + generated = self.feature_generator( + table, + angular_separation=angular_separation, + altaz_to_nominal=altaz_to_nominal, + ) + + # apply event selection on the resulting table + + selected_mask = self.quality_query.get_table_mask(generated) + + # return only the columns specified in `self.features`, and rows in + # `selected_mask` + return generated[self.features][selected_mask] diff --git a/src/ctapipe/io/tests/test_event_preprocessor.py b/src/ctapipe/io/tests/test_event_preprocessor.py new file mode 100644 index 00000000000..378e0d4ac65 --- /dev/null +++ b/src/ctapipe/io/tests/test_event_preprocessor.py @@ -0,0 +1,114 @@ +#!/usr/bin/env python3 + +import numpy as np +import pytest +from astropy import units as u +from astropy.table import QTable + +from ctapipe.io.event_preprocessor import FeatureSetRegistry + + +@pytest.fixture(scope="function") +def minimal_dl2_table(): + """A dunmmy DL2 table for testing DL2EventPreprocessor""" + return QTable( + dict( + obs_id=[10, 10, 10, 10], + event_id=[1, 2, 3, 4], + true_energy=[100.0, 50.0, 2.0, 30.0] * u.TeV, + RandomForestRegressor_energy=[100.1, 49.2, 2.6, 40.0] * u.TeV, + RandomForestRegressor_is_valid=[True, True, True, True], + HillasReconstructor_az=[271.0, 271.6, 271.4, 268.1] * u.deg, + HillasReconstructor_alt=[70.1, 68.2, 69.3, 70.8] * u.deg, + HillasReconstructor_is_valid=[True, True, True, False], + RandomForestClassifier_prediction=[0.9, 0.5, 0.1, 0.3], + RandomForestClassifier_is_valid=[True, True, True, False], + true_alt=[70.0, 70.0, 70.0, 70.0] * u.deg, + true_az=[270.0, 270.0, 270.0, 270.0] * u.deg, + subarray_pointing_lat=[70.0, 70.0, 70.0, 70.0] * u.deg, + subarray_pointing_lon=[270.0, 270.0, 270.0, 270.0] * u.deg, + RandomForestClassifier_telescopes=np.array( + [ + [False, True, True, True], + [True, True, True, True], + [True, True, False, True], + [True, True, True, True], + ] + ), + ) + ) + + +@pytest.mark.parametrize("feature_set", FeatureSetRegistry.list_available()) +def test_event_preprocessing(feature_set, minimal_dl2_table): + from traitlets.config import Config + + from ctapipe.io import EventPreprocessor + + # set some custom features for the case where the feature_set==custom. + # These will be ignored in other feature_sets. + custom_config = Config() + custom_config.EventPreprocessor.features = ["obs_id", "event_id"] + table = minimal_dl2_table + + # process the table: + preprocess = EventPreprocessor(config=custom_config, feature_set=feature_set) + table_processed = preprocess(table) + + for feature in preprocess.features: + assert feature in table_processed.columns + + # check that the qualityquery worked + assert len(table_processed) <= len(table) + + +def test_no_output(): + """Check error is raised if no columns are specified for output.""" + from ctapipe.core import ToolConfigurationError + from ctapipe.io import EventPreprocessor + + with pytest.raises(ToolConfigurationError): + EventPreprocessor(feature_set="custom") + + +def test_nondefault_reconstructors(minimal_dl2_table): + """Check that using a different constructor than default still works""" + + from ctapipe.io import EventPreprocessor + + # define some new reconstructors, and add those columns to the test table: + geom = "ExampleGeometryReconstructor" + energy = "ExampleEnergyRegressor" + gammaness = "ExampleGammnessClassifier" + table = minimal_dl2_table + + table[f"{geom}_alt"] = ([71.1, 62.2, 61.3, 75.8] * u.deg,) + table[f"{geom}_az"] = [231.0, 231.6, 231.4, 238.1] * u.deg + table[f"{geom}_is_valid"] = [True, False, True, True] + + table[f"{energy}_energy"] = [20.0, 1.0, 0.5, 0.1] * u.TeV + table[f"{energy}_is_valid"] = [True, False, True, True] + + table[f"{gammaness}_prediction"] = [0.1, 0.8, 0.9, 0.7] + table[f"{gammaness}_is_valid"] = [True, False, True, True] + table[f"{gammaness}_telescopes"] = table["RandomForestClassifier_telescopes"] + + preprocess = EventPreprocessor( + feature_set="dl2_irf", + geometry_reconstructor=geom, + energy_reconstructor=energy, + gammaness_reconstructor=gammaness, + ) + + table_processed = preprocess(table) + + # check that the processing worked. In this case, we check that the + # requested columns are renamed correctly and that the filtered values match + # the original values. + + mask = table["event_id"] == table_processed["event_id"] + masked = table[mask] # so that we just compare values after filtering + + assert np.allclose(table_processed["reco_energy"], masked[f"{energy}_energy"]) + assert np.allclose(table_processed["reco_az"], masked[f"{geom}_az"]) + assert np.allclose(table_processed["gh_score"], masked[f"{gammaness}_prediction"])