Skip to content
Merged
Show file tree
Hide file tree
Changes from 9 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
42 changes: 42 additions & 0 deletions docs/changes/2928.feature.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
Introduces the `~ctapipe.io.EventPreprocessor` class that can generically
transform an event table by applying the following steps:

* Generate new or rename existing columns with a `~ctapipe.core.FeatureGenerator`
* Select "good" event rows with a `~ctapipe.core.QualityQuery`
* Select which columns to output (by setting the ``features`` configuration
attribute of the `~ctapipe.io.EventPreprocessor`)

This is useful for doing the final steps of DL2 processing, and will eventually
replace what is in `DL2EventPreprocessor` and `DL2EventLoader`, which will be
deprecated in a future release.

The `~ctapipe.core.EventPreprocessor` also includes the ability to pre-configure
itself for specific use cases by setting the ``feature_set`` option. Currently
only two `~ctapipe.io.ProcessingFeatureSet` are implemented:
`feature_set=dl2_irf`, which defines the transforms, event selection, and output
features for processing simulated DL2 events, and `feature_set=custom`, which
has no pre-configuration and requires all parameters to be set by the user in a
config file.

The functionality of `DL2EventLoader` can be mimicked with the following:

.. code-block:: python

from ctapipe.io import TableLoader, EventPreprocessor
from astropy.table import vstack

DL2FILE = "some_dl2_file.h5"
loader = TableLoader(DL2FILE, dl2=True, simulated=True, observation_info=True)
preprocess = EventPreprocessor(feature_set="dl2_simulation")
Comment thread
kosack marked this conversation as resolved.
Outdated
events = vstack(
[
preprocess(QTable(c.data))
for c in loader.read_subarray_events_chunked(chunk_size=100_000)
]
)


This also introduces a helper function `~ctapipe.coordinates.altaz_to_nominal` to
convert columns of alt/az coordinates to FOV coordinates in the
`~ctapipe.coordinates.NominalFrame`, which works with the
Comment thread
kosack marked this conversation as resolved.
Outdated
`~ctapipe.io.FeatureGenerator`.
7 changes: 6 additions & 1 deletion src/ctapipe/coordinates/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,11 @@
from .impact_distance import impact_distance, shower_impact_distance
from .nominal_frame import NominalFrame
from .telescope_frame import TelescopeFrame
from .utils import altaz_to_righthanded_cartesian, get_point_on_shower_axis
from .utils import (
altaz_to_nominal,
altaz_to_righthanded_cartesian,
get_point_on_shower_axis,
)

__all__ = [
"TelescopeFrame",
Expand All @@ -37,6 +41,7 @@
"impact_distance",
"shower_impact_distance",
"get_point_on_shower_axis",
"altaz_to_nominal",
]


Expand Down
15 changes: 15 additions & 0 deletions src/ctapipe/coordinates/tests/test_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,3 +53,18 @@ def test_single_telescope(subarray_prod5_paranal):
# 10 km is around the shower maximum, should be around 1 degree from the source
with pytest.warns(MissingFrameAttributeWarning):
assert u.isclose(source.separation(point), 1.0 * u.deg, atol=0.1 * u.deg)


def test_altaz_to_nominal():
from ctapipe.coordinates import altaz_to_nominal

column = altaz_to_nominal(
az=[220.0, 220.2] * u.deg,
alt=[80.0, 79.2] * u.deg,
pointing_az=[220.0, 220.0] * u.deg,
pointing_alt=[80.0, 80.0] * u.deg,
)

assert column.unit == u.deg
assert np.allclose(column[0].value, 0)
assert np.allclose(column[1].value, [0.03747984, -0.79993558])
26 changes: 25 additions & 1 deletion src/ctapipe/coordinates/utils.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,16 @@
import astropy.units as u
import numpy as np
from astropy.coordinates import AltAz
from astropy.coordinates import AltAz, SkyCoord
from erfa.ufunc import p2s as cartesian_to_spherical
from erfa.ufunc import s2p as spherical_to_cartesian

from .ground_frames import _get_xyz
from .nominal_frame import NominalFrame

__all__ = [
"altaz_to_righthanded_cartesian",
"get_point_on_shower_axis",
"altaz_to_nominal",
]


Expand Down Expand Up @@ -80,3 +82,25 @@ def get_point_on_shower_axis(core_x, core_y, alt, az, telescope_position, distan
cartesian = point[np.newaxis, :] - _get_xyz(telescope_position).T
lon, lat, _ = cartesian_to_spherical(cartesian)
return AltAz(alt=lat, az=-lon, copy=False)


def altaz_to_nominal(az, alt, pointing_az, pointing_alt) -> u.Quantity:
"""
Compute nominal (FOV) coordinates from alt/az coordinates.

This can be used in a FeatureGenerator or ExpressionEngine to get a single
column with fov_lon, fov_lat coordinates.

Returns
-------
u.Quantity:
2D array of coordinates with 2 columns: fov_lon, fov_lat
"""
pointing_coord = SkyCoord(az=pointing_az, alt=pointing_alt, frame="altaz")
event_coord = SkyCoord(az=az, alt=alt, frame="altaz", origin=pointing_coord)
nominal_coord = event_coord.transform_to(NominalFrame)

# note the minus sign for the fov_lon, which is to match GADF conventions
Comment thread
kosack marked this conversation as resolved.
Outdated
return u.Quantity(
np.column_stack((nominal_coord.fov_lon.deg, nominal_coord.fov_lat.deg)), u.deg
)
3 changes: 3 additions & 0 deletions src/ctapipe/io/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
from .astropy_helpers import read_table, write_table # noqa: I001
from .datalevels import DataLevel
from .dl2_tables_preprocessing import DL2EventPreprocessor, DL2EventLoader
from .event_preprocessor import EventPreprocessor, PreprocessorFeatureSet
from .eventsource import EventSource
from .eventseeker import EventSeeker
from .tableio import TableReader, TableWriter
Expand Down Expand Up @@ -46,4 +47,6 @@
"DL2EventPreprocessor",
"DL2EventLoader",
"get_hdf5_monitoring_types",
"EventPreprocessor",
"PreprocessorFeatureSet",
]
205 changes: 205 additions & 0 deletions src/ctapipe/io/event_preprocessor.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,205 @@
"""Module containing classes related to event loading and preprocessing"""

from enum import StrEnum, auto

from astropy.coordinates import angular_separation
from traitlets import default

from ..coordinates import altaz_to_nominal
from ..core import (
Component,
FeatureGenerator,
QualityQuery,
ToolConfigurationError,
traits,
)

__all__ = ["EventPreprocessor"]


class PreprocessorFeatureSet(StrEnum):
Comment thread
kosack marked this conversation as resolved.
Outdated
"""Pre-defined configurations for DL2EventPreprocessor for specific use cases."""

custom = auto() #: use user-supplied configuration
dl2_irf = auto() #: support IRF preprocessing use case


class EventPreprocessor(Component):
"""
Selects or generates features and filters tables of events.

In normal use, one only has to specify the ``feature_set`` option, which
will generate features supports standard use cases. For advanced usage, you
can set ``feature_set=custom`` and pass in a configured
`~ctapipe.core.FeatureGenerator` and set the ``features`` property of this
class with the columns you to retain in the output table.

In the `~ctapipe.core.FeatureGenerator`` used internally, you have access to several
additional functions useful for DL2 processing:

- `~astropy.coordinates.angular_separation`
- `~ctapipe.coordinates.altaz_to_nominal`
"""

energy_reconstructor = traits.Unicode(
default_value="RandomForestRegressor",
help="Prefix of the reco `_energy` column",
).tag(config=True)

geometry_reconstructor = traits.Unicode(
default_value="HillasReconstructor",
help="Prefix of the `_alt` and `_az` reco geometry columns",
).tag(config=True)

gammaness_reconstructor = traits.Unicode(
default_value="RandomForestClassifier",
help="Prefix of the classifier `_prediction` column",
).tag(config=True)

feature_set = traits.UseEnum(
PreprocessorFeatureSet,
default_value=PreprocessorFeatureSet.dl2_irf,
help=(
"Set up the FeatureGenerator.features, output features, and quality criteria "
"based on standard use cases."
"Specify 'custom' if you want to set your own in your config file. If this is set to "
"any value other than 'custom', the feature properties of the configuration "
"file you pass in will be overridden."
),
).tag(config=True)

features = traits.List(
traits.Unicode(),
help=(
"Features (columns) to retain in the output. "
"These can include columns generated by the FeatureGenerator. "
"If you set these, make sure feature_set=custom."
),
).tag(config=True)

def __init__(self, config=None, parent=None, **kwargs):
super().__init__(config=config, parent=parent, **kwargs)
if PreprocessorFeatureSet(self.feature_set) == PreprocessorFeatureSet.custom:
Comment thread
maxnoe marked this conversation as resolved.
Outdated
self.feature_generator = FeatureGenerator(parent=self)
self.quality_query = QualityQuery(parent=self)
else:
self.feature_generator = FeatureGenerator(
features=self._get_predefined_features_to_generate()
Comment thread
kosack marked this conversation as resolved.
Outdated
)
self.quality_query = QualityQuery(
quality_criteria=self._get_predefined_quality_criteria()
Comment thread
kosack marked this conversation as resolved.
Outdated
)
# sanity checks:
if len(self.features) == 0:
raise ToolConfigurationError(
"DL2EventPreprocessor has no output features configured."
"You have set `feature_set=custom`, but did not provide the list "
"of features in the configuration (DL2EventPreprocessor.features)."
)

def __call__(self, table):
"""Return new table with only the columns in features."""

# generate new features, which includes renaming columns:
generated = self.feature_generator(
table,
angular_separation=angular_separation,
altaz_to_nominal=altaz_to_nominal,
)
Comment thread
maxnoe marked this conversation as resolved.

# apply event selection on the resulting table

selected_mask = self.quality_query.get_table_mask(generated)

# return only the columns specified in `self.features`, and rows in
# `selected_mask`
return generated[self.features][selected_mask]

def _get_predefined_features_to_generate(self) -> list[tuple]:
"""Return a default list of FeatureGenerator features."""
if self.feature_set == PreprocessorFeatureSet.dl2_irf:
# Default features for DL2/Subarray events
return [
("reco_energy", f"{self.energy_reconstructor}_energy"),
("reco_alt", f"{self.geometry_reconstructor}_alt"),
("reco_az", f"{self.geometry_reconstructor}_az"),
("gh_score", f"{self.gammaness_reconstructor}_prediction"),
("theta", "angular_separation(reco_az, reco_alt, true_az, true_alt)"),
(
"reco_fov_coord",
"altaz_to_nominal(reco_az, reco_alt, subarray_pointing_lon, subarray_pointing_lat)",
),
(
"reco_fov_lon",
"reco_fov_coord[:,0]",
), # note: GADF IRFs use the negative of this
("reco_fov_lat", "reco_fov_coord[:,1]"),
(
"true_fov_coord",
"altaz_to_nominal(true_az, true_alt, subarray_pointing_lon, subarray_pointing_lat)",
),
(
"true_fov_lon",
"true_fov_coord[:,0]",
), # note: GADF IRFs use the negative of this
("true_fov_lat", "true_fov_coord[:,1]"),
(
"true_fov_offset",
"angular_separation(reco_fov_lon, reco_fov_lat, 0*u.deg, 0*u.deg)",
Comment thread
kosack marked this conversation as resolved.
Outdated
),
(
"reco_fov_offset",
"angular_separation(true_fov_lon, reco_fov_lat, 0*u.deg, 0*u.deg)",
Comment thread
kosack marked this conversation as resolved.
Outdated
),
(
"multiplicity",
f"np.count_nonzero({self.gammaness_reconstructor}_telescopes,axis=1)",
),
]
else:
raise NotImplementedError(f"unsupported feature_set: {self.feature_set}")

def _get_predefined_quality_criteria(self) -> list[tuple]:
"""
Set the quality criteria for a DL2FeatureSet.

Here you can use any columns in the input table, or any that are
specified in the FeatureGenerator.
"""
if self.feature_set == PreprocessorFeatureSet.dl2_irf:
return [
("Valid geometry", f"{self.geometry_reconstructor}_is_valid"),
("valid energy", f"{self.energy_reconstructor}_is_valid"),
("valid gammaness", f"{self.gammaness_reconstructor}_is_valid"),
("sufficient multiplicity", "multiplicity >= 4"),
]
else:
raise NotImplementedError(f"unsupported feature_set: {self.feature_set}")

@default("features")
def default_features(self):
"""Set the columns to output, for a given FeatureSet."""
if self.feature_set == PreprocessorFeatureSet.dl2_irf:
return [
"event_id",
"obs_id",
"reco_energy",
"reco_alt",
"reco_az",
"gh_score",
"true_energy",
"true_alt",
"true_az",
"true_fov_offset",
"reco_fov_offset",
"theta",
"reco_fov_lat",
"true_fov_lat",
"reco_fov_lon",
"true_fov_lon",
"multiplicity",
]
elif self.feature_set == PreprocessorFeatureSet.custom:
return []
else:
raise NotImplementedError(f"unsupported feature_set: {self.feature_set}")
Loading
Loading