diff --git a/docs/changes/2919.bugfix.rst b/docs/changes/2919.bugfix.rst new file mode 100644 index 00000000000..d9f8bda8160 --- /dev/null +++ b/docs/changes/2919.bugfix.rst @@ -0,0 +1 @@ +Removed the option ``output_table_schema`` from ``DL2EventLoader``, which was incorrectly implemented. diff --git a/src/ctapipe/conftest.py b/src/ctapipe/conftest.py index 55754052deb..929080c5028 100644 --- a/src/ctapipe/conftest.py +++ b/src/ctapipe/conftest.py @@ -985,37 +985,25 @@ def irf_events_table(): @pytest.fixture(scope="function") -def test_config(): - return { - "DL2EventLoader": {"event_reader_function": "read_telescope_events_chunked"}, - "DL2EventPreprocessor": { - "energy_reconstructor": "ExtraTreesRegressor", - "gammaness_classifier": "ExtraTreesClassifier", - "columns_to_rename": {}, - "output_table_schema": [ - Column( - name="obs_id", dtype=np.uint64, description="Observation Block ID" - ), - Column(name="event_id", dtype=np.uint64, description="Array event ID"), - Column(name="tel_id", dtype=np.uint64, description="Telescope ID"), - Column( - name="ExtraTreesRegressor_tel_energy", - unit=u.TeV, - description="Reconstructed energy", - ), - Column( - name="ExtraTreesRegressor_tel_energy_uncert", - unit=u.TeV, - description="Reconstructed energy uncertainty", - ), - ], - "apply_derived_columns": False, - # "disable_column_renaming": True, - "allow_unsupported_pointing_frames": True, - }, - "DL2EventQualityQuery": { - "quality_criteria": [ - ("valid reco", "ExtraTreesRegressor_tel_is_valid"), - ] - }, - } +def dl2_event_loader_config(): + """A traitlets Config for the DL2EventLoader class.""" + from traitlets.config import Config + + return Config( + { + "DL2EventLoader": { + "event_reader_function": "read_telescope_events_chunked" + }, + "DL2EventPreprocessor": { + "energy_reconstructor": "ExtraTreesRegressor", + "gammaness_classifier": "ExtraTreesClassifier", + "apply_derived_columns": False, + "allow_unsupported_pointing_frames": True, + }, + "DL2EventQualityQuery": { + "quality_criteria": [ + ("valid reco", "ExtraTreesRegressor_tel_is_valid"), + ] + }, + } + ) diff --git a/src/ctapipe/coordinates/__init__.py b/src/ctapipe/coordinates/__init__.py index 910ab215edd..afa129c0c72 100644 --- a/src/ctapipe/coordinates/__init__.py +++ b/src/ctapipe/coordinates/__init__.py @@ -21,7 +21,11 @@ from .impact_distance import impact_distance, shower_impact_distance from .nominal_frame import NominalFrame from .telescope_frame import TelescopeFrame -from .utils import altaz_to_righthanded_cartesian, get_point_on_shower_axis +from .utils import ( + altaz_to_fov, + altaz_to_righthanded_cartesian, + get_point_on_shower_axis, +) __all__ = [ "TelescopeFrame", @@ -37,6 +41,7 @@ "impact_distance", "shower_impact_distance", "get_point_on_shower_axis", + "altaz_to_fov", ] diff --git a/src/ctapipe/coordinates/tests/test_utils.py b/src/ctapipe/coordinates/tests/test_utils.py index 1ebd419c679..52615a3c81d 100644 --- a/src/ctapipe/coordinates/tests/test_utils.py +++ b/src/ctapipe/coordinates/tests/test_utils.py @@ -53,3 +53,18 @@ def test_single_telescope(subarray_prod5_paranal): # 10 km is around the shower maximum, should be around 1 degree from the source with pytest.warns(MissingFrameAttributeWarning): assert u.isclose(source.separation(point), 1.0 * u.deg, atol=0.1 * u.deg) + + +def test_altaz_to_fov(): + from ctapipe.coordinates import altaz_to_fov + + column = altaz_to_fov( + az=[220.0, 220.2] * u.deg, + alt=[80.0, 79.2] * u.deg, + pointing_az=[220.0, 220.0] * u.deg, + pointing_alt=[80.0, 80.0] * u.deg, + ) + + assert column.unit == u.deg + assert np.allclose(column[0].value, 0) + assert np.allclose(column[1].value, [-0.03747984, -0.79993558]) diff --git a/src/ctapipe/coordinates/utils.py b/src/ctapipe/coordinates/utils.py index e8c2310b776..a2514bb681c 100644 --- a/src/ctapipe/coordinates/utils.py +++ b/src/ctapipe/coordinates/utils.py @@ -1,14 +1,16 @@ import astropy.units as u import numpy as np -from astropy.coordinates import AltAz +from astropy.coordinates import AltAz, SkyCoord from erfa.ufunc import p2s as cartesian_to_spherical from erfa.ufunc import s2p as spherical_to_cartesian from .ground_frames import _get_xyz +from .nominal_frame import NominalFrame __all__ = [ "altaz_to_righthanded_cartesian", "get_point_on_shower_axis", + "altaz_to_fov", ] @@ -80,3 +82,25 @@ def get_point_on_shower_axis(core_x, core_y, alt, az, telescope_position, distan cartesian = point[np.newaxis, :] - _get_xyz(telescope_position).T lon, lat, _ = cartesian_to_spherical(cartesian) return AltAz(alt=lat, az=-lon, copy=False) + + +def altaz_to_fov(az, alt, pointing_az, pointing_alt) -> u.Quantity[2]: + """ + Compute FOV coordinates from alt/az coordinates. + + This can be used in an FeatureGenerator or ExpressionEngine to get a single + column with fov_lon, fov_lat coordinates. + + Returns + ------- + u.Quantity[2]: + 2D array of coordinates with 2 columns: fov_lon, fov_lat + """ + pointing_coord = SkyCoord(az=pointing_az, alt=pointing_alt, frame="altaz") + event_coord = SkyCoord(az=az, alt=alt, frame="altaz", origin=pointing_coord) + nominal_coord = event_coord.transform_to(NominalFrame) + + # note the minus sign for the fov_lon, which is to match GADF conventions + return u.Quantity( + np.column_stack((-nominal_coord.fov_lon.deg, nominal_coord.fov_lat.deg)), u.deg + ) diff --git a/src/ctapipe/core/feature_generator.py b/src/ctapipe/core/feature_generator.py index 4d330b8fedc..fe997cb19d3 100644 --- a/src/ctapipe/core/feature_generator.py +++ b/src/ctapipe/core/feature_generator.py @@ -23,7 +23,13 @@ def _shallow_copy_table(table): the original table. """ # automatically return Table or QTable depending on input - return table.__class__({col: table[col] for col in table.colnames}, copy=False) + table_copy = table.__class__( + {col: table[col] for col in table.colnames}, copy=False + ) + table_copy.meta = ( + table.meta.copy() + ) # here we don't want a shallow copy, as we might add metadata + return table_copy class FeatureGeneratorException(TypeError): diff --git a/src/ctapipe/core/tests/test_feature_generator.py b/src/ctapipe/core/tests/test_feature_generator.py index 3ba3fb0d153..ffa0d856837 100644 --- a/src/ctapipe/core/tests/test_feature_generator.py +++ b/src/ctapipe/core/tests/test_feature_generator.py @@ -19,19 +19,26 @@ def test_feature_generator(): generator = FeatureGenerator(features=expressions) table = Table({"intensity": [1, 10, 100], "length": [2, 4, 8], "width": [1, 2, 4]}) + table.meta["VERSION"] = 1.0 + log_intensity = [0, 1, 2] area = [2, 8, 32] eccentricity = np.sqrt(0.75) - table = generator(table) + processed_table = generator(table) + processed_table.meta["GEN"] = True + + assert "log_intensity" in processed_table.colnames + assert "area" in processed_table.colnames + assert "eccentricity" in processed_table.colnames - assert "log_intensity" in table.colnames - assert "area" in table.colnames - assert "eccentricity" in table.colnames + assert np.all(processed_table["log_intensity"] == log_intensity) + assert np.all(processed_table["area"] == area) + assert np.all(processed_table["eccentricity"] == eccentricity) - assert np.all(table["log_intensity"] == log_intensity) - assert np.all(table["area"] == area) - assert np.all(table["eccentricity"] == eccentricity) + # also check that metadata was propagated and doesn't overwrite old table: + assert "VERSION" in processed_table.meta + assert "GEN" not in table.meta def test_existing_feature(): diff --git a/src/ctapipe/io/__init__.py b/src/ctapipe/io/__init__.py index ef85cd7ddfc..c3fbf4ffeb3 100644 --- a/src/ctapipe/io/__init__.py +++ b/src/ctapipe/io/__init__.py @@ -7,7 +7,7 @@ from .astropy_helpers import read_table, write_table # noqa: I001 from .datalevels import DataLevel -from .dl2_tables_preprocessing import DL2EventPreprocessor, DL2EventLoader +from .event_preprocessor import EventPreprocessor, PreprocessorFeatureSet from .eventsource import EventSource from .eventseeker import EventSeeker from .tableio import TableReader, TableWriter @@ -43,7 +43,7 @@ "DataWriter", "DATA_MODEL_VERSION", "get_hdf5_datalevels", - "DL2EventPreprocessor", - "DL2EventLoader", + "EventPreprocessor", "get_hdf5_monitoring_types", + "PreprocessorFeatureSet", ] diff --git a/src/ctapipe/io/dl2_tables_preprocessing.py b/src/ctapipe/io/dl2_tables_preprocessing.py deleted file mode 100644 index cab91ad2af8..00000000000 --- a/src/ctapipe/io/dl2_tables_preprocessing.py +++ /dev/null @@ -1,501 +0,0 @@ -"""Module containing classes related to event loading and preprocessing""" - -from pathlib import Path - -import astropy.units as u -import numpy as np -from astropy.coordinates import AltAz, SkyCoord -from astropy.table import Column, QTable, vstack - -try: - from pyirf.simulations import SimulatedEventsInfo - from pyirf.spectral import ( - DIFFUSE_FLUX_UNIT, - POINT_SOURCE_FLUX_UNIT, - PowerLaw, - calculate_event_weights, - ) - from pyirf.utils import calculate_source_fov_offset, calculate_theta - - has_pyirf = True -except ModuleNotFoundError: - has_pyirf = False - -from tables import NoSuchNodeError -from traitlets import default - -from ..compat import COPY_IF_NEEDED -from ..containers import CoordinateFrameType -from ..coordinates import NominalFrame -from ..core import Component, QualityQuery -from ..core.traits import Bool, Dict, List, Tuple, Unicode -from .tableloader import TableLoader - -__all__ = ["DL2EventLoader", "DL2EventPreprocessor", "DL2EventQualityQuery"] - - -class DL2EventQualityQuery(QualityQuery): - """ - Event pre-selection quality criteria for IRF computation with different defaults. - """ - - quality_criteria = List( - Tuple(Unicode(), Unicode()), - default_value=[ - ( - "multiplicity 4", - "np.count_nonzero(HillasReconstructor_telescopes,axis=1) >= 4", - ), - ("valid classifier", "RandomForestClassifier_is_valid"), - ("valid geom reco", "HillasReconstructor_is_valid"), - ("valid energy reco", "RandomForestRegressor_is_valid"), - ], - help=QualityQuery.quality_criteria.help, - ).tag(config=True) - - -class DL2EventPreprocessor(Component): - """Defines pre-selection cuts and the necessary renaming of columns.""" - - classes = [DL2EventQualityQuery] - - energy_reconstructor = Unicode( - default_value="RandomForestRegressor", - help="Prefix of the reco `_energy` column", - ).tag(config=True) - - geometry_reconstructor = Unicode( - default_value="HillasReconstructor", - help="Prefix of the `_alt` and `_az` reco geometry columns", - ).tag(config=True) - - gammaness_classifier = Unicode( - default_value="RandomForestClassifier", - help="Prefix of the classifier `_prediction` column", - ).tag(config=True) - - apply_derived_columns = Bool( - default_value=True, help="Whether to compute derived columns" - ).tag(config=True) - - allow_unsupported_pointing_frames = Bool( - default_value=False, - help=( - "Check whether the pointing is supported." - "For the moment, only pointing in altaz is supported." - "Divergent pointing is also not supported." - ), - ).tag(config=True) - - columns_to_rename = Dict( - key_trait=Unicode(), - value_trait=Unicode(), - help=( - "Dictionary of columns to rename. " - "Leave unset to apply default renaming. " - "Set to an empty dictionary to disable renaming entirely. " - "Set to a partial dictionary to override only some names." - ), - ).tag(config=True) - - output_table_schema = List( - default_value=[ - Column(name="obs_id", dtype=np.uint64, description="Observation Block ID"), - Column(name="event_id", dtype=np.uint64, description="Array event ID"), - Column(name="true_energy", unit=u.TeV, description="Simulated energy"), - Column(name="true_az", unit=u.deg, description="Simulated azimuth"), - Column(name="true_alt", unit=u.deg, description="Simulated altitude"), - Column(name="reco_energy", unit=u.TeV, description="Reconstructed energy"), - Column(name="reco_az", unit=u.deg, description="Reconstructed azimuth"), - Column(name="reco_alt", unit=u.deg, description="Reconstructed altitude"), - Column(name="pointing_az", unit=u.deg, description="Pointing azimuth"), - Column(name="pointing_alt", unit=u.deg, description="Pointing altitude"), - Column( - name="gh_score", - dtype=np.float64, - description="prediction of the classifier, defined between [0,1]," - " where values close to 1 mean that the positive class" - " (e.g. gamma in gamma-ray analysis) is more likely", - ), - ], - help="Schema definition for output event QTable", - ).tag(config=True) - - def __init__(self, config=None, parent=None, **kwargs): - super().__init__(config=config, parent=parent, **kwargs) - self.quality_query = DL2EventQualityQuery(parent=self) - - @default("columns_to_rename") - def _default_columns_to_rename(self): - return { - f"{self.energy_reconstructor}_energy": "reco_energy", - f"{self.geometry_reconstructor}_az": "reco_az", - f"{self.geometry_reconstructor}_alt": "reco_alt", - f"{self.gammaness_classifier}_prediction": "gh_score", - "subarray_pointing_lat": "pointing_alt", - "subarray_pointing_lon": "pointing_az", - } - - def normalise_column_names(self, events: QTable) -> QTable: - """ - Rename column names according to configuration. - - Parameters - ---------- - events : QTable - Input event table. - - Returns - ------- - QTable - Table with selected and renamed columns. - - Raises - ------ - NotImplementedError - If pointing is not AltAz or varies too much. - ValueError - If required columns are missing. - """ - if not self.allow_unsupported_pointing_frames: - if events["subarray_pointing_lat"].std() > 1e-3: - raise NotImplementedError( - "No support for making irfs from varying pointings yet" - ) - if any( - events["subarray_pointing_frame"] != CoordinateFrameType.ALTAZ.value - ): - raise NotImplementedError( - "At the moment only pointing in altaz is supported." - ) - - columns_to_keep = [col.name for col in self.output_table_schema] - - rename_dict = self.columns_to_rename - rename_from = list(rename_dict.keys()) - rename_to = list(rename_dict.values()) - - fixed_columns = list(set(columns_to_keep) - set(rename_to)) - columns_to_read = fixed_columns + rename_from - for col in columns_to_read: - if col not in events.colnames: - raise ValueError( - f"Input files must conform to the ctapipe DL2 data model. " - f"Required column {col} is missing." - ) - - events = QTable(events[columns_to_read], copy=COPY_IF_NEEDED) - if rename_from and rename_to: - events.rename_columns(rename_from, rename_to) - return events - - def make_empty_table(self) -> QTable: - """ - Create an empty event table based on the configured output schema. - """ - schema = list( - self.output_table_schema - ) # make a shallow copy to extend the schema with derived columns - - if self.apply_derived_columns: - schema.extend( - [ - Column( - name="reco_fov_lat", - unit=u.deg, - description="Reconstructed FOV lat", - ), - Column( - name="reco_fov_lon", - unit=u.deg, - description="Reconstructed FOV lon", - ), - Column( - name="theta", - unit=u.deg, - description="Angular offset from source", - ), - Column( - name="true_source_fov_offset", - unit=u.deg, - description="Simulated angular offset from pointing direction", - ), - Column( - name="reco_source_fov_offset", - unit=u.deg, - description="Reconstructed angular offset from pointing direction", - ), - Column( - name="weight", - dtype=np.float64, - description="Event weight", - ), - ] - ) - - return QTable( - names=[col.name for col in schema], - dtype=[col.dtype for col in schema], - units=( - [col.unit for col in schema] - if any(col.unit for col in schema) - else None - ), - meta={}, - ) - - -class DL2EventLoader(Component): - """ - Component for loading events and simulation metadata, applying preselection and optional derived column logic. - """ - - classes = [DL2EventPreprocessor] - - # User-selectable event reading function and kwargs - event_reader_function = Unicode( - default_value="read_subarray_events_chunked", - help=( - "Function of TableLoader used to read event chunks. " - "E.g., 'read_subarray_events_chunked' or 'read_telescope_events_chunked'." - ), - ).tag(config=True) - - event_reader_kwargs = Dict( - default_value={}, - help="Extra keyword arguments passed to the event reading function, e.g., {'path': '/dl2/event/telescope/Reconstructor'}", - ).tag(config=True) - - def __init__(self, file: Path, target_spectrum: "Spectra", **kwargs): # noqa: F821 - from ..irf.spectra import SPECTRA - - super().__init__(**kwargs) - self.epp = DL2EventPreprocessor(parent=self) - self.target_spectrum = SPECTRA[target_spectrum] - self.file = file - - def load_preselected_events( - self, chunk_size: int, obs_time: u.Quantity - ) -> tuple[QTable, int, dict]: - """ - Load and filter events from the file. - - Parameters - ---------- - chunk_size : int - Size of chunks to read from the file. - obs_time : Quantity - Observation time to scale weights. - - Returns - ------- - table : QTable - Filtered and processed event table. - n_raw_events : int - Number of events before selection. - meta : dict - Metadata dictionary with simulation info and input spectrum. - """ - - opts = dict(dl2=True, simulated=True, observation_info=True) - - with TableLoader(self.file, parent=self, **opts) as loader: - table_template = self.epp.make_empty_table() - sim_info, spectrum = self.get_simulation_information(loader, obs_time) - meta = {"sim_info": sim_info, "spectrum": spectrum} - event_chunks = [table_template] - n_raw_events = 0 - reader_func = getattr(loader, self.event_reader_function) - table_reader = reader_func(chunk_size, **opts, **self.event_reader_kwargs) - for _, _, events in table_reader: - selected = events[self.epp.quality_query.get_table_mask(events)] - selected = self.epp.normalise_column_names(selected) - if self.epp.apply_derived_columns: - selected = self.make_derived_columns(selected) - event_chunks.append(selected) - n_raw_events += len(events) - - event_chunks.append( - table_template - ) # Putting it last ensures the correct metadata is used - table = vstack(event_chunks, join_type="exact", metadata_conflicts="silent") - return table, n_raw_events, meta - - def get_simulation_information( - self, loader: TableLoader, obs_time: u.Quantity - ) -> tuple["SimulatedEventsInfo", "PowerLaw"]: - """ - Extract simulation information from the input file. - - Parameters - ---------- - loader : TableLoader - Loader object for reading from the input file. - obs_time : Quantity - Total observation time. - - Returns - ------- - sim_info : SimulatedEventsInfo - Metadata about the simulated events. - spectrum : PowerLaw - Power-law model derived from simulation configuration. - - Raises - ------ - NotImplementedError - If simulation parameters vary across runs. - """ - from ..exceptions import OptionalDependencyMissing - - if not has_pyirf: - raise OptionalDependencyMissing("pyirf") - - sim_config = loader.read_simulation_configuration() - n_showers: int = 0 - try: - shower_distribution = loader.read_shower_distribution() - n_showers = shower_distribution["n_entries"].sum() - except NoSuchNodeError: - self.log.warning( - "Simulation distributions were not found in the input files, " - "falling back to estimating the number of showers from the " - "simulation configuration." - ) - - # Some tight consistency checks. Eventually we will support using the - # arbitrary shower distribution and non-flat spatial distributions. - # Currently we do not support those, so we raise exceptions here to - # avoid that we incorrectly compute the effective area, which would have - # a high scientific impact. - for itm in [ - "spectral_index", - "energy_range_min", - "energy_range_max", - "max_scatter_range", - "max_viewcone_radius", - "min_viewcone_radius", - ]: - if len(np.unique(sim_config[itm])) > 1: - raise NotImplementedError( - f"Unsupported: '{itm}' differs across simulation runs" - ) - - n_showers_config = (sim_config["n_showers"] * sim_config["shower_reuse"]).sum() - if n_showers == 0: - n_showers = n_showers_config - - assert n_showers_config == n_showers, "Inconsistent number of simulated showers" - - # This gets the distribution assuming a power-law model, and we assume - # all merged observations have the same model. - sim_info = SimulatedEventsInfo( - n_showers=n_showers, - energy_min=sim_config["energy_range_min"].quantity[0], - energy_max=sim_config["energy_range_max"].quantity[0], - max_impact=sim_config["max_scatter_range"].quantity[0], - spectral_index=sim_config["spectral_index"][0], - viewcone_max=sim_config["max_viewcone_radius"].quantity[0], - viewcone_min=sim_config["min_viewcone_radius"].quantity[0], - ) - - return sim_info, PowerLaw.from_simulation(sim_info, obstime=obs_time) - - def make_derived_columns(self, events: QTable) -> QTable: - """ - Add derived quantities (e.g., theta, FOV offsets) to the table. - - Parameters - ---------- - events : QTable - Table containing normalized events. - - Returns - ------- - QTable - Table with added derived columns. - """ - events["theta"] = calculate_theta( - events, - assumed_source_az=events["true_az"], - assumed_source_alt=events["true_alt"], - ) - events["true_source_fov_offset"] = calculate_source_fov_offset( - events, prefix="true" - ) - events["reco_source_fov_offset"] = calculate_source_fov_offset( - events, prefix="reco" - ) - - pointing = SkyCoord( - alt=events["pointing_alt"], az=events["pointing_az"], frame=AltAz() - ) - reco = SkyCoord(alt=events["reco_alt"], az=events["reco_az"], frame=AltAz()) - nominal = NominalFrame(origin=pointing) - reco_nominal = reco.transform_to(nominal) - events["reco_fov_lon"] = u.Quantity(-reco_nominal.fov_lon) # minus for GADF - events["reco_fov_lat"] = u.Quantity(reco_nominal.fov_lat) - events["weight"] = 1.0 # defer calculation of proper weights to later - return events - - def make_event_weights( - self, - events: QTable, - spectrum: "PowerLaw", - kind: str, - fov_offset_bins: u.Quantity | None = None, - ) -> QTable: - """ - Compute event weights to match the target spectrum. - - Parameters - ---------- - events : QTable - Input events. - spectrum : PowerLaw - Spectrum from simulation. - kind : str - Type of events ("gammas", etc.). - fov_offset_bins : Quantity, optional - Offset bins for integrating the diffuse flux into point source bins. - - Returns - ------- - QTable - Table with updated weights. - - Raises - ------ - ValueError - If ``fov_offset_bins`` is required but not provided. - """ - if ( - kind == "gammas" - and self.target_spectrum.normalization.unit.is_equivalent( - POINT_SOURCE_FLUX_UNIT - ) - and spectrum.normalization.unit.is_equivalent(DIFFUSE_FLUX_UNIT) - ): - if fov_offset_bins is None: - raise ValueError( - "gamma_target_spectrum is point-like, but no fov offset bins " - "for the integration of the simulated diffuse spectrum were given." - ) - - for low, high in zip(fov_offset_bins[:-1], fov_offset_bins[1:]): - fov_mask = events["true_source_fov_offset"] >= low - fov_mask &= events["true_source_fov_offset"] < high - - events["weight"][fov_mask] = calculate_event_weights( - events[fov_mask]["true_energy"], - target_spectrum=self.target_spectrum, - simulated_spectrum=spectrum.integrate_cone(low, high), - ) - else: - events["weight"] = calculate_event_weights( - events["true_energy"], - target_spectrum=self.target_spectrum, - simulated_spectrum=spectrum, - ) - - return events diff --git a/src/ctapipe/io/event_preprocessor.py b/src/ctapipe/io/event_preprocessor.py new file mode 100644 index 00000000000..66ecb81c9a1 --- /dev/null +++ b/src/ctapipe/io/event_preprocessor.py @@ -0,0 +1,197 @@ +"""Module containing classes related to event loading and preprocessing""" + +from enum import StrEnum, auto + +from astropy.coordinates import angular_separation +from traitlets import default + +from ..coordinates import altaz_to_fov +from ..core import ( + Component, + FeatureGenerator, + QualityQuery, + ToolConfigurationError, + traits, +) + +__all__ = ["EventPreprocessor"] + + +class PreprocessorFeatureSet(StrEnum): + """Pre-defined configurations for DL2EventPreprocessor for specific use cases.""" + + custom = auto() #: use user-supplied configuration + dl2_irf = auto() #: support IRF preprocessing use case + + +class EventPreprocessor(Component): + """ + Selects or generates features and filters tables of events. + + In normal use, one only has to specify the ``feature_set`` option, which + will generate features supports standard use cases. For advanced usage, you + can set ``feature_set=custom`` and pass in a configured + `~ctapipe.core.FeatureGenerator` and set the ``features`` property of this + class with the columns you to retain in the output table. + + In the `~ctapipe.core.FeatureGenerator`` used internally, you have access to several + additional functions useful for DL2 processing: + + - `~astropy.coordinates.angular_separation` + - `~ctapipe.coordinates.alt_az_to_fov` + """ + + energy_reconstructor = traits.Unicode( + default_value="RandomForestRegressor", + help="Prefix of the reco `_energy` column", + ).tag(config=True) + + geometry_reconstructor = traits.Unicode( + default_value="HillasReconstructor", + help="Prefix of the `_alt` and `_az` reco geometry columns", + ).tag(config=True) + + gammaness_reconstructor = traits.Unicode( + default_value="RandomForestClassifier", + help="Prefix of the classifier `_prediction` column", + ).tag(config=True) + + feature_set = traits.UseEnum( + PreprocessorFeatureSet, + default_value=PreprocessorFeatureSet.dl2_irf, + help=( + "Set up the FeatureGenerator.features, output features, and quality criteria " + "based on standard use cases." + "Specify 'custom' if you want to set your own in your config file. If this is set to " + "any value other than 'custom', the feature properties of the configuration " + "file you pass in will be overridden." + ), + ) + + features = traits.List( + traits.Unicode(), + help=( + "Features (columns) to retain in the output. " + "These can include columns generated by the FeatureGenerator. " + "If you set these, make sure feature_set=custom." + ), + ).tag(config=True) + + def __init__(self, config=None, parent=None, **kwargs): + super().__init__(config=config, parent=parent, **kwargs) + if PreprocessorFeatureSet(self.feature_set) == PreprocessorFeatureSet.custom: + self.feature_generator = FeatureGenerator(parent=self) + self.quality_query = QualityQuery(parent=self) + else: + self.feature_generator = FeatureGenerator( + features=self._get_predefined_features_to_generate() + ) + self.quality_query = QualityQuery( + quality_criteria=self._get_predefined_quality_criteria() + ) + # sanity checks: + if len(self.features) == 0: + raise ToolConfigurationError( + "DL2EventPreprocessor has no output features configured." + "You have set `feature_set=custom`, but did not provide the list " + "of features in the configuration (DL2EventPreprocessor.features)." + ) + + def __call__(self, table): + """Return new table with only the columns in features.""" + + # generate new features, which includes renaming columns: + generated = self.feature_generator( + table, angular_separation=angular_separation, altaz_to_fov=altaz_to_fov + ) + + # apply event selection on the resulting table + + selected_mask = self.quality_query.get_table_mask(generated) + + # return only the columns specified in `self.features`, and rows in + # `selected_mask` + return generated[self.features][selected_mask] + + def _get_predefined_features_to_generate(self) -> list[tuple]: + """Return a default list of FeatureGenerator features.""" + if self.feature_set == PreprocessorFeatureSet.dl2_irf: + # Default features for DL2/Subarray events + return [ + ("reco_energy", f"{self.energy_reconstructor}_energy"), + ("reco_alt", f"{self.geometry_reconstructor}_alt"), + ("reco_az", f"{self.geometry_reconstructor}_az"), + ("gh_score", f"{self.gammaness_reconstructor}_prediction"), + ("theta", "angular_separation(reco_az, reco_alt, true_az, true_alt)"), + ( + "reco_fov_coord", + "altaz_to_fov(reco_az, reco_alt, subarray_pointing_lon, subarray_pointing_lat)", + ), + ("reco_fov_lon", "reco_fov_coord[:,0]"), + ("reco_fov_lat", "reco_fov_coord[:,1]"), + ( + "true_fov_coord", + "altaz_to_fov(true_az, true_alt, subarray_pointing_lon, subarray_pointing_lat)", + ), + ("true_fov_lon", "true_fov_coord[:,0]"), + ("true_fov_lat", "true_fov_coord[:,1]"), + ( + "true_fov_offset", + "angular_separation(reco_fov_lon, reco_fov_lat, 0*u.deg, 0*u.deg)", + ), + ( + "reco_fov_offset", + "angular_separation(true_fov_lon, reco_fov_lat, 0*u.deg, 0*u.deg)", + ), + ( + "multiplicity", + f"np.count_nonzero({self.gammaness_reconstructor}_telescopes,axis=1)", + ), + ] + else: + raise NotImplementedError(f"unsupported feature_set: {self.feature_set}") + + def _get_predefined_quality_criteria(self) -> list[tuple]: + """ + Set the quality criteria for a DL2FeatureSet. + + Here you can use any columns in the input table, or any that are + specified in the FeatureGenerator. + """ + if self.feature_set == PreprocessorFeatureSet.dl2_irf: + return [ + ("Valid geometry", f"{self.geometry_reconstructor}_is_valid"), + ("valid energy", f"{self.energy_reconstructor}_is_valid"), + ("valid gammaness", f"{self.gammaness_reconstructor}_is_valid"), + ("sufficient multiplicity", "multiplicity >= 4"), + ] + else: + raise NotImplementedError(f"unsupported feature_set: {self.feature_set}") + + @default("features") + def default_features(self): + """Set the columns to output, for a given FeatureSet.""" + if self.feature_set == PreprocessorFeatureSet.dl2_irf: + return [ + "event_id", + "obs_id", + "reco_energy", + "reco_alt", + "reco_az", + "gh_score", + "true_energy", + "true_alt", + "true_az", + "true_fov_offset", + "reco_fov_offset", + "theta", + "reco_fov_lat", + "true_fov_lat", + "reco_fov_lon", + "true_fov_lon", + "multiplicity", + ] + elif self.feature_set == PreprocessorFeatureSet.custom: + return [] + else: + raise NotImplementedError(f"unsupported feature_set: {self.feature_set}") diff --git a/src/ctapipe/io/tests/test_event_preprocessor.py b/src/ctapipe/io/tests/test_event_preprocessor.py new file mode 100644 index 00000000000..d3b9c631a26 --- /dev/null +++ b/src/ctapipe/io/tests/test_event_preprocessor.py @@ -0,0 +1,114 @@ +#!/usr/bin/env python3 + +import numpy as np +import pytest +from astropy import units as u +from astropy.table import QTable + +from ctapipe.io import PreprocessorFeatureSet + + +@pytest.fixture(scope="function") +def minimal_dl2_table(): + """A dunmmy DL2 table for testing DL2EventPreprocessor""" + return QTable( + dict( + obs_id=[10, 10, 10, 10], + event_id=[1, 2, 3, 4], + true_energy=[100.0, 50.0, 2.0, 30.0] * u.TeV, + RandomForestRegressor_energy=[100.1, 49.2, 2.6, 40.0] * u.TeV, + RandomForestRegressor_is_valid=[True, True, True, True], + HillasReconstructor_az=[271.0, 271.6, 271.4, 268.1] * u.deg, + HillasReconstructor_alt=[70.1, 68.2, 69.3, 70.8] * u.deg, + HillasReconstructor_is_valid=[True, True, True, False], + RandomForestClassifier_prediction=[0.9, 0.5, 0.1, 0.3], + RandomForestClassifier_is_valid=[True, True, True, False], + true_alt=[70.0, 70.0, 70.0, 70.0] * u.deg, + true_az=[270.0, 270.0, 270.0, 270.0] * u.deg, + subarray_pointing_lat=[70.0, 70.0, 70.0, 70.0] * u.deg, + subarray_pointing_lon=[270.0, 270.0, 270.0, 270.0] * u.deg, + RandomForestClassifier_telescopes=np.array( + [ + [False, True, True, True], + [True, True, True, True], + [True, True, False, True], + [True, True, True, True], + ] + ), + ) + ) + + +@pytest.mark.parametrize("feature_set", list(PreprocessorFeatureSet)) +def test_event_preprocessing(feature_set, minimal_dl2_table): + from traitlets.config import Config + + from ctapipe.io import EventPreprocessor + + # set some custom features for the case where the feature_set==custom. + # These will be ignored in other feature_sets. + custom_config = Config() + custom_config.EventPreprocessor.features = ["obs_id", "event_id"] + table = minimal_dl2_table + + # process the table: + preprocess = EventPreprocessor(config=custom_config, feature_set=feature_set) + table_processed = preprocess(table) + + for feature in preprocess.features: + assert feature in table_processed.columns + + # check that the qualityquery worked + assert len(table_processed) <= len(table) + + +def test_no_output(): + """Check error is raised if no columns are specified for output.""" + from ctapipe.core import ToolConfigurationError + from ctapipe.io import EventPreprocessor, PreprocessorFeatureSet + + with pytest.raises(ToolConfigurationError): + EventPreprocessor(feature_set=PreprocessorFeatureSet.custom) + + +def test_nondefault_reconstructors(minimal_dl2_table): + """Check that using a different constructor than default still works""" + + from ctapipe.io import EventPreprocessor + + # define some new reconstructors, and add those columns to the test table: + geom = "ExampleGeometryReconstructor" + energy = "ExampleEnergyRegressor" + gammaness = "ExampleGammnessClassifier" + table = minimal_dl2_table + + table[f"{geom}_alt"] = ([71.1, 62.2, 61.3, 75.8] * u.deg,) + table[f"{geom}_az"] = [231.0, 231.6, 231.4, 238.1] * u.deg + table[f"{geom}_is_valid"] = [True, False, True, True] + + table[f"{energy}_energy"] = [20.0, 1.0, 0.5, 0.1] * u.TeV + table[f"{energy}_is_valid"] = [True, False, True, True] + + table[f"{gammaness}_prediction"] = [0.1, 0.8, 0.9, 0.7] + table[f"{gammaness}_is_valid"] = [True, False, True, True] + table[f"{gammaness}_telescopes"] = table["RandomForestClassifier_telescopes"] + + preprocess = EventPreprocessor( + feature_set=PreprocessorFeatureSet.dl2_irf, + geometry_reconstructor=geom, + energy_reconstructor=energy, + gammaness_reconstructor=gammaness, + ) + + table_processed = preprocess(table) + + # check that the processing worked. In this case, we check that the + # requested columns are renamed correctly and that the filtered values match + # the original values. + + mask = table["event_id"] == table_processed["event_id"] + masked = table[mask] # so that we just compare values after filtering + + assert np.allclose(table_processed["reco_energy"], masked[f"{energy}_energy"]) + assert np.allclose(table_processed["reco_az"], masked[f"{geom}_az"]) + assert np.allclose(table_processed["gh_score"], masked[f"{gammaness}_prediction"]) diff --git a/src/ctapipe/io/tests/test_preprocessing.py b/src/ctapipe/io/tests/test_preprocessing.py deleted file mode 100644 index 46879ac6350..00000000000 --- a/src/ctapipe/io/tests/test_preprocessing.py +++ /dev/null @@ -1,236 +0,0 @@ -import astropy.units as u -import numpy as np -import pytest -from astropy.table import Column, QTable, Table -from traitlets.config import Config - - -@pytest.fixture(scope="function") -def dummy_table(): - """Dummy table to test column renaming.""" - return Table( - { - "obs_id": [1, 1, 1, 2, 3, 3], - "event_id": [1, 2, 3, 1, 1, 2], - "true_energy": [0.99, 10, 0.37, 2.1, 73.4, 1] * u.TeV, - "dummy_energy": [1, 10, 0.4, 2.5, 73, 1] * u.TeV, - "classifier_prediction": [1, 0.3, 0.87, 0.93, 0, 0.1], - "true_alt": [60, 60, 60, 60, 60, 60] * u.deg, - "geom_alt": [58.5, 61.2, 59, 71.6, 60, 62] * u.deg, - "true_az": [13, 13, 13, 13, 13, 13] * u.deg, - "geom_az": [12.5, 13, 11.8, 15.1, 14.7, 12.8] * u.deg, - "subarray_pointing_frame": np.zeros(6), - "subarray_pointing_lat": np.full(6, 20) * u.deg, - "subarray_pointing_lon": np.full(6, 0) * u.deg, - } - ) - - -def test_normalise_column_names(dummy_table): - from ctapipe.io.dl2_tables_preprocessing import DL2EventPreprocessor - - output_table_schema = [ - Column(name="obs_id", dtype=np.uint64, description="Observation Block ID"), - Column(name="event_id", dtype=np.uint64, description="Array event ID"), - Column(name="true_energy", unit=u.TeV, description="Simulated energy"), - Column(name="true_az", unit=u.deg, description="Simulated azimuth"), - Column(name="true_alt", unit=u.deg, description="Simulated altitude"), - Column(name="reco_energy", unit=u.TeV, description="Reconstructed energy"), - Column(name="reco_az", unit=u.deg, description="Reconstructed azimuth"), - Column(name="reco_alt", unit=u.deg, description="Reconstructed altitude"), - Column(name="pointing_alt", unit=u.deg, description="Pointing latitude"), - Column(name="pointing_az", unit=u.deg, description="Pointing longitude"), - Column( - name="gh_score", - dtype=np.float64, - description="prediction of the classifier, defined between [0,1]," - " where values close to 1 mean that the positive class" - " (e.g. gamma in gamma-ray analysis) is more likely", - ), - ] - epp = DL2EventPreprocessor( - energy_reconstructor="dummy", - geometry_reconstructor="geom", - gammaness_classifier="classifier", - output_table_schema=output_table_schema, - ) - norm_table = epp.normalise_column_names(dummy_table) - - needed_cols = [ - "obs_id", - "event_id", - "true_energy", - "true_alt", - "true_az", - "reco_energy", - "reco_alt", - "reco_az", - "gh_score", - "pointing_alt", - "pointing_az", - ] - for c in needed_cols: - assert c in norm_table.colnames - - with pytest.raises(ValueError, match="Required column geom_alt is missing."): - dummy_table.rename_column("geom_alt", "alt_geom") - epp = DL2EventPreprocessor( - energy_reconstructor="dummy", - geometry_reconstructor="geom", - gammaness_classifier="classifier", - output_table_schema=output_table_schema, - ) - _ = epp.normalise_column_names(dummy_table) - - -def test_event_loader(gamma_diffuse_full_reco_file, irf_event_loader_test_config): - pytest.importorskip("pyirf", reason="pyirf is an optional dependency") - from pyirf.simulations import SimulatedEventsInfo - from pyirf.spectral import PowerLaw - - from ctapipe.io.dl2_tables_preprocessing import DL2EventLoader - from ctapipe.irf import Spectra - - loader = DL2EventLoader( - config=irf_event_loader_test_config, - file=gamma_diffuse_full_reco_file, - target_spectrum=Spectra.CRAB_HEGRA, - ) - events, count, meta = loader.load_preselected_events( - chunk_size=10000, - obs_time=u.Quantity(50, u.h), - ) - - columns = [ - "obs_id", - "event_id", - "true_energy", - "true_az", - "true_alt", - "reco_energy", - "reco_az", - "reco_alt", - "reco_fov_lat", - "reco_fov_lon", - "gh_score", - "pointing_az", - "pointing_alt", - "theta", - "true_source_fov_offset", - "reco_source_fov_offset", - "weight", - ] - - assert sorted(columns) == sorted(events.colnames) - assert isinstance(count, int) - assert isinstance(meta["sim_info"], SimulatedEventsInfo) - assert isinstance(meta["spectrum"], PowerLaw) - - events = loader.make_event_weights( - events, meta["spectrum"], "gammas", (0 * u.deg, 1 * u.deg) - ) - - assert "weight" in events.colnames - - -def test_preprocessor_tel_table_with_custom_reconstructor(tmp_path, test_config): - from ctapipe.io.dl2_tables_preprocessing import DL2EventPreprocessor - - # Create a test table with required columns - table = QTable( - { - "obs_id": [1, 1, 2], - "event_id": [100, 101, 102], - "tel_id": [1, 1, 1], - "ExtraTreesRegressor_tel_energy": [1.0, 2.0, 3.0] * u.TeV, - "ExtraTreesRegressor_tel_is_valid": [True, False, True], - "ExtraTreesRegressor_tel_energy_uncert": [0.1, 0.2, 0.1], - "ExtraTreesRegressor_tel_goodness_of_fit": [0.9, 0.8, 0.95], - "subarray_pointing_lat": [80.0, 80.0, 80.0] * u.deg, - "subarray_pointing_lon": [0.0, 0.0, 0.0] * u.deg, - "true_energy": [1.1, 2.1, 3.1] * u.TeV, - "true_az": [42.0, 43.0, 44.0] * u.deg, - "true_alt": [70.0, 71.0, 72.0] * u.deg, - } - ) - - # Set up config - config = test_config - - # Create preprocessor with config - preprocessor = DL2EventPreprocessor(config=Config(config)) - - # Apply quality query and preprocessing - mask = preprocessor.quality_query.get_table_mask(table) - filtered = table[mask] - - # Apply renaming and derived column generation - processed = preprocessor.normalise_column_names(filtered) - - # Check expected column names after renaming - assert "ExtraTreesRegressor_tel_energy" in processed.colnames - assert "obs_id" in processed.colnames # might exist depending on classifier config - assert "tel_id" in processed.colnames - - # Check the number of surviving rows (only valid events) - assert len(processed) == 2 - assert np.all(processed["ExtraTreesRegressor_tel_energy"] > 0 * u.TeV) - - -def test_name_overriding(dummy_table): - from ctapipe.io.dl2_tables_preprocessing import DL2EventPreprocessor - - epp = DL2EventPreprocessor( - energy_reconstructor="dummy", - geometry_reconstructor="geom", - gammaness_classifier="classifier", - columns_to_rename={"true_energy": "false_energy"}, - output_table_schema=[ - Column(name="obs_id", dtype=np.uint64, description="Observation Block ID"), - Column(name="event_id", dtype=np.uint64, description="Array event ID"), - Column(name="false_energy", unit=u.TeV, description="Simulated energy"), - Column(name="true_az", unit=u.deg, description="Simulated azimuth"), - Column(name="true_alt", unit=u.deg, description="Simulated altitude"), - Column(name="dummy_energy", unit=u.TeV, description="Reconstructed energy"), - Column(name="geom_az", unit=u.deg, description="Reconstructed azimuth"), - Column(name="geom_alt", unit=u.deg, description="Reconstructed altitude"), - Column( - name="subarray_pointing_frame", - unit=u.dimensionless_unscaled, - description="Pointing frame", - ), - Column( - name="subarray_pointing_lat", - unit=u.deg, - description="Pointing latitude", - ), - Column( - name="subarray_pointing_lon", - unit=u.deg, - description="Pointing longitude", - ), - Column( - name="classifier_prediction", - unit=u.dimensionless_unscaled, - description="prediction of the classifier, defined between [0,1]," - " where values close to 1 mean that the positive class" - " (e.g. gamma in gamma-ray analysis) is more likely", - ), - ], - ) - norm_table = epp.normalise_column_names(dummy_table) - columns = [ - "obs_id", - "event_id", - "false_energy", - "true_az", - "true_alt", - "dummy_energy", - "classifier_prediction", - "geom_alt", - "geom_az", - "subarray_pointing_frame", - "subarray_pointing_lat", - "subarray_pointing_lon", - ] - assert sorted(columns) == sorted(norm_table.colnames) diff --git a/src/ctapipe/irf/__init__.py b/src/ctapipe/irf/__init__.py index d4abdb43fac..0163133a806 100644 --- a/src/ctapipe/irf/__init__.py +++ b/src/ctapipe/irf/__init__.py @@ -31,7 +31,13 @@ PointSourceSensitivityOptimizer, ThetaPercentileCutCalculator, ) -from .spectra import ENERGY_FLUX_UNIT, FLUX_UNIT, SPECTRA, Spectra +from .spectra import ( + ENERGY_FLUX_UNIT, + FLUX_UNIT, + SPECTRA, + Spectra, + spectrum_from_simulation_config, +) __all__ = [ "AngularResolution2dMaker", @@ -53,4 +59,5 @@ "FLUX_UNIT", "check_bins_in_range", "make_bins_per_decade", + "spectrum_from_simulation_config", ] diff --git a/src/ctapipe/irf/binning.py b/src/ctapipe/irf/binning.py index 172e159b3d2..fda7fb12653 100644 --- a/src/ctapipe/irf/binning.py +++ b/src/ctapipe/irf/binning.py @@ -17,6 +17,7 @@ "DefaultTrueEnergyBins", "DefaultRecoEnergyBins", "DefaultFoVOffsetBins", + "DefaultFoVPhiBins", ] logger = logging.getLogger(__name__) @@ -179,9 +180,9 @@ class DefaultFoVOffsetBins(Component): default_value=1, ).tag(config=True) - def __init__(self, config=None, parent=None, **kwargs): - super().__init__(config=config, parent=parent, **kwargs) - self.fov_offset_bins = u.Quantity( + @property + def fov_offset_bins(self): + return u.Quantity( np.linspace( self.fov_offset_min.to_value(u.deg), self.fov_offset_max.to_value(u.deg), @@ -189,3 +190,27 @@ def __init__(self, config=None, parent=None, **kwargs): ), u.deg, ) + + +class DefaultFoVPhiBins(Component): + """ + Base class for creating IRFs with phi dependence. + + The range is always assumed to be (0, 360) deg. + """ + + fov_phi_n_bins = Integer( + help="Number of FoV offset bins", + default_value=4, + ).tag(config=True) + + @property + def fov_phi_bins(self): + return u.Quantity( + np.linspace( + 0.0 * u.deg, + 360.0 * u.deg, + self.fov_phi_n_bins + 1, + ), + u.deg, + ) diff --git a/src/ctapipe/irf/event_weighter.py b/src/ctapipe/irf/event_weighter.py new file mode 100644 index 00000000000..468485ea33b --- /dev/null +++ b/src/ctapipe/irf/event_weighter.py @@ -0,0 +1,185 @@ +#!/usr/bin/env python3 + +"""Components for computing event weights.""" + +from abc import abstractmethod +from collections.abc import Callable +from typing import override + +import numpy as np +from astropy import units as u +from astropy.table import QTable +from pyirf.spectral import ( + calculate_event_weights, +) + +from ..core import Component, traits +from ..core.feature_generator import _shallow_copy_table +from .binning import DefaultFoVOffsetBins, DefaultFoVPhiBins +from .spectra import SPECTRA, Spectra + +__all__ = [ + "EventWeighter", + "SimpleEventWeighter", + "RadialEventWeighter", + "PolarEventWeighter", +] + + +class EventWeighter(Component): + """Compute weights to go from source to target spectra.""" + + target_spectrum_name = traits.UseEnum( + Spectra, + default_value=Spectra.CRAB_HEGRA, + help="Pre-defined source spectrum to reweight to.", + ).tag(config=True) + + is_diffuse = traits.Bool( + default_value=True, help="If True, assume the source is diffuse." + ).tag(config=True) + + energy_column = traits.Unicode( + help="name of energy column", default_value="true_energy" + ).tag(config=True) + weight_column = traits.Unicode( + help="name of output weight column", default_value="weight" + ).tag(config=True) + + def __init__( + self, + source_spectrum: Callable[[u.Quantity], u.Quantity], + config=None, + parent=None, + **kwargs, + ): + """ + Parameters + ---------- + source_spectrum: Callable + initial spectrum of the events to be processed. + target_spectrum: Callable | None + target spectrum to weight to. If None, a pre-defined spectrum + function from the `target_spectrum_name` attribute will be used` + """ + super().__init__(config=config, parent=parent, **kwargs) + self.source_spectrum = source_spectrum + self.target_spectrum = SPECTRA[self.target_spectrum_name] + + @abstractmethod + def _compute_weights(self, events_table: QTable): + raise NotImplementedError( + f"{self.__class__.__name__} weighting is not implemented" + ) + + def __call__(self, events_table: QTable) -> QTable: + """Returns shallow copy of input table with a `weight` column added""" + + table = _shallow_copy_table(events_table) + self._compute_weights(table) + return table + + +class SimpleEventWeighter(EventWeighter): + """Weights all events spectrally with no spatial binning. + + Calling this class adds a column to the output table with the event-wise + spectral weights, with column name ``weight_column``. + """ + + fov_offset_max = traits.AstroQuantity( + help="upper bound of spatial integral applied to source function", + default_value=u.Quantity(10, u.deg), + physical_type=u.physical.angle, + ).tag(config=True) + + @override + def _compute_weights(self, events_table: QTable): + energy = events_table[self.energy_column] + source_spectrum = self.source_spectrum + if self.is_diffuse: + source_spectrum = source_spectrum.integrate_cone( + 0 * u.deg, self.fov_offset_max + ) + weights = calculate_event_weights( + energy, + target_spectrum=self.target_spectrum, + simulated_spectrum=source_spectrum, + ) + events_table[self.weight_column] = weights + + +class RadialEventWeighter(EventWeighter, DefaultFoVOffsetBins): + """ + Weights in radial (FOV) offset bins in the `~ctapipe.coordinates.NominalFrame`. + + Calling this class adds a column to the output table with the event-wise + spectral-spatial weights, with column name `weight_column``. This implementation + additionally adds the column ``output_table["fov_offset_bin"]``, and the + list of offset bin edges in ``output_table.meta["OFFSBINS"]`` + """ + + fov_offset_column = traits.Unicode( + help="name of FOV radial offset column", default_value="true_fov_offset" + ).tag(config=True) + + @override + def _compute_weights(self, events_table: QTable): + offset_bins = self.fov_offset_bins + offset = events_table[self.fov_offset_column].to_value(offset_bins.unit) + energy = events_table[self.energy_column] + weights = np.zeros_like(energy.value) + + # note that the bin i from digitize starts at 1 and means: + # offset_bins[i-1] <= offset < offset_bins[ii]) + r_bin = np.digitize(offset, offset_bins.value) + + for ii in range(0, len(offset_bins)): + self.log.debug( + f"bin {ii} offset=[{offset_bins[ii - 1]}, {offset_bins[ii]})" + ) + mask = r_bin == ii + weights[mask] = calculate_event_weights( + true_energy=energy[mask], + target_spectrum=self.target_spectrum, + simulated_spectrum=self.source_spectrum.integrate_cone( + offset_bins[ii - 1], offset_bins[ii] + ), + ) + + events_table[self.weight_column] = weights + events_table["fov_offset_bin"] = r_bin + + events_table.columns["fov_offset_bin"].description = ( + "Bin i defined as offset[i-1] <= fov_offset < offset[i]. " + "Where offset is `OFFSBINS` array found this table's metadata." + ) + + events_table.meta["OFFSBINS"] = list(offset_bins.to_value("deg")) + + +class PolarEventWeighter(EventWeighter, DefaultFoVOffsetBins, DefaultFoVPhiBins): + """ + Weights in field-of-view polar $(r, \\phi)$ bins in the `~ctapipe.coordinates.NominalFrame`. + + Calling this class adds a column to the output table with the event-wise + spectral-spatial weights, with column name ``weight_column``. This + implementation additionally adds the columns + ``output_table["fov_offset_bin"]``, ``output_table["fov_phi_bin"]``, and the + list of offset and phi bin edges in ``output_table.meta["OFFSBINS"]`` and + ``output_table.meta["PHIBINS"]`` respectively. + """ + + fov_offset_column = traits.Unicode( + help="name of FOV radial offset column", default_value="true_fov_offset" + ).tag(config=True) + + fov_phi_column = traits.Unicode( + help="name of the FOV azimuthal coordinate", default_value="true_fov_phi" + ).tag(config=True) + + @override + def _compute_weights(self, events_table: QTable): + raise NotImplementedError( + f"{self.__class__.__name__} weighting is not implemented" + ) diff --git a/src/ctapipe/irf/spectra.py b/src/ctapipe/irf/spectra.py index 75106112b97..0e1b8d380f3 100644 --- a/src/ctapipe/irf/spectra.py +++ b/src/ctapipe/irf/spectra.py @@ -1,22 +1,38 @@ """Definition of spectra to be used to calculate event weights for irf computation""" -from enum import Enum +import logging +from collections.abc import Callable +from enum import StrEnum import astropy.units as u -from pyirf.spectral import CRAB_HEGRA, IRFDOC_ELECTRON_SPECTRUM, IRFDOC_PROTON_SPECTRUM +import numpy as np +from astropy.table import Table +from pyirf.simulations import SimulatedEventsInfo +from pyirf.spectral import ( + CRAB_HEGRA, + IRFDOC_ELECTRON_SPECTRUM, + IRFDOC_PROTON_SPECTRUM, + PowerLaw, +) -__all__ = ["ENERGY_FLUX_UNIT", "FLUX_UNIT", "SPECTRA", "Spectra"] +__all__ = [ + "ENERGY_FLUX_UNIT", + "FLUX_UNIT", + "SPECTRA", + "Spectra", + "spectrum_from_simulation_config", +] ENERGY_FLUX_UNIT = (1 * u.erg / u.s / u.cm**2).unit FLUX_UNIT = (1 / u.erg / u.s / u.cm**2).unit -class Spectra(Enum): +class Spectra(StrEnum): """Spectra for calculating event weights""" - CRAB_HEGRA = 1 - IRFDOC_ELECTRON_SPECTRUM = 2 - IRFDOC_PROTON_SPECTRUM = 3 + CRAB_HEGRA = "CRAB_HEGRA" + IRFDOC_ELECTRON_SPECTRUM = "IRFDOC_ELECTRON_SPECTRUM" + IRFDOC_PROTON_SPECTRUM = "IRFDOC_PROTON_SPECTRUM" SPECTRA = { @@ -24,3 +40,87 @@ class Spectra(Enum): Spectra.IRFDOC_ELECTRON_SPECTRUM: IRFDOC_ELECTRON_SPECTRUM, Spectra.IRFDOC_PROTON_SPECTRUM: IRFDOC_PROTON_SPECTRUM, } + + +logger = logging.getLogger(__name__) + + +def spectrum_from_simulation_config( + simulation_configuration_table: Table, + shower_distribution_table: Table | None, + obs_time: u.Quantity, + method: str = "powerlaw", +) -> Callable[[u.Quantity], u.Quantity]: + """ + Return simulated spectrum function from configuration information.. + + Note this currently implements only PowerLaw spectra, but in the future will returnq a + + Parameters + ---------- + simulation_configuration_table: Table + table as read by TableLoader.read_simulation_configuration() + shower_distribution_table: Table + table of simulated shower distribution as read from TableLoader.read_shower_distribution() + obs_time: u.Quantity['s'] + Observation time in a unit convertible to seconds. + method: str + "powerlaw" (for assuming powerlaw distributions), or "histogram" to return an + interpolation function from the underlying distribution histogram. + + Returns + ------- + Callable: + simulated spectrum function. + """ + + if method != "powerlaw": + return NotImplementedError(f"Method '{method}' is not implemented") + + n_showers: int = 0 + if shower_distribution_table: + n_showers = shower_distribution_table["n_entries"].sum() + else: + logger.warning( + "Simulation distributions were not found in the input files, " + "falling back to estimating the number of showers from the " + "simulation configuration." + ) + + # Some tight consistency checks. Eventually we will support using the + # arbitrary shower distribution and non-flat spatial distributions. + # Currently we do not support those, so we raise exceptions here to + # avoid that we incorrectly compute the effective area, which would have + # a high scientific impact. + for itm in [ + "spectral_index", + "energy_range_min", + "energy_range_max", + "max_scatter_range", + "max_viewcone_radius", + "min_viewcone_radius", + ]: + if len(np.unique(simulation_configuration_table[itm])) > 1: + raise NotImplementedError( + f"Unsupported: '{itm}' differs across simulation runs" + ) + + n_showers_config = ( + simulation_configuration_table["n_showers"] + * simulation_configuration_table["shower_reuse"] + ).sum() + if n_showers == 0: + n_showers = n_showers_config + + assert n_showers_config == n_showers, "Inconsistent number of simulated showers" + sim_info = SimulatedEventsInfo( + n_showers=n_showers, + energy_min=simulation_configuration_table["energy_range_min"].quantity[0], + energy_max=simulation_configuration_table["energy_range_max"].quantity[0], + max_impact=simulation_configuration_table["max_scatter_range"].quantity[0], + spectral_index=simulation_configuration_table["spectral_index"][0], + viewcone_max=simulation_configuration_table["max_viewcone_radius"].quantity[0], + viewcone_min=simulation_configuration_table["min_viewcone_radius"].quantity[0], + ) + + return PowerLaw.from_simulation(sim_info, obstime=obs_time) diff --git a/src/ctapipe/irf/tests/test_binning.py b/src/ctapipe/irf/tests/test_binning.py index 4925f8855ca..f27849b0ffc 100644 --- a/src/ctapipe/irf/tests/test_binning.py +++ b/src/ctapipe/irf/tests/test_binning.py @@ -105,3 +105,14 @@ def test_fov_offset_bins_base(): assert np.isclose(binning.fov_offset_bins[0], binning.fov_offset_min, rtol=1e-9) assert np.isclose(binning.fov_offset_bins[-1], binning.fov_offset_max, rtol=1e-9) assert np.allclose(np.diff(binning.fov_offset_bins.to_value(u.deg)), 1) + + +def test_fov_phi_bins_base(): + from ctapipe.irf.binning import DefaultFoVPhiBins + + binning = DefaultFoVPhiBins(fov_offset_n_bins=4) + assert len(binning.fov_offset_bins) == 5 + assert binning.fov_offset_bins.unit == u.deg + assert np.allclose( + binning.fov_offset_bins, [0.0, 90.0, 180.0, 270.0, 360.0] * u.deg + ) diff --git a/src/ctapipe/irf/tests/test_event_weighter.py b/src/ctapipe/irf/tests/test_event_weighter.py new file mode 100644 index 00000000000..c903d59c44c --- /dev/null +++ b/src/ctapipe/irf/tests/test_event_weighter.py @@ -0,0 +1,135 @@ +#!/usr/bin/env python3 + +import numpy as np +import pytest +from astropy import units as u +from astropy.table import QTable + +from ctapipe.irf.event_weighter import RadialEventWeighter, SimpleEventWeighter + + +@pytest.fixture(scope="function") +def example_weight_table(): + table = QTable( + dict( + true_energy=[1.0, 2.0, 0.5, 0.2] * u.TeV, + true_fov_offset=[0.1, 1.2, 2.2, 3.2] * u.deg, + ) + ) + table.meta["VERSION"] = 1.0 + table.columns["true_energy"].description = "True energy of particle" + return table + + +def test_simple_weights(example_weight_table): + from ctapipe.irf.spectra import PowerLaw + + table = example_weight_table + + source_spectrum = PowerLaw( + normalization=u.Quantity(0.00027, "TeV-1 s-1 sr-1 m-2"), + index=-2.0, + e_ref=1.0 * u.TeV, + ) + + weight1 = SimpleEventWeighter( + source_spectrum=source_spectrum, target_spectrum_name="CRAB_HEGRA" + ) + + table_w1 = weight1(table) + + assert np.all(table_w1["weight"] > 0.0) + assert np.all(table_w1["weight"] <= 1.0) + + +def test_flat_weighting(example_weight_table): + """Check that if source and target spectra are the same, we get 1.0.""" + from ctapipe.irf.spectra import SPECTRA, Spectra + + table = example_weight_table + + weight = SimpleEventWeighter( + source_spectrum=SPECTRA[Spectra.CRAB_HEGRA], + target_spectrum_name=Spectra.CRAB_HEGRA.name, + is_diffuse=False, + ) + + w = weight(table)["weight"] + + assert np.allclose(w, 1.0) + + +def test_radial_weights(example_weight_table): + from ctapipe.irf.spectra import PowerLaw + + table = example_weight_table + + source_spectrum = PowerLaw( + normalization=u.Quantity(0.00027, "TeV-1 s-1 sr-1 m-2"), + index=-2.0, + e_ref=1.0 * u.TeV, + ) + + weight = RadialEventWeighter( + source_spectrum=source_spectrum, + target_spectrum_name="CRAB_HEGRA", + fov_offset_min=0.0 * u.deg, + fov_offset_max=5.0 * u.deg, + fov_offset_n_bins=5, + ) + + assert np.allclose(weight.fov_offset_bins, [0.0, 1.0, 2.0, 3.0, 4.0, 5.0] * u.deg) + + table_w = weight(table) + + assert "fov_offset_bin" in table_w.colnames + assert "OFFSBINS" in table_w.meta + assert table_w.meta["OFFSBINS"] == [0.0, 1.0, 2.0, 3.0, 4.0, 5.0] + + for ii in range(0, 7): + mask = table_w["fov_offset_bin"] == ii + + if ii == 0: + # there should be no 0th bin + assert len(table_w[mask]) == 0 + elif ii == 6: + # for outlier bin 6, weights should be all 0 + assert np.all(table_w["weight"][mask] == 0.0) + else: + # for bins 1-5,weights should be positive in this case + assert np.all(table_w["weight"][mask] >= 0.0) + + +def test_radial_weights_fits(example_weight_table, tmp_path): + """Test round-tripping to FITS""" + from astropy.table import Table + + from ctapipe.irf.spectra import PowerLaw + + table = example_weight_table + + source_spectrum = PowerLaw( + normalization=u.Quantity(0.00027, "TeV-1 s-1 sr-1 m-2"), + index=-2.0, + e_ref=1.0 * u.TeV, + ) + + weight = RadialEventWeighter( + source_spectrum=source_spectrum, + target_spectrum_name="CRAB_HEGRA", + fov_offset_min=0.0 * u.deg, + fov_offset_max=5.0 * u.deg, + fov_offset_n_bins=5, + ) + + table_w = weight(table) + table_w.write(tmp_path / "test.fits") + + assert Table.read(tmp_path / "test.fits").meta["OFFSBINS"] == [ + 0.0, + 1.0, + 2.0, + 3.0, + 4.0, + 5.0, + ]