From 66055892014ac9ecc057eb34b774ac55c5e99b16 Mon Sep 17 00:00:00 2001 From: "Georgios.Voutsinas" Date: Thu, 26 Jun 2025 21:51:22 +0200 Subject: [PATCH 01/33] test --- src/ctapipe/irf/preprocessing.py | 193 ++++++++------------ src/ctapipe/irf/tests/test_preprocessing.py | 5 + 2 files changed, 78 insertions(+), 120 deletions(-) diff --git a/src/ctapipe/irf/preprocessing.py b/src/ctapipe/irf/preprocessing.py index 21653693793..b26bff4b175 100644 --- a/src/ctapipe/irf/preprocessing.py +++ b/src/ctapipe/irf/preprocessing.py @@ -5,7 +5,7 @@ import astropy.units as u import numpy as np from astropy.coordinates import AltAz, SkyCoord -from astropy.table import Column, QTable, Table, vstack +from astropy.table import QTable, Table, vstack from pyirf.simulations import SimulatedEventsInfo from pyirf.spectral import ( DIFFUSE_FLUX_UNIT, @@ -20,7 +20,7 @@ from ..containers import CoordinateFrameType from ..coordinates import NominalFrame from ..core import Component, QualityQuery -from ..core.traits import List, Tuple, Unicode +from ..core.traits import Dict, List, Tuple, Unicode from ..io import TableLoader from .spectra import SPECTRA, Spectra @@ -52,26 +52,70 @@ class EventPreprocessor(Component): classes = [EventQualityQuery] - energy_reconstructor = Unicode( - default_value="RandomForestRegressor", - help="Prefix of the reco `_energy` column", - ).tag(config=True) + energy_reconstructor = Unicode("RandomForestRegressor").tag(config=True) + geometry_reconstructor = Unicode("HillasReconstructor").tag(config=True) + gammaness_classifier = Unicode("RandomForestClassifier").tag(config=True) - geometry_reconstructor = Unicode( - default_value="HillasReconstructor", - help="Prefix of the `_alt` and `_az` reco geometry columns", + fixed_columns = List( + Unicode(), + default_value=["obs_id", "event_id", "true_energy", "true_az", "true_alt"], + help="Columns to always keep from the original input", + ).tag(config=True) + """ + fixed_columns = List( + default_value=[ + Column(name="obs_id", dtype=np.uint64, description="Observation block ID"), + Column(name="event_id", dtype=np.uint64, description="Array event ID"), + Column(name="true_energy", unit=u.TeV, description="Simulated energy"), + Column(name="true_az", unit=u.deg, description="Simulated azimuth"), + Column(name="true_alt", unit=u.deg, description="Simulated altitude"), + Column(name="reco_energy", unit=u.TeV, description="Reconstructed energy"), + Column(name="reco_az", unit=u.deg, description="Reconstructed azimuth"), + Column(name="reco_alt", unit=u.deg, description="Reconstructed altitude"), + Column(name="reco_fov_lat", unit=u.deg, description="Reconstructed FOV lat"), + Column(name="reco_fov_lon", unit=u.deg, description="Reconstructed FOV lon"), + Column(name="gh_score", unit=u.dimensionless_unscaled, description="Classifier score"), + Column(name="pointing_az", unit=u.deg, description="Pointing azimuth"), + Column(name="pointing_alt", unit=u.deg, description="Pointing altitude"), + Column(name="theta", unit=u.deg, description="Angular offset from source"), + Column(name="true_source_fov_offset", unit=u.deg, description="True offset from pointing direction"), + Column(name="reco_source_fov_offset", unit=u.deg, description="Reconstructed offset from pointing direction"), + ], + help="Schema definition for output event QTable, including name, dtype, unit, and description", +).tag(config=True) + """ + # Optional user override + columns_to_rename_override = Dict( + key_trait=Unicode(), + value_trait=Unicode(), + default_value={}, + help="Override of columns to rename. If empty, they are generated dynamically.", ).tag(config=True) - gammaness_classifier = Unicode( - default_value="RandomForestClassifier", - help="Prefix of the classifier `_prediction` column", + output_table_schema = List( + default_value=[], + help="Schema definition for output event QTable", ).tag(config=True) def __init__(self, config=None, parent=None, **kwargs): super().__init__(config=config, parent=parent, **kwargs) self.quality_query = EventQualityQuery(parent=self) - def normalise_column_names(self, events: Table) -> QTable: + @property + def columns_to_rename(self) -> dict: + if self.columns_to_rename_override: + return self.columns_to_rename_override + + return { + f"{self.energy_reconstructor}_energy": "reco_energy", + f"{self.geometry_reconstructor}_az": "reco_az", + f"{self.geometry_reconstructor}_alt": "reco_alt", + f"{self.gammaness_classifier}_prediction": "gh_score", + "subarray_pointing_lat": "pointing_alt", + "subarray_pointing_lon": "pointing_az", + } + + def normalise_column_names(self, events: QTable) -> QTable: if events["subarray_pointing_lat"].std() > 1e-3: raise NotImplementedError( "No support for making irfs from varying pointings yet" @@ -81,121 +125,26 @@ def normalise_column_names(self, events: Table) -> QTable: "At the moment only pointing in altaz is supported." ) - keep_columns = [ - "obs_id", - "event_id", - "true_energy", - "true_az", - "true_alt", - ] - rename_from = [ - f"{self.energy_reconstructor}_energy", - f"{self.geometry_reconstructor}_az", - f"{self.geometry_reconstructor}_alt", - f"{self.gammaness_classifier}_prediction", - "subarray_pointing_lat", - "subarray_pointing_lon", - ] - rename_to = [ - "reco_energy", - "reco_az", - "reco_alt", - "gh_score", - "pointing_alt", - "pointing_az", - ] - keep_columns.extend(rename_from) - for c in keep_columns: - if c not in events.colnames: + rename_from = list(self.columns_to_rename.keys()) + rename_to = list(self.columns_to_rename.values()) + + keep_columns = self.fixed_columns + rename_from + for col in keep_columns: + if col not in events.colnames: raise ValueError( - "Input files must conform to the ctapipe DL2 data model. " - f"Required column {c} is missing." + f"Input files must conform to the ctapipe DL2 data model. " + f"Required column {col} is missing." ) events = QTable(events[keep_columns], copy=COPY_IF_NEEDED) events.rename_columns(rename_from, rename_to) + return events def make_empty_table(self) -> QTable: - """ - This function defines the columns later functions expect to be present - in the event table. - """ - columns = [ - Column(name="obs_id", dtype=np.uint64, description="Observation block ID"), - Column(name="event_id", dtype=np.uint64, description="Array event ID"), - Column( - name="true_energy", - unit=u.TeV, - description="Simulated energy", - ), - Column( - name="true_az", - unit=u.deg, - description="Simulated azimuth", - ), - Column( - name="true_alt", - unit=u.deg, - description="Simulated altitude", - ), - Column( - name="reco_energy", - unit=u.TeV, - description="Reconstructed energy", - ), - Column( - name="reco_az", - unit=u.deg, - description="Reconstructed azimuth", - ), - Column( - name="reco_alt", - unit=u.deg, - description="Reconstructed altitude", - ), - Column( - name="reco_fov_lat", - unit=u.deg, - description="Reconstructed field of view lat", - ), - Column( - name="reco_fov_lon", - unit=u.deg, - description="Reconstructed field of view lon", - ), - Column(name="pointing_az", unit=u.deg, description="Pointing azimuth"), - Column(name="pointing_alt", unit=u.deg, description="Pointing altitude"), - Column( - name="theta", - unit=u.deg, - description="Reconstructed angular offset from source position", - ), - Column( - name="true_source_fov_offset", - unit=u.deg, - description="Simulated angular offset from pointing direction", - ), - Column( - name="reco_source_fov_offset", - unit=u.deg, - description="Reconstructed angular offset from pointing direction", - ), - Column( - name="gh_score", - unit=u.dimensionless_unscaled, - description="prediction of the classifier, defined between [0,1]," - " where values close to 1 mean that the positive class" - " (e.g. gamma in gamma-ray analysis) is more likely", - ), - Column( - name="weight", - unit=u.dimensionless_unscaled, - description="Event weight", - ), - ] - - return QTable(columns) + # empty_table = self.normalise_column_namesoriginal_table[:0] + print("Do I print something here???????????", self.output_table_schema) + return QTable(self.output_table_schema) class EventLoader(Component): @@ -219,14 +168,18 @@ def load_preselected_events( opts = dict(dl2=True, simulated=True, observation_info=True) with TableLoader(self.file, parent=self, **opts) as load: header = self.epp.make_empty_table() + print("!!!!!HEADER!!!!!!", header) sim_info, spectrum = self.get_simulation_information(load, obs_time) meta = {"sim_info": sim_info, "spectrum": spectrum} bits = [header] n_raw_events = 0 for _, _, events in load.read_subarray_events_chunked(chunk_size, **opts): selected = events[self.epp.quality_query.get_table_mask(events)] + # print("selected", selected) selected = self.epp.normalise_column_names(selected) + # print("selected", selected) selected = self.make_derived_columns(selected) + # print("selected", selected) bits.append(selected) n_raw_events += len(events) diff --git a/src/ctapipe/irf/tests/test_preprocessing.py b/src/ctapipe/irf/tests/test_preprocessing.py index 71cedc22ade..0e308b8c414 100644 --- a/src/ctapipe/irf/tests/test_preprocessing.py +++ b/src/ctapipe/irf/tests/test_preprocessing.py @@ -35,8 +35,13 @@ def test_normalise_column_names(dummy_table): geometry_reconstructor="geom", gammaness_classifier="classifier", ) + + print(dummy_table) + norm_table = epp.normalise_column_names(dummy_table) + print(norm_table) + needed_cols = [ "obs_id", "event_id", From 1a685098dde9af554cfe0e78529808ad95649c4f Mon Sep 17 00:00:00 2001 From: "Georgios.Voutsinas" Date: Thu, 26 Jun 2025 22:38:10 +0200 Subject: [PATCH 02/33] first working version, ruff --- src/ctapipe/irf/preprocessing.py | 53 +++++++++++++++----------------- 1 file changed, 25 insertions(+), 28 deletions(-) diff --git a/src/ctapipe/irf/preprocessing.py b/src/ctapipe/irf/preprocessing.py index b26bff4b175..a9ea673e99b 100644 --- a/src/ctapipe/irf/preprocessing.py +++ b/src/ctapipe/irf/preprocessing.py @@ -5,7 +5,7 @@ import astropy.units as u import numpy as np from astropy.coordinates import AltAz, SkyCoord -from astropy.table import QTable, Table, vstack +from astropy.table import Column, QTable, Table, vstack from pyirf.simulations import SimulatedEventsInfo from pyirf.spectral import ( DIFFUSE_FLUX_UNIT, @@ -61,29 +61,7 @@ class EventPreprocessor(Component): default_value=["obs_id", "event_id", "true_energy", "true_az", "true_alt"], help="Columns to always keep from the original input", ).tag(config=True) - """ - fixed_columns = List( - default_value=[ - Column(name="obs_id", dtype=np.uint64, description="Observation block ID"), - Column(name="event_id", dtype=np.uint64, description="Array event ID"), - Column(name="true_energy", unit=u.TeV, description="Simulated energy"), - Column(name="true_az", unit=u.deg, description="Simulated azimuth"), - Column(name="true_alt", unit=u.deg, description="Simulated altitude"), - Column(name="reco_energy", unit=u.TeV, description="Reconstructed energy"), - Column(name="reco_az", unit=u.deg, description="Reconstructed azimuth"), - Column(name="reco_alt", unit=u.deg, description="Reconstructed altitude"), - Column(name="reco_fov_lat", unit=u.deg, description="Reconstructed FOV lat"), - Column(name="reco_fov_lon", unit=u.deg, description="Reconstructed FOV lon"), - Column(name="gh_score", unit=u.dimensionless_unscaled, description="Classifier score"), - Column(name="pointing_az", unit=u.deg, description="Pointing azimuth"), - Column(name="pointing_alt", unit=u.deg, description="Pointing altitude"), - Column(name="theta", unit=u.deg, description="Angular offset from source"), - Column(name="true_source_fov_offset", unit=u.deg, description="True offset from pointing direction"), - Column(name="reco_source_fov_offset", unit=u.deg, description="Reconstructed offset from pointing direction"), - ], - help="Schema definition for output event QTable, including name, dtype, unit, and description", -).tag(config=True) - """ + # Optional user override columns_to_rename_override = Dict( key_trait=Unicode(), @@ -93,7 +71,29 @@ class EventPreprocessor(Component): ).tag(config=True) output_table_schema = List( - default_value=[], + default_value=[ + Column(name="obs_id", dtype=np.uint64, description="Observation block ID"), + Column(name="event_id", dtype=np.uint64, description="Array event ID"), + Column(name="true_energy", unit=u.TeV, description="Simulated energy"), + Column(name="true_az", unit=u.deg, description="Simulated azimuth"), + Column(name="true_alt", unit=u.deg, description="Simulated altitude"), + Column(name="reco_energy", unit=u.TeV, description="Reconstructed energy"), + Column(name="reco_az", unit=u.deg, description="Reconstructed azimuth"), + Column(name="reco_alt", unit=u.deg, description="Reconstructed altitude"), + Column( + name="reco_fov_lat", unit=u.deg, description="Reconstructed FOV lat" + ), + Column( + name="reco_fov_lon", unit=u.deg, description="Reconstructed FOV lon" + ), + Column(name="pointing_az", unit=u.deg, description="Pointing azimuth"), + Column(name="pointing_alt", unit=u.deg, description="Pointing altitude"), + Column(name="theta", unit=u.deg, description="Angular offset from source"), + Column(name="true_source_fov_offset", unit=u.deg), + Column(name="reco_source_fov_offset", unit=u.deg), + Column(name="gh_score", unit=u.dimensionless_unscaled), + Column(name="weight", unit=u.dimensionless_unscaled), + ], help="Schema definition for output event QTable", ).tag(config=True) @@ -138,12 +138,9 @@ def normalise_column_names(self, events: QTable) -> QTable: events = QTable(events[keep_columns], copy=COPY_IF_NEEDED) events.rename_columns(rename_from, rename_to) - return events def make_empty_table(self) -> QTable: - # empty_table = self.normalise_column_namesoriginal_table[:0] - print("Do I print something here???????????", self.output_table_schema) return QTable(self.output_table_schema) From 0791398540cc13fcd789edc2a474d6f9f75370a8 Mon Sep 17 00:00:00 2001 From: "Georgios.Voutsinas" Date: Sat, 28 Jun 2025 12:27:49 +0200 Subject: [PATCH 03/33] adding/fixing tests, ruff --- src/ctapipe/irf/preprocessing.py | 132 ++++++++++------- src/ctapipe/irf/tests/test_preprocessing.py | 155 +++++++++++++++++++- 2 files changed, 232 insertions(+), 55 deletions(-) diff --git a/src/ctapipe/irf/preprocessing.py b/src/ctapipe/irf/preprocessing.py index a9ea673e99b..7605904255d 100644 --- a/src/ctapipe/irf/preprocessing.py +++ b/src/ctapipe/irf/preprocessing.py @@ -20,7 +20,7 @@ from ..containers import CoordinateFrameType from ..coordinates import NominalFrame from ..core import Component, QualityQuery -from ..core.traits import Dict, List, Tuple, Unicode +from ..core.traits import Bool, Dict, List, Tuple, Unicode from ..io import TableLoader from .spectra import SPECTRA, Spectra @@ -52,9 +52,35 @@ class EventPreprocessor(Component): classes = [EventQualityQuery] - energy_reconstructor = Unicode("RandomForestRegressor").tag(config=True) - geometry_reconstructor = Unicode("HillasReconstructor").tag(config=True) - gammaness_classifier = Unicode("RandomForestClassifier").tag(config=True) + energy_reconstructor = Unicode( + default_value="RandomForestRegressor", + help="Prefix of the reco `_energy` column", + ).tag(config=True) + + geometry_reconstructor = Unicode( + default_value="HillasReconstructor", + help="Prefix of the `_alt` and `_az` reco geometry columns", + ).tag(config=True) + + gammaness_classifier = Unicode( + default_value="RandomForestClassifier", + help="Prefix of the classifier `_prediction` column", + ).tag(config=True) + + disable_column_renaming = Bool( + default_value=False, + help="Disabling the renaming of the columns from" + "e.g. from ReconstructorNameRegressor_energy" + "to reco_energy.", + ).tag(config=True) + + apply_derived_columns = Bool( + default_value=True, help="Whether to compute derived columns" + ).tag(config=True) + + apply_check_pointing = Bool( + default_value=True, help="Accept only pointing in altaz and not divergent." + ).tag(config=True) fixed_columns = List( Unicode(), @@ -103,10 +129,10 @@ def __init__(self, config=None, parent=None, **kwargs): @property def columns_to_rename(self) -> dict: - if self.columns_to_rename_override: - return self.columns_to_rename_override + if self.disable_column_renaming: + return {} - return { + default = { f"{self.energy_reconstructor}_energy": "reco_energy", f"{self.geometry_reconstructor}_az": "reco_az", f"{self.geometry_reconstructor}_alt": "reco_alt", @@ -114,16 +140,20 @@ def columns_to_rename(self) -> dict: "subarray_pointing_lat": "pointing_alt", "subarray_pointing_lon": "pointing_az", } + return {**default, **self.columns_to_rename_override} def normalise_column_names(self, events: QTable) -> QTable: - if events["subarray_pointing_lat"].std() > 1e-3: - raise NotImplementedError( - "No support for making irfs from varying pointings yet" - ) - if any(events["subarray_pointing_frame"] != CoordinateFrameType.ALTAZ.value): - raise NotImplementedError( - "At the moment only pointing in altaz is supported." - ) + if self.apply_check_pointing: + if events["subarray_pointing_lat"].std() > 1e-3: + raise NotImplementedError( + "No support for making irfs from varying pointings yet" + ) + if any( + events["subarray_pointing_frame"] != CoordinateFrameType.ALTAZ.value + ): + raise NotImplementedError( + "At the moment only pointing in altaz is supported." + ) rename_from = list(self.columns_to_rename.keys()) rename_to = list(self.columns_to_rename.values()) @@ -137,7 +167,8 @@ def normalise_column_names(self, events: QTable) -> QTable: ) events = QTable(events[keep_columns], copy=COPY_IF_NEEDED) - events.rename_columns(rename_from, rename_to) + if rename_from and rename_to: + events.rename_columns(rename_from, rename_to) return events def make_empty_table(self) -> QTable: @@ -146,15 +177,27 @@ def make_empty_table(self) -> QTable: class EventLoader(Component): """ - Contains functions to load events and simulation information from a file - and derive some additional columns needed for irf calculation. + Component for loading events and simulation metadata, applying preselection and optional derived column logic. """ classes = [EventPreprocessor] + # User-selectable event reading function and kwargs + event_reader_function = Unicode( + default_value="read_subarray_events_chunked", + help=( + "Function of TableLoader used to read event chunks. " + "E.g., 'read_subarray_events_chunked' or 'read_telescope_events_chunked'." + ), + ).tag(config=True) + + event_reader_kwargs = Dict( + default_value={}, + help="Extra keyword arguments passed to the event reading function, e.g., {'path': '/dl2/event/telescope/Reconstructor'}", + ).tag(config=True) + def __init__(self, file: Path, target_spectrum: Spectra, **kwargs): super().__init__(**kwargs) - self.epp = EventPreprocessor(parent=self) self.target_spectrum = SPECTRA[target_spectrum] self.file = file @@ -163,41 +206,40 @@ def load_preselected_events( self, chunk_size: int, obs_time: u.Quantity ) -> tuple[QTable, int, dict]: opts = dict(dl2=True, simulated=True, observation_info=True) + with TableLoader(self.file, parent=self, **opts) as load: header = self.epp.make_empty_table() - print("!!!!!HEADER!!!!!!", header) sim_info, spectrum = self.get_simulation_information(load, obs_time) meta = {"sim_info": sim_info, "spectrum": spectrum} bits = [header] n_raw_events = 0 - for _, _, events in load.read_subarray_events_chunked(chunk_size, **opts): + reader_func = getattr(load, self.event_reader_function) + table_reader = reader_func(chunk_size, **opts, **self.event_reader_kwargs) + for _, _, events in table_reader: selected = events[self.epp.quality_query.get_table_mask(events)] - # print("selected", selected) selected = self.epp.normalise_column_names(selected) - # print("selected", selected) - selected = self.make_derived_columns(selected) - # print("selected", selected) + if self.epp.apply_derived_columns: + selected = self.make_derived_columns(selected) bits.append(selected) n_raw_events += len(events) - bits.append(header) # Putting it last ensures the correct metadata is used + bits.append(header) # Final header ensures consistent metadata table = vstack(bits, join_type="exact", metadata_conflicts="silent") return table, n_raw_events, meta def get_simulation_information( - self, loader: TableLoader, obs_time: u.Quantity + self, loader, obs_time: u.Quantity ) -> tuple[SimulatedEventsInfo, PowerLaw]: sim = loader.read_simulation_configuration() try: show = loader.read_shower_distribution() except NoSuchNodeError: - # Fall back to using the run header - show = Table([sim["n_showers"]], names=["n_entries"], dtype=[np.int64]) + show = Table([sim["n_showers"]], names=["n_entries"], dtype=[int]) for itm in ["spectral_index", "energy_range_min", "energy_range_max"]: - if len(np.unique(sim[itm])) > 1: + if len(set(sim[itm])) > 1: raise NotImplementedError( - f"Unsupported: '{itm}' differs across simulation runs" + f"Simulation parameter '{itm}' varies across runs, which is unsupported." ) sim_info = SimulatedEventsInfo( @@ -213,9 +255,7 @@ def get_simulation_information( return sim_info, PowerLaw.from_simulation(sim_info, obstime=obs_time) def make_derived_columns(self, events: QTable) -> QTable: - events["weight"] = ( - 1.0 * u.dimensionless_unscaled - ) # defer calculation of proper weights to later + events["weight"] = 1.0 * u.dimensionless_unscaled events["gh_score"].unit = u.dimensionless_unscaled events["theta"] = calculate_theta( events, @@ -229,18 +269,13 @@ def make_derived_columns(self, events: QTable) -> QTable: events, prefix="reco" ) - altaz = AltAz() pointing = SkyCoord( - alt=events["pointing_alt"], az=events["pointing_az"], frame=altaz - ) - reco = SkyCoord( - alt=events["reco_alt"], - az=events["reco_az"], - frame=altaz, + alt=events["pointing_alt"], az=events["pointing_az"], frame=AltAz() ) + reco = SkyCoord(alt=events["reco_alt"], az=events["reco_az"], frame=AltAz()) nominal = NominalFrame(origin=pointing) reco_nominal = reco.transform_to(nominal) - events["reco_fov_lon"] = u.Quantity(-reco_nominal.fov_lon) # minus for GADF + events["reco_fov_lon"] = u.Quantity(-reco_nominal.fov_lon) events["reco_fov_lat"] = u.Quantity(reco_nominal.fov_lat) return events @@ -260,16 +295,15 @@ def make_event_weights( ): if fov_offset_bins is None: raise ValueError( - "gamma_target_spectrum is point-like, but no fov offset bins " - "for the integration of the simulated diffuse spectrum were given." + "Missing FoV offset bins for point-like gamma spectrum normalization." ) for low, high in zip(fov_offset_bins[:-1], fov_offset_bins[1:]): - fov_mask = events["true_source_fov_offset"] >= low - fov_mask &= events["true_source_fov_offset"] < high - - events["weight"][fov_mask] = calculate_event_weights( - events[fov_mask]["true_energy"], + mask = (events["true_source_fov_offset"] >= low) & ( + events["true_source_fov_offset"] < high + ) + events["weight"][mask] = calculate_event_weights( + events[mask]["true_energy"], target_spectrum=self.target_spectrum, simulated_spectrum=spectrum.integrate_cone(low, high), ) diff --git a/src/ctapipe/irf/tests/test_preprocessing.py b/src/ctapipe/irf/tests/test_preprocessing.py index 0e308b8c414..a4b0040ac40 100644 --- a/src/ctapipe/irf/tests/test_preprocessing.py +++ b/src/ctapipe/irf/tests/test_preprocessing.py @@ -1,9 +1,10 @@ import astropy.units as u import numpy as np import pytest -from astropy.table import Table +from astropy.table import Column, QTable, Table from pyirf.simulations import SimulatedEventsInfo from pyirf.spectral import PowerLaw +from traitlets.config import Config @pytest.fixture(scope="module") @@ -36,12 +37,8 @@ def test_normalise_column_names(dummy_table): gammaness_classifier="classifier", ) - print(dummy_table) - norm_table = epp.normalise_column_names(dummy_table) - print(norm_table) - needed_cols = [ "obs_id", "event_id", @@ -98,8 +95,10 @@ def test_event_loader(gamma_diffuse_full_reco_file, irf_event_loader_test_config "theta", "true_source_fov_offset", "reco_source_fov_offset", + "weight", ] - assert columns.sort() == events.colnames.sort() + + assert sorted(columns) == sorted(events.colnames) assert isinstance(count, int) assert isinstance(meta["sim_info"], SimulatedEventsInfo) @@ -109,3 +108,147 @@ def test_event_loader(gamma_diffuse_full_reco_file, irf_event_loader_test_config events, meta["spectrum"], "gammas", (0 * u.deg, 1 * u.deg) ) assert "weight" in events.colnames + + +def test_preprocessor_tel_table_with_custom_reconstructor(tmp_path): + from ctapipe.irf import EventPreprocessor + + # Create a test table with required columns + table = QTable( + { + "obs_id": [1, 1, 2], + "event_id": [100, 101, 102], + "tel_id": [1, 1, 1], + "ExtraTreesRegressor_tel_energy": [1.0, 2.0, 3.0] * u.TeV, + "ExtraTreesRegressor_tel_is_valid": [True, False, True], + "ExtraTreesRegressor_tel_energy_uncert": [0.1, 0.2, 0.1], + "ExtraTreesRegressor_tel_goodness_of_fit": [0.9, 0.8, 0.95], + "subarray_pointing_lat": [80.0, 80.0, 80.0] * u.deg, + "subarray_pointing_lon": [0.0, 0.0, 0.0] * u.deg, + "true_energy": [1.1, 2.1, 3.1] * u.TeV, + "true_az": [42.0, 43.0, 44.0] * u.deg, + "true_alt": [70.0, 71.0, 72.0] * u.deg, + } + ) + + # Set up config + config = { + "EventPreprocessor": { + "energy_reconstructor": "ExtraTreesRegressor", + "fixed_columns": [ + "obs_id", + "event_id", + "tel_id", + "ExtraTreesRegressor_tel_energy", + "ExtraTreesRegressor_tel_energy_uncert", + ], + "output_table_schema": [ + Column( + name="obs_id", dtype=np.uint64, description="Observation block ID" + ), + Column(name="event_id", dtype=np.uint64, description="Array event ID"), + Column(name="tel_id", dtype=np.uint64, description="Telescope ID"), + Column( + name="ExtraTreesRegressor_tel_energy", + unit=u.TeV, + description="Reconstructed energy", + ), + Column( + name="ExtraTreesRegressor_tel_energy_uncert", + unit=u.TeV, + description="Reconstructed energy uncertainty", + ), + ], + "apply_derived_columns": False, + "disable_column_renaming": True, + "apply_check_pointing": False, + }, + "EventQualityQuery": { + "quality_criteria": [("valid reco", "ExtraTreesRegressor_tel_is_valid")] + }, + } + + # Create preprocessor with config + preprocessor = EventPreprocessor(config=Config(config)) + + # Apply quality query and preprocessing + mask = preprocessor.quality_query.get_table_mask(table) + filtered = table[mask] + + # Apply renaming and derived column generation + processed = preprocessor.normalise_column_names(filtered) + + # Check expected column names after renaming + assert "ExtraTreesRegressor_tel_energy" in processed.colnames + assert "obs_id" in processed.colnames # might exist depending on classifier config + assert "tel_id" in processed.colnames + + # Check the number of surviving rows (only valid events) + assert len(processed) == 2 + assert np.all(processed["ExtraTreesRegressor_tel_energy"] > 0 * u.TeV) + + +def test_loader_tel_table(gamma_diffuse_full_reco_file): + from ctapipe.irf import EventLoader, Spectra + + test_config = { + "EventLoader": {"event_reader_function": "read_telescope_events_chunked"}, + "EventPreprocessor": { + "energy_reconstructor": "ExtraTreesRegressor", + "fixed_columns": [ + "obs_id", + "event_id", + "tel_id", + "ExtraTreesRegressor_tel_energy", + "ExtraTreesRegressor_tel_energy_uncert", + ], + "output_table_schema": [ + Column( + name="obs_id", dtype=np.uint64, description="Observation block ID" + ), + Column(name="event_id", dtype=np.uint64, description="Array event ID"), + Column(name="tel_id", dtype=np.uint64, description="Telescope ID"), + Column( + name="ExtraTreesRegressor_tel_energy", + unit=u.TeV, + description="Reconstructed energy", + ), + Column( + name="ExtraTreesRegressor_tel_energy_uncert", + unit=u.TeV, + description="Reconstructed energy uncertainty", + ), + ], + "apply_derived_columns": False, + "disable_column_renaming": True, + "apply_check_pointing": False, + }, + "EventQualityQuery": { + "quality_criteria": [ + ("valid reco", "ExtraTreesRegressor_tel_is_valid"), + ("telescope ID", "tel_id == 35.0"), + ] + }, + } + + loader = EventLoader( + config=Config(test_config), + file=gamma_diffuse_full_reco_file, + target_spectrum=Spectra.CRAB_HEGRA, + ) + events, count, meta = loader.load_preselected_events( + chunk_size=10000, + obs_time=u.Quantity(50, u.h), + ) + + columns = [ + "obs_id", + "event_id", + "tel_id", + "ExtraTreesRegressor_tel_energy", + "ExtraTreesRegressor_tel_energy_uncert", + ] + + assert sorted(columns) == sorted(events.colnames) + assert np.all(events["ExtraTreesRegressor_tel_energy"] > 0 * u.TeV) + assert np.all(events["tel_id"] == 35.0) From 473c0a1ca9948e45a2f754d61a75d354a932a050 Mon Sep 17 00:00:00 2001 From: "Georgios.Voutsinas" Date: Mon, 30 Jun 2025 09:20:49 +0200 Subject: [PATCH 04/33] ruff --- src/ctapipe/irf/preprocessing.py | 135 ++++++++++++++++++-- src/ctapipe/irf/tests/test_preprocessing.py | 5 +- 2 files changed, 125 insertions(+), 15 deletions(-) diff --git a/src/ctapipe/irf/preprocessing.py b/src/ctapipe/irf/preprocessing.py index 7605904255d..d0bfe93d64b 100644 --- a/src/ctapipe/irf/preprocessing.py +++ b/src/ctapipe/irf/preprocessing.py @@ -143,6 +143,26 @@ def columns_to_rename(self) -> dict: return {**default, **self.columns_to_rename_override} def normalise_column_names(self, events: QTable) -> QTable: + """ + Rename column names according to configuration. + + Parameters + ---------- + events : QTable + Input event table. + + Returns + ------- + QTable + Table with selected and renamed columns. + + Raises + ------ + NotImplementedError + If pointing is not AltAz or varies too much. + ValueError + If required columns are missing. + """ if self.apply_check_pointing: if events["subarray_pointing_lat"].std() > 1e-3: raise NotImplementedError( @@ -172,6 +192,15 @@ def normalise_column_names(self, events: QTable) -> QTable: return events def make_empty_table(self) -> QTable: + """ + Create an empty event table based on the configured output schema. + + Returns + ------- + QTable + Empty event table with configured schema. + """ + return QTable(self.output_table_schema) @@ -205,6 +234,26 @@ def __init__(self, file: Path, target_spectrum: Spectra, **kwargs): def load_preselected_events( self, chunk_size: int, obs_time: u.Quantity ) -> tuple[QTable, int, dict]: + """ + Load and filter events from the file. + + Parameters + ---------- + chunk_size : int + Size of chunks to read from the file. + obs_time : Quantity + Observation time to scale weights. + + Returns + ------- + table : QTable + Filtered and processed event table. + n_raw_events : int + Number of events before selection. + meta : dict + Metadata dictionary with simulation info and input spectrum. + """ + opts = dict(dl2=True, simulated=True, observation_info=True) with TableLoader(self.file, parent=self, **opts) as load: @@ -223,23 +272,45 @@ def load_preselected_events( bits.append(selected) n_raw_events += len(events) - bits.append(header) # Final header ensures consistent metadata + bits.append(header) # Putting it last ensures the correct metadata is used table = vstack(bits, join_type="exact", metadata_conflicts="silent") return table, n_raw_events, meta def get_simulation_information( self, loader, obs_time: u.Quantity ) -> tuple[SimulatedEventsInfo, PowerLaw]: + """ + Extract simulation information from the input file. + + Parameters + ---------- + loader : TableLoader + Loader object for reading from the input file. + obs_time : Quantity + Total observation time. + + Returns + ------- + sim_info : SimulatedEventsInfo + Metadata about the simulated events. + spectrum : PowerLaw + Power-law model derived from simulation configuration. + + Raises + ------ + NotImplementedError + If simulation parameters vary across runs. + """ sim = loader.read_simulation_configuration() try: show = loader.read_shower_distribution() except NoSuchNodeError: - show = Table([sim["n_showers"]], names=["n_entries"], dtype=[int]) + show = Table([sim["n_showers"]], names=["n_entries"], dtype=[np.int64]) for itm in ["spectral_index", "energy_range_min", "energy_range_max"]: - if len(set(sim[itm])) > 1: + if len(np.unique(sim[itm])) > 1: raise NotImplementedError( - f"Simulation parameter '{itm}' varies across runs, which is unsupported." + f"Unsupported: '{itm}' differs across simulation runs" ) sim_info = SimulatedEventsInfo( @@ -255,7 +326,22 @@ def get_simulation_information( return sim_info, PowerLaw.from_simulation(sim_info, obstime=obs_time) def make_derived_columns(self, events: QTable) -> QTable: - events["weight"] = 1.0 * u.dimensionless_unscaled + """ + Add derived quantities (e.g., theta, FOV offsets) to the table. + + Parameters + ---------- + events : QTable + Table containing normalized events. + + Returns + ------- + QTable + Table with added derived columns. + """ + events["weight"] = ( + 1.0 * u.dimensionless_unscaled + ) # defer calculation of proper weights to later events["gh_score"].unit = u.dimensionless_unscaled events["theta"] = calculate_theta( events, @@ -275,7 +361,7 @@ def make_derived_columns(self, events: QTable) -> QTable: reco = SkyCoord(alt=events["reco_alt"], az=events["reco_az"], frame=AltAz()) nominal = NominalFrame(origin=pointing) reco_nominal = reco.transform_to(nominal) - events["reco_fov_lon"] = u.Quantity(-reco_nominal.fov_lon) + events["reco_fov_lon"] = u.Quantity(-reco_nominal.fov_lon) # minus for GADF events["reco_fov_lat"] = u.Quantity(reco_nominal.fov_lat) return events @@ -286,6 +372,30 @@ def make_event_weights( kind: str, fov_offset_bins: u.Quantity | None = None, ) -> QTable: + """ + Compute event weights to match the target spectrum. + + Parameters + ---------- + events : QTable + Input events. + spectrum : PowerLaw + Spectrum from simulation. + kind : str + Type of events ("gammas", etc.). + fov_offset_bins : Quantity, optional + Offset bins for integrating the diffuse flux into point source bins. + + Returns + ------- + QTable + Table with updated weights. + + Raises + ------ + ValueError + If `fov_offset_bins` is required but not provided. + """ if ( kind == "gammas" and self.target_spectrum.normalization.unit.is_equivalent( @@ -295,15 +405,16 @@ def make_event_weights( ): if fov_offset_bins is None: raise ValueError( - "Missing FoV offset bins for point-like gamma spectrum normalization." + "gamma_target_spectrum is point-like, but no fov offset bins " + "for the integration of the simulated diffuse spectrum were given." ) for low, high in zip(fov_offset_bins[:-1], fov_offset_bins[1:]): - mask = (events["true_source_fov_offset"] >= low) & ( - events["true_source_fov_offset"] < high - ) - events["weight"][mask] = calculate_event_weights( - events[mask]["true_energy"], + fov_mask = events["true_source_fov_offset"] >= low + fov_mask &= events["true_source_fov_offset"] < high + + events["weight"][fov_mask] = calculate_event_weights( + events[fov_mask]["true_energy"], target_spectrum=self.target_spectrum, simulated_spectrum=spectrum.integrate_cone(low, high), ) diff --git a/src/ctapipe/irf/tests/test_preprocessing.py b/src/ctapipe/irf/tests/test_preprocessing.py index a4b0040ac40..001aa742ef6 100644 --- a/src/ctapipe/irf/tests/test_preprocessing.py +++ b/src/ctapipe/irf/tests/test_preprocessing.py @@ -36,7 +36,6 @@ def test_normalise_column_names(dummy_table): geometry_reconstructor="geom", gammaness_classifier="classifier", ) - norm_table = epp.normalise_column_names(dummy_table) needed_cols = [ @@ -226,7 +225,7 @@ def test_loader_tel_table(gamma_diffuse_full_reco_file): "EventQualityQuery": { "quality_criteria": [ ("valid reco", "ExtraTreesRegressor_tel_is_valid"), - ("telescope ID", "tel_id == 35.0"), + ("telescope ID", "(tel_id == 35.0) | (tel_id == 19.0)"), ] }, } @@ -251,4 +250,4 @@ def test_loader_tel_table(gamma_diffuse_full_reco_file): assert sorted(columns) == sorted(events.colnames) assert np.all(events["ExtraTreesRegressor_tel_energy"] > 0 * u.TeV) - assert np.all(events["tel_id"] == 35.0) + assert np.all(np.isin(events["tel_id"], [19, 35])) From 298dc024ccc446a5cb9879befb04935dc8cd5aed Mon Sep 17 00:00:00 2001 From: "Georgios.Voutsinas" Date: Mon, 30 Jun 2025 10:49:28 +0200 Subject: [PATCH 05/33] updating docs --- docs/changes/2791.optimization.rst | 3 +++ docs/changes/2791.refactoring.rst | 3 +++ 2 files changed, 6 insertions(+) create mode 100644 docs/changes/2791.optimization.rst create mode 100644 docs/changes/2791.refactoring.rst diff --git a/docs/changes/2791.optimization.rst b/docs/changes/2791.optimization.rst new file mode 100644 index 00000000000..740dfc9466d --- /dev/null +++ b/docs/changes/2791.optimization.rst @@ -0,0 +1,3 @@ +Currently, for telescope cross calibration we have implemented our own table filtering. However similar functionality has been implemented in IRF tools. Our idea is to reuse the existing functionality from IRF tools for cross calibration. The added benefits is increased maintainability. Moreover, cross calibration memory efficiency will be improved by reading data by chunks. Finally, this can potentially benefit other usecases that load DL2 tables. + +In order to do that, we will propose some modifications in irf/preprocessing. However we should ensure that the default behaviour stays the same. diff --git a/docs/changes/2791.refactoring.rst b/docs/changes/2791.refactoring.rst new file mode 100644 index 00000000000..740dfc9466d --- /dev/null +++ b/docs/changes/2791.refactoring.rst @@ -0,0 +1,3 @@ +Currently, for telescope cross calibration we have implemented our own table filtering. However similar functionality has been implemented in IRF tools. Our idea is to reuse the existing functionality from IRF tools for cross calibration. The added benefits is increased maintainability. Moreover, cross calibration memory efficiency will be improved by reading data by chunks. Finally, this can potentially benefit other usecases that load DL2 tables. + +In order to do that, we will propose some modifications in irf/preprocessing. However we should ensure that the default behaviour stays the same. From 9aaba998f782e847b147bd6665eb0cf5904aa7cf Mon Sep 17 00:00:00 2001 From: "Georgios.Voutsinas" Date: Mon, 30 Jun 2025 15:42:11 +0200 Subject: [PATCH 06/33] fix the docs, ruff --- src/ctapipe/irf/preprocessing.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/ctapipe/irf/preprocessing.py b/src/ctapipe/irf/preprocessing.py index d0bfe93d64b..fe77445cc0a 100644 --- a/src/ctapipe/irf/preprocessing.py +++ b/src/ctapipe/irf/preprocessing.py @@ -394,7 +394,7 @@ def make_event_weights( Raises ------ ValueError - If `fov_offset_bins` is required but not provided. + If fov_offset_bins is required but not provided. """ if ( kind == "gammas" From 804231256037c5b224b10ed09568f4563f49cfaa Mon Sep 17 00:00:00 2001 From: "Georgios.Voutsinas" Date: Tue, 1 Jul 2025 12:33:42 +0200 Subject: [PATCH 07/33] addressing sonarqube, ruff --- src/ctapipe/irf/preprocessing.py | 2 +- src/ctapipe/irf/tests/test_preprocessing.py | 128 ++++++++------------ 2 files changed, 53 insertions(+), 77 deletions(-) diff --git a/src/ctapipe/irf/preprocessing.py b/src/ctapipe/irf/preprocessing.py index fe77445cc0a..54702729f3a 100644 --- a/src/ctapipe/irf/preprocessing.py +++ b/src/ctapipe/irf/preprocessing.py @@ -277,7 +277,7 @@ def load_preselected_events( return table, n_raw_events, meta def get_simulation_information( - self, loader, obs_time: u.Quantity + self, loader: TableLoader, obs_time: u.Quantity ) -> tuple[SimulatedEventsInfo, PowerLaw]: """ Extract simulation information from the input file. diff --git a/src/ctapipe/irf/tests/test_preprocessing.py b/src/ctapipe/irf/tests/test_preprocessing.py index 001aa742ef6..d4101f4070c 100644 --- a/src/ctapipe/irf/tests/test_preprocessing.py +++ b/src/ctapipe/irf/tests/test_preprocessing.py @@ -28,6 +28,48 @@ def dummy_table(): ) +@pytest.fixture() +def test_config(): + return { + "EventLoader": {"event_reader_function": "read_telescope_events_chunked"}, + "EventPreprocessor": { + "energy_reconstructor": "ExtraTreesRegressor", + "fixed_columns": [ + "obs_id", + "event_id", + "tel_id", + "ExtraTreesRegressor_tel_energy", + "ExtraTreesRegressor_tel_energy_uncert", + ], + "output_table_schema": [ + Column( + name="obs_id", dtype=np.uint64, description="Observation block ID" + ), + Column(name="event_id", dtype=np.uint64, description="Array event ID"), + Column(name="tel_id", dtype=np.uint64, description="Telescope ID"), + Column( + name="ExtraTreesRegressor_tel_energy", + unit=u.TeV, + description="Reconstructed energy", + ), + Column( + name="ExtraTreesRegressor_tel_energy_uncert", + unit=u.TeV, + description="Reconstructed energy uncertainty", + ), + ], + "apply_derived_columns": False, + "disable_column_renaming": True, + "apply_check_pointing": False, + }, + "EventQualityQuery": { + "quality_criteria": [ + ("valid reco", "ExtraTreesRegressor_tel_is_valid"), + ] + }, + } + + def test_normalise_column_names(dummy_table): from ctapipe.irf import EventPreprocessor @@ -106,10 +148,11 @@ def test_event_loader(gamma_diffuse_full_reco_file, irf_event_loader_test_config events = loader.make_event_weights( events, meta["spectrum"], "gammas", (0 * u.deg, 1 * u.deg) ) + assert "weight" in events.colnames -def test_preprocessor_tel_table_with_custom_reconstructor(tmp_path): +def test_preprocessor_tel_table_with_custom_reconstructor(tmp_path, test_config): from ctapipe.irf import EventPreprocessor # Create a test table with required columns @@ -131,41 +174,7 @@ def test_preprocessor_tel_table_with_custom_reconstructor(tmp_path): ) # Set up config - config = { - "EventPreprocessor": { - "energy_reconstructor": "ExtraTreesRegressor", - "fixed_columns": [ - "obs_id", - "event_id", - "tel_id", - "ExtraTreesRegressor_tel_energy", - "ExtraTreesRegressor_tel_energy_uncert", - ], - "output_table_schema": [ - Column( - name="obs_id", dtype=np.uint64, description="Observation block ID" - ), - Column(name="event_id", dtype=np.uint64, description="Array event ID"), - Column(name="tel_id", dtype=np.uint64, description="Telescope ID"), - Column( - name="ExtraTreesRegressor_tel_energy", - unit=u.TeV, - description="Reconstructed energy", - ), - Column( - name="ExtraTreesRegressor_tel_energy_uncert", - unit=u.TeV, - description="Reconstructed energy uncertainty", - ), - ], - "apply_derived_columns": False, - "disable_column_renaming": True, - "apply_check_pointing": False, - }, - "EventQualityQuery": { - "quality_criteria": [("valid reco", "ExtraTreesRegressor_tel_is_valid")] - }, - } + config = test_config # Create preprocessor with config preprocessor = EventPreprocessor(config=Config(config)) @@ -187,48 +196,15 @@ def test_preprocessor_tel_table_with_custom_reconstructor(tmp_path): assert np.all(processed["ExtraTreesRegressor_tel_energy"] > 0 * u.TeV) -def test_loader_tel_table(gamma_diffuse_full_reco_file): +def test_loader_tel_table(gamma_diffuse_full_reco_file, test_config): from ctapipe.irf import EventLoader, Spectra - test_config = { - "EventLoader": {"event_reader_function": "read_telescope_events_chunked"}, - "EventPreprocessor": { - "energy_reconstructor": "ExtraTreesRegressor", - "fixed_columns": [ - "obs_id", - "event_id", - "tel_id", - "ExtraTreesRegressor_tel_energy", - "ExtraTreesRegressor_tel_energy_uncert", - ], - "output_table_schema": [ - Column( - name="obs_id", dtype=np.uint64, description="Observation block ID" - ), - Column(name="event_id", dtype=np.uint64, description="Array event ID"), - Column(name="tel_id", dtype=np.uint64, description="Telescope ID"), - Column( - name="ExtraTreesRegressor_tel_energy", - unit=u.TeV, - description="Reconstructed energy", - ), - Column( - name="ExtraTreesRegressor_tel_energy_uncert", - unit=u.TeV, - description="Reconstructed energy uncertainty", - ), - ], - "apply_derived_columns": False, - "disable_column_renaming": True, - "apply_check_pointing": False, - }, - "EventQualityQuery": { - "quality_criteria": [ - ("valid reco", "ExtraTreesRegressor_tel_is_valid"), - ("telescope ID", "(tel_id == 35.0) | (tel_id == 19.0)"), - ] - }, - } + test_config["EventLoader"][ + "event_reader_function" + ] = "read_telescope_events_chunked" + test_config["EventQualityQuery"]["quality_criteria"].append( + ("telescope ID", "(tel_id == 35.0) | (tel_id == 19.0)"), + ) loader = EventLoader( config=Config(test_config), From 5d7b24bc4e7a2df84930c9195a9e3da947dcc6b7 Mon Sep 17 00:00:00 2001 From: "Georgios.Voutsinas" Date: Tue, 1 Jul 2025 14:23:51 +0200 Subject: [PATCH 08/33] addressing review, ruff --- src/ctapipe/irf/preprocessing.py | 28 ++++++++++++++++----- src/ctapipe/irf/tests/test_preprocessing.py | 2 +- 2 files changed, 23 insertions(+), 7 deletions(-) diff --git a/src/ctapipe/irf/preprocessing.py b/src/ctapipe/irf/preprocessing.py index 54702729f3a..5aef0acdabf 100644 --- a/src/ctapipe/irf/preprocessing.py +++ b/src/ctapipe/irf/preprocessing.py @@ -98,7 +98,7 @@ class EventPreprocessor(Component): output_table_schema = List( default_value=[ - Column(name="obs_id", dtype=np.uint64, description="Observation block ID"), + Column(name="obs_id", dtype=np.uint64, description="Observation Block ID"), Column(name="event_id", dtype=np.uint64, description="Array event ID"), Column(name="true_energy", unit=u.TeV, description="Simulated energy"), Column(name="true_az", unit=u.deg, description="Simulated azimuth"), @@ -115,10 +115,26 @@ class EventPreprocessor(Component): Column(name="pointing_az", unit=u.deg, description="Pointing azimuth"), Column(name="pointing_alt", unit=u.deg, description="Pointing altitude"), Column(name="theta", unit=u.deg, description="Angular offset from source"), - Column(name="true_source_fov_offset", unit=u.deg), - Column(name="reco_source_fov_offset", unit=u.deg), - Column(name="gh_score", unit=u.dimensionless_unscaled), - Column(name="weight", unit=u.dimensionless_unscaled), + Column( + name="true_source_fov_offset", + unit=u.deg, + description="Simulated angular offset from pointing direction", + ), + Column( + name="reco_source_fov_offset", + unit=u.deg, + description="Reconstructed angular offset from pointing direction", + ), + Column( + name="gh_score", + unit=u.dimensionless_unscaled, + description="prediction of the classifier, defined between [0,1]," + " where values close to 1 mean that the positive class" + " (e.g. gamma in gamma-ray analysis) is more likely", + ), + Column( + name="weight", unit=u.dimensionless_unscaled, description="Event weight" + ), ], help="Schema definition for output event QTable", ).tag(config=True) @@ -394,7 +410,7 @@ def make_event_weights( Raises ------ ValueError - If fov_offset_bins is required but not provided. + If ``fov_offset_bins`` is required but not provided. """ if ( kind == "gammas" diff --git a/src/ctapipe/irf/tests/test_preprocessing.py b/src/ctapipe/irf/tests/test_preprocessing.py index d4101f4070c..94e9d5b30f6 100644 --- a/src/ctapipe/irf/tests/test_preprocessing.py +++ b/src/ctapipe/irf/tests/test_preprocessing.py @@ -43,7 +43,7 @@ def test_config(): ], "output_table_schema": [ Column( - name="obs_id", dtype=np.uint64, description="Observation block ID" + name="obs_id", dtype=np.uint64, description="Observation Block ID" ), Column(name="event_id", dtype=np.uint64, description="Array event ID"), Column(name="tel_id", dtype=np.uint64, description="Telescope ID"), From c07cb5256b61080cbb2456a71f78911ef1a24607 Mon Sep 17 00:00:00 2001 From: "Georgios.Voutsinas" Date: Fri, 4 Jul 2025 13:38:08 +0200 Subject: [PATCH 09/33] modifying renaming functionality --- src/ctapipe/irf/preprocessing.py | 5 ++++- src/ctapipe/irf/tests/test_preprocessing.py | 22 +++++++++++++++++++++ 2 files changed, 26 insertions(+), 1 deletion(-) diff --git a/src/ctapipe/irf/preprocessing.py b/src/ctapipe/irf/preprocessing.py index 5aef0acdabf..8ecd361d850 100644 --- a/src/ctapipe/irf/preprocessing.py +++ b/src/ctapipe/irf/preprocessing.py @@ -156,7 +156,10 @@ def columns_to_rename(self) -> dict: "subarray_pointing_lat": "pointing_alt", "subarray_pointing_lon": "pointing_az", } - return {**default, **self.columns_to_rename_override} + if not self.columns_to_rename_override: + return default + else: + return self.columns_to_rename_override def normalise_column_names(self, events: QTable) -> QTable: """ diff --git a/src/ctapipe/irf/tests/test_preprocessing.py b/src/ctapipe/irf/tests/test_preprocessing.py index 94e9d5b30f6..69a9bcf2634 100644 --- a/src/ctapipe/irf/tests/test_preprocessing.py +++ b/src/ctapipe/irf/tests/test_preprocessing.py @@ -227,3 +227,25 @@ def test_loader_tel_table(gamma_diffuse_full_reco_file, test_config): assert sorted(columns) == sorted(events.colnames) assert np.all(events["ExtraTreesRegressor_tel_energy"] > 0 * u.TeV) assert np.all(np.isin(events["tel_id"], [19, 35])) + + +def test_name_overriding(dummy_table): + from ctapipe.irf import EventPreprocessor + + epp = EventPreprocessor( + energy_reconstructor="dummy", + geometry_reconstructor="geom", + gammaness_classifier="classifier", + disable_column_renaming=False, + fixed_columns=["obs_id", "event_id", "true_az", "true_alt"], + columns_to_rename_override={"true_energy": "false_energy"}, + ) + norm_table = epp.normalise_column_names(dummy_table) + columns = [ + "obs_id", + "event_id", + "false_energy", + "true_az", + "true_alt", + ] + assert sorted(columns) == sorted(norm_table.colnames) From 44a66a9b96961de1c0289f198814e4a589019e9f Mon Sep 17 00:00:00 2001 From: "Georgios.Voutsinas" Date: Mon, 7 Jul 2025 18:37:19 +0200 Subject: [PATCH 10/33] Improving changelog --- docs/changes/2791.optimization.rst | 4 +--- docs/changes/2791.refactoring.rst | 3 --- 2 files changed, 1 insertion(+), 6 deletions(-) delete mode 100644 docs/changes/2791.refactoring.rst diff --git a/docs/changes/2791.optimization.rst b/docs/changes/2791.optimization.rst index 740dfc9466d..c86b57e2d82 100644 --- a/docs/changes/2791.optimization.rst +++ b/docs/changes/2791.optimization.rst @@ -1,3 +1 @@ -Currently, for telescope cross calibration we have implemented our own table filtering. However similar functionality has been implemented in IRF tools. Our idea is to reuse the existing functionality from IRF tools for cross calibration. The added benefits is increased maintainability. Moreover, cross calibration memory efficiency will be improved by reading data by chunks. Finally, this can potentially benefit other usecases that load DL2 tables. - -In order to do that, we will propose some modifications in irf/preprocessing. However we should ensure that the default behaviour stays the same. +Generalise the DL2 table processing implemented in IRF tools in order to be used by the telescope cross-calibration usecase and potentially other usecases that ingest, filter and merge DL2 tables. diff --git a/docs/changes/2791.refactoring.rst b/docs/changes/2791.refactoring.rst deleted file mode 100644 index 740dfc9466d..00000000000 --- a/docs/changes/2791.refactoring.rst +++ /dev/null @@ -1,3 +0,0 @@ -Currently, for telescope cross calibration we have implemented our own table filtering. However similar functionality has been implemented in IRF tools. Our idea is to reuse the existing functionality from IRF tools for cross calibration. The added benefits is increased maintainability. Moreover, cross calibration memory efficiency will be improved by reading data by chunks. Finally, this can potentially benefit other usecases that load DL2 tables. - -In order to do that, we will propose some modifications in irf/preprocessing. However we should ensure that the default behaviour stays the same. From 2ca592e325b5302743bf86b1ef3c7c592debc0b0 Mon Sep 17 00:00:00 2001 From: "Georgios.Voutsinas" Date: Mon, 7 Jul 2025 18:38:05 +0200 Subject: [PATCH 11/33] Improving changelog --- docs/changes/2791.optimization.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/changes/2791.optimization.rst b/docs/changes/2791.optimization.rst index c86b57e2d82..9de6f6b3a5e 100644 --- a/docs/changes/2791.optimization.rst +++ b/docs/changes/2791.optimization.rst @@ -1 +1 @@ -Generalise the DL2 table processing implemented in IRF tools in order to be used by the telescope cross-calibration usecase and potentially other usecases that ingest, filter and merge DL2 tables. +Generalise the DL2 table processing implemented in IRF tools in order to be used by other usecases that ingest, filter and merge DL2 tables. From 9042d95ca8626186e2a3e9227d9f20683f1a4edb Mon Sep 17 00:00:00 2001 From: "Georgios.Voutsinas" Date: Tue, 8 Jul 2025 21:55:55 +0200 Subject: [PATCH 12/33] Removing disable_column_renaming --- src/ctapipe/irf/preprocessing.py | 49 +++++++++------------ src/ctapipe/irf/tests/test_preprocessing.py | 5 ++- 2 files changed, 23 insertions(+), 31 deletions(-) diff --git a/src/ctapipe/irf/preprocessing.py b/src/ctapipe/irf/preprocessing.py index 8ecd361d850..c497c3b61d4 100644 --- a/src/ctapipe/irf/preprocessing.py +++ b/src/ctapipe/irf/preprocessing.py @@ -15,6 +15,7 @@ ) from pyirf.utils import calculate_source_fov_offset, calculate_theta from tables import NoSuchNodeError +from traitlets import Any from ..compat import COPY_IF_NEEDED from ..containers import CoordinateFrameType @@ -67,13 +68,6 @@ class EventPreprocessor(Component): help="Prefix of the classifier `_prediction` column", ).tag(config=True) - disable_column_renaming = Bool( - default_value=False, - help="Disabling the renaming of the columns from" - "e.g. from ReconstructorNameRegressor_energy" - "to reco_energy.", - ).tag(config=True) - apply_derived_columns = Bool( default_value=True, help="Whether to compute derived columns" ).tag(config=True) @@ -85,15 +79,16 @@ class EventPreprocessor(Component): fixed_columns = List( Unicode(), default_value=["obs_id", "event_id", "true_energy", "true_az", "true_alt"], - help="Columns to always keep from the original input", + help="Columns to keep from the input table without renaming.", ).tag(config=True) - # Optional user override - columns_to_rename_override = Dict( - key_trait=Unicode(), - value_trait=Unicode(), - default_value={}, - help="Override of columns to rename. If empty, they are generated dynamically.", + columns_to_rename_override = Any( + default_value=None, + help=( + "Dictionary of columns to rename. " + "Set to None to apply default renaming. " + "Set to {} to disable renaming entirely." + ), ).tag(config=True) output_table_schema = List( @@ -145,21 +140,17 @@ def __init__(self, config=None, parent=None, **kwargs): @property def columns_to_rename(self) -> dict: - if self.disable_column_renaming: - return {} - - default = { - f"{self.energy_reconstructor}_energy": "reco_energy", - f"{self.geometry_reconstructor}_az": "reco_az", - f"{self.geometry_reconstructor}_alt": "reco_alt", - f"{self.gammaness_classifier}_prediction": "gh_score", - "subarray_pointing_lat": "pointing_alt", - "subarray_pointing_lon": "pointing_az", - } - if not self.columns_to_rename_override: - return default - else: - return self.columns_to_rename_override + if self.columns_to_rename_override is None: + return { + f"{self.energy_reconstructor}_energy": "reco_energy", + f"{self.geometry_reconstructor}_az": "reco_az", + f"{self.geometry_reconstructor}_alt": "reco_alt", + f"{self.gammaness_classifier}_prediction": "gh_score", + "subarray_pointing_lat": "pointing_alt", + "subarray_pointing_lon": "pointing_az", + } + + return self.columns_to_rename_override def normalise_column_names(self, events: QTable) -> QTable: """ diff --git a/src/ctapipe/irf/tests/test_preprocessing.py b/src/ctapipe/irf/tests/test_preprocessing.py index 69a9bcf2634..0e77e17d47b 100644 --- a/src/ctapipe/irf/tests/test_preprocessing.py +++ b/src/ctapipe/irf/tests/test_preprocessing.py @@ -34,6 +34,7 @@ def test_config(): "EventLoader": {"event_reader_function": "read_telescope_events_chunked"}, "EventPreprocessor": { "energy_reconstructor": "ExtraTreesRegressor", + "gammaness_classifier": "ExtraTreesClassifier", "fixed_columns": [ "obs_id", "event_id", @@ -41,6 +42,7 @@ def test_config(): "ExtraTreesRegressor_tel_energy", "ExtraTreesRegressor_tel_energy_uncert", ], + "columns_to_rename_override": {}, "output_table_schema": [ Column( name="obs_id", dtype=np.uint64, description="Observation Block ID" @@ -59,7 +61,7 @@ def test_config(): ), ], "apply_derived_columns": False, - "disable_column_renaming": True, + # "disable_column_renaming": True, "apply_check_pointing": False, }, "EventQualityQuery": { @@ -236,7 +238,6 @@ def test_name_overriding(dummy_table): energy_reconstructor="dummy", geometry_reconstructor="geom", gammaness_classifier="classifier", - disable_column_renaming=False, fixed_columns=["obs_id", "event_id", "true_az", "true_alt"], columns_to_rename_override={"true_energy": "false_energy"}, ) From 4661647a2a931a40f3d7622fe1417fa937cc6dc8 Mon Sep 17 00:00:00 2001 From: "Georgios.Voutsinas" Date: Wed, 9 Jul 2025 16:37:45 +0200 Subject: [PATCH 13/33] addressing review --- ...2791.optimization.rst => 2791.feature.rst} | 0 src/ctapipe/irf/preprocessing.py | 40 +++++++++---------- src/ctapipe/irf/tests/test_preprocessing.py | 4 +- 3 files changed, 22 insertions(+), 22 deletions(-) rename docs/changes/{2791.optimization.rst => 2791.feature.rst} (100%) diff --git a/docs/changes/2791.optimization.rst b/docs/changes/2791.feature.rst similarity index 100% rename from docs/changes/2791.optimization.rst rename to docs/changes/2791.feature.rst diff --git a/src/ctapipe/irf/preprocessing.py b/src/ctapipe/irf/preprocessing.py index c497c3b61d4..6b5290c8686 100644 --- a/src/ctapipe/irf/preprocessing.py +++ b/src/ctapipe/irf/preprocessing.py @@ -15,7 +15,7 @@ ) from pyirf.utils import calculate_source_fov_offset, calculate_theta from tables import NoSuchNodeError -from traitlets import Any +from traitlets import default from ..compat import COPY_IF_NEEDED from ..containers import CoordinateFrameType @@ -82,12 +82,14 @@ class EventPreprocessor(Component): help="Columns to keep from the input table without renaming.", ).tag(config=True) - columns_to_rename_override = Any( - default_value=None, + columns_to_rename = Dict( + key_trait=Unicode(), + value_trait=Unicode(), help=( "Dictionary of columns to rename. " - "Set to None to apply default renaming. " - "Set to {} to disable renaming entirely." + "Leave unset to apply default renaming. " + "Set to an empty dictionary to disable renaming entirely. " + "Set to a partial dictionary to override only some names." ), ).tag(config=True) @@ -138,19 +140,16 @@ def __init__(self, config=None, parent=None, **kwargs): super().__init__(config=config, parent=parent, **kwargs) self.quality_query = EventQualityQuery(parent=self) - @property - def columns_to_rename(self) -> dict: - if self.columns_to_rename_override is None: - return { - f"{self.energy_reconstructor}_energy": "reco_energy", - f"{self.geometry_reconstructor}_az": "reco_az", - f"{self.geometry_reconstructor}_alt": "reco_alt", - f"{self.gammaness_classifier}_prediction": "gh_score", - "subarray_pointing_lat": "pointing_alt", - "subarray_pointing_lon": "pointing_az", - } - - return self.columns_to_rename_override + @default("columns_to_rename") + def _default_columns_to_rename(self): + return { + f"{self.energy_reconstructor}_energy": "reco_energy", + f"{self.geometry_reconstructor}_az": "reco_az", + f"{self.geometry_reconstructor}_alt": "reco_alt", + f"{self.gammaness_classifier}_prediction": "gh_score", + "subarray_pointing_lat": "pointing_alt", + "subarray_pointing_lon": "pointing_az", + } def normalise_column_names(self, events: QTable) -> QTable: """ @@ -185,8 +184,9 @@ def normalise_column_names(self, events: QTable) -> QTable: "At the moment only pointing in altaz is supported." ) - rename_from = list(self.columns_to_rename.keys()) - rename_to = list(self.columns_to_rename.values()) + rename_dict = self.columns_to_rename + rename_from = list(rename_dict.keys()) + rename_to = list(rename_dict.values()) keep_columns = self.fixed_columns + rename_from for col in keep_columns: diff --git a/src/ctapipe/irf/tests/test_preprocessing.py b/src/ctapipe/irf/tests/test_preprocessing.py index 0e77e17d47b..79eb1995d65 100644 --- a/src/ctapipe/irf/tests/test_preprocessing.py +++ b/src/ctapipe/irf/tests/test_preprocessing.py @@ -42,7 +42,7 @@ def test_config(): "ExtraTreesRegressor_tel_energy", "ExtraTreesRegressor_tel_energy_uncert", ], - "columns_to_rename_override": {}, + "columns_to_rename": {}, "output_table_schema": [ Column( name="obs_id", dtype=np.uint64, description="Observation Block ID" @@ -239,7 +239,7 @@ def test_name_overriding(dummy_table): geometry_reconstructor="geom", gammaness_classifier="classifier", fixed_columns=["obs_id", "event_id", "true_az", "true_alt"], - columns_to_rename_override={"true_energy": "false_energy"}, + columns_to_rename={"true_energy": "false_energy"}, ) norm_table = epp.normalise_column_names(dummy_table) columns = [ From dd4902a5e10522a470991269dae8a3f00677f769 Mon Sep 17 00:00:00 2001 From: "Georgios.Voutsinas" Date: Tue, 15 Jul 2025 11:00:21 +0200 Subject: [PATCH 14/33] simpifying the table construction --- src/ctapipe/conftest.py | 19 ++-- src/ctapipe/irf/preprocessing.py | 111 ++++++++++++-------- src/ctapipe/irf/tests/test_preprocessing.py | 73 +++++++++++-- 3 files changed, 139 insertions(+), 64 deletions(-) diff --git a/src/ctapipe/conftest.py b/src/ctapipe/conftest.py index 57f0a618c1e..195f4da940e 100644 --- a/src/ctapipe/conftest.py +++ b/src/ctapipe/conftest.py @@ -832,16 +832,21 @@ def irf_events_table(): names=bulk, units={c: tab[c].unit for c in bulk}, ) - # Setting values following pyirf test in pyirf/irf/tests/test_background.py - bulk_tab["reco_energy"] = np.append(np.full(N1, 1), np.full(N2, 2)) * u.TeV - bulk_tab["true_energy"] = np.append(np.full(N1, 0.9), np.full(N2, 2.1)) * u.TeV - bulk_tab["reco_source_fov_offset"] = ( - np.append(np.full(N1, 0.1), np.full(N2, 0.05)) * u.deg + bulk_tab.replace_column( + "reco_energy", np.append(np.full(N1, 1), np.full(N2, 2)) * u.TeV + ) + bulk_tab.replace_column( + "true_energy", np.append(np.full(N1, 0.9), np.full(N2, 2.1)) * u.TeV ) - bulk_tab["true_source_fov_offset"] = ( - np.append(np.full(N1, 0.11), np.full(N2, 0.04)) * u.deg + bulk_tab.replace_column( + "reco_source_fov_offset", np.append(np.full(N1, 0.1), np.full(N2, 0.05)) * u.deg ) + bulk_tab.replace_column( + "true_source_fov_offset", + np.append(np.full(N1, 0.11), np.full(N2, 0.04)) * u.deg, + ) + for name in unitless: bulk_tab.add_column( Column(name=name, unit=tab[name].unit, data=np.zeros(N) * np.nan) diff --git a/src/ctapipe/irf/preprocessing.py b/src/ctapipe/irf/preprocessing.py index 6b5290c8686..728515c63b5 100644 --- a/src/ctapipe/irf/preprocessing.py +++ b/src/ctapipe/irf/preprocessing.py @@ -76,12 +76,6 @@ class EventPreprocessor(Component): default_value=True, help="Accept only pointing in altaz and not divergent." ).tag(config=True) - fixed_columns = List( - Unicode(), - default_value=["obs_id", "event_id", "true_energy", "true_az", "true_alt"], - help="Columns to keep from the input table without renaming.", - ).tag(config=True) - columns_to_rename = Dict( key_trait=Unicode(), value_trait=Unicode(), @@ -103,35 +97,8 @@ class EventPreprocessor(Component): Column(name="reco_energy", unit=u.TeV, description="Reconstructed energy"), Column(name="reco_az", unit=u.deg, description="Reconstructed azimuth"), Column(name="reco_alt", unit=u.deg, description="Reconstructed altitude"), - Column( - name="reco_fov_lat", unit=u.deg, description="Reconstructed FOV lat" - ), - Column( - name="reco_fov_lon", unit=u.deg, description="Reconstructed FOV lon" - ), Column(name="pointing_az", unit=u.deg, description="Pointing azimuth"), Column(name="pointing_alt", unit=u.deg, description="Pointing altitude"), - Column(name="theta", unit=u.deg, description="Angular offset from source"), - Column( - name="true_source_fov_offset", - unit=u.deg, - description="Simulated angular offset from pointing direction", - ), - Column( - name="reco_source_fov_offset", - unit=u.deg, - description="Reconstructed angular offset from pointing direction", - ), - Column( - name="gh_score", - unit=u.dimensionless_unscaled, - description="prediction of the classifier, defined between [0,1]," - " where values close to 1 mean that the positive class" - " (e.g. gamma in gamma-ray analysis) is more likely", - ), - Column( - name="weight", unit=u.dimensionless_unscaled, description="Event weight" - ), ], help="Schema definition for output event QTable", ).tag(config=True) @@ -184,19 +151,24 @@ def normalise_column_names(self, events: QTable) -> QTable: "At the moment only pointing in altaz is supported." ) + columns_to_keep = [] + for col in self.output_table_schema: + columns_to_keep.append(col.name) + rename_dict = self.columns_to_rename rename_from = list(rename_dict.keys()) rename_to = list(rename_dict.values()) - keep_columns = self.fixed_columns + rename_from - for col in keep_columns: + fixed_columns = list(set(columns_to_keep) - set(rename_to)) + columns_to_read = fixed_columns + rename_from + for col in columns_to_read: if col not in events.colnames: raise ValueError( f"Input files must conform to the ctapipe DL2 data model. " f"Required column {col} is missing." ) - events = QTable(events[keep_columns], copy=COPY_IF_NEEDED) + events = QTable(events[columns_to_read], copy=COPY_IF_NEEDED) if rename_from and rename_to: events.rename_columns(rename_from, rename_to) return events @@ -204,14 +176,60 @@ def normalise_column_names(self, events: QTable) -> QTable: def make_empty_table(self) -> QTable: """ Create an empty event table based on the configured output schema. - - Returns - ------- - QTable - Empty event table with configured schema. """ + schema = list(self.output_table_schema) # make a copy! + + if self.apply_derived_columns: + schema.extend( + [ + Column( + name="reco_fov_lat", + unit=u.deg, + description="Reconstructed FOV lat", + ), + Column( + name="reco_fov_lon", + unit=u.deg, + description="Reconstructed FOV lon", + ), + Column( + name="theta", + unit=u.deg, + description="Angular offset from source", + ), + Column( + name="true_source_fov_offset", + unit=u.deg, + description="Simulated angular offset from pointing direction", + ), + Column( + name="reco_source_fov_offset", + unit=u.deg, + description="Reconstructed angular offset from pointing direction", + ), + Column( + name="gh_score", + unit=u.dimensionless_unscaled, + description="prediction of the classifier, defined between [0,1]," + " where values close to 1 mean that the positive class" + " (e.g. gamma in gamma-ray analysis) is more likely", + ), + Column( + name="weight", + unit=u.dimensionless_unscaled, + description="Event weight", + ), + ] + ) - return QTable(self.output_table_schema) + return QTable( + names=[col.name for col in schema], + dtype=[col.dtype for col in schema], + units=[col.unit for col in schema] + if any(col.unit for col in schema) + else None, + meta={}, + ) class EventLoader(Component): @@ -278,6 +296,7 @@ def load_preselected_events( selected = events[self.epp.quality_query.get_table_mask(events)] selected = self.epp.normalise_column_names(selected) if self.epp.apply_derived_columns: + # header = self.make_derived_columns(header) selected = self.make_derived_columns(selected) bits.append(selected) n_raw_events += len(events) @@ -349,10 +368,6 @@ def make_derived_columns(self, events: QTable) -> QTable: QTable Table with added derived columns. """ - events["weight"] = ( - 1.0 * u.dimensionless_unscaled - ) # defer calculation of proper weights to later - events["gh_score"].unit = u.dimensionless_unscaled events["theta"] = calculate_theta( events, assumed_source_az=events["true_az"], @@ -373,6 +388,10 @@ def make_derived_columns(self, events: QTable) -> QTable: reco_nominal = reco.transform_to(nominal) events["reco_fov_lon"] = u.Quantity(-reco_nominal.fov_lon) # minus for GADF events["reco_fov_lat"] = u.Quantity(reco_nominal.fov_lat) + events["weight"] = ( + 1.0 * u.dimensionless_unscaled + ) # defer calculation of proper weights to later + events["gh_score"].unit = u.dimensionless_unscaled return events def make_event_weights( diff --git a/src/ctapipe/irf/tests/test_preprocessing.py b/src/ctapipe/irf/tests/test_preprocessing.py index 79eb1995d65..98420a55a0b 100644 --- a/src/ctapipe/irf/tests/test_preprocessing.py +++ b/src/ctapipe/irf/tests/test_preprocessing.py @@ -7,7 +7,7 @@ from traitlets.config import Config -@pytest.fixture(scope="module") +@pytest.fixture(scope="function") def dummy_table(): """Dummy table to test column renaming.""" return Table( @@ -28,20 +28,13 @@ def dummy_table(): ) -@pytest.fixture() +@pytest.fixture(scope="function") def test_config(): return { "EventLoader": {"event_reader_function": "read_telescope_events_chunked"}, "EventPreprocessor": { "energy_reconstructor": "ExtraTreesRegressor", "gammaness_classifier": "ExtraTreesClassifier", - "fixed_columns": [ - "obs_id", - "event_id", - "tel_id", - "ExtraTreesRegressor_tel_energy", - "ExtraTreesRegressor_tel_energy_uncert", - ], "columns_to_rename": {}, "output_table_schema": [ Column( @@ -75,10 +68,30 @@ def test_config(): def test_normalise_column_names(dummy_table): from ctapipe.irf import EventPreprocessor + output_table_schema = [ + Column(name="obs_id", dtype=np.uint64, description="Observation Block ID"), + Column(name="event_id", dtype=np.uint64, description="Array event ID"), + Column(name="true_energy", unit=u.TeV, description="Simulated energy"), + Column(name="true_az", unit=u.deg, description="Simulated azimuth"), + Column(name="true_alt", unit=u.deg, description="Simulated altitude"), + Column(name="reco_energy", unit=u.TeV, description="Reconstructed energy"), + Column(name="reco_az", unit=u.deg, description="Reconstructed azimuth"), + Column(name="reco_alt", unit=u.deg, description="Reconstructed altitude"), + Column(name="pointing_alt", unit=u.deg, description="Pointing latitude"), + Column(name="pointing_az", unit=u.deg, description="Pointing longitude"), + Column( + name="gh_score", + unit=u.dimensionless_unscaled, + description="prediction of the classifier, defined between [0,1]," + " where values close to 1 mean that the positive class" + " (e.g. gamma in gamma-ray analysis) is more likely", + ), + ] epp = EventPreprocessor( energy_reconstructor="dummy", geometry_reconstructor="geom", gammaness_classifier="classifier", + output_table_schema=output_table_schema, ) norm_table = epp.normalise_column_names(dummy_table) @@ -104,6 +117,7 @@ def test_normalise_column_names(dummy_table): energy_reconstructor="dummy", geometry_reconstructor="geom", gammaness_classifier="classifier", + output_table_schema=output_table_schema, ) _ = epp.normalise_column_names(dummy_table) @@ -142,7 +156,6 @@ def test_event_loader(gamma_diffuse_full_reco_file, irf_event_loader_test_config ] assert sorted(columns) == sorted(events.colnames) - assert isinstance(count, int) assert isinstance(meta["sim_info"], SimulatedEventsInfo) assert isinstance(meta["spectrum"], PowerLaw) @@ -238,8 +251,39 @@ def test_name_overriding(dummy_table): energy_reconstructor="dummy", geometry_reconstructor="geom", gammaness_classifier="classifier", - fixed_columns=["obs_id", "event_id", "true_az", "true_alt"], columns_to_rename={"true_energy": "false_energy"}, + output_table_schema=[ + Column(name="obs_id", dtype=np.uint64, description="Observation Block ID"), + Column(name="event_id", dtype=np.uint64, description="Array event ID"), + Column(name="false_energy", unit=u.TeV, description="Simulated energy"), + Column(name="true_az", unit=u.deg, description="Simulated azimuth"), + Column(name="true_alt", unit=u.deg, description="Simulated altitude"), + Column(name="dummy_energy", unit=u.TeV, description="Reconstructed energy"), + Column(name="geom_az", unit=u.deg, description="Reconstructed azimuth"), + Column(name="geom_alt", unit=u.deg, description="Reconstructed altitude"), + Column( + name="subarray_pointing_frame", + unit=u.dimensionless_unscaled, + description="Pointing frame", + ), + Column( + name="subarray_pointing_lat", + unit=u.deg, + description="Pointing latitude", + ), + Column( + name="subarray_pointing_lon", + unit=u.deg, + description="Pointing longitude", + ), + Column( + name="classifier_prediction", + unit=u.dimensionless_unscaled, + description="prediction of the classifier, defined between [0,1]," + " where values close to 1 mean that the positive class" + " (e.g. gamma in gamma-ray analysis) is more likely", + ), + ], ) norm_table = epp.normalise_column_names(dummy_table) columns = [ @@ -248,5 +292,12 @@ def test_name_overriding(dummy_table): "false_energy", "true_az", "true_alt", + "dummy_energy", + "classifier_prediction", + "geom_alt", + "geom_az", + "subarray_pointing_frame", + "subarray_pointing_lat", + "subarray_pointing_lon", ] assert sorted(columns) == sorted(norm_table.colnames) From 42f58d2dd7e715fc51d3203ea30f57cc32a7fb8b Mon Sep 17 00:00:00 2001 From: "Georgios.Voutsinas" Date: Tue, 15 Jul 2025 12:00:54 +0200 Subject: [PATCH 15/33] addressing review --- src/ctapipe/irf/preprocessing.py | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/src/ctapipe/irf/preprocessing.py b/src/ctapipe/irf/preprocessing.py index 728515c63b5..1a26ac8333d 100644 --- a/src/ctapipe/irf/preprocessing.py +++ b/src/ctapipe/irf/preprocessing.py @@ -151,9 +151,7 @@ def normalise_column_names(self, events: QTable) -> QTable: "At the moment only pointing in altaz is supported." ) - columns_to_keep = [] - for col in self.output_table_schema: - columns_to_keep.append(col.name) + columns_to_keep = [col.name for col in self.output_table_schema] rename_dict = self.columns_to_rename rename_from = list(rename_dict.keys()) @@ -177,7 +175,9 @@ def make_empty_table(self) -> QTable: """ Create an empty event table based on the configured output schema. """ - schema = list(self.output_table_schema) # make a copy! + schema = list( + self.output_table_schema + ) # make a shallow copy to extend the schema with derived columns if self.apply_derived_columns: schema.extend( @@ -296,7 +296,6 @@ def load_preselected_events( selected = events[self.epp.quality_query.get_table_mask(events)] selected = self.epp.normalise_column_names(selected) if self.epp.apply_derived_columns: - # header = self.make_derived_columns(header) selected = self.make_derived_columns(selected) bits.append(selected) n_raw_events += len(events) From 36f3eebdff4abe8247e1d7118ea01e43049991c3 Mon Sep 17 00:00:00 2001 From: "Georgios.Voutsinas" Date: Wed, 16 Jul 2025 12:11:54 +0200 Subject: [PATCH 16/33] relocating preprocessing to io --- .../dl2_tables_preprocessing.py} | 3 ++- src/ctapipe/{irf => io}/tests/test_preprocessing.py | 12 +++++++----- src/ctapipe/irf/__init__.py | 3 ++- src/ctapipe/irf/optimize.py | 3 ++- src/ctapipe/tools/compute_irf.py | 3 ++- 5 files changed, 15 insertions(+), 9 deletions(-) rename src/ctapipe/{irf/preprocessing.py => io/dl2_tables_preprocessing.py} (99%) rename src/ctapipe/{irf => io}/tests/test_preprocessing.py (96%) diff --git a/src/ctapipe/irf/preprocessing.py b/src/ctapipe/io/dl2_tables_preprocessing.py similarity index 99% rename from src/ctapipe/irf/preprocessing.py rename to src/ctapipe/io/dl2_tables_preprocessing.py index 1a26ac8333d..e54f90ff495 100644 --- a/src/ctapipe/irf/preprocessing.py +++ b/src/ctapipe/io/dl2_tables_preprocessing.py @@ -17,13 +17,14 @@ from tables import NoSuchNodeError from traitlets import default +from ctapipe.irf.spectra import SPECTRA, Spectra + from ..compat import COPY_IF_NEEDED from ..containers import CoordinateFrameType from ..coordinates import NominalFrame from ..core import Component, QualityQuery from ..core.traits import Bool, Dict, List, Tuple, Unicode from ..io import TableLoader -from .spectra import SPECTRA, Spectra __all__ = ["EventLoader", "EventPreprocessor", "EventQualityQuery"] diff --git a/src/ctapipe/irf/tests/test_preprocessing.py b/src/ctapipe/io/tests/test_preprocessing.py similarity index 96% rename from src/ctapipe/irf/tests/test_preprocessing.py rename to src/ctapipe/io/tests/test_preprocessing.py index 98420a55a0b..a9a9e1af3be 100644 --- a/src/ctapipe/irf/tests/test_preprocessing.py +++ b/src/ctapipe/io/tests/test_preprocessing.py @@ -66,7 +66,7 @@ def test_config(): def test_normalise_column_names(dummy_table): - from ctapipe.irf import EventPreprocessor + from ctapipe.io.dl2_tables_preprocessing import EventPreprocessor output_table_schema = [ Column(name="obs_id", dtype=np.uint64, description="Observation Block ID"), @@ -123,7 +123,8 @@ def test_normalise_column_names(dummy_table): def test_event_loader(gamma_diffuse_full_reco_file, irf_event_loader_test_config): - from ctapipe.irf import EventLoader, Spectra + from ctapipe.io.dl2_tables_preprocessing import EventLoader + from ctapipe.irf import Spectra loader = EventLoader( config=irf_event_loader_test_config, @@ -168,7 +169,7 @@ def test_event_loader(gamma_diffuse_full_reco_file, irf_event_loader_test_config def test_preprocessor_tel_table_with_custom_reconstructor(tmp_path, test_config): - from ctapipe.irf import EventPreprocessor + from ctapipe.io.dl2_tables_preprocessing import EventPreprocessor # Create a test table with required columns table = QTable( @@ -212,7 +213,8 @@ def test_preprocessor_tel_table_with_custom_reconstructor(tmp_path, test_config) def test_loader_tel_table(gamma_diffuse_full_reco_file, test_config): - from ctapipe.irf import EventLoader, Spectra + from ctapipe.io.dl2_tables_preprocessing import EventLoader + from ctapipe.irf import Spectra test_config["EventLoader"][ "event_reader_function" @@ -245,7 +247,7 @@ def test_loader_tel_table(gamma_diffuse_full_reco_file, test_config): def test_name_overriding(dummy_table): - from ctapipe.irf import EventPreprocessor + from ctapipe.io.dl2_tables_preprocessing import EventPreprocessor epp = EventPreprocessor( energy_reconstructor="dummy", diff --git a/src/ctapipe/irf/__init__.py b/src/ctapipe/irf/__init__.py index dbbd812e1c1..83cbf973cc6 100644 --- a/src/ctapipe/irf/__init__.py +++ b/src/ctapipe/irf/__init__.py @@ -8,6 +8,8 @@ raise OptionalDependencyMissing("pyirf") from None +from ctapipe.io.dl2_tables_preprocessing import EventLoader, EventPreprocessor + from .benchmarks import ( AngularResolution2dMaker, EnergyBiasResolution2dMaker, @@ -31,7 +33,6 @@ PointSourceSensitivityOptimizer, ThetaPercentileCutCalculator, ) -from .preprocessing import EventLoader, EventPreprocessor from .spectra import ENERGY_FLUX_UNIT, FLUX_UNIT, SPECTRA, Spectra __all__ = [ diff --git a/src/ctapipe/irf/optimize.py b/src/ctapipe/irf/optimize.py index fd8f9e9ad52..02a1482eb05 100644 --- a/src/ctapipe/irf/optimize.py +++ b/src/ctapipe/irf/optimize.py @@ -11,10 +11,11 @@ from pyirf.cut_optimization import optimize_gh_cut from pyirf.cuts import calculate_percentile_cut, evaluate_binned_cut +from ctapipe.io.dl2_tables_preprocessing import EventQualityQuery + from ..core import Component, QualityQuery from ..core.traits import AstroQuantity, Float, Integer, Path from .binning import DefaultRecoEnergyBins, ResultValidRange -from .preprocessing import EventQualityQuery __all__ = [ "CutOptimizerBase", diff --git a/src/ctapipe/tools/compute_irf.py b/src/ctapipe/tools/compute_irf.py index f9465293ec7..471cc37e898 100644 --- a/src/ctapipe/tools/compute_irf.py +++ b/src/ctapipe/tools/compute_irf.py @@ -17,6 +17,8 @@ from pyirf.cuts import evaluate_binned_cut from pyirf.io import create_rad_max_hdu +from ctapipe.io.dl2_tables_preprocessing import EventQualityQuery + from ..core import Provenance, Tool, ToolConfigurationError, traits from ..core.traits import AstroQuantity, Bool, Integer, classes_with_traits, flag from ..irf import ( @@ -36,7 +38,6 @@ EnergyDispersionMakerBase, PSFMakerBase, ) -from ..irf.preprocessing import EventQualityQuery __all__ = ["IrfTool"] From d03577536e387076b37b9512bee320b30fdd5bf0 Mon Sep 17 00:00:00 2001 From: "Georgios.Voutsinas" Date: Wed, 16 Jul 2025 18:23:57 +0200 Subject: [PATCH 17/33] fixing docs and circular imports --- docs/api-reference/io/index.rst | 3 ++ docs/api-reference/irf/index.rst | 1 - docs/changes/2791.feature.rst | 1 + src/ctapipe/conftest.py | 6 ++-- src/ctapipe/io/__init__.py | 3 ++ src/ctapipe/io/dl2_tables_preprocessing.py | 24 +++++++------- src/ctapipe/io/tests/test_preprocessing.py | 32 +++++++++---------- src/ctapipe/irf/__init__.py | 7 ++-- src/ctapipe/irf/optimize.py | 11 +++---- src/ctapipe/irf/tests/test_benchmarks.py | 6 ++-- src/ctapipe/irf/tests/test_optimize.py | 12 +++---- src/ctapipe/resources/compute_irf.yaml | 4 +-- src/ctapipe/resources/optimize_cuts.yaml | 4 +-- src/ctapipe/tools/compute_irf.py | 15 ++++----- src/ctapipe/tools/optimize_event_selection.py | 11 ++++--- src/ctapipe/tools/tests/test_compute_irf.py | 4 +-- 16 files changed, 74 insertions(+), 70 deletions(-) diff --git a/docs/api-reference/io/index.rst b/docs/api-reference/io/index.rst index fc9b9218fbd..6c184c7e724 100644 --- a/docs/api-reference/io/index.rst +++ b/docs/api-reference/io/index.rst @@ -248,6 +248,9 @@ Reference/API .. automodapi:: ctapipe.io.metadata :no-inheritance-diagram: +.. automodapi:: ctapipe.io.dl2_tables_preprocessing + :no-inheritance-diagram: + .. automodapi:: ctapipe.io.eventsource :no-inheritance-diagram: diff --git a/docs/api-reference/irf/index.rst b/docs/api-reference/irf/index.rst index a8f2cc41f3b..fab2810178f 100644 --- a/docs/api-reference/irf/index.rst +++ b/docs/api-reference/irf/index.rst @@ -33,7 +33,6 @@ Submodules irfs benchmarks binning - preprocessing spectra diff --git a/docs/changes/2791.feature.rst b/docs/changes/2791.feature.rst index 9de6f6b3a5e..7f8e6fecbcb 100644 --- a/docs/changes/2791.feature.rst +++ b/docs/changes/2791.feature.rst @@ -1 +1,2 @@ Generalise the DL2 table processing implemented in IRF tools in order to be used by other usecases that ingest, filter and merge DL2 tables. +The module was moved from IRF to IO. diff --git a/src/ctapipe/conftest.py b/src/ctapipe/conftest.py index 195f4da940e..a4beda28061 100644 --- a/src/ctapipe/conftest.py +++ b/src/ctapipe/conftest.py @@ -781,7 +781,7 @@ def irf_event_loader_test_config(): return Config( { - "EventPreprocessor": { + "DL2EventPreprocessor": { "energy_reconstructor": "ExtraTreesRegressor", "geometry_reconstructor": "HillasReconstructor", "gammaness_classifier": "ExtraTreesClassifier", @@ -812,12 +812,12 @@ def event_loader_config_path(irf_event_loader_test_config, irf_tmp_path): @pytest.fixture(scope="session") def irf_events_table(): - from ctapipe.irf import EventPreprocessor + from ctapipe.io import DL2EventPreprocessor N1 = 1000 N2 = 100 N = N1 + N2 - epp = EventPreprocessor() + epp = DL2EventPreprocessor() tab = epp.make_empty_table() ids, bulk, unitless = tab.colnames[:2], tab.colnames[2:-2], tab.colnames[-2:] diff --git a/src/ctapipe/io/__init__.py b/src/ctapipe/io/__init__.py index cfb5fc5501e..2d7f579456d 100644 --- a/src/ctapipe/io/__init__.py +++ b/src/ctapipe/io/__init__.py @@ -6,6 +6,7 @@ """ from .astropy_helpers import read_table, write_table # noqa: I001 from .datalevels import DataLevel +from .dl2_tables_preprocessing import DL2EventPreprocessor, DL2EventLoader from .eventsource import EventSource from .eventseeker import EventSeeker from .tableio import TableReader, TableWriter @@ -35,4 +36,6 @@ "DataWriter", "DATA_MODEL_VERSION", "get_hdf5_datalevels", + "DL2EventPreprocessor", + "DL2EventLoader", ] diff --git a/src/ctapipe/io/dl2_tables_preprocessing.py b/src/ctapipe/io/dl2_tables_preprocessing.py index e54f90ff495..b3ee822f284 100644 --- a/src/ctapipe/io/dl2_tables_preprocessing.py +++ b/src/ctapipe/io/dl2_tables_preprocessing.py @@ -17,19 +17,17 @@ from tables import NoSuchNodeError from traitlets import default -from ctapipe.irf.spectra import SPECTRA, Spectra - from ..compat import COPY_IF_NEEDED from ..containers import CoordinateFrameType from ..coordinates import NominalFrame from ..core import Component, QualityQuery from ..core.traits import Bool, Dict, List, Tuple, Unicode -from ..io import TableLoader +from .tableloader import TableLoader -__all__ = ["EventLoader", "EventPreprocessor", "EventQualityQuery"] +__all__ = ["DL2EventLoader", "DL2EventPreprocessor", "DL2EventQualityQuery"] -class EventQualityQuery(QualityQuery): +class DL2EventQualityQuery(QualityQuery): """ Event pre-selection quality criteria for IRF computation with different defaults. """ @@ -49,10 +47,10 @@ class EventQualityQuery(QualityQuery): ).tag(config=True) -class EventPreprocessor(Component): +class DL2EventPreprocessor(Component): """Defines pre-selection cuts and the necessary renaming of columns.""" - classes = [EventQualityQuery] + classes = [DL2EventQualityQuery] energy_reconstructor = Unicode( default_value="RandomForestRegressor", @@ -106,7 +104,7 @@ class EventPreprocessor(Component): def __init__(self, config=None, parent=None, **kwargs): super().__init__(config=config, parent=parent, **kwargs) - self.quality_query = EventQualityQuery(parent=self) + self.quality_query = DL2EventQualityQuery(parent=self) @default("columns_to_rename") def _default_columns_to_rename(self): @@ -233,12 +231,12 @@ def make_empty_table(self) -> QTable: ) -class EventLoader(Component): +class DL2EventLoader(Component): """ Component for loading events and simulation metadata, applying preselection and optional derived column logic. """ - classes = [EventPreprocessor] + classes = [DL2EventPreprocessor] # User-selectable event reading function and kwargs event_reader_function = Unicode( @@ -254,9 +252,11 @@ class EventLoader(Component): help="Extra keyword arguments passed to the event reading function, e.g., {'path': '/dl2/event/telescope/Reconstructor'}", ).tag(config=True) - def __init__(self, file: Path, target_spectrum: Spectra, **kwargs): + def __init__(self, file: Path, target_spectrum: "Spectra", **kwargs): # noqa: F821 + from ..irf.spectra import SPECTRA + super().__init__(**kwargs) - self.epp = EventPreprocessor(parent=self) + self.epp = DL2EventPreprocessor(parent=self) self.target_spectrum = SPECTRA[target_spectrum] self.file = file diff --git a/src/ctapipe/io/tests/test_preprocessing.py b/src/ctapipe/io/tests/test_preprocessing.py index a9a9e1af3be..f97990709fe 100644 --- a/src/ctapipe/io/tests/test_preprocessing.py +++ b/src/ctapipe/io/tests/test_preprocessing.py @@ -31,8 +31,8 @@ def dummy_table(): @pytest.fixture(scope="function") def test_config(): return { - "EventLoader": {"event_reader_function": "read_telescope_events_chunked"}, - "EventPreprocessor": { + "DL2EventLoader": {"event_reader_function": "read_telescope_events_chunked"}, + "DL2EventPreprocessor": { "energy_reconstructor": "ExtraTreesRegressor", "gammaness_classifier": "ExtraTreesClassifier", "columns_to_rename": {}, @@ -57,7 +57,7 @@ def test_config(): # "disable_column_renaming": True, "apply_check_pointing": False, }, - "EventQualityQuery": { + "DL2EventQualityQuery": { "quality_criteria": [ ("valid reco", "ExtraTreesRegressor_tel_is_valid"), ] @@ -66,7 +66,7 @@ def test_config(): def test_normalise_column_names(dummy_table): - from ctapipe.io.dl2_tables_preprocessing import EventPreprocessor + from ctapipe.io.dl2_tables_preprocessing import DL2EventPreprocessor output_table_schema = [ Column(name="obs_id", dtype=np.uint64, description="Observation Block ID"), @@ -87,7 +87,7 @@ def test_normalise_column_names(dummy_table): " (e.g. gamma in gamma-ray analysis) is more likely", ), ] - epp = EventPreprocessor( + epp = DL2EventPreprocessor( energy_reconstructor="dummy", geometry_reconstructor="geom", gammaness_classifier="classifier", @@ -113,7 +113,7 @@ def test_normalise_column_names(dummy_table): with pytest.raises(ValueError, match="Required column geom_alt is missing."): dummy_table.rename_column("geom_alt", "alt_geom") - epp = EventPreprocessor( + epp = DL2EventPreprocessor( energy_reconstructor="dummy", geometry_reconstructor="geom", gammaness_classifier="classifier", @@ -123,10 +123,10 @@ def test_normalise_column_names(dummy_table): def test_event_loader(gamma_diffuse_full_reco_file, irf_event_loader_test_config): - from ctapipe.io.dl2_tables_preprocessing import EventLoader + from ctapipe.io.dl2_tables_preprocessing import DL2EventLoader from ctapipe.irf import Spectra - loader = EventLoader( + loader = DL2EventLoader( config=irf_event_loader_test_config, file=gamma_diffuse_full_reco_file, target_spectrum=Spectra.CRAB_HEGRA, @@ -169,7 +169,7 @@ def test_event_loader(gamma_diffuse_full_reco_file, irf_event_loader_test_config def test_preprocessor_tel_table_with_custom_reconstructor(tmp_path, test_config): - from ctapipe.io.dl2_tables_preprocessing import EventPreprocessor + from ctapipe.io.dl2_tables_preprocessing import DL2EventPreprocessor # Create a test table with required columns table = QTable( @@ -193,7 +193,7 @@ def test_preprocessor_tel_table_with_custom_reconstructor(tmp_path, test_config) config = test_config # Create preprocessor with config - preprocessor = EventPreprocessor(config=Config(config)) + preprocessor = DL2EventPreprocessor(config=Config(config)) # Apply quality query and preprocessing mask = preprocessor.quality_query.get_table_mask(table) @@ -213,17 +213,17 @@ def test_preprocessor_tel_table_with_custom_reconstructor(tmp_path, test_config) def test_loader_tel_table(gamma_diffuse_full_reco_file, test_config): - from ctapipe.io.dl2_tables_preprocessing import EventLoader + from ctapipe.io.dl2_tables_preprocessing import DL2EventLoader from ctapipe.irf import Spectra - test_config["EventLoader"][ + test_config["DL2EventLoader"][ "event_reader_function" ] = "read_telescope_events_chunked" - test_config["EventQualityQuery"]["quality_criteria"].append( + test_config["DL2EventQualityQuery"]["quality_criteria"].append( ("telescope ID", "(tel_id == 35.0) | (tel_id == 19.0)"), ) - loader = EventLoader( + loader = DL2EventLoader( config=Config(test_config), file=gamma_diffuse_full_reco_file, target_spectrum=Spectra.CRAB_HEGRA, @@ -247,9 +247,9 @@ def test_loader_tel_table(gamma_diffuse_full_reco_file, test_config): def test_name_overriding(dummy_table): - from ctapipe.io.dl2_tables_preprocessing import EventPreprocessor + from ctapipe.io.dl2_tables_preprocessing import DL2EventPreprocessor - epp = EventPreprocessor( + epp = DL2EventPreprocessor( energy_reconstructor="dummy", geometry_reconstructor="geom", gammaness_classifier="classifier", diff --git a/src/ctapipe/irf/__init__.py b/src/ctapipe/irf/__init__.py index 83cbf973cc6..7ac94c3cbf1 100644 --- a/src/ctapipe/irf/__init__.py +++ b/src/ctapipe/irf/__init__.py @@ -8,8 +8,7 @@ raise OptionalDependencyMissing("pyirf") from None -from ctapipe.io.dl2_tables_preprocessing import EventLoader, EventPreprocessor - +from ..io.dl2_tables_preprocessing import DL2EventLoader, DL2EventPreprocessor from .benchmarks import ( AngularResolution2dMaker, EnergyBiasResolution2dMaker, @@ -47,8 +46,8 @@ "OptimizationResult", "PointSourceSensitivityOptimizer", "PercentileCuts", - "EventLoader", - "EventPreprocessor", + "DL2EventLoader", + "DL2EventPreprocessor", "Spectra", "GhPercentileCutCalculator", "ThetaPercentileCutCalculator", diff --git a/src/ctapipe/irf/optimize.py b/src/ctapipe/irf/optimize.py index 02a1482eb05..3aa99bf21ac 100644 --- a/src/ctapipe/irf/optimize.py +++ b/src/ctapipe/irf/optimize.py @@ -11,10 +11,9 @@ from pyirf.cut_optimization import optimize_gh_cut from pyirf.cuts import calculate_percentile_cut, evaluate_binned_cut -from ctapipe.io.dl2_tables_preprocessing import EventQualityQuery - from ..core import Component, QualityQuery from ..core.traits import AstroQuantity, Float, Integer, Path +from ..io.dl2_tables_preprocessing import DL2EventQualityQuery from .binning import DefaultRecoEnergyBins, ResultValidRange __all__ = [ @@ -169,7 +168,7 @@ def _check_events(self, events: dict[str, QTable]): def __call__( self, events: dict[str, QTable], - quality_query: EventQualityQuery, + quality_query: DL2EventQualityQuery, clf_prefix: str, ) -> OptimizationResult: """ @@ -181,7 +180,7 @@ def __call__( events: dict[str, astropy.table.QTable] Dictionary containing tables of events used for calculating cuts. This has to include "signal" events and can include "background" events. - quality_query: ctapipe.irf.EventPreprocessor + quality_query: ctapipe.io.DL2EventPreprocessor ``ctapipe.core.QualityQuery`` subclass containing preselection criteria for events. clf_prefix: str @@ -306,7 +305,7 @@ def __init__(self, config=None, parent=None, **kwargs): def __call__( self, events: dict[str, QTable], - quality_query: EventQualityQuery, + quality_query: DL2EventQualityQuery, clf_prefix: str, ) -> OptimizationResult: self._check_events(events) @@ -394,7 +393,7 @@ def __init__(self, config=None, parent=None, **kwargs): def __call__( self, events: dict[str, QTable], - quality_query: EventQualityQuery, + quality_query: DL2EventQualityQuery, clf_prefix: str, ) -> OptimizationResult: self._check_events(events) diff --git a/src/ctapipe/irf/tests/test_benchmarks.py b/src/ctapipe/irf/tests/test_benchmarks.py index 8264003c9ee..4c34095903d 100644 --- a/src/ctapipe/irf/tests/test_benchmarks.py +++ b/src/ctapipe/irf/tests/test_benchmarks.py @@ -83,10 +83,10 @@ def test_make_2d_ang_res(irf_events_table): def test_make_2d_sensitivity( gamma_diffuse_full_reco_file, proton_full_reco_file, irf_event_loader_test_config ): - from ctapipe.irf import EventLoader, Sensitivity2dMaker, Spectra + from ctapipe.io import DL2EventLoader, Sensitivity2dMaker, Spectra from ctapipe.irf.tests.test_irfs import _check_boundaries_in_hdu - gamma_loader = EventLoader( + gamma_loader = DL2EventLoader( config=irf_event_loader_test_config, file=gamma_diffuse_full_reco_file, target_spectrum=Spectra.CRAB_HEGRA, @@ -95,7 +95,7 @@ def test_make_2d_sensitivity( chunk_size=10000, obs_time=u.Quantity(50, u.h), ) - proton_loader = EventLoader( + proton_loader = DL2EventLoader( config=irf_event_loader_test_config, file=proton_full_reco_file, target_spectrum=Spectra.IRFDOC_PROTON_SPECTRUM, diff --git a/src/ctapipe/irf/tests/test_optimize.py b/src/ctapipe/irf/tests/test_optimize.py index 366c7ca334a..5d94bfc66a7 100644 --- a/src/ctapipe/irf/tests/test_optimize.py +++ b/src/ctapipe/irf/tests/test_optimize.py @@ -8,14 +8,14 @@ def test_optimization_result(tmp_path, irf_event_loader_test_config): - from ctapipe.irf import ( - EventPreprocessor, + from ctapipe.io import ( + DL2EventPreprocessor, OptimizationResult, ResultValidRange, ) result_path = tmp_path / "result.h5" - epp = EventPreprocessor(irf_event_loader_test_config) + epp = DL2EventPreprocessor(irf_event_loader_test_config) gh_cuts = QTable( data=[[0.2, 0.8, 1.5] * u.TeV, [0.8, 1.5, 10] * u.TeV, [0.82, 0.91, 0.88]], names=["low", "high", "cut"], @@ -87,9 +87,9 @@ def test_cut_optimizer( proton_full_reco_file, irf_event_loader_test_config, ): - from ctapipe.irf import EventLoader, OptimizationResult, Spectra + from ctapipe.io import DL2EventLoader, OptimizationResult, Spectra - gamma_loader = EventLoader( + gamma_loader = DL2EventLoader( config=irf_event_loader_test_config, file=gamma_diffuse_full_reco_file, target_spectrum=Spectra.CRAB_HEGRA, @@ -98,7 +98,7 @@ def test_cut_optimizer( chunk_size=10000, obs_time=u.Quantity(50, u.h), ) - proton_loader = EventLoader( + proton_loader = DL2EventLoader( config=irf_event_loader_test_config, file=proton_full_reco_file, target_spectrum=Spectra.IRFDOC_PROTON_SPECTRUM, diff --git a/src/ctapipe/resources/compute_irf.yaml b/src/ctapipe/resources/compute_irf.yaml index 806f73d160b..411e1782440 100644 --- a/src/ctapipe/resources/compute_irf.yaml +++ b/src/ctapipe/resources/compute_irf.yaml @@ -18,12 +18,12 @@ IrfTool: energy_bias_resolution_maker_name: "EnergyBiasResolution2dMaker" sensitivity_maker_name: "Sensitivity2dMaker" -EventPreprocessor: +DL2EventPreprocessor: energy_reconstructor: "RandomForestRegressor" geometry_reconstructor: "HillasReconstructor" gammaness_classifier: "RandomForestClassifier" - EventQualityQuery: + DL2EventQualityQuery: quality_criteria: - ["multiplicity 4", "np.count_nonzero(HillasReconstructor_telescopes,axis=1) >= 4"] - ["valid classifier", "RandomForestClassifier_is_valid"] diff --git a/src/ctapipe/resources/optimize_cuts.yaml b/src/ctapipe/resources/optimize_cuts.yaml index 2639b1f8373..e99ed0fe1df 100644 --- a/src/ctapipe/resources/optimize_cuts.yaml +++ b/src/ctapipe/resources/optimize_cuts.yaml @@ -12,12 +12,12 @@ EventSelectionOptimizer: obs_time: 50 hour optimization_algorithm: "PointSourceSensitivityOptimizer" # Alternative: "PercentileCuts" -EventPreprocessor: +DL2EventPreprocessor: energy_reconstructor: "RandomForestRegressor" geometry_reconstructor: "HillasReconstructor" gammaness_classifier: "RandomForestClassifier" - EventQualityQuery: + DL2EventQualityQuery: quality_criteria: - ["multiplicity 4", "np.count_nonzero(HillasReconstructor_telescopes,axis=1) >= 4"] - ["valid classifier", "RandomForestClassifier_is_valid"] diff --git a/src/ctapipe/tools/compute_irf.py b/src/ctapipe/tools/compute_irf.py index 471cc37e898..25dadaad98a 100644 --- a/src/ctapipe/tools/compute_irf.py +++ b/src/ctapipe/tools/compute_irf.py @@ -17,12 +17,11 @@ from pyirf.cuts import evaluate_binned_cut from pyirf.io import create_rad_max_hdu -from ctapipe.io.dl2_tables_preprocessing import EventQualityQuery - from ..core import Provenance, Tool, ToolConfigurationError, traits from ..core.traits import AstroQuantity, Bool, Integer, classes_with_traits, flag +from ..io.dl2_tables_preprocessing import DL2EventQualityQuery from ..irf import ( - EventLoader, + DL2EventLoader, OptimizationResult, Spectra, check_bins_in_range, @@ -217,7 +216,7 @@ class IrfTool(Tool): classes = ( [ - EventLoader, + DL2EventLoader, ] + classes_with_traits(BackgroundRateMakerBase) + classes_with_traits(EffectiveAreaMakerBase) @@ -248,7 +247,7 @@ def setup(self): raise_error=self.range_check_error, ) self.event_loaders = { - "gammas": EventLoader( + "gammas": DL2EventLoader( parent=self, file=self.gamma_file, target_spectrum=self.gamma_target_spectrum, @@ -262,13 +261,13 @@ def setup(self): "At least a proton file required when specifying `do_background`." ) - self.event_loaders["protons"] = EventLoader( + self.event_loaders["protons"] = DL2EventLoader( parent=self, file=self.proton_file, target_spectrum=self.proton_target_spectrum, ) if self.electron_file and self.electron_file.exists(): - self.event_loaders["electrons"] = EventLoader( + self.event_loaders["electrons"] = DL2EventLoader( parent=self, file=self.electron_file, target_spectrum=self.electron_target_spectrum, @@ -488,7 +487,7 @@ def start(self): ], ) ) - loader.epp.quality_query = EventQualityQuery( + loader.epp.quality_query = DL2EventQualityQuery( parent=loader, quality_criteria=self.opt_result.quality_query.quality_criteria, ) diff --git a/src/ctapipe/tools/optimize_event_selection.py b/src/ctapipe/tools/optimize_event_selection.py index 186b8a34fb4..0cfc57435f8 100644 --- a/src/ctapipe/tools/optimize_event_selection.py +++ b/src/ctapipe/tools/optimize_event_selection.py @@ -5,7 +5,8 @@ from ..core import Provenance, Tool, traits from ..core.traits import AstroQuantity, Integer, classes_with_traits -from ..irf import EventLoader, Spectra +from ..io import DL2EventLoader +from ..irf import Spectra from ..irf.optimize import CutOptimizerBase __all__ = ["EventSelectionOptimizer"] @@ -102,7 +103,7 @@ class EventSelectionOptimizer(Tool): "chunk_size": "EventSelectionOptimizer.chunk_size", } - classes = [EventLoader] + classes_with_traits(CutOptimizerBase) + classes = [DL2EventLoader] + classes_with_traits(CutOptimizerBase) def setup(self): """ @@ -112,7 +113,7 @@ def setup(self): self.optimization_algorithm, parent=self ) self.event_loaders = { - "gammas": EventLoader( + "gammas": DL2EventLoader( parent=self, file=self.gamma_file, target_spectrum=self.gamma_target_spectrum, @@ -127,13 +128,13 @@ def setup(self): f"using {self.optimization_algorithm}." ) - self.event_loaders["protons"] = EventLoader( + self.event_loaders["protons"] = DL2EventLoader( parent=self, file=self.proton_file, target_spectrum=self.proton_target_spectrum, ) if self.electron_file and self.electron_file.exists(): - self.event_loaders["electrons"] = EventLoader( + self.event_loaders["electrons"] = DL2EventLoader( parent=self, file=self.electron_file, target_spectrum=self.electron_target_spectrum, diff --git a/src/ctapipe/tools/tests/test_compute_irf.py b/src/ctapipe/tools/tests/test_compute_irf.py index acd35a50aaf..8a272bcaad3 100644 --- a/src/ctapipe/tools/tests/test_compute_irf.py +++ b/src/ctapipe/tools/tests/test_compute_irf.py @@ -240,11 +240,11 @@ def test_irf_tool_wrong_cuts( with config_path.open("w") as f: json.dump( { - "EventPreprocessor": { + "DL2EventPreprocessor": { "energy_reconstructor": "ExtraTreesRegressor", "geometry_reconstructor": "HillasReconstructor", "gammaness_classifier": "ExtraTreesClassifier", - "EventQualityQuery": { + "DL2EventQualityQuery": { "quality_criteria": [ # No criteria for minimum event multiplicity ("valid classifier", "ExtraTreesClassifier_is_valid"), From 40d9f501596c885024e0e7c185114a766dca335e Mon Sep 17 00:00:00 2001 From: "Georgios.Voutsinas" Date: Wed, 16 Jul 2025 19:26:49 +0200 Subject: [PATCH 18/33] fixing tests --- src/ctapipe/conftest.py | 2 +- src/ctapipe/irf/tests/test_benchmarks.py | 3 ++- src/ctapipe/irf/tests/test_optimize.py | 7 ++++--- src/ctapipe/tools/tests/test_compute_irf.py | 19 +++++++++++++++++++ 4 files changed, 26 insertions(+), 5 deletions(-) diff --git a/src/ctapipe/conftest.py b/src/ctapipe/conftest.py index a4beda28061..be32417575e 100644 --- a/src/ctapipe/conftest.py +++ b/src/ctapipe/conftest.py @@ -785,7 +785,7 @@ def irf_event_loader_test_config(): "energy_reconstructor": "ExtraTreesRegressor", "geometry_reconstructor": "HillasReconstructor", "gammaness_classifier": "ExtraTreesClassifier", - "EventQualityQuery": { + "DL2EventQualityQuery": { "quality_criteria": [ ( "multiplicity 4", diff --git a/src/ctapipe/irf/tests/test_benchmarks.py b/src/ctapipe/irf/tests/test_benchmarks.py index 4c34095903d..b8957bdb978 100644 --- a/src/ctapipe/irf/tests/test_benchmarks.py +++ b/src/ctapipe/irf/tests/test_benchmarks.py @@ -83,7 +83,8 @@ def test_make_2d_ang_res(irf_events_table): def test_make_2d_sensitivity( gamma_diffuse_full_reco_file, proton_full_reco_file, irf_event_loader_test_config ): - from ctapipe.io import DL2EventLoader, Sensitivity2dMaker, Spectra + from ctapipe.io import DL2EventLoader + from ctapipe.irf import Sensitivity2dMaker, Spectra from ctapipe.irf.tests.test_irfs import _check_boundaries_in_hdu gamma_loader = DL2EventLoader( diff --git a/src/ctapipe/irf/tests/test_optimize.py b/src/ctapipe/irf/tests/test_optimize.py index 5d94bfc66a7..fb5b99694d0 100644 --- a/src/ctapipe/irf/tests/test_optimize.py +++ b/src/ctapipe/irf/tests/test_optimize.py @@ -8,8 +8,8 @@ def test_optimization_result(tmp_path, irf_event_loader_test_config): - from ctapipe.io import ( - DL2EventPreprocessor, + from ctapipe.io import DL2EventPreprocessor + from ctapipe.irf import ( OptimizationResult, ResultValidRange, ) @@ -87,7 +87,8 @@ def test_cut_optimizer( proton_full_reco_file, irf_event_loader_test_config, ): - from ctapipe.io import DL2EventLoader, OptimizationResult, Spectra + from ctapipe.io import DL2EventLoader + from ctapipe.irf import OptimizationResult, Spectra gamma_loader = DL2EventLoader( config=irf_event_loader_test_config, diff --git a/src/ctapipe/tools/tests/test_compute_irf.py b/src/ctapipe/tools/tests/test_compute_irf.py index 8a272bcaad3..796e4ec8434 100644 --- a/src/ctapipe/tools/tests/test_compute_irf.py +++ b/src/ctapipe/tools/tests/test_compute_irf.py @@ -33,6 +33,25 @@ def dummy_cuts_file( return output_path +""" +@pytest.fixture(scope="module") +def test_config(): + return { + "DL2EventPreprocessor": { + "energy_reconstructor": "ExtraTreesRegressor", + "geometry_reconstructor": "HillasReconstructor", + "gammaness_classifier": "ExtraTreesClassifier", + }, + "DL2EventQualityQuery": { + "quality_criteria": [ + ("valid geom reco", "HillasReconstructor_is_valid"), + ("valid energy reco", "ExtraTreesRegressor_is_valid"), + ] + } + } +""" + + @pytest.mark.parametrize("include_background", (False, True)) @pytest.mark.parametrize("spatial_selection_applied", (True, False)) def test_irf_tool( From 15396e37e9b5e5fe8df2399e525d3240afd1ce53 Mon Sep 17 00:00:00 2001 From: "Georgios.Voutsinas" Date: Wed, 16 Jul 2025 20:58:13 +0200 Subject: [PATCH 19/33] fixing import that strangely wasn't caught by the tests --- docs/api-reference/irf/preprocessing.rst | 12 ------------ src/ctapipe/tools/compute_irf.py | 6 ++++-- src/ctapipe/tools/tests/test_compute_irf.py | 19 ------------------- 3 files changed, 4 insertions(+), 33 deletions(-) delete mode 100644 docs/api-reference/irf/preprocessing.rst diff --git a/docs/api-reference/irf/preprocessing.rst b/docs/api-reference/irf/preprocessing.rst deleted file mode 100644 index 9d57445cbe3..00000000000 --- a/docs/api-reference/irf/preprocessing.rst +++ /dev/null @@ -1,12 +0,0 @@ -.. _preprocessing: - -******************************* -Event Loading and Preprocessing -******************************* - - -Reference/ API -============== - -.. automodapi:: ctapipe.irf.preprocessing - :no-inheritance-diagram: diff --git a/src/ctapipe/tools/compute_irf.py b/src/ctapipe/tools/compute_irf.py index 25dadaad98a..f4ca8f97295 100644 --- a/src/ctapipe/tools/compute_irf.py +++ b/src/ctapipe/tools/compute_irf.py @@ -19,9 +19,11 @@ from ..core import Provenance, Tool, ToolConfigurationError, traits from ..core.traits import AstroQuantity, Bool, Integer, classes_with_traits, flag -from ..io.dl2_tables_preprocessing import DL2EventQualityQuery -from ..irf import ( +from ..io.dl2_tables_preprocessing import ( DL2EventLoader, + DL2EventQualityQuery, +) +from ..irf import ( OptimizationResult, Spectra, check_bins_in_range, diff --git a/src/ctapipe/tools/tests/test_compute_irf.py b/src/ctapipe/tools/tests/test_compute_irf.py index 796e4ec8434..8a272bcaad3 100644 --- a/src/ctapipe/tools/tests/test_compute_irf.py +++ b/src/ctapipe/tools/tests/test_compute_irf.py @@ -33,25 +33,6 @@ def dummy_cuts_file( return output_path -""" -@pytest.fixture(scope="module") -def test_config(): - return { - "DL2EventPreprocessor": { - "energy_reconstructor": "ExtraTreesRegressor", - "geometry_reconstructor": "HillasReconstructor", - "gammaness_classifier": "ExtraTreesClassifier", - }, - "DL2EventQualityQuery": { - "quality_criteria": [ - ("valid geom reco", "HillasReconstructor_is_valid"), - ("valid energy reco", "ExtraTreesRegressor_is_valid"), - ] - } - } -""" - - @pytest.mark.parametrize("include_background", (False, True)) @pytest.mark.parametrize("spatial_selection_applied", (True, False)) def test_irf_tool( From ccd427bab9366d9560f4b49c2daa34384608e2d3 Mon Sep 17 00:00:00 2001 From: "Georgios.Voutsinas" Date: Thu, 17 Jul 2025 15:06:25 +0200 Subject: [PATCH 20/33] trying to fix the minimal dependencies issue --- src/ctapipe/io/dl2_tables_preprocessing.py | 59 ++++++++++++++-------- src/ctapipe/io/tests/test_preprocessing.py | 48 ------------------ src/ctapipe/irf/tests/test_load_events.py | 49 ++++++++++++++++++ 3 files changed, 86 insertions(+), 70 deletions(-) create mode 100644 src/ctapipe/irf/tests/test_load_events.py diff --git a/src/ctapipe/io/dl2_tables_preprocessing.py b/src/ctapipe/io/dl2_tables_preprocessing.py index b3ee822f284..40858fe8054 100644 --- a/src/ctapipe/io/dl2_tables_preprocessing.py +++ b/src/ctapipe/io/dl2_tables_preprocessing.py @@ -1,4 +1,13 @@ """Module containing classes related to event loading and preprocessing""" +from __future__ import annotations + +from typing import TYPE_CHECKING + +if TYPE_CHECKING: + import astropy + import pyirf + + import ctapipe from pathlib import Path @@ -6,14 +15,6 @@ import numpy as np from astropy.coordinates import AltAz, SkyCoord from astropy.table import Column, QTable, Table, vstack -from pyirf.simulations import SimulatedEventsInfo -from pyirf.spectral import ( - DIFFUSE_FLUX_UNIT, - POINT_SOURCE_FLUX_UNIT, - PowerLaw, - calculate_event_weights, -) -from pyirf.utils import calculate_source_fov_offset, calculate_theta from tables import NoSuchNodeError from traitlets import default @@ -306,23 +307,25 @@ def load_preselected_events( return table, n_raw_events, meta def get_simulation_information( - self, loader: TableLoader, obs_time: u.Quantity - ) -> tuple[SimulatedEventsInfo, PowerLaw]: + self, + loader: ctapipe.io.tableloader.TableLoader, + obs_time: astropy.units.Quantity, + ) -> tuple[pyirf.simulations.SimulatedEventsInfo, pyirf.spectral.PowerLaw]: """ Extract simulation information from the input file. Parameters ---------- - loader : TableLoader + loader : ctapipe.io.tableloader.TableLoader Loader object for reading from the input file. - obs_time : Quantity + obs_time : astropy.units.Quantity Total observation time. Returns ------- - sim_info : SimulatedEventsInfo + sim_info : pyirf.simulations.SimulatedEventsInfo Metadata about the simulated events. - spectrum : PowerLaw + spectrum : pyirf.spectral.PowerLaw Power-law model derived from simulation configuration. Raises @@ -330,6 +333,10 @@ def get_simulation_information( NotImplementedError If simulation parameters vary across runs. """ + + from pyirf.simulations import SimulatedEventsInfo + from pyirf.spectral import PowerLaw + sim = loader.read_simulation_configuration() try: show = loader.read_shower_distribution() @@ -368,6 +375,8 @@ def make_derived_columns(self, events: QTable) -> QTable: QTable Table with added derived columns. """ + from pyirf.utils import calculate_source_fov_offset, calculate_theta + events["theta"] = calculate_theta( events, assumed_source_az=events["true_az"], @@ -396,28 +405,28 @@ def make_derived_columns(self, events: QTable) -> QTable: def make_event_weights( self, - events: QTable, - spectrum: PowerLaw, + events: astropy.table.QTable, + spectrum: pyirf.spectral.PowerLaw, kind: str, - fov_offset_bins: u.Quantity | None = None, - ) -> QTable: + fov_offset_bins: astropy.units.Quantity | None = None, + ) -> astropy.table.QTable: """ Compute event weights to match the target spectrum. Parameters ---------- - events : QTable + events : astropy.table.QTable Input events. - spectrum : PowerLaw + spectrum : pyirf.spectral.PowerLaw Spectrum from simulation. kind : str Type of events ("gammas", etc.). - fov_offset_bins : Quantity, optional + fov_offset_bins : astropy.units.Quantity, optional Offset bins for integrating the diffuse flux into point source bins. Returns ------- - QTable + astropy.table.QTable Table with updated weights. Raises @@ -425,6 +434,12 @@ def make_event_weights( ValueError If ``fov_offset_bins`` is required but not provided. """ + from pyirf.spectral import ( + DIFFUSE_FLUX_UNIT, + POINT_SOURCE_FLUX_UNIT, + calculate_event_weights, + ) + if ( kind == "gammas" and self.target_spectrum.normalization.unit.is_equivalent( diff --git a/src/ctapipe/io/tests/test_preprocessing.py b/src/ctapipe/io/tests/test_preprocessing.py index f97990709fe..9882f72cc35 100644 --- a/src/ctapipe/io/tests/test_preprocessing.py +++ b/src/ctapipe/io/tests/test_preprocessing.py @@ -2,8 +2,6 @@ import numpy as np import pytest from astropy.table import Column, QTable, Table -from pyirf.simulations import SimulatedEventsInfo -from pyirf.spectral import PowerLaw from traitlets.config import Config @@ -122,52 +120,6 @@ def test_normalise_column_names(dummy_table): _ = epp.normalise_column_names(dummy_table) -def test_event_loader(gamma_diffuse_full_reco_file, irf_event_loader_test_config): - from ctapipe.io.dl2_tables_preprocessing import DL2EventLoader - from ctapipe.irf import Spectra - - loader = DL2EventLoader( - config=irf_event_loader_test_config, - file=gamma_diffuse_full_reco_file, - target_spectrum=Spectra.CRAB_HEGRA, - ) - events, count, meta = loader.load_preselected_events( - chunk_size=10000, - obs_time=u.Quantity(50, u.h), - ) - - columns = [ - "obs_id", - "event_id", - "true_energy", - "true_az", - "true_alt", - "reco_energy", - "reco_az", - "reco_alt", - "reco_fov_lat", - "reco_fov_lon", - "gh_score", - "pointing_az", - "pointing_alt", - "theta", - "true_source_fov_offset", - "reco_source_fov_offset", - "weight", - ] - - assert sorted(columns) == sorted(events.colnames) - assert isinstance(count, int) - assert isinstance(meta["sim_info"], SimulatedEventsInfo) - assert isinstance(meta["spectrum"], PowerLaw) - - events = loader.make_event_weights( - events, meta["spectrum"], "gammas", (0 * u.deg, 1 * u.deg) - ) - - assert "weight" in events.colnames - - def test_preprocessor_tel_table_with_custom_reconstructor(tmp_path, test_config): from ctapipe.io.dl2_tables_preprocessing import DL2EventPreprocessor diff --git a/src/ctapipe/irf/tests/test_load_events.py b/src/ctapipe/irf/tests/test_load_events.py new file mode 100644 index 00000000000..0ec95767e6a --- /dev/null +++ b/src/ctapipe/irf/tests/test_load_events.py @@ -0,0 +1,49 @@ +import astropy.units as u +from pyirf.simulations import SimulatedEventsInfo +from pyirf.spectral import PowerLaw + +from ctapipe.io.dl2_tables_preprocessing import DL2EventLoader +from ctapipe.irf import Spectra + + +def test_event_loader(gamma_diffuse_full_reco_file, irf_event_loader_test_config): + loader = DL2EventLoader( + config=irf_event_loader_test_config, + file=gamma_diffuse_full_reco_file, + target_spectrum=Spectra.CRAB_HEGRA, + ) + events, count, meta = loader.load_preselected_events( + chunk_size=10000, + obs_time=u.Quantity(50, u.h), + ) + + columns = [ + "obs_id", + "event_id", + "true_energy", + "true_az", + "true_alt", + "reco_energy", + "reco_az", + "reco_alt", + "reco_fov_lat", + "reco_fov_lon", + "gh_score", + "pointing_az", + "pointing_alt", + "theta", + "true_source_fov_offset", + "reco_source_fov_offset", + "weight", + ] + + assert sorted(columns) == sorted(events.colnames) + assert isinstance(count, int) + assert isinstance(meta["sim_info"], SimulatedEventsInfo) + assert isinstance(meta["spectrum"], PowerLaw) + + events = loader.make_event_weights( + events, meta["spectrum"], "gammas", (0 * u.deg, 1 * u.deg) + ) + + assert "weight" in events.colnames From 8c924fa7ed5a12faf3179fc4ccfbfa3184b3b40c Mon Sep 17 00:00:00 2001 From: "mykhailo.dalchenko" Date: Thu, 17 Jul 2025 15:56:58 +0200 Subject: [PATCH 21/33] Revert "trying to fix the minimal dependencies issue" This reverts commit ccd427bab9366d9560f4b49c2daa34384608e2d3. --- src/ctapipe/io/dl2_tables_preprocessing.py | 59 ++++++++-------------- src/ctapipe/io/tests/test_preprocessing.py | 48 ++++++++++++++++++ src/ctapipe/irf/tests/test_load_events.py | 49 ------------------ 3 files changed, 70 insertions(+), 86 deletions(-) delete mode 100644 src/ctapipe/irf/tests/test_load_events.py diff --git a/src/ctapipe/io/dl2_tables_preprocessing.py b/src/ctapipe/io/dl2_tables_preprocessing.py index 40858fe8054..b3ee822f284 100644 --- a/src/ctapipe/io/dl2_tables_preprocessing.py +++ b/src/ctapipe/io/dl2_tables_preprocessing.py @@ -1,13 +1,4 @@ """Module containing classes related to event loading and preprocessing""" -from __future__ import annotations - -from typing import TYPE_CHECKING - -if TYPE_CHECKING: - import astropy - import pyirf - - import ctapipe from pathlib import Path @@ -15,6 +6,14 @@ import numpy as np from astropy.coordinates import AltAz, SkyCoord from astropy.table import Column, QTable, Table, vstack +from pyirf.simulations import SimulatedEventsInfo +from pyirf.spectral import ( + DIFFUSE_FLUX_UNIT, + POINT_SOURCE_FLUX_UNIT, + PowerLaw, + calculate_event_weights, +) +from pyirf.utils import calculate_source_fov_offset, calculate_theta from tables import NoSuchNodeError from traitlets import default @@ -307,25 +306,23 @@ def load_preselected_events( return table, n_raw_events, meta def get_simulation_information( - self, - loader: ctapipe.io.tableloader.TableLoader, - obs_time: astropy.units.Quantity, - ) -> tuple[pyirf.simulations.SimulatedEventsInfo, pyirf.spectral.PowerLaw]: + self, loader: TableLoader, obs_time: u.Quantity + ) -> tuple[SimulatedEventsInfo, PowerLaw]: """ Extract simulation information from the input file. Parameters ---------- - loader : ctapipe.io.tableloader.TableLoader + loader : TableLoader Loader object for reading from the input file. - obs_time : astropy.units.Quantity + obs_time : Quantity Total observation time. Returns ------- - sim_info : pyirf.simulations.SimulatedEventsInfo + sim_info : SimulatedEventsInfo Metadata about the simulated events. - spectrum : pyirf.spectral.PowerLaw + spectrum : PowerLaw Power-law model derived from simulation configuration. Raises @@ -333,10 +330,6 @@ def get_simulation_information( NotImplementedError If simulation parameters vary across runs. """ - - from pyirf.simulations import SimulatedEventsInfo - from pyirf.spectral import PowerLaw - sim = loader.read_simulation_configuration() try: show = loader.read_shower_distribution() @@ -375,8 +368,6 @@ def make_derived_columns(self, events: QTable) -> QTable: QTable Table with added derived columns. """ - from pyirf.utils import calculate_source_fov_offset, calculate_theta - events["theta"] = calculate_theta( events, assumed_source_az=events["true_az"], @@ -405,28 +396,28 @@ def make_derived_columns(self, events: QTable) -> QTable: def make_event_weights( self, - events: astropy.table.QTable, - spectrum: pyirf.spectral.PowerLaw, + events: QTable, + spectrum: PowerLaw, kind: str, - fov_offset_bins: astropy.units.Quantity | None = None, - ) -> astropy.table.QTable: + fov_offset_bins: u.Quantity | None = None, + ) -> QTable: """ Compute event weights to match the target spectrum. Parameters ---------- - events : astropy.table.QTable + events : QTable Input events. - spectrum : pyirf.spectral.PowerLaw + spectrum : PowerLaw Spectrum from simulation. kind : str Type of events ("gammas", etc.). - fov_offset_bins : astropy.units.Quantity, optional + fov_offset_bins : Quantity, optional Offset bins for integrating the diffuse flux into point source bins. Returns ------- - astropy.table.QTable + QTable Table with updated weights. Raises @@ -434,12 +425,6 @@ def make_event_weights( ValueError If ``fov_offset_bins`` is required but not provided. """ - from pyirf.spectral import ( - DIFFUSE_FLUX_UNIT, - POINT_SOURCE_FLUX_UNIT, - calculate_event_weights, - ) - if ( kind == "gammas" and self.target_spectrum.normalization.unit.is_equivalent( diff --git a/src/ctapipe/io/tests/test_preprocessing.py b/src/ctapipe/io/tests/test_preprocessing.py index 9882f72cc35..f97990709fe 100644 --- a/src/ctapipe/io/tests/test_preprocessing.py +++ b/src/ctapipe/io/tests/test_preprocessing.py @@ -2,6 +2,8 @@ import numpy as np import pytest from astropy.table import Column, QTable, Table +from pyirf.simulations import SimulatedEventsInfo +from pyirf.spectral import PowerLaw from traitlets.config import Config @@ -120,6 +122,52 @@ def test_normalise_column_names(dummy_table): _ = epp.normalise_column_names(dummy_table) +def test_event_loader(gamma_diffuse_full_reco_file, irf_event_loader_test_config): + from ctapipe.io.dl2_tables_preprocessing import DL2EventLoader + from ctapipe.irf import Spectra + + loader = DL2EventLoader( + config=irf_event_loader_test_config, + file=gamma_diffuse_full_reco_file, + target_spectrum=Spectra.CRAB_HEGRA, + ) + events, count, meta = loader.load_preselected_events( + chunk_size=10000, + obs_time=u.Quantity(50, u.h), + ) + + columns = [ + "obs_id", + "event_id", + "true_energy", + "true_az", + "true_alt", + "reco_energy", + "reco_az", + "reco_alt", + "reco_fov_lat", + "reco_fov_lon", + "gh_score", + "pointing_az", + "pointing_alt", + "theta", + "true_source_fov_offset", + "reco_source_fov_offset", + "weight", + ] + + assert sorted(columns) == sorted(events.colnames) + assert isinstance(count, int) + assert isinstance(meta["sim_info"], SimulatedEventsInfo) + assert isinstance(meta["spectrum"], PowerLaw) + + events = loader.make_event_weights( + events, meta["spectrum"], "gammas", (0 * u.deg, 1 * u.deg) + ) + + assert "weight" in events.colnames + + def test_preprocessor_tel_table_with_custom_reconstructor(tmp_path, test_config): from ctapipe.io.dl2_tables_preprocessing import DL2EventPreprocessor diff --git a/src/ctapipe/irf/tests/test_load_events.py b/src/ctapipe/irf/tests/test_load_events.py deleted file mode 100644 index 0ec95767e6a..00000000000 --- a/src/ctapipe/irf/tests/test_load_events.py +++ /dev/null @@ -1,49 +0,0 @@ -import astropy.units as u -from pyirf.simulations import SimulatedEventsInfo -from pyirf.spectral import PowerLaw - -from ctapipe.io.dl2_tables_preprocessing import DL2EventLoader -from ctapipe.irf import Spectra - - -def test_event_loader(gamma_diffuse_full_reco_file, irf_event_loader_test_config): - loader = DL2EventLoader( - config=irf_event_loader_test_config, - file=gamma_diffuse_full_reco_file, - target_spectrum=Spectra.CRAB_HEGRA, - ) - events, count, meta = loader.load_preselected_events( - chunk_size=10000, - obs_time=u.Quantity(50, u.h), - ) - - columns = [ - "obs_id", - "event_id", - "true_energy", - "true_az", - "true_alt", - "reco_energy", - "reco_az", - "reco_alt", - "reco_fov_lat", - "reco_fov_lon", - "gh_score", - "pointing_az", - "pointing_alt", - "theta", - "true_source_fov_offset", - "reco_source_fov_offset", - "weight", - ] - - assert sorted(columns) == sorted(events.colnames) - assert isinstance(count, int) - assert isinstance(meta["sim_info"], SimulatedEventsInfo) - assert isinstance(meta["spectrum"], PowerLaw) - - events = loader.make_event_weights( - events, meta["spectrum"], "gammas", (0 * u.deg, 1 * u.deg) - ) - - assert "weight" in events.colnames From fb5f8eacf5358c8c6065f858add944c7b538381a Mon Sep 17 00:00:00 2001 From: "Georgios.Voutsinas" Date: Thu, 17 Jul 2025 16:17:17 +0200 Subject: [PATCH 22/33] moving all tests that implicitly use pyirf to irf --- src/ctapipe/io/tests/test_preprocessing.py | 34 ---------- src/ctapipe/irf/tests/test_load_events.py | 75 ++++++++++++++++++++++ 2 files changed, 75 insertions(+), 34 deletions(-) diff --git a/src/ctapipe/io/tests/test_preprocessing.py b/src/ctapipe/io/tests/test_preprocessing.py index 9882f72cc35..49a90455e38 100644 --- a/src/ctapipe/io/tests/test_preprocessing.py +++ b/src/ctapipe/io/tests/test_preprocessing.py @@ -164,40 +164,6 @@ def test_preprocessor_tel_table_with_custom_reconstructor(tmp_path, test_config) assert np.all(processed["ExtraTreesRegressor_tel_energy"] > 0 * u.TeV) -def test_loader_tel_table(gamma_diffuse_full_reco_file, test_config): - from ctapipe.io.dl2_tables_preprocessing import DL2EventLoader - from ctapipe.irf import Spectra - - test_config["DL2EventLoader"][ - "event_reader_function" - ] = "read_telescope_events_chunked" - test_config["DL2EventQualityQuery"]["quality_criteria"].append( - ("telescope ID", "(tel_id == 35.0) | (tel_id == 19.0)"), - ) - - loader = DL2EventLoader( - config=Config(test_config), - file=gamma_diffuse_full_reco_file, - target_spectrum=Spectra.CRAB_HEGRA, - ) - events, count, meta = loader.load_preselected_events( - chunk_size=10000, - obs_time=u.Quantity(50, u.h), - ) - - columns = [ - "obs_id", - "event_id", - "tel_id", - "ExtraTreesRegressor_tel_energy", - "ExtraTreesRegressor_tel_energy_uncert", - ] - - assert sorted(columns) == sorted(events.colnames) - assert np.all(events["ExtraTreesRegressor_tel_energy"] > 0 * u.TeV) - assert np.all(np.isin(events["tel_id"], [19, 35])) - - def test_name_overriding(dummy_table): from ctapipe.io.dl2_tables_preprocessing import DL2EventPreprocessor diff --git a/src/ctapipe/irf/tests/test_load_events.py b/src/ctapipe/irf/tests/test_load_events.py index 0ec95767e6a..8acc407ecba 100644 --- a/src/ctapipe/irf/tests/test_load_events.py +++ b/src/ctapipe/irf/tests/test_load_events.py @@ -1,11 +1,52 @@ import astropy.units as u +import numpy as np +import pytest +from astropy.table import Column from pyirf.simulations import SimulatedEventsInfo from pyirf.spectral import PowerLaw +from traitlets.config import Config from ctapipe.io.dl2_tables_preprocessing import DL2EventLoader from ctapipe.irf import Spectra +@pytest.fixture(scope="function") +def test_config(): + return { + "DL2EventLoader": {"event_reader_function": "read_telescope_events_chunked"}, + "DL2EventPreprocessor": { + "energy_reconstructor": "ExtraTreesRegressor", + "gammaness_classifier": "ExtraTreesClassifier", + "columns_to_rename": {}, + "output_table_schema": [ + Column( + name="obs_id", dtype=np.uint64, description="Observation Block ID" + ), + Column(name="event_id", dtype=np.uint64, description="Array event ID"), + Column(name="tel_id", dtype=np.uint64, description="Telescope ID"), + Column( + name="ExtraTreesRegressor_tel_energy", + unit=u.TeV, + description="Reconstructed energy", + ), + Column( + name="ExtraTreesRegressor_tel_energy_uncert", + unit=u.TeV, + description="Reconstructed energy uncertainty", + ), + ], + "apply_derived_columns": False, + # "disable_column_renaming": True, + "apply_check_pointing": False, + }, + "DL2EventQualityQuery": { + "quality_criteria": [ + ("valid reco", "ExtraTreesRegressor_tel_is_valid"), + ] + }, + } + + def test_event_loader(gamma_diffuse_full_reco_file, irf_event_loader_test_config): loader = DL2EventLoader( config=irf_event_loader_test_config, @@ -47,3 +88,37 @@ def test_event_loader(gamma_diffuse_full_reco_file, irf_event_loader_test_config ) assert "weight" in events.colnames + + +def test_loader_tel_table(gamma_diffuse_full_reco_file, test_config): + from ctapipe.io.dl2_tables_preprocessing import DL2EventLoader + from ctapipe.irf import Spectra + + test_config["DL2EventLoader"][ + "event_reader_function" + ] = "read_telescope_events_chunked" + test_config["DL2EventQualityQuery"]["quality_criteria"].append( + ("telescope ID", "(tel_id == 35.0) | (tel_id == 19.0)"), + ) + + loader = DL2EventLoader( + config=Config(test_config), + file=gamma_diffuse_full_reco_file, + target_spectrum=Spectra.CRAB_HEGRA, + ) + events, count, meta = loader.load_preselected_events( + chunk_size=10000, + obs_time=u.Quantity(50, u.h), + ) + + columns = [ + "obs_id", + "event_id", + "tel_id", + "ExtraTreesRegressor_tel_energy", + "ExtraTreesRegressor_tel_energy_uncert", + ] + + assert sorted(columns) == sorted(events.colnames) + assert np.all(events["ExtraTreesRegressor_tel_energy"] > 0 * u.TeV) + assert np.all(np.isin(events["tel_id"], [19, 35])) From 7f3a16c2bdf2c95a07bb0bca53baba0e4870d6a1 Mon Sep 17 00:00:00 2001 From: "Georgios.Voutsinas" Date: Thu, 17 Jul 2025 16:45:39 +0200 Subject: [PATCH 23/33] removing duplicated lines --- src/ctapipe/conftest.py | 37 ++++++++++++++++++++ src/ctapipe/io/tests/test_preprocessing.py | 37 -------------------- src/ctapipe/irf/tests/test_load_events.py | 39 ---------------------- 3 files changed, 37 insertions(+), 76 deletions(-) diff --git a/src/ctapipe/conftest.py b/src/ctapipe/conftest.py index be32417575e..a110c89bee2 100644 --- a/src/ctapipe/conftest.py +++ b/src/ctapipe/conftest.py @@ -856,3 +856,40 @@ def irf_events_table(): ev = vstack([e_tab, tab], join_type="exact", metadata_conflicts="silent") return ev + + +@pytest.fixture(scope="function") +def test_config(): + return { + "DL2EventLoader": {"event_reader_function": "read_telescope_events_chunked"}, + "DL2EventPreprocessor": { + "energy_reconstructor": "ExtraTreesRegressor", + "gammaness_classifier": "ExtraTreesClassifier", + "columns_to_rename": {}, + "output_table_schema": [ + Column( + name="obs_id", dtype=np.uint64, description="Observation Block ID" + ), + Column(name="event_id", dtype=np.uint64, description="Array event ID"), + Column(name="tel_id", dtype=np.uint64, description="Telescope ID"), + Column( + name="ExtraTreesRegressor_tel_energy", + unit=u.TeV, + description="Reconstructed energy", + ), + Column( + name="ExtraTreesRegressor_tel_energy_uncert", + unit=u.TeV, + description="Reconstructed energy uncertainty", + ), + ], + "apply_derived_columns": False, + # "disable_column_renaming": True, + "apply_check_pointing": False, + }, + "DL2EventQualityQuery": { + "quality_criteria": [ + ("valid reco", "ExtraTreesRegressor_tel_is_valid"), + ] + }, + } diff --git a/src/ctapipe/io/tests/test_preprocessing.py b/src/ctapipe/io/tests/test_preprocessing.py index 49a90455e38..7fc60165c5c 100644 --- a/src/ctapipe/io/tests/test_preprocessing.py +++ b/src/ctapipe/io/tests/test_preprocessing.py @@ -26,43 +26,6 @@ def dummy_table(): ) -@pytest.fixture(scope="function") -def test_config(): - return { - "DL2EventLoader": {"event_reader_function": "read_telescope_events_chunked"}, - "DL2EventPreprocessor": { - "energy_reconstructor": "ExtraTreesRegressor", - "gammaness_classifier": "ExtraTreesClassifier", - "columns_to_rename": {}, - "output_table_schema": [ - Column( - name="obs_id", dtype=np.uint64, description="Observation Block ID" - ), - Column(name="event_id", dtype=np.uint64, description="Array event ID"), - Column(name="tel_id", dtype=np.uint64, description="Telescope ID"), - Column( - name="ExtraTreesRegressor_tel_energy", - unit=u.TeV, - description="Reconstructed energy", - ), - Column( - name="ExtraTreesRegressor_tel_energy_uncert", - unit=u.TeV, - description="Reconstructed energy uncertainty", - ), - ], - "apply_derived_columns": False, - # "disable_column_renaming": True, - "apply_check_pointing": False, - }, - "DL2EventQualityQuery": { - "quality_criteria": [ - ("valid reco", "ExtraTreesRegressor_tel_is_valid"), - ] - }, - } - - def test_normalise_column_names(dummy_table): from ctapipe.io.dl2_tables_preprocessing import DL2EventPreprocessor diff --git a/src/ctapipe/irf/tests/test_load_events.py b/src/ctapipe/irf/tests/test_load_events.py index 8acc407ecba..a94a886205c 100644 --- a/src/ctapipe/irf/tests/test_load_events.py +++ b/src/ctapipe/irf/tests/test_load_events.py @@ -1,7 +1,5 @@ import astropy.units as u import numpy as np -import pytest -from astropy.table import Column from pyirf.simulations import SimulatedEventsInfo from pyirf.spectral import PowerLaw from traitlets.config import Config @@ -10,43 +8,6 @@ from ctapipe.irf import Spectra -@pytest.fixture(scope="function") -def test_config(): - return { - "DL2EventLoader": {"event_reader_function": "read_telescope_events_chunked"}, - "DL2EventPreprocessor": { - "energy_reconstructor": "ExtraTreesRegressor", - "gammaness_classifier": "ExtraTreesClassifier", - "columns_to_rename": {}, - "output_table_schema": [ - Column( - name="obs_id", dtype=np.uint64, description="Observation Block ID" - ), - Column(name="event_id", dtype=np.uint64, description="Array event ID"), - Column(name="tel_id", dtype=np.uint64, description="Telescope ID"), - Column( - name="ExtraTreesRegressor_tel_energy", - unit=u.TeV, - description="Reconstructed energy", - ), - Column( - name="ExtraTreesRegressor_tel_energy_uncert", - unit=u.TeV, - description="Reconstructed energy uncertainty", - ), - ], - "apply_derived_columns": False, - # "disable_column_renaming": True, - "apply_check_pointing": False, - }, - "DL2EventQualityQuery": { - "quality_criteria": [ - ("valid reco", "ExtraTreesRegressor_tel_is_valid"), - ] - }, - } - - def test_event_loader(gamma_diffuse_full_reco_file, irf_event_loader_test_config): loader = DL2EventLoader( config=irf_event_loader_test_config, From 5d07e5a421aeab422d058e3790b1001148bb36a4 Mon Sep 17 00:00:00 2001 From: "mykhailo.dalchenko" Date: Thu, 17 Jul 2025 16:56:45 +0200 Subject: [PATCH 24/33] Make pyirf dependency optional, fix dl2_tables_preprocessing exposure --- src/ctapipe/io/dl2_tables_preprocessing.py | 33 ++++++++++++++++------ src/ctapipe/irf/__init__.py | 3 -- 2 files changed, 25 insertions(+), 11 deletions(-) diff --git a/src/ctapipe/io/dl2_tables_preprocessing.py b/src/ctapipe/io/dl2_tables_preprocessing.py index b3ee822f284..6b7bf6a4851 100644 --- a/src/ctapipe/io/dl2_tables_preprocessing.py +++ b/src/ctapipe/io/dl2_tables_preprocessing.py @@ -6,14 +6,26 @@ import numpy as np from astropy.coordinates import AltAz, SkyCoord from astropy.table import Column, QTable, Table, vstack -from pyirf.simulations import SimulatedEventsInfo -from pyirf.spectral import ( - DIFFUSE_FLUX_UNIT, - POINT_SOURCE_FLUX_UNIT, - PowerLaw, - calculate_event_weights, -) -from pyirf.utils import calculate_source_fov_offset, calculate_theta + +try: + from pyirf.simulations import SimulatedEventsInfo + from pyirf.spectral import ( + DIFFUSE_FLUX_UNIT, + POINT_SOURCE_FLUX_UNIT, + PowerLaw, + calculate_event_weights, + ) + from pyirf.utils import calculate_source_fov_offset, calculate_theta +except ModuleNotFoundError: + SimulatedEventsInfo = None + DIFFUSE_FLUX_UNIT = None + POINT_SOURCE_FLUX_UNIT = None + PowerLaw = None + calculate_event_weights = None + calculate_source_fov_offset = None + calculate_theta = None + + from tables import NoSuchNodeError from traitlets import default @@ -330,6 +342,11 @@ def get_simulation_information( NotImplementedError If simulation parameters vary across runs. """ + from ..exceptions import OptionalDependencyMissing + + if SimulatedEventsInfo is None: + raise OptionalDependencyMissing("pyirf") + sim = loader.read_simulation_configuration() try: show = loader.read_shower_distribution() diff --git a/src/ctapipe/irf/__init__.py b/src/ctapipe/irf/__init__.py index 7ac94c3cbf1..d4abdb43fac 100644 --- a/src/ctapipe/irf/__init__.py +++ b/src/ctapipe/irf/__init__.py @@ -8,7 +8,6 @@ raise OptionalDependencyMissing("pyirf") from None -from ..io.dl2_tables_preprocessing import DL2EventLoader, DL2EventPreprocessor from .benchmarks import ( AngularResolution2dMaker, EnergyBiasResolution2dMaker, @@ -46,8 +45,6 @@ "OptimizationResult", "PointSourceSensitivityOptimizer", "PercentileCuts", - "DL2EventLoader", - "DL2EventPreprocessor", "Spectra", "GhPercentileCutCalculator", "ThetaPercentileCutCalculator", From 9af99dd17c10983321fc2c929d845b97e213d4e4 Mon Sep 17 00:00:00 2001 From: "mykhailo.dalchenko" Date: Fri, 18 Jul 2025 10:59:19 +0200 Subject: [PATCH 25/33] Fix min tests for preprocessing --- src/ctapipe/io/tests/test_preprocessing.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/ctapipe/io/tests/test_preprocessing.py b/src/ctapipe/io/tests/test_preprocessing.py index 348c8d4d96b..9415617aaee 100644 --- a/src/ctapipe/io/tests/test_preprocessing.py +++ b/src/ctapipe/io/tests/test_preprocessing.py @@ -2,8 +2,6 @@ import numpy as np import pytest from astropy.table import Column, QTable, Table -from pyirf.simulations import SimulatedEventsInfo -from pyirf.spectral import PowerLaw from traitlets.config import Config @@ -86,6 +84,10 @@ def test_normalise_column_names(dummy_table): def test_event_loader(gamma_diffuse_full_reco_file, irf_event_loader_test_config): + pytest.importorskip("pyirf", reason="pyirf is an optional dependency") + from pyirf.simulations import SimulatedEventsInfo + from pyirf.spectral import PowerLaw + from ctapipe.io.dl2_tables_preprocessing import DL2EventLoader from ctapipe.irf import Spectra From 4a234a6aac8727ffd9d8c6f668bebb8c64f81f29 Mon Sep 17 00:00:00 2001 From: "mykhailo.dalchenko" Date: Tue, 22 Jul 2025 15:41:53 +0200 Subject: [PATCH 26/33] Remove "u.dimensionless_unscaled" units from gh_score and weights. --- src/ctapipe/io/dl2_tables_preprocessing.py | 21 +++++++++------------ src/ctapipe/io/tests/test_preprocessing.py | 2 +- 2 files changed, 10 insertions(+), 13 deletions(-) diff --git a/src/ctapipe/io/dl2_tables_preprocessing.py b/src/ctapipe/io/dl2_tables_preprocessing.py index 6b7bf6a4851..172282dd1a2 100644 --- a/src/ctapipe/io/dl2_tables_preprocessing.py +++ b/src/ctapipe/io/dl2_tables_preprocessing.py @@ -110,6 +110,13 @@ class DL2EventPreprocessor(Component): Column(name="reco_alt", unit=u.deg, description="Reconstructed altitude"), Column(name="pointing_az", unit=u.deg, description="Pointing azimuth"), Column(name="pointing_alt", unit=u.deg, description="Pointing altitude"), + Column( + name="gh_score", + dtype=np.float64, + description="prediction of the classifier, defined between [0,1]," + " where values close to 1 mean that the positive class" + " (e.g. gamma in gamma-ray analysis) is more likely", + ), ], help="Schema definition for output event QTable", ).tag(config=True) @@ -218,16 +225,9 @@ def make_empty_table(self) -> QTable: unit=u.deg, description="Reconstructed angular offset from pointing direction", ), - Column( - name="gh_score", - unit=u.dimensionless_unscaled, - description="prediction of the classifier, defined between [0,1]," - " where values close to 1 mean that the positive class" - " (e.g. gamma in gamma-ray analysis) is more likely", - ), Column( name="weight", - unit=u.dimensionless_unscaled, + dtype=np.float64, description="Event weight", ), ] @@ -405,10 +405,7 @@ def make_derived_columns(self, events: QTable) -> QTable: reco_nominal = reco.transform_to(nominal) events["reco_fov_lon"] = u.Quantity(-reco_nominal.fov_lon) # minus for GADF events["reco_fov_lat"] = u.Quantity(reco_nominal.fov_lat) - events["weight"] = ( - 1.0 * u.dimensionless_unscaled - ) # defer calculation of proper weights to later - events["gh_score"].unit = u.dimensionless_unscaled + events["weight"] = 1.0 # defer calculation of proper weights to later return events def make_event_weights( diff --git a/src/ctapipe/io/tests/test_preprocessing.py b/src/ctapipe/io/tests/test_preprocessing.py index 9415617aaee..46879ac6350 100644 --- a/src/ctapipe/io/tests/test_preprocessing.py +++ b/src/ctapipe/io/tests/test_preprocessing.py @@ -42,7 +42,7 @@ def test_normalise_column_names(dummy_table): Column(name="pointing_az", unit=u.deg, description="Pointing longitude"), Column( name="gh_score", - unit=u.dimensionless_unscaled, + dtype=np.float64, description="prediction of the classifier, defined between [0,1]," " where values close to 1 mean that the positive class" " (e.g. gamma in gamma-ray analysis) is more likely", From 7d4960d7f330afd649722abb17fef425e673f595 Mon Sep 17 00:00:00 2001 From: "mykhailo.dalchenko" Date: Tue, 22 Jul 2025 17:01:29 +0200 Subject: [PATCH 27/33] fix test fixture (new order of columns) --- src/ctapipe/conftest.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/ctapipe/conftest.py b/src/ctapipe/conftest.py index a110c89bee2..1f34a453c27 100644 --- a/src/ctapipe/conftest.py +++ b/src/ctapipe/conftest.py @@ -820,7 +820,11 @@ def irf_events_table(): epp = DL2EventPreprocessor() tab = epp.make_empty_table() - ids, bulk, unitless = tab.colnames[:2], tab.colnames[2:-2], tab.colnames[-2:] + ids = ["obs_id", "event_id"] + unitless = set( + [colname for colname in tab.colnames if tab[colname].unit is None] + ) - set(ids) + bulk = set(tab.colnames) - set(ids) - set(unitless) id_tab = QTable( data=np.zeros((N, len(ids)), dtype=np.uint64), From e9a17caba30e91979a0cc9deb6b3d760e829b41f Mon Sep 17 00:00:00 2001 From: "Georgios.Voutsinas" Date: Mon, 28 Jul 2025 16:48:30 +0200 Subject: [PATCH 28/33] improving the help of a config parameter --- src/ctapipe/io/dl2_tables_preprocessing.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/src/ctapipe/io/dl2_tables_preprocessing.py b/src/ctapipe/io/dl2_tables_preprocessing.py index 172282dd1a2..adeaa5b8c1c 100644 --- a/src/ctapipe/io/dl2_tables_preprocessing.py +++ b/src/ctapipe/io/dl2_tables_preprocessing.py @@ -84,7 +84,12 @@ class DL2EventPreprocessor(Component): ).tag(config=True) apply_check_pointing = Bool( - default_value=True, help="Accept only pointing in altaz and not divergent." + default_value=True, + help=( + "Check whether the pointing is supported." + "For the moment, only pointing in altaz is supported." + "Divergent pointing is also not supported." + ), ).tag(config=True) columns_to_rename = Dict( From 1fa70129627b9176c3e4ba4c0160e2cb8d6898d4 Mon Sep 17 00:00:00 2001 From: "Georgios.Voutsinas" Date: Mon, 4 Aug 2025 14:45:37 +0200 Subject: [PATCH 29/33] supported pointing frames --- src/ctapipe/conftest.py | 2 +- src/ctapipe/io/dl2_tables_preprocessing.py | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/ctapipe/conftest.py b/src/ctapipe/conftest.py index 1f34a453c27..3765643c4da 100644 --- a/src/ctapipe/conftest.py +++ b/src/ctapipe/conftest.py @@ -889,7 +889,7 @@ def test_config(): ], "apply_derived_columns": False, # "disable_column_renaming": True, - "apply_check_pointing": False, + "allow_unsupported_pointing_frames": True, }, "DL2EventQualityQuery": { "quality_criteria": [ diff --git a/src/ctapipe/io/dl2_tables_preprocessing.py b/src/ctapipe/io/dl2_tables_preprocessing.py index adeaa5b8c1c..e85b7506605 100644 --- a/src/ctapipe/io/dl2_tables_preprocessing.py +++ b/src/ctapipe/io/dl2_tables_preprocessing.py @@ -83,8 +83,8 @@ class DL2EventPreprocessor(Component): default_value=True, help="Whether to compute derived columns" ).tag(config=True) - apply_check_pointing = Bool( - default_value=True, + allow_unsupported_pointing_frames = Bool( + default_value=False, help=( "Check whether the pointing is supported." "For the moment, only pointing in altaz is supported." @@ -162,7 +162,7 @@ def normalise_column_names(self, events: QTable) -> QTable: ValueError If required columns are missing. """ - if self.apply_check_pointing: + if not self.allow_unsupported_pointing_frames: if events["subarray_pointing_lat"].std() > 1e-3: raise NotImplementedError( "No support for making irfs from varying pointings yet" From 6bfc3710f2d9e790c1bd9eeff8219393b7e20c8a Mon Sep 17 00:00:00 2001 From: "Georgios.Voutsinas" Date: Sun, 21 Sep 2025 16:16:46 +0100 Subject: [PATCH 30/33] Variable names, warning, imports --- src/ctapipe/io/dl2_tables_preprocessing.py | 35 +++++++++++----------- 1 file changed, 17 insertions(+), 18 deletions(-) diff --git a/src/ctapipe/io/dl2_tables_preprocessing.py b/src/ctapipe/io/dl2_tables_preprocessing.py index e85b7506605..165fe5fa9a7 100644 --- a/src/ctapipe/io/dl2_tables_preprocessing.py +++ b/src/ctapipe/io/dl2_tables_preprocessing.py @@ -16,15 +16,10 @@ calculate_event_weights, ) from pyirf.utils import calculate_source_fov_offset, calculate_theta -except ModuleNotFoundError: - SimulatedEventsInfo = None - DIFFUSE_FLUX_UNIT = None - POINT_SOURCE_FLUX_UNIT = None - PowerLaw = None - calculate_event_weights = None - calculate_source_fov_offset = None - calculate_theta = None + has_pyirf = True +except ModuleNotFoundError: + has_pyirf = False from tables import NoSuchNodeError from traitlets import default @@ -302,24 +297,26 @@ def load_preselected_events( opts = dict(dl2=True, simulated=True, observation_info=True) - with TableLoader(self.file, parent=self, **opts) as load: - header = self.epp.make_empty_table() - sim_info, spectrum = self.get_simulation_information(load, obs_time) + with TableLoader(self.file, parent=self, **opts) as loader: + table_template = self.epp.make_empty_table() + sim_info, spectrum = self.get_simulation_information(loader, obs_time) meta = {"sim_info": sim_info, "spectrum": spectrum} - bits = [header] + event_chunks = [table_template] n_raw_events = 0 - reader_func = getattr(load, self.event_reader_function) + reader_func = getattr(loader, self.event_reader_function) table_reader = reader_func(chunk_size, **opts, **self.event_reader_kwargs) for _, _, events in table_reader: selected = events[self.epp.quality_query.get_table_mask(events)] selected = self.epp.normalise_column_names(selected) if self.epp.apply_derived_columns: selected = self.make_derived_columns(selected) - bits.append(selected) + event_chunks.append(selected) n_raw_events += len(events) - bits.append(header) # Putting it last ensures the correct metadata is used - table = vstack(bits, join_type="exact", metadata_conflicts="silent") + event_chunks.append( + table_template + ) # Putting it last ensures the correct metadata is used + table = vstack(event_chunks, join_type="exact", metadata_conflicts="silent") return table, n_raw_events, meta def get_simulation_information( @@ -347,15 +344,17 @@ def get_simulation_information( NotImplementedError If simulation parameters vary across runs. """ - from ..exceptions import OptionalDependencyMissing if SimulatedEventsInfo is None: - raise OptionalDependencyMissing("pyirf") + raise ImportError("pyirf is required for this functionality") sim = loader.read_simulation_configuration() try: show = loader.read_shower_distribution() except NoSuchNodeError: + self.log.warning( + "Simulation distributions were not found in the input files, falling back to estimating the number of showers from the simulation configuration." + ) show = Table([sim["n_showers"]], names=["n_entries"], dtype=[np.int64]) for itm in ["spectral_index", "energy_range_min", "energy_range_max"]: From fed7c7fe1bbdb8c4062ddc3b4cb98ce3cd4bc4a4 Mon Sep 17 00:00:00 2001 From: "Georgios.Voutsinas" Date: Sun, 21 Sep 2025 16:29:48 +0100 Subject: [PATCH 31/33] Variable names, warning, imports --- src/ctapipe/io/dl2_tables_preprocessing.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/ctapipe/io/dl2_tables_preprocessing.py b/src/ctapipe/io/dl2_tables_preprocessing.py index 165fe5fa9a7..3d7e5444594 100644 --- a/src/ctapipe/io/dl2_tables_preprocessing.py +++ b/src/ctapipe/io/dl2_tables_preprocessing.py @@ -321,7 +321,7 @@ def load_preselected_events( def get_simulation_information( self, loader: TableLoader, obs_time: u.Quantity - ) -> tuple[SimulatedEventsInfo, PowerLaw]: + ) -> tuple["SimulatedEventsInfo", "PowerLaw"]: """ Extract simulation information from the input file. @@ -345,7 +345,7 @@ def get_simulation_information( If simulation parameters vary across runs. """ - if SimulatedEventsInfo is None: + if not has_pyirf: raise ImportError("pyirf is required for this functionality") sim = loader.read_simulation_configuration() From 4061c9da5b0eb39eeffb3d5fe2558bdc534d2a71 Mon Sep 17 00:00:00 2001 From: "Georgios.Voutsinas" Date: Sun, 21 Sep 2025 16:53:32 +0100 Subject: [PATCH 32/33] Put annotation when needed by the new importing scheme --- src/ctapipe/io/dl2_tables_preprocessing.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/ctapipe/io/dl2_tables_preprocessing.py b/src/ctapipe/io/dl2_tables_preprocessing.py index 3d7e5444594..d9ea514c702 100644 --- a/src/ctapipe/io/dl2_tables_preprocessing.py +++ b/src/ctapipe/io/dl2_tables_preprocessing.py @@ -415,7 +415,7 @@ def make_derived_columns(self, events: QTable) -> QTable: def make_event_weights( self, events: QTable, - spectrum: PowerLaw, + spectrum: "PowerLaw", kind: str, fov_offset_bins: u.Quantity | None = None, ) -> QTable: From 11d98e271d3f140d53efa445ad3233628fd1380f Mon Sep 17 00:00:00 2001 From: "Georgios.Voutsinas" Date: Mon, 22 Sep 2025 12:05:38 +0100 Subject: [PATCH 33/33] Reverting to correct exception --- src/ctapipe/io/dl2_tables_preprocessing.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/ctapipe/io/dl2_tables_preprocessing.py b/src/ctapipe/io/dl2_tables_preprocessing.py index d9ea514c702..6838ad700d4 100644 --- a/src/ctapipe/io/dl2_tables_preprocessing.py +++ b/src/ctapipe/io/dl2_tables_preprocessing.py @@ -344,9 +344,10 @@ def get_simulation_information( NotImplementedError If simulation parameters vary across runs. """ + from ..exceptions import OptionalDependencyMissing if not has_pyirf: - raise ImportError("pyirf is required for this functionality") + raise OptionalDependencyMissing("pyirf") sim = loader.read_simulation_configuration() try: