diff --git a/Configuration/ProcessModifiers/python/enableTruth_cff.py b/Configuration/ProcessModifiers/python/enableTruth_cff.py
new file mode 100644
index 0000000000000..3b82cbd1f5a10
--- /dev/null
+++ b/Configuration/ProcessModifiers/python/enableTruth_cff.py
@@ -0,0 +1,3 @@
+import FWCore.ParameterSet.Config as cms
+
+enableTruth = cms.Modifier()  # cms.Modifier instance; presumably the one selected by '--procModifiers enableTruth' - confirm
\ No newline at end of file
diff --git a/Configuration/PyReleaseValidation/python/upgradeWorkflowComponents.py b/Configuration/PyReleaseValidation/python/upgradeWorkflowComponents.py
index 1887169482d37..9006faf3563b9 100644
--- a/Configuration/PyReleaseValidation/python/upgradeWorkflowComponents.py
+++ b/Configuration/PyReleaseValidation/python/upgradeWorkflowComponents.py
@@ -1094,6 +1094,33 @@ def condition(self, fragment, stepList, key, hasHarvest):
upgradeWFs['ticlv5_TrackLinkingGNN'].step3 = {'--procModifiers': 'ticlv5_TrackLinkingGNN'}
upgradeWFs['ticlv5_TrackLinkingGNN'].step4 = {'--procModifiers': 'ticlv5_TrackLinkingGNN'}
+
+
+class UpgradeWorkflow_enableTruth(UpgradeWorkflow):  # workflow variant that adds the 'enableTruth' process modifier to RecoGlobal steps
+ def setup_(self, step, stepName, stepDict, k, properties):  # fills stepDict[stepName][k] from stepDict[step][k]; 'properties' is not used here
+ if 'RecoGlobal' in step:  # substring match on the step name
+ stepDict[stepName][k] = deepcopy(stepDict[step][k])  # work on a copy so the source step's options are left untouched
+
+ if '--procModifiers' in stepDict[stepName][k]:  # modifiers already present: extend the comma-separated list
+ stepDict[stepName][k]['--procModifiers'] += ',enableTruth'
+ else:  # no modifiers yet: enableTruth becomes the only entry
+ stepDict[stepName][k]['--procModifiers'] = 'enableTruth'
+
+ def condition(self, fragment, stepList, key, hasHarvest):  # True only when 'Run4' occurs in key; the other arguments are ignored
+ return 'Run4' in key
+
+
+upgradeWFs['enableTruth'] = UpgradeWorkflow_enableTruth(  # register the variant under the 'enableTruth' key
+ steps = [  # only RecoGlobal is listed, matching the step check in setup_
+ 'RecoGlobal',
+ ],
+ PU = [  # the same single step is listed for PU
+ 'RecoGlobal',
+ ],
+ suffix = '_enableTruth',  # NOTE(review): presumably appended to step names by UpgradeWorkflow - confirm
+ offset = 0.88,  # NOTE(review): presumably the workflow-number offset; confirm 0.88 is not used by another upgradeWFs entry
+)
+
# L3 Tracker Muon Outside-In reconstruction first
class UpgradeWorkflow_phase2L3MuonsOIFirst(UpgradeWorkflow):
def setup_(self, step, stepName, stepDict, k, properties):
diff --git a/PhysicsTools/TruthInfo/BuildFile.xml b/PhysicsTools/TruthInfo/BuildFile.xml
new file mode 100644
index 0000000000000..6726efdfce6e0
--- /dev/null
+++ b/PhysicsTools/TruthInfo/BuildFile.xml
@@ -0,0 +1,9 @@
+
+
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/PhysicsTools/TruthInfo/README.md b/PhysicsTools/TruthInfo/README.md
new file mode 100644
index 0000000000000..ca2faabdd8f94
--- /dev/null
+++ b/PhysicsTools/TruthInfo/README.md
@@ -0,0 +1,1131 @@
+# TruthInfo prototype
+
+This package contains a prototype truth graph representation for CMSSW. The goal is to provide a compact, navigable, physics-oriented abstraction of the generator, simulation, and detector-hit truth history of an event.
+
+The current implementation is split into three layers:
+
+1. `TruthGraph`: a compact raw graph built directly from existing CMS truth products.
+2. `truth::Graph`: a higher-level logical graph exposing particles, vertices, payload, and navigation methods.
+3. `truth::LogicalGraphHitIndex`: an auxiliary hit index associating logical particles to calorimeter SimHits and, when available, to reconstructed RecHits.
+
+The prototype is intended for validation, reconstruction studies, visualization, and future truth-reco association work.
+
+## Motivation
+
+CMS currently exposes truth information through several low-level collections, such as HepMC, GenParticles, SimTracks, SimVertices, TrackingParticles, SimClusters, CaloParticles, SimHits, and detector-specific RecHits. These collections are useful, but they encode different views of the event history and are often tied to detector-specific or production-specific conventions.
+
+This package explores a different model: a single event-level truth graph that can be navigated using physics concepts, with optional detector-hit indices layered on top.
+
+Typical questions this should make easier are:
+
+* Do two reconstructed objects come from the same parent particle?
+* Did a given resonance, such as a Z boson, exist in the event history?
+* Do two reconstructed objects come from the same Z boson?
+* Which parton initiated a reconstructed jet?
+* Is an object associated with the hard interaction or with pileup?
+* Which detector-level interactions contributed to a reconstructed object?
+* Which SimHits were produced directly by a particle?
+* Which SimHits were produced by the full subgraph starting from a particle?
+* Which RecHits correspond to those SimHits through a DetId association?
+* Should a reconstructed object be associated to a single truth particle, to a branch, or to an aggregated subgraph?
+
+The intended user-facing API should allow reconstruction and validation code to operate on stable physics abstractions rather than directly depending on the storage details of `GenParticle`, `SimTrack`, `GenVertex`, `SimVertex`, `PCaloHit`, or detector-specific RecHit collections.
+
+## Package layout
+
+```text
+PhysicsTools/TruthInfo/
+ interface/
+ TruthGraph.h
+ Graph.h
+ LogicalGraphHitIndex.h
+ LogicalGraphHitIndexBuilder.h
+ src/
+ TruthGraph.cc
+ Graph.cc
+ LogicalGraphHitIndexBuilder.cc
+ classes.h
+ classes_def.xml
+ plugins/
+ TruthGraphProducer.cc
+ TruthGraphDumper.cc
+ TruthLogicalGraphProducer.cc
+ TruthLogicalGraphDumper.cc
+ TruthLogicalGraphHitIndexProducer.cc
+ BuildFile.xml
+ python/
+ truthGraphProducer_cfi.py
+ truthLogicalGraphDumper_cfi.py
+ BuildFile.xml
+```
+
+The auxiliary RecHit lookup used by the hit index is produced separately in:
+
+```text
+SimCalorimetry/HGCalAssociatorProducers/
+ interface/
+ DetIdRecHitMap.h
+ plugins/
+ SimHitToRecHitMapProducer.cc
+```
+
+Despite living under `HGCalAssociatorProducers`, the current `SimHitToRecHitMapProducer` is not HGCal-only. It accepts both `HGCRecHitCollection` inputs and `reco::PFRecHitCollection` inputs.
+
+## Raw graph: `TruthGraph`
+
+`TruthGraph` is a compact, read-only graph representation of event truth information. It is designed as an intermediate event data product built from existing CMS collections.
+
+### Node types
+
+The raw graph supports the following node kinds:
+
+```cpp
+enum class NodeKind : uint8_t {
+ GenEvent,
+ GenVertex,
+ GenParticle,
+ SimVertex,
+ SimTrack
+};
+```
+
+Each node stores a `NodeRef`:
+
+```cpp
+struct NodeRef {
+ NodeKind kind;
+ int64_t key;
+};
+```
+
+The meaning of `key` depends on the node kind:
+
+* `GenEvent`: generator connected-component id.
+* `GenVertex`: HepMC barcode or HepMC3 vertex id.
+* `GenParticle`: HepMC barcode or HepMC3 particle id.
+* `SimVertex`: index in the `SimVertexContainer`.
+* `SimTrack`: `SimTrack::trackId()`.
+
+### Edge types
+
+The raw graph supports edge categories:
+
+```cpp
+enum class EdgeKind : uint8_t {
+ Gen,
+ Sim,
+ GenToSim,
+ SimToGen
+};
+```
+
+At present:
+
+* `Gen` edges describe the generator graph.
+* `Sim` edges describe the simulation graph.
+* `GenToSim` edges connect matched generator particles or vertices to simulation nodes.
+* `SimToGen` is reserved.
+
+The DOT dumper labels cross-domain edges explicitly, for example:
+
+```text
+GenToSim
+SimToGen
+```
+
+### Storage model
+
+Edges are stored in compressed sparse row form:
+
+```cpp
+std::vector<uint32_t> offsets;
+std::vector<uint32_t> edges;
+std::vector<uint8_t> edgeKind;
+```
+
+For node `i`, outgoing edges are stored in:
+
+```cpp
+edges[offsets[i] ... offsets[i + 1])
+```
+
+The corresponding edge kinds are stored in the same range of `edgeKind`.
+
+The class provides convenience accessors such as:
+
+```cpp
+uint32_t nNodes() const;
+uint32_t nEdges() const;
+std::span<const uint32_t> children(uint32_t nodeId) const;
+std::span<const uint8_t> childrenEdgeKinds(uint32_t nodeId) const;
+const NodeRef& nodeRef(uint32_t nodeId) const;
+bool isConsistent() const;
+```
+
+### Cached metadata
+
+The raw graph stores lightweight metadata arrays parallel to the node list:
+
+```cpp
+std::vector<int32_t> pdgId;
+std::vector<int16_t> status;
+std::vector<uint16_t> statusFlags;
+std::vector<uint64_t> eventId;
+std::vector<int32_t> genEventOfNode;
+```
+
+It also stores association arrays:
+
+```cpp
+std::vector<int32_t> simTrackToGen;
+std::vector<int32_t> simTrackToVtx;
+```
+
+These are indexed by raw node id. Entries that are not meaningful for a given node type are filled with default values, typically `0` or `-1`.
+
+## `TruthGraphProducer`
+
+`TruthGraphProducer` builds the raw `TruthGraph` from:
+
+* HepMC3, when available;
+* HepMC2, as fallback;
+* `SimTrackContainer`;
+* `SimVertexContainer`.
+
+Default input tags are:
+
+```python
+genEventHepMC3 = cms.InputTag("generatorSmeared")
+genEventHepMC = cms.InputTag("generatorSmeared")
+simTracks = cms.InputTag("g4SimHits")
+simVertices = cms.InputTag("g4SimHits")
+```
+
+The producer creates:
+
+* one `GenEvent` node per connected generator component;
+* one node per generator vertex;
+* one node per generator particle;
+* one node per simulation vertex;
+* one node per simulation track.
+
+Generator components are computed using a disjoint-set union over the generator particle-vertex graph.
+
+Simulation topology is built from:
+
+* `SimTrack::vertIndex()` for `SimVertex -> SimTrack` edges;
+* `SimVertex::parentIndex()` for `SimTrack -> SimVertex` edges.
+
+Generator-to-simulation particle associations are built using the available `SimTrack` to generator information. The implementation keeps this association explicit in the raw graph instead of assuming that raw generator iteration indices can always be interpreted as stable GenParticle indices.
+
+When enabled, cross-domain edges are also added:
+
+* `GenParticle -> SimTrack`;
+* `GenVertex -> SimVertex`, using the production vertex of the associated generator particle when available.
+
+The cross edges are controlled by:
+
+```python
+addGenToSimEdges = cms.bool(True)
+```
+
+## Logical graph: `truth::Graph`
+
+`truth::Graph` is the user-facing abstraction built from the raw `TruthGraph`. It is intended to expose a stable, physics-oriented API.
+
+The logical graph is bipartite:
+
+```text
+Particle -> decay Vertex -> outgoing Particle
+Particle <- production Vertex <- incoming Particle
+```
+
+The main public types are:
+
+```cpp
+namespace truth {
+ class Graph;
+ class Particle;
+ class Vertex;
+
+ struct ParticleData;
+ struct VertexData;
+ struct Checkpoint;
+}
+```
+
+### `truth::Particle`
+
+A `truth::Particle` may combine generator-level and simulation-level information when a robust correspondence exists.
+
+The stored payload is:
+
+```cpp
+struct ParticleData {
+ int32_t genNode = -1;
+ int32_t simNode = -1;
+
+ int32_t pdgId = 0;
+ int16_t status = 0;
+ uint16_t statusFlags = 0;
+
+ uint64_t eventId = 0;
+ int32_t genEvent = -1;
+
+ math::XYZTLorentzVectorD momentum;
+ std::vector<Checkpoint> checkpoints;
+
+ bool hasGen() const;
+ bool hasSim() const;
+ bool valid() const;
+};
+```
+
+The convention for the momentum is "best available":
+
+1. for GEN+SIM particles, use the GEN four-momentum;
+2. for SIM-only particles, use the `SimTrack` four-momentum;
+3. otherwise keep the default value.
+
+Useful methods include:
+
+```cpp
+bool hasGen() const;
+bool hasSim() const;
+int32_t pdgId() const;
+int16_t status() const;
+uint16_t statusFlags() const;
+uint64_t eventId() const;
+int32_t genEvent() const;
+const math::XYZTLorentzVectorD& momentum() const;
+
+bool isRoot() const;
+bool isLeaf() const;
+
+std::vector<truth::Vertex> productionVertices() const;
+std::vector<truth::Vertex> decayVertices() const;
+
+std::vector<truth::Particle> parents() const;
+std::vector<truth::Particle> children() const;
+std::vector<truth::Particle> ancestors() const;
+std::vector<truth::Particle> descendants() const;
+
+bool hasAncestorPdgId(int pdgId) const;
+std::optional<truth::Particle> firstAncestorWithPdgId(int pdgId) const;
+std::optional<truth::Particle> firstCommonAncestor(truth::Particle other) const;
+```
+
+### `truth::Vertex`
+
+A `truth::Vertex` stores vertex-level payload:
+
+```cpp
+struct VertexData {
+ int32_t genNode = -1;
+ int32_t simNode = -1;
+
+ uint64_t eventId = 0;
+ int32_t genEvent = -1;
+
+ math::XYZTLorentzVectorD position;
+
+ bool hasGen() const;
+ bool hasSim() const;
+ bool valid() const;
+};
+```
+
+Useful methods include:
+
+```cpp
+bool hasGen() const;
+bool hasSim() const;
+uint64_t eventId() const;
+int32_t genEvent() const;
+const math::XYZTLorentzVectorD& position() const;
+
+bool isSource() const;
+bool isSink() const;
+
+std::vector<truth::Particle> incomingParticles() const;
+std::vector<truth::Particle> outgoingParticles() const;
+```
+
+### Vertex treatment
+
+Generator-level and simulation-level particles may be merged when a robust association exists.
+
+Generator-level and simulation-level vertices can be merged by configuration when the producer has enough information to do so:
+
+```python
+mergeGenSimVertices = cms.bool(True)
+```
+
+This is useful for compact visualization and for downstream navigation. During debugging, it can be disabled to inspect generator and simulation vertex semantics separately.
+
+### Intermediate GEN particle collapsing
+
+The logical graph producer can collapse simple generator-only chains of the form:
+
+```text
+P -> V -> C
+```
+
+when:
+
+* `P` is not final-state;
+* `C` is the only daughter;
+* `P` and `C` have the same PDG id.
+
+This is controlled by:
+
+```python
+collapseIntermediateGenParticles = cms.bool(True)
+```
+
+This helps reduce visual clutter from intermediate generator copies while preserving the physically relevant branching structure.
+
+### Filtering by mother PDG id
+
+The logical graph can optionally be restricted to a selected particle species and its descendants:
+
+```python
+motherPdgId = cms.int32(0)
+```
+
+The default value `0` keeps the full graph.
+
+A non-zero value keeps particles matching the requested PDG id and the corresponding descendant subgraphs.
+
+## Trajectory checkpoints
+
+The logical particle model supports trajectory checkpoints:
+
+```cpp
+struct Checkpoint {
+ uint32_t checkpointId = 0;
+ math::XYZTLorentzVectorF position;
+ math::XYZTLorentzVectorF momentum;
+};
+```
+
+A checkpoint represents a relevant point along the propagation history of a particle.
+
+The current prototype uses checkpoint `0` to store boundary-crossing information from `SimTrack`:
+
+* position at boundary;
+* momentum at boundary;
+* boundary identifier.
+
+This is meant to be a generic mechanism. A natural long-term direction is to build the truth graph directly while Geant4 tracks are being created and simulated, rather than reconstructing it afterwards from final CMS products. That would allow the graph to record multiple checkpoints along the propagation history and preserve information that is difficult to recover a posteriori.
+
+## `TruthLogicalGraphProducer`
+
+`TruthLogicalGraphProducer` builds a standalone `truth::Graph` from the raw `TruthGraph`.
+
+Default input tags are:
+
+```python
+src = cms.InputTag("truthGraphProducer")
+simTracks = cms.InputTag("g4SimHits")
+simVertices = cms.InputTag("g4SimHits")
+genEventHepMC3 = cms.InputTag("generatorSmeared")
+genEventHepMC = cms.InputTag("generatorSmeared")
+```
+
+The producer performs the following steps:
+
+1. Read and validate the raw `TruthGraph`.
+2. Load optional payload from HepMC2, HepMC3, `SimTrackContainer`, and `SimVertexContainer`.
+3. Assign temporary logical ids to raw particle and vertex nodes.
+4. Merge particle nodes across GEN and SIM when a robust association exists.
+5. Optionally merge GEN and SIM vertices.
+6. Optionally collapse intermediate GEN-only particle copies.
+7. Fill standalone `ParticleData` and `VertexData` payload.
+8. Rebuild the logical bipartite graph in CSR-like adjacency vectors.
+9. Validate the produced `truth::Graph`.
+
+The produced graph is independent of the raw graph for ordinary navigation, but it keeps optional back-references to raw node ids for debugging:
+
+```cpp
+ParticleData::genNode
+ParticleData::simNode
+VertexData::genNode
+VertexData::simNode
+```
+
+## Logical hit index: `truth::LogicalGraphHitIndex`
+
+`truth::LogicalGraphHitIndex` is an auxiliary data product that associates logical graph particles to calorimeter SimHits and, when possible, to RecHits.
+
+The key idea is that SimHits are directly associated to the particle that produced them, while subgraph hit collections are computed by aggregating over descendants.
+
+For each logical particle, the hit index can answer two different questions:
+
+1. Which SimHits were produced directly by this particle?
+2. Which SimHits were produced by this particle and by all particles in the subgraph below it?
+
+This is important because both views are useful:
+
+* direct hits preserve local detector contributions from one particle;
+* subgraph hits represent the full detector footprint of a shower, decay branch, or composite truth object.
+
+### Hit payload
+
+The hit index stores compact hit records:
+
+```cpp
+struct Hit {
+ uint32_t detId;
+ uint32_t recHitIndex;
+ float energy;
+};
+```
+
+The `detId` is the reco DetId used for matching to RecHits.
+
+The `recHitIndex` is the index in the global RecHit ordering produced by `SimHitToRecHitMapProducer`. If no RecHit was found for the SimHit DetId, it is set to:
+
+```cpp
+truth::LogicalGraphHitIndex::Hit::invalidRecHitIndex
+```
+
+The `energy` is the accumulated SimHit energy for that logical particle and DetId.
+
+### Direct and subgraph access
+
+The hit index exposes spans of hits per logical particle, for example:
+
+```cpp
+auto directHits = hitIndex.directHits(particleId);
+auto subgraphHits = hitIndex.subgraphHits(particleId);
+```
+
+The direct hits are attached only to the particle that directly produced them.
+
+The subgraph hits are accumulated from the particle and all its descendants in the logical graph.
+
+This makes merging two particles or two subgraphs straightforward: one can merge the corresponding hit spans by DetId or by RecHit index, depending on the intended matching metric.
+
+## `TruthLogicalGraphHitIndexProducer`
+
+`TruthLogicalGraphHitIndexProducer` builds `truth::LogicalGraphHitIndex`.
+
+It consumes:
+
+* the logical `truth::Graph`;
+* the raw `TruthGraph`;
+* selected `PCaloHit` collections;
+* an optional DetId to RecHit index map produced by `SimHitToRecHitMapProducer`.
+
+Typical configuration:
+
+```python
+process.truthLogicalGraphHitIndexProducer = cms.EDProducer(
+ "TruthLogicalGraphHitIndexProducer",
+
+ src = cms.InputTag("truthLogicalGraphProducer"),
+ rawSrc = cms.InputTag("truthGraphProducer"),
+
+ recHitMap = cms.InputTag("simHitToRecHitMapProducer"),
+
+ simHitCollections = cms.VInputTag(
+ cms.InputTag("g4SimHits", "HGCHitsEE", "SIM"),
+ cms.InputTag("g4SimHits", "HGCHitsHEfront", "SIM"),
+ cms.InputTag("g4SimHits", "HGCHitsHEback", "SIM"),
+ cms.InputTag("g4SimHits", "EcalHitsEB", "SIM"),
+ cms.InputTag("g4SimHits", "HcalHits", "SIM"),
+ ),
+
+ doHGCalRelabelling = cms.bool(False),
+)
+```
+
+The producer performs the following steps:
+
+1. Build a `SimTrack::trackId()` to logical particle id map.
+2. Read the configured `PCaloHit` collections.
+3. Convert SimHit DetIds to reco DetIds when requested.
+4. Look up the corresponding RecHit index through the DetId map, if available.
+5. Fill direct hits for the logical particle associated to the Geant track id.
+6. Propagate and merge hit collections upward to build subgraph hit collections.
+
+The hit index is intentionally separate from `truth::Graph`. This keeps the graph compact and allows detector-specific hit indices to evolve independently.
+
+## `SimHitToRecHitMapProducer`
+
+`SimHitToRecHitMapProducer` builds the DetId to RecHit index lookup consumed by `TruthLogicalGraphHitIndexProducer`.
+
+The produced type is:
+
+```cpp
+hgcal::DetIdRecHitMap
+```
+
+with the current alias defined in:
+
+```cpp
+SimCalorimetry/HGCalAssociatorProducers/interface/DetIdRecHitMap.h
+```
+
+Conceptually it is:
+
+```cpp
+std::unordered_map<uint32_t, uint32_t>
+```
+
+mapping:
+
+```text
+reco DetId rawId -> global RecHit index
+```
+
+The global RecHit index is built by concatenating the configured RecHit collections in a deterministic order:
+
+1. all configured `HGCRecHitCollection` inputs;
+2. all configured `reco::PFRecHitCollection` inputs.
+
+Typical configuration for RECO step3 output:
+
+```python
+process.simHitToRecHitMapProducer = cms.EDProducer(
+ "SimHitToRecHitMapProducer",
+
+ hgcalRecHits = cms.VInputTag(
+ cms.InputTag("HGCalRecHit", "HGCEERecHits", "RECO"),
+ cms.InputTag("HGCalRecHit", "HGCHEFRecHits", "RECO"),
+ cms.InputTag("HGCalRecHit", "HGCHEBRecHits", "RECO"),
+ ),
+
+ pfRecHits = cms.VInputTag(
+ cms.InputTag("particleFlowRecHitECAL", "Cleaned", "RECO"),
+ cms.InputTag("particleFlowRecHitHBHE", "Cleaned", "RECO"),
+ cms.InputTag("particleFlowRecHitHF", "Cleaned", "RECO"),
+ cms.InputTag("particleFlowRecHitHO", "Cleaned", "RECO"),
+ ),
+)
+```
+
+Do not include both `HGCalRecHit` and `particleFlowRecHitHGC` unless the intended indexing and double-counting policy is explicit. In the current workflow, HGCAL RecHits are taken from `HGCalRecHit`, while barrel and forward PF RecHits are taken from the cleaned `particleFlowRecHit*` collections.
+
+The map needs a ROOT dictionary because it is an EDM product, even if it is not written to the output file.
+
+## Graph navigation examples
+
+### Iterate over particles
+
+```cpp
+auto const& graph = event.get(truthGraphToken_);
+
+for (auto particle : graph.particleViews()) {
+ if (!particle.valid())
+ continue;
+
+ const auto pdgId = particle.pdgId();
+ const auto p4 = particle.momentum();
+}
+```
+
+### Find particles from a Z boson
+
+```cpp
+for (auto particle : graph.particleViews()) {
+ if (particle.hasAncestorPdgId(23)) {
+ // This particle has a Z boson somewhere in its ancestor chain.
+ }
+}
+```
+
+### Find the first common ancestor of two particles
+
+```cpp
+auto p1 = graph.particle(firstId);
+auto p2 = graph.particle(secondId);
+
+auto common = p1.firstCommonAncestor(p2);
+if (common.has_value()) {
+ const int pdgId = common->pdgId();
+}
+```
+
+### Access production and decay vertices
+
+```cpp
+for (auto particle : graph.particleViews()) {
+ for (auto vertex : particle.productionVertices()) {
+ const auto x4 = vertex.position();
+ }
+
+ for (auto vertex : particle.decayVertices()) {
+ const auto x4 = vertex.position();
+ }
+}
+```
+
+### Navigate parent and child particles
+
+```cpp
+for (auto particle : graph.particleViews()) {
+ auto parents = particle.parents();
+ auto children = particle.children();
+}
+```
+
+### Access checkpoints
+
+```cpp
+for (auto particle : graph.particleViews()) {
+ for (auto const& checkpoint : particle.checkpoints()) {
+ const auto id = checkpoint.checkpointId;
+ const auto position = checkpoint.position;
+ const auto momentum = checkpoint.momentum;
+ }
+}
+```
+
+### Access direct and subgraph hits
+
+```cpp
+auto const& hitIndex = event.get(hitIndexToken_);
+
+for (uint32_t particleId = 0; particleId < hitIndex.nParticles(); ++particleId) {
+ auto directHits = hitIndex.directHits(particleId);
+ auto subgraphHits = hitIndex.subgraphHits(particleId);
+
+ float directEnergy = 0.f;
+ for (auto const& hit : directHits) {
+ directEnergy += hit.energy;
+ }
+
+ float subgraphEnergy = 0.f;
+ for (auto const& hit : subgraphHits) {
+ subgraphEnergy += hit.energy;
+ }
+}
+```
+
+## Dumping and visualization
+
+Two graph dumper modules are provided.
+
+### Raw graph dumper
+
+`TruthGraphDumper` writes a DOT representation of the raw graph.
+
+It includes enriched labels using HepMC and simulation products when available.
+
+Default output:
+
+```text
+truthgraph.dot
+```
+
+The dumper can be configured with:
+
+```python
+process.truthGraphDumper = cms.EDAnalyzer(
+ "TruthGraphDumper",
+ src = cms.InputTag("truthGraphProducer"),
+ dotFile = cms.string("truthgraph.dot"),
+ maxNodes = cms.uint32(5000),
+ maxEdgesPerNode = cms.uint32(200),
+
+ simTracks = cms.InputTag("g4SimHits"),
+ simVertices = cms.InputTag("g4SimHits"),
+ genEventHepMC = cms.InputTag("generatorSmeared"),
+ genEventHepMC3 = cms.InputTag("generatorSmeared"),
+)
+```
+
+### Logical graph dumper
+
+`TruthLogicalGraphDumper` writes a DOT representation of the logical graph.
+
+Default output:
+
+```text
+truthlogicalgraph.dot
+```
+
+The dumper can also use:
+
+* the raw `TruthGraph`, to enrich labels with raw node information;
+* the `LogicalGraphHitIndex`, to annotate particles with direct and subgraph SimHit summaries;
+* the RecHit collections, to compute RecHit energy summaries from `recHitIndex`.
+
+Example:
+
+```python
+process.truthLogicalGraphDumper = cms.EDAnalyzer(
+ "TruthLogicalGraphDumper",
+ src = cms.InputTag("truthLogicalGraphProducer"),
+ rawSrc = cms.InputTag("truthGraphProducer"),
+ hitIndex = cms.InputTag("truthLogicalGraphHitIndexProducer"),
+
+ hgcalRecHits = cms.VInputTag(
+ cms.InputTag("HGCalRecHit", "HGCEERecHits", "RECO"),
+ cms.InputTag("HGCalRecHit", "HGCHEFRecHits", "RECO"),
+ cms.InputTag("HGCalRecHit", "HGCHEBRecHits", "RECO"),
+ ),
+
+ pfRecHits = cms.VInputTag(
+ cms.InputTag("particleFlowRecHitECAL", "Cleaned", "RECO"),
+ cms.InputTag("particleFlowRecHitHBHE", "Cleaned", "RECO"),
+ cms.InputTag("particleFlowRecHitHF", "Cleaned", "RECO"),
+ cms.InputTag("particleFlowRecHitHO", "Cleaned", "RECO"),
+ ),
+
+ dotFile = cms.string("truthlogicalgraph.dot"),
+
+ maxParticles = cms.uint32(5000),
+ maxVertices = cms.uint32(5000),
+ maxEdgesPerNode = cms.uint32(200),
+
+ hideLargeSimSourceVertices = cms.bool(True),
+ largeSimSourceVertexMinOutgoing = cms.uint32(50),
+)
+```
+
+Particle labels include summaries such as:
+
+```text
+direct simHits: N simE=...
+direct recHits: N missing=... recoE=...
+subgraph simHits: N simE=...
+subgraph recHits: N missing=... recoE=...
+```
+
+DOT files can be converted with Graphviz, for example:
+
+```bash
+dot -Tsvg truthlogicalgraph_run1_lumi1_event1.dot -o truthlogicalgraph_run1_lumi1_event1.svg
+```
+
+To convert all DOT files in a directory:
+
+```bash
+for f in *.dot; do dot -Tsvg "$f" -o "${f%.dot}.svg"; done
+```
+
+To inspect whether hit information is present in the DOT output:
+
+```bash
+grep -n "simHits\|recHits\|simE\|recoE" truthlogicalgraph_run1_lumi1_event1.dot | head -80
+```
+
+## Example configuration on step3.root
+
+A typical standalone test configuration running on a `step3.root` file is:
+
+```python
+import FWCore.ParameterSet.Config as cms
+
+process = cms.Process("TRUTHGRAPH")
+
+process.load("FWCore.MessageService.MessageLogger_cfi")
+
+process.load("Configuration.Geometry.GeometryExtendedRun4D120Reco_cff")
+
+process.maxEvents = cms.untracked.PSet(
+ input = cms.untracked.int32(-1)
+)
+
+process.source = cms.Source(
+ "PoolSource",
+ fileNames = cms.untracked.vstring("file:step3.root")
+)
+
+process.options = cms.untracked.PSet(
+ wantSummary = cms.untracked.bool(True)
+)
+
+process.truthGraphProducer = cms.EDProducer(
+ "TruthGraphProducer",
+ genEventHepMC3 = cms.InputTag("generatorSmeared"),
+ genEventHepMC = cms.InputTag("generatorSmeared"),
+ simTracks = cms.InputTag("g4SimHits"),
+ simVertices = cms.InputTag("g4SimHits"),
+ addGenToSimEdges = cms.bool(True),
+)
+
+process.truthGraphDumper = cms.EDAnalyzer(
+ "TruthGraphDumper",
+ src = cms.InputTag("truthGraphProducer"),
+ dotFile = cms.string("truthgraph.dot"),
+ maxNodes = cms.uint32(20000),
+ maxEdgesPerNode = cms.uint32(50),
+ simTracks = cms.InputTag("g4SimHits"),
+ simVertices = cms.InputTag("g4SimHits"),
+ genEventHepMC = cms.InputTag("generatorSmeared"),
+ genEventHepMC3 = cms.InputTag("generatorSmeared"),
+)
+
+process.truthLogicalGraphProducer = cms.EDProducer(
+ "TruthLogicalGraphProducer",
+ src = cms.InputTag("truthGraphProducer"),
+ simTracks = cms.InputTag("g4SimHits"),
+ simVertices = cms.InputTag("g4SimHits"),
+ genEventHepMC3 = cms.InputTag("generatorSmeared"),
+ genEventHepMC = cms.InputTag("generatorSmeared"),
+
+ motherPdgId = cms.int32(0),
+ mergeGenSimVertices = cms.bool(True),
+ collapseIntermediateGenParticles = cms.bool(True),
+)
+
+process.simHitToRecHitMapProducer = cms.EDProducer(
+ "SimHitToRecHitMapProducer",
+
+ hgcalRecHits = cms.VInputTag(
+ cms.InputTag("HGCalRecHit", "HGCEERecHits", "RECO"),
+ cms.InputTag("HGCalRecHit", "HGCHEFRecHits", "RECO"),
+ cms.InputTag("HGCalRecHit", "HGCHEBRecHits", "RECO"),
+ ),
+
+ pfRecHits = cms.VInputTag(
+ cms.InputTag("particleFlowRecHitECAL", "Cleaned", "RECO"),
+ cms.InputTag("particleFlowRecHitHBHE", "Cleaned", "RECO"),
+ cms.InputTag("particleFlowRecHitHF", "Cleaned", "RECO"),
+ cms.InputTag("particleFlowRecHitHO", "Cleaned", "RECO"),
+ ),
+)
+
+process.truthLogicalGraphHitIndexProducer = cms.EDProducer(
+ "TruthLogicalGraphHitIndexProducer",
+
+ src = cms.InputTag("truthLogicalGraphProducer"),
+ rawSrc = cms.InputTag("truthGraphProducer"),
+
+ recHitMap = cms.InputTag("simHitToRecHitMapProducer"),
+
+ simHitCollections = cms.VInputTag(
+ cms.InputTag("g4SimHits", "HGCHitsEE", "SIM"),
+ cms.InputTag("g4SimHits", "HGCHitsHEfront", "SIM"),
+ cms.InputTag("g4SimHits", "HGCHitsHEback", "SIM"),
+ cms.InputTag("g4SimHits", "EcalHitsEB", "SIM"),
+ cms.InputTag("g4SimHits", "HcalHits", "SIM"),
+ ),
+
+ doHGCalRelabelling = cms.bool(False),
+)
+
+process.truthLogicalGraphDumper = cms.EDAnalyzer(
+ "TruthLogicalGraphDumper",
+ src = cms.InputTag("truthLogicalGraphProducer"),
+ rawSrc = cms.InputTag("truthGraphProducer"),
+ hitIndex = cms.InputTag("truthLogicalGraphHitIndexProducer"),
+
+ hgcalRecHits = cms.VInputTag(
+ cms.InputTag("HGCalRecHit", "HGCEERecHits", "RECO"),
+ cms.InputTag("HGCalRecHit", "HGCHEFRecHits", "RECO"),
+ cms.InputTag("HGCalRecHit", "HGCHEBRecHits", "RECO"),
+ ),
+
+ pfRecHits = cms.VInputTag(
+ cms.InputTag("particleFlowRecHitECAL", "Cleaned", "RECO"),
+ cms.InputTag("particleFlowRecHitHBHE", "Cleaned", "RECO"),
+ cms.InputTag("particleFlowRecHitHF", "Cleaned", "RECO"),
+ cms.InputTag("particleFlowRecHitHO", "Cleaned", "RECO"),
+ ),
+
+ dotFile = cms.string("truthlogicalgraph.dot"),
+
+ maxParticles = cms.uint32(20000),
+ maxVertices = cms.uint32(20000),
+ maxEdgesPerNode = cms.uint32(300),
+)
+
+process.MessageLogger.cerr.threshold = "INFO"
+process.MessageLogger.cerr.default = cms.untracked.PSet(
+ limit = cms.untracked.int32(0)
+)
+process.MessageLogger.cerr.TruthGraphProducer = cms.untracked.PSet(
+ limit = cms.untracked.int32(-1)
+)
+process.MessageLogger.cerr.TruthLogicalGraphProducer = cms.untracked.PSet(
+ limit = cms.untracked.int32(-1)
+)
+process.MessageLogger.cerr.TruthLogicalGraphHitIndexProducer = cms.untracked.PSet(
+ limit = cms.untracked.int32(-1)
+)
+process.MessageLogger.cerr.SimHitToRecHitMapProducer = cms.untracked.PSet(
+ limit = cms.untracked.int32(-1)
+)
+
+process.truthGraph_step = cms.Path(
+ process.truthGraphProducer +
+ process.truthGraphDumper +
+ process.truthLogicalGraphProducer +
+ process.simHitToRecHitMapProducer +
+ process.truthLogicalGraphHitIndexProducer +
+ process.truthLogicalGraphDumper
+)
+```
+
+## Event content checks
+
+Useful commands to inspect available products are:
+
+```bash
+edmDumpEventContent step3.root | grep -E 'PFRecHit|particleFlowRecHit'
+```
+
+and:
+
+```bash
+edmDumpEventContent step3.root | grep -E 'HGCRecHit|HGCalRecHit|HGCEERecHits|HGCHEFRecHits|HGCHEBRecHits'
+```
+
+For a typical Phase-2 RECO file, the useful collections are:
+
+```text
+vector<HGCRecHit> "HGCalRecHit" "HGCEERecHits" "RECO"
+vector<HGCRecHit> "HGCalRecHit" "HGCHEFRecHits" "RECO"
+vector<HGCRecHit> "HGCalRecHit" "HGCHEBRecHits" "RECO"
+
+vector<reco::PFRecHit> "particleFlowRecHitECAL" "Cleaned" "RECO"
+vector<reco::PFRecHit> "particleFlowRecHitHBHE" "Cleaned" "RECO"
+vector<reco::PFRecHit> "particleFlowRecHitHF" "Cleaned" "RECO"
+vector<reco::PFRecHit> "particleFlowRecHitHO" "Cleaned" "RECO"
+```
+
+The relevant SimHit collections include:
+
+```text
+vector<PCaloHit> "g4SimHits" "HGCHitsEE" "SIM"
+vector<PCaloHit> "g4SimHits" "HGCHitsHEfront" "SIM"
+vector<PCaloHit> "g4SimHits" "HGCHitsHEback" "SIM"
+vector<PCaloHit> "g4SimHits" "EcalHitsEB" "SIM"
+vector<PCaloHit> "g4SimHits" "HcalHits" "SIM"
+vector<SimTrack> "g4SimHits" "" "SIM"
+vector<SimVertex> "g4SimHits" "" "SIM"
+```
+
+## Intended matching strategy
+
+The long-term matching model is to build detector-level associators from hits to `truth::Particle`.
+
+In this model:
+
+* the graph provides the particle and vertex structure;
+* detector-level truth association is performed through hits;
+* reconstructed objects can be associated either to a single `truth::Particle` or to a larger truth branch;
+* truth information can be aggregated over a coherent branch when a reconstructed object corresponds to a composite truth structure.
+
+Different reconstruction domains need different matching metrics:
+
+* tracking association is usually based on shared hits, not on hit energy;
+* calorimeter clustering association can use energy fractions or energy-weighted metrics;
+* timing objects may need time-aware matching;
+* other reconstruction objects may require detector-specific metrics.
+
+The graph should therefore act as the common truth substrate, while matching definitions remain detector-aware and use-case dependent.
+
+## Possible future `truth::Branch` abstraction
+
+A future `truth::Branch` abstraction is intended to represent a coherent subgraph selected from the full truth graph.
+
+This would provide a natural target for truth-reco association when a reconstructed object is not well described by a single truth particle.
+
+Possible branch-level operations include:
+
+* aggregate particles in a physically connected subgraph;
+* collect all detector hits attached to particles in the branch;
+* compute detector-specific matching scores;
+* compare two branches;
+* define stable references for composite truth structures;
+* merge two selected truth structures and their hit content.
+
+In this picture, `truth::Branch` avoids forcing an early collapse of the truth history into fixed reference objects.
+
+## Hits attached to particles
+
+The current prototype already implements the first version of hit attachment through `truth::LogicalGraphHitIndex`.
+
+This keeps hit information outside the main logical graph data product, while allowing the graph dumper and future associators to query:
+
+* direct SimHits from a particle;
+* subgraph SimHits from a particle and all descendants;
+* matched RecHit indices;
+* SimHit energy sums;
+* RecHit energy sums.
+
+A possible long-term direction is to generalize this further so that detector-specific truth objects such as `TrackingParticle`, `SimCluster`, and `CaloParticle` can be expressed as views or derived abstractions on top of the same graph and hit-index infrastructure.
+
+This would make it possible to:
+
+* use one common truth structure across subsystems;
+* preserve detector-specific information without fragmenting the truth model;
+* define multiple matching strategies on top of the same graph;
+* aggregate truth information dynamically over particles, vertices, or branches.
+
+## Current status
+
+The current prototype can:
+
+* build a raw `TruthGraph` from generator and simulation products;
+* build a logical `truth::Graph` from the raw graph;
+* merge matched generator and simulation particles;
+* optionally merge generator and simulation vertices;
+* optionally collapse intermediate GEN-only particle copies;
+* navigate particle and vertex relations;
+* compute parents, children, ancestors, descendants, roots, and leaves;
+* find ancestors with a given PDG id;
+* find a common ancestor between two particles;
+* store boundary-crossing checkpoints;
+* build direct and subgraph calorimeter SimHit indices for each logical particle;
+* map SimHits to RecHit indices through DetId when a RecHit map is available;
+* dump raw and logical graphs to DOT for debugging;
+* annotate logical graph DOT nodes with SimHit and RecHit energy summaries.
+
+## Known limitations
+
+The current implementation is a prototype and several aspects are intentionally conservative.
+
+Known limitations include:
+
+* GEN-SIM association still needs broader validation;
+* the semantics of vertex merging require further study;
+* checkpoint information is currently limited to boundary-crossing information from `SimTrack`;
+* the hit index currently targets calorimeter `PCaloHit` inputs;
+* RecHit association is currently DetId-based and does not encode more detailed detector response information;
+* duplicate DetId handling depends on the configured RecHit input ordering and map policy;
+* `truth::Branch` is part of the target design but not yet implemented;
+* the logical API is still evolving;
+* the raw graph construction needs further validation on realistic events and pileup scenarios.
+
+## Next steps
+
+Planned or natural next steps are:
+
+1. Continue debugging the raw truth graph construction.
+2. Validate HepMC2 and HepMC3 behaviour on representative workflows.
+3. Refine GEN-SIM particle association.
+4. Study the vertex merging policy in realistic events.
+5. Extend the checkpoint model beyond boundary crossings.
+6. Investigate direct graph construction during Geant4 simulation.
+7. Extend hit indexing beyond calorimeter `PCaloHit` where appropriate.
+8. Prototype detector-specific truth-reco matching metrics on top of the hit index.
+9. Implement a `truth::Branch` abstraction.
+10. Study how existing detector-specific truth containers could be represented as views over the graph.
+11. Stabilize the logical API for downstream reconstruction and validation code.
+
+## Design principle
+
+The main design principle is to separate storage details from physics navigation.
+
+Low-level CMS products remain the source of truth for building the graph, but downstream code should be able to ask physics questions through a stable interface:
+
+```cpp
+particle.parents();
+particle.children();
+particle.ancestors();
+particle.firstCommonAncestor(other);
+particle.hasAncestorPdgId(23);
+hitIndex.directHits(particle.id());
+hitIndex.subgraphHits(particle.id());
+```
+
+rather than reimplementing event-history navigation and hit aggregation separately for each reconstruction or validation use case.
+
+
diff --git a/PhysicsTools/TruthInfo/interface/Graph.h b/PhysicsTools/TruthInfo/interface/Graph.h
new file mode 100644
index 0000000000000..3a58f5d530e4c
--- /dev/null
+++ b/PhysicsTools/TruthInfo/interface/Graph.h
@@ -0,0 +1,244 @@
+#ifndef PhysicsTools_TruthInfo_interface_Graph_h
+#define PhysicsTools_TruthInfo_interface_Graph_h
+
+#include
+#include
+#include
+#include
+
+#include "DataFormats/Math/interface/LorentzVector.h"
+
+namespace truth {
+
+ // One recorded point along a particle's trajectory: an identifier plus the
+ // position and momentum four-vectors stored for that point.
+ struct Checkpoint {
+ uint32_t checkpointId = 0;
+ math::XYZTLorentzVectorF position;
+ math::XYZTLorentzVectorF momentum;
+ };
+
+ // Per-particle payload of the logical graph: provenance back-references,
+ // merged metadata, the nominal four-momentum and optional trajectory checkpoints.
+ struct ParticleData {
+ // Optional provenance/debug back-references to the raw TruthGraph nodes.
+ // -1 means "not available".
+ int32_t genNode = -1;
+ int32_t simNode = -1;
+
+ // Merged metadata.
+ int32_t pdgId = 0;
+ int16_t status = 0;
+
+ // Packed reco::GenStatusFlags bitfield, when available.
+ // 0 means "not available" or "no flags set".
+ uint16_t statusFlags = 0;
+
+ // SIM event id when available, 0 otherwise.
+ uint64_t eventId = 0;
+
+ // GEN connected component id from the raw TruthGraph, -1 if not applicable.
+ int32_t genEvent = -1;
+
+ // Standalone payload.
+ // Nominal physics four-momentum.
+ // For GEN+SIM particles, this is the GEN four-momentum.
+ // For SIM-only particles, this is the SimTrack four-momentum.
+ math::XYZTLorentzVectorD momentum;
+
+ // Optional trajectory checkpoints.
+ std::vector checkpoints;
+
+ // A back-reference counts as present when its node index is non-negative.
+ [[nodiscard]] bool hasGen() const { return genNode >= 0; }
+ [[nodiscard]] bool hasSim() const { return simNode >= 0; }
+ // Valid means backed by at least one raw node (GEN or SIM).
+ [[nodiscard]] bool valid() const { return hasGen() || hasSim(); }
+ };
+
+ // Per-vertex payload of the logical graph: provenance back-references,
+ // event identifiers and the best available position.
+ struct VertexData {
+ // Optional provenance/debug back-references to the raw TruthGraph nodes.
+ // -1 means "not available".
+ int32_t genNode = -1;
+ int32_t simNode = -1;
+
+ // SIM event id when available, 0 otherwise.
+ uint64_t eventId = 0;
+
+ // GEN connected component id from the raw TruthGraph, -1 if not applicable.
+ int32_t genEvent = -1;
+
+ // Standalone payload.
+ // Convention: "best available" position.
+ // Prefer SIM if present, otherwise GEN, otherwise default-constructed.
+ math::XYZTLorentzVectorD position;
+
+ // A back-reference counts as present when its node index is non-negative.
+ [[nodiscard]] bool hasGen() const { return genNode >= 0; }
+ [[nodiscard]] bool hasSim() const { return simNode >= 0; }
+ // Valid means backed by at least one raw node (GEN or SIM).
+ [[nodiscard]] bool valid() const { return hasGen() || hasSim(); }
+ };
+
+ class Graph;
+ class Particle;
+ class Vertex;
+
+ // Non-owning handle to one particle of a Graph, identified by the graph pointer
+ // and a particle id. The handle stores a raw pointer, so it must not outlive the graph.
+ class Particle {
+ public:
+ // A default-constructed handle has no graph attached and reports valid() == false.
+ Particle() = default;
+ Particle(Graph const* graph, uint32_t id) : graph_(graph), id_(id) {}
+
+ // Only checks that a graph is attached; the id is not range-checked here.
+ [[nodiscard]] bool valid() const { return graph_ != nullptr; }
+ [[nodiscard]] uint32_t id() const { return id_; }
+
+ // Payload accessors (defined out of line).
+ [[nodiscard]] const ParticleData& data() const;
+
+ [[nodiscard]] bool hasGen() const;
+ [[nodiscard]] bool hasSim() const;
+ [[nodiscard]] int32_t pdgId() const;
+ [[nodiscard]] int16_t status() const;
+ [[nodiscard]] uint16_t statusFlags() const;
+ [[nodiscard]] uint64_t eventId() const;
+ [[nodiscard]] int32_t genEvent() const;
+ [[nodiscard]] const math::XYZTLorentzVectorD& momentum() const;
+
+ // Trajectory checkpoint access.
+ [[nodiscard]] std::span checkpoints() const;
+ [[nodiscard]] bool hasCheckpoints() const;
+ [[nodiscard]] std::optional checkpoint(uint32_t checkpointId) const;
+
+ // Graph navigation (defined out of line).
+ [[nodiscard]] bool isRoot() const;
+ [[nodiscard]] bool isLeaf() const;
+
+ [[nodiscard]] std::vector productionVertices() const;
+ [[nodiscard]] std::vector decayVertices() const;
+
+ [[nodiscard]] std::vector parents() const;
+ [[nodiscard]] std::vector children() const;
+
+ [[nodiscard]] std::vector ancestors() const;
+ [[nodiscard]] std::vector descendants() const;
+
+ [[nodiscard]] bool hasAncestorPdgId(int pdgId) const;
+ [[nodiscard]] std::optional firstAncestorWithPdgId(int pdgId) const;
+ [[nodiscard]] std::optional firstCommonAncestor(Particle other) const;
+
+ // Two handles are equal when they point to the same graph object and carry the same id.
+ [[nodiscard]] bool operator==(Particle const& other) const { return graph_ == other.graph_ && id_ == other.id_; }
+ [[nodiscard]] bool operator!=(Particle const& other) const { return !(*this == other); }
+
+ private:
+ Graph const* graph_ = nullptr;
+ uint32_t id_ = 0;
+ };
+
+ // Non-owning handle to one vertex of a Graph, identified by the graph pointer
+ // and a vertex id. The handle stores a raw pointer, so it must not outlive the graph.
+ class Vertex {
+ public:
+ // A default-constructed handle has no graph attached and reports valid() == false.
+ Vertex() = default;
+ Vertex(Graph const* graph, uint32_t id) : graph_(graph), id_(id) {}
+
+ // Only checks that a graph is attached; the id is not range-checked here.
+ [[nodiscard]] bool valid() const { return graph_ != nullptr; }
+ [[nodiscard]] uint32_t id() const { return id_; }
+
+ // Payload accessors (defined out of line).
+ [[nodiscard]] const VertexData& data() const;
+
+ [[nodiscard]] bool hasGen() const;
+ [[nodiscard]] bool hasSim() const;
+ [[nodiscard]] uint64_t eventId() const;
+ [[nodiscard]] int32_t genEvent() const;
+ [[nodiscard]] const math::XYZTLorentzVectorD& position() const;
+
+ // Graph navigation (defined out of line).
+ [[nodiscard]] bool isSource() const;
+ [[nodiscard]] bool isSink() const;
+
+ [[nodiscard]] std::vector incomingParticles() const;
+ [[nodiscard]] std::vector outgoingParticles() const;
+
+ // Two handles are equal when they point to the same graph object and carry the same id.
+ [[nodiscard]] bool operator==(Vertex const& other) const { return graph_ == other.graph_ && id_ == other.id_; }
+ [[nodiscard]] bool operator!=(Vertex const& other) const { return !(*this == other); }
+
+ private:
+ Graph const* graph_ = nullptr;
+ uint32_t id_ = 0;
+ };
+
+ // Logical truth graph: particle and vertex payloads plus four adjacency tables
+ // (particle -> decay/production vertices, vertex -> outgoing/incoming particles).
+ // Each table is an offsets array paired with a flat index array; entry i owns the
+ // half-open range [offsets[i], offsets[i + 1]) of the flat array.
+ class Graph {
+ public:
+ using size_type = uint32_t;
+
+ std::vector particles;
+ std::vector vertices;
+
+ // Particle -> decay vertices
+ std::vector particleToDecayVertexOffsets;
+ std::vector particleToDecayVertices;
+
+ // Particle -> production vertices
+ std::vector particleToProductionVertexOffsets;
+ std::vector particleToProductionVertices;
+
+ // Vertex -> outgoing particles
+ std::vector vertexToOutgoingParticleOffsets;
+ std::vector vertexToOutgoingParticles;
+
+ // Vertex -> incoming particles
+ std::vector vertexToIncomingParticleOffsets;
+ std::vector vertexToIncomingParticles;
+
+ // Container sizes narrowed to size_type.
+ [[nodiscard]] size_type nParticles() const { return static_cast(particles.size()); }
+ [[nodiscard]] size_type nVertices() const { return static_cast(vertices.size()); }
+
+ // Empty means there are neither particles nor vertices.
+ [[nodiscard]] bool empty() const { return particles.empty() && vertices.empty(); }
+
+ // Handle factories and whole-graph queries (defined out of line).
+ [[nodiscard]] Particle particle(size_type id) const;
+ [[nodiscard]] Vertex vertex(size_type id) const;
+
+ [[nodiscard]] std::vector particleViews() const;
+ [[nodiscard]] std::vector vertexViews() const;
+
+ [[nodiscard]] std::vector roots() const;
+ [[nodiscard]] std::vector leaves() const;
+
+ [[nodiscard]] std::vector sourceVertices() const;
+ [[nodiscard]] std::vector sinkVertices() const;
+
+ // Raw adjacency accessors. Each returns a span over the flat index array for the
+ // given id. Both offset lookups go through at(), so an id without entries at
+ // [id] and [id + 1] in the offsets array raises std::out_of_range.
+ // The returned span points into this graph's storage and must not outlive it.
+ [[nodiscard]] std::span decayVertices(size_type particleId) const {
+ const auto b = particleToDecayVertexOffsets.at(particleId);
+ const auto e = particleToDecayVertexOffsets.at(particleId + 1);
+ return std::span(particleToDecayVertices.data() + b, e - b);
+ }
+
+ [[nodiscard]] std::span productionVertices(size_type particleId) const {
+ const auto b = particleToProductionVertexOffsets.at(particleId);
+ const auto e = particleToProductionVertexOffsets.at(particleId + 1);
+ return std::span(particleToProductionVertices.data() + b, e - b);
+ }
+
+ [[nodiscard]] std::span outgoingParticles(size_type vertexId) const {
+ const auto b = vertexToOutgoingParticleOffsets.at(vertexId);
+ const auto e = vertexToOutgoingParticleOffsets.at(vertexId + 1);
+ return std::span(vertexToOutgoingParticles.data() + b, e - b);
+ }
+
+ [[nodiscard]] std::span incomingParticles(size_type vertexId) const {
+ const auto b = vertexToIncomingParticleOffsets.at(vertexId);
+ const auto e = vertexToIncomingParticleOffsets.at(vertexId + 1);
+ return std::span(vertexToIncomingParticles.data() + b, e - b);
+ }
+
+ // Structural self-check (defined out of line).
+ [[nodiscard]] bool isConsistent() const;
+
+ private:
+ // Particle and Vertex handles reach the navigation helpers below through friendship.
+ friend class Particle;
+ friend class Vertex;
+
+ [[nodiscard]] std::vector productionVerticesOf(size_type particleId) const;
+ [[nodiscard]] std::vector decayVerticesOf(size_type particleId) const;
+
+ [[nodiscard]] std::vector parentsOf(size_type particleId) const;
+ [[nodiscard]] std::vector childrenOf(size_type particleId) const;
+
+ [[nodiscard]] std::vector ancestorsOf(size_type particleId) const;
+ [[nodiscard]] std::vector descendantsOf(size_type particleId) const;
+
+ [[nodiscard]] std::optional firstAncestorWithPdgIdOf(size_type particleId, int pdgId) const;
+ [[nodiscard]] std::optional firstCommonAncestorOf(size_type a, size_type b) const;
+
+ [[nodiscard]] std::vector incomingParticlesOf(size_type vertexId) const;
+ [[nodiscard]] std::vector outgoingParticlesOf(size_type vertexId) const;
+ };
+
+} // namespace truth
+
+#endif
diff --git a/PhysicsTools/TruthInfo/interface/LogicalGraphHitIndex.h b/PhysicsTools/TruthInfo/interface/LogicalGraphHitIndex.h
new file mode 100644
index 0000000000000..103f0088ca194
--- /dev/null
+++ b/PhysicsTools/TruthInfo/interface/LogicalGraphHitIndex.h
@@ -0,0 +1,68 @@
+#ifndef PhysicsTools_TruthInfo_LogicalGraphHitIndex_h
+#define PhysicsTools_TruthInfo_LogicalGraphHitIndex_h
+
+#include
+#include
+#include
+#include
+
+namespace truth {
+
+ // Per-particle hit index kept outside the logical graph itself.
+ // Two tables are stored, "direct" and "subgraph", each as an offsets array plus a
+ // flat Hit array; particle i owns the half-open range [offsets[i], offsets[i + 1]).
+ class LogicalGraphHitIndex {
+ public:
+ // One indexed hit: detector id, optional RecHit index and an energy value.
+ struct Hit {
+ // Sentinel stored in recHitIndex when the hit has no associated RecHit.
+ static constexpr uint32_t invalidRecHitIndex = std::numeric_limits::max();
+
+ uint32_t detId = 0;
+ uint32_t recHitIndex = invalidRecHitIndex;
+ float energy = 0.f;
+
+ [[nodiscard]] bool hasRecHit() const { return recHitIndex != invalidRecHitIndex; }
+ };
+
+ // A default-constructed index has empty tables; directHits()/subgraphHits() then
+ // throw for every id because the offsets arrays have no entries.
+ LogicalGraphHitIndex() = default;
+
+ // Takes ownership of the prebuilt tables. No size or consistency validation is done here.
+ LogicalGraphHitIndex(uint32_t nParticles,
+ std::vector directOffsets,
+ std::vector directHits,
+ std::vector subgraphOffsets,
+ std::vector subgraphHits)
+ : nParticles_(nParticles),
+ directOffsets_(std::move(directOffsets)),
+ directHits_(std::move(directHits)),
+ subgraphOffsets_(std::move(subgraphOffsets)),
+ subgraphHits_(std::move(subgraphHits)) {}
+
+ [[nodiscard]] uint32_t nParticles() const { return nParticles_; }
+
+ // Hits stored in the "direct" table for this particle.
+ // Throws std::out_of_range (via at()) when the offsets array has no entries for the id.
+ [[nodiscard]] std::span directHits(uint32_t particleId) const {
+ const auto b = directOffsets_.at(particleId);
+ const auto e = directOffsets_.at(particleId + 1);
+ return std::span(directHits_.data() + b, e - b);
+ }
+
+ // Hits stored in the "subgraph" table for this particle.
+ // Throws std::out_of_range (via at()) when the offsets array has no entries for the id.
+ [[nodiscard]] std::span subgraphHits(uint32_t particleId) const {
+ const auto b = subgraphOffsets_.at(particleId);
+ const auto e = subgraphOffsets_.at(particleId + 1);
+ return std::span(subgraphHits_.data() + b, e - b);
+ }
+
+ // Read-only access to the underlying storage.
+ [[nodiscard]] const std::vector& directOffsets() const { return directOffsets_; }
+ [[nodiscard]] const std::vector& directHitStorage() const { return directHits_; }
+
+ [[nodiscard]] const std::vector& subgraphOffsets() const { return subgraphOffsets_; }
+ [[nodiscard]] const std::vector& subgraphHitStorage() const { return subgraphHits_; }
+
+ private:
+ uint32_t nParticles_ = 0;
+
+ std::vector directOffsets_;
+ std::vector directHits_;
+
+ std::vector subgraphOffsets_;
+ std::vector subgraphHits_;
+ };
+
+} // namespace truth
+
+#endif
diff --git a/PhysicsTools/TruthInfo/interface/LogicalGraphHitIndexBuilder.h b/PhysicsTools/TruthInfo/interface/LogicalGraphHitIndexBuilder.h
new file mode 100644
index 0000000000000..36cf3f647a89c
--- /dev/null
+++ b/PhysicsTools/TruthInfo/interface/LogicalGraphHitIndexBuilder.h
@@ -0,0 +1,50 @@
+#ifndef PhysicsTools_TruthInfo_LogicalGraphHitIndexBuilder_h
+#define PhysicsTools_TruthInfo_LogicalGraphHitIndexBuilder_h
+
+#include
+#include
+#include
+
+#include "PhysicsTools/TruthInfo/interface/LogicalGraphHitIndex.h"
+
+namespace truth {
+
+ // Incremental builder for LogicalGraphHitIndex.
+ // Callers register the SimTrack id of each particle, the parent/child links between
+ // particles and the hits per track, then call finish() to obtain the index.
+ // All member functions are defined out of line.
+ class LogicalGraphHitIndexBuilder {
+ public:
+ explicit LogicalGraphHitIndexBuilder(uint32_t nParticles);
+
+ // Associates a SimTrack id with a logical particle id.
+ void setSimTrackForParticle(uint32_t particleId, uint32_t trackId);
+ // Records a parent -> child relation between two logical particles.
+ void addParticleChild(uint32_t parentParticleId, uint32_t childParticleId);
+
+ // Adds one hit, keyed by the SimTrack id that produced it.
+ // NOTE(review): presumably hits for track ids never registered above are dropped — confirm in the implementation.
+ void addHitForTrack(uint32_t trackId, uint32_t detId, uint32_t recHitIndex, float energy);
+
+ // Produces the final index from the accumulated state.
+ [[nodiscard]] LogicalGraphHitIndex finish();
+
+ private:
+ using Hit = LogicalGraphHitIndex::Hit;
+
+ // Per-hit accumulation record: RecHit index (invalid by default) and energy.
+ struct HitAccumulator {
+ uint32_t recHitIndex = Hit::invalidRecHitIndex;
+ float energy = 0.f;
+ };
+
+ using HitMap = std::unordered_map;
+
+ static void addHit(HitMap& hits, uint32_t detId, uint32_t recHitIndex, float energy);
+ static void addHit(HitMap& hits, Hit const& hit);
+ static std::vector sortedHits(HitMap const& hits);
+
+ void fillSubgraphHits(uint32_t particleId, std::vector& state);
+
+ uint32_t nParticles_ = 0;
+
+ std::unordered_map trackIdToParticle_;
+ std::vector> children_;
+
+ std::vector directHits_;
+ std::vector subgraphHits_;
+ };
+
+} // namespace truth
+
+#endif
diff --git a/PhysicsTools/TruthInfo/interface/TruthGraph.h b/PhysicsTools/TruthInfo/interface/TruthGraph.h
new file mode 100644
index 0000000000000..ad5a1ec134da6
--- /dev/null
+++ b/PhysicsTools/TruthInfo/interface/TruthGraph.h
@@ -0,0 +1,100 @@
+// Author: Felice Pantaleo - CERN
+// Date: 03/2026
+// A compact, read-only graph representation of the truth information in an event.
+// The graph is built in the TruthGraphProducer module, which also fills the node metadata and associations.
+// The graph is intended to be a common data format for various use cases (e.g. validation, analysis, visualization).
+
+#ifndef PhysicsTools_TruthInfo_interface_TruthGraph_h
+#define PhysicsTools_TruthInfo_interface_TruthGraph_h
+
+#include
+#include
+#include
+
+// Raw truth graph: nodes for GEN and SIM objects with out-edges stored in CSR form,
+// plus per-node cached payload and SimTrack associations.
+class TruthGraph {
+public:
+ enum class NodeKind : uint8_t {
+ GenEvent = 0,
+ GenVertex = 1,
+ GenParticle = 2,
+ SimVertex = 3,
+ SimTrack = 4,
+ };
+
+ // Edge categories (for visualization / filtering)
+ enum class EdgeKind : uint8_t {
+ Gen = 0, // within GEN realm
+ Sim = 1, // within SIM realm
+ GenToSim = 2, // realm boundary GEN -> SIM
+ SimToGen = 3 // reserved (we don't produce these now)
+ };
+
+ // Identifies the source object behind a node: its kind plus a kind-specific key.
+ struct NodeRef {
+ NodeKind kind = NodeKind::GenParticle;
+ int64_t key = 0; // GenParticle: index; SimTrack: trackId; SimVertex: index; GenVertex: barcode/index
+ };
+
+ TruthGraph() = default;
+
+ // CSR out-edges: offsets.size() == nNodes+1
+ // edges.size() == nEdges
+ // edgeKind.size() == nEdges
+ std::vector offsets;
+ std::vector edges;
+ std::vector edgeKind; // stores TruthGraph::EdgeKind as uint8_t
+
+ // Node metadata: nodes.size() == nNodes
+ std::vector nodes;
+
+ // Cached payload (optional)
+ std::vector pdgId; // 0 if not applicable
+ std::vector status; // 0 if not applicable
+ std::vector statusFlags; // packed reco::GenStatusFlags, 0 if not available
+ // Packed EncodedEventId for SIM nodes; 0 for GEN nodes
+ std::vector eventId;
+
+ std::vector genEventOfNode; // -1 for SIM; for GEN nodes = component id
+
+ // Associations (nodeId -> nodeId). Only meaningful for SimTrack nodes.
+ // -1 means "no association".
+ std::vector simTrackToGen; // SimTrack nodeId -> GenParticle nodeId
+ std::vector simTrackToVtx; // SimTrack nodeId -> SimVertex nodeId
+
+ // Counts are derived from the container sizes.
+ uint32_t nNodes() const { return static_cast(nodes.size()); }
+ uint32_t nEdges() const { return static_cast(edges.size()); }
+
+ // Half-open edge range [edgeBegin, edgeEnd) of a node in the flat edge arrays.
+ // at() throws std::out_of_range when offsets has no entry for the requested index.
+ uint32_t edgeBegin(uint32_t nodeId) const { return offsets.at(nodeId); }
+ uint32_t edgeEnd(uint32_t nodeId) const { return offsets.at(nodeId + 1); }
+
+ // Target node ids of the out-edges of nodeId, as a view into edges.
+ std::span children(uint32_t nodeId) const {
+ const auto b = edgeBegin(nodeId);
+ const auto e = edgeEnd(nodeId);
+ return std::span(edges.data() + b, e - b);
+ }
+
+ // Edge kinds of the out-edges of nodeId, covering the same index range as children().
+ std::span childrenEdgeKinds(uint32_t nodeId) const {
+ const auto b = edgeBegin(nodeId);
+ const auto e = edgeEnd(nodeId);
+ return std::span(edgeKind.data() + b, e - b);
+ }
+
+ // Throws std::out_of_range for an unknown node id.
+ const NodeRef& nodeRef(uint32_t nodeId) const { return nodes.at(nodeId); }
+
+ // The accessors below never throw: when the cache vector has no entry for nodeId
+ // they return the "not applicable" default (0 for payload, -1 for associations).
+ int32_t nodePdgId(uint32_t nodeId) const { return (nodeId < pdgId.size()) ? pdgId[nodeId] : 0; }
+
+ int16_t nodeStatus(uint32_t nodeId) const { return (nodeId < status.size()) ? status[nodeId] : 0; }
+ uint16_t nodeStatusFlags(uint32_t nodeId) const { return (nodeId < statusFlags.size()) ? statusFlags[nodeId] : 0; }
+ uint64_t nodeEventId(uint32_t nodeId) const { return (nodeId < eventId.size()) ? eventId[nodeId] : 0ull; }
+
+ int32_t nodeSimTrackToGen(uint32_t nodeId) const {
+ return (nodeId < simTrackToGen.size()) ? simTrackToGen[nodeId] : -1;
+ }
+
+ int32_t nodeSimTrackToVtx(uint32_t nodeId) const {
+ return (nodeId < simTrackToVtx.size()) ? simTrackToVtx[nodeId] : -1;
+ }
+
+ // Structural self-check (defined out of line).
+ bool isConsistent() const;
+};
+
+#endif
diff --git a/PhysicsTools/TruthInfo/interface/TruthLogicalGraphPostProcessor.h b/PhysicsTools/TruthInfo/interface/TruthLogicalGraphPostProcessor.h
new file mode 100644
index 0000000000000..601d60b245256
--- /dev/null
+++ b/PhysicsTools/TruthInfo/interface/TruthLogicalGraphPostProcessor.h
@@ -0,0 +1,51 @@
+#ifndef PhysicsTools_TruthInfo_interface_TruthLogicalGraphPostProcessor_h
+#define PhysicsTools_TruthInfo_interface_TruthLogicalGraphPostProcessor_h
+
+#include
+#include
+
+#include "FWCore/ParameterSet/interface/ParameterSet.h"
+#include "FWCore/ParameterSet/interface/ParameterSetDescription.h"
+
+#include "PhysicsTools/TruthInfo/interface/Graph.h"
+
+namespace truth {
+
+ // Options controlling the post-processing applied to a logical truth::Graph.
+ struct LogicalGraphPostProcessingConfig {
+ // NOTE(review): presumably enables collapsing of intermediate GEN-only particle
+ // copies — confirm against the post-processor implementation. Enabled by default.
+ bool collapseIntermediateGenParticles = true;
+
+ // If empty, no seed-based graph cut is applied.
+ std::vector seedPdgIds;
+
+ // For each selected seed particle, keep this many generations of parents
+ // above the seed before keeping the full downstream graph.
+ uint32_t seedParentDepth = 0;
+
+ // Particles with these exact PDG ids are removed from the final logical graph.
+ // If such a particle is internal, its production and decay vertices are merged
+ // so that the graph remains navigable.
+ std::vector ignoredPdgIds;
+
+ // Exact logical particle ids to remove from the final logical graph.
+ // These ids refer to the graph state at the moment the ignored-particle
+ // collapsing step is applied.
+ std::vector ignoredParticleIds;
+ };
+
+ // Applies the configured post-processing to a logical Graph and returns the result.
+ // All member functions are defined out of line.
+ class TruthLogicalGraphPostProcessor {
+ public:
+ // Default construction uses the default values of LogicalGraphPostProcessingConfig.
+ TruthLogicalGraphPostProcessor() = default;
+ explicit TruthLogicalGraphPostProcessor(LogicalGraphPostProcessingConfig config);
+
+ // Helpers that describe the configuration parameters and read them from a ParameterSet.
+ static edm::ParameterSetDescription psetDescription();
+ static LogicalGraphPostProcessingConfig configFromPSet(edm::ParameterSet const& pset);
+
+ // Takes the input graph by value and returns the processed graph.
+ [[nodiscard]] Graph process(Graph input) const;
+
+ private:
+ LogicalGraphPostProcessingConfig config_;
+ };
+
+} // namespace truth
+
+#endif
diff --git a/PhysicsTools/TruthInfo/plugins/BuildFile.xml b/PhysicsTools/TruthInfo/plugins/BuildFile.xml
new file mode 100644
index 0000000000000..8e4288cc9aedc
--- /dev/null
+++ b/PhysicsTools/TruthInfo/plugins/BuildFile.xml
@@ -0,0 +1,52 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/PhysicsTools/TruthInfo/plugins/LogicalGraphHitIndexProducer.cc b/PhysicsTools/TruthInfo/plugins/LogicalGraphHitIndexProducer.cc
new file mode 100644
index 0000000000000..831be2a45d06b
--- /dev/null
+++ b/PhysicsTools/TruthInfo/plugins/LogicalGraphHitIndexProducer.cc
@@ -0,0 +1,366 @@
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include "FWCore/Framework/interface/Event.h"
+#include "FWCore/Framework/interface/EventSetup.h"
+#include "FWCore/Framework/interface/MakerMacros.h"
+#include "FWCore/Framework/interface/global/EDProducer.h"
+#include "FWCore/MessageLogger/interface/MessageLogger.h"
+#include "FWCore/ParameterSet/interface/ConfigurationDescriptions.h"
+#include "FWCore/ParameterSet/interface/ParameterSet.h"
+#include "FWCore/ParameterSet/interface/ParameterSetDescription.h"
+#include "FWCore/Utilities/interface/EDGetToken.h"
+#include "FWCore/Utilities/interface/InputTag.h"
+
+#include "DataFormats/DetId/interface/DetId.h"
+#include "DataFormats/ForwardDetId/interface/HGCalDetId.h"
+#include "Geometry/CaloGeometry/interface/CaloGeometry.h"
+#include "Geometry/HGCalGeometry/interface/HGCalGeometry.h"
+#include "Geometry/HcalCommonData/interface/HcalHitRelabeller.h"
+#include "Geometry/HcalTowerAlgo/interface/HcalGeometry.h"
+#include "Geometry/Records/interface/CaloGeometryRecord.h"
+#include "SimDataFormats/CaloHit/interface/PCaloHit.h"
+#include "SimDataFormats/CaloTest/interface/HGCalTestNumbering.h"
+
+#include "PhysicsTools/TruthInfo/interface/Graph.h"
+#include "PhysicsTools/TruthInfo/interface/LogicalGraphHitIndex.h"
+#include "PhysicsTools/TruthInfo/interface/LogicalGraphHitIndexBuilder.h"
+#include "PhysicsTools/TruthInfo/interface/TruthGraph.h"
+
+#include "SimCalorimetry/HGCalAssociatorProducers/interface/DetIdRecHitMap.h"
+
+namespace {
+
+ // Thin read-only adapter over truth::Graph exposing only the queries used in this file.
+ // It holds a reference, so the wrapped graph must outlive the view.
+ struct LogicalGraphView {
+ explicit LogicalGraphView(truth::Graph const& graph) : graph_(graph) {}
+
+ uint32_t nParticles() const { return graph_.nParticles(); }
+
+ // Bounds-checked: returns false for ids beyond the particle container.
+ bool particleHasSim(uint32_t particleId) const {
+ return particleId < graph_.particles.size() && graph_.particles[particleId].hasSim();
+ }
+
+ // No bounds check here; the id must be a valid index into the particle container.
+ int32_t particleSimNode(uint32_t particleId) const { return graph_.particles[particleId].simNode; }
+
+ // Calls f(childId) for every outgoing particle of every decay vertex of the parent.
+ // Out-of-range parent ids and vertex ids are skipped silently; child ids are passed to f unchecked.
+ template
+ void forEachParticleChild(uint32_t parentParticleId, F&& f) const {
+ if (parentParticleId >= graph_.nParticles())
+ return;
+
+ for (const uint32_t vertexId : graph_.decayVertices(parentParticleId)) {
+ if (vertexId >= graph_.nVertices())
+ continue;
+
+ for (const uint32_t childId : graph_.outgoingParticles(vertexId)) {
+ f(childId);
+ }
+ }
+ }
+
+ truth::Graph const& graph_;
+ };
+
+ // Converts a 64-bit node key into a 32-bit track id.
+ // Returns 0 when the key is negative or above the upper bound checked below.
+ // NOTE(review): the upper bound is presumably the uint32_t maximum — confirm.
+ uint32_t checkedTrackId(int64_t key) {
+ if (key < 0 || key > static_cast(std::numeric_limits::max()))
+ return 0;
+
+ return static_cast(key);
+ }
+
+ // Heuristic: an input tag is treated as an HGCAL collection when its product
+ // instance label contains one of the HGCAL name fragments tested below.
+ bool inputTagLooksLikeHGCal(edm::InputTag const& tag) {
+ const std::string& label = tag.instance();
+ const auto labelContains = [&label](const char* fragment) { return label.find(fragment) != std::string::npos; };
+
+ return labelContains("HGCHits") || labelContains("HGCEE") || labelContains("HGCHE");
+ }
+
+ // Heuristic: an input tag is treated as an HCAL collection when its product
+ // instance label contains "Hcal". A single substring test is sufficient because
+ // every label containing "HcalHits" also contains "Hcal".
+ bool inputTagLooksLikeHcal(edm::InputTag const& tag) {
+ return tag.instance().find("Hcal") != std::string::npos;
+ }
+
+ // Geometry handles needed to relabel simulation DetIds.
+ // All pointers are non-owning and null until filled in.
+ struct RelabelContext {
+ // -1 until set; makeRelabelContext() assigns 1 when a DetId::HGCalEE geometry is found and 0 otherwise.
+ int geometryType = -1;
+
+ // Three parallel slots; hgConstants[i] is taken from hgTopologies[i] when that topology is present.
+ std::array hgTopologies = {nullptr, nullptr, nullptr};
+ std::array hgConstants = {nullptr, nullptr, nullptr};
+
+ HcalDDDRecConstants const* hcalConstants = nullptr;
+ };
+
+} // namespace
+
+// Global EDProducer that builds a hit index for the logical truth graph.
+// Inputs: the logical graph, the raw TruthGraph, a list of SimHit collections and a
+// DetId -> RecHit map; the result of LogicalGraphHitIndexBuilder::finish() is put into the event.
+class TruthLogicalGraphHitIndexProducer : public edm::global::EDProducer<> {
+public:
+ explicit TruthLogicalGraphHitIndexProducer(edm::ParameterSet const& cfg);
+ ~TruthLogicalGraphHitIndexProducer() override = default;
+
+ static void fillDescriptions(edm::ConfigurationDescriptions& descriptions);
+
+private:
+ void produce(edm::StreamID, edm::Event&, edm::EventSetup const&) const override;
+
+ // Registers SimTrack ids and parent/child relations of the logical particles with the builder.
+ void fillTrackToParticleMap(LogicalGraphView const& graph,
+ TruthGraph const& rawGraph,
+ truth::LogicalGraphHitIndexBuilder& builder) const;
+
+ // Feeds the configured SimHit collections into the builder.
+ // recHitMap may be null when the RecHit map product is not available.
+ void fillSimHits(edm::Event& event,
+ edm::EventSetup const& setup,
+ truth::LogicalGraphHitIndexBuilder& builder,
+ hgcal::DetIdRecHitMap const* recHitMap) const;
+
+ // Collects the geometry/topology handles used for DetId relabelling.
+ RelabelContext makeRelabelContext(edm::EventSetup const& setup) const;
+
+ // Maps the DetId of a SimHit to the DetId used for the RecHit lookup.
+ uint32_t recoDetIdForSimHit(PCaloHit const& simHit,
+ bool isHGCalCollection,
+ bool isHcalCollection,
+ RelabelContext const& context) const;
+
+ edm::EDGetTokenT graphToken_;
+ edm::EDGetTokenT rawGraphToken_;
+ edm::EDGetTokenT recHitMapToken_;
+
+ // simHitTokens_ is filled in the constructor with one token per entry of simHitTags_, in the same order.
+ std::vector simHitTags_;
+ std::vector>> simHitTokens_;
+
+ edm::ESGetToken geomToken_;
+
+ // Set from the "doHGCalRelabelling" configuration parameter.
+ bool doHGCalRelabelling_ = true;
+};
+
+// Reads the configuration, registers all consumed event and event-setup products,
+// and declares the produced output.
+TruthLogicalGraphHitIndexProducer::TruthLogicalGraphHitIndexProducer(edm::ParameterSet const& cfg)
+ : graphToken_(consumes(cfg.getParameter("src"))),
+ rawGraphToken_(consumes(cfg.getParameter("rawSrc"))),
+ recHitMapToken_(consumes(cfg.getParameter("recHitMap"))),
+ simHitTags_(cfg.getParameter>("simHitCollections")),
+ geomToken_(esConsumes()),
+ doHGCalRelabelling_(cfg.getParameter("doHGCalRelabelling")) {
+ // One token per configured SimHit collection, kept in the same order as simHitTags_.
+ simHitTokens_.reserve(simHitTags_.size());
+ for (auto const& tag : simHitTags_) {
+ simHitTokens_.push_back(consumes>(tag));
+ }
+
+ produces();
+}
+
+// Declares the configuration parameters of this module together with their defaults.
+void TruthLogicalGraphHitIndexProducer::fillDescriptions(edm::ConfigurationDescriptions& descriptions) {
+ edm::ParameterSetDescription desc;
+
+ // Input products: logical graph, raw graph and the RecHit map.
+ desc.add("src", edm::InputTag("truthLogicalGraphProducer"));
+ desc.add("rawSrc", edm::InputTag("truthGraphProducer"));
+ desc.add("recHitMap", edm::InputTag("simHitToRecHitMapProducer"));
+
+ // By default only the three HGCAL SimHit collections are read.
+ desc.add>("simHitCollections",
+ {edm::InputTag("g4SimHits", "HGCHitsEE"),
+ edm::InputTag("g4SimHits", "HGCHitsHEfront"),
+ edm::InputTag("g4SimHits", "HGCHitsHEback")});
+
+ desc.add("doHGCalRelabelling", true)
+ ->setComment("Convert old HGCAL simulation DetIds to reco DetIds before looking up recHits");
+
+ descriptions.addWithDefaultLabel(desc);
+}
+
+// Per-event entry point: reads the logical and raw graphs, fills a
+// LogicalGraphHitIndexBuilder and puts the finished index into the event.
+void TruthLogicalGraphHitIndexProducer::produce(edm::StreamID, edm::Event& event, edm::EventSetup const& setup) const {
+ auto const& graph = event.get(graphToken_);
+ auto const& rawGraph = event.get(rawGraphToken_);
+
+ // The RecHit map is fetched through a handle so that an invalid handle can be
+ // passed on as a null pointer.
+ edm::Handle hRecHitMap;
+ event.getByToken(recHitMapToken_, hRecHitMap);
+ auto const* recHitMap = hRecHitMap.isValid() ? &(*hRecHitMap) : nullptr;
+
+ LogicalGraphView graphView(graph);
+
+ truth::LogicalGraphHitIndexBuilder builder(graphView.nParticles());
+
+ // The track/particle mapping is registered before the hits are added.
+ fillTrackToParticleMap(graphView, rawGraph, builder);
+ fillSimHits(event, setup, builder, recHitMap);
+
+ auto output = std::make_unique(builder.finish());
+ event.put(std::move(output));
+}
+
+// Registers, for every logical particle backed by a SimTrack node of the raw graph,
+// the corresponding track id with the builder, then registers all parent -> child
+// relations of the logical graph.
+void TruthLogicalGraphHitIndexProducer::fillTrackToParticleMap(LogicalGraphView const& graph,
+ TruthGraph const& rawGraph,
+ truth::LogicalGraphHitIndexBuilder& builder) const {
+ for (uint32_t particleId = 0; particleId < graph.nParticles(); ++particleId) {
+ // Particles without a SIM back-reference have no track id to register.
+ if (!graph.particleHasSim(particleId))
+ continue;
+
+ const int32_t simNode = graph.particleSimNode(particleId);
+ if (simNode < 0)
+ continue;
+
+ // Skip back-references that point outside the raw graph.
+ const uint32_t simNodeU32 = static_cast(simNode);
+ if (simNodeU32 >= rawGraph.nNodes())
+ continue;
+
+ // Only SimTrack nodes carry a track id in their key.
+ auto const& ref = rawGraph.nodeRef(simNodeU32);
+ if (ref.kind != TruthGraph::NodeKind::SimTrack)
+ continue;
+
+ // checkedTrackId() returns 0 for keys it rejects; such particles are skipped.
+ const uint32_t trackId = checkedTrackId(ref.key);
+ if (trackId == 0)
+ continue;
+
+ builder.setSimTrackForParticle(particleId, trackId);
+ }
+
+ // Second pass: mirror the particle -> child relations of the logical graph into the builder.
+ for (uint32_t parentId = 0; parentId < graph.nParticles(); ++parentId) {
+ graph.forEachParticleChild(parentId, [&](uint32_t childId) { builder.addParticleChild(parentId, childId); });
+ }
+}
+
+// Builds the RelabelContext from the calorimeter geometry.
+// With relabelling disabled, the default context is returned without touching the event setup.
+// Otherwise geometryType is set to 1 when a DetId::HGCalEE geometry exists and to 0 when
+// the lookup falls back to the DetId::Forward subdetectors.
+RelabelContext TruthLogicalGraphHitIndexProducer::makeRelabelContext(edm::EventSetup const& setup) const {
+ RelabelContext context;
+
+ if (!doHGCalRelabelling_)
+ return context;
+
+ auto const& geom = setup.getData(geomToken_);
+
+ // HCAL constants are taken from the endcap geometry when it is available.
+ auto const* hcalGeometry = static_cast(geom.getSubdetectorGeometry(DetId::Hcal, HcalEndcap));
+ if (hcalGeometry != nullptr) {
+ context.hcalConstants = hcalGeometry->topology().dddConstants();
+ }
+
+ auto const* eeGeometry =
+ static_cast(geom.getSubdetectorGeometry(DetId::HGCalEE, ForwardSubdetector::ForwardEmpty));
+
+ // First choice: geometry keyed by DetId::HGCalEE / HGCalHSi / HGCalHSc (slots 0, 1, 2).
+ if (eeGeometry != nullptr) {
+ context.geometryType = 1;
+
+ auto const* fhGeometry = static_cast(
+ geom.getSubdetectorGeometry(DetId::HGCalHSi, ForwardSubdetector::ForwardEmpty));
+ auto const* bhGeometry = static_cast(
+ geom.getSubdetectorGeometry(DetId::HGCalHSc, ForwardSubdetector::ForwardEmpty));
+
+ context.hgTopologies[0] = &eeGeometry->topology();
+ context.hgTopologies[1] = fhGeometry != nullptr ? &fhGeometry->topology() : nullptr;
+ context.hgTopologies[2] = bhGeometry != nullptr ? &bhGeometry->topology() : nullptr;
+
+ // Constants are only available for slots whose topology was found.
+ for (unsigned i = 0; i < context.hgTopologies.size(); ++i) {
+ if (context.hgTopologies[i] != nullptr)
+ context.hgConstants[i] = &context.hgTopologies[i]->dddConstants();
+ }
+
+ return context;
+ }
+
+ // Fallback: geometry keyed by DetId::Forward with HGCEE / HGCHEF.
+ // Only slots 0 and 1 are filled; slot 2 stays null.
+ context.geometryType = 0;
+
+ eeGeometry = static_cast(geom.getSubdetectorGeometry(DetId::Forward, HGCEE));
+ auto const* fhGeometry = static_cast(geom.getSubdetectorGeometry(DetId::Forward, HGCHEF));
+
+ context.hgTopologies[0] = eeGeometry != nullptr ? &eeGeometry->topology() : nullptr;
+ context.hgTopologies[1] = fhGeometry != nullptr ? &fhGeometry->topology() : nullptr;
+
+ for (unsigned i = 0; i < context.hgTopologies.size(); ++i) {
+ if (context.hgTopologies[i] != nullptr)
+ context.hgConstants[i] = &context.hgTopologies[i]->dddConstants();
+ }
+
+ return context;
+}
+
+uint32_t TruthLogicalGraphHitIndexProducer::recoDetIdForSimHit(PCaloHit const& simHit,  // Maps a sim hit's id to the id used downstream; returns 0 when the hit cannot be mapped.
+ bool isHGCalCollection,  // true selects the HGCal translation branch
+ bool isHcalCollection,  // true selects the HcalHitRelabeller branch (if hcalConstants is set)
+ RelabelContext const& context) const {  // geometry pointers prepared by makeRelabelContext
+ const uint32_t simId = simHit.id();
+
+ if (!doHGCalRelabelling_) {  // relabelling disabled: the sim id is returned as is
+ return simId;
+ }
+
+ if (isHGCalCollection) {
+ if (context.geometryType == 1) {  // geometryType 1: no translation is applied
+ return simId;
+ }
+
+ int subdet = 0;
+ int layer = 0;
+ int cell = 0;
+ int sec = 0;
+ int subsec = 0;
+ int zp = 0;
+
+ HGCalTestNumbering::unpackHexagonIndex(simId, subdet, zp, layer, sec, subsec, cell);  // fills the six fields above from simId
+
+ const int hgcalIndex = subdet - 3;  // shifted so it can index context.hgConstants / hgTopologies
+ if (hgcalIndex < 0 || hgcalIndex >= static_cast(context.hgConstants.size()))  // subdet has no slot in the context
+ return 0;
+
+ auto const* constants = context.hgConstants[hgcalIndex];
+ auto const* topology = context.hgTopologies[hgcalIndex];
+
+ if (constants == nullptr || topology == nullptr)  // geometry for this slot was not found by makeRelabelContext
+ return 0;
+
+ const auto recoLayerCell = constants->simToReco(cell, layer, sec, topology->detectorType());
+ cell = recoLayerCell.first;  // pair is (cell, layer)
+ layer = recoLayerCell.second;
+
+ if (layer < 0)  // a negative reco layer is rejected
+ return 0;
+
+ return HGCalDetId(static_cast(subdet), zp, layer, subsec, sec, cell).rawId();
+ }
+
+ if (isHcalCollection && context.hcalConstants != nullptr) {  // HCAL ids are relabelled only when constants are available
+ return HcalHitRelabeller::relabel(simId, context.hcalConstants).rawId();
+ }
+
+ return simId;  // neither branch applied
+}
+
+void TruthLogicalGraphHitIndexProducer::fillSimHits(edm::Event& event,  // Reads each configured PCaloHit collection and adds one builder hit per accepted sim hit.
+ edm::EventSetup const& setup,  // passed on to makeRelabelContext
+ truth::LogicalGraphHitIndexBuilder& builder,  // receives the addHitForTrack calls
+ hgcal::DetIdRecHitMap const* recHitMap) const {  // may be nullptr; hits then keep invalidRecHitIndex
+ const RelabelContext relabelContext = makeRelabelContext(setup);  // built once, shared by all collections
+
+ for (uint32_t tokenIndex = 0; tokenIndex < simHitTokens_.size(); ++tokenIndex) {
+ auto const& token = simHitTokens_[tokenIndex];
+ auto const& tag = simHitTags_[tokenIndex];  // assumes simHitTags_ is index-aligned with simHitTokens_ - TODO confirm where they are filled
+
+ edm::Handle> hSimHits;
+ event.getByToken(token, hSimHits);
+
+ if (!hSimHits.isValid()) {  // a missing collection is reported and skipped, not treated as an error
+ edm::LogWarning("TruthLogicalGraphHitIndexProducer")
+ << "Missing PCaloHit collection " << tag.encode() << ". Skipping it.";
+ continue;
+ }
+
+ const bool isHGCalCollection = inputTagLooksLikeHGCal(tag);  // collection type is derived from the input tag
+ const bool isHcalCollection = inputTagLooksLikeHcal(tag);
+
+ for (auto const& simHit : *hSimHits) {
+ const int geantTrackId = simHit.geantTrackId();
+ if (geantTrackId <= 0)  // non-positive track ids are skipped
+ continue;
+
+ const uint32_t detId = recoDetIdForSimHit(simHit, isHGCalCollection, isHcalCollection, relabelContext);
+ if (detId == 0)  // recoDetIdForSimHit returns 0 when it cannot map the hit
+ continue;
+
+ uint32_t recHitIndex = truth::LogicalGraphHitIndex::Hit::invalidRecHitIndex;  // default when no rec hit matches
+
+ if (recHitMap != nullptr) {
+ const auto it = recHitMap->find(detId);
+ if (it != recHitMap->end()) {
+ recHitIndex = it->second;
+ }
+ }
+
+ builder.addHitForTrack(static_cast(geantTrackId), detId, recHitIndex, simHit.energy());
+ }
+ }
+}
+
+DEFINE_FWK_MODULE(TruthLogicalGraphHitIndexProducer);
diff --git a/PhysicsTools/TruthInfo/plugins/RecHitFlatTableProducer.cc b/PhysicsTools/TruthInfo/plugins/RecHitFlatTableProducer.cc
new file mode 100644
index 0000000000000..c46a8c30844ba
--- /dev/null
+++ b/PhysicsTools/TruthInfo/plugins/RecHitFlatTableProducer.cc
@@ -0,0 +1,99 @@
+#include "FWCore/Framework/interface/stream/EDProducer.h"
+#include "FWCore/Framework/interface/Event.h"
+#include "FWCore/Framework/interface/stream/moduleAbilities.h"
+#include "FWCore/Utilities/interface/ESGetToken.h"
+#include "FWCore/Utilities/interface/transform.h"
+#include "FWCore/ParameterSet/interface/ParameterSet.h"
+#include "FWCore/ParameterSet/interface/ConfigurationDescriptions.h"
+#include "FWCore/ParameterSet/interface/ParameterSetDescription.h"
+#include "DataFormats/NanoAOD/interface/FlatTable.h"
+
+#include "RecoLocalCalo/HGCalRecAlgos/interface/RecHitTools.h"
+#include "DataFormats/HGCRecHit/interface/HGCRecHitCollections.h"
+
+#include
+#include
+
+class RecHitFlatTableProducer : public edm::stream::EDProducer {  // Puts one FlatTable per event with id, position and radius-to-side of the configured rec hits.
+public:
+ RecHitFlatTableProducer(edm::ParameterSet const& params)
+ : objName_(params.getParameter("objName")),
+ rechits_tokens_{
+ edm::vector_transform(params.getParameter>("label_rechits"),  // one consumes token per tag in label_rechits
+ [this](const edm::InputTag& lab) { return consumes(lab); })},
+ caloGeometry_token_(esConsumes()) {
+ produces();
+ }
+
+ ~RecHitFlatTableProducer() override {}
+
+ void produce(edm::Event& event, edm::EventSetup const& iSetup) override {  // iSetup is not used in this method
+ std::vector rechit_ID;
+ std::vector rechit_energy;  // NOTE(review): filled below but never added as a table column
+ std::vector rechit_x;
+ std::vector rechit_y;
+ std::vector rechit_z;
+ std::vector rechit_time;  // NOTE(review): filled below but never added as a table column
+ std::vector rechit_radius;
+ std::vector rechit_simEnergy;  // NOTE(review): this and the next two vectors are only referenced by the commented-out code below
+ std::vector rechit_simEnergyEM;
+ std::vector rechit_simEnergyHad;
+
+ for (auto const& rh_token : rechits_tokens_) {  // all collections are concatenated into the same column vectors
+ edm::Handle rechit_handle;
+ event.getByToken(rh_token, rechit_handle);
+ const auto& rhColl = *rechit_handle;  // NOTE(review): handle validity is not checked before the dereference
+ for (auto const& rh : rhColl) {
+ rechit_energy.push_back(rh.energy());
+ auto const rhPosition = rhtools_.getPosition(rh.detid());
+ rechit_x.push_back(rhPosition.x());
+ rechit_y.push_back(rhPosition.y());
+ rechit_z.push_back(rhPosition.z());
+ rechit_ID.push_back(rh.detid().rawId());
+ rechit_time.push_back(rh.time());
+ rechit_radius.push_back(rhtools_.getRadiusToSide(rh.detid()));
+ // const auto hitId = hitMap->find(DetId(rh.detid()));
+ // if (hitId != hitMap->end()) {
+ // rechit_simEnergy.push_back(hitIdToEnergies[hitId->second].energy);
+ // rechit_simEnergyEM.push_back(hitIdToEnergies[hitId->second].energyEM);
+ // rechit_simEnergyHad.push_back(hitIdToEnergies[hitId->second].energyHad);
+ // }
+ }
+ }
+
+ auto tab = std::make_unique(rechit_ID.size(), objName_, false, false);  // row count = number of collected rec hits
+ tab->addColumn("rechit_ID", rechit_ID, "Rechit ID");
+ tab->addColumn("rechit_x", rechit_x, "Rechit X from rechittools");
+ tab->addColumn("rechit_y", rechit_y, "Rechit Y from rechittools");
+ tab->addColumn("rechit_z", rechit_z, "Rechit Z from rechittools");
+ tab->addColumn("rechit_radius", rechit_radius, "Rechit radius to side from rechittools");
+
+ event.put(std::move(tab));
+ }
+
+ void beginRun(edm::Run const&, edm::EventSetup const& es) override {  // gives rhtools_ the geometry fetched through caloGeometry_token_
+ edm::ESHandle geom = es.getHandle(caloGeometry_token_);
+ rhtools_.setGeometry(*geom);
+ }
+
+ static void fillDescriptions(edm::ConfigurationDescriptions& descriptions) {  // declares objName and label_rechits with their defaults
+ edm::ParameterSetDescription desc;
+ desc.add("objName", "rechits")->setComment("name of the nanoaod::FlatTable to extend with this table");
+ desc.add>("label_rechits",
+ {edm::InputTag("HGCalRecHit", "HGCEERecHits"),
+ edm::InputTag("HGCalRecHit", "HGCHEFRecHits"),
+ edm::InputTag("HGCalRecHit", "HGCHEBRecHits")});
+ descriptions.add("recHitTable", desc);  // description is registered under the label recHitTable
+ }
+
+protected:
+ const std::string objName_;  // name passed to the FlatTable constructor
+ // const edm::EDGetTokenT> src_;
+ const std::vector> rechits_tokens_;  // one token per configured rec-hit collection
+
+ edm::ESGetToken caloGeometry_token_;  // read in beginRun
+ hgcal::RecHitTools rhtools_;  // provides getPosition / getRadiusToSide in produce
+};
+
+#include "FWCore/Framework/interface/MakerMacros.h"
+DEFINE_FWK_MODULE(RecHitFlatTableProducer);
diff --git a/PhysicsTools/TruthInfo/plugins/TruthGraphDumper.cc b/PhysicsTools/TruthInfo/plugins/TruthGraphDumper.cc
new file mode 100644
index 0000000000000..5d95788119b05
--- /dev/null
+++ b/PhysicsTools/TruthInfo/plugins/TruthGraphDumper.cc
@@ -0,0 +1,613 @@
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include "FWCore/Framework/interface/one/EDAnalyzer.h"
+#include "FWCore/Framework/interface/Event.h"
+#include "FWCore/Framework/interface/EventSetup.h"
+#include "FWCore/Framework/interface/MakerMacros.h"
+#include "FWCore/ParameterSet/interface/ConfigurationDescriptions.h"
+#include "FWCore/ParameterSet/interface/ParameterSet.h"
+#include "FWCore/ParameterSet/interface/ParameterSetDescription.h"
+#include "FWCore/Utilities/interface/EDGetToken.h"
+#include "FWCore/Utilities/interface/InputTag.h"
+
+#include "SimDataFormats/Track/interface/SimTrackContainer.h"
+#include "SimDataFormats/Vertex/interface/SimVertexContainer.h"
+
+#include "SimDataFormats/GeneratorProducts/interface/HepMCProduct.h"
+#include "HepMC/GenEvent.h"
+#include "HepMC/GenParticle.h"
+#include "HepMC/GenVertex.h"
+
+#include "SimDataFormats/GeneratorProducts/interface/HepMC3Product.h"
+#include "HepMC3/GenEvent.h"
+#include "HepMC3/GenParticle.h"
+#include "HepMC3/GenVertex.h"
+
+#include "PhysicsTools/TruthInfo/interface/TruthGraph.h"
+
+namespace {
+
+ // --- PDG naming (UTF-8)
+ std::string pdgNameUtf8(int pdgId) {  // Short UTF-8 name for a PDG id; ids not listed here yield the literal pdg.
+ const int ap = std::abs(pdgId);  // magnitude, used only for the quark range check below
+
+ if (pdgId == 11)  // charged leptons
+ return "e⁻";
+ if (pdgId == -11)
+ return "e⁺";
+ if (pdgId == 13)
+ return "μ⁻";
+ if (pdgId == -13)
+ return "μ⁺";
+ if (pdgId == 15)
+ return "τ⁻";
+ if (pdgId == -15)
+ return "τ⁺";
+
+ if (pdgId == 12)  // neutrinos
+ return "νₑ";
+ if (pdgId == -12)
+ return "ν̄ₑ";
+ if (pdgId == 14)
+ return "ν_μ";
+ if (pdgId == -14)
+ return "ν̄_μ";
+ if (pdgId == 16)
+ return "ν_τ";
+ if (pdgId == -16)
+ return "ν̄_τ";
+
+ if (pdgId == 22)  // bosons
+ return "γ";
+ if (pdgId == 21)
+ return "g";
+ if (pdgId == 23)
+ return "Z⁰";
+ if (pdgId == 24)
+ return "W⁺";
+ if (pdgId == -24)
+ return "W⁻";
+ if (pdgId == 25)
+ return "H";
+
+ if (pdgId == 2212)  // nucleons
+ return "p";
+ if (pdgId == -2212)
+ return "p̄";
+ if (pdgId == 2112)
+ return "n";
+ if (pdgId == -2112)
+ return "n̄";
+
+ if (pdgId == 111)  // pions and kaons
+ return "π⁰";
+ if (pdgId == 211)
+ return "π⁺";
+ if (pdgId == -211)
+ return "π⁻";
+ if (pdgId == 321)
+ return "K⁺";
+ if (pdgId == -321)
+ return "K⁻";
+ if (pdgId == 130)
+ return "K⁰_L";
+ if (pdgId == 310)
+ return "K⁰_S";
+
+ if (ap >= 1 && ap <= 6) {  // quarks, |id| 1..6
+ static const char* qname[7] = {"", "d", "u", "s", "c", "b", "t"};  // entry 0 is a placeholder so ap indexes directly
+ std::string s = qname[ap];
+ if (pdgId < 0)  // negative ids get an anti- prefix
+ s = "anti-" + s;
+ return s;
+ }
+
+ return "pdg";  // fallback value that pdgLabel tests for
+ }
+
+ std::string pdgLabel(int pdgId) {  // Human-readable label: name plus numeric id, or pdg(<id>) when no name is known.
+ std::ostringstream ss;
+ const std::string name = pdgNameUtf8(pdgId);
+ if (name == "pdg")  // pdgNameUtf8 returned its fallback value
+ ss << "pdg(" << pdgId << ")";
+ else
+ ss << name << " (" << pdgId << ")";
+ return ss.str();
+ }
+
+ template
+ std::string fmtP4(const P4T& p4) {  // Formats px, py, pz, e of p4 as a parenthesised, comma-separated tuple.
+ std::ostringstream ss;
+ ss.setf(std::ios::fixed);  // fixed notation so setprecision(3) means three decimals
+ ss << std::setprecision(3) << "(" << p4.px() << ", " << p4.py() << ", " << p4.pz() << ", " << p4.e() << ")";
+ return ss.str();
+ }
+
+ template
+ std::string fmtX4(const X4T& x4) {  // Formats x, y, z, t of x4 as a parenthesised, comma-separated tuple.
+ std::ostringstream ss;
+ ss.setf(std::ios::fixed);  // fixed notation so setprecision(3) means three decimals
+ ss << std::setprecision(3) << "(" << x4.x() << ", " << x4.y() << ", " << x4.z() << ", " << x4.t() << ")";
+ return ss.str();
+ }
+
+ const char* kindName(TruthGraph::NodeKind k) {  // Returns the enumerator name of k as a string literal.
+ switch (k) {
+ case TruthGraph::NodeKind::GenEvent:
+ return "GenEvent";
+ case TruthGraph::NodeKind::GenVertex:
+ return "GenVertex";
+ case TruthGraph::NodeKind::GenParticle:
+ return "GenParticle";
+ case TruthGraph::NodeKind::SimVertex:
+ return "SimVertex";
+ case TruthGraph::NodeKind::SimTrack:
+ return "SimTrack";
+ }
+ return "Unknown";  // reached only for a value not handled by the switch
+ }
+
+ const char* shapeFor(TruthGraph::NodeKind k) {  // Picks the shape name for a node kind: vertices diamond, SimTrack ellipse, the rest box.
+ switch (k) {
+ case TruthGraph::NodeKind::GenEvent:
+ return "box";
+ case TruthGraph::NodeKind::GenVertex:
+ return "diamond";
+ case TruthGraph::NodeKind::GenParticle:
+ return "box";
+ case TruthGraph::NodeKind::SimVertex:
+ return "diamond";
+ case TruthGraph::NodeKind::SimTrack:
+ return "ellipse";
+ }
+ return "box";  // reached only for a value not handled by the switch
+ }
+
+ std::string edgeAttrs(TruthGraph::EdgeKind kind) {  // Returns the bracketed attribute list (with a leading space) for an edge of the given kind.
+ using EdgeKind = TruthGraph::EdgeKind;
+
+ switch (kind) {
+ case EdgeKind::Gen:
+ return " [style=solid, edgeType=\"Gen\"]";
+
+ case EdgeKind::Sim:
+ return " [style=solid, edgeType=\"Sim\"]";
+
+ case EdgeKind::GenToSim:  // the two cross-domain kinds are drawn bidirectional and labelled
+ return " [dir=both, style=dashed, label=\"GenToSim\", edgeType=\"GenToSim\"]";
+
+ case EdgeKind::SimToGen:
+ return " [dir=both, style=dotted, label=\"SimToGen\", edgeType=\"SimToGen\"]";
+ }
+
+ return " [style=solid, edgeType=\"Unknown\"]";  // reached only for a value not handled by the switch
+ }
+
+ std::string statusFlagsLabel(uint16_t flags) {  // Lists the names of all set bits in flags, comma-separated; returns none when no listed bit is set.
+ struct FlagInfo {
+ uint16_t bit;  // single-bit mask
+ const char* name;  // text emitted when that bit is set
+ };
+
+ static constexpr FlagInfo flagInfos[] = {  // NOTE(review): names look like the reco::GenStatusFlags bits - confirm the bit order matches
+ {1u << 0, "isPrompt"},
+ {1u << 1, "isDecayedLeptonHadron"},
+ {1u << 2, "isTauDecayProduct"},
+ {1u << 3, "isPromptTauDecayProduct"},
+ {1u << 4, "isDirectTauDecayProduct"},
+ {1u << 5, "isDirectPromptTauDecayProduct"},
+ {1u << 6, "isDirectHadronDecayProduct"},
+ {1u << 7, "isHardProcess"},
+ {1u << 8, "fromHardProcess"},
+ {1u << 9, "isHardProcessTauDecayProduct"},
+ {1u << 10, "isDirectHardProcessTauDecayProduct"},
+ {1u << 11, "fromHardProcessBeforeFSR"},
+ {1u << 12, "isFirstCopy"},
+ {1u << 13, "isLastCopy"},
+ {1u << 14, "isLastCopyBeforeFSR"},
+ };
+
+ std::ostringstream ss;
+ bool first = true;  // stays true until the first set bit has been written
+
+ for (auto const& flag : flagInfos) {
+ if ((flags & flag.bit) == 0)
+ continue;
+
+ if (!first)  // separator goes before every name except the first
+ ss << ", ";
+ ss << flag.name;
+ first = false;
+ }
+
+ if (first)  // nothing was written: none of the listed bits is set (bit 15 is not in the table)
+ return "none";
+
+ return ss.str();
+ }
+
+ std::string dotQuote(std::string const& input) {  // Wraps input in double quotes, escaping backslash, double quote and newline.
+ std::string out;
+ out.reserve(input.size() + 2);  // room for the two surrounding quotes; escapes may still grow the string
+
+ out.push_back('"');
+ for (char c : input) {
+ switch (c) {
+ case '\\':  // backslash becomes two backslashes
+ out += "\\\\";
+ break;
+ case '"':  // embedded quote is backslash-escaped
+ out += "\\\"";
+ break;
+ case '\n':  // a real newline becomes the two characters backslash and n
+ out += "\\n";
+ break;
+ default:  // every other byte, including UTF-8 sequences, is copied unchanged
+ out.push_back(c);
+ break;
+ }
+ }
+ out.push_back('"');
+
+ return out;
+ }
+
+ std::string appendEventIdToFilename(std::string const& filename, edm::EventID const& id) {  // Inserts _run<R>_lumi<L>_event<E> before the last dot of filename, or appends it when there is no dot.
+ const auto dotPos = filename.rfind('.');  // NOTE(review): searches the whole string, so a dot in a directory part is found when the file name itself has none
+
+ std::ostringstream ss;
+ if (dotPos == std::string::npos) {  // no dot at all: the suffix goes at the end
+ ss << filename;
+ ss << "_run" << id.run();
+ ss << "_lumi" << id.luminosityBlock();
+ ss << "_event" << id.event();
+ return ss.str();
+ }
+
+ ss << filename.substr(0, dotPos);  // text before the last dot
+ ss << "_run" << id.run();
+ ss << "_lumi" << id.luminosityBlock();
+ ss << "_event" << id.event();
+ ss << filename.substr(dotPos);  // the last dot and everything after it
+
+ return ss.str();
+ }
+} // anonymous namespace
+
+class TruthGraphDumper : public edm::one::EDAnalyzer<> {
+public:
+ explicit TruthGraphDumper(const edm::ParameterSet& cfg)
+ : token_(consumes(cfg.getParameter("src"))),
+ dotFile_(cfg.getParameter("dotFile")),
+ maxNodes_(cfg.getParameter("maxNodes")),
+ maxEdgesPerNode_(cfg.getParameter("maxEdgesPerNode")),
+ simTracksToken_(mayConsume(cfg.getParameter("simTracks"))),
+ simVerticesToken_(mayConsume(cfg.getParameter("simVertices"))),
+ hepmc2Token_(mayConsume(cfg.getParameter("genEventHepMC"))),
+ hepmc3Token_(mayConsume(cfg.getParameter("genEventHepMC3"))) {}
+
+ static void fillDescriptions(edm::ConfigurationDescriptions& descriptions) {
+ edm::ParameterSetDescription desc;
+ desc.add("src", edm::InputTag("truthGraphProducer"));
+ desc.add("dotFile", "truthgraph.dot");
+ desc.add("maxNodes", 5000)->setComment("Truncate to keep DOT manageable");
+ desc.add("maxEdgesPerNode", 200)->setComment("Truncate fanout per node");
+
+ desc.add("simTracks", edm::InputTag("g4SimHits"))
+ ->setComment("SimTrackContainer (optional, used to enrich SimTrack nodes)");
+ desc.add