diff --git a/Configuration/ProcessModifiers/python/enableTruth_cff.py b/Configuration/ProcessModifiers/python/enableTruth_cff.py new file mode 100644 index 0000000000000..3b82cbd1f5a10 --- /dev/null +++ b/Configuration/ProcessModifiers/python/enableTruth_cff.py @@ -0,0 +1,3 @@ +import FWCore.ParameterSet.Config as cms + +enableTruth = cms.Modifier() \ No newline at end of file diff --git a/Configuration/PyReleaseValidation/python/upgradeWorkflowComponents.py b/Configuration/PyReleaseValidation/python/upgradeWorkflowComponents.py index 1887169482d37..9006faf3563b9 100644 --- a/Configuration/PyReleaseValidation/python/upgradeWorkflowComponents.py +++ b/Configuration/PyReleaseValidation/python/upgradeWorkflowComponents.py @@ -1094,6 +1094,33 @@ def condition(self, fragment, stepList, key, hasHarvest): upgradeWFs['ticlv5_TrackLinkingGNN'].step3 = {'--procModifiers': 'ticlv5_TrackLinkingGNN'} upgradeWFs['ticlv5_TrackLinkingGNN'].step4 = {'--procModifiers': 'ticlv5_TrackLinkingGNN'} + + +class UpgradeWorkflow_enableTruth(UpgradeWorkflow): + def setup_(self, step, stepName, stepDict, k, properties): + if 'RecoGlobal' in step: + stepDict[stepName][k] = deepcopy(stepDict[step][k]) + + if '--procModifiers' in stepDict[stepName][k]: + stepDict[stepName][k]['--procModifiers'] += ',enableTruth' + else: + stepDict[stepName][k]['--procModifiers'] = 'enableTruth' + + def condition(self, fragment, stepList, key, hasHarvest): + return 'Run4' in key + + +upgradeWFs['enableTruth'] = UpgradeWorkflow_enableTruth( + steps = [ + 'RecoGlobal', + ], + PU = [ + 'RecoGlobal', + ], + suffix = '_enableTruth', + offset = 0.88, +) + # L3 Tracker Muon Outside-In reconstruction first class UpgradeWorkflow_phase2L3MuonsOIFirst(UpgradeWorkflow): def setup_(self, step, stepName, stepDict, k, properties): diff --git a/PhysicsTools/TruthInfo/BuildFile.xml b/PhysicsTools/TruthInfo/BuildFile.xml new file mode 100644 index 0000000000000..6726efdfce6e0 --- /dev/null +++ b/PhysicsTools/TruthInfo/BuildFile.xml @@ -0,0 +1,9 @@ + + + + + + + + + \ No newline at end of file diff --git a/PhysicsTools/TruthInfo/README.md b/PhysicsTools/TruthInfo/README.md new file mode 100644 index 0000000000000..ca2faabdd8f94 --- /dev/null +++ b/PhysicsTools/TruthInfo/README.md @@ -0,0 +1,1131 @@ +# TruthInfo prototype + +This package contains a prototype truth graph representation for CMSSW. The goal is to provide a compact, navigable, physics-oriented abstraction of the generator, simulation, and detector-hit truth history of an event. + +The current implementation is split into three layers: + +1. `TruthGraph`: a compact raw graph built directly from existing CMS truth products. +2. `truth::Graph`: a higher-level logical graph exposing particles, vertices, payload, and navigation methods. +3. `truth::LogicalGraphHitIndex`: an auxiliary hit index associating logical particles to calorimeter SimHits and, when available, to reconstructed RecHits. + +The prototype is intended for validation, reconstruction studies, visualization, and future truth-reco association work. + +## Motivation + +CMS currently exposes truth information through several low-level collections, such as HepMC, GenParticles, SimTracks, SimVertices, TrackingParticles, SimClusters, CaloParticles, SimHits, and detector-specific RecHits. These collections are useful, but they encode different views of the event history and are often tied to detector-specific or production-specific conventions. + +This package explores a different model: a single event-level truth graph that can be navigated using physics concepts, with optional detector-hit indices layered on top. + +Typical questions this should make easier are: + +* Do two reconstructed objects come from the same parent particle? +* Did a given resonance, such as a Z boson, exist in the event history? +* Do two reconstructed objects come from the same Z boson? +* Which parton initiated a reconstructed jet? +* Is an object associated with the hard interaction or with pileup? +* Which detector-level interactions contributed to a reconstructed object? +* Which SimHits were produced directly by a particle? +* Which SimHits were produced by the full subgraph starting from a particle? +* Which RecHits correspond to those SimHits through a DetId association? +* Should a reconstructed object be associated to a single truth particle, to a branch, or to an aggregated subgraph? + +The intended user-facing API should allow reconstruction and validation code to operate on stable physics abstractions rather than directly depending on the storage details of `GenParticle`, `SimTrack`, `GenVertex`, `SimVertex`, `PCaloHit`, or detector-specific RecHit collections. + +## Package layout + +```text +PhysicsTools/TruthInfo/ + interface/ + TruthGraph.h + Graph.h + LogicalGraphHitIndex.h + LogicalGraphHitIndexBuilder.h + src/ + TruthGraph.cc + Graph.cc + LogicalGraphHitIndexBuilder.cc + classes.h + classes_def.xml + plugins/ + TruthGraphProducer.cc + TruthGraphDumper.cc + TruthLogicalGraphProducer.cc + TruthLogicalGraphDumper.cc + TruthLogicalGraphHitIndexProducer.cc + BuildFile.xml + python/ + truthGraphProducer_cfi.py + truthLogicalGraphDumper_cfi.py + BuildFile.xml +```` + +The auxiliary RecHit lookup used by the hit index is produced separately in: + +```text +SimCalorimetry/HGCalAssociatorProducers/ + interface/ + DetIdRecHitMap.h + plugins/ + SimHitToRecHitMapProducer.cc +``` + +Despite living under `HGCalAssociatorProducers`, the current `SimHitToRecHitMapProducer` is not HGCal-only. It accepts both `HGCRecHitCollection` inputs and `reco::PFRecHitCollection` inputs. + +## Raw graph: `TruthGraph` + +`TruthGraph` is a compact, read-only graph representation of event truth information. It is designed as an intermediate event data product built from existing CMS collections. + +### Node types + +The raw graph supports the following node kinds: + +```cpp +enum class NodeKind : uint8_t { + GenEvent, + GenVertex, + GenParticle, + SimVertex, + SimTrack +}; +``` + +Each node stores a `NodeRef`: + +```cpp +struct NodeRef { + NodeKind kind; + int64_t key; +}; +``` + +The meaning of `key` depends on the node kind: + +* `GenEvent`: generator connected-component id. +* `GenVertex`: HepMC barcode or HepMC3 vertex id. +* `GenParticle`: HepMC barcode or HepMC3 particle id. +* `SimVertex`: index in the `SimVertexContainer`. +* `SimTrack`: `SimTrack::trackId()`. + +### Edge types + +The raw graph supports edge categories: + +```cpp +enum class EdgeKind : uint8_t { + Gen, + Sim, + GenToSim, + SimToGen +}; +``` + +At present: + +* `Gen` edges describe the generator graph. +* `Sim` edges describe the simulation graph. +* `GenToSim` edges connect matched generator particles or vertices to simulation nodes. +* `SimToGen` is reserved. + +The DOT dumper labels cross-domain edges explicitly, for example: + +```text +GenToSim +SimToGen +``` + +### Storage model + +Edges are stored in compressed sparse row form: + +```cpp +std::vector offsets; +std::vector edges; +std::vector edgeKind; +``` + +For node `i`, outgoing edges are stored in: + +```cpp +edges[offsets[i] ... offsets[i + 1]) +``` + +The corresponding edge kinds are stored in the same range of `edgeKind`. + +The class provides convenience accessors such as: + +```cpp +uint32_t nNodes() const; +uint32_t nEdges() const; +std::span children(uint32_t nodeId) const; +std::span childrenEdgeKinds(uint32_t nodeId) const; +const NodeRef& nodeRef(uint32_t nodeId) const; +bool isConsistent() const; +``` + +### Cached metadata + +The raw graph stores lightweight metadata arrays parallel to the node list: + +```cpp +std::vector pdgId; +std::vector status; +std::vector statusFlags; +std::vector eventId; +std::vector genEventOfNode; +``` + +It also stores association arrays: + +```cpp +std::vector simTrackToGen; +std::vector simTrackToVtx; +``` + +These are indexed by raw node id. Entries that are not meaningful for a given node type are filled with default values, typically `0` or `-1`. + +## `TruthGraphProducer` + +`TruthGraphProducer` builds the raw `TruthGraph` from: + +* HepMC3, when available; +* HepMC2, as fallback; +* `SimTrackContainer`; +* `SimVertexContainer`. + +Default input tags are: + +```python +genEventHepMC3 = cms.InputTag("generatorSmeared") +genEventHepMC = cms.InputTag("generatorSmeared") +simTracks = cms.InputTag("g4SimHits") +simVertices = cms.InputTag("g4SimHits") +``` + +The producer creates: + +* one `GenEvent` node per connected generator component; +* one node per generator vertex; +* one node per generator particle; +* one node per simulation vertex; +* one node per simulation track. + +Generator components are computed using a disjoint-set union over the generator particle-vertex graph. + +Simulation topology is built from: + +* `SimTrack::vertIndex()` for `SimVertex -> SimTrack` edges; +* `SimVertex::parentIndex()` for `SimTrack -> SimVertex` edges. + +Generator-to-simulation particle associations are built using the available `SimTrack` to generator information. The implementation keeps this association explicit in the raw graph instead of assuming that raw generator iteration indices can always be interpreted as stable GenParticle indices. + +When enabled, cross-domain edges are also added: + +* `GenParticle -> SimTrack`; +* `GenVertex -> SimVertex`, using the production vertex of the associated generator particle when available. + +The cross edges are controlled by: + +```python +addGenToSimEdges = cms.bool(True) +``` + +## Logical graph: `truth::Graph` + +`truth::Graph` is the user-facing abstraction built from the raw `TruthGraph`. It is intended to expose a stable, physics-oriented API. + +The logical graph is bipartite: + +```text +Particle -> decay Vertex -> outgoing Particle +Particle <- production Vertex <- incoming Particle +``` + +The main public types are: + +```cpp +namespace truth { + class Graph; + class Particle; + class Vertex; + + struct ParticleData; + struct VertexData; + struct Checkpoint; +} +``` + +### `truth::Particle` + +A `truth::Particle` may combine generator-level and simulation-level information when a robust correspondence exists. + +The stored payload is: + +```cpp +struct ParticleData { + int32_t genNode = -1; + int32_t simNode = -1; + + int32_t pdgId = 0; + int16_t status = 0; + uint16_t statusFlags = 0; + + uint64_t eventId = 0; + int32_t genEvent = -1; + + math::XYZTLorentzVectorD momentum; + std::vector checkpoints; + + bool hasGen() const; + bool hasSim() const; + bool valid() const; +}; +``` + +The convention for the momentum is "best available": + +1. for GEN+SIM particles, use the GEN four-momentum; +2. for SIM-only particles, use the `SimTrack` four-momentum; +3. otherwise keep the default value. + +Useful methods include: + +```cpp +bool hasGen() const; +bool hasSim() const; +int32_t pdgId() const; +int16_t status() const; +uint16_t statusFlags() const; +uint64_t eventId() const; +int32_t genEvent() const; +const math::XYZTLorentzVectorD& momentum() const; + +bool isRoot() const; +bool isLeaf() const; + +std::vector productionVertices() const; +std::vector decayVertices() const; + +std::vector parents() const; +std::vector children() const; +std::vector ancestors() const; +std::vector descendants() const; + +bool hasAncestorPdgId(int pdgId) const; +std::optional firstAncestorWithPdgId(int pdgId) const; +std::optional firstCommonAncestor(truth::Particle other) const; +``` + +### `truth::Vertex` + +A `truth::Vertex` stores vertex-level payload: + +```cpp +struct VertexData { + int32_t genNode = -1; + int32_t simNode = -1; + + uint64_t eventId = 0; + int32_t genEvent = -1; + + math::XYZTLorentzVectorD position; + + bool hasGen() const; + bool hasSim() const; + bool valid() const; +}; +``` + +Useful methods include: + +```cpp +bool hasGen() const; +bool hasSim() const; +uint64_t eventId() const; +int32_t genEvent() const; +const math::XYZTLorentzVectorD& position() const; + +bool isSource() const; +bool isSink() const; + +std::vector incomingParticles() const; +std::vector outgoingParticles() const; +``` + +### Vertex treatment + +Generator-level and simulation-level particles may be merged when a robust association exists. + +Generator-level and simulation-level vertices can be merged by configuration when the producer has enough information to do so: + +```python +mergeGenSimVertices = cms.bool(True) +``` + +This is useful for compact visualization and for downstream navigation. During debugging, it can be disabled to inspect generator and simulation vertex semantics separately. + +### Intermediate GEN particle collapsing + +The logical graph producer can collapse simple generator-only chains of the form: + +```text +P -> V -> C +``` + +when: + +* `P` is not final-state; +* `C` is the only daughter; +* `P` and `C` have the same PDG id. + +This is controlled by: + +```python +collapseIntermediateGenParticles = cms.bool(True) +``` + +This helps reduce visual clutter from intermediate generator copies while preserving the physically relevant branching structure. + +### Filtering by mother PDG id + +The logical graph can optionally be restricted to a selected particle species and its descendants: + +```python +motherPdgId = cms.int32(0) +``` + +The default value `0` keeps the full graph. + +A non-zero value keeps particles matching the requested PDG id and the corresponding descendant subgraphs. + +## Trajectory checkpoints + +The logical particle model supports trajectory checkpoints: + +```cpp +struct Checkpoint { + uint32_t checkpointId = 0; + math::XYZTLorentzVectorF position; + math::XYZTLorentzVectorF momentum; +}; +``` + +A checkpoint represents a relevant point along the propagation history of a particle. + +The current prototype uses checkpoint `0` to store boundary-crossing information from `SimTrack`: + +* position at boundary; +* momentum at boundary; +* boundary identifier. + +This is meant to be a generic mechanism. A natural long-term direction is to build the truth graph directly while Geant4 tracks are being created and simulated, rather than reconstructing it afterwards from final CMS products. That would allow the graph to record multiple checkpoints along the propagation history and preserve information that is difficult to recover a posteriori. + +## `TruthLogicalGraphProducer` + +`TruthLogicalGraphProducer` builds a standalone `truth::Graph` from the raw `TruthGraph`. + +Default input tags are: + +```python +src = cms.InputTag("truthGraphProducer") +simTracks = cms.InputTag("g4SimHits") +simVertices = cms.InputTag("g4SimHits") +genEventHepMC3 = cms.InputTag("generatorSmeared") +genEventHepMC = cms.InputTag("generatorSmeared") +``` + +The producer performs the following steps: + +1. Read and validate the raw `TruthGraph`. +2. Load optional payload from HepMC2, HepMC3, `SimTrackContainer`, and `SimVertexContainer`. +3. Assign temporary logical ids to raw particle and vertex nodes. +4. Merge particle nodes across GEN and SIM when a robust association exists. +5. Optionally merge GEN and SIM vertices. +6. Optionally collapse intermediate GEN-only particle copies. +7. Fill standalone `ParticleData` and `VertexData` payload. +8. Rebuild the logical bipartite graph in CSR-like adjacency vectors. +9. Validate the produced `truth::Graph`. + +The produced graph is independent of the raw graph for ordinary navigation, but it keeps optional back-references to raw node ids for debugging: + +```cpp +ParticleData::genNode +ParticleData::simNode +VertexData::genNode +VertexData::simNode +``` + +## Logical hit index: `truth::LogicalGraphHitIndex` + +`truth::LogicalGraphHitIndex` is an auxiliary data product that associates logical graph particles to calorimeter SimHits and, when possible, to RecHits. + +The key idea is that SimHits are directly associated to the particle that produced them, while subgraph hit collections are computed by aggregating over descendants. + +For each logical particle, the hit index can answer two different questions: + +1. Which SimHits were produced directly by this particle? +2. Which SimHits were produced by this particle and by all particles in the subgraph below it? + +This is important because both views are useful: + +* direct hits preserve local detector contributions from one particle; +* subgraph hits represent the full detector footprint of a shower, decay branch, or composite truth object. + +### Hit payload + +The hit index stores compact hit records: + +```cpp +struct Hit { + uint32_t detId; + uint32_t recHitIndex; + float energy; +}; +``` + +The `detId` is the reco DetId used for matching to RecHits. + +The `recHitIndex` is the index in the global RecHit ordering produced by `SimHitToRecHitMapProducer`. If no RecHit was found for the SimHit DetId, it is set to: + +```cpp +truth::LogicalGraphHitIndex::Hit::invalidRecHitIndex +``` + +The `energy` is the accumulated SimHit energy for that logical particle and DetId. + +### Direct and subgraph access + +The hit index exposes spans of hits per logical particle, for example: + +```cpp +auto directHits = hitIndex.directHits(particleId); +auto subgraphHits = hitIndex.subgraphHits(particleId); +``` + +The direct hits are attached only to the particle that directly produced them. + +The subgraph hits are accumulated from the particle and all its descendants in the logical graph. + +This makes merging two particles or two subgraphs straightforward: one can merge the corresponding hit spans by DetId or by RecHit index, depending on the intended matching metric. + +## `TruthLogicalGraphHitIndexProducer` + +`TruthLogicalGraphHitIndexProducer` builds `truth::LogicalGraphHitIndex`. + +It consumes: + +* the logical `truth::Graph`; +* the raw `TruthGraph`; +* selected `PCaloHit` collections; +* an optional DetId to RecHit index map produced by `SimHitToRecHitMapProducer`. + +Typical configuration: + +```python +process.truthLogicalGraphHitIndexProducer = cms.EDProducer( + "TruthLogicalGraphHitIndexProducer", + + src = cms.InputTag("truthLogicalGraphProducer"), + rawSrc = cms.InputTag("truthGraphProducer"), + + recHitMap = cms.InputTag("simHitToRecHitMapProducer"), + + simHitCollections = cms.VInputTag( + cms.InputTag("g4SimHits", "HGCHitsEE", "SIM"), + cms.InputTag("g4SimHits", "HGCHitsHEfront", "SIM"), + cms.InputTag("g4SimHits", "HGCHitsHEback", "SIM"), + cms.InputTag("g4SimHits", "EcalHitsEB", "SIM"), + cms.InputTag("g4SimHits", "HcalHits", "SIM"), + ), + + doHGCalRelabelling = cms.bool(False), +) +``` + +The producer performs the following steps: + +1. Build a `SimTrack::trackId()` to logical particle id map. +2. Read the configured `PCaloHit` collections. +3. Convert SimHit DetIds to reco DetIds when requested. +4. Look up the corresponding RecHit index through the DetId map, if available. +5. Fill direct hits for the logical particle associated to the Geant track id. +6. Propagate and merge hit collections upward to build subgraph hit collections. + +The hit index is intentionally separate from `truth::Graph`. This keeps the graph compact and allows detector-specific hit indices to evolve independently. + +## `SimHitToRecHitMapProducer` + +`SimHitToRecHitMapProducer` builds the DetId to RecHit index lookup consumed by `TruthLogicalGraphHitIndexProducer`. + +The produced type is: + +```cpp +hgcal::DetIdRecHitMap +``` + +with the current alias defined in: + +```cpp +SimCalorimetry/HGCalAssociatorProducers/interface/DetIdRecHitMap.h +``` + +Conceptually it is: + +```cpp +std::unordered_map +``` + +mapping: + +```text +reco DetId rawId -> global RecHit index +``` + +The global RecHit index is built by concatenating the configured RecHit collections in a deterministic order: + +1. all configured `HGCRecHitCollection` inputs; +2. all configured `reco::PFRecHitCollection` inputs. + +Typical configuration for RECO step3 output: + +```python +process.simHitToRecHitMapProducer = cms.EDProducer( + "SimHitToRecHitMapProducer", + + hgcalRecHits = cms.VInputTag( + cms.InputTag("HGCalRecHit", "HGCEERecHits", "RECO"), + cms.InputTag("HGCalRecHit", "HGCHEFRecHits", "RECO"), + cms.InputTag("HGCalRecHit", "HGCHEBRecHits", "RECO"), + ), + + pfRecHits = cms.VInputTag( + cms.InputTag("particleFlowRecHitECAL", "Cleaned", "RECO"), + cms.InputTag("particleFlowRecHitHBHE", "Cleaned", "RECO"), + cms.InputTag("particleFlowRecHitHF", "Cleaned", "RECO"), + cms.InputTag("particleFlowRecHitHO", "Cleaned", "RECO"), + ), +) +``` + +Do not include both `HGCalRecHit` and `particleFlowRecHitHGC` unless the intended indexing and double-counting policy is explicit. In the current workflow, HGCAL RecHits are taken from `HGCalRecHit`, while barrel and forward PF RecHits are taken from the cleaned `particleFlowRecHit*` collections. + +The map needs a ROOT dictionary because it is an EDM product, even if it is not written to the output file. + +## Graph navigation examples + +### Iterate over particles + +```cpp +auto const& graph = event.get(truthGraphToken_); + +for (auto particle : graph.particleViews()) { + if (!particle.valid()) + continue; + + const auto pdgId = particle.pdgId(); + const auto p4 = particle.momentum(); +} +``` + +### Find particles from a Z boson + +```cpp +for (auto particle : graph.particleViews()) { + if (particle.hasAncestorPdgId(23)) { + // This particle has a Z boson somewhere in its ancestor chain. + } +} +``` + +### Find the first common ancestor of two particles + +```cpp +auto p1 = graph.particle(firstId); +auto p2 = graph.particle(secondId); + +auto common = p1.firstCommonAncestor(p2); +if (common.has_value()) { + const int pdgId = common->pdgId(); +} +``` + +### Access production and decay vertices + +```cpp +for (auto particle : graph.particleViews()) { + for (auto vertex : particle.productionVertices()) { + const auto x4 = vertex.position(); + } + + for (auto vertex : particle.decayVertices()) { + const auto x4 = vertex.position(); + } +} +``` + +### Navigate parent and child particles + +```cpp +for (auto particle : graph.particleViews()) { + auto parents = particle.parents(); + auto children = particle.children(); +} +``` + +### Access checkpoints + +```cpp +for (auto particle : graph.particleViews()) { + for (auto const& checkpoint : particle.checkpoints()) { + const auto id = checkpoint.checkpointId; + const auto position = checkpoint.position; + const auto momentum = checkpoint.momentum; + } +} +``` + +### Access direct and subgraph hits + +```cpp +auto const& hitIndex = event.get(hitIndexToken_); + +for (uint32_t particleId = 0; particleId < hitIndex.nParticles(); ++particleId) { + auto directHits = hitIndex.directHits(particleId); + auto subgraphHits = hitIndex.subgraphHits(particleId); + + float directEnergy = 0.f; + for (auto const& hit : directHits) { + directEnergy += hit.energy; + } + + float subgraphEnergy = 0.f; + for (auto const& hit : subgraphHits) { + subgraphEnergy += hit.energy; + } +} +``` + +## Dumping and visualization + +Two graph dumper modules are provided. + +### Raw graph dumper + +`TruthGraphDumper` writes a DOT representation of the raw graph. + +It includes enriched labels using HepMC and simulation products when available. + +Default output: + +```text +truthgraph.dot +``` + +The dumper can be configured with: + +```python +process.truthGraphDumper = cms.EDAnalyzer( + "TruthGraphDumper", + src = cms.InputTag("truthGraphProducer"), + dotFile = cms.string("truthgraph.dot"), + maxNodes = cms.uint32(5000), + maxEdgesPerNode = cms.uint32(200), + + simTracks = cms.InputTag("g4SimHits"), + simVertices = cms.InputTag("g4SimHits"), + genEventHepMC = cms.InputTag("generatorSmeared"), + genEventHepMC3 = cms.InputTag("generatorSmeared"), +) +``` + +### Logical graph dumper + +`TruthLogicalGraphDumper` writes a DOT representation of the logical graph. + +Default output: + +```text +truthlogicalgraph.dot +``` + +The dumper can also use: + +* the raw `TruthGraph`, to enrich labels with raw node information; +* the `LogicalGraphHitIndex`, to annotate particles with direct and subgraph SimHit summaries; +* the RecHit collections, to compute RecHit energy summaries from `recHitIndex`. + +Example: + +```python +process.truthLogicalGraphDumper = cms.EDAnalyzer( + "TruthLogicalGraphDumper", + src = cms.InputTag("truthLogicalGraphProducer"), + rawSrc = cms.InputTag("truthGraphProducer"), + hitIndex = cms.InputTag("truthLogicalGraphHitIndexProducer"), + + hgcalRecHits = cms.VInputTag( + cms.InputTag("HGCalRecHit", "HGCEERecHits", "RECO"), + cms.InputTag("HGCalRecHit", "HGCHEFRecHits", "RECO"), + cms.InputTag("HGCalRecHit", "HGCHEBRecHits", "RECO"), + ), + + pfRecHits = cms.VInputTag( + cms.InputTag("particleFlowRecHitECAL", "Cleaned", "RECO"), + cms.InputTag("particleFlowRecHitHBHE", "Cleaned", "RECO"), + cms.InputTag("particleFlowRecHitHF", "Cleaned", "RECO"), + cms.InputTag("particleFlowRecHitHO", "Cleaned", "RECO"), + ), + + dotFile = cms.string("truthlogicalgraph.dot"), + + maxParticles = cms.uint32(5000), + maxVertices = cms.uint32(5000), + maxEdgesPerNode = cms.uint32(200), + + hideLargeSimSourceVertices = cms.bool(True), + largeSimSourceVertexMinOutgoing = cms.uint32(50), +) +``` + +Particle labels include summaries such as: + +```text +direct simHits: N simE=... +direct recHits: N missing=... recoE=... +subgraph simHits: N simE=... +subgraph recHits: N missing=... recoE=... +``` + +DOT files can be converted with Graphviz, for example: + +```bash +dot -Tsvg truthlogicalgraph_run1_lumi1_event1.dot -o truthlogicalgraph_run1_lumi1_event1.svg +``` + +To convert all DOT files in a directory: + +```bash +for f in *.dot; do dot -Tsvg "$f" -o "${f%.dot}.svg"; done +``` + +To inspect whether hit information is present in the DOT output: + +```bash +grep -n "simHits\|recHits\|simE\|recoE" truthlogicalgraph_run1_lumi1_event1.dot | head -80 +``` + +## Example configuration on step3.root + +A typical standalone test configuration running on a `step3.root` file is: + +```python +import FWCore.ParameterSet.Config as cms + +process = cms.Process("TRUTHGRAPH") + +process.load("FWCore.MessageService.MessageLogger_cfi") + +process.load("Configuration.Geometry.GeometryExtendedRun4D120Reco_cff") + +process.maxEvents = cms.untracked.PSet( + input = cms.untracked.int32(-1) +) + +process.source = cms.Source( + "PoolSource", + fileNames = cms.untracked.vstring("file:step3.root") +) + +process.options = cms.untracked.PSet( + wantSummary = cms.untracked.bool(True) +) + +process.truthGraphProducer = cms.EDProducer( + "TruthGraphProducer", + genEventHepMC3 = cms.InputTag("generatorSmeared"), + genEventHepMC = cms.InputTag("generatorSmeared"), + simTracks = cms.InputTag("g4SimHits"), + simVertices = cms.InputTag("g4SimHits"), + addGenToSimEdges = cms.bool(True), +) + +process.truthGraphDumper = cms.EDAnalyzer( + "TruthGraphDumper", + src = cms.InputTag("truthGraphProducer"), + dotFile = cms.string("truthgraph.dot"), + maxNodes = cms.uint32(20000), + maxEdgesPerNode = cms.uint32(50), + simTracks = cms.InputTag("g4SimHits"), + simVertices = cms.InputTag("g4SimHits"), + genEventHepMC = cms.InputTag("generatorSmeared"), + genEventHepMC3 = cms.InputTag("generatorSmeared"), +) + +process.truthLogicalGraphProducer = cms.EDProducer( + "TruthLogicalGraphProducer", + src = cms.InputTag("truthGraphProducer"), + simTracks = cms.InputTag("g4SimHits"), + simVertices = cms.InputTag("g4SimHits"), + genEventHepMC3 = cms.InputTag("generatorSmeared"), + genEventHepMC = cms.InputTag("generatorSmeared"), + + motherPdgId = cms.int32(0), + mergeGenSimVertices = cms.bool(True), + collapseIntermediateGenParticles = cms.bool(True), +) + +process.simHitToRecHitMapProducer = cms.EDProducer( + "SimHitToRecHitMapProducer", + + hgcalRecHits = cms.VInputTag( + cms.InputTag("HGCalRecHit", "HGCEERecHits", "RECO"), + cms.InputTag("HGCalRecHit", "HGCHEFRecHits", "RECO"), + cms.InputTag("HGCalRecHit", "HGCHEBRecHits", "RECO"), + ), + + pfRecHits = cms.VInputTag( + cms.InputTag("particleFlowRecHitECAL", "Cleaned", "RECO"), + cms.InputTag("particleFlowRecHitHBHE", "Cleaned", "RECO"), + cms.InputTag("particleFlowRecHitHF", "Cleaned", "RECO"), + cms.InputTag("particleFlowRecHitHO", "Cleaned", "RECO"), + ), +) + +process.truthLogicalGraphHitIndexProducer = cms.EDProducer( + "TruthLogicalGraphHitIndexProducer", + + src = cms.InputTag("truthLogicalGraphProducer"), + rawSrc = cms.InputTag("truthGraphProducer"), + + recHitMap = cms.InputTag("simHitToRecHitMapProducer"), + + simHitCollections = cms.VInputTag( + cms.InputTag("g4SimHits", "HGCHitsEE", "SIM"), + cms.InputTag("g4SimHits", "HGCHitsHEfront", "SIM"), + cms.InputTag("g4SimHits", "HGCHitsHEback", "SIM"), + cms.InputTag("g4SimHits", "EcalHitsEB", "SIM"), + cms.InputTag("g4SimHits", "HcalHits", "SIM"), + ), + + doHGCalRelabelling = cms.bool(False), +) + +process.truthLogicalGraphDumper = cms.EDAnalyzer( + "TruthLogicalGraphDumper", + src = cms.InputTag("truthLogicalGraphProducer"), + rawSrc = cms.InputTag("truthGraphProducer"), + hitIndex = cms.InputTag("truthLogicalGraphHitIndexProducer"), + + hgcalRecHits = cms.VInputTag( + cms.InputTag("HGCalRecHit", "HGCEERecHits", "RECO"), + cms.InputTag("HGCalRecHit", "HGCHEFRecHits", "RECO"), + cms.InputTag("HGCalRecHit", "HGCHEBRecHits", "RECO"), + ), + + pfRecHits = cms.VInputTag( + cms.InputTag("particleFlowRecHitECAL", "Cleaned", "RECO"), + cms.InputTag("particleFlowRecHitHBHE", "Cleaned", "RECO"), + cms.InputTag("particleFlowRecHitHF", "Cleaned", "RECO"), + cms.InputTag("particleFlowRecHitHO", "Cleaned", "RECO"), + ), + + dotFile = cms.string("truthlogicalgraph.dot"), + + maxParticles = cms.uint32(20000), + maxVertices = cms.uint32(20000), + maxEdgesPerNode = cms.uint32(300), +) + +process.MessageLogger.cerr.threshold = "INFO" +process.MessageLogger.cerr.default = cms.untracked.PSet( + limit = cms.untracked.int32(0) +) +process.MessageLogger.cerr.TruthGraphProducer = cms.untracked.PSet( + limit = cms.untracked.int32(-1) +) +process.MessageLogger.cerr.TruthLogicalGraphProducer = cms.untracked.PSet( + limit = cms.untracked.int32(-1) +) +process.MessageLogger.cerr.TruthLogicalGraphHitIndexProducer = cms.untracked.PSet( + limit = cms.untracked.int32(-1) +) +process.MessageLogger.cerr.SimHitToRecHitMapProducer = cms.untracked.PSet( + limit = cms.untracked.int32(-1) +) + +process.truthGraph_step = cms.Path( + process.truthGraphProducer + + process.truthGraphDumper + + process.truthLogicalGraphProducer + + process.simHitToRecHitMapProducer + + process.truthLogicalGraphHitIndexProducer + + process.truthLogicalGraphDumper +) +``` + +## Event content checks + +Useful commands to inspect available products are: + +```bash +edmDumpEventContent step3.root | grep -E 'PFRecHit|particleFlowRecHit' +``` + +and: + +```bash +edmDumpEventContent step3.root | grep -E 'HGCRecHit|HGCalRecHit|HGCEERecHits|HGCHEFRecHits|HGCHEBRecHits' +``` + +For a typical Phase-2 RECO file, the useful collections are: + +```text +vector "HGCalRecHit" "HGCEERecHits" "RECO" +vector "HGCalRecHit" "HGCHEFRecHits" "RECO" +vector "HGCalRecHit" "HGCHEBRecHits" "RECO" + +vector "particleFlowRecHitECAL" "Cleaned" "RECO" +vector "particleFlowRecHitHBHE" "Cleaned" "RECO" +vector "particleFlowRecHitHF" "Cleaned" "RECO" +vector "particleFlowRecHitHO" "Cleaned" "RECO" +``` + +The relevant SimHit collections include: + +```text +vector "g4SimHits" "HGCHitsEE" "SIM" +vector "g4SimHits" "HGCHitsHEfront" "SIM" +vector "g4SimHits" "HGCHitsHEback" "SIM" +vector "g4SimHits" "EcalHitsEB" "SIM" +vector "g4SimHits" "HcalHits" "SIM" +vector "g4SimHits" "" "SIM" +vector "g4SimHits" "" "SIM" +``` + +## Intended matching strategy + +The long-term matching model is to build detector-level associators from hits to `truth::Particle`. + +In this model: + +* the graph provides the particle and vertex structure; +* detector-level truth association is performed through hits; +* reconstructed objects can be associated either to a single `truth::Particle` or to a larger truth branch; +* truth information can be aggregated over a coherent branch when a reconstructed object corresponds to a composite truth structure. + +Different reconstruction domains need different matching metrics: + +* tracking association is usually based on shared hits, not on hit energy; +* calorimeter clustering association can use energy fractions or energy-weighted metrics; +* timing objects may need time-aware matching; +* other reconstruction objects may require detector-specific metrics. + +The graph should therefore act as the common truth substrate, while matching definitions remain detector-aware and use-case dependent. + +## Possible future `truth::Branch` abstraction + +A future `truth::Branch` abstraction is intended to represent a coherent subgraph selected from the full truth graph. + +This would provide a natural target for truth-reco association when a reconstructed object is not well described by a single truth particle. + +Possible branch-level operations include: + +* aggregate particles in a physically connected subgraph; +* collect all detector hits attached to particles in the branch; +* compute detector-specific matching scores; +* compare two branches; +* define stable references for composite truth structures; +* merge two selected truth structures and their hit content. + +In this picture, `truth::Branch` avoids forcing an early collapse of the truth history into fixed reference objects. + +## Hits attached to particles + +The current prototype already implements the first version of hit attachment through `truth::LogicalGraphHitIndex`. + +This keeps hit information outside the main logical graph data product, while allowing the graph dumper and future associators to query: + +* direct SimHits from a particle; +* subgraph SimHits from a particle and all descendants; +* matched RecHit indices; +* SimHit energy sums; +* RecHit energy sums. + +A possible long-term direction is to generalize this further so that detector-specific truth objects such as `TrackingParticle`, `SimCluster`, and `CaloParticle` can be expressed as views or derived abstractions on top of the same graph and hit-index infrastructure. + +This would make it possible to: + +* use one common truth structure across subsystems; +* preserve detector-specific information without fragmenting the truth model; +* define multiple matching strategies on top of the same graph; +* aggregate truth information dynamically over particles, vertices, or branches. + +## Current status + +The current prototype can: + +* build a raw `TruthGraph` from generator and simulation products; +* build a logical `truth::Graph` from the raw graph; +* merge matched generator and simulation particles; +* optionally merge generator and simulation vertices; +* optionally collapse intermediate GEN-only particle copies; +* navigate particle and vertex relations; +* compute parents, children, ancestors, descendants, roots, and leaves; +* find ancestors with a given PDG id; +* find a common ancestor between two particles; +* store boundary-crossing checkpoints; +* build a direct and subgraph calorimeter SimHit index per logical particle; +* map SimHits to RecHit indices through DetId when a RecHit map is available; +* dump raw and logical graphs to DOT for debugging; +* annotate logical graph DOT nodes with SimHit and RecHit energy summaries. + +## Known limitations + +The current implementation is a prototype and several aspects are intentionally conservative. + +Known limitations include: + +* GEN-SIM association still needs broader validation; +* the semantics of vertex merging require further study; +* checkpoint information is currently limited to boundary-crossing information from `SimTrack`; +* the hit index currently targets calorimeter `PCaloHit` inputs; +* RecHit association is currently DetId-based and does not encode more detailed detector response information; +* duplicate DetId handling depends on the configured RecHit input ordering and map policy; +* `truth::Branch` is part of the target design but not yet implemented; +* the logical API is still evolving; +* the raw graph construction needs further validation on realistic events and pileup scenarios. + +## Next steps + +Planned or natural next steps are: + +1. Continue debugging the raw truth graph construction. +2. Validate HepMC2 and HepMC3 behaviour on representative workflows. +3. Refine GEN-SIM particle association. +4. Study the vertex merging policy in realistic events. +5. Extend the checkpoint model beyond boundary crossings. +6. Investigate direct graph construction during Geant4 simulation. +7. Extend hit indexing beyond calorimeter `PCaloHit` where appropriate. +8. Prototype detector-specific truth-reco matching metrics on top of the hit index. +9. Implement a `truth::Branch` abstraction. +10. Study how existing detector-specific truth containers could be represented as views over the graph. +11. Stabilize the logical API for downstream reconstruction and validation code. + +## Design principle + +The main design principle is to separate storage details from physics navigation. + +Low-level CMS products remain the source of truth for building the graph, but downstream code should be able to ask physics questions through a stable interface: + +```cpp +particle.parents(); +particle.children(); +particle.ancestors(); +particle.firstCommonAncestor(other); +particle.hasAncestorPdgId(23); +hitIndex.directHits(particle.id()); +hitIndex.subgraphHits(particle.id()); +``` + +rather than reimplementing event-history navigation and hit aggregation separately for each reconstruction or validation use case. + + diff --git a/PhysicsTools/TruthInfo/interface/Graph.h b/PhysicsTools/TruthInfo/interface/Graph.h new file mode 100644 index 0000000000000..3a58f5d530e4c --- /dev/null +++ b/PhysicsTools/TruthInfo/interface/Graph.h @@ -0,0 +1,244 @@ +#ifndef PhysicsTools_TruthInfo_interface_Graph_h +#define PhysicsTools_TruthInfo_interface_Graph_h + +#include +#include +#include +#include + +#include "DataFormats/Math/interface/LorentzVector.h" + +namespace truth { + + struct Checkpoint { + uint32_t checkpointId = 0; + math::XYZTLorentzVectorF position; + math::XYZTLorentzVectorF momentum; + }; + + struct ParticleData { + // Optional provenance/debug back-references to the raw TruthGraph nodes. + // -1 means "not available". + int32_t genNode = -1; + int32_t simNode = -1; + + // Merged metadata. + int32_t pdgId = 0; + int16_t status = 0; + + // Packed reco::GenStatusFlags bitfield, when available. + // 0 means "not available" or "no flags set". + uint16_t statusFlags = 0; + + // SIM event id when available, 0 otherwise. + uint64_t eventId = 0; + + // GEN connected component id from the raw TruthGraph, -1 if not applicable. + int32_t genEvent = -1; + + // Standalone payload. + // Nominal physics four-momentum. + // For GEN+SIM particles, this is the GEN four-momentum. + // For SIM-only particles, this is the SimTrack four-momentum. + math::XYZTLorentzVectorD momentum; + + // Optional trajectory checkpoints. + std::vector checkpoints; + + [[nodiscard]] bool hasGen() const { return genNode >= 0; } + [[nodiscard]] bool hasSim() const { return simNode >= 0; } + [[nodiscard]] bool valid() const { return hasGen() || hasSim(); } + }; + + struct VertexData { + // Optional provenance/debug back-references to the raw TruthGraph nodes. + // -1 means "not available". + int32_t genNode = -1; + int32_t simNode = -1; + + // SIM event id when available, 0 otherwise. + uint64_t eventId = 0; + + // GEN connected component id from the raw TruthGraph, -1 if not applicable. + int32_t genEvent = -1; + + // Standalone payload. + // Convention: "best available" position. + // Prefer SIM if present, otherwise GEN, otherwise default-constructed. + math::XYZTLorentzVectorD position; + + [[nodiscard]] bool hasGen() const { return genNode >= 0; } + [[nodiscard]] bool hasSim() const { return simNode >= 0; } + [[nodiscard]] bool valid() const { return hasGen() || hasSim(); } + }; + + class Graph; + class Particle; + class Vertex; + + class Particle { + public: + Particle() = default; + Particle(Graph const* graph, uint32_t id) : graph_(graph), id_(id) {} + + [[nodiscard]] bool valid() const { return graph_ != nullptr; } + [[nodiscard]] uint32_t id() const { return id_; } + + [[nodiscard]] const ParticleData& data() const; + + [[nodiscard]] bool hasGen() const; + [[nodiscard]] bool hasSim() const; + [[nodiscard]] int32_t pdgId() const; + [[nodiscard]] int16_t status() const; + [[nodiscard]] uint16_t statusFlags() const; + [[nodiscard]] uint64_t eventId() const; + [[nodiscard]] int32_t genEvent() const; + [[nodiscard]] const math::XYZTLorentzVectorD& momentum() const; + + [[nodiscard]] std::span checkpoints() const; + [[nodiscard]] bool hasCheckpoints() const; + [[nodiscard]] std::optional checkpoint(uint32_t checkpointId) const; + + [[nodiscard]] bool isRoot() const; + [[nodiscard]] bool isLeaf() const; + + [[nodiscard]] std::vector productionVertices() const; + [[nodiscard]] std::vector decayVertices() const; + + [[nodiscard]] std::vector parents() const; + [[nodiscard]] std::vector children() const; + + [[nodiscard]] std::vector ancestors() const; + [[nodiscard]] std::vector descendants() const; + + [[nodiscard]] bool hasAncestorPdgId(int pdgId) const; + [[nodiscard]] std::optional firstAncestorWithPdgId(int pdgId) const; + [[nodiscard]] std::optional firstCommonAncestor(Particle other) const; + + [[nodiscard]] bool operator==(Particle const& other) const { return graph_ == other.graph_ && id_ == other.id_; } + [[nodiscard]] bool operator!=(Particle const& other) const { return !(*this == other); } + + private: + Graph const* graph_ = nullptr; + uint32_t id_ = 0; + }; + + class Vertex { + public: + Vertex() = default; + Vertex(Graph const* graph, uint32_t id) : graph_(graph), id_(id) {} + + [[nodiscard]] bool valid() const { return graph_ != nullptr; } + [[nodiscard]] uint32_t id() const { return id_; } + + [[nodiscard]] const VertexData& data() const; + + [[nodiscard]] bool hasGen() const; + [[nodiscard]] bool hasSim() const; + [[nodiscard]] uint64_t eventId() const; + [[nodiscard]] int32_t genEvent() const; + [[nodiscard]] const math::XYZTLorentzVectorD& position() const; + + [[nodiscard]] bool isSource() const; + [[nodiscard]] bool isSink() const; + + [[nodiscard]] std::vector incomingParticles() const; + [[nodiscard]] std::vector outgoingParticles() const; + + [[nodiscard]] bool operator==(Vertex const& other) const { return graph_ == other.graph_ && id_ == other.id_; } + [[nodiscard]] bool operator!=(Vertex const& other) const { return !(*this == other); } + + private: + Graph const* graph_ = nullptr; + uint32_t id_ = 0; + }; + + class Graph { + public: + using size_type = uint32_t; + + std::vector particles; + std::vector vertices; + + // Particle -> decay vertices + std::vector particleToDecayVertexOffsets; + std::vector particleToDecayVertices; + + // Particle -> production vertices + std::vector particleToProductionVertexOffsets; + std::vector particleToProductionVertices; + + // Vertex -> outgoing particles + std::vector vertexToOutgoingParticleOffsets; + std::vector vertexToOutgoingParticles; + + // Vertex -> incoming particles + std::vector vertexToIncomingParticleOffsets; + std::vector vertexToIncomingParticles; + + [[nodiscard]] size_type nParticles() const { return static_cast(particles.size()); } + [[nodiscard]] size_type nVertices() const { return static_cast(vertices.size()); } + + [[nodiscard]] bool empty() const { return particles.empty() && vertices.empty(); } + + [[nodiscard]] Particle particle(size_type id) const; + [[nodiscard]] Vertex vertex(size_type id) const; + + [[nodiscard]] std::vector particleViews() const; + [[nodiscard]] std::vector vertexViews() const; + + [[nodiscard]] std::vector roots() const; + [[nodiscard]] std::vector leaves() const; + + [[nodiscard]] std::vector sourceVertices() const; + [[nodiscard]] std::vector sinkVertices() const; + + [[nodiscard]] std::span decayVertices(size_type particleId) const { + const auto b = particleToDecayVertexOffsets.at(particleId); + const auto e = particleToDecayVertexOffsets.at(particleId + 1); + return std::span(particleToDecayVertices.data() + b, e - b); + } + + [[nodiscard]] std::span productionVertices(size_type particleId) const { + const auto b = particleToProductionVertexOffsets.at(particleId); + const auto e = particleToProductionVertexOffsets.at(particleId + 1); + return std::span(particleToProductionVertices.data() + b, e - b); + } + + [[nodiscard]] std::span outgoingParticles(size_type vertexId) const { + const auto b = vertexToOutgoingParticleOffsets.at(vertexId); + const auto e = vertexToOutgoingParticleOffsets.at(vertexId + 1); + return std::span(vertexToOutgoingParticles.data() + b, e - b); + } + + [[nodiscard]] std::span incomingParticles(size_type vertexId) const { + const auto b = vertexToIncomingParticleOffsets.at(vertexId); + const auto e = vertexToIncomingParticleOffsets.at(vertexId + 1); + return std::span(vertexToIncomingParticles.data() + b, e - b); + } + + [[nodiscard]] bool isConsistent() const; + + private: + friend class Particle; + friend class Vertex; + + [[nodiscard]] std::vector productionVerticesOf(size_type particleId) const; + [[nodiscard]] std::vector decayVerticesOf(size_type particleId) const; + + [[nodiscard]] std::vector parentsOf(size_type particleId) const; + [[nodiscard]] std::vector childrenOf(size_type particleId) const; + + [[nodiscard]] std::vector ancestorsOf(size_type particleId) const; + [[nodiscard]] std::vector descendantsOf(size_type particleId) const; + + [[nodiscard]] std::optional firstAncestorWithPdgIdOf(size_type particleId, int pdgId) const; + [[nodiscard]] std::optional firstCommonAncestorOf(size_type a, size_type b) const; + + [[nodiscard]] std::vector incomingParticlesOf(size_type vertexId) const; + [[nodiscard]] std::vector outgoingParticlesOf(size_type vertexId) const; + }; + +} // namespace truth + +#endif diff --git a/PhysicsTools/TruthInfo/interface/LogicalGraphHitIndex.h b/PhysicsTools/TruthInfo/interface/LogicalGraphHitIndex.h new file mode 100644 index 0000000000000..103f0088ca194 --- /dev/null +++ b/PhysicsTools/TruthInfo/interface/LogicalGraphHitIndex.h @@ -0,0 +1,68 @@ +#ifndef PhysicsTools_TruthInfo_LogicalGraphHitIndex_h +#define PhysicsTools_TruthInfo_LogicalGraphHitIndex_h + +#include +#include +#include +#include + +namespace truth { + + class LogicalGraphHitIndex { + public: + struct Hit { + static constexpr uint32_t invalidRecHitIndex = std::numeric_limits::max(); + + uint32_t detId = 0; + uint32_t recHitIndex = invalidRecHitIndex; + float energy = 0.f; + + [[nodiscard]] bool hasRecHit() const { return recHitIndex != invalidRecHitIndex; } + }; + + LogicalGraphHitIndex() = default; + + LogicalGraphHitIndex(uint32_t nParticles, + std::vector directOffsets, + std::vector directHits, + std::vector subgraphOffsets, + std::vector subgraphHits) + : nParticles_(nParticles), + directOffsets_(std::move(directOffsets)), + directHits_(std::move(directHits)), + subgraphOffsets_(std::move(subgraphOffsets)), + subgraphHits_(std::move(subgraphHits)) {} + + [[nodiscard]] uint32_t nParticles() const { return nParticles_; } + + [[nodiscard]] std::span directHits(uint32_t particleId) const { + const auto b = directOffsets_.at(particleId); + const auto e = directOffsets_.at(particleId + 1); + return std::span(directHits_.data() + b, e - b); + } + + [[nodiscard]] std::span subgraphHits(uint32_t particleId) const { + const auto b = subgraphOffsets_.at(particleId); + const auto e = subgraphOffsets_.at(particleId + 1); + return std::span(subgraphHits_.data() + b, e - b); + } + + [[nodiscard]] const std::vector& directOffsets() const { return directOffsets_; } + [[nodiscard]] const std::vector& directHitStorage() const { return directHits_; } + + [[nodiscard]] const std::vector& subgraphOffsets() const { return subgraphOffsets_; } + [[nodiscard]] const std::vector& subgraphHitStorage() const { return subgraphHits_; } + + private: + uint32_t nParticles_ = 0; + + std::vector directOffsets_; + std::vector directHits_; + + std::vector subgraphOffsets_; + std::vector subgraphHits_; + }; + +} // namespace truth + +#endif diff --git a/PhysicsTools/TruthInfo/interface/LogicalGraphHitIndexBuilder.h b/PhysicsTools/TruthInfo/interface/LogicalGraphHitIndexBuilder.h new file mode 100644 index 0000000000000..36cf3f647a89c --- /dev/null +++ b/PhysicsTools/TruthInfo/interface/LogicalGraphHitIndexBuilder.h @@ -0,0 +1,50 @@ +#ifndef PhysicsTools_TruthInfo_LogicalGraphHitIndexBuilder_h +#define PhysicsTools_TruthInfo_LogicalGraphHitIndexBuilder_h + +#include +#include +#include + +#include "PhysicsTools/TruthInfo/interface/LogicalGraphHitIndex.h" + +namespace truth { + + class LogicalGraphHitIndexBuilder { + public: + explicit LogicalGraphHitIndexBuilder(uint32_t nParticles); + + void setSimTrackForParticle(uint32_t particleId, uint32_t trackId); + void addParticleChild(uint32_t parentParticleId, uint32_t childParticleId); + + void addHitForTrack(uint32_t trackId, uint32_t detId, uint32_t recHitIndex, float energy); + + [[nodiscard]] LogicalGraphHitIndex finish(); + + private: + using Hit = LogicalGraphHitIndex::Hit; + + struct HitAccumulator { + uint32_t recHitIndex = Hit::invalidRecHitIndex; + float energy = 0.f; + }; + + using HitMap = std::unordered_map; + + static void addHit(HitMap& hits, uint32_t detId, uint32_t recHitIndex, float energy); + static void addHit(HitMap& hits, Hit const& hit); + static std::vector sortedHits(HitMap const& hits); + + void fillSubgraphHits(uint32_t particleId, std::vector& state); + + uint32_t nParticles_ = 0; + + std::unordered_map trackIdToParticle_; + std::vector> children_; + + std::vector directHits_; + std::vector subgraphHits_; + }; + +} // namespace truth + +#endif diff --git a/PhysicsTools/TruthInfo/interface/TruthGraph.h b/PhysicsTools/TruthInfo/interface/TruthGraph.h new file mode 100644 index 0000000000000..ad5a1ec134da6 --- /dev/null +++ b/PhysicsTools/TruthInfo/interface/TruthGraph.h @@ -0,0 +1,100 @@ +// Author: Felice Pantaleo - CERN +// Date: 03/2026 +// A compact, read-only graph representation of the truth information in an event. +// The graph is built in the TruthGraphProducer module, which also fills the node metadata and associations. +// The graph is intended to be a common data format for various use cases (e.g. validation, analysis, visualization). + +#ifndef PhysicsTools_TruthInfo_interface_TruthGraph_h +#define PhysicsTools_TruthInfo_interface_TruthGraph_h + +#include +#include +#include + +class TruthGraph { +public: + enum class NodeKind : uint8_t { + GenEvent = 0, + GenVertex = 1, + GenParticle = 2, + SimVertex = 3, + SimTrack = 4, + }; + + // Edge categories (for visualization / filtering) + enum class EdgeKind : uint8_t { + Gen = 0, // within GEN realm + Sim = 1, // within SIM realm + GenToSim = 2, // realm boundary GEN -> SIM + SimToGen = 3 // reserved (we don't produce these now) + }; + + struct NodeRef { + NodeKind kind = NodeKind::GenParticle; + int64_t key = 0; // GenParticle: index; SimTrack: trackId; SimVertex: index; GenVertex: barcode/index + }; + + TruthGraph() = default; + + // CSR out-edges: offsets.size() == nNodes+1 + // edges.size() == nEdges + // edgeKind.size() == nEdges + std::vector offsets; + std::vector edges; + std::vector edgeKind; // stores TruthGraph::EdgeKind as uint8_t + + // Node metadata: nodes.size() == nNodes + std::vector nodes; + + // Cached payload (optional) + std::vector pdgId; // 0 if not applicable + std::vector status; // 0 if not applicable + std::vector statusFlags; // packed reco::GenStatusFlags, 0 if not available + // Packed EncodedEventId for SIM nodes; 0 for GEN nodes + std::vector eventId; + + std::vector genEventOfNode; // -1 for SIM; for GEN nodes = component id + + // Associations (nodeId -> nodeId). Only meaningful for SimTrack nodes. + // -1 means "no association". + std::vector simTrackToGen; // SimTrack nodeId -> GenParticle nodeId + std::vector simTrackToVtx; // SimTrack nodeId -> SimVertex nodeId + + uint32_t nNodes() const { return static_cast(nodes.size()); } + uint32_t nEdges() const { return static_cast(edges.size()); } + + uint32_t edgeBegin(uint32_t nodeId) const { return offsets.at(nodeId); } + uint32_t edgeEnd(uint32_t nodeId) const { return offsets.at(nodeId + 1); } + + std::span children(uint32_t nodeId) const { + const auto b = edgeBegin(nodeId); + const auto e = edgeEnd(nodeId); + return std::span(edges.data() + b, e - b); + } + + std::span childrenEdgeKinds(uint32_t nodeId) const { + const auto b = edgeBegin(nodeId); + const auto e = edgeEnd(nodeId); + return std::span(edgeKind.data() + b, e - b); + } + + const NodeRef& nodeRef(uint32_t nodeId) const { return nodes.at(nodeId); } + + int32_t nodePdgId(uint32_t nodeId) const { return (nodeId < pdgId.size()) ? pdgId[nodeId] : 0; } + + int16_t nodeStatus(uint32_t nodeId) const { return (nodeId < status.size()) ? status[nodeId] : 0; } + uint16_t nodeStatusFlags(uint32_t nodeId) const { return (nodeId < statusFlags.size()) ? statusFlags[nodeId] : 0; } + uint64_t nodeEventId(uint32_t nodeId) const { return (nodeId < eventId.size()) ? eventId[nodeId] : 0ull; } + + int32_t nodeSimTrackToGen(uint32_t nodeId) const { + return (nodeId < simTrackToGen.size()) ? simTrackToGen[nodeId] : -1; + } + + int32_t nodeSimTrackToVtx(uint32_t nodeId) const { + return (nodeId < simTrackToVtx.size()) ? simTrackToVtx[nodeId] : -1; + } + + bool isConsistent() const; +}; + +#endif diff --git a/PhysicsTools/TruthInfo/interface/TruthLogicalGraphPostProcessor.h b/PhysicsTools/TruthInfo/interface/TruthLogicalGraphPostProcessor.h new file mode 100644 index 0000000000000..601d60b245256 --- /dev/null +++ b/PhysicsTools/TruthInfo/interface/TruthLogicalGraphPostProcessor.h @@ -0,0 +1,51 @@ +#ifndef PhysicsTools_TruthInfo_interface_TruthLogicalGraphPostProcessor_h +#define PhysicsTools_TruthInfo_interface_TruthLogicalGraphPostProcessor_h + +#include +#include + +#include "FWCore/ParameterSet/interface/ParameterSet.h" +#include "FWCore/ParameterSet/interface/ParameterSetDescription.h" + +#include "PhysicsTools/TruthInfo/interface/Graph.h" + +namespace truth { + + struct LogicalGraphPostProcessingConfig { + bool collapseIntermediateGenParticles = true; + + // If empty, no seed-based graph cut is applied. + std::vector seedPdgIds; + + // For each selected seed particle, keep this many generations of parents + // above the seed before keeping the full downstream graph. + uint32_t seedParentDepth = 0; + + // Particles with these exact PDG ids are removed from the final logical graph. + // If such a particle is internal, its production and decay vertices are merged + // so that the graph remains navigable. + std::vector ignoredPdgIds; + + // Exact logical particle ids to remove from the final logical graph. + // These ids refer to the graph state at the moment the ignored-particle + // collapsing step is applied. + std::vector ignoredParticleIds; + }; + + class TruthLogicalGraphPostProcessor { + public: + TruthLogicalGraphPostProcessor() = default; + explicit TruthLogicalGraphPostProcessor(LogicalGraphPostProcessingConfig config); + + static edm::ParameterSetDescription psetDescription(); + static LogicalGraphPostProcessingConfig configFromPSet(edm::ParameterSet const& pset); + + [[nodiscard]] Graph process(Graph input) const; + + private: + LogicalGraphPostProcessingConfig config_; + }; + +} // namespace truth + +#endif diff --git a/PhysicsTools/TruthInfo/plugins/BuildFile.xml b/PhysicsTools/TruthInfo/plugins/BuildFile.xml new file mode 100644 index 0000000000000..8e4288cc9aedc --- /dev/null +++ b/PhysicsTools/TruthInfo/plugins/BuildFile.xml @@ -0,0 +1,52 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/PhysicsTools/TruthInfo/plugins/LogicalGraphHitIndexProducer.cc b/PhysicsTools/TruthInfo/plugins/LogicalGraphHitIndexProducer.cc new file mode 100644 index 0000000000000..831be2a45d06b --- /dev/null +++ b/PhysicsTools/TruthInfo/plugins/LogicalGraphHitIndexProducer.cc @@ -0,0 +1,366 @@ +#include +#include +#include +#include +#include +#include +#include + +#include "FWCore/Framework/interface/Event.h" +#include "FWCore/Framework/interface/EventSetup.h" +#include "FWCore/Framework/interface/MakerMacros.h" +#include "FWCore/Framework/interface/global/EDProducer.h" +#include "FWCore/MessageLogger/interface/MessageLogger.h" +#include "FWCore/ParameterSet/interface/ConfigurationDescriptions.h" +#include "FWCore/ParameterSet/interface/ParameterSet.h" +#include "FWCore/ParameterSet/interface/ParameterSetDescription.h" +#include "FWCore/Utilities/interface/EDGetToken.h" +#include "FWCore/Utilities/interface/InputTag.h" + +#include "DataFormats/DetId/interface/DetId.h" +#include "DataFormats/ForwardDetId/interface/HGCalDetId.h" +#include "Geometry/CaloGeometry/interface/CaloGeometry.h" +#include "Geometry/HGCalGeometry/interface/HGCalGeometry.h" +#include "Geometry/HcalCommonData/interface/HcalHitRelabeller.h" +#include "Geometry/HcalTowerAlgo/interface/HcalGeometry.h" +#include "Geometry/Records/interface/CaloGeometryRecord.h" +#include "SimDataFormats/CaloHit/interface/PCaloHit.h" +#include "SimDataFormats/CaloTest/interface/HGCalTestNumbering.h" + +#include "PhysicsTools/TruthInfo/interface/Graph.h" +#include "PhysicsTools/TruthInfo/interface/LogicalGraphHitIndex.h" +#include "PhysicsTools/TruthInfo/interface/LogicalGraphHitIndexBuilder.h" +#include "PhysicsTools/TruthInfo/interface/TruthGraph.h" + +#include "SimCalorimetry/HGCalAssociatorProducers/interface/DetIdRecHitMap.h" + +namespace { + + struct LogicalGraphView { + explicit LogicalGraphView(truth::Graph const& graph) : graph_(graph) {} + + uint32_t nParticles() const { return graph_.nParticles(); } + + bool particleHasSim(uint32_t particleId) const { + return particleId < graph_.particles.size() && graph_.particles[particleId].hasSim(); + } + + int32_t particleSimNode(uint32_t particleId) const { return graph_.particles[particleId].simNode; } + + template + void forEachParticleChild(uint32_t parentParticleId, F&& f) const { + if (parentParticleId >= graph_.nParticles()) + return; + + for (const uint32_t vertexId : graph_.decayVertices(parentParticleId)) { + if (vertexId >= graph_.nVertices()) + continue; + + for (const uint32_t childId : graph_.outgoingParticles(vertexId)) { + f(childId); + } + } + } + + truth::Graph const& graph_; + }; + + uint32_t checkedTrackId(int64_t key) { + if (key < 0 || key > static_cast(std::numeric_limits::max())) + return 0; + + return static_cast(key); + } + + bool inputTagLooksLikeHGCal(edm::InputTag const& tag) { + const std::string& instance = tag.instance(); + return instance.find("HGCHits") != std::string::npos || instance.find("HGCEE") != std::string::npos || + instance.find("HGCHE") != std::string::npos; + } + + bool inputTagLooksLikeHcal(edm::InputTag const& tag) { + const std::string& instance = tag.instance(); + return instance.find("HcalHits") != std::string::npos || instance.find("Hcal") != std::string::npos; + } + + struct RelabelContext { + int geometryType = -1; + + std::array hgTopologies = {nullptr, nullptr, nullptr}; + std::array hgConstants = {nullptr, nullptr, nullptr}; + + HcalDDDRecConstants const* hcalConstants = nullptr; + }; + +} // namespace + +class TruthLogicalGraphHitIndexProducer : public edm::global::EDProducer<> { +public: + explicit TruthLogicalGraphHitIndexProducer(edm::ParameterSet const& cfg); + ~TruthLogicalGraphHitIndexProducer() override = default; + + static void fillDescriptions(edm::ConfigurationDescriptions& descriptions); + +private: + void produce(edm::StreamID, edm::Event&, edm::EventSetup const&) const override; + + void fillTrackToParticleMap(LogicalGraphView const& graph, + TruthGraph const& rawGraph, + truth::LogicalGraphHitIndexBuilder& builder) const; + + void fillSimHits(edm::Event& event, + edm::EventSetup const& setup, + truth::LogicalGraphHitIndexBuilder& builder, + hgcal::DetIdRecHitMap const* recHitMap) const; + + RelabelContext makeRelabelContext(edm::EventSetup const& setup) const; + + uint32_t recoDetIdForSimHit(PCaloHit const& simHit, + bool isHGCalCollection, + bool isHcalCollection, + RelabelContext const& context) const; + + edm::EDGetTokenT graphToken_; + edm::EDGetTokenT rawGraphToken_; + edm::EDGetTokenT recHitMapToken_; + + std::vector simHitTags_; + std::vector>> simHitTokens_; + + edm::ESGetToken geomToken_; + + bool doHGCalRelabelling_ = true; +}; + +TruthLogicalGraphHitIndexProducer::TruthLogicalGraphHitIndexProducer(edm::ParameterSet const& cfg) + : graphToken_(consumes(cfg.getParameter("src"))), + rawGraphToken_(consumes(cfg.getParameter("rawSrc"))), + recHitMapToken_(consumes(cfg.getParameter("recHitMap"))), + simHitTags_(cfg.getParameter>("simHitCollections")), + geomToken_(esConsumes()), + doHGCalRelabelling_(cfg.getParameter("doHGCalRelabelling")) { + simHitTokens_.reserve(simHitTags_.size()); + for (auto const& tag : simHitTags_) { + simHitTokens_.push_back(consumes>(tag)); + } + + produces(); +} + +void TruthLogicalGraphHitIndexProducer::fillDescriptions(edm::ConfigurationDescriptions& descriptions) { + edm::ParameterSetDescription desc; + + desc.add("src", edm::InputTag("truthLogicalGraphProducer")); + desc.add("rawSrc", edm::InputTag("truthGraphProducer")); + desc.add("recHitMap", edm::InputTag("simHitToRecHitMapProducer")); + + desc.add>("simHitCollections", + {edm::InputTag("g4SimHits", "HGCHitsEE"), + edm::InputTag("g4SimHits", "HGCHitsHEfront"), + edm::InputTag("g4SimHits", "HGCHitsHEback")}); + + desc.add("doHGCalRelabelling", true) + ->setComment("Convert old HGCAL simulation DetIds to reco DetIds before looking up recHits"); + + descriptions.addWithDefaultLabel(desc); +} + +void TruthLogicalGraphHitIndexProducer::produce(edm::StreamID, edm::Event& event, edm::EventSetup const& setup) const { + auto const& graph = event.get(graphToken_); + auto const& rawGraph = event.get(rawGraphToken_); + + edm::Handle hRecHitMap; + event.getByToken(recHitMapToken_, hRecHitMap); + auto const* recHitMap = hRecHitMap.isValid() ? &(*hRecHitMap) : nullptr; + + LogicalGraphView graphView(graph); + + truth::LogicalGraphHitIndexBuilder builder(graphView.nParticles()); + + fillTrackToParticleMap(graphView, rawGraph, builder); + fillSimHits(event, setup, builder, recHitMap); + + auto output = std::make_unique(builder.finish()); + event.put(std::move(output)); +} + +void TruthLogicalGraphHitIndexProducer::fillTrackToParticleMap(LogicalGraphView const& graph, + TruthGraph const& rawGraph, + truth::LogicalGraphHitIndexBuilder& builder) const { + for (uint32_t particleId = 0; particleId < graph.nParticles(); ++particleId) { + if (!graph.particleHasSim(particleId)) + continue; + + const int32_t simNode = graph.particleSimNode(particleId); + if (simNode < 0) + continue; + + const uint32_t simNodeU32 = static_cast(simNode); + if (simNodeU32 >= rawGraph.nNodes()) + continue; + + auto const& ref = rawGraph.nodeRef(simNodeU32); + if (ref.kind != TruthGraph::NodeKind::SimTrack) + continue; + + const uint32_t trackId = checkedTrackId(ref.key); + if (trackId == 0) + continue; + + builder.setSimTrackForParticle(particleId, trackId); + } + + for (uint32_t parentId = 0; parentId < graph.nParticles(); ++parentId) { + graph.forEachParticleChild(parentId, [&](uint32_t childId) { builder.addParticleChild(parentId, childId); }); + } +} + +RelabelContext TruthLogicalGraphHitIndexProducer::makeRelabelContext(edm::EventSetup const& setup) const { + RelabelContext context; + + if (!doHGCalRelabelling_) + return context; + + auto const& geom = setup.getData(geomToken_); + + auto const* hcalGeometry = static_cast(geom.getSubdetectorGeometry(DetId::Hcal, HcalEndcap)); + if (hcalGeometry != nullptr) { + context.hcalConstants = hcalGeometry->topology().dddConstants(); + } + + auto const* eeGeometry = + static_cast(geom.getSubdetectorGeometry(DetId::HGCalEE, ForwardSubdetector::ForwardEmpty)); + + if (eeGeometry != nullptr) { + context.geometryType = 1; + + auto const* fhGeometry = static_cast( + geom.getSubdetectorGeometry(DetId::HGCalHSi, ForwardSubdetector::ForwardEmpty)); + auto const* bhGeometry = static_cast( + geom.getSubdetectorGeometry(DetId::HGCalHSc, ForwardSubdetector::ForwardEmpty)); + + context.hgTopologies[0] = &eeGeometry->topology(); + context.hgTopologies[1] = fhGeometry != nullptr ? &fhGeometry->topology() : nullptr; + context.hgTopologies[2] = bhGeometry != nullptr ? &bhGeometry->topology() : nullptr; + + for (unsigned i = 0; i < context.hgTopologies.size(); ++i) { + if (context.hgTopologies[i] != nullptr) + context.hgConstants[i] = &context.hgTopologies[i]->dddConstants(); + } + + return context; + } + + context.geometryType = 0; + + eeGeometry = static_cast(geom.getSubdetectorGeometry(DetId::Forward, HGCEE)); + auto const* fhGeometry = static_cast(geom.getSubdetectorGeometry(DetId::Forward, HGCHEF)); + + context.hgTopologies[0] = eeGeometry != nullptr ? &eeGeometry->topology() : nullptr; + context.hgTopologies[1] = fhGeometry != nullptr ? &fhGeometry->topology() : nullptr; + + for (unsigned i = 0; i < context.hgTopologies.size(); ++i) { + if (context.hgTopologies[i] != nullptr) + context.hgConstants[i] = &context.hgTopologies[i]->dddConstants(); + } + + return context; +} + +uint32_t TruthLogicalGraphHitIndexProducer::recoDetIdForSimHit(PCaloHit const& simHit, + bool isHGCalCollection, + bool isHcalCollection, + RelabelContext const& context) const { + const uint32_t simId = simHit.id(); + + if (!doHGCalRelabelling_) { + return simId; + } + + if (isHGCalCollection) { + if (context.geometryType == 1) { + return simId; + } + + int subdet = 0; + int layer = 0; + int cell = 0; + int sec = 0; + int subsec = 0; + int zp = 0; + + HGCalTestNumbering::unpackHexagonIndex(simId, subdet, zp, layer, sec, subsec, cell); + + const int hgcalIndex = subdet - 3; + if (hgcalIndex < 0 || hgcalIndex >= static_cast(context.hgConstants.size())) + return 0; + + auto const* constants = context.hgConstants[hgcalIndex]; + auto const* topology = context.hgTopologies[hgcalIndex]; + + if (constants == nullptr || topology == nullptr) + return 0; + + const auto recoLayerCell = constants->simToReco(cell, layer, sec, topology->detectorType()); + cell = recoLayerCell.first; + layer = recoLayerCell.second; + + if (layer < 0) + return 0; + + return HGCalDetId(static_cast(subdet), zp, layer, subsec, sec, cell).rawId(); + } + + if (isHcalCollection && context.hcalConstants != nullptr) { + return HcalHitRelabeller::relabel(simId, context.hcalConstants).rawId(); + } + + return simId; +} + +void TruthLogicalGraphHitIndexProducer::fillSimHits(edm::Event& event, + edm::EventSetup const& setup, + truth::LogicalGraphHitIndexBuilder& builder, + hgcal::DetIdRecHitMap const* recHitMap) const { + const RelabelContext relabelContext = makeRelabelContext(setup); + + for (uint32_t tokenIndex = 0; tokenIndex < simHitTokens_.size(); ++tokenIndex) { + auto const& token = simHitTokens_[tokenIndex]; + auto const& tag = simHitTags_[tokenIndex]; + + edm::Handle> hSimHits; + event.getByToken(token, hSimHits); + + if (!hSimHits.isValid()) { + edm::LogWarning("TruthLogicalGraphHitIndexProducer") + << "Missing PCaloHit collection " << tag.encode() << ". Skipping it."; + continue; + } + + const bool isHGCalCollection = inputTagLooksLikeHGCal(tag); + const bool isHcalCollection = inputTagLooksLikeHcal(tag); + + for (auto const& simHit : *hSimHits) { + const int geantTrackId = simHit.geantTrackId(); + if (geantTrackId <= 0) + continue; + + const uint32_t detId = recoDetIdForSimHit(simHit, isHGCalCollection, isHcalCollection, relabelContext); + if (detId == 0) + continue; + + uint32_t recHitIndex = truth::LogicalGraphHitIndex::Hit::invalidRecHitIndex; + + if (recHitMap != nullptr) { + const auto it = recHitMap->find(detId); + if (it != recHitMap->end()) { + recHitIndex = it->second; + } + } + + builder.addHitForTrack(static_cast(geantTrackId), detId, recHitIndex, simHit.energy()); + } + } +} + +DEFINE_FWK_MODULE(TruthLogicalGraphHitIndexProducer); diff --git a/PhysicsTools/TruthInfo/plugins/RecHitFlatTableProducer.cc b/PhysicsTools/TruthInfo/plugins/RecHitFlatTableProducer.cc new file mode 100644 index 0000000000000..c46a8c30844ba --- /dev/null +++ b/PhysicsTools/TruthInfo/plugins/RecHitFlatTableProducer.cc @@ -0,0 +1,99 @@ +#include "FWCore/Framework/interface/stream/EDProducer.h" +#include "FWCore/Framework/interface/Event.h" +#include "FWCore/Framework/interface/stream/moduleAbilities.h" +#include "FWCore/Utilities/interface/ESGetToken.h" +#include "FWCore/Utilities/interface/transform.h" +#include "FWCore/ParameterSet/interface/ParameterSet.h" +#include "FWCore/ParameterSet/interface/ConfigurationDescriptions.h" +#include "FWCore/ParameterSet/interface/ParameterSetDescription.h" +#include "DataFormats/NanoAOD/interface/FlatTable.h" + +#include "RecoLocalCalo/HGCalRecAlgos/interface/RecHitTools.h" +#include "DataFormats/HGCRecHit/interface/HGCRecHitCollections.h" + +#include +#include + +class RecHitFlatTableProducer : public edm::stream::EDProducer { +public: + RecHitFlatTableProducer(edm::ParameterSet const& params) + : objName_(params.getParameter("objName")), + rechits_tokens_{ + edm::vector_transform(params.getParameter>("label_rechits"), + [this](const edm::InputTag& lab) { return consumes(lab); })}, + caloGeometry_token_(esConsumes()) { + produces(); + } + + ~RecHitFlatTableProducer() override {} + + void produce(edm::Event& event, edm::EventSetup const& iSetup) override { + std::vector rechit_ID; + std::vector rechit_energy; + std::vector rechit_x; + std::vector rechit_y; + std::vector rechit_z; + std::vector rechit_time; + std::vector rechit_radius; + std::vector rechit_simEnergy; + std::vector rechit_simEnergyEM; + std::vector rechit_simEnergyHad; + + for (auto const& rh_token : rechits_tokens_) { + edm::Handle rechit_handle; + event.getByToken(rh_token, rechit_handle); + const auto& rhColl = *rechit_handle; + for (auto const& rh : rhColl) { + rechit_energy.push_back(rh.energy()); + auto const rhPosition = rhtools_.getPosition(rh.detid()); + rechit_x.push_back(rhPosition.x()); + rechit_y.push_back(rhPosition.y()); + rechit_z.push_back(rhPosition.z()); + rechit_ID.push_back(rh.detid().rawId()); + rechit_time.push_back(rh.time()); + rechit_radius.push_back(rhtools_.getRadiusToSide(rh.detid())); + // const auto hitId = hitMap->find(DetId(rh.detid())); + // if (hitId != hitMap->end()) { + // rechit_simEnergy.push_back(hitIdToEnergies[hitId->second].energy); + // rechit_simEnergyEM.push_back(hitIdToEnergies[hitId->second].energyEM); + // rechit_simEnergyHad.push_back(hitIdToEnergies[hitId->second].energyHad); + // } + } + } + + auto tab = std::make_unique(rechit_ID.size(), objName_, false, false); + tab->addColumn("rechit_ID", rechit_ID, "Rechit ID"); + tab->addColumn("rechit_x", rechit_x, "Rechit X from rechittools"); + tab->addColumn("rechit_y", rechit_y, "Rechit Y from rechittools"); + tab->addColumn("rechit_z", rechit_z, "Rechit Z from rechittools"); + tab->addColumn("rechit_radius", rechit_radius, "Rechit radius to side from rechittools"); + + event.put(std::move(tab)); + } + + void beginRun(edm::Run const&, edm::EventSetup const& es) override { + edm::ESHandle geom = es.getHandle(caloGeometry_token_); + rhtools_.setGeometry(*geom); + } + + static void fillDescriptions(edm::ConfigurationDescriptions& descriptions) { + edm::ParameterSetDescription desc; + desc.add("objName", "rechits")->setComment("name of the nanoaod::FlatTable to extend with this table"); + desc.add>("label_rechits", + {edm::InputTag("HGCalRecHit", "HGCEERecHits"), + edm::InputTag("HGCalRecHit", "HGCHEFRecHits"), + edm::InputTag("HGCalRecHit", "HGCHEBRecHits")}); + descriptions.add("recHitTable", desc); + } + +protected: + const std::string objName_; + // const edm::EDGetTokenT> src_; + const std::vector> rechits_tokens_; + + edm::ESGetToken caloGeometry_token_; + hgcal::RecHitTools rhtools_; +}; + +#include "FWCore/Framework/interface/MakerMacros.h" +DEFINE_FWK_MODULE(RecHitFlatTableProducer); diff --git a/PhysicsTools/TruthInfo/plugins/TruthGraphDumper.cc b/PhysicsTools/TruthInfo/plugins/TruthGraphDumper.cc new file mode 100644 index 0000000000000..5d95788119b05 --- /dev/null +++ b/PhysicsTools/TruthInfo/plugins/TruthGraphDumper.cc @@ -0,0 +1,613 @@ +#include +#include +#include +#include +#include +#include +#include +#include + +#include "FWCore/Framework/interface/one/EDAnalyzer.h" +#include "FWCore/Framework/interface/Event.h" +#include "FWCore/Framework/interface/EventSetup.h" +#include "FWCore/Framework/interface/MakerMacros.h" +#include "FWCore/ParameterSet/interface/ConfigurationDescriptions.h" +#include "FWCore/ParameterSet/interface/ParameterSet.h" +#include "FWCore/ParameterSet/interface/ParameterSetDescription.h" +#include "FWCore/Utilities/interface/EDGetToken.h" +#include "FWCore/Utilities/interface/InputTag.h" + +#include "SimDataFormats/Track/interface/SimTrackContainer.h" +#include "SimDataFormats/Vertex/interface/SimVertexContainer.h" + +#include "SimDataFormats/GeneratorProducts/interface/HepMCProduct.h" +#include "HepMC/GenEvent.h" +#include "HepMC/GenParticle.h" +#include "HepMC/GenVertex.h" + +#include "SimDataFormats/GeneratorProducts/interface/HepMC3Product.h" +#include "HepMC3/GenEvent.h" +#include "HepMC3/GenParticle.h" +#include "HepMC3/GenVertex.h" + +#include "PhysicsTools/TruthInfo/interface/TruthGraph.h" + +namespace { + + // --- PDG naming (UTF-8) + std::string pdgNameUtf8(int pdgId) { + const int ap = std::abs(pdgId); + + if (pdgId == 11) + return "e⁻"; + if (pdgId == -11) + return "e⁺"; + if (pdgId == 13) + return "μ⁻"; + if (pdgId == -13) + return "μ⁺"; + if (pdgId == 15) + return "τ⁻"; + if (pdgId == -15) + return "τ⁺"; + + if (pdgId == 12) + return "νₑ"; + if (pdgId == -12) + return "ν̄ₑ"; + if (pdgId == 14) + return "ν_μ"; + if (pdgId == -14) + return "ν̄_μ"; + if (pdgId == 16) + return "ν_τ"; + if (pdgId == -16) + return "ν̄_τ"; + + if (pdgId == 22) + return "γ"; + if (pdgId == 21) + return "g"; + if (pdgId == 23) + return "Z⁰"; + if (pdgId == 24) + return "W⁺"; + if (pdgId == -24) + return "W⁻"; + if (pdgId == 25) + return "H"; + + if (pdgId == 2212) + return "p"; + if (pdgId == -2212) + return "p̄"; + if (pdgId == 2112) + return "n"; + if (pdgId == -2112) + return "n̄"; + + if (pdgId == 111) + return "π⁰"; + if (pdgId == 211) + return "π⁺"; + if (pdgId == -211) + return "π⁻"; + if (pdgId == 321) + return "K⁺"; + if (pdgId == -321) + return "K⁻"; + if (pdgId == 130) + return "K⁰_L"; + if (pdgId == 310) + return "K⁰_S"; + + if (ap >= 1 && ap <= 6) { + static const char* qname[7] = {"", "d", "u", "s", "c", "b", "t"}; + std::string s = qname[ap]; + if (pdgId < 0) + s = "anti-" + s; + return s; + } + + return "pdg"; + } + + std::string pdgLabel(int pdgId) { + std::ostringstream ss; + const std::string name = pdgNameUtf8(pdgId); + if (name == "pdg") + ss << "pdg(" << pdgId << ")"; + else + ss << name << " (" << pdgId << ")"; + return ss.str(); + } + + template + std::string fmtP4(const P4T& p4) { + std::ostringstream ss; + ss.setf(std::ios::fixed); + ss << std::setprecision(3) << "(" << p4.px() << ", " << p4.py() << ", " << p4.pz() << ", " << p4.e() << ")"; + return ss.str(); + } + + template + std::string fmtX4(const X4T& x4) { + std::ostringstream ss; + ss.setf(std::ios::fixed); + ss << std::setprecision(3) << "(" << x4.x() << ", " << x4.y() << ", " << x4.z() << ", " << x4.t() << ")"; + return ss.str(); + } + + const char* kindName(TruthGraph::NodeKind k) { + switch (k) { + case TruthGraph::NodeKind::GenEvent: + return "GenEvent"; + case TruthGraph::NodeKind::GenVertex: + return "GenVertex"; + case TruthGraph::NodeKind::GenParticle: + return "GenParticle"; + case TruthGraph::NodeKind::SimVertex: + return "SimVertex"; + case TruthGraph::NodeKind::SimTrack: + return "SimTrack"; + } + return "Unknown"; + } + + const char* shapeFor(TruthGraph::NodeKind k) { + switch (k) { + case TruthGraph::NodeKind::GenEvent: + return "box"; + case TruthGraph::NodeKind::GenVertex: + return "diamond"; + case TruthGraph::NodeKind::GenParticle: + return "box"; + case TruthGraph::NodeKind::SimVertex: + return "diamond"; + case TruthGraph::NodeKind::SimTrack: + return "ellipse"; + } + return "box"; + } + + std::string edgeAttrs(TruthGraph::EdgeKind kind) { + using EdgeKind = TruthGraph::EdgeKind; + + switch (kind) { + case EdgeKind::Gen: + return " [style=solid, edgeType=\"Gen\"]"; + + case EdgeKind::Sim: + return " [style=solid, edgeType=\"Sim\"]"; + + case EdgeKind::GenToSim: + return " [dir=both, style=dashed, label=\"GenToSim\", edgeType=\"GenToSim\"]"; + + case EdgeKind::SimToGen: + return " [dir=both, style=dotted, label=\"SimToGen\", edgeType=\"SimToGen\"]"; + } + + return " [style=solid, edgeType=\"Unknown\"]"; + } + + std::string statusFlagsLabel(uint16_t flags) { + struct FlagInfo { + uint16_t bit; + const char* name; + }; + + static constexpr FlagInfo flagInfos[] = { + {1u << 0, "isPrompt"}, + {1u << 1, "isDecayedLeptonHadron"}, + {1u << 2, "isTauDecayProduct"}, + {1u << 3, "isPromptTauDecayProduct"}, + {1u << 4, "isDirectTauDecayProduct"}, + {1u << 5, "isDirectPromptTauDecayProduct"}, + {1u << 6, "isDirectHadronDecayProduct"}, + {1u << 7, "isHardProcess"}, + {1u << 8, "fromHardProcess"}, + {1u << 9, "isHardProcessTauDecayProduct"}, + {1u << 10, "isDirectHardProcessTauDecayProduct"}, + {1u << 11, "fromHardProcessBeforeFSR"}, + {1u << 12, "isFirstCopy"}, + {1u << 13, "isLastCopy"}, + {1u << 14, "isLastCopyBeforeFSR"}, + }; + + std::ostringstream ss; + bool first = true; + + for (auto const& flag : flagInfos) { + if ((flags & flag.bit) == 0) + continue; + + if (!first) + ss << ", "; + ss << flag.name; + first = false; + } + + if (first) + return "none"; + + return ss.str(); + } + + std::string dotQuote(std::string const& input) { + std::string out; + out.reserve(input.size() + 2); + + out.push_back('"'); + for (char c : input) { + switch (c) { + case '\\': + out += "\\\\"; + break; + case '"': + out += "\\\""; + break; + case '\n': + out += "\\n"; + break; + default: + out.push_back(c); + break; + } + } + out.push_back('"'); + + return out; + } + + std::string appendEventIdToFilename(std::string const& filename, edm::EventID const& id) { + const auto dotPos = filename.rfind('.'); + + std::ostringstream ss; + if (dotPos == std::string::npos) { + ss << filename; + ss << "_run" << id.run(); + ss << "_lumi" << id.luminosityBlock(); + ss << "_event" << id.event(); + return ss.str(); + } + + ss << filename.substr(0, dotPos); + ss << "_run" << id.run(); + ss << "_lumi" << id.luminosityBlock(); + ss << "_event" << id.event(); + ss << filename.substr(dotPos); + + return ss.str(); + } +} // anonymous namespace + +class TruthGraphDumper : public edm::one::EDAnalyzer<> { +public: + explicit TruthGraphDumper(const edm::ParameterSet& cfg) + : token_(consumes(cfg.getParameter("src"))), + dotFile_(cfg.getParameter("dotFile")), + maxNodes_(cfg.getParameter("maxNodes")), + maxEdgesPerNode_(cfg.getParameter("maxEdgesPerNode")), + simTracksToken_(mayConsume(cfg.getParameter("simTracks"))), + simVerticesToken_(mayConsume(cfg.getParameter("simVertices"))), + hepmc2Token_(mayConsume(cfg.getParameter("genEventHepMC"))), + hepmc3Token_(mayConsume(cfg.getParameter("genEventHepMC3"))) {} + + static void fillDescriptions(edm::ConfigurationDescriptions& descriptions) { + edm::ParameterSetDescription desc; + desc.add("src", edm::InputTag("truthGraphProducer")); + desc.add("dotFile", "truthgraph.dot"); + desc.add("maxNodes", 5000)->setComment("Truncate to keep DOT manageable"); + desc.add("maxEdgesPerNode", 200)->setComment("Truncate fanout per node"); + + desc.add("simTracks", edm::InputTag("g4SimHits")) + ->setComment("SimTrackContainer (optional, used to enrich SimTrack nodes)"); + desc.add("simVertices", edm::InputTag("g4SimHits")) + ->setComment("SimVertexContainer (optional, used for future enrichment)"); + + // GEN record (for enriching GenParticle/GenVertex nodes) + desc.add("genEventHepMC", edm::InputTag("generatorSmeared")) + ->setComment("edm::HepMCProduct label (your step1.root shows this is present)"); + desc.add("genEventHepMC3", edm::InputTag("generatorSmeared")) + ->setComment("edm::HepMC3Product label (optional)"); + + descriptions.addWithDefaultLabel(desc); + } + + void analyze(const edm::Event& evt, const edm::EventSetup&) override { + auto const& g = evt.get(token_); + + // --- SIM handles (optional) + edm::Handle hSimTracks; + evt.getByToken(simTracksToken_, hSimTracks); + + std::unordered_map tidToIndex; + if (hSimTracks.isValid()) { + tidToIndex.reserve(hSimTracks->size() * 2); + for (uint32_t i = 0; i < hSimTracks->size(); ++i) { + tidToIndex.emplace((*hSimTracks)[i].trackId(), i); + } + } + + // --- GEN handles (optional) + // Prefer HepMC2 if present (it is in your step1.root); else HepMC3. + edm::Handle hHepMC2; + evt.getByToken(hepmc2Token_, hHepMC2); + + edm::Handle hHepMC3; + evt.getByToken(hepmc3Token_, hHepMC3); + + // HepMC2 lookup maps + std::unordered_map bc2p; + std::unordered_map bc2v; + HepMC::GenEvent const* ev2 = nullptr; + + if (hHepMC2.isValid() && hHepMC2->GetEvent() != nullptr) { + ev2 = hHepMC2->GetEvent(); + bc2p.reserve(ev2->particles_size() * 2); + bc2v.reserve(ev2->vertices_size() * 2); + + for (auto p = ev2->particles_begin(); p != ev2->particles_end(); ++p) { + bc2p.emplace((*p)->barcode(), *p); + } + for (auto v = ev2->vertices_begin(); v != ev2->vertices_end(); ++v) { + bc2v.emplace((*v)->barcode(), *v); + } + } + + // HepMC3 reconstruction + maps + HepMC3::GenEvent ev3; + std::unordered_map id3p; + std::unordered_map id3v; + bool have3 = false; + + if (!ev2 && hHepMC3.isValid() && hHepMC3->GetEvent() != nullptr) { + have3 = true; + const HepMC3::GenEventData* data = hHepMC3->GetEvent(); + ev3.read_data(*data); + + id3p.reserve(ev3.particles().size() * 2); + id3v.reserve(ev3.vertices().size() * 2); + + for (auto const& pptr : ev3.particles()) { + if (pptr) + id3p.emplace(pptr->id(), pptr); + } + for (auto const& vptr : ev3.vertices()) { + if (vptr) + id3v.emplace(vptr->id(), vptr); + } + } + + const std::string eventDotFile = appendEventIdToFilename(dotFile_, evt.id()); + std::ofstream os(eventDotFile); + os << "digraph TruthGraph {\n"; + os << " rankdir=LR;\n"; + os << " node [fontsize=10];\n"; + + const uint32_t n = std::min(g.nNodes(), maxNodes_); + + // nodes + for (uint32_t i = 0; i < n; ++i) { + auto const& r = g.nodeRef(i); + const auto pdg = g.nodePdgId(i); + const auto st = g.nodeStatus(i); + const auto eid = g.nodeEventId(i); + + // SimTrack enrichment + bool crossedBoundary = false; + bool haveSim = false; + SimTrack const* simt = nullptr; + + if (r.kind == TruthGraph::NodeKind::SimTrack && hSimTracks.isValid()) { + const int64_t key = r.key; // trackId + if (key >= 0 && key <= static_cast(std::numeric_limits::max())) { + auto it = tidToIndex.find(static_cast(key)); + if (it != tidToIndex.end()) { + simt = &(*hSimTracks)[it->second]; + haveSim = true; + crossedBoundary = simt->crossedBoundary(); + } + } + } + + // Node style + os << " n" << i << " [shape=" << shapeFor(r.kind) << ", type=" << dotQuote(kindName(r.kind)) << ", "; + os << "crossedBoundary=" << crossedBoundary << ","; + if (crossedBoundary) + os << "color=\"red\", penwidth=2, "; + + os << "pdg=" << pdg << ", status=" << st << ", eid=" << eid << ","; + // --- GEN enrichment + if (r.kind == TruthGraph::NodeKind::GenEvent) { + if (ev2) { + os << "HepMCversion=2, event=" << ev2->event_number() << ", spid=" << ev2->signal_process_id() << ","; + } else if (have3) { + os << "HepMCversion=3, event=" << ev3.event_number() << ","; + } + } else if (r.kind == TruthGraph::NodeKind::GenParticle) { + const int bc = static_cast(r.key); + if (ev2) { + auto it = bc2p.find(bc); + if (it != bc2p.end()) { + auto const* p = it->second; + const int prod = p->production_vertex() ? p->production_vertex()->barcode() : 0; + const int endv = p->end_vertex() ? p->end_vertex()->barcode() : 0; + os << "pid=" << p->pdg_id() << ", status=" << p->status() << ", p4=" << dotQuote(fmtP4(p->momentum())) + << ", m=" << std::fixed << std::setprecision(3) << p->generated_mass() << ", prodVtx=" << prod + << ", endVtx=" << endv << ","; + } + } else if (have3) { + auto it = id3p.find(bc); + if (it != id3p.end() && it->second) { + auto const& p = it->second; + const int prod = p->production_vertex() ? p->production_vertex()->id() : 0; + const int endv = p->end_vertex() ? p->end_vertex()->id() : 0; + os << "pid=" << p->pid() << ", status=" << p->status() << ", p4=" << dotQuote(fmtP4(p->momentum())) + << ", prodVtx=" << prod << ", endVtx=" << endv << ","; + } + } + } else if (r.kind == TruthGraph::NodeKind::GenVertex) { + const int bc = static_cast(r.key); + if (ev2) { + auto it = bc2v.find(bc); + if (it != bc2v.end()) { + auto const* v = it->second; + os << "barcode=" << v->barcode() << ", x4=" << dotQuote(fmtX4(v->position())) + << ", nIn=" << v->particles_in_size() << ", nOut=" << v->particles_out_size() << ","; + } + } else if (have3) { + auto it = id3v.find(bc); + if (it != id3v.end() && it->second) { + auto const& v = it->second; + os << "status=" << v->status() << ", x4=" << dotQuote(fmtX4(v->position())) + << ", nIn=" << v->particles_in().size() << ", nOut=" << v->particles_out().size() << ","; + } + } + } + + // --- SIM enrichment + if (r.kind == TruthGraph::NodeKind::SimTrack && haveSim) { + os << "p4=" << dotQuote(fmtP4(simt->momentum())) << ","; + const int32_t gn = g.nodeSimTrackToGen(i); + os << "GenParticle_nodeId=" << gn << ","; + + const int32_t vn = g.nodeSimTrackToVtx(i); + os << "SimVertex_nodeId=" << vn << ","; + + if (crossedBoundary) { + os << "idAtBoundary=" << simt->getIDAtBoundary() + << ", x4boundary=" << dotQuote(fmtX4(simt->getPositionAtBoundary())) + << ", p4boundary=" << dotQuote(fmtP4(simt->getMomentumAtBoundary())) << ","; + } + } + + // HTML label + os << "label=<\n"; + os << " \n"; + os << " \n"; + + if (pdg != 0) + os << " \n"; + if (st != 0) + os << " \n"; + if (eid != 0) + os << " \n"; + + // --- GEN enrichment + if (r.kind == TruthGraph::NodeKind::GenEvent) { + if (ev2) { + os << " \n"; + } else if (have3) { + os << " \n"; + } + } else if (r.kind == TruthGraph::NodeKind::GenParticle) { + const int bc = static_cast(r.key); + if (ev2) { + auto it = bc2p.find(bc); + if (it != bc2p.end()) { + auto const* p = it->second; + os << " \n"; + os << " \n"; + os << " \n"; + os << " \n"; + const int prod = p->production_vertex() ? p->production_vertex()->barcode() : 0; + const int endv = p->end_vertex() ? p->end_vertex()->barcode() : 0; + os << " \n"; + } + } else if (have3) { + auto it = id3p.find(bc); + if (it != id3p.end() && it->second) { + auto const& p = it->second; + os << " \n"; + os << " \n"; + os << " \n"; + const int prod = p->production_vertex() ? p->production_vertex()->id() : 0; + const int endv = p->end_vertex() ? p->end_vertex()->id() : 0; + os << " \n"; + } + } + } else if (r.kind == TruthGraph::NodeKind::GenVertex) { + const int bc = static_cast(r.key); + if (ev2) { + auto it = bc2v.find(bc); + if (it != bc2v.end()) { + auto const* v = it->second; + os << " \n"; + os << " \n"; + os << " \n"; + } + } else if (have3) { + auto it = id3v.find(bc); + if (it != id3v.end() && it->second) { + auto const& v = it->second; + os << " \n"; + os << " \n"; + os << " \n"; + } + } + } + + // --- SIM enrichment + if (r.kind == TruthGraph::NodeKind::SimTrack && haveSim) { + os << " \n"; + + const int32_t gn = g.nodeSimTrackToGen(i); + if (gn >= 0) + os << " \n"; + + const int32_t vn = g.nodeSimTrackToVtx(i); + if (vn >= 0) + os << " \n"; + + if (crossedBoundary) { + os << " \n"; + os << " \n"; + os << " \n"; + } + } + + os << "
" << i << " " << kindName(r.kind) << " key=" << r.key << "
pid: " << pdgLabel(pdg) << "
status: " << st << "
eid: " << eid << "
HepMC2: event=" << ev2->event_number() << " spid=" << ev2->signal_process_id() + << "
HepMC3: event=" << ev3.event_number() << "
pid: " << pdgLabel(p->pdg_id()) << "
status: " << p->status() << "
p4: " << fmtP4(p->momentum()) << "
m: " << std::fixed << std::setprecision(3) << p->generated_mass() << "
prodVtx: " << prod << " endVtx: " << endv << "
pid: " << pdgLabel(p->pid()) << "
status: " << p->status() << "
p4: " << fmtP4(p->momentum()) << "
prodVtx: " << prod << " endVtx: " << endv << "
barcode: " << v->barcode() << "
x4: " << fmtX4(v->position()) << "
nIn: " << v->particles_in_size() << " nOut: " << v->particles_out_size() + << "
status: " << v->status() << "
x4: " << fmtX4(v->position()) << "
nIn: " << v->particles_in().size() << " nOut: " << v->particles_out().size() + << "
p4: " << fmtP4(simt->momentum()) << "
GenParticle nodeId: " << gn << "
SimVertex nodeId: " << vn << "
crossedBoundary: true" + << " idAtBoundary=" << simt->getIDAtBoundary() << "
x4@boundary: " << fmtX4(simt->getPositionAtBoundary()) + << "
p4@boundary: " << fmtP4(simt->getMomentumAtBoundary()) + << "
\n"; + os << " >];\n"; + } + + // edges + for (uint32_t src = 0; src < n; ++src) { + const uint32_t b = g.edgeBegin(src); + const uint32_t e = g.edgeEnd(src); + + unsigned kept = 0; + for (uint32_t pos = b; pos < e; ++pos) { + const uint32_t dst = g.edges[pos]; + if (dst >= n) + continue; + os << " n" << src << " -> n" << dst << edgeAttrs(static_cast(g.edgeKind[pos])) << ";\n"; + if (++kept >= maxEdgesPerNode_) + break; + } + } + + os << "}\n"; + os.close(); + } + +private: + edm::EDGetTokenT token_; + std::string dotFile_; + unsigned maxNodes_; + unsigned maxEdgesPerNode_; + + edm::EDGetTokenT simTracksToken_; + edm::EDGetTokenT simVerticesToken_; + + edm::EDGetTokenT hepmc2Token_; + edm::EDGetTokenT hepmc3Token_; +}; + +DEFINE_FWK_MODULE(TruthGraphDumper); diff --git a/PhysicsTools/TruthInfo/plugins/TruthGraphProducer.cc b/PhysicsTools/TruthInfo/plugins/TruthGraphProducer.cc new file mode 100644 index 0000000000000..f8a4d37b5419b --- /dev/null +++ b/PhysicsTools/TruthInfo/plugins/TruthGraphProducer.cc @@ -0,0 +1,684 @@ +// Author: Felice Pantaleo - CERN +// Date: 03/2026 + +#include +#include +#include +#include +#include +#include +#include + +#include "FWCore/Framework/interface/Event.h" +#include "FWCore/Framework/interface/EventSetup.h" +#include "FWCore/Framework/interface/MakerMacros.h" +#include "FWCore/Framework/interface/stream/EDProducer.h" +#include "FWCore/MessageLogger/interface/MessageLogger.h" +#include "FWCore/ParameterSet/interface/ConfigurationDescriptions.h" +#include "FWCore/ParameterSet/interface/ParameterSet.h" +#include "FWCore/ParameterSet/interface/ParameterSetDescription.h" +#include "FWCore/Utilities/interface/InputTag.h" + +#include "SimDataFormats/EncodedEventId/interface/EncodedEventId.h" +#include "SimDataFormats/Track/interface/SimTrackContainer.h" +#include "SimDataFormats/Vertex/interface/SimVertexContainer.h" + +// Legacy HepMC, HepMC2. +#include "SimDataFormats/GeneratorProducts/interface/HepMCProduct.h" +#include "HepMC/GenEvent.h" +#include "HepMC/GenParticle.h" +#include "HepMC/GenVertex.h" + +// HepMC3. +#include "SimDataFormats/GeneratorProducts/interface/HepMC3Product.h" +#include "HepMC3/GenEvent.h" +#include "HepMC3/GenParticle.h" +#include "HepMC3/GenVertex.h" + +#include "PhysicsTools/TruthInfo/interface/TruthGraph.h" + +namespace { + + // Pack EncodedEventId into 64 bit without relying on a particular public API. + uint64_t packEventId(EncodedEventId const& id) { + uint64_t out = 0; + static_assert(sizeof(EncodedEventId) <= sizeof(uint64_t), "EncodedEventId larger than 64 bits, adjust packing"); + std::memcpy(&out, &id, sizeof(EncodedEventId)); + return out; + } + + struct DSU { + std::vector p; + std::vector r; + + explicit DSU(int n) : p(n), r(n, 0) { + for (int i = 0; i < n; ++i) + p[i] = i; + } + + int find(int x) { + while (p[x] != x) { + p[x] = p[p[x]]; + x = p[x]; + } + return x; + } + + void unite(int a, int b) { + a = find(a); + b = find(b); + + if (a == b) + return; + + if (r[a] < r[b]) + std::swap(a, b); + + p[b] = a; + + if (r[a] == r[b]) + ++r[a]; + } + }; + + inline int64_t genKeyVertex(int barcode) { return (int64_t(barcode) << 1) | 1LL; } + + inline int64_t genKeyParticle(int barcode) { return (int64_t(barcode) << 1); } + + struct GenBuild { + std::vector vtxBarcodes; + std::vector partBarcodes; + + // index -> barcode in HepMC iteration order. Kept only for diagnostics. + // SimTrack::genpartIndex() is not an index into this vector: for primary + // SimTracks it is a HepMC barcode. + std::vector particleBarcodeByIndex; + + std::vector> vtxToPart; + std::vector> partToVtx; + + std::unordered_map particlePdgIdByBarcode; + std::unordered_map particleStatusByBarcode; + }; + + GenBuild buildFromHepMC2(HepMC::GenEvent const& ev) { + GenBuild gb; + + std::unordered_set seenV; + std::unordered_set seenP; + + gb.particlePdgIdByBarcode.reserve(ev.particles_size() * 2); + gb.particleStatusByBarcode.reserve(ev.particles_size() * 2); + gb.particleBarcodeByIndex.reserve(ev.particles_size()); + + for (auto v = ev.vertices_begin(); v != ev.vertices_end(); ++v) { + if (*v == nullptr) + continue; + + const int vbc = (*v)->barcode(); + + if (seenV.insert(vbc).second) + gb.vtxBarcodes.push_back(vbc); + + for (auto po = (*v)->particles_out_const_begin(); po != (*v)->particles_out_const_end(); ++po) { + if (*po == nullptr) + continue; + + const int pbc = (*po)->barcode(); + + if (seenP.insert(pbc).second) + gb.partBarcodes.push_back(pbc); + + gb.vtxToPart.emplace_back(vbc, pbc); + } + + for (auto pi = (*v)->particles_in_const_begin(); pi != (*v)->particles_in_const_end(); ++pi) { + if (*pi == nullptr) + continue; + + const int pbc = (*pi)->barcode(); + + if (seenP.insert(pbc).second) + gb.partBarcodes.push_back(pbc); + + gb.partToVtx.emplace_back(pbc, vbc); + } + } + + for (auto p = ev.particles_begin(); p != ev.particles_end(); ++p) { + if (*p == nullptr) + continue; + + const int pbc = (*p)->barcode(); + + gb.particleBarcodeByIndex.push_back(pbc); + gb.particlePdgIdByBarcode.emplace(pbc, (*p)->pdg_id()); + gb.particleStatusByBarcode.emplace(pbc, static_cast((*p)->status())); + + if (seenP.insert(pbc).second) + gb.partBarcodes.push_back(pbc); + } + + return gb; + } + + GenBuild buildFromHepMC3(HepMC3::GenEvent const& ev) { + GenBuild gb; + + std::unordered_set seenV; + std::unordered_set seenP; + + gb.particlePdgIdByBarcode.reserve(ev.particles().size() * 2); + gb.particleStatusByBarcode.reserve(ev.particles().size() * 2); + gb.particleBarcodeByIndex.reserve(ev.particles().size()); + + for (auto const& vptr : ev.vertices()) { + if (!vptr) + continue; + + const int vbc = vptr->id(); + + if (seenV.insert(vbc).second) + gb.vtxBarcodes.push_back(vbc); + + for (auto const& po : vptr->particles_out()) { + if (!po) + continue; + + const int pbc = po->id(); + + if (seenP.insert(pbc).second) + gb.partBarcodes.push_back(pbc); + + gb.vtxToPart.emplace_back(vbc, pbc); + } + + for (auto const& pi : vptr->particles_in()) { + if (!pi) + continue; + + const int pbc = pi->id(); + + if (seenP.insert(pbc).second) + gb.partBarcodes.push_back(pbc); + + gb.partToVtx.emplace_back(pbc, vbc); + } + } + + for (auto const& pptr : ev.particles()) { + if (!pptr) + continue; + + const int pbc = pptr->id(); + + gb.particleBarcodeByIndex.push_back(pbc); + gb.particlePdgIdByBarcode.emplace(pbc, pptr->pid()); + gb.particleStatusByBarcode.emplace(pbc, static_cast(pptr->status())); + + if (seenP.insert(pbc).second) + gb.partBarcodes.push_back(pbc); + } + + return gb; + } + + template + bool validHandle(HandleT const& h) { + return h.isValid(); + } + +} // namespace + +class TruthGraphProducer : public edm::stream::EDProducer<> { +public: + explicit TruthGraphProducer(const edm::ParameterSet& cfg) + : hepmc3Token_(mayConsume(cfg.getParameter("genEventHepMC3"))), + hepmc2Token_(mayConsume(cfg.getParameter("genEventHepMC"))), + simTrackToken_(consumes(cfg.getParameter("simTracks"))), + simVertexToken_(consumes(cfg.getParameter("simVertices"))), + addGenToSimEdges_(cfg.getParameter("addGenToSimEdges")) { + produces(); + } + + static void fillDescriptions(edm::ConfigurationDescriptions& descriptions) { + edm::ParameterSetDescription desc; + + desc.add("genEventHepMC3", edm::InputTag("generatorSmeared")) + ->setComment("edm::HepMC3Product label, preferred when available"); + desc.add("genEventHepMC", edm::InputTag("generatorSmeared")) + ->setComment("edm::HepMCProduct label, legacy fallback"); + + desc.add("simTracks", edm::InputTag("g4SimHits")) + ->setComment("SimTrackContainer label, typically g4SimHits"); + desc.add("simVertices", edm::InputTag("g4SimHits")) + ->setComment("SimVertexContainer label, typically g4SimHits"); + + desc.add("addGenToSimEdges", true) + ->setComment( + "If true, add GenParticle -> SimTrack cross edges. The association is built only for primary " + "SimTracks, interpreting SimTrack::genpartIndex() as a HepMC barcode."); + + descriptions.addWithDefaultLabel(desc); + } + + void produce(edm::Event& evt, const edm::EventSetup&) override { + auto out = std::make_unique(); + + const auto& simTracks = evt.get(simTrackToken_); + const auto& simVertices = evt.get(simVertexToken_); + + GenBuild gb; + bool haveGen = false; + + { + edm::Handle h3; + evt.getByToken(hepmc3Token_, h3); + + if (validHandle(h3) && h3->GetEvent() != nullptr) { + const HepMC3::GenEventData* data = h3->GetEvent(); + + HepMC3::GenEvent ev3; + ev3.read_data(*data); + + gb = buildFromHepMC3(ev3); + haveGen = true; + } + } + + if (!haveGen) { + edm::Handle h2; + evt.getByToken(hepmc2Token_, h2); + + if (validHandle(h2) && h2->GetEvent() != nullptr) { + gb = buildFromHepMC2(*h2->GetEvent()); + haveGen = true; + } + } + + const uint32_t nSimVtx = static_cast(simVertices.size()); + const uint32_t nSimTrk = static_cast(simTracks.size()); + + int nGenEvents = 0; + + std::unordered_map tempIndex; + std::vector tempKeys; + + auto getTemp = [&](int64_t k) -> int { + auto it = tempIndex.find(k); + if (it != tempIndex.end()) + return it->second; + + const int idx = static_cast(tempKeys.size()); + tempIndex.emplace(k, idx); + tempKeys.push_back(k); + return idx; + }; + + std::vector compOfTemp; + std::unordered_map repToComp; + + if (haveGen) { + for (int vbc : gb.vtxBarcodes) + (void)getTemp(genKeyVertex(vbc)); + + for (int pbc : gb.partBarcodes) + (void)getTemp(genKeyParticle(pbc)); + + DSU dsu(static_cast(tempKeys.size())); + + for (auto const& e : gb.vtxToPart) { + dsu.unite(getTemp(genKeyVertex(e.first)), getTemp(genKeyParticle(e.second))); + } + + for (auto const& e : gb.partToVtx) { + dsu.unite(getTemp(genKeyParticle(e.first)), getTemp(genKeyVertex(e.second))); + } + + compOfTemp.resize(tempKeys.size(), -1); + + for (int i = 0; i < static_cast(tempKeys.size()); ++i) { + const int rep = dsu.find(i); + + auto it = repToComp.find(rep); + if (it == repToComp.end()) { + const int cid = nGenEvents++; + repToComp.emplace(rep, cid); + compOfTemp[i] = cid; + } else { + compOfTemp[i] = it->second; + } + } + + if (nGenEvents == 0) + nGenEvents = 1; + } + + const uint32_t nGenVtx = haveGen ? static_cast(gb.vtxBarcodes.size()) : 0u; + const uint32_t nGenPar = haveGen ? static_cast(gb.partBarcodes.size()) : 0u; + + const uint32_t baseGenEvent = 0; + const uint32_t baseGenVtx = baseGenEvent + static_cast(nGenEvents); + const uint32_t baseGenPar = baseGenVtx + nGenVtx; + const uint32_t baseSimVtx = baseGenPar + nGenPar; + const uint32_t baseSimTrk = baseSimVtx + nSimVtx; + + const uint32_t nNodes = baseSimTrk + nSimTrk; + + out->nodes.resize(nNodes); + + out->pdgId.assign(nNodes, 0); + out->status.assign(nNodes, 0); + out->eventId.assign(nNodes, 0); + out->statusFlags.assign(nNodes, 0); + out->genEventOfNode.assign(nNodes, -1); + + out->simTrackToGen.assign(nNodes, -1); + out->simTrackToVtx.assign(nNodes, -1); + + for (int cid = 0; cid < nGenEvents; ++cid) { + const uint32_t nodeId = baseGenEvent + static_cast(cid); + + out->nodes[nodeId] = TruthGraph::NodeRef{TruthGraph::NodeKind::GenEvent, static_cast(cid)}; + out->eventId[nodeId] = 0; + out->genEventOfNode[nodeId] = cid; + } + + std::unordered_map genVtxBarcodeToNode; + std::unordered_map genParBarcodeToNode; + + genVtxBarcodeToNode.reserve(nGenVtx * 2); + genParBarcodeToNode.reserve(nGenPar * 2); + + if (haveGen) { + for (uint32_t i = 0; i < nGenVtx; ++i) { + const int vbc = gb.vtxBarcodes[i]; + const uint32_t nodeId = baseGenVtx + i; + + genVtxBarcodeToNode.emplace(vbc, nodeId); + + out->nodes[nodeId] = TruthGraph::NodeRef{TruthGraph::NodeKind::GenVertex, static_cast(vbc)}; + out->eventId[nodeId] = 0; + + const int tidx = tempIndex.at(genKeyVertex(vbc)); + out->genEventOfNode[nodeId] = compOfTemp[tidx]; + } + + for (uint32_t i = 0; i < nGenPar; ++i) { + const int pbc = gb.partBarcodes[i]; + const uint32_t nodeId = baseGenPar + i; + + genParBarcodeToNode.emplace(pbc, nodeId); + + out->nodes[nodeId] = TruthGraph::NodeRef{TruthGraph::NodeKind::GenParticle, static_cast(pbc)}; + out->eventId[nodeId] = 0; + + const int tidx = tempIndex.at(genKeyParticle(pbc)); + out->genEventOfNode[nodeId] = compOfTemp[tidx]; + + auto itPdg = gb.particlePdgIdByBarcode.find(pbc); + if (itPdg != gb.particlePdgIdByBarcode.end()) + out->pdgId[nodeId] = itPdg->second; + + auto itStatus = gb.particleStatusByBarcode.find(pbc); + if (itStatus != gb.particleStatusByBarcode.end()) + out->status[nodeId] = itStatus->second; + + // Do not fill statusFlags from reco::GenParticle unless we have a validated + // barcode-to-reco::GenParticle association. + out->statusFlags[nodeId] = 0; + } + } + + std::vector simVtxIndexToNode(nSimVtx, 0); + + for (uint32_t i = 0; i < nSimVtx; ++i) { + const uint32_t nodeId = baseSimVtx + i; + + simVtxIndexToNode[i] = nodeId; + + out->nodes[nodeId] = TruthGraph::NodeRef{TruthGraph::NodeKind::SimVertex, static_cast(i)}; + out->eventId[nodeId] = packEventId(simVertices[i].eventId()); + } + + std::unordered_map simTrackIdToNode; + simTrackIdToNode.reserve(nSimTrk * 2); + + for (uint32_t i = 0; i < nSimTrk; ++i) { + auto const& simTrack = simTracks[i]; + + const uint32_t nodeId = baseSimTrk + i; + const uint32_t tid = simTrack.trackId(); + + simTrackIdToNode.emplace(tid, nodeId); + + out->nodes[nodeId] = TruthGraph::NodeRef{TruthGraph::NodeKind::SimTrack, static_cast(tid)}; + out->pdgId[nodeId] = simTrack.type(); + out->eventId[nodeId] = packEventId(simTrack.eventId()); + + const int vtxIdx = simTrack.vertIndex(); + if (vtxIdx >= 0 && static_cast(vtxIdx) < nSimVtx) { + out->simTrackToVtx[nodeId] = static_cast(simVtxIndexToNode[static_cast(vtxIdx)]); + } + + // SimTrack::genpartIndex() must be used only for primary G4 tracks. + // For non-primary tracks, getPrimaryOrLastStoredID() can still contain + // a generator barcode, but that is ancestry information for orphan or + // backscattered tracks, not a direct SimTrack -> GenParticle association. + if (addGenToSimEdges_ && haveGen && simTrack.isPrimary()) { + const int barcode = simTrack.genpartIndex(); + + if (barcode != -1) { + auto it = genParBarcodeToNode.find(barcode); + + if (it != genParBarcodeToNode.end()) { + const int simPdgId = simTrack.type(); + const int genPdgId = out->nodePdgId(it->second); + + if (genPdgId == 0 || genPdgId == simPdgId) { + out->simTrackToGen[nodeId] = static_cast(it->second); + } else { + edm::LogPrint("TruthGraphProducer") + << "Rejecting primary SimTrack->GenParticle association with mismatched PDG id: " + << "simTrack index=" << i << " trackId=" << simTrack.trackId() << " genBarcode=" << barcode + << " simPdgId=" << simPdgId << " genNode=" << it->second << " genPdgId=" << genPdgId; + } + } else { + edm::LogPrint("TruthGraphProducer") + << "Rejecting primary SimTrack->GenParticle association with missing GEN barcode: " + << "simTrack index=" << i << " trackId=" << simTrack.trackId() << " genBarcode=" << barcode; + } + } + } + } + + std::vector> edgePairs; + std::vector edgeKinds; + + edgePairs.reserve(8 * (nGenVtx + nGenPar + nSimTrk)); + edgeKinds.reserve(edgePairs.capacity()); + + auto push_edge = [&](uint32_t src, uint32_t dst, TruthGraph::EdgeKind k) { + edgePairs.emplace_back(src, dst); + edgeKinds.emplace_back(static_cast(k)); + }; + + if (haveGen) { + std::unordered_map vtxIncoming; + vtxIncoming.reserve(nGenVtx * 2); + + for (int vbc : gb.vtxBarcodes) + vtxIncoming.emplace(vbc, 0); + + for (auto const& e : gb.partToVtx) { + auto it = vtxIncoming.find(e.second); + if (it != vtxIncoming.end()) + ++it->second; + } + + std::vector> rootsByComp(nGenEvents); + std::vector> allVtxByComp(nGenEvents); + + for (int vbc : gb.vtxBarcodes) { + const int tidx = tempIndex.at(genKeyVertex(vbc)); + const int cid = compOfTemp[tidx]; + + if (cid < 0 || cid >= nGenEvents) + continue; + + allVtxByComp[cid].push_back(vbc); + + if (vtxIncoming[vbc] == 0) + rootsByComp[cid].push_back(vbc); + } + + for (int cid = 0; cid < nGenEvents; ++cid) { + const uint32_t genEventNode = baseGenEvent + static_cast(cid); + + auto roots = rootsByComp[cid]; + if (roots.empty()) + roots = allVtxByComp[cid]; + + for (int vbc : roots) { + auto itV = genVtxBarcodeToNode.find(vbc); + if (itV != genVtxBarcodeToNode.end()) { + push_edge(genEventNode, itV->second, TruthGraph::EdgeKind::Gen); + } + } + } + + for (auto const& e : gb.vtxToPart) { + auto itV = genVtxBarcodeToNode.find(e.first); + auto itP = genParBarcodeToNode.find(e.second); + + if (itV != genVtxBarcodeToNode.end() && itP != genParBarcodeToNode.end()) { + push_edge(itV->second, itP->second, TruthGraph::EdgeKind::Gen); + } + } + + for (auto const& e : gb.partToVtx) { + auto itP = genParBarcodeToNode.find(e.first); + auto itV = genVtxBarcodeToNode.find(e.second); + + if (itP != genParBarcodeToNode.end() && itV != genVtxBarcodeToNode.end()) { + push_edge(itP->second, itV->second, TruthGraph::EdgeKind::Gen); + } + } + } + + for (uint32_t i = 0; i < nSimTrk; ++i) { + auto const& simTrack = simTracks[i]; + + const uint32_t childNode = baseSimTrk + i; + + const int vtxIdx = simTrack.vertIndex(); + if (vtxIdx < 0 || static_cast(vtxIdx) >= nSimVtx) + continue; + + const uint32_t vtxNode = simVtxIndexToNode[static_cast(vtxIdx)]; + + push_edge(vtxNode, childNode, TruthGraph::EdgeKind::Sim); + + const int parentTid = simVertices[static_cast(vtxIdx)].parentIndex(); + + if (parentTid > 0) { + auto itParent = simTrackIdToNode.find(static_cast(parentTid)); + if (itParent != simTrackIdToNode.end()) { + push_edge(itParent->second, vtxNode, TruthGraph::EdgeKind::Sim); + } + } + } + + // Cross-domain particle associations only. These edges are created only for + // primary SimTracks that carry a validated HepMC barcode. + // + // GenVertex -> SimVertex edges are intentionally not created here because + // shared Geant4 source or injection vertices can create artificial many-to-one topology. + if (addGenToSimEdges_ && haveGen) { + for (uint32_t i = 0; i < nSimTrk; ++i) { + const uint32_t simNode = baseSimTrk + i; + const int32_t genNode = out->simTrackToGen[simNode]; + + if (genNode >= 0) { + push_edge(static_cast(genNode), simNode, TruthGraph::EdgeKind::GenToSim); + } + } + } + + out->offsets.assign(nNodes + 1, 0); + + for (auto const& e : edgePairs) { + if (e.first < nNodes) + ++out->offsets[e.first + 1]; + } + + for (uint32_t i = 1; i <= nNodes; ++i) + out->offsets[i] += out->offsets[i - 1]; + + const uint32_t nEdges = out->offsets.back(); + + out->edges.assign(nEdges, 0); + out->edgeKind.assign(nEdges, static_cast(TruthGraph::EdgeKind::Gen)); + + std::vector cursor = out->offsets; + + for (std::size_t i = 0; i < edgePairs.size(); ++i) { + const uint32_t src = edgePairs[i].first; + const uint32_t dst = edgePairs[i].second; + + if (src < nNodes && dst < nNodes) { + const uint32_t pos = cursor[src]++; + out->edges[pos] = dst; + out->edgeKind[pos] = edgeKinds[i]; + } + } + + unsigned nGenEventOut = 0; + unsigned nGenVertexOut = 0; + unsigned nGenParticleOut = 0; + unsigned nSimVertexOut = 0; + unsigned nSimTrackOut = 0; + unsigned nGenToSimParticleLinks = 0; + + for (uint32_t i = 0; i < out->nNodes(); ++i) { + switch (out->nodeRef(i).kind) { + case TruthGraph::NodeKind::GenEvent: + ++nGenEventOut; + break; + case TruthGraph::NodeKind::GenVertex: + ++nGenVertexOut; + break; + case TruthGraph::NodeKind::GenParticle: + ++nGenParticleOut; + break; + case TruthGraph::NodeKind::SimVertex: + ++nSimVertexOut; + break; + case TruthGraph::NodeKind::SimTrack: + ++nSimTrackOut; + if (out->simTrackToGen[i] >= 0) + ++nGenToSimParticleLinks; + break; + } + } + + edm::LogPrint("TruthGraphProducer") << "TruthGraph nodes: " + << "GenEvent=" << nGenEventOut << " GenVertex=" << nGenVertexOut + << " GenParticle=" << nGenParticleOut << " SimVertex=" << nSimVertexOut + << " SimTrack=" << nSimTrackOut << " total=" << out->nNodes() + << " edges=" << out->nEdges() + << " primaryGenToSimParticleLinks=" << nGenToSimParticleLinks; + + evt.put(std::move(out)); + } + +private: + edm::EDGetTokenT hepmc3Token_; + edm::EDGetTokenT hepmc2Token_; + edm::EDGetTokenT simTrackToken_; + edm::EDGetTokenT simVertexToken_; + + bool addGenToSimEdges_; +}; + +DEFINE_FWK_MODULE(TruthGraphProducer); diff --git a/PhysicsTools/TruthInfo/plugins/TruthLogicalGraphDumper.cc b/PhysicsTools/TruthInfo/plugins/TruthLogicalGraphDumper.cc new file mode 100644 index 0000000000000..50f3ea5bd6361 --- /dev/null +++ b/PhysicsTools/TruthInfo/plugins/TruthLogicalGraphDumper.cc @@ -0,0 +1,782 @@ +#include +#include +#include +#include +#include +#include +#include +#include + +#include "FWCore/Framework/interface/Event.h" +#include "FWCore/Framework/interface/EventSetup.h" +#include "FWCore/Framework/interface/MakerMacros.h" +#include "FWCore/Framework/interface/one/EDAnalyzer.h" +#include "FWCore/MessageLogger/interface/MessageLogger.h" +#include "FWCore/ParameterSet/interface/ConfigurationDescriptions.h" +#include "FWCore/ParameterSet/interface/ParameterSet.h" +#include "FWCore/ParameterSet/interface/ParameterSetDescription.h" +#include "FWCore/Utilities/interface/InputTag.h" + +#include "DataFormats/HGCRecHit/interface/HGCRecHitCollections.h" +#include "DataFormats/ParticleFlowReco/interface/PFRecHit.h" + +#include "PhysicsTools/TruthInfo/interface/Graph.h" +#include "PhysicsTools/TruthInfo/interface/LogicalGraphHitIndex.h" +#include "PhysicsTools/TruthInfo/interface/TruthGraph.h" + +namespace { + + std::string pdgNameUtf8(int pdgId) { + const int ap = std::abs(pdgId); + + if (pdgId == 11) + return "e-"; + if (pdgId == -11) + return "e+"; + if (pdgId == 13) + return "mu-"; + if (pdgId == -13) + return "mu+"; + if (pdgId == 15) + return "tau-"; + if (pdgId == -15) + return "tau+"; + + if (pdgId == 12) + return "nu_e"; + if (pdgId == -12) + return "anti-nu_e"; + if (pdgId == 14) + return "nu_mu"; + if (pdgId == -14) + return "anti-nu_mu"; + if (pdgId == 16) + return "nu_tau"; + if (pdgId == -16) + return "anti-nu_tau"; + + if (pdgId == 22) + return "gamma"; + if (pdgId == 21) + return "g"; + if (pdgId == 23) + return "Z0"; + if (pdgId == 24) + return "W+"; + if (pdgId == -24) + return "W-"; + if (pdgId == 25) + return "H"; + + if (pdgId == 2212) + return "p"; + if (pdgId == -2212) + return "anti-p"; + if (pdgId == 2112) + return "n"; + if (pdgId == -2112) + return "anti-n"; + + if (pdgId == 111) + return "pi0"; + if (pdgId == 211) + return "pi+"; + if (pdgId == -211) + return "pi-"; + if (pdgId == 321) + return "K+"; + if (pdgId == -321) + return "K-"; + if (pdgId == 130) + return "K0_L"; + if (pdgId == 310) + return "K0_S"; + + if (ap >= 1 && ap <= 6) { + static const char* qname[7] = {"", "d", "u", "s", "c", "b", "t"}; + std::string s = qname[ap]; + if (pdgId < 0) + s = "anti-" + s; + return s; + } + + return "pdg"; + } + + std::string pdgLabel(int pdgId) { + std::ostringstream ss; + const std::string name = pdgNameUtf8(pdgId); + if (name == "pdg") + ss << "pdg(" << pdgId << ")"; + else + ss << name << " (" << pdgId << ")"; + return ss.str(); + } + + const char* rawKindName(TruthGraph::NodeKind k) { + switch (k) { + case TruthGraph::NodeKind::GenEvent: + return "GenEvent"; + case TruthGraph::NodeKind::GenVertex: + return "GenVertex"; + case TruthGraph::NodeKind::GenParticle: + return "GenParticle"; + case TruthGraph::NodeKind::SimVertex: + return "SimVertex"; + case TruthGraph::NodeKind::SimTrack: + return "SimTrack"; + } + return "Unknown"; + } + + std::string rawNodeSummary(TruthGraph const* raw, int32_t nodeId) { + if (raw == nullptr || nodeId < 0 || static_cast(nodeId) >= raw->nNodes()) + return "n/a"; + + auto const& r = raw->nodeRef(static_cast(nodeId)); + + std::ostringstream ss; + ss << rawKindName(r.kind) << " #" << nodeId << " key=" << r.key; + return ss.str(); + } + + template + std::string fmtP4(P4 const& p4) { + std::ostringstream ss; + ss.setf(std::ios::fixed); + ss.precision(3); + ss << "(" << p4.px() << ", " << p4.py() << ", " << p4.pz() << ", " << p4.e() << ")"; + return ss.str(); + } + + const char* logicalVertexDomain(truth::VertexData const& d) { + if (d.hasGen() && !d.hasSim()) + return "GEN"; + if (!d.hasGen() && d.hasSim()) + return "SIM"; + if (d.hasGen() && d.hasSim()) + return "GEN+SIM"; + return "UNKNOWN"; + } + + std::string statusFlagsLabel(uint16_t flags) { + struct FlagInfo { + uint16_t bit; + const char* name; + }; + + static constexpr FlagInfo flagInfos[] = { + {1u << 0, "isPrompt"}, + {1u << 1, "isDecayedLeptonHadron"}, + {1u << 2, "isTauDecayProduct"}, + {1u << 3, "isPromptTauDecayProduct"}, + {1u << 4, "isDirectTauDecayProduct"}, + {1u << 5, "isDirectPromptTauDecayProduct"}, + {1u << 6, "isDirectHadronDecayProduct"}, + {1u << 7, "isHardProcess"}, + {1u << 8, "fromHardProcess"}, + {1u << 9, "isHardProcessTauDecayProduct"}, + {1u << 10, "isDirectHardProcessTauDecayProduct"}, + {1u << 11, "fromHardProcessBeforeFSR"}, + {1u << 12, "isFirstCopy"}, + {1u << 13, "isLastCopy"}, + {1u << 14, "isLastCopyBeforeFSR"}, + }; + + std::ostringstream ss; + bool first = true; + + for (auto const& flag : flagInfos) { + if ((flags & flag.bit) == 0) + continue; + + if (!first) + ss << ", "; + ss << flag.name; + first = false; + } + + if (first) + return "none"; + + return ss.str(); + } + + std::string fmtEnergy(float energy) { + std::ostringstream ss; + ss.setf(std::ios::fixed); + ss << std::setprecision(6) << energy; + return ss.str(); + } + + struct HitSummary { + uint32_t nSimHits = 0; + uint32_t nMatchedRecHits = 0; + uint32_t nMissingRecHits = 0; + float simHitEnergy = 0.f; + float recHitEnergy = 0.f; + }; + + HitSummary summarizeHits(std::span hits, + std::vector const& recHitEnergies) { + HitSummary summary; + summary.nSimHits = static_cast(hits.size()); + + for (auto const& hit : hits) { + summary.simHitEnergy += hit.energy; + + if (hit.recHitIndex == truth::LogicalGraphHitIndex::Hit::invalidRecHitIndex || + hit.recHitIndex >= recHitEnergies.size()) { + ++summary.nMissingRecHits; + continue; + } + + ++summary.nMatchedRecHits; + summary.recHitEnergy += recHitEnergies[hit.recHitIndex]; + } + + return summary; + } + + template + void forEachDescendantParticle(truth::Graph const& g, uint32_t particleId, F&& f) { + if (particleId >= g.nParticles()) + return; + + for (const uint32_t vertexId : g.decayVertices(particleId)) { + if (vertexId >= g.nVertices()) + continue; + + for (const uint32_t childId : g.outgoingParticles(vertexId)) { + if (childId >= g.nParticles()) + continue; + + f(childId); + forEachDescendantParticle(g, childId, f); + } + } + } + + bool hasVisibleIncomingParticle(truth::Graph const& g, uint32_t vertexId, std::vector const& hideParticle) { + for (const uint32_t p : g.incomingParticles(vertexId)) { + if (p < hideParticle.size() && !hideParticle[p]) + return true; + } + + return false; + } + + bool hasVisibleOutgoingParticle(truth::Graph const& g, uint32_t vertexId, std::vector const& hideParticle) { + for (const uint32_t p : g.outgoingParticles(vertexId)) { + if (p < hideParticle.size() && !hideParticle[p]) + return true; + } + + return false; + } + + bool shouldHideVertexAfterParticleFiltering(truth::Graph const& g, + uint32_t vertexId, + std::vector const& hideParticle) { + const bool hasIncoming = !g.incomingParticles(vertexId).empty(); + const bool hasOutgoing = !g.outgoingParticles(vertexId).empty(); + + const bool hasVisibleIncoming = hasVisibleIncomingParticle(g, vertexId, hideParticle); + const bool hasVisibleOutgoing = hasVisibleOutgoingParticle(g, vertexId, hideParticle); + + // Hide vertices fully disconnected by the particle filter. + if (!hasVisibleIncoming && !hasVisibleOutgoing) + return true; + + // Hide decay vertices that no longer have visible daughters. + if (hasOutgoing && !hasVisibleOutgoing) + return true; + + // Hide production/source vertices that no longer have visible outgoing particles. + if (!hasIncoming && hasOutgoing && !hasVisibleOutgoing) + return true; + + return false; + } + + std::string appendEventIdToFilename(std::string const& filename, edm::EventID const& id) { + const auto dotPos = filename.rfind('.'); + + std::ostringstream ss; + + if (dotPos == std::string::npos) { + ss << filename; + ss << "_run" << id.run(); + ss << "_lumi" << id.luminosityBlock(); + ss << "_event" << id.event(); + return ss.str(); + } + + ss << filename.substr(0, dotPos); + ss << "_run" << id.run(); + ss << "_lumi" << id.luminosityBlock(); + ss << "_event" << id.event(); + ss << filename.substr(dotPos); + + return ss.str(); + } + +} // namespace + +class TruthLogicalGraphDumper : public edm::one::EDAnalyzer<> { +public: + explicit TruthLogicalGraphDumper(const edm::ParameterSet& cfg) + : token_(consumes(cfg.getParameter("src"))), + rawToken_(mayConsume(cfg.getParameter("rawSrc"))), + hitIndexTag_(cfg.getParameter("hitIndex")), + hitIndexToken_(mayConsume(hitIndexTag_)), + useHitIndex_(!hitIndexTag_.label().empty()), + dotFile_(cfg.getParameter("dotFile")), + maxParticles_(cfg.getParameter("maxParticles")), + maxVertices_(cfg.getParameter("maxVertices")), + maxEdgesPerNode_(cfg.getParameter("maxEdgesPerNode")), + hideLargeSimSourceVertices_(cfg.getParameter("hideLargeSimSourceVertices")), + dumpSimHits_(cfg.getParameter("dumpSimHits")), + largeSimSourceVertexMinOutgoing_(cfg.getParameter("largeSimSourceVertexMinOutgoing")), + hideZeroSimHitSubgraphs_(cfg.getParameter("hideZeroSimHitSubgraphs")) { + const auto hgcalRecHitTags = cfg.getParameter>("hgcalRecHits"); + hgcalRecHitTags_.reserve(hgcalRecHitTags.size()); + hgcalRecHitTokens_.reserve(hgcalRecHitTags.size()); + + for (auto const& tag : hgcalRecHitTags) { + hgcalRecHitTags_.push_back(tag); + hgcalRecHitTokens_.push_back(mayConsume(tag)); + } + + const auto pfRecHitTags = cfg.getParameter>("pfRecHits"); + pfRecHitTags_.reserve(pfRecHitTags.size()); + pfRecHitTokens_.reserve(pfRecHitTags.size()); + + for (auto const& tag : pfRecHitTags) { + pfRecHitTags_.push_back(tag); + pfRecHitTokens_.push_back(mayConsume(tag)); + } + } + + static void fillDescriptions(edm::ConfigurationDescriptions& descriptions) { + edm::ParameterSetDescription desc; + + desc.add("src", edm::InputTag("truthLogicalGraphProducer")); + desc.add("rawSrc", edm::InputTag("truthGraphProducer")) + ->setComment("Optional raw TruthGraph used only to enrich labels"); + + desc.add("hitIndex", edm::InputTag("")) + ->setComment("Optional LogicalGraphHitIndex used to annotate particles with SimHit and RecHit summaries"); + + desc.add>("hgcalRecHits", + {edm::InputTag("HGCalRecHit", "HGCEERecHits", "RECO"), + edm::InputTag("HGCalRecHit", "HGCHEFRecHits", "RECO"), + edm::InputTag("HGCalRecHit", "HGCHEBRecHits", "RECO")}) + ->setComment("HGCRecHit collections, in the same order used by SimHitToRecHitMapProducer"); + + desc.add>("pfRecHits", + {edm::InputTag("particleFlowRecHitECAL", "Cleaned", "RECO"), + edm::InputTag("particleFlowRecHitHBHE", "Cleaned", "RECO"), + edm::InputTag("particleFlowRecHitHF", "Cleaned", "RECO"), + edm::InputTag("particleFlowRecHitHO", "Cleaned", "RECO")}) + ->setComment("PFRecHit collections, in the same order used by SimHitToRecHitMapProducer"); + + desc.add("dotFile", "truthlogicalgraph.dot"); + + desc.add("maxParticles", 5000)->setComment("Truncate logical particle nodes"); + desc.add("maxVertices", 5000)->setComment("Truncate logical vertex nodes"); + desc.add("maxEdgesPerNode", 200)->setComment("Truncate fanout per node"); + + desc.add("hideLargeSimSourceVertices", true) + ->setComment("If true, do not print large SIM-only source vertices in the DOT output"); + desc.add("dumpSimHits", true)->setComment("If true, dump all simhits"); + + desc.add("largeSimSourceVertexMinOutgoing", 50) + ->setComment("Minimum outgoing multiplicity for hiding a SIM-only source vertex"); + + desc.add("hideZeroSimHitSubgraphs", false) + ->setComment( + "If true, hide every SIM-backed particle whose subgraph has zero SimHits, together with its descendant " + "subgraph. Requires hitIndex to be configured."); + + descriptions.addWithDefaultLabel(desc); + } + + void analyze(const edm::Event& evt, const edm::EventSetup&) override { + auto const& g = evt.get(token_); + + edm::Handle hRaw; + evt.getByToken(rawToken_, hRaw); + TruthGraph const* raw = hRaw.isValid() ? &(*hRaw) : nullptr; + + edm::Handle hHitIndex; + if (useHitIndex_) { + evt.getByToken(hitIndexToken_, hHitIndex); + } + truth::LogicalGraphHitIndex const* hitIndex = hHitIndex.isValid() ? &(*hHitIndex) : nullptr; + + const std::vector recHitEnergies = collectRecHitEnergies(evt); + + const std::string eventDotFile = appendEventIdToFilename(dotFile_, evt.id()); + + std::ofstream os(eventDotFile); + + os << "digraph TruthLogicalGraph {\n"; + os << " rankdir=LR;\n"; + os << " node [fontsize=10];\n"; + + const uint32_t nParticles = std::min(g.nParticles(), maxParticles_); + const uint32_t nVertices = std::min(g.nVertices(), maxVertices_); + + std::vector hideVertex(nVertices, 0); + + if (hideLargeSimSourceVertices_) { + for (uint32_t i = 0; i < nVertices; ++i) { + auto v = g.vertex(i); + auto const& d = v.data(); + + const auto incoming = v.incomingParticles(); + const auto outgoing = v.outgoingParticles(); + + if (!d.hasGen() && d.hasSim() && incoming.empty() && outgoing.size() >= largeSimSourceVertexMinOutgoing_) { + hideVertex[i] = 1; + } + } + } + + std::vector hideParticle(nParticles, 0); + + if (hideZeroSimHitSubgraphs_) { + if (hitIndex == nullptr) { + edm::LogWarning("TruthLogicalGraphDumper") + << "hideZeroSimHitSubgraphs is enabled, but no valid LogicalGraphHitIndex was provided. " + << "No zero-hit subgraphs will be hidden."; + } else { + for (uint32_t i = 0; i < nParticles; ++i) { + auto const& d = g.particle(i).data(); + + if (!d.hasSim()) + continue; + + if (i >= hitIndex->nParticles()) + continue; + + if (!hitIndex->subgraphHits(i).empty()) + continue; + + hideParticle[i] = 1; + + forEachDescendantParticle(g, i, [&](uint32_t childId) { + if (childId < hideParticle.size()) + hideParticle[childId] = 1; + }); + } + } + } + + for (uint32_t i = 0; i < nVertices; ++i) { + if (hideVertex[i]) + continue; + + if (shouldHideVertexAfterParticleFiltering(g, i, hideParticle)) { + hideVertex[i] = 1; + } + } + + // ------------------------------------------------------------------ + // Particle nodes + // ------------------------------------------------------------------ + for (uint32_t i = 0; i < nParticles; ++i) { + if (hideParticle[i]) + continue; + + auto p = g.particle(i); + auto const& d = p.data(); + + const bool hasHitInfo = hitIndex != nullptr && i < hitIndex->nParticles(); + + const auto directHits = + hasHitInfo ? hitIndex->directHits(i) : std::span(); + const auto subgraphHits = + hasHitInfo ? hitIndex->subgraphHits(i) : std::span(); + + const HitSummary directSummary = hasHitInfo ? summarizeHits(directHits, recHitEnergies) : HitSummary(); + const HitSummary subgraphSummary = hasHitInfo ? summarizeHits(subgraphHits, recHitEnergies) : HitSummary(); + + os << " p" << i << " [shape=ellipse, hasCheckpoints=" << p.hasCheckpoints() << ", hasGen=" << p.hasGen() + << ", hasSim=" << d.hasSim(); + + if (p.hasCheckpoints()) { + os << ", color=\"red\", penwidth=2"; + } else if (d.hasGen() && d.hasSim()) { + os << ", penwidth=2"; + } else if (d.hasGen()) { + os << ", color=\"blue\""; + } else if (d.hasSim()) { + os << ", color=\"darkgreen\""; + } + + os << ", pid=" << d.pdgId << ", status=" << d.status << ", statusFlags=" << d.statusFlags << ", flags=<" + << statusFlagsLabel(d.statusFlags) << ">" + << ", eid=" << d.eventId << ", genEvent=" << d.genEvent << ", isRoot=" << p.isRoot() + << ", isLeaf=" << p.isLeaf() << ", p4=\"" << fmtP4(d.momentum) + << "\", nProdVtx=" << p.productionVertices().size() << ", nDecayVtx=" << p.decayVertices().size() + << ", nParents=" << p.parents().size() << ", nChildren=" << p.children().size() + << ", nCheckpoints=" << d.checkpoints.size(); + + if (hasHitInfo) { + os << ", nDirectSimHits=" << directSummary.nSimHits << ", nDirectRecHits=" << directSummary.nMatchedRecHits + << ", directSimHitEnergy=" << fmtEnergy(directSummary.simHitEnergy) + << ", directRecHitEnergy=" << fmtEnergy(directSummary.recHitEnergy) + << ", nSubgraphSimHits=" << subgraphSummary.nSimHits + << ", nSubgraphRecHits=" << subgraphSummary.nMatchedRecHits + << ", subgraphSimHitEnergy=" << fmtEnergy(subgraphSummary.simHitEnergy) + << ", subgraphRecHitEnergy=" << fmtEnergy(subgraphSummary.recHitEnergy); + if (dumpSimHits_) { + os << ", directHitsDetIds=\""; + for (auto h : directHits) { + os << h.detId << ","; + } + os << "\""; + os << ", directHitsEnergies=\""; + for (auto h : directHits) { + os << h.energy << ","; + } + os << "\""; + } + } + + if (raw != nullptr) { + os << ", raw_GEN=<" << rawNodeSummary(raw, d.genNode) << ">, raw_SIM=<" << rawNodeSummary(raw, d.simNode) + << ">"; + } + + os << ", label=<\n"; + os << " \n"; + os << " \n"; + + if (d.pdgId != 0) + os << " \n"; + + if (d.status != 0) + os << " \n"; + + if (d.statusFlags != 0) { + os << " \n"; + os << " \n"; + } + + if (d.eventId != 0) + os << " \n"; + + if (d.genEvent >= 0) + os << " \n"; + + os << " \n"; + + os << " \n"; + + os << " \n"; + + os << " \n"; + + os << " \n"; + + os << " \n"; + + if (hasHitInfo) { + os << " \n"; + os << " \n"; + + os << " \n"; + os << " \n"; + } + + for (auto const& cp : d.checkpoints) { + os << " \n"; + os << " \n"; + os << " \n"; + } + + if (raw != nullptr) { + os << " \n"; + os << " \n"; + } + + os << "
Particle " << i << "
pid: " << pdgLabel(d.pdgId) << "
status: " << d.status << "
statusFlags: " << d.statusFlags << "
flags: " << statusFlagsLabel(d.statusFlags) << "
eid: " << d.eventId << "
genEvent: " << d.genEvent << "
hasGen: " << (d.hasGen() ? "yes" : "no") << " hasSim: " << (d.hasSim() ? "yes" : "no") + << "
isRoot: " << (p.isRoot() ? "yes" : "no") << " isLeaf: " << (p.isLeaf() ? "yes" : "no") + << "
p4: " << fmtP4(d.momentum) << "
nProdVtx: " << p.productionVertices().size() << " nDecayVtx: " << p.decayVertices().size() + << "
nParents: " << p.parents().size() << " nChildren: " << p.children().size() + << "
nCheckpoints: " << d.checkpoints.size() << "
direct simHits: " << directSummary.nSimHits + << " simE=" << fmtEnergy(directSummary.simHitEnergy) << "
direct recHits: " << directSummary.nMatchedRecHits + << " missing=" << directSummary.nMissingRecHits << " recoE=" << fmtEnergy(directSummary.recHitEnergy) + << "
subgraph simHits: " << subgraphSummary.nSimHits + << " simE=" << fmtEnergy(subgraphSummary.simHitEnergy) << "
subgraph recHits: " << subgraphSummary.nMatchedRecHits + << " missing=" << subgraphSummary.nMissingRecHits << " recoE=" << fmtEnergy(subgraphSummary.recHitEnergy) + << "
checkpointId: " << cp.checkpointId << "
x4@checkpoint: " << fmtP4(cp.position) << "
p4@checkpoint: " << fmtP4(cp.momentum) << "
raw GEN: " << rawNodeSummary(raw, d.genNode) << "
raw SIM: " << rawNodeSummary(raw, d.simNode) << "
\n"; + os << " >];\n"; + } + + // ------------------------------------------------------------------ + // Vertex nodes + // ------------------------------------------------------------------ + for (uint32_t i = 0; i < nVertices; ++i) { + if (hideVertex[i]) + continue; + + auto v = g.vertex(i); + auto const& d = v.data(); + + os << " v" << i << " [shape=diamond"; + + if (d.hasGen() && d.hasSim()) { + os << ", color=\"purple\", penwidth=2"; + } else if (d.hasGen()) { + os << ", color=\"blue\""; + } else if (d.hasSim()) { + os << ", color=\"darkgreen\""; + } + + os << ", label=<\n"; + os << " \n"; + os << " \n"; + + os << " \n"; + + if (d.eventId != 0) + os << " \n"; + + if (d.genEvent >= 0) + os << " \n"; + + os << " \n"; + + os << " \n"; + + os << " \n"; + + os << " \n"; + + if (raw != nullptr) { + os << " \n"; + os << " \n"; + } + + os << "
Vertex " << i << "
domain: " << logicalVertexDomain(d) << "
eid: " << d.eventId << "
genEvent: " << d.genEvent << "
hasGen: " << (d.hasGen() ? "yes" : "no") << " hasSim: " << (d.hasSim() ? "yes" : "no") + << "
isSource: " << (v.isSource() ? "yes" : "no") << " isSink: " << (v.isSink() ? "yes" : "no") + << "
x4: " << fmtP4(d.position) << "
nIn: " << v.incomingParticles().size() << " nOut: " << v.outgoingParticles().size() + << "
raw GEN: " << rawNodeSummary(raw, d.genNode) << "
raw SIM: " << rawNodeSummary(raw, d.simNode) << "
\n"; + os << " >];\n"; + } + + // ------------------------------------------------------------------ + // Edges: physics-forward only + // ------------------------------------------------------------------ + for (uint32_t i = 0; i < nParticles; ++i) { + if (hideParticle[i]) + continue; + + unsigned kept = 0; + + for (uint32_t v : g.decayVertices(i)) { + if (v >= nVertices) + continue; + + if (hideVertex[v]) + continue; + + os << " p" << i << " -> v" << v << ";\n"; + + if (++kept >= maxEdgesPerNode_) + break; + } + } + + for (uint32_t i = 0; i < nVertices; ++i) { + if (hideVertex[i]) + continue; + + unsigned kept = 0; + + for (uint32_t p : g.outgoingParticles(i)) { + if (p >= nParticles) + continue; + + if (hideParticle[p]) + continue; + + os << " v" << i << " -> p" << p << ";\n"; + + if (++kept >= maxEdgesPerNode_) + break; + } + } + + os << "}\n"; + } + +private: + std::vector collectRecHitEnergies(const edm::Event& evt) const { + std::vector energies; + + // This must match the global recHit indexing order used by SimHitToRecHitMapProducer: + // first all HGCRecHit collections, then all PFRecHit collections. + for (uint32_t i = 0; i < hgcalRecHitTokens_.size(); ++i) { + edm::Handle handle; + evt.getByToken(hgcalRecHitTokens_[i], handle); + + if (!handle.isValid()) { + edm::LogWarning("TruthLogicalGraphDumper") << "Missing HGCRecHit collection " << hgcalRecHitTags_[i].encode() + << ". Skipping it while rebuilding recHit energies."; + continue; + } + + energies.reserve(energies.size() + handle->size()); + for (auto const& hit : *handle) { + energies.push_back(hit.energy()); + } + } + for (uint32_t i = 0; i < pfRecHitTokens_.size(); ++i) { + edm::Handle handle; + evt.getByToken(pfRecHitTokens_[i], handle); + std::cout << pfRecHitTags_[i].label() << " size() " << handle->size() << std::endl; + + if (!handle.isValid()) { + edm::LogWarning("TruthLogicalGraphDumper") << "Missing reco::PFRecHitCollection " << pfRecHitTags_[i].encode() + << ". Skipping it while rebuilding recHit energies."; + continue; + } + + energies.reserve(energies.size() + handle->size()); + for (auto const& hit : *handle) { + energies.push_back(hit.energy()); + } + } + + return energies; + } + + edm::EDGetTokenT token_; + edm::EDGetTokenT rawToken_; + + edm::InputTag hitIndexTag_; + edm::EDGetTokenT hitIndexToken_; + bool useHitIndex_; + + std::vector hgcalRecHitTags_; + std::vector> hgcalRecHitTokens_; + + std::vector pfRecHitTags_; + std::vector> pfRecHitTokens_; + + std::string dotFile_; + unsigned maxParticles_; + unsigned maxVertices_; + unsigned maxEdgesPerNode_; + bool hideLargeSimSourceVertices_; + bool dumpSimHits_; + unsigned largeSimSourceVertexMinOutgoing_; + bool hideZeroSimHitSubgraphs_; +}; + +DEFINE_FWK_MODULE(TruthLogicalGraphDumper); diff --git a/PhysicsTools/TruthInfo/plugins/TruthLogicalGraphProducer.cc b/PhysicsTools/TruthInfo/plugins/TruthLogicalGraphProducer.cc new file mode 100644 index 0000000000000..62e2a8cde8a2f --- /dev/null +++ b/PhysicsTools/TruthInfo/plugins/TruthLogicalGraphProducer.cc @@ -0,0 +1,809 @@ +// Author: Felice Pantaleo - CERN +// Date: 03/2026 +// +// Build a logical truth::Graph from the raw heterogeneous TruthGraph. +// The topology comes from the raw TruthGraph. +// Standalone payload (momentum/position/checkpoints) is materialized from optional +// HepMC2 / HepMC3 / SimTrack / SimVertex inputs. +// +// GenParticle and SimTrack nodes are merged when a robust association exists. +// The corresponding production GenVertex and SimVertex nodes are also merged, +// but only for locally one-to-one GenVertex <-> SimVertex associations induced +// by the same GenParticle <-> SimTrack match. This avoids creating artificial +// high-degree merged vertices from transitive many-to-one vertex matches. + +#include +#include +#include +#include +#include +#include + +#include "FWCore/Framework/interface/Event.h" +#include "FWCore/Framework/interface/EventSetup.h" +#include "FWCore/Framework/interface/MakerMacros.h" +#include "FWCore/Framework/interface/stream/EDProducer.h" +#include "FWCore/ParameterSet/interface/ConfigurationDescriptions.h" +#include "FWCore/ParameterSet/interface/ParameterSet.h" +#include "FWCore/ParameterSet/interface/ParameterSetDescription.h" +#include "FWCore/Utilities/interface/Exception.h" +#include "FWCore/Utilities/interface/InputTag.h" + +#include "DataFormats/Math/interface/LorentzVector.h" + +#include "SimDataFormats/Track/interface/SimTrackContainer.h" +#include "SimDataFormats/Vertex/interface/SimVertexContainer.h" + +// Legacy HepMC (HepMC2) +#include "SimDataFormats/GeneratorProducts/interface/HepMCProduct.h" +#include "HepMC/GenEvent.h" +#include "HepMC/GenParticle.h" +#include "HepMC/GenVertex.h" + +// HepMC3 +#include "SimDataFormats/GeneratorProducts/interface/HepMC3Product.h" +#include "HepMC3/GenEvent.h" +#include "HepMC3/GenParticle.h" +#include "HepMC3/GenVertex.h" + +#include "PhysicsTools/TruthInfo/interface/Graph.h" +#include "PhysicsTools/TruthInfo/interface/TruthGraph.h" +#include "PhysicsTools/TruthInfo/interface/TruthLogicalGraphPostProcessor.h" + +namespace { + + struct DSU { + std::vector parent; + std::vector rank; + + explicit DSU(int n) : parent(n), rank(n, 0) { + for (int i = 0; i < n; ++i) + parent[i] = i; + } + + int find(int x) { + while (parent[x] != x) { + parent[x] = parent[parent[x]]; + x = parent[x]; + } + return x; + } + + void unite(int a, int b) { + a = find(a); + b = find(b); + + if (a == b) + return; + + if (rank[a] < rank[b]) + std::swap(a, b); + + parent[b] = a; + + if (rank[a] == rank[b]) + ++rank[a]; + } + }; + + struct GenParticlePayload { + int32_t pdgId = 0; + int16_t status = 0; + math::XYZTLorentzVectorD momentum; + }; + + struct GenVertexPayload { + math::XYZTLorentzVectorD position; + }; + + bool isParticleKind(TruthGraph::NodeKind kind) { + return kind == TruthGraph::NodeKind::GenParticle || kind == TruthGraph::NodeKind::SimTrack; + } + + bool isVertexKind(TruthGraph::NodeKind kind) { + return kind == TruthGraph::NodeKind::GenVertex || kind == TruthGraph::NodeKind::SimVertex; + } + + bool isGenParticleToSimTrack(TruthGraph const& g, uint32_t src, uint32_t dst) { + auto const& s = g.nodeRef(src); + auto const& d = g.nodeRef(dst); + + return s.kind == TruthGraph::NodeKind::GenParticle && d.kind == TruthGraph::NodeKind::SimTrack; + } + + void buildCSR(uint32_t nSources, + std::vector>& pairs, + std::vector& offsets, + std::vector& flat) { + std::sort(pairs.begin(), pairs.end()); + pairs.erase(std::unique(pairs.begin(), pairs.end()), pairs.end()); + + pairs.erase( + std::remove_if(pairs.begin(), pairs.end(), [nSources](auto const& edge) { return edge.first >= nSources; }), + pairs.end()); + + offsets.assign(nSources + 1, 0); + + for (auto const& edge : pairs) { + ++offsets[edge.first + 1]; + } + + for (uint32_t i = 1; i <= nSources; ++i) { + offsets[i] += offsets[i - 1]; + } + + flat.assign(pairs.size(), 0); + + auto cursor = offsets; + for (auto const& edge : pairs) { + flat[cursor[edge.first]++] = edge.second; + } + } + + template + bool validHandle(HandleT const& handle) { + return handle.isValid(); + } + + void fillGenPayloadFromHepMC2(HepMC::GenEvent const& ev, + std::unordered_map& particlePayload, + std::unordered_map& vertexPayload) { + particlePayload.reserve(ev.particles_size() * 2); + vertexPayload.reserve(ev.vertices_size() * 2); + + for (auto p = ev.particles_begin(); p != ev.particles_end(); ++p) { + if (*p == nullptr) + continue; + + const int barcode = (*p)->barcode(); + + GenParticlePayload payload; + payload.pdgId = (*p)->pdg_id(); + payload.status = static_cast((*p)->status()); + payload.momentum = math::XYZTLorentzVectorD( + (*p)->momentum().px(), (*p)->momentum().py(), (*p)->momentum().pz(), (*p)->momentum().e()); + + particlePayload.emplace(barcode, payload); + } + + for (auto v = ev.vertices_begin(); v != ev.vertices_end(); ++v) { + if (*v == nullptr) + continue; + + const int barcode = (*v)->barcode(); + + GenVertexPayload payload; + payload.position = math::XYZTLorentzVectorD( + (*v)->position().x(), (*v)->position().y(), (*v)->position().z(), (*v)->position().t()); + + vertexPayload.emplace(barcode, payload); + } + } + + void fillGenPayloadFromHepMC3(HepMC3::GenEvent const& ev, + std::unordered_map& particlePayload, + std::unordered_map& vertexPayload) { + particlePayload.reserve(ev.particles().size() * 2); + vertexPayload.reserve(ev.vertices().size() * 2); + + for (auto const& pptr : ev.particles()) { + if (!pptr) + continue; + + const int id = pptr->id(); + + GenParticlePayload payload; + payload.pdgId = pptr->pid(); + payload.status = static_cast(pptr->status()); + payload.momentum = math::XYZTLorentzVectorD( + pptr->momentum().px(), pptr->momentum().py(), pptr->momentum().pz(), pptr->momentum().e()); + + particlePayload.emplace(id, payload); + } + + for (auto const& vptr : ev.vertices()) { + if (!vptr) + continue; + + const int id = vptr->id(); + + GenVertexPayload payload; + payload.position = math::XYZTLorentzVectorD( + vptr->position().x(), vptr->position().y(), vptr->position().z(), vptr->position().t()); + + vertexPayload.emplace(id, payload); + } + } + + std::vector buildKeepMaskForAllRawNodes(TruthGraph const& raw) { + return std::vector(raw.nNodes(), 1); + } + + std::vector buildGenParticleToProductionGenVertexMap(TruthGraph const& raw, + std::vector const& keepRawNode) { + const uint32_t nRawNodes = raw.nNodes(); + + std::vector genParticleToProductionGenVertex(nRawNodes, -1); + + for (uint32_t src = 0; src < nRawNodes; ++src) { + if (!keepRawNode[src]) + continue; + + if (raw.nodeRef(src).kind != TruthGraph::NodeKind::GenVertex) + continue; + + const auto dsts = raw.children(src); + const auto edgeKinds = raw.childrenEdgeKinds(src); + + for (std::size_t i = 0; i < dsts.size(); ++i) { + const uint32_t dst = dsts[i]; + + if (dst >= nRawNodes || !keepRawNode[dst]) + continue; + + if (raw.nodeRef(dst).kind != TruthGraph::NodeKind::GenParticle) + continue; + + if (static_cast(edgeKinds[i]) != TruthGraph::EdgeKind::Gen) + continue; + + if (genParticleToProductionGenVertex[dst] < 0) + genParticleToProductionGenVertex[dst] = static_cast(src); + } + } + + return genParticleToProductionGenVertex; + } + + void buildRawSimVertexDegrees(TruthGraph const& raw, + std::vector const& keepRawNode, + std::vector& simVertexIncomingSimTracks, + std::vector& simVertexOutgoingSimTracks) { + const uint32_t nRawNodes = raw.nNodes(); + + simVertexIncomingSimTracks.assign(nRawNodes, 0); + simVertexOutgoingSimTracks.assign(nRawNodes, 0); + + for (uint32_t src = 0; src < nRawNodes; ++src) { + if (!keepRawNode[src]) + continue; + + const auto dsts = raw.children(src); + const auto edgeKinds = raw.childrenEdgeKinds(src); + + for (std::size_t i = 0; i < dsts.size(); ++i) { + const uint32_t dst = dsts[i]; + + if (dst >= nRawNodes || !keepRawNode[dst]) + continue; + + if (static_cast(edgeKinds[i]) != TruthGraph::EdgeKind::Sim) + continue; + + const auto srcKind = raw.nodeRef(src).kind; + const auto dstKind = raw.nodeRef(dst).kind; + + if (srcKind == TruthGraph::NodeKind::SimTrack && dstKind == TruthGraph::NodeKind::SimVertex) { + ++simVertexIncomingSimTracks[dst]; + } else if (srcKind == TruthGraph::NodeKind::SimVertex && dstKind == TruthGraph::NodeKind::SimTrack) { + ++simVertexOutgoingSimTracks[src]; + } + } + } + } + +} // namespace + +class TruthLogicalGraphProducer : public edm::stream::EDProducer<> { +public: + explicit TruthLogicalGraphProducer(edm::ParameterSet const& cfg) + : srcToken_(consumes(cfg.getParameter("src"))), + simTrackToken_(mayConsume(cfg.getParameter("simTracks"))), + simVertexToken_(mayConsume(cfg.getParameter("simVertices"))), + hepmc3Token_(mayConsume(cfg.getParameter("genEventHepMC3"))), + hepmc2Token_(mayConsume(cfg.getParameter("genEventHepMC"))), + mergeGenSimVertices_(cfg.getParameter("mergeGenSimVertices")), + postProcessor_(truth::TruthLogicalGraphPostProcessor::configFromPSet( + cfg.getParameter("postProcessing"))) { + produces(); + } + + static void fillDescriptions(edm::ConfigurationDescriptions& descriptions) { + edm::ParameterSetDescription desc; + + desc.add("src", edm::InputTag("truthGraphProducer")); + desc.add("simTracks", edm::InputTag("g4SimHits")); + desc.add("simVertices", edm::InputTag("g4SimHits")); + desc.add("genEventHepMC3", edm::InputTag("generatorSmeared")); + desc.add("genEventHepMC", edm::InputTag("generatorSmeared")); + + desc.add("mergeGenSimVertices", true) + ->setComment( + "If true, merge production GenVertex and SimVertex only for locally one-to-one matches induced by " + "GenParticle <-> SimTrack associations."); + + desc.add("postProcessing", truth::TruthLogicalGraphPostProcessor::psetDescription()) + ->setComment("Logical graph post-processing configuration."); + + descriptions.addWithDefaultLabel(desc); + } + + void produce(edm::Event& evt, edm::EventSetup const&) override { + auto const& raw = evt.get(srcToken_); + + if (!raw.isConsistent()) { + throw cms::Exception("TruthLogicalGraphProducer") << "Input TruthGraph is not consistent"; + } + + auto out = std::make_unique(); + + const uint32_t nRawNodes = raw.nNodes(); + + edm::Handle hSimTracks; + evt.getByToken(simTrackToken_, hSimTracks); + + edm::Handle hSimVertices; + evt.getByToken(simVertexToken_, hSimVertices); + + std::unordered_map simTrackIdToIndex; + + if (validHandle(hSimTracks)) { + simTrackIdToIndex.reserve(hSimTracks->size() * 2); + + for (uint32_t i = 0; i < hSimTracks->size(); ++i) { + simTrackIdToIndex.emplace((*hSimTracks)[i].trackId(), i); + } + } + + std::unordered_map genParticlePayload; + std::unordered_map genVertexPayload; + + bool haveGenPayload = false; + + { + edm::Handle h3; + evt.getByToken(hepmc3Token_, h3); + + if (validHandle(h3) && h3->GetEvent() != nullptr) { + const HepMC3::GenEventData* data = h3->GetEvent(); + + HepMC3::GenEvent ev3; + ev3.read_data(*data); + + fillGenPayloadFromHepMC3(ev3, genParticlePayload, genVertexPayload); + haveGenPayload = true; + } + } + + if (!haveGenPayload) { + edm::Handle h2; + evt.getByToken(hepmc2Token_, h2); + + if (validHandle(h2) && h2->GetEvent() != nullptr) { + fillGenPayloadFromHepMC2(*h2->GetEvent(), genParticlePayload, genVertexPayload); + haveGenPayload = true; + } + } + + const auto keepRawNode = buildKeepMaskForAllRawNodes(raw); + const auto genParticleToProductionGenVertex = buildGenParticleToProductionGenVertexMap(raw, keepRawNode); + + std::vector rawSimVertexIncomingSimTracks; + std::vector rawSimVertexOutgoingSimTracks; + buildRawSimVertexDegrees(raw, keepRawNode, rawSimVertexIncomingSimTracks, rawSimVertexOutgoingSimTracks); + + // ------------------------------------------------------------------ + // 1. Temporary ids + // ------------------------------------------------------------------ + std::vector rawToParticleTmp(nRawNodes, -1); + std::vector rawToVertexTmp(nRawNodes, -1); + + int nParticleTmp = 0; + int nVertexTmp = 0; + + for (uint32_t nodeId = 0; nodeId < nRawNodes; ++nodeId) { + if (!keepRawNode[nodeId]) + continue; + + const auto kind = raw.nodeRef(nodeId).kind; + + if (isParticleKind(kind)) { + rawToParticleTmp[nodeId] = nParticleTmp++; + } else if (isVertexKind(kind)) { + rawToVertexTmp[nodeId] = nVertexTmp++; + } + } + + DSU particleDSU(nParticleTmp); + DSU vertexDSU(nVertexTmp); + + std::vector> productionVertexMergeCandidates; + + auto addProductionVertexMergeCandidate = [&](uint32_t simTrackNode, uint32_t genParticleNode) { + if (!mergeGenSimVertices_) + return; + + const int32_t simVertexNode = raw.nodeSimTrackToVtx(simTrackNode); + if (simVertexNode < 0) + return; + + if (genParticleNode >= genParticleToProductionGenVertex.size()) + return; + + const int32_t genVertexNode = genParticleToProductionGenVertex[genParticleNode]; + if (genVertexNode < 0) + return; + + const uint32_t gv = static_cast(genVertexNode); + const uint32_t sv = static_cast(simVertexNode); + + if (gv >= nRawNodes || sv >= nRawNodes) + return; + + if (!keepRawNode[gv] || !keepRawNode[sv]) + return; + + if (rawToVertexTmp[gv] < 0 || rawToVertexTmp[sv] < 0) + return; + + if (raw.nodeRef(gv).kind != TruthGraph::NodeKind::GenVertex) + return; + + if (raw.nodeRef(sv).kind != TruthGraph::NodeKind::SimVertex) + return; + + productionVertexMergeCandidates.emplace_back(gv, sv); + }; + + // ------------------------------------------------------------------ + // 2. Merge particles across GEN <-> SIM and collect vertex merge candidates + // ------------------------------------------------------------------ + for (uint32_t nodeId = 0; nodeId < nRawNodes; ++nodeId) { + if (!keepRawNode[nodeId]) + continue; + + auto const& ref = raw.nodeRef(nodeId); + + if (ref.kind != TruthGraph::NodeKind::SimTrack) + continue; + + const int32_t genNode = raw.nodeSimTrackToGen(nodeId); + if (genNode < 0) + continue; + + const uint32_t genNodeU32 = static_cast(genNode); + if (genNodeU32 >= nRawNodes) + continue; + + if (!keepRawNode[genNodeU32]) + continue; + + if (raw.nodeRef(genNodeU32).kind != TruthGraph::NodeKind::GenParticle) + continue; + + if (rawToParticleTmp[nodeId] < 0 || rawToParticleTmp[genNodeU32] < 0) + continue; + + particleDSU.unite(rawToParticleTmp[nodeId], rawToParticleTmp[genNodeU32]); + addProductionVertexMergeCandidate(nodeId, genNodeU32); + } + + for (uint32_t src = 0; src < nRawNodes; ++src) { + if (!keepRawNode[src]) + continue; + + const auto dsts = raw.children(src); + const auto ekinds = raw.childrenEdgeKinds(src); + + for (std::size_t i = 0; i < dsts.size(); ++i) { + const uint32_t dst = dsts[i]; + + if (dst >= nRawNodes || !keepRawNode[dst]) + continue; + + const auto ek = static_cast(ekinds[i]); + + if (ek != TruthGraph::EdgeKind::GenToSim && ek != TruthGraph::EdgeKind::SimToGen) + continue; + + if (!isGenParticleToSimTrack(raw, src, dst)) + continue; + + if (rawToParticleTmp[src] < 0 || rawToParticleTmp[dst] < 0) + continue; + + particleDSU.unite(rawToParticleTmp[src], rawToParticleTmp[dst]); + addProductionVertexMergeCandidate(dst, src); + } + } + + if (mergeGenSimVertices_) { + std::sort(productionVertexMergeCandidates.begin(), productionVertexMergeCandidates.end()); + productionVertexMergeCandidates.erase( + std::unique(productionVertexMergeCandidates.begin(), productionVertexMergeCandidates.end()), + productionVertexMergeCandidates.end()); + + std::vector genVertexCandidateMultiplicity(nRawNodes, 0); + std::vector simVertexCandidateMultiplicity(nRawNodes, 0); + + for (auto const& candidate : productionVertexMergeCandidates) { + if (genVertexCandidateMultiplicity[candidate.first] < UINT16_MAX) + ++genVertexCandidateMultiplicity[candidate.first]; + + if (simVertexCandidateMultiplicity[candidate.second] < UINT16_MAX) + ++simVertexCandidateMultiplicity[candidate.second]; + } + + for (auto const& candidate : productionVertexMergeCandidates) { + const uint32_t gv = candidate.first; + const uint32_t sv = candidate.second; + + // Only accept locally one-to-one GenVertex <-> SimVertex matches. + // This prevents one busy SimVertex from absorbing many unrelated GenVertices. + if (genVertexCandidateMultiplicity[gv] != 1 || simVertexCandidateMultiplicity[sv] != 1) + continue; + + // Do not merge secondary SIM vertices produced by an existing SimTrack. + // Primary/injection SIM vertices can legitimately have multiple outgoing primary tracks. + if (rawSimVertexIncomingSimTracks[sv] != 0) + continue; + + if (rawSimVertexOutgoingSimTracks[sv] == 0) + continue; + + vertexDSU.unite(rawToVertexTmp[gv], rawToVertexTmp[sv]); + } + } + + // ------------------------------------------------------------------ + // 3. Compress particle and vertex representatives + // ------------------------------------------------------------------ + std::unordered_map particleRepToLogical; + std::vector rawToParticle(nRawNodes, -1); + + for (uint32_t nodeId = 0; nodeId < nRawNodes; ++nodeId) { + if (!keepRawNode[nodeId]) + continue; + + if (rawToParticleTmp[nodeId] >= 0) { + const int rep = particleDSU.find(rawToParticleTmp[nodeId]); + auto result = particleRepToLogical.emplace(rep, static_cast(particleRepToLogical.size())); + + rawToParticle[nodeId] = static_cast(result.first->second); + } + } + + std::unordered_map vertexRepToLogical; + std::vector rawToVertex(nRawNodes, -1); + + for (uint32_t nodeId = 0; nodeId < nRawNodes; ++nodeId) { + if (!keepRawNode[nodeId]) + continue; + + if (rawToVertexTmp[nodeId] >= 0) { + const int rep = vertexDSU.find(rawToVertexTmp[nodeId]); + auto result = vertexRepToLogical.emplace(rep, static_cast(vertexRepToLogical.size())); + + rawToVertex[nodeId] = static_cast(result.first->second); + } + } + + out->particles.resize(particleRepToLogical.size()); + out->vertices.resize(vertexRepToLogical.size()); + + // ------------------------------------------------------------------ + // 4. Fill payload + // ------------------------------------------------------------------ + for (uint32_t nodeId = 0; nodeId < nRawNodes; ++nodeId) { + if (!keepRawNode[nodeId]) + continue; + + auto const& ref = raw.nodeRef(nodeId); + + if (rawToParticle[nodeId] >= 0) { + auto& p = out->particles[static_cast(rawToParticle[nodeId])]; + + if (ref.kind == TruthGraph::NodeKind::GenParticle) { + p.genNode = static_cast(nodeId); + + if (nodeId < raw.genEventOfNode.size()) + p.genEvent = raw.genEventOfNode[nodeId]; + + if (p.pdgId == 0) + p.pdgId = raw.nodePdgId(nodeId); + + if (p.status == 0) + p.status = raw.nodeStatus(nodeId); + + if (p.statusFlags == 0) + p.statusFlags = raw.nodeStatusFlags(nodeId); + + if (haveGenPayload) { + const int barcode = static_cast(ref.key); + auto it = genParticlePayload.find(barcode); + + if (it != genParticlePayload.end()) { + if (p.pdgId == 0) + p.pdgId = it->second.pdgId; + + if (p.status == 0) + p.status = it->second.status; + + // Keep the GEN four-momentum as nominal for GEN and GEN+SIM logical particles. + p.momentum = it->second.momentum; + } + } + + } else if (ref.kind == TruthGraph::NodeKind::SimTrack) { + p.simNode = static_cast(nodeId); + + if (p.pdgId == 0) + p.pdgId = raw.nodePdgId(nodeId); + + if (p.status == 0) + p.status = raw.nodeStatus(nodeId); + + if (p.eventId == 0) + p.eventId = raw.nodeEventId(nodeId); + + if (validHandle(hSimTracks)) { + const auto trackId = static_cast(ref.key); + auto it = simTrackIdToIndex.find(trackId); + + if (it != simTrackIdToIndex.end()) { + auto const& t = (*hSimTracks)[it->second]; + + const math::XYZTLorentzVectorD simMomentum( + t.momentum().px(), t.momentum().py(), t.momentum().pz(), t.momentum().e()); + + // For SIM-only logical particles, use the SimTrack momentum. + // For GEN+SIM logical particles, the GEN momentum remains the nominal one. + if (!p.hasGen()) { + p.momentum = simMomentum; + } + + if (t.crossedBoundary()) { + truth::Checkpoint cp; + cp.checkpointId = 0; + + const auto& xb = t.getPositionAtBoundary(); + cp.position = math::XYZTLorentzVectorF(xb.x(), xb.y(), xb.z(), xb.t()); + + const auto& pb = t.getMomentumAtBoundary(); + cp.momentum = math::XYZTLorentzVectorF(pb.px(), pb.py(), pb.pz(), pb.e()); + + p.checkpoints.push_back(cp); + } + } + } + } + } + + if (rawToVertex[nodeId] >= 0) { + auto& v = out->vertices[static_cast(rawToVertex[nodeId])]; + + if (ref.kind == TruthGraph::NodeKind::GenVertex) { + v.genNode = static_cast(nodeId); + + if (nodeId < raw.genEventOfNode.size()) + v.genEvent = raw.genEventOfNode[nodeId]; + + if (haveGenPayload) { + const int barcode = static_cast(ref.key); + auto it = genVertexPayload.find(barcode); + + if (it != genVertexPayload.end()) { + // Keep the GEN position as nominal for GEN and GEN+SIM logical vertices. + v.position = it->second.position; + } + } + + } else if (ref.kind == TruthGraph::NodeKind::SimVertex) { + v.simNode = static_cast(nodeId); + + if (v.eventId == 0) + v.eventId = raw.nodeEventId(nodeId); + + if (validHandle(hSimVertices)) { + const auto simIndex = static_cast(ref.key); + + if (simIndex < hSimVertices->size()) { + auto const& sv = (*hSimVertices)[simIndex]; + const auto& pos = sv.position(); + + // For SIM-only logical vertices, use the SimVertex position. + // For GEN+SIM logical vertices, the GEN position remains the nominal one. + if (!v.hasGen()) { + v.position = math::XYZTLorentzVectorD(pos.x(), pos.y(), pos.z(), pos.t()); + } + } + } + } + } + } + + // ------------------------------------------------------------------ + // 5. Rebuild logical graph + // ------------------------------------------------------------------ + std::vector> particleToDecayVertexPairs; + std::vector> particleToProductionVertexPairs; + std::vector> vertexToOutgoingParticlePairs; + std::vector> vertexToIncomingParticlePairs; + + for (uint32_t src = 0; src < nRawNodes; ++src) { + if (!keepRawNode[src]) + continue; + + auto const& srcRef = raw.nodeRef(src); + const auto dsts = raw.children(src); + + for (uint32_t dst : dsts) { + if (dst >= nRawNodes || !keepRawNode[dst]) + continue; + + auto const& dstRef = raw.nodeRef(dst); + + if (isVertexKind(srcRef.kind) && isParticleKind(dstRef.kind)) { + const int32_t logicalVertex = rawToVertex[src]; + const int32_t logicalParticle = rawToParticle[dst]; + + if (logicalVertex >= 0 && logicalParticle >= 0) { + vertexToOutgoingParticlePairs.emplace_back(static_cast(logicalVertex), + static_cast(logicalParticle)); + particleToProductionVertexPairs.emplace_back(static_cast(logicalParticle), + static_cast(logicalVertex)); + } + + } else if (isParticleKind(srcRef.kind) && isVertexKind(dstRef.kind)) { + const int32_t logicalParticle = rawToParticle[src]; + const int32_t logicalVertex = rawToVertex[dst]; + + if (logicalParticle >= 0 && logicalVertex >= 0) { + particleToDecayVertexPairs.emplace_back(static_cast(logicalParticle), + static_cast(logicalVertex)); + vertexToIncomingParticlePairs.emplace_back(static_cast(logicalVertex), + static_cast(logicalParticle)); + } + } + } + } + + buildCSR( + out->nParticles(), particleToDecayVertexPairs, out->particleToDecayVertexOffsets, out->particleToDecayVertices); + + buildCSR(out->nParticles(), + particleToProductionVertexPairs, + out->particleToProductionVertexOffsets, + out->particleToProductionVertices); + + buildCSR(out->nVertices(), + vertexToOutgoingParticlePairs, + out->vertexToOutgoingParticleOffsets, + out->vertexToOutgoingParticles); + + buildCSR(out->nVertices(), + vertexToIncomingParticlePairs, + out->vertexToIncomingParticleOffsets, + out->vertexToIncomingParticles); + + *out = postProcessor_.process(std::move(*out)); + + if (!out->isConsistent()) { + throw cms::Exception("TruthLogicalGraphProducer") << "Produced truth::Graph is not consistent"; + } + + evt.put(std::move(out)); + } + +private: + edm::EDGetTokenT srcToken_; + edm::EDGetTokenT simTrackToken_; + edm::EDGetTokenT simVertexToken_; + edm::EDGetTokenT hepmc3Token_; + edm::EDGetTokenT hepmc2Token_; + + bool mergeGenSimVertices_; + truth::TruthLogicalGraphPostProcessor postProcessor_; +}; + +DEFINE_FWK_MODULE(TruthLogicalGraphProducer); diff --git a/PhysicsTools/TruthInfo/python/recHitTable_cff.py b/PhysicsTools/TruthInfo/python/recHitTable_cff.py new file mode 100644 index 0000000000000..2ea45926a5791 --- /dev/null +++ b/PhysicsTools/TruthInfo/python/recHitTable_cff.py @@ -0,0 +1,4 @@ +from PhysicsTools.TruthInfo.recHitTable_cfi import recHitTable as _recHitTable + + +recHitTable = _recHitTable.clone() \ No newline at end of file diff --git a/PhysicsTools/TruthInfo/src/Graph.cc b/PhysicsTools/TruthInfo/src/Graph.cc new file mode 100644 index 0000000000000..84831a8948872 --- /dev/null +++ b/PhysicsTools/TruthInfo/src/Graph.cc @@ -0,0 +1,411 @@ +#include "PhysicsTools/TruthInfo/interface/Graph.h" + +#include +#include +#include + +namespace { + bool checkCSR(std::vector const& offsets, std::vector const& edges, std::size_t nObjects) { + if (offsets.size() != nObjects + 1) + return false; + if (!offsets.empty() && offsets.front() != 0) + return false; + if (!offsets.empty() && offsets.back() != edges.size()) + return false; + + for (std::size_t i = 1; i < offsets.size(); ++i) { + if (offsets[i] < offsets[i - 1]) + return false; + } + return true; + } + + bool checkTargets(std::vector const& edges, uint32_t limit) { + for (auto v : edges) { + if (v >= limit) + return false; + } + return true; + } +} // namespace + +const truth::ParticleData& truth::Particle::data() const { return graph_->particles.at(id_); } + +bool truth::Particle::hasGen() const { return data().hasGen(); } + +bool truth::Particle::hasSim() const { return data().hasSim(); } + +int32_t truth::Particle::pdgId() const { return data().pdgId; } + +int16_t truth::Particle::status() const { return data().status; } + +uint64_t truth::Particle::eventId() const { return data().eventId; } + +int32_t truth::Particle::genEvent() const { return data().genEvent; } + +const math::XYZTLorentzVectorD& truth::Particle::momentum() const { return data().momentum; } + +std::span truth::Particle::checkpoints() const { + return std::span(data().checkpoints.data(), data().checkpoints.size()); +} + +bool truth::Particle::hasCheckpoints() const { return !data().checkpoints.empty(); } + +std::optional truth::Particle::checkpoint(uint32_t checkpointId) const { + for (auto const& cp : data().checkpoints) { + if (cp.checkpointId == checkpointId) + return cp; + } + return std::nullopt; +} + +uint16_t truth::Particle::statusFlags() const { return data().statusFlags; } + +bool truth::Particle::isRoot() const { return valid() && graph_->productionVertices(id_).empty(); } + +bool truth::Particle::isLeaf() const { return valid() && graph_->decayVertices(id_).empty(); } + +std::vector truth::Particle::productionVertices() const { + return valid() ? graph_->productionVerticesOf(id_) : std::vector{}; +} + +std::vector truth::Particle::decayVertices() const { + return valid() ? graph_->decayVerticesOf(id_) : std::vector{}; +} + +std::vector truth::Particle::parents() const { + return valid() ? graph_->parentsOf(id_) : std::vector{}; +} + +std::vector truth::Particle::children() const { + return valid() ? graph_->childrenOf(id_) : std::vector{}; +} + +std::vector truth::Particle::ancestors() const { + return valid() ? graph_->ancestorsOf(id_) : std::vector{}; +} + +std::vector truth::Particle::descendants() const { + return valid() ? graph_->descendantsOf(id_) : std::vector{}; +} + +bool truth::Particle::hasAncestorPdgId(int pdgId) const { return firstAncestorWithPdgId(pdgId).has_value(); } + +std::optional truth::Particle::firstAncestorWithPdgId(int pdgId) const { + return valid() ? graph_->firstAncestorWithPdgIdOf(id_, pdgId) : std::nullopt; +} + +std::optional truth::Particle::firstCommonAncestor(Particle other) const { + if (!valid() || !other.valid() || graph_ != other.graph_) + return std::nullopt; + return graph_->firstCommonAncestorOf(id_, other.id_); +} + +const truth::VertexData& truth::Vertex::data() const { return graph_->vertices.at(id_); } + +bool truth::Vertex::hasGen() const { return data().hasGen(); } + +bool truth::Vertex::hasSim() const { return data().hasSim(); } + +uint64_t truth::Vertex::eventId() const { return data().eventId; } + +int32_t truth::Vertex::genEvent() const { return data().genEvent; } + +const math::XYZTLorentzVectorD& truth::Vertex::position() const { return data().position; } + +bool truth::Vertex::isSource() const { return valid() && graph_->incomingParticles(id_).empty(); } + +bool truth::Vertex::isSink() const { return valid() && graph_->outgoingParticles(id_).empty(); } + +std::vector truth::Vertex::incomingParticles() const { + return valid() ? graph_->incomingParticlesOf(id_) : std::vector{}; +} + +std::vector truth::Vertex::outgoingParticles() const { + return valid() ? graph_->outgoingParticlesOf(id_) : std::vector{}; +} + +truth::Particle truth::Graph::particle(size_type id) const { + return id < nParticles() ? Particle(this, id) : Particle{}; +} + +truth::Vertex truth::Graph::vertex(size_type id) const { return id < nVertices() ? Vertex(this, id) : Vertex{}; } + +std::vector truth::Graph::particleViews() const { + std::vector out; + out.reserve(nParticles()); + for (size_type i = 0; i < nParticles(); ++i) + out.emplace_back(this, i); + return out; +} + +std::vector truth::Graph::vertexViews() const { + std::vector out; + out.reserve(nVertices()); + for (size_type i = 0; i < nVertices(); ++i) + out.emplace_back(this, i); + return out; +} + +std::vector truth::Graph::roots() const { + std::vector out; + for (size_type i = 0; i < nParticles(); ++i) { + if (productionVertices(i).empty()) + out.emplace_back(this, i); + } + return out; +} + +std::vector truth::Graph::leaves() const { + std::vector out; + for (size_type i = 0; i < nParticles(); ++i) { + if (decayVertices(i).empty()) + out.emplace_back(this, i); + } + return out; +} + +std::vector truth::Graph::sourceVertices() const { + std::vector out; + for (size_type i = 0; i < nVertices(); ++i) { + if (incomingParticles(i).empty()) + out.emplace_back(this, i); + } + return out; +} + +std::vector truth::Graph::sinkVertices() const { + std::vector out; + for (size_type i = 0; i < nVertices(); ++i) { + if (outgoingParticles(i).empty()) + out.emplace_back(this, i); + } + return out; +} + +std::vector truth::Graph::productionVerticesOf(size_type particleId) const { + std::vector out; + for (uint32_t v : productionVertices(particleId)) + out.emplace_back(this, v); + return out; +} + +std::vector truth::Graph::decayVerticesOf(size_type particleId) const { + std::vector out; + for (uint32_t v : decayVertices(particleId)) + out.emplace_back(this, v); + return out; +} + +std::vector truth::Graph::incomingParticlesOf(size_type vertexId) const { + std::vector out; + for (uint32_t p : incomingParticles(vertexId)) + out.emplace_back(this, p); + return out; +} + +std::vector truth::Graph::outgoingParticlesOf(size_type vertexId) const { + std::vector out; + for (uint32_t p : outgoingParticles(vertexId)) + out.emplace_back(this, p); + return out; +} + +std::vector truth::Graph::parentsOf(size_type particleId) const { + std::vector out; + if (particleId >= nParticles()) + return out; + + std::vector seen(nParticles(), 0); + + for (uint32_t v : productionVertices(particleId)) { + for (uint32_t p : incomingParticles(v)) { + if (p == particleId) + continue; + if (!seen[p]) { + seen[p] = 1; + out.emplace_back(this, p); + } + } + } + return out; +} + +std::vector truth::Graph::childrenOf(size_type particleId) const { + std::vector out; + if (particleId >= nParticles()) + return out; + + std::vector seen(nParticles(), 0); + + for (uint32_t v : decayVertices(particleId)) { + for (uint32_t p : outgoingParticles(v)) { + if (p == particleId) + continue; + if (!seen[p]) { + seen[p] = 1; + out.emplace_back(this, p); + } + } + } + return out; +} + +std::vector truth::Graph::ancestorsOf(size_type particleId) const { + std::vector out; + if (particleId >= nParticles()) + return out; + + std::vector dist(nParticles(), -1); + std::queue q; + + for (auto const& parent : parentsOf(particleId)) { + dist[parent.id()] = 1; + q.push(parent.id()); + out.push_back(parent); + } + + while (!q.empty()) { + const uint32_t cur = q.front(); + q.pop(); + + for (auto const& parent : parentsOf(cur)) { + if (dist[parent.id()] >= 0) + continue; + dist[parent.id()] = dist[cur] + 1; + q.push(parent.id()); + out.push_back(parent); + } + } + + return out; +} + +std::vector truth::Graph::descendantsOf(size_type particleId) const { + std::vector out; + if (particleId >= nParticles()) + return out; + + std::vector dist(nParticles(), -1); + std::queue q; + + for (auto const& child : childrenOf(particleId)) { + dist[child.id()] = 1; + q.push(child.id()); + out.push_back(child); + } + + while (!q.empty()) { + const uint32_t cur = q.front(); + q.pop(); + + for (auto const& child : childrenOf(cur)) { + if (dist[child.id()] >= 0) + continue; + dist[child.id()] = dist[cur] + 1; + q.push(child.id()); + out.push_back(child); + } + } + + return out; +} + +std::optional truth::Graph::firstAncestorWithPdgIdOf(size_type particleId, int pdgId) const { + if (particleId >= nParticles()) + return std::nullopt; + + std::vector seen(nParticles(), 0); + std::queue q; + + for (auto const& parent : parentsOf(particleId)) { + seen[parent.id()] = 1; + q.push(parent.id()); + } + + while (!q.empty()) { + const uint32_t cur = q.front(); + q.pop(); + + if (particles[cur].pdgId == pdgId) + return particle(cur); + + for (auto const& parent : parentsOf(cur)) { + if (seen[parent.id()]) + continue; + seen[parent.id()] = 1; + q.push(parent.id()); + } + } + + return std::nullopt; +} + +std::optional truth::Graph::firstCommonAncestorOf(size_type a, size_type b) const { + if (a >= nParticles() || b >= nParticles()) + return std::nullopt; + + std::vector distA(nParticles(), -1); + std::vector distB(nParticles(), -1); + + auto fillDistances = [this](uint32_t start, std::vector& dist) { + std::queue q; + dist[start] = 0; + q.push(start); + + while (!q.empty()) { + const uint32_t cur = q.front(); + q.pop(); + + for (auto const& parent : parentsOf(cur)) { + if (dist[parent.id()] >= 0) + continue; + dist[parent.id()] = dist[cur] + 1; + q.push(parent.id()); + } + } + }; + + fillDistances(a, distA); + fillDistances(b, distB); + + int bestId = -1; + int bestScore = -1; + int bestMaxDist = -1; + + for (uint32_t i = 0; i < nParticles(); ++i) { + if (distA[i] < 0 || distB[i] < 0) + continue; + + const int score = distA[i] + distB[i]; + const int maxDist = std::max(distA[i], distB[i]); + + if (bestId < 0 || score < bestScore || (score == bestScore && maxDist < bestMaxDist) || + (score == bestScore && maxDist == bestMaxDist && static_cast(i) < bestId)) { + bestId = static_cast(i); + bestScore = score; + bestMaxDist = maxDist; + } + } + + if (bestId < 0) + return std::nullopt; + + return particle(static_cast(bestId)); +} + +bool truth::Graph::isConsistent() const { + const bool p2dv = checkCSR(particleToDecayVertexOffsets, particleToDecayVertices, particles.size()) && + checkTargets(particleToDecayVertices, nVertices()); + + const bool p2pv = checkCSR(particleToProductionVertexOffsets, particleToProductionVertices, particles.size()) && + checkTargets(particleToProductionVertices, nVertices()); + + const bool v2op = checkCSR(vertexToOutgoingParticleOffsets, vertexToOutgoingParticles, vertices.size()) && + checkTargets(vertexToOutgoingParticles, nParticles()); + + const bool v2ip = checkCSR(vertexToIncomingParticleOffsets, vertexToIncomingParticles, vertices.size()) && + checkTargets(vertexToIncomingParticles, nParticles()); + + return p2dv && p2pv && v2op && v2ip; +} diff --git a/PhysicsTools/TruthInfo/src/LogicalGraphHitIndexBuilder.cc b/PhysicsTools/TruthInfo/src/LogicalGraphHitIndexBuilder.cc new file mode 100644 index 0000000000000..8fe6de8c339fc --- /dev/null +++ b/PhysicsTools/TruthInfo/src/LogicalGraphHitIndexBuilder.cc @@ -0,0 +1,143 @@ +#include "PhysicsTools/TruthInfo/interface/LogicalGraphHitIndexBuilder.h" + +#include +#include + +namespace truth { + + LogicalGraphHitIndexBuilder::LogicalGraphHitIndexBuilder(uint32_t nParticles) + : nParticles_(nParticles), children_(nParticles), directHits_(nParticles), subgraphHits_(nParticles) {} + + void LogicalGraphHitIndexBuilder::setSimTrackForParticle(uint32_t particleId, uint32_t trackId) { + if (particleId >= nParticles_) + return; + + trackIdToParticle_[trackId] = particleId; + } + + void LogicalGraphHitIndexBuilder::addParticleChild(uint32_t parentParticleId, uint32_t childParticleId) { + if (parentParticleId >= nParticles_ || childParticleId >= nParticles_) + return; + + children_[parentParticleId].push_back(childParticleId); + } + + void LogicalGraphHitIndexBuilder::addHitForTrack(uint32_t trackId, + uint32_t detId, + uint32_t recHitIndex, + float energy) { + if (energy <= 0.f) + return; + + auto it = trackIdToParticle_.find(trackId); + if (it == trackIdToParticle_.end()) + return; + + addHit(directHits_[it->second], detId, recHitIndex, energy); + } + + void LogicalGraphHitIndexBuilder::addHit(HitMap& hits, uint32_t detId, uint32_t recHitIndex, float energy) { + auto& entry = hits[detId]; + entry.energy += energy; + + if (entry.recHitIndex == Hit::invalidRecHitIndex && recHitIndex != Hit::invalidRecHitIndex) { + entry.recHitIndex = recHitIndex; + } + } + + void LogicalGraphHitIndexBuilder::addHit(HitMap& hits, Hit const& hit) { + addHit(hits, hit.detId, hit.recHitIndex, hit.energy); + } + + std::vector LogicalGraphHitIndexBuilder::sortedHits(HitMap const& hits) { + std::vector out; + out.reserve(hits.size()); + + for (auto const& [detId, acc] : hits) { + if (acc.energy <= 0.f) + continue; + + Hit hit; + hit.detId = detId; + hit.recHitIndex = acc.recHitIndex; + hit.energy = acc.energy; + out.push_back(hit); + } + + std::sort(out.begin(), out.end(), [](Hit const& a, Hit const& b) { + if (a.detId != b.detId) + return a.detId < b.detId; + return a.recHitIndex < b.recHitIndex; + }); + + return out; + } + + void LogicalGraphHitIndexBuilder::fillSubgraphHits(uint32_t particleId, std::vector& state) { + if (particleId >= nParticles_) + return; + + if (state[particleId] == 2) + return; + + if (state[particleId] == 1) + return; + + state[particleId] = 1; + + auto& out = subgraphHits_[particleId]; + + for (auto const& [detId, acc] : directHits_[particleId]) { + addHit(out, detId, acc.recHitIndex, acc.energy); + } + + for (uint32_t childId : children_[particleId]) { + if (childId >= nParticles_) + continue; + + fillSubgraphHits(childId, state); + + for (auto const& [detId, acc] : subgraphHits_[childId]) { + addHit(out, detId, acc.recHitIndex, acc.energy); + } + } + + state[particleId] = 2; + } + + LogicalGraphHitIndex LogicalGraphHitIndexBuilder::finish() { + std::vector state(nParticles_, 0); + for (uint32_t particleId = 0; particleId < nParticles_; ++particleId) { + fillSubgraphHits(particleId, state); + } + + std::vector directOffsets; + std::vector directHitStorage; + directOffsets.reserve(nParticles_ + 1); + directOffsets.push_back(0); + + for (uint32_t particleId = 0; particleId < nParticles_; ++particleId) { + auto hits = sortedHits(directHits_[particleId]); + directHitStorage.insert(directHitStorage.end(), hits.begin(), hits.end()); + directOffsets.push_back(static_cast(directHitStorage.size())); + } + + std::vector subgraphOffsets; + std::vector subgraphHitStorage; + subgraphOffsets.reserve(nParticles_ + 1); + subgraphOffsets.push_back(0); + + for (uint32_t particleId = 0; particleId < nParticles_; ++particleId) { + auto hits = sortedHits(subgraphHits_[particleId]); + subgraphHitStorage.insert(subgraphHitStorage.end(), hits.begin(), hits.end()); + subgraphOffsets.push_back(static_cast(subgraphHitStorage.size())); + } + + return LogicalGraphHitIndex(nParticles_, + std::move(directOffsets), + std::move(directHitStorage), + std::move(subgraphOffsets), + std::move(subgraphHitStorage)); + } + +} // namespace truth diff --git a/PhysicsTools/TruthInfo/src/TruthGraph.cc b/PhysicsTools/TruthInfo/src/TruthGraph.cc new file mode 100644 index 0000000000000..ae7cb5454b8ba --- /dev/null +++ b/PhysicsTools/TruthInfo/src/TruthGraph.cc @@ -0,0 +1,24 @@ +// Author: Felice Pantaleo - CERN +// Date: 03/2026 +// A compact, read-only graph representation of the truth information in an event. +// The graph is built in the TruthGraphProducer module, which also fills the node metadata and associations. +// The graph is intended to be a common data format for various use cases (e.g. validation, analysis, visualization). + +#include "PhysicsTools/TruthInfo/interface/TruthGraph.h" + +bool TruthGraph::isConsistent() const { + if (offsets.size() != nodes.size() + 1) + return false; + if (!offsets.empty() && offsets.front() != 0) + return false; + if (!offsets.empty() && offsets.back() != edges.size()) + return false; + if (!edgeKind.empty() && edgeKind.size() != edges.size()) + return false; + + for (size_t i = 1; i < offsets.size(); ++i) { + if (offsets[i] < offsets[i - 1]) + return false; + } + return true; +} diff --git a/PhysicsTools/TruthInfo/src/TruthLogicalGraphPostProcessor.cc b/PhysicsTools/TruthInfo/src/TruthLogicalGraphPostProcessor.cc new file mode 100644 index 0000000000000..1cf6e1013eda1 --- /dev/null +++ b/PhysicsTools/TruthInfo/src/TruthLogicalGraphPostProcessor.cc @@ -0,0 +1,908 @@ +#include "PhysicsTools/TruthInfo/interface/TruthLogicalGraphPostProcessor.h" + +#include +#include +#include +#include +#include +#include + +namespace { + + struct DSU { + std::vector parent; + std::vector rank; + + explicit DSU(int n) : parent(n), rank(n, 0) { + for (int i = 0; i < n; ++i) + parent[i] = i; + } + + int find(int x) { + while (parent[x] != x) { + parent[x] = parent[parent[x]]; + x = parent[x]; + } + return x; + } + + void unite(int a, int b) { + a = find(a); + b = find(b); + + if (a == b) + return; + + if (rank[a] < rank[b]) + std::swap(a, b); + + parent[b] = a; + + if (rank[a] == rank[b]) + ++rank[a]; + } + }; + + bool containsPdgId(std::vector const& pdgIds, int32_t pdgId) { + return std::find(pdgIds.begin(), pdgIds.end(), pdgId) != pdgIds.end(); + } + + bool containsParticleId(std::vector const& particleIds, uint32_t particleId) { + return std::find(particleIds.begin(), particleIds.end(), particleId) != particleIds.end(); + } + + bool isIgnoredParticle(truth::Graph const& graph, + uint32_t particleId, + std::vector const& ignoredPdgIds, + std::vector const& ignoredParticleIds) { + if (particleId >= graph.nParticles()) + return false; + + if (containsParticleId(ignoredParticleIds, particleId)) + return true; + + if (containsPdgId(ignoredPdgIds, graph.particles[particleId].pdgId)) + return true; + + return false; + } + + bool isStableGenParticle(truth::Graph const& graph, uint32_t particleId) { + if (particleId >= graph.nParticles()) + return false; + + auto const& particle = graph.particles[particleId]; + + return particle.hasGen() && particle.status == 1; + } + + void buildCSR(uint32_t nSources, + std::vector>& pairs, + std::vector& offsets, + std::vector& flat) { + std::sort(pairs.begin(), pairs.end()); + pairs.erase(std::unique(pairs.begin(), pairs.end()), pairs.end()); + + pairs.erase( + std::remove_if(pairs.begin(), pairs.end(), [nSources](auto const& edge) { return edge.first >= nSources; }), + pairs.end()); + + offsets.assign(nSources + 1, 0); + + for (auto const& edge : pairs) { + ++offsets[edge.first + 1]; + } + + for (uint32_t i = 1; i <= nSources; ++i) { + offsets[i] += offsets[i - 1]; + } + + flat.assign(pairs.size(), 0); + + auto cursor = offsets; + for (auto const& edge : pairs) { + flat[cursor[edge.first]++] = edge.second; + } + } + + bool directCollapsibleGenParticleChain(truth::Graph const& graph, + uint32_t particleId, + uint32_t& childId, + uint32_t& decayVertexId) { + if (particleId >= graph.nParticles()) + return false; + + auto const& particle = graph.particles[particleId]; + + if (!particle.hasGen()) + return false; + + // Never collapse stable final-state GEN particles. + if (particle.status == 1) + return false; + + if (particle.pdgId == 0) + return false; + + const auto decayVertices = graph.decayVertices(particleId); + if (decayVertices.size() != 1) + return false; + + const uint32_t vertexId = decayVertices.front(); + if (vertexId >= graph.nVertices()) + return false; + + auto const& vertex = graph.vertices[vertexId]; + + if (!vertex.hasGen() || vertex.hasSim()) + return false; + + const auto incoming = graph.incomingParticles(vertexId); + const auto outgoing = graph.outgoingParticles(vertexId); + + if (incoming.size() != 1 || incoming.front() != particleId) + return false; + + if (outgoing.size() != 1) + return false; + + const uint32_t candidateChild = outgoing.front(); + if (candidateChild >= graph.nParticles()) + return false; + + if (candidateChild == particleId) + return false; + + auto const& child = graph.particles[candidateChild]; + + if (!child.hasGen()) + return false; + + if (child.pdgId != particle.pdgId) + return false; + + childId = candidateChild; + decayVertexId = vertexId; + + return true; + } + + truth::Graph collapseIntermediateGenParticleChains(truth::Graph const& input) { + if (input.empty()) + return input; + + const uint32_t nParticles = input.nParticles(); + const uint32_t nVertices = input.nVertices(); + + std::vector directChild(nParticles, -1); + std::vector skipVertex(nVertices, 0); + + for (uint32_t particleId = 0; particleId < nParticles; ++particleId) { + uint32_t childId = 0; + uint32_t decayVertexId = 0; + + if (!directCollapsibleGenParticleChain(input, particleId, childId, decayVertexId)) + continue; + + directChild[particleId] = static_cast(childId); + skipVertex[decayVertexId] = 1; + } + + std::vector particleRepresentative(nParticles, 0); + + for (uint32_t particleId = 0; particleId < nParticles; ++particleId) { + uint32_t current = particleId; + + for (uint32_t step = 0; step < nParticles; ++step) { + const int32_t next = directChild[current]; + if (next < 0) + break; + + current = static_cast(next); + } + + particleRepresentative[particleId] = current; + } + + truth::Graph output; + + std::unordered_map representativeToNewParticle; + representativeToNewParticle.reserve(nParticles); + + std::vector oldParticleToNew(nParticles, -1); + + for (uint32_t oldParticle = 0; oldParticle < nParticles; ++oldParticle) { + const uint32_t representative = particleRepresentative[oldParticle]; + + auto inserted = + representativeToNewParticle.emplace(representative, static_cast(output.particles.size())); + + const uint32_t newParticle = inserted.first->second; + + if (inserted.second) { + output.particles.push_back(input.particles[representative]); + } + + oldParticleToNew[oldParticle] = static_cast(newParticle); + } + + std::vector keepVertex(nVertices, 0); + + for (uint32_t oldVertex = 0; oldVertex < nVertices; ++oldVertex) { + if (skipVertex[oldVertex]) + continue; + + for (uint32_t oldParticle : input.incomingParticles(oldVertex)) { + if (oldParticle < oldParticleToNew.size() && oldParticleToNew[oldParticle] >= 0) { + keepVertex[oldVertex] = 1; + break; + } + } + + if (keepVertex[oldVertex]) + continue; + + for (uint32_t oldParticle : input.outgoingParticles(oldVertex)) { + if (oldParticle < oldParticleToNew.size() && oldParticleToNew[oldParticle] >= 0) { + keepVertex[oldVertex] = 1; + break; + } + } + } + + std::vector oldVertexToNew(nVertices, -1); + + for (uint32_t oldVertex = 0; oldVertex < nVertices; ++oldVertex) { + if (!keepVertex[oldVertex]) + continue; + + oldVertexToNew[oldVertex] = static_cast(output.vertices.size()); + output.vertices.push_back(input.vertices[oldVertex]); + } + + std::vector> particleToDecayVertexPairs; + std::vector> particleToProductionVertexPairs; + std::vector> vertexToOutgoingParticlePairs; + std::vector> vertexToIncomingParticlePairs; + + for (uint32_t oldVertex = 0; oldVertex < nVertices; ++oldVertex) { + const int32_t newVertex = oldVertexToNew[oldVertex]; + if (newVertex < 0) + continue; + + for (uint32_t oldParticle : input.incomingParticles(oldVertex)) { + if (oldParticle >= oldParticleToNew.size()) + continue; + + const int32_t newParticle = oldParticleToNew[oldParticle]; + if (newParticle < 0) + continue; + + particleToDecayVertexPairs.emplace_back(static_cast(newParticle), static_cast(newVertex)); + vertexToIncomingParticlePairs.emplace_back(static_cast(newVertex), + static_cast(newParticle)); + } + + for (uint32_t oldParticle : input.outgoingParticles(oldVertex)) { + if (oldParticle >= oldParticleToNew.size()) + continue; + + const int32_t newParticle = oldParticleToNew[oldParticle]; + if (newParticle < 0) + continue; + + vertexToOutgoingParticlePairs.emplace_back(static_cast(newVertex), + static_cast(newParticle)); + particleToProductionVertexPairs.emplace_back(static_cast(newParticle), + static_cast(newVertex)); + } + } + + buildCSR(output.nParticles(), + particleToDecayVertexPairs, + output.particleToDecayVertexOffsets, + output.particleToDecayVertices); + + buildCSR(output.nParticles(), + particleToProductionVertexPairs, + output.particleToProductionVertexOffsets, + output.particleToProductionVertices); + + buildCSR(output.nVertices(), + vertexToOutgoingParticlePairs, + output.vertexToOutgoingParticleOffsets, + output.vertexToOutgoingParticles); + + buildCSR(output.nVertices(), + vertexToIncomingParticlePairs, + output.vertexToIncomingParticleOffsets, + output.vertexToIncomingParticles); + + return output; + } + + void markDownstreamFromParticle(truth::Graph const& graph, + uint32_t particleId, + std::vector& keepParticle, + std::vector& keepVertex) { + if (particleId >= graph.nParticles()) + return; + + std::queue queue; + + if (!keepParticle[particleId]) { + keepParticle[particleId] = 1; + queue.push(particleId); + } + + while (!queue.empty()) { + const uint32_t currentParticle = queue.front(); + queue.pop(); + + for (uint32_t vertexId : graph.decayVertices(currentParticle)) { + if (vertexId >= graph.nVertices()) + continue; + + keepVertex[vertexId] = 1; + + for (uint32_t childId : graph.outgoingParticles(vertexId)) { + if (childId >= graph.nParticles()) + continue; + + if (!keepParticle[childId]) { + keepParticle[childId] = 1; + queue.push(childId); + } + } + } + } + } + + void markUpstreamToParticle(truth::Graph const& graph, + uint32_t particleId, + std::vector& keepParticle, + std::vector& keepVertex) { + if (particleId >= graph.nParticles()) + return; + + std::queue queue; + + if (!keepParticle[particleId]) { + keepParticle[particleId] = 1; + } + + queue.push(particleId); + + while (!queue.empty()) { + const uint32_t currentParticle = queue.front(); + queue.pop(); + + for (uint32_t vertexId : graph.productionVertices(currentParticle)) { + if (vertexId >= graph.nVertices()) + continue; + + keepVertex[vertexId] = 1; + + for (uint32_t parentId : graph.incomingParticles(vertexId)) { + if (parentId >= graph.nParticles()) + continue; + + if (!keepParticle[parentId]) { + keepParticle[parentId] = 1; + queue.push(parentId); + } + } + } + } + } + + std::vector expandSeedsWithParents(truth::Graph const& graph, + std::vector const& seedParticles, + uint32_t parentDepth) { + std::vector startParticles = seedParticles; + + std::vector seen(graph.nParticles(), 0); + std::queue> queue; + + for (const uint32_t seed : seedParticles) { + if (seed >= graph.nParticles()) + continue; + + if (!seen[seed]) { + seen[seed] = 1; + queue.emplace(seed, 0); + } + } + + while (!queue.empty()) { + const auto [particleId, depth] = queue.front(); + queue.pop(); + + if (depth >= parentDepth) + continue; + + for (const uint32_t vertexId : graph.productionVertices(particleId)) { + if (vertexId >= graph.nVertices()) + continue; + + for (const uint32_t parentId : graph.incomingParticles(vertexId)) { + if (parentId >= graph.nParticles()) + continue; + + if (!seen[parentId]) { + seen[parentId] = 1; + startParticles.push_back(parentId); + queue.emplace(parentId, depth + 1); + } + } + } + } + + std::sort(startParticles.begin(), startParticles.end()); + startParticles.erase(std::unique(startParticles.begin(), startParticles.end()), startParticles.end()); + + return startParticles; + } + + void closeKeptVerticesWithAllParents(truth::Graph const& graph, + std::vector& keepParticle, + std::vector& keepVertex) { + bool changed = true; + + while (changed) { + changed = false; + + for (uint32_t vertexId = 0; vertexId < graph.nVertices(); ++vertexId) { + if (!keepVertex[vertexId]) + continue; + + for (uint32_t parentId : graph.incomingParticles(vertexId)) { + if (parentId >= graph.nParticles()) + continue; + + if (keepParticle[parentId]) + continue; + + markUpstreamToParticle(graph, parentId, keepParticle, keepVertex); + changed = true; + } + } + } + } + + void dropVerticesWithoutVisibleParticles(truth::Graph const& graph, + std::vector const& keepParticle, + std::vector& keepVertex) { + for (uint32_t vertexId = 0; vertexId < graph.nVertices(); ++vertexId) { + if (!keepVertex[vertexId]) + continue; + + bool hasVisibleIncoming = false; + bool hasVisibleOutgoing = false; + + for (uint32_t particleId : graph.incomingParticles(vertexId)) { + if (particleId < graph.nParticles() && keepParticle[particleId]) { + hasVisibleIncoming = true; + break; + } + } + + for (uint32_t particleId : graph.outgoingParticles(vertexId)) { + if (particleId < graph.nParticles() && keepParticle[particleId]) { + hasVisibleOutgoing = true; + break; + } + } + + if (!hasVisibleIncoming && !hasVisibleOutgoing) + keepVertex[vertexId] = 0; + } + } + + truth::Graph rebuildFilteredGraph(truth::Graph const& input, + std::vector const& keepParticle, + std::vector const& keepVertex, + std::vector const& connectToCollapsedStableVertex) { + const uint32_t nParticles = input.nParticles(); + const uint32_t nVertices = input.nVertices(); + + const bool needCollapsedStableVertex = std::any_of(connectToCollapsedStableVertex.begin(), + connectToCollapsedStableVertex.end(), + [](uint8_t value) { return value != 0; }); + + truth::Graph output; + + std::vector oldParticleToNew(nParticles, -1); + std::vector oldVertexToNew(nVertices, -1); + + output.particles.reserve(nParticles); + output.vertices.reserve(nVertices + (needCollapsedStableVertex ? 1 : 0)); + + for (uint32_t oldParticle = 0; oldParticle < nParticles; ++oldParticle) { + if (!keepParticle[oldParticle]) + continue; + + oldParticleToNew[oldParticle] = static_cast(output.particles.size()); + output.particles.push_back(input.particles[oldParticle]); + } + + for (uint32_t oldVertex = 0; oldVertex < nVertices; ++oldVertex) { + if (!keepVertex[oldVertex]) + continue; + + oldVertexToNew[oldVertex] = static_cast(output.vertices.size()); + output.vertices.push_back(input.vertices[oldVertex]); + } + + int32_t collapsedStableVertex = -1; + + if (needCollapsedStableVertex) { + collapsedStableVertex = static_cast(output.vertices.size()); + + truth::VertexData vertex; + vertex.genNode = -1; + vertex.simNode = -1; + vertex.eventId = 0; + vertex.genEvent = -1; + + output.vertices.push_back(vertex); + } + + std::vector> particleToDecayVertexPairs; + std::vector> particleToProductionVertexPairs; + std::vector> vertexToOutgoingParticlePairs; + std::vector> vertexToIncomingParticlePairs; + + for (uint32_t oldVertex = 0; oldVertex < nVertices; ++oldVertex) { + const int32_t newVertex = oldVertexToNew[oldVertex]; + if (newVertex < 0) + continue; + + for (uint32_t oldParticle : input.incomingParticles(oldVertex)) { + if (oldParticle >= nParticles) + continue; + + const int32_t newParticle = oldParticleToNew[oldParticle]; + if (newParticle < 0) + continue; + + particleToDecayVertexPairs.emplace_back(static_cast(newParticle), static_cast(newVertex)); + vertexToIncomingParticlePairs.emplace_back(static_cast(newVertex), + static_cast(newParticle)); + } + + for (uint32_t oldParticle : input.outgoingParticles(oldVertex)) { + if (oldParticle >= nParticles) + continue; + + const int32_t newParticle = oldParticleToNew[oldParticle]; + if (newParticle < 0) + continue; + + vertexToOutgoingParticlePairs.emplace_back(static_cast(newVertex), + static_cast(newParticle)); + particleToProductionVertexPairs.emplace_back(static_cast(newParticle), + static_cast(newVertex)); + } + } + + if (collapsedStableVertex >= 0) { + const uint32_t newVertex = static_cast(collapsedStableVertex); + + for (uint32_t oldParticle = 0; oldParticle < nParticles; ++oldParticle) { + if (!connectToCollapsedStableVertex[oldParticle]) + continue; + + const int32_t newParticle = oldParticleToNew[oldParticle]; + if (newParticle < 0) + continue; + + vertexToOutgoingParticlePairs.emplace_back(newVertex, static_cast(newParticle)); + particleToProductionVertexPairs.emplace_back(static_cast(newParticle), newVertex); + } + } + + buildCSR(output.nParticles(), + particleToDecayVertexPairs, + output.particleToDecayVertexOffsets, + output.particleToDecayVertices); + + buildCSR(output.nParticles(), + particleToProductionVertexPairs, + output.particleToProductionVertexOffsets, + output.particleToProductionVertices); + + buildCSR(output.nVertices(), + vertexToOutgoingParticlePairs, + output.vertexToOutgoingParticleOffsets, + output.vertexToOutgoingParticles); + + buildCSR(output.nVertices(), + vertexToIncomingParticlePairs, + output.vertexToIncomingParticleOffsets, + output.vertexToIncomingParticles); + + return output; + } + + truth::Graph filterGraphBySeedPdgIds(truth::Graph const& input, + truth::LogicalGraphPostProcessingConfig const& config) { + if (input.empty()) + return input; + + if (config.seedPdgIds.empty()) + return input; + + const uint32_t nParticles = input.nParticles(); + const uint32_t nVertices = input.nVertices(); + + std::vector seedParticles; + + for (uint32_t particleId = 0; particleId < nParticles; ++particleId) { + if (containsPdgId(config.seedPdgIds, input.particles[particleId].pdgId)) + seedParticles.push_back(particleId); + } + + if (seedParticles.empty()) + return input; + + const auto startParticles = expandSeedsWithParents(input, seedParticles, config.seedParentDepth); + + std::vector keepParticle(nParticles, 0); + std::vector keepVertex(nVertices, 0); + + for (const uint32_t particleId : startParticles) { + markDownstreamFromParticle(input, particleId, keepParticle, keepVertex); + } + + // The graph is a DAG, not a tree. If a kept downstream vertex has parents + // that are not in the selected subgraph, keep those parents and their full + // upstream history to avoid creating misleading partial vertices. + closeKeptVerticesWithAllParents(input, keepParticle, keepVertex); + + // Keep every final-state GEN particle unless the user explicitly asked to + // ignore/collapse it. Stable particles outside the interesting subgraph are + // attached to one artificial source vertex. + std::vector connectToCollapsedStableVertex(nParticles, 0); + + for (uint32_t particleId = 0; particleId < nParticles; ++particleId) { + if (!isStableGenParticle(input, particleId)) + continue; + + if (isIgnoredParticle(input, particleId, config.ignoredPdgIds, config.ignoredParticleIds)) + continue; + + if (keepParticle[particleId]) + continue; + + keepParticle[particleId] = 1; + connectToCollapsedStableVertex[particleId] = 1; + } + + dropVerticesWithoutVisibleParticles(input, keepParticle, keepVertex); + + return rebuildFilteredGraph(input, keepParticle, keepVertex, connectToCollapsedStableVertex); + } + + truth::Graph collapseIgnoredParticles(truth::Graph const& input, + std::vector const& ignoredPdgIds, + std::vector const& ignoredParticleIds) { + if (input.empty()) + return input; + + if (ignoredPdgIds.empty() && ignoredParticleIds.empty()) + return input; + + const uint32_t nParticles = input.nParticles(); + const uint32_t nVertices = input.nVertices(); + + std::vector removeParticle(nParticles, 0); + + for (uint32_t particleId = 0; particleId < nParticles; ++particleId) { + if (isIgnoredParticle(input, particleId, ignoredPdgIds, ignoredParticleIds)) + removeParticle[particleId] = 1; + } + + if (std::none_of(removeParticle.begin(), removeParticle.end(), [](uint8_t value) { return value != 0; })) + return input; + + DSU vertexDSU(static_cast(nVertices)); + + for (uint32_t particleId = 0; particleId < nParticles; ++particleId) { + if (!removeParticle[particleId]) + continue; + + std::vector connectedVertices; + + for (uint32_t vertexId : input.productionVertices(particleId)) { + if (vertexId < nVertices) + connectedVertices.push_back(vertexId); + } + + for (uint32_t vertexId : input.decayVertices(particleId)) { + if (vertexId < nVertices) + connectedVertices.push_back(vertexId); + } + + if (connectedVertices.size() < 2) + continue; + + const uint32_t first = connectedVertices.front(); + + for (std::size_t i = 1; i < connectedVertices.size(); ++i) { + vertexDSU.unite(static_cast(first), static_cast(connectedVertices[i])); + } + } + + truth::Graph output; + + std::vector oldParticleToNew(nParticles, -1); + + for (uint32_t oldParticle = 0; oldParticle < nParticles; ++oldParticle) { + if (removeParticle[oldParticle]) + continue; + + oldParticleToNew[oldParticle] = static_cast(output.particles.size()); + output.particles.push_back(input.particles[oldParticle]); + } + + std::unordered_map vertexRepToNew; + vertexRepToNew.reserve(nVertices); + + std::vector oldVertexToNew(nVertices, -1); + + auto getNewVertex = [&](uint32_t oldVertex) -> uint32_t { + if (oldVertexToNew[oldVertex] >= 0) + return static_cast(oldVertexToNew[oldVertex]); + + const int rep = vertexDSU.find(static_cast(oldVertex)); + auto inserted = vertexRepToNew.emplace(rep, static_cast(output.vertices.size())); + + if (inserted.second) { + output.vertices.push_back(input.vertices[oldVertex]); + } + + oldVertexToNew[oldVertex] = static_cast(inserted.first->second); + return inserted.first->second; + }; + + std::vector> particleToDecayVertexPairs; + std::vector> particleToProductionVertexPairs; + std::vector> vertexToOutgoingParticlePairs; + std::vector> vertexToIncomingParticlePairs; + + for (uint32_t oldVertex = 0; oldVertex < nVertices; ++oldVertex) { + bool hasVisibleIncoming = false; + bool hasVisibleOutgoing = false; + + for (uint32_t oldParticle : input.incomingParticles(oldVertex)) { + if (oldParticle < nParticles && oldParticleToNew[oldParticle] >= 0) { + hasVisibleIncoming = true; + break; + } + } + + for (uint32_t oldParticle : input.outgoingParticles(oldVertex)) { + if (oldParticle < nParticles && oldParticleToNew[oldParticle] >= 0) { + hasVisibleOutgoing = true; + break; + } + } + + if (!hasVisibleIncoming && !hasVisibleOutgoing) + continue; + + const uint32_t newVertex = getNewVertex(oldVertex); + + for (uint32_t oldParticle : input.incomingParticles(oldVertex)) { + if (oldParticle >= nParticles) + continue; + + const int32_t newParticle = oldParticleToNew[oldParticle]; + if (newParticle < 0) + continue; + + particleToDecayVertexPairs.emplace_back(static_cast(newParticle), newVertex); + vertexToIncomingParticlePairs.emplace_back(newVertex, static_cast(newParticle)); + } + + for (uint32_t oldParticle : input.outgoingParticles(oldVertex)) { + if (oldParticle >= nParticles) + continue; + + const int32_t newParticle = oldParticleToNew[oldParticle]; + if (newParticle < 0) + continue; + + vertexToOutgoingParticlePairs.emplace_back(newVertex, static_cast(newParticle)); + particleToProductionVertexPairs.emplace_back(static_cast(newParticle), newVertex); + } + } + + buildCSR(output.nParticles(), + particleToDecayVertexPairs, + output.particleToDecayVertexOffsets, + output.particleToDecayVertices); + + buildCSR(output.nParticles(), + particleToProductionVertexPairs, + output.particleToProductionVertexOffsets, + output.particleToProductionVertices); + + buildCSR(output.nVertices(), + vertexToOutgoingParticlePairs, + output.vertexToOutgoingParticleOffsets, + output.vertexToOutgoingParticles); + + buildCSR(output.nVertices(), + vertexToIncomingParticlePairs, + output.vertexToIncomingParticleOffsets, + output.vertexToIncomingParticles); + + return output; + } + +} // namespace + +namespace truth { + + TruthLogicalGraphPostProcessor::TruthLogicalGraphPostProcessor(LogicalGraphPostProcessingConfig config) + : config_(std::move(config)) {} + + edm::ParameterSetDescription TruthLogicalGraphPostProcessor::psetDescription() { + edm::ParameterSetDescription desc; + + desc.add("collapseIntermediateGenParticles", true) + ->setComment( + "If true, collapse GEN chains P -> V -> C where P has status != 1, C is the only daughter, " + "and P and C have the same PDG id. Status-1 GEN particles are never collapsed by this rule."); + + desc.add>("seedPdgIds", {}) + ->setComment( + "If non-empty, keep particles with these exact PDG ids, keep seedParentDepth generations above them, " + "then keep the full downstream subgraph. Stable GEN particles outside the selected subgraph are kept " + "and attached to one artificial collapsed vertex."); + + desc.add("seedParentDepth", 0) + ->setComment("Number of parent generations to keep above each seed particle before keeping downstream."); + + desc.add>("ignoredPdgIds", {}) + ->setComment( + "Particles with these exact PDG ids are always removed from the final logical graph. If internal, " + "their production and decay vertices are merged so the graph remains connected."); + + desc.add>("ignoredParticleIds", {}) + ->setComment( + "Logical particle ids to remove from the final logical graph. These ids refer to the graph state at " + "the moment the ignored-particle collapsing step is applied."); + + return desc; + } + + LogicalGraphPostProcessingConfig TruthLogicalGraphPostProcessor::configFromPSet(edm::ParameterSet const& pset) { + LogicalGraphPostProcessingConfig config; + + config.collapseIntermediateGenParticles = pset.getParameter("collapseIntermediateGenParticles"); + config.seedPdgIds = pset.getParameter>("seedPdgIds"); + config.seedParentDepth = pset.getParameter("seedParentDepth"); + config.ignoredPdgIds = pset.getParameter>("ignoredPdgIds"); + config.ignoredParticleIds = pset.getParameter>("ignoredParticleIds"); + + return config; + } + + Graph TruthLogicalGraphPostProcessor::process(Graph input) const { + if (config_.collapseIntermediateGenParticles) { + input = collapseIntermediateGenParticleChains(input); + } + + input = filterGraphBySeedPdgIds(input, config_); + + if (!config_.ignoredPdgIds.empty() || !config_.ignoredParticleIds.empty()) { + input = collapseIgnoredParticles(input, config_.ignoredPdgIds, config_.ignoredParticleIds); + } + + return input; + } + +} // namespace truth diff --git a/PhysicsTools/TruthInfo/src/classes.h b/PhysicsTools/TruthInfo/src/classes.h new file mode 100644 index 0000000000000..8ddcc2323fe40 --- /dev/null +++ b/PhysicsTools/TruthInfo/src/classes.h @@ -0,0 +1,35 @@ +#ifndef PhysicsTools_TruthInfo_src_classes_h +#define PhysicsTools_TruthInfo_src_classes_h + +#include "DataFormats/Common/interface/Wrapper.h" +#include "PhysicsTools/TruthInfo/interface/Graph.h" +#include "PhysicsTools/TruthInfo/interface/TruthGraph.h" +#include "PhysicsTools/TruthInfo/interface/LogicalGraphHitIndex.h" + +namespace { + struct dictionary { + TruthGraph rawTruthGraph; + edm::Wrapper wrappedRawTruthGraph; + + TruthGraph::NodeRef rawTruthNodeRef; + std::vector rawTruthNodeRefs; + + truth::Graph logicalTruthGraph; + edm::Wrapper wrappedLogicalTruthGraph; + + truth::Checkpoint logicalTruthCheckpoint; + std::vector logicalTruthCheckpoints; + + truth::ParticleData logicalTruthParticleData; + std::vector logicalTruthParticleDataVec; + + truth::VertexData logicalTruthVertexData; + std::vector logicalTruthVertexDataVec; + + truth::LogicalGraphHitIndex logicalGraphHitIndex; + truth::LogicalGraphHitIndex::Hit logicalGraphHitIndexHit; + std::vector logicalGraphHitIndexHitVector; + }; +} // namespace + +#endif diff --git a/PhysicsTools/TruthInfo/src/classes_def.xml b/PhysicsTools/TruthInfo/src/classes_def.xml new file mode 100644 index 0000000000000..7f113e85a0154 --- /dev/null +++ b/PhysicsTools/TruthInfo/src/classes_def.xml @@ -0,0 +1,21 @@ + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/PhysicsTools/TruthInfo/test/BuildFile.xml b/PhysicsTools/TruthInfo/test/BuildFile.xml new file mode 100644 index 0000000000000..191307ada5470 --- /dev/null +++ b/PhysicsTools/TruthInfo/test/BuildFile.xml @@ -0,0 +1,4 @@ + + + + \ No newline at end of file diff --git a/PhysicsTools/TruthInfo/test/TruthLogicalGraphPostProcessor_t.cpp b/PhysicsTools/TruthInfo/test/TruthLogicalGraphPostProcessor_t.cpp new file mode 100644 index 0000000000000..337fcbf660da2 --- /dev/null +++ b/PhysicsTools/TruthInfo/test/TruthLogicalGraphPostProcessor_t.cpp @@ -0,0 +1,663 @@ +#include "Utilities/Testing/interface/CppUnit_testdriver.icpp" +#include "cppunit/extensions/HelperMacros.h" + +#include +#include +#include +#include +#include + +#include "FWCore/Utilities/interface/Exception.h" +#include "PhysicsTools/TruthInfo/interface/Graph.h" +#include "PhysicsTools/TruthInfo/interface/TruthLogicalGraphPostProcessor.h" + +namespace { + + void buildCSR(uint32_t nSources, + std::vector>& pairs, + std::vector& offsets, + std::vector& flat) { + std::sort(pairs.begin(), pairs.end()); + pairs.erase(std::unique(pairs.begin(), pairs.end()), pairs.end()); + + offsets.assign(nSources + 1, 0); + + for (auto const& pair : pairs) { + CPPUNIT_ASSERT(pair.first < nSources); + ++offsets[pair.first + 1]; + } + + for (uint32_t i = 1; i <= nSources; ++i) { + offsets[i] += offsets[i - 1]; + } + + flat.assign(pairs.size(), 0); + auto cursor = offsets; + + for (auto const& pair : pairs) { + flat[cursor[pair.first]++] = pair.second; + } + } + + struct GraphBuilder { + explicit GraphBuilder(uint32_t nParticles, uint32_t nVertices) { + graph.particles.resize(nParticles); + graph.vertices.resize(nVertices); + } + + void setGenParticle(uint32_t particleId, int32_t pdgId, int16_t status, int32_t genNode) { + CPPUNIT_ASSERT(particleId < graph.nParticles()); + + auto& particle = graph.particles[particleId]; + particle.genNode = genNode; + particle.simNode = -1; + particle.pdgId = pdgId; + particle.status = status; + particle.statusFlags = 0; + particle.eventId = 0; + particle.genEvent = 0; + } + + void setSimParticle(uint32_t particleId, int32_t pdgId, int32_t simNode) { + CPPUNIT_ASSERT(particleId < graph.nParticles()); + + auto& particle = graph.particles[particleId]; + particle.genNode = -1; + particle.simNode = simNode; + particle.pdgId = pdgId; + particle.status = 0; + particle.statusFlags = 0; + particle.eventId = 1; + particle.genEvent = -1; + } + + void setGenSimParticle(uint32_t particleId, int32_t pdgId, int16_t status, int32_t genNode, int32_t simNode) { + CPPUNIT_ASSERT(particleId < graph.nParticles()); + + auto& particle = graph.particles[particleId]; + particle.genNode = genNode; + particle.simNode = simNode; + particle.pdgId = pdgId; + particle.status = status; + particle.statusFlags = 0; + particle.eventId = 1; + particle.genEvent = 0; + } + + void setGenVertex(uint32_t vertexId, int32_t genNode) { + CPPUNIT_ASSERT(vertexId < graph.nVertices()); + + auto& vertex = graph.vertices[vertexId]; + vertex.genNode = genNode; + vertex.simNode = -1; + vertex.eventId = 0; + vertex.genEvent = 0; + } + + void setSimVertex(uint32_t vertexId, int32_t simNode) { + CPPUNIT_ASSERT(vertexId < graph.nVertices()); + + auto& vertex = graph.vertices[vertexId]; + vertex.genNode = -1; + vertex.simNode = simNode; + vertex.eventId = 1; + vertex.genEvent = -1; + } + + void setGenSimVertex(uint32_t vertexId, int32_t genNode, int32_t simNode) { + CPPUNIT_ASSERT(vertexId < graph.nVertices()); + + auto& vertex = graph.vertices[vertexId]; + vertex.genNode = genNode; + vertex.simNode = simNode; + vertex.eventId = 1; + vertex.genEvent = 0; + } + + void addDecay(uint32_t particleId, uint32_t vertexId) { + CPPUNIT_ASSERT(particleId < graph.nParticles()); + CPPUNIT_ASSERT(vertexId < graph.nVertices()); + + particleToDecayVertexPairs.emplace_back(particleId, vertexId); + vertexToIncomingParticlePairs.emplace_back(vertexId, particleId); + } + + void addProduction(uint32_t vertexId, uint32_t particleId) { + CPPUNIT_ASSERT(vertexId < graph.nVertices()); + CPPUNIT_ASSERT(particleId < graph.nParticles()); + + vertexToOutgoingParticlePairs.emplace_back(vertexId, particleId); + particleToProductionVertexPairs.emplace_back(particleId, vertexId); + } + + truth::Graph finish() { + buildCSR(graph.nParticles(), + particleToDecayVertexPairs, + graph.particleToDecayVertexOffsets, + graph.particleToDecayVertices); + + buildCSR(graph.nParticles(), + particleToProductionVertexPairs, + graph.particleToProductionVertexOffsets, + graph.particleToProductionVertices); + + buildCSR(graph.nVertices(), + vertexToOutgoingParticlePairs, + graph.vertexToOutgoingParticleOffsets, + graph.vertexToOutgoingParticles); + + buildCSR(graph.nVertices(), + vertexToIncomingParticlePairs, + graph.vertexToIncomingParticleOffsets, + graph.vertexToIncomingParticles); + + CPPUNIT_ASSERT(graph.isConsistent()); + + return graph; + } + + truth::Graph graph; + + std::vector> particleToDecayVertexPairs; + std::vector> particleToProductionVertexPairs; + std::vector> vertexToOutgoingParticlePairs; + std::vector> vertexToIncomingParticlePairs; + }; + + uint32_t countParticlesWithPdgId(truth::Graph const& graph, int32_t pdgId) { + uint32_t count = 0; + + for (auto const& particle : graph.particles) { + if (particle.pdgId == pdgId) + ++count; + } + + return count; + } + + uint32_t countStableGenParticles(truth::Graph const& graph) { + uint32_t count = 0; + + for (auto const& particle : graph.particles) { + if (particle.hasGen() && particle.status == 1) + ++count; + } + + return count; + } + + bool hasGenSimParticleWithPdgId(truth::Graph const& graph, int32_t pdgId) { + return std::any_of(graph.particles.begin(), graph.particles.end(), [pdgId](auto const& particle) { + return particle.pdgId == pdgId && particle.hasGen() && particle.hasSim(); + }); + } + + bool hasArtificialVertex(truth::Graph const& graph) { + return std::any_of(graph.vertices.begin(), graph.vertices.end(), [](auto const& vertex) { + return !vertex.hasGen() && !vertex.hasSim(); + }); + } + + uint32_t artificialVertexId(truth::Graph const& graph) { + for (uint32_t i = 0; i < graph.nVertices(); ++i) { + auto const& vertex = graph.vertices[i]; + + if (!vertex.hasGen() && !vertex.hasSim()) + return i; + } + + CPPUNIT_ASSERT(false); + return 0; + } + + uint32_t findParticleWithPdgId(truth::Graph const& graph, int32_t pdgId) { + for (uint32_t i = 0; i < graph.nParticles(); ++i) { + if (graph.particles[i].pdgId == pdgId) + return i; + } + + CPPUNIT_ASSERT(false); + return 0; + } + + truth::LogicalGraphPostProcessingConfig defaultConfig() { + truth::LogicalGraphPostProcessingConfig config; + config.collapseIntermediateGenParticles = false; + config.seedPdgIds = {}; + config.seedParentDepth = 0; + config.ignoredPdgIds = {}; + config.ignoredParticleIds = {}; + return config; + } + + truth::Graph runPostProcessing(truth::Graph graph, truth::LogicalGraphPostProcessingConfig const& config) { + truth::TruthLogicalGraphPostProcessor processor(config); + return processor.process(std::move(graph)); + } + +} // namespace + +class TestTruthLogicalGraphPostProcessor : public CppUnit::TestFixture { + CPPUNIT_TEST_SUITE(TestTruthLogicalGraphPostProcessor); + CPPUNIT_TEST(testStatusOneGenParticlesAreNeverCollapsed); + CPPUNIT_TEST(testStableGenSimParticlesSurviveIntermediateCollapse); + CPPUNIT_TEST(testSeedCutKeepsUnrelatedStableGenSimParticlesThroughArtificialVertex); + CPPUNIT_TEST(testDagClosureKeepsAllParentsOfKeptVertices); + CPPUNIT_TEST(testIgnoredParticlesAreCollapsedAway); + CPPUNIT_TEST(testSeedCutWithIgnoredParticles); + CPPUNIT_TEST(testIgnoredParticleIdsAreCollapsedAway); + CPPUNIT_TEST_SUITE_END(); + +public: + void testStatusOneGenParticlesAreNeverCollapsed(); + void testStableGenSimParticlesSurviveIntermediateCollapse(); + void testSeedCutKeepsUnrelatedStableGenSimParticlesThroughArtificialVertex(); + void testDagClosureKeepsAllParentsOfKeptVertices(); + void testIgnoredParticlesAreCollapsedAway(); + void testSeedCutWithIgnoredParticles(); + void testIgnoredParticleIdsAreCollapsedAway(); +}; + +CPPUNIT_TEST_SUITE_REGISTRATION(TestTruthLogicalGraphPostProcessor); + +void TestTruthLogicalGraphPostProcessor::testStatusOneGenParticlesAreNeverCollapsed() { + try { + GraphBuilder builder(2, 1); + + // This topology is intentionally unphysical: a status-1 GEN particle has a + // decay vertex to another same-PDG status-1 particle. The postprocessor must + // still never collapse a status-1 GEN particle. + builder.setGenParticle(0, 22, 1, 100); + builder.setGenParticle(1, 22, 1, 101); + builder.setGenVertex(0, 200); + + builder.addDecay(0, 0); + builder.addProduction(0, 1); + + auto graph = builder.finish(); + + auto config = defaultConfig(); + config.collapseIntermediateGenParticles = true; + + auto output = runPostProcessing(std::move(graph), config); + + CPPUNIT_ASSERT(output.isConsistent()); + CPPUNIT_ASSERT_EQUAL(uint32_t(2), output.nParticles()); + CPPUNIT_ASSERT_EQUAL(uint32_t(2), countParticlesWithPdgId(output, 22)); + CPPUNIT_ASSERT_EQUAL(uint32_t(2), countStableGenParticles(output)); + } catch (cms::Exception const& ex) { + std::cerr << ex.what() << std::endl; + CPPUNIT_ASSERT(false); + } +} + +void TestTruthLogicalGraphPostProcessor::testStableGenSimParticlesSurviveIntermediateCollapse() { + try { + GraphBuilder builder(3, 2); + + // gamma(status 2) -> gamma(status 1, GEN+SIM) + // e-(status 1, GEN+SIM) is an independent stable final-state particle. + builder.setGenParticle(0, 22, 2, 100); + builder.setGenSimParticle(1, 22, 1, 101, 1001); + builder.setGenSimParticle(2, 11, 1, 102, 1002); + + builder.setGenVertex(0, 200); + builder.setGenSimVertex(1, 201, 2001); + + builder.addDecay(0, 0); + builder.addProduction(0, 1); + builder.addProduction(1, 2); + + auto graph = builder.finish(); + + auto config = defaultConfig(); + config.collapseIntermediateGenParticles = true; + + auto output = runPostProcessing(std::move(graph), config); + + CPPUNIT_ASSERT(output.isConsistent()); + + // The intermediate gamma can collapse into the final stable gamma, but the + // status-1 GEN+SIM gamma must remain materialized. + CPPUNIT_ASSERT_EQUAL(uint32_t(2), output.nParticles()); + CPPUNIT_ASSERT(hasGenSimParticleWithPdgId(output, 22)); + CPPUNIT_ASSERT(hasGenSimParticleWithPdgId(output, 11)); + CPPUNIT_ASSERT_EQUAL(uint32_t(2), countStableGenParticles(output)); + } catch (cms::Exception const& ex) { + std::cerr << ex.what() << std::endl; + CPPUNIT_ASSERT(false); + } +} + +void TestTruthLogicalGraphPostProcessor::testSeedCutKeepsUnrelatedStableGenSimParticlesThroughArtificialVertex() { + try { + GraphBuilder builder(4, 3); + + // Interesting branch: + // Z -> gamma(status 1, GEN+SIM) + // + // Unrelated stable final state: + // artificial filtering must keep e-(status 1, GEN+SIM), but attach it to + // one artificial vertex instead of keeping its unrelated production chain. + builder.setGenParticle(0, 23, 2, 100); + builder.setGenSimParticle(1, 22, 1, 101, 1001); + builder.setGenParticle(2, 999, 2, 102); + builder.setGenSimParticle(3, 11, 1, 103, 1003); + + builder.setGenSimVertex(0, 200, 2000); + builder.setGenVertex(1, 201); + builder.setGenSimVertex(2, 202, 2002); + + builder.addDecay(0, 0); + builder.addProduction(0, 1); + + builder.addDecay(2, 1); + builder.addProduction(1, 3); + + // A second production vertex for the stable electron, to make sure the + // filtered graph does not keep unrelated upstream structure. + builder.addProduction(2, 3); + + auto graph = builder.finish(); + + auto config = defaultConfig(); + config.seedPdgIds = {22}; + config.seedParentDepth = 1; + + auto output = runPostProcessing(std::move(graph), config); + + CPPUNIT_ASSERT(output.isConsistent()); + + CPPUNIT_ASSERT(hasGenSimParticleWithPdgId(output, 22)); + CPPUNIT_ASSERT(hasGenSimParticleWithPdgId(output, 11)); + CPPUNIT_ASSERT(hasArtificialVertex(output)); + + const uint32_t electron = findParticleWithPdgId(output, 11); + const auto productionVertices = output.productionVertices(electron); + + CPPUNIT_ASSERT_EQUAL(std::size_t(1), productionVertices.size()); + + const uint32_t collapsedVertex = artificialVertexId(output); + CPPUNIT_ASSERT_EQUAL(collapsedVertex, productionVertices.front()); + + const auto artificialOutgoing = output.outgoingParticles(collapsedVertex); + CPPUNIT_ASSERT(std::find(artificialOutgoing.begin(), artificialOutgoing.end(), electron) != + artificialOutgoing.end()); + + const auto artificialIncoming = output.incomingParticles(collapsedVertex); + CPPUNIT_ASSERT(artificialIncoming.empty()); + } catch (cms::Exception const& ex) { + std::cerr << ex.what() << std::endl; + CPPUNIT_ASSERT(false); + } +} + +void TestTruthLogicalGraphPostProcessor::testDagClosureKeepsAllParentsOfKeptVertices() { + try { + GraphBuilder builder(5, 3); + + // DAG topology: + // + // H -> v0 -> pi0 + // Z --------^ + // pi0 -> v1 -> gamma + // e- stable, unrelated + // + // The seed is H. Keeping downstream from H keeps v0. Since v0 also has Z as + // an incoming parent, the postprocessor must include Z and the upstream path + // to Z instead of showing v0 as if it had only H as parent. + builder.setGenParticle(0, 25, 2, 100); + builder.setGenParticle(1, 23, 2, 101); + builder.setGenParticle(2, 111, 2, 102); + builder.setGenSimParticle(3, 22, 1, 103, 1003); + builder.setGenSimParticle(4, 11, 1, 104, 1004); + + builder.setGenVertex(0, 200); + builder.setGenVertex(1, 201); + builder.setGenSimVertex(2, 202, 2002); + + builder.addDecay(0, 0); + builder.addDecay(1, 0); + builder.addProduction(0, 2); + + builder.addDecay(2, 1); + builder.addProduction(1, 3); + + builder.addProduction(2, 4); + + auto graph = builder.finish(); + + auto config = defaultConfig(); + config.seedPdgIds = {25}; + config.seedParentDepth = 0; + + auto output = runPostProcessing(std::move(graph), config); + + CPPUNIT_ASSERT(output.isConsistent()); + + CPPUNIT_ASSERT_EQUAL(uint32_t(1), countParticlesWithPdgId(output, 25)); + CPPUNIT_ASSERT_EQUAL(uint32_t(1), countParticlesWithPdgId(output, 23)); + CPPUNIT_ASSERT_EQUAL(uint32_t(1), countParticlesWithPdgId(output, 111)); + CPPUNIT_ASSERT_EQUAL(uint32_t(1), countParticlesWithPdgId(output, 22)); + + // The unrelated stable electron is still kept, but via the artificial vertex. + CPPUNIT_ASSERT_EQUAL(uint32_t(1), countParticlesWithPdgId(output, 11)); + CPPUNIT_ASSERT(hasArtificialVertex(output)); + + const uint32_t pi0 = findParticleWithPdgId(output, 111); + const auto pi0ProductionVertices = output.productionVertices(pi0); + + CPPUNIT_ASSERT_EQUAL(std::size_t(1), pi0ProductionVertices.size()); + + const auto incoming = output.incomingParticles(pi0ProductionVertices.front()); + + CPPUNIT_ASSERT_EQUAL(std::size_t(2), incoming.size()); + + std::vector incomingPdgIds; + incomingPdgIds.reserve(incoming.size()); + + for (uint32_t parent : incoming) { + incomingPdgIds.push_back(output.particles[parent].pdgId); + } + + std::sort(incomingPdgIds.begin(), incomingPdgIds.end()); + + CPPUNIT_ASSERT_EQUAL(int32_t(23), incomingPdgIds[0]); + CPPUNIT_ASSERT_EQUAL(int32_t(25), incomingPdgIds[1]); + } catch (cms::Exception const& ex) { + std::cerr << ex.what() << std::endl; + CPPUNIT_ASSERT(false); + } +} + +void TestTruthLogicalGraphPostProcessor::testIgnoredParticlesAreCollapsedAway() { + try { + GraphBuilder builder(3, 2); + + // Z -> gamma -> e- + // + // If gamma is ignored, it should disappear and the two vertices around it + // should be merged, preserving a navigable Z -> e- connection. + builder.setGenParticle(0, 23, 2, 100); + builder.setGenParticle(1, 22, 2, 101); + builder.setGenSimParticle(2, 11, 1, 102, 1002); + + builder.setGenVertex(0, 200); + builder.setGenSimVertex(1, 201, 2001); + + builder.addDecay(0, 0); + builder.addProduction(0, 1); + + builder.addDecay(1, 1); + builder.addProduction(1, 2); + + auto graph = builder.finish(); + + auto config = defaultConfig(); + config.ignoredPdgIds = {22}; + + auto output = runPostProcessing(std::move(graph), config); + + CPPUNIT_ASSERT(output.isConsistent()); + + CPPUNIT_ASSERT_EQUAL(uint32_t(0), countParticlesWithPdgId(output, 22)); + CPPUNIT_ASSERT_EQUAL(uint32_t(1), countParticlesWithPdgId(output, 23)); + CPPUNIT_ASSERT_EQUAL(uint32_t(1), countParticlesWithPdgId(output, 11)); + + const uint32_t z = findParticleWithPdgId(output, 23); + const uint32_t electron = findParticleWithPdgId(output, 11); + + const auto decayVertices = output.decayVertices(z); + CPPUNIT_ASSERT_EQUAL(std::size_t(1), decayVertices.size()); + + const auto outgoing = output.outgoingParticles(decayVertices.front()); + CPPUNIT_ASSERT(std::find(outgoing.begin(), outgoing.end(), electron) != outgoing.end()); + } catch (cms::Exception const& ex) { + std::cerr << ex.what() << std::endl; + CPPUNIT_ASSERT(false); + } +} + +void TestTruthLogicalGraphPostProcessor::testSeedCutWithIgnoredParticles() { + try { + GraphBuilder builder(6, 4); + + // Interesting branch: + // H -> pi0 -> gamma(status 1, GEN+SIM) + // + // Extra parent on the kept pi0 production vertex: + // Z --------^ + // + // Unrelated stable final-state e- is kept through the artificial vertex. + // + // Then ignoredPdgIds removes pi0, merging H/Z directly to gamma. + builder.setGenParticle(0, 25, 2, 100); + builder.setGenParticle(1, 23, 2, 101); + builder.setGenParticle(2, 111, 2, 102); + builder.setGenSimParticle(3, 22, 1, 103, 1003); + builder.setGenParticle(4, 999, 2, 104); + builder.setGenSimParticle(5, 11, 1, 105, 1005); + + builder.setGenVertex(0, 200); + builder.setGenSimVertex(1, 201, 2001); + builder.setGenVertex(2, 202); + builder.setGenSimVertex(3, 203, 2003); + + builder.addDecay(0, 0); + builder.addDecay(1, 0); + builder.addProduction(0, 2); + + builder.addDecay(2, 1); + builder.addProduction(1, 3); + + builder.addDecay(4, 2); + builder.addProduction(2, 5); + + builder.addProduction(3, 5); + + auto graph = builder.finish(); + + auto config = defaultConfig(); + config.seedPdgIds = {25}; + config.seedParentDepth = 0; + config.ignoredPdgIds = {111}; + + auto output = runPostProcessing(std::move(graph), config); + + CPPUNIT_ASSERT(output.isConsistent()); + + CPPUNIT_ASSERT_EQUAL(uint32_t(1), countParticlesWithPdgId(output, 25)); + CPPUNIT_ASSERT_EQUAL(uint32_t(1), countParticlesWithPdgId(output, 23)); + CPPUNIT_ASSERT_EQUAL(uint32_t(0), countParticlesWithPdgId(output, 111)); + CPPUNIT_ASSERT_EQUAL(uint32_t(1), countParticlesWithPdgId(output, 22)); + CPPUNIT_ASSERT_EQUAL(uint32_t(1), countParticlesWithPdgId(output, 11)); + + CPPUNIT_ASSERT(hasGenSimParticleWithPdgId(output, 22)); + CPPUNIT_ASSERT(hasGenSimParticleWithPdgId(output, 11)); + CPPUNIT_ASSERT(hasArtificialVertex(output)); + + const uint32_t gamma = findParticleWithPdgId(output, 22); + const auto gammaProductionVertices = output.productionVertices(gamma); + + CPPUNIT_ASSERT_EQUAL(std::size_t(1), gammaProductionVertices.size()); + + const auto incoming = output.incomingParticles(gammaProductionVertices.front()); + + std::vector incomingPdgIds; + incomingPdgIds.reserve(incoming.size()); + + for (uint32_t parent : incoming) { + incomingPdgIds.push_back(output.particles[parent].pdgId); + } + + std::sort(incomingPdgIds.begin(), incomingPdgIds.end()); + + CPPUNIT_ASSERT_EQUAL(std::size_t(2), incomingPdgIds.size()); + CPPUNIT_ASSERT_EQUAL(int32_t(23), incomingPdgIds[0]); + CPPUNIT_ASSERT_EQUAL(int32_t(25), incomingPdgIds[1]); + + const uint32_t electron = findParticleWithPdgId(output, 11); + const auto electronProductionVertices = output.productionVertices(electron); + + CPPUNIT_ASSERT_EQUAL(std::size_t(1), electronProductionVertices.size()); + CPPUNIT_ASSERT_EQUAL(artificialVertexId(output), electronProductionVertices.front()); + } catch (cms::Exception const& ex) { + std::cerr << ex.what() << std::endl; + CPPUNIT_ASSERT(false); + } +} + +void TestTruthLogicalGraphPostProcessor::testIgnoredParticleIdsAreCollapsedAway() { + try { + GraphBuilder builder(4, 3); + + // Z -> a -> gamma -> e- + // + // Only particle id 2 is ignored. This verifies that ignoredParticleIds is + // independent from PDG id matching: the status-1 gamma is removed because its + // logical id is explicitly listed, not because all photons are ignored. + builder.setGenParticle(0, 23, 2, 100); + builder.setGenParticle(1, 36, 2, 101); + builder.setGenSimParticle(2, 22, 1, 102, 1002); + builder.setGenSimParticle(3, 11, 1, 103, 1003); + + builder.setGenVertex(0, 200); + builder.setGenSimVertex(1, 201, 2001); + builder.setGenSimVertex(2, 202, 2002); + + builder.addDecay(0, 0); + builder.addProduction(0, 1); + + builder.addDecay(1, 1); + builder.addProduction(1, 2); + + builder.addDecay(2, 2); + builder.addProduction(2, 3); + + auto graph = builder.finish(); + + auto config = defaultConfig(); + config.ignoredParticleIds = {2}; + + auto output = runPostProcessing(std::move(graph), config); + + CPPUNIT_ASSERT(output.isConsistent()); + + CPPUNIT_ASSERT_EQUAL(uint32_t(1), countParticlesWithPdgId(output, 23)); + CPPUNIT_ASSERT_EQUAL(uint32_t(1), countParticlesWithPdgId(output, 36)); + CPPUNIT_ASSERT_EQUAL(uint32_t(0), countParticlesWithPdgId(output, 22)); + CPPUNIT_ASSERT_EQUAL(uint32_t(1), countParticlesWithPdgId(output, 11)); + + const uint32_t a = findParticleWithPdgId(output, 36); + const uint32_t electron = findParticleWithPdgId(output, 11); + + const auto decayVertices = output.decayVertices(a); + CPPUNIT_ASSERT_EQUAL(std::size_t(1), decayVertices.size()); + + const auto outgoing = output.outgoingParticles(decayVertices.front()); + CPPUNIT_ASSERT(std::find(outgoing.begin(), outgoing.end(), electron) != outgoing.end()); + } catch (cms::Exception const& ex) { + std::cerr << ex.what() << std::endl; + CPPUNIT_ASSERT(false); + } +} diff --git a/PhysicsTools/TruthInfo/test/dumpTruthGraphsFromGENSIMRECO_cfg.py b/PhysicsTools/TruthInfo/test/dumpTruthGraphsFromGENSIMRECO_cfg.py new file mode 100644 index 0000000000000..df785bc6cc54c --- /dev/null +++ b/PhysicsTools/TruthInfo/test/dumpTruthGraphsFromGENSIMRECO_cfg.py @@ -0,0 +1,213 @@ +import FWCore.ParameterSet.Config as cms + +# user options +import os +from argparse import ArgumentParser +parser = ArgumentParser() +parser.add_argument("inputFile", nargs='?', default="step3.root", + metavar='FILE', help="Input file, default=%(default)r" ) +parser.add_argument('-o', "--outdir", default='', + help="output directory, default=%(default)r" ) +parser.add_argument('-n', "--maxevts", type=int, default=-1, + help="maximum number of events to process, default=%(default)s" ) +parser.add_argument('-m', "--merge", dest='mergeGenSim', action='store_true', + help="merge GEN-SIM nodes of duplicates" ) +parser.add_argument('-c', "--collapse", action='store_true', help="collapse GenParticle copies" ) +parser.add_argument('-t', "--tag", default='', help="tag for out put file" ) +args = parser.parse_args() +if '/' not in args.inputFile and ':' not in args.inputFile: + args.inputFile = 'file:'+args.inputFile +if args.outdir and not os.path.exists(args.outdir): + os.makedirs(args.outdir, exist_ok=True) + +process = cms.Process("TRUTHGRAPH") + +process.load("FWCore.MessageService.MessageLogger_cfi") + +# Needed if TruthLogicalGraphHitIndexProducer does HGCal simId -> reco DetId relabelling. +# Keep this consistent with the geometry used to produce step3.root. +process.load("Configuration.Geometry.GeometryExtendedRun4D120Reco_cff") + +process.maxEvents = cms.untracked.PSet( + input=cms.untracked.int32(args.maxevts) +) + +process.source = cms.Source( + "PoolSource", + fileNames=cms.untracked.vstring( + args.inputFile #"file:step3.root" + ) +) + +process.options = cms.untracked.PSet( + wantSummary=cms.untracked.bool(True) +) + +process.truthGraphProducer = cms.EDProducer( + "TruthGraphProducer", + genEventHepMC3=cms.InputTag("generatorSmeared"), + genEventHepMC=cms.InputTag("generatorSmeared"), + simTracks=cms.InputTag("g4SimHits"), + simVertices=cms.InputTag("g4SimHits"), + addGenToSimEdges=cms.bool(True), +) + +process.truthGraphDumper = cms.EDAnalyzer( + "TruthGraphDumper", + src=cms.InputTag("truthGraphProducer"), + dotFile=cms.string(os.path.join(args.outdir,f"truthgraph{args.tag}.dot")), # output file + maxNodes=cms.uint32(20000), + maxEdgesPerNode=cms.uint32(50), + simTracks=cms.InputTag("g4SimHits"), + simVertices=cms.InputTag("g4SimHits"), + genEventHepMC=cms.InputTag("generatorSmeared"), + genEventHepMC3=cms.InputTag("generatorSmeared"), +) + +process.truthLogicalGraphProducer = cms.EDProducer( + "TruthLogicalGraphProducer", + src=cms.InputTag("truthGraphProducer"), + simTracks=cms.InputTag("g4SimHits"), + simVertices=cms.InputTag("g4SimHits"), + genEventHepMC3=cms.InputTag("generatorSmeared"), + genEventHepMC=cms.InputTag("generatorSmeared"), + + mergeGenSimVertices=cms.bool(True), + + postProcessing=cms.PSet( + collapseIntermediateGenParticles=cms.bool(True), + + # Empty means: keep the full logical graph. + # Example: cms.vint32(22) keeps photons as seeds, + # keeps seedParentDepth parent generations above each seed, + # then keeps everything downstream. + seedPdgIds=cms.vint32(23,15,-15,25,4,5,6), + + seedParentDepth=cms.uint32(1), + + # Remove particles by PDG id. + # Example: cms.vint32(22) removes all photons from the final logical graph. + ignoredPdgIds=cms.vint32(), + + # Remove particles by logical particle id. + # These ids refer to the graph after the previous postprocessing steps + # and before ignored-particle collapsing. + ignoredParticleIds=cms.vuint32(), + ), +) +process.simHitToRecHitMapProducer = cms.EDProducer( + "SimHitToRecHitMapProducer", + + hgcalRecHits=cms.VInputTag( + cms.InputTag("HGCalRecHit", "HGCEERecHits", "RECO"), + cms.InputTag("HGCalRecHit", "HGCHEFRecHits", "RECO"), + cms.InputTag("HGCalRecHit", "HGCHEBRecHits", "RECO"), + ), + + pfRecHits=cms.VInputTag( + cms.InputTag("particleFlowRecHitECAL", "Cleaned", "RECO"), + cms.InputTag("particleFlowRecHitHBHE", "Cleaned", "RECO"), + cms.InputTag("particleFlowRecHitHF", "Cleaned", "RECO"), + cms.InputTag("particleFlowRecHitHO", "Cleaned", "RECO"), + ), +) +process.truthLogicalGraphHitIndexProducer = cms.EDProducer( + "TruthLogicalGraphHitIndexProducer", + + src=cms.InputTag("truthLogicalGraphProducer"), + rawSrc=cms.InputTag("truthGraphProducer"), + + recHitMap=cms.InputTag("simHitToRecHitMapProducer"), + + simHitCollections=cms.VInputTag( + cms.InputTag("g4SimHits", "HGCHitsEE", "SIM"), + cms.InputTag("g4SimHits", "HGCHitsHEfront", "SIM"), + cms.InputTag("g4SimHits", "HGCHitsHEback", "SIM"), + cms.InputTag("g4SimHits", "EcalHitsEB", "SIM"), + cms.InputTag("g4SimHits", "HcalHits", "SIM"), + ), + + doHGCalRelabelling=cms.bool(False), +) + +process.truthLogicalGraphDumper = cms.EDAnalyzer( + "TruthLogicalGraphDumper", + src=cms.InputTag("truthLogicalGraphProducer"), + rawSrc=cms.InputTag("truthGraphProducer"), + hitIndex=cms.InputTag("truthLogicalGraphHitIndexProducer"), + + hgcalRecHits=cms.VInputTag( + cms.InputTag("HGCalRecHit", "HGCEERecHits", "RECO"), + cms.InputTag("HGCalRecHit", "HGCHEFRecHits", "RECO"), + cms.InputTag("HGCalRecHit", "HGCHEBRecHits", "RECO"), + ), + + pfRecHits=cms.VInputTag( + cms.InputTag("particleFlowRecHitECAL", "Cleaned", "RECO"), + cms.InputTag("particleFlowRecHitHBHE", "Cleaned", "RECO"), + cms.InputTag("particleFlowRecHitHF", "Cleaned", "RECO"), + cms.InputTag("particleFlowRecHitHO", "Cleaned", "RECO"), + ), + + dotFile=cms.string(os.path.join(args.outdir,f"truthlogicalgraph{args.tag}.dot")), # output file + + maxParticles=cms.uint32(20000), + maxVertices=cms.uint32(20000), + maxEdgesPerNode=cms.uint32(300), + + hideLargeSimSourceVertices=cms.bool(True), + largeSimSourceVertexMinOutgoing=cms.uint32(50), + + hideZeroSimHitSubgraphs=cms.bool(True), +) + + +process.load("PhysicsTools.TruthInfo.recHitTable_cff") +# process.load('Configuration.EventContent.EventContent_cff') +process.NANOAODSIMoutput = cms.OutputModule("NanoAODOutputModule", + compressionAlgorithm = cms.untracked.string('ZLIB'), + compressionLevel = cms.untracked.int32(9), + dataset = cms.untracked.PSet( + dataTier = cms.untracked.string('NANOAODSIM'), + filterName = cms.untracked.string('') + ), + fileName = cms.untracked.string(os.path.join(args.outdir,f"rechits_nano{args.tag}.root")), + outputCommands = cms.untracked.vstring( + 'drop *', + 'keep nanoaodFlatTable_*Table_*_*', + # 'keep edmTriggerResults_*_*_*', + 'keep String_*_genModel_*', + 'keep nanoaodMergeableCounterTable_*Table_*_*', + 'keep nanoaodUniqueString_nanoMetadata_*_*', + 'keep nanoaodFlatTable_*Table*_*_*' +) +) + +process.MessageLogger.cerr.threshold = "INFO" +process.MessageLogger.cerr.default = cms.untracked.PSet( + limit=cms.untracked.int32(0) +) +process.MessageLogger.cerr.TruthGraphProducer = cms.untracked.PSet( + limit=cms.untracked.int32(-1) +) +process.MessageLogger.cerr.TruthLogicalGraphProducer = cms.untracked.PSet( + limit=cms.untracked.int32(-1) +) +process.MessageLogger.cerr.TruthLogicalGraphHitIndexProducer = cms.untracked.PSet( + limit=cms.untracked.int32(-1) +) +process.MessageLogger.cerr.SimHitToRecHitMapProducer = cms.untracked.PSet( + limit=cms.untracked.int32(-1) +) + +process.truthGraph_step = cms.Path( + process.truthGraphProducer + + process.truthGraphDumper + + process.truthLogicalGraphProducer + + process.simHitToRecHitMapProducer + + process.truthLogicalGraphHitIndexProducer + + process.truthLogicalGraphDumper + + process.recHitTable +) + +process.nano_step = cms.EndPath(process.NANOAODSIMoutput) diff --git a/SimCalorimetry/HGCalAssociatorProducers/BuildFile.xml b/SimCalorimetry/HGCalAssociatorProducers/BuildFile.xml new file mode 100644 index 0000000000000..98175ecd0ebc1 --- /dev/null +++ b/SimCalorimetry/HGCalAssociatorProducers/BuildFile.xml @@ -0,0 +1,3 @@ + + + \ No newline at end of file diff --git a/SimCalorimetry/HGCalAssociatorProducers/interface/DetIdRecHitMap.h b/SimCalorimetry/HGCalAssociatorProducers/interface/DetIdRecHitMap.h new file mode 100644 index 0000000000000..fdb2cf7b044ba --- /dev/null +++ b/SimCalorimetry/HGCalAssociatorProducers/interface/DetIdRecHitMap.h @@ -0,0 +1,23 @@ +#ifndef SimCalorimetry_HGCalAssociatorProducers_DetIdRecHitMap_h +#define SimCalorimetry_HGCalAssociatorProducers_DetIdRecHitMap_h + +#include +#include + +namespace hgcal { + + // Maps DetId::rawId() to a global recHit index. + // + // The index is defined by the concatenation order used by + // SimHitToRecHitMapProducer: + // + // 1. all configured HGCRecHitCollection inputs, in cfg order + // 2. all configured reco::PFRecHitCollection inputs, in cfg order + // + // It is not an index into a single EDM collection unless only one collection + // is configured. + using DetIdRecHitMap = std::unordered_map; + +} // namespace hgcal + +#endif diff --git a/SimCalorimetry/HGCalAssociatorProducers/plugins/SimHitToRecHitMapProducer.cc b/SimCalorimetry/HGCalAssociatorProducers/plugins/SimHitToRecHitMapProducer.cc new file mode 100644 index 0000000000000..306446c3aa36a --- /dev/null +++ b/SimCalorimetry/HGCalAssociatorProducers/plugins/SimHitToRecHitMapProducer.cc @@ -0,0 +1,124 @@ +#include +#include +#include + +#include "FWCore/Framework/interface/Event.h" +#include "FWCore/Framework/interface/EventSetup.h" +#include "FWCore/Framework/interface/MakerMacros.h" +#include "FWCore/Framework/interface/global/EDProducer.h" +#include "FWCore/MessageLogger/interface/MessageLogger.h" +#include "FWCore/ParameterSet/interface/ConfigurationDescriptions.h" +#include "FWCore/ParameterSet/interface/ParameterSet.h" +#include "FWCore/ParameterSet/interface/ParameterSetDescription.h" +#include "FWCore/Utilities/interface/EDGetToken.h" +#include "FWCore/Utilities/interface/InputTag.h" + +#include "DataFormats/HGCRecHit/interface/HGCRecHitCollections.h" +#include "DataFormats/ParticleFlowReco/interface/PFRecHit.h" + +#include "SimCalorimetry/HGCalAssociatorProducers/interface/DetIdRecHitMap.h" + +class SimHitToRecHitMapProducer : public edm::global::EDProducer<> { +public: + explicit SimHitToRecHitMapProducer(edm::ParameterSet const& ps); + ~SimHitToRecHitMapProducer() override = default; + + static void fillDescriptions(edm::ConfigurationDescriptions& descriptions); + +private: + void produce(edm::StreamID, edm::Event&, edm::EventSetup const&) const override; + + std::vector> hgcalRecHitTokens_; + std::vector> pfRecHitTokens_; +}; + +SimHitToRecHitMapProducer::SimHitToRecHitMapProducer(edm::ParameterSet const& ps) { + const auto hgcalTags = ps.getParameter>("hgcalRecHits"); + const auto pfTags = ps.getParameter>("pfRecHits"); + + hgcalRecHitTokens_.reserve(hgcalTags.size()); + for (auto const& tag : hgcalTags) { + hgcalRecHitTokens_.push_back(consumes(tag)); + } + + pfRecHitTokens_.reserve(pfTags.size()); + for (auto const& tag : pfTags) { + pfRecHitTokens_.push_back(consumes(tag)); + } + + produces(); +} + +void SimHitToRecHitMapProducer::produce(edm::StreamID, edm::Event& event, edm::EventSetup const&) const { + auto output = std::make_unique(); + + uint32_t globalRecHitIndex = 0; + + for (auto const& token : hgcalRecHitTokens_) { + edm::Handle handle; + event.getByToken(token, handle); + + if (!handle.isValid()) { + edm::LogWarning("SimHitToRecHitMapProducer") << "Missing HGCRecHitCollection. Skipping it."; + continue; + } + + output->reserve(output->size() + handle->size()); + + for (auto const& hit : *handle) { + const uint32_t rawId = hit.detid().rawId(); + + const auto [_, inserted] = output->emplace(rawId, globalRecHitIndex); + if (!inserted) { + edm::LogWarning("SimHitToRecHitMapProducer") + << "Duplicate HGCAL DetId rawId=" << rawId << ". Keeping the first recHit index."; + } + + ++globalRecHitIndex; + } + } + + for (auto const& token : pfRecHitTokens_) { + edm::Handle handle; + event.getByToken(token, handle); + + if (!handle.isValid()) { + edm::LogWarning("SimHitToRecHitMapProducer") << "Missing reco::PFRecHitCollection. Skipping it."; + continue; + } + + output->reserve(output->size() + handle->size()); + + for (auto const& hit : *handle) { + const uint32_t rawId = hit.detId(); + + const auto [_, inserted] = output->emplace(rawId, globalRecHitIndex); + if (!inserted) { + edm::LogWarning("SimHitToRecHitMapProducer") + << "Duplicate PFRecHit DetId rawId=" << rawId << ". Keeping the first recHit index."; + } + + ++globalRecHitIndex; + } + } + + event.put(std::move(output)); +} + +void SimHitToRecHitMapProducer::fillDescriptions(edm::ConfigurationDescriptions& descriptions) { + edm::ParameterSetDescription desc; + + desc.add>("hgcalRecHits", + {edm::InputTag("HGCalRecHit", "HGCEERecHits"), + edm::InputTag("HGCalRecHit", "HGCHEFRecHits"), + edm::InputTag("HGCalRecHit", "HGCHEBRecHits")}); + + desc.add>("pfRecHits", + {edm::InputTag("particleFlowRecHitECAL"), + edm::InputTag("particleFlowRecHitHBHE"), + edm::InputTag("particleFlowRecHitHO")}); + + descriptions.addWithDefaultLabel(desc); +} + +DEFINE_FWK_MODULE(SimHitToRecHitMapProducer); diff --git a/SimCalorimetry/HGCalAssociatorProducers/src/classes.h b/SimCalorimetry/HGCalAssociatorProducers/src/classes.h new file mode 100644 index 0000000000000..6ce2489b35235 --- /dev/null +++ b/SimCalorimetry/HGCalAssociatorProducers/src/classes.h @@ -0,0 +1,15 @@ +#ifndef SimCalorimetry_HGCalAssociatorProducers_classes_h +#define SimCalorimetry_HGCalAssociatorProducers_classes_h + +#include + +#include "DataFormats/Common/interface/Wrapper.h" + +namespace SimCalorimetry_HGCalAssociatorProducers { + struct dictionary { + std::unordered_map simHitToRecHitMap_; + edm::Wrapper> simHitToRecHitMapWrapper_; + }; +} // namespace SimCalorimetry_HGCalAssociatorProducers + +#endif diff --git a/SimCalorimetry/HGCalAssociatorProducers/src/classes_def.xml b/SimCalorimetry/HGCalAssociatorProducers/src/classes_def.xml new file mode 100644 index 0000000000000..28bd90432cb7e --- /dev/null +++ b/SimCalorimetry/HGCalAssociatorProducers/src/classes_def.xml @@ -0,0 +1,4 @@ + + + + \ No newline at end of file