diff --git a/PerfTools/AllocMonitor/plugins/ModuleEventAllocMonitor.cc b/PerfTools/AllocMonitor/plugins/ModuleEventAllocMonitor.cc index 627d01b29214f..a05c84ee93638 100644 --- a/PerfTools/AllocMonitor/plugins/ModuleEventAllocMonitor.cc +++ b/PerfTools/AllocMonitor/plugins/ModuleEventAllocMonitor.cc @@ -13,6 +13,8 @@ // system include files #include #include +#include +#include #include // user include files @@ -29,7 +31,6 @@ #include "FWCore/Utilities/interface/thread_safety_macros.h" #include "monitor_file_utilities.h" -#include "mea_AllocMap.h" #include "ThreadTracker.h" #define DEBUGGER_BREAK @@ -40,9 +41,14 @@ void break_on_unmatched_dealloc() {} } #endif namespace { - using namespace edm::service::moduleEventAlloc; using namespace edm::moduleAlloc::monitor_file_utilities; + using AllocMap = std::unordered_map; + auto accumulateValues(AllocMap const& map) { + auto const values = map | std::views::transform([](auto const& element) { return element.second; }); + return std::accumulate(values.begin(), values.end(), 0ULL); + } + struct ThreadAllocInfo { AllocMap allocMap_; std::vector unmatched_; @@ -55,11 +61,16 @@ namespace { std::size_t numUnmatchedDeallocs_ = 0; bool active_ = false; - void alloc(void const* iAddress, std::size_t iSize) { allocMap_.insert(iAddress, iSize); } + void alloc(void const* iAddress, std::size_t iSize) { + // Note that the iAddress might already be in the map if the + // allocation occurred within and the deallocation outside of + // the measurement region. + allocMap_[iAddress] = iSize; + } void dealloc(void const* iAddress, std::size_t iSize) { - auto size = allocMap_.erase(iAddress); - if (size == 0) { + auto nErased = allocMap_.erase(iAddress); + if (nErased == 0) { #if defined(DEBUGGER_BREAK) break_on_unmatched_dealloc(); #endif @@ -77,7 +88,7 @@ namespace { totalUnmatchedDealloc_ = 0; numMatchedDeallocs_ = 0; numUnmatchedDeallocs_ = 0; - allocMap_.clear(); + AllocMap{}.swap(allocMap_); // release memory unmatched_.clear(); active_ = true; } @@ -293,7 +304,8 @@ class ModuleEventAllocMonitor { iAR.watchPreModuleEvent([this](auto const& iStream, auto const& iMod) { auto mod_id = module_id(iMod); - auto acquireInfo = [this, iStream, mod_id]() { + // Explicit return type to avoid copies of return value for sure + auto acquireInfo = [this, iStream, mod_id]() -> AllocMap const& { //acquire might have started stuff streamSync_[iStream.streamID().value()].load(); auto index = moduleIndex(mod_id); @@ -308,16 +320,15 @@ class ModuleEventAllocMonitor { auto mod_id = module_id(iMod); auto info = filter_.stopOnThread(mod_id); if (info) { - auto v = std::accumulate(info->allocMap_.allocationSizes().begin(), info->allocMap_.allocationSizes().end(), 0); + auto v = accumulateValues(info->allocMap_); std::stringstream s; s << "M " << mod_id << " " << iStream.streamID().value() << " " << info->totalMatchedDeallocSize_ << " " << info->numMatchedDeallocs_ << " " << info->totalUnmatchedDealloc_ << " " << info->numUnmatchedDeallocs_ - << " " << v << " " << info->allocMap_.allocationSizes().size() << "\n"; + << " " << v << " " << info->allocMap_.size() << "\n"; file->write(s.str()); auto index = moduleIndex(mod_id); auto finishedOrder = streamNFinishedModules_[iStream.streamID().value()]++; - streamModuleFinishOrder_[finishedOrder + nModules_ * iStream.streamID().value()] = - nModules_ * iStream.streamID().value() + index; + streamModuleFinishOrder_[finishedOrder + nModules_ * iStream.streamID().value()] = index; streamModuleAllocs_[nModules_ * iStream.streamID().value() + index] = info->allocMap_; ++streamSync_[iStream.streamID().value()]; } @@ -335,27 +346,18 @@ class ModuleEventAllocMonitor { auto mod_id = module_id(iMod); auto info = filter_.stopOnThread(mod_id); if (info) { - assert(info->allocMap_.allocationSizes().size() == info->allocMap_.size()); - auto v = std::accumulate(info->allocMap_.allocationSizes().begin(), info->allocMap_.allocationSizes().end(), 0); + auto v = accumulateValues(info->allocMap_); std::stringstream s; s << "A " << mod_id << " " << iStream.streamID().value() << " " << info->totalMatchedDeallocSize_ << " " << info->numMatchedDeallocs_ << " " << info->totalUnmatchedDealloc_ << " " << info->numUnmatchedDeallocs_ - << " " << v << " " << info->allocMap_.allocationSizes().size() << "\n"; + << " " << v << " " << info->allocMap_.size() << "\n"; file->write(s.str()); auto index = mod_id; if (not moduleIDs_.empty()) { auto it = std::lower_bound(moduleIDs_.begin(), moduleIDs_.end(), mod_id); index = it - moduleIDs_.begin(); } - { - auto const& alloc = streamModuleAllocs_[nModules_ * iStream.streamID().value() + index]; - assert(alloc.size() == alloc.allocationSizes().size()); - } streamModuleAllocs_[nModules_ * iStream.streamID().value() + index] = info->allocMap_; - { - auto const& alloc = streamModuleAllocs_[nModules_ * iStream.streamID().value() + index]; - assert(alloc.size() == alloc.allocationSizes().size()); - } ++streamSync_[iStream.streamID().value()]; streamModuleInAcquire_[nModules_ * iStream.streamID().value() + index].store(false); } @@ -377,30 +379,50 @@ class ModuleEventAllocMonitor { auto info = filter_.stopOnThread(); if (info) { streamSync_[iStream.streamID().value()].load(); - //search for associated allocs to deallocs in reverse order that modules finished - auto nRan = streamNFinishedModules_[iStream.streamID().value()].load(); - auto itBegin = - std::reverse_iterator(streamModuleFinishOrder_.begin() + iStream.streamID().value() * nModules_ + nRan); - auto const itEnd = itBegin + nRan; + + // value is size, index + std::unordered_map> combinedAllocMap; + { + auto const nRan = streamNFinishedModules_[iStream.streamID().value()].load(); + assert(nRan <= nModules_); + auto const streamModuleOffset = iStream.streamID().value() * nModules_; + auto const itBegin = streamModuleFinishOrder_.begin() + streamModuleOffset; + auto const itEnd = itBegin + nRan; + + std::size_t const total = + std::accumulate(itBegin, itEnd, 0U, [this, streamModuleOffset](unsigned int a, auto const index) { + return a + streamModuleAllocs_[index + streamModuleOffset].size(); + }); + combinedAllocMap.reserve(total); + + for (auto it = itBegin; it != itEnd; ++it) { + auto& allocs = streamModuleAllocs_[*it + streamModuleOffset]; + for (auto const [addr, size] : allocs) { + // need to keep the address -> (size, module) + // association of the last finished module as that is + // the most likely module (that we can figure out) that + // allocated the memory of a data product that was + // destructed here + combinedAllocMap[addr] = std::pair(size, *it); + } + AllocMap{}.swap(allocs); + } + } streamNFinishedModules_[iStream.streamID().value()].store(0); { std::vector moduleDeallocSize(nModules_); std::vector moduleDeallocCount(nModules_); - for (auto& address : info->unmatched_) { - decltype(streamModuleAllocs_[0].findOffset(address)) offset = 0; - auto found = std::find_if(itBegin, itEnd, [&address, &offset, this](auto const& index) { - auto const& elem = streamModuleAllocs_[index]; - return elem.size() != 0 and (offset = elem.findOffset(address)) != elem.size(); - }); - if (found != itEnd) { - auto index = *found - nModules_ * iStream.streamID().value(); - moduleDeallocSize[index] += streamModuleAllocs_[*found].allocationSizes()[offset]; + for (auto const& address : info->unmatched_) { + auto const found = combinedAllocMap.find(address); + if (found != combinedAllocMap.end()) { + auto const index = found->second.second; + moduleDeallocSize[index] += found->second.first; moduleDeallocCount[index] += 1; } } for (unsigned int index = 0; index < nModules_; ++index) { if (moduleDeallocCount[index] != 0) { - auto id = moduleIDs_.empty() ? index : moduleIDs_[index]; + auto const id = moduleIDs_.empty() ? index : moduleIDs_[index]; std::stringstream s; s << "D " << id << " " << iStream.streamID().value() << " " << moduleDeallocSize[index] << " " << moduleDeallocCount[index] << "\n"; @@ -408,14 +430,6 @@ class ModuleEventAllocMonitor { } } } - - { - auto itBegin = streamModuleAllocs_.begin() + nModules_ * iStream.streamID().value(); - auto itEnd = itBegin + nModules_; - for (auto it = itBegin; it != itEnd; ++it) { - it->clear(); - } - } } }); } diff --git a/PerfTools/AllocMonitor/plugins/mea_AllocMap.h b/PerfTools/AllocMonitor/plugins/mea_AllocMap.h deleted file mode 100644 index ec827e0db6952..0000000000000 --- a/PerfTools/AllocMonitor/plugins/mea_AllocMap.h +++ /dev/null @@ -1,98 +0,0 @@ -#ifndef PerfTools_AllocMonitor_mea_AllocMap_h -#define PerfTools_AllocMonitor_mea_AllocMap_h -// -*- C++ -*- -// -// Package: PerfTools/AllocMonitor -// Class : AllocMap -// -/**\class mea_AllocMap mea_AllocMap.h "PerfTools/AllocMonitor/interface/mea_AllocMap.h" - - Description: [one line class summary] - - Usage: - - -*/ -// -// Original Author: Christopher Jones -// Created: Fri, 11 Oct 2024 19:15:46 GMT -// - -// system include files -#include -#include - -// user include files - -// forward declarations - -namespace edm::service::moduleEventAlloc { - class AllocMap { - public: - AllocMap() = default; - AllocMap(AllocMap const&) = default; - AllocMap& operator=(AllocMap const&) = default; - AllocMap(AllocMap&&) = default; - AllocMap& operator=(AllocMap&&) = default; - - // ---------- const member functions --------------------- - std::size_t size() const { return keys_.size(); } - - //returns size() if not here - std::size_t findOffset(void const* iKey) const { - auto bound = std::lower_bound(keys_.begin(), keys_.end(), iKey); - if (bound == keys_.end() or *bound != iKey) { - return size(); - } - return bound - keys_.begin(); - } - - std::vector const& allocationSizes() const { return values_; } - // ---------- static member functions -------------------- - - // ---------- member functions --------------------------- - void insert(void const* iKey, std::size_t iValue) { - auto offset = insertOffset(iKey); - if (offset != size() and keys_[offset] == iKey) { - values_[offset] = iValue; - return; - } - keys_.insert(keys_.begin() + offset, iKey); - values_.insert(values_.begin() + offset, iValue); - } - //returns 0 if not here else returns allocation size - std::size_t erase(void const* iKey) { - assert(keys_.size() == values_.size()); - auto offset = findOffset(iKey); - if (offset == size()) { - return 0; - } - auto v = values_[offset]; - values_.erase(values_.begin() + offset); - keys_.erase(keys_.begin() + offset); - - return v; - } - void clearSizes() { - values_.clear(); - values_.shrink_to_fit(); - } - - void clear() { - clearSizes(); - keys_.clear(); - keys_.shrink_to_fit(); - } - - private: - // ---------- member data -------------------------------- - std::size_t insertOffset(void const* key) const { - auto bound = std::lower_bound(keys_.begin(), keys_.end(), key); - return bound - keys_.begin(); - } - - std::vector keys_; - std::vector values_; - }; -} // namespace edm::service::moduleEventAlloc -#endif diff --git a/PerfTools/AllocMonitor/test/BuildFile.xml b/PerfTools/AllocMonitor/test/BuildFile.xml index eb74229c9fe79..206154ce02bcc 100644 --- a/PerfTools/AllocMonitor/test/BuildFile.xml +++ b/PerfTools/AllocMonitor/test/BuildFile.xml @@ -23,6 +23,7 @@ + diff --git a/PerfTools/AllocMonitor/test/moduleEventAlloc_cfg.py b/PerfTools/AllocMonitor/test/moduleEventAlloc_cfg.py new file mode 100644 index 0000000000000..1b5da7e7f98e1 --- /dev/null +++ b/PerfTools/AllocMonitor/test/moduleEventAlloc_cfg.py @@ -0,0 +1,70 @@ +import FWCore.ParameterSet.Config as cms + +import argparse +import sys + +parser = argparse.ArgumentParser(prog=sys.argv[0], description='Test ModuleEventAllocMonitor service.') +parser.add_argument("--output", default="moduleEventAlloc.log", help="Output log file") +parser.add_argument("--skipEvents", action="store_true", help="Test skipping events") +parser.add_argument("--edmodule", action="store_true", help="Show only specific ed module") +parser.add_argument("--maxEvents", type=int, default=3, help="Specify maxEvents") +parser.add_argument("--threads", type=int, default=1, help="Set number of threads and streams") +args = parser.parse_args() + + +process = cms.Process("TEST") + +process.source = cms.Source("EmptySource") + +process.maxEvents.input = args.maxEvents +if args.threads > 1: + process.options.numberOfThreads = args.threads + +process.thing = cms.EDProducer("ThingProducer", + offsetDelta = cms.int32(1) +) + +process.OtherThing = cms.EDProducer("OtherThingProducer", thingTag = cms.InputTag("thing")) + +process.thingProducer = cms.EDProducer("ThingProducer", + offsetDelta = cms.int32(100), + nThings = cms.int32(50) +) +process.get = cms.EDAnalyzer("edmtest::ThingAnalyzer") + +process.Int = cms.EDProducer( + "IntProducer", + ivalue = cms.int32(67) +) +process.add_(cms.Service("WaitingService")) +process.acquireInt = cms.EDProducer( + "AcquireIntStreamProducer", + tags = cms.VInputTag(), + produceTag = cms.InputTag("Int") +) +process.getInt = cms.EDAnalyzer( + "MultipleIntsAnalyzer", + getFromModules = cms.untracked.VInputTag("acquireInt") +) + +process.out = cms.OutputModule("AsciiOutputModule") + + +process.ep = cms.EndPath( + process.out + + process.get + + process.getInt, + cms.Task(process.thing, + process.OtherThing, + process.thingProducer, + process.Int, + process.acquireInt) +) + +#process.add_(cms.Service("Tracer")) +process.add_(cms.Service("ModuleEventAllocMonitor", fileName = cms.untracked.string(args.output))) +if args.skipEvents: + process.ModuleEventAllocMonitor.nEventsToSkip = cms.untracked.uint32(2) + +if args.edmodule: + process.ModuleEventAllocMonitor.moduleNames = cms.untracked.vstring(["thingProducer"]) diff --git a/PerfTools/AllocMonitor/test/runModuleEventAlloc.sh b/PerfTools/AllocMonitor/test/runModuleEventAlloc.sh new file mode 100755 index 0000000000000..551ec5c006ee9 --- /dev/null +++ b/PerfTools/AllocMonitor/test/runModuleEventAlloc.sh @@ -0,0 +1,104 @@ +#!/bin/sh -ex + +function die { echo $1: status $2 ; exit $2; } + +LOCAL_TEST_DIR=${SCRAM_TEST_PATH} + +LD_PRELOAD="libPerfToolsAllocMonitorPreload.so" cmsRun ${LOCAL_TEST_DIR}/moduleEventAlloc_cfg.py || die 'Failure using moduleEventAlloc_cfg.py' $? + +ORIGLOGLINES=$(grep -v "#" moduleEventAlloc.log | grep -v "@" | wc -l) + +THINGANALYZERID=$(grep edmtest::ThingAnalyzer moduleEventAlloc.log | awk '{print $4}') +grep "M ${THINGANALYZERID} " moduleEventAlloc.log | cut -d ' ' -f6- > thingAnalyzerUnmatched.txt 2>&1 +diff thingAnalyzerUnmatched.txt ${LOCAL_TEST_DIR}/unittest_output/moduleEventAllocMonitor_thingAnalyzerUnmatched.txt || die "differences in thingAnalyzerUnmatched" $? + +edmModuleEventAllocMonitorAnalyze.py --grew moduleEventAlloc.log > grew.txt 2>&1 +LINES=$(cat grew.txt | wc -l) +echo $LINES +if [ "$LINES" -ne 1 ]; then + echo "Some modules were reported to retain memory over the job" + cat grew.txt + exit 1 +fi + +edmModuleEventAllocMonitorAnalyze.py --retained moduleEventAlloc.log > retained.txt 2>&1 +LINES=$(cat retained.txt | wc -l) +if [ "$LINES" -ne 2 ]; then + echo "Unexpected number of modules reported to retain memory from one event to another. Expected AcquireIntStreamProducer" + cat retained.txt + exit 1 +fi +grep -q AcquireIntStreamProducer retained.txt || die 'AcquireIntStreamProducer not reported to retain memory from one event to another.' $? + +for F in product tempSize nTemp; do + edmModuleEventAllocMonitorAnalyze.py --${F} moduleEventAlloc.log > ${F}.txt 2>&1 + LINES=$(cat ${F}.txt | wc -l) + if [ "$LINES" -ne 6 ]; then + echo "Unexpected number of modules in ${F}. Expected 5 modules" + cat ${F}.txt + exit 1 + fi + grep -q IntProducer ${F}.txt || die 'IntProducer not reported ${F}.txt' $? + grep -q ThingProducer ${F}.txt || die 'ThingProducer not reported ${F}.txt' $? + grep -q AcquireIntStreamProducer ${F}.txt || die 'AcquireIntStreamProducer not reported ${F}.txt' $? + grep -q OtherThingProducer ${F}.txt || die 'OtherThingProducer not reported ${F}.txt' $? +done + +############### only 1 ED module kept +PREFIX="edmodule" +LD_PRELOAD="libPerfToolsAllocMonitorPreload.so" cmsRun ${LOCAL_TEST_DIR}/moduleEventAlloc_cfg.py --edmodule --output ${PREFIX}_moduleEventAlloc.log || die 'Failure using moduleEventAlloc_cfg.py --edmodule' $? + +edmModuleEventAllocMonitorAnalyze.py --grew ${PREFIX}_moduleEventAlloc.log > ${PREFIX}_grew.txt 2>&1 +LINES=$(cat ${PREFIX}_grew.txt | wc -l) +echo $LINES +if [ "$LINES" -ne 1 ]; then + echo "Some modules were reported to retain memory over the job" + cat ${PREFIX}_grew.txt + exit 1 +fi + +edmModuleEventAllocMonitorAnalyze.py --retained ${PREFIX}_moduleEventAlloc.log > ${PREFIX}_retained.txt 2>&1 +LINES=$(cat ${PREFIX}_retained.txt | wc -l) +if [ "$LINES" -ne 1 ]; then + echo "Some modules were reported to retain memory from one event to another" + cat ${PREFIX}_retained.txt + exit 1 +fi + +for F in product tempSize nTemp; do + edmModuleEventAllocMonitorAnalyze.py --${F} ${PREFIX}_moduleEventAlloc.log > ${PREFIX}_${F}.txt 2>&1 + LINES=$(cat ${PREFIX}_${F}.txt | wc -l) + if [ "$LINES" -ne 2 ]; then + echo "Unexpected number of modules in ${F}. Expected 1 modules" + cat ${PREFIX}_${F}.txt + exit 1 + fi + grep -q ThingProducer ${PREFIX}_${F}.txt || die 'ThingProducer not reported ${PREFIX}_${F}.txt' $? +done + +############## skip events +PREFIX="skip" +LD_PRELOAD="libPerfToolsAllocMonitorPreload.so" cmsRun ${LOCAL_TEST_DIR}/moduleEventAlloc_cfg.py --skipEvents --output ${PREFIX}_moduleEventAlloc.log || die 'Failure using moduleEventAlloc_cfg.py --skipEvents' $? + +SKIPLOGLINES=$(grep -v "#" ${PREFIX}_moduleEventAlloc.log | grep -v "@" | wc -l) + +if [ $ORIGLOGLINES -le $SKIPLOGLINES ]; then + echo "Skipping events resulted in ${SKIPLOGLINES} entries in the log, whereas original log resulted in ${ORIGLOGLINES}" + exit 1 +fi + + +############## multiple threads, mostly just that it technically runs +PREFIX="mt" +LD_PRELOAD="libPerfToolsAllocMonitorPreload.so" cmsRun ${LOCAL_TEST_DIR}/moduleEventAlloc_cfg.py --maxEvents 64 --threads 8 --output ${PREFIX}_moduleEventAlloc.log || die 'Failure using moduleEventAlloc_cfg.py --threads 8' $? + + +for F in grew retained product tempSize nTemp; do + edmModuleEventAllocMonitorAnalyze.py --${F} ${PREFIX}_moduleEventAlloc.log > ${PREFIX}_${F}.txt 2>&1 + LINES=$(cat ${PREFIX}_${F}.txt | wc -l) + if [ "$LINES" -eq 0 ]; then + echo "${PREFIX}_${F}.txt is empty" + cat ${PREFIX}_${F}.txt + exit 1 + fi +done diff --git a/PerfTools/AllocMonitor/test/test_catch2_allocmap.cc b/PerfTools/AllocMonitor/test/test_catch2_allocmap.cc deleted file mode 100644 index 91c1cbe0e4b77..0000000000000 --- a/PerfTools/AllocMonitor/test/test_catch2_allocmap.cc +++ /dev/null @@ -1,81 +0,0 @@ -#include "catch2/catch_all.hpp" - -#include "PerfTools/AllocMonitor/plugins/mea_AllocMap.h" - -using namespace edm::service::moduleEventAlloc; - -namespace { - void* address(int i) { return reinterpret_cast(i); } -} // namespace - -TEST_CASE("Test ema::AllocMap", "[AllocMap]") { - SECTION("empty") { - AllocMap map; - CHECK(map.size() == 0); - CHECK(map.findOffset(nullptr) == 0); - CHECK(map.allocationSizes().empty()); - } - - SECTION("insert in order") { - AllocMap map; - map.insert(address(1), 1); - CHECK(map.size() == 1); - CHECK(map.findOffset(address(1)) == 0); - CHECK(map.allocationSizes() == std::vector({1})); - map.insert(address(2), 2); - CHECK(map.size() == 2); - CHECK(map.findOffset(address(1)) == 0); - CHECK(map.findOffset(address(2)) == 1); - CHECK(map.allocationSizes() == std::vector({1, 2})); - map.insert(address(3), 3); - CHECK(map.size() == 3); - CHECK(map.findOffset(address(1)) == 0); - CHECK(map.findOffset(address(2)) == 1); - CHECK(map.findOffset(address(3)) == 2); - CHECK(map.allocationSizes() == std::vector({1, 2, 3})); - - SECTION("missing find in front") { CHECK(map.findOffset(nullptr) == map.size()); } - SECTION("missing find off end") { CHECK(map.findOffset(address(4)) == map.size()); } - SECTION("overwrite value") { - map.insert(address(2), 4); - CHECK(map.allocationSizes() == std::vector({1, 4, 3})); - } - } - SECTION("insert in reverse order") { - AllocMap map; - map.insert(address(3), 3); - CHECK(map.size() == 1); - CHECK(map.findOffset(address(3)) == 0); - CHECK(map.allocationSizes() == std::vector({3})); - map.insert(address(2), 2); - CHECK(map.size() == 2); - CHECK(map.findOffset(address(2)) == 0); - CHECK(map.findOffset(address(3)) == 1); - CHECK(map.allocationSizes() == std::vector({2, 3})); - map.insert(address(1), 1); - CHECK(map.size() == 3); - CHECK(map.findOffset(address(1)) == 0); - CHECK(map.findOffset(address(2)) == 1); - CHECK(map.findOffset(address(3)) == 2); - CHECK(map.allocationSizes() == std::vector({1, 2, 3})); - } - SECTION("insert in middle") { - AllocMap map; - map.insert(address(1), 1); - CHECK(map.size() == 1); - CHECK(map.findOffset(address(1)) == 0); - CHECK(map.allocationSizes() == std::vector({1})); - map.insert(address(3), 3); - CHECK(map.size() == 2); - CHECK(map.findOffset(address(1)) == 0); - CHECK(map.findOffset(address(3)) == 1); - CHECK(map.allocationSizes() == std::vector({1, 3})); - SECTION("missing findOffset") { CHECK(map.findOffset(address(2)) == map.size()); } - map.insert(address(2), 2); - CHECK(map.size() == 3); - CHECK(map.findOffset(address(1)) == 0); - CHECK(map.findOffset(address(2)) == 1); - CHECK(map.findOffset(address(3)) == 2); - CHECK(map.allocationSizes() == std::vector({1, 2, 3})); - } -} diff --git a/PerfTools/AllocMonitor/test/unittest_output/moduleEventAllocMonitor_thingAnalyzerUnmatched.txt b/PerfTools/AllocMonitor/test/unittest_output/moduleEventAllocMonitor_thingAnalyzerUnmatched.txt new file mode 100644 index 0000000000000..392f21a24fa02 --- /dev/null +++ b/PerfTools/AllocMonitor/test/unittest_output/moduleEventAllocMonitor_thingAnalyzerUnmatched.txt @@ -0,0 +1,3 @@ +0 0 0 0 +0 0 0 0 +0 0 0 0 diff --git a/SimCalorimetry/Configuration/python/SimCalorimetry_EventContent_cff.py b/SimCalorimetry/Configuration/python/SimCalorimetry_EventContent_cff.py index ff0a11ed9eb8c..ae1ff68357822 100644 --- a/SimCalorimetry/Configuration/python/SimCalorimetry_EventContent_cff.py +++ b/SimCalorimetry/Configuration/python/SimCalorimetry_EventContent_cff.py @@ -75,12 +75,12 @@ # mods for HGCAL _phase2_hgc_extraCommands = cms.PSet( # using PSet in order to customize with Modifier - v = cms.vstring('keep *_simHGCalUnsuppressedDigis_EE_*', 'keep *_simHGCalUnsuppressedDigis_HEfront_*', 'keep *_simHGCalUnsuppressedDigis_HEback_*', 'keep *_mix_MergedCaloTruth_*', 'keep *_mix_MergedMtdTruth_*', 'keep *_mix_MergedMtdTruthLC_*', 'keep *_mix_MergedMtdTruthST_*'), + v = cms.vstring('keep *_simHGCalUnsuppressedDigis_EE_*', 'keep *_simHGCalUnsuppressedDigis_HEfront_*', 'keep *_simHGCalUnsuppressedDigis_HEback_*', 'keep *_mix_MergedCaloTruth*_*', 'keep *_mix_MergedMtdTruth_*', 'keep *_mix_MergedMtdTruthLC_*', 'keep *_mix_MergedMtdTruthST_*'), ) # For phase2 premixing switch the sim digi collections to the ones including pileup from Configuration.ProcessModifiers.premix_stage2_cff import premix_stage2 premix_stage2.toModify(_phase2_hgc_extraCommands, - v = ['keep *_mixData_HGCDigisEE_*', 'keep *_mixData_HGCDigisHEfront_*', 'keep *_mixData_HGCDigisHEback_*', 'keep *_mixData_MergedCaloTruth_*', 'keep *_mix_MergedMtdTruth_*', 'keep *_mixData_MergedMtdTruthLC_*', 'keep *_mix_MergedMtdTruthST_*']) + v = ['keep *_mixData_HGCDigisEE_*', 'keep *_mixData_HGCDigisHEfront_*', 'keep *_mixData_HGCDigisHEback_*', 'keep *_mixData_MergedCaloTruth*_*', 'keep *_mix_MergedMtdTruth_*', 'keep *_mixData_MergedMtdTruthLC_*', 'keep *_mix_MergedMtdTruthST_*']) phase2_hgcal.toModify( SimCalorimetryRAW, outputCommands = SimCalorimetryRAW.outputCommands + _phase2_hgc_extraCommands.v ) phase2_hgcal.toModify( SimCalorimetryFEVTDEBUG, outputCommands = SimCalorimetryFEVTDEBUG.outputCommands + _phase2_hgc_extraCommands.v ) phase2_hgcal.toModify( SimCalorimetryRECO, outputCommands = SimCalorimetryRECO.outputCommands + _phase2_hgc_extraCommands.v ) diff --git a/SimDataFormats/CaloAnalysis/interface/CaloParticle.h b/SimDataFormats/CaloAnalysis/interface/CaloParticle.h index 83a02359e6f77..e27d5cbc13c35 100644 --- a/SimDataFormats/CaloAnalysis/interface/CaloParticle.h +++ b/SimDataFormats/CaloAnalysis/interface/CaloParticle.h @@ -183,6 +183,12 @@ namespace io_v1 { return result; } + /** @brief clear the hits and fractions list */ + void clearHitsAndFractions() { + hits_.clear(); + fractions_.clear(); + } + /** @brief returns the accumulated sim energy in the cluster */ float simEnergy() const { return simhit_energy_; } diff --git a/SimDataFormats/CaloAnalysis/interface/SimCluster.h b/SimDataFormats/CaloAnalysis/interface/SimCluster.h index c0aff69826def..e0c0d0869baf7 100644 --- a/SimDataFormats/CaloAnalysis/interface/SimCluster.h +++ b/SimDataFormats/CaloAnalysis/interface/SimCluster.h @@ -6,22 +6,22 @@ #include "DataFormats/Math/interface/LorentzVector.h" #include "DataFormats/Math/interface/Point3D.h" #include "DataFormats/Math/interface/Vector3D.h" +#include "DataFormats/DetId/interface/DetId.h" #include "SimDataFormats/CaloHit/interface/PCaloHit.h" #include "SimDataFormats/EncodedEventId/interface/EncodedEventId.h" #include "SimDataFormats/Track/interface/SimTrack.h" + #include +#include #include - -#include "DataFormats/DetId/interface/DetId.h" -#include "DataFormats/EcalDetId/interface/EcalSubdetector.h" -#include "DataFormats/HcalDetId/interface/HcalSubdetector.h" +#include namespace io_v1 { - /** @brief Monte Carlo truth information used for tracking validation. + /** @brief Monte Carlo truth information used for calorimeter reco validation * - * Object with references to the original SimTrack and parent and daughter - * TrackingVertices. Simulation with high (~100) pileup was taking too much - * memory so the class was slimmed down and copies of the SimHits were removed. + * Object with copies to the original SimTrack, and eventual reference to GenParticle + * Simulated calorimeter hits are saved as a list of pairs (DetId, fraction of reco hit energy contributed by the SimTrack) + * ('absolute' hit energy is usually not saved) * * @author original author unknown, re-engineering and slimming by Subir Sarkar * (subir.sarkar@cern.ch), some tweaking and documentation by Mark Grimes @@ -42,13 +42,14 @@ namespace io_v1 { typedef reco::GenParticleRefVector::iterator genp_iterator; typedef std::vector::const_iterator g4t_iterator; - SimCluster(); + SimCluster() = default; SimCluster(const SimTrack &simtrk); SimCluster(EncodedEventId eventID, uint32_t particleID); // for PU - - // destructor - ~SimCluster(); + /** Build a SimCluster from a collection of SimCluster. Hits&fractions are merged, SimTracks are all added in g4Tracks_ */ + template + requires std::ranges::input_range && std::same_as, SimCluster> + static SimCluster mergeHitsFromCollection(R const &); /** @brief PDG ID. * @@ -246,10 +247,42 @@ namespace io_v1 { math::XYZTLorentzVectorF theMomentum_; - /// references to G4 and reco::GenParticle tracks - std::vector g4Tracks_; + std::vector g4Tracks_; ///< Copies of SimTrack used to build SimCluster (usually there is only one) + /// Ref to GenParticle (in case the SimCluster is created from the entire GenParticle). Usually either empty or length 1 reco::GenParticleRefVector genParticles_; }; + + template + requires std::ranges::input_range && std::same_as, SimCluster> + SimCluster SimCluster::mergeHitsFromCollection(R const &inputs) { + assert(!std::ranges::empty(inputs)); + SimCluster ret; + ret.event_ = inputs[0].event_; + ret.particleId_ = inputs[0].particleId_; + + ret.g4Tracks_.reserve(inputs.size()); + std::unordered_map acc_fractions; ///< Map DetId->(fraction) + for (SimCluster const &other : inputs) { + ret.simhit_energy_ += other.simhit_energy_; + + assert(other.hits_.size() == other.fractions_.size()); + for (std::size_t i = 0; i < other.hits_.size(); i++) { + acc_fractions[other.hits_[i]] += other.fractions_[i]; + } + + ret.g4Tracks_.insert(ret.g4Tracks_.end(), other.g4Tracks_.begin(), other.g4Tracks_.end()); + } + + ret.hits_.reserve(acc_fractions.size()); + ret.fractions_.reserve(acc_fractions.size()); + for (const auto &hit_and_fraction : acc_fractions) { + ret.hits_.push_back(hit_and_fraction.first); + ret.fractions_.push_back(hit_and_fraction.second); + } + ret.nsimhits_ = ret.hits_.size(); + + return ret; + } } // namespace io_v1 using SimCluster = io_v1::SimCluster; diff --git a/SimDataFormats/CaloAnalysis/src/SimCluster.cc b/SimDataFormats/CaloAnalysis/src/SimCluster.cc index d51e3e90c7c79..93b1573a4dcc1 100644 --- a/SimDataFormats/CaloAnalysis/src/SimCluster.cc +++ b/SimDataFormats/CaloAnalysis/src/SimCluster.cc @@ -2,17 +2,12 @@ #include "DataFormats/HepMCCandidate/interface/GenParticle.h" -#include "FWCore/MessageLogger/interface/MessageLogger.h" - #include +#include namespace io_v1 { const unsigned int SimCluster::longLivedTag = 65536; - SimCluster::SimCluster() { - // No operation - } - SimCluster::SimCluster(const SimTrack &simtrk) { addG4Track(simtrk); event_ = simtrk.eventId(); @@ -27,8 +22,6 @@ namespace io_v1 { particleId_ = particleID; } - SimCluster::~SimCluster() {} - std::ostream &operator<<(std::ostream &s, SimCluster const &tp) { s << "CP momentum, q, ID, & Event #: " << tp.p4() << " " << tp.charge() << " " << tp.pdgId() << " " << tp.eventId().bunchCrossing() << "." << tp.eventId().event() << std::endl; diff --git a/SimDataFormats/CaloAnalysis/src/classes_def.xml b/SimDataFormats/CaloAnalysis/src/classes_def.xml index b51c82e72e449..5ca14dbc056a3 100644 --- a/SimDataFormats/CaloAnalysis/src/classes_def.xml +++ b/SimDataFormats/CaloAnalysis/src/classes_def.xml @@ -6,6 +6,7 @@ + @@ -26,6 +27,7 @@ + diff --git a/SimGeneral/CaloAnalysis/plugins/BuildFile.xml b/SimGeneral/CaloAnalysis/plugins/BuildFile.xml index 7b1458cd095ba..5506c33df2dc0 100644 --- a/SimGeneral/CaloAnalysis/plugins/BuildFile.xml +++ b/SimGeneral/CaloAnalysis/plugins/BuildFile.xml @@ -24,6 +24,7 @@ + diff --git a/SimGeneral/CaloAnalysis/plugins/CaloTruthAccumulator.cc b/SimGeneral/CaloAnalysis/plugins/CaloTruthAccumulator.cc index 5fc2b7544b64a..1f2c3e32f1f8c 100644 --- a/SimGeneral/CaloAnalysis/plugins/CaloTruthAccumulator.cc +++ b/SimGeneral/CaloAnalysis/plugins/CaloTruthAccumulator.cc @@ -1,30 +1,35 @@ -#define DEBUG false - -#if DEBUG -#pragma GCC diagnostic pop -#endif - +/** + * @brief CaloTruthAccumulator creates simulation truth objects for calorimeter : SimCluster and CaloParticle + * Creates collections : + * - CaloParticle (both in CaloParticle and SimCluster dataformat) : set of simhits created by a genParticle (and all its decay products etc) + * - SimCluster "boundary" : for every SimTrack crossing the tracker-calorimeter boundary, create a SimCluster object collecting all simhits from it and its decay products + * - SimCluster "legacy" : every SimTrack with simhits makes a SimCluster (depends on SimTrack persistence criteria) + * - SimCluster "merged" : takes "boundary" SimClusters and merges them according to some algorithm (to merge SimClusters that are not separable at reconstruction level) + * and RefVectors to map back to CaloParticle (and map "boundary" to "merged") + */ #include +#include #include - -#include // for std::accumulate +#include +#include #include +#include +#include #include "FWCore/Framework/interface/ConsumesCollector.h" #include "FWCore/Framework/interface/ESWatcher.h" #include "FWCore/Framework/interface/Event.h" #include "FWCore/Framework/interface/EventSetup.h" +#include "FWCore/Framework/interface/ConsumesCollector.h" #include "FWCore/Framework/interface/ProducesCollector.h" #include "FWCore/MessageLogger/interface/MessageLogger.h" #include "FWCore/ParameterSet/interface/ParameterSet.h" +#include "FWCore/ParameterSet/interface/ParameterSetDescription.h" #include "FWCore/Utilities/interface/InputTag.h" #include "DataFormats/ForwardDetId/interface/HGCalDetId.h" #include "DataFormats/HcalDetId/interface/HcalDetId.h" -#include "DataFormats/HcalDetId/interface/HcalTestNumbering.h" #include "DataFormats/HepMCCandidate/interface/GenParticle.h" -#include "DataFormats/SiPixelDetId/interface/PixelSubdetector.h" -#include "DataFormats/SiStripDetId/interface/StripSubdetector.h" #include "SimDataFormats/CaloAnalysis/interface/CaloParticle.h" #include "SimDataFormats/CaloAnalysis/interface/CaloParticleFwd.h" @@ -34,12 +39,12 @@ #include "SimDataFormats/CaloTest/interface/HGCalTestNumbering.h" #include "SimDataFormats/GeneratorProducts/interface/HepMCProduct.h" #include "SimDataFormats/Vertex/interface/SimVertex.h" +#include "SimDataFormats/Track/interface/SimTrack.h" #include "SimGeneral/MixingModule/interface/DecayGraph.h" #include "SimGeneral/MixingModule/interface/DigiAccumulatorMixMod.h" #include "SimGeneral/MixingModule/interface/DigiAccumulatorMixModFactory.h" #include "SimGeneral/MixingModule/interface/PileUpEventPrincipal.h" -#include "SimGeneral/TrackingAnalysis/interface/EncodedTruthId.h" #include "Geometry/CaloGeometry/interface/CaloGeometry.h" #include "Geometry/HGCalGeometry/interface/HGCalGeometry.h" @@ -48,11 +53,156 @@ #include "Geometry/Records/interface/CaloGeometryRecord.h" #include +#include +#include namespace { using Index_t = unsigned; using Barcode_t = int; - const std::string messageCategoryGraph_("CaloTruthAccumulatorGraphProducer"); + + /** Config for building one SimCluster collection */ + struct SimClusterConfig { + SimClusterCollection outputClusters; + edm::EDPutTokenT outputClusters_token; + + /// For the map back to "CaloParticle" in SimCluster dataformat + SimClusterRefVector clustersToCaloParticleMap; + edm::EDPutTokenT clustersToCaloParticleMap_token; + + SimClusterConfig(edm::ProducesCollector &c, std::string tag) + : outputClusters_token(c.produces(tag)), + clustersToCaloParticleMap_token(c.produces(tag)) {} + + void clearAndReleaseMemory() { + // Using the clear-and-minimize idiom to ensure the vector memory is completely released after each event is processed + SimClusterCollection().swap(outputClusters); + SimClusterRefVector().swap(clustersToCaloParticleMap); + } + }; + + /** + * Base class for a builder of SimCluster that has callbacks both when a CaloParticle is ended or when a SimCluster (inside a CaloParticle) is started + * Typically used by SimCluster merging algorithm, in the following way : + * 1) a CaloParticle is started + * 2) nested inside CaloParticle, several SimCluster can be built + * 3) when a CaloParticle is ended, the merging algorithm merges some SimCluster and adds some "merged" SimCluster to another collection + */ + class SubClusterMergerBase { + public: + /// Called when a SimCluster is started (along with the index of the SimCluster in its output collection) + void start_subcluster(DecayChain::edge_descriptor e, const DecayChain &g, std::size_t currentSubClusterIndex); + /** + * Called when a CaloParticle is finished. + * @param subClustersBuilt is the list of SimCluster built such that the indices "currentSubClusterIndex" passed to "start_subcluster" match + */ + void end_parentCluster(std::span subClustersBuilt, std::size_t currentCaloParticleIndex); + }; + + /// Uses fastjet jet clustering algorithm to merge SimCluster (only those from the same CaloParticle) + template + class SimClusterMergerByFastJet : public SubClusterMergerBase { + public: + SimClusterMergerByFastJet(SimClusterCollection &clusters, + ClusterParentIndexRecorderT caloParticleParentIndexRecorder, + ClusterParentIndexRecorderT subClusterToMergedClusterParentIndexRecorder, + fastjet::JetDefinition const &jetDefinition) + : clusters_(clusters), + caloParticleParentIndexRecorder_(caloParticleParentIndexRecorder), + subClusterToMergedClusterParentIndexRecorder_(subClusterToMergedClusterParentIndexRecorder), + jetDefinition_(jetDefinition) {} + + void start_subcluster(DecayChain::edge_descriptor e, const DecayChain &g, std::size_t currentSubClusterIndex) { + auto edge_property = get(edge_weight, g, e); + SimTrack const &simtrack = *edge_property.simTrack; + + /* Build the particle 4-vector such that the energy is the energy of SimTrack at boundary, + the momentum 3-vector points to the boundary position, and the mass is zero */ + auto energyAtBoundary = simtrack.getMomentumAtBoundary().E(); + auto momentum3D = energyAtBoundary * simtrack.getPositionAtBoundary().Vect().Unit(); + auto &jet = fjInputs_.emplace_back(momentum3D.X(), momentum3D.Y(), momentum3D.Z(), energyAtBoundary); + jet.set_user_index(currentSubClusterIndex); // Store the index in SimCLuster collection for later + } + + void end_parentCluster(std::span subClustersBuilt, std::size_t currentCaloParticleIndex) { + // Clustering + fastjet::ClusterSequence sequence(fjInputs_, jetDefinition_); + auto jets = sequence.inclusive_jets(); + + /// Map from index of subcluster to index of merged cluster + boost::container::flat_map mapToSub; + mapToSub.reserve(fjInputs_.size()); + + // Merging + for (fastjet::PseudoJet const &jet : jets) { + auto constituents = fastjet::sorted_by_E(jet.constituents()); + assert(!constituents.empty()); + assert(constituents[0].user_index() < static_cast(subClustersBuilt.size())); + for (fastjet::PseudoJet const &pseudoJet : constituents) { + // Index of merged cluster is clusters_.size() since we will emplace it just below + mapToSub[pseudoJet.user_index()] = clusters_.size(); + } + + clusters_.emplace_back(SimCluster::mergeHitsFromCollection( + constituents | std::views::transform([&](fastjet::PseudoJet const &pseudoJet) { + return subClustersBuilt[static_cast(pseudoJet.user_index())]; + }))); + + // Record CaloParticle index for the merged SimCluster + caloParticleParentIndexRecorder_.recordParentClusterIndex(currentCaloParticleIndex); + } + + for (fastjet::PseudoJet const &subCluster : fjInputs_) { + // Important to keep fjInputs_ in sync with subcluster indices + subClusterToMergedClusterParentIndexRecorder_.recordParentClusterIndex(mapToSub[subCluster.user_index()]); + } + + fjInputs_.clear(); + } + + private: + SimClusterCollection &clusters_; ///< output merged clusters + + std::vector fjInputs_; ///< SubClusters to be merged when end_parentCluster is called + ClusterParentIndexRecorderT caloParticleParentIndexRecorder_; ///< build RefVector to CaloParticle + /// build RefVector from "sub" cluster to "this" cluster collection + ClusterParentIndexRecorderT subClusterToMergedClusterParentIndexRecorder_; + fastjet::JetDefinition const &jetDefinition_; + }; + + class SimClusterMergerByFastJetConfig : public SimClusterConfig { + public: + SimClusterMergerByFastJetConfig(edm::ProducesCollector &c, std::string tag, const edm::ParameterSet &ps) + : SimClusterConfig(c, tag), + subClustersToMergedClusterMap_token(c.produces(tag + "MapFromSubCluster")), + jetClusteringRadius_(ps.getParameter("jetClusteringRadius")), + jetDefinition_(fastjet::antikt_algorithm, jetClusteringRadius_) {} + + void fillPSetDescription(edm::ParameterSetDescription &desc) { + desc.add("jetClusteringRadius", 0.05)->setComment("Distance parameter for clustering algorithm"); + } + + template + auto getMerger(ClusterParentIndexRecorderT caloParticleParentIndexRecorder, + ClusterParentIndexRecorderT subClusterToMergedClusterParentIndexRecorder) { + return SimClusterMergerByFastJet(outputClusters, + caloParticleParentIndexRecorder, + subClusterToMergedClusterParentIndexRecorder, + jetDefinition_); + } + + void clearAndReleaseMemory() /* override */ { + SimClusterConfig::clearAndReleaseMemory(); + SimClusterRefVector().swap(subClustersToMergedClusterMap); + } + + /// For the map from subCluster to merged SimCluster + SimClusterRefVector subClustersToMergedClusterMap; + edm::EDPutTokenT subClustersToMergedClusterMap_token; + + private: + double jetClusteringRadius_; + const fastjet::JetDefinition jetDefinition_; + }; } // namespace class CaloTruthAccumulator : public DigiAccumulatorMixMod { @@ -71,18 +221,19 @@ class CaloTruthAccumulator : public DigiAccumulatorMixMod { const edm::EventSetup &setup, const edm::Handle &hepMCproduct); - /** @brief Fills the supplied vector with pointers to the SimHits, checking - * for bad modules if required */ + /** + * @brief Iterate over all SimHits collections (vectors of PCaloHit) and build maps between tracks, DetId and simEnergies + * @return nested map G4 Track ID -> DetId -> accumulated SimHit energy (over same track, in case of loops) + * Side-effect : Also this->m_detIdToTotalSimEnergy is filled (map DetId-> accumulated sim energy), for normalization into fractions in finalizeEvent + */ template - void fillSimHits(std::vector> &returnValue, - std::unordered_map> &simTrackDetIdEnergyMap, - const T &event, - const edm::EventSetup &setup); + std::unordered_map> fillSimTrackDetIdEnergyMap(const T &event, + const edm::EventSetup &setup); const std::string messageCategory_; - std::unordered_map m_detIdToTotalSimEnergy; // keep track of cell normalizations - std::unordered_multimap m_simHitBarcodeToIndex; + /// Map DetId-> accumulated sim energy, to keep track of cell normalizations + std::unordered_map m_detIdToTotalSimEnergy; /** The maximum bunch crossing BEFORE the signal crossing to create TrackinParticles for. Use positive values. If set to zero no @@ -99,134 +250,319 @@ class CaloTruthAccumulator : public DigiAccumulatorMixMod { const edm::InputTag simTrackLabel_; const edm::InputTag simVertexLabel_; - edm::Handle> hSimTracks; - edm::Handle> hSimVertices; - std::vector collectionTags_; - edm::InputTag genParticleLabel_; + std::vector collectionTags_; ///< SimHits collections + const edm::InputTag genParticleLabel_; /// Needed to add HepMC::GenVertex to SimVertex - edm::InputTag hepMCproductLabel_; + const edm::InputTag hepMCproductLabel_; const edm::ESGetToken geomToken_; edm::ESWatcher geomWatcher_; - const double minEnergy_, maxPseudoRapidity_; + const double minEnergy_, maxPseudoRapidity_; ///< CaloParticle creation criteria + bool produceLegacySimCluster_, produceBoundaryAndMergedSimCluster_; const bool premixStage1_; -public: - struct OutputCollections { - std::unique_ptr pSimClusters; - std::unique_ptr pCaloParticles; - }; - - struct calo_particles { - std::vector sc_start_; - std::vector sc_stop_; + CaloParticleCollection outputCaloParticles_; + edm::EDPutTokenT outputCaloParticles_token_; + + SimClusterConfig legacySimClusters_config_; ///< Legacy SimCluster from every SimTrack with simhits + SimClusterConfig boundarySimClusters_config_; ///< SimClusters from each SimTrack crossing boundary + /// SimCluster that are identical to CaloParticle (to make it easier on downstream code, only one dataformat for everything) + SimClusterConfig caloParticleSimClusters_config_; + /// SimCluster built by merging "boundary" SimCluster (coming from the same CaloParticle) that are very close together (using jet clustering algorithm) + SimClusterMergerByFastJetConfig simClusterMergerFastJet_config_; + /// apply a function to all SimCluster config + template + void applyToSimClusterConfig(T f) { + if (produceLegacySimCluster_ && produceBoundaryAndMergedSimCluster_) + std::apply([f](auto &&...args) { (f(args), ...); }, + std::tie(legacySimClusters_config_, + boundarySimClusters_config_, + caloParticleSimClusters_config_, + simClusterMergerFastJet_config_)); + else if (produceLegacySimCluster_) + f(legacySimClusters_config_); + else if (produceBoundaryAndMergedSimCluster_) + std::apply( + [f](auto &&...args) { (f(args), ...); }, + std::tie(boundarySimClusters_config_, caloParticleSimClusters_config_, simClusterMergerFastJet_config_)); + } - void swap(calo_particles &oth) { - sc_start_.swap(oth.sc_start_); - sc_stop_.swap(oth.sc_stop_); - } + /// To build the RefVector from SimCluster to "CaloParticle" SimCluster collection + edm::RefProd caloParticles_refProd_; + /// To build the RefVector from "merged" SimCluster to "boundary" SimCluster collection + edm::RefProd boundarySimCluster_refProd_; - void clear() { - sc_start_.clear(); - sc_stop_.clear(); - } - }; - -private: const HGCalTopology *hgtopo_[3] = {nullptr, nullptr, nullptr}; const HGCalDDDConstants *hgddd_[3] = {nullptr, nullptr, nullptr}; const HcalDDDRecConstants *hcddd_ = nullptr; - OutputCollections output_; - calo_particles m_caloParticles; // geometry type (0 pre-TDR; 1 TDR) int geometryType_; - bool doHGCAL; + const bool doHGCAL; }; /* Graph utility functions */ namespace { - class CaloParticle_dfs_visitor : public boost::default_dfs_visitor { + /// Helper class to access hits, energies and time + class VisitorHelper { public: - CaloParticle_dfs_visitor(CaloTruthAccumulator::OutputCollections &output, - CaloTruthAccumulator::calo_particles &caloParticles, - std::unordered_multimap &simHitBarcodeToIndex, - std::unordered_map> &simTrackDetIdEnergyMap, - std::unordered_map &vertex_time_map, - Selector selector) - : output_(output), - caloParticles_(caloParticles), - simHitBarcodeToIndex_(simHitBarcodeToIndex), - simTrackDetIdEnergyMap_(simTrackDetIdEnergyMap), - vertex_time_map_(vertex_time_map), - selector_(selector), - insideCP_(false) {} - template - void discover_vertex(Vertex u, const Graph &g) { - // If we reach the vertex 0, it means that we are backtracking with respect - // to the first generation of stable particles: simply return; - // if (u == 0) return; - print_vertex(u, g); - auto const vertex_property = get(vertex_name, g, u); - if (!vertex_property.simTrack) - return; - auto trackIdx = vertex_property.simTrack->trackId(); - IfLogDebug(DEBUG, messageCategoryGraph_) - << " Found " << simHitBarcodeToIndex_.count(trackIdx) << " associated simHits" << std::endl; - if (insideCP_ && simHitBarcodeToIndex_.count(trackIdx)) { - output_.pSimClusters->emplace_back(*vertex_property.simTrack); - auto &simcluster = output_.pSimClusters->back(); - std::unordered_map acc_energy; - for (auto const &hit_and_energy : simTrackDetIdEnergyMap_[trackIdx]) { + VisitorHelper(std::unordered_map> const &simTrackDetIdEnergyMap, + std::unordered_map const &vertex_time_map) + : simTrackDetIdEnergyMap_(simTrackDetIdEnergyMap), vertex_time_map_(vertex_time_map) {} + + // to check if SimTrack has simHits : use DecayGraph EdgeProperty + + /** Given a track G4 ID, give map DetId->accumulated SimHit energy */ + std::map const &hits_and_energies(unsigned int trackIdx) const { + return simTrackDetIdEnergyMap_.at(trackIdx); + } + + float getVertexTime(uint32_t simVertexId) { return vertex_time_map_.at(simVertexId); } + + private: + /// nested map G4 Track ID -> DetId -> accumulated SimHit energy (over same track, in case of loops) + std::unordered_map> const &simTrackDetIdEnergyMap_; + std::unordered_map const &vertex_time_map_; ///< Map SimVertex index to sim vertex time + }; + + /** + * Accumlates SimHits energies and DetId as each graph edge (=SimTrack) is visited + * @tparam SimClassNameT either SimCluster or CaloParticle + */ + template + class ClusterEnergyAccumulator { + public: + ClusterEnergyAccumulator(VisitorHelper const &helper) : helper_(helper) {} + + template + void accumulate_edge_in_cluster(Edge e, const Graph &g) { + auto edge_property = get(edge_weight, g, e); + const SimTrack *edge_simTrack = edge_property.simTrack; + + if (!edge_simTrack) [[unlikely]] + return; // Should not happen + auto trackIdx = edge_simTrack->trackId(); + if (edge_property.simHits != 0) { + for (auto const &hit_and_energy : helper_.hits_and_energies(trackIdx)) { acc_energy[hit_and_energy.first] += hit_and_energy.second; } - for (auto const &hit_and_energy : acc_energy) { - simcluster.addRecHitAndFraction(hit_and_energy.first, hit_and_energy.second); - } } } - template - void examine_edge(Edge e, const Graph &g) { - auto src = source(e, g); - auto vertex_property = get(vertex_name, g, src); - if (src == 0 or (vertex_property.simTrack == nullptr)) { - auto edge_property = get(edge_weight, g, e); - IfLogDebug(DEBUG, messageCategoryGraph_) << "Considering CaloParticle: " << edge_property.simTrack->trackId(); - if (selector_(edge_property)) { - insideCP_ = true; - IfLogDebug(DEBUG, messageCategoryGraph_) << "Adding CaloParticle: " << edge_property.simTrack->trackId(); - output_.pCaloParticles->emplace_back(*(edge_property.simTrack)); - output_.pCaloParticles->back().setSimTime(vertex_time_map_[(edge_property.simTrack)->vertIndex()]); - caloParticles_.sc_start_.push_back(output_.pSimClusters->size()); - } else { - insideCP_ = false; - } + + void doEndCluster(SimClassNameT &cluster) { + for (auto const &hit_and_energy : acc_energy) { + cluster.addRecHitAndFraction(hit_and_energy.first, hit_and_energy.second); + } + acc_energy.clear(); + } + + private: + VisitorHelper const &helper_; + std::unordered_map acc_energy; ///< Map DetId->simHit energies for the current cluster + }; + + /** + * Visitor class for depth_first_search. + * - on a root edge, check the associated GenParticle. Check if it passes selection + * if it does not, discard + * if it does, start a new CaloParticle (record edge) + * - on any edge within a CaloParticle, check SimCluster conditions. + * If they pass, create a SimCluster. + * @tparam Selector_t lambda for selections to create a CaloParticle + * @tparam SubClusterVisitorTuple std::tuple of visitors for creating SimCluster collections nested inside the CaloParticle + */ + template + class PrimaryVisitor : public boost::default_dfs_visitor { + public: + /** + * @param caloParticles output collection + * @param caloParticleSelector functor EdgeProperty -> bool to determine if a SimTrack should be promoted to CaloParticle + * @param subClusterVisitors tuple of visitors for callbacks (use as std::make_tuple(SubClusterVisitor( ...), SubClusterVisitor<...>(...), ....) ) + */ + PrimaryVisitor(VisitorHelper &helper, + std::vector &caloParticles, + Selector_t caloParticleSelector, + SubClusterVisitorTuple subClusterVisitors) + : helper_(helper), + caloParticleSelector_(caloParticleSelector), + caloParticleAccumulator_(helper), + caloParticles_(caloParticles), + subClusterVisitors_(subClusterVisitors) {} + + void examine_edge(DecayChain::edge_descriptor e, const DecayChain &g) { + auto edge_property = get(edge_weight, g, e); + if (!insideCluster_ && caloParticleSelector_(edge_property)) { + insideCluster_ = true; + caloParticleEdge_ = e; + // Create a new CaloParticle + caloParticles_.emplace_back(*edge_property.simTrack); + // For a CaloParticle the simTime is set at the vertex ! + caloParticles_.back().setSimTime(helper_.getVertexTime(edge_property.simTrack->vertIndex())); + // This loops over all elements of subClusterVisitors_ tuple and calls begin_parentCluster on them (compile-time loop expansion) + std::apply([&](auto &...x) { (..., x.begin_parentCluster(e, g)); }, subClusterVisitors_); + } + + if (insideCluster_) { + caloParticleAccumulator_.accumulate_edge_in_cluster(e, g); // accumulate simhits energies + + std::apply([&](auto &...x) { (..., x.examine_edge(e, g, caloParticles_.size() - 1)); }, subClusterVisitors_); + } + } + + void finish_edge(DecayChain::edge_descriptor e, const DecayChain &g) { + if (insideCluster_) { + std::apply([&](auto &...x) { (..., x.finish_edge(e, g)); }, subClusterVisitors_); + } + if (insideCluster_ && e == caloParticleEdge_) { + insideCluster_ = false; + caloParticleAccumulator_.doEndCluster(caloParticles_.back()); + std::apply([&](auto &...x) { (..., x.end_parentCluster(caloParticles_.size() - 1)); }, subClusterVisitors_); } } - template - void finish_edge(Edge e, const Graph &g) { - auto src = source(e, g); - auto vertex_property = get(vertex_name, g, src); - if (src == 0 or (vertex_property.simTrack == nullptr)) { + private: + VisitorHelper &helper_; + + Selector_t caloParticleSelector_; + ClusterEnergyAccumulator caloParticleAccumulator_; + std::vector &caloParticles_; ///< Output collection + + SubClusterVisitorTuple subClusterVisitors_; ///< std::tuple of SubClusterVisitor + + bool insideCluster_{false}; ///< are we inside a CaloParticle (during DFS algorithm) + /// Keep track of CaloParticle root edge to flag end of cluster in DFS + DecayChain::edge_descriptor caloParticleEdge_; + }; + + /** + * Fills RefVector to keep mapping from SimCluster to another collection (typically CaloParticle). + * @tparam ParentClusterCollectionT type of the collection to refer to (eg: std::vector or std::vector) + */ + template + class ClusterParentIndexRecorder { + public: + /** The RefProd is to fill the RefVector with edm::Ref (build it with getRefBeforePut) */ + ClusterParentIndexRecorder(edm::RefVector &refVector, + edm::RefProd const &refProd) + : refVector_(refVector), refProd_(refProd) {} + + void recordParentClusterIndex(std::size_t parentClusterIndex) { + refVector_.push_back(edm::Ref(refProd_, parentClusterIndex)); + } + + private: + edm::RefVector &refVector_; ///< output RefVector + /// Need the RefProd to build the edm::Ref to insert into RefVector + edm::RefProd const &refProd_; + }; + + /** Base class for a graph visitor building SubCluster (ie SimCluster that are created inside a CaloParticle) */ + class SubClusterVisitorBase { + public: + /// Called during DFS at each edge(=SimTrack) inside a CaloParticle + void examine_edge(DecayChain::edge_descriptor e, const DecayChain &g, std::size_t currentCaloParticleIndex) {} + void finish_edge(DecayChain::edge_descriptor e, const DecayChain &g) {} + + /// Called when a "parent" cluster (ie a CaloParticle) is started + void begin_parentCluster(DecayChain::edge_descriptor e, const DecayChain &g) {} + /// Called when a "parent" cluster (ie a CaloParticle) is ended, along with the CaloParticle index into its own collection (for building edm::Ref) + void end_parentCluster(std::size_t currentCaloParticleIndex) {} + }; + + /** + * Visitor that creates cluster with reference to another "parent" cluster collection, like SimCluster inside CaloParticle + * Does not make "nested sub-clusters" + * @tparam SubClusterT typically SimCluster + * @tparam ClusterParentIndexRecorderT type of the object in charge of building the RefVector to parent (@see ClusterParentIndexRecorder) + * @tparam Selector_t lambda for criteria for creating a subcluster + */ + template > + class SubClusterVisitor : public SubClusterVisitorBase { + public: + SubClusterVisitor(std::vector &clusters, + ClusterParentIndexRecorderT parentIndexRecorder, + VisitorHelper const &helper, + Selector_t selector, + SubClusterMergerTupleT subClusterMergers = SubClusterMergerTupleT()) + : clusters_(clusters), + accumulator(helper), + indexRecorder(parentIndexRecorder), + selector_(selector), + subClusterMergers_(subClusterMergers) {} + + void examine_edge(DecayChain::edge_descriptor e, const DecayChain &g, std::size_t currentCaloParticleIndex) { + if (!insideCluster_ && selector_(get(edge_weight, g, e))) { + insideCluster_ = true; + clusterRootEdge_ = e; + + // Create a new cluster auto edge_property = get(edge_weight, g, e); - if (selector_(edge_property)) { - caloParticles_.sc_stop_.push_back(output_.pSimClusters->size()); - insideCP_ = false; - } + clusters_.emplace_back(*edge_property.simTrack); + indexRecorder.recordParentClusterIndex(currentCaloParticleIndex); + + std::apply([&](auto &...x) { (..., x.start_subcluster(e, g, clusters_.size() - 1)); }, subClusterMergers_); + } + + if (insideCluster_) { + accumulator.accumulate_edge_in_cluster(e, g); } } + void finish_edge(DecayChain::edge_descriptor e, const DecayChain &g) { + if (insideCluster_ && e == clusterRootEdge_) { // we backtracked to the starting edge + insideCluster_ = false; + accumulator.doEndCluster(clusters_.back()); + } + } + + void end_parentCluster(std::size_t currentCaloParticleIndex) { + std::apply([&](auto &...x) { (..., x.end_parentCluster(clusters_, currentCaloParticleIndex)); }, + subClusterMergers_); + } + private: - CaloTruthAccumulator::OutputCollections &output_; - CaloTruthAccumulator::calo_particles &caloParticles_; - std::unordered_multimap &simHitBarcodeToIndex_; - std::unordered_map> &simTrackDetIdEnergyMap_; - std::unordered_map &vertex_time_map_; - Selector selector_; - bool insideCP_; + std::vector &clusters_; ///< output + ClusterEnergyAccumulator accumulator; ///< keep track of simhits + ClusterParentIndexRecorderT indexRecorder; ///< build RefVector to CaloParticle + Selector_t selector_; ///< lambda for criteria for creating a subcluster + bool insideCluster_{false}; ///< is the current DFS algo position inside a subcluster + DecayChain::edge_descriptor clusterRootEdge_; ///< root edge of the subcluster + + /// tuple of mergers that get called at the end of a cluster to possibly do some merging + SubClusterMergerTupleT subClusterMergers_; }; + + /**Creates SimCluster for every SimTrack with simHits. One SimCluster only includes the simHits from the SimTrack it was made of (depends on SimTrack saving criteria) */ + template + class LegacySimClusterVisitor : public SubClusterVisitorBase { + public: + LegacySimClusterVisitor(std::vector &clusters, + ClusterParentIndexRecorderT parentIndexRecorder, + VisitorHelper const &helper) + : clusters_(clusters), accumulator(helper), indexRecorder(parentIndexRecorder) {} + + void examine_edge(DecayChain::edge_descriptor e, const DecayChain &g, std::size_t currentCaloParticleIndex) { + auto edge_property = get(edge_weight, g, e); + if (edge_property.simHits != 0) { + // Create a new cluster + clusters_.emplace_back(*edge_property.simTrack); + indexRecorder.recordParentClusterIndex(currentCaloParticleIndex); + accumulator.accumulate_edge_in_cluster(e, g); + accumulator.doEndCluster(clusters_.back()); + } + } + void finish_edge(DecayChain::edge_descriptor e, const DecayChain &g) {} + + private: + std::vector &clusters_; + ClusterEnergyAccumulator accumulator; + ClusterParentIndexRecorderT indexRecorder; + }; + } // namespace CaloTruthAccumulator::CaloTruthAccumulator(const edm::ParameterSet &config, @@ -243,18 +579,25 @@ CaloTruthAccumulator::CaloTruthAccumulator(const edm::ParameterSet &config, geomToken_(iC.esConsumes()), minEnergy_(config.getParameter("MinEnergy")), maxPseudoRapidity_(config.getParameter("MaxPseudoRapidity")), + produceLegacySimCluster_(config.getParameter("produceLegacySimCluster")), + produceBoundaryAndMergedSimCluster_(config.getParameter("produceBoundaryAndMergedSimCluster")), premixStage1_(config.getParameter("premixStage1")), + outputCaloParticles_token_(producesCollector.produces("MergedCaloTruth")), + legacySimClusters_config_(producesCollector, "MergedCaloTruth"), + boundarySimClusters_config_(producesCollector, "MergedCaloTruthBoundaryTrackSimCluster"), + caloParticleSimClusters_config_(producesCollector, "MergedCaloTruthCaloParticle"), + simClusterMergerFastJet_config_(producesCollector, + "MergedCaloTruthMergedSimCluster", + config.getParameter("simClusterMergerConfig")), geometryType_(-1), doHGCAL(config.getParameter("doHGCAL")) { - producesCollector.produces("MergedCaloTruth"); - producesCollector.produces("MergedCaloTruth"); if (premixStage1_) { - producesCollector.produces>>("MergedCaloTruth"); + producesCollector.produces>>( + "MergedCaloTruth"); // (DetId, total simhit energy) pairs } iC.consumes>(simTrackLabel_); iC.consumes>(simVertexLabel_); - iC.consumes>(genParticleLabel_); iC.consumes>(genParticleLabel_); iC.consumes>(hepMCproductLabel_); @@ -273,11 +616,6 @@ CaloTruthAccumulator::CaloTruthAccumulator(const edm::ParameterSet &config, } void CaloTruthAccumulator::initializeEvent(edm::Event const &event, edm::EventSetup const &setup) { - output_.pSimClusters = std::make_unique(); - output_.pCaloParticles = std::make_unique(); - - m_detIdToTotalSimEnergy.clear(); - if (geomWatcher_.check(setup)) { auto const &geom = setup.getData(geomToken_); const HGCalGeometry *eegeom = nullptr, *fhgeom = nullptr, *bhgeomnew = nullptr; @@ -315,6 +653,12 @@ void CaloTruthAccumulator::initializeEvent(edm::Event const &event, edm::EventSe hcddd_ = bhgeom->topology().dddConstants(); } } + // Seems const_cast is necessary here + caloParticles_refProd_ = const_cast(event).getRefBeforePut( + caloParticleSimClusters_config_.outputClusters_token); + if (produceBoundaryAndMergedSimCluster_) + boundarySimCluster_refProd_ = const_cast(event).getRefBeforePut( + boundarySimClusters_config_.outputClusters_token); } /** Create handle to edm::HepMCProduct here because event.getByLabel with @@ -347,14 +691,38 @@ void CaloTruthAccumulator::accumulate(PileUpEventPrincipal const &event, } } +namespace { + /** Normalize the energies in the SimCluster/CaloParticle collection, from absolute SimHit energies to fraction of total simHits energies */ + template + void normalizeCollection(SimCaloCollection &simClusters, + std::unordered_map const &detIdToTotalSimEnergy) { + for (auto &sc : simClusters) { + auto hitsAndEnergies = sc.hits_and_fractions(); + sc.clearHitsAndFractions(); + // Note : addHitEnergy is actually never used, so we do not clear and refill for it + for (auto &hAndE : hitsAndEnergies) { + const float totalenergy = detIdToTotalSimEnergy.at(hAndE.first); + float fraction = 0.; + if (totalenergy > 0) + fraction = hAndE.second / totalenergy; + else + edm::LogWarning("CaloTruthAccumulator") + << "TotalSimEnergy for hit " << hAndE.first << " is 0! The fraction for this hit cannot be computed."; + sc.addRecHitAndFraction(hAndE.first, fraction); + } + } + } +}; // namespace + void CaloTruthAccumulator::finalizeEvent(edm::Event &event, edm::EventSetup const &setup) { - edm::LogInfo(messageCategory_) << "Adding " << output_.pSimClusters->size() << " SimParticles and " - << output_.pCaloParticles->size() << " CaloParticles to the event."; + edm::LogInfo(messageCategory_) << "Adding " << legacySimClusters_config_.outputClusters.size() + << " legacy SimClusters and " << outputCaloParticles_.size() + << " CaloParticles to the event."; // We need to normalize the hits and energies into hits and fractions (since // we have looped over all pileup events) // For premixing stage1 we keep the energies, they will be normalized to - // fractions in stage2 + // fractions in stage2 (in PreMixingCaloParticleWorker.cc) if (premixStage1_) { auto totalEnergies = std::make_unique>>(); @@ -363,42 +731,38 @@ void CaloTruthAccumulator::finalizeEvent(edm::Event &event, edm::EventSetup cons std::sort(totalEnergies->begin(), totalEnergies->end()); event.put(std::move(totalEnergies), "MergedCaloTruth"); } else { - for (auto &sc : *(output_.pSimClusters)) { - auto hitsAndEnergies = sc.hits_and_fractions(); - sc.clearHitsAndFractions(); - sc.clearHitsEnergy(); - for (auto &hAndE : hitsAndEnergies) { - const float totalenergy = m_detIdToTotalSimEnergy[hAndE.first]; - float fraction = 0.; - if (totalenergy > 0) - fraction = hAndE.second / totalenergy; - else - edm::LogWarning(messageCategory_) - << "TotalSimEnergy for hit " << hAndE.first << " is 0! The fraction for this hit cannot be computed."; - sc.addRecHitAndFraction(hAndE.first, fraction); - sc.addHitEnergy(hAndE.second); - } - } + applyToSimClusterConfig( + [this](auto &config) { normalizeCollection(config.outputClusters, m_detIdToTotalSimEnergy); }); + normalizeCollection(outputCaloParticles_, m_detIdToTotalSimEnergy); } - // save the SimCluster orphan handle so we can fill the calo particles - auto scHandle = event.put(std::move(output_.pSimClusters), "MergedCaloTruth"); - - // now fill the calo particles - for (unsigned i = 0; i < output_.pCaloParticles->size(); ++i) { - auto &cp = (*output_.pCaloParticles)[i]; - for (unsigned j = m_caloParticles.sc_start_[i]; j < m_caloParticles.sc_stop_[i]; ++j) { - edm::Ref ref(scHandle, j); - cp.addSimCluster(ref); + if (produceLegacySimCluster_) { + // fill the calo particles with their ref to SimCluster + auto legacySimClustersRefProd = event.getRefBeforePut(legacySimClusters_config_.outputClusters_token); + for (unsigned i = 0; i < legacySimClusters_config_.outputClusters.size(); ++i) { + // get the key of the edm::Ref + auto &cp = outputCaloParticles_[legacySimClusters_config_.clustersToCaloParticleMap[i].key()]; + cp.addSimCluster(edm::Ref(legacySimClustersRefProd, i)); } } - event.put(std::move(output_.pCaloParticles), "MergedCaloTruth"); - - calo_particles().swap(m_caloParticles); - - std::unordered_map().swap(m_detIdToTotalSimEnergy); - std::unordered_multimap().swap(m_simHitBarcodeToIndex); + event.emplace(outputCaloParticles_token_, std::move(outputCaloParticles_)); + // puts the vector into "empty" determined state (instead of "moved-from" undetermined state) + // also uses clear-and-minimize idiom to ensure the memory is released + CaloParticleCollection().swap(outputCaloParticles_); + + applyToSimClusterConfig([&event](auto &config) { + event.emplace(config.outputClusters_token, std::move(config.outputClusters)); + event.emplace(config.clustersToCaloParticleMap_token, std::move(config.clustersToCaloParticleMap)); + // puts the vector into "empty" determined state (instead of "moved-from" undetermined state) + // and ensure the memory is released (should already be released due to the move, but make sure) + config.clearAndReleaseMemory(); + }); + if (produceBoundaryAndMergedSimCluster_) + event.emplace(simClusterMergerFastJet_config_.subClustersToMergedClusterMap_token, + std::move(simClusterMergerFastJet_config_.subClustersToMergedClusterMap)); + + std::unordered_map().swap(m_detIdToTotalSimEnergy); // clear-and-minimize idiom } template @@ -407,22 +771,16 @@ void CaloTruthAccumulator::accumulateEvent(const T &event, const edm::Handle &hepMCproduct) { edm::Handle> hGenParticles; edm::Handle> hGenParticleIndices; - + edm::Handle> hSimTracks; + edm::Handle> hSimVertices; + // We must always use getByLabel (event can be PileupEventPrincipal which does not have the get, getByToken, etc functions) event.getByLabel(simTrackLabel_, hSimTracks); event.getByLabel(simVertexLabel_, hSimVertices); event.getByLabel(genParticleLabel_, hGenParticles); event.getByLabel(genParticleLabel_, hGenParticleIndices); - std::vector> simHitPointers; - std::unordered_map> simTrackDetIdEnergyMap; - fillSimHits(simHitPointers, simTrackDetIdEnergyMap, event, setup); - - // Clear maps from previous event fill them for this one - m_simHitBarcodeToIndex.clear(); - for (unsigned int i = 0; i < simHitPointers.size(); ++i) { - m_simHitBarcodeToIndex.emplace(simHitPointers[i].second->geantTrackId(), i); - } + std::unordered_map> simTrackDetIdEnergyMap = fillSimTrackDetIdEnergyMap(event, setup); auto const &tracks = *hSimTracks; auto const &vertices = *hSimVertices; @@ -525,22 +883,76 @@ void CaloTruthAccumulator::accumulateEvent(const T &event, } SimHitsAccumulator_dfs_visitor vis; depth_first_search(decay, visitor(vis)); - CaloParticle_dfs_visitor caloParticleCreator( - output_, - m_caloParticles, - m_simHitBarcodeToIndex, - simTrackDetIdEnergyMap, - vertex_time_map, - [&](EdgeProperty &edge_property) -> bool { - // Apply selection on SimTracks in order to promote them to be - // CaloParticles. The function returns TRUE if the particle satisfies - // the selection, FALSE otherwise. Therefore the correct logic to select - // the particle is to ask for TRUE as return value. - return (edge_property.cumulative_simHits != 0 and !edge_property.simTrack->noGenpart() and - edge_property.simTrack->momentum().E() > minEnergy_ and - std::abs(edge_property.simTrack->momentum().Eta()) < maxPseudoRapidity_); - }); - depth_first_search(decay, visitor(caloParticleCreator)); + + VisitorHelper visitorHelper(simTrackDetIdEnergyMap, vertex_time_map); + + auto passGenPartSelections_lambda = [&](EdgeProperty const &edge_property) { + return (edge_property.cumulative_simHits != 0 and !edge_property.simTrack->noGenpart() and + edge_property.simTrack->momentum().E() > minEnergy_ and + std::abs(edge_property.simTrack->momentum().Eta()) < maxPseudoRapidity_); + }; + + auto makeCaloParticleSimCluster = [&]() { // "CaloParticle" SimCluster + return SubClusterVisitor( + caloParticleSimClusters_config_.outputClusters, + // Trivial 1-1 mapping, kept for convenience + ClusterParentIndexRecorder(caloParticleSimClusters_config_.clustersToCaloParticleMap, caloParticles_refProd_), + visitorHelper, + [&](EdgeProperty const &edge_property) -> bool { + // Create a SimCluster for every CaloParticle (duplicates the CaloParticle for convenience of use, to have one single dataformat) + return true; + }); + }; + auto makeLegacySimCluster = [&]() { // "legacy" SimCluster (from every SimTrack with simhits) + return LegacySimClusterVisitor( + legacySimClusters_config_.outputClusters, + ClusterParentIndexRecorder(legacySimClusters_config_.clustersToCaloParticleMap, caloParticles_refProd_), + visitorHelper); + }; + auto makeBoundarySimCluster = [&]() { // "boundary" SimCluster (from every SimTrack crossing boundary) + return SubClusterVisitor( + boundarySimClusters_config_.outputClusters, + ClusterParentIndexRecorder(boundarySimClusters_config_.clustersToCaloParticleMap, caloParticles_refProd_), + visitorHelper, + [&](EdgeProperty const &edge_property) -> bool { + // Create SimCluster from every SimTrack crossing boundary (and which has simhits either itself as in its descendants), and that is inside a CaloParticle + return edge_property.cumulative_simHits != 0 && edge_property.simTrack->crossedBoundary(); + }, + // "merging" subcluster configuration + std::make_tuple(simClusterMergerFastJet_config_.getMerger( + ClusterParentIndexRecorder(simClusterMergerFastJet_config_.clustersToCaloParticleMap, + caloParticles_refProd_), + ClusterParentIndexRecorder(simClusterMergerFastJet_config_.subClustersToMergedClusterMap, + boundarySimCluster_refProd_)))); + }; + + if (produceLegacySimCluster_ && produceBoundaryAndMergedSimCluster_) { + // Do the graph search for all 3 vistors at the same time + // "visitor()" is a Boost BGL named parameter + depth_first_search( + decay, + visitor(PrimaryVisitor( + visitorHelper, + outputCaloParticles_, + passGenPartSelections_lambda, + std::make_tuple(makeCaloParticleSimCluster(), makeLegacySimCluster(), makeBoundarySimCluster())))); + } else if (produceLegacySimCluster_) { + depth_first_search(decay, + visitor(PrimaryVisitor(visitorHelper, + outputCaloParticles_, + passGenPartSelections_lambda, + std::make_tuple(makeLegacySimCluster())))); + } else if (produceBoundaryAndMergedSimCluster_) { + depth_first_search( + decay, + visitor(PrimaryVisitor(visitorHelper, + outputCaloParticles_, + passGenPartSelections_lambda, + std::make_tuple(makeCaloParticleSimCluster(), makeBoundarySimCluster())))); + } else + depth_first_search( + decay, + visitor(PrimaryVisitor(visitorHelper, outputCaloParticles_, passGenPartSelections_lambda, std::make_tuple()))); #if DEBUG boost::write_graphviz(std::cout, @@ -551,10 +963,9 @@ void CaloTruthAccumulator::accumulateEvent(const T &event, } template -void CaloTruthAccumulator::fillSimHits(std::vector> &returnValue, - std::unordered_map> &simTrackDetIdEnergyMap, - const T &event, - const edm::EventSetup &setup) { +std::unordered_map> CaloTruthAccumulator::fillSimTrackDetIdEnergyMap( + const T &event, const edm::EventSetup &setup) { + std::unordered_map> simTrackDetIdEnergyMap; for (auto const &collectionTag : collectionTags_) { edm::Handle> hSimHits; const bool isHcal = (collectionTag.instance().find("HcalHits") != std::string::npos); @@ -602,11 +1013,11 @@ void CaloTruthAccumulator::fillSimHits(std::vector(g)), src, VertexProperty(src_vertex_property.simTrack, cumulative)); put(get(edge_weight, const_cast(g)), - e, + e, // Here EdgeProperty.cumulative_simHits has also the source vertex cumulative_simHits added, which seems weird (depends on graph traversal order ?). It's not used in this algo though EdgeProperty(edge_property.simTrack, edge_property.simHits, cumulative)); IfLogDebug(DEBUG, messageCategoryGraph_) << " Finished edge: " << e << " Track id: " << get(edge_weight, g, e).simTrack->trackId() diff --git a/SimGeneral/MixingModule/python/caloTruthProducer_cfi.py b/SimGeneral/MixingModule/python/caloTruthProducer_cfi.py index 93573b7772832..3a17ea7ba3155 100644 --- a/SimGeneral/MixingModule/python/caloTruthProducer_cfi.py +++ b/SimGeneral/MixingModule/python/caloTruthProducer_cfi.py @@ -8,6 +8,8 @@ # alwaysAddAncestors = cms.bool(True), MinEnergy = cms.double(0.5), MaxPseudoRapidity = cms.double(5.0), + produceLegacySimCluster = cms.bool(True), + produceBoundaryAndMergedSimCluster = cms.bool(True), premixStage1 = cms.bool(False), doHGCAL = cms.bool(True), maximumPreviousBunchCrossing = cms.uint32(0), @@ -30,6 +32,9 @@ genParticleCollection = cms.InputTag('genParticles'), allowDifferentSimHitProcesses = cms.bool(False), # should be True for FastSim, False for FullSim HepMCProductLabel = cms.InputTag('generatorSmeared'), + simClusterMergerConfig = cms.PSet( + jetClusteringRadius = cms.double(0.05) + ) ) from Configuration.ProcessModifiers.premix_stage1_cff import premix_stage1