GenXProject · Copilot · Jan 9, 2026 · Jan 9, 2026 · Jan 9, 2026 · Jan 9, 2026
diff --git a/docs/src/Tutorials/Tutorial_3_K-means_time_domain_reduction.md b/docs/src/Tutorials/Tutorial_3_K-means_time_domain_reduction.md
@@ -5,6 +5,9 @@
 
 A good tool to reduce computation time of GenX is to use [Time-domain reduction](@ref). Time-domain Reduction is a method that selects a smaller set of time steps from the data in a way that reduces computation time while still capturing the main information of the model. In this tutorial, we go over how TDR works in GenX and how it uses K-means clustering to choose the optimal time steps. For more information on TDR in capacity expansion models, see [Mallapragada et al](https://www.sciencedirect.com/science/article/pii/S0360544218315238).
 
+!!! note "Automatic Input File Validation"
+    GenX automatically tracks changes to time-series input files and TDR settings. When time-domain reduction is performed, GenX saves SHA256 hashes of the time-series input files and of the TDR settings file. On subsequent runs, if any of these files has changed, GenX automatically re-runs the time-domain reduction so that the clustered data remains valid. This prevents accidentally using stale clustered data when inputs have been modified. For more details, see the [Time-domain reduction](@ref) documentation.
+
 ### Table of Contents
 * [Time Domain Reduction](#TDR)
 * [K-Means Clustering](#Kmeans)

diff --git a/docs/src/User_Guide/TDR_input.md b/docs/src/User_Guide/TDR_input.md
@@ -24,3 +24,23 @@ It's also possible for GenX perform clustering separately from the optimization
 |DemandWeight| Default = 1, a multiplier on demand columns to optionally prioritize better fits for demand profiles over resource capacity factor or fuel price profiles.|
 |WeightTotal |Default = 8760, the sum to which the relative weights of representative periods will be scaled.|
 |ClusterFuelPrices| Either 1 or 0, whether or not to use the fuel price time series in `Fuels_data.csv` in the clustering process. If 'no', this function will still write `Fuels_data.csv` in the TimeDomainReductionFolder with reshaped fuel prices based on the number and size of the representative periods but will not use the fuel price time series for selection of representative periods.|
+
+## Input File Hash Verification
+
+GenX automatically tracks changes to time-series input files and TDR settings to ensure that clustered data remains valid. When time-domain reduction is performed, GenX computes SHA256 hashes of the following files and stores them in `tdr_input_hashes.yml` within the TDR results folder:
+
+- `Demand_data.csv` (or `Load_data.csv`)
+- `Generators_variability.csv`
+- `Fuels_data.csv`
+- `time_domain_reduction_settings.yml`
+
+On subsequent runs, GenX checks these hashes against the current input files. If any file has changed, GenX will automatically re-run the time-domain reduction to ensure the clustered data reflects the updated inputs. This prevents users from accidentally using stale clustered data when input files or TDR settings have been modified.
+
+**Behavior:**
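+- If the TDR results folder doesn't exist, or any clustered file (`Demand_data.csv` or `Load_data.csv`, `Generators_variability.csv`, `Fuels_data.csv`) is missing from it, GenX performs time-domain reduction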
+- If the TDR results folder exists but the hash file is missing, GenX performs time-domain reduction
+- If the TDR results folder and hash file exist, GenX compares stored hashes with current input files:
+  - If hashes match, GenX uses the existing clustered data
+  - If any hash differs, GenX prints a message and re-runs time-domain reduction
+
+This feature is automatic and requires no user configuration. To force a re-run of time-domain reduction, simply delete the TDR results folder as before.
diff --git a/src/GenX.jl b/src/GenX.jl
@@ -38,6 +38,7 @@ using RecursiveArrayTools
 using Statistics
 using HiGHS
 using Logging
+using SHA
 
 using PrecompileTools: @compile_workload
 
@@ -72,6 +73,7 @@ include_all_in_folder("load_inputs")
 include_all_in_folder("model")
 include_all_in_folder("write_outputs")
 
+include("time_domain_reduction/tdr_hash_utils.jl")
 include("time_domain_reduction/time_domain_reduction.jl")
 include("time_domain_reduction/precluster.jl")
 include("time_domain_reduction/full_time_series_reconstruction.jl")

diff --git a/src/case_runners/case_runner.jl b/src/case_runners/case_runner.jl
@@ -41,11 +41,27 @@ function run_genx_case!(case::AbstractString, optimizer::Any = HiGHS.Optimizer)
     end
 end
 
-function time_domain_reduced_files_exist(tdrpath)
+function time_domain_reduced_files_exist(tdrpath, case_path=nothing, setup=nothing)
     tdr_demand = file_exists(tdrpath, ["Demand_data.csv", "Load_data.csv"])
     tdr_genvar = isfile(joinpath(tdrpath, "Generators_variability.csv"))
     tdr_fuels = isfile(joinpath(tdrpath, "Fuels_data.csv"))
-    return (tdr_demand && tdr_genvar && tdr_fuels)
+    files_exist = (tdr_demand && tdr_genvar && tdr_fuels)
+
+    # Demand, variability and fuels outputs must all be present; otherwise report false
+    if !files_exist
+        return false
+    end
+
+    # Change detection runs only when both optional arguments are supplied (default: nothing)
+    if !isnothing(case_path) && !isnothing(setup)
+        inputs_changed = tdr_inputs_have_changed(case_path, tdrpath, setup)
+        if inputs_changed
+            println("TDR input files or settings have changed since last clustering.")
+            return false
+        end
+    end
+
+    return true
 end
 
 function run_genx_case_simple!(case::AbstractString, mysetup::Dict, optimizer::Any)
@@ -56,7 +72,7 @@ function run_genx_case_simple!(case::AbstractString, mysetup::Dict, optimizer::A
         TDRpath = joinpath(case, mysetup["TimeDomainReductionFolder"])
         system_path = joinpath(case, mysetup["SystemFolder"])
         prevent_doubled_timedomainreduction(system_path)
-        if !time_domain_reduced_files_exist(TDRpath)
+        if !time_domain_reduced_files_exist(TDRpath, case, mysetup)
             println("Clustering Time Series Data (Grouped)...")
             cluster_inputs(case, settings_path, mysetup)
         else
@@ -121,7 +137,7 @@ function run_genx_case_multistage!(case::AbstractString, mysetup::Dict, optimize
         TDRpath = joinpath(first_stage_path, mysetup["TimeDomainReductionFolder"])
         system_path = joinpath(first_stage_path, mysetup["SystemFolder"])
         prevent_doubled_timedomainreduction(system_path)
-        if !time_domain_reduced_files_exist(TDRpath)
+        if !time_domain_reduced_files_exist(TDRpath, case, mysetup)
             if (mysetup["MultiStage"] == 1) &&
                (TDRSettingsDict["MultiStageConcatenate"] == 0)
                 println("Clustering Time Series Data (Individually)...")