add local benchmarks file (#2)

vkuzo · web-flow · commit 234cf7014ecc · 2026-03-14T08:15:56.000+05:30
Summary: Same as `run_all_benchmarks.sh`, but always compiles with `--torch_compile_mode reduce-overhead` and is meant to be run on a local machine. Test Plan: ``` // full run // note: this run used a local torchao build with pytorch/ao#4031 time HF_HUB_DISABLE_PROGRESS_BARS=1 ./run_all_benchmarks_local.sh 2>&1 | tee ~/tmp/20260313_diffusers_full_sweep_logs_mslk.tx // output: https://gist.github.com/vkuzo/40ee0268a590e270900a2538055b13f0 ```
diff --git a/run_all_benchmarks_local.sh b/run_all_benchmarks_local.sh
@@ -0,0 +1,48 @@
+#!/usr/bin/env bash
+# Run MX-format quantization benchmarks locally across all models, batch sizes, and quant modes.
+# Compilation is always enabled with reduce-overhead mode.
+#
+# Usage:
+#   bash run_all_benchmarks_local.sh
+
+set -euo pipefail
+
+MODELS=(
+    "black-forest-labs/FLUX.1-dev"
+    "Qwen/Qwen-Image"
+    "Lightricks/LTX-2"
+)
+
+BATCH_SIZES=(1 4 8)
+
+run() {
+    local model_id="$1"
+    local batch_size="$2"
+    local quant_mode="$3"   # "none", "nvfp4", or "fp8"
+
+    echo ""
+    echo "================================================================"
+    echo "  model     : ${model_id}"
+    echo "  batch_size: ${batch_size}"
+    echo "  quant_mode: ${quant_mode}"
+    echo "  compile   : enabled (reduce-overhead)"
+    echo "================================================================"
+
+    time python benchmark.py \
+        --model_id "${model_id}" \
+        --batch_size "${batch_size}" \
+        --enable_compilation \
+        --quant_mode "${quant_mode}" \
+        --torch_compile_mode reduce-overhead
+}
+
+for model in "${MODELS[@]}"; do
+    for bs in "${BATCH_SIZES[@]}"; do
+        for quant in "none" "nvfp4" "fp8"; do
+            run "${model}" "${bs}" "${quant}"
+        done
+    done
+done
+
+echo ""
+echo "All benchmarks complete."