Skip to content

Commit c3288d0

Browse files
committed
[Change] Scan progress: logarithmic backoff + watchdog semantics for non-native engines
[Change] Event log progress lines use logarithmic backoff: 60s for first 5m, then 5m/15m/30m intervals as elapsed time grows — reduces log spam on long-running scans from ~120 lines/2h to ~13 [Change] ClamAV/YARA background timer reframed as watchdog liveness check: {watchdog} prefix replaces {scan}, human-readable elapsed format (Xh Ym) [Change] -L/--list-active ETA column: shows n/a (text), null (JSON), -1 (TSV) for clamav/clamdscan/yara engines where per-file progress is structurally unavailable
1 parent a38f134 commit c3288d0

2 files changed

Lines changed: 80 additions & 30 deletions

File tree

files/internals/lmd_lifecycle.sh

Lines changed: 33 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -324,15 +324,22 @@ _lifecycle_render_text_active() {
324324
fi
325325
_elapsed_str=$(_lifecycle_format_elapsed "$_elapsed_secs")
326326

327-
# ETA: (elapsed * total / processed) - elapsed; requires active progress
327+
# ETA: (elapsed * total / processed) - elapsed; requires per-file progress.
328+
# ClamAV and YARA run as opaque single-process engines — per-file progress
329+
# is structurally unavailable, so ETA is always n/a for those engines.
328330
local _eta_str="-"
329331
local _pos="${_meta_progress_pos:-0}" _tot="${_meta_progress_total:-0}"
330332
[ "$_pos" = "-" ] && _pos=0
331333
[ "$_tot" = "-" ] && _tot=0
332-
if [ "$_pos" -gt 0 ] && [ "$_tot" -gt 0 ] && [ "$_elapsed_secs" -gt 0 ]; then
333-
local _eta_secs=$(( (_elapsed_secs * _tot / _pos) - _elapsed_secs ))
334-
_eta_str="~$(_lifecycle_format_elapsed "$_eta_secs")"
335-
fi
334+
case "${_meta_engine:-}" in
335+
clamav|clamdscan|yara) _eta_str="n/a" ;;
336+
*)
337+
if [ "$_pos" -gt 0 ] && [ "$_tot" -gt 0 ] && [ "$_elapsed_secs" -gt 0 ]; then
338+
local _eta_secs=$(( (_elapsed_secs * _tot / _pos) - _elapsed_secs ))
339+
_eta_str="~$(_lifecycle_format_elapsed "$_eta_secs")"
340+
fi
341+
;;
342+
esac
336343

337344
if [ "$_verbose" = "1" ]; then
338345
local _progress_str="-"
@@ -418,12 +425,18 @@ _lifecycle_render_json_active() {
418425
printf ' "total_files": %s,\n' "$_i_total"
419426
printf ' "hits": %s,\n' "$_i_hits"
420427
printf ' "elapsed": %s,\n' "$_i_elapsed"
421-
# ETA: (elapsed * total / processed) - elapsed
422-
local _i_eta=0
423-
if [ "$_i_prog_pos" -gt 0 ] && [ "$_i_prog_total" -gt 0 ] && [ "$_i_elapsed" -gt 0 ]; then
424-
_i_eta=$(( (_i_elapsed * _i_prog_total / _i_prog_pos) - _i_elapsed ))
425-
fi
426-
printf ' "eta": %s,\n' "$_i_eta"
428+
# ETA: null for engines without per-file progress (clamav, yara);
429+
# computed for native engine when progress data is available
430+
local _eta_val="0"
431+
case "${_meta_engine:-}" in
432+
clamav|clamdscan|yara) _eta_val="null" ;;
433+
*)
434+
if [ "$_i_prog_pos" -gt 0 ] && [ "$_i_prog_total" -gt 0 ] && [ "$_i_elapsed" -gt 0 ]; then
435+
_eta_val=$(( (_i_elapsed * _i_prog_total / _i_prog_pos) - _i_elapsed ))
436+
fi
437+
;;
438+
esac
439+
printf ' "eta": %s,\n' "$_eta_val"
427440
printf ' "workers": %s,\n' "$_i_workers"
428441
printf ' "stages": "%s",\n' "$_j_stages"
429442
printf ' "sig_version": "%s",\n' "$_j_sig"
@@ -460,14 +473,19 @@ _lifecycle_render_tsv_active() {
460473
if [ "$_tsv_elapsed" = "0" ] && [ -n "$_meta_started" ] && [ "$_meta_started" != "0" ]; then
461474
_tsv_elapsed="$(( $(command date +%s) - _meta_started ))"
462475
fi
463-
# ETA
476+
# ETA: -1 for engines without per-file progress (clamav, clamdscan, yara)
464477
local _tsv_eta=0
465478
local _tp="${_meta_progress_pos:-0}" _tt="${_meta_progress_total:-0}"
466479
[ "$_tp" = "-" ] && _tp=0
467480
[ "$_tt" = "-" ] && _tt=0
468-
if [ "$_tp" -gt 0 ] && [ "$_tt" -gt 0 ] && [ "$_tsv_elapsed" -gt 0 ]; then
469-
_tsv_eta=$(( (_tsv_elapsed * _tt / _tp) - _tsv_elapsed ))
470-
fi
481+
case "${_meta_engine:-}" in
482+
clamav|clamdscan|yara) _tsv_eta=-1 ;;
483+
*)
484+
if [ "$_tp" -gt 0 ] && [ "$_tt" -gt 0 ] && [ "$_tsv_elapsed" -gt 0 ]; then
485+
_tsv_eta=$(( (_tsv_elapsed * _tt / _tp) - _tsv_elapsed ))
486+
fi
487+
;;
488+
esac
471489
printf '%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\n' \
472490
"$_scanid" "$_state" "${_meta_pid:--}" "${_meta_path:--}" \
473491
"${_meta_engine:--}" "${_meta_total_files:--}" "${_meta_hits:-0}" \

files/internals/lmd_scan.sh

Lines changed: 47 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -231,6 +231,23 @@ _bg_progress_interval="${scan_progress_log_interval:-60}"
231231
[[ "$_bg_progress_interval" =~ ^[0-9]+$ ]] || _bg_progress_interval=60 # sanitize non-numeric to default
232232
_bg_progress_last=0
233233

234+
# _bg_backoff_interval elapsed base_interval
235+
# Logarithmic backoff for event_log progress lines. Returns the effective
236+
# interval via stdout. Early progress stays frequent; long scans taper off
237+
# to avoid log spam with low-value repeated lines.
238+
_bg_backoff_interval() {
239+
local _elapsed="$1" _base="$2"
240+
if [ "$_elapsed" -ge 3600 ]; then
241+
echo 1800 # 60m+: every 30 minutes
242+
elif [ "$_elapsed" -ge 900 ]; then
243+
echo 900 # 15-60m: every 15 minutes
244+
elif [ "$_elapsed" -ge 300 ]; then
245+
echo 300 # 5-15m: every 5 minutes
246+
else
247+
echo "$_base" # 0-5m: configured interval
248+
fi
249+
}
250+
234251
_meta_progress_interval=10 # seconds between meta progress updates
235252
_meta_progress_last=0
236253

@@ -255,15 +272,16 @@ _scan_progress_meta() {
255272

256273
_scan_progress_log() {
257274
# Log a progress checkpoint to event_log during background scans.
258-
# Called from worker poll loops; rate-limited to _bg_progress_interval.
275+
# Called from worker poll loops; rate-limited with logarithmic backoff.
259276
# Args: stage processed file_count elapsed
260277
[ "$set_background" != "1" ] && return
261278
[ "$_in_scan_context" != "1" ] && return
262279
[ "$_bg_progress_interval" -le 0 ] 2>/dev/null && return # safe: sanitized at init; suppresses edge-case comparison error
263280
local _stage="$1" _processed="$2" _file_count="$3" _elapsed="$4"
264-
local _now _pct _eta _eta_str
281+
local _now _pct _eta _eta_str _effective_interval
265282
_now=$SECONDS
266-
if [ $((_now - _bg_progress_last)) -lt "$_bg_progress_interval" ]; then
283+
_effective_interval=$(_bg_backoff_interval "$_elapsed" "$_bg_progress_interval")
284+
if [ $((_now - _bg_progress_last)) -lt "$_effective_interval" ]; then
267285
return
268286
fi
269287
_bg_progress_last=$_now
@@ -350,28 +368,42 @@ _wait_workers_with_progress() {
350368
}
351369

352370
_start_elapsed_timer() {
353-
# Start a background elapsed-time progress ticker for single-process stages.
371+
# Start a background elapsed-time progress ticker for single-process stages
372+
# (ClamAV, YARA) where per-file progress is unavailable.
354373
# Usage: _start_elapsed_timer <stage> <file_count>
355374
# Sets _timer_pid to the background process PID.
356375
# Caller must call _stop_elapsed_timer when the stage completes.
357-
# In foreground: console progress every 2s. In background: log checkpoint only.
376+
# In foreground: console progress every 2s (unchanged).
377+
# In background: watchdog log with logarithmic backoff — subshell cannot
378+
# see parent's progress vars, so these are liveness checks, not progress.
358379
_timer_pid=""
359-
local _stage="$1" _file_count="$2" _start_ts _poll _paused_accum=0
380+
local _stage="$1" _file_count="$2" _start_ts _base_poll _paused_accum=0
360381
_start_ts=$SECONDS
361382
if [ "$set_background" == "1" ]; then
362-
_poll="${_bg_progress_interval:-60}"
363-
[[ "$_poll" =~ ^[0-9]+$ ]] || _poll=60 # sanitize non-numeric to default
364-
[ "$_poll" -le 0 ] && return
383+
_base_poll="${_bg_progress_interval:-60}"
384+
[[ "$_base_poll" =~ ^[0-9]+$ ]] || _base_poll=60 # sanitize non-numeric to default
385+
[ "$_base_poll" -le 0 ] && return
386+
local _elapsed _sleep_dur _eh _em _es _efmt
365387
while true; do
366-
sleep "$_poll"
367-
# Suppress progress while scan is paused
388+
_elapsed=$(( SECONDS - _start_ts - _paused_accum ))
389+
_sleep_dur=$(_bg_backoff_interval "$_elapsed" "$_base_poll")
390+
sleep "$_sleep_dur"
391+
# Suppress while scan is paused
368392
if [ -n "${scanid:-}" ] && [ -f "$tmpdir/.pause.$scanid" ]; then
369-
_paused_accum=$((_paused_accum + _poll))
393+
_paused_accum=$((_paused_accum + _sleep_dur))
370394
continue
371395
fi
372-
# Note: subshell cannot see parent's progress_hits/progress_cleaned updates;
373-
# report elapsed time only — accurate hit counts come from _scan_progress_log
374-
eout "{scan} [$_stage] $_file_count files | elapsed $(( SECONDS - _start_ts - _paused_accum ))s"
396+
_elapsed=$(( SECONDS - _start_ts - _paused_accum ))
397+
# Format elapsed as human-readable
398+
_eh=$((_elapsed / 3600)) _em=$(((_elapsed % 3600) / 60)) _es=$((_elapsed % 60))
399+
if [ "$_eh" -gt 0 ]; then
400+
_efmt="${_eh}h ${_em}m"
401+
elif [ "$_em" -gt 0 ]; then
402+
_efmt="${_em}m ${_es}s"
403+
else
404+
_efmt="${_es}s"
405+
fi
406+
eout "{watchdog} [$_stage] $_file_count files | running ${_efmt}"
375407
done &
376408
_timer_pid=$!
377409
else

0 commit comments

Comments
 (0)