Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions src/builtin.c
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@
#endif
#include "builtin.h"
#include "compile.h"
#include "jq.h"
#include "jq_parser.h"
#include "bytecode.h"
#include "linker.h"
Expand Down Expand Up @@ -2105,6 +2106,10 @@ int builtins_bind(jq_state *jq, block* bb) {
builtins = gen_cbinding(function_list, sizeof(function_list)/sizeof(function_list[0]), builtins);
builtins = gen_builtin_list(builtins);

// Collect all function names before block_bind_referenced discards unreferenced ones
jv all_funcs = block_list_funcs(builtins, 0);
jq_set_known_symbols(jq, all_funcs, jv_array());

*bb = block_bind_referenced(builtins, *bb, OP_IS_CALL_PSEUDO);
return nerrors;
}
164 changes: 161 additions & 3 deletions src/compile.c
Original file line number Diff line number Diff line change
Expand Up @@ -1120,9 +1120,131 @@ make_env(jv env)
return jv_copy(r);
}

// Levenshtein edit distance between the NUL-terminated strings `a` and `b`,
// capped at `max`.
//
// Returns the exact distance when it is <= max, and max+1 otherwise. The cap
// lets the function give up early: it returns before allocating anything when
// the lengths alone differ by more than `max`, and it stops filling the table
// as soon as every entry of the current row exceeds `max`.
static int levenshtein(const char *a, const char *b, int max) {
  const int la = (int)strlen(a);
  const int lb = (int)strlen(b);
  const int over = max + 1;

  // The distance can never be smaller than the difference in lengths.
  if (abs(la - lb) > max)
    return over;

  // Only the previous and the current row of the DP table are needed.
  int *prev = jv_mem_alloc((size_t)(lb + 1) * sizeof(int));
  int *curr = jv_mem_alloc((size_t)(lb + 1) * sizeof(int));
  int result = over;

  for (int j = 0; j <= lb; j++)
    prev[j] = j;

  for (int i = 1; i <= la; i++) {
    curr[0] = i;
    int row_min = curr[0];
    for (int j = 1; j <= lb; j++) {
      int best = prev[j - 1] + (a[i - 1] == b[j - 1] ? 0 : 1);  // substitution
      if (prev[j] + 1 < best)
        best = prev[j] + 1;                                     // deletion
      if (curr[j - 1] + 1 < best)
        best = curr[j - 1] + 1;                                 // insertion
      curr[j] = best;
      if (best < row_min)
        row_min = best;
    }

    // Every entry of this row is already over the cap: report "too far".
    if (row_min > max)
      goto done;

    // The row just computed becomes the previous row of the next iteration.
    int *tmp = prev;
    prev = curr;
    curr = tmp;
  }

  // `prev` now holds the last completed row. Its final cell can exceed the cap
  // even when the row minimum did not, so clamp it to honour the max+1 contract.
  result = prev[lb] > max ? over : prev[lb];

done:
  jv_mem_free(prev);
  jv_mem_free(curr);
  return result;
}

// Returns 1 when the jv array `list` already contains a string equal to
// `name`, 0 otherwise. `list` is only borrowed (every access goes through
// jv_copy), so the caller keeps its reference.
static int symbol_list_contains(jv list, const char *name) {
  int n = jv_array_length(jv_copy(list));
  for (int k = 0; k < n; k++) {
    jv elem = jv_array_get(jv_copy(list), k);
    int same = strcmp(jv_string_value(elem), name) == 0;
    jv_free(elem);
    if (same)
      return 1;
  }
  return 0;
}

// Appends the jv string `name` to `*list` unless an equal string is already
// present. Consumes `name` in both cases.
static void symbol_list_add_unique(jv *list, jv name) {
  if (symbol_list_contains(*list, jv_string_value(name)))
    jv_free(name);
  else
    *list = jv_array_append(*list, name);
}

// Walks the instruction list starting at `i`, descending into each
// instruction's subfn and arglist, and records every self-bound binder
// (OP_HAS_BINDING with bound_by == itself) that has a symbol:
//   - STOREV binders are appended to `*vars` as "$name";
//   - CLOSURE_CREATE / CLOSURE_CREATE_C binders are appended to `*funcs` as
//     "name/arity", where arity is nformals (negative values count as 0).
// Symbols starting with '*' (internal break labels) are ignored, and each
// name is stored at most once per list.
static void collect_bound_symbols_inner(inst *i,
                                        jv *vars,     // "$name" entries
                                        jv *funcs) {  // "name/arity" entries
  for (; i; i = i->next) {
    int flags = opcode_describe(i->op)->flags;
    int is_binder = (flags & OP_HAS_BINDING) && i->bound_by == i && i->symbol;

    // Skip internal break labels, whose symbols start with '*'.
    if (is_binder && i->symbol[0] != '*') {
      if (i->op == STOREV) {
        // Variable binder
        symbol_list_add_unique(vars, jv_string_fmt("$%s", i->symbol));
      } else if (i->op == CLOSURE_CREATE || i->op == CLOSURE_CREATE_C) {
        // Function binder: the arity is the number of formals
        int nformals = i->nformals < 0 ? 0 : i->nformals;
        symbol_list_add_unique(funcs,
                               jv_string_fmt("%s/%d", i->symbol, nformals));
      }
    }

    // Nested definitions live in the function body and the argument list.
    collect_bound_symbols_inner(i->subfn.first, vars, funcs);
    collect_bound_symbols_inner(i->arglist.first, vars, funcs);
  }
}

// Gathers every bound symbol reachable from block `b`.
// Both output slots are overwritten with fresh arrays before the walk:
// `*vars` receives the variable names and `*funcs` the function names.
// Whatever the slots held on entry is not freed here.
static void collect_bound_symbols(block b, jv *vars, jv *funcs) {
  inst *head = b.first;

  *funcs = jv_array();
  *vars = jv_array();
  collect_bound_symbols_inner(head, vars, funcs);
}

// Picks the entry of `candidates` (a jv array of strings) with the smallest
// edit distance to `needle`, considering only distances <= `threshold`.
// On a tie the earliest entry wins, because a later entry must be strictly
// closer to replace it. Returns jv_invalid() when no entry qualifies.
// `candidates` is only borrowed: the caller still owns it and must free it.
static jv best_suggestion(const char *needle, jv candidates, int threshold) {
  jv winner = jv_invalid();
  int winner_dist = threshold + 1;
  const int count = jv_array_length(jv_copy(candidates));

  int idx = 0;
  while (idx < count) {
    jv entry = jv_array_get(jv_copy(candidates), idx);
    idx++;

    int dist = levenshtein(needle, jv_string_value(entry), threshold);
    if (dist >= winner_dist) {
      // Not an improvement: drop this entry and move on.
      jv_free(entry);
      continue;
    }

    // Strictly closer: hand our reference over to `winner`.
    jv_free(winner);
    winner = entry;
    winner_dist = dist;
  }
  return winner;
}

// Expands call instructions into a calling sequence
static int expand_call_arglist(block* b, jv args, jv *env) {
int errors = 0;

// Pre-pass: collect all bound symbols so we can suggest similar names
jv bound_vars, bound_funcs;
collect_bound_symbols(*b, &bound_vars, &bound_funcs);

block ret = gen_noop();
for (inst* curr; (curr = block_take(b));) {
if (opcode_describe(curr->op)->flags & OP_HAS_BINDING) {
Expand All @@ -1133,12 +1255,46 @@ static int expand_call_arglist(block* b, jv args, jv *env) {
curr->op = LOADK;
curr->imm.constant = jv_object_get(jv_copy(args), jv_string(curr->symbol));
} else if (!curr->bound_by) {
if (curr->symbol[0] == '*' && curr->symbol[1] >= '1' && curr->symbol[1] <= '3' && curr->symbol[2] == '\0')
if (curr->symbol[0] == '*' && curr->symbol[1] >= '1' && curr->symbol[1] <= '3' && curr->symbol[2] == '\0') {
locfile_locate(curr->locfile, curr->source, "jq: error: break used outside labeled control structure");
else if (curr->op == LOADV)
} else if (curr->op == LOADV) {
locfile_locate(curr->locfile, curr->source, "jq: error: $%s is not defined", curr->symbol);
else
// Suggest a similar variable name
char *needle = jv_mem_alloc(1 + strlen(curr->symbol) + 1);
needle[0] = '$';
strcpy(needle + 1, curr->symbol);
// Merge locally bound vars with globally known vars
jv all_vars = jq_get_known_vars(curr->locfile->jq);
int nlocal = jv_array_length(jv_copy(bound_vars));
for (int k = 0; k < nlocal; k++)
all_vars = jv_array_append(all_vars, jv_array_get(jv_copy(bound_vars), k));
jv suggestion = best_suggestion(needle, all_vars, 3);
jv_mem_free(needle);
jv_free(all_vars);
if (jv_is_valid(suggestion)) {
jq_report_error(curr->locfile->jq,
jv_string_fmt("jq: Did you mean: %s?", jv_string_value(suggestion)));
jv_free(suggestion);
}
} else {
locfile_locate(curr->locfile, curr->source, "jq: error: %s/%d is not defined", curr->symbol, curr->nactuals);
// Suggest a similar function name
char *needle = jv_mem_alloc(strlen(curr->symbol) + 1 + 20 + 1);
sprintf(needle, "%s/%d", curr->symbol, curr->nactuals);
// Merge locally bound funcs with globally known builtins
jv all_funcs = jq_get_known_funcs(curr->locfile->jq);
int nlocal = jv_array_length(jv_copy(bound_funcs));
for (int k = 0; k < nlocal; k++)
all_funcs = jv_array_append(all_funcs, jv_array_get(jv_copy(bound_funcs), k));
jv suggestion = best_suggestion(needle, all_funcs, 3);
jv_mem_free(needle);
jv_free(all_funcs);
if (jv_is_valid(suggestion)) {
jq_report_error(curr->locfile->jq,
jv_string_fmt("jq: Did you mean: %s?", jv_string_value(suggestion)));
jv_free(suggestion);
}
}
errors++;
// don't process this instruction if it's not well-defined
ret = BLOCK(ret, inst_block(curr));
Expand Down Expand Up @@ -1208,6 +1364,8 @@ static int expand_call_arglist(block* b, jv args, jv *env) {
ret = BLOCK(ret, prelude, inst_block(curr));
}
*b = ret;
jv_free(bound_vars);
jv_free(bound_funcs);
return errors;
}

Expand Down
25 changes: 25 additions & 0 deletions src/execute.c
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,11 @@ struct jq_state {
void *debug_cb_data;
jq_msg_cb stderr_cb;
void *stderr_cb_data;

// All known function names ("name/arity") and variable names ("$name"),
// collected before block_bind_referenced discards unreferenced builtins
jv known_funcs; // jv_array of "name/arity" strings
jv known_vars; // jv_array of "$name" strings
};

struct closure {
Expand Down Expand Up @@ -1086,6 +1091,9 @@ jq_state *jq_init(void) {
jq->path = jv_null();
jq->value_at_path = jv_null();

jq->known_funcs = jv_array();
jq->known_vars = jv_array();

jq->nomem_handler = NULL;
jq->nomem_handler_data = NULL;
return jq;
Expand Down Expand Up @@ -1138,6 +1146,8 @@ void jq_teardown(jq_state **jq) {
bytecode_free(old_jq->bc);
old_jq->bc = 0;
jv_free(old_jq->attrs);
jv_free(old_jq->known_funcs);
jv_free(old_jq->known_vars);

jv_mem_free(old_jq);
}
Expand Down Expand Up @@ -1288,6 +1298,21 @@ jv jq_get_attr(jq_state *jq, jv attr) {
return jv_object_get(jv_copy(jq->attrs), attr);
}

// Installs new known-symbol lists on `jq`.
// `funcs` and `vars` are stored as-is (no copy), so the state takes over the
// caller's references; the lists held before the call are released.
void jq_set_known_symbols(jq_state *jq, jv funcs, jv vars) {
  jv stale_funcs = jq->known_funcs;
  jv stale_vars = jq->known_vars;

  jq->known_funcs = funcs;
  jq->known_vars = vars;

  jv_free(stale_funcs);
  jv_free(stale_vars);
}

// Hands out a new reference (jv_copy) to the function-name list stored on
// `jq`; the state keeps its own reference.
jv jq_get_known_funcs(jq_state *jq) {
  jv funcs = jv_copy(jq->known_funcs);
  return funcs;
}

// Hands out a new reference (jv_copy) to the variable-name list stored on
// `jq`; the state keeps its own reference.
jv jq_get_known_vars(jq_state *jq) {
  jv vars = jv_copy(jq->known_vars);
  return vars;
}

// Forwards to dump_disassembly() with the requested `indent` and the bytecode
// currently attached to the state (jq->bc).
void jq_dump_disassembly(jq_state *jq, int indent) {
dump_disassembly(indent, jq->bc);
}
Expand Down
4 changes: 4 additions & 0 deletions src/jq.h
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,10 @@ jv jq_get_lib_dirs(jq_state *);
void jq_set_attr(jq_state *, jv, jv);
jv jq_get_attr(jq_state *, jv);

/*
 * Known-symbol lists kept on the jq_state.
 *
 * jq_set_known_symbols() takes, in order, the function-name list and the
 * variable-name list. Both values are stored without copying, and the lists
 * stored previously are freed.
 *
 * jq_get_known_funcs() and jq_get_known_vars() each return a jv_copy() of the
 * corresponding stored list.
 */
void jq_set_known_symbols(jq_state *, jv, jv);
jv jq_get_known_funcs(jq_state *);
jv jq_get_known_vars(jq_state *);

/*
* We use char * instead of jf for filenames here because filenames
* should be in the process' locale's codeset, which may not be UTF-8,
Expand Down
15 changes: 15 additions & 0 deletions tests/jq.test
Original file line number Diff line number Diff line change
Expand Up @@ -2603,6 +2603,21 @@ try (reduce range(10001) as $_ ([]; [.]) as $x | $x | contains($x)) catch .
null
"Containment check too deep"

# Tests for issue #3268: "Did you mean?" hints for undefined symbols.
# The %%FAIL tests below verify that the primary error message is unchanged.
# The hint lines ("jq: Did you mean: ...") are tested in tests/shtest.

%%FAIL
to_string
jq: error: to_string/0 is not defined at <top-level>, line 1, column 1:
to_string
^^^^^^^^^

%%FAIL
1 as $X | $x
jq: error: $x is not defined at <top-level>, line 1, column 11:
1 as $X | $x
^^

# regression test for CVE-2026-43896
reduce range(10000) as $_ ({}; {a: .}) as $x | $x * $x | length
null
Expand Down
30 changes: 30 additions & 0 deletions tests/shtest
Original file line number Diff line number Diff line change
Expand Up @@ -893,4 +893,34 @@ if echo '42' | $JQ -f "$d/nul_prog.jq" >/dev/null 2>/dev/null; then
exit 1
fi

# Tests for issue #3268: "Did you mean?" hints for undefined symbols

# Function typo: to_string -> tostring
cat > $d/expected <<'EOF'
jq: error: to_string/0 is not defined at <top-level>, line 1, column 1:
to_string
^^^^^^^^^
jq: Did you mean: tostring/0?
jq: 1 compile error
EOF
# jq must reject the misspelled builtin; its stderr is captured so that the
# primary error and the hint line can be compared with the expected text.
if $JQ -n 'to_string' > /dev/null 2> "$d/out"; then
  echo "Expected compile error for 'to_string'"
  exit 1
fi
diff "$d/out" "$d/expected"

# Variable typo: $x -> $X (case mismatch)
cat > $d/expected <<'EOF'
jq: error: $x is not defined at <top-level>, line 1, column 11:
1 as $X | $x
^^
jq: Did you mean: $X?
jq: 1 compile error
EOF
# jq must reject the wrongly-cased variable; its stderr is captured so that the
# primary error and the hint line can be compared with the expected text.
if $JQ -n '1 as $X | $x' > /dev/null 2> "$d/out"; then
  echo "Expected compile error for '\$x'"
  exit 1
fi
diff "$d/out" "$d/expected"

exit 0