Skip to content

Commit 3c8c7dd

Browse files
authored
Refactor list merge/dup index: replace hash probes & setdefault with defaultdict (#51)
1 parent ac1aaf0 commit 3c8c7dd

1 file changed

Lines changed: 9 additions & 15 deletions

File tree

nac_yaml/yaml.py

Lines changed: 9 additions & 15 deletions
Original file line number | Diff line number | Diff line change
@@ -5,6 +5,7 @@
55
import logging
66
import os
77
import subprocess # nosec B404
8+
from collections import defaultdict
89
from pathlib import Path
910
from typing import Any
1011

@@ -292,15 +293,13 @@ def _has_duplicates_in_list(items: list[Any]) -> bool:
292293
return False
293294

294295
# Build inverted index: (key, value) -> [indices]
295-
index: dict[tuple[str, Any], list[int]] = {}
296+
index: dict[tuple[str, Any], list[int]] = defaultdict(list)
296297
for i, prims in enumerate(primitives_list):
297298
for k, v in prims.items():
298299
try:
299-
kv = (k, v)
300-
hash(kv)
300+
index[(k, v)].append(i)
301301
except TypeError:
302302
continue
303-
index.setdefault(kv, []).append(i)
304303

305304
# Check candidate pairs from buckets with 2+ entries
306305
checked: set[tuple[int, int]] = set()
@@ -344,17 +343,15 @@ def _merge_list_items_indexed(
344343
else:
345344
dest_primitives.append(None)
346345

347-
index: dict[tuple[str, Any], list[int]] = {}
346+
index: dict[tuple[str, Any], list[int]] = defaultdict(list)
348347
for i, prims in enumerate(dest_primitives):
349348
if prims is None:
350349
continue
351350
for k, v in prims.items():
352351
try:
353-
pair = (k, v)
354-
hash(pair)
352+
index[(k, v)].append(i)
355353
except TypeError:
356354
continue
357-
index.setdefault(pair, []).append(i)
358355

359356
for source_item in source_items:
360357
if not isinstance(source_item, dict):
@@ -370,12 +367,11 @@ def _merge_list_items_indexed(
370367
candidate_set: set[int] = set()
371368
for k, v in src_prims.items():
372369
try:
373-
pair = (k, v)
374-
hash(pair)
370+
bucket = index.get((k, v))
371+
if bucket:
372+
candidate_set.update(bucket)
375373
except TypeError:
376374
continue
377-
if pair in index:
378-
candidate_set.update(index[pair])
379375

380376
# Check candidates in destination order (first-match semantics)
381377
matched = False
@@ -400,11 +396,9 @@ def _merge_list_items_indexed(
400396
dest_primitives.append(src_prims)
401397
for k, v in src_prims.items():
402398
try:
403-
pair = (k, v)
404-
hash(pair)
399+
index[(k, v)].append(new_idx)
405400
except TypeError:
406401
continue
407-
index.setdefault(pair, []).append(new_idx)
408402

409403

410404
def merge_list_item(

0 commit comments

Comments
 (0)