Skip to content

Commit 95b15d5

Browse files
authored
SOLR-18194: fix nested docs detection false positive (#4279)
Previously if a segment had updates to the same Solr document (delete + add within a commit interval) the UPDATECOREINDEX action would falsely identify it as having child documents. These are not supported by the action so it would unnecessarily fail. We improve the check to compare cardinality of id with _root_ to identify child documents.
1 parent 2093185 commit 95b15d5

3 files changed

Lines changed: 141 additions & 21 deletions

File tree

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
title: Fix nested docs detection false positive
2+
type: changed
3+
authors:
4+
- name: Luke Kot-Zaniewski
5+
links:
6+
- name: SOLR-18194
7+
url: https://issues.apache.org/jira/browse/SOLR-18194

solr/core/src/java/org/apache/solr/handler/admin/api/UpgradeCoreIndex.java

Lines changed: 33 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -149,11 +149,11 @@ private UpgradeCoreIndexResponse performUpgradeImpl(
149149

150150
RefCounted<SolrIndexSearcher> searcherRef = core.getSearcher();
151151
try {
152-
// Check for nested documents before processing - we don't support them
153-
if (indexContainsNestedDocs(searcherRef.get())) {
152+
// Check for child documents before processing - we don't support them
153+
if (indexContainsChildDocs(searcherRef.get())) {
154154
throw new SolrException(
155155
BAD_REQUEST,
156-
"UPGRADECOREINDEX does not support indexes containing nested documents. "
156+
"UPGRADECOREINDEX does not support indexes containing child/nested documents. "
157157
+ " Consider reindexing your data "
158158
+ "from the original source.");
159159
}
@@ -259,26 +259,44 @@ private boolean shouldUpgradeSegment(LeafReaderContext lrc) {
259259
return (segmentMinVersion == null || segmentMinVersion.major < Version.LATEST.major);
260260
}
261261

262-
private boolean indexContainsNestedDocs(SolrIndexSearcher searcher) throws IOException {
262+
private boolean indexContainsChildDocs(SolrIndexSearcher searcher) throws IOException {
263263
IndexSchema schema = searcher.getSchema();
264264

265-
// First check if schema supports nested docs
265+
// First check if schema supports child docs
266266
if (!schema.isUsableForChildDocs()) {
267267
return false;
268268
}
269269

270-
// Check if _root_ field has fewer unique values than documents with that field.
271-
// This indicates multiple docs share the same _root_ (i.e., child docs exist)
270+
String uniqueKeyFieldName = schema.getUniqueKeyField().getName();
271+
272+
// Compare unique _root_ values against unique id values per segment.
273+
// For non-child docs, every document's _root_ equals its own id, so the number of
274+
// distinct _root_ values equals the number of distinct id values. For child docs,
275+
// children share the parent's _root_ value, so there are fewer distinct _root_ values
276+
// than distinct id values.
277+
//
278+
// We intentionally compare against unique id values rather than Terms.getDocCount()
279+
// (the number of documents with the _root_ field) because segment-level term statistics
280+
// include deleted documents. Updates (delete + re-add of the same id) can leave multiple
281+
// documents with the same _root_ value within a segment, causing getDocCount() to exceed
282+
// the unique _root_ count even when no child docs exist.
272283
IndexReader reader = searcher.getIndexReader();
273284
for (LeafReaderContext leaf : reader.leaves()) {
274-
Terms terms = leaf.reader().terms(IndexSchema.ROOT_FIELD_NAME);
275-
if (terms != null) {
276-
long uniqueRootValues = terms.size();
277-
int docsWithRoot = terms.getDocCount();
278-
279-
if (uniqueRootValues == -1 || uniqueRootValues < docsWithRoot) {
280-
return true; // Codec doesn't store number of terms (so a safe fallback), or multiple docs
281-
// share same _root_ (aka nested docs exist)
285+
Terms rootTerms = leaf.reader().terms(IndexSchema.ROOT_FIELD_NAME);
286+
if (rootTerms != null) {
287+
long uniqueRootValues = rootTerms.size();
288+
if (uniqueRootValues == -1) {
289+
return true; // Codec doesn't report term count; assume child docs as a safe fallback
290+
}
291+
292+
Terms idTerms = leaf.reader().terms(uniqueKeyFieldName);
293+
long uniqueIdValues = (idTerms != null) ? idTerms.size() : -1;
294+
if (uniqueIdValues == -1) {
295+
return true; // Codec doesn't report term count; assume child docs as a safe fallback
296+
}
297+
298+
if (uniqueRootValues < uniqueIdValues) {
299+
return true; // Fewer distinct _root_ values than distinct ids means child docs exist
282300
}
283301
}
284302
}

solr/core/src/test/org/apache/solr/handler/admin/UpgradeCoreIndexActionTest.java

Lines changed: 101 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -323,11 +323,11 @@ private void setMinVersionForSegments(SolrCore core, Set<String> segments, Versi
323323
private record SegmentLayout(String coreName, String seg1, String seg2, String seg3) {}
324324

325325
@Test
326-
public void testUpgradeCoreIndexFailsWithNestedDocuments() throws Exception {
326+
public void testUpgradeCoreIndexFailsWithChildDocuments() throws Exception {
327327
final SolrCore core = h.getCore();
328328
final String coreName = core.getName();
329329

330-
// Create a parent document with a child document (nested doc)
330+
// Create a parent document with a child document
331331
SolrInputDocument parentDoc = new SolrInputDocument();
332332
parentDoc.addField("id", "100");
333333
parentDoc.addField("title", "Parent Document");
@@ -338,7 +338,7 @@ public void testUpgradeCoreIndexFailsWithNestedDocuments() throws Exception {
338338

339339
parentDoc.addChildDocument(childDoc);
340340

341-
// Index the nested document
341+
// Index the parent+child document
342342
try (SolrQueryRequestBase req = new SolrQueryRequestBase(core, new ModifiableSolrParams())) {
343343
AddUpdateCommand cmd = new AddUpdateCommand(req);
344344
cmd.solrDoc = parentDoc;
@@ -349,7 +349,7 @@ public void testUpgradeCoreIndexFailsWithNestedDocuments() throws Exception {
349349
// Verify documents were indexed (parent + child = 2 docs)
350350
assertQ(req("q", "*:*"), "//result[@numFound='2']");
351351

352-
// Attempt to upgrade the index - should fail because of nested documents
352+
// Attempt to upgrade the index - should fail because of child documents
353353
CoreAdminHandler admin = new CoreAdminHandler(h.getCoreContainer());
354354
try {
355355
final SolrQueryResponse resp = new SolrQueryResponse();
@@ -365,13 +365,108 @@ public void testUpgradeCoreIndexFailsWithNestedDocuments() throws Exception {
365365
coreName),
366366
resp));
367367

368-
// Verify the exception message indicates nested documents are not supported
368+
// Verify the exception message indicates child documents are not supported
369369
assertThat(
370370
thrown.getMessage(),
371-
containsString("does not support indexes containing nested documents"));
371+
containsString("does not support indexes containing child/nested documents"));
372372
} finally {
373373
admin.shutdown();
374374
admin.close();
375375
}
376376
}
377+
378+
@Test
379+
public void testChildDocsDetection_noChildDocs() throws Exception {
380+
addDocsWithRandomUpdatesAndDeletes();
381+
382+
final String coreName = h.getCore().getName();
383+
CoreAdminHandler admin = new CoreAdminHandler(h.getCoreContainer());
384+
try {
385+
final SolrQueryResponse resp = new SolrQueryResponse();
386+
admin.handleRequestBody(
387+
req(
388+
CoreAdminParams.ACTION,
389+
CoreAdminParams.CoreAdminAction.UPGRADECOREINDEX.toString(),
390+
CoreAdminParams.CORE,
391+
coreName),
392+
resp);
393+
assertNull("Unexpected exception: " + resp.getException(), resp.getException());
394+
} finally {
395+
admin.shutdown();
396+
admin.close();
397+
}
398+
}
399+
400+
@Test
401+
public void testChildDocsDetection_withChildDocs() throws Exception {
402+
addChildDoc("100", "101");
403+
addDocsWithRandomUpdatesAndDeletes();
404+
405+
final String coreName = h.getCore().getName();
406+
CoreAdminHandler admin = new CoreAdminHandler(h.getCoreContainer());
407+
try {
408+
final SolrQueryResponse resp = new SolrQueryResponse();
409+
SolrException thrown =
410+
assertThrows(
411+
SolrException.class,
412+
() ->
413+
admin.handleRequestBody(
414+
req(
415+
CoreAdminParams.ACTION,
416+
CoreAdminParams.CoreAdminAction.UPGRADECOREINDEX.toString(),
417+
CoreAdminParams.CORE,
418+
coreName),
419+
resp));
420+
assertThat(
421+
thrown.getMessage(),
422+
containsString("does not support indexes containing child/nested documents"));
423+
} finally {
424+
admin.shutdown();
425+
admin.close();
426+
}
427+
}
428+
429+
/**
430+
* Add non-child docs with a random number of within-commit updates and deletes. This exercises
431+
* the false-positive scenario for child doc detection: updates and deletes leave behind deleted
432+
* entries in the same segment, causing multiple docs to share the same {@code _root_} value.
433+
*
434+
* <p>With NoMergePolicy and a 100MB RAM buffer (from SolrIndexConfig defaults), no flush or merge
435+
* occurs mid-batch, guaranteeing co-location in a single segment.
436+
*/
437+
private void addDocsWithRandomUpdatesAndDeletes() {
438+
int numDocs = 10;
439+
for (int i = 0; i < numDocs; i++) {
440+
assertU(adoc("id", String.valueOf(i), "title", "doc" + i));
441+
}
442+
int numUpdates = random().nextInt(4);
443+
for (int i = 0; i < numUpdates; i++) {
444+
assertU(adoc("id", String.valueOf(i), "title", "updated_doc" + i));
445+
}
446+
int numDeletes = random().nextInt(4);
447+
for (int i = 0; i < numDeletes; i++) {
448+
assertU(delI(String.valueOf(numDocs - 1 - i)));
449+
}
450+
assertU(commit("openSearcher", "true"));
451+
}
452+
453+
/** Index a parent document with a single child via the update handler. */
454+
private void addChildDoc(String parentId, String childId) throws Exception {
455+
SolrCore core = h.getCore();
456+
SolrInputDocument parentDoc = new SolrInputDocument();
457+
parentDoc.addField("id", parentId);
458+
parentDoc.addField("title", "Parent " + parentId);
459+
460+
SolrInputDocument childDoc = new SolrInputDocument();
461+
childDoc.addField("id", childId);
462+
childDoc.addField("title", "Child " + childId);
463+
parentDoc.addChildDocument(childDoc);
464+
465+
try (SolrQueryRequestBase solrReq =
466+
new SolrQueryRequestBase(core, new ModifiableSolrParams())) {
467+
AddUpdateCommand cmd = new AddUpdateCommand(solrReq);
468+
cmd.solrDoc = parentDoc;
469+
core.getUpdateHandler().addDoc(cmd);
470+
}
471+
}
377472
}

0 commit comments

Comments
 (0)