-
Notifications
You must be signed in to change notification settings - Fork 730
feat: add top-level kNN search via Query\Knn and Query::setKnn() #2314
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: 9.x
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,55 @@ | ||
| <?php | ||
|
|
||
| declare(strict_types=1); | ||
|
|
||
| namespace Elastica\Query; | ||
|
|
||
| use Elastica\Param; | ||
|
|
||
| /** | ||
| * Top-level kNN search. | ||
| * | ||
| * Note: `knn` is a sibling of `query` in the search request body, not a clause inside it. | ||
|
Owner
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. That is the part I stumble over. Should we make it part of Query, or make it its own top-level thing extending Param instead?
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. You're right, `knn` is a sibling of `query` in the search body. Nothing today prevents a user from passing a Knn to Query::setQuery() or putting it inside a BoolQuery, where it would silently produce an invalid request. I'll indeed switch to extending Param. Pushing the change shortly! |
||
| * Attach it via {@see \Elastica\Query::setKnn()} rather than putting it inside a BoolQuery. | ||
| * | ||
| * @see https://www.elastic.co/docs/solutions/search/vector/knn | ||
| */ | ||
| class Knn extends Param | ||
| { | ||
| /** | ||
| * Stores the four constructor arguments as the `field`, `query_vector`, `k` and `num_candidates` params, in that order. | ||
| * | ||
| * No validation is performed here; the values are passed to setParam() as given. | ||
| * | ||
| * @param string $field value stored under `field` (the tests in KnnTest use a `dense_vector` field name) | ||
| * @param float[] $queryVector value stored under `query_vector` | ||
| * @param int $k value stored under `k`; presumably the number of nearest neighbours to return, confirm against the linked Elasticsearch docs | ||
| * @param int $numCandidates value stored under `num_candidates`; presumably the per-shard candidate count, confirm against the linked Elasticsearch docs | ||
| */ | ||
| public function __construct(string $field, array $queryVector, int $k, int $numCandidates) | ||
| { | ||
| $this->setParam('field', $field); | ||
| $this->setParam('query_vector', $queryVector); | ||
| $this->setParam('k', $k); | ||
| $this->setParam('num_candidates', $numCandidates); | ||
| } | ||
|
|
||
| /** | ||
| * Adds a Query DSL filter applied before the kNN search. | ||
| * | ||
| * Filters are ANDed together by Elasticsearch. | ||
| * | ||
| * The filter is handed to addParam() under the `filter` key, so repeated calls accumulate: | ||
| * KnnTest expects two calls to serialize as a two-element `filter` list in call order. | ||
| * | ||
| * @param AbstractQuery $filter query object to add under `filter` | ||
| * | ||
| * @return self the value returned by addParam() | ||
| */ | ||
| public function addFilter(AbstractQuery $filter): self | ||
| { | ||
| return $this->addParam('filter', $filter); | ||
| } | ||
|
|
||
| /** | ||
| * Sets the minimum similarity required for a document to be considered a match. | ||
| * | ||
| * The value is stored under the `similarity` key via setParam(). | ||
| * | ||
| * @param float $similarity value stored under `similarity` | ||
| * | ||
| * @return self the value returned by setParam() | ||
| */ | ||
| public function setSimilarity(float $similarity): self | ||
| { | ||
| return $this->setParam('similarity', $similarity); | ||
| } | ||
|
|
||
| /** | ||
| * Boost applied to the kNN score before it is combined with other clauses. | ||
| * | ||
| * The value is stored under the `boost` key via setParam(). | ||
| * | ||
| * @param float $boost value stored under `boost` | ||
| * | ||
| * @return self the value returned by setParam() | ||
| */ | ||
| public function setBoost(float $boost): self | ||
| { | ||
| return $this->setParam('boost', $boost); | ||
| } | ||
| } | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,134 @@ | ||
| <?php | ||
|
|
||
| declare(strict_types=1); | ||
|
|
||
| namespace Elastica\Test\Query; | ||
|
|
||
| use Elastica\Document; | ||
| use Elastica\Mapping; | ||
| use Elastica\Query; | ||
| use Elastica\Query\Knn; | ||
| use Elastica\Query\Range; | ||
| use Elastica\Query\Terms; | ||
| use Elastica\Test\Base as BaseTest; | ||
| use PHPUnit\Framework\Attributes\Group; | ||
|
|
||
| /** | ||
| * Tests for Query\Knn serialization and for attaching it to a Query through setKnn(). | ||
| * | ||
| * The unit tests pin the exact array returned by Knn::toArray() (params nested under a `knn` key, | ||
| * in the order they were set) and check that Query::toArray() exposes a single Knn or a list of Knn | ||
| * under a top-level `knn` key without adding a `query` key. | ||
| * The functional test indexes three documents into a 3-dimensional `dense_vector` field and checks | ||
| * that a kNN search with a `tag` terms filter returns documents 1 and 2 but not document 3. | ||
| * | ||
| * @internal | ||
| */ | ||
| class KnnTest extends BaseTest | ||
| { | ||
| #[Group('unit')] | ||
| public function testToArray(): void | ||
| { | ||
| $knn = new Knn('vector', [0.1, 0.2, 0.3], 100, 200); | ||
|
|
||
| $expected = [ | ||
| 'knn' => [ | ||
| 'field' => 'vector', | ||
| 'query_vector' => [0.1, 0.2, 0.3], | ||
| 'k' => 100, | ||
| 'num_candidates' => 200, | ||
| ], | ||
| ]; | ||
|
|
||
| $this->assertSame($expected, $knn->toArray()); | ||
| } | ||
|
|
||
| #[Group('unit')] | ||
| public function testToArrayWithFiltersSimilarityAndBoost(): void | ||
| { | ||
| $knn = new Knn('vector', [0.5, 0.5], 10, 20); | ||
| $knn->addFilter(new Terms('tag', ['foo'])); | ||
| $knn->addFilter(new Range('age', ['gte' => 20])); | ||
| $knn->setSimilarity(0.7); | ||
| $knn->setBoost(1.5); | ||
|
|
||
| $expected = [ | ||
| 'knn' => [ | ||
| 'field' => 'vector', | ||
| 'query_vector' => [0.5, 0.5], | ||
| 'k' => 10, | ||
| 'num_candidates' => 20, | ||
| 'filter' => [ | ||
| ['terms' => ['tag' => ['foo']]], | ||
| ['range' => ['age' => ['gte' => 20]]], | ||
| ], | ||
| 'similarity' => 0.7, | ||
| 'boost' => 1.5, | ||
| ], | ||
| ]; | ||
|
|
||
| $this->assertSame($expected, $knn->toArray()); | ||
| } | ||
|
|
||
| #[Group('unit')] | ||
| public function testQuerySetKnnEmbedsSingleKnnAtTopLevel(): void | ||
| { | ||
| $query = new Query(); | ||
| $query->setKnn(new Knn('vector', [0.1, 0.2], 5, 10)); | ||
|
|
||
| $body = $query->toArray(); | ||
|
|
||
| $this->assertSame([ | ||
| 'field' => 'vector', | ||
| 'query_vector' => [0.1, 0.2], | ||
| 'k' => 5, | ||
| 'num_candidates' => 10, | ||
| ], $body['knn']); | ||
| $this->assertArrayNotHasKey('query', $body, 'knn-only requests must not be auto-padded with a match_all query'); | ||
| } | ||
|
|
||
| #[Group('unit')] | ||
| public function testQuerySetKnnAcceptsListOfKnnForMultipleKnnSearches(): void | ||
| { | ||
| $query = new Query(); | ||
| $query->setKnn([ | ||
| new Knn('a.vector', [0.1], 5, 10), | ||
| new Knn('b.vector', [0.2], 5, 10), | ||
| ]); | ||
|
|
||
| $body = $query->toArray(); | ||
|
|
||
| $this->assertCount(2, $body['knn']); | ||
| $this->assertSame('a.vector', $body['knn'][0]['field']); | ||
| $this->assertSame('b.vector', $body['knn'][1]['field']); | ||
| $this->assertArrayNotHasKey('query', $body, 'multi-knn requests must not be auto-padded with a match_all query'); | ||
| } | ||
|
|
||
| #[Group('functional')] | ||
| public function testKnnSearchAgainstDenseVectorField(): void | ||
| { | ||
| $index = $this->_createIndex(); | ||
| $index->setMapping(new Mapping([ | ||
| 'tag' => ['type' => 'keyword'], | ||
| 'vector' => [ | ||
| 'type' => 'dense_vector', | ||
| 'dims' => 3, | ||
| 'index' => true, | ||
| 'similarity' => 'cosine', | ||
| ], | ||
| ])); | ||
|
|
||
| $index->addDocuments([ | ||
| new Document('1', ['tag' => 'foo', 'vector' => [1.0, 0.0, 0.0]]), | ||
| new Document('2', ['tag' => 'foo', 'vector' => [0.9, 0.1, 0.0]]), | ||
| new Document('3', ['tag' => 'bar', 'vector' => [0.0, 0.0, 1.0]]), | ||
| ]); | ||
| $index->refresh(); | ||
|
|
||
| $knn = new Knn('vector', [1.0, 0.0, 0.0], 2, 10); | ||
| $knn->addFilter(new Terms('tag', ['foo'])); | ||
|
|
||
| $query = new Query(); | ||
| $query->setKnn($knn); | ||
|
|
||
| $results = $index->search($query); | ||
|
|
||
| $ids = \array_map(static fn ($r): string => $r->getId(), $results->getResults()); | ||
|
|
||
| $this->assertContains('1', $ids); | ||
| $this->assertContains('2', $ids); | ||
| $this->assertNotContains('3', $ids, 'tag filter must exclude documents with another tag value'); | ||
| } | ||
| } |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Validate `setKnn()` array input before mapping.

Line 513 accepts any `array`, but the mapper assumes `list<Knn>`. Empty/non-list/non-`Knn` entries fail late (or build invalid `knn` payloads). Add explicit guards and throw `InvalidException` with a clear message.

Proposed fix:

```diff
 public function setKnn(Knn|array $knn): self
 {
     if (\is_array($knn)) {
+        if ([] === $knn || !\array_is_list($knn)) {
+            throw new InvalidException('Knn must be a non-empty list of Knn instances.');
+        }
+        foreach ($knn as $entry) {
+            if (!$entry instanceof Knn) {
+                throw new InvalidException('Each knn entry must be an instance of '.Knn::class.'.');
+            }
+        }
         $value = \array_map(static fn (Knn $k): array => $k->toArray()['knn'], $knn);
     } else {
         $value = $knn->toArray()['knn'];
     }

     return $this->setParam('knn', $value);
 }
```

🤖 Prompt for AI Agents