Skip to content

Commit c1d5b8e

Browse files
committed
Some more progress.
1 parent 193a5e7 commit c1d5b8e

3 files changed

Lines changed: 131 additions & 0 deletions

File tree

paradedb/sqlalchemy/search.py

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -172,6 +172,37 @@ def more_like_this(
172172
) -> ColumnElement[bool]:
173173
if (document_id is None) == (document is None):
174174
raise InvalidArgumentError("exactly one of document_id or document must be provided")
175+
if document is not None and fields is not None:
176+
raise InvalidArgumentError("fields can only be used with document_id")
177+
178+
if min_term_frequency is not None and min_term_frequency < 0:
179+
raise InvalidArgumentError("min_term_frequency must be >= 0")
180+
if max_query_terms is not None and max_query_terms <= 0:
181+
raise InvalidArgumentError("max_query_terms must be > 0")
182+
if min_doc_frequency is not None and min_doc_frequency < 0:
183+
raise InvalidArgumentError("min_doc_frequency must be >= 0")
184+
if max_doc_frequency is not None and max_doc_frequency < 0:
185+
raise InvalidArgumentError("max_doc_frequency must be >= 0")
186+
if (
187+
min_doc_frequency is not None
188+
and max_doc_frequency is not None
189+
and min_doc_frequency > max_doc_frequency
190+
):
191+
raise InvalidArgumentError("min_doc_frequency cannot be greater than max_doc_frequency")
192+
if min_word_length is not None and min_word_length < 0:
193+
raise InvalidArgumentError("min_word_length must be >= 0")
194+
if max_word_length is not None and max_word_length < 0:
195+
raise InvalidArgumentError("max_word_length must be >= 0")
196+
if (
197+
min_word_length is not None
198+
and max_word_length is not None
199+
and min_word_length > max_word_length
200+
):
201+
raise InvalidArgumentError("min_word_length cannot be greater than max_word_length")
202+
if boost_factor is not None and boost_factor < 0:
203+
raise InvalidArgumentError("boost_factor must be >= 0")
204+
if stopwords is not None and any((not isinstance(word, str)) or (not word.strip()) for word in stopwords):
205+
raise InvalidArgumentError("stopwords entries must be non-empty strings")
175206

176207
options_provided = any(
177208
option is not None

tests/integration/test_advanced_search_integration.py

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55

66
from conftest import Product, assert_uses_paradedb_scan
77
from paradedb.sqlalchemy import search
8+
from paradedb.sqlalchemy.errors import InvalidArgumentError
89

910

1011
pytestmark = pytest.mark.integration
@@ -60,3 +61,30 @@ def test_more_like_this_by_document_payload(session):
6061
assert_uses_paradedb_scan(session, stmt)
6162
ids = list(session.scalars(stmt))
6263
assert 3 in ids
64+
65+
66+
def test_more_like_this_rejects_fields_with_document():
    """Passing `fields` together with an inline `document` must be rejected.

    The validation in `search.more_like_this` raises `InvalidArgumentError`
    before any SQL is built, so no database session is needed here.
    """
    expected_message = "fields can only be used with document_id"
    inline_document = {"description": "x"}

    with pytest.raises(InvalidArgumentError, match=expected_message):
        search.more_like_this(
            Product.id,
            document=inline_document,
            fields=["description"],
        )
69+
70+
71+
def test_more_like_this_rejects_invalid_numeric_options():
    """Every numeric option check in `search.more_like_this` must raise.

    One `pytest.raises` block per validation branch, in the same order the
    checks appear in `more_like_this`, so a failure points at the exact
    branch that regressed. Each call passes only the option(s) under test so
    that no earlier check can fire first.
    """
    with pytest.raises(InvalidArgumentError, match="min_term_frequency must be >= 0"):
        search.more_like_this(Product.id, document_id=1, min_term_frequency=-1)

    with pytest.raises(InvalidArgumentError, match="max_query_terms must be > 0"):
        search.more_like_this(Product.id, document_id=1, max_query_terms=0)

    # Lower-bound checks for the document-frequency window.
    with pytest.raises(InvalidArgumentError, match="min_doc_frequency must be >= 0"):
        search.more_like_this(Product.id, document_id=1, min_doc_frequency=-1)

    with pytest.raises(InvalidArgumentError, match="max_doc_frequency must be >= 0"):
        search.more_like_this(Product.id, document_id=1, max_doc_frequency=-1)

    with pytest.raises(InvalidArgumentError, match="min_doc_frequency cannot be greater than max_doc_frequency"):
        search.more_like_this(Product.id, document_id=1, min_doc_frequency=10, max_doc_frequency=2)

    # Lower-bound checks for the word-length window.
    with pytest.raises(InvalidArgumentError, match="min_word_length must be >= 0"):
        search.more_like_this(Product.id, document_id=1, min_word_length=-1)

    with pytest.raises(InvalidArgumentError, match="max_word_length must be >= 0"):
        search.more_like_this(Product.id, document_id=1, max_word_length=-1)

    with pytest.raises(InvalidArgumentError, match="min_word_length cannot be greater than max_word_length"):
        search.more_like_this(Product.id, document_id=1, min_word_length=10, max_word_length=2)

    with pytest.raises(InvalidArgumentError, match="boost_factor must be >= 0"):
        search.more_like_this(Product.id, document_id=1, boost_factor=-1.0)
86+
87+
88+
def test_more_like_this_rejects_invalid_stopwords():
    """Stopword entries that are empty, blank, or not strings must be rejected.

    The validator rejects an entry when it is not a `str` or when it is empty
    after `strip()`; each of those conditions gets its own case here.
    """
    expected_message = "stopwords entries must be non-empty strings"

    # Empty string.
    with pytest.raises(InvalidArgumentError, match=expected_message):
        search.more_like_this(Product.id, document_id=1, stopwords=["ok", ""])

    # Whitespace-only string: empty once stripped.
    with pytest.raises(InvalidArgumentError, match=expected_message):
        search.more_like_this(Product.id, document_id=1, stopwords=["ok", "   "])

    # Non-string entry.
    with pytest.raises(InvalidArgumentError, match=expected_message):
        search.more_like_this(Product.id, document_id=1, stopwords=["ok", 1])
Lines changed: 72 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,72 @@
1+
from __future__ import annotations
2+
3+
import pytest
4+
from sqlalchemy import Column, Integer, MetaData, Table, Text, select, text
5+
from sqlalchemy.dialects.postgresql import INT4RANGE
6+
from sqlalchemy.orm import Session
7+
8+
from conftest import assert_uses_paradedb_scan
9+
from paradedb.sqlalchemy import search
10+
11+
12+
pytestmark = pytest.mark.integration
13+
14+
15+
def test_range_query_with_op_and_all_predicate(engine):
    """Range containment (`@>`) combined with ParadeDB predicates uses the BM25 index.

    Builds a throwaway `range_items` table with an INT4RANGE column and a BM25
    index, then checks that a `@>` filter combined with `search.all` and with
    `search.match_any` is planned as a ParadeDB scan on that index and returns
    the expected ids.

    Args:
        engine: SQLAlchemy engine fixture connected to the test database.
    """
    metadata = MetaData()
    items = Table(
        "range_items",
        metadata,
        Column("id", Integer, primary_key=True),
        Column("description", Text, nullable=False),
        Column("weight_range", INT4RANGE, nullable=False),
    )

    def drop_range_items():
        # Shared by pre-test reset and post-test cleanup; IF EXISTS makes it
        # safe to call regardless of how far setup got.
        with engine.begin() as conn:
            conn.execute(text("DROP INDEX IF EXISTS range_items_bm25_idx"))
            conn.execute(text("DROP TABLE IF EXISTS range_items"))

    # Clear leftovers from a previous aborted run. The DROP transaction is
    # committed before create_all() runs so the existence check made by
    # create_all() cannot observe a table that is about to disappear.
    drop_range_items()

    try:
        # Setup lives inside the try so a failure while creating the index or
        # inserting rows still triggers cleanup.
        metadata.create_all(engine)

        with engine.begin() as conn:
            conn.execute(
                text(
                    "CREATE INDEX range_items_bm25_idx ON range_items USING bm25 (id, description, weight_range) WITH (key_field='id')"
                )
            )
            conn.execute(
                text(
                    """
                    INSERT INTO range_items (id, description, weight_range)
                    VALUES
                    (1, 'Ergonomic camera strap', int4range(1, 8)),
                    (2, 'Mechanical keyboard', int4range(8, 15)),
                    (3, 'Running shoes', int4range(3, 10))
                    """
                )
            )

        with Session(engine) as session:
            # Rows 1 and 3 have ranges containing 5; row 2 ([8, 15)) does not.
            stmt = (
                select(items.c.id)
                .where(items.c.weight_range.op("@>")(5))
                .where(search.all(items.c.id))
                .order_by(items.c.id)
            )
            assert_uses_paradedb_scan(session, stmt, index_name="range_items_bm25_idx")
            ids = list(session.scalars(stmt))
            assert ids == [1, 3]

            # Same range filter, now combined with a text predicate.
            stmt_with_text = (
                select(items.c.id)
                .where(items.c.weight_range.op("@>")(5))
                .where(search.match_any(items.c.description, "running", "camera"))
                .order_by(items.c.id)
            )
            assert_uses_paradedb_scan(session, stmt_with_text, index_name="range_items_bm25_idx")
            ids_with_text = list(session.scalars(stmt_with_text))
            assert ids_with_text == [1, 3]
    finally:
        drop_range_items()

0 commit comments

Comments
 (0)