Skip to content

Commit b956323

Browse files
committed
add range queries
add new field id_num; add range queries over id_num
1 parent 7cee369 commit b956323

File tree

8 files changed

+21
-7
lines changed

8 files changed

+21
-7
lines changed

Makefile

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@ COMMANDS ?= TOP_10 TOP_10_COUNT COUNT
77

88
# ENGINES ?= tantivy-0.13 lucene-8.4.0 pisa-0.8.2 rucene-0.1 bleve-0.8.0-scorch rucene-0.1 tantivy-0.11 tantivy-0.14 tantivy-0.15 tantivy-0.16 tantivy-0.17 tantivy-0.18 tantivy-0.19
99
# ENGINES ?= tantivy-0.16 lucene-8.10.1 pisa-0.8.2 bleve-0.8.0-scorch rucene-0.1
10-
ENGINES ?= tantivy-0.16 tantivy-0.17 tantivy-0.18 tantivy-0.19
10+
ENGINES ?= tantivy-0.19 lucene-8.10.1
1111
PORT ?= 8080
1212

1313
help:

corpus_transform.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,9 +16,12 @@ def transform(text):
1616

1717
if doc["url"] == "":
1818
continue
19+
if len(doc["url"].split("curid=",1)) == 1:
20+
continue
1921

2022
doc_transformed = {
2123
"id": doc["url"],
24+
"id_num": int(doc["url"].split("curid=",1)[1]),
2225
"text": transform(doc["body"])
2326
}
2427

engines/lucene-8.10.1/src/main/java/BuildIndex.java

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,9 +25,11 @@ public static void main(String[] args) throws IOException {
2525
final Document document = new Document();
2626

2727
StoredField idField = new StoredField("id", "");
28+
IntPoint idNumField = new IntPoint("id_num", 0);
2829
TextField textField = new TextField("text", "", Field.Store.NO);
2930

3031
document.add(idField);
32+
document.add(idNumField);
3133
document.add(textField);
3234

3335
String line;
@@ -37,8 +39,10 @@ public static void main(String[] args) throws IOException {
3739
}
3840
final JsonObject parsed_doc = Json.parse(line).asObject();
3941
final String id = parsed_doc.get("id").asString();
42+
final int id_num = parsed_doc.get("id_num").asInt();
4043
final String text = parsed_doc.get("text").asString();
4144
idField.setStringValue(id);
45+
idNumField.setIntValue(id_num);
4246
textField.setStringValue(text);
4347
writer.addDocument(document);
4448
}

engines/tantivy-0.19/src/bin/build_index.rs

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@ use futures::executor::block_on;
22
use std::env;
33
use std::io::BufRead;
44
use std::path::Path;
5-
use tantivy::schema::{Schema, STORED, TEXT};
5+
use tantivy::schema::{Schema, FAST, INDEXED, STORED, TEXT};
66
use tantivy::Index;
77

88
fn main() {
@@ -12,6 +12,7 @@ fn main() {
1212

1313
fn create_schema() -> Schema {
1414
let mut schema_builder = Schema::builder();
15+
schema_builder.add_u64_field("id_num", FAST | INDEXED);
1516
schema_builder.add_text_field("id", STORED);
1617
schema_builder.add_text_field("text", TEXT);
1718
schema_builder.build()

format_queries.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -33,14 +33,14 @@ def generate_queries(words):
3333
}
3434

3535
for line in fileinput.input():
36-
(count, query) = PTN.split(line.decode("utf-8").strip(), 1)
36+
(count, query) = PTN.split(line.strip(), 1)
3737
count = int(count)
3838
if not LETTERS_ONLY.match(query):
3939
continue
4040
words = PTN.split(query)
4141
for q in generate_queries(words):
4242
try:
43-
qdoc = json.dumps(q).encode("utf-8")
44-
print qdoc
43+
qdoc = json.dumps(q)
44+
print(qdoc)
4545
except:
4646
pass

queries.txt

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -897,3 +897,9 @@
897897
{"query": "\"laborers international union of north america\"", "tags": ["phrase", "phrase:num_tokens_>3"]}
898898
{"query": "laborers international union of north america", "tags": ["union", "global", "union:num_tokens_>3"]}
899899
{"query": "+\"the who\" +uk", "tags": ["two-phase-critic"]}
900+
{"query": "id_num:[48694410 TO 48694420] +griffith +observatory", "tags": ["range", "range_selective"]}
901+
{"query": "id_num:[48694410 TO 48694420] +the", "tags": ["range", "range_selective"]}
902+
{"query": "id_num:[48694410 TO 48694420] niceville high school", "tags": ["range", "range_selective"]}
903+
{"query": "id_num:[0 TO 10000000] +griffith +observatory", "tags": ["range", "range_unselective"]}
904+
{"query": "id_num:[0 TO 10000000] +the", "tags": ["range", "range_unselective"]}
905+
{"query": "id_num:[0 TO 10000000] niceville high school", "tags": ["range", "range_unselective"]}

results.json

Lines changed: 1 addition & 1 deletion
Large diffs are not rendered by default.

web/build/results.json

Lines changed: 1 addition & 1 deletion
Large diffs are not rendered by default.

0 commit comments

Comments
 (0)