Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 4 additions & 4 deletions beir/retrieval/models/sparta.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,9 +61,9 @@ def encode_corpus(self, corpus: List[Dict[str, str]], batch_size: int = 16, **kw
sentences = [(doc["title"] + self.sep + doc["text"]).strip() for doc in corpus]
sparse_idx = 0
num_elements = len(sentences) * self.sparse_vector_dim
col = np.zeros(num_elements, dtype=np.int)
row = np.zeros(num_elements, dtype=np.int)
values = np.zeros(num_elements, dtype=np.float)
col = np.zeros(num_elements, dtype=np.int64)
row = np.zeros(num_elements, dtype=np.int64)
values = np.zeros(num_elements, dtype=np.float64)

for start_idx in trange(0, len(sentences), batch_size, desc="docs"):
doc_embs = self._compute_sparse_embeddings(sentences[start_idx: start_idx + batch_size])
Expand All @@ -74,4 +74,4 @@ def encode_corpus(self, corpus: List[Dict[str, str]], batch_size: int = 16, **kw
values[sparse_idx] = score
sparse_idx += 1

return csr_matrix((values, (row, col)), shape=(len(self.bert_input_embeddings), len(sentences)), dtype=np.float)
return csr_matrix((values, (row, col)), shape=(len(self.bert_input_embeddings), len(sentences)), dtype=np.float64)
16 changes: 8 additions & 8 deletions beir/retrieval/models/unicoil.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ def __init__(self, model_path: Union[str, Tuple] = None, sep: str = " ", query_m
self.model.eval()

def encode_query(self, query: str, batch_size: int = 16, **kwargs):
embedding = np.zeros(self.bert_input_emb, dtype=np.float)
embedding = np.zeros(self.bert_input_emb, dtype=np.float64)
input_ids = self.tokenizer(query, max_length=self.query_max_length, padding='longest',
truncation=True, add_special_tokens=True,
return_tensors='pt').to(self.device)["input_ids"]
Expand Down Expand Up @@ -59,9 +59,9 @@ def encode(
non_zero_tokens += len(token_ids_and_embs)
passage_embs.append(token_ids_and_embs)

col = np.zeros(non_zero_tokens, dtype=np.int)
row = np.zeros(non_zero_tokens, dtype=np.int)
values = np.zeros(non_zero_tokens, dtype=np.float)
col = np.zeros(non_zero_tokens, dtype=np.int64)
row = np.zeros(non_zero_tokens, dtype=np.int64)
values = np.zeros(non_zero_tokens, dtype=np.float64)
sparse_idx = 0

for pid, emb in enumerate(passage_embs):
Expand All @@ -71,7 +71,7 @@ def encode(
values[sparse_idx] = score
sparse_idx += 1

return csr_matrix((values, (col, row)), shape=(len(sentences), self.bert_input_emb), dtype=np.float)
return csr_matrix((values, (col, row)), shape=(len(sentences), self.bert_input_emb), dtype=np.float64)

# class UniCOIL:
# def __init__(self, model_path: Union[str, Tuple] = None, sep: str = " ", **kwargs):
Expand All @@ -98,7 +98,7 @@ def encode(
# batch_size: int = 32,
# max_length: int = 512) -> np.ndarray:

# embeddings = np.zeros((len(sentences), self.sparse_vector_dim), dtype=np.float)
# embeddings = np.zeros((len(sentences), self.sparse_vector_dim), dtype=np.float64)

# for start_idx in trange(0, len(sentences), batch_size, desc="docs"):
# documents = sentences[start_idx: start_idx + batch_size]
Expand All @@ -114,7 +114,7 @@ def encode(
# np.put(embeddings[start_idx + idx], batch_token_ids[idx], batch_weights[idx].flatten())

# return embeddings
# # return csr_matrix((values, (row, col)), shape=(len(sentences), self.sparse_vector_dim), dtype=np.float).toarray()
# # return csr_matrix((values, (row, col)), shape=(len(sentences), self.sparse_vector_dim), dtype=np.float64).toarray()


# Chunks of this code have been taken from: https://github.com/castorini/pyserini/blob/master/pyserini/encode/_unicoil.py
Expand Down Expand Up @@ -165,4 +165,4 @@ def forward(
sequence_output = outputs.last_hidden_state
tok_weights = self.tok_proj(sequence_output)
tok_weights = torch.relu(tok_weights)
return tok_weights
return tok_weights
2 changes: 1 addition & 1 deletion beir/retrieval/search/dense/exact_search_multi_gpu.py
Original file line number Diff line number Diff line change
Expand Up @@ -162,7 +162,7 @@ def search(self,
query_id = query_ids[query_itr]
for i in range(len(cos_scores_top_k_values)):
sub_corpus_id = cos_scores_top_k_idx[i][query_itr]
score = cos_scores_top_k_values[i][query_itr].item() # convert np.float to float
score = cos_scores_top_k_values[i][query_itr].item() # convert np.float64 to float
corpus_id = corpus_ids[sub_corpus_id]
if corpus_id != query_id:
self.results[query_id][corpus_id] = score
Expand Down
4 changes: 2 additions & 2 deletions beir/retrieval/search/dense/faiss_index.py
Original file line number Diff line number Diff line change
Expand Up @@ -147,7 +147,7 @@ def search(self, query_embeddings: np.ndarray, k: int, binary_k: int = 1000, rer
if self._passage_ids is not None:
ids_arr = self._passage_ids[ids_arr.reshape(-1)].reshape(num_queries, -1)
else:
ids_arr = np.array([self.index.id_map.at(int(id_)) for id_ in ids_arr.reshape(-1)], dtype=np.int)
ids_arr = np.array([self.index.id_map.at(int(id_)) for id_ in ids_arr.reshape(-1)], dtype=np.int64)
ids_arr = ids_arr.reshape(num_queries, -1)

scores_arr = scores_arr[np.arange(num_queries)[:, None], sorted_indices]
Expand All @@ -171,4 +171,4 @@ def build(
for start in trange(0, len(passage_ids), buffer_size):
index.add(passage_embeddings[start : start + buffer_size])

return cls(index, passage_ids, passage_embeddings)
return cls(index, passage_ids, passage_embeddings)