From 70e73b5637506c8082e1a131da8819ec6df56683 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Arthur=20C=C3=A2mara?= Date: Tue, 19 Oct 2021 14:50:55 +0200 Subject: [PATCH] Fixed how to start Anserini docker and HTTP error Previous instructions were building from a local Dockerfile. Pulling and running (in detached mode) should be enough. HTTPS was not working. HTTP does. --- .../retrieval/evaluation/lexical/evaluate_anserini_bm25.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/examples/retrieval/evaluation/lexical/evaluate_anserini_bm25.py b/examples/retrieval/evaluation/lexical/evaluate_anserini_bm25.py index 34b7e93e..3eeb22e6 100644 --- a/examples/retrieval/evaluation/lexical/evaluate_anserini_bm25.py +++ b/examples/retrieval/evaluation/lexical/evaluate_anserini_bm25.py @@ -7,8 +7,7 @@ After docker installation, please follow the steps below to get docker container up and running: 1. docker pull beir/pyserini-fastapi -2. docker build -t pyserini-fastapi . -3. docker run -p 8000:8000 -it --rm pyserini-fastapi +2. docker run -p 8000:8000 -it -d --rm beir/pyserini-fastapi Once the docker container is up and running in local, now run the code below. This code doesn't require GPU to run. 
@@ -34,7 +33,7 @@ #### Download scifact.zip dataset and unzip the dataset dataset = "scifact" -url = "https://public.ukp.informatik.tu-darmstadt.de/thakur/BEIR/datasets/{}.zip".format(dataset) +url = "http://public.ukp.informatik.tu-darmstadt.de/thakur/BEIR/datasets/{}.zip".format(dataset) out_dir = os.path.join(pathlib.Path(__file__).parent.absolute(), "datasets") data_path = util.download_and_unzip(url, out_dir) corpus, queries, qrels = GenericDataLoader(data_path).load(split="test") @@ -83,4 +82,4 @@ scores = sorted(scores_dict.items(), key=lambda item: item[1], reverse=True) for rank in range(10): doc_id = scores[rank][0] - logging.info("Doc %d: %s [%s] - %s\n" % (rank+1, doc_id, corpus[doc_id].get("title"), corpus[doc_id].get("text"))) \ No newline at end of file + logging.info("Doc %d: %s [%s] - %s\n" % (rank+1, doc_id, corpus[doc_id].get("title"), corpus[doc_id].get("text")))