Skip to content

Commit 887ed78

Browse files
committed
調整 input_str 字數超過 5000 字的處理方法。
1 parent fe6853d commit 887ed78

2 files changed

Lines changed: 39 additions & 38 deletions

File tree

ArticutAPI/ArticutAPI.py

Lines changed: 38 additions & 37 deletions
Original file line number | Diff line number | Diff line change
@@ -91,10 +91,10 @@ def parse(self, inputSTR, level="", userDefinedDictFILE=None, chemicalBOOL=True,
9191
if level.lower() not in ("lv1", "lv2", "lv3"):
9292
level = self.level
9393

94-
self.openDataPlaceAccessBOOL=openDataPlaceAccessBOOL
95-
self.wikiDataBOOL=wikiDataBOOL
96-
self.chemicalBOOL=chemicalBOOL
97-
self.emojiBOOL=emojiBOOL
94+
self.openDataPlaceAccessBOOL = openDataPlaceAccessBOOL
95+
self.wikiDataBOOL = wikiDataBOOL
96+
self.chemicalBOOL = chemicalBOOL
97+
self.emojiBOOL = emojiBOOL
9898
url = "{}/Articut/API/".format(self.url)
9999
if level in ("lv1", "lv2"):
100100
payload = {"username": self.username, #String Type:使用者帳號 email
@@ -103,7 +103,7 @@ def parse(self, inputSTR, level="", userDefinedDictFILE=None, chemicalBOOL=True,
103103
"level": level.lower(), #String Type:指定為 lv1 極致斷詞 (斷得較細) 或 lv2 詞組斷詞 (斷得較粗)。
104104
"chemical": self.chemicalBOOL, #Bool Type:為 True 或 False,表示是否允許 Articut 讀取 Chemical 偵測化學類名稱。
105105
"emoji": self.emojiBOOL, #Bool Type:為 True 或 False,表示是否允許 Articut 偵測 Emoji 符號。
106-
"opendata_place":self.openDataPlaceAccessBOOL, #Bool Type:為 True 或 False,表示是否允許 Articut 讀取 OpenData 中的地點名稱。
106+
"opendata_place": self.openDataPlaceAccessBOOL, #Bool Type:為 True 或 False,表示是否允許 Articut 讀取 OpenData 中的地點名稱。
107107
"wikidata": self.wikiDataBOOL} #Bool Type:為 True 或 False,表示是否允許 Articut 讀取 WikiData 中的條目名稱。
108108
else:
109109
payload = {"username": self.username, #String Type:使用者帳號 email
@@ -112,10 +112,10 @@ def parse(self, inputSTR, level="", userDefinedDictFILE=None, chemicalBOOL=True,
112112
"level": level.lower(), #String Type:指定為 lv3 語意斷詞。
113113
"chemical": self.chemicalBOOL, #Bool Type:為 True 或 False,表示是否允許 Articut 讀取 Chemical 偵測化學類名稱。
114114
"emoji": self.emojiBOOL, #Bool Type:為 True 或 False,表示是否允許 Articut 偵測 Emoji 符號。
115-
"opendata_place":self.openDataPlaceAccessBOOL, #Bool Type:為 True 或 False,表示是否允許 Articut 讀取 OpenData 中的地點名稱。
115+
"opendata_place": self.openDataPlaceAccessBOOL, #Bool Type:為 True 或 False,表示是否允許 Articut 讀取 OpenData 中的地點名稱。
116116
"wikidata": self.wikiDataBOOL, #Bool Type:為 True 或 False,表示是否允許 Articut 讀取 WikiData 中的條目名稱。
117-
"index_with_pos":False,
118-
"pinyin":pinyin
117+
"index_with_pos": False,
118+
"pinyin": pinyin
119119
}
120120
if timeRef:
121121
payload["time_ref"] = str(timeRef)
@@ -150,35 +150,36 @@ def parse(self, inputSTR, level="", userDefinedDictFILE=None, chemicalBOOL=True,
150150
retry_count = 0
151151
while True:
152152
try:
153-
result = requests.post(url, json=payload)
154-
if result.status_code == 200:
155-
result = result.json()
156-
if not result["status"]:
157-
return result
153+
responseDICT = requests.post(url, json=payload)
154+
if responseDICT.status_code == 200:
155+
responseDICT = responseDICT.json()
156+
if not responseDICT["status"]:
157+
return responseDICT
158158

159159
if resultDICT:
160-
resultDICT["exec_time"] += result["exec_time"]
161-
resultDICT["word_count_balance"] = result["word_count_balance"]
160+
resultDICT["exec_time"] += responseDICT["exec_time"]
161+
if "word_count_balance" in responseDICT:
162+
resultDICT["word_count_balance"] = responseDICT["word_count_balance"]
162163
if level in ("lv1", "lv2"):
163-
resultDICT["result_obj"].extend(result["result_obj"])
164-
resultDICT["result_pos"].extend(result["result_pos"])
165-
resultDICT["result_segmentation"] += "/{}".format(result["result_segmentation"])
164+
resultDICT["result_obj"].extend(responseDICT["result_obj"])
165+
resultDICT["result_pos"].extend(responseDICT["result_pos"])
166+
resultDICT["result_segmentation"] += "/{}".format(responseDICT["result_segmentation"])
166167
else:
167-
resultDICT["input"].extend([[i[0] + count, i[1] + count] for i in result["input"]])
168-
resultDICT["entity"].extend(result["entity"])
169-
resultDICT["event"].extend(result["event"])
170-
resultDICT["person"].extend(result["person"])
171-
resultDICT["site"].extend(result["site"])
172-
resultDICT["time"].extend(result["time"])
173-
resultDICT["user_defined"].extend(result["user_defined"])
174-
resultDICT["utterance"].extend(result["utterance"])
175-
resultDICT["number"] = {**resultDICT["number"], **result["number"]}
176-
resultDICT["unit"] = {**resultDICT["unit"], **result["unit"]}
168+
resultDICT["input"].extend([[i[0] + count, i[1] + count] for i in responseDICT["input"]])
169+
resultDICT["entity"].extend(responseDICT["entity"])
170+
resultDICT["event"].extend(responseDICT["event"])
171+
resultDICT["person"].extend(responseDICT["person"])
172+
resultDICT["site"].extend(responseDICT["site"])
173+
resultDICT["time"].extend(responseDICT["time"])
174+
resultDICT["user_defined"].extend(responseDICT["user_defined"])
175+
resultDICT["utterance"].extend(responseDICT["utterance"])
176+
resultDICT["number"] = {**resultDICT["number"], **responseDICT["number"]}
177+
resultDICT["unit"] = {**resultDICT["unit"], **responseDICT["unit"]}
177178
else:
178-
resultDICT = result
179+
resultDICT = responseDICT
179180
count += len(x)
180181
else:
181-
return result
182+
return responseDICT
182183

183184
# 成功取得結果跳出 while 迴圈
184185
break
@@ -222,12 +223,12 @@ def versions(self):
222223
url = "{}/Articut/Versions/".format(self.url)
223224
payload = {"username": self.username,
224225
"api_key": self.apikey}
225-
result = requests.post(url, data=payload)
226-
if result.status_code == 200:
227-
result = result.json()
228-
result["product"] = "{}/product/".format(self.url)
229-
result["document"] = "{}/document/".format(self.url)
230-
return result
226+
responseDICT = requests.post(url, data=payload)
227+
if responseDICT.status_code == 200:
228+
responseDICT = responseDICT.json()
229+
responseDICT["product"] = "{}/product/".format(self.url)
230+
responseDICT["document"] = "{}/document/".format(self.url)
231+
return responseDICT
231232

232233
##############################################################################
233234
# Toolkits #
@@ -350,7 +351,7 @@ def getWikiDataLIST(self, parseResultDICT, indexWithPOS=True):
350351
inputSTR = "劉克襄在本次活動當中,分享了台北中山北路一日遊路線。他表示當初自己領著柯文哲一同探索了雙連市場與中山捷運站的小吃與商圈,還有商圈內的文創商店與日系雜物店鋪,都令柯文哲留下深刻的印象。劉克襄也認為,雙連市場內的魯肉飯、圓仔湯與切仔麵,還有九條通的日式店家、居酒屋等特色,也能讓人感受到台北舊城區不一樣的魅力。" #Articut-GraphQL Demo
351352
#inputSTR = "業經前案判決非法持有可發射子彈具殺傷力之槍枝罪"
352353
#inputSTR = "劉克襄在本次活動當中,分享了台北中山北路一日遊路線。"
353-
inputSTR = "在常溫下可將銀氧化成氧化銀"
354+
#inputSTR = "在常溫下可將銀氧化成氧化銀"
354355
articut = Articut()
355356

356357
print("inputSTR:{}\n".format(inputSTR))

setup.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55

66
setuptools.setup(
77
name="ArticutAPI",
8-
version="1.3.4",
8+
version="1.3.5",
99
author="Droidtown Linguistic Tech. Co. Ltd.",
1010
author_email="info@droidtown.co",
1111
description="Articut NLP system provides not only finest results on Chinese word segmentation (CWS), Part-of-Speech tagging (POS) and Named Entity Recognition tagging (NER), but also the fastest online API service in the NLP industry.",

0 commit comments

Comments
 (0)