44import json
55import re
66import unicodedata
7-
7+ from copy import deepcopy
88from math import log10
9+ try :
10+ from data .idf import aidf
11+ except :
12+ from .data .idf import aidf
13+
# Private working copy of the imported IDF table. It is bound unconditionally
# (to an empty dict when the imported table is empty or None) so that code
# reading ``aidfDICT`` later never raises NameError. The deep copy keeps any
# mutation of the working copy from leaking back into the imported ``aidf``.
aidfDICT = deepcopy(aidf) if aidf else {}
916
1017class WordExtractionTFIDF (object ):
def __init__(self, aidf=aidf):
    """Initialise ``thd`` and load the IDF table with its document count.

    ``thd`` starts at 0.06. ``idfDICT`` and ``docCount`` are whatever
    ``_getIdfDict("idf.json")`` returns, in that order.

    NOTE(review): the ``aidf`` argument is accepted but not read in this
    method -- confirm whether callers expect it to override the table.
    """
    self.thd = 0.06
    loaded = self._getIdfDict("idf.json")
    # Unpack explicitly so a malformed return value fails right here.
    idf_table, doc_total = loaded
    self.idfDICT = idf_table
    self.docCount = doc_total
1421
@@ -17,15 +24,20 @@ def __str__(self):
1724
def _getIdfDict(self, fn):
    """Return ``(idf_table, document_count)`` for the IDF file named *fn*.

    The file is looked up as ``<directory of this module>/data/<fn>``. When
    it exists and parses as JSON, the parsed object is returned together
    with ``len(table[""])``. If the file is missing, or reading/parsing it
    raises, the module-level ``aidfDICT`` table is used instead. When that
    is absent or empty as well, ``({}, 0)`` is returned.

    Args:
        fn: File name of the JSON IDF table inside the ``data`` directory.

    Returns:
        A 2-tuple ``(table, count)`` where ``count`` is ``len(table[""])``,
        or ``({}, 0)`` when no table is available.
    """
    import os

    path = "{}/data/{}".format(os.path.dirname(os.path.abspath(__file__)), fn)
    try:
        if os.path.isfile(path):
            # The context manager closes the handle even if parsing fails.
            with open(path, "r", encoding="UTF-8") as handle:
                loaded = json.load(handle)
            # idfDICT = {"": [fn id list], w: [ids, ids, ...], ...}
            return loaded, len(loaded[""])
    except Exception as e:
        # Best effort: report the problem and fall through to the fallback.
        print("idf dict import error.")
        print(str(e))

    # ``aidfDICT`` may not be bound at module level (for example when the
    # bundled table is empty), so look it up without risking a NameError.
    fallback = globals().get("aidfDICT")
    if fallback:
        return fallback, len(fallback[""])
    return {}, 0
2941
3042 def eval (self , wct , wlst , dct , idfd ):
3143 # wlst = {w:ct, w:ct, ...}
@@ -77,7 +89,10 @@ def extractKeyword(self, inputSTR, topK, withWeight, allowPOS):
7789 if self .thd < 0 : self .thd = 20
7890
7991 # get word list
80- wordLIST = inputSTR .split ("/" ) # wordLIST = ["沒有", "人", ...]
92+ if type (inputSTR ) == list :
93+ wordLIST = inputSTR
94+ else :
95+ wordLIST = inputSTR .split ("/" ) # wordLIST = ["沒有", "人", ...]
8196
8297 # get tfDICT and get wct of the inputSTR
8398 tfDICT , wct = self .getTfList (wordLIST ) # tfDICT = {"沒有":1, "命運": 2, ...}
0 commit comments