|
5 | 5 | */ |
6 | 6 |
|
7 | 7 | #include "languagemodel.h" |
| 8 | +#include <algorithm> |
8 | 9 | #include <cassert> |
9 | 10 | #include <cmath> |
10 | 11 | #include <cstdlib> |
|
27 | 28 | #include "lm/config.hh" |
28 | 29 | #include "lm/lm_exception.hh" |
29 | 30 | #include "lm/model.hh" |
| 31 | +#include "lm/return.hh" |
30 | 32 | #include "lm/state.hh" |
31 | 33 | #include "lm/word_index.hh" |
32 | 34 | #include "util/string_piece.hh" |
| 35 | +#include "utils.h" |
33 | 36 |
|
34 | 37 | namespace libime { |
35 | 38 |
|
@@ -72,6 +75,8 @@ const DATrie<float> &StaticLanguageModelFile::predictionTrie() const { |
72 | 75 |
|
73 | 76 | static_assert(sizeof(void *) + sizeof(lm::ngram::State) <= StateSize, "Size"); |
74 | 77 |
|
| 78 | +LanguageModelBase::~LanguageModelBase() {} |
| 79 | + |
75 | 80 | bool LanguageModelBase::isNodeUnknown(const LatticeNode &node) const { |
76 | 81 | return isUnknown(node.idx(), node.word()); |
77 | 82 | } |
@@ -217,6 +222,32 @@ bool LanguageModel::isUnknown(WordIndex idx, std::string_view /*word*/) const { |
217 | 222 | return idx == unknown(); |
218 | 223 | } |
219 | 224 |
|
| 225 | +unsigned int |
| 226 | +LanguageModel::maxNgramLength(const std::vector<std::string> &words) const { |
| 227 | + FCITX_D(); |
| 228 | + if (!d->model()) { |
| 229 | + return 0; |
| 230 | + } |
| 231 | + State state = nullState(); |
| 232 | + State outState; |
| 233 | + |
| 234 | + unsigned int maxNgramLength = 0; |
| 235 | + std::vector<WordNode> nodes; |
| 236 | + for (const auto &word : words) { |
| 237 | + const auto idx = index(word); |
| 238 | + lm::FullScoreReturn full = |
| 239 | + d->model()->FullScore(lmState(state), idx, lmState(outState)); |
| 240 | + unsigned int ngramLength = full.ngram_length; |
| 241 | + if (ngramLength == 1 && idx == unknown()) { |
| 242 | + ngramLength = 0; |
| 243 | + } |
| 244 | + |
| 245 | + maxNgramLength = std::max(maxNgramLength, ngramLength); |
| 246 | + state = outState; |
| 247 | + } |
| 248 | + return maxNgramLength; |
| 249 | +} |
| 250 | + |
220 | 251 | void LanguageModel::setUnknownPenalty(float unknown) { |
221 | 252 | FCITX_D(); |
222 | 253 | d->unknown_ = unknown; |
|
0 commit comments