Skip to content

Commit 12b62cb

Browse files
authored
Support multiple entries in inner fuzzy (#111)
The previous commit added a duplicate entry in inner fuzzy, which requires some additional changes to support. There seems to be little benefit in exposing inner fuzzy, so the new version makes use of a transparent map and is kept as a hidden API.
1 parent a9a8761 commit 12b62cb

5 files changed

Lines changed: 89 additions & 6 deletions

File tree

src/libime/core/utils_p.h

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -194,6 +194,19 @@ inline int millisecondsTill(T t0) {
194194
.count();
195195
}
196196

197+
/**
 * Transparent hasher for string-like keys.
 *
 * All overloads hash through std::hash<std::string_view>, so a const char *,
 * a std::string_view and a std::string holding the same characters yield the
 * same hash value. The is_transparent tag marks the hasher as usable for
 * heterogeneous lookup in containers that support it.
 */
struct StringHash {
    using hash_type = std::hash<std::string_view>;
    using is_transparent = void;

    // Primary overload: the other two forward here after viewing their
    // argument as a std::string_view.
    std::size_t operator()(std::string_view str) const {
        hash_type hasher{};
        return hasher(str);
    }

    // C string: viewed up to its terminating NUL.
    std::size_t operator()(const char *str) const {
        return (*this)(std::string_view(str));
    }

    std::size_t operator()(const std::string &str) const {
        return (*this)(std::string_view(str));
    }
};
209+
197210
} // namespace libime
198211

199212
#endif // _LIBIME_LIBIME_CORE_UTILS_P_H_

src/libime/pinyin/pinyindata.cpp

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616
#include <vector>
1717
#include <fcitx-utils/log.h>
1818
#include <fcitx-utils/stringutils.h>
19+
#include "pinyindata_p.h"
1920
#include "pinyinencoder.h"
2021

2122
namespace libime {
@@ -152,6 +153,39 @@ getInnerSegment() {
152153
return innerSegment;
153154
}
154155

156+
/**
 * Returns the inner-segment table in which one pinyin string may map to
 * several (head, tail) splits.
 *
 * The table is built once, on first call: every entry yielded by
 * getInnerSegment() is appended first, then the additional splits listed
 * below are appended to the list stored under their pinyin.
 */
const InnerSegmentMap &getInnerSegmentV2() {
    static const InnerSegmentMap innerSegment = []() {
        // Additional splits appended after the getInnerSegment() entries.
        struct ExtraSplit {
            const char *pinyin;
            const char *head;
            const char *tail;
        };
        constexpr ExtraSplit extraSplits[] = {
            {"qiao", "qia", "o"}, {"niao", "nia", "o"}, {"liao", "lia", "o"},
            {"zhuo", "zhu", "o"}, {"diao", "dia", "o"}, {"shao", "sha", "o"},
            {"xiao", "xia", "o"}, {"zhua", "zhu", "a"}, {"shuo", "shu", "o"},
            {"shua", "shu", "a"}, {"zhao", "zha", "o"}, {"jiao", "jia", "o"},
            {"chuo", "chu", "o"}, {"chua", "chu", "a"}, {"chao", "cha", "o"},
        };

        InnerSegmentMap table;
        for (const auto &[pinyin, split] : getInnerSegment()) {
            table[pinyin].push_back(split);
        }
        for (const auto &extra : extraSplits) {
            table[extra.pinyin].emplace_back(extra.head, extra.tail);
        }
        return table;
    }();

    return innerSegment;
}
188+
155189
inline bool operator==(const PinyinEntry &a, const PinyinEntry &b) {
156190
return a.pinyin() == b.pinyin() && a.initial() == b.initial() &&
157191
a.final() == b.final() && a.flags() == b.flags();

src/libime/pinyin/pinyindata_p.h

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
/*
2+
* SPDX-FileCopyrightText: 2017-2017 CSSlayer <wengxt@gmail.com>
3+
*
4+
* SPDX-License-Identifier: LGPL-2.1-or-later
5+
*/
6+
#ifndef _FCITX_LIBIME_PINYIN_PINYINDATA_P_H_
7+
#define _FCITX_LIBIME_PINYIN_PINYINDATA_P_H_
8+
9+
#include <functional>
10+
#include <string>
11+
#include <unordered_map>
12+
#include <utility>
13+
#include <vector>
14+
#include "libime/core/utils_p.h"
15+
16+
namespace libime {
17+
18+
using InnerSegmentMap =
19+
std::unordered_map<std::string,
20+
std::vector<std::pair<std::string, std::string>>,
21+
StringHash, std::equal_to<>>;
22+
23+
const std::unordered_map<std::string,
24+
std::vector<std::pair<std::string, std::string>>,
25+
StringHash, std::equal_to<>> &
26+
getInnerSegmentV2();
27+
} // namespace libime
28+
29+
#endif // _FCITX_LIBIME_PINYIN_PINYINDATA_P_H_

src/libime/pinyin/pinyinencoder.cpp

Lines changed: 9 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@
2929
#include "libime/core/segmentgraph.h"
3030
#include "pinyincorrectionprofile.h"
3131
#include "pinyindata.h"
32+
#include "pinyindata_p.h"
3233
#include "shuangpinprofile.h"
3334

3435
namespace libime {
@@ -295,13 +296,15 @@ PinyinEncoder::parseUserPinyin(std::string userPinyin,
295296
fuzzyFlags.test(PinyinFuzzyFlag::Inner)) ||
296297
(nextPinyin.size() == 3 &&
297298
flags.test(PinyinFuzzyFlag::InnerShort))) {
298-
const auto &innerSegments = getInnerSegment();
299-
auto iter = innerSegments.find(std::string(nextPinyin));
299+
const auto &innerSegments = getInnerSegmentV2();
300+
auto iter = innerSegments.find(nextPinyin);
300301
if (iter != innerSegments.end()) {
301-
result.addNext(top,
302-
top + iter->second.first.size());
303-
result.addNext(top + iter->second.first.size(),
304-
top + nextSize[i]);
302+
for (const auto &innerSeg : iter->second) {
303+
result.addNext(top,
304+
top + innerSeg.first.size());
305+
result.addNext(top + innerSeg.first.size(),
306+
top + nextSize[i]);
307+
}
305308
}
306309
} else if (nextPinyin.size() == 2 &&
307310
flags.test(PinyinFuzzyFlag::InnerShort) &&

test/testpinyinencoder.cpp

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -251,6 +251,10 @@ int main() {
251251

252252
check("sangeren", PinyinFuzzyFlag::Inner, {"san", "ge", "ren"});
253253

254+
check("jiao", PinyinFuzzyFlag::Inner, {"jiao"});
255+
check("jiao", PinyinFuzzyFlag::Inner, {"ji", "ao"});
256+
check("jiao", PinyinFuzzyFlag::Inner, {"jia", "o"});
257+
254258
{
255259
PinyinCorrectionProfile profile(BuiltinPinyinCorrectionProfile::Qwerty);
256260
auto graph = PinyinEncoder::parseUserPinyin(

0 commit comments

Comments
 (0)