Skip to content

Commit d6e1207

Browse files
Merge pull request #128 from contour-terminal/fix/issue-32-hangul-jamo-width
width: set conjoining Hangul V/T Jamo to zero width
2 parents 5919a03 + 5e70184 commit d6e1207

File tree

4 files changed

+67
-0
lines changed

4 files changed

+67
-0
lines changed

src/libunicode/tablegen/multistage_generator.cpp

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -526,6 +526,16 @@ void generateMultistageFiles(UcdParser const& parser, std::string const& outputD
526526
records[static_cast<size_t>(cp)].char_width =
527527
computeCharWidth(records[static_cast<size_t>(cp)], zeroWidthGCs, eawWide, eawFullwidth);
528528

529+
// Conjoining Hangul V/T Jamo must be width 0 so that decomposed syllables
530+
// (L + V + T) sum to the same width as their precomposed forms (issue #32).
531+
// Only V and T need override; L, LV, LVT already get width 2 from EAW=Wide.
532+
for (auto const& r: parser.hangulSyllableType())
533+
{
534+
if (r.property == "V" || r.property == "T")
535+
for (auto cp = r.first; cp <= r.last; ++cp)
536+
records[static_cast<size_t>(cp)].char_width = 0;
537+
}
538+
529539
// ---- Generate multistage tables ----
530540
std::cout << "[tablegen] Generating multistage tables (properties)...\n";
531541

src/libunicode/tablegen/ucd_parser.cpp

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -194,6 +194,7 @@ void UcdParser::parseAll()
194194
// Phase 2: UCD API files
195195
loadGraphemeBreakProps();
196196
loadEastAsianWidths();
197+
loadHangulSyllableType();
197198
loadEmojiProps();
198199
loadBidiMirrored();
199200
loadBidiMirroringGlyph();
@@ -443,6 +444,13 @@ void UcdParser::loadEastAsianWidths()
443444
_eastAsianWidths = loadGenericProperties(_ucdDir + "/EastAsianWidth.txt");
444445
}
445446

447+
// ---- Hangul Syllable Type ----
448+
449+
// Parses HangulSyllableType.txt from the UCD directory via loadGenericProperties()
// and stores the resulting property ranges in _hangulSyllableType.
void UcdParser::loadHangulSyllableType()
450+
{
451+
_hangulSyllableType = loadGenericProperties(_ucdDir + "/HangulSyllableType.txt");
452+
}
453+
446454
// ---- Emoji Properties ----
447455

448456
void UcdParser::loadEmojiProps()

src/libunicode/tablegen/ucd_parser.h

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -122,6 +122,9 @@ class UcdParser
122122
/// East Asian Width ranges, sorted by start codepoint.
123123
[[nodiscard]] auto const& eastAsianWidths() const noexcept { return _eastAsianWidths; }
124124

125+
/// Hangul Syllable Type ranges (L, V, T, LV, LVT), sorted by start codepoint.
/// NOTE(review): the ordering is whatever loadGenericProperties() produces; the UCD file
/// itself groups entries by property value rather than by code point — confirm it sorts.
126+
[[nodiscard]] auto const& hangulSyllableType() const noexcept { return _hangulSyllableType; }
127+
125128
/// Emoji properties grouped by property name.
126129
[[nodiscard]] auto const& emojiProps() const noexcept { return _emojiProps; }
127130

@@ -174,6 +177,7 @@ class UcdParser
174177
void loadBlocks();
175178
void loadGraphemeBreakProps();
176179
void loadEastAsianWidths();
180+
void loadHangulSyllableType();
177181
void loadEmojiProps();
178182
void loadBidiMirrored();
179183
void loadBidiMirroringGlyph();
@@ -229,6 +233,9 @@ class UcdParser
229233
// East Asian Width
230234
std::vector<PropertyRange> _eastAsianWidths;
231235

236+
// Hangul Syllable Type
237+
std::vector<PropertyRange> _hangulSyllableType;
238+
232239
// Emoji
233240
std::map<std::string, std::vector<PropertyRange>> _emojiProps;
234241

src/libunicode/width_test.cpp

Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,48 @@ TEST_CASE("width.single_codepoint", "[width]")
3939
CHECK(unicode::width(U'\U0001F480') == 2); // 💀 :skull:
4040
}
4141

42+
TEST_CASE("width.hangul_jamo", "[width]")
43+
{
44+
// Leading Jamo (Choseong) — EAW=Wide → width 2
45+
CHECK(unicode::width(U'\u1100') == 2); // HANGUL CHOSEONG KIYEOK
46+
CHECK(unicode::width(U'\u115F') == 2); // HANGUL CHOSEONG FILLER
47+
CHECK(unicode::width(U'\uA960') == 2); // HANGUL CHOSEONG TIKEUT-MIEUM (Extended-A)
48+
CHECK(unicode::width(U'\uA97C') == 2); // HANGUL CHOSEONG SSANGYEORINHIEUH (Extended-A)
49+
50+
// Vowel Jamo (Jungseong) — conjoining → width 0
51+
CHECK(unicode::width(U'\u1160') == 0); // HANGUL JUNGSEONG FILLER
52+
CHECK(unicode::width(U'\u1161') == 0); // HANGUL JUNGSEONG A
53+
CHECK(unicode::width(U'\u11A7') == 0); // HANGUL JUNGSEONG O-YAE
54+
CHECK(unicode::width(U'\uD7B0') == 0); // HANGUL JUNGSEONG O-YEO (Extended-B)
55+
CHECK(unicode::width(U'\uD7C6') == 0); // HANGUL JUNGSEONG ARAEA-E (Extended-B)
56+
57+
// Trailing Jamo (Jongseong) — conjoining → width 0
58+
CHECK(unicode::width(U'\u11A8') == 0); // HANGUL JONGSEONG KIYEOK
59+
CHECK(unicode::width(U'\u11FF') == 0); // HANGUL JONGSEONG SSANGNIEUN
60+
CHECK(unicode::width(U'\uD7CB') == 0); // HANGUL JONGSEONG NIEUN-RIEUL (Extended-B)
61+
CHECK(unicode::width(U'\uD7FB') == 0); // HANGUL JONGSEONG PHIEUPH-THIEUTH (Extended-B)
62+
63+
// Precomposed syllables — EAW=Wide → width 2
64+
CHECK(unicode::width(U'\uAC00') == 2); // HANGUL SYLLABLE GA (first)
65+
CHECK(unicode::width(U'\uD7A3') == 2); // HANGUL SYLLABLE HIH (last)
66+
67+
// Compatibility Jamo — EAW=Wide → width 2
68+
CHECK(unicode::width(U'\u3131') == 2); // HANGUL LETTER KIYEOK
69+
70+
// Halfwidth Jamo — no conjoining → width 1
71+
CHECK(unicode::width(U'\uFFA0') == 1); // HALFWIDTH HANGUL FILLER
72+
CHECK(unicode::width(U'\uFFBE') == 1); // HALFWIDTH HANGUL LETTER HIEUH
73+
}
74+
75+
// Regression coverage for issue #32: a decomposed Hangul syllable (L + V or L + V + T)
// must measure 2 columns as a grapheme cluster, the same as a precomposed syllable.
TEST_CASE("width.hangul_decomposed_syllable", "[width]")
76+
{
77+
// L + V = grapheme cluster of width 2
78+
CHECK(unicode::grapheme_cluster_width(U"\u1100\u1161"sv) == 2);
79+
80+
// L + V + T = grapheme cluster of width 2
81+
CHECK(unicode::grapheme_cluster_width(U"\u1100\u1161\u11A8"sv) == 2);
82+
}
83+
4284
TEST_CASE("grapheme_cluster_width.empty", "[width]")
4385
{
4486
CHECK(unicode::grapheme_cluster_width(std::u32string_view {}) == 0);

0 commit comments

Comments
 (0)