File tree Expand file tree Collapse file tree
Expand file tree Collapse file tree Original file line number Diff line number Diff line change 1+ 0.2.2 2023-09-06
2+ - Fix behaviour for end of text character positions
3+ when no end of sentence occurred before.
4+
150.2.1 2023-09-05
26 - Add English tokenizer.
37 - Fix buffer bug.
Original file line number Diff line number Diff line change @@ -1018,6 +1018,10 @@ PARSECHAR:
10181018
10191019 if eot {
10201020 eot = false
1021+ if ! sentenceEnd {
1022+ sentenceEnd = true
1023+ w .SentenceEnd (buffc )
1024+ }
10211025 textEnd = true
10221026 w .TextEnd (0 )
10231027 if DEBUG {
Original file line number Diff line number Diff line change @@ -592,6 +592,10 @@ PARSECHARM:
592592
593593 if eot {
594594 eot = false
595+ if ! sentenceEnd {
596+ sentenceEnd = true
597+ w .SentenceEnd (buffc )
598+ }
595599 textEnd = true
596600 w .TextEnd (buffc )
597601 rewindBuffer = true
Original file line number Diff line number Diff line change @@ -85,6 +85,18 @@ func TestTokenWriterFromOptions(t *testing.T) {
8585 matStr = w .String ()
8686 assert .Equal ("1 5 5 6\n 1 6\n 0 3 3 4\n 0 4\n " , matStr )
8787
88+ w .Reset ()
89+ mat .TransduceTokenWriter (strings .NewReader ("Tree\n \x04 \n " ), tws )
90+
91+ matStr = w .String ()
92+ assert .Equal ("0 4\n 0 4\n " , matStr )
93+
94+ w .Reset ()
95+ mat .TransduceTokenWriter (strings .NewReader ("Tree.\n \x04 \n " ), tws )
96+
97+ matStr = w .String ()
98+ assert .Equal ("0 4 4 5\n 0 5\n " , matStr )
99+
88100 //
89101 // Write sentence offsets without token offsets
90102 tws = NewTokenWriter (w , SENTENCE_POS | NEWLINE_AFTER_EOT )
You can’t perform that action at this time.
0 commit comments