icu/icu.icuXXXX.virama.prevnext.patch

98 lines
3.6 KiB
Diff
Raw Normal View History

2006-10-05 11:19:02 +00:00
diff -ur icu.orig/source/common/rbbi.cpp icu/source/common/rbbi.cpp
--- icu.orig/source/common/rbbi.cpp 2006-10-05 11:54:13.000000000 +0100
+++ icu/source/common/rbbi.cpp 2006-10-05 11:57:31.000000000 +0100
@@ -879,6 +879,21 @@
RBBI_END // state machine processing is after end of user text.
};
+#define VIRAMA_SCRIPT(wc) ((wc) >= 0x0901 && (wc) <= 0x17FF) /* True when wc lies in the inclusive range U+0901..U+17FF; wc is expanded twice, so pass a side-effect-free expression. NOTE(review): this span also covers non-Indic blocks such as Georgian and Ethiopic - confirm the breadth is intended. */
+#define VIRAMA(wc) ((wc) == 0x094D || /* Devanagari sign virama; first of the 14 code points tested (wc is expanded in every comparison) */ \
+ (wc) == 0x09CD || /* Bengali sign virama */ \
+ (wc) == 0x0A4D || /* Gurmukhi sign virama */ \
+ (wc) == 0x0ACD || /* Gujarati sign virama */ \
+ (wc) == 0x0B4D || /* Oriya sign virama */ \
+ (wc) == 0x0BCD || /* Tamil sign virama */ \
+ (wc) == 0x0C4D || /* Telugu sign virama */ \
+ (wc) == 0x0CCD || /* Kannada sign virama */ \
+ (wc) == 0x0D4D || /* Malayalam sign virama */ \
+ (wc) == 0x0DCA || /* Sinhala sign al-lakuna */ \
+ (wc) == 0x0E3A || /* Thai character phinthu */ \
+ (wc) == 0x0F84 || /* Tibetan mark halanta */ \
+ (wc) == 0x1039 || /* Myanmar sign virama */ \
+ (wc) == 0x17D2) /* Khmer sign coeng (character names per the Unicode code charts) */
//-----------------------------------------------------------------------------------
//
@@ -896,6 +911,7 @@
RBBIRunMode mode;
RBBIStateTableRow *row;
+ UChar32 prevchar;
UChar32 c;
int32_t lookaheadStatus = 0;
int32_t lookaheadTagIdx = 0;
@@ -919,6 +935,7 @@
// if we're already at the end of the text, return DONE.
initialPosition = (int32_t)UTEXT_GETNATIVEINDEX(fText);
result = initialPosition;
+ prevchar = 0;
c = UTEXT_NEXT32(fText);
if (fData == NULL || c==U_SENTINEL) {
return BreakIterator::DONE;
@@ -1001,6 +1018,11 @@
// State Transition - move machine to its next state
//
+ if (VIRAMA_SCRIPT(c) && VIRAMA(prevchar))
+ {
+ state = START_STATE;
+ row = (RBBIStateTableRow *) (tableData + tableRowLen * state);
+ }
state = row->fNextState[category];
row = (RBBIStateTableRow *)
// (statetable->fTableData + (statetable->fRowLen * state));
@@ -1059,6 +1081,7 @@
// the input position. The next iteration will be processing the
// first real input character.
if (mode == RBBI_RUN) {
+ prevchar = c;
c = UTEXT_NEXT32(fText);
} else {
if (mode == RBBI_START) {
@@ -1107,6 +1130,7 @@
int16_t category = 0;
RBBIRunMode mode;
RBBIStateTableRow *row;
+ UChar32 prevchar;
UChar32 c;
int32_t lookaheadStatus = 0;
int32_t result = 0;
@@ -1135,6 +1159,7 @@
// Set up the starting char.
initialPosition = (int32_t)UTEXT_GETNATIVEINDEX(fText);
result = initialPosition;
+ prevchar = 0;
c = UTEXT_PREVIOUS32(fText);
// Set the initial state for the state machine
@@ -1218,6 +1243,11 @@
// State Transition - move machine to its next state
//
+ if (VIRAMA_SCRIPT(prevchar) && VIRAMA(c))
+ {
+ state = START_STATE;
+ row = (RBBIStateTableRow *) (statetable->fTableData + (statetable->fRowLen * state));
+ }
state = row->fNextState[category];
row = (RBBIStateTableRow *)
(statetable->fTableData + (statetable->fRowLen * state));
@@ -1269,6 +1299,7 @@
// the input position. The next iteration will be processing the
// first real input character.
if (mode == RBBI_RUN) {
+ prevchar = c;
c = UTEXT_PREVIOUS32(fText);
} else {
if (mode == RBBI_START) {