115 lines
3.9 KiB
Diff
115 lines
3.9 KiB
Diff
diff -up onig-6.8.2/src/gb18030.c.orig onig-6.8.2/src/gb18030.c
|
|
--- onig-6.8.2/src/gb18030.c.orig 2023-10-17 12:12:44.944352236 +0200
|
|
+++ onig-6.8.2/src/gb18030.c 2023-10-17 12:14:52.188483869 +0200
|
|
@@ -76,6 +76,20 @@ gb18030_mbc_enc_len(const UChar* p)
|
|
}
|
|
|
|
static int
|
|
+gb18030_code_to_mbclen(OnigCodePoint code) /* Returns the encoded length (1, 2 or 4) of `code`, or ONIGERR_INVALID_CODE_POINT_VALUE. */
|
|
+{ /* The length is chosen from the highest non-zero byte of `code`. */
|
|
+ if ((code & 0xff000000) != 0) return 4; /* any bit set in the top byte: length 4 */
|
|
+ else if ((code & 0xff0000) != 0) return ONIGERR_INVALID_CODE_POINT_VALUE; /* highest set byte is the third: rejected, length 3 is never returned */
|
|
+ else if ((code & 0xff00) != 0) return 2; /* highest set byte is the second: length 2 */
|
|
+ else { /* code fits in the low byte */
|
|
+ if (GB18030_MAP[(int )(code & 0xff)] == CM) /* NOTE(review): CM presumably marks a multi-byte lead byte - confirm against the GB18030_MAP definition */
|
|
+ return ONIGERR_INVALID_CODE_POINT_VALUE; /* a byte classified CM is not accepted as a one-byte code */
|
|
+
|
|
+ return 1; /* any other low-byte value is a one-byte code */
|
|
+ }
|
|
+}
|
|
+
|
|
+static int
|
|
is_valid_mbc_string(const UChar* p, const UChar* end)
|
|
{
|
|
while (p < end) {
|
|
@@ -522,7 +536,7 @@ OnigEncodingType OnigEncodingGB18030 = {
|
|
1, /* min enc length */
|
|
onigenc_is_mbc_newline_0x0a,
|
|
gb18030_mbc_to_code,
|
|
- onigenc_mb4_code_to_mbclen,
|
|
+ gb18030_code_to_mbclen,
|
|
gb18030_code_to_mbc,
|
|
gb18030_mbc_case_fold,
|
|
onigenc_ascii_apply_all_case_fold,
|
|
diff -up onig-6.8.2/src/regparse.c.orig onig-6.8.2/src/regparse.c
|
|
--- onig-6.8.2/src/regparse.c.orig 2023-10-17 12:17:56.661666528 +0200
|
|
+++ onig-6.8.2/src/regparse.c 2023-10-17 12:29:57.807302184 +0200
|
|
@@ -5839,6 +5839,7 @@ add_ctype_to_cc(CClassNode* cc, int ctyp
|
|
|
|
int c, r;
|
|
int ascii_mode;
|
|
+ int is_single;
|
|
const OnigCodePoint *ranges;
|
|
OnigCodePoint limit;
|
|
OnigCodePoint sb_out;
|
|
@@ -5860,6 +5861,7 @@ add_ctype_to_cc(CClassNode* cc, int ctyp
|
|
}
|
|
|
|
r = 0;
|
|
+ is_single = ONIGENC_IS_SINGLEBYTE(enc);
|
|
limit = ascii_mode ? ASCII_LIMIT : SINGLE_BYTE_SIZE;
|
|
|
|
switch (ctype) {
|
|
@@ -5876,19 +5878,25 @@ add_ctype_to_cc(CClassNode* cc, int ctyp
|
|
case ONIGENC_CTYPE_ALNUM:
|
|
if (not != 0) {
|
|
for (c = 0; c < (int )limit; c++) {
|
|
- if (! ONIGENC_IS_CODE_CTYPE(enc, (OnigCodePoint )c, ctype))
|
|
- BITSET_SET_BIT(cc->bs, c);
|
|
+ if (is_single != 0 || ONIGENC_CODE_TO_MBCLEN(enc, c) == 1) {
|
|
+ if (! ONIGENC_IS_CODE_CTYPE(enc, (OnigCodePoint )c, ctype))
|
|
+ BITSET_SET_BIT(cc->bs, c);
|
|
+ }
|
|
}
|
|
for (c = limit; c < SINGLE_BYTE_SIZE; c++) {
|
|
- BITSET_SET_BIT(cc->bs, c);
|
|
+ if (is_single != 0 || ONIGENC_CODE_TO_MBCLEN(enc, c) == 1)
|
|
+ BITSET_SET_BIT(cc->bs, c);
|
|
}
|
|
|
|
- ADD_ALL_MULTI_BYTE_RANGE(enc, cc->mbuf);
|
|
+ if (is_single == 0)
|
|
+ ADD_ALL_MULTI_BYTE_RANGE(enc, cc->mbuf);
|
|
}
|
|
else {
|
|
for (c = 0; c < (int )limit; c++) {
|
|
- if (ONIGENC_IS_CODE_CTYPE(enc, (OnigCodePoint )c, ctype))
|
|
- BITSET_SET_BIT(cc->bs, c);
|
|
+ if (is_single != 0 || ONIGENC_CODE_TO_MBCLEN(enc, c) == 1) {
|
|
+ if (ONIGENC_IS_CODE_CTYPE(enc, (OnigCodePoint )c, ctype))
|
|
+ BITSET_SET_BIT(cc->bs, c);
|
|
+ }
|
|
}
|
|
}
|
|
break;
|
|
@@ -5898,21 +5906,25 @@ add_ctype_to_cc(CClassNode* cc, int ctyp
|
|
case ONIGENC_CTYPE_WORD:
|
|
if (not != 0) {
|
|
for (c = 0; c < (int )limit; c++) {
|
|
- if (ONIGENC_CODE_TO_MBCLEN(enc, c) > 0 /* check invalid code point */
|
|
+ /* check invalid code point */
|
|
+ if ((is_single != 0 || ONIGENC_CODE_TO_MBCLEN(enc, c) == 1)
|
|
&& ! ONIGENC_IS_CODE_CTYPE(enc, (OnigCodePoint )c, ctype))
|
|
BITSET_SET_BIT(cc->bs, c);
|
|
}
|
|
for (c = limit; c < SINGLE_BYTE_SIZE; c++) {
|
|
- if (ONIGENC_CODE_TO_MBCLEN(enc, c) > 0)
|
|
+ if (is_single != 0 || ONIGENC_CODE_TO_MBCLEN(enc, c) == 1)
|
|
BITSET_SET_BIT(cc->bs, c);
|
|
}
|
|
+ if (ascii_mode != 0 && is_single == 0)
|
|
+ ADD_ALL_MULTI_BYTE_RANGE(enc, cc->mbuf);
|
|
}
|
|
else {
|
|
for (c = 0; c < (int )limit; c++) {
|
|
- if (ONIGENC_IS_CODE_CTYPE(enc, (OnigCodePoint )c, ctype))
|
|
+ if ((is_single != 0 || ONIGENC_CODE_TO_MBCLEN(enc, c) == 1)
|
|
+ && ONIGENC_IS_CODE_CTYPE(enc, (OnigCodePoint )c, ctype))
|
|
BITSET_SET_BIT(cc->bs, c);
|
|
}
|
|
- if (ascii_mode == 0)
|
|
+ if (ascii_mode == 0 && is_single == 0)
|
|
ADD_ALL_MULTI_BYTE_RANGE(enc, cc->mbuf);
|
|
}
|
|
break;
|