oniguruma/SOURCES/oniguruma-6.8.2-CVE-2019-19...

115 lines
3.9 KiB
Diff

diff -up onig-6.8.2/src/gb18030.c.orig onig-6.8.2/src/gb18030.c
--- onig-6.8.2/src/gb18030.c.orig 2023-10-17 12:12:44.944352236 +0200
+++ onig-6.8.2/src/gb18030.c 2023-10-17 12:14:52.188483869 +0200
@@ -76,6 +76,20 @@ gb18030_mbc_enc_len(const UChar* p)
}
static int
+gb18030_code_to_mbclen(OnigCodePoint code) /* byte length for code: 1, 2 or 4, else ONIGERR_INVALID_CODE_POINT_VALUE */
+{
+ if ((code & 0xff000000) != 0) return 4; /* any bit of the top byte set: length 4 */
+ else if ((code & 0xff0000) != 0) return ONIGERR_INVALID_CODE_POINT_VALUE; /* bits 16-23 set with top byte clear: rejected, so 3 is never returned */
+ else if ((code & 0xff00) != 0) return 2; /* bits 8-15 set with the upper 16 bits clear: length 2 */
+ else {
+ if (GB18030_MAP[(int )(code & 0xff)] == CM) /* NOTE(review): CM presumably marks a byte that cannot stand alone as a character; confirm against the GB18030_MAP definition */
+ return ONIGERR_INVALID_CODE_POINT_VALUE;
+
+ return 1; /* value fits in the low byte and its map entry is not CM */
+ }
+}
+
+static int
is_valid_mbc_string(const UChar* p, const UChar* end)
{
while (p < end) {
@@ -522,7 +536,7 @@ OnigEncodingType OnigEncodingGB18030 = {
1, /* min enc length */
onigenc_is_mbc_newline_0x0a,
gb18030_mbc_to_code,
- onigenc_mb4_code_to_mbclen,
+ gb18030_code_to_mbclen,
gb18030_code_to_mbc,
gb18030_mbc_case_fold,
onigenc_ascii_apply_all_case_fold,
diff -up onig-6.8.2/src/regparse.c.orig onig-6.8.2/src/regparse.c
--- onig-6.8.2/src/regparse.c.orig 2023-10-17 12:17:56.661666528 +0200
+++ onig-6.8.2/src/regparse.c 2023-10-17 12:29:57.807302184 +0200
@@ -5839,6 +5839,7 @@ add_ctype_to_cc(CClassNode* cc, int ctyp
int c, r;
int ascii_mode;
+ int is_single;
const OnigCodePoint *ranges;
OnigCodePoint limit;
OnigCodePoint sb_out;
@@ -5860,6 +5861,7 @@ add_ctype_to_cc(CClassNode* cc, int ctyp
}
r = 0;
+ is_single = ONIGENC_IS_SINGLEBYTE(enc);
limit = ascii_mode ? ASCII_LIMIT : SINGLE_BYTE_SIZE;
switch (ctype) {
@@ -5876,19 +5878,25 @@ add_ctype_to_cc(CClassNode* cc, int ctyp
case ONIGENC_CTYPE_ALNUM:
if (not != 0) {
for (c = 0; c < (int )limit; c++) {
- if (! ONIGENC_IS_CODE_CTYPE(enc, (OnigCodePoint )c, ctype))
- BITSET_SET_BIT(cc->bs, c);
+ if (is_single != 0 || ONIGENC_CODE_TO_MBCLEN(enc, c) == 1) {
+ if (! ONIGENC_IS_CODE_CTYPE(enc, (OnigCodePoint )c, ctype))
+ BITSET_SET_BIT(cc->bs, c);
+ }
}
for (c = limit; c < SINGLE_BYTE_SIZE; c++) {
- BITSET_SET_BIT(cc->bs, c);
+ if (is_single != 0 || ONIGENC_CODE_TO_MBCLEN(enc, c) == 1)
+ BITSET_SET_BIT(cc->bs, c);
}
- ADD_ALL_MULTI_BYTE_RANGE(enc, cc->mbuf);
+ if (is_single == 0)
+ ADD_ALL_MULTI_BYTE_RANGE(enc, cc->mbuf);
}
else {
for (c = 0; c < (int )limit; c++) {
- if (ONIGENC_IS_CODE_CTYPE(enc, (OnigCodePoint )c, ctype))
- BITSET_SET_BIT(cc->bs, c);
+ if (is_single != 0 || ONIGENC_CODE_TO_MBCLEN(enc, c) == 1) {
+ if (ONIGENC_IS_CODE_CTYPE(enc, (OnigCodePoint )c, ctype))
+ BITSET_SET_BIT(cc->bs, c);
+ }
}
}
break;
@@ -5898,21 +5906,25 @@ add_ctype_to_cc(CClassNode* cc, int ctyp
case ONIGENC_CTYPE_WORD:
if (not != 0) {
for (c = 0; c < (int )limit; c++) {
- if (ONIGENC_CODE_TO_MBCLEN(enc, c) > 0 /* check invalid code point */
+ /* check invalid code point */
+ if ((is_single != 0 || ONIGENC_CODE_TO_MBCLEN(enc, c) == 1)
&& ! ONIGENC_IS_CODE_CTYPE(enc, (OnigCodePoint )c, ctype))
BITSET_SET_BIT(cc->bs, c);
}
for (c = limit; c < SINGLE_BYTE_SIZE; c++) {
- if (ONIGENC_CODE_TO_MBCLEN(enc, c) > 0)
+ if (is_single != 0 || ONIGENC_CODE_TO_MBCLEN(enc, c) == 1)
BITSET_SET_BIT(cc->bs, c);
}
+ if (ascii_mode != 0 && is_single == 0)
+ ADD_ALL_MULTI_BYTE_RANGE(enc, cc->mbuf);
}
else {
for (c = 0; c < (int )limit; c++) {
- if (ONIGENC_IS_CODE_CTYPE(enc, (OnigCodePoint )c, ctype))
+ if ((is_single != 0 || ONIGENC_CODE_TO_MBCLEN(enc, c) == 1)
+ && ONIGENC_IS_CODE_CTYPE(enc, (OnigCodePoint )c, ctype))
BITSET_SET_BIT(cc->bs, c);
}
- if (ascii_mode == 0)
+ if (ascii_mode == 0 && is_single == 0)
ADD_ALL_MULTI_BYTE_RANGE(enc, cc->mbuf);
}
break;