lua/lua-5.4.4-bug8.patch
2023-02-14 15:09:51 -05:00

137 lines
4.5 KiB
Diff
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

diff -up lua-5.4.4/lua-5.4.4-tests/utf8.lua.bug8 lua-5.4.4/lua-5.4.4-tests/utf8.lua
--- lua-5.4.4/lua-5.4.4-tests/utf8.lua.bug8 2023-02-14 14:53:22.877506883 -0500
+++ lua-5.4.4/lua-5.4.4-tests/utf8.lua 2023-02-14 14:53:28.297574398 -0500
@@ -97,9 +97,15 @@ do -- error indication in utf8.len
assert(not a and b == p)
end
check("abc\xE3def", 4)
- check("汉字\x80", #("汉字") + 1)
check("\xF4\x9F\xBF", 1)
check("\xF4\x9F\xBF\xBF", 1)
+ -- spurious continuation bytes
+ check("汉字\x80", #("汉字") + 1)
+ check("\x80hello", 1)
+ check("hel\x80lo", 4)
+ check("汉字\xBF", #("汉字") + 1)
+ check("\xBFhello", 1)
+ check("hel\xBFlo", 4)
end
-- errors in utf8.codes
@@ -112,12 +118,16 @@ do
end
errorcodes("ab\xff")
errorcodes("\u{110000}")
+ errorcodes("in\x80valid")
+ errorcodes("\xbfinvalid")
+ errorcodes("αλφ\xBFα")
-- calling interation function with invalid arguments
local f = utf8.codes("")
assert(f("", 2) == nil)
assert(f("", -1) == nil)
assert(f("", math.mininteger) == nil)
+
end
-- error in initial position for offset
diff -up lua-5.4.4/src/lutf8lib.c.bug8 lua-5.4.4/src/lutf8lib.c
--- lua-5.4.4/src/lutf8lib.c.bug8 2023-02-14 14:53:49.114833691 -0500
+++ lua-5.4.4/src/lutf8lib.c 2023-02-14 14:59:12.660905056 -0500
@@ -25,6 +25,9 @@
#define MAXUTF 0x7FFFFFFFu
+
+#define MSGInvalid "invalid UTF-8 code"
+
/*
** Integer type for decoded UTF-8 values; MAXUTF needs 31 bits.
*/
@@ -35,7 +38,8 @@ typedef unsigned long utfint;
#endif
-#define iscont(p) ((*(p) & 0xC0) == 0x80)
+#define iscont(c) (((c) & 0xC0) == 0x80)
+#define iscontp(p) iscont(*(p))
/* from strlib */
@@ -65,7 +69,7 @@ static const char *utf8_decode (const ch
int count = 0; /* to count number of continuation bytes */
for (; c & 0x40; c <<= 1) { /* while it needs continuation bytes... */
unsigned int cc = (unsigned char)s[++count]; /* read next byte */
- if ((cc & 0xC0) != 0x80) /* not a continuation byte? */
+ if (!iscont(cc)) /* not a continuation byte? */
return NULL; /* invalid byte sequence */
res = (res << 6) | (cc & 0x3F); /* add lower 6 bits from cont. byte */
}
@@ -140,7 +144,7 @@ static int codepoint (lua_State *L) {
utfint code;
s = utf8_decode(s, &code, !lax);
if (s == NULL)
- return luaL_error(L, "invalid UTF-8 code");
+ return luaL_error(L, MSGInvalid);
lua_pushinteger(L, code);
n++;
}
@@ -190,16 +194,16 @@ static int byteoffset (lua_State *L) {
"position out of bounds");
if (n == 0) {
/* find beginning of current byte sequence */
- while (posi > 0 && iscont(s + posi)) posi--;
+ while (posi > 0 && iscontp(s + posi)) posi--;
}
else {
- if (iscont(s + posi))
+ if (iscontp(s + posi))
return luaL_error(L, "initial position is a continuation byte");
if (n < 0) {
while (n < 0 && posi > 0) { /* move back */
do { /* find beginning of previous character */
posi--;
- } while (posi > 0 && iscont(s + posi));
+ } while (posi > 0 && iscontp(s + posi));
n++;
}
}
@@ -208,7 +212,7 @@ static int byteoffset (lua_State *L) {
while (n > 0 && posi < (lua_Integer)len) {
do { /* find beginning of next character */
posi++;
- } while (iscont(s + posi)); /* (cannot pass final '\0') */
+ } while (iscontp(s + posi)); /* (cannot pass final '\0') */
n--;
}
}
@@ -226,15 +230,15 @@ static int iter_aux (lua_State *L, int s
const char *s = luaL_checklstring(L, 1, &len);
lua_Unsigned n = (lua_Unsigned)lua_tointeger(L, 2);
if (n < len) {
- while (iscont(s + n)) n++; /* skip continuation bytes */
+ while (iscontp(s + n)) n++; /* go to next character */
}
if (n >= len) /* (also handles original 'n' being negative) */
return 0; /* no more codepoints */
else {
utfint code;
const char *next = utf8_decode(s + n, &code, strict);
- if (next == NULL)
- return luaL_error(L, "invalid UTF-8 code");
+ if (next == NULL || iscontp(next))
+ return luaL_error(L, MSGInvalid);
lua_pushinteger(L, n + 1);
lua_pushinteger(L, code);
return 2;
@@ -253,7 +257,8 @@ static int iter_auxlax (lua_State *L) {
static int iter_codes (lua_State *L) {
int lax = lua_toboolean(L, 2);
- luaL_checkstring(L, 1);
+ const char *s = luaL_checkstring(L, 1);
+ luaL_argcheck(L, !iscontp(s), 1, MSGInvalid);
lua_pushcfunction(L, lax ? iter_auxlax : iter_auxstrict);
lua_pushvalue(L, 1);
lua_pushinteger(L, 0);