e5da57f10e
> PR/437: Fix handling of invalid unicode characters.
503 lines
14 KiB
Diff
503 lines
14 KiB
Diff
From d5c106a95c49508f5e214f2fa174968eee2352fc Mon Sep 17 00:00:00 2001
|
|
From: christos <christos>
|
|
Date: Sat, 6 Jun 2015 21:19:07 +0000
|
|
Subject: [PATCH] PR/437: Fix handling of invalid unicode characters. tcsh uses
|
|
the high order bits to encode attributes in the prompt and the high bit in
|
|
regular characters. Make the drawing routines take an argument indicating if
|
|
we are drawing the prompt or not, so that we can decide how to deal with the
|
|
high bits. This solution is the minimum diff and does not allow "large valued"
|
|
unicode characters to be in the prompt (because they would conflict with the
|
|
attribute bits). A better solution would be to have a struct for each
|
|
character so we could encode extra attributes.
|
|
|
|
---
|
|
Fixes | 1 +
|
|
ed.chared.c | 2 +-
|
|
ed.refresh.c | 54 ++++++++++++++++++++++++++++++++++++++++++------------
|
|
ed.xmap.c | 2 +-
|
|
sh.file.c | 2 +-
|
|
sh.glob.c | 9 +++++++--
|
|
sh.h | 15 ++++++++++++---
|
|
sh.hist.c | 2 +-
|
|
sh.misc.c | 22 ++++++++++++++++++----
|
|
tc.func.c | 7 +++++++
|
|
tc.nls.c | 40 ++++++++++++++++++++++++++++++----------
|
|
tc.nls.h | 3 ++-
|
|
tc.printf.c | 2 +-
|
|
tc.str.c | 22 ++++++++++++++++++----
|
|
tw.parse.c | 7 ++++++-
|
|
15 files changed, 148 insertions(+), 42 deletions(-)
|
|
|
|
diff --git a/Fixes b/Fixes
|
|
index 7d0ceac..aa779b1 100644
|
|
--- a/Fixes
|
|
+++ b/Fixes
|
|
@@ -1,3 +1,4 @@
|
|
+ 2. PR/437: Fix handling of invalid unicode characters.
|
|
1. PR/451: Fix error messages containing %c to be always '%c'
|
|
|
|
41. V6.19.00 - 20150521
|
|
diff --git a/ed.chared.c b/ed.chared.c
|
|
index bade211..c0bd41b 100644
|
|
--- a/ed.chared.c
|
|
+++ b/ed.chared.c
|
|
@@ -3387,7 +3387,7 @@ e_stuff_char(Char c)
|
|
(void) Cookedmode();
|
|
|
|
(void) xwrite(SHIN, "\n", 1);
|
|
- len = one_wctomb(buf, c & CHAR);
|
|
+ len = one_wctomb(buf, c);
|
|
for (i = 0; i < len; i++)
|
|
(void) ioctl(SHIN, TIOCSTI, (ioctl_t) &buf[i]);
|
|
|
|
diff --git a/ed.refresh.c b/ed.refresh.c
|
|
index 9e6da00..a88c5e5 100644
|
|
--- a/ed.refresh.c
|
|
+++ b/ed.refresh.c
|
|
@@ -46,7 +46,7 @@ static int vcursor_h, vcursor_v;
|
|
static int rprompt_h, rprompt_v;
|
|
|
|
static int MakeLiteral (Char *, int, Char);
|
|
-static int Draw (Char *, int);
|
|
+static int Draw (Char *, int, int);
|
|
static void Vdraw (Char, int);
|
|
static void RefreshPromptpart (Char *);
|
|
static void update_line (Char *, Char *, int);
|
|
@@ -159,15 +159,44 @@ static int MakeLiteral(Char *str, int len, Char addlit)
|
|
return i | LITERAL;
|
|
}
|
|
|
|
+/* draw char at cp, expand tabs, ctl chars */
|
|
static int
|
|
-Draw(Char *cp, int nocomb) /* draw char at cp, expand tabs, ctl chars */
|
|
+Draw(Char *cp, int nocomb, int drawPrompt)
|
|
{
|
|
int w, i, lv, lh;
|
|
Char c, attr;
|
|
|
|
+#ifdef WIDE_STRINGS
|
|
+ if (!drawPrompt) { /* draw command-line */
|
|
+ attr = 0;
|
|
+ c = *cp;
|
|
+ } else { /* draw prompt */
|
|
+ /* prompt with attributes(UNDER,BOLD,STANDOUT) */
|
|
+ if (*cp & (UNDER | BOLD | STANDOUT)) { /* *cp >= STANDOUT */
|
|
+
|
|
+ /* example)
|
|
+ * We can't distinguish whether (*cp=)0x02ffffff is
|
|
+ * U+02FFFFFF or U+00FFFFFF|STANDOUT.
|
|
+ * We handle as U+00FFFFFF|STANDOUT, only when drawing prompt. */
|
|
+ attr = (*cp & ATTRIBUTES);
|
|
+ /* ~(UNDER | BOLD | STANDOUT) = 0xf1ffffff */
|
|
+ c = *cp & ~(UNDER | BOLD | STANDOUT);
|
|
+
|
|
+ /* if c is a control code, we handle *cp as having no attributes */
|
|
+ if ((c < 0x20 && c >= 0) || c == 0x7f) {
|
|
+ attr = 0;
|
|
+ c = *cp;
|
|
+ }
|
|
+ } else { /* prompt without attributes */
|
|
+ attr = 0;
|
|
+ c = *cp;
|
|
+ }
|
|
+ }
|
|
+#else
|
|
attr = *cp & ~CHAR;
|
|
c = *cp & CHAR;
|
|
- w = NLSClassify(c, nocomb);
|
|
+#endif
|
|
+ w = NLSClassify(c, nocomb, drawPrompt);
|
|
switch (w) {
|
|
case NLSCLASS_NL:
|
|
Vdraw('\0', 0); /* assure end of line */
|
|
@@ -201,10 +230,11 @@ Draw(Char *cp, int nocomb) /* draw char at cp, expand tabs, ctl chars */
|
|
case NLSCLASS_ILLEGAL2:
|
|
case NLSCLASS_ILLEGAL3:
|
|
case NLSCLASS_ILLEGAL4:
|
|
- Vdraw('\\' | attr, 1);
|
|
- Vdraw('U' | attr, 1);
|
|
- Vdraw('+' | attr, 1);
|
|
- for (i = 8 * NLSCLASS_ILLEGAL_SIZE(w) - 4; i >= 0; i -= 4)
|
|
+ case NLSCLASS_ILLEGAL5:
|
|
+ Vdraw('\\', 1);
|
|
+ Vdraw('U', 1);
|
|
+ Vdraw('+', 1);
|
|
+ for (i = 16 + 4 * (-w-5); i >= 0; i -= 4)
|
|
Vdraw("0123456789ABCDEF"[(c >> i) & 15] | attr, 1);
|
|
break;
|
|
case 0:
|
|
@@ -302,7 +332,7 @@ RefreshPromptpart(Char *buf)
|
|
}
|
|
}
|
|
else
|
|
- cp += Draw(cp, cp == buf);
|
|
+ cp += Draw(cp, cp == buf, 1);
|
|
}
|
|
}
|
|
|
|
@@ -354,7 +384,7 @@ Refresh(void)
|
|
cur_v = vcursor_v;
|
|
Cursor = cp;
|
|
}
|
|
- cp += Draw(cp, cp == InputBuf);
|
|
+ cp += Draw(cp, cp == InputBuf, 0);
|
|
}
|
|
|
|
if (cur_h == -1) { /* if I haven't been set yet, I'm at the end */
|
|
@@ -1126,7 +1156,7 @@ RefCursor(void)
|
|
cp++;
|
|
continue;
|
|
}
|
|
- w = NLSClassify(*cp & CHAR, cp == Prompt);
|
|
+ w = NLSClassify(*cp & CHAR, cp == Prompt, 0);
|
|
cp++;
|
|
switch(w) {
|
|
case NLSCLASS_NL:
|
|
@@ -1158,7 +1188,7 @@ RefCursor(void)
|
|
}
|
|
|
|
for (cp = InputBuf; cp < Cursor;) { /* do input buffer to Cursor */
|
|
- w = NLSClassify(*cp & CHAR, cp == InputBuf);
|
|
+ w = NLSClassify(*cp & CHAR, cp == InputBuf, 0);
|
|
cp++;
|
|
switch(w) {
|
|
case NLSCLASS_NL:
|
|
@@ -1251,7 +1281,7 @@ RefPlusOne(int l)
|
|
}
|
|
cp = Cursor - l;
|
|
c = *cp & CHAR;
|
|
- w = NLSClassify(c, cp == InputBuf);
|
|
+ w = NLSClassify(c, cp == InputBuf, 0);
|
|
switch(w) {
|
|
case NLSCLASS_CTRL:
|
|
PutPlusOne('^', 1);
|
|
diff --git a/ed.xmap.c b/ed.xmap.c
|
|
index 6e1d56e..36bce1e 100644
|
|
--- a/ed.xmap.c
|
|
+++ b/ed.xmap.c
|
|
@@ -743,7 +743,7 @@ unparsestring(const CStr *str, const Char *sep)
|
|
*b++ = (unsigned char) p;
|
|
}
|
|
else if (p == ' ' || (Isprint(p) && !Isspace(p)))
|
|
- b += one_wctomb((char *)b, p & CHAR);
|
|
+ b += one_wctomb((char *)b, p);
|
|
else {
|
|
*b++ = '\\';
|
|
*b++ = ((p >> 6) & 7) + '0';
|
|
diff --git a/sh.file.c b/sh.file.c
|
|
index 343b774..3989d8a 100644
|
|
--- a/sh.file.c
|
|
+++ b/sh.file.c
|
|
@@ -249,7 +249,7 @@ pushback(const Char *string)
|
|
char buf[MB_LEN_MAX];
|
|
size_t i, len;
|
|
|
|
- len = one_wctomb(buf, *p & CHAR);
|
|
+ len = one_wctomb(buf, *p);
|
|
for (i = 0; i < len; i++)
|
|
(void) ioctl(SHOUT, TIOCSTI, (ioctl_t) &buf[i]);
|
|
}
|
|
diff --git a/sh.glob.c b/sh.glob.c
|
|
index fc510bf..7d008aa 100644
|
|
--- a/sh.glob.c
|
|
+++ b/sh.glob.c
|
|
@@ -594,8 +594,13 @@ trim(Char **t)
|
|
Char *p;
|
|
|
|
while ((p = *t++) != '\0')
|
|
- while (*p)
|
|
- *p++ &= TRIM;
|
|
+ while (*p) {
|
|
+#if INVALID_BYTE != 0
|
|
+ if ((*p & INVALID_BYTE) != INVALID_BYTE) /* *p < INVALID_BYTE */
|
|
+#endif
|
|
+ *p &= TRIM;
|
|
+ p++;
|
|
+ }
|
|
}
|
|
|
|
int
|
|
diff --git a/sh.h b/sh.h
|
|
index e71a24e..75de557 100644
|
|
--- a/sh.h
|
|
+++ b/sh.h
|
|
@@ -707,14 +707,21 @@ extern struct sigaction parterm; /* Parents terminate catch */
|
|
#define ASCII 0177
|
|
#ifdef WIDE_STRINGS /* Implies SHORT_STRINGS */
|
|
/* 31st char bit used for 'ing (not 32nd, we want all values nonnegative) */
|
|
-# define QUOTE 0x40000000
|
|
-# define TRIM 0x3FFFFFFF /* Mask to strip quote bit */
|
|
+/*
|
|
+ * Notice
|
|
+ *
|
|
+ * Because of the fix for handling Unicode file names, the 32nd bit is now used.
|
|
+ * We need to use '&' instead of '>' or '<' when comparing with INVALID_BYTE etc.
|
|
+ * Cast to uChar is not recommended,
|
|
+ * because Char is 4 bytes but uChar is 8 bytes on I32LP64. */
|
|
+# define QUOTE 0x80000000
|
|
+# define TRIM 0x7FFFFFFF /* Mask to strip quote bit */
|
|
# define UNDER 0x08000000 /* Underline flag */
|
|
# define BOLD 0x04000000 /* Bold flag */
|
|
# define STANDOUT 0x02000000 /* Standout flag */
|
|
# define LITERAL 0x01000000 /* Literal character flag */
|
|
# define ATTRIBUTES 0x0F000000 /* The bits used for attributes */
|
|
-# define INVALID_BYTE 0x00800000 /* Invalid character on input */
|
|
+# define INVALID_BYTE 0xF0000000 /* Invalid character on input */
|
|
# ifdef SOLARIS2
|
|
# define CHAR 0x30FFFFFF /* Mask to mask out the character */
|
|
# else
|
|
@@ -743,6 +750,8 @@ extern struct sigaction parterm; /* Parents terminate catch */
|
|
#endif
|
|
#define CHAR_DBWIDTH (LITERAL|(LITERAL-1))
|
|
|
|
+# define MAX_UTF32 0x7FFFFFFF /* max UTF32 is U+7FFFFFFF */
|
|
+
|
|
EXTERN int AsciiOnly; /* If set only 7 bits expected in characters */
|
|
|
|
/*
|
|
diff --git a/sh.hist.c b/sh.hist.c
|
|
index b8f71b7..c0eded5 100644
|
|
--- a/sh.hist.c
|
|
+++ b/sh.hist.c
|
|
@@ -1199,7 +1199,7 @@ fmthist(int fmt, ptr_t ptr)
|
|
buf = xmalloc(Strlen(istr) * MB_LEN_MAX + 1);
|
|
|
|
for (p = buf, ip = istr; *ip != '\0'; ip++)
|
|
- p += one_wctomb(p, CHAR & *ip);
|
|
+ p += one_wctomb(p, *ip);
|
|
|
|
*p = '\0';
|
|
xfree(istr);
|
|
diff --git a/sh.misc.c b/sh.misc.c
|
|
index 7232b12..233ba5f 100644
|
|
--- a/sh.misc.c
|
|
+++ b/sh.misc.c
|
|
@@ -450,8 +450,13 @@ strip(Char *cp)
|
|
|
|
if (!cp)
|
|
return (cp);
|
|
- while ((*dp++ &= TRIM) != '\0')
|
|
- continue;
|
|
+ while (*dp != '\0') {
|
|
+#if INVALID_BYTE != 0
|
|
+ if ((*dp & INVALID_BYTE) != INVALID_BYTE) /* *dp < INVALID_BYTE */
|
|
+#endif
|
|
+ *dp &= TRIM;
|
|
+ dp++;
|
|
+ }
|
|
return (cp);
|
|
}
|
|
|
|
@@ -462,8 +467,17 @@ quote(Char *cp)
|
|
|
|
if (!cp)
|
|
return (cp);
|
|
- while (*dp != '\0')
|
|
- *dp++ |= QUOTE;
|
|
+ while (*dp != '\0') {
|
|
+#ifdef WIDE_STRINGS
|
|
+ if ((*dp & 0xffffff80) == 0) /* *dp < 0x80 */
|
|
+#elif defined SHORT_STRINGS
|
|
+ if ((*dp & 0xff80) == 0) /* *dp < 0x80 */
|
|
+#else
|
|
+ if ((*dp & 0x80) == 0) /* *dp < 0x80 */
|
|
+#endif
|
|
+ *dp |= QUOTE;
|
|
+ dp++;
|
|
+ }
|
|
return (cp);
|
|
}
|
|
|
|
diff --git a/tc.func.c b/tc.func.c
|
|
index 2b28a68..5a909d6 100644
|
|
--- a/tc.func.c
|
|
+++ b/tc.func.c
|
|
@@ -124,7 +124,14 @@ expand_lex(const struct wordent *sp0, int from, int to)
|
|
(((*s & TRIM) == '\\') && (prev_c != '\\')))) {
|
|
Strbuf_append1(&buf, '\\');
|
|
}
|
|
+#if INVALID_BYTE != 0
|
|
+ if ((*s & INVALID_BYTE) != INVALID_BYTE) /* *s < INVALID_BYTE */
|
|
+ Strbuf_append1(&buf, *s & TRIM);
|
|
+ else
|
|
+ Strbuf_append1(&buf, *s);
|
|
+#else
|
|
Strbuf_append1(&buf, *s & TRIM);
|
|
+#endif
|
|
prev_c = *s;
|
|
}
|
|
Strbuf_append1(&buf, ' ');
|
|
diff --git a/tc.nls.c b/tc.nls.c
|
|
index 2c38f3f..22ad173 100644
|
|
--- a/tc.nls.c
|
|
+++ b/tc.nls.c
|
|
@@ -64,7 +64,11 @@ NLSWidth(Char c)
|
|
{
|
|
# ifdef HAVE_WCWIDTH
|
|
int l;
|
|
+#if INVALID_BYTE != 0
|
|
+ if ((c & INVALID_BYTE) == INVALID_BYTE) /* c >= INVALID_BYTE */
|
|
+#else
|
|
if (c & INVALID_BYTE)
|
|
+#endif
|
|
return 1;
|
|
l = xwcwidth((wchar_t) c);
|
|
return l >= 0 ? l : 0;
|
|
@@ -116,12 +120,36 @@ NLSChangeCase(const Char *p, int mode)
|
|
}
|
|
|
|
int
|
|
-NLSClassify(Char c, int nocomb)
|
|
+NLSClassify(Char c, int nocomb, int drawPrompt)
|
|
{
|
|
int w;
|
|
- if (c & INVALID_BYTE)
|
|
+#ifndef SHORT_STRINGS
|
|
+ if ((c & 0x80) != 0) /* c >= 0x80 */
|
|
return NLSCLASS_ILLEGAL;
|
|
+#endif
|
|
+ if (!drawPrompt) { /* draw command-line */
|
|
+#if INVALID_BYTE != 0
|
|
+ if ((c & INVALID_BYTE) == INVALID_BYTE) /* c >= INVALID_BYTE */
|
|
+ return NLSCLASS_ILLEGAL;
|
|
+ if ((c & INVALID_BYTE) == QUOTE && (c & 0x80) == 0) /* c >= QUOTE */
|
|
+ return 1;
|
|
+ if (c >= 0x10000000) /* U+10000000 = FC 90 80 80 80 80 */
|
|
+ return NLSCLASS_ILLEGAL5;
|
|
+ if (c >= 0x1000000) /* U+1000000 = F9 80 80 80 80 */
|
|
+ return NLSCLASS_ILLEGAL4;
|
|
+ if (c >= 0x100000) /* U+100000 = F4 80 80 80 */
|
|
+ return NLSCLASS_ILLEGAL3;
|
|
+#endif
|
|
+ if (c >= 0x10000) /* U+10000 = F0 90 80 80 */
|
|
+ return NLSCLASS_ILLEGAL2;
|
|
+ }
|
|
w = NLSWidth(c);
|
|
+ if (drawPrompt) { /* draw prompt */
|
|
+ if (w > 0)
|
|
+ return w;
|
|
+ if (w == 0)
|
|
+ return 1;
|
|
+ }
|
|
if ((w > 0 && !(Iscntrl(c) && (c & CHAR) < 0x100)) || (Isprint(c) && !nocomb))
|
|
return w;
|
|
if (Iscntrl(c) && (c & CHAR) < 0x100) {
|
|
@@ -131,13 +159,5 @@ NLSClassify(Char c, int nocomb)
|
|
return NLSCLASS_TAB;
|
|
return NLSCLASS_CTRL;
|
|
}
|
|
-#ifdef WIDE_STRINGS
|
|
- if (c >= 0x1000000)
|
|
- return NLSCLASS_ILLEGAL4;
|
|
- if (c >= 0x10000)
|
|
- return NLSCLASS_ILLEGAL3;
|
|
-#endif
|
|
- if (c >= 0x100)
|
|
- return NLSCLASS_ILLEGAL2;
|
|
return NLSCLASS_ILLEGAL;
|
|
}
|
|
diff --git a/tc.nls.h b/tc.nls.h
|
|
index 4d27741..6930682 100644
|
|
--- a/tc.nls.h
|
|
+++ b/tc.nls.h
|
|
@@ -43,7 +43,7 @@ extern int NLSStringWidth (const Char *);
|
|
#endif
|
|
|
|
extern Char *NLSChangeCase (const Char *, int);
|
|
-extern int NLSClassify (Char, int);
|
|
+extern int NLSClassify (Char, int, int);
|
|
|
|
#define NLSCLASS_CTRL (-1)
|
|
#define NLSCLASS_TAB (-2)
|
|
@@ -52,6 +52,7 @@ extern int NLSClassify (Char, int);
|
|
#define NLSCLASS_ILLEGAL2 (-5)
|
|
#define NLSCLASS_ILLEGAL3 (-6)
|
|
#define NLSCLASS_ILLEGAL4 (-7)
|
|
+#define NLSCLASS_ILLEGAL5 (-8)
|
|
|
|
#define NLSCLASS_ILLEGAL_SIZE(x) (-(x) - (-(NLSCLASS_ILLEGAL) - 1))
|
|
|
|
diff --git a/tc.printf.c b/tc.printf.c
|
|
index 7f2612d..c6be145 100644
|
|
--- a/tc.printf.c
|
|
+++ b/tc.printf.c
|
|
@@ -289,7 +289,7 @@ doprnt(void (*addchar) (int), const char *sfmt, va_list ap)
|
|
(*addchar) ('\\' | attributes);
|
|
count++;
|
|
}
|
|
- len = one_wctomb(cbuf, *Bp & CHAR);
|
|
+ len = one_wctomb(cbuf, *Bp);
|
|
for (pos = 0; pos < len; pos++) {
|
|
(*addchar) ((unsigned char)cbuf[pos] | attributes
|
|
| (*Bp & ATTRIBUTES));
|
|
diff --git a/tc.str.c b/tc.str.c
|
|
index c407cb8..c2b5ac8 100644
|
|
--- a/tc.str.c
|
|
+++ b/tc.str.c
|
|
@@ -66,10 +66,24 @@ one_wctomb(char *s, Char wchar)
|
|
{
|
|
int len;
|
|
|
|
- if (wchar & INVALID_BYTE) {
|
|
- s[0] = wchar & 0xFF;
|
|
+#if INVALID_BYTE != 0
|
|
+ if ((wchar & INVALID_BYTE) == INVALID_BYTE) { /* wchar >= INVALID_BYTE */
|
|
+ /* invalid char
|
|
+ * example)
|
|
+ * if wchar = f0000090(=90|INVALID_BYTE), then *s = ffffff90 */
|
|
+ *s = (char)wchar;
|
|
len = 1;
|
|
+#else
|
|
+ if (wchar & (CHAR & INVALID_BYTE)) {
|
|
+ s[0] = wchar & (CHAR & 0xFF);
|
|
+ len = 1;
|
|
+#endif
|
|
} else {
|
|
+#if INVALID_BYTE != 0
|
|
+ wchar &= MAX_UTF32;
|
|
+#else
|
|
+ wchar &= CHAR;
|
|
+#endif
|
|
#ifdef UTF16_STRINGS
|
|
if (wchar >= 0x10000) {
|
|
/* UTF-16 systems can't handle these values directly in calls to
|
|
@@ -224,7 +238,7 @@ short2str(const Char *src)
|
|
dst = sdst;
|
|
edst = &dst[dstsize];
|
|
while (*src) {
|
|
- dst += one_wctomb(dst, *src & CHAR);
|
|
+ dst += one_wctomb(dst, *src);
|
|
src++;
|
|
if (dst >= edst) {
|
|
char *wdst = dst;
|
|
@@ -544,7 +558,7 @@ short2qstr(const Char *src)
|
|
dst = &edst[-MALLOC_INCR];
|
|
}
|
|
}
|
|
- dst += one_wctomb(dst, *src & CHAR);
|
|
+ dst += one_wctomb(dst, *src);
|
|
src++;
|
|
if (dst >= edst) {
|
|
ptrdiff_t i = dst - edst;
|
|
diff --git a/tw.parse.c b/tw.parse.c
|
|
index 8309ed8..94982d6 100644
|
|
--- a/tw.parse.c
|
|
+++ b/tw.parse.c
|
|
@@ -618,7 +618,12 @@ insert_meta(const Char *cp, const Char *cpend, const Char *word,
|
|
break;
|
|
|
|
wq = w & QUOTE;
|
|
- w &= ~QUOTE;
|
|
+#if INVALID_BYTE != 0
|
|
+ /* add checking INVALID_BYTE for FIX UTF32 */
|
|
+ if ((w & INVALID_BYTE) != INVALID_BYTE) /* w < INVALID_BYTE */
|
|
+#else
|
|
+ w &= ~QUOTE;
|
|
+#endif
|
|
|
|
if (cmap(w, _ESC | _QF))
|
|
wq = QUOTE; /* quotes are always quoted */
|
|
--
|
|
2.5.5
|
|
|