tcsh-6.19.00-007-fix-handling-of-invalid-unicode-characters.patch added

> PR/437: Fix handling of invalid unicode characters.
This commit is contained in:
David Kaspar [Dee'Kej] 2016-05-03 13:01:57 +02:00
parent 6d81d4664b
commit e5da57f10e
2 changed files with 504 additions and 0 deletions

View File

@ -0,0 +1,502 @@
From d5c106a95c49508f5e214f2fa174968eee2352fc Mon Sep 17 00:00:00 2001
From: christos <christos>
Date: Sat, 6 Jun 2015 21:19:07 +0000
Subject: [PATCH] PR/437: Fix handling of invalid unicode characters. tcsh uses
the high order bits to encode attributes in the prompt and the high bit in
regular characters. Make the drawing routines take an argument indicating if
we are drawing the prompt or not, so that we can decide how to deal with the
high bits. This solution is the minimum diff and does not allow "large valued"
unicode characters to be in the prompt (because they would conflict with the
attribute bits). A better solution would be to have a struct for each
character so we could encode extra attributes.
---
Fixes | 1 +
ed.chared.c | 2 +-
ed.refresh.c | 54 ++++++++++++++++++++++++++++++++++++++++++------------
ed.xmap.c | 2 +-
sh.file.c | 2 +-
sh.glob.c | 9 +++++++--
sh.h | 15 ++++++++++++---
sh.hist.c | 2 +-
sh.misc.c | 22 ++++++++++++++++++----
tc.func.c | 7 +++++++
tc.nls.c | 40 ++++++++++++++++++++++++++++++----------
tc.nls.h | 3 ++-
tc.printf.c | 2 +-
tc.str.c | 22 ++++++++++++++++++----
tw.parse.c | 7 ++++++-
15 files changed, 148 insertions(+), 42 deletions(-)
diff --git a/Fixes b/Fixes
index 7d0ceac..aa779b1 100644
--- a/Fixes
+++ b/Fixes
@@ -1,3 +1,4 @@
+ 2. PR/437: Fix handling of invalid unicode characters.
1. PR/451: Fix error messages containing %c to be always '%c'
41. V6.19.00 - 20150521
diff --git a/ed.chared.c b/ed.chared.c
index bade211..c0bd41b 100644
--- a/ed.chared.c
+++ b/ed.chared.c
@@ -3387,7 +3387,7 @@ e_stuff_char(Char c)
(void) Cookedmode();
(void) xwrite(SHIN, "\n", 1);
- len = one_wctomb(buf, c & CHAR);
+ len = one_wctomb(buf, c);
for (i = 0; i < len; i++)
(void) ioctl(SHIN, TIOCSTI, (ioctl_t) &buf[i]);
diff --git a/ed.refresh.c b/ed.refresh.c
index 9e6da00..a88c5e5 100644
--- a/ed.refresh.c
+++ b/ed.refresh.c
@@ -46,7 +46,7 @@ static int vcursor_h, vcursor_v;
static int rprompt_h, rprompt_v;
static int MakeLiteral (Char *, int, Char);
-static int Draw (Char *, int);
+static int Draw (Char *, int, int);
static void Vdraw (Char, int);
static void RefreshPromptpart (Char *);
static void update_line (Char *, Char *, int);
@@ -159,15 +159,44 @@ static int MakeLiteral(Char *str, int len, Char addlit)
return i | LITERAL;
}
+/* draw char at cp, expand tabs, ctl chars */
static int
-Draw(Char *cp, int nocomb) /* draw char at cp, expand tabs, ctl chars */
+Draw(Char *cp, int nocomb, int drawPrompt)
{
int w, i, lv, lh;
Char c, attr;
+#ifdef WIDE_STRINGS
+ if (!drawPrompt) { /* draw command-line */
+ attr = 0;
+ c = *cp;
+ } else { /* draw prompt */
+ /* prompt with attributes(UNDER,BOLD,STANDOUT) */
+ if (*cp & (UNDER | BOLD | STANDOUT)) { /* *cp >= STANDOUT */
+
+ /* example)
+ * We can't distinguish whether (*cp=)0x02ffffff is
+ * U+02FFFFFF or U+00FFFFFF|STANDOUT.
+ * We handle as U+00FFFFFF|STANDOUT, only when drawing prompt. */
+ attr = (*cp & ATTRIBUTES);
+ /* ~(UNDER | BOLD | STANDOUT) = 0xf1ffffff */
+ c = *cp & ~(UNDER | BOLD | STANDOUT);
+
+ /* if c is ctrl code, we handle *cp as having no attributes */
+ if ((c < 0x20 && c >= 0) || c == 0x7f) {
+ attr = 0;
+ c = *cp;
+ }
+ } else { /* prompt without attributes */
+ attr = 0;
+ c = *cp;
+ }
+ }
+#else
attr = *cp & ~CHAR;
c = *cp & CHAR;
- w = NLSClassify(c, nocomb);
+#endif
+ w = NLSClassify(c, nocomb, drawPrompt);
switch (w) {
case NLSCLASS_NL:
Vdraw('\0', 0); /* assure end of line */
@@ -201,10 +230,11 @@ Draw(Char *cp, int nocomb) /* draw char at cp, expand tabs, ctl chars */
case NLSCLASS_ILLEGAL2:
case NLSCLASS_ILLEGAL3:
case NLSCLASS_ILLEGAL4:
- Vdraw('\\' | attr, 1);
- Vdraw('U' | attr, 1);
- Vdraw('+' | attr, 1);
- for (i = 8 * NLSCLASS_ILLEGAL_SIZE(w) - 4; i >= 0; i -= 4)
+ case NLSCLASS_ILLEGAL5:
+ Vdraw('\\', 1);
+ Vdraw('U', 1);
+ Vdraw('+', 1);
+ for (i = 16 + 4 * (-w-5); i >= 0; i -= 4)
Vdraw("0123456789ABCDEF"[(c >> i) & 15] | attr, 1);
break;
case 0:
@@ -302,7 +332,7 @@ RefreshPromptpart(Char *buf)
}
}
else
- cp += Draw(cp, cp == buf);
+ cp += Draw(cp, cp == buf, 1);
}
}
@@ -354,7 +384,7 @@ Refresh(void)
cur_v = vcursor_v;
Cursor = cp;
}
- cp += Draw(cp, cp == InputBuf);
+ cp += Draw(cp, cp == InputBuf, 0);
}
if (cur_h == -1) { /* if I haven't been set yet, I'm at the end */
@@ -1126,7 +1156,7 @@ RefCursor(void)
cp++;
continue;
}
- w = NLSClassify(*cp & CHAR, cp == Prompt);
+ w = NLSClassify(*cp & CHAR, cp == Prompt, 0);
cp++;
switch(w) {
case NLSCLASS_NL:
@@ -1158,7 +1188,7 @@ RefCursor(void)
}
for (cp = InputBuf; cp < Cursor;) { /* do input buffer to Cursor */
- w = NLSClassify(*cp & CHAR, cp == InputBuf);
+ w = NLSClassify(*cp & CHAR, cp == InputBuf, 0);
cp++;
switch(w) {
case NLSCLASS_NL:
@@ -1251,7 +1281,7 @@ RefPlusOne(int l)
}
cp = Cursor - l;
c = *cp & CHAR;
- w = NLSClassify(c, cp == InputBuf);
+ w = NLSClassify(c, cp == InputBuf, 0);
switch(w) {
case NLSCLASS_CTRL:
PutPlusOne('^', 1);
diff --git a/ed.xmap.c b/ed.xmap.c
index 6e1d56e..36bce1e 100644
--- a/ed.xmap.c
+++ b/ed.xmap.c
@@ -743,7 +743,7 @@ unparsestring(const CStr *str, const Char *sep)
*b++ = (unsigned char) p;
}
else if (p == ' ' || (Isprint(p) && !Isspace(p)))
- b += one_wctomb((char *)b, p & CHAR);
+ b += one_wctomb((char *)b, p);
else {
*b++ = '\\';
*b++ = ((p >> 6) & 7) + '0';
diff --git a/sh.file.c b/sh.file.c
index 343b774..3989d8a 100644
--- a/sh.file.c
+++ b/sh.file.c
@@ -249,7 +249,7 @@ pushback(const Char *string)
char buf[MB_LEN_MAX];
size_t i, len;
- len = one_wctomb(buf, *p & CHAR);
+ len = one_wctomb(buf, *p);
for (i = 0; i < len; i++)
(void) ioctl(SHOUT, TIOCSTI, (ioctl_t) &buf[i]);
}
diff --git a/sh.glob.c b/sh.glob.c
index fc510bf..7d008aa 100644
--- a/sh.glob.c
+++ b/sh.glob.c
@@ -594,8 +594,13 @@ trim(Char **t)
Char *p;
while ((p = *t++) != '\0')
- while (*p)
- *p++ &= TRIM;
+ while (*p) {
+#if INVALID_BYTE != 0
+ if ((*p & INVALID_BYTE) != INVALID_BYTE) /* *p < INVALID_BYTE */
+#endif
+ *p &= TRIM;
+ p++;
+ }
}
int
diff --git a/sh.h b/sh.h
index e71a24e..75de557 100644
--- a/sh.h
+++ b/sh.h
@@ -707,14 +707,21 @@ extern struct sigaction parterm; /* Parents terminate catch */
#define ASCII 0177
#ifdef WIDE_STRINGS /* Implies SHORT_STRINGS */
/* 31st char bit used for 'ing (not 32nd, we want all values nonnegative) */
-# define QUOTE 0x40000000
-# define TRIM 0x3FFFFFFF /* Mask to strip quote bit */
+/*
+ * Notice
+ *
+ * Since the fix for handling Unicode file names, the 32nd bit is used.
+ * Use '&' instead of '>' or '<' when comparing with INVALID_BYTE etc.
+ * Casting to uChar is not recommended,
+ * because Char is 4 bytes but uChar is 8 bytes on I32LP64. */
+# define QUOTE 0x80000000
+# define TRIM 0x7FFFFFFF /* Mask to strip quote bit */
# define UNDER 0x08000000 /* Underline flag */
# define BOLD 0x04000000 /* Bold flag */
# define STANDOUT 0x02000000 /* Standout flag */
# define LITERAL 0x01000000 /* Literal character flag */
# define ATTRIBUTES 0x0F000000 /* The bits used for attributes */
-# define INVALID_BYTE 0x00800000 /* Invalid character on input */
+# define INVALID_BYTE 0xF0000000 /* Invalid character on input */
# ifdef SOLARIS2
# define CHAR 0x30FFFFFF /* Mask to mask out the character */
# else
@@ -743,6 +750,8 @@ extern struct sigaction parterm; /* Parents terminate catch */
#endif
#define CHAR_DBWIDTH (LITERAL|(LITERAL-1))
+# define MAX_UTF32 0x7FFFFFFF /* max UTF32 is U+7FFFFFFF */
+
EXTERN int AsciiOnly; /* If set only 7 bits expected in characters */
/*
diff --git a/sh.hist.c b/sh.hist.c
index b8f71b7..c0eded5 100644
--- a/sh.hist.c
+++ b/sh.hist.c
@@ -1199,7 +1199,7 @@ fmthist(int fmt, ptr_t ptr)
buf = xmalloc(Strlen(istr) * MB_LEN_MAX + 1);
for (p = buf, ip = istr; *ip != '\0'; ip++)
- p += one_wctomb(p, CHAR & *ip);
+ p += one_wctomb(p, *ip);
*p = '\0';
xfree(istr);
diff --git a/sh.misc.c b/sh.misc.c
index 7232b12..233ba5f 100644
--- a/sh.misc.c
+++ b/sh.misc.c
@@ -450,8 +450,13 @@ strip(Char *cp)
if (!cp)
return (cp);
- while ((*dp++ &= TRIM) != '\0')
- continue;
+ while (*dp != '\0') {
+#if INVALID_BYTE != 0
+ if ((*dp & INVALID_BYTE) != INVALID_BYTE) /* *dp < INVALID_BYTE */
+#endif
+ *dp &= TRIM;
+ dp++;
+ }
return (cp);
}
@@ -462,8 +467,17 @@ quote(Char *cp)
if (!cp)
return (cp);
- while (*dp != '\0')
- *dp++ |= QUOTE;
+ while (*dp != '\0') {
+#ifdef WIDE_STRINGS
+ if ((*dp & 0xffffff80) == 0) /* *dp < 0x80 */
+#elif defined SHORT_STRINGS
+ if ((*dp & 0xff80) == 0) /* *dp < 0x80 */
+#else
+ if ((*dp & 0x80) == 0) /* *dp < 0x80 */
+#endif
+ *dp |= QUOTE;
+ dp++;
+ }
return (cp);
}
diff --git a/tc.func.c b/tc.func.c
index 2b28a68..5a909d6 100644
--- a/tc.func.c
+++ b/tc.func.c
@@ -124,7 +124,14 @@ expand_lex(const struct wordent *sp0, int from, int to)
(((*s & TRIM) == '\\') && (prev_c != '\\')))) {
Strbuf_append1(&buf, '\\');
}
+#if INVALID_BYTE != 0
+ if ((*s & INVALID_BYTE) != INVALID_BYTE) /* *s < INVALID_BYTE */
+ Strbuf_append1(&buf, *s & TRIM);
+ else
+ Strbuf_append1(&buf, *s);
+#else
Strbuf_append1(&buf, *s & TRIM);
+#endif
prev_c = *s;
}
Strbuf_append1(&buf, ' ');
diff --git a/tc.nls.c b/tc.nls.c
index 2c38f3f..22ad173 100644
--- a/tc.nls.c
+++ b/tc.nls.c
@@ -64,7 +64,11 @@ NLSWidth(Char c)
{
# ifdef HAVE_WCWIDTH
int l;
+#if INVALID_BYTE != 0
+ if ((c & INVALID_BYTE) == INVALID_BYTE) /* c >= INVALID_BYTE */
+#else
if (c & INVALID_BYTE)
+#endif
return 1;
l = xwcwidth((wchar_t) c);
return l >= 0 ? l : 0;
@@ -116,12 +120,36 @@ NLSChangeCase(const Char *p, int mode)
}
int
-NLSClassify(Char c, int nocomb)
+NLSClassify(Char c, int nocomb, int drawPrompt)
{
int w;
- if (c & INVALID_BYTE)
+#ifndef SHORT_STRINGS
+ if ((c & 0x80) != 0) /* c >= 0x80 */
return NLSCLASS_ILLEGAL;
+#endif
+ if (!drawPrompt) { /* draw command-line */
+#if INVALID_BYTE != 0
+ if ((c & INVALID_BYTE) == INVALID_BYTE) /* c >= INVALID_BYTE */
+ return NLSCLASS_ILLEGAL;
+ if ((c & INVALID_BYTE) == QUOTE && (c & 0x80) == 0) /* c >= QUOTE */
+ return 1;
+ if (c >= 0x10000000) /* U+10000000 = FC 90 80 80 80 80 */
+ return NLSCLASS_ILLEGAL5;
+ if (c >= 0x1000000) /* U+1000000 = F9 80 80 80 80 */
+ return NLSCLASS_ILLEGAL4;
+ if (c >= 0x100000) /* U+100000 = F4 80 80 80 */
+ return NLSCLASS_ILLEGAL3;
+#endif
+ if (c >= 0x10000) /* U+10000 = F0 90 80 80 */
+ return NLSCLASS_ILLEGAL2;
+ }
w = NLSWidth(c);
+ if (drawPrompt) { /* draw prompt */
+ if (w > 0)
+ return w;
+ if (w == 0)
+ return 1;
+ }
if ((w > 0 && !(Iscntrl(c) && (c & CHAR) < 0x100)) || (Isprint(c) && !nocomb))
return w;
if (Iscntrl(c) && (c & CHAR) < 0x100) {
@@ -131,13 +159,5 @@ NLSClassify(Char c, int nocomb)
return NLSCLASS_TAB;
return NLSCLASS_CTRL;
}
-#ifdef WIDE_STRINGS
- if (c >= 0x1000000)
- return NLSCLASS_ILLEGAL4;
- if (c >= 0x10000)
- return NLSCLASS_ILLEGAL3;
-#endif
- if (c >= 0x100)
- return NLSCLASS_ILLEGAL2;
return NLSCLASS_ILLEGAL;
}
diff --git a/tc.nls.h b/tc.nls.h
index 4d27741..6930682 100644
--- a/tc.nls.h
+++ b/tc.nls.h
@@ -43,7 +43,7 @@ extern int NLSStringWidth (const Char *);
#endif
extern Char *NLSChangeCase (const Char *, int);
-extern int NLSClassify (Char, int);
+extern int NLSClassify (Char, int, int);
#define NLSCLASS_CTRL (-1)
#define NLSCLASS_TAB (-2)
@@ -52,6 +52,7 @@ extern int NLSClassify (Char, int);
#define NLSCLASS_ILLEGAL2 (-5)
#define NLSCLASS_ILLEGAL3 (-6)
#define NLSCLASS_ILLEGAL4 (-7)
+#define NLSCLASS_ILLEGAL5 (-8)
#define NLSCLASS_ILLEGAL_SIZE(x) (-(x) - (-(NLSCLASS_ILLEGAL) - 1))
diff --git a/tc.printf.c b/tc.printf.c
index 7f2612d..c6be145 100644
--- a/tc.printf.c
+++ b/tc.printf.c
@@ -289,7 +289,7 @@ doprnt(void (*addchar) (int), const char *sfmt, va_list ap)
(*addchar) ('\\' | attributes);
count++;
}
- len = one_wctomb(cbuf, *Bp & CHAR);
+ len = one_wctomb(cbuf, *Bp);
for (pos = 0; pos < len; pos++) {
(*addchar) ((unsigned char)cbuf[pos] | attributes
| (*Bp & ATTRIBUTES));
diff --git a/tc.str.c b/tc.str.c
index c407cb8..c2b5ac8 100644
--- a/tc.str.c
+++ b/tc.str.c
@@ -66,10 +66,24 @@ one_wctomb(char *s, Char wchar)
{
int len;
- if (wchar & INVALID_BYTE) {
- s[0] = wchar & 0xFF;
+#if INVALID_BYTE != 0
+ if ((wchar & INVALID_BYTE) == INVALID_BYTE) { /* wchar >= INVALID_BYTE */
+ /* invalid char
+ * example)
+ * if wchar = f0000090(=90|INVALID_BYTE), then *s = ffffff90 */
+ *s = (char)wchar;
len = 1;
+#else
+ if (wchar & (CHAR & INVALID_BYTE)) {
+ s[0] = wchar & (CHAR & 0xFF);
+ len = 1;
+#endif
} else {
+#if INVALID_BYTE != 0
+ wchar &= MAX_UTF32;
+#else
+ wchar &= CHAR;
+#endif
#ifdef UTF16_STRINGS
if (wchar >= 0x10000) {
/* UTF-16 systems can't handle these values directly in calls to
@@ -224,7 +238,7 @@ short2str(const Char *src)
dst = sdst;
edst = &dst[dstsize];
while (*src) {
- dst += one_wctomb(dst, *src & CHAR);
+ dst += one_wctomb(dst, *src);
src++;
if (dst >= edst) {
char *wdst = dst;
@@ -544,7 +558,7 @@ short2qstr(const Char *src)
dst = &edst[-MALLOC_INCR];
}
}
- dst += one_wctomb(dst, *src & CHAR);
+ dst += one_wctomb(dst, *src);
src++;
if (dst >= edst) {
ptrdiff_t i = dst - edst;
diff --git a/tw.parse.c b/tw.parse.c
index 8309ed8..94982d6 100644
--- a/tw.parse.c
+++ b/tw.parse.c
@@ -618,7 +618,12 @@ insert_meta(const Char *cp, const Char *cpend, const Char *word,
break;
wq = w & QUOTE;
- w &= ~QUOTE;
+#if INVALID_BYTE != 0
+ /* add checking INVALID_BYTE for FIX UTF32 */
+ if ((w & INVALID_BYTE) != INVALID_BYTE) /* w < INVALID_BYTE */
+#else
+ w &= ~QUOTE;
+#endif
if (cmap(w, _ESC | _QF))
wq = QUOTE; /* quotes are always quoted */
--
2.5.5

View File

@ -35,6 +35,7 @@ Patch003: tcsh-6.19.00-003-avoid-gcc5-calloc-optimization.patch
Patch004: tcsh-6.19.00-004-remove-unused-variable.patch
Patch005: tcsh-6.19.00-005-ge0-is-always-true-for-unsigned.patch
Patch006: tcsh-6.19.00-006-_SIGWINCH-added.patch
Patch007: tcsh-6.19.00-007-fix-handling-of-invalid-unicode-characters.patch
# Downstream patches -- these should be always included when doing rebase:
@ -162,6 +163,7 @@ fi
tcsh-6.19.00-004-remove-unused-variable.patch
tcsh-6.19.00-005-ge0-is-always-true-for-unsigned.patch
tcsh-6.19.00-006-_SIGWINCH-added.patch
tcsh-6.19.00-007-fix-handling-of-invalid-unicode-characters.patch
* Thu Apr 21 2016 David Kaspar [Dee'Kej] <dkaspar@redhat.com> - 6.19.00-6
- Drop tcsh-6.15.00-closem.patch - issue not reproducible, patch not accepted by upstream