From e5da57f10ea8779efa5c3879d50f9357392efa70 Mon Sep 17 00:00:00 2001 From: "David Kaspar [Dee'Kej]" Date: Tue, 3 May 2016 13:01:57 +0200 Subject: [PATCH] tcsh-6.19.00-007-fix-handling-of-invalid-unicode-characters.patch added > PR/437: Fix handling of invalid unicode characters. --- ...ndling-of-invalid-unicode-characters.patch | 502 ++++++++++++++++++ tcsh.spec | 2 + 2 files changed, 504 insertions(+) create mode 100644 tcsh-6.19.00-007-fix-handling-of-invalid-unicode-characters.patch diff --git a/tcsh-6.19.00-007-fix-handling-of-invalid-unicode-characters.patch b/tcsh-6.19.00-007-fix-handling-of-invalid-unicode-characters.patch new file mode 100644 index 0000000..6402ce8 --- /dev/null +++ b/tcsh-6.19.00-007-fix-handling-of-invalid-unicode-characters.patch @@ -0,0 +1,502 @@ +From d5c106a95c49508f5e214f2fa174968eee2352fc Mon Sep 17 00:00:00 2001 +From: christos +Date: Sat, 6 Jun 2015 21:19:07 +0000 +Subject: [PATCH] PR/437: Fix handling of invalid unicode characters. tcsh uses + the high order bits to encode attributes in the prompt and the high bit in + regular characters. Make the drawing routines take an argument indicating if + we are drawing the prompt or not, so that we can decide how to deal with the + high bits. This solution is the minimum diff and does not allow "large valued" + unicode characters to be in the prompt (because they would conflict with the + attribute bits). A better solution would be to have a struct for each + character so we could encode extra attributes. 
+ +--- + Fixes | 1 + + ed.chared.c | 2 +- + ed.refresh.c | 54 ++++++++++++++++++++++++++++++++++++++++++------------ + ed.xmap.c | 2 +- + sh.file.c | 2 +- + sh.glob.c | 9 +++++++-- + sh.h | 15 ++++++++++++--- + sh.hist.c | 2 +- + sh.misc.c | 22 ++++++++++++++++++---- + tc.func.c | 7 +++++++ + tc.nls.c | 40 ++++++++++++++++++++++++++++++---------- + tc.nls.h | 3 ++- + tc.printf.c | 2 +- + tc.str.c | 22 ++++++++++++++++++---- + tw.parse.c | 7 ++++++- + 15 files changed, 148 insertions(+), 42 deletions(-) + +diff --git a/Fixes b/Fixes +index 7d0ceac..aa779b1 100644 +--- a/Fixes ++++ b/Fixes +@@ -1,3 +1,4 @@ ++ 2. PR/437: Fix handling of invalid unicode characters. + 1. PR/451: Fix error messages containing %c to be always '%c' + + 41. V6.19.00 - 20150521 +diff --git a/ed.chared.c b/ed.chared.c +index bade211..c0bd41b 100644 +--- a/ed.chared.c ++++ b/ed.chared.c +@@ -3387,7 +3387,7 @@ e_stuff_char(Char c) + (void) Cookedmode(); + + (void) xwrite(SHIN, "\n", 1); +- len = one_wctomb(buf, c & CHAR); ++ len = one_wctomb(buf, c); + for (i = 0; i < len; i++) + (void) ioctl(SHIN, TIOCSTI, (ioctl_t) &buf[i]); + +diff --git a/ed.refresh.c b/ed.refresh.c +index 9e6da00..a88c5e5 100644 +--- a/ed.refresh.c ++++ b/ed.refresh.c +@@ -46,7 +46,7 @@ static int vcursor_h, vcursor_v; + static int rprompt_h, rprompt_v; + + static int MakeLiteral (Char *, int, Char); +-static int Draw (Char *, int); ++static int Draw (Char *, int, int); + static void Vdraw (Char, int); + static void RefreshPromptpart (Char *); + static void update_line (Char *, Char *, int); +@@ -159,15 +159,44 @@ static int MakeLiteral(Char *str, int len, Char addlit) + return i | LITERAL; + } + ++/* draw char at cp, expand tabs, ctl chars */ + static int +-Draw(Char *cp, int nocomb) /* draw char at cp, expand tabs, ctl chars */ ++Draw(Char *cp, int nocomb, int drawPrompt) + { + int w, i, lv, lh; + Char c, attr; + ++#ifdef WIDE_STRINGS ++ if (!drawPrompt) { /* draw command-line */ ++ attr = 0; ++ c = *cp; ++ } else { /* 
draw prompt */ ++ /* prompt with attributes(UNDER,BOLD,STANDOUT) */ ++ if (*cp & (UNDER | BOLD | STANDOUT)) { /* *cp >= STANDOUT */ ++ ++ /* example) ++ * We can't distinguish whether (*cp=)0x02ffffff is ++ * U+02FFFFFF or U+00FFFFFF|STANDOUT. ++ * We handle as U+00FFFFFF|STANDOUT, only when drawing prompt. */ ++ attr = (*cp & ATTRIBUTES); ++ /* ~(UNDER | BOLD | STANDOUT) = 0xf1ffffff */ ++ c = *cp & ~(UNDER | BOLD | STANDOUT); ++ ++ /* if c is ctrl code, we handle *cp as having no attributes */ ++ if ((c < 0x20 && c >= 0) || c == 0x7f) { ++ attr = 0; ++ c = *cp; ++ } ++ } else { /* prompt without attributes */ ++ attr = 0; ++ c = *cp; ++ } ++ } ++#else + attr = *cp & ~CHAR; + c = *cp & CHAR; +- w = NLSClassify(c, nocomb); ++#endif ++ w = NLSClassify(c, nocomb, drawPrompt); + switch (w) { + case NLSCLASS_NL: + Vdraw('\0', 0); /* assure end of line */ +@@ -201,10 +230,11 @@ Draw(Char *cp, int nocomb) /* draw char at cp, expand tabs, ctl chars */ + case NLSCLASS_ILLEGAL2: + case NLSCLASS_ILLEGAL3: + case NLSCLASS_ILLEGAL4: +- Vdraw('\\' | attr, 1); +- Vdraw('U' | attr, 1); +- Vdraw('+' | attr, 1); +- for (i = 8 * NLSCLASS_ILLEGAL_SIZE(w) - 4; i >= 0; i -= 4) ++ case NLSCLASS_ILLEGAL5: ++ Vdraw('\\', 1); ++ Vdraw('U', 1); ++ Vdraw('+', 1); ++ for (i = 16 + 4 * (-w-5); i >= 0; i -= 4) + Vdraw("0123456789ABCDEF"[(c >> i) & 15] | attr, 1); + break; + case 0: +@@ -302,7 +332,7 @@ RefreshPromptpart(Char *buf) + } + } + else +- cp += Draw(cp, cp == buf); ++ cp += Draw(cp, cp == buf, 1); + } + } + +@@ -354,7 +384,7 @@ Refresh(void) + cur_v = vcursor_v; + Cursor = cp; + } +- cp += Draw(cp, cp == InputBuf); ++ cp += Draw(cp, cp == InputBuf, 0); + } + + if (cur_h == -1) { /* if I haven't been set yet, I'm at the end */ +@@ -1126,7 +1156,7 @@ RefCursor(void) + cp++; + continue; + } +- w = NLSClassify(*cp & CHAR, cp == Prompt); ++ w = NLSClassify(*cp & CHAR, cp == Prompt, 0); + cp++; + switch(w) { + case NLSCLASS_NL: +@@ -1158,7 +1188,7 @@ RefCursor(void) + } + + for (cp = 
InputBuf; cp < Cursor;) { /* do input buffer to Cursor */ +- w = NLSClassify(*cp & CHAR, cp == InputBuf); ++ w = NLSClassify(*cp & CHAR, cp == InputBuf, 0); + cp++; + switch(w) { + case NLSCLASS_NL: +@@ -1251,7 +1281,7 @@ RefPlusOne(int l) + } + cp = Cursor - l; + c = *cp & CHAR; +- w = NLSClassify(c, cp == InputBuf); ++ w = NLSClassify(c, cp == InputBuf, 0); + switch(w) { + case NLSCLASS_CTRL: + PutPlusOne('^', 1); +diff --git a/ed.xmap.c b/ed.xmap.c +index 6e1d56e..36bce1e 100644 +--- a/ed.xmap.c ++++ b/ed.xmap.c +@@ -743,7 +743,7 @@ unparsestring(const CStr *str, const Char *sep) + *b++ = (unsigned char) p; + } + else if (p == ' ' || (Isprint(p) && !Isspace(p))) +- b += one_wctomb((char *)b, p & CHAR); ++ b += one_wctomb((char *)b, p); + else { + *b++ = '\\'; + *b++ = ((p >> 6) & 7) + '0'; +diff --git a/sh.file.c b/sh.file.c +index 343b774..3989d8a 100644 +--- a/sh.file.c ++++ b/sh.file.c +@@ -249,7 +249,7 @@ pushback(const Char *string) + char buf[MB_LEN_MAX]; + size_t i, len; + +- len = one_wctomb(buf, *p & CHAR); ++ len = one_wctomb(buf, *p); + for (i = 0; i < len; i++) + (void) ioctl(SHOUT, TIOCSTI, (ioctl_t) &buf[i]); + } +diff --git a/sh.glob.c b/sh.glob.c +index fc510bf..7d008aa 100644 +--- a/sh.glob.c ++++ b/sh.glob.c +@@ -594,8 +594,13 @@ trim(Char **t) + Char *p; + + while ((p = *t++) != '\0') +- while (*p) +- *p++ &= TRIM; ++ while (*p) { ++#if INVALID_BYTE != 0 ++ if ((*p & INVALID_BYTE) != INVALID_BYTE) /* *p < INVALID_BYTE */ ++#endif ++ *p &= TRIM; ++ p++; ++ } + } + + int +diff --git a/sh.h b/sh.h +index e71a24e..75de557 100644 +--- a/sh.h ++++ b/sh.h +@@ -707,14 +707,21 @@ extern struct sigaction parterm; /* Parents terminate catch */ + #define ASCII 0177 + #ifdef WIDE_STRINGS /* Implies SHORT_STRINGS */ + /* 31st char bit used for 'ing (not 32nd, we want all values nonnegative) */ +-# define QUOTE 0x40000000 +-# define TRIM 0x3FFFFFFF /* Mask to strip quote bit */ ++/* ++ * Notice ++ * ++ * By fix for handling unicode name file, 32nd bit is 
used. ++ * We need to use '&' instead of '> or <' when comparing with INVALID_BYTE etc. ++ * Cast to uChar is not recommended, ++ * because Char is 4bytes but uChar is 8bytes on I32LP64. */ ++# define QUOTE 0x80000000 ++# define TRIM 0x7FFFFFFF /* Mask to strip quote bit */ + # define UNDER 0x08000000 /* Underline flag */ + # define BOLD 0x04000000 /* Bold flag */ + # define STANDOUT 0x02000000 /* Standout flag */ + # define LITERAL 0x01000000 /* Literal character flag */ + # define ATTRIBUTES 0x0F000000 /* The bits used for attributes */ +-# define INVALID_BYTE 0x00800000 /* Invalid character on input */ ++# define INVALID_BYTE 0xF0000000 /* Invalid character on input */ + # ifdef SOLARIS2 + # define CHAR 0x30FFFFFF /* Mask to mask out the character */ + # else +@@ -743,6 +750,8 @@ extern struct sigaction parterm; /* Parents terminate catch */ + #endif + #define CHAR_DBWIDTH (LITERAL|(LITERAL-1)) + ++# define MAX_UTF32 0x7FFFFFFF /* max UTF32 is U+7FFFFFFF */ ++ + EXTERN int AsciiOnly; /* If set only 7 bits expected in characters */ + + /* +diff --git a/sh.hist.c b/sh.hist.c +index b8f71b7..c0eded5 100644 +--- a/sh.hist.c ++++ b/sh.hist.c +@@ -1199,7 +1199,7 @@ fmthist(int fmt, ptr_t ptr) + buf = xmalloc(Strlen(istr) * MB_LEN_MAX + 1); + + for (p = buf, ip = istr; *ip != '\0'; ip++) +- p += one_wctomb(p, CHAR & *ip); ++ p += one_wctomb(p, *ip); + + *p = '\0'; + xfree(istr); +diff --git a/sh.misc.c b/sh.misc.c +index 7232b12..233ba5f 100644 +--- a/sh.misc.c ++++ b/sh.misc.c +@@ -450,8 +450,13 @@ strip(Char *cp) + + if (!cp) + return (cp); +- while ((*dp++ &= TRIM) != '\0') +- continue; ++ while (*dp != '\0') { ++#if INVALID_BYTE != 0 ++ if ((*dp & INVALID_BYTE) != INVALID_BYTE) /* *dp < INVALID_BYTE */ ++#endif ++ *dp &= TRIM; ++ dp++; ++ } + return (cp); + } + +@@ -462,8 +467,17 @@ quote(Char *cp) + + if (!cp) + return (cp); +- while (*dp != '\0') +- *dp++ |= QUOTE; ++ while (*dp != '\0') { ++#ifdef WIDE_STRINGS ++ if ((*dp & 0xffffff80) == 0) /* *dp < 0x80 */ 
++#elif defined SHORT_STRINGS ++ if ((*dp & 0xff80) == 0) /* *dp < 0x80 */ ++#else ++ if ((*dp & 0x80) == 0) /* *dp < 0x80 */ ++#endif ++ *dp |= QUOTE; ++ dp++; ++ } + return (cp); + } + +diff --git a/tc.func.c b/tc.func.c +index 2b28a68..5a909d6 100644 +--- a/tc.func.c ++++ b/tc.func.c +@@ -124,7 +124,14 @@ expand_lex(const struct wordent *sp0, int from, int to) + (((*s & TRIM) == '\\') && (prev_c != '\\')))) { + Strbuf_append1(&buf, '\\'); + } ++#if INVALID_BYTE != 0 ++ if ((*s & INVALID_BYTE) != INVALID_BYTE) /* *s < INVALID_BYTE */ ++ Strbuf_append1(&buf, *s & TRIM); ++ else ++ Strbuf_append1(&buf, *s); ++#else + Strbuf_append1(&buf, *s & TRIM); ++#endif + prev_c = *s; + } + Strbuf_append1(&buf, ' '); +diff --git a/tc.nls.c b/tc.nls.c +index 2c38f3f..22ad173 100644 +--- a/tc.nls.c ++++ b/tc.nls.c +@@ -64,7 +64,11 @@ NLSWidth(Char c) + { + # ifdef HAVE_WCWIDTH + int l; ++#if INVALID_BYTE != 0 ++ if ((c & INVALID_BYTE) == INVALID_BYTE) /* c >= INVALID_BYTE */ ++#else + if (c & INVALID_BYTE) ++#endif + return 1; + l = xwcwidth((wchar_t) c); + return l >= 0 ? 
l : 0; +@@ -116,12 +120,36 @@ NLSChangeCase(const Char *p, int mode) + } + + int +-NLSClassify(Char c, int nocomb) ++NLSClassify(Char c, int nocomb, int drawPrompt) + { + int w; +- if (c & INVALID_BYTE) ++#ifndef SHORT_STRINGS ++ if ((c & 0x80) != 0) /* c >= 0x80 */ + return NLSCLASS_ILLEGAL; ++#endif ++ if (!drawPrompt) { /* draw command-line */ ++#if INVALID_BYTE != 0 ++ if ((c & INVALID_BYTE) == INVALID_BYTE) /* c >= INVALID_BYTE */ ++ return NLSCLASS_ILLEGAL; ++ if ((c & INVALID_BYTE) == QUOTE && (c & 0x80) == 0) /* c >= QUOTE */ ++ return 1; ++ if (c >= 0x10000000) /* U+10000000 = FC 90 80 80 80 80 */ ++ return NLSCLASS_ILLEGAL5; ++ if (c >= 0x1000000) /* U+1000000 = F9 80 80 80 80 */ ++ return NLSCLASS_ILLEGAL4; ++ if (c >= 0x100000) /* U+100000 = F4 80 80 80 */ ++ return NLSCLASS_ILLEGAL3; ++#endif ++ if (c >= 0x10000) /* U+10000 = F0 90 80 80 */ ++ return NLSCLASS_ILLEGAL2; ++ } + w = NLSWidth(c); ++ if (drawPrompt) { /* draw prompt */ ++ if (w > 0) ++ return w; ++ if (w == 0) ++ return 1; ++ } + if ((w > 0 && !(Iscntrl(c) && (c & CHAR) < 0x100)) || (Isprint(c) && !nocomb)) + return w; + if (Iscntrl(c) && (c & CHAR) < 0x100) { +@@ -131,13 +159,5 @@ NLSClassify(Char c, int nocomb) + return NLSCLASS_TAB; + return NLSCLASS_CTRL; + } +-#ifdef WIDE_STRINGS +- if (c >= 0x1000000) +- return NLSCLASS_ILLEGAL4; +- if (c >= 0x10000) +- return NLSCLASS_ILLEGAL3; +-#endif +- if (c >= 0x100) +- return NLSCLASS_ILLEGAL2; + return NLSCLASS_ILLEGAL; + } +diff --git a/tc.nls.h b/tc.nls.h +index 4d27741..6930682 100644 +--- a/tc.nls.h ++++ b/tc.nls.h +@@ -43,7 +43,7 @@ extern int NLSStringWidth (const Char *); + #endif + + extern Char *NLSChangeCase (const Char *, int); +-extern int NLSClassify (Char, int); ++extern int NLSClassify (Char, int, int); + + #define NLSCLASS_CTRL (-1) + #define NLSCLASS_TAB (-2) +@@ -52,6 +52,7 @@ extern int NLSClassify (Char, int); + #define NLSCLASS_ILLEGAL2 (-5) + #define NLSCLASS_ILLEGAL3 (-6) + #define NLSCLASS_ILLEGAL4 (-7) ++#define 
NLSCLASS_ILLEGAL5 (-8) + + #define NLSCLASS_ILLEGAL_SIZE(x) (-(x) - (-(NLSCLASS_ILLEGAL) - 1)) + +diff --git a/tc.printf.c b/tc.printf.c +index 7f2612d..c6be145 100644 +--- a/tc.printf.c ++++ b/tc.printf.c +@@ -289,7 +289,7 @@ doprnt(void (*addchar) (int), const char *sfmt, va_list ap) + (*addchar) ('\\' | attributes); + count++; + } +- len = one_wctomb(cbuf, *Bp & CHAR); ++ len = one_wctomb(cbuf, *Bp); + for (pos = 0; pos < len; pos++) { + (*addchar) ((unsigned char)cbuf[pos] | attributes + | (*Bp & ATTRIBUTES)); +diff --git a/tc.str.c b/tc.str.c +index c407cb8..c2b5ac8 100644 +--- a/tc.str.c ++++ b/tc.str.c +@@ -66,10 +66,24 @@ one_wctomb(char *s, Char wchar) + { + int len; + +- if (wchar & INVALID_BYTE) { +- s[0] = wchar & 0xFF; ++#if INVALID_BYTE != 0 ++ if ((wchar & INVALID_BYTE) == INVALID_BYTE) { /* wchar >= INVALID_BYTE */ ++ /* invalid char ++ * example) ++ * if wchar = f0000090(=90|INVALID_BYTE), then *s = ffffff90 */ ++ *s = (char)wchar; + len = 1; ++#else ++ if (wchar & (CHAR & INVALID_BYTE)) { ++ s[0] = wchar & (CHAR & 0xFF); ++ len = 1; ++#endif + } else { ++#if INVALID_BYTE != 0 ++ wchar &= MAX_UTF32; ++#else ++ wchar &= CHAR; ++#endif + #ifdef UTF16_STRINGS + if (wchar >= 0x10000) { + /* UTF-16 systems can't handle these values directly in calls to +@@ -224,7 +238,7 @@ short2str(const Char *src) + dst = sdst; + edst = &dst[dstsize]; + while (*src) { +- dst += one_wctomb(dst, *src & CHAR); ++ dst += one_wctomb(dst, *src); + src++; + if (dst >= edst) { + char *wdst = dst; +@@ -544,7 +558,7 @@ short2qstr(const Char *src) + dst = &edst[-MALLOC_INCR]; + } + } +- dst += one_wctomb(dst, *src & CHAR); ++ dst += one_wctomb(dst, *src); + src++; + if (dst >= edst) { + ptrdiff_t i = dst - edst; +diff --git a/tw.parse.c b/tw.parse.c +index 8309ed8..94982d6 100644 +--- a/tw.parse.c ++++ b/tw.parse.c +@@ -618,7 +618,12 @@ insert_meta(const Char *cp, const Char *cpend, const Char *word, + break; + + wq = w & QUOTE; +- w &= ~QUOTE; ++#if INVALID_BYTE != 0 ++ /* add 
checking INVALID_BYTE for FIX UTF32 */ ++ if ((w & INVALID_BYTE) != INVALID_BYTE) /* w < INVALID_BYTE */ ++#else ++ w &= ~QUOTE; ++#endif + + if (cmap(w, _ESC | _QF)) + wq = QUOTE; /* quotes are always quoted */ +-- +2.5.5 + diff --git a/tcsh.spec b/tcsh.spec index 1b170d1..92b3f5d 100644 --- a/tcsh.spec +++ b/tcsh.spec @@ -35,6 +35,7 @@ Patch003: tcsh-6.19.00-003-avoid-gcc5-calloc-optimization.patch Patch004: tcsh-6.19.00-004-remove-unused-variable.patch Patch005: tcsh-6.19.00-005-ge0-is-always-true-for-unsigned.patch Patch006: tcsh-6.19.00-006-_SIGWINCH-added.patch +Patch007: tcsh-6.19.00-007-fix-handling-of-invalid-unicode-characters.patch # Downstream patches -- these should be always included when doing rebase: @@ -162,6 +163,7 @@ fi tcsh-6.19.00-004-remove-unused-variable.patch tcsh-6.19.00-005-ge0-is-always-true-for-unsigned.patch tcsh-6.19.00-006-_SIGWINCH-added.patch + tcsh-6.19.00-007-fix-handling-of-invalid-unicode-characters.patch * Thu Apr 21 2016 David Kaspar [Dee'Kej] - 6.19.00-6 - Drop tcsh-6.15.00-closem.patch - issue not reproducible, patch not accepted by upstream