From 4886463bb6d3df2b827d784a97e13c7765d57178 Mon Sep 17 00:00:00 2001 From: Martijn Dekker Date: Sat, 12 Feb 2022 21:27:36 +0000 Subject: [PATCH] Disable broken KEYBD trap for multibyte characters In UTF-8 locales, ksh breaks when a KEYBD trap is active, even a dummy no-op one like 'trap : KEYBD'. Entering multi-byte characters fails (the input is interrupted and a new prompt is displayed) and pasting content with multi-byte characters produces corrupted results. The cause is that the KEYBD trap code is not multibyte-ready. Unfortunately nobody yet understands the edit.c code well enough to implement a proper fix. Pending that, this commit implements a workaround that at least avoids breaking the shell. src/cmd/ksh93/edit/edit.c: ed_getchar(): - When a multi-byte locale is active, do not trigger the KEYBD trap except for ASCII characters (1-127). Resolves: https://github.com/ksh93/ksh/issues/307 (cherry-picked from 4886463bb6d3df2b827d784a97e13c7765d57178) --- src/cmd/ksh93/edit/edit.c | 5 ++++- src/cmd/ksh93/sh.1 | 4 ++++ 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/src/cmd/ksh93/edit/edit.c b/src/cmd/ksh93/edit/edit.c index d9933b63bb6a..371f5a9ec8fb 100644 --- a/src/cmd/ksh93/edit/edit.c +++ b/src/cmd/ksh93/edit/edit.c @@ -1122,7 +1122,10 @@ int ed_getchar(register Edit_t *ep,int mode) killpg(getpgrp(),SIGINT); siglongjmp(ep->e_env, UINTR); } - if(mode<=0 && ep->sh->st.trap[SH_KEYTRAP]) + if(mode<=0 && ep->sh->st.trap[SH_KEYTRAP] + /* workaround for https://github.com/ksh93/ksh/issues/307: + * do not trigger KEYBD for non-ASCII in multibyte locale */ + && (CC_NATIVE!=CC_ASCII || !mbwide() || c > -128)) { ep->e_keytrap = 1; n=1; diff --git a/src/cmd/ksh93/sh.1 b/src/cmd/ksh93/sh.1 index 841c28a43bb8..61f71c2d1698 100644 --- a/src/cmd/ksh93/sh.1 +++ b/src/cmd/ksh93/sh.1 @@ -9062,6 +9062,10 @@ Thus, a trap on .B CHLD won't be executed until the foreground job terminates. 
.PP +In locales that use a multibyte character set such as UTF-8, the +.B KEYBD +trap is only triggered for ASCII characters (1-127). +.PP It is a good idea to leave a space after the comma operator in arithmetic expressions to prevent the comma from being interpreted as the decimal point character in certain locales. From 96d73c08a2786806f3def1fda66641b81e0af988 Mon Sep 17 00:00:00 2001 From: SHIMIZU Akifumi Date: Mon, 7 Apr 2025 19:47:16 +0900 Subject: [PATCH] Fix long multibyte characters paste issue via ssh (#840) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When I paste long multibyte characters (over 80 bytes) to ksh via SSH, the characters are not displayed correctly. For example, the following input demonstrates the issue. ja_JP.UTF-8 encoding is used. Expected command line display: $ echo "長い文字列を入れるとkshで文字列が乱れる場合があるようです" Actual command line display: $ です"echo "長い文字列を入れるとkshで文字列が乱れる場合がある ...with the cursor over the 'e' in 'echo'. This issue appears to be caused by the ed_read() function splitting a multibyte character sequence when reading into an 80-byte buffer. This leads to incorrect character interpretation and display. Therefore, we edited the code to handle the case where the buffer is full in the middle of a multi-byte character. src/cmd/ksh93/edit/edit.c: - putstack(): - Before retrying to interpret a multibyte character in case of a split due to end of buffer, restore the start position 'p'. - Fix zeroing out errno = EILSEQ. - ed_getchar(): Avoid a potential buffer overflow in 'readin'; allow for an extra multibyte character, not merely an extra byte. 
Co-authored-by: Martijn Dekker --- src/cmd/ksh93/edit/edit.c | 12 +++++++----- 1 files changed, 7 insertions(+), 5 deletions(-) diff --git a/src/cmd/ksh93/edit/edit.c b/src/cmd/ksh93/edit/edit.c index b0a8de5..d827331 100644 --- a/src/cmd/ksh93/edit/edit.c +++ b/src/cmd/ksh93/edit/edit.c @@ -973,6 +973,7 @@ static int putstack(Edit_t *ep,char string[], register int nbyte, int type) } else { + char *prevp = p; again: if((c=mbchar(p)) >=0) { @@ -980,19 +981,20 @@ static int putstack(Edit_t *ep,char string[], register int nbyte, int type) if(type) c = -c; } -#ifdef EILSEQ - else if(errno == EILSEQ) - errno = 0; -#endif else if((endp-p) < mbmax()) { + if(errno == EILSEQ) + errno = 0; if ((c=ed_read(ep,ep->e_fd,endp, 1,0)) == 1) { + p = prevp; *++endp = 0; goto again; } return(c); } + else if(errno == EILSEQ) + errno = 0; else { ed_ringbell(); @@ -1044,7 +1046,7 @@ static int putstack(Edit_t *ep,char string[], register int nbyte, int type) int ed_getchar(register Edit_t *ep,int mode) { register int n, c; - char readin[LOOKAHEAD+1]; + char *readin = fmtbuf(LOOKAHEAD + mbmax()); if(!ep->e_lookahead) { ed_flush(ep);