129 lines
		
	
	
		
			3.7 KiB
		
	
	
	
		
			Diff
		
	
	
	
	
	
			
		
		
	
	
			129 lines
		
	
	
		
			3.7 KiB
		
	
	
	
		
			Diff
		
	
	
	
	
	
| From e8321526823f2b7b945aeb04e5cdb1367e3d5b94 Mon Sep 17 00:00:00 2001
 | |
| From: =?UTF-8?q?Zbigniew=20J=C4=99drzejewski-Szmek?= <zbyszek@in.waw.pl>
 | |
| Date: Fri, 17 Jan 2014 21:28:41 -0500
 | |
| Subject: [PATCH] core: do not print invalid utf-8 in error messages
 | |
| 
 | |
| Conflicts:
 | |
| 	TODO
 | |
| ---
 | |
|  src/shared/fileio.c  | 11 +++++++----
 | |
|  src/shared/utf8.c    | 26 ++++++++++++++++++++++++++
 | |
|  src/shared/utf8.h    |  3 +++
 | |
|  src/test/test-utf8.c | 17 +++++++++++++++++
 | |
|  4 files changed, 53 insertions(+), 4 deletions(-)
 | |
| 
 | |
| diff --git a/src/shared/fileio.c b/src/shared/fileio.c
 | |
| index d28e38a..121cd57 100644
 | |
| --- a/src/shared/fileio.c
 | |
| +++ b/src/shared/fileio.c
 | |
| @@ -467,15 +467,18 @@ static int parse_env_file_push(const char *filename, unsigned line,
 | |
|          va_list aq, *ap = userdata;
 | |
|  
 | |
|          if (!utf8_is_valid(key)) {
 | |
| -                log_error("%s:%u: invalid UTF-8 for key '%s', ignoring.",
 | |
| -                          filename, line, key);
 | |
| +                _cleanup_free_ char *p = utf8_escape_invalid(key);
 | |
| +
 | |
| +                log_error("%s:%u: invalid UTF-8 in key '%s', ignoring.",
 | |
| +                          filename, line, p);
 | |
|                  return -EINVAL;
 | |
|          }
 | |
|  
 | |
|          if (value && !utf8_is_valid(value)) {
 | |
| -                /* FIXME: filter UTF-8 */
 | |
| +                _cleanup_free_ char *p = utf8_escape_invalid(value);
 | |
| +
 | |
|                  log_error("%s:%u: invalid UTF-8 value for key %s: '%s', ignoring.",
 | |
| -                          filename, line, key, value);
 | |
| +                          filename, line, key, p);
 | |
|                  return -EINVAL;
 | |
|          }
 | |
|  
 | |
| diff --git a/src/shared/utf8.c b/src/shared/utf8.c
 | |
| index 31120af..2b70d45 100644
 | |
| --- a/src/shared/utf8.c
 | |
| +++ b/src/shared/utf8.c
 | |
| @@ -172,6 +172,32 @@ const char *utf8_is_valid(const char *str) {
 | |
|          return str;
 | |
|  }
 | |
|  
 | |
| +char *utf8_escape_invalid(const char *str) {
 | |
| +        char *p, *s;
 | |
| +
 | |
| +        assert(str);
 | |
| +
 | |
| +        p = s = malloc(strlen(str) * 4 + 1);
 | |
| +        if (!p)
 | |
| +                return NULL;
 | |
| +
 | |
| +        while (*str) {
 | |
| +                int len;
 | |
| +
 | |
| +                len = utf8_encoded_valid_unichar(str);
 | |
| +                if (len > 0) {
 | |
| +                        s = mempcpy(s, str, len);
 | |
| +                        str += len;
 | |
| +                } else {
 | |
| +                        s = mempcpy(s, UTF8_REPLACEMENT_CHARACTER, strlen(UTF8_REPLACEMENT_CHARACTER));
 | |
| +                        str += 1;
 | |
| +                }
 | |
| +        }
 | |
| +        *s = '\0';
 | |
| +
 | |
| +        return p;
 | |
| +}
 | |
| +
 | |
|  char *ascii_is_valid(const char *str) {
 | |
|          const char *p;
 | |
|  
 | |
| diff --git a/src/shared/utf8.h b/src/shared/utf8.h
 | |
| index 96a03ea..f93dfb8 100644
 | |
| --- a/src/shared/utf8.h
 | |
| +++ b/src/shared/utf8.h
 | |
| @@ -25,8 +25,11 @@
 | |
|  
 | |
|  #include "macro.h"
 | |
|  
 | |
| +#define UTF8_REPLACEMENT_CHARACTER "\xef\xbf\xbd"
 | |
| +
 | |
|  const char *utf8_is_valid(const char *s) _pure_;
 | |
|  char *ascii_is_valid(const char *s) _pure_;
 | |
| +char *utf8_escape_invalid(const char *s);
 | |
|  
 | |
|  bool utf8_is_printable(const char* str, size_t length) _pure_;
 | |
|  
 | |
| diff --git a/src/test/test-utf8.c b/src/test/test-utf8.c
 | |
| index f0182ee..53c1d47 100644
 | |
| --- a/src/test/test-utf8.c
 | |
| +++ b/src/test/test-utf8.c
 | |
| @@ -66,12 +66,29 @@ static void test_utf8_encoded_valid_unichar(void) {
 | |
|  
 | |
|  }
 | |
|  
 | |
| +static void test_utf8_escaping(void) {
 | |
| +        _cleanup_free_ char *p1, *p2, *p3;
 | |
| +
 | |
| +        p1 = utf8_escape_invalid("goo goo goo");
 | |
| +        puts(p1);
 | |
| +        assert_se(utf8_is_valid(p1));
 | |
| +
 | |
| +        p2 = utf8_escape_invalid("\341\204\341\204");
 | |
| +        puts(p2);
 | |
| +        assert_se(utf8_is_valid(p2));
 | |
| +
 | |
| +        p3 = utf8_escape_invalid("\341\204");
 | |
| +        puts(p3);
 | |
| +        assert_se(utf8_is_valid(p3));
 | |
| +}
 | |
| +
 | |
|  int main(int argc, char *argv[]) {
 | |
|          test_utf8_is_valid();
 | |
|          test_utf8_is_printable();
 | |
|          test_ascii_is_valid();
 | |
|          test_ascii_filter();
 | |
|          test_utf8_encoded_valid_unichar();
 | |
| +        test_utf8_escaping();
 | |
|  
 | |
|          return 0;
 | |
|  }
 |