From 254a99729c1d161b1866d1921f63d8a2d7678da9 Mon Sep 17 00:00:00 2001 From: "David Kaspar [Dee'Kej]" Date: Tue, 24 Jan 2017 18:39:21 +0100 Subject: [PATCH] ghostscript-9.20-remove-and-reimplement-ConvertUTF.patch added Resolves: #1404933 --- ...20-remove-and-reimplement-ConvertUTF.patch | 1228 +++++++++++++++++ ghostscript.spec | 9 +- 2 files changed, 1236 insertions(+), 1 deletion(-) create mode 100644 ghostscript-9.20-remove-and-reimplement-ConvertUTF.patch diff --git a/ghostscript-9.20-remove-and-reimplement-ConvertUTF.patch b/ghostscript-9.20-remove-and-reimplement-ConvertUTF.patch new file mode 100644 index 0000000..3083071 --- /dev/null +++ b/ghostscript-9.20-remove-and-reimplement-ConvertUTF.patch @@ -0,0 +1,1228 @@ +From 273a133110838ee5702e7eb6409a853c598211b2 Mon Sep 17 00:00:00 2001 +From: Ken Sharp +Date: Thu, 29 Sep 2016 17:35:05 +0100 +Subject: [PATCH] Remove (and re-implement) ConvertUTF.c + +Bug #697122 " embedded ConvertUTF.c is buggy and licensed incompatibly with GPL/APGL" + +It's not clear that this code is incompatible with GPL, nor do we think +any 'bugginess' in the code affects us, since we are using a comparatively +small part of the included code. + +Nevertheless it's possible to remove the code, and re-implement the small +part we actually need, and that is done here. + +Also removed the DSCEncodingToUnicode option which was insanely difficult +to use, and incorrectly documented. + +This shows one difference, 692486_-_heap_overflow_in_pdf_to_ucs2.pdf +now correctly throws an error, because the PDF file contains document +information (Application) which has an invalid UTF16-BE sequence. 
+--- + base/ConvertUTF.c | 539 ----------------------------------------- + base/ConvertUTF.h | 155 ------------ + base/lib.mak | 4 - + devices/devs.mak | 5 +- + devices/vector/gdevpdf.c | 16 +- + devices/vector/gdevpdfb.h | 1 - + devices/vector/gdevpdfe.c | 270 +++++++++++---------- + devices/vector/gdevpdfp.c | 1 - + devices/vector/gdevpdfx.h | 17 +- + windows/ghostscript.vcproj | 8 - + windows/ghostscript_rt.vcxproj | 2 - + 11 files changed, 155 insertions(+), 863 deletions(-) + delete mode 100644 base/ConvertUTF.c + delete mode 100644 base/ConvertUTF.h + +diff --git a/base/ConvertUTF.c b/base/ConvertUTF.c +deleted file mode 100644 +index cb2e2de..0000000 +--- a/base/ConvertUTF.c ++++ /dev/null +@@ -1,539 +0,0 @@ +-/* +- * Copyright 2001-2004 Unicode, Inc. +- * +- * Disclaimer +- * +- * This source code is provided as is by Unicode, Inc. No claims are +- * made as to fitness for any particular purpose. No warranties of any +- * kind are expressed or implied. The recipient agrees to determine +- * applicability of information provided. If this file has been +- * purchased on magnetic or optical media from Unicode, Inc., the +- * sole remedy for any claim will be exchange of defective media +- * within 90 days of receipt. +- * +- * Limitations on Rights to Redistribute This Code +- * +- * Unicode, Inc. hereby grants the right to freely use the information +- * supplied in this file in the creation of products supporting the +- * Unicode Standard, and to make copies of this file in any form +- * for internal or external distribution as long as this notice +- * remains attached. +- */ +- +- +-/* --------------------------------------------------------------------- +- +- Conversions between UTF32, UTF-16, and UTF-8. Source code file. +- Author: Mark E. Davis, 1994. +- Rev History: Rick McGowan, fixes & updates May 2001. +- Sept 2001: fixed const & error conditions per +- mods suggested by S. Parent & A. Lillich. 
+- June 2002: Tim Dodd added detection and handling of incomplete +- source sequences, enhanced error detection, added casts +- to eliminate compiler warnings. +- July 2003: slight mods to back out aggressive FFFE detection. +- Jan 2004: updated switches in from-UTF8 conversions. +- Oct 2004: updated to use UNI_MAX_LEGAL_UTF32 in UTF-32 conversions. +- +- See the header file "ConvertUTF.h" for complete documentation. +- +------------------------------------------------------------------------- */ +- +-#include "ConvertUTF.h" +-#ifdef CVTUTF_DEBUG +-#include +-#endif +- +-static const int halfShift = 10; /* used for shifting by 10 bits */ +- +-static const UTF32 halfBase = 0x0010000UL; +-static const UTF32 halfMask = 0x3FFUL; +- +-#define UNI_SUR_HIGH_START (UTF32)0xD800 +-#define UNI_SUR_HIGH_END (UTF32)0xDBFF +-#define UNI_SUR_LOW_START (UTF32)0xDC00 +-#define UNI_SUR_LOW_END (UTF32)0xDFFF +-#define false 0 +-#define true 1 +- +-/* --------------------------------------------------------------------- */ +- +-ConversionResult ConvertUTF32toUTF16 ( +- const UTF32** sourceStart, const UTF32* sourceEnd, +- UTF16** targetStart, UTF16* targetEnd, ConversionFlags flags) { +- ConversionResult result = conversionOK; +- const UTF32* source = *sourceStart; +- UTF16* target = *targetStart; +- while (source < sourceEnd) { +- UTF32 ch; +- if (target >= targetEnd) { +- result = targetExhausted; break; +- } +- ch = *source++; +- if (ch <= UNI_MAX_BMP) { /* Target is a character <= 0xFFFF */ +- /* UTF-16 surrogate values are illegal in UTF-32; 0xffff or 0xfffe are both reserved values */ +- if (ch >= UNI_SUR_HIGH_START && ch <= UNI_SUR_LOW_END) { +- if (flags == strictConversion) { +- --source; /* return to the illegal value itself */ +- result = sourceIllegal; +- break; +- } else { +- *target++ = UNI_REPLACEMENT_CHAR; +- } +- } else { +- *target++ = (UTF16)ch; /* normal case */ +- } +- } else if (ch > UNI_MAX_LEGAL_UTF32) { +- if (flags == strictConversion) { +- result = 
sourceIllegal; +- } else { +- *target++ = UNI_REPLACEMENT_CHAR; +- } +- } else { +- /* target is a character in range 0xFFFF - 0x10FFFF. */ +- if (target + 1 >= targetEnd) { +- --source; /* Back up source pointer! */ +- result = targetExhausted; break; +- } +- ch -= halfBase; +- *target++ = (UTF16)((ch >> halfShift) + UNI_SUR_HIGH_START); +- *target++ = (UTF16)((ch & halfMask) + UNI_SUR_LOW_START); +- } +- } +- *sourceStart = source; +- *targetStart = target; +- return result; +-} +- +-/* --------------------------------------------------------------------- */ +- +-ConversionResult ConvertUTF16toUTF32 ( +- const UTF16** sourceStart, const UTF16* sourceEnd, +- UTF32** targetStart, UTF32* targetEnd, ConversionFlags flags) { +- ConversionResult result = conversionOK; +- const UTF16* source = *sourceStart; +- UTF32* target = *targetStart; +- UTF32 ch, ch2; +- while (source < sourceEnd) { +- const UTF16* oldSource = source; /* In case we have to back up because of target overflow. */ +- ch = *source++; +- /* If we have a surrogate pair, convert to UTF32 first. */ +- if (ch >= UNI_SUR_HIGH_START && ch <= UNI_SUR_HIGH_END) { +- /* If the 16 bits following the high surrogate are in the source buffer... */ +- if (source < sourceEnd) { +- ch2 = *source; +- /* If it's a low surrogate, convert to UTF32. */ +- if (ch2 >= UNI_SUR_LOW_START && ch2 <= UNI_SUR_LOW_END) { +- ch = ((ch - UNI_SUR_HIGH_START) << halfShift) +- + (ch2 - UNI_SUR_LOW_START) + halfBase; +- ++source; +- } else if (flags == strictConversion) { /* it's an unpaired high surrogate */ +- --source; /* return to the illegal value itself */ +- result = sourceIllegal; +- break; +- } +- } else { /* We don't have the 16 bits following the high surrogate. 
*/ +- --source; /* return to the high surrogate */ +- result = sourceExhausted; +- break; +- } +- } else if (flags == strictConversion) { +- /* UTF-16 surrogate values are illegal in UTF-32 */ +- if (ch >= UNI_SUR_LOW_START && ch <= UNI_SUR_LOW_END) { +- --source; /* return to the illegal value itself */ +- result = sourceIllegal; +- break; +- } +- } +- if (target >= targetEnd) { +- source = oldSource; /* Back up source pointer! */ +- result = targetExhausted; break; +- } +- *target++ = ch; +- } +- *sourceStart = source; +- *targetStart = target; +-#ifdef CVTUTF_DEBUG +-if (result == sourceIllegal) { +- fprintf(stderr, "ConvertUTF16toUTF32 illegal seq 0x%04x,%04x\n", ch, ch2); +- fflush(stderr); +-} +-#endif +- return result; +-} +- +-/* --------------------------------------------------------------------- */ +- +-/* +- * Index into the table below with the first byte of a UTF-8 sequence to +- * get the number of trailing bytes that are supposed to follow it. +- * Note that *legal* UTF-8 values can't have 4 or 5-bytes. The table is +- * left as-is for anyone who may want to do such conversion, which was +- * allowed in earlier algorithms. +- */ +-static const char trailingBytesForUTF8[256] = { +- 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +- 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +- 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +- 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +- 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +- 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +- 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, +- 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, 3,3,3,3,3,3,3,3,4,4,4,4,5,5,5,5 +-}; +- +-/* +- * Magic values subtracted from a buffer value during UTF8 conversion. +- * This table contains as many values as there might be trailing bytes +- * in a UTF-8 sequence. 
+- */ +-static const UTF32 offsetsFromUTF8[6] = { 0x00000000UL, 0x00003080UL, 0x000E2080UL, +- 0x03C82080UL, 0xFA082080UL, 0x82082080UL }; +- +-/* +- * Once the bits are split out into bytes of UTF-8, this is a mask OR-ed +- * into the first byte, depending on how many bytes follow. There are +- * as many entries in this table as there are UTF-8 sequence types. +- * (I.e., one byte sequence, two byte... etc.). Remember that sequencs +- * for *legal* UTF-8 will be 4 or fewer bytes total. +- */ +-static const UTF8 firstByteMark[7] = { 0x00, 0x00, 0xC0, 0xE0, 0xF0, 0xF8, 0xFC }; +- +-/* --------------------------------------------------------------------- */ +- +-/* The interface converts a whole buffer to avoid function-call overhead. +- * Constants have been gathered. Loops & conditionals have been removed as +- * much as possible for efficiency, in favor of drop-through switches. +- * (See "Note A" at the bottom of the file for equivalent code.) +- * If your compiler supports it, the "isLegalUTF8" call can be turned +- * into an inline function. +- */ +- +-/* --------------------------------------------------------------------- */ +- +-ConversionResult ConvertUTF16toUTF8 ( +- const UTF16** sourceStart, const UTF16* sourceEnd, +- UTF8** targetStart, UTF8* targetEnd, ConversionFlags flags) { +- ConversionResult result = conversionOK; +- const UTF16* source = *sourceStart; +- UTF8* target = *targetStart; +- while (source < sourceEnd) { +- UTF32 ch; +- unsigned short bytesToWrite = 0; +- const UTF32 byteMask = 0xBF; +- const UTF32 byteMark = 0x80; +- const UTF16* oldSource = source; /* In case we have to back up because of target overflow. */ +- ch = *source++; +- /* If we have a surrogate pair, convert to UTF32 first. */ +- if (ch >= UNI_SUR_HIGH_START && ch <= UNI_SUR_HIGH_END) { +- /* If the 16 bits following the high surrogate are in the source buffer... */ +- if (source < sourceEnd) { +- UTF32 ch2 = *source; +- /* If it's a low surrogate, convert to UTF32. 
*/ +- if (ch2 >= UNI_SUR_LOW_START && ch2 <= UNI_SUR_LOW_END) { +- ch = ((ch - UNI_SUR_HIGH_START) << halfShift) +- + (ch2 - UNI_SUR_LOW_START) + halfBase; +- ++source; +- } else if (flags == strictConversion) { /* it's an unpaired high surrogate */ +- --source; /* return to the illegal value itself */ +- result = sourceIllegal; +- break; +- } +- } else { /* We don't have the 16 bits following the high surrogate. */ +- --source; /* return to the high surrogate */ +- result = sourceExhausted; +- break; +- } +- } else if (flags == strictConversion) { +- /* UTF-16 surrogate values are illegal in UTF-32 */ +- if (ch >= UNI_SUR_LOW_START && ch <= UNI_SUR_LOW_END) { +- --source; /* return to the illegal value itself */ +- result = sourceIllegal; +- break; +- } +- } +- /* Figure out how many bytes the result will require */ +- if (ch < (UTF32)0x80) { bytesToWrite = 1; +- } else if (ch < (UTF32)0x800) { bytesToWrite = 2; +- } else if (ch < (UTF32)0x10000) { bytesToWrite = 3; +- } else if (ch < (UTF32)0x110000) { bytesToWrite = 4; +- } else { bytesToWrite = 3; +- ch = UNI_REPLACEMENT_CHAR; +- } +- +- target += bytesToWrite; +- if (target > targetEnd) { +- source = oldSource; /* Back up source pointer! */ +- target -= bytesToWrite; result = targetExhausted; break; +- } +- switch (bytesToWrite) { /* note: everything falls through. */ +- case 4: *--target = (UTF8)((ch | byteMark) & byteMask); ch >>= 6; +- case 3: *--target = (UTF8)((ch | byteMark) & byteMask); ch >>= 6; +- case 2: *--target = (UTF8)((ch | byteMark) & byteMask); ch >>= 6; +- case 1: *--target = (UTF8)(ch | firstByteMark[bytesToWrite]); +- } +- target += bytesToWrite; +- } +- *sourceStart = source; +- *targetStart = target; +- return result; +-} +- +-/* --------------------------------------------------------------------- */ +- +-/* +- * Utility routine to tell whether a sequence of bytes is legal UTF-8. +- * This must be called with the length pre-determined by the first byte. 
+- * If not calling this from ConvertUTF8to*, then the length can be set by: +- * length = trailingBytesForUTF8[*source]+1; +- * and the sequence is illegal right away if there aren't that many bytes +- * available. +- * If presented with a length > 4, this returns false. The Unicode +- * definition of UTF-8 goes up to 4-byte sequences. +- */ +- +-static Boolean isLegalUTF8(const UTF8 *source, int length) { +- UTF8 a; +- const UTF8 *srcptr = source+length; +- switch (length) { +- default: return false; +- /* Everything else falls through when "true"... */ +- case 4: if ((a = (*--srcptr)) < 0x80 || a > 0xBF) return false; +- case 3: if ((a = (*--srcptr)) < 0x80 || a > 0xBF) return false; +- case 2: if ((a = (*--srcptr)) > 0xBF) return false; +- +- switch (*source) { +- /* no fall-through in this inner switch */ +- case 0xE0: if (a < 0xA0) return false; break; +- case 0xED: if (a > 0x9F) return false; break; +- case 0xF0: if (a < 0x90) return false; break; +- case 0xF4: if (a > 0x8F) return false; break; +- default: if (a < 0x80) return false; +- } +- +- case 1: if (*source >= 0x80 && *source < 0xC2) return false; +- } +- if (*source > 0xF4) return false; +- return true; +-} +- +-/* --------------------------------------------------------------------- */ +- +-/* +- * Exported function to return whether a UTF-8 sequence is legal or not. +- * This is not used here; it's just exported. 
+- */ +-Boolean isLegalUTF8Sequence(const UTF8 *source, const UTF8 *sourceEnd) { +- int length = trailingBytesForUTF8[*source]+1; +- if (source+length > sourceEnd) { +- return false; +- } +- return isLegalUTF8(source, length); +-} +- +-/* --------------------------------------------------------------------- */ +- +-ConversionResult ConvertUTF8toUTF16 ( +- const UTF8** sourceStart, const UTF8* sourceEnd, +- UTF16** targetStart, UTF16* targetEnd, ConversionFlags flags) { +- ConversionResult result = conversionOK; +- const UTF8* source = *sourceStart; +- UTF16* target = *targetStart; +- while (source < sourceEnd) { +- UTF32 ch = 0; +- unsigned short extraBytesToRead = trailingBytesForUTF8[*source]; +- if (source + extraBytesToRead >= sourceEnd) { +- result = sourceExhausted; break; +- } +- /* Do this check whether lenient or strict */ +- if (! isLegalUTF8(source, extraBytesToRead+1)) { +- result = sourceIllegal; +- break; +- } +- /* +- * The cases all fall through. See "Note A" below. +- */ +- switch (extraBytesToRead) { +- case 5: ch += *source++; ch <<= 6; /* remember, illegal UTF-8 */ +- case 4: ch += *source++; ch <<= 6; /* remember, illegal UTF-8 */ +- case 3: ch += *source++; ch <<= 6; +- case 2: ch += *source++; ch <<= 6; +- case 1: ch += *source++; ch <<= 6; +- case 0: ch += *source++; +- } +- ch -= offsetsFromUTF8[extraBytesToRead]; +- +- if (target >= targetEnd) { +- source -= (extraBytesToRead+1); /* Back up source pointer! 
*/ +- result = targetExhausted; break; +- } +- if (ch <= UNI_MAX_BMP) { /* Target is a character <= 0xFFFF */ +- /* UTF-16 surrogate values are illegal in UTF-32 */ +- if (ch >= UNI_SUR_HIGH_START && ch <= UNI_SUR_LOW_END) { +- if (flags == strictConversion) { +- source -= (extraBytesToRead+1); /* return to the illegal value itself */ +- result = sourceIllegal; +- break; +- } else { +- *target++ = UNI_REPLACEMENT_CHAR; +- } +- } else { +- *target++ = (UTF16)ch; /* normal case */ +- } +- } else if (ch > UNI_MAX_UTF16) { +- if (flags == strictConversion) { +- result = sourceIllegal; +- source -= (extraBytesToRead+1); /* return to the start */ +- break; /* Bail out; shouldn't continue */ +- } else { +- *target++ = UNI_REPLACEMENT_CHAR; +- } +- } else { +- /* target is a character in range 0xFFFF - 0x10FFFF. */ +- if (target + 1 >= targetEnd) { +- source -= (extraBytesToRead+1); /* Back up source pointer! */ +- result = targetExhausted; break; +- } +- ch -= halfBase; +- *target++ = (UTF16)((ch >> halfShift) + UNI_SUR_HIGH_START); +- *target++ = (UTF16)((ch & halfMask) + UNI_SUR_LOW_START); +- } +- } +- *sourceStart = source; +- *targetStart = target; +- return result; +-} +- +-/* --------------------------------------------------------------------- */ +- +-ConversionResult ConvertUTF32toUTF8 ( +- const UTF32** sourceStart, const UTF32* sourceEnd, +- UTF8** targetStart, UTF8* targetEnd, ConversionFlags flags) { +- ConversionResult result = conversionOK; +- const UTF32* source = *sourceStart; +- UTF8* target = *targetStart; +- while (source < sourceEnd) { +- UTF32 ch; +- unsigned short bytesToWrite = 0; +- const UTF32 byteMask = 0xBF; +- const UTF32 byteMark = 0x80; +- ch = *source++; +- if (flags == strictConversion ) { +- /* UTF-16 surrogate values are illegal in UTF-32 */ +- if (ch >= UNI_SUR_HIGH_START && ch <= UNI_SUR_LOW_END) { +- --source; /* return to the illegal value itself */ +- result = sourceIllegal; +- break; +- } +- } +- /* +- * Figure out how many bytes 
the result will require. Turn any +- * illegally large UTF32 things (> Plane 17) into replacement chars. +- */ +- if (ch < (UTF32)0x80) { bytesToWrite = 1; +- } else if (ch < (UTF32)0x800) { bytesToWrite = 2; +- } else if (ch < (UTF32)0x10000) { bytesToWrite = 3; +- } else if (ch <= UNI_MAX_LEGAL_UTF32) { bytesToWrite = 4; +- } else { bytesToWrite = 3; +- ch = UNI_REPLACEMENT_CHAR; +- result = sourceIllegal; +- } +- +- target += bytesToWrite; +- if (target > targetEnd) { +- --source; /* Back up source pointer! */ +- target -= bytesToWrite; result = targetExhausted; break; +- } +- switch (bytesToWrite) { /* note: everything falls through. */ +- case 4: *--target = (UTF8)((ch | byteMark) & byteMask); ch >>= 6; +- case 3: *--target = (UTF8)((ch | byteMark) & byteMask); ch >>= 6; +- case 2: *--target = (UTF8)((ch | byteMark) & byteMask); ch >>= 6; +- case 1: *--target = (UTF8) (ch | firstByteMark[bytesToWrite]); +- } +- target += bytesToWrite; +- } +- *sourceStart = source; +- *targetStart = target; +- return result; +-} +- +-/* --------------------------------------------------------------------- */ +- +-ConversionResult ConvertUTF8toUTF32 ( +- const UTF8** sourceStart, const UTF8* sourceEnd, +- UTF32** targetStart, UTF32* targetEnd, ConversionFlags flags) { +- ConversionResult result = conversionOK; +- const UTF8* source = *sourceStart; +- UTF32* target = *targetStart; +- while (source < sourceEnd) { +- UTF32 ch = 0; +- unsigned short extraBytesToRead = trailingBytesForUTF8[*source]; +- if (source + extraBytesToRead >= sourceEnd) { +- result = sourceExhausted; break; +- } +- /* Do this check whether lenient or strict */ +- if (! isLegalUTF8(source, extraBytesToRead+1)) { +- result = sourceIllegal; +- break; +- } +- /* +- * The cases all fall through. See "Note A" below. 
+- */ +- switch (extraBytesToRead) { +- case 5: ch += *source++; ch <<= 6; +- case 4: ch += *source++; ch <<= 6; +- case 3: ch += *source++; ch <<= 6; +- case 2: ch += *source++; ch <<= 6; +- case 1: ch += *source++; ch <<= 6; +- case 0: ch += *source++; +- } +- ch -= offsetsFromUTF8[extraBytesToRead]; +- +- if (target >= targetEnd) { +- source -= (extraBytesToRead+1); /* Back up the source pointer! */ +- result = targetExhausted; break; +- } +- if (ch <= UNI_MAX_LEGAL_UTF32) { +- /* +- * UTF-16 surrogate values are illegal in UTF-32, and anything +- * over Plane 17 (> 0x10FFFF) is illegal. +- */ +- if (ch >= UNI_SUR_HIGH_START && ch <= UNI_SUR_LOW_END) { +- if (flags == strictConversion) { +- source -= (extraBytesToRead+1); /* return to the illegal value itself */ +- result = sourceIllegal; +- break; +- } else { +- *target++ = UNI_REPLACEMENT_CHAR; +- } +- } else { +- *target++ = ch; +- } +- } else { /* i.e., ch > UNI_MAX_LEGAL_UTF32 */ +- result = sourceIllegal; +- *target++ = UNI_REPLACEMENT_CHAR; +- } +- } +- *sourceStart = source; +- *targetStart = target; +- return result; +-} +- +-/* --------------------------------------------------------------------- +- +- Note A. +- The fall-through switches in UTF-8 reading code save a +- temp variable, some decrements & conditionals. The switches +- are equivalent to the following loop: +- { +- int tmpBytesToRead = extraBytesToRead+1; +- do { +- ch += *source++; +- --tmpBytesToRead; +- if (tmpBytesToRead) ch <<= 6; +- } while (tmpBytesToRead > 0); +- } +- In UTF-8 writing code, the switches on "bytesToWrite" are +- similarly unrolled loops. +- +- --------------------------------------------------------------------- */ +diff --git a/base/ConvertUTF.h b/base/ConvertUTF.h +deleted file mode 100644 +index 538bec6..0000000 +--- a/base/ConvertUTF.h ++++ /dev/null +@@ -1,155 +0,0 @@ +-/* +- * Copyright 2001-2004 Unicode, Inc. +- * +- * Disclaimer +- * +- * This source code is provided as is by Unicode, Inc. 
No claims are +- * made as to fitness for any particular purpose. No warranties of any +- * kind are expressed or implied. The recipient agrees to determine +- * applicability of information provided. If this file has been +- * purchased on magnetic or optical media from Unicode, Inc., the +- * sole remedy for any claim will be exchange of defective media +- * within 90 days of receipt. +- * +- * Limitations on Rights to Redistribute This Code +- * +- * Unicode, Inc. hereby grants the right to freely use the information +- * supplied in this file in the creation of products supporting the +- * Unicode Standard, and to make copies of this file in any form +- * for internal or external distribution as long as this notice +- * remains attached. +- */ +- +- +-#ifndef ConvertUTF_INCLUDED +-#define ConvertUTF_INCLUDED +- +-/* --------------------------------------------------------------------- +- +- Conversions between UTF32, UTF-16, and UTF-8. Header file. +- +- Several funtions are included here, forming a complete set of +- conversions between the three formats. UTF-7 is not included +- here, but is handled in a separate source file. +- +- Each of these routines takes pointers to input buffers and output +- buffers. The input buffers are const. +- +- Each routine converts the text between *sourceStart and sourceEnd, +- putting the result into the buffer between *targetStart and +- targetEnd. Note: the end pointers are *after* the last item: e.g. +- *(sourceEnd - 1) is the last item. +- +- The return result indicates whether the conversion was successful, +- and if not, whether the problem was in the source or target buffers. +- (Only the first encountered problem is indicated.) +- +- After the conversion, *sourceStart and *targetStart are both +- updated to point to the end of last text successfully converted in +- the respective buffers. +- +- Input parameters: +- sourceStart - pointer to a pointer to the source buffer. 
+- The contents of this are modified on return so that +- it points at the next thing to be converted. +- targetStart - similarly, pointer to pointer to the target buffer. +- sourceEnd, targetEnd - respectively pointers to the ends of the +- two buffers, for overflow checking only. +- +- These conversion functions take a ConversionFlags argument. When this +- flag is set to strict, both irregular sequences and isolated surrogates +- will cause an error. When the flag is set to lenient, both irregular +- sequences and isolated surrogates are converted. +- +- Whether the flag is strict or lenient, all illegal sequences will cause +- an error return. This includes sequences such as: , , +- or in UTF-8, and values above 0x10FFFF in UTF-32. Conformant code +- must check for illegal sequences. +- +- When the flag is set to lenient, characters over 0x10FFFF are converted +- to the replacement character; otherwise (when the flag is set to strict) +- they constitute an error. +- +- Output parameters: +- The value "sourceIllegal" is returned from some routines if the input +- sequence is malformed. When "sourceIllegal" is returned, the source +- value will point to the illegal value that caused the problem. E.g., +- in UTF-8 when a sequence is malformed, it points to the start of the +- malformed sequence. +- +- Author: Mark E. Davis, 1994. +- Rev History: Rick McGowan, fixes & updates May 2001. +- Fixes & updates, Sept 2001. +- +------------------------------------------------------------------------- */ +- +-/* --------------------------------------------------------------------- +- The following 4 definitions are compiler-specific. +- The C standard does not guarantee that wchar_t has at least +- 16 bits, so wchar_t is no less portable than unsigned short! +- All should be unsigned values to avoid sign extension during +- bit mask & shift operations. 
+------------------------------------------------------------------------- */ +- +-typedef unsigned long UTF32; /* at least 32 bits */ +-typedef unsigned short UTF16; /* at least 16 bits */ +-typedef unsigned char UTF8; /* typically 8 bits */ +-typedef unsigned char Boolean; /* 0 or 1 */ +- +-/* Some fundamental constants */ +-#define UNI_REPLACEMENT_CHAR (UTF32)0x0000FFFD +-#define UNI_MAX_BMP (UTF32)0x0000FFFF +-#define UNI_MAX_UTF16 (UTF32)0x0010FFFF +-#define UNI_MAX_UTF32 (UTF32)0x7FFFFFFF +-#define UNI_MAX_LEGAL_UTF32 (UTF32)0x0010FFFF +- +-typedef enum { +- conversionOK, /* conversion successful */ +- sourceExhausted, /* partial character in source, but hit end */ +- targetExhausted, /* insuff. room in target for conversion */ +- sourceIllegal /* source sequence is illegal/malformed */ +-} ConversionResult; +- +-typedef enum { +- strictConversion = 0, +- lenientConversion +-} ConversionFlags; +- +-/* This is for C++ and does no harm in C */ +-#ifdef __cplusplus +-extern "C" { +-#endif +- +-ConversionResult ConvertUTF8toUTF16 ( +- const UTF8** sourceStart, const UTF8* sourceEnd, +- UTF16** targetStart, UTF16* targetEnd, ConversionFlags flags); +- +-ConversionResult ConvertUTF16toUTF8 ( +- const UTF16** sourceStart, const UTF16* sourceEnd, +- UTF8** targetStart, UTF8* targetEnd, ConversionFlags flags); +- +-ConversionResult ConvertUTF8toUTF32 ( +- const UTF8** sourceStart, const UTF8* sourceEnd, +- UTF32** targetStart, UTF32* targetEnd, ConversionFlags flags); +- +-ConversionResult ConvertUTF32toUTF8 ( +- const UTF32** sourceStart, const UTF32* sourceEnd, +- UTF8** targetStart, UTF8* targetEnd, ConversionFlags flags); +- +-ConversionResult ConvertUTF16toUTF32 ( +- const UTF16** sourceStart, const UTF16* sourceEnd, +- UTF32** targetStart, UTF32* targetEnd, ConversionFlags flags); +- +-ConversionResult ConvertUTF32toUTF16 ( +- const UTF32** sourceStart, const UTF32* sourceEnd, +- UTF16** targetStart, UTF16* targetEnd, ConversionFlags flags); +- +-Boolean 
isLegalUTF8Sequence(const UTF8 *source, const UTF8 *sourceEnd); +- +-#ifdef __cplusplus +-} +-#endif +- +-/* --------------------------------------------------------------------- */ +- +-#endif /* ConvertUTF_INCLUDED */ +diff --git a/base/lib.mak b/base/lib.mak +index 173e2c6..2de6565 100644 +--- a/base/lib.mak ++++ b/base/lib.mak +@@ -52,7 +52,6 @@ GLLCMS2CC=$(CC) $(LCMS2_CFLAGS) $(CFLAGS) $(I_)$(GLI_) $(II)$(LCMS2SRCDIR)$(D)in + lcms2_h=$(LCMS2SRCDIR)$(D)include$(D)lcms2.h + lcms2_plugin_h=$(LCMS2SRCDIR)$(D)include$(D)lcms2_plugin.h + +-ConvertUTF_h=$(GLSRC)ConvertUTF.h + gdevdcrd_h=$(GLSRC)gdevdcrd.h + gdevpccm_h=$(GLSRC)gdevpccm.h + +@@ -1097,9 +1096,6 @@ $(GLOBJ)gdevpccm.$(OBJ) : $(GLSRC)gdevpccm.c $(AK)\ + $(gx_h) $(gsmatrix_h) $(gxdevice_h) $(gdevpccm_h) $(LIB_MAK) $(MAKEDIRS) + $(GLCC) $(GLO_)gdevpccm.$(OBJ) $(C_) $(GLSRC)gdevpccm.c + +-$(GLOBJ)ConvertUTF.$(OBJ) : $(GLSRC)ConvertUTF.c $(ConvertUTF_h) $(LIB_MAK) $(MAKEDIRS) +- $(GLCC) $(GLO_)ConvertUTF.$(OBJ) $(C_) $(GLSRC)ConvertUTF.c +- + ### Memory devices + + $(GLOBJ)gdevmem.$(OBJ) : $(GLSRC)gdevmem.c $(AK) $(gx_h) $(gserrors_h) \ +diff --git a/devices/devs.mak b/devices/devs.mak +index ea27ab0..51ec363 100644 +--- a/devices/devs.mak ++++ b/devices/devs.mak +@@ -835,9 +835,8 @@ pdfwrite5_=$(DEVOBJ)gdevpdfm.$(OBJ) + pdfwrite6_=$(DEVOBJ)gdevpdfo.$(OBJ) $(DEVOBJ)gdevpdfp.$(OBJ) $(DEVOBJ)gdevpdft.$(OBJ) + pdfwrite7_=$(DEVOBJ)gdevpdfr.$(OBJ) + pdfwrite8_=$(DEVOBJ)gdevpdfu.$(OBJ) $(DEVOBJ)gdevpdfv.$(OBJ) $(DEVOBJ)gdevagl.$(OBJ) +-pdfwrite9_= $(GLOBJ)ConvertUTF.$(OBJ) +-pdfwrite10_=$(DEVOBJ)gsflip.$(OBJ) +-pdfwrite11_=$(DEVOBJ)scantab.$(OBJ) $(DEVOBJ)sfilter2.$(OBJ) ++pdfwrite9_=$(DEVOBJ)gsflip.$(OBJ) ++pdfwrite10_=$(DEVOBJ)scantab.$(OBJ) $(DEVOBJ)sfilter2.$(OBJ) + pdfwrite_=$(pdfwrite1_) $(pdfwrite2_) $(pdfwrite3_) $(pdfwrite4_)\ + $(pdfwrite5_) $(pdfwrite6_) $(pdfwrite7_) $(pdfwrite8_) $(pdfwrite9_)\ + $(pdfwrite10_) $(pdfwrite11_) +diff --git a/devices/vector/gdevpdf.c b/devices/vector/gdevpdf.c +index 
2b3186d..20e0ae8 100644 +--- a/devices/vector/gdevpdf.c ++++ b/devices/vector/gdevpdf.c +@@ -111,14 +111,13 @@ ENUM_PTRS_WITH(device_pdfwrite_enum_ptrs, gx_device_pdf *pdev) + ENUM_PTR(32, gx_device_pdf, pres_soft_mask_dict); + ENUM_PTR(33, gx_device_pdf, PDFXTrimBoxToMediaBoxOffset.data); + ENUM_PTR(34, gx_device_pdf, PDFXBleedBoxToTrimBoxOffset.data); +- ENUM_PTR(35, gx_device_pdf, DSCEncodingToUnicode.data); +- ENUM_PTR(36, gx_device_pdf, Identity_ToUnicode_CMaps[0]); +- ENUM_PTR(37, gx_device_pdf, Identity_ToUnicode_CMaps[1]); +- ENUM_PTR(38, gx_device_pdf, vgstack); +- ENUM_PTR(39, gx_device_pdf, outline_levels); +- ENUM_PTR(40, gx_device_pdf, EmbeddedFiles); +- ENUM_PTR(41, gx_device_pdf, pdf_font_dir); +- ENUM_PTR(42, gx_device_pdf, ExtensionMetadata); ++ ENUM_PTR(35, gx_device_pdf, Identity_ToUnicode_CMaps[0]); ++ ENUM_PTR(36, gx_device_pdf, Identity_ToUnicode_CMaps[1]); ++ ENUM_PTR(37, gx_device_pdf, vgstack); ++ ENUM_PTR(38, gx_device_pdf, outline_levels); ++ ENUM_PTR(39, gx_device_pdf, EmbeddedFiles); ++ ENUM_PTR(40, gx_device_pdf, pdf_font_dir); ++ ENUM_PTR(41, gx_device_pdf, ExtensionMetadata); + #define e1(i,elt) ENUM_PARAM_STRING_PTR(i + gx_device_pdf_num_ptrs, gx_device_pdf, elt); + gx_device_pdf_do_param_strings(e1) + #undef e1 +@@ -165,7 +164,6 @@ static RELOC_PTRS_WITH(device_pdfwrite_reloc_ptrs, gx_device_pdf *pdev) + RELOC_PTR(gx_device_pdf, pres_soft_mask_dict); + RELOC_PTR(gx_device_pdf, PDFXTrimBoxToMediaBoxOffset.data); + RELOC_PTR(gx_device_pdf, PDFXBleedBoxToTrimBoxOffset.data); +- RELOC_PTR(gx_device_pdf, DSCEncodingToUnicode.data); + RELOC_PTR(gx_device_pdf, Identity_ToUnicode_CMaps[0]); + RELOC_PTR(gx_device_pdf, Identity_ToUnicode_CMaps[1]); + RELOC_PTR(gx_device_pdf, vgstack); +diff --git a/devices/vector/gdevpdfb.h b/devices/vector/gdevpdfb.h +index 08f18c5..447f0f5 100644 +--- a/devices/vector/gdevpdfb.h ++++ b/devices/vector/gdevpdfb.h +@@ -141,7 +141,6 @@ const gx_device_pdf PDF_DEVICE_IDENT = + 12000, /* MaxClipPathSize */ /* HP 
LaserJet 1320 hangs with 14000. */ + 256000, /* MaxShadingBitmapSize */ + PDF_DEVICE_MaxInlineImageSize, /* MaxInlineImageSize */ +- {0, 0}, /* DSCEncodingToUnicode */ + {0, 0, 0}, /* OwnerPassword */ + {0, 0, 0}, /* UserPassword */ + 0, /* KeyLength */ +diff --git a/devices/vector/gdevpdfe.c b/devices/vector/gdevpdfe.c +index 1aa1f25..f23a02d 100644 +--- a/devices/vector/gdevpdfe.c ++++ b/devices/vector/gdevpdfe.c +@@ -26,7 +26,6 @@ + #include "gdevpdfx.h" + #include "gdevpdfg.h" + #include "gdevpdfo.h" +-#include "ConvertUTF.h" + + char PDFDocEncodingLookup [92] = { + 0x20, 0x22, 0x20, 0x20, 0x20, 0x21, 0x20, 0x26, +@@ -343,155 +342,162 @@ decode_escape(const byte *data, int data_length, int *index) + return c; /* A wrong escapement sequence. */ + } + +-static int +-pdf_xmp_write_translated(gx_device_pdf *pdev, stream *s, const byte *data, int data_length, +- void(*write)(stream *s, const byte *data, int data_length)) ++/* ++ * Once the bits are split out into bytes of UTF-8, this is a mask OR-ed ++ * into the first byte, depending on how many bytes follow. There are ++ * as many entries in this table as there are UTF-8 sequence types. ++ * (I.e., one byte sequence, two byte... etc.). Remember that sequencs ++ * for *legal* UTF-8 will be 4 or fewer bytes total. 
++ */ ++static const char firstByteMark[7] = { 0x00, 0x00, 0xC0, 0xE0, 0xF0, 0xF8, 0xFC }; ++ ++static int gs_ConvertUTF16(char *UTF16, int UTF16Len, unsigned char **UTF8Start, int UTF8Len) + { +- if (pdev->DSCEncodingToUnicode.data == 0) { +- int i, j=0; +- unsigned char *buf0; ++ int i, bytes = 0; ++ short U16; ++ unsigned char *UTF8 = *UTF8Start; ++ unsigned char *UTF8End = UTF8 + UTF8Len; + +- buf0 = (unsigned char *)gs_alloc_bytes(pdev->memory, data_length * sizeof(unsigned char), +- "pdf_xmp_write_translated"); +- if (buf0 == NULL) +- return_error(gs_error_VMerror); +- for (i = 0; i < data_length; i++) { +- byte c = data[i]; ++ if (fabs(UTF16Len % sizeof(short)) != 0) ++ return gs_note_error(gs_error_rangecheck); ++ ++ for (i=0;i= 0xD800 && U16 <= 0xDBFF) { ++ return gs_note_error(gs_error_rangecheck); + } +- if (buf0[0] != 0xfe || buf0[1] != 0xff) { +- unsigned char *buf1; +- /* We must assume that the information is PDFDocEncoding. In this case +- * we need to convert it into UTF-8. If we just convert it to UTF-16 +- * then we can safely fall through to the code below. +- */ +- /* NB the code below skips the BOM in positions 0 and 1, so we need +- * two extra bytes, to be ignored. 
+- */ +- buf1 = (unsigned char *)gs_alloc_bytes(pdev->memory, (j * sizeof(UTF16)) + 2, +- "pdf_xmp_write_translated"); +- if (buf1 == NULL) { +- gs_free_object(pdev->memory, buf0, "pdf_xmp_write_translated"); +- return_error(gs_error_VMerror); +- } +- memset(buf1, 0x00, (j * sizeof(UTF16)) + 2); +- for (i = 0; i < j; i++) { +- if (buf0[i] <= 0x7f || buf0[i] >= 0xAE) { +- if (buf0[i] == 0x7f) { +- emprintf1(pdev->memory, "PDFDocEncoding %x cannot be represented in Unicode\n", +- buf0[i]); +- } else +- buf1[(i * 2) + 3] = buf0[i]; ++ if (U16 >= 0xDC00 && U16 <= 0xDFFF) { ++ return gs_note_error(gs_error_rangecheck); ++ } ++ ++ if(U16 < 0x80) { ++ bytes = 1; ++ } else { ++ if (U16 < 0x800) { ++ bytes = 2; ++ } else { ++ if (U16 < 0x10000) { ++ bytes = 3; + } else { +- buf1[(i * 2) + 2] = PDFDocEncodingLookup[(buf0[i] - 0x80) * 2]; +- buf1[(i * 2) + 3] = PDFDocEncodingLookup[((buf0[i] - 0x80) * 2) + 1]; +- if (PDFDocEncodingLookup[((buf0[i] - 0x80) * 2) + 1] == 0x00) +- emprintf1(pdev->memory, "PDFDocEncoding %x cannot be represented in Unicode\n", +- PDFDocEncodingLookup[((buf0[i] - 0x80) * 2) + 1]); ++ if (U16 < 0x111000) { ++ bytes = 4; ++ } else { ++ bytes = 3; ++ U16 = 0xFFFD; ++ } + } + } ++ } ++ if (UTF8 + bytes > UTF8End) ++ return gs_note_error(gs_error_VMerror); ++ ++ /* Write from end to beginning, low bytes first */ ++ UTF8 += bytes; ++ ++ switch(bytes) { ++ case 4: ++ *--UTF8 = (unsigned char)((U16 | 0x80) & 0xBF); ++ U16 >>= 6; ++ case 3: ++ *--UTF8 = (unsigned char)((U16 | 0x80) & 0xBF); ++ U16 >>= 6; ++ case 2: ++ *--UTF8 = (unsigned char)((U16 | 0x80) & 0xBF); ++ U16 >>= 6; ++ case 1: ++ *--UTF8 = (unsigned char)(U16 | firstByteMark[bytes]); ++ break; ++ default: ++ return gs_note_error(gs_error_rangecheck); ++ } ++ ++ /* Move to start of next set */ ++ UTF8 += bytes; ++ } ++ *UTF8Start = UTF8; ++ return 0; ++} ++ ++static int ++pdf_xmp_write_translated(gx_device_pdf *pdev, stream *s, const byte *data, int data_length, ++ void(*write)(stream *s, const 
byte *data, int data_length)) ++{ ++ int i, j=0; ++ unsigned char *buf0; ++ ++ buf0 = (unsigned char *)gs_alloc_bytes(pdev->memory, data_length * sizeof(unsigned char), ++ "pdf_xmp_write_translated"); ++ if (buf0 == NULL) ++ return_error(gs_error_VMerror); ++ for (i = 0; i < data_length; i++) { ++ byte c = data[i]; ++ ++ if (c == '\\') ++ c = decode_escape(data, data_length, &i); ++ buf0[j] = c; ++ j++; ++ } ++ if (buf0[0] != 0xfe || buf0[1] != 0xff) { ++ unsigned char *buf1; ++ /* We must assume that the information is PDFDocEncoding. In this case ++ * we need to convert it into UTF-8. If we just convert it to UTF-16 ++ * then we can safely fall through to the code below. ++ */ ++ /* NB the code below skips the BOM in positions 0 and 1, so we need ++ * two extra bytes, to be ignored. ++ */ ++ buf1 = (unsigned char *)gs_alloc_bytes(pdev->memory, (j * sizeof(short)) + 2, ++ "pdf_xmp_write_translated"); ++ if (buf1 == NULL) { + gs_free_object(pdev->memory, buf0, "pdf_xmp_write_translated"); +- buf0 = buf1; +- data_length = j = (j * 2) + 2; ++ return_error(gs_error_VMerror); + } +- { +- /* Its a Unicode (UTF-16BE) string, convert to UTF-8 */ +- UTF16 *buf0b, U16; +- UTF8 *buf1, *buf1b; +- +- /* A single UTF-16 (2 bytes) can end up as 4 bytes in UTF-8 */ +- buf1 = (UTF8 *)gs_alloc_bytes(pdev->memory, data_length * 2 * sizeof(unsigned char), +- "pdf_xmp_write_translated"); +- if (buf1 == NULL) { +- gs_free_object(pdev->memory, buf0, "pdf_xmp_write_translated"); +- return_error(gs_error_VMerror); +- } +- buf1b = buf1; +- /* Skip the Byte Order Mark (0xfe 0xff) */ +- buf0b = (UTF16 *)(buf0 + 2); +- /* ConvertUTF16to UTF8 expects a buffer of UTF16s in the local +- * endian-ness, but the data is big-endian. In case this is a little-endian +- * machine, process the buffer from big-endian to whatever is right for this platform. 
+- */ +- for (i = 2; i < j; i+=2) { +- U16 = (buf0[i] << 8) + buf0[i + 1]; +- *(buf0b++) = U16; +- } +- buf0b = (UTF16 *)(buf0 + 2); +- switch (ConvertUTF16toUTF8((const UTF16**)&buf0b, (UTF16 *)(buf0 + j), +- &buf1b, buf1 + (data_length * 2 * sizeof(unsigned char)), strictConversion)) { +- case conversionOK: +- write(s, buf1, buf1b - buf1); +- gs_free_object(pdev->memory, buf1, "pdf_xmp_write_translated"); +- break; +- case sourceExhausted: +- case targetExhausted: +- case sourceIllegal: +- default: +- gs_free_object(pdev->memory, buf0, "pdf_xmp_write_translated"); +- gs_free_object(pdev->memory, buf1, "pdf_xmp_write_translated"); +- return_error(gs_error_rangecheck); ++ memset(buf1, 0x00, (j * sizeof(short)) + 2); ++ for (i = 0; i < j; i++) { ++ if (buf0[i] <= 0x7f || buf0[i] >= 0xAE) { ++ if (buf0[i] == 0x7f) { ++ emprintf1(pdev->memory, "PDFDocEncoding %x cannot be represented in Unicode\n", ++ buf0[i]); ++ } else ++ buf1[(i * 2) + 3] = buf0[i]; ++ } else { ++ buf1[(i * 2) + 2] = PDFDocEncodingLookup[(buf0[i] - 0x80) * 2]; ++ buf1[(i * 2) + 3] = PDFDocEncodingLookup[((buf0[i] - 0x80) * 2) + 1]; ++ if (PDFDocEncodingLookup[((buf0[i] - 0x80) * 2) + 1] == 0x00) ++ emprintf1(pdev->memory, "PDFDocEncoding %x cannot be represented in Unicode\n", ++ PDFDocEncodingLookup[((buf0[i] - 0x80) * 2) + 1]); + } + } + gs_free_object(pdev->memory, buf0, "pdf_xmp_write_translated"); +- return 0; +- } else { +- UTF16 *buf0; +- const UTF16 *buf0b; +- UTF8 *buf1, *buf1b; +- int i, j = 0; +- +- buf0 = (UTF16 *)gs_alloc_bytes(pdev->memory, data_length * sizeof(UTF16), +- "pdf_xmp_write_translated"); +- if (buf0 == NULL) +- return_error(gs_error_VMerror); +- buf1 = (UTF8 *)gs_alloc_bytes(pdev->memory, data_length * 2, +- "pdf_xmp_write_translated"); ++ buf0 = buf1; ++ data_length = j = (j * 2) + 2; ++ } ++ { ++ /* Its a Unicode (UTF-16BE) string, convert to UTF-8 */ ++ short *buf0b; ++ char *buf1, *buf1b; ++ int code; ++ ++ /* A single UTF-16 (2 bytes) can end up as 4 bytes in UTF-8 
*/ ++ buf1 = (char *)gs_alloc_bytes(pdev->memory, data_length * 2 * sizeof(unsigned char), ++ "pdf_xmp_write_translated"); + if (buf1 == NULL) { + gs_free_object(pdev->memory, buf0, "pdf_xmp_write_translated"); + return_error(gs_error_VMerror); + } +- buf0b = buf0; + buf1b = buf1; +- for (i = 0; i < data_length; i++) { +- byte c = data[i]; +- int v; +- +- if (c == '\\') +- c = decode_escape(data, data_length, &i); +- if (c > pdev->DSCEncodingToUnicode.size) { +- gs_free_object(pdev->memory, buf0, "pdf_xmp_write_translated"); +- gs_free_object(pdev->memory, buf1, "pdf_xmp_write_translated"); +- return_error(gs_error_rangecheck); +- } +- +- v = pdev->DSCEncodingToUnicode.data[c]; +- if (v == -1) +- v = '?'; /* Arbitrary. */ +- buf0[j] = v; +- j++; +- } +- switch (ConvertUTF16toUTF8(&buf0b, buf0 + j, +- &buf1b, buf1 + data_length * 2, strictConversion)) { +- case conversionOK: +- write(s, buf1, buf1b - buf1); +- break; +- case sourceExhausted: +- case targetExhausted: +- case sourceIllegal: +- default: +- gs_free_object(pdev->memory, buf0, "pdf_xmp_write_translated"); +- gs_free_object(pdev->memory, buf1, "pdf_xmp_write_translated"); +- return_error(gs_error_rangecheck); +- } +- gs_free_object(pdev->memory, buf0, "pdf_xmp_write_translated"); +- gs_free_object(pdev->memory, buf1, "pdf_xmp_write_translated"); +- return 0; ++ /* Skip the Byte Order Mark (0xfe 0xff) */ ++ buf0b = (short *)(buf0 + 2); ++ code = gs_ConvertUTF16((char *)buf0b, j - 2, (unsigned char **)&buf1b, data_length * 2 * sizeof(unsigned char)); ++ if (code < 0) ++ return code; ++ write(s, (const byte *)buf1, buf1b - buf1); + } ++ gs_free_object(pdev->memory, buf0, "pdf_xmp_write_translated"); ++ return 0; + } + + static int +diff --git a/devices/vector/gdevpdfp.c b/devices/vector/gdevpdfp.c +index 0fa07e3..6ebcb0d 100644 +--- a/devices/vector/gdevpdfp.c ++++ b/devices/vector/gdevpdfp.c +@@ -77,7 +77,6 @@ static const gs_param_item_t pdf_param_items[] = { + pi("CompressStreams", gs_param_type_bool, 
CompressStreams), + pi("PrintStatistics", gs_param_type_bool, PrintStatistics), + pi("MaxInlineImageSize", gs_param_type_long, MaxInlineImageSize), +- pi("DSCEncodingToUnicode", gs_param_type_int_array, DSCEncodingToUnicode), + + /* PDF Encryption */ + pi("OwnerPassword", gs_param_type_string, OwnerPassword), +diff --git a/devices/vector/gdevpdfx.h b/devices/vector/gdevpdfx.h +index 308900a..c436220 100644 +--- a/devices/vector/gdevpdfx.h ++++ b/devices/vector/gdevpdfx.h +@@ -601,7 +601,6 @@ struct gx_device_pdf_s { + a bitmap representation of a shading. + (Bigger shadings to be downsampled). */ + long MaxInlineImageSize; +- gs_param_int_array DSCEncodingToUnicode; + /* Encryption parameters */ + gs_param_string OwnerPassword; + gs_param_string UserPassword; +@@ -911,14 +910,14 @@ struct gx_device_pdf_s { + m(28,sbstack) m(29,substream_Resources) m(30,font3)\ + m(31,accumulating_substream_resource) \ + m(32,pres_soft_mask_dict) m(33,PDFXTrimBoxToMediaBoxOffset.data)\ +- m(34,PDFXBleedBoxToTrimBoxOffset.data) m(35, DSCEncodingToUnicode.data)\ +- m(36,Identity_ToUnicode_CMaps[0]) m(37,Identity_ToUnicode_CMaps[1])\ +- m(38,vgstack)\ +- m(39, outline_levels) +- m(40, gx_device_pdf, EmbeddedFiles); +- m(41, gx_device_pdf, pdf_font_dir); +- m(42, gx_device_pdf, Extension_Metadata);*/ +-#define gx_device_pdf_num_ptrs 43 ++ m(34,PDFXBleedBoxToTrimBoxOffset.data) ++ m(35,Identity_ToUnicode_CMaps[0]) m(36,Identity_ToUnicode_CMaps[1])\ ++ m(37,vgstack)\ ++ m(38, outline_levels) ++ m(39, gx_device_pdf, EmbeddedFiles); ++ m(40, gx_device_pdf, pdf_font_dir); ++ m(41, gx_device_pdf, Extension_Metadata);*/ ++#define gx_device_pdf_num_ptrs 42 + #define gx_device_pdf_do_param_strings(m)\ + m(0, OwnerPassword) m(1, UserPassword) m(2, NoEncrypt)\ + m(3, DocumentUUID) m(4, InstanceUUID) +diff --git a/windows/ghostscript.vcproj b/windows/ghostscript.vcproj +index a96d317..450cb26 100644 +--- a/windows/ghostscript.vcproj ++++ b/windows/ghostscript.vcproj +@@ -1794,10 +1794,6 @@ + > + + 
+- +- + +@@ -3330,10 +3326,6 @@ + > + + +- +- + +diff --git a/windows/ghostscript_rt.vcxproj b/windows/ghostscript_rt.vcxproj +index 2348f08..fae2e1f 100644 +--- a/windows/ghostscript_rt.vcxproj ++++ b/windows/ghostscript_rt.vcxproj +@@ -427,7 +427,6 @@ + + + +- + + + +@@ -1689,7 +1688,6 @@ + + + +- + + + +-- +2.9.3 + diff --git a/ghostscript.spec b/ghostscript.spec index 8d52f90..8228cb0 100644 --- a/ghostscript.spec +++ b/ghostscript.spec @@ -5,7 +5,7 @@ Summary: A PostScript interpreter and renderer Name: ghostscript Version: %{gs_ver} -Release: 5%{?dist} +Release: 6%{?dist} # Included CMap data is Redistributable, no modification permitted, # see http://bugzilla.redhat.com/487510 @@ -21,6 +21,7 @@ Patch2: ghostscript-9.20-runlibfileifexists.patch Patch3: ghostscript-9.20-run-dvipdf-securely.patch Patch4: ghostscript-9.20-urw-fonts-naming.patch Patch10: ghostscript-9.20-handle-glyphdirectory-correctly.patch +Patch11: ghostscript-9.20-remove-and-reimplement-ConvertUTF.patch # Security patches: Patch5: ghostscript-9.20-cve-2016-7979.patch @@ -147,6 +148,9 @@ rm -rf expat freetype icclib jasper jpeg jpegxr lcms lcms2 libpng openjpeg zlib # handle GlyphDirectory as an array (http://bugs.ghostscript.com/show_bug.cgi?id=697286): %patch10 -p1 +# Remove (and re-implement) ConvertUTF.c because of licensing issues (bug #1404933): +%patch11 -p1 + # Convert manual pages to UTF-8 from8859_1() { iconv -f iso-8859-1 -t utf-8 < "$1" > "${1}_" @@ -343,6 +347,9 @@ rm -rf $RPM_BUILD_ROOT %{_libdir}/libgs.so %changelog +* Tue Jan 24 2017 David Kaspar [Dee'Kej] - 9.20-6 +- Remove and (re-implement) ConvertUTF.c (bug #1404933) + * Thu Nov 3 2016 David Kaspar [Dee'Kej] - 9.20-5 - Added fix to avoid SIGSEGV for some *.ps files. More info here: