Update to 60.1

This commit is contained in:
Pete Walter 2017-11-30 13:00:24 +00:00
parent 099ad42e4d
commit 06d5a2e248
10 changed files with 9 additions and 782 deletions

1
.gitignore vendored
View File

@ -1,2 +1,3 @@
/icu4c-5*-src.tgz
/icu4c-6*-src.tgz
/icu-config.sh

View File

@ -1,156 +0,0 @@
Index: icu/trunk/source/common/locid.cpp
===================================================================
--- icu/source/common/locid.cpp (revision 39282)
+++ icu/source/common/locid.cpp (revision 39384)
@@ -45,4 +45,5 @@
#include "ucln_cmn.h"
#include "ustr_imp.h"
+#include "charstr.h"
U_CDECL_BEGIN
@@ -59,4 +60,10 @@
static UHashtable *gDefaultLocalesHashT = NULL;
static Locale *gDefaultLocale = NULL;
+
+/**
+ * \def ULOC_STRING_LIMIT
+ * strings beyond this value crash in CharString
+ */
+#define ULOC_STRING_LIMIT 357913941
U_NAMESPACE_END
@@ -286,5 +293,5 @@
else
{
- MaybeStackArray<char, ULOC_FULLNAME_CAPACITY> togo;
+ UErrorCode status = U_ZERO_ERROR;
int32_t size = 0;
int32_t lsize = 0;
@@ -292,5 +299,4 @@
int32_t vsize = 0;
int32_t ksize = 0;
- char *p;
// Calculate the size of the resulting string.
@@ -300,6 +306,12 @@
{
lsize = (int32_t)uprv_strlen(newLanguage);
+ if ( lsize < 0 || lsize > ULOC_STRING_LIMIT ) { // int32 wrap
+ setToBogus();
+ return;
+ }
size = lsize;
}
+
+ CharString togo(newLanguage, lsize, status); // start with newLanguage
// _Country
@@ -307,4 +319,8 @@
{
csize = (int32_t)uprv_strlen(newCountry);
+ if ( csize < 0 || csize > ULOC_STRING_LIMIT ) { // int32 wrap
+ setToBogus();
+ return;
+ }
size += csize;
}
@@ -321,4 +337,8 @@
// remove trailing _'s
vsize = (int32_t)uprv_strlen(newVariant);
+ if ( vsize < 0 || vsize > ULOC_STRING_LIMIT ) { // int32 wrap
+ setToBogus();
+ return;
+ }
while( (vsize>1) && (newVariant[vsize-1] == SEP_CHAR) )
{
@@ -345,48 +365,30 @@
{
ksize = (int32_t)uprv_strlen(newKeywords);
+ if ( ksize < 0 || ksize > ULOC_STRING_LIMIT ) {
+ setToBogus();
+ return;
+ }
size += ksize + 1;
}
-
// NOW we have the full locale string..
-
- /*if the whole string is longer than our internal limit, we need
- to go to the heap for temporary buffers*/
- if (size >= togo.getCapacity())
- {
- // If togo_heap could not be created, initialize with default settings.
- if (togo.resize(size+1) == NULL) {
- init(NULL, FALSE);
- }
- }
-
- togo[0] = 0;
-
// Now, copy it back.
- p = togo.getAlias();
- if ( lsize != 0 )
- {
- uprv_strcpy(p, newLanguage);
- p += lsize;
- }
+
+ // newLanguage is already copied
if ( ( vsize != 0 ) || (csize != 0) ) // at least: __v
{ // ^
- *p++ = SEP_CHAR;
+ togo.append(SEP_CHAR, status);
}
if ( csize != 0 )
{
- uprv_strcpy(p, newCountry);
- p += csize;
+ togo.append(newCountry, status);
}
if ( vsize != 0)
{
- *p++ = SEP_CHAR; // at least: __v
-
- uprv_strncpy(p, newVariant, vsize); // Must use strncpy because
- p += vsize; // of trimming (above).
- *p = 0; // terminate
+ togo.append(SEP_CHAR, status)
+ .append(newVariant, vsize, status);
}
@@ -394,19 +396,23 @@
{
if (uprv_strchr(newKeywords, '=')) {
- *p++ = '@'; /* keyword parsing */
+ togo.append('@', status); /* keyword parsing */
}
else {
- *p++ = '_'; /* Variant parsing with a script */
+ togo.append('_', status); /* Variant parsing with a script */
if ( vsize == 0) {
- *p++ = '_'; /* No country found */
+ togo.append('_', status); /* No country found */
}
}
- uprv_strcpy(p, newKeywords);
- p += ksize;
- }
-
+ togo.append(newKeywords, status);
+ }
+
+ if (U_FAILURE(status)) {
+ // Something went wrong with appending, etc.
+ setToBogus();
+ return;
+ }
// Parse it, because for example 'language' might really be a complete
// string.
- init(togo.getAlias(), FALSE);
+ init(togo.data(), FALSE);
}
}

View File

@ -1,44 +0,0 @@
diff -ru orig.icu/source/layout/IndicReordering.cpp icu/source/layout/IndicReordering.cpp
--- orig.icu/source/layout/IndicReordering.cpp 2016-04-15 18:30:15.788856946 +0200
+++ icu/source/layout/IndicReordering.cpp 2016-04-15 18:30:39.244998995 +0200
@@ -13,6 +13,7 @@
U_NAMESPACE_BEGIN
+#define ccmpFeatureTag LE_CCMP_FEATURE_TAG
#define loclFeatureTag LE_LOCL_FEATURE_TAG
#define initFeatureTag LE_INIT_FEATURE_TAG
#define nuktFeatureTag LE_NUKT_FEATURE_TAG
@@ -35,6 +36,7 @@
#define caltFeatureTag LE_CALT_FEATURE_TAG
#define kernFeatureTag LE_KERN_FEATURE_TAG
+#define ccmpFeatureMask 0x00000001UL
#define loclFeatureMask 0x80000000UL
#define rphfFeatureMask 0x40000000UL
#define blwfFeatureMask 0x20000000UL
@@ -73,7 +75,7 @@
#define repositionedGlyphMask 0x00000002UL
-#define basicShapingFormsMask ( loclFeatureMask | nuktFeatureMask | akhnFeatureMask | rkrfFeatureMask | blwfFeatureMask | halfFeatureMask | vatuFeatureMask | cjctFeatureMask )
+#define basicShapingFormsMask ( ccmpFeatureMask | loclFeatureMask | nuktFeatureMask | akhnFeatureMask | rkrfFeatureMask | blwfFeatureMask | halfFeatureMask | vatuFeatureMask | cjctFeatureMask )
#define positioningFormsMask ( kernFeatureMask | distFeatureMask | abvmFeatureMask | blwmFeatureMask )
#define presentationFormsMask ( presFeatureMask | abvsFeatureMask | blwsFeatureMask | pstsFeatureMask | halnFeatureMask | caltFeatureMask )
@@ -484,6 +486,7 @@
#define tagArray0 (rphfFeatureMask | tagArray1)
static const FeatureMap featureMap[] = {
+ {ccmpFeatureTag, ccmpFeatureMask},
{loclFeatureTag, loclFeatureMask},
{initFeatureTag, initFeatureMask},
{nuktFeatureTag, nuktFeatureMask},
@@ -506,6 +509,7 @@
static const le_int32 featureCount = LE_ARRAY_SIZE(featureMap);
static const FeatureMap v2FeatureMap[] = {
+ {ccmpFeatureTag, ccmpFeatureMask},
{loclFeatureTag, loclFeatureMask},
{nuktFeatureTag, nuktFeatureMask},
{akhnFeatureTag, akhnFeatureMask},

View File

@ -1,129 +0,0 @@
Index: icu/trunk/source/layout/IndicReordering.cpp
===================================================================
--- icu/trunk/source/layout/IndicReordering.cpp (revision 25772)
+++ icu/trunk/source/layout/IndicReordering.cpp (revision 26090)
@@ -126,4 +126,8 @@
FeatureMask fSMFeatures;
+ LEUnicode fPreBaseConsonant;
+ LEUnicode fPreBaseVirama;
+ le_int32 fPBCIndex;
+ FeatureMask fPBCFeatures;
void saveMatra(LEUnicode matra, le_int32 matraIndex, IndicClassTable::CharClass matraClass)
@@ -172,5 +176,6 @@
fMatraFeatures(0), fMPreOutIndex(-1), fMPreFixups(mpreFixups),
fVMabove(0), fVMpost(0), fVMIndex(0), fVMFeatures(0),
- fSMabove(0), fSMbelow(0), fSMIndex(0), fSMFeatures(0)
+ fSMabove(0), fSMbelow(0), fSMIndex(0), fSMFeatures(0),
+ fPreBaseConsonant(0), fPreBaseVirama(0), fPBCIndex(0), fPBCFeatures(0)
{
// nothing else to do...
@@ -191,4 +196,6 @@
fVMabove = fVMpost = 0;
fSMabove = fSMbelow = 0;
+
+ fPreBaseConsonant = fPreBaseVirama = 0;
}
@@ -386,4 +393,12 @@
}
+ void notePreBaseConsonant(le_uint32 index,LEUnicode PBConsonant, LEUnicode PBVirama, FeatureMask features)
+ {
+ fPBCIndex = index;
+ fPreBaseConsonant = PBConsonant;
+ fPreBaseVirama = PBVirama;
+ fPBCFeatures = features;
+ }
+
void noteBaseConsonant()
{
@@ -465,4 +480,20 @@
}
+ void writePreBaseConsonant()
+ {
+ // The TDIL spec says that consonant + virama + RRA should produce a rakar in Malayalam. However,
+ // it seems that almost none of the fonts for Malayalam are set up to handle this.
+ // So, we're going to force the issue here by using the rakar as defined with RA in most fonts.
+
+ if (fPreBaseConsonant == 0x0d31) { // RRA
+ fPreBaseConsonant = 0x0d30; // RA
+ }
+
+ if (fPreBaseConsonant != 0) {
+ writeChar(fPreBaseConsonant, fPBCIndex, fPBCFeatures);
+ writeChar(fPreBaseVirama,fPBCIndex-1,fPBCFeatures);
+ }
+ }
+
le_int32 getOutputIndex()
{
@@ -723,4 +754,5 @@
}
+
IndicClassTable::CharClass charClass = CC_RESERVED;
IndicClassTable::CharClass nextClass = CC_RESERVED;
@@ -730,7 +762,9 @@
le_bool seenVattu = FALSE;
le_bool seenBelowBaseForm = FALSE;
+ le_bool seenPreBaseForm = FALSE;
le_bool hasNukta = FALSE;
le_bool hasBelowBaseForm = FALSE;
le_bool hasPostBaseForm = FALSE;
+ le_bool hasPreBaseForm = FALSE;
if (postBase < markStart && classTable->isNukta(chars[postBase])) {
@@ -746,12 +780,20 @@
hasBelowBaseForm = IndicClassTable::hasBelowBaseForm(charClass) && !hasNukta;
hasPostBaseForm = IndicClassTable::hasPostBaseForm(charClass) && !hasNukta;
+ hasPreBaseForm = IndicClassTable::hasPreBaseForm(charClass) && !hasNukta;
if (IndicClassTable::isConsonant(charClass)) {
if (postBaseLimit == 0 || seenVattu ||
(baseConsonant > baseLimit && !classTable->isVirama(chars[baseConsonant - 1])) ||
- !(hasBelowBaseForm || hasPostBaseForm)) {
+ !(hasBelowBaseForm || hasPostBaseForm || hasPreBaseForm)) {
break;
}
+ // Note any pre-base consonants
+ if ( baseConsonant == lastConsonant && lastConsonant > 0 &&
+ hasPreBaseForm && classTable->isVirama(chars[baseConsonant - 1])) {
+ output.notePreBaseConsonant(lastConsonant,chars[lastConsonant],chars[lastConsonant-1],tagArray2);
+ seenPreBaseForm = TRUE;
+
+ }
// consonants with nuktas are never vattus
seenVattu = IndicClassTable::isVattu(charClass) && !hasNukta;
@@ -786,10 +828,12 @@
// write any pre-base consonants
+ output.writePreBaseConsonant();
+
le_bool supressVattu = TRUE;
for (i = baseLimit; i < baseConsonant; i += 1) {
LEUnicode ch = chars[i];
- // Don't put 'blwf' on first consonant.
- FeatureMask features = (i == baseLimit? tagArray2 : tagArray1);
+ // Don't put 'pstf' or 'blwf' on anything before the base consonant.
+ FeatureMask features = tagArray1 & ~( pstfFeatureMask | blwfFeatureMask );
charClass = classTable->getCharClass(ch);
@@ -842,5 +886,5 @@
// write below-base consonants
- if (baseConsonant != lastConsonant) {
+ if (baseConsonant != lastConsonant && !seenPreBaseForm) {
for (i = bcSpan + 1; i < postBase; i += 1) {
output.writeChar(chars[i], i, tagArray1);
@@ -872,5 +916,5 @@
// write post-base consonants
// FIXME: does this put the right tags on post-base consonants?
- if (baseConsonant != lastConsonant) {
+ if (baseConsonant != lastConsonant && !seenPreBaseForm) {
if (postBase <= lastConsonant) {
for (i = postBase; i <= lastConsonant; i += 1) {

View File

@ -1,14 +0,0 @@
diff -ru orig.icu/source/layout/LookupProcessor.cpp icu/source/layout/LookupProcessor.cpp
--- orig.icu/source/layout/LookupProcessor.cpp 2016-03-23 21:56:44.000000000 +0100
+++ icu/source/layout/LookupProcessor.cpp 2016-04-15 18:19:26.903927005 +0200
@@ -223,7 +223,9 @@
if (requiredFeatureIndex != 0xFFFF) {
requiredFeatureTable = featureListTable->getFeatureTable(featureListTable, requiredFeatureIndex, &requiredFeatureTag, success);
- featureReferences += SWAPW(requiredFeatureTable->lookupCount);
+ if (requiredFeatureTable.isValid()) {
+ featureReferences += SWAPW(requiredFeatureTable->lookupCount);
+ }
}
lookupOrderArray = LE_NEW_ARRAY(le_uint16, featureReferences);

View File

@ -1,28 +1,21 @@
#%%global debugtrace 1
Name: icu
Version: 57.1
Release: 9%{?dist}
Version: 60.1
Release: 1%{?dist}
Summary: International Components for Unicode
License: MIT and UCD and Public Domain
URL: http://www.icu-project.org/
Source0: http://download.icu-project.org/files/icu4c/57.1/icu4c-57_1-src.tgz
Source0: http://download.icu-project.org/files/icu4c/60.1/icu4c-60_1-src.tgz
Source1: icu-config.sh
BuildRequires: doxygen, autoconf, python
Requires: lib%{name}%{?_isa} = %{version}-%{release}
Patch1: icu.8198.revert.icu5431.patch
Patch2: icu.8800.freeserif.crash.patch
Patch3: icu.7601.Indic-ccmp.patch
Patch4: gennorm2-man.patch
Patch5: icuinfo-man.patch
Patch6: armv7hl-disable-tests.patch
Patch7: rhbz1360340-icu-changeset-39109.patch
Patch8: diff-icu_trunk_source_common_locid.cpp-from-39282-to-39384.patch
Patch9: rhbz1444101-icu-changeset-39671.patch
Patch10: rhbz1510932-icu-changeset-40324.patch
%description
Tools and utilities for developing with icu.
@ -63,18 +56,11 @@ BuildArch: noarch
%prep
%setup -q -n %{name}
%patch1 -p2 -R -b .icu8198.revert.icu5431.patch
%patch2 -p1 -b .icu8800.freeserif.crash.patch
%patch3 -p1 -b .icu7601.Indic-ccmp.patch
%patch4 -p1 -b .gennorm2-man.patch
%patch5 -p1 -b .icuinfo-man.patch
%ifarch armv7hl
%patch6 -p1 -b .armv7hl-disable-tests.patch
%endif
%patch7 -p1 -b .rhbz1360340-icu-changeset-39109.patch
%patch8 -p1 -b .diff-icu_trunk_source_common_locid.cpp-from-39282-to-39384.patch
%patch9 -p1 -b .rhbz1444101-icu-changeset-39671.patch
%patch10 -p1 -b .rhbz1510932-icu-changeset-40324.patch
%build
@ -96,14 +82,6 @@ OPTIONS=$OPTIONS' --enable-debug --enable-tracing'
#rhbz#225896
sed -i 's|-nodefaultlibs -nostdlib||' config/mh-linux
#rhbz#681941
sed -i 's|^LIBS =.*|LIBS = -L../lib -licuuc -lpthread -lm|' i18n/Makefile
sed -i 's|^LIBS =.*|LIBS = -nostdlib -L../lib -licuuc -licui18n -lc -lgcc|' io/Makefile
sed -i 's|^LIBS =.*|LIBS = -nostdlib -L../lib -licuuc -lc|' layout/Makefile
sed -i 's|^LIBS =.*|LIBS = -nostdlib -L../lib -licuuc -licule -lc|' layoutex/Makefile
sed -i 's|^LIBS =.*|LIBS = -nostdlib -L../../lib -licutu -licuuc -lc|' tools/ctestfw/Makefile
# As of ICU 52.1 the -nostdlib in tools/toolutil/Makefile results in undefined reference to `__dso_handle'
sed -i 's|^LIBS =.*|LIBS = -L../../lib -licui18n -licuuc -lpthread -lc|' tools/toolutil/Makefile
#rhbz#813484
sed -i 's| \$(docfilesdir)/installdox||' Makefile
# There is no source/doc/html/search/ directory
@ -190,7 +168,6 @@ LD_LIBRARY_PATH=lib:stubdata:tools/ctestfw:$LD_LIBRARY_PATH bin/uconv -l
%{_bindir}/icuinfo
%{_mandir}/man1/%{name}-config.1*
%{_mandir}/man1/icuinfo.1*
%{_includedir}/layout
%{_includedir}/unicode
%{_libdir}/*.so
%{_libdir}/pkgconfig/*.pc
@ -208,6 +185,9 @@ LD_LIBRARY_PATH=lib:stubdata:tools/ctestfw:$LD_LIBRARY_PATH bin/uconv -l
%changelog
* Thu Nov 30 2017 Pete Walter <pwalter@fedoraproject.org> - 60.1-1
- Update to 60.1
* Wed Nov 08 2017 Eike Rathke <erack@redhat.com> - 57.1-9
- Resolves: rhbz#1510932 CVE-2017-14952

View File

@ -1,244 +0,0 @@
# Offsets corrected from https://ssl.icu-project.org/trac/changeset/39109
diff -ru icu.orig/source/common/uloc.cpp icu/source/common/uloc.cpp
--- icu.orig/source/common/uloc.cpp 2016-03-23 21:50:12.000000000 +0100
+++ icu/source/common/uloc.cpp 2016-11-01 15:21:16.542151312 +0100
@@ -2246,7 +2246,7 @@
typedef struct {
float q;
int32_t dummy; /* to avoid uninitialized memory copy from qsort */
- char *locale;
+ char locale[ULOC_FULLNAME_CAPACITY+1];
} _acceptLangItem;
static int32_t U_CALLCONV
@@ -2288,9 +2288,7 @@
UEnumeration* availableLocales,
UErrorCode *status)
{
- _acceptLangItem *j;
- _acceptLangItem smallBuffer[30];
- char **strs;
+ MaybeStackArray<_acceptLangItem, 4> items; // Struct for collecting items.
char tmp[ULOC_FULLNAME_CAPACITY +1];
int32_t n = 0;
const char *itemEnd;
@@ -2300,11 +2298,7 @@
int32_t res;
int32_t i;
int32_t l = (int32_t)uprv_strlen(httpAcceptLanguage);
- int32_t jSize;
- char *tempstr; /* Use for null pointer check */
- j = smallBuffer;
- jSize = UPRV_LENGTHOF(smallBuffer);
if(U_FAILURE(*status)) {
return -1;
}
@@ -2332,27 +2326,29 @@
while(isspace(*t)) {
t++;
}
- j[n].q = (float)_uloc_strtod(t,NULL);
+ items[n].q = (float)_uloc_strtod(t,NULL);
} else {
/* no semicolon - it's 1.0 */
- j[n].q = 1.0f;
+ items[n].q = 1.0f;
paramEnd = itemEnd;
}
- j[n].dummy=0;
+ items[n].dummy=0;
/* eat spaces prior to semi */
for(t=(paramEnd-1);(paramEnd>s)&&isspace(*t);t--)
;
- /* Check for null pointer from uprv_strndup */
- tempstr = uprv_strndup(s,(int32_t)((t+1)-s));
- if (tempstr == NULL) {
- *status = U_MEMORY_ALLOCATION_ERROR;
- return -1;
- }
- j[n].locale = tempstr;
- uloc_canonicalize(j[n].locale,tmp,UPRV_LENGTHOF(tmp),status);
- if(strcmp(j[n].locale,tmp)) {
- uprv_free(j[n].locale);
- j[n].locale=uprv_strdup(tmp);
+ int32_t slen = ((t+1)-s);
+ if(slen > ULOC_FULLNAME_CAPACITY) {
+ *status = U_BUFFER_OVERFLOW_ERROR;
+ return -1; // too big
+ }
+ uprv_strncpy(items[n].locale, s, slen);
+ items[n].locale[slen]=0; // terminate
+ int32_t clen = uloc_canonicalize(items[n].locale, tmp, UPRV_LENGTHOF(tmp)-1, status);
+ if(U_FAILURE(*status)) return -1;
+ if((clen!=slen) || (uprv_strncmp(items[n].locale, tmp, slen))) {
+ // canonicalization had an effect- copy back
+ uprv_strncpy(items[n].locale, tmp, clen);
+ items[n].locale[clen] = 0; // terminate
}
#if defined(ULOC_DEBUG)
/*fprintf(stderr,"%d: s <%s> q <%g>\n", n, j[n].locale, j[n].q);*/
@@ -2362,63 +2358,29 @@
while(*s==',') { /* eat duplicate commas */
s++;
}
- if(n>=jSize) {
- if(j==smallBuffer) { /* overflowed the small buffer. */
- j = static_cast<_acceptLangItem *>(uprv_malloc(sizeof(j[0])*(jSize*2)));
- if(j!=NULL) {
- uprv_memcpy(j,smallBuffer,sizeof(j[0])*jSize);
- }
+ if(n>=items.getCapacity()) { // If we need more items
+ if(NULL == items.resize(items.getCapacity()*2, items.getCapacity())) {
+ *status = U_MEMORY_ALLOCATION_ERROR;
+ return -1;
+ }
#if defined(ULOC_DEBUG)
- fprintf(stderr,"malloced at size %d\n", jSize);
+ fprintf(stderr,"malloced at size %d\n", items.getCapacity());
#endif
- } else {
- j = static_cast<_acceptLangItem *>(uprv_realloc(j, sizeof(j[0])*jSize*2));
-#if defined(ULOC_DEBUG)
- fprintf(stderr,"re-alloced at size %d\n", jSize);
-#endif
- }
- jSize *= 2;
- if(j==NULL) {
- *status = U_MEMORY_ALLOCATION_ERROR;
- return -1;
- }
}
}
- uprv_sortArray(j, n, sizeof(j[0]), uloc_acceptLanguageCompare, NULL, TRUE, status);
+ uprv_sortArray(items.getAlias(), n, sizeof(items[0]), uloc_acceptLanguageCompare, NULL, TRUE, status);
+ LocalArray<const char*> strs(new const char*[n], *status);
if(U_FAILURE(*status)) {
- if(j != smallBuffer) {
-#if defined(ULOC_DEBUG)
- fprintf(stderr,"freeing j %p\n", j);
-#endif
- uprv_free(j);
- }
- return -1;
- }
- strs = static_cast<char **>(uprv_malloc((size_t)(sizeof(strs[0])*n)));
- /* Check for null pointer */
- if (strs == NULL) {
- uprv_free(j); /* Free to avoid memory leak */
- *status = U_MEMORY_ALLOCATION_ERROR;
- return -1;
+ return -1;
}
for(i=0;i<n;i++) {
#if defined(ULOC_DEBUG)
/*fprintf(stderr,"%d: s <%s> q <%g>\n", i, j[i].locale, j[i].q);*/
#endif
- strs[i]=j[i].locale;
+ strs[i]=items[i].locale;
}
res = uloc_acceptLanguage(result, resultAvailable, outResult,
- (const char**)strs, n, availableLocales, status);
- for(i=0;i<n;i++) {
- uprv_free(strs[i]);
- }
- uprv_free(strs);
- if(j != smallBuffer) {
-#if defined(ULOC_DEBUG)
- fprintf(stderr,"freeing j %p\n", j);
-#endif
- uprv_free(j);
- }
+ strs.getAlias(), n, availableLocales, status);
return res;
}
diff -ru icu.orig/source/test/cintltst/cloctst.c icu/source/test/cintltst/cloctst.c
--- icu.orig/source/test/cintltst/cloctst.c 2016-03-23 21:48:18.000000000 +0100
+++ icu/source/test/cintltst/cloctst.c 2016-11-01 15:21:16.544151315 +0100
@@ -2775,16 +2775,20 @@
const char *icuSet; /**< ? */
const char *expect; /**< The expected locale result */
UAcceptResult res; /**< The expected error code */
+ UErrorCode expectStatus; /**< expected status */
} tests[] = {
- /*0*/{ 0, NULL, "mt_MT", ULOC_ACCEPT_VALID },
- /*1*/{ 1, NULL, "en", ULOC_ACCEPT_VALID },
- /*2*/{ 2, NULL, "en", ULOC_ACCEPT_FALLBACK },
- /*3*/{ 3, NULL, "", ULOC_ACCEPT_FAILED },
- /*4*/{ 4, NULL, "es", ULOC_ACCEPT_VALID },
-
- /*5*/{ 5, NULL, "en", ULOC_ACCEPT_VALID }, /* XF */
- /*6*/{ 6, NULL, "ja", ULOC_ACCEPT_FALLBACK }, /* XF */
- /*7*/{ 7, NULL, "zh", ULOC_ACCEPT_FALLBACK }, /* XF */
+ /*0*/{ 0, NULL, "mt_MT", ULOC_ACCEPT_VALID, U_ZERO_ERROR},
+ /*1*/{ 1, NULL, "en", ULOC_ACCEPT_VALID, U_ZERO_ERROR},
+ /*2*/{ 2, NULL, "en", ULOC_ACCEPT_FALLBACK, U_ZERO_ERROR},
+ /*3*/{ 3, NULL, "", ULOC_ACCEPT_FAILED, U_ZERO_ERROR},
+ /*4*/{ 4, NULL, "es", ULOC_ACCEPT_VALID, U_ZERO_ERROR},
+ /*5*/{ 5, NULL, "en", ULOC_ACCEPT_VALID, U_ZERO_ERROR}, /* XF */
+ /*6*/{ 6, NULL, "ja", ULOC_ACCEPT_FALLBACK, U_ZERO_ERROR}, /* XF */
+ /*7*/{ 7, NULL, "zh", ULOC_ACCEPT_FALLBACK, U_ZERO_ERROR}, /* XF */
+ /*8*/{ 8, NULL, "", ULOC_ACCEPT_FAILED, U_ZERO_ERROR }, /* */
+ /*9*/{ 9, NULL, "", ULOC_ACCEPT_FAILED, U_ZERO_ERROR }, /* */
+ /*10*/{10, NULL, "", ULOC_ACCEPT_FAILED, U_BUFFER_OVERFLOW_ERROR }, /* */
+ /*11*/{11, NULL, "", ULOC_ACCEPT_FAILED, U_BUFFER_OVERFLOW_ERROR }, /* */
};
const int32_t numTests = UPRV_LENGTHOF(tests);
static const char *http[] = {
@@ -2800,10 +2804,25 @@
"xxx-yyy;q=.01, xxx-yyy;q=.01, xxx-yyy;q=.01, xxx-yyy;q=.01, xxx-yyy;q=.01, "
"xxx-yyy;q=.01, xxx-yyy;q=.01, xxx-yyy;q=.01, xx-yy;q=.1, "
"es",
-
/*5*/ "zh-xx;q=0.9, en;q=0.6",
/*6*/ "ja-JA",
/*7*/ "zh-xx;q=0.9",
+ /*08*/ "AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA"
+ "AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA"
+ "AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA"
+ "AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA", // 156
+ /*09*/ "AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA"
+ "AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA"
+ "AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA"
+ "AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAB", // 157 (this hits U_STRING_NOT_TERMINATED_WARNING )
+ /*10*/ "AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA"
+ "AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA"
+ "AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA"
+ "AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAABC", // 158
+ /*11*/ "AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA"
+ "AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA"
+ "AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA"
+ "AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA", // 163 bytes
};
for(i=0;i<numTests;i++) {
@@ -2818,17 +2837,22 @@
(void)rc; /* Suppress set but not used warning. */
uenum_close(available);
log_verbose(" got %s, %s [%s]\n", tmp[0]?tmp:"(EMPTY)", acceptResult(outResult), u_errorName(status));
- if(outResult != tests[i].res) {
+ if(status != tests[i].expectStatus) {
+ log_err_status(status, "FAIL: expected status %s but got %s\n", u_errorName(tests[i].expectStatus), u_errorName(status));
+ } else if(U_SUCCESS(tests[i].expectStatus)) {
+ /* don't check content if expected failure */
+ if(outResult != tests[i].res) {
log_err_status(status, "FAIL: #%d: expected outResult of %s but got %s\n", i,
acceptResult( tests[i].res),
acceptResult( outResult));
log_info("test #%d: http[%s], ICU[%s], expect %s, %s\n",
i, http[tests[i].httpSet], tests[i].icuSet, tests[i].expect,acceptResult(tests[i].res));
- }
- if((outResult>0)&&uprv_strcmp(tmp, tests[i].expect)) {
- log_err_status(status, "FAIL: #%d: expected %s but got %s\n", i, tests[i].expect, tmp);
- log_info("test #%d: http[%s], ICU[%s], expect %s, %s\n",
- i, http[tests[i].httpSet], tests[i].icuSet, tests[i].expect, acceptResult(tests[i].res));
+ }
+ if((outResult>0)&&uprv_strcmp(tmp, tests[i].expect)) {
+ log_err_status(status, "FAIL: #%d: expected %s but got %s\n", i, tests[i].expect, tmp);
+ log_info("test #%d: http[%s], ICU[%s], expect %s, %s\n",
+ i, http[tests[i].httpSet], tests[i].icuSet, tests[i].expect, acceptResult(tests[i].res));
+ }
}
}
}

View File

@ -1,156 +0,0 @@
# https://ssl.icu-project.org/trac/changeset/39671
Index: /trunk/icu4c/source/common/utext.cpp
===================================================================
--- icu.orig/source/common/utext.cpp
+++ icu/source/common/utext.cpp
@@ -848,7 +848,13 @@
// Chunk size.
-// Must be less than 85, because of byte mapping from UChar indexes to native indexes.
-// Worst case is three native bytes to one UChar. (Supplemenaries are 4 native bytes
-// to two UChars.)
+// Must be less than 42 (256/6), because of byte mapping from UChar indexes to native indexes.
+// Worst case there are six UTF-8 bytes per UChar.
+// obsolete 6 byte form fd + 5 trails maps to fffd
+// obsolete 5 byte form fc + 4 trails maps to fffd
+// non-shortest 4 byte forms maps to fffd
+// normal supplementaries map to a pair of utf-16, two utf8 bytes per utf-16 unit
+// mapToUChars array size must allow for the worst case, 6.
+// This could be brought down to 4, by treating fd and fc as pure illegal,
+// rather than obsolete lead bytes. But that is not compatible with the utf-8 access macros.
//
enum { UTF8_TEXT_CHUNK_SIZE=32 };
@@ -890,5 +896,5 @@
// one for a supplementary starting in the last normal position,
// and one for an entry for the buffer limit position.
- uint8_t mapToUChars[UTF8_TEXT_CHUNK_SIZE*3+6]; // Map native offset from bufNativeStart to
+ uint8_t mapToUChars[UTF8_TEXT_CHUNK_SIZE*6+6]; // Map native offset from bufNativeStart to
// correspoding offset in filled part of buf.
int32_t align;
@@ -1033,4 +1039,5 @@
u8b = (UTF8Buf *)ut->p; // the current buffer
mapIndex = ix - u8b->toUCharsMapStart;
+ U_ASSERT(mapIndex < (int32_t)sizeof(UTF8Buf::mapToUChars));
ut->chunkOffset = u8b->mapToUChars[mapIndex] - u8b->bufStartIdx;
return TRUE;
@@ -1299,4 +1306,8 @@
// If index is at the end, there is no character there to look at.
if (ix != ut->b) {
+ // Note: this function will only move the index back if it is on a trail byte
+ // and there is a preceding lead byte and the sequence from the lead
+ // through this trail could be part of a valid UTF-8 sequence
+ // Otherwise the index remains unchanged.
U8_SET_CP_START(s8, 0, ix);
}
@@ -1312,5 +1323,8 @@
uint8_t *mapToNative = u8b->mapToNative;
uint8_t *mapToUChars = u8b->mapToUChars;
- int32_t toUCharsMapStart = ix - (UTF8_TEXT_CHUNK_SIZE*3 + 1);
+ int32_t toUCharsMapStart = ix - sizeof(UTF8Buf::mapToUChars) + 1;
+ // Note that toUCharsMapStart can be negative. Happens when the remaining
+ // text from current position to the beginning is less than the buffer size.
+ // + 1 because mapToUChars must have a slot at the end for the bufNativeLimit entry.
int32_t destIx = UTF8_TEXT_CHUNK_SIZE+2; // Start in the overflow region
// at end of buffer to leave room
@@ -1339,4 +1353,5 @@
// Special case ASCII range for speed.
buf[destIx] = (UChar)c;
+ U_ASSERT(toUCharsMapStart <= srcIx);
mapToUChars[srcIx - toUCharsMapStart] = (uint8_t)destIx;
mapToNative[destIx] = (uint8_t)(srcIx - toUCharsMapStart);
@@ -1368,4 +1383,5 @@
mapToUChars[sIx-- - toUCharsMapStart] = (uint8_t)destIx;
} while (sIx >= srcIx);
+ U_ASSERT(toUCharsMapStart <= (srcIx+1));
// Set native indexing limit to be the current position.
@@ -1542,4 +1558,5 @@
U_ASSERT(index<=ut->chunkNativeLimit);
int32_t mapIndex = index - u8b->toUCharsMapStart;
+ U_ASSERT(mapIndex < (int32_t)sizeof(UTF8Buf::mapToUChars));
int32_t offset = u8b->mapToUChars[mapIndex] - u8b->bufStartIdx;
U_ASSERT(offset>=0 && offset<=ut->chunkLength);
Index: /trunk/icu4c/source/test/intltest/utxttest.cpp
===================================================================
--- icu.orig/source/test/intltest/utxttest.cpp
+++ icu/source/test/intltest/utxttest.cpp
@@ -68,4 +68,6 @@
case 7: name = "Ticket12130";
if (exec) Ticket12130(); break;
+ case 8: name = "Ticket12888";
+ if (exec) Ticket12888(); break;
default: name = ""; break;
}
@@ -1584,2 +1586,62 @@
utext_close(&ut);
}
+
+// Ticket 12888: bad handling of illegal utf-8 containing many instances of the archaic, now illegal,
+// six byte utf-8 forms. Original implementation had an assumption that
+// there would be at most three utf-8 bytes per UTF-16 code unit.
+// The five and six byte sequences map to a single replacement character.
+
+void UTextTest::Ticket12888() {
+ const char *badString =
+ "\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80"
+ "\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80"
+ "\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80"
+ "\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80"
+ "\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80"
+ "\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80"
+ "\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80"
+ "\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80"
+ "\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80"
+ "\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80"
+ "\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80"
+ "\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80"
+ "\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80"
+ "\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80"
+ "\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80"
+ "\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80"
+ "\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80"
+ "\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80"
+ "\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80"
+ "\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80";
+
+ UErrorCode status = U_ZERO_ERROR;
+ LocalUTextPointer ut(utext_openUTF8(NULL, badString, -1, &status));
+ TEST_SUCCESS(status);
+ for (;;) {
+ UChar32 c = utext_next32(ut.getAlias());
+ if (c == U_SENTINEL) {
+ break;
+ }
+ }
+ int32_t endIdx = utext_getNativeIndex(ut.getAlias());
+ if (endIdx != (int32_t)strlen(badString)) {
+ errln("%s:%d expected=%d, actual=%d", __FILE__, __LINE__, strlen(badString), endIdx);
+ return;
+ }
+
+ for (int32_t prevIndex = endIdx; prevIndex>0;) {
+ UChar32 c = utext_previous32(ut.getAlias());
+ int32_t currentIndex = utext_getNativeIndex(ut.getAlias());
+ if (c != 0xfffd) {
+ errln("%s:%d (expected, actual, index) = (%d, %d, %d)\n",
+ __FILE__, __LINE__, 0xfffd, c, currentIndex);
+ break;
+ }
+ if (currentIndex != prevIndex - 6) {
+ errln("%s:%d: wrong index. Expected, actual = %d, %d",
+ __FILE__, __LINE__, prevIndex - 6, currentIndex);
+ break;
+ }
+ prevIndex = currentIndex;
+ }
+}
Index: /trunk/icu4c/source/test/intltest/utxttest.h
===================================================================
--- icu.orig/source/test/intltest/utxttest.h
+++ icu/source/test/intltest/utxttest.h
@@ -39,4 +39,5 @@
void Ticket10983();
void Ticket12130();
+ void Ticket12888();
private:

View File

@ -1,11 +0,0 @@
# https://ssl.icu-project.org/trac/changeset/40324/trunk/icu4c/source/i18n/zonemeta.cpp
Index: trunk/icu4c/source/i18n/zonemeta.cpp
===================================================================
--- icu.orig/source/i18n/zonemeta.cpp
+++ icu/source/i18n/zonemeta.cpp
@@ -691,5 +691,4 @@
if (U_FAILURE(status)) {
delete mzMappings;
- deleteOlsonToMetaMappingEntry(entry);
uprv_free(entry);
break;

View File

@ -1,2 +1,2 @@
976734806026a4ef8bdd17937c8898b9 icu4c-57_1-src.tgz
a5b57b0b6589e202ce4f812c46e0aa37 icu-config.sh
SHA512 (icu4c-60_1-src.tgz) = a6798f70add1b3bac2197a49e09a0bb636279af7019ce572f63a30ab713e09657ee9d9b20aac7ea806fbb84667ca2eca981411e5053b47c3c705aa496a669233
MD5 (icu-config.sh) = a5b57b0b6589e202ce4f812c46e0aa37