From 05879b6d7b59bcd3f144aa60561053c23c901847 Mon Sep 17 00:00:00 2001 From: Caolan McNamara Date: Tue, 3 Jun 2008 09:09:35 +0000 Subject: [PATCH] icu 4.0 --- .cvsignore | 2 +- icu.regexp.patch | 301 ----------------------------------------------- icu.spec | 20 ++-- sources | 2 +- 4 files changed, 13 insertions(+), 312 deletions(-) delete mode 100644 icu.regexp.patch diff --git a/.cvsignore b/.cvsignore index 1220f29..c395ecf 100644 --- a/.cvsignore +++ b/.cvsignore @@ -1 +1 @@ -icu4c-3_8_1-src.tgz +icu4c-4_0_d02-src.tgz diff --git a/icu.regexp.patch b/icu.regexp.patch deleted file mode 100644 index b88c8b7..0000000 --- a/icu.regexp.patch +++ /dev/null @@ -1,301 +0,0 @@ -Index: source/i18n/regexcmp.cpp -=================================================================== ---- source/i18n/regexcmp.cpp (revision 23291) -+++ source/i18n/regexcmp.cpp (revision 23292) -@@ -1186,14 +1186,17 @@ - // Because capture groups can be forward-referenced by back-references, - // we fill the operand with the capture group number. At the end - // of compilation, it will be changed to the variable's location. -- U_ASSERT(groupNum > 0); -- int32_t op; -- if (fModeFlags & UREGEX_CASE_INSENSITIVE) { -- op = URX_BUILD(URX_BACKREF_I, groupNum); -+ if (groupNum < 1) { -+ error(U_REGEX_INVALID_BACK_REF); - } else { -- op = URX_BUILD(URX_BACKREF, groupNum); -+ int32_t op; -+ if (fModeFlags & UREGEX_CASE_INSENSITIVE) { -+ op = URX_BUILD(URX_BACKREF_I, groupNum); -+ } else { -+ op = URX_BUILD(URX_BACKREF, groupNum); -+ } -+ fRXPat->fCompiledPat->addElement(op, *fStatus); - } -- fRXPat->fCompiledPat->addElement(op, *fStatus); - } - break; - -Index: source/i18n/rematch.cpp -=================================================================== ---- source/i18n/rematch.cpp (revision 23291) -+++ source/i18n/rematch.cpp (revision 23292) -@@ -30,6 +30,15 @@ - - U_NAMESPACE_BEGIN - -+// Limit the size of the back track stack, to avoid system failures caused -+// by heap exhaustion. Units are in 32 bit words, not bytes. -+// This value puts ICU's limits higher than most other regexp implementations, -+// which use recursion rather than the heap, and take more storage per -+// backtrack point. -+// This constant is _temporary_. Proper API to control the value will added. -+// -+static const int32_t BACKTRACK_STACK_CAPACITY = 8000000; -+ - //----------------------------------------------------------------------------- - // - // Constructor and Destructor -@@ -53,8 +62,9 @@ - } - if (fStack == NULL || fData == NULL) { - fDeferredStatus = U_MEMORY_ALLOCATION_ERROR; -+ } else { -+ fStack->setMaxCapacity(BACKTRACK_STACK_CAPACITY); - } -- - reset(RegexStaticSets::gStaticSets->fEmptyString); - } - -@@ -78,6 +88,8 @@ - } - if (fStack == NULL || fData == NULL) { - status = U_MEMORY_ALLOCATION_ERROR; -+ } else { -+ fStack->setMaxCapacity(BACKTRACK_STACK_CAPACITY); - } - reset(input); - } -@@ -102,6 +114,8 @@ - } - if (fStack == NULL || fData == NULL) { - status = U_MEMORY_ALLOCATION_ERROR; -+ } else { -+ fStack->setMaxCapacity(BACKTRACK_STACK_CAPACITY); - } - reset(RegexStaticSets::gStaticSets->fEmptyString); - } -@@ -1014,6 +1028,14 @@ - inline REStackFrame *RegexMatcher::StateSave(REStackFrame *fp, int32_t savePatIdx, int32_t frameSize, UErrorCode &status) { - // push storage for a new frame. - int32_t *newFP = fStack->reserveBlock(frameSize, status); -+ if (newFP == NULL) { -+ // Heap allocation error on attempted stack expansion. -+ // We need to return a writable stack frame, so just return the -+ // previous frame. The match operation will stop quickly -+ // becuase of the error status, after which the frame will never -+ // be looked at again. -+ return fp; -+ } - fp = (REStackFrame *)(newFP - frameSize); // in case of realloc of stack. - - // New stack frame = copy of old top frame. -@@ -1029,8 +1051,8 @@ - fp->fPatIdx = savePatIdx; - return (REStackFrame *)newFP; - } -- -- -+ -+ - //-------------------------------------------------------------------------------- - // - // MatchAt This is the actual matching engine. -@@ -2261,6 +2283,7 @@ - } - - if (U_FAILURE(status)) { -+ isMatch = FALSE; - break; - } - } -Index: source/test/intltest/regextst.h -=================================================================== ---- source/test/intltest/regextst.h (revision 23291) -+++ source/test/intltest/regextst.h (revision 23292) -@@ -30,6 +30,7 @@ - virtual void Extended(); - virtual void Errors(); - virtual void PerlTests(); -+ virtual void Bug6149(); - - // The following functions are internal to the regexp tests. - virtual UBool doRegexLMTest(const char *pat, const char *text, UBool looking, UBool match, int32_t line); -Index: source/test/intltest/regextst.cpp -=================================================================== ---- source/test/intltest/regextst.cpp (revision 23291) -+++ source/test/intltest/regextst.cpp (revision 23292) -@@ -66,6 +66,10 @@ - case 6: name = "PerlTests"; - if (exec) PerlTests(); - break; -+ case 7: name = "Bug 6149"; -+ if (exec) Bug6149(); -+ break; -+ - - - default: name = ""; -@@ -1639,6 +1643,12 @@ - - // Ticket 5389 - REGEX_ERR("*c", 1, 1, U_REGEX_RULE_SYNTAX); -+ -+ // Invalid Back Reference \0 -+ // For ICU 3.8 and earlier -+ // For ICU versions newer than 3.8, \0 introduces an octal escape. -+ // -+ REGEX_ERR("(ab)\\0", 1, 6, U_REGEX_INVALID_BACK_REF); - - } - -@@ -2122,6 +2132,26 @@ - } - - -+//-------------------------------------------------------------- -+// -+// Bug6149 Verify limits to heap expansion for backtrack stack. -+// Use this pattern, -+// "(a?){1,}" -+// The zero-length match will repeat forever. -+// (That this goes into a loop is another bug) -+// -+//--------------------------------------------------------------- -+void RegexTest::Bug6149() { -+ UnicodeString pattern("(a?){1,}"); -+ UnicodeString s("xyz"); -+ uint32_t flags = 0; -+ UErrorCode status = U_ZERO_ERROR; -+ -+ RegexMatcher matcher(pattern, s, flags, status); -+ UBool result = false; -+ REGEX_ASSERT_FAIL(result=matcher.matches(status), U_BUFFER_OVERFLOW_ERROR); -+ REGEX_ASSERT(result == FALSE); -+ } - - #endif /* !UCONFIG_NO_REGULAR_EXPRESSIONS */ - -Index: source/common/uvectr32.cpp -=================================================================== ---- source/common/uvectr32.cpp (revision 23291) -+++ source/common/uvectr32.cpp (revision 23292) -@@ -26,6 +26,7 @@ - UVector32::UVector32(UErrorCode &status) : - count(0), - capacity(0), -+ maxCapacity(0), - elements(NULL) - { - _init(DEFUALT_CAPACITY, status); -@@ -34,6 +35,7 @@ - UVector32::UVector32(int32_t initialCapacity, UErrorCode &status) : - count(0), - capacity(0), -+ maxCapacity(0), - elements(0) - { - _init(initialCapacity, status); -@@ -46,6 +48,9 @@ - if (initialCapacity < 1) { - initialCapacity = DEFUALT_CAPACITY; - } -+ if (maxCapacity>0 && maxCapacity= minimumCapacity) { - return TRUE; -- } else { -- int32_t newCap = capacity * 2; -- if (newCap < minimumCapacity) { -- newCap = minimumCapacity; -- } -- int32_t* newElems = (int32_t *)uprv_malloc(sizeof(int32_t)*newCap); -- if (newElems == 0) { -- status = U_MEMORY_ALLOCATION_ERROR; -- return FALSE; -- } -- uprv_memcpy(newElems, elements, sizeof(elements[0]) * count); -- uprv_free(elements); -- elements = newElems; -- capacity = newCap; -- return TRUE; - } -+ if (maxCapacity>0 && minimumCapacity>maxCapacity) { -+ status = U_BUFFER_OVERFLOW_ERROR; -+ return FALSE; -+ } -+ int32_t newCap = capacity * 2; -+ if (newCap < minimumCapacity) { -+ newCap = minimumCapacity; -+ } -+ if (maxCapacity > 0 && newCap > maxCapacity) { -+ newCap = maxCapacity; -+ } -+ int32_t* newElems = (int32_t *)uprv_malloc(sizeof(int32_t)*newCap); -+ if (newElems == 0) { -+ status = U_MEMORY_ALLOCATION_ERROR; -+ return FALSE; -+ } -+ uprv_memcpy(newElems, elements, sizeof(elements[0]) * count); -+ uprv_free(elements); -+ elements = newElems; -+ capacity = newCap; -+ return TRUE; - } - -+void UVector32::setMaxCapacity(int32_t limit) { -+ U_ASSERT(limit >= 0); -+ maxCapacity = limit; -+ if (maxCapacity < 0) { -+ maxCapacity = 0; -+ } -+} -+ - /** - * Change the size of this vector as follows: If newSize is smaller, - * then truncate the array, possibly deleting held elements for i >= -Index: source/common/uvectr32.h -=================================================================== ---- source/common/uvectr32.h (revision 23291) -+++ source/common/uvectr32.h (revision 23292) -@@ -61,6 +61,8 @@ - int32_t count; - - int32_t capacity; -+ -+ int32_t maxCapacity; // Limit beyond which capacity is not permitted to grow. - - int32_t* elements; - -@@ -162,6 +164,14 @@ - int32_t *getBuffer() const; - - /** -+ * Set the maximum allowed buffer capacity for this vector/stack. -+ * Default with no limit set is unlimited, go until malloc() fails. -+ * A Limit of zero means unlimited capacity. -+ * Units are vector elements (32 bits each), not bytes. -+ */ -+ void setMaxCapacity(int32_t limit); -+ -+ /** - * ICU "poor man's RTTI", returns a UClassID for this class. - */ - static UClassID U_EXPORT2 getStaticClassID(); -@@ -221,7 +231,9 @@ - } - - inline int32_t *UVector32::reserveBlock(int32_t size, UErrorCode &status) { -- ensureCapacity(count+size, status); -+ if (ensureCapacity(count+size, status) == FALSE) { -+ return NULL; -+ } - int32_t *rp = elements+count; - count += size; - return rp; diff --git a/icu.spec b/icu.spec index 33ddbe0..1045104 100644 --- a/icu.spec +++ b/icu.spec @@ -1,11 +1,11 @@ Name: icu -Version: 3.8.1 -Release: 8%{?dist} +Version: 4.0 +Release: 0.1.d02%{?dist} Summary: International Components for Unicode Group: Development/Tools License: MIT URL: http://www.icu-project.org/ -Source0: http://download.icu-project.org/files/icu4c/3.8.1/icu4c-3_8_1-src.tgz +Source0: http://download.icu-project.org/files/icu4c/4.0/icu4c-4_0_d02-src.tgz BuildRoot: %{_tmppath}/%{name}-%{version}-%{release}-root-%(%{__id_u} -n) BuildRequires: doxygen, autoconf @@ -19,9 +19,8 @@ Patch7: icu.icu5506.multiplevowels.patch Patch8: icu.icuXXXX.malayalam.bysyllable.patch Patch9: icu.icu6008.arm.padding.patch Patch10: icu.icu5498.openoffice.org.patch -Patch11: icu.regexp.patch -Patch12: icu.icu6213.worstcase.patch -Patch13: icu.icu6284.strictalias.patch +Patch11: icu.icu6213.worstcase.patch +Patch12: icu.icu6284.strictalias.patch %description Tools and utilities for developing with icu. @@ -69,9 +68,8 @@ Group: Documentation %patch8 -p1 -b .icuXXXX.malayalam.bysyllable.patch %patch9 -p1 -b .icu6008.arm.padding.patch %patch10 -p1 -b .icu5498.openoffice.org.patch -%patch11 -p0 -b .regexp.patch -%patch12 -p1 -b .icu6213.worstcase.patch -%patch13 -p1 -b .icu6284.strictalias.patch +%patch11 -p1 -b .icu6213.worstcase.patch +%patch12 -p1 -b .icu6284.strictalias.patch %build cd source @@ -144,6 +142,10 @@ rm -rf $RPM_BUILD_ROOT %doc source/__docs/%{name}/html/* %changelog +* Sun May 31 2008 Caolan McNamara - 4.0-0.1.d02 +- 4.0 release candidate +- drop integrated icu.regexp.patch + * Mon May 19 2008 Caolan McNamara - 3.8.1-8 - add icu.icu6284.strictalias.patch and build with strict-aliasing diff --git a/sources b/sources index 5e8153d..2eb13c4 100644 --- a/sources +++ b/sources @@ -1 +1 @@ -a827dbc9d909febd4ec39b90386868ba icu4c-3_8_1-src.tgz +9b87cb664b518b0ca5c801ecdbca1bf4 icu4c-4_0_d02-src.tgz