148 lines
4.9 KiB
Diff
148 lines
4.9 KiB
Diff
|
From c1b97d6d896b1f22fdf5d28471ef7859ec840a57 Mon Sep 17 00:00:00 2001
|
||
|
From: Andreas Schwab <schwab@redhat.com>
|
||
|
Date: Wed, 1 Sep 2010 17:26:15 +0200
|
||
|
Subject: [PATCH] Fix handling of collating symbols in regexps
|
||
|
|
||
|
[BZ #11561]
|
||
|
* posix/regcomp.c (parse_bracket_exp): When looking up collating
|
||
|
elements compare against their byte sequences, not their names.
|
||
|
|
||
|
---
|
||
|
ChangeLog | 4 +++
|
||
|
posix/regcomp.c | 72 ++++++++++++++++++++----------------------------------
|
||
|
2 files changed, 31 insertions(+), 45 deletions(-)
|
||
|
|
||
|
--- a/posix/regcomp.c
|
||
|
+++ b/posix/regcomp.c
|
||
|
@@ -2772,40 +2772,29 @@ parse_bracket_exp (re_string_t *regexp, re_dfa_t *dfa, re_token_t *token,
|
||
|
|
||
|
/* Local function for parse_bracket_exp used in _LIBC environement.
|
||
|
Seek the collating symbol entry correspondings to NAME.
|
||
|
- Return the index of the symbol in the SYMB_TABLE. */
|
||
|
+ Return the index of the symbol in the SYMB_TABLE,
|
||
|
+ or -1 if not found. */
|
||
|
|
||
|
auto inline int32_t
|
||
|
__attribute ((always_inline))
|
||
|
- seek_collating_symbol_entry (name, name_len)
|
||
|
- const unsigned char *name;
|
||
|
- size_t name_len;
|
||
|
+ seek_collating_symbol_entry (const unsigned char *name, size_t name_len)
|
||
|
{
|
||
|
- int32_t hash = elem_hash ((const char *) name, name_len);
|
||
|
- int32_t elem = hash % table_size;
|
||
|
- if (symb_table[2 * elem] != 0)
|
||
|
- {
|
||
|
- int32_t second = hash % (table_size - 2) + 1;
|
||
|
-
|
||
|
- do
|
||
|
- {
|
||
|
- /* First compare the hashing value. */
|
||
|
- if (symb_table[2 * elem] == hash
|
||
|
- /* Compare the length of the name. */
|
||
|
- && name_len == extra[symb_table[2 * elem + 1]]
|
||
|
- /* Compare the name. */
|
||
|
- && memcmp (name, &extra[symb_table[2 * elem + 1] + 1],
|
||
|
- name_len) == 0)
|
||
|
- {
|
||
|
- /* Yep, this is the entry. */
|
||
|
- break;
|
||
|
- }
|
||
|
+ int32_t elem;
|
||
|
|
||
|
- /* Next entry. */
|
||
|
- elem += second;
|
||
|
- }
|
||
|
- while (symb_table[2 * elem] != 0);
|
||
|
- }
|
||
|
- return elem;
|
||
|
+ for (elem = 0; elem < table_size; elem++)
|
||
|
+ if (symb_table[2 * elem] != 0)
|
||
|
+ {
|
||
|
+ int32_t idx = symb_table[2 * elem + 1];
|
||
|
+ /* Skip over the name of the collating element. */
|
||
|
+ idx += 1 + extra[idx];
|
||
|
+ if (/* Compare the length of the name. */
|
||
|
+ name_len == extra[idx]
|
||
|
+ /* Compare the name. */
|
||
|
+ && memcmp (name, &extra[idx + 1], name_len) == 0)
|
||
|
+ /* Yep, this is the entry. */
|
||
|
+ return elem;
|
||
|
+ }
|
||
|
+ return -1;
|
||
|
}
|
||
|
|
||
|
/* Local function for parse_bracket_exp used in _LIBC environment.
|
||
|
@@ -2814,8 +2803,7 @@ parse_bracket_exp (re_string_t *regexp, re_dfa_t *dfa, re_token_t *token,
|
||
|
|
||
|
auto inline unsigned int
|
||
|
__attribute ((always_inline))
|
||
|
- lookup_collation_sequence_value (br_elem)
|
||
|
- bracket_elem_t *br_elem;
|
||
|
+ lookup_collation_sequence_value (bracket_elem_t *br_elem)
|
||
|
{
|
||
|
if (br_elem->type == SB_CHAR)
|
||
|
{
|
||
|
@@ -2843,7 +2831,7 @@ parse_bracket_exp (re_string_t *regexp, re_dfa_t *dfa, re_token_t *token,
|
||
|
int32_t elem, idx;
|
||
|
elem = seek_collating_symbol_entry (br_elem->opr.name,
|
||
|
sym_name_len);
|
||
|
- if (symb_table[2 * elem] != 0)
|
||
|
+ if (elem != -1)
|
||
|
{
|
||
|
/* We found the entry. */
|
||
|
idx = symb_table[2 * elem + 1];
|
||
|
@@ -2861,7 +2849,7 @@ parse_bracket_exp (re_string_t *regexp, re_dfa_t *dfa, re_token_t *token,
|
||
|
/* Return the collation sequence value. */
|
||
|
return *(unsigned int *) (extra + idx);
|
||
|
}
|
||
|
- else if (symb_table[2 * elem] == 0 && sym_name_len == 1)
|
||
|
+ else if (sym_name_len == 1)
|
||
|
{
|
||
|
/* No valid character. Match it as a single byte
|
||
|
character. */
|
||
|
@@ -2883,11 +2871,8 @@ parse_bracket_exp (re_string_t *regexp, re_dfa_t *dfa, re_token_t *token,
|
||
|
|
||
|
auto inline reg_errcode_t
|
||
|
__attribute ((always_inline))
|
||
|
- build_range_exp (sbcset, mbcset, range_alloc, start_elem, end_elem)
|
||
|
- re_charset_t *mbcset;
|
||
|
- int *range_alloc;
|
||
|
- bitset_t sbcset;
|
||
|
- bracket_elem_t *start_elem, *end_elem;
|
||
|
+ build_range_exp (bitset_t sbcset, re_charset_t *mbcset, int *range_alloc,
|
||
|
+ bracket_elem_t *start_elem, bracket_elem_t *end_elem)
|
||
|
{
|
||
|
unsigned int ch;
|
||
|
uint32_t start_collseq;
|
||
|
@@ -2966,25 +2951,22 @@ parse_bracket_exp (re_string_t *regexp, re_dfa_t *dfa, re_token_t *token,
|
||
|
|
||
|
auto inline reg_errcode_t
|
||
|
__attribute ((always_inline))
|
||
|
- build_collating_symbol (sbcset, mbcset, coll_sym_alloc, name)
|
||
|
- re_charset_t *mbcset;
|
||
|
- int *coll_sym_alloc;
|
||
|
- bitset_t sbcset;
|
||
|
- const unsigned char *name;
|
||
|
+ build_collating_symbol (bitset_t sbcset, re_charset_t *mbcset,
|
||
|
+ int *coll_sym_alloc, const unsigned char *name)
|
||
|
{
|
||
|
int32_t elem, idx;
|
||
|
size_t name_len = strlen ((const char *) name);
|
||
|
if (nrules != 0)
|
||
|
{
|
||
|
elem = seek_collating_symbol_entry (name, name_len);
|
||
|
- if (symb_table[2 * elem] != 0)
|
||
|
+ if (elem != -1)
|
||
|
{
|
||
|
/* We found the entry. */
|
||
|
idx = symb_table[2 * elem + 1];
|
||
|
/* Skip the name of collating element name. */
|
||
|
idx += 1 + extra[idx];
|
||
|
}
|
||
|
- else if (symb_table[2 * elem] == 0 && name_len == 1)
|
||
|
+ else if (name_len == 1)
|
||
|
{
|
||
|
/* No valid character, treat it as a normal
|
||
|
character. */
|