Rebuild for Python 3.6

Added a patch that turns the regex literals in html5lib/sanitizer.py into raw strings, fixing the invalid escape sequences that Python 3.6 warns about
This commit is contained in:
Charalampos Stratakis 2016-12-12 19:17:03 +01:00
parent 5140d18684
commit bf3e7d8ed5
2 changed files with 61 additions and 2 deletions

View File

@ -0,0 +1,52 @@
diff --git a/html5lib/sanitizer.py b/html5lib/sanitizer.py
index 71dc521..56e3ac7 100644
--- a/html5lib/sanitizer.py
+++ b/html5lib/sanitizer.py
@@ -185,7 +185,7 @@ class HTMLSanitizerMixin(object):
for attr in self.attr_val_is_uri:
if attr not in attrs:
continue
- val_unescaped = re.sub("[`\000-\040\177-\240\s]+", '',
+ val_unescaped = re.sub(r"[`\000-\040\177-\240\s]+", '',
unescape(attrs[attr])).lower()
# remove replacement characters from unescaped characters
val_unescaped = val_unescaped.replace("\ufffd", "")
@@ -199,7 +199,7 @@ class HTMLSanitizerMixin(object):
' ',
unescape(attrs[attr]))
if (token["name"] in self.svg_allow_local_href and
- 'xlink:href' in attrs and re.search('^\s*[^#\s].*',
+ 'xlink:href' in attrs and re.search(r'^\s*[^#\s].*',
attrs['xlink:href'])):
del attrs['xlink:href']
if 'style' in attrs:
@@ -228,16 +228,16 @@ class HTMLSanitizerMixin(object):
def sanitize_css(self, style):
# disallow urls
- style = re.compile('url\s*\(\s*[^\s)]+?\s*\)\s*').sub(' ', style)
+ style = re.compile(r'url\s*\(\s*[^\s)]+?\s*\)\s*').sub(' ', style)
# gauntlet
- if not re.match("""^([:,;#%.\sa-zA-Z0-9!]|\w-\w|'[\s\w]+'|"[\s\w]+"|\([\d,\s]+\))*$""", style):
+ if not re.match(r"""^([:,;#%.\sa-zA-Z0-9!]|\w-\w|'[\s\w]+'|"[\s\w]+"|\([\d,\s]+\))*$""", style):
return ''
- if not re.match("^\s*([-\w]+\s*:[^:;]*(;\s*|$))*$", style):
+ if not re.match(r"^\s*([-\w]+\s*:[^:;]*(;\s*|$))*$", style):
return ''
clean = []
- for prop, value in re.findall("([-\w]+)\s*:\s*([^:;]*)", style):
+ for prop, value in re.findall(r"([-\w]+)\s*:\s*([^:;]*)", style):
if not value:
continue
if prop.lower() in self.allowed_css_properties:
@@ -246,7 +246,7 @@ class HTMLSanitizerMixin(object):
'padding']:
for keyword in value.split():
if not keyword in self.acceptable_css_keywords and \
- not re.match("^(#[0-9a-f]+|rgb\(\d+%?,\d*%?,?\d*%?\)?|\d{0,2}\.?\d{0,2}(cm|em|ex|in|mm|pc|pt|px|%|,|\))?)$", keyword):
+ not re.match(r"^(#[0-9a-f]+|rgb\(\d+%?,\d*%?,?\d*%?\)?|\d{0,2}\.?\d{0,2}(cm|em|ex|in|mm|pc|pt|px|%|,|\))?)$", keyword):
break
else:
clean.append(prop + ': ' + value + ';')

View File

@ -6,13 +6,15 @@
Name: python-%{modulename}
Summary: A python based HTML parser/tokenizer
Version: 0.999
Release: 9%{?dist}
Release: 10%{?dist}
Epoch: 1
Group: Development/Libraries
License: MIT
URL: https://pypi.python.org/pypi/%{modulename}
Source0: https://pypi.python.org/packages/source/h/%{modulename}/%{modulename}-%{version}.tar.gz
# Make the sanitizer's regex literals raw strings; Python 3.6 deprecates invalid escape sequences such as "\s" in ordinary string literals
Patch0: fix-invalid-escape-sequences.patch
BuildArch: noarch
Requires: python-six
@ -45,6 +47,7 @@ specification for maximum compatibility with major desktop web browsers.
%prep
%setup -q -n %{modulename}-%{version}
%patch0 -p1
%if 0%{?with_python3}
rm -rf %{py3dir}
@ -94,6 +97,10 @@ popd
%changelog
* Mon Dec 12 2016 Charalampos Stratakis <cstratak@redhat.com> - 1:0.999-10
- Rebuild for Python 3.6
- Fix invalid escape sequences
* Tue Jul 19 2016 Fedora Release Engineering <rel-eng@lists.fedoraproject.org> - 1:0.999-9
- https://fedoraproject.org/wiki/Changes/Automatic_Provides_for_Python_RPM_Packages