import libkkc-data-0.2.7-11.el8
This commit is contained in:
commit
e45aed0557
1
.gitignore
vendored
Normal file
1
.gitignore
vendored
Normal file
@ -0,0 +1 @@
|
|||||||
|
SOURCES/libkkc-data-0.2.7.tar.xz
|
1
.libkkc-data.metadata
Normal file
1
.libkkc-data.metadata
Normal file
@ -0,0 +1 @@
|
|||||||
|
6e5d833744f8311908f12168dcf697633efc2795 SOURCES/libkkc-data-0.2.7.tar.xz
|
128
SOURCES/libkkc-data-HEAD.patch
Normal file
128
SOURCES/libkkc-data-HEAD.patch
Normal file
@ -0,0 +1,128 @@
|
|||||||
|
diff --git a/tools/genfilter.py b/tools/genfilter.py
|
||||||
|
index 5ffab32..0c5f75a 100644
|
||||||
|
--- a/tools/genfilter.py
|
||||||
|
+++ b/tools/genfilter.py
|
||||||
|
@@ -84,24 +84,24 @@ class FilterGenerator(object):
|
||||||
|
|
||||||
|
def generate(self):
|
||||||
|
size = os.fstat(self.infile.fileno()).st_size
|
||||||
|
- n = size / self.record_size
|
||||||
|
+ n = size // self.record_size
|
||||||
|
m = int(math.ceil(-n*math.log10(ERROR_RATE) /
|
||||||
|
math.pow(math.log10(2), 2)))
|
||||||
|
- m = (m/8 + 1)*8
|
||||||
|
+ m = (m//8 + 1)*8
|
||||||
|
inmem = mmap.mmap(self.infile.fileno(),
|
||||||
|
size,
|
||||||
|
access=mmap.ACCESS_READ)
|
||||||
|
- outmem = bytearray(m/8)
|
||||||
|
- for i in xrange(0, n):
|
||||||
|
+ outmem = bytearray(m//8)
|
||||||
|
+ for i in range(0, n):
|
||||||
|
offset = i*self.record_size
|
||||||
|
b0, b1 = struct.unpack("=LL", inmem[offset:offset+8])
|
||||||
|
- for k in xrange(0, 4):
|
||||||
|
+ for k in range(0, 4):
|
||||||
|
h = murmur_hash3_32(b0, b1, k)
|
||||||
|
h = int(h * (m / float(0xFFFFFFFF)))
|
||||||
|
- outmem[h/8] |= (1 << (h%8))
|
||||||
|
+ outmem[h//8] |= (1 << (h%8))
|
||||||
|
inmem.close()
|
||||||
|
- # Convert bytearray to str, for Python 2.6 compatibility.
|
||||||
|
- self.outfile.write(str(outmem))
|
||||||
|
+ # Convert bytearray to bytes, for Python 3 compatibility.
|
||||||
|
+ self.outfile.write(bytes(outmem))
|
||||||
|
|
||||||
|
if __name__ == '__main__':
|
||||||
|
import sys
|
||||||
|
@@ -110,7 +110,7 @@ if __name__ == '__main__':
|
||||||
|
parser = argparse.ArgumentParser(description='filter')
|
||||||
|
parser.add_argument('infile', type=argparse.FileType('r'),
|
||||||
|
help='input file')
|
||||||
|
- parser.add_argument('outfile', type=argparse.FileType('w'),
|
||||||
|
+ parser.add_argument('outfile', type=argparse.FileType('wb'),
|
||||||
|
help='output file')
|
||||||
|
parser.add_argument('record_size', type=int,
|
||||||
|
help='record size')
|
||||||
|
diff --git a/tools/sortlm.py b/tools/sortlm.py
|
||||||
|
index a0dd8fe..40f0837 100644
|
||||||
|
--- a/tools/sortlm.py
|
||||||
|
+++ b/tools/sortlm.py
|
||||||
|
@@ -40,10 +40,10 @@ class SortedGenerator(object):
|
||||||
|
self.__min_cost = 0.0
|
||||||
|
|
||||||
|
def read(self):
|
||||||
|
- print "reading N-grams"
|
||||||
|
+ print("reading N-grams")
|
||||||
|
self.__read_tries()
|
||||||
|
self.__read_ngrams()
|
||||||
|
- print "min cost = %lf" % self.__min_cost
|
||||||
|
+ print("min cost = %lf" % self.__min_cost)
|
||||||
|
|
||||||
|
def __read_tries(self):
|
||||||
|
while True:
|
||||||
|
@@ -58,7 +58,7 @@ class SortedGenerator(object):
|
||||||
|
line = self.__infile.readline()
|
||||||
|
if line == "":
|
||||||
|
break
|
||||||
|
- line = line.strip()
|
||||||
|
+ line = line.strip('\n')
|
||||||
|
if line == "":
|
||||||
|
break
|
||||||
|
match = self.__ngram_line_regex.match(line)
|
||||||
|
@@ -89,7 +89,7 @@ class SortedGenerator(object):
|
||||||
|
line = self.__infile.readline()
|
||||||
|
if line == "":
|
||||||
|
break
|
||||||
|
- line = line.strip()
|
||||||
|
+ line = line.strip('\n')
|
||||||
|
if line == "":
|
||||||
|
break
|
||||||
|
match = self.__ngram_line_regex.match(line)
|
||||||
|
@@ -125,14 +125,11 @@ class SortedGenerator(object):
|
||||||
|
def quantize(cost, min_cost):
|
||||||
|
return max(0, min(65535, int(cost * 65535 / min_cost)))
|
||||||
|
|
||||||
|
- def cmp_header(a, b):
|
||||||
|
- return cmp(a[0], b[0])
|
||||||
|
-
|
||||||
|
- print "writing 1-gram file"
|
||||||
|
+ print("writing 1-gram file")
|
||||||
|
unigram_offsets = {}
|
||||||
|
unigram_file = open("%s.1gram" % self.__output_prefix, "wb")
|
||||||
|
offset = 0
|
||||||
|
- for ids, value in sorted(self.__ngram_entries[0].iteritems()):
|
||||||
|
+ for ids, value in sorted(self.__ngram_entries[0].items()):
|
||||||
|
unigram_offsets[ids[0]] = offset
|
||||||
|
s = struct.pack("=HHH",
|
||||||
|
quantize(value[0], self.__min_cost),
|
||||||
|
@@ -143,13 +140,13 @@ class SortedGenerator(object):
|
||||||
|
offset += 1
|
||||||
|
unigram_file.close()
|
||||||
|
|
||||||
|
- print "writing 2-gram file"
|
||||||
|
+ print("writing 2-gram file")
|
||||||
|
bigram_offsets = {}
|
||||||
|
bigram_file = open("%s.2gram" % self.__output_prefix, "wb")
|
||||||
|
keys = self.__ngram_entries[1].keys()
|
||||||
|
items = [(struct.pack("=LL", ids[1], unigram_offsets[ids[0]]), ids) for ids in keys]
|
||||||
|
offset = 0
|
||||||
|
- for header, ids in sorted(items, cmp=cmp_header):
|
||||||
|
+ for header, ids in sorted(items, key=lambda x: x[0]):
|
||||||
|
value = self.__ngram_entries[1][ids]
|
||||||
|
bigram_offsets[ids] = offset
|
||||||
|
s = struct.pack("=HH",
|
||||||
|
@@ -160,11 +157,11 @@ class SortedGenerator(object):
|
||||||
|
bigram_file.close()
|
||||||
|
|
||||||
|
if len(self.__ngram_entries[2]) > 0:
|
||||||
|
- print "writing 3-gram file"
|
||||||
|
+ print("writing 3-gram file")
|
||||||
|
trigram_file = open("%s.3gram" % self.__output_prefix, "wb")
|
||||||
|
keys = self.__ngram_entries[2].keys()
|
||||||
|
items = [(struct.pack("=LL", ids[2], bigram_offsets[(ids[0], ids[1])]), ids) for ids in keys]
|
||||||
|
- for header, ids in sorted(items, cmp=cmp_header):
|
||||||
|
+ for header, ids in sorted(items, key=lambda x: x[0]):
|
||||||
|
value = self.__ngram_entries[2][ids]
|
||||||
|
s = struct.pack("=H",
|
||||||
|
quantize(value[0], self.__min_cost))
|
82
SPECS/libkkc-data.spec
Normal file
82
SPECS/libkkc-data.spec
Normal file
@ -0,0 +1,82 @@
|
|||||||
|
%global debug_package %{nil}
|
||||||
|
|
||||||
|
Name: libkkc-data
|
||||||
|
Version: 0.2.7
|
||||||
|
Release: 11%{?dist}
|
||||||
|
Epoch: 1
|
||||||
|
Summary: Language model data for libkkc
|
||||||
|
|
||||||
|
License: GPLv3+
|
||||||
|
URL: https://bitbucket.org/libkkc
|
||||||
|
Source0: https://bitbucket.org/libkkc/libkkc-data/downloads/%{name}-%{version}.tar.xz
|
||||||
|
|
||||||
|
# Upstream commit https://github.com/ueno/libkkc/commit/ba1c1bd3eb86d887fc3689c3142732658071b5f7
|
||||||
|
Patch0: libkkc-data-HEAD.patch
|
||||||
|
|
||||||
|
BuildRequires: python3-devel
|
||||||
|
BuildRequires: python3-marisa
|
||||||
|
|
||||||
|
%description
|
||||||
|
The %{name} package contains the language model data that libkkc uses
|
||||||
|
at run time.
|
||||||
|
|
||||||
|
|
||||||
|
%prep
|
||||||
|
%setup -q
|
||||||
|
%patch0 -p1 -b .HEAD
|
||||||
|
|
||||||
|
|
||||||
|
%build
|
||||||
|
%configure --disable-static PYTHON=python3
|
||||||
|
make %{?_smp_mflags}
|
||||||
|
|
||||||
|
|
||||||
|
%install
|
||||||
|
%make_install INSTALL="install -p"
|
||||||
|
|
||||||
|
|
||||||
|
%files
|
||||||
|
%doc COPYING
|
||||||
|
%{_libdir}/libkkc
|
||||||
|
|
||||||
|
|
||||||
|
%changelog
|
||||||
|
* Tue Aug 14 2018 Parag Nemade <pnemade AT redhat DOT com> - 1:0.2.7-11
|
||||||
|
- Resolves: rh#1615534: libkkc-data FTBFS for missing BuildRequires
|
||||||
|
- moved this package to use python3
|
||||||
|
|
||||||
|
* Wed Feb 07 2018 Fedora Release Engineering <releng@fedoraproject.org> - 1:0.2.7-10
|
||||||
|
- Rebuilt for https://fedoraproject.org/wiki/Fedora_28_Mass_Rebuild
|
||||||
|
|
||||||
|
* Thu Aug 03 2017 Fedora Release Engineering <releng@fedoraproject.org> - 1:0.2.7-9
|
||||||
|
- Rebuilt for https://fedoraproject.org/wiki/Fedora_27_Binutils_Mass_Rebuild
|
||||||
|
|
||||||
|
* Wed Jul 26 2017 Fedora Release Engineering <releng@fedoraproject.org> - 1:0.2.7-8
|
||||||
|
- Rebuilt for https://fedoraproject.org/wiki/Fedora_27_Mass_Rebuild
|
||||||
|
|
||||||
|
* Fri Feb 10 2017 Fedora Release Engineering <releng@fedoraproject.org> - 1:0.2.7-7
|
||||||
|
- Rebuilt for https://fedoraproject.org/wiki/Fedora_26_Mass_Rebuild
|
||||||
|
|
||||||
|
* Thu Feb 04 2016 Fedora Release Engineering <releng@fedoraproject.org> - 1:0.2.7-6
|
||||||
|
- Rebuilt for https://fedoraproject.org/wiki/Fedora_24_Mass_Rebuild
|
||||||
|
|
||||||
|
* Wed Jun 17 2015 Fedora Release Engineering <rel-eng@lists.fedoraproject.org> - 1:0.2.7-5
|
||||||
|
- Rebuilt for https://fedoraproject.org/wiki/Fedora_23_Mass_Rebuild
|
||||||
|
|
||||||
|
* Sun Aug 17 2014 Fedora Release Engineering <rel-eng@lists.fedoraproject.org> - 1:0.2.7-4
|
||||||
|
- Rebuilt for https://fedoraproject.org/wiki/Fedora_21_22_Mass_Rebuild
|
||||||
|
|
||||||
|
* Sat Jun 07 2014 Fedora Release Engineering <rel-eng@lists.fedoraproject.org> - 1:0.2.7-3
|
||||||
|
- Rebuilt for https://fedoraproject.org/wiki/Fedora_21_Mass_Rebuild
|
||||||
|
|
||||||
|
* Fri Sep 20 2013 Daiki Ueno <dueno@redhat.com> - 1:0.2.7-2
|
||||||
|
- bump release to avoid NVR conflict
|
||||||
|
|
||||||
|
* Fri Sep 20 2013 Daiki Ueno <dueno@redhat.com> - 1:0.2.7-1
|
||||||
|
- add COPYING to %%doc
|
||||||
|
- disable debuginfo
|
||||||
|
- add Epoch to avoid conflict with the libkkc package
|
||||||
|
|
||||||
|
* Tue Sep 17 2013 Daiki Ueno <dueno@redhat.com> - 0.2.7-1
|
||||||
|
- initial packaging for Fedora, splitting from libkkc
|
||||||
|
|
Loading…
Reference in New Issue
Block a user