re-import sources as agreed with the maintainer
This commit is contained in:
parent
5bd3de2283
commit
1c7fdeef52
2
.gitignore
vendored
2
.gitignore
vendored
@ -1 +1 @@
|
||||
/zaf-0-0.1.20080714svn.tar.bz2
|
||||
zaf-0-0.1.20080714svn.tar.bz2
|
||||
|
15
tests/scripts/run_tests.sh
Normal file
15
tests/scripts/run_tests.sh
Normal file
@ -0,0 +1,15 @@
|
||||
#!/bin/bash
|
||||
|
||||
set -e
|
||||
|
||||
gcc -Wall test-hyphen.c -o test-hyphen -lhyphen
|
||||
|
||||
echo "Test to hyphenate given word => woordafbreking"
|
||||
echo "woordafbreking" | ./test-hyphen /usr/share/hyphen/hyph_af_ZA.dic /dev/stdin
|
||||
|
||||
echo ""
|
||||
echo ""
|
||||
|
||||
echo "Test to give all possible ways to hyphenate the given word => woordafbreking"
|
||||
echo ""woordafbreking | ./test-hyphen -d /usr/share/hyphen/hyph_af_ZA.dic /dev/stdin
|
||||
|
212
tests/scripts/test-hyphen.c
Normal file
212
tests/scripts/test-hyphen.c
Normal file
@ -0,0 +1,212 @@
|
||||
/* This test file is part of upstream hyphen project */
|
||||
|
||||
#include <string.h>
|
||||
#include <stdlib.h>
|
||||
#include <stdio.h>
|
||||
#include <ctype.h>
|
||||
|
||||
#include "hyphen.h"
|
||||
|
||||
#define BUFSIZE 1000
|
||||
|
||||
void help() {
|
||||
fprintf(stderr,"correct syntax is:\n");
|
||||
fprintf(stderr,"example [-d | -dd] hyphen_dictionary_file file_of_words_to_check\n");
|
||||
fprintf(stderr,"-o = use old algorithm (without non-standard hyphenation)\n");
|
||||
fprintf(stderr,"-d = hyphenation with listing of the possible hyphenations\n");
|
||||
fprintf(stderr,"-n = print hyphenation vector\n");
|
||||
}
|
||||
|
||||
/* get a pointer to the nth 8-bit or UTF-8 character of the word */
|
||||
char * hindex(char * word, int n, int utf8) {
|
||||
int j = 0;
|
||||
while (j < n) {
|
||||
j++;
|
||||
word++;
|
||||
while (utf8 && ((((unsigned char) *word) >> 6) == 2)) word++;
|
||||
}
|
||||
return word;
|
||||
}
|
||||
|
||||
/* list possible hyphenations with -dd option (example for the usage of the hyphenate2() function) */
|
||||
void single_hyphenations(char * word, char * hyphen, char ** rep, int * pos, int * cut, int utf8) {
|
||||
int i, k, j = 0;
|
||||
char r;
|
||||
for (i = 0; (i + 1) < strlen(word); i++) {
|
||||
if (utf8 && ((((unsigned char) word[i]) >> 6) == 2)) continue;
|
||||
if ((hyphen[j] & 1)) {
|
||||
if (rep && rep[j]) {
|
||||
k = hindex(word, j - pos[j] + 1, utf8) - word;
|
||||
r = word[k];
|
||||
word[k] = 0;
|
||||
printf(" - %s%s", word, rep[j]);
|
||||
word[k] = r;
|
||||
printf("%s\n", hindex(word + k, cut[j], utf8));
|
||||
} else {
|
||||
k = hindex(word, j + 1, utf8) - word;
|
||||
r = word[k];
|
||||
word[k] = 0;
|
||||
printf(" - %s=", word);
|
||||
word[k] = r;
|
||||
printf("%s\n", word + k);
|
||||
}
|
||||
}
|
||||
j++;
|
||||
}
|
||||
}
|
||||
|
||||
int
|
||||
main(int argc, char** argv)
|
||||
{
|
||||
|
||||
HyphenDict *dict;
|
||||
int df;
|
||||
int wtc;
|
||||
FILE* wtclst;
|
||||
int k, n, i, j, c;
|
||||
char buf[BUFSIZE + 1];
|
||||
int nHyphCount;
|
||||
char *hyphens;
|
||||
char *lcword;
|
||||
char *hyphword;
|
||||
char hword[BUFSIZE * 2];
|
||||
int arg = 1;
|
||||
int optd = 1;
|
||||
int optn = 0;
|
||||
int optdd = 0;
|
||||
char ** rep;
|
||||
int * pos;
|
||||
int * cut;
|
||||
|
||||
/* first parse the command line options */
|
||||
/* arg1 - hyphen dictionary file, arg2 - file of words to check */
|
||||
|
||||
if (argv[arg]) {
|
||||
if (strcmp(argv[arg], "-o") == 0) {
|
||||
optd = 0;
|
||||
arg++;
|
||||
}
|
||||
if (strcmp(argv[arg], "-n") == 0) {
|
||||
optn = 1;
|
||||
arg++;
|
||||
}
|
||||
if (argv[arg] && strcmp(argv[arg], "-d") == 0) {
|
||||
optd = 1;
|
||||
optdd = 1;
|
||||
arg++;
|
||||
}
|
||||
}
|
||||
|
||||
if (argv[arg]) {
|
||||
df = arg++;
|
||||
} else {
|
||||
help();
|
||||
exit(1);
|
||||
}
|
||||
|
||||
if (argv[arg]) {
|
||||
wtc = arg++;
|
||||
} else {
|
||||
help();
|
||||
exit(1);
|
||||
}
|
||||
|
||||
/* load the hyphenation dictionary */
|
||||
if ((dict = hnj_hyphen_load(argv[df])) == NULL) {
|
||||
fprintf(stderr, "Couldn't find file %s\n", argv[df]);
|
||||
fflush(stderr);
|
||||
exit(1);
|
||||
}
|
||||
|
||||
/* open the words to check list */
|
||||
wtclst = fopen(argv[wtc],"r");
|
||||
if (!wtclst) {
|
||||
fprintf(stderr,"Error - could not open file of words to check\n");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
|
||||
/* now read each word from the wtc file */
|
||||
while(fgets(buf,BUFSIZE,wtclst) != NULL) {
|
||||
k = strlen(buf);
|
||||
if (k && buf[k - 1] == '\n') buf[k - 1] = '\0';
|
||||
if (k >=2 && buf[k - 2] == '\r') buf[k-- - 2] = '\0';
|
||||
|
||||
/* set aside some buffers to hold lower cased */
|
||||
/* and hyphen information */
|
||||
lcword = (char *) malloc(k+1);
|
||||
hyphens = (char *)malloc(k+5);
|
||||
/* basic ascii lower-case, not suitable for real-world usage*/
|
||||
for (i = 0; i < k; ++i) {
|
||||
lcword[i] = buf[i];
|
||||
if ( (lcword[i] >= 'A') && (lcword[i] <= 'Z') )
|
||||
lcword[i] += 32;
|
||||
}
|
||||
|
||||
/* first remove any trailing periods */
|
||||
n = k-1;
|
||||
while((n >=0) && (lcword[n] == '.')) n--;
|
||||
n++;
|
||||
|
||||
/* now actually try to hyphenate the word */
|
||||
|
||||
rep = NULL;
|
||||
pos = NULL;
|
||||
cut = NULL;
|
||||
hword[0] = '\0';
|
||||
|
||||
if ((!optd && hnj_hyphen_hyphenate(dict, lcword, n-1, hyphens)) ||
|
||||
(optd && hnj_hyphen_hyphenate2(dict, lcword, n-1, hyphens, hword, &rep, &pos, &cut))) {
|
||||
free(hyphens);
|
||||
free(lcword);
|
||||
fprintf(stderr, "hyphenation error\n");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
if (optn) fprintf(stderr, "%s\n", hyphens);
|
||||
|
||||
if (!optd) {
|
||||
/* now backfill hyphens[] for any removed periods */
|
||||
for (c = n; c < k; c++) hyphens[c] = '0';
|
||||
hyphens[k] = '\0';
|
||||
|
||||
/* now create a new char string showing hyphenation positions */
|
||||
/* count the hyphens and allocate space for the new hypehanted string */
|
||||
nHyphCount = 0;
|
||||
for (i = 0; i < n; i++)
|
||||
if (hyphens[i]&1)
|
||||
nHyphCount++;
|
||||
hyphword = (char *) malloc(k+1+nHyphCount);
|
||||
j = 0;
|
||||
for (i = 0; i < n; i++) {
|
||||
hyphword[j++] = buf[i];
|
||||
if (hyphens[i]&1) {
|
||||
hyphword[j++] = '-';
|
||||
}
|
||||
}
|
||||
hyphword[j] = '\0';
|
||||
fprintf(stdout,"%s\n",hyphword);
|
||||
fflush(stdout);
|
||||
free(hyphword);
|
||||
} else {
|
||||
fprintf(stdout,"%s\n", hword);
|
||||
|
||||
|
||||
if (optdd) single_hyphenations(lcword, hyphens, rep, pos, cut, dict->utf8);
|
||||
if (rep) {
|
||||
for (i = 0; i < n - 1; i++) {
|
||||
if (rep[i]) free(rep[i]);
|
||||
}
|
||||
free(rep);
|
||||
free(pos);
|
||||
free(cut);
|
||||
}
|
||||
}
|
||||
free(hyphens);
|
||||
free(lcword);
|
||||
}
|
||||
|
||||
fclose(wtclst);
|
||||
hnj_hyphen_free(dict);
|
||||
return 0;
|
||||
}
|
13
tests/tests.yml
Normal file
13
tests/tests.yml
Normal file
@ -0,0 +1,13 @@
|
||||
- hosts: localhost
|
||||
roles:
|
||||
- role: standard-test-basic
|
||||
required_packages:
|
||||
- hyphen-devel
|
||||
- gcc
|
||||
tags:
|
||||
- classic
|
||||
tests:
|
||||
- sample:
|
||||
dir: scripts/
|
||||
run: ./run_tests.sh
|
||||
|
Loading…
Reference in New Issue
Block a user