Compare commits
No commits in common. "c8" and "c9s" have entirely different histories.
1
.fmf/version
Normal file
1
.fmf/version
Normal file
@ -0,0 +1 @@
|
|||||||
|
1
|
2
.gitignore
vendored
2
.gitignore
vendored
@ -1 +1 @@
|
|||||||
SOURCES/lynx2.8.9rel.1.tar.bz2
|
/lynx2.*.tar.bz2
|
||||||
|
@ -1 +0,0 @@
|
|||||||
3e00ac30d008e0aa879bfd037abcfd9c0dd2faec SOURCES/lynx2.8.9rel.1.tar.bz2
|
|
6
ci.fmf
Normal file
6
ci.fmf
Normal file
@ -0,0 +1,6 @@
|
|||||||
|
summary:
|
||||||
|
Run all tests
|
||||||
|
discover:
|
||||||
|
how: fmf
|
||||||
|
execute:
|
||||||
|
how: tmt
|
6
gating.yaml
Normal file
6
gating.yaml
Normal file
@ -0,0 +1,6 @@
|
|||||||
|
--- !Policy
|
||||||
|
product_versions:
|
||||||
|
- rhel-9
|
||||||
|
decision_context: osci_compose_gate
|
||||||
|
rules:
|
||||||
|
- !PassingTestCaseRule {test_case_name: osci.brew-build.tier0.functional}
|
143
lynx-2.8.9-fix-page-download.patch
Normal file
143
lynx-2.8.9-fix-page-download.patch
Normal file
@ -0,0 +1,143 @@
|
|||||||
|
From 6c764e123c86ccce03d4ffaee11085b9badd6765 Mon Sep 17 00:00:00 2001
|
||||||
|
From: "Thomas E. Dickey" <dickey@invisible-island.net>
|
||||||
|
Date: Thu, 29 Jul 2021 21:15:21 +0000
|
||||||
|
Subject: [PATCH] snapshot of project "lynx", label v2-9-0dev_7b
|
||||||
|
|
||||||
|
Upstream-commit: 6c764e123c86ccce03d4ffaee11085b9badd6765
|
||||||
|
Cherry-picked-by: Lukáš Zaoral <lzaoral@redhat.com>
|
||||||
|
|
||||||
|
---
|
||||||
|
src/LYDownload.c | 6 +++---
|
||||||
|
src/LYLocal.c | 2 +-
|
||||||
|
src/LYMainLoop.c | 2 +-
|
||||||
|
src/LYPrint.c | 16 ++++++++--------
|
||||||
|
src/LYUpload.c | 8 ++++----
|
||||||
|
5 files changed, 17 insertions(+), 17 deletions(-)
|
||||||
|
|
||||||
|
diff --git a/src/LYDownload.c b/src/LYDownload.c
|
||||||
|
index fcaba027..afd6638e 100644
|
||||||
|
--- a/src/LYDownload.c
|
||||||
|
+++ b/src/LYDownload.c
|
||||||
|
@@ -63,7 +63,7 @@ void LYDownload(char *line)
|
||||||
|
/*
|
||||||
|
* Parse out the File, sug_file, and the Method.
|
||||||
|
*/
|
||||||
|
- if ((file = strstr(Line, "/File=")) == NULL)
|
||||||
|
+ if ((file = LYstrstr(Line, "/File=")) == NULL)
|
||||||
|
goto failed;
|
||||||
|
*file = '\0';
|
||||||
|
/*
|
||||||
|
@@ -71,7 +71,7 @@ void LYDownload(char *line)
|
||||||
|
*/
|
||||||
|
file += 6;
|
||||||
|
|
||||||
|
- if ((sug_file = strstr(file + 1, "/SugFile=")) != NULL) {
|
||||||
|
+ if ((sug_file = LYstrstr(file + 1, "/SugFile=")) != NULL) {
|
||||||
|
*sug_file = '\0';
|
||||||
|
/*
|
||||||
|
* Go past "SugFile=".
|
||||||
|
@@ -113,7 +113,7 @@ void LYDownload(char *line)
|
||||||
|
#endif /* _WINDOWS */
|
||||||
|
#endif /* DIRED_SUPPORT */
|
||||||
|
|
||||||
|
- if ((method = strstr(Line, "Method=")) == NULL)
|
||||||
|
+ if ((method = LYstrstr(Line, "Method=")) == NULL)
|
||||||
|
goto failed;
|
||||||
|
/*
|
||||||
|
* Go past "Method=".
|
||||||
|
diff --git a/src/LYLocal.c b/src/LYLocal.c
|
||||||
|
index 2e14a526..bb9ba29f 100644
|
||||||
|
--- a/src/LYLocal.c
|
||||||
|
+++ b/src/LYLocal.c
|
||||||
|
@@ -1700,7 +1700,7 @@ static char *match_op(const char *prefix,
|
||||||
|
size_t len = strlen(prefix);
|
||||||
|
|
||||||
|
if (!StrNCmp("LYNXDIRED://", data, 12)
|
||||||
|
- && !StrNCmp(prefix, data + 12, len)) {
|
||||||
|
+ && !strncasecomp(prefix, data + 12, len)) {
|
||||||
|
len += 12;
|
||||||
|
#if defined(USE_DOS_DRIVES)
|
||||||
|
if (data[len] == '/') { /* this is normal */
|
||||||
|
diff --git a/src/LYMainLoop.c b/src/LYMainLoop.c
|
||||||
|
index 6f96d63c..7a3df1c4 100644
|
||||||
|
--- a/src/LYMainLoop.c
|
||||||
|
+++ b/src/LYMainLoop.c
|
||||||
|
@@ -2207,7 +2207,7 @@ static int handle_LYK_DOWNLOAD(int *cmd,
|
||||||
|
}
|
||||||
|
|
||||||
|
} else if (lynx_edit_mode && !no_dired_support &&
|
||||||
|
- !strstr(links[curdoc.link].lname, "/SugFile=")) {
|
||||||
|
+ !LYstrstr(links[curdoc.link].lname, "/SugFile=")) {
|
||||||
|
/*
|
||||||
|
* Don't bother making a /tmp copy of the local file.
|
||||||
|
*/
|
||||||
|
diff --git a/src/LYPrint.c b/src/LYPrint.c
|
||||||
|
index 6ac1f4b2..58b81a6f 100644
|
||||||
|
--- a/src/LYPrint.c
|
||||||
|
+++ b/src/LYPrint.c
|
||||||
|
@@ -1111,7 +1111,7 @@ int printfile(DocInfo *newdoc)
|
||||||
|
/*
|
||||||
|
* Get the number of lines in the file.
|
||||||
|
*/
|
||||||
|
- if ((cp = strstr(link_info, "lines=")) != NULL) {
|
||||||
|
+ if ((cp = LYstrstr(link_info, "lines=")) != NULL) {
|
||||||
|
/*
|
||||||
|
* Terminate prev string here.
|
||||||
|
*/
|
||||||
|
@@ -1127,24 +1127,24 @@ int printfile(DocInfo *newdoc)
|
||||||
|
/*
|
||||||
|
* Determine the type.
|
||||||
|
*/
|
||||||
|
- if (strstr(link_info, "LOCAL_FILE")) {
|
||||||
|
+ if (LYstrstr(link_info, "LOCAL_FILE")) {
|
||||||
|
type = TO_FILE;
|
||||||
|
- } else if (strstr(link_info, "TO_SCREEN")) {
|
||||||
|
+ } else if (LYstrstr(link_info, "TO_SCREEN")) {
|
||||||
|
type = TO_SCREEN;
|
||||||
|
- } else if (strstr(link_info, "LPANSI")) {
|
||||||
|
+ } else if (LYstrstr(link_info, "LPANSI")) {
|
||||||
|
Lpansi = TRUE;
|
||||||
|
type = TO_SCREEN;
|
||||||
|
- } else if (strstr(link_info, "MAIL_FILE")) {
|
||||||
|
+ } else if (LYstrstr(link_info, "MAIL_FILE")) {
|
||||||
|
type = MAIL;
|
||||||
|
- } else if (strstr(link_info, "PRINTER")) {
|
||||||
|
+ } else if (LYstrstr(link_info, "PRINTER")) {
|
||||||
|
type = PRINTER;
|
||||||
|
|
||||||
|
- if ((cp = strstr(link_info, "number=")) != NULL) {
|
||||||
|
+ if ((cp = LYstrstr(link_info, "number=")) != NULL) {
|
||||||
|
/* number of characters in "number=" */
|
||||||
|
cp += 7;
|
||||||
|
printer_number = atoi(cp);
|
||||||
|
}
|
||||||
|
- if ((cp = strstr(link_info, "pagelen=")) != NULL) {
|
||||||
|
+ if ((cp = LYstrstr(link_info, "pagelen=")) != NULL) {
|
||||||
|
/* number of characters in "pagelen=" */
|
||||||
|
cp += 8;
|
||||||
|
pagelen = atoi(cp);
|
||||||
|
diff --git a/src/LYUpload.c b/src/LYUpload.c
|
||||||
|
index 20cfd5a3..a83a1037 100644
|
||||||
|
--- a/src/LYUpload.c
|
||||||
|
+++ b/src/LYUpload.c
|
||||||
|
@@ -53,16 +53,16 @@ int LYUpload(char *line)
|
||||||
|
/*
|
||||||
|
* Use configured upload commands.
|
||||||
|
*/
|
||||||
|
- if ((directory = strstr(line, "TO=")) == NULL)
|
||||||
|
+ if ((directory = LYstrstr(line, "TO=")) == NULL)
|
||||||
|
goto failed;
|
||||||
|
*(directory - 1) = '\0';
|
||||||
|
- /* go past "Directory=" */
|
||||||
|
+ /* go past "TO=" */
|
||||||
|
directory += 3;
|
||||||
|
|
||||||
|
- if ((method = strstr(line, "UPLOAD=")) == NULL)
|
||||||
|
+ if ((method = LYstrstr(line, "UPLOAD=")) == NULL)
|
||||||
|
goto failed;
|
||||||
|
/*
|
||||||
|
- * Go past "Method=".
|
||||||
|
+ * Go past "UPLOAD=".
|
||||||
|
*/
|
||||||
|
method += 7;
|
||||||
|
method_number = atoi(method);
|
327
lynx-2.8.9-idn2-backport.patch
Normal file
327
lynx-2.8.9-idn2-backport.patch
Normal file
@ -0,0 +1,327 @@
|
|||||||
|
From 3af65474cbec9485534ad00d604c442e42095ee5 Mon Sep 17 00:00:00 2001
|
||||||
|
From: Kamil Dudka <kdudka@redhat.com>
|
||||||
|
Date: Mon, 21 Mar 2022 15:57:49 +0100
|
||||||
|
Subject: [PATCH] backport IDN2 support from lynx2.9.0dev.10
|
||||||
|
|
||||||
|
---
|
||||||
|
WWW/Library/Implementation/HTParse.c | 49 +++++++++++++++++++++++++---
|
||||||
|
WWW/Library/Implementation/HTParse.h | 12 +++++++
|
||||||
|
aclocal.m4 | 31 ++++++++++++++----
|
||||||
|
config.hin | 1 +
|
||||||
|
src/LYMain.c | 3 ++
|
||||||
|
src/LYOptions.c | 33 +++++++++++++++++++
|
||||||
|
src/LYrcFile.c | 14 ++++++++
|
||||||
|
src/LYrcFile.h | 1 +
|
||||||
|
8 files changed, 132 insertions(+), 12 deletions(-)
|
||||||
|
|
||||||
|
diff --git a/WWW/Library/Implementation/HTParse.c b/WWW/Library/Implementation/HTParse.c
|
||||||
|
index c5d947f..f1f8208 100644
|
||||||
|
--- a/WWW/Library/Implementation/HTParse.c
|
||||||
|
+++ b/WWW/Library/Implementation/HTParse.c
|
||||||
|
@@ -22,9 +22,14 @@
|
||||||
|
#endif /* __MINGW32__ */
|
||||||
|
#endif
|
||||||
|
|
||||||
|
-#ifdef USE_IDNA
|
||||||
|
+#ifdef USE_IDN2
|
||||||
|
+#include <idn2.h>
|
||||||
|
+#define FreeIdna(out) idn2_free(out)
|
||||||
|
+#elif defined(USE_IDNA)
|
||||||
|
#include <idna.h>
|
||||||
|
#include <idn-free.h>
|
||||||
|
+#define FreeIdna(out) idn_free(out)
|
||||||
|
+#define IDN2_OK IDNA_SUCCESS
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#define HEX_ESCAPE '%'
|
||||||
|
@@ -242,7 +247,7 @@ char *HTParsePort(char *host, int *portp)
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
|
-#ifdef USE_IDNA
|
||||||
|
+#if defined(USE_IDNA) || defined(USE_IDN2)
|
||||||
|
static int hex_decode(int ch)
|
||||||
|
{
|
||||||
|
int result = -1;
|
||||||
|
@@ -299,8 +304,42 @@ static void convert_to_idna(char *host)
|
||||||
|
}
|
||||||
|
if (code) {
|
||||||
|
*dst = '\0';
|
||||||
|
+#ifdef USE_IDN2
|
||||||
|
+#if (!defined(IDN2_VERSION_NUMBER) || IDN2_VERSION_NUMBER < 0x02000003)
|
||||||
|
+ /*
|
||||||
|
+ * Older libidn2 mishandles STD3, stripping underscores.
|
||||||
|
+ */
|
||||||
|
+ if (strchr(buffer, '_') != NULL) {
|
||||||
|
+ code = -1;
|
||||||
|
+ } else
|
||||||
|
+#endif
|
||||||
|
+ switch (LYidnaMode) {
|
||||||
|
+ case LYidna2003:
|
||||||
|
+ code = idn2_to_ascii_8z(buffer, &output, IDN2_TRANSITIONAL);
|
||||||
|
+ break;
|
||||||
|
+ case LYidna2008:
|
||||||
|
+ /* IDNA2008 rules without the TR46 amendments */
|
||||||
|
+ code = idn2_to_ascii_8z(buffer, &output, 0);
|
||||||
|
+ break;
|
||||||
|
+ case LYidnaTR46:
|
||||||
|
+ code = idn2_to_ascii_8z(buffer, &output, IDN2_NONTRANSITIONAL
|
||||||
|
+ | IDN2_NFC_INPUT);
|
||||||
|
+ break;
|
||||||
|
+ case LYidnaCompat:
|
||||||
|
+ /* IDNA2008 */
|
||||||
|
+ code = idn2_to_ascii_8z(buffer, &output, IDN2_NONTRANSITIONAL
|
||||||
|
+ | IDN2_NFC_INPUT);
|
||||||
|
+ if (code == IDN2_DISALLOWED) {
|
||||||
|
+ /* IDNA2003 - compatible */
|
||||||
|
+ code = idn2_to_ascii_8z(buffer, &output, IDN2_TRANSITIONAL);
|
||||||
|
+ }
|
||||||
|
+ break;
|
||||||
|
+ }
|
||||||
|
+#else
|
||||||
|
code = idna_to_ascii_8z(buffer, &output, IDNA_USE_STD3_ASCII_RULES);
|
||||||
|
- if (code == IDNA_SUCCESS) {
|
||||||
|
+#endif
|
||||||
|
+ if (code == IDN2_OK) {
|
||||||
|
+ CTRACE((tfp, "convert_to_idna: `%s' -> `%s': OK\n", buffer, output));
|
||||||
|
strcpy(host, output);
|
||||||
|
strcat(host, params);
|
||||||
|
} else {
|
||||||
|
@@ -309,7 +348,7 @@ static void convert_to_idna(char *host)
|
||||||
|
idna_strerror((Idna_rc) code)));
|
||||||
|
}
|
||||||
|
if (output)
|
||||||
|
- idn_free(output);
|
||||||
|
+ FreeIdna(output);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
free(buffer);
|
||||||
|
@@ -539,7 +578,7 @@ char *HTParse(const char *aName,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
-#ifdef USE_IDNA
|
||||||
|
+#if defined(USE_IDNA) || defined(USE_IDN2)
|
||||||
|
/*
|
||||||
|
* Depending on locale-support, we could have a literal UTF-8
|
||||||
|
* string as a host name, or a URL-encoded form of that.
|
||||||
|
diff --git a/WWW/Library/Implementation/HTParse.h b/WWW/Library/Implementation/HTParse.h
|
||||||
|
index ce1bff6..5496d82 100644
|
||||||
|
--- a/WWW/Library/Implementation/HTParse.h
|
||||||
|
+++ b/WWW/Library/Implementation/HTParse.h
|
||||||
|
@@ -49,6 +49,18 @@ extern "C" {
|
||||||
|
#define URL_XALPHAS UCH(1)
|
||||||
|
#define URL_XPALPHAS UCH(2)
|
||||||
|
#define URL_PATH UCH(4)
|
||||||
|
+
|
||||||
|
+#ifdef USE_IDN2
|
||||||
|
+ typedef enum {
|
||||||
|
+ LYidna2003 = 1,
|
||||||
|
+ LYidna2008,
|
||||||
|
+ LYidnaTR46,
|
||||||
|
+ LYidnaCompat
|
||||||
|
+ } HTIdnaModes;
|
||||||
|
+
|
||||||
|
+ extern int LYidnaMode;
|
||||||
|
+#endif
|
||||||
|
+
|
||||||
|
/* Strip white space off a string. HTStrip()
|
||||||
|
* -------------------------------
|
||||||
|
*
|
||||||
|
diff --git a/aclocal.m4 b/aclocal.m4
|
||||||
|
index 41139f9..4c68aec 100644
|
||||||
|
--- a/aclocal.m4
|
||||||
|
+++ b/aclocal.m4
|
||||||
|
@@ -3341,11 +3341,12 @@ test -d "$oldincludedir" && {
|
||||||
|
$1="[$]$1 $cf_header_path_list"
|
||||||
|
])dnl
|
||||||
|
dnl ---------------------------------------------------------------------------
|
||||||
|
-dnl CF_HELP_MESSAGE version: 3 updated: 1998/01/14 10:56:23
|
||||||
|
+dnl CF_HELP_MESSAGE version: 4 updated: 2019/12/31 08:53:54
|
||||||
|
dnl ---------------
|
||||||
|
dnl Insert text into the help-message, for readability, from AC_ARG_WITH.
|
||||||
|
AC_DEFUN([CF_HELP_MESSAGE],
|
||||||
|
-[AC_DIVERT_HELP([$1])dnl
|
||||||
|
+[CF_ACVERSION_CHECK(2.53,[],[
|
||||||
|
+AC_DIVERT_HELP($1)])dnl
|
||||||
|
])dnl
|
||||||
|
dnl ---------------------------------------------------------------------------
|
||||||
|
dnl CF_INET_ADDR version: 7 updated: 2013/10/08 17:47:05
|
||||||
|
@@ -6600,25 +6601,41 @@ if test "$with_dmalloc" = yes ; then
|
||||||
|
fi
|
||||||
|
])dnl
|
||||||
|
dnl ---------------------------------------------------------------------------
|
||||||
|
-dnl CF_WITH_IDNA version: 10 updated: 2015/04/15 19:08:48
|
||||||
|
+dnl CF_WITH_IDNA version: 11 updated: 2021/07/05 09:09:42
|
||||||
|
dnl ------------
|
||||||
|
-dnl Check for libidn, use it if found.
|
||||||
|
+dnl Check for libidn2, use it if found. Otherwise, check for libidn, use that.
|
||||||
|
dnl
|
||||||
|
dnl $1 = optional path for headers/library
|
||||||
|
AC_DEFUN([CF_WITH_IDNA],[
|
||||||
|
- CF_ADD_OPTIONAL_PATH($1)
|
||||||
|
+CF_ADD_OPTIONAL_PATH($1)
|
||||||
|
|
||||||
|
- CF_FIND_LINKAGE([
|
||||||
|
+CF_FIND_LINKAGE([
|
||||||
|
+#include <stdio.h>
|
||||||
|
+#include <idn2.h>
|
||||||
|
+],[
|
||||||
|
+ char *output = 0;
|
||||||
|
+ int code = idn2_to_ascii_8z("name", &output, IDN2_USE_STD3_ASCII_RULES);
|
||||||
|
+ (void) code;
|
||||||
|
+],idn2,,[CF_VERBOSE([unsuccessful, will try idn (older)])],,[$LIBICONV])
|
||||||
|
+
|
||||||
|
+if test "x$cf_cv_find_linkage_idn2" = xyes ; then
|
||||||
|
+ CF_VERBOSE(found idn2 library)
|
||||||
|
+ AC_DEFINE(USE_IDN2,1,[Define to 1 if we should use IDN2 library])
|
||||||
|
+else
|
||||||
|
+ CF_FIND_LINKAGE([
|
||||||
|
#include <stdio.h>
|
||||||
|
#include <idna.h>
|
||||||
|
],[
|
||||||
|
char *output = 0;
|
||||||
|
- int code = idna_to_ascii_8z("name", &output, IDNA_USE_STD3_ASCII_RULES);
|
||||||
|
+ int code = idna_to_ascii_8z("name", &output, IDNA_USE_STD3_ASCII_RULES);
|
||||||
|
+ (void) code;
|
||||||
|
],idn,,,,[$LIBICONV])
|
||||||
|
|
||||||
|
if test "x$cf_cv_find_linkage_idn" = xyes ; then
|
||||||
|
+ CF_VERBOSE(found idn library)
|
||||||
|
AC_DEFINE(USE_IDNA,1,[Define to 1 if we should use IDNA library])
|
||||||
|
fi
|
||||||
|
+fi
|
||||||
|
])dnl
|
||||||
|
dnl ---------------------------------------------------------------------------
|
||||||
|
dnl CF_WITH_PATH version: 11 updated: 2012/09/29 15:04:19
|
||||||
|
diff --git a/config.hin b/config.hin
|
||||||
|
index fb0e4d3..58051bb 100644
|
||||||
|
--- a/config.hin
|
||||||
|
+++ b/config.hin
|
||||||
|
@@ -271,6 +271,7 @@
|
||||||
|
#undef USE_FILE_UPLOAD /* CF_ARG_DISABLE(file-upload) */
|
||||||
|
#undef USE_GNUTLS_FUNCS /* CF_GNUTLS */
|
||||||
|
#undef USE_GNUTLS_INCL /* CF_GNUTLS */
|
||||||
|
+#undef USE_IDN2 /* CF_ARG_DISABLE(idna) */
|
||||||
|
#undef USE_IDNA /* CF_ARG_DISABLE(idna) */
|
||||||
|
#undef USE_JUSTIFY_ELTS /* CF_ARG_DISABLE(justify-elts) */
|
||||||
|
#undef USE_LOCALE_CHARSET /* CF_ARG_DISABLE(locale-charset) */
|
||||||
|
diff --git a/src/LYMain.c b/src/LYMain.c
|
||||||
|
index d36707e..ecfbe69 100644
|
||||||
|
--- a/src/LYMain.c
|
||||||
|
+++ b/src/LYMain.c
|
||||||
|
@@ -178,6 +178,9 @@ lynx_list_item_type *externals = NULL;
|
||||||
|
|
||||||
|
/* linked list of external options */
|
||||||
|
#endif
|
||||||
|
+#ifdef USE_IDN2
|
||||||
|
+int LYidnaMode = LYidnaTR46;
|
||||||
|
+#endif
|
||||||
|
|
||||||
|
lynx_list_item_type *uploaders = NULL;
|
||||||
|
int LYShowColor = SHOW_COLOR_UNKNOWN; /* to show or not */
|
||||||
|
diff --git a/src/LYOptions.c b/src/LYOptions.c
|
||||||
|
index 6b4b0e0..e0e4732 100644
|
||||||
|
--- a/src/LYOptions.c
|
||||||
|
+++ b/src/LYOptions.c
|
||||||
|
@@ -2356,6 +2356,18 @@ static const char *assume_char_set_string = RC_ASSUME_CHARSET;
|
||||||
|
static const char *display_char_set_string = RC_CHARACTER_SET;
|
||||||
|
static const char *raw_mode_string = RC_RAW_MODE;
|
||||||
|
|
||||||
|
+#ifdef USE_IDN2
|
||||||
|
+static const char *idna_mode_string = RC_IDNA_MODE;
|
||||||
|
+static OptValues idna_values[] =
|
||||||
|
+{
|
||||||
|
+ {LYidna2003, N_("IDNA 2003"), "idna2003"},
|
||||||
|
+ {LYidna2008, N_("IDNA 2008"), "idna2008"},
|
||||||
|
+ {LYidnaTR46, N_("IDNA TR46"), "idnaTR46"},
|
||||||
|
+ {LYidnaCompat, N_("IDNA Compatible"), "idnaCompat"},
|
||||||
|
+ END_OPTIONS
|
||||||
|
+};
|
||||||
|
+#endif
|
||||||
|
+
|
||||||
|
#ifdef USE_LOCALE_CHARSET
|
||||||
|
static const char *locale_charset_string = RC_LOCALE_CHARSET;
|
||||||
|
#endif
|
||||||
|
@@ -3233,6 +3245,13 @@ int postoptions(DocInfo *newdoc)
|
||||||
|
current_char_set = newval;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
+#ifdef USE_IDN2
|
||||||
|
+ /* Internationalized Domain Names: SELECT */
|
||||||
|
+ if (!strcmp(data[i].tag, idna_mode_string)
|
||||||
|
+ && GetOptValues(idna_values, data[i].value, &code)) {
|
||||||
|
+ LYidnaMode = code;
|
||||||
|
+ }
|
||||||
|
+#endif
|
||||||
|
|
||||||
|
/* Raw Mode: ON/OFF */
|
||||||
|
if (!strcmp(data[i].tag, raw_mode_string)
|
||||||
|
@@ -3933,6 +3952,20 @@ static int gen_options(char **newfile)
|
||||||
|
EndSelect(fp0);
|
||||||
|
}
|
||||||
|
|
||||||
|
+#ifdef USE_IDN2
|
||||||
|
+ /* Internationalized Domain Names: SELECT */
|
||||||
|
+ {
|
||||||
|
+ PutLabel(fp0, gettext("Internationalized domain names"), idna_mode_string);
|
||||||
|
+ BeginSelect(fp0, idna_mode_string);
|
||||||
|
+ for (i = 0; idna_values[i].value != 0; i++) {
|
||||||
|
+ PutOption(fp0, idna_values[i].value == LYidnaMode,
|
||||||
|
+ idna_values[i].HtmlName,
|
||||||
|
+ idna_values[i].LongName);
|
||||||
|
+ }
|
||||||
|
+ EndSelect(fp0);
|
||||||
|
+ }
|
||||||
|
+#endif
|
||||||
|
+
|
||||||
|
/* Raw Mode: ON/OFF */
|
||||||
|
if (LYHaveCJKCharacterSet) {
|
||||||
|
/*
|
||||||
|
diff --git a/src/LYrcFile.c b/src/LYrcFile.c
|
||||||
|
index 1754f12..b98bfc2 100644
|
||||||
|
--- a/src/LYrcFile.c
|
||||||
|
+++ b/src/LYrcFile.c
|
||||||
|
@@ -71,6 +71,16 @@ static Config_Enum tbl_file_sort[] = {
|
||||||
|
{ NULL, -1 },
|
||||||
|
};
|
||||||
|
|
||||||
|
+#ifdef USE_IDN2
|
||||||
|
+static Config_Enum tbl_idna_mode[] = {
|
||||||
|
+ { "IDNA2003", LYidna2003 },
|
||||||
|
+ { "IDNA2008", LYidna2008 },
|
||||||
|
+ { "TR46", LYidnaTR46 },
|
||||||
|
+ { "Compatible", LYidnaCompat },
|
||||||
|
+ { NULL, -1 },
|
||||||
|
+};
|
||||||
|
+#endif
|
||||||
|
+
|
||||||
|
Config_Enum tbl_keypad_mode[] = {
|
||||||
|
{ "FIELDS_ARE_NUMBERED", FIELDS_ARE_NUMBERED },
|
||||||
|
{ "LINKS_AND_FIELDS_ARE_NUMBERED", LINKS_AND_FIELDS_ARE_NUMBERED },
|
||||||
|
@@ -462,6 +472,10 @@ file lists such as FTP directories. The options are:\n\
|
||||||
|
MAYBE_SET(RC_HTML5_CHARSETS, html5_charsets, MSG_ENABLE_LYNXRC),
|
||||||
|
MAYBE_FUN(RC_HTTP_PROTOCOL, get_http_protocol, put_http_protocol,
|
||||||
|
MSG_ENABLE_LYNXRC),
|
||||||
|
+#ifdef USE_IDN2
|
||||||
|
+ MAYBE_ENU(RC_IDNA_MODE, LYidnaMode, tbl_idna_mode,
|
||||||
|
+ MSG_ENABLE_LYNXRC),
|
||||||
|
+#endif
|
||||||
|
#ifdef EXP_KEYBOARD_LAYOUT
|
||||||
|
PARSE_ARY(RC_KBLAYOUT, current_layout, LYKbLayoutNames, NULL),
|
||||||
|
#endif
|
||||||
|
diff --git a/src/LYrcFile.h b/src/LYrcFile.h
|
||||||
|
index 3cf07c0..cd41a0f 100644
|
||||||
|
--- a/src/LYrcFile.h
|
||||||
|
+++ b/src/LYrcFile.h
|
||||||
|
@@ -110,6 +110,7 @@
|
||||||
|
#define RC_HTTPS_PROXY "https_proxy"
|
||||||
|
#define RC_HTTP_PROTOCOL "http_protocol"
|
||||||
|
#define RC_HTTP_PROXY "http_proxy"
|
||||||
|
+#define RC_IDNA_MODE "idna_mode"
|
||||||
|
#define RC_INCLUDE "include"
|
||||||
|
#define RC_INFLATE_PATH "inflate_path"
|
||||||
|
#define RC_INFOSECS "infosecs"
|
||||||
|
--
|
||||||
|
2.34.1
|
||||||
|
|
@ -3,9 +3,8 @@
|
|||||||
Summary: A text-based Web browser
|
Summary: A text-based Web browser
|
||||||
Name: lynx
|
Name: lynx
|
||||||
Version: 2.8.9
|
Version: 2.8.9
|
||||||
Release: 4%{?dist}
|
Release: 20%{?dist}
|
||||||
License: GPLv2
|
License: GPLv2
|
||||||
Group: Applications/Internet
|
|
||||||
Source: https://invisible-mirror.net/archives/lynx/tarballs/lynx%{version}rel.%{devrel}.tar.bz2
|
Source: https://invisible-mirror.net/archives/lynx/tarballs/lynx%{version}rel.%{devrel}.tar.bz2
|
||||||
URL: http://lynx.browser.org/
|
URL: http://lynx.browser.org/
|
||||||
|
|
||||||
@ -24,27 +23,36 @@ Patch2: lynx-CVE-2008-4690.patch
|
|||||||
# avoid build failure caused by mistakenly excluded <locale.h>
|
# avoid build failure caused by mistakenly excluded <locale.h>
|
||||||
Patch3: lynx-2.8.8-locale.patch
|
Patch3: lynx-2.8.8-locale.patch
|
||||||
|
|
||||||
# fix bugs detected by static analysis (#1602612)
|
# fix bugs detected by static analysis
|
||||||
Patch4: lynx-2.8.9-static-analysis.patch
|
Patch4: lynx-2.8.9-static-analysis.patch
|
||||||
|
|
||||||
# fix disclosure of HTTP auth credentials via SNI data (CVE-2021-38165)
|
# fix disclosure of HTTP auth credentials via SNI data (CVE-2021-38165)
|
||||||
Patch5: lynx-2.8.9-CVE-2021-38165.patch
|
Patch5: lynx-2.8.9-CVE-2021-38165.patch
|
||||||
|
|
||||||
|
# backport IDN2 support from upstream (#2040124)
|
||||||
|
Patch6: lynx-2.8.9-idn2-backport.patch
|
||||||
|
|
||||||
|
# backport fix for page downloading from upstream (RHEL-16809)
|
||||||
|
# upstream commit: https://github.com/ThomasDickey/lynx-snapshots/commit/6c764e123c86ccce03d4ffaee11085b9badd6765
|
||||||
|
Patch7: lynx-2.8.9-fix-page-download.patch
|
||||||
|
|
||||||
Provides: webclient
|
Provides: webclient
|
||||||
Provides: text-www-browser
|
Provides: text-www-browser
|
||||||
|
BuildRequires: autoconf
|
||||||
BuildRequires: dos2unix
|
BuildRequires: dos2unix
|
||||||
BuildRequires: gcc
|
BuildRequires: gcc
|
||||||
BuildRequires: gettext
|
BuildRequires: gettext
|
||||||
BuildRequires: openssl-devel
|
BuildRequires: libidn2-devel
|
||||||
|
BuildRequires: make
|
||||||
BuildRequires: ncurses-devel
|
BuildRequires: ncurses-devel
|
||||||
BuildRequires: slang-devel
|
BuildRequires: openssl-devel
|
||||||
BuildRequires: telnet
|
BuildRequires: telnet
|
||||||
BuildRequires: unzip
|
BuildRequires: unzip
|
||||||
BuildRequires: zip
|
BuildRequires: zip
|
||||||
BuildRequires: zlib-devel
|
BuildRequires: zlib-devel
|
||||||
|
|
||||||
# provides /usr/share/doc/HTML/en-US/index.html used as STARTFILE on RHEL
|
# provides /usr/share/doc/HTML/en-US/index.html used as STARTFILE on RHEL
|
||||||
%if 0%{?rhel}
|
%if 0%{?rhel} && !0%{?eln}
|
||||||
Requires: redhat-indexhtml
|
Requires: redhat-indexhtml
|
||||||
%endif
|
%endif
|
||||||
|
|
||||||
@ -63,10 +71,14 @@ exits quickly and swiftly displays web pages.
|
|||||||
%patch3 -p1
|
%patch3 -p1
|
||||||
%patch4 -p1
|
%patch4 -p1
|
||||||
%patch5 -p1
|
%patch5 -p1
|
||||||
|
%patch6 -p1
|
||||||
|
%patch7 -p1
|
||||||
|
|
||||||
|
autoconf
|
||||||
|
|
||||||
%{!?_pkgdocdir: %global _pkgdocdir %{_docdir}/%{name}-%{version}}
|
%{!?_pkgdocdir: %global _pkgdocdir %{_docdir}/%{name}-%{version}}
|
||||||
sed -e "s,^HELPFILE:.*,HELPFILE:file://localhost%{_pkgdocdir}/lynx_help/lynx_help_main.html,g" -i lynx.cfg
|
sed -e "s,^HELPFILE:.*,HELPFILE:file://localhost%{_pkgdocdir}/lynx_help/lynx_help_main.html,g" -i lynx.cfg
|
||||||
%if 0%{?rhel}
|
%if 0%{?rhel} && !0%{?eln}
|
||||||
sed -e 's,^STARTFILE:.*,STARTFILE:file:/usr/share/doc/HTML/en-US/index.html,' -i lynx.cfg
|
sed -e 's,^STARTFILE:.*,STARTFILE:file:/usr/share/doc/HTML/en-US/index.html,' -i lynx.cfg
|
||||||
%endif
|
%endif
|
||||||
|
|
||||||
@ -130,6 +142,7 @@ EOF
|
|||||||
%find_lang %{name}
|
%find_lang %{name}
|
||||||
|
|
||||||
%files -f %{name}.lang
|
%files -f %{name}.lang
|
||||||
|
%license COPYING
|
||||||
%doc docs README INSTALLATION samples
|
%doc docs README INSTALLATION samples
|
||||||
%doc test lynx.hlp lynx_help
|
%doc test lynx.hlp lynx_help
|
||||||
%{_bindir}/lynx
|
%{_bindir}/lynx
|
||||||
@ -139,11 +152,55 @@ EOF
|
|||||||
%config(noreplace,missingok) %{_sysconfdir}/lynx-site.cfg
|
%config(noreplace,missingok) %{_sysconfdir}/lynx-site.cfg
|
||||||
|
|
||||||
%changelog
|
%changelog
|
||||||
* Tue Aug 31 2021 Kamil Dudka <kdudka@redhat.com> - 2.8.9-4
|
* Tue Nov 21 2023 Lukáš Zaoral <lzaoral@redhat.com> - 2.8.9-20
|
||||||
|
- fix page downloading (RHEL-16809)
|
||||||
|
|
||||||
|
* Mon Mar 21 2022 Kamil Dudka <kdudka@redhat.com> - 2.8.9-19
|
||||||
|
- backport IDN2 support from upstream (#2040124)
|
||||||
|
|
||||||
|
* Tue Aug 31 2021 Kamil Dudka <kdudka@redhat.com> - 2.8.9-18
|
||||||
- fix disclosure of HTTP auth credentials via SNI data (CVE-2021-38165)
|
- fix disclosure of HTTP auth credentials via SNI data (CVE-2021-38165)
|
||||||
|
|
||||||
* Thu Nov 08 2018 Kamil Dudka <kdudka@redhat.com> - 2.8.9-2
|
* Mon Aug 09 2021 Mohan Boddu <mboddu@redhat.com> - 2.8.9-14.1
|
||||||
- fix bugs detected by static analysis (#1602612)
|
- Rebuilt for IMA sigs, glibc 2.34, aarch64 flags
|
||||||
|
Related: rhbz#1991688
|
||||||
|
|
||||||
|
* Wed Jun 16 2021 Mohan Boddu <mboddu@redhat.com> - 2.8.9-13.1
|
||||||
|
- Rebuilt for RHEL 9 BETA for openssl 3.0
|
||||||
|
Related: rhbz#1971065
|
||||||
|
|
||||||
|
* Fri Apr 16 2021 Mohan Boddu <mboddu@redhat.com> - 2.8.9-12.1
|
||||||
|
- Rebuilt for RHEL 9 BETA on Apr 15th 2021. Related: rhbz#1947937
|
||||||
|
|
||||||
|
* Tue Jan 26 2021 Fedora Release Engineering <releng@fedoraproject.org> - 2.8.9-11
|
||||||
|
- Rebuilt for https://fedoraproject.org/wiki/Fedora_34_Mass_Rebuild
|
||||||
|
|
||||||
|
* Wed Dec 30 2020 Kamil Dudka <kdudka@redhat.com> - 2.8.9-10
|
||||||
|
- remove unused build-time dependency on slang-devel (#1910966)
|
||||||
|
|
||||||
|
* Thu Aug 06 2020 Merlin Mathesius <mmathesi@redhat.com> - 2.8.9-9
|
||||||
|
- Skip RHEL-specific Requires and STARTFILE edit when building for ELN
|
||||||
|
|
||||||
|
* Tue Jul 28 2020 Fedora Release Engineering <releng@fedoraproject.org> - 2.8.9-8
|
||||||
|
- Rebuilt for https://fedoraproject.org/wiki/Fedora_33_Mass_Rebuild
|
||||||
|
|
||||||
|
* Wed Jan 29 2020 Fedora Release Engineering <releng@fedoraproject.org> - 2.8.9-7
|
||||||
|
- Rebuilt for https://fedoraproject.org/wiki/Fedora_32_Mass_Rebuild
|
||||||
|
|
||||||
|
* Thu Jul 25 2019 Fedora Release Engineering <releng@fedoraproject.org> - 2.8.9-6
|
||||||
|
- Rebuilt for https://fedoraproject.org/wiki/Fedora_31_Mass_Rebuild
|
||||||
|
|
||||||
|
* Mon Mar 11 2019 Kamil Dudka <kdudka@redhat.com> - 2.8.9-5
|
||||||
|
- include license file in the package (#1686886)
|
||||||
|
|
||||||
|
* Fri Feb 01 2019 Fedora Release Engineering <releng@fedoraproject.org> - 2.8.9-4
|
||||||
|
- Rebuilt for https://fedoraproject.org/wiki/Fedora_30_Mass_Rebuild
|
||||||
|
|
||||||
|
* Thu Nov 08 2018 Kamil Dudka <kdudka@redhat.com> - 2.8.9-3
|
||||||
|
- fix bugs detected by static analysis
|
||||||
|
|
||||||
|
* Fri Jul 13 2018 Fedora Release Engineering <releng@fedoraproject.org> - 2.8.9-2
|
||||||
|
- Rebuilt for https://fedoraproject.org/wiki/Fedora_29_Mass_Rebuild
|
||||||
|
|
||||||
* Tue Jul 10 2018 Kamil Dudka <kdudka@redhat.com> - 2.8.9-1
|
* Tue Jul 10 2018 Kamil Dudka <kdudka@redhat.com> - 2.8.9-1
|
||||||
- update to the latest upstream release
|
- update to the latest upstream release
|
8
lynx2.8.9rel.1.tar.bz2.asc
Normal file
8
lynx2.8.9rel.1.tar.bz2.asc
Normal file
@ -0,0 +1,8 @@
|
|||||||
|
-----BEGIN PGP SIGNATURE-----
|
||||||
|
Version: GnuPG v1
|
||||||
|
Comment: See https://invisible-island.net/public/public.html for info
|
||||||
|
|
||||||
|
iEYEABECAAYFAltCkmAACgkQcCNT4PfkjtsdgwCgxEb9vOFqxH7bdNJLT87wfV0d
|
||||||
|
vnYAn0Z22RAmDDEyyQ6bZAZp2+ft+5xt
|
||||||
|
=kxww
|
||||||
|
-----END PGP SIGNATURE-----
|
1
sources
Normal file
1
sources
Normal file
@ -0,0 +1 @@
|
|||||||
|
SHA512 (lynx2.8.9rel.1.tar.bz2) = 61edbe082684fcbd91bdbf4f4d27c3baf92358811aaffc2f8af46adf23ca7b48aede1520fc5f2a8fc974a2f4bbf4e57e7e6027a187bfc6101e56878c98178e6d
|
1
tests/lynx-dump/.gitignore
vendored
Normal file
1
tests/lynx-dump/.gitignore
vendored
Normal file
@ -0,0 +1 @@
|
|||||||
|
/psl-output.txt
|
1
tests/lynx-dump/data/.gitignore
vendored
Normal file
1
tests/lynx-dump/data/.gitignore
vendored
Normal file
@ -0,0 +1 @@
|
|||||||
|
/*.out
|
171
tests/lynx-dump/data/ALT88592.html
Normal file
171
tests/lynx-dump/data/ALT88592.html
Normal file
@ -0,0 +1,171 @@
|
|||||||
|
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN">
|
||||||
|
<HTML>
|
||||||
|
<HEAD>
|
||||||
|
<TITLE>Character table modified and enhanced for iso8859-2 - ALT test</TITLE>
|
||||||
|
<META HTTP-EQUIV="Content-Type" CONTENT="text/html; charset=iso-8859-2">
|
||||||
|
<LINK REV="made" HREF="mailto:lynx-dev@nongnu.org">
|
||||||
|
<LINK REV="owner" HREF="http://mail.gnu.org/mailman/listinfo/lynx-dev/">
|
||||||
|
<!-- A BASE tag for the SRC attributes of dummy images.
|
||||||
|
They should be inaccessible so that the ALT text will be shown in graphical browsers.
|
||||||
|
Use file: to save network resources. -->
|
||||||
|
<BASE HREF="file://localhost/this.path.intentionally.invalid/">
|
||||||
|
</HEAD>
|
||||||
|
|
||||||
|
<BODY>
|
||||||
|
|
||||||
|
<H1 ALIGN=center>iso8859-2 plus table - ALT test</H1>
|
||||||
|
|
||||||
|
<PRE>
|
||||||
|
Description Code Entity name
|
||||||
|
=================================== ============ ==============
|
||||||
|
quotation mark <IMG SRC=X ALT=" &#34; --> " &quot; --> "">
|
||||||
|
ampersand <IMG SRC=X ALT=" &#38; --> & &amp; --> &">
|
||||||
|
less-than sign <IMG SRC=X ALT=" &#60; --> < &lt; --> <">
|
||||||
|
greater-than sign <IMG SRC=X ALT=" &#62; --> > &gt; --> >">
|
||||||
|
|
||||||
|
Description Char Code Entity name
|
||||||
|
=================================== ==== ============ ==============
|
||||||
|
non-breaking space <IMG SRC=X ALT=" &#160; -->   &nbsp; --> ">
|
||||||
|
capital A, ogonek <IMG SRC=X ALT=" ¡ &#260; --> Ą &Aogon; --> Ą">
|
||||||
|
breve <IMG SRC=X ALT=" {¢} {&#728;}-->{˘} {&breve;} -->{˘}">
|
||||||
|
capital L, stroke <IMG SRC=X ALT=" £ &#321; --> Ł &Lstrok; --> Ł">
|
||||||
|
general currency sign <IMG SRC=X ALT=" ¤ &#164; --> ¤ &curren; --> ¤">
|
||||||
|
capital L, caron <IMG SRC=X ALT=" ¥ &#317; --> Ľ &Lcaron; --> Ľ">
|
||||||
|
capital S, acute accent <IMG SRC=X ALT=" ¦ &#346; --> Ś &Sacute; --> Ś">
|
||||||
|
section sign <IMG SRC=X ALT=" § &#167; --> § &sect; --> §">
|
||||||
|
umlaut (dieresis) <IMG SRC=X ALT=" ¨ &#168; --> ¨ &uml; --> ¨">
|
||||||
|
<IMG SRC=X ALT=" &die; --> ¨">
|
||||||
|
capital S, caron <IMG SRC=X ALT=" © &#352; --> Š &Scaron; --> Š">
|
||||||
|
capital S, cedilla <IMG SRC=X ALT=" ª &#350; --> Ş &Scedil; --> Ş">
|
||||||
|
capital T, caron <IMG SRC=X ALT=" « &#356; --> Ť &Tcaron; --> Ť">
|
||||||
|
capital Z, acute accent <IMG SRC=X ALT=" ¬ &#377; --> Ź &Zacute; --> Ź">
|
||||||
|
soft hyphen <IMG SRC=X ALT=" [] [&#173;]-->[­] [&shy;] -->[­]">
|
||||||
|
capital Z, caron <IMG SRC=X ALT=" ® &#381; --> Ž &Zcaron; --> Ž">
|
||||||
|
capital Z, dot above <IMG SRC=X ALT=" ¯ &#379; --> Ż &Zdot; --> Ż">
|
||||||
|
degree sign <IMG SRC=X ALT=" ° &#176; --> ° &deg; --> °">
|
||||||
|
small a, ogonek <IMG SRC=X ALT=" ± &#261; --> ą &aogon; --> ą">
|
||||||
|
ogonek <IMG SRC=X ALT=" {²} {&#731;}-->{˛} {&ogon;} -->{˛}">
|
||||||
|
small l, stroke <IMG SRC=X ALT=" ³ &#322; --> ł &lstrok; --> ł">
|
||||||
|
acute accent <IMG SRC=X ALT=" ´ &#180; --> ´ &acute; --> ´">
|
||||||
|
small l, caron <IMG SRC=X ALT=" µ &#318; --> ľ &lcaron; --> ľ">
|
||||||
|
small s, acute accent <IMG SRC=X ALT=" ¶ &#347; --> ś &sacute; --> ś">
|
||||||
|
caron <IMG SRC=X ALT=" {·} {&#711;}-->{ˇ} {&caron;} -->{ˇ}">
|
||||||
|
cedilla <IMG SRC=X ALT=" ¸ &#184; --> ¸ &cedil; --> ¸">
|
||||||
|
small s, caron <IMG SRC=X ALT=" ¹ &#353; --> š &scaron; --> š">
|
||||||
|
small s, cedilla <IMG SRC=X ALT=" º &#351; --> ş &scedil; --> ş">
|
||||||
|
small t, caron <IMG SRC=X ALT=" » &#357; --> ť &tcaron; --> ť">
|
||||||
|
small z, acute accent <IMG SRC=X ALT=" ¼ &#378; --> ź &zacute; --> ź">
|
||||||
|
double acute accent <IMG SRC=X ALT=" {½} {&#733;}-->{˝} {&dblac;} -->{˝}">
|
||||||
|
small z, caron <IMG SRC=X ALT=" ¾ &#382; --> ž &zcaron; --> ž">
|
||||||
|
small z, dot above <IMG SRC=X ALT=" ¿ &#380; --> ż &zdot; --> ż ">
|
||||||
|
capital R, acute accent <IMG SRC=X ALT=" À &#340; --> Ŕ &Racute; --> Ŕ">
|
||||||
|
capital A, acute accent <IMG SRC=X ALT=" Á &#193; --> Á &Aacute; --> Á">
|
||||||
|
capital A, circumflex accent <IMG SRC=X ALT=" Â &#194; --> Â &Acirc; --> Â">
|
||||||
|
capital A, breve <IMG SRC=X ALT=" Ã &#258; --> Ă &Abreve; --> Ă">
|
||||||
|
capital A, dieresis or umlaut mark <IMG SRC=X ALT=" Ä &#196; --> Ä &Auml; --> Ä">
|
||||||
|
capital L, acute accent <IMG SRC=X ALT=" Å &#313; --> Ĺ &Lacute; --> Ĺ">
|
||||||
|
capital C, acute accent <IMG SRC=X ALT=" Æ &#262; --> Ć &Cacute; --> Ć">
|
||||||
|
capital C, cedilla <IMG SRC=X ALT=" Ç &#199; --> Ç &Ccedil; --> Ç">
|
||||||
|
capital C, caron <IMG SRC=X ALT=" È &#268; --> Č &Ccaron; --> Č">
|
||||||
|
capital E, acute accent <IMG SRC=X ALT=" É &#201; --> É &Eacute; --> É">
|
||||||
|
capital E, ogonek <IMG SRC=X ALT=" Ê &#280; --> Ę &Eogon; --> Ę">
|
||||||
|
capital E, dieresis or umlaut mark <IMG SRC=X ALT=" Ë &#203; --> Ë &Euml; --> Ë">
|
||||||
|
capital E, caron <IMG SRC=X ALT=" Ì &#282; --> Ě &Ecaron; --> Ě">
|
||||||
|
capital I, acute accent <IMG SRC=X ALT=" Í &#205; --> Í &Iacute; --> Í">
|
||||||
|
capital I, circumflex accent <IMG SRC=X ALT=" Î &#206; --> Î &Icirc; --> Î">
|
||||||
|
capital D, caron <IMG SRC=X ALT=" Ï &#270; --> Ď &Dcaron; --> Ď">
|
||||||
|
capital D, stroke <IMG SRC=X ALT=" Ð &#272; --> Đ &Dstrok; --> Đ">
|
||||||
|
capital Eth, Icelandic <IMG SRC=X ALT=" N/A &#208; --> Ð &ETH; --> Ð">
|
||||||
|
capital N, acute accent <IMG SRC=X ALT=" Ñ &#323; --> Ń &Nacute; --> Ń">
|
||||||
|
capital N, caron <IMG SRC=X ALT=" Ò &#327; --> Ň &Ncaron; --> Ň">
|
||||||
|
capital O, acute accent <IMG SRC=X ALT=" Ó &#211; --> Ó &Oacute; --> Ó">
|
||||||
|
capital O, circumflex accent <IMG SRC=X ALT=" Ô &#212; --> Ô &Ocirc; --> Ô">
|
||||||
|
capital O, double acute accent <IMG SRC=X ALT=" Õ &#368; --> Ű &Odblac; --> Ő">
|
||||||
|
capital O, dieresis or umlaut mark <IMG SRC=X ALT=" Ö &#214; --> Ö &Ouml; --> Ö">
|
||||||
|
multiply sign <IMG SRC=X ALT=" × &#215; --> × &times; --> ×">
|
||||||
|
capital R, caron <IMG SRC=X ALT=" Ø &#344; --> Ř &Rcaron; --> Ř">
|
||||||
|
capital U, ring <IMG SRC=X ALT=" Ù &#366; --> Ů &Uring; --> Ů">
|
||||||
|
capital U, acute accent <IMG SRC=X ALT=" Ú &#218; --> Ú &Uacute; --> Ú">
|
||||||
|
capital U, double acute accent <IMG SRC=X ALT=" Û &#368; --> Ű &Udblac; --> Ű">
|
||||||
|
capital U, dieresis or umlaut mark <IMG SRC=X ALT=" Ü &#220; --> Ü &Uuml; --> Ü">
|
||||||
|
capital Y, acute accent <IMG SRC=X ALT=" Ý &#221; --> Ý &Yacute; --> Ý">
|
||||||
|
capital T, cedilla <IMG SRC=X ALT=" Þ &#354; --> Ţ &Tcedil; --> Ţ">
|
||||||
|
small sharp s, German (sz ligature) <IMG SRC=X ALT=" ß &#223; --> ß &szlig; --> ß">
|
||||||
|
small r, acute accent <IMG SRC=X ALT=" à &#341; --> ŕ &racute; --> ŕ">
|
||||||
|
small a, acute accent <IMG SRC=X ALT=" á &#225; --> á &aacute; --> á">
|
||||||
|
small a, circumflex accent <IMG SRC=X ALT=" â &#226; --> â &acirc; --> â">
|
||||||
|
small a, breve <IMG SRC=X ALT=" ã &#259; --> ă &abreve; --> ă">
|
||||||
|
small a, dieresis or umlaut mark <IMG SRC=X ALT=" ä &#228; --> ä &auml; --> ä">
|
||||||
|
small l, acute accent <IMG SRC=X ALT=" å &#314; --> ĺ &lacute; --> ĺ">
|
||||||
|
small c, acute accent <IMG SRC=X ALT=" æ &#263; --> ć &cacute; --> ć">
|
||||||
|
small c, cedilla <IMG SRC=X ALT=" ç &#231; --> ç &ccedil; --> ç">
|
||||||
|
small c, caron <IMG SRC=X ALT=" è &#269; --> č &ccaron; --> č">
|
||||||
|
small e, acute accent <IMG SRC=X ALT=" é &#233; --> é &eacute; --> é">
|
||||||
|
small e, ogonek <IMG SRC=X ALT=" ê &#281; --> ę &eogon; --> ę">
|
||||||
|
small e, dieresis or umlaut mark <IMG SRC=X ALT=" ë &#235; --> ë &euml; --> ë">
|
||||||
|
small e, caron <IMG SRC=X ALT=" ì &#283; --> ě &ecaron; --> ě">
|
||||||
|
small i, acute accent <IMG SRC=X ALT=" í &#237; --> í &iacute; --> í">
|
||||||
|
small i, circumflex accent <IMG SRC=X ALT=" î &#238; --> î &icirc; --> î">
|
||||||
|
small d, caron <IMG SRC=X ALT=" ï &#271; --> ď &dcaron; --> ď">
|
||||||
|
small d, stroke <IMG SRC=X ALT=" ð &#273; --> đ &dstrok; --> đ">
|
||||||
|
small eth, Icelandic <IMG SRC=X ALT=" N/A &#240; --> ð &eth; --> ð">
|
||||||
|
small n, acute accent <IMG SRC=X ALT=" ñ &#324; --> ń &nacute; --> ń">
|
||||||
|
small n, caron <IMG SRC=X ALT=" ò &#328; --> ň &ncaron; --> ň">
|
||||||
|
small o, acute accent <IMG SRC=X ALT=" ó &#243; --> ó &oacute; --> ó">
|
||||||
|
small o, circumflex accent <IMG SRC=X ALT=" ô &#244; --> ô &ocirc; --> ô">
|
||||||
|
small o, double acute accent <IMG SRC=X ALT=" õ &#369; --> ű &odblac; --> ő">
|
||||||
|
small o, dieresis or umlaut mark <IMG SRC=X ALT=" ö &#246; --> ö &ouml; --> ö">
|
||||||
|
division sign <IMG SRC=X ALT=" ÷ &#247; --> ÷ &divide; --> ÷">
|
||||||
|
small r, caron <IMG SRC=X ALT=" ø &#345; --> ř &rcaron; --> ř">
|
||||||
|
small u, ring <IMG SRC=X ALT=" ù &#367; --> ů &uring; --> ů">
|
||||||
|
small u, acute accent <IMG SRC=X ALT=" ú &#250; --> ú &uacute; --> ú">
|
||||||
|
small u, double acute accent <IMG SRC=X ALT=" û &#369; --> ű &udblac; --> ű">
|
||||||
|
small u, dieresis or umlaut mark <IMG SRC=X ALT=" ü &#252; --> ü &uuml; --> ü">
|
||||||
|
small y, acute accent <IMG SRC=X ALT=" ý &#253; --> ý &yacute; --> ý">
|
||||||
|
small t, cedilla <IMG SRC=X ALT=" þ &#355; --> ţ &tcedil; --> ţ">
|
||||||
|
dot above <IMG SRC=X ALT=" {ÿ} {&#729;}-->{˙} {&dot;} -->{˙}">
|
||||||
|
|
||||||
|
Some other characters of interest Char Code Entity name
|
||||||
|
=================================== ==== ============ ==============
|
||||||
|
capital AE diphthong (ligature) <IMG SRC=X ALT=" N/A &#198; --> Æ &AElig; --> Æ">
|
||||||
|
small ae diphthong (ligature) <IMG SRC=X ALT=" N/A &#230; --> æ &aelig; --> æ">
|
||||||
|
capital OE ligature <IMG SRC=X ALT=" N/A {&#338;}-->{Œ} {&OElig;} -->{Œ}">
|
||||||
|
small oe ligature <IMG SRC=X ALT=" N/A {&#339;}-->{œ} {&oelig;} -->{œ}">
|
||||||
|
copyright <IMG SRC=X ALT=" N/A &#169; --> © &copy; --> ©">
|
||||||
|
registered trademark <IMG SRC=X ALT=" N/A &#174; --> ® &reg; --> ®">
|
||||||
|
trademark sign <IMG SRC=X ALT=" N/A &#8482;--> ™ &trade; --> ™">
|
||||||
|
em space <IMG SRC=X ALT=" N/A [&#8195;]->[ ] [&emsp;] -->[ ]">
|
||||||
|
en space <IMG SRC=X ALT=" N/A [&#8194;]->[ ] [&ensp;] -->[ ]">
|
||||||
|
1/3-em space <IMG SRC=X ALT=" N/A [&#8196;]->[ ] [&emsp13;] -->[ ]">
|
||||||
|
1/4-em space <IMG SRC=X ALT=" N/A [&#8197;]->[ ] [&emsp14;] -->[ ]">
|
||||||
|
thin space <IMG SRC=X ALT=" N/A [&#8201;]->[ ] [&thinsp;]-->[ ]">
|
||||||
|
hair space <IMG SRC=X ALT=" N/A [&#8202;]->[ ] [&hairsp;]-->[ ]">
|
||||||
|
em dash <IMG SRC=X ALT=" N/A [&#8212;]->[—] [&mdash;] -->[—]">
|
||||||
|
en dash <IMG SRC=X ALT=" N/A [&#8211;]->[–] [&ndash;] -->[–]">
|
||||||
|
|
||||||
|
</PRE><!-- </PRE> no HotJava preBeta hackx - kw -->
|
||||||
|
<!-- second /PRE is a hack for HotJava 1.0 preBeta 1 -->
|
||||||
|
<HR>
|
||||||
|
<P>
|
||||||
|
Characters not found in ISO-8859-2 have "N/A" in the <TT>Char</TT> column.
|
||||||
|
Some characters for which I could not find entity names in either
|
||||||
|
<A HREF="http://www.internic.net/rfc/rfc2070.txt">RFC 2070</A>
|
||||||
|
or the
|
||||||
|
<A HREF="ftp://www.ucc.ie/pub/sgml/">ISOlat1, ISOlat2, ISOnum, ISOpub and ISOtech</A>
|
||||||
|
sets (the ones included by Peter Flynn's
|
||||||
|
<A HREF="http://www.ucc.ie/doc/www/html/dtds/htmlpro.html">HTML Pro DTD</A>)
|
||||||
|
are shown enclosed in <TT>{</TT>braces<TT>}</TT>.
|
||||||
|
</P>
|
||||||
|
<P>
|
||||||
|
See Martin Ramsch's original
|
||||||
|
<A CHARSET="iso-8859-1" HREF="http://www.uni-passau.de/~ramsch/iso8859-1.html">ISO-8859-1 Table</A>
|
||||||
|
for related info and links, and for some notes on entity names.
|
||||||
|
This file is mostly just an adaptation of his table
|
||||||
|
to the ISO-8859-2 character set.
|
||||||
|
|
||||||
|
<HR>
|
||||||
|
|
||||||
|
<ADDRESS>kweide@tezcat.com 1997-03-09</ADDRESS>
|
||||||
|
|
||||||
|
</BODY>
|
||||||
|
</HTML>
|
151
tests/lynx-dump/data/ALT88592.html.exp
Normal file
151
tests/lynx-dump/data/ALT88592.html.exp
Normal file
@ -0,0 +1,151 @@
|
|||||||
|
iso8859-2 plus table - ALT test
|
||||||
|
|
||||||
|
Description Code Entity name
|
||||||
|
=================================== ============ ==============
|
||||||
|
quotation mark " --> " " --> "
|
||||||
|
ampersand & --> & & --> &
|
||||||
|
less-than sign < --> < < --> <
|
||||||
|
greater-than sign > --> > > --> >
|
||||||
|
|
||||||
|
Description Char Code Entity name
|
||||||
|
=================================== ==== ============ ==============
|
||||||
|
non-breaking space   --> -->
|
||||||
|
capital A, ogonek Ą Ą --> Ą Ą --> Ą
|
||||||
|
breve {˘} {˘}-->{˘} {˘} -->{˘}
|
||||||
|
capital L, stroke Ł Ł --> Ł Ł --> Ł
|
||||||
|
general currency sign ¤ ¤ --> ¤ ¤ --> ¤
|
||||||
|
capital L, caron Ľ Ľ --> Ľ Ľ --> Ľ
|
||||||
|
capital S, acute accent Ś Ś --> Ś Ś --> Ś
|
||||||
|
section sign § § --> § § --> §
|
||||||
|
umlaut (dieresis) ¨ ¨ --> ¨ ¨ --> ¨
|
||||||
|
¨ --> ¨
|
||||||
|
capital S, caron Š Š --> Š Š --> Š
|
||||||
|
capital S, cedilla Ş Ş --> Ş Ş --> Ş
|
||||||
|
capital T, caron Ť Ť --> Ť Ť --> Ť
|
||||||
|
capital Z, acute accent Ź Ź --> Ź Ź --> Ź
|
||||||
|
soft hyphen [] [­]-->[] [­] -->[]
|
||||||
|
capital Z, caron Ž Ž --> Ž Ž --> Ž
|
||||||
|
capital Z, dot above Ż Ż --> Ż Ż --> Ż
|
||||||
|
degree sign ° ° --> ° ° --> °
|
||||||
|
small a, ogonek ą ą --> ą ą --> ą
|
||||||
|
ogonek {˛} {˛}-->{˛} {˛} -->{˛}
|
||||||
|
small l, stroke ł ł --> ł ł --> ł
|
||||||
|
acute accent ´ ´ --> ´ ´ --> ´
|
||||||
|
small l, caron ľ ľ --> ľ ľ --> ľ
|
||||||
|
small s, acute accent ś ś --> ś ś --> ś
|
||||||
|
caron {ˇ} {ˇ}-->{ˇ} {ˇ} -->{ˇ}
|
||||||
|
cedilla ¸ ¸ --> ¸ ¸ --> ¸
|
||||||
|
small s, caron š š --> š š --> š
|
||||||
|
small s, cedilla ş ş --> ş ş --> ş
|
||||||
|
small t, caron ť ť --> ť ť --> ť
|
||||||
|
small z, acute accent ź ź --> ź ź --> ź
|
||||||
|
double acute accent {˝} {˝}-->{˝} {˝} -->{˝}
|
||||||
|
small z, caron ž ž --> ž ž --> ž
|
||||||
|
small z, dot above ż ż --> ż ż --> ż
|
||||||
|
capital R, acute accent Ŕ Ŕ --> Ŕ Ŕ --> Ŕ
|
||||||
|
capital A, acute accent Á Á --> Á Á --> Á
|
||||||
|
capital A, circumflex accent   -->   --> Â
|
||||||
|
capital A, breve Ă Ă --> Ă Ă --> Ă
|
||||||
|
capital A, dieresis or umlaut mark Ä Ä --> Ä Ä --> Ä
|
||||||
|
capital L, acute accent Ĺ Ĺ --> Ĺ Ĺ --> Ĺ
|
||||||
|
capital C, acute accent Ć Ć --> Ć Ć --> Ć
|
||||||
|
capital C, cedilla Ç Ç --> Ç Ç --> Ç
|
||||||
|
capital C, caron Č Č --> Č Č --> Č
|
||||||
|
capital E, acute accent É É --> É É --> É
|
||||||
|
capital E, ogonek Ę Ę --> Ę Ę --> Ę
|
||||||
|
capital E, dieresis or umlaut mark Ë Ë --> Ë Ë --> Ë
|
||||||
|
capital E, caron Ě Ě --> Ě Ě --> Ě
|
||||||
|
capital I, acute accent Í Í --> Í Í --> Í
|
||||||
|
capital I, circumflex accent Î Î --> Î Î --> Î
|
||||||
|
capital D, caron Ď Ď --> Ď Ď --> Ď
|
||||||
|
capital D, stroke Đ Đ --> Đ Đ --> Đ
|
||||||
|
capital Eth, Icelandic N/A Ð --> Ð Ð --> Ð
|
||||||
|
capital N, acute accent Ń Ń --> Ń Ń --> Ń
|
||||||
|
capital N, caron Ň Ň --> Ň Ň --> Ň
|
||||||
|
capital O, acute accent Ó Ó --> Ó Ó --> Ó
|
||||||
|
capital O, circumflex accent Ô Ô --> Ô Ô --> Ô
|
||||||
|
capital O, double acute accent Ő Ű --> Ű Ő --> Ő
|
||||||
|
capital O, dieresis or umlaut mark Ö Ö --> Ö Ö --> Ö
|
||||||
|
multiply sign × × --> × × --> ×
|
||||||
|
capital R, caron Ř Ř --> Ř Ř --> Ř
|
||||||
|
capital U, ring Ů Ů --> Ů Ů --> Ů
|
||||||
|
capital U, acute accent Ú Ú --> Ú Ú --> Ú
|
||||||
|
capital U, double acute accent Ű Ű --> Ű Ű --> Ű
|
||||||
|
capital U, dieresis or umlaut mark Ü Ü --> Ü Ü --> Ü
|
||||||
|
capital Y, acute accent Ý Ý --> Ý Ý --> Ý
|
||||||
|
capital T, cedilla Ţ Ţ --> Ţ Ţ --> Ţ
|
||||||
|
small sharp s, German (sz ligature) ß ß --> ß ß --> ß
|
||||||
|
small r, acute accent ŕ ŕ --> ŕ ŕ --> ŕ
|
||||||
|
small a, acute accent á á --> á á --> á
|
||||||
|
small a, circumflex accent â â --> â â --> â
|
||||||
|
small a, breve ă ă --> ă ă --> ă
|
||||||
|
small a, dieresis or umlaut mark ä ä --> ä ä --> ä
|
||||||
|
small l, acute accent ĺ ĺ --> ĺ ĺ --> ĺ
|
||||||
|
small c, acute accent ć ć --> ć ć --> ć
|
||||||
|
small c, cedilla ç ç --> ç ç --> ç
|
||||||
|
small c, caron č č --> č č --> č
|
||||||
|
small e, acute accent é é --> é é --> é
|
||||||
|
small e, ogonek ę ę --> ę ę --> ę
|
||||||
|
small e, dieresis or umlaut mark ë ë --> ë ë --> ë
|
||||||
|
small e, caron ě ě --> ě ě --> ě
|
||||||
|
small i, acute accent í í --> í í --> í
|
||||||
|
small i, circumflex accent î î --> î î --> î
|
||||||
|
small d, caron ď ď --> ď ď --> ď
|
||||||
|
small d, stroke đ đ --> đ đ --> đ
|
||||||
|
small eth, Icelandic N/A ð --> ð ð --> ð
|
||||||
|
small n, acute accent ń ń --> ń ń --> ń
|
||||||
|
small n, caron ň ň --> ň ň --> ň
|
||||||
|
small o, acute accent ó ó --> ó ó --> ó
|
||||||
|
small o, circumflex accent ô ô --> ô ô --> ô
|
||||||
|
small o, double acute accent ő ű --> ű ő --> ő
|
||||||
|
small o, dieresis or umlaut mark ö ö --> ö ö --> ö
|
||||||
|
division sign ÷ ÷ --> ÷ ÷ --> ÷
|
||||||
|
small r, caron ř ř --> ř ř --> ř
|
||||||
|
small u, ring ů ů --> ů ů --> ů
|
||||||
|
small u, acute accent ú ú --> ú ú --> ú
|
||||||
|
small u, double acute accent ű ű --> ű ű --> ű
|
||||||
|
small u, dieresis or umlaut mark ü ü --> ü ü --> ü
|
||||||
|
small y, acute accent ý ý --> ý ý --> ý
|
||||||
|
small t, cedilla ţ ţ --> ţ ţ --> ţ
|
||||||
|
dot above {˙} {˙}-->{˙} {˙} -->{˙}
|
||||||
|
|
||||||
|
Some other characters of interest Char Code Entity name
|
||||||
|
=================================== ==== ============ ==============
|
||||||
|
capital AE diphthong (ligature) N/A Æ --> Æ Æ --> Æ
|
||||||
|
small ae diphthong (ligature) N/A æ --> æ æ --> æ
|
||||||
|
capital OE ligature N/A {Œ}-->{Œ} {Œ} -->{Œ}
|
||||||
|
small oe ligature N/A {œ}-->{œ} {œ} -->{œ}
|
||||||
|
copyright N/A © --> © © --> ©
|
||||||
|
registered trademark N/A ® --> ® ® --> ®
|
||||||
|
trademark sign N/A ™--> ™ ™ --> ™
|
||||||
|
em space N/A [ ]->[ ] [ ] -->[ ]
|
||||||
|
en space N/A [ ]->[ ] [ ] -->[ ]
|
||||||
|
1/3-em space N/A [ ]->[ ] [ ] -->[ ]
|
||||||
|
1/4-em space N/A [ ]->[ ] [ ] -->[ ]
|
||||||
|
thin space N/A [ ]->[ ] [ ]-->[ ]
|
||||||
|
hair space N/A [ ]->[ ] [ ]-->[ ]
|
||||||
|
em dash N/A [—]->[—] [—] -->[—]
|
||||||
|
en dash N/A [–]->[–] [–] -->[–]
|
||||||
|
|
||||||
|
__________________________________________________________________
|
||||||
|
|
||||||
|
Characters not found in ISO-8859-2 have "N/A" in the Char column. Some
|
||||||
|
characters for which I could not find entity names in either [1]RFC
|
||||||
|
2070 or the [2]ISOlat1, ISOlat2, ISOnum, ISOpub and ISOtech sets (the
|
||||||
|
ones included by Peter Flynn's [3]HTML Pro DTD) are shown enclosed in
|
||||||
|
{braces}.
|
||||||
|
|
||||||
|
See Martin Ramsch's original [4]ISO-8859-1 Table for related info and
|
||||||
|
links, and for some notes on entity names. This file is mostly just an
|
||||||
|
adaptation of his table to the ISO-8859-2 character set.
|
||||||
|
__________________________________________________________________
|
||||||
|
|
||||||
|
|
||||||
|
kweide@tezcat.com 1997-03-09
|
||||||
|
|
||||||
|
References
|
||||||
|
|
||||||
|
1. http://www.internic.net/rfc/rfc2070.txt
|
||||||
|
2. ftp://www.ucc.ie/pub/sgml/
|
||||||
|
3. http://www.ucc.ie/doc/www/html/dtds/htmlpro.html
|
||||||
|
4. http://www.uni-passau.de/~ramsch/iso8859-1.html
|
341
tests/lynx-dump/data/COPYING
Normal file
341
tests/lynx-dump/data/COPYING
Normal file
@ -0,0 +1,341 @@
|
|||||||
|
See the COPYHEADER file which gives copyright information for Lynx.
|
||||||
|
------------------------------------------------------------------------------
|
||||||
|
GNU GENERAL PUBLIC LICENSE
|
||||||
|
Version 2, June 1991
|
||||||
|
|
||||||
|
Copyright (C) 1989, 1991 Free Software Foundation, Inc.,
|
||||||
|
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||||
|
Everyone is permitted to copy and distribute verbatim copies
|
||||||
|
of this license document, but changing it is not allowed.
|
||||||
|
|
||||||
|
Preamble
|
||||||
|
|
||||||
|
The licenses for most software are designed to take away your
|
||||||
|
freedom to share and change it. By contrast, the GNU General Public
|
||||||
|
License is intended to guarantee your freedom to share and change free
|
||||||
|
software--to make sure the software is free for all its users. This
|
||||||
|
General Public License applies to most of the Free Software
|
||||||
|
Foundation's software and to any other program whose authors commit to
|
||||||
|
using it. (Some other Free Software Foundation software is covered by
|
||||||
|
the GNU Lesser General Public License instead.) You can apply it to
|
||||||
|
your programs, too.
|
||||||
|
|
||||||
|
When we speak of free software, we are referring to freedom, not
|
||||||
|
price. Our General Public Licenses are designed to make sure that you
|
||||||
|
have the freedom to distribute copies of free software (and charge for
|
||||||
|
this service if you wish), that you receive source code or can get it
|
||||||
|
if you want it, that you can change the software or use pieces of it
|
||||||
|
in new free programs; and that you know you can do these things.
|
||||||
|
|
||||||
|
To protect your rights, we need to make restrictions that forbid
|
||||||
|
anyone to deny you these rights or to ask you to surrender the rights.
|
||||||
|
These restrictions translate to certain responsibilities for you if you
|
||||||
|
distribute copies of the software, or if you modify it.
|
||||||
|
|
||||||
|
For example, if you distribute copies of such a program, whether
|
||||||
|
gratis or for a fee, you must give the recipients all the rights that
|
||||||
|
you have. You must make sure that they, too, receive or can get the
|
||||||
|
source code. And you must show them these terms so they know their
|
||||||
|
rights.
|
||||||
|
|
||||||
|
We protect your rights with two steps: (1) copyright the software, and
|
||||||
|
(2) offer you this license which gives you legal permission to copy,
|
||||||
|
distribute and/or modify the software.
|
||||||
|
|
||||||
|
Also, for each author's protection and ours, we want to make certain
|
||||||
|
that everyone understands that there is no warranty for this free
|
||||||
|
software. If the software is modified by someone else and passed on, we
|
||||||
|
want its recipients to know that what they have is not the original, so
|
||||||
|
that any problems introduced by others will not reflect on the original
|
||||||
|
authors' reputations.
|
||||||
|
|
||||||
|
Finally, any free program is threatened constantly by software
|
||||||
|
patents. We wish to avoid the danger that redistributors of a free
|
||||||
|
program will individually obtain patent licenses, in effect making the
|
||||||
|
program proprietary. To prevent this, we have made it clear that any
|
||||||
|
patent must be licensed for everyone's free use or not licensed at all.
|
||||||
|
|
||||||
|
The precise terms and conditions for copying, distribution and
|
||||||
|
modification follow.
|
||||||
|
|
||||||
|
GNU GENERAL PUBLIC LICENSE
|
||||||
|
TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
|
||||||
|
|
||||||
|
0. This License applies to any program or other work which contains
|
||||||
|
a notice placed by the copyright holder saying it may be distributed
|
||||||
|
under the terms of this General Public License. The "Program", below,
|
||||||
|
refers to any such program or work, and a "work based on the Program"
|
||||||
|
means either the Program or any derivative work under copyright law:
|
||||||
|
that is to say, a work containing the Program or a portion of it,
|
||||||
|
either verbatim or with modifications and/or translated into another
|
||||||
|
language. (Hereinafter, translation is included without limitation in
|
||||||
|
the term "modification".) Each licensee is addressed as "you".
|
||||||
|
|
||||||
|
Activities other than copying, distribution and modification are not
|
||||||
|
covered by this License; they are outside its scope. The act of
|
||||||
|
running the Program is not restricted, and the output from the Program
|
||||||
|
is covered only if its contents constitute a work based on the
|
||||||
|
Program (independent of having been made by running the Program).
|
||||||
|
Whether that is true depends on what the Program does.
|
||||||
|
|
||||||
|
1. You may copy and distribute verbatim copies of the Program's
|
||||||
|
source code as you receive it, in any medium, provided that you
|
||||||
|
conspicuously and appropriately publish on each copy an appropriate
|
||||||
|
copyright notice and disclaimer of warranty; keep intact all the
|
||||||
|
notices that refer to this License and to the absence of any warranty;
|
||||||
|
and give any other recipients of the Program a copy of this License
|
||||||
|
along with the Program.
|
||||||
|
|
||||||
|
You may charge a fee for the physical act of transferring a copy, and
|
||||||
|
you may at your option offer warranty protection in exchange for a fee.
|
||||||
|
|
||||||
|
2. You may modify your copy or copies of the Program or any portion
|
||||||
|
of it, thus forming a work based on the Program, and copy and
|
||||||
|
distribute such modifications or work under the terms of Section 1
|
||||||
|
above, provided that you also meet all of these conditions:
|
||||||
|
|
||||||
|
a) You must cause the modified files to carry prominent notices
|
||||||
|
stating that you changed the files and the date of any change.
|
||||||
|
|
||||||
|
b) You must cause any work that you distribute or publish, that in
|
||||||
|
whole or in part contains or is derived from the Program or any
|
||||||
|
part thereof, to be licensed as a whole at no charge to all third
|
||||||
|
parties under the terms of this License.
|
||||||
|
|
||||||
|
c) If the modified program normally reads commands interactively
|
||||||
|
when run, you must cause it, when started running for such
|
||||||
|
interactive use in the most ordinary way, to print or display an
|
||||||
|
announcement including an appropriate copyright notice and a
|
||||||
|
notice that there is no warranty (or else, saying that you provide
|
||||||
|
a warranty) and that users may redistribute the program under
|
||||||
|
these conditions, and telling the user how to view a copy of this
|
||||||
|
License. (Exception: if the Program itself is interactive but
|
||||||
|
does not normally print such an announcement, your work based on
|
||||||
|
the Program is not required to print an announcement.)
|
||||||
|
|
||||||
|
These requirements apply to the modified work as a whole. If
|
||||||
|
identifiable sections of that work are not derived from the Program,
|
||||||
|
and can be reasonably considered independent and separate works in
|
||||||
|
themselves, then this License, and its terms, do not apply to those
|
||||||
|
sections when you distribute them as separate works. But when you
|
||||||
|
distribute the same sections as part of a whole which is a work based
|
||||||
|
on the Program, the distribution of the whole must be on the terms of
|
||||||
|
this License, whose permissions for other licensees extend to the
|
||||||
|
entire whole, and thus to each and every part regardless of who wrote it.
|
||||||
|
|
||||||
|
Thus, it is not the intent of this section to claim rights or contest
|
||||||
|
your rights to work written entirely by you; rather, the intent is to
|
||||||
|
exercise the right to control the distribution of derivative or
|
||||||
|
collective works based on the Program.
|
||||||
|
|
||||||
|
In addition, mere aggregation of another work not based on the Program
|
||||||
|
with the Program (or with a work based on the Program) on a volume of
|
||||||
|
a storage or distribution medium does not bring the other work under
|
||||||
|
the scope of this License.
|
||||||
|
|
||||||
|
3. You may copy and distribute the Program (or a work based on it,
|
||||||
|
under Section 2) in object code or executable form under the terms of
|
||||||
|
Sections 1 and 2 above provided that you also do one of the following:
|
||||||
|
|
||||||
|
a) Accompany it with the complete corresponding machine-readable
|
||||||
|
source code, which must be distributed under the terms of Sections
|
||||||
|
1 and 2 above on a medium customarily used for software interchange; or,
|
||||||
|
|
||||||
|
b) Accompany it with a written offer, valid for at least three
|
||||||
|
years, to give any third party, for a charge no more than your
|
||||||
|
cost of physically performing source distribution, a complete
|
||||||
|
machine-readable copy of the corresponding source code, to be
|
||||||
|
distributed under the terms of Sections 1 and 2 above on a medium
|
||||||
|
customarily used for software interchange; or,
|
||||||
|
|
||||||
|
c) Accompany it with the information you received as to the offer
|
||||||
|
to distribute corresponding source code. (This alternative is
|
||||||
|
allowed only for noncommercial distribution and only if you
|
||||||
|
received the program in object code or executable form with such
|
||||||
|
an offer, in accord with Subsection b above.)
|
||||||
|
|
||||||
|
The source code for a work means the preferred form of the work for
|
||||||
|
making modifications to it. For an executable work, complete source
|
||||||
|
code means all the source code for all modules it contains, plus any
|
||||||
|
associated interface definition files, plus the scripts used to
|
||||||
|
control compilation and installation of the executable. However, as a
|
||||||
|
special exception, the source code distributed need not include
|
||||||
|
anything that is normally distributed (in either source or binary
|
||||||
|
form) with the major components (compiler, kernel, and so on) of the
|
||||||
|
operating system on which the executable runs, unless that component
|
||||||
|
itself accompanies the executable.
|
||||||
|
|
||||||
|
If distribution of executable or object code is made by offering
|
||||||
|
access to copy from a designated place, then offering equivalent
|
||||||
|
access to copy the source code from the same place counts as
|
||||||
|
distribution of the source code, even though third parties are not
|
||||||
|
compelled to copy the source along with the object code.
|
||||||
|
|
||||||
|
4. You may not copy, modify, sublicense, or distribute the Program
|
||||||
|
except as expressly provided under this License. Any attempt
|
||||||
|
otherwise to copy, modify, sublicense or distribute the Program is
|
||||||
|
void, and will automatically terminate your rights under this License.
|
||||||
|
However, parties who have received copies, or rights, from you under
|
||||||
|
this License will not have their licenses terminated so long as such
|
||||||
|
parties remain in full compliance.
|
||||||
|
|
||||||
|
5. You are not required to accept this License, since you have not
|
||||||
|
signed it. However, nothing else grants you permission to modify or
|
||||||
|
distribute the Program or its derivative works. These actions are
|
||||||
|
prohibited by law if you do not accept this License. Therefore, by
|
||||||
|
modifying or distributing the Program (or any work based on the
|
||||||
|
Program), you indicate your acceptance of this License to do so, and
|
||||||
|
all its terms and conditions for copying, distributing or modifying
|
||||||
|
the Program or works based on it.
|
||||||
|
|
||||||
|
6. Each time you redistribute the Program (or any work based on the
|
||||||
|
Program), the recipient automatically receives a license from the
|
||||||
|
original licensor to copy, distribute or modify the Program subject to
|
||||||
|
these terms and conditions. You may not impose any further
|
||||||
|
restrictions on the recipients' exercise of the rights granted herein.
|
||||||
|
You are not responsible for enforcing compliance by third parties to
|
||||||
|
this License.
|
||||||
|
|
||||||
|
7. If, as a consequence of a court judgment or allegation of patent
|
||||||
|
infringement or for any other reason (not limited to patent issues),
|
||||||
|
conditions are imposed on you (whether by court order, agreement or
|
||||||
|
otherwise) that contradict the conditions of this License, they do not
|
||||||
|
excuse you from the conditions of this License. If you cannot
|
||||||
|
distribute so as to satisfy simultaneously your obligations under this
|
||||||
|
License and any other pertinent obligations, then as a consequence you
|
||||||
|
may not distribute the Program at all. For example, if a patent
|
||||||
|
license would not permit royalty-free redistribution of the Program by
|
||||||
|
all those who receive copies directly or indirectly through you, then
|
||||||
|
the only way you could satisfy both it and this License would be to
|
||||||
|
refrain entirely from distribution of the Program.
|
||||||
|
|
||||||
|
If any portion of this section is held invalid or unenforceable under
|
||||||
|
any particular circumstance, the balance of the section is intended to
|
||||||
|
apply and the section as a whole is intended to apply in other
|
||||||
|
circumstances.
|
||||||
|
|
||||||
|
It is not the purpose of this section to induce you to infringe any
|
||||||
|
patents or other property right claims or to contest validity of any
|
||||||
|
such claims; this section has the sole purpose of protecting the
|
||||||
|
integrity of the free software distribution system, which is
|
||||||
|
implemented by public license practices. Many people have made
|
||||||
|
generous contributions to the wide range of software distributed
|
||||||
|
through that system in reliance on consistent application of that
|
||||||
|
system; it is up to the author/donor to decide if he or she is willing
|
||||||
|
to distribute software through any other system and a licensee cannot
|
||||||
|
impose that choice.
|
||||||
|
|
||||||
|
This section is intended to make thoroughly clear what is believed to
|
||||||
|
be a consequence of the rest of this License.
|
||||||
|
|
||||||
|
8. If the distribution and/or use of the Program is restricted in
|
||||||
|
certain countries either by patents or by copyrighted interfaces, the
|
||||||
|
original copyright holder who places the Program under this License
|
||||||
|
may add an explicit geographical distribution limitation excluding
|
||||||
|
those countries, so that distribution is permitted only in or among
|
||||||
|
countries not thus excluded. In such case, this License incorporates
|
||||||
|
the limitation as if written in the body of this License.
|
||||||
|
|
||||||
|
9. The Free Software Foundation may publish revised and/or new versions
|
||||||
|
of the General Public License from time to time. Such new versions will
|
||||||
|
be similar in spirit to the present version, but may differ in detail to
|
||||||
|
address new problems or concerns.
|
||||||
|
|
||||||
|
Each version is given a distinguishing version number. If the Program
|
||||||
|
specifies a version number of this License which applies to it and "any
|
||||||
|
later version", you have the option of following the terms and conditions
|
||||||
|
either of that version or of any later version published by the Free
|
||||||
|
Software Foundation. If the Program does not specify a version number of
|
||||||
|
this License, you may choose any version ever published by the Free Software
|
||||||
|
Foundation.
|
||||||
|
|
||||||
|
10. If you wish to incorporate parts of the Program into other free
|
||||||
|
programs whose distribution conditions are different, write to the author
|
||||||
|
to ask for permission. For software which is copyrighted by the Free
|
||||||
|
Software Foundation, write to the Free Software Foundation; we sometimes
|
||||||
|
make exceptions for this. Our decision will be guided by the two goals
|
||||||
|
of preserving the free status of all derivatives of our free software and
|
||||||
|
of promoting the sharing and reuse of software generally.
|
||||||
|
|
||||||
|
NO WARRANTY
|
||||||
|
|
||||||
|
11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY
|
||||||
|
FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN
|
||||||
|
OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES
|
||||||
|
PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED
|
||||||
|
OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
|
||||||
|
MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS
|
||||||
|
TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE
|
||||||
|
PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING,
|
||||||
|
REPAIR OR CORRECTION.
|
||||||
|
|
||||||
|
12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
|
||||||
|
WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR
|
||||||
|
REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES,
|
||||||
|
INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING
|
||||||
|
OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED
|
||||||
|
TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY
|
||||||
|
YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER
|
||||||
|
PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE
|
||||||
|
POSSIBILITY OF SUCH DAMAGES.
|
||||||
|
|
||||||
|
END OF TERMS AND CONDITIONS
|
||||||
|
|
||||||
|
How to Apply These Terms to Your New Programs
|
||||||
|
|
||||||
|
If you develop a new program, and you want it to be of the greatest
|
||||||
|
possible use to the public, the best way to achieve this is to make it
|
||||||
|
free software which everyone can redistribute and change under these terms.
|
||||||
|
|
||||||
|
To do so, attach the following notices to the program. It is safest
|
||||||
|
to attach them to the start of each source file to most effectively
|
||||||
|
convey the exclusion of warranty; and each file should have at least
|
||||||
|
the "copyright" line and a pointer to where the full notice is found.
|
||||||
|
|
||||||
|
<one line to give the program's name and a brief idea of what it does.>
|
||||||
|
Copyright (C) <year> <name of author>
|
||||||
|
|
||||||
|
This program is free software; you can redistribute it and/or modify
|
||||||
|
it under the terms of the GNU General Public License as published by
|
||||||
|
the Free Software Foundation; either version 2 of the License, or
|
||||||
|
(at your option) any later version.
|
||||||
|
|
||||||
|
This program is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
GNU General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU General Public License along
|
||||||
|
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||||
|
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||||
|
|
||||||
|
Also add information on how to contact you by electronic and paper mail.
|
||||||
|
|
||||||
|
If the program is interactive, make it output a short notice like this
|
||||||
|
when it starts in an interactive mode:
|
||||||
|
|
||||||
|
Gnomovision version 69, Copyright (C) year name of author
|
||||||
|
Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
|
||||||
|
This is free software, and you are welcome to redistribute it
|
||||||
|
under certain conditions; type `show c' for details.
|
||||||
|
|
||||||
|
The hypothetical commands `show w' and `show c' should show the appropriate
|
||||||
|
parts of the General Public License. Of course, the commands you use may
|
||||||
|
be called something other than `show w' and `show c'; they could even be
|
||||||
|
mouse-clicks or menu items--whatever suits your program.
|
||||||
|
|
||||||
|
You should also get your employer (if you work as a programmer) or your
|
||||||
|
school, if any, to sign a "copyright disclaimer" for the program, if
|
||||||
|
necessary. Here is a sample; alter the names:
|
||||||
|
|
||||||
|
Yoyodyne, Inc., hereby disclaims all copyright interest in the program
|
||||||
|
`Gnomovision' (which makes passes at compilers) written by James Hacker.
|
||||||
|
|
||||||
|
<signature of Ty Coon>, 1 April 1989
|
||||||
|
Ty Coon, President of Vice
|
||||||
|
|
||||||
|
This General Public License does not permit incorporating your program into
|
||||||
|
proprietary programs. If your program is a subroutine library, you may
|
||||||
|
consider it more useful to permit linking proprietary applications with the
|
||||||
|
library. If this is what you want to do, use the GNU Lesser General
|
||||||
|
Public License instead of this License.
|
83
tests/lynx-dump/data/ISO_LATIN1_test.html
Normal file
83
tests/lynx-dump/data/ISO_LATIN1_test.html
Normal file
@ -0,0 +1,83 @@
|
|||||||
|
<!DOCTYPE html public "-//IETF//DTD HTML 3.0//EN">
|
||||||
|
<html>
|
||||||
|
<head>
|
||||||
|
<title>Test of minimal ISO LATIN1 character set</title>
|
||||||
|
<link rev="made" href="mailto:lynx-dev@nongnu.org">
|
||||||
|
</head>
|
||||||
|
|
||||||
|
<body>
|
||||||
|
<h1>minimal ISO LATIN1 text entities</h1>
|
||||||
|
<ul>
|
||||||
|
<li>"Æ", /* capital AE diphthong (ligature) */
|
||||||
|
<li>"Á", /* capital A, acute accent */
|
||||||
|
<li>"Â", /* capital A, circumflex accent */
|
||||||
|
<li>"À", /* capital A, grave accent */
|
||||||
|
<li>"Å", /* capital A, ring */
|
||||||
|
<li>"Ã", /* capital A, tilde */
|
||||||
|
<li>"Ä", /* capital A, dieresis or umlaut mark */
|
||||||
|
<li>"Ç", /* capital C, cedilla */
|
||||||
|
<li>"Ð", /* capital Eth, Icelandic */
|
||||||
|
<li>"É", /* capital E, acute accent */
|
||||||
|
<li>"Ê", /* capital E, circumflex accent */
|
||||||
|
<li>"È", /* capital E, grave accent */
|
||||||
|
<li>"Ë", /* capital E, dieresis or umlaut mark */
|
||||||
|
<li>"Í", /* capital I, acute accent */
|
||||||
|
<li>"Î", /* capital I, circumflex accent */
|
||||||
|
<li>"Ì", /* capital I, grave accent */
|
||||||
|
<li>"Ï", /* capital I, dieresis or umlaut mark */
|
||||||
|
<li>"Ñ", /* capital N, tilde */
|
||||||
|
<li>"Ó", /* capital O, acute accent */
|
||||||
|
<li>"Ô", /* capital O, circumflex accent */
|
||||||
|
<li>"Ò", /* capital O, grave accent */
|
||||||
|
<li>"Ø", /* capital O, slash */
|
||||||
|
<li>"Õ", /* capital O, tilde */
|
||||||
|
<li>"Ö", /* capital O, dieresis or umlaut mark */
|
||||||
|
<li>"Þ", /* capital THORN, Icelandic */
|
||||||
|
<li>"Ú", /* capital U, acute accent */
|
||||||
|
<li>"Û", /* capital U, circumflex accent */
|
||||||
|
<li>"Ù", /* capital U, grave accent */
|
||||||
|
<li>"Ü", /* capital U, dieresis or umlaut mark */
|
||||||
|
<li>"Ý", /* capital Y, acute accent */
|
||||||
|
<li>"á", /* small a, acute accent */
|
||||||
|
<li>"â", /* small a, circumflex accent */
|
||||||
|
<li>"æ", /* small ae diphthong (ligature) */
|
||||||
|
<li>"à", /* small a, grave accent */
|
||||||
|
<li>"&", /* ampersand */
|
||||||
|
<li>"å", /* small a, ring */
|
||||||
|
<li>"ã", /* small a, tilde */
|
||||||
|
<li>"ä", /* small a, dieresis or umlaut mark */
|
||||||
|
<li>"ç", /* small c, cedilla */
|
||||||
|
<li>"é", /* small e, acute accent */
|
||||||
|
<li>"ê", /* small e, circumflex accent */
|
||||||
|
<li>"è", /* small e, grave accent */
|
||||||
|
<li>" ", /* emsp, em space - not collapsed */
|
||||||
|
<li>" ", /* ensp, en space - not collapsed */
|
||||||
|
<li>"ð", /* small eth, Icelandic */
|
||||||
|
<li>"ë", /* small e, dieresis or umlaut mark */
|
||||||
|
<li>">", /* greater than */
|
||||||
|
<li>"í", /* small i, acute accent */
|
||||||
|
<li>"î", /* small i, circumflex accent */
|
||||||
|
<li>"ì", /* small i, grave accent */
|
||||||
|
<li>"ï", /* small i, dieresis or umlaut mark */
|
||||||
|
<li>"<", /* less than */
|
||||||
|
<li>" ", /* nbsp, non breaking space */
|
||||||
|
<li>"ñ", /* small n, tilde */
|
||||||
|
<li>"ó", /* small o, acute accent */
|
||||||
|
<li>"ô", /* small o, circumflex accent */
|
||||||
|
<li>"ò", /* small o, grave accent */
|
||||||
|
<li>"ø", /* small o, slash */
|
||||||
|
<li>"õ", /* small o, tilde */
|
||||||
|
<li>"ö", /* small o, dieresis or umlaut mark */
|
||||||
|
<li>""", /* quote, '"' */
|
||||||
|
<li>"ß", /* small sharp s, German (sz ligature) */
|
||||||
|
<li>"þ", /* small thorn, Icelandic */
|
||||||
|
<li>"ú", /* small u, acute accent */
|
||||||
|
<li>"û", /* small u, circumflex accent */
|
||||||
|
<li>"ù", /* small u, grave accent */
|
||||||
|
<li>"ü", /* small u, dieresis or umlaut mark */
|
||||||
|
<li>"ý", /* small y, acute accent */
|
||||||
|
<li>"ÿ", /* small y, dieresis or umlaut mark */
|
||||||
|
</ul>
|
||||||
|
|
||||||
|
</body>
|
||||||
|
</html>
|
71
tests/lynx-dump/data/ISO_LATIN1_test.html.exp
Normal file
71
tests/lynx-dump/data/ISO_LATIN1_test.html.exp
Normal file
@ -0,0 +1,71 @@
|
|||||||
|
minimal ISO LATIN1 text entities
|
||||||
|
|
||||||
|
* "Æ", /* capital AE diphthong (ligature) */
|
||||||
|
* "Á", /* capital A, acute accent */
|
||||||
|
* "Â", /* capital A, circumflex accent */
|
||||||
|
* "À", /* capital A, grave accent */
|
||||||
|
* "Å", /* capital A, ring */
|
||||||
|
* "Ã", /* capital A, tilde */
|
||||||
|
* "Ä", /* capital A, dieresis or umlaut mark */
|
||||||
|
* "Ç", /* capital C, cedilla */
|
||||||
|
* "Ð", /* capital Eth, Icelandic */
|
||||||
|
* "É", /* capital E, acute accent */
|
||||||
|
* "Ê", /* capital E, circumflex accent */
|
||||||
|
* "È", /* capital E, grave accent */
|
||||||
|
* "Ë", /* capital E, dieresis or umlaut mark */
|
||||||
|
* "Í", /* capital I, acute accent */
|
||||||
|
* "Î", /* capital I, circumflex accent */
|
||||||
|
* "Ì", /* capital I, grave accent */
|
||||||
|
* "Ï", /* capital I, dieresis or umlaut mark */
|
||||||
|
* "Ñ", /* capital N, tilde */
|
||||||
|
* "Ó", /* capital O, acute accent */
|
||||||
|
* "Ô", /* capital O, circumflex accent */
|
||||||
|
* "Ò", /* capital O, grave accent */
|
||||||
|
* "Ø", /* capital O, slash */
|
||||||
|
* "Õ", /* capital O, tilde */
|
||||||
|
* "Ö", /* capital O, dieresis or umlaut mark */
|
||||||
|
* "Þ", /* capital THORN, Icelandic */
|
||||||
|
* "Ú", /* capital U, acute accent */
|
||||||
|
* "Û", /* capital U, circumflex accent */
|
||||||
|
* "Ù", /* capital U, grave accent */
|
||||||
|
* "Ü", /* capital U, dieresis or umlaut mark */
|
||||||
|
* "Ý", /* capital Y, acute accent */
|
||||||
|
* "á", /* small a, acute accent */
|
||||||
|
* "â", /* small a, circumflex accent */
|
||||||
|
* "æ", /* small ae diphthong (ligature) */
|
||||||
|
* "à", /* small a, grave accent */
|
||||||
|
* "&", /* ampersand */
|
||||||
|
* "å", /* small a, ring */
|
||||||
|
* "ã", /* small a, tilde */
|
||||||
|
* "ä", /* small a, dieresis or umlaut mark */
|
||||||
|
* "ç", /* small c, cedilla */
|
||||||
|
* "é", /* small e, acute accent */
|
||||||
|
* "ê", /* small e, circumflex accent */
|
||||||
|
* "è", /* small e, grave accent */
|
||||||
|
* " ", /* emsp, em space - not collapsed */
|
||||||
|
* " ", /* ensp, en space - not collapsed */
|
||||||
|
* "ð", /* small eth, Icelandic */
|
||||||
|
* "ë", /* small e, dieresis or umlaut mark */
|
||||||
|
* ">", /* greater than */
|
||||||
|
* "í", /* small i, acute accent */
|
||||||
|
* "î", /* small i, circumflex accent */
|
||||||
|
* "ì", /* small i, grave accent */
|
||||||
|
* "ï", /* small i, dieresis or umlaut mark */
|
||||||
|
* "<", /* less than */
|
||||||
|
* " ", /* nbsp, non breaking space */
|
||||||
|
* "ñ", /* small n, tilde */
|
||||||
|
* "ó", /* small o, acute accent */
|
||||||
|
* "ô", /* small o, circumflex accent */
|
||||||
|
* "ò", /* small o, grave accent */
|
||||||
|
* "ø", /* small o, slash */
|
||||||
|
* "õ", /* small o, tilde */
|
||||||
|
* "ö", /* small o, dieresis or umlaut mark */
|
||||||
|
* """, /* quote, '"' */
|
||||||
|
* "ß", /* small sharp s, German (sz ligature) */
|
||||||
|
* "þ", /* small thorn, Icelandic */
|
||||||
|
* "ú", /* small u, acute accent */
|
||||||
|
* "û", /* small u, circumflex accent */
|
||||||
|
* "ù", /* small u, grave accent */
|
||||||
|
* "ü", /* small u, dieresis or umlaut mark */
|
||||||
|
* "ý", /* small y, acute accent */
|
||||||
|
* "ÿ", /* small y, dieresis or umlaut mark */
|
1
tests/lynx-dump/data/README
Normal file
1
tests/lynx-dump/data/README
Normal file
@ -0,0 +1 @@
|
|||||||
|
copy of /usr/share/doc/lynx/test after installing lynx-2.8.9-15.fc34
|
8
tests/lynx-dump/data/README.txt
Normal file
8
tests/lynx-dump/data/README.txt
Normal file
@ -0,0 +1,8 @@
|
|||||||
|
ISO_LATIN1_test.html and iso-8859-1.html are for testing the translation of
|
||||||
|
HTML entities with the character sets that are selectable via the 'o'ptions
|
||||||
|
menu.
|
||||||
|
|
||||||
|
TestComment.html and tabtest.html are for testing comment and TAB handling.
|
||||||
|
|
||||||
|
Any other files in this directory do not represent a test suite. They
|
||||||
|
are used during program testing to track down odd and mysterious bugs.
|
50
tests/lynx-dump/data/TestComment.html
Normal file
50
tests/lynx-dump/data/TestComment.html
Normal file
@ -0,0 +1,50 @@
|
|||||||
|
<!DOCTYPE HTML PUBLIC "-//IETF//DTD HTML 2.0 Level 2//EN">
|
||||||
|
<html>
|
||||||
|
<head>
|
||||||
|
<title>HTML Comment Parser Test</title>
|
||||||
|
<link rev="made" href="mailto:pg@sweng.stortek.com">
|
||||||
|
<base href="http://nyx10.cs.du.edu:8001/~pgilmart/TestComment.html">
|
||||||
|
</head>
|
||||||
|
|
||||||
|
<body>
|
||||||
|
<P> Test of the HTML/SGML comment syntax, as given in the W3 HTML Spec:
|
||||||
|
<a
|
||||||
|
href="http://www.w3.org/hypertext/WWW/MarkUp/html-spec/html-spec_3.html#SEC15">
|
||||||
|
Comments</a>
|
||||||
|
|
||||||
|
<P>See especially, the footnote:
|
||||||
|
<a
|
||||||
|
href="http://www.w3.org/hypertext/WWW/MarkUp/html-spec/html-spec_foot.html#FOOT10"
|
||||||
|
>(10)</a>
|
||||||
|
|
||||||
|
<P>
|
||||||
|
Co-vary the LYK_MINIMAL and LYK_HISTORICAL command key toggles (use the
|
||||||
|
'k'eymap command to see their key bindings) to establish Valid, Minimal
|
||||||
|
or Historical comment parsing, and toggle trace mode on (Ctrl-T), to see
|
||||||
|
how comment parsing is affected.
|
||||||
|
|
||||||
|
<P>Case 01 through Case 14 should appear
|
||||||
|
as short separate paragraphs with the case numbers aligned vertically.
|
||||||
|
Some noise characters may appear to the right as a byproduct of code present
|
||||||
|
for error recovery, but there should be no noise before each case number.
|
||||||
|
|
||||||
|
<P> Case <!-- trivial --> | 01 | Trivial
|
||||||
|
<P> Case <!-- extra hyphens and spaces -- -- -- > | 02 | Hyphens and Spaces
|
||||||
|
<P> Case <!-- extra < < < --> | 03 | Extra LT --> --> -->
|
||||||
|
<P> Case <!-- balanced < < < > > > --> | 04 | Balanced
|
||||||
|
<P> Case <!-- extra > -- --> > still in comment --> | 05 | Extra GT
|
||||||
|
<P> Case <!-- stuff between -- and > -- still in comment --> | 06 | Stuff Inside
|
||||||
|
<P> Case <!-- Extra <!-- -- Second Comment --> | 07 | Extra Open --> -->
|
||||||
|
<P> Case <!-- New Line between --
|
||||||
|
-- Second Comment --
|
||||||
|
> | 08 | New line
|
||||||
|
<P> Case <!---> degenerate --> | 09 | Degenerate <P> Case <!----> | 10 | Empty
|
||||||
|
<P> Case <!-- perverse <!--> | 11 | Perverse --> --> -->
|
||||||
|
<P> Case <!-- Comment -- -- and a half > this is still in comment -- > | 12 | Multiple Comments --> --> -->
|
||||||
|
<P> Case <!> | 13 | Zero Comments
|
||||||
|
<P> Case <!-- < >
|
||||||
|
< > Still in comment --> | 14 | Last
|
||||||
|
|
||||||
|
<P><<STRONG>Tests completed!</STRONG>>
|
||||||
|
</body>
|
||||||
|
</html>
|
49
tests/lynx-dump/data/TestComment.html.exp
Normal file
49
tests/lynx-dump/data/TestComment.html.exp
Normal file
@ -0,0 +1,49 @@
|
|||||||
|
Test of the HTML/SGML comment syntax, as given in the W3 HTML Spec:
|
||||||
|
[1]Comments
|
||||||
|
|
||||||
|
See especially, the footnote: [2](10)
|
||||||
|
|
||||||
|
Co-vary the LYK_MINIMAL and LYK_HISTORICAL command key toggles (use the
|
||||||
|
'k'eymap command to see their key bindings) to establish Valid, Minimal
|
||||||
|
or Historical comment parsing, and toggle trace mode on (Ctrl-T), to
|
||||||
|
see how comment parsing is affected.
|
||||||
|
|
||||||
|
Case 01 through Case 14 should appear as short separate paragraphs with
|
||||||
|
the case numbers aligned vertically. Some noise characters may appear
|
||||||
|
to the right as a byproduct of code present for error recovery, but
|
||||||
|
there should be no noise before each case number.
|
||||||
|
|
||||||
|
Case | 01 | Trivial
|
||||||
|
|
||||||
|
Case | 02 | Hyphens and Spaces
|
||||||
|
|
||||||
|
Case | 03 | Extra LT --> --> -->
|
||||||
|
|
||||||
|
Case | 04 | Balanced
|
||||||
|
|
||||||
|
Case > still in comment --> | 05 | Extra GT
|
||||||
|
|
||||||
|
Case | 06 | Stuff Inside
|
||||||
|
|
||||||
|
Case | 07 | Extra Open --> -->
|
||||||
|
|
||||||
|
Case | 08 | New line
|
||||||
|
|
||||||
|
Case | 09 | Degenerate
|
||||||
|
|
||||||
|
Case | 10 | Empty
|
||||||
|
|
||||||
|
Case | 11 | Perverse --> --> -->
|
||||||
|
|
||||||
|
Case | 12 | Multiple Comments --> --> -->
|
||||||
|
|
||||||
|
Case | 13 | Zero Comments
|
||||||
|
|
||||||
|
Case | 14 | Last
|
||||||
|
|
||||||
|
<Tests completed!>
|
||||||
|
|
||||||
|
References
|
||||||
|
|
||||||
|
1. http://www.w3.org/hypertext/WWW/MarkUp/html-spec/html-spec_3.html#SEC15
|
||||||
|
2. http://www.w3.org/hypertext/WWW/MarkUp/html-spec/html-spec_foot.html#FOOT10
|
46
tests/lynx-dump/data/bad-html.html
Normal file
46
tests/lynx-dump/data/bad-html.html
Normal file
@ -0,0 +1,46 @@
|
|||||||
|
<!DOCTYPE html PUBLIC "-//IETF//DTD HTML 3.0//EN">
|
||||||
|
<HTML>
|
||||||
|
<HEAD>
|
||||||
|
<TITLE>Examples of "Bad HTML" per Lynx</TITLE>
|
||||||
|
<META HTTP-EQUIV="Content-Type" CONTENT="text/html; charset=iso-8859-1">
|
||||||
|
<LINK REV="made" HREF="mailto:lynx-dev@nongnu.org">
|
||||||
|
<LINK REV="owner" HREF="http://mail.gnu.org/mailman/listinfo/lynx-dev/">
|
||||||
|
</HEAD>
|
||||||
|
|
||||||
|
<BODY>
|
||||||
|
<h2>Unterminated TEXTAREA</h2>
|
||||||
|
<form action="http://localhost/cgi-bin/bogus-parms" method="get">
|
||||||
|
<textarea name="50cols" cols="50" rows=3>
|
||||||
|
This is not empty.
|
||||||
|
</textarea>
|
||||||
|
<br>
|
||||||
|
<textarea name="50percent" cols="50%" rows=3>
|
||||||
|
This seems to have a button.
|
||||||
|
<button>Button 1</button>
|
||||||
|
</textarea>
|
||||||
|
<hr>
|
||||||
|
<input type="submit" value="Submit this form">
|
||||||
|
<br>
|
||||||
|
<input type="reset" value="Reset this form">
|
||||||
|
</form>
|
||||||
|
|
||||||
|
<h2>Unterminated SELECT</h2>
|
||||||
|
<select>
|
||||||
|
<option>first option</option>
|
||||||
|
<option>second option</option>
|
||||||
|
<option>third option</option>
|
||||||
|
</notselect>
|
||||||
|
<br>
|
||||||
|
<select>
|
||||||
|
<option>first option</option>
|
||||||
|
<option>second option</option>
|
||||||
|
<option>third option</option>
|
||||||
|
</select>
|
||||||
|
|
||||||
|
<h2>OPTION not within SELECT</h2>
|
||||||
|
<option>third option</option>
|
||||||
|
|
||||||
|
<h2>TEXTAREA ending without starting</h2>
|
||||||
|
</textarea>
|
||||||
|
|
||||||
|
</BODY>
|
24
tests/lynx-dump/data/bad-html.html.exp
Normal file
24
tests/lynx-dump/data/bad-html.html.exp
Normal file
@ -0,0 +1,24 @@
|
|||||||
|
Unterminated TEXTAREA
|
||||||
|
|
||||||
|
|
||||||
|
This is not empty.________________________________
|
||||||
|
__________________________________________________
|
||||||
|
__________________________________________________
|
||||||
|
|
||||||
|
This seems to have a button.______________________
|
||||||
|
<button>Button 1</button>_________________________
|
||||||
|
__________________________________________________
|
||||||
|
__________________________________________________________________
|
||||||
|
|
||||||
|
Submit this form
|
||||||
|
Reset this form
|
||||||
|
|
||||||
|
Unterminated SELECT
|
||||||
|
|
||||||
|
[first option_][first option_]
|
||||||
|
|
||||||
|
OPTION not within SELECT
|
||||||
|
|
||||||
|
TEXTAREA ending without starting
|
||||||
|
|
||||||
|
third option
|
63
tests/lynx-dump/data/c1.html
Normal file
63
tests/lynx-dump/data/c1.html
Normal file
@ -0,0 +1,63 @@
|
|||||||
|
<!DOCTYPE html PUBLIC "-//IETF//DTD HTML 3.0//EN">
|
||||||
|
<HTML>
|
||||||
|
<HEAD>
|
||||||
|
<TITLE>Test of invalid NCRs 128-159</TITLE>
|
||||||
|
</HEAD>
|
||||||
|
<BODY><H2>Test of invalid NCRs 128-159</H2>
|
||||||
|
<P>
|
||||||
|
Authoring tools on MS Windows, in particular MS FrontPage ("WYSIWYG" HTML editor),
|
||||||
|
generate invalid <DFN>Numerical Character References</DFN> for characters
|
||||||
|
commonly found in positions 128...159 (0x80...0x9f) in Windows fonts. Although
|
||||||
|
these are valid codepoints for <em>windows-1252</em> (and other
|
||||||
|
windows-xxxx) charsets, valid NCRs always refer to the document character set
|
||||||
|
in the SGML sense, not to the character encoding scheme (or charset). For HTML,
|
||||||
|
the SGML document character set is fixed, it is always a subset of Unicode
|
||||||
|
(or ISO 10646). In Unicode and its iso-8859-1 subset, values 128...159 are
|
||||||
|
C1 control characters, they must not appear in HTML. Valid NCRs for the
|
||||||
|
intended characters use Unicode values greater than 256.
|
||||||
|
<p>
|
||||||
|
Lynx tries to interpret some of the invalid codes, by assuming that they are
|
||||||
|
windows-1252 codepoints.
|
||||||
|
<PRE>
|
||||||
|
|
||||||
|
You may want to press '\' to view the source of this test.
|
||||||
|
|
||||||
|
<em>Code invalid NCR <!-- --> <tab id=c>valid NCR, description</em>
|
||||||
|
<em> normal in ALT <a id=table></a> </em>
|
||||||
|
|
||||||
|
0x80 € <IMG SRC=X ALT="€"> <tab to=c>€ #EURO SIGN
|
||||||
|
0x81  <IMG SRC=X ALT=""> <!----> #NOT USED
|
||||||
|
0x82 ‚ <IMG SRC=X ALT="‚"> <tab to=c>‚ #SINGLE LOW-9 QUOTATION MARK
|
||||||
|
0x83 ƒ <IMG SRC=X ALT="ƒ"> <tab to=c>ƒ #LATIN SMALL LETTER F WITH HOOK
|
||||||
|
0x84 „ <IMG SRC=X ALT="„"> <tab to=c>„ #DOUBLE LOW-9 QUOTATION MARK
|
||||||
|
0x85 … <IMG SRC=X ALT="…"> <tab to=c>… #HORIZONTAL ELLIPSIS
|
||||||
|
0x86 † <IMG SRC=X ALT="†"> <tab to=c>† #DAGGER
|
||||||
|
0x87 ‡ <IMG SRC=X ALT="‡"> <tab to=c>‡ #DOUBLE DAGGER
|
||||||
|
0x88 ˆ <IMG SRC=X ALT="ˆ"> <tab to=c>ˆ #MODIFIER LETTER CIRCUMFLEX ACCENT
|
||||||
|
0x89 ‰ <IMG SRC=X ALT="‰"> <tab to=c>‰ #PER MILLE SIGN
|
||||||
|
0x8a Š <IMG SRC=X ALT="Š"> <tab to=c>Š #LATIN CAPITAL LETTER S WITH CARON
|
||||||
|
0x8b ‹ <IMG SRC=X ALT="‹"> <tab to=c>‹ #SINGLE LEFT-POINTING ANGLE QUOTATION MARK
|
||||||
|
0x8c Œ <IMG SRC=X ALT="Œ"> <tab to=c>Œ #LATIN CAPITAL LIGATURE OE
|
||||||
|
0x8d  <IMG SRC=X ALT=""> <!----> #NOT USED
|
||||||
|
0x8e Ž <IMG SRC=X ALT="Ž"> <!--Ž--> #NOT USED
|
||||||
|
0x8f  <IMG SRC=X ALT=""> <!----> #NOT USED
|
||||||
|
0x90  <IMG SRC=X ALT=""> <!----> #NOT USED
|
||||||
|
0x91 ‘ <IMG SRC=X ALT="‘"> <tab to=c>‘ #LEFT SINGLE QUOTATION MARK
|
||||||
|
0x92 ’ <IMG SRC=X ALT="’"> <tab to=c>’ #RIGHT SINGLE QUOTATION MARK
|
||||||
|
0x93 “ <IMG SRC=X ALT="“"> <tab to=c>“ #LEFT DOUBLE QUOTATION MARK
|
||||||
|
0x94 ” <IMG SRC=X ALT="”"> <tab to=c>” #RIGHT DOUBLE QUOTATION MARK
|
||||||
|
0x95 • <IMG SRC=X ALT="•"> <tab to=c>• #BULLET
|
||||||
|
0x96 – <IMG SRC=X ALT="–"> <tab to=c>– #EN DASH
|
||||||
|
0x97 — <IMG SRC=X ALT="—"> <tab to=c>— #EM DASH
|
||||||
|
0x98 ˜ <IMG SRC=X ALT="˜"> <tab to=c>˜ #SMALL TILDE
|
||||||
|
0x99 ™ <IMG SRC=X ALT="™"> <tab to=c>™ #TRADE MARK SIGN
|
||||||
|
0x9a š <IMG SRC=X ALT="š"> <tab to=c>š #LATIN SMALL LETTER S WITH CARON
|
||||||
|
0x9b › <IMG SRC=X ALT="›"> <tab to=c>› #SINGLE RIGHT-POINTING ANGLE QUOTATION MARK
|
||||||
|
0x9c œ <IMG SRC=X ALT="œ"> <tab to=c>œ #LATIN SMALL LIGATURE OE
|
||||||
|
0x9d  <IMG SRC=X ALT=""> <!----> #NOT USED
|
||||||
|
0x9e ž <IMG SRC=X ALT="ž"> <!--ž--> #NOT USED
|
||||||
|
0x9f Ÿ <IMG SRC=X ALT="Ÿ"> <tab to=c>Ÿ #LATIN CAPITAL LETTER Y WITH DIAERESIS
|
||||||
|
|
||||||
|
</PRE>
|
||||||
|
</BODY>
|
||||||
|
</HTML>
|
57
tests/lynx-dump/data/c1.html.exp
Normal file
57
tests/lynx-dump/data/c1.html.exp
Normal file
@ -0,0 +1,57 @@
|
|||||||
|
|
||||||
|
|
||||||
|
Test of invalid NCRs 128-159
|
||||||
|
|
||||||
|
Authoring tools on MS Windows, in particular MS FrontPage ("WYSIWYG"
|
||||||
|
HTML editor), generate invalid Numerical Character References for
|
||||||
|
characters commonly found in positions 128...159 (0x80...0x9f) in
|
||||||
|
Windows fonts. Although these are valid codepoints for windows-1252
|
||||||
|
(and other windows-xxxx) charsets, valid NCRs always refer to the
|
||||||
|
document character set in the SGML sense, not to the character encoding
|
||||||
|
scheme (or charset). For HTML, the SGML document character set is
|
||||||
|
fixed, it is always a subset of Unicode (or ISO 10646). In Unicode and
|
||||||
|
its iso-8859-1 subset, values 128...159 are C1 control characters, they
|
||||||
|
must not appear in HTML. Valid NCRs for the intended characters use
|
||||||
|
Unicode values greater than 256.
|
||||||
|
|
||||||
|
Lynx tries to interpret some of the invalid codes, by assuming that
|
||||||
|
they are windows-1252 codepoints.
|
||||||
|
|
||||||
|
|
||||||
|
You may want to press '\' to view the source of this test.
|
||||||
|
|
||||||
|
Code invalid NCR valid NCR, description
|
||||||
|
normal in ALT
|
||||||
|
|
||||||
|
0x80 € € #EURO SIGN
|
||||||
|
0x81 <20> #NOT USED
|
||||||
|
0x82 ‚ ‚ #SINGLE LOW-9 QUOTATION MARK
|
||||||
|
0x83 ƒ ƒ #LATIN SMALL LETTER F WITH HOOK
|
||||||
|
0x84 „ „ #DOUBLE LOW-9 QUOTATION MARK
|
||||||
|
0x85
… … #HORIZONTAL ELLIPSIS
|
||||||
|
0x86 † † #DAGGER
|
||||||
|
0x87 ‡ ‡ #DOUBLE DAGGER
|
||||||
|
0x88 ˆ ˆ #MODIFIER LETTER CIRCUMFLEX ACCENT
|
||||||
|
0x89 ‰ ‰ #PER MILLE SIGN
|
||||||
|
0x8a Š Š #LATIN CAPITAL LETTER S WITH CARON
|
||||||
|
0x8b ‹ ‹ #SINGLE LEFT-POINTING ANGLE QUOTATION MARK
|
||||||
|
0x8c Œ Œ #LATIN CAPITAL LIGATURE OE
|
||||||
|
0x8d <20> #NOT USED
|
||||||
|
0x8e Ž #NOT USED
|
||||||
|
0x8f <20> #NOT USED
|
||||||
|
0x90 <20> #NOT USED
|
||||||
|
0x91 ‘ ‘ #LEFT SINGLE QUOTATION MARK
|
||||||
|
0x92 ’ ’ #RIGHT SINGLE QUOTATION MARK
|
||||||
|
0x93 “ “ #LEFT DOUBLE QUOTATION MARK
|
||||||
|
0x94 ” ” #RIGHT DOUBLE QUOTATION MARK
|
||||||
|
0x95 • • #BULLET
|
||||||
|
0x96 – – #EN DASH
|
||||||
|
0x97 — — #EM DASH
|
||||||
|
0x98 ˜ ˜ #SMALL TILDE
|
||||||
|
0x99 ™ ™ #TRADE MARK SIGN
|
||||||
|
0x9a š š #LATIN SMALL LETTER S WITH CARON
|
||||||
|
0x9b › › #SINGLE RIGHT-POINTING ANGLE QUOTATION MARK
|
||||||
|
0x9c œ œ #LATIN SMALL LIGATURE OE
|
||||||
|
0x9d <20> #NOT USED
|
||||||
|
0x9e ž #NOT USED
|
||||||
|
0x9f Ÿ Ÿ #LATIN CAPITAL LETTER Y WITH DIAERESIS
|
14
tests/lynx-dump/data/circle.html
Normal file
14
tests/lynx-dump/data/circle.html
Normal file
@ -0,0 +1,14 @@
|
|||||||
|
<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01//EN">
|
||||||
|
|
||||||
|
<html>
|
||||||
|
<head>
|
||||||
|
<meta name="generator" content=
|
||||||
|
"HTML Tidy for Linux (vers 25 March 2009), see www.w3.org">
|
||||||
|
|
||||||
|
<title>Test ImageMap - circle</title>
|
||||||
|
</head>
|
||||||
|
|
||||||
|
<body>
|
||||||
|
<p>CIRCLE</p>
|
||||||
|
</body>
|
||||||
|
</html>
|
1
tests/lynx-dump/data/circle.html.exp
Normal file
1
tests/lynx-dump/data/circle.html.exp
Normal file
@ -0,0 +1 @@
|
|||||||
|
CIRCLE
|
178
tests/lynx-dump/data/cp-1252.html
Normal file
178
tests/lynx-dump/data/cp-1252.html
Normal file
@ -0,0 +1,178 @@
|
|||||||
|
<!DOCTYPE html PUBLIC "-//IETF//DTD HTML 3.0//EN">
|
||||||
|
<HTML>
|
||||||
|
<HEAD>
|
||||||
|
<TITLE>Character table for cp-1252</TITLE>
|
||||||
|
<META HTTP-EQUIV="Content-Type" CONTENT="text/html; charset=cp-1252">
|
||||||
|
<LINK REV="made" HREF="mailto:lynx-dev@nongnu.org">
|
||||||
|
<LINK REV="owner" HREF="http://mail.gnu.org/mailman/listinfo/lynx-dev/">
|
||||||
|
<LINK REL="sibling" HREF="iso-8859-1.html" TITLE="iso-8859-1 test">
|
||||||
|
<LINK REL="sibling" HREF="ALT88592.html" TITLE="iso-8859-2 ALT test">
|
||||||
|
</HEAD>
|
||||||
|
|
||||||
|
<BODY>
|
||||||
|
|
||||||
|
<H1 ALIGN=center>cp-1252 table</H1>
|
||||||
|
|
||||||
|
<PRE>
|
||||||
|
Description Code Entity name
|
||||||
|
=================================== ============ ==============
|
||||||
|
quotation mark &#34; --> " &quot; --> "
|
||||||
|
ampersand &#38; --> & &amp; --> &
|
||||||
|
less-than sign &#60; --> < &lt; --> <
|
||||||
|
greater-than sign &#62; --> > &gt; --> >
|
||||||
|
|
||||||
|
Description Char Code Entity name
|
||||||
|
=================================== ==== ============ ==============
|
||||||
|
euro sign € &128; --> €
|
||||||
|
single low-9 quotation mark ‚ &130; --> ‚
|
||||||
|
latin small letter f with hook ƒ &131; --> ƒ
|
||||||
|
double low-9 quotation mark „ &132; --> „
|
||||||
|
horizontal ellipsis … &133; --> …
|
||||||
|
dagger † &134; --> †
|
||||||
|
double dagger ‡ &135; --> ‡
|
||||||
|
modifier letter circumflex accent ˆ &136; --> ˆ
|
||||||
|
per mille sign ‰ &137; --> ‰
|
||||||
|
latin capital letter s with caron Š &138; --> Š
|
||||||
|
single left-pointing angle quote mark ‹ &139; --> ‹
|
||||||
|
latin capital ligature oe Œ &140; --> Œ
|
||||||
|
latin capital letter z with caron Ž &142; --> Ž
|
||||||
|
|
||||||
|
left single quotation mark ‘ &145; --> ‘
|
||||||
|
right single quotation mark ’ &146; --> ’
|
||||||
|
left double quotation mark “ &147; --> “
|
||||||
|
right double quotation mark ” &148; --> ”
|
||||||
|
bullet • &149; --> •
|
||||||
|
en dash – &150; --> –
|
||||||
|
em dash — &151; --> —
|
||||||
|
small tilde ˜ &152; --> ˜
|
||||||
|
trade mark sign ™ &153; --> ™
|
||||||
|
latin small letter s with caron š &154; --> š
|
||||||
|
single right-pointing angle quote mark › &155; --> ›
|
||||||
|
latin small ligature oe œ &156; --> œ
|
||||||
|
latin small letter z with caron ž &158; --> ž
|
||||||
|
latin capital letter y with diaeresis Ÿ &159; --> Ÿ
|
||||||
|
|
||||||
|
non-breaking space &#160; -->   &nbsp; -->
|
||||||
|
inverted exclamation ¡ &#161; --> ¡ &iexcl; --> ¡
|
||||||
|
cent sign ¢ &#162; --> ¢ &cent; --> ¢
|
||||||
|
pound sterling £ &#163; --> £ &pound; --> £
|
||||||
|
general currency sign ¤ &#164; --> ¤ &curren; --> ¤
|
||||||
|
yen sign ¥ &#165; --> ¥ &yen; --> ¥
|
||||||
|
broken vertical bar ¦ &#166; --> ¦ &brvbar; --> ¦
|
||||||
|
section sign § &#167; --> § &sect; --> §
|
||||||
|
umlaut (dieresis) ¨ &#168; --> ¨ &uml; --> ¨
|
||||||
|
copyright © &#169; --> © &copy; --> ©
|
||||||
|
feminine ordinal ª &#170; --> ª &ordf; --> ª
|
||||||
|
left angle quote, guillemotleft « &#171; --> « &laquo; --> «
|
||||||
|
not sign ¬ &#172; --> ¬ &not; --> ¬
|
||||||
|
soft hyphen &#173; --> ­ &shy; --> ­
|
||||||
|
registered trademark ® &#174; --> ® &reg; --> ®
|
||||||
|
macron accent ¯ &#175; --> ¯ &macr; --> ¯
|
||||||
|
|
||||||
|
degree sign ° &#176; --> ° &deg; --> °
|
||||||
|
plus or minus ± &#177; --> ± &plusmn; --> ±
|
||||||
|
superscript two ² &#178; --> ² &sup2; --> ²
|
||||||
|
superscript three ³ &#179; --> ³ &sup3; --> ³
|
||||||
|
acute accent ´ &#180; --> ´ &acute; --> ´
|
||||||
|
micro sign µ &#181; --> µ &micro; --> µ
|
||||||
|
paragraph sign ¶ &#182; --> ¶ &para; --> ¶
|
||||||
|
middle dot · &#183; --> · &middot; --> ·
|
||||||
|
cedilla ¸ &#184; --> ¸ &cedil; --> ¸
|
||||||
|
superscript one ¹ &#185; --> ¹ &sup1; --> ¹
|
||||||
|
masculine ordinal º &#186; --> º &ordm; --> º
|
||||||
|
right angle quote, guillemotright » &#187; --> » &raquo; --> »
|
||||||
|
vulgar fraction one-quarter ¼ &#188; --> ¼ &frac14; --> ¼
|
||||||
|
vulgar fraction one-half ½ &#189; --> ½ &frac12; --> ½
|
||||||
|
vulgar fraction three-fourths ¾ &#190; --> ¾ &frac34; --> ¾
|
||||||
|
inverted question mark ¿ &#191; --> ¿ &iquest; --> ¿
|
||||||
|
|
||||||
|
latin capital letter a with grave À &#192; --> À &Agrave; --> À
|
||||||
|
latin capital letter a with acute Á &#193; --> Á &Aacute; --> Á
|
||||||
|
latin capital letter a with circumflex  &#194; -->  &Acirc; --> Â
|
||||||
|
latin capital letter a with tilde à &#195; --> à &Atilde; --> Ã
|
||||||
|
latin capital letter a with diaeresis Ä &#196; --> Ä &Auml; --> Ä
|
||||||
|
latin capital letter a with ring above Å &#197; --> Å &Aring; --> Å
|
||||||
|
latin capital letter ae Æ &#198; --> Æ &AElig; --> Æ
|
||||||
|
latin capital letter c with cedilla Ç &#199; --> Ç &Ccedil; --> Ç
|
||||||
|
latin capital letter e with grave È &#200; --> È &Egrave; --> È
|
||||||
|
latin capital letter e with acute É &#201; --> É &Eacute; --> É
|
||||||
|
latin capital letter e with circumflex Ê &#202; --> Ê &Ecirc; --> Ê
|
||||||
|
latin capital letter e with diaeresis Ë &#203; --> Ë &Euml; --> Ë
|
||||||
|
latin capital letter i with grave Ì &#204; --> Ì &Igrave; --> Ì
|
||||||
|
latin capital letter i with acute Í &#205; --> Í &Iacute; --> Í
|
||||||
|
latin capital letter i with circumflex Î &#206; --> Î &Icirc; --> Î
|
||||||
|
latin capital letter i with diaeresis Ï &#207; --> Ï &Iuml; --> Ï
|
||||||
|
|
||||||
|
latin capital letter eth Ð &#208; --> Ð &ETH; --> Ð
|
||||||
|
latin capital letter n with tilde Ñ &#209; --> Ñ &Ntilde; --> Ñ
|
||||||
|
latin capital letter o with grave Ò &#210; --> Ò &Ograve; --> Ò
|
||||||
|
latin capital letter o with acute Ó &#211; --> Ó &Oacute; --> Ó
|
||||||
|
latin capital letter o with circumflex Ô &#212; --> Ô &Ocirc; --> Ô
|
||||||
|
latin capital letter o with tilde Õ &#213; --> Õ &Otilde; --> Õ
|
||||||
|
latin capital letter o with diaeresis Ö &#214; --> Ö &Ouml; --> Ö
|
||||||
|
multiplication sign × &#215; --> × &times; --> ×
|
||||||
|
latin capital letter o with stroke Ø &#216; --> Ø &Oslash; --> Ø
|
||||||
|
latin capital letter u with grave Ù &#217; --> Ù &Ugrave; --> Ù
|
||||||
|
latin capital letter u with acute Ú &#218; --> Ú &Uacute; --> Ú
|
||||||
|
latin capital letter u with circumflex Û &#219; --> Û &Ucirc; --> Û
|
||||||
|
latin capital letter u with diaeresis Ü &#220; --> Ü &Uuml; --> Ü
|
||||||
|
latin capital letter y with acute Ý &#221; --> Ý &Yacute; --> Ý
|
||||||
|
latin capital letter thorn Þ &#222; --> Þ &THORN; --> Þ
|
||||||
|
latin small letter sharp s ß &#223; --> ß &szlig; --> ß
|
||||||
|
|
||||||
|
latin small letter a with grave à &#224; --> à &agrave; --> à
|
||||||
|
latin small letter a with acute á &#225; --> á &aacute; --> á
|
||||||
|
latin small letter a with circumflex â &#226; --> â &acirc; --> â
|
||||||
|
latin small letter a with tilde ã &#227; --> ã &atilde; --> ã
|
||||||
|
latin small letter a with diaeresis ä &#228; --> ä &auml; --> ä
|
||||||
|
latin small letter a with ring above å &#229; --> å &aring; --> å
|
||||||
|
latin small letter ae æ &#230; --> æ &aelig; --> æ
|
||||||
|
latin small letter c with cedilla ç &#231; --> ç &ccedil; --> ç
|
||||||
|
latin small letter e with grave è &#232; --> è &egrave; --> è
|
||||||
|
latin small letter e with acute é &#233; --> é &eacute; --> é
|
||||||
|
latin small letter e with circumflex ê &#234; --> ê &ecirc; --> ê
|
||||||
|
latin small letter e with diaeresis ë &#235; --> ë &euml; --> ë
|
||||||
|
latin small letter i with grave ì &#236; --> ì &igrave; --> ì
|
||||||
|
latin small letter i with acute í &#237; --> í &iacute; --> í
|
||||||
|
latin small letter i with circumflex î &#238; --> î &icirc; --> î
|
||||||
|
latin small letter i with diaeresis ï &#239; --> ï &iuml; --> ï
|
||||||
|
|
||||||
|
latin small letter eth ð &#240; --> ð &eth; --> ð
|
||||||
|
latin small letter n with tilde ñ &#241; --> ñ &ntilde; --> ñ
|
||||||
|
latin small letter o with grave ò &#242; --> ò &ograve; --> ò
|
||||||
|
latin small letter o with acute ó &#243; --> ó &oacute; --> ó
|
||||||
|
latin small letter o with circumflex ô &#244; --> ô &ocirc; --> ô
|
||||||
|
latin small letter o with tilde õ &#245; --> õ &otilde; --> õ
|
||||||
|
latin small letter o with diaeresis ö &#246; --> ö &ouml; --> ö
|
||||||
|
division sign ÷ &#247; --> ÷ &divide; --> ÷
|
||||||
|
latin small letter o with stroke ø &#248; --> ø &oslash; --> ø
|
||||||
|
latin small letter u with grave ù &#249; --> ù &ugrave; --> ù
|
||||||
|
latin small letter u with acute ú &#250; --> ú &uacute; --> ú
|
||||||
|
latin small letter u with circumflex û &#251; --> û &ucirc; --> û
|
||||||
|
latin small letter u with diaeresis ü &#252; --> ü &uuml; --> ü
|
||||||
|
latin small letter y with acute ý &#253; --> ý &yacute; --> ý
|
||||||
|
latin small letter thorn þ &#254; --> þ &thorn; --> þ
|
||||||
|
latin small letter y with diaeresis {ÿ} {&#255;}-->{ÿ} {&yuml;} -->{ÿ}
|
||||||
|
|
||||||
|
Some other characters of interest Char Code Entity name
|
||||||
|
=================================== ==== ============ ==============
|
||||||
|
capital AE diphthong (ligature) N/A &#198; --> Æ &AElig; --> Æ
|
||||||
|
small ae diphthong (ligature) N/A &#230; --> æ &aelig; --> æ
|
||||||
|
capital OE ligature N/A {&#338;}-->{Œ} {&OElig;} -->{Œ}
|
||||||
|
small oe ligature N/A {&#339;}-->{œ} {&oelig;} -->{œ}
|
||||||
|
copyright N/A &#169; --> © &copy; --> ©
|
||||||
|
registered trademark N/A &#174; --> ® &reg; --> ®
|
||||||
|
trademark sign N/A &#8482;--> ™ &trade; --> ™
|
||||||
|
em space N/A [&#8195;]->[ ] [&emsp;] -->[ ]
|
||||||
|
en space N/A [&#8194;]->[ ] [&ensp;] -->[ ]
|
||||||
|
1/3-em space N/A [&#8196;]->[ ] [&emsp13;] -->[ ]
|
||||||
|
1/4-em space N/A [&#8197;]->[ ] [&emsp14;] -->[ ]
|
||||||
|
thin space N/A [&#8201;]->[ ] [&thinsp;]-->[ ]
|
||||||
|
hair space N/A [&#8202;]->[ ] [&hairsp;]-->[ ]
|
||||||
|
em dash N/A [&#8212;]->[—] [&mdash;] -->[—]
|
||||||
|
en dash N/A [&#8211;]->[–] [&ndash;] -->[–]
|
||||||
|
|
||||||
|
</PRE>
|
||||||
|
|
||||||
|
</BODY>
|
||||||
|
</HTML>
|
166
tests/lynx-dump/data/cp-1252.html.exp
Normal file
166
tests/lynx-dump/data/cp-1252.html.exp
Normal file
@ -0,0 +1,166 @@
|
|||||||
|
#[1]iso-8859-1 test [2]iso-8859-2 ALT test
|
||||||
|
|
||||||
|
cp-1252 table
|
||||||
|
|
||||||
|
Description Code Entity name
|
||||||
|
=================================== ============ ==============
|
||||||
|
quotation mark " --> " " --> "
|
||||||
|
ampersand & --> & & --> &
|
||||||
|
less-than sign < --> < < --> <
|
||||||
|
greater-than sign > --> > > --> >
|
||||||
|
|
||||||
|
Description Char Code Entity name
|
||||||
|
=================================== ==== ============ ==============
|
||||||
|
euro sign € &128; -->
|
||||||
|
single low-9 quotation mark ‚ &130; -->
|
||||||
|
latin small letter f with hook ƒ &131; -->
|
||||||
|
double low-9 quotation mark „ &132; -->
|
||||||
|
horizontal ellipsis … &133; -->
|
||||||
|
dagger † &134; -->
|
||||||
|
double dagger ‡ &135; -->
|
||||||
|
modifier letter circumflex accent ˆ &136; -->
|
||||||
|
per mille sign ‰ &137; -->
|
||||||
|
latin capital letter s with caron Š &138; -->
|
||||||
|
single left-pointing angle quote mark ‹ &139; -->
|
||||||
|
latin capital ligature oe Œ &140; -->
|
||||||
|
latin capital letter z with caron Ž &142; -->
|
||||||
|
|
||||||
|
left single quotation mark ‘ &145; -->
|
||||||
|
right single quotation mark ’ &146; -->
|
||||||
|
left double quotation mark “ &147; -->
|
||||||
|
right double quotation mark ” &148; -->
|
||||||
|
bullet • &149; -->
|
||||||
|
en dash – &150; -->
|
||||||
|
em dash — &151; -->
|
||||||
|
small tilde ˜ &152; -->
|
||||||
|
trade mark sign ™ &153; -->
|
||||||
|
latin small letter s with caron š &154; -->
|
||||||
|
single right-pointing angle quote mark › &155; -->
|
||||||
|
latin small ligature oe œ &156; -->
|
||||||
|
latin small letter z with caron ž &158; -->
|
||||||
|
latin capital letter y with diaeresis Ÿ &159; -->
|
||||||
|
|
||||||
|
non-breaking space   --> -->
|
||||||
|
inverted exclamation ¡ ¡ --> ¡ ¡ --> ¡
|
||||||
|
cent sign ¢ ¢ --> ¢ ¢ --> ¢
|
||||||
|
pound sterling £ £ --> £ £ --> £
|
||||||
|
general currency sign ¤ ¤ --> ¤ ¤ --> ¤
|
||||||
|
yen sign ¥ ¥ --> ¥ ¥ --> ¥
|
||||||
|
broken vertical bar ¦ ¦ --> ¦ ¦ --> ¦
|
||||||
|
section sign § § --> § § --> §
|
||||||
|
umlaut (dieresis) ¨ ¨ --> ¨ ¨ --> ¨
|
||||||
|
copyright © © --> © © --> ©
|
||||||
|
feminine ordinal ª ª --> ª ª --> ª
|
||||||
|
left angle quote, guillemotleft « « --> « « --> «
|
||||||
|
not sign ¬ ¬ --> ¬ ¬ --> ¬
|
||||||
|
soft hyphen ­ --> ­ -->
|
||||||
|
registered trademark ® ® --> ® ® --> ®
|
||||||
|
macron accent ¯ ¯ --> ¯ ¯ --> ¯
|
||||||
|
|
||||||
|
degree sign ° ° --> ° ° --> °
|
||||||
|
plus or minus ± ± --> ± ± --> ±
|
||||||
|
superscript two ² ² --> ² ² --> ²
|
||||||
|
superscript three ³ ³ --> ³ ³ --> ³
|
||||||
|
acute accent ´ ´ --> ´ ´ --> ´
|
||||||
|
micro sign µ µ --> µ µ --> µ
|
||||||
|
paragraph sign ¶ ¶ --> ¶ ¶ --> ¶
|
||||||
|
middle dot · · --> · · --> ·
|
||||||
|
cedilla ¸ ¸ --> ¸ ¸ --> ¸
|
||||||
|
superscript one ¹ ¹ --> ¹ ¹ --> ¹
|
||||||
|
masculine ordinal º º --> º º --> º
|
||||||
|
right angle quote, guillemotright » » --> » » --> »
|
||||||
|
vulgar fraction one-quarter ¼ ¼ --> ¼ ¼ --> ¼
|
||||||
|
vulgar fraction one-half ½ ½ --> ½ ½ --> ½
|
||||||
|
vulgar fraction three-fourths ¾ ¾ --> ¾ ¾ --> ¾
|
||||||
|
inverted question mark ¿ ¿ --> ¿ ¿ --> ¿
|
||||||
|
|
||||||
|
latin capital letter a with grave À À --> À À --> À
|
||||||
|
latin capital letter a with acute Á Á --> Á Á --> Á
|
||||||
|
latin capital letter a with circumflex   -->   --> Â
|
||||||
|
latin capital letter a with tilde à à --> à à --> Ã
|
||||||
|
latin capital letter a with diaeresis Ä Ä --> Ä Ä --> Ä
|
||||||
|
latin capital letter a with ring above Å Å --> Å Å --> Å
|
||||||
|
latin capital letter ae Æ Æ --> Æ Æ --> Æ
|
||||||
|
latin capital letter c with cedilla Ç Ç --> Ç Ç --> Ç
|
||||||
|
latin capital letter e with grave È È --> È È --> È
|
||||||
|
latin capital letter e with acute É É --> É É --> É
|
||||||
|
latin capital letter e with circumflex Ê Ê --> Ê Ê --> Ê
|
||||||
|
latin capital letter e with diaeresis Ë Ë --> Ë Ë --> Ë
|
||||||
|
latin capital letter i with grave Ì Ì --> Ì Ì --> Ì
|
||||||
|
latin capital letter i with acute Í Í --> Í Í --> Í
|
||||||
|
latin capital letter i with circumflex Î Î --> Î Î --> Î
|
||||||
|
latin capital letter i with diaeresis Ï Ï --> Ï Ï --> Ï
|
||||||
|
|
||||||
|
latin capital letter eth Ð Ð --> Ð Ð --> Ð
|
||||||
|
latin capital letter n with tilde Ñ Ñ --> Ñ Ñ --> Ñ
|
||||||
|
latin capital letter o with grave Ò Ò --> Ò Ò --> Ò
|
||||||
|
latin capital letter o with acute Ó Ó --> Ó Ó --> Ó
|
||||||
|
latin capital letter o with circumflex Ô Ô --> Ô Ô --> Ô
|
||||||
|
latin capital letter o with tilde Õ Õ --> Õ Õ --> Õ
|
||||||
|
latin capital letter o with diaeresis Ö Ö --> Ö Ö --> Ö
|
||||||
|
multiplication sign × × --> × × --> ×
|
||||||
|
latin capital letter o with stroke Ø Ø --> Ø Ø --> Ø
|
||||||
|
latin capital letter u with grave Ù Ù --> Ù Ù --> Ù
|
||||||
|
latin capital letter u with acute Ú Ú --> Ú Ú --> Ú
|
||||||
|
latin capital letter u with circumflex Û Û --> Û Û --> Û
|
||||||
|
latin capital letter u with diaeresis Ü Ü --> Ü Ü --> Ü
|
||||||
|
latin capital letter y with acute Ý Ý --> Ý Ý --> Ý
|
||||||
|
latin capital letter thorn Þ Þ --> Þ Þ --> Þ
|
||||||
|
latin small letter sharp s ß ß --> ß ß --> ß
|
||||||
|
|
||||||
|
latin small letter a with grave à à --> à à --> à
|
||||||
|
latin small letter a with acute á á --> á á --> á
|
||||||
|
latin small letter a with circumflex â â --> â â --> â
|
||||||
|
latin small letter a with tilde ã ã --> ã ã --> ã
|
||||||
|
latin small letter a with diaeresis ä ä --> ä ä --> ä
|
||||||
|
latin small letter a with ring above å å --> å å --> å
|
||||||
|
latin small letter ae æ æ --> æ æ --> æ
|
||||||
|
latin small letter c with cedilla ç ç --> ç ç --> ç
|
||||||
|
latin small letter e with grave è è --> è è --> è
|
||||||
|
latin small letter e with acute é é --> é é --> é
|
||||||
|
latin small letter e with circumflex ê ê --> ê ê --> ê
|
||||||
|
latin small letter e with diaeresis ë ë --> ë ë --> ë
|
||||||
|
latin small letter i with grave ì ì --> ì ì --> ì
|
||||||
|
latin small letter i with acute í í --> í í --> í
|
||||||
|
latin small letter i with circumflex î î --> î î --> î
|
||||||
|
latin small letter i with diaeresis ï ï --> ï ï --> ï
|
||||||
|
|
||||||
|
latin small letter eth ð ð --> ð ð --> ð
|
||||||
|
latin small letter n with tilde ñ ñ --> ñ ñ --> ñ
|
||||||
|
latin small letter o with grave ò ò --> ò ò --> ò
|
||||||
|
latin small letter o with acute ó ó --> ó ó --> ó
|
||||||
|
latin small letter o with circumflex ô ô --> ô ô --> ô
|
||||||
|
latin small letter o with tilde õ õ --> õ õ --> õ
|
||||||
|
latin small letter o with diaeresis ö ö --> ö ö --> ö
|
||||||
|
division sign ÷ ÷ --> ÷ ÷ --> ÷
|
||||||
|
latin small letter o with stroke ø ø --> ø ø --> ø
|
||||||
|
latin small letter u with grave ù ù --> ù ù --> ù
|
||||||
|
latin small letter u with acute ú ú --> ú ú --> ú
|
||||||
|
latin small letter u with circumflex û û --> û û --> û
|
||||||
|
latin small letter u with diaeresis ü ü --> ü ü --> ü
|
||||||
|
latin small letter y with acute ý ý --> ý ý --> ý
|
||||||
|
latin small letter thorn þ þ --> þ þ --> þ
|
||||||
|
latin small letter y with diaeresis {ÿ} {ÿ}-->{ÿ} {ÿ} -->{ÿ}
|
||||||
|
|
||||||
|
Some other characters of interest Char Code Entity name
|
||||||
|
=================================== ==== ============ ==============
|
||||||
|
capital AE diphthong (ligature) N/A Æ --> Æ Æ --> Æ
|
||||||
|
small ae diphthong (ligature) N/A æ --> æ æ --> æ
|
||||||
|
capital OE ligature N/A {Œ}-->{Œ} {Œ} -->{Œ}
|
||||||
|
small oe ligature N/A {œ}-->{œ} {œ} -->{œ}
|
||||||
|
copyright N/A © --> © © --> ©
|
||||||
|
registered trademark N/A ® --> ® ® --> ®
|
||||||
|
trademark sign N/A ™--> ™ ™ --> ™
|
||||||
|
em space N/A [ ]->[ ] [ ] -->[ ]
|
||||||
|
en space N/A [ ]->[ ] [ ] -->[ ]
|
||||||
|
1/3-em space N/A [ ]->[ ] [ ] -->[ ]
|
||||||
|
1/4-em space N/A [ ]->[ ] [ ] -->[ ]
|
||||||
|
thin space N/A [ ]->[ ] [ ]-->[ ]
|
||||||
|
hair space N/A [ ]->[ ] [ ]-->[ ]
|
||||||
|
em dash N/A [—]->[—] [—] -->[—]
|
||||||
|
en dash N/A [–]->[–] [–] -->[–]
|
||||||
|
|
||||||
|
References
|
||||||
|
|
||||||
|
1.
|
||||||
|
2.
|
183
tests/lynx-dump/data/cp-1252a.html
Normal file
183
tests/lynx-dump/data/cp-1252a.html
Normal file
@ -0,0 +1,183 @@
|
|||||||
|
<!DOCTYPE html PUBLIC "-//IETF//DTD HTML 3.0//EN">
|
||||||
|
<HTML>
|
||||||
|
<HEAD>
|
||||||
|
<TITLE>Character table for cp-1252</TITLE>
|
||||||
|
<META HTTP-EQUIV="Content-Type" CONTENT="text/html; charset=cp-1252">
|
||||||
|
<LINK REV="made" HREF="mailto:lynx-dev@nongnu.org">
|
||||||
|
<LINK REV="owner" HREF="http://mail.gnu.org/mailman/listinfo/lynx-dev/">
|
||||||
|
<LINK REL="sibling" HREF="iso-8859-1.html" TITLE="iso-8859-1 test">
|
||||||
|
<LINK REL="sibling" HREF="ALT88592.html" TITLE="iso-8859-2 ALT test">
|
||||||
|
</HEAD>
|
||||||
|
|
||||||
|
<BODY>
|
||||||
|
|
||||||
|
<H1 ALIGN=center>cp-1252 table</H1>
|
||||||
|
|
||||||
|
<PRE>
|
||||||
|
Description Code Entity name
|
||||||
|
=================================== ============ ==============
|
||||||
|
quotation mark &#34; --> " &quot; --> "
|
||||||
|
ampersand &#38; --> & &amp; --> &
|
||||||
|
less-than sign &#60; --> < &lt; --> <
|
||||||
|
greater-than sign &#62; --> > &gt; --> >
|
||||||
|
|
||||||
|
Description Char Code Entity name
|
||||||
|
=================================== ==== ============ ==============
|
||||||
|
euro sign € &128; --> €
|
||||||
|
undefined <20> &129; --> 
|
||||||
|
single low-9 quotation mark ‚ &130; --> ‚
|
||||||
|
latin small letter f with hook ƒ &131; --> ƒ
|
||||||
|
double low-9 quotation mark „ &132; --> „
|
||||||
|
horizontal ellipsis … &133; --> …
|
||||||
|
dagger † &134; --> †
|
||||||
|
double dagger ‡ &135; --> ‡
|
||||||
|
modifier letter circumflex accent ˆ &136; --> ˆ
|
||||||
|
per mille sign ‰ &137; --> ‰
|
||||||
|
latin capital letter s with caron Š &138; --> Š
|
||||||
|
single left-pointing angle quote mark ‹ &139; --> ‹
|
||||||
|
latin capital ligature oe Œ &140; --> Œ
|
||||||
|
undefined <20> &141; --> 
|
||||||
|
latin capital letter z with caron Ž &142; --> Ž
|
||||||
|
undefined <20> &143; --> 
|
||||||
|
|
||||||
|
undefined <20> &144; --> 
|
||||||
|
left single quotation mark ‘ &145; --> ‘
|
||||||
|
right single quotation mark ’ &146; --> ’
|
||||||
|
left double quotation mark “ &147; --> “
|
||||||
|
right double quotation mark ” &148; --> ”
|
||||||
|
bullet • &149; --> •
|
||||||
|
en dash – &150; --> –
|
||||||
|
em dash — &151; --> —
|
||||||
|
small tilde ˜ &152; --> ˜
|
||||||
|
trade mark sign ™ &153; --> ™
|
||||||
|
latin small letter s with caron š &154; --> š
|
||||||
|
single right-pointing angle quote mark › &155; --> ›
|
||||||
|
latin small ligature oe œ &156; --> œ
|
||||||
|
undefined <20> &157; --> 
|
||||||
|
latin small letter z with caron ž &158; --> ž
|
||||||
|
latin capital letter y with diaeresis Ÿ &159; --> Ÿ
|
||||||
|
|
||||||
|
non-breaking space &#160; -->   &nbsp; -->
|
||||||
|
inverted exclamation ¡ &#161; --> ¡ &iexcl; --> ¡
|
||||||
|
cent sign ¢ &#162; --> ¢ &cent; --> ¢
|
||||||
|
pound sterling £ &#163; --> £ &pound; --> £
|
||||||
|
general currency sign ¤ &#164; --> ¤ &curren; --> ¤
|
||||||
|
yen sign ¥ &#165; --> ¥ &yen; --> ¥
|
||||||
|
broken vertical bar ¦ &#166; --> ¦ &brvbar; --> ¦
|
||||||
|
section sign § &#167; --> § &sect; --> §
|
||||||
|
umlaut (dieresis) ¨ &#168; --> ¨ &uml; --> ¨
|
||||||
|
copyright © &#169; --> © &copy; --> ©
|
||||||
|
feminine ordinal ª &#170; --> ª &ordf; --> ª
|
||||||
|
left angle quote, guillemotleft « &#171; --> « &laquo; --> «
|
||||||
|
not sign ¬ &#172; --> ¬ &not; --> ¬
|
||||||
|
soft hyphen &#173; --> ­ &shy; --> ­
|
||||||
|
registered trademark ® &#174; --> ® &reg; --> ®
|
||||||
|
macron accent ¯ &#175; --> ¯ &macr; --> ¯
|
||||||
|
|
||||||
|
degree sign ° &#176; --> ° &deg; --> °
|
||||||
|
plus or minus ± &#177; --> ± &plusmn; --> ±
|
||||||
|
superscript two ² &#178; --> ² &sup2; --> ²
|
||||||
|
superscript three ³ &#179; --> ³ &sup3; --> ³
|
||||||
|
acute accent ´ &#180; --> ´ &acute; --> ´
|
||||||
|
micro sign µ &#181; --> µ &micro; --> µ
|
||||||
|
paragraph sign ¶ &#182; --> ¶ &para; --> ¶
|
||||||
|
middle dot · &#183; --> · &middot; --> ·
|
||||||
|
cedilla ¸ &#184; --> ¸ &cedil; --> ¸
|
||||||
|
superscript one ¹ &#185; --> ¹ &sup1; --> ¹
|
||||||
|
masculine ordinal º &#186; --> º &ordm; --> º
|
||||||
|
right angle quote, guillemotright » &#187; --> » &raquo; --> »
|
||||||
|
vulgar fraction one-quarter ¼ &#188; --> ¼ &frac14; --> ¼
|
||||||
|
vulgar fraction one-half ½ &#189; --> ½ &frac12; --> ½
|
||||||
|
vulgar fraction three-fourths ¾ &#190; --> ¾ &frac34; --> ¾
|
||||||
|
inverted question mark ¿ &#191; --> ¿ &iquest; --> ¿
|
||||||
|
|
||||||
|
latin capital letter a with grave À &#192; --> À &Agrave; --> À
|
||||||
|
latin capital letter a with acute Á &#193; --> Á &Aacute; --> Á
|
||||||
|
latin capital letter a with circumflex  &#194; -->  &Acirc; --> Â
|
||||||
|
latin capital letter a with tilde à &#195; --> à &Atilde; --> Ã
|
||||||
|
latin capital letter a with diaeresis Ä &#196; --> Ä &Auml; --> Ä
|
||||||
|
latin capital letter a with ring above Å &#197; --> Å &Aring; --> Å
|
||||||
|
latin capital letter ae Æ &#198; --> Æ &AElig; --> Æ
|
||||||
|
latin capital letter c with cedilla Ç &#199; --> Ç &Ccedil; --> Ç
|
||||||
|
latin capital letter e with grave È &#200; --> È &Egrave; --> È
|
||||||
|
latin capital letter e with acute É &#201; --> É &Eacute; --> É
|
||||||
|
latin capital letter e with circumflex Ê &#202; --> Ê &Ecirc; --> Ê
|
||||||
|
latin capital letter e with diaeresis Ë &#203; --> Ë &Euml; --> Ë
|
||||||
|
latin capital letter i with grave Ì &#204; --> Ì &Igrave; --> Ì
|
||||||
|
latin capital letter i with acute Í &#205; --> Í &Iacute; --> Í
|
||||||
|
latin capital letter i with circumflex Î &#206; --> Î &Icirc; --> Î
|
||||||
|
latin capital letter i with diaeresis Ï &#207; --> Ï &Iuml; --> Ï
|
||||||
|
|
||||||
|
latin capital letter eth Ð &#208; --> Ð &ETH; --> Ð
|
||||||
|
latin capital letter n with tilde Ñ &#209; --> Ñ &Ntilde; --> Ñ
|
||||||
|
latin capital letter o with grave Ò &#210; --> Ò &Ograve; --> Ò
|
||||||
|
latin capital letter o with acute Ó &#211; --> Ó &Oacute; --> Ó
|
||||||
|
latin capital letter o with circumflex Ô &#212; --> Ô &Ocirc; --> Ô
|
||||||
|
latin capital letter o with tilde Õ &#213; --> Õ &Otilde; --> Õ
|
||||||
|
latin capital letter o with diaeresis Ö &#214; --> Ö &Ouml; --> Ö
|
||||||
|
multiplication sign × &#215; --> × &times; --> ×
|
||||||
|
latin capital letter o with stroke Ø &#216; --> Ø &Oslash; --> Ø
|
||||||
|
latin capital letter u with grave Ù &#217; --> Ù &Ugrave; --> Ù
|
||||||
|
latin capital letter u with acute Ú &#218; --> Ú &Uacute; --> Ú
|
||||||
|
latin capital letter u with circumflex Û &#219; --> Û &Ucirc; --> Û
|
||||||
|
latin capital letter u with diaeresis Ü &#220; --> Ü &Uuml; --> Ü
|
||||||
|
latin capital letter y with acute Ý &#221; --> Ý &Yacute; --> Ý
|
||||||
|
latin capital letter thorn Þ &#222; --> Þ &THORN; --> Þ
|
||||||
|
latin small letter sharp s ß &#223; --> ß &szlig; --> ß
|
||||||
|
|
||||||
|
latin small letter a with grave à &#224; --> à &agrave; --> à
|
||||||
|
latin small letter a with acute á &#225; --> á &aacute; --> á
|
||||||
|
latin small letter a with circumflex â &#226; --> â &acirc; --> â
|
||||||
|
latin small letter a with tilde ã &#227; --> ã &atilde; --> ã
|
||||||
|
latin small letter a with diaeresis ä &#228; --> ä &auml; --> ä
|
||||||
|
latin small letter a with ring above å &#229; --> å &aring; --> å
|
||||||
|
latin small letter ae æ &#230; --> æ &aelig; --> æ
|
||||||
|
latin small letter c with cedilla ç &#231; --> ç &ccedil; --> ç
|
||||||
|
latin small letter e with grave è &#232; --> è &egrave; --> è
|
||||||
|
latin small letter e with acute é &#233; --> é &eacute; --> é
|
||||||
|
latin small letter e with circumflex ê &#234; --> ê &ecirc; --> ê
|
||||||
|
latin small letter e with diaeresis ë &#235; --> ë &euml; --> ë
|
||||||
|
latin small letter i with grave ì &#236; --> ì &igrave; --> ì
|
||||||
|
latin small letter i with acute í &#237; --> í &iacute; --> í
|
||||||
|
latin small letter i with circumflex î &#238; --> î &icirc; --> î
|
||||||
|
latin small letter i with diaeresis ï &#239; --> ï &iuml; --> ï
|
||||||
|
|
||||||
|
latin small letter eth ð &#240; --> ð &eth; --> ð
|
||||||
|
latin small letter n with tilde ñ &#241; --> ñ &ntilde; --> ñ
|
||||||
|
latin small letter o with grave ò &#242; --> ò &ograve; --> ò
|
||||||
|
latin small letter o with acute ó &#243; --> ó &oacute; --> ó
|
||||||
|
latin small letter o with circumflex ô &#244; --> ô &ocirc; --> ô
|
||||||
|
latin small letter o with tilde õ &#245; --> õ &otilde; --> õ
|
||||||
|
latin small letter o with diaeresis ö &#246; --> ö &ouml; --> ö
|
||||||
|
division sign ÷ &#247; --> ÷ &divide; --> ÷
|
||||||
|
latin small letter o with stroke ø &#248; --> ø &oslash; --> ø
|
||||||
|
latin small letter u with grave ù &#249; --> ù &ugrave; --> ù
|
||||||
|
latin small letter u with acute ú &#250; --> ú &uacute; --> ú
|
||||||
|
latin small letter u with circumflex û &#251; --> û &ucirc; --> û
|
||||||
|
latin small letter u with diaeresis ü &#252; --> ü &uuml; --> ü
|
||||||
|
latin small letter y with acute ý &#253; --> ý &yacute; --> ý
|
||||||
|
latin small letter thorn þ &#254; --> þ &thorn; --> þ
|
||||||
|
latin small letter y with diaeresis {ÿ} {&#255;}-->{ÿ} {&yuml;} -->{ÿ}
|
||||||
|
|
||||||
|
Some other characters of interest Char Code Entity name
|
||||||
|
=================================== ==== ============ ==============
|
||||||
|
capital AE diphthong (ligature) N/A &#198; --> Æ &AElig; --> Æ
|
||||||
|
small ae diphthong (ligature) N/A &#230; --> æ &aelig; --> æ
|
||||||
|
capital OE ligature N/A {&#338;}-->{Œ} {&OElig;} -->{Œ}
|
||||||
|
small oe ligature N/A {&#339;}-->{œ} {&oelig;} -->{œ}
|
||||||
|
copyright N/A &#169; --> © &copy; --> ©
|
||||||
|
registered trademark N/A &#174; --> ® &reg; --> ®
|
||||||
|
trademark sign N/A &#8482;--> ™ &trade; --> ™
|
||||||
|
em space N/A [&#8195;]->[ ] [&emsp;] -->[ ]
|
||||||
|
en space N/A [&#8194;]->[ ] [&ensp;] -->[ ]
|
||||||
|
1/3-em space N/A [&#8196;]->[ ] [&emsp13;] -->[ ]
|
||||||
|
1/4-em space N/A [&#8197;]->[ ] [&emsp14;] -->[ ]
|
||||||
|
thin space N/A [&#8201;]->[ ] [&thinsp;]-->[ ]
|
||||||
|
hair space N/A [&#8202;]->[ ] [&hairsp;]-->[ ]
|
||||||
|
em dash N/A [&#8212;]->[—] [&mdash;] -->[—]
|
||||||
|
en dash N/A [&#8211;]->[–] [&ndash;] -->[–]
|
||||||
|
|
||||||
|
</PRE>
|
||||||
|
|
||||||
|
</BODY>
|
||||||
|
</HTML>
|
171
tests/lynx-dump/data/cp-1252a.html.exp
Normal file
171
tests/lynx-dump/data/cp-1252a.html.exp
Normal file
@ -0,0 +1,171 @@
|
|||||||
|
#[1]iso-8859-1 test [2]iso-8859-2 ALT test
|
||||||
|
|
||||||
|
cp-1252 table
|
||||||
|
|
||||||
|
Description Code Entity name
|
||||||
|
=================================== ============ ==============
|
||||||
|
quotation mark " --> " " --> "
|
||||||
|
ampersand & --> & & --> &
|
||||||
|
less-than sign < --> < < --> <
|
||||||
|
greater-than sign > --> > > --> >
|
||||||
|
|
||||||
|
Description Char Code Entity name
|
||||||
|
=================================== ==== ============ ==============
|
||||||
|
euro sign € &128; -->
|
||||||
|
undefined <20> &129; -->
|
||||||
|
single low-9 quotation mark ‚ &130; -->
|
||||||
|
latin small letter f with hook ƒ &131; -->
|
||||||
|
double low-9 quotation mark „ &132; -->
|
||||||
|
horizontal ellipsis … &133; -->
|
||||||
|
dagger † &134; -->
|
||||||
|
double dagger ‡ &135; -->
|
||||||
|
modifier letter circumflex accent ˆ &136; -->
|
||||||
|
per mille sign ‰ &137; -->
|
||||||
|
latin capital letter s with caron Š &138; -->
|
||||||
|
single left-pointing angle quote mark ‹ &139; -->
|
||||||
|
latin capital ligature oe Œ &140; -->
|
||||||
|
undefined <20> &141; -->
|
||||||
|
latin capital letter z with caron Ž &142; -->
|
||||||
|
undefined <20> &143; -->
|
||||||
|
|
||||||
|
undefined <20> &144; -->
|
||||||
|
left single quotation mark ‘ &145; -->
|
||||||
|
right single quotation mark ’ &146; -->
|
||||||
|
left double quotation mark “ &147; -->
|
||||||
|
right double quotation mark ” &148; -->
|
||||||
|
bullet • &149; -->
|
||||||
|
en dash – &150; -->
|
||||||
|
em dash — &151; -->
|
||||||
|
small tilde ˜ &152; -->
|
||||||
|
trade mark sign ™ &153; -->
|
||||||
|
latin small letter s with caron š &154; -->
|
||||||
|
single right-pointing angle quote mark › &155; -->
|
||||||
|
latin small ligature oe œ &156; -->
|
||||||
|
undefined <20> &157; -->
|
||||||
|
latin small letter z with caron ž &158; -->
|
||||||
|
latin capital letter y with diaeresis Ÿ &159; -->
|
||||||
|
|
||||||
|
non-breaking space   --> -->
|
||||||
|
inverted exclamation ¡ ¡ --> ¡ ¡ --> ¡
|
||||||
|
cent sign ¢ ¢ --> ¢ ¢ --> ¢
|
||||||
|
pound sterling £ £ --> £ £ --> £
|
||||||
|
general currency sign ¤ ¤ --> ¤ ¤ --> ¤
|
||||||
|
yen sign ¥ ¥ --> ¥ ¥ --> ¥
|
||||||
|
broken vertical bar ¦ ¦ --> ¦ ¦ --> ¦
|
||||||
|
section sign § § --> § § --> §
|
||||||
|
umlaut (dieresis) ¨ ¨ --> ¨ ¨ --> ¨
|
||||||
|
copyright © © --> © © --> ©
|
||||||
|
feminine ordinal ª ª --> ª ª --> ª
|
||||||
|
left angle quote, guillemotleft « « --> « « --> «
|
||||||
|
not sign ¬ ¬ --> ¬ ¬ --> ¬
|
||||||
|
soft hyphen ­ --> ­ -->
|
||||||
|
registered trademark ® ® --> ® ® --> ®
|
||||||
|
macron accent ¯ ¯ --> ¯ ¯ --> ¯
|
||||||
|
|
||||||
|
degree sign ° ° --> ° ° --> °
|
||||||
|
plus or minus ± ± --> ± ± --> ±
|
||||||
|
superscript two ² ² --> ² ² --> ²
|
||||||
|
superscript three ³ ³ --> ³ ³ --> ³
|
||||||
|
acute accent ´ ´ --> ´ ´ --> ´
|
||||||
|
micro sign µ µ --> µ µ --> µ
|
||||||
|
paragraph sign ¶ ¶ --> ¶ ¶ --> ¶
|
||||||
|
middle dot · · --> · · --> ·
|
||||||
|
cedilla ¸ ¸ --> ¸ ¸ --> ¸
|
||||||
|
superscript one ¹ ¹ --> ¹ ¹ --> ¹
|
||||||
|
masculine ordinal º º --> º º --> º
|
||||||
|
right angle quote, guillemotright » » --> » » --> »
|
||||||
|
vulgar fraction one-quarter ¼ ¼ --> ¼ ¼ --> ¼
|
||||||
|
vulgar fraction one-half ½ ½ --> ½ ½ --> ½
|
||||||
|
vulgar fraction three-fourths ¾ ¾ --> ¾ ¾ --> ¾
|
||||||
|
inverted question mark ¿ ¿ --> ¿ ¿ --> ¿
|
||||||
|
|
||||||
|
latin capital letter a with grave À À --> À À --> À
|
||||||
|
latin capital letter a with acute Á Á --> Á Á --> Á
|
||||||
|
latin capital letter a with circumflex   -->   --> Â
|
||||||
|
latin capital letter a with tilde à à --> à à --> Ã
|
||||||
|
latin capital letter a with diaeresis Ä Ä --> Ä Ä --> Ä
|
||||||
|
latin capital letter a with ring above Å Å --> Å Å --> Å
|
||||||
|
latin capital letter ae Æ Æ --> Æ Æ --> Æ
|
||||||
|
latin capital letter c with cedilla Ç Ç --> Ç Ç --> Ç
|
||||||
|
latin capital letter e with grave È È --> È È --> È
|
||||||
|
latin capital letter e with acute É É --> É É --> É
|
||||||
|
latin capital letter e with circumflex Ê Ê --> Ê Ê --> Ê
|
||||||
|
latin capital letter e with diaeresis Ë Ë --> Ë Ë --> Ë
|
||||||
|
latin capital letter i with grave Ì Ì --> Ì Ì --> Ì
|
||||||
|
latin capital letter i with acute Í Í --> Í Í --> Í
|
||||||
|
latin capital letter i with circumflex Î Î --> Î Î --> Î
|
||||||
|
latin capital letter i with diaeresis Ï Ï --> Ï Ï --> Ï
|
||||||
|
|
||||||
|
latin capital letter eth Ð Ð --> Ð Ð --> Ð
|
||||||
|
latin capital letter n with tilde Ñ Ñ --> Ñ Ñ --> Ñ
|
||||||
|
latin capital letter o with grave Ò Ò --> Ò Ò --> Ò
|
||||||
|
latin capital letter o with acute Ó Ó --> Ó Ó --> Ó
|
||||||
|
latin capital letter o with circumflex Ô Ô --> Ô Ô --> Ô
|
||||||
|
latin capital letter o with tilde Õ Õ --> Õ Õ --> Õ
|
||||||
|
latin capital letter o with diaeresis Ö Ö --> Ö Ö --> Ö
|
||||||
|
multiplication sign × × --> × × --> ×
|
||||||
|
latin capital letter o with stroke Ø Ø --> Ø Ø --> Ø
|
||||||
|
latin capital letter u with grave Ù Ù --> Ù Ù --> Ù
|
||||||
|
latin capital letter u with acute Ú Ú --> Ú Ú --> Ú
|
||||||
|
latin capital letter u with circumflex Û Û --> Û Û --> Û
|
||||||
|
latin capital letter u with diaeresis Ü Ü --> Ü Ü --> Ü
|
||||||
|
latin capital letter y with acute Ý Ý --> Ý Ý --> Ý
|
||||||
|
latin capital letter thorn Þ Þ --> Þ Þ --> Þ
|
||||||
|
latin small letter sharp s ß ß --> ß ß --> ß
|
||||||
|
|
||||||
|
latin small letter a with grave à à --> à à --> à
|
||||||
|
latin small letter a with acute á á --> á á --> á
|
||||||
|
latin small letter a with circumflex â â --> â â --> â
|
||||||
|
latin small letter a with tilde ã ã --> ã ã --> ã
|
||||||
|
latin small letter a with diaeresis ä ä --> ä ä --> ä
|
||||||
|
latin small letter a with ring above å å --> å å --> å
|
||||||
|
latin small letter ae æ æ --> æ æ --> æ
|
||||||
|
latin small letter c with cedilla ç ç --> ç ç --> ç
|
||||||
|
latin small letter e with grave è è --> è è --> è
|
||||||
|
latin small letter e with acute é é --> é é --> é
|
||||||
|
latin small letter e with circumflex ê ê --> ê ê --> ê
|
||||||
|
latin small letter e with diaeresis ë ë --> ë ë --> ë
|
||||||
|
latin small letter i with grave ì ì --> ì ì --> ì
|
||||||
|
latin small letter i with acute í í --> í í --> í
|
||||||
|
latin small letter i with circumflex î î --> î î --> î
|
||||||
|
latin small letter i with diaeresis ï ï --> ï ï --> ï
|
||||||
|
|
||||||
|
latin small letter eth ð ð --> ð ð --> ð
|
||||||
|
latin small letter n with tilde ñ ñ --> ñ ñ --> ñ
|
||||||
|
latin small letter o with grave ò ò --> ò ò --> ò
|
||||||
|
latin small letter o with acute ó ó --> ó ó --> ó
|
||||||
|
latin small letter o with circumflex ô ô --> ô ô --> ô
|
||||||
|
latin small letter o with tilde õ õ --> õ õ --> õ
|
||||||
|
latin small letter o with diaeresis ö ö --> ö ö --> ö
|
||||||
|
division sign ÷ ÷ --> ÷ ÷ --> ÷
|
||||||
|
latin small letter o with stroke ø ø --> ø ø --> ø
|
||||||
|
latin small letter u with grave ù ù --> ù ù --> ù
|
||||||
|
latin small letter u with acute ú ú --> ú ú --> ú
|
||||||
|
latin small letter u with circumflex û û --> û û --> û
|
||||||
|
latin small letter u with diaeresis ü ü --> ü ü --> ü
|
||||||
|
latin small letter y with acute ý ý --> ý ý --> ý
|
||||||
|
latin small letter thorn þ þ --> þ þ --> þ
|
||||||
|
latin small letter y with diaeresis {ÿ} {ÿ}-->{ÿ} {ÿ} -->{ÿ}
|
||||||
|
|
||||||
|
Some other characters of interest Char Code Entity name
|
||||||
|
=================================== ==== ============ ==============
|
||||||
|
capital AE diphthong (ligature) N/A Æ --> Æ Æ --> Æ
|
||||||
|
small ae diphthong (ligature) N/A æ --> æ æ --> æ
|
||||||
|
capital OE ligature N/A {Œ}-->{Œ} {Œ} -->{Œ}
|
||||||
|
small oe ligature N/A {œ}-->{œ} {œ} -->{œ}
|
||||||
|
copyright N/A © --> © © --> ©
|
||||||
|
registered trademark N/A ® --> ® ® --> ®
|
||||||
|
trademark sign N/A ™--> ™ ™ --> ™
|
||||||
|
em space N/A [ ]->[ ] [ ] -->[ ]
|
||||||
|
en space N/A [ ]->[ ] [ ] -->[ ]
|
||||||
|
1/3-em space N/A [ ]->[ ] [ ] -->[ ]
|
||||||
|
1/4-em space N/A [ ]->[ ] [ ] -->[ ]
|
||||||
|
thin space N/A [ ]->[ ] [ ]-->[ ]
|
||||||
|
hair space N/A [ ]->[ ] [ ]-->[ ]
|
||||||
|
em dash N/A [—]->[—] [—] -->[—]
|
||||||
|
en dash N/A [–]->[–] [–] -->[–]
|
||||||
|
|
||||||
|
References
|
||||||
|
|
||||||
|
1.
|
||||||
|
2.
|
BIN
tests/lynx-dump/data/image.jpg
Normal file
BIN
tests/lynx-dump/data/image.jpg
Normal file
Binary file not shown.
After Width: | Height: | Size: 1.3 KiB |
241
tests/lynx-dump/data/iso-8859-1.html
Normal file
241
tests/lynx-dump/data/iso-8859-1.html
Normal file
@ -0,0 +1,241 @@
|
|||||||
|
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN">
|
||||||
|
<!-- X-URL: http://www.ramsch.org/martin/uni/fmi-hp/iso8859-1.html -->
|
||||||
|
<!-- Date: Tue, 28 Dec 2004 20:24:09 GMT -->
|
||||||
|
<!-- Last-Modified: Mon, 15 May 2000 09:37:37 GMT -->
|
||||||
|
<HTML>
|
||||||
|
<HEAD>
|
||||||
|
<TITLE>Martin Ramsch - iso8859-1 table</TITLE>
|
||||||
|
<META HTTP-EQUIV="Content-Type" CONTENT="text/html; charset=iso-8859-1">
|
||||||
|
<BASE HREF="http://www.ramsch.org/martin/uni/fmi-hp/iso8859-1.html">
|
||||||
|
</HEAD>
|
||||||
|
|
||||||
|
<BODY>
|
||||||
|
|
||||||
|
<H1 ALIGN=center>iso8859-1 table</H1>
|
||||||
|
|
||||||
|
<PRE>
|
||||||
|
Description Code Entity name
|
||||||
|
=================================== ============ ==============
|
||||||
|
quotation mark &#34; --> " &quot; --> "
|
||||||
|
ampersand &#38; --> & &amp; --> &
|
||||||
|
less-than sign &#60; --> < &lt; --> <
|
||||||
|
greater-than sign &#62; --> > &gt; --> >
|
||||||
|
|
||||||
|
Description Char Code Entity name
|
||||||
|
=================================== ==== ============ ==============
|
||||||
|
non-breaking space &#160; -->   &nbsp; -->
|
||||||
|
inverted exclamation ¡ &#161; --> ¡ &iexcl; --> ¡
|
||||||
|
cent sign ¢ &#162; --> ¢ &cent; --> ¢
|
||||||
|
pound sterling £ &#163; --> £ &pound; --> £
|
||||||
|
general currency sign ¤ &#164; --> ¤ &curren; --> ¤
|
||||||
|
yen sign ¥ &#165; --> ¥ &yen; --> ¥
|
||||||
|
broken vertical bar ¦ &#166; --> ¦ &brvbar; --> ¦
|
||||||
|
Non-standard &brkbar; --> &brkbar;
|
||||||
|
section sign § &#167; --> § &sect; --> §
|
||||||
|
umlaut (dieresis) ¨ &#168; --> ¨ &uml; --> ¨
|
||||||
|
Non-standard &die; --> ¨
|
||||||
|
copyright © &#169; --> © &copy; --> ©
|
||||||
|
feminine ordinal ª &#170; --> ª &ordf; --> ª
|
||||||
|
left angle quote, guillemotleft « &#171; --> « &laquo; --> «
|
||||||
|
not sign ¬ &#172; --> ¬ &not; --> ¬
|
||||||
|
soft hyphen &#173; --> ­ &shy; --> ­
|
||||||
|
registered trademark ® &#174; --> ® &reg; --> ®
|
||||||
|
macron accent ¯ &#175; --> ¯ &macr; --> ¯
|
||||||
|
Non-standard &hibar; --> &hibar;
|
||||||
|
degree sign ° &#176; --> ° &deg; --> °
|
||||||
|
plus or minus ± &#177; --> ± &plusmn; --> ±
|
||||||
|
superscript two ² &#178; --> ² &sup2; --> ²
|
||||||
|
superscript three ³ &#179; --> ³ &sup3; --> ³
|
||||||
|
acute accent ´ &#180; --> ´ &acute; --> ´
|
||||||
|
micro sign µ &#181; --> µ &micro; --> µ
|
||||||
|
paragraph sign ¶ &#182; --> ¶ &para; --> ¶
|
||||||
|
middle dot · &#183; --> · &middot; --> ·
|
||||||
|
cedilla ¸ &#184; --> ¸ &cedil; --> ¸
|
||||||
|
superscript one ¹ &#185; --> ¹ &sup1; --> ¹
|
||||||
|
masculine ordinal º &#186; --> º &ordm; --> º
|
||||||
|
right angle quote, guillemotright » &#187; --> » &raquo; --> »
|
||||||
|
fraction one-fourth ¼ &#188; --> ¼ &frac14; --> ¼
|
||||||
|
fraction one-half ½ &#189; --> ½ &frac12; --> ½
|
||||||
|
fraction three-fourths ¾ &#190; --> ¾ &frac34; --> ¾
|
||||||
|
inverted question mark ¿ &#191; --> ¿ &iquest; --> ¿
|
||||||
|
capital A, grave accent À &#192; --> À &Agrave; --> À
|
||||||
|
capital A, acute accent Á &#193; --> Á &Aacute; --> Á
|
||||||
|
capital A, circumflex accent  &#194; -->  &Acirc; --> Â
|
||||||
|
capital A, tilde à &#195; --> à &Atilde; --> Ã
|
||||||
|
capital A, dieresis or umlaut mark Ä &#196; --> Ä &Auml; --> Ä
|
||||||
|
capital A, ring Å &#197; --> Å &Aring; --> Å
|
||||||
|
capital AE diphthong (ligature) Æ &#198; --> Æ &AElig; --> Æ
|
||||||
|
capital C, cedilla Ç &#199; --> Ç &Ccedil; --> Ç
|
||||||
|
capital E, grave accent È &#200; --> È &Egrave; --> È
|
||||||
|
capital E, acute accent É &#201; --> É &Eacute; --> É
|
||||||
|
capital E, circumflex accent Ê &#202; --> Ê &Ecirc; --> Ê
|
||||||
|
capital E, dieresis or umlaut mark Ë &#203; --> Ë &Euml; --> Ë
|
||||||
|
capital I, grave accent Ì &#204; --> Ì &Igrave; --> Ì
|
||||||
|
capital I, acute accent Í &#205; --> Í &Iacute; --> Í
|
||||||
|
capital I, circumflex accent Î &#206; --> Î &Icirc; --> Î
|
||||||
|
capital I, dieresis or umlaut mark Ï &#207; --> Ï &Iuml; --> Ï
|
||||||
|
capital Eth, Icelandic Ð &#208; --> Ð &ETH; --> Ð
|
||||||
|
Non-standard &Dstrok; --> Đ
|
||||||
|
capital N, tilde Ñ &#209; --> Ñ &Ntilde; --> Ñ
|
||||||
|
capital O, grave accent Ò &#210; --> Ò &Ograve; --> Ò
|
||||||
|
capital O, acute accent Ó &#211; --> Ó &Oacute; --> Ó
|
||||||
|
capital O, circumflex accent Ô &#212; --> Ô &Ocirc; --> Ô
|
||||||
|
capital O, tilde Õ &#213; --> Õ &Otilde; --> Õ
|
||||||
|
capital O, dieresis or umlaut mark Ö &#214; --> Ö &Ouml; --> Ö
|
||||||
|
multiply sign × &#215; --> × &times; --> ×
|
||||||
|
capital O, slash Ø &#216; --> Ø &Oslash; --> Ø
|
||||||
|
capital U, grave accent Ù &#217; --> Ù &Ugrave; --> Ù
|
||||||
|
capital U, acute accent Ú &#218; --> Ú &Uacute; --> Ú
|
||||||
|
capital U, circumflex accent Û &#219; --> Û &Ucirc; --> Û
|
||||||
|
capital U, dieresis or umlaut mark Ü &#220; --> Ü &Uuml; --> Ü
|
||||||
|
capital Y, acute accent Ý &#221; --> Ý &Yacute; --> Ý
|
||||||
|
capital THORN, Icelandic Þ &#222; --> Þ &THORN; --> Þ
|
||||||
|
small sharp s, German (sz ligature) ß &#223; --> ß &szlig; --> ß
|
||||||
|
small a, grave accent à &#224; --> à &agrave; --> à
|
||||||
|
small a, acute accent á &#225; --> á &aacute; --> á
|
||||||
|
small a, circumflex accent â &#226; --> â &acirc; --> â
|
||||||
|
small a, tilde ã &#227; --> ã &atilde; --> ã
|
||||||
|
small a, dieresis or umlaut mark ä &#228; --> ä &auml; --> ä
|
||||||
|
small a, ring å &#229; --> å &aring; --> å
|
||||||
|
small ae diphthong (ligature) æ &#230; --> æ &aelig; --> æ
|
||||||
|
small c, cedilla ç &#231; --> ç &ccedil; --> ç
|
||||||
|
small e, grave accent è &#232; --> è &egrave; --> è
|
||||||
|
small e, acute accent é &#233; --> é &eacute; --> é
|
||||||
|
small e, circumflex accent ê &#234; --> ê &ecirc; --> ê
|
||||||
|
small e, dieresis or umlaut mark ë &#235; --> ë &euml; --> ë
|
||||||
|
small i, grave accent ì &#236; --> ì &igrave; --> ì
|
||||||
|
small i, acute accent í &#237; --> í &iacute; --> í
|
||||||
|
small i, circumflex accent î &#238; --> î &icirc; --> î
|
||||||
|
small i, dieresis or umlaut mark ï &#239; --> ï &iuml; --> ï
|
||||||
|
small eth, Icelandic ð &#240; --> ð &eth; --> ð
|
||||||
|
small n, tilde ñ &#241; --> ñ &ntilde; --> ñ
|
||||||
|
small o, grave accent ò &#242; --> ò &ograve; --> ò
|
||||||
|
small o, acute accent ó &#243; --> ó &oacute; --> ó
|
||||||
|
small o, circumflex accent ô &#244; --> ô &ocirc; --> ô
|
||||||
|
small o, tilde õ &#245; --> õ &otilde; --> õ
|
||||||
|
small o, dieresis or umlaut mark ö &#246; --> ö &ouml; --> ö
|
||||||
|
division sign ÷ &#247; --> ÷ &divide; --> ÷
|
||||||
|
small o, slash ø &#248; --> ø &oslash; --> ø
|
||||||
|
small u, grave accent ù &#249; --> ù &ugrave; --> ù
|
||||||
|
small u, acute accent ú &#250; --> ú &uacute; --> ú
|
||||||
|
small u, circumflex accent û &#251; --> û &ucirc; --> û
|
||||||
|
small u, dieresis or umlaut mark ü &#252; --> ü &uuml; --> ü
|
||||||
|
small y, acute accent ý &#253; --> ý &yacute; --> ý
|
||||||
|
small thorn, Icelandic þ &#254; --> þ &thorn; --> þ
|
||||||
|
small y, dieresis or umlaut mark ÿ &#255; --> ÿ &yuml; --> ÿ
|
||||||
|
</PRE>
|
||||||
|
<!-- removed: second /PRE, a hack for HotJava 1.0 preBeta 1 -->
|
||||||
|
<HR>
|
||||||
|
|
||||||
|
<STRONG>How to read</STRONG> this table. The columns are
|
||||||
|
<DL COMPACT>
|
||||||
|
<DT>1st:<DD>textual <EM>description</EM> of the character
|
||||||
|
<DT>2nd:<DD>character inserted directly into the HTML page as <EM>one
|
||||||
|
byte</EM>
|
||||||
|
<DT>3rd:<DD>character written as <EM>numeric HTML entity</EM>, in the
|
||||||
|
format:<BR>"how it looks literally" <CODE>--></CODE>
|
||||||
|
"what your browser does with it"
|
||||||
|
<DT>4th:<DD>character written as <EM>symbolic HTML entity</EM>, in the
|
||||||
|
format:<BR>"how it looks literally" <CODE>--></CODE>
|
||||||
|
"what your browser does with it"
|
||||||
|
</DL>
|
||||||
|
|
||||||
|
So for example, if you see something like "<CODE>&divide; -->
|
||||||
|
&divide;</CODE>" in the 4th column, this means your browser
|
||||||
|
doesn't know about the entity name "divide" and just puts it
|
||||||
|
literally.
|
||||||
|
|
||||||
|
<P>
|
||||||
|
<STRONG>This table</STRONG> grew out of an overview of the "ISO
|
||||||
|
Latin-1 Character Set" overview related to the Hyper-G Text Format
|
||||||
|
(<A HREF="http://www.hyperwave.de/HTFdoc">HTF</A>).
|
||||||
|
|
||||||
|
The entity names <CODE>&brkbar;</CODE> and <CODE>&Dstrok;</CODE>
|
||||||
|
seem to be unique to HTF.
|
||||||
|
|
||||||
|
The entity name <CODE>&hibar;</CODE> has been supported by X Mosaic
|
||||||
|
but seems to be replaced with <CODE>&macr;</CODE>.
|
||||||
|
|
||||||
|
The entity names <CODE>&uml;</CODE> and <CODE>&die;</CODE> should
|
||||||
|
be equivalent.
|
||||||
|
|
||||||
|
<P><STRONG>The standards stuff:</STRONG>
|
||||||
|
The
|
||||||
|
<A HREF="http://www.w3.org/hypertext/WWW/MarkUp/html-spec/">HTML 2.0 Standard</A>
|
||||||
|
includes a section on
|
||||||
|
<A HREF="http://www.w3.org/hypertext/WWW/MarkUp/html-spec/html-spec_9.html#SEC99">Character Entity Sets</A>
|
||||||
|
and an overview on the
|
||||||
|
<A HREF="http://www.w3.org/hypertext/WWW/MarkUp/html-spec/html-spec_13.html#SEC106">HTML Coded Character Set</A>
|
||||||
|
(The entity names are derived from <A HREF="http://www.ucc.ie/info/net/isolat1.html">ISO 8879</A>).
|
||||||
|
<BR>
|
||||||
|
|
||||||
|
Or have a look at the
|
||||||
|
<A HREF="http://www.w3.org/hypertext/WWW/MarkUp/html3/latin1.html">Latin-1 Character Entities</A>
|
||||||
|
as listed in an draft for the
|
||||||
|
<A HREF="http://www.w3.org/hypertext/WWW/MarkUp/html3/CoverPage.html">HTML 3.0 specification</A>.
|
||||||
|
<BR>
|
||||||
|
|
||||||
|
The
|
||||||
|
<A HREF="http://www.w3.org/hypertext/WWW/MarkUp/HTMLPlus/htmlplus_59.html">Appendix II</A>
|
||||||
|
of CERN's
|
||||||
|
<A HREF="http://www.w3.org/hypertext/WWW/MarkUp/HTMLPlus/htmlplus_1.html">HTML+ Discussion Document</A>
|
||||||
|
contains a
|
||||||
|
<A HREF="http://www.w3.org/hypertext/WWW/MarkUp/HTMLPlus/htmlplus_table.ps">table</A>
|
||||||
|
(in PostScript format) of the proposed character entities for HTML+ and their
|
||||||
|
corresponding character codes for Unicode and the Adobe Latin-1 & Symbol
|
||||||
|
character sets.
|
||||||
|
<P>
|
||||||
|
|
||||||
|
<STRONG>Please note</STRONG> that there is nothing wrong with using
|
||||||
|
characters of ISO Latin-1 above 127: the normal transmission protocol
|
||||||
|
for the WWW,
|
||||||
|
<A HREF="http://www.w3.org/pub/WWW/Protocols/rfc1945/rfc1945">HTTP/1.0</A>,
|
||||||
|
uses the 8bit ISO latin-1 as default encoding.
|
||||||
|
(Thanks to Roman
|
||||||
|
Czyborra for pointing this out!)
|
||||||
|
<P>
|
||||||
|
|
||||||
|
<STRONG>Other information:</STRONG>
|
||||||
|
<UL>
|
||||||
|
|
||||||
|
<LI><STRONG>Kevin J. Brewer</STRONG> has done two very good pages on the subject:
|
||||||
|
<UL>
|
||||||
|
<LI><A HREF="http://www.bbsinc.com/iso8859.html">ASCII - ISO 8859-1 (Latin-1) with HTML 3.0 Entities Table</A> and
|
||||||
|
<LI><A HREF="http://www.bbsinc.com/iso8879.html">ISO 8879 Entities Gopher Menu</A>
|
||||||
|
</UL>
|
||||||
|
|
||||||
|
<LI>The excellent overview on the series of
|
||||||
|
<A HREF="http://czyborra.com/charsets/iso8859.html">ISO 8859
|
||||||
|
character sets</A> compiled by Roman Czyborra.
|
||||||
|
|
||||||
|
<LI>Also have a look on Alan Flavell's page of
|
||||||
|
<A HREF="http://ppewww.ph.gla.ac.uk/%7Eflavell/iso8859/iso8859-pointers.html">pointers
|
||||||
|
to information about ISO8859</A>. It's written very well!
|
||||||
|
|
||||||
|
<LI>Maybe also of interest to you is the
|
||||||
|
<A HREF="ftp://ftp.vlsivie.tuwien.ac.at/pub/8bit/FAQ-ISO-8859-1">ISO
|
||||||
|
8859-1 FAQ</A> by Michael Gschwind
|
||||||
|
(<A HREF="mailto:mike@vlsivie.tuwien.ac.at">mike@vlsivie.tuwien.ac.at</A>),
|
||||||
|
part of his page on
|
||||||
|
<A HREF="http://www.vlsivie.tuwien.ac.at/mike/i18n.html">Internationalization</A>.
|
||||||
|
|
||||||
|
<LI>For users of X11R5 on SunOS systems: the
|
||||||
|
<A HREF="Compose.txt">table over the compose combinations</A>
|
||||||
|
(also coded <A HREF="Compose.html">with entities</A> where possible).
|
||||||
|
It's taken from the MIT X sources in
|
||||||
|
<CODE>server/ddx/sun/Compose.list</CODE>.
|
||||||
|
|
||||||
|
<LI>Finally you could have a look at
|
||||||
|
<A HREF="ftp://ds.internic.net/rfc/rfc1345.txt">RFC 1345:
|
||||||
|
Character Mnemonics & Character Sets</A>
|
||||||
|
by K. Simonsen (06/11/92, 103 pages, approx. 240 kbyte).
|
||||||
|
|
||||||
|
</UL>
|
||||||
|
|
||||||
|
|
||||||
|
<HR>
|
||||||
|
|
||||||
|
<ADDRESS><A HREF="http://ramsch.home.pages.de/">Martin Ramsch</A>, 16.02.1994, 07.01.1996, 01.07.1996, 1998-10-09, 2000-05-15</ADDRESS>
|
||||||
|
|
||||||
|
</BODY>
|
||||||
|
</HTML>
|
198
tests/lynx-dump/data/iso-8859-1.html.exp
Normal file
198
tests/lynx-dump/data/iso-8859-1.html.exp
Normal file
@ -0,0 +1,198 @@
|
|||||||
|
iso8859-1 table
|
||||||
|
|
||||||
|
Description Code Entity name
|
||||||
|
=================================== ============ ==============
|
||||||
|
quotation mark " --> " " --> "
|
||||||
|
ampersand & --> & & --> &
|
||||||
|
less-than sign < --> < < --> <
|
||||||
|
greater-than sign > --> > > --> >
|
||||||
|
|
||||||
|
Description Char Code Entity name
|
||||||
|
=================================== ==== ============ ==============
|
||||||
|
non-breaking space   --> -->
|
||||||
|
inverted exclamation ¡ ¡ --> ¡ ¡ --> ¡
|
||||||
|
cent sign ¢ ¢ --> ¢ ¢ --> ¢
|
||||||
|
pound sterling £ £ --> £ £ --> £
|
||||||
|
general currency sign ¤ ¤ --> ¤ ¤ --> ¤
|
||||||
|
yen sign ¥ ¥ --> ¥ ¥ --> ¥
|
||||||
|
broken vertical bar ¦ ¦ --> ¦ ¦ --> ¦
|
||||||
|
Non-standard &brkbar; --> ¦
|
||||||
|
section sign § § --> § § --> §
|
||||||
|
umlaut (dieresis) ¨ ¨ --> ¨ ¨ --> ¨
|
||||||
|
Non-standard ¨ --> ¨
|
||||||
|
copyright © © --> © © --> ©
|
||||||
|
feminine ordinal ª ª --> ª ª --> ª
|
||||||
|
left angle quote, guillemotleft « « --> « « --> «
|
||||||
|
not sign ¬ ¬ --> ¬ ¬ --> ¬
|
||||||
|
soft hyphen ­ --> ­ -->
|
||||||
|
registered trademark ® ® --> ® ® --> ®
|
||||||
|
macron accent ¯ ¯ --> ¯ ¯ --> ¯
|
||||||
|
Non-standard &hibar; --> ¯
|
||||||
|
degree sign ° ° --> ° ° --> °
|
||||||
|
plus or minus ± ± --> ± ± --> ±
|
||||||
|
superscript two ² ² --> ² ² --> ²
|
||||||
|
superscript three ³ ³ --> ³ ³ --> ³
|
||||||
|
acute accent ´ ´ --> ´ ´ --> ´
|
||||||
|
micro sign µ µ --> µ µ --> µ
|
||||||
|
paragraph sign ¶ ¶ --> ¶ ¶ --> ¶
|
||||||
|
middle dot · · --> · · --> ·
|
||||||
|
cedilla ¸ ¸ --> ¸ ¸ --> ¸
|
||||||
|
superscript one ¹ ¹ --> ¹ ¹ --> ¹
|
||||||
|
masculine ordinal º º --> º º --> º
|
||||||
|
right angle quote, guillemotright » » --> » » --> »
|
||||||
|
fraction one-fourth ¼ ¼ --> ¼ ¼ --> ¼
|
||||||
|
fraction one-half ½ ½ --> ½ ½ --> ½
|
||||||
|
fraction three-fourths ¾ ¾ --> ¾ ¾ --> ¾
|
||||||
|
inverted question mark ¿ ¿ --> ¿ ¿ --> ¿
|
||||||
|
capital A, grave accent À À --> À À --> À
|
||||||
|
capital A, acute accent Á Á --> Á Á --> Á
|
||||||
|
capital A, circumflex accent   -->   --> Â
|
||||||
|
capital A, tilde à à --> à à --> Ã
|
||||||
|
capital A, dieresis or umlaut mark Ä Ä --> Ä Ä --> Ä
|
||||||
|
capital A, ring Å Å --> Å Å --> Å
|
||||||
|
capital AE diphthong (ligature) Æ Æ --> Æ Æ --> Æ
|
||||||
|
capital C, cedilla Ç Ç --> Ç Ç --> Ç
|
||||||
|
capital E, grave accent È È --> È È --> È
|
||||||
|
capital E, acute accent É É --> É É --> É
|
||||||
|
capital E, circumflex accent Ê Ê --> Ê Ê --> Ê
|
||||||
|
capital E, dieresis or umlaut mark Ë Ë --> Ë Ë --> Ë
|
||||||
|
capital I, grave accent Ì Ì --> Ì Ì --> Ì
|
||||||
|
capital I, acute accent Í Í --> Í Í --> Í
|
||||||
|
capital I, circumflex accent Î Î --> Î Î --> Î
|
||||||
|
capital I, dieresis or umlaut mark Ï Ï --> Ï Ï --> Ï
|
||||||
|
capital Eth, Icelandic Ð Ð --> Ð Ð --> Ð
|
||||||
|
Non-standard Đ --> Đ
|
||||||
|
capital N, tilde Ñ Ñ --> Ñ Ñ --> Ñ
|
||||||
|
capital O, grave accent Ò Ò --> Ò Ò --> Ò
|
||||||
|
capital O, acute accent Ó Ó --> Ó Ó --> Ó
|
||||||
|
capital O, circumflex accent Ô Ô --> Ô Ô --> Ô
|
||||||
|
capital O, tilde Õ Õ --> Õ Õ --> Õ
|
||||||
|
capital O, dieresis or umlaut mark Ö Ö --> Ö Ö --> Ö
|
||||||
|
multiply sign × × --> × × --> ×
|
||||||
|
capital O, slash Ø Ø --> Ø Ø --> Ø
|
||||||
|
capital U, grave accent Ù Ù --> Ù Ù --> Ù
|
||||||
|
capital U, acute accent Ú Ú --> Ú Ú --> Ú
|
||||||
|
capital U, circumflex accent Û Û --> Û Û --> Û
|
||||||
|
capital U, dieresis or umlaut mark Ü Ü --> Ü Ü --> Ü
|
||||||
|
capital Y, acute accent Ý Ý --> Ý Ý --> Ý
|
||||||
|
capital THORN, Icelandic Þ Þ --> Þ Þ --> Þ
|
||||||
|
small sharp s, German (sz ligature) ß ß --> ß ß --> ß
|
||||||
|
small a, grave accent à à --> à à --> à
|
||||||
|
small a, acute accent á á --> á á --> á
|
||||||
|
small a, circumflex accent â â --> â â --> â
|
||||||
|
small a, tilde ã ã --> ã ã --> ã
|
||||||
|
small a, dieresis or umlaut mark ä ä --> ä ä --> ä
|
||||||
|
small a, ring å å --> å å --> å
|
||||||
|
small ae diphthong (ligature) æ æ --> æ æ --> æ
|
||||||
|
small c, cedilla ç ç --> ç ç --> ç
|
||||||
|
small e, grave accent è è --> è è --> è
|
||||||
|
small e, acute accent é é --> é é --> é
|
||||||
|
small e, circumflex accent ê ê --> ê ê --> ê
|
||||||
|
small e, dieresis or umlaut mark ë ë --> ë ë --> ë
|
||||||
|
small i, grave accent ì ì --> ì ì --> ì
|
||||||
|
small i, acute accent í í --> í í --> í
|
||||||
|
small i, circumflex accent î î --> î î --> î
|
||||||
|
small i, dieresis or umlaut mark ï ï --> ï ï --> ï
|
||||||
|
small eth, Icelandic ð ð --> ð ð --> ð
|
||||||
|
small n, tilde ñ ñ --> ñ ñ --> ñ
|
||||||
|
small o, grave accent ò ò --> ò ò --> ò
|
||||||
|
small o, acute accent ó ó --> ó ó --> ó
|
||||||
|
small o, circumflex accent ô ô --> ô ô --> ô
|
||||||
|
small o, tilde õ õ --> õ õ --> õ
|
||||||
|
small o, dieresis or umlaut mark ö ö --> ö ö --> ö
|
||||||
|
division sign ÷ ÷ --> ÷ ÷ --> ÷
|
||||||
|
small o, slash ø ø --> ø ø --> ø
|
||||||
|
small u, grave accent ù ù --> ù ù --> ù
|
||||||
|
small u, acute accent ú ú --> ú ú --> ú
|
||||||
|
small u, circumflex accent û û --> û û --> û
|
||||||
|
small u, dieresis or umlaut mark ü ü --> ü ü --> ü
|
||||||
|
small y, acute accent ý ý --> ý ý --> ý
|
||||||
|
small thorn, Icelandic þ þ --> þ þ --> þ
|
||||||
|
small y, dieresis or umlaut mark ÿ ÿ --> ÿ ÿ --> ÿ
|
||||||
|
__________________________________________________________________
|
||||||
|
|
||||||
|
How to read this table. The columns are
|
||||||
|
1st:
|
||||||
|
textual description of the character
|
||||||
|
2nd:
|
||||||
|
character inserted directly into the HTML page as one byte
|
||||||
|
3rd:
|
||||||
|
character written as numeric HTML entity, in the format:
|
||||||
|
"how it looks literally" --> "what your browser does with it"
|
||||||
|
4th:
|
||||||
|
character written as symbolic HTML entity, in the format:
|
||||||
|
"how it looks literally" --> "what your browser does with it"
|
||||||
|
|
||||||
|
So for example, if you see something like "÷ --> ÷" in
|
||||||
|
the 4th column, this means your browser doesn't know about the entity
|
||||||
|
name "divide" and just puts it literally.
|
||||||
|
|
||||||
|
This table grew out of an overview of the "ISO Latin-1 Character Set"
|
||||||
|
overview related to the Hyper-G Text Format ([1]HTF). The entity names
|
||||||
|
&brkbar; and Đ seem to be unique to HTF. The entity name &hibar;
|
||||||
|
has been supported by X Mosaic but seems to be replaced with ¯.
|
||||||
|
The entity names ¨ and ¨ should be equivalent.
|
||||||
|
|
||||||
|
The standards stuff: The [2]HTML 2.0 Standard includes a section on
|
||||||
|
[3]Character Entity Sets and an overview on the [4]HTML Coded Character
|
||||||
|
Set (The entity names are derived from [5]ISO 8879).
|
||||||
|
Or have a look at the [6]Latin-1 Character Entities as listed in an
|
||||||
|
draft for the [7]HTML 3.0 specification.
|
||||||
|
The [8]Appendix II of CERN's [9]HTML+ Discussion Document contains a
|
||||||
|
[10]table (in PostScript format) of the proposed character entities for
|
||||||
|
HTML+ and their corresponding character codes for Unicode and the Adobe
|
||||||
|
Latin-1 & Symbol character sets.
|
||||||
|
|
||||||
|
Please note that there is nothing wrong with using characters of ISO
|
||||||
|
Latin-1 above 127: the normal transmission protocol for the WWW,
|
||||||
|
[11]HTTP/1.0, uses the 8bit ISO latin-1 as default encoding. (Thanks to
|
||||||
|
Roman Czyborra for pointing this out!)
|
||||||
|
|
||||||
|
Other information:
|
||||||
|
* Kevin J. Brewer has done two very good pages on the subject:
|
||||||
|
+ [12]ASCII - ISO 8859-1 (Latin-1) with HTML 3.0 Entities Table
|
||||||
|
and
|
||||||
|
+ [13]ISO 8879 Entities Gopher Menu
|
||||||
|
* The excellent overview on the series of [14]ISO 8859 character sets
|
||||||
|
compiled by Roman Czyborra.
|
||||||
|
* Also have a look on Alan Flavell's page of [15]pointers to
|
||||||
|
information about ISO8859. It's written very well!
|
||||||
|
* Maybe also of interest to you is the [16]ISO 8859-1 FAQ by Michael
|
||||||
|
Gschwind ([17]mike@vlsivie.tuwien.ac.at), part of his page on
|
||||||
|
[18]Internationalization.
|
||||||
|
* For users of X11R5 on SunOS systems: the [19]table over the compose
|
||||||
|
combinations (also coded [20]with entities where possible). It's
|
||||||
|
taken from the MIT X sources in server/ddx/sun/Compose.list.
|
||||||
|
* Finally you could have a look at [21]RFC 1345: Character Mnemonics
|
||||||
|
& Character Sets by K. Simonsen (06/11/92, 103 pages, approx. 240
|
||||||
|
kbyte).
|
||||||
|
__________________________________________________________________
|
||||||
|
|
||||||
|
|
||||||
|
[22]Martin Ramsch, 16.02.1994, 07.01.1996, 01.07.1996, 1998-10-09,
|
||||||
|
2000-05-15
|
||||||
|
|
||||||
|
References
|
||||||
|
|
||||||
|
1. http://www.hyperwave.de/HTFdoc
|
||||||
|
2. http://www.w3.org/hypertext/WWW/MarkUp/html-spec/
|
||||||
|
3. http://www.w3.org/hypertext/WWW/MarkUp/html-spec/html-spec_9.html#SEC99
|
||||||
|
4. http://www.w3.org/hypertext/WWW/MarkUp/html-spec/html-spec_13.html#SEC106
|
||||||
|
5. http://www.ucc.ie/info/net/isolat1.html
|
||||||
|
6. http://www.w3.org/hypertext/WWW/MarkUp/html3/latin1.html
|
||||||
|
7. http://www.w3.org/hypertext/WWW/MarkUp/html3/CoverPage.html
|
||||||
|
8. http://www.w3.org/hypertext/WWW/MarkUp/HTMLPlus/htmlplus_59.html
|
||||||
|
9. http://www.w3.org/hypertext/WWW/MarkUp/HTMLPlus/htmlplus_1.html
|
||||||
|
10. http://www.w3.org/hypertext/WWW/MarkUp/HTMLPlus/htmlplus_table.ps
|
||||||
|
11. http://www.w3.org/pub/WWW/Protocols/rfc1945/rfc1945
|
||||||
|
12. http://www.bbsinc.com/iso8859.html
|
||||||
|
13. http://www.bbsinc.com/iso8879.html
|
||||||
|
14. http://czyborra.com/charsets/iso8859.html
|
||||||
|
15. http://ppewww.ph.gla.ac.uk/~flavell/iso8859/iso8859-pointers.html
|
||||||
|
16. ftp://ftp.vlsivie.tuwien.ac.at/pub/8bit/FAQ-ISO-8859-1
|
||||||
|
17. mailto:mike@vlsivie.tuwien.ac.at
|
||||||
|
18. http://www.vlsivie.tuwien.ac.at/mike/i18n.html
|
||||||
|
19. http://www.ramsch.org/martin/uni/fmi-hp/Compose.txt
|
||||||
|
20. http://www.ramsch.org/martin/uni/fmi-hp/Compose.html
|
||||||
|
21. ftp://ds.internic.net/rfc/rfc1345.txt
|
||||||
|
22. http://ramsch.home.pages.de/
|
275
tests/lynx-dump/data/iso-8859-1a.html
Normal file
275
tests/lynx-dump/data/iso-8859-1a.html
Normal file
@ -0,0 +1,275 @@
|
|||||||
|
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN">
|
||||||
|
<!-- X-URL: http://www.ramsch.org/martin/uni/fmi-hp/iso8859-1.html -->
|
||||||
|
<!-- Date: Tue, 28 Dec 2004 20:24:09 GMT -->
|
||||||
|
<!-- Last-Modified: Mon, 15 May 2000 09:37:37 GMT -->
|
||||||
|
<HTML>
|
||||||
|
<HEAD>
|
||||||
|
<TITLE>Martin Ramsch - iso8859-1 table</TITLE>
|
||||||
|
<META HTTP-EQUIV="Content-Type" CONTENT="text/html; charset=iso-8859-1">
|
||||||
|
<BASE HREF="http://www.ramsch.org/martin/uni/fmi-hp/iso8859-1.html">
|
||||||
|
</HEAD>
|
||||||
|
|
||||||
|
<BODY>
|
||||||
|
|
||||||
|
<H1 ALIGN=center>iso8859-1 table, with cp-1252</H1>
|
||||||
|
|
||||||
|
<PRE>
|
||||||
|
Description Code Entity name
|
||||||
|
=================================== ============ ==============
|
||||||
|
quotation mark &#34; --> " &quot; --> "
|
||||||
|
ampersand &#38; --> & &amp; --> &
|
||||||
|
less-than sign &#60; --> < &lt; --> <
|
||||||
|
greater-than sign &#62; --> > &gt; --> >
|
||||||
|
|
||||||
|
Description Char Code Entity name
|
||||||
|
=================================== ==== ============ ==============
|
||||||
|
euro sign € &128; --> €
|
||||||
|
undefined <20> &129; --> 
|
||||||
|
single low-9 quotation mark ‚ &130; --> ‚
|
||||||
|
latin small letter f with hook ƒ &131; --> ƒ
|
||||||
|
double low-9 quotation mark „ &132; --> „
|
||||||
|
horizontal ellipsis … &133; --> …
|
||||||
|
dagger † &134; --> †
|
||||||
|
double dagger ‡ &135; --> ‡
|
||||||
|
modifier letter circumflex accent ˆ &136; --> ˆ
|
||||||
|
per mille sign ‰ &137; --> ‰
|
||||||
|
latin capital letter s with caron Š &138; --> Š
|
||||||
|
single left-pointing angle quote mark ‹ &139; --> ‹
|
||||||
|
latin capital ligature oe Œ &140; --> Œ
|
||||||
|
undefined <20> &141; --> 
|
||||||
|
latin capital letter z with caron Ž &142; --> Ž
|
||||||
|
undefined <20> &143; --> 
|
||||||
|
|
||||||
|
undefined <20> &144; --> 
|
||||||
|
left single quotation mark ‘ &145; --> ‘
|
||||||
|
right single quotation mark ’ &146; --> ’
|
||||||
|
left double quotation mark “ &147; --> “
|
||||||
|
right double quotation mark ” &148; --> ”
|
||||||
|
bullet • &149; --> •
|
||||||
|
en dash – &150; --> –
|
||||||
|
em dash — &151; --> —
|
||||||
|
small tilde ˜ &152; --> ˜
|
||||||
|
trade mark sign ™ &153; --> ™
|
||||||
|
latin small letter s with caron š &154; --> š
|
||||||
|
single right-pointing angle quote mark › &155; --> ›
|
||||||
|
latin small ligature oe œ &156; --> œ
|
||||||
|
undefined <20> &157; --> 
|
||||||
|
latin small letter z with caron ž &158; --> ž
|
||||||
|
latin capital letter y with diaeresis Ÿ &159; --> Ÿ
|
||||||
|
|
||||||
|
non-breaking space &#160; -->   &nbsp; -->
|
||||||
|
inverted exclamation ¡ &#161; --> ¡ &iexcl; --> ¡
|
||||||
|
cent sign ¢ &#162; --> ¢ &cent; --> ¢
|
||||||
|
pound sterling £ &#163; --> £ &pound; --> £
|
||||||
|
general currency sign ¤ &#164; --> ¤ &curren; --> ¤
|
||||||
|
yen sign ¥ &#165; --> ¥ &yen; --> ¥
|
||||||
|
broken vertical bar ¦ &#166; --> ¦ &brvbar; --> ¦
|
||||||
|
Non-standard &brkbar; --> &brkbar;
|
||||||
|
section sign § &#167; --> § &sect; --> §
|
||||||
|
umlaut (dieresis) ¨ &#168; --> ¨ &uml; --> ¨
|
||||||
|
Non-standard &die; --> ¨
|
||||||
|
copyright © &#169; --> © &copy; --> ©
|
||||||
|
feminine ordinal ª &#170; --> ª &ordf; --> ª
|
||||||
|
left angle quote, guillemotleft « &#171; --> « &laquo; --> «
|
||||||
|
not sign ¬ &#172; --> ¬ &not; --> ¬
|
||||||
|
soft hyphen &#173; --> ­ &shy; --> ­
|
||||||
|
registered trademark ® &#174; --> ® &reg; --> ®
|
||||||
|
macron accent ¯ &#175; --> ¯ &macr; --> ¯
|
||||||
|
Non-standard &hibar; --> &hibar;
|
||||||
|
degree sign ° &#176; --> ° &deg; --> °
|
||||||
|
plus or minus ± &#177; --> ± &plusmn; --> ±
|
||||||
|
superscript two ² &#178; --> ² &sup2; --> ²
|
||||||
|
superscript three ³ &#179; --> ³ &sup3; --> ³
|
||||||
|
acute accent ´ &#180; --> ´ &acute; --> ´
|
||||||
|
micro sign µ &#181; --> µ &micro; --> µ
|
||||||
|
paragraph sign ¶ &#182; --> ¶ &para; --> ¶
|
||||||
|
middle dot · &#183; --> · &middot; --> ·
|
||||||
|
cedilla ¸ &#184; --> ¸ &cedil; --> ¸
|
||||||
|
superscript one ¹ &#185; --> ¹ &sup1; --> ¹
|
||||||
|
masculine ordinal º &#186; --> º &ordm; --> º
|
||||||
|
right angle quote, guillemotright » &#187; --> » &raquo; --> »
|
||||||
|
fraction one-fourth ¼ &#188; --> ¼ &frac14; --> ¼
|
||||||
|
fraction one-half ½ &#189; --> ½ &frac12; --> ½
|
||||||
|
fraction three-fourths ¾ &#190; --> ¾ &frac34; --> ¾
|
||||||
|
inverted question mark ¿ &#191; --> ¿ &iquest; --> ¿
|
||||||
|
capital A, grave accent À &#192; --> À &Agrave; --> À
|
||||||
|
capital A, acute accent Á &#193; --> Á &Aacute; --> Á
|
||||||
|
capital A, circumflex accent  &#194; -->  &Acirc; --> Â
|
||||||
|
capital A, tilde à &#195; --> à &Atilde; --> Ã
|
||||||
|
capital A, dieresis or umlaut mark Ä &#196; --> Ä &Auml; --> Ä
|
||||||
|
capital A, ring Å &#197; --> Å &Aring; --> Å
|
||||||
|
capital AE diphthong (ligature) Æ &#198; --> Æ &AElig; --> Æ
|
||||||
|
capital C, cedilla Ç &#199; --> Ç &Ccedil; --> Ç
|
||||||
|
capital E, grave accent È &#200; --> È &Egrave; --> È
|
||||||
|
capital E, acute accent É &#201; --> É &Eacute; --> É
|
||||||
|
capital E, circumflex accent Ê &#202; --> Ê &Ecirc; --> Ê
|
||||||
|
capital E, dieresis or umlaut mark Ë &#203; --> Ë &Euml; --> Ë
|
||||||
|
capital I, grave accent Ì &#204; --> Ì &Igrave; --> Ì
|
||||||
|
capital I, acute accent Í &#205; --> Í &Iacute; --> Í
|
||||||
|
capital I, circumflex accent Î &#206; --> Î &Icirc; --> Î
|
||||||
|
capital I, dieresis or umlaut mark Ï &#207; --> Ï &Iuml; --> Ï
|
||||||
|
capital Eth, Icelandic Ð &#208; --> Ð &ETH; --> Ð
|
||||||
|
Non-standard &Dstrok; --> Đ
|
||||||
|
capital N, tilde Ñ &#209; --> Ñ &Ntilde; --> Ñ
|
||||||
|
capital O, grave accent Ò &#210; --> Ò &Ograve; --> Ò
|
||||||
|
capital O, acute accent Ó &#211; --> Ó &Oacute; --> Ó
|
||||||
|
capital O, circumflex accent Ô &#212; --> Ô &Ocirc; --> Ô
|
||||||
|
capital O, tilde Õ &#213; --> Õ &Otilde; --> Õ
|
||||||
|
capital O, dieresis or umlaut mark Ö &#214; --> Ö &Ouml; --> Ö
|
||||||
|
multiply sign × &#215; --> × &times; --> ×
|
||||||
|
capital O, slash Ø &#216; --> Ø &Oslash; --> Ø
|
||||||
|
capital U, grave accent Ù &#217; --> Ù &Ugrave; --> Ù
|
||||||
|
capital U, acute accent Ú &#218; --> Ú &Uacute; --> Ú
|
||||||
|
capital U, circumflex accent Û &#219; --> Û &Ucirc; --> Û
|
||||||
|
capital U, dieresis or umlaut mark Ü &#220; --> Ü &Uuml; --> Ü
|
||||||
|
capital Y, acute accent Ý &#221; --> Ý &Yacute; --> Ý
|
||||||
|
capital THORN, Icelandic Þ &#222; --> Þ &THORN; --> Þ
|
||||||
|
small sharp s, German (sz ligature) ß &#223; --> ß &szlig; --> ß
|
||||||
|
small a, grave accent à &#224; --> à &agrave; --> à
|
||||||
|
small a, acute accent á &#225; --> á &aacute; --> á
|
||||||
|
small a, circumflex accent â &#226; --> â &acirc; --> â
|
||||||
|
small a, tilde ã &#227; --> ã &atilde; --> ã
|
||||||
|
small a, dieresis or umlaut mark ä &#228; --> ä &auml; --> ä
|
||||||
|
small a, ring å &#229; --> å &aring; --> å
|
||||||
|
small ae diphthong (ligature) æ &#230; --> æ &aelig; --> æ
|
||||||
|
small c, cedilla ç &#231; --> ç &ccedil; --> ç
|
||||||
|
small e, grave accent è &#232; --> è &egrave; --> è
|
||||||
|
small e, acute accent é &#233; --> é &eacute; --> é
|
||||||
|
small e, circumflex accent ê &#234; --> ê &ecirc; --> ê
|
||||||
|
small e, dieresis or umlaut mark ë &#235; --> ë &euml; --> ë
|
||||||
|
small i, grave accent ì &#236; --> ì &igrave; --> ì
|
||||||
|
small i, acute accent í &#237; --> í &iacute; --> í
|
||||||
|
small i, circumflex accent î &#238; --> î &icirc; --> î
|
||||||
|
small i, dieresis or umlaut mark ï &#239; --> ï &iuml; --> ï
|
||||||
|
small eth, Icelandic ð &#240; --> ð &eth; --> ð
|
||||||
|
small n, tilde ñ &#241; --> ñ &ntilde; --> ñ
|
||||||
|
small o, grave accent ò &#242; --> ò &ograve; --> ò
|
||||||
|
small o, acute accent ó &#243; --> ó &oacute; --> ó
|
||||||
|
small o, circumflex accent ô &#244; --> ô &ocirc; --> ô
|
||||||
|
small o, tilde õ &#245; --> õ &otilde; --> õ
|
||||||
|
small o, dieresis or umlaut mark ö &#246; --> ö &ouml; --> ö
|
||||||
|
division sign ÷ &#247; --> ÷ &divide; --> ÷
|
||||||
|
small o, slash ø &#248; --> ø &oslash; --> ø
|
||||||
|
small u, grave accent ù &#249; --> ù &ugrave; --> ù
|
||||||
|
small u, acute accent ú &#250; --> ú &uacute; --> ú
|
||||||
|
small u, circumflex accent û &#251; --> û &ucirc; --> û
|
||||||
|
small u, dieresis or umlaut mark ü &#252; --> ü &uuml; --> ü
|
||||||
|
small y, acute accent ý &#253; --> ý &yacute; --> ý
|
||||||
|
small thorn, Icelandic þ &#254; --> þ &thorn; --> þ
|
||||||
|
small y, dieresis or umlaut mark ÿ &#255; --> ÿ &yuml; --> ÿ
|
||||||
|
</PRE>
|
||||||
|
<!-- removed: second /PRE, a hack for HotJava 1.0 preBeta 1 -->
|
||||||
|
<HR>
|
||||||
|
|
||||||
|
<STRONG>How to read</STRONG> this table. The columns are
|
||||||
|
<DL COMPACT>
|
||||||
|
<DT>1st:<DD>textual <EM>description</EM> of the character
|
||||||
|
<DT>2nd:<DD>character inserted directly into the HTML page as <EM>one
|
||||||
|
byte</EM>
|
||||||
|
<DT>3rd:<DD>character written as <EM>numeric HTML entity</EM>, in the
|
||||||
|
format:<BR>"how it looks literally" <CODE>--></CODE>
|
||||||
|
"what your browser does with it"
|
||||||
|
<DT>4th:<DD>character written as <EM>symbolic HTML entity</EM>, in the
|
||||||
|
format:<BR>"how it looks literally" <CODE>--></CODE>
|
||||||
|
"what your browser does with it"
|
||||||
|
</DL>
|
||||||
|
|
||||||
|
So for example, if you see something like "<CODE>&divide; -->
|
||||||
|
&divide;</CODE>" in the 4th column, this means your browser
|
||||||
|
doesn't know about the entity name "divide" and just puts it
|
||||||
|
literally.
|
||||||
|
|
||||||
|
<P>
|
||||||
|
<STRONG>This table</STRONG> grew out of an overview of the "ISO
|
||||||
|
Latin-1 Character Set" overview related to the Hyper-G Text Format
|
||||||
|
(<A HREF="http://www.hyperwave.de/HTFdoc">HTF</A>).
|
||||||
|
|
||||||
|
The entity names <CODE>&brkbar;</CODE> and <CODE>&Dstrok;</CODE>
|
||||||
|
seem to be unique to HTF.
|
||||||
|
|
||||||
|
The entity name <CODE>&hibar;</CODE> has been supported by X Mosaic
|
||||||
|
but seems to be replaced with <CODE>&macr;</CODE>.
|
||||||
|
|
||||||
|
The entity names <CODE>&uml;</CODE> and <CODE>&die;</CODE> should
|
||||||
|
be equivalent.
|
||||||
|
|
||||||
|
<P><STRONG>The standards stuff:</STRONG>
|
||||||
|
The
|
||||||
|
<A HREF="http://www.w3.org/hypertext/WWW/MarkUp/html-spec/">HTML 2.0 Standard</A>
|
||||||
|
includes a section on
|
||||||
|
<A HREF="http://www.w3.org/hypertext/WWW/MarkUp/html-spec/html-spec_9.html#SEC99">Character Entity Sets</A>
|
||||||
|
and an overview on the
|
||||||
|
<A HREF="http://www.w3.org/hypertext/WWW/MarkUp/html-spec/html-spec_13.html#SEC106">HTML Coded Character Set</A>
|
||||||
|
(The entity names are derived from <A HREF="http://www.ucc.ie/info/net/isolat1.html">ISO 8879</A>).
|
||||||
|
<BR>
|
||||||
|
|
||||||
|
Or have a look at the
|
||||||
|
<A HREF="http://www.w3.org/hypertext/WWW/MarkUp/html3/latin1.html">Latin-1 Character Entities</A>
|
||||||
|
as listed in an draft for the
|
||||||
|
<A HREF="http://www.w3.org/hypertext/WWW/MarkUp/html3/CoverPage.html">HTML 3.0 specification</A>.
|
||||||
|
<BR>
|
||||||
|
|
||||||
|
The
|
||||||
|
<A HREF="http://www.w3.org/hypertext/WWW/MarkUp/HTMLPlus/htmlplus_59.html">Appendix II</A>
|
||||||
|
of CERN's
|
||||||
|
<A HREF="http://www.w3.org/hypertext/WWW/MarkUp/HTMLPlus/htmlplus_1.html">HTML+ Discussion Document</A>
|
||||||
|
contains a
|
||||||
|
<A HREF="http://www.w3.org/hypertext/WWW/MarkUp/HTMLPlus/htmlplus_table.ps">table</A>
|
||||||
|
(in PostScript format) of the proposed character entities for HTML+ and their
|
||||||
|
corresponding character codes for Unicode and the Adobe Latin-1 & Symbol
|
||||||
|
character sets.
|
||||||
|
<P>
|
||||||
|
|
||||||
|
<STRONG>Please note</STRONG> that there is nothing wrong with using
|
||||||
|
characters of ISO Latin-1 above 127: the normal transmission protocol
|
||||||
|
for the WWW,
|
||||||
|
<A HREF="http://www.w3.org/pub/WWW/Protocols/rfc1945/rfc1945">HTTP/1.0</A>,
|
||||||
|
uses the 8bit ISO latin-1 as default encoding.
|
||||||
|
(Thanks to Roman
|
||||||
|
Czyborra for pointing this out!)
|
||||||
|
<P>
|
||||||
|
|
||||||
|
<STRONG>Other information:</STRONG>
|
||||||
|
<UL>
|
||||||
|
|
||||||
|
<LI><STRONG>Kevin J. Brewer</STRONG> has done two very good pages on the subject:
|
||||||
|
<UL>
|
||||||
|
<LI><A HREF="http://www.bbsinc.com/iso8859.html">ASCII - ISO 8859-1 (Latin-1) with HTML 3.0 Entities Table</A> and
|
||||||
|
<LI><A HREF="http://www.bbsinc.com/iso8879.html">ISO 8879 Entities Gopher Menu</A>
|
||||||
|
</UL>
|
||||||
|
|
||||||
|
<LI>The excellent overview on the series of
|
||||||
|
<A HREF="http://czyborra.com/charsets/iso8859.html">ISO 8859
|
||||||
|
character sets</A> compiled by Roman Czyborra.
|
||||||
|
|
||||||
|
<LI>Also have a look on Alan Flavell's page of
|
||||||
|
<A HREF="http://ppewww.ph.gla.ac.uk/%7Eflavell/iso8859/iso8859-pointers.html">pointers
|
||||||
|
to information about ISO8859</A>. It's written very well!
|
||||||
|
|
||||||
|
<LI>Maybe also of interest to you is the
|
||||||
|
<A HREF="ftp://ftp.vlsivie.tuwien.ac.at/pub/8bit/FAQ-ISO-8859-1">ISO
|
||||||
|
8859-1 FAQ</A> by Michael Gschwind
|
||||||
|
(<A HREF="mailto:mike@vlsivie.tuwien.ac.at">mike@vlsivie.tuwien.ac.at</A>),
|
||||||
|
part of his page on
|
||||||
|
<A HREF="http://www.vlsivie.tuwien.ac.at/mike/i18n.html">Internationalization</A>.
|
||||||
|
|
||||||
|
<LI>For users of X11R5 on SunOS systems: the
|
||||||
|
<A HREF="Compose.txt">table over the compose combinations</A>
|
||||||
|
(also coded <A HREF="Compose.html">with entities</A> where possible).
|
||||||
|
It's taken from the MIT X sources in
|
||||||
|
<CODE>server/ddx/sun/Compose.list</CODE>.
|
||||||
|
|
||||||
|
<LI>Finally you could have a look at
|
||||||
|
<A HREF="ftp://ds.internic.net/rfc/rfc1345.txt">RFC 1345:
|
||||||
|
Character Mnemonics & Character Sets</A>
|
||||||
|
by K. Simonsen (06/11/92, 103 pages, approx. 240 kbyte).
|
||||||
|
|
||||||
|
</UL>
|
||||||
|
|
||||||
|
|
||||||
|
<HR>
|
||||||
|
|
||||||
|
<ADDRESS><A HREF="http://ramsch.home.pages.de/">Martin Ramsch</A>, 16.02.1994, 07.01.1996, 01.07.1996, 1998-10-09, 2000-05-15</ADDRESS>
|
||||||
|
|
||||||
|
</BODY>
|
||||||
|
</HTML>
|
232
tests/lynx-dump/data/iso-8859-1a.html.exp
Normal file
232
tests/lynx-dump/data/iso-8859-1a.html.exp
Normal file
@ -0,0 +1,232 @@
|
|||||||
|
iso8859-1 table, with cp-1252
|
||||||
|
|
||||||
|
Description Code Entity name
|
||||||
|
=================================== ============ ==============
|
||||||
|
quotation mark " --> " " --> "
|
||||||
|
ampersand & --> & & --> &
|
||||||
|
less-than sign < --> < < --> <
|
||||||
|
greater-than sign > --> > > --> >
|
||||||
|
|
||||||
|
Description Char Code Entity name
|
||||||
|
=================================== ==== ============ ==============
|
||||||
|
euro sign &128; -->
|
||||||
|
undefined &129; -->
|
||||||
|
single low-9 quotation mark &130; -->
|
||||||
|
latin small letter f with hook &131; -->
|
||||||
|
double low-9 quotation mark &132; -->
|
||||||
|
horizontal ellipsis &133; -->
|
||||||
|
dagger &134; -->
|
||||||
|
double dagger &135; -->
|
||||||
|
modifier letter circumflex accent &136; -->
|
||||||
|
per mille sign &137; -->
|
||||||
|
latin capital letter s with caron &138; -->
|
||||||
|
single left-pointing angle quote mark &139; -->
|
||||||
|
latin capital ligature oe &140; -->
|
||||||
|
undefined &141; -->
|
||||||
|
latin capital letter z with caron &142; -->
|
||||||
|
undefined &143; -->
|
||||||
|
|
||||||
|
undefined &144; -->
|
||||||
|
left single quotation mark &145; -->
|
||||||
|
right single quotation mark &146; -->
|
||||||
|
left double quotation mark &147; -->
|
||||||
|
right double quotation mark &148; -->
|
||||||
|
bullet &149; -->
|
||||||
|
en dash &150; -->
|
||||||
|
em dash &151; -->
|
||||||
|
small tilde &152; -->
|
||||||
|
trade mark sign &153; -->
|
||||||
|
latin small letter s with caron &154; -->
|
||||||
|
single right-pointing angle quote mark &155; -->
|
||||||
|
latin small ligature oe &156; -->
|
||||||
|
undefined &157; -->
|
||||||
|
latin small letter z with caron &158; -->
|
||||||
|
latin capital letter y with diaeresis &159; -->
|
||||||
|
|
||||||
|
non-breaking space   --> -->
|
||||||
|
inverted exclamation ¡ ¡ --> ¡ ¡ --> ¡
|
||||||
|
cent sign ¢ ¢ --> ¢ ¢ --> ¢
|
||||||
|
pound sterling £ £ --> £ £ --> £
|
||||||
|
general currency sign ¤ ¤ --> ¤ ¤ --> ¤
|
||||||
|
yen sign ¥ ¥ --> ¥ ¥ --> ¥
|
||||||
|
broken vertical bar ¦ ¦ --> ¦ ¦ --> ¦
|
||||||
|
Non-standard &brkbar; --> ¦
|
||||||
|
section sign § § --> § § --> §
|
||||||
|
umlaut (dieresis) ¨ ¨ --> ¨ ¨ --> ¨
|
||||||
|
Non-standard ¨ --> ¨
|
||||||
|
copyright © © --> © © --> ©
|
||||||
|
feminine ordinal ª ª --> ª ª --> ª
|
||||||
|
left angle quote, guillemotleft « « --> « « --> «
|
||||||
|
not sign ¬ ¬ --> ¬ ¬ --> ¬
|
||||||
|
soft hyphen ­ --> ­ -->
|
||||||
|
registered trademark ® ® --> ® ® --> ®
|
||||||
|
macron accent ¯ ¯ --> ¯ ¯ --> ¯
|
||||||
|
Non-standard &hibar; --> ¯
|
||||||
|
degree sign ° ° --> ° ° --> °
|
||||||
|
plus or minus ± ± --> ± ± --> ±
|
||||||
|
superscript two ² ² --> ² ² --> ²
|
||||||
|
superscript three ³ ³ --> ³ ³ --> ³
|
||||||
|
acute accent ´ ´ --> ´ ´ --> ´
|
||||||
|
micro sign µ µ --> µ µ --> µ
|
||||||
|
paragraph sign ¶ ¶ --> ¶ ¶ --> ¶
|
||||||
|
middle dot · · --> · · --> ·
|
||||||
|
cedilla ¸ ¸ --> ¸ ¸ --> ¸
|
||||||
|
superscript one ¹ ¹ --> ¹ ¹ --> ¹
|
||||||
|
masculine ordinal º º --> º º --> º
|
||||||
|
right angle quote, guillemotright » » --> » » --> »
|
||||||
|
fraction one-fourth ¼ ¼ --> ¼ ¼ --> ¼
|
||||||
|
fraction one-half ½ ½ --> ½ ½ --> ½
|
||||||
|
fraction three-fourths ¾ ¾ --> ¾ ¾ --> ¾
|
||||||
|
inverted question mark ¿ ¿ --> ¿ ¿ --> ¿
|
||||||
|
capital A, grave accent À À --> À À --> À
|
||||||
|
capital A, acute accent Á Á --> Á Á --> Á
|
||||||
|
capital A, circumflex accent   -->   --> Â
|
||||||
|
capital A, tilde à à --> à à --> Ã
|
||||||
|
capital A, dieresis or umlaut mark Ä Ä --> Ä Ä --> Ä
|
||||||
|
capital A, ring Å Å --> Å Å --> Å
|
||||||
|
capital AE diphthong (ligature) Æ Æ --> Æ Æ --> Æ
|
||||||
|
capital C, cedilla Ç Ç --> Ç Ç --> Ç
|
||||||
|
capital E, grave accent È È --> È È --> È
|
||||||
|
capital E, acute accent É É --> É É --> É
|
||||||
|
capital E, circumflex accent Ê Ê --> Ê Ê --> Ê
|
||||||
|
capital E, dieresis or umlaut mark Ë Ë --> Ë Ë --> Ë
|
||||||
|
capital I, grave accent Ì Ì --> Ì Ì --> Ì
|
||||||
|
capital I, acute accent Í Í --> Í Í --> Í
|
||||||
|
capital I, circumflex accent Î Î --> Î Î --> Î
|
||||||
|
capital I, dieresis or umlaut mark Ï Ï --> Ï Ï --> Ï
|
||||||
|
capital Eth, Icelandic Ð Ð --> Ð Ð --> Ð
|
||||||
|
Non-standard Đ --> Đ
|
||||||
|
capital N, tilde Ñ Ñ --> Ñ Ñ --> Ñ
|
||||||
|
capital O, grave accent Ò Ò --> Ò Ò --> Ò
|
||||||
|
capital O, acute accent Ó Ó --> Ó Ó --> Ó
|
||||||
|
capital O, circumflex accent Ô Ô --> Ô Ô --> Ô
|
||||||
|
capital O, tilde Õ Õ --> Õ Õ --> Õ
|
||||||
|
capital O, dieresis or umlaut mark Ö Ö --> Ö Ö --> Ö
|
||||||
|
multiply sign × × --> × × --> ×
|
||||||
|
capital O, slash Ø Ø --> Ø Ø --> Ø
|
||||||
|
capital U, grave accent Ù Ù --> Ù Ù --> Ù
|
||||||
|
capital U, acute accent Ú Ú --> Ú Ú --> Ú
|
||||||
|
capital U, circumflex accent Û Û --> Û Û --> Û
|
||||||
|
capital U, dieresis or umlaut mark Ü Ü --> Ü Ü --> Ü
|
||||||
|
capital Y, acute accent Ý Ý --> Ý Ý --> Ý
|
||||||
|
capital THORN, Icelandic Þ Þ --> Þ Þ --> Þ
|
||||||
|
small sharp s, German (sz ligature) ß ß --> ß ß --> ß
|
||||||
|
small a, grave accent à à --> à à --> à
|
||||||
|
small a, acute accent á á --> á á --> á
|
||||||
|
small a, circumflex accent â â --> â â --> â
|
||||||
|
small a, tilde ã ã --> ã ã --> ã
|
||||||
|
small a, dieresis or umlaut mark ä ä --> ä ä --> ä
|
||||||
|
small a, ring å å --> å å --> å
|
||||||
|
small ae diphthong (ligature) æ æ --> æ æ --> æ
|
||||||
|
small c, cedilla ç ç --> ç ç --> ç
|
||||||
|
small e, grave accent è è --> è è --> è
|
||||||
|
small e, acute accent é é --> é é --> é
|
||||||
|
small e, circumflex accent ê ê --> ê ê --> ê
|
||||||
|
small e, dieresis or umlaut mark ë ë --> ë ë --> ë
|
||||||
|
small i, grave accent ì ì --> ì ì --> ì
|
||||||
|
small i, acute accent í í --> í í --> í
|
||||||
|
small i, circumflex accent î î --> î î --> î
|
||||||
|
small i, dieresis or umlaut mark ï ï --> ï ï --> ï
|
||||||
|
small eth, Icelandic ð ð --> ð ð --> ð
|
||||||
|
small n, tilde ñ ñ --> ñ ñ --> ñ
|
||||||
|
small o, grave accent ò ò --> ò ò --> ò
|
||||||
|
small o, acute accent ó ó --> ó ó --> ó
|
||||||
|
small o, circumflex accent ô ô --> ô ô --> ô
|
||||||
|
small o, tilde õ õ --> õ õ --> õ
|
||||||
|
small o, dieresis or umlaut mark ö ö --> ö ö --> ö
|
||||||
|
division sign ÷ ÷ --> ÷ ÷ --> ÷
|
||||||
|
small o, slash ø ø --> ø ø --> ø
|
||||||
|
small u, grave accent ù ù --> ù ù --> ù
|
||||||
|
small u, acute accent ú ú --> ú ú --> ú
|
||||||
|
small u, circumflex accent û û --> û û --> û
|
||||||
|
small u, dieresis or umlaut mark ü ü --> ü ü --> ü
|
||||||
|
small y, acute accent ý ý --> ý ý --> ý
|
||||||
|
small thorn, Icelandic þ þ --> þ þ --> þ
|
||||||
|
small y, dieresis or umlaut mark ÿ ÿ --> ÿ ÿ --> ÿ
|
||||||
|
__________________________________________________________________
|
||||||
|
|
||||||
|
How to read this table. The columns are
|
||||||
|
1st:
|
||||||
|
textual description of the character
|
||||||
|
2nd:
|
||||||
|
character inserted directly into the HTML page as one byte
|
||||||
|
3rd:
|
||||||
|
character written as numeric HTML entity, in the format:
|
||||||
|
"how it looks literally" --> "what your browser does with it"
|
||||||
|
4th:
|
||||||
|
character written as symbolic HTML entity, in the format:
|
||||||
|
"how it looks literally" --> "what your browser does with it"
|
||||||
|
|
||||||
|
So for example, if you see something like "÷ --> ÷" in
|
||||||
|
the 4th column, this means your browser doesn't know about the entity
|
||||||
|
name "divide" and just puts it literally.
|
||||||
|
|
||||||
|
This table grew out of an overview of the "ISO Latin-1 Character Set"
|
||||||
|
overview related to the Hyper-G Text Format ([1]HTF). The entity names
|
||||||
|
&brkbar; and Đ seem to be unique to HTF. The entity name &hibar;
|
||||||
|
has been supported by X Mosaic but seems to be replaced with ¯.
|
||||||
|
The entity names ¨ and ¨ should be equivalent.
|
||||||
|
|
||||||
|
The standards stuff: The [2]HTML 2.0 Standard includes a section on
|
||||||
|
[3]Character Entity Sets and an overview on the [4]HTML Coded Character
|
||||||
|
Set (The entity names are derived from [5]ISO 8879).
|
||||||
|
Or have a look at the [6]Latin-1 Character Entities as listed in an
|
||||||
|
draft for the [7]HTML 3.0 specification.
|
||||||
|
The [8]Appendix II of CERN's [9]HTML+ Discussion Document contains a
|
||||||
|
[10]table (in PostScript format) of the proposed character entities for
|
||||||
|
HTML+ and their corresponding character codes for Unicode and the Adobe
|
||||||
|
Latin-1 & Symbol character sets.
|
||||||
|
|
||||||
|
Please note that there is nothing wrong with using characters of ISO
|
||||||
|
Latin-1 above 127: the normal transmission protocol for the WWW,
|
||||||
|
[11]HTTP/1.0, uses the 8bit ISO latin-1 as default encoding. (Thanks to
|
||||||
|
Roman Czyborra for pointing this out!)
|
||||||
|
|
||||||
|
Other information:
|
||||||
|
* Kevin J. Brewer has done two very good pages on the subject:
|
||||||
|
+ [12]ASCII - ISO 8859-1 (Latin-1) with HTML 3.0 Entities Table
|
||||||
|
and
|
||||||
|
+ [13]ISO 8879 Entities Gopher Menu
|
||||||
|
* The excellent overview on the series of [14]ISO 8859 character sets
|
||||||
|
compiled by Roman Czyborra.
|
||||||
|
* Also have a look on Alan Flavell's page of [15]pointers to
|
||||||
|
information about ISO8859. It's written very well!
|
||||||
|
* Maybe also of interest to you is the [16]ISO 8859-1 FAQ by Michael
|
||||||
|
Gschwind ([17]mike@vlsivie.tuwien.ac.at), part of his page on
|
||||||
|
[18]Internationalization.
|
||||||
|
* For users of X11R5 on SunOS systems: the [19]table over the compose
|
||||||
|
combinations (also coded [20]with entities where possible). It's
|
||||||
|
taken from the MIT X sources in server/ddx/sun/Compose.list.
|
||||||
|
* Finally you could have a look at [21]RFC 1345: Character Mnemonics
|
||||||
|
& Character Sets by K. Simonsen (06/11/92, 103 pages, approx. 240
|
||||||
|
kbyte).
|
||||||
|
__________________________________________________________________
|
||||||
|
|
||||||
|
|
||||||
|
[22]Martin Ramsch, 16.02.1994, 07.01.1996, 01.07.1996, 1998-10-09,
|
||||||
|
2000-05-15
|
||||||
|
|
||||||
|
References
|
||||||
|
|
||||||
|
1. http://www.hyperwave.de/HTFdoc
|
||||||
|
2. http://www.w3.org/hypertext/WWW/MarkUp/html-spec/
|
||||||
|
3. http://www.w3.org/hypertext/WWW/MarkUp/html-spec/html-spec_9.html#SEC99
|
||||||
|
4. http://www.w3.org/hypertext/WWW/MarkUp/html-spec/html-spec_13.html#SEC106
|
||||||
|
5. http://www.ucc.ie/info/net/isolat1.html
|
||||||
|
6. http://www.w3.org/hypertext/WWW/MarkUp/html3/latin1.html
|
||||||
|
7. http://www.w3.org/hypertext/WWW/MarkUp/html3/CoverPage.html
|
||||||
|
8. http://www.w3.org/hypertext/WWW/MarkUp/HTMLPlus/htmlplus_59.html
|
||||||
|
9. http://www.w3.org/hypertext/WWW/MarkUp/HTMLPlus/htmlplus_1.html
|
||||||
|
10. http://www.w3.org/hypertext/WWW/MarkUp/HTMLPlus/htmlplus_table.ps
|
||||||
|
11. http://www.w3.org/pub/WWW/Protocols/rfc1945/rfc1945
|
||||||
|
12. http://www.bbsinc.com/iso8859.html
|
||||||
|
13. http://www.bbsinc.com/iso8879.html
|
||||||
|
14. http://czyborra.com/charsets/iso8859.html
|
||||||
|
15. http://ppewww.ph.gla.ac.uk/~flavell/iso8859/iso8859-pointers.html
|
||||||
|
16. ftp://ftp.vlsivie.tuwien.ac.at/pub/8bit/FAQ-ISO-8859-1
|
||||||
|
17. mailto:mike@vlsivie.tuwien.ac.at
|
||||||
|
18. http://www.vlsivie.tuwien.ac.at/mike/i18n.html
|
||||||
|
19. http://www.ramsch.org/martin/uni/fmi-hp/Compose.txt
|
||||||
|
20. http://www.ramsch.org/martin/uni/fmi-hp/Compose.html
|
||||||
|
21. ftp://ds.internic.net/rfc/rfc1345.txt
|
||||||
|
22. http://ramsch.home.pages.de/
|
174
tests/lynx-dump/data/iso-8859-2.html
Normal file
174
tests/lynx-dump/data/iso-8859-2.html
Normal file
@ -0,0 +1,174 @@
|
|||||||
|
<!DOCTYPE html PUBLIC "-//IETF//DTD HTML 3.0//EN">
|
||||||
|
<!-- X-URL: http://www.uni-passau.de/~ramsch/iso8859-1.html -->
|
||||||
|
<HTML>
|
||||||
|
<HEAD>
|
||||||
|
<TITLE>Martin Ramsch's character table modified and enhanced for iso8859-2</TITLE>
|
||||||
|
<META HTTP-EQUIV="Content-Type" CONTENT="text/html; charset=iso-8859-2">
|
||||||
|
<LINK REV="made" HREF="mailto:lynx-dev@nongnu.org">
|
||||||
|
<LINK REV="owner" HREF="http://mail.gnu.org/mailman/listinfo/lynx-dev/">
|
||||||
|
<LINK REL="sibling" HREF="iso-8859-1.html" TITLE="iso-8859-1 test">
|
||||||
|
<LINK REL="sibling" HREF="ALT88592.html" TITLE="iso-8859-2 ALT test">
|
||||||
|
</HEAD>
|
||||||
|
|
||||||
|
<BODY>
|
||||||
|
|
||||||
|
<H1 ALIGN=center>iso8859-2 plus table</H1>
|
||||||
|
|
||||||
|
<PRE>
|
||||||
|
Description Code Entity name
|
||||||
|
=================================== ============ ==============
|
||||||
|
quotation mark &#34; --> " &quot; --> "
|
||||||
|
ampersand &#38; --> & &amp; --> &
|
||||||
|
less-than sign &#60; --> < &lt; --> <
|
||||||
|
greater-than sign &#62; --> > &gt; --> >
|
||||||
|
|
||||||
|
Description Char Code Entity name
|
||||||
|
=================================== ==== ============ ==============
|
||||||
|
non-breaking space &#160; -->   &nbsp; -->
|
||||||
|
capital A, ogonek Ą &#260; --> Ą &Aogon; --> Ą
|
||||||
|
breve {˘} {&#728;}-->{˘} {&breve;} -->{˘}
|
||||||
|
capital L, stroke Ł &#321; --> Ł &Lstrok; --> Ł
|
||||||
|
general currency sign ¤ &#164; --> ¤ &curren; --> ¤
|
||||||
|
capital L, caron Ľ &#317; --> Ľ &Lcaron; --> Ľ
|
||||||
|
capital S, acute accent Ś &#346; --> Ś &Sacute; --> Ś
|
||||||
|
section sign § &#167; --> § &sect; --> §
|
||||||
|
umlaut (dieresis) ¨ &#168; --> ¨ &uml; --> ¨
|
||||||
|
&die; --> ¨
|
||||||
|
capital S, caron Š &#352; --> Š &Scaron; --> Š
|
||||||
|
capital S, cedilla Ş &#350; --> Ş &Scedil; --> Ş
|
||||||
|
capital T, caron Ť &#356; --> Ť &Tcaron; --> Ť
|
||||||
|
capital Z, acute accent Ź &#377; --> Ź &Zacute; --> Ź
|
||||||
|
soft hyphen [] [&#173;]-->[­] [&shy;] -->[­]
|
||||||
|
capital Z, caron Ž &#381; --> Ž &Zcaron; --> Ž
|
||||||
|
capital Z, dot above Ż &#379; --> Ż &Zdot; --> Ż
|
||||||
|
degree sign ° &#176; --> ° &deg; --> °
|
||||||
|
small a, ogonek ą &#261; --> ą &aogon; --> ą
|
||||||
|
ogonek {˛} {&#731;}-->{˛} {&ogon;} -->{˛}
|
||||||
|
small l, stroke ł &#322; --> ł &lstrok; --> ł
|
||||||
|
acute accent ´ &#180; --> ´ &acute; --> ´
|
||||||
|
small l, caron ľ &#318; --> ľ &lcaron; --> ľ
|
||||||
|
small s, acute accent ś &#347; --> ś &sacute; --> ś
|
||||||
|
caron {ˇ} {&#711;}-->{ˇ} {&caron;} -->{ˇ}
|
||||||
|
cedilla ¸ &#184; --> ¸ &cedil; --> ¸
|
||||||
|
small s, caron š &#353; --> š &scaron; --> š
|
||||||
|
small s, cedilla ş &#351; --> ş &scedil; --> ş
|
||||||
|
small t, caron ť &#357; --> ť &tcaron; --> ť
|
||||||
|
small z, acute accent ź &#378; --> ź &zacute; --> ź
|
||||||
|
double acute accent {˝} {&#733;}-->{˝} {&dblac;} -->{˝}
|
||||||
|
small z, caron ž &#382; --> ž &zcaron; --> ž
|
||||||
|
small z, dot above ż &#380; --> ż &zdot; --> ż
|
||||||
|
capital R, acute accent Ŕ &#340; --> Ŕ &Racute; --> Ŕ
|
||||||
|
capital A, acute accent Á &#193; --> Á &Aacute; --> Á
|
||||||
|
capital A, circumflex accent  &#194; -->  &Acirc; --> Â
|
||||||
|
capital A, breve Ă &#258; --> Ă &Abreve; --> Ă
|
||||||
|
capital A, dieresis or umlaut mark Ä &#196; --> Ä &Auml; --> Ä
|
||||||
|
capital L, acute accent Ĺ &#313; --> Ĺ &Lacute; --> Ĺ
|
||||||
|
capital C, acute accent Ć &#262; --> Ć &Cacute; --> Ć
|
||||||
|
capital C, cedilla Ç &#199; --> Ç &Ccedil; --> Ç
|
||||||
|
capital C, caron Č &#268; --> Č &Ccaron; --> Č
|
||||||
|
capital E, acute accent É &#201; --> É &Eacute; --> É
|
||||||
|
capital E, ogonek Ę &#280; --> Ę &Eogon; --> Ę
|
||||||
|
capital E, dieresis or umlaut mark Ë &#203; --> Ë &Euml; --> Ë
|
||||||
|
capital E, caron Ě &#282; --> Ě &Ecaron; --> Ě
|
||||||
|
capital I, acute accent Í &#205; --> Í &Iacute; --> Í
|
||||||
|
capital I, circumflex accent Î &#206; --> Î &Icirc; --> Î
|
||||||
|
capital D, caron Ď &#270; --> Ď &Dcaron; --> Ď
|
||||||
|
capital D, stroke Đ &#272; --> Đ &Dstrok; --> Đ
|
||||||
|
capital Eth, Icelandic N/A &#208; --> Ð &ETH; --> Ð
|
||||||
|
capital N, acute accent Ń &#323; --> Ń &Nacute; --> Ń
|
||||||
|
capital N, caron Ň &#327; --> Ň &Ncaron; --> Ň
|
||||||
|
capital O, acute accent Ó &#211; --> Ó &Oacute; --> Ó
|
||||||
|
capital O, circumflex accent Ô &#212; --> Ô &Ocirc; --> Ô
|
||||||
|
capital O, double acute accent Ő &#368; --> Ű &Odblac; --> Ő
|
||||||
|
capital O, dieresis or umlaut mark Ö &#214; --> Ö &Ouml; --> Ö
|
||||||
|
multiply sign × &#215; --> × &times; --> ×
|
||||||
|
capital R, caron Ř &#344; --> Ř &Rcaron; --> Ř
|
||||||
|
capital U, ring Ů &#366; --> Ů &Uring; --> Ů
|
||||||
|
capital U, acute accent Ú &#218; --> Ú &Uacute; --> Ú
|
||||||
|
capital U, double acute accent Ű &#368; --> Ű &Udblac; --> Ű
|
||||||
|
capital U, dieresis or umlaut mark Ü &#220; --> Ü &Uuml; --> Ü
|
||||||
|
capital Y, acute accent Ý &#221; --> Ý &Yacute; --> Ý
|
||||||
|
capital T, cedilla Ţ &#354; --> Ţ &Tcedil; --> Ţ
|
||||||
|
small sharp s, German (sz ligature) ß &#223; --> ß &szlig; --> ß
|
||||||
|
small r, acute accent ŕ &#341; --> ŕ &racute; --> ŕ
|
||||||
|
small a, acute accent á &#225; --> á &aacute; --> á
|
||||||
|
small a, circumflex accent â &#226; --> â &acirc; --> â
|
||||||
|
small a, breve ă &#259; --> ă &abreve; --> ă
|
||||||
|
small a, dieresis or umlaut mark ä &#228; --> ä &auml; --> ä
|
||||||
|
small l, acute accent ĺ &#314; --> ĺ &lacute; --> ĺ
|
||||||
|
small c, acute accent ć &#263; --> ć &cacute; --> ć
|
||||||
|
small c, cedilla ç &#231; --> ç &ccedil; --> ç
|
||||||
|
small c, caron č &#269; --> č &ccaron; --> č
|
||||||
|
small e, acute accent é &#233; --> é &eacute; --> é
|
||||||
|
small e, ogonek ę &#281; --> ę &eogon; --> ę
|
||||||
|
small e, dieresis or umlaut mark ë &#235; --> ë &euml; --> ë
|
||||||
|
small e, caron ě &#283; --> ě &ecaron; --> ě
|
||||||
|
small i, acute accent í &#237; --> í &iacute; --> í
|
||||||
|
small i, circumflex accent î &#238; --> î &icirc; --> î
|
||||||
|
small d, caron ď &#271; --> ď &dcaron; --> ď
|
||||||
|
small d, stroke đ &#273; --> đ &dstrok; --> đ
|
||||||
|
small eth, Icelandic N/A &#240; --> ð &eth; --> ð
|
||||||
|
small n, acute accent ń &#324; --> ń &nacute; --> ń
|
||||||
|
small n, caron ň &#328; --> ň &ncaron; --> ň
|
||||||
|
small o, acute accent ó &#243; --> ó &oacute; --> ó
|
||||||
|
small o, circumflex accent ô &#244; --> ô &ocirc; --> ô
|
||||||
|
small o, double acute accent ő &#369; --> ű &odblac; --> ő
|
||||||
|
small o, dieresis or umlaut mark ö &#246; --> ö &ouml; --> ö
|
||||||
|
division sign ÷ &#247; --> ÷ &divide; --> ÷
|
||||||
|
small r, caron ř &#345; --> ř &rcaron; --> ř
|
||||||
|
small u, ring ů &#367; --> ů &uring; --> ů
|
||||||
|
small u, acute accent ú &#250; --> ú &uacute; --> ú
|
||||||
|
small u, double acute accent ű &#369; --> ű &udblac; --> ű
|
||||||
|
small u, dieresis or umlaut mark ü &#252; --> ü &uuml; --> ü
|
||||||
|
small y, acute accent ý &#253; --> ý &yacute; --> ý
|
||||||
|
small t, cedilla ţ &#355; --> ţ &tcedil; --> ţ
|
||||||
|
dot above {˙} {&#729;}-->{˙} {&dot;} -->{˙}
|
||||||
|
|
||||||
|
Some other characters of interest Char Code Entity name
|
||||||
|
=================================== ==== ============ ==============
|
||||||
|
capital AE diphthong (ligature) N/A &#198; --> Æ &AElig; --> Æ
|
||||||
|
small ae diphthong (ligature) N/A &#230; --> æ &aelig; --> æ
|
||||||
|
capital OE ligature N/A {&#338;}-->{Œ} {&OElig;} -->{Œ}
|
||||||
|
small oe ligature N/A {&#339;}-->{œ} {&oelig;} -->{œ}
|
||||||
|
copyright N/A &#169; --> © &copy; --> ©
|
||||||
|
registered trademark N/A &#174; --> ® &reg; --> ®
|
||||||
|
trademark sign N/A &#8482;--> ™ &trade; --> ™
|
||||||
|
em space N/A [&#8195;]->[ ] [&emsp;] -->[ ]
|
||||||
|
en space N/A [&#8194;]->[ ] [&ensp;] -->[ ]
|
||||||
|
1/3-em space N/A [&#8196;]->[ ] [&emsp13;] -->[ ]
|
||||||
|
1/4-em space N/A [&#8197;]->[ ] [&emsp14;] -->[ ]
|
||||||
|
thin space N/A [&#8201;]->[ ] [&thinsp;]-->[ ]
|
||||||
|
hair space N/A [&#8202;]->[ ] [&hairsp;]-->[ ]
|
||||||
|
em dash N/A [&#8212;]->[—] [&mdash;] -->[—]
|
||||||
|
en dash N/A [&#8211;]->[–] [&ndash;] -->[–]
|
||||||
|
|
||||||
|
</PRE><!-- </PRE> no HotJava preBeta hackx - kw -->
|
||||||
|
<!-- second /PRE is a hack for HotJava 1.0 preBeta 1 -->
|
||||||
|
<HR>
|
||||||
|
<P>
|
||||||
|
Characters not found in ISO-8859-2 have "N/A" in the <TT>Char</TT> column.
|
||||||
|
Some characters for which I could not find entity names in either
|
||||||
|
<A HREF="http://www.internic.net/rfc/rfc2070.txt">RFC 2070</A>
|
||||||
|
or the
|
||||||
|
<A HREF="ftp://www.ucc.ie/pub/sgml/">ISOlat1, ISOlat2, ISOnum, ISOpub and ISOtech</A>
|
||||||
|
sets (the ones included by Peter Flynn's
|
||||||
|
<A HREF="http://www.ucc.ie/doc/www/html/dtds/htmlpro.html">HTML Pro DTD</A>)
|
||||||
|
are shown enclosed in <TT>{</TT>braces<TT>}</TT>.
|
||||||
|
</P>
|
||||||
|
<P>
|
||||||
|
There also is a variation of this table which tests
|
||||||
|
<A HREF="ALT88592.html">ISO-8859-2 characters and entities in ALT attributes</A>.
|
||||||
|
</P>
|
||||||
|
<P>
|
||||||
|
See Martin Ramsch's original
|
||||||
|
<A CHARSET="iso-8859-1" HREF="http://www.uni-passau.de/~ramsch/iso8859-1.html">ISO-8859-1 Table</A>
|
||||||
|
for related info and links, and for some notes on entity names.
|
||||||
|
This file is mostly just an adaptation of his table
|
||||||
|
to the ISO-8859-2 character set.
|
||||||
|
</P>
|
||||||
|
<HR>
|
||||||
|
|
||||||
|
<ADDRESS>kweide@tezcat.com 1997-03-09</ADDRESS>
|
||||||
|
|
||||||
|
</BODY>
|
||||||
|
</HTML>
|
159
tests/lynx-dump/data/iso-8859-2.html.exp
Normal file
159
tests/lynx-dump/data/iso-8859-2.html.exp
Normal file
@ -0,0 +1,159 @@
|
|||||||
|
#[1]iso-8859-1 test [2]iso-8859-2 ALT test
|
||||||
|
|
||||||
|
iso8859-2 plus table
|
||||||
|
|
||||||
|
Description Code Entity name
|
||||||
|
=================================== ============ ==============
|
||||||
|
quotation mark " --> " " --> "
|
||||||
|
ampersand & --> & & --> &
|
||||||
|
less-than sign < --> < < --> <
|
||||||
|
greater-than sign > --> > > --> >
|
||||||
|
|
||||||
|
Description Char Code Entity name
|
||||||
|
=================================== ==== ============ ==============
|
||||||
|
non-breaking space   --> -->
|
||||||
|
capital A, ogonek Ą Ą --> Ą Ą --> Ą
|
||||||
|
breve {˘} {˘}-->{˘} {˘} -->{˘}
|
||||||
|
capital L, stroke Ł Ł --> Ł Ł --> Ł
|
||||||
|
general currency sign ¤ ¤ --> ¤ ¤ --> ¤
|
||||||
|
capital L, caron Ľ Ľ --> Ľ Ľ --> Ľ
|
||||||
|
capital S, acute accent Ś Ś --> Ś Ś --> Ś
|
||||||
|
section sign § § --> § § --> §
|
||||||
|
umlaut (dieresis) ¨ ¨ --> ¨ ¨ --> ¨
|
||||||
|
¨ --> ¨
|
||||||
|
capital S, caron Š Š --> Š Š --> Š
|
||||||
|
capital S, cedilla Ş Ş --> Ş Ş --> Ş
|
||||||
|
capital T, caron Ť Ť --> Ť Ť --> Ť
|
||||||
|
capital Z, acute accent Ź Ź --> Ź Ź --> Ź
|
||||||
|
soft hyphen [] [­]-->[] [­] -->[]
|
||||||
|
capital Z, caron Ž Ž --> Ž Ž --> Ž
|
||||||
|
capital Z, dot above Ż Ż --> Ż Ż --> Ż
|
||||||
|
degree sign ° ° --> ° ° --> °
|
||||||
|
small a, ogonek ą ą --> ą ą --> ą
|
||||||
|
ogonek {˛} {˛}-->{˛} {˛} -->{˛}
|
||||||
|
small l, stroke ł ł --> ł ł --> ł
|
||||||
|
acute accent ´ ´ --> ´ ´ --> ´
|
||||||
|
small l, caron ľ ľ --> ľ ľ --> ľ
|
||||||
|
small s, acute accent ś ś --> ś ś --> ś
|
||||||
|
caron {ˇ} {ˇ}-->{ˇ} {ˇ} -->{ˇ}
|
||||||
|
cedilla ¸ ¸ --> ¸ ¸ --> ¸
|
||||||
|
small s, caron š š --> š š --> š
|
||||||
|
small s, cedilla ş ş --> ş ş --> ş
|
||||||
|
small t, caron ť ť --> ť ť --> ť
|
||||||
|
small z, acute accent ź ź --> ź ź --> ź
|
||||||
|
double acute accent {˝} {˝}-->{˝} {˝} -->{˝}
|
||||||
|
small z, caron ž ž --> ž ž --> ž
|
||||||
|
small z, dot above ż ż --> ż ż --> ż
|
||||||
|
capital R, acute accent Ŕ Ŕ --> Ŕ Ŕ --> Ŕ
|
||||||
|
capital A, acute accent Á Á --> Á Á --> Á
|
||||||
|
capital A, circumflex accent   -->   --> Â
|
||||||
|
capital A, breve Ă Ă --> Ă Ă --> Ă
|
||||||
|
capital A, dieresis or umlaut mark Ä Ä --> Ä Ä --> Ä
|
||||||
|
capital L, acute accent Ĺ Ĺ --> Ĺ Ĺ --> Ĺ
|
||||||
|
capital C, acute accent Ć Ć --> Ć Ć --> Ć
|
||||||
|
capital C, cedilla Ç Ç --> Ç Ç --> Ç
|
||||||
|
capital C, caron Č Č --> Č Č --> Č
|
||||||
|
capital E, acute accent É É --> É É --> É
|
||||||
|
capital E, ogonek Ę Ę --> Ę Ę --> Ę
|
||||||
|
capital E, dieresis or umlaut mark Ë Ë --> Ë Ë --> Ë
|
||||||
|
capital E, caron Ě Ě --> Ě Ě --> Ě
|
||||||
|
capital I, acute accent Í Í --> Í Í --> Í
|
||||||
|
capital I, circumflex accent Î Î --> Î Î --> Î
|
||||||
|
capital D, caron Ď Ď --> Ď Ď --> Ď
|
||||||
|
capital D, stroke Đ Đ --> Đ Đ --> Đ
|
||||||
|
capital Eth, Icelandic N/A Ð --> Ð Ð --> Ð
|
||||||
|
capital N, acute accent Ń Ń --> Ń Ń --> Ń
|
||||||
|
capital N, caron Ň Ň --> Ň Ň --> Ň
|
||||||
|
capital O, acute accent Ó Ó --> Ó Ó --> Ó
|
||||||
|
capital O, circumflex accent Ô Ô --> Ô Ô --> Ô
|
||||||
|
capital O, double acute accent Ő Ű --> Ű Ő --> Ő
|
||||||
|
capital O, dieresis or umlaut mark Ö Ö --> Ö Ö --> Ö
|
||||||
|
multiply sign × × --> × × --> ×
|
||||||
|
capital R, caron Ř Ř --> Ř Ř --> Ř
|
||||||
|
capital U, ring Ů Ů --> Ů Ů --> Ů
|
||||||
|
capital U, acute accent Ú Ú --> Ú Ú --> Ú
|
||||||
|
capital U, double acute accent Ű Ű --> Ű Ű --> Ű
|
||||||
|
capital U, dieresis or umlaut mark Ü Ü --> Ü Ü --> Ü
|
||||||
|
capital Y, acute accent Ý Ý --> Ý Ý --> Ý
|
||||||
|
capital T, cedilla Ţ Ţ --> Ţ Ţ --> Ţ
|
||||||
|
small sharp s, German (sz ligature) ß ß --> ß ß --> ß
|
||||||
|
small r, acute accent ŕ ŕ --> ŕ ŕ --> ŕ
|
||||||
|
small a, acute accent á á --> á á --> á
|
||||||
|
small a, circumflex accent â â --> â â --> â
|
||||||
|
small a, breve ă ă --> ă ă --> ă
|
||||||
|
small a, dieresis or umlaut mark ä ä --> ä ä --> ä
|
||||||
|
small l, acute accent ĺ ĺ --> ĺ ĺ --> ĺ
|
||||||
|
small c, acute accent ć ć --> ć ć --> ć
|
||||||
|
small c, cedilla ç ç --> ç ç --> ç
|
||||||
|
small c, caron č č --> č č --> č
|
||||||
|
small e, acute accent é é --> é é --> é
|
||||||
|
small e, ogonek ę ę --> ę ę --> ę
|
||||||
|
small e, dieresis or umlaut mark ë ë --> ë ë --> ë
|
||||||
|
small e, caron ě ě --> ě ě --> ě
|
||||||
|
small i, acute accent í í --> í í --> í
|
||||||
|
small i, circumflex accent î î --> î î --> î
|
||||||
|
small d, caron ď ď --> ď ď --> ď
|
||||||
|
small d, stroke đ đ --> đ đ --> đ
|
||||||
|
small eth, Icelandic N/A ð --> ð ð --> ð
|
||||||
|
small n, acute accent ń ń --> ń ń --> ń
|
||||||
|
small n, caron ň ň --> ň ň --> ň
|
||||||
|
small o, acute accent ó ó --> ó ó --> ó
|
||||||
|
small o, circumflex accent ô ô --> ô ô --> ô
|
||||||
|
small o, double acute accent ő ű --> ű ő --> ő
|
||||||
|
small o, dieresis or umlaut mark ö ö --> ö ö --> ö
|
||||||
|
division sign ÷ ÷ --> ÷ ÷ --> ÷
|
||||||
|
small r, caron ř ř --> ř ř --> ř
|
||||||
|
small u, ring ů ů --> ů ů --> ů
|
||||||
|
small u, acute accent ú ú --> ú ú --> ú
|
||||||
|
small u, double acute accent ű ű --> ű ű --> ű
|
||||||
|
small u, dieresis or umlaut mark ü ü --> ü ü --> ü
|
||||||
|
small y, acute accent ý ý --> ý ý --> ý
|
||||||
|
small t, cedilla ţ ţ --> ţ ţ --> ţ
|
||||||
|
dot above {˙} {˙}-->{˙} {˙} -->{˙}
|
||||||
|
|
||||||
|
Some other characters of interest Char Code Entity name
|
||||||
|
=================================== ==== ============ ==============
|
||||||
|
capital AE diphthong (ligature) N/A Æ --> Æ Æ --> Æ
|
||||||
|
small ae diphthong (ligature) N/A æ --> æ æ --> æ
|
||||||
|
capital OE ligature N/A {Œ}-->{Œ} {Œ} -->{Œ}
|
||||||
|
small oe ligature N/A {œ}-->{œ} {œ} -->{œ}
|
||||||
|
copyright N/A © --> © © --> ©
|
||||||
|
registered trademark N/A ® --> ® ® --> ®
|
||||||
|
trademark sign N/A ™--> ™ ™ --> ™
|
||||||
|
em space N/A [ ]->[ ] [ ] -->[ ]
|
||||||
|
en space N/A [ ]->[ ] [ ] -->[ ]
|
||||||
|
1/3-em space N/A [ ]->[ ] [ ] -->[ ]
|
||||||
|
1/4-em space N/A [ ]->[ ] [ ] -->[ ]
|
||||||
|
thin space N/A [ ]->[ ] [ ]-->[ ]
|
||||||
|
hair space N/A [ ]->[ ] [ ]-->[ ]
|
||||||
|
em dash N/A [—]->[—] [—] -->[—]
|
||||||
|
en dash N/A [–]->[–] [–] -->[–]
|
||||||
|
|
||||||
|
__________________________________________________________________
|
||||||
|
|
||||||
|
Characters not found in ISO-8859-2 have "N/A" in the Char column. Some
|
||||||
|
characters for which I could not find entity names in either [3]RFC
|
||||||
|
2070 or the [4]ISOlat1, ISOlat2, ISOnum, ISOpub and ISOtech sets (the
|
||||||
|
ones included by Peter Flynn's [5]HTML Pro DTD) are shown enclosed in
|
||||||
|
{braces}.
|
||||||
|
|
||||||
|
There also is a variation of this table which tests [6]ISO-8859-2
|
||||||
|
characters and entities in ALT attributes.
|
||||||
|
|
||||||
|
See Martin Ramsch's original [7]ISO-8859-1 Table for related info and
|
||||||
|
links, and for some notes on entity names. This file is mostly just an
|
||||||
|
adaptation of his table to the ISO-8859-2 character set.
|
||||||
|
__________________________________________________________________
|
||||||
|
|
||||||
|
|
||||||
|
kweide@tezcat.com 1997-03-09
|
||||||
|
|
||||||
|
References
|
||||||
|
|
||||||
|
1.
|
||||||
|
2.
|
||||||
|
3. http://www.internic.net/rfc/rfc2070.txt
|
||||||
|
4. ftp://www.ucc.ie/pub/sgml/
|
||||||
|
5. http://www.ucc.ie/doc/www/html/dtds/htmlpro.html
|
||||||
|
6.
|
||||||
|
7. http://www.uni-passau.de/~ramsch/iso8859-1.html
|
208
tests/lynx-dump/data/iso-8859-2a.html
Normal file
208
tests/lynx-dump/data/iso-8859-2a.html
Normal file
@ -0,0 +1,208 @@
|
|||||||
|
<!DOCTYPE html PUBLIC "-//IETF//DTD HTML 3.0//EN">
|
||||||
|
<!-- X-URL: http://www.uni-passau.de/~ramsch/iso8859-1.html -->
|
||||||
|
<HTML>
|
||||||
|
<HEAD>
|
||||||
|
<TITLE>Martin Ramsch's character table modified and enhanced for iso8859-2</TITLE>
|
||||||
|
<META HTTP-EQUIV="Content-Type" CONTENT="text/html; charset=iso-8859-2">
|
||||||
|
<LINK REV="made" HREF="mailto:lynx-dev@nongnu.org">
|
||||||
|
<LINK REV="owner" HREF="http://mail.gnu.org/mailman/listinfo/lynx-dev/">
|
||||||
|
<LINK REL="sibling" HREF="iso-8859-1.html" TITLE="iso-8859-1 test">
|
||||||
|
<LINK REL="sibling" HREF="ALT88592.html" TITLE="iso-8859-2 ALT test">
|
||||||
|
</HEAD>
|
||||||
|
|
||||||
|
<BODY>
|
||||||
|
|
||||||
|
<H1 ALIGN=center>iso8859-2 plus table, and cp-1252</H1>
|
||||||
|
|
||||||
|
<PRE>
|
||||||
|
Description Code Entity name
|
||||||
|
=================================== ============ ==============
|
||||||
|
quotation mark &#34; --> " &quot; --> "
|
||||||
|
ampersand &#38; --> & &amp; --> &
|
||||||
|
less-than sign &#60; --> < &lt; --> <
|
||||||
|
greater-than sign &#62; --> > &gt; --> >
|
||||||
|
|
||||||
|
Description Char Code Entity name
|
||||||
|
=================================== ==== ============ ==============
|
||||||
|
euro sign € &128; --> €
|
||||||
|
undefined <20> &129; --> 
|
||||||
|
single low-9 quotation mark ‚ &130; --> ‚
|
||||||
|
latin small letter f with hook ƒ &131; --> ƒ
|
||||||
|
double low-9 quotation mark „ &132; --> „
|
||||||
|
horizontal ellipsis … &133; --> …
|
||||||
|
dagger † &134; --> †
|
||||||
|
double dagger ‡ &135; --> ‡
|
||||||
|
modifier letter circumflex accent ˆ &136; --> ˆ
|
||||||
|
per mille sign ‰ &137; --> ‰
|
||||||
|
latin capital letter s with caron Š &138; --> Š
|
||||||
|
single left-pointing angle quote mark ‹ &139; --> ‹
|
||||||
|
latin capital ligature oe Œ &140; --> Œ
|
||||||
|
undefined <20> &141; --> 
|
||||||
|
latin capital letter z with caron Ž &142; --> Ž
|
||||||
|
undefined <20> &143; --> 
|
||||||
|
|
||||||
|
undefined <20> &144; --> 
|
||||||
|
left single quotation mark ‘ &145; --> ‘
|
||||||
|
right single quotation mark ’ &146; --> ’
|
||||||
|
left double quotation mark “ &147; --> “
|
||||||
|
right double quotation mark ” &148; --> ”
|
||||||
|
bullet • &149; --> •
|
||||||
|
en dash – &150; --> –
|
||||||
|
em dash — &151; --> —
|
||||||
|
small tilde ˜ &152; --> ˜
|
||||||
|
trade mark sign ™ &153; --> ™
|
||||||
|
latin small letter s with caron š &154; --> š
|
||||||
|
single right-pointing angle quote mark › &155; --> ›
|
||||||
|
latin small ligature oe œ &156; --> œ
|
||||||
|
undefined <20> &157; --> 
|
||||||
|
latin small letter z with caron ž &158; --> ž
|
||||||
|
latin capital letter y with diaeresis Ÿ &159; --> Ÿ
|
||||||
|
|
||||||
|
non-breaking space &#160; -->   &nbsp; -->
|
||||||
|
capital A, ogonek ¡ &#260; --> Ą &Aogon; --> Ą
|
||||||
|
breve {¢} {&#728;}-->{˘} {&breve;} -->{˘}
|
||||||
|
capital L, stroke £ &#321; --> Ł &Lstrok; --> Ł
|
||||||
|
general currency sign ¤ &#164; --> ¤ &curren; --> ¤
|
||||||
|
capital L, caron ¥ &#317; --> Ľ &Lcaron; --> Ľ
|
||||||
|
capital S, acute accent ¦ &#346; --> Ś &Sacute; --> Ś
|
||||||
|
section sign § &#167; --> § &sect; --> §
|
||||||
|
umlaut (dieresis) ¨ &#168; --> ¨ &uml; --> ¨
|
||||||
|
&die; --> ¨
|
||||||
|
capital S, caron © &#352; --> Š &Scaron; --> Š
|
||||||
|
capital S, cedilla ª &#350; --> Ş &Scedil; --> Ş
|
||||||
|
capital T, caron « &#356; --> Ť &Tcaron; --> Ť
|
||||||
|
capital Z, acute accent ¬ &#377; --> Ź &Zacute; --> Ź
|
||||||
|
soft hyphen [] [&#173;]-->[­] [&shy;] -->[­]
|
||||||
|
capital Z, caron ® &#381; --> Ž &Zcaron; --> Ž
|
||||||
|
capital Z, dot above ¯ &#379; --> Ż &Zdot; --> Ż
|
||||||
|
degree sign ° &#176; --> ° &deg; --> °
|
||||||
|
small a, ogonek ± &#261; --> ą &aogon; --> ą
|
||||||
|
ogonek {²} {&#731;}-->{˛} {&ogon;} -->{˛}
|
||||||
|
small l, stroke ³ &#322; --> ł &lstrok; --> ł
|
||||||
|
acute accent ´ &#180; --> ´ &acute; --> ´
|
||||||
|
small l, caron µ &#318; --> ľ &lcaron; --> ľ
|
||||||
|
small s, acute accent ¶ &#347; --> ś &sacute; --> ś
|
||||||
|
caron {·} {&#711;}-->{ˇ} {&caron;} -->{ˇ}
|
||||||
|
cedilla ¸ &#184; --> ¸ &cedil; --> ¸
|
||||||
|
small s, caron ¹ &#353; --> š &scaron; --> š
|
||||||
|
small s, cedilla º &#351; --> ş &scedil; --> ş
|
||||||
|
small t, caron » &#357; --> ť &tcaron; --> ť
|
||||||
|
small z, acute accent ¼ &#378; --> ź &zacute; --> ź
|
||||||
|
double acute accent {½} {&#733;}-->{˝} {&dblac;} -->{˝}
|
||||||
|
small z, caron ¾ &#382; --> ž &zcaron; --> ž
|
||||||
|
small z, dot above ¿ &#380; --> ż &zdot; --> ż
|
||||||
|
capital R, acute accent À &#340; --> Ŕ &Racute; --> Ŕ
|
||||||
|
capital A, acute accent Á &#193; --> Á &Aacute; --> Á
|
||||||
|
capital A, circumflex accent  &#194; -->  &Acirc; --> Â
|
||||||
|
capital A, breve à &#258; --> Ă &Abreve; --> Ă
|
||||||
|
capital A, dieresis or umlaut mark Ä &#196; --> Ä &Auml; --> Ä
|
||||||
|
capital L, acute accent Å &#313; --> Ĺ &Lacute; --> Ĺ
|
||||||
|
capital C, acute accent Æ &#262; --> Ć &Cacute; --> Ć
|
||||||
|
capital C, cedilla Ç &#199; --> Ç &Ccedil; --> Ç
|
||||||
|
capital C, caron È &#268; --> Č &Ccaron; --> Č
|
||||||
|
capital E, acute accent É &#201; --> É &Eacute; --> É
|
||||||
|
capital E, ogonek Ê &#280; --> Ę &Eogon; --> Ę
|
||||||
|
capital E, dieresis or umlaut mark Ë &#203; --> Ë &Euml; --> Ë
|
||||||
|
capital E, caron Ì &#282; --> Ě &Ecaron; --> Ě
|
||||||
|
capital I, acute accent Í &#205; --> Í &Iacute; --> Í
|
||||||
|
capital I, circumflex accent Î &#206; --> Î &Icirc; --> Î
|
||||||
|
capital D, caron Ï &#270; --> Ď &Dcaron; --> Ď
|
||||||
|
capital D, stroke Ð &#272; --> Đ &Dstrok; --> Đ
|
||||||
|
capital Eth, Icelandic N/A &#208; --> Ð &ETH; --> Ð
|
||||||
|
capital N, acute accent Ñ &#323; --> Ń &Nacute; --> Ń
|
||||||
|
capital N, caron Ò &#327; --> Ň &Ncaron; --> Ň
|
||||||
|
capital O, acute accent Ó &#211; --> Ó &Oacute; --> Ó
|
||||||
|
capital O, circumflex accent Ô &#212; --> Ô &Ocirc; --> Ô
|
||||||
|
capital O, double acute accent Õ &#368; --> Ű &Odblac; --> Ő
|
||||||
|
capital O, dieresis or umlaut mark Ö &#214; --> Ö &Ouml; --> Ö
|
||||||
|
multiply sign × &#215; --> × &times; --> ×
|
||||||
|
capital R, caron Ø &#344; --> Ř &Rcaron; --> Ř
|
||||||
|
capital U, ring Ù &#366; --> Ů &Uring; --> Ů
|
||||||
|
capital U, acute accent Ú &#218; --> Ú &Uacute; --> Ú
|
||||||
|
capital U, double acute accent Û &#368; --> Ű &Udblac; --> Ű
|
||||||
|
capital U, dieresis or umlaut mark Ü &#220; --> Ü &Uuml; --> Ü
|
||||||
|
capital Y, acute accent Ý &#221; --> Ý &Yacute; --> Ý
|
||||||
|
capital T, cedilla Þ &#354; --> Ţ &Tcedil; --> Ţ
|
||||||
|
small sharp s, German (sz ligature) ß &#223; --> ß &szlig; --> ß
|
||||||
|
small r, acute accent à &#341; --> ŕ &racute; --> ŕ
|
||||||
|
small a, acute accent á &#225; --> á &aacute; --> á
|
||||||
|
small a, circumflex accent â &#226; --> â &acirc; --> â
|
||||||
|
small a, breve ã &#259; --> ă &abreve; --> ă
|
||||||
|
small a, dieresis or umlaut mark ä &#228; --> ä &auml; --> ä
|
||||||
|
small l, acute accent å &#314; --> ĺ &lacute; --> ĺ
|
||||||
|
small c, acute accent æ &#263; --> ć &cacute; --> ć
|
||||||
|
small c, cedilla ç &#231; --> ç &ccedil; --> ç
|
||||||
|
small c, caron è &#269; --> č &ccaron; --> č
|
||||||
|
small e, acute accent é &#233; --> é &eacute; --> é
|
||||||
|
small e, ogonek ê &#281; --> ę &eogon; --> ę
|
||||||
|
small e, dieresis or umlaut mark ë &#235; --> ë &euml; --> ë
|
||||||
|
small e, caron ì &#283; --> ě &ecaron; --> ě
|
||||||
|
small i, acute accent í &#237; --> í &iacute; --> í
|
||||||
|
small i, circumflex accent î &#238; --> î &icirc; --> î
|
||||||
|
small d, caron ï &#271; --> ď &dcaron; --> ď
|
||||||
|
small d, stroke ð &#273; --> đ &dstrok; --> đ
|
||||||
|
small eth, Icelandic N/A &#240; --> ð &eth; --> ð
|
||||||
|
small n, acute accent ñ &#324; --> ń &nacute; --> ń
|
||||||
|
small n, caron ò &#328; --> ň &ncaron; --> ň
|
||||||
|
small o, acute accent ó &#243; --> ó &oacute; --> ó
|
||||||
|
small o, circumflex accent ô &#244; --> ô &ocirc; --> ô
|
||||||
|
small o, double acute accent õ &#369; --> ű &odblac; --> ő
|
||||||
|
small o, dieresis or umlaut mark ö &#246; --> ö &ouml; --> ö
|
||||||
|
division sign ÷ &#247; --> ÷ &divide; --> ÷
|
||||||
|
small r, caron ø &#345; --> ř &rcaron; --> ř
|
||||||
|
small u, ring ù &#367; --> ů &uring; --> ů
|
||||||
|
small u, acute accent ú &#250; --> ú &uacute; --> ú
|
||||||
|
small u, double acute accent û &#369; --> ű &udblac; --> ű
|
||||||
|
small u, dieresis or umlaut mark ü &#252; --> ü &uuml; --> ü
|
||||||
|
small y, acute accent ý &#253; --> ý &yacute; --> ý
|
||||||
|
small t, cedilla þ &#355; --> ţ &tcedil; --> ţ
|
||||||
|
dot above {ÿ} {&#729;}-->{˙} {&dot;} -->{˙}
|
||||||
|
|
||||||
|
Some other characters of interest Char Code Entity name
|
||||||
|
=================================== ==== ============ ==============
|
||||||
|
capital AE diphthong (ligature) N/A &#198; --> Æ &AElig; --> Æ
|
||||||
|
small ae diphthong (ligature) N/A &#230; --> æ &aelig; --> æ
|
||||||
|
capital OE ligature N/A {&#338;}-->{Œ} {&OElig;} -->{Œ}
|
||||||
|
small oe ligature N/A {&#339;}-->{œ} {&oelig;} -->{œ}
|
||||||
|
copyright N/A &#169; --> © &copy; --> ©
|
||||||
|
registered trademark N/A &#174; --> ® &reg; --> ®
|
||||||
|
trademark sign N/A &#8482;--> ™ &trade; --> ™
|
||||||
|
em space N/A [&#8195;]->[ ] [&emsp;] -->[ ]
|
||||||
|
en space N/A [&#8194;]->[ ] [&ensp;] -->[ ]
|
||||||
|
1/3-em space N/A [&#8196;]->[ ] [&emsp13;] -->[ ]
|
||||||
|
1/4-em space N/A [&#8197;]->[ ] [&emsp14;] -->[ ]
|
||||||
|
thin space N/A [&#8201;]->[ ] [&thinsp;]-->[ ]
|
||||||
|
hair space N/A [&#8202;]->[ ] [&hairsp;]-->[ ]
|
||||||
|
em dash N/A [&#8212;]->[—] [&mdash;] -->[—]
|
||||||
|
en dash N/A [&#8211;]->[–] [&ndash;] -->[–]
|
||||||
|
|
||||||
|
</PRE><!-- </PRE> no HotJava preBeta hackx - kw -->
|
||||||
|
<!-- second /PRE is a hack for HotJava 1.0 preBeta 1 -->
|
||||||
|
<HR>
|
||||||
|
<P>
|
||||||
|
Characters not found in ISO-8859-2 have "N/A" in the <TT>Char</TT> column.
|
||||||
|
Some characters for which I could not find entity names in either
|
||||||
|
<A HREF="http://www.internic.net/rfc/rfc2070.txt">RFC 2070</A>
|
||||||
|
or the
|
||||||
|
<A HREF="ftp://www.ucc.ie/pub/sgml/">ISOlat1, ISOlat2, ISOnum, ISOpub and ISOtech</A>
|
||||||
|
sets (the ones included by Peter Flynn's
|
||||||
|
<A HREF="http://www.ucc.ie/doc/www/html/dtds/htmlpro.html">HTML Pro DTD</A>)
|
||||||
|
are shown enclosed in <TT>{</TT>braces<TT>}</TT>.
|
||||||
|
</P>
|
||||||
|
<P>
|
||||||
|
There also is a variation of this table which tests
|
||||||
|
<A HREF="ALT88592.html">ISO-8859-2 characters and entities in ALT attributes</A>.
|
||||||
|
</P>
|
||||||
|
<P>
|
||||||
|
See Martin Ramsch's original
|
||||||
|
<A CHARSET="iso-8859-1" HREF="http://www.uni-passau.de/~ramsch/iso8859-1.html">ISO-8859-1 Table</A>
|
||||||
|
for related info and links, and for some notes on entity names.
|
||||||
|
This file is mostly just an adaptation of his table
|
||||||
|
to the ISO-8859-2 character set.
|
||||||
|
</P>
|
||||||
|
<HR>
|
||||||
|
|
||||||
|
<ADDRESS>kweide@tezcat.com 1997-03-09</ADDRESS>
|
||||||
|
|
||||||
|
</BODY>
|
||||||
|
</HTML>
|
193
tests/lynx-dump/data/iso-8859-2a.html.exp
Normal file
193
tests/lynx-dump/data/iso-8859-2a.html.exp
Normal file
@ -0,0 +1,193 @@
|
|||||||
|
#[1]iso-8859-1 test [2]iso-8859-2 ALT test
|
||||||
|
|
||||||
|
iso8859-2 plus table, and cp-1252
|
||||||
|
|
||||||
|
Description Code Entity name
|
||||||
|
=================================== ============ ==============
|
||||||
|
quotation mark " --> " " --> "
|
||||||
|
ampersand & --> & & --> &
|
||||||
|
less-than sign < --> < < --> <
|
||||||
|
greater-than sign > --> > > --> >
|
||||||
|
|
||||||
|
Description Char Code Entity name
|
||||||
|
=================================== ==== ============ ==============
|
||||||
|
euro sign &128; -->
|
||||||
|
undefined &129; -->
|
||||||
|
single low-9 quotation mark &130; -->
|
||||||
|
latin small letter f with hook &131; -->
|
||||||
|
double low-9 quotation mark &132; -->
|
||||||
|
horizontal ellipsis &133; -->
|
||||||
|
dagger &134; -->
|
||||||
|
double dagger &135; -->
|
||||||
|
modifier letter circumflex accent &136; -->
|
||||||
|
per mille sign &137; -->
|
||||||
|
latin capital letter s with caron &138; -->
|
||||||
|
single left-pointing angle quote mark &139; -->
|
||||||
|
latin capital ligature oe &140; -->
|
||||||
|
undefined &141; -->
|
||||||
|
latin capital letter z with caron &142; -->
|
||||||
|
undefined &143; -->
|
||||||
|
|
||||||
|
undefined &144; -->
|
||||||
|
left single quotation mark &145; -->
|
||||||
|
right single quotation mark &146; -->
|
||||||
|
left double quotation mark &147; -->
|
||||||
|
right double quotation mark &148; -->
|
||||||
|
bullet &149; -->
|
||||||
|
en dash &150; -->
|
||||||
|
em dash &151; -->
|
||||||
|
small tilde &152; -->
|
||||||
|
trade mark sign &153; -->
|
||||||
|
latin small letter s with caron &154; -->
|
||||||
|
single right-pointing angle quote mark &155; -->
|
||||||
|
latin small ligature oe &156; -->
|
||||||
|
undefined &157; -->
|
||||||
|
latin small letter z with caron &158; -->
|
||||||
|
latin capital letter y with diaeresis &159; -->
|
||||||
|
|
||||||
|
non-breaking space   --> -->
|
||||||
|
capital A, ogonek Ą Ą --> Ą Ą --> Ą
|
||||||
|
breve {˘} {˘}-->{˘} {˘} -->{˘}
|
||||||
|
capital L, stroke Ł Ł --> Ł Ł --> Ł
|
||||||
|
general currency sign ¤ ¤ --> ¤ ¤ --> ¤
|
||||||
|
capital L, caron Ľ Ľ --> Ľ Ľ --> Ľ
|
||||||
|
capital S, acute accent Ś Ś --> Ś Ś --> Ś
|
||||||
|
section sign § § --> § § --> §
|
||||||
|
umlaut (dieresis) ¨ ¨ --> ¨ ¨ --> ¨
|
||||||
|
¨ --> ¨
|
||||||
|
capital S, caron Š Š --> Š Š --> Š
|
||||||
|
capital S, cedilla Ş Ş --> Ş Ş --> Ş
|
||||||
|
capital T, caron Ť Ť --> Ť Ť --> Ť
|
||||||
|
capital Z, acute accent Ź Ź --> Ź Ź --> Ź
|
||||||
|
soft hyphen [] [­]-->[] [­] -->[]
|
||||||
|
capital Z, caron Ž Ž --> Ž Ž --> Ž
|
||||||
|
capital Z, dot above Ż Ż --> Ż Ż --> Ż
|
||||||
|
degree sign ° ° --> ° ° --> °
|
||||||
|
small a, ogonek ą ą --> ą ą --> ą
|
||||||
|
ogonek {˛} {˛}-->{˛} {˛} -->{˛}
|
||||||
|
small l, stroke ł ł --> ł ł --> ł
|
||||||
|
acute accent ´ ´ --> ´ ´ --> ´
|
||||||
|
small l, caron ľ ľ --> ľ ľ --> ľ
|
||||||
|
small s, acute accent ś ś --> ś ś --> ś
|
||||||
|
caron {ˇ} {ˇ}-->{ˇ} {ˇ} -->{ˇ}
|
||||||
|
cedilla ¸ ¸ --> ¸ ¸ --> ¸
|
||||||
|
small s, caron š š --> š š --> š
|
||||||
|
small s, cedilla ş ş --> ş ş --> ş
|
||||||
|
small t, caron ť ť --> ť ť --> ť
|
||||||
|
small z, acute accent ź ź --> ź ź --> ź
|
||||||
|
double acute accent {˝} {˝}-->{˝} {˝} -->{˝}
|
||||||
|
small z, caron ž ž --> ž ž --> ž
|
||||||
|
small z, dot above ż ż --> ż ż --> ż
|
||||||
|
capital R, acute accent Ŕ Ŕ --> Ŕ Ŕ --> Ŕ
|
||||||
|
capital A, acute accent Á Á --> Á Á --> Á
|
||||||
|
capital A, circumflex accent   -->   --> Â
|
||||||
|
capital A, breve Ă Ă --> Ă Ă --> Ă
|
||||||
|
capital A, dieresis or umlaut mark Ä Ä --> Ä Ä --> Ä
|
||||||
|
capital L, acute accent Ĺ Ĺ --> Ĺ Ĺ --> Ĺ
|
||||||
|
capital C, acute accent Ć Ć --> Ć Ć --> Ć
|
||||||
|
capital C, cedilla Ç Ç --> Ç Ç --> Ç
|
||||||
|
capital C, caron Č Č --> Č Č --> Č
|
||||||
|
capital E, acute accent É É --> É É --> É
|
||||||
|
capital E, ogonek Ę Ę --> Ę Ę --> Ę
|
||||||
|
capital E, dieresis or umlaut mark Ë Ë --> Ë Ë --> Ë
|
||||||
|
capital E, caron Ě Ě --> Ě Ě --> Ě
|
||||||
|
capital I, acute accent Í Í --> Í Í --> Í
|
||||||
|
capital I, circumflex accent Î Î --> Î Î --> Î
|
||||||
|
capital D, caron Ď Ď --> Ď Ď --> Ď
|
||||||
|
capital D, stroke Đ Đ --> Đ Đ --> Đ
|
||||||
|
capital Eth, Icelandic N/A Ð --> Ð Ð --> Ð
|
||||||
|
capital N, acute accent Ń Ń --> Ń Ń --> Ń
|
||||||
|
capital N, caron Ň Ň --> Ň Ň --> Ň
|
||||||
|
capital O, acute accent Ó Ó --> Ó Ó --> Ó
|
||||||
|
capital O, circumflex accent Ô Ô --> Ô Ô --> Ô
|
||||||
|
capital O, double acute accent Ő Ű --> Ű Ő --> Ő
|
||||||
|
capital O, dieresis or umlaut mark Ö Ö --> Ö Ö --> Ö
|
||||||
|
multiply sign × × --> × × --> ×
|
||||||
|
capital R, caron Ř Ř --> Ř Ř --> Ř
|
||||||
|
capital U, ring Ů Ů --> Ů Ů --> Ů
|
||||||
|
capital U, acute accent Ú Ú --> Ú Ú --> Ú
|
||||||
|
capital U, double acute accent Ű Ű --> Ű Ű --> Ű
|
||||||
|
capital U, dieresis or umlaut mark Ü Ü --> Ü Ü --> Ü
|
||||||
|
capital Y, acute accent Ý Ý --> Ý Ý --> Ý
|
||||||
|
capital T, cedilla Ţ Ţ --> Ţ Ţ --> Ţ
|
||||||
|
small sharp s, German (sz ligature) ß ß --> ß ß --> ß
|
||||||
|
small r, acute accent ŕ ŕ --> ŕ ŕ --> ŕ
|
||||||
|
small a, acute accent á á --> á á --> á
|
||||||
|
small a, circumflex accent â â --> â â --> â
|
||||||
|
small a, breve ă ă --> ă ă --> ă
|
||||||
|
small a, dieresis or umlaut mark ä ä --> ä ä --> ä
|
||||||
|
small l, acute accent ĺ ĺ --> ĺ ĺ --> ĺ
|
||||||
|
small c, acute accent ć ć --> ć ć --> ć
|
||||||
|
small c, cedilla ç ç --> ç ç --> ç
|
||||||
|
small c, caron č č --> č č --> č
|
||||||
|
small e, acute accent é é --> é é --> é
|
||||||
|
small e, ogonek ę ę --> ę ę --> ę
|
||||||
|
small e, dieresis or umlaut mark ë ë --> ë ë --> ë
|
||||||
|
small e, caron ě ě --> ě ě --> ě
|
||||||
|
small i, acute accent í í --> í í --> í
|
||||||
|
small i, circumflex accent î î --> î î --> î
|
||||||
|
small d, caron ď ď --> ď ď --> ď
|
||||||
|
small d, stroke đ đ --> đ đ --> đ
|
||||||
|
small eth, Icelandic N/A ð --> ð ð --> ð
|
||||||
|
small n, acute accent ń ń --> ń ń --> ń
|
||||||
|
small n, caron ň ň --> ň ň --> ň
|
||||||
|
small o, acute accent ó ó --> ó ó --> ó
|
||||||
|
small o, circumflex accent ô ô --> ô ô --> ô
|
||||||
|
small o, double acute accent ő ű --> ű ő --> ő
|
||||||
|
small o, dieresis or umlaut mark ö ö --> ö ö --> ö
|
||||||
|
division sign ÷ ÷ --> ÷ ÷ --> ÷
|
||||||
|
small r, caron ř ř --> ř ř --> ř
|
||||||
|
small u, ring ů ů --> ů ů --> ů
|
||||||
|
small u, acute accent ú ú --> ú ú --> ú
|
||||||
|
small u, double acute accent ű ű --> ű ű --> ű
|
||||||
|
small u, dieresis or umlaut mark ü ü --> ü ü --> ü
|
||||||
|
small y, acute accent ý ý --> ý ý --> ý
|
||||||
|
small t, cedilla ţ ţ --> ţ ţ --> ţ
|
||||||
|
dot above {˙} {˙}-->{˙} {˙} -->{˙}
|
||||||
|
|
||||||
|
Some other characters of interest Char Code Entity name
|
||||||
|
=================================== ==== ============ ==============
|
||||||
|
capital AE diphthong (ligature) N/A Æ --> Æ Æ --> Æ
|
||||||
|
small ae diphthong (ligature) N/A æ --> æ æ --> æ
|
||||||
|
capital OE ligature N/A {Œ}-->{Œ} {Œ} -->{Œ}
|
||||||
|
small oe ligature N/A {œ}-->{œ} {œ} -->{œ}
|
||||||
|
copyright N/A © --> © © --> ©
|
||||||
|
registered trademark N/A ® --> ® ® --> ®
|
||||||
|
trademark sign N/A ™--> ™ ™ --> ™
|
||||||
|
em space N/A [ ]->[ ] [ ] -->[ ]
|
||||||
|
en space N/A [ ]->[ ] [ ] -->[ ]
|
||||||
|
1/3-em space N/A [ ]->[ ] [ ] -->[ ]
|
||||||
|
1/4-em space N/A [ ]->[ ] [ ] -->[ ]
|
||||||
|
thin space N/A [ ]->[ ] [ ]-->[ ]
|
||||||
|
hair space N/A [ ]->[ ] [ ]-->[ ]
|
||||||
|
em dash N/A [—]->[—] [—] -->[—]
|
||||||
|
en dash N/A [–]->[–] [–] -->[–]
|
||||||
|
|
||||||
|
__________________________________________________________________
|
||||||
|
|
||||||
|
Characters not found in ISO-8859-2 have "N/A" in the Char column. Some
|
||||||
|
characters for which I could not find entity names in either [3]RFC
|
||||||
|
2070 or the [4]ISOlat1, ISOlat2, ISOnum, ISOpub and ISOtech sets (the
|
||||||
|
ones included by Peter Flynn's [5]HTML Pro DTD) are shown enclosed in
|
||||||
|
{braces}.
|
||||||
|
|
||||||
|
There also is a variation of this table which tests [6]ISO-8859-2
|
||||||
|
characters and entities in ALT attributes.
|
||||||
|
|
||||||
|
See Martin Ramsch's original [7]ISO-8859-1 Table for related info and
|
||||||
|
links, and for some notes on entity names. This file is mostly just an
|
||||||
|
adaptation of his table to the ISO-8859-2 character set.
|
||||||
|
__________________________________________________________________
|
||||||
|
|
||||||
|
|
||||||
|
kweide@tezcat.com 1997-03-09
|
||||||
|
|
||||||
|
References
|
||||||
|
|
||||||
|
1.
|
||||||
|
2.
|
||||||
|
3. http://www.internic.net/rfc/rfc2070.txt
|
||||||
|
4. ftp://www.ucc.ie/pub/sgml/
|
||||||
|
5. http://www.ucc.ie/doc/www/html/dtds/htmlpro.html
|
||||||
|
6.
|
||||||
|
7. http://www.uni-passau.de/~ramsch/iso8859-1.html
|
321
tests/lynx-dump/data/koi8-r.html
Normal file
321
tests/lynx-dump/data/koi8-r.html
Normal file
@ -0,0 +1,321 @@
|
|||||||
|
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN">
|
||||||
|
<HTML>
|
||||||
|
<HEAD>
|
||||||
|
<TITLE>Test of the KOI8-R symbols</TITLE>
|
||||||
|
</HEAD>
|
||||||
|
<BODY>
|
||||||
|
<PRE>
|
||||||
|
|
||||||
|
This table prepared from KOI8-R.TXT available at ftp.unicode.org
|
||||||
|
|
||||||
|
ftp://ftp.unicode.org/MAPPINGS/VENDORS/MISC/KOI8-R.TXT
|
||||||
|
(if doing ftp, try cd Public/MAPPINGS/VENDORS/MISC)
|
||||||
|
|
||||||
|
|
||||||
|
original comment:
|
||||||
|
|
||||||
|
#
|
||||||
|
# Name: KOI8-R (RFC1489) to Unicode
|
||||||
|
# Unicode version: 3.0
|
||||||
|
# Table version: 1.0
|
||||||
|
# Table format: Format A
|
||||||
|
# Date: 18 August 1999
|
||||||
|
# Authors: Helmut Richter <richter@lrz.de>
|
||||||
|
#
|
||||||
|
# Copyright (c) 1991-1999 Unicode, Inc. All Rights reserved.
|
||||||
|
#
|
||||||
|
# This file is provided as-is by Unicode, Inc. (The Unicode Consortium).
|
||||||
|
# No claims are made as to fitness for any particular purpose. No
|
||||||
|
# warranties of any kind are expressed or implied. The recipient
|
||||||
|
# agrees to determine applicability of information provided. If this
|
||||||
|
# file has been provided on optical media by Unicode, Inc., the sole
|
||||||
|
# remedy for any claim will be exchange of defective media within 90
|
||||||
|
# days of receipt.
|
||||||
|
#
|
||||||
|
# Unicode, Inc. hereby grants the right to freely use the information
|
||||||
|
# supplied in this file in the creation of products supporting the
|
||||||
|
# Unicode Standard, and to make copies of this file in any form for
|
||||||
|
# internal or external distribution as long as this notice remains
|
||||||
|
# attached.
|
||||||
|
#
|
||||||
|
# General notes:
|
||||||
|
#
|
||||||
|
# This table contains the data the Unicode Consortium has on how
|
||||||
|
# KOI8-R characters map into Unicode. The underlying document is the
|
||||||
|
# mapping described in RFC 1489. No statements are made as to whether
|
||||||
|
# this mapping is the same as the mapping defined as "Code Page 878"
|
||||||
|
# with some vendors.
|
||||||
|
#
|
||||||
|
# Format: Three tab-separated columns
|
||||||
|
# Column #1 is the KOI8-R code (in hex as 0xXX)
|
||||||
|
# Column #2 is the Unicode (in hex as 0xXXXX)
|
||||||
|
# Column #3 the Unicode name (follows a comment sign, '#')
|
||||||
|
#
|
||||||
|
# The entries are in KOI8-R order.
|
||||||
|
#
|
||||||
|
# Version history
|
||||||
|
# 1.0 version: created.
|
||||||
|
#
|
||||||
|
# Any comments or problems, contact <errata@unicode.org>
|
||||||
|
# Please note that <errata@unicode.org> is an archival address;
|
||||||
|
# notices will be checked, but do not expect an immediate response.
|
||||||
|
#
|
||||||
|
0x00 0x0000 "�" # NULL
|
||||||
|
0x01 0x0001 "" # START OF HEADING
|
||||||
|
0x02 0x0002 "" # START OF TEXT
|
||||||
|
0x03 0x0003 "" # END OF TEXT
|
||||||
|
0x04 0x0004 "" # END OF TRANSMISSION
|
||||||
|
0x05 0x0005 "" # ENQUIRY
|
||||||
|
0x06 0x0006 "" # ACKNOWLEDGE
|
||||||
|
0x07 0x0007 "" # BELL
|
||||||
|
0x08 0x0008 "" # BACKSPACE
|
||||||
|
0x09 0x0009 "	" # HORIZONTAL TABULATION
|
||||||
|
0x0A 0x000A "
" # LINE FEED
|
||||||
|
0x0B 0x000B "" # VERTICAL TABULATION
|
||||||
|
0x0C 0x000C "" # FORM FEED
|
||||||
|
0x0D 0x000D "
" # CARRIAGE RETURN
|
||||||
|
0x0E 0x000E "" # SHIFT OUT
|
||||||
|
0x0F 0x000F "" # SHIFT IN
|
||||||
|
0x10 0x0010 "" # DATA LINK ESCAPE
|
||||||
|
0x11 0x0011 "" # DEVICE CONTROL ONE
|
||||||
|
0x12 0x0012 "" # DEVICE CONTROL TWO
|
||||||
|
0x13 0x0013 "" # DEVICE CONTROL THREE
|
||||||
|
0x14 0x0014 "" # DEVICE CONTROL FOUR
|
||||||
|
0x15 0x0015 "" # NEGATIVE ACKNOWLEDGE
|
||||||
|
0x16 0x0016 "" # SYNCHRONOUS IDLE
|
||||||
|
0x17 0x0017 "" # END OF TRANSMISSION BLOCK
|
||||||
|
0x18 0x0018 "" # CANCEL
|
||||||
|
0x19 0x0019 "" # END OF MEDIUM
|
||||||
|
0x1A 0x001A "" # SUBSTITUTE
|
||||||
|
0x1B 0x001B "" # ESCAPE
|
||||||
|
0x1C 0x001C "" # FILE SEPARATOR
|
||||||
|
0x1D 0x001D "" # GROUP SEPARATOR
|
||||||
|
0x1E 0x001E "" # RECORD SEPARATOR
|
||||||
|
0x1F 0x001F "" # UNIT SEPARATOR
|
||||||
|
0x20 0x0020 " " # SPACE
|
||||||
|
0x21 0x0021 "!" # EXCLAMATION MARK
|
||||||
|
0x22 0x0022 """ # QUOTATION MARK
|
||||||
|
0x23 0x0023 "#" # NUMBER SIGN
|
||||||
|
0x24 0x0024 "$" # DOLLAR SIGN
|
||||||
|
0x25 0x0025 "%" # PERCENT SIGN
|
||||||
|
0x26 0x0026 "&" # AMPERSAND
|
||||||
|
0x27 0x0027 "'" # APOSTROPHE
|
||||||
|
0x28 0x0028 "(" # LEFT PARENTHESIS
|
||||||
|
0x29 0x0029 ")" # RIGHT PARENTHESIS
|
||||||
|
0x2A 0x002A "*" # ASTERISK
|
||||||
|
0x2B 0x002B "+" # PLUS SIGN
|
||||||
|
0x2C 0x002C "," # COMMA
|
||||||
|
0x2D 0x002D "-" # HYPHEN-MINUS
|
||||||
|
0x2E 0x002E "." # FULL STOP
|
||||||
|
0x2F 0x002F "/" # SOLIDUS
|
||||||
|
0x30 0x0030 "0" # DIGIT ZERO
|
||||||
|
0x31 0x0031 "1" # DIGIT ONE
|
||||||
|
0x32 0x0032 "2" # DIGIT TWO
|
||||||
|
0x33 0x0033 "3" # DIGIT THREE
|
||||||
|
0x34 0x0034 "4" # DIGIT FOUR
|
||||||
|
0x35 0x0035 "5" # DIGIT FIVE
|
||||||
|
0x36 0x0036 "6" # DIGIT SIX
|
||||||
|
0x37 0x0037 "7" # DIGIT SEVEN
|
||||||
|
0x38 0x0038 "8" # DIGIT EIGHT
|
||||||
|
0x39 0x0039 "9" # DIGIT NINE
|
||||||
|
0x3A 0x003A ":" # COLON
|
||||||
|
0x3B 0x003B ";" # SEMICOLON
|
||||||
|
0x3C 0x003C "<" # LESS-THAN SIGN
|
||||||
|
0x3D 0x003D "=" # EQUALS SIGN
|
||||||
|
0x3E 0x003E ">" # GREATER-THAN SIGN
|
||||||
|
0x3F 0x003F "?" # QUESTION MARK
|
||||||
|
0x40 0x0040 "@" # COMMERCIAL AT
|
||||||
|
0x41 0x0041 "A" # LATIN CAPITAL LETTER A
|
||||||
|
0x42 0x0042 "B" # LATIN CAPITAL LETTER B
|
||||||
|
0x43 0x0043 "C" # LATIN CAPITAL LETTER C
|
||||||
|
0x44 0x0044 "D" # LATIN CAPITAL LETTER D
|
||||||
|
0x45 0x0045 "E" # LATIN CAPITAL LETTER E
|
||||||
|
0x46 0x0046 "F" # LATIN CAPITAL LETTER F
|
||||||
|
0x47 0x0047 "G" # LATIN CAPITAL LETTER G
|
||||||
|
0x48 0x0048 "H" # LATIN CAPITAL LETTER H
|
||||||
|
0x49 0x0049 "I" # LATIN CAPITAL LETTER I
|
||||||
|
0x4A 0x004A "J" # LATIN CAPITAL LETTER J
|
||||||
|
0x4B 0x004B "K" # LATIN CAPITAL LETTER K
|
||||||
|
0x4C 0x004C "L" # LATIN CAPITAL LETTER L
|
||||||
|
0x4D 0x004D "M" # LATIN CAPITAL LETTER M
|
||||||
|
0x4E 0x004E "N" # LATIN CAPITAL LETTER N
|
||||||
|
0x4F 0x004F "O" # LATIN CAPITAL LETTER O
|
||||||
|
0x50 0x0050 "P" # LATIN CAPITAL LETTER P
|
||||||
|
0x51 0x0051 "Q" # LATIN CAPITAL LETTER Q
|
||||||
|
0x52 0x0052 "R" # LATIN CAPITAL LETTER R
|
||||||
|
0x53 0x0053 "S" # LATIN CAPITAL LETTER S
|
||||||
|
0x54 0x0054 "T" # LATIN CAPITAL LETTER T
|
||||||
|
0x55 0x0055 "U" # LATIN CAPITAL LETTER U
|
||||||
|
0x56 0x0056 "V" # LATIN CAPITAL LETTER V
|
||||||
|
0x57 0x0057 "W" # LATIN CAPITAL LETTER W
|
||||||
|
0x58 0x0058 "X" # LATIN CAPITAL LETTER X
|
||||||
|
0x59 0x0059 "Y" # LATIN CAPITAL LETTER Y
|
||||||
|
0x5A 0x005A "Z" # LATIN CAPITAL LETTER Z
|
||||||
|
0x5B 0x005B "[" # LEFT SQUARE BRACKET
|
||||||
|
0x5C 0x005C "\" # REVERSE SOLIDUS
|
||||||
|
0x5D 0x005D "]" # RIGHT SQUARE BRACKET
|
||||||
|
0x5E 0x005E "^" # CIRCUMFLEX ACCENT
|
||||||
|
0x5F 0x005F "_" # LOW LINE
|
||||||
|
0x60 0x0060 "`" # GRAVE ACCENT
|
||||||
|
0x61 0x0061 "a" # LATIN SMALL LETTER A
|
||||||
|
0x62 0x0062 "b" # LATIN SMALL LETTER B
|
||||||
|
0x63 0x0063 "c" # LATIN SMALL LETTER C
|
||||||
|
0x64 0x0064 "d" # LATIN SMALL LETTER D
|
||||||
|
0x65 0x0065 "e" # LATIN SMALL LETTER E
|
||||||
|
0x66 0x0066 "f" # LATIN SMALL LETTER F
|
||||||
|
0x67 0x0067 "g" # LATIN SMALL LETTER G
|
||||||
|
0x68 0x0068 "h" # LATIN SMALL LETTER H
|
||||||
|
0x69 0x0069 "i" # LATIN SMALL LETTER I
|
||||||
|
0x6A 0x006A "j" # LATIN SMALL LETTER J
|
||||||
|
0x6B 0x006B "k" # LATIN SMALL LETTER K
|
||||||
|
0x6C 0x006C "l" # LATIN SMALL LETTER L
|
||||||
|
0x6D 0x006D "m" # LATIN SMALL LETTER M
|
||||||
|
0x6E 0x006E "n" # LATIN SMALL LETTER N
|
||||||
|
0x6F 0x006F "o" # LATIN SMALL LETTER O
|
||||||
|
0x70 0x0070 "p" # LATIN SMALL LETTER P
|
||||||
|
0x71 0x0071 "q" # LATIN SMALL LETTER Q
|
||||||
|
0x72 0x0072 "r" # LATIN SMALL LETTER R
|
||||||
|
0x73 0x0073 "s" # LATIN SMALL LETTER S
|
||||||
|
0x74 0x0074 "t" # LATIN SMALL LETTER T
|
||||||
|
0x75 0x0075 "u" # LATIN SMALL LETTER U
|
||||||
|
0x76 0x0076 "v" # LATIN SMALL LETTER V
|
||||||
|
0x77 0x0077 "w" # LATIN SMALL LETTER W
|
||||||
|
0x78 0x0078 "x" # LATIN SMALL LETTER X
|
||||||
|
0x79 0x0079 "y" # LATIN SMALL LETTER Y
|
||||||
|
0x7A 0x007A "z" # LATIN SMALL LETTER Z
|
||||||
|
0x7B 0x007B "{" # LEFT CURLY BRACKET
|
||||||
|
0x7C 0x007C "|" # VERTICAL LINE
|
||||||
|
0x7D 0x007D "}" # RIGHT CURLY BRACKET
|
||||||
|
0x7E 0x007E "~" # TILDE
|
||||||
|
0x7F 0x007F "" # DELETE
|
||||||
|
0x80 0x2500 "─" # BOX DRAWINGS LIGHT HORIZONTAL
|
||||||
|
0x81 0x2502 "│" # BOX DRAWINGS LIGHT VERTICAL
|
||||||
|
0x82 0x250C "┌" # BOX DRAWINGS LIGHT DOWN AND RIGHT
|
||||||
|
0x83 0x2510 "┐" # BOX DRAWINGS LIGHT DOWN AND LEFT
|
||||||
|
0x84 0x2514 "└" # BOX DRAWINGS LIGHT UP AND RIGHT
|
||||||
|
0x85 0x2518 "┘" # BOX DRAWINGS LIGHT UP AND LEFT
|
||||||
|
0x86 0x251C "├" # BOX DRAWINGS LIGHT VERTICAL AND RIGHT
|
||||||
|
0x87 0x2524 "┤" # BOX DRAWINGS LIGHT VERTICAL AND LEFT
|
||||||
|
0x88 0x252C "┬" # BOX DRAWINGS LIGHT DOWN AND HORIZONTAL
|
||||||
|
0x89 0x2534 "┴" # BOX DRAWINGS LIGHT UP AND HORIZONTAL
|
||||||
|
0x8A 0x253C "┼" # BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL
|
||||||
|
0x8B 0x2580 "▀" # UPPER HALF BLOCK
|
||||||
|
0x8C 0x2584 "▄" # LOWER HALF BLOCK
|
||||||
|
0x8D 0x2588 "█" # FULL BLOCK
|
||||||
|
0x8E 0x258C "▌" # LEFT HALF BLOCK
|
||||||
|
0x8F 0x2590 "▐" # RIGHT HALF BLOCK
|
||||||
|
0x90 0x2591 "░" # LIGHT SHADE
|
||||||
|
0x91 0x2592 "▒" # MEDIUM SHADE
|
||||||
|
0x92 0x2593 "▓" # DARK SHADE
|
||||||
|
0x93 0x2320 "⌠" # TOP HALF INTEGRAL
|
||||||
|
0x94 0x25A0 "■" # BLACK SQUARE
|
||||||
|
0x95 0x2219 "∙" # BULLET OPERATOR
|
||||||
|
0x96 0x221A "√" # SQUARE ROOT
|
||||||
|
0x97 0x2248 "≈" # ALMOST EQUAL TO
|
||||||
|
0x98 0x2264 "≤" # LESS-THAN OR EQUAL TO
|
||||||
|
0x99 0x2265 "≥" # GREATER-THAN OR EQUAL TO
|
||||||
|
0x9A 0x00A0 " " # NO-BREAK SPACE
|
||||||
|
0x9B 0x2321 "⌡" # BOTTOM HALF INTEGRAL
|
||||||
|
0x9C 0x00B0 "°" # DEGREE SIGN
|
||||||
|
0x9D 0x00B2 "²" # SUPERSCRIPT TWO
|
||||||
|
0x9E 0x00B7 "·" # MIDDLE DOT
|
||||||
|
0x9F 0x00F7 "÷" # DIVISION SIGN
|
||||||
|
0xA0 0x2550 "═" # BOX DRAWINGS DOUBLE HORIZONTAL
|
||||||
|
0xA1 0x2551 "║" # BOX DRAWINGS DOUBLE VERTICAL
|
||||||
|
0xA2 0x2552 "╒" # BOX DRAWINGS DOWN SINGLE AND RIGHT DOUBLE
|
||||||
|
0xA3 0x0451 "ё" # CYRILLIC SMALL LETTER IO
|
||||||
|
0xA4 0x2553 "╓" # BOX DRAWINGS DOWN DOUBLE AND RIGHT SINGLE
|
||||||
|
0xA5 0x2554 "╔" # BOX DRAWINGS DOUBLE DOWN AND RIGHT
|
||||||
|
0xA6 0x2555 "╕" # BOX DRAWINGS DOWN SINGLE AND LEFT DOUBLE
|
||||||
|
0xA7 0x2556 "╖" # BOX DRAWINGS DOWN DOUBLE AND LEFT SINGLE
|
||||||
|
0xA8 0x2557 "╗" # BOX DRAWINGS DOUBLE DOWN AND LEFT
|
||||||
|
0xA9 0x2558 "╘" # BOX DRAWINGS UP SINGLE AND RIGHT DOUBLE
|
||||||
|
0xAA 0x2559 "╙" # BOX DRAWINGS UP DOUBLE AND RIGHT SINGLE
|
||||||
|
0xAB 0x255A "╚" # BOX DRAWINGS DOUBLE UP AND RIGHT
|
||||||
|
0xAC 0x255B "╛" # BOX DRAWINGS UP SINGLE AND LEFT DOUBLE
|
||||||
|
0xAD 0x255C "╜" # BOX DRAWINGS UP DOUBLE AND LEFT SINGLE
|
||||||
|
0xAE 0x255D "╝" # BOX DRAWINGS DOUBLE UP AND LEFT
|
||||||
|
0xAF 0x255E "╞" # BOX DRAWINGS VERTICAL SINGLE AND RIGHT DOUBLE
|
||||||
|
0xB0 0x255F "╟" # BOX DRAWINGS VERTICAL DOUBLE AND RIGHT SINGLE
|
||||||
|
0xB1 0x2560 "╠" # BOX DRAWINGS DOUBLE VERTICAL AND RIGHT
|
||||||
|
0xB2 0x2561 "╡" # BOX DRAWINGS VERTICAL SINGLE AND LEFT DOUBLE
|
||||||
|
0xB3 0x0401 "Ё" # CYRILLIC CAPITAL LETTER IO
|
||||||
|
0xB4 0x2562 "╢" # BOX DRAWINGS VERTICAL DOUBLE AND LEFT SINGLE
|
||||||
|
0xB5 0x2563 "╣" # BOX DRAWINGS DOUBLE VERTICAL AND LEFT
|
||||||
|
0xB6 0x2564 "╤" # BOX DRAWINGS DOWN SINGLE AND HORIZONTAL DOUBLE
|
||||||
|
0xB7 0x2565 "╥" # BOX DRAWINGS DOWN DOUBLE AND HORIZONTAL SINGLE
|
||||||
|
0xB8 0x2566 "╦" # BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL
|
||||||
|
0xB9 0x2567 "╧" # BOX DRAWINGS UP SINGLE AND HORIZONTAL DOUBLE
|
||||||
|
0xBA 0x2568 "╨" # BOX DRAWINGS UP DOUBLE AND HORIZONTAL SINGLE
|
||||||
|
0xBB 0x2569 "╩" # BOX DRAWINGS DOUBLE UP AND HORIZONTAL
|
||||||
|
0xBC 0x256A "╪" # BOX DRAWINGS VERTICAL SINGLE AND HORIZONTAL DOUBLE
|
||||||
|
0xBD 0x256B "╫" # BOX DRAWINGS VERTICAL DOUBLE AND HORIZONTAL SINGLE
|
||||||
|
0xBE 0x256C "╬" # BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL
|
||||||
|
0xBF 0x00A9 "©" # COPYRIGHT SIGN
|
||||||
|
0xC0 0x044E "ю" # CYRILLIC SMALL LETTER YU
|
||||||
|
0xC1 0x0430 "а" # CYRILLIC SMALL LETTER A
|
||||||
|
0xC2 0x0431 "б" # CYRILLIC SMALL LETTER BE
|
||||||
|
0xC3 0x0446 "ц" # CYRILLIC SMALL LETTER TSE
|
||||||
|
0xC4 0x0434 "д" # CYRILLIC SMALL LETTER DE
|
||||||
|
0xC5 0x0435 "е" # CYRILLIC SMALL LETTER IE
|
||||||
|
0xC6 0x0444 "ф" # CYRILLIC SMALL LETTER EF
|
||||||
|
0xC7 0x0433 "г" # CYRILLIC SMALL LETTER GHE
|
||||||
|
0xC8 0x0445 "х" # CYRILLIC SMALL LETTER HA
|
||||||
|
0xC9 0x0438 "и" # CYRILLIC SMALL LETTER I
|
||||||
|
0xCA 0x0439 "й" # CYRILLIC SMALL LETTER SHORT I
|
||||||
|
0xCB 0x043A "к" # CYRILLIC SMALL LETTER KA
|
||||||
|
0xCC 0x043B "л" # CYRILLIC SMALL LETTER EL
|
||||||
|
0xCD 0x043C "м" # CYRILLIC SMALL LETTER EM
|
||||||
|
0xCE 0x043D "н" # CYRILLIC SMALL LETTER EN
|
||||||
|
0xCF 0x043E "о" # CYRILLIC SMALL LETTER O
|
||||||
|
0xD0 0x043F "п" # CYRILLIC SMALL LETTER PE
|
||||||
|
0xD1 0x044F "я" # CYRILLIC SMALL LETTER YA
|
||||||
|
0xD2 0x0440 "р" # CYRILLIC SMALL LETTER ER
|
||||||
|
0xD3 0x0441 "с" # CYRILLIC SMALL LETTER ES
|
||||||
|
0xD4 0x0442 "т" # CYRILLIC SMALL LETTER TE
|
||||||
|
0xD5 0x0443 "у" # CYRILLIC SMALL LETTER U
|
||||||
|
0xD6 0x0436 "ж" # CYRILLIC SMALL LETTER ZHE
|
||||||
|
0xD7 0x0432 "в" # CYRILLIC SMALL LETTER VE
|
||||||
|
0xD8 0x044C "ь" # CYRILLIC SMALL LETTER SOFT SIGN
|
||||||
|
0xD9 0x044B "ы" # CYRILLIC SMALL LETTER YERU
|
||||||
|
0xDA 0x0437 "з" # CYRILLIC SMALL LETTER ZE
|
||||||
|
0xDB 0x0448 "ш" # CYRILLIC SMALL LETTER SHA
|
||||||
|
0xDC 0x044D "э" # CYRILLIC SMALL LETTER E
|
||||||
|
0xDD 0x0449 "щ" # CYRILLIC SMALL LETTER SHCHA
|
||||||
|
0xDE 0x0447 "ч" # CYRILLIC SMALL LETTER CHE
|
||||||
|
0xDF 0x044A "ъ" # CYRILLIC SMALL LETTER HARD SIGN
|
||||||
|
0xE0 0x042E "Ю" # CYRILLIC CAPITAL LETTER YU
|
||||||
|
0xE1 0x0410 "А" # CYRILLIC CAPITAL LETTER A
|
||||||
|
0xE2 0x0411 "Б" # CYRILLIC CAPITAL LETTER BE
|
||||||
|
0xE3 0x0426 "Ц" # CYRILLIC CAPITAL LETTER TSE
|
||||||
|
0xE4 0x0414 "Д" # CYRILLIC CAPITAL LETTER DE
|
||||||
|
0xE5 0x0415 "Е" # CYRILLIC CAPITAL LETTER IE
|
||||||
|
0xE6 0x0424 "Ф" # CYRILLIC CAPITAL LETTER EF
|
||||||
|
0xE7 0x0413 "Г" # CYRILLIC CAPITAL LETTER GHE
|
||||||
|
0xE8 0x0425 "Х" # CYRILLIC CAPITAL LETTER HA
|
||||||
|
0xE9 0x0418 "И" # CYRILLIC CAPITAL LETTER I
|
||||||
|
0xEA 0x0419 "Й" # CYRILLIC CAPITAL LETTER SHORT I
|
||||||
|
0xEB 0x041A "К" # CYRILLIC CAPITAL LETTER KA
|
||||||
|
0xEC 0x041B "Л" # CYRILLIC CAPITAL LETTER EL
|
||||||
|
0xED 0x041C "М" # CYRILLIC CAPITAL LETTER EM
|
||||||
|
0xEE 0x041D "Н" # CYRILLIC CAPITAL LETTER EN
|
||||||
|
0xEF 0x041E "О" # CYRILLIC CAPITAL LETTER O
|
||||||
|
0xF0 0x041F "П" # CYRILLIC CAPITAL LETTER PE
|
||||||
|
0xF1 0x042F "Я" # CYRILLIC CAPITAL LETTER YA
|
||||||
|
0xF2 0x0420 "Р" # CYRILLIC CAPITAL LETTER ER
|
||||||
|
0xF3 0x0421 "С" # CYRILLIC CAPITAL LETTER ES
|
||||||
|
0xF4 0x0422 "Т" # CYRILLIC CAPITAL LETTER TE
|
||||||
|
0xF5 0x0423 "У" # CYRILLIC CAPITAL LETTER U
|
||||||
|
0xF6 0x0416 "Ж" # CYRILLIC CAPITAL LETTER ZHE
|
||||||
|
0xF7 0x0412 "В" # CYRILLIC CAPITAL LETTER VE
|
||||||
|
0xF8 0x042C "Ь" # CYRILLIC CAPITAL LETTER SOFT SIGN
|
||||||
|
0xF9 0x042B "Ы" # CYRILLIC CAPITAL LETTER YERU
|
||||||
|
0xFA 0x0417 "З" # CYRILLIC CAPITAL LETTER ZE
|
||||||
|
0xFB 0x0428 "Ш" # CYRILLIC CAPITAL LETTER SHA
|
||||||
|
0xFC 0x042D "Э" # CYRILLIC CAPITAL LETTER E
|
||||||
|
0xFD 0x0429 "Щ" # CYRILLIC CAPITAL LETTER SHCHA
|
||||||
|
0xFE 0x0427 "Ч" # CYRILLIC CAPITAL LETTER CHE
|
||||||
|
0xFF 0x042A "Ъ" # CYRILLIC CAPITAL LETTER HARD SIGN
|
||||||
|
</PRE>
|
||||||
|
</BODY>
|
||||||
|
</HTML>
|
313
tests/lynx-dump/data/koi8-r.html.exp
Normal file
313
tests/lynx-dump/data/koi8-r.html.exp
Normal file
@ -0,0 +1,313 @@
|
|||||||
|
|
||||||
|
This table prepared from KOI8-R.TXT available at ftp.unicode.org
|
||||||
|
|
||||||
|
ftp://ftp.unicode.org/MAPPINGS/VENDORS/MISC/KOI8-R.TXT
|
||||||
|
(if doing ftp, try cd Public/MAPPINGS/VENDORS/MISC)
|
||||||
|
|
||||||
|
|
||||||
|
original comment:
|
||||||
|
|
||||||
|
#
|
||||||
|
# Name: KOI8-R (RFC1489) to Unicode
|
||||||
|
# Unicode version: 3.0
|
||||||
|
# Table version: 1.0
|
||||||
|
# Table format: Format A
|
||||||
|
# Date: 18 August 1999
|
||||||
|
# Authors: Helmut Richter <richter@lrz.de>
|
||||||
|
#
|
||||||
|
# Copyright (c) 1991-1999 Unicode, Inc. All Rights reserved.
|
||||||
|
#
|
||||||
|
# This file is provided as-is by Unicode, Inc. (The Unicode Consortium).
|
||||||
|
# No claims are made as to fitness for any particular purpose. No
|
||||||
|
# warranties of any kind are expressed or implied. The recipient
|
||||||
|
# agrees to determine applicability of information provided. If this
|
||||||
|
# file has been provided on optical media by Unicode, Inc., the sole
|
||||||
|
# remedy for any claim will be exchange of defective media within 90
|
||||||
|
# days of receipt.
|
||||||
|
#
|
||||||
|
# Unicode, Inc. hereby grants the right to freely use the information
|
||||||
|
# supplied in this file in the creation of products supporting the
|
||||||
|
# Unicode Standard, and to make copies of this file in any form for
|
||||||
|
# internal or external distribution as long as this notice remains
|
||||||
|
# attached.
|
||||||
|
#
|
||||||
|
# General notes:
|
||||||
|
#
|
||||||
|
# This table contains the data the Unicode Consortium has on how
|
||||||
|
# KOI8-R characters map into Unicode. The underlying document is the
|
||||||
|
# mapping described in RFC 1489. No statements are made as to whether
|
||||||
|
# this mapping is the same as the mapping defined as "Code Page 878"
|
||||||
|
# with some vendors.
|
||||||
|
#
|
||||||
|
# Format: Three tab-separated columns
|
||||||
|
# Column #1 is the KOI8-R code (in hex as 0xXX)
|
||||||
|
# Column #2 is the Unicode (in hex as 0xXXXX)
|
||||||
|
# Column #3 the Unicode name (follows a comment sign, '#')
|
||||||
|
#
|
||||||
|
# The entries are in KOI8-R order.
|
||||||
|
#
|
||||||
|
# Version history
|
||||||
|
# 1.0 version: created.
|
||||||
|
#
|
||||||
|
# Any comments or problems, contact <errata@unicode.org>
|
||||||
|
# Please note that <errata@unicode.org> is an archival address;
|
||||||
|
# notices will be checked, but do not expect an immediate response.
|
||||||
|
#
|
||||||
|
0x00 0x0000 "�" # NULL
|
||||||
|
0x01 0x0001 "" # START OF HEADING
|
||||||
|
0x02 0x0002 "" # START OF TEXT
|
||||||
|
0x03 0x0003 "" # END OF TEXT
|
||||||
|
0x04 0x0004 "" # END OF TRANSMISSION
|
||||||
|
0x05 0x0005 "" # ENQUIRY
|
||||||
|
0x06 0x0006 "" # ACKNOWLEDGE
|
||||||
|
0x07 0x0007 "" # BELL
|
||||||
|
0x08 0x0008 "" # BACKSPACE
|
||||||
|
0x09 0x0009 " " # HORIZONTAL TABULATION
|
||||||
|
0x0A 0x000A "
|
||||||
|
" # LINE FEED
|
||||||
|
0x0B 0x000B "" # VERTICAL TABULATION
|
||||||
|
0x0C 0x000C "" # FORM FEED
|
||||||
|
0x0D 0x000D "
|
||||||
|
" # CARRIAGE RETURN
|
||||||
|
0x0E 0x000E "" # SHIFT OUT
|
||||||
|
0x0F 0x000F "" # SHIFT IN
|
||||||
|
0x10 0x0010 "" # DATA LINK ESCAPE
|
||||||
|
0x11 0x0011 "" # DEVICE CONTROL ONE
|
||||||
|
0x12 0x0012 "" # DEVICE CONTROL TWO
|
||||||
|
0x13 0x0013 "" # DEVICE CONTROL THREE
|
||||||
|
0x14 0x0014 "" # DEVICE CONTROL FOUR
|
||||||
|
0x15 0x0015 "" # NEGATIVE ACKNOWLEDGE
|
||||||
|
0x16 0x0016 "" # SYNCHRONOUS IDLE
|
||||||
|
0x17 0x0017 "" # END OF TRANSMISSION BLOCK
|
||||||
|
0x18 0x0018 "" # CANCEL
|
||||||
|
0x19 0x0019 "" # END OF MEDIUM
|
||||||
|
0x1A 0x001A "" # SUBSTITUTE
|
||||||
|
0x1B 0x001B "" # ESCAPE
|
||||||
|
0x1C 0x001C "" # FILE SEPARATOR
|
||||||
|
0x1D 0x001D "" # GROUP SEPARATOR
|
||||||
|
0x1E 0x001E "" # RECORD SEPARATOR
|
||||||
|
0x1F 0x001F "" # UNIT SEPARATOR
|
||||||
|
0x20 0x0020 " " # SPACE
|
||||||
|
0x21 0x0021 "!" # EXCLAMATION MARK
|
||||||
|
0x22 0x0022 """ # QUOTATION MARK
|
||||||
|
0x23 0x0023 "#" # NUMBER SIGN
|
||||||
|
0x24 0x0024 "$" # DOLLAR SIGN
|
||||||
|
0x25 0x0025 "%" # PERCENT SIGN
|
||||||
|
0x26 0x0026 "&" # AMPERSAND
|
||||||
|
0x27 0x0027 "'" # APOSTROPHE
|
||||||
|
0x28 0x0028 "(" # LEFT PARENTHESIS
|
||||||
|
0x29 0x0029 ")" # RIGHT PARENTHESIS
|
||||||
|
0x2A 0x002A "*" # ASTERISK
|
||||||
|
0x2B 0x002B "+" # PLUS SIGN
|
||||||
|
0x2C 0x002C "," # COMMA
|
||||||
|
0x2D 0x002D "-" # HYPHEN-MINUS
|
||||||
|
0x2E 0x002E "." # FULL STOP
|
||||||
|
0x2F 0x002F "/" # SOLIDUS
|
||||||
|
0x30 0x0030 "0" # DIGIT ZERO
|
||||||
|
0x31 0x0031 "1" # DIGIT ONE
|
||||||
|
0x32 0x0032 "2" # DIGIT TWO
|
||||||
|
0x33 0x0033 "3" # DIGIT THREE
|
||||||
|
0x34 0x0034 "4" # DIGIT FOUR
|
||||||
|
0x35 0x0035 "5" # DIGIT FIVE
|
||||||
|
0x36 0x0036 "6" # DIGIT SIX
|
||||||
|
0x37 0x0037 "7" # DIGIT SEVEN
|
||||||
|
0x38 0x0038 "8" # DIGIT EIGHT
|
||||||
|
0x39 0x0039 "9" # DIGIT NINE
|
||||||
|
0x3A 0x003A ":" # COLON
|
||||||
|
0x3B 0x003B ";" # SEMICOLON
|
||||||
|
0x3C 0x003C "<" # LESS-THAN SIGN
|
||||||
|
0x3D 0x003D "=" # EQUALS SIGN
|
||||||
|
0x3E 0x003E ">" # GREATER-THAN SIGN
|
||||||
|
0x3F 0x003F "?" # QUESTION MARK
|
||||||
|
0x40 0x0040 "@" # COMMERCIAL AT
|
||||||
|
0x41 0x0041 "A" # LATIN CAPITAL LETTER A
|
||||||
|
0x42 0x0042 "B" # LATIN CAPITAL LETTER B
|
||||||
|
0x43 0x0043 "C" # LATIN CAPITAL LETTER C
|
||||||
|
0x44 0x0044 "D" # LATIN CAPITAL LETTER D
|
||||||
|
0x45 0x0045 "E" # LATIN CAPITAL LETTER E
|
||||||
|
0x46 0x0046 "F" # LATIN CAPITAL LETTER F
|
||||||
|
0x47 0x0047 "G" # LATIN CAPITAL LETTER G
|
||||||
|
0x48 0x0048 "H" # LATIN CAPITAL LETTER H
|
||||||
|
0x49 0x0049 "I" # LATIN CAPITAL LETTER I
|
||||||
|
0x4A 0x004A "J" # LATIN CAPITAL LETTER J
|
||||||
|
0x4B 0x004B "K" # LATIN CAPITAL LETTER K
|
||||||
|
0x4C 0x004C "L" # LATIN CAPITAL LETTER L
|
||||||
|
0x4D 0x004D "M" # LATIN CAPITAL LETTER M
|
||||||
|
0x4E 0x004E "N" # LATIN CAPITAL LETTER N
|
||||||
|
0x4F 0x004F "O" # LATIN CAPITAL LETTER O
|
||||||
|
0x50 0x0050 "P" # LATIN CAPITAL LETTER P
|
||||||
|
0x51 0x0051 "Q" # LATIN CAPITAL LETTER Q
|
||||||
|
0x52 0x0052 "R" # LATIN CAPITAL LETTER R
|
||||||
|
0x53 0x0053 "S" # LATIN CAPITAL LETTER S
|
||||||
|
0x54 0x0054 "T" # LATIN CAPITAL LETTER T
|
||||||
|
0x55 0x0055 "U" # LATIN CAPITAL LETTER U
|
||||||
|
0x56 0x0056 "V" # LATIN CAPITAL LETTER V
|
||||||
|
0x57 0x0057 "W" # LATIN CAPITAL LETTER W
|
||||||
|
0x58 0x0058 "X" # LATIN CAPITAL LETTER X
|
||||||
|
0x59 0x0059 "Y" # LATIN CAPITAL LETTER Y
|
||||||
|
0x5A 0x005A "Z" # LATIN CAPITAL LETTER Z
|
||||||
|
0x5B 0x005B "[" # LEFT SQUARE BRACKET
|
||||||
|
0x5C 0x005C "\" # REVERSE SOLIDUS
|
||||||
|
0x5D 0x005D "]" # RIGHT SQUARE BRACKET
|
||||||
|
0x5E 0x005E "^" # CIRCUMFLEX ACCENT
|
||||||
|
0x5F 0x005F "_" # LOW LINE
|
||||||
|
0x60 0x0060 "`" # GRAVE ACCENT
|
||||||
|
0x61 0x0061 "a" # LATIN SMALL LETTER A
|
||||||
|
0x62 0x0062 "b" # LATIN SMALL LETTER B
|
||||||
|
0x63 0x0063 "c" # LATIN SMALL LETTER C
|
||||||
|
0x64 0x0064 "d" # LATIN SMALL LETTER D
|
||||||
|
0x65 0x0065 "e" # LATIN SMALL LETTER E
|
||||||
|
0x66 0x0066 "f" # LATIN SMALL LETTER F
|
||||||
|
0x67 0x0067 "g" # LATIN SMALL LETTER G
|
||||||
|
0x68 0x0068 "h" # LATIN SMALL LETTER H
|
||||||
|
0x69 0x0069 "i" # LATIN SMALL LETTER I
|
||||||
|
0x6A 0x006A "j" # LATIN SMALL LETTER J
|
||||||
|
0x6B 0x006B "k" # LATIN SMALL LETTER K
|
||||||
|
0x6C 0x006C "l" # LATIN SMALL LETTER L
|
||||||
|
0x6D 0x006D "m" # LATIN SMALL LETTER M
|
||||||
|
0x6E 0x006E "n" # LATIN SMALL LETTER N
|
||||||
|
0x6F 0x006F "o" # LATIN SMALL LETTER O
|
||||||
|
0x70 0x0070 "p" # LATIN SMALL LETTER P
|
||||||
|
0x71 0x0071 "q" # LATIN SMALL LETTER Q
|
||||||
|
0x72 0x0072 "r" # LATIN SMALL LETTER R
|
||||||
|
0x73 0x0073 "s" # LATIN SMALL LETTER S
|
||||||
|
0x74 0x0074 "t" # LATIN SMALL LETTER T
|
||||||
|
0x75 0x0075 "u" # LATIN SMALL LETTER U
|
||||||
|
0x76 0x0076 "v" # LATIN SMALL LETTER V
|
||||||
|
0x77 0x0077 "w" # LATIN SMALL LETTER W
|
||||||
|
0x78 0x0078 "x" # LATIN SMALL LETTER X
|
||||||
|
0x79 0x0079 "y" # LATIN SMALL LETTER Y
|
||||||
|
0x7A 0x007A "z" # LATIN SMALL LETTER Z
|
||||||
|
0x7B 0x007B "{" # LEFT CURLY BRACKET
|
||||||
|
0x7C 0x007C "|" # VERTICAL LINE
|
||||||
|
0x7D 0x007D "}" # RIGHT CURLY BRACKET
|
||||||
|
0x7E 0x007E "~" # TILDE
|
||||||
|
0x7F 0x007F "" # DELETE
|
||||||
|
0x80 0x2500 "─" # BOX DRAWINGS LIGHT HORIZONTAL
|
||||||
|
0x81 0x2502 "│" # BOX DRAWINGS LIGHT VERTICAL
|
||||||
|
0x82 0x250C "┌" # BOX DRAWINGS LIGHT DOWN AND RIGHT
|
||||||
|
0x83 0x2510 "┐" # BOX DRAWINGS LIGHT DOWN AND LEFT
|
||||||
|
0x84 0x2514 "└" # BOX DRAWINGS LIGHT UP AND RIGHT
|
||||||
|
0x85 0x2518 "┘" # BOX DRAWINGS LIGHT UP AND LEFT
|
||||||
|
0x86 0x251C "├" # BOX DRAWINGS LIGHT VERTICAL AND RIGHT
|
||||||
|
0x87 0x2524 "┤" # BOX DRAWINGS LIGHT VERTICAL AND LEFT
|
||||||
|
0x88 0x252C "┬" # BOX DRAWINGS LIGHT DOWN AND HORIZONTAL
|
||||||
|
0x89 0x2534 "┴" # BOX DRAWINGS LIGHT UP AND HORIZONTAL
|
||||||
|
0x8A 0x253C "┼" # BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL
|
||||||
|
0x8B 0x2580 "▀" # UPPER HALF BLOCK
|
||||||
|
0x8C 0x2584 "▄" # LOWER HALF BLOCK
|
||||||
|
0x8D 0x2588 "█" # FULL BLOCK
|
||||||
|
0x8E 0x258C "▌" # LEFT HALF BLOCK
|
||||||
|
0x8F 0x2590 "▐" # RIGHT HALF BLOCK
|
||||||
|
0x90 0x2591 "░" # LIGHT SHADE
|
||||||
|
0x91 0x2592 "▒" # MEDIUM SHADE
|
||||||
|
0x92 0x2593 "▓" # DARK SHADE
|
||||||
|
0x93 0x2320 "⌠" # TOP HALF INTEGRAL
|
||||||
|
0x94 0x25A0 "■" # BLACK SQUARE
|
||||||
|
0x95 0x2219 "∙" # BULLET OPERATOR
|
||||||
|
0x96 0x221A "√" # SQUARE ROOT
|
||||||
|
0x97 0x2248 "≈" # ALMOST EQUAL TO
|
||||||
|
0x98 0x2264 "≤" # LESS-THAN OR EQUAL TO
|
||||||
|
0x99 0x2265 "≥" # GREATER-THAN OR EQUAL TO
|
||||||
|
0x9A 0x00A0 " " # NO-BREAK SPACE
|
||||||
|
0x9B 0x2321 "⌡" # BOTTOM HALF INTEGRAL
|
||||||
|
0x9C 0x00B0 "°" # DEGREE SIGN
|
||||||
|
0x9D 0x00B2 "²" # SUPERSCRIPT TWO
|
||||||
|
0x9E 0x00B7 "·" # MIDDLE DOT
|
||||||
|
0x9F 0x00F7 "÷" # DIVISION SIGN
|
||||||
|
0xA0 0x2550 "═" # BOX DRAWINGS DOUBLE HORIZONTAL
|
||||||
|
0xA1 0x2551 "║" # BOX DRAWINGS DOUBLE VERTICAL
|
||||||
|
0xA2 0x2552 "╒" # BOX DRAWINGS DOWN SINGLE AND RIGHT DOUBLE
|
||||||
|
0xA3 0x0451 "ё" # CYRILLIC SMALL LETTER IO
|
||||||
|
0xA4 0x2553 "╓" # BOX DRAWINGS DOWN DOUBLE AND RIGHT SINGLE
|
||||||
|
0xA5 0x2554 "╔" # BOX DRAWINGS DOUBLE DOWN AND RIGHT
|
||||||
|
0xA6 0x2555 "╕" # BOX DRAWINGS DOWN SINGLE AND LEFT DOUBLE
|
||||||
|
0xA7 0x2556 "╖" # BOX DRAWINGS DOWN DOUBLE AND LEFT SINGLE
|
||||||
|
0xA8 0x2557 "╗" # BOX DRAWINGS DOUBLE DOWN AND LEFT
|
||||||
|
0xA9 0x2558 "╘" # BOX DRAWINGS UP SINGLE AND RIGHT DOUBLE
|
||||||
|
0xAA 0x2559 "╙" # BOX DRAWINGS UP DOUBLE AND RIGHT SINGLE
|
||||||
|
0xAB 0x255A "╚" # BOX DRAWINGS DOUBLE UP AND RIGHT
|
||||||
|
0xAC 0x255B "╛" # BOX DRAWINGS UP SINGLE AND LEFT DOUBLE
|
||||||
|
0xAD 0x255C "╜" # BOX DRAWINGS UP DOUBLE AND LEFT SINGLE
|
||||||
|
0xAE 0x255D "╝" # BOX DRAWINGS DOUBLE UP AND LEFT
|
||||||
|
0xAF 0x255E "╞" # BOX DRAWINGS VERTICAL SINGLE AND RIGHT DOUBLE
|
||||||
|
0xB0 0x255F "╟" # BOX DRAWINGS VERTICAL DOUBLE AND RIGHT SINGLE
|
||||||
|
0xB1 0x2560 "╠" # BOX DRAWINGS DOUBLE VERTICAL AND RIGHT
|
||||||
|
0xB2 0x2561 "╡" # BOX DRAWINGS VERTICAL SINGLE AND LEFT DOUBLE
|
||||||
|
0xB3 0x0401 "Ё" # CYRILLIC CAPITAL LETTER IO
|
||||||
|
0xB4 0x2562 "╢" # BOX DRAWINGS VERTICAL DOUBLE AND LEFT SINGLE
|
||||||
|
0xB5 0x2563 "╣" # BOX DRAWINGS DOUBLE VERTICAL AND LEFT
|
||||||
|
0xB6 0x2564 "╤" # BOX DRAWINGS DOWN SINGLE AND HORIZONTAL DOUBLE
|
||||||
|
0xB7 0x2565 "╥" # BOX DRAWINGS DOWN DOUBLE AND HORIZONTAL SINGLE
|
||||||
|
0xB8 0x2566 "╦" # BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL
|
||||||
|
0xB9 0x2567 "╧" # BOX DRAWINGS UP SINGLE AND HORIZONTAL DOUBLE
|
||||||
|
0xBA 0x2568 "╨" # BOX DRAWINGS UP DOUBLE AND HORIZONTAL SINGLE
|
||||||
|
0xBB 0x2569 "╩" # BOX DRAWINGS DOUBLE UP AND HORIZONTAL
|
||||||
|
0xBC 0x256A "╪" # BOX DRAWINGS VERTICAL SINGLE AND HORIZONTAL DOUBLE
|
||||||
|
0xBD 0x256B "╫" # BOX DRAWINGS VERTICAL DOUBLE AND HORIZONTAL SINGLE
|
||||||
|
0xBE 0x256C "╬" # BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL
|
||||||
|
0xBF 0x00A9 "©" # COPYRIGHT SIGN
|
||||||
|
0xC0 0x044E "ю" # CYRILLIC SMALL LETTER YU
|
||||||
|
0xC1 0x0430 "а" # CYRILLIC SMALL LETTER A
|
||||||
|
0xC2 0x0431 "б" # CYRILLIC SMALL LETTER BE
|
||||||
|
0xC3 0x0446 "ц" # CYRILLIC SMALL LETTER TSE
|
||||||
|
0xC4 0x0434 "д" # CYRILLIC SMALL LETTER DE
|
||||||
|
0xC5 0x0435 "е" # CYRILLIC SMALL LETTER IE
|
||||||
|
0xC6 0x0444 "ф" # CYRILLIC SMALL LETTER EF
|
||||||
|
0xC7 0x0433 "г" # CYRILLIC SMALL LETTER GHE
|
||||||
|
0xC8 0x0445 "х" # CYRILLIC SMALL LETTER HA
|
||||||
|
0xC9 0x0438 "и" # CYRILLIC SMALL LETTER I
|
||||||
|
0xCA 0x0439 "й" # CYRILLIC SMALL LETTER SHORT I
|
||||||
|
0xCB 0x043A "к" # CYRILLIC SMALL LETTER KA
|
||||||
|
0xCC 0x043B "л" # CYRILLIC SMALL LETTER EL
|
||||||
|
0xCD 0x043C "м" # CYRILLIC SMALL LETTER EM
|
||||||
|
0xCE 0x043D "н" # CYRILLIC SMALL LETTER EN
|
||||||
|
0xCF 0x043E "о" # CYRILLIC SMALL LETTER O
|
||||||
|
0xD0 0x043F "п" # CYRILLIC SMALL LETTER PE
|
||||||
|
0xD1 0x044F "я" # CYRILLIC SMALL LETTER YA
|
||||||
|
0xD2 0x0440 "р" # CYRILLIC SMALL LETTER ER
|
||||||
|
0xD3 0x0441 "с" # CYRILLIC SMALL LETTER ES
|
||||||
|
0xD4 0x0442 "т" # CYRILLIC SMALL LETTER TE
|
||||||
|
0xD5 0x0443 "у" # CYRILLIC SMALL LETTER U
|
||||||
|
0xD6 0x0436 "ж" # CYRILLIC SMALL LETTER ZHE
|
||||||
|
0xD7 0x0432 "в" # CYRILLIC SMALL LETTER VE
|
||||||
|
0xD8 0x044C "ь" # CYRILLIC SMALL LETTER SOFT SIGN
|
||||||
|
0xD9 0x044B "ы" # CYRILLIC SMALL LETTER YERU
|
||||||
|
0xDA 0x0437 "з" # CYRILLIC SMALL LETTER ZE
|
||||||
|
0xDB 0x0448 "ш" # CYRILLIC SMALL LETTER SHA
|
||||||
|
0xDC 0x044D "э" # CYRILLIC SMALL LETTER E
|
||||||
|
0xDD 0x0449 "щ" # CYRILLIC SMALL LETTER SHCHA
|
||||||
|
0xDE 0x0447 "ч" # CYRILLIC SMALL LETTER CHE
|
||||||
|
0xDF 0x044A "ъ" # CYRILLIC SMALL LETTER HARD SIGN
|
||||||
|
0xE0 0x042E "Ю" # CYRILLIC CAPITAL LETTER YU
|
||||||
|
0xE1 0x0410 "А" # CYRILLIC CAPITAL LETTER A
|
||||||
|
0xE2 0x0411 "Б" # CYRILLIC CAPITAL LETTER BE
|
||||||
|
0xE3 0x0426 "Ц" # CYRILLIC CAPITAL LETTER TSE
|
||||||
|
0xE4 0x0414 "Д" # CYRILLIC CAPITAL LETTER DE
|
||||||
|
0xE5 0x0415 "Е" # CYRILLIC CAPITAL LETTER IE
|
||||||
|
0xE6 0x0424 "Ф" # CYRILLIC CAPITAL LETTER EF
|
||||||
|
0xE7 0x0413 "Г" # CYRILLIC CAPITAL LETTER GHE
|
||||||
|
0xE8 0x0425 "Х" # CYRILLIC CAPITAL LETTER HA
|
||||||
|
0xE9 0x0418 "И" # CYRILLIC CAPITAL LETTER I
|
||||||
|
0xEA 0x0419 "Й" # CYRILLIC CAPITAL LETTER SHORT I
|
||||||
|
0xEB 0x041A "К" # CYRILLIC CAPITAL LETTER KA
|
||||||
|
0xEC 0x041B "Л" # CYRILLIC CAPITAL LETTER EL
|
||||||
|
0xED 0x041C "М" # CYRILLIC CAPITAL LETTER EM
|
||||||
|
0xEE 0x041D "Н" # CYRILLIC CAPITAL LETTER EN
|
||||||
|
0xEF 0x041E "О" # CYRILLIC CAPITAL LETTER O
|
||||||
|
0xF0 0x041F "П" # CYRILLIC CAPITAL LETTER PE
|
||||||
|
0xF1 0x042F "Я" # CYRILLIC CAPITAL LETTER YA
|
||||||
|
0xF2 0x0420 "Р" # CYRILLIC CAPITAL LETTER ER
|
||||||
|
0xF3 0x0421 "С" # CYRILLIC CAPITAL LETTER ES
|
||||||
|
0xF4 0x0422 "Т" # CYRILLIC CAPITAL LETTER TE
|
||||||
|
0xF5 0x0423 "У" # CYRILLIC CAPITAL LETTER U
|
||||||
|
0xF6 0x0416 "Ж" # CYRILLIC CAPITAL LETTER ZHE
|
||||||
|
0xF7 0x0412 "В" # CYRILLIC CAPITAL LETTER VE
|
||||||
|
0xF8 0x042C "Ь" # CYRILLIC CAPITAL LETTER SOFT SIGN
|
||||||
|
0xF9 0x042B "Ы" # CYRILLIC CAPITAL LETTER YERU
|
||||||
|
0xFA 0x0417 "З" # CYRILLIC CAPITAL LETTER ZE
|
||||||
|
0xFB 0x0428 "Ш" # CYRILLIC CAPITAL LETTER SHA
|
||||||
|
0xFC 0x042D "Э" # CYRILLIC CAPITAL LETTER E
|
||||||
|
0xFD 0x0429 "Щ" # CYRILLIC CAPITAL LETTER SHCHA
|
||||||
|
0xFE 0x0427 "Ч" # CYRILLIC CAPITAL LETTER CHE
|
||||||
|
0xFF 0x042A "Ъ" # CYRILLIC CAPITAL LETTER HARD SIGN
|
103
tests/lynx-dump/data/quickbrown.html
Normal file
103
tests/lynx-dump/data/quickbrown.html
Normal file
@ -0,0 +1,103 @@
|
|||||||
|
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN">
|
||||||
|
<HTML>
|
||||||
|
<HEAD>
|
||||||
|
<TITLE>Markus Kuhn's quick-brown-fox UTF-8 demo</TITLE>
|
||||||
|
<META HTTP-EQUIV="Content-Type" CONTENT="text/html; charset=iso-8859-1">
|
||||||
|
<LINK REV="made" HREF="mailto:dickey@invisible-island.net">
|
||||||
|
</HEAD>
|
||||||
|
|
||||||
|
<BODY>
|
||||||
|
<pre>
|
||||||
|
Sentences that contain all letters commonly used in a language
|
||||||
|
--------------------------------------------------------------
|
||||||
|
|
||||||
|
Markus Kuhn <mkuhn@acm.org> -- 1998-11-30
|
||||||
|
|
||||||
|
This file was UTF-8 encoded.
|
||||||
|
|
||||||
|
|
||||||
|
German (de)
|
||||||
|
-----------
|
||||||
|
|
||||||
|
Falsches Üben von Xylophonmusik quält jeden größeren Zwerg
|
||||||
|
(= Wrongful practicing of xylophone music tortures every larger dwarf)
|
||||||
|
|
||||||
|
Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich
|
||||||
|
(= Twelve boxing fighters hunted Eva across the dike of Sylt)
|
||||||
|
|
||||||
|
Heizölrückstoßabdämpfung
|
||||||
|
(= fuel oil recoil absorber) (jqvwxy missing, but all non-ASCII letters in one word)
|
||||||
|
|
||||||
|
English (en)
|
||||||
|
------------
|
||||||
|
|
||||||
|
The quick brown fox jumps over the lazy dog
|
||||||
|
|
||||||
|
French (fr)
|
||||||
|
-----------
|
||||||
|
|
||||||
|
Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à
|
||||||
|
côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce qui lui
|
||||||
|
permet de penser à la cænogenèse de l'être dont il est question dans la
|
||||||
|
cause ambiguë entendue à Moÿ, dans un capharnaüm qui, pense-t-il, diminue
|
||||||
|
çà et là la qualité de son œuvre.
|
||||||
|
|
||||||
|
l'île exiguë
|
||||||
|
Où l'obèse jury mûr
|
||||||
|
Fête l'haï volapük,
|
||||||
|
Âne ex aéquo au whist,
|
||||||
|
Ôtez ce vœu déçu.
|
||||||
|
|
||||||
|
Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en
|
||||||
|
canoë au delà des îles, près du mälström où brûlent les novæ.
|
||||||
|
|
||||||
|
Irish Gaelic (ga)
|
||||||
|
-----------------
|
||||||
|
|
||||||
|
D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh
|
||||||
|
|
||||||
|
Icelandic (is)
|
||||||
|
--------------
|
||||||
|
|
||||||
|
Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa
|
||||||
|
|
||||||
|
Sævör grét áðan því úlpan var ónýt
|
||||||
|
(some ASCII letters missing)
|
||||||
|
|
||||||
|
Hebrew (iw)
|
||||||
|
-----------
|
||||||
|
|
||||||
|
דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה?
|
||||||
|
|
||||||
|
Polish (pl)
|
||||||
|
-----------
|
||||||
|
|
||||||
|
Pchnąć w tę łódź jeża lub ośm skrzyń fig
|
||||||
|
|
||||||
|
Russian (ru)
|
||||||
|
------------
|
||||||
|
|
||||||
|
В чащах юга жил бы цитрус? Да, но фальшивый экземпляр!
|
||||||
|
(= Would a citrus live in the bushes of south? Yes, but a only a fake!)
|
||||||
|
|
||||||
|
|
||||||
|
Please let me know if you find others! Special thanks to the people
|
||||||
|
from all over the world who contributed these sentences.
|
||||||
|
|
||||||
|
</pre>
|
||||||
|
See also:
|
||||||
|
<ul>
|
||||||
|
<li><a href="http://www.columbia.edu/kermit/utf8.html"
|
||||||
|
>http://www.columbia.edu/kermit/utf8.html</a>
|
||||||
|
<li><a href="http://www.kernel.org/"
|
||||||
|
>http://www.kernel.org/</a>
|
||||||
|
<li><a href="http://www.unicode.org/"
|
||||||
|
>http://www.unicode.org/</a>
|
||||||
|
<br>and
|
||||||
|
<li><a href="http://www.cl.cam.ac.uk/~mgk25/ucs/examples/TeX.txt"
|
||||||
|
>http://www.cl.cam.ac.uk/~mgk25/ucs/examples/TeX.txt</a>
|
||||||
|
<li><a href="http://www.cl.cam.ac.uk/~mgk25/ucs/wgl4.txt"
|
||||||
|
>http://www.cl.cam.ac.uk/~mgk25/ucs/wgl4.txt</a>
|
||||||
|
</ul>
|
||||||
|
</BODY>
|
||||||
|
</HTML>
|
93
tests/lynx-dump/data/quickbrown.html.exp
Normal file
93
tests/lynx-dump/data/quickbrown.html.exp
Normal file
@ -0,0 +1,93 @@
|
|||||||
|
Sentences that contain all letters commonly used in a language
|
||||||
|
--------------------------------------------------------------
|
||||||
|
|
||||||
|
Markus Kuhn <mkuhn@acm.org> -- 1998-11-30
|
||||||
|
|
||||||
|
This file was UTF-8 encoded.
|
||||||
|
|
||||||
|
|
||||||
|
German (de)
|
||||||
|
-----------
|
||||||
|
|
||||||
|
Falsches Üben von Xylophonmusik quält jeden größeren Zwerg
|
||||||
|
(= Wrongful practicing of xylophone music tortures every larger dwarf)
|
||||||
|
|
||||||
|
Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich
|
||||||
|
(= Twelve boxing fighters hunted Eva across the dike of Sylt)
|
||||||
|
|
||||||
|
Heizölrückstoßabdämpfung
|
||||||
|
(= fuel oil recoil absorber) (jqvwxy missing, but all non-ASCII letters in one
|
||||||
|
word)
|
||||||
|
|
||||||
|
English (en)
|
||||||
|
------------
|
||||||
|
|
||||||
|
The quick brown fox jumps over the lazy dog
|
||||||
|
|
||||||
|
French (fr)
|
||||||
|
-----------
|
||||||
|
|
||||||
|
Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à
|
||||||
|
côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce qui lui
|
||||||
|
permet de penser à la cænogenèse de l'être dont il est question dans la
|
||||||
|
cause ambiguë entendue à Moÿ, dans un capharnaüm qui, pense-t-il, diminue
|
||||||
|
çà et là la qualité de son œuvre.
|
||||||
|
|
||||||
|
l'île exiguë
|
||||||
|
Où l'obèse jury mûr
|
||||||
|
Fête l'haï volapük,
|
||||||
|
Âne ex aéquo au whist,
|
||||||
|
Ôtez ce vœu déçu.
|
||||||
|
|
||||||
|
Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en
|
||||||
|
canoë au delà des îles, près du mälström où brûlent les novæ.
|
||||||
|
|
||||||
|
Irish Gaelic (ga)
|
||||||
|
-----------------
|
||||||
|
|
||||||
|
D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh
|
||||||
|
|
||||||
|
Icelandic (is)
|
||||||
|
--------------
|
||||||
|
|
||||||
|
Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa
|
||||||
|
|
||||||
|
Sævör grét áðan því úlpan var ónýt
|
||||||
|
(some ASCII letters missing)
|
||||||
|
|
||||||
|
Hebrew (iw)
|
||||||
|
-----------
|
||||||
|
|
||||||
|
דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה?
|
||||||
|
|
||||||
|
Polish (pl)
|
||||||
|
-----------
|
||||||
|
|
||||||
|
Pchnąć w tę łódź jeża lub ośm skrzyń fig
|
||||||
|
|
||||||
|
Russian (ru)
|
||||||
|
------------
|
||||||
|
|
||||||
|
В чащах юга жил бы цитрус? Да, но фальшивый экземпляр!
|
||||||
|
(= Would a citrus live in the bushes of south? Yes, but a only a fake!)
|
||||||
|
|
||||||
|
|
||||||
|
Please let me know if you find others! Special thanks to the people
|
||||||
|
from all over the world who contributed these sentences.
|
||||||
|
|
||||||
|
|
||||||
|
See also:
|
||||||
|
* [1]http://www.columbia.edu/kermit/utf8.html
|
||||||
|
* [2]http://www.kernel.org/
|
||||||
|
* [3]http://www.unicode.org/
|
||||||
|
and
|
||||||
|
* [4]http://www.cl.cam.ac.uk/~mgk25/ucs/examples/TeX.txt
|
||||||
|
* [5]http://www.cl.cam.ac.uk/~mgk25/ucs/wgl4.txt
|
||||||
|
|
||||||
|
References
|
||||||
|
|
||||||
|
1. http://www.columbia.edu/kermit/utf8.html
|
||||||
|
2. http://www.kernel.org/
|
||||||
|
3. http://www.unicode.org/
|
||||||
|
4. http://www.cl.cam.ac.uk/~mgk25/ucs/examples/TeX.txt
|
||||||
|
5. http://www.cl.cam.ac.uk/~mgk25/ucs/wgl4.txt
|
38
tests/lynx-dump/data/raw8bit.html
Normal file
38
tests/lynx-dump/data/raw8bit.html
Normal file
@ -0,0 +1,38 @@
|
|||||||
|
<!DOCTYPE html PUBLIC "-//IETF//DTD HTML 3.0//EN">
|
||||||
|
<HTML>
|
||||||
|
<HEAD>
|
||||||
|
<TITLE> Test of raw 8-bit symbols </TITLE>
|
||||||
|
<!-- you may uncomment the next line
|
||||||
|
and set the document's charset directly via META tag -->
|
||||||
|
<!--META HTTP-EQUIV="Content-Type" CONTENT="text/html; charset=iso-8859-1"-->
|
||||||
|
</HEAD>
|
||||||
|
<BODY>
|
||||||
|
<PRE>
|
||||||
|
This is a test of translation 8-bit letters for different pairs of
|
||||||
|
document's charset (assumed charset) and display charset,
|
||||||
|
both can be reached from 'O'ptions menu.
|
||||||
|
|
||||||
|
This page (obviously) corresponds to text/html mode
|
||||||
|
but you may test text/plain just by pressing '\'
|
||||||
|
Try also: '@' for ``raw mode'' and '=' for Information Page.
|
||||||
|
|
||||||
|
|
||||||
|
0 1 2 3 4 5 6 7 8 9 A B C D E F
|
||||||
|
20 ! " # $ % & ' ( ) * + , - . /
|
||||||
|
30 0 1 2 3 4 5 6 7 8 9 : ; < = > ?
|
||||||
|
40 @ A B C D E F G H I J K L M N O
|
||||||
|
50 P Q R S T U V W X Y Z [ \ ] ^ _
|
||||||
|
60 ` a b c d e f g h i j k l m n o
|
||||||
|
70 p q r s t u v w x y z { | } ~
|
||||||
|
80 € <20> ‚ ƒ „ … † ‡ ˆ ‰ Š ‹ Œ <20> Ž <20>
|
||||||
|
90 <20> ‘ ’ “ ” • – — ˜ ™ š › œ <20> ž Ÿ
|
||||||
|
A0 ¡ ¢ £ ¤ ¥ ¦ § ¨ © ª « ¬ ® ¯
|
||||||
|
B0 ° ± ² ³ ´ µ ¶ · ¸ ¹ º » ¼ ½ ¾ ¿
|
||||||
|
C0 À Á Â Ã Ä Å Æ Ç È É Ê Ë Ì Í Î Ï
|
||||||
|
D0 Ð Ñ Ò Ó Ô Õ Ö × Ø Ù Ú Û Ü Ý Þ ß
|
||||||
|
E0 à á â ã ä å æ ç è é ê ë ì í î ï
|
||||||
|
F0 ð ñ ò ó ô õ ö ÷ ø ù ú û ü ý þ ÿ
|
||||||
|
|
||||||
|
</PRE>
|
||||||
|
</BODY>
|
||||||
|
</HTML>
|
24
tests/lynx-dump/data/raw8bit.html.exp
Normal file
24
tests/lynx-dump/data/raw8bit.html.exp
Normal file
@ -0,0 +1,24 @@
|
|||||||
|
This is a test of translation 8-bit letters for different pairs of
|
||||||
|
document's charset (assumed charset) and display charset,
|
||||||
|
both can be reached from 'O'ptions menu.
|
||||||
|
|
||||||
|
This page (obviously) corresponds to text/html mode
|
||||||
|
but you may test text/plain just by pressing '\'
|
||||||
|
Try also: '@' for ``raw mode'' and '=' for Information Page.
|
||||||
|
|
||||||
|
|
||||||
|
0 1 2 3 4 5 6 7 8 9 A B C D E F
|
||||||
|
20 ! " # $ % & ' ( ) * + , - . /
|
||||||
|
30 0 1 2 3 4 5 6 7 8 9 : ; < = > ?
|
||||||
|
40 @ A B C D E F G H I J K L M N O
|
||||||
|
50 P Q R S T U V W X Y Z [ \ ] ^ _
|
||||||
|
60 ` a b c d e f g h i j k l m n o
|
||||||
|
70 p q r s t u v w x y z { | } ~
|
||||||
|
80
|
||||||
|
90
|
||||||
|
A0 ¡ ¢ £ ¤ ¥ ¦ § ¨ © ª « ¬ ® ¯
|
||||||
|
B0 ° ± ² ³ ´ µ ¶ · ¸ ¹ º » ¼ ½ ¾ ¿
|
||||||
|
C0 À Á Â Ã Ä Å Æ Ç È É Ê Ë Ì Í Î Ï
|
||||||
|
D0 Ð Ñ Ò Ó Ô Õ Ö × Ø Ù Ú Û Ü Ý Þ ß
|
||||||
|
E0 à á â ã ä å æ ç è é ê ë ì í î ï
|
||||||
|
F0 ð ñ ò ó ô õ ö ÷ ø ù ú û ü ý þ ÿ
|
1081
tests/lynx-dump/data/sgml.html
Normal file
1081
tests/lynx-dump/data/sgml.html
Normal file
File diff suppressed because it is too large
Load Diff
1069
tests/lynx-dump/data/sgml.html.exp
Normal file
1069
tests/lynx-dump/data/sgml.html.exp
Normal file
File diff suppressed because it is too large
Load Diff
37
tests/lynx-dump/data/spaces.html
Normal file
37
tests/lynx-dump/data/spaces.html
Normal file
@ -0,0 +1,37 @@
|
|||||||
|
<!DOCTYPE html PUBLIC "-//IETF//DTD HTML 3.0//EN">
|
||||||
|
<HTML>
|
||||||
|
<HEAD>
|
||||||
|
<TITLE> Test of some symbols </TITLE>
|
||||||
|
</HEAD>
|
||||||
|
<BODY>
|
||||||
|
<!-- Multiple spaces are normally collapsed unless we are in a <PRE> mode
|
||||||
|
or use "special" spaces like or   - try playing around this page
|
||||||
|
by adding more spaces inside brackets or using <PRE>.
|
||||||
|
-->
|
||||||
|
<!-- PRE -->
|
||||||
|
|
||||||
|
You may press '\' to view the source of this test<br>
|
||||||
|
<em>UNICODE NCR alt-NCR named alt-named</em><br>
|
||||||
|
<p>
|
||||||
|
0x2000 [ ] <IMG SRC=X ALT="[ ]"> # EN QUAD<br>
|
||||||
|
0x2001 [ ] <IMG SRC=X ALT="[ ]"> # EM QUAD<br>
|
||||||
|
0x2002 [ ] <IMG SRC=X ALT="[ ]"> [ ] <IMG SRC=X ALT="[ ]"> # EN SPACE<br>
|
||||||
|
0x2003 [ ] <IMG SRC=X ALT="[ ]"> [ ] <IMG SRC=X ALT="[ ]"> # EM SPACE<br>
|
||||||
|
0x2004 [ ] <IMG SRC=X ALT="[ ]"> [ ] <IMG SRC=X ALT="[ ]"> # THREE-PER-EM SPACE<br>
|
||||||
|
0x2005 [ ] <IMG SRC=X ALT="[ ]"> [ ] <IMG SRC=X ALT="[ ]"> # FOUR-PER-EM SPACE<br>
|
||||||
|
0x2007 [ ] <IMG SRC=X ALT="[ ]"> [ ] <IMG SRC=X ALT="[ ]"> # FIGURE SPACE<br>
|
||||||
|
0x2008 [ ] <IMG SRC=X ALT="[ ]"> [ ] <IMG SRC=X ALT="[ ]"> # PUNCTUATION SPACE<br>
|
||||||
|
0x2009 [ ] <IMG SRC=X ALT="[ ]"> [ ] <IMG SRC=X ALT="[ ]"> # THIN SPACE<br>
|
||||||
|
0x200A [ ] <IMG SRC=X ALT="[ ]"> [ ] <IMG SRC=X ALT="[ ]"> # HAIR SPACE<br>
|
||||||
|
0x200C [‌] <IMG SRC=X ALT="[‌]"> [‌] <IMG SRC=X ALT="[‌]"> # ZERO WIDTH NON-JOINER<br>
|
||||||
|
0x200D [‍] <IMG SRC=X ALT="[‍]"> [‍] <IMG SRC=X ALT="[‍]"> # ZERO WIDTH JOINER<br>
|
||||||
|
0x200E [‎] <IMG SRC=X ALT="[‎]"> [‎] <IMG SRC=X ALT="[‎]"> # LEFT-TO-RIGHT MARK<br>
|
||||||
|
0x200F [‏] <IMG SRC=X ALT="[‏]"> [‏] <IMG SRC=X ALT="[‏]"> # RIGHT-TO-LEFT MARK<br>
|
||||||
|
0x2010 [‐] <IMG SRC=X ALT="[‐]"> [‐] <IMG SRC=X ALT="[‐]"> # HYPHEN<br>
|
||||||
|
0x2013 [–] <IMG SRC=X ALT="[–]"> [–] <IMG SRC=X ALT="[–]"> # EN DASH<br>
|
||||||
|
0x2014 [—] <IMG SRC=X ALT="[—]"> [—] <IMG SRC=X ALT="[—]"> # EM DASH<br>
|
||||||
|
|
||||||
|
|
||||||
|
</PRE>
|
||||||
|
</BODY>
|
||||||
|
</HTML>
|
20
tests/lynx-dump/data/spaces.html.exp
Normal file
20
tests/lynx-dump/data/spaces.html.exp
Normal file
@ -0,0 +1,20 @@
|
|||||||
|
You may press '\' to view the source of this test
|
||||||
|
UNICODE NCR alt-NCR named alt-named
|
||||||
|
|
||||||
|
0x2000 [ ] [ ] # EN QUAD
|
||||||
|
0x2001 [ ] [ ] # EM QUAD
|
||||||
|
0x2002 [ ] [ ] [ ] [ ] # EN SPACE
|
||||||
|
0x2003 [ ] [ ] [ ] [ ] # EM SPACE
|
||||||
|
0x2004 [ ] [ ] [ ] [ ] # THREE-PER-EM SPACE
|
||||||
|
0x2005 [ ] [ ] [ ] [ ] # FOUR-PER-EM SPACE
|
||||||
|
0x2007 [ ] [ ] [ ] [ ] # FIGURE SPACE
|
||||||
|
0x2008 [ ] [ ] [ ] [ ] # PUNCTUATION SPACE
|
||||||
|
0x2009 [ ] [ ] [ ] [ ] # THIN SPACE
|
||||||
|
0x200A [ ] [ ] [ ] [ ] # HAIR SPACE
|
||||||
|
0x200C [] [] [] [] # ZERO WIDTH NON-JOINER
|
||||||
|
0x200D [] [] [] [] # ZERO WIDTH JOINER
|
||||||
|
0x200E [] [] [] [] # LEFT-TO-RIGHT MARK
|
||||||
|
0x200F [] [] [] [] # RIGHT-TO-LEFT MARK
|
||||||
|
0x2010 [‐] [‐] [‐] [‐] # HYPHEN
|
||||||
|
0x2013 [–] [–] [–] [–] # EN DASH
|
||||||
|
0x2014 [—] [—] [—] [—] # EM DASH
|
22
tests/lynx-dump/data/special_urls.html
Normal file
22
tests/lynx-dump/data/special_urls.html
Normal file
@ -0,0 +1,22 @@
|
|||||||
|
<html>
|
||||||
|
<head>
|
||||||
|
<title>Lynx Special URLs</title>
|
||||||
|
<link rev="made" href="mailto:WebMaster@foo.blah.dom">
|
||||||
|
</head>
|
||||||
|
<body>
|
||||||
|
<h1>Lynx Special URLs</h1>
|
||||||
|
<dl compact>
|
||||||
|
<dd>LYNXCFG:<a href="LYNXCFG:">LYNXCFG (ok)</a>
|
||||||
|
<dd>LYNXCOMPILEOPTS:<a href="LYNXCOMPILEOPTS:">LYNXCOMPILEOPTS (ok)</a>
|
||||||
|
<dd>LYNXCOOKIE:<a href="LYNXCOOKIE:">LYNXCOOKIE is not allowed</a>
|
||||||
|
<dd>LYNXDIRED:<a href="LYNXDIRED:">LYNXDIRED is not allowed</a>
|
||||||
|
<dd>LYNXDOWNLOAD:<a href="LYNXDOWNLOAD:">LYNXDOWNLOAD is not allowed</a>
|
||||||
|
<dd>LYNXHIST:<a href="LYNXHIST:">LYNXHIST is not allowed</a>
|
||||||
|
<dd>LYNXIMGMAP:<a href="LYNXIMGMAP:">LYNXIMGMAP is not allowed</a>
|
||||||
|
<dd>LYNXKEYMAP:<a href="LYNXKEYMAP:">LYNXKEYMAP (ok)</a>
|
||||||
|
<dd>LYNXMESSAGES:<a href="LYNXMESSAGES:">LYNXMESSAGES (ok)</a>
|
||||||
|
<dd>LYNXOPTIONS:<a href="LYNXOPTIONS:">LYNXOPTIONS (ok)</a>
|
||||||
|
<dd>LYNXPRINT:<a href="LYNXPRINT:">LYNXPRINT is not allowed</a>
|
||||||
|
</dl>
|
||||||
|
</body>
|
||||||
|
</html>
|
27
tests/lynx-dump/data/special_urls.html.exp
Normal file
27
tests/lynx-dump/data/special_urls.html.exp
Normal file
@ -0,0 +1,27 @@
|
|||||||
|
Lynx Special URLs
|
||||||
|
|
||||||
|
LYNXCFG:[1]LYNXCFG (ok)
|
||||||
|
LYNXCOMPILEOPTS:[2]LYNXCOMPILEOPTS (ok)
|
||||||
|
LYNXCOOKIE:[3]LYNXCOOKIE is not allowed
|
||||||
|
LYNXDIRED:[4]LYNXDIRED is not allowed
|
||||||
|
LYNXDOWNLOAD:[5]LYNXDOWNLOAD is not allowed
|
||||||
|
LYNXHIST:[6]LYNXHIST is not allowed
|
||||||
|
LYNXIMGMAP:[7]LYNXIMGMAP is not allowed
|
||||||
|
LYNXKEYMAP:[8]LYNXKEYMAP (ok)
|
||||||
|
LYNXMESSAGES:[9]LYNXMESSAGES (ok)
|
||||||
|
LYNXOPTIONS:[10]LYNXOPTIONS (ok)
|
||||||
|
LYNXPRINT:[11]LYNXPRINT is not allowed
|
||||||
|
|
||||||
|
References
|
||||||
|
|
||||||
|
1. LYNXCFG:/
|
||||||
|
2. LYNXCOMPILEOPTS:/
|
||||||
|
3. LYNXCOOKIE:/
|
||||||
|
4. LYNXDIRED:/
|
||||||
|
5. LYNXDOWNLOAD:/
|
||||||
|
6. LYNXHIST:/
|
||||||
|
7. LYNXIMGMAP:/
|
||||||
|
8. LYNXKEYMAP:/
|
||||||
|
9. LYNXMESSAGES:/
|
||||||
|
10. LYNXOPTIONS:/
|
||||||
|
11. LYNXPRINT:/
|
14
tests/lynx-dump/data/square.html
Normal file
14
tests/lynx-dump/data/square.html
Normal file
@ -0,0 +1,14 @@
|
|||||||
|
<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01//EN">
|
||||||
|
|
||||||
|
<html>
|
||||||
|
<head>
|
||||||
|
<meta name="generator" content=
|
||||||
|
"HTML Tidy for Linux (vers 25 March 2009), see www.w3.org">
|
||||||
|
|
||||||
|
<title>Test ImageMap - square</title>
|
||||||
|
</head>
|
||||||
|
|
||||||
|
<body>
|
||||||
|
<p>SQUARE</p>
|
||||||
|
</body>
|
||||||
|
</html>
|
1
tests/lynx-dump/data/square.html.exp
Normal file
1
tests/lynx-dump/data/square.html.exp
Normal file
@ -0,0 +1 @@
|
|||||||
|
SQUARE
|
39
tests/lynx-dump/data/tabtest.html
Normal file
39
tests/lynx-dump/data/tabtest.html
Normal file
@ -0,0 +1,39 @@
|
|||||||
|
<!DOCTYPE HTML PUBLIC "-//W3O//DTD W3 HTML 3.0//EN">
|
||||||
|
<html>
|
||||||
|
<head>
|
||||||
|
<title>Tests of TAB element.</title>
|
||||||
|
<link rev="made" href="mailto:lynx-dev@nongnu.org">
|
||||||
|
</head>
|
||||||
|
|
||||||
|
<body>
|
||||||
|
<h1>Tests of TAB element.</h1>
|
||||||
|
|
||||||
|
<TAB INDENT="16" ID="t0"><em>Normal Style:</em><br>
|
||||||
|
One<TAB INDENT="26" ID="t1">Two<TAB INDENT="44" ID="t2">Three
|
||||||
|
<TAB INDENT="62" ID="t3">Four<TAB INDENT="80" ID="t4">Five
|
||||||
|
<TAB INDENT="98" ID="t5">Six<TAB INDENT="116" ID="t6">Seven
|
||||||
|
<TAB INDENT="132" ID="t7">Eight<br>
|
||||||
|
1.<TAB TO="t1">2.<TAB TO="t2">3.<TAB TO="t3">4.<TAB TO="t4">5.
|
||||||
|
<TAB TO="t5">6.<TAB TO="t6">7.<TAB TO="t7">8.<br>
|
||||||
|
i.<TAB TO="t1">ii.<TAB TO="t2">iii.<TAB TO="t3">iv.<TAB TO="t4">v.
|
||||||
|
<TAB TO="t5">vi.<TAB TO="t6">vii.<TAB TO="t7">viii.
|
||||||
|
|
||||||
|
<p><pre><TAB TO="t0"><em>In PRE block:</em>
|
||||||
|
One<TAB TO="t1">Two<TAB TO="t3">Three<TAB TO="t5">Four<TAB TO="t7">Five
|
||||||
|
1.<TAB TO="t1">2.<TAB TO="t3">3.<TAB TO="t5">4.<TAB TO="t7">5.
|
||||||
|
i.<TAB TO="t1">ii.<TAB TO="t3">iii.<TAB TO="t5">iv.<TAB TO="t7">v.
|
||||||
|
</pre>
|
||||||
|
|
||||||
|
<bq>
|
||||||
|
<TAB TO="t0"><em>In BQ block:</em><br>
|
||||||
|
One<TAB TO="t2">Two<TAB TO="t4">Three<TAB TO="t6">Four<br>
|
||||||
|
1.<TAB TO="t2">2.<TAB TO="t4">3.<TAB TO="t6">4.<br>
|
||||||
|
i.<TAB TO="t2">ii.<TAB TO="t4">iii.<TAB TO="t6">iv.
|
||||||
|
</bq>
|
||||||
|
|
||||||
|
<p><b>noct<TAB ID="tn">ambulant</b> - walking at night<br>
|
||||||
|
<TAB TO="tn">(from Latin: <i>nox noctis</i> night + <i>ambulare</i> walk)
|
||||||
|
<pre>|<TAB INDENT="78">|<TAB INDENT="156">|
|
||||||
|
0<TAB INDENT="76">80<TAB INDENT="152">158</pre>
|
||||||
|
</body>
|
||||||
|
</html>
|
21
tests/lynx-dump/data/tabtest.html.exp
Normal file
21
tests/lynx-dump/data/tabtest.html.exp
Normal file
@ -0,0 +1,21 @@
|
|||||||
|
Tests of TAB element.
|
||||||
|
|
||||||
|
Normal Style:
|
||||||
|
One Two Three Four Five Six Seven Eight
|
||||||
|
1. 2. 3. 4. 5. 6. 7. 8.
|
||||||
|
i. ii. iii. iv. v. vi. vii. viii.
|
||||||
|
|
||||||
|
In PRE block:
|
||||||
|
One Two Three Four Five
|
||||||
|
1. 2. 3. 4. 5.
|
||||||
|
i. ii. iii. iv. v.
|
||||||
|
|
||||||
|
In BQ block:
|
||||||
|
One Two Three Four
|
||||||
|
1. 2. 3. 4.
|
||||||
|
i. ii. iii. iv.
|
||||||
|
|
||||||
|
noctambulant - walking at night
|
||||||
|
(from Latin: nox noctis night + ambulare walk)
|
||||||
|
| | |
|
||||||
|
0 80 158
|
219
tests/lynx-dump/data/tags.html
Normal file
219
tests/lynx-dump/data/tags.html
Normal file
@ -0,0 +1,219 @@
|
|||||||
|
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN">
|
||||||
|
<HTML>
|
||||||
|
<HEAD>
|
||||||
|
<TITLE>Tags to Test Color-Style</TITLE>
|
||||||
|
<META HTTP-EQUIV="Content-Type" CONTENT="text/html; charset=iso-8859-1">
|
||||||
|
<link href="nobody" rev="made">
|
||||||
|
</HEAD>
|
||||||
|
|
||||||
|
<BODY alink="green" bgcolor="yellow">
|
||||||
|
<!-- ====================================================================== -->
|
||||||
|
<br>
|
||||||
|
<h1>Content of an H1 Tag</h1>
|
||||||
|
Text after an H1 Tag.
|
||||||
|
<p>Paragraph after an H1 Tag.
|
||||||
|
<br>
|
||||||
|
<h2>Content of an H2 Tag</h2>
|
||||||
|
Text after an H2 Tag.
|
||||||
|
<p>Paragraph after an H2 Tag.
|
||||||
|
<br>
|
||||||
|
<h3>Content of an H3 Tag</h3>
|
||||||
|
Text after an H3 Tag.
|
||||||
|
<p>Paragraph after an H3 Tag.
|
||||||
|
<br>
|
||||||
|
<h4>Content of an H4 Tag</h4>
|
||||||
|
Text after an H4 Tag.
|
||||||
|
<p>Paragraph after an H4 Tag.
|
||||||
|
<br>
|
||||||
|
<h5>Content of an H5 Tag</h5>
|
||||||
|
Text after an H5 Tag.
|
||||||
|
<p>Paragraph after an H5 Tag.
|
||||||
|
<br>
|
||||||
|
<h6>Content of an H6 Tag</h6>
|
||||||
|
Text after an H6 Tag.
|
||||||
|
<p>Paragraph after an H6 Tag.
|
||||||
|
<!-- ====================================================================== -->
|
||||||
|
This is an <a href="#imagemap">"a"</a> tag.
|
||||||
|
<br>
|
||||||
|
This is an <address>"address"</address> tag.
|
||||||
|
<br>
|
||||||
|
This is a <b>"b"</b> tag.
|
||||||
|
<br>
|
||||||
|
This is a <big>"big"</big> tag.
|
||||||
|
<br>
|
||||||
|
Before quote, <blockquote>this is a "blockquote"</blockquote>, after quote.
|
||||||
|
<br>
|
||||||
|
This is a <center>"center"</center> tag.
|
||||||
|
<br>
|
||||||
|
This is a <cite>"cite"</cite> tag.
|
||||||
|
<br>
|
||||||
|
This is a <code>"code"</code> tag.
|
||||||
|
<br>
|
||||||
|
This is a <div>div</div> tag.
|
||||||
|
<br>
|
||||||
|
This is an <em>"em"</em> tag.
|
||||||
|
<br>
|
||||||
|
This is a <font>"font"</font> tag.
|
||||||
|
<!-- ====================================================================== -->
|
||||||
|
<br>
|
||||||
|
This is an <hr>"hr"<hr> tag.
|
||||||
|
<br>
|
||||||
|
This is an <i>"i"</i> tag.
|
||||||
|
<br>
|
||||||
|
This is an <iframe>"iframe"</iframe> tag.
|
||||||
|
<br>
|
||||||
|
This is an <img alt="img" src="image.jpg"> tag.
|
||||||
|
<br>
|
||||||
|
This is an <label>"label"</label> tag.
|
||||||
|
<br>
|
||||||
|
map: normal: lightgray: blue
|
||||||
|
<br>
|
||||||
|
<pre>
|
||||||
|
This is
|
||||||
|
pre-formatted
|
||||||
|
text (three lines, with pre's on preceding/following lines).
|
||||||
|
</pre>
|
||||||
|
<br>
|
||||||
|
This is a <q>"q"</q>tag.
|
||||||
|
<br>
|
||||||
|
This is a <samp>"samp"</samp> tag.
|
||||||
|
<br>
|
||||||
|
This is a <small>"small"</small> tag.
|
||||||
|
<br>
|
||||||
|
This is a <strong>"strong"</strong> tag.
|
||||||
|
<br>
|
||||||
|
This is a <sub>"sub"</sub> tag.
|
||||||
|
<br>
|
||||||
|
This is a <sup>"sup"</sup> tag.
|
||||||
|
<br>
|
||||||
|
This is a <tt>"tt"</tt> tag.
|
||||||
|
<br>
|
||||||
|
This is a <var>"var"</var> tag.
|
||||||
|
<!-- ====================================================================== -->
|
||||||
|
<h1>Forms</h1>
|
||||||
|
<hr>
|
||||||
|
<form action="http://localhost/cgi-bin/bogus-parms" method="get">
|
||||||
|
First: <input type="text" name="First" size=20>
|
||||||
|
Last: <input type="text" name="Last" size=20>
|
||||||
|
Description: <textarea rows=3 cols=40>
|
||||||
|
contents of textarea
|
||||||
|
</textarea>
|
||||||
|
<hr>
|
||||||
|
<input type="submit" value="Submit this form">
|
||||||
|
<br>
|
||||||
|
<input type="reset" value="Reset this form">
|
||||||
|
</form>
|
||||||
|
|
||||||
|
<h1 align="left">Another form</h1>
|
||||||
|
<hr>
|
||||||
|
<form action="http://localhost/cgi-bin/bogus-parms" method="get">
|
||||||
|
<hr>
|
||||||
|
<input type="checkbox" value="first">first
|
||||||
|
<br><input type="checkbox" value="second">second
|
||||||
|
<br><input type="checkbox" value="third">third
|
||||||
|
<br><input type="checkbox" value="">empty
|
||||||
|
<hr>
|
||||||
|
<input type="submit" value="done">done
|
||||||
|
</form>
|
||||||
|
|
||||||
|
<h1 align="right">Another form</h1>
|
||||||
|
<hr>
|
||||||
|
<form action="http://localhost/cgi-bin/bogus-parms" method="get">
|
||||||
|
<select>
|
||||||
|
<option>first option</option>
|
||||||
|
<option>second option</option>
|
||||||
|
<option>third option</option>
|
||||||
|
</select>
|
||||||
|
<hr>
|
||||||
|
<input type="submit" value="Submit this form">
|
||||||
|
<br>
|
||||||
|
<input type="reset" value="Reset this form">
|
||||||
|
</form>
|
||||||
|
<!-- ====================================================================== -->
|
||||||
|
<table border=2 summary="unquoted table">
|
||||||
|
<caption>Unquoted Table</caption>
|
||||||
|
<tr>
|
||||||
|
<td>First:</td>
|
||||||
|
<td>the first row</td>
|
||||||
|
<td>short</td>
|
||||||
|
<td>last</td></tr>
|
||||||
|
<tr>
|
||||||
|
<td>Second:</td>
|
||||||
|
<td>the second row</td>
|
||||||
|
<td>very long string</td>
|
||||||
|
<td>lower-right</td></tr>
|
||||||
|
</table>
|
||||||
|
<!-- ====================================================================== -->
|
||||||
|
<blockquote><table border=2 summary="quoted table">
|
||||||
|
<caption>Quoted Table</caption>
|
||||||
|
<tr>
|
||||||
|
<td>First:</td>
|
||||||
|
<td>the first row</td>
|
||||||
|
<td>very long string</td>
|
||||||
|
<td>last</td></tr>
|
||||||
|
<tr>
|
||||||
|
<td>Second:</td>
|
||||||
|
<td>the second row</td>
|
||||||
|
<td>short</td>
|
||||||
|
<td>lower-right</td></tr>
|
||||||
|
</table></blockquote>
|
||||||
|
<!-- ====================================================================== -->
|
||||||
|
<br>
|
||||||
|
<h1>An image map</h1>
|
||||||
|
<map name="IMAGEMAP">
|
||||||
|
<area alt="Square" shape="rect" coords="18,18,82,80" href="square.html">
|
||||||
|
<area alt="Circle" shape="circle" coords="127,48,31" href="circle.html">
|
||||||
|
<area alt="Triangle" shape="poly" coords="232,78,303,78,263,14,232,76"
|
||||||
|
href="triangle.html">
|
||||||
|
</map>
|
||||||
|
<!-- ====================================================================== -->
|
||||||
|
<br>
|
||||||
|
<h1>Definition List</h1>
|
||||||
|
This is an definition list:
|
||||||
|
<dl>
|
||||||
|
<dt>the first dt
|
||||||
|
<dd>the first dd
|
||||||
|
<dt>the second dt
|
||||||
|
<dd>the second dd
|
||||||
|
<dl>
|
||||||
|
<dt>the first dt
|
||||||
|
<dd>the first dd
|
||||||
|
<dt>the second dt
|
||||||
|
<dd>the second dd
|
||||||
|
<dt>the third dt
|
||||||
|
<dd>the third dd
|
||||||
|
</dl>
|
||||||
|
<dt>the third dt
|
||||||
|
<dd>the third dd
|
||||||
|
</dl>
|
||||||
|
<!-- ====================================================================== -->
|
||||||
|
<br>
|
||||||
|
<h1>Unordered List</h1>
|
||||||
|
This is an unordered list:
|
||||||
|
<ul>
|
||||||
|
<li>first item
|
||||||
|
<li>second item
|
||||||
|
<ul>
|
||||||
|
<li>first item
|
||||||
|
<li>second item
|
||||||
|
<li>third item
|
||||||
|
</ul>
|
||||||
|
<li>third item
|
||||||
|
</ul>
|
||||||
|
<!-- ====================================================================== -->
|
||||||
|
<br>
|
||||||
|
<h1>Ordered List</h1>
|
||||||
|
This is an ordered list:
|
||||||
|
<ol>
|
||||||
|
<li>first item
|
||||||
|
<li>second item
|
||||||
|
<ol>
|
||||||
|
<li>first item
|
||||||
|
<li>second item
|
||||||
|
<li>third item
|
||||||
|
</ol>
|
||||||
|
<li>third item
|
||||||
|
</ol>
|
||||||
|
|
||||||
|
</BODY>
|
||||||
|
</HTML>
|
185
tests/lynx-dump/data/tags.html.exp
Normal file
185
tests/lynx-dump/data/tags.html.exp
Normal file
@ -0,0 +1,185 @@
|
|||||||
|
Content of an H1 Tag
|
||||||
|
|
||||||
|
Text after an H1 Tag.
|
||||||
|
|
||||||
|
Paragraph after an H1 Tag.
|
||||||
|
|
||||||
|
Content of an H2 Tag
|
||||||
|
|
||||||
|
Text after an H2 Tag.
|
||||||
|
|
||||||
|
Paragraph after an H2 Tag.
|
||||||
|
|
||||||
|
Content of an H3 Tag
|
||||||
|
|
||||||
|
Text after an H3 Tag.
|
||||||
|
|
||||||
|
Paragraph after an H3 Tag.
|
||||||
|
|
||||||
|
Content of an H4 Tag
|
||||||
|
|
||||||
|
Text after an H4 Tag.
|
||||||
|
|
||||||
|
Paragraph after an H4 Tag.
|
||||||
|
|
||||||
|
Content of an H5 Tag
|
||||||
|
|
||||||
|
Text after an H5 Tag.
|
||||||
|
|
||||||
|
Paragraph after an H5 Tag.
|
||||||
|
|
||||||
|
Content of an H6 Tag
|
||||||
|
|
||||||
|
Text after an H6 Tag.
|
||||||
|
|
||||||
|
Paragraph after an H6 Tag. This is an [1]"a" tag.
|
||||||
|
This is an
|
||||||
|
|
||||||
|
|
||||||
|
"address"
|
||||||
|
|
||||||
|
tag.
|
||||||
|
This is a "b" tag.
|
||||||
|
This is a "big" tag.
|
||||||
|
Before quote,
|
||||||
|
|
||||||
|
this is a "blockquote"
|
||||||
|
|
||||||
|
, after quote.
|
||||||
|
This is a
|
||||||
|
|
||||||
|
"center"
|
||||||
|
|
||||||
|
tag.
|
||||||
|
This is a "cite" tag.
|
||||||
|
This is a "code" tag.
|
||||||
|
This is a
|
||||||
|
|
||||||
|
div
|
||||||
|
|
||||||
|
tag.
|
||||||
|
This is an "em" tag.
|
||||||
|
This is a "font" tag.
|
||||||
|
This is an
|
||||||
|
__________________________________________________________________
|
||||||
|
|
||||||
|
"hr"
|
||||||
|
__________________________________________________________________
|
||||||
|
|
||||||
|
tag.
|
||||||
|
This is an "i" tag.
|
||||||
|
This is an "iframe"
|
||||||
|
|
||||||
|
tag.
|
||||||
|
This is an img tag.
|
||||||
|
This is an "label" tag.
|
||||||
|
map: normal: lightgray: blue
|
||||||
|
This is
|
||||||
|
pre-formatted
|
||||||
|
text (three lines, with pre's on preceding/following lines).
|
||||||
|
|
||||||
|
This is a ""q""tag.
|
||||||
|
This is a "samp" tag.
|
||||||
|
This is a "small" tag.
|
||||||
|
This is a "strong" tag.
|
||||||
|
This is a ["sub"] tag.
|
||||||
|
This is a ^"sup" tag.
|
||||||
|
This is a "tt" tag.
|
||||||
|
This is a "var" tag.
|
||||||
|
|
||||||
|
Forms
|
||||||
|
__________________________________________________________________
|
||||||
|
|
||||||
|
First: ____________________ Last: ____________________ Description:
|
||||||
|
contents of textarea____________________
|
||||||
|
________________________________________
|
||||||
|
________________________________________
|
||||||
|
__________________________________________________________________
|
||||||
|
|
||||||
|
Submit this form
|
||||||
|
Reset this form
|
||||||
|
|
||||||
|
Another form
|
||||||
|
__________________________________________________________________
|
||||||
|
__________________________________________________________________
|
||||||
|
|
||||||
|
[ ] first
|
||||||
|
[ ] second
|
||||||
|
[ ] third
|
||||||
|
[ ] empty
|
||||||
|
__________________________________________________________________
|
||||||
|
|
||||||
|
donedone
|
||||||
|
|
||||||
|
Another form
|
||||||
|
__________________________________________________________________
|
||||||
|
|
||||||
|
[first option_]
|
||||||
|
__________________________________________________________________
|
||||||
|
|
||||||
|
Submit this form
|
||||||
|
Reset this form
|
||||||
|
|
||||||
|
CAPTION: Unquoted Table
|
||||||
|
|
||||||
|
First: the first row short last
|
||||||
|
Second: the second row very long string lower-right
|
||||||
|
|
||||||
|
CAPTION: Quoted Table
|
||||||
|
|
||||||
|
First: the first row very long string last
|
||||||
|
Second: the second row short lower-right
|
||||||
|
|
||||||
|
An image map
|
||||||
|
|
||||||
|
Definition List
|
||||||
|
|
||||||
|
This is an definition list:
|
||||||
|
|
||||||
|
the first dt
|
||||||
|
the first dd
|
||||||
|
|
||||||
|
the second dt
|
||||||
|
the second dd
|
||||||
|
|
||||||
|
the first dt
|
||||||
|
the first dd
|
||||||
|
|
||||||
|
the second dt
|
||||||
|
the second dd
|
||||||
|
|
||||||
|
the third dt
|
||||||
|
the third dd
|
||||||
|
|
||||||
|
the third dt
|
||||||
|
the third dd
|
||||||
|
|
||||||
|
Unordered List
|
||||||
|
|
||||||
|
This is an unordered list:
|
||||||
|
* first item
|
||||||
|
* second item
|
||||||
|
+ first item
|
||||||
|
+ second item
|
||||||
|
+ third item
|
||||||
|
* third item
|
||||||
|
|
||||||
|
Ordered List
|
||||||
|
|
||||||
|
This is an ordered list:
|
||||||
|
1. first item
|
||||||
|
2. second item
|
||||||
|
1. first item
|
||||||
|
2. second item
|
||||||
|
3. third item
|
||||||
|
3. third item
|
||||||
|
|
||||||
|
References
|
||||||
|
|
||||||
|
1.
|
||||||
|
|
||||||
|
[USEMAP]
|
||||||
|
|
||||||
|
1.
|
||||||
|
2.
|
||||||
|
3.
|
106
tests/lynx-dump/data/test-styles.html
Normal file
106
tests/lynx-dump/data/test-styles.html
Normal file
@ -0,0 +1,106 @@
|
|||||||
|
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN">
|
||||||
|
<HTML>
|
||||||
|
<HEAD>
|
||||||
|
<TITLE>Test Color-Styles</TITLE>
|
||||||
|
</HEAD>
|
||||||
|
<BODY>
|
||||||
|
<h1>Heading 1</h1>
|
||||||
|
<h2>Heading 2</h2>
|
||||||
|
<h3>Heading 3</h3>
|
||||||
|
<h4>Heading 4</h4>
|
||||||
|
<h5>Heading 5</h5>
|
||||||
|
<h6>Heading 6</h6>
|
||||||
|
|
||||||
|
<h1>Heading 1 - Ordinary Text</h1>
|
||||||
|
<h2><a name="fontlike_text">Heading 2 - <b>Fontlike</b> Text</a></h2>
|
||||||
|
<!-- STYLE,BR,TAB -->
|
||||||
|
This is <b>b (bold)</b>.
|
||||||
|
<br>
|
||||||
|
This is <big>big</big>.
|
||||||
|
<br>
|
||||||
|
This is <blink>blink</blink>.
|
||||||
|
<br>
|
||||||
|
This is <i>i (italicized)</i>.
|
||||||
|
<br>
|
||||||
|
This is <small>small</small>.
|
||||||
|
<br>
|
||||||
|
This is <strike>strike</strike>.
|
||||||
|
<br>
|
||||||
|
This is <tt>tt (typewriter)</tt>.
|
||||||
|
<br>
|
||||||
|
This is <u>u (underlined)</u>.
|
||||||
|
|
||||||
|
<h2><a name="emphasized_text">Heading 2 - <em>Emphasized</em> Text</a></h2>
|
||||||
|
This is <cite>cite (citation)</cite>.
|
||||||
|
<br>
|
||||||
|
This is <code>code</code>.
|
||||||
|
<br>
|
||||||
|
This is <del>del</del>.
|
||||||
|
<br>
|
||||||
|
This is <dfn>dfn (definition)</dfn>.
|
||||||
|
<br>
|
||||||
|
This is <em>emphasized</em>.
|
||||||
|
<br>
|
||||||
|
This is <ins>ins</ins>.
|
||||||
|
<br>
|
||||||
|
This is <kbd>kbd (keyboard)</kbd>.
|
||||||
|
<br>
|
||||||
|
This is <q>q (quoted)</q>.
|
||||||
|
<br>
|
||||||
|
This is <samp>samp (sample)</samp>.
|
||||||
|
<br>
|
||||||
|
This is <span>span</span>.
|
||||||
|
<br>
|
||||||
|
This is <strong>strong</strong>.
|
||||||
|
<br>
|
||||||
|
This is <var>var</var>.
|
||||||
|
|
||||||
|
<h1>Heading 1 - Ordinary Links</h1>
|
||||||
|
<a href="#fontlike_text">This is a link</a> to fontlike text.
|
||||||
|
<br>
|
||||||
|
<a href="#emphasized_text">This is a link</a> to emphasized text.
|
||||||
|
|
||||||
|
<h1>Heading 1 - Emphasized Links</h1>
|
||||||
|
<h2><a name="fontlike_links">Heading 2 - <b>Fontlike</b> Links</a></h2>
|
||||||
|
<br>
|
||||||
|
This is <a href="#fontlike_text"><b>b (bold)</b> link</a>.
|
||||||
|
<br>
|
||||||
|
This is <a href="#fontlike_text"><big>big</big> link</a>.
|
||||||
|
<br>
|
||||||
|
This is <a href="#fontlike_text"><blink>blink</blink> link</a>.
|
||||||
|
<br>
|
||||||
|
This is <a href="#fontlike_text"><i>i (italicized)</i> link</a>.
|
||||||
|
<br>
|
||||||
|
This is <a href="#fontlike_text"><small>small</small> link</a>.
|
||||||
|
<br>
|
||||||
|
This is <a href="#fontlike_text"><strike>strike</strike> link</a>.
|
||||||
|
<br>
|
||||||
|
This is <a href="#fontlike_text"><tt>tt (typewriter)</tt> link</a>.
|
||||||
|
<br>
|
||||||
|
This is <a href="#fontlike_text"><u>u (underlined)</u> link</a>.
|
||||||
|
|
||||||
|
<h2><a name="emphasized_links">Heading 2 - <b>Emphasized</b> Links</a></h2>
|
||||||
|
This is <a href="#emphasized_text"><cite>cite (citation)</cite> link</a>.
|
||||||
|
<br>
|
||||||
|
This is <a href="#emphasized_text"><code>code</code> link</a>.
|
||||||
|
<br>
|
||||||
|
This is <a href="#emphasized_text"><del>del</del> link</a>.
|
||||||
|
<br>
|
||||||
|
This is <a href="#emphasized_text"><dfn>dfn (definition)</dfn> link</a>.
|
||||||
|
<br>
|
||||||
|
This is <a href="#emphasized_text"><em>emphasized</em> link</a>.
|
||||||
|
<br>
|
||||||
|
This is <a href="#emphasized_text"><ins>ins</ins> link</a>.
|
||||||
|
<br>
|
||||||
|
This is <a href="#emphasized_text"><kbd>kbd (keyboard)</kbd> link</a>.
|
||||||
|
<br>
|
||||||
|
This is <a href="#emphasized_text"><q>q (quoted)</q> link</a>.
|
||||||
|
<br>
|
||||||
|
This is <a href="#emphasized_text"><samp>samp (sample)</samp> link</a>.
|
||||||
|
<br>
|
||||||
|
This is <a href="#emphasized_text"><span>span</span> link</a>.
|
||||||
|
<br>
|
||||||
|
This is <a href="#emphasized_text"><strong>strong</strong> link</a>.
|
||||||
|
<br>
|
||||||
|
This is <a href="#emphasized_text"><var>var</var> link</a>.
|
||||||
|
</BODY>
|
97
tests/lynx-dump/data/test-styles.html.exp
Normal file
97
tests/lynx-dump/data/test-styles.html.exp
Normal file
@ -0,0 +1,97 @@
|
|||||||
|
Heading 1
|
||||||
|
|
||||||
|
Heading 2
|
||||||
|
|
||||||
|
Heading 3
|
||||||
|
|
||||||
|
Heading 4
|
||||||
|
|
||||||
|
Heading 5
|
||||||
|
|
||||||
|
Heading 6
|
||||||
|
|
||||||
|
Heading 1 - Ordinary Text
|
||||||
|
|
||||||
|
Heading 2 - Fontlike Text
|
||||||
|
|
||||||
|
This is b (bold).
|
||||||
|
This is big.
|
||||||
|
This is blink.
|
||||||
|
This is i (italicized).
|
||||||
|
This is small.
|
||||||
|
This is [DEL: strike :DEL] .
|
||||||
|
This is tt (typewriter).
|
||||||
|
This is u (underlined).
|
||||||
|
|
||||||
|
Heading 2 - Emphasized Text
|
||||||
|
|
||||||
|
This is cite (citation).
|
||||||
|
This is code.
|
||||||
|
This is [DEL: del :DEL] .
|
||||||
|
This is dfn (definition).
|
||||||
|
This is emphasized.
|
||||||
|
This is [INS: ins :INS] .
|
||||||
|
This is kbd (keyboard).
|
||||||
|
This is "q (quoted)".
|
||||||
|
This is samp (sample).
|
||||||
|
This is span.
|
||||||
|
This is strong.
|
||||||
|
This is var.
|
||||||
|
|
||||||
|
Heading 1 - Ordinary Links
|
||||||
|
|
||||||
|
[1]This is a link to fontlike text.
|
||||||
|
[2]This is a link to emphasized text.
|
||||||
|
|
||||||
|
Heading 1 - Emphasized Links
|
||||||
|
|
||||||
|
Heading 2 - Fontlike Links
|
||||||
|
|
||||||
|
This is [3]b (bold) link.
|
||||||
|
This is [4]big link.
|
||||||
|
This is [5]blink link.
|
||||||
|
This is [6]i (italicized) link.
|
||||||
|
This is [7]small link.
|
||||||
|
This is [8][DEL: strike :DEL] link.
|
||||||
|
This is [9]tt (typewriter) link.
|
||||||
|
This is [10]u (underlined) link.
|
||||||
|
|
||||||
|
Heading 2 - Emphasized Links
|
||||||
|
|
||||||
|
This is [11]cite (citation) link.
|
||||||
|
This is [12]code link.
|
||||||
|
This is [13][DEL: del :DEL] link.
|
||||||
|
This is [14]dfn (definition) link.
|
||||||
|
This is [15]emphasized link.
|
||||||
|
This is [16][INS: ins :INS] link.
|
||||||
|
This is [17]kbd (keyboard) link.
|
||||||
|
This is [18]"q (quoted)" link.
|
||||||
|
This is [19]samp (sample) link.
|
||||||
|
This is [20]span link.
|
||||||
|
This is [21]strong link.
|
||||||
|
This is [22]var link.
|
||||||
|
|
||||||
|
References
|
||||||
|
|
||||||
|
1.
|
||||||
|
2.
|
||||||
|
3.
|
||||||
|
4.
|
||||||
|
5.
|
||||||
|
6.
|
||||||
|
7.
|
||||||
|
8.
|
||||||
|
9.
|
||||||
|
10.
|
||||||
|
11.
|
||||||
|
12.
|
||||||
|
13.
|
||||||
|
14.
|
||||||
|
15.
|
||||||
|
16.
|
||||||
|
17.
|
||||||
|
18.
|
||||||
|
19.
|
||||||
|
20.
|
||||||
|
21.
|
||||||
|
22.
|
14
tests/lynx-dump/data/triangle.html
Normal file
14
tests/lynx-dump/data/triangle.html
Normal file
@ -0,0 +1,14 @@
|
|||||||
|
<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01//EN">
|
||||||
|
|
||||||
|
<html>
|
||||||
|
<head>
|
||||||
|
<meta name="generator" content=
|
||||||
|
"HTML Tidy for Linux (vers 25 March 2009), see www.w3.org">
|
||||||
|
|
||||||
|
<title>Test ImageMap - triangle</title>
|
||||||
|
</head>
|
||||||
|
|
||||||
|
<body>
|
||||||
|
<p>TRIANGLE</p>
|
||||||
|
</body>
|
||||||
|
</html>
|
1
tests/lynx-dump/data/triangle.html.exp
Normal file
1
tests/lynx-dump/data/triangle.html.exp
Normal file
@ -0,0 +1 @@
|
|||||||
|
TRIANGLE
|
915
tests/lynx-dump/data/unicode.html
Normal file
915
tests/lynx-dump/data/unicode.html
Normal file
@ -0,0 +1,915 @@
|
|||||||
|
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN">
|
||||||
|
<HTML>
|
||||||
|
<HEAD>
|
||||||
|
<TITLE>Test of some Unicode symbols in numeric character reference form</TITLE>
|
||||||
|
</HEAD>
|
||||||
|
<BODY>
|
||||||
|
<PRE>
|
||||||
|
|
||||||
|
This table prepared from SGML.TXT available at ftp.unicode.org
|
||||||
|
|
||||||
|
ftp://ftp.unicode.org/MAPPINGS/VENDORS/MISC/SGML.TXT
|
||||||
|
(if doing ftp, try cd Public/MAPPINGS/VENDORS/MISC)
|
||||||
|
|
||||||
|
|
||||||
|
original comment:
|
||||||
|
|
||||||
|
# Author: John Cowan <cowan@ccil.org>
|
||||||
|
# Date: 25 July 1997
|
||||||
|
#
|
||||||
|
# The following table maps SGML character entities from various
|
||||||
|
# public sets (namely, ISOamsa, ISOamsb, ISOamsc, ISOamsn, ISOamso,
|
||||||
|
# ISOamsr, ISObox, ISOcyr1, ISOcyr2, ISOdia, ISOgrk1, ISOgrk2,
|
||||||
|
# ISOgrk3, ISOgrk4, ISOlat1, ISOlat2, ISOnum, ISOpub, ISOtech,
|
||||||
|
# HTMLspecial, HTMLsymbol) to corresponding Unicode characters.
|
||||||
|
#
|
||||||
|
# The table has four tab-separated columns:
|
||||||
|
# Column 1: SGML character entity name
|
||||||
|
# Column 2: SGML public entity set
|
||||||
|
# Column 3: Unicode 2.0 character code
|
||||||
|
# Column 4: Unicode 2.0 character name (UPPER CASE)
|
||||||
|
# Entries which don't have Unicode equivalents have "0x????"
|
||||||
|
# in Column 3 and a lower case description (from the public entity
|
||||||
|
# set DTD) in Column 4. The mapping is not reversible, because many
|
||||||
|
# distinctions are unified away in Unicode, particularly between
|
||||||
|
# mathematical symbols.
|
||||||
|
#
|
||||||
|
# The table is sorted case-blind by SGML character entity name.
|
||||||
|
#
|
||||||
|
# The contents of this table are drawn from various sources, and
|
||||||
|
# are in the public domain.
|
||||||
|
#
|
||||||
|
<!-- Changes:
|
||||||
|
+ {"euro", 0x20AC}, /* EURO SIGN */
|
||||||
|
|
||||||
|
-->
|
||||||
|
|
||||||
|
This test is illuminated Unicode numeric entities like &#x22AB;
|
||||||
|
We sort the entities according to unicode numbers.
|
||||||
|
You should see visible characters if your display character set support them
|
||||||
|
or some substitution string picked up from src/chrtrans/def7_uni.tbl
|
||||||
|
|
||||||
|
If you see something like &#x34D2; - this number unknown to def7_uni.tbl
|
||||||
|
or the internal browser's implementation is broken.
|
||||||
|
Leonid Pauzner.
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
0x0021 ! # EXCLAMATION MARK
|
||||||
|
0x0022 " # QUOTATION MARK
|
||||||
|
0x0023 # # NUMBER SIGN
|
||||||
|
0x0024 $ # DOLLAR SIGN
|
||||||
|
0x0025 % # PERCENT SIGN
|
||||||
|
0x0026 & # AMPERSAND
|
||||||
|
0x0028 ( # LEFT PARENTHESIS
|
||||||
|
0x0029 ) # RIGHT PARENTHESIS
|
||||||
|
0x002A * # ASTERISK
|
||||||
|
0x002B + # PLUS SIGN
|
||||||
|
0x002C , # COMMA
|
||||||
|
0x002D - # HYPHEN-MINUS
|
||||||
|
0x002E . # FULL STOP
|
||||||
|
0x002F / # SOLIDUS
|
||||||
|
0x003A : # COLON
|
||||||
|
0x003B ; # SEMICOLON
|
||||||
|
0x003C < # LESS-THAN SIGN
|
||||||
|
0x003D = # EQUALS SIGN
|
||||||
|
0x003E > # GREATER-THAN SIGN
|
||||||
|
0x003F ? # QUESTION MARK
|
||||||
|
0x0040 @ # COMMERCIAL AT
|
||||||
|
0x005B [ # LEFT SQUARE BRACKET
|
||||||
|
0x005C \ # REVERSE SOLIDUS
|
||||||
|
0x005C \ # REVERSE SOLIDUS
|
||||||
|
0x005D ] # RIGHT SQUARE BRACKET
|
||||||
|
0x005F _ # LOW LINE
|
||||||
|
0x0060 ` # GRAVE ACCENT
|
||||||
|
0x007B { # LEFT CURLY BRACKET
|
||||||
|
0x007C | # VERTICAL LINE
|
||||||
|
0x007D } # RIGHT CURLY BRACKET
|
||||||
|
0x00A0   # NO-BREAK SPACE
|
||||||
|
0x00A1 ¡ # INVERTED EXCLAMATION MARK
|
||||||
|
0x00A2 ¢ # CENT SIGN
|
||||||
|
0x00A3 £ # POUND SIGN
|
||||||
|
0x00A4 ¤ # CURRENCY SIGN
|
||||||
|
0x00A5 ¥ # YEN SIGN
|
||||||
|
0x00A6 ¦ # BROKEN BAR
|
||||||
|
0x00A7 § # SECTION SIGN
|
||||||
|
0x00A8 ¨ # DIAERESIS
|
||||||
|
0x00A9 © # COPYRIGHT SIGN
|
||||||
|
0x00AA ª # FEMININE ORDINAL INDICATOR
|
||||||
|
0x00AB « # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
|
||||||
|
0x00AC ¬ # NOT SIGN
|
||||||
|
0x00AD ­ # SOFT HYPHEN
|
||||||
|
0x00AE ® # REGISTERED SIGN
|
||||||
|
0x00AF ¯ # MACRON
|
||||||
|
0x00B0 ° # DEGREE SIGN
|
||||||
|
0x00B1 ± # PLUS-MINUS SIGN
|
||||||
|
0x00B2 ² # SUPERSCRIPT TWO
|
||||||
|
0x00B3 ³ # SUPERSCRIPT THREE
|
||||||
|
0x00B4 ´ # ACUTE ACCENT
|
||||||
|
0x00B5 µ # MICRO SIGN
|
||||||
|
0x00B6 ¶ # PILCROW SIGN
|
||||||
|
0x00B7 · # MIDDLE DOT
|
||||||
|
0x00B8 ¸ # CEDILLA
|
||||||
|
0x00B9 ¹ # SUPERSCRIPT ONE
|
||||||
|
0x00BA º # MASCULINE ORDINAL INDICATOR
|
||||||
|
0x00BB » # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
|
||||||
|
0x00BC ¼ # VULGAR FRACTION ONE QUARTER
|
||||||
|
0x00BD ½ # VULGAR FRACTION ONE HALF
|
||||||
|
0x00BE ¾ # VULGAR FRACTION THREE QUARTERS
|
||||||
|
0x00BF ¿ # INVERTED QUESTION MARK
|
||||||
|
0x00C0 À # LATIN CAPITAL LETTER A WITH GRAVE
|
||||||
|
0x00C1 Á # LATIN CAPITAL LETTER A WITH ACUTE
|
||||||
|
0x00C2 Â # LATIN CAPITAL LETTER A WITH CIRCUMFLEX
|
||||||
|
0x00C3 Ã # LATIN CAPITAL LETTER A WITH TILDE
|
||||||
|
0x00C4 Ä # LATIN CAPITAL LETTER A WITH DIAERESIS
|
||||||
|
0x00C5 Å # LATIN CAPITAL LETTER A WITH RING ABOVE
|
||||||
|
0x00C6 Æ # LATIN CAPITAL LETTER AE
|
||||||
|
0x00C7 Ç # LATIN CAPITAL LETTER C WITH CEDILLA
|
||||||
|
0x00C8 È # LATIN CAPITAL LETTER E WITH GRAVE
|
||||||
|
0x00C9 É # LATIN CAPITAL LETTER E WITH ACUTE
|
||||||
|
0x00CA Ê # LATIN CAPITAL LETTER E WITH CIRCUMFLEX
|
||||||
|
0x00CB Ë # LATIN CAPITAL LETTER E WITH DIAERESIS
|
||||||
|
0x00CC Ì # LATIN CAPITAL LETTER I WITH GRAVE
|
||||||
|
0x00CD Í # LATIN CAPITAL LETTER I WITH ACUTE
|
||||||
|
0x00CE Î # LATIN CAPITAL LETTER I WITH CIRCUMFLEX
|
||||||
|
0x00CF Ï # LATIN CAPITAL LETTER I WITH DIAERESIS
|
||||||
|
0x00D0 Ð # LATIN CAPITAL LETTER ETH
|
||||||
|
0x00D1 Ñ # LATIN CAPITAL LETTER N WITH TILDE
|
||||||
|
0x00D2 Ò # LATIN CAPITAL LETTER O WITH GRAVE
|
||||||
|
0x00D3 Ó # LATIN CAPITAL LETTER O WITH ACUTE
|
||||||
|
0x00D4 Ô # LATIN CAPITAL LETTER O WITH CIRCUMFLEX
|
||||||
|
0x00D5 Õ # LATIN CAPITAL LETTER O WITH TILDE
|
||||||
|
0x00D6 Ö # LATIN CAPITAL LETTER O WITH DIAERESIS
|
||||||
|
0x00D7 × # MULTIPLICATION SIGN
|
||||||
|
0x00D8 Ø # LATIN CAPITAL LETTER O WITH STROKE
|
||||||
|
0x00D9 Ù # LATIN CAPITAL LETTER U WITH GRAVE
|
||||||
|
0x00DA Ú # LATIN CAPITAL LETTER U WITH ACUTE
|
||||||
|
0x00DB Û # LATIN CAPITAL LETTER U WITH CIRCUMFLEX
|
||||||
|
0x00DC Ü # LATIN CAPITAL LETTER U WITH DIAERESIS
|
||||||
|
0x00DD Ý # LATIN CAPITAL LETTER Y WITH ACUTE
|
||||||
|
0x00DE Þ # LATIN CAPITAL LETTER THORN
|
||||||
|
0x00DF ß # LATIN SMALL LETTER SHARP S
|
||||||
|
0x00E0 à # LATIN SMALL LETTER A WITH GRAVE
|
||||||
|
0x00E1 á # LATIN SMALL LETTER A WITH ACUTE
|
||||||
|
0x00E2 â # LATIN SMALL LETTER A WITH CIRCUMFLEX
|
||||||
|
0x00E3 ã # LATIN SMALL LETTER A WITH TILDE
|
||||||
|
0x00E4 ä # LATIN SMALL LETTER A WITH DIAERESIS
|
||||||
|
0x00E5 å # LATIN SMALL LETTER A WITH RING ABOVE
|
||||||
|
0x00E6 æ # LATIN SMALL LETTER AE
|
||||||
|
0x00E7 ç # LATIN SMALL LETTER C WITH CEDILLA
|
||||||
|
0x00E8 è # LATIN SMALL LETTER E WITH GRAVE
|
||||||
|
0x00E9 é # LATIN SMALL LETTER E WITH ACUTE
|
||||||
|
0x00EA ê # LATIN SMALL LETTER E WITH CIRCUMFLEX
|
||||||
|
0x00EB ë # LATIN SMALL LETTER E WITH DIAERESIS
|
||||||
|
0x00EC ì # LATIN SMALL LETTER I WITH GRAVE
|
||||||
|
0x00ED í # LATIN SMALL LETTER I WITH ACUTE
|
||||||
|
0x00EE î # LATIN SMALL LETTER I WITH CIRCUMFLEX
|
||||||
|
0x00EF ï # LATIN SMALL LETTER I WITH DIAERESIS
|
||||||
|
0x00F0 ð # LATIN SMALL LETTER ETH
|
||||||
|
0x00F1 ñ # LATIN SMALL LETTER N WITH TILDE
|
||||||
|
0x00F2 ò # LATIN SMALL LETTER O WITH GRAVE
|
||||||
|
0x00F3 ó # LATIN SMALL LETTER O WITH ACUTE
|
||||||
|
0x00F4 ô # LATIN SMALL LETTER O WITH CIRCUMFLEX
|
||||||
|
0x00F5 õ # LATIN SMALL LETTER O WITH TILDE
|
||||||
|
0x00F6 ö # LATIN SMALL LETTER O WITH DIAERESIS
|
||||||
|
0x00F7 ÷ # DIVISION SIGN
|
||||||
|
0x00F8 ø # LATIN SMALL LETTER O WITH STROKE
|
||||||
|
0x00F9 ù # LATIN SMALL LETTER U WITH GRAVE
|
||||||
|
0x00FA ú # LATIN SMALL LETTER U WITH ACUTE
|
||||||
|
0x00FB û # LATIN SMALL LETTER U WITH CIRCUMFLEX
|
||||||
|
0x00FC ü # LATIN SMALL LETTER U WITH DIAERESIS
|
||||||
|
0x00FD ý # LATIN SMALL LETTER Y WITH ACUTE
|
||||||
|
0x00FE þ # LATIN SMALL LETTER THORN
|
||||||
|
0x00FF ÿ # LATIN SMALL LETTER Y WITH DIAERESIS
|
||||||
|
0x0100 Ā # LATIN CAPITAL LETTER A WITH MACRON
|
||||||
|
0x0101 ā # LATIN SMALL LETTER A WITH MACRON
|
||||||
|
0x0102 Ă # LATIN CAPITAL LETTER A WITH BREVE
|
||||||
|
0x0103 ă # LATIN SMALL LETTER A WITH BREVE
|
||||||
|
0x0104 Ą # LATIN CAPITAL LETTER A WITH OGONEK
|
||||||
|
0x0105 ą # LATIN SMALL LETTER A WITH OGONEK
|
||||||
|
0x0106 Ć # LATIN CAPITAL LETTER C WITH ACUTE
|
||||||
|
0x0107 ć # LATIN SMALL LETTER C WITH ACUTE
|
||||||
|
0x0108 Ĉ # LATIN CAPITAL LETTER C WITH CIRCUMFLEX
|
||||||
|
0x0109 ĉ # LATIN SMALL LETTER C WITH CIRCUMFLEX
|
||||||
|
0x010A Ċ # LATIN CAPITAL LETTER C WITH DOT ABOVE
|
||||||
|
0x010B ċ # LATIN SMALL LETTER C WITH DOT ABOVE
|
||||||
|
0x010C Č # LATIN CAPITAL LETTER C WITH CARON
|
||||||
|
0x010D č # LATIN SMALL LETTER C WITH CARON
|
||||||
|
0x010E Ď # LATIN CAPITAL LETTER D WITH CARON
|
||||||
|
0x010F ď # LATIN SMALL LETTER D WITH CARON
|
||||||
|
0x0110 Đ # LATIN CAPITAL LETTER D WITH STROKE
|
||||||
|
0x0111 đ # LATIN SMALL LETTER D WITH STROKE
|
||||||
|
0x0112 Ē # LATIN CAPITAL LETTER E WITH MACRON
|
||||||
|
0x0113 ē # LATIN SMALL LETTER E WITH MACRON
|
||||||
|
0x0116 Ė # LATIN CAPITAL LETTER E WITH DOT ABOVE
|
||||||
|
0x0117 ė # LATIN SMALL LETTER E WITH DOT ABOVE
|
||||||
|
0x0118 Ę # LATIN CAPITAL LETTER E WITH OGONEK
|
||||||
|
0x0119 ę # LATIN SMALL LETTER E WITH OGONEK
|
||||||
|
0x011A Ě # LATIN CAPITAL LETTER E WITH CARON
|
||||||
|
0x011B ě # LATIN SMALL LETTER E WITH CARON
|
||||||
|
0x011C Ĝ # LATIN CAPITAL LETTER G WITH CIRCUMFLEX
|
||||||
|
0x011D ĝ # LATIN SMALL LETTER G WITH CIRCUMFLEX
|
||||||
|
0x011E Ğ # LATIN CAPITAL LETTER G WITH BREVE
|
||||||
|
0x011F ğ # LATIN SMALL LETTER G WITH BREVE
|
||||||
|
0x0120 Ġ # LATIN CAPITAL LETTER G WITH DOT ABOVE
|
||||||
|
0x0121 ġ # LATIN SMALL LETTER G WITH DOT ABOVE
|
||||||
|
0x0122 Ģ # LATIN CAPITAL LETTER G WITH CEDILLA
|
||||||
|
0x0123 ģ # LATIN SMALL LETTER G WITH CEDILLA
|
||||||
|
0x0124 Ĥ # LATIN CAPITAL LETTER H WITH CIRCUMFLEX
|
||||||
|
0x0125 ĥ # LATIN SMALL LETTER H WITH CIRCUMFLEX
|
||||||
|
0x0126 Ħ # LATIN CAPITAL LETTER H WITH STROKE
|
||||||
|
0x0127 ħ # LATIN SMALL LETTER H WITH STROKE
|
||||||
|
0x0128 Ĩ # LATIN CAPITAL LETTER I WITH TILDE
|
||||||
|
0x0129 ĩ # LATIN SMALL LETTER I WITH TILDE
|
||||||
|
0x012A Ī # LATIN CAPITAL LETTER I WITH MACRON
|
||||||
|
0x012B ī # LATIN SMALL LETTER I WITH MACRON
|
||||||
|
0x012E Į # LATIN CAPITAL LETTER I WITH OGONEK
|
||||||
|
0x012F į # LATIN SMALL LETTER I WITH OGONEK
|
||||||
|
0x0130 İ # LATIN CAPITAL LETTER I WITH DOT ABOVE
|
||||||
|
0x0131 ı # LATIN SMALL LETTER DOTLESS I
|
||||||
|
0x0131 ı # LATIN SMALL LETTER DOTLESS I
|
||||||
|
0x0132 IJ # LATIN CAPITAL LIGATURE IJ
|
||||||
|
0x0133 ij # LATIN SMALL LIGATURE IJ
|
||||||
|
0x0134 Ĵ # LATIN CAPITAL LETTER J WITH CIRCUMFLEX
|
||||||
|
0x0135 ĵ # LATIN SMALL LETTER J WITH CIRCUMFLEX
|
||||||
|
0x0136 Ķ # LATIN CAPITAL LETTER K WITH CEDILLA
|
||||||
|
0x0137 ķ # LATIN SMALL LETTER K WITH CEDILLA
|
||||||
|
0x0138 ĸ # LATIN SMALL LETTER KRA
|
||||||
|
0x0139 Ĺ # LATIN CAPITAL LETTER L WITH ACUTE
|
||||||
|
0x013A ĺ # LATIN SMALL LETTER L WITH ACUTE
|
||||||
|
0x013B Ļ # LATIN CAPITAL LETTER L WITH CEDILLA
|
||||||
|
0x013C ļ # LATIN SMALL LETTER L WITH CEDILLA
|
||||||
|
0x013D Ľ # LATIN CAPITAL LETTER L WITH CARON
|
||||||
|
0x013E ľ # LATIN SMALL LETTER L WITH CARON
|
||||||
|
0x013F Ŀ # LATIN CAPITAL LETTER L WITH MIDDLE DOT
|
||||||
|
0x0140 ŀ # LATIN SMALL LETTER L WITH MIDDLE DOT
|
||||||
|
0x0141 Ł # LATIN CAPITAL LETTER L WITH STROKE
|
||||||
|
0x0142 ł # LATIN SMALL LETTER L WITH STROKE
|
||||||
|
0x0143 Ń # LATIN CAPITAL LETTER N WITH ACUTE
|
||||||
|
0x0144 ń # LATIN SMALL LETTER N WITH ACUTE
|
||||||
|
0x0145 Ņ # LATIN CAPITAL LETTER N WITH CEDILLA
|
||||||
|
0x0146 ņ # LATIN SMALL LETTER N WITH CEDILLA
|
||||||
|
0x0147 Ň # LATIN CAPITAL LETTER N WITH CARON
|
||||||
|
0x0148 ň # LATIN SMALL LETTER N WITH CARON
|
||||||
|
0x0149 ʼn # LATIN SMALL LETTER N PRECEDED BY APOSTROPHE
|
||||||
|
0x014A Ŋ # LATIN CAPITAL LETTER ENG
|
||||||
|
0x014B ŋ # LATIN SMALL LETTER ENG
|
||||||
|
0x014C Ō # LATIN CAPITAL LETTER O WITH MACRON
|
||||||
|
0x014D ō # LATIN SMALL LETTER O WITH MACRON
|
||||||
|
0x0150 Ő # LATIN CAPITAL LETTER O WITH DOUBLE ACUTE
|
||||||
|
0x0151 ő # LATIN SMALL LETTER O WITH DOUBLE ACUTE
|
||||||
|
0x0152 Œ # LATIN CAPITAL LIGATURE OE
|
||||||
|
0x0153 œ # LATIN SMALL LIGATURE OE
|
||||||
|
0x0154 Ŕ # LATIN CAPITAL LETTER R WITH ACUTE
|
||||||
|
0x0155 ŕ # LATIN SMALL LETTER R WITH ACUTE
|
||||||
|
0x0156 Ŗ # LATIN CAPITAL LETTER R WITH CEDILLA
|
||||||
|
0x0157 ŗ # LATIN SMALL LETTER R WITH CEDILLA
|
||||||
|
0x0158 Ř # LATIN CAPITAL LETTER R WITH CARON
|
||||||
|
0x0159 ř # LATIN SMALL LETTER R WITH CARON
|
||||||
|
0x015A Ś # LATIN CAPITAL LETTER S WITH ACUTE
|
||||||
|
0x015B ś # LATIN SMALL LETTER S WITH ACUTE
|
||||||
|
0x015C Ŝ # LATIN CAPITAL LETTER S WITH CIRCUMFLEX
|
||||||
|
0x015D ŝ # LATIN SMALL LETTER S WITH CIRCUMFLEX
|
||||||
|
0x015E Ş # LATIN CAPITAL LETTER S WITH CEDILLA
|
||||||
|
0x015F ş # LATIN SMALL LETTER S WITH CEDILLA
|
||||||
|
0x0160 Š # LATIN CAPITAL LETTER S WITH CARON
|
||||||
|
0x0161 š # LATIN SMALL LETTER S WITH CARON
|
||||||
|
0x0162 Ţ # LATIN CAPITAL LETTER T WITH CEDILLA
|
||||||
|
0x0163 ţ # LATIN SMALL LETTER T WITH CEDILLA
|
||||||
|
0x0164 Ť # LATIN CAPITAL LETTER T WITH CARON
|
||||||
|
0x0165 ť # LATIN SMALL LETTER T WITH CARON
|
||||||
|
0x0166 Ŧ # LATIN CAPITAL LETTER T WITH STROKE
|
||||||
|
0x0167 ŧ # LATIN SMALL LETTER T WITH STROKE
|
||||||
|
0x0168 Ũ # LATIN CAPITAL LETTER U WITH TILDE
|
||||||
|
0x0169 ũ # LATIN SMALL LETTER U WITH TILDE
|
||||||
|
0x016A Ū # LATIN CAPITAL LETTER U WITH MACRON
|
||||||
|
0x016B ū # LATIN SMALL LETTER U WITH MACRON
|
||||||
|
0x016C Ŭ # LATIN CAPITAL LETTER U WITH BREVE
|
||||||
|
0x016D ŭ # LATIN SMALL LETTER U WITH BREVE
|
||||||
|
0x016E Ů # LATIN CAPITAL LETTER U WITH RING ABOVE
|
||||||
|
0x016F ů # LATIN SMALL LETTER U WITH RING ABOVE
|
||||||
|
0x0170 Ű # LATIN CAPITAL LETTER U WITH DOUBLE ACUTE
|
||||||
|
0x0171 ű # LATIN SMALL LETTER U WITH DOUBLE ACUTE
|
||||||
|
0x0172 Ų # LATIN CAPITAL LETTER U WITH OGONEK
|
||||||
|
0x0173 ų # LATIN SMALL LETTER U WITH OGONEK
|
||||||
|
0x0174 Ŵ # LATIN CAPITAL LETTER W WITH CIRCUMFLEX
|
||||||
|
0x0175 ŵ # LATIN SMALL LETTER W WITH CIRCUMFLEX
|
||||||
|
0x0176 Ŷ # LATIN CAPITAL LETTER Y WITH CIRCUMFLEX
|
||||||
|
0x0177 ŷ # LATIN SMALL LETTER Y WITH CIRCUMFLEX
|
||||||
|
0x0178 Ÿ # LATIN CAPITAL LETTER Y WITH DIAERESIS
|
||||||
|
0x0179 Ź # LATIN CAPITAL LETTER Z WITH ACUTE
|
||||||
|
0x017A ź # LATIN SMALL LETTER Z WITH ACUTE
|
||||||
|
0x017B Ż # LATIN CAPITAL LETTER Z WITH DOT ABOVE
|
||||||
|
0x017C ż # LATIN SMALL LETTER Z WITH DOT ABOVE
|
||||||
|
0x017D Ž # LATIN CAPITAL LETTER Z WITH CARON
|
||||||
|
0x017E ž # LATIN SMALL LETTER Z WITH CARON
|
||||||
|
0x0192 ƒ # LATIN SMALL LETTER F WITH HOOK
|
||||||
|
0x01F5 ǵ # LATIN SMALL LETTER G WITH ACUTE
|
||||||
|
0x02BC ʼ # MODIFIER LETTER APOSTROPHE
|
||||||
|
0x02C6 ˆ # MODIFIER LETTER CIRCUMFLEX ACCENT
|
||||||
|
0x02C7 ˇ # CARON
|
||||||
|
0x02D8 ˘ # BREVE
|
||||||
|
0x02D9 ˙ # DOT ABOVE
|
||||||
|
0x02DA ˚ # RING ABOVE
|
||||||
|
0x02DB ˛ # OGONEK
|
||||||
|
0x02DC ˜ # SMALL TILDE
|
||||||
|
0x02DD ˝ # DOUBLE ACUTE ACCENT
|
||||||
|
0x0386 Ά # GREEK CAPITAL LETTER ALPHA WITH TONOS
|
||||||
|
0x0388 Έ # GREEK CAPITAL LETTER EPSILON WITH TONOS
|
||||||
|
0x0389 Ή # GREEK CAPITAL LETTER ETA WITH TONOS
|
||||||
|
0x038A Ί # GREEK CAPITAL LETTER IOTA WITH TONOS
|
||||||
|
0x038C Ό # GREEK CAPITAL LETTER OMICRON WITH TONOS
|
||||||
|
0x038E Ύ # GREEK CAPITAL LETTER UPSILON WITH TONOS
|
||||||
|
0x038F Ώ # GREEK CAPITAL LETTER OMEGA WITH TONOS
|
||||||
|
0x0390 ΐ # GREEK SMALL LETTER IOTA WITH DIALYTIKA AND TONOS
|
||||||
|
0x0391 Α # GREEK CAPITAL LETTER ALPHA
|
||||||
|
0x0392 Β # GREEK CAPITAL LETTER BETA
|
||||||
|
0x0393 Γ # GREEK CAPITAL LETTER GAMMA
|
||||||
|
0x0394 Δ # GREEK CAPITAL LETTER DELTA
|
||||||
|
0x0395 Ε # GREEK CAPITAL LETTER EPSILON
|
||||||
|
0x0396 Ζ # GREEK CAPITAL LETTER ZETA
|
||||||
|
0x0397 Η # GREEK CAPITAL LETTER ETA
|
||||||
|
0x0398 Θ # GREEK CAPITAL LETTER THETA
|
||||||
|
0x0399 Ι # GREEK CAPITAL LETTER IOTA
|
||||||
|
0x039A Κ # GREEK CAPITAL LETTER KAPPA
|
||||||
|
0x039B Λ # GREEK CAPITAL LETTER LAMDA
|
||||||
|
0x039C Μ # GREEK CAPITAL LETTER MU
|
||||||
|
0x039D Ν # GREEK CAPITAL LETTER NU
|
||||||
|
0x039E Ξ # GREEK CAPITAL LETTER XI
|
||||||
|
0x039F Ο # GREEK CAPITAL LETTER OMICRON
|
||||||
|
0x03A0 Π # GREEK CAPITAL LETTER PI
|
||||||
|
0x03A1 Ρ # GREEK CAPITAL LETTER RHO
|
||||||
|
0x03A3 Σ # GREEK CAPITAL LETTER SIGMA
|
||||||
|
0x03A4 Τ # GREEK CAPITAL LETTER TAU
|
||||||
|
0x03A5 Υ # GREEK CAPITAL LETTER UPSILON
|
||||||
|
0x03A6 Φ # GREEK CAPITAL LETTER PHI
|
||||||
|
0x03A7 Χ # GREEK CAPITAL LETTER CHI
|
||||||
|
0x03A8 Ψ # GREEK CAPITAL LETTER PSI
|
||||||
|
0x03A9 Ω # GREEK CAPITAL LETTER OMEGA
|
||||||
|
0x03AA Ϊ # GREEK CAPITAL LETTER IOTA WITH DIALYTIKA
|
||||||
|
0x03AB Ϋ # GREEK CAPITAL LETTER UPSILON WITH DIALYTIKA
|
||||||
|
0x03AC ά # GREEK SMALL LETTER ALPHA WITH TONOS
|
||||||
|
0x03AD έ # GREEK SMALL LETTER EPSILON WITH TONOS
|
||||||
|
0x03AE ή # GREEK SMALL LETTER ETA WITH TONOS
|
||||||
|
0x03AF ί # GREEK SMALL LETTER IOTA WITH TONOS
|
||||||
|
0x03B0 ΰ # GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND TONOS
|
||||||
|
0x03B1 α # GREEK SMALL LETTER ALPHA
|
||||||
|
0x03B2 β # GREEK SMALL LETTER BETA
|
||||||
|
0x03B3 γ # GREEK SMALL LETTER GAMMA
|
||||||
|
0x03B4 δ # GREEK SMALL LETTER DELTA
|
||||||
|
0x03B5 ε # GREEK SMALL LETTER EPSILON
|
||||||
|
0x03B6 ζ # GREEK SMALL LETTER ZETA
|
||||||
|
0x03B7 η # GREEK SMALL LETTER ETA
|
||||||
|
0x03B8 θ # GREEK SMALL LETTER THETA
|
||||||
|
0x03B9 ι # GREEK SMALL LETTER IOTA
|
||||||
|
0x03BA κ # GREEK SMALL LETTER KAPPA
|
||||||
|
0x03BB λ # GREEK SMALL LETTER LAMDA
|
||||||
|
0x03BC μ # GREEK SMALL LETTER MU
|
||||||
|
0x03BD ν # GREEK SMALL LETTER NU
|
||||||
|
0x03BE ξ # GREEK SMALL LETTER XI
|
||||||
|
0x03BF ο # GREEK SMALL LETTER OMICRON
|
||||||
|
0x03C0 π # GREEK SMALL LETTER PI
|
||||||
|
0x03C1 ρ # GREEK SMALL LETTER RHO
|
||||||
|
0x03C2 ς # GREEK SMALL LETTER FINAL SIGMA
|
||||||
|
0x03C3 σ # GREEK SMALL LETTER SIGMA
|
||||||
|
0x03C4 τ # GREEK SMALL LETTER TAU
|
||||||
|
0x03C5 υ # GREEK SMALL LETTER UPSILON
|
||||||
|
0x03C6 φ # GREEK SMALL LETTER PHI
|
||||||
|
0x03C7 χ # GREEK SMALL LETTER CHI
|
||||||
|
0x03C8 ψ # GREEK SMALL LETTER PSI
|
||||||
|
0x03C9 ω # GREEK SMALL LETTER OMEGA
|
||||||
|
0x03CA ϊ # GREEK SMALL LETTER IOTA WITH DIALYTIKA
|
||||||
|
0x03CB ϋ # GREEK SMALL LETTER UPSILON WITH DIALYTIKA
|
||||||
|
0x03CC ό # GREEK SMALL LETTER OMICRON WITH TONOS
|
||||||
|
0x03CE ώ # GREEK SMALL LETTER OMEGA WITH TONOS
|
||||||
|
0x03D1 ϑ # GREEK THETA SYMBOL
|
||||||
|
0x03D2 ϒ # GREEK UPSILON WITH HOOK SYMBOL
|
||||||
|
0x03D5 ϕ # GREEK PHI SYMBOL
|
||||||
|
0x03D6 ϖ # GREEK PI SYMBOL
|
||||||
|
0x03DC Ϝ # GREEK LETTER DIGAMMA
|
||||||
|
0x03F0 ϰ # GREEK KAPPA SYMBOL
|
||||||
|
0x03F1 ϱ # GREEK RHO SYMBOL
|
||||||
|
0x0401 Ё # CYRILLIC CAPITAL LETTER IO
|
||||||
|
0x0402 Ђ # CYRILLIC CAPITAL LETTER DJE
|
||||||
|
0x0403 Ѓ # CYRILLIC CAPITAL LETTER GJE
|
||||||
|
0x0404 Є # CYRILLIC CAPITAL LETTER UKRAINIAN IE
|
||||||
|
0x0405 Ѕ # CYRILLIC CAPITAL LETTER DZE
|
||||||
|
0x0406 І # CYRILLIC CAPITAL LETTER BYELORUSSIAN-UKRAINIAN I
|
||||||
|
0x0407 Ї # CYRILLIC CAPITAL LETTER YI
|
||||||
|
0x0408 Ј # CYRILLIC CAPITAL LETTER JE
|
||||||
|
0x0409 Љ # CYRILLIC CAPITAL LETTER LJE
|
||||||
|
0x040A Њ # CYRILLIC CAPITAL LETTER NJE
|
||||||
|
0x040B Ћ # CYRILLIC CAPITAL LETTER TSHE
|
||||||
|
0x040C Ќ # CYRILLIC CAPITAL LETTER KJE
|
||||||
|
0x040E Ў # CYRILLIC CAPITAL LETTER SHORT U
|
||||||
|
0x040F Џ # CYRILLIC CAPITAL LETTER DZHE
|
||||||
|
0x0410 А # CYRILLIC CAPITAL LETTER A
|
||||||
|
0x0411 Б # CYRILLIC CAPITAL LETTER BE
|
||||||
|
0x0412 В # CYRILLIC CAPITAL LETTER VE
|
||||||
|
0x0413 Г # CYRILLIC CAPITAL LETTER GHE
|
||||||
|
0x0414 Д # CYRILLIC CAPITAL LETTER DE
|
||||||
|
0x0415 Е # CYRILLIC CAPITAL LETTER IE
|
||||||
|
0x0416 Ж # CYRILLIC CAPITAL LETTER ZHE
|
||||||
|
0x0417 З # CYRILLIC CAPITAL LETTER ZE
|
||||||
|
0x0418 И # CYRILLIC CAPITAL LETTER I
|
||||||
|
0x0419 Й # CYRILLIC CAPITAL LETTER SHORT I
|
||||||
|
0x041A К # CYRILLIC CAPITAL LETTER KA
|
||||||
|
0x041B Л # CYRILLIC CAPITAL LETTER EL
|
||||||
|
0x041C М # CYRILLIC CAPITAL LETTER EM
|
||||||
|
0x041D Н # CYRILLIC CAPITAL LETTER EN
|
||||||
|
0x041E О # CYRILLIC CAPITAL LETTER O
|
||||||
|
0x041F П # CYRILLIC CAPITAL LETTER PE
|
||||||
|
0x0420 Р # CYRILLIC CAPITAL LETTER ER
|
||||||
|
0x0421 С # CYRILLIC CAPITAL LETTER ES
|
||||||
|
0x0422 Т # CYRILLIC CAPITAL LETTER TE
|
||||||
|
0x0423 У # CYRILLIC CAPITAL LETTER U
|
||||||
|
0x0424 Ф # CYRILLIC CAPITAL LETTER EF
|
||||||
|
0x0425 Х # CYRILLIC CAPITAL LETTER HA
|
||||||
|
0x0426 Ц # CYRILLIC CAPITAL LETTER TSE
|
||||||
|
0x0427 Ч # CYRILLIC CAPITAL LETTER CHE
|
||||||
|
0x0428 Ш # CYRILLIC CAPITAL LETTER SHA
|
||||||
|
0x0429 Щ # CYRILLIC CAPITAL LETTER SHCHA
|
||||||
|
0x042A Ъ # CYRILLIC CAPITAL LETTER HARD SIGN
|
||||||
|
0x042B Ы # CYRILLIC CAPITAL LETTER YERU
|
||||||
|
0x042C Ь # CYRILLIC CAPITAL LETTER SOFT SIGN
|
||||||
|
0x042D Э # CYRILLIC CAPITAL LETTER E
|
||||||
|
0x042E Ю # CYRILLIC CAPITAL LETTER YU
|
||||||
|
0x042F Я # CYRILLIC CAPITAL LETTER YA
|
||||||
|
0x0430 а # CYRILLIC SMALL LETTER A
|
||||||
|
0x0431 б # CYRILLIC SMALL LETTER BE
|
||||||
|
0x0432 в # CYRILLIC SMALL LETTER VE
|
||||||
|
0x0433 г # CYRILLIC SMALL LETTER GHE
|
||||||
|
0x0434 д # CYRILLIC SMALL LETTER DE
|
||||||
|
0x0435 е # CYRILLIC SMALL LETTER IE
|
||||||
|
0x0436 ж # CYRILLIC SMALL LETTER ZHE
|
||||||
|
0x0437 з # CYRILLIC SMALL LETTER ZE
|
||||||
|
0x0438 и # CYRILLIC SMALL LETTER I
|
||||||
|
0x0439 й # CYRILLIC SMALL LETTER SHORT I
|
||||||
|
0x043A к # CYRILLIC SMALL LETTER KA
|
||||||
|
0x043B л # CYRILLIC SMALL LETTER EL
|
||||||
|
0x043C м # CYRILLIC SMALL LETTER EM
|
||||||
|
0x043D н # CYRILLIC SMALL LETTER EN
|
||||||
|
0x043E о # CYRILLIC SMALL LETTER O
|
||||||
|
0x043F п # CYRILLIC SMALL LETTER PE
|
||||||
|
0x0440 р # CYRILLIC SMALL LETTER ER
|
||||||
|
0x0441 с # CYRILLIC SMALL LETTER ES
|
||||||
|
0x0442 т # CYRILLIC SMALL LETTER TE
|
||||||
|
0x0443 у # CYRILLIC SMALL LETTER U
|
||||||
|
0x0444 ф # CYRILLIC SMALL LETTER EF
|
||||||
|
0x0445 х # CYRILLIC SMALL LETTER HA
|
||||||
|
0x0446 ц # CYRILLIC SMALL LETTER TSE
|
||||||
|
0x0447 ч # CYRILLIC SMALL LETTER CHE
|
||||||
|
0x0448 ш # CYRILLIC SMALL LETTER SHA
|
||||||
|
0x0449 щ # CYRILLIC SMALL LETTER SHCHA
|
||||||
|
0x044A ъ # CYRILLIC SMALL LETTER HARD SIGN
|
||||||
|
0x044B ы # CYRILLIC SMALL LETTER YERU
|
||||||
|
0x044C ь # CYRILLIC SMALL LETTER SOFT SIGN
|
||||||
|
0x044D э # CYRILLIC SMALL LETTER E
|
||||||
|
0x044E ю # CYRILLIC SMALL LETTER YU
|
||||||
|
0x044F я # CYRILLIC SMALL LETTER YA
|
||||||
|
0x0451 ё # CYRILLIC SMALL LETTER IO
|
||||||
|
0x0452 ђ # CYRILLIC SMALL LETTER DJE
|
||||||
|
0x0453 ѓ # CYRILLIC SMALL LETTER GJE
|
||||||
|
0x0454 є # CYRILLIC SMALL LETTER UKRAINIAN IE
|
||||||
|
0x0455 ѕ # CYRILLIC SMALL LETTER DZE
|
||||||
|
0x0456 і # CYRILLIC SMALL LETTER BYELORUSSIAN-UKRAINIAN I
|
||||||
|
0x0457 ї # CYRILLIC SMALL LETTER YI
|
||||||
|
0x0458 ј # CYRILLIC SMALL LETTER JE
|
||||||
|
0x0459 љ # CYRILLIC SMALL LETTER LJE
|
||||||
|
0x045A њ # CYRILLIC SMALL LETTER NJE
|
||||||
|
0x045B ћ # CYRILLIC SMALL LETTER TSHE
|
||||||
|
0x045C ќ # CYRILLIC SMALL LETTER KJE
|
||||||
|
0x045E ў # CYRILLIC SMALL LETTER SHORT U
|
||||||
|
0x045F џ # CYRILLIC SMALL LETTER DZHE
|
||||||
|
0x2002   # EN SPACE
|
||||||
|
0x2003   # EM SPACE
|
||||||
|
0x2004   # THREE-PER-EM SPACE
|
||||||
|
0x2005   # FOUR-PER-EM SPACE
|
||||||
|
0x2007   # FIGURE SPACE
|
||||||
|
0x2008   # PUNCTUATION SPACE
|
||||||
|
0x2009   # THIN SPACE
|
||||||
|
0x200A   # HAIR SPACE
|
||||||
|
0x200C ‌ # ZERO WIDTH NON-JOINER
|
||||||
|
0x200D ‍ # ZERO WIDTH JOINER
|
||||||
|
0x200E ‎ # LEFT-TO-RIGHT MARK
|
||||||
|
0x200F ‏ # RIGHT-TO-LEFT MARK
|
||||||
|
0x2010 ‐ # HYPHEN
|
||||||
|
0x2013 – # EN DASH
|
||||||
|
0x2014 — # EM DASH
|
||||||
|
0x2015 ― # HORIZONTAL BAR
|
||||||
|
0x2016 ‖ # DOUBLE VERTICAL LINE
|
||||||
|
0x2018 ‘ # LEFT SINGLE QUOTATION MARK
|
||||||
|
0x2018 ‘ # LEFT SINGLE QUOTATION MARK
|
||||||
|
0x2019 ’ # RIGHT SINGLE QUOTATION MARK
|
||||||
|
0x201A ‚ # SINGLE LOW-9 QUOTATION MARK
|
||||||
|
0x201A ‚ # SINGLE LOW-9 QUOTATION MARK
|
||||||
|
0x201C “ # LEFT DOUBLE QUOTATION MARK
|
||||||
|
0x201C “ # LEFT DOUBLE QUOTATION MARK
|
||||||
|
0x201D ” # RIGHT DOUBLE QUOTATION MARK
|
||||||
|
0x201E „ # DOUBLE LOW-9 QUOTATION MARK
|
||||||
|
0x201E „ # DOUBLE LOW-9 QUOTATION MARK
|
||||||
|
0x2020 † # DAGGER
|
||||||
|
0x2021 ‡ # DOUBLE DAGGER
|
||||||
|
0x2022 • # BULLET
|
||||||
|
0x2025 ‥ # TWO DOT LEADER
|
||||||
|
0x2026 … # HORIZONTAL ELLIPSIS
|
||||||
|
0x2026 … # HORIZONTAL ELLIPSIS
|
||||||
|
0x2030 ‰ # PER MILLE SIGN
|
||||||
|
0x2032 ′ # PRIME
|
||||||
|
0x2032 ′ # PRIME
|
||||||
|
0x2033 ″ # DOUBLE PRIME
|
||||||
|
0x2034 ‴ # TRIPLE PRIME
|
||||||
|
0x2035 ‵ # REVERSED PRIME
|
||||||
|
0x2039 ‹ # SINGLE LEFT-POINTING ANGLE QUOTATION MARK
|
||||||
|
0x203A › # SINGLE RIGHT-POINTING ANGLE QUOTATION MARK
|
||||||
|
0x203E ‾ # OVERLINE
|
||||||
|
0x2041 ⁁ # CARET INSERTION POINT
|
||||||
|
0x2043 ⁃ # HYPHEN BULLET
|
||||||
|
0x2044 ⁄ # FRACTION SLASH
|
||||||
|
0x20AC € # EURO SIGN
|
||||||
|
0x20DB ⃛ # COMBINING THREE DOTS ABOVE
|
||||||
|
0x20DC ⃜ # COMBINING FOUR DOTS ABOVE
|
||||||
|
0x2105 ℅ # CARE OF
|
||||||
|
0x210B ℋ # SCRIPT CAPITAL H
|
||||||
|
0x210F ℏ # PLANCK CONSTANT OVER TWO PI
|
||||||
|
0x2111 ℑ # BLACK-LETTER CAPITAL I
|
||||||
|
0x2112 ℒ # SCRIPT CAPITAL L
|
||||||
|
0x2113 ℓ # SCRIPT SMALL L
|
||||||
|
0x2116 № # NUMERO SIGN
|
||||||
|
0x2117 ℗ # SOUND RECORDING COPYRIGHT
|
||||||
|
0x2118 ℘ # SCRIPT CAPITAL P
|
||||||
|
0x211C ℜ # BLACK-LETTER CAPITAL R
|
||||||
|
0x211E ℞ # PRESCRIPTION TAKE
|
||||||
|
0x2122 ™ # TRADE MARK SIGN
|
||||||
|
0x2126 Ω # OHM SIGN
|
||||||
|
0x212B Å # ANGSTROM SIGN
|
||||||
|
0x212C ℬ # SCRIPT CAPITAL B
|
||||||
|
0x2133 ℳ # SCRIPT CAPITAL M
|
||||||
|
0x2134 ℴ # SCRIPT SMALL O
|
||||||
|
0x2135 ℵ # ALEF SYMBOL
|
||||||
|
0x2135 ℵ # ALEF SYMBOL
|
||||||
|
0x2136 ℶ # BET SYMBOL
|
||||||
|
0x2137 ℷ # GIMEL SYMBOL
|
||||||
|
0x2138 ℸ # DALET SYMBOL
|
||||||
|
0x2153 ⅓ # VULGAR FRACTION ONE THIRD
|
||||||
|
0x2154 ⅔ # VULGAR FRACTION TWO THIRDS
|
||||||
|
0x2155 ⅕ # VULGAR FRACTION ONE FIFTH
|
||||||
|
0x2156 ⅖ # VULGAR FRACTION TWO FIFTHS
|
||||||
|
0x2157 ⅗ # VULGAR FRACTION THREE FIFTHS
|
||||||
|
0x2158 ⅘ # VULGAR FRACTION FOUR FIFTHS
|
||||||
|
0x2159 ⅙ # VULGAR FRACTION ONE SIXTH
|
||||||
|
0x215A ⅚ # VULGAR FRACTION FIVE SIXTHS
|
||||||
|
0x215B ⅛ # VULGAR FRACTION ONE EIGHTH
|
||||||
|
0x215C ⅜ # VULGAR FRACTION THREE EIGHTHS
|
||||||
|
0x215D ⅝ # VULGAR FRACTION FIVE EIGHTHS
|
||||||
|
0x215E ⅞ # VULGAR FRACTION SEVEN EIGHTHS
|
||||||
|
0x2190 ← # LEFTWARDS ARROW
|
||||||
|
0x2191 ↑ # UPWARDS ARROW
|
||||||
|
0x2192 → # RIGHTWARDS ARROW
|
||||||
|
0x2193 ↓ # DOWNWARDS ARROW
|
||||||
|
0x2194 ↔ # LEFT RIGHT ARROW
|
||||||
|
0x2195 ↕ # UP DOWN ARROW
|
||||||
|
0x2196 ↖ # NORTH WEST ARROW
|
||||||
|
0x2197 ↗ # NORTH EAST ARROW
|
||||||
|
0x2198 ↘ # SOUTH EAST ARROW
|
||||||
|
0x2199 ↙ # SOUTH WEST ARROW
|
||||||
|
0x219A ↚ # LEFTWARDS ARROW WITH STROKE
|
||||||
|
0x219B ↛ # RIGHTWARDS ARROW WITH STROKE
|
||||||
|
0x219D ↝ # RIGHTWARDS WAVE ARROW
|
||||||
|
0x219E ↞ # LEFTWARDS TWO HEADED ARROW
|
||||||
|
0x21A0 ↠ # RIGHTWARDS TWO HEADED ARROW
|
||||||
|
0x21A2 ↢ # LEFTWARDS ARROW WITH TAIL
|
||||||
|
0x21A3 ↣ # RIGHTWARDS ARROW WITH TAIL
|
||||||
|
0x21A6 ↦ # RIGHTWARDS ARROW FROM BAR
|
||||||
|
0x21A9 ↩ # LEFTWARDS ARROW WITH HOOK
|
||||||
|
0x21AA ↪ # RIGHTWARDS ARROW WITH HOOK
|
||||||
|
0x21AB ↫ # LEFTWARDS ARROW WITH LOOP
|
||||||
|
0x21AC ↬ # RIGHTWARDS ARROW WITH LOOP
|
||||||
|
0x21AD ↭ # LEFT RIGHT WAVE ARROW
|
||||||
|
0x21AE ↮ # LEFT RIGHT ARROW WITH STROKE
|
||||||
|
0x21B0 ↰ # UPWARDS ARROW WITH TIP LEFTWARDS
|
||||||
|
0x21B1 ↱ # UPWARDS ARROW WITH TIP RIGHTWARDS
|
||||||
|
0x21B5 ↵ # DOWNWARDS ARROW WITH CORNER LEFTWARDS
|
||||||
|
0x21B6 ↶ # ANTICLOCKWISE TOP SEMICIRCLE ARROW
|
||||||
|
0x21B7 ↷ # CLOCKWISE TOP SEMICIRCLE ARROW
|
||||||
|
0x21BA ↺ # ANTICLOCKWISE OPEN CIRCLE ARROW
|
||||||
|
0x21BB ↻ # CLOCKWISE OPEN CIRCLE ARROW
|
||||||
|
0x21BC ↼ # LEFTWARDS HARPOON WITH BARB UPWARDS
|
||||||
|
0x21BD ↽ # LEFTWARDS HARPOON WITH BARB DOWNWARDS
|
||||||
|
0x21BE ↾ # UPWARDS HARPOON WITH BARB RIGHTWARDS
|
||||||
|
0x21BF ↿ # UPWARDS HARPOON WITH BARB LEFTWARDS
|
||||||
|
0x21C0 ⇀ # RIGHTWARDS HARPOON WITH BARB UPWARDS
|
||||||
|
0x21C1 ⇁ # RIGHTWARDS HARPOON WITH BARB DOWNWARDS
|
||||||
|
0x21C2 ⇂ # DOWNWARDS HARPOON WITH BARB RIGHTWARDS
|
||||||
|
0x21C3 ⇃ # DOWNWARDS HARPOON WITH BARB LEFTWARDS
|
||||||
|
0x21C4 ⇄ # RIGHTWARDS ARROW OVER LEFTWARDS ARROW
|
||||||
|
0x21C6 ⇆ # LEFTWARDS ARROW OVER RIGHTWARDS ARROW
|
||||||
|
0x21C7 ⇇ # LEFTWARDS PAIRED ARROWS
|
||||||
|
0x21C8 ⇈ # UPWARDS PAIRED ARROWS
|
||||||
|
0x21C9 ⇉ # RIGHTWARDS PAIRED ARROWS
|
||||||
|
0x21CA ⇊ # DOWNWARDS PAIRED ARROWS
|
||||||
|
0x21CB ⇋ # LEFTWARDS HARPOON OVER RIGHTWARDS HARPOON
|
||||||
|
0x21CC ⇌ # RIGHTWARDS HARPOON OVER LEFTWARDS HARPOON
|
||||||
|
0x21CD ⇍ # LEFTWARDS DOUBLE ARROW WITH STROKE
|
||||||
|
0x21CE ⇎ # LEFT RIGHT DOUBLE ARROW WITH STROKE
|
||||||
|
0x21CF ⇏ # RIGHTWARDS DOUBLE ARROW WITH STROKE
|
||||||
|
0x21D0 ⇐ # LEFTWARDS DOUBLE ARROW
|
||||||
|
0x21D1 ⇑ # UPWARDS DOUBLE ARROW
|
||||||
|
0x21D2 ⇒ # RIGHTWARDS DOUBLE ARROW
|
||||||
|
0x21D3 ⇓ # DOWNWARDS DOUBLE ARROW
|
||||||
|
0x21D4 ⇔ # LEFT RIGHT DOUBLE ARROW
|
||||||
|
0x21D5 ⇕ # UP DOWN DOUBLE ARROW
|
||||||
|
0x21DA ⇚ # LEFTWARDS TRIPLE ARROW
|
||||||
|
0x21DB ⇛ # RIGHTWARDS TRIPLE ARROW
|
||||||
|
0x2200 ∀ # FOR ALL
|
||||||
|
0x2201 ∁ # COMPLEMENT
|
||||||
|
0x2202 ∂ # PARTIAL DIFFERENTIAL
|
||||||
|
0x2203 ∃ # THERE EXISTS
|
||||||
|
0x2204 ∄ # THERE DOES NOT EXIST
|
||||||
|
0x2205 ∅ # EMPTY SET
|
||||||
|
0x2207 ∇ # NABLA
|
||||||
|
0x2208 ∈ # ELEMENT OF
|
||||||
|
0x2209 ∉ # NOT AN ELEMENT OF
|
||||||
|
0x220A ∊ # SMALL ELEMENT OF
|
||||||
|
0x220B ∋ # CONTAINS AS MEMBER
|
||||||
|
0x220D ∍ # SMALL CONTAINS AS MEMBER
|
||||||
|
0x220F ∏ # N-ARY PRODUCT
|
||||||
|
0x2210 ∐ # N-ARY COPRODUCT
|
||||||
|
0x2211 ∑ # N-ARY SUMMATION
|
||||||
|
0x2212 − # MINUS SIGN
|
||||||
|
0x2213 ∓ # MINUS-OR-PLUS SIGN
|
||||||
|
0x2214 ∔ # DOT PLUS
|
||||||
|
0x2216 ∖ # SET MINUS
|
||||||
|
0x2217 ∗ # ASTERISK OPERATOR
|
||||||
|
0x2218 ∘ # RING OPERATOR
|
||||||
|
0x221A √ # SQUARE ROOT
|
||||||
|
0x221D ∝ # PROPORTIONAL TO
|
||||||
|
0x221E ∞ # INFINITY
|
||||||
|
0x221F ∟ # RIGHT ANGLE
|
||||||
|
0x2220 ∠ # ANGLE
|
||||||
|
0x2221 ∡ # MEASURED ANGLE
|
||||||
|
0x2222 ∢ # SPHERICAL ANGLE
|
||||||
|
0x2223 ∣ # DIVIDES
|
||||||
|
0x2224 ∤ # DOES NOT DIVIDE
|
||||||
|
0x2225 ∥ # PARALLEL TO
|
||||||
|
0x2226 ∦ # NOT PARALLEL TO
|
||||||
|
0x2227 ∧ # LOGICAL AND
|
||||||
|
0x2228 ∨ # LOGICAL OR
|
||||||
|
0x2229 ∩ # INTERSECTION
|
||||||
|
0x222A ∪ # UNION
|
||||||
|
0x222B ∫ # INTEGRAL
|
||||||
|
0x222E ∮ # CONTOUR INTEGRAL
|
||||||
|
0x2234 ∴ # THEREFORE
|
||||||
|
0x2235 ∵ # BECAUSE
|
||||||
|
0x223C ∼ # TILDE OPERATOR
|
||||||
|
0x223D ∽ # REVERSED TILDE
|
||||||
|
0x2240 ≀ # WREATH PRODUCT
|
||||||
|
0x2241 ≁ # NOT TILDE
|
||||||
|
0x2243 ≃ # ASYMPTOTICALLY EQUAL TO
|
||||||
|
0x2244 ≄ # NOT ASYMPTOTICALLY EQUAL TO
|
||||||
|
0x2245 ≅ # APPROXIMATELY EQUAL TO
|
||||||
|
0x2247 ≇ # NEITHER APPROXIMATELY NOR ACTUALLY EQUAL TO
|
||||||
|
0x2248 ≈ # ALMOST EQUAL TO
|
||||||
|
0x2249 ≉ # NOT ALMOST EQUAL TO
|
||||||
|
0x224A ≊ # ALMOST EQUAL OR EQUAL TO
|
||||||
|
0x224C ≌ # ALL EQUAL TO
|
||||||
|
0x224E ≎ # GEOMETRICALLY EQUIVALENT TO
|
||||||
|
0x224F ≏ # DIFFERENCE BETWEEN
|
||||||
|
0x2250 ≐ # APPROACHES THE LIMIT
|
||||||
|
0x2251 ≑ # GEOMETRICALLY EQUAL TO
|
||||||
|
0x2252 ≒ # APPROXIMATELY EQUAL TO OR THE IMAGE OF
|
||||||
|
0x2253 ≓ # IMAGE OF OR APPROXIMATELY EQUAL TO
|
||||||
|
0x2254 ≔ # COLON EQUALS
|
||||||
|
0x2255 ≕ # EQUALS COLON
|
||||||
|
0x2256 ≖ # RING IN EQUAL TO
|
||||||
|
0x2257 ≗ # RING EQUAL TO
|
||||||
|
0x2259 ≙ # ESTIMATES
|
||||||
|
0x225C ≜ # DELTA EQUAL TO
|
||||||
|
0x2260 ≠ # NOT EQUAL TO
|
||||||
|
0x2261 ≡ # IDENTICAL TO
|
||||||
|
0x2262 ≢ # NOT IDENTICAL TO
|
||||||
|
0x2264 ≤ # LESS-THAN OR EQUAL TO
|
||||||
|
0x2265 ≥ # GREATER-THAN OR EQUAL TO
|
||||||
|
0x2266 ≦ # LESS-THAN OVER EQUAL TO
|
||||||
|
0x2267 ≧ # GREATER-THAN OVER EQUAL TO
|
||||||
|
0x2268 ≨ # LESS-THAN BUT NOT EQUAL TO
|
||||||
|
0x2269 ≩ # GREATER-THAN BUT NOT EQUAL TO
|
||||||
|
0x226A ≪ # MUCH LESS-THAN
|
||||||
|
0x226B ≫ # MUCH GREATER-THAN
|
||||||
|
0x226C ≬ # BETWEEN
|
||||||
|
0x226E ≮ # NOT LESS-THAN
|
||||||
|
0x226F ≯ # NOT GREATER-THAN
|
||||||
|
0x2270 ≰ # NEITHER LESS-THAN NOR EQUAL TO
|
||||||
|
0x2271 ≱ # NEITHER GREATER-THAN NOR EQUAL TO
|
||||||
|
0x2272 ≲ # LESS-THAN OR EQUIVALENT TO
|
||||||
|
0x2273 ≳ # GREATER-THAN OR EQUIVALENT TO
|
||||||
|
0x2276 ≶ # LESS-THAN OR GREATER-THAN
|
||||||
|
0x2277 ≷ # GREATER-THAN OR LESS-THAN
|
||||||
|
0x227A ≺ # PRECEDES
|
||||||
|
0x227B ≻ # SUCCEEDS
|
||||||
|
0x227C ≼ # PRECEDES OR EQUAL TO
|
||||||
|
0x227D ≽ # SUCCEEDS OR EQUAL TO
|
||||||
|
0x227E ≾ # PRECEDES OR EQUIVALENT TO
|
||||||
|
0x227F ≿ # SUCCEEDS OR EQUIVALENT TO
|
||||||
|
0x2280 ⊀ # DOES NOT PRECEDE
|
||||||
|
0x2281 ⊁ # DOES NOT SUCCEED
|
||||||
|
0x2282 ⊂ # SUBSET OF
|
||||||
|
0x2283 ⊃ # SUPERSET OF
|
||||||
|
0x2284 ⊄ # NOT A SUBSET OF
|
||||||
|
0x2285 ⊅ # NOT A SUPERSET OF
|
||||||
|
0x2286 ⊆ # SUBSET OF OR EQUAL TO
|
||||||
|
0x2287 ⊇ # SUPERSET OF OR EQUAL TO
|
||||||
|
0x2288 ⊈ # NEITHER A SUBSET OF NOR EQUAL TO
|
||||||
|
0x2289 ⊉ # NEITHER A SUPERSET OF NOR EQUAL TO
|
||||||
|
0x228A ⊊ # SUBSET OF WITH NOT EQUAL TO
|
||||||
|
0x228B ⊋ # SUPERSET OF WITH NOT EQUAL TO
|
||||||
|
0x228E ⊎ # MULTISET UNION
|
||||||
|
0x228F ⊏ # SQUARE IMAGE OF
|
||||||
|
0x2290 ⊐ # SQUARE ORIGINAL OF
|
||||||
|
0x2291 ⊑ # SQUARE IMAGE OF OR EQUAL TO
|
||||||
|
0x2292 ⊒ # SQUARE ORIGINAL OF OR EQUAL TO
|
||||||
|
0x2293 ⊓ # SQUARE CAP
|
||||||
|
0x2294 ⊔ # SQUARE CUP
|
||||||
|
0x2295 ⊕ # CIRCLED PLUS
|
||||||
|
0x2296 ⊖ # CIRCLED MINUS
|
||||||
|
0x2297 ⊗ # CIRCLED TIMES
|
||||||
|
0x2298 ⊘ # CIRCLED DIVISION SLASH
|
||||||
|
0x2299 ⊙ # CIRCLED DOT OPERATOR
|
||||||
|
0x229A ⊚ # CIRCLED RING OPERATOR
|
||||||
|
0x229B ⊛ # CIRCLED ASTERISK OPERATOR
|
||||||
|
0x229D ⊝ # CIRCLED DASH
|
||||||
|
0x229E ⊞ # SQUARED PLUS
|
||||||
|
0x229F ⊟ # SQUARED MINUS
|
||||||
|
0x22A0 ⊠ # SQUARED TIMES
|
||||||
|
0x22A1 ⊡ # SQUARED DOT OPERATOR
|
||||||
|
0x22A2 ⊢ # RIGHT TACK
|
||||||
|
0x22A3 ⊣ # LEFT TACK
|
||||||
|
0x22A4 ⊤ # DOWN TACK
|
||||||
|
0x22A5 ⊥ # UP TACK
|
||||||
|
0x22A7 ⊧ # MODELS
|
||||||
|
0x22A8 ⊨ # TRUE
|
||||||
|
0x22A9 ⊩ # FORCES
|
||||||
|
0x22AA ⊪ # TRIPLE VERTICAL BAR RIGHT TURNSTILE
|
||||||
|
0x22AC ⊬ # DOES NOT PROVE
|
||||||
|
0x22AD ⊭ # NOT TRUE
|
||||||
|
0x22AE ⊮ # DOES NOT FORCE
|
||||||
|
0x22AF ⊯ # NEGATED DOUBLE VERTICAL BAR DOUBLE RIGHT TURNSTILE
|
||||||
|
0x22B2 ⊲ # NORMAL SUBGROUP OF
|
||||||
|
0x22B3 ⊳ # CONTAINS AS NORMAL SUBGROUP
|
||||||
|
0x22B4 ⊴ # NORMAL SUBGROUP OF OR EQUAL TO
|
||||||
|
0x22B5 ⊵ # CONTAINS AS NORMAL SUBGROUP OR EQUAL TO
|
||||||
|
0x22B8 ⊸ # MULTIMAP
|
||||||
|
0x22BA ⊺ # INTERCALATE
|
||||||
|
0x22BB ⊻ # XOR
|
||||||
|
0x22BC ⊼ # NAND
|
||||||
|
0x22C4 ⋄ # DIAMOND OPERATOR
|
||||||
|
0x22C5 ⋅ # DOT OPERATOR
|
||||||
|
0x22C6 ⋆ # STAR OPERATOR
|
||||||
|
0x22C7 ⋇ # DIVISION TIMES
|
||||||
|
0x22C8 ⋈ # BOWTIE
|
||||||
|
0x22C9 ⋉ # LEFT NORMAL FACTOR SEMIDIRECT PRODUCT
|
||||||
|
0x22CA ⋊ # RIGHT NORMAL FACTOR SEMIDIRECT PRODUCT
|
||||||
|
0x22CB ⋋ # LEFT SEMIDIRECT PRODUCT
|
||||||
|
0x22CC ⋌ # RIGHT SEMIDIRECT PRODUCT
|
||||||
|
0x22CD ⋍ # REVERSED TILDE EQUALS
|
||||||
|
0x22CE ⋎ # CURLY LOGICAL OR
|
||||||
|
0x22CF ⋏ # CURLY LOGICAL AND
|
||||||
|
0x22D0 ⋐ # DOUBLE SUBSET
|
||||||
|
0x22D1 ⋑ # DOUBLE SUPERSET
|
||||||
|
0x22D2 ⋒ # DOUBLE INTERSECTION
|
||||||
|
0x22D3 ⋓ # DOUBLE UNION
|
||||||
|
0x22D4 ⋔ # PITCHFORK
|
||||||
|
0x22D6 ⋖ # LESS-THAN WITH DOT
|
||||||
|
0x22D7 ⋗ # GREATER-THAN WITH DOT
|
||||||
|
0x22D8 ⋘ # VERY MUCH LESS-THAN
|
||||||
|
0x22D9 ⋙ # VERY MUCH GREATER-THAN
|
||||||
|
0x22DA ⋚ # LESS-THAN EQUAL TO OR GREATER-THAN
|
||||||
|
0x22DB ⋛ # GREATER-THAN EQUAL TO OR LESS-THAN
|
||||||
|
0x22DC ⋜ # EQUAL TO OR LESS-THAN
|
||||||
|
0x22DD ⋝ # EQUAL TO OR GREATER-THAN
|
||||||
|
0x22DE ⋞ # EQUAL TO OR PRECEDES
|
||||||
|
0x22DF ⋟ # EQUAL TO OR SUCCEEDS
|
||||||
|
0x22E0 ⋠ # DOES NOT PRECEDE OR EQUAL
|
||||||
|
0x22E1 ⋡ # DOES NOT SUCCEED OR EQUAL
|
||||||
|
0x22E6 ⋦ # LESS-THAN BUT NOT EQUIVALENT TO
|
||||||
|
0x22E7 ⋧ # GREATER-THAN BUT NOT EQUIVALENT TO
|
||||||
|
0x22E8 ⋨ # PRECEDES BUT NOT EQUIVALENT TO
|
||||||
|
0x22E9 ⋩ # SUCCEEDS BUT NOT EQUIVALENT TO
|
||||||
|
0x22EA ⋪ # NOT NORMAL SUBGROUP OF
|
||||||
|
0x22EB ⋫ # DOES NOT CONTAIN AS NORMAL SUBGROUP
|
||||||
|
0x22EC ⋬ # NOT NORMAL SUBGROUP OF OR EQUAL TO
|
||||||
|
0x22ED ⋭ # DOES NOT CONTAIN AS NORMAL SUBGROUP OR EQUAL
|
||||||
|
0x22EE ⋮ # VERTICAL ELLIPSIS
|
||||||
|
0x2306 ⌆ # PERSPECTIVE
|
||||||
|
0x2308 ⌈ # LEFT CEILING
|
||||||
|
0x2309 ⌉ # RIGHT CEILING
|
||||||
|
0x230A ⌊ # LEFT FLOOR
|
||||||
|
0x230B ⌋ # RIGHT FLOOR
|
||||||
|
0x230C ⌌ # BOTTOM RIGHT CROP
|
||||||
|
0x230D ⌍ # BOTTOM LEFT CROP
|
||||||
|
0x230E ⌎ # TOP RIGHT CROP
|
||||||
|
0x230F ⌏ # TOP LEFT CROP
|
||||||
|
0x2315 ⌕ # TELEPHONE RECORDER
|
||||||
|
0x2316 ⌖ # POSITION INDICATOR
|
||||||
|
0x231C ⌜ # TOP LEFT CORNER
|
||||||
|
0x231D ⌝ # TOP RIGHT CORNER
|
||||||
|
0x231E ⌞ # BOTTOM LEFT CORNER
|
||||||
|
0x231F ⌟ # BOTTOM RIGHT CORNER
|
||||||
|
0x2322 ⌢ # FROWN
|
||||||
|
0x2323 ⌣ # SMILE
|
||||||
|
0x2329 〈 # LEFT-POINTING ANGLE BRACKET
|
||||||
|
0x232A 〉 # RIGHT-POINTING ANGLE BRACKET
|
||||||
|
0x2423 ␣ # OPEN BOX
|
||||||
|
0x24C8 Ⓢ # CIRCLED LATIN CAPITAL LETTER S
|
||||||
|
0x2500 ─ # BOX DRAWINGS LIGHT HORIZONTAL
|
||||||
|
0x2502 │ # BOX DRAWINGS LIGHT VERTICAL
|
||||||
|
0x250C ┌ # BOX DRAWINGS LIGHT DOWN AND RIGHT
|
||||||
|
0x2510 ┐ # BOX DRAWINGS LIGHT DOWN AND LEFT
|
||||||
|
0x2514 └ # BOX DRAWINGS LIGHT UP AND RIGHT
|
||||||
|
0x2518 ┘ # BOX DRAWINGS LIGHT UP AND LEFT
|
||||||
|
0x251C ├ # BOX DRAWINGS LIGHT VERTICAL AND RIGHT
|
||||||
|
0x2524 ┤ # BOX DRAWINGS LIGHT VERTICAL AND LEFT
|
||||||
|
0x252C ┬ # BOX DRAWINGS LIGHT DOWN AND HORIZONTAL
|
||||||
|
0x2534 ┴ # BOX DRAWINGS LIGHT UP AND HORIZONTAL
|
||||||
|
0x253C ┼ # BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL
|
||||||
|
0x2550 ═ # BOX DRAWINGS DOUBLE HORIZONTAL
|
||||||
|
0x2551 ║ # BOX DRAWINGS DOUBLE VERTICAL
|
||||||
|
0x2552 ╒ # BOX DRAWINGS DOWN SINGLE AND RIGHT DOUBLE
|
||||||
|
0x2553 ╓ # BOX DRAWINGS DOWN DOUBLE AND RIGHT SINGLE
|
||||||
|
0x2554 ╔ # BOX DRAWINGS DOUBLE DOWN AND RIGHT
|
||||||
|
0x2555 ╕ # BOX DRAWINGS DOWN SINGLE AND LEFT DOUBLE
|
||||||
|
0x2556 ╖ # BOX DRAWINGS DOWN DOUBLE AND LEFT SINGLE
|
||||||
|
0x2557 ╗ # BOX DRAWINGS DOUBLE DOWN AND LEFT
|
||||||
|
0x2558 ╘ # BOX DRAWINGS UP SINGLE AND RIGHT DOUBLE
|
||||||
|
0x2559 ╙ # BOX DRAWINGS UP DOUBLE AND RIGHT SINGLE
|
||||||
|
0x255A ╚ # BOX DRAWINGS DOUBLE UP AND RIGHT
|
||||||
|
0x255B ╛ # BOX DRAWINGS UP SINGLE AND LEFT DOUBLE
|
||||||
|
0x255C ╜ # BOX DRAWINGS UP DOUBLE AND LEFT SINGLE
|
||||||
|
0x255D ╝ # BOX DRAWINGS DOUBLE UP AND LEFT
|
||||||
|
0x255E ╞ # BOX DRAWINGS VERTICAL SINGLE AND RIGHT DOUBLE
|
||||||
|
0x255F ╟ # BOX DRAWINGS VERTICAL DOUBLE AND RIGHT SINGLE
|
||||||
|
0x2560 ╠ # BOX DRAWINGS DOUBLE VERTICAL AND RIGHT
|
||||||
|
0x2561 ╡ # BOX DRAWINGS VERTICAL SINGLE AND LEFT DOUBLE
|
||||||
|
0x2562 ╢ # BOX DRAWINGS VERTICAL DOUBLE AND LEFT SINGLE
|
||||||
|
0x2563 ╣ # BOX DRAWINGS DOUBLE VERTICAL AND LEFT
|
||||||
|
0x2564 ╤ # BOX DRAWINGS DOWN SINGLE AND HORIZONTAL DOUBLE
|
||||||
|
0x2565 ╥ # BOX DRAWINGS DOWN DOUBLE AND HORIZONTAL SINGLE
|
||||||
|
0x2566 ╦ # BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL
|
||||||
|
0x2567 ╧ # BOX DRAWINGS UP SINGLE AND HORIZONTAL DOUBLE
|
||||||
|
0x2568 ╨ # BOX DRAWINGS UP DOUBLE AND HORIZONTAL SINGLE
|
||||||
|
0x2569 ╩ # BOX DRAWINGS DOUBLE UP AND HORIZONTAL
|
||||||
|
0x256A ╪ # BOX DRAWINGS VERTICAL SINGLE AND HORIZONTAL DOUBLE
|
||||||
|
0x256B ╫ # BOX DRAWINGS VERTICAL DOUBLE AND HORIZONTAL SINGLE
|
||||||
|
0x256C ╬ # BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL
|
||||||
|
0x2580 ▀ # UPPER HALF BLOCK
|
||||||
|
0x2584 ▄ # LOWER HALF BLOCK
|
||||||
|
0x2588 █ # FULL BLOCK
|
||||||
|
0x2591 ░ # LIGHT SHADE
|
||||||
|
0x2592 ▒ # MEDIUM SHADE
|
||||||
|
0x2593 ▓ # DARK SHADE
|
||||||
|
0x25A1 □ # WHITE SQUARE
|
||||||
|
0x25AA ▪ # BLACK SMALL SQUARE
|
||||||
|
0x25AD ▭ # WHITE RECTANGLE
|
||||||
|
0x25AE ▮ # BLACK VERTICAL RECTANGLE
|
||||||
|
0x25B3 △ # WHITE UP-POINTING TRIANGLE
|
||||||
|
0x25B4 ▴ # BLACK UP-POINTING SMALL TRIANGLE
|
||||||
|
0x25B5 ▵ # WHITE UP-POINTING SMALL TRIANGLE
|
||||||
|
0x25B8 ▸ # BLACK RIGHT-POINTING SMALL TRIANGLE
|
||||||
|
0x25B9 ▹ # WHITE RIGHT-POINTING SMALL TRIANGLE
|
||||||
|
0x25BD ▽ # WHITE DOWN-POINTING TRIANGLE
|
||||||
|
0x25BE ▾ # BLACK DOWN-POINTING SMALL TRIANGLE
|
||||||
|
0x25BF ▿ # WHITE DOWN-POINTING SMALL TRIANGLE
|
||||||
|
0x25C2 ◂ # BLACK LEFT-POINTING SMALL TRIANGLE
|
||||||
|
0x25C3 ◃ # WHITE LEFT-POINTING SMALL TRIANGLE
|
||||||
|
0x25CA ◊ # LOZENGE
|
||||||
|
0x25CB ○ # WHITE CIRCLE
|
||||||
|
0x2605 ★ # BLACK STAR
|
||||||
|
0x2606 ☆ # WHITE STAR
|
||||||
|
0x260E ☎ # BLACK TELEPHONE
|
||||||
|
0x2640 ♀ # FEMALE SIGN
|
||||||
|
0x2642 ♂ # MALE SIGN
|
||||||
|
0x2660 ♠ # BLACK SPADE SUIT
|
||||||
|
0x2663 ♣ # BLACK CLUB SUIT
|
||||||
|
0x2665 ♥ # BLACK HEART SUIT
|
||||||
|
0x2666 ♦ # BLACK DIAMOND SUIT
|
||||||
|
0x266A ♪ # EIGHTH NOTE
|
||||||
|
0x266D ♭ # MUSIC FLAT SIGN
|
||||||
|
0x266E ♮ # MUSIC NATURAL SIGN
|
||||||
|
0x266F ♯ # MUSIC SHARP SIGN
|
||||||
|
0x2713 ✓ # CHECK MARK
|
||||||
|
0x2717 ✗ # BALLOT X
|
||||||
|
0x2720 ✠ # MALTESE CROSS
|
||||||
|
0x2726 ✦ # BLACK FOUR POINTED STAR
|
||||||
|
0x2727 ✧ # WHITE FOUR POINTED STAR
|
||||||
|
0x2736 ✶ # SIX POINTED BLACK STAR
|
||||||
|
0xFB00 ff # LATIN SMALL LIGATURE FF
|
||||||
|
0xFB01 fi # LATIN SMALL LIGATURE FI
|
||||||
|
0xFB02 fl # LATIN SMALL LIGATURE FL
|
||||||
|
0xFB03 ffi # LATIN SMALL LIGATURE FFI
|
||||||
|
0xFB04 ffl # LATIN SMALL LIGATURE FFL
|
||||||
|
|
||||||
|
|
||||||
|
</PRE>
|
||||||
|
</BODY>
|
||||||
|
</HTML>
|
900
tests/lynx-dump/data/unicode.html.exp
Normal file
900
tests/lynx-dump/data/unicode.html.exp
Normal file
@ -0,0 +1,900 @@
|
|||||||
|
|
||||||
|
This table prepared from SGML.TXT available at ftp.unicode.org
|
||||||
|
|
||||||
|
ftp://ftp.unicode.org/MAPPINGS/VENDORS/MISC/SGML.TXT
|
||||||
|
(if doing ftp, try cd Public/MAPPINGS/VENDORS/MISC)
|
||||||
|
|
||||||
|
|
||||||
|
original comment:
|
||||||
|
|
||||||
|
# Author: John Cowan <cowan@ccil.org>
|
||||||
|
# Date: 25 July 1997
|
||||||
|
#
|
||||||
|
# The following table maps SGML character entities from various
|
||||||
|
# public sets (namely, ISOamsa, ISOamsb, ISOamsc, ISOamsn, ISOamso,
|
||||||
|
# ISOamsr, ISObox, ISOcyr1, ISOcyr2, ISOdia, ISOgrk1, ISOgrk2,
|
||||||
|
# ISOgrk3, ISOgrk4, ISOlat1, ISOlat2, ISOnum, ISOpub, ISOtech,
|
||||||
|
# HTMLspecial, HTMLsymbol) to corresponding Unicode characters.
|
||||||
|
#
|
||||||
|
# The table has four tab-separated columns:
|
||||||
|
# Column 1: SGML character entity name
|
||||||
|
# Column 2: SGML public entity set
|
||||||
|
# Column 3: Unicode 2.0 character code
|
||||||
|
# Column 4: Unicode 2.0 character name (UPPER CASE)
|
||||||
|
# Entries which don't have Unicode equivalents have "0x????"
|
||||||
|
# in Column 3 and a lower case description (from the public entity
|
||||||
|
# set DTD) in Column 4. The mapping is not reversible, because many
|
||||||
|
# distinctions are unified away in Unicode, particularly between
|
||||||
|
# mathematical symbols.
|
||||||
|
#
|
||||||
|
# The table is sorted case-blind by SGML character entity name.
|
||||||
|
#
|
||||||
|
# The contents of this table are drawn from various sources, and
|
||||||
|
# are in the public domain.
|
||||||
|
#
|
||||||
|
|
||||||
|
|
||||||
|
This test is illuminated Unicode numeric entities like ⊫
|
||||||
|
We sort the entities according to unicode numbers.
|
||||||
|
You should see visible characters if your display character set support them
|
||||||
|
or some substitution string picked up from src/chrtrans/def7_uni.tbl
|
||||||
|
|
||||||
|
If you see something like 㓒 - this number unknown to def7_uni.tbl
|
||||||
|
or the internal browser's implementation is broken.
|
||||||
|
Leonid Pauzner.
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
0x0021 ! # EXCLAMATION MARK
|
||||||
|
0x0022 " # QUOTATION MARK
|
||||||
|
0x0023 # # NUMBER SIGN
|
||||||
|
0x0024 $ # DOLLAR SIGN
|
||||||
|
0x0025 % # PERCENT SIGN
|
||||||
|
0x0026 & # AMPERSAND
|
||||||
|
0x0028 ( # LEFT PARENTHESIS
|
||||||
|
0x0029 ) # RIGHT PARENTHESIS
|
||||||
|
0x002A * # ASTERISK
|
||||||
|
0x002B + # PLUS SIGN
|
||||||
|
0x002C , # COMMA
|
||||||
|
0x002D - # HYPHEN-MINUS
|
||||||
|
0x002E . # FULL STOP
|
||||||
|
0x002F / # SOLIDUS
|
||||||
|
0x003A : # COLON
|
||||||
|
0x003B ; # SEMICOLON
|
||||||
|
0x003C < # LESS-THAN SIGN
|
||||||
|
0x003D = # EQUALS SIGN
|
||||||
|
0x003E > # GREATER-THAN SIGN
|
||||||
|
0x003F ? # QUESTION MARK
|
||||||
|
0x0040 @ # COMMERCIAL AT
|
||||||
|
0x005B [ # LEFT SQUARE BRACKET
|
||||||
|
0x005C \ # REVERSE SOLIDUS
|
||||||
|
0x005C \ # REVERSE SOLIDUS
|
||||||
|
0x005D ] # RIGHT SQUARE BRACKET
|
||||||
|
0x005F _ # LOW LINE
|
||||||
|
0x0060 ` # GRAVE ACCENT
|
||||||
|
0x007B { # LEFT CURLY BRACKET
|
||||||
|
0x007C | # VERTICAL LINE
|
||||||
|
0x007D } # RIGHT CURLY BRACKET
|
||||||
|
0x00A0 # NO-BREAK SPACE
|
||||||
|
0x00A1 ¡ # INVERTED EXCLAMATION MARK
|
||||||
|
0x00A2 ¢ # CENT SIGN
|
||||||
|
0x00A3 £ # POUND SIGN
|
||||||
|
0x00A4 ¤ # CURRENCY SIGN
|
||||||
|
0x00A5 ¥ # YEN SIGN
|
||||||
|
0x00A6 ¦ # BROKEN BAR
|
||||||
|
0x00A7 § # SECTION SIGN
|
||||||
|
0x00A8 ¨ # DIAERESIS
|
||||||
|
0x00A9 © # COPYRIGHT SIGN
|
||||||
|
0x00AA ª # FEMININE ORDINAL INDICATOR
|
||||||
|
0x00AB « # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
|
||||||
|
0x00AC ¬ # NOT SIGN
|
||||||
|
0x00AD # SOFT HYPHEN
|
||||||
|
0x00AE ® # REGISTERED SIGN
|
||||||
|
0x00AF ¯ # MACRON
|
||||||
|
0x00B0 ° # DEGREE SIGN
|
||||||
|
0x00B1 ± # PLUS-MINUS SIGN
|
||||||
|
0x00B2 ² # SUPERSCRIPT TWO
|
||||||
|
0x00B3 ³ # SUPERSCRIPT THREE
|
||||||
|
0x00B4 ´ # ACUTE ACCENT
|
||||||
|
0x00B5 µ # MICRO SIGN
|
||||||
|
0x00B6 ¶ # PILCROW SIGN
|
||||||
|
0x00B7 · # MIDDLE DOT
|
||||||
|
0x00B8 ¸ # CEDILLA
|
||||||
|
0x00B9 ¹ # SUPERSCRIPT ONE
|
||||||
|
0x00BA º # MASCULINE ORDINAL INDICATOR
|
||||||
|
0x00BB » # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
|
||||||
|
0x00BC ¼ # VULGAR FRACTION ONE QUARTER
|
||||||
|
0x00BD ½ # VULGAR FRACTION ONE HALF
|
||||||
|
0x00BE ¾ # VULGAR FRACTION THREE QUARTERS
|
||||||
|
0x00BF ¿ # INVERTED QUESTION MARK
|
||||||
|
0x00C0 À # LATIN CAPITAL LETTER A WITH GRAVE
|
||||||
|
0x00C1 Á # LATIN CAPITAL LETTER A WITH ACUTE
|
||||||
|
0x00C2 Â # LATIN CAPITAL LETTER A WITH CIRCUMFLEX
|
||||||
|
0x00C3 Ã # LATIN CAPITAL LETTER A WITH TILDE
|
||||||
|
0x00C4 Ä # LATIN CAPITAL LETTER A WITH DIAERESIS
|
||||||
|
0x00C5 Å # LATIN CAPITAL LETTER A WITH RING ABOVE
|
||||||
|
0x00C6 Æ # LATIN CAPITAL LETTER AE
|
||||||
|
0x00C7 Ç # LATIN CAPITAL LETTER C WITH CEDILLA
|
||||||
|
0x00C8 È # LATIN CAPITAL LETTER E WITH GRAVE
|
||||||
|
0x00C9 É # LATIN CAPITAL LETTER E WITH ACUTE
|
||||||
|
0x00CA Ê # LATIN CAPITAL LETTER E WITH CIRCUMFLEX
|
||||||
|
0x00CB Ë # LATIN CAPITAL LETTER E WITH DIAERESIS
|
||||||
|
0x00CC Ì # LATIN CAPITAL LETTER I WITH GRAVE
|
||||||
|
0x00CD Í # LATIN CAPITAL LETTER I WITH ACUTE
|
||||||
|
0x00CE Î # LATIN CAPITAL LETTER I WITH CIRCUMFLEX
|
||||||
|
0x00CF Ï # LATIN CAPITAL LETTER I WITH DIAERESIS
|
||||||
|
0x00D0 Ð # LATIN CAPITAL LETTER ETH
|
||||||
|
0x00D1 Ñ # LATIN CAPITAL LETTER N WITH TILDE
|
||||||
|
0x00D2 Ò # LATIN CAPITAL LETTER O WITH GRAVE
|
||||||
|
0x00D3 Ó # LATIN CAPITAL LETTER O WITH ACUTE
|
||||||
|
0x00D4 Ô # LATIN CAPITAL LETTER O WITH CIRCUMFLEX
|
||||||
|
0x00D5 Õ # LATIN CAPITAL LETTER O WITH TILDE
|
||||||
|
0x00D6 Ö # LATIN CAPITAL LETTER O WITH DIAERESIS
|
||||||
|
0x00D7 × # MULTIPLICATION SIGN
|
||||||
|
0x00D8 Ø # LATIN CAPITAL LETTER O WITH STROKE
|
||||||
|
0x00D9 Ù # LATIN CAPITAL LETTER U WITH GRAVE
|
||||||
|
0x00DA Ú # LATIN CAPITAL LETTER U WITH ACUTE
|
||||||
|
0x00DB Û # LATIN CAPITAL LETTER U WITH CIRCUMFLEX
|
||||||
|
0x00DC Ü # LATIN CAPITAL LETTER U WITH DIAERESIS
|
||||||
|
0x00DD Ý # LATIN CAPITAL LETTER Y WITH ACUTE
|
||||||
|
0x00DE Þ # LATIN CAPITAL LETTER THORN
|
||||||
|
0x00DF ß # LATIN SMALL LETTER SHARP S
|
||||||
|
0x00E0 à # LATIN SMALL LETTER A WITH GRAVE
|
||||||
|
0x00E1 á # LATIN SMALL LETTER A WITH ACUTE
|
||||||
|
0x00E2 â # LATIN SMALL LETTER A WITH CIRCUMFLEX
|
||||||
|
0x00E3 ã # LATIN SMALL LETTER A WITH TILDE
|
||||||
|
0x00E4 ä # LATIN SMALL LETTER A WITH DIAERESIS
|
||||||
|
0x00E5 å # LATIN SMALL LETTER A WITH RING ABOVE
|
||||||
|
0x00E6 æ # LATIN SMALL LETTER AE
|
||||||
|
0x00E7 ç # LATIN SMALL LETTER C WITH CEDILLA
|
||||||
|
0x00E8 è # LATIN SMALL LETTER E WITH GRAVE
|
||||||
|
0x00E9 é # LATIN SMALL LETTER E WITH ACUTE
|
||||||
|
0x00EA ê # LATIN SMALL LETTER E WITH CIRCUMFLEX
|
||||||
|
0x00EB ë # LATIN SMALL LETTER E WITH DIAERESIS
|
||||||
|
0x00EC ì # LATIN SMALL LETTER I WITH GRAVE
|
||||||
|
0x00ED í # LATIN SMALL LETTER I WITH ACUTE
|
||||||
|
0x00EE î # LATIN SMALL LETTER I WITH CIRCUMFLEX
|
||||||
|
0x00EF ï # LATIN SMALL LETTER I WITH DIAERESIS
|
||||||
|
0x00F0 ð # LATIN SMALL LETTER ETH
|
||||||
|
0x00F1 ñ # LATIN SMALL LETTER N WITH TILDE
|
||||||
|
0x00F2 ò # LATIN SMALL LETTER O WITH GRAVE
|
||||||
|
0x00F3 ó # LATIN SMALL LETTER O WITH ACUTE
|
||||||
|
0x00F4 ô # LATIN SMALL LETTER O WITH CIRCUMFLEX
|
||||||
|
0x00F5 õ # LATIN SMALL LETTER O WITH TILDE
|
||||||
|
0x00F6 ö # LATIN SMALL LETTER O WITH DIAERESIS
|
||||||
|
0x00F7 ÷ # DIVISION SIGN
|
||||||
|
0x00F8 ø # LATIN SMALL LETTER O WITH STROKE
|
||||||
|
0x00F9 ù # LATIN SMALL LETTER U WITH GRAVE
|
||||||
|
0x00FA ú # LATIN SMALL LETTER U WITH ACUTE
|
||||||
|
0x00FB û # LATIN SMALL LETTER U WITH CIRCUMFLEX
|
||||||
|
0x00FC ü # LATIN SMALL LETTER U WITH DIAERESIS
|
||||||
|
0x00FD ý # LATIN SMALL LETTER Y WITH ACUTE
|
||||||
|
0x00FE þ # LATIN SMALL LETTER THORN
|
||||||
|
0x00FF ÿ # LATIN SMALL LETTER Y WITH DIAERESIS
|
||||||
|
0x0100 Ā # LATIN CAPITAL LETTER A WITH MACRON
|
||||||
|
0x0101 ā # LATIN SMALL LETTER A WITH MACRON
|
||||||
|
0x0102 Ă # LATIN CAPITAL LETTER A WITH BREVE
|
||||||
|
0x0103 ă # LATIN SMALL LETTER A WITH BREVE
|
||||||
|
0x0104 Ą # LATIN CAPITAL LETTER A WITH OGONEK
|
||||||
|
0x0105 ą # LATIN SMALL LETTER A WITH OGONEK
|
||||||
|
0x0106 Ć # LATIN CAPITAL LETTER C WITH ACUTE
|
||||||
|
0x0107 ć # LATIN SMALL LETTER C WITH ACUTE
|
||||||
|
0x0108 Ĉ # LATIN CAPITAL LETTER C WITH CIRCUMFLEX
|
||||||
|
0x0109 ĉ # LATIN SMALL LETTER C WITH CIRCUMFLEX
|
||||||
|
0x010A Ċ # LATIN CAPITAL LETTER C WITH DOT ABOVE
|
||||||
|
0x010B ċ # LATIN SMALL LETTER C WITH DOT ABOVE
|
||||||
|
0x010C Č # LATIN CAPITAL LETTER C WITH CARON
|
||||||
|
0x010D č # LATIN SMALL LETTER C WITH CARON
|
||||||
|
0x010E Ď # LATIN CAPITAL LETTER D WITH CARON
|
||||||
|
0x010F ď # LATIN SMALL LETTER D WITH CARON
|
||||||
|
0x0110 Đ # LATIN CAPITAL LETTER D WITH STROKE
|
||||||
|
0x0111 đ # LATIN SMALL LETTER D WITH STROKE
|
||||||
|
0x0112 Ē # LATIN CAPITAL LETTER E WITH MACRON
|
||||||
|
0x0113 ē # LATIN SMALL LETTER E WITH MACRON
|
||||||
|
0x0116 Ė # LATIN CAPITAL LETTER E WITH DOT ABOVE
|
||||||
|
0x0117 ė # LATIN SMALL LETTER E WITH DOT ABOVE
|
||||||
|
0x0118 Ę # LATIN CAPITAL LETTER E WITH OGONEK
|
||||||
|
0x0119 ę # LATIN SMALL LETTER E WITH OGONEK
|
||||||
|
0x011A Ě # LATIN CAPITAL LETTER E WITH CARON
|
||||||
|
0x011B ě # LATIN SMALL LETTER E WITH CARON
|
||||||
|
0x011C Ĝ # LATIN CAPITAL LETTER G WITH CIRCUMFLEX
|
||||||
|
0x011D ĝ # LATIN SMALL LETTER G WITH CIRCUMFLEX
|
||||||
|
0x011E Ğ # LATIN CAPITAL LETTER G WITH BREVE
|
||||||
|
0x011F ğ # LATIN SMALL LETTER G WITH BREVE
|
||||||
|
0x0120 Ġ # LATIN CAPITAL LETTER G WITH DOT ABOVE
|
||||||
|
0x0121 ġ # LATIN SMALL LETTER G WITH DOT ABOVE
|
||||||
|
0x0122 Ģ # LATIN CAPITAL LETTER G WITH CEDILLA
|
||||||
|
0x0123 ģ # LATIN SMALL LETTER G WITH CEDILLA
|
||||||
|
0x0124 Ĥ # LATIN CAPITAL LETTER H WITH CIRCUMFLEX
|
||||||
|
0x0125 ĥ # LATIN SMALL LETTER H WITH CIRCUMFLEX
|
||||||
|
0x0126 Ħ # LATIN CAPITAL LETTER H WITH STROKE
|
||||||
|
0x0127 ħ # LATIN SMALL LETTER H WITH STROKE
|
||||||
|
0x0128 Ĩ # LATIN CAPITAL LETTER I WITH TILDE
|
||||||
|
0x0129 ĩ # LATIN SMALL LETTER I WITH TILDE
|
||||||
|
0x012A Ī # LATIN CAPITAL LETTER I WITH MACRON
|
||||||
|
0x012B ī # LATIN SMALL LETTER I WITH MACRON
|
||||||
|
0x012E Į # LATIN CAPITAL LETTER I WITH OGONEK
|
||||||
|
0x012F į # LATIN SMALL LETTER I WITH OGONEK
|
||||||
|
0x0130 İ # LATIN CAPITAL LETTER I WITH DOT ABOVE
|
||||||
|
0x0131 ı # LATIN SMALL LETTER DOTLESS I
|
||||||
|
0x0131 ı # LATIN SMALL LETTER DOTLESS I
|
||||||
|
0x0132 IJ # LATIN CAPITAL LIGATURE IJ
|
||||||
|
0x0133 ij # LATIN SMALL LIGATURE IJ
|
||||||
|
0x0134 Ĵ # LATIN CAPITAL LETTER J WITH CIRCUMFLEX
|
||||||
|
0x0135 ĵ # LATIN SMALL LETTER J WITH CIRCUMFLEX
|
||||||
|
0x0136 Ķ # LATIN CAPITAL LETTER K WITH CEDILLA
|
||||||
|
0x0137 ķ # LATIN SMALL LETTER K WITH CEDILLA
|
||||||
|
0x0138 ĸ # LATIN SMALL LETTER KRA
|
||||||
|
0x0139 Ĺ # LATIN CAPITAL LETTER L WITH ACUTE
|
||||||
|
0x013A ĺ # LATIN SMALL LETTER L WITH ACUTE
|
||||||
|
0x013B Ļ # LATIN CAPITAL LETTER L WITH CEDILLA
|
||||||
|
0x013C ļ # LATIN SMALL LETTER L WITH CEDILLA
|
||||||
|
0x013D Ľ # LATIN CAPITAL LETTER L WITH CARON
|
||||||
|
0x013E ľ # LATIN SMALL LETTER L WITH CARON
|
||||||
|
0x013F Ŀ # LATIN CAPITAL LETTER L WITH MIDDLE DOT
|
||||||
|
0x0140 ŀ # LATIN SMALL LETTER L WITH MIDDLE DOT
|
||||||
|
0x0141 Ł # LATIN CAPITAL LETTER L WITH STROKE
|
||||||
|
0x0142 ł # LATIN SMALL LETTER L WITH STROKE
|
||||||
|
0x0143 Ń # LATIN CAPITAL LETTER N WITH ACUTE
|
||||||
|
0x0144 ń # LATIN SMALL LETTER N WITH ACUTE
|
||||||
|
0x0145 Ņ # LATIN CAPITAL LETTER N WITH CEDILLA
|
||||||
|
0x0146 ņ # LATIN SMALL LETTER N WITH CEDILLA
|
||||||
|
0x0147 Ň # LATIN CAPITAL LETTER N WITH CARON
|
||||||
|
0x0148 ň # LATIN SMALL LETTER N WITH CARON
|
||||||
|
0x0149 ʼn # LATIN SMALL LETTER N PRECEDED BY APOSTROPHE
|
||||||
|
0x014A Ŋ # LATIN CAPITAL LETTER ENG
|
||||||
|
0x014B ŋ # LATIN SMALL LETTER ENG
|
||||||
|
0x014C Ō # LATIN CAPITAL LETTER O WITH MACRON
|
||||||
|
0x014D ō # LATIN SMALL LETTER O WITH MACRON
|
||||||
|
0x0150 Ő # LATIN CAPITAL LETTER O WITH DOUBLE ACUTE
|
||||||
|
0x0151 ő # LATIN SMALL LETTER O WITH DOUBLE ACUTE
|
||||||
|
0x0152 Œ # LATIN CAPITAL LIGATURE OE
|
||||||
|
0x0153 œ # LATIN SMALL LIGATURE OE
|
||||||
|
0x0154 Ŕ # LATIN CAPITAL LETTER R WITH ACUTE
|
||||||
|
0x0155 ŕ # LATIN SMALL LETTER R WITH ACUTE
|
||||||
|
0x0156 Ŗ # LATIN CAPITAL LETTER R WITH CEDILLA
|
||||||
|
0x0157 ŗ # LATIN SMALL LETTER R WITH CEDILLA
|
||||||
|
0x0158 Ř # LATIN CAPITAL LETTER R WITH CARON
|
||||||
|
0x0159 ř # LATIN SMALL LETTER R WITH CARON
|
||||||
|
0x015A Ś # LATIN CAPITAL LETTER S WITH ACUTE
|
||||||
|
0x015B ś # LATIN SMALL LETTER S WITH ACUTE
|
||||||
|
0x015C Ŝ # LATIN CAPITAL LETTER S WITH CIRCUMFLEX
|
||||||
|
0x015D ŝ # LATIN SMALL LETTER S WITH CIRCUMFLEX
|
||||||
|
0x015E Ş # LATIN CAPITAL LETTER S WITH CEDILLA
|
||||||
|
0x015F ş # LATIN SMALL LETTER S WITH CEDILLA
|
||||||
|
0x0160 Š # LATIN CAPITAL LETTER S WITH CARON
|
||||||
|
0x0161 š # LATIN SMALL LETTER S WITH CARON
|
||||||
|
0x0162 Ţ # LATIN CAPITAL LETTER T WITH CEDILLA
|
||||||
|
0x0163 ţ # LATIN SMALL LETTER T WITH CEDILLA
|
||||||
|
0x0164 Ť # LATIN CAPITAL LETTER T WITH CARON
|
||||||
|
0x0165 ť # LATIN SMALL LETTER T WITH CARON
|
||||||
|
0x0166 Ŧ # LATIN CAPITAL LETTER T WITH STROKE
|
||||||
|
0x0167 ŧ # LATIN SMALL LETTER T WITH STROKE
|
||||||
|
0x0168 Ũ # LATIN CAPITAL LETTER U WITH TILDE
|
||||||
|
0x0169 ũ # LATIN SMALL LETTER U WITH TILDE
|
||||||
|
0x016A Ū # LATIN CAPITAL LETTER U WITH MACRON
|
||||||
|
0x016B ū # LATIN SMALL LETTER U WITH MACRON
|
||||||
|
0x016C Ŭ # LATIN CAPITAL LETTER U WITH BREVE
|
||||||
|
0x016D ŭ # LATIN SMALL LETTER U WITH BREVE
|
||||||
|
0x016E Ů # LATIN CAPITAL LETTER U WITH RING ABOVE
|
||||||
|
0x016F ů # LATIN SMALL LETTER U WITH RING ABOVE
|
||||||
|
0x0170 Ű # LATIN CAPITAL LETTER U WITH DOUBLE ACUTE
|
||||||
|
0x0171 ű # LATIN SMALL LETTER U WITH DOUBLE ACUTE
|
||||||
|
0x0172 Ų # LATIN CAPITAL LETTER U WITH OGONEK
|
||||||
|
0x0173 ų # LATIN SMALL LETTER U WITH OGONEK
|
||||||
|
0x0174 Ŵ # LATIN CAPITAL LETTER W WITH CIRCUMFLEX
|
||||||
|
0x0175 ŵ # LATIN SMALL LETTER W WITH CIRCUMFLEX
|
||||||
|
0x0176 Ŷ # LATIN CAPITAL LETTER Y WITH CIRCUMFLEX
|
||||||
|
0x0177 ŷ # LATIN SMALL LETTER Y WITH CIRCUMFLEX
|
||||||
|
0x0178 Ÿ # LATIN CAPITAL LETTER Y WITH DIAERESIS
|
||||||
|
0x0179 Ź # LATIN CAPITAL LETTER Z WITH ACUTE
|
||||||
|
0x017A ź # LATIN SMALL LETTER Z WITH ACUTE
|
||||||
|
0x017B Ż # LATIN CAPITAL LETTER Z WITH DOT ABOVE
|
||||||
|
0x017C ż # LATIN SMALL LETTER Z WITH DOT ABOVE
|
||||||
|
0x017D Ž # LATIN CAPITAL LETTER Z WITH CARON
|
||||||
|
0x017E ž # LATIN SMALL LETTER Z WITH CARON
|
||||||
|
0x0192 ƒ # LATIN SMALL LETTER F WITH HOOK
|
||||||
|
0x01F5 ǵ # LATIN SMALL LETTER G WITH ACUTE
|
||||||
|
0x02BC ʼ # MODIFIER LETTER APOSTROPHE
|
||||||
|
0x02C6 ˆ # MODIFIER LETTER CIRCUMFLEX ACCENT
|
||||||
|
0x02C7 ˇ # CARON
|
||||||
|
0x02D8 ˘ # BREVE
|
||||||
|
0x02D9 ˙ # DOT ABOVE
|
||||||
|
0x02DA ˚ # RING ABOVE
|
||||||
|
0x02DB ˛ # OGONEK
|
||||||
|
0x02DC ˜ # SMALL TILDE
|
||||||
|
0x02DD ˝ # DOUBLE ACUTE ACCENT
|
||||||
|
0x0386 Ά # GREEK CAPITAL LETTER ALPHA WITH TONOS
|
||||||
|
0x0388 Έ # GREEK CAPITAL LETTER EPSILON WITH TONOS
|
||||||
|
0x0389 Ή # GREEK CAPITAL LETTER ETA WITH TONOS
|
||||||
|
0x038A Ί # GREEK CAPITAL LETTER IOTA WITH TONOS
|
||||||
|
0x038C Ό # GREEK CAPITAL LETTER OMICRON WITH TONOS
|
||||||
|
0x038E Ύ # GREEK CAPITAL LETTER UPSILON WITH TONOS
|
||||||
|
0x038F Ώ # GREEK CAPITAL LETTER OMEGA WITH TONOS
|
||||||
|
0x0390 ΐ # GREEK SMALL LETTER IOTA WITH DIALYTIKA AND TONOS
|
||||||
|
0x0391 Α # GREEK CAPITAL LETTER ALPHA
|
||||||
|
0x0392 Β # GREEK CAPITAL LETTER BETA
|
||||||
|
0x0393 Γ # GREEK CAPITAL LETTER GAMMA
|
||||||
|
0x0394 Δ # GREEK CAPITAL LETTER DELTA
|
||||||
|
0x0395 Ε # GREEK CAPITAL LETTER EPSILON
|
||||||
|
0x0396 Ζ # GREEK CAPITAL LETTER ZETA
|
||||||
|
0x0397 Η # GREEK CAPITAL LETTER ETA
|
||||||
|
0x0398 Θ # GREEK CAPITAL LETTER THETA
|
||||||
|
0x0399 Ι # GREEK CAPITAL LETTER IOTA
|
||||||
|
0x039A Κ # GREEK CAPITAL LETTER KAPPA
|
||||||
|
0x039B Λ # GREEK CAPITAL LETTER LAMDA
|
||||||
|
0x039C Μ # GREEK CAPITAL LETTER MU
|
||||||
|
0x039D Ν # GREEK CAPITAL LETTER NU
|
||||||
|
0x039E Ξ # GREEK CAPITAL LETTER XI
|
||||||
|
0x039F Ο # GREEK CAPITAL LETTER OMICRON
|
||||||
|
0x03A0 Π # GREEK CAPITAL LETTER PI
|
||||||
|
0x03A1 Ρ # GREEK CAPITAL LETTER RHO
|
||||||
|
0x03A3 Σ # GREEK CAPITAL LETTER SIGMA
|
||||||
|
0x03A4 Τ # GREEK CAPITAL LETTER TAU
|
||||||
|
0x03A5 Υ # GREEK CAPITAL LETTER UPSILON
|
||||||
|
0x03A6 Φ # GREEK CAPITAL LETTER PHI
|
||||||
|
0x03A7 Χ # GREEK CAPITAL LETTER CHI
|
||||||
|
0x03A8 Ψ # GREEK CAPITAL LETTER PSI
|
||||||
|
0x03A9 Ω # GREEK CAPITAL LETTER OMEGA
|
||||||
|
0x03AA Ϊ # GREEK CAPITAL LETTER IOTA WITH DIALYTIKA
|
||||||
|
0x03AB Ϋ # GREEK CAPITAL LETTER UPSILON WITH DIALYTIKA
|
||||||
|
0x03AC ά # GREEK SMALL LETTER ALPHA WITH TONOS
|
||||||
|
0x03AD έ # GREEK SMALL LETTER EPSILON WITH TONOS
|
||||||
|
0x03AE ή # GREEK SMALL LETTER ETA WITH TONOS
|
||||||
|
0x03AF ί # GREEK SMALL LETTER IOTA WITH TONOS
|
||||||
|
0x03B0 ΰ # GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND TONOS
|
||||||
|
0x03B1 α # GREEK SMALL LETTER ALPHA
|
||||||
|
0x03B2 β # GREEK SMALL LETTER BETA
|
||||||
|
0x03B3 γ # GREEK SMALL LETTER GAMMA
|
||||||
|
0x03B4 δ # GREEK SMALL LETTER DELTA
|
||||||
|
0x03B5 ε # GREEK SMALL LETTER EPSILON
|
||||||
|
0x03B6 ζ # GREEK SMALL LETTER ZETA
|
||||||
|
0x03B7 η # GREEK SMALL LETTER ETA
|
||||||
|
0x03B8 θ # GREEK SMALL LETTER THETA
|
||||||
|
0x03B9 ι # GREEK SMALL LETTER IOTA
|
||||||
|
0x03BA κ # GREEK SMALL LETTER KAPPA
|
||||||
|
0x03BB λ # GREEK SMALL LETTER LAMDA
|
||||||
|
0x03BC μ # GREEK SMALL LETTER MU
|
||||||
|
0x03BD ν # GREEK SMALL LETTER NU
|
||||||
|
0x03BE ξ # GREEK SMALL LETTER XI
|
||||||
|
0x03BF ο # GREEK SMALL LETTER OMICRON
|
||||||
|
0x03C0 π # GREEK SMALL LETTER PI
|
||||||
|
0x03C1 ρ # GREEK SMALL LETTER RHO
|
||||||
|
0x03C2 ς # GREEK SMALL LETTER FINAL SIGMA
|
||||||
|
0x03C3 σ # GREEK SMALL LETTER SIGMA
|
||||||
|
0x03C4 τ # GREEK SMALL LETTER TAU
|
||||||
|
0x03C5 υ # GREEK SMALL LETTER UPSILON
|
||||||
|
0x03C6 φ # GREEK SMALL LETTER PHI
|
||||||
|
0x03C7 χ # GREEK SMALL LETTER CHI
|
||||||
|
0x03C8 ψ # GREEK SMALL LETTER PSI
|
||||||
|
0x03C9 ω # GREEK SMALL LETTER OMEGA
|
||||||
|
0x03CA ϊ # GREEK SMALL LETTER IOTA WITH DIALYTIKA
|
||||||
|
0x03CB ϋ # GREEK SMALL LETTER UPSILON WITH DIALYTIKA
|
||||||
|
0x03CC ό # GREEK SMALL LETTER OMICRON WITH TONOS
|
||||||
|
0x03CE ώ # GREEK SMALL LETTER OMEGA WITH TONOS
|
||||||
|
0x03D1 ϑ # GREEK THETA SYMBOL
|
||||||
|
0x03D2 ϒ # GREEK UPSILON WITH HOOK SYMBOL
|
||||||
|
0x03D5 ϕ # GREEK PHI SYMBOL
|
||||||
|
0x03D6 ϖ # GREEK PI SYMBOL
|
||||||
|
0x03DC Ϝ # GREEK LETTER DIGAMMA
|
||||||
|
0x03F0 ϰ # GREEK KAPPA SYMBOL
|
||||||
|
0x03F1 ϱ # GREEK RHO SYMBOL
|
||||||
|
0x0401 Ё # CYRILLIC CAPITAL LETTER IO
|
||||||
|
0x0402 Ђ # CYRILLIC CAPITAL LETTER DJE
|
||||||
|
0x0403 Ѓ # CYRILLIC CAPITAL LETTER GJE
|
||||||
|
0x0404 Є # CYRILLIC CAPITAL LETTER UKRAINIAN IE
|
||||||
|
0x0405 Ѕ # CYRILLIC CAPITAL LETTER DZE
|
||||||
|
0x0406 І # CYRILLIC CAPITAL LETTER BYELORUSSIAN-UKRAINIAN I
|
||||||
|
0x0407 Ї # CYRILLIC CAPITAL LETTER YI
|
||||||
|
0x0408 Ј # CYRILLIC CAPITAL LETTER JE
|
||||||
|
0x0409 Љ # CYRILLIC CAPITAL LETTER LJE
|
||||||
|
0x040A Њ # CYRILLIC CAPITAL LETTER NJE
|
||||||
|
0x040B Ћ # CYRILLIC CAPITAL LETTER TSHE
|
||||||
|
0x040C Ќ # CYRILLIC CAPITAL LETTER KJE
|
||||||
|
0x040E Ў # CYRILLIC CAPITAL LETTER SHORT U
|
||||||
|
0x040F Џ # CYRILLIC CAPITAL LETTER DZHE
|
||||||
|
0x0410 А # CYRILLIC CAPITAL LETTER A
|
||||||
|
0x0411 Б # CYRILLIC CAPITAL LETTER BE
|
||||||
|
0x0412 В # CYRILLIC CAPITAL LETTER VE
|
||||||
|
0x0413 Г # CYRILLIC CAPITAL LETTER GHE
|
||||||
|
0x0414 Д # CYRILLIC CAPITAL LETTER DE
|
||||||
|
0x0415 Е # CYRILLIC CAPITAL LETTER IE
|
||||||
|
0x0416 Ж # CYRILLIC CAPITAL LETTER ZHE
|
||||||
|
0x0417 З # CYRILLIC CAPITAL LETTER ZE
|
||||||
|
0x0418 И # CYRILLIC CAPITAL LETTER I
|
||||||
|
0x0419 Й # CYRILLIC CAPITAL LETTER SHORT I
|
||||||
|
0x041A К # CYRILLIC CAPITAL LETTER KA
|
||||||
|
0x041B Л # CYRILLIC CAPITAL LETTER EL
|
||||||
|
0x041C М # CYRILLIC CAPITAL LETTER EM
|
||||||
|
0x041D Н # CYRILLIC CAPITAL LETTER EN
|
||||||
|
0x041E О # CYRILLIC CAPITAL LETTER O
|
||||||
|
0x041F П # CYRILLIC CAPITAL LETTER PE
|
||||||
|
0x0420 Р # CYRILLIC CAPITAL LETTER ER
|
||||||
|
0x0421 С # CYRILLIC CAPITAL LETTER ES
|
||||||
|
0x0422 Т # CYRILLIC CAPITAL LETTER TE
|
||||||
|
0x0423 У # CYRILLIC CAPITAL LETTER U
|
||||||
|
0x0424 Ф # CYRILLIC CAPITAL LETTER EF
|
||||||
|
0x0425 Х # CYRILLIC CAPITAL LETTER HA
|
||||||
|
0x0426 Ц # CYRILLIC CAPITAL LETTER TSE
|
||||||
|
0x0427 Ч # CYRILLIC CAPITAL LETTER CHE
|
||||||
|
0x0428 Ш # CYRILLIC CAPITAL LETTER SHA
|
||||||
|
0x0429 Щ # CYRILLIC CAPITAL LETTER SHCHA
|
||||||
|
0x042A Ъ # CYRILLIC CAPITAL LETTER HARD SIGN
|
||||||
|
0x042B Ы # CYRILLIC CAPITAL LETTER YERU
|
||||||
|
0x042C Ь # CYRILLIC CAPITAL LETTER SOFT SIGN
|
||||||
|
0x042D Э # CYRILLIC CAPITAL LETTER E
|
||||||
|
0x042E Ю # CYRILLIC CAPITAL LETTER YU
|
||||||
|
0x042F Я # CYRILLIC CAPITAL LETTER YA
|
||||||
|
0x0430 а # CYRILLIC SMALL LETTER A
|
||||||
|
0x0431 б # CYRILLIC SMALL LETTER BE
|
||||||
|
0x0432 в # CYRILLIC SMALL LETTER VE
|
||||||
|
0x0433 г # CYRILLIC SMALL LETTER GHE
|
||||||
|
0x0434 д # CYRILLIC SMALL LETTER DE
|
||||||
|
0x0435 е # CYRILLIC SMALL LETTER IE
|
||||||
|
0x0436 ж # CYRILLIC SMALL LETTER ZHE
|
||||||
|
0x0437 з # CYRILLIC SMALL LETTER ZE
|
||||||
|
0x0438 и # CYRILLIC SMALL LETTER I
|
||||||
|
0x0439 й # CYRILLIC SMALL LETTER SHORT I
|
||||||
|
0x043A к # CYRILLIC SMALL LETTER KA
|
||||||
|
0x043B л # CYRILLIC SMALL LETTER EL
|
||||||
|
0x043C м # CYRILLIC SMALL LETTER EM
|
||||||
|
0x043D н # CYRILLIC SMALL LETTER EN
|
||||||
|
0x043E о # CYRILLIC SMALL LETTER O
|
||||||
|
0x043F п # CYRILLIC SMALL LETTER PE
|
||||||
|
0x0440 р # CYRILLIC SMALL LETTER ER
|
||||||
|
0x0441 с # CYRILLIC SMALL LETTER ES
|
||||||
|
0x0442 т # CYRILLIC SMALL LETTER TE
|
||||||
|
0x0443 у # CYRILLIC SMALL LETTER U
|
||||||
|
0x0444 ф # CYRILLIC SMALL LETTER EF
|
||||||
|
0x0445 х # CYRILLIC SMALL LETTER HA
|
||||||
|
0x0446 ц # CYRILLIC SMALL LETTER TSE
|
||||||
|
0x0447 ч # CYRILLIC SMALL LETTER CHE
|
||||||
|
0x0448 ш # CYRILLIC SMALL LETTER SHA
|
||||||
|
0x0449 щ # CYRILLIC SMALL LETTER SHCHA
|
||||||
|
0x044A ъ # CYRILLIC SMALL LETTER HARD SIGN
|
||||||
|
0x044B ы # CYRILLIC SMALL LETTER YERU
|
||||||
|
0x044C ь # CYRILLIC SMALL LETTER SOFT SIGN
|
||||||
|
0x044D э # CYRILLIC SMALL LETTER E
|
||||||
|
0x044E ю # CYRILLIC SMALL LETTER YU
|
||||||
|
0x044F я # CYRILLIC SMALL LETTER YA
|
||||||
|
0x0451 ё # CYRILLIC SMALL LETTER IO
|
||||||
|
0x0452 ђ # CYRILLIC SMALL LETTER DJE
|
||||||
|
0x0453 ѓ # CYRILLIC SMALL LETTER GJE
|
||||||
|
0x0454 є # CYRILLIC SMALL LETTER UKRAINIAN IE
|
||||||
|
0x0455 ѕ # CYRILLIC SMALL LETTER DZE
|
||||||
|
0x0456 і # CYRILLIC SMALL LETTER BYELORUSSIAN-UKRAINIAN I
|
||||||
|
0x0457 ї # CYRILLIC SMALL LETTER YI
|
||||||
|
0x0458 ј # CYRILLIC SMALL LETTER JE
|
||||||
|
0x0459 љ # CYRILLIC SMALL LETTER LJE
|
||||||
|
0x045A њ # CYRILLIC SMALL LETTER NJE
|
||||||
|
0x045B ћ # CYRILLIC SMALL LETTER TSHE
|
||||||
|
0x045C ќ # CYRILLIC SMALL LETTER KJE
|
||||||
|
0x045E ў # CYRILLIC SMALL LETTER SHORT U
|
||||||
|
0x045F џ # CYRILLIC SMALL LETTER DZHE
|
||||||
|
0x2002 # EN SPACE
|
||||||
|
0x2003 # EM SPACE
|
||||||
|
0x2004 # THREE-PER-EM SPACE
|
||||||
|
0x2005 # FOUR-PER-EM SPACE
|
||||||
|
0x2007 # FIGURE SPACE
|
||||||
|
0x2008 # PUNCTUATION SPACE
|
||||||
|
0x2009 # THIN SPACE
|
||||||
|
0x200A # HAIR SPACE
|
||||||
|
0x200C # ZERO WIDTH NON-JOINER
|
||||||
|
0x200D # ZERO WIDTH JOINER
|
||||||
|
0x200E # LEFT-TO-RIGHT MARK
|
||||||
|
0x200F # RIGHT-TO-LEFT MARK
|
||||||
|
0x2010 ‐ # HYPHEN
|
||||||
|
0x2013 – # EN DASH
|
||||||
|
0x2014 — # EM DASH
|
||||||
|
0x2015 ― # HORIZONTAL BAR
|
||||||
|
0x2016 ‖ # DOUBLE VERTICAL LINE
|
||||||
|
0x2018 ‘ # LEFT SINGLE QUOTATION MARK
|
||||||
|
0x2018 ‘ # LEFT SINGLE QUOTATION MARK
|
||||||
|
0x2019 ’ # RIGHT SINGLE QUOTATION MARK
|
||||||
|
0x201A ‚ # SINGLE LOW-9 QUOTATION MARK
|
||||||
|
0x201A ‚ # SINGLE LOW-9 QUOTATION MARK
|
||||||
|
0x201C “ # LEFT DOUBLE QUOTATION MARK
|
||||||
|
0x201C “ # LEFT DOUBLE QUOTATION MARK
|
||||||
|
0x201D ” # RIGHT DOUBLE QUOTATION MARK
|
||||||
|
0x201E „ # DOUBLE LOW-9 QUOTATION MARK
|
||||||
|
0x201E „ # DOUBLE LOW-9 QUOTATION MARK
|
||||||
|
0x2020 † # DAGGER
|
||||||
|
0x2021 ‡ # DOUBLE DAGGER
|
||||||
|
0x2022 • # BULLET
|
||||||
|
0x2025 ‥ # TWO DOT LEADER
|
||||||
|
0x2026 … # HORIZONTAL ELLIPSIS
|
||||||
|
0x2026 … # HORIZONTAL ELLIPSIS
|
||||||
|
0x2030 ‰ # PER MILLE SIGN
|
||||||
|
0x2032 ′ # PRIME
|
||||||
|
0x2032 ′ # PRIME
|
||||||
|
0x2033 ″ # DOUBLE PRIME
|
||||||
|
0x2034 ‴ # TRIPLE PRIME
|
||||||
|
0x2035 ‵ # REVERSED PRIME
|
||||||
|
0x2039 ‹ # SINGLE LEFT-POINTING ANGLE QUOTATION MARK
|
||||||
|
0x203A › # SINGLE RIGHT-POINTING ANGLE QUOTATION MARK
|
||||||
|
0x203E ‾ # OVERLINE
|
||||||
|
0x2041 ⁁ # CARET INSERTION POINT
|
||||||
|
0x2043 ⁃ # HYPHEN BULLET
|
||||||
|
0x2044 ⁄ # FRACTION SLASH
|
||||||
|
0x20AC € # EURO SIGN
|
||||||
|
0x20DB ⃛ # COMBINING THREE DOTS ABOVE
|
||||||
|
0x20DC ⃜ # COMBINING FOUR DOTS ABOVE
|
||||||
|
0x2105 ℅ # CARE OF
|
||||||
|
0x210B ℋ # SCRIPT CAPITAL H
|
||||||
|
0x210F ℏ # PLANCK CONSTANT OVER TWO PI
|
||||||
|
0x2111 ℑ # BLACK-LETTER CAPITAL I
|
||||||
|
0x2112 ℒ # SCRIPT CAPITAL L
|
||||||
|
0x2113 ℓ # SCRIPT SMALL L
|
||||||
|
0x2116 № # NUMERO SIGN
|
||||||
|
0x2117 ℗ # SOUND RECORDING COPYRIGHT
|
||||||
|
0x2118 ℘ # SCRIPT CAPITAL P
|
||||||
|
0x211C ℜ # BLACK-LETTER CAPITAL R
|
||||||
|
0x211E ℞ # PRESCRIPTION TAKE
|
||||||
|
0x2122 ™ # TRADE MARK SIGN
|
||||||
|
0x2126 Ω # OHM SIGN
|
||||||
|
0x212B Å # ANGSTROM SIGN
|
||||||
|
0x212C ℬ # SCRIPT CAPITAL B
|
||||||
|
0x2133 ℳ # SCRIPT CAPITAL M
|
||||||
|
0x2134 ℴ # SCRIPT SMALL O
|
||||||
|
0x2135 ℵ # ALEF SYMBOL
|
||||||
|
0x2135 ℵ # ALEF SYMBOL
|
||||||
|
0x2136 ℶ # BET SYMBOL
|
||||||
|
0x2137 ℷ # GIMEL SYMBOL
|
||||||
|
0x2138 ℸ # DALET SYMBOL
|
||||||
|
0x2153 ⅓ # VULGAR FRACTION ONE THIRD
|
||||||
|
0x2154 ⅔ # VULGAR FRACTION TWO THIRDS
|
||||||
|
0x2155 ⅕ # VULGAR FRACTION ONE FIFTH
|
||||||
|
0x2156 ⅖ # VULGAR FRACTION TWO FIFTHS
|
||||||
|
0x2157 ⅗ # VULGAR FRACTION THREE FIFTHS
|
||||||
|
0x2158 ⅘ # VULGAR FRACTION FOUR FIFTHS
|
||||||
|
0x2159 ⅙ # VULGAR FRACTION ONE SIXTH
|
||||||
|
0x215A ⅚ # VULGAR FRACTION FIVE SIXTHS
|
||||||
|
0x215B ⅛ # VULGAR FRACTION ONE EIGHTH
|
||||||
|
0x215C ⅜ # VULGAR FRACTION THREE EIGHTHS
|
||||||
|
0x215D ⅝ # VULGAR FRACTION FIVE EIGHTHS
|
||||||
|
0x215E ⅞ # VULGAR FRACTION SEVEN EIGHTHS
|
||||||
|
0x2190 ← # LEFTWARDS ARROW
|
||||||
|
0x2191 ↑ # UPWARDS ARROW
|
||||||
|
0x2192 → # RIGHTWARDS ARROW
|
||||||
|
0x2193 ↓ # DOWNWARDS ARROW
|
||||||
|
0x2194 ↔ # LEFT RIGHT ARROW
|
||||||
|
0x2195 ↕ # UP DOWN ARROW
|
||||||
|
0x2196 ↖ # NORTH WEST ARROW
|
||||||
|
0x2197 ↗ # NORTH EAST ARROW
|
||||||
|
0x2198 ↘ # SOUTH EAST ARROW
|
||||||
|
0x2199 ↙ # SOUTH WEST ARROW
|
||||||
|
0x219A ↚ # LEFTWARDS ARROW WITH STROKE
|
||||||
|
0x219B ↛ # RIGHTWARDS ARROW WITH STROKE
|
||||||
|
0x219D ↝ # RIGHTWARDS WAVE ARROW
|
||||||
|
0x219E ↞ # LEFTWARDS TWO HEADED ARROW
|
||||||
|
0x21A0 ↠ # RIGHTWARDS TWO HEADED ARROW
|
||||||
|
0x21A2 ↢ # LEFTWARDS ARROW WITH TAIL
|
||||||
|
0x21A3 ↣ # RIGHTWARDS ARROW WITH TAIL
|
||||||
|
0x21A6 ↦ # RIGHTWARDS ARROW FROM BAR
|
||||||
|
0x21A9 ↩ # LEFTWARDS ARROW WITH HOOK
|
||||||
|
0x21AA ↪ # RIGHTWARDS ARROW WITH HOOK
|
||||||
|
0x21AB ↫ # LEFTWARDS ARROW WITH LOOP
|
||||||
|
0x21AC ↬ # RIGHTWARDS ARROW WITH LOOP
|
||||||
|
0x21AD ↭ # LEFT RIGHT WAVE ARROW
|
||||||
|
0x21AE ↮ # LEFT RIGHT ARROW WITH STROKE
|
||||||
|
0x21B0 ↰ # UPWARDS ARROW WITH TIP LEFTWARDS
|
||||||
|
0x21B1 ↱ # UPWARDS ARROW WITH TIP RIGHTWARDS
|
||||||
|
0x21B5 ↵ # DOWNWARDS ARROW WITH CORNER LEFTWARDS
|
||||||
|
0x21B6 ↶ # ANTICLOCKWISE TOP SEMICIRCLE ARROW
|
||||||
|
0x21B7 ↷ # CLOCKWISE TOP SEMICIRCLE ARROW
|
||||||
|
0x21BA ↺ # ANTICLOCKWISE OPEN CIRCLE ARROW
|
||||||
|
0x21BB ↻ # CLOCKWISE OPEN CIRCLE ARROW
|
||||||
|
0x21BC ↼ # LEFTWARDS HARPOON WITH BARB UPWARDS
|
||||||
|
0x21BD ↽ # LEFTWARDS HARPOON WITH BARB DOWNWARDS
|
||||||
|
0x21BE ↾ # UPWARDS HARPOON WITH BARB RIGHTWARDS
|
||||||
|
0x21BF ↿ # UPWARDS HARPOON WITH BARB LEFTWARDS
|
||||||
|
0x21C0 ⇀ # RIGHTWARDS HARPOON WITH BARB UPWARDS
|
||||||
|
0x21C1 ⇁ # RIGHTWARDS HARPOON WITH BARB DOWNWARDS
|
||||||
|
0x21C2 ⇂ # DOWNWARDS HARPOON WITH BARB RIGHTWARDS
|
||||||
|
0x21C3 ⇃ # DOWNWARDS HARPOON WITH BARB LEFTWARDS
|
||||||
|
0x21C4 ⇄ # RIGHTWARDS ARROW OVER LEFTWARDS ARROW
|
||||||
|
0x21C6 ⇆ # LEFTWARDS ARROW OVER RIGHTWARDS ARROW
|
||||||
|
0x21C7 ⇇ # LEFTWARDS PAIRED ARROWS
|
||||||
|
0x21C8 ⇈ # UPWARDS PAIRED ARROWS
|
||||||
|
0x21C9 ⇉ # RIGHTWARDS PAIRED ARROWS
|
||||||
|
0x21CA ⇊ # DOWNWARDS PAIRED ARROWS
|
||||||
|
0x21CB ⇋ # LEFTWARDS HARPOON OVER RIGHTWARDS HARPOON
|
||||||
|
0x21CC ⇌ # RIGHTWARDS HARPOON OVER LEFTWARDS HARPOON
|
||||||
|
0x21CD ⇍ # LEFTWARDS DOUBLE ARROW WITH STROKE
|
||||||
|
0x21CE ⇎ # LEFT RIGHT DOUBLE ARROW WITH STROKE
|
||||||
|
0x21CF ⇏ # RIGHTWARDS DOUBLE ARROW WITH STROKE
|
||||||
|
0x21D0 ⇐ # LEFTWARDS DOUBLE ARROW
|
||||||
|
0x21D1 ⇑ # UPWARDS DOUBLE ARROW
|
||||||
|
0x21D2 ⇒ # RIGHTWARDS DOUBLE ARROW
|
||||||
|
0x21D3 ⇓ # DOWNWARDS DOUBLE ARROW
|
||||||
|
0x21D4 ⇔ # LEFT RIGHT DOUBLE ARROW
|
||||||
|
0x21D5 ⇕ # UP DOWN DOUBLE ARROW
|
||||||
|
0x21DA ⇚ # LEFTWARDS TRIPLE ARROW
|
||||||
|
0x21DB ⇛ # RIGHTWARDS TRIPLE ARROW
|
||||||
|
0x2200 ∀ # FOR ALL
|
||||||
|
0x2201 ∁ # COMPLEMENT
|
||||||
|
0x2202 ∂ # PARTIAL DIFFERENTIAL
|
||||||
|
0x2203 ∃ # THERE EXISTS
|
||||||
|
0x2204 ∄ # THERE DOES NOT EXIST
|
||||||
|
0x2205 ∅ # EMPTY SET
|
||||||
|
0x2207 ∇ # NABLA
|
||||||
|
0x2208 ∈ # ELEMENT OF
|
||||||
|
0x2209 ∉ # NOT AN ELEMENT OF
|
||||||
|
0x220A ∊ # SMALL ELEMENT OF
|
||||||
|
0x220B ∋ # CONTAINS AS MEMBER
|
||||||
|
0x220D ∍ # SMALL CONTAINS AS MEMBER
|
||||||
|
0x220F ∏ # N-ARY PRODUCT
|
||||||
|
0x2210 ∐ # N-ARY COPRODUCT
|
||||||
|
0x2211 ∑ # N-ARY SUMMATION
|
||||||
|
0x2212 − # MINUS SIGN
|
||||||
|
0x2213 ∓ # MINUS-OR-PLUS SIGN
|
||||||
|
0x2214 ∔ # DOT PLUS
|
||||||
|
0x2216 ∖ # SET MINUS
|
||||||
|
0x2217 ∗ # ASTERISK OPERATOR
|
||||||
|
0x2218 ∘ # RING OPERATOR
|
||||||
|
0x221A √ # SQUARE ROOT
|
||||||
|
0x221D ∝ # PROPORTIONAL TO
|
||||||
|
0x221E ∞ # INFINITY
|
||||||
|
0x221F ∟ # RIGHT ANGLE
|
||||||
|
0x2220 ∠ # ANGLE
|
||||||
|
0x2221 ∡ # MEASURED ANGLE
|
||||||
|
0x2222 ∢ # SPHERICAL ANGLE
|
||||||
|
0x2223 ∣ # DIVIDES
|
||||||
|
0x2224 ∤ # DOES NOT DIVIDE
|
||||||
|
0x2225 ∥ # PARALLEL TO
|
||||||
|
0x2226 ∦ # NOT PARALLEL TO
|
||||||
|
0x2227 ∧ # LOGICAL AND
|
||||||
|
0x2228 ∨ # LOGICAL OR
|
||||||
|
0x2229 ∩ # INTERSECTION
|
||||||
|
0x222A ∪ # UNION
|
||||||
|
0x222B ∫ # INTEGRAL
|
||||||
|
0x222E ∮ # CONTOUR INTEGRAL
|
||||||
|
0x2234 ∴ # THEREFORE
|
||||||
|
0x2235 ∵ # BECAUSE
|
||||||
|
0x223C ∼ # TILDE OPERATOR
|
||||||
|
0x223D ∽ # REVERSED TILDE
|
||||||
|
0x2240 ≀ # WREATH PRODUCT
|
||||||
|
0x2241 ≁ # NOT TILDE
|
||||||
|
0x2243 ≃ # ASYMPTOTICALLY EQUAL TO
|
||||||
|
0x2244 ≄ # NOT ASYMPTOTICALLY EQUAL TO
|
||||||
|
0x2245 ≅ # APPROXIMATELY EQUAL TO
|
||||||
|
0x2247 ≇ # NEITHER APPROXIMATELY NOR ACTUALLY EQUAL TO
|
||||||
|
0x2248 ≈ # ALMOST EQUAL TO
|
||||||
|
0x2249 ≉ # NOT ALMOST EQUAL TO
|
||||||
|
0x224A ≊ # ALMOST EQUAL OR EQUAL TO
|
||||||
|
0x224C ≌ # ALL EQUAL TO
|
||||||
|
0x224E ≎ # GEOMETRICALLY EQUIVALENT TO
|
||||||
|
0x224F ≏ # DIFFERENCE BETWEEN
|
||||||
|
0x2250 ≐ # APPROACHES THE LIMIT
|
||||||
|
0x2251 ≑ # GEOMETRICALLY EQUAL TO
|
||||||
|
0x2252 ≒ # APPROXIMATELY EQUAL TO OR THE IMAGE OF
|
||||||
|
0x2253 ≓ # IMAGE OF OR APPROXIMATELY EQUAL TO
|
||||||
|
0x2254 ≔ # COLON EQUALS
|
||||||
|
0x2255 ≕ # EQUALS COLON
|
||||||
|
0x2256 ≖ # RING IN EQUAL TO
|
||||||
|
0x2257 ≗ # RING EQUAL TO
|
||||||
|
0x2259 ≙ # ESTIMATES
|
||||||
|
0x225C ≜ # DELTA EQUAL TO
|
||||||
|
0x2260 ≠ # NOT EQUAL TO
|
||||||
|
0x2261 ≡ # IDENTICAL TO
|
||||||
|
0x2262 ≢ # NOT IDENTICAL TO
|
||||||
|
0x2264 ≤ # LESS-THAN OR EQUAL TO
|
||||||
|
0x2265 ≥ # GREATER-THAN OR EQUAL TO
|
||||||
|
0x2266 ≦ # LESS-THAN OVER EQUAL TO
|
||||||
|
0x2267 ≧ # GREATER-THAN OVER EQUAL TO
|
||||||
|
0x2268 ≨ # LESS-THAN BUT NOT EQUAL TO
|
||||||
|
0x2269 ≩ # GREATER-THAN BUT NOT EQUAL TO
|
||||||
|
0x226A ≪ # MUCH LESS-THAN
|
||||||
|
0x226B ≫ # MUCH GREATER-THAN
|
||||||
|
0x226C ≬ # BETWEEN
|
||||||
|
0x226E ≮ # NOT LESS-THAN
|
||||||
|
0x226F ≯ # NOT GREATER-THAN
|
||||||
|
0x2270 ≰ # NEITHER LESS-THAN NOR EQUAL TO
|
||||||
|
0x2271 ≱ # NEITHER GREATER-THAN NOR EQUAL TO
|
||||||
|
0x2272 ≲ # LESS-THAN OR EQUIVALENT TO
|
||||||
|
0x2273 ≳ # GREATER-THAN OR EQUIVALENT TO
|
||||||
|
0x2276 ≶ # LESS-THAN OR GREATER-THAN
|
||||||
|
0x2277 ≷ # GREATER-THAN OR LESS-THAN
|
||||||
|
0x227A ≺ # PRECEDES
|
||||||
|
0x227B ≻ # SUCCEEDS
|
||||||
|
0x227C ≼ # PRECEDES OR EQUAL TO
|
||||||
|
0x227D ≽ # SUCCEEDS OR EQUAL TO
|
||||||
|
0x227E ≾ # PRECEDES OR EQUIVALENT TO
|
||||||
|
0x227F ≿ # SUCCEEDS OR EQUIVALENT TO
|
||||||
|
0x2280 ⊀ # DOES NOT PRECEDE
|
||||||
|
0x2281 ⊁ # DOES NOT SUCCEED
|
||||||
|
0x2282 ⊂ # SUBSET OF
|
||||||
|
0x2283 ⊃ # SUPERSET OF
|
||||||
|
0x2284 ⊄ # NOT A SUBSET OF
|
||||||
|
0x2285 ⊅ # NOT A SUPERSET OF
|
||||||
|
0x2286 ⊆ # SUBSET OF OR EQUAL TO
|
||||||
|
0x2287 ⊇ # SUPERSET OF OR EQUAL TO
|
||||||
|
0x2288 ⊈ # NEITHER A SUBSET OF NOR EQUAL TO
|
||||||
|
0x2289 ⊉ # NEITHER A SUPERSET OF NOR EQUAL TO
|
||||||
|
0x228A ⊊ # SUBSET OF WITH NOT EQUAL TO
|
||||||
|
0x228B ⊋ # SUPERSET OF WITH NOT EQUAL TO
|
||||||
|
0x228E ⊎ # MULTISET UNION
|
||||||
|
0x228F ⊏ # SQUARE IMAGE OF
|
||||||
|
0x2290 ⊐ # SQUARE ORIGINAL OF
|
||||||
|
0x2291 ⊑ # SQUARE IMAGE OF OR EQUAL TO
|
||||||
|
0x2292 ⊒ # SQUARE ORIGINAL OF OR EQUAL TO
|
||||||
|
0x2293 ⊓ # SQUARE CAP
|
||||||
|
0x2294 ⊔ # SQUARE CUP
|
||||||
|
0x2295 ⊕ # CIRCLED PLUS
|
||||||
|
0x2296 ⊖ # CIRCLED MINUS
|
||||||
|
0x2297 ⊗ # CIRCLED TIMES
|
||||||
|
0x2298 ⊘ # CIRCLED DIVISION SLASH
|
||||||
|
0x2299 ⊙ # CIRCLED DOT OPERATOR
|
||||||
|
0x229A ⊚ # CIRCLED RING OPERATOR
|
||||||
|
0x229B ⊛ # CIRCLED ASTERISK OPERATOR
|
||||||
|
0x229D ⊝ # CIRCLED DASH
|
||||||
|
0x229E ⊞ # SQUARED PLUS
|
||||||
|
0x229F ⊟ # SQUARED MINUS
|
||||||
|
0x22A0 ⊠ # SQUARED TIMES
|
||||||
|
0x22A1 ⊡ # SQUARED DOT OPERATOR
|
||||||
|
0x22A2 ⊢ # RIGHT TACK
|
||||||
|
0x22A3 ⊣ # LEFT TACK
|
||||||
|
0x22A4 ⊤ # DOWN TACK
|
||||||
|
0x22A5 ⊥ # UP TACK
|
||||||
|
0x22A7 ⊧ # MODELS
|
||||||
|
0x22A8 ⊨ # TRUE
|
||||||
|
0x22A9 ⊩ # FORCES
|
||||||
|
0x22AA ⊪ # TRIPLE VERTICAL BAR RIGHT TURNSTILE
|
||||||
|
0x22AC ⊬ # DOES NOT PROVE
|
||||||
|
0x22AD ⊭ # NOT TRUE
|
||||||
|
0x22AE ⊮ # DOES NOT FORCE
|
||||||
|
0x22AF ⊯ # NEGATED DOUBLE VERTICAL BAR DOUBLE RIGHT TURNSTILE
|
||||||
|
0x22B2 ⊲ # NORMAL SUBGROUP OF
|
||||||
|
0x22B3 ⊳ # CONTAINS AS NORMAL SUBGROUP
|
||||||
|
0x22B4 ⊴ # NORMAL SUBGROUP OF OR EQUAL TO
|
||||||
|
0x22B5 ⊵ # CONTAINS AS NORMAL SUBGROUP OR EQUAL TO
|
||||||
|
0x22B8 ⊸ # MULTIMAP
|
||||||
|
0x22BA ⊺ # INTERCALATE
|
||||||
|
0x22BB ⊻ # XOR
|
||||||
|
0x22BC ⊼ # NAND
|
||||||
|
0x22C4 ⋄ # DIAMOND OPERATOR
|
||||||
|
0x22C5 ⋅ # DOT OPERATOR
|
||||||
|
0x22C6 ⋆ # STAR OPERATOR
|
||||||
|
0x22C7 ⋇ # DIVISION TIMES
|
||||||
|
0x22C8 ⋈ # BOWTIE
|
||||||
|
0x22C9 ⋉ # LEFT NORMAL FACTOR SEMIDIRECT PRODUCT
|
||||||
|
0x22CA ⋊ # RIGHT NORMAL FACTOR SEMIDIRECT PRODUCT
|
||||||
|
0x22CB ⋋ # LEFT SEMIDIRECT PRODUCT
|
||||||
|
0x22CC ⋌ # RIGHT SEMIDIRECT PRODUCT
|
||||||
|
0x22CD ⋍ # REVERSED TILDE EQUALS
|
||||||
|
0x22CE ⋎ # CURLY LOGICAL OR
|
||||||
|
0x22CF ⋏ # CURLY LOGICAL AND
|
||||||
|
0x22D0 ⋐ # DOUBLE SUBSET
|
||||||
|
0x22D1 ⋑ # DOUBLE SUPERSET
|
||||||
|
0x22D2 ⋒ # DOUBLE INTERSECTION
|
||||||
|
0x22D3 ⋓ # DOUBLE UNION
|
||||||
|
0x22D4 ⋔ # PITCHFORK
|
||||||
|
0x22D6 ⋖ # LESS-THAN WITH DOT
|
||||||
|
0x22D7 ⋗ # GREATER-THAN WITH DOT
|
||||||
|
0x22D8 ⋘ # VERY MUCH LESS-THAN
|
||||||
|
0x22D9 ⋙ # VERY MUCH GREATER-THAN
|
||||||
|
0x22DA ⋚ # LESS-THAN EQUAL TO OR GREATER-THAN
|
||||||
|
0x22DB ⋛ # GREATER-THAN EQUAL TO OR LESS-THAN
|
||||||
|
0x22DC ⋜ # EQUAL TO OR LESS-THAN
|
||||||
|
0x22DD ⋝ # EQUAL TO OR GREATER-THAN
|
||||||
|
0x22DE ⋞ # EQUAL TO OR PRECEDES
|
||||||
|
0x22DF ⋟ # EQUAL TO OR SUCCEEDS
|
||||||
|
0x22E0 ⋠ # DOES NOT PRECEDE OR EQUAL
|
||||||
|
0x22E1 ⋡ # DOES NOT SUCCEED OR EQUAL
|
||||||
|
0x22E6 ⋦ # LESS-THAN BUT NOT EQUIVALENT TO
|
||||||
|
0x22E7 ⋧ # GREATER-THAN BUT NOT EQUIVALENT TO
|
||||||
|
0x22E8 ⋨ # PRECEDES BUT NOT EQUIVALENT TO
|
||||||
|
0x22E9 ⋩ # SUCCEEDS BUT NOT EQUIVALENT TO
|
||||||
|
0x22EA ⋪ # NOT NORMAL SUBGROUP OF
|
||||||
|
0x22EB ⋫ # DOES NOT CONTAIN AS NORMAL SUBGROUP
|
||||||
|
0x22EC ⋬ # NOT NORMAL SUBGROUP OF OR EQUAL TO
|
||||||
|
0x22ED ⋭ # DOES NOT CONTAIN AS NORMAL SUBGROUP OR EQUAL
|
||||||
|
0x22EE ⋮ # VERTICAL ELLIPSIS
|
||||||
|
0x2306 ⌆ # PERSPECTIVE
|
||||||
|
0x2308 ⌈ # LEFT CEILING
|
||||||
|
0x2309 ⌉ # RIGHT CEILING
|
||||||
|
0x230A ⌊ # LEFT FLOOR
|
||||||
|
0x230B ⌋ # RIGHT FLOOR
|
||||||
|
0x230C ⌌ # BOTTOM RIGHT CROP
|
||||||
|
0x230D ⌍ # BOTTOM LEFT CROP
|
||||||
|
0x230E ⌎ # TOP RIGHT CROP
|
||||||
|
0x230F ⌏ # TOP LEFT CROP
|
||||||
|
0x2315 ⌕ # TELEPHONE RECORDER
|
||||||
|
0x2316 ⌖ # POSITION INDICATOR
|
||||||
|
0x231C ⌜ # TOP LEFT CORNER
|
||||||
|
0x231D ⌝ # TOP RIGHT CORNER
|
||||||
|
0x231E ⌞ # BOTTOM LEFT CORNER
|
||||||
|
0x231F ⌟ # BOTTOM RIGHT CORNER
|
||||||
|
0x2322 ⌢ # FROWN
|
||||||
|
0x2323 ⌣ # SMILE
|
||||||
|
0x2329 〈 # LEFT-POINTING ANGLE BRACKET
|
||||||
|
0x232A 〉 # RIGHT-POINTING ANGLE BRACKET
|
||||||
|
0x2423 ␣ # OPEN BOX
|
||||||
|
0x24C8 Ⓢ # CIRCLED LATIN CAPITAL LETTER S
|
||||||
|
0x2500 ─ # BOX DRAWINGS LIGHT HORIZONTAL
|
||||||
|
0x2502 │ # BOX DRAWINGS LIGHT VERTICAL
|
||||||
|
0x250C ┌ # BOX DRAWINGS LIGHT DOWN AND RIGHT
|
||||||
|
0x2510 ┐ # BOX DRAWINGS LIGHT DOWN AND LEFT
|
||||||
|
0x2514 └ # BOX DRAWINGS LIGHT UP AND RIGHT
|
||||||
|
0x2518 ┘ # BOX DRAWINGS LIGHT UP AND LEFT
|
||||||
|
0x251C ├ # BOX DRAWINGS LIGHT VERTICAL AND RIGHT
|
||||||
|
0x2524 ┤ # BOX DRAWINGS LIGHT VERTICAL AND LEFT
|
||||||
|
0x252C ┬ # BOX DRAWINGS LIGHT DOWN AND HORIZONTAL
|
||||||
|
0x2534 ┴ # BOX DRAWINGS LIGHT UP AND HORIZONTAL
|
||||||
|
0x253C ┼ # BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL
|
||||||
|
0x2550 ═ # BOX DRAWINGS DOUBLE HORIZONTAL
|
||||||
|
0x2551 ║ # BOX DRAWINGS DOUBLE VERTICAL
|
||||||
|
0x2552 ╒ # BOX DRAWINGS DOWN SINGLE AND RIGHT DOUBLE
|
||||||
|
0x2553 ╓ # BOX DRAWINGS DOWN DOUBLE AND RIGHT SINGLE
|
||||||
|
0x2554 ╔ # BOX DRAWINGS DOUBLE DOWN AND RIGHT
|
||||||
|
0x2555 ╕ # BOX DRAWINGS DOWN SINGLE AND LEFT DOUBLE
|
||||||
|
0x2556 ╖ # BOX DRAWINGS DOWN DOUBLE AND LEFT SINGLE
|
||||||
|
0x2557 ╗ # BOX DRAWINGS DOUBLE DOWN AND LEFT
|
||||||
|
0x2558 ╘ # BOX DRAWINGS UP SINGLE AND RIGHT DOUBLE
|
||||||
|
0x2559 ╙ # BOX DRAWINGS UP DOUBLE AND RIGHT SINGLE
|
||||||
|
0x255A ╚ # BOX DRAWINGS DOUBLE UP AND RIGHT
|
||||||
|
0x255B ╛ # BOX DRAWINGS UP SINGLE AND LEFT DOUBLE
|
||||||
|
0x255C ╜ # BOX DRAWINGS UP DOUBLE AND LEFT SINGLE
|
||||||
|
0x255D ╝ # BOX DRAWINGS DOUBLE UP AND LEFT
|
||||||
|
0x255E ╞ # BOX DRAWINGS VERTICAL SINGLE AND RIGHT DOUBLE
|
||||||
|
0x255F ╟ # BOX DRAWINGS VERTICAL DOUBLE AND RIGHT SINGLE
|
||||||
|
0x2560 ╠ # BOX DRAWINGS DOUBLE VERTICAL AND RIGHT
|
||||||
|
0x2561 ╡ # BOX DRAWINGS VERTICAL SINGLE AND LEFT DOUBLE
|
||||||
|
0x2562 ╢ # BOX DRAWINGS VERTICAL DOUBLE AND LEFT SINGLE
|
||||||
|
0x2563 ╣ # BOX DRAWINGS DOUBLE VERTICAL AND LEFT
|
||||||
|
0x2564 ╤ # BOX DRAWINGS DOWN SINGLE AND HORIZONTAL DOUBLE
|
||||||
|
0x2565 ╥ # BOX DRAWINGS DOWN DOUBLE AND HORIZONTAL SINGLE
|
||||||
|
0x2566 ╦ # BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL
|
||||||
|
0x2567 ╧ # BOX DRAWINGS UP SINGLE AND HORIZONTAL DOUBLE
|
||||||
|
0x2568 ╨ # BOX DRAWINGS UP DOUBLE AND HORIZONTAL SINGLE
|
||||||
|
0x2569 ╩ # BOX DRAWINGS DOUBLE UP AND HORIZONTAL
|
||||||
|
0x256A ╪ # BOX DRAWINGS VERTICAL SINGLE AND HORIZONTAL DOUBLE
|
||||||
|
0x256B ╫ # BOX DRAWINGS VERTICAL DOUBLE AND HORIZONTAL SINGLE
|
||||||
|
0x256C ╬ # BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL
|
||||||
|
0x2580 ▀ # UPPER HALF BLOCK
|
||||||
|
0x2584 ▄ # LOWER HALF BLOCK
|
||||||
|
0x2588 █ # FULL BLOCK
|
||||||
|
0x2591 ░ # LIGHT SHADE
|
||||||
|
0x2592 ▒ # MEDIUM SHADE
|
||||||
|
0x2593 ▓ # DARK SHADE
|
||||||
|
0x25A1 □ # WHITE SQUARE
|
||||||
|
0x25AA ▪ # BLACK SMALL SQUARE
|
||||||
|
0x25AD ▭ # WHITE RECTANGLE
|
||||||
|
0x25AE ▮ # BLACK VERTICAL RECTANGLE
|
||||||
|
0x25B3 △ # WHITE UP-POINTING TRIANGLE
|
||||||
|
0x25B4 ▴ # BLACK UP-POINTING SMALL TRIANGLE
|
||||||
|
0x25B5 ▵ # WHITE UP-POINTING SMALL TRIANGLE
|
||||||
|
0x25B8 ▸ # BLACK RIGHT-POINTING SMALL TRIANGLE
|
||||||
|
0x25B9 ▹ # WHITE RIGHT-POINTING SMALL TRIANGLE
|
||||||
|
0x25BD ▽ # WHITE DOWN-POINTING TRIANGLE
|
||||||
|
0x25BE ▾ # BLACK DOWN-POINTING SMALL TRIANGLE
|
||||||
|
0x25BF ▿ # WHITE DOWN-POINTING SMALL TRIANGLE
|
||||||
|
0x25C2 ◂ # BLACK LEFT-POINTING SMALL TRIANGLE
|
||||||
|
0x25C3 ◃ # WHITE LEFT-POINTING SMALL TRIANGLE
|
||||||
|
0x25CA ◊ # LOZENGE
|
||||||
|
0x25CB ○ # WHITE CIRCLE
|
||||||
|
0x2605 ★ # BLACK STAR
|
||||||
|
0x2606 ☆ # WHITE STAR
|
||||||
|
0x260E ☎ # BLACK TELEPHONE
|
||||||
|
0x2640 ♀ # FEMALE SIGN
|
||||||
|
0x2642 ♂ # MALE SIGN
|
||||||
|
0x2660 ♠ # BLACK SPADE SUIT
|
||||||
|
0x2663 ♣ # BLACK CLUB SUIT
|
||||||
|
0x2665 ♥ # BLACK HEART SUIT
|
||||||
|
0x2666 ♦ # BLACK DIAMOND SUIT
|
||||||
|
0x266A ♪ # EIGHTH NOTE
|
||||||
|
0x266D ♭ # MUSIC FLAT SIGN
|
||||||
|
0x266E ♮ # MUSIC NATURAL SIGN
|
||||||
|
0x266F ♯ # MUSIC SHARP SIGN
|
||||||
|
0x2713 ✓ # CHECK MARK
|
||||||
|
0x2717 ✗ # BALLOT X
|
||||||
|
0x2720 ✠ # MALTESE CROSS
|
||||||
|
0x2726 ✦ # BLACK FOUR POINTED STAR
|
||||||
|
0x2727 ✧ # WHITE FOUR POINTED STAR
|
||||||
|
0x2736 ✶ # SIX POINTED BLACK STAR
|
||||||
|
0xFB00 ff # LATIN SMALL LIGATURE FF
|
||||||
|
0xFB01 fi # LATIN SMALL LIGATURE FI
|
||||||
|
0xFB02 fl # LATIN SMALL LIGATURE FL
|
||||||
|
0xFB03 ffi # LATIN SMALL LIGATURE FFI
|
||||||
|
0xFB04 ffl # LATIN SMALL LIGATURE FFL
|
216
tests/lynx-dump/data/utf-8-demo.html
Normal file
216
tests/lynx-dump/data/utf-8-demo.html
Normal file
@ -0,0 +1,216 @@
|
|||||||
|
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN">
|
||||||
|
<HTML>
|
||||||
|
<HEAD>
|
||||||
|
<TITLE>Markus Kuhn's UTF-8 demo</TITLE>
|
||||||
|
<META HTTP-EQUIV="Content-Type" CONTENT="text/html; charset=iso-8859-1">
|
||||||
|
<LINK REV="made" HREF="mailto:dickey@invisible-island.net">
|
||||||
|
</HEAD>
|
||||||
|
|
||||||
|
<BODY>
|
||||||
|
<pre>
|
||||||
|
UTF-8 encoded sample plain-text file
|
||||||
|
‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾
|
||||||
|
|
||||||
|
Markus Kuhn [ˈmaʳkʊs kuːn] <mkuhn@acm.org> — 1999-08-20
|
||||||
|
|
||||||
|
|
||||||
|
The ASCII compatible UTF-8 encoding of ISO 10646 and Unicode
|
||||||
|
plain-text files is defined in RFC 2279 and in ISO 10646-1 Annex R.
|
||||||
|
|
||||||
|
|
||||||
|
Using Unicode/UTF-8, you can write in emails and source code things such as
|
||||||
|
|
||||||
|
Mathematics and Sciences:
|
||||||
|
|
||||||
|
∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β),
|
||||||
|
|
||||||
|
ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (A ⇔ B),
|
||||||
|
|
||||||
|
2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm
|
||||||
|
|
||||||
|
Linguistics and dictionaries:
|
||||||
|
|
||||||
|
ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn
|
||||||
|
Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ]
|
||||||
|
|
||||||
|
APL:
|
||||||
|
|
||||||
|
((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈
|
||||||
|
|
||||||
|
Nicer typography in plain text files:
|
||||||
|
|
||||||
|
╔══════════════════════════════════════════╗
|
||||||
|
║ ║
|
||||||
|
║ • ‘single’ and “double” quotes ║
|
||||||
|
║ ║
|
||||||
|
║ • Curly apostrophes: “We’ve been here” ║
|
||||||
|
║ ║
|
||||||
|
║ • Latin-1 apostrophe and accents: '´` ║
|
||||||
|
║ ║
|
||||||
|
║ • ‚deutsche‘ „Anführungszeichen“ ║
|
||||||
|
║ ║
|
||||||
|
║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║
|
||||||
|
║ ║
|
||||||
|
║ • ASCII safety test: 1lI|, 0OD, 8B ║
|
||||||
|
║ ╭─────────╮ ║
|
||||||
|
║ • the euro symbol: │ € 14.95 │ ║
|
||||||
|
║ ╰─────────╯ ║
|
||||||
|
╚══════════════════════════════════════════╝
|
||||||
|
|
||||||
|
Greek (in Polytonic):
|
||||||
|
|
||||||
|
The Greek anthem:
|
||||||
|
|
||||||
|
Σὲ γνωρίζω ἀπὸ τὴν κόψη
|
||||||
|
τοῦ σπαθιοῦ τὴν τρομερή,
|
||||||
|
σὲ γνωρίζω ἀπὸ τὴν ὄψη
|
||||||
|
ποὺ μὲ βία μετράει τὴ γῆ.
|
||||||
|
|
||||||
|
᾿Απ᾿ τὰ κόκκαλα βγαλμένη
|
||||||
|
τῶν ῾Ελλήνων τὰ ἱερά
|
||||||
|
καὶ σὰν πρῶτα ἀνδρειωμένη
|
||||||
|
χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά!
|
||||||
|
|
||||||
|
From a speech of Demosthenes in the 4th century BC:
|
||||||
|
|
||||||
|
Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι,
|
||||||
|
ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς
|
||||||
|
λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ
|
||||||
|
τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿
|
||||||
|
εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ
|
||||||
|
πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν
|
||||||
|
οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι,
|
||||||
|
οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν
|
||||||
|
ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον
|
||||||
|
τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι
|
||||||
|
γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν
|
||||||
|
προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους
|
||||||
|
σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ
|
||||||
|
τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ
|
||||||
|
τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς
|
||||||
|
τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον.
|
||||||
|
|
||||||
|
Δημοσθένους, Γ´ ᾿Ολυνθιακὸς
|
||||||
|
|
||||||
|
Georgian:
|
||||||
|
|
||||||
|
From a Unicode conference invitation:
|
||||||
|
|
||||||
|
გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო
|
||||||
|
კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს,
|
||||||
|
ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს
|
||||||
|
ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი,
|
||||||
|
ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება
|
||||||
|
ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში,
|
||||||
|
ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში.
|
||||||
|
|
||||||
|
Russian:
|
||||||
|
|
||||||
|
From a Unicode conference invitation:
|
||||||
|
|
||||||
|
Зарегистрируйтесь сейчас на Десятую Международную Конференцию по
|
||||||
|
Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии.
|
||||||
|
Конференция соберет широкий круг экспертов по вопросам глобального
|
||||||
|
Интернета и Unicode, локализации и интернационализации, воплощению и
|
||||||
|
применению Unicode в различных операционных системах и программных
|
||||||
|
приложениях, шрифтах, верстке и многоязычных компьютерных системах.
|
||||||
|
|
||||||
|
Thai (UCS Level 2):
|
||||||
|
|
||||||
|
Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese
|
||||||
|
classic 'San Gua'):
|
||||||
|
|
||||||
|
[----------------------------|------------------------]
|
||||||
|
๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่
|
||||||
|
สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา
|
||||||
|
ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา
|
||||||
|
โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ
|
||||||
|
เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ
|
||||||
|
ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ
|
||||||
|
พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้
|
||||||
|
ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ
|
||||||
|
|
||||||
|
(The above is a two-column text. If combining characters are handled
|
||||||
|
correctly, the lines of the second column should be aligned with the
|
||||||
|
| character above.)
|
||||||
|
|
||||||
|
Ethiopian:
|
||||||
|
|
||||||
|
Proverbs in the Amharic language:
|
||||||
|
|
||||||
|
ሰማይ አይታረስ ንጉሥ አይከሰስ።
|
||||||
|
ብላ ካለኝ እንደአባቴ በቆመጠኝ።
|
||||||
|
ጌጥ ያለቤቱ ቁምጥና ነው።
|
||||||
|
ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው።
|
||||||
|
የአፍ ወለምታ በቅቤ አይታሽም።
|
||||||
|
አይጥ በበላ ዳዋ ተመታ።
|
||||||
|
ሲተረጉሙ ይደረግሙ።
|
||||||
|
ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል።
|
||||||
|
ድር ቢያብር አንበሳ ያስር።
|
||||||
|
ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም።
|
||||||
|
እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም።
|
||||||
|
የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ።
|
||||||
|
ሥራ ከመፍታት ልጄን ላፋታት።
|
||||||
|
ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል።
|
||||||
|
የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ።
|
||||||
|
ተንጋሎ ቢተፉ ተመልሶ ባፉ።
|
||||||
|
ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው።
|
||||||
|
እግርህን በፍራሽህ ልክ ዘርጋ።
|
||||||
|
|
||||||
|
Runes:
|
||||||
|
|
||||||
|
ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ
|
||||||
|
|
||||||
|
(Old English, which transcribed into Latin reads 'He cwaeth that he
|
||||||
|
bude thaem lande northweardum with tha Westsae.' and means 'He said
|
||||||
|
that he lived in the northern land near the Western Sea.')
|
||||||
|
|
||||||
|
Braille:
|
||||||
|
|
||||||
|
⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌
|
||||||
|
|
||||||
|
⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞
|
||||||
|
⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎
|
||||||
|
⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂
|
||||||
|
⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙
|
||||||
|
⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑
|
||||||
|
⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲
|
||||||
|
|
||||||
|
⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲
|
||||||
|
|
||||||
|
⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹
|
||||||
|
⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞
|
||||||
|
⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕
|
||||||
|
⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹
|
||||||
|
⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎
|
||||||
|
⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎
|
||||||
|
⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳
|
||||||
|
⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞
|
||||||
|
⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲
|
||||||
|
|
||||||
|
(The first couple of paragraphs of "A Christmas Carol" by Dickens)
|
||||||
|
|
||||||
|
Compact font selection example text:
|
||||||
|
|
||||||
|
ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789
|
||||||
|
abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ
|
||||||
|
–—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд
|
||||||
|
∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა
|
||||||
|
|
||||||
|
Greetings in various languages:
|
||||||
|
|
||||||
|
Hello world, Καλημέρα κόσμε, コンニチハ
|
||||||
|
|
||||||
|
Box drawing alignment tests: █
|
||||||
|
▉
|
||||||
|
╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳
|
||||||
|
║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳
|
||||||
|
║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳
|
||||||
|
╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳
|
||||||
|
║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎
|
||||||
|
║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏
|
||||||
|
╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█
|
||||||
|
|
||||||
|
</pre>
|
||||||
|
</BODY>
|
||||||
|
</HTML>
|
207
tests/lynx-dump/data/utf-8-demo.html.exp
Normal file
207
tests/lynx-dump/data/utf-8-demo.html.exp
Normal file
@ -0,0 +1,207 @@
|
|||||||
|
UTF-8 encoded sample plain-text file
|
||||||
|
‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾
|
||||||
|
|
||||||
|
Markus Kuhn [ˈmaʳkʊs kuːn] <mkuhn@acm.org> — 1999-08-20
|
||||||
|
|
||||||
|
|
||||||
|
The ASCII compatible UTF-8 encoding of ISO 10646 and Unicode
|
||||||
|
plain-text files is defined in RFC 2279 and in ISO 10646-1 Annex R.
|
||||||
|
|
||||||
|
|
||||||
|
Using Unicode/UTF-8, you can write in emails and source code things such as
|
||||||
|
|
||||||
|
Mathematics and Sciences:
|
||||||
|
|
||||||
|
∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β),
|
||||||
|
|
||||||
|
ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (A ⇔ B),
|
||||||
|
|
||||||
|
2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm
|
||||||
|
|
||||||
|
Linguistics and dictionaries:
|
||||||
|
|
||||||
|
ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn
|
||||||
|
Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ]
|
||||||
|
|
||||||
|
APL:
|
||||||
|
|
||||||
|
((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈
|
||||||
|
|
||||||
|
Nicer typography in plain text files:
|
||||||
|
|
||||||
|
╔══════════════════════════════════════════╗
|
||||||
|
║ ║
|
||||||
|
║ • ‘single’ and “double” quotes ║
|
||||||
|
║ ║
|
||||||
|
║ • Curly apostrophes: “We’ve been here” ║
|
||||||
|
║ ║
|
||||||
|
║ • Latin-1 apostrophe and accents: '´` ║
|
||||||
|
║ ║
|
||||||
|
║ • ‚deutsche‘ „Anführungszeichen“ ║
|
||||||
|
║ ║
|
||||||
|
║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║
|
||||||
|
║ ║
|
||||||
|
║ • ASCII safety test: 1lI|, 0OD, 8B ║
|
||||||
|
║ ╭─────────╮ ║
|
||||||
|
║ • the euro symbol: │ € 14.95 │ ║
|
||||||
|
║ ╰─────────╯ ║
|
||||||
|
╚══════════════════════════════════════════╝
|
||||||
|
|
||||||
|
Greek (in Polytonic):
|
||||||
|
|
||||||
|
The Greek anthem:
|
||||||
|
|
||||||
|
Σὲ γνωρίζω ἀπὸ τὴν κόψη
|
||||||
|
τοῦ σπαθιοῦ τὴν τρομερή,
|
||||||
|
σὲ γνωρίζω ἀπὸ τὴν ὄψη
|
||||||
|
ποὺ μὲ βία μετράει τὴ γῆ.
|
||||||
|
|
||||||
|
᾿Απ᾿ τὰ κόκκαλα βγαλμένη
|
||||||
|
τῶν ῾Ελλήνων τὰ ἱερά
|
||||||
|
καὶ σὰν πρῶτα ἀνδρειωμένη
|
||||||
|
χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά!
|
||||||
|
|
||||||
|
From a speech of Demosthenes in the 4th century BC:
|
||||||
|
|
||||||
|
Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι,
|
||||||
|
ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς
|
||||||
|
λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ
|
||||||
|
τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿
|
||||||
|
εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ
|
||||||
|
πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν
|
||||||
|
οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι,
|
||||||
|
οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν
|
||||||
|
ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον
|
||||||
|
τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι
|
||||||
|
γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν
|
||||||
|
προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους
|
||||||
|
σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ
|
||||||
|
τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ
|
||||||
|
τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς
|
||||||
|
τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον.
|
||||||
|
|
||||||
|
Δημοσθένους, Γ´ ᾿Ολυνθιακὸς
|
||||||
|
|
||||||
|
Georgian:
|
||||||
|
|
||||||
|
From a Unicode conference invitation:
|
||||||
|
|
||||||
|
გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო
|
||||||
|
კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს,
|
||||||
|
ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს
|
||||||
|
ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი,
|
||||||
|
ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება
|
||||||
|
ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში,
|
||||||
|
ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში.
|
||||||
|
|
||||||
|
Russian:
|
||||||
|
|
||||||
|
From a Unicode conference invitation:
|
||||||
|
|
||||||
|
Зарегистрируйтесь сейчас на Десятую Международную Конференцию по
|
||||||
|
Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии.
|
||||||
|
Конференция соберет широкий круг экспертов по вопросам глобального
|
||||||
|
Интернета и Unicode, локализации и интернационализации, воплощению и
|
||||||
|
применению Unicode в различных операционных системах и программных
|
||||||
|
приложениях, шрифтах, верстке и многоязычных компьютерных системах.
|
||||||
|
|
||||||
|
Thai (UCS Level 2):
|
||||||
|
|
||||||
|
Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese
|
||||||
|
classic 'San Gua'):
|
||||||
|
|
||||||
|
[----------------------------|------------------------]
|
||||||
|
๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่
|
||||||
|
สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา
|
||||||
|
ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา
|
||||||
|
โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ
|
||||||
|
เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ
|
||||||
|
ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ
|
||||||
|
พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้
|
||||||
|
ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ
|
||||||
|
|
||||||
|
(The above is a two-column text. If combining characters are handled
|
||||||
|
correctly, the lines of the second column should be aligned with the
|
||||||
|
| character above.)
|
||||||
|
|
||||||
|
Ethiopian:
|
||||||
|
|
||||||
|
Proverbs in the Amharic language:
|
||||||
|
|
||||||
|
ሰማይ አይታረስ ንጉሥ አይከሰስ።
|
||||||
|
ብላ ካለኝ እንደአባቴ በቆመጠኝ።
|
||||||
|
ጌጥ ያለቤቱ ቁምጥና ነው።
|
||||||
|
ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው።
|
||||||
|
የአፍ ወለምታ በቅቤ አይታሽም።
|
||||||
|
አይጥ በበላ ዳዋ ተመታ።
|
||||||
|
ሲተረጉሙ ይደረግሙ።
|
||||||
|
ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል።
|
||||||
|
ድር ቢያብር አንበሳ ያስር።
|
||||||
|
ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም።
|
||||||
|
እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም።
|
||||||
|
የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ።
|
||||||
|
ሥራ ከመፍታት ልጄን ላፋታት።
|
||||||
|
ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል።
|
||||||
|
የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ።
|
||||||
|
ተንጋሎ ቢተፉ ተመልሶ ባፉ።
|
||||||
|
ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው።
|
||||||
|
እግርህን በፍራሽህ ልክ ዘርጋ።
|
||||||
|
|
||||||
|
Runes:
|
||||||
|
|
||||||
|
ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ
|
||||||
|
|
||||||
|
(Old English, which transcribed into Latin reads 'He cwaeth that he
|
||||||
|
bude thaem lande northweardum with tha Westsae.' and means 'He said
|
||||||
|
that he lived in the northern land near the Western Sea.')
|
||||||
|
|
||||||
|
Braille:
|
||||||
|
|
||||||
|
⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌
|
||||||
|
|
||||||
|
⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞
|
||||||
|
⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎
|
||||||
|
⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂
|
||||||
|
⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙
|
||||||
|
⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑
|
||||||
|
⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲
|
||||||
|
|
||||||
|
⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲
|
||||||
|
|
||||||
|
⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹
|
||||||
|
⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞
|
||||||
|
⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕
|
||||||
|
⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹
|
||||||
|
⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎
|
||||||
|
⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎
|
||||||
|
⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳
|
||||||
|
⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞
|
||||||
|
⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲
|
||||||
|
|
||||||
|
(The first couple of paragraphs of "A Christmas Carol" by Dickens)
|
||||||
|
|
||||||
|
Compact font selection example text:
|
||||||
|
|
||||||
|
ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789
|
||||||
|
abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ
|
||||||
|
–—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд
|
||||||
|
∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi<>⑀₂ἠḂӥẄɐː⍎אԱა
|
||||||
|
|
||||||
|
Greetings in various languages:
|
||||||
|
|
||||||
|
Hello world, Καλημέρα κόσμε, コンニチハ
|
||||||
|
|
||||||
|
Box drawing alignment tests: █
|
||||||
|
▉
|
||||||
|
╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳
|
||||||
|
╳
|
||||||
|
║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳
|
||||||
|
╳
|
||||||
|
║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳
|
||||||
|
╳
|
||||||
|
╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳
|
||||||
|
╳
|
||||||
|
║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎
|
||||||
|
║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏
|
||||||
|
╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇
|
||||||
|
█
|
7
tests/lynx-dump/main.fmf
Normal file
7
tests/lynx-dump/main.fmf
Normal file
@ -0,0 +1,7 @@
|
|||||||
|
summary: Check whether `lynx -dump` works as expected.
|
||||||
|
test: ./runtest.sh
|
||||||
|
require:
|
||||||
|
- bash
|
||||||
|
- diffutils
|
||||||
|
- lynx
|
||||||
|
- sed
|
29
tests/lynx-dump/runtest.sh
Executable file
29
tests/lynx-dump/runtest.sh
Executable file
@ -0,0 +1,29 @@
|
|||||||
|
#!/bin/bash
#
# Check `lynx -dump`: render every HTML fixture in data/ as plain text and
# compare the result with the stored expectation (<fixture>.exp).
# Fixture paths are relative, so run this from the directory containing data/.

# exit immediately if any command returns non-zero exit code
set -e

# print commands as they are executed by the shell interpreter
set -x

# set locale
export LC_ALL=C
locale

# print basic info about lynx
rpm -q lynx
command -v lynx
lynx --version
ldd /usr/bin/lynx

# iterate over all input HTML files
for tst in data/*.html; do
    # an unmatched glob is left as the literal pattern; fail loudly instead of
    # letting the redirection below create a stray "data/*.html.out" file
    if [ ! -e "${tst}" ]; then
        echo "no HTML fixtures found in data/" >&2
        exit 1
    fi

    # dump their content as plain-text using lynx
    # (expansions are quoted so paths with whitespace are not word-split)
    lynx -dump "file://localhost${PWD}/${tst}" > "${tst}.out"

    # drop absolute paths from the output
    sed -e 's|file://.*$||' -i "${tst}.out"

    # compare the output with expected output
    diff -u "${tst}.exp" "${tst}.out"
done
|
26
tests/lynx-dump/sync.sh
Executable file
26
tests/lynx-dump/sync.sh
Executable file
@ -0,0 +1,26 @@
|
|||||||
|
#!/bin/bash
#
# Regenerate the expected outputs: render every HTML fixture in data/ with
# `lynx -dump` and store the result as <fixture>.exp.
# Fixture paths are relative, so run this from the directory containing data/.

# exit immediately if any command returns non-zero exit code
set -e

# print commands as they are executed by the shell interpreter
set -x

# set locale
export LC_ALL=C
locale

# print basic info about lynx
rpm -q lynx
command -v lynx
lynx --version
ldd /usr/bin/lynx

# iterate over all input HTML files
for tst in data/*.html; do
    # an unmatched glob is left as the literal pattern; fail loudly instead of
    # letting the redirection below create a bogus "data/*.html.exp" file
    if [ ! -e "${tst}" ]; then
        echo "no HTML fixtures found in data/" >&2
        exit 1
    fi

    # dump their content as plain-text using lynx
    # (expansions are quoted so paths with whitespace are not word-split)
    lynx -dump "file://localhost${PWD}/${tst}" > "${tst}.exp"

    # drop absolute paths from the output
    sed -e 's|file://.*$||' -i "${tst}.exp"
done
|
Loading…
Reference in New Issue
Block a user