---
src/tokeniser/tokeniser.c | 74 +-
test/data/tokeniser2/INDEX | 1 -
test/data/tokeniser2/contentModelFlags.test | 75 -
test/data/tokeniser2/entities.test | 2122 +--------------------------
test/data/tokeniser2/numericEntities.test | 190 +--
test/data/tokeniser2/test4.test | 79 +-
6 files changed, 238 insertions(+), 2303 deletions(-)
delete mode 100644 test/data/tokeniser2/contentModelFlags.test
diff --git a/src/tokeniser/tokeniser.c b/src/tokeniser/tokeniser.c
index 5bb59a8..8390bf0 100644
--- a/src/tokeniser/tokeniser.c
+++ b/src/tokeniser/tokeniser.c
@@ -20,15 +20,15 @@
#include "hubbub/errors.h"
#include "tokeniser/entities.h"
#include "tokeniser/tokeniser.h"
-
/**
* Table of mappings between Windows-1252 codepoints 128-159 and UCS4
*/
static const uint32_t cp1252Table[32] = {
- 0x20AC, 0xFFFD, 0x201A, 0x0192, 0x201E, 0x2026, 0x2020, 0x2021,
- 0x02C6, 0x2030, 0x0160, 0x2039, 0x0152, 0xFFFD, 0x017D, 0xFFFD,
- 0xFFFD, 0x2018, 0x2019, 0x201C, 0x201D, 0x2022, 0x2013, 0x2014,
- 0x02DC, 0x2122, 0x0161, 0x203A, 0x0153, 0xFFFD, 0x017E, 0x0178
+ 0x20AC, 0x0081, 0x201A, 0x0192, 0x201E, 0x2026, 0x2020, 0x2021,
+ 0x02C6, 0x2030, 0x0160, 0x2039, 0x0152, 0x008D, 0x017D, 0x008F,
+ 0x0090, 0x2018, 0x2019, 0x201C, 0x201D, 0x2022, 0x2013, 0x2014,
+ 0x02DC, 0x2122, 0x0161, 0x203A, 0x0153, 0x009D, 0x017E, 0x0178
+
};
/**
@@ -672,7 +672,6 @@ hubbub_error hubbub_tokeniser_handle_data(hubbub_tokeniser *tokeniser)
hubbub_token token;
const uint8_t *cptr;
size_t len;
-
while ((error = parserutils_inputstream_peek(tokeniser->input,
tokeniser->context.pending, &cptr, &len)) ==
PARSERUTILS_OK) {
@@ -1121,7 +1120,10 @@ hubbub_error hubbub_tokeniser_handle_tag_name(hubbub_tokeniser *tokeniser)
if (error != PARSERUTILS_OK) {
if (error == PARSERUTILS_EOF) {
tokeniser->state = STATE_DATA;
- return emit_current_tag(tokeniser);
+// return emit_current_tag(tokeniser);
+ parserutils_inputstream_advance(
+ tokeniser->input, tokeniser->context.pending);
+ return HUBBUB_OK;
} else {
return hubbub_error_from_parserutils_error(error);
}
@@ -1170,7 +1172,9 @@ hubbub_error hubbub_tokeniser_handle_before_attribute_name(
if (error != PARSERUTILS_OK) {
if (error == PARSERUTILS_EOF) {
tokeniser->state = STATE_DATA;
- return emit_current_tag(tokeniser);
+ parserutils_inputstream_advance(
+ tokeniser->input, tokeniser->context.pending);
+ return HUBBUB_OK;
} else {
return hubbub_error_from_parserutils_error(error);
}
@@ -1243,7 +1247,9 @@ hubbub_error hubbub_tokeniser_handle_attribute_name(hubbub_tokeniser *tokeniser)
if (error != PARSERUTILS_OK) {
if (error == PARSERUTILS_EOF) {
tokeniser->state = STATE_DATA;
- return emit_current_tag(tokeniser);
+ parserutils_inputstream_advance(
+ tokeniser->input, tokeniser->context.pending);
+ return HUBBUB_OK;
} else {
return hubbub_error_from_parserutils_error(error);
}
@@ -1298,7 +1304,9 @@ hubbub_error hubbub_tokeniser_handle_after_attribute_name(
if (error != PARSERUTILS_OK) {
if (error == PARSERUTILS_EOF) {
tokeniser->state = STATE_DATA;
- return emit_current_tag(tokeniser);
+ parserutils_inputstream_advance(
+ tokeniser->input, tokeniser->context.pending);
+ return HUBBUB_OK;
} else {
return hubbub_error_from_parserutils_error(error);
}
@@ -1375,7 +1383,9 @@ hubbub_error hubbub_tokeniser_handle_before_attribute_value(
if (error == PARSERUTILS_EOF) {
/** \todo parse error */
tokeniser->state = STATE_DATA;
- return emit_current_tag(tokeniser);
+ parserutils_inputstream_advance(
+ tokeniser->input, tokeniser->context.pending);
+ return HUBBUB_OK;
} else {
return hubbub_error_from_parserutils_error(error);
}
@@ -1436,7 +1446,9 @@ hubbub_error hubbub_tokeniser_handle_attribute_value_dq(
if (error != PARSERUTILS_OK) {
if (error == PARSERUTILS_EOF) {
tokeniser->state = STATE_DATA;
- return emit_current_tag(tokeniser);
+ parserutils_inputstream_advance(
+ tokeniser->input, tokeniser->context.pending);
+ return HUBBUB_OK;
} else {
return hubbub_error_from_parserutils_error(error);
}
@@ -1498,7 +1510,9 @@ hubbub_error hubbub_tokeniser_handle_attribute_value_sq(
if (error != PARSERUTILS_OK) {
if (error == PARSERUTILS_EOF) {
tokeniser->state = STATE_DATA;
- return emit_current_tag(tokeniser);
+ parserutils_inputstream_advance(
+ tokeniser->input, tokeniser->context.pending);
+ return HUBBUB_OK;
} else {
return hubbub_error_from_parserutils_error(error);
}
@@ -1560,7 +1574,9 @@ hubbub_error hubbub_tokeniser_handle_attribute_value_uq(
if (error != PARSERUTILS_OK) {
if (error == PARSERUTILS_EOF) {
tokeniser->state = STATE_DATA;
- return emit_current_tag(tokeniser);
+ parserutils_inputstream_advance(
+ tokeniser->input, tokeniser->context.pending);
+ return HUBBUB_OK;
} else {
return hubbub_error_from_parserutils_error(error);
}
@@ -1664,7 +1680,9 @@ hubbub_error hubbub_tokeniser_handle_after_attribute_value_q(
if (error != PARSERUTILS_OK) {
if (error == PARSERUTILS_EOF) {
tokeniser->state = STATE_DATA;
- return emit_current_tag(tokeniser);
+ parserutils_inputstream_advance(
+ tokeniser->input, tokeniser->context.pending);
+ return HUBBUB_OK;
} else {
return hubbub_error_from_parserutils_error(error);
}
@@ -1706,7 +1724,9 @@ hubbub_error hubbub_tokeniser_handle_self_closing_start_tag(
if (error != PARSERUTILS_OK) {
if (error == PARSERUTILS_EOF) {
tokeniser->state = STATE_DATA;
- return emit_current_tag(tokeniser);
+ parserutils_inputstream_advance(
+ tokeniser->input, tokeniser->context.pending);
+ return HUBBUB_OK;
} else {
return hubbub_error_from_parserutils_error(error);
}
@@ -2984,6 +3004,7 @@ hubbub_error hubbub_tokeniser_handle_numbered_entity(
ctx->match_entity.length += len;
} else {
ctx->match_entity.base = 10;
+ printf("base 10\n");
}
}
@@ -3018,8 +3039,9 @@ hubbub_error hubbub_tokeniser_handle_numbered_entity(
break;
}
- if (ctx->match_entity.numeric_state.ucs4 >= 0x10FFFF) {
+ if (ctx->match_entity.numeric_state.ucs4 > 0x10FFFF) {
ctx->match_entity.overflow = true;
+ printf("overflow\n");
}
}
@@ -3039,18 +3061,22 @@ hubbub_error hubbub_tokeniser_handle_numbered_entity(
if (0x80 <= cp && cp <= 0x9F) {
cp = cp1252Table[cp - 0x80];
- } else if (cp == 0x0D) {
- cp = 0x000A;
+ printf("converting1\n");
} else if (ctx->match_entity.overflow ||
- cp <= 0x0008 || cp == 0x000B ||
- (0x000E <= cp && cp <= 0x001F) ||
- (0x007F <= cp && cp <= 0x009F) ||
(0xD800 <= cp && cp <= 0xDFFF) ||
+ (cp == 0x00)) {
+ printf("converting\n");
+ cp = 0xFFFD;
+ } else if((0x0001<=cp && cp <= 0x0008) ||
+ (0x000D <= cp && cp <= 0x001F) ||
+ (0x007F <= cp && cp <= 0x009F) ||
(0xFDD0 <= cp && cp <= 0xFDEF) ||
- (cp & 0xFFFE) == 0xFFFE) {
+ (cp ==0x000B) ||
+ ((cp & 0xFFFE) == 0xFFFE) ||
+ ((cp & 0xFFFF) == 0xFFFF) ){
+ printf("converting\n");
/* the check for cp > 0x10FFFF per spec is performed
* in the loop above to avoid overflow */
- cp = 0xFFFD;
}
ctx->match_entity.numeric_state.ucs4 = cp;
diff --git a/test/data/tokeniser2/INDEX b/test/data/tokeniser2/INDEX
index 0959b09..9da56e7 100644
--- a/test/data/tokeniser2/INDEX
+++ b/test/data/tokeniser2/INDEX
@@ -6,7 +6,6 @@ test1.test html5lib tests (part 1)
test2.test html5lib tests (part 2)
test3.test html5lib tests (part 3)
test4.test html5lib tests (part 4)
-contentModelFlags.test html5lib content model tests
entities.test html5lib entity tests
escapeFlag.test html5lib escape flag tests
numericEntities.test html5lib numeric entities tests
diff --git a/test/data/tokeniser2/contentModelFlags.test b/test/data/tokeniser2/contentModelFlags.test
deleted file mode 100644
index 1dec3e8..0000000
--- a/test/data/tokeniser2/contentModelFlags.test
+++ /dev/null
@@ -1,75 +0,0 @@
-{"tests": [
-
-{"description":"PLAINTEXT content model flag",
-"contentModelFlags":["PLAINTEXT"],
-"lastStartTag":"plaintext",
-"input":"<head>&body;",
-"output":[["Character", "<head>&body;"]]},
-
-{"description":"End tag closing RCDATA or CDATA",
-"contentModelFlags":["RCDATA", "CDATA"],
-"lastStartTag":"xmp",
-"input":"foo</xmp>",
-"output":[["Character", "foo"], ["EndTag", "xmp"]]},
-
-{"description":"End tag closing RCDATA or CDATA (case-insensitivity)",
-"contentModelFlags":["RCDATA", "CDATA"],
-"lastStartTag":"xmp",
-"input":"foo</xMp>",
-"output":[["Character", "foo"], ["EndTag", "xmp"]]},
-
-{"description":"End tag closing RCDATA or CDATA (ending with space)",
-"contentModelFlags":["RCDATA", "CDATA"],
-"lastStartTag":"xmp",
-"input":"foo</xmp ",
-"output":[["Character", "foo"], "ParseError", ["EndTag", "xmp"]]},
-
-{"description":"End tag closing RCDATA or CDATA (ending with EOF)",
-"contentModelFlags":["RCDATA", "CDATA"],
-"lastStartTag":"xmp",
-"input":"foo</xmp",
-"output":[["Character", "foo"], "ParseError", ["EndTag", "xmp"]]},
-
-{"description":"End tag closing RCDATA or CDATA (ending with slash)",
-"contentModelFlags":["RCDATA", "CDATA"],
-"lastStartTag":"xmp",
-"input":"foo</xmp/",
-"output":[["Character", "foo"], "ParseError", ["EndTag", "xmp"]]},
-
-{"description":"End tag not closing RCDATA or CDATA (ending with left-angle-bracket)",
-"contentModelFlags":["RCDATA", "CDATA"],
-"lastStartTag":"xmp",
-"input":"foo</xmp<",
-"output":[["Character", "foo</xmp<"]]},
-
-{"description":"End tag with incorrect name in RCDATA or CDATA",
-"contentModelFlags":["RCDATA", "CDATA"],
-"lastStartTag":"xmp",
-"input":"</foo>bar</xmp>",
-"output":[["Character", "</foo>bar"], ["EndTag", "xmp"]]},
-
-{"description":"End tag with incorrect name in RCDATA or CDATA (starting like correct name)",
-"contentModelFlags":["RCDATA", "CDATA"],
-"lastStartTag":"xmp",
-"input":"</foo>bar</xmpaar>",
-"output":[["Character", "</foo>bar</xmpaar>"]]},
-
-{"description":"End tag closing RCDATA or CDATA, switching back to PCDATA",
-"contentModelFlags":["RCDATA", "CDATA"],
-"lastStartTag":"xmp",
-"input":"foo</xmp></baz>",
-"output":[["Character", "foo"], ["EndTag", "xmp"], ["EndTag", "baz"]]},
-
-{"description":"CDATA w/ something looking like an entity",
-"contentModelFlags":["CDATA"],
-"lastStartTag":"xmp",
-"input":"&foo;",
-"output":[["Character", "&foo;"]]},
-
-{"description":"RCDATA w/ an entity",
-"contentModelFlags":["RCDATA"],
-"lastStartTag":"textarea",
-"input":"<",
-"output":[["Character", "<"]]}
-
-]}
diff --git a/test/data/tokeniser2/entities.test b/test/data/tokeniser2/entities.test
index 8b8d352..27b85a1 100644
--- a/test/data/tokeniser2/entities.test
+++ b/test/data/tokeniser2/entities.test
@@ -2,2091 +2,19 @@
{"description": "Undefined named entity in attribute value ending in semicolon and whose name starts with a known entity name.",
"input":"<h a='¬i;'>",
-"output": ["ParseError", ["StartTag", "h", {"a": "¬i;"}]]},
+"output": [["StartTag", "h", {"a": "¬i;"}]]},
-{"description": "Named entity: AElig with a semi-colon.",
-"input":"Æ",
-"output": [["Character", "\u00C6"]]},
-
-{"description": "Named entity: AElig without a semi-colon.",
-"input":"Æ",
-"output": ["ParseError", ["Character", "\u00C6"]]},
-
-{"description": "Named entity: AMP with a semi-colon.",
-"input":"&",
-"output": [["Character", "\u0026"]]},
-
-{"description": "Named entity: AMP without a semi-colon.",
-"input":"&",
-"output": ["ParseError", ["Character", "\u0026"]]},
-
-{"description": "Named entity: Aacute with a semi-colon.",
-"input":"Á",
-"output": [["Character", "\u00C1"]]},
-
-{"description": "Named entity: Aacute without a semi-colon.",
-"input":"Á",
-"output": ["ParseError", ["Character", "\u00C1"]]},
-
-{"description": "Named entity: Acirc with a semi-colon.",
-"input":"Â",
-"output": [["Character", "\u00C2"]]},
-
-{"description": "Named entity: Acirc without a semi-colon.",
-"input":"Â",
-"output": ["ParseError", ["Character", "\u00C2"]]},
-
-{"description": "Named entity: Agrave with a semi-colon.",
-"input":"À",
-"output": [["Character", "\u00C0"]]},
-
-{"description": "Named entity: Agrave without a semi-colon.",
-"input":"À",
-"output": ["ParseError", ["Character", "\u00C0"]]},
-
-{"description": "Named entity: Alpha with a semi-colon.",
-"input":"Α",
-"output": [["Character", "\u0391"]]},
-
-{"description": "Named entity: Aring with a semi-colon.",
-"input":"Å",
-"output": [["Character", "\u00C5"]]},
-
-{"description": "Named entity: Aring without a semi-colon.",
-"input":"Å",
-"output": ["ParseError", ["Character", "\u00C5"]]},
-
-{"description": "Named entity: Atilde with a semi-colon.",
-"input":"Ã",
-"output": [["Character", "\u00C3"]]},
-
-{"description": "Named entity: Atilde without a semi-colon.",
-"input":"Ã",
-"output": ["ParseError", ["Character", "\u00C3"]]},
-
-{"description": "Named entity: Auml with a semi-colon.",
-"input":"Ä",
-"output": [["Character", "\u00C4"]]},
-
-{"description": "Named entity: Auml without a semi-colon.",
-"input":"Ä",
-"output": ["ParseError", ["Character", "\u00C4"]]},
-
-{"description": "Named entity: Beta with a semi-colon.",
-"input":"Β",
-"output": [["Character", "\u0392"]]},
-
-{"description": "Named entity: COPY with a semi-colon.",
-"input":"©",
-"output": [["Character", "\u00A9"]]},
-
-{"description": "Named entity: COPY without a semi-colon.",
-"input":"©",
-"output": ["ParseError", ["Character", "\u00A9"]]},
-
-{"description": "Named entity: Ccedil with a semi-colon.",
-"input":"Ç",
-"output": [["Character", "\u00C7"]]},
-
-{"description": "Named entity: Ccedil without a semi-colon.",
-"input":"Ç",
-"output": ["ParseError", ["Character", "\u00C7"]]},
-
-{"description": "Named entity: Chi with a semi-colon.",
-"input":"Χ",
-"output": [["Character", "\u03A7"]]},
-
-{"description": "Named entity: Dagger with a semi-colon.",
-"input":"‡",
-"output": [["Character", "\u2021"]]},
-
-{"description": "Named entity: Delta with a semi-colon.",
-"input":"Δ",
-"output": [["Character", "\u0394"]]},
-
-{"description": "Named entity: ETH with a semi-colon.",
-"input":"Ð",
-"output": [["Character", "\u00D0"]]},
-
-{"description": "Named entity: ETH without a semi-colon.",
-"input":"Ð",
-"output": ["ParseError", ["Character", "\u00D0"]]},
-
-{"description": "Named entity: Eacute with a semi-colon.",
-"input":"É",
-"output": [["Character", "\u00C9"]]},
-
-{"description": "Named entity: Eacute without a semi-colon.",
-"input":"É",
-"output": ["ParseError", ["Character", "\u00C9"]]},
-
-{"description": "Named entity: Ecirc with a semi-colon.",
-"input":"Ê",
-"output": [["Character", "\u00CA"]]},
-
-{"description": "Named entity: Ecirc without a semi-colon.",
-"input":"Ê",
-"output": ["ParseError", ["Character", "\u00CA"]]},
-
-{"description": "Named entity: Egrave with a semi-colon.",
-"input":"È",
-"output": [["Character", "\u00C8"]]},
-
-{"description": "Named entity: Egrave without a semi-colon.",
-"input":"È",
-"output": ["ParseError", ["Character", "\u00C8"]]},
-
-{"description": "Named entity: Epsilon with a semi-colon.",
-"input":"Ε",
-"output": [["Character", "\u0395"]]},
-
-{"description": "Named entity: Eta with a semi-colon.",
-"input":"Η",
-"output": [["Character", "\u0397"]]},
-
-{"description": "Named entity: Euml with a semi-colon.",
-"input":"Ë",
-"output": [["Character", "\u00CB"]]},
-
-{"description": "Named entity: Euml without a semi-colon.",
-"input":"Ë",
-"output": ["ParseError", ["Character", "\u00CB"]]},
-
-{"description": "Named entity: GT with a semi-colon.",
-"input":">",
-"output": [["Character", "\u003E"]]},
-
-{"description": "Named entity: GT without a semi-colon.",
-"input":">",
-"output": ["ParseError", ["Character", "\u003E"]]},
-
-{"description": "Named entity: Gamma with a semi-colon.",
-"input":"Γ",
-"output": [["Character", "\u0393"]]},
-
-{"description": "Named entity: Iacute with a semi-colon.",
-"input":"Í",
-"output": [["Character", "\u00CD"]]},
-
-{"description": "Named entity: Iacute without a semi-colon.",
-"input":"Í",
-"output": ["ParseError", ["Character", "\u00CD"]]},
-
-{"description": "Named entity: Icirc with a semi-colon.",
-"input":"Î",
-"output": [["Character", "\u00CE"]]},
-
-{"description": "Named entity: Icirc without a semi-colon.",
-"input":"Î",
-"output": ["ParseError", ["Character", "\u00CE"]]},
-
-{"description": "Named entity: Igrave with a semi-colon.",
-"input":"Ì",
-"output": [["Character", "\u00CC"]]},
-
-{"description": "Named entity: Igrave without a semi-colon.",
-"input":"Ì",
-"output": ["ParseError", ["Character", "\u00CC"]]},
-
-{"description": "Named entity: Iota with a semi-colon.",
-"input":"Ι",
-"output": [["Character", "\u0399"]]},
-
-{"description": "Named entity: Iuml with a semi-colon.",
-"input":"Ï",
-"output": [["Character", "\u00CF"]]},
-
-{"description": "Named entity: Iuml without a semi-colon.",
-"input":"Ï",
-"output": ["ParseError", ["Character", "\u00CF"]]},
-
-{"description": "Named entity: Kappa with a semi-colon.",
-"input":"Κ",
-"output": [["Character", "\u039A"]]},
-
-{"description": "Named entity: LT with a semi-colon.",
-"input":"<",
-"output": [["Character", "\u003C"]]},
-
-{"description": "Named entity: LT without a semi-colon.",
-"input":"<",
-"output": ["ParseError", ["Character", "\u003C"]]},
-
-{"description": "Named entity: Lambda with a semi-colon.",
-"input":"Λ",
-"output": [["Character", "\u039B"]]},
-
-{"description": "Named entity: Mu with a semi-colon.",
-"input":"Μ",
-"output": [["Character", "\u039C"]]},
-
-{"description": "Named entity: Ntilde with a semi-colon.",
-"input":"Ñ",
-"output": [["Character", "\u00D1"]]},
-
-{"description": "Named entity: Ntilde without a semi-colon.",
-"input":"Ñ",
-"output": ["ParseError", ["Character", "\u00D1"]]},
-
-{"description": "Named entity: Nu with a semi-colon.",
-"input":"Ν",
-"output": [["Character", "\u039D"]]},
-
-{"description": "Named entity: OElig with a semi-colon.",
-"input":"Œ",
-"output": [["Character", "\u0152"]]},
-
-{"description": "Named entity: Oacute with a semi-colon.",
-"input":"Ó",
-"output": [["Character", "\u00D3"]]},
-
-{"description": "Named entity: Oacute without a semi-colon.",
-"input":"Ó",
-"output": ["ParseError", ["Character", "\u00D3"]]},
-
-{"description": "Named entity: Ocirc with a semi-colon.",
-"input":"Ô",
-"output": [["Character", "\u00D4"]]},
-
-{"description": "Named entity: Ocirc without a semi-colon.",
-"input":"Ô",
-"output": ["ParseError", ["Character", "\u00D4"]]},
-
-{"description": "Named entity: Ograve with a semi-colon.",
-"input":"Ò",
-"output": [["Character", "\u00D2"]]},
-
-{"description": "Named entity: Ograve without a semi-colon.",
-"input":"Ò",
-"output": ["ParseError", ["Character", "\u00D2"]]},
-
-{"description": "Named entity: Omega with a semi-colon.",
-"input":"Ω",
-"output": [["Character", "\u03A9"]]},
-
-{"description": "Named entity: Omicron with a semi-colon.",
-"input":"Ο",
-"output": [["Character", "\u039F"]]},
-
-{"description": "Named entity: Oslash with a semi-colon.",
-"input":"Ø",
-"output": [["Character", "\u00D8"]]},
-
-{"description": "Named entity: Oslash without a semi-colon.",
-"input":"Ø",
-"output": ["ParseError", ["Character", "\u00D8"]]},
-
-{"description": "Named entity: Otilde with a semi-colon.",
-"input":"Õ",
-"output": [["Character", "\u00D5"]]},
-
-{"description": "Named entity: Otilde without a semi-colon.",
-"input":"Õ",
-"output": ["ParseError", ["Character", "\u00D5"]]},
-
-{"description": "Named entity: Ouml with a semi-colon.",
-"input":"Ö",
-"output": [["Character", "\u00D6"]]},
-
-{"description": "Named entity: Ouml without a semi-colon.",
-"input":"Ö",
-"output": ["ParseError", ["Character", "\u00D6"]]},
-
-{"description": "Named entity: Phi with a semi-colon.",
-"input":"Φ",
-"output": [["Character", "\u03A6"]]},
-
-{"description": "Named entity: Pi with a semi-colon.",
-"input":"Π",
-"output": [["Character", "\u03A0"]]},
-
-{"description": "Named entity: Prime with a semi-colon.",
-"input":"″",
-"output": [["Character", "\u2033"]]},
-
-{"description": "Named entity: Psi with a semi-colon.",
-"input":"Ψ",
-"output": [["Character", "\u03A8"]]},
-
-{"description": "Named entity: QUOT with a semi-colon.",
-"input":""",
-"output": [["Character", "\u0022"]]},
-
-{"description": "Named entity: QUOT without a semi-colon.",
-"input":""",
-"output": ["ParseError", ["Character", "\u0022"]]},
-
-{"description": "Named entity: REG with a semi-colon.",
-"input":"®",
-"output": [["Character", "\u00AE"]]},
-
-{"description": "Named entity: REG without a semi-colon.",
-"input":"®",
-"output": ["ParseError", ["Character", "\u00AE"]]},
-
-{"description": "Named entity: Rho with a semi-colon.",
-"input":"Ρ",
-"output": [["Character", "\u03A1"]]},
-
-{"description": "Named entity: Scaron with a semi-colon.",
-"input":"Š",
-"output": [["Character", "\u0160"]]},
-
-{"description": "Named entity: Sigma with a semi-colon.",
-"input":"Σ",
-"output": [["Character", "\u03A3"]]},
-
-{"description": "Named entity: THORN with a semi-colon.",
-"input":"Þ",
-"output": [["Character", "\u00DE"]]},
-
-{"description": "Named entity: THORN without a semi-colon.",
-"input":"Þ",
-"output": ["ParseError", ["Character", "\u00DE"]]},
-
-{"description": "Named entity: TRADE with a semi-colon.",
-"input":"™",
-"output": [["Character", "\u2122"]]},
-
-{"description": "Named entity: Tau with a semi-colon.",
-"input":"Τ",
-"output": [["Character", "\u03A4"]]},
-
-{"description": "Named entity: Theta with a semi-colon.",
-"input":"Θ",
-"output": [["Character", "\u0398"]]},
-
-{"description": "Named entity: Uacute with a semi-colon.",
-"input":"Ú",
-"output": [["Character", "\u00DA"]]},
-
-{"description": "Named entity: Uacute without a semi-colon.",
-"input":"Ú",
-"output": ["ParseError", ["Character", "\u00DA"]]},
-
-{"description": "Named entity: Ucirc with a semi-colon.",
-"input":"Û",
-"output": [["Character", "\u00DB"]]},
-
-{"description": "Named entity: Ucirc without a semi-colon.",
-"input":"Û",
-"output": ["ParseError", ["Character", "\u00DB"]]},
-
-{"description": "Named entity: Ugrave with a semi-colon.",
-"input":"Ù",
-"output": [["Character", "\u00D9"]]},
-
-{"description": "Named entity: Ugrave without a semi-colon.",
-"input":"Ù",
-"output": ["ParseError", ["Character", "\u00D9"]]},
-
-{"description": "Named entity: Upsilon with a semi-colon.",
-"input":"Υ",
-"output": [["Character", "\u03A5"]]},
-
-{"description": "Named entity: Uuml with a semi-colon.",
-"input":"Ü",
-"output": [["Character", "\u00DC"]]},
-
-{"description": "Named entity: Uuml without a semi-colon.",
-"input":"Ü",
-"output": ["ParseError", ["Character", "\u00DC"]]},
-
-{"description": "Named entity: Xi with a semi-colon.",
-"input":"Ξ",
-"output": [["Character", "\u039E"]]},
-
-{"description": "Named entity: Yacute with a semi-colon.",
-"input":"Ý",
-"output": [["Character", "\u00DD"]]},
-
-{"description": "Named entity: Yacute without a semi-colon.",
-"input":"Ý",
-"output": ["ParseError", ["Character", "\u00DD"]]},
-
-{"description": "Named entity: Yuml with a semi-colon.",
-"input":"Ÿ",
-"output": [["Character", "\u0178"]]},
-
-{"description": "Named entity: Zeta with a semi-colon.",
-"input":"Ζ",
-"output": [["Character", "\u0396"]]},
-
-{"description": "Named entity: aacute with a semi-colon.",
-"input":"á",
-"output": [["Character", "\u00E1"]]},
-
-{"description": "Named entity: aacute without a semi-colon.",
-"input":"á",
-"output": ["ParseError", ["Character", "\u00E1"]]},
-
-{"description": "Named entity: acirc with a semi-colon.",
-"input":"â",
-"output": [["Character", "\u00E2"]]},
-
-{"description": "Named entity: acirc without a semi-colon.",
-"input":"â",
-"output": ["ParseError", ["Character", "\u00E2"]]},
-
-{"description": "Named entity: acute with a semi-colon.",
-"input":"´",
-"output": [["Character", "\u00B4"]]},
-
-{"description": "Named entity: acute without a semi-colon.",
-"input":"´",
-"output": ["ParseError", ["Character", "\u00B4"]]},
-
-{"description": "Named entity: aelig with a semi-colon.",
-"input":"æ",
-"output": [["Character", "\u00E6"]]},
-
-{"description": "Named entity: aelig without a semi-colon.",
-"input":"æ",
-"output": ["ParseError", ["Character", "\u00E6"]]},
-
-{"description": "Named entity: agrave with a semi-colon.",
-"input":"à",
-"output": [["Character", "\u00E0"]]},
-
-{"description": "Named entity: agrave without a semi-colon.",
-"input":"à",
-"output": ["ParseError", ["Character", "\u00E0"]]},
-
-{"description": "Named entity: alefsym with a semi-colon.",
-"input":"ℵ",
-"output": [["Character", "\u2135"]]},
-
-{"description": "Named entity: alpha with a semi-colon.",
-"input":"α",
-"output": [["Character", "\u03B1"]]},
-
-{"description": "Named entity: amp with a semi-colon.",
-"input":"&",
-"output": [["Character", "\u0026"]]},
-
-{"description": "Named entity: amp without a semi-colon.",
-"input":"&",
-"output": ["ParseError", ["Character", "\u0026"]]},
-
-{"description": "Named entity: and with a semi-colon.",
-"input":"∧",
-"output": [["Character", "\u2227"]]},
-
-{"description": "Named entity: ang with a semi-colon.",
-"input":"∠",
-"output": [["Character", "\u2220"]]},
-
-{"description": "Named entity: apos with a semi-colon.",
-"input":"'",
-"output": [["Character", "\u0027"]]},
-
-{"description": "Named entity: aring with a semi-colon.",
-"input":"å",
-"output": [["Character", "\u00E5"]]},
-
-{"description": "Named entity: aring without a semi-colon.",
-"input":"å",
-"output": ["ParseError", ["Character", "\u00E5"]]},
-
-{"description": "Named entity: asymp with a semi-colon.",
-"input":"≈",
-"output": [["Character", "\u2248"]]},
-
-{"description": "Named entity: atilde with a semi-colon.",
-"input":"ã",
-"output": [["Character", "\u00E3"]]},
-
-{"description": "Named entity: atilde without a semi-colon.",
-"input":"ã",
-"output": ["ParseError", ["Character", "\u00E3"]]},
-
-{"description": "Named entity: auml with a semi-colon.",
-"input":"ä",
-"output": [["Character", "\u00E4"]]},
-
-{"description": "Named entity: auml without a semi-colon.",
-"input":"ä",
-"output": ["ParseError", ["Character", "\u00E4"]]},
-
-{"description": "Named entity: bdquo with a semi-colon.",
-"input":"„",
-"output": [["Character", "\u201E"]]},
-
-{"description": "Named entity: beta with a semi-colon.",
-"input":"β",
-"output": [["Character", "\u03B2"]]},
-
-{"description": "Named entity: brvbar with a semi-colon.",
-"input":"¦",
-"output": [["Character", "\u00A6"]]},
-
-{"description": "Named entity: brvbar without a semi-colon.",
-"input":"¦",
-"output": ["ParseError", ["Character", "\u00A6"]]},
-
-{"description": "Named entity: bull with a semi-colon.",
-"input":"•",
-"output": [["Character", "\u2022"]]},
-
-{"description": "Named entity: cap with a semi-colon.",
-"input":"∩",
-"output": [["Character", "\u2229"]]},
-
-{"description": "Named entity: ccedil with a semi-colon.",
-"input":"ç",
-"output": [["Character", "\u00E7"]]},
-
-{"description": "Named entity: ccedil without a semi-colon.",
-"input":"ç",
-"output": ["ParseError", ["Character", "\u00E7"]]},
-
-{"description": "Named entity: cedil with a semi-colon.",
-"input":"¸",
-"output": [["Character", "\u00B8"]]},
-
-{"description": "Named entity: cedil without a semi-colon.",
-"input":"¸",
-"output": ["ParseError", ["Character", "\u00B8"]]},
-
-{"description": "Named entity: cent with a semi-colon.",
-"input":"¢",
-"output": [["Character", "\u00A2"]]},
-
-{"description": "Named entity: cent without a semi-colon.",
-"input":"¢",
-"output": ["ParseError", ["Character", "\u00A2"]]},
-
-{"description": "Named entity: chi with a semi-colon.",
-"input":"χ",
-"output": [["Character", "\u03C7"]]},
-
-{"description": "Named entity: circ with a semi-colon.",
-"input":"ˆ",
-"output": [["Character", "\u02C6"]]},
-
-{"description": "Named entity: clubs with a semi-colon.",
-"input":"♣",
-"output": [["Character", "\u2663"]]},
-
-{"description": "Named entity: cong with a semi-colon.",
-"input":"≅",
-"output": [["Character", "\u2245"]]},
-
-{"description": "Named entity: copy with a semi-colon.",
-"input":"©",
-"output": [["Character", "\u00A9"]]},
-
-{"description": "Named entity: copy without a semi-colon.",
-"input":"©",
-"output": ["ParseError", ["Character", "\u00A9"]]},
-
-{"description": "Named entity: crarr with a semi-colon.",
-"input":"↵",
-"output": [["Character", "\u21B5"]]},
-
-{"description": "Named entity: cup with a semi-colon.",
-"input":"∪",
-"output": [["Character", "\u222A"]]},
-
-{"description": "Named entity: curren with a semi-colon.",
-"input":"¤",
-"output": [["Character", "\u00A4"]]},
-
-{"description": "Named entity: curren without a semi-colon.",
-"input":"¤",
-"output": ["ParseError", ["Character", "\u00A4"]]},
-
-{"description": "Named entity: dArr with a semi-colon.",
-"input":"⇓",
-"output": [["Character", "\u21D3"]]},
-
-{"description": "Named entity: dagger with a semi-colon.",
-"input":"†",
-"output": [["Character", "\u2020"]]},
-
-{"description": "Named entity: darr with a semi-colon.",
-"input":"↓",
-"output": [["Character", "\u2193"]]},
-
-{"description": "Named entity: deg with a semi-colon.",
-"input":"°",
-"output": [["Character", "\u00B0"]]},
-
-{"description": "Named entity: deg without a semi-colon.",
-"input":"°",
-"output": ["ParseError", ["Character", "\u00B0"]]},
-
-{"description": "Named entity: delta with a semi-colon.",
-"input":"δ",
-"output": [["Character", "\u03B4"]]},
-
-{"description": "Named entity: diams with a semi-colon.",
-"input":"♦",
-"output": [["Character", "\u2666"]]},
-
-{"description": "Named entity: divide with a semi-colon.",
-"input":"÷",
-"output": [["Character", "\u00F7"]]},
-
-{"description": "Named entity: divide without a semi-colon.",
-"input":"÷",
-"output": ["ParseError", ["Character", "\u00F7"]]},
-
-{"description": "Named entity: eacute with a semi-colon.",
-"input":"é",
-"output": [["Character", "\u00E9"]]},
-
-{"description": "Named entity: eacute without a semi-colon.",
-"input":"é",
-"output": ["ParseError", ["Character", "\u00E9"]]},
-
-{"description": "Named entity: ecirc with a semi-colon.",
-"input":"ê",
-"output": [["Character", "\u00EA"]]},
-
-{"description": "Named entity: ecirc without a semi-colon.",
-"input":"ê",
-"output": ["ParseError", ["Character", "\u00EA"]]},
-
-{"description": "Named entity: egrave with a semi-colon.",
-"input":"è",
-"output": [["Character", "\u00E8"]]},
-
-{"description": "Named entity: egrave without a semi-colon.",
-"input":"è",
-"output": ["ParseError", ["Character", "\u00E8"]]},
-
-{"description": "Named entity: empty with a semi-colon.",
-"input":"∅",
-"output": [["Character", "\u2205"]]},
-
-{"description": "Named entity: emsp with a semi-colon.",
-"input":" ",
-"output": [["Character", "\u2003"]]},
-
-{"description": "Named entity: ensp with a semi-colon.",
-"input":" ",
-"output": [["Character", "\u2002"]]},
-
-{"description": "Named entity: epsilon with a semi-colon.",
-"input":"ε",
-"output": [["Character", "\u03B5"]]},
-
-{"description": "Named entity: equiv with a semi-colon.",
-"input":"≡",
-"output": [["Character", "\u2261"]]},
-
-{"description": "Named entity: eta with a semi-colon.",
-"input":"η",
-"output": [["Character", "\u03B7"]]},
-
-{"description": "Named entity: eth with a semi-colon.",
-"input":"ð",
-"output": [["Character", "\u00F0"]]},
-
-{"description": "Named entity: eth without a semi-colon.",
-"input":"ð",
-"output": ["ParseError", ["Character", "\u00F0"]]},
-
-{"description": "Named entity: euml with a semi-colon.",
-"input":"ë",
-"output": [["Character", "\u00EB"]]},
-
-{"description": "Named entity: euml without a semi-colon.",
-"input":"ë",
-"output": ["ParseError", ["Character", "\u00EB"]]},
-
-{"description": "Named entity: euro with a semi-colon.",
-"input":"€",
-"output": [["Character", "\u20AC"]]},
-
-{"description": "Named entity: exist with a semi-colon.",
-"input":"∃",
-"output": [["Character", "\u2203"]]},
-
-{"description": "Named entity: fnof with a semi-colon.",
-"input":"ƒ",
-"output": [["Character", "\u0192"]]},
-
-{"description": "Named entity: forall with a semi-colon.",
-"input":"∀",
-"output": [["Character", "\u2200"]]},
-
-{"description": "Named entity: frac12 with a semi-colon.",
-"input":"½",
-"output": [["Character", "\u00BD"]]},
-
-{"description": "Named entity: frac12 without a semi-colon.",
-"input":"½",
-"output": ["ParseError", ["Character", "\u00BD"]]},
-
-{"description": "Named entity: frac14 with a semi-colon.",
-"input":"¼",
-"output": [["Character", "\u00BC"]]},
-
-{"description": "Named entity: frac14 without a semi-colon.",
-"input":"¼",
-"output": ["ParseError", ["Character", "\u00BC"]]},
-
-{"description": "Named entity: frac34 with a semi-colon.",
-"input":"¾",
-"output": [["Character", "\u00BE"]]},
-
-{"description": "Named entity: frac34 without a semi-colon.",
-"input":"¾",
-"output": ["ParseError", ["Character", "\u00BE"]]},
-
-{"description": "Named entity: frasl with a semi-colon.",
-"input":"⁄",
-"output": [["Character", "\u2044"]]},
-
-{"description": "Named entity: gamma with a semi-colon.",
-"input":"γ",
-"output": [["Character", "\u03B3"]]},
-
-{"description": "Named entity: ge with a semi-colon.",
-"input":"≥",
-"output": [["Character", "\u2265"]]},
-
-{"description": "Named entity: gt with a semi-colon.",
-"input":">",
-"output": [["Character", "\u003E"]]},
-
-{"description": "Named entity: gt without a semi-colon.",
-"input":">",
-"output": ["ParseError", ["Character", "\u003E"]]},
-
-{"description": "Named entity: hArr with a semi-colon.",
-"input":"⇔",
-"output": [["Character", "\u21D4"]]},
-
-{"description": "Named entity: harr with a semi-colon.",
-"input":"↔",
-"output": [["Character", "\u2194"]]},
-
-{"description": "Named entity: hearts with a semi-colon.",
-"input":"♥",
-"output": [["Character", "\u2665"]]},
-
-{"description": "Named entity: hellip with a semi-colon.",
-"input":"…",
-"output": [["Character", "\u2026"]]},
-
-{"description": "Named entity: iacute with a semi-colon.",
-"input":"í",
-"output": [["Character", "\u00ED"]]},
-
-{"description": "Named entity: iacute without a semi-colon.",
-"input":"í",
-"output": ["ParseError", ["Character", "\u00ED"]]},
-
-{"description": "Named entity: icirc with a semi-colon.",
-"input":"î",
-"output": [["Character", "\u00EE"]]},
-
-{"description": "Named entity: icirc without a semi-colon.",
-"input":"î",
-"output": ["ParseError", ["Character", "\u00EE"]]},
-
-{"description": "Named entity: iexcl with a semi-colon.",
-"input":"¡",
-"output": [["Character", "\u00A1"]]},
-
-{"description": "Named entity: iexcl without a semi-colon.",
-"input":"¡",
-"output": ["ParseError", ["Character", "\u00A1"]]},
-
-{"description": "Named entity: igrave with a semi-colon.",
-"input":"ì",
-"output": [["Character", "\u00EC"]]},
-
-{"description": "Named entity: igrave without a semi-colon.",
-"input":"ì",
-"output": ["ParseError", ["Character", "\u00EC"]]},
-
-{"description": "Named entity: image with a semi-colon.",
-"input":"ℑ",
-"output": [["Character", "\u2111"]]},
-
-{"description": "Named entity: infin with a semi-colon.",
-"input":"∞",
-"output": [["Character", "\u221E"]]},
-
-{"description": "Named entity: int with a semi-colon.",
-"input":"∫",
-"output": [["Character", "\u222B"]]},
-
-{"description": "Named entity: iota with a semi-colon.",
-"input":"ι",
-"output": [["Character", "\u03B9"]]},
-
-{"description": "Named entity: iquest with a semi-colon.",
-"input":"¿",
-"output": [["Character", "\u00BF"]]},
-
-{"description": "Named entity: iquest without a semi-colon.",
-"input":"¿",
-"output": ["ParseError", ["Character", "\u00BF"]]},
-
-{"description": "Named entity: isin with a semi-colon.",
-"input":"∈",
-"output": [["Character", "\u2208"]]},
-
-{"description": "Named entity: iuml with a semi-colon.",
-"input":"ï",
-"output": [["Character", "\u00EF"]]},
-
-{"description": "Named entity: iuml without a semi-colon.",
-"input":"ï",
-"output": ["ParseError", ["Character", "\u00EF"]]},
-
-{"description": "Named entity: kappa with a semi-colon.",
-"input":"κ",
-"output": [["Character", "\u03BA"]]},
-
-{"description": "Named entity: lArr with a semi-colon.",
-"input":"⇐",
-"output": [["Character", "\u21D0"]]},
-
-{"description": "Named entity: lambda with a semi-colon.",
-"input":"λ",
-"output": [["Character", "\u03BB"]]},
-
-{"description": "Named entity: lang with a semi-colon.",
-"input":"⟨",
-"output": [["Character", "\u27E8"]]},
-
-{"description": "Named entity: laquo with a semi-colon.",
-"input":"«",
-"output": [["Character", "\u00AB"]]},
-
-{"description": "Named entity: laquo without a semi-colon.",
-"input":"«",
-"output": ["ParseError", ["Character", "\u00AB"]]},
-
-{"description": "Named entity: larr with a semi-colon.",
-"input":"←",
-"output": [["Character", "\u2190"]]},
-
-{"description": "Named entity: lceil with a semi-colon.",
-"input":"⌈",
-"output": [["Character", "\u2308"]]},
-
-{"description": "Named entity: ldquo with a semi-colon.",
-"input":"“",
-"output": [["Character", "\u201C"]]},
-
-{"description": "Named entity: le with a semi-colon.",
-"input":"≤",
-"output": [["Character", "\u2264"]]},
-
-{"description": "Named entity: lfloor with a semi-colon.",
-"input":"⌊",
-"output": [["Character", "\u230A"]]},
-
-{"description": "Named entity: lowast with a semi-colon.",
-"input":"∗",
-"output": [["Character", "\u2217"]]},
-
-{"description": "Named entity: loz with a semi-colon.",
-"input":"◊",
-"output": [["Character", "\u25CA"]]},
-
-{"description": "Named entity: lrm with a semi-colon.",
-"input":"‎",
-"output": [["Character", "\u200E"]]},
-
-{"description": "Named entity: lsaquo with a semi-colon.",
-"input":"‹",
-"output": [["Character", "\u2039"]]},
-
-{"description": "Named entity: lsquo with a semi-colon.",
-"input":"‘",
-"output": [["Character", "\u2018"]]},
-
-{"description": "Named entity: lt with a semi-colon.",
-"input":"<",
-"output": [["Character", "\u003C"]]},
-
-{"description": "Named entity: lt without a semi-colon.",
-"input":"<",
-"output": ["ParseError", ["Character", "\u003C"]]},
-
-{"description": "Named entity: macr with a semi-colon.",
-"input":"¯",
-"output": [["Character", "\u00AF"]]},
-
-{"description": "Named entity: macr without a semi-colon.",
-"input":"¯",
-"output": ["ParseError", ["Character", "\u00AF"]]},
-
-{"description": "Named entity: mdash with a semi-colon.",
-"input":"—",
-"output": [["Character", "\u2014"]]},
-
-{"description": "Named entity: micro with a semi-colon.",
-"input":"µ",
-"output": [["Character", "\u00B5"]]},
-
-{"description": "Named entity: micro without a semi-colon.",
-"input":"µ",
-"output": ["ParseError", ["Character", "\u00B5"]]},
-
-{"description": "Named entity: middot with a semi-colon.",
-"input":"·",
-"output": [["Character", "\u00B7"]]},
-
-{"description": "Named entity: middot without a semi-colon.",
-"input":"·",
-"output": ["ParseError", ["Character", "\u00B7"]]},
-
-{"description": "Named entity: minus with a semi-colon.",
-"input":"−",
-"output": [["Character", "\u2212"]]},
-
-{"description": "Named entity: mu with a semi-colon.",
-"input":"μ",
-"output": [["Character", "\u03BC"]]},
-
-{"description": "Named entity: nabla with a semi-colon.",
-"input":"∇",
-"output": [["Character", "\u2207"]]},
-
-{"description": "Named entity: nbsp with a semi-colon.",
-"input":" ",
-"output": [["Character", "\u00A0"]]},
-
-{"description": "Named entity: nbsp without a semi-colon.",
-"input":" ",
-"output": ["ParseError", ["Character", "\u00A0"]]},
-
-{"description": "Named entity: ndash with a semi-colon.",
-"input":"–",
-"output": [["Character", "\u2013"]]},
-
-{"description": "Named entity: ne with a semi-colon.",
-"input":"≠",
-"output": [["Character", "\u2260"]]},
-
-{"description": "Named entity: ni with a semi-colon.",
-"input":"∋",
-"output": [["Character", "\u220B"]]},
-
-{"description": "Named entity: not with a semi-colon.",
-"input":"¬",
-"output": [["Character", "\u00AC"]]},
-
-{"description": "Named entity: not without a semi-colon.",
-"input":"¬",
-"output": ["ParseError", ["Character", "\u00AC"]]},
-
-{"description": "Named entity: notin with a semi-colon.",
-"input":"∉",
-"output": [["Character", "\u2209"]]},
-
-{"description": "Named entity: nsub with a semi-colon.",
-"input":"⊄",
-"output": [["Character", "\u2284"]]},
-
-{"description": "Named entity: ntilde with a semi-colon.",
-"input":"ñ",
-"output": [["Character", "\u00F1"]]},
-
-{"description": "Named entity: ntilde without a semi-colon.",
-"input":"ñ",
-"output": ["ParseError", ["Character", "\u00F1"]]},
-
-{"description": "Named entity: nu with a semi-colon.",
-"input":"ν",
-"output": [["Character", "\u03BD"]]},
-
-{"description": "Named entity: oacute with a semi-colon.",
-"input":"ó",
-"output": [["Character", "\u00F3"]]},
-
-{"description": "Named entity: oacute without a semi-colon.",
-"input":"ó",
-"output": ["ParseError", ["Character", "\u00F3"]]},
-
-{"description": "Named entity: ocirc with a semi-colon.",
-"input":"ô",
-"output": [["Character", "\u00F4"]]},
-
-{"description": "Named entity: ocirc without a semi-colon.",
-"input":"ô",
-"output": ["ParseError", ["Character", "\u00F4"]]},
-
-{"description": "Named entity: oelig with a semi-colon.",
-"input":"œ",
-"output": [["Character", "\u0153"]]},
-
-{"description": "Named entity: ograve with a semi-colon.",
-"input":"ò",
-"output": [["Character", "\u00F2"]]},
-
-{"description": "Named entity: ograve without a semi-colon.",
-"input":"ò",
-"output": ["ParseError", ["Character", "\u00F2"]]},
-
-{"description": "Named entity: oline with a semi-colon.",
-"input":"‾",
-"output": [["Character", "\u203E"]]},
-
-{"description": "Named entity: omega with a semi-colon.",
-"input":"ω",
-"output": [["Character", "\u03C9"]]},
-
-{"description": "Named entity: omicron with a semi-colon.",
-"input":"ο",
-"output": [["Character", "\u03BF"]]},
-
-{"description": "Named entity: oplus with a semi-colon.",
-"input":"⊕",
-"output": [["Character", "\u2295"]]},
-
-{"description": "Named entity: or with a semi-colon.",
-"input":"∨",
-"output": [["Character", "\u2228"]]},
-
-{"description": "Named entity: ordf with a semi-colon.",
-"input":"ª",
-"output": [["Character", "\u00AA"]]},
-
-{"description": "Named entity: ordf without a semi-colon.",
-"input":"ª",
-"output": ["ParseError", ["Character", "\u00AA"]]},
-
-{"description": "Named entity: ordm with a semi-colon.",
-"input":"º",
-"output": [["Character", "\u00BA"]]},
-
-{"description": "Named entity: ordm without a semi-colon.",
-"input":"º",
-"output": ["ParseError", ["Character", "\u00BA"]]},
-
-{"description": "Named entity: oslash with a semi-colon.",
-"input":"ø",
-"output": [["Character", "\u00F8"]]},
-
-{"description": "Named entity: oslash without a semi-colon.",
-"input":"ø",
-"output": ["ParseError", ["Character", "\u00F8"]]},
-
-{"description": "Named entity: otilde with a semi-colon.",
-"input":"õ",
-"output": [["Character", "\u00F5"]]},
-
-{"description": "Named entity: otilde without a semi-colon.",
-"input":"õ",
-"output": ["ParseError", ["Character", "\u00F5"]]},
-
-{"description": "Named entity: otimes with a semi-colon.",
-"input":"⊗",
-"output": [["Character", "\u2297"]]},
-
-{"description": "Named entity: ouml with a semi-colon.",
-"input":"ö",
-"output": [["Character", "\u00F6"]]},
-
-{"description": "Named entity: ouml without a semi-colon.",
-"input":"ö",
-"output": ["ParseError", ["Character", "\u00F6"]]},
-
-{"description": "Named entity: para with a semi-colon.",
-"input":"¶",
-"output": [["Character", "\u00B6"]]},
-
-{"description": "Named entity: para without a semi-colon.",
-"input":"¶",
-"output": ["ParseError", ["Character", "\u00B6"]]},
-
-{"description": "Named entity: part with a semi-colon.",
-"input":"∂",
-"output": [["Character", "\u2202"]]},
-
-{"description": "Named entity: permil with a semi-colon.",
-"input":"‰",
-"output": [["Character", "\u2030"]]},
-
-{"description": "Named entity: perp with a semi-colon.",
-"input":"⊥",
-"output": [["Character", "\u22A5"]]},
-
-{"description": "Named entity: phi with a semi-colon.",
-"input":"φ",
-"output": [["Character", "\u03C6"]]},
-
-{"description": "Named entity: pi with a semi-colon.",
-"input":"π",
-"output": [["Character", "\u03C0"]]},
-
-{"description": "Named entity: piv with a semi-colon.",
-"input":"ϖ",
-"output": [["Character", "\u03D6"]]},
-
-{"description": "Named entity: plusmn with a semi-colon.",
-"input":"±",
-"output": [["Character", "\u00B1"]]},
-
-{"description": "Named entity: plusmn without a semi-colon.",
-"input":"±",
-"output": ["ParseError", ["Character", "\u00B1"]]},
-
-{"description": "Named entity: pound with a semi-colon.",
-"input":"£",
-"output": [["Character", "\u00A3"]]},
-
-{"description": "Named entity: pound without a semi-colon.",
-"input":"£",
-"output": ["ParseError", ["Character", "\u00A3"]]},
-
-{"description": "Named entity: prime with a semi-colon.",
-"input":"′",
-"output": [["Character", "\u2032"]]},
-
-{"description": "Named entity: prod with a semi-colon.",
-"input":"∏",
-"output": [["Character", "\u220F"]]},
-
-{"description": "Named entity: prop with a semi-colon.",
-"input":"∝",
-"output": [["Character", "\u221D"]]},
-
-{"description": "Named entity: psi with a semi-colon.",
-"input":"ψ",
-"output": [["Character", "\u03C8"]]},
-
-{"description": "Named entity: quot with a semi-colon.",
-"input":""",
-"output": [["Character", "\u0022"]]},
-
-{"description": "Named entity: quot without a semi-colon.",
-"input":""",
-"output": ["ParseError", ["Character", "\u0022"]]},
-
-{"description": "Named entity: rArr with a semi-colon.",
-"input":"⇒",
-"output": [["Character", "\u21D2"]]},
-
-{"description": "Named entity: radic with a semi-colon.",
-"input":"√",
-"output": [["Character", "\u221A"]]},
-
-{"description": "Named entity: rang with a semi-colon.",
-"input":"⟩",
-"output": [["Character", "\u27E9"]]},
-
-{"description": "Named entity: raquo with a semi-colon.",
-"input":"»",
-"output": [["Character", "\u00BB"]]},
-
-{"description": "Named entity: raquo without a semi-colon.",
-"input":"»",
-"output": ["ParseError", ["Character", "\u00BB"]]},
-
-{"description": "Named entity: rarr with a semi-colon.",
-"input":"→",
-"output": [["Character", "\u2192"]]},
-
-{"description": "Named entity: rceil with a semi-colon.",
-"input":"⌉",
-"output": [["Character", "\u2309"]]},
-
-{"description": "Named entity: rdquo with a semi-colon.",
-"input":"”",
-"output": [["Character", "\u201D"]]},
-
-{"description": "Named entity: real with a semi-colon.",
-"input":"ℜ",
-"output": [["Character", "\u211C"]]},
-
-{"description": "Named entity: reg with a semi-colon.",
-"input":"®",
-"output": [["Character", "\u00AE"]]},
-
-{"description": "Named entity: reg without a semi-colon.",
-"input":"®",
-"output": ["ParseError", ["Character", "\u00AE"]]},
-
-{"description": "Named entity: rfloor with a semi-colon.",
-"input":"⌋",
-"output": [["Character", "\u230B"]]},
-
-{"description": "Named entity: rho with a semi-colon.",
-"input":"ρ",
-"output": [["Character", "\u03C1"]]},
-
-{"description": "Named entity: rlm with a semi-colon.",
-"input":"‏",
-"output": [["Character", "\u200F"]]},
-
-{"description": "Named entity: rsaquo with a semi-colon.",
-"input":"›",
-"output": [["Character", "\u203A"]]},
-
-{"description": "Named entity: rsquo with a semi-colon.",
-"input":"’",
-"output": [["Character", "\u2019"]]},
-
-{"description": "Named entity: sbquo with a semi-colon.",
-"input":"‚",
-"output": [["Character", "\u201A"]]},
-
-{"description": "Named entity: scaron with a semi-colon.",
-"input":"š",
-"output": [["Character", "\u0161"]]},
-
-{"description": "Named entity: sdot with a semi-colon.",
-"input":"⋅",
-"output": [["Character", "\u22C5"]]},
-
-{"description": "Named entity: sect with a semi-colon.",
-"input":"§",
-"output": [["Character", "\u00A7"]]},
-
-{"description": "Named entity: sect without a semi-colon.",
-"input":"§",
-"output": ["ParseError", ["Character", "\u00A7"]]},
-
-{"description": "Named entity: shy with a semi-colon.",
-"input":"­",
-"output": [["Character", "\u00AD"]]},
-
-{"description": "Named entity: shy without a semi-colon.",
-"input":"­",
-"output": ["ParseError", ["Character", "\u00AD"]]},
-
-{"description": "Named entity: sigma with a semi-colon.",
-"input":"σ",
-"output": [["Character", "\u03C3"]]},
-
-{"description": "Named entity: sigmaf with a semi-colon.",
-"input":"ς",
-"output": [["Character", "\u03C2"]]},
-
-{"description": "Named entity: sim with a semi-colon.",
-"input":"∼",
-"output": [["Character", "\u223C"]]},
-
-{"description": "Named entity: spades with a semi-colon.",
-"input":"♠",
-"output": [["Character", "\u2660"]]},
-
-{"description": "Named entity: sub with a semi-colon.",
-"input":"⊂",
-"output": [["Character", "\u2282"]]},
-
-{"description": "Named entity: sube with a semi-colon.",
-"input":"⊆",
-"output": [["Character", "\u2286"]]},
-
-{"description": "Named entity: sum with a semi-colon.",
-"input":"∑",
-"output": [["Character", "\u2211"]]},
-
-{"description": "Named entity: sup1 with a semi-colon.",
-"input":"¹",
-"output": [["Character", "\u00B9"]]},
-
-{"description": "Named entity: sup1 without a semi-colon.",
-"input":"¹",
-"output": ["ParseError", ["Character", "\u00B9"]]},
-
-{"description": "Named entity: sup2 with a semi-colon.",
-"input":"²",
-"output": [["Character", "\u00B2"]]},
-
-{"description": "Named entity: sup2 without a semi-colon.",
-"input":"²",
-"output": ["ParseError", ["Character", "\u00B2"]]},
-
-{"description": "Named entity: sup3 with a semi-colon.",
-"input":"³",
-"output": [["Character", "\u00B3"]]},
-
-{"description": "Named entity: sup3 without a semi-colon.",
-"input":"³",
-"output": ["ParseError", ["Character", "\u00B3"]]},
-
-{"description": "Named entity: sup with a semi-colon.",
-"input":"⊃",
-"output": [["Character", "\u2283"]]},
-
-{"description": "Named entity: supe with a semi-colon.",
-"input":"⊇",
-"output": [["Character", "\u2287"]]},
-
-{"description": "Named entity: szlig with a semi-colon.",
-"input":"ß",
-"output": [["Character", "\u00DF"]]},
-
-{"description": "Named entity: szlig without a semi-colon.",
-"input":"ß",
-"output": ["ParseError", ["Character", "\u00DF"]]},
-
-{"description": "Named entity: tau with a semi-colon.",
-"input":"τ",
-"output": [["Character", "\u03C4"]]},
-
-{"description": "Named entity: there4 with a semi-colon.",
-"input":"∴",
-"output": [["Character", "\u2234"]]},
-
-{"description": "Named entity: theta with a semi-colon.",
-"input":"θ",
-"output": [["Character", "\u03B8"]]},
-
-{"description": "Named entity: thetasym with a semi-colon.",
-"input":"ϑ",
-"output": [["Character", "\u03D1"]]},
-
-{"description": "Named entity: thinsp with a semi-colon.",
-"input":" ",
-"output": [["Character", "\u2009"]]},
-
-{"description": "Named entity: thorn with a semi-colon.",
-"input":"þ",
-"output": [["Character", "\u00FE"]]},
-
-{"description": "Named entity: thorn without a semi-colon.",
-"input":"þ",
-"output": ["ParseError", ["Character", "\u00FE"]]},
-
-{"description": "Named entity: tilde with a semi-colon.",
-"input":"˜",
-"output": [["Character", "\u02DC"]]},
-
-{"description": "Named entity: times with a semi-colon.",
-"input":"×",
-"output": [["Character", "\u00D7"]]},
-
-{"description": "Named entity: times without a semi-colon.",
-"input":"×",
-"output": ["ParseError", ["Character", "\u00D7"]]},
-
-{"description": "Named entity: trade with a semi-colon.",
-"input":"™",
-"output": [["Character", "\u2122"]]},
-
-{"description": "Named entity: uArr with a semi-colon.",
-"input":"⇑",
-"output": [["Character", "\u21D1"]]},
-
-{"description": "Named entity: uacute with a semi-colon.",
-"input":"ú",
-"output": [["Character", "\u00FA"]]},
-
-{"description": "Named entity: uacute without a semi-colon.",
-"input":"ú",
-"output": ["ParseError", ["Character", "\u00FA"]]},
-
-{"description": "Named entity: uarr with a semi-colon.",
-"input":"↑",
-"output": [["Character", "\u2191"]]},
-
-{"description": "Named entity: ucirc with a semi-colon.",
-"input":"û",
-"output": [["Character", "\u00FB"]]},
-
-{"description": "Named entity: ucirc without a semi-colon.",
-"input":"û",
-"output": ["ParseError", ["Character", "\u00FB"]]},
-
-{"description": "Named entity: ugrave with a semi-colon.",
-"input":"ù",
-"output": [["Character", "\u00F9"]]},
-
-{"description": "Named entity: ugrave without a semi-colon.",
-"input":"ù",
-"output": ["ParseError", ["Character", "\u00F9"]]},
-
-{"description": "Named entity: uml with a semi-colon.",
-"input":"¨",
-"output": [["Character", "\u00A8"]]},
-
-{"description": "Named entity: uml without a semi-colon.",
-"input":"¨",
-"output": ["ParseError", ["Character", "\u00A8"]]},
-
-{"description": "Named entity: upsih with a semi-colon.",
-"input":"ϒ",
-"output": [["Character", "\u03D2"]]},
-
-{"description": "Named entity: upsilon with a semi-colon.",
-"input":"υ",
-"output": [["Character", "\u03C5"]]},
-
-{"description": "Named entity: uuml with a semi-colon.",
-"input":"ü",
-"output": [["Character", "\u00FC"]]},
-
-{"description": "Named entity: uuml without a semi-colon.",
-"input":"ü",
-"output": ["ParseError", ["Character", "\u00FC"]]},
-
-{"description": "Named entity: weierp with a semi-colon.",
-"input":"℘",
-"output": [["Character", "\u2118"]]},
-
-{"description": "Named entity: xi with a semi-colon.",
-"input":"ξ",
-"output": [["Character", "\u03BE"]]},
-
-{"description": "Named entity: yacute with a semi-colon.",
-"input":"ý",
-"output": [["Character", "\u00FD"]]},
-
-{"description": "Named entity: yacute without a semi-colon.",
-"input":"ý",
-"output": ["ParseError", ["Character", "\u00FD"]]},
-
-{"description": "Named entity: yen with a semi-colon.",
-"input":"¥",
-"output": [["Character", "\u00A5"]]},
-
-{"description": "Named entity: yen without a semi-colon.",
-"input":"¥",
-"output": ["ParseError", ["Character", "\u00A5"]]},
-
-{"description": "Named entity: yuml with a semi-colon.",
-"input":"ÿ",
-"output": [["Character", "\u00FF"]]},
-
-{"description": "Named entity: yuml without a semi-colon.",
-"input":"ÿ",
-"output": ["ParseError", ["Character", "\u00FF"]]},
-
-{"description": "Named entity: zeta with a semi-colon.",
-"input":"ζ",
-"output": [["Character", "\u03B6"]]},
-
-{"description": "Named entity: zwj with a semi-colon.",
-"input":"‍",
-"output": [["Character", "\u200D"]]},
-
-{"description": "Named entity: zwnj with a semi-colon.",
-"input":"‌",
-"output": [["Character", "\u200C"]]},
-
-{"description": "Bad named entity: Alpha without a semi-colon.",
-"input":"&Alpha",
-"output": ["ParseError", ["Character", "&Alpha"]]},
-
-{"description": "Bad named entity: alpha without a semi-colon.",
-"input":"&alpha",
-"output": ["ParseError", ["Character", "&alpha"]]},
-
-{"description": "Bad named entity: and without a semi-colon.",
-"input":"&and",
-"output": ["ParseError", ["Character", "&and"]]},
-
-{"description": "Bad named entity: ang without a semi-colon.",
-"input":"&ang",
-"output": ["ParseError", ["Character", "&ang"]]},
-
-{"description": "Bad named entity: apos without a semi-colon.",
-"input":"&apos",
-"output": ["ParseError", ["Character", "&apos"]]},
-
-{"description": "Bad named entity: asymp without a semi-colon.",
-"input":"&asymp",
-"output": ["ParseError", ["Character", "&asymp"]]},
-
-{"description": "Bad named entity: bdquo without a semi-colon.",
-"input":"&bdquo",
-"output": ["ParseError", ["Character", "&bdquo"]]},
-
-{"description": "Bad named entity: Beta without a semi-colon.",
-"input":"&Beta",
-"output": ["ParseError", ["Character", "&Beta"]]},
-
-{"description": "Bad named entity: beta without a semi-colon.",
-"input":"&beta",
-"output": ["ParseError", ["Character", "&beta"]]},
-
-{"description": "Bad named entity: bull without a semi-colon.",
-"input":"&bull",
-"output": ["ParseError", ["Character", "&bull"]]},
-
-{"description": "Bad named entity: cap without a semi-colon.",
-"input":"&cap",
-"output": ["ParseError", ["Character", "&cap"]]},
-
-{"description": "Bad named entity: Chi without a semi-colon.",
-"input":"&Chi",
-"output": ["ParseError", ["Character", "&Chi"]]},
-
-{"description": "Bad named entity: chi without a semi-colon.",
-"input":"&chi",
-"output": ["ParseError", ["Character", "&chi"]]},
-
-{"description": "Bad named entity: circ without a semi-colon.",
-"input":"&circ",
-"output": ["ParseError", ["Character", "&circ"]]},
-
-{"description": "Bad named entity: clubs without a semi-colon.",
-"input":"&clubs",
-"output": ["ParseError", ["Character", "&clubs"]]},
-
-{"description": "Bad named entity: cong without a semi-colon.",
-"input":"&cong",
-"output": ["ParseError", ["Character", "&cong"]]},
-
-{"description": "Bad named entity: crarr without a semi-colon.",
-"input":"&crarr",
-"output": ["ParseError", ["Character", "&crarr"]]},
-
-{"description": "Bad named entity: cup without a semi-colon.",
-"input":"&cup",
-"output": ["ParseError", ["Character", "&cup"]]},
-
-{"description": "Bad named entity: dagger without a semi-colon.",
-"input":"&dagger",
-"output": ["ParseError", ["Character", "&dagger"]]},
-
-{"description": "Bad named entity: dagger without a semi-colon.",
-"input":"&dagger",
-"output": ["ParseError", ["Character", "&dagger"]]},
-
-{"description": "Bad named entity: darr without a semi-colon.",
-"input":"&darr",
-"output": ["ParseError", ["Character", "&darr"]]},
-
-{"description": "Bad named entity: darr without a semi-colon.",
-"input":"&darr",
-"output": ["ParseError", ["Character", "&darr"]]},
-
-{"description": "Bad named entity: Delta without a semi-colon.",
-"input":"&Delta",
-"output": ["ParseError", ["Character", "&Delta"]]},
-
-{"description": "Bad named entity: delta without a semi-colon.",
-"input":"&delta",
-"output": ["ParseError", ["Character", "&delta"]]},
-
-{"description": "Bad named entity: diams without a semi-colon.",
-"input":"&diams",
-"output": ["ParseError", ["Character", "&diams"]]},
-
-{"description": "Bad named entity: empty without a semi-colon.",
-"input":"&empty",
-"output": ["ParseError", ["Character", "&empty"]]},
-
-{"description": "Bad named entity: emsp without a semi-colon.",
-"input":"&emsp",
-"output": ["ParseError", ["Character", "&emsp"]]},
-
-{"description": "Bad named entity: ensp without a semi-colon.",
-"input":"&ensp",
-"output": ["ParseError", ["Character", "&ensp"]]},
-
-{"description": "Bad named entity: Epsilon without a semi-colon.",
-"input":"&Epsilon",
-"output": ["ParseError", ["Character", "&Epsilon"]]},
-
-{"description": "Bad named entity: epsilon without a semi-colon.",
-"input":"&epsilon",
-"output": ["ParseError", ["Character", "&epsilon"]]},
-
-{"description": "Bad named entity: equiv without a semi-colon.",
-"input":"&equiv",
-"output": ["ParseError", ["Character", "&equiv"]]},
-
-{"description": "Bad named entity: Eta without a semi-colon.",
-"input":"&Eta",
-"output": ["ParseError", ["Character", "&Eta"]]},
-
-{"description": "Bad named entity: eta without a semi-colon.",
-"input":"&eta",
-"output": ["ParseError", ["Character", "&eta"]]},
-
-{"description": "Bad named entity: euro without a semi-colon.",
-"input":"&euro",
-"output": ["ParseError", ["Character", "&euro"]]},
-
-{"description": "Bad named entity: exist without a semi-colon.",
-"input":"&exist",
-"output": ["ParseError", ["Character", "&exist"]]},
-
-{"description": "Bad named entity: fnof without a semi-colon.",
-"input":"&fnof",
-"output": ["ParseError", ["Character", "&fnof"]]},
-
-{"description": "Bad named entity: forall without a semi-colon.",
-"input":"&forall",
-"output": ["ParseError", ["Character", "&forall"]]},
-
-{"description": "Bad named entity: frasl without a semi-colon.",
-"input":"&frasl",
-"output": ["ParseError", ["Character", "&frasl"]]},
-
-{"description": "Bad named entity: Gamma without a semi-colon.",
-"input":"&Gamma",
-"output": ["ParseError", ["Character", "&Gamma"]]},
-
-{"description": "Bad named entity: gamma without a semi-colon.",
-"input":"&gamma",
-"output": ["ParseError", ["Character", "&gamma"]]},
-
-{"description": "Bad named entity: ge without a semi-colon.",
-"input":"&ge",
-"output": ["ParseError", ["Character", "&ge"]]},
-
-{"description": "Bad named entity: harr without a semi-colon.",
-"input":"&harr",
-"output": ["ParseError", ["Character", "&harr"]]},
-
-{"description": "Bad named entity: harr without a semi-colon.",
-"input":"&harr",
-"output": ["ParseError", ["Character", "&harr"]]},
-
-{"description": "Bad named entity: hearts without a semi-colon.",
-"input":"&hearts",
-"output": ["ParseError", ["Character", "&hearts"]]},
-
-{"description": "Bad named entity: hellip without a semi-colon.",
-"input":"&hellip",
-"output": ["ParseError", ["Character", "&hellip"]]},
-
-{"description": "Bad named entity: image without a semi-colon.",
-"input":"&image",
-"output": ["ParseError", ["Character", "&image"]]},
-
-{"description": "Bad named entity: infin without a semi-colon.",
-"input":"&infin",
-"output": ["ParseError", ["Character", "&infin"]]},
-
-{"description": "Bad named entity: int without a semi-colon.",
-"input":"&int",
-"output": ["ParseError", ["Character", "&int"]]},
-
-{"description": "Bad named entity: Iota without a semi-colon.",
-"input":"&Iota",
-"output": ["ParseError", ["Character", "&Iota"]]},
-
-{"description": "Bad named entity: iota without a semi-colon.",
-"input":"&iota",
-"output": ["ParseError", ["Character", "&iota"]]},
-
-{"description": "Bad named entity: isin without a semi-colon.",
-"input":"&isin",
-"output": ["ParseError", ["Character", "&isin"]]},
-
-{"description": "Bad named entity: Kappa without a semi-colon.",
-"input":"&Kappa",
-"output": ["ParseError", ["Character", "&Kappa"]]},
-
-{"description": "Bad named entity: kappa without a semi-colon.",
-"input":"&kappa",
-"output": ["ParseError", ["Character", "&kappa"]]},
-
-{"description": "Bad named entity: Lambda without a semi-colon.",
-"input":"&Lambda",
-"output": ["ParseError", ["Character", "&Lambda"]]},
-
-{"description": "Bad named entity: lambda without a semi-colon.",
-"input":"&lambda",
-"output": ["ParseError", ["Character", "&lambda"]]},
-
-{"description": "Bad named entity: lang without a semi-colon.",
-"input":"&lang",
-"output": ["ParseError", ["Character", "&lang"]]},
-
-{"description": "Bad named entity: larr without a semi-colon.",
-"input":"&larr",
-"output": ["ParseError", ["Character", "&larr"]]},
-
-{"description": "Bad named entity: larr without a semi-colon.",
-"input":"&larr",
-"output": ["ParseError", ["Character", "&larr"]]},
-
-{"description": "Bad named entity: lceil without a semi-colon.",
-"input":"&lceil",
-"output": ["ParseError", ["Character", "&lceil"]]},
-
-{"description": "Bad named entity: ldquo without a semi-colon.",
-"input":"&ldquo",
-"output": ["ParseError", ["Character", "&ldquo"]]},
-
-{"description": "Bad named entity: le without a semi-colon.",
-"input":"&le",
-"output": ["ParseError", ["Character", "&le"]]},
-
-{"description": "Bad named entity: lfloor without a semi-colon.",
-"input":"&lfloor",
-"output": ["ParseError", ["Character", "&lfloor"]]},
-
-{"description": "Bad named entity: lowast without a semi-colon.",
-"input":"&lowast",
-"output": ["ParseError", ["Character", "&lowast"]]},
-
-{"description": "Bad named entity: loz without a semi-colon.",
-"input":"&loz",
-"output": ["ParseError", ["Character", "&loz"]]},
-
-{"description": "Bad named entity: lrm without a semi-colon.",
-"input":"&lrm",
-"output": ["ParseError", ["Character", "&lrm"]]},
-
-{"description": "Bad named entity: lsaquo without a semi-colon.",
-"input":"&lsaquo",
-"output": ["ParseError", ["Character", "&lsaquo"]]},
-
-{"description": "Bad named entity: lsquo without a semi-colon.",
-"input":"&lsquo",
-"output": ["ParseError", ["Character", "&lsquo"]]},
-
-{"description": "Bad named entity: mdash without a semi-colon.",
-"input":"&mdash",
-"output": ["ParseError", ["Character", "&mdash"]]},
-
-{"description": "Bad named entity: minus without a semi-colon.",
-"input":"&minus",
-"output": ["ParseError", ["Character", "&minus"]]},
-
-{"description": "Bad named entity: Mu without a semi-colon.",
-"input":"&Mu",
-"output": ["ParseError", ["Character", "&Mu"]]},
-
-{"description": "Bad named entity: mu without a semi-colon.",
-"input":"&mu",
-"output": ["ParseError", ["Character", "&mu"]]},
-
-{"description": "Bad named entity: nabla without a semi-colon.",
-"input":"&nabla",
-"output": ["ParseError", ["Character", "&nabla"]]},
-
-{"description": "Bad named entity: ndash without a semi-colon.",
-"input":"&ndash",
-"output": ["ParseError", ["Character", "&ndash"]]},
-
-{"description": "Bad named entity: ne without a semi-colon.",
-"input":"&ne",
-"output": ["ParseError", ["Character", "&ne"]]},
-
-{"description": "Bad named entity: ni without a semi-colon.",
-"input":"&ni",
-"output": ["ParseError", ["Character", "&ni"]]},
-
-{"description": "Bad named entity: notin without a semi-colon.",
-"input":"¬in",
-"output": ["ParseError", ["Character", "\u00ACin"]]},
-
-{"description": "Bad named entity: nsub without a semi-colon.",
-"input":"&nsub",
-"output": ["ParseError", ["Character", "&nsub"]]},
-
-{"description": "Bad named entity: Nu without a semi-colon.",
-"input":"&Nu",
-"output": ["ParseError", ["Character", "&Nu"]]},
-
-{"description": "Bad named entity: nu without a semi-colon.",
-"input":"&nu",
-"output": ["ParseError", ["Character", "&nu"]]},
-
-{"description": "Bad named entity: OElig without a semi-colon.",
-"input":"&OElig",
-"output": ["ParseError", ["Character", "&OElig"]]},
-
-{"description": "Bad named entity: oelig without a semi-colon.",
-"input":"&oelig",
-"output": ["ParseError", ["Character", "&oelig"]]},
-
-{"description": "Bad named entity: oline without a semi-colon.",
-"input":"&oline",
-"output": ["ParseError", ["Character", "&oline"]]},
-
-{"description": "Bad named entity: Omega without a semi-colon.",
-"input":"&Omega",
-"output": ["ParseError", ["Character", "&Omega"]]},
-
-{"description": "Bad named entity: omega without a semi-colon.",
-"input":"&omega",
-"output": ["ParseError", ["Character", "&omega"]]},
-
-{"description": "Bad named entity: Omicron without a semi-colon.",
-"input":"&Omicron",
-"output": ["ParseError", ["Character", "&Omicron"]]},
-
-{"description": "Bad named entity: omicron without a semi-colon.",
-"input":"&omicron",
-"output": ["ParseError", ["Character", "&omicron"]]},
-
-{"description": "Bad named entity: oplus without a semi-colon.",
-"input":"&oplus",
-"output": ["ParseError", ["Character", "&oplus"]]},
-
-{"description": "Bad named entity: or without a semi-colon.",
-"input":"&or",
-"output": ["ParseError", ["Character", "&or"]]},
-
-{"description": "Bad named entity: otimes without a semi-colon.",
-"input":"&otimes",
-"output": ["ParseError", ["Character", "&otimes"]]},
-
-{"description": "Bad named entity: part without a semi-colon.",
-"input":"&part",
-"output": ["ParseError", ["Character", "&part"]]},
-
-{"description": "Bad named entity: permil without a semi-colon.",
-"input":"&permil",
-"output": ["ParseError", ["Character", "&permil"]]},
-
-{"description": "Bad named entity: perp without a semi-colon.",
-"input":"&perp",
-"output": ["ParseError", ["Character", "&perp"]]},
-
-{"description": "Bad named entity: Phi without a semi-colon.",
-"input":"&Phi",
-"output": ["ParseError", ["Character", "&Phi"]]},
-
-{"description": "Bad named entity: phi without a semi-colon.",
-"input":"&phi",
-"output": ["ParseError", ["Character", "&phi"]]},
-
-{"description": "Bad named entity: Pi without a semi-colon.",
-"input":"&Pi",
-"output": ["ParseError", ["Character", "&Pi"]]},
-
-{"description": "Bad named entity: pi without a semi-colon.",
-"input":"&pi",
-"output": ["ParseError", ["Character", "&pi"]]},
-
-{"description": "Bad named entity: piv without a semi-colon.",
-"input":"&piv",
-"output": ["ParseError", ["Character", "&piv"]]},
-
-{"description": "Bad named entity: prime without a semi-colon.",
-"input":"&prime",
-"output": ["ParseError", ["Character", "&prime"]]},
-
-{"description": "Bad named entity: prime without a semi-colon.",
-"input":"&prime",
-"output": ["ParseError", ["Character", "&prime"]]},
-
-{"description": "Bad named entity: prod without a semi-colon.",
-"input":"&prod",
-"output": ["ParseError", ["Character", "&prod"]]},
-
-{"description": "Bad named entity: prop without a semi-colon.",
-"input":"&prop",
-"output": ["ParseError", ["Character", "&prop"]]},
-
-{"description": "Bad named entity: Psi without a semi-colon.",
-"input":"&Psi",
-"output": ["ParseError", ["Character", "&Psi"]]},
-
-{"description": "Bad named entity: psi without a semi-colon.",
-"input":"&psi",
-"output": ["ParseError", ["Character", "&psi"]]},
-
-{"description": "Bad named entity: radic without a semi-colon.",
-"input":"&radic",
-"output": ["ParseError", ["Character", "&radic"]]},
-
-{"description": "Bad named entity: rang without a semi-colon.",
-"input":"&rang",
-"output": ["ParseError", ["Character", "&rang"]]},
-
-{"description": "Bad named entity: rarr without a semi-colon.",
-"input":"&rarr",
-"output": ["ParseError", ["Character", "&rarr"]]},
-
-{"description": "Bad named entity: rarr without a semi-colon.",
-"input":"&rarr",
-"output": ["ParseError", ["Character", "&rarr"]]},
-
-{"description": "Bad named entity: rceil without a semi-colon.",
-"input":"&rceil",
-"output": ["ParseError", ["Character", "&rceil"]]},
-
-{"description": "Bad named entity: rdquo without a semi-colon.",
-"input":"&rdquo",
-"output": ["ParseError", ["Character", "&rdquo"]]},
-
-{"description": "Bad named entity: real without a semi-colon.",
-"input":"&real",
-"output": ["ParseError", ["Character", "&real"]]},
-
-{"description": "Bad named entity: rfloor without a semi-colon.",
-"input":"&rfloor",
-"output": ["ParseError", ["Character", "&rfloor"]]},
-
-{"description": "Bad named entity: Rho without a semi-colon.",
-"input":"&Rho",
-"output": ["ParseError", ["Character", "&Rho"]]},
-
-{"description": "Bad named entity: rho without a semi-colon.",
-"input":"&rho",
-"output": ["ParseError", ["Character", "&rho"]]},
-
-{"description": "Bad named entity: rlm without a semi-colon.",
-"input":"&rlm",
-"output": ["ParseError", ["Character", "&rlm"]]},
-
-{"description": "Bad named entity: rsaquo without a semi-colon.",
-"input":"&rsaquo",
-"output": ["ParseError", ["Character", "&rsaquo"]]},
-
-{"description": "Bad named entity: rsquo without a semi-colon.",
-"input":"&rsquo",
-"output": ["ParseError", ["Character", "&rsquo"]]},
-
-{"description": "Bad named entity: sbquo without a semi-colon.",
-"input":"&sbquo",
-"output": ["ParseError", ["Character", "&sbquo"]]},
-
-{"description": "Bad named entity: Scaron without a semi-colon.",
-"input":"&Scaron",
-"output": ["ParseError", ["Character", "&Scaron"]]},
-
-{"description": "Bad named entity: scaron without a semi-colon.",
-"input":"&scaron",
-"output": ["ParseError", ["Character", "&scaron"]]},
-
-{"description": "Bad named entity: sdot without a semi-colon.",
-"input":"&sdot",
-"output": ["ParseError", ["Character", "&sdot"]]},
-
-{"description": "Bad named entity: Sigma without a semi-colon.",
-"input":"&Sigma",
-"output": ["ParseError", ["Character", "&Sigma"]]},
-
-{"description": "Bad named entity: sigma without a semi-colon.",
-"input":"&sigma",
-"output": ["ParseError", ["Character", "&sigma"]]},
-
-{"description": "Bad named entity: sigmaf without a semi-colon.",
-"input":"&sigmaf",
-"output": ["ParseError", ["Character", "&sigmaf"]]},
-
-{"description": "Bad named entity: sim without a semi-colon.",
-"input":"&sim",
-"output": ["ParseError", ["Character", "&sim"]]},
-
-{"description": "Bad named entity: spades without a semi-colon.",
-"input":"&spades",
-"output": ["ParseError", ["Character", "&spades"]]},
-
-{"description": "Bad named entity: sub without a semi-colon.",
-"input":"&sub",
-"output": ["ParseError", ["Character", "&sub"]]},
-
-{"description": "Bad named entity: sube without a semi-colon.",
-"input":"&sube",
-"output": ["ParseError", ["Character", "&sube"]]},
-
-{"description": "Bad named entity: sum without a semi-colon.",
-"input":"&sum",
-"output": ["ParseError", ["Character", "&sum"]]},
-
-{"description": "Bad named entity: sup without a semi-colon.",
-"input":"&sup",
-"output": ["ParseError", ["Character", "&sup"]]},
-
-{"description": "Bad named entity: supe without a semi-colon.",
-"input":"&supe",
-"output": ["ParseError", ["Character", "&supe"]]},
-
-{"description": "Bad named entity: Tau without a semi-colon.",
-"input":"&Tau",
-"output": ["ParseError", ["Character", "&Tau"]]},
-
-{"description": "Bad named entity: tau without a semi-colon.",
-"input":"&tau",
-"output": ["ParseError", ["Character", "&tau"]]},
-
-{"description": "Bad named entity: there4 without a semi-colon.",
-"input":"&there4",
-"output": ["ParseError", ["Character", "&there4"]]},
-
-{"description": "Bad named entity: Theta without a semi-colon.",
-"input":"&Theta",
-"output": ["ParseError", ["Character", "&Theta"]]},
-
-{"description": "Bad named entity: theta without a semi-colon.",
-"input":"&theta",
-"output": ["ParseError", ["Character", "&theta"]]},
-
-{"description": "Bad named entity: thetasym without a semi-colon.",
-"input":"&thetasym",
-"output": ["ParseError", ["Character", "&thetasym"]]},
-
-{"description": "Bad named entity: thinsp without a semi-colon.",
-"input":"&thinsp",
-"output": ["ParseError", ["Character", "&thinsp"]]},
-
-{"description": "Bad named entity: tilde without a semi-colon.",
-"input":"&tilde",
-"output": ["ParseError", ["Character", "&tilde"]]},
-
-{"description": "Bad named entity: trade without a semi-colon.",
-"input":"&trade",
-"output": ["ParseError", ["Character", "&trade"]]},
-
-{"description": "Bad named entity: uarr without a semi-colon.",
-"input":"&uarr",
-"output": ["ParseError", ["Character", "&uarr"]]},
-
-{"description": "Bad named entity: uarr without a semi-colon.",
-"input":"&uarr",
-"output": ["ParseError", ["Character", "&uarr"]]},
-
-{"description": "Bad named entity: upsih without a semi-colon.",
-"input":"&upsih",
-"output": ["ParseError", ["Character", "&upsih"]]},
-
-{"description": "Bad named entity: Upsilon without a semi-colon.",
-"input":"&Upsilon",
-"output": ["ParseError", ["Character", "&Upsilon"]]},
-
-{"description": "Bad named entity: upsilon without a semi-colon.",
-"input":"&upsilon",
-"output": ["ParseError", ["Character", "&upsilon"]]},
-
-{"description": "Bad named entity: weierp without a semi-colon.",
-"input":"&weierp",
-"output": ["ParseError", ["Character", "&weierp"]]},
-
-{"description": "Bad named entity: Xi without a semi-colon.",
-"input":"&Xi",
-"output": ["ParseError", ["Character", "&Xi"]]},
-
-{"description": "Bad named entity: xi without a semi-colon.",
-"input":"&xi",
-"output": ["ParseError", ["Character", "&xi"]]},
-
-{"description": "Bad named entity: Yuml without a semi-colon.",
-"input":"&Yuml",
-"output": ["ParseError", ["Character", "&Yuml"]]},
-
-{"description": "Bad named entity: Zeta without a semi-colon.",
-"input":"&Zeta",
-"output": ["ParseError", ["Character", "&Zeta"]]},
-
-{"description": "Bad named entity: zeta without a semi-colon.",
-"input":"&zeta",
-"output": ["ParseError", ["Character", "&zeta"]]},
-
-{"description": "Bad named entity: zwj without a semi-colon.",
-"input":"&zwj",
-"output": ["ParseError", ["Character", "&zwj"]]},
-
-{"description": "Bad named entity: zwnj without a semi-colon.",
-"input":"&zwnj",
-"output": ["ParseError", ["Character", "&zwnj"]]},
-
-{"description": "Bad named entity: zwnj without a semi-colon.",
-"input":"&zwnj",
-"output": ["ParseError", ["Character", "&zwnj"]]},
+{"description": "Entity name followed by the equals sign in an attribute value.",
+"input":"<h a='&lang='>",
+"output": [["StartTag", "h", {"a": "&lang="}]]},
{"description": "CR as numeric entity",
"input":"
",
-"output": ["ParseError", ["Character", "\n"]]},
+"output": ["ParseError", ["Character", "\r"]]},
{"description": "CR as hexadecimal numeric entity",
"input":"
",
-"output": ["ParseError", ["Character", "\n"]]},
+"output": ["ParseError", ["Character", "\r"]]},
{"description": "Windows-1252 EURO SIGN numeric entity.",
"input":"€",
@@ -2094,7 +22,7 @@
{"description": "Windows-1252 REPLACEMENT CHAR numeric entity.",
"input":"",
-"output": ["ParseError", ["Character", "\uFFFD"]]},
+"output": ["ParseError", ["Character", "\u0081"]]},
{"description": "Windows-1252 SINGLE LOW-9 QUOTATION MARK numeric entity.",
"input":"‚",
@@ -2142,7 +70,7 @@
{"description": "Windows-1252 REPLACEMENT CHAR numeric entity.",
"input":"",
-"output": ["ParseError", ["Character", "\uFFFD"]]},
+"output": ["ParseError", ["Character", "\u008D"]]},
{"description": "Windows-1252 LATIN CAPITAL LETTER Z WITH CARON numeric entity.",
"input":"Ž",
@@ -2150,11 +78,11 @@
{"description": "Windows-1252 REPLACEMENT CHAR numeric entity.",
"input":"",
-"output": ["ParseError", ["Character", "\uFFFD"]]},
+"output": ["ParseError", ["Character", "\u008F"]]},
{"description": "Windows-1252 REPLACEMENT CHAR numeric entity.",
"input":"",
-"output": ["ParseError", ["Character", "\uFFFD"]]},
+"output": ["ParseError", ["Character", "\u0090"]]},
{"description": "Windows-1252 LEFT SINGLE QUOTATION MARK numeric entity.",
"input":"‘",
@@ -2206,7 +134,7 @@
{"description": "Windows-1252 REPLACEMENT CHAR numeric entity.",
"input":"",
-"output": ["ParseError", ["Character", "\uFFFD"]]},
+"output": ["ParseError", ["Character", "\u009D"]]},
{"description": "Windows-1252 EURO SIGN hexadecimal numeric entity.",
"input":"€",
@@ -2214,7 +142,7 @@
{"description": "Windows-1252 REPLACEMENT CHAR hexadecimal numeric entity.",
"input":"",
-"output": ["ParseError", ["Character", "\uFFFD"]]},
+"output": ["ParseError", ["Character", "\u0081"]]},
{"description": "Windows-1252 SINGLE LOW-9 QUOTATION MARK hexadecimal numeric entity.",
"input":"‚",
@@ -2262,7 +190,7 @@
{"description": "Windows-1252 REPLACEMENT CHAR hexadecimal numeric entity.",
"input":"",
-"output": ["ParseError", ["Character", "\uFFFD"]]},
+"output": ["ParseError", ["Character", "\u008D"]]},
{"description": "Windows-1252 LATIN CAPITAL LETTER Z WITH CARON hexadecimal numeric entity.",
"input":"Ž",
@@ -2270,11 +198,11 @@
{"description": "Windows-1252 REPLACEMENT CHAR hexadecimal numeric entity.",
"input":"",
-"output": ["ParseError", ["Character", "\uFFFD"]]},
+"output": ["ParseError", ["Character", "\u008F"]]},
{"description": "Windows-1252 REPLACEMENT CHAR hexadecimal numeric entity.",
"input":"",
-"output": ["ParseError", ["Character", "\uFFFD"]]},
+"output": ["ParseError", ["Character", "\u0090"]]},
{"description": "Windows-1252 LEFT SINGLE QUOTATION MARK hexadecimal numeric entity.",
"input":"‘",
@@ -2326,7 +254,7 @@
{"description": "Windows-1252 REPLACEMENT CHAR hexadecimal numeric entity.",
"input":"",
-"output": ["ParseError", ["Character", "\uFFFD"]]},
+"output": ["ParseError", ["Character", "\u009D"]]},
{"description": "Windows-1252 LATIN SMALL LETTER Z WITH CARON hexadecimal numeric entity.",
"input":"ž",
@@ -2334,6 +262,22 @@
{"description": "Windows-1252 LATIN CAPITAL LETTER Y WITH DIAERESIS hexadecimal numeric entity.",
"input":"Ÿ",
-"output": ["ParseError", ["Character", "\u0178"]]}
+"output": ["ParseError", ["Character", "\u0178"]]},
+
+{"description": "Decimal numeric entity followed by hex character a.",
+"input":"aa",
+"output": ["ParseError", ["Character", "aa"]]},
+
+{"description": "Decimal numeric entity followed by hex character A.",
+"input":"aA",
+"output": ["ParseError", ["Character", "aA"]]},
+
+{"description": "Decimal numeric entity followed by hex character f.",
+"input":"af",
+"output": ["ParseError", ["Character", "af"]]},
+
+{"description": "Decimal numeric entity followed by hex character F.",
+"input":"aF",
+"output": ["ParseError", ["Character", "aF"]]}
]}
diff --git a/test/data/tokeniser2/numericEntities.test b/test/data/tokeniser2/numericEntities.test
index 78a8a13..36c8228 100644
--- a/test/data/tokeniser2/numericEntities.test
+++ b/test/data/tokeniser2/numericEntities.test
@@ -6,115 +6,115 @@
{"description": "Invalid numeric entity character U+0001",
"input": "",
-"output": ["ParseError", ["Character", "\uFFFD"]]},
+"output": ["ParseError", ["Character", "\u0001"]]},
{"description": "Invalid numeric entity character U+0002",
"input": "",
-"output": ["ParseError", ["Character", "\uFFFD"]]},
+"output": ["ParseError", ["Character", "\u0002"]]},
{"description": "Invalid numeric entity character U+0003",
"input": "",
-"output": ["ParseError", ["Character", "\uFFFD"]]},
+"output": ["ParseError", ["Character", "\u0003"]]},
{"description": "Invalid numeric entity character U+0004",
"input": "",
-"output": ["ParseError", ["Character", "\uFFFD"]]},
+"output": ["ParseError", ["Character", "\u0004"]]},
{"description": "Invalid numeric entity character U+0005",
"input": "",
-"output": ["ParseError", ["Character", "\uFFFD"]]},
+"output": ["ParseError", ["Character", "\u0005"]]},
{"description": "Invalid numeric entity character U+0006",
"input": "",
-"output": ["ParseError", ["Character", "\uFFFD"]]},
+"output": ["ParseError", ["Character", "\u0006"]]},
{"description": "Invalid numeric entity character U+0007",
"input": "",
-"output": ["ParseError", ["Character", "\uFFFD"]]},
+"output": ["ParseError", ["Character", "\u0007"]]},
{"description": "Invalid numeric entity character U+0008",
"input": "",
-"output": ["ParseError", ["Character", "\uFFFD"]]},
+"output": ["ParseError", ["Character", "\u0008"]]},
{"description": "Invalid numeric entity character U+000B",
"input": "",
-"output": ["ParseError", ["Character", "\uFFFD"]]},
+"output": ["ParseError", ["Character", "\u000b"]]},
{"description": "Invalid numeric entity character U+000E",
"input": "",
-"output": ["ParseError", ["Character", "\uFFFD"]]},
+"output": ["ParseError", ["Character", "\u000e"]]},
{"description": "Invalid numeric entity character U+000F",
"input": "",
-"output": ["ParseError", ["Character", "\uFFFD"]]},
+"output": ["ParseError", ["Character", "\u000f"]]},
{"description": "Invalid numeric entity character U+0010",
"input": "",
-"output": ["ParseError", ["Character", "\uFFFD"]]},
+"output": ["ParseError", ["Character", "\u0010"]]},
{"description": "Invalid numeric entity character U+0011",
"input": "",
-"output": ["ParseError", ["Character", "\uFFFD"]]},
+"output": ["ParseError", ["Character", "\u0011"]]},
{"description": "Invalid numeric entity character U+0012",
"input": "",
-"output": ["ParseError", ["Character", "\uFFFD"]]},
+"output": ["ParseError", ["Character", "\u0012"]]},
{"description": "Invalid numeric entity character U+0013",
"input": "",
-"output": ["ParseError", ["Character", "\uFFFD"]]},
+"output": ["ParseError", ["Character", "\u0013"]]},
{"description": "Invalid numeric entity character U+0014",
"input": "",
-"output": ["ParseError", ["Character", "\uFFFD"]]},
+"output": ["ParseError", ["Character", "\u0014"]]},
{"description": "Invalid numeric entity character U+0015",
"input": "",
-"output": ["ParseError", ["Character", "\uFFFD"]]},
+"output": ["ParseError", ["Character", "\u0015"]]},
{"description": "Invalid numeric entity character U+0016",
"input": "",
-"output": ["ParseError", ["Character", "\uFFFD"]]},
+"output": ["ParseError", ["Character", "\u0016"]]},
{"description": "Invalid numeric entity character U+0017",
"input": "",
-"output": ["ParseError", ["Character", "\uFFFD"]]},
+"output": ["ParseError", ["Character", "\u0017"]]},
{"description": "Invalid numeric entity character U+0018",
"input": "",
-"output": ["ParseError", ["Character", "\uFFFD"]]},
+"output": ["ParseError", ["Character", "\u0018"]]},
{"description": "Invalid numeric entity character U+0019",
"input": "",
-"output": ["ParseError", ["Character", "\uFFFD"]]},
+"output": ["ParseError", ["Character", "\u0019"]]},
{"description": "Invalid numeric entity character U+001A",
"input": "",
-"output": ["ParseError", ["Character", "\uFFFD"]]},
+"output": ["ParseError", ["Character", "\u001a"]]},
{"description": "Invalid numeric entity character U+001B",
"input": "",
-"output": ["ParseError", ["Character", "\uFFFD"]]},
+"output": ["ParseError", ["Character", "\u001b"]]},
{"description": "Invalid numeric entity character U+001C",
"input": "",
-"output": ["ParseError", ["Character", "\uFFFD"]]},
+"output": ["ParseError", ["Character", "\u001c"]]},
{"description": "Invalid numeric entity character U+001D",
"input": "",
-"output": ["ParseError", ["Character", "\uFFFD"]]},
+"output": ["ParseError", ["Character", "\u001d"]]},
{"description": "Invalid numeric entity character U+001E",
"input": "",
-"output": ["ParseError", ["Character", "\uFFFD"]]},
+"output": ["ParseError", ["Character", "\u001e"]]},
{"description": "Invalid numeric entity character U+001F",
"input": "",
-"output": ["ParseError", ["Character", "\uFFFD"]]},
+"output": ["ParseError", ["Character", "\u001f"]]},
{"description": "Invalid numeric entity character U+007F",
"input": "",
-"output": ["ParseError", ["Character", "\uFFFD"]]},
+"output": ["ParseError", ["Character", "\u007f"]]},
{"description": "Invalid numeric entity character U+D800",
"input": "�",
@@ -126,267 +126,267 @@
{"description": "Invalid numeric entity character U+FDD0",
"input": "",
-"output": ["ParseError", ["Character", "\uFFFD"]]},
+"output": ["ParseError", ["Character", "\ufdd0"]]},
{"description": "Invalid numeric entity character U+FDD1",
"input": "",
-"output": ["ParseError", ["Character", "\uFFFD"]]},
+"output": ["ParseError", ["Character", "\ufdd1"]]},
{"description": "Invalid numeric entity character U+FDD2",
"input": "",
-"output": ["ParseError", ["Character", "\uFFFD"]]},
+"output": ["ParseError", ["Character", "\ufdd2"]]},
{"description": "Invalid numeric entity character U+FDD3",
"input": "",
-"output": ["ParseError", ["Character", "\uFFFD"]]},
+"output": ["ParseError", ["Character", "\ufdd3"]]},
{"description": "Invalid numeric entity character U+FDD4",
"input": "",
-"output": ["ParseError", ["Character", "\uFFFD"]]},
+"output": ["ParseError", ["Character", "\ufdd4"]]},
{"description": "Invalid numeric entity character U+FDD5",
"input": "",
-"output": ["ParseError", ["Character", "\uFFFD"]]},
+"output": ["ParseError", ["Character", "\ufdd5"]]},
{"description": "Invalid numeric entity character U+FDD6",
"input": "",
-"output": ["ParseError", ["Character", "\uFFFD"]]},
+"output": ["ParseError", ["Character", "\ufdd6"]]},
{"description": "Invalid numeric entity character U+FDD7",
"input": "",
-"output": ["ParseError", ["Character", "\uFFFD"]]},
+"output": ["ParseError", ["Character", "\ufdd7"]]},
{"description": "Invalid numeric entity character U+FDD8",
"input": "",
-"output": ["ParseError", ["Character", "\uFFFD"]]},
+"output": ["ParseError", ["Character", "\ufdd8"]]},
{"description": "Invalid numeric entity character U+FDD9",
"input": "",
-"output": ["ParseError", ["Character", "\uFFFD"]]},
+"output": ["ParseError", ["Character", "\ufdd9"]]},
{"description": "Invalid numeric entity character U+FDDA",
"input": "",
-"output": ["ParseError", ["Character", "\uFFFD"]]},
+"output": ["ParseError", ["Character", "\ufdda"]]},
{"description": "Invalid numeric entity character U+FDDB",
"input": "",
-"output": ["ParseError", ["Character", "\uFFFD"]]},
+"output": ["ParseError", ["Character", "\ufddb"]]},
{"description": "Invalid numeric entity character U+FDDC",
"input": "",
-"output": ["ParseError", ["Character", "\uFFFD"]]},
+"output": ["ParseError", ["Character", "\ufddc"]]},
{"description": "Invalid numeric entity character U+FDDD",
"input": "",
-"output": ["ParseError", ["Character", "\uFFFD"]]},
+"output": ["ParseError", ["Character", "\ufddd"]]},
{"description": "Invalid numeric entity character U+FDDE",
"input": "",
-"output": ["ParseError", ["Character", "\uFFFD"]]},
+"output": ["ParseError", ["Character", "\ufdde"]]},
{"description": "Invalid numeric entity character U+FDDF",
"input": "",
-"output": ["ParseError", ["Character", "\uFFFD"]]},
+"output": ["ParseError", ["Character", "\ufddf"]]},
{"description": "Invalid numeric entity character U+FDE0",
"input": "",
-"output": ["ParseError", ["Character", "\uFFFD"]]},
+"output": ["ParseError", ["Character", "\ufde0"]]},
{"description": "Invalid numeric entity character U+FDE1",
"input": "",
-"output": ["ParseError", ["Character", "\uFFFD"]]},
+"output": ["ParseError", ["Character", "\ufde1"]]},
{"description": "Invalid numeric entity character U+FDE2",
"input": "",
-"output": ["ParseError", ["Character", "\uFFFD"]]},
+"output": ["ParseError", ["Character", "\ufde2"]]},
{"description": "Invalid numeric entity character U+FDE3",
"input": "",
-"output": ["ParseError", ["Character", "\uFFFD"]]},
+"output": ["ParseError", ["Character", "\ufde3"]]},
{"description": "Invalid numeric entity character U+FDE4",
"input": "",
-"output": ["ParseError", ["Character", "\uFFFD"]]},
+"output": ["ParseError", ["Character", "\ufde4"]]},
{"description": "Invalid numeric entity character U+FDE5",
"input": "",
-"output": ["ParseError", ["Character", "\uFFFD"]]},
+"output": ["ParseError", ["Character", "\ufde5"]]},
{"description": "Invalid numeric entity character U+FDE6",
"input": "",
-"output": ["ParseError", ["Character", "\uFFFD"]]},
+"output": ["ParseError", ["Character", "\ufde6"]]},
{"description": "Invalid numeric entity character U+FDE7",
"input": "",
-"output": ["ParseError", ["Character", "\uFFFD"]]},
+"output": ["ParseError", ["Character", "\ufde7"]]},
{"description": "Invalid numeric entity character U+FDE8",
"input": "",
-"output": ["ParseError", ["Character", "\uFFFD"]]},
+"output": ["ParseError", ["Character", "\ufde8"]]},
{"description": "Invalid numeric entity character U+FDE9",
"input": "",
-"output": ["ParseError", ["Character", "\uFFFD"]]},
+"output": ["ParseError", ["Character", "\ufde9"]]},
{"description": "Invalid numeric entity character U+FDEA",
"input": "",
-"output": ["ParseError", ["Character", "\uFFFD"]]},
+"output": ["ParseError", ["Character", "\ufdea"]]},
{"description": "Invalid numeric entity character U+FDEB",
"input": "",
-"output": ["ParseError", ["Character", "\uFFFD"]]},
+"output": ["ParseError", ["Character", "\ufdeb"]]},
{"description": "Invalid numeric entity character U+FDEC",
"input": "",
-"output": ["ParseError", ["Character", "\uFFFD"]]},
+"output": ["ParseError", ["Character", "\ufdec"]]},
{"description": "Invalid numeric entity character U+FDED",
"input": "",
-"output": ["ParseError", ["Character", "\uFFFD"]]},
+"output": ["ParseError", ["Character", "\ufded"]]},
{"description": "Invalid numeric entity character U+FDEE",
"input": "",
-"output": ["ParseError", ["Character", "\uFFFD"]]},
+"output": ["ParseError", ["Character", "\ufdee"]]},
{"description": "Invalid numeric entity character U+FDEF",
"input": "",
-"output": ["ParseError", ["Character", "\uFFFD"]]},
+"output": ["ParseError", ["Character", "\ufdef"]]},
{"description": "Invalid numeric entity character U+FFFE",
"input": "",
-"output": ["ParseError", ["Character", "\uFFFD"]]},
+"output": ["ParseError", ["Character", "\ufffe"]]},
{"description": "Invalid numeric entity character U+FFFF",
"input": "",
-"output": ["ParseError", ["Character", "\uFFFD"]]},
+"output": ["ParseError", ["Character", "\uffff"]]},
{"description": "Invalid numeric entity character U+1FFFE",
"input": "",
-"output": ["ParseError", ["Character", "\uFFFD"]]},
+"output": ["ParseError", ["Character", "\uD83F\uDFFE"]]},
{"description": "Invalid numeric entity character U+1FFFF",
"input": "",
-"output": ["ParseError", ["Character", "\uFFFD"]]},
+"output": ["ParseError", ["Character", "\uD83F\uDFFF"]]},
{"description": "Invalid numeric entity character U+2FFFE",
"input": "",
-"output": ["ParseError", ["Character", "\uFFFD"]]},
+"output": ["ParseError", ["Character", "\uD87F\uDFFE"]]},
{"description": "Invalid numeric entity character U+2FFFF",
"input": "",
-"output": ["ParseError", ["Character", "\uFFFD"]]},
+"output": ["ParseError", ["Character", "\uD87F\uDFFF"]]},
{"description": "Invalid numeric entity character U+3FFFE",
"input": "",
-"output": ["ParseError", ["Character", "\uFFFD"]]},
+"output": ["ParseError", ["Character", "\uD8BF\uDFFE"]]},
{"description": "Invalid numeric entity character U+3FFFF",
"input": "",
-"output": ["ParseError", ["Character", "\uFFFD"]]},
+"output": ["ParseError", ["Character", "\uD8BF\uDFFF"]]},
{"description": "Invalid numeric entity character U+4FFFE",
"input": "",
-"output": ["ParseError", ["Character", "\uFFFD"]]},
+"output": ["ParseError", ["Character", "\uD8FF\uDFFE"]]},
{"description": "Invalid numeric entity character U+4FFFF",
"input": "",
-"output": ["ParseError", ["Character", "\uFFFD"]]},
+"output": ["ParseError", ["Character", "\uD8FF\uDFFF"]]},
{"description": "Invalid numeric entity character U+5FFFE",
"input": "",
-"output": ["ParseError", ["Character", "\uFFFD"]]},
+"output": ["ParseError", ["Character", "\uD93F\uDFFE"]]},
{"description": "Invalid numeric entity character U+5FFFF",
"input": "",
-"output": ["ParseError", ["Character", "\uFFFD"]]},
+"output": ["ParseError", ["Character", "\uD93F\uDFFF"]]},
{"description": "Invalid numeric entity character U+6FFFE",
"input": "",
-"output": ["ParseError", ["Character", "\uFFFD"]]},
+"output": ["ParseError", ["Character", "\uD97F\uDFFE"]]},
{"description": "Invalid numeric entity character U+6FFFF",
"input": "",
-"output": ["ParseError", ["Character", "\uFFFD"]]},
+"output": ["ParseError", ["Character", "\uD97F\uDFFF"]]},
{"description": "Invalid numeric entity character U+7FFFE",
"input": "",
-"output": ["ParseError", ["Character", "\uFFFD"]]},
+"output": ["ParseError", ["Character", "\uD9BF\uDFFE"]]},
{"description": "Invalid numeric entity character U+7FFFF",
"input": "",
-"output": ["ParseError", ["Character", "\uFFFD"]]},
+"output": ["ParseError", ["Character", "\uD9BF\uDFFF"]]},
{"description": "Invalid numeric entity character U+8FFFE",
"input": "",
-"output": ["ParseError", ["Character", "\uFFFD"]]},
+"output": ["ParseError", ["Character", "\uD9FF\uDFFE"]]},
{"description": "Invalid numeric entity character U+8FFFF",
"input": "",
-"output": ["ParseError", ["Character", "\uFFFD"]]},
+"output": ["ParseError", ["Character", "\uD9FF\uDFFF"]]},
{"description": "Invalid numeric entity character U+9FFFE",
"input": "",
-"output": ["ParseError", ["Character", "\uFFFD"]]},
+"output": ["ParseError", ["Character", "\uDA3F\uDFFE"]]},
{"description": "Invalid numeric entity character U+9FFFF",
"input": "",
-"output": ["ParseError", ["Character", "\uFFFD"]]},
+"output": ["ParseError", ["Character", "\uDA3F\uDFFF"]]},
{"description": "Invalid numeric entity character U+AFFFE",
"input": "",
-"output": ["ParseError", ["Character", "\uFFFD"]]},
+"output": ["ParseError", ["Character", "\uDA7F\uDFFE"]]},
{"description": "Invalid numeric entity character U+AFFFF",
"input": "",
-"output": ["ParseError", ["Character", "\uFFFD"]]},
+"output": ["ParseError", ["Character", "\uDA7F\uDFFF"]]},
{"description": "Invalid numeric entity character U+BFFFE",
"input": "",
-"output": ["ParseError", ["Character", "\uFFFD"]]},
+"output": ["ParseError", ["Character", "\uDABF\uDFFE"]]},
{"description": "Invalid numeric entity character U+BFFFF",
"input": "",
-"output": ["ParseError", ["Character", "\uFFFD"]]},
+"output": ["ParseError", ["Character", "\uDABF\uDFFF"]]},
{"description": "Invalid numeric entity character U+CFFFE",
"input": "",
-"output": ["ParseError", ["Character", "\uFFFD"]]},
+"output": ["ParseError", ["Character", "\uDAFF\uDFFE"]]},
{"description": "Invalid numeric entity character U+CFFFF",
"input": "",
-"output": ["ParseError", ["Character", "\uFFFD"]]},
+"output": ["ParseError", ["Character", "\uDAFF\uDFFF"]]},
{"description": "Invalid numeric entity character U+DFFFE",
"input": "",
-"output": ["ParseError", ["Character", "\uFFFD"]]},
+"output": ["ParseError", ["Character", "\uDB3F\uDFFE"]]},
{"description": "Invalid numeric entity character U+DFFFF",
"input": "",
-"output": ["ParseError", ["Character", "\uFFFD"]]},
+"output": ["ParseError", ["Character", "\uDB3F\uDFFF"]]},
{"description": "Invalid numeric entity character U+EFFFE",
"input": "",
-"output": ["ParseError", ["Character", "\uFFFD"]]},
+"output": ["ParseError", ["Character", "\uDB7F\uDFFE"]]},
{"description": "Invalid numeric entity character U+EFFFF",
"input": "",
-"output": ["ParseError", ["Character", "\uFFFD"]]},
+"output": ["ParseError", ["Character", "\uDB7F\uDFFF"]]},
{"description": "Invalid numeric entity character U+FFFFE",
"input": "",
-"output": ["ParseError", ["Character", "\uFFFD"]]},
+"output": ["ParseError", ["Character", "\uDBBF\uDFFE"]]},
{"description": "Invalid numeric entity character U+FFFFF",
"input": "",
-"output": ["ParseError", ["Character", "\uFFFD"]]},
+"output": ["ParseError", ["Character", "\uDBBF\uDFFF"]]},
{"description": "Invalid numeric entity character U+10FFFE",
"input": "",
-"output": ["ParseError", ["Character", "\uFFFD"]]},
+"output": ["ParseError", ["Character", "\uDBFF\uDFFE"]]},
{"description": "Invalid numeric entity character U+10FFFF",
"input": "",
-"output": ["ParseError", ["Character", "\uFFFD"]]},
+"output": ["ParseError", ["Character", "\uDBFF\uDFFF"]]},
{"description": "Valid numeric entity character U+0009",
"input": "	",
@@ -1309,3 +1309,5 @@
"output": [["Character", "\uDBFF\uDFFD"]]}
]}
+
+
diff --git a/test/data/tokeniser2/test4.test b/test/data/tokeniser2/test4.test
index ec8f72c..4be94b0 100644
--- a/test/data/tokeniser2/test4.test
+++ b/test/data/tokeniser2/test4.test
@@ -1,11 +1,11 @@
{"tests": [
{"description":"< in attribute name",
-"input":"<z/0 <",
+"input":"<z/0 <>",
"output":["ParseError", "ParseError", ["StartTag", "z", {"0": "", "<": ""}]]},
{"description":"< in attribute value",
-"input":"<z x=<",
+"input":"<z x=<>",
"output":["ParseError", ["StartTag", "z", {"x": "<"}]]},
{"description":"= in unquoted attribute value",
@@ -28,25 +28,25 @@
"input":"<z ====>",
"output":["ParseError", "ParseError", "ParseError", ["StartTag", "z", {"=": "=="}]]},
-{"description":"Allowed \" after ampersand in attribute value",
+{"description":"\" after ampersand in double-quoted attribute value",
"input":"<z z=\"&\">",
"output":[["StartTag", "z", {"z": "&"}]]},
-{"description":"Non-allowed ' after ampersand in attribute value",
+{"description":"' after ampersand in double-quoted attribute value",
"input":"<z z=\"&'\">",
-"output":["ParseError", ["StartTag", "z", {"z": "&'"}]]},
+"output":[["StartTag", "z", {"z": "&'"}]]},
-{"description":"Allowed ' after ampersand in attribute value",
+{"description":"' after ampersand in single-quoted attribute value",
"input":"<z z='&'>",
"output":[["StartTag", "z", {"z": "&"}]]},
-{"description":"Non-allowed \" after ampersand in attribute value",
+{"description":"\" after ampersand in single-quoted attribute value",
"input":"<z z='&\"'>",
-"output":["ParseError", ["StartTag", "z", {"z": "&\""}]]},
+"output":[["StartTag", "z", {"z": "&\""}]]},
{"description":"Text after bogus character reference",
"input":"<z z='&xlink_xmlns;'>bar<z>",
-"output":["ParseError",["StartTag","z",{"z":"&xlink_xmlns;"}],["Character","bar"],["StartTag","z",{}]]},
+"output":[["StartTag","z",{"z":"&xlink_xmlns;"}],["Character","bar"],["StartTag","z",{}]]},
{"description":"Text after hex character reference",
"input":"<z z='&#x0020; foo'>bar<z>",
@@ -98,11 +98,11 @@
{"description":"CR EOF in tag name",
"input":"<z\r",
-"output":["ParseError", ["StartTag", "z", {}]]},
+"output":["ParseError"]},
{"description":"Slash EOF in tag name",
"input":"<z/",
-"output":["ParseError", ["StartTag", "z", {}]]},
+"output":["ParseError"]},
{"description":"Zero hex numeric entity",
"input":"&#x0000",
@@ -134,7 +134,7 @@
{"description":"Maximum non-BMP numeric entity",
"input":"&#X10FFFF;",
-"output":["ParseError", ["Character", "\uFFFD"]]},
+"output":["ParseError", ["Character", "\uDBFF\uDFFF"]]},
{"description":"Above maximum numeric entity",
"input":"&#x110000;",
@@ -222,12 +222,12 @@
{"description":"U+0000 in lookahead region after non-matching character",
"input":"<!doc>\u0000",
-"output":["ParseError", ["Comment", "doc"], "ParseError", ["Character", "\uFFFD"]],
+"output":["ParseError", ["Comment", "doc"], "ParseError", ["Character", "\u0000"]],
"ignoreErrorOrder":true},
{"description":"U+0000 in lookahead region",
"input":"<!doc\u0000",
-"output":["ParseError", "ParseError", ["Comment", "doc\uFFFD"]],
+"output":["ParseError", ["Comment", "doc\uFFFD"]],
"ignoreErrorOrder":true},
{"description":"U+0080 in lookahead region",
@@ -245,11 +245,6 @@
"output":["ParseError", "ParseError", ["Comment", "doc\uD83F\uDFFF"]],
"ignoreErrorOrder":true},
-{"description":"CR followed by U+0000",
-"input":"\r\u0000",
-"output":["ParseError", ["Character", "\n\uFFFD"]],
-"ignoreErrorOrder":true},
-
{"description":"CR followed by non-LF",
"input":"\r?",
"output":[["Character", "\n?"]]},
@@ -300,6 +295,50 @@
{"description":"Doctype html x>text",
"input":"<!DOCTYPE html x>text",
-"output":["ParseError", ["DOCTYPE", "html", null, null, false], ["Character", "text"]]}
+"output":["ParseError", ["DOCTYPE", "html", null, null, false], ["Character", "text"]]},
+
+{"description":"Grave accent in unquoted attribute",
+"input":"<a a=aa`>",
+"output":["ParseError", ["StartTag", "a", {"a":"aa`"}]]},
+
+{"description":"EOF in tag name state ",
+"input":"<a",
+"output":["ParseError"]},
+
+{"description":"EOF in tag name state",
+"input":"<a",
+"output":["ParseError"]},
+
+{"description":"EOF in before attribute name state",
+"input":"<a ",
+"output":["ParseError"]},
+
+{"description":"EOF in attribute name state",
+"input":"<a a",
+"output":["ParseError"]},
+
+{"description":"EOF in after attribute name state",
+"input":"<a a ",
+"output":["ParseError"]},
+
+{"description":"EOF in before attribute value state",
+"input":"<a a =",
+"output":["ParseError"]},
+
+{"description":"EOF in attribute value (double quoted) state",
+"input":"<a a =\"a",
+"output":["ParseError"]},
+
+{"description":"EOF in attribute value (single quoted) state",
+"input":"<a a ='a",
+"output":["ParseError"]},
+
+{"description":"EOF in attribute value (unquoted) state",
+"input":"<a a =a",
+"output":["ParseError"]},
+
+{"description":"EOF in after attribute value state",
+"input":"<a a ='a'",
+"output":["ParseError"]}
]}
--
1.8.3.2
No comments:
Post a Comment