Monday, 31 March 2014

[PATCH 05/10] improved support for numbered entities & reconsumption of EOF character

---
src/tokeniser/tokeniser.c | 74 +-
test/data/tokeniser2/INDEX | 1 -
test/data/tokeniser2/contentModelFlags.test | 75 -
test/data/tokeniser2/entities.test | 2122 +--------------------------
test/data/tokeniser2/numericEntities.test | 190 +--
test/data/tokeniser2/test4.test | 79 +-
6 files changed, 238 insertions(+), 2303 deletions(-)
delete mode 100644 test/data/tokeniser2/contentModelFlags.test

diff --git a/src/tokeniser/tokeniser.c b/src/tokeniser/tokeniser.c
index 5bb59a8..8390bf0 100644
--- a/src/tokeniser/tokeniser.c
+++ b/src/tokeniser/tokeniser.c
@@ -20,15 +20,15 @@
#include "hubbub/errors.h"
#include "tokeniser/entities.h"
#include "tokeniser/tokeniser.h"
-
/**
* Table of mappings between Windows-1252 codepoints 128-159 and UCS4
*/
static const uint32_t cp1252Table[32] = {
- 0x20AC, 0xFFFD, 0x201A, 0x0192, 0x201E, 0x2026, 0x2020, 0x2021,
- 0x02C6, 0x2030, 0x0160, 0x2039, 0x0152, 0xFFFD, 0x017D, 0xFFFD,
- 0xFFFD, 0x2018, 0x2019, 0x201C, 0x201D, 0x2022, 0x2013, 0x2014,
- 0x02DC, 0x2122, 0x0161, 0x203A, 0x0153, 0xFFFD, 0x017E, 0x0178
+ 0x20AC, 0x0081, 0x201A, 0x0192, 0x201E, 0x2026, 0x2020, 0x2021,
+ 0x02C6, 0x2030, 0x0160, 0x2039, 0x0152, 0x008D, 0x017D, 0x008F,
+ 0x0090, 0x2018, 0x2019, 0x201C, 0x201D, 0x2022, 0x2013, 0x2014,
+ 0x02DC, 0x2122, 0x0161, 0x203A, 0x0153, 0x009D, 0x017E, 0x0178
+
};

/**
@@ -672,7 +672,6 @@ hubbub_error hubbub_tokeniser_handle_data(hubbub_tokeniser *tokeniser)
hubbub_token token;
const uint8_t *cptr;
size_t len;
-
while ((error = parserutils_inputstream_peek(tokeniser->input,
tokeniser->context.pending, &cptr, &len)) ==
PARSERUTILS_OK) {
@@ -1121,7 +1120,10 @@ hubbub_error hubbub_tokeniser_handle_tag_name(hubbub_tokeniser *tokeniser)
if (error != PARSERUTILS_OK) {
if (error == PARSERUTILS_EOF) {
tokeniser->state = STATE_DATA;
- return emit_current_tag(tokeniser);
+// return emit_current_tag(tokeniser);
+ parserutils_inputstream_advance(
+ tokeniser->input, tokeniser->context.pending);
+ return HUBBUB_OK;
} else {
return hubbub_error_from_parserutils_error(error);
}
@@ -1170,7 +1172,9 @@ hubbub_error hubbub_tokeniser_handle_before_attribute_name(
if (error != PARSERUTILS_OK) {
if (error == PARSERUTILS_EOF) {
tokeniser->state = STATE_DATA;
- return emit_current_tag(tokeniser);
+ parserutils_inputstream_advance(
+ tokeniser->input, tokeniser->context.pending);
+ return HUBBUB_OK;
} else {
return hubbub_error_from_parserutils_error(error);
}
@@ -1243,7 +1247,9 @@ hubbub_error hubbub_tokeniser_handle_attribute_name(hubbub_tokeniser *tokeniser)
if (error != PARSERUTILS_OK) {
if (error == PARSERUTILS_EOF) {
tokeniser->state = STATE_DATA;
- return emit_current_tag(tokeniser);
+ parserutils_inputstream_advance(
+ tokeniser->input, tokeniser->context.pending);
+ return HUBBUB_OK;
} else {
return hubbub_error_from_parserutils_error(error);
}
@@ -1298,7 +1304,9 @@ hubbub_error hubbub_tokeniser_handle_after_attribute_name(
if (error != PARSERUTILS_OK) {
if (error == PARSERUTILS_EOF) {
tokeniser->state = STATE_DATA;
- return emit_current_tag(tokeniser);
+ parserutils_inputstream_advance(
+ tokeniser->input, tokeniser->context.pending);
+ return HUBBUB_OK;
} else {
return hubbub_error_from_parserutils_error(error);
}
@@ -1375,7 +1383,9 @@ hubbub_error hubbub_tokeniser_handle_before_attribute_value(
if (error == PARSERUTILS_EOF) {
/** \todo parse error */
tokeniser->state = STATE_DATA;
- return emit_current_tag(tokeniser);
+ parserutils_inputstream_advance(
+ tokeniser->input, tokeniser->context.pending);
+ return HUBBUB_OK;
} else {
return hubbub_error_from_parserutils_error(error);
}
@@ -1436,7 +1446,9 @@ hubbub_error hubbub_tokeniser_handle_attribute_value_dq(
if (error != PARSERUTILS_OK) {
if (error == PARSERUTILS_EOF) {
tokeniser->state = STATE_DATA;
- return emit_current_tag(tokeniser);
+ parserutils_inputstream_advance(
+ tokeniser->input, tokeniser->context.pending);
+ return HUBBUB_OK;
} else {
return hubbub_error_from_parserutils_error(error);
}
@@ -1498,7 +1510,9 @@ hubbub_error hubbub_tokeniser_handle_attribute_value_sq(
if (error != PARSERUTILS_OK) {
if (error == PARSERUTILS_EOF) {
tokeniser->state = STATE_DATA;
- return emit_current_tag(tokeniser);
+ parserutils_inputstream_advance(
+ tokeniser->input, tokeniser->context.pending);
+ return HUBBUB_OK;
} else {
return hubbub_error_from_parserutils_error(error);
}
@@ -1560,7 +1574,9 @@ hubbub_error hubbub_tokeniser_handle_attribute_value_uq(
if (error != PARSERUTILS_OK) {
if (error == PARSERUTILS_EOF) {
tokeniser->state = STATE_DATA;
- return emit_current_tag(tokeniser);
+ parserutils_inputstream_advance(
+ tokeniser->input, tokeniser->context.pending);
+ return HUBBUB_OK;
} else {
return hubbub_error_from_parserutils_error(error);
}
@@ -1664,7 +1680,9 @@ hubbub_error hubbub_tokeniser_handle_after_attribute_value_q(
if (error != PARSERUTILS_OK) {
if (error == PARSERUTILS_EOF) {
tokeniser->state = STATE_DATA;
- return emit_current_tag(tokeniser);
+ parserutils_inputstream_advance(
+ tokeniser->input, tokeniser->context.pending);
+ return HUBBUB_OK;
} else {
return hubbub_error_from_parserutils_error(error);
}
@@ -1706,7 +1724,9 @@ hubbub_error hubbub_tokeniser_handle_self_closing_start_tag(
if (error != PARSERUTILS_OK) {
if (error == PARSERUTILS_EOF) {
tokeniser->state = STATE_DATA;
- return emit_current_tag(tokeniser);
+ parserutils_inputstream_advance(
+ tokeniser->input, tokeniser->context.pending);
+ return HUBBUB_OK;
} else {
return hubbub_error_from_parserutils_error(error);
}
@@ -2984,6 +3004,7 @@ hubbub_error hubbub_tokeniser_handle_numbered_entity(
ctx->match_entity.length += len;
} else {
ctx->match_entity.base = 10;
+ printf("base 10\n");
}
}

@@ -3018,8 +3039,9 @@ hubbub_error hubbub_tokeniser_handle_numbered_entity(
break;
}

- if (ctx->match_entity.numeric_state.ucs4 >= 0x10FFFF) {
+ if (ctx->match_entity.numeric_state.ucs4 > 0x10FFFF) {
ctx->match_entity.overflow = true;
+ printf("overflow\n");
}
}

@@ -3039,18 +3061,22 @@ hubbub_error hubbub_tokeniser_handle_numbered_entity(

if (0x80 <= cp && cp <= 0x9F) {
cp = cp1252Table[cp - 0x80];
- } else if (cp == 0x0D) {
- cp = 0x000A;
+ printf("converting1\n");
} else if (ctx->match_entity.overflow ||
- cp <= 0x0008 || cp == 0x000B ||
- (0x000E <= cp && cp <= 0x001F) ||
- (0x007F <= cp && cp <= 0x009F) ||
(0xD800 <= cp && cp <= 0xDFFF) ||
+ (cp == 0x00)) {
+ printf("converting\n");
+ cp = 0xFFFD;
+ } else if((0x0001<=cp && cp <= 0x0008) ||
+ (0x000D <= cp && cp <= 0x001F) ||
+ (0x007F <= cp && cp <= 0x009F) ||
(0xFDD0 <= cp && cp <= 0xFDEF) ||
- (cp & 0xFFFE) == 0xFFFE) {
+ (cp ==0x000B) ||
+ ((cp & 0xFFFE) == 0xFFFE) ||
+ ((cp & 0xFFFF) == 0xFFFF) ){
+ printf("converting\n");
/* the check for cp > 0x10FFFF per spec is performed
* in the loop above to avoid overflow */
- cp = 0xFFFD;
}
ctx->match_entity.numeric_state.ucs4 = cp;

diff --git a/test/data/tokeniser2/INDEX b/test/data/tokeniser2/INDEX
index 0959b09..9da56e7 100644
--- a/test/data/tokeniser2/INDEX
+++ b/test/data/tokeniser2/INDEX
@@ -6,7 +6,6 @@ test1.test html5lib tests (part 1)
test2.test html5lib tests (part 2)
test3.test html5lib tests (part 3)
test4.test html5lib tests (part 4)
-contentModelFlags.test html5lib content model tests
entities.test html5lib entity tests
escapeFlag.test html5lib escape flag tests
numericEntities.test html5lib numeric entities tests
diff --git a/test/data/tokeniser2/contentModelFlags.test b/test/data/tokeniser2/contentModelFlags.test
deleted file mode 100644
index 1dec3e8..0000000
--- a/test/data/tokeniser2/contentModelFlags.test
+++ /dev/null
@@ -1,75 +0,0 @@
-{"tests": [
-
-{"description":"PLAINTEXT content model flag",
-"contentModelFlags":["PLAINTEXT"],
-"lastStartTag":"plaintext",
-"input":"<head>&body;",
-"output":[["Character", "<head>&body;"]]},
-
-{"description":"End tag closing RCDATA or CDATA",
-"contentModelFlags":["RCDATA", "CDATA"],
-"lastStartTag":"xmp",
-"input":"foo</xmp>",
-"output":[["Character", "foo"], ["EndTag", "xmp"]]},
-
-{"description":"End tag closing RCDATA or CDATA (case-insensitivity)",
-"contentModelFlags":["RCDATA", "CDATA"],
-"lastStartTag":"xmp",
-"input":"foo</xMp>",
-"output":[["Character", "foo"], ["EndTag", "xmp"]]},
-
-{"description":"End tag closing RCDATA or CDATA (ending with space)",
-"contentModelFlags":["RCDATA", "CDATA"],
-"lastStartTag":"xmp",
-"input":"foo</xmp ",
-"output":[["Character", "foo"], "ParseError", ["EndTag", "xmp"]]},
-
-{"description":"End tag closing RCDATA or CDATA (ending with EOF)",
-"contentModelFlags":["RCDATA", "CDATA"],
-"lastStartTag":"xmp",
-"input":"foo</xmp",
-"output":[["Character", "foo"], "ParseError", ["EndTag", "xmp"]]},
-
-{"description":"End tag closing RCDATA or CDATA (ending with slash)",
-"contentModelFlags":["RCDATA", "CDATA"],
-"lastStartTag":"xmp",
-"input":"foo</xmp/",
-"output":[["Character", "foo"], "ParseError", ["EndTag", "xmp"]]},
-
-{"description":"End tag not closing RCDATA or CDATA (ending with left-angle-bracket)",
-"contentModelFlags":["RCDATA", "CDATA"],
-"lastStartTag":"xmp",
-"input":"foo</xmp<",
-"output":[["Character", "foo</xmp<"]]},
-
-{"description":"End tag with incorrect name in RCDATA or CDATA",
-"contentModelFlags":["RCDATA", "CDATA"],
-"lastStartTag":"xmp",
-"input":"</foo>bar</xmp>",
-"output":[["Character", "</foo>bar"], ["EndTag", "xmp"]]},
-
-{"description":"End tag with incorrect name in RCDATA or CDATA (starting like correct name)",
-"contentModelFlags":["RCDATA", "CDATA"],
-"lastStartTag":"xmp",
-"input":"</foo>bar</xmpaar>",
-"output":[["Character", "</foo>bar</xmpaar>"]]},
-
-{"description":"End tag closing RCDATA or CDATA, switching back to PCDATA",
-"contentModelFlags":["RCDATA", "CDATA"],
-"lastStartTag":"xmp",
-"input":"foo</xmp></baz>",
-"output":[["Character", "foo"], ["EndTag", "xmp"], ["EndTag", "baz"]]},
-
-{"description":"CDATA w/ something looking like an entity",
-"contentModelFlags":["CDATA"],
-"lastStartTag":"xmp",
-"input":"&foo;",
-"output":[["Character", "&foo;"]]},
-
-{"description":"RCDATA w/ an entity",
-"contentModelFlags":["RCDATA"],
-"lastStartTag":"textarea",
-"input":"&lt;",
-"output":[["Character", "<"]]}
-
-]}
diff --git a/test/data/tokeniser2/entities.test b/test/data/tokeniser2/entities.test
index 8b8d352..27b85a1 100644
--- a/test/data/tokeniser2/entities.test
+++ b/test/data/tokeniser2/entities.test
@@ -2,2091 +2,19 @@

{"description": "Undefined named entity in attribute value ending in semicolon and whose name starts with a known entity name.",
"input":"<h a='&noti;'>",
-"output": ["ParseError", ["StartTag", "h", {"a": "&noti;"}]]},
+"output": [["StartTag", "h", {"a": "&noti;"}]]},

-{"description": "Named entity: AElig with a semi-colon.",
-"input":"&AElig;",
-"output": [["Character", "\u00C6"]]},
-
-{"description": "Named entity: AElig without a semi-colon.",
-"input":"&AElig",
-"output": ["ParseError", ["Character", "\u00C6"]]},
-
-{"description": "Named entity: AMP with a semi-colon.",
-"input":"&AMP;",
-"output": [["Character", "\u0026"]]},
-
-{"description": "Named entity: AMP without a semi-colon.",
-"input":"&AMP",
-"output": ["ParseError", ["Character", "\u0026"]]},
-
-{"description": "Named entity: Aacute with a semi-colon.",
-"input":"&Aacute;",
-"output": [["Character", "\u00C1"]]},
-
-{"description": "Named entity: Aacute without a semi-colon.",
-"input":"&Aacute",
-"output": ["ParseError", ["Character", "\u00C1"]]},
-
-{"description": "Named entity: Acirc with a semi-colon.",
-"input":"&Acirc;",
-"output": [["Character", "\u00C2"]]},
-
-{"description": "Named entity: Acirc without a semi-colon.",
-"input":"&Acirc",
-"output": ["ParseError", ["Character", "\u00C2"]]},
-
-{"description": "Named entity: Agrave with a semi-colon.",
-"input":"&Agrave;",
-"output": [["Character", "\u00C0"]]},
-
-{"description": "Named entity: Agrave without a semi-colon.",
-"input":"&Agrave",
-"output": ["ParseError", ["Character", "\u00C0"]]},
-
-{"description": "Named entity: Alpha with a semi-colon.",
-"input":"&Alpha;",
-"output": [["Character", "\u0391"]]},
-
-{"description": "Named entity: Aring with a semi-colon.",
-"input":"&Aring;",
-"output": [["Character", "\u00C5"]]},
-
-{"description": "Named entity: Aring without a semi-colon.",
-"input":"&Aring",
-"output": ["ParseError", ["Character", "\u00C5"]]},
-
-{"description": "Named entity: Atilde with a semi-colon.",
-"input":"&Atilde;",
-"output": [["Character", "\u00C3"]]},
-
-{"description": "Named entity: Atilde without a semi-colon.",
-"input":"&Atilde",
-"output": ["ParseError", ["Character", "\u00C3"]]},
-
-{"description": "Named entity: Auml with a semi-colon.",
-"input":"&Auml;",
-"output": [["Character", "\u00C4"]]},
-
-{"description": "Named entity: Auml without a semi-colon.",
-"input":"&Auml",
-"output": ["ParseError", ["Character", "\u00C4"]]},
-
-{"description": "Named entity: Beta with a semi-colon.",
-"input":"&Beta;",
-"output": [["Character", "\u0392"]]},
-
-{"description": "Named entity: COPY with a semi-colon.",
-"input":"&COPY;",
-"output": [["Character", "\u00A9"]]},
-
-{"description": "Named entity: COPY without a semi-colon.",
-"input":"&COPY",
-"output": ["ParseError", ["Character", "\u00A9"]]},
-
-{"description": "Named entity: Ccedil with a semi-colon.",
-"input":"&Ccedil;",
-"output": [["Character", "\u00C7"]]},
-
-{"description": "Named entity: Ccedil without a semi-colon.",
-"input":"&Ccedil",
-"output": ["ParseError", ["Character", "\u00C7"]]},
-
-{"description": "Named entity: Chi with a semi-colon.",
-"input":"&Chi;",
-"output": [["Character", "\u03A7"]]},
-
-{"description": "Named entity: Dagger with a semi-colon.",
-"input":"&Dagger;",
-"output": [["Character", "\u2021"]]},
-
-{"description": "Named entity: Delta with a semi-colon.",
-"input":"&Delta;",
-"output": [["Character", "\u0394"]]},
-
-{"description": "Named entity: ETH with a semi-colon.",
-"input":"&ETH;",
-"output": [["Character", "\u00D0"]]},
-
-{"description": "Named entity: ETH without a semi-colon.",
-"input":"&ETH",
-"output": ["ParseError", ["Character", "\u00D0"]]},
-
-{"description": "Named entity: Eacute with a semi-colon.",
-"input":"&Eacute;",
-"output": [["Character", "\u00C9"]]},
-
-{"description": "Named entity: Eacute without a semi-colon.",
-"input":"&Eacute",
-"output": ["ParseError", ["Character", "\u00C9"]]},
-
-{"description": "Named entity: Ecirc with a semi-colon.",
-"input":"&Ecirc;",
-"output": [["Character", "\u00CA"]]},
-
-{"description": "Named entity: Ecirc without a semi-colon.",
-"input":"&Ecirc",
-"output": ["ParseError", ["Character", "\u00CA"]]},
-
-{"description": "Named entity: Egrave with a semi-colon.",
-"input":"&Egrave;",
-"output": [["Character", "\u00C8"]]},
-
-{"description": "Named entity: Egrave without a semi-colon.",
-"input":"&Egrave",
-"output": ["ParseError", ["Character", "\u00C8"]]},
-
-{"description": "Named entity: Epsilon with a semi-colon.",
-"input":"&Epsilon;",
-"output": [["Character", "\u0395"]]},
-
-{"description": "Named entity: Eta with a semi-colon.",
-"input":"&Eta;",
-"output": [["Character", "\u0397"]]},
-
-{"description": "Named entity: Euml with a semi-colon.",
-"input":"&Euml;",
-"output": [["Character", "\u00CB"]]},
-
-{"description": "Named entity: Euml without a semi-colon.",
-"input":"&Euml",
-"output": ["ParseError", ["Character", "\u00CB"]]},
-
-{"description": "Named entity: GT with a semi-colon.",
-"input":"&GT;",
-"output": [["Character", "\u003E"]]},
-
-{"description": "Named entity: GT without a semi-colon.",
-"input":"&GT",
-"output": ["ParseError", ["Character", "\u003E"]]},
-
-{"description": "Named entity: Gamma with a semi-colon.",
-"input":"&Gamma;",
-"output": [["Character", "\u0393"]]},
-
-{"description": "Named entity: Iacute with a semi-colon.",
-"input":"&Iacute;",
-"output": [["Character", "\u00CD"]]},
-
-{"description": "Named entity: Iacute without a semi-colon.",
-"input":"&Iacute",
-"output": ["ParseError", ["Character", "\u00CD"]]},
-
-{"description": "Named entity: Icirc with a semi-colon.",
-"input":"&Icirc;",
-"output": [["Character", "\u00CE"]]},
-
-{"description": "Named entity: Icirc without a semi-colon.",
-"input":"&Icirc",
-"output": ["ParseError", ["Character", "\u00CE"]]},
-
-{"description": "Named entity: Igrave with a semi-colon.",
-"input":"&Igrave;",
-"output": [["Character", "\u00CC"]]},
-
-{"description": "Named entity: Igrave without a semi-colon.",
-"input":"&Igrave",
-"output": ["ParseError", ["Character", "\u00CC"]]},
-
-{"description": "Named entity: Iota with a semi-colon.",
-"input":"&Iota;",
-"output": [["Character", "\u0399"]]},
-
-{"description": "Named entity: Iuml with a semi-colon.",
-"input":"&Iuml;",
-"output": [["Character", "\u00CF"]]},
-
-{"description": "Named entity: Iuml without a semi-colon.",
-"input":"&Iuml",
-"output": ["ParseError", ["Character", "\u00CF"]]},
-
-{"description": "Named entity: Kappa with a semi-colon.",
-"input":"&Kappa;",
-"output": [["Character", "\u039A"]]},
-
-{"description": "Named entity: LT with a semi-colon.",
-"input":"&LT;",
-"output": [["Character", "\u003C"]]},
-
-{"description": "Named entity: LT without a semi-colon.",
-"input":"&LT",
-"output": ["ParseError", ["Character", "\u003C"]]},
-
-{"description": "Named entity: Lambda with a semi-colon.",
-"input":"&Lambda;",
-"output": [["Character", "\u039B"]]},
-
-{"description": "Named entity: Mu with a semi-colon.",
-"input":"&Mu;",
-"output": [["Character", "\u039C"]]},
-
-{"description": "Named entity: Ntilde with a semi-colon.",
-"input":"&Ntilde;",
-"output": [["Character", "\u00D1"]]},
-
-{"description": "Named entity: Ntilde without a semi-colon.",
-"input":"&Ntilde",
-"output": ["ParseError", ["Character", "\u00D1"]]},
-
-{"description": "Named entity: Nu with a semi-colon.",
-"input":"&Nu;",
-"output": [["Character", "\u039D"]]},
-
-{"description": "Named entity: OElig with a semi-colon.",
-"input":"&OElig;",
-"output": [["Character", "\u0152"]]},
-
-{"description": "Named entity: Oacute with a semi-colon.",
-"input":"&Oacute;",
-"output": [["Character", "\u00D3"]]},
-
-{"description": "Named entity: Oacute without a semi-colon.",
-"input":"&Oacute",
-"output": ["ParseError", ["Character", "\u00D3"]]},
-
-{"description": "Named entity: Ocirc with a semi-colon.",
-"input":"&Ocirc;",
-"output": [["Character", "\u00D4"]]},
-
-{"description": "Named entity: Ocirc without a semi-colon.",
-"input":"&Ocirc",
-"output": ["ParseError", ["Character", "\u00D4"]]},
-
-{"description": "Named entity: Ograve with a semi-colon.",
-"input":"&Ograve;",
-"output": [["Character", "\u00D2"]]},
-
-{"description": "Named entity: Ograve without a semi-colon.",
-"input":"&Ograve",
-"output": ["ParseError", ["Character", "\u00D2"]]},
-
-{"description": "Named entity: Omega with a semi-colon.",
-"input":"&Omega;",
-"output": [["Character", "\u03A9"]]},
-
-{"description": "Named entity: Omicron with a semi-colon.",
-"input":"&Omicron;",
-"output": [["Character", "\u039F"]]},
-
-{"description": "Named entity: Oslash with a semi-colon.",
-"input":"&Oslash;",
-"output": [["Character", "\u00D8"]]},
-
-{"description": "Named entity: Oslash without a semi-colon.",
-"input":"&Oslash",
-"output": ["ParseError", ["Character", "\u00D8"]]},
-
-{"description": "Named entity: Otilde with a semi-colon.",
-"input":"&Otilde;",
-"output": [["Character", "\u00D5"]]},
-
-{"description": "Named entity: Otilde without a semi-colon.",
-"input":"&Otilde",
-"output": ["ParseError", ["Character", "\u00D5"]]},
-
-{"description": "Named entity: Ouml with a semi-colon.",
-"input":"&Ouml;",
-"output": [["Character", "\u00D6"]]},
-
-{"description": "Named entity: Ouml without a semi-colon.",
-"input":"&Ouml",
-"output": ["ParseError", ["Character", "\u00D6"]]},
-
-{"description": "Named entity: Phi with a semi-colon.",
-"input":"&Phi;",
-"output": [["Character", "\u03A6"]]},
-
-{"description": "Named entity: Pi with a semi-colon.",
-"input":"&Pi;",
-"output": [["Character", "\u03A0"]]},
-
-{"description": "Named entity: Prime with a semi-colon.",
-"input":"&Prime;",
-"output": [["Character", "\u2033"]]},
-
-{"description": "Named entity: Psi with a semi-colon.",
-"input":"&Psi;",
-"output": [["Character", "\u03A8"]]},
-
-{"description": "Named entity: QUOT with a semi-colon.",
-"input":"&QUOT;",
-"output": [["Character", "\u0022"]]},
-
-{"description": "Named entity: QUOT without a semi-colon.",
-"input":"&QUOT",
-"output": ["ParseError", ["Character", "\u0022"]]},
-
-{"description": "Named entity: REG with a semi-colon.",
-"input":"&REG;",
-"output": [["Character", "\u00AE"]]},
-
-{"description": "Named entity: REG without a semi-colon.",
-"input":"&REG",
-"output": ["ParseError", ["Character", "\u00AE"]]},
-
-{"description": "Named entity: Rho with a semi-colon.",
-"input":"&Rho;",
-"output": [["Character", "\u03A1"]]},
-
-{"description": "Named entity: Scaron with a semi-colon.",
-"input":"&Scaron;",
-"output": [["Character", "\u0160"]]},
-
-{"description": "Named entity: Sigma with a semi-colon.",
-"input":"&Sigma;",
-"output": [["Character", "\u03A3"]]},
-
-{"description": "Named entity: THORN with a semi-colon.",
-"input":"&THORN;",
-"output": [["Character", "\u00DE"]]},
-
-{"description": "Named entity: THORN without a semi-colon.",
-"input":"&THORN",
-"output": ["ParseError", ["Character", "\u00DE"]]},
-
-{"description": "Named entity: TRADE with a semi-colon.",
-"input":"&TRADE;",
-"output": [["Character", "\u2122"]]},
-
-{"description": "Named entity: Tau with a semi-colon.",
-"input":"&Tau;",
-"output": [["Character", "\u03A4"]]},
-
-{"description": "Named entity: Theta with a semi-colon.",
-"input":"&Theta;",
-"output": [["Character", "\u0398"]]},
-
-{"description": "Named entity: Uacute with a semi-colon.",
-"input":"&Uacute;",
-"output": [["Character", "\u00DA"]]},
-
-{"description": "Named entity: Uacute without a semi-colon.",
-"input":"&Uacute",
-"output": ["ParseError", ["Character", "\u00DA"]]},
-
-{"description": "Named entity: Ucirc with a semi-colon.",
-"input":"&Ucirc;",
-"output": [["Character", "\u00DB"]]},
-
-{"description": "Named entity: Ucirc without a semi-colon.",
-"input":"&Ucirc",
-"output": ["ParseError", ["Character", "\u00DB"]]},
-
-{"description": "Named entity: Ugrave with a semi-colon.",
-"input":"&Ugrave;",
-"output": [["Character", "\u00D9"]]},
-
-{"description": "Named entity: Ugrave without a semi-colon.",
-"input":"&Ugrave",
-"output": ["ParseError", ["Character", "\u00D9"]]},
-
-{"description": "Named entity: Upsilon with a semi-colon.",
-"input":"&Upsilon;",
-"output": [["Character", "\u03A5"]]},
-
-{"description": "Named entity: Uuml with a semi-colon.",
-"input":"&Uuml;",
-"output": [["Character", "\u00DC"]]},
-
-{"description": "Named entity: Uuml without a semi-colon.",
-"input":"&Uuml",
-"output": ["ParseError", ["Character", "\u00DC"]]},
-
-{"description": "Named entity: Xi with a semi-colon.",
-"input":"&Xi;",
-"output": [["Character", "\u039E"]]},
-
-{"description": "Named entity: Yacute with a semi-colon.",
-"input":"&Yacute;",
-"output": [["Character", "\u00DD"]]},
-
-{"description": "Named entity: Yacute without a semi-colon.",
-"input":"&Yacute",
-"output": ["ParseError", ["Character", "\u00DD"]]},
-
-{"description": "Named entity: Yuml with a semi-colon.",
-"input":"&Yuml;",
-"output": [["Character", "\u0178"]]},
-
-{"description": "Named entity: Zeta with a semi-colon.",
-"input":"&Zeta;",
-"output": [["Character", "\u0396"]]},
-
-{"description": "Named entity: aacute with a semi-colon.",
-"input":"&aacute;",
-"output": [["Character", "\u00E1"]]},
-
-{"description": "Named entity: aacute without a semi-colon.",
-"input":"&aacute",
-"output": ["ParseError", ["Character", "\u00E1"]]},
-
-{"description": "Named entity: acirc with a semi-colon.",
-"input":"&acirc;",
-"output": [["Character", "\u00E2"]]},
-
-{"description": "Named entity: acirc without a semi-colon.",
-"input":"&acirc",
-"output": ["ParseError", ["Character", "\u00E2"]]},
-
-{"description": "Named entity: acute with a semi-colon.",
-"input":"&acute;",
-"output": [["Character", "\u00B4"]]},
-
-{"description": "Named entity: acute without a semi-colon.",
-"input":"&acute",
-"output": ["ParseError", ["Character", "\u00B4"]]},
-
-{"description": "Named entity: aelig with a semi-colon.",
-"input":"&aelig;",
-"output": [["Character", "\u00E6"]]},
-
-{"description": "Named entity: aelig without a semi-colon.",
-"input":"&aelig",
-"output": ["ParseError", ["Character", "\u00E6"]]},
-
-{"description": "Named entity: agrave with a semi-colon.",
-"input":"&agrave;",
-"output": [["Character", "\u00E0"]]},
-
-{"description": "Named entity: agrave without a semi-colon.",
-"input":"&agrave",
-"output": ["ParseError", ["Character", "\u00E0"]]},
-
-{"description": "Named entity: alefsym with a semi-colon.",
-"input":"&alefsym;",
-"output": [["Character", "\u2135"]]},
-
-{"description": "Named entity: alpha with a semi-colon.",
-"input":"&alpha;",
-"output": [["Character", "\u03B1"]]},
-
-{"description": "Named entity: amp with a semi-colon.",
-"input":"&amp;",
-"output": [["Character", "\u0026"]]},
-
-{"description": "Named entity: amp without a semi-colon.",
-"input":"&amp",
-"output": ["ParseError", ["Character", "\u0026"]]},
-
-{"description": "Named entity: and with a semi-colon.",
-"input":"&and;",
-"output": [["Character", "\u2227"]]},
-
-{"description": "Named entity: ang with a semi-colon.",
-"input":"&ang;",
-"output": [["Character", "\u2220"]]},
-
-{"description": "Named entity: apos with a semi-colon.",
-"input":"&apos;",
-"output": [["Character", "\u0027"]]},
-
-{"description": "Named entity: aring with a semi-colon.",
-"input":"&aring;",
-"output": [["Character", "\u00E5"]]},
-
-{"description": "Named entity: aring without a semi-colon.",
-"input":"&aring",
-"output": ["ParseError", ["Character", "\u00E5"]]},
-
-{"description": "Named entity: asymp with a semi-colon.",
-"input":"&asymp;",
-"output": [["Character", "\u2248"]]},
-
-{"description": "Named entity: atilde with a semi-colon.",
-"input":"&atilde;",
-"output": [["Character", "\u00E3"]]},
-
-{"description": "Named entity: atilde without a semi-colon.",
-"input":"&atilde",
-"output": ["ParseError", ["Character", "\u00E3"]]},
-
-{"description": "Named entity: auml with a semi-colon.",
-"input":"&auml;",
-"output": [["Character", "\u00E4"]]},
-
-{"description": "Named entity: auml without a semi-colon.",
-"input":"&auml",
-"output": ["ParseError", ["Character", "\u00E4"]]},
-
-{"description": "Named entity: bdquo with a semi-colon.",
-"input":"&bdquo;",
-"output": [["Character", "\u201E"]]},
-
-{"description": "Named entity: beta with a semi-colon.",
-"input":"&beta;",
-"output": [["Character", "\u03B2"]]},
-
-{"description": "Named entity: brvbar with a semi-colon.",
-"input":"&brvbar;",
-"output": [["Character", "\u00A6"]]},
-
-{"description": "Named entity: brvbar without a semi-colon.",
-"input":"&brvbar",
-"output": ["ParseError", ["Character", "\u00A6"]]},
-
-{"description": "Named entity: bull with a semi-colon.",
-"input":"&bull;",
-"output": [["Character", "\u2022"]]},
-
-{"description": "Named entity: cap with a semi-colon.",
-"input":"&cap;",
-"output": [["Character", "\u2229"]]},
-
-{"description": "Named entity: ccedil with a semi-colon.",
-"input":"&ccedil;",
-"output": [["Character", "\u00E7"]]},
-
-{"description": "Named entity: ccedil without a semi-colon.",
-"input":"&ccedil",
-"output": ["ParseError", ["Character", "\u00E7"]]},
-
-{"description": "Named entity: cedil with a semi-colon.",
-"input":"&cedil;",
-"output": [["Character", "\u00B8"]]},
-
-{"description": "Named entity: cedil without a semi-colon.",
-"input":"&cedil",
-"output": ["ParseError", ["Character", "\u00B8"]]},
-
-{"description": "Named entity: cent with a semi-colon.",
-"input":"&cent;",
-"output": [["Character", "\u00A2"]]},
-
-{"description": "Named entity: cent without a semi-colon.",
-"input":"&cent",
-"output": ["ParseError", ["Character", "\u00A2"]]},
-
-{"description": "Named entity: chi with a semi-colon.",
-"input":"&chi;",
-"output": [["Character", "\u03C7"]]},
-
-{"description": "Named entity: circ with a semi-colon.",
-"input":"&circ;",
-"output": [["Character", "\u02C6"]]},
-
-{"description": "Named entity: clubs with a semi-colon.",
-"input":"&clubs;",
-"output": [["Character", "\u2663"]]},
-
-{"description": "Named entity: cong with a semi-colon.",
-"input":"&cong;",
-"output": [["Character", "\u2245"]]},
-
-{"description": "Named entity: copy with a semi-colon.",
-"input":"&copy;",
-"output": [["Character", "\u00A9"]]},
-
-{"description": "Named entity: copy without a semi-colon.",
-"input":"&copy",
-"output": ["ParseError", ["Character", "\u00A9"]]},
-
-{"description": "Named entity: crarr with a semi-colon.",
-"input":"&crarr;",
-"output": [["Character", "\u21B5"]]},
-
-{"description": "Named entity: cup with a semi-colon.",
-"input":"&cup;",
-"output": [["Character", "\u222A"]]},
-
-{"description": "Named entity: curren with a semi-colon.",
-"input":"&curren;",
-"output": [["Character", "\u00A4"]]},
-
-{"description": "Named entity: curren without a semi-colon.",
-"input":"&curren",
-"output": ["ParseError", ["Character", "\u00A4"]]},
-
-{"description": "Named entity: dArr with a semi-colon.",
-"input":"&dArr;",
-"output": [["Character", "\u21D3"]]},
-
-{"description": "Named entity: dagger with a semi-colon.",
-"input":"&dagger;",
-"output": [["Character", "\u2020"]]},
-
-{"description": "Named entity: darr with a semi-colon.",
-"input":"&darr;",
-"output": [["Character", "\u2193"]]},
-
-{"description": "Named entity: deg with a semi-colon.",
-"input":"&deg;",
-"output": [["Character", "\u00B0"]]},
-
-{"description": "Named entity: deg without a semi-colon.",
-"input":"&deg",
-"output": ["ParseError", ["Character", "\u00B0"]]},
-
-{"description": "Named entity: delta with a semi-colon.",
-"input":"&delta;",
-"output": [["Character", "\u03B4"]]},
-
-{"description": "Named entity: diams with a semi-colon.",
-"input":"&diams;",
-"output": [["Character", "\u2666"]]},
-
-{"description": "Named entity: divide with a semi-colon.",
-"input":"&divide;",
-"output": [["Character", "\u00F7"]]},
-
-{"description": "Named entity: divide without a semi-colon.",
-"input":"&divide",
-"output": ["ParseError", ["Character", "\u00F7"]]},
-
-{"description": "Named entity: eacute with a semi-colon.",
-"input":"&eacute;",
-"output": [["Character", "\u00E9"]]},
-
-{"description": "Named entity: eacute without a semi-colon.",
-"input":"&eacute",
-"output": ["ParseError", ["Character", "\u00E9"]]},
-
-{"description": "Named entity: ecirc with a semi-colon.",
-"input":"&ecirc;",
-"output": [["Character", "\u00EA"]]},
-
-{"description": "Named entity: ecirc without a semi-colon.",
-"input":"&ecirc",
-"output": ["ParseError", ["Character", "\u00EA"]]},
-
-{"description": "Named entity: egrave with a semi-colon.",
-"input":"&egrave;",
-"output": [["Character", "\u00E8"]]},
-
-{"description": "Named entity: egrave without a semi-colon.",
-"input":"&egrave",
-"output": ["ParseError", ["Character", "\u00E8"]]},
-
-{"description": "Named entity: empty with a semi-colon.",
-"input":"&empty;",
-"output": [["Character", "\u2205"]]},
-
-{"description": "Named entity: emsp with a semi-colon.",
-"input":"&emsp;",
-"output": [["Character", "\u2003"]]},
-
-{"description": "Named entity: ensp with a semi-colon.",
-"input":"&ensp;",
-"output": [["Character", "\u2002"]]},
-
-{"description": "Named entity: epsilon with a semi-colon.",
-"input":"&epsilon;",
-"output": [["Character", "\u03B5"]]},
-
-{"description": "Named entity: equiv with a semi-colon.",
-"input":"&equiv;",
-"output": [["Character", "\u2261"]]},
-
-{"description": "Named entity: eta with a semi-colon.",
-"input":"&eta;",
-"output": [["Character", "\u03B7"]]},
-
-{"description": "Named entity: eth with a semi-colon.",
-"input":"&eth;",
-"output": [["Character", "\u00F0"]]},
-
-{"description": "Named entity: eth without a semi-colon.",
-"input":"&eth",
-"output": ["ParseError", ["Character", "\u00F0"]]},
-
-{"description": "Named entity: euml with a semi-colon.",
-"input":"&euml;",
-"output": [["Character", "\u00EB"]]},
-
-{"description": "Named entity: euml without a semi-colon.",
-"input":"&euml",
-"output": ["ParseError", ["Character", "\u00EB"]]},
-
-{"description": "Named entity: euro with a semi-colon.",
-"input":"&euro;",
-"output": [["Character", "\u20AC"]]},
-
-{"description": "Named entity: exist with a semi-colon.",
-"input":"&exist;",
-"output": [["Character", "\u2203"]]},
-
-{"description": "Named entity: fnof with a semi-colon.",
-"input":"&fnof;",
-"output": [["Character", "\u0192"]]},
-
-{"description": "Named entity: forall with a semi-colon.",
-"input":"&forall;",
-"output": [["Character", "\u2200"]]},
-
-{"description": "Named entity: frac12 with a semi-colon.",
-"input":"&frac12;",
-"output": [["Character", "\u00BD"]]},
-
-{"description": "Named entity: frac12 without a semi-colon.",
-"input":"&frac12",
-"output": ["ParseError", ["Character", "\u00BD"]]},
-
-{"description": "Named entity: frac14 with a semi-colon.",
-"input":"&frac14;",
-"output": [["Character", "\u00BC"]]},
-
-{"description": "Named entity: frac14 without a semi-colon.",
-"input":"&frac14",
-"output": ["ParseError", ["Character", "\u00BC"]]},
-
-{"description": "Named entity: frac34 with a semi-colon.",
-"input":"&frac34;",
-"output": [["Character", "\u00BE"]]},
-
-{"description": "Named entity: frac34 without a semi-colon.",
-"input":"&frac34",
-"output": ["ParseError", ["Character", "\u00BE"]]},
-
-{"description": "Named entity: frasl with a semi-colon.",
-"input":"&frasl;",
-"output": [["Character", "\u2044"]]},
-
-{"description": "Named entity: gamma with a semi-colon.",
-"input":"&gamma;",
-"output": [["Character", "\u03B3"]]},
-
-{"description": "Named entity: ge with a semi-colon.",
-"input":"&ge;",
-"output": [["Character", "\u2265"]]},
-
-{"description": "Named entity: gt with a semi-colon.",
-"input":"&gt;",
-"output": [["Character", "\u003E"]]},
-
-{"description": "Named entity: gt without a semi-colon.",
-"input":"&gt",
-"output": ["ParseError", ["Character", "\u003E"]]},
-
-{"description": "Named entity: hArr with a semi-colon.",
-"input":"&hArr;",
-"output": [["Character", "\u21D4"]]},
-
-{"description": "Named entity: harr with a semi-colon.",
-"input":"&harr;",
-"output": [["Character", "\u2194"]]},
-
-{"description": "Named entity: hearts with a semi-colon.",
-"input":"&hearts;",
-"output": [["Character", "\u2665"]]},
-
-{"description": "Named entity: hellip with a semi-colon.",
-"input":"&hellip;",
-"output": [["Character", "\u2026"]]},
-
-{"description": "Named entity: iacute with a semi-colon.",
-"input":"&iacute;",
-"output": [["Character", "\u00ED"]]},
-
-{"description": "Named entity: iacute without a semi-colon.",
-"input":"&iacute",
-"output": ["ParseError", ["Character", "\u00ED"]]},
-
-{"description": "Named entity: icirc with a semi-colon.",
-"input":"&icirc;",
-"output": [["Character", "\u00EE"]]},
-
-{"description": "Named entity: icirc without a semi-colon.",
-"input":"&icirc",
-"output": ["ParseError", ["Character", "\u00EE"]]},
-
-{"description": "Named entity: iexcl with a semi-colon.",
-"input":"&iexcl;",
-"output": [["Character", "\u00A1"]]},
-
-{"description": "Named entity: iexcl without a semi-colon.",
-"input":"&iexcl",
-"output": ["ParseError", ["Character", "\u00A1"]]},
-
-{"description": "Named entity: igrave with a semi-colon.",
-"input":"&igrave;",
-"output": [["Character", "\u00EC"]]},
-
-{"description": "Named entity: igrave without a semi-colon.",
-"input":"&igrave",
-"output": ["ParseError", ["Character", "\u00EC"]]},
-
-{"description": "Named entity: image with a semi-colon.",
-"input":"&image;",
-"output": [["Character", "\u2111"]]},
-
-{"description": "Named entity: infin with a semi-colon.",
-"input":"&infin;",
-"output": [["Character", "\u221E"]]},
-
-{"description": "Named entity: int with a semi-colon.",
-"input":"&int;",
-"output": [["Character", "\u222B"]]},
-
-{"description": "Named entity: iota with a semi-colon.",
-"input":"&iota;",
-"output": [["Character", "\u03B9"]]},
-
-{"description": "Named entity: iquest with a semi-colon.",
-"input":"&iquest;",
-"output": [["Character", "\u00BF"]]},
-
-{"description": "Named entity: iquest without a semi-colon.",
-"input":"&iquest",
-"output": ["ParseError", ["Character", "\u00BF"]]},
-
-{"description": "Named entity: isin with a semi-colon.",
-"input":"&isin;",
-"output": [["Character", "\u2208"]]},
-
-{"description": "Named entity: iuml with a semi-colon.",
-"input":"&iuml;",
-"output": [["Character", "\u00EF"]]},
-
-{"description": "Named entity: iuml without a semi-colon.",
-"input":"&iuml",
-"output": ["ParseError", ["Character", "\u00EF"]]},
-
-{"description": "Named entity: kappa with a semi-colon.",
-"input":"&kappa;",
-"output": [["Character", "\u03BA"]]},
-
-{"description": "Named entity: lArr with a semi-colon.",
-"input":"&lArr;",
-"output": [["Character", "\u21D0"]]},
-
-{"description": "Named entity: lambda with a semi-colon.",
-"input":"&lambda;",
-"output": [["Character", "\u03BB"]]},
-
-{"description": "Named entity: lang with a semi-colon.",
-"input":"&lang;",
-"output": [["Character", "\u27E8"]]},
-
-{"description": "Named entity: laquo with a semi-colon.",
-"input":"&laquo;",
-"output": [["Character", "\u00AB"]]},
-
-{"description": "Named entity: laquo without a semi-colon.",
-"input":"&laquo",
-"output": ["ParseError", ["Character", "\u00AB"]]},
-
-{"description": "Named entity: larr with a semi-colon.",
-"input":"&larr;",
-"output": [["Character", "\u2190"]]},
-
-{"description": "Named entity: lceil with a semi-colon.",
-"input":"&lceil;",
-"output": [["Character", "\u2308"]]},
-
-{"description": "Named entity: ldquo with a semi-colon.",
-"input":"&ldquo;",
-"output": [["Character", "\u201C"]]},
-
-{"description": "Named entity: le with a semi-colon.",
-"input":"&le;",
-"output": [["Character", "\u2264"]]},
-
-{"description": "Named entity: lfloor with a semi-colon.",
-"input":"&lfloor;",
-"output": [["Character", "\u230A"]]},
-
-{"description": "Named entity: lowast with a semi-colon.",
-"input":"&lowast;",
-"output": [["Character", "\u2217"]]},
-
-{"description": "Named entity: loz with a semi-colon.",
-"input":"&loz;",
-"output": [["Character", "\u25CA"]]},
-
-{"description": "Named entity: lrm with a semi-colon.",
-"input":"&lrm;",
-"output": [["Character", "\u200E"]]},
-
-{"description": "Named entity: lsaquo with a semi-colon.",
-"input":"&lsaquo;",
-"output": [["Character", "\u2039"]]},
-
-{"description": "Named entity: lsquo with a semi-colon.",
-"input":"&lsquo;",
-"output": [["Character", "\u2018"]]},
-
-{"description": "Named entity: lt with a semi-colon.",
-"input":"&lt;",
-"output": [["Character", "\u003C"]]},
-
-{"description": "Named entity: lt without a semi-colon.",
-"input":"&lt",
-"output": ["ParseError", ["Character", "\u003C"]]},
-
-{"description": "Named entity: macr with a semi-colon.",
-"input":"&macr;",
-"output": [["Character", "\u00AF"]]},
-
-{"description": "Named entity: macr without a semi-colon.",
-"input":"&macr",
-"output": ["ParseError", ["Character", "\u00AF"]]},
-
-{"description": "Named entity: mdash with a semi-colon.",
-"input":"&mdash;",
-"output": [["Character", "\u2014"]]},
-
-{"description": "Named entity: micro with a semi-colon.",
-"input":"&micro;",
-"output": [["Character", "\u00B5"]]},
-
-{"description": "Named entity: micro without a semi-colon.",
-"input":"&micro",
-"output": ["ParseError", ["Character", "\u00B5"]]},
-
-{"description": "Named entity: middot with a semi-colon.",
-"input":"&middot;",
-"output": [["Character", "\u00B7"]]},
-
-{"description": "Named entity: middot without a semi-colon.",
-"input":"&middot",
-"output": ["ParseError", ["Character", "\u00B7"]]},
-
-{"description": "Named entity: minus with a semi-colon.",
-"input":"&minus;",
-"output": [["Character", "\u2212"]]},
-
-{"description": "Named entity: mu with a semi-colon.",
-"input":"&mu;",
-"output": [["Character", "\u03BC"]]},
-
-{"description": "Named entity: nabla with a semi-colon.",
-"input":"&nabla;",
-"output": [["Character", "\u2207"]]},
-
-{"description": "Named entity: nbsp with a semi-colon.",
-"input":"&nbsp;",
-"output": [["Character", "\u00A0"]]},
-
-{"description": "Named entity: nbsp without a semi-colon.",
-"input":"&nbsp",
-"output": ["ParseError", ["Character", "\u00A0"]]},
-
-{"description": "Named entity: ndash with a semi-colon.",
-"input":"&ndash;",
-"output": [["Character", "\u2013"]]},
-
-{"description": "Named entity: ne with a semi-colon.",
-"input":"&ne;",
-"output": [["Character", "\u2260"]]},
-
-{"description": "Named entity: ni with a semi-colon.",
-"input":"&ni;",
-"output": [["Character", "\u220B"]]},
-
-{"description": "Named entity: not with a semi-colon.",
-"input":"&not;",
-"output": [["Character", "\u00AC"]]},
-
-{"description": "Named entity: not without a semi-colon.",
-"input":"&not",
-"output": ["ParseError", ["Character", "\u00AC"]]},
-
-{"description": "Named entity: notin with a semi-colon.",
-"input":"&notin;",
-"output": [["Character", "\u2209"]]},
-
-{"description": "Named entity: nsub with a semi-colon.",
-"input":"&nsub;",
-"output": [["Character", "\u2284"]]},
-
-{"description": "Named entity: ntilde with a semi-colon.",
-"input":"&ntilde;",
-"output": [["Character", "\u00F1"]]},
-
-{"description": "Named entity: ntilde without a semi-colon.",
-"input":"&ntilde",
-"output": ["ParseError", ["Character", "\u00F1"]]},
-
-{"description": "Named entity: nu with a semi-colon.",
-"input":"&nu;",
-"output": [["Character", "\u03BD"]]},
-
-{"description": "Named entity: oacute with a semi-colon.",
-"input":"&oacute;",
-"output": [["Character", "\u00F3"]]},
-
-{"description": "Named entity: oacute without a semi-colon.",
-"input":"&oacute",
-"output": ["ParseError", ["Character", "\u00F3"]]},
-
-{"description": "Named entity: ocirc with a semi-colon.",
-"input":"&ocirc;",
-"output": [["Character", "\u00F4"]]},
-
-{"description": "Named entity: ocirc without a semi-colon.",
-"input":"&ocirc",
-"output": ["ParseError", ["Character", "\u00F4"]]},
-
-{"description": "Named entity: oelig with a semi-colon.",
-"input":"&oelig;",
-"output": [["Character", "\u0153"]]},
-
-{"description": "Named entity: ograve with a semi-colon.",
-"input":"&ograve;",
-"output": [["Character", "\u00F2"]]},
-
-{"description": "Named entity: ograve without a semi-colon.",
-"input":"&ograve",
-"output": ["ParseError", ["Character", "\u00F2"]]},
-
-{"description": "Named entity: oline with a semi-colon.",
-"input":"&oline;",
-"output": [["Character", "\u203E"]]},
-
-{"description": "Named entity: omega with a semi-colon.",
-"input":"&omega;",
-"output": [["Character", "\u03C9"]]},
-
-{"description": "Named entity: omicron with a semi-colon.",
-"input":"&omicron;",
-"output": [["Character", "\u03BF"]]},
-
-{"description": "Named entity: oplus with a semi-colon.",
-"input":"&oplus;",
-"output": [["Character", "\u2295"]]},
-
-{"description": "Named entity: or with a semi-colon.",
-"input":"&or;",
-"output": [["Character", "\u2228"]]},
-
-{"description": "Named entity: ordf with a semi-colon.",
-"input":"&ordf;",
-"output": [["Character", "\u00AA"]]},
-
-{"description": "Named entity: ordf without a semi-colon.",
-"input":"&ordf",
-"output": ["ParseError", ["Character", "\u00AA"]]},
-
-{"description": "Named entity: ordm with a semi-colon.",
-"input":"&ordm;",
-"output": [["Character", "\u00BA"]]},
-
-{"description": "Named entity: ordm without a semi-colon.",
-"input":"&ordm",
-"output": ["ParseError", ["Character", "\u00BA"]]},
-
-{"description": "Named entity: oslash with a semi-colon.",
-"input":"&oslash;",
-"output": [["Character", "\u00F8"]]},
-
-{"description": "Named entity: oslash without a semi-colon.",
-"input":"&oslash",
-"output": ["ParseError", ["Character", "\u00F8"]]},
-
-{"description": "Named entity: otilde with a semi-colon.",
-"input":"&otilde;",
-"output": [["Character", "\u00F5"]]},
-
-{"description": "Named entity: otilde without a semi-colon.",
-"input":"&otilde",
-"output": ["ParseError", ["Character", "\u00F5"]]},
-
-{"description": "Named entity: otimes with a semi-colon.",
-"input":"&otimes;",
-"output": [["Character", "\u2297"]]},
-
-{"description": "Named entity: ouml with a semi-colon.",
-"input":"&ouml;",
-"output": [["Character", "\u00F6"]]},
-
-{"description": "Named entity: ouml without a semi-colon.",
-"input":"&ouml",
-"output": ["ParseError", ["Character", "\u00F6"]]},
-
-{"description": "Named entity: para with a semi-colon.",
-"input":"&para;",
-"output": [["Character", "\u00B6"]]},
-
-{"description": "Named entity: para without a semi-colon.",
-"input":"&para",
-"output": ["ParseError", ["Character", "\u00B6"]]},
-
-{"description": "Named entity: part with a semi-colon.",
-"input":"&part;",
-"output": [["Character", "\u2202"]]},
-
-{"description": "Named entity: permil with a semi-colon.",
-"input":"&permil;",
-"output": [["Character", "\u2030"]]},
-
-{"description": "Named entity: perp with a semi-colon.",
-"input":"&perp;",
-"output": [["Character", "\u22A5"]]},
-
-{"description": "Named entity: phi with a semi-colon.",
-"input":"&phi;",
-"output": [["Character", "\u03C6"]]},
-
-{"description": "Named entity: pi with a semi-colon.",
-"input":"&pi;",
-"output": [["Character", "\u03C0"]]},
-
-{"description": "Named entity: piv with a semi-colon.",
-"input":"&piv;",
-"output": [["Character", "\u03D6"]]},
-
-{"description": "Named entity: plusmn with a semi-colon.",
-"input":"&plusmn;",
-"output": [["Character", "\u00B1"]]},
-
-{"description": "Named entity: plusmn without a semi-colon.",
-"input":"&plusmn",
-"output": ["ParseError", ["Character", "\u00B1"]]},
-
-{"description": "Named entity: pound with a semi-colon.",
-"input":"&pound;",
-"output": [["Character", "\u00A3"]]},
-
-{"description": "Named entity: pound without a semi-colon.",
-"input":"&pound",
-"output": ["ParseError", ["Character", "\u00A3"]]},
-
-{"description": "Named entity: prime with a semi-colon.",
-"input":"&prime;",
-"output": [["Character", "\u2032"]]},
-
-{"description": "Named entity: prod with a semi-colon.",
-"input":"&prod;",
-"output": [["Character", "\u220F"]]},
-
-{"description": "Named entity: prop with a semi-colon.",
-"input":"&prop;",
-"output": [["Character", "\u221D"]]},
-
-{"description": "Named entity: psi with a semi-colon.",
-"input":"&psi;",
-"output": [["Character", "\u03C8"]]},
-
-{"description": "Named entity: quot with a semi-colon.",
-"input":"&quot;",
-"output": [["Character", "\u0022"]]},
-
-{"description": "Named entity: quot without a semi-colon.",
-"input":"&quot",
-"output": ["ParseError", ["Character", "\u0022"]]},
-
-{"description": "Named entity: rArr with a semi-colon.",
-"input":"&rArr;",
-"output": [["Character", "\u21D2"]]},
-
-{"description": "Named entity: radic with a semi-colon.",
-"input":"&radic;",
-"output": [["Character", "\u221A"]]},
-
-{"description": "Named entity: rang with a semi-colon.",
-"input":"&rang;",
-"output": [["Character", "\u27E9"]]},
-
-{"description": "Named entity: raquo with a semi-colon.",
-"input":"&raquo;",
-"output": [["Character", "\u00BB"]]},
-
-{"description": "Named entity: raquo without a semi-colon.",
-"input":"&raquo",
-"output": ["ParseError", ["Character", "\u00BB"]]},
-
-{"description": "Named entity: rarr with a semi-colon.",
-"input":"&rarr;",
-"output": [["Character", "\u2192"]]},
-
-{"description": "Named entity: rceil with a semi-colon.",
-"input":"&rceil;",
-"output": [["Character", "\u2309"]]},
-
-{"description": "Named entity: rdquo with a semi-colon.",
-"input":"&rdquo;",
-"output": [["Character", "\u201D"]]},
-
-{"description": "Named entity: real with a semi-colon.",
-"input":"&real;",
-"output": [["Character", "\u211C"]]},
-
-{"description": "Named entity: reg with a semi-colon.",
-"input":"&reg;",
-"output": [["Character", "\u00AE"]]},
-
-{"description": "Named entity: reg without a semi-colon.",
-"input":"&reg",
-"output": ["ParseError", ["Character", "\u00AE"]]},
-
-{"description": "Named entity: rfloor with a semi-colon.",
-"input":"&rfloor;",
-"output": [["Character", "\u230B"]]},
-
-{"description": "Named entity: rho with a semi-colon.",
-"input":"&rho;",
-"output": [["Character", "\u03C1"]]},
-
-{"description": "Named entity: rlm with a semi-colon.",
-"input":"&rlm;",
-"output": [["Character", "\u200F"]]},
-
-{"description": "Named entity: rsaquo with a semi-colon.",
-"input":"&rsaquo;",
-"output": [["Character", "\u203A"]]},
-
-{"description": "Named entity: rsquo with a semi-colon.",
-"input":"&rsquo;",
-"output": [["Character", "\u2019"]]},
-
-{"description": "Named entity: sbquo with a semi-colon.",
-"input":"&sbquo;",
-"output": [["Character", "\u201A"]]},
-
-{"description": "Named entity: scaron with a semi-colon.",
-"input":"&scaron;",
-"output": [["Character", "\u0161"]]},
-
-{"description": "Named entity: sdot with a semi-colon.",
-"input":"&sdot;",
-"output": [["Character", "\u22C5"]]},
-
-{"description": "Named entity: sect with a semi-colon.",
-"input":"&sect;",
-"output": [["Character", "\u00A7"]]},
-
-{"description": "Named entity: sect without a semi-colon.",
-"input":"&sect",
-"output": ["ParseError", ["Character", "\u00A7"]]},
-
-{"description": "Named entity: shy with a semi-colon.",
-"input":"&shy;",
-"output": [["Character", "\u00AD"]]},
-
-{"description": "Named entity: shy without a semi-colon.",
-"input":"&shy",
-"output": ["ParseError", ["Character", "\u00AD"]]},
-
-{"description": "Named entity: sigma with a semi-colon.",
-"input":"&sigma;",
-"output": [["Character", "\u03C3"]]},
-
-{"description": "Named entity: sigmaf with a semi-colon.",
-"input":"&sigmaf;",
-"output": [["Character", "\u03C2"]]},
-
-{"description": "Named entity: sim with a semi-colon.",
-"input":"&sim;",
-"output": [["Character", "\u223C"]]},
-
-{"description": "Named entity: spades with a semi-colon.",
-"input":"&spades;",
-"output": [["Character", "\u2660"]]},
-
-{"description": "Named entity: sub with a semi-colon.",
-"input":"&sub;",
-"output": [["Character", "\u2282"]]},
-
-{"description": "Named entity: sube with a semi-colon.",
-"input":"&sube;",
-"output": [["Character", "\u2286"]]},
-
-{"description": "Named entity: sum with a semi-colon.",
-"input":"&sum;",
-"output": [["Character", "\u2211"]]},
-
-{"description": "Named entity: sup1 with a semi-colon.",
-"input":"&sup1;",
-"output": [["Character", "\u00B9"]]},
-
-{"description": "Named entity: sup1 without a semi-colon.",
-"input":"&sup1",
-"output": ["ParseError", ["Character", "\u00B9"]]},
-
-{"description": "Named entity: sup2 with a semi-colon.",
-"input":"&sup2;",
-"output": [["Character", "\u00B2"]]},
-
-{"description": "Named entity: sup2 without a semi-colon.",
-"input":"&sup2",
-"output": ["ParseError", ["Character", "\u00B2"]]},
-
-{"description": "Named entity: sup3 with a semi-colon.",
-"input":"&sup3;",
-"output": [["Character", "\u00B3"]]},
-
-{"description": "Named entity: sup3 without a semi-colon.",
-"input":"&sup3",
-"output": ["ParseError", ["Character", "\u00B3"]]},
-
-{"description": "Named entity: sup with a semi-colon.",
-"input":"&sup;",
-"output": [["Character", "\u2283"]]},
-
-{"description": "Named entity: supe with a semi-colon.",
-"input":"&supe;",
-"output": [["Character", "\u2287"]]},
-
-{"description": "Named entity: szlig with a semi-colon.",
-"input":"&szlig;",
-"output": [["Character", "\u00DF"]]},
-
-{"description": "Named entity: szlig without a semi-colon.",
-"input":"&szlig",
-"output": ["ParseError", ["Character", "\u00DF"]]},
-
-{"description": "Named entity: tau with a semi-colon.",
-"input":"&tau;",
-"output": [["Character", "\u03C4"]]},
-
-{"description": "Named entity: there4 with a semi-colon.",
-"input":"&there4;",
-"output": [["Character", "\u2234"]]},
-
-{"description": "Named entity: theta with a semi-colon.",
-"input":"&theta;",
-"output": [["Character", "\u03B8"]]},
-
-{"description": "Named entity: thetasym with a semi-colon.",
-"input":"&thetasym;",
-"output": [["Character", "\u03D1"]]},
-
-{"description": "Named entity: thinsp with a semi-colon.",
-"input":"&thinsp;",
-"output": [["Character", "\u2009"]]},
-
-{"description": "Named entity: thorn with a semi-colon.",
-"input":"&thorn;",
-"output": [["Character", "\u00FE"]]},
-
-{"description": "Named entity: thorn without a semi-colon.",
-"input":"&thorn",
-"output": ["ParseError", ["Character", "\u00FE"]]},
-
-{"description": "Named entity: tilde with a semi-colon.",
-"input":"&tilde;",
-"output": [["Character", "\u02DC"]]},
-
-{"description": "Named entity: times with a semi-colon.",
-"input":"&times;",
-"output": [["Character", "\u00D7"]]},
-
-{"description": "Named entity: times without a semi-colon.",
-"input":"&times",
-"output": ["ParseError", ["Character", "\u00D7"]]},
-
-{"description": "Named entity: trade with a semi-colon.",
-"input":"&trade;",
-"output": [["Character", "\u2122"]]},
-
-{"description": "Named entity: uArr with a semi-colon.",
-"input":"&uArr;",
-"output": [["Character", "\u21D1"]]},
-
-{"description": "Named entity: uacute with a semi-colon.",
-"input":"&uacute;",
-"output": [["Character", "\u00FA"]]},
-
-{"description": "Named entity: uacute without a semi-colon.",
-"input":"&uacute",
-"output": ["ParseError", ["Character", "\u00FA"]]},
-
-{"description": "Named entity: uarr with a semi-colon.",
-"input":"&uarr;",
-"output": [["Character", "\u2191"]]},
-
-{"description": "Named entity: ucirc with a semi-colon.",
-"input":"&ucirc;",
-"output": [["Character", "\u00FB"]]},
-
-{"description": "Named entity: ucirc without a semi-colon.",
-"input":"&ucirc",
-"output": ["ParseError", ["Character", "\u00FB"]]},
-
-{"description": "Named entity: ugrave with a semi-colon.",
-"input":"&ugrave;",
-"output": [["Character", "\u00F9"]]},
-
-{"description": "Named entity: ugrave without a semi-colon.",
-"input":"&ugrave",
-"output": ["ParseError", ["Character", "\u00F9"]]},
-
-{"description": "Named entity: uml with a semi-colon.",
-"input":"&uml;",
-"output": [["Character", "\u00A8"]]},
-
-{"description": "Named entity: uml without a semi-colon.",
-"input":"&uml",
-"output": ["ParseError", ["Character", "\u00A8"]]},
-
-{"description": "Named entity: upsih with a semi-colon.",
-"input":"&upsih;",
-"output": [["Character", "\u03D2"]]},
-
-{"description": "Named entity: upsilon with a semi-colon.",
-"input":"&upsilon;",
-"output": [["Character", "\u03C5"]]},
-
-{"description": "Named entity: uuml with a semi-colon.",
-"input":"&uuml;",
-"output": [["Character", "\u00FC"]]},
-
-{"description": "Named entity: uuml without a semi-colon.",
-"input":"&uuml",
-"output": ["ParseError", ["Character", "\u00FC"]]},
-
-{"description": "Named entity: weierp with a semi-colon.",
-"input":"&weierp;",
-"output": [["Character", "\u2118"]]},
-
-{"description": "Named entity: xi with a semi-colon.",
-"input":"&xi;",
-"output": [["Character", "\u03BE"]]},
-
-{"description": "Named entity: yacute with a semi-colon.",
-"input":"&yacute;",
-"output": [["Character", "\u00FD"]]},
-
-{"description": "Named entity: yacute without a semi-colon.",
-"input":"&yacute",
-"output": ["ParseError", ["Character", "\u00FD"]]},
-
-{"description": "Named entity: yen with a semi-colon.",
-"input":"&yen;",
-"output": [["Character", "\u00A5"]]},
-
-{"description": "Named entity: yen without a semi-colon.",
-"input":"&yen",
-"output": ["ParseError", ["Character", "\u00A5"]]},
-
-{"description": "Named entity: yuml with a semi-colon.",
-"input":"&yuml;",
-"output": [["Character", "\u00FF"]]},
-
-{"description": "Named entity: yuml without a semi-colon.",
-"input":"&yuml",
-"output": ["ParseError", ["Character", "\u00FF"]]},
-
-{"description": "Named entity: zeta with a semi-colon.",
-"input":"&zeta;",
-"output": [["Character", "\u03B6"]]},
-
-{"description": "Named entity: zwj with a semi-colon.",
-"input":"&zwj;",
-"output": [["Character", "\u200D"]]},
-
-{"description": "Named entity: zwnj with a semi-colon.",
-"input":"&zwnj;",
-"output": [["Character", "\u200C"]]},
-
-{"description": "Bad named entity: Alpha without a semi-colon.",
-"input":"&Alpha",
-"output": ["ParseError", ["Character", "&Alpha"]]},
-
-{"description": "Bad named entity: alpha without a semi-colon.",
-"input":"&alpha",
-"output": ["ParseError", ["Character", "&alpha"]]},
-
-{"description": "Bad named entity: and without a semi-colon.",
-"input":"&and",
-"output": ["ParseError", ["Character", "&and"]]},
-
-{"description": "Bad named entity: ang without a semi-colon.",
-"input":"&ang",
-"output": ["ParseError", ["Character", "&ang"]]},
-
-{"description": "Bad named entity: apos without a semi-colon.",
-"input":"&apos",
-"output": ["ParseError", ["Character", "&apos"]]},
-
-{"description": "Bad named entity: asymp without a semi-colon.",
-"input":"&asymp",
-"output": ["ParseError", ["Character", "&asymp"]]},
-
-{"description": "Bad named entity: bdquo without a semi-colon.",
-"input":"&bdquo",
-"output": ["ParseError", ["Character", "&bdquo"]]},
-
-{"description": "Bad named entity: Beta without a semi-colon.",
-"input":"&Beta",
-"output": ["ParseError", ["Character", "&Beta"]]},
-
-{"description": "Bad named entity: beta without a semi-colon.",
-"input":"&beta",
-"output": ["ParseError", ["Character", "&beta"]]},
-
-{"description": "Bad named entity: bull without a semi-colon.",
-"input":"&bull",
-"output": ["ParseError", ["Character", "&bull"]]},
-
-{"description": "Bad named entity: cap without a semi-colon.",
-"input":"&cap",
-"output": ["ParseError", ["Character", "&cap"]]},
-
-{"description": "Bad named entity: Chi without a semi-colon.",
-"input":"&Chi",
-"output": ["ParseError", ["Character", "&Chi"]]},
-
-{"description": "Bad named entity: chi without a semi-colon.",
-"input":"&chi",
-"output": ["ParseError", ["Character", "&chi"]]},
-
-{"description": "Bad named entity: circ without a semi-colon.",
-"input":"&circ",
-"output": ["ParseError", ["Character", "&circ"]]},
-
-{"description": "Bad named entity: clubs without a semi-colon.",
-"input":"&clubs",
-"output": ["ParseError", ["Character", "&clubs"]]},
-
-{"description": "Bad named entity: cong without a semi-colon.",
-"input":"&cong",
-"output": ["ParseError", ["Character", "&cong"]]},
-
-{"description": "Bad named entity: crarr without a semi-colon.",
-"input":"&crarr",
-"output": ["ParseError", ["Character", "&crarr"]]},
-
-{"description": "Bad named entity: cup without a semi-colon.",
-"input":"&cup",
-"output": ["ParseError", ["Character", "&cup"]]},
-
-{"description": "Bad named entity: dagger without a semi-colon.",
-"input":"&dagger",
-"output": ["ParseError", ["Character", "&dagger"]]},
-
-{"description": "Bad named entity: dagger without a semi-colon.",
-"input":"&dagger",
-"output": ["ParseError", ["Character", "&dagger"]]},
-
-{"description": "Bad named entity: darr without a semi-colon.",
-"input":"&darr",
-"output": ["ParseError", ["Character", "&darr"]]},
-
-{"description": "Bad named entity: darr without a semi-colon.",
-"input":"&darr",
-"output": ["ParseError", ["Character", "&darr"]]},
-
-{"description": "Bad named entity: Delta without a semi-colon.",
-"input":"&Delta",
-"output": ["ParseError", ["Character", "&Delta"]]},
-
-{"description": "Bad named entity: delta without a semi-colon.",
-"input":"&delta",
-"output": ["ParseError", ["Character", "&delta"]]},
-
-{"description": "Bad named entity: diams without a semi-colon.",
-"input":"&diams",
-"output": ["ParseError", ["Character", "&diams"]]},
-
-{"description": "Bad named entity: empty without a semi-colon.",
-"input":"&empty",
-"output": ["ParseError", ["Character", "&empty"]]},
-
-{"description": "Bad named entity: emsp without a semi-colon.",
-"input":"&emsp",
-"output": ["ParseError", ["Character", "&emsp"]]},
-
-{"description": "Bad named entity: ensp without a semi-colon.",
-"input":"&ensp",
-"output": ["ParseError", ["Character", "&ensp"]]},
-
-{"description": "Bad named entity: Epsilon without a semi-colon.",
-"input":"&Epsilon",
-"output": ["ParseError", ["Character", "&Epsilon"]]},
-
-{"description": "Bad named entity: epsilon without a semi-colon.",
-"input":"&epsilon",
-"output": ["ParseError", ["Character", "&epsilon"]]},
-
-{"description": "Bad named entity: equiv without a semi-colon.",
-"input":"&equiv",
-"output": ["ParseError", ["Character", "&equiv"]]},
-
-{"description": "Bad named entity: Eta without a semi-colon.",
-"input":"&Eta",
-"output": ["ParseError", ["Character", "&Eta"]]},
-
-{"description": "Bad named entity: eta without a semi-colon.",
-"input":"&eta",
-"output": ["ParseError", ["Character", "&eta"]]},
-
-{"description": "Bad named entity: euro without a semi-colon.",
-"input":"&euro",
-"output": ["ParseError", ["Character", "&euro"]]},
-
-{"description": "Bad named entity: exist without a semi-colon.",
-"input":"&exist",
-"output": ["ParseError", ["Character", "&exist"]]},
-
-{"description": "Bad named entity: fnof without a semi-colon.",
-"input":"&fnof",
-"output": ["ParseError", ["Character", "&fnof"]]},
-
-{"description": "Bad named entity: forall without a semi-colon.",
-"input":"&forall",
-"output": ["ParseError", ["Character", "&forall"]]},
-
-{"description": "Bad named entity: frasl without a semi-colon.",
-"input":"&frasl",
-"output": ["ParseError", ["Character", "&frasl"]]},
-
-{"description": "Bad named entity: Gamma without a semi-colon.",
-"input":"&Gamma",
-"output": ["ParseError", ["Character", "&Gamma"]]},
-
-{"description": "Bad named entity: gamma without a semi-colon.",
-"input":"&gamma",
-"output": ["ParseError", ["Character", "&gamma"]]},
-
-{"description": "Bad named entity: ge without a semi-colon.",
-"input":"&ge",
-"output": ["ParseError", ["Character", "&ge"]]},
-
-{"description": "Bad named entity: harr without a semi-colon.",
-"input":"&harr",
-"output": ["ParseError", ["Character", "&harr"]]},
-
-{"description": "Bad named entity: harr without a semi-colon.",
-"input":"&harr",
-"output": ["ParseError", ["Character", "&harr"]]},
-
-{"description": "Bad named entity: hearts without a semi-colon.",
-"input":"&hearts",
-"output": ["ParseError", ["Character", "&hearts"]]},
-
-{"description": "Bad named entity: hellip without a semi-colon.",
-"input":"&hellip",
-"output": ["ParseError", ["Character", "&hellip"]]},
-
-{"description": "Bad named entity: image without a semi-colon.",
-"input":"&image",
-"output": ["ParseError", ["Character", "&image"]]},
-
-{"description": "Bad named entity: infin without a semi-colon.",
-"input":"&infin",
-"output": ["ParseError", ["Character", "&infin"]]},
-
-{"description": "Bad named entity: int without a semi-colon.",
-"input":"&int",
-"output": ["ParseError", ["Character", "&int"]]},
-
-{"description": "Bad named entity: Iota without a semi-colon.",
-"input":"&Iota",
-"output": ["ParseError", ["Character", "&Iota"]]},
-
-{"description": "Bad named entity: iota without a semi-colon.",
-"input":"&iota",
-"output": ["ParseError", ["Character", "&iota"]]},
-
-{"description": "Bad named entity: isin without a semi-colon.",
-"input":"&isin",
-"output": ["ParseError", ["Character", "&isin"]]},
-
-{"description": "Bad named entity: Kappa without a semi-colon.",
-"input":"&Kappa",
-"output": ["ParseError", ["Character", "&Kappa"]]},
-
-{"description": "Bad named entity: kappa without a semi-colon.",
-"input":"&kappa",
-"output": ["ParseError", ["Character", "&kappa"]]},
-
-{"description": "Bad named entity: Lambda without a semi-colon.",
-"input":"&Lambda",
-"output": ["ParseError", ["Character", "&Lambda"]]},
-
-{"description": "Bad named entity: lambda without a semi-colon.",
-"input":"&lambda",
-"output": ["ParseError", ["Character", "&lambda"]]},
-
-{"description": "Bad named entity: lang without a semi-colon.",
-"input":"&lang",
-"output": ["ParseError", ["Character", "&lang"]]},
-
-{"description": "Bad named entity: larr without a semi-colon.",
-"input":"&larr",
-"output": ["ParseError", ["Character", "&larr"]]},
-
-{"description": "Bad named entity: larr without a semi-colon.",
-"input":"&larr",
-"output": ["ParseError", ["Character", "&larr"]]},
-
-{"description": "Bad named entity: lceil without a semi-colon.",
-"input":"&lceil",
-"output": ["ParseError", ["Character", "&lceil"]]},
-
-{"description": "Bad named entity: ldquo without a semi-colon.",
-"input":"&ldquo",
-"output": ["ParseError", ["Character", "&ldquo"]]},
-
-{"description": "Bad named entity: le without a semi-colon.",
-"input":"&le",
-"output": ["ParseError", ["Character", "&le"]]},
-
-{"description": "Bad named entity: lfloor without a semi-colon.",
-"input":"&lfloor",
-"output": ["ParseError", ["Character", "&lfloor"]]},
-
-{"description": "Bad named entity: lowast without a semi-colon.",
-"input":"&lowast",
-"output": ["ParseError", ["Character", "&lowast"]]},
-
-{"description": "Bad named entity: loz without a semi-colon.",
-"input":"&loz",
-"output": ["ParseError", ["Character", "&loz"]]},
-
-{"description": "Bad named entity: lrm without a semi-colon.",
-"input":"&lrm",
-"output": ["ParseError", ["Character", "&lrm"]]},
-
-{"description": "Bad named entity: lsaquo without a semi-colon.",
-"input":"&lsaquo",
-"output": ["ParseError", ["Character", "&lsaquo"]]},
-
-{"description": "Bad named entity: lsquo without a semi-colon.",
-"input":"&lsquo",
-"output": ["ParseError", ["Character", "&lsquo"]]},
-
-{"description": "Bad named entity: mdash without a semi-colon.",
-"input":"&mdash",
-"output": ["ParseError", ["Character", "&mdash"]]},
-
-{"description": "Bad named entity: minus without a semi-colon.",
-"input":"&minus",
-"output": ["ParseError", ["Character", "&minus"]]},
-
-{"description": "Bad named entity: Mu without a semi-colon.",
-"input":"&Mu",
-"output": ["ParseError", ["Character", "&Mu"]]},
-
-{"description": "Bad named entity: mu without a semi-colon.",
-"input":"&mu",
-"output": ["ParseError", ["Character", "&mu"]]},
-
-{"description": "Bad named entity: nabla without a semi-colon.",
-"input":"&nabla",
-"output": ["ParseError", ["Character", "&nabla"]]},
-
-{"description": "Bad named entity: ndash without a semi-colon.",
-"input":"&ndash",
-"output": ["ParseError", ["Character", "&ndash"]]},
-
-{"description": "Bad named entity: ne without a semi-colon.",
-"input":"&ne",
-"output": ["ParseError", ["Character", "&ne"]]},
-
-{"description": "Bad named entity: ni without a semi-colon.",
-"input":"&ni",
-"output": ["ParseError", ["Character", "&ni"]]},
-
-{"description": "Bad named entity: notin without a semi-colon.",
-"input":"&notin",
-"output": ["ParseError", ["Character", "\u00ACin"]]},
-
-{"description": "Bad named entity: nsub without a semi-colon.",
-"input":"&nsub",
-"output": ["ParseError", ["Character", "&nsub"]]},
-
-{"description": "Bad named entity: Nu without a semi-colon.",
-"input":"&Nu",
-"output": ["ParseError", ["Character", "&Nu"]]},
-
-{"description": "Bad named entity: nu without a semi-colon.",
-"input":"&nu",
-"output": ["ParseError", ["Character", "&nu"]]},
-
-{"description": "Bad named entity: OElig without a semi-colon.",
-"input":"&OElig",
-"output": ["ParseError", ["Character", "&OElig"]]},
-
-{"description": "Bad named entity: oelig without a semi-colon.",
-"input":"&oelig",
-"output": ["ParseError", ["Character", "&oelig"]]},
-
-{"description": "Bad named entity: oline without a semi-colon.",
-"input":"&oline",
-"output": ["ParseError", ["Character", "&oline"]]},
-
-{"description": "Bad named entity: Omega without a semi-colon.",
-"input":"&Omega",
-"output": ["ParseError", ["Character", "&Omega"]]},
-
-{"description": "Bad named entity: omega without a semi-colon.",
-"input":"&omega",
-"output": ["ParseError", ["Character", "&omega"]]},
-
-{"description": "Bad named entity: Omicron without a semi-colon.",
-"input":"&Omicron",
-"output": ["ParseError", ["Character", "&Omicron"]]},
-
-{"description": "Bad named entity: omicron without a semi-colon.",
-"input":"&omicron",
-"output": ["ParseError", ["Character", "&omicron"]]},
-
-{"description": "Bad named entity: oplus without a semi-colon.",
-"input":"&oplus",
-"output": ["ParseError", ["Character", "&oplus"]]},
-
-{"description": "Bad named entity: or without a semi-colon.",
-"input":"&or",
-"output": ["ParseError", ["Character", "&or"]]},
-
-{"description": "Bad named entity: otimes without a semi-colon.",
-"input":"&otimes",
-"output": ["ParseError", ["Character", "&otimes"]]},
-
-{"description": "Bad named entity: part without a semi-colon.",
-"input":"&part",
-"output": ["ParseError", ["Character", "&part"]]},
-
-{"description": "Bad named entity: permil without a semi-colon.",
-"input":"&permil",
-"output": ["ParseError", ["Character", "&permil"]]},
-
-{"description": "Bad named entity: perp without a semi-colon.",
-"input":"&perp",
-"output": ["ParseError", ["Character", "&perp"]]},
-
-{"description": "Bad named entity: Phi without a semi-colon.",
-"input":"&Phi",
-"output": ["ParseError", ["Character", "&Phi"]]},
-
-{"description": "Bad named entity: phi without a semi-colon.",
-"input":"&phi",
-"output": ["ParseError", ["Character", "&phi"]]},
-
-{"description": "Bad named entity: Pi without a semi-colon.",
-"input":"&Pi",
-"output": ["ParseError", ["Character", "&Pi"]]},
-
-{"description": "Bad named entity: pi without a semi-colon.",
-"input":"&pi",
-"output": ["ParseError", ["Character", "&pi"]]},
-
-{"description": "Bad named entity: piv without a semi-colon.",
-"input":"&piv",
-"output": ["ParseError", ["Character", "&piv"]]},
-
-{"description": "Bad named entity: prime without a semi-colon.",
-"input":"&prime",
-"output": ["ParseError", ["Character", "&prime"]]},
-
-{"description": "Bad named entity: prime without a semi-colon.",
-"input":"&prime",
-"output": ["ParseError", ["Character", "&prime"]]},
-
-{"description": "Bad named entity: prod without a semi-colon.",
-"input":"&prod",
-"output": ["ParseError", ["Character", "&prod"]]},
-
-{"description": "Bad named entity: prop without a semi-colon.",
-"input":"&prop",
-"output": ["ParseError", ["Character", "&prop"]]},
-
-{"description": "Bad named entity: Psi without a semi-colon.",
-"input":"&Psi",
-"output": ["ParseError", ["Character", "&Psi"]]},
-
-{"description": "Bad named entity: psi without a semi-colon.",
-"input":"&psi",
-"output": ["ParseError", ["Character", "&psi"]]},
-
-{"description": "Bad named entity: radic without a semi-colon.",
-"input":"&radic",
-"output": ["ParseError", ["Character", "&radic"]]},
-
-{"description": "Bad named entity: rang without a semi-colon.",
-"input":"&rang",
-"output": ["ParseError", ["Character", "&rang"]]},
-
-{"description": "Bad named entity: rarr without a semi-colon.",
-"input":"&rarr",
-"output": ["ParseError", ["Character", "&rarr"]]},
-
-{"description": "Bad named entity: rarr without a semi-colon.",
-"input":"&rarr",
-"output": ["ParseError", ["Character", "&rarr"]]},
-
-{"description": "Bad named entity: rceil without a semi-colon.",
-"input":"&rceil",
-"output": ["ParseError", ["Character", "&rceil"]]},
-
-{"description": "Bad named entity: rdquo without a semi-colon.",
-"input":"&rdquo",
-"output": ["ParseError", ["Character", "&rdquo"]]},
-
-{"description": "Bad named entity: real without a semi-colon.",
-"input":"&real",
-"output": ["ParseError", ["Character", "&real"]]},
-
-{"description": "Bad named entity: rfloor without a semi-colon.",
-"input":"&rfloor",
-"output": ["ParseError", ["Character", "&rfloor"]]},
-
-{"description": "Bad named entity: Rho without a semi-colon.",
-"input":"&Rho",
-"output": ["ParseError", ["Character", "&Rho"]]},
-
-{"description": "Bad named entity: rho without a semi-colon.",
-"input":"&rho",
-"output": ["ParseError", ["Character", "&rho"]]},
-
-{"description": "Bad named entity: rlm without a semi-colon.",
-"input":"&rlm",
-"output": ["ParseError", ["Character", "&rlm"]]},
-
-{"description": "Bad named entity: rsaquo without a semi-colon.",
-"input":"&rsaquo",
-"output": ["ParseError", ["Character", "&rsaquo"]]},
-
-{"description": "Bad named entity: rsquo without a semi-colon.",
-"input":"&rsquo",
-"output": ["ParseError", ["Character", "&rsquo"]]},
-
-{"description": "Bad named entity: sbquo without a semi-colon.",
-"input":"&sbquo",
-"output": ["ParseError", ["Character", "&sbquo"]]},
-
-{"description": "Bad named entity: Scaron without a semi-colon.",
-"input":"&Scaron",
-"output": ["ParseError", ["Character", "&Scaron"]]},
-
-{"description": "Bad named entity: scaron without a semi-colon.",
-"input":"&scaron",
-"output": ["ParseError", ["Character", "&scaron"]]},
-
-{"description": "Bad named entity: sdot without a semi-colon.",
-"input":"&sdot",
-"output": ["ParseError", ["Character", "&sdot"]]},
-
-{"description": "Bad named entity: Sigma without a semi-colon.",
-"input":"&Sigma",
-"output": ["ParseError", ["Character", "&Sigma"]]},
-
-{"description": "Bad named entity: sigma without a semi-colon.",
-"input":"&sigma",
-"output": ["ParseError", ["Character", "&sigma"]]},
-
-{"description": "Bad named entity: sigmaf without a semi-colon.",
-"input":"&sigmaf",
-"output": ["ParseError", ["Character", "&sigmaf"]]},
-
-{"description": "Bad named entity: sim without a semi-colon.",
-"input":"&sim",
-"output": ["ParseError", ["Character", "&sim"]]},
-
-{"description": "Bad named entity: spades without a semi-colon.",
-"input":"&spades",
-"output": ["ParseError", ["Character", "&spades"]]},
-
-{"description": "Bad named entity: sub without a semi-colon.",
-"input":"&sub",
-"output": ["ParseError", ["Character", "&sub"]]},
-
-{"description": "Bad named entity: sube without a semi-colon.",
-"input":"&sube",
-"output": ["ParseError", ["Character", "&sube"]]},
-
-{"description": "Bad named entity: sum without a semi-colon.",
-"input":"&sum",
-"output": ["ParseError", ["Character", "&sum"]]},
-
-{"description": "Bad named entity: sup without a semi-colon.",
-"input":"&sup",
-"output": ["ParseError", ["Character", "&sup"]]},
-
-{"description": "Bad named entity: supe without a semi-colon.",
-"input":"&supe",
-"output": ["ParseError", ["Character", "&supe"]]},
-
-{"description": "Bad named entity: Tau without a semi-colon.",
-"input":"&Tau",
-"output": ["ParseError", ["Character", "&Tau"]]},
-
-{"description": "Bad named entity: tau without a semi-colon.",
-"input":"&tau",
-"output": ["ParseError", ["Character", "&tau"]]},
-
-{"description": "Bad named entity: there4 without a semi-colon.",
-"input":"&there4",
-"output": ["ParseError", ["Character", "&there4"]]},
-
-{"description": "Bad named entity: Theta without a semi-colon.",
-"input":"&Theta",
-"output": ["ParseError", ["Character", "&Theta"]]},
-
-{"description": "Bad named entity: theta without a semi-colon.",
-"input":"&theta",
-"output": ["ParseError", ["Character", "&theta"]]},
-
-{"description": "Bad named entity: thetasym without a semi-colon.",
-"input":"&thetasym",
-"output": ["ParseError", ["Character", "&thetasym"]]},
-
-{"description": "Bad named entity: thinsp without a semi-colon.",
-"input":"&thinsp",
-"output": ["ParseError", ["Character", "&thinsp"]]},
-
-{"description": "Bad named entity: tilde without a semi-colon.",
-"input":"&tilde",
-"output": ["ParseError", ["Character", "&tilde"]]},
-
-{"description": "Bad named entity: trade without a semi-colon.",
-"input":"&trade",
-"output": ["ParseError", ["Character", "&trade"]]},
-
-{"description": "Bad named entity: uarr without a semi-colon.",
-"input":"&uarr",
-"output": ["ParseError", ["Character", "&uarr"]]},
-
-{"description": "Bad named entity: uarr without a semi-colon.",
-"input":"&uarr",
-"output": ["ParseError", ["Character", "&uarr"]]},
-
-{"description": "Bad named entity: upsih without a semi-colon.",
-"input":"&upsih",
-"output": ["ParseError", ["Character", "&upsih"]]},
-
-{"description": "Bad named entity: Upsilon without a semi-colon.",
-"input":"&Upsilon",
-"output": ["ParseError", ["Character", "&Upsilon"]]},
-
-{"description": "Bad named entity: upsilon without a semi-colon.",
-"input":"&upsilon",
-"output": ["ParseError", ["Character", "&upsilon"]]},
-
-{"description": "Bad named entity: weierp without a semi-colon.",
-"input":"&weierp",
-"output": ["ParseError", ["Character", "&weierp"]]},
-
-{"description": "Bad named entity: Xi without a semi-colon.",
-"input":"&Xi",
-"output": ["ParseError", ["Character", "&Xi"]]},
-
-{"description": "Bad named entity: xi without a semi-colon.",
-"input":"&xi",
-"output": ["ParseError", ["Character", "&xi"]]},
-
-{"description": "Bad named entity: Yuml without a semi-colon.",
-"input":"&Yuml",
-"output": ["ParseError", ["Character", "&Yuml"]]},
-
-{"description": "Bad named entity: Zeta without a semi-colon.",
-"input":"&Zeta",
-"output": ["ParseError", ["Character", "&Zeta"]]},
-
-{"description": "Bad named entity: zeta without a semi-colon.",
-"input":"&zeta",
-"output": ["ParseError", ["Character", "&zeta"]]},
-
-{"description": "Bad named entity: zwj without a semi-colon.",
-"input":"&zwj",
-"output": ["ParseError", ["Character", "&zwj"]]},
-
-{"description": "Bad named entity: zwnj without a semi-colon.",
-"input":"&zwnj",
-"output": ["ParseError", ["Character", "&zwnj"]]},
-
-{"description": "Bad named entity: zwnj without a semi-colon.",
-"input":"&zwnj",
-"output": ["ParseError", ["Character", "&zwnj"]]},
+{"description": "Entity name followed by the equals sign in an attribute value.",
+"input":"<h a='&lang='>",
+"output": [["StartTag", "h", {"a": "&lang="}]]},

{"description": "CR as numeric entity",
"input":"&#013;",
-"output": ["ParseError", ["Character", "\n"]]},
+"output": ["ParseError", ["Character", "\r"]]},

{"description": "CR as hexadecimal numeric entity",
"input":"&#x00D;",
-"output": ["ParseError", ["Character", "\n"]]},
+"output": ["ParseError", ["Character", "\r"]]},

{"description": "Windows-1252 EURO SIGN numeric entity.",
"input":"&#0128;",
@@ -2094,7 +22,7 @@

{"description": "Windows-1252 REPLACEMENT CHAR numeric entity.",
"input":"&#0129;",
-"output": ["ParseError", ["Character", "\uFFFD"]]},
+"output": ["ParseError", ["Character", "\u0081"]]},

{"description": "Windows-1252 SINGLE LOW-9 QUOTATION MARK numeric entity.",
"input":"&#0130;",
@@ -2142,7 +70,7 @@

{"description": "Windows-1252 REPLACEMENT CHAR numeric entity.",
"input":"&#0141;",
-"output": ["ParseError", ["Character", "\uFFFD"]]},
+"output": ["ParseError", ["Character", "\u008D"]]},

{"description": "Windows-1252 LATIN CAPITAL LETTER Z WITH CARON numeric entity.",
"input":"&#0142;",
@@ -2150,11 +78,11 @@

{"description": "Windows-1252 REPLACEMENT CHAR numeric entity.",
"input":"&#0143;",
-"output": ["ParseError", ["Character", "\uFFFD"]]},
+"output": ["ParseError", ["Character", "\u008F"]]},

{"description": "Windows-1252 REPLACEMENT CHAR numeric entity.",
"input":"&#0144;",
-"output": ["ParseError", ["Character", "\uFFFD"]]},
+"output": ["ParseError", ["Character", "\u0090"]]},

{"description": "Windows-1252 LEFT SINGLE QUOTATION MARK numeric entity.",
"input":"&#0145;",
@@ -2206,7 +134,7 @@

{"description": "Windows-1252 REPLACEMENT CHAR numeric entity.",
"input":"&#0157;",
-"output": ["ParseError", ["Character", "\uFFFD"]]},
+"output": ["ParseError", ["Character", "\u009D"]]},

{"description": "Windows-1252 EURO SIGN hexadecimal numeric entity.",
"input":"&#x080;",
@@ -2214,7 +142,7 @@

{"description": "Windows-1252 REPLACEMENT CHAR hexadecimal numeric entity.",
"input":"&#x081;",
-"output": ["ParseError", ["Character", "\uFFFD"]]},
+"output": ["ParseError", ["Character", "\u0081"]]},

{"description": "Windows-1252 SINGLE LOW-9 QUOTATION MARK hexadecimal numeric entity.",
"input":"&#x082;",
@@ -2262,7 +190,7 @@

{"description": "Windows-1252 REPLACEMENT CHAR hexadecimal numeric entity.",
"input":"&#x08D;",
-"output": ["ParseError", ["Character", "\uFFFD"]]},
+"output": ["ParseError", ["Character", "\u008D"]]},

{"description": "Windows-1252 LATIN CAPITAL LETTER Z WITH CARON hexadecimal numeric entity.",
"input":"&#x08E;",
@@ -2270,11 +198,11 @@

{"description": "Windows-1252 REPLACEMENT CHAR hexadecimal numeric entity.",
"input":"&#x08F;",
-"output": ["ParseError", ["Character", "\uFFFD"]]},
+"output": ["ParseError", ["Character", "\u008F"]]},

{"description": "Windows-1252 REPLACEMENT CHAR hexadecimal numeric entity.",
"input":"&#x090;",
-"output": ["ParseError", ["Character", "\uFFFD"]]},
+"output": ["ParseError", ["Character", "\u0090"]]},

{"description": "Windows-1252 LEFT SINGLE QUOTATION MARK hexadecimal numeric entity.",
"input":"&#x091;",
@@ -2326,7 +254,7 @@

{"description": "Windows-1252 REPLACEMENT CHAR hexadecimal numeric entity.",
"input":"&#x09D;",
-"output": ["ParseError", ["Character", "\uFFFD"]]},
+"output": ["ParseError", ["Character", "\u009D"]]},

{"description": "Windows-1252 LATIN SMALL LETTER Z WITH CARON hexadecimal numeric entity.",
"input":"&#x09E;",
@@ -2334,6 +262,22 @@

{"description": "Windows-1252 LATIN CAPITAL LETTER Y WITH DIAERESIS hexadecimal numeric entity.",
"input":"&#x09F;",
-"output": ["ParseError", ["Character", "\u0178"]]}
+"output": ["ParseError", ["Character", "\u0178"]]},
+
+{"description": "Decimal numeric entity followed by hex character a.",
+"input":"&#97a",
+"output": ["ParseError", ["Character", "aa"]]},
+
+{"description": "Decimal numeric entity followed by hex character A.",
+"input":"&#97A",
+"output": ["ParseError", ["Character", "aA"]]},
+
+{"description": "Decimal numeric entity followed by hex character f.",
+"input":"&#97f",
+"output": ["ParseError", ["Character", "af"]]},
+
+{"description": "Decimal numeric entity followed by hex character F.",
+"input":"&#97F",
+"output": ["ParseError", ["Character", "aF"]]}

]}
diff --git a/test/data/tokeniser2/numericEntities.test b/test/data/tokeniser2/numericEntities.test
index 78a8a13..36c8228 100644
--- a/test/data/tokeniser2/numericEntities.test
+++ b/test/data/tokeniser2/numericEntities.test
@@ -6,115 +6,115 @@

{"description": "Invalid numeric entity character U+0001",
"input": "&#x0001;",
-"output": ["ParseError", ["Character", "\uFFFD"]]},
+"output": ["ParseError", ["Character", "\u0001"]]},

{"description": "Invalid numeric entity character U+0002",
"input": "&#x0002;",
-"output": ["ParseError", ["Character", "\uFFFD"]]},
+"output": ["ParseError", ["Character", "\u0002"]]},

{"description": "Invalid numeric entity character U+0003",
"input": "&#x0003;",
-"output": ["ParseError", ["Character", "\uFFFD"]]},
+"output": ["ParseError", ["Character", "\u0003"]]},

{"description": "Invalid numeric entity character U+0004",
"input": "&#x0004;",
-"output": ["ParseError", ["Character", "\uFFFD"]]},
+"output": ["ParseError", ["Character", "\u0004"]]},

{"description": "Invalid numeric entity character U+0005",
"input": "&#x0005;",
-"output": ["ParseError", ["Character", "\uFFFD"]]},
+"output": ["ParseError", ["Character", "\u0005"]]},

{"description": "Invalid numeric entity character U+0006",
"input": "&#x0006;",
-"output": ["ParseError", ["Character", "\uFFFD"]]},
+"output": ["ParseError", ["Character", "\u0006"]]},

{"description": "Invalid numeric entity character U+0007",
"input": "&#x0007;",
-"output": ["ParseError", ["Character", "\uFFFD"]]},
+"output": ["ParseError", ["Character", "\u0007"]]},

{"description": "Invalid numeric entity character U+0008",
"input": "&#x0008;",
-"output": ["ParseError", ["Character", "\uFFFD"]]},
+"output": ["ParseError", ["Character", "\u0008"]]},

{"description": "Invalid numeric entity character U+000B",
"input": "&#x000b;",
-"output": ["ParseError", ["Character", "\uFFFD"]]},
+"output": ["ParseError", ["Character", "\u000b"]]},

{"description": "Invalid numeric entity character U+000E",
"input": "&#x000e;",
-"output": ["ParseError", ["Character", "\uFFFD"]]},
+"output": ["ParseError", ["Character", "\u000e"]]},

{"description": "Invalid numeric entity character U+000F",
"input": "&#x000f;",
-"output": ["ParseError", ["Character", "\uFFFD"]]},
+"output": ["ParseError", ["Character", "\u000f"]]},

{"description": "Invalid numeric entity character U+0010",
"input": "&#x0010;",
-"output": ["ParseError", ["Character", "\uFFFD"]]},
+"output": ["ParseError", ["Character", "\u0010"]]},

{"description": "Invalid numeric entity character U+0011",
"input": "&#x0011;",
-"output": ["ParseError", ["Character", "\uFFFD"]]},
+"output": ["ParseError", ["Character", "\u0011"]]},

{"description": "Invalid numeric entity character U+0012",
"input": "&#x0012;",
-"output": ["ParseError", ["Character", "\uFFFD"]]},
+"output": ["ParseError", ["Character", "\u0012"]]},

{"description": "Invalid numeric entity character U+0013",
"input": "&#x0013;",
-"output": ["ParseError", ["Character", "\uFFFD"]]},
+"output": ["ParseError", ["Character", "\u0013"]]},

{"description": "Invalid numeric entity character U+0014",
"input": "&#x0014;",
-"output": ["ParseError", ["Character", "\uFFFD"]]},
+"output": ["ParseError", ["Character", "\u0014"]]},

{"description": "Invalid numeric entity character U+0015",
"input": "&#x0015;",
-"output": ["ParseError", ["Character", "\uFFFD"]]},
+"output": ["ParseError", ["Character", "\u0015"]]},

{"description": "Invalid numeric entity character U+0016",
"input": "&#x0016;",
-"output": ["ParseError", ["Character", "\uFFFD"]]},
+"output": ["ParseError", ["Character", "\u0016"]]},

{"description": "Invalid numeric entity character U+0017",
"input": "&#x0017;",
-"output": ["ParseError", ["Character", "\uFFFD"]]},
+"output": ["ParseError", ["Character", "\u0017"]]},

{"description": "Invalid numeric entity character U+0018",
"input": "&#x0018;",
-"output": ["ParseError", ["Character", "\uFFFD"]]},
+"output": ["ParseError", ["Character", "\u0018"]]},

{"description": "Invalid numeric entity character U+0019",
"input": "&#x0019;",
-"output": ["ParseError", ["Character", "\uFFFD"]]},
+"output": ["ParseError", ["Character", "\u0019"]]},

{"description": "Invalid numeric entity character U+001A",
"input": "&#x001a;",
-"output": ["ParseError", ["Character", "\uFFFD"]]},
+"output": ["ParseError", ["Character", "\u001a"]]},

{"description": "Invalid numeric entity character U+001B",
"input": "&#x001b;",
-"output": ["ParseError", ["Character", "\uFFFD"]]},
+"output": ["ParseError", ["Character", "\u001b"]]},

{"description": "Invalid numeric entity character U+001C",
"input": "&#x001c;",
-"output": ["ParseError", ["Character", "\uFFFD"]]},
+"output": ["ParseError", ["Character", "\u001c"]]},

{"description": "Invalid numeric entity character U+001D",
"input": "&#x001d;",
-"output": ["ParseError", ["Character", "\uFFFD"]]},
+"output": ["ParseError", ["Character", "\u001d"]]},

{"description": "Invalid numeric entity character U+001E",
"input": "&#x001e;",
-"output": ["ParseError", ["Character", "\uFFFD"]]},
+"output": ["ParseError", ["Character", "\u001e"]]},

{"description": "Invalid numeric entity character U+001F",
"input": "&#x001f;",
-"output": ["ParseError", ["Character", "\uFFFD"]]},
+"output": ["ParseError", ["Character", "\u001f"]]},

{"description": "Invalid numeric entity character U+007F",
"input": "&#x007f;",
-"output": ["ParseError", ["Character", "\uFFFD"]]},
+"output": ["ParseError", ["Character", "\u007f"]]},

{"description": "Invalid numeric entity character U+D800",
"input": "&#xd800;",
@@ -126,267 +126,267 @@

{"description": "Invalid numeric entity character U+FDD0",
"input": "&#xfdd0;",
-"output": ["ParseError", ["Character", "\uFFFD"]]},
+"output": ["ParseError", ["Character", "\ufdd0"]]},

{"description": "Invalid numeric entity character U+FDD1",
"input": "&#xfdd1;",
-"output": ["ParseError", ["Character", "\uFFFD"]]},
+"output": ["ParseError", ["Character", "\ufdd1"]]},

{"description": "Invalid numeric entity character U+FDD2",
"input": "&#xfdd2;",
-"output": ["ParseError", ["Character", "\uFFFD"]]},
+"output": ["ParseError", ["Character", "\ufdd2"]]},

{"description": "Invalid numeric entity character U+FDD3",
"input": "&#xfdd3;",
-"output": ["ParseError", ["Character", "\uFFFD"]]},
+"output": ["ParseError", ["Character", "\ufdd3"]]},

{"description": "Invalid numeric entity character U+FDD4",
"input": "&#xfdd4;",
-"output": ["ParseError", ["Character", "\uFFFD"]]},
+"output": ["ParseError", ["Character", "\ufdd4"]]},

{"description": "Invalid numeric entity character U+FDD5",
"input": "&#xfdd5;",
-"output": ["ParseError", ["Character", "\uFFFD"]]},
+"output": ["ParseError", ["Character", "\ufdd5"]]},

{"description": "Invalid numeric entity character U+FDD6",
"input": "&#xfdd6;",
-"output": ["ParseError", ["Character", "\uFFFD"]]},
+"output": ["ParseError", ["Character", "\ufdd6"]]},

{"description": "Invalid numeric entity character U+FDD7",
"input": "&#xfdd7;",
-"output": ["ParseError", ["Character", "\uFFFD"]]},
+"output": ["ParseError", ["Character", "\ufdd7"]]},

{"description": "Invalid numeric entity character U+FDD8",
"input": "&#xfdd8;",
-"output": ["ParseError", ["Character", "\uFFFD"]]},
+"output": ["ParseError", ["Character", "\ufdd8"]]},

{"description": "Invalid numeric entity character U+FDD9",
"input": "&#xfdd9;",
-"output": ["ParseError", ["Character", "\uFFFD"]]},
+"output": ["ParseError", ["Character", "\ufdd9"]]},

{"description": "Invalid numeric entity character U+FDDA",
"input": "&#xfdda;",
-"output": ["ParseError", ["Character", "\uFFFD"]]},
+"output": ["ParseError", ["Character", "\ufdda"]]},

{"description": "Invalid numeric entity character U+FDDB",
"input": "&#xfddb;",
-"output": ["ParseError", ["Character", "\uFFFD"]]},
+"output": ["ParseError", ["Character", "\ufddb"]]},

{"description": "Invalid numeric entity character U+FDDC",
"input": "&#xfddc;",
-"output": ["ParseError", ["Character", "\uFFFD"]]},
+"output": ["ParseError", ["Character", "\ufddc"]]},

{"description": "Invalid numeric entity character U+FDDD",
"input": "&#xfddd;",
-"output": ["ParseError", ["Character", "\uFFFD"]]},
+"output": ["ParseError", ["Character", "\ufddd"]]},

{"description": "Invalid numeric entity character U+FDDE",
"input": "&#xfdde;",
-"output": ["ParseError", ["Character", "\uFFFD"]]},
+"output": ["ParseError", ["Character", "\ufdde"]]},

{"description": "Invalid numeric entity character U+FDDF",
"input": "&#xfddf;",
-"output": ["ParseError", ["Character", "\uFFFD"]]},
+"output": ["ParseError", ["Character", "\ufddf"]]},

{"description": "Invalid numeric entity character U+FDE0",
"input": "&#xfde0;",
-"output": ["ParseError", ["Character", "\uFFFD"]]},
+"output": ["ParseError", ["Character", "\ufde0"]]},

{"description": "Invalid numeric entity character U+FDE1",
"input": "&#xfde1;",
-"output": ["ParseError", ["Character", "\uFFFD"]]},
+"output": ["ParseError", ["Character", "\ufde1"]]},

{"description": "Invalid numeric entity character U+FDE2",
"input": "&#xfde2;",
-"output": ["ParseError", ["Character", "\uFFFD"]]},
+"output": ["ParseError", ["Character", "\ufde2"]]},

{"description": "Invalid numeric entity character U+FDE3",
"input": "&#xfde3;",
-"output": ["ParseError", ["Character", "\uFFFD"]]},
+"output": ["ParseError", ["Character", "\ufde3"]]},

{"description": "Invalid numeric entity character U+FDE4",
"input": "&#xfde4;",
-"output": ["ParseError", ["Character", "\uFFFD"]]},
+"output": ["ParseError", ["Character", "\ufde4"]]},

{"description": "Invalid numeric entity character U+FDE5",
"input": "&#xfde5;",
-"output": ["ParseError", ["Character", "\uFFFD"]]},
+"output": ["ParseError", ["Character", "\ufde5"]]},

{"description": "Invalid numeric entity character U+FDE6",
"input": "&#xfde6;",
-"output": ["ParseError", ["Character", "\uFFFD"]]},
+"output": ["ParseError", ["Character", "\ufde6"]]},

{"description": "Invalid numeric entity character U+FDE7",
"input": "&#xfde7;",
-"output": ["ParseError", ["Character", "\uFFFD"]]},
+"output": ["ParseError", ["Character", "\ufde7"]]},

{"description": "Invalid numeric entity character U+FDE8",
"input": "&#xfde8;",
-"output": ["ParseError", ["Character", "\uFFFD"]]},
+"output": ["ParseError", ["Character", "\ufde8"]]},

{"description": "Invalid numeric entity character U+FDE9",
"input": "&#xfde9;",
-"output": ["ParseError", ["Character", "\uFFFD"]]},
+"output": ["ParseError", ["Character", "\ufde9"]]},

{"description": "Invalid numeric entity character U+FDEA",
"input": "&#xfdea;",
-"output": ["ParseError", ["Character", "\uFFFD"]]},
+"output": ["ParseError", ["Character", "\ufdea"]]},

{"description": "Invalid numeric entity character U+FDEB",
"input": "&#xfdeb;",
-"output": ["ParseError", ["Character", "\uFFFD"]]},
+"output": ["ParseError", ["Character", "\ufdeb"]]},

{"description": "Invalid numeric entity character U+FDEC",
"input": "&#xfdec;",
-"output": ["ParseError", ["Character", "\uFFFD"]]},
+"output": ["ParseError", ["Character", "\ufdec"]]},

{"description": "Invalid numeric entity character U+FDED",
"input": "&#xfded;",
-"output": ["ParseError", ["Character", "\uFFFD"]]},
+"output": ["ParseError", ["Character", "\ufded"]]},

{"description": "Invalid numeric entity character U+FDEE",
"input": "&#xfdee;",
-"output": ["ParseError", ["Character", "\uFFFD"]]},
+"output": ["ParseError", ["Character", "\ufdee"]]},

{"description": "Invalid numeric entity character U+FDEF",
"input": "&#xfdef;",
-"output": ["ParseError", ["Character", "\uFFFD"]]},
+"output": ["ParseError", ["Character", "\ufdef"]]},

{"description": "Invalid numeric entity character U+FFFE",
"input": "&#xfffe;",
-"output": ["ParseError", ["Character", "\uFFFD"]]},
+"output": ["ParseError", ["Character", "\ufffe"]]},

{"description": "Invalid numeric entity character U+FFFF",
"input": "&#xffff;",
-"output": ["ParseError", ["Character", "\uFFFD"]]},
+"output": ["ParseError", ["Character", "\uffff"]]},

{"description": "Invalid numeric entity character U+1FFFE",
"input": "&#x1fffe;",
-"output": ["ParseError", ["Character", "\uFFFD"]]},
+"output": ["ParseError", ["Character", "\uD83F\uDFFE"]]},

{"description": "Invalid numeric entity character U+1FFFF",
"input": "&#x1ffff;",
-"output": ["ParseError", ["Character", "\uFFFD"]]},
+"output": ["ParseError", ["Character", "\uD83F\uDFFF"]]},

{"description": "Invalid numeric entity character U+2FFFE",
"input": "&#x2fffe;",
-"output": ["ParseError", ["Character", "\uFFFD"]]},
+"output": ["ParseError", ["Character", "\uD87F\uDFFE"]]},

{"description": "Invalid numeric entity character U+2FFFF",
"input": "&#x2ffff;",
-"output": ["ParseError", ["Character", "\uFFFD"]]},
+"output": ["ParseError", ["Character", "\uD87F\uDFFF"]]},

{"description": "Invalid numeric entity character U+3FFFE",
"input": "&#x3fffe;",
-"output": ["ParseError", ["Character", "\uFFFD"]]},
+"output": ["ParseError", ["Character", "\uD8BF\uDFFE"]]},

{"description": "Invalid numeric entity character U+3FFFF",
"input": "&#x3ffff;",
-"output": ["ParseError", ["Character", "\uFFFD"]]},
+"output": ["ParseError", ["Character", "\uD8BF\uDFFF"]]},

{"description": "Invalid numeric entity character U+4FFFE",
"input": "&#x4fffe;",
-"output": ["ParseError", ["Character", "\uFFFD"]]},
+"output": ["ParseError", ["Character", "\uD8FF\uDFFE"]]},

{"description": "Invalid numeric entity character U+4FFFF",
"input": "&#x4ffff;",
-"output": ["ParseError", ["Character", "\uFFFD"]]},
+"output": ["ParseError", ["Character", "\uD8FF\uDFFF"]]},

{"description": "Invalid numeric entity character U+5FFFE",
"input": "&#x5fffe;",
-"output": ["ParseError", ["Character", "\uFFFD"]]},
+"output": ["ParseError", ["Character", "\uD93F\uDFFE"]]},

{"description": "Invalid numeric entity character U+5FFFF",
"input": "&#x5ffff;",
-"output": ["ParseError", ["Character", "\uFFFD"]]},
+"output": ["ParseError", ["Character", "\uD93F\uDFFF"]]},

{"description": "Invalid numeric entity character U+6FFFE",
"input": "&#x6fffe;",
-"output": ["ParseError", ["Character", "\uFFFD"]]},
+"output": ["ParseError", ["Character", "\uD97F\uDFFE"]]},

{"description": "Invalid numeric entity character U+6FFFF",
"input": "&#x6ffff;",
-"output": ["ParseError", ["Character", "\uFFFD"]]},
+"output": ["ParseError", ["Character", "\uD97F\uDFFF"]]},

{"description": "Invalid numeric entity character U+7FFFE",
"input": "&#x7fffe;",
-"output": ["ParseError", ["Character", "\uFFFD"]]},
+"output": ["ParseError", ["Character", "\uD9BF\uDFFE"]]},

{"description": "Invalid numeric entity character U+7FFFF",
"input": "&#x7ffff;",
-"output": ["ParseError", ["Character", "\uFFFD"]]},
+"output": ["ParseError", ["Character", "\uD9BF\uDFFF"]]},

{"description": "Invalid numeric entity character U+8FFFE",
"input": "&#x8fffe;",
-"output": ["ParseError", ["Character", "\uFFFD"]]},
+"output": ["ParseError", ["Character", "\uD9FF\uDFFE"]]},

{"description": "Invalid numeric entity character U+8FFFF",
"input": "&#x8ffff;",
-"output": ["ParseError", ["Character", "\uFFFD"]]},
+"output": ["ParseError", ["Character", "\uD9FF\uDFFF"]]},

{"description": "Invalid numeric entity character U+9FFFE",
"input": "&#x9fffe;",
-"output": ["ParseError", ["Character", "\uFFFD"]]},
+"output": ["ParseError", ["Character", "\uDA3F\uDFFE"]]},

{"description": "Invalid numeric entity character U+9FFFF",
"input": "&#x9ffff;",
-"output": ["ParseError", ["Character", "\uFFFD"]]},
+"output": ["ParseError", ["Character", "\uDA3F\uDFFF"]]},

{"description": "Invalid numeric entity character U+AFFFE",
"input": "&#xafffe;",
-"output": ["ParseError", ["Character", "\uFFFD"]]},
+"output": ["ParseError", ["Character", "\uDA7F\uDFFE"]]},

{"description": "Invalid numeric entity character U+AFFFF",
"input": "&#xaffff;",
-"output": ["ParseError", ["Character", "\uFFFD"]]},
+"output": ["ParseError", ["Character", "\uDA7F\uDFFF"]]},

{"description": "Invalid numeric entity character U+BFFFE",
"input": "&#xbfffe;",
-"output": ["ParseError", ["Character", "\uFFFD"]]},
+"output": ["ParseError", ["Character", "\uDABF\uDFFE"]]},

{"description": "Invalid numeric entity character U+BFFFF",
"input": "&#xbffff;",
-"output": ["ParseError", ["Character", "\uFFFD"]]},
+"output": ["ParseError", ["Character", "\uDABF\uDFFF"]]},

{"description": "Invalid numeric entity character U+CFFFE",
"input": "&#xcfffe;",
-"output": ["ParseError", ["Character", "\uFFFD"]]},
+"output": ["ParseError", ["Character", "\uDAFF\uDFFE"]]},

{"description": "Invalid numeric entity character U+CFFFF",
"input": "&#xcffff;",
-"output": ["ParseError", ["Character", "\uFFFD"]]},
+"output": ["ParseError", ["Character", "\uDAFF\uDFFF"]]},

{"description": "Invalid numeric entity character U+DFFFE",
"input": "&#xdfffe;",
-"output": ["ParseError", ["Character", "\uFFFD"]]},
+"output": ["ParseError", ["Character", "\uDB3F\uDFFE"]]},

{"description": "Invalid numeric entity character U+DFFFF",
"input": "&#xdffff;",
-"output": ["ParseError", ["Character", "\uFFFD"]]},
+"output": ["ParseError", ["Character", "\uDB3F\uDFFF"]]},

{"description": "Invalid numeric entity character U+EFFFE",
"input": "&#xefffe;",
-"output": ["ParseError", ["Character", "\uFFFD"]]},
+"output": ["ParseError", ["Character", "\uDB7F\uDFFE"]]},

{"description": "Invalid numeric entity character U+EFFFF",
"input": "&#xeffff;",
-"output": ["ParseError", ["Character", "\uFFFD"]]},
+"output": ["ParseError", ["Character", "\uDB7F\uDFFF"]]},

{"description": "Invalid numeric entity character U+FFFFE",
"input": "&#xffffe;",
-"output": ["ParseError", ["Character", "\uFFFD"]]},
+"output": ["ParseError", ["Character", "\uDBBF\uDFFE"]]},

{"description": "Invalid numeric entity character U+FFFFF",
"input": "&#xfffff;",
-"output": ["ParseError", ["Character", "\uFFFD"]]},
+"output": ["ParseError", ["Character", "\uDBBF\uDFFF"]]},

{"description": "Invalid numeric entity character U+10FFFE",
"input": "&#x10fffe;",
-"output": ["ParseError", ["Character", "\uFFFD"]]},
+"output": ["ParseError", ["Character", "\uDBFF\uDFFE"]]},

{"description": "Invalid numeric entity character U+10FFFF",
"input": "&#x10ffff;",
-"output": ["ParseError", ["Character", "\uFFFD"]]},
+"output": ["ParseError", ["Character", "\uDBFF\uDFFF"]]},

{"description": "Valid numeric entity character U+0009",
"input": "&#x0009;",
@@ -1309,3 +1309,5 @@
"output": [["Character", "\uDBFF\uDFFD"]]}

]}
+
+
diff --git a/test/data/tokeniser2/test4.test b/test/data/tokeniser2/test4.test
index ec8f72c..4be94b0 100644
--- a/test/data/tokeniser2/test4.test
+++ b/test/data/tokeniser2/test4.test
@@ -1,11 +1,11 @@
{"tests": [

{"description":"< in attribute name",
-"input":"<z/0 <",
+"input":"<z/0 <>",
"output":["ParseError", "ParseError", ["StartTag", "z", {"0": "", "<": ""}]]},

{"description":"< in attribute value",
-"input":"<z x=<",
+"input":"<z x=<>",
"output":["ParseError", ["StartTag", "z", {"x": "<"}]]},

{"description":"= in unquoted attribute value",
@@ -28,25 +28,25 @@
"input":"<z ====>",
"output":["ParseError", "ParseError", "ParseError", ["StartTag", "z", {"=": "=="}]]},

-{"description":"Allowed \" after ampersand in attribute value",
+{"description":"\" after ampersand in double-quoted attribute value",
"input":"<z z=\"&\">",
"output":[["StartTag", "z", {"z": "&"}]]},

-{"description":"Non-allowed ' after ampersand in attribute value",
+{"description":"' after ampersand in double-quoted attribute value",
"input":"<z z=\"&'\">",
-"output":["ParseError", ["StartTag", "z", {"z": "&'"}]]},
+"output":[["StartTag", "z", {"z": "&'"}]]},

-{"description":"Allowed ' after ampersand in attribute value",
+{"description":"' after ampersand in single-quoted attribute value",
"input":"<z z='&'>",
"output":[["StartTag", "z", {"z": "&"}]]},

-{"description":"Non-allowed \" after ampersand in attribute value",
+{"description":"\" after ampersand in single-quoted attribute value",
"input":"<z z='&\"'>",
-"output":["ParseError", ["StartTag", "z", {"z": "&\""}]]},
+"output":[["StartTag", "z", {"z": "&\""}]]},

{"description":"Text after bogus character reference",
"input":"<z z='&xlink_xmlns;'>bar<z>",
-"output":["ParseError",["StartTag","z",{"z":"&xlink_xmlns;"}],["Character","bar"],["StartTag","z",{}]]},
+"output":[["StartTag","z",{"z":"&xlink_xmlns;"}],["Character","bar"],["StartTag","z",{}]]},

{"description":"Text after hex character reference",
"input":"<z z='&#x0020; foo'>bar<z>",
@@ -98,11 +98,11 @@

{"description":"CR EOF in tag name",
"input":"<z\r",
-"output":["ParseError", ["StartTag", "z", {}]]},
+"output":["ParseError"]},

{"description":"Slash EOF in tag name",
"input":"<z/",
-"output":["ParseError", ["StartTag", "z", {}]]},
+"output":["ParseError"]},

{"description":"Zero hex numeric entity",
"input":"&#x0",
@@ -134,7 +134,7 @@

{"description":"Maximum non-BMP numeric entity",
"input":"&#X10FFFF;",
-"output":["ParseError", ["Character", "\uFFFD"]]},
+"output":["ParseError", ["Character", "\uDBFF\uDFFF"]]},

{"description":"Above maximum numeric entity",
"input":"&#x110000;",
@@ -222,12 +222,12 @@

{"description":"U+0000 in lookahead region after non-matching character",
"input":"<!doc>\u0000",
-"output":["ParseError", ["Comment", "doc"], "ParseError", ["Character", "\uFFFD"]],
+"output":["ParseError", ["Comment", "doc"], "ParseError", ["Character", "\u0000"]],
"ignoreErrorOrder":true},

{"description":"U+0000 in lookahead region",
"input":"<!doc\u0000",
-"output":["ParseError", "ParseError", ["Comment", "doc\uFFFD"]],
+"output":["ParseError", ["Comment", "doc\uFFFD"]],
"ignoreErrorOrder":true},

{"description":"U+0080 in lookahead region",
@@ -245,11 +245,6 @@
"output":["ParseError", "ParseError", ["Comment", "doc\uD83F\uDFFF"]],
"ignoreErrorOrder":true},

-{"description":"CR followed by U+0000",
-"input":"\r\u0000",
-"output":["ParseError", ["Character", "\n\uFFFD"]],
-"ignoreErrorOrder":true},
-
{"description":"CR followed by non-LF",
"input":"\r?",
"output":[["Character", "\n?"]]},
@@ -300,6 +295,50 @@

{"description":"Doctype html x>text",
"input":"<!DOCTYPE html x>text",
-"output":["ParseError", ["DOCTYPE", "html", null, null, false], ["Character", "text"]]}
+"output":["ParseError", ["DOCTYPE", "html", null, null, false], ["Character", "text"]]},
+
+{"description":"Grave accent in unquoted attribute",
+"input":"<a a=aa`>",
+"output":["ParseError", ["StartTag", "a", {"a":"aa`"}]]},
+
+{"description":"EOF in tag name state",
+"input":"<a",
+"output":["ParseError"]},
+
+{"description":"EOF in tag name state",
+"input":"<a",
+"output":["ParseError"]},
+
+{"description":"EOF in before attribute name state",
+"input":"<a ",
+"output":["ParseError"]},
+
+{"description":"EOF in attribute name state",
+"input":"<a a",
+"output":["ParseError"]},
+
+{"description":"EOF in after attribute name state",
+"input":"<a a ",
+"output":["ParseError"]},
+
+{"description":"EOF in before attribute value state",
+"input":"<a a =",
+"output":["ParseError"]},
+
+{"description":"EOF in attribute value (double quoted) state",
+"input":"<a a =\"a",
+"output":["ParseError"]},
+
+{"description":"EOF in attribute value (single quoted) state",
+"input":"<a a ='a",
+"output":["ParseError"]},
+
+{"description":"EOF in attribute value (unquoted) state",
+"input":"<a a =a",
+"output":["ParseError"]},
+
+{"description":"EOF in after attribute value state",
+"input":"<a a ='a'",
+"output":["ParseError"]}

]}
--
1.8.3.2

No comments:

Post a Comment