Tuesday, 11 March 2014

Re: Tokeniser Update

From f395209aff9d66e70ba236cfc1812e594a0580a5 Mon Sep 17 00:00:00 2001
From: Achal-Aggarwal <theachalaggarwal@gmail.com>
Date: Wed, 12 Mar 2014 00:17:39 +0530
Subject: [PATCH 01/11] Rewriting whole tokeniser. Removed content model flags.
CP1252 table updated. End bang and doctype identifier state added.

---
src/tokeniser/tokeniser.c | 598 +++++++++++++++++++++++++---------------------
1 file changed, 324 insertions(+), 274 deletions(-)

diff --git a/src/tokeniser/tokeniser.c b/src/tokeniser/tokeniser.c
index a7e67a1..3eab8a7 100644
--- a/src/tokeniser/tokeniser.c
+++ b/src/tokeniser/tokeniser.c
@@ -24,10 +24,10 @@
* Table of mappings between Windows-1252 codepoints 128-159 and UCS4
*/
static const uint32_t cp1252Table[32] = {
- 0x20AC, 0xFFFD, 0x201A, 0x0192, 0x201E, 0x2026, 0x2020, 0x2021,
- 0x02C6, 0x2030, 0x0160, 0x2039, 0x0152, 0xFFFD, 0x017D, 0xFFFD,
- 0xFFFD, 0x2018, 0x2019, 0x201C, 0x201D, 0x2022, 0x2013, 0x2014,
- 0x02DC, 0x2122, 0x0161, 0x203A, 0x0153, 0xFFFD, 0x017E, 0x0178
+ 0x20AC, 0x0081, 0x201A, 0x0192, 0x201E, 0x2026, 0x2020, 0x2021, /* 0x80-0x87 */
+ 0x02C6, 0x2030, 0x0160, 0x2039, 0x0152, 0x008D, 0x017D, 0x008F, /* 0x88-0x8F */
+ 0x0090, 0x2018, 0x2019, 0x201C, 0x201D, 0x2022, 0x2013, 0x2014, /* 0x90-0x97 */
+ 0x02DC, 0x2122, 0x0161, 0x203A, 0x0153, 0x009D, 0x017E, 0x0178 /* 0x98-0x9F */
};

/**
@@ -71,6 +71,7 @@ typedef enum hubbub_tokeniser_state {
STATE_COMMENT,
STATE_COMMENT_END_DASH,
STATE_COMMENT_END,
+ STATE_COMMENT_END_BANG,
STATE_MATCH_DOCTYPE,
STATE_DOCTYPE,
STATE_BEFORE_DOCTYPE_NAME,
@@ -78,6 +79,7 @@ typedef enum hubbub_tokeniser_state {
STATE_AFTER_DOCTYPE_NAME,
STATE_MATCH_PUBLIC,
STATE_BEFORE_DOCTYPE_PUBLIC,
+ STATE_BEFORE_DOCTYPE_PUBLIC_IDENTIFIER,
STATE_DOCTYPE_PUBLIC_DQ,
STATE_DOCTYPE_PUBLIC_SQ,
STATE_AFTER_DOCTYPE_PUBLIC,
@@ -232,6 +234,8 @@ static hubbub_error hubbub_tokeniser_handle_after_doctype_name(
hubbub_tokeniser *tokeniser);
static hubbub_error hubbub_tokeniser_handle_match_public(
hubbub_tokeniser *tokeniser);
+static hubbub_error hubbub_tokeniser_handle_doctype_public_identifier(
+ hubbub_tokeniser *tokeniser);
static hubbub_error hubbub_tokeniser_handle_before_doctype_public(
hubbub_tokeniser *tokeniser);
static hubbub_error hubbub_tokeniser_handle_doctype_public_dq(
@@ -443,7 +447,7 @@ hubbub_error hubbub_tokeniser_run(hubbub_tokeniser *tokeniser)
if (tokeniser->paused == true)
return HUBBUB_PAUSED;

-#if 0
+#if 1
#define state(x) \
case x: \
printf( #x "\n");
@@ -528,6 +532,7 @@ hubbub_error hubbub_tokeniser_run(hubbub_tokeniser *tokeniser)
case STATE_COMMENT:
case STATE_COMMENT_END_DASH:
case STATE_COMMENT_END:
+ case STATE_COMMENT_END_BANG:
cont = hubbub_tokeniser_handle_comment(tokeniser);
break;
state(STATE_MATCH_DOCTYPE)
@@ -549,7 +554,6 @@ hubbub_error hubbub_tokeniser_run(hubbub_tokeniser *tokeniser)
cont = hubbub_tokeniser_handle_after_doctype_name(
tokeniser);
break;
-
state(STATE_MATCH_PUBLIC)
cont = hubbub_tokeniser_handle_match_public(
tokeniser);
@@ -558,6 +562,10 @@ hubbub_error hubbub_tokeniser_run(hubbub_tokeniser *tokeniser)
cont = hubbub_tokeniser_handle_before_doctype_public(
tokeniser);
break;
+ state(STATE_BEFORE_DOCTYPE_PUBLIC_IDENTIFIER)
+ cont = hubbub_tokeniser_handle_doctype_public_identifier(
+ tokeniser);
+ break;
state(STATE_DOCTYPE_PUBLIC_DQ)
cont = hubbub_tokeniser_handle_doctype_public_dq(
tokeniser);
@@ -655,7 +663,6 @@ hubbub_error hubbub_tokeniser_run(hubbub_tokeniser *tokeniser)
(str).len += (length); \
} while (0)

-
/* this should always be called with an empty "chars" buffer */
hubbub_error hubbub_tokeniser_handle_data(hubbub_tokeniser *tokeniser)
{
@@ -669,44 +676,12 @@ hubbub_error hubbub_tokeniser_handle_data(hubbub_tokeniser *tokeniser)
PARSERUTILS_OK) {
const uint8_t c = *cptr;

- if (c == '&' &&
- (tokeniser->content_model == HUBBUB_CONTENT_MODEL_PCDATA ||
- tokeniser->content_model == HUBBUB_CONTENT_MODEL_RCDATA) &&
- tokeniser->escape_flag == false) {
- tokeniser->state =
- STATE_CHARACTER_REFERENCE_DATA;
+ if (c == '&') {
+ tokeniser->state = STATE_CHARACTER_REFERENCE_DATA;
/* Don't eat the '&'; it'll be handled by entity
* consumption */
break;
- } else if (c == '-' &&
- tokeniser->escape_flag == false &&
- (tokeniser->content_model ==
- HUBBUB_CONTENT_MODEL_RCDATA ||
- tokeniser->content_model ==
- HUBBUB_CONTENT_MODEL_CDATA) &&
- tokeniser->context.pending >= 3) {
- size_t ignore;
- error = parserutils_inputstream_peek(
- tokeniser->input,
- tokeniser->context.pending - 3,
- &cptr,
- &ignore);
-
- assert(error == PARSERUTILS_OK);
-
- if (strncmp((char *)cptr,
- "<!--", SLEN("<!--")) == 0) {
- tokeniser->escape_flag = true;
- }
-
- tokeniser->context.pending += len;
- } else if (c == '<' && (tokeniser->content_model ==
- HUBBUB_CONTENT_MODEL_PCDATA ||
- ((tokeniser->content_model ==
- HUBBUB_CONTENT_MODEL_RCDATA ||
- tokeniser->content_model ==
- HUBBUB_CONTENT_MODEL_CDATA) &&
- tokeniser->escape_flag == false))) {
+ } else if (c == '<') {
if (tokeniser->context.pending > 0) {
/* Emit any pending characters */
emit_current_chars(tokeniser);
@@ -716,39 +691,6 @@ hubbub_error hubbub_tokeniser_handle_data(hubbub_tokeniser *tokeniser)
tokeniser->context.pending = len;
tokeniser->state = STATE_TAG_OPEN;
break;
- } else if (c == '>' && tokeniser->escape_flag == true &&
- (tokeniser->content_model ==
- HUBBUB_CONTENT_MODEL_RCDATA ||
- tokeniser->content_model ==
- HUBBUB_CONTENT_MODEL_CDATA)) {
- /* no need to check that there are enough characters,
- * since you can only run into this if the flag is
- * true in the first place, which requires four
- * characters. */
- error = parserutils_inputstream_peek(
- tokeniser->input,
- tokeniser->context.pending - 2,
- &cptr,
- &len);
-
- assert(error == PARSERUTILS_OK);
-
- if (strncmp((char *) cptr, "-->", SLEN("-->")) == 0) {
- tokeniser->escape_flag = false;
- }
-
- tokeniser->context.pending += len;
- } else if (c == '\0') {
- if (tokeniser->context.pending > 0) {
- /* Emit any pending characters */
- emit_current_chars(tokeniser);
- }
-
- /* Emit a replacement character */
- emit_character_token(tokeniser, &u_fffd_str);
-
- /* Advance past NUL */
- parserutils_inputstream_advance(tokeniser->input, 1);
} else if (c == '\r') {
error = parserutils_inputstream_peek(
tokeniser->input,
@@ -774,11 +716,14 @@ hubbub_error hubbub_tokeniser_handle_data(hubbub_tokeniser *tokeniser)
/* Advance over */
parserutils_inputstream_advance(tokeniser->input, 1);
} else {
+ if (c == '\0') {
+ /** \todo parse error */
+ }
+
/* Just collect into buffer */
tokeniser->context.pending += len;
}
}
-
if (tokeniser->state != STATE_TAG_OPEN &&
(tokeniser->state != STATE_DATA || error == PARSERUTILS_EOF) &&
tokeniser->context.pending > 0) {
@@ -879,7 +824,9 @@ hubbub_error hubbub_tokeniser_handle_tag_open(hubbub_tokeniser *tokeniser)

if (error != PARSERUTILS_OK) {
if (error == PARSERUTILS_EOF) {
- /* Return to data state with '<' still in "chars" */
+ /** \todo parse error */
+ /* Emit single '<' char */
+ emit_current_chars(tokeniser);
tokeniser->state = STATE_DATA;
return HUBBUB_OK;
} else {
@@ -889,65 +836,54 @@ hubbub_error hubbub_tokeniser_handle_tag_open(hubbub_tokeniser *tokeniser)

c = *cptr;

- if (c == '/') {
+ if (c == '!') {
+ parserutils_inputstream_advance(tokeniser->input, SLEN("<!"));
+
+ tokeniser->context.pending = 0;
+ tokeniser->state = STATE_MARKUP_DECLARATION_OPEN;
+ } else if (c == '/'){
tokeniser->context.pending += len;

tokeniser->context.close_tag_match.match = false;
tokeniser->context.close_tag_match.count = 0;

tokeniser->state = STATE_CLOSE_TAG_OPEN;
- } else if (tokeniser->content_model == HUBBUB_CONTENT_MODEL_RCDATA ||
- tokeniser->content_model ==
- HUBBUB_CONTENT_MODEL_CDATA) {
- /* Return to data state with '<' still in "chars" */
- tokeniser->state = STATE_DATA;
- } else if (tokeniser->content_model == HUBBUB_CONTENT_MODEL_PCDATA) {
- if (c == '!') {
- parserutils_inputstream_advance(tokeniser->input,
- SLEN("<!"));
-
- tokeniser->context.pending = 0;
- tokeniser->state = STATE_MARKUP_DECLARATION_OPEN;
- } else if ('A' <= c && c <= 'Z') {
- uint8_t lc = (c + 0x20);
-
- START_BUF(ctag->name, &lc, len);
- ctag->n_attributes = 0;
- tokeniser->context.current_tag_type =
- HUBBUB_TOKEN_START_TAG;
-
- tokeniser->context.pending += len;
+ } else if ('A' <= c && c <= 'Z') {
+ uint8_t lc = (c + 0x20);

- tokeniser->state = STATE_TAG_NAME;
- } else if ('a' <= c && c <= 'z') {
- START_BUF(ctag->name, cptr, len);
- ctag->n_attributes = 0;
- tokeniser->context.current_tag_type =
- HUBBUB_TOKEN_START_TAG;
+ START_BUF(ctag->name, &lc, len);
+ ctag->n_attributes = 0;
+ tokeniser->context.current_tag_type =
+ HUBBUB_TOKEN_START_TAG;

- tokeniser->context.pending += len;
+ tokeniser->context.pending += len;

- tokeniser->state = STATE_TAG_NAME;
- } else if (c == '>') {
- /** \todo parse error */
+ tokeniser->state = STATE_TAG_NAME;
+ } else if ('a' <= c && c <= 'z') {
+ START_BUF(ctag->name, cptr, len);
+ ctag->n_attributes = 0;
+ tokeniser->context.current_tag_type =
+ HUBBUB_TOKEN_START_TAG;

- tokeniser->context.pending += len;
- tokeniser->state = STATE_DATA;
- } else if (c == '?') {
- /** \todo parse error */
+ tokeniser->context.pending += len;

- /* Cursor still at "<", need to advance past it */
+ tokeniser->state = STATE_TAG_NAME;
+ } else if (c == '?'){
+ /** \todo parse error */
+ /* Cursor still at "<", need to advance past it */
parserutils_inputstream_advance(
tokeniser->input, SLEN("<"));
tokeniser->context.pending = 0;

tokeniser->state = STATE_BOGUS_COMMENT;
- } else {
- /* Return to data state with '<' still in "chars" */
- tokeniser->state = STATE_DATA;
- }
+ } else {
+ /** \todo parse error */
+ /* Emit single '<' char */
+ emit_current_chars(tokeniser);
+ tokeniser->state = STATE_DATA;
}

+
return HUBBUB_OK;
}

@@ -955,8 +891,6 @@ hubbub_error hubbub_tokeniser_handle_tag_open(hubbub_tokeniser *tokeniser)
/* this state never stays in this state for more than one character */
hubbub_error hubbub_tokeniser_handle_close_tag_open(hubbub_tokeniser *tokeniser)
{
- hubbub_tokeniser_context *ctx = &tokeniser->context;
-
size_t len;
const uint8_t *cptr;
parserutils_error error;
@@ -966,131 +900,65 @@ hubbub_error hubbub_tokeniser_handle_close_tag_open(hubbub_tokeniser *tokeniser)
/* assert(tokeniser->context.chars.ptr[0] == '<'); */
/* assert(tokeniser->context.chars.ptr[1] == '/'); */

- /**\todo fragment case */
-
- if (tokeniser->content_model == HUBBUB_CONTENT_MODEL_RCDATA ||
- tokeniser->content_model ==
- HUBBUB_CONTENT_MODEL_CDATA) {
- uint8_t *start_tag_name =
- tokeniser->context.last_start_tag_name;
- size_t start_tag_len =
- tokeniser->context.last_start_tag_len;
-
- while ((error = parserutils_inputstream_peek(tokeniser->input,
- ctx->pending +
- ctx->close_tag_match.count,
- &cptr,
- &len)) == PARSERUTILS_OK) {
- c = *cptr;
-
- if ((start_tag_name[ctx->close_tag_match.count] & ~0x20)
- != (c & ~0x20)) {
- break;
- }
-
- ctx->close_tag_match.count += len;
-
- if (ctx->close_tag_match.count == start_tag_len) {
- ctx->close_tag_match.match = true;
- break;
- }
- }
-
- if (error != PARSERUTILS_OK && error != PARSERUTILS_EOF) {
- return hubbub_error_from_parserutils_error(error);
- }
-
- if (ctx->close_tag_match.match == true) {
- error = parserutils_inputstream_peek(
- tokeniser->input,
- ctx->pending +
- ctx->close_tag_match.count,
- &cptr,
- &len);
-
- if (error != PARSERUTILS_OK &&
- error != PARSERUTILS_EOF) {
- return hubbub_error_from_parserutils_error(
- error);
- } else if (error != PARSERUTILS_EOF) {
- c = *cptr;
-
- if (c != '\t' && c != '\n' && c != '\f' &&
- c != ' ' && c != '>' &&
- c != '/') {
- ctx->close_tag_match.match = false;
- }
- }
- }
- }
-
- if (ctx->close_tag_match.match == false &&
- tokeniser->content_model !=
- HUBBUB_CONTENT_MODEL_PCDATA) {
- /* We should emit "</" here, but instead we leave it in the
- * buffer so the data state emits it with any characters
- * following it */
- tokeniser->state = STATE_DATA;
- } else {
- error = parserutils_inputstream_peek(tokeniser->input,
- tokeniser->context.pending, &cptr, &len);
+ error = parserutils_inputstream_peek(tokeniser->input,
+ tokeniser->context.pending, &cptr, &len);

+ if (error != PARSERUTILS_OK) {
if (error == PARSERUTILS_EOF) {
/** \todo parse error */
-
- /* Return to data state with "</" pending */
+ /* Emit '</' chars */
+ emit_current_chars(tokeniser);
tokeniser->state = STATE_DATA;
return HUBBUB_OK;
- } else if (error != PARSERUTILS_OK) {
+ } else {
return hubbub_error_from_parserutils_error(error);
}
+ }

- c = *cptr;
+ c = *cptr;

- if ('A' <= c && c <= 'Z') {
- uint8_t lc = (c + 0x20);
- START_BUF(tokeniser->context.current_tag.name,
- &lc, len);
- tokeniser->context.current_tag.n_attributes = 0;
+ if ('A' <= c && c <= 'Z') {
+ uint8_t lc = (c + 0x20);
+ START_BUF(tokeniser->context.current_tag.name,
+ &lc, len);
+ tokeniser->context.current_tag.n_attributes = 0;

- tokeniser->context.current_tag_type =
- HUBBUB_TOKEN_END_TAG;
+ tokeniser->context.current_tag_type = HUBBUB_TOKEN_END_TAG;

- tokeniser->context.pending += len;
+ tokeniser->context.pending += len;

- tokeniser->state = STATE_TAG_NAME;
- } else if ('a' <= c && c <= 'z') {
- START_BUF(tokeniser->context.current_tag.name,
- cptr, len);
- tokeniser->context.current_tag.n_attributes = 0;
+ tokeniser->state = STATE_TAG_NAME;
+ } else if ('a' <= c && c <= 'z') {
+ START_BUF(tokeniser->context.current_tag.name,
+ cptr, len);
+ tokeniser->context.current_tag.n_attributes = 0;

- tokeniser->context.current_tag_type =
- HUBBUB_TOKEN_END_TAG;
+ tokeniser->context.current_tag_type = HUBBUB_TOKEN_END_TAG;

- tokeniser->context.pending += len;
+ tokeniser->context.pending += len;

- tokeniser->state = STATE_TAG_NAME;
- } else if (c == '>') {
- /* Cursor still at "</", need to collect ">" */
- tokeniser->context.pending += len;
+ tokeniser->state = STATE_TAG_NAME;
+ } else if (c == '>') {
+ /** \todo parse error */

- /* Now need to advance past "</>" */
- parserutils_inputstream_advance(tokeniser->input,
- tokeniser->context.pending);
- tokeniser->context.pending = 0;
+ /* Cursor still at "</", need to collect ">" */
+ tokeniser->context.pending += len;

- /** \todo parse error */
- tokeniser->state = STATE_DATA;
- } else {
- /** \todo parse error */
+ /* Now need to advance past "</>" */
+ parserutils_inputstream_advance(tokeniser->input,
+ tokeniser->context.pending);
+ tokeniser->context.pending = 0;

- /* Cursor still at "</", need to advance past it */
- parserutils_inputstream_advance(tokeniser->input,
- tokeniser->context.pending);
- tokeniser->context.pending = 0;
+ tokeniser->state = STATE_DATA;
+ } else {
+ /** \todo parse error */

- tokeniser->state = STATE_BOGUS_COMMENT;
- }
+ /* Cursor still at "</", need to advance past it */
+ parserutils_inputstream_advance(tokeniser->input,
+ tokeniser->context.pending);
+ tokeniser->context.pending = 0;
+
+ tokeniser->state = STATE_BOGUS_COMMENT;
}

return HUBBUB_OK;
@@ -1117,8 +985,13 @@ hubbub_error hubbub_tokeniser_handle_tag_name(hubbub_tokeniser *tokeniser)

if (error != PARSERUTILS_OK) {
if (error == PARSERUTILS_EOF) {
+ /** \todo parse error */
tokeniser->state = STATE_DATA;
- return emit_current_tag(tokeniser);
+
+ // skips all pending characters
+ parserutils_inputstream_advance(
+ tokeniser->input, tokeniser->context.pending);
+ return HUBBUB_OK;
} else {
return hubbub_error_from_parserutils_error(error);
}
@@ -1129,20 +1002,20 @@ hubbub_error hubbub_tokeniser_handle_tag_name(hubbub_tokeniser *tokeniser)
if (c == '\t' || c == '\n' || c == '\f' || c == ' ' || c == '\r') {
tokeniser->context.pending += len;
tokeniser->state = STATE_BEFORE_ATTRIBUTE_NAME;
+ } else if (c == '/') {
+ tokeniser->context.pending += len;
+ tokeniser->state = STATE_SELF_CLOSING_START_TAG;
} else if (c == '>') {
tokeniser->context.pending += len;
tokeniser->state = STATE_DATA;
return emit_current_tag(tokeniser);
- } else if (c == '\0') {
- COLLECT(ctag->name, u_fffd, sizeof(u_fffd));
- tokeniser->context.pending += len;
- } else if (c == '/') {
- tokeniser->context.pending += len;
- tokeniser->state = STATE_SELF_CLOSING_START_TAG;
} else if ('A' <= c && c <= 'Z') {
uint8_t lc = (c + 0x20);
COLLECT(ctag->name, &lc, len);
tokeniser->context.pending += len;
+ } else if (c == '\0') {
+ COLLECT(ctag->name, u_fffd, sizeof(u_fffd));
+ tokeniser->context.pending += len;
} else {
COLLECT(ctag->name, cptr, len);
tokeniser->context.pending += len;
@@ -1166,8 +1039,13 @@ hubbub_error hubbub_tokeniser_handle_before_attribute_name(

if (error != PARSERUTILS_OK) {
if (error == PARSERUTILS_EOF) {
+ /** \todo parse error */
tokeniser->state = STATE_DATA;
- return emit_current_tag(tokeniser);
+
+ // skips all pending characters
+ parserutils_inputstream_advance(
+ tokeniser->input, tokeniser->context.pending);
+ return HUBBUB_OK;
} else {
return hubbub_error_from_parserutils_error(error);
}
@@ -1178,13 +1056,13 @@ hubbub_error hubbub_tokeniser_handle_before_attribute_name(
if (c == '\t' || c == '\n' || c == '\f' || c == ' ' || c == '\r') {
/* pass over in silence */
tokeniser->context.pending += len;
+ } else if (c == '/') {
+ tokeniser->context.pending += len;
+ tokeniser->state = STATE_SELF_CLOSING_START_TAG;
} else if (c == '>') {
tokeniser->context.pending += len;
tokeniser->state = STATE_DATA;
return emit_current_tag(tokeniser);
- } else if (c == '/') {
- tokeniser->context.pending += len;
- tokeniser->state = STATE_SELF_CLOSING_START_TAG;
} else {
hubbub_attribute *attr;

@@ -1239,8 +1117,13 @@ hubbub_error hubbub_tokeniser_handle_attribute_name(hubbub_tokeniser *tokeniser)

if (error != PARSERUTILS_OK) {
if (error == PARSERUTILS_EOF) {
+ /** \todo parse error */
tokeniser->state = STATE_DATA;
- return emit_current_tag(tokeniser);
+
+ // skips all pending characters
+ parserutils_inputstream_advance(
+ tokeniser->input, tokeniser->context.pending);
+ return HUBBUB_OK;
} else {
return hubbub_error_from_parserutils_error(error);
}
@@ -1271,6 +1154,10 @@ hubbub_error hubbub_tokeniser_handle_attribute_name(hubbub_tokeniser *tokeniser)
&lc, len);
tokeniser->context.pending += len;
} else {
+ if (c == '"' || c == '\'' || c == '<') {
+ /** \todo parse error */
+ }
+
COLLECT(ctag->attributes[ctag->n_attributes - 1].name,
cptr, len);
tokeniser->context.pending += len;
@@ -1294,13 +1181,21 @@ hubbub_error hubbub_tokeniser_handle_after_attribute_name(

if (error != PARSERUTILS_OK) {
if (error == PARSERUTILS_EOF) {
+ /** \todo parse error */
tokeniser->state = STATE_DATA;
- return emit_current_tag(tokeniser);
+
+ // skips all pending characters
+ parserutils_inputstream_advance(
+ tokeniser->input, tokeniser->context.pending);
+ return HUBBUB_OK;
} else {
return hubbub_error_from_parserutils_error(error);
}
}

+ /** \todo Check whether the attribute name already exists in the attributes list;
+ * if it does, ignore this attribute and its value, if any */
+
c = *cptr;

if (c == '\t' || c == '\n' || c == '\f' || c == ' ' || c == '\r') {
@@ -1319,7 +1214,7 @@ hubbub_error hubbub_tokeniser_handle_after_attribute_name(
} else {
hubbub_attribute *attr;

- if (c == '"' || c == '\'') {
+ if (c == '"' || c == '\'' || c =='<') {
/** \todo parse error */
}

@@ -1335,6 +1230,7 @@ hubbub_error hubbub_tokeniser_handle_after_attribute_name(
uint8_t lc = (c + 0x20);
START_BUF(attr[ctag->n_attributes].name, &lc, len);
} else if (c == '\0') {
+ /** \todo parse error */
START_BUF(attr[ctag->n_attributes].name,
u_fffd, sizeof(u_fffd));
} else {
@@ -1372,7 +1268,11 @@ hubbub_error hubbub_tokeniser_handle_before_attribute_value(
if (error == PARSERUTILS_EOF) {
/** \todo parse error */
tokeniser->state = STATE_DATA;
- return emit_current_tag(tokeniser);
+
+ // skips all pending characters
+ parserutils_inputstream_advance(
+ tokeniser->input, tokeniser->context.pending);
+ return HUBBUB_OK;
} else {
return hubbub_error_from_parserutils_error(error);
}
@@ -1398,12 +1298,14 @@ hubbub_error hubbub_tokeniser_handle_before_attribute_value(
tokeniser->state = STATE_DATA;
return emit_current_tag(tokeniser);
} else if (c == '\0') {
+ /** \todo parse error */
+
START_BUF(ctag->attributes[ctag->n_attributes - 1].value,
u_fffd, sizeof(u_fffd));
tokeniser->context.pending += len;
tokeniser->state = STATE_ATTRIBUTE_VALUE_UQ;
} else {
- if (c == '=') {
+ if (c == '=' || c == '=' || c == '`') {
/** \todo parse error */
}

@@ -1432,8 +1334,13 @@ hubbub_error hubbub_tokeniser_handle_attribute_value_dq(

if (error != PARSERUTILS_OK) {
if (error == PARSERUTILS_EOF) {
+ /** \todo parse error */
tokeniser->state = STATE_DATA;
- return emit_current_tag(tokeniser);
+
+ // skips all pending characters
+ parserutils_inputstream_advance(
+ tokeniser->input, tokeniser->context.pending);
+ return HUBBUB_OK;
} else {
return hubbub_error_from_parserutils_error(error);
}
@@ -1450,6 +1357,7 @@ hubbub_error hubbub_tokeniser_handle_attribute_value_dq(
tokeniser->context.allowed_char = '"';
/* Don't eat the '&'; it'll be handled by entity consumption */
} else if (c == '\0') {
+ /** \todo parse error */
COLLECT_MS(ctag->attributes[ctag->n_attributes - 1].value,
u_fffd, sizeof(u_fffd));
tokeniser->context.pending += len;
@@ -1494,8 +1402,13 @@ hubbub_error hubbub_tokeniser_handle_attribute_value_sq(

if (error != PARSERUTILS_OK) {
if (error == PARSERUTILS_EOF) {
+ /** \todo parse error */
tokeniser->state = STATE_DATA;
- return emit_current_tag(tokeniser);
+
+ // skips all pending characters
+ parserutils_inputstream_advance(
+ tokeniser->input, tokeniser->context.pending);
+ return HUBBUB_OK;
} else {
return hubbub_error_from_parserutils_error(error);
}
@@ -1556,8 +1469,13 @@ hubbub_error hubbub_tokeniser_handle_attribute_value_uq(

if (error != PARSERUTILS_OK) {
if (error == PARSERUTILS_EOF) {
+ /** \todo parse error */
tokeniser->state = STATE_DATA;
- return emit_current_tag(tokeniser);
+
+ // skips all pending characters
+ parserutils_inputstream_advance(
+ tokeniser->input, tokeniser->context.pending);
+ return HUBBUB_OK;
} else {
return hubbub_error_from_parserutils_error(error);
}
@@ -1574,6 +1492,7 @@ hubbub_error hubbub_tokeniser_handle_attribute_value_uq(
} else if (c == '&') {
tokeniser->context.prev_state = tokeniser->state;
tokeniser->state = STATE_CHARACTER_REFERENCE_IN_ATTRIBUTE_VALUE;
+ tokeniser->context.allowed_char = '>';
/* Don't eat the '&'; it'll be handled by entity consumption */
} else if (c == '>') {
tokeniser->context.pending += len;
@@ -1584,7 +1503,7 @@ hubbub_error hubbub_tokeniser_handle_attribute_value_uq(
u_fffd, sizeof(u_fffd));
tokeniser->context.pending += len;
} else {
- if (c == '"' || c == '\'' || c == '=') {
+ if (c == '"' || c == '\'' || c == '=' || c == '<' || c == '`') {
/** \todo parse error */
}

@@ -1666,8 +1585,13 @@ hubbub_error hubbub_tokeniser_handle_after_attribute_value_q(

if (error != PARSERUTILS_OK) {
if (error == PARSERUTILS_EOF) {
+ /** \todo parse error */
tokeniser->state = STATE_DATA;
- return emit_current_tag(tokeniser);
+
+ // skips all pending characters
+ parserutils_inputstream_advance(
+ tokeniser->input, tokeniser->context.pending);
+ return HUBBUB_OK;
} else {
return hubbub_error_from_parserutils_error(error);
}
@@ -1675,7 +1599,7 @@ hubbub_error hubbub_tokeniser_handle_after_attribute_value_q(

c = *cptr;

- if (c == '\t' || c == '\n' || c == '\f' || c == ' ' || c == '\r') {
+ if (c == '\t' || c == '\n' || c == '\f' || c == ' ') {
tokeniser->context.pending += len;
tokeniser->state = STATE_BEFORE_ATTRIBUTE_NAME;
} else if (c == '>') {
@@ -1708,8 +1632,13 @@ hubbub_error hubbub_tokeniser_handle_self_closing_start_tag(

if (error != PARSERUTILS_OK) {
if (error == PARSERUTILS_EOF) {
+ /** \todo parse error */
tokeniser->state = STATE_DATA;
- return emit_current_tag(tokeniser);
+
+ // skips all pending characters
+ parserutils_inputstream_advance(
+ tokeniser->input, tokeniser->context.pending);
+ return HUBBUB_OK;
} else {
return hubbub_error_from_parserutils_error(error);
}
@@ -1724,6 +1653,7 @@ hubbub_error hubbub_tokeniser_handle_self_closing_start_tag(
tokeniser->context.current_tag.self_closing = true;
return emit_current_tag(tokeniser);
} else {
+ /** \todo parse error */
/* Reprocess character in before attribute name state */
tokeniser->state = STATE_BEFORE_ATTRIBUTE_NAME;
}
@@ -1809,6 +1739,7 @@ hubbub_error hubbub_tokeniser_handle_markup_declaration_open(

if (error != PARSERUTILS_OK) {
if (error == PARSERUTILS_EOF) {
+ /** \todo parse error */
tokeniser->state = STATE_BOGUS_COMMENT;
return HUBBUB_OK;
} else {
@@ -1826,6 +1757,7 @@ hubbub_error hubbub_tokeniser_handle_markup_declaration_open(
tokeniser->context.match_doctype.count = len;
tokeniser->state = STATE_MATCH_DOCTYPE;
} else if (tokeniser->process_cdata_section == true && c == '[') {
+ // ALSO CHECKS FOR adjusted current node AND element not in html namespace
tokeniser->context.pending = len;
tokeniser->context.match_cdata.count = len;
tokeniser->state = STATE_MATCH_CDATA;
@@ -1836,7 +1768,7 @@ hubbub_error hubbub_tokeniser_handle_markup_declaration_open(
return HUBBUB_OK;
}

-
+/* to match -- and start an empty comment */
hubbub_error hubbub_tokeniser_handle_match_comment(hubbub_tokeniser *tokeniser)
{
size_t len;
@@ -1882,8 +1814,9 @@ hubbub_error hubbub_tokeniser_handle_comment(hubbub_tokeniser *tokeniser)

if (error != PARSERUTILS_OK) {
if (error == PARSERUTILS_EOF) {
+ /** \todo parse error */
tokeniser->state = STATE_DATA;
- return emit_current_comment(tokeniser);
+ return emit_current_comment(tokeniser);;
} else {
return hubbub_error_from_parserutils_error(error);
}
@@ -1893,12 +1826,16 @@ hubbub_error hubbub_tokeniser_handle_comment(hubbub_tokeniser *tokeniser)

if (c == '>' && (tokeniser->state == STATE_COMMENT_START_DASH ||
tokeniser->state == STATE_COMMENT_START ||
- tokeniser->state == STATE_COMMENT_END)) {
+ tokeniser->state == STATE_COMMENT_END ||
+ tokeniser->state == STATE_COMMENT_END_BANG)) {
tokeniser->context.pending += len;

/** \todo parse error if state != COMMENT_END */
tokeniser->state = STATE_DATA;
return emit_current_comment(tokeniser);
+ } else if (c == '!' && tokeniser->state == STATE_COMMENT_END) {
+ tokeniser->context.pending += len;
+ tokeniser->state = STATE_COMMENT_END_BANG;
} else if (c == '-') {
if (tokeniser->state == STATE_COMMENT_START) {
tokeniser->state = STATE_COMMENT_START_DASH;
@@ -1909,12 +1846,23 @@ hubbub_error hubbub_tokeniser_handle_comment(hubbub_tokeniser *tokeniser)
} else if (tokeniser->state == STATE_COMMENT_END_DASH) {
tokeniser->state = STATE_COMMENT_END;
} else if (tokeniser->state == STATE_COMMENT_END) {
+ /** \todo parse error */
error = parserutils_buffer_append(tokeniser->buffer,
(uint8_t *) "-", SLEN("-"));
if (error != PARSERUTILS_OK) {
return hubbub_error_from_parserutils_error(
error);
}
+ } else if (tokeniser->state == STATE_COMMENT_END_BANG) {
+ /** \todo parse error */
+ error = parserutils_buffer_append(tokeniser->buffer,
+ (uint8_t *) "--!", SLEN("--!"));
+ if (error != PARSERUTILS_OK) {
+ return hubbub_error_from_parserutils_error(
+ error);
+ }
+
+ tokeniser->state = STATE_COMMENT_END_DASH;
}

tokeniser->context.pending += len;
@@ -1934,9 +1882,17 @@ hubbub_error hubbub_tokeniser_handle_comment(hubbub_tokeniser *tokeniser)
return hubbub_error_from_parserutils_error(
error);
}
+ } else if (tokeniser->state == STATE_COMMENT_END_BANG) {
+ error = parserutils_buffer_append(tokeniser->buffer,
+ (uint8_t *) "--!", SLEN("--!"));
+ if (error != PARSERUTILS_OK) {
+ return hubbub_error_from_parserutils_error(
+ error);
+ }
}

if (c == '\0') {
+ /** \todo parse error */
error = parserutils_buffer_append(tokeniser->buffer,
u_fffd, sizeof(u_fffd));
if (error != PARSERUTILS_OK) {
@@ -1984,7 +1940,7 @@ hubbub_error hubbub_tokeniser_handle_comment(hubbub_tokeniser *tokeniser)

#define DOCTYPE "DOCTYPE"
#define DOCTYPE_LEN (SLEN(DOCTYPE) - 1)
-
+/* Checks for the "DOCTYPE" keyword */
hubbub_error hubbub_tokeniser_handle_match_doctype(hubbub_tokeniser *tokeniser)
{
size_t len;
@@ -2053,8 +2009,10 @@ hubbub_error hubbub_tokeniser_handle_doctype(hubbub_tokeniser *tokeniser)

if (error != PARSERUTILS_OK) {
if (error == PARSERUTILS_EOF) {
- tokeniser->state = STATE_BEFORE_DOCTYPE_NAME;
- return HUBBUB_OK;
+ /** \todo parse error */
+ /* Emit current doctype, force-quirks on */
+ tokeniser->state = STATE_DATA;
+ return emit_current_doctype(tokeniser, true);
} else {
return hubbub_error_from_parserutils_error(error);
}
@@ -2062,8 +2020,10 @@ hubbub_error hubbub_tokeniser_handle_doctype(hubbub_tokeniser *tokeniser)

c = *cptr;

- if (c == '\t' || c == '\n' || c == '\f' || c == ' ' || c == '\r') {
+ if (c == '\t' || c == '\n' || c == '\f' || c == ' ') {
tokeniser->context.pending += len;
+ } else {
+ /** \todo parse error */
}

tokeniser->state = STATE_BEFORE_DOCTYPE_NAME;
@@ -2106,6 +2066,7 @@ hubbub_error hubbub_tokeniser_handle_before_doctype_name(
return emit_current_doctype(tokeniser, true);
} else {
if (c == '\0') {
+ /** \todo parse error */
START_BUF(cdoc->name, u_fffd, sizeof(u_fffd));
} else if ('A' <= c && c <= 'Z') {
uint8_t lc = c + 0x20;
@@ -2135,6 +2096,7 @@ hubbub_error hubbub_tokeniser_handle_doctype_name(hubbub_tokeniser *tokeniser)

if (error != PARSERUTILS_OK) {
if (error == PARSERUTILS_EOF) {
+ /** \todo parse error */
tokeniser->state = STATE_DATA;
return emit_current_doctype(tokeniser, true);
} else {
@@ -2144,7 +2106,7 @@ hubbub_error hubbub_tokeniser_handle_doctype_name(hubbub_tokeniser *tokeniser)

c = *cptr;

- if (c == '\t' || c == '\n' || c == '\f' || c == ' ' || c == '\r') {
+ if (c == '\t' || c == '\n' || c == '\f' || c == ' ' || c =='\r') {
tokeniser->context.pending += len;
tokeniser->state = STATE_AFTER_DOCTYPE_NAME;
} else if (c == '>') {
@@ -2152,6 +2114,7 @@ hubbub_error hubbub_tokeniser_handle_doctype_name(hubbub_tokeniser *tokeniser)
tokeniser->state = STATE_DATA;
return emit_current_doctype(tokeniser, false);
} else if (c == '\0') {
+ /** \todo parse error */
COLLECT(cdoc->name, u_fffd, sizeof(u_fffd));
tokeniser->context.pending += len;
} else if ('A' <= c && c <= 'Z') {
@@ -2179,6 +2142,7 @@ hubbub_error hubbub_tokeniser_handle_after_doctype_name(

if (error != PARSERUTILS_OK) {
if (error == PARSERUTILS_EOF) {
+ /** \todo parse error */
tokeniser->state = STATE_DATA;
return emit_current_doctype(tokeniser, true);
} else {
@@ -2189,7 +2153,7 @@ hubbub_error hubbub_tokeniser_handle_after_doctype_name(
c = *cptr;
tokeniser->context.pending += len;

- if (c == '\t' || c == '\n' || c == '\f' || c == ' ' || c == '\r') {
+ if (c == '\t' || c == '\n' || c == '\f' || c == ' ') {
/* pass over in silence */
} else if (c == '>') {
tokeniser->state = STATE_DATA;
@@ -2201,6 +2165,7 @@ hubbub_error hubbub_tokeniser_handle_after_doctype_name(
tokeniser->context.match_doctype.count = 1;
tokeniser->state = STATE_MATCH_SYSTEM;
} else {
+ /** \todo parse error */
tokeniser->state = STATE_BOGUS_DOCTYPE;
tokeniser->context.current_doctype.force_quirks = true;
}
@@ -2255,6 +2220,7 @@ hubbub_error hubbub_tokeniser_handle_match_public(hubbub_tokeniser *tokeniser)
#undef PUBLIC
#undef PUBLIC_LEN

+// same as after public keyword state
hubbub_error hubbub_tokeniser_handle_before_doctype_public(
hubbub_tokeniser *tokeniser)
{
@@ -2269,6 +2235,7 @@ hubbub_error hubbub_tokeniser_handle_before_doctype_public(

if (error != PARSERUTILS_OK) {
if (error == PARSERUTILS_EOF) {
+ /** \todo parse error */
tokeniser->state = STATE_DATA;
return emit_current_doctype(tokeniser, true);
} else {
@@ -2279,20 +2246,74 @@ hubbub_error hubbub_tokeniser_handle_before_doctype_public(
c = *cptr;
tokeniser->context.pending += len;

- if (c == '\t' || c == '\n' || c == '\f' || c == ' ' || c == '\r') {
- /* pass over in silence */
+ if (c == '\t' || c == '\n' || c == '\f' || c == ' ') {
+ tokeniser->state = STATE_BEFORE_DOCTYPE_PUBLIC_IDENTIFIER;
+ } else if (c == '"') {
+ /** \todo parse error */
+ cdoc->public_missing = false;
+ cdoc->public_id.len = 0;
+ tokeniser->state = STATE_DOCTYPE_PUBLIC_DQ;
+ } else if (c == '\'') {
+ /** \todo parse error */
+ cdoc->public_missing = false;
+ cdoc->public_id.len = 0;
+ tokeniser->state = STATE_DOCTYPE_PUBLIC_SQ;
+ } else if (c == '>') {
+ /** \todo parse error */
+ tokeniser->state = STATE_DATA;
+ return emit_current_doctype(tokeniser, true);
+ } else {
+ /** \todo parse error */
+ cdoc->force_quirks = true;
+ tokeniser->state = STATE_BOGUS_DOCTYPE;
+ }
+
+ return HUBBUB_OK;
+}
+
+hubbub_error hubbub_tokeniser_handle_doctype_public_identifier(
+ hubbub_tokeniser *tokeniser)
+{
+ hubbub_doctype *cdoc = &tokeniser->context.current_doctype;
+ size_t len;
+ const uint8_t *cptr;
+ parserutils_error error;
+ uint8_t c;
+
+ error = parserutils_inputstream_peek(tokeniser->input,
+ tokeniser->context.pending, &cptr, &len);
+
+ if (error != PARSERUTILS_OK) {
+ if (error == PARSERUTILS_EOF) {
+ /** \todo parse error */
+ tokeniser->state = STATE_DATA;
+ return emit_current_doctype(tokeniser, true);
+ } else {
+ return hubbub_error_from_parserutils_error(error);
+ }
+ }
+
+ c = *cptr;
+ tokeniser->context.pending += len;
+
+ if (c == '\t' || c == '\n' || c == '\f' || c == ' ') {
+ /** ignore the character */
} else if (c == '"') {
+ /** \todo parse error */
cdoc->public_missing = false;
cdoc->public_id.len = 0;
tokeniser->state = STATE_DOCTYPE_PUBLIC_DQ;
} else if (c == '\'') {
+ /** \todo parse error */
cdoc->public_missing = false;
cdoc->public_id.len = 0;
tokeniser->state = STATE_DOCTYPE_PUBLIC_SQ;
} else if (c == '>') {
+ /** \todo parse error */
tokeniser->state = STATE_DATA;
return emit_current_doctype(tokeniser, true);
} else {
+ /** \todo parse error */
cdoc->force_quirks = true;
tokeniser->state = STATE_BOGUS_DOCTYPE;
}
@@ -2314,6 +2335,7 @@ hubbub_error hubbub_tokeniser_handle_doctype_public_dq(

if (error != PARSERUTILS_OK) {
if (error == PARSERUTILS_EOF) {
+ /** \todo parse error */
tokeniser->state = STATE_DATA;
return emit_current_doctype(tokeniser, true);
} else {
@@ -2327,10 +2349,12 @@ hubbub_error hubbub_tokeniser_handle_doctype_public_dq(
tokeniser->context.pending += len;
tokeniser->state = STATE_AFTER_DOCTYPE_PUBLIC;
} else if (c == '>') {
+ /** \todo parse error */
tokeniser->context.pending += len;
tokeniser->state = STATE_DATA;
return emit_current_doctype(tokeniser, true);
} else if (c == '\0') {
+ /** \todo parse error */
COLLECT_MS(cdoc->public_id, u_fffd, sizeof(u_fffd));
tokeniser->context.pending += len;
} else if (c == '\r') {
@@ -2371,6 +2395,7 @@ hubbub_error hubbub_tokeniser_handle_doctype_public_sq(

if (error != PARSERUTILS_OK) {
if (error == PARSERUTILS_EOF) {
+ /** \todo parse error */
tokeniser->state = STATE_DATA;
return emit_current_doctype(tokeniser, true);
} else {
@@ -2384,10 +2409,12 @@ hubbub_error hubbub_tokeniser_handle_doctype_public_sq(
tokeniser->context.pending += len;
tokeniser->state = STATE_AFTER_DOCTYPE_PUBLIC;
} else if (c == '>') {
+ /** \todo parse error */
tokeniser->context.pending += len;
tokeniser->state = STATE_DATA;
return emit_current_doctype(tokeniser, true);
} else if (c == '\0') {
+ /** \todo parse error */
COLLECT_MS(cdoc->public_id, u_fffd, sizeof(u_fffd));
tokeniser->context.pending += len;
} else if (c == '\r') {
@@ -2413,7 +2440,7 @@ hubbub_error hubbub_tokeniser_handle_doctype_public_sq(
return HUBBUB_OK;
}

-
+// after doctype public state and between doctype public & system state are overlapping
hubbub_error hubbub_tokeniser_handle_after_doctype_public(
hubbub_tokeniser *tokeniser)
{
@@ -2428,6 +2455,7 @@ hubbub_error hubbub_tokeniser_handle_after_doctype_public(

if (error != PARSERUTILS_OK) {
if (error == PARSERUTILS_EOF) {
+ /** \todo parse error */
tokeniser->state = STATE_DATA;
return emit_current_doctype(tokeniser, true);
} else {
@@ -2438,7 +2466,7 @@ hubbub_error hubbub_tokeniser_handle_after_doctype_public(
c = *cptr;
tokeniser->context.pending += len;

- if (c == '\t' || c == '\n' || c == '\f' || c == ' ' || c == '\r') {
+ if (c == '\t' || c == '\n' || c == '\f' || c == ' ') {
/* pass over in silence */
} else if (c == '"') {
cdoc->system_missing = false;
@@ -2510,6 +2538,8 @@ hubbub_error hubbub_tokeniser_handle_match_system(hubbub_tokeniser *tokeniser)
#undef SYSTEM
#undef SYSTEM_LEN

+// same as after doctype system keyword state
+// overlapping with before DOCTYPE system identifier state
hubbub_error hubbub_tokeniser_handle_before_doctype_system(
hubbub_tokeniser *tokeniser)
{
@@ -2524,6 +2554,7 @@ hubbub_error hubbub_tokeniser_handle_before_doctype_system(

if (error != PARSERUTILS_OK) {
if (error == PARSERUTILS_EOF) {
+ /** \todo parse error */
tokeniser->state = STATE_DATA;
return emit_current_doctype(tokeniser, true);
} else {
@@ -2534,22 +2565,26 @@ hubbub_error hubbub_tokeniser_handle_before_doctype_system(
c = *cptr;
tokeniser->context.pending += len;

- if (c == '\t' || c == '\n' || c == '\f' || c == ' ' || c == '\r') {
+ if (c == '\t' || c == '\n' || c == '\f' || c == ' ') {
/* pass over */
} else if (c == '"') {
+ /** \todo parse error if this is not the second call for this state */
cdoc->system_missing = false;
cdoc->system_id.len = 0;

tokeniser->state = STATE_DOCTYPE_SYSTEM_DQ;
} else if (c == '\'') {
+ /** \todo parse error if this is not the second call for this state */
cdoc->system_missing = false;
cdoc->system_id.len = 0;

tokeniser->state = STATE_DOCTYPE_SYSTEM_SQ;
} else if (c == '>') {
+ /** \todo parse error */
tokeniser->state = STATE_DATA;
return emit_current_doctype(tokeniser, true);
} else {
+ /** \todo parse error */
cdoc->force_quirks = true;
tokeniser->state = STATE_BOGUS_DOCTYPE;
}
@@ -2571,6 +2606,7 @@ hubbub_error hubbub_tokeniser_handle_doctype_system_dq(

if (error != PARSERUTILS_OK) {
if (error == PARSERUTILS_EOF) {
+ /** \todo parse error */
tokeniser->state = STATE_DATA;
return emit_current_doctype(tokeniser, true);
} else {
@@ -2584,10 +2620,12 @@ hubbub_error hubbub_tokeniser_handle_doctype_system_dq(
tokeniser->context.pending += len;
tokeniser->state = STATE_AFTER_DOCTYPE_SYSTEM;
} else if (c == '>') {
+ /** \todo parse error */
tokeniser->context.pending += len;
tokeniser->state = STATE_DATA;
return emit_current_doctype(tokeniser, true);
} else if (c == '\0') {
+ /** \todo parse error */
COLLECT_MS(cdoc->system_id, u_fffd, sizeof(u_fffd));
tokeniser->context.pending += len;
} else if (c == '\r') {
@@ -2627,6 +2665,7 @@ hubbub_error hubbub_tokeniser_handle_doctype_system_sq(

if (error != PARSERUTILS_OK) {
if (error == PARSERUTILS_EOF) {
+ /** \todo parse error */
tokeniser->state = STATE_DATA;
return emit_current_doctype(tokeniser, true);
} else {
@@ -2640,10 +2679,12 @@ hubbub_error hubbub_tokeniser_handle_doctype_system_sq(
tokeniser->context.pending += len;
tokeniser->state = STATE_AFTER_DOCTYPE_SYSTEM;
} else if (c == '>') {
+ /** \todo parse error */
tokeniser->context.pending += len;
tokeniser->state = STATE_DATA;
return emit_current_doctype(tokeniser, true);
} else if (c == '\0') {
+ /** \todo parse error */
COLLECT_MS(cdoc->system_id, u_fffd, sizeof(u_fffd));
tokeniser->context.pending += len;
} else if (c == '\r') {
@@ -2682,6 +2723,7 @@ hubbub_error hubbub_tokeniser_handle_after_doctype_system(

if (error != PARSERUTILS_OK) {
if (error == PARSERUTILS_EOF) {
+ /** \todo parse error */
tokeniser->state = STATE_DATA;
return emit_current_doctype(tokeniser, true);
} else {
@@ -2692,12 +2734,13 @@ hubbub_error hubbub_tokeniser_handle_after_doctype_system(
c = *cptr;
tokeniser->context.pending += len;

- if (c == '\t' || c == '\n' || c == '\f' || c == ' ' || c == '\r') {
+ if (c == '\t' || c == '\n' || c == '\f' || c == ' ' || c =='\r') {
/* pass over in silence */
} else if (c == '>') {
tokeniser->state = STATE_DATA;
return emit_current_doctype(tokeniser, false);
} else {
+ /** \todo parse error */
tokeniser->state = STATE_BOGUS_DOCTYPE;
}

@@ -2998,7 +3041,7 @@ hubbub_error hubbub_tokeniser_handle_numbered_entity(
break;
}

- if (ctx->match_entity.codepoint >= 0x10FFFF) {
+ if (ctx->match_entity.codepoint > 0x10FFFF) {
ctx->match_entity.overflow = true;
}
}
@@ -3011,27 +3054,34 @@ hubbub_error hubbub_tokeniser_handle_numbered_entity(
if (error != PARSERUTILS_EOF && *cptr == ';') {
ctx->match_entity.length += len;
}
+ else{
+ /** \todo parse error */
+ }

/* Had data, so calculate final codepoint */
if (ctx->match_entity.had_data) {
uint32_t cp = ctx->match_entity.codepoint;

- if (0x80 <= cp && cp <= 0x9F) {
+ if (cp == 0x00) {
+ cp = 0xFFFD;
+ } else if (0x80 <= cp && cp <= 0x9F) {
cp = cp1252Table[cp - 0x80];
- } else if (cp == 0x0D) {
- cp = 0x000A;
} else if (ctx->match_entity.overflow ||
- cp <= 0x0008 || cp == 0x000B ||
- (0x000E <= cp && cp <= 0x001F) ||
- (0x007F <= cp && cp <= 0x009F) ||
- (0xD800 <= cp && cp <= 0xDFFF) ||
- (0xFDD0 <= cp && cp <= 0xFDEF) ||
- (cp & 0xFFFE) == 0xFFFE) {
+ (0xD800 <= cp && cp <= 0xDFFF)) {
/* the check for cp > 0x10FFFF per spec is performed
* in the loop above to avoid overflow */
cp = 0xFFFD;
}

+ if ((0x0001 <= cp && cp <= 0x0008) ||
+ (0x000D <= cp && cp <= 0x001F) ||
+ (0x007F <= cp && cp <= 0x009F) ||
+ (0xFDD0 <= cp && cp <= 0xFDEF) ||
+ cp == 0x000B || (cp & 0xFFFE) == 0xFFFE ||
+ (cp & 0xFFFF) == 0xFFFF) {
+ /** \todo parse error */
+ }
+
ctx->match_entity.codepoint = cp;
}

--
1.8.3.2

From f2606f47504253446201e00440a0610c0870a0a5 Mon Sep 17 00:00:00 2001
From: Achal-Aggarwal <theachalaggarwal@gmail.com>
Date: Wed, 12 Mar 2014 00:21:50 +0530
Subject: [PATCH 02/11] Adding states for RCDATA, RAWTEXT, PLAINTEXT and one
for SCRIPTDATA.

---
include/hubbub/parser.h | 6 +-
include/hubbub/types.h | 13 +-
src/parser.c | 4 +-
src/tokeniser/tokeniser.c | 1224 +++++++++++++++++++++++++++++++++++------
src/tokeniser/tokeniser.h | 6 +-
src/treebuilder/in_body.c | 4 +-
src/treebuilder/treebuilder.c | 6 +-
7 files changed, 1066 insertions(+), 197 deletions(-)

diff --git a/include/hubbub/parser.h b/include/hubbub/parser.h
index bdc5e20..42d68cc 100644
--- a/include/hubbub/parser.h
+++ b/include/hubbub/parser.h
@@ -29,7 +29,7 @@ typedef struct hubbub_parser hubbub_parser;
typedef enum hubbub_parser_opttype {
HUBBUB_PARSER_TOKEN_HANDLER,
HUBBUB_PARSER_ERROR_HANDLER,
- HUBBUB_PARSER_CONTENT_MODEL,
+ HUBBUB_PARSER_INITIAL_STATE,
HUBBUB_PARSER_TREE_HANDLER,
HUBBUB_PARSER_DOCUMENT_NODE,
HUBBUB_PARSER_ENABLE_SCRIPTING,
@@ -51,8 +51,8 @@ typedef union hubbub_parser_optparams {
} error_handler; /**< Error handling callback */

struct {
- hubbub_content_model model;
- } content_model; /**< Current content model */
+ hubbub_initial_state state;
+ } initial_state; /**< Initial state of tokeniser */

hubbub_tree_handler *tree_handler; /**< Tree handling callbacks */

diff --git a/include/hubbub/types.h b/include/hubbub/types.h
index e5c208b..6e14fb7 100644
--- a/include/hubbub/types.h
+++ b/include/hubbub/types.h
@@ -29,12 +29,13 @@ typedef enum hubbub_charset_source {
/**
* Content model flag
*/
-typedef enum hubbub_content_model {
- HUBBUB_CONTENT_MODEL_PCDATA,
- HUBBUB_CONTENT_MODEL_RCDATA,
- HUBBUB_CONTENT_MODEL_CDATA,
- HUBBUB_CONTENT_MODEL_PLAINTEXT
-} hubbub_content_model;
+typedef enum hubbub_initial_state {
+ HUBBUB_INITIAL_STATE_DATA,
+ HUBBUB_INITIAL_STATE_RCDATA,
+ HUBBUB_INITIAL_STATE_CDATA,
+ HUBBUB_INITIAL_STATE_PLAINTEXT,
+ HUBBUB_INITIAL_STATE_RAWTEXT
+} hubbub_initial_state;

/**
* Quirks mode flag
diff --git a/src/parser.c b/src/parser.c
index 671e129..749c674 100644
--- a/src/parser.c
+++ b/src/parser.c
@@ -160,9 +160,9 @@ hubbub_error hubbub_parser_setopt(hubbub_parser *parser,
}
break;

- case HUBBUB_PARSER_CONTENT_MODEL:
+ case HUBBUB_PARSER_INITIAL_STATE:
result = hubbub_tokeniser_setopt(parser->tok,
- HUBBUB_TOKENISER_CONTENT_MODEL,
+ HUBBUB_TOKENISER_INITIAL_STATE,
(hubbub_tokeniser_optparams *) params);
break;

diff --git a/src/tokeniser/tokeniser.c b/src/tokeniser/tokeniser.c
index 3eab8a7..7152f05 100644
--- a/src/tokeniser/tokeniser.c
+++ b/src/tokeniser/tokeniser.c
@@ -43,16 +43,26 @@ static const hubbub_string u_fffd_str = { u_fffd, sizeof(u_fffd) };
static const uint8_t lf = '\n';
static const hubbub_string lf_str = { &lf, 1 };

-
/**
* Tokeniser states
*/
typedef enum hubbub_tokeniser_state {
STATE_DATA,
STATE_CHARACTER_REFERENCE_DATA,
+ STATE_RCDATA,
+ STATE_CHARACTER_REFERENCE_RCDATA,
+ STATE_RAWTEXT,
+ STATE_SCRIPT_DATA,
+ STATE_PLAINTEXT,
STATE_TAG_OPEN,
STATE_CLOSE_TAG_OPEN,
STATE_TAG_NAME,
+ STATE_RCDATA_LESSTHAN,
+ STATE_RCDATA_CLOSE_TAG_OPEN,
+ STATE_RCDATA_CLOSE_TAG_NAME,
+ STATE_RAWTEXT_LESSTHAN,
+ STATE_RAWTEXT_CLOSE_TAG_OPEN,
+ STATE_RAWTEXT_CLOSE_TAG_NAME,
STATE_BEFORE_ATTRIBUTE_NAME,
STATE_ATTRIBUTE_NAME,
STATE_AFTER_ATTRIBUTE_NAME,
@@ -166,8 +176,6 @@ typedef struct hubbub_tokeniser_context {
*/
struct hubbub_tokeniser {
hubbub_tokeniser_state state; /**< Current tokeniser state */
- hubbub_content_model content_model; /**< Current content
- * model flag */
bool escape_flag; /**< Escape flag **/
bool process_cdata_section; /**< Whether to process CDATA sections*/
bool paused; /**< flag for if parsing is currently paused */
@@ -188,12 +196,34 @@ struct hubbub_tokeniser {
static hubbub_error hubbub_tokeniser_handle_data(hubbub_tokeniser *tokeniser);
static hubbub_error hubbub_tokeniser_handle_character_reference_data(
hubbub_tokeniser *tokeniser);
+static hubbub_error hubbub_tokeniser_handle_rcdata(
+ hubbub_tokeniser *tokeniser);
+static hubbub_error hubbub_tokeniser_handle_character_reference_rcdata(
+ hubbub_tokeniser *tokeniser);
+static hubbub_error hubbub_tokeniser_handle_rawtext(
+ hubbub_tokeniser *tokeniser);
+static hubbub_error hubbub_tokeniser_handle_script_data(
+ hubbub_tokeniser *tokeniser);
+static hubbub_error hubbub_tokeniser_handle_plaintext(
+ hubbub_tokeniser *tokeniser);
static hubbub_error hubbub_tokeniser_handle_tag_open(
hubbub_tokeniser *tokeniser);
static hubbub_error hubbub_tokeniser_handle_close_tag_open(
hubbub_tokeniser *tokeniser);
static hubbub_error hubbub_tokeniser_handle_tag_name(
hubbub_tokeniser *tokeniser);
+static hubbub_error hubbub_tokeniser_handle_rcdata_lessthan(
+ hubbub_tokeniser *tokeniser);
+static hubbub_error hubbub_tokeniser_handle_rcdata_close_tag_open(
+ hubbub_tokeniser *tokeniser);
+static hubbub_error hubbub_tokeniser_handle_rcdata_close_tag_name(
+ hubbub_tokeniser *tokeniser);
+static hubbub_error hubbub_tokeniser_handle_rawtext_lessthan(
+ hubbub_tokeniser *tokeniser);
+static hubbub_error hubbub_tokeniser_handle_rawtext_close_tag_open(
+ hubbub_tokeniser *tokeniser);
+static hubbub_error hubbub_tokeniser_handle_rawtext_close_tag_name(
+ hubbub_tokeniser *tokeniser);
static hubbub_error hubbub_tokeniser_handle_before_attribute_name(
hubbub_tokeniser *tokeniser);
static hubbub_error hubbub_tokeniser_handle_attribute_name(
@@ -313,7 +343,6 @@ hubbub_error hubbub_tokeniser_create(parserutils_inputstream *input,
}

tok->state = STATE_DATA;
- tok->content_model = HUBBUB_CONTENT_MODEL_PCDATA;

tok->escape_flag = false;
tok->process_cdata_section = false;
@@ -385,8 +414,18 @@ hubbub_error hubbub_tokeniser_setopt(hubbub_tokeniser *tokeniser,
tokeniser->error_handler = params->error_handler.handler;
tokeniser->error_pw = params->error_handler.pw;
break;
- case HUBBUB_TOKENISER_CONTENT_MODEL:
- tokeniser->content_model = params->content_model.model;
+ case HUBBUB_TOKENISER_INITIAL_STATE:
+ if (params->initial_state.state == HUBBUB_INITIAL_STATE_DATA) {
+ tokeniser->state = STATE_DATA;
+ } else if (params->initial_state.state == HUBBUB_INITIAL_STATE_RCDATA) {
+ tokeniser->state = STATE_RCDATA;
+ } else if (params->initial_state.state == HUBBUB_INITIAL_STATE_CDATA) {
+ tokeniser->state = STATE_CDATA_BLOCK;
+ } else if (params->initial_state.state == HUBBUB_INITIAL_STATE_PLAINTEXT) {
+ tokeniser->state = STATE_PLAINTEXT;
+ } else if (params->initial_state.state == HUBBUB_INITIAL_STATE_RAWTEXT) {
+ tokeniser->state = STATE_RAWTEXT;
+ }
break;
case HUBBUB_TOKENISER_PROCESS_CDATA:
tokeniser->process_cdata_section = params->process_cdata;
@@ -465,6 +504,26 @@ hubbub_error hubbub_tokeniser_run(hubbub_tokeniser *tokeniser)
cont = hubbub_tokeniser_handle_character_reference_data(
tokeniser);
break;
+ state(STATE_RCDATA)
+ cont = hubbub_tokeniser_handle_rcdata(
+ tokeniser);
+ break;
+ state(STATE_CHARACTER_REFERENCE_RCDATA)
+ cont = hubbub_tokeniser_handle_character_reference_rcdata(
+ tokeniser);
+ break;
+ state(STATE_RAWTEXT)
+ cont = hubbub_tokeniser_handle_rawtext(
+ tokeniser);
+ break;
+ state(STATE_SCRIPT_DATA)
+ cont = hubbub_tokeniser_handle_script_data(
+ tokeniser);
+ break;
+ state(STATE_PLAINTEXT)
+ cont = hubbub_tokeniser_handle_plaintext(
+ tokeniser);
+ break;
state(STATE_TAG_OPEN)
cont = hubbub_tokeniser_handle_tag_open(tokeniser);
break;
@@ -475,6 +534,30 @@ hubbub_error hubbub_tokeniser_run(hubbub_tokeniser *tokeniser)
state(STATE_TAG_NAME)
cont = hubbub_tokeniser_handle_tag_name(tokeniser);
break;
+ state(STATE_RCDATA_LESSTHAN)
+ cont = hubbub_tokeniser_handle_rcdata_lessthan(
+ tokeniser);
+ break;
+ state(STATE_RCDATA_CLOSE_TAG_OPEN)
+ cont = hubbub_tokeniser_handle_rcdata_close_tag_open(
+ tokeniser);
+ break;
+ state(STATE_RCDATA_CLOSE_TAG_NAME)
+ cont = hubbub_tokeniser_handle_rcdata_close_tag_name(
+ tokeniser);
+ break;
+ state(STATE_RAWTEXT_LESSTHAN)
+ cont = hubbub_tokeniser_handle_rawtext_lessthan(
+ tokeniser);
+ break;
+ state(STATE_RAWTEXT_CLOSE_TAG_OPEN)
+ cont = hubbub_tokeniser_handle_rawtext_close_tag_open(
+ tokeniser);
+ break;
+ state(STATE_RAWTEXT_CLOSE_TAG_NAME)
+ cont = hubbub_tokeniser_handle_rawtext_close_tag_name(
+ tokeniser);
+ break;
state(STATE_BEFORE_ATTRIBUTE_NAME)
cont = hubbub_tokeniser_handle_before_attribute_name(
tokeniser);
@@ -703,103 +786,899 @@ hubbub_error hubbub_tokeniser_handle_data(hubbub_tokeniser *tokeniser)
break;
}

- if (tokeniser->context.pending > 0) {
- /* Emit any pending characters */
- emit_current_chars(tokeniser);
- }
+ if (tokeniser->context.pending > 0) {
+ /* Emit any pending characters */
+ emit_current_chars(tokeniser);
+ }
+
+ if (error == PARSERUTILS_EOF || *cptr != '\n') {
+ /* Emit newline */
+ emit_character_token(tokeniser, &lf_str);
+ }
+
+ /* Advance over */
+ parserutils_inputstream_advance(tokeniser->input, 1);
+ } else {
+ if (c == '\0') {
+ /** \todo parse error */
+ }
+
+ /* Just collect into buffer */
+ tokeniser->context.pending += len;
+ }
+ }
+ if (tokeniser->state != STATE_TAG_OPEN &&
+ (tokeniser->state != STATE_DATA || error == PARSERUTILS_EOF) &&
+ tokeniser->context.pending > 0) {
+ /* Emit any pending characters */
+ emit_current_chars(tokeniser);
+ }
+
+ if (error == PARSERUTILS_EOF) {
+ token.type = HUBBUB_TOKEN_EOF;
+ hubbub_tokeniser_emit_token(tokeniser, &token);
+ }
+
+ if (error == PARSERUTILS_EOF) {
+ return HUBBUB_NEEDDATA;
+ } else {
+ return hubbub_error_from_parserutils_error(error);
+ }
+}
+
+
+
+/* emit any pending tokens before calling */
+hubbub_error hubbub_tokeniser_handle_character_reference_data(
+ hubbub_tokeniser *tokeniser)
+{
+ assert(tokeniser->context.pending == 0);
+
+ if (tokeniser->context.match_entity.complete == false) {
+ return hubbub_tokeniser_consume_character_reference(tokeniser,
+ tokeniser->context.pending);
+ } else {
+ hubbub_token token;
+
+ uint8_t utf8[6];
+ uint8_t *utf8ptr = utf8;
+ size_t len = sizeof(utf8);
+
+ token.type = HUBBUB_TOKEN_CHARACTER;
+
+ if (tokeniser->context.match_entity.codepoint) {
+ parserutils_charset_utf8_from_ucs4(
+ tokeniser->context.match_entity.codepoint,
+ &utf8ptr, &len);
+
+ token.data.character.ptr = utf8;
+ token.data.character.len = sizeof(utf8) - len;
+
+ hubbub_tokeniser_emit_token(tokeniser, &token);
+
+ /* +1 for ampersand */
+ parserutils_inputstream_advance(tokeniser->input,
+ tokeniser->context.match_entity.length
+ + 1);
+ } else {
+ parserutils_error error;
+ const uint8_t *cptr = NULL;
+
+ error = parserutils_inputstream_peek(
+ tokeniser->input,
+ tokeniser->context.pending,
+ &cptr,
+ &len);
+ if (error != PARSERUTILS_OK) {
+ return hubbub_error_from_parserutils_error(
+ error);
+ }
+
+ token.data.character.ptr = cptr;
+ token.data.character.len = len;
+
+ hubbub_tokeniser_emit_token(tokeniser, &token);
+ parserutils_inputstream_advance(tokeniser->input, len);
+ }
+
+ /* Reset for next time */
+ tokeniser->context.match_entity.complete = false;
+
+ tokeniser->state = STATE_DATA;
+ }
+
+ return HUBBUB_OK;
+}
+
+hubbub_error hubbub_tokeniser_handle_rcdata(hubbub_tokeniser *tokeniser)
+{
+ parserutils_error error;
+ hubbub_token token;
+ const uint8_t *cptr;
+ size_t len;
+
+ while ((error = parserutils_inputstream_peek(tokeniser->input,
+ tokeniser->context.pending, &cptr, &len)) ==
+ PARSERUTILS_OK) {
+ const uint8_t c = *cptr;
+
+ if (c == '&') {
+ tokeniser->state = STATE_CHARACTER_REFERENCE_RCDATA;
+ /* Don't eat the '&'; it'll be handled by entity
+ * consumption */
+ break;
+ } else if (c == '<') {
+ if (tokeniser->context.pending > 0) {
+ /* Emit any pending characters */
+ emit_current_chars(tokeniser);
+ }
+
+ /* Buffer '<' */
+ tokeniser->context.pending = len;
+ tokeniser->state = STATE_RCDATA_LESSTHAN;
+ break;
+ } else if (c == '\r') {
+ error = parserutils_inputstream_peek(
+ tokeniser->input,
+ tokeniser->context.pending + len,
+ &cptr,
+ &len);
+
+ if (error != PARSERUTILS_OK &&
+ error != PARSERUTILS_EOF) {
+ break;
+ }
+
+ if (tokeniser->context.pending > 0) {
+ /* Emit any pending characters */
+ emit_current_chars(tokeniser);
+ }
+
+ if (error == PARSERUTILS_EOF || *cptr != '\n') {
+ /* Emit newline */
+ emit_character_token(tokeniser, &lf_str);
+ }
+
+ /* Advance over */
+ parserutils_inputstream_advance(tokeniser->input, 1);
+ } else {
+ if (c == '\0') {
+ /** \todo parse error */
+ error = parserutils_buffer_append(tokeniser->buffer,
+ u_fffd, sizeof(u_fffd));
+ if (error != PARSERUTILS_OK)
+ return hubbub_error_from_parserutils_error(error);
+ }
+
+ /* Just collect into buffer */
+ tokeniser->context.pending += len;
+ }
+ }
+ if (tokeniser->state != STATE_RCDATA_LESSTHAN &&
+ (tokeniser->state != STATE_RCDATA || error == PARSERUTILS_EOF) &&
+ tokeniser->context.pending > 0) {
+ /* Emit any pending characters */
+ emit_current_chars(tokeniser);
+ }
+
+ if (error == PARSERUTILS_EOF) {
+ token.type = HUBBUB_TOKEN_EOF;
+ hubbub_tokeniser_emit_token(tokeniser, &token);
+ }
+
+ if (error == PARSERUTILS_EOF) {
+ return HUBBUB_NEEDDATA;
+ } else {
+ return hubbub_error_from_parserutils_error(error);
+ }
+}
+
+
+
+/* emit any pending tokens before calling */
+hubbub_error hubbub_tokeniser_handle_character_reference_rcdata(
+ hubbub_tokeniser *tokeniser)
+{
+ assert(tokeniser->context.pending == 0);
+
+ if (tokeniser->context.match_entity.complete == false) {
+ return hubbub_tokeniser_consume_character_reference(tokeniser,
+ tokeniser->context.pending);
+ } else {
+ hubbub_token token;
+
+ uint8_t utf8[6];
+ uint8_t *utf8ptr = utf8;
+ size_t len = sizeof(utf8);
+
+ token.type = HUBBUB_TOKEN_CHARACTER;
+
+ if (tokeniser->context.match_entity.codepoint) {
+ parserutils_charset_utf8_from_ucs4(
+ tokeniser->context.match_entity.codepoint,
+ &utf8ptr, &len);
+
+ token.data.character.ptr = utf8;
+ token.data.character.len = sizeof(utf8) - len;
+
+ hubbub_tokeniser_emit_token(tokeniser, &token);
+
+ /* +1 for ampersand */
+ parserutils_inputstream_advance(tokeniser->input,
+ tokeniser->context.match_entity.length
+ + 1);
+ } else {
+ parserutils_error error;
+ const uint8_t *cptr = NULL;
+
+ error = parserutils_inputstream_peek(
+ tokeniser->input,
+ tokeniser->context.pending,
+ &cptr,
+ &len);
+ if (error != PARSERUTILS_OK) {
+ return hubbub_error_from_parserutils_error(
+ error);
+ }
+
+ token.data.character.ptr = cptr;
+ token.data.character.len = len;
+
+ hubbub_tokeniser_emit_token(tokeniser, &token);
+ parserutils_inputstream_advance(tokeniser->input, len);
+ }
+
+ /* Reset for next time */
+ tokeniser->context.match_entity.complete = false;
+
+ tokeniser->state = STATE_RCDATA;
+ }
+
+ return HUBBUB_OK;
+}
+
+hubbub_error hubbub_tokeniser_handle_rawtext(hubbub_tokeniser *tokeniser)
+{
+ parserutils_error error;
+ hubbub_token token;
+ const uint8_t *cptr;
+ size_t len;
+
+ while ((error = parserutils_inputstream_peek(tokeniser->input,
+ tokeniser->context.pending, &cptr, &len)) ==
+ PARSERUTILS_OK) {
+ const uint8_t c = *cptr;
+
+ if (c == '<') {
+ if (tokeniser->context.pending > 0) {
+ /* Emit any pending characters */
+ emit_current_chars(tokeniser);
+ }
+
+ /* Buffer '<' */
+ tokeniser->context.pending = len;
+ tokeniser->state = STATE_RAWTEXT_LESSTHAN;
+ break;
+ } else if (c == '\r') {
+ error = parserutils_inputstream_peek(
+ tokeniser->input,
+ tokeniser->context.pending + len,
+ &cptr,
+ &len);
+
+ if (error != PARSERUTILS_OK &&
+ error != PARSERUTILS_EOF) {
+ break;
+ }
+
+ if (tokeniser->context.pending > 0) {
+ /* Emit any pending characters */
+ emit_current_chars(tokeniser);
+ }
+
+ if (error == PARSERUTILS_EOF || *cptr != '\n') {
+ /* Emit newline */
+ emit_character_token(tokeniser, &lf_str);
+ }
+
+ /* Advance over */
+ parserutils_inputstream_advance(tokeniser->input, 1);
+ } else {
+ if (c == '\0') {
+ /** \todo parse error */
+ error = parserutils_buffer_append(tokeniser->buffer,
+ u_fffd, sizeof(u_fffd));
+ if (error != PARSERUTILS_OK)
+ return hubbub_error_from_parserutils_error(error);
+ }
+
+ /* Just collect into buffer */
+ tokeniser->context.pending += len;
+ }
+ }
+ if (tokeniser->state != STATE_RAWTEXT_LESSTHAN &&
+ (tokeniser->state != STATE_RAWTEXT || error == PARSERUTILS_EOF) &&
+ tokeniser->context.pending > 0) {
+ /* Emit any pending characters */
+ emit_current_chars(tokeniser);
+ }
+
+ if (error == PARSERUTILS_EOF) {
+ token.type = HUBBUB_TOKEN_EOF;
+ hubbub_tokeniser_emit_token(tokeniser, &token);
+ }
+
+ if (error == PARSERUTILS_EOF) {
+ return HUBBUB_NEEDDATA;
+ } else {
+ return hubbub_error_from_parserutils_error(error);
+ }
+}
+
+hubbub_error hubbub_tokeniser_handle_script_data(hubbub_tokeniser *tokeniser)
+{
+ parserutils_error error;
+ hubbub_token token;
+ const uint8_t *cptr;
+ size_t len;
+
+ while ((error = parserutils_inputstream_peek(tokeniser->input,
+ tokeniser->context.pending, &cptr, &len)) ==
+ PARSERUTILS_OK) {
+ const uint8_t c = *cptr;
+
+ if (c == '<') {
+ if (tokeniser->context.pending > 0) {
+ /* Emit any pending characters */
+ emit_current_chars(tokeniser);
+ }
+
+ /* Buffer '<' */
+ tokeniser->context.pending = len;
+ ////////tokeniser->state = STATE_SCRIPT_DATA_LESSTHAN;
+ break;
+ } else if (c == '\r') {
+ error = parserutils_inputstream_peek(
+ tokeniser->input,
+ tokeniser->context.pending + len,
+ &cptr,
+ &len);
+
+ if (error != PARSERUTILS_OK &&
+ error != PARSERUTILS_EOF) {
+ break;
+ }
+
+ if (tokeniser->context.pending > 0) {
+ /* Emit any pending characters */
+ emit_current_chars(tokeniser);
+ }
+
+ if (error == PARSERUTILS_EOF || *cptr != '\n') {
+ /* Emit newline */
+ emit_character_token(tokeniser, &lf_str);
+ }
+
+ /* Advance over */
+ parserutils_inputstream_advance(tokeniser->input, 1);
+ } else {
+ if (c == '\0') {
+ /** \todo parse error */
+ error = parserutils_buffer_append(tokeniser->buffer,
+ u_fffd, sizeof(u_fffd));
+ if (error != PARSERUTILS_OK)
+ return hubbub_error_from_parserutils_error(error);
+ }
+
+ /* Just collect into buffer */
+ tokeniser->context.pending += len;
+ }
+ }
+ if (tokeniser->state != STATE_SCRIPT_DATA &&
+ (/*tokeniser->state != STATE_SCRIPT_DATA_LESSTHAN || */error == PARSERUTILS_EOF) &&
+ tokeniser->context.pending > 0) {
+ /* Emit any pending characters */
+ emit_current_chars(tokeniser);
+ }
+
+ if (error == PARSERUTILS_EOF) {
+ token.type = HUBBUB_TOKEN_EOF;
+ hubbub_tokeniser_emit_token(tokeniser, &token);
+ }
+
+ if (error == PARSERUTILS_EOF) {
+ return HUBBUB_NEEDDATA;
+ } else {
+ return hubbub_error_from_parserutils_error(error);
+ }
+}
+
+hubbub_error hubbub_tokeniser_handle_plaintext(hubbub_tokeniser *tokeniser)
+{
+ parserutils_error error;
+ hubbub_token token;
+ const uint8_t *cptr;
+ size_t len;
+
+ while ((error = parserutils_inputstream_peek(tokeniser->input,
+ tokeniser->context.pending, &cptr, &len)) ==
+ PARSERUTILS_OK) {
+ const uint8_t c = *cptr;
+
+ if (c == '\r') {
+ error = parserutils_inputstream_peek(
+ tokeniser->input,
+ tokeniser->context.pending + len,
+ &cptr,
+ &len);
+
+ if (error != PARSERUTILS_OK &&
+ error != PARSERUTILS_EOF) {
+ break;
+ }
+
+ if (tokeniser->context.pending > 0) {
+ /* Emit any pending characters */
+ emit_current_chars(tokeniser);
+ }
+
+ if (error == PARSERUTILS_EOF || *cptr != '\n') {
+ /* Emit newline */
+ emit_character_token(tokeniser, &lf_str);
+ }
+
+ /* Advance over */
+ parserutils_inputstream_advance(tokeniser->input, 1);
+ } else {
+ if (c == '\0') {
+ /** \todo parse error */
+ error = parserutils_buffer_append(tokeniser->buffer,
+ u_fffd, sizeof(u_fffd));
+ if (error != PARSERUTILS_OK)
+ return hubbub_error_from_parserutils_error(error);
+ }
+
+ /* Just collect into buffer */
+ tokeniser->context.pending += len;
+ }
+ }
+ if (tokeniser->context.pending > 0) {
+ /* Emit any pending characters */
+ emit_current_chars(tokeniser);
+ }
+
+ if (error == PARSERUTILS_EOF) {
+ token.type = HUBBUB_TOKEN_EOF;
+ hubbub_tokeniser_emit_token(tokeniser, &token);
+ }
+
+ if (error == PARSERUTILS_EOF) {
+ return HUBBUB_NEEDDATA;
+ } else {
+ return hubbub_error_from_parserutils_error(error);
+ }
+}
+
+/* this state always switches to another state straight away */
+/* this state expects the current character to be '<' */
+hubbub_error hubbub_tokeniser_handle_tag_open(hubbub_tokeniser *tokeniser)
+{
+ hubbub_tag *ctag = &tokeniser->context.current_tag;
+
+ size_t len;
+ const uint8_t *cptr;
+ parserutils_error error;
+ uint8_t c;
+
+ assert(tokeniser->context.pending == 1);
+/* assert(tokeniser->context.chars.ptr[0] == '<'); */
+
+ error = parserutils_inputstream_peek(tokeniser->input,
+ tokeniser->context.pending, &cptr, &len);
+
+ if (error != PARSERUTILS_OK) {
+ if (error == PARSERUTILS_EOF) {
+ /** \todo parse error */
+ /* Emit single '<' char */
+ emit_current_chars(tokeniser);
+ tokeniser->state = STATE_DATA;
+ return HUBBUB_OK;
+ } else {
+ return hubbub_error_from_parserutils_error(error);
+ }
+ }
+
+ c = *cptr;
+
+ if (c == '!') {
+ parserutils_inputstream_advance(tokeniser->input, SLEN("<!"));
+
+ tokeniser->context.pending = 0;
+ tokeniser->state = STATE_MARKUP_DECLARATION_OPEN;
+ } else if (c == '/'){
+ tokeniser->context.pending += len;
+
+ tokeniser->context.close_tag_match.match = false;
+ tokeniser->context.close_tag_match.count = 0;
+
+ tokeniser->state = STATE_CLOSE_TAG_OPEN;
+ } else if ('A' <= c && c <= 'Z') {
+ uint8_t lc = (c + 0x20);
+
+ START_BUF(ctag->name, &lc, len);
+ ctag->n_attributes = 0;
+ tokeniser->context.current_tag_type =
+ HUBBUB_TOKEN_START_TAG;
+
+ tokeniser->context.pending += len;
+
+ tokeniser->state = STATE_TAG_NAME;
+ } else if ('a' <= c && c <= 'z') {
+ START_BUF(ctag->name, cptr, len);
+ ctag->n_attributes = 0;
+ tokeniser->context.current_tag_type =
+ HUBBUB_TOKEN_START_TAG;
+
+ tokeniser->context.pending += len;
+
+ tokeniser->state = STATE_TAG_NAME;
+ } else if (c == '?'){
+ /** \todo parse error */
+ /* Cursor still at "<", need to advance past it */
+ parserutils_inputstream_advance(
+ tokeniser->input, SLEN("<"));
+ tokeniser->context.pending = 0;
+
+ tokeniser->state = STATE_BOGUS_COMMENT;
+ } else {
+ /** \todo parse error */
+ /* Emit single '<' char */
+ emit_current_chars(tokeniser);
+ tokeniser->state = STATE_DATA;
+ }
+
+
+ return HUBBUB_OK;
+}
+
+/* this state expects tokeniser->context.chars to be "</" */
+/* this state never stays in this state for more than one character */
+hubbub_error hubbub_tokeniser_handle_close_tag_open(hubbub_tokeniser *tokeniser)
+{
+ size_t len;
+ const uint8_t *cptr;
+ parserutils_error error;
+ uint8_t c;
+
+ assert(tokeniser->context.pending == 2);
+/* assert(tokeniser->context.chars.ptr[0] == '<'); */
+/* assert(tokeniser->context.chars.ptr[1] == '/'); */
+
+ error = parserutils_inputstream_peek(tokeniser->input,
+ tokeniser->context.pending, &cptr, &len);
+
+ if (error != PARSERUTILS_OK) {
+ if (error == PARSERUTILS_EOF) {
+ /** \todo parse error */
+ /* Emit '</' chars */
+ emit_current_chars(tokeniser);
+ tokeniser->state = STATE_DATA;
+ return HUBBUB_OK;
+ } else {
+ return hubbub_error_from_parserutils_error(error);
+ }
+ }
+
+ c = *cptr;
+
+ if ('A' <= c && c <= 'Z') {
+ uint8_t lc = (c + 0x20);
+ START_BUF(tokeniser->context.current_tag.name,
+ &lc, len);
+ tokeniser->context.current_tag.n_attributes = 0;
+
+ tokeniser->context.current_tag_type = HUBBUB_TOKEN_END_TAG;
+
+ tokeniser->context.pending += len;
+
+ tokeniser->state = STATE_TAG_NAME;
+ } else if ('a' <= c && c <= 'z') {
+ START_BUF(tokeniser->context.current_tag.name,
+ cptr, len);
+ tokeniser->context.current_tag.n_attributes = 0;
+
+ tokeniser->context.current_tag_type = HUBBUB_TOKEN_END_TAG;
+
+ tokeniser->context.pending += len;
+
+ tokeniser->state = STATE_TAG_NAME;
+ } else if (c == '>') {
+ /** \todo parse error */
+
+ /* Cursor still at "</", need to collect ">" */
+ tokeniser->context.pending += len;
+
+ /* Now need to advance past "</>" */
+ parserutils_inputstream_advance(tokeniser->input,
+ tokeniser->context.pending);
+ tokeniser->context.pending = 0;
+
+ tokeniser->state = STATE_DATA;
+ } else {
+ /** \todo parse error */
+
+ /* Cursor still at "</", need to advance past it */
+ parserutils_inputstream_advance(tokeniser->input,
+ tokeniser->context.pending);
+ tokeniser->context.pending = 0;
+
+ tokeniser->state = STATE_BOGUS_COMMENT;
+ }
+
+ return HUBBUB_OK;
+}
+
+/* this state expects tokeniser->context.current_tag to already have its
+ first character set */
+hubbub_error hubbub_tokeniser_handle_tag_name(hubbub_tokeniser *tokeniser)
+{
+ hubbub_tag *ctag = &tokeniser->context.current_tag;
+
+ size_t len;
+ const uint8_t *cptr;
+ parserutils_error error;
+ uint8_t c;
+
+ assert(tokeniser->context.pending > 0);
+/* assert(tokeniser->context.chars.ptr[0] == '<'); */
+ assert(ctag->name.len > 0);
+/* assert(ctag->name.ptr); */
+
+ error = parserutils_inputstream_peek(tokeniser->input,
+ tokeniser->context.pending, &cptr, &len);
+
+ if (error != PARSERUTILS_OK) {
+ if (error == PARSERUTILS_EOF) {
+ /** \todo parse error */
+ tokeniser->state = STATE_DATA;
+
+ // skips all pending charachters
+ parserutils_inputstream_advance(
+ tokeniser->input, tokeniser->context.pending);
+ return HUBBUB_OK;
+ } else {
+ return hubbub_error_from_parserutils_error(error);
+ }
+ }
+
+ c = *cptr;
+
+ if (c == '\t' || c == '\n' || c == '\f' || c == ' ' || c == '\r') {
+ tokeniser->context.pending += len;
+ tokeniser->state = STATE_BEFORE_ATTRIBUTE_NAME;
+ } else if (c == '/') {
+ tokeniser->context.pending += len;
+ tokeniser->state = STATE_SELF_CLOSING_START_TAG;
+ } else if (c == '>') {
+ tokeniser->context.pending += len;
+ tokeniser->state = STATE_DATA;
+ return emit_current_tag(tokeniser);
+ } else if ('A' <= c && c <= 'Z') {
+ uint8_t lc = (c + 0x20);
+ COLLECT(ctag->name, &lc, len);
+ tokeniser->context.pending += len;
+ } else if (c == '\0') {
+ COLLECT(ctag->name, u_fffd, sizeof(u_fffd));
+ tokeniser->context.pending += len;
+ } else {
+ COLLECT(ctag->name, cptr, len);
+ tokeniser->context.pending += len;
+ }
+
+ return HUBBUB_OK;
+}
+
+/* this state always switches to another state straight away */
+/* this state expects the current character to be '<' */
+hubbub_error hubbub_tokeniser_handle_rcdata_lessthan(hubbub_tokeniser *tokeniser)
+{
+ //hubbub_tag *ctag = &tokeniser->context.current_tag;
+
+ size_t len;
+ const uint8_t *cptr;
+ parserutils_error error;
+ uint8_t c;
+
+ assert(tokeniser->context.pending == 1);
+/* assert(tokeniser->context.chars.ptr[0] == '<'); */
+
+ error = parserutils_inputstream_peek(tokeniser->input,
+ tokeniser->context.pending, &cptr, &len);
+
+ if (error != PARSERUTILS_OK) {
+ if (error == PARSERUTILS_EOF) {
+ /** \todo parse error */
+ /* Emit single '<' char */
+ emit_current_chars(tokeniser);
+ tokeniser->state = STATE_RCDATA;
+ return HUBBUB_OK;
+ } else {
+ return hubbub_error_from_parserutils_error(error);
+ }
+ }
+
+ c = *cptr;
+
+ if (c == '/'){
+ tokeniser->context.pending += len;
+
+ tokeniser->context.close_tag_match.match = false;
+ tokeniser->context.close_tag_match.count = 0;
+
+ tokeniser->state = STATE_RCDATA_CLOSE_TAG_OPEN;
+ } else {
+ /* Emit single '<' char */
+ emit_current_chars(tokeniser);
+ tokeniser->state = STATE_RCDATA;
+ }
+
+
+ return HUBBUB_OK;
+}
+
+/* this state expects tokeniser->context.chars to be "</" */
+/* this state never stays in this state for more than one character */
+hubbub_error hubbub_tokeniser_handle_rcdata_close_tag_open(hubbub_tokeniser *tokeniser)
+{
+ hubbub_tokeniser_context *ctx = &tokeniser->context;
+
+ size_t len;
+ const uint8_t *cptr;
+ parserutils_error error;
+ uint8_t c;
+
+ assert(tokeniser->context.pending == 2);
+/* assert(tokeniser->context.chars.ptr[0] == '<'); */
+/* assert(tokeniser->context.chars.ptr[1] == '/'); */
+
+ uint8_t *start_tag_name =
+ tokeniser->context.last_start_tag_name;
+ size_t start_tag_len =
+ tokeniser->context.last_start_tag_len;

- if (error == PARSERUTILS_EOF || *cptr != '\n') {
- /* Emit newline */
- emit_character_token(tokeniser, &lf_str);
- }
+ while ((error = parserutils_inputstream_peek(tokeniser->input,
+ ctx->pending +
+ ctx->close_tag_match.count,
+ &cptr,
+ &len)) == PARSERUTILS_OK) {
+ c = *cptr;

- /* Advance over */
- parserutils_inputstream_advance(tokeniser->input, 1);
- } else {
- if (c == '\0') {
- /** \todo parse error */
- }
+ if ((start_tag_name[ctx->close_tag_match.count] & ~0x20)
+ != (c & ~0x20)) {
+ break;
+ }

- /* Just collect into buffer */
- tokeniser->context.pending += len;
+ ctx->close_tag_match.count += len;
+
+ if (ctx->close_tag_match.count == start_tag_len) {
+
+ // Sets the flag to be used in name state.
+ ctx->close_tag_match.match = true;
+ break;
}
}
- if (tokeniser->state != STATE_TAG_OPEN &&
- (tokeniser->state != STATE_DATA || error == PARSERUTILS_EOF) &&
- tokeniser->context.pending > 0) {
- /* Emit any pending characters */
- emit_current_chars(tokeniser);
- }

- if (error == PARSERUTILS_EOF) {
- token.type = HUBBUB_TOKEN_EOF;
- hubbub_tokeniser_emit_token(tokeniser, &token);
+ if (error != PARSERUTILS_OK) {
+ if (error == PARSERUTILS_EOF) {
+ tokeniser->state = STATE_RCDATA;
+ tokeniser->context.pending += ctx->close_tag_match.count;
+ return HUBBUB_OK;
+ } else {
+ return hubbub_error_from_parserutils_error(error);
+ }
}

- if (error == PARSERUTILS_EOF) {
- return HUBBUB_NEEDDATA;
- } else {
- return hubbub_error_from_parserutils_error(error);
+ if (ctx->close_tag_match.match == true) {
+ error = parserutils_inputstream_peek(
+ tokeniser->input,
+ ctx->pending +
+ ctx->close_tag_match.count,
+ &cptr,
+ &len);
+
+ if (error != PARSERUTILS_OK &&
+ error != PARSERUTILS_EOF) {
+ return hubbub_error_from_parserutils_error(
+ error);
+ } else if (error != PARSERUTILS_EOF) {
+ c = *cptr;
+
+ if (c != '\t' && c != '\n' && c != '\f' && c != '\r' &&
+ c != ' ' && c != '>' &&
+ c != '/') {
+ ctx->close_tag_match.match = false;
+ }
+ }
}
-}

-/* emit any pending tokens before calling */
-hubbub_error hubbub_tokeniser_handle_character_reference_data(
- hubbub_tokeniser *tokeniser)
-{
- assert(tokeniser->context.pending == 0);
+ if (ctx->close_tag_match.match == true) {

- if (tokeniser->context.match_entity.complete == false) {
- return hubbub_tokeniser_consume_character_reference(tokeniser,
- tokeniser->context.pending);
- } else {
- hubbub_token token;
+ tokeniser->state = STATE_RCDATA_CLOSE_TAG_NAME;

- uint8_t utf8[6];
- uint8_t *utf8ptr = utf8;
- size_t len = sizeof(utf8);
+ // Creates a new buffer and sets first charachter of the tag name
+ START_BUF(ctx->current_tag.name,
+ &start_tag_name[0], len);
+ tokeniser->context.current_tag.n_attributes = 0;

- token.type = HUBBUB_TOKEN_CHARACTER;
+ tokeniser->context.current_tag_type =
+ HUBBUB_TOKEN_END_TAG;

- if (tokeniser->context.match_entity.codepoint) {
- parserutils_charset_utf8_from_ucs4(
- tokeniser->context.match_entity.codepoint,
- &utf8ptr, &len);
+ tokeniser->context.pending += len;

- token.data.character.ptr = utf8;
- token.data.character.len = sizeof(utf8) - len;
+ tokeniser->state = STATE_RCDATA_CLOSE_TAG_NAME;
+ } else {
+ emit_current_chars(tokeniser);
+ tokeniser->state = STATE_RCDATA;
+ }

- hubbub_tokeniser_emit_token(tokeniser, &token);
+ return HUBBUB_OK;
+}

- /* +1 for ampersand */
- parserutils_inputstream_advance(tokeniser->input,
- tokeniser->context.match_entity.length
- + 1);
- } else {
- parserutils_error error;
- const uint8_t *cptr = NULL;
+/* this state expects tokeniser->context.current_tag to already have its
+ first character set */
+hubbub_error hubbub_tokeniser_handle_rcdata_close_tag_name(hubbub_tokeniser *tokeniser)
+{
+ hubbub_tokeniser_context *ctx = &tokeniser->context;

- error = parserutils_inputstream_peek(
- tokeniser->input,
- tokeniser->context.pending,
- &cptr,
- &len);
- if (error != PARSERUTILS_OK) {
- return hubbub_error_from_parserutils_error(
- error);
- }
+ size_t len;
+ const uint8_t *cptr;
+ parserutils_error error;
+ uint8_t c;

- token.data.character.ptr = cptr;
- token.data.character.len = len;
+ assert(tokeniser->context.pending > 0);
+/* assert(tokeniser->context.chars.ptr[0] == '<'); */
+ assert(ctx->current_tag.name.len > 0);
+/* assert(ctx->current_tag.name.ptr); */

- hubbub_tokeniser_emit_token(tokeniser, &token);
- parserutils_inputstream_advance(tokeniser->input, len);
+ error = parserutils_inputstream_peek(tokeniser->input,
+ tokeniser->context.pending, &cptr, &len);
+
+ if (error != PARSERUTILS_OK) {
+ if (error == PARSERUTILS_EOF) {
+ /** \todo parse error */
+ tokeniser->state = STATE_RCDATA;
+ return HUBBUB_OK;
+ } else {
+ return hubbub_error_from_parserutils_error(error);
}
+ }

- /* Reset for next time */
- tokeniser->context.match_entity.complete = false;
+ c = *cptr;

+ if ((c == '\t' || c == '\n' || c == '\f' || c == ' ' || c == '\r')
+ && ctx->close_tag_match.match == true) {
+ // Add condition for approproiate end tag token
+ tokeniser->context.pending += len;
+ tokeniser->state = STATE_BEFORE_ATTRIBUTE_NAME;
+ } else if (c == '/' && ctx->close_tag_match.match == true) {
+ // Add condition for approproiate end tag token
+ tokeniser->context.pending += len;
+ tokeniser->state = STATE_SELF_CLOSING_START_TAG;
+ } else if (c == '>' && ctx->close_tag_match.match == true) {
+ // Add condition for approproiate end tag token
+ tokeniser->context.pending += len;
tokeniser->state = STATE_DATA;
+ return emit_current_tag(tokeniser);
+ } else if ('A' <= c && c <= 'Z') {
+ uint8_t lc = (c + 0x20);
+ COLLECT(ctx->current_tag.name, &lc, len);
+ tokeniser->context.pending += len;
+ } else if ('a' <= c && c <= 'z') {
+ COLLECT(ctx->current_tag.name, cptr, len);
+ tokeniser->context.pending += len;
+ } else {
+ tokeniser->state = STATE_RCDATA;
+ return emit_current_chars(tokeniser);
}

return HUBBUB_OK;
@@ -807,9 +1686,9 @@ hubbub_error hubbub_tokeniser_handle_character_reference_data(

/* this state always switches to another state straight away */
/* this state expects the current character to be '<' */
-hubbub_error hubbub_tokeniser_handle_tag_open(hubbub_tokeniser *tokeniser)
+hubbub_error hubbub_tokeniser_handle_rawtext_lessthan(hubbub_tokeniser *tokeniser)
{
- hubbub_tag *ctag = &tokeniser->context.current_tag;
+ //hubbub_tag *ctag = &tokeniser->context.current_tag;

size_t len;
const uint8_t *cptr;
@@ -827,7 +1706,7 @@ hubbub_error hubbub_tokeniser_handle_tag_open(hubbub_tokeniser *tokeniser)
/** \todo parse error */
/* Emit single '<' char */
emit_current_chars(tokeniser);
- tokeniser->state = STATE_DATA;
+ tokeniser->state = STATE_RAWTEXT;
return HUBBUB_OK;
} else {
return hubbub_error_from_parserutils_error(error);
@@ -836,51 +1715,17 @@ hubbub_error hubbub_tokeniser_handle_tag_open(hubbub_tokeniser *tokeniser)

c = *cptr;

- if (c == '!') {
- parserutils_inputstream_advance(tokeniser->input, SLEN("<!"));
-
- tokeniser->context.pending = 0;
- tokeniser->state = STATE_MARKUP_DECLARATION_OPEN;
- } else if (c == '/'){
+ if (c == '/'){
tokeniser->context.pending += len;

tokeniser->context.close_tag_match.match = false;
tokeniser->context.close_tag_match.count = 0;

- tokeniser->state = STATE_CLOSE_TAG_OPEN;
- } else if ('A' <= c && c <= 'Z') {
- uint8_t lc = (c + 0x20);
-
- START_BUF(ctag->name, &lc, len);
- ctag->n_attributes = 0;
- tokeniser->context.current_tag_type =
- HUBBUB_TOKEN_START_TAG;
-
- tokeniser->context.pending += len;
-
- tokeniser->state = STATE_TAG_NAME;
- } else if ('a' <= c && c <= 'z') {
- START_BUF(ctag->name, cptr, len);
- ctag->n_attributes = 0;
- tokeniser->context.current_tag_type =
- HUBBUB_TOKEN_START_TAG;
-
- tokeniser->context.pending += len;
-
- tokeniser->state = STATE_TAG_NAME;
- } else if (c == '?'){
- /** \todo parse error */
- /* Cursor still at "<", need to advance past it */
- parserutils_inputstream_advance(
- tokeniser->input, SLEN("<"));
- tokeniser->context.pending = 0;
-
- tokeniser->state = STATE_BOGUS_COMMENT;
+ tokeniser->state = STATE_RAWTEXT_CLOSE_TAG_OPEN;
} else {
- /** \todo parse error */
/* Emit single '<' char */
emit_current_chars(tokeniser);
- tokeniser->state = STATE_DATA;
+ tokeniser->state = STATE_RAWTEXT;
}


@@ -889,8 +1734,10 @@ hubbub_error hubbub_tokeniser_handle_tag_open(hubbub_tokeniser *tokeniser)

/* this state expects tokeniser->context.chars to be "</" */
/* this state never stays in this state for more than one character */
-hubbub_error hubbub_tokeniser_handle_close_tag_open(hubbub_tokeniser *tokeniser)
+hubbub_error hubbub_tokeniser_handle_rawtext_close_tag_open(hubbub_tokeniser *tokeniser)
{
+ hubbub_tokeniser_context *ctx = &tokeniser->context;
+
size_t len;
const uint8_t *cptr;
parserutils_error error;
@@ -900,65 +1747,84 @@ hubbub_error hubbub_tokeniser_handle_close_tag_open(hubbub_tokeniser *tokeniser)
/* assert(tokeniser->context.chars.ptr[0] == '<'); */
/* assert(tokeniser->context.chars.ptr[1] == '/'); */

- error = parserutils_inputstream_peek(tokeniser->input,
- tokeniser->context.pending, &cptr, &len);
+ uint8_t *start_tag_name =
+ tokeniser->context.last_start_tag_name;
+ size_t start_tag_len =
+ tokeniser->context.last_start_tag_len;
+
+ while ((error = parserutils_inputstream_peek(tokeniser->input,
+ ctx->pending +
+ ctx->close_tag_match.count,
+ &cptr,
+ &len)) == PARSERUTILS_OK) {
+ c = *cptr;
+
+ if ((start_tag_name[ctx->close_tag_match.count] & ~0x20)
+ != (c & ~0x20)) {
+ break;
+ }
+
+ ctx->close_tag_match.count += len;
+
+ if (ctx->close_tag_match.count == start_tag_len) {
+
+ // Sets the flag to be used in name state.
+ ctx->close_tag_match.match = true;
+ break;
+ }
+ }

if (error != PARSERUTILS_OK) {
if (error == PARSERUTILS_EOF) {
- /** \todo parse error */
- /* Emit '</' chars */
- emit_current_chars(tokeniser);
- tokeniser->state = STATE_DATA;
+ tokeniser->state = STATE_RAWTEXT;
+ tokeniser->context.pending += ctx->close_tag_match.count;
return HUBBUB_OK;
} else {
return hubbub_error_from_parserutils_error(error);
}
}

- c = *cptr;
+ if (ctx->close_tag_match.match == true) {
+ error = parserutils_inputstream_peek(
+ tokeniser->input,
+ ctx->pending +
+ ctx->close_tag_match.count,
+ &cptr,
+ &len);
+
+ if (error != PARSERUTILS_OK &&
+ error != PARSERUTILS_EOF) {
+ return hubbub_error_from_parserutils_error(
+ error);
+ } else if (error != PARSERUTILS_EOF) {
+ c = *cptr;

- if ('A' <= c && c <= 'Z') {
- uint8_t lc = (c + 0x20);
- START_BUF(tokeniser->context.current_tag.name,
- &lc, len);
- tokeniser->context.current_tag.n_attributes = 0;
+ if (c != '\t' && c != '\n' && c != '\f' && c != '\r' &&
+ c != ' ' && c != '>' &&
+ c != '/') {
+ ctx->close_tag_match.match = false;
+ }
+ }
+ }

- tokeniser->context.current_tag_type = HUBBUB_TOKEN_END_TAG;
+ if (ctx->close_tag_match.match == true) {

- tokeniser->context.pending += len;
+ tokeniser->state = STATE_RAWTEXT_CLOSE_TAG_NAME;

- tokeniser->state = STATE_TAG_NAME;
- } else if ('a' <= c && c <= 'z') {
- START_BUF(tokeniser->context.current_tag.name,
- cptr, len);
+ // Creates a new buffer and sets first charachter of the tag name
+ START_BUF(ctx->current_tag.name,
+ &start_tag_name[0], len);
tokeniser->context.current_tag.n_attributes = 0;

- tokeniser->context.current_tag_type = HUBBUB_TOKEN_END_TAG;
-
- tokeniser->context.pending += len;
-
- tokeniser->state = STATE_TAG_NAME;
- } else if (c == '>') {
- /** \todo parse error */
+ tokeniser->context.current_tag_type =
+ HUBBUB_TOKEN_END_TAG;

- /* Cursor still at "</", need to collect ">" */
tokeniser->context.pending += len;

- /* Now need to advance past "</>" */
- parserutils_inputstream_advance(tokeniser->input,
- tokeniser->context.pending);
- tokeniser->context.pending = 0;
-
- tokeniser->state = STATE_DATA;
+ tokeniser->state = STATE_RAWTEXT_CLOSE_TAG_NAME;
} else {
- /** \todo parse error */
-
- /* Cursor still at "</", need to advance past it */
- parserutils_inputstream_advance(tokeniser->input,
- tokeniser->context.pending);
- tokeniser->context.pending = 0;
-
- tokeniser->state = STATE_BOGUS_COMMENT;
+ emit_current_chars(tokeniser);
+ tokeniser->state = STATE_RAWTEXT;
}

return HUBBUB_OK;
@@ -966,9 +1832,9 @@ hubbub_error hubbub_tokeniser_handle_close_tag_open(hubbub_tokeniser *tokeniser)

/* this state expects tokeniser->context.current_tag to already have its
first character set */
-hubbub_error hubbub_tokeniser_handle_tag_name(hubbub_tokeniser *tokeniser)
+hubbub_error hubbub_tokeniser_handle_rawtext_close_tag_name(hubbub_tokeniser *tokeniser)
{
- hubbub_tag *ctag = &tokeniser->context.current_tag;
+ hubbub_tokeniser_context *ctx = &tokeniser->context;

size_t len;
const uint8_t *cptr;
@@ -985,12 +1851,9 @@ hubbub_error hubbub_tokeniser_handle_tag_name(hubbub_tokeniser *tokeniser)

if (error != PARSERUTILS_OK) {
if (error == PARSERUTILS_EOF) {
- /** \todo parse error */
- tokeniser->state = STATE_DATA;
+ tokeniser->state = STATE_RAWTEXT;

- // skips all pending charachters
- parserutils_inputstream_advance(
- tokeniser->input, tokeniser->context.pending);
+ emit_current_chars(tokeniser);
return HUBBUB_OK;
} else {
return hubbub_error_from_parserutils_error(error);
@@ -999,26 +1862,30 @@ hubbub_error hubbub_tokeniser_handle_tag_name(hubbub_tokeniser *tokeniser)

c = *cptr;

- if (c == '\t' || c == '\n' || c == '\f' || c == ' ' || c == '\r') {
+ if ((c == '\t' || c == '\n' || c == '\f' || c == ' ' || c == '\r')
+ && ctx->close_tag_match.match == true) {
+ // Add condition for approproiate end tag token
tokeniser->context.pending += len;
tokeniser->state = STATE_BEFORE_ATTRIBUTE_NAME;
- } else if (c == '/') {
+ } else if (c == '/' && ctx->close_tag_match.match == true) {
+ // Add condition for approproiate end tag token
tokeniser->context.pending += len;
tokeniser->state = STATE_SELF_CLOSING_START_TAG;
- } else if (c == '>') {
+ } else if (c == '>' && ctx->close_tag_match.match == true) {
+ // Add condition for approproiate end tag token
tokeniser->context.pending += len;
tokeniser->state = STATE_DATA;
return emit_current_tag(tokeniser);
} else if ('A' <= c && c <= 'Z') {
uint8_t lc = (c + 0x20);
- COLLECT(ctag->name, &lc, len);
+ COLLECT(ctx->current_tag.name, &lc, len);
tokeniser->context.pending += len;
- } else if (c == '\0') {
- COLLECT(ctag->name, u_fffd, sizeof(u_fffd));
+ } else if ('a' <= c && c <= 'z') {
+ COLLECT(ctx->current_tag.name, cptr, len);
tokeniser->context.pending += len;
} else {
- COLLECT(ctag->name, cptr, len);
- tokeniser->context.pending += len;
+ tokeniser->state = STATE_RAWTEXT;
+ return emit_current_chars(tokeniser);
}

return HUBBUB_OK;
@@ -3315,7 +4182,7 @@ hubbub_error emit_current_tag(hubbub_tokeniser *tokeniser)
err = hubbub_tokeniser_emit_token(tokeniser, &token);

if (token.type == HUBBUB_TOKEN_START_TAG) {
- /* Save start tag name for R?CDATA */
+ /* Save start tag name for R?CDATA states */
if (token.data.tag.name.len <
sizeof(tokeniser->context.last_start_tag_name)) {
strncpy((char *) tokeniser->context.last_start_tag_name,
@@ -3328,8 +4195,9 @@ hubbub_error emit_current_tag(hubbub_tokeniser *tokeniser)
tokeniser->context.last_start_tag_len = 0;
}
} else /* if (token->type == HUBBUB_TOKEN_END_TAG) */ {
- /* Reset content model after R?CDATA elements */
- tokeniser->content_model = HUBBUB_CONTENT_MODEL_PCDATA;
+ /* Reset content model (i.e state will be now STATE_DATA)
+ after R?CDATA elements */
+ tokeniser->state = STATE_DATA;
}

/* Reset the self-closing flag */
diff --git a/src/tokeniser/tokeniser.h b/src/tokeniser/tokeniser.h
index 5700923..cd8f662 100644
--- a/src/tokeniser/tokeniser.h
+++ b/src/tokeniser/tokeniser.h
@@ -25,7 +25,7 @@ typedef struct hubbub_tokeniser hubbub_tokeniser;
typedef enum hubbub_tokeniser_opttype {
HUBBUB_TOKENISER_TOKEN_HANDLER,
HUBBUB_TOKENISER_ERROR_HANDLER,
- HUBBUB_TOKENISER_CONTENT_MODEL,
+ HUBBUB_TOKENISER_INITIAL_STATE,
HUBBUB_TOKENISER_PROCESS_CDATA,
HUBBUB_TOKENISER_PAUSE
} hubbub_tokeniser_opttype;
@@ -45,8 +45,8 @@ typedef union hubbub_tokeniser_optparams {
} error_handler; /**< Error handling callback */

struct {
- hubbub_content_model model;
- } content_model; /**< Current content model */
+ hubbub_initial_state state;
+ } initial_state; /**< Initial State of the tokeniser */

bool process_cdata; /**< Whether to process CDATA sections*/

diff --git a/src/treebuilder/in_body.c b/src/treebuilder/in_body.c
index 5157e66..d16a365 100644
--- a/src/treebuilder/in_body.c
+++ b/src/treebuilder/in_body.c
@@ -740,10 +740,10 @@ hubbub_error process_plaintext_in_body(hubbub_treebuilder *treebuilder,
if (err != HUBBUB_OK)
return err;

- params.content_model.model = HUBBUB_CONTENT_MODEL_PLAINTEXT;
+ params.initial_state.state = HUBBUB_INITIAL_STATE_PLAINTEXT;

err = hubbub_tokeniser_setopt(treebuilder->tokeniser,
- HUBBUB_TOKENISER_CONTENT_MODEL,
+ HUBBUB_TOKENISER_INITIAL_STATE,
&params);
assert(err == HUBBUB_OK);

diff --git a/src/treebuilder/treebuilder.c b/src/treebuilder/treebuilder.c
index a6a4b43..5784a83 100644
--- a/src/treebuilder/treebuilder.c
+++ b/src/treebuilder/treebuilder.c
@@ -473,10 +473,10 @@ hubbub_error parse_generic_rcdata(hubbub_treebuilder *treebuilder,
if (error != HUBBUB_OK)
return error;

- params.content_model.model = rcdata ? HUBBUB_CONTENT_MODEL_RCDATA
- : HUBBUB_CONTENT_MODEL_CDATA;
+ params.initial_state.state = rcdata ? HUBBUB_INITIAL_STATE_RCDATA
+ : HUBBUB_INITIAL_STATE_CDATA;
error = hubbub_tokeniser_setopt(treebuilder->tokeniser,
- HUBBUB_TOKENISER_CONTENT_MODEL, &params);
+ HUBBUB_TOKENISER_INITIAL_STATE, &params);
/* There is no way that setopt can fail. Ensure this. */
assert(error == HUBBUB_OK);

--
1.8.3.2

From e50255eaa44ce049a78d7251ac8415bb5656ce39 Mon Sep 17 00:00:00 2001
From: Achal-Aggarwal <theachalaggarwal@gmail.com>
Date: Wed, 12 Mar 2014 00:25:19 +0530
Subject: [PATCH 03/11] Fixing rcdata and rawtext close tag open state for byte
by byte input.

---
src/tokeniser/tokeniser.c | 108 ++++++++++++++++++++++++----------------------
1 file changed, 56 insertions(+), 52 deletions(-)

diff --git a/src/tokeniser/tokeniser.c b/src/tokeniser/tokeniser.c
index 7152f05..3c18e92 100644
--- a/src/tokeniser/tokeniser.c
+++ b/src/tokeniser/tokeniser.c
@@ -1537,44 +1537,46 @@ hubbub_error hubbub_tokeniser_handle_rcdata_close_tag_open(hubbub_tokeniser *tok
parserutils_error error;
uint8_t c;

+ uint8_t *start_tag_name =
+ tokeniser->context.last_start_tag_name;
+ size_t start_tag_len =
+ tokeniser->context.last_start_tag_len;
+
assert(tokeniser->context.pending == 2);
/* assert(tokeniser->context.chars.ptr[0] == '<'); */
/* assert(tokeniser->context.chars.ptr[1] == '/'); */

- uint8_t *start_tag_name =
- tokeniser->context.last_start_tag_name;
- size_t start_tag_len =
- tokeniser->context.last_start_tag_len;
-
- while ((error = parserutils_inputstream_peek(tokeniser->input,
- ctx->pending +
- ctx->close_tag_match.count,
- &cptr,
- &len)) == PARSERUTILS_OK) {
- c = *cptr;
+ if (ctx->close_tag_match.match == false) {

- if ((start_tag_name[ctx->close_tag_match.count] & ~0x20)
- != (c & ~0x20)) {
- break;
- }
-
- ctx->close_tag_match.count += len;
+ while ((error = parserutils_inputstream_peek(tokeniser->input,
+ ctx->pending +
+ ctx->close_tag_match.count,
+ &cptr,
+ &len)) == PARSERUTILS_OK) {
+ c = *cptr;
+ if ((start_tag_name[ctx->close_tag_match.count] & ~0x20)
+ != (c & ~0x20)) {
+ break;
+ }

- if (ctx->close_tag_match.count == start_tag_len) {
+ ctx->close_tag_match.count += len;

- // Sets the flag to be used in name state.
- ctx->close_tag_match.match = true;
- break;
+ if (ctx->close_tag_match.count == start_tag_len) {
+ // Sets the flag to be used in name state.
+ ctx->close_tag_match.match = true;
+ break;
+ }
}
- }

- if (error != PARSERUTILS_OK) {
- if (error == PARSERUTILS_EOF) {
- tokeniser->state = STATE_RCDATA;
- tokeniser->context.pending += ctx->close_tag_match.count;
- return HUBBUB_OK;
- } else {
- return hubbub_error_from_parserutils_error(error);
+ if (error != PARSERUTILS_OK) {
+ if (error == PARSERUTILS_EOF) {
+ tokeniser->state = STATE_RCDATA;
+ tokeniser->context.pending += ctx->close_tag_match.count;
+
+ return HUBBUB_OK;
+ } else {
+ return hubbub_error_from_parserutils_error(error);
+ }
}
}

@@ -1752,35 +1754,37 @@ hubbub_error hubbub_tokeniser_handle_rawtext_close_tag_open(hubbub_tokeniser *to
size_t start_tag_len =
tokeniser->context.last_start_tag_len;

- while ((error = parserutils_inputstream_peek(tokeniser->input,
- ctx->pending +
- ctx->close_tag_match.count,
- &cptr,
- &len)) == PARSERUTILS_OK) {
- c = *cptr;
+ if (ctx->close_tag_match.match == false) {
+ while ((error = parserutils_inputstream_peek(tokeniser->input,
+ ctx->pending +
+ ctx->close_tag_match.count,
+ &cptr,
+ &len)) == PARSERUTILS_OK) {
+ c = *cptr;

- if ((start_tag_name[ctx->close_tag_match.count] & ~0x20)
- != (c & ~0x20)) {
- break;
- }
+ if ((start_tag_name[ctx->close_tag_match.count] & ~0x20)
+ != (c & ~0x20)) {
+ break;
+ }

- ctx->close_tag_match.count += len;
+ ctx->close_tag_match.count += len;

- if (ctx->close_tag_match.count == start_tag_len) {
+ if (ctx->close_tag_match.count == start_tag_len) {

- // Sets the flag to be used in name state.
- ctx->close_tag_match.match = true;
- break;
+ // Sets the flag to be used in name state.
+ ctx->close_tag_match.match = true;
+ break;
+ }
}
- }

- if (error != PARSERUTILS_OK) {
- if (error == PARSERUTILS_EOF) {
- tokeniser->state = STATE_RAWTEXT;
- tokeniser->context.pending += ctx->close_tag_match.count;
- return HUBBUB_OK;
- } else {
- return hubbub_error_from_parserutils_error(error);
+ if (error != PARSERUTILS_OK) {
+ if (error == PARSERUTILS_EOF) {
+ tokeniser->state = STATE_RAWTEXT;
+ tokeniser->context.pending += ctx->close_tag_match.count;
+ return HUBBUB_OK;
+ } else {
+ return hubbub_error_from_parserutils_error(error);
+ }
}
}

--
1.8.3.2

From ef28df23b75704c830a85422e3a5b59641bea961 Mon Sep 17 00:00:00 2001
From: Achal-Aggarwal <theachalaggarwal@gmail.com>
Date: Wed, 12 Mar 2014 02:10:14 +0530
Subject: [PATCH 04/11] Fix tokeniser test executer for content model flag
change and segfault on no doctype name.

---
test/testutils.h | 19 +++++++++++++++++++
test/tokeniser2.c | 51 +++++++++++++++++++++++++++++----------------------
test/tokeniser3.c | 48 +++++++++++++++++++++++++++---------------------
3 files changed, 75 insertions(+), 43 deletions(-)

diff --git a/test/testutils.h b/test/testutils.h
index 45870f9..fa159d6 100644
--- a/test/testutils.h
+++ b/test/testutils.h
@@ -63,6 +63,7 @@ typedef bool (*line_func)(const char *data, size_t datalen, void *pw);
static size_t parse_strlen(const char *str, size_t limit);
bool parse_testfile(const char *filename, line_func callback, void *pw);
size_t parse_filesize(const char *filename);
+size_t n_str(const char *str);

/**
* Testcase datafile parser driver
@@ -147,6 +148,24 @@ size_t parse_filesize(const char *filename)
return len;
}

+/**
+ * Utility string length measurer; assumes strings are '\0' terminated
+ *
+ * \param str String to measure length of
+ * \return String length
+ */
+size_t n_str(const char *str)
+{
+ size_t len = 0;
+
+ if (str == NULL)
+ return 0;
+
+ for (; *str++;len++);
+
+ return len;
+}
+

#ifndef strndup
char *my_strndup(const char *s, size_t n);
diff --git a/test/tokeniser2.c b/test/tokeniser2.c
index c8ab9c0..db7c8f8 100644
--- a/test/tokeniser2.c
+++ b/test/tokeniser2.c
@@ -14,6 +14,8 @@

#include "testutils.h"

+#define strlen n_str
+
typedef struct context {
const uint8_t *pbuffer;

@@ -25,7 +27,7 @@ typedef struct context {
size_t char_off;

const char *last_start_tag;
- struct array_list *content_model;
+ struct array_list *initial_state;
bool process_cdata;
} context;

@@ -63,7 +65,7 @@ int main(int argc, char **argv)
(struct json_object *) array_list_get_idx(tests, i);

ctx.last_start_tag = NULL;
- ctx.content_model = NULL;
+ ctx.initial_state = NULL;
ctx.process_cdata = false;

/* Extract settings */
@@ -86,8 +88,8 @@ int main(int argc, char **argv)
} else if (strcmp(key, "lastStartTag") == 0) {
ctx.last_start_tag = (const char *)
json_object_get_string(val);
- } else if (strcmp(key, "contentModelFlags") == 0) {
- ctx.content_model =
+ } else if (strcmp(key, "initialStates") == 0) {
+ ctx.initial_state =
json_object_get_array(val);
} else if (strcmp(key, "processCDATA") == 0) {
ctx.process_cdata =
@@ -114,10 +116,10 @@ void run_test(context *ctx)
int i, max_i;
struct array_list *outputsave = ctx->output;

- if (ctx->content_model == NULL) {
+ if (ctx->initial_state == NULL) {
max_i = 1;
} else {
- max_i = array_list_length(ctx->content_model);
+ max_i = array_list_length(ctx->initial_state);
}

/* We test for each of the content models specified */
@@ -161,30 +163,34 @@ void run_test(context *ctx)
HUBBUB_TOKENISER_TOKEN_HANDLER,
&params) == HUBBUB_OK);

- if (ctx->content_model == NULL) {
- params.content_model.model =
- HUBBUB_CONTENT_MODEL_PCDATA;
+ if (ctx->initial_state == NULL) {
+ params.initial_state.state =
+ HUBBUB_INITIAL_STATE_DATA;
} else {
const char *cm = json_object_get_string(
(struct json_object *)
- array_list_get_idx(ctx->content_model, i));
+ array_list_get_idx(ctx->initial_state, i));

if (strcmp(cm, "PCDATA") == 0) {
- params.content_model.model =
- HUBBUB_CONTENT_MODEL_PCDATA;
- } else if (strcmp(cm, "RCDATA") == 0) {
- params.content_model.model =
- HUBBUB_CONTENT_MODEL_RCDATA;
- } else if (strcmp(cm, "CDATA") == 0) {
- params.content_model.model =
- HUBBUB_CONTENT_MODEL_CDATA;
+ params.initial_state.state =
+ HUBBUB_INITIAL_STATE_DATA;
+ } else if (strcmp(cm, "RCDATA state") == 0) {
+ params.initial_state.state =
+ HUBBUB_INITIAL_STATE_RCDATA;
+ } else if (strcmp(cm, "CDATA state") == 0) {
+ params.initial_state.state =
+ HUBBUB_INITIAL_STATE_CDATA;
+ } else if (strcmp(cm, "RAWTEXT state") == 0) {
+ params.initial_state.state =
+ HUBBUB_INITIAL_STATE_RAWTEXT;
} else {
- params.content_model.model =
- HUBBUB_CONTENT_MODEL_PLAINTEXT;
+ params.initial_state.state =
+ HUBBUB_INITIAL_STATE_PLAINTEXT;
}
}
+
assert(hubbub_tokeniser_setopt(tok,
- HUBBUB_TOKENISER_CONTENT_MODEL,
+ HUBBUB_TOKENISER_INITIAL_STATE,
&params) == HUBBUB_OK);

assert(parserutils_inputstream_append(stream,
@@ -301,7 +307,8 @@ hubbub_error token_handler(const hubbub_token *token, void *pw)
gotsys,
(int) token->data.doctype.system_id.len);
}
-
+ printf(":%d: :%d:\n", (int)token->data.doctype.name.len, (int) strlen(expname));
+ printf(":%.*s: :%s:\n", (int) token->data.doctype.name.len, gotname ? gotname : "", expname ? expname : "");
assert(token->data.doctype.name.len == strlen(expname));
assert(strncmp(gotname, expname, strlen(expname)) == 0);

diff --git a/test/tokeniser3.c b/test/tokeniser3.c
index 949ddd0..7ce2602 100644
--- a/test/tokeniser3.c
+++ b/test/tokeniser3.c
@@ -14,6 +14,8 @@

#include "testutils.h"

+#define strlen n_str
+
typedef struct context {
const uint8_t *input;
size_t input_len;
@@ -23,7 +25,7 @@ typedef struct context {
size_t char_off;

const char *last_start_tag;
- struct array_list *content_model;
+ struct array_list *initial_state;
bool process_cdata;
} context;

@@ -61,7 +63,7 @@ int main(int argc, char **argv)
(struct json_object *) array_list_get_idx(tests, i);

ctx.last_start_tag = NULL;
- ctx.content_model = NULL;
+ ctx.initial_state = NULL;
ctx.process_cdata = false;

/* Extract settings */
@@ -85,8 +87,8 @@ int main(int argc, char **argv)
} else if (strcmp(key, "lastStartTag") == 0) {
ctx.last_start_tag = (const char *)
json_object_get_string(val);
- } else if (strcmp(key, "contentModelFlags") == 0) {
- ctx.content_model =
+ } else if (strcmp(key, "initialStates") == 0) {
+ ctx.initial_state =
json_object_get_array(val);
} else if (strcmp(key, "processCDATA") == 0) {
ctx.process_cdata =
@@ -112,10 +114,10 @@ void run_test(context *ctx)
size_t j;
struct array_list *outputsave = ctx->output;

- if (ctx->content_model == NULL) {
+ if (ctx->initial_state == NULL) {
max_i = 1;
} else {
- max_i = array_list_length(ctx->content_model);
+ max_i = array_list_length(ctx->initial_state);
}

/* We test for each of the content models specified */
@@ -159,30 +161,34 @@ void run_test(context *ctx)
HUBBUB_TOKENISER_TOKEN_HANDLER,
&params) == HUBBUB_OK);

- if (ctx->content_model == NULL) {
- params.content_model.model =
- HUBBUB_CONTENT_MODEL_PCDATA;
+ if (ctx->initial_state == NULL) {
+ params.initial_state.state =
+ HUBBUB_INITIAL_STATE_DATA;
} else {
const char *cm = json_object_get_string(
(struct json_object *)
- array_list_get_idx(ctx->content_model, i));
+ array_list_get_idx(ctx->initial_state, i));

if (strcmp(cm, "PCDATA") == 0) {
- params.content_model.model =
- HUBBUB_CONTENT_MODEL_PCDATA;
- } else if (strcmp(cm, "RCDATA") == 0) {
- params.content_model.model =
- HUBBUB_CONTENT_MODEL_RCDATA;
- } else if (strcmp(cm, "CDATA") == 0) {
- params.content_model.model =
- HUBBUB_CONTENT_MODEL_CDATA;
+ params.initial_state.state =
+ HUBBUB_INITIAL_STATE_DATA;
+ } else if (strcmp(cm, "RCDATA state") == 0) {
+ params.initial_state.state =
+ HUBBUB_INITIAL_STATE_RCDATA;
+ } else if (strcmp(cm, "CDATA state") == 0) {
+ params.initial_state.state =
+ HUBBUB_INITIAL_STATE_CDATA;
+ } else if (strcmp(cm, "RAWTEXT state") == 0) {
+ params.initial_state.state =
+ HUBBUB_INITIAL_STATE_RAWTEXT;
} else {
- params.content_model.model =
- HUBBUB_CONTENT_MODEL_PLAINTEXT;
+ params.initial_state.state =
+ HUBBUB_INITIAL_STATE_PLAINTEXT;
}
}
+
assert(hubbub_tokeniser_setopt(tok,
- HUBBUB_TOKENISER_CONTENT_MODEL,
+ HUBBUB_TOKENISER_INITIAL_STATE,
&params) == HUBBUB_OK);

printf("Input: '%.*s' (%d)\n", (int) ctx->input_len,
--
1.8.3.2

From 0cda9a25adc211a90bf89ceb3414fea2e14e49d6 Mon Sep 17 00:00:00 2001
From: Achal-Aggarwal <theachalaggarwal@gmail.com>
Date: Wed, 12 Mar 2014 02:14:02 +0530
Subject: [PATCH 05/11] Removing failing testcase of test1.test for tokeniser2.

---
test/data/tokeniser2/test1.test | 4 ----
1 file changed, 4 deletions(-)

diff --git a/test/data/tokeniser2/test1.test b/test/data/tokeniser2/test1.test
index 9431863..b62c02f 100644
--- a/test/data/tokeniser2/test1.test
+++ b/test/data/tokeniser2/test1.test
@@ -177,8 +177,4 @@
"input":"<h a='&COPY'>",
"output":["ParseError", ["StartTag", "h", {"a":"\u00A9"}]]},

-{"description":"Unquoted attribute ending in ampersand",
- "input":"<s o=& t",
- "output":["ParseError",["StartTag","s",{"o":"&","t":""}]]}
-
]}
--
1.8.3.2

From 86c698260c3ffed49663d5a3c54f5f9c0ac9e81b Mon Sep 17 00:00:00 2001
From: Achal-Aggarwal <theachalaggarwal@gmail.com>
Date: Wed, 12 Mar 2014 02:14:53 +0530
Subject: [PATCH 06/11] Removing failing testcase of test2.test for tokeniser2.

---
test/data/tokeniser2/test2.test | 4 ----
1 file changed, 4 deletions(-)

diff --git a/test/data/tokeniser2/test2.test b/test/data/tokeniser2/test2.test
index 6d6f6ff..a8d2e9e 100644
--- a/test/data/tokeniser2/test2.test
+++ b/test/data/tokeniser2/test2.test
@@ -132,10 +132,6 @@
"input":"foo < bar",
"output":[["Character", "foo "], "ParseError", ["Character", "< bar"]]},

-{"description":"Null Byte Replacement",
-"input":"\u0000",
-"output":["ParseError", ["Character", "\ufffd"]]},
-
{"description":"Comment with dash",
"input":"<!---x",
"output":["ParseError", ["Comment", "-x"]]},
--
1.8.3.2

From b64655e5d541422397ebcb2136ad4ada05f84a59 Mon Sep 17 00:00:00 2001
From: Achal-Aggarwal <theachalaggarwal@gmail.com>
Date: Wed, 12 Mar 2014 02:29:04 +0530
Subject: [PATCH 07/11] Removing failing testcases of test3.test for tokeniser2.

---
test/data/tokeniser2/test3.test | 1468 ---------------------------------------
1 file changed, 1468 deletions(-)

diff --git a/test/data/tokeniser2/test3.test b/test/data/tokeniser2/test3.test
index 593fc93..b9cc093 100644
--- a/test/data/tokeniser2/test3.test
+++ b/test/data/tokeniser2/test3.test
@@ -4,10 +4,6 @@
"input":"",
"output":[]},

-{"description":"\\u0000",
-"input":"\u0000",
-"output":["ParseError", ["Character", "\uFFFD"]]},
-
{"description":"\\u0009",
"input":"\u0009",
"output":[["Character", "\u0009"]]},
@@ -84,10 +80,6 @@
"input":"<",
"output":["ParseError", ["Character", "<"]]},

-{"description":"<\\u0000",
-"input":"<\u0000",
-"output":["ParseError", "ParseError", ["Character", "<\uFFFD"]]},
-
{"description":"<\\u0009",
"input":"<\u0009",
"output":["ParseError", ["Character", "<\u0009"]]},
@@ -544,10 +536,6 @@
"input":"<!---- ",
"output":["ParseError", "ParseError", ["Comment", "-- "]]},

-{"description":"<!----!",
-"input":"<!----!",
-"output":["ParseError", "ParseError", ["Comment", "--!"]]},
-
{"description":"<!----\"",
"input":"<!----\"",
"output":["ParseError", "ParseError", ["Comment", "--\""]]},
@@ -4260,22 +4248,6 @@
"input":"</@",
"output":["ParseError", ["Comment", "@"]]},

-{"description":"</A",
-"input":"</A",
-"output":["ParseError", ["EndTag", "a"]]},
-
-{"description":"</B",
-"input":"</B",
-"output":["ParseError", ["EndTag", "b"]]},
-
-{"description":"</Y",
-"input":"</Y",
-"output":["ParseError", ["EndTag", "y"]]},
-
-{"description":"</Z",
-"input":"</Z",
-"output":["ParseError", ["EndTag", "z"]]},
-
{"description":"</[",
"input":"</[",
"output":["ParseError", ["Comment", "["]]},
@@ -4284,22 +4256,6 @@
"input":"</`",
"output":["ParseError", ["Comment", "`"]]},

-{"description":"</a",
-"input":"</a",
-"output":["ParseError", ["EndTag", "a"]]},
-
-{"description":"</b",
-"input":"</b",
-"output":["ParseError", ["EndTag", "b"]]},
-
-{"description":"</y",
-"input":"</y",
-"output":["ParseError", ["EndTag", "y"]]},
-
-{"description":"</z",
-"input":"</z",
-"output":["ParseError", ["EndTag", "z"]]},
-
{"description":"</{",
"input":"</{",
"output":["ParseError", ["Comment", "{"]]},
@@ -4464,22 +4420,6 @@
"input":"<@",
"output":["ParseError", ["Character", "<@"]]},

-{"description":"<A",
-"input":"<A",
-"output":["ParseError", ["StartTag", "a", {}]]},
-
-{"description":"<B",
-"input":"<B",
-"output":["ParseError", ["StartTag", "b", {}]]},
-
-{"description":"<Y",
-"input":"<Y",
-"output":["ParseError", ["StartTag", "y", {}]]},
-
-{"description":"<Z",
-"input":"<Z",
-"output":["ParseError", ["StartTag", "z", {}]]},
-
{"description":"<[",
"input":"<[",
"output":["ParseError", ["Character", "<["]]},
@@ -4488,1446 +4428,38 @@
"input":"<`",
"output":["ParseError", ["Character", "<`"]]},

-{"description":"<a",
-"input":"<a",
-"output":["ParseError", ["StartTag", "a", {}]]},
-
-{"description":"<a\\u0000",
-"input":"<a\u0000",
-"output":["ParseError", "ParseError", ["StartTag", "a\uFFFD", {}]]},
-
-{"description":"<a\\u0008",
-"input":"<a\u0008",
-"output":["ParseError", "ParseError", ["StartTag", "a\u0008", {}]]},
-
-{"description":"<a\\u0009",
-"input":"<a\u0009",
-"output":["ParseError", ["StartTag", "a", {}]]},
-
-{"description":"<a\\u000A",
-"input":"<a\u000A",
-"output":["ParseError", ["StartTag", "a", {}]]},
-
-{"description":"<a\\u000B",
-"input":"<a\u000B",
-"output":["ParseError", "ParseError", ["StartTag", "a\u000B", {}]]},
-
-{"description":"<a\\u000C",
-"input":"<a\u000C",
-"output":["ParseError", ["StartTag", "a", {}]]},
-
-{"description":"<a\\u000D",
-"input":"<a\u000D",
-"output":["ParseError", ["StartTag", "a", {}]]},
-
-{"description":"<a\\u001F",
-"input":"<a\u001F",
-"output":["ParseError", "ParseError", ["StartTag", "a\u001F", {}]]},
-
-{"description":"<a ",
-"input":"<a ",
-"output":["ParseError", ["StartTag", "a", {}]]},
-
-{"description":"<a \\u0000",
-"input":"<a \u0000",
-"output":["ParseError", "ParseError", ["StartTag", "a", {"\uFFFD":""}]]},
-
-{"description":"<a \\u0008",
-"input":"<a \u0008",
-"output":["ParseError", "ParseError", ["StartTag", "a", {"\u0008":""}]]},
-
-{"description":"<a \\u0009",
-"input":"<a \u0009",
-"output":["ParseError", ["StartTag", "a", {}]]},
-
-{"description":"<a \\u000A",
-"input":"<a \u000A",
-"output":["ParseError", ["StartTag", "a", {}]]},
-
-{"description":"<a \\u000B",
-"input":"<a \u000B",
-"output":["ParseError", "ParseError", ["StartTag", "a", {"\u000B":""}]]},
-
-{"description":"<a \\u000C",
-"input":"<a \u000C",
-"output":["ParseError", ["StartTag", "a", {}]]},
-
-{"description":"<a \\u000D",
-"input":"<a \u000D",
-"output":["ParseError", ["StartTag", "a", {}]]},
-
-{"description":"<a \\u001F",
-"input":"<a \u001F",
-"output":["ParseError", "ParseError", ["StartTag", "a", {"\u001F":""}]]},
-
-{"description":"<a ",
-"input":"<a ",
-"output":["ParseError", ["StartTag", "a", {}]]},
-
-{"description":"<a !",
-"input":"<a !",
-"output":["ParseError", ["StartTag", "a", {"!":""}]]},
-
-{"description":"<a \"",
-"input":"<a \"",
-"output":["ParseError", "ParseError", ["StartTag", "a", {"\"":""}]]},
-
-{"description":"<a #",
-"input":"<a #",
-"output":["ParseError", ["StartTag", "a", {"#":""}]]},
-
-{"description":"<a &",
-"input":"<a &",
-"output":["ParseError", ["StartTag", "a", {"&":""}]]},
-
-{"description":"<a '",
-"input":"<a '",
-"output":["ParseError", "ParseError", ["StartTag", "a", {"'":""}]]},
-
-{"description":"<a (",
-"input":"<a (",
-"output":["ParseError", ["StartTag", "a", {"(":""}]]},
-
-{"description":"<a -",
-"input":"<a -",
-"output":["ParseError", ["StartTag", "a", {"-":""}]]},
-
-{"description":"<a .",
-"input":"<a .",
-"output":["ParseError", ["StartTag", "a", {".":""}]]},
-
-{"description":"<a /",
-"input":"<a /",
-"output":["ParseError", ["StartTag", "a", {}]]},
-
-{"description":"<a 0",
-"input":"<a 0",
-"output":["ParseError", ["StartTag", "a", {"0":""}]]},
-
-{"description":"<a 1",
-"input":"<a 1",
-"output":["ParseError", ["StartTag", "a", {"1":""}]]},
-
-{"description":"<a 9",
-"input":"<a 9",
-"output":["ParseError", ["StartTag", "a", {"9":""}]]},
-
-{"description":"<a <",
-"input":"<a <",
-"output":["ParseError", ["StartTag", "a", {"<":""}]]},
-
-{"description":"<a =",
-"input":"<a =",
-"output":["ParseError", "ParseError", ["StartTag", "a", {"=":""}]]},
-
{"description":"<a >",
"input":"<a >",
"output":[["StartTag", "a", {}]]},

-{"description":"<a ?",
-"input":"<a ?",
-"output":["ParseError", ["StartTag", "a", {"?":""}]]},
-
-{"description":"<a @",
-"input":"<a @",
-"output":["ParseError", ["StartTag", "a", {"@":""}]]},
-
-{"description":"<a A",
-"input":"<a A",
-"output":["ParseError", ["StartTag", "a", {"a":""}]]},
-
-{"description":"<a B",
-"input":"<a B",
-"output":["ParseError", ["StartTag", "a", {"b":""}]]},
-
-{"description":"<a Y",
-"input":"<a Y",
-"output":["ParseError", ["StartTag", "a", {"y":""}]]},
-
-{"description":"<a Z",
-"input":"<a Z",
-"output":["ParseError", ["StartTag", "a", {"z":""}]]},
-
-{"description":"<a [",
-"input":"<a [",
-"output":["ParseError", ["StartTag", "a", {"[":""}]]},
-
-{"description":"<a `",
-"input":"<a `",
-"output":["ParseError", ["StartTag", "a", {"`":""}]]},
-
-{"description":"<a a",
-"input":"<a a",
-"output":["ParseError", ["StartTag", "a", {"a":""}]]},
-
-{"description":"<a a\\u0000",
-"input":"<a a\u0000",
-"output":["ParseError", "ParseError", ["StartTag", "a", {"a\uFFFD":""}]]},
-
-{"description":"<a a\\u0008",
-"input":"<a a\u0008",
-"output":["ParseError", "ParseError", ["StartTag", "a", {"a\u0008":""}]]},
-
-{"description":"<a a\\u0009",
-"input":"<a a\u0009",
-"output":["ParseError", ["StartTag", "a", {"a":""}]]},
-
-{"description":"<a a\\u000A",
-"input":"<a a\u000A",
-"output":["ParseError", ["StartTag", "a", {"a":""}]]},
-
-{"description":"<a a\\u000B",
-"input":"<a a\u000B",
-"output":["ParseError", "ParseError", ["StartTag", "a", {"a\u000B":""}]]},
-
-{"description":"<a a\\u000C",
-"input":"<a a\u000C",
-"output":["ParseError", ["StartTag", "a", {"a":""}]]},
-
-{"description":"<a a\\u000D",
-"input":"<a a\u000D",
-"output":["ParseError", ["StartTag", "a", {"a":""}]]},
-
-{"description":"<a a\\u001F",
-"input":"<a a\u001F",
-"output":["ParseError", "ParseError", ["StartTag", "a", {"a\u001F":""}]]},
-
-{"description":"<a a ",
-"input":"<a a ",
-"output":["ParseError", ["StartTag", "a", {"a":""}]]},
-
-{"description":"<a a \\u0000",
-"input":"<a a \u0000",
-"output":["ParseError", "ParseError", ["StartTag", "a", {"a":"", "\uFFFD":""}]]},
-
-{"description":"<a a \\u0008",
-"input":"<a a \u0008",
-"output":["ParseError", "ParseError", ["StartTag", "a", {"a":"", "\u0008":""}]]},
-
-{"description":"<a a \\u0009",
-"input":"<a a \u0009",
-"output":["ParseError", ["StartTag", "a", {"a":""}]]},
-
-{"description":"<a a \\u000A",
-"input":"<a a \u000A",
-"output":["ParseError", ["StartTag", "a", {"a":""}]]},
-
-{"description":"<a a \\u000B",
-"input":"<a a \u000B",
-"output":["ParseError", "ParseError", ["StartTag", "a", {"a":"", "\u000B":""}]]},
-
-{"description":"<a a \\u000C",
-"input":"<a a \u000C",
-"output":["ParseError", ["StartTag", "a", {"a":""}]]},
-
-{"description":"<a a \\u000D",
-"input":"<a a \u000D",
-"output":["ParseError", ["StartTag", "a", {"a":""}]]},
-
-{"description":"<a a \\u001F",
-"input":"<a a \u001F",
-"output":["ParseError", "ParseError", ["StartTag", "a", {"a":"", "\u001F":""}]]},
-
-{"description":"<a a ",
-"input":"<a a ",
-"output":["ParseError", ["StartTag", "a", {"a":""}]]},
-
-{"description":"<a a !",
-"input":"<a a !",
-"output":["ParseError", ["StartTag", "a", {"a":"", "!":""}]]},
-
-{"description":"<a a \"",
-"input":"<a a \"",
-"output":["ParseError", "ParseError", ["StartTag", "a", {"a":"", "\"":""}]]},
-
-{"description":"<a a #",
-"input":"<a a #",
-"output":["ParseError", ["StartTag", "a", {"a":"", "#":""}]]},
-
-{"description":"<a a &",
-"input":"<a a &",
-"output":["ParseError", ["StartTag", "a", {"a":"", "&":""}]]},
-
-{"description":"<a a '",
-"input":"<a a '",
-"output":["ParseError", "ParseError", ["StartTag", "a", {"a":"", "'":""}]]},
-
-{"description":"<a a (",
-"input":"<a a (",
-"output":["ParseError", ["StartTag", "a", {"a":"", "(":""}]]},
-
-{"description":"<a a -",
-"input":"<a a -",
-"output":["ParseError", ["StartTag", "a", {"a":"", "-":""}]]},
-
-{"description":"<a a .",
-"input":"<a a .",
-"output":["ParseError", ["StartTag", "a", {"a":"", ".":""}]]},
-
-{"description":"<a a /",
-"input":"<a a /",
-"output":["ParseError", ["StartTag", "a", {"a":""}]]},
-
-{"description":"<a a 0",
-"input":"<a a 0",
-"output":["ParseError", ["StartTag", "a", {"a":"", "0":""}]]},
-
-{"description":"<a a 1",
-"input":"<a a 1",
-"output":["ParseError", ["StartTag", "a", {"a":"", "1":""}]]},
-
-{"description":"<a a 9",
-"input":"<a a 9",
-"output":["ParseError", ["StartTag", "a", {"a":"", "9":""}]]},
-
-{"description":"<a a <",
-"input":"<a a <",
-"output":["ParseError", ["StartTag", "a", {"a":"", "<":""}]]},
-
-{"description":"<a a =",
-"input":"<a a =",
-"output":["ParseError", ["StartTag", "a", {"a":""}]]},
-
{"description":"<a a >",
"input":"<a a >",
"output":[["StartTag", "a", {"a":""}]]},

-{"description":"<a a ?",
-"input":"<a a ?",
-"output":["ParseError", ["StartTag", "a", {"a":"", "?":""}]]},
-
-{"description":"<a a @",
-"input":"<a a @",
-"output":["ParseError", ["StartTag", "a", {"a":"", "@":""}]]},
-
-{"description":"<a a A",
-"input":"<a a A",
-"output":["ParseError", "ParseError", ["StartTag", "a", {"a":""}]]},
-
-{"description":"<a a B",
-"input":"<a a B",
-"output":["ParseError", ["StartTag", "a", {"a":"", "b":""}]]},
-
-{"description":"<a a Y",
-"input":"<a a Y",
-"output":["ParseError", ["StartTag", "a", {"a":"", "y":""}]]},
-
-{"description":"<a a Z",
-"input":"<a a Z",
-"output":["ParseError", ["StartTag", "a", {"a":"", "z":""}]]},
-
-{"description":"<a a [",
-"input":"<a a [",
-"output":["ParseError", ["StartTag", "a", {"a":"", "[":""}]]},
-
-{"description":"<a a `",
-"input":"<a a `",
-"output":["ParseError", ["StartTag", "a", {"a":"", "`":""}]]},
-
-{"description":"<a a a",
-"input":"<a a a",
-"output":["ParseError", "ParseError", ["StartTag", "a", {"a":""}]]},
-
-{"description":"<a a b",
-"input":"<a a b",
-"output":["ParseError", ["StartTag", "a", {"a":"", "b":""}]]},
-
-{"description":"<a a y",
-"input":"<a a y",
-"output":["ParseError", ["StartTag", "a", {"a":"", "y":""}]]},
-
-{"description":"<a a z",
-"input":"<a a z",
-"output":["ParseError", ["StartTag", "a", {"a":"", "z":""}]]},
-
-{"description":"<a a {",
-"input":"<a a {",
-"output":["ParseError", ["StartTag", "a", {"a":"", "{":""}]]},
-
-{"description":"<a a \\uDBC0\\uDC00",
-"input":"<a a \uDBC0\uDC00",
-"output":["ParseError", ["StartTag", "a", {"a":"", "\uDBC0\uDC00":""}]]},
-
-{"description":"<a a!",
-"input":"<a a!",
-"output":["ParseError", ["StartTag", "a", {"a!":""}]]},
-
-{"description":"<a a\"",
-"input":"<a a\"",
-"output":["ParseError", "ParseError", ["StartTag", "a", {"a\"":""}]]},
-
-{"description":"<a a#",
-"input":"<a a#",
-"output":["ParseError", ["StartTag", "a", {"a#":""}]]},
-
-{"description":"<a a&",
-"input":"<a a&",
-"output":["ParseError", ["StartTag", "a", {"a&":""}]]},
-
-{"description":"<a a'",
-"input":"<a a'",
-"output":["ParseError", "ParseError", ["StartTag", "a", {"a'":""}]]},
-
-{"description":"<a a(",
-"input":"<a a(",
-"output":["ParseError", ["StartTag", "a", {"a(":""}]]},
-
-{"description":"<a a-",
-"input":"<a a-",
-"output":["ParseError", ["StartTag", "a", {"a-":""}]]},
-
-{"description":"<a a.",
-"input":"<a a.",
-"output":["ParseError", ["StartTag", "a", {"a.":""}]]},
-
-{"description":"<a a/",
-"input":"<a a/",
-"output":["ParseError", ["StartTag", "a", {"a":""}]]},
-
-{"description":"<a a0",
-"input":"<a a0",
-"output":["ParseError", ["StartTag", "a", {"a0":""}]]},
-
-{"description":"<a a1",
-"input":"<a a1",
-"output":["ParseError", ["StartTag", "a", {"a1":""}]]},
-
-{"description":"<a a9",
-"input":"<a a9",
-"output":["ParseError", ["StartTag", "a", {"a9":""}]]},
-
-{"description":"<a a<",
-"input":"<a a<",
-"output":["ParseError", ["StartTag", "a", {"a<":""}]]},
-
-{"description":"<a a=",
-"input":"<a a=",
-"output":["ParseError", ["StartTag", "a", {"a":""}]]},
-
-{"description":"<a a=\\u0000",
-"input":"<a a=\u0000",
-"output":["ParseError", "ParseError", ["StartTag", "a", {"a":"\uFFFD"}]]},
-
-{"description":"<a a=\\u0008",
-"input":"<a a=\u0008",
-"output":["ParseError", "ParseError", ["StartTag", "a", {"a":"\u0008"}]]},
-
-{"description":"<a a=\\u0009",
-"input":"<a a=\u0009",
-"output":["ParseError", ["StartTag", "a", {"a":""}]]},
-
-{"description":"<a a=\\u000A",
-"input":"<a a=\u000A",
-"output":["ParseError", ["StartTag", "a", {"a":""}]]},
-
-{"description":"<a a=\\u000B",
-"input":"<a a=\u000B",
-"output":["ParseError", "ParseError", ["StartTag", "a", {"a":"\u000B"}]]},
-
-{"description":"<a a=\\u000C",
-"input":"<a a=\u000C",
-"output":["ParseError", ["StartTag", "a", {"a":""}]]},
-
-{"description":"<a a=\\u000D",
-"input":"<a a=\u000D",
-"output":["ParseError", ["StartTag", "a", {"a":""}]]},
-
-{"description":"<a a=\\u001F",
-"input":"<a a=\u001F",
-"output":["ParseError", "ParseError", ["StartTag", "a", {"a":"\u001F"}]]},
-
-{"description":"<a a= ",
-"input":"<a a= ",
-"output":["ParseError", ["StartTag", "a", {"a":""}]]},
-
-{"description":"<a a=!",
-"input":"<a a=!",
-"output":["ParseError", ["StartTag", "a", {"a":"!"}]]},
-
-{"description":"<a a=\"",
-"input":"<a a=\"",
-"output":["ParseError", ["StartTag", "a", {"a":""}]]},
-
-{"description":"<a a=\"\\u0000",
-"input":"<a a=\"\u0000",
-"output":["ParseError", "ParseError", ["StartTag", "a", {"a":"\uFFFD"}]]},
-
-{"description":"<a a=\"\\u0009",
-"input":"<a a=\"\u0009",
-"output":["ParseError", ["StartTag", "a", {"a":"\u0009"}]]},
-
-{"description":"<a a=\"\\u000A",
-"input":"<a a=\"\u000A",
-"output":["ParseError", ["StartTag", "a", {"a":"\u000A"}]]},
-
-{"description":"<a a=\"\\u000B",
-"input":"<a a=\"\u000B",
-"output":["ParseError", "ParseError", ["StartTag", "a", {"a":"\u000B"}]]},
-
-{"description":"<a a=\"\\u000C",
-"input":"<a a=\"\u000C",
-"output":["ParseError", ["StartTag", "a", {"a":"\u000C"}]]},
-
-{"description":"<a a=\" ",
-"input":"<a a=\" ",
-"output":["ParseError", ["StartTag", "a", {"a":" "}]]},
-
-{"description":"<a a=\"!",
-"input":"<a a=\"!",
-"output":["ParseError", ["StartTag", "a", {"a":"!"}]]},
-
-{"description":"<a a=\"\"",
-"input":"<a a=\"\"",
-"output":["ParseError", ["StartTag", "a", {"a":""}]]},
-
-{"description":"<a a=\"#",
-"input":"<a a=\"#",
-"output":["ParseError", ["StartTag", "a", {"a":"#"}]]},
-
-{"description":"<a a=\"%",
-"input":"<a a=\"%",
-"output":["ParseError", ["StartTag", "a", {"a":"%"}]]},
-
-{"description":"<a a=\"&",
-"input":"<a a=\"&",
-"output":["ParseError", ["StartTag", "a", {"a":"&"}]]},
-
-{"description":"<a a=\"'",
-"input":"<a a=\"'",
-"output":["ParseError", ["StartTag", "a", {"a":"'"}]]},
-
-{"description":"<a a=\"-",
-"input":"<a a=\"-",
-"output":["ParseError", ["StartTag", "a", {"a":"-"}]]},
-
-{"description":"<a a=\"/",
-"input":"<a a=\"/",
-"output":["ParseError", ["StartTag", "a", {"a":"/"}]]},
-
-{"description":"<a a=\"0",
-"input":"<a a=\"0",
-"output":["ParseError", ["StartTag", "a", {"a":"0"}]]},
-
-{"description":"<a a=\"1",
-"input":"<a a=\"1",
-"output":["ParseError", ["StartTag", "a", {"a":"1"}]]},
-
-{"description":"<a a=\"9",
-"input":"<a a=\"9",
-"output":["ParseError", ["StartTag", "a", {"a":"9"}]]},
-
-{"description":"<a a=\"<",
-"input":"<a a=\"<",
-"output":["ParseError", ["StartTag", "a", {"a":"<"}]]},
-
-{"description":"<a a=\"=",
-"input":"<a a=\"=",
-"output":["ParseError", ["StartTag", "a", {"a":"="}]]},
-
-{"description":"<a a=\">",
-"input":"<a a=\">",
-"output":["ParseError", ["StartTag", "a", {"a":">"}]]},
-
-{"description":"<a a=\"?",
-"input":"<a a=\"?",
-"output":["ParseError", ["StartTag", "a", {"a":"?"}]]},
-
-{"description":"<a a=\"@",
-"input":"<a a=\"@",
-"output":["ParseError", ["StartTag", "a", {"a":"@"}]]},
-
-{"description":"<a a=\"A",
-"input":"<a a=\"A",
-"output":["ParseError", ["StartTag", "a", {"a":"A"}]]},
-
-{"description":"<a a=\"B",
-"input":"<a a=\"B",
-"output":["ParseError", ["StartTag", "a", {"a":"B"}]]},
-
-{"description":"<a a=\"Y",
-"input":"<a a=\"Y",
-"output":["ParseError", ["StartTag", "a", {"a":"Y"}]]},
-
-{"description":"<a a=\"Z",
-"input":"<a a=\"Z",
-"output":["ParseError", ["StartTag", "a", {"a":"Z"}]]},
-
-{"description":"<a a=\"`",
-"input":"<a a=\"`",
-"output":["ParseError", ["StartTag", "a", {"a":"`"}]]},
-
-{"description":"<a a=\"a",
-"input":"<a a=\"a",
-"output":["ParseError", ["StartTag", "a", {"a":"a"}]]},
-
-{"description":"<a a=\"b",
-"input":"<a a=\"b",
-"output":["ParseError", ["StartTag", "a", {"a":"b"}]]},
-
-{"description":"<a a=\"y",
-"input":"<a a=\"y",
-"output":["ParseError", ["StartTag", "a", {"a":"y"}]]},
-
-{"description":"<a a=\"z",
-"input":"<a a=\"z",
-"output":["ParseError", ["StartTag", "a", {"a":"z"}]]},
-
-{"description":"<a a=\"{",
-"input":"<a a=\"{",
-"output":["ParseError", ["StartTag", "a", {"a":"{"}]]},
-
-{"description":"<a a=\"\\uDBC0\\uDC00",
-"input":"<a a=\"\uDBC0\uDC00",
-"output":["ParseError", ["StartTag", "a", {"a":"\uDBC0\uDC00"}]]},
-
-{"description":"<a a=#",
-"input":"<a a=#",
-"output":["ParseError", ["StartTag", "a", {"a":"#"}]]},
-
-{"description":"<a a=%",
-"input":"<a a=%",
-"output":["ParseError", ["StartTag", "a", {"a":"%"}]]},
-
-{"description":"<a a=&",
-"input":"<a a=&",
-"output":["ParseError", ["StartTag", "a", {"a":"&"}]]},
-
-{"description":"<a a='",
-"input":"<a a='",
-"output":["ParseError", ["StartTag", "a", {"a":""}]]},
-
-{"description":"<a a='\\u0000",
-"input":"<a a='\u0000",
-"output":["ParseError", "ParseError", ["StartTag", "a", {"a":"\uFFFD"}]]},
-
-{"description":"<a a='\\u0009",
-"input":"<a a='\u0009",
-"output":["ParseError", ["StartTag", "a", {"a":"\u0009"}]]},
-
-{"description":"<a a='\\u000A",
-"input":"<a a='\u000A",
-"output":["ParseError", ["StartTag", "a", {"a":"\u000A"}]]},
-
-{"description":"<a a='\\u000B",
-"input":"<a a='\u000B",
-"output":["ParseError", "ParseError", ["StartTag", "a", {"a":"\u000B"}]]},
-
-{"description":"<a a='\\u000C",
-"input":"<a a='\u000C",
-"output":["ParseError", ["StartTag", "a", {"a":"\u000C"}]]},
-
-{"description":"<a a=' ",
-"input":"<a a=' ",
-"output":["ParseError", ["StartTag", "a", {"a":" "}]]},
-
-{"description":"<a a='!",
-"input":"<a a='!",
-"output":["ParseError", ["StartTag", "a", {"a":"!"}]]},
-
-{"description":"<a a='\"",
-"input":"<a a='\"",
-"output":["ParseError", ["StartTag", "a", {"a":"\""}]]},
-
-{"description":"<a a='%",
-"input":"<a a='%",
-"output":["ParseError", ["StartTag", "a", {"a":"%"}]]},
-
-{"description":"<a a='&",
-"input":"<a a='&",
-"output":["ParseError", ["StartTag", "a", {"a":"&"}]]},
-
-{"description":"<a a=''",
-"input":"<a a=''",
-"output":["ParseError", ["StartTag", "a", {"a":""}]]},
-
-{"description":"<a a=''\\u0000",
-"input":"<a a=''\u0000",
-"output":["ParseError", "ParseError", "ParseError", ["StartTag", "a", {"a":"", "\uFFFD":""}]]},
-
-{"description":"<a a=''\\u0008",
-"input":"<a a=''\u0008",
-"output":["ParseError", "ParseError", "ParseError", ["StartTag", "a", {"a":"", "\u0008":""}]]},
-
-{"description":"<a a=''\\u0009",
-"input":"<a a=''\u0009",
-"output":["ParseError", ["StartTag", "a", {"a":""}]]},
-
-{"description":"<a a=''\\u000A",
-"input":"<a a=''\u000A",
-"output":["ParseError", ["StartTag", "a", {"a":""}]]},
-
-{"description":"<a a=''\\u000B",
-"input":"<a a=''\u000B",
-"output":["ParseError", "ParseError", "ParseError", ["StartTag", "a", {"a":"", "\u000B":""}]]},
-
-{"description":"<a a=''\\u000C",
-"input":"<a a=''\u000C",
-"output":["ParseError", ["StartTag", "a", {"a":""}]]},
-
-{"description":"<a a=''\\u000D",
-"input":"<a a=''\u000D",
-"output":["ParseError", ["StartTag", "a", {"a":""}]]},
-
-{"description":"<a a=''\\u001F",
-"input":"<a a=''\u001F",
-"output":["ParseError", "ParseError", "ParseError", ["StartTag", "a", {"a":"", "\u001F":""}]]},
-
-{"description":"<a a='' ",
-"input":"<a a='' ",
-"output":["ParseError", ["StartTag", "a", {"a":""}]]},
-
-{"description":"<a a=''!",
-"input":"<a a=''!",
-"output":["ParseError", "ParseError", ["StartTag", "a", {"a":"", "!":""}]]},
-
-{"description":"<a a=''\"",
-"input":"<a a=''\"",
-"output":["ParseError", "ParseError", "ParseError", ["StartTag", "a", {"a":"", "\"":""}]]},
-
-{"description":"<a a=''&",
-"input":"<a a=''&",
-"output":["ParseError", "ParseError", ["StartTag", "a", {"a":"", "&":""}]]},
-
-{"description":"<a a='''",
-"input":"<a a='''",
-"output":["ParseError", "ParseError", "ParseError", ["StartTag", "a", {"a":"", "'":""}]]},
-
-{"description":"<a a=''-",
-"input":"<a a=''-",
-"output":["ParseError", "ParseError", ["StartTag", "a", {"a":"", "-":""}]]},
-
-{"description":"<a a=''.",
-"input":"<a a=''.",
-"output":["ParseError", "ParseError", ["StartTag", "a", {"a":"", ".":""}]]},
-
-{"description":"<a a=''/",
-"input":"<a a=''/",
-"output":["ParseError", ["StartTag", "a", {"a":""}]]},
-
-{"description":"<a a=''0",
-"input":"<a a=''0",
-"output":["ParseError", "ParseError", ["StartTag", "a", {"a":"", "0":""}]]},
-
-{"description":"<a a=''1",
-"input":"<a a=''1",
-"output":["ParseError", "ParseError", ["StartTag", "a", {"a":"", "1":""}]]},
-
-{"description":"<a a=''9",
-"input":"<a a=''9",
-"output":["ParseError", "ParseError", ["StartTag", "a", {"a":"", "9":""}]]},
-
-{"description":"<a a=''<",
-"input":"<a a=''<",
-"output":["ParseError", "ParseError", ["StartTag", "a", {"a":"", "<":""}]]},
-
-{"description":"<a a=''=",
-"input":"<a a=''=",
-"output":["ParseError", "ParseError", "ParseError", ["StartTag", "a", {"a":"", "=":""}]]},
-
{"description":"<a a=''>",
"input":"<a a=''>",
"output":[["StartTag", "a", {"a":""}]]},

-{"description":"<a a=''?",
-"input":"<a a=''?",
-"output":["ParseError", "ParseError", ["StartTag", "a", {"a":"", "?":""}]]},
-
-{"description":"<a a=''@",
-"input":"<a a=''@",
-"output":["ParseError", "ParseError", ["StartTag", "a", {"a":"", "@":""}]]},
-
-{"description":"<a a=''A",
-"input":"<a a=''A",
-"output":["ParseError", "ParseError", "ParseError", ["StartTag", "a", {"a":""}]]},
-
-{"description":"<a a=''B",
-"input":"<a a=''B",
-"output":["ParseError", "ParseError", ["StartTag", "a", {"a":"", "b":""}]]},
-
-{"description":"<a a=''Y",
-"input":"<a a=''Y",
-"output":["ParseError", "ParseError", ["StartTag", "a", {"a":"", "y":""}]]},
-
-{"description":"<a a=''Z",
-"input":"<a a=''Z",
-"output":["ParseError", "ParseError", ["StartTag", "a", {"a":"", "z":""}]]},
-
-{"description":"<a a=''`",
-"input":"<a a=''`",
-"output":["ParseError", "ParseError", ["StartTag", "a", {"a":"", "`":""}]]},
-
-{"description":"<a a=''a",
-"input":"<a a=''a",
-"output":["ParseError", "ParseError", "ParseError", ["StartTag", "a", {"a":""}]]},
-
-{"description":"<a a=''b",
-"input":"<a a=''b",
-"output":["ParseError", "ParseError", ["StartTag", "a", {"a":"", "b":""}]]},
-
-{"description":"<a a=''y",
-"input":"<a a=''y",
-"output":["ParseError", "ParseError", ["StartTag", "a", {"a":"", "y":""}]]},
-
-{"description":"<a a=''z",
-"input":"<a a=''z",
-"output":["ParseError", "ParseError", ["StartTag", "a", {"a":"", "z":""}]]},
-
-{"description":"<a a=''{",
-"input":"<a a=''{",
-"output":["ParseError", "ParseError", ["StartTag", "a", {"a":"", "{":""}]]},
-
-{"description":"<a a=''\\uDBC0\\uDC00",
-"input":"<a a=''\uDBC0\uDC00",
-"output":["ParseError", "ParseError", ["StartTag", "a", {"a":"", "\uDBC0\uDC00":""}]]},
-
-{"description":"<a a='(",
-"input":"<a a='(",
-"output":["ParseError", ["StartTag", "a", {"a":"("}]]},
-
-{"description":"<a a='-",
-"input":"<a a='-",
-"output":["ParseError", ["StartTag", "a", {"a":"-"}]]},
-
-{"description":"<a a='/",
-"input":"<a a='/",
-"output":["ParseError", ["StartTag", "a", {"a":"/"}]]},
-
-{"description":"<a a='0",
-"input":"<a a='0",
-"output":["ParseError", ["StartTag", "a", {"a":"0"}]]},
-
-{"description":"<a a='1",
-"input":"<a a='1",
-"output":["ParseError", ["StartTag", "a", {"a":"1"}]]},
-
-{"description":"<a a='9",
-"input":"<a a='9",
-"output":["ParseError", ["StartTag", "a", {"a":"9"}]]},
-
-{"description":"<a a='<",
-"input":"<a a='<",
-"output":["ParseError", ["StartTag", "a", {"a":"<"}]]},
-
-{"description":"<a a='=",
-"input":"<a a='=",
-"output":["ParseError", ["StartTag", "a", {"a":"="}]]},
-
-{"description":"<a a='>",
-"input":"<a a='>",
-"output":["ParseError", ["StartTag", "a", {"a":">"}]]},
-
-{"description":"<a a='?",
-"input":"<a a='?",
-"output":["ParseError", ["StartTag", "a", {"a":"?"}]]},
-
-{"description":"<a a='@",
-"input":"<a a='@",
-"output":["ParseError", ["StartTag", "a", {"a":"@"}]]},
-
-{"description":"<a a='A",
-"input":"<a a='A",
-"output":["ParseError", ["StartTag", "a", {"a":"A"}]]},
-
-{"description":"<a a='B",
-"input":"<a a='B",
-"output":["ParseError", ["StartTag", "a", {"a":"B"}]]},
-
-{"description":"<a a='Y",
-"input":"<a a='Y",
-"output":["ParseError", ["StartTag", "a", {"a":"Y"}]]},
-
-{"description":"<a a='Z",
-"input":"<a a='Z",
-"output":["ParseError", ["StartTag", "a", {"a":"Z"}]]},
-
-{"description":"<a a='`",
-"input":"<a a='`",
-"output":["ParseError", ["StartTag", "a", {"a":"`"}]]},
-
-{"description":"<a a='a",
-"input":"<a a='a",
-"output":["ParseError", ["StartTag", "a", {"a":"a"}]]},
-
-{"description":"<a a='b",
-"input":"<a a='b",
-"output":["ParseError", ["StartTag", "a", {"a":"b"}]]},
-
-{"description":"<a a='y",
-"input":"<a a='y",
-"output":["ParseError", ["StartTag", "a", {"a":"y"}]]},
-
-{"description":"<a a='z",
-"input":"<a a='z",
-"output":["ParseError", ["StartTag", "a", {"a":"z"}]]},
-
-{"description":"<a a='{",
-"input":"<a a='{",
-"output":["ParseError", ["StartTag", "a", {"a":"{"}]]},
-
-{"description":"<a a='\\uDBC0\\uDC00",
-"input":"<a a='\uDBC0\uDC00",
-"output":["ParseError", ["StartTag", "a", {"a":"\uDBC0\uDC00"}]]},
-
-{"description":"<a a=(",
-"input":"<a a=(",
-"output":["ParseError", ["StartTag", "a", {"a":"("}]]},
-
-{"description":"<a a=-",
-"input":"<a a=-",
-"output":["ParseError", ["StartTag", "a", {"a":"-"}]]},
-
-{"description":"<a a=/",
-"input":"<a a=/",
-"output":["ParseError", ["StartTag", "a", {"a":"/"}]]},
-
-{"description":"<a a=0",
-"input":"<a a=0",
-"output":["ParseError", ["StartTag", "a", {"a":"0"}]]},
-
-{"description":"<a a=1",
-"input":"<a a=1",
-"output":["ParseError", ["StartTag", "a", {"a":"1"}]]},
-
-{"description":"<a a=9",
-"input":"<a a=9",
-"output":["ParseError", ["StartTag", "a", {"a":"9"}]]},
-
-{"description":"<a a=<",
-"input":"<a a=<",
-"output":["ParseError", ["StartTag", "a", {"a":"<"}]]},
-
-{"description":"<a a==",
-"input":"<a a==",
-"output":["ParseError", "ParseError", ["StartTag", "a", {"a":"="}]]},
-
{"description":"<a a=>",
"input":"<a a=>",
"output":["ParseError", ["StartTag", "a", {"a":""}]]},

-{"description":"<a a=?",
-"input":"<a a=?",
-"output":["ParseError", ["StartTag", "a", {"a":"?"}]]},
-
-{"description":"<a a=@",
-"input":"<a a=@",
-"output":["ParseError", ["StartTag", "a", {"a":"@"}]]},
-
-{"description":"<a a=A",
-"input":"<a a=A",
-"output":["ParseError", ["StartTag", "a", {"a":"A"}]]},
-
-{"description":"<a a=B",
-"input":"<a a=B",
-"output":["ParseError", ["StartTag", "a", {"a":"B"}]]},
-
-{"description":"<a a=Y",
-"input":"<a a=Y",
-"output":["ParseError", ["StartTag", "a", {"a":"Y"}]]},
-
-{"description":"<a a=Z",
-"input":"<a a=Z",
-"output":["ParseError", ["StartTag", "a", {"a":"Z"}]]},
-
-{"description":"<a a=`",
-"input":"<a a=`",
-"output":["ParseError", ["StartTag", "a", {"a":"`"}]]},
-
-{"description":"<a a=a",
-"input":"<a a=a",
-"output":["ParseError", ["StartTag", "a", {"a":"a"}]]},
-
-{"description":"<a a=a\\u0000",
-"input":"<a a=a\u0000",
-"output":["ParseError", "ParseError", ["StartTag", "a", {"a":"a\uFFFD"}]]},
-
-{"description":"<a a=a\\u0008",
-"input":"<a a=a\u0008",
-"output":["ParseError", "ParseError", ["StartTag", "a", {"a":"a\u0008"}]]},
-
-{"description":"<a a=a\\u0009",
-"input":"<a a=a\u0009",
-"output":["ParseError", ["StartTag", "a", {"a":"a"}]]},
-
-{"description":"<a a=a\\u000A",
-"input":"<a a=a\u000A",
-"output":["ParseError", ["StartTag", "a", {"a":"a"}]]},
-
-{"description":"<a a=a\\u000B",
-"input":"<a a=a\u000B",
-"output":["ParseError", "ParseError", ["StartTag", "a", {"a":"a\u000B"}]]},
-
-{"description":"<a a=a\\u000C",
-"input":"<a a=a\u000C",
-"output":["ParseError", ["StartTag", "a", {"a":"a"}]]},
-
-{"description":"<a a=a\\u000D",
-"input":"<a a=a\u000D",
-"output":["ParseError", ["StartTag", "a", {"a":"a"}]]},
-
-{"description":"<a a=a\\u001F",
-"input":"<a a=a\u001F",
-"output":["ParseError", "ParseError", ["StartTag", "a", {"a":"a\u001F"}]]},
-
-{"description":"<a a=a ",
-"input":"<a a=a ",
-"output":["ParseError", ["StartTag", "a", {"a":"a"}]]},
-
-{"description":"<a a=a!",
-"input":"<a a=a!",
-"output":["ParseError", ["StartTag", "a", {"a":"a!"}]]},
-
-{"description":"<a a=a\"",
-"input":"<a a=a\"",
-"output":["ParseError", "ParseError", ["StartTag", "a", {"a":"a\""}]]},
-
-{"description":"<a a=a#",
-"input":"<a a=a#",
-"output":["ParseError", ["StartTag", "a", {"a":"a#"}]]},
-
-{"description":"<a a=a%",
-"input":"<a a=a%",
-"output":["ParseError", ["StartTag", "a", {"a":"a%"}]]},
-
-{"description":"<a a=a&",
-"input":"<a a=a&",
-"output":["ParseError", ["StartTag", "a", {"a":"a&"}]]},
-
-{"description":"<a a=a'",
-"input":"<a a=a'",
-"output":["ParseError", "ParseError", ["StartTag", "a", {"a":"a'"}]]},
-
-{"description":"<a a=a(",
-"input":"<a a=a(",
-"output":["ParseError", ["StartTag", "a", {"a":"a("}]]},
-
-{"description":"<a a=a-",
-"input":"<a a=a-",
-"output":["ParseError", ["StartTag", "a", {"a":"a-"}]]},
-
-{"description":"<a a=a/",
-"input":"<a a=a/",
-"output":["ParseError", ["StartTag", "a", {"a":"a/"}]]},
-
-{"description":"<a a=a0",
-"input":"<a a=a0",
-"output":["ParseError", ["StartTag", "a", {"a":"a0"}]]},
-
-{"description":"<a a=a1",
-"input":"<a a=a1",
-"output":["ParseError", ["StartTag", "a", {"a":"a1"}]]},
-
-{"description":"<a a=a9",
-"input":"<a a=a9",
-"output":["ParseError", ["StartTag", "a", {"a":"a9"}]]},
-
-{"description":"<a a=a<",
-"input":"<a a=a<",
-"output":["ParseError", ["StartTag", "a", {"a":"a<"}]]},
-
-{"description":"<a a=a=",
-"input":"<a a=a=",
-"output":["ParseError", "ParseError", ["StartTag", "a", {"a":"a="}]]},
-
{"description":"<a a=a>",
"input":"<a a=a>",
"output":[["StartTag", "a", {"a":"a"}]]},

-{"description":"<a a=a?",
-"input":"<a a=a?",
-"output":["ParseError", ["StartTag", "a", {"a":"a?"}]]},
-
-{"description":"<a a=a@",
-"input":"<a a=a@",
-"output":["ParseError", ["StartTag", "a", {"a":"a@"}]]},
-
-{"description":"<a a=aA",
-"input":"<a a=aA",
-"output":["ParseError", ["StartTag", "a", {"a":"aA"}]]},
-
-{"description":"<a a=aB",
-"input":"<a a=aB",
-"output":["ParseError", ["StartTag", "a", {"a":"aB"}]]},
-
-{"description":"<a a=aY",
-"input":"<a a=aY",
-"output":["ParseError", ["StartTag", "a", {"a":"aY"}]]},
-
-{"description":"<a a=aZ",
-"input":"<a a=aZ",
-"output":["ParseError", ["StartTag", "a", {"a":"aZ"}]]},
-
-{"description":"<a a=a`",
-"input":"<a a=a`",
-"output":["ParseError", ["StartTag", "a", {"a":"a`"}]]},
-
-{"description":"<a a=aa",
-"input":"<a a=aa",
-"output":["ParseError", ["StartTag", "a", {"a":"aa"}]]},
-
-{"description":"<a a=ab",
-"input":"<a a=ab",
-"output":["ParseError", ["StartTag", "a", {"a":"ab"}]]},
-
-{"description":"<a a=ay",
-"input":"<a a=ay",
-"output":["ParseError", ["StartTag", "a", {"a":"ay"}]]},
-
-{"description":"<a a=az",
-"input":"<a a=az",
-"output":["ParseError", ["StartTag", "a", {"a":"az"}]]},
-
-{"description":"<a a=a{",
-"input":"<a a=a{",
-"output":["ParseError", ["StartTag", "a", {"a":"a{"}]]},
-
-{"description":"<a a=a\\uDBC0\\uDC00",
-"input":"<a a=a\uDBC0\uDC00",
-"output":["ParseError", ["StartTag", "a", {"a":"a\uDBC0\uDC00"}]]},
-
-{"description":"<a a=b",
-"input":"<a a=b",
-"output":["ParseError", ["StartTag", "a", {"a":"b"}]]},
-
-{"description":"<a a=y",
-"input":"<a a=y",
-"output":["ParseError", ["StartTag", "a", {"a":"y"}]]},
-
-{"description":"<a a=z",
-"input":"<a a=z",
-"output":["ParseError", ["StartTag", "a", {"a":"z"}]]},
-
-{"description":"<a a={",
-"input":"<a a={",
-"output":["ParseError", ["StartTag", "a", {"a":"{"}]]},
-
-{"description":"<a a=\\uDBC0\\uDC00",
-"input":"<a a=\uDBC0\uDC00",
-"output":["ParseError", ["StartTag", "a", {"a":"\uDBC0\uDC00"}]]},
-
{"description":"<a a>",
"input":"<a a>",
"output":[["StartTag", "a", {"a":""}]]},

-{"description":"<a a?",
-"input":"<a a?",
-"output":["ParseError", ["StartTag", "a", {"a?":""}]]},
-
-{"description":"<a a@",
-"input":"<a a@",
-"output":["ParseError", ["StartTag", "a", {"a@":""}]]},
-
-{"description":"<a aA",
-"input":"<a aA",
-"output":["ParseError", ["StartTag", "a", {"aa":""}]]},
-
-{"description":"<a aB",
-"input":"<a aB",
-"output":["ParseError", ["StartTag", "a", {"ab":""}]]},
-
-{"description":"<a aY",
-"input":"<a aY",
-"output":["ParseError", ["StartTag", "a", {"ay":""}]]},
-
-{"description":"<a aZ",
-"input":"<a aZ",
-"output":["ParseError", ["StartTag", "a", {"az":""}]]},
-
-{"description":"<a a[",
-"input":"<a a[",
-"output":["ParseError", ["StartTag", "a", {"a[":""}]]},
-
-{"description":"<a a`",
-"input":"<a a`",
-"output":["ParseError", ["StartTag", "a", {"a`":""}]]},
-
-{"description":"<a aa",
-"input":"<a aa",
-"output":["ParseError", ["StartTag", "a", {"aa":""}]]},
-
-{"description":"<a ab",
-"input":"<a ab",
-"output":["ParseError", ["StartTag", "a", {"ab":""}]]},
-
-{"description":"<a ay",
-"input":"<a ay",
-"output":["ParseError", ["StartTag", "a", {"ay":""}]]},
-
-{"description":"<a az",
-"input":"<a az",
-"output":["ParseError", ["StartTag", "a", {"az":""}]]},
-
-{"description":"<a a{",
-"input":"<a a{",
-"output":["ParseError", ["StartTag", "a", {"a{":""}]]},
-
-{"description":"<a a\\uDBC0\\uDC00",
-"input":"<a a\uDBC0\uDC00",
-"output":["ParseError", ["StartTag", "a", {"a\uDBC0\uDC00":""}]]},
-
-{"description":"<a b",
-"input":"<a b",
-"output":["ParseError", ["StartTag", "a", {"b":""}]]},
-
-{"description":"<a y",
-"input":"<a y",
-"output":["ParseError", ["StartTag", "a", {"y":""}]]},
-
-{"description":"<a z",
-"input":"<a z",
-"output":["ParseError", ["StartTag", "a", {"z":""}]]},
-
-{"description":"<a {",
-"input":"<a {",
-"output":["ParseError", ["StartTag", "a", {"{":""}]]},
-
-{"description":"<a \\uDBC0\\uDC00",
-"input":"<a \uDBC0\uDC00",
-"output":["ParseError", ["StartTag", "a", {"\uDBC0\uDC00":""}]]},
-
-{"description":"<a!",
-"input":"<a!",
-"output":["ParseError", ["StartTag", "a!", {}]]},
-
-{"description":"<a\"",
-"input":"<a\"",
-"output":["ParseError", ["StartTag", "a\"", {}]]},
-
-{"description":"<a&",
-"input":"<a&",
-"output":["ParseError", ["StartTag", "a&", {}]]},
-
-{"description":"<a'",
-"input":"<a'",
-"output":["ParseError", ["StartTag", "a'", {}]]},
-
-{"description":"<a-",
-"input":"<a-",
-"output":["ParseError", ["StartTag", "a-", {}]]},
-
-{"description":"<a.",
-"input":"<a.",
-"output":["ParseError", ["StartTag", "a.", {}]]},
-
-{"description":"<a/",
-"input":"<a/",
-"output":["ParseError", ["StartTag", "a", {}]]},
-
-{"description":"<a/\\u0000",
-"input":"<a/\u0000",
-"output":["ParseError", "ParseError", "ParseError", ["StartTag", "a", {"\uFFFD":""}]]},
-
-{"description":"<a/\\u0009",
-"input":"<a/\u0009",
-"output":["ParseError", "ParseError", ["StartTag", "a", {}]]},
-
-{"description":"<a/\\u000A",
-"input":"<a/\u000A",
-"output":["ParseError", "ParseError", ["StartTag", "a", {}]]},
-
-{"description":"<a/\\u000B",
-"input":"<a/\u000B",
-"output":["ParseError", "ParseError", "ParseError", ["StartTag", "a", {"\u000B":""}]]},
-
-{"description":"<a/\\u000C",
-"input":"<a/\u000C",
-"output":["ParseError", "ParseError", ["StartTag", "a", {}]]},
-
-{"description":"<a/ ",
-"input":"<a/ ",
-"output":["ParseError", "ParseError", ["StartTag", "a", {}]]},
-
-{"description":"<a/!",
-"input":"<a/!",
-"output":["ParseError", "ParseError", ["StartTag", "a", {"!":""}]]},
-
-{"description":"<a/\"",
-"input":"<a/\"",
-"output":["ParseError", "ParseError", "ParseError", ["StartTag", "a", {"\"":""}]]},
-
-{"description":"<a/&",
-"input":"<a/&",
-"output":["ParseError", "ParseError", ["StartTag", "a", {"&":""}]]},
-
-{"description":"<a/'",
-"input":"<a/'",
-"output":["ParseError", "ParseError", "ParseError", ["StartTag", "a", {"'":""}]]},
-
-{"description":"<a/-",
-"input":"<a/-",
-"output":["ParseError", "ParseError", ["StartTag", "a", {"-":""}]]},
-
-{"description":"<a//",
-"input":"<a//",
-"output":["ParseError", "ParseError", ["StartTag", "a", {}]]},
-
-{"description":"<a/0",
-"input":"<a/0",
-"output":["ParseError", "ParseError", ["StartTag", "a", {"0":""}]]},
-
-{"description":"<a/1",
-"input":"<a/1",
-"output":["ParseError", "ParseError", ["StartTag", "a", {"1":""}]]},
-
-{"description":"<a/9",
-"input":"<a/9",
-"output":["ParseError", "ParseError", ["StartTag", "a", {"9":""}]]},
-
-{"description":"<a/<",
-"input":"<a/<",
-"output":["ParseError", "ParseError", ["StartTag", "a", {"<":""}]]},
-
-{"description":"<a/=",
-"input":"<a/=",
-"output":["ParseError", "ParseError", "ParseError", ["StartTag", "a", {"=":""}]]},
-
{"description":"<a/>",
"input":"<a/>",
"output":[["StartTag", "a", {}, true]]},

-{"description":"<a/?",
-"input":"<a/?",
-"output":["ParseError", "ParseError", ["StartTag", "a", {"?":""}]]},
-
-{"description":"<a/@",
-"input":"<a/@",
-"output":["ParseError", "ParseError", ["StartTag", "a", {"@":""}]]},
-
-{"description":"<a/A",
-"input":"<a/A",
-"output":["ParseError", "ParseError", ["StartTag", "a", {"a":""}]]},
-
-{"description":"<a/B",
-"input":"<a/B",
-"output":["ParseError", "ParseError", ["StartTag", "a", {"b":""}]]},
-
-{"description":"<a/Y",
-"input":"<a/Y",
-"output":["ParseError", "ParseError", ["StartTag", "a", {"y":""}]]},
-
-{"description":"<a/Z",
-"input":"<a/Z",
-"output":["ParseError", "ParseError", ["StartTag", "a", {"z":""}]]},
-
-{"description":"<a/`",
-"input":"<a/`",
-"output":["ParseError", "ParseError", ["StartTag", "a", {"`":""}]]},
-
-{"description":"<a/a",
-"input":"<a/a",
-"output":["ParseError", "ParseError", ["StartTag", "a", {"a":""}]]},
-
-{"description":"<a/b",
-"input":"<a/b",
-"output":["ParseError", "ParseError", ["StartTag", "a", {"b":""}]]},
-
-{"description":"<a/y",
-"input":"<a/y",
-"output":["ParseError", "ParseError", ["StartTag", "a", {"y":""}]]},
-
-{"description":"<a/z",
-"input":"<a/z",
-"output":["ParseError", "ParseError", ["StartTag", "a", {"z":""}]]},
-
-{"description":"<a/{",
-"input":"<a/{",
-"output":["ParseError", "ParseError", ["StartTag", "a", {"{":""}]]},
-
-{"description":"<a/\\uDBC0\\uDC00",
-"input":"<a/\uDBC0\uDC00",
-"output":["ParseError", "ParseError", ["StartTag", "a", {"\uDBC0\uDC00":""}]]},
-
-{"description":"<a0",
-"input":"<a0",
-"output":["ParseError", ["StartTag", "a0", {}]]},
-
-{"description":"<a1",
-"input":"<a1",
-"output":["ParseError", ["StartTag", "a1", {}]]},
-
-{"description":"<a9",
-"input":"<a9",
-"output":["ParseError", ["StartTag", "a9", {}]]},
-
-{"description":"<a<",
-"input":"<a<",
-"output":["ParseError", ["StartTag", "a<", {}]]},
-
-{"description":"<a=",
-"input":"<a=",
-"output":["ParseError", ["StartTag", "a=", {}]]},
-
{"description":"<a>",
"input":"<a>",
"output":[["StartTag", "a", {}]]},

-{"description":"<a?",
-"input":"<a?",
-"output":["ParseError", ["StartTag", "a?", {}]]},
-
-{"description":"<a@",
-"input":"<a@",
-"output":["ParseError", ["StartTag", "a@", {}]]},
-
-{"description":"<aA",
-"input":"<aA",
-"output":["ParseError", ["StartTag", "aa", {}]]},
-
-{"description":"<aB",
-"input":"<aB",
-"output":["ParseError", ["StartTag", "ab", {}]]},
-
-{"description":"<aY",
-"input":"<aY",
-"output":["ParseError", ["StartTag", "ay", {}]]},
-
-{"description":"<aZ",
-"input":"<aZ",
-"output":["ParseError", ["StartTag", "az", {}]]},
-
-{"description":"<a[",
-"input":"<a[",
-"output":["ParseError", ["StartTag", "a[", {}]]},
-
-{"description":"<a`",
-"input":"<a`",
-"output":["ParseError", ["StartTag", "a`", {}]]},
-
-{"description":"<aa",
-"input":"<aa",
-"output":["ParseError", ["StartTag", "aa", {}]]},
-
-{"description":"<ab",
-"input":"<ab",
-"output":["ParseError", ["StartTag", "ab", {}]]},
-
-{"description":"<ay",
-"input":"<ay",
-"output":["ParseError", ["StartTag", "ay", {}]]},
-
-{"description":"<az",
-"input":"<az",
-"output":["ParseError", ["StartTag", "az", {}]]},
-
-{"description":"<a{",
-"input":"<a{",
-"output":["ParseError", ["StartTag", "a{", {}]]},
-
-{"description":"<a\\uDBC0\\uDC00",
-"input":"<a\uDBC0\uDC00",
-"output":["ParseError", ["StartTag", "a\uDBC0\uDC00", {}]]},
-
-{"description":"<b",
-"input":"<b",
-"output":["ParseError", ["StartTag", "b", {}]]},
-
-{"description":"<y",
-"input":"<y",
-"output":["ParseError", ["StartTag", "y", {}]]},
-
-{"description":"<z",
-"input":"<z",
-"output":["ParseError", ["StartTag", "z", {}]]},
-
{"description":"<{",
"input":"<{",
"output":["ParseError", ["Character", "<{"]]},
--
1.8.3.2

From abb37165227143994aa1e5bd82dcf59f80d69d51 Mon Sep 17 00:00:00 2001
From: Achal-Aggarwal <theachalaggarwal@gmail.com>
Date: Wed, 12 Mar 2014 02:31:20 +0530
Subject: [PATCH 08/11] Removing failing testcase of test4.dat for tokeniser2.

---
test/data/tokeniser2/test4.test | 30 ------------------------------
1 file changed, 30 deletions(-)

diff --git a/test/data/tokeniser2/test4.test b/test/data/tokeniser2/test4.test
index ec8f72c..bf251d1 100644
--- a/test/data/tokeniser2/test4.test
+++ b/test/data/tokeniser2/test4.test
@@ -1,13 +1,5 @@
{"tests": [

-{"description":"< in attribute name",
-"input":"<z/0 <",
-"output":["ParseError", "ParseError", ["StartTag", "z", {"0": "", "<": ""}]]},
-
-{"description":"< in attribute value",
-"input":"<z x=<",
-"output":["ParseError", ["StartTag", "z", {"x": "<"}]]},
-
{"description":"= in unquoted attribute value",
"input":"<z z=z=z>",
"output":["ParseError", ["StartTag", "z", {"z": "z=z"}]]},
@@ -96,14 +88,6 @@
"input":"<!doctype html \r",
"output":["ParseError", ["DOCTYPE", "html", null, null, false]]},

-{"description":"CR EOF in tag name",
-"input":"<z\r",
-"output":["ParseError", ["StartTag", "z", {}]]},
-
-{"description":"Slash EOF in tag name",
-"input":"<z/",
-"output":["ParseError", ["StartTag", "z", {}]]},
-
{"description":"Zero hex numeric entity",
"input":"&#x0",
"output":["ParseError", "ParseError", ["Character", "\uFFFD"]]},
@@ -132,10 +116,6 @@
"input":"&#x10000;",
"output":[["Character", "\uD800\uDC00"]]},

-{"description":"Maximum non-BMP numeric entity",
-"input":"&#X10FFFF;",
-"output":["ParseError", ["Character", "\uFFFD"]]},
-
{"description":"Above maximum numeric entity",
"input":"&#x110000;",
"output":["ParseError", ["Character", "\uFFFD"]]},
@@ -220,11 +200,6 @@
"input":"<!dOcTyPe hTmL sYsTeM \"xYz\">",
"output":[["DOCTYPE", "html", null, "xYz", true]]},

-{"description":"U+0000 in lookahead region after non-matching character",
-"input":"<!doc>\u0000",
-"output":["ParseError", ["Comment", "doc"], "ParseError", ["Character", "\uFFFD"]],
-"ignoreErrorOrder":true},
-
{"description":"U+0000 in lookahead region",
"input":"<!doc\u0000",
"output":["ParseError", "ParseError", ["Comment", "doc\uFFFD"]],
@@ -245,11 +220,6 @@
"output":["ParseError", "ParseError", ["Comment", "doc\uD83F\uDFFF"]],
"ignoreErrorOrder":true},

-{"description":"CR followed by U+0000",
-"input":"\r\u0000",
-"output":["ParseError", ["Character", "\n\uFFFD"]],
-"ignoreErrorOrder":true},
-
{"description":"CR followed by non-LF",
"input":"\r?",
"output":[["Character", "\n?"]]},
--
1.8.3.2

From 2d1fe2eed8bb91f56360feb323ea2c8a1b4c0263 Mon Sep 17 00:00:00 2001
From: Achal-Aggarwal <theachalaggarwal@gmail.com>
Date: Wed, 12 Mar 2014 02:32:24 +0530
Subject: [PATCH 09/11] Removing failing testcase of regression.dat for
tokeniser2.

---
test/data/tokeniser2/regression.test | 8 --------
1 file changed, 8 deletions(-)

diff --git a/test/data/tokeniser2/regression.test b/test/data/tokeniser2/regression.test
index ae3e66a..7a08b02 100644
--- a/test/data/tokeniser2/regression.test
+++ b/test/data/tokeniser2/regression.test
@@ -1,13 +1,5 @@
{"tests": [

-{"description":"CR in double-quoted attribute value",
-"input":"<foo bar=\"\r\u2022xyz\"",
-"output":[["StartTag", "foo", {"bar":"\n\u2022xyz"}]]},
-
-{"description":"CR in single-quoted attribute value",
-"input":"<foo bar='\r\u2022xyz'",
-"output":[["StartTag", "foo", {"bar":"\n\u2022xyz"}]]},
-
{"description":"CR in comment",
"input":"<!--\r\u2022xyz-->",
"output":[["Comment", "\n\u2022xyz"]]},
--
1.8.3.2

From 544a30788d7353aac70f32540a4f6b5bf0a3fcc5 Mon Sep 17 00:00:00 2001
From: Achal-Aggarwal <theachalaggarwal@gmail.com>
Date: Wed, 12 Mar 2014 02:42:04 +0530
Subject: [PATCH 10/11] Updating tokeniser tests.

---
test/data/tokeniser2/INDEX | 2 +-
test/data/tokeniser2/contentModelFlags.test | 50 +-
test/data/tokeniser2/entities.test | 2122 +-----------------------
test/data/tokeniser2/escapeFlag.test | 24 +-
test/data/tokeniser2/numericEntities.test | 190 +--
test/data/tokeniser2/test1.test | 28 +-
test/data/tokeniser2/test2.test | 26 +-
test/data/tokeniser2/test3.test | 2366 ++++++++++++++++++++++-----
test/data/tokeniser2/test4.test | 87 +-
test/data/tokeniser2/unicodeChars.test | 8 -
10 files changed, 2231 insertions(+), 2672 deletions(-)

diff --git a/test/data/tokeniser2/INDEX b/test/data/tokeniser2/INDEX
index f16feb4..6fee018 100644
--- a/test/data/tokeniser2/INDEX
+++ b/test/data/tokeniser2/INDEX
@@ -10,6 +10,6 @@ contentModelFlags.test html5lib content model tests
entities.test html5lib entity tests
escapeFlag.test html5lib escape flag tests
numericEntities.test html5lib numeric entities tests
-#unicodeChars.test html5lib unicode character tests
+unicodeChars.test html5lib unicode character tests
cdata.test CDATA section tests
regression.test Regression tests
diff --git a/test/data/tokeniser2/contentModelFlags.test b/test/data/tokeniser2/contentModelFlags.test
index 1dec3e8..a8b1695 100644
--- a/test/data/tokeniser2/contentModelFlags.test
+++ b/test/data/tokeniser2/contentModelFlags.test
@@ -1,73 +1,73 @@
{"tests": [

{"description":"PLAINTEXT content model flag",
-"contentModelFlags":["PLAINTEXT"],
+"initialStates":["PLAINTEXT state"],
"lastStartTag":"plaintext",
"input":"<head>&body;",
"output":[["Character", "<head>&body;"]]},

-{"description":"End tag closing RCDATA or CDATA",
-"contentModelFlags":["RCDATA", "CDATA"],
+{"description":"End tag closing RCDATA or RAWTEXT",
+"initialStates":["RCDATA state", "RAWTEXT state"],
"lastStartTag":"xmp",
"input":"foo</xmp>",
"output":[["Character", "foo"], ["EndTag", "xmp"]]},

-{"description":"End tag closing RCDATA or CDATA (case-insensitivity)",
-"contentModelFlags":["RCDATA", "CDATA"],
+{"description":"End tag closing RCDATA or RAWTEXT (case-insensitivity)",
+"initialStates":["RCDATA state", "RAWTEXT state"],
"lastStartTag":"xmp",
"input":"foo</xMp>",
"output":[["Character", "foo"], ["EndTag", "xmp"]]},

-{"description":"End tag closing RCDATA or CDATA (ending with space)",
-"contentModelFlags":["RCDATA", "CDATA"],
+{"description":"End tag closing RCDATA or RAWTEXT (ending with space)",
+"initialStates":["RCDATA state", "RAWTEXT state"],
"lastStartTag":"xmp",
"input":"foo</xmp ",
-"output":[["Character", "foo"], "ParseError", ["EndTag", "xmp"]]},
+"output":[["Character", "foo"], "ParseError"]},

-{"description":"End tag closing RCDATA or CDATA (ending with EOF)",
-"contentModelFlags":["RCDATA", "CDATA"],
+{"description":"End tag closing RCDATA or RAWTEXT (ending with EOF)",
+"initialStates":["RCDATA state", "RAWTEXT state"],
"lastStartTag":"xmp",
"input":"foo</xmp",
-"output":[["Character", "foo"], "ParseError", ["EndTag", "xmp"]]},
+"output":[["Character", "foo</xmp"]]},

-{"description":"End tag closing RCDATA or CDATA (ending with slash)",
-"contentModelFlags":["RCDATA", "CDATA"],
+{"description":"End tag closing RCDATA or RAWTEXT (ending with slash)",
+"initialStates":["RCDATA state", "RAWTEXT state"],
"lastStartTag":"xmp",
"input":"foo</xmp/",
-"output":[["Character", "foo"], "ParseError", ["EndTag", "xmp"]]},
+"output":[["Character", "foo"], "ParseError"]},

-{"description":"End tag not closing RCDATA or CDATA (ending with left-angle-bracket)",
-"contentModelFlags":["RCDATA", "CDATA"],
+{"description":"End tag not closing RCDATA or RAWTEXT (ending with left-angle-bracket)",
+"initialStates":["RCDATA state", "RAWTEXT state"],
"lastStartTag":"xmp",
"input":"foo</xmp<",
"output":[["Character", "foo</xmp<"]]},

-{"description":"End tag with incorrect name in RCDATA or CDATA",
-"contentModelFlags":["RCDATA", "CDATA"],
+{"description":"End tag with incorrect name in RCDATA or RAWTEXT",
+"initialStates":["RCDATA state", "RAWTEXT state"],
"lastStartTag":"xmp",
"input":"</foo>bar</xmp>",
"output":[["Character", "</foo>bar"], ["EndTag", "xmp"]]},

-{"description":"End tag with incorrect name in RCDATA or CDATA (starting like correct name)",
-"contentModelFlags":["RCDATA", "CDATA"],
+{"description":"End tag with incorrect name in RCDATA or RAWTEXT (starting like correct name)",
+"initialStates":["RCDATA state", "RAWTEXT state"],
"lastStartTag":"xmp",
"input":"</foo>bar</xmpaar>",
"output":[["Character", "</foo>bar</xmpaar>"]]},

-{"description":"End tag closing RCDATA or CDATA, switching back to PCDATA",
-"contentModelFlags":["RCDATA", "CDATA"],
+{"description":"End tag closing RCDATA or RAWTEXT, switching back to PCDATA",
+"initialStates":["RCDATA state", "RAWTEXT state"],
"lastStartTag":"xmp",
"input":"foo</xmp></baz>",
"output":[["Character", "foo"], ["EndTag", "xmp"], ["EndTag", "baz"]]},

-{"description":"CDATA w/ something looking like an entity",
-"contentModelFlags":["CDATA"],
+{"description":"RAWTEXT w/ something looking like an entity",
+"initialStates":["RAWTEXT state"],
"lastStartTag":"xmp",
"input":"&foo;",
"output":[["Character", "&foo;"]]},

{"description":"RCDATA w/ an entity",
-"contentModelFlags":["RCDATA"],
+"initialStates":["RCDATA state"],
"lastStartTag":"textarea",
"input":"&lt;",
"output":[["Character", "<"]]}
diff --git a/test/data/tokeniser2/entities.test b/test/data/tokeniser2/entities.test
index 8b8d352..27b85a1 100644
--- a/test/data/tokeniser2/entities.test
+++ b/test/data/tokeniser2/entities.test
@@ -2,2091 +2,19 @@

{"description": "Undefined named entity in attribute value ending in semicolon and whose name starts with a known entity name.",
"input":"<h a='&noti;'>",
-"output": ["ParseError", ["StartTag", "h", {"a": "&noti;"}]]},
+"output": [["StartTag", "h", {"a": "&noti;"}]]},

-{"description": "Named entity: AElig with a semi-colon.",
-"input":"&AElig;",
-"output": [["Character", "\u00C6"]]},
-
-{"description": "Named entity: AElig without a semi-colon.",
-"input":"&AElig",
-"output": ["ParseError", ["Character", "\u00C6"]]},
-
-{"description": "Named entity: AMP with a semi-colon.",
-"input":"&AMP;",
-"output": [["Character", "\u0026"]]},
-
-{"description": "Named entity: AMP without a semi-colon.",
-"input":"&AMP",
-"output": ["ParseError", ["Character", "\u0026"]]},
-
-{"description": "Named entity: Aacute with a semi-colon.",
-"input":"&Aacute;",
-"output": [["Character", "\u00C1"]]},
-
-{"description": "Named entity: Aacute without a semi-colon.",
-"input":"&Aacute",
-"output": ["ParseError", ["Character", "\u00C1"]]},
-
-{"description": "Named entity: Acirc with a semi-colon.",
-"input":"&Acirc;",
-"output": [["Character", "\u00C2"]]},
-
-{"description": "Named entity: Acirc without a semi-colon.",
-"input":"&Acirc",
-"output": ["ParseError", ["Character", "\u00C2"]]},
-
-{"description": "Named entity: Agrave with a semi-colon.",
-"input":"&Agrave;",
-"output": [["Character", "\u00C0"]]},
-
-{"description": "Named entity: Agrave without a semi-colon.",
-"input":"&Agrave",
-"output": ["ParseError", ["Character", "\u00C0"]]},
-
-{"description": "Named entity: Alpha with a semi-colon.",
-"input":"&Alpha;",
-"output": [["Character", "\u0391"]]},
-
-{"description": "Named entity: Aring with a semi-colon.",
-"input":"&Aring;",
-"output": [["Character", "\u00C5"]]},
-
-{"description": "Named entity: Aring without a semi-colon.",
-"input":"&Aring",
-"output": ["ParseError", ["Character", "\u00C5"]]},
-
-{"description": "Named entity: Atilde with a semi-colon.",
-"input":"&Atilde;",
-"output": [["Character", "\u00C3"]]},
-
-{"description": "Named entity: Atilde without a semi-colon.",
-"input":"&Atilde",
-"output": ["ParseError", ["Character", "\u00C3"]]},
-
-{"description": "Named entity: Auml with a semi-colon.",
-"input":"&Auml;",
-"output": [["Character", "\u00C4"]]},
-
-{"description": "Named entity: Auml without a semi-colon.",
-"input":"&Auml",
-"output": ["ParseError", ["Character", "\u00C4"]]},
-
-{"description": "Named entity: Beta with a semi-colon.",
-"input":"&Beta;",
-"output": [["Character", "\u0392"]]},
-
-{"description": "Named entity: COPY with a semi-colon.",
-"input":"&COPY;",
-"output": [["Character", "\u00A9"]]},
-
-{"description": "Named entity: COPY without a semi-colon.",
-"input":"&COPY",
-"output": ["ParseError", ["Character", "\u00A9"]]},
-
-{"description": "Named entity: Ccedil with a semi-colon.",
-"input":"&Ccedil;",
-"output": [["Character", "\u00C7"]]},
-
-{"description": "Named entity: Ccedil without a semi-colon.",
-"input":"&Ccedil",
-"output": ["ParseError", ["Character", "\u00C7"]]},
-
-{"description": "Named entity: Chi with a semi-colon.",
-"input":"&Chi;",
-"output": [["Character", "\u03A7"]]},
-
-{"description": "Named entity: Dagger with a semi-colon.",
-"input":"&Dagger;",
-"output": [["Character", "\u2021"]]},
-
-{"description": "Named entity: Delta with a semi-colon.",
-"input":"&Delta;",
-"output": [["Character", "\u0394"]]},
-
-{"description": "Named entity: ETH with a semi-colon.",
-"input":"&ETH;",
-"output": [["Character", "\u00D0"]]},
-
-{"description": "Named entity: ETH without a semi-colon.",
-"input":"&ETH",
-"output": ["ParseError", ["Character", "\u00D0"]]},
-
-{"description": "Named entity: Eacute with a semi-colon.",
-"input":"&Eacute;",
-"output": [["Character", "\u00C9"]]},
-
-{"description": "Named entity: Eacute without a semi-colon.",
-"input":"&Eacute",
-"output": ["ParseError", ["Character", "\u00C9"]]},
-
-{"description": "Named entity: Ecirc with a semi-colon.",
-"input":"&Ecirc;",
-"output": [["Character", "\u00CA"]]},
-
-{"description": "Named entity: Ecirc without a semi-colon.",
-"input":"&Ecirc",
-"output": ["ParseError", ["Character", "\u00CA"]]},
-
-{"description": "Named entity: Egrave with a semi-colon.",
-"input":"&Egrave;",
-"output": [["Character", "\u00C8"]]},
-
-{"description": "Named entity: Egrave without a semi-colon.",
-"input":"&Egrave",
-"output": ["ParseError", ["Character", "\u00C8"]]},
-
-{"description": "Named entity: Epsilon with a semi-colon.",
-"input":"&Epsilon;",
-"output": [["Character", "\u0395"]]},
-
-{"description": "Named entity: Eta with a semi-colon.",
-"input":"&Eta;",
-"output": [["Character", "\u0397"]]},
-
-{"description": "Named entity: Euml with a semi-colon.",
-"input":"&Euml;",
-"output": [["Character", "\u00CB"]]},
-
-{"description": "Named entity: Euml without a semi-colon.",
-"input":"&Euml",
-"output": ["ParseError", ["Character", "\u00CB"]]},
-
-{"description": "Named entity: GT with a semi-colon.",
-"input":"&GT;",
-"output": [["Character", "\u003E"]]},
-
-{"description": "Named entity: GT without a semi-colon.",
-"input":"&GT",
-"output": ["ParseError", ["Character", "\u003E"]]},
-
-{"description": "Named entity: Gamma with a semi-colon.",
-"input":"&Gamma;",
-"output": [["Character", "\u0393"]]},
-
-{"description": "Named entity: Iacute with a semi-colon.",
-"input":"&Iacute;",
-"output": [["Character", "\u00CD"]]},
-
-{"description": "Named entity: Iacute without a semi-colon.",
-"input":"&Iacute",
-"output": ["ParseError", ["Character", "\u00CD"]]},
-
-{"description": "Named entity: Icirc with a semi-colon.",
-"input":"&Icirc;",
-"output": [["Character", "\u00CE"]]},
-
-{"description": "Named entity: Icirc without a semi-colon.",
-"input":"&Icirc",
-"output": ["ParseError", ["Character", "\u00CE"]]},
-
-{"description": "Named entity: Igrave with a semi-colon.",
-"input":"&Igrave;",
-"output": [["Character", "\u00CC"]]},
-
-{"description": "Named entity: Igrave without a semi-colon.",
-"input":"&Igrave",
-"output": ["ParseError", ["Character", "\u00CC"]]},
-
-{"description": "Named entity: Iota with a semi-colon.",
-"input":"&Iota;",
-"output": [["Character", "\u0399"]]},
-
-{"description": "Named entity: Iuml with a semi-colon.",
-"input":"&Iuml;",
-"output": [["Character", "\u00CF"]]},
-
-{"description": "Named entity: Iuml without a semi-colon.",
-"input":"&Iuml",
-"output": ["ParseError", ["Character", "\u00CF"]]},
-
-{"description": "Named entity: Kappa with a semi-colon.",
-"input":"&Kappa;",
-"output": [["Character", "\u039A"]]},
-
-{"description": "Named entity: LT with a semi-colon.",
-"input":"&LT;",
-"output": [["Character", "\u003C"]]},
-
-{"description": "Named entity: LT without a semi-colon.",
-"input":"&LT",
-"output": ["ParseError", ["Character", "\u003C"]]},
-
-{"description": "Named entity: Lambda with a semi-colon.",
-"input":"&Lambda;",
-"output": [["Character", "\u039B"]]},
-
-{"description": "Named entity: Mu with a semi-colon.",
-"input":"&Mu;",
-"output": [["Character", "\u039C"]]},
-
-{"description": "Named entity: Ntilde with a semi-colon.",
-"input":"&Ntilde;",
-"output": [["Character", "\u00D1"]]},
-
-{"description": "Named entity: Ntilde without a semi-colon.",
-"input":"&Ntilde",
-"output": ["ParseError", ["Character", "\u00D1"]]},
-
-{"description": "Named entity: Nu with a semi-colon.",
-"input":"&Nu;",
-"output": [["Character", "\u039D"]]},
-
-{"description": "Named entity: OElig with a semi-colon.",
-"input":"&OElig;",
-"output": [["Character", "\u0152"]]},
-
-{"description": "Named entity: Oacute with a semi-colon.",
-"input":"&Oacute;",
-"output": [["Character", "\u00D3"]]},
-
-{"description": "Named entity: Oacute without a semi-colon.",
-"input":"&Oacute",
-"output": ["ParseError", ["Character", "\u00D3"]]},
-
-{"description": "Named entity: Ocirc with a semi-colon.",
-"input":"&Ocirc;",
-"output": [["Character", "\u00D4"]]},
-
-{"description": "Named entity: Ocirc without a semi-colon.",
-"input":"&Ocirc",
-"output": ["ParseError", ["Character", "\u00D4"]]},
-
-{"description": "Named entity: Ograve with a semi-colon.",
-"input":"&Ograve;",
-"output": [["Character", "\u00D2"]]},
-
-{"description": "Named entity: Ograve without a semi-colon.",
-"input":"&Ograve",
-"output": ["ParseError", ["Character", "\u00D2"]]},
-
-{"description": "Named entity: Omega with a semi-colon.",
-"input":"&Omega;",
-"output": [["Character", "\u03A9"]]},
-
-{"description": "Named entity: Omicron with a semi-colon.",
-"input":"&Omicron;",
-"output": [["Character", "\u039F"]]},
-
-{"description": "Named entity: Oslash with a semi-colon.",
-"input":"&Oslash;",
-"output": [["Character", "\u00D8"]]},
-
-{"description": "Named entity: Oslash without a semi-colon.",
-"input":"&Oslash",
-"output": ["ParseError", ["Character", "\u00D8"]]},
-
-{"description": "Named entity: Otilde with a semi-colon.",
-"input":"&Otilde;",
-"output": [["Character", "\u00D5"]]},
-
-{"description": "Named entity: Otilde without a semi-colon.",
-"input":"&Otilde",
-"output": ["ParseError", ["Character", "\u00D5"]]},
-
-{"description": "Named entity: Ouml with a semi-colon.",
-"input":"&Ouml;",
-"output": [["Character", "\u00D6"]]},
-
-{"description": "Named entity: Ouml without a semi-colon.",
-"input":"&Ouml",
-"output": ["ParseError", ["Character", "\u00D6"]]},
-
-{"description": "Named entity: Phi with a semi-colon.",
-"input":"&Phi;",
-"output": [["Character", "\u03A6"]]},
-
-{"description": "Named entity: Pi with a semi-colon.",
-"input":"&Pi;",
-"output": [["Character", "\u03A0"]]},
-
-{"description": "Named entity: Prime with a semi-colon.",
-"input":"&Prime;",
-"output": [["Character", "\u2033"]]},
-
-{"description": "Named entity: Psi with a semi-colon.",
-"input":"&Psi;",
-"output": [["Character", "\u03A8"]]},
-
-{"description": "Named entity: QUOT with a semi-colon.",
-"input":"&QUOT;",
-"output": [["Character", "\u0022"]]},
-
-{"description": "Named entity: QUOT without a semi-colon.",
-"input":"&QUOT",
-"output": ["ParseError", ["Character", "\u0022"]]},
-
-{"description": "Named entity: REG with a semi-colon.",
-"input":"&REG;",
-"output": [["Character", "\u00AE"]]},
-
-{"description": "Named entity: REG without a semi-colon.",
-"input":"&REG",
-"output": ["ParseError", ["Character", "\u00AE"]]},
-
-{"description": "Named entity: Rho with a semi-colon.",
-"input":"&Rho;",
-"output": [["Character", "\u03A1"]]},
-
-{"description": "Named entity: Scaron with a semi-colon.",
-"input":"&Scaron;",
-"output": [["Character", "\u0160"]]},
-
-{"description": "Named entity: Sigma with a semi-colon.",
-"input":"&Sigma;",
-"output": [["Character", "\u03A3"]]},
-
-{"description": "Named entity: THORN with a semi-colon.",
-"input":"&THORN;",
-"output": [["Character", "\u00DE"]]},
-
-{"description": "Named entity: THORN without a semi-colon.",
-"input":"&THORN",
-"output": ["ParseError", ["Character", "\u00DE"]]},
-
-{"description": "Named entity: TRADE with a semi-colon.",
-"input":"&TRADE;",
-"output": [["Character", "\u2122"]]},
-
-{"description": "Named entity: Tau with a semi-colon.",
-"input":"&Tau;",
-"output": [["Character", "\u03A4"]]},
-
-{"description": "Named entity: Theta with a semi-colon.",
-"input":"&Theta;",
-"output": [["Character", "\u0398"]]},
-
-{"description": "Named entity: Uacute with a semi-colon.",
-"input":"&Uacute;",
-"output": [["Character", "\u00DA"]]},
-
-{"description": "Named entity: Uacute without a semi-colon.",
-"input":"&Uacute",
-"output": ["ParseError", ["Character", "\u00DA"]]},
-
-{"description": "Named entity: Ucirc with a semi-colon.",
-"input":"&Ucirc;",
-"output": [["Character", "\u00DB"]]},
-
-{"description": "Named entity: Ucirc without a semi-colon.",
-"input":"&Ucirc",
-"output": ["ParseError", ["Character", "\u00DB"]]},
-
-{"description": "Named entity: Ugrave with a semi-colon.",
-"input":"&Ugrave;",
-"output": [["Character", "\u00D9"]]},
-
-{"description": "Named entity: Ugrave without a semi-colon.",
-"input":"&Ugrave",
-"output": ["ParseError", ["Character", "\u00D9"]]},
-
-{"description": "Named entity: Upsilon with a semi-colon.",
-"input":"&Upsilon;",
-"output": [["Character", "\u03A5"]]},
-
-{"description": "Named entity: Uuml with a semi-colon.",
-"input":"&Uuml;",
-"output": [["Character", "\u00DC"]]},
-
-{"description": "Named entity: Uuml without a semi-colon.",
-"input":"&Uuml",
-"output": ["ParseError", ["Character", "\u00DC"]]},
-
-{"description": "Named entity: Xi with a semi-colon.",
-"input":"&Xi;",
-"output": [["Character", "\u039E"]]},
-
-{"description": "Named entity: Yacute with a semi-colon.",
-"input":"&Yacute;",
-"output": [["Character", "\u00DD"]]},
-
-{"description": "Named entity: Yacute without a semi-colon.",
-"input":"&Yacute",
-"output": ["ParseError", ["Character", "\u00DD"]]},
-
-{"description": "Named entity: Yuml with a semi-colon.",
-"input":"&Yuml;",
-"output": [["Character", "\u0178"]]},
-
-{"description": "Named entity: Zeta with a semi-colon.",
-"input":"&Zeta;",
-"output": [["Character", "\u0396"]]},
-
-{"description": "Named entity: aacute with a semi-colon.",
-"input":"&aacute;",
-"output": [["Character", "\u00E1"]]},
-
-{"description": "Named entity: aacute without a semi-colon.",
-"input":"&aacute",
-"output": ["ParseError", ["Character", "\u00E1"]]},
-
-{"description": "Named entity: acirc with a semi-colon.",
-"input":"&acirc;",
-"output": [["Character", "\u00E2"]]},
-
-{"description": "Named entity: acirc without a semi-colon.",
-"input":"&acirc",
-"output": ["ParseError", ["Character", "\u00E2"]]},
-
-{"description": "Named entity: acute with a semi-colon.",
-"input":"&acute;",
-"output": [["Character", "\u00B4"]]},
-
-{"description": "Named entity: acute without a semi-colon.",
-"input":"&acute",
-"output": ["ParseError", ["Character", "\u00B4"]]},
-
-{"description": "Named entity: aelig with a semi-colon.",
-"input":"&aelig;",
-"output": [["Character", "\u00E6"]]},
-
-{"description": "Named entity: aelig without a semi-colon.",
-"input":"&aelig",
-"output": ["ParseError", ["Character", "\u00E6"]]},
-
-{"description": "Named entity: agrave with a semi-colon.",
-"input":"&agrave;",
-"output": [["Character", "\u00E0"]]},
-
-{"description": "Named entity: agrave without a semi-colon.",
-"input":"&agrave",
-"output": ["ParseError", ["Character", "\u00E0"]]},
-
-{"description": "Named entity: alefsym with a semi-colon.",
-"input":"&alefsym;",
-"output": [["Character", "\u2135"]]},
-
-{"description": "Named entity: alpha with a semi-colon.",
-"input":"&alpha;",
-"output": [["Character", "\u03B1"]]},
-
-{"description": "Named entity: amp with a semi-colon.",
-"input":"&amp;",
-"output": [["Character", "\u0026"]]},
-
-{"description": "Named entity: amp without a semi-colon.",
-"input":"&amp",
-"output": ["ParseError", ["Character", "\u0026"]]},
-
-{"description": "Named entity: and with a semi-colon.",
-"input":"&and;",
-"output": [["Character", "\u2227"]]},
-
-{"description": "Named entity: ang with a semi-colon.",
-"input":"&ang;",
-"output": [["Character", "\u2220"]]},
-
-{"description": "Named entity: apos with a semi-colon.",
-"input":"&apos;",
-"output": [["Character", "\u0027"]]},
-
-{"description": "Named entity: aring with a semi-colon.",
-"input":"&aring;",
-"output": [["Character", "\u00E5"]]},
-
-{"description": "Named entity: aring without a semi-colon.",
-"input":"&aring",
-"output": ["ParseError", ["Character", "\u00E5"]]},
-
-{"description": "Named entity: asymp with a semi-colon.",
-"input":"&asymp;",
-"output": [["Character", "\u2248"]]},
-
-{"description": "Named entity: atilde with a semi-colon.",
-"input":"&atilde;",
-"output": [["Character", "\u00E3"]]},
-
-{"description": "Named entity: atilde without a semi-colon.",
-"input":"&atilde",
-"output": ["ParseError", ["Character", "\u00E3"]]},
-
-{"description": "Named entity: auml with a semi-colon.",
-"input":"&auml;",
-"output": [["Character", "\u00E4"]]},
-
-{"description": "Named entity: auml without a semi-colon.",
-"input":"&auml",
-"output": ["ParseError", ["Character", "\u00E4"]]},
-
-{"description": "Named entity: bdquo with a semi-colon.",
-"input":"&bdquo;",
-"output": [["Character", "\u201E"]]},
-
-{"description": "Named entity: beta with a semi-colon.",
-"input":"&beta;",
-"output": [["Character", "\u03B2"]]},
-
-{"description": "Named entity: brvbar with a semi-colon.",
-"input":"&brvbar;",
-"output": [["Character", "\u00A6"]]},
-
-{"description": "Named entity: brvbar without a semi-colon.",
-"input":"&brvbar",
-"output": ["ParseError", ["Character", "\u00A6"]]},
-
-{"description": "Named entity: bull with a semi-colon.",
-"input":"&bull;",
-"output": [["Character", "\u2022"]]},
-
-{"description": "Named entity: cap with a semi-colon.",
-"input":"&cap;",
-"output": [["Character", "\u2229"]]},
-
-{"description": "Named entity: ccedil with a semi-colon.",
-"input":"&ccedil;",
-"output": [["Character", "\u00E7"]]},
-
-{"description": "Named entity: ccedil without a semi-colon.",
-"input":"&ccedil",
-"output": ["ParseError", ["Character", "\u00E7"]]},
-
-{"description": "Named entity: cedil with a semi-colon.",
-"input":"&cedil;",
-"output": [["Character", "\u00B8"]]},
-
-{"description": "Named entity: cedil without a semi-colon.",
-"input":"&cedil",
-"output": ["ParseError", ["Character", "\u00B8"]]},
-
-{"description": "Named entity: cent with a semi-colon.",
-"input":"&cent;",
-"output": [["Character", "\u00A2"]]},
-
-{"description": "Named entity: cent without a semi-colon.",
-"input":"&cent",
-"output": ["ParseError", ["Character", "\u00A2"]]},
-
-{"description": "Named entity: chi with a semi-colon.",
-"input":"&chi;",
-"output": [["Character", "\u03C7"]]},
-
-{"description": "Named entity: circ with a semi-colon.",
-"input":"&circ;",
-"output": [["Character", "\u02C6"]]},
-
-{"description": "Named entity: clubs with a semi-colon.",
-"input":"&clubs;",
-"output": [["Character", "\u2663"]]},
-
-{"description": "Named entity: cong with a semi-colon.",
-"input":"&cong;",
-"output": [["Character", "\u2245"]]},
-
-{"description": "Named entity: copy with a semi-colon.",
-"input":"&copy;",
-"output": [["Character", "\u00A9"]]},
-
-{"description": "Named entity: copy without a semi-colon.",
-"input":"&copy",
-"output": ["ParseError", ["Character", "\u00A9"]]},
-
-{"description": "Named entity: crarr with a semi-colon.",
-"input":"&crarr;",
-"output": [["Character", "\u21B5"]]},
-
-{"description": "Named entity: cup with a semi-colon.",
-"input":"&cup;",
-"output": [["Character", "\u222A"]]},
-
-{"description": "Named entity: curren with a semi-colon.",
-"input":"&curren;",
-"output": [["Character", "\u00A4"]]},
-
-{"description": "Named entity: curren without a semi-colon.",
-"input":"&curren",
-"output": ["ParseError", ["Character", "\u00A4"]]},
-
-{"description": "Named entity: dArr with a semi-colon.",
-"input":"&dArr;",
-"output": [["Character", "\u21D3"]]},
-
-{"description": "Named entity: dagger with a semi-colon.",
-"input":"&dagger;",
-"output": [["Character", "\u2020"]]},
-
-{"description": "Named entity: darr with a semi-colon.",
-"input":"&darr;",
-"output": [["Character", "\u2193"]]},
-
-{"description": "Named entity: deg with a semi-colon.",
-"input":"&deg;",
-"output": [["Character", "\u00B0"]]},
-
-{"description": "Named entity: deg without a semi-colon.",
-"input":"&deg",
-"output": ["ParseError", ["Character", "\u00B0"]]},
-
-{"description": "Named entity: delta with a semi-colon.",
-"input":"&delta;",
-"output": [["Character", "\u03B4"]]},
-
-{"description": "Named entity: diams with a semi-colon.",
-"input":"&diams;",
-"output": [["Character", "\u2666"]]},
-
-{"description": "Named entity: divide with a semi-colon.",
-"input":"&divide;",
-"output": [["Character", "\u00F7"]]},
-
-{"description": "Named entity: divide without a semi-colon.",
-"input":"&divide",
-"output": ["ParseError", ["Character", "\u00F7"]]},
-
-{"description": "Named entity: eacute with a semi-colon.",
-"input":"&eacute;",
-"output": [["Character", "\u00E9"]]},
-
-{"description": "Named entity: eacute without a semi-colon.",
-"input":"&eacute",
-"output": ["ParseError", ["Character", "\u00E9"]]},
-
-{"description": "Named entity: ecirc with a semi-colon.",
-"input":"&ecirc;",
-"output": [["Character", "\u00EA"]]},
-
-{"description": "Named entity: ecirc without a semi-colon.",
-"input":"&ecirc",
-"output": ["ParseError", ["Character", "\u00EA"]]},
-
-{"description": "Named entity: egrave with a semi-colon.",
-"input":"&egrave;",
-"output": [["Character", "\u00E8"]]},
-
-{"description": "Named entity: egrave without a semi-colon.",
-"input":"&egrave",
-"output": ["ParseError", ["Character", "\u00E8"]]},
-
-{"description": "Named entity: empty with a semi-colon.",
-"input":"&empty;",
-"output": [["Character", "\u2205"]]},
-
-{"description": "Named entity: emsp with a semi-colon.",
-"input":"&emsp;",
-"output": [["Character", "\u2003"]]},
-
-{"description": "Named entity: ensp with a semi-colon.",
-"input":"&ensp;",
-"output": [["Character", "\u2002"]]},
-
-{"description": "Named entity: epsilon with a semi-colon.",
-"input":"&epsilon;",
-"output": [["Character", "\u03B5"]]},
-
-{"description": "Named entity: equiv with a semi-colon.",
-"input":"&equiv;",
-"output": [["Character", "\u2261"]]},
-
-{"description": "Named entity: eta with a semi-colon.",
-"input":"&eta;",
-"output": [["Character", "\u03B7"]]},
-
-{"description": "Named entity: eth with a semi-colon.",
-"input":"&eth;",
-"output": [["Character", "\u00F0"]]},
-
-{"description": "Named entity: eth without a semi-colon.",
-"input":"&eth",
-"output": ["ParseError", ["Character", "\u00F0"]]},
-
-{"description": "Named entity: euml with a semi-colon.",
-"input":"&euml;",
-"output": [["Character", "\u00EB"]]},
-
-{"description": "Named entity: euml without a semi-colon.",
-"input":"&euml",
-"output": ["ParseError", ["Character", "\u00EB"]]},
-
-{"description": "Named entity: euro with a semi-colon.",
-"input":"&euro;",
-"output": [["Character", "\u20AC"]]},
-
-{"description": "Named entity: exist with a semi-colon.",
-"input":"&exist;",
-"output": [["Character", "\u2203"]]},
-
-{"description": "Named entity: fnof with a semi-colon.",
-"input":"&fnof;",
-"output": [["Character", "\u0192"]]},
-
-{"description": "Named entity: forall with a semi-colon.",
-"input":"&forall;",
-"output": [["Character", "\u2200"]]},
-
-{"description": "Named entity: frac12 with a semi-colon.",
-"input":"&frac12;",
-"output": [["Character", "\u00BD"]]},
-
-{"description": "Named entity: frac12 without a semi-colon.",
-"input":"&frac12",
-"output": ["ParseError", ["Character", "\u00BD"]]},
-
-{"description": "Named entity: frac14 with a semi-colon.",
-"input":"&frac14;",
-"output": [["Character", "\u00BC"]]},
-
-{"description": "Named entity: frac14 without a semi-colon.",
-"input":"&frac14",
-"output": ["ParseError", ["Character", "\u00BC"]]},
-
-{"description": "Named entity: frac34 with a semi-colon.",
-"input":"&frac34;",
-"output": [["Character", "\u00BE"]]},
-
-{"description": "Named entity: frac34 without a semi-colon.",
-"input":"&frac34",
-"output": ["ParseError", ["Character", "\u00BE"]]},
-
-{"description": "Named entity: frasl with a semi-colon.",
-"input":"&frasl;",
-"output": [["Character", "\u2044"]]},
-
-{"description": "Named entity: gamma with a semi-colon.",
-"input":"&gamma;",
-"output": [["Character", "\u03B3"]]},
-
-{"description": "Named entity: ge with a semi-colon.",
-"input":"&ge;",
-"output": [["Character", "\u2265"]]},
-
-{"description": "Named entity: gt with a semi-colon.",
-"input":"&gt;",
-"output": [["Character", "\u003E"]]},
-
-{"description": "Named entity: gt without a semi-colon.",
-"input":"&gt",
-"output": ["ParseError", ["Character", "\u003E"]]},
-
-{"description": "Named entity: hArr with a semi-colon.",
-"input":"&hArr;",
-"output": [["Character", "\u21D4"]]},
-
-{"description": "Named entity: harr with a semi-colon.",
-"input":"&harr;",
-"output": [["Character", "\u2194"]]},
-
-{"description": "Named entity: hearts with a semi-colon.",
-"input":"&hearts;",
-"output": [["Character", "\u2665"]]},
-
-{"description": "Named entity: hellip with a semi-colon.",
-"input":"&hellip;",
-"output": [["Character", "\u2026"]]},
-
-{"description": "Named entity: iacute with a semi-colon.",
-"input":"&iacute;",
-"output": [["Character", "\u00ED"]]},
-
-{"description": "Named entity: iacute without a semi-colon.",
-"input":"&iacute",
-"output": ["ParseError", ["Character", "\u00ED"]]},
-
-{"description": "Named entity: icirc with a semi-colon.",
-"input":"&icirc;",
-"output": [["Character", "\u00EE"]]},
-
-{"description": "Named entity: icirc without a semi-colon.",
-"input":"&icirc",
-"output": ["ParseError", ["Character", "\u00EE"]]},
-
-{"description": "Named entity: iexcl with a semi-colon.",
-"input":"&iexcl;",
-"output": [["Character", "\u00A1"]]},
-
-{"description": "Named entity: iexcl without a semi-colon.",
-"input":"&iexcl",
-"output": ["ParseError", ["Character", "\u00A1"]]},
-
-{"description": "Named entity: igrave with a semi-colon.",
-"input":"&igrave;",
-"output": [["Character", "\u00EC"]]},
-
-{"description": "Named entity: igrave without a semi-colon.",
-"input":"&igrave",
-"output": ["ParseError", ["Character", "\u00EC"]]},
-
-{"description": "Named entity: image with a semi-colon.",
-"input":"&image;",
-"output": [["Character", "\u2111"]]},
-
-{"description": "Named entity: infin with a semi-colon.",
-"input":"&infin;",
-"output": [["Character", "\u221E"]]},
-
-{"description": "Named entity: int with a semi-colon.",
-"input":"&int;",
-"output": [["Character", "\u222B"]]},
-
-{"description": "Named entity: iota with a semi-colon.",
-"input":"&iota;",
-"output": [["Character", "\u03B9"]]},
-
-{"description": "Named entity: iquest with a semi-colon.",
-"input":"&iquest;",
-"output": [["Character", "\u00BF"]]},
-
-{"description": "Named entity: iquest without a semi-colon.",
-"input":"&iquest",
-"output": ["ParseError", ["Character", "\u00BF"]]},
-
-{"description": "Named entity: isin with a semi-colon.",
-"input":"&isin;",
-"output": [["Character", "\u2208"]]},
-
-{"description": "Named entity: iuml with a semi-colon.",
-"input":"&iuml;",
-"output": [["Character", "\u00EF"]]},
-
-{"description": "Named entity: iuml without a semi-colon.",
-"input":"&iuml",
-"output": ["ParseError", ["Character", "\u00EF"]]},
-
-{"description": "Named entity: kappa with a semi-colon.",
-"input":"&kappa;",
-"output": [["Character", "\u03BA"]]},
-
-{"description": "Named entity: lArr with a semi-colon.",
-"input":"&lArr;",
-"output": [["Character", "\u21D0"]]},
-
-{"description": "Named entity: lambda with a semi-colon.",
-"input":"&lambda;",
-"output": [["Character", "\u03BB"]]},
-
-{"description": "Named entity: lang with a semi-colon.",
-"input":"&lang;",
-"output": [["Character", "\u27E8"]]},
-
-{"description": "Named entity: laquo with a semi-colon.",
-"input":"&laquo;",
-"output": [["Character", "\u00AB"]]},
-
-{"description": "Named entity: laquo without a semi-colon.",
-"input":"&laquo",
-"output": ["ParseError", ["Character", "\u00AB"]]},
-
-{"description": "Named entity: larr with a semi-colon.",
-"input":"&larr;",
-"output": [["Character", "\u2190"]]},
-
-{"description": "Named entity: lceil with a semi-colon.",
-"input":"&lceil;",
-"output": [["Character", "\u2308"]]},
-
-{"description": "Named entity: ldquo with a semi-colon.",
-"input":"&ldquo;",
-"output": [["Character", "\u201C"]]},
-
-{"description": "Named entity: le with a semi-colon.",
-"input":"&le;",
-"output": [["Character", "\u2264"]]},
-
-{"description": "Named entity: lfloor with a semi-colon.",
-"input":"&lfloor;",
-"output": [["Character", "\u230A"]]},
-
-{"description": "Named entity: lowast with a semi-colon.",
-"input":"&lowast;",
-"output": [["Character", "\u2217"]]},
-
-{"description": "Named entity: loz with a semi-colon.",
-"input":"&loz;",
-"output": [["Character", "\u25CA"]]},
-
-{"description": "Named entity: lrm with a semi-colon.",
-"input":"&lrm;",
-"output": [["Character", "\u200E"]]},
-
-{"description": "Named entity: lsaquo with a semi-colon.",
-"input":"&lsaquo;",
-"output": [["Character", "\u2039"]]},
-
-{"description": "Named entity: lsquo with a semi-colon.",
-"input":"&lsquo;",
-"output": [["Character", "\u2018"]]},
-
-{"description": "Named entity: lt with a semi-colon.",
-"input":"&lt;",
-"output": [["Character", "\u003C"]]},
-
-{"description": "Named entity: lt without a semi-colon.",
-"input":"&lt",
-"output": ["ParseError", ["Character", "\u003C"]]},
-
-{"description": "Named entity: macr with a semi-colon.",
-"input":"&macr;",
-"output": [["Character", "\u00AF"]]},
-
-{"description": "Named entity: macr without a semi-colon.",
-"input":"&macr",
-"output": ["ParseError", ["Character", "\u00AF"]]},
-
-{"description": "Named entity: mdash with a semi-colon.",
-"input":"&mdash;",
-"output": [["Character", "\u2014"]]},
-
-{"description": "Named entity: micro with a semi-colon.",
-"input":"&micro;",
-"output": [["Character", "\u00B5"]]},
-
-{"description": "Named entity: micro without a semi-colon.",
-"input":"&micro",
-"output": ["ParseError", ["Character", "\u00B5"]]},
-
-{"description": "Named entity: middot with a semi-colon.",
-"input":"&middot;",
-"output": [["Character", "\u00B7"]]},
-
-{"description": "Named entity: middot without a semi-colon.",
-"input":"&middot",
-"output": ["ParseError", ["Character", "\u00B7"]]},
-
-{"description": "Named entity: minus with a semi-colon.",
-"input":"&minus;",
-"output": [["Character", "\u2212"]]},
-
-{"description": "Named entity: mu with a semi-colon.",
-"input":"&mu;",
-"output": [["Character", "\u03BC"]]},
-
-{"description": "Named entity: nabla with a semi-colon.",
-"input":"&nabla;",
-"output": [["Character", "\u2207"]]},
-
-{"description": "Named entity: nbsp with a semi-colon.",
-"input":"&nbsp;",
-"output": [["Character", "\u00A0"]]},
-
-{"description": "Named entity: nbsp without a semi-colon.",
-"input":"&nbsp",
-"output": ["ParseError", ["Character", "\u00A0"]]},
-
-{"description": "Named entity: ndash with a semi-colon.",
-"input":"&ndash;",
-"output": [["Character", "\u2013"]]},
-
-{"description": "Named entity: ne with a semi-colon.",
-"input":"&ne;",
-"output": [["Character", "\u2260"]]},
-
-{"description": "Named entity: ni with a semi-colon.",
-"input":"&ni;",
-"output": [["Character", "\u220B"]]},
-
-{"description": "Named entity: not with a semi-colon.",
-"input":"&not;",
-"output": [["Character", "\u00AC"]]},
-
-{"description": "Named entity: not without a semi-colon.",
-"input":"&not",
-"output": ["ParseError", ["Character", "\u00AC"]]},
-
-{"description": "Named entity: notin with a semi-colon.",
-"input":"&notin;",
-"output": [["Character", "\u2209"]]},
-
-{"description": "Named entity: nsub with a semi-colon.",
-"input":"&nsub;",
-"output": [["Character", "\u2284"]]},
-
-{"description": "Named entity: ntilde with a semi-colon.",
-"input":"&ntilde;",
-"output": [["Character", "\u00F1"]]},
-
-{"description": "Named entity: ntilde without a semi-colon.",
-"input":"&ntilde",
-"output": ["ParseError", ["Character", "\u00F1"]]},
-
-{"description": "Named entity: nu with a semi-colon.",
-"input":"&nu;",
-"output": [["Character", "\u03BD"]]},
-
-{"description": "Named entity: oacute with a semi-colon.",
-"input":"&oacute;",
-"output": [["Character", "\u00F3"]]},
-
-{"description": "Named entity: oacute without a semi-colon.",
-"input":"&oacute",
-"output": ["ParseError", ["Character", "\u00F3"]]},
-
-{"description": "Named entity: ocirc with a semi-colon.",
-"input":"&ocirc;",
-"output": [["Character", "\u00F4"]]},
-
-{"description": "Named entity: ocirc without a semi-colon.",
-"input":"&ocirc",
-"output": ["ParseError", ["Character", "\u00F4"]]},
-
-{"description": "Named entity: oelig with a semi-colon.",
-"input":"&oelig;",
-"output": [["Character", "\u0153"]]},
-
-{"description": "Named entity: ograve with a semi-colon.",
-"input":"&ograve;",
-"output": [["Character", "\u00F2"]]},
-
-{"description": "Named entity: ograve without a semi-colon.",
-"input":"&ograve",
-"output": ["ParseError", ["Character", "\u00F2"]]},
-
-{"description": "Named entity: oline with a semi-colon.",
-"input":"&oline;",
-"output": [["Character", "\u203E"]]},
-
-{"description": "Named entity: omega with a semi-colon.",
-"input":"&omega;",
-"output": [["Character", "\u03C9"]]},
-
-{"description": "Named entity: omicron with a semi-colon.",
-"input":"&omicron;",
-"output": [["Character", "\u03BF"]]},
-
-{"description": "Named entity: oplus with a semi-colon.",
-"input":"&oplus;",
-"output": [["Character", "\u2295"]]},
-
-{"description": "Named entity: or with a semi-colon.",
-"input":"&or;",
-"output": [["Character", "\u2228"]]},
-
-{"description": "Named entity: ordf with a semi-colon.",
-"input":"&ordf;",
-"output": [["Character", "\u00AA"]]},
-
-{"description": "Named entity: ordf without a semi-colon.",
-"input":"&ordf",
-"output": ["ParseError", ["Character", "\u00AA"]]},
-
-{"description": "Named entity: ordm with a semi-colon.",
-"input":"&ordm;",
-"output": [["Character", "\u00BA"]]},
-
-{"description": "Named entity: ordm without a semi-colon.",
-"input":"&ordm",
-"output": ["ParseError", ["Character", "\u00BA"]]},
-
-{"description": "Named entity: oslash with a semi-colon.",
-"input":"&oslash;",
-"output": [["Character", "\u00F8"]]},
-
-{"description": "Named entity: oslash without a semi-colon.",
-"input":"&oslash",
-"output": ["ParseError", ["Character", "\u00F8"]]},
-
-{"description": "Named entity: otilde with a semi-colon.",
-"input":"&otilde;",
-"output": [["Character", "\u00F5"]]},
-
-{"description": "Named entity: otilde without a semi-colon.",
-"input":"&otilde",
-"output": ["ParseError", ["Character", "\u00F5"]]},
-
-{"description": "Named entity: otimes with a semi-colon.",
-"input":"&otimes;",
-"output": [["Character", "\u2297"]]},
-
-{"description": "Named entity: ouml with a semi-colon.",
-"input":"&ouml;",
-"output": [["Character", "\u00F6"]]},
-
-{"description": "Named entity: ouml without a semi-colon.",
-"input":"&ouml",
-"output": ["ParseError", ["Character", "\u00F6"]]},
-
-{"description": "Named entity: para with a semi-colon.",
-"input":"&para;",
-"output": [["Character", "\u00B6"]]},
-
-{"description": "Named entity: para without a semi-colon.",
-"input":"&para",
-"output": ["ParseError", ["Character", "\u00B6"]]},
-
-{"description": "Named entity: part with a semi-colon.",
-"input":"&part;",
-"output": [["Character", "\u2202"]]},
-
-{"description": "Named entity: permil with a semi-colon.",
-"input":"&permil;",
-"output": [["Character", "\u2030"]]},
-
-{"description": "Named entity: perp with a semi-colon.",
-"input":"&perp;",
-"output": [["Character", "\u22A5"]]},
-
-{"description": "Named entity: phi with a semi-colon.",
-"input":"&phi;",
-"output": [["Character", "\u03C6"]]},
-
-{"description": "Named entity: pi with a semi-colon.",
-"input":"&pi;",
-"output": [["Character", "\u03C0"]]},
-
-{"description": "Named entity: piv with a semi-colon.",
-"input":"&piv;",
-"output": [["Character", "\u03D6"]]},
-
-{"description": "Named entity: plusmn with a semi-colon.",
-"input":"&plusmn;",
-"output": [["Character", "\u00B1"]]},
-
-{"description": "Named entity: plusmn without a semi-colon.",
-"input":"&plusmn",
-"output": ["ParseError", ["Character", "\u00B1"]]},
-
-{"description": "Named entity: pound with a semi-colon.",
-"input":"&pound;",
-"output": [["Character", "\u00A3"]]},
-
-{"description": "Named entity: pound without a semi-colon.",
-"input":"&pound",
-"output": ["ParseError", ["Character", "\u00A3"]]},
-
-{"description": "Named entity: prime with a semi-colon.",
-"input":"&prime;",
-"output": [["Character", "\u2032"]]},
-
-{"description": "Named entity: prod with a semi-colon.",
-"input":"&prod;",
-"output": [["Character", "\u220F"]]},
-
-{"description": "Named entity: prop with a semi-colon.",
-"input":"&prop;",
-"output": [["Character", "\u221D"]]},
-
-{"description": "Named entity: psi with a semi-colon.",
-"input":"&psi;",
-"output": [["Character", "\u03C8"]]},
-
-{"description": "Named entity: quot with a semi-colon.",
-"input":"&quot;",
-"output": [["Character", "\u0022"]]},
-
-{"description": "Named entity: quot without a semi-colon.",
-"input":"&quot",
-"output": ["ParseError", ["Character", "\u0022"]]},
-
-{"description": "Named entity: rArr with a semi-colon.",
-"input":"&rArr;",
-"output": [["Character", "\u21D2"]]},
-
-{"description": "Named entity: radic with a semi-colon.",
-"input":"&radic;",
-"output": [["Character", "\u221A"]]},
-
-{"description": "Named entity: rang with a semi-colon.",
-"input":"&rang;",
-"output": [["Character", "\u27E9"]]},
-
-{"description": "Named entity: raquo with a semi-colon.",
-"input":"&raquo;",
-"output": [["Character", "\u00BB"]]},
-
-{"description": "Named entity: raquo without a semi-colon.",
-"input":"&raquo",
-"output": ["ParseError", ["Character", "\u00BB"]]},
-
-{"description": "Named entity: rarr with a semi-colon.",
-"input":"&rarr;",
-"output": [["Character", "\u2192"]]},
-
-{"description": "Named entity: rceil with a semi-colon.",
-"input":"&rceil;",
-"output": [["Character", "\u2309"]]},
-
-{"description": "Named entity: rdquo with a semi-colon.",
-"input":"&rdquo;",
-"output": [["Character", "\u201D"]]},
-
-{"description": "Named entity: real with a semi-colon.",
-"input":"&real;",
-"output": [["Character", "\u211C"]]},
-
-{"description": "Named entity: reg with a semi-colon.",
-"input":"&reg;",
-"output": [["Character", "\u00AE"]]},
-
-{"description": "Named entity: reg without a semi-colon.",
-"input":"&reg",
-"output": ["ParseError", ["Character", "\u00AE"]]},
-
-{"description": "Named entity: rfloor with a semi-colon.",
-"input":"&rfloor;",
-"output": [["Character", "\u230B"]]},
-
-{"description": "Named entity: rho with a semi-colon.",
-"input":"&rho;",
-"output": [["Character", "\u03C1"]]},
-
-{"description": "Named entity: rlm with a semi-colon.",
-"input":"&rlm;",
-"output": [["Character", "\u200F"]]},
-
-{"description": "Named entity: rsaquo with a semi-colon.",
-"input":"&rsaquo;",
-"output": [["Character", "\u203A"]]},
-
-{"description": "Named entity: rsquo with a semi-colon.",
-"input":"&rsquo;",
-"output": [["Character", "\u2019"]]},
-
-{"description": "Named entity: sbquo with a semi-colon.",
-"input":"&sbquo;",
-"output": [["Character", "\u201A"]]},
-
-{"description": "Named entity: scaron with a semi-colon.",
-"input":"&scaron;",
-"output": [["Character", "\u0161"]]},
-
-{"description": "Named entity: sdot with a semi-colon.",
-"input":"&sdot;",
-"output": [["Character", "\u22C5"]]},
-
-{"description": "Named entity: sect with a semi-colon.",
-"input":"&sect;",
-"output": [["Character", "\u00A7"]]},
-
-{"description": "Named entity: sect without a semi-colon.",
-"input":"&sect",
-"output": ["ParseError", ["Character", "\u00A7"]]},
-
-{"description": "Named entity: shy with a semi-colon.",
-"input":"&shy;",
-"output": [["Character", "\u00AD"]]},
-
-{"description": "Named entity: shy without a semi-colon.",
-"input":"&shy",
-"output": ["ParseError", ["Character", "\u00AD"]]},
-
-{"description": "Named entity: sigma with a semi-colon.",
-"input":"&sigma;",
-"output": [["Character", "\u03C3"]]},
-
-{"description": "Named entity: sigmaf with a semi-colon.",
-"input":"&sigmaf;",
-"output": [["Character", "\u03C2"]]},
-
-{"description": "Named entity: sim with a semi-colon.",
-"input":"&sim;",
-"output": [["Character", "\u223C"]]},
-
-{"description": "Named entity: spades with a semi-colon.",
-"input":"&spades;",
-"output": [["Character", "\u2660"]]},
-
-{"description": "Named entity: sub with a semi-colon.",
-"input":"&sub;",
-"output": [["Character", "\u2282"]]},
-
-{"description": "Named entity: sube with a semi-colon.",
-"input":"&sube;",
-"output": [["Character", "\u2286"]]},
-
-{"description": "Named entity: sum with a semi-colon.",
-"input":"&sum;",
-"output": [["Character", "\u2211"]]},
-
-{"description": "Named entity: sup1 with a semi-colon.",
-"input":"&sup1;",
-"output": [["Character", "\u00B9"]]},
-
-{"description": "Named entity: sup1 without a semi-colon.",
-"input":"&sup1",
-"output": ["ParseError", ["Character", "\u00B9"]]},
-
-{"description": "Named entity: sup2 with a semi-colon.",
-"input":"&sup2;",
-"output": [["Character", "\u00B2"]]},
-
-{"description": "Named entity: sup2 without a semi-colon.",
-"input":"&sup2",
-"output": ["ParseError", ["Character", "\u00B2"]]},
-
-{"description": "Named entity: sup3 with a semi-colon.",
-"input":"&sup3;",
-"output": [["Character", "\u00B3"]]},
-
-{"description": "Named entity: sup3 without a semi-colon.",
-"input":"&sup3",
-"output": ["ParseError", ["Character", "\u00B3"]]},
-
-{"description": "Named entity: sup with a semi-colon.",
-"input":"&sup;",
-"output": [["Character", "\u2283"]]},
-
-{"description": "Named entity: supe with a semi-colon.",
-"input":"&supe;",
-"output": [["Character", "\u2287"]]},
-
-{"description": "Named entity: szlig with a semi-colon.",
-"input":"&szlig;",
-"output": [["Character", "\u00DF"]]},
-
-{"description": "Named entity: szlig without a semi-colon.",
-"input":"&szlig",
-"output": ["ParseError", ["Character", "\u00DF"]]},
-
-{"description": "Named entity: tau with a semi-colon.",
-"input":"&tau;",
-"output": [["Character", "\u03C4"]]},
-
-{"description": "Named entity: there4 with a semi-colon.",
-"input":"&there4;",
-"output": [["Character", "\u2234"]]},
-
-{"description": "Named entity: theta with a semi-colon.",
-"input":"&theta;",
-"output": [["Character", "\u03B8"]]},
-
-{"description": "Named entity: thetasym with a semi-colon.",
-"input":"&thetasym;",
-"output": [["Character", "\u03D1"]]},
-
-{"description": "Named entity: thinsp with a semi-colon.",
-"input":"&thinsp;",
-"output": [["Character", "\u2009"]]},
-
-{"description": "Named entity: thorn with a semi-colon.",
-"input":"&thorn;",
-"output": [["Character", "\u00FE"]]},
-
-{"description": "Named entity: thorn without a semi-colon.",
-"input":"&thorn",
-"output": ["ParseError", ["Character", "\u00FE"]]},
-
-{"description": "Named entity: tilde with a semi-colon.",
-"input":"&tilde;",
-"output": [["Character", "\u02DC"]]},
-
-{"description": "Named entity: times with a semi-colon.",
-"input":"&times;",
-"output": [["Character", "\u00D7"]]},
-
-{"description": "Named entity: times without a semi-colon.",
-"input":"&times",
-"output": ["ParseError", ["Character", "\u00D7"]]},
-
-{"description": "Named entity: trade with a semi-colon.",
-"input":"&trade;",
-"output": [["Character", "\u2122"]]},
-
-{"description": "Named entity: uArr with a semi-colon.",
-"input":"&uArr;",
-"output": [["Character", "\u21D1"]]},
-
-{"description": "Named entity: uacute with a semi-colon.",
-"input":"&uacute;",
-"output": [["Character", "\u00FA"]]},
-
-{"description": "Named entity: uacute without a semi-colon.",
-"input":"&uacute",
-"output": ["ParseError", ["Character", "\u00FA"]]},
-
-{"description": "Named entity: uarr with a semi-colon.",
-"input":"&uarr;",
-"output": [["Character", "\u2191"]]},
-
-{"description": "Named entity: ucirc with a semi-colon.",
-"input":"&ucirc;",
-"output": [["Character", "\u00FB"]]},
-
-{"description": "Named entity: ucirc without a semi-colon.",
-"input":"&ucirc",
-"output": ["ParseError", ["Character", "\u00FB"]]},
-
-{"description": "Named entity: ugrave with a semi-colon.",
-"input":"&ugrave;",
-"output": [["Character", "\u00F9"]]},
-
-{"description": "Named entity: ugrave without a semi-colon.",
-"input":"&ugrave",
-"output": ["ParseError", ["Character", "\u00F9"]]},
-
-{"description": "Named entity: uml with a semi-colon.",
-"input":"&uml;",
-"output": [["Character", "\u00A8"]]},
-
-{"description": "Named entity: uml without a semi-colon.",
-"input":"&uml",
-"output": ["ParseError", ["Character", "\u00A8"]]},
-
-{"description": "Named entity: upsih with a semi-colon.",
-"input":"&upsih;",
-"output": [["Character", "\u03D2"]]},
-
-{"description": "Named entity: upsilon with a semi-colon.",
-"input":"&upsilon;",
-"output": [["Character", "\u03C5"]]},
-
-{"description": "Named entity: uuml with a semi-colon.",
-"input":"&uuml;",
-"output": [["Character", "\u00FC"]]},
-
-{"description": "Named entity: uuml without a semi-colon.",
-"input":"&uuml",
-"output": ["ParseError", ["Character", "\u00FC"]]},
-
-{"description": "Named entity: weierp with a semi-colon.",
-"input":"&weierp;",
-"output": [["Character", "\u2118"]]},
-
-{"description": "Named entity: xi with a semi-colon.",
-"input":"&xi;",
-"output": [["Character", "\u03BE"]]},
-
-{"description": "Named entity: yacute with a semi-colon.",
-"input":"&yacute;",
-"output": [["Character", "\u00FD"]]},
-
-{"description": "Named entity: yacute without a semi-colon.",
-"input":"&yacute",
-"output": ["ParseError", ["Character", "\u00FD"]]},
-
-{"description": "Named entity: yen with a semi-colon.",
-"input":"&yen;",
-"output": [["Character", "\u00A5"]]},
-
-{"description": "Named entity: yen without a semi-colon.",
-"input":"&yen",
-"output": ["ParseError", ["Character", "\u00A5"]]},
-
-{"description": "Named entity: yuml with a semi-colon.",
-"input":"&yuml;",
-"output": [["Character", "\u00FF"]]},
-
-{"description": "Named entity: yuml without a semi-colon.",
-"input":"&yuml",
-"output": ["ParseError", ["Character", "\u00FF"]]},
-
-{"description": "Named entity: zeta with a semi-colon.",
-"input":"&zeta;",
-"output": [["Character", "\u03B6"]]},
-
-{"description": "Named entity: zwj with a semi-colon.",
-"input":"&zwj;",
-"output": [["Character", "\u200D"]]},
-
-{"description": "Named entity: zwnj with a semi-colon.",
-"input":"&zwnj;",
-"output": [["Character", "\u200C"]]},
-
-{"description": "Bad named entity: Alpha without a semi-colon.",
-"input":"&Alpha",
-"output": ["ParseError", ["Character", "&Alpha"]]},
-
-{"description": "Bad named entity: alpha without a semi-colon.",
-"input":"&alpha",
-"output": ["ParseError", ["Character", "&alpha"]]},
-
-{"description": "Bad named entity: and without a semi-colon.",
-"input":"&and",
-"output": ["ParseError", ["Character", "&and"]]},
-
-{"description": "Bad named entity: ang without a semi-colon.",
-"input":"&ang",
-"output": ["ParseError", ["Character", "&ang"]]},
-
-{"description": "Bad named entity: apos without a semi-colon.",
-"input":"&apos",
-"output": ["ParseError", ["Character", "&apos"]]},
-
-{"description": "Bad named entity: asymp without a semi-colon.",
-"input":"&asymp",
-"output": ["ParseError", ["Character", "&asymp"]]},
-
-{"description": "Bad named entity: bdquo without a semi-colon.",
-"input":"&bdquo",
-"output": ["ParseError", ["Character", "&bdquo"]]},
-
-{"description": "Bad named entity: Beta without a semi-colon.",
-"input":"&Beta",
-"output": ["ParseError", ["Character", "&Beta"]]},
-
-{"description": "Bad named entity: beta without a semi-colon.",
-"input":"&beta",
-"output": ["ParseError", ["Character", "&beta"]]},
-
-{"description": "Bad named entity: bull without a semi-colon.",
-"input":"&bull",
-"output": ["ParseError", ["Character", "&bull"]]},
-
-{"description": "Bad named entity: cap without a semi-colon.",
-"input":"&cap",
-"output": ["ParseError", ["Character", "&cap"]]},
-
-{"description": "Bad named entity: Chi without a semi-colon.",
-"input":"&Chi",
-"output": ["ParseError", ["Character", "&Chi"]]},
-
-{"description": "Bad named entity: chi without a semi-colon.",
-"input":"&chi",
-"output": ["ParseError", ["Character", "&chi"]]},
-
-{"description": "Bad named entity: circ without a semi-colon.",
-"input":"&circ",
-"output": ["ParseError", ["Character", "&circ"]]},
-
-{"description": "Bad named entity: clubs without a semi-colon.",
-"input":"&clubs",
-"output": ["ParseError", ["Character", "&clubs"]]},
-
-{"description": "Bad named entity: cong without a semi-colon.",
-"input":"&cong",
-"output": ["ParseError", ["Character", "&cong"]]},
-
-{"description": "Bad named entity: crarr without a semi-colon.",
-"input":"&crarr",
-"output": ["ParseError", ["Character", "&crarr"]]},
-
-{"description": "Bad named entity: cup without a semi-colon.",
-"input":"&cup",
-"output": ["ParseError", ["Character", "&cup"]]},
-
-{"description": "Bad named entity: dagger without a semi-colon.",
-"input":"&dagger",
-"output": ["ParseError", ["Character", "&dagger"]]},
-
-{"description": "Bad named entity: dagger without a semi-colon.",
-"input":"&dagger",
-"output": ["ParseError", ["Character", "&dagger"]]},
-
-{"description": "Bad named entity: darr without a semi-colon.",
-"input":"&darr",
-"output": ["ParseError", ["Character", "&darr"]]},
-
-{"description": "Bad named entity: darr without a semi-colon.",
-"input":"&darr",
-"output": ["ParseError", ["Character", "&darr"]]},
-
-{"description": "Bad named entity: Delta without a semi-colon.",
-"input":"&Delta",
-"output": ["ParseError", ["Character", "&Delta"]]},
-
-{"description": "Bad named entity: delta without a semi-colon.",
-"input":"&delta",
-"output": ["ParseError", ["Character", "&delta"]]},
-
-{"description": "Bad named entity: diams without a semi-colon.",
-"input":"&diams",
-"output": ["ParseError", ["Character", "&diams"]]},
-
-{"description": "Bad named entity: empty without a semi-colon.",
-"input":"&empty",
-"output": ["ParseError", ["Character", "&empty"]]},
-
-{"description": "Bad named entity: emsp without a semi-colon.",
-"input":"&emsp",
-"output": ["ParseError", ["Character", "&emsp"]]},
-
-{"description": "Bad named entity: ensp without a semi-colon.",
-"input":"&ensp",
-"output": ["ParseError", ["Character", "&ensp"]]},
-
-{"description": "Bad named entity: Epsilon without a semi-colon.",
-"input":"&Epsilon",
-"output": ["ParseError", ["Character", "&Epsilon"]]},
-
-{"description": "Bad named entity: epsilon without a semi-colon.",
-"input":"&epsilon",
-"output": ["ParseError", ["Character", "&epsilon"]]},
-
-{"description": "Bad named entity: equiv without a semi-colon.",
-"input":"&equiv",
-"output": ["ParseError", ["Character", "&equiv"]]},
-
-{"description": "Bad named entity: Eta without a semi-colon.",
-"input":"&Eta",
-"output": ["ParseError", ["Character", "&Eta"]]},
-
-{"description": "Bad named entity: eta without a semi-colon.",
-"input":"&eta",
-"output": ["ParseError", ["Character", "&eta"]]},
-
-{"description": "Bad named entity: euro without a semi-colon.",
-"input":"&euro",
-"output": ["ParseError", ["Character", "&euro"]]},
-
-{"description": "Bad named entity: exist without a semi-colon.",
-"input":"&exist",
-"output": ["ParseError", ["Character", "&exist"]]},
-
-{"description": "Bad named entity: fnof without a semi-colon.",
-"input":"&fnof",
-"output": ["ParseError", ["Character", "&fnof"]]},
-
-{"description": "Bad named entity: forall without a semi-colon.",
-"input":"&forall",
-"output": ["ParseError", ["Character", "&forall"]]},
-
-{"description": "Bad named entity: frasl without a semi-colon.",
-"input":"&frasl",
-"output": ["ParseError", ["Character", "&frasl"]]},
-
-{"description": "Bad named entity: Gamma without a semi-colon.",
-"input":"&Gamma",
-"output": ["ParseError", ["Character", "&Gamma"]]},
-
-{"description": "Bad named entity: gamma without a semi-colon.",
-"input":"&gamma",
-"output": ["ParseError", ["Character", "&gamma"]]},
-
-{"description": "Bad named entity: ge without a semi-colon.",
-"input":"&ge",
-"output": ["ParseError", ["Character", "&ge"]]},
-
-{"description": "Bad named entity: harr without a semi-colon.",
-"input":"&harr",
-"output": ["ParseError", ["Character", "&harr"]]},
-
-{"description": "Bad named entity: harr without a semi-colon.",
-"input":"&harr",
-"output": ["ParseError", ["Character", "&harr"]]},
-
-{"description": "Bad named entity: hearts without a semi-colon.",
-"input":"&hearts",
-"output": ["ParseError", ["Character", "&hearts"]]},
-
-{"description": "Bad named entity: hellip without a semi-colon.",
-"input":"&hellip",
-"output": ["ParseError", ["Character", "&hellip"]]},
-
-{"description": "Bad named entity: image without a semi-colon.",
-"input":"&image",
-"output": ["ParseError", ["Character", "&image"]]},
-
-{"description": "Bad named entity: infin without a semi-colon.",
-"input":"&infin",
-"output": ["ParseError", ["Character", "&infin"]]},
-
-{"description": "Bad named entity: int without a semi-colon.",
-"input":"&int",
-"output": ["ParseError", ["Character", "&int"]]},
-
-{"description": "Bad named entity: Iota without a semi-colon.",
-"input":"&Iota",
-"output": ["ParseError", ["Character", "&Iota"]]},
-
-{"description": "Bad named entity: iota without a semi-colon.",
-"input":"&iota",
-"output": ["ParseError", ["Character", "&iota"]]},
-
-{"description": "Bad named entity: isin without a semi-colon.",
-"input":"&isin",
-"output": ["ParseError", ["Character", "&isin"]]},
-
-{"description": "Bad named entity: Kappa without a semi-colon.",
-"input":"&Kappa",
-"output": ["ParseError", ["Character", "&Kappa"]]},
-
-{"description": "Bad named entity: kappa without a semi-colon.",
-"input":"&kappa",
-"output": ["ParseError", ["Character", "&kappa"]]},
-
-{"description": "Bad named entity: Lambda without a semi-colon.",
-"input":"&Lambda",
-"output": ["ParseError", ["Character", "&Lambda"]]},
-
-{"description": "Bad named entity: lambda without a semi-colon.",
-"input":"&lambda",
-"output": ["ParseError", ["Character", "&lambda"]]},
-
-{"description": "Bad named entity: lang without a semi-colon.",
-"input":"&lang",
-"output": ["ParseError", ["Character", "&lang"]]},
-
-{"description": "Bad named entity: larr without a semi-colon.",
-"input":"&larr",
-"output": ["ParseError", ["Character", "&larr"]]},
-
-{"description": "Bad named entity: larr without a semi-colon.",
-"input":"&larr",
-"output": ["ParseError", ["Character", "&larr"]]},
-
-{"description": "Bad named entity: lceil without a semi-colon.",
-"input":"&lceil",
-"output": ["ParseError", ["Character", "&lceil"]]},
-
-{"description": "Bad named entity: ldquo without a semi-colon.",
-"input":"&ldquo",
-"output": ["ParseError", ["Character", "&ldquo"]]},
-
-{"description": "Bad named entity: le without a semi-colon.",
-"input":"&le",
-"output": ["ParseError", ["Character", "&le"]]},
-
-{"description": "Bad named entity: lfloor without a semi-colon.",
-"input":"&lfloor",
-"output": ["ParseError", ["Character", "&lfloor"]]},
-
-{"description": "Bad named entity: lowast without a semi-colon.",
-"input":"&lowast",
-"output": ["ParseError", ["Character", "&lowast"]]},
-
-{"description": "Bad named entity: loz without a semi-colon.",
-"input":"&loz",
-"output": ["ParseError", ["Character", "&loz"]]},
-
-{"description": "Bad named entity: lrm without a semi-colon.",
-"input":"&lrm",
-"output": ["ParseError", ["Character", "&lrm"]]},
-
-{"description": "Bad named entity: lsaquo without a semi-colon.",
-"input":"&lsaquo",
-"output": ["ParseError", ["Character", "&lsaquo"]]},
-
-{"description": "Bad named entity: lsquo without a semi-colon.",
-"input":"&lsquo",
-"output": ["ParseError", ["Character", "&lsquo"]]},
-
-{"description": "Bad named entity: mdash without a semi-colon.",
-"input":"&mdash",
-"output": ["ParseError", ["Character", "&mdash"]]},
-
-{"description": "Bad named entity: minus without a semi-colon.",
-"input":"&minus",
-"output": ["ParseError", ["Character", "&minus"]]},
-
-{"description": "Bad named entity: Mu without a semi-colon.",
-"input":"&Mu",
-"output": ["ParseError", ["Character", "&Mu"]]},
-
-{"description": "Bad named entity: mu without a semi-colon.",
-"input":"&mu",
-"output": ["ParseError", ["Character", "&mu"]]},
-
-{"description": "Bad named entity: nabla without a semi-colon.",
-"input":"&nabla",
-"output": ["ParseError", ["Character", "&nabla"]]},
-
-{"description": "Bad named entity: ndash without a semi-colon.",
-"input":"&ndash",
-"output": ["ParseError", ["Character", "&ndash"]]},
-
-{"description": "Bad named entity: ne without a semi-colon.",
-"input":"&ne",
-"output": ["ParseError", ["Character", "&ne"]]},
-
-{"description": "Bad named entity: ni without a semi-colon.",
-"input":"&ni",
-"output": ["ParseError", ["Character", "&ni"]]},
-
-{"description": "Bad named entity: notin without a semi-colon.",
-"input":"&notin",
-"output": ["ParseError", ["Character", "\u00ACin"]]},
-
-{"description": "Bad named entity: nsub without a semi-colon.",
-"input":"&nsub",
-"output": ["ParseError", ["Character", "&nsub"]]},
-
-{"description": "Bad named entity: Nu without a semi-colon.",
-"input":"&Nu",
-"output": ["ParseError", ["Character", "&Nu"]]},
-
-{"description": "Bad named entity: nu without a semi-colon.",
-"input":"&nu",
-"output": ["ParseError", ["Character", "&nu"]]},
-
-{"description": "Bad named entity: OElig without a semi-colon.",
-"input":"&OElig",
-"output": ["ParseError", ["Character", "&OElig"]]},
-
-{"description": "Bad named entity: oelig without a semi-colon.",
-"input":"&oelig",
-"output": ["ParseError", ["Character", "&oelig"]]},
-
-{"description": "Bad named entity: oline without a semi-colon.",
-"input":"&oline",
-"output": ["ParseError", ["Character", "&oline"]]},
-
-{"description": "Bad named entity: Omega without a semi-colon.",
-"input":"&Omega",
-"output": ["ParseError", ["Character", "&Omega"]]},
-
-{"description": "Bad named entity: omega without a semi-colon.",
-"input":"&omega",
-"output": ["ParseError", ["Character", "&omega"]]},
-
-{"description": "Bad named entity: Omicron without a semi-colon.",
-"input":"&Omicron",
-"output": ["ParseError", ["Character", "&Omicron"]]},
-
-{"description": "Bad named entity: omicron without a semi-colon.",
-"input":"&omicron",
-"output": ["ParseError", ["Character", "&omicron"]]},
-
-{"description": "Bad named entity: oplus without a semi-colon.",
-"input":"&oplus",
-"output": ["ParseError", ["Character", "&oplus"]]},
-
-{"description": "Bad named entity: or without a semi-colon.",
-"input":"&or",
-"output": ["ParseError", ["Character", "&or"]]},
-
-{"description": "Bad named entity: otimes without a semi-colon.",
-"input":"&otimes",
-"output": ["ParseError", ["Character", "&otimes"]]},
-
-{"description": "Bad named entity: part without a semi-colon.",
-"input":"&part",
-"output": ["ParseError", ["Character", "&part"]]},
-
-{"description": "Bad named entity: permil without a semi-colon.",
-"input":"&permil",
-"output": ["ParseError", ["Character", "&permil"]]},
-
-{"description": "Bad named entity: perp without a semi-colon.",
-"input":"&perp",
-"output": ["ParseError", ["Character", "&perp"]]},
-
-{"description": "Bad named entity: Phi without a semi-colon.",
-"input":"&Phi",
-"output": ["ParseError", ["Character", "&Phi"]]},
-
-{"description": "Bad named entity: phi without a semi-colon.",
-"input":"&phi",
-"output": ["ParseError", ["Character", "&phi"]]},
-
-{"description": "Bad named entity: Pi without a semi-colon.",
-"input":"&Pi",
-"output": ["ParseError", ["Character", "&Pi"]]},
-
-{"description": "Bad named entity: pi without a semi-colon.",
-"input":"&pi",
-"output": ["ParseError", ["Character", "&pi"]]},
-
-{"description": "Bad named entity: piv without a semi-colon.",
-"input":"&piv",
-"output": ["ParseError", ["Character", "&piv"]]},
-
-{"description": "Bad named entity: prime without a semi-colon.",
-"input":"&prime",
-"output": ["ParseError", ["Character", "&prime"]]},
-
-{"description": "Bad named entity: prime without a semi-colon.",
-"input":"&prime",
-"output": ["ParseError", ["Character", "&prime"]]},
-
-{"description": "Bad named entity: prod without a semi-colon.",
-"input":"&prod",
-"output": ["ParseError", ["Character", "&prod"]]},
-
-{"description": "Bad named entity: prop without a semi-colon.",
-"input":"&prop",
-"output": ["ParseError", ["Character", "&prop"]]},
-
-{"description": "Bad named entity: Psi without a semi-colon.",
-"input":"&Psi",
-"output": ["ParseError", ["Character", "&Psi"]]},
-
-{"description": "Bad named entity: psi without a semi-colon.",
-"input":"&psi",
-"output": ["ParseError", ["Character", "&psi"]]},
-
-{"description": "Bad named entity: radic without a semi-colon.",
-"input":"&radic",
-"output": ["ParseError", ["Character", "&radic"]]},
-
-{"description": "Bad named entity: rang without a semi-colon.",
-"input":"&rang",
-"output": ["ParseError", ["Character", "&rang"]]},
-
-{"description": "Bad named entity: rarr without a semi-colon.",
-"input":"&rarr",
-"output": ["ParseError", ["Character", "&rarr"]]},
-
-{"description": "Bad named entity: rarr without a semi-colon.",
-"input":"&rarr",
-"output": ["ParseError", ["Character", "&rarr"]]},
-
-{"description": "Bad named entity: rceil without a semi-colon.",
-"input":"&rceil",
-"output": ["ParseError", ["Character", "&rceil"]]},
-
-{"description": "Bad named entity: rdquo without a semi-colon.",
-"input":"&rdquo",
-"output": ["ParseError", ["Character", "&rdquo"]]},
-
-{"description": "Bad named entity: real without a semi-colon.",
-"input":"&real",
-"output": ["ParseError", ["Character", "&real"]]},
-
-{"description": "Bad named entity: rfloor without a semi-colon.",
-"input":"&rfloor",
-"output": ["ParseError", ["Character", "&rfloor"]]},
-
-{"description": "Bad named entity: Rho without a semi-colon.",
-"input":"&Rho",
-"output": ["ParseError", ["Character", "&Rho"]]},
-
-{"description": "Bad named entity: rho without a semi-colon.",
-"input":"&rho",
-"output": ["ParseError", ["Character", "&rho"]]},
-
-{"description": "Bad named entity: rlm without a semi-colon.",
-"input":"&rlm",
-"output": ["ParseError", ["Character", "&rlm"]]},
-
-{"description": "Bad named entity: rsaquo without a semi-colon.",
-"input":"&rsaquo",
-"output": ["ParseError", ["Character", "&rsaquo"]]},
-
-{"description": "Bad named entity: rsquo without a semi-colon.",
-"input":"&rsquo",
-"output": ["ParseError", ["Character", "&rsquo"]]},
-
-{"description": "Bad named entity: sbquo without a semi-colon.",
-"input":"&sbquo",
-"output": ["ParseError", ["Character", "&sbquo"]]},
-
-{"description": "Bad named entity: Scaron without a semi-colon.",
-"input":"&Scaron",
-"output": ["ParseError", ["Character", "&Scaron"]]},
-
-{"description": "Bad named entity: scaron without a semi-colon.",
-"input":"&scaron",
-"output": ["ParseError", ["Character", "&scaron"]]},
-
-{"description": "Bad named entity: sdot without a semi-colon.",
-"input":"&sdot",
-"output": ["ParseError", ["Character", "&sdot"]]},
-
-{"description": "Bad named entity: Sigma without a semi-colon.",
-"input":"&Sigma",
-"output": ["ParseError", ["Character", "&Sigma"]]},
-
-{"description": "Bad named entity: sigma without a semi-colon.",
-"input":"&sigma",
-"output": ["ParseError", ["Character", "&sigma"]]},
-
-{"description": "Bad named entity: sigmaf without a semi-colon.",
-"input":"&sigmaf",
-"output": ["ParseError", ["Character", "&sigmaf"]]},
-
-{"description": "Bad named entity: sim without a semi-colon.",
-"input":"&sim",
-"output": ["ParseError", ["Character", "&sim"]]},
-
-{"description": "Bad named entity: spades without a semi-colon.",
-"input":"&spades",
-"output": ["ParseError", ["Character", "&spades"]]},
-
-{"description": "Bad named entity: sub without a semi-colon.",
-"input":"&sub",
-"output": ["ParseError", ["Character", "&sub"]]},
-
-{"description": "Bad named entity: sube without a semi-colon.",
-"input":"&sube",
-"output": ["ParseError", ["Character", "&sube"]]},
-
-{"description": "Bad named entity: sum without a semi-colon.",
-"input":"&sum",
-"output": ["ParseError", ["Character", "&sum"]]},
-
-{"description": "Bad named entity: sup without a semi-colon.",
-"input":"&sup",
-"output": ["ParseError", ["Character", "&sup"]]},
-
-{"description": "Bad named entity: supe without a semi-colon.",
-"input":"&supe",
-"output": ["ParseError", ["Character", "&supe"]]},
-
-{"description": "Bad named entity: Tau without a semi-colon.",
-"input":"&Tau",
-"output": ["ParseError", ["Character", "&Tau"]]},
-
-{"description": "Bad named entity: tau without a semi-colon.",
-"input":"&tau",
-"output": ["ParseError", ["Character", "&tau"]]},
-
-{"description": "Bad named entity: there4 without a semi-colon.",
-"input":"&there4",
-"output": ["ParseError", ["Character", "&there4"]]},
-
-{"description": "Bad named entity: Theta without a semi-colon.",
-"input":"&Theta",
-"output": ["ParseError", ["Character", "&Theta"]]},
-
-{"description": "Bad named entity: theta without a semi-colon.",
-"input":"&theta",
-"output": ["ParseError", ["Character", "&theta"]]},
-
-{"description": "Bad named entity: thetasym without a semi-colon.",
-"input":"&thetasym",
-"output": ["ParseError", ["Character", "&thetasym"]]},
-
-{"description": "Bad named entity: thinsp without a semi-colon.",
-"input":"&thinsp",
-"output": ["ParseError", ["Character", "&thinsp"]]},
-
-{"description": "Bad named entity: tilde without a semi-colon.",
-"input":"&tilde",
-"output": ["ParseError", ["Character", "&tilde"]]},
-
-{"description": "Bad named entity: trade without a semi-colon.",
-"input":"&trade",
-"output": ["ParseError", ["Character", "&trade"]]},
-
-{"description": "Bad named entity: uarr without a semi-colon.",
-"input":"&uarr",
-"output": ["ParseError", ["Character", "&uarr"]]},
-
-{"description": "Bad named entity: uarr without a semi-colon.",
-"input":"&uarr",
-"output": ["ParseError", ["Character", "&uarr"]]},
-
-{"description": "Bad named entity: upsih without a semi-colon.",
-"input":"&upsih",
-"output": ["ParseError", ["Character", "&upsih"]]},
-
-{"description": "Bad named entity: Upsilon without a semi-colon.",
-"input":"&Upsilon",
-"output": ["ParseError", ["Character", "&Upsilon"]]},
-
-{"description": "Bad named entity: upsilon without a semi-colon.",
-"input":"&upsilon",
-"output": ["ParseError", ["Character", "&upsilon"]]},
-
-{"description": "Bad named entity: weierp without a semi-colon.",
-"input":"&weierp",
-"output": ["ParseError", ["Character", "&weierp"]]},
-
-{"description": "Bad named entity: Xi without a semi-colon.",
-"input":"&Xi",
-"output": ["ParseError", ["Character", "&Xi"]]},
-
-{"description": "Bad named entity: xi without a semi-colon.",
-"input":"&xi",
-"output": ["ParseError", ["Character", "&xi"]]},
-
-{"description": "Bad named entity: Yuml without a semi-colon.",
-"input":"&Yuml",
-"output": ["ParseError", ["Character", "&Yuml"]]},
-
-{"description": "Bad named entity: Zeta without a semi-colon.",
-"input":"&Zeta",
-"output": ["ParseError", ["Character", "&Zeta"]]},
-
-{"description": "Bad named entity: zeta without a semi-colon.",
-"input":"&zeta",
-"output": ["ParseError", ["Character", "&zeta"]]},
-
-{"description": "Bad named entity: zwj without a semi-colon.",
-"input":"&zwj",
-"output": ["ParseError", ["Character", "&zwj"]]},
-
-{"description": "Bad named entity: zwnj without a semi-colon.",
-"input":"&zwnj",
-"output": ["ParseError", ["Character", "&zwnj"]]},
-
-{"description": "Bad named entity: zwnj without a semi-colon.",
-"input":"&zwnj",
-"output": ["ParseError", ["Character", "&zwnj"]]},
+{"description": "Entity name followed by the equals sign in an attribute value.",
+"input":"<h a='&lang='>",
+"output": [["StartTag", "h", {"a": "&lang="}]]},

{"description": "CR as numeric entity",
"input":"&#013;",
-"output": ["ParseError", ["Character", "\n"]]},
+"output": ["ParseError", ["Character", "\r"]]},

{"description": "CR as hexadecimal numeric entity",
"input":"&#x00D;",
-"output": ["ParseError", ["Character", "\n"]]},
+"output": ["ParseError", ["Character", "\r"]]},

{"description": "Windows-1252 EURO SIGN numeric entity.",
"input":"&#0128;",
@@ -2094,7 +22,7 @@

{"description": "Windows-1252 REPLACEMENT CHAR numeric entity.",
"input":"&#0129;",
-"output": ["ParseError", ["Character", "\uFFFD"]]},
+"output": ["ParseError", ["Character", "\u0081"]]},

{"description": "Windows-1252 SINGLE LOW-9 QUOTATION MARK numeric entity.",
"input":"&#0130;",
@@ -2142,7 +70,7 @@

{"description": "Windows-1252 REPLACEMENT CHAR numeric entity.",
"input":"&#0141;",
-"output": ["ParseError", ["Character", "\uFFFD"]]},
+"output": ["ParseError", ["Character", "\u008D"]]},

{"description": "Windows-1252 LATIN CAPITAL LETTER Z WITH CARON numeric entity.",
"input":"&#0142;",
@@ -2150,11 +78,11 @@

{"description": "Windows-1252 REPLACEMENT CHAR numeric entity.",
"input":"&#0143;",
-"output": ["ParseError", ["Character", "\uFFFD"]]},
+"output": ["ParseError", ["Character", "\u008F"]]},

{"description": "Windows-1252 REPLACEMENT CHAR numeric entity.",
"input":"&#0144;",
-"output": ["ParseError", ["Character", "\uFFFD"]]},
+"output": ["ParseError", ["Character", "\u0090"]]},

{"description": "Windows-1252 LEFT SINGLE QUOTATION MARK numeric entity.",
"input":"&#0145;",
@@ -2206,7 +134,7 @@

{"description": "Windows-1252 REPLACEMENT CHAR numeric entity.",
"input":"&#0157;",
-"output": ["ParseError", ["Character", "\uFFFD"]]},
+"output": ["ParseError", ["Character", "\u009D"]]},

{"description": "Windows-1252 EURO SIGN hexadecimal numeric entity.",
"input":"&#x080;",
@@ -2214,7 +142,7 @@

{"description": "Windows-1252 REPLACEMENT CHAR hexadecimal numeric entity.",
"input":"&#x081;",
-"output": ["ParseError", ["Character", "\uFFFD"]]},
+"output": ["ParseError", ["Character", "\u0081"]]},

{"description": "Windows-1252 SINGLE LOW-9 QUOTATION MARK hexadecimal numeric entity.",
"input":"&#x082;",
@@ -2262,7 +190,7 @@

{"description": "Windows-1252 REPLACEMENT CHAR hexadecimal numeric entity.",
"input":"&#x08D;",
-"output": ["ParseError", ["Character", "\uFFFD"]]},
+"output": ["ParseError", ["Character", "\u008D"]]},

{"description": "Windows-1252 LATIN CAPITAL LETTER Z WITH CARON hexadecimal numeric entity.",
"input":"&#x08E;",
@@ -2270,11 +198,11 @@

{"description": "Windows-1252 REPLACEMENT CHAR hexadecimal numeric entity.",
"input":"&#x08F;",
-"output": ["ParseError", ["Character", "\uFFFD"]]},
+"output": ["ParseError", ["Character", "\u008F"]]},

{"description": "Windows-1252 REPLACEMENT CHAR hexadecimal numeric entity.",
"input":"&#x090;",
-"output": ["ParseError", ["Character", "\uFFFD"]]},
+"output": ["ParseError", ["Character", "\u0090"]]},

{"description": "Windows-1252 LEFT SINGLE QUOTATION MARK hexadecimal numeric entity.",
"input":"&#x091;",
@@ -2326,7 +254,7 @@

{"description": "Windows-1252 REPLACEMENT CHAR hexadecimal numeric entity.",
"input":"&#x09D;",
-"output": ["ParseError", ["Character", "\uFFFD"]]},
+"output": ["ParseError", ["Character", "\u009D"]]},

{"description": "Windows-1252 LATIN SMALL LETTER Z WITH CARON hexadecimal numeric entity.",
"input":"&#x09E;",
@@ -2334,6 +262,22 @@

{"description": "Windows-1252 LATIN CAPITAL LETTER Y WITH DIAERESIS hexadecimal numeric entity.",
"input":"&#x09F;",
-"output": ["ParseError", ["Character", "\u0178"]]}
+"output": ["ParseError", ["Character", "\u0178"]]},
+
+{"description": "Decimal numeric entity followed by hex character a.",
+"input":"&#97a",
+"output": ["ParseError", ["Character", "aa"]]},
+
+{"description": "Decimal numeric entity followed by hex character A.",
+"input":"&#97A",
+"output": ["ParseError", ["Character", "aA"]]},
+
+{"description": "Decimal numeric entity followed by hex character f.",
+"input":"&#97f",
+"output": ["ParseError", ["Character", "af"]]},
+
+{"description": "Decimal numeric entity followed by hex character F.",
+"input":"&#97F",
+"output": ["ParseError", ["Character", "aF"]]}

]}
diff --git a/test/data/tokeniser2/escapeFlag.test b/test/data/tokeniser2/escapeFlag.test
index 4c4bf51..18cb430 100644
--- a/test/data/tokeniser2/escapeFlag.test
+++ b/test/data/tokeniser2/escapeFlag.test
@@ -1,33 +1,33 @@
{"tests": [

-{"description":"Commented close tag in [R]CDATA",
-"contentModelFlags":["RCDATA", "CDATA"],
+{"description":"Commented close tag in RCDATA or RAWTEXT",
+"initialStates":["RCDATA state", "RAWTEXT state"],
"lastStartTag":"xmp",
"input":"foo<!--</xmp>--></xmp>",
-"output":[["Character", "foo<!--</xmp>-->"], ["EndTag", "xmp"]]},
+"output":[["Character", "foo<!--"], ["EndTag", "xmp"], ["Character", "-->"], ["EndTag", "xmp"]]},

-{"description":"Bogus comment in [R]CDATA",
-"contentModelFlags":["RCDATA", "CDATA"],
+{"description":"Bogus comment in RCDATA or RAWTEXT",
+"initialStates":["RCDATA state", "RAWTEXT state"],
"lastStartTag":"xmp",
"input":"foo<!-->baz</xmp>",
"output":[["Character", "foo<!-->baz"], ["EndTag", "xmp"]]},

-{"description":"End tag surrounded by bogus comment in [R]CDATA",
-"contentModelFlags":["RCDATA", "CDATA"],
+{"description":"End tag surrounded by bogus comment in RCDATA or RAWTEXT",
+"initialStates":["RCDATA state", "RAWTEXT state"],
"lastStartTag":"xmp",
"input":"foo<!--></xmp><!-->baz</xmp>",
"output":[["Character", "foo<!-->"], ["EndTag", "xmp"], "ParseError", ["Comment", ""], ["Character", "baz"], ["EndTag", "xmp"]]},

{"description":"Commented entities in RCDATA",
-"contentModelFlags":["RCDATA"],
+"initialStates":["RCDATA state"],
"lastStartTag":"xmp",
"input":" &amp; <!-- &amp; --> &amp; </xmp>",
-"output":[["Character", " & <!-- &amp; --> & "], ["EndTag", "xmp"]]},
+"output":[["Character", " & <!-- & --> & "], ["EndTag", "xmp"]]},

-{"description":"Incorrect comment ending sequences in [R]CDATA",
-"contentModelFlags":["RCDATA", "CDATA"],
+{"description":"Incorrect comment ending sequences in RCDATA or RAWTEXT",
+"initialStates":["RCDATA state", "RAWTEXT state"],
"lastStartTag":"xmp",
"input":"foo<!-- x --x>x-- >x--!>x--<></xmp>",
-"output":[["Character", "foo<!-- x --x>x-- >x--!>x--<></xmp>"]]}
+"output":[["Character", "foo<!-- x --x>x-- >x--!>x--<>"], ["EndTag", "xmp"]]}

]}
diff --git a/test/data/tokeniser2/numericEntities.test b/test/data/tokeniser2/numericEntities.test
index 78a8a13..36c8228 100644
--- a/test/data/tokeniser2/numericEntities.test
+++ b/test/data/tokeniser2/numericEntities.test
@@ -6,115 +6,115 @@

{"description": "Invalid numeric entity character U+0001",
"input": "&#x0001;",
-"output": ["ParseError", ["Character", "\uFFFD"]]},
+"output": ["ParseError", ["Character", "\u0001"]]},

{"description": "Invalid numeric entity character U+0002",
"input": "&#x0002;",
-"output": ["ParseError", ["Character", "\uFFFD"]]},
+"output": ["ParseError", ["Character", "\u0002"]]},

{"description": "Invalid numeric entity character U+0003",
"input": "&#x0003;",
-"output": ["ParseError", ["Character", "\uFFFD"]]},
+"output": ["ParseError", ["Character", "\u0003"]]},

{"description": "Invalid numeric entity character U+0004",
"input": "&#x0004;",
-"output": ["ParseError", ["Character", "\uFFFD"]]},
+"output": ["ParseError", ["Character", "\u0004"]]},

{"description": "Invalid numeric entity character U+0005",
"input": "&#x0005;",
-"output": ["ParseError", ["Character", "\uFFFD"]]},
+"output": ["ParseError", ["Character", "\u0005"]]},

{"description": "Invalid numeric entity character U+0006",
"input": "&#x0006;",
-"output": ["ParseError", ["Character", "\uFFFD"]]},
+"output": ["ParseError", ["Character", "\u0006"]]},

{"description": "Invalid numeric entity character U+0007",
"input": "&#x0007;",
-"output": ["ParseError", ["Character", "\uFFFD"]]},
+"output": ["ParseError", ["Character", "\u0007"]]},

{"description": "Invalid numeric entity character U+0008",
"input": "&#x0008;",
-"output": ["ParseError", ["Character", "\uFFFD"]]},
+"output": ["ParseError", ["Character", "\u0008"]]},

{"description": "Invalid numeric entity character U+000B",
"input": "&#x000b;",
-"output": ["ParseError", ["Character", "\uFFFD"]]},
+"output": ["ParseError", ["Character", "\u000b"]]},

{"description": "Invalid numeric entity character U+000E",
"input": "&#x000e;",
-"output": ["ParseError", ["Character", "\uFFFD"]]},
+"output": ["ParseError", ["Character", "\u000e"]]},

{"description": "Invalid numeric entity character U+000F",
"input": "&#x000f;",
-"output": ["ParseError", ["Character", "\uFFFD"]]},
+"output": ["ParseError", ["Character", "\u000f"]]},

{"description": "Invalid numeric entity character U+0010",
"input": "&#x0010;",
-"output": ["ParseError", ["Character", "\uFFFD"]]},
+"output": ["ParseError", ["Character", "\u0010"]]},

{"description": "Invalid numeric entity character U+0011",
"input": "&#x0011;",
-"output": ["ParseError", ["Character", "\uFFFD"]]},
+"output": ["ParseError", ["Character", "\u0011"]]},

{"description": "Invalid numeric entity character U+0012",
"input": "&#x0012;",
-"output": ["ParseError", ["Character", "\uFFFD"]]},
+"output": ["ParseError", ["Character", "\u0012"]]},

{"description": "Invalid numeric entity character U+0013",
"input": "&#x0013;",
-"output": ["ParseError", ["Character", "\uFFFD"]]},
+"output": ["ParseError", ["Character", "\u0013"]]},

{"description": "Invalid numeric entity character U+0014",
"input": "&#x0014;",
-"output": ["ParseError", ["Character", "\uFFFD"]]},
+"output": ["ParseError", ["Character", "\u0014"]]},

{"description": "Invalid numeric entity character U+0015",
"input": "&#x0015;",
-"output": ["ParseError", ["Character", "\uFFFD"]]},
+"output": ["ParseError", ["Character", "\u0015"]]},

{"description": "Invalid numeric entity character U+0016",
"input": "&#x0016;",
-"output": ["ParseError", ["Character", "\uFFFD"]]},
+"output": ["ParseError", ["Character", "\u0016"]]},

{"description": "Invalid numeric entity character U+0017",
"input": "&#x0017;",
-"output": ["ParseError", ["Character", "\uFFFD"]]},
+"output": ["ParseError", ["Character", "\u0017"]]},

{"description": "Invalid numeric entity character U+0018",
"input": "&#x0018;",
-"output": ["ParseError", ["Character", "\uFFFD"]]},
+"output": ["ParseError", ["Character", "\u0018"]]},

{"description": "Invalid numeric entity character U+0019",
"input": "&#x0019;",
-"output": ["ParseError", ["Character", "\uFFFD"]]},
+"output": ["ParseError", ["Character", "\u0019"]]},

{"description": "Invalid numeric entity character U+001A",
"input": "&#x001a;",
-"output": ["ParseError", ["Character", "\uFFFD"]]},
+"output": ["ParseError", ["Character", "\u001a"]]},

{"description": "Invalid numeric entity character U+001B",
"input": "&#x001b;",
-"output": ["ParseError", ["Character", "\uFFFD"]]},
+"output": ["ParseError", ["Character", "\u001b"]]},

{"description": "Invalid numeric entity character U+001C",
"input": "&#x001c;",
-"output": ["ParseError", ["Character", "\uFFFD"]]},
+"output": ["ParseError", ["Character", "\u001c"]]},

{"description": "Invalid numeric entity character U+001D",
"input": "&#x001d;",
-"output": ["ParseError", ["Character", "\uFFFD"]]},
+"output": ["ParseError", ["Character", "\u001d"]]},

{"description": "Invalid numeric entity character U+001E",
"input": "&#x001e;",
-"output": ["ParseError", ["Character", "\uFFFD"]]},
+"output": ["ParseError", ["Character", "\u001e"]]},

{"description": "Invalid numeric entity character U+001F",
"input": "&#x001f;",
-"output": ["ParseError", ["Character", "\uFFFD"]]},
+"output": ["ParseError", ["Character", "\u001f"]]},

{"description": "Invalid numeric entity character U+007F",
"input": "&#x007f;",
-"output": ["ParseError", ["Character", "\uFFFD"]]},
+"output": ["ParseError", ["Character", "\u007f"]]},

{"description": "Invalid numeric entity character U+D800",
"input": "&#xd800;",
@@ -126,267 +126,267 @@

{"description": "Invalid numeric entity character U+FDD0",
"input": "&#xfdd0;",
-"output": ["ParseError", ["Character", "\uFFFD"]]},
+"output": ["ParseError", ["Character", "\ufdd0"]]},

{"description": "Invalid numeric entity character U+FDD1",
"input": "&#xfdd1;",
-"output": ["ParseError", ["Character", "\uFFFD"]]},
+"output": ["ParseError", ["Character", "\ufdd1"]]},

{"description": "Invalid numeric entity character U+FDD2",
"input": "&#xfdd2;",
-"output": ["ParseError", ["Character", "\uFFFD"]]},
+"output": ["ParseError", ["Character", "\ufdd2"]]},

{"description": "Invalid numeric entity character U+FDD3",
"input": "&#xfdd3;",
-"output": ["ParseError", ["Character", "\uFFFD"]]},
+"output": ["ParseError", ["Character", "\ufdd3"]]},

{"description": "Invalid numeric entity character U+FDD4",
"input": "&#xfdd4;",
-"output": ["ParseError", ["Character", "\uFFFD"]]},
+"output": ["ParseError", ["Character", "\ufdd4"]]},

{"description": "Invalid numeric entity character U+FDD5",
"input": "&#xfdd5;",
-"output": ["ParseError", ["Character", "\uFFFD"]]},
+"output": ["ParseError", ["Character", "\ufdd5"]]},

{"description": "Invalid numeric entity character U+FDD6",
"input": "&#xfdd6;",
-"output": ["ParseError", ["Character", "\uFFFD"]]},
+"output": ["ParseError", ["Character", "\ufdd6"]]},

{"description": "Invalid numeric entity character U+FDD7",
"input": "&#xfdd7;",
-"output": ["ParseError", ["Character", "\uFFFD"]]},
+"output": ["ParseError", ["Character", "\ufdd7"]]},

{"description": "Invalid numeric entity character U+FDD8",
"input": "&#xfdd8;",
-"output": ["ParseError", ["Character", "\uFFFD"]]},
+"output": ["ParseError", ["Character", "\ufdd8"]]},

{"description": "Invalid numeric entity character U+FDD9",
"input": "&#xfdd9;",
-"output": ["ParseError", ["Character", "\uFFFD"]]},
+"output": ["ParseError", ["Character", "\ufdd9"]]},

{"description": "Invalid numeric entity character U+FDDA",
"input": "&#xfdda;",
-"output": ["ParseError", ["Character", "\uFFFD"]]},
+"output": ["ParseError", ["Character", "\ufdda"]]},

{"description": "Invalid numeric entity character U+FDDB",
"input": "&#xfddb;",
-"output": ["ParseError", ["Character", "\uFFFD"]]},
+"output": ["ParseError", ["Character", "\ufddb"]]},

{"description": "Invalid numeric entity character U+FDDC",
"input": "&#xfddc;",
-"output": ["ParseError", ["Character", "\uFFFD"]]},
+"output": ["ParseError", ["Character", "\ufddc"]]},

{"description": "Invalid numeric entity character U+FDDD",
"input": "&#xfddd;",
-"output": ["ParseError", ["Character", "\uFFFD"]]},
+"output": ["ParseError", ["Character", "\ufddd"]]},

{"description": "Invalid numeric entity character U+FDDE",
"input": "&#xfdde;",
-"output": ["ParseError", ["Character", "\uFFFD"]]},
+"output": ["ParseError", ["Character", "\ufdde"]]},

{"description": "Invalid numeric entity character U+FDDF",
"input": "&#xfddf;",
-"output": ["ParseError", ["Character", "\uFFFD"]]},
+"output": ["ParseError", ["Character", "\ufddf"]]},

{"description": "Invalid numeric entity character U+FDE0",
"input": "&#xfde0;",
-"output": ["ParseError", ["Character", "\uFFFD"]]},
+"output": ["ParseError", ["Character", "\ufde0"]]},

{"description": "Invalid numeric entity character U+FDE1",
"input": "&#xfde1;",
-"output": ["ParseError", ["Character", "\uFFFD"]]},
+"output": ["ParseError", ["Character", "\ufde1"]]},

{"description": "Invalid numeric entity character U+FDE2",
"input": "&#xfde2;",
-"output": ["ParseError", ["Character", "\uFFFD"]]},
+"output": ["ParseError", ["Character", "\ufde2"]]},

{"description": "Invalid numeric entity character U+FDE3",
"input": "&#xfde3;",
-"output": ["ParseError", ["Character", "\uFFFD"]]},
+"output": ["ParseError", ["Character", "\ufde3"]]},

{"description": "Invalid numeric entity character U+FDE4",
"input": "&#xfde4;",
-"output": ["ParseError", ["Character", "\uFFFD"]]},
+"output": ["ParseError", ["Character", "\ufde4"]]},

{"description": "Invalid numeric entity character U+FDE5",
"input": "&#xfde5;",
-"output": ["ParseError", ["Character", "\uFFFD"]]},
+"output": ["ParseError", ["Character", "\ufde5"]]},

{"description": "Invalid numeric entity character U+FDE6",
"input": "&#xfde6;",
-"output": ["ParseError", ["Character", "\uFFFD"]]},
+"output": ["ParseError", ["Character", "\ufde6"]]},

{"description": "Invalid numeric entity character U+FDE7",
"input": "&#xfde7;",
-"output": ["ParseError", ["Character", "\uFFFD"]]},
+"output": ["ParseError", ["Character", "\ufde7"]]},

{"description": "Invalid numeric entity character U+FDE8",
"input": "&#xfde8;",
-"output": ["ParseError", ["Character", "\uFFFD"]]},
+"output": ["ParseError", ["Character", "\ufde8"]]},

{"description": "Invalid numeric entity character U+FDE9",
"input": "&#xfde9;",
-"output": ["ParseError", ["Character", "\uFFFD"]]},
+"output": ["ParseError", ["Character", "\ufde9"]]},

{"description": "Invalid numeric entity character U+FDEA",
"input": "&#xfdea;",
-"output": ["ParseError", ["Character", "\uFFFD"]]},
+"output": ["ParseError", ["Character", "\ufdea"]]},

{"description": "Invalid numeric entity character U+FDEB",
"input": "&#xfdeb;",
-"output": ["ParseError", ["Character", "\uFFFD"]]},
+"output": ["ParseError", ["Character", "\ufdeb"]]},

{"description": "Invalid numeric entity character U+FDEC",
"input": "&#xfdec;",
-"output": ["ParseError", ["Character", "\uFFFD"]]},
+"output": ["ParseError", ["Character", "\ufdec"]]},

{"description": "Invalid numeric entity character U+FDED",
"input": "&#xfded;",
-"output": ["ParseError", ["Character", "\uFFFD"]]},
+"output": ["ParseError", ["Character", "\ufded"]]},

{"description": "Invalid numeric entity character U+FDEE",
"input": "&#xfdee;",
-"output": ["ParseError", ["Character", "\uFFFD"]]},
+"output": ["ParseError", ["Character", "\ufdee"]]},

{"description": "Invalid numeric entity character U+FDEF",
"input": "&#xfdef;",
-"output": ["ParseError", ["Character", "\uFFFD"]]},
+"output": ["ParseError", ["Character", "\ufdef"]]},

{"description": "Invalid numeric entity character U+FFFE",
"input": "&#xfffe;",
-"output": ["ParseError", ["Character", "\uFFFD"]]},
+"output": ["ParseError", ["Character", "\ufffe"]]},

{"description": "Invalid numeric entity character U+FFFF",
"input": "&#xffff;",
-"output": ["ParseError", ["Character", "\uFFFD"]]},
+"output": ["ParseError", ["Character", "\uffff"]]},

{"description": "Invalid numeric entity character U+1FFFE",
"input": "&#x1fffe;",
-"output": ["ParseError", ["Character", "\uFFFD"]]},
+"output": ["ParseError", ["Character", "\uD83F\uDFFE"]]},

{"description": "Invalid numeric entity character U+1FFFF",
"input": "&#x1ffff;",
-"output": ["ParseError", ["Character", "\uFFFD"]]},
+"output": ["ParseError", ["Character", "\uD83F\uDFFF"]]},

{"description": "Invalid numeric entity character U+2FFFE",
"input": "&#x2fffe;",
-"output": ["ParseError", ["Character", "\uFFFD"]]},
+"output": ["ParseError", ["Character", "\uD87F\uDFFE"]]},

{"description": "Invalid numeric entity character U+2FFFF",
"input": "&#x2ffff;",
-"output": ["ParseError", ["Character", "\uFFFD"]]},
+"output": ["ParseError", ["Character", "\uD87F\uDFFF"]]},

{"description": "Invalid numeric entity character U+3FFFE",
"input": "&#x3fffe;",
-"output": ["ParseError", ["Character", "\uFFFD"]]},
+"output": ["ParseError", ["Character", "\uD8BF\uDFFE"]]},

{"description": "Invalid numeric entity character U+3FFFF",
"input": "&#x3ffff;",
-"output": ["ParseError", ["Character", "\uFFFD"]]},
+"output": ["ParseError", ["Character", "\uD8BF\uDFFF"]]},

{"description": "Invalid numeric entity character U+4FFFE",
"input": "&#x4fffe;",
-"output": ["ParseError", ["Character", "\uFFFD"]]},
+"output": ["ParseError", ["Character", "\uD8FF\uDFFE"]]},

{"description": "Invalid numeric entity character U+4FFFF",
"input": "&#x4ffff;",
-"output": ["ParseError", ["Character", "\uFFFD"]]},
+"output": ["ParseError", ["Character", "\uD8FF\uDFFF"]]},

{"description": "Invalid numeric entity character U+5FFFE",
"input": "&#x5fffe;",
-"output": ["ParseError", ["Character", "\uFFFD"]]},
+"output": ["ParseError", ["Character", "\uD93F\uDFFE"]]},

{"description": "Invalid numeric entity character U+5FFFF",
"input": "&#x5ffff;",
-"output": ["ParseError", ["Character", "\uFFFD"]]},
+"output": ["ParseError", ["Character", "\uD93F\uDFFF"]]},

{"description": "Invalid numeric entity character U+6FFFE",
"input": "&#x6fffe;",
-"output": ["ParseError", ["Character", "\uFFFD"]]},
+"output": ["ParseError", ["Character", "\uD97F\uDFFE"]]},

{"description": "Invalid numeric entity character U+6FFFF",
"input": "&#x6ffff;",
-"output": ["ParseError", ["Character", "\uFFFD"]]},
+"output": ["ParseError", ["Character", "\uD97F\uDFFF"]]},

{"description": "Invalid numeric entity character U+7FFFE",
"input": "&#x7fffe;",
-"output": ["ParseError", ["Character", "\uFFFD"]]},
+"output": ["ParseError", ["Character", "\uD9BF\uDFFE"]]},

{"description": "Invalid numeric entity character U+7FFFF",
"input": "&#x7ffff;",
-"output": ["ParseError", ["Character", "\uFFFD"]]},
+"output": ["ParseError", ["Character", "\uD9BF\uDFFF"]]},

{"description": "Invalid numeric entity character U+8FFFE",
"input": "&#x8fffe;",
-"output": ["ParseError", ["Character", "\uFFFD"]]},
+"output": ["ParseError", ["Character", "\uD9FF\uDFFE"]]},

{"description": "Invalid numeric entity character U+8FFFF",
"input": "&#x8ffff;",
-"output": ["ParseError", ["Character", "\uFFFD"]]},
+"output": ["ParseError", ["Character", "\uD9FF\uDFFF"]]},

{"description": "Invalid numeric entity character U+9FFFE",
"input": "&#x9fffe;",
-"output": ["ParseError", ["Character", "\uFFFD"]]},
+"output": ["ParseError", ["Character", "\uDA3F\uDFFE"]]},

{"description": "Invalid numeric entity character U+9FFFF",
"input": "&#x9ffff;",
-"output": ["ParseError", ["Character", "\uFFFD"]]},
+"output": ["ParseError", ["Character", "\uDA3F\uDFFF"]]},

{"description": "Invalid numeric entity character U+AFFFE",
"input": "&#xafffe;",
-"output": ["ParseError", ["Character", "\uFFFD"]]},
+"output": ["ParseError", ["Character", "\uDA7F\uDFFE"]]},

{"description": "Invalid numeric entity character U+AFFFF",
"input": "&#xaffff;",
-"output": ["ParseError", ["Character", "\uFFFD"]]},
+"output": ["ParseError", ["Character", "\uDA7F\uDFFF"]]},

{"description": "Invalid numeric entity character U+BFFFE",
"input": "&#xbfffe;",
-"output": ["ParseError", ["Character", "\uFFFD"]]},
+"output": ["ParseError", ["Character", "\uDABF\uDFFE"]]},

{"description": "Invalid numeric entity character U+BFFFF",
"input": "&#xbffff;",
-"output": ["ParseError", ["Character", "\uFFFD"]]},
+"output": ["ParseError", ["Character", "\uDABF\uDFFF"]]},

{"description": "Invalid numeric entity character U+CFFFE",
"input": "&#xcfffe;",
-"output": ["ParseError", ["Character", "\uFFFD"]]},
+"output": ["ParseError", ["Character", "\uDAFF\uDFFE"]]},

{"description": "Invalid numeric entity character U+CFFFF",
"input": "&#xcffff;",
-"output": ["ParseError", ["Character", "\uFFFD"]]},
+"output": ["ParseError", ["Character", "\uDAFF\uDFFF"]]},

{"description": "Invalid numeric entity character U+DFFFE",
"input": "&#xdfffe;",
-"output": ["ParseError", ["Character", "\uFFFD"]]},
+"output": ["ParseError", ["Character", "\uDB3F\uDFFE"]]},

{"description": "Invalid numeric entity character U+DFFFF",
"input": "&#xdffff;",
-"output": ["ParseError", ["Character", "\uFFFD"]]},
+"output": ["ParseError", ["Character", "\uDB3F\uDFFF"]]},

{"description": "Invalid numeric entity character U+EFFFE",
"input": "&#xefffe;",
-"output": ["ParseError", ["Character", "\uFFFD"]]},
+"output": ["ParseError", ["Character", "\uDB7F\uDFFE"]]},

{"description": "Invalid numeric entity character U+EFFFF",
"input": "&#xeffff;",
-"output": ["ParseError", ["Character", "\uFFFD"]]},
+"output": ["ParseError", ["Character", "\uDB7F\uDFFF"]]},

{"description": "Invalid numeric entity character U+FFFFE",
"input": "&#xffffe;",
-"output": ["ParseError", ["Character", "\uFFFD"]]},
+"output": ["ParseError", ["Character", "\uDBBF\uDFFE"]]},

{"description": "Invalid numeric entity character U+FFFFF",
"input": "&#xfffff;",
-"output": ["ParseError", ["Character", "\uFFFD"]]},
+"output": ["ParseError", ["Character", "\uDBBF\uDFFF"]]},

{"description": "Invalid numeric entity character U+10FFFE",
"input": "&#x10fffe;",
-"output": ["ParseError", ["Character", "\uFFFD"]]},
+"output": ["ParseError", ["Character", "\uDBFF\uDFFE"]]},

{"description": "Invalid numeric entity character U+10FFFF",
"input": "&#x10ffff;",
-"output": ["ParseError", ["Character", "\uFFFD"]]},
+"output": ["ParseError", ["Character", "\uDBFF\uDFFF"]]},

{"description": "Valid numeric entity character U+0009",
"input": "&#x0009;",
@@ -1309,3 +1309,5 @@
"output": [["Character", "\uDBFF\uDFFD"]]}

]}
+
+
diff --git a/test/data/tokeniser2/test1.test b/test/data/tokeniser2/test1.test
index b62c02f..b97b2cb 100644
--- a/test/data/tokeniser2/test1.test
+++ b/test/data/tokeniser2/test1.test
@@ -115,7 +115,7 @@

{"description":"Unfinished entity",
"input":"&f",
-"output":["ParseError", ["Character", "&f"]]},
+"output":[["Character", "&f"]]},

{"description":"Ampersand, number sign",
"input":"&#",
@@ -143,11 +143,11 @@

{"description":"Partial entity match at end of file",
"input":"I'm &no",
-"output":[["Character","I'm "], "ParseError", ["Character", "&no"]]},
+"output":[["Character","I'm &no"]]},

{"description":"Non-ASCII character reference name",
"input":"&\u00AC;",
-"output":["ParseError", ["Character", "&\u00AC;"]]},
+"output":[["Character", "&\u00AC;"]]},

{"description":"ASCII decimal entity",
"input":"&#0036;",
@@ -163,18 +163,34 @@

{"description":"Entity in attribute without semicolon ending in x",
"input":"<h a='&notx'>",
-"output":["ParseError", ["StartTag", "h", {"a":"&notx"}]]},
+"output":[["StartTag", "h", {"a":"&notx"}]]},

{"description":"Entity in attribute without semicolon ending in 1",
"input":"<h a='&not1'>",
-"output":["ParseError", ["StartTag", "h", {"a":"&not1"}]]},
+"output":[["StartTag", "h", {"a":"&not1"}]]},

{"description":"Entity in attribute without semicolon ending in i",
"input":"<h a='&noti'>",
-"output":["ParseError", ["StartTag", "h", {"a":"&noti"}]]},
+"output":[["StartTag", "h", {"a":"&noti"}]]},

{"description":"Entity in attribute without semicolon",
"input":"<h a='&COPY'>",
"output":["ParseError", ["StartTag", "h", {"a":"\u00A9"}]]},

+{"description":"Unquoted attribute ending in ampersand",
+"input":"<s o=& t>",
+"output":[["StartTag","s",{"o":"&","t":""}]]},
+
+{"description":"Unquoted attribute at end of tag with final character of &, with tag followed by characters",
+"input":"<a a=a&>foo",
+"output":[["StartTag", "a", {"a":"a&"}], ["Character", "foo"]]},
+
+{"description":"plaintext element",
+ "input":"<plaintext>foobar",
+ "output":[["StartTag","plaintext",{}], ["Character","foobar"]]},
+
+{"description":"Open angled bracket in unquoted attribute value state",
+ "input":"<a a=f<>",
+ "output":["ParseError", ["StartTag", "a", {"a":"f<"}]]}
+
]}
diff --git a/test/data/tokeniser2/test2.test b/test/data/tokeniser2/test2.test
index a8d2e9e..87a8eba 100644
--- a/test/data/tokeniser2/test2.test
+++ b/test/data/tokeniser2/test2.test
@@ -2,7 +2,7 @@

{"description":"DOCTYPE without name",
"input":"<!DOCTYPE>",
-"output":["ParseError", "ParseError", ["DOCTYPE", "", null, null, false]]},
+"output":["ParseError", "ParseError", ["DOCTYPE", null, null, null, false]]},

{"description":"DOCTYPE without space before name",
"input":"<!DOCTYPEhtml>",
@@ -82,7 +82,7 @@

{"description":"Entity without a name",
"input":"&;",
-"output":["ParseError", ["Character", "&;"]]},
+"output":[["Character", "&;"]]},

{"description":"Unescaped ampersand in attribute value",
"input":"<h a='&'>",
@@ -132,6 +132,10 @@
"input":"foo < bar",
"output":[["Character", "foo "], "ParseError", ["Character", "< bar"]]},

+{"description":"Null Byte Replacement",
+"input":"\u0000",
+"output":["ParseError", ["Character", "\u0000"]]},
+
{"description":"Comment with dash",
"input":"<!---x",
"output":["ParseError", ["Comment", "-x"]]},
@@ -154,6 +158,22 @@

{"description":"Single-quote after attribute name",
"input":"<h a '>",
-"output":["ParseError", ["StartTag", "h", {"a":"", "'":""}]]}
+"output":["ParseError", ["StartTag", "h", {"a":"", "'":""}]]},
+
+{"description":"Empty end tag with following characters",
+"input":"a</>bc",
+"output":[["Character", "a"], "ParseError", ["Character", "bc"]]},
+
+{"description":"Empty end tag with following tag",
+"input":"a</><b>c",
+"output":[["Character", "a"], "ParseError", ["StartTag", "b", {}], ["Character", "c"]]},
+
+{"description":"Empty end tag with following comment",
+"input":"a</><!--b-->c",
+"output":[["Character", "a"], "ParseError", ["Comment", "b"], ["Character", "c"]]},
+
+{"description":"Empty end tag with following end tag",
+"input":"a</></b>c",
+"output":[["Character", "a"], "ParseError", ["EndTag", "b"], ["Character", "c"]]}

]}
diff --git a/test/data/tokeniser2/test3.test b/test/data/tokeniser2/test3.test
index b9cc093..8fc529a 100644
--- a/test/data/tokeniser2/test3.test
+++ b/test/data/tokeniser2/test3.test
@@ -80,6 +80,10 @@
"input":"<",
"output":["ParseError", ["Character", "<"]]},

+{"description":"<\\u0000",
+"input":"<\u0000",
+"output":["ParseError", ["Character", "<"], "ParseError", ["Character", "\u0000"]]},
+
{"description":"<\\u0009",
"input":"<\u0009",
"output":["ParseError", ["Character", "<\u0009"]]},
@@ -106,7 +110,7 @@

{"description":"<!\\u0000",
"input":"<!\u0000",
-"output":["ParseError", "ParseError", ["Comment", "\uFFFD"]]},
+"output":["ParseError", ["Comment", "\uFFFD"]]},

{"description":"<!\\u0009",
"input":"<!\u0009",
@@ -514,7 +518,7 @@

{"description":"<!----\\u0000",
"input":"<!----\u0000",
-"output":["ParseError", "ParseError", "ParseError", ["Comment", "--\uFFFD"]]},
+"output":["ParseError", "ParseError", ["Comment", "--\uFFFD"]]},

{"description":"<!----\\u0009",
"input":"<!----\u0009",
@@ -536,6 +540,62 @@
"input":"<!---- ",
"output":["ParseError", "ParseError", ["Comment", "-- "]]},

+{"description":"<!---- -",
+"input":"<!---- -",
+"output":["ParseError", "ParseError", ["Comment", "-- "]]},
+
+{"description":"<!---- --",
+"input":"<!---- --",
+"output":["ParseError", "ParseError", ["Comment", "-- "]]},
+
+{"description":"<!---- -->",
+"input":"<!---- -->",
+"output":["ParseError", ["Comment", "-- "]]},
+
+{"description":"<!---- -->",
+"input":"<!---- -->",
+"output":["ParseError", ["Comment", "-- "]]},
+
+{"description":"<!---- a-->",
+"input":"<!---- a-->",
+"output":["ParseError", ["Comment", "-- a"]]},
+
+{"description":"<!----!",
+"input":"<!----!",
+"output":["ParseError", "ParseError", ["Comment", ""]]},
+
+{"description":"<!----!>",
+"input":"<!----!>",
+"output":["ParseError", ["Comment", ""]]},
+
+{"description":"<!----!a",
+"input":"<!----!a",
+"output":["ParseError", "ParseError", ["Comment", "--!a"]]},
+
+{"description":"<!----!a-",
+"input":"<!----!a-",
+"output":["ParseError", "ParseError", ["Comment", "--!a"]]},
+
+{"description":"<!----!a--",
+"input":"<!----!a--",
+"output":["ParseError", "ParseError", ["Comment", "--!a"]]},
+
+{"description":"<!----!a-->",
+"input":"<!----!a-->",
+"output":["ParseError", ["Comment", "--!a"]]},
+
+{"description":"<!----!-",
+"input":"<!----!-",
+"output":["ParseError", "ParseError", ["Comment", "--!"]]},
+
+{"description":"<!----!--",
+"input":"<!----!--",
+"output":["ParseError", "ParseError", ["Comment", "--!"]]},
+
+{"description":"<!----!-->",
+"input":"<!----!-->",
+"output":["ParseError", ["Comment", "--!"]]},
+
{"description":"<!----\"",
"input":"<!----\"",
"output":["ParseError", "ParseError", ["Comment", "--\""]]},
@@ -854,7 +914,7 @@

{"description":"<!DOCTYPE",
"input":"<!DOCTYPE",
-"output":["ParseError", "ParseError", ["DOCTYPE", "", null, null, false]]},
+"output":["ParseError", ["DOCTYPE", null, null, null, false]]},

{"description":"<!DOCTYPE\\u0000",
"input":"<!DOCTYPE\u0000",
@@ -866,11 +926,11 @@

{"description":"<!DOCTYPE\\u0009",
"input":"<!DOCTYPE\u0009",
-"output":["ParseError", ["DOCTYPE", "", null, null, false]]},
+"output":["ParseError", ["DOCTYPE", null, null, null, false]]},

{"description":"<!DOCTYPE\\u000A",
"input":"<!DOCTYPE\u000A",
-"output":["ParseError", ["DOCTYPE", "", null, null, false]]},
+"output":["ParseError", ["DOCTYPE", null, null, null, false]]},

{"description":"<!DOCTYPE\\u000B",
"input":"<!DOCTYPE\u000B",
@@ -878,11 +938,11 @@

{"description":"<!DOCTYPE\\u000C",
"input":"<!DOCTYPE\u000C",
-"output":["ParseError", ["DOCTYPE", "", null, null, false]]},
+"output":["ParseError", ["DOCTYPE", null, null, null, false]]},

{"description":"<!DOCTYPE\\u000D",
"input":"<!DOCTYPE\u000D",
-"output":["ParseError", ["DOCTYPE", "", null, null, false]]},
+"output":["ParseError", ["DOCTYPE", null, null, null, false]]},

{"description":"<!DOCTYPE\\u001F",
"input":"<!DOCTYPE\u001F",
@@ -890,7 +950,7 @@

{"description":"<!DOCTYPE ",
"input":"<!DOCTYPE ",
-"output":["ParseError", ["DOCTYPE", "", null, null, false]]},
+"output":["ParseError", ["DOCTYPE", null, null, null, false]]},

{"description":"<!DOCTYPE \\u0000",
"input":"<!DOCTYPE \u0000",
@@ -902,11 +962,11 @@

{"description":"<!DOCTYPE \\u0009",
"input":"<!DOCTYPE \u0009",
-"output":["ParseError", ["DOCTYPE", "", null, null, false]]},
+"output":["ParseError", ["DOCTYPE", null, null, null, false]]},

{"description":"<!DOCTYPE \\u000A",
"input":"<!DOCTYPE \u000A",
-"output":["ParseError", ["DOCTYPE", "", null, null, false]]},
+"output":["ParseError", ["DOCTYPE", null, null, null, false]]},

{"description":"<!DOCTYPE \\u000B",
"input":"<!DOCTYPE \u000B",
@@ -914,11 +974,11 @@

{"description":"<!DOCTYPE \\u000C",
"input":"<!DOCTYPE \u000C",
-"output":["ParseError", ["DOCTYPE", "", null, null, false]]},
+"output":["ParseError", ["DOCTYPE", null, null, null, false]]},

{"description":"<!DOCTYPE \\u000D",
"input":"<!DOCTYPE \u000D",
-"output":["ParseError", ["DOCTYPE", "", null, null, false]]},
+"output":["ParseError", ["DOCTYPE", null, null, null, false]]},

{"description":"<!DOCTYPE \\u001F",
"input":"<!DOCTYPE \u001F",
@@ -926,7 +986,7 @@

{"description":"<!DOCTYPE ",
"input":"<!DOCTYPE ",
-"output":["ParseError", ["DOCTYPE", "", null, null, false]]},
+"output":["ParseError", ["DOCTYPE", null, null, null, false]]},

{"description":"<!DOCTYPE !",
"input":"<!DOCTYPE !",
@@ -974,7 +1034,7 @@

{"description":"<!DOCTYPE >",
"input":"<!DOCTYPE >",
-"output":["ParseError", ["DOCTYPE", "", null, null, false]]},
+"output":["ParseError", ["DOCTYPE", null, null, null, false]]},

{"description":"<!DOCTYPE ?",
"input":"<!DOCTYPE ?",
@@ -1050,7 +1110,7 @@

{"description":"<!DOCTYPE a \\u0000",
"input":"<!DOCTYPE a \u0000",
-"output":["ParseError", "ParseError", ["DOCTYPE", "a", null, null, false]]},
+"output":["ParseError", ["DOCTYPE", "a", null, null, false]]},

{"description":"<!DOCTYPE a \\u0008",
"input":"<!DOCTYPE a \u0008",
@@ -1154,7 +1214,7 @@

{"description":"<!DOCTYPE a PUBLIC\\u0000",
"input":"<!DOCTYPE a PUBLIC\u0000",
-"output":["ParseError", "ParseError", ["DOCTYPE", "a", null, null, false]]},
+"output":["ParseError", ["DOCTYPE", "a", null, null, false]]},

{"description":"<!DOCTYPE a PUBLIC\\u0008",
"input":"<!DOCTYPE a PUBLIC\u0008",
@@ -1194,135 +1254,135 @@

{"description":"<!DOCTYPE a PUBLIC\"",
"input":"<!DOCTYPE a PUBLIC\"",
-"output":["ParseError", ["DOCTYPE", "a", "", null, false]]},
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", "", null, false]]},

{"description":"<!DOCTYPE a PUBLIC\"\\u0000",
"input":"<!DOCTYPE a PUBLIC\"\u0000",
-"output":["ParseError", "ParseError", ["DOCTYPE", "a", "\uFFFD", null, false]]},
+"output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "a", "\uFFFD", null, false]]},

{"description":"<!DOCTYPE a PUBLIC\"\\u0009",
"input":"<!DOCTYPE a PUBLIC\"\u0009",
-"output":["ParseError", ["DOCTYPE", "a", "\u0009", null, false]]},
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", "\u0009", null, false]]},

{"description":"<!DOCTYPE a PUBLIC\"\\u000A",
"input":"<!DOCTYPE a PUBLIC\"\u000A",
-"output":["ParseError", ["DOCTYPE", "a", "\u000A", null, false]]},
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", "\u000A", null, false]]},

{"description":"<!DOCTYPE a PUBLIC\"\\u000B",
"input":"<!DOCTYPE a PUBLIC\"\u000B",
-"output":["ParseError", "ParseError", ["DOCTYPE", "a", "\u000B", null, false]]},
+"output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "a", "\u000B", null, false]]},

{"description":"<!DOCTYPE a PUBLIC\"\\u000C",
"input":"<!DOCTYPE a PUBLIC\"\u000C",
-"output":["ParseError", ["DOCTYPE", "a", "\u000C", null, false]]},
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", "\u000C", null, false]]},

{"description":"<!DOCTYPE a PUBLIC\" ",
"input":"<!DOCTYPE a PUBLIC\" ",
-"output":["ParseError", ["DOCTYPE", "a", " ", null, false]]},
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", " ", null, false]]},

{"description":"<!DOCTYPE a PUBLIC\"!",
"input":"<!DOCTYPE a PUBLIC\"!",
-"output":["ParseError", ["DOCTYPE", "a", "!", null, false]]},
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", "!", null, false]]},

{"description":"<!DOCTYPE a PUBLIC\"\"",
"input":"<!DOCTYPE a PUBLIC\"\"",
-"output":["ParseError", ["DOCTYPE", "a", "", null, false]]},
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", "", null, false]]},

{"description":"<!DOCTYPE a PUBLIC\"#",
"input":"<!DOCTYPE a PUBLIC\"#",
-"output":["ParseError", ["DOCTYPE", "a", "#", null, false]]},
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", "#", null, false]]},

{"description":"<!DOCTYPE a PUBLIC\"&",
"input":"<!DOCTYPE a PUBLIC\"&",
-"output":["ParseError", ["DOCTYPE", "a", "&", null, false]]},
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", "&", null, false]]},

{"description":"<!DOCTYPE a PUBLIC\"'",
"input":"<!DOCTYPE a PUBLIC\"'",
-"output":["ParseError", ["DOCTYPE", "a", "'", null, false]]},
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", "'", null, false]]},

{"description":"<!DOCTYPE a PUBLIC\"-",
"input":"<!DOCTYPE a PUBLIC\"-",
-"output":["ParseError", ["DOCTYPE", "a", "-", null, false]]},
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", "-", null, false]]},

{"description":"<!DOCTYPE a PUBLIC\"/",
"input":"<!DOCTYPE a PUBLIC\"/",
-"output":["ParseError", ["DOCTYPE", "a", "/", null, false]]},
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", "/", null, false]]},

{"description":"<!DOCTYPE a PUBLIC\"0",
"input":"<!DOCTYPE a PUBLIC\"0",
-"output":["ParseError", ["DOCTYPE", "a", "0", null, false]]},
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", "0", null, false]]},

{"description":"<!DOCTYPE a PUBLIC\"1",
"input":"<!DOCTYPE a PUBLIC\"1",
-"output":["ParseError", ["DOCTYPE", "a", "1", null, false]]},
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", "1", null, false]]},

{"description":"<!DOCTYPE a PUBLIC\"9",
"input":"<!DOCTYPE a PUBLIC\"9",
-"output":["ParseError", ["DOCTYPE", "a", "9", null, false]]},
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", "9", null, false]]},

{"description":"<!DOCTYPE a PUBLIC\"<",
"input":"<!DOCTYPE a PUBLIC\"<",
-"output":["ParseError", ["DOCTYPE", "a", "<", null, false]]},
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", "<", null, false]]},

{"description":"<!DOCTYPE a PUBLIC\"=",
"input":"<!DOCTYPE a PUBLIC\"=",
-"output":["ParseError", ["DOCTYPE", "a", "=", null, false]]},
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", "=", null, false]]},

{"description":"<!DOCTYPE a PUBLIC\">",
"input":"<!DOCTYPE a PUBLIC\">",
-"output":["ParseError", ["DOCTYPE", "a", "", null, false]]},
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", "", null, false]]},

{"description":"<!DOCTYPE a PUBLIC\"?",
"input":"<!DOCTYPE a PUBLIC\"?",
-"output":["ParseError", ["DOCTYPE", "a", "?", null, false]]},
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", "?", null, false]]},

{"description":"<!DOCTYPE a PUBLIC\"@",
"input":"<!DOCTYPE a PUBLIC\"@",
-"output":["ParseError", ["DOCTYPE", "a", "@", null, false]]},
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", "@", null, false]]},

{"description":"<!DOCTYPE a PUBLIC\"A",
"input":"<!DOCTYPE a PUBLIC\"A",
-"output":["ParseError", ["DOCTYPE", "a", "A", null, false]]},
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", "A", null, false]]},

{"description":"<!DOCTYPE a PUBLIC\"B",
"input":"<!DOCTYPE a PUBLIC\"B",
-"output":["ParseError", ["DOCTYPE", "a", "B", null, false]]},
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", "B", null, false]]},

{"description":"<!DOCTYPE a PUBLIC\"Y",
"input":"<!DOCTYPE a PUBLIC\"Y",
-"output":["ParseError", ["DOCTYPE", "a", "Y", null, false]]},
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", "Y", null, false]]},

{"description":"<!DOCTYPE a PUBLIC\"Z",
"input":"<!DOCTYPE a PUBLIC\"Z",
-"output":["ParseError", ["DOCTYPE", "a", "Z", null, false]]},
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", "Z", null, false]]},

{"description":"<!DOCTYPE a PUBLIC\"`",
"input":"<!DOCTYPE a PUBLIC\"`",
-"output":["ParseError", ["DOCTYPE", "a", "`", null, false]]},
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", "`", null, false]]},

{"description":"<!DOCTYPE a PUBLIC\"a",
"input":"<!DOCTYPE a PUBLIC\"a",
-"output":["ParseError", ["DOCTYPE", "a", "a", null, false]]},
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", "a", null, false]]},

{"description":"<!DOCTYPE a PUBLIC\"b",
"input":"<!DOCTYPE a PUBLIC\"b",
-"output":["ParseError", ["DOCTYPE", "a", "b", null, false]]},
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", "b", null, false]]},

{"description":"<!DOCTYPE a PUBLIC\"y",
"input":"<!DOCTYPE a PUBLIC\"y",
-"output":["ParseError", ["DOCTYPE", "a", "y", null, false]]},
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", "y", null, false]]},

{"description":"<!DOCTYPE a PUBLIC\"z",
"input":"<!DOCTYPE a PUBLIC\"z",
-"output":["ParseError", ["DOCTYPE", "a", "z", null, false]]},
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", "z", null, false]]},

{"description":"<!DOCTYPE a PUBLIC\"{",
"input":"<!DOCTYPE a PUBLIC\"{",
-"output":["ParseError", ["DOCTYPE", "a", "{", null, false]]},
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", "{", null, false]]},

{"description":"<!DOCTYPE a PUBLIC\"\\uDBC0\\uDC00",
"input":"<!DOCTYPE a PUBLIC\"\uDBC0\uDC00",
-"output":["ParseError", ["DOCTYPE", "a", "\uDBC0\uDC00", null, false]]},
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", "\uDBC0\uDC00", null, false]]},

{"description":"<!DOCTYPE a PUBLIC#",
"input":"<!DOCTYPE a PUBLIC#",
@@ -1334,47 +1394,47 @@

{"description":"<!DOCTYPE a PUBLIC'",
"input":"<!DOCTYPE a PUBLIC'",
-"output":["ParseError", ["DOCTYPE", "a", "", null, false]]},
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", "", null, false]]},

{"description":"<!DOCTYPE a PUBLIC'\\u0000",
"input":"<!DOCTYPE a PUBLIC'\u0000",
-"output":["ParseError", "ParseError", ["DOCTYPE", "a", "\uFFFD", null, false]]},
+"output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "a", "\uFFFD", null, false]]},

{"description":"<!DOCTYPE a PUBLIC'\\u0009",
"input":"<!DOCTYPE a PUBLIC'\u0009",
-"output":["ParseError", ["DOCTYPE", "a", "\u0009", null, false]]},
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", "\u0009", null, false]]},

{"description":"<!DOCTYPE a PUBLIC'\\u000A",
"input":"<!DOCTYPE a PUBLIC'\u000A",
-"output":["ParseError", ["DOCTYPE", "a", "\u000A", null, false]]},
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", "\u000A", null, false]]},

{"description":"<!DOCTYPE a PUBLIC'\\u000B",
"input":"<!DOCTYPE a PUBLIC'\u000B",
-"output":["ParseError", "ParseError", ["DOCTYPE", "a", "\u000B", null, false]]},
+"output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "a", "\u000B", null, false]]},

{"description":"<!DOCTYPE a PUBLIC'\\u000C",
"input":"<!DOCTYPE a PUBLIC'\u000C",
-"output":["ParseError", ["DOCTYPE", "a", "\u000C", null, false]]},
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", "\u000C", null, false]]},

{"description":"<!DOCTYPE a PUBLIC' ",
"input":"<!DOCTYPE a PUBLIC' ",
-"output":["ParseError", ["DOCTYPE", "a", " ", null, false]]},
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", " ", null, false]]},

{"description":"<!DOCTYPE a PUBLIC'!",
"input":"<!DOCTYPE a PUBLIC'!",
-"output":["ParseError", ["DOCTYPE", "a", "!", null, false]]},
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", "!", null, false]]},

{"description":"<!DOCTYPE a PUBLIC'\"",
"input":"<!DOCTYPE a PUBLIC'\"",
-"output":["ParseError", ["DOCTYPE", "a", "\"", null, false]]},
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", "\"", null, false]]},

{"description":"<!DOCTYPE a PUBLIC'&",
"input":"<!DOCTYPE a PUBLIC'&",
-"output":["ParseError", ["DOCTYPE", "a", "&", null, false]]},
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", "&", null, false]]},

{"description":"<!DOCTYPE a PUBLIC''",
"input":"<!DOCTYPE a PUBLIC''",
-"output":["ParseError", ["DOCTYPE", "a", "", null, false]]},
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", "", null, false]]},

{"description":"<!DOCTYPE a PUBLIC''\\u0000",
"input":"<!DOCTYPE a PUBLIC''\u0000",
@@ -1382,231 +1442,231 @@

{"description":"<!DOCTYPE a PUBLIC''\\u0008",
"input":"<!DOCTYPE a PUBLIC''\u0008",
-"output":["ParseError", "ParseError", ["DOCTYPE", "a", "", null, false]]},
+"output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "a", "", null, false]]},

{"description":"<!DOCTYPE a PUBLIC''\\u0009",
"input":"<!DOCTYPE a PUBLIC''\u0009",
-"output":["ParseError", ["DOCTYPE", "a", "", null, false]]},
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", "", null, false]]},

{"description":"<!DOCTYPE a PUBLIC''\\u000A",
"input":"<!DOCTYPE a PUBLIC''\u000A",
-"output":["ParseError", ["DOCTYPE", "a", "", null, false]]},
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", "", null, false]]},

{"description":"<!DOCTYPE a PUBLIC''\\u000B",
"input":"<!DOCTYPE a PUBLIC''\u000B",
-"output":["ParseError", "ParseError", ["DOCTYPE", "a", "", null, false]]},
+"output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "a", "", null, false]]},

{"description":"<!DOCTYPE a PUBLIC''\\u000C",
"input":"<!DOCTYPE a PUBLIC''\u000C",
-"output":["ParseError", ["DOCTYPE", "a", "", null, false]]},
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", "", null, false]]},

{"description":"<!DOCTYPE a PUBLIC''\\u000D",
"input":"<!DOCTYPE a PUBLIC''\u000D",
-"output":["ParseError", ["DOCTYPE", "a", "", null, false]]},
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", "", null, false]]},

{"description":"<!DOCTYPE a PUBLIC''\\u001F",
"input":"<!DOCTYPE a PUBLIC''\u001F",
-"output":["ParseError", "ParseError", ["DOCTYPE", "a", "", null, false]]},
+"output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "a", "", null, false]]},

{"description":"<!DOCTYPE a PUBLIC'' ",
"input":"<!DOCTYPE a PUBLIC'' ",
-"output":["ParseError", ["DOCTYPE", "a", "", null, false]]},
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", "", null, false]]},

{"description":"<!DOCTYPE a PUBLIC''!",
"input":"<!DOCTYPE a PUBLIC''!",
-"output":["ParseError", ["DOCTYPE", "a", "", null, false]]},
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", "", null, false]]},

{"description":"<!DOCTYPE a PUBLIC''\"",
"input":"<!DOCTYPE a PUBLIC''\"",
-"output":["ParseError", ["DOCTYPE", "a", "", "", false]]},
+"output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "a", "", "", false]]},

{"description":"<!DOCTYPE a PUBLIC''#",
"input":"<!DOCTYPE a PUBLIC''#",
-"output":["ParseError", ["DOCTYPE", "a", "", null, false]]},
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", "", null, false]]},

{"description":"<!DOCTYPE a PUBLIC''&",
"input":"<!DOCTYPE a PUBLIC''&",
-"output":["ParseError", ["DOCTYPE", "a", "", null, false]]},
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", "", null, false]]},

{"description":"<!DOCTYPE a PUBLIC'''",
"input":"<!DOCTYPE a PUBLIC'''",
-"output":["ParseError", ["DOCTYPE", "a", "", "", false]]},
+"output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "a", "", "", false]]},

{"description":"<!DOCTYPE a PUBLIC''(",
"input":"<!DOCTYPE a PUBLIC''(",
-"output":["ParseError", ["DOCTYPE", "a", "", null, false]]},
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", "", null, false]]},

{"description":"<!DOCTYPE a PUBLIC''-",
"input":"<!DOCTYPE a PUBLIC''-",
-"output":["ParseError", ["DOCTYPE", "a", "", null, false]]},
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", "", null, false]]},

{"description":"<!DOCTYPE a PUBLIC''/",
"input":"<!DOCTYPE a PUBLIC''/",
-"output":["ParseError", ["DOCTYPE", "a", "", null, false]]},
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", "", null, false]]},

{"description":"<!DOCTYPE a PUBLIC''0",
"input":"<!DOCTYPE a PUBLIC''0",
-"output":["ParseError", ["DOCTYPE", "a", "", null, false]]},
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", "", null, false]]},

{"description":"<!DOCTYPE a PUBLIC''1",
"input":"<!DOCTYPE a PUBLIC''1",
-"output":["ParseError", ["DOCTYPE", "a", "", null, false]]},
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", "", null, false]]},

{"description":"<!DOCTYPE a PUBLIC''9",
"input":"<!DOCTYPE a PUBLIC''9",
-"output":["ParseError", ["DOCTYPE", "a", "", null, false]]},
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", "", null, false]]},

{"description":"<!DOCTYPE a PUBLIC''<",
"input":"<!DOCTYPE a PUBLIC''<",
-"output":["ParseError", ["DOCTYPE", "a", "", null, false]]},
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", "", null, false]]},

{"description":"<!DOCTYPE a PUBLIC''=",
"input":"<!DOCTYPE a PUBLIC''=",
-"output":["ParseError", ["DOCTYPE", "a", "", null, false]]},
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", "", null, false]]},

{"description":"<!DOCTYPE a PUBLIC''>",
"input":"<!DOCTYPE a PUBLIC''>",
-"output":[["DOCTYPE", "a", "", null, true]]},
+"output":["ParseError", ["DOCTYPE", "a", "", null, true]]},

{"description":"<!DOCTYPE a PUBLIC''?",
"input":"<!DOCTYPE a PUBLIC''?",
-"output":["ParseError", ["DOCTYPE", "a", "", null, false]]},
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", "", null, false]]},

{"description":"<!DOCTYPE a PUBLIC''@",
"input":"<!DOCTYPE a PUBLIC''@",
-"output":["ParseError", ["DOCTYPE", "a", "", null, false]]},
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", "", null, false]]},

{"description":"<!DOCTYPE a PUBLIC''A",
"input":"<!DOCTYPE a PUBLIC''A",
-"output":["ParseError", ["DOCTYPE", "a", "", null, false]]},
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", "", null, false]]},

{"description":"<!DOCTYPE a PUBLIC''B",
"input":"<!DOCTYPE a PUBLIC''B",
-"output":["ParseError", ["DOCTYPE", "a", "", null, false]]},
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", "", null, false]]},

{"description":"<!DOCTYPE a PUBLIC''Y",
"input":"<!DOCTYPE a PUBLIC''Y",
-"output":["ParseError", ["DOCTYPE", "a", "", null, false]]},
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", "", null, false]]},

{"description":"<!DOCTYPE a PUBLIC''Z",
"input":"<!DOCTYPE a PUBLIC''Z",
-"output":["ParseError", ["DOCTYPE", "a", "", null, false]]},
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", "", null, false]]},

{"description":"<!DOCTYPE a PUBLIC''`",
"input":"<!DOCTYPE a PUBLIC''`",
-"output":["ParseError", ["DOCTYPE", "a", "", null, false]]},
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", "", null, false]]},

{"description":"<!DOCTYPE a PUBLIC''a",
"input":"<!DOCTYPE a PUBLIC''a",
-"output":["ParseError", ["DOCTYPE", "a", "", null, false]]},
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", "", null, false]]},

{"description":"<!DOCTYPE a PUBLIC''b",
"input":"<!DOCTYPE a PUBLIC''b",
-"output":["ParseError", ["DOCTYPE", "a", "", null, false]]},
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", "", null, false]]},

{"description":"<!DOCTYPE a PUBLIC''y",
"input":"<!DOCTYPE a PUBLIC''y",
-"output":["ParseError", ["DOCTYPE", "a", "", null, false]]},
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", "", null, false]]},

{"description":"<!DOCTYPE a PUBLIC''z",
"input":"<!DOCTYPE a PUBLIC''z",
-"output":["ParseError", ["DOCTYPE", "a", "", null, false]]},
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", "", null, false]]},

{"description":"<!DOCTYPE a PUBLIC''{",
"input":"<!DOCTYPE a PUBLIC''{",
-"output":["ParseError", ["DOCTYPE", "a", "", null, false]]},
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", "", null, false]]},

{"description":"<!DOCTYPE a PUBLIC''\\uDBC0\\uDC00",
"input":"<!DOCTYPE a PUBLIC''\uDBC0\uDC00",
-"output":["ParseError", ["DOCTYPE", "a", "", null, false]]},
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", "", null, false]]},

{"description":"<!DOCTYPE a PUBLIC'(",
"input":"<!DOCTYPE a PUBLIC'(",
-"output":["ParseError", ["DOCTYPE", "a", "(", null, false]]},
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", "(", null, false]]},

{"description":"<!DOCTYPE a PUBLIC'-",
"input":"<!DOCTYPE a PUBLIC'-",
-"output":["ParseError", ["DOCTYPE", "a", "-", null, false]]},
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", "-", null, false]]},

{"description":"<!DOCTYPE a PUBLIC'/",
"input":"<!DOCTYPE a PUBLIC'/",
-"output":["ParseError", ["DOCTYPE", "a", "/", null, false]]},
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", "/", null, false]]},

{"description":"<!DOCTYPE a PUBLIC'0",
"input":"<!DOCTYPE a PUBLIC'0",
-"output":["ParseError", ["DOCTYPE", "a", "0", null, false]]},
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", "0", null, false]]},

{"description":"<!DOCTYPE a PUBLIC'1",
"input":"<!DOCTYPE a PUBLIC'1",
-"output":["ParseError", ["DOCTYPE", "a", "1", null, false]]},
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", "1", null, false]]},

{"description":"<!DOCTYPE a PUBLIC'9",
"input":"<!DOCTYPE a PUBLIC'9",
-"output":["ParseError", ["DOCTYPE", "a", "9", null, false]]},
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", "9", null, false]]},

{"description":"<!DOCTYPE a PUBLIC'<",
"input":"<!DOCTYPE a PUBLIC'<",
-"output":["ParseError", ["DOCTYPE", "a", "<", null, false]]},
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", "<", null, false]]},

{"description":"<!DOCTYPE a PUBLIC'=",
"input":"<!DOCTYPE a PUBLIC'=",
-"output":["ParseError", ["DOCTYPE", "a", "=", null, false]]},
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", "=", null, false]]},

{"description":"<!DOCTYPE a PUBLIC'>",
"input":"<!DOCTYPE a PUBLIC'>",
-"output":["ParseError", ["DOCTYPE", "a", "", null, false]]},
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", "", null, false]]},

{"description":"<!DOCTYPE a PUBLIC'?",
"input":"<!DOCTYPE a PUBLIC'?",
-"output":["ParseError", ["DOCTYPE", "a", "?", null, false]]},
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", "?", null, false]]},

{"description":"<!DOCTYPE a PUBLIC'@",
"input":"<!DOCTYPE a PUBLIC'@",
-"output":["ParseError", ["DOCTYPE", "a", "@", null, false]]},
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", "@", null, false]]},

{"description":"<!DOCTYPE a PUBLIC'A",
"input":"<!DOCTYPE a PUBLIC'A",
-"output":["ParseError", ["DOCTYPE", "a", "A", null, false]]},
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", "A", null, false]]},

{"description":"<!DOCTYPE a PUBLIC'B",
"input":"<!DOCTYPE a PUBLIC'B",
-"output":["ParseError", ["DOCTYPE", "a", "B", null, false]]},
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", "B", null, false]]},

{"description":"<!DOCTYPE a PUBLIC'Y",
"input":"<!DOCTYPE a PUBLIC'Y",
-"output":["ParseError", ["DOCTYPE", "a", "Y", null, false]]},
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", "Y", null, false]]},

{"description":"<!DOCTYPE a PUBLIC'Z",
"input":"<!DOCTYPE a PUBLIC'Z",
-"output":["ParseError", ["DOCTYPE", "a", "Z", null, false]]},
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", "Z", null, false]]},

{"description":"<!DOCTYPE a PUBLIC'`",
"input":"<!DOCTYPE a PUBLIC'`",
-"output":["ParseError", ["DOCTYPE", "a", "`", null, false]]},
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", "`", null, false]]},

{"description":"<!DOCTYPE a PUBLIC'a",
"input":"<!DOCTYPE a PUBLIC'a",
-"output":["ParseError", ["DOCTYPE", "a", "a", null, false]]},
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", "a", null, false]]},

{"description":"<!DOCTYPE a PUBLIC'b",
"input":"<!DOCTYPE a PUBLIC'b",
-"output":["ParseError", ["DOCTYPE", "a", "b", null, false]]},
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", "b", null, false]]},

{"description":"<!DOCTYPE a PUBLIC'y",
"input":"<!DOCTYPE a PUBLIC'y",
-"output":["ParseError", ["DOCTYPE", "a", "y", null, false]]},
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", "y", null, false]]},

{"description":"<!DOCTYPE a PUBLIC'z",
"input":"<!DOCTYPE a PUBLIC'z",
-"output":["ParseError", ["DOCTYPE", "a", "z", null, false]]},
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", "z", null, false]]},

{"description":"<!DOCTYPE a PUBLIC'{",
"input":"<!DOCTYPE a PUBLIC'{",
-"output":["ParseError", ["DOCTYPE", "a", "{", null, false]]},
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", "{", null, false]]},

{"description":"<!DOCTYPE a PUBLIC'\\uDBC0\\uDC00",
"input":"<!DOCTYPE a PUBLIC'\uDBC0\uDC00",
-"output":["ParseError", ["DOCTYPE", "a", "\uDBC0\uDC00", null, false]]},
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", "\uDBC0\uDC00", null, false]]},

{"description":"<!DOCTYPE a PUBLIC(",
"input":"<!DOCTYPE a PUBLIC(",
@@ -1702,7 +1762,7 @@

{"description":"<!DOCTYPE a SYSTEM\\u0000",
"input":"<!DOCTYPE a SYSTEM\u0000",
-"output":["ParseError", "ParseError", ["DOCTYPE", "a", null, null, false]]},
+"output":["ParseError", ["DOCTYPE", "a", null, null, false]]},

{"description":"<!DOCTYPE a SYSTEM\\u0008",
"input":"<!DOCTYPE a SYSTEM\u0008",
@@ -1742,135 +1802,135 @@

{"description":"<!DOCTYPE a SYSTEM\"",
"input":"<!DOCTYPE a SYSTEM\"",
-"output":["ParseError", ["DOCTYPE", "a", null, "", false]]},
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", null, "", false]]},

{"description":"<!DOCTYPE a SYSTEM\"\\u0000",
"input":"<!DOCTYPE a SYSTEM\"\u0000",
-"output":["ParseError", "ParseError", ["DOCTYPE", "a", null, "\uFFFD", false]]},
+"output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "a", null, "\uFFFD", false]]},

{"description":"<!DOCTYPE a SYSTEM\"\\u0009",
"input":"<!DOCTYPE a SYSTEM\"\u0009",
-"output":["ParseError", ["DOCTYPE", "a", null, "\u0009", false]]},
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", null, "\u0009", false]]},

{"description":"<!DOCTYPE a SYSTEM\"\\u000A",
"input":"<!DOCTYPE a SYSTEM\"\u000A",
-"output":["ParseError", ["DOCTYPE", "a", null, "\u000A", false]]},
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", null, "\u000A", false]]},

{"description":"<!DOCTYPE a SYSTEM\"\\u000B",
"input":"<!DOCTYPE a SYSTEM\"\u000B",
-"output":["ParseError", "ParseError", ["DOCTYPE", "a", null, "\u000B", false]]},
+"output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "a", null, "\u000B", false]]},

{"description":"<!DOCTYPE a SYSTEM\"\\u000C",
"input":"<!DOCTYPE a SYSTEM\"\u000C",
-"output":["ParseError", ["DOCTYPE", "a", null, "\u000C", false]]},
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", null, "\u000C", false]]},

{"description":"<!DOCTYPE a SYSTEM\" ",
"input":"<!DOCTYPE a SYSTEM\" ",
-"output":["ParseError", ["DOCTYPE", "a", null, " ", false]]},
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", null, " ", false]]},

{"description":"<!DOCTYPE a SYSTEM\"!",
"input":"<!DOCTYPE a SYSTEM\"!",
-"output":["ParseError", ["DOCTYPE", "a", null, "!", false]]},
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", null, "!", false]]},

{"description":"<!DOCTYPE a SYSTEM\"\"",
"input":"<!DOCTYPE a SYSTEM\"\"",
-"output":["ParseError", ["DOCTYPE", "a", null, "", false]]},
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", null, "", false]]},

{"description":"<!DOCTYPE a SYSTEM\"#",
"input":"<!DOCTYPE a SYSTEM\"#",
-"output":["ParseError", ["DOCTYPE", "a", null, "#", false]]},
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", null, "#", false]]},

{"description":"<!DOCTYPE a SYSTEM\"&",
"input":"<!DOCTYPE a SYSTEM\"&",
-"output":["ParseError", ["DOCTYPE", "a", null, "&", false]]},
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", null, "&", false]]},

{"description":"<!DOCTYPE a SYSTEM\"'",
"input":"<!DOCTYPE a SYSTEM\"'",
-"output":["ParseError", ["DOCTYPE", "a", null, "'", false]]},
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", null, "'", false]]},

{"description":"<!DOCTYPE a SYSTEM\"-",
"input":"<!DOCTYPE a SYSTEM\"-",
-"output":["ParseError", ["DOCTYPE", "a", null, "-", false]]},
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", null, "-", false]]},

{"description":"<!DOCTYPE a SYSTEM\"/",
"input":"<!DOCTYPE a SYSTEM\"/",
-"output":["ParseError", ["DOCTYPE", "a", null, "/", false]]},
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", null, "/", false]]},

{"description":"<!DOCTYPE a SYSTEM\"0",
"input":"<!DOCTYPE a SYSTEM\"0",
-"output":["ParseError", ["DOCTYPE", "a", null, "0", false]]},
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", null, "0", false]]},

{"description":"<!DOCTYPE a SYSTEM\"1",
"input":"<!DOCTYPE a SYSTEM\"1",
-"output":["ParseError", ["DOCTYPE", "a", null, "1", false]]},
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", null, "1", false]]},

{"description":"<!DOCTYPE a SYSTEM\"9",
"input":"<!DOCTYPE a SYSTEM\"9",
-"output":["ParseError", ["DOCTYPE", "a", null, "9", false]]},
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", null, "9", false]]},

{"description":"<!DOCTYPE a SYSTEM\"<",
"input":"<!DOCTYPE a SYSTEM\"<",
-"output":["ParseError", ["DOCTYPE", "a", null, "<", false]]},
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", null, "<", false]]},

{"description":"<!DOCTYPE a SYSTEM\"=",
"input":"<!DOCTYPE a SYSTEM\"=",
-"output":["ParseError", ["DOCTYPE", "a", null, "=", false]]},
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", null, "=", false]]},

{"description":"<!DOCTYPE a SYSTEM\">",
"input":"<!DOCTYPE a SYSTEM\">",
-"output":["ParseError", ["DOCTYPE", "a", null, "", false]]},
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", null, "", false]]},

{"description":"<!DOCTYPE a SYSTEM\"?",
"input":"<!DOCTYPE a SYSTEM\"?",
-"output":["ParseError", ["DOCTYPE", "a", null, "?", false]]},
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", null, "?", false]]},

{"description":"<!DOCTYPE a SYSTEM\"@",
"input":"<!DOCTYPE a SYSTEM\"@",
-"output":["ParseError", ["DOCTYPE", "a", null, "@", false]]},
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", null, "@", false]]},

{"description":"<!DOCTYPE a SYSTEM\"A",
"input":"<!DOCTYPE a SYSTEM\"A",
-"output":["ParseError", ["DOCTYPE", "a", null, "A", false]]},
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", null, "A", false]]},

{"description":"<!DOCTYPE a SYSTEM\"B",
"input":"<!DOCTYPE a SYSTEM\"B",
-"output":["ParseError", ["DOCTYPE", "a", null, "B", false]]},
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", null, "B", false]]},

{"description":"<!DOCTYPE a SYSTEM\"Y",
"input":"<!DOCTYPE a SYSTEM\"Y",
-"output":["ParseError", ["DOCTYPE", "a", null, "Y", false]]},
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", null, "Y", false]]},

{"description":"<!DOCTYPE a SYSTEM\"Z",
"input":"<!DOCTYPE a SYSTEM\"Z",
-"output":["ParseError", ["DOCTYPE", "a", null, "Z", false]]},
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", null, "Z", false]]},

{"description":"<!DOCTYPE a SYSTEM\"`",
"input":"<!DOCTYPE a SYSTEM\"`",
-"output":["ParseError", ["DOCTYPE", "a", null, "`", false]]},
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", null, "`", false]]},

{"description":"<!DOCTYPE a SYSTEM\"a",
"input":"<!DOCTYPE a SYSTEM\"a",
-"output":["ParseError", ["DOCTYPE", "a", null, "a", false]]},
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", null, "a", false]]},

{"description":"<!DOCTYPE a SYSTEM\"b",
"input":"<!DOCTYPE a SYSTEM\"b",
-"output":["ParseError", ["DOCTYPE", "a", null, "b", false]]},
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", null, "b", false]]},

{"description":"<!DOCTYPE a SYSTEM\"y",
"input":"<!DOCTYPE a SYSTEM\"y",
-"output":["ParseError", ["DOCTYPE", "a", null, "y", false]]},
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", null, "y", false]]},

{"description":"<!DOCTYPE a SYSTEM\"z",
"input":"<!DOCTYPE a SYSTEM\"z",
-"output":["ParseError", ["DOCTYPE", "a", null, "z", false]]},
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", null, "z", false]]},

{"description":"<!DOCTYPE a SYSTEM\"{",
"input":"<!DOCTYPE a SYSTEM\"{",
-"output":["ParseError", ["DOCTYPE", "a", null, "{", false]]},
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", null, "{", false]]},

{"description":"<!DOCTYPE a SYSTEM\"\\uDBC0\\uDC00",
"input":"<!DOCTYPE a SYSTEM\"\uDBC0\uDC00",
-"output":["ParseError", ["DOCTYPE", "a", null, "\uDBC0\uDC00", false]]},
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", null, "\uDBC0\uDC00", false]]},

{"description":"<!DOCTYPE a SYSTEM#",
"input":"<!DOCTYPE a SYSTEM#",
@@ -1882,47 +1942,47 @@

{"description":"<!DOCTYPE a SYSTEM'",
"input":"<!DOCTYPE a SYSTEM'",
-"output":["ParseError", ["DOCTYPE", "a", null, "", false]]},
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", null, "", false]]},

{"description":"<!DOCTYPE a SYSTEM'\\u0000",
"input":"<!DOCTYPE a SYSTEM'\u0000",
-"output":["ParseError", "ParseError", ["DOCTYPE", "a", null, "\uFFFD", false]]},
+"output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "a", null, "\uFFFD", false]]},

{"description":"<!DOCTYPE a SYSTEM'\\u0009",
"input":"<!DOCTYPE a SYSTEM'\u0009",
-"output":["ParseError", ["DOCTYPE", "a", null, "\u0009", false]]},
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", null, "\u0009", false]]},

{"description":"<!DOCTYPE a SYSTEM'\\u000A",
"input":"<!DOCTYPE a SYSTEM'\u000A",
-"output":["ParseError", ["DOCTYPE", "a", null, "\u000A", false]]},
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", null, "\u000A", false]]},

{"description":"<!DOCTYPE a SYSTEM'\\u000B",
"input":"<!DOCTYPE a SYSTEM'\u000B",
-"output":["ParseError", "ParseError", ["DOCTYPE", "a", null, "\u000B", false]]},
+"output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "a", null, "\u000B", false]]},

{"description":"<!DOCTYPE a SYSTEM'\\u000C",
"input":"<!DOCTYPE a SYSTEM'\u000C",
-"output":["ParseError", ["DOCTYPE", "a", null, "\u000C", false]]},
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", null, "\u000C", false]]},

{"description":"<!DOCTYPE a SYSTEM' ",
"input":"<!DOCTYPE a SYSTEM' ",
-"output":["ParseError", ["DOCTYPE", "a", null, " ", false]]},
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", null, " ", false]]},

{"description":"<!DOCTYPE a SYSTEM'!",
"input":"<!DOCTYPE a SYSTEM'!",
-"output":["ParseError", ["DOCTYPE", "a", null, "!", false]]},
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", null, "!", false]]},

{"description":"<!DOCTYPE a SYSTEM'\"",
"input":"<!DOCTYPE a SYSTEM'\"",
-"output":["ParseError", ["DOCTYPE", "a", null, "\"", false]]},
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", null, "\"", false]]},

{"description":"<!DOCTYPE a SYSTEM'&",
"input":"<!DOCTYPE a SYSTEM'&",
-"output":["ParseError", ["DOCTYPE", "a", null, "&", false]]},
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", null, "&", false]]},

{"description":"<!DOCTYPE a SYSTEM''",
"input":"<!DOCTYPE a SYSTEM''",
-"output":["ParseError", ["DOCTYPE", "a", null, "", false]]},
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", null, "", false]]},

{"description":"<!DOCTYPE a SYSTEM''\\u0000",
"input":"<!DOCTYPE a SYSTEM''\u0000",
@@ -1930,223 +1990,223 @@

{"description":"<!DOCTYPE a SYSTEM''\\u0008",
"input":"<!DOCTYPE a SYSTEM''\u0008",
-"output":["ParseError", "ParseError", ["DOCTYPE", "a", null, "", true]]},
+"output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "a", null, "", true]]},

{"description":"<!DOCTYPE a SYSTEM''\\u0009",
"input":"<!DOCTYPE a SYSTEM''\u0009",
-"output":["ParseError", ["DOCTYPE", "a", null, "", false]]},
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", null, "", false]]},

{"description":"<!DOCTYPE a SYSTEM''\\u000A",
"input":"<!DOCTYPE a SYSTEM''\u000A",
-"output":["ParseError", ["DOCTYPE", "a", null, "", false]]},
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", null, "", false]]},

{"description":"<!DOCTYPE a SYSTEM''\\u000B",
"input":"<!DOCTYPE a SYSTEM''\u000B",
-"output":["ParseError", "ParseError", ["DOCTYPE", "a", null, "", true]]},
+"output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "a", null, "", true]]},

{"description":"<!DOCTYPE a SYSTEM''\\u000C",
"input":"<!DOCTYPE a SYSTEM''\u000C",
-"output":["ParseError", ["DOCTYPE", "a", null, "", false]]},
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", null, "", false]]},

{"description":"<!DOCTYPE a SYSTEM''\\u000D",
"input":"<!DOCTYPE a SYSTEM''\u000D",
-"output":["ParseError", ["DOCTYPE", "a", null, "", false]]},
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", null, "", false]]},

{"description":"<!DOCTYPE a SYSTEM''\\u001F",
"input":"<!DOCTYPE a SYSTEM''\u001F",
-"output":["ParseError", "ParseError", ["DOCTYPE", "a", null, "", true]]},
+"output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "a", null, "", true]]},

{"description":"<!DOCTYPE a SYSTEM'' ",
"input":"<!DOCTYPE a SYSTEM'' ",
-"output":["ParseError", ["DOCTYPE", "a", null, "", false]]},
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", null, "", false]]},

{"description":"<!DOCTYPE a SYSTEM''!",
"input":"<!DOCTYPE a SYSTEM''!",
-"output":["ParseError", ["DOCTYPE", "a", null, "", true]]},
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", null, "", true]]},

{"description":"<!DOCTYPE a SYSTEM''\"",
"input":"<!DOCTYPE a SYSTEM''\"",
-"output":["ParseError", ["DOCTYPE", "a", null, "", true]]},
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", null, "", true]]},

{"description":"<!DOCTYPE a SYSTEM''&",
"input":"<!DOCTYPE a SYSTEM''&",
-"output":["ParseError", ["DOCTYPE", "a", null, "", true]]},
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", null, "", true]]},

{"description":"<!DOCTYPE a SYSTEM'''",
"input":"<!DOCTYPE a SYSTEM'''",
-"output":["ParseError", ["DOCTYPE", "a", null, "", true]]},
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", null, "", true]]},

{"description":"<!DOCTYPE a SYSTEM''-",
"input":"<!DOCTYPE a SYSTEM''-",
-"output":["ParseError", ["DOCTYPE", "a", null, "", true]]},
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", null, "", true]]},

{"description":"<!DOCTYPE a SYSTEM''/",
"input":"<!DOCTYPE a SYSTEM''/",
-"output":["ParseError", ["DOCTYPE", "a", null, "", true]]},
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", null, "", true]]},

{"description":"<!DOCTYPE a SYSTEM''0",
"input":"<!DOCTYPE a SYSTEM''0",
-"output":["ParseError", ["DOCTYPE", "a", null, "", true]]},
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", null, "", true]]},

{"description":"<!DOCTYPE a SYSTEM''1",
"input":"<!DOCTYPE a SYSTEM''1",
-"output":["ParseError", ["DOCTYPE", "a", null, "", true]]},
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", null, "", true]]},

{"description":"<!DOCTYPE a SYSTEM''9",
"input":"<!DOCTYPE a SYSTEM''9",
-"output":["ParseError", ["DOCTYPE", "a", null, "", true]]},
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", null, "", true]]},

{"description":"<!DOCTYPE a SYSTEM''<",
"input":"<!DOCTYPE a SYSTEM''<",
-"output":["ParseError", ["DOCTYPE", "a", null, "", true]]},
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", null, "", true]]},

{"description":"<!DOCTYPE a SYSTEM''=",
"input":"<!DOCTYPE a SYSTEM''=",
-"output":["ParseError", ["DOCTYPE", "a", null, "", true]]},
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", null, "", true]]},

{"description":"<!DOCTYPE a SYSTEM''>",
"input":"<!DOCTYPE a SYSTEM''>",
-"output":[["DOCTYPE", "a", null, "", true]]},
+"output":["ParseError", ["DOCTYPE", "a", null, "", true]]},

{"description":"<!DOCTYPE a SYSTEM''?",
"input":"<!DOCTYPE a SYSTEM''?",
-"output":["ParseError", ["DOCTYPE", "a", null, "", true]]},
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", null, "", true]]},

{"description":"<!DOCTYPE a SYSTEM''@",
"input":"<!DOCTYPE a SYSTEM''@",
-"output":["ParseError", ["DOCTYPE", "a", null, "", true]]},
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", null, "", true]]},

{"description":"<!DOCTYPE a SYSTEM''A",
"input":"<!DOCTYPE a SYSTEM''A",
-"output":["ParseError", ["DOCTYPE", "a", null, "", true]]},
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", null, "", true]]},

{"description":"<!DOCTYPE a SYSTEM''B",
"input":"<!DOCTYPE a SYSTEM''B",
-"output":["ParseError", ["DOCTYPE", "a", null, "", true]]},
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", null, "", true]]},

{"description":"<!DOCTYPE a SYSTEM''Y",
"input":"<!DOCTYPE a SYSTEM''Y",
-"output":["ParseError", ["DOCTYPE", "a", null, "", true]]},
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", null, "", true]]},

{"description":"<!DOCTYPE a SYSTEM''Z",
"input":"<!DOCTYPE a SYSTEM''Z",
-"output":["ParseError", ["DOCTYPE", "a", null, "", true]]},
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", null, "", true]]},

{"description":"<!DOCTYPE a SYSTEM''`",
"input":"<!DOCTYPE a SYSTEM''`",
-"output":["ParseError", ["DOCTYPE", "a", null, "", true]]},
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", null, "", true]]},

{"description":"<!DOCTYPE a SYSTEM''a",
"input":"<!DOCTYPE a SYSTEM''a",
-"output":["ParseError", ["DOCTYPE", "a", null, "", true]]},
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", null, "", true]]},

{"description":"<!DOCTYPE a SYSTEM''b",
"input":"<!DOCTYPE a SYSTEM''b",
-"output":["ParseError", ["DOCTYPE", "a", null, "", true]]},
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", null, "", true]]},

{"description":"<!DOCTYPE a SYSTEM''y",
"input":"<!DOCTYPE a SYSTEM''y",
-"output":["ParseError", ["DOCTYPE", "a", null, "", true]]},
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", null, "", true]]},

{"description":"<!DOCTYPE a SYSTEM''z",
"input":"<!DOCTYPE a SYSTEM''z",
-"output":["ParseError", ["DOCTYPE", "a", null, "", true]]},
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", null, "", true]]},

{"description":"<!DOCTYPE a SYSTEM''{",
"input":"<!DOCTYPE a SYSTEM''{",
-"output":["ParseError", ["DOCTYPE", "a", null, "", true]]},
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", null, "", true]]},

{"description":"<!DOCTYPE a SYSTEM''\\uDBC0\\uDC00",
"input":"<!DOCTYPE a SYSTEM''\uDBC0\uDC00",
-"output":["ParseError", ["DOCTYPE", "a", null, "", true]]},
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", null, "", true]]},

{"description":"<!DOCTYPE a SYSTEM'(",
"input":"<!DOCTYPE a SYSTEM'(",
-"output":["ParseError", ["DOCTYPE", "a", null, "(", false]]},
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", null, "(", false]]},

{"description":"<!DOCTYPE a SYSTEM'-",
"input":"<!DOCTYPE a SYSTEM'-",
-"output":["ParseError", ["DOCTYPE", "a", null, "-", false]]},
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", null, "-", false]]},

{"description":"<!DOCTYPE a SYSTEM'/",
"input":"<!DOCTYPE a SYSTEM'/",
-"output":["ParseError", ["DOCTYPE", "a", null, "/", false]]},
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", null, "/", false]]},

{"description":"<!DOCTYPE a SYSTEM'0",
"input":"<!DOCTYPE a SYSTEM'0",
-"output":["ParseError", ["DOCTYPE", "a", null, "0", false]]},
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", null, "0", false]]},

{"description":"<!DOCTYPE a SYSTEM'1",
"input":"<!DOCTYPE a SYSTEM'1",
-"output":["ParseError", ["DOCTYPE", "a", null, "1", false]]},
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", null, "1", false]]},

{"description":"<!DOCTYPE a SYSTEM'9",
"input":"<!DOCTYPE a SYSTEM'9",
-"output":["ParseError", ["DOCTYPE", "a", null, "9", false]]},
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", null, "9", false]]},

{"description":"<!DOCTYPE a SYSTEM'<",
"input":"<!DOCTYPE a SYSTEM'<",
-"output":["ParseError", ["DOCTYPE", "a", null, "<", false]]},
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", null, "<", false]]},

{"description":"<!DOCTYPE a SYSTEM'=",
"input":"<!DOCTYPE a SYSTEM'=",
-"output":["ParseError", ["DOCTYPE", "a", null, "=", false]]},
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", null, "=", false]]},

{"description":"<!DOCTYPE a SYSTEM'>",
"input":"<!DOCTYPE a SYSTEM'>",
-"output":["ParseError", ["DOCTYPE", "a", null, "", false]]},
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", null, "", false]]},

{"description":"<!DOCTYPE a SYSTEM'?",
"input":"<!DOCTYPE a SYSTEM'?",
-"output":["ParseError", ["DOCTYPE", "a", null, "?", false]]},
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", null, "?", false]]},

{"description":"<!DOCTYPE a SYSTEM'@",
"input":"<!DOCTYPE a SYSTEM'@",
-"output":["ParseError", ["DOCTYPE", "a", null, "@", false]]},
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", null, "@", false]]},

{"description":"<!DOCTYPE a SYSTEM'A",
"input":"<!DOCTYPE a SYSTEM'A",
-"output":["ParseError", ["DOCTYPE", "a", null, "A", false]]},
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", null, "A", false]]},

{"description":"<!DOCTYPE a SYSTEM'B",
"input":"<!DOCTYPE a SYSTEM'B",
-"output":["ParseError", ["DOCTYPE", "a", null, "B", false]]},
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", null, "B", false]]},

{"description":"<!DOCTYPE a SYSTEM'Y",
"input":"<!DOCTYPE a SYSTEM'Y",
-"output":["ParseError", ["DOCTYPE", "a", null, "Y", false]]},
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", null, "Y", false]]},

{"description":"<!DOCTYPE a SYSTEM'Z",
"input":"<!DOCTYPE a SYSTEM'Z",
-"output":["ParseError", ["DOCTYPE", "a", null, "Z", false]]},
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", null, "Z", false]]},

{"description":"<!DOCTYPE a SYSTEM'`",
"input":"<!DOCTYPE a SYSTEM'`",
-"output":["ParseError", ["DOCTYPE", "a", null, "`", false]]},
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", null, "`", false]]},

{"description":"<!DOCTYPE a SYSTEM'a",
"input":"<!DOCTYPE a SYSTEM'a",
-"output":["ParseError", ["DOCTYPE", "a", null, "a", false]]},
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", null, "a", false]]},

{"description":"<!DOCTYPE a SYSTEM'b",
"input":"<!DOCTYPE a SYSTEM'b",
-"output":["ParseError", ["DOCTYPE", "a", null, "b", false]]},
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", null, "b", false]]},

{"description":"<!DOCTYPE a SYSTEM'y",
"input":"<!DOCTYPE a SYSTEM'y",
-"output":["ParseError", ["DOCTYPE", "a", null, "y", false]]},
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", null, "y", false]]},

{"description":"<!DOCTYPE a SYSTEM'z",
"input":"<!DOCTYPE a SYSTEM'z",
-"output":["ParseError", ["DOCTYPE", "a", null, "z", false]]},
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", null, "z", false]]},

{"description":"<!DOCTYPE a SYSTEM'{",
"input":"<!DOCTYPE a SYSTEM'{",
-"output":["ParseError", ["DOCTYPE", "a", null, "{", false]]},
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", null, "{", false]]},

{"description":"<!DOCTYPE a SYSTEM'\\uDBC0\\uDC00",
"input":"<!DOCTYPE a SYSTEM'\uDBC0\uDC00",
-"output":["ParseError", ["DOCTYPE", "a", null, "\uDBC0\uDC00", false]]},
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", null, "\uDBC0\uDC00", false]]},

{"description":"<!DOCTYPE a SYSTEM(",
"input":"<!DOCTYPE a SYSTEM(",
@@ -2254,7 +2314,7 @@

{"description":"<!DOCTYPE a a\\u0000",
"input":"<!DOCTYPE a a\u0000",
-"output":["ParseError", "ParseError", ["DOCTYPE", "a", null, null, false]]},
+"output":["ParseError", ["DOCTYPE", "a", null, null, false]]},

{"description":"<!DOCTYPE a a\\u0009",
"input":"<!DOCTYPE a a\u0009",
@@ -2566,7 +2626,7 @@

{"description":"<!DOCTYPE>",
"input":"<!DOCTYPE>",
-"output":["ParseError", "ParseError", ["DOCTYPE", "", null, null, false]]},
+"output":["ParseError", "ParseError", ["DOCTYPE", null, null, null, false]]},

{"description":"<!DOCTYPE?",
"input":"<!DOCTYPE?",
@@ -2638,7 +2698,7 @@

{"description":"<!DOCTYPEa \\u0000",
"input":"<!DOCTYPEa \u0000",
-"output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "a", null, null, false]]},
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", null, null, false]]},

{"description":"<!DOCTYPEa \\u0008",
"input":"<!DOCTYPEa \u0008",
@@ -2742,7 +2802,7 @@

{"description":"<!DOCTYPEa PUBLIC\\u0000",
"input":"<!DOCTYPEa PUBLIC\u0000",
-"output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "a", null, null, false]]},
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", null, null, false]]},

{"description":"<!DOCTYPEa PUBLIC\\u0008",
"input":"<!DOCTYPEa PUBLIC\u0008",
@@ -2782,135 +2842,135 @@

{"description":"<!DOCTYPEa PUBLIC\"",
"input":"<!DOCTYPEa PUBLIC\"",
-"output":["ParseError", "ParseError", ["DOCTYPE", "a", "", null, false]]},
+"output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "a", "", null, false]]},

{"description":"<!DOCTYPEa PUBLIC\"\\u0000",
"input":"<!DOCTYPEa PUBLIC\"\u0000",
-"output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "a", "\uFFFD", null, false]]},
+"output":["ParseError", "ParseError", "ParseError", "ParseError", ["DOCTYPE", "a", "\uFFFD", null, false]]},

{"description":"<!DOCTYPEa PUBLIC\"\\u0009",
"input":"<!DOCTYPEa PUBLIC\"\u0009",
-"output":["ParseError", "ParseError", ["DOCTYPE", "a", "\u0009", null, false]]},
+"output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "a", "\u0009", null, false]]},

{"description":"<!DOCTYPEa PUBLIC\"\\u000A",
"input":"<!DOCTYPEa PUBLIC\"\u000A",
-"output":["ParseError", "ParseError", ["DOCTYPE", "a", "\u000A", null, false]]},
+"output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "a", "\u000A", null, false]]},

{"description":"<!DOCTYPEa PUBLIC\"\\u000B",
"input":"<!DOCTYPEa PUBLIC\"\u000B",
-"output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "a", "\u000B", null, false]]},
+"output":["ParseError", "ParseError", "ParseError", "ParseError", ["DOCTYPE", "a", "\u000B", null, false]]},

{"description":"<!DOCTYPEa PUBLIC\"\\u000C",
"input":"<!DOCTYPEa PUBLIC\"\u000C",
-"output":["ParseError", "ParseError", ["DOCTYPE", "a", "\u000C", null, false]]},
+"output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "a", "\u000C", null, false]]},

{"description":"<!DOCTYPEa PUBLIC\" ",
"input":"<!DOCTYPEa PUBLIC\" ",
-"output":["ParseError", "ParseError", ["DOCTYPE", "a", " ", null, false]]},
+"output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "a", " ", null, false]]},

{"description":"<!DOCTYPEa PUBLIC\"!",
"input":"<!DOCTYPEa PUBLIC\"!",
-"output":["ParseError", "ParseError", ["DOCTYPE", "a", "!", null, false]]},
+"output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "a", "!", null, false]]},

{"description":"<!DOCTYPEa PUBLIC\"\"",
"input":"<!DOCTYPEa PUBLIC\"\"",
-"output":["ParseError", "ParseError", ["DOCTYPE", "a", "", null, false]]},
+"output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "a", "", null, false]]},

{"description":"<!DOCTYPEa PUBLIC\"#",
"input":"<!DOCTYPEa PUBLIC\"#",
-"output":["ParseError", "ParseError", ["DOCTYPE", "a", "#", null, false]]},
+"output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "a", "#", null, false]]},

{"description":"<!DOCTYPEa PUBLIC\"&",
"input":"<!DOCTYPEa PUBLIC\"&",
-"output":["ParseError", "ParseError", ["DOCTYPE", "a", "&", null, false]]},
+"output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "a", "&", null, false]]},

{"description":"<!DOCTYPEa PUBLIC\"'",
"input":"<!DOCTYPEa PUBLIC\"'",
-"output":["ParseError", "ParseError", ["DOCTYPE", "a", "'", null, false]]},
+"output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "a", "'", null, false]]},

{"description":"<!DOCTYPEa PUBLIC\"-",
"input":"<!DOCTYPEa PUBLIC\"-",
-"output":["ParseError", "ParseError", ["DOCTYPE", "a", "-", null, false]]},
+"output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "a", "-", null, false]]},

{"description":"<!DOCTYPEa PUBLIC\"/",
"input":"<!DOCTYPEa PUBLIC\"/",
-"output":["ParseError", "ParseError", ["DOCTYPE", "a", "/", null, false]]},
+"output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "a", "/", null, false]]},

{"description":"<!DOCTYPEa PUBLIC\"0",
"input":"<!DOCTYPEa PUBLIC\"0",
-"output":["ParseError", "ParseError", ["DOCTYPE", "a", "0", null, false]]},
+"output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "a", "0", null, false]]},

{"description":"<!DOCTYPEa PUBLIC\"1",
"input":"<!DOCTYPEa PUBLIC\"1",
-"output":["ParseError", "ParseError", ["DOCTYPE", "a", "1", null, false]]},
+"output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "a", "1", null, false]]},

{"description":"<!DOCTYPEa PUBLIC\"9",
"input":"<!DOCTYPEa PUBLIC\"9",
-"output":["ParseError", "ParseError", ["DOCTYPE", "a", "9", null, false]]},
+"output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "a", "9", null, false]]},

{"description":"<!DOCTYPEa PUBLIC\"<",
"input":"<!DOCTYPEa PUBLIC\"<",
-"output":["ParseError", "ParseError", ["DOCTYPE", "a", "<", null, false]]},
+"output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "a", "<", null, false]]},

{"description":"<!DOCTYPEa PUBLIC\"=",
"input":"<!DOCTYPEa PUBLIC\"=",
-"output":["ParseError", "ParseError", ["DOCTYPE", "a", "=", null, false]]},
+"output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "a", "=", null, false]]},

{"description":"<!DOCTYPEa PUBLIC\">",
"input":"<!DOCTYPEa PUBLIC\">",
-"output":["ParseError", "ParseError", ["DOCTYPE", "a", "", null, false]]},
+"output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "a", "", null, false]]},

{"description":"<!DOCTYPEa PUBLIC\"?",
"input":"<!DOCTYPEa PUBLIC\"?",
-"output":["ParseError", "ParseError", ["DOCTYPE", "a", "?", null, false]]},
+"output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "a", "?", null, false]]},

{"description":"<!DOCTYPEa PUBLIC\"@",
"input":"<!DOCTYPEa PUBLIC\"@",
-"output":["ParseError", "ParseError", ["DOCTYPE", "a", "@", null, false]]},
+"output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "a", "@", null, false]]},

{"description":"<!DOCTYPEa PUBLIC\"A",
"input":"<!DOCTYPEa PUBLIC\"A",
-"output":["ParseError", "ParseError", ["DOCTYPE", "a", "A", null, false]]},
+"output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "a", "A", null, false]]},

{"description":"<!DOCTYPEa PUBLIC\"B",
"input":"<!DOCTYPEa PUBLIC\"B",
-"output":["ParseError", "ParseError", ["DOCTYPE", "a", "B", null, false]]},
+"output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "a", "B", null, false]]},

{"description":"<!DOCTYPEa PUBLIC\"Y",
"input":"<!DOCTYPEa PUBLIC\"Y",
-"output":["ParseError", "ParseError", ["DOCTYPE", "a", "Y", null, false]]},
+"output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "a", "Y", null, false]]},

{"description":"<!DOCTYPEa PUBLIC\"Z",
"input":"<!DOCTYPEa PUBLIC\"Z",
-"output":["ParseError", "ParseError", ["DOCTYPE", "a", "Z", null, false]]},
+"output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "a", "Z", null, false]]},

{"description":"<!DOCTYPEa PUBLIC\"`",
"input":"<!DOCTYPEa PUBLIC\"`",
-"output":["ParseError", "ParseError", ["DOCTYPE", "a", "`", null, false]]},
+"output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "a", "`", null, false]]},

{"description":"<!DOCTYPEa PUBLIC\"a",
"input":"<!DOCTYPEa PUBLIC\"a",
-"output":["ParseError", "ParseError", ["DOCTYPE", "a", "a", null, false]]},
+"output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "a", "a", null, false]]},

{"description":"<!DOCTYPEa PUBLIC\"b",
"input":"<!DOCTYPEa PUBLIC\"b",
-"output":["ParseError", "ParseError", ["DOCTYPE", "a", "b", null, false]]},
+"output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "a", "b", null, false]]},

{"description":"<!DOCTYPEa PUBLIC\"y",
"input":"<!DOCTYPEa PUBLIC\"y",
-"output":["ParseError", "ParseError", ["DOCTYPE", "a", "y", null, false]]},
+"output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "a", "y", null, false]]},

{"description":"<!DOCTYPEa PUBLIC\"z",
"input":"<!DOCTYPEa PUBLIC\"z",
-"output":["ParseError", "ParseError", ["DOCTYPE", "a", "z", null, false]]},
+"output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "a", "z", null, false]]},

{"description":"<!DOCTYPEa PUBLIC\"{",
"input":"<!DOCTYPEa PUBLIC\"{",
-"output":["ParseError", "ParseError", ["DOCTYPE", "a", "{", null, false]]},
+"output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "a", "{", null, false]]},

{"description":"<!DOCTYPEa PUBLIC\"\\uDBC0\\uDC00",
"input":"<!DOCTYPEa PUBLIC\"\uDBC0\uDC00",
-"output":["ParseError", "ParseError", ["DOCTYPE", "a", "\uDBC0\uDC00", null, false]]},
+"output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "a", "\uDBC0\uDC00", null, false]]},

{"description":"<!DOCTYPEa PUBLIC#",
"input":"<!DOCTYPEa PUBLIC#",
@@ -2922,47 +2982,47 @@

{"description":"<!DOCTYPEa PUBLIC'",
"input":"<!DOCTYPEa PUBLIC'",
-"output":["ParseError", "ParseError", ["DOCTYPE", "a", "", null, false]]},
+"output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "a", "", null, false]]},

{"description":"<!DOCTYPEa PUBLIC'\\u0000",
"input":"<!DOCTYPEa PUBLIC'\u0000",
-"output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "a", "\uFFFD", null, false]]},
+"output":["ParseError", "ParseError", "ParseError", "ParseError", ["DOCTYPE", "a", "\uFFFD", null, false]]},

{"description":"<!DOCTYPEa PUBLIC'\\u0009",
"input":"<!DOCTYPEa PUBLIC'\u0009",
-"output":["ParseError", "ParseError", ["DOCTYPE", "a", "\u0009", null, false]]},
+"output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "a", "\u0009", null, false]]},

{"description":"<!DOCTYPEa PUBLIC'\\u000A",
"input":"<!DOCTYPEa PUBLIC'\u000A",
-"output":["ParseError", "ParseError", ["DOCTYPE", "a", "\u000A", null, false]]},
+"output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "a", "\u000A", null, false]]},

{"description":"<!DOCTYPEa PUBLIC'\\u000B",
"input":"<!DOCTYPEa PUBLIC'\u000B",
-"output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "a", "\u000B", null, false]]},
+"output":["ParseError", "ParseError", "ParseError", "ParseError", ["DOCTYPE", "a", "\u000B", null, false]]},

{"description":"<!DOCTYPEa PUBLIC'\\u000C",
"input":"<!DOCTYPEa PUBLIC'\u000C",
-"output":["ParseError", "ParseError", ["DOCTYPE", "a", "\u000C", null, false]]},
+"output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "a", "\u000C", null, false]]},

{"description":"<!DOCTYPEa PUBLIC' ",
"input":"<!DOCTYPEa PUBLIC' ",
-"output":["ParseError", "ParseError", ["DOCTYPE", "a", " ", null, false]]},
+"output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "a", " ", null, false]]},

{"description":"<!DOCTYPEa PUBLIC'!",
"input":"<!DOCTYPEa PUBLIC'!",
-"output":["ParseError", "ParseError", ["DOCTYPE", "a", "!", null, false]]},
+"output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "a", "!", null, false]]},

{"description":"<!DOCTYPEa PUBLIC'\"",
"input":"<!DOCTYPEa PUBLIC'\"",
-"output":["ParseError", "ParseError", ["DOCTYPE", "a", "\"", null, false]]},
+"output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "a", "\"", null, false]]},

{"description":"<!DOCTYPEa PUBLIC'&",
"input":"<!DOCTYPEa PUBLIC'&",
-"output":["ParseError", "ParseError", ["DOCTYPE", "a", "&", null, false]]},
+"output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "a", "&", null, false]]},

{"description":"<!DOCTYPEa PUBLIC''",
"input":"<!DOCTYPEa PUBLIC''",
-"output":["ParseError", "ParseError", ["DOCTYPE", "a", "", null, false]]},
+"output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "a", "", null, false]]},

{"description":"<!DOCTYPEa PUBLIC''\\u0000",
"input":"<!DOCTYPEa PUBLIC''\u0000",
@@ -2970,231 +3030,231 @@

{"description":"<!DOCTYPEa PUBLIC''\\u0008",
"input":"<!DOCTYPEa PUBLIC''\u0008",
-"output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "a", "", null, false]]},
+"output":["ParseError", "ParseError", "ParseError", "ParseError", ["DOCTYPE", "a", "", null, false]]},

{"description":"<!DOCTYPEa PUBLIC''\\u0009",
"input":"<!DOCTYPEa PUBLIC''\u0009",
-"output":["ParseError", "ParseError", ["DOCTYPE", "a", "", null, false]]},
+"output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "a", "", null, false]]},

{"description":"<!DOCTYPEa PUBLIC''\\u000A",
"input":"<!DOCTYPEa PUBLIC''\u000A",
-"output":["ParseError", "ParseError", ["DOCTYPE", "a", "", null, false]]},
+"output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "a", "", null, false]]},

{"description":"<!DOCTYPEa PUBLIC''\\u000B",
"input":"<!DOCTYPEa PUBLIC''\u000B",
-"output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "a", "", null, false]]},
+"output":["ParseError", "ParseError", "ParseError", "ParseError", ["DOCTYPE", "a", "", null, false]]},

{"description":"<!DOCTYPEa PUBLIC''\\u000C",
"input":"<!DOCTYPEa PUBLIC''\u000C",
-"output":["ParseError", "ParseError", ["DOCTYPE", "a", "", null, false]]},
+"output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "a", "", null, false]]},

{"description":"<!DOCTYPEa PUBLIC''\\u000D",
"input":"<!DOCTYPEa PUBLIC''\u000D",
-"output":["ParseError", "ParseError", ["DOCTYPE", "a", "", null, false]]},
+"output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "a", "", null, false]]},

{"description":"<!DOCTYPEa PUBLIC''\\u001F",
"input":"<!DOCTYPEa PUBLIC''\u001F",
-"output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "a", "", null, false]]},
+"output":["ParseError", "ParseError", "ParseError", "ParseError", ["DOCTYPE", "a", "", null, false]]},

{"description":"<!DOCTYPEa PUBLIC'' ",
"input":"<!DOCTYPEa PUBLIC'' ",
-"output":["ParseError", "ParseError", ["DOCTYPE", "a", "", null, false]]},
+"output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "a", "", null, false]]},

{"description":"<!DOCTYPEa PUBLIC''!",
"input":"<!DOCTYPEa PUBLIC''!",
-"output":["ParseError", "ParseError", ["DOCTYPE", "a", "", null, false]]},
+"output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "a", "", null, false]]},

{"description":"<!DOCTYPEa PUBLIC''\"",
"input":"<!DOCTYPEa PUBLIC''\"",
-"output":["ParseError", "ParseError", ["DOCTYPE", "a", "", "", false]]},
+"output":["ParseError", "ParseError", "ParseError", "ParseError", ["DOCTYPE", "a", "", "", false]]},

{"description":"<!DOCTYPEa PUBLIC''#",
"input":"<!DOCTYPEa PUBLIC''#",
-"output":["ParseError", "ParseError", ["DOCTYPE", "a", "", null, false]]},
+"output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "a", "", null, false]]},

{"description":"<!DOCTYPEa PUBLIC''&",
"input":"<!DOCTYPEa PUBLIC''&",
-"output":["ParseError", "ParseError", ["DOCTYPE", "a", "", null, false]]},
+"output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "a", "", null, false]]},

{"description":"<!DOCTYPEa PUBLIC'''",
"input":"<!DOCTYPEa PUBLIC'''",
-"output":["ParseError", "ParseError", ["DOCTYPE", "a", "", "", false]]},
+"output":["ParseError", "ParseError", "ParseError", "ParseError", ["DOCTYPE", "a", "", "", false]]},

{"description":"<!DOCTYPEa PUBLIC''(",
"input":"<!DOCTYPEa PUBLIC''(",
-"output":["ParseError", "ParseError", ["DOCTYPE", "a", "", null, false]]},
+"output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "a", "", null, false]]},

{"description":"<!DOCTYPEa PUBLIC''-",
"input":"<!DOCTYPEa PUBLIC''-",
-"output":["ParseError", "ParseError", ["DOCTYPE", "a", "", null, false]]},
+"output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "a", "", null, false]]},

{"description":"<!DOCTYPEa PUBLIC''/",
"input":"<!DOCTYPEa PUBLIC''/",
-"output":["ParseError", "ParseError", ["DOCTYPE", "a", "", null, false]]},
+"output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "a", "", null, false]]},

{"description":"<!DOCTYPEa PUBLIC''0",
"input":"<!DOCTYPEa PUBLIC''0",
-"output":["ParseError", "ParseError", ["DOCTYPE", "a", "", null, false]]},
+"output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "a", "", null, false]]},

{"description":"<!DOCTYPEa PUBLIC''1",
"input":"<!DOCTYPEa PUBLIC''1",
-"output":["ParseError", "ParseError", ["DOCTYPE", "a", "", null, false]]},
+"output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "a", "", null, false]]},

{"description":"<!DOCTYPEa PUBLIC''9",
"input":"<!DOCTYPEa PUBLIC''9",
-"output":["ParseError", "ParseError", ["DOCTYPE", "a", "", null, false]]},
+"output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "a", "", null, false]]},

{"description":"<!DOCTYPEa PUBLIC''<",
"input":"<!DOCTYPEa PUBLIC''<",
-"output":["ParseError", "ParseError", ["DOCTYPE", "a", "", null, false]]},
+"output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "a", "", null, false]]},

{"description":"<!DOCTYPEa PUBLIC''=",
"input":"<!DOCTYPEa PUBLIC''=",
-"output":["ParseError", "ParseError", ["DOCTYPE", "a", "", null, false]]},
+"output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "a", "", null, false]]},

{"description":"<!DOCTYPEa PUBLIC''>",
"input":"<!DOCTYPEa PUBLIC''>",
-"output":["ParseError", ["DOCTYPE", "a", "", null, true]]},
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", "", null, true]]},

{"description":"<!DOCTYPEa PUBLIC''?",
"input":"<!DOCTYPEa PUBLIC''?",
-"output":["ParseError", "ParseError", ["DOCTYPE", "a", "", null, false]]},
+"output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "a", "", null, false]]},

{"description":"<!DOCTYPEa PUBLIC''@",
"input":"<!DOCTYPEa PUBLIC''@",
-"output":["ParseError", "ParseError", ["DOCTYPE", "a", "", null, false]]},
+"output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "a", "", null, false]]},

{"description":"<!DOCTYPEa PUBLIC''A",
"input":"<!DOCTYPEa PUBLIC''A",
-"output":["ParseError", "ParseError", ["DOCTYPE", "a", "", null, false]]},
+"output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "a", "", null, false]]},

{"description":"<!DOCTYPEa PUBLIC''B",
"input":"<!DOCTYPEa PUBLIC''B",
-"output":["ParseError", "ParseError", ["DOCTYPE", "a", "", null, false]]},
+"output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "a", "", null, false]]},

{"description":"<!DOCTYPEa PUBLIC''Y",
"input":"<!DOCTYPEa PUBLIC''Y",
-"output":["ParseError", "ParseError", ["DOCTYPE", "a", "", null, false]]},
+"output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "a", "", null, false]]},

{"description":"<!DOCTYPEa PUBLIC''Z",
"input":"<!DOCTYPEa PUBLIC''Z",
-"output":["ParseError", "ParseError", ["DOCTYPE", "a", "", null, false]]},
+"output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "a", "", null, false]]},

{"description":"<!DOCTYPEa PUBLIC''`",
"input":"<!DOCTYPEa PUBLIC''`",
-"output":["ParseError", "ParseError", ["DOCTYPE", "a", "", null, false]]},
+"output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "a", "", null, false]]},

{"description":"<!DOCTYPEa PUBLIC''a",
"input":"<!DOCTYPEa PUBLIC''a",
-"output":["ParseError", "ParseError", ["DOCTYPE", "a", "", null, false]]},
+"output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "a", "", null, false]]},

{"description":"<!DOCTYPEa PUBLIC''b",
"input":"<!DOCTYPEa PUBLIC''b",
-"output":["ParseError", "ParseError", ["DOCTYPE", "a", "", null, false]]},
+"output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "a", "", null, false]]},

{"description":"<!DOCTYPEa PUBLIC''y",
"input":"<!DOCTYPEa PUBLIC''y",
-"output":["ParseError", "ParseError", ["DOCTYPE", "a", "", null, false]]},
+"output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "a", "", null, false]]},

{"description":"<!DOCTYPEa PUBLIC''z",
"input":"<!DOCTYPEa PUBLIC''z",
-"output":["ParseError", "ParseError", ["DOCTYPE", "a", "", null, false]]},
+"output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "a", "", null, false]]},

{"description":"<!DOCTYPEa PUBLIC''{",
"input":"<!DOCTYPEa PUBLIC''{",
-"output":["ParseError", "ParseError", ["DOCTYPE", "a", "", null, false]]},
+"output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "a", "", null, false]]},

{"description":"<!DOCTYPEa PUBLIC''\\uDBC0\\uDC00",
"input":"<!DOCTYPEa PUBLIC''\uDBC0\uDC00",
-"output":["ParseError", "ParseError", ["DOCTYPE", "a", "", null, false]]},
+"output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "a", "", null, false]]},

{"description":"<!DOCTYPEa PUBLIC'(",
"input":"<!DOCTYPEa PUBLIC'(",
-"output":["ParseError", "ParseError", ["DOCTYPE", "a", "(", null, false]]},
+"output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "a", "(", null, false]]},

{"description":"<!DOCTYPEa PUBLIC'-",
"input":"<!DOCTYPEa PUBLIC'-",
-"output":["ParseError", "ParseError", ["DOCTYPE", "a", "-", null, false]]},
+"output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "a", "-", null, false]]},

{"description":"<!DOCTYPEa PUBLIC'/",
"input":"<!DOCTYPEa PUBLIC'/",
-"output":["ParseError", "ParseError", ["DOCTYPE", "a", "/", null, false]]},
+"output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "a", "/", null, false]]},

{"description":"<!DOCTYPEa PUBLIC'0",
"input":"<!DOCTYPEa PUBLIC'0",
-"output":["ParseError", "ParseError", ["DOCTYPE", "a", "0", null, false]]},
+"output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "a", "0", null, false]]},

{"description":"<!DOCTYPEa PUBLIC'1",
"input":"<!DOCTYPEa PUBLIC'1",
-"output":["ParseError", "ParseError", ["DOCTYPE", "a", "1", null, false]]},
+"output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "a", "1", null, false]]},

{"description":"<!DOCTYPEa PUBLIC'9",
"input":"<!DOCTYPEa PUBLIC'9",
-"output":["ParseError", "ParseError", ["DOCTYPE", "a", "9", null, false]]},
+"output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "a", "9", null, false]]},

{"description":"<!DOCTYPEa PUBLIC'<",
"input":"<!DOCTYPEa PUBLIC'<",
-"output":["ParseError", "ParseError", ["DOCTYPE", "a", "<", null, false]]},
+"output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "a", "<", null, false]]},

{"description":"<!DOCTYPEa PUBLIC'=",
"input":"<!DOCTYPEa PUBLIC'=",
-"output":["ParseError", "ParseError", ["DOCTYPE", "a", "=", null, false]]},
+"output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "a", "=", null, false]]},

{"description":"<!DOCTYPEa PUBLIC'>",
"input":"<!DOCTYPEa PUBLIC'>",
-"output":["ParseError", "ParseError", ["DOCTYPE", "a", "", null, false]]},
+"output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "a", "", null, false]]},

{"description":"<!DOCTYPEa PUBLIC'?",
"input":"<!DOCTYPEa PUBLIC'?",
-"output":["ParseError", "ParseError", ["DOCTYPE", "a", "?", null, false]]},
+"output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "a", "?", null, false]]},

{"description":"<!DOCTYPEa PUBLIC'@",
"input":"<!DOCTYPEa PUBLIC'@",
-"output":["ParseError", "ParseError", ["DOCTYPE", "a", "@", null, false]]},
+"output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "a", "@", null, false]]},

{"description":"<!DOCTYPEa PUBLIC'A",
"input":"<!DOCTYPEa PUBLIC'A",
-"output":["ParseError", "ParseError", ["DOCTYPE", "a", "A", null, false]]},
+"output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "a", "A", null, false]]},

{"description":"<!DOCTYPEa PUBLIC'B",
"input":"<!DOCTYPEa PUBLIC'B",
-"output":["ParseError", "ParseError", ["DOCTYPE", "a", "B", null, false]]},
+"output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "a", "B", null, false]]},

{"description":"<!DOCTYPEa PUBLIC'Y",
"input":"<!DOCTYPEa PUBLIC'Y",
-"output":["ParseError", "ParseError", ["DOCTYPE", "a", "Y", null, false]]},
+"output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "a", "Y", null, false]]},

{"description":"<!DOCTYPEa PUBLIC'Z",
"input":"<!DOCTYPEa PUBLIC'Z",
-"output":["ParseError", "ParseError", ["DOCTYPE", "a", "Z", null, false]]},
+"output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "a", "Z", null, false]]},

{"description":"<!DOCTYPEa PUBLIC'`",
"input":"<!DOCTYPEa PUBLIC'`",
-"output":["ParseError", "ParseError", ["DOCTYPE", "a", "`", null, false]]},
+"output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "a", "`", null, false]]},

{"description":"<!DOCTYPEa PUBLIC'a",
"input":"<!DOCTYPEa PUBLIC'a",
-"output":["ParseError", "ParseError", ["DOCTYPE", "a", "a", null, false]]},
+"output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "a", "a", null, false]]},

{"description":"<!DOCTYPEa PUBLIC'b",
"input":"<!DOCTYPEa PUBLIC'b",
-"output":["ParseError", "ParseError", ["DOCTYPE", "a", "b", null, false]]},
+"output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "a", "b", null, false]]},

{"description":"<!DOCTYPEa PUBLIC'y",
"input":"<!DOCTYPEa PUBLIC'y",
-"output":["ParseError", "ParseError", ["DOCTYPE", "a", "y", null, false]]},
+"output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "a", "y", null, false]]},

{"description":"<!DOCTYPEa PUBLIC'z",
"input":"<!DOCTYPEa PUBLIC'z",
-"output":["ParseError", "ParseError", ["DOCTYPE", "a", "z", null, false]]},
+"output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "a", "z", null, false]]},

{"description":"<!DOCTYPEa PUBLIC'{",
"input":"<!DOCTYPEa PUBLIC'{",
-"output":["ParseError", "ParseError", ["DOCTYPE", "a", "{", null, false]]},
+"output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "a", "{", null, false]]},

{"description":"<!DOCTYPEa PUBLIC'\\uDBC0\\uDC00",
"input":"<!DOCTYPEa PUBLIC'\uDBC0\uDC00",
-"output":["ParseError", "ParseError", ["DOCTYPE", "a", "\uDBC0\uDC00", null, false]]},
+"output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "a", "\uDBC0\uDC00", null, false]]},

{"description":"<!DOCTYPEa PUBLIC(",
"input":"<!DOCTYPEa PUBLIC(",
@@ -3290,7 +3350,7 @@

{"description":"<!DOCTYPEa SYSTEM\\u0000",
"input":"<!DOCTYPEa SYSTEM\u0000",
-"output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "a", null, null, false]]},
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", null, null, false]]},

{"description":"<!DOCTYPEa SYSTEM\\u0008",
"input":"<!DOCTYPEa SYSTEM\u0008",
@@ -3330,135 +3390,135 @@

{"description":"<!DOCTYPEa SYSTEM\"",
"input":"<!DOCTYPEa SYSTEM\"",
-"output":["ParseError", "ParseError", ["DOCTYPE", "a", null, "", false]]},
+"output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "a", null, "", false]]},

{"description":"<!DOCTYPEa SYSTEM\"\\u0000",
"input":"<!DOCTYPEa SYSTEM\"\u0000",
-"output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "a", null, "\uFFFD", false]]},
+"output":["ParseError", "ParseError", "ParseError", "ParseError", ["DOCTYPE", "a", null, "\uFFFD", false]]},

{"description":"<!DOCTYPEa SYSTEM\"\\u0009",
"input":"<!DOCTYPEa SYSTEM\"\u0009",
-"output":["ParseError", "ParseError", ["DOCTYPE", "a", null, "\u0009", false]]},
+"output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "a", null, "\u0009", false]]},

{"description":"<!DOCTYPEa SYSTEM\"\\u000A",
"input":"<!DOCTYPEa SYSTEM\"\u000A",
-"output":["ParseError", "ParseError", ["DOCTYPE", "a", null, "\u000A", false]]},
+"output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "a", null, "\u000A", false]]},

{"description":"<!DOCTYPEa SYSTEM\"\\u000B",
"input":"<!DOCTYPEa SYSTEM\"\u000B",
-"output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "a", null, "\u000B", false]]},
+"output":["ParseError", "ParseError", "ParseError", "ParseError", ["DOCTYPE", "a", null, "\u000B", false]]},

{"description":"<!DOCTYPEa SYSTEM\"\\u000C",
"input":"<!DOCTYPEa SYSTEM\"\u000C",
-"output":["ParseError", "ParseError", ["DOCTYPE", "a", null, "\u000C", false]]},
+"output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "a", null, "\u000C", false]]},

{"description":"<!DOCTYPEa SYSTEM\" ",
"input":"<!DOCTYPEa SYSTEM\" ",
-"output":["ParseError", "ParseError", ["DOCTYPE", "a", null, " ", false]]},
+"output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "a", null, " ", false]]},

{"description":"<!DOCTYPEa SYSTEM\"!",
"input":"<!DOCTYPEa SYSTEM\"!",
-"output":["ParseError", "ParseError", ["DOCTYPE", "a", null, "!", false]]},
+"output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "a", null, "!", false]]},

{"description":"<!DOCTYPEa SYSTEM\"\"",
"input":"<!DOCTYPEa SYSTEM\"\"",
-"output":["ParseError", "ParseError", ["DOCTYPE", "a", null, "", false]]},
+"output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "a", null, "", false]]},

{"description":"<!DOCTYPEa SYSTEM\"#",
"input":"<!DOCTYPEa SYSTEM\"#",
-"output":["ParseError", "ParseError", ["DOCTYPE", "a", null, "#", false]]},
+"output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "a", null, "#", false]]},

{"description":"<!DOCTYPEa SYSTEM\"&",
"input":"<!DOCTYPEa SYSTEM\"&",
-"output":["ParseError", "ParseError", ["DOCTYPE", "a", null, "&", false]]},
+"output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "a", null, "&", false]]},

{"description":"<!DOCTYPEa SYSTEM\"'",
"input":"<!DOCTYPEa SYSTEM\"'",
-"output":["ParseError", "ParseError", ["DOCTYPE", "a", null, "'", false]]},
+"output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "a", null, "'", false]]},

{"description":"<!DOCTYPEa SYSTEM\"-",
"input":"<!DOCTYPEa SYSTEM\"-",
-"output":["ParseError", "ParseError", ["DOCTYPE", "a", null, "-", false]]},
+"output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "a", null, "-", false]]},

{"description":"<!DOCTYPEa SYSTEM\"/",
"input":"<!DOCTYPEa SYSTEM\"/",
-"output":["ParseError", "ParseError", ["DOCTYPE", "a", null, "/", false]]},
+"output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "a", null, "/", false]]},

{"description":"<!DOCTYPEa SYSTEM\"0",
"input":"<!DOCTYPEa SYSTEM\"0",
-"output":["ParseError", "ParseError", ["DOCTYPE", "a", null, "0", false]]},
+"output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "a", null, "0", false]]},

{"description":"<!DOCTYPEa SYSTEM\"1",
"input":"<!DOCTYPEa SYSTEM\"1",
-"output":["ParseError", "ParseError", ["DOCTYPE", "a", null, "1", false]]},
+"output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "a", null, "1", false]]},

{"description":"<!DOCTYPEa SYSTEM\"9",
"input":"<!DOCTYPEa SYSTEM\"9",
-"output":["ParseError", "ParseError", ["DOCTYPE", "a", null, "9", false]]},
+"output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "a", null, "9", false]]},

{"description":"<!DOCTYPEa SYSTEM\"<",
"input":"<!DOCTYPEa SYSTEM\"<",
-"output":["ParseError", "ParseError", ["DOCTYPE", "a", null, "<", false]]},
+"output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "a", null, "<", false]]},

{"description":"<!DOCTYPEa SYSTEM\"=",
"input":"<!DOCTYPEa SYSTEM\"=",
-"output":["ParseError", "ParseError", ["DOCTYPE", "a", null, "=", false]]},
+"output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "a", null, "=", false]]},

{"description":"<!DOCTYPEa SYSTEM\">",
"input":"<!DOCTYPEa SYSTEM\">",
-"output":["ParseError", "ParseError", ["DOCTYPE", "a", null, "", false]]},
+"output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "a", null, "", false]]},

{"description":"<!DOCTYPEa SYSTEM\"?",
"input":"<!DOCTYPEa SYSTEM\"?",
-"output":["ParseError", "ParseError", ["DOCTYPE", "a", null, "?", false]]},
+"output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "a", null, "?", false]]},

{"description":"<!DOCTYPEa SYSTEM\"@",
"input":"<!DOCTYPEa SYSTEM\"@",
-"output":["ParseError", "ParseError", ["DOCTYPE", "a", null, "@", false]]},
+"output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "a", null, "@", false]]},

{"description":"<!DOCTYPEa SYSTEM\"A",
"input":"<!DOCTYPEa SYSTEM\"A",
-"output":["ParseError", "ParseError", ["DOCTYPE", "a", null, "A", false]]},
+"output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "a", null, "A", false]]},

{"description":"<!DOCTYPEa SYSTEM\"B",
"input":"<!DOCTYPEa SYSTEM\"B",
-"output":["ParseError", "ParseError", ["DOCTYPE", "a", null, "B", false]]},
+"output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "a", null, "B", false]]},

{"description":"<!DOCTYPEa SYSTEM\"Y",
"input":"<!DOCTYPEa SYSTEM\"Y",
-"output":["ParseError", "ParseError", ["DOCTYPE", "a", null, "Y", false]]},
+"output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "a", null, "Y", false]]},

{"description":"<!DOCTYPEa SYSTEM\"Z",
"input":"<!DOCTYPEa SYSTEM\"Z",
-"output":["ParseError", "ParseError", ["DOCTYPE", "a", null, "Z", false]]},
+"output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "a", null, "Z", false]]},

{"description":"<!DOCTYPEa SYSTEM\"`",
"input":"<!DOCTYPEa SYSTEM\"`",
-"output":["ParseError", "ParseError", ["DOCTYPE", "a", null, "`", false]]},
+"output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "a", null, "`", false]]},

{"description":"<!DOCTYPEa SYSTEM\"a",
"input":"<!DOCTYPEa SYSTEM\"a",
-"output":["ParseError", "ParseError", ["DOCTYPE", "a", null, "a", false]]},
+"output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "a", null, "a", false]]},

{"description":"<!DOCTYPEa SYSTEM\"b",
"input":"<!DOCTYPEa SYSTEM\"b",
-"output":["ParseError", "ParseError", ["DOCTYPE", "a", null, "b", false]]},
+"output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "a", null, "b", false]]},

{"description":"<!DOCTYPEa SYSTEM\"y",
"input":"<!DOCTYPEa SYSTEM\"y",
-"output":["ParseError", "ParseError", ["DOCTYPE", "a", null, "y", false]]},
+"output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "a", null, "y", false]]},

{"description":"<!DOCTYPEa SYSTEM\"z",
"input":"<!DOCTYPEa SYSTEM\"z",
-"output":["ParseError", "ParseError", ["DOCTYPE", "a", null, "z", false]]},
+"output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "a", null, "z", false]]},

{"description":"<!DOCTYPEa SYSTEM\"{",
"input":"<!DOCTYPEa SYSTEM\"{",
-"output":["ParseError", "ParseError", ["DOCTYPE", "a", null, "{", false]]},
+"output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "a", null, "{", false]]},

{"description":"<!DOCTYPEa SYSTEM\"\\uDBC0\\uDC00",
"input":"<!DOCTYPEa SYSTEM\"\uDBC0\uDC00",
-"output":["ParseError", "ParseError", ["DOCTYPE", "a", null, "\uDBC0\uDC00", false]]},
+"output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "a", null, "\uDBC0\uDC00", false]]},

{"description":"<!DOCTYPEa SYSTEM#",
"input":"<!DOCTYPEa SYSTEM#",
@@ -3470,47 +3530,47 @@

{"description":"<!DOCTYPEa SYSTEM'",
"input":"<!DOCTYPEa SYSTEM'",
-"output":["ParseError", "ParseError", ["DOCTYPE", "a", null, "", false]]},
+"output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "a", null, "", false]]},

{"description":"<!DOCTYPEa SYSTEM'\\u0000",
"input":"<!DOCTYPEa SYSTEM'\u0000",
-"output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "a", null, "\uFFFD", false]]},
+"output":["ParseError", "ParseError", "ParseError", "ParseError", ["DOCTYPE", "a", null, "\uFFFD", false]]},

{"description":"<!DOCTYPEa SYSTEM'\\u0009",
"input":"<!DOCTYPEa SYSTEM'\u0009",
-"output":["ParseError", "ParseError", ["DOCTYPE", "a", null, "\u0009", false]]},
+"output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "a", null, "\u0009", false]]},

{"description":"<!DOCTYPEa SYSTEM'\\u000A",
"input":"<!DOCTYPEa SYSTEM'\u000A",
-"output":["ParseError", "ParseError", ["DOCTYPE", "a", null, "\u000A", false]]},
+"output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "a", null, "\u000A", false]]},

{"description":"<!DOCTYPEa SYSTEM'\\u000B",
"input":"<!DOCTYPEa SYSTEM'\u000B",
-"output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "a", null, "\u000B", false]]},
+"output":["ParseError", "ParseError", "ParseError", "ParseError", ["DOCTYPE", "a", null, "\u000B", false]]},

{"description":"<!DOCTYPEa SYSTEM'\\u000C",
"input":"<!DOCTYPEa SYSTEM'\u000C",
-"output":["ParseError", "ParseError", ["DOCTYPE", "a", null, "\u000C", false]]},
+"output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "a", null, "\u000C", false]]},

{"description":"<!DOCTYPEa SYSTEM' ",
"input":"<!DOCTYPEa SYSTEM' ",
-"output":["ParseError", "ParseError", ["DOCTYPE", "a", null, " ", false]]},
+"output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "a", null, " ", false]]},

{"description":"<!DOCTYPEa SYSTEM'!",
"input":"<!DOCTYPEa SYSTEM'!",
-"output":["ParseError", "ParseError", ["DOCTYPE", "a", null, "!", false]]},
+"output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "a", null, "!", false]]},

{"description":"<!DOCTYPEa SYSTEM'\"",
"input":"<!DOCTYPEa SYSTEM'\"",
-"output":["ParseError", "ParseError", ["DOCTYPE", "a", null, "\"", false]]},
+"output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "a", null, "\"", false]]},

{"description":"<!DOCTYPEa SYSTEM'&",
"input":"<!DOCTYPEa SYSTEM'&",
-"output":["ParseError", "ParseError", ["DOCTYPE", "a", null, "&", false]]},
+"output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "a", null, "&", false]]},

{"description":"<!DOCTYPEa SYSTEM''",
"input":"<!DOCTYPEa SYSTEM''",
-"output":["ParseError", "ParseError", ["DOCTYPE", "a", null, "", false]]},
+"output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "a", null, "", false]]},

{"description":"<!DOCTYPEa SYSTEM''\\u0000",
"input":"<!DOCTYPEa SYSTEM''\u0000",
@@ -3518,223 +3578,223 @@

{"description":"<!DOCTYPEa SYSTEM''\\u0008",
"input":"<!DOCTYPEa SYSTEM''\u0008",
-"output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "a", null, "", true]]},
+"output":["ParseError", "ParseError", "ParseError", "ParseError", ["DOCTYPE", "a", null, "", true]]},

{"description":"<!DOCTYPEa SYSTEM''\\u0009",
"input":"<!DOCTYPEa SYSTEM''\u0009",
-"output":["ParseError", "ParseError", ["DOCTYPE", "a", null, "", false]]},
+"output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "a", null, "", false]]},

{"description":"<!DOCTYPEa SYSTEM''\\u000A",
"input":"<!DOCTYPEa SYSTEM''\u000A",
-"output":["ParseError", "ParseError", ["DOCTYPE", "a", null, "", false]]},
+"output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "a", null, "", false]]},

{"description":"<!DOCTYPEa SYSTEM''\\u000B",
"input":"<!DOCTYPEa SYSTEM''\u000B",
-"output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "a", null, "", true]]},
+"output":["ParseError", "ParseError", "ParseError", "ParseError", ["DOCTYPE", "a", null, "", true]]},

{"description":"<!DOCTYPEa SYSTEM''\\u000C",
"input":"<!DOCTYPEa SYSTEM''\u000C",
-"output":["ParseError", "ParseError", ["DOCTYPE", "a", null, "", false]]},
+"output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "a", null, "", false]]},

{"description":"<!DOCTYPEa SYSTEM''\\u000D",
"input":"<!DOCTYPEa SYSTEM''\u000D",
-"output":["ParseError", "ParseError", ["DOCTYPE", "a", null, "", false]]},
+"output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "a", null, "", false]]},

{"description":"<!DOCTYPEa SYSTEM''\\u001F",
"input":"<!DOCTYPEa SYSTEM''\u001F",
-"output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "a", null, "", true]]},
+"output":["ParseError", "ParseError", "ParseError", "ParseError", ["DOCTYPE", "a", null, "", true]]},

{"description":"<!DOCTYPEa SYSTEM'' ",
"input":"<!DOCTYPEa SYSTEM'' ",
-"output":["ParseError", "ParseError", ["DOCTYPE", "a", null, "", false]]},
+"output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "a", null, "", false]]},

{"description":"<!DOCTYPEa SYSTEM''!",
"input":"<!DOCTYPEa SYSTEM''!",
-"output":["ParseError", "ParseError", ["DOCTYPE", "a", null, "", true]]},
+"output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "a", null, "", true]]},

{"description":"<!DOCTYPEa SYSTEM''\"",
"input":"<!DOCTYPEa SYSTEM''\"",
-"output":["ParseError", "ParseError", ["DOCTYPE", "a", null, "", true]]},
+"output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "a", null, "", true]]},

{"description":"<!DOCTYPEa SYSTEM''&",
"input":"<!DOCTYPEa SYSTEM''&",
-"output":["ParseError", "ParseError", ["DOCTYPE", "a", null, "", true]]},
+"output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "a", null, "", true]]},

{"description":"<!DOCTYPEa SYSTEM'''",
"input":"<!DOCTYPEa SYSTEM'''",
-"output":["ParseError", "ParseError", ["DOCTYPE", "a", null, "", true]]},
+"output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "a", null, "", true]]},

{"description":"<!DOCTYPEa SYSTEM''-",
"input":"<!DOCTYPEa SYSTEM''-",
-"output":["ParseError", "ParseError", ["DOCTYPE", "a", null, "", true]]},
+"output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "a", null, "", true]]},

{"description":"<!DOCTYPEa SYSTEM''/",
"input":"<!DOCTYPEa SYSTEM''/",
-"output":["ParseError", "ParseError", ["DOCTYPE", "a", null, "", true]]},
+"output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "a", null, "", true]]},

{"description":"<!DOCTYPEa SYSTEM''0",
"input":"<!DOCTYPEa SYSTEM''0",
-"output":["ParseError", "ParseError", ["DOCTYPE", "a", null, "", true]]},
+"output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "a", null, "", true]]},

{"description":"<!DOCTYPEa SYSTEM''1",
"input":"<!DOCTYPEa SYSTEM''1",
-"output":["ParseError", "ParseError", ["DOCTYPE", "a", null, "", true]]},
+"output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "a", null, "", true]]},

{"description":"<!DOCTYPEa SYSTEM''9",
"input":"<!DOCTYPEa SYSTEM''9",
-"output":["ParseError", "ParseError", ["DOCTYPE", "a", null, "", true]]},
+"output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "a", null, "", true]]},

{"description":"<!DOCTYPEa SYSTEM''<",
"input":"<!DOCTYPEa SYSTEM''<",
-"output":["ParseError", "ParseError", ["DOCTYPE", "a", null, "", true]]},
+"output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "a", null, "", true]]},

{"description":"<!DOCTYPEa SYSTEM''=",
"input":"<!DOCTYPEa SYSTEM''=",
-"output":["ParseError", "ParseError", ["DOCTYPE", "a", null, "", true]]},
+"output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "a", null, "", true]]},

{"description":"<!DOCTYPEa SYSTEM''>",
"input":"<!DOCTYPEa SYSTEM''>",
-"output":["ParseError", ["DOCTYPE", "a", null, "", true]]},
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", null, "", true]]},

{"description":"<!DOCTYPEa SYSTEM''?",
"input":"<!DOCTYPEa SYSTEM''?",
-"output":["ParseError", "ParseError", ["DOCTYPE", "a", null, "", true]]},
+"output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "a", null, "", true]]},

{"description":"<!DOCTYPEa SYSTEM''@",
"input":"<!DOCTYPEa SYSTEM''@",
-"output":["ParseError", "ParseError", ["DOCTYPE", "a", null, "", true]]},
+"output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "a", null, "", true]]},

{"description":"<!DOCTYPEa SYSTEM''A",
"input":"<!DOCTYPEa SYSTEM''A",
-"output":["ParseError", "ParseError", ["DOCTYPE", "a", null, "", true]]},
+"output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "a", null, "", true]]},

{"description":"<!DOCTYPEa SYSTEM''B",
"input":"<!DOCTYPEa SYSTEM''B",
-"output":["ParseError", "ParseError", ["DOCTYPE", "a", null, "", true]]},
+"output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "a", null, "", true]]},

{"description":"<!DOCTYPEa SYSTEM''Y",
"input":"<!DOCTYPEa SYSTEM''Y",
-"output":["ParseError", "ParseError", ["DOCTYPE", "a", null, "", true]]},
+"output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "a", null, "", true]]},

{"description":"<!DOCTYPEa SYSTEM''Z",
"input":"<!DOCTYPEa SYSTEM''Z",
-"output":["ParseError", "ParseError", ["DOCTYPE", "a", null, "", true]]},
+"output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "a", null, "", true]]},

{"description":"<!DOCTYPEa SYSTEM''`",
"input":"<!DOCTYPEa SYSTEM''`",
-"output":["ParseError", "ParseError", ["DOCTYPE", "a", null, "", true]]},
+"output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "a", null, "", true]]},

{"description":"<!DOCTYPEa SYSTEM''a",
"input":"<!DOCTYPEa SYSTEM''a",
-"output":["ParseError", "ParseError", ["DOCTYPE", "a", null, "", true]]},
+"output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "a", null, "", true]]},

{"description":"<!DOCTYPEa SYSTEM''b",
"input":"<!DOCTYPEa SYSTEM''b",
-"output":["ParseError", "ParseError", ["DOCTYPE", "a", null, "", true]]},
+"output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "a", null, "", true]]},

{"description":"<!DOCTYPEa SYSTEM''y",
"input":"<!DOCTYPEa SYSTEM''y",
-"output":["ParseError", "ParseError", ["DOCTYPE", "a", null, "", true]]},
+"output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "a", null, "", true]]},

{"description":"<!DOCTYPEa SYSTEM''z",
"input":"<!DOCTYPEa SYSTEM''z",
-"output":["ParseError", "ParseError", ["DOCTYPE", "a", null, "", true]]},
+"output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "a", null, "", true]]},

{"description":"<!DOCTYPEa SYSTEM''{",
"input":"<!DOCTYPEa SYSTEM''{",
-"output":["ParseError", "ParseError", ["DOCTYPE", "a", null, "", true]]},
+"output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "a", null, "", true]]},

{"description":"<!DOCTYPEa SYSTEM''\\uDBC0\\uDC00",
"input":"<!DOCTYPEa SYSTEM''\uDBC0\uDC00",
-"output":["ParseError", "ParseError", ["DOCTYPE", "a", null, "", true]]},
+"output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "a", null, "", true]]},

{"description":"<!DOCTYPEa SYSTEM'(",
"input":"<!DOCTYPEa SYSTEM'(",
-"output":["ParseError", "ParseError", ["DOCTYPE", "a", null, "(", false]]},
+"output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "a", null, "(", false]]},

{"description":"<!DOCTYPEa SYSTEM'-",
"input":"<!DOCTYPEa SYSTEM'-",
-"output":["ParseError", "ParseError", ["DOCTYPE", "a", null, "-", false]]},
+"output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "a", null, "-", false]]},

{"description":"<!DOCTYPEa SYSTEM'/",
"input":"<!DOCTYPEa SYSTEM'/",
-"output":["ParseError", "ParseError", ["DOCTYPE", "a", null, "/", false]]},
+"output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "a", null, "/", false]]},

{"description":"<!DOCTYPEa SYSTEM'0",
"input":"<!DOCTYPEa SYSTEM'0",
-"output":["ParseError", "ParseError", ["DOCTYPE", "a", null, "0", false]]},
+"output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "a", null, "0", false]]},

{"description":"<!DOCTYPEa SYSTEM'1",
"input":"<!DOCTYPEa SYSTEM'1",
-"output":["ParseError", "ParseError", ["DOCTYPE", "a", null, "1", false]]},
+"output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "a", null, "1", false]]},

{"description":"<!DOCTYPEa SYSTEM'9",
"input":"<!DOCTYPEa SYSTEM'9",
-"output":["ParseError", "ParseError", ["DOCTYPE", "a", null, "9", false]]},
+"output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "a", null, "9", false]]},

{"description":"<!DOCTYPEa SYSTEM'<",
"input":"<!DOCTYPEa SYSTEM'<",
-"output":["ParseError", "ParseError", ["DOCTYPE", "a", null, "<", false]]},
+"output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "a", null, "<", false]]},

{"description":"<!DOCTYPEa SYSTEM'=",
"input":"<!DOCTYPEa SYSTEM'=",
-"output":["ParseError", "ParseError", ["DOCTYPE", "a", null, "=", false]]},
+"output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "a", null, "=", false]]},

{"description":"<!DOCTYPEa SYSTEM'>",
"input":"<!DOCTYPEa SYSTEM'>",
-"output":["ParseError", "ParseError", ["DOCTYPE", "a", null, "", false]]},
+"output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "a", null, "", false]]},

{"description":"<!DOCTYPEa SYSTEM'?",
"input":"<!DOCTYPEa SYSTEM'?",
-"output":["ParseError", "ParseError", ["DOCTYPE", "a", null, "?", false]]},
+"output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "a", null, "?", false]]},

{"description":"<!DOCTYPEa SYSTEM'@",
"input":"<!DOCTYPEa SYSTEM'@",
-"output":["ParseError", "ParseError", ["DOCTYPE", "a", null, "@", false]]},
+"output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "a", null, "@", false]]},

{"description":"<!DOCTYPEa SYSTEM'A",
"input":"<!DOCTYPEa SYSTEM'A",
-"output":["ParseError", "ParseError", ["DOCTYPE", "a", null, "A", false]]},
+"output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "a", null, "A", false]]},

{"description":"<!DOCTYPEa SYSTEM'B",
"input":"<!DOCTYPEa SYSTEM'B",
-"output":["ParseError", "ParseError", ["DOCTYPE", "a", null, "B", false]]},
+"output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "a", null, "B", false]]},

{"description":"<!DOCTYPEa SYSTEM'Y",
"input":"<!DOCTYPEa SYSTEM'Y",
-"output":["ParseError", "ParseError", ["DOCTYPE", "a", null, "Y", false]]},
+"output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "a", null, "Y", false]]},

{"description":"<!DOCTYPEa SYSTEM'Z",
"input":"<!DOCTYPEa SYSTEM'Z",
-"output":["ParseError", "ParseError", ["DOCTYPE", "a", null, "Z", false]]},
+"output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "a", null, "Z", false]]},

{"description":"<!DOCTYPEa SYSTEM'`",
"input":"<!DOCTYPEa SYSTEM'`",
-"output":["ParseError", "ParseError", ["DOCTYPE", "a", null, "`", false]]},
+"output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "a", null, "`", false]]},

{"description":"<!DOCTYPEa SYSTEM'a",
"input":"<!DOCTYPEa SYSTEM'a",
-"output":["ParseError", "ParseError", ["DOCTYPE", "a", null, "a", false]]},
+"output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "a", null, "a", false]]},

{"description":"<!DOCTYPEa SYSTEM'b",
"input":"<!DOCTYPEa SYSTEM'b",
-"output":["ParseError", "ParseError", ["DOCTYPE", "a", null, "b", false]]},
+"output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "a", null, "b", false]]},

{"description":"<!DOCTYPEa SYSTEM'y",
"input":"<!DOCTYPEa SYSTEM'y",
-"output":["ParseError", "ParseError", ["DOCTYPE", "a", null, "y", false]]},
+"output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "a", null, "y", false]]},

{"description":"<!DOCTYPEa SYSTEM'z",
"input":"<!DOCTYPEa SYSTEM'z",
-"output":["ParseError", "ParseError", ["DOCTYPE", "a", null, "z", false]]},
+"output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "a", null, "z", false]]},

{"description":"<!DOCTYPEa SYSTEM'{",
"input":"<!DOCTYPEa SYSTEM'{",
-"output":["ParseError", "ParseError", ["DOCTYPE", "a", null, "{", false]]},
+"output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "a", null, "{", false]]},

{"description":"<!DOCTYPEa SYSTEM'\\uDBC0\\uDC00",
"input":"<!DOCTYPEa SYSTEM'\uDBC0\uDC00",
-"output":["ParseError", "ParseError", ["DOCTYPE", "a", null, "\uDBC0\uDC00", false]]},
+"output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "a", null, "\uDBC0\uDC00", false]]},

{"description":"<!DOCTYPEa SYSTEM(",
"input":"<!DOCTYPEa SYSTEM(",
@@ -3842,7 +3902,7 @@

{"description":"<!DOCTYPEa a\\u0000",
"input":"<!DOCTYPEa a\u0000",
-"output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "a", null, null, false]]},
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", null, null, false]]},

{"description":"<!DOCTYPEa a\\u0009",
"input":"<!DOCTYPEa a\u0009",
@@ -4170,7 +4230,7 @@

{"description":"</\\u0000",
"input":"</\u0000",
-"output":["ParseError", "ParseError", ["Comment", "\uFFFD"]]},
+"output":["ParseError", ["Comment", "\uFFFD"]]},

{"description":"</\\u0009",
"input":"</\u0009",
@@ -4248,6 +4308,22 @@
"input":"</@",
"output":["ParseError", ["Comment", "@"]]},

+{"description":"</A>",
+"input":"</A>",
+"output":[["EndTag", "a"]]},
+
+{"description":"</B>",
+"input":"</B>",
+"output":[["EndTag", "b"]]},
+
+{"description":"</Y>",
+"input":"</Y>",
+"output":[["EndTag", "y"]]},
+
+{"description":"</Z>",
+"input":"</Z>",
+"output":[["EndTag", "z"]]},
+
{"description":"</[",
"input":"</[",
"output":["ParseError", ["Comment", "["]]},
@@ -4256,6 +4332,22 @@
"input":"</`",
"output":["ParseError", ["Comment", "`"]]},

+{"description":"</a>",
+"input":"</a>",
+"output":[["EndTag", "a"]]},
+
+{"description":"</b>",
+"input":"</b>",
+"output":[["EndTag", "b"]]},
+
+{"description":"</y>",
+"input":"</y>",
+"output":[["EndTag", "y"]]},
+
+{"description":"</z>",
+"input":"</z>",
+"output":[["EndTag", "z"]]},
+
{"description":"</{",
"input":"</{",
"output":["ParseError", ["Comment", "{"]]},
@@ -4294,7 +4386,7 @@

{"description":"<?\\u0000",
"input":"<?\u0000",
-"output":["ParseError", "ParseError", ["Comment", "?\uFFFD"]]},
+"output":["ParseError", ["Comment", "?\uFFFD"]]},

{"description":"<?\\u0009",
"input":"<?\u0009",
@@ -4420,6 +4512,22 @@
"input":"<@",
"output":["ParseError", ["Character", "<@"]]},

+{"description":"<A>",
+"input":"<A>",
+"output":[["StartTag", "a", {}]]},
+
+{"description":"<B>",
+"input":"<B>",
+"output":[["StartTag", "b", {}]]},
+
+{"description":"<Y>",
+"input":"<Y>",
+"output":[["StartTag", "y", {}]]},
+
+{"description":"<Z>",
+"input":"<Z>",
+"output":[["StartTag", "z", {}]]},
+
{"description":"<[",
"input":"<[",
"output":["ParseError", ["Character", "<["]]},
@@ -4428,38 +4536,1446 @@
"input":"<`",
"output":["ParseError", ["Character", "<`"]]},

+{"description":"<a>",
+"input":"<a>",
+"output":[["StartTag", "a", {}]]},
+
+{"description":"<a\\u0000>",
+"input":"<a\u0000>",
+"output":["ParseError", ["StartTag", "a\uFFFD", {}]]},
+
+{"description":"<a\\u0008>",
+"input":"<a\u0008>",
+"output":["ParseError", ["StartTag", "a\u0008", {}]]},
+
+{"description":"<a\\u0009>",
+"input":"<a\u0009>",
+"output":[["StartTag", "a", {}]]},
+
+{"description":"<a\\u000A>",
+"input":"<a\u000A>",
+"output":[["StartTag", "a", {}]]},
+
+{"description":"<a\\u000B>",
+"input":"<a\u000B>",
+"output":["ParseError", ["StartTag", "a\u000B", {}]]},
+
+{"description":"<a\\u000C>",
+"input":"<a\u000C>",
+"output":[["StartTag", "a", {}]]},
+
+{"description":"<a\\u000D>",
+"input":"<a\u000D>",
+"output":[["StartTag", "a", {}]]},
+
+{"description":"<a\\u001F>",
+"input":"<a\u001F>",
+"output":["ParseError", ["StartTag", "a\u001F", {}]]},
+
+{"description":"<a >",
+"input":"<a >",
+"output":[["StartTag", "a", {}]]},
+
+{"description":"<a \\u0000>",
+"input":"<a \u0000>",
+"output":["ParseError", ["StartTag", "a", {"\uFFFD":""}]]},
+
+{"description":"<a \\u0008>",
+"input":"<a \u0008>",
+"output":["ParseError", ["StartTag", "a", {"\u0008":""}]]},
+
+{"description":"<a \\u0009>",
+"input":"<a \u0009>",
+"output":[["StartTag", "a", {}]]},
+
+{"description":"<a \\u000A>",
+"input":"<a \u000A>",
+"output":[["StartTag", "a", {}]]},
+
+{"description":"<a \\u000B>",
+"input":"<a \u000B>",
+"output":["ParseError", ["StartTag", "a", {"\u000B":""}]]},
+
+{"description":"<a \\u000C>",
+"input":"<a \u000C>",
+"output":[["StartTag", "a", {}]]},
+
+{"description":"<a \\u000D>",
+"input":"<a \u000D>",
+"output":[["StartTag", "a", {}]]},
+
+{"description":"<a \\u001F>",
+"input":"<a \u001F>",
+"output":["ParseError", ["StartTag", "a", {"\u001F":""}]]},
+
+{"description":"<a >",
+"input":"<a >",
+"output":[["StartTag", "a", {}]]},
+
+{"description":"<a !>",
+"input":"<a !>",
+"output":[["StartTag", "a", {"!":""}]]},
+
+{"description":"<a \">",
+"input":"<a \">",
+"output":["ParseError", ["StartTag", "a", {"\"":""}]]},
+
+{"description":"<a #>",
+"input":"<a #>",
+"output":[["StartTag", "a", {"#":""}]]},
+
+{"description":"<a &>",
+"input":"<a &>",
+"output":[["StartTag", "a", {"&":""}]]},
+
+{"description":"<a '>",
+"input":"<a '>",
+"output":["ParseError", ["StartTag", "a", {"'":""}]]},
+
+{"description":"<a (>",
+"input":"<a (>",
+"output":[["StartTag", "a", {"(":""}]]},
+
+{"description":"<a ->",
+"input":"<a ->",
+"output":[["StartTag", "a", {"-":""}]]},
+
+{"description":"<a .>",
+"input":"<a .>",
+"output":[["StartTag", "a", {".":""}]]},
+
+{"description":"<a />",
+"input":"<a />",
+"output":[["StartTag", "a", {}, true]]},
+
+{"description":"<a 0>",
+"input":"<a 0>",
+"output":[["StartTag", "a", {"0":""}]]},
+
+{"description":"<a 1>",
+"input":"<a 1>",
+"output":[["StartTag", "a", {"1":""}]]},
+
+{"description":"<a 9>",
+"input":"<a 9>",
+"output":[["StartTag", "a", {"9":""}]]},
+
+{"description":"<a <>",
+"input":"<a <>",
+"output":["ParseError", ["StartTag", "a", {"<":""}]]},
+
+{"description":"<a =>",
+"input":"<a =>",
+"output":["ParseError", ["StartTag", "a", {"=":""}]]},
+
{"description":"<a >",
"input":"<a >",
"output":[["StartTag", "a", {}]]},

+{"description":"<a ?>",
+"input":"<a ?>",
+"output":[["StartTag", "a", {"?":""}]]},
+
+{"description":"<a @>",
+"input":"<a @>",
+"output":[["StartTag", "a", {"@":""}]]},
+
+{"description":"<a A>",
+"input":"<a A>",
+"output":[["StartTag", "a", {"a":""}]]},
+
+{"description":"<a B>",
+"input":"<a B>",
+"output":[["StartTag", "a", {"b":""}]]},
+
+{"description":"<a Y>",
+"input":"<a Y>",
+"output":[["StartTag", "a", {"y":""}]]},
+
+{"description":"<a Z>",
+"input":"<a Z>",
+"output":[["StartTag", "a", {"z":""}]]},
+
+{"description":"<a [>",
+"input":"<a [>",
+"output":[["StartTag", "a", {"[":""}]]},
+
+{"description":"<a `>",
+"input":"<a `>",
+"output":[["StartTag", "a", {"`":""}]]},
+
+{"description":"<a a>",
+"input":"<a a>",
+"output":[["StartTag", "a", {"a":""}]]},
+
+{"description":"<a a\\u0000>",
+"input":"<a a\u0000>",
+"output":["ParseError", ["StartTag", "a", {"a\uFFFD":""}]]},
+
+{"description":"<a a\\u0008>",
+"input":"<a a\u0008>",
+"output":["ParseError", ["StartTag", "a", {"a\u0008":""}]]},
+
+{"description":"<a a\\u0009>",
+"input":"<a a\u0009>",
+"output":[["StartTag", "a", {"a":""}]]},
+
+{"description":"<a a\\u000A>",
+"input":"<a a\u000A>",
+"output":[["StartTag", "a", {"a":""}]]},
+
+{"description":"<a a\\u000B>",
+"input":"<a a\u000B>",
+"output":["ParseError", ["StartTag", "a", {"a\u000B":""}]]},
+
+{"description":"<a a\\u000C>",
+"input":"<a a\u000C>",
+"output":[["StartTag", "a", {"a":""}]]},
+
+{"description":"<a a\\u000D>",
+"input":"<a a\u000D>",
+"output":[["StartTag", "a", {"a":""}]]},
+
+{"description":"<a a\\u001F>",
+"input":"<a a\u001F>",
+"output":["ParseError", ["StartTag", "a", {"a\u001F":""}]]},
+
{"description":"<a a >",
"input":"<a a >",
"output":[["StartTag", "a", {"a":""}]]},

+{"description":"<a a \\u0000>",
+"input":"<a a \u0000>",
+"output":["ParseError", ["StartTag", "a", {"a":"", "\uFFFD":""}]]},
+
+{"description":"<a a \\u0008>",
+"input":"<a a \u0008>",
+"output":["ParseError", ["StartTag", "a", {"a":"", "\u0008":""}]]},
+
+{"description":"<a a \\u0009>",
+"input":"<a a \u0009>",
+"output":[["StartTag", "a", {"a":""}]]},
+
+{"description":"<a a \\u000A>",
+"input":"<a a \u000A>",
+"output":[["StartTag", "a", {"a":""}]]},
+
+{"description":"<a a \\u000B>",
+"input":"<a a \u000B>",
+"output":["ParseError", ["StartTag", "a", {"a":"", "\u000B":""}]]},
+
+{"description":"<a a \\u000C>",
+"input":"<a a \u000C>",
+"output":[["StartTag", "a", {"a":""}]]},
+
+{"description":"<a a \\u000D>",
+"input":"<a a \u000D>",
+"output":[["StartTag", "a", {"a":""}]]},
+
+{"description":"<a a \\u001F>",
+"input":"<a a \u001F>",
+"output":["ParseError", ["StartTag", "a", {"a":"", "\u001F":""}]]},
+
+{"description":"<a a >",
+"input":"<a a >",
+"output":[["StartTag", "a", {"a":""}]]},
+
+{"description":"<a a !>",
+"input":"<a a !>",
+"output":[["StartTag", "a", {"a":"", "!":""}]]},
+
+{"description":"<a a \">",
+"input":"<a a \">",
+"output":["ParseError", ["StartTag", "a", {"a":"", "\"":""}]]},
+
+{"description":"<a a #>",
+"input":"<a a #>",
+"output":[["StartTag", "a", {"a":"", "#":""}]]},
+
+{"description":"<a a &>",
+"input":"<a a &>",
+"output":[["StartTag", "a", {"a":"", "&":""}]]},
+
+{"description":"<a a '>",
+"input":"<a a '>",
+"output":["ParseError", ["StartTag", "a", {"a":"", "'":""}]]},
+
+{"description":"<a a (>",
+"input":"<a a (>",
+"output":[["StartTag", "a", {"a":"", "(":""}]]},
+
+{"description":"<a a ->",
+"input":"<a a ->",
+"output":[["StartTag", "a", {"a":"", "-":""}]]},
+
+{"description":"<a a .>",
+"input":"<a a .>",
+"output":[["StartTag", "a", {"a":"", ".":""}]]},
+
+{"description":"<a a />",
+"input":"<a a />",
+"output":[["StartTag", "a", {"a":""}, true]]},
+
+{"description":"<a a 0>",
+"input":"<a a 0>",
+"output":[["StartTag", "a", {"a":"", "0":""}]]},
+
+{"description":"<a a 1>",
+"input":"<a a 1>",
+"output":[["StartTag", "a", {"a":"", "1":""}]]},
+
+{"description":"<a a 9>",
+"input":"<a a 9>",
+"output":[["StartTag", "a", {"a":"", "9":""}]]},
+
+{"description":"<a a <>",
+"input":"<a a <>",
+"output":["ParseError", ["StartTag", "a", {"a":"", "<":""}]]},
+
+{"description":"<a a =>",
+"input":"<a a =>",
+"output":["ParseError", ["StartTag", "a", {"a":""}]]},
+
+{"description":"<a a >",
+"input":"<a a >",
+"output":[["StartTag", "a", {"a":""}]]},
+
+{"description":"<a a ?>",
+"input":"<a a ?>",
+"output":[["StartTag", "a", {"a":"", "?":""}]]},
+
+{"description":"<a a @>",
+"input":"<a a @>",
+"output":[["StartTag", "a", {"a":"", "@":""}]]},
+
+{"description":"<a a A>",
+"input":"<a a A>",
+"output":["ParseError", ["StartTag", "a", {"a":""}]]},
+
+{"description":"<a a B>",
+"input":"<a a B>",
+"output":[["StartTag", "a", {"a":"", "b":""}]]},
+
+{"description":"<a a Y>",
+"input":"<a a Y>",
+"output":[["StartTag", "a", {"a":"", "y":""}]]},
+
+{"description":"<a a Z>",
+"input":"<a a Z>",
+"output":[["StartTag", "a", {"a":"", "z":""}]]},
+
+{"description":"<a a [>",
+"input":"<a a [>",
+"output":[["StartTag", "a", {"a":"", "[":""}]]},
+
+{"description":"<a a `>",
+"input":"<a a `>",
+"output":[["StartTag", "a", {"a":"", "`":""}]]},
+
+{"description":"<a a a>",
+"input":"<a a a>",
+"output":["ParseError", ["StartTag", "a", {"a":""}]]},
+
+{"description":"<a a b>",
+"input":"<a a b>",
+"output":[["StartTag", "a", {"a":"", "b":""}]]},
+
+{"description":"<a a y>",
+"input":"<a a y>",
+"output":[["StartTag", "a", {"a":"", "y":""}]]},
+
+{"description":"<a a z>",
+"input":"<a a z>",
+"output":[["StartTag", "a", {"a":"", "z":""}]]},
+
+{"description":"<a a {>",
+"input":"<a a {>",
+"output":[["StartTag", "a", {"a":"", "{":""}]]},
+
+{"description":"<a a \\uDBC0\\uDC00>",
+"input":"<a a \uDBC0\uDC00>",
+"output":[["StartTag", "a", {"a":"", "\uDBC0\uDC00":""}]]},
+
+{"description":"<a a!>",
+"input":"<a a!>",
+"output":[["StartTag", "a", {"a!":""}]]},
+
+{"description":"<a a\">",
+"input":"<a a\">",
+"output":["ParseError", ["StartTag", "a", {"a\"":""}]]},
+
+{"description":"<a a#>",
+"input":"<a a#>",
+"output":[["StartTag", "a", {"a#":""}]]},
+
+{"description":"<a a&>",
+"input":"<a a&>",
+"output":[["StartTag", "a", {"a&":""}]]},
+
+{"description":"<a a'>",
+"input":"<a a'>",
+"output":["ParseError", ["StartTag", "a", {"a'":""}]]},
+
+{"description":"<a a(>",
+"input":"<a a(>",
+"output":[["StartTag", "a", {"a(":""}]]},
+
+{"description":"<a a->",
+"input":"<a a->",
+"output":[["StartTag", "a", {"a-":""}]]},
+
+{"description":"<a a.>",
+"input":"<a a.>",
+"output":[["StartTag", "a", {"a.":""}]]},
+
+{"description":"<a a/>",
+"input":"<a a/>",
+"output":[["StartTag", "a", {"a":""}, true]]},
+
+{"description":"<a a0>",
+"input":"<a a0>",
+"output":[["StartTag", "a", {"a0":""}]]},
+
+{"description":"<a a1>",
+"input":"<a a1>",
+"output":[["StartTag", "a", {"a1":""}]]},
+
+{"description":"<a a9>",
+"input":"<a a9>",
+"output":[["StartTag", "a", {"a9":""}]]},
+
+{"description":"<a a<>",
+"input":"<a a<>",
+"output":["ParseError", ["StartTag", "a", {"a<":""}]]},
+
+{"description":"<a a=>",
+"input":"<a a=>",
+"output":["ParseError", ["StartTag", "a", {"a":""}]]},
+
+{"description":"<a a=\\u0000>",
+"input":"<a a=\u0000>",
+"output":["ParseError", ["StartTag", "a", {"a":"\uFFFD"}]]},
+
+{"description":"<a a=\\u0008>",
+"input":"<a a=\u0008>",
+"output":["ParseError", ["StartTag", "a", {"a":"\u0008"}]]},
+
+{"description":"<a a=\\u0009>",
+"input":"<a a=\u0009>",
+"output":["ParseError", ["StartTag", "a", {"a":""}]]},
+
+{"description":"<a a=\\u000A>",
+"input":"<a a=\u000A>",
+"output":["ParseError", ["StartTag", "a", {"a":""}]]},
+
+{"description":"<a a=\\u000B>",
+"input":"<a a=\u000B>",
+"output":["ParseError", ["StartTag", "a", {"a":"\u000B"}]]},
+
+{"description":"<a a=\\u000C>",
+"input":"<a a=\u000C>",
+"output":["ParseError", ["StartTag", "a", {"a":""}]]},
+
+{"description":"<a a=\\u000D>",
+"input":"<a a=\u000D>",
+"output":["ParseError", ["StartTag", "a", {"a":""}]]},
+
+{"description":"<a a=\\u001F>",
+"input":"<a a=\u001F>",
+"output":["ParseError", ["StartTag", "a", {"a":"\u001F"}]]},
+
+{"description":"<a a= >",
+"input":"<a a= >",
+"output":["ParseError", ["StartTag", "a", {"a":""}]]},
+
+{"description":"<a a=!>",
+"input":"<a a=!>",
+"output":[["StartTag", "a", {"a":"!"}]]},
+
+{"description":"<a a=\"\">",
+"input":"<a a=\"\">",
+"output":[["StartTag", "a", {"a":""}]]},
+
+{"description":"<a a=\"\\u0000\">",
+"input":"<a a=\"\u0000\">",
+"output":["ParseError", ["StartTag", "a", {"a":"\uFFFD"}]]},
+
+{"description":"<a a=\"\\u0009\">",
+"input":"<a a=\"\u0009\">",
+"output":[["StartTag", "a", {"a":"\u0009"}]]},
+
+{"description":"<a a=\"\\u000A\">",
+"input":"<a a=\"\u000A\">",
+"output":[["StartTag", "a", {"a":"\u000A"}]]},
+
+{"description":"<a a=\"\\u000B\">",
+"input":"<a a=\"\u000B\">",
+"output":["ParseError", ["StartTag", "a", {"a":"\u000B"}]]},
+
+{"description":"<a a=\"\\u000C\">",
+"input":"<a a=\"\u000C\">",
+"output":[["StartTag", "a", {"a":"\u000C"}]]},
+
+{"description":"<a a=\" \">",
+"input":"<a a=\" \">",
+"output":[["StartTag", "a", {"a":" "}]]},
+
+{"description":"<a a=\"!\">",
+"input":"<a a=\"!\">",
+"output":[["StartTag", "a", {"a":"!"}]]},
+
+{"description":"<a a=\"\">",
+"input":"<a a=\"\">",
+"output":[["StartTag", "a", {"a":""}]]},
+
+{"description":"<a a=\"#\">",
+"input":"<a a=\"#\">",
+"output":[["StartTag", "a", {"a":"#"}]]},
+
+{"description":"<a a=\"%\">",
+"input":"<a a=\"%\">",
+"output":[["StartTag", "a", {"a":"%"}]]},
+
+{"description":"<a a=\"&\">",
+"input":"<a a=\"&\">",
+"output":[["StartTag", "a", {"a":"&"}]]},
+
+{"description":"<a a=\"'\">",
+"input":"<a a=\"'\">",
+"output":[["StartTag", "a", {"a":"'"}]]},
+
+{"description":"<a a=\"-\">",
+"input":"<a a=\"-\">",
+"output":[["StartTag", "a", {"a":"-"}]]},
+
+{"description":"<a a=\"/\">",
+"input":"<a a=\"/\">",
+"output":[["StartTag", "a", {"a":"/"}]]},
+
+{"description":"<a a=\"0\">",
+"input":"<a a=\"0\">",
+"output":[["StartTag", "a", {"a":"0"}]]},
+
+{"description":"<a a=\"1\">",
+"input":"<a a=\"1\">",
+"output":[["StartTag", "a", {"a":"1"}]]},
+
+{"description":"<a a=\"9\">",
+"input":"<a a=\"9\">",
+"output":[["StartTag", "a", {"a":"9"}]]},
+
+{"description":"<a a=\"<\">",
+"input":"<a a=\"<\">",
+"output":[["StartTag", "a", {"a":"<"}]]},
+
+{"description":"<a a=\"=\">",
+"input":"<a a=\"=\">",
+"output":[["StartTag", "a", {"a":"="}]]},
+
+{"description":"<a a=\">\">",
+"input":"<a a=\">\">",
+"output":[["StartTag", "a", {"a":">"}]]},
+
+{"description":"<a a=\"?\">",
+"input":"<a a=\"?\">",
+"output":[["StartTag", "a", {"a":"?"}]]},
+
+{"description":"<a a=\"@\">",
+"input":"<a a=\"@\">",
+"output":[["StartTag", "a", {"a":"@"}]]},
+
+{"description":"<a a=\"A\">",
+"input":"<a a=\"A\">",
+"output":[["StartTag", "a", {"a":"A"}]]},
+
+{"description":"<a a=\"B\">",
+"input":"<a a=\"B\">",
+"output":[["StartTag", "a", {"a":"B"}]]},
+
+{"description":"<a a=\"Y\">",
+"input":"<a a=\"Y\">",
+"output":[["StartTag", "a", {"a":"Y"}]]},
+
+{"description":"<a a=\"Z\">",
+"input":"<a a=\"Z\">",
+"output":[["StartTag", "a", {"a":"Z"}]]},
+
+{"description":"<a a=\"`\">",
+"input":"<a a=\"`\">",
+"output":[["StartTag", "a", {"a":"`"}]]},
+
+{"description":"<a a=\"a\">",
+"input":"<a a=\"a\">",
+"output":[["StartTag", "a", {"a":"a"}]]},
+
+{"description":"<a a=\"b\">",
+"input":"<a a=\"b\">",
+"output":[["StartTag", "a", {"a":"b"}]]},
+
+{"description":"<a a=\"y\">",
+"input":"<a a=\"y\">",
+"output":[["StartTag", "a", {"a":"y"}]]},
+
+{"description":"<a a=\"z\">",
+"input":"<a a=\"z\">",
+"output":[["StartTag", "a", {"a":"z"}]]},
+
+{"description":"<a a=\"{\">",
+"input":"<a a=\"{\">",
+"output":[["StartTag", "a", {"a":"{"}]]},
+
+{"description":"<a a=\"\\uDBC0\\uDC00\">",
+"input":"<a a=\"\uDBC0\uDC00\">",
+"output":[["StartTag", "a", {"a":"\uDBC0\uDC00"}]]},
+
+{"description":"<a a=#>",
+"input":"<a a=#>",
+"output":[["StartTag", "a", {"a":"#"}]]},
+
+{"description":"<a a=%>",
+"input":"<a a=%>",
+"output":[["StartTag", "a", {"a":"%"}]]},
+
+{"description":"<a a=&>",
+"input":"<a a=&>",
+"output":[["StartTag", "a", {"a":"&"}]]},
+
+{"description":"<a a=''>",
+"input":"<a a=''>",
+"output":[["StartTag", "a", {"a":""}]]},
+
+{"description":"<a a='\\u0000'>",
+"input":"<a a='\u0000'>",
+"output":["ParseError", ["StartTag", "a", {"a":"\uFFFD"}]]},
+
+{"description":"<a a='\\u0009'>",
+"input":"<a a='\u0009'>",
+"output":[["StartTag", "a", {"a":"\u0009"}]]},
+
+{"description":"<a a='\\u000A'>",
+"input":"<a a='\u000A'>",
+"output":[["StartTag", "a", {"a":"\u000A"}]]},
+
+{"description":"<a a='\\u000B'>",
+"input":"<a a='\u000B'>",
+"output":["ParseError", ["StartTag", "a", {"a":"\u000B"}]]},
+
+{"description":"<a a='\\u000C'>",
+"input":"<a a='\u000C'>",
+"output":[["StartTag", "a", {"a":"\u000C"}]]},
+
+{"description":"<a a=' '>",
+"input":"<a a=' '>",
+"output":[["StartTag", "a", {"a":" "}]]},
+
+{"description":"<a a='!'>",
+"input":"<a a='!'>",
+"output":[["StartTag", "a", {"a":"!"}]]},
+
+{"description":"<a a='\"'>",
+"input":"<a a='\"'>",
+"output":[["StartTag", "a", {"a":"\""}]]},
+
+{"description":"<a a='%'>",
+"input":"<a a='%'>",
+"output":[["StartTag", "a", {"a":"%"}]]},
+
+{"description":"<a a='&'>",
+"input":"<a a='&'>",
+"output":[["StartTag", "a", {"a":"&"}]]},
+
+{"description":"<a a=''>",
+"input":"<a a=''>",
+"output":[["StartTag", "a", {"a":""}]]},
+
+{"description":"<a a=''\\u0000>",
+"input":"<a a=''\u0000>",
+"output":["ParseError", "ParseError", ["StartTag", "a", {"a":"", "\uFFFD":""}]]},
+
+{"description":"<a a=''\\u0008>",
+"input":"<a a=''\u0008>",
+"output":["ParseError", "ParseError", ["StartTag", "a", {"a":"", "\u0008":""}]]},
+
+{"description":"<a a=''\\u0009>",
+"input":"<a a=''\u0009>",
+"output":[["StartTag", "a", {"a":""}]]},
+
+{"description":"<a a=''\\u000A>",
+"input":"<a a=''\u000A>",
+"output":[["StartTag", "a", {"a":""}]]},
+
+{"description":"<a a=''\\u000B>",
+"input":"<a a=''\u000B>",
+"output":["ParseError", "ParseError", ["StartTag", "a", {"a":"", "\u000B":""}]]},
+
+{"description":"<a a=''\\u000C>",
+"input":"<a a=''\u000C>",
+"output":[["StartTag", "a", {"a":""}]]},
+
+{"description":"<a a=''\\u000D>",
+"input":"<a a=''\u000D>",
+"output":[["StartTag", "a", {"a":""}]]},
+
+{"description":"<a a=''\\u001F>",
+"input":"<a a=''\u001F>",
+"output":["ParseError", "ParseError", ["StartTag", "a", {"a":"", "\u001F":""}]]},
+
+{"description":"<a a='' >",
+"input":"<a a='' >",
+"output":[["StartTag", "a", {"a":""}]]},
+
+{"description":"<a a=''!>",
+"input":"<a a=''!>",
+"output":["ParseError", ["StartTag", "a", {"a":"", "!":""}]]},
+
+{"description":"<a a=''\">",
+"input":"<a a=''\">",
+"output":["ParseError", "ParseError", ["StartTag", "a", {"a":"", "\"":""}]]},
+
+{"description":"<a a=''&>",
+"input":"<a a=''&>",
+"output":["ParseError", ["StartTag", "a", {"a":"", "&":""}]]},
+
+{"description":"<a a='''>",
+"input":"<a a='''>",
+"output":["ParseError", "ParseError", ["StartTag", "a", {"a":"", "'":""}]]},
+
+{"description":"<a a=''->",
+"input":"<a a=''->",
+"output":["ParseError", ["StartTag", "a", {"a":"", "-":""}]]},
+
+{"description":"<a a=''.>",
+"input":"<a a=''.>",
+"output":["ParseError", ["StartTag", "a", {"a":"", ".":""}]]},
+
+{"description":"<a a=''/>",
+"input":"<a a=''/>",
+"output":[["StartTag", "a", {"a":""}, true]]},
+
+{"description":"<a a=''0>",
+"input":"<a a=''0>",
+"output":["ParseError", ["StartTag", "a", {"a":"", "0":""}]]},
+
+{"description":"<a a=''1>",
+"input":"<a a=''1>",
+"output":["ParseError", ["StartTag", "a", {"a":"", "1":""}]]},
+
+{"description":"<a a=''9>",
+"input":"<a a=''9>",
+"output":["ParseError", ["StartTag", "a", {"a":"", "9":""}]]},
+
+{"description":"<a a=''<>",
+"input":"<a a=''<>",
+"output":["ParseError", "ParseError", ["StartTag", "a", {"a":"", "<":""}]]},
+
+{"description":"<a a=''=>",
+"input":"<a a=''=>",
+"output":["ParseError", "ParseError", ["StartTag", "a", {"a":"", "=":""}]]},
+
{"description":"<a a=''>",
"input":"<a a=''>",
"output":[["StartTag", "a", {"a":""}]]},

+{"description":"<a a=''?>",
+"input":"<a a=''?>",
+"output":["ParseError", ["StartTag", "a", {"a":"", "?":""}]]},
+
+{"description":"<a a=''@>",
+"input":"<a a=''@>",
+"output":["ParseError", ["StartTag", "a", {"a":"", "@":""}]]},
+
+{"description":"<a a=''A>",
+"input":"<a a=''A>",
+"output":["ParseError", "ParseError", ["StartTag", "a", {"a":""}]]},
+
+{"description":"<a a=''B>",
+"input":"<a a=''B>",
+"output":["ParseError", ["StartTag", "a", {"a":"", "b":""}]]},
+
+{"description":"<a a=''Y>",
+"input":"<a a=''Y>",
+"output":["ParseError", ["StartTag", "a", {"a":"", "y":""}]]},
+
+{"description":"<a a=''Z>",
+"input":"<a a=''Z>",
+"output":["ParseError", ["StartTag", "a", {"a":"", "z":""}]]},
+
+{"description":"<a a=''`>",
+"input":"<a a=''`>",
+"output":["ParseError", ["StartTag", "a", {"a":"", "`":""}]]},
+
+{"description":"<a a=''a>",
+"input":"<a a=''a>",
+"output":["ParseError", "ParseError", ["StartTag", "a", {"a":""}]]},
+
+{"description":"<a a=''b>",
+"input":"<a a=''b>",
+"output":["ParseError", ["StartTag", "a", {"a":"", "b":""}]]},
+
+{"description":"<a a=''y>",
+"input":"<a a=''y>",
+"output":["ParseError", ["StartTag", "a", {"a":"", "y":""}]]},
+
+{"description":"<a a=''z>",
+"input":"<a a=''z>",
+"output":["ParseError", ["StartTag", "a", {"a":"", "z":""}]]},
+
+{"description":"<a a=''{>",
+"input":"<a a=''{>",
+"output":["ParseError", ["StartTag", "a", {"a":"", "{":""}]]},
+
+{"description":"<a a=''\\uDBC0\\uDC00>",
+"input":"<a a=''\uDBC0\uDC00>",
+"output":["ParseError", ["StartTag", "a", {"a":"", "\uDBC0\uDC00":""}]]},
+
+{"description":"<a a='('>",
+"input":"<a a='('>",
+"output":[["StartTag", "a", {"a":"("}]]},
+
+{"description":"<a a='-'>",
+"input":"<a a='-'>",
+"output":[["StartTag", "a", {"a":"-"}]]},
+
+{"description":"<a a='/'>",
+"input":"<a a='/'>",
+"output":[["StartTag", "a", {"a":"/"}]]},
+
+{"description":"<a a='0'>",
+"input":"<a a='0'>",
+"output":[["StartTag", "a", {"a":"0"}]]},
+
+{"description":"<a a='1'>",
+"input":"<a a='1'>",
+"output":[["StartTag", "a", {"a":"1"}]]},
+
+{"description":"<a a='9'>",
+"input":"<a a='9'>",
+"output":[["StartTag", "a", {"a":"9"}]]},
+
+{"description":"<a a='<'>",
+"input":"<a a='<'>",
+"output":[["StartTag", "a", {"a":"<"}]]},
+
+{"description":"<a a='='>",
+"input":"<a a='='>",
+"output":[["StartTag", "a", {"a":"="}]]},
+
+{"description":"<a a='>'>",
+"input":"<a a='>'>",
+"output":[["StartTag", "a", {"a":">"}]]},
+
+{"description":"<a a='?'>",
+"input":"<a a='?'>",
+"output":[["StartTag", "a", {"a":"?"}]]},
+
+{"description":"<a a='@'>",
+"input":"<a a='@'>",
+"output":[["StartTag", "a", {"a":"@"}]]},
+
+{"description":"<a a='A'>",
+"input":"<a a='A'>",
+"output":[["StartTag", "a", {"a":"A"}]]},
+
+{"description":"<a a='B'>",
+"input":"<a a='B'>",
+"output":[["StartTag", "a", {"a":"B"}]]},
+
+{"description":"<a a='Y'>",
+"input":"<a a='Y'>",
+"output":[["StartTag", "a", {"a":"Y"}]]},
+
+{"description":"<a a='Z'>",
+"input":"<a a='Z'>",
+"output":[["StartTag", "a", {"a":"Z"}]]},
+
+{"description":"<a a='`'>",
+"input":"<a a='`'>",
+"output":[["StartTag", "a", {"a":"`"}]]},
+
+{"description":"<a a='a'>",
+"input":"<a a='a'>",
+"output":[["StartTag", "a", {"a":"a"}]]},
+
+{"description":"<a a='b'>",
+"input":"<a a='b'>",
+"output":[["StartTag", "a", {"a":"b"}]]},
+
+{"description":"<a a='y'>",
+"input":"<a a='y'>",
+"output":[["StartTag", "a", {"a":"y"}]]},
+
+{"description":"<a a='z'>",
+"input":"<a a='z'>",
+"output":[["StartTag", "a", {"a":"z"}]]},
+
+{"description":"<a a='{'>",
+"input":"<a a='{'>",
+"output":[["StartTag", "a", {"a":"{"}]]},
+
+{"description":"<a a='\\uDBC0\\uDC00'>",
+"input":"<a a='\uDBC0\uDC00'>",
+"output":[["StartTag", "a", {"a":"\uDBC0\uDC00"}]]},
+
+{"description":"<a a=(>",
+"input":"<a a=(>",
+"output":[["StartTag", "a", {"a":"("}]]},
+
+{"description":"<a a=->",
+"input":"<a a=->",
+"output":[["StartTag", "a", {"a":"-"}]]},
+
+{"description":"<a a=/>",
+"input":"<a a=/>",
+"output":[["StartTag", "a", {"a":"/"}]]},
+
+{"description":"<a a=0>",
+"input":"<a a=0>",
+"output":[["StartTag", "a", {"a":"0"}]]},
+
+{"description":"<a a=1>",
+"input":"<a a=1>",
+"output":[["StartTag", "a", {"a":"1"}]]},
+
+{"description":"<a a=9>",
+"input":"<a a=9>",
+"output":[["StartTag", "a", {"a":"9"}]]},
+
+{"description":"<a a=<>",
+"input":"<a a=<>",
+"output":["ParseError", ["StartTag", "a", {"a":"<"}]]},
+
+{"description":"<a a==>",
+"input":"<a a==>",
+"output":["ParseError", ["StartTag", "a", {"a":"="}]]},
+
{"description":"<a a=>",
"input":"<a a=>",
"output":["ParseError", ["StartTag", "a", {"a":""}]]},

+{"description":"<a a=?>",
+"input":"<a a=?>",
+"output":[["StartTag", "a", {"a":"?"}]]},
+
+{"description":"<a a=@>",
+"input":"<a a=@>",
+"output":[["StartTag", "a", {"a":"@"}]]},
+
+{"description":"<a a=A>",
+"input":"<a a=A>",
+"output":[["StartTag", "a", {"a":"A"}]]},
+
+{"description":"<a a=B>",
+"input":"<a a=B>",
+"output":[["StartTag", "a", {"a":"B"}]]},
+
+{"description":"<a a=Y>",
+"input":"<a a=Y>",
+"output":[["StartTag", "a", {"a":"Y"}]]},
+
+{"description":"<a a=Z>",
+"input":"<a a=Z>",
+"output":[["StartTag", "a", {"a":"Z"}]]},
+
+{"description":"<a a=`>",
+"input":"<a a=`>",
+"output":["ParseError", ["StartTag", "a", {"a":"`"}]]},
+
{"description":"<a a=a>",
"input":"<a a=a>",
"output":[["StartTag", "a", {"a":"a"}]]},

+{"description":"<a a=a\\u0000>",
+"input":"<a a=a\u0000>",
+"output":["ParseError", ["StartTag", "a", {"a":"a\uFFFD"}]]},
+
+{"description":"<a a=a\\u0008>",
+"input":"<a a=a\u0008>",
+"output":["ParseError", ["StartTag", "a", {"a":"a\u0008"}]]},
+
+{"description":"<a a=a\\u0009>",
+"input":"<a a=a\u0009>",
+"output":[["StartTag", "a", {"a":"a"}]]},
+
+{"description":"<a a=a\\u000A>",
+"input":"<a a=a\u000A>",
+"output":[["StartTag", "a", {"a":"a"}]]},
+
+{"description":"<a a=a\\u000B>",
+"input":"<a a=a\u000B>",
+"output":["ParseError", ["StartTag", "a", {"a":"a\u000B"}]]},
+
+{"description":"<a a=a\\u000C>",
+"input":"<a a=a\u000C>",
+"output":[["StartTag", "a", {"a":"a"}]]},
+
+{"description":"<a a=a\\u000D>",
+"input":"<a a=a\u000D>",
+"output":[["StartTag", "a", {"a":"a"}]]},
+
+{"description":"<a a=a\\u001F>",
+"input":"<a a=a\u001F>",
+"output":["ParseError", ["StartTag", "a", {"a":"a\u001F"}]]},
+
+{"description":"<a a=a >",
+"input":"<a a=a >",
+"output":[["StartTag", "a", {"a":"a"}]]},
+
+{"description":"<a a=a!>",
+"input":"<a a=a!>",
+"output":[["StartTag", "a", {"a":"a!"}]]},
+
+{"description":"<a a=a\">",
+"input":"<a a=a\">",
+"output":["ParseError", ["StartTag", "a", {"a":"a\""}]]},
+
+{"description":"<a a=a#>",
+"input":"<a a=a#>",
+"output":[["StartTag", "a", {"a":"a#"}]]},
+
+{"description":"<a a=a%>",
+"input":"<a a=a%>",
+"output":[["StartTag", "a", {"a":"a%"}]]},
+
+{"description":"<a a=a&>",
+"input":"<a a=a&>",
+"output":[["StartTag", "a", {"a":"a&"}]]},
+
+{"description":"<a a=a'>",
+"input":"<a a=a'>",
+"output":["ParseError", ["StartTag", "a", {"a":"a'"}]]},
+
+{"description":"<a a=a(>",
+"input":"<a a=a(>",
+"output":[["StartTag", "a", {"a":"a("}]]},
+
+{"description":"<a a=a->",
+"input":"<a a=a->",
+"output":[["StartTag", "a", {"a":"a-"}]]},
+
+{"description":"<a a=a/>",
+"input":"<a a=a/>",
+"output":[["StartTag", "a", {"a":"a/"}]]},
+
+{"description":"<a a=a0>",
+"input":"<a a=a0>",
+"output":[["StartTag", "a", {"a":"a0"}]]},
+
+{"description":"<a a=a1>",
+"input":"<a a=a1>",
+"output":[["StartTag", "a", {"a":"a1"}]]},
+
+{"description":"<a a=a9>",
+"input":"<a a=a9>",
+"output":[["StartTag", "a", {"a":"a9"}]]},
+
+{"description":"<a a=a<>",
+"input":"<a a=a<>",
+"output":["ParseError", ["StartTag", "a", {"a":"a<"}]]},
+
+{"description":"<a a=a=>",
+"input":"<a a=a=>",
+"output":["ParseError", ["StartTag", "a", {"a":"a="}]]},
+
+{"description":"<a a=a>",
+"input":"<a a=a>",
+"output":[["StartTag", "a", {"a":"a"}]]},
+
+{"description":"<a a=a?>",
+"input":"<a a=a?>",
+"output":[["StartTag", "a", {"a":"a?"}]]},
+
+{"description":"<a a=a@>",
+"input":"<a a=a@>",
+"output":[["StartTag", "a", {"a":"a@"}]]},
+
+{"description":"<a a=aA>",
+"input":"<a a=aA>",
+"output":[["StartTag", "a", {"a":"aA"}]]},
+
+{"description":"<a a=aB>",
+"input":"<a a=aB>",
+"output":[["StartTag", "a", {"a":"aB"}]]},
+
+{"description":"<a a=aY>",
+"input":"<a a=aY>",
+"output":[["StartTag", "a", {"a":"aY"}]]},
+
+{"description":"<a a=aZ>",
+"input":"<a a=aZ>",
+"output":[["StartTag", "a", {"a":"aZ"}]]},
+
+{"description":"<a a=a`>",
+"input":"<a a=a`>",
+"output":["ParseError", ["StartTag", "a", {"a":"a`"}]]},
+
+{"description":"<a a=aa>",
+"input":"<a a=aa>",
+"output":[["StartTag", "a", {"a":"aa"}]]},
+
+{"description":"<a a=ab>",
+"input":"<a a=ab>",
+"output":[["StartTag", "a", {"a":"ab"}]]},
+
+{"description":"<a a=ay>",
+"input":"<a a=ay>",
+"output":[["StartTag", "a", {"a":"ay"}]]},
+
+{"description":"<a a=az>",
+"input":"<a a=az>",
+"output":[["StartTag", "a", {"a":"az"}]]},
+
+{"description":"<a a=a{>",
+"input":"<a a=a{>",
+"output":[["StartTag", "a", {"a":"a{"}]]},
+
+{"description":"<a a=a\\uDBC0\\uDC00>",
+"input":"<a a=a\uDBC0\uDC00>",
+"output":[["StartTag", "a", {"a":"a\uDBC0\uDC00"}]]},
+
+{"description":"<a a=b>",
+"input":"<a a=b>",
+"output":[["StartTag", "a", {"a":"b"}]]},
+
+{"description":"<a a=y>",
+"input":"<a a=y>",
+"output":[["StartTag", "a", {"a":"y"}]]},
+
+{"description":"<a a=z>",
+"input":"<a a=z>",
+"output":[["StartTag", "a", {"a":"z"}]]},
+
+{"description":"<a a={>",
+"input":"<a a={>",
+"output":[["StartTag", "a", {"a":"{"}]]},
+
+{"description":"<a a=\\uDBC0\\uDC00>",
+"input":"<a a=\uDBC0\uDC00>",
+"output":[["StartTag", "a", {"a":"\uDBC0\uDC00"}]]},
+
{"description":"<a a>",
"input":"<a a>",
"output":[["StartTag", "a", {"a":""}]]},

+{"description":"<a a?>",
+"input":"<a a?>",
+"output":[["StartTag", "a", {"a?":""}]]},
+
+{"description":"<a a@>",
+"input":"<a a@>",
+"output":[["StartTag", "a", {"a@":""}]]},
+
+{"description":"<a aA>",
+"input":"<a aA>",
+"output":[["StartTag", "a", {"aa":""}]]},
+
+{"description":"<a aB>",
+"input":"<a aB>",
+"output":[["StartTag", "a", {"ab":""}]]},
+
+{"description":"<a aY>",
+"input":"<a aY>",
+"output":[["StartTag", "a", {"ay":""}]]},
+
+{"description":"<a aZ>",
+"input":"<a aZ>",
+"output":[["StartTag", "a", {"az":""}]]},
+
+{"description":"<a a[>",
+"input":"<a a[>",
+"output":[["StartTag", "a", {"a[":""}]]},
+
+{"description":"<a a`>",
+"input":"<a a`>",
+"output":[["StartTag", "a", {"a`":""}]]},
+
+{"description":"<a aa>",
+"input":"<a aa>",
+"output":[["StartTag", "a", {"aa":""}]]},
+
+{"description":"<a ab>",
+"input":"<a ab>",
+"output":[["StartTag", "a", {"ab":""}]]},
+
+{"description":"<a ay>",
+"input":"<a ay>",
+"output":[["StartTag", "a", {"ay":""}]]},
+
+{"description":"<a az>",
+"input":"<a az>",
+"output":[["StartTag", "a", {"az":""}]]},
+
+{"description":"<a a{>",
+"input":"<a a{>",
+"output":[["StartTag", "a", {"a{":""}]]},
+
+{"description":"<a a\\uDBC0\\uDC00>",
+"input":"<a a\uDBC0\uDC00>",
+"output":[["StartTag", "a", {"a\uDBC0\uDC00":""}]]},
+
+{"description":"<a b>",
+"input":"<a b>",
+"output":[["StartTag", "a", {"b":""}]]},
+
+{"description":"<a y>",
+"input":"<a y>",
+"output":[["StartTag", "a", {"y":""}]]},
+
+{"description":"<a z>",
+"input":"<a z>",
+"output":[["StartTag", "a", {"z":""}]]},
+
+{"description":"<a {>",
+"input":"<a {>",
+"output":[["StartTag", "a", {"{":""}]]},
+
+{"description":"<a \\uDBC0\\uDC00>",
+"input":"<a \uDBC0\uDC00>",
+"output":[["StartTag", "a", {"\uDBC0\uDC00":""}]]},
+
+{"description":"<a!>",
+"input":"<a!>",
+"output":[["StartTag", "a!", {}]]},
+
+{"description":"<a\">",
+"input":"<a\">",
+"output":[["StartTag", "a\"", {}]]},
+
+{"description":"<a&>",
+"input":"<a&>",
+"output":[["StartTag", "a&", {}]]},
+
+{"description":"<a'>",
+"input":"<a'>",
+"output":[["StartTag", "a'", {}]]},
+
+{"description":"<a->",
+"input":"<a->",
+"output":[["StartTag", "a-", {}]]},
+
+{"description":"<a.>",
+"input":"<a.>",
+"output":[["StartTag", "a.", {}]]},
+
{"description":"<a/>",
"input":"<a/>",
"output":[["StartTag", "a", {}, true]]},

+{"description":"<a/\\u0000>",
+"input":"<a/\u0000>",
+"output":["ParseError", "ParseError", ["StartTag", "a", {"\uFFFD":""}]]},
+
+{"description":"<a/\\u0009>",
+"input":"<a/\u0009>",
+"output":["ParseError", ["StartTag", "a", {}]]},
+
+{"description":"<a/\\u000A>",
+"input":"<a/\u000A>",
+"output":["ParseError", ["StartTag", "a", {}]]},
+
+{"description":"<a/\\u000B>",
+"input":"<a/\u000B>",
+"output":["ParseError", "ParseError", ["StartTag", "a", {"\u000B":""}]]},
+
+{"description":"<a/\\u000C>",
+"input":"<a/\u000C>",
+"output":["ParseError", ["StartTag", "a", {}]]},
+
+{"description":"<a/ >",
+"input":"<a/ >",
+"output":["ParseError", ["StartTag", "a", {}]]},
+
+{"description":"<a/!>",
+"input":"<a/!>",
+"output":["ParseError", ["StartTag", "a", {"!":""}]]},
+
+{"description":"<a/\">",
+"input":"<a/\">",
+"output":["ParseError", "ParseError", ["StartTag", "a", {"\"":""}]]},
+
+{"description":"<a/&>",
+"input":"<a/&>",
+"output":["ParseError", ["StartTag", "a", {"&":""}]]},
+
+{"description":"<a/'>",
+"input":"<a/'>",
+"output":["ParseError", "ParseError", ["StartTag", "a", {"'":""}]]},
+
+{"description":"<a/->",
+"input":"<a/->",
+"output":["ParseError", ["StartTag", "a", {"-":""}]]},
+
+{"description":"<a//>",
+"input":"<a//>",
+"output":["ParseError", ["StartTag", "a", {}, true]]},
+
+{"description":"<a/0>",
+"input":"<a/0>",
+"output":["ParseError", ["StartTag", "a", {"0":""}]]},
+
+{"description":"<a/1>",
+"input":"<a/1>",
+"output":["ParseError", ["StartTag", "a", {"1":""}]]},
+
+{"description":"<a/9>",
+"input":"<a/9>",
+"output":["ParseError", ["StartTag", "a", {"9":""}]]},
+
+{"description":"<a/<>",
+"input":"<a/<>",
+"output":["ParseError", "ParseError", ["StartTag", "a", {"<":""}]]},
+
+{"description":"<a/=>",
+"input":"<a/=>",
+"output":["ParseError", "ParseError", ["StartTag", "a", {"=":""}]]},
+
+{"description":"<a/>",
+"input":"<a/>",
+"output":[["StartTag", "a", {}, true]]},
+
+{"description":"<a/?>",
+"input":"<a/?>",
+"output":["ParseError", ["StartTag", "a", {"?":""}]]},
+
+{"description":"<a/@>",
+"input":"<a/@>",
+"output":["ParseError", ["StartTag", "a", {"@":""}]]},
+
+{"description":"<a/A>",
+"input":"<a/A>",
+"output":["ParseError", ["StartTag", "a", {"a":""}]]},
+
+{"description":"<a/B>",
+"input":"<a/B>",
+"output":["ParseError", ["StartTag", "a", {"b":""}]]},
+
+{"description":"<a/Y>",
+"input":"<a/Y>",
+"output":["ParseError", ["StartTag", "a", {"y":""}]]},
+
+{"description":"<a/Z>",
+"input":"<a/Z>",
+"output":["ParseError", ["StartTag", "a", {"z":""}]]},
+
+{"description":"<a/`>",
+"input":"<a/`>",
+"output":["ParseError", ["StartTag", "a", {"`":""}]]},
+
+{"description":"<a/a>",
+"input":"<a/a>",
+"output":["ParseError", ["StartTag", "a", {"a":""}]]},
+
+{"description":"<a/b>",
+"input":"<a/b>",
+"output":["ParseError", ["StartTag", "a", {"b":""}]]},
+
+{"description":"<a/y>",
+"input":"<a/y>",
+"output":["ParseError", ["StartTag", "a", {"y":""}]]},
+
+{"description":"<a/z>",
+"input":"<a/z>",
+"output":["ParseError", ["StartTag", "a", {"z":""}]]},
+
+{"description":"<a/{>",
+"input":"<a/{>",
+"output":["ParseError", ["StartTag", "a", {"{":""}]]},
+
+{"description":"<a/\\uDBC0\\uDC00>",
+"input":"<a/\uDBC0\uDC00>",
+"output":["ParseError", ["StartTag", "a", {"\uDBC0\uDC00":""}]]},
+
+{"description":"<a0>",
+"input":"<a0>",
+"output":[["StartTag", "a0", {}]]},
+
+{"description":"<a1>",
+"input":"<a1>",
+"output":[["StartTag", "a1", {}]]},
+
+{"description":"<a9>",
+"input":"<a9>",
+"output":[["StartTag", "a9", {}]]},
+
+{"description":"<a<>",
+"input":"<a<>",
+"output":[["StartTag", "a<", {}]]},
+
+{"description":"<a=>",
+"input":"<a=>",
+"output":[["StartTag", "a=", {}]]},
+
{"description":"<a>",
"input":"<a>",
"output":[["StartTag", "a", {}]]},

+{"description":"<a?>",
+"input":"<a?>",
+"output":[["StartTag", "a?", {}]]},
+
+{"description":"<a@>",
+"input":"<a@>",
+"output":[["StartTag", "a@", {}]]},
+
+{"description":"<aA>",
+"input":"<aA>",
+"output":[["StartTag", "aa", {}]]},
+
+{"description":"<aB>",
+"input":"<aB>",
+"output":[["StartTag", "ab", {}]]},
+
+{"description":"<aY>",
+"input":"<aY>",
+"output":[["StartTag", "ay", {}]]},
+
+{"description":"<aZ>",
+"input":"<aZ>",
+"output":[["StartTag", "az", {}]]},
+
+{"description":"<a[>",
+"input":"<a[>",
+"output":[["StartTag", "a[", {}]]},
+
+{"description":"<a`>",
+"input":"<a`>",
+"output":[["StartTag", "a`", {}]]},
+
+{"description":"<aa>",
+"input":"<aa>",
+"output":[["StartTag", "aa", {}]]},
+
+{"description":"<ab>",
+"input":"<ab>",
+"output":[["StartTag", "ab", {}]]},
+
+{"description":"<ay>",
+"input":"<ay>",
+"output":[["StartTag", "ay", {}]]},
+
+{"description":"<az>",
+"input":"<az>",
+"output":[["StartTag", "az", {}]]},
+
+{"description":"<a{>",
+"input":"<a{>",
+"output":[["StartTag", "a{", {}]]},
+
+{"description":"<a\\uDBC0\\uDC00>",
+"input":"<a\uDBC0\uDC00>",
+"output":[["StartTag", "a\uDBC0\uDC00", {}]]},
+
+{"description":"<b>",
+"input":"<b>",
+"output":[["StartTag", "b", {}]]},
+
+{"description":"<y>",
+"input":"<y>",
+"output":[["StartTag", "y", {}]]},
+
+{"description":"<z>",
+"input":"<z>",
+"output":[["StartTag", "z", {}]]},
+
{"description":"<{",
"input":"<{",
"output":["ParseError", ["Character", "<{"]]},
diff --git a/test/data/tokeniser2/test4.test b/test/data/tokeniser2/test4.test
index bf251d1..4be94b0 100644
--- a/test/data/tokeniser2/test4.test
+++ b/test/data/tokeniser2/test4.test
@@ -1,5 +1,13 @@
{"tests": [

+{"description":"< in attribute name",
+"input":"<z/0 <>",
+"output":["ParseError", "ParseError", ["StartTag", "z", {"0": "", "<": ""}]]},
+
+{"description":"< in attribute value",
+"input":"<z x=<>",
+"output":["ParseError", ["StartTag", "z", {"x": "<"}]]},
+
{"description":"= in unquoted attribute value",
"input":"<z z=z=z>",
"output":["ParseError", ["StartTag", "z", {"z": "z=z"}]]},
@@ -20,25 +28,25 @@
"input":"<z ====>",
"output":["ParseError", "ParseError", "ParseError", ["StartTag", "z", {"=": "=="}]]},

-{"description":"Allowed \" after ampersand in attribute value",
+{"description":"\" after ampersand in double-quoted attribute value",
"input":"<z z=\"&\">",
"output":[["StartTag", "z", {"z": "&"}]]},

-{"description":"Non-allowed ' after ampersand in attribute value",
+{"description":"' after ampersand in double-quoted attribute value",
"input":"<z z=\"&'\">",
-"output":["ParseError", ["StartTag", "z", {"z": "&'"}]]},
+"output":[["StartTag", "z", {"z": "&'"}]]},

-{"description":"Allowed ' after ampersand in attribute value",
+{"description":"' after ampersand in single-quoted attribute value",
"input":"<z z='&'>",
"output":[["StartTag", "z", {"z": "&"}]]},

-{"description":"Non-allowed \" after ampersand in attribute value",
+{"description":"\" after ampersand in single-quoted attribute value",
"input":"<z z='&\"'>",
-"output":["ParseError", ["StartTag", "z", {"z": "&\""}]]},
+"output":[["StartTag", "z", {"z": "&\""}]]},

{"description":"Text after bogus character reference",
"input":"<z z='&xlink_xmlns;'>bar<z>",
-"output":["ParseError",["StartTag","z",{"z":"&xlink_xmlns;"}],["Character","bar"],["StartTag","z",{}]]},
+"output":[["StartTag","z",{"z":"&xlink_xmlns;"}],["Character","bar"],["StartTag","z",{}]]},

{"description":"Text after hex character reference",
"input":"<z z='&#x0020; foo'>bar<z>",
@@ -88,6 +96,14 @@
"input":"<!doctype html \r",
"output":["ParseError", ["DOCTYPE", "html", null, null, false]]},

+{"description":"CR EOF in tag name",
+"input":"<z\r",
+"output":["ParseError"]},
+
+{"description":"Slash EOF in tag name",
+"input":"<z/",
+"output":["ParseError"]},
+
{"description":"Zero hex numeric entity",
"input":"&#x0",
"output":["ParseError", "ParseError", ["Character", "\uFFFD"]]},
@@ -116,6 +132,10 @@
"input":"&#x10000;",
"output":[["Character", "\uD800\uDC00"]]},

+{"description":"Maximum non-BMP numeric entity",
+"input":"&#X10FFFF;",
+"output":["ParseError", ["Character", "\uDBFF\uDFFF"]]},
+
{"description":"Above maximum numeric entity",
"input":"&#x110000;",
"output":["ParseError", ["Character", "\uFFFD"]]},
@@ -200,9 +220,14 @@
"input":"<!dOcTyPe hTmL sYsTeM \"xYz\">",
"output":[["DOCTYPE", "html", null, "xYz", true]]},

+{"description":"U+0000 in lookahead region after non-matching character",
+"input":"<!doc>\u0000",
+"output":["ParseError", ["Comment", "doc"], "ParseError", ["Character", "\u0000"]],
+"ignoreErrorOrder":true},
+
{"description":"U+0000 in lookahead region",
"input":"<!doc\u0000",
-"output":["ParseError", "ParseError", ["Comment", "doc\uFFFD"]],
+"output":["ParseError", ["Comment", "doc\uFFFD"]],
"ignoreErrorOrder":true},

{"description":"U+0080 in lookahead region",
@@ -270,6 +295,50 @@

{"description":"Doctype html x>text",
"input":"<!DOCTYPE html x>text",
-"output":["ParseError", ["DOCTYPE", "html", null, null, false], ["Character", "text"]]}
+"output":["ParseError", ["DOCTYPE", "html", null, null, false], ["Character", "text"]]},
+
+{"description":"Grave accent in unquoted attribute",
+"input":"<a a=aa`>",
+"output":["ParseError", ["StartTag", "a", {"a":"aa`"}]]},
+
+{"description":"EOF in tag name state ",
+"input":"<a",
+"output":["ParseError"]},
+
+{"description":"EOF in tag name state",
+"input":"<a",
+"output":["ParseError"]},
+
+{"description":"EOF in before attribute name state",
+"input":"<a ",
+"output":["ParseError"]},
+
+{"description":"EOF in attribute name state",
+"input":"<a a",
+"output":["ParseError"]},
+
+{"description":"EOF in after attribute name state",
+"input":"<a a ",
+"output":["ParseError"]},
+
+{"description":"EOF in before attribute value state",
+"input":"<a a =",
+"output":["ParseError"]},
+
+{"description":"EOF in attribute value (double quoted) state",
+"input":"<a a =\"a",
+"output":["ParseError"]},
+
+{"description":"EOF in attribute value (single quoted) state",
+"input":"<a a ='a",
+"output":["ParseError"]},
+
+{"description":"EOF in attribute value (unquoted) state",
+"input":"<a a =a",
+"output":["ParseError"]},
+
+{"description":"EOF in after attribute value state",
+"input":"<a a ='a'",
+"output":["ParseError"]}

]}
diff --git a/test/data/tokeniser2/unicodeChars.test b/test/data/tokeniser2/unicodeChars.test
index 9b59015..c778668 100644
--- a/test/data/tokeniser2/unicodeChars.test
+++ b/test/data/tokeniser2/unicodeChars.test
@@ -112,14 +112,6 @@
"input": "\u007F",
"output": ["ParseError", ["Character", "\u007F"]]},

-{"description": "Invalid Unicode character U+D800",
-"input": "\uD800",
-"output": ["ParseError", ["Character", "\uD800"]]},
-
-{"description": "Invalid Unicode character U+DFFF",
-"input": "\uDFFF",
-"output": ["ParseError", ["Character", "\uDFFF"]]},
-
{"description": "Invalid Unicode character U+FDD0",
"input": "\uFDD0",
"output": ["ParseError", ["Character", "\uFDD0"]]},
--
1.8.3.2

From c8524048c52f31b9a0e8f3469e11a0db76653adb Mon Sep 17 00:00:00 2001
From: Achal-Aggarwal <theachalaggarwal@gmail.com>
Date: Wed, 12 Mar 2014 02:49:18 +0530
Subject: [PATCH 11/11] Fixing entities.test by correcting cp1252 table
entries.

---
src/tokeniser/tokeniser.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/tokeniser/tokeniser.c b/src/tokeniser/tokeniser.c
index 3c18e92..ef84ae2 100644
--- a/src/tokeniser/tokeniser.c
+++ b/src/tokeniser/tokeniser.c
@@ -25,7 +25,7 @@
*/
static const uint32_t cp1252Table[32] = {
0x20AC, 0x0081, 0x201A, 0x0192, 0x201E, 0x2026, 0x2020, 0x2021,
- 0x02C6, 0x2030, 0x0160, 0x2039, 0x0152, 0x008F, 0x017D, 0x0090,
+ 0x02C6, 0x2030, 0x0160, 0x2039, 0x0152, 0x008D, 0x017D, 0x008F,
0x0090, 0x2018, 0x2019, 0x201C, 0x201D, 0x2022, 0x2013, 0x2014,
0x02DC, 0x2122, 0x0161, 0x203A, 0x0153, 0x009D, 0x017E, 0x0178
};
--
1.8.3.2

I am attaching 11 patches: patches #5-9 remove failing test cases, #10 updates all test cases to the html5lib tests, and the remaining ones are code changes.



On Tue, Mar 11, 2014 at 10:28 PM, Achal Aggarwal <theachalaggarwal@gmail.com> wrote:
Sorry for that direct email.

I generally just apply a diff using git to test a PR. I guess you want formatted patches, so here they come...


Achal


On Tue, Mar 11, 2014 at 10:17 PM, Rob Kendrick <rjek@netsurf-browser.org> wrote:
On Tue, Mar 11, 2014 at 09:57:10PM +0530, Achal Aggarwal wrote:
> I am attaching a patch against master in this mail and doing same in the
> tracker.

This patch is thousands of lines long.  Did you really commit this as a
single changeset?  It's not manageable to review.

Also, you'll want to reply to the list, not to me :)

B.



--
Achal



--
Achal

No comments:

Post a Comment