---
 include/hubbub/types.h                 |  3 +-
 src/tokeniser/tokeniser.c              | 11 ++---
 test/data/tokeniser2/INDEX             |  2 +-
 test/data/tree-construction/tests5.dat | 84 +++++++++++++++++++++-------------
 test/tokeniser2.c                      |  9 ++--
 test/tokeniser3.c                      |  9 ++--
 6 files changed, 72 insertions(+), 46 deletions(-)
diff --git a/include/hubbub/types.h b/include/hubbub/types.h
index e5c208b..6e2b1a9 100644
--- a/include/hubbub/types.h
+++ b/include/hubbub/types.h
@@ -33,7 +33,8 @@ typedef enum hubbub_content_model {
 	HUBBUB_CONTENT_MODEL_PCDATA,
 	HUBBUB_CONTENT_MODEL_RCDATA,
 	HUBBUB_CONTENT_MODEL_CDATA,
-	HUBBUB_CONTENT_MODEL_PLAINTEXT
+	HUBBUB_CONTENT_MODEL_PLAINTEXT,
+	HUBBUB_CONTENT_MODEL_RAWTEXT
 } hubbub_content_model;
 
 /**
diff --git a/src/tokeniser/tokeniser.c b/src/tokeniser/tokeniser.c
index 3087ac8..4f87287 100644
--- a/src/tokeniser/tokeniser.c
+++ b/src/tokeniser/tokeniser.c
@@ -689,8 +689,6 @@ hubbub_error hubbub_tokeniser_handle_data(hubbub_tokeniser *tokeniser)
 		} else if (c == '-' &&
 				tokeniser->escape_flag == false &&
 				(tokeniser->content_model ==
-						HUBBUB_CONTENT_MODEL_RCDATA ||
-				tokeniser->content_model ==
 						HUBBUB_CONTENT_MODEL_CDATA) &&
 				tokeniser->context.pending >= 3) {
 			size_t ignore;
@@ -712,6 +710,8 @@ hubbub_error hubbub_tokeniser_handle_data(hubbub_tokeniser *tokeniser)
 						HUBBUB_CONTENT_MODEL_PCDATA ||
 					((tokeniser->content_model ==
 						HUBBUB_CONTENT_MODEL_RCDATA ||
+					tokeniser->content_model == 
+						HUBBUB_CONTENT_MODEL_RAWTEXT ||
 					tokeniser->content_model ==
 						HUBBUB_CONTENT_MODEL_CDATA) &&
 				tokeniser->escape_flag == false))) {
@@ -899,6 +899,7 @@ hubbub_error hubbub_tokeniser_handle_tag_open(hubbub_tokeniser *tokeniser)
 
 		tokeniser->state = STATE_CLOSE_TAG_OPEN;
 	} else if (tokeniser->content_model == HUBBUB_CONTENT_MODEL_RCDATA ||
+			tokeniser->content_model == HUBBUB_CONTENT_MODEL_RAWTEXT ||
 			tokeniser->content_model ==
 					HUBBUB_CONTENT_MODEL_CDATA) {
 		/* Return to data state with '<' still in "chars" */
@@ -971,6 +972,7 @@ hubbub_error hubbub_tokeniser_handle_close_tag_open(hubbub_tokeniser *tokeniser)
 	/**\todo fragment case */
 
 	if (tokeniser->content_model == HUBBUB_CONTENT_MODEL_RCDATA ||
+			tokeniser->content_model == HUBBUB_CONTENT_MODEL_RAWTEXT ||
 			tokeniser->content_model ==
 					HUBBUB_CONTENT_MODEL_CDATA) {
 		uint8_t *start_tag_name =
@@ -3004,7 +3006,6 @@ hubbub_error hubbub_tokeniser_handle_numbered_entity(
 			ctx->match_entity.length += len;
 		} else {
 			ctx->match_entity.base = 10;
-			printf("base 10\n");
 		}
 	}
 
@@ -3041,7 +3042,6 @@ hubbub_error hubbub_tokeniser_handle_numbered_entity(
 
 		if (ctx->match_entity.numeric_state.ucs4 > 0x10FFFF) {
 			ctx->match_entity.overflow = true;
-			printf("overflow\n");
 		}
 	}
 
@@ -3061,11 +3061,9 @@ hubbub_error hubbub_tokeniser_handle_numbered_entity(
 
 		if (0x80 <= cp && cp <= 0x9F) {
 			cp = cp1252Table[cp - 0x80];
-			printf("converting1\n");
 		} else if (ctx->match_entity.overflow || 
 				(0xD800 <= cp && cp <= 0xDFFF) ||
 				(cp == 0x00)) {
-			printf("converting\n");
 			cp = 0xFFFD;
 		} else if((0x0001<=cp && cp <= 0x0008) ||
 				(0x000D <= cp && cp <= 0x001F) ||
@@ -3074,7 +3072,6 @@ hubbub_error hubbub_tokeniser_handle_numbered_entity(
 				(cp ==0x000B) ||
 				((cp & 0xFFFE) == 0xFFFE) ||
 				((cp & 0xFFFF) == 0xFFFF) ){
-			printf("converting\n");
 			/* the check for cp > 0x10FFFF per spec is performed
 			 * in the loop above to avoid overflow */
 		}
diff --git a/test/data/tokeniser2/INDEX b/test/data/tokeniser2/INDEX
index 9b165c0..9ff8596 100644
--- a/test/data/tokeniser2/INDEX
+++ b/test/data/tokeniser2/INDEX
@@ -7,7 +7,7 @@ test2.test		html5lib tests (part 2)
 test3.test		html5lib tests (part 3)
 test4.test		html5lib tests (part 4)
 entities.test		html5lib entity tests
-#escapeFlag.test		html5lib escape flag tests
+escapeFlag.test		html5lib escape flag tests
 numericEntities.test	html5lib numeric entities tests
 unicodeChars.test	html5lib unicode character tests
 #unicodeCharsProblematic.test	html5lib problematic unicode character tests
diff --git a/test/data/tree-construction/tests5.dat b/test/data/tree-construction/tests5.dat
index 2c95031..4d5fcd7 100644
--- a/test/data/tree-construction/tests5.dat
+++ b/test/data/tree-construction/tests5.dat
@@ -1,31 +1,33 @@
 #data
 <style> <!-- </style>x
 #errors
-Line: 1 Col: 7 Unexpected start tag (style). Expected DOCTYPE.
-Line: 1 Col: 22 Unexpected end of file. Expected end tag (style).
+(1,7): expected-doctype-but-got-start-tag
 #document
 | <html>
 |   <head>
 |     <style>
-|       " <!-- </style>x"
+|       " <!-- "
 |   <body>
+|     "x"
 
 #data
 <style> <!-- </style> --> </style>x
 #errors
-Line: 1 Col: 7 Unexpected start tag (style). Expected DOCTYPE.
+(1,7): expected-doctype-but-got-start-tag
+(1,34): unexpected-end-tag
 #document
 | <html>
 |   <head>
 |     <style>
-|       " <!-- </style> --> "
+|       " <!-- "
+|     " "
 |   <body>
-|     "x"
+|     "--> x"
 
 #data
 <style> <!--> </style>x
 #errors
-Line: 1 Col: 7 Unexpected start tag (style). Expected DOCTYPE.
+(1,7): expected-doctype-but-got-start-tag
 #document
 | <html>
 |   <head>
@@ -37,7 +39,7 @@ Line: 1 Col: 7 Unexpected start tag (style). Expected DOCTYPE.
 #data
 <style> <!---> </style>x
 #errors
-Line: 1 Col: 7 Unexpected start tag (style). Expected DOCTYPE.
+(1,7): expected-doctype-but-got-start-tag
 #document
 | <html>
 |   <head>
@@ -49,7 +51,7 @@ Line: 1 Col: 7 Unexpected start tag (style). Expected DOCTYPE.
 #data
 <iframe> <!---> </iframe>x
 #errors
-Line: 1 Col: 8 Unexpected start tag (iframe). Expected DOCTYPE.
+(1,8): expected-doctype-but-got-start-tag
 #document
 | <html>
 |   <head>
@@ -61,55 +63,63 @@ Line: 1 Col: 8 Unexpected start tag (iframe). Expected DOCTYPE.
 #data
 <iframe> <!--- </iframe>->x</iframe> --> </iframe>x
 #errors
-Line: 1 Col: 8 Unexpected start tag (iframe). Expected DOCTYPE.
+(1,8): expected-doctype-but-got-start-tag
+(1,36): unexpected-end-tag
+(1,50): unexpected-end-tag
 #document
 | <html>
 |   <head>
 |   <body>
 |     <iframe>
-|       " <!--- </iframe>->x</iframe> --> "
-|     "x"
+|       " <!--- "
+|     "->x --> x"
 
 #data
 <script> <!-- </script> --> </script>x
 #errors
-Line: 1 Col: 8 Unexpected start tag (script). Expected DOCTYPE.
+(1,8): expected-doctype-but-got-start-tag
+(1,37): unexpected-end-tag
 #document
 | <html>
 |   <head>
 |     <script>
-|       " <!-- </script> --> "
+|       " <!-- "
+|     " "
 |   <body>
-|     "x"
+|     "--> x"
 
 #data
 <title> <!-- </title> --> </title>x
 #errors
-Line: 1 Col: 7 Unexpected start tag (title). Expected DOCTYPE.
+(1,7): expected-doctype-but-got-start-tag
+(1,34): unexpected-end-tag
 #document
 | <html>
 |   <head>
 |     <title>
-|       " <!-- </title> --> "
+|       " <!-- "
+|     " "
 |   <body>
-|     "x"
+|     "--> x"
 
 #data
 <textarea> <!--- </textarea>->x</textarea> --> </textarea>x
 #errors
-Line: 1 Col: 10 Unexpected start tag (textarea). Expected DOCTYPE.
+(1,10): expected-doctype-but-got-start-tag
+(1,42): unexpected-end-tag
+(1,58): unexpected-end-tag
 #document
 | <html>
 |   <head>
 |   <body>
 |     <textarea>
-|       " <!--- </textarea>->x</textarea> --> "
-|     "x"
+|       " <!--- "
+|     "->x --> x"
 
 #data
 <style> <!</-- </style>x
 #errors
-Line: 1 Col: 7 Unexpected start tag (style). Expected DOCTYPE.
+(1,7): expected-doctype-but-got-start-tag
 #document
 | <html>
 |   <head>
@@ -119,9 +129,20 @@ Line: 1 Col: 7 Unexpected start tag (style). Expected DOCTYPE.
 |     "x"
 
 #data
+<p><xmp></xmp>
+#errors
+(1,3): expected-doctype-but-got-start-tag
+#document
+| <html>
+|   <head>
+|   <body>
+|     <p>
+|     <xmp>
+
+#data
 <xmp> <!-- > --> </xmp>
 #errors
-Line: 1 Col: 5 Unexpected start tag (xmp). Expected DOCTYPE.
+(1,5): expected-doctype-but-got-start-tag
 #document
 | <html>
 |   <head>
@@ -132,7 +153,7 @@ Line: 1 Col: 5 Unexpected start tag (xmp). Expected DOCTYPE.
 #data
 <title>&</title>
 #errors
-Line: 1 Col: 7 Unexpected start tag (title). Expected DOCTYPE.
+(1,7): expected-doctype-but-got-start-tag
 #document
 | <html>
 |   <head>
@@ -143,33 +164,34 @@ Line: 1 Col: 7 Unexpected start tag (title). Expected DOCTYPE.
 #data
 <title><!--&amp;--></title>
 #errors
-Line: 1 Col: 7 Unexpected start tag (title). Expected DOCTYPE.
+(1,7): expected-doctype-but-got-start-tag
 #document
 | <html>
 |   <head>
 |     <title>
-|       "<!--&amp;-->"
+|       "<!--&-->"
 |   <body>
 
 #data
 <title><!--</title>
 #errors
-Line: 1 Col: 7 Unexpected start tag (title). Expected DOCTYPE.
-Line: 1 Col: 19 Unexpected end of file. Expected end tag (title).
+(1,7): expected-doctype-but-got-start-tag
 #document
 | <html>
 |   <head>
 |     <title>
-|       "<!--</title>"
+|       "<!--"
 |   <body>
 
 #data
 <noscript><!--</noscript>--></noscript>
 #errors
-Line: 1 Col: 10 Unexpected start tag (noscript). Expected DOCTYPE.
+(1,10): expected-doctype-but-got-start-tag
+(1,39): unexpected-end-tag
 #document
 | <html>
 |   <head>
 |     <noscript>
-|       "<!--</noscript>-->"
+|       "<!--"
 |   <body>
+|     "-->"
diff --git a/test/tokeniser2.c b/test/tokeniser2.c
index 3024e81..7c56aeb 100644
--- a/test/tokeniser2.c
+++ b/test/tokeniser2.c
@@ -177,15 +177,18 @@ void run_test(context *ctx)
 				(struct json_object *)
 				array_list_get_idx(ctx->content_model, i));
 
-			if (strcmp(cm, "PCDATA") == 0) {
+			if (strcmp(cm, "PCDATA state") == 0) {
 				params.content_model.model =
 						HUBBUB_CONTENT_MODEL_PCDATA;
-			} else if (strcmp(cm, "RCDATA") == 0) {
+			} else if (strcmp(cm, "RCDATA state") == 0) {
 				params.content_model.model =
 						HUBBUB_CONTENT_MODEL_RCDATA;
-			} else if (strcmp(cm, "CDATA") == 0) {
+			} else if (strcmp(cm, "CDATA state") == 0) {
 				params.content_model.model =
 						HUBBUB_CONTENT_MODEL_CDATA;
+			} else if (strcmp(cm, "RAWTEXT state") == 0) {
+				params.content_model.model =
+						HUBBUB_CONTENT_MODEL_RAWTEXT;
 			} else {
 				params.content_model.model =
 					HUBBUB_CONTENT_MODEL_PLAINTEXT;
diff --git a/test/tokeniser3.c b/test/tokeniser3.c
index c4c5231..a68e0ba 100644
--- a/test/tokeniser3.c
+++ b/test/tokeniser3.c
@@ -175,15 +175,18 @@ void run_test(context *ctx)
 				(struct json_object *)
 				array_list_get_idx(ctx->content_model, i));
 
-			if (strcmp(cm, "PCDATA") == 0) {
+			if (strcmp(cm, "PCDATA state") == 0) {
 				params.content_model.model =
 						HUBBUB_CONTENT_MODEL_PCDATA;
-			} else if (strcmp(cm, "RCDATA") == 0) {
+			} else if (strcmp(cm, "RCDATA state") == 0) {
 				params.content_model.model =
 						HUBBUB_CONTENT_MODEL_RCDATA;
-			} else if (strcmp(cm, "CDATA") == 0) {
+			} else if (strcmp(cm, "CDATA state") == 0) {
 				params.content_model.model =
 						HUBBUB_CONTENT_MODEL_CDATA;
+			} else if (strcmp(cm, "RAWTEXT state") == 0) {
+				params.content_model.model =
+						HUBBUB_CONTENT_MODEL_RAWTEXT;
 			} else {
 				params.content_model.model =
 					HUBBUB_CONTENT_MODEL_PLAINTEXT;
-- 
1.8.3.2
 
No comments:
Post a Comment