Monday, 31 March 2014

[PATCH 08/10] Add RAWTEXT content model to the tokeniser (treebuilder not yet updated)

---
include/hubbub/types.h | 3 +-
src/tokeniser/tokeniser.c | 11 ++---
test/data/tokeniser2/INDEX | 2 +-
test/data/tree-construction/tests5.dat | 84 +++++++++++++++++++++-------------
test/tokeniser2.c | 9 ++--
test/tokeniser3.c | 9 ++--
6 files changed, 72 insertions(+), 46 deletions(-)

diff --git a/include/hubbub/types.h b/include/hubbub/types.h
index e5c208b..6e2b1a9 100644
--- a/include/hubbub/types.h
+++ b/include/hubbub/types.h
@@ -33,7 +33,8 @@ typedef enum hubbub_content_model {
HUBBUB_CONTENT_MODEL_PCDATA,
HUBBUB_CONTENT_MODEL_RCDATA,
HUBBUB_CONTENT_MODEL_CDATA,
- HUBBUB_CONTENT_MODEL_PLAINTEXT
+ HUBBUB_CONTENT_MODEL_PLAINTEXT,
+ HUBBUB_CONTENT_MODEL_RAWTEXT
} hubbub_content_model;

/**
diff --git a/src/tokeniser/tokeniser.c b/src/tokeniser/tokeniser.c
index 3087ac8..4f87287 100644
--- a/src/tokeniser/tokeniser.c
+++ b/src/tokeniser/tokeniser.c
@@ -689,8 +689,6 @@ hubbub_error hubbub_tokeniser_handle_data(hubbub_tokeniser *tokeniser)
} else if (c == '-' &&
tokeniser->escape_flag == false &&
(tokeniser->content_model ==
- HUBBUB_CONTENT_MODEL_RCDATA ||
- tokeniser->content_model ==
HUBBUB_CONTENT_MODEL_CDATA) &&
tokeniser->context.pending >= 3) {
size_t ignore;
@@ -712,6 +710,8 @@ hubbub_error hubbub_tokeniser_handle_data(hubbub_tokeniser *tokeniser)
HUBBUB_CONTENT_MODEL_PCDATA ||
((tokeniser->content_model ==
HUBBUB_CONTENT_MODEL_RCDATA ||
+ tokeniser->content_model ==
+ HUBBUB_CONTENT_MODEL_RAWTEXT ||
tokeniser->content_model ==
HUBBUB_CONTENT_MODEL_CDATA) &&
tokeniser->escape_flag == false))) {
@@ -899,6 +899,7 @@ hubbub_error hubbub_tokeniser_handle_tag_open(hubbub_tokeniser *tokeniser)

tokeniser->state = STATE_CLOSE_TAG_OPEN;
} else if (tokeniser->content_model == HUBBUB_CONTENT_MODEL_RCDATA ||
+ tokeniser->content_model == HUBBUB_CONTENT_MODEL_RAWTEXT ||
tokeniser->content_model ==
HUBBUB_CONTENT_MODEL_CDATA) {
/* Return to data state with '<' still in "chars" */
@@ -971,6 +972,7 @@ hubbub_error hubbub_tokeniser_handle_close_tag_open(hubbub_tokeniser *tokeniser)
/**\todo fragment case */

if (tokeniser->content_model == HUBBUB_CONTENT_MODEL_RCDATA ||
+ tokeniser->content_model == HUBBUB_CONTENT_MODEL_RAWTEXT ||
tokeniser->content_model ==
HUBBUB_CONTENT_MODEL_CDATA) {
uint8_t *start_tag_name =
@@ -3004,7 +3006,6 @@ hubbub_error hubbub_tokeniser_handle_numbered_entity(
ctx->match_entity.length += len;
} else {
ctx->match_entity.base = 10;
- printf("base 10\n");
}
}

@@ -3041,7 +3042,6 @@ hubbub_error hubbub_tokeniser_handle_numbered_entity(

if (ctx->match_entity.numeric_state.ucs4 > 0x10FFFF) {
ctx->match_entity.overflow = true;
- printf("overflow\n");
}
}

@@ -3061,11 +3061,9 @@ hubbub_error hubbub_tokeniser_handle_numbered_entity(

if (0x80 <= cp && cp <= 0x9F) {
cp = cp1252Table[cp - 0x80];
- printf("converting1\n");
} else if (ctx->match_entity.overflow ||
(0xD800 <= cp && cp <= 0xDFFF) ||
(cp == 0x00)) {
- printf("converting\n");
cp = 0xFFFD;
} else if((0x0001<=cp && cp <= 0x0008) ||
(0x000D <= cp && cp <= 0x001F) ||
@@ -3074,7 +3072,6 @@ hubbub_error hubbub_tokeniser_handle_numbered_entity(
(cp ==0x000B) ||
((cp & 0xFFFE) == 0xFFFE) ||
((cp & 0xFFFF) == 0xFFFF) ){
- printf("converting\n");
/* the check for cp > 0x10FFFF per spec is performed
* in the loop above to avoid overflow */
}
diff --git a/test/data/tokeniser2/INDEX b/test/data/tokeniser2/INDEX
index 9b165c0..9ff8596 100644
--- a/test/data/tokeniser2/INDEX
+++ b/test/data/tokeniser2/INDEX
@@ -7,7 +7,7 @@ test2.test html5lib tests (part 2)
test3.test html5lib tests (part 3)
test4.test html5lib tests (part 4)
entities.test html5lib entity tests
-#escapeFlag.test html5lib escape flag tests
+escapeFlag.test html5lib escape flag tests
numericEntities.test html5lib numeric entities tests
unicodeChars.test html5lib unicode character tests
#unicodeCharsProblematic.test html5lib problematic unicode character tests
diff --git a/test/data/tree-construction/tests5.dat b/test/data/tree-construction/tests5.dat
index 2c95031..4d5fcd7 100644
--- a/test/data/tree-construction/tests5.dat
+++ b/test/data/tree-construction/tests5.dat
@@ -1,31 +1,33 @@
#data
<style> <!-- </style>x
#errors
-Line: 1 Col: 7 Unexpected start tag (style). Expected DOCTYPE.
-Line: 1 Col: 22 Unexpected end of file. Expected end tag (style).
+(1,7): expected-doctype-but-got-start-tag
#document
| <html>
| <head>
| <style>
-| " <!-- </style>x"
+| " <!-- "
| <body>
+| "x"

#data
<style> <!-- </style> --> </style>x
#errors
-Line: 1 Col: 7 Unexpected start tag (style). Expected DOCTYPE.
+(1,7): expected-doctype-but-got-start-tag
+(1,34): unexpected-end-tag
#document
| <html>
| <head>
| <style>
-| " <!-- </style> --> "
+| " <!-- "
+| " "
| <body>
-| "x"
+| "--> x"

#data
<style> <!--> </style>x
#errors
-Line: 1 Col: 7 Unexpected start tag (style). Expected DOCTYPE.
+(1,7): expected-doctype-but-got-start-tag
#document
| <html>
| <head>
@@ -37,7 +39,7 @@ Line: 1 Col: 7 Unexpected start tag (style). Expected DOCTYPE.
#data
<style> <!---> </style>x
#errors
-Line: 1 Col: 7 Unexpected start tag (style). Expected DOCTYPE.
+(1,7): expected-doctype-but-got-start-tag
#document
| <html>
| <head>
@@ -49,7 +51,7 @@ Line: 1 Col: 7 Unexpected start tag (style). Expected DOCTYPE.
#data
<iframe> <!---> </iframe>x
#errors
-Line: 1 Col: 8 Unexpected start tag (iframe). Expected DOCTYPE.
+(1,8): expected-doctype-but-got-start-tag
#document
| <html>
| <head>
@@ -61,55 +63,63 @@ Line: 1 Col: 8 Unexpected start tag (iframe). Expected DOCTYPE.
#data
<iframe> <!--- </iframe>->x</iframe> --> </iframe>x
#errors
-Line: 1 Col: 8 Unexpected start tag (iframe). Expected DOCTYPE.
+(1,8): expected-doctype-but-got-start-tag
+(1,36): unexpected-end-tag
+(1,50): unexpected-end-tag
#document
| <html>
| <head>
| <body>
| <iframe>
-| " <!--- </iframe>->x</iframe> --> "
-| "x"
+| " <!--- "
+| "->x --> x"

#data
<script> <!-- </script> --> </script>x
#errors
-Line: 1 Col: 8 Unexpected start tag (script). Expected DOCTYPE.
+(1,8): expected-doctype-but-got-start-tag
+(1,37): unexpected-end-tag
#document
| <html>
| <head>
| <script>
-| " <!-- </script> --> "
+| " <!-- "
+| " "
| <body>
-| "x"
+| "--> x"

#data
<title> <!-- </title> --> </title>x
#errors
-Line: 1 Col: 7 Unexpected start tag (title). Expected DOCTYPE.
+(1,7): expected-doctype-but-got-start-tag
+(1,34): unexpected-end-tag
#document
| <html>
| <head>
| <title>
-| " <!-- </title> --> "
+| " <!-- "
+| " "
| <body>
-| "x"
+| "--> x"

#data
<textarea> <!--- </textarea>->x</textarea> --> </textarea>x
#errors
-Line: 1 Col: 10 Unexpected start tag (textarea). Expected DOCTYPE.
+(1,10): expected-doctype-but-got-start-tag
+(1,42): unexpected-end-tag
+(1,58): unexpected-end-tag
#document
| <html>
| <head>
| <body>
| <textarea>
-| " <!--- </textarea>->x</textarea> --> "
-| "x"
+| " <!--- "
+| "->x --> x"

#data
<style> <!</-- </style>x
#errors
-Line: 1 Col: 7 Unexpected start tag (style). Expected DOCTYPE.
+(1,7): expected-doctype-but-got-start-tag
#document
| <html>
| <head>
@@ -119,9 +129,20 @@ Line: 1 Col: 7 Unexpected start tag (style). Expected DOCTYPE.
| "x"

#data
+<p><xmp></xmp>
+#errors
+(1,3): expected-doctype-but-got-start-tag
+#document
+| <html>
+| <head>
+| <body>
+| <p>
+| <xmp>
+
+#data
<xmp> <!-- > --> </xmp>
#errors
-Line: 1 Col: 5 Unexpected start tag (xmp). Expected DOCTYPE.
+(1,5): expected-doctype-but-got-start-tag
#document
| <html>
| <head>
@@ -132,7 +153,7 @@ Line: 1 Col: 5 Unexpected start tag (xmp). Expected DOCTYPE.
#data
<title>&amp;</title>
#errors
-Line: 1 Col: 7 Unexpected start tag (title). Expected DOCTYPE.
+(1,7): expected-doctype-but-got-start-tag
#document
| <html>
| <head>
@@ -143,33 +164,34 @@ Line: 1 Col: 7 Unexpected start tag (title). Expected DOCTYPE.
#data
<title><!--&amp;--></title>
#errors
-Line: 1 Col: 7 Unexpected start tag (title). Expected DOCTYPE.
+(1,7): expected-doctype-but-got-start-tag
#document
| <html>
| <head>
| <title>
-| "<!--&amp;-->"
+| "<!--&-->"
| <body>

#data
<title><!--</title>
#errors
-Line: 1 Col: 7 Unexpected start tag (title). Expected DOCTYPE.
-Line: 1 Col: 19 Unexpected end of file. Expected end tag (title).
+(1,7): expected-doctype-but-got-start-tag
#document
| <html>
| <head>
| <title>
-| "<!--</title>"
+| "<!--"
| <body>

#data
<noscript><!--</noscript>--></noscript>
#errors
-Line: 1 Col: 10 Unexpected start tag (noscript). Expected DOCTYPE.
+(1,10): expected-doctype-but-got-start-tag
+(1,39): unexpected-end-tag
#document
| <html>
| <head>
| <noscript>
-| "<!--</noscript>-->"
+| "<!--"
| <body>
+| "-->"
diff --git a/test/tokeniser2.c b/test/tokeniser2.c
index 3024e81..7c56aeb 100644
--- a/test/tokeniser2.c
+++ b/test/tokeniser2.c
@@ -177,15 +177,18 @@ void run_test(context *ctx)
(struct json_object *)
array_list_get_idx(ctx->content_model, i));

- if (strcmp(cm, "PCDATA") == 0) {
+ if (strcmp(cm, "PCDATA state") == 0) {
params.content_model.model =
HUBBUB_CONTENT_MODEL_PCDATA;
- } else if (strcmp(cm, "RCDATA") == 0) {
+ } else if (strcmp(cm, "RCDATA state") == 0) {
params.content_model.model =
HUBBUB_CONTENT_MODEL_RCDATA;
- } else if (strcmp(cm, "CDATA") == 0) {
+ } else if (strcmp(cm, "CDATA state") == 0) {
params.content_model.model =
HUBBUB_CONTENT_MODEL_CDATA;
+ } else if (strcmp(cm, "RAWTEXT state") == 0) {
+ params.content_model.model =
+ HUBBUB_CONTENT_MODEL_RAWTEXT;
} else {
params.content_model.model =
HUBBUB_CONTENT_MODEL_PLAINTEXT;
diff --git a/test/tokeniser3.c b/test/tokeniser3.c
index c4c5231..a68e0ba 100644
--- a/test/tokeniser3.c
+++ b/test/tokeniser3.c
@@ -175,15 +175,18 @@ void run_test(context *ctx)
(struct json_object *)
array_list_get_idx(ctx->content_model, i));

- if (strcmp(cm, "PCDATA") == 0) {
+ if (strcmp(cm, "PCDATA state") == 0) {
params.content_model.model =
HUBBUB_CONTENT_MODEL_PCDATA;
- } else if (strcmp(cm, "RCDATA") == 0) {
+ } else if (strcmp(cm, "RCDATA state") == 0) {
params.content_model.model =
HUBBUB_CONTENT_MODEL_RCDATA;
- } else if (strcmp(cm, "CDATA") == 0) {
+ } else if (strcmp(cm, "CDATA state") == 0) {
params.content_model.model =
HUBBUB_CONTENT_MODEL_CDATA;
+ } else if (strcmp(cm, "RAWTEXT state") == 0) {
+ params.content_model.model =
+ HUBBUB_CONTENT_MODEL_RAWTEXT;
} else {
params.content_model.model =
HUBBUB_CONTENT_MODEL_PLAINTEXT;
--
1.8.3.2

No comments:

Post a Comment