Wednesday, 12 March 2014

[PATCH 04/11] Fix tokeniser test executor for content model flag change and segfault on no doctype name.

---
test/testutils.h | 19 +++++++++++++++++++
test/tokeniser2.c | 51 +++++++++++++++++++++++++++++----------------------
test/tokeniser3.c | 48 +++++++++++++++++++++++++++---------------------
3 files changed, 75 insertions(+), 43 deletions(-)

diff --git a/test/testutils.h b/test/testutils.h
index 45870f9..fa159d6 100644
--- a/test/testutils.h
+++ b/test/testutils.h
@@ -63,6 +63,7 @@ typedef bool (*line_func)(const char *data, size_t datalen, void *pw);
static size_t parse_strlen(const char *str, size_t limit);
bool parse_testfile(const char *filename, line_func callback, void *pw);
size_t parse_filesize(const char *filename);
+size_t n_str(const char *str);

/**
* Testcase datafile parser driver
@@ -147,6 +148,24 @@ size_t parse_filesize(const char *filename)
return len;
}

+/**
+ * NULL-safe string length helper; assumes strings are '\0' terminated
+ *
+ * \param str String to measure the length of (may be NULL)
+ * \return String length, or 0 if str is NULL
+ */
+size_t n_str(const char *str)
+{
+ size_t len = 0;
+
+ if (str == NULL)
+ return 0;
+
+ for (; *str++; len++);
+
+ return len;
+}
+

#ifndef strndup
char *my_strndup(const char *s, size_t n);
diff --git a/test/tokeniser2.c b/test/tokeniser2.c
index c8ab9c0..db7c8f8 100644
--- a/test/tokeniser2.c
+++ b/test/tokeniser2.c
@@ -14,6 +14,8 @@

#include "testutils.h"

+#define strlen n_str
+
typedef struct context {
const uint8_t *pbuffer;

@@ -25,7 +27,7 @@ typedef struct context {
size_t char_off;

const char *last_start_tag;
- struct array_list *content_model;
+ struct array_list *initial_state;
bool process_cdata;
} context;

@@ -63,7 +65,7 @@ int main(int argc, char **argv)
(struct json_object *) array_list_get_idx(tests, i);

ctx.last_start_tag = NULL;
- ctx.content_model = NULL;
+ ctx.initial_state = NULL;
ctx.process_cdata = false;

/* Extract settings */
@@ -86,8 +88,8 @@ int main(int argc, char **argv)
} else if (strcmp(key, "lastStartTag") == 0) {
ctx.last_start_tag = (const char *)
json_object_get_string(val);
- } else if (strcmp(key, "contentModelFlags") == 0) {
- ctx.content_model =
+ } else if (strcmp(key, "initialStates") == 0) {
+ ctx.initial_state =
json_object_get_array(val);
} else if (strcmp(key, "processCDATA") == 0) {
ctx.process_cdata =
@@ -114,10 +116,10 @@ void run_test(context *ctx)
int i, max_i;
struct array_list *outputsave = ctx->output;

- if (ctx->content_model == NULL) {
+ if (ctx->initial_state == NULL) {
max_i = 1;
} else {
- max_i = array_list_length(ctx->content_model);
+ max_i = array_list_length(ctx->initial_state);
}

/* We test for each of the content models specified */
@@ -161,30 +163,34 @@ void run_test(context *ctx)
HUBBUB_TOKENISER_TOKEN_HANDLER,
&params) == HUBBUB_OK);

- if (ctx->content_model == NULL) {
- params.content_model.model =
- HUBBUB_CONTENT_MODEL_PCDATA;
+ if (ctx->initial_state == NULL) {
+ params.initial_state.state =
+ HUBBUB_INITIAL_STATE_DATA;
} else {
const char *cm = json_object_get_string(
(struct json_object *)
- array_list_get_idx(ctx->content_model, i));
+ array_list_get_idx(ctx->initial_state, i));

if (strcmp(cm, "PCDATA") == 0) {
- params.content_model.model =
- HUBBUB_CONTENT_MODEL_PCDATA;
- } else if (strcmp(cm, "RCDATA") == 0) {
- params.content_model.model =
- HUBBUB_CONTENT_MODEL_RCDATA;
- } else if (strcmp(cm, "CDATA") == 0) {
- params.content_model.model =
- HUBBUB_CONTENT_MODEL_CDATA;
+ params.initial_state.state =
+ HUBBUB_INITIAL_STATE_DATA;
+ } else if (strcmp(cm, "RCDATA state") == 0) {
+ params.initial_state.state =
+ HUBBUB_INITIAL_STATE_RCDATA;
+ } else if (strcmp(cm, "CDATA state") == 0) {
+ params.initial_state.state =
+ HUBBUB_INITIAL_STATE_CDATA;
+ } else if (strcmp(cm, "RAWTEXT state") == 0) {
+ params.initial_state.state =
+ HUBBUB_INITIAL_STATE_RAWTEXT;
} else {
- params.content_model.model =
- HUBBUB_CONTENT_MODEL_PLAINTEXT;
+ params.initial_state.state =
+ HUBBUB_INITIAL_STATE_PLAINTEXT;
}
}
+
assert(hubbub_tokeniser_setopt(tok,
- HUBBUB_TOKENISER_CONTENT_MODEL,
+ HUBBUB_TOKENISER_INITIAL_STATE,
&params) == HUBBUB_OK);

assert(parserutils_inputstream_append(stream,
@@ -301,7 +307,8 @@ hubbub_error token_handler(const hubbub_token *token, void *pw)
gotsys,
(int) token->data.doctype.system_id.len);
}
-
+ printf(":%d: :%d:\n", (int)token->data.doctype.name.len, (int) strlen(expname));
+ printf(":%s: :%s:\n", gotname, expname);
assert(token->data.doctype.name.len == strlen(expname));
assert(strncmp(gotname, expname, strlen(expname)) == 0);

diff --git a/test/tokeniser3.c b/test/tokeniser3.c
index 949ddd0..7ce2602 100644
--- a/test/tokeniser3.c
+++ b/test/tokeniser3.c
@@ -14,6 +14,8 @@

#include "testutils.h"

+#define strlen n_str
+
typedef struct context {
const uint8_t *input;
size_t input_len;
@@ -23,7 +25,7 @@ typedef struct context {
size_t char_off;

const char *last_start_tag;
- struct array_list *content_model;
+ struct array_list *initial_state;
bool process_cdata;
} context;

@@ -61,7 +63,7 @@ int main(int argc, char **argv)
(struct json_object *) array_list_get_idx(tests, i);

ctx.last_start_tag = NULL;
- ctx.content_model = NULL;
+ ctx.initial_state = NULL;
ctx.process_cdata = false;

/* Extract settings */
@@ -85,8 +87,8 @@ int main(int argc, char **argv)
} else if (strcmp(key, "lastStartTag") == 0) {
ctx.last_start_tag = (const char *)
json_object_get_string(val);
- } else if (strcmp(key, "contentModelFlags") == 0) {
- ctx.content_model =
+ } else if (strcmp(key, "initialStates") == 0) {
+ ctx.initial_state =
json_object_get_array(val);
} else if (strcmp(key, "processCDATA") == 0) {
ctx.process_cdata =
@@ -112,10 +114,10 @@ void run_test(context *ctx)
size_t j;
struct array_list *outputsave = ctx->output;

- if (ctx->content_model == NULL) {
+ if (ctx->initial_state == NULL) {
max_i = 1;
} else {
- max_i = array_list_length(ctx->content_model);
+ max_i = array_list_length(ctx->initial_state);
}

/* We test for each of the content models specified */
@@ -159,30 +161,34 @@ void run_test(context *ctx)
HUBBUB_TOKENISER_TOKEN_HANDLER,
&params) == HUBBUB_OK);

- if (ctx->content_model == NULL) {
- params.content_model.model =
- HUBBUB_CONTENT_MODEL_PCDATA;
+ if (ctx->initial_state == NULL) {
+ params.initial_state.state =
+ HUBBUB_INITIAL_STATE_DATA;
} else {
const char *cm = json_object_get_string(
(struct json_object *)
- array_list_get_idx(ctx->content_model, i));
+ array_list_get_idx(ctx->initial_state, i));

if (strcmp(cm, "PCDATA") == 0) {
- params.content_model.model =
- HUBBUB_CONTENT_MODEL_PCDATA;
- } else if (strcmp(cm, "RCDATA") == 0) {
- params.content_model.model =
- HUBBUB_CONTENT_MODEL_RCDATA;
- } else if (strcmp(cm, "CDATA") == 0) {
- params.content_model.model =
- HUBBUB_CONTENT_MODEL_CDATA;
+ params.initial_state.state =
+ HUBBUB_INITIAL_STATE_DATA;
+ } else if (strcmp(cm, "RCDATA state") == 0) {
+ params.initial_state.state =
+ HUBBUB_INITIAL_STATE_RCDATA;
+ } else if (strcmp(cm, "CDATA state") == 0) {
+ params.initial_state.state =
+ HUBBUB_INITIAL_STATE_CDATA;
+ } else if (strcmp(cm, "RAWTEXT state") == 0) {
+ params.initial_state.state =
+ HUBBUB_INITIAL_STATE_RAWTEXT;
} else {
- params.content_model.model =
- HUBBUB_CONTENT_MODEL_PLAINTEXT;
+ params.initial_state.state =
+ HUBBUB_INITIAL_STATE_PLAINTEXT;
}
}
+
assert(hubbub_tokeniser_setopt(tok,
- HUBBUB_TOKENISER_CONTENT_MODEL,
+ HUBBUB_TOKENISER_INITIAL_STATE,
&params) == HUBBUB_OK);

printf("Input: '%.*s' (%d)\n", (int) ctx->input_len,
--
1.8.3.2
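
A note on the segfault half of this change: the crash named in the commit title appears to come from calling strlen() on a NULL expected doctype name in token_handler()'s assertions, and the #define redirects strlen to the NULL-safe n_str() added in testutils.h, which simply reports 0 for NULL. A minimal standalone sketch of the same idea, with illustrative names that are not part of the patch:

#include <assert.h>
#include <stddef.h>
#include <string.h>

/* NULL-safe string length: returns 0 for NULL instead of crashing */
static size_t safe_strlen(const char *str)
{
	if (str == NULL)
		return 0;
	return strlen(str);
}

/* Compare a received doctype name against an expected one that may be
 * absent (NULL), mirroring the shape of the assertions in token_handler(). */
static void check_doctype_name(const char *got, size_t got_len,
		const char *expected)
{
	assert(got_len == safe_strlen(expected));
	if (expected != NULL)
		assert(strncmp(got, expected, got_len) == 0);
}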
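The other half tracks the HTML5 spec's move from content model flags to tokeniser start states: the test JSON key is now "initialStates" and the state names carry a " state" suffix. The mapping done inline in both test drivers could equally be factored into a small helper; this sketch uses a local enum standing in for the hubbub state values, so the names here are illustrative rather than the library's own:

#include <string.h>

/* Stand-in for the tokeniser start states used in the patch
 * (the real values come from the hubbub headers). */
typedef enum {
	EXAMPLE_STATE_DATA,
	EXAMPLE_STATE_RCDATA,
	EXAMPLE_STATE_RAWTEXT,
	EXAMPLE_STATE_CDATA,
	EXAMPLE_STATE_PLAINTEXT
} example_state;

/* Map an "initialStates" entry from the test JSON to a start state;
 * a missing entry or "PCDATA" means the default data state, and
 * anything unrecognised falls through to PLAINTEXT, as in the patch. */
static example_state map_initial_state(const char *name)
{
	if (name == NULL || strcmp(name, "PCDATA") == 0)
		return EXAMPLE_STATE_DATA;
	if (strcmp(name, "RCDATA state") == 0)
		return EXAMPLE_STATE_RCDATA;
	if (strcmp(name, "RAWTEXT state") == 0)
		return EXAMPLE_STATE_RAWTEXT;
	if (strcmp(name, "CDATA state") == 0)
		return EXAMPLE_STATE_CDATA;
	return EXAMPLE_STATE_PLAINTEXT;
}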
