From bd11bbb5d322310a1591dc3fa0439266840140b9 Mon Sep 17 00:00:00 2001
From: Blake Felt
Date: Wed, 24 Apr 2019 22:28:00 +0000
Subject: [PATCH] Added ability to parse attributes.

---
 src/xml.c | 179 ++++++++++++++++++++++++++++++++++++++++++++++++++++--
 src/xml.h |  22 ++++
 2 files changed, 197 insertions(+), 4 deletions(-)

diff --git a/src/xml.c b/src/xml.c
index 34cd770..345a453 100644
--- a/src/xml.c
+++ b/src/xml.c
@@ -49,12 +49,23 @@ struct xml_string {
 /**
  * [OPAQUE API]
  *
- * An xml_node will always contain a tag name and a 0-terminated list of
- * children. Moreover it may contain text content.
+ * An xml_attribute may contain text content.
+ */
+struct xml_attribute {
+	struct xml_string* name;
+	struct xml_string* content;
+};
+
+/**
+ * [OPAQUE API]
+ *
+ * An xml_node will always contain a tag name, a 0-terminated list of attributes
+ * and a 0-terminated list of children. Moreover it may contain text content.
  */
 struct xml_node {
 	struct xml_string* name;
 	struct xml_string* content;
+	struct xml_attribute** attributes;
 	struct xml_node** children;
 };
 
@@ -175,6 +186,21 @@ static void xml_string_free(struct xml_string* string) {
 
 
 
+/**
+ * [PRIVATE]
+ *
+ * Frees the resources allocated by the attribute
+ */
+static void xml_attribute_free(struct xml_attribute* attribute) {
+	if(attribute->name) {
+		xml_string_free(attribute->name);
+	}
+	if(attribute->content) {
+		xml_string_free(attribute->content);
+	}
+	free(attribute);
+}
+
 /**
  * [PRIVATE]
  *
@@ -187,6 +213,13 @@ static void xml_node_free(struct xml_node* node) {
 		xml_string_free(node->content);
 	}
 
+	struct xml_attribute** at = node->attributes;
+	while(*at) {
+		xml_attribute_free(*at);
+		++at;
+	}
+	free(node->attributes);
+
 	struct xml_node** it = node->children;
 	while (*it) {
 		xml_node_free(*it);
@@ -337,6 +370,100 @@ static void xml_skip_whitespace(struct xml_parser* parser) {
 
 
 
+/**
+ * [PRIVATE]
+ *
+ * Finds and creates all attributes on the given node.
+ *
+ * The opening tag is split at spaces. The first token is taken as the tag
+ * name and `tag_open' is shortened to it. Every following token of the form
+ * name="value" or name='value' becomes an attribute, any other token is
+ * skipped. Values containing spaces are therefore cut at the first space and
+ * empty values are skipped.
+ *
+ * Names and contents of the attributes point into the buffer of `tag_open',
+ * no text is copied.
+ *
+ * @param parser Only handed to xml_parser_info
+ * @param tag_open Content of the opening tag, its length is modified
+ * @return 0-terminated list of attributes, owned by the caller
+ *
+ * @author Blake Felt
+ * @see https://github.com/Molorius
+ */
+static struct xml_attribute** xml_find_attributes(struct xml_parser* parser, struct xml_string* tag_open) {
+	xml_parser_info(parser, "find_attributes");
+	char* tmp;
+	char* rest = NULL;
+	char* token;
+	char* str_name;
+	char* str_content;
+	const unsigned char* start_name;
+	const unsigned char* start_content;
+	size_t old_elements;
+	size_t new_elements;
+	struct xml_attribute* new_attribute;
+	struct xml_attribute** attributes;
+	int position;
+
+	attributes = calloc(1, sizeof(struct xml_attribute*));
+	attributes[0] = 0;
+
+	/* Tokenize a copy, strtok_r writes terminators into its input */
+	tmp = (char*) xml_string_clone(tag_open);
+
+	token = strtok_r(tmp, " ", &rest); // skip the first value
+	if(token == NULL) {
+		goto cleanup;
+	}
+	tag_open->length = strlen(token);
+
+	for(token=strtok_r(NULL," ", &rest); token!=NULL; token=strtok_r(NULL," ", &rest)) {
+		/* A match is never longer than the token it is read from */
+		str_name = malloc(strlen(token)+1);
+		str_content = malloc(strlen(token)+1);
+		// %s=\"%s\" wasn't working for some reason, ugly hack to make it work
+		if(sscanf(token, "%[^=]=\"%[^\"]", str_name, str_content) != 2) {
+			if(sscanf(token, "%[^=]=\'%[^\']", str_name, str_content) != 2) {
+				free(str_name);
+				free(str_content);
+				continue;
+			}
+		}
+		/* tmp is assumed to be a verbatim copy of the tag, so offsets carry
+		 * over; the content starts behind the name, `=' and the quote */
+		position = token-tmp;
+		start_name = &tag_open->buffer[position];
+		start_content = &tag_open->buffer[position + strlen(str_name) + 2];
+
+		new_attribute = malloc(sizeof(struct xml_attribute));
+		new_attribute->name = malloc(sizeof(struct xml_string));
+		new_attribute->name->buffer = (unsigned char*)start_name;
+		new_attribute->name->length = strlen(str_name);
+		new_attribute->content = malloc(sizeof(struct xml_string));
+		new_attribute->content->buffer = (unsigned char*)start_content;
+		new_attribute->content->length = strlen(str_content);
+
+		/* Grow the list by one entry and keep it 0-terminated */
+		old_elements = get_zero_terminated_array_elements(attributes);
+		new_elements = old_elements + 1;
+		attributes = realloc(attributes, (new_elements+1)*sizeof(struct xml_attribute*));
+
+		attributes[new_elements-1] = new_attribute;
+		attributes[new_elements] = 0;
+
+
+		free(str_name);
+		free(str_content);
+	}
+
+cleanup:
+	free(tmp);
+	return attributes;
+}
+
+
+
 /**
  * [PRIVATE]
  *
@@ -531,6 +658,9 @@ static struct xml_node* xml_parse_node(struct xml_parser* parser) {
 	struct xml_string* tag_close = 0;
 	struct xml_string* content = 0;
 
+	size_t original_length;
+	struct xml_attribute** attributes;
+
 	struct xml_node** children = calloc(1, sizeof(struct xml_node*));
 	children[0] = 0;
 
@@ -543,11 +673,16 @@ static struct xml_node* xml_parse_node(struct xml_parser* parser) {
 		goto exit_failure;
 	}
 
+	original_length = tag_open->length;
+	attributes = xml_find_attributes(parser, tag_open);
+
 	/* If tag ends with `/' it's self closing, skip content lookup */
-	if (tag_open->length > 0 && '/' == tag_open->buffer[tag_open->length - 1]) {
+	if (tag_open->length > 0 && '/' == tag_open->buffer[original_length - 1]) {
 		/* Drop `/'
 		 */
-		--tag_open->length;
+		if (tag_open->length == original_length) { /* `/' still ends the name */
+			--tag_open->length;
+		}
 		goto node_creation;
 	}
 
@@ -612,6 +747,7 @@ node_creation:;
 	struct xml_node* node = malloc(sizeof(struct xml_node));
 	node->name = tag_open;
 	node->content = content;
+	node->attributes = attributes;
 	node->children = children;
 	return node;
 
@@ -795,6 +931,41 @@ struct xml_node* xml_node_child(struct xml_node* node, size_t child) {
 
 
 
+/**
+ * [PUBLIC API]
+ */
+size_t xml_node_attributes(struct xml_node* node) {
+	return get_zero_terminated_array_elements(node->attributes);
+}
+
+
+
+/**
+ * [PUBLIC API]
+ */
+struct xml_string* xml_node_attribute_name(struct xml_node* node, size_t attribute) {
+	if(attribute >= xml_node_attributes(node)) {
+		return 0;
+	}
+
+	return node->attributes[attribute]->name;
+}
+
+
+
+/**
+ * [PUBLIC API]
+ */
+struct xml_string* xml_node_attribute_content(struct xml_node* node, size_t attribute) {
+	if(attribute >= xml_node_attributes(node)) {
+		return 0;
+	}
+
+	return node->attributes[attribute]->content;
+}
+
+
+
 /**
  * [PUBLIC API]
  */
diff --git a/src/xml.h b/src/xml.h
index 2559a63..6217a12 100644
--- a/src/xml.h
+++ b/src/xml.h
@@ -40,6 +40,7 @@ extern "C" {
  */
 struct xml_document;
 struct xml_node;
+struct xml_attribute;
 
 /**
  * Internal character sequence representation
@@ -125,6 +126,27 @@ struct xml_node* xml_node_child(struct xml_node* node, size_t child);
 
 
 
+/**
+ * @return Number of attribute nodes
+ */
+size_t xml_node_attributes(struct xml_node* node);
+
+
+
+/**
+ * @return the n-th attribute name or 0 if out of range
+ */
+struct xml_string* xml_node_attribute_name(struct xml_node* node, size_t attribute);
+
+
+
+/**
+ * @return the n-th attribute content or 0 if out of range
+ */
+struct xml_string* xml_node_attribute_content(struct xml_node* node, size_t attribute);
+
+
+
 /**
  * @return The node described by the path or 0 if child cannot be found
  * @warning Each element on the way must be unique