diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..bdc5af0
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,2 @@
+*~
+build
diff --git a/CMakeLists.txt b/CMakeLists.txt
new file mode 100644
index 0000000..7d36f39
--- /dev/null
+++ b/CMakeLists.txt
@@ -0,0 +1,41 @@
+# Project setup (the minimum version has to be declared before PROJECT)
+CMAKE_MINIMUM_REQUIRED(VERSION 2.6.0 FATAL_ERROR)
+PROJECT(xml)
+
+
+# Compiler setup
+SET(CMAKE_C_FLAGS_DEBUG "-DDEBUG")
+SET(CMAKE_C_FLAGS_RELEASE "-O2")
+
+#IF (${CMAKE_BUILD_TYPE} strequal "Debug")
+#	ADD_DEFINITIONS(-DDEBUG)
+#
+#ELSE (DEFINED ${DEBUG_BUILD})
+#	SET(CMAKE_BUILD_TYPE ${CMAKE_C_FLAGS_RELEASE})
+#ENDIF (DEFINED ${DEBUG_BUILD})
+
+
+# Sources
+SET(SOURCE_DIRECTORY src)
+SET(TEST_SOURCE_DIRECTORY test)
+
+
+# Build library
+ADD_LIBRARY(xml STATIC
+	${SOURCE_DIRECTORY}/xml.c
+)
+
+
+# Build unit cases
+INCLUDE_DIRECTORIES(${SOURCE_DIRECTORY})
+
+ADD_EXECUTABLE(test-xml
+	${TEST_SOURCE_DIRECTORY}/test-xml.c
+)
+TARGET_LINK_LIBRARIES(test-xml xml)
+
+
+# Deploy the static library together with its public header
+INSTALL(TARGETS xml DESTINATION lib)
+INSTALL(FILES ${SOURCE_DIRECTORY}/xml.h DESTINATION include)
+
diff --git a/src/xml.c b/src/xml.c
new file mode 100644
index 0000000..863c5d7
--- /dev/null
+++ b/src/xml.c
@@ -0,0 +1,736 @@
+/**
+ * Copyright (c) 2012 ooxi/xml.c
+ *     https://github.com/ooxi/xml.c
+ *
+ * This software is provided 'as-is', without any express or implied warranty.
+ * In no event will the authors be held liable for any damages arising from the
+ * use of this software.
+ *
+ * Permission is granted to anyone to use this software for any purpose,
+ * including commercial applications, and to alter it and redistribute it
+ * freely, subject to the following restrictions:
+ *
+ *  1. The origin of this software must not be misrepresented; you must not
+ *     claim that you wrote the original software. If you use this software in a
+ *     product, an acknowledgment in the product documentation would be
+ *     appreciated but is not required.
+ *
+ *  2.
Altered source versions must be plainly marked as such, and must not be + * misrepresented as being the original software. + * + * 3. This notice may not be removed or altered from any source distribution. + */ +#include +#include +#include +#include +#include "xml.h" + + + + + +/** + * [OPAQUE API] + * + * UTF-8 text + */ +struct xml_string { + uint8_t* buffer; + size_t length; +}; + +/** + * [OPAQUE API] + * + * An xml_node will always contain a tag name and a 0-terminated list of + * children. Moreover it may contain text content. + */ +struct xml_node { + struct xml_string* name; + struct xml_string* content; + struct xml_node** children; +}; + +/** + * [OPAQUE API] + * + * An xml_document simply contains the root node and the underlying buffer + */ +struct xml_document { + struct xml_string buffer; + struct xml_node* root; +}; + + + + + +/** + * [PRIVATE] + * + * Parser context + */ +struct xml_parser { + uint8_t* buffer; + size_t position; + size_t length; +}; + +/** + * [PRIVATE] + * + * Character offsets + */ +enum xml_parser_offset { + NO_CHARACTER = -1, + CURRENT_CHARACTER = 0, + NEXT_CHARACTER = 1, +}; + + + + + +/** + * [PRIVATE] + * + * @return Number of elements in 0-terminated array + */ +static size_t get_zero_terminated_array_elements(struct xml_node** nodes) { + size_t elements = 0; + + while (nodes[elements]) { + ++elements; + } + + return elements; +} + + + +/** + * [PRIVATE] + * + * @warning No UTF conversions will be attempted + * + * @return true gdw. 
a == b + */ +static _Bool xml_string_equals(struct xml_string* a, struct xml_string* b) { + _Bool const true = 1; + _Bool const false = 0; + + if (a->length != b->length) { + return false; + } + + size_t i = 0; for (; i < a->length; ++i) { + if (a->buffer[i] != b->buffer[i]) { + return false; + } + } + + return true; +} + + + +/** + * [PRIVATE] + * + * Frees the resources allocated by the string + * + * @waring `buffer` must _not_ be freed, since it is a reference to the + * document's buffer + */ +static void xml_string_free(struct xml_string* string) { + free(string); +} + + + +/** + * [PRIVATE] + * + * Frees the resources allocated by the node + */ +static void xml_node_free(struct xml_node* node) { + xml_string_free(node->name); + + if (node->content) { + xml_string_free(node->content); + } + + struct xml_node** it = node->children; + while (*it) { + xml_node_free(*it); + ++it; + } + free(node->children); + + free(node); +} + + + +/** + * [PRIVATE] + * + * Echos the parsers call stack for debugging purposes + */ +#ifdef DEBUG +static void xml_parser_info(struct xml_parser* parser, char const* message) { + fprintf(stdout, "xml_parser_info %s\n", message); +} +#else +#define xml_parser_info(parser, message) {} +#endif + + + +/** + * [PRIVATE] + * + * Echos an error regarding the parser's source to the console + */ +static void xml_parser_error(struct xml_parser* parser, enum xml_parser_offset offset, char const* message) { + int row = 0; + int column = 0; + + #define min(X,Y) ((X) < (Y) ? (X) : (Y)) + #define max(X,Y) ((X) > (Y) ? 
(X) : (Y)) + size_t character = max(0, min(parser->length, parser->position + offset)); + #undef min + #undef max + + size_t position = 0; for (; position < character; ++position) { + column++; + + if ('\n' == parser->buffer[position]) { + row++; + column = 0; + } + } + + if (NO_CHARACTER != offset) { + fprintf(stderr, "xml_parser_error at %i:%i (is %c): %s\n", + row + 1, column, parser->buffer[character], message + ); + } else { + fprintf(stderr, "xml_parser_error at %i:%i: %s\n", + row + 1, column, message + ); + } +} + + + +/** + * [PRIVATE] + * + * Returns the n-th not-whitespace byte in parser and 0 if such a byte does not + * exist + */ +static uint8_t xml_parser_peek(struct xml_parser* parser, size_t n) { + size_t position = parser->position; + + while (position < parser->length) { + if (!isspace(parser->buffer[position])) { + if (n == 0) { + return parser->buffer[position]; + } else { + --n; + } + } + + position++; + } + + return 0; +} + + + +/** + * [PRIVATE] + * + * Moves the parser's position n bytes. If the new position would be out of + * bounds, it will be converted to the bounds itself + */ +static void xml_parser_consume(struct xml_parser* parser, size_t n) { + + /* Debug information + */ + #ifdef DEBUG + #define min(X,Y) ((X) < (Y) ? 
(X) : (Y)) + char* consumed = alloca((n + 1) * sizeof(char)); + memcpy(consumed, &parser->buffer[parser->position], min(n, parser->length - parser->position)); + consumed[n] = 0; + #undef min + + size_t message_buffer_length = 512; + char* message_buffer = alloca(512 * sizeof(char)); + snprintf(message_buffer, message_buffer_length, "Consuming %li bytes \"%s\"", (long)n, consumed); + message_buffer[message_buffer_length - 1] = 0; + + xml_parser_info(parser, message_buffer); + #endif + + + /* Move the position forward + */ + parser->position += n; + + /* Don't go too far + * + * @warning Valid because parser->length must be greater than 0 + */ + if (parser->position >= parser->length) { + parser->position = parser->length - 1; + } +} + + + +/** + * [PRIVATE] + * + * Skips to the next non-whitespace character + */ +static void xml_skip_whitespace(struct xml_parser* parser) { + xml_parser_info(parser, "whitespace"); + + while (isspace(parser->buffer[parser->position])) { + if (parser->position + 1 >= parser->length) { + return; + } else { + parser->position++; + } + } +} + + + +/** + * [PRIVATE] + * + * Parses the name out of the an XML tag's ending + * + * ---( Example )--- + * tag_name> + * --- + */ +static struct xml_string* xml_parse_tag_end(struct xml_parser* parser) { + xml_parser_info(parser, "tag_end"); + size_t start = parser->position; + size_t length = 0; + + /* Parse until `>' or a whitespace is reached + */ + while (start + length < parser->length) { + uint8_t current = xml_parser_peek(parser, CURRENT_CHARACTER); + + if (('>' == current) || isspace(current)) { + break; + } else { + xml_parser_consume(parser, 1); + length++; + } + } + + /* Consume `>' + */ + if ('>' != xml_parser_peek(parser, CURRENT_CHARACTER)) { + xml_parser_error(parser, CURRENT_CHARACTER, "xml_parse_tag_end::expected tag end"); + return 0; + } + xml_parser_consume(parser, 1); + + /* Return parsed tag name + */ + struct xml_string* name = malloc(sizeof(struct xml_string)); + 
name->buffer = &parser->buffer[start]; + name->length = length; + return name; +} + + + +/** + * [PRIVATE] + * + * Parses an opening XML tag without attributes + * + * ---( Example )--- + * + * --- + */ +static struct xml_string* xml_parse_tag_open(struct xml_parser* parser) { + xml_parser_info(parser, "tag_open"); + xml_skip_whitespace(parser); + + /* Consume `<' + */ + if ('<' != xml_parser_peek(parser, CURRENT_CHARACTER)) { + xml_parser_error(parser, CURRENT_CHARACTER, "xml_parse_tag_open::expected opening tag"); + return 0; + } + xml_parser_consume(parser, 1); + + /* Consume tag name + */ + return xml_parse_tag_end(parser); +} + + + +/** + * [PRIVATE] + * + * Parses an closing XML tag without attributes + * + * ---( Example )--- + * + * --- + */ +static struct xml_string* xml_parse_tag_close(struct xml_parser* parser) { + xml_parser_info(parser, "tag_close"); + xml_skip_whitespace(parser); + + /* Consume `position; + size_t length = 0; + + /* Consume until `<' is reached + */ + while (start + length < parser->length) { + uint8_t current = xml_parser_peek(parser, CURRENT_CHARACTER); + + if ('<' == current) { + break; + } else { + xml_parser_consume(parser, 1); + length++; + } + } + + /* Next character must be an `<' or we have reached end of file + */ + if ('<' != xml_parser_peek(parser, CURRENT_CHARACTER)) { + xml_parser_error(parser, CURRENT_CHARACTER, "xml_parse_content::expected <"); + return 0; + } + + /* Ignore tailing whitespace + */ + while ((length > 0) && isspace(parser->buffer[start + length - 1])) { + length--; + } + + /* Return text + */ + struct xml_string* content = malloc(sizeof(struct xml_string)); + content->buffer = &parser->buffer[start]; + content->length = length; + return content; +} + + + +/** + * [PRIVATE] + * + * Parses an XML fragment node + * + * ---( Example without children )--- + * Text + * --- + * + * ---( Example with children )--- + * + * Text + * Text + * Content + * + * --- + */ +static struct xml_node* xml_parse_node(struct 
xml_parser* parser) { + xml_parser_info(parser, "node"); + + /* Setup variables + */ + struct xml_string* tag_open = 0; + struct xml_string* tag_close = 0; + struct xml_string* content = 0; + + struct xml_node** children = calloc(1, sizeof(struct xml_node*)); + children[0] = 0; + + + /* Parse open tag + */ + tag_open = xml_parse_tag_open(parser); + if (!tag_open) { + xml_parser_error(parser, NO_CHARACTER, "xml_parse_node::tag_open"); + goto exit_failure; + } + + + /* If the content does not start with '<', a text content is assumed + */ + if ('<' != xml_parser_peek(parser, CURRENT_CHARACTER)) { + content = xml_parse_content(parser); + + if (!content) { + xml_parser_error(parser, 0, "xml_parse_node::content"); + goto exit_failure; + } + + + /* Otherwise children are to be expected + */ + } else while ('/' != xml_parser_peek(parser, NEXT_CHARACTER)) { + + /* Parse child node + */ + struct xml_node* child = xml_parse_node(parser); + if (!child) { + xml_parser_error(parser, NEXT_CHARACTER, "xml_parse_node::child"); + goto exit_failure; + } + + /* Grow child array :) + */ + size_t old_elements = get_zero_terminated_array_elements(children); + size_t new_elements = old_elements + 1; + children = realloc(children, new_elements * sizeof(struct xml_node*)); + + /* Save child + */ + children[new_elements - 1] = child; + children[new_elements] = 0; + } + + + /* Parse close tag + */ + tag_close = xml_parse_tag_close(parser); + if (!tag_close) { + xml_parser_error(parser, NO_CHARACTER, "xml_parse_node::tag_close"); + goto exit_failure; + } + + + /* Close tag has to match open tag + */ + if (!xml_string_equals(tag_open, tag_close)) { + xml_parser_error(parser, NO_CHARACTER, "xml_parse_node::tag missmatch"); + goto exit_failure; + } + + + /* Return parsed node + */ + xml_string_free(tag_close); + + struct xml_node* node = malloc(sizeof(struct xml_node)); + node->name = tag_open; + node->content = content; + node->children = children; + return node; + + + /* A failure occured, so 
free all allocalted resources + */ +exit_failure: + if (tag_open) { + xml_string_free(tag_open); + } + if (tag_close) { + xml_string_free(tag_close); + } + if (content) { + xml_string_free(content); + } + + struct xml_node** it = children; + while (*it) { + xml_node_free(*it); + ++it; + } + free(children); + + return 0; +} + + + + + +/** + * [PUBLIC API] + * + * + */ +struct xml_document* xml_parse_document(uint8_t* buffer, size_t length) { + + /* Initialize parser + */ + struct xml_parser parser = { + .buffer = buffer, + .position = 0, + .length = length + }; + + /* An empty buffer can never contain a valid document + */ + if (!length) { + xml_parser_error(&parser, NO_CHARACTER, "xml_parse_document::length equals zero"); + return 0; + } + + /* Parse the root node + */ + struct xml_node* root = xml_parse_node(&parser); + if (!root) { + xml_parser_error(&parser, NO_CHARACTER, "xml_parse_document::parsing document failed"); + return 0; + } + + /* Return parsed document + */ + struct xml_document* document = malloc(sizeof(struct xml_document)); + document->buffer.buffer = buffer; + document->buffer.length = length; + document->root = root; + + return document; +} + + + +/** + * [PUBLIC API] + */ +void xml_document_free(struct xml_document* document, _Bool free_buffer) { + if (free_buffer) { + free(document->buffer.buffer); + } + free(document); +} + + + +/** + * [PUBLIC API] + */ +struct xml_node* xml_document_root(struct xml_document* document) { + return document->root; +} + + + +/** + * [PUBLIC API] + */ +struct xml_string* xml_node_name(struct xml_node* node) { + return node->name; +} + + + +/** + * [PUBLIC API] + */ +struct xml_string* xml_node_content(struct xml_node* node) { + return node->content; +} + + + +/** + * [PUBLIC API] + * + * @warning O(n) + */ +size_t xml_node_children(struct xml_node* node) { + return get_zero_terminated_array_elements(node->children); +} + + + +/** + * [PUBLIC API] + */ +struct xml_node* xml_node_child(struct xml_node* node, 
size_t child) { + if (child >= xml_node_children(node)) { + return 0; + } + + return node->children[child]; +} + + + +/** + * [PUBLIC API] + */ +size_t xml_string_length(struct xml_string* string) { + return string->length; +} + + + +/** + * [PUBLIC API] + */ +void xml_string_copy(struct xml_string* string, uint8_t* buffer, size_t length) { + #define min(X,Y) ((X) < (Y) ? (X) : (Y)) + length = min(length, string->length); + #undef min + + memcpy(buffer, string->buffer, length); +} + diff --git a/src/xml.h b/src/xml.h new file mode 100644 index 0000000..5ba87a9 --- /dev/null +++ b/src/xml.h @@ -0,0 +1,130 @@ +/** + * Copyright (c) 2012 ooxi/xml.c + * https://github.com/ooxi/xml.c + * + * This software is provided 'as-is', without any express or implied warranty. + * In no event will the authors be held liable for any damages arising from the + * use of this software. + * + * Permission is granted to anyone to use this software for any purpose, + * including commercial applications, and to alter it and redistribute it + * freely, subject to the following restrictions: + * + * 1. The origin of this software must not be misrepresented; you must not + * claim that you wrote the original software. If you use this software in a + * product, an acknowledgment in the product documentation would be + * appreciated but is not required. + * + * 2. Altered source versions must be plainly marked as such, and must not be + * misrepresented as being the original software. + * + * 3. This notice may not be removed or altered from any source distribution. 
+ */ +#ifndef HEADER_GLTOOLKIT_XML +#define HEADER_GLTOOLKIT_XML + + +/** + * Includes + */ +#include +#include + + + +/** + * Opaque structure holding the parsed xml document + */ +struct xml_document; +struct xml_node; + +/** + * Internal character sequence representation + */ +struct xml_string; + + + +/** + * Tries to parse the XML fragment in buffer + * + * @param buffer Chunk to parse + * @param length Size of the buffer + * + * @warning `buffer` will be referenced by the document, you may not free it + * until you free the xml_document + * @warning You have to call xml_free after you finished using the document + * + * @return The parsed xml fragment iff `parsing was successful + */ +struct xml_document* xml_parse_document(uint8_t* buffer, size_t length); + + + +/** + * Frees all resources associated with the document. All xml_node and xml_string + * references obtained through the document will be invalidated + * + * @param document xml_document to free + * @param free_buffer iff true the internal buffer supplied via xml_parse_buffer + * will be freed with the `free` system call + */ +void xml_document_free(struct xml_document* document, _Bool free_buffer); + + +/** + * @return xml_node representing the document root + */ +struct xml_node* xml_document_root(struct xml_document* document); + + + +/** + * @return The xml_node's tag name + */ +struct xml_string* xml_node_name(struct xml_node* node); + + + +/** + * @return The xml_node's string content (if available, otherwise NULL) + */ +struct xml_string* xml_node_content(struct xml_node* node); + + + +/** + * @return Number of child nodes + */ +size_t xml_node_children(struct xml_node* node); + + + +/** + * @return The n-th child or 0 if out of range + */ +struct xml_node* xml_node_child(struct xml_node* node, size_t child); + + + +/** + * @return Length of the string + */ +size_t xml_string_length(struct xml_string* string); + + + +/** + * Copies the string into the supplied buffer + * + * @warning String 
will not be 0-terminated + * @warning Will write at most length bytes, even if the string is longer + */ +void xml_string_copy(struct xml_string* string, uint8_t* buffer, size_t length); + + + + + +#endif + diff --git a/test/test-xml.c b/test/test-xml.c new file mode 100644 index 0000000..82c2f3f --- /dev/null +++ b/test/test-xml.c @@ -0,0 +1,155 @@ +/** + * Copyright (c) 2012 ooxi/xml.c + * https://github.com/ooxi/xml.c + * + * This software is provided 'as-is', without any express or implied warranty. + * In no event will the authors be held liable for any damages arising from the + * use of this software. + * + * Permission is granted to anyone to use this software for any purpose, + * including commercial applications, and to alter it and redistribute it + * freely, subject to the following restrictions: + * + * 1. The origin of this software must not be misrepresented; you must not + * claim that you wrote the original software. If you use this software in a + * product, an acknowledgment in the product documentation would be + * appreciated but is not required. + * + * 2. Altered source versions must be plainly marked as such, and must not be + * misrepresented as being the original software. + * + * 3. This notice may not be removed or altered from any source distribution. 
+ */
+/* NOTE(review): the header names were missing from the include directives;
+ * they are restored from the functions used below (fprintf, exit, malloc,
+ * strlen, memcpy) and from the xml_* API under test -- confirm
+ */
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <xml.h>
+
+static _Bool true = 1;
+static _Bool false = 0;
+
+
+
+
+
+/**
+ * Will halt the program iff assertion fails
+ *
+ * @param condition Result of the checked expression
+ * @param message Text printed to stderr if `condition' is false
+ * @param func, file, line Location of the assertion, filled in by assert_that
+ */
+static void _assert_that(_Bool condition, char const* message, char const* func, char const* file, int line) {
+	if (!condition) {
+		fprintf(stderr, "Assertion failed: %s, in %s (%s:%i)\n", message, func, file, line);
+		exit(EXIT_FAILURE);
+	}
+}
+
+#define assert_that(condition, message) \
+	_assert_that(condition, message, __func__, __FILE__, __LINE__)
+
+
+
+/**
+ * @return true iff xml string equals the c string. A missing xml string (0)
+ *     never equals anything
+ */
+static _Bool string_equals(struct xml_string* a, char const* b) {
+
+	/* xml_node_content returns 0 for nodes without text content
+	 */
+	if (!a) {
+		fprintf(stderr, "string_equals: xml string is missing, expected %s\n", b);
+		return false;
+	}
+
+	size_t a_length = xml_string_length(a);
+	size_t b_length = strlen(b);
+
+	/* xml strings are not 0-terminated, so a terminated copy is needed for
+	 * the diagnostic output
+	 */
+	uint8_t* a_buffer = malloc((a_length + 1) * sizeof(uint8_t));
+	if (!a_buffer) {
+		fprintf(stderr, "string_equals: out of memory\n");
+		return false;
+	}
+	xml_string_copy(a, a_buffer, a_length);
+	a_buffer[a_length] = 0;
+
+	_Bool equal = true;
+
+	if (a_length != b_length) {
+		fprintf(stderr, "string_equals: %s#%i <> %s#%i\n", a_buffer, (int)a_length, b, (int)b_length);
+		equal = false;
+
+	} else if (0 != memcmp(a_buffer, b, a_length)) {
+		fprintf(stderr, "string_equals: %s <> %s\n", a_buffer, b);
+		equal = false;
+	}
+
+	free(a_buffer);
+	return equal;
+}
+
+
+
+/**
+ * Converts a static character array to an uint8_t data source
+ *
+ * @warning `content' has to be a string literal: its size (including the
+ *     terminating 0) is taken with sizeof. Because the terminator is copied
+ *     as well, strlen can be used on the resulting buffer
+ */
+#define SOURCE(source, content)						\
+	uint8_t source[sizeof(content)];				\
+	memcpy(source, content, sizeof(content))
+
+
+
+/**
+ * Tries to parse a simple document containing only one tag
+ *
+ * NOTE(review): the markup inside the literal was missing from the reviewed
+ * text and is restored from the assertions below -- confirm
+ */
+static void test_xml_parse_document_0(void) {
+	SOURCE(source, "<Hello>World</Hello>");
+
+	struct xml_document* document = xml_parse_document(source, strlen((char const*)source));
+	assert_that(document, "Could not parse document");
+
+	struct xml_node* root = xml_document_root(document);
+	assert_that(string_equals(xml_node_name(root), "Hello"), "root node name must be `Hello'");
+	assert_that(string_equals(xml_node_content(root), "World"), "root node content must be `World'");
+
+	/* The source lives on the stack and must not be freed
+	 */
+	xml_document_free(document, false);
+}
+
+/**
+ * Tries to parse a document containing multiple tags
+ *
+ * NOTE(review): the markup inside the literal was missing from the reviewed
+ * text and is restored from the assertions below -- confirm
+ */
+static void test_xml_parse_document_1(void) {
+	SOURCE(source, ""
+		"<Parent>\n"
+		"\t<Child>\n"
+		"\t\tFirst content\n"
+		"\t</Child>\n"
+		"\t<Child>\n"
+		"\t\tSecond content\n"
+		"\t</Child>\n"
+		"</Parent>\n"
+	);
+	struct xml_document* document = xml_parse_document(source, strlen((char const*)source));
+	assert_that(document, "Could not parse document");
+
+	struct xml_node* root = xml_document_root(document);
+	assert_that(string_equals(xml_node_name(root), "Parent"), "root node name must be `Parent'");
+	assert_that(2 == xml_node_children(root), "root must have two children");
+
+	struct xml_node* first_child = xml_node_child(root, 0);
+	struct xml_node* second_child = xml_node_child(root, 1);
+	assert_that(first_child && second_child, "Failed retrieving the children of root");
+
+	struct xml_node* third_child = xml_node_child(root, 2);
+	assert_that(!third_child, "root has a third child where none should be");
+
+	assert_that(string_equals(xml_node_name(first_child), "Child"), "first_child node name must be `Child'");
+	assert_that(string_equals(xml_node_content(first_child), "First content"), "first_child node content must be `First content'");
+	assert_that(string_equals(xml_node_name(second_child), "Child"), "second_child node name must be `Child'");
+	assert_that(string_equals(xml_node_content(second_child), "Second content"), "second_child node content must be `Second content'");
+
+	/* The source lives on the stack and must not be freed
+	 */
+	xml_document_free(document, false);
+}
+
+
+
+
+
+/**
+ * Console interface
+ *
+ * Runs all tests, the first failing assertion terminates the program with
+ * EXIT_FAILURE
+ */
+int main(int argc, char** argv) {
+	(void)argc;
+	(void)argv;
+
+	test_xml_parse_document_0();
+	test_xml_parse_document_1();
+
+	fprintf(stdout, "All tests passed :-)\n");
+	exit(EXIT_SUCCESS);
+}
+