Browse Source

xml.c -- a tiny xml subset parser

Similar to the GLib Markup parser, which also just parses an xml subset, xml.c is a simple, small and self contained xml parser in one file. Ideal for embedding into other projects without the need for big external dependencies.
master
ooxi 11 years ago
parent
commit
6489e62ff9
5 changed files with 1064 additions and 0 deletions
  1. +2
    -0
      .gitignore
  2. +41
    -0
      CMakeLists.txt
  3. +736
    -0
      src/xml.c
  4. +130
    -0
      src/xml.h
  5. +155
    -0
      test/test-xml.c

+ 2
- 0
.gitignore View File

@@ -0,0 +1,2 @@
*~
build

+ 41
- 0
CMakeLists.txt View File

@@ -0,0 +1,41 @@
# Project setup
# CMAKE_MINIMUM_REQUIRED has to precede PROJECT so that the policy settings are
# in place before the project is configured
CMAKE_MINIMUM_REQUIRED(VERSION 2.6.0 FATAL_ERROR)
PROJECT(xml)
# Compiler setup
SET(CMAKE_C_FLAGS_DEBUG "-DDEBUG")
SET(CMAKE_C_FLAGS_RELEASE "-O2")
#IF (${CMAKE_BUILD_TYPE} strequal "Debug")
# ADD_DEFINITIONS(-DDEBUG)
#
#ELSE (DEFINED ${DEBUG_BUILD})
# SET(CMAKE_BUILD_TYPE ${CMAKE_C_FLAGS_RELEASE})
#ENDIF (DEFINED ${DEBUG_BUILD})
# Sources
SET(SOURCE_DIRECTORY src)
SET(TEST_SOURCE_DIRECTORY test)
# Build library
ADD_LIBRARY(xml STATIC
	${SOURCE_DIRECTORY}/xml.c
)
# Build unit cases
INCLUDE_DIRECTORIES(${SOURCE_DIRECTORY})
ADD_EXECUTABLE(test-xml
	${TEST_SOURCE_DIRECTORY}/test-xml.c
)
TARGET_LINK_LIBRARIES(test-xml xml)
# Deploy: the static library plus the public header that declares its API
INSTALL(TARGETS xml DESTINATION lib)
INSTALL(FILES ${SOURCE_DIRECTORY}/xml.h DESTINATION include)

+ 736
- 0
src/xml.c View File

@@ -0,0 +1,736 @@
/**
* Copyright (c) 2012 ooxi/xml.c
* https://github.com/ooxi/xml.c
*
* This software is provided 'as-is', without any express or implied warranty.
* In no event will the authors be held liable for any damages arising from the
* use of this software.
*
* Permission is granted to anyone to use this software for any purpose,
* including commercial applications, and to alter it and redistribute it
* freely, subject to the following restrictions:
*
* 1. The origin of this software must not be misrepresented; you must not
* claim that you wrote the original software. If you use this software in a
* product, an acknowledgment in the product documentation would be
* appreciated but is not required.
*
* 2. Altered source versions must be plainly marked as such, and must not be
* misrepresented as being the original software.
*
* 3. This notice may not be removed or altered from any source distribution.
*/
#include <ctype.h>
#include <malloc.h>
#include <stdio.h>
#include <stdlib.h>
#include "xml.h"





/**
* [OPAQUE API]
*
* UTF-8 text
*
* A non-owning slice: the parser points `buffer` into the document's buffer
* and records how many bytes belong to the slice. The bytes are not
* 0-terminated.
*/
struct xml_string {
/* First byte of the slice (inside the document's buffer) */
uint8_t* buffer;
/* Number of bytes in the slice */
size_t length;
};

/**
* [OPAQUE API]
*
* An xml_node will always contain a tag name and a 0-terminated list of
* children. Moreover it may contain text content.
*/
struct xml_node {
/* Tag name taken from the opening tag */
struct xml_string* name;
/* Text content, or 0 when the parser found no text for this node */
struct xml_string* content;
/* Child nodes; the entry after the last child is 0 */
struct xml_node** children;
};

/**
* [OPAQUE API]
*
* An xml_document simply contains the root node and the underlying buffer
*/
struct xml_document {
/* Buffer pointer and length as handed to xml_parse_document */
struct xml_string buffer;
/* Root node produced by parsing the buffer */
struct xml_node* root;
};





/**
* [PRIVATE]
*
* Parser context
*/
struct xml_parser {
/* Bytes being parsed */
uint8_t* buffer;
/* Index of the byte the parser currently looks at */
size_t position;
/* Total number of bytes in `buffer` */
size_t length;
};

/**
* [PRIVATE]
*
* Character offsets
*
* Passed to xml_parser_error to select which byte (relative to the parser's
* position) an error refers to, and to xml_parser_peek as the index of the
* non-whitespace byte to look at.
*/
enum xml_parser_offset {
/* xml_parser_error prints no offending character for this value */
NO_CHARACTER = -1,
CURRENT_CHARACTER = 0,
NEXT_CHARACTER = 1,
};





/**
 * [PRIVATE]
 *
 * Counts the entries of a 0-terminated array of node pointers.
 *
 * @param nodes Array whose last entry is 0
 *
 * @return Number of entries in front of the terminating 0
 */
static size_t get_zero_terminated_array_elements(struct xml_node** nodes) {
	struct xml_node** cursor = nodes;

	/* Walk forward until the terminator is hit */
	for (; *cursor; ++cursor) {
	}

	return (size_t)(cursor - nodes);
}



/**
 * [PRIVATE]
 *
 * Byte-wise comparison of two strings.
 *
 * @warning No UTF conversions will be attempted
 *
 * @return 1 iff both strings have the same length and the same bytes,
 *     0 otherwise
 */
static _Bool xml_string_equals(struct xml_string* a, struct xml_string* b) {
	size_t index = 0;

	/* Strings of different length can never be equal */
	if (a->length != b->length) {
		return 0;
	}

	while (index < a->length) {
		if (a->buffer[index] != b->buffer[index]) {
			return 0;
		}
		++index;
	}

	return 1;
}



/**
* [PRIVATE]
*
* Frees the resources allocated by the string
*
* @warning `buffer` must _not_ be freed, since it is a reference to the
* document's buffer
*/
static void xml_string_free(struct xml_string* string) {
/* Only the xml_string record itself is released */
free(string);
}



/**
 * [PRIVATE]
 *
 * Releases a node together with its name, its optional content, all of its
 * children (recursively) and the child array itself.
 */
static void xml_node_free(struct xml_node* node) {
	size_t index;

	xml_string_free(node->name);
	if (node->content) {
		xml_string_free(node->content);
	}

	/* The child array is 0-terminated */
	for (index = 0; node->children[index]; ++index) {
		xml_node_free(node->children[index]);
	}
	free(node->children);

	free(node);
}



/**
 * [PRIVATE]
 *
 * Echoes the parser's call stack for debugging purposes.
 *
 * With DEBUG defined the message is written to stdout. Without DEBUG the call
 * expands to a no-op expression, so it can be used as a statement anywhere,
 * including in an unbraced `if`/`else`.
 */
#ifdef DEBUG
static void xml_parser_info(struct xml_parser* parser, char const* message) {
	/* The parser is accepted for symmetry with xml_parser_error only */
	(void)parser;
	fprintf(stdout, "xml_parser_info %s\n", message);
}
#else
#define xml_parser_info(parser, message) ((void)0)
#endif



/**
 * [PRIVATE]
 *
 * Echoes an error regarding the parser's source to stderr.
 *
 * The reported location is the parser's position shifted by `offset` and
 * clamped to the buffer. Rows are printed 1-based; the column counts the bytes
 * since the last line break.
 *
 * @param parser Parser whose buffer and position describe the error location
 * @param offset Byte offset relative to the parser's position; NO_CHARACTER
 *     suppresses printing of the offending character
 * @param message Text appended to the location
 */
static void xml_parser_error(struct xml_parser* parser, enum xml_parser_offset offset, char const* message) {
	int row = 0;
	int column = 0;
	size_t position;

	/* Resolve the byte index without relying on unsigned wrap-around: a
	 * negative offset at position 0 must stay at 0 instead of jumping to the
	 * end of the buffer
	 */
	size_t character = parser->position;
	if (character > parser->length) {
		character = parser->length;
	}
	if ((int)offset < 0) {
		size_t const backward = (size_t)(-(int)offset);
		character -= (backward < character) ? backward : character;
	} else {
		size_t const forward = (size_t)offset;
		size_t const remaining = parser->length - character;
		character += (forward < remaining) ? forward : remaining;
	}

	for (position = 0; position < character; ++position) {
		column++;

		if ('\n' == parser->buffer[position]) {
			row++;
			column = 0;
		}
	}

	/* Only show the offending byte if it really lies inside the buffer */
	if ((NO_CHARACTER != offset) && (character < parser->length)) {
		fprintf(stderr, "xml_parser_error at %i:%i (is %c): %s\n",
			row + 1, column, parser->buffer[character], message
		);
	} else {
		fprintf(stderr, "xml_parser_error at %i:%i: %s\n",
			row + 1, column, message
		);
	}
}



/**
 * [PRIVATE]
 *
 * Looks ahead without moving the parser.
 *
 * @param n Zero-based index of the wanted non-whitespace byte, counted from
 *     the parser's position
 *
 * @return The n-th non-whitespace byte, or 0 if the buffer ends first
 */
static uint8_t xml_parser_peek(struct xml_parser* parser, size_t n) {
	size_t cursor;

	for (cursor = parser->position; cursor < parser->length; ++cursor) {
		uint8_t const byte = parser->buffer[cursor];

		/* Whitespace never counts towards n */
		if (isspace(byte)) {
			continue;
		}

		if (0 == n) {
			return byte;
		}
		--n;
	}

	return 0;
}



/**
 * [PRIVATE]
 *
 * Moves the parser's position n bytes forward. If the new position would be
 * out of bounds, it is clamped to the last byte of the buffer.
 *
 * @param parser Parser to advance
 * @param n Number of bytes to skip
 */
static void xml_parser_consume(struct xml_parser* parser, size_t n) {

	/* Debug information: print the bytes about to be consumed. Only bytes that
	 * really exist in the buffer are printed, straight from the buffer, so no
	 * temporary copy is needed
	 */
#ifdef DEBUG
	{
		char message_buffer[512];
		size_t const available = parser->length - parser->position;
		size_t shown = (n < available) ? n : available;

		/* The message is truncated to the buffer size anyway */
		if (shown >= sizeof(message_buffer)) {
			shown = sizeof(message_buffer) - 1;
		}

		snprintf(message_buffer, sizeof(message_buffer), "Consuming %li bytes \"%.*s\"",
			(long)n, (int)shown, (char const*)&parser->buffer[parser->position]
		);
		xml_parser_info(parser, message_buffer);
	}
#endif


	/* Move the position forward
	 */
	parser->position += n;

	/* Don't go too far: stay on the last byte. An empty buffer keeps the
	 * position at 0 instead of underflowing
	 */
	if (parser->position >= parser->length) {
		parser->position = parser->length ? parser->length - 1 : 0;
	}
}



/**
 * [PRIVATE]
 *
 * Advances the parser to the next non-whitespace byte. The parser never moves
 * past the last byte of the buffer.
 */
static void xml_skip_whitespace(struct xml_parser* parser) {
	xml_parser_info(parser, "whitespace");

	for (;;) {
		/* Done as soon as the current byte is not whitespace */
		if (!isspace(parser->buffer[parser->position])) {
			break;
		}

		/* Already on the last byte: nowhere left to go */
		if (parser->position + 1 >= parser->length) {
			break;
		}

		parser->position++;
	}
}



/**
* [PRIVATE]
*
* Parses the name out of the an XML tag's ending
*
* ---( Example )---
* tag_name>
* ---
*/
static struct xml_string* xml_parse_tag_end(struct xml_parser* parser) {
xml_parser_info(parser, "tag_end");
size_t start = parser->position;
size_t length = 0;

/* Parse until `>' or a whitespace is reached
*/
while (start + length < parser->length) {
uint8_t current = xml_parser_peek(parser, CURRENT_CHARACTER);

if (('>' == current) || isspace(current)) {
break;
} else {
xml_parser_consume(parser, 1);
length++;
}
}

/* Consume `>'
*/
if ('>' != xml_parser_peek(parser, CURRENT_CHARACTER)) {
xml_parser_error(parser, CURRENT_CHARACTER, "xml_parse_tag_end::expected tag end");
return 0;
}
xml_parser_consume(parser, 1);

/* Return parsed tag name
*/
struct xml_string* name = malloc(sizeof(struct xml_string));
name->buffer = &parser->buffer[start];
name->length = length;
return name;
}



/**
 * [PRIVATE]
 *
 * Parses an opening XML tag without attributes
 *
 * ---( Example )---
 * <tag_name>
 * ---
 *
 * @return The tag name as delivered by xml_parse_tag_end, or 0 if the next
 *     non-whitespace byte is not `<'
 */
static struct xml_string* xml_parse_tag_open(struct xml_parser* parser) {
	xml_parser_info(parser, "tag_open");
	xml_skip_whitespace(parser);

	/* A `<' leads into the tag name, which xml_parse_tag_end handles
	 */
	if ('<' == xml_parser_peek(parser, CURRENT_CHARACTER)) {
		xml_parser_consume(parser, 1);
		return xml_parse_tag_end(parser);
	}

	xml_parser_error(parser, CURRENT_CHARACTER, "xml_parse_tag_open::expected opening tag");
	return 0;
}



/**
 * [PRIVATE]
 *
 * Parses a closing XML tag without attributes
 *
 * ---( Example )---
 * </tag_name>
 * ---
 *
 * @return The tag name as delivered by xml_parse_tag_end, or 0 if the next two
 *     non-whitespace bytes are not `</'
 */
static struct xml_string* xml_parse_tag_close(struct xml_parser* parser) {
	xml_parser_info(parser, "tag_close");
	xml_skip_whitespace(parser);

	/* Peeking does not move the parser, so both bytes can be read up front
	 */
	uint8_t const first = xml_parser_peek(parser, CURRENT_CHARACTER);
	uint8_t const second = xml_parser_peek(parser, NEXT_CHARACTER);

	/* Consume `</' and hand over to the tag name parser
	 */
	if (('<' == first) && ('/' == second)) {
		xml_parser_consume(parser, 2);
		return xml_parse_tag_end(parser);
	}

	/* Report every byte that did not match
	 */
	if ('<' != first) {
		xml_parser_error(parser, CURRENT_CHARACTER, "xml_parse_tag_close::expected closing tag `<'");
	}
	if ('/' != second) {
		xml_parser_error(parser, NEXT_CHARACTER, "xml_parse_tag_close::expected closing tag `/'");
	}

	return 0;
}



/**
* [PRIVATE]
*
* Parses a tag's content
*
* ---( Example )---
* this is
* a
* tag {} content
* ---
*
* @warning CDATA etc. is _not_ and will never be supported
*/
static struct xml_string* xml_parse_content(struct xml_parser* parser) {
xml_parser_info(parser, "content");

/* Whitespace will be ignored
*/
xml_skip_whitespace(parser);

size_t start = parser->position;
size_t length = 0;

/* Consume until `<' is reached
*/
while (start + length < parser->length) {
uint8_t current = xml_parser_peek(parser, CURRENT_CHARACTER);

if ('<' == current) {
break;
} else {
xml_parser_consume(parser, 1);
length++;
}
}

/* Next character must be an `<' or we have reached end of file
*/
if ('<' != xml_parser_peek(parser, CURRENT_CHARACTER)) {
xml_parser_error(parser, CURRENT_CHARACTER, "xml_parse_content::expected <");
return 0;
}

/* Ignore tailing whitespace
*/
while ((length > 0) && isspace(parser->buffer[start + length - 1])) {
length--;
}

/* Return text
*/
struct xml_string* content = malloc(sizeof(struct xml_string));
content->buffer = &parser->buffer[start];
content->length = length;
return content;
}



/**
 * [PRIVATE]
 *
 * Parses an XML fragment node
 *
 * ---( Example without children )---
 * <Node>Text</Node>
 * ---
 *
 * ---( Example with children )---
 * <Parent>
 * <Child>Text</Child>
 * <Child>Text</Child>
 * <Test>Content</Test>
 * </Parent>
 * ---
 *
 * A node carries either text content or children: if the first
 * non-whitespace byte after the opening tag is not `<', text content is
 * parsed, otherwise child nodes are parsed until a closing tag follows.
 *
 * @return Newly allocated node (to be released with xml_node_free), or 0 if
 *     the fragment is malformed or an allocation fails. On failure everything
 *     allocated so far is released.
 */
static struct xml_node* xml_parse_node(struct xml_parser* parser) {
	xml_parser_info(parser, "node");

	/* Setup variables
	 */
	struct xml_string* tag_open = 0;
	struct xml_string* tag_close = 0;
	struct xml_string* content = 0;
	struct xml_node* node = 0;
	size_t child_count = 0;
	size_t index;

	/* The child array always ends with a 0 entry, so an empty list needs one
	 * (zeroed) slot
	 */
	struct xml_node** children = calloc(1, sizeof *children);
	if (!children) {
		xml_parser_error(parser, NO_CHARACTER, "xml_parse_node::out of memory");
		return 0;
	}


	/* Parse open tag
	 */
	tag_open = xml_parse_tag_open(parser);
	if (!tag_open) {
		xml_parser_error(parser, NO_CHARACTER, "xml_parse_node::tag_open");
		goto exit_failure;
	}


	/* If the content does not start with '<', a text content is assumed
	 */
	if ('<' != xml_parser_peek(parser, CURRENT_CHARACTER)) {
		content = xml_parse_content(parser);

		if (!content) {
			xml_parser_error(parser, CURRENT_CHARACTER, "xml_parse_node::content");
			goto exit_failure;
		}


	/* Otherwise children are to be expected
	 */
	} else while ('/' != xml_parser_peek(parser, NEXT_CHARACTER)) {

		/* Parse child node
		 */
		struct xml_node* child = xml_parse_node(parser);
		if (!child) {
			xml_parser_error(parser, NEXT_CHARACTER, "xml_parse_node::child");
			goto exit_failure;
		}

		/* Grow child array: room for all children so far, the new child and
		 * the terminating 0. The result goes into a temporary so that the old
		 * array is still reachable (and freed) if realloc fails
		 */
		struct xml_node** grown = realloc(children, (child_count + 2) * sizeof *grown);
		if (!grown) {
			xml_node_free(child);
			xml_parser_error(parser, NO_CHARACTER, "xml_parse_node::out of memory");
			goto exit_failure;
		}
		children = grown;

		/* Save child
		 */
		children[child_count++] = child;
		children[child_count] = 0;
	}


	/* Parse close tag
	 */
	tag_close = xml_parse_tag_close(parser);
	if (!tag_close) {
		xml_parser_error(parser, NO_CHARACTER, "xml_parse_node::tag_close");
		goto exit_failure;
	}


	/* Close tag has to match open tag
	 */
	if (!xml_string_equals(tag_open, tag_close)) {
		xml_parser_error(parser, NO_CHARACTER, "xml_parse_node::tag missmatch");
		goto exit_failure;
	}


	/* Return parsed node; the node takes ownership of name, content and
	 * children, only the closing tag's name is no longer needed
	 */
	node = malloc(sizeof *node);
	if (!node) {
		xml_parser_error(parser, NO_CHARACTER, "xml_parse_node::out of memory");
		goto exit_failure;
	}
	xml_string_free(tag_close);

	node->name = tag_open;
	node->content = content;
	node->children = children;
	return node;


	/* A failure occurred, so free all allocated resources
	 */
exit_failure:
	if (tag_open) {
		xml_string_free(tag_open);
	}
	if (tag_close) {
		xml_string_free(tag_close);
	}
	if (content) {
		xml_string_free(content);
	}

	for (index = 0; children[index]; ++index) {
		xml_node_free(children[index]);
	}
	free(children);

	return 0;
}





/**
 * [PUBLIC API]
 *
 * Parses the XML fragment stored in `buffer`.
 *
 * @param buffer Bytes to parse; the returned document keeps referencing them
 * @param length Number of bytes in `buffer`
 *
 * @return Newly allocated document (to be released with xml_document_free),
 *     or 0 if the buffer is missing or empty, the fragment could not be
 *     parsed, or an allocation fails
 */
struct xml_document* xml_parse_document(uint8_t* buffer, size_t length) {

	/* Initialize parser
	 */
	struct xml_parser parser = {
		.buffer = buffer,
		.position = 0,
		.length = length
	};

	/* Without a buffer there is nothing to parse. The parser is marked empty
	 * so that the error report does not touch the missing buffer
	 */
	if (!buffer) {
		parser.length = 0;
		xml_parser_error(&parser, NO_CHARACTER, "xml_parse_document::buffer is null");
		return 0;
	}

	/* An empty buffer can never contain a valid document
	 */
	if (!length) {
		xml_parser_error(&parser, NO_CHARACTER, "xml_parse_document::length equals zero");
		return 0;
	}

	/* Parse the root node
	 */
	struct xml_node* root = xml_parse_node(&parser);
	if (!root) {
		xml_parser_error(&parser, NO_CHARACTER, "xml_parse_document::parsing document failed");
		return 0;
	}

	/* Return parsed document; the already parsed tree must not leak if the
	 * document record cannot be allocated
	 */
	struct xml_document* document = malloc(sizeof *document);
	if (!document) {
		xml_parser_error(&parser, NO_CHARACTER, "xml_parse_document::out of memory");
		xml_node_free(root);
		return 0;
	}
	document->buffer.buffer = buffer;
	document->buffer.length = length;
	document->root = root;

	return document;
}



/**
 * [PUBLIC API]
 *
 * Releases the document together with its complete node tree.
 *
 * @param document Document to release; 0 is accepted and ignored
 * @param free_buffer If true, the buffer handed to xml_parse_document is
 *     released with `free` as well
 */
void xml_document_free(struct xml_document* document, _Bool free_buffer) {
	if (!document) {
		return;
	}

	/* The nodes are owned by the document; without this the whole tree would
	 * leak
	 */
	if (document->root) {
		xml_node_free(document->root);
	}

	if (free_buffer) {
		free(document->buffer.buffer);
	}
	free(document);
}



/**
* [PUBLIC API]
*
* @return The root node stored in the document
*/
struct xml_node* xml_document_root(struct xml_document* document) {
return document->root;
}



/**
* [PUBLIC API]
*
* @return The node's tag name
*/
struct xml_string* xml_node_name(struct xml_node* node) {
return node->name;
}



/**
* [PUBLIC API]
*
* @return The node's text content, or 0 if the parser stored none for this
* node
*/
struct xml_string* xml_node_content(struct xml_node* node) {
return node->content;
}



/**
* [PUBLIC API]
*
* @warning O(n): the 0-terminated child array is walked on every call
*
* @return Number of child nodes
*/
size_t xml_node_children(struct xml_node* node) {
return get_zero_terminated_array_elements(node->children);
}



/**
 * [PUBLIC API]
 *
 * @param child Zero-based index of the wanted child
 *
 * @return The child at that index, or 0 if the index is out of range
 */
struct xml_node* xml_node_child(struct xml_node* node, size_t child) {
	size_t const count = xml_node_children(node);

	return (child < count) ? node->children[child] : 0;
}



/**
* [PUBLIC API]
*
* @return Number of bytes the string consists of
*/
size_t xml_string_length(struct xml_string* string) {
return string->length;
}



/**
 * [PUBLIC API]
 *
 * Copies the string's bytes into `buffer`. At most `length` bytes are written
 * and no terminating 0 is appended.
 */
void xml_string_copy(struct xml_string* string, uint8_t* buffer, size_t length) {
	size_t bytes = string->length;

	/* Never write more than the caller allows */
	if (length < bytes) {
		bytes = length;
	}

	memcpy(buffer, string->buffer, bytes);
}


+ 130
- 0
src/xml.h View File

@@ -0,0 +1,130 @@
/**
* Copyright (c) 2012 ooxi/xml.c
* https://github.com/ooxi/xml.c
*
* This software is provided 'as-is', without any express or implied warranty.
* In no event will the authors be held liable for any damages arising from the
* use of this software.
*
* Permission is granted to anyone to use this software for any purpose,
* including commercial applications, and to alter it and redistribute it
* freely, subject to the following restrictions:
*
* 1. The origin of this software must not be misrepresented; you must not
* claim that you wrote the original software. If you use this software in a
* product, an acknowledgment in the product documentation would be
* appreciated but is not required.
*
* 2. Altered source versions must be plainly marked as such, and must not be
* misrepresented as being the original software.
*
* 3. This notice may not be removed or altered from any source distribution.
*/
#ifndef HEADER_GLTOOLKIT_XML
#define HEADER_GLTOOLKIT_XML


/**
* Includes
*/
#include <stdint.h>
#include <string.h>



/**
* Opaque structure holding the parsed xml document
*/
struct xml_document;
struct xml_node;

/**
* Internal character sequence representation
*/
struct xml_string;



/**
* Tries to parse the XML fragment in buffer
*
* @param buffer Chunk to parse
* @param length Size of the buffer
*
* @warning `buffer` will be referenced by the document, you may not free it
* until you free the xml_document
* @warning You have to call xml_document_free after you finished using the document
*
* @return The parsed xml fragment iff parsing was successful
*/
struct xml_document* xml_parse_document(uint8_t* buffer, size_t length);



/**
* Frees all resources associated with the document. All xml_node and xml_string
* references obtained through the document will be invalidated
*
* @param document xml_document to free
* @param free_buffer iff true the internal buffer supplied via xml_parse_document
* will be freed with the `free` library function
*/
void xml_document_free(struct xml_document* document, _Bool free_buffer);


/**
* @return xml_node representing the document root
*/
struct xml_node* xml_document_root(struct xml_document* document);



/**
* @return The xml_node's tag name
*/
struct xml_string* xml_node_name(struct xml_node* node);



/**
* @return The xml_node's string content (if available, otherwise NULL)
*/
struct xml_string* xml_node_content(struct xml_node* node);



/**
* @return Number of child nodes
*/
size_t xml_node_children(struct xml_node* node);



/**
* @return The n-th child or 0 if out of range
*/
struct xml_node* xml_node_child(struct xml_node* node, size_t child);



/**
* @return Length of the string
*/
size_t xml_string_length(struct xml_string* string);



/**
* Copies the string into the supplied buffer
*
* @warning String will not be 0-terminated
* @warning Will write at most length bytes, even if the string is longer
*/
void xml_string_copy(struct xml_string* string, uint8_t* buffer, size_t length);





#endif


+ 155
- 0
test/test-xml.c View File

@@ -0,0 +1,155 @@
/**
* Copyright (c) 2012 ooxi/xml.c
* https://github.com/ooxi/xml.c
*
* This software is provided 'as-is', without any express or implied warranty.
* In no event will the authors be held liable for any damages arising from the
* use of this software.
*
* Permission is granted to anyone to use this software for any purpose,
* including commercial applications, and to alter it and redistribute it
* freely, subject to the following restrictions:
*
* 1. The origin of this software must not be misrepresented; you must not
* claim that you wrote the original software. If you use this software in a
* product, an acknowledgment in the product documentation would be
* appreciated but is not required.
*
* 2. Altered source versions must be plainly marked as such, and must not be
* misrepresented as being the original software.
*
* 3. This notice may not be removed or altered from any source distribution.
*/
#include <stdio.h>
#include <stdlib.h>
#include <xml.h>

/* Boolean constants for this test; const so that no test can overwrite them */
static _Bool const true = 1;
static _Bool const false = 0;





/**
 * Halts the program with EXIT_FAILURE iff `condition` is false, after printing
 * the message and the location (function, file, line) to stderr
 */
static void _assert_that(_Bool condition, char const* message, char const* func, char const* file, int line) {
	/* Nothing to do for a holding assertion */
	if (condition) {
		return;
	}

	fprintf(stderr, "Assertion failed: %s, in %s (%s:%i)\n", message, func, file, line);
	exit(EXIT_FAILURE);
}

/* Checks `condition` through _assert_that, passing along the calling function,
* file and line
*/
#define assert_that(condition, message) \
_assert_that(condition, message, __func__, __FILE__, __LINE__)



/**
 * Compares an xml string with a 0-terminated C string and prints both to
 * stderr if they differ.
 *
 * @param a xml string to check; 0 never equals anything
 * @param b Expected 0-terminated text
 *
 * @return true iff xml string equals the c string
 */
static _Bool string_equals(struct xml_string* a, char const* b) {
	_Bool equal = true;

	/* Nodes without text content deliver no xml string at all */
	if (!a) {
		fprintf(stderr, "string_equals: (no string) <> %s\n", b);
		return false;
	}

	size_t a_length = xml_string_length(a);
	size_t b_length = strlen(b);

	/* 0-terminated copy of `a`, needed for printing it with %s */
	uint8_t* a_buffer = malloc(a_length + 1);
	if (!a_buffer) {
		fprintf(stderr, "string_equals: out of memory\n");
		return false;
	}
	xml_string_copy(a, a_buffer, a_length);
	a_buffer[a_length] = 0;

	if (a_length != b_length) {
		fprintf(stderr, "string_equals: %s#%i <> %s#%i\n", (char const*)a_buffer, (int)a_length, b, (int)b_length);
		equal = false;

	/* memcmp compares raw bytes, so bytes above 0x7f match regardless of the
	 * signedness of char
	 */
	} else if (0 != memcmp(a_buffer, b, a_length)) {
		fprintf(stderr, "string_equals: %s <> %s\n", (char const*)a_buffer, b);
		equal = false;
	}

	free(a_buffer);
	return equal;
}



/**
 * Converts a static character array to an uint8_t data source
 *
 * Declares `source` in the calling scope as a stack-allocated copy of
 * `content`. The terminating 0 is copied as well, because the tests determine
 * the length of the data with strlen(source).
 */
#define SOURCE(source, content) \
	uint8_t* source = alloca((strlen(content) + 1) * sizeof(uint8_t)); \
	memcpy(source, content, strlen(content) + 1)



/**
* Tries to parse a simple document containing only one tag
*/
static void test_xml_parse_document_0() {
SOURCE(source, "<Hello>World</Hello>");

struct xml_document* document = xml_parse_document(source, strlen(source));
assert_that(document, "Could not parse document");

struct xml_node* root = xml_document_root(document);
assert_that(string_equals(xml_node_name(root), "Hello"), "root node name must be `Hello'");
assert_that(string_equals(xml_node_content(root), "World"), "root node content must be `World'");

xml_document_free(document, false);
}

/**
 * Parses a document with a root node and two children (including line breaks
 * and indentation) and checks the names, the content and the number of
 * children
 */
static void test_xml_parse_document_1() {
	SOURCE(source, ""
		"<Parent>\n"
		"\t<Child>\n"
		"\t\tFirst content\n"
		"\t</Child>\n"
		"\t<Child>\n"
		"\t\tSecond content\n"
		"\t</Child>\n"
		"</Parent>\n"
	);
	struct xml_document* document = xml_parse_document(source, strlen((char const*)source));
	assert_that(document, "Could not parse document");

	struct xml_node* root = xml_document_root(document);
	assert_that(string_equals(xml_node_name(root), "Parent"), "root node name must be `Parent'");
	assert_that(2 == xml_node_children(root), "root must have two children");

	struct xml_node* first_child = xml_node_child(root, 0);
	struct xml_node* second_child = xml_node_child(root, 1);
	assert_that(first_child && second_child, "Failed retrieving the children of root");

	/* An index past the last child must deliver no node */
	struct xml_node* third_child = xml_node_child(root, 2);
	assert_that(!third_child, "root has a third child where non should be");

	assert_that(string_equals(xml_node_name(first_child), "Child"), "first_child node name must be `Child'");
	assert_that(string_equals(xml_node_content(first_child), "First content"), "first_child node content must be `First content'");
	assert_that(string_equals(xml_node_name(second_child), "Child"), "second_child node name must be `Child'");
	assert_that(string_equals(xml_node_content(second_child), "Second content"), "second_child node content must be `Second content'");

	/* The source lives on the stack, so the document must not free it */
	xml_document_free(document, false);
}





/**
 * Console interface: runs every test case in order. A failing assertion ends
 * the program early, so reaching the end means that all tests passed
 */
int main(int argc, char** argv) {
	/* The command line is not evaluated */
	(void)argc;
	(void)argv;

	test_xml_parse_document_0();
	test_xml_parse_document_1();

	printf("All tests passed :-)\n");
	return EXIT_SUCCESS;
}


Loading…
Cancel
Save