Explorar el Código

xml.c -- a tiny xml subset parser

Similar to the GLib Markup parser, which also just parses an xml subset, xml.c is a simple, small and self contained xml parser in one file. Ideal for embedding into other projects without the need for big external dependencies.
master
ooxi hace 12 años
padre
commit
6489e62ff9
Se han modificado 5 ficheros con 1064 adiciones y 0 borrados
  1. +2
    -0
      .gitignore
  2. +41
    -0
      CMakeLists.txt
  3. +736
    -0
      src/xml.c
  4. +130
    -0
      src/xml.h
  5. +155
    -0
      test/test-xml.c

+ 2
- 0
.gitignore Ver fichero

@@ -0,0 +1,2 @@
*~
build

+ 41
- 0
CMakeLists.txt Ver fichero

@@ -0,0 +1,41 @@
# Project setup
# The minimum version must be declared before PROJECT() so that policies are
# in place when the project and its languages are configured.
CMAKE_MINIMUM_REQUIRED(VERSION 2.6.0 FATAL_ERROR)
PROJECT(xml)
# Compiler setup
SET(CMAKE_C_FLAGS_DEBUG "-DDEBUG")
SET(CMAKE_C_FLAGS_RELEASE "-O2")
#IF (${CMAKE_BUILD_TYPE} strequal "Debug")
# ADD_DEFINITIONS(-DDEBUG)
#
#ELSE (DEFINED ${DEBUG_BUILD})
# SET(CMAKE_BUILD_TYPE ${CMAKE_C_FLAGS_RELEASE})
#ENDIF (DEFINED ${DEBUG_BUILD})
# Sources
SET(SOURCE_DIRECTORY src)
SET(TEST_SOURCE_DIRECTORY test)
# Build library
ADD_LIBRARY(xml STATIC
    ${SOURCE_DIRECTORY}/xml.c
)
# Build unit cases
INCLUDE_DIRECTORIES(${SOURCE_DIRECTORY})
ADD_EXECUTABLE(test-xml
    ${TEST_SOURCE_DIRECTORY}/test-xml.c
)
TARGET_LINK_LIBRARIES(test-xml xml)
# Deploy: the static library plus the public header that declares its API
INSTALL(TARGETS xml DESTINATION lib)
INSTALL(FILES ${SOURCE_DIRECTORY}/xml.h DESTINATION include)

+ 736
- 0
src/xml.c Ver fichero

@@ -0,0 +1,736 @@
/**
* Copyright (c) 2012 ooxi/xml.c
* https://github.com/ooxi/xml.c
*
* This software is provided 'as-is', without any express or implied warranty.
* In no event will the authors be held liable for any damages arising from the
* use of this software.
*
* Permission is granted to anyone to use this software for any purpose,
* including commercial applications, and to alter it and redistribute it
* freely, subject to the following restrictions:
*
* 1. The origin of this software must not be misrepresented; you must not
* claim that you wrote the original software. If you use this software in a
* product, an acknowledgment in the product documentation would be
* appreciated but is not required.
*
* 2. Altered source versions must be plainly marked as such, and must not be
* misrepresented as being the original software.
*
* 3. This notice may not be removed or altered from any source distribution.
*/
#include <ctype.h>
#include <malloc.h>
#include <stdio.h>
#include <stdlib.h>
#include "xml.h"





/**
 * [OPAQUE API]
 *
 * A run of UTF-8 bytes described by a start pointer and a byte count. The
 * parser points `buffer` into the document's buffer rather than copying.
 */
struct xml_string {
    uint8_t *buffer; /* first byte of the text */
    size_t length;   /* number of bytes starting at `buffer` */
};

/**
 * [OPAQUE API]
 *
 * One parsed element. Every node carries a tag name and a list of child
 * pointers that ends with a 0 entry; text content is optional.
 */
struct xml_node {
    struct xml_string *name;    /* tag name */
    struct xml_string *content; /* text content, or 0 if the node has none */
    struct xml_node **children; /* 0-terminated array of child nodes */
};

/**
 * [OPAQUE API]
 *
 * A parsed document: the caller-supplied source buffer together with the
 * root of the node tree that was parsed from it.
 */
struct xml_document {
    struct xml_string buffer; /* source bytes handed to xml_parse_document */
    struct xml_node *root;    /* top-level node */
};





/**
 * [PRIVATE]
 *
 * Parser state: the input bytes, the read cursor and the input size.
 */
struct xml_parser {
    uint8_t *buffer; /* input being parsed */
    size_t position; /* index of the current byte in `buffer` */
    size_t length;   /* number of bytes in `buffer` */
};

/**
 * [PRIVATE]
 *
 * Offsets relative to the parser position. The numeric values matter: they
 * are passed to xml_parser_peek as an index and added to the position by
 * xml_parser_error.
 */
enum xml_parser_offset {
    NO_CHARACTER      = -1, /* message is not tied to a specific byte */
    CURRENT_CHARACTER = 0,
    NEXT_CHARACTER    = 1,
};





/**
 * [PRIVATE]
 *
 * Counts the entries that precede the terminating 0 pointer.
 *
 * @param nodes 0-terminated array of node pointers
 * @return Number of non-0 entries before the terminator
 */
static size_t get_zero_terminated_array_elements(struct xml_node** nodes) {
    struct xml_node** cursor = nodes;

    for (; *cursor; ++cursor) {
        /* walk forward until the terminator is found */
    }

    return (size_t)(cursor - nodes);
}



/**
 * [PRIVATE]
 *
 * Byte-wise comparison of two strings.
 *
 * @warning No UTF conversions will be attempted
 *
 * @return 1 if and only if both strings have the same length and the same
 *     bytes, otherwise 0
 */
static _Bool xml_string_equals(struct xml_string* a, struct xml_string* b) {
    /* Different lengths can never match, and the length check also bounds
     * the comparison below. */
    if (a->length != b->length) {
        return 0;
    }

    return 0 == memcmp(a->buffer, b->buffer, a->length);
}



/**
 * [PRIVATE]
 *
 * Releases the string descriptor.
 *
 * @warning `buffer` must _not_ be freed, since it is a reference into the
 *     document's buffer
 */
static void xml_string_free(struct xml_string* string) {
    /* Only the descriptor itself was allocated by the parser */
    free(string);
}



/**
 * [PRIVATE]
 *
 * Releases a node together with its name, its optional content, all of its
 * descendants and its child array.
 */
static void xml_node_free(struct xml_node* node) {
    size_t index;

    xml_string_free(node->name);

    /* content may be 0; xml_string_free only forwards to free(), which
     * ignores a 0 pointer */
    xml_string_free(node->content);

    /* Children are released depth-first, then the array that held them */
    for (index = 0; node->children[index]; ++index) {
        xml_node_free(node->children[index]);
    }
    free(node->children);

    free(node);
}



/**
 * [PRIVATE]
 *
 * Echoes the parser's call stack for debugging purposes. Only active when
 * DEBUG is defined; otherwise every call compiles to nothing.
 *
 * @param parser Parser the message belongs to (currently not inspected)
 * @param message Text written to stdout after the `xml_parser_info' prefix
 */
#ifdef DEBUG
static void xml_parser_info(struct xml_parser* parser, char const* message) {
    (void)parser;
    fprintf(stdout, "xml_parser_info %s\n", message);
}
#else
/* Expands to an expression, not a `{}' block, so that
 * `if (x) xml_parser_info(p, m); else ...' stays valid. */
#define xml_parser_info(parser, message) ((void)0)
#endif



/**
 * [PRIVATE]
 *
 * Echoes an error regarding the parser's source to stderr, prefixed with the
 * row and column of the byte the error refers to.
 *
 * @param parser Parser whose buffer and position locate the error
 * @param offset Which byte relative to the position is meant; NO_CHARACTER
 *     suppresses the `(is %c)' part of the message
 * @param message Description of the error
 */
static void xml_parser_error(struct xml_parser* parser, enum xml_parser_offset offset, char const* message) {
    int row = 0;
    int column = 0;

    /* Resolve the referenced byte index. NO_CHARACTER (-1) steps one byte
     * back but must not wrap below zero; everything is capped at `length'. */
    size_t character = parser->position;
    if (NO_CHARACTER == offset) {
        if (character > 0) {
            character--;
        }
    } else {
        character += (size_t)offset;
    }
    if (character > parser->length) {
        character = parser->length;
    }

    /* Derive row/column by counting line breaks in front of that byte */
    size_t position = 0; for (; position < character; ++position) {
        column++;

        if ('\n' == parser->buffer[position]) {
            row++;
            column = 0;
        }
    }

    /* Only show the offending byte if it really lies inside the buffer;
     * `character == length' is one past the end and must not be read. */
    if ((NO_CHARACTER != offset) && (character < parser->length)) {
        fprintf(stderr, "xml_parser_error at %i:%i (is %c): %s\n",
            row + 1, column, parser->buffer[character], message
        );
    } else {
        fprintf(stderr, "xml_parser_error at %i:%i: %s\n",
            row + 1, column, message
        );
    }
}



/**
 * [PRIVATE]
 *
 * Looks ahead from the current position without moving it.
 *
 * @param n Zero-based index counted over non-whitespace bytes only
 * @return The n-th non-whitespace byte at or after the position, or 0 if the
 *     input ends first
 */
static uint8_t xml_parser_peek(struct xml_parser* parser, size_t n) {
    size_t cursor;

    for (cursor = parser->position; cursor < parser->length; ++cursor) {
        uint8_t const byte = parser->buffer[cursor];

        /* Whitespace does not count towards n */
        if (isspace(byte)) {
            continue;
        }
        if (0 == n) {
            return byte;
        }
        --n;
    }

    return 0;
}



/**
 * [PRIVATE]
 *
 * Moves the parser's position n bytes forward. If the new position would be
 * out of bounds, it is set to the last valid index instead.
 *
 * @param parser Parser to advance; its length must be greater than 0
 * @param n Number of bytes to skip
 */
static void xml_parser_consume(struct xml_parser* parser, size_t n) {

    /* Debug information
     */
    #ifdef DEBUG
    {
        /* Show only the bytes that really exist: copying fewer than n bytes
         * while terminating at index n would print uninitialised memory. */
        size_t remaining = parser->length - parser->position;
        size_t shown = (n < remaining) ? n : remaining;

        char* consumed = alloca((shown + 1) * sizeof(char));
        memcpy(consumed, &parser->buffer[parser->position], shown);
        consumed[shown] = 0;

        char message_buffer[512];
        snprintf(message_buffer, sizeof message_buffer, "Consuming %li bytes \"%s\"", (long)n, consumed);

        xml_parser_info(parser, message_buffer);
    }
    #endif


    /* Move the position forward
     */
    parser->position += n;

    /* Don't go too far
     *
     * @warning Valid because parser->length must be greater than 0
     */
    if (parser->position >= parser->length) {
        parser->position = parser->length - 1;
    }
}



/**
 * [PRIVATE]
 *
 * Advances the position to the next non-whitespace byte. The position never
 * moves past the last byte, even if that byte is whitespace.
 */
static void xml_skip_whitespace(struct xml_parser* parser) {
    xml_parser_info(parser, "whitespace");

    for (;;) {
        if (!isspace(parser->buffer[parser->position])) {
            return;
        }
        /* Already on the last byte: stay there */
        if (parser->position + 1 >= parser->length) {
            return;
        }
        parser->position++;
    }
}



/**
* [PRIVATE]
*
* Parses the name out of the an XML tag's ending
*
* ---( Example )---
* tag_name>
* ---
*/
static struct xml_string* xml_parse_tag_end(struct xml_parser* parser) {
xml_parser_info(parser, "tag_end");
size_t start = parser->position;
size_t length = 0;

/* Parse until `>' or a whitespace is reached
*/
while (start + length < parser->length) {
uint8_t current = xml_parser_peek(parser, CURRENT_CHARACTER);

if (('>' == current) || isspace(current)) {
break;
} else {
xml_parser_consume(parser, 1);
length++;
}
}

/* Consume `>'
*/
if ('>' != xml_parser_peek(parser, CURRENT_CHARACTER)) {
xml_parser_error(parser, CURRENT_CHARACTER, "xml_parse_tag_end::expected tag end");
return 0;
}
xml_parser_consume(parser, 1);

/* Return parsed tag name
*/
struct xml_string* name = malloc(sizeof(struct xml_string));
name->buffer = &parser->buffer[start];
name->length = length;
return name;
}



/**
 * [PRIVATE]
 *
 * Parses an opening XML tag without attributes, skipping leading whitespace.
 *
 * ---( Example )---
 * <tag_name>
 * ---
 *
 * @return The tag name as produced by xml_parse_tag_end, or 0 on error
 */
static struct xml_string* xml_parse_tag_open(struct xml_parser* parser) {
    xml_parser_info(parser, "tag_open");
    xml_skip_whitespace(parser);

    if ('<' == xml_parser_peek(parser, CURRENT_CHARACTER)) {
        /* Consume `<', the remainder is the tag name followed by `>' */
        xml_parser_consume(parser, 1);
        return xml_parse_tag_end(parser);
    }

    xml_parser_error(parser, CURRENT_CHARACTER, "xml_parse_tag_open::expected opening tag");
    return 0;
}



/**
 * [PRIVATE]
 *
 * Parses a closing XML tag without attributes, skipping leading whitespace.
 *
 * ---( Example )---
 * </tag_name>
 * ---
 *
 * @return The tag name as produced by xml_parse_tag_end, or 0 on error
 */
static struct xml_string* xml_parse_tag_close(struct xml_parser* parser) {
    xml_parser_info(parser, "tag_close");
    xml_skip_whitespace(parser);

    /* Peeking does not move the parser, so both checks can be taken up front
     */
    _Bool const has_bracket = ('<' == xml_parser_peek(parser, CURRENT_CHARACTER));
    _Bool const has_slash = ('/' == xml_parser_peek(parser, NEXT_CHARACTER));

    if (has_bracket && has_slash) {
        /* Consume `</', the remainder is the tag name followed by `>' */
        xml_parser_consume(parser, 2);
        return xml_parse_tag_end(parser);
    }

    /* Report every missing part separately */
    if (!has_bracket) {
        xml_parser_error(parser, CURRENT_CHARACTER, "xml_parse_tag_close::expected closing tag `<'");
    }
    if (!has_slash) {
        xml_parser_error(parser, NEXT_CHARACTER, "xml_parse_tag_close::expected closing tag `/'");
    }
    return 0;
}



/**
* [PRIVATE]
*
* Parses a tag's content
*
* ---( Example )---
* this is
* a
* tag {} content
* ---
*
* @warning CDATA etc. is _not_ and will never be supported
*/
static struct xml_string* xml_parse_content(struct xml_parser* parser) {
xml_parser_info(parser, "content");

/* Whitespace will be ignored
*/
xml_skip_whitespace(parser);

size_t start = parser->position;
size_t length = 0;

/* Consume until `<' is reached
*/
while (start + length < parser->length) {
uint8_t current = xml_parser_peek(parser, CURRENT_CHARACTER);

if ('<' == current) {
break;
} else {
xml_parser_consume(parser, 1);
length++;
}
}

/* Next character must be an `<' or we have reached end of file
*/
if ('<' != xml_parser_peek(parser, CURRENT_CHARACTER)) {
xml_parser_error(parser, CURRENT_CHARACTER, "xml_parse_content::expected <");
return 0;
}

/* Ignore tailing whitespace
*/
while ((length > 0) && isspace(parser->buffer[start + length - 1])) {
length--;
}

/* Return text
*/
struct xml_string* content = malloc(sizeof(struct xml_string));
content->buffer = &parser->buffer[start];
content->length = length;
return content;
}



/**
 * [PRIVATE]
 *
 * Parses an XML fragment node: an opening tag, then either text content or
 * any number of child nodes, then the matching closing tag.
 *
 * ---( Example without children )---
 * <Node>Text</Node>
 * ---
 *
 * ---( Example with children )---
 * <Parent>
 *     <Child>Text</Child>
 *     <Child>Text</Child>
 *     <Test>Content</Test>
 * </Parent>
 * ---
 *
 * @return Newly allocated node owning its name, content and children, or 0
 *     on a syntax error or when memory is exhausted (nothing is leaked)
 */
static struct xml_node* xml_parse_node(struct xml_parser* parser) {
    xml_parser_info(parser, "node");

    /* Setup variables
     */
    struct xml_string* tag_open = 0;
    struct xml_string* tag_close = 0;
    struct xml_string* content = 0;
    struct xml_node* node = 0;
    struct xml_node** it = 0;
    size_t child_count = 0;

    /* calloc already yields the terminating 0 entry of the empty list */
    struct xml_node** children = calloc(1, sizeof *children);
    if (!children) {
        xml_parser_error(parser, NO_CHARACTER, "xml_parse_node::out of memory");
        return 0;
    }


    /* Parse open tag
     */
    tag_open = xml_parse_tag_open(parser);
    if (!tag_open) {
        xml_parser_error(parser, NO_CHARACTER, "xml_parse_node::tag_open");
        goto exit_failure;
    }


    /* If the content does not start with '<', a text content is assumed
     */
    if ('<' != xml_parser_peek(parser, CURRENT_CHARACTER)) {
        content = xml_parse_content(parser);

        if (!content) {
            xml_parser_error(parser, CURRENT_CHARACTER, "xml_parse_node::content");
            goto exit_failure;
        }


    /* Otherwise children are to be expected
     */
    } else while ('/' != xml_parser_peek(parser, NEXT_CHARACTER)) {

        /* Parse child node
         */
        struct xml_node* child = xml_parse_node(parser);
        if (!child) {
            xml_parser_error(parser, NEXT_CHARACTER, "xml_parse_node::child");
            goto exit_failure;
        }

        /* Grow child array: room is needed for the existing children, the
         * new child AND the terminating 0 entry, i.e. child_count + 2 slots.
         * The result goes to a temporary so that the old array can still be
         * released if realloc fails.
         */
        struct xml_node** grown = realloc(children, (child_count + 2) * sizeof *children);
        if (!grown) {
            xml_node_free(child);
            xml_parser_error(parser, NO_CHARACTER, "xml_parse_node::out of memory");
            goto exit_failure;
        }
        children = grown;

        /* Save child
         */
        children[child_count++] = child;
        children[child_count] = 0;
    }


    /* Parse close tag
     */
    tag_close = xml_parse_tag_close(parser);
    if (!tag_close) {
        xml_parser_error(parser, NO_CHARACTER, "xml_parse_node::tag_close");
        goto exit_failure;
    }


    /* Close tag has to match open tag
     */
    if (!xml_string_equals(tag_open, tag_close)) {
        xml_parser_error(parser, NO_CHARACTER, "xml_parse_node::tag missmatch");
        goto exit_failure;
    }


    /* Return parsed node
     */
    node = malloc(sizeof *node);
    if (!node) {
        xml_parser_error(parser, NO_CHARACTER, "xml_parse_node::out of memory");
        goto exit_failure;
    }
    xml_string_free(tag_close);

    node->name = tag_open;
    node->content = content;
    node->children = children;
    return node;


    /* A failure occurred, so free all allocated resources. The strings may
     * still be 0 here; xml_string_free forwards to free(), which ignores 0.
     */
exit_failure:
    xml_string_free(tag_open);
    xml_string_free(tag_close);
    xml_string_free(content);

    for (it = children; *it; ++it) {
        xml_node_free(*it);
    }
    free(children);

    return 0;
}





/**
 * [PUBLIC API]
 *
 * Parses the XML fragment in `buffer` into a document.
 *
 * @param buffer Bytes to parse; the document keeps referencing them
 * @param length Number of bytes in `buffer`, must be greater than 0
 *
 * @return Newly allocated document, or 0 if the buffer is empty, cannot be
 *     parsed, or memory is exhausted
 */
struct xml_document* xml_parse_document(uint8_t* buffer, size_t length) {

    /* Initialize parser
     */
    struct xml_parser parser = {
        .buffer = buffer,
        .position = 0,
        .length = length
    };

    /* An empty buffer can never contain a valid document
     */
    if (!length) {
        xml_parser_error(&parser, NO_CHARACTER, "xml_parse_document::length equals zero");
        return 0;
    }

    /* Parse the root node
     */
    struct xml_node* root = xml_parse_node(&parser);
    if (!root) {
        xml_parser_error(&parser, NO_CHARACTER, "xml_parse_document::parsing document failed");
        return 0;
    }

    /* Return parsed document. If the allocation fails the freshly parsed
     * tree has no owner yet and must be released here.
     */
    struct xml_document* document = malloc(sizeof *document);
    if (!document) {
        xml_parser_error(&parser, NO_CHARACTER, "xml_parse_document::out of memory");
        xml_node_free(root);
        return 0;
    }
    document->buffer.buffer = buffer;
    document->buffer.length = length;
    document->root = root;

    return document;
}



/**
 * [PUBLIC API]
 *
 * Frees the document including its complete node tree. Every xml_node and
 * xml_string obtained through the document is invalid afterwards.
 *
 * @param document Document to free; 0 is accepted and ignored
 * @param free_buffer If true, the buffer handed to xml_parse_document is
 *     released with free() as well
 */
void xml_document_free(struct xml_document* document, _Bool free_buffer) {
    if (!document) {
        return;
    }

    /* The nodes are owned by the document and would leak otherwise */
    if (document->root) {
        xml_node_free(document->root);
    }

    if (free_buffer) {
        free(document->buffer.buffer);
    }
    free(document);
}



/**
 * [PUBLIC API]
 *
 * @return The root node stored in the document
 */
struct xml_node* xml_document_root(struct xml_document* document) {
    struct xml_node* const root = document->root;
    return root;
}



/**
 * [PUBLIC API]
 *
 * @return The node's tag name
 */
struct xml_string* xml_node_name(struct xml_node* node) {
    struct xml_string* const name = node->name;
    return name;
}



/**
 * [PUBLIC API]
 *
 * @return The node's text content, or 0 if the node has none
 */
struct xml_string* xml_node_content(struct xml_node* node) {
    struct xml_string* const content = node->content;
    return content;
}



/**
 * [PUBLIC API]
 *
 * @warning O(n): the child list is walked up to its terminating 0 entry
 *
 * @return Number of direct children of the node
 */
size_t xml_node_children(struct xml_node* node) {
    size_t count = 0;

    while (node->children[count]) {
        ++count;
    }

    return count;
}



/**
 * [PUBLIC API]
 *
 * @param child Zero-based index of the wanted child
 * @return The child at that index, or 0 if the index is out of range
 */
struct xml_node* xml_node_child(struct xml_node* node, size_t child) {
    struct xml_node** it = node->children;

    /* Step forward `child' entries, but never past the terminator: stopping
     * on the terminating 0 entry yields exactly the out-of-range result. */
    while (*it && (child > 0)) {
        ++it;
        --child;
    }

    return *it;
}



/**
 * [PUBLIC API]
 *
 * @return Number of bytes in the string
 */
size_t xml_string_length(struct xml_string* string) {
    size_t const length = string->length;
    return length;
}



/**
 * [PUBLIC API]
 *
 * Copies the string's bytes into `buffer`. No terminating 0 is written.
 *
 * @param string Source string
 * @param buffer Destination, must hold at least `length` bytes
 * @param length Upper bound on the bytes written; fewer are written if the
 *     string is shorter
 */
void xml_string_copy(struct xml_string* string, uint8_t* buffer, size_t length) {
    size_t const bytes = (string->length < length) ? string->length : length;

    memcpy(buffer, string->buffer, bytes);
}


+ 130
- 0
src/xml.h Ver fichero

@@ -0,0 +1,130 @@
/**
* Copyright (c) 2012 ooxi/xml.c
* https://github.com/ooxi/xml.c
*
* This software is provided 'as-is', without any express or implied warranty.
* In no event will the authors be held liable for any damages arising from the
* use of this software.
*
* Permission is granted to anyone to use this software for any purpose,
* including commercial applications, and to alter it and redistribute it
* freely, subject to the following restrictions:
*
* 1. The origin of this software must not be misrepresented; you must not
* claim that you wrote the original software. If you use this software in a
* product, an acknowledgment in the product documentation would be
* appreciated but is not required.
*
* 2. Altered source versions must be plainly marked as such, and must not be
* misrepresented as being the original software.
*
* 3. This notice may not be removed or altered from any source distribution.
*/
#ifndef HEADER_GLTOOLKIT_XML
#define HEADER_GLTOOLKIT_XML


/**
* Includes
*/
#include <stdint.h>
#include <string.h>



/**
* Opaque structure holding the parsed xml document
*/
struct xml_document;
struct xml_node;

/**
* Internal character sequence representation
*/
struct xml_string;



/**
* Tries to parse the XML fragment in buffer
*
* @param buffer Chunk to parse
* @param length Size of the buffer
*
* @warning `buffer` will be referenced by the document, you may not free it
* until you free the xml_document
* @warning You have to call xml_document_free after you have finished using
* the document
*
* @return The parsed xml document iff parsing was successful, otherwise 0
*/
struct xml_document* xml_parse_document(uint8_t* buffer, size_t length);



/**
* Frees all resources associated with the document. All xml_node and xml_string
* references obtained through the document will be invalidated
*
* @param document xml_document to free
* @param free_buffer iff true the buffer supplied via xml_parse_document
* will be released with the `free` function
*/
void xml_document_free(struct xml_document* document, _Bool free_buffer);


/**
* @return xml_node representing the document root
*/
struct xml_node* xml_document_root(struct xml_document* document);



/**
* @return The xml_node's tag name
*/
struct xml_string* xml_node_name(struct xml_node* node);



/**
* @return The xml_node's string content (if available, otherwise NULL)
*/
struct xml_string* xml_node_content(struct xml_node* node);



/**
* @return Number of child nodes
*/
size_t xml_node_children(struct xml_node* node);



/**
* @return The n-th child or 0 if out of range
*/
struct xml_node* xml_node_child(struct xml_node* node, size_t child);



/**
* @return Length of the string
*/
size_t xml_string_length(struct xml_string* string);



/**
* Copies the string into the supplied buffer
*
* @warning String will not be 0-terminated
* @warning Will write at most length bytes, even if the string is longer
*/
void xml_string_copy(struct xml_string* string, uint8_t* buffer, size_t length);





#endif


+ 155
- 0
test/test-xml.c Ver fichero

@@ -0,0 +1,155 @@
/**
* Copyright (c) 2012 ooxi/xml.c
* https://github.com/ooxi/xml.c
*
* This software is provided 'as-is', without any express or implied warranty.
* In no event will the authors be held liable for any damages arising from the
* use of this software.
*
* Permission is granted to anyone to use this software for any purpose,
* including commercial applications, and to alter it and redistribute it
* freely, subject to the following restrictions:
*
* 1. The origin of this software must not be misrepresented; you must not
* claim that you wrote the original software. If you use this software in a
* product, an acknowledgment in the product documentation would be
* appreciated but is not required.
*
* 2. Altered source versions must be plainly marked as such, and must not be
* misrepresented as being the original software.
*
* 3. This notice may not be removed or altered from any source distribution.
*/
#include <stdio.h>
#include <stdlib.h>
#include <xml.h>

/* File-local boolean constants, used in place of <stdbool.h> */
static _Bool true = 1;
static _Bool false = 0;





/**
 * Terminates the program with EXIT_FAILURE iff the condition is false, after
 * printing the message and the source location to stderr.
 */
static void _assert_that(_Bool condition, char const* message, char const* func, char const* file, int line) {
    if (condition) {
        return;
    }

    fprintf(stderr, "Assertion failed: %s, in %s (%s:%i)\n", message, func, file, line);
    exit(EXIT_FAILURE);
}

/* Supplies function, file and line of the call site automatically */
#define assert_that(condition, message) \
    _assert_that(condition, message, __func__, __FILE__, __LINE__)



/**
 * Compares an xml string with a 0-terminated C string and reports any
 * difference on stderr.
 *
 * @return true iff the xml string equals the C string byte for byte
 */
static _Bool string_equals(struct xml_string* a, char const* b) {
    size_t a_length = xml_string_length(a);
    size_t b_length = strlen(b);

    /* 0-terminated copy, needed only to print the xml string with %s */
    uint8_t* a_buffer = alloca((a_length + 1) * sizeof(uint8_t));
    xml_string_copy(a, a_buffer, a_length);
    a_buffer[a_length] = 0;

    if (a_length != b_length) {
        fprintf(stderr, "string_equals: %s#%i <> %s#%i\n", a_buffer, (int)a_length, b, (int)b_length);
        return false;
    }

    /* memcmp compares as unsigned char. Comparing uint8_t against plain
     * char directly would never match bytes >= 0x80 where char is signed. */
    if (0 != memcmp(a_buffer, b, a_length)) {
        fprintf(stderr, "string_equals: %s <> %s\n", a_buffer, b);
        return false;
    }

    return true;
}



/**
 * Declares `source` as a stack-allocated uint8_t copy of the C string
 * `content`. The terminating 0 is copied as well, so the result can be
 * measured with strlen just like the original string.
 */
#define SOURCE(source, content) \
    uint8_t* source = alloca((strlen(content) + 1) * sizeof(uint8_t)); \
    { memcpy(source, content, strlen(content) + 1); \
    }



/**
 * Tries to parse a simple document containing only one tag
 */
static void test_xml_parse_document_0(void) {
    /* The length is taken from the C string itself, not from the uint8_t
     * copy, which strlen is neither typed for nor guaranteed to terminate */
    static char const text[] = "<Hello>World</Hello>";
    SOURCE(source, text);

    struct xml_document* document = xml_parse_document(source, strlen(text));
    assert_that(document, "Could not parse document");

    struct xml_node* root = xml_document_root(document);
    assert_that(string_equals(xml_node_name(root), "Hello"), "root node name must be `Hello'");
    assert_that(string_equals(xml_node_content(root), "World"), "root node content must be `World'");

    /* source lives on the stack, so the document must not free it */
    xml_document_free(document, false);
}

/**
 * Tries to parse a document containing multiple tags
 */
static void test_xml_parse_document_1(void) {
    /* The length is taken from the C string itself, not from the uint8_t
     * copy, which strlen is neither typed for nor guaranteed to terminate */
    static char const text[] = ""
        "<Parent>\n"
        "\t<Child>\n"
        "\t\tFirst content\n"
        "\t</Child>\n"
        "\t<Child>\n"
        "\t\tSecond content\n"
        "\t</Child>\n"
        "</Parent>\n"
    ;
    SOURCE(source, text);

    struct xml_document* document = xml_parse_document(source, strlen(text));
    assert_that(document, "Could not parse document");

    struct xml_node* root = xml_document_root(document);
    assert_that(string_equals(xml_node_name(root), "Parent"), "root node name must be `Parent'");
    assert_that(2 == xml_node_children(root), "root must have two children");

    struct xml_node* first_child = xml_node_child(root, 0);
    struct xml_node* second_child = xml_node_child(root, 1);
    assert_that(first_child && second_child, "Failed retrieving the children of root");

    /* Index 2 is out of range and has to yield 0 */
    struct xml_node* third_child = xml_node_child(root, 2);
    assert_that(!third_child, "root has a third child where non should be");

    assert_that(string_equals(xml_node_name(first_child), "Child"), "first_child node name must be `Child'");
    assert_that(string_equals(xml_node_content(first_child), "First content"), "first_child node content must be `First content'");
    assert_that(string_equals(xml_node_name(second_child), "Child"), "second_child node name must be `Child'");
    assert_that(string_equals(xml_node_content(second_child), "Second content"), "second_child node content must be `Second content'");

    /* source lives on the stack, so the document must not free it */
    xml_document_free(document, false);
}





/**
 * Console interface: runs every test case in order. A failing assertion
 * terminates the program from inside the test, so reaching the end means
 * that all of them passed.
 */
int main(int argc, char** argv) {
    (void)argc;
    (void)argv;

    test_xml_parse_document_0();
    test_xml_parse_document_1();

    fprintf(stdout, "All tests passed :-)\n");
    return EXIT_SUCCESS;
}


Cargando…
Cancelar
Guardar