Simple XML subset parser comparable to glib's Markup parser, but without any dependencies, in one self-contained file. Forked from https://github.com/ooxi/xml.c
Vous ne pouvez pas sélectionner plus de 25 sujets Les noms de sujets doivent commencer par une lettre ou un nombre, peuvent contenir des tirets ('-') et peuvent comporter jusqu'à 35 caractères.

737 lignes
14 KiB

  1. /**
  2. * Copyright (c) 2012 ooxi/xml.c
  3. * https://github.com/ooxi/xml.c
  4. *
  5. * This software is provided 'as-is', without any express or implied warranty.
  6. * In no event will the authors be held liable for any damages arising from the
  7. * use of this software.
  8. *
  9. * Permission is granted to anyone to use this software for any purpose,
  10. * including commercial applications, and to alter it and redistribute it
  11. * freely, subject to the following restrictions:
  12. *
  13. * 1. The origin of this software must not be misrepresented; you must not
  14. * claim that you wrote the original software. If you use this software in a
  15. * product, an acknowledgment in the product documentation would be
  16. * appreciated but is not required.
  17. *
  18. * 2. Altered source versions must be plainly marked as such, and must not be
  19. * misrepresented as being the original software.
  20. *
  21. * 3. This notice may not be removed or altered from any source distribution.
  22. */
  #include <ctype.h>
  #include <malloc.h>
  #include <stdint.h>
  #include <stdio.h>
  #include <stdlib.h>
  #include <string.h>
  #include "xml.h"
  /**
   * [OPAQUE API]
   *
   * UTF-8 text, described by a pointer to its first byte and a byte count.
   */
  struct xml_string {
      uint8_t *buffer; /* first byte of the text */
      size_t length;   /* number of bytes belonging to the text */
  };
  /**
   * [OPAQUE API]
   *
   * One element of the parsed tree. Every node carries a tag name and a
   * 0-terminated array of child nodes; text content is optional.
   */
  struct xml_node {
      struct xml_string *name;    /* tag name */
      struct xml_string *content; /* text content, or 0 when there is none */
      struct xml_node **children; /* child pointers, terminated by a 0 entry */
  };
  48. /**
  49. * [OPAQUE API]
  50. *
  51. * An xml_document simply contains the root node and the underlying buffer
  52. */
  53. struct xml_document {
  54. struct xml_string buffer;
  55. struct xml_node* root;
  56. };
  /**
   * [PRIVATE]
   *
   * Parser context: the input bytes plus the current read position.
   */
  struct xml_parser {
      uint8_t *buffer; /* input being parsed */
      size_t position; /* index into `buffer` of the byte currently examined */
      size_t length;   /* number of bytes in `buffer` */
  };
  /**
   * [PRIVATE]
   *
   * Character offsets relative to the parser position. They are passed to
   * xml_parser_peek() and xml_parser_error(); NO_CHARACTER additionally tells
   * xml_parser_error() not to print a character.
   */
  enum xml_parser_offset {
      NO_CHARACTER = -1,
      CURRENT_CHARACTER = 0,
      NEXT_CHARACTER = 1
  };
  /**
   * [PRIVATE]
   *
   * Counts the entries of a 0-terminated node array.
   *
   * @param nodes Array whose last entry is a 0 pointer
   * @return Number of entries in front of the terminating 0
   */
  static size_t get_zero_terminated_array_elements(struct xml_node** nodes) {
      struct xml_node** cursor = nodes;

      /* Walk forward until the terminator is found */
      while (*cursor) {
          cursor++;
      }
      return (size_t)(cursor - nodes);
  }
  89. /**
  90. * [PRIVATE]
  91. *
  92. * @warning No UTF conversions will be attempted
  93. *
  94. * @return true gdw. a == b
  95. */
  96. static _Bool xml_string_equals(struct xml_string* a, struct xml_string* b) {
  97. _Bool const true = 1;
  98. _Bool const false = 0;
  99. if (a->length != b->length) {
  100. return false;
  101. }
  102. size_t i = 0; for (; i < a->length; ++i) {
  103. if (a->buffer[i] != b->buffer[i]) {
  104. return false;
  105. }
  106. }
  107. return true;
  108. }
  /**
   * [PRIVATE]
   *
   * Releases the xml_string object itself.
   *
   * @warning `buffer` must _not_ be freed here, since it only references the
   *     document's buffer
   */
  static void xml_string_free(struct xml_string* string) {
      free(string);
  }
  120. /**
  121. * [PRIVATE]
  122. *
  123. * Frees the resources allocated by the node
  124. */
  125. static void xml_node_free(struct xml_node* node) {
  126. xml_string_free(node->name);
  127. if (node->content) {
  128. xml_string_free(node->content);
  129. }
  130. struct xml_node** it = node->children;
  131. while (*it) {
  132. xml_node_free(*it);
  133. ++it;
  134. }
  135. free(node->children);
  136. free(node);
  137. }
  /**
   * [PRIVATE]
   *
   * Echoes the parser's call stack for debugging purposes.
   *
   * With DEBUG defined the message is written to stdout. Without DEBUG the
   * call expands to an empty `do { } while (0)` statement, so it stays a
   * single statement and can be used in an unbraced `if`/`else`.
   *
   * @param parser Parser the message refers to (not inspected)
   * @param message 0-terminated text to print
   */
  #ifdef DEBUG
  static void xml_parser_info(struct xml_parser* parser, char const* message) {
      (void)parser; /* only the message is printed */
      fprintf(stdout, "xml_parser_info %s\n", message);
  }
  #else
  #define xml_parser_info(parser, message) do { } while (0)
  #endif
  150. /**
  151. * [PRIVATE]
  152. *
  153. * Echos an error regarding the parser's source to the console
  154. */
  155. static void xml_parser_error(struct xml_parser* parser, enum xml_parser_offset offset, char const* message) {
  156. int row = 0;
  157. int column = 0;
  158. #define min(X,Y) ((X) < (Y) ? (X) : (Y))
  159. #define max(X,Y) ((X) > (Y) ? (X) : (Y))
  160. size_t character = max(0, min(parser->length, parser->position + offset));
  161. #undef min
  162. #undef max
  163. size_t position = 0; for (; position < character; ++position) {
  164. column++;
  165. if ('\n' == parser->buffer[position]) {
  166. row++;
  167. column = 0;
  168. }
  169. }
  170. if (NO_CHARACTER != offset) {
  171. fprintf(stderr, "xml_parser_error at %i:%i (is %c): %s\n",
  172. row + 1, column, parser->buffer[character], message
  173. );
  174. } else {
  175. fprintf(stderr, "xml_parser_error at %i:%i: %s\n",
  176. row + 1, column, message
  177. );
  178. }
  179. }
  180. /**
  181. * [PRIVATE]
  182. *
  183. * Returns the n-th not-whitespace byte in parser and 0 if such a byte does not
  184. * exist
  185. */
  186. static uint8_t xml_parser_peek(struct xml_parser* parser, size_t n) {
  187. size_t position = parser->position;
  188. while (position < parser->length) {
  189. if (!isspace(parser->buffer[position])) {
  190. if (n == 0) {
  191. return parser->buffer[position];
  192. } else {
  193. --n;
  194. }
  195. }
  196. position++;
  197. }
  198. return 0;
  199. }
  200. /**
  201. * [PRIVATE]
  202. *
  203. * Moves the parser's position n bytes. If the new position would be out of
  204. * bounds, it will be converted to the bounds itself
  205. */
  206. static void xml_parser_consume(struct xml_parser* parser, size_t n) {
  207. /* Debug information
  208. */
  209. #ifdef DEBUG
  210. #define min(X,Y) ((X) < (Y) ? (X) : (Y))
  211. char* consumed = alloca((n + 1) * sizeof(char));
  212. memcpy(consumed, &parser->buffer[parser->position], min(n, parser->length - parser->position));
  213. consumed[n] = 0;
  214. #undef min
  215. size_t message_buffer_length = 512;
  216. char* message_buffer = alloca(512 * sizeof(char));
  217. snprintf(message_buffer, message_buffer_length, "Consuming %li bytes \"%s\"", (long)n, consumed);
  218. message_buffer[message_buffer_length - 1] = 0;
  219. xml_parser_info(parser, message_buffer);
  220. #endif
  221. /* Move the position forward
  222. */
  223. parser->position += n;
  224. /* Don't go too far
  225. *
  226. * @warning Valid because parser->length must be greater than 0
  227. */
  228. if (parser->position >= parser->length) {
  229. parser->position = parser->length - 1;
  230. }
  231. }
  232. /**
  233. * [PRIVATE]
  234. *
  235. * Skips to the next non-whitespace character
  236. */
  237. static void xml_skip_whitespace(struct xml_parser* parser) {
  238. xml_parser_info(parser, "whitespace");
  239. while (isspace(parser->buffer[parser->position])) {
  240. if (parser->position + 1 >= parser->length) {
  241. return;
  242. } else {
  243. parser->position++;
  244. }
  245. }
  246. }
  247. /**
  248. * [PRIVATE]
  249. *
  250. * Parses the name out of the an XML tag's ending
  251. *
  252. * ---( Example )---
  253. * tag_name>
  254. * ---
  255. */
  256. static struct xml_string* xml_parse_tag_end(struct xml_parser* parser) {
  257. xml_parser_info(parser, "tag_end");
  258. size_t start = parser->position;
  259. size_t length = 0;
  260. /* Parse until `>' or a whitespace is reached
  261. */
  262. while (start + length < parser->length) {
  263. uint8_t current = xml_parser_peek(parser, CURRENT_CHARACTER);
  264. if (('>' == current) || isspace(current)) {
  265. break;
  266. } else {
  267. xml_parser_consume(parser, 1);
  268. length++;
  269. }
  270. }
  271. /* Consume `>'
  272. */
  273. if ('>' != xml_parser_peek(parser, CURRENT_CHARACTER)) {
  274. xml_parser_error(parser, CURRENT_CHARACTER, "xml_parse_tag_end::expected tag end");
  275. return 0;
  276. }
  277. xml_parser_consume(parser, 1);
  278. /* Return parsed tag name
  279. */
  280. struct xml_string* name = malloc(sizeof(struct xml_string));
  281. name->buffer = &parser->buffer[start];
  282. name->length = length;
  283. return name;
  284. }
  285. /**
  286. * [PRIVATE]
  287. *
  288. * Parses an opening XML tag without attributes
  289. *
  290. * ---( Example )---
  291. * <tag_name>
  292. * ---
  293. */
  294. static struct xml_string* xml_parse_tag_open(struct xml_parser* parser) {
  295. xml_parser_info(parser, "tag_open");
  296. xml_skip_whitespace(parser);
  297. /* Consume `<'
  298. */
  299. if ('<' != xml_parser_peek(parser, CURRENT_CHARACTER)) {
  300. xml_parser_error(parser, CURRENT_CHARACTER, "xml_parse_tag_open::expected opening tag");
  301. return 0;
  302. }
  303. xml_parser_consume(parser, 1);
  304. /* Consume tag name
  305. */
  306. return xml_parse_tag_end(parser);
  307. }
  308. /**
  309. * [PRIVATE]
  310. *
  311. * Parses an closing XML tag without attributes
  312. *
  313. * ---( Example )---
  314. * </tag_name>
  315. * ---
  316. */
  317. static struct xml_string* xml_parse_tag_close(struct xml_parser* parser) {
  318. xml_parser_info(parser, "tag_close");
  319. xml_skip_whitespace(parser);
  320. /* Consume `</'
  321. */
  322. if ( ('<' != xml_parser_peek(parser, CURRENT_CHARACTER))
  323. || ('/' != xml_parser_peek(parser, NEXT_CHARACTER))) {
  324. if ('<' != xml_parser_peek(parser, CURRENT_CHARACTER)) {
  325. xml_parser_error(parser, CURRENT_CHARACTER, "xml_parse_tag_close::expected closing tag `<'");
  326. }
  327. if ('/' != xml_parser_peek(parser, NEXT_CHARACTER)) {
  328. xml_parser_error(parser, NEXT_CHARACTER, "xml_parse_tag_close::expected closing tag `/'");
  329. }
  330. return 0;
  331. }
  332. xml_parser_consume(parser, 2);
  333. /* Consume tag name
  334. */
  335. return xml_parse_tag_end(parser);
  336. }
  337. /**
  338. * [PRIVATE]
  339. *
  340. * Parses a tag's content
  341. *
  342. * ---( Example )---
  343. * this is
  344. * a
  345. * tag {} content
  346. * ---
  347. *
  348. * @warning CDATA etc. is _not_ and will never be supported
  349. */
  350. static struct xml_string* xml_parse_content(struct xml_parser* parser) {
  351. xml_parser_info(parser, "content");
  352. /* Whitespace will be ignored
  353. */
  354. xml_skip_whitespace(parser);
  355. size_t start = parser->position;
  356. size_t length = 0;
  357. /* Consume until `<' is reached
  358. */
  359. while (start + length < parser->length) {
  360. uint8_t current = xml_parser_peek(parser, CURRENT_CHARACTER);
  361. if ('<' == current) {
  362. break;
  363. } else {
  364. xml_parser_consume(parser, 1);
  365. length++;
  366. }
  367. }
  368. /* Next character must be an `<' or we have reached end of file
  369. */
  370. if ('<' != xml_parser_peek(parser, CURRENT_CHARACTER)) {
  371. xml_parser_error(parser, CURRENT_CHARACTER, "xml_parse_content::expected <");
  372. return 0;
  373. }
  374. /* Ignore tailing whitespace
  375. */
  376. while ((length > 0) && isspace(parser->buffer[start + length - 1])) {
  377. length--;
  378. }
  379. /* Return text
  380. */
  381. struct xml_string* content = malloc(sizeof(struct xml_string));
  382. content->buffer = &parser->buffer[start];
  383. content->length = length;
  384. return content;
  385. }
  386. /**
  387. * [PRIVATE]
  388. *
  389. * Parses an XML fragment node
  390. *
  391. * ---( Example without children )---
  392. * <Node>Text</Node>
  393. * ---
  394. *
  395. * ---( Example with children )---
  396. * <Parent>
  397. * <Child>Text</Child>
  398. * <Child>Text</Child>
  399. * <Test>Content</Test>
  400. * </Parent>
  401. * ---
  402. */
  403. static struct xml_node* xml_parse_node(struct xml_parser* parser) {
  404. xml_parser_info(parser, "node");
  405. /* Setup variables
  406. */
  407. struct xml_string* tag_open = 0;
  408. struct xml_string* tag_close = 0;
  409. struct xml_string* content = 0;
  410. struct xml_node** children = calloc(1, sizeof(struct xml_node*));
  411. children[0] = 0;
  412. /* Parse open tag
  413. */
  414. tag_open = xml_parse_tag_open(parser);
  415. if (!tag_open) {
  416. xml_parser_error(parser, NO_CHARACTER, "xml_parse_node::tag_open");
  417. goto exit_failure;
  418. }
  419. /* If the content does not start with '<', a text content is assumed
  420. */
  421. if ('<' != xml_parser_peek(parser, CURRENT_CHARACTER)) {
  422. content = xml_parse_content(parser);
  423. if (!content) {
  424. xml_parser_error(parser, 0, "xml_parse_node::content");
  425. goto exit_failure;
  426. }
  427. /* Otherwise children are to be expected
  428. */
  429. } else while ('/' != xml_parser_peek(parser, NEXT_CHARACTER)) {
  430. /* Parse child node
  431. */
  432. struct xml_node* child = xml_parse_node(parser);
  433. if (!child) {
  434. xml_parser_error(parser, NEXT_CHARACTER, "xml_parse_node::child");
  435. goto exit_failure;
  436. }
  437. /* Grow child array :)
  438. */
  439. size_t old_elements = get_zero_terminated_array_elements(children);
  440. size_t new_elements = old_elements + 1;
  441. children = realloc(children, new_elements * sizeof(struct xml_node*));
  442. /* Save child
  443. */
  444. children[new_elements - 1] = child;
  445. children[new_elements] = 0;
  446. }
  447. /* Parse close tag
  448. */
  449. tag_close = xml_parse_tag_close(parser);
  450. if (!tag_close) {
  451. xml_parser_error(parser, NO_CHARACTER, "xml_parse_node::tag_close");
  452. goto exit_failure;
  453. }
  454. /* Close tag has to match open tag
  455. */
  456. if (!xml_string_equals(tag_open, tag_close)) {
  457. xml_parser_error(parser, NO_CHARACTER, "xml_parse_node::tag missmatch");
  458. goto exit_failure;
  459. }
  460. /* Return parsed node
  461. */
  462. xml_string_free(tag_close);
  463. struct xml_node* node = malloc(sizeof(struct xml_node));
  464. node->name = tag_open;
  465. node->content = content;
  466. node->children = children;
  467. return node;
  468. /* A failure occured, so free all allocalted resources
  469. */
  470. exit_failure:
  471. if (tag_open) {
  472. xml_string_free(tag_open);
  473. }
  474. if (tag_close) {
  475. xml_string_free(tag_close);
  476. }
  477. if (content) {
  478. xml_string_free(content);
  479. }
  480. struct xml_node** it = children;
  481. while (*it) {
  482. xml_node_free(*it);
  483. ++it;
  484. }
  485. free(children);
  486. return 0;
  487. }
  488. /**
  489. * [PUBLIC API]
  490. *
  491. *
  492. */
  493. struct xml_document* xml_parse_document(uint8_t* buffer, size_t length) {
  494. /* Initialize parser
  495. */
  496. struct xml_parser parser = {
  497. .buffer = buffer,
  498. .position = 0,
  499. .length = length
  500. };
  501. /* An empty buffer can never contain a valid document
  502. */
  503. if (!length) {
  504. xml_parser_error(&parser, NO_CHARACTER, "xml_parse_document::length equals zero");
  505. return 0;
  506. }
  507. /* Parse the root node
  508. */
  509. struct xml_node* root = xml_parse_node(&parser);
  510. if (!root) {
  511. xml_parser_error(&parser, NO_CHARACTER, "xml_parse_document::parsing document failed");
  512. return 0;
  513. }
  514. /* Return parsed document
  515. */
  516. struct xml_document* document = malloc(sizeof(struct xml_document));
  517. document->buffer.buffer = buffer;
  518. document->buffer.length = length;
  519. document->root = root;
  520. return document;
  521. }
  522. /**
  523. * [PUBLIC API]
  524. */
  525. void xml_document_free(struct xml_document* document, _Bool free_buffer) {
  526. if (free_buffer) {
  527. free(document->buffer.buffer);
  528. }
  529. free(document);
  530. }
  531. /**
  532. * [PUBLIC API]
  533. */
  534. struct xml_node* xml_document_root(struct xml_document* document) {
  535. return document->root;
  536. }
  537. /**
  538. * [PUBLIC API]
  539. */
  540. struct xml_string* xml_node_name(struct xml_node* node) {
  541. return node->name;
  542. }
  543. /**
  544. * [PUBLIC API]
  545. */
  546. struct xml_string* xml_node_content(struct xml_node* node) {
  547. return node->content;
  548. }
  549. /**
  550. * [PUBLIC API]
  551. *
  552. * @warning O(n)
  553. */
  554. size_t xml_node_children(struct xml_node* node) {
  555. return get_zero_terminated_array_elements(node->children);
  556. }
  557. /**
  558. * [PUBLIC API]
  559. */
  560. struct xml_node* xml_node_child(struct xml_node* node, size_t child) {
  561. if (child >= xml_node_children(node)) {
  562. return 0;
  563. }
  564. return node->children[child];
  565. }
  566. /**
  567. * [PUBLIC API]
  568. */
  569. size_t xml_string_length(struct xml_string* string) {
  570. return string->length;
  571. }
  572. /**
  573. * [PUBLIC API]
  574. */
  575. void xml_string_copy(struct xml_string* string, uint8_t* buffer, size_t length) {
  576. #define min(X,Y) ((X) < (Y) ? (X) : (Y))
  577. length = min(length, string->length);
  578. #undef min
  579. memcpy(buffer, string->buffer, length);
  580. }