1 //==========================================================================
5 // ezxml - simple XML parsing routines
7 //==========================================================================
8 //####ECOSGPLCOPYRIGHTBEGIN####
9 // -------------------------------------------
10 // This file is part of eCos, the Embedded Configurable Operating System.
11 // Copyright (C) 2005 eCosCentric Ltd
13 // eCos is free software; you can redistribute it and/or modify it under
14 // the terms of the GNU General Public License as published by the Free
15 // Software Foundation; either version 2 or (at your option) any later version.
17 // eCos is distributed in the hope that it will be useful, but WITHOUT ANY
18 // WARRANTY; without even the implied warranty of MERCHANTABILITY or
19 // FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
22 // You should have received a copy of the GNU General Public License along
23 // with eCos; if not, write to the Free Software Foundation, Inc.,
24 // 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA.
26 // As a special exception, if other files instantiate templates or use macros
27 // or inline functions from this file, or you compile this file and link it
28 // with other works to produce a work based on this file, this file does not
29 // by itself cause the resulting work to be covered by the GNU General Public
30 // License. However the source code for this file must still be made available
31 // in accordance with section (3) of the GNU General Public License.
33 // This exception does not invalidate any other reasons why a work based on
34 // this file might be covered by the GNU General Public License.
36 // Alternative licenses for eCos may be arranged by contacting Red Hat, Inc.
37 // at http://sources.redhat.com/ecos/ecos-license/
38 // -------------------------------------------
39 //####ECOSGPLCOPYRIGHTEND####
40 //==========================================================================
41 //#####DESCRIPTIONBEGIN####
43 // Author(s): Aaron Voisine
44 // Contributors: Matt Jerdonek
49 // This code is part of eCos (tm).
51 //####DESCRIPTIONEND####
53 //==========================================================================
58 * Copyright 2004 Aaron Voisine <aaron@voisine.org>
60 * Permission is hereby granted, free of charge, to any person obtaining
61 * a copy of this software and associated documentation files (the
62 * "Software"), to deal in the Software without restriction, including
63 * without limitation the rights to use, copy, modify, merge, publish,
64 * distribute, sublicense, and/or sell copies of the Software, and to
65 * permit persons to whom the Software is furnished to do so, subject to
66 * the following conditions:
68 * The above copyright notice and this permission notice shall be included
69 * in all copies or substantial portions of the Software.
71 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
72 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
73 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
74 * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
75 * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
76 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
77 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
86 #include <sys/types.h>
93 #include <pkgconf/system.h>
#define EZXML_TXTM 0x80 // flag value meaning txt was malloced
#define EZXML_WS "\t\r\n " // whitespace

// Called when the parser finds the close of a tag: moves the insertion point
// of the given root back up to the parent of the current tag.
// The expansion is one parenthesized expression with no trailing semicolon, so
// `ezxml_close_tag(root);` behaves like an ordinary statement (also as the
// unbraced body of an if/else) and any pointer expression may be passed as
// root. Note that root is evaluated twice.
#define ezxml_close_tag(root) ((root)->cur = (root)->cur->parent)
// Root-only bookkeeping for a parsed document. The plain ezxml struct is the
// first member, and the code in this file casts freely between ezxml_t and
// ezxml_root_t for the root tag.
101 typedef struct ezxml_root *ezxml_root_t;
102 struct ezxml_root { // additional data for the root tag
103 struct ezxml xml; // is a super-struct built on top of ezxml struct
104 ezxml_t cur; // current xml tree insertion point (moved by open/close tag)
105 void *m; // original xml string; released by ezxml_free()
106 size_t len; // length of allocated memory for mmap, or -1 for malloc
107 const char *err; // error string, returned by ezxml_error()
108 char ***pi; // processing instructions: NULL terminated list of NULL
// terminated string arrays, each starting with its target name
111 // returns the first child tag with the given name or NULL if not found
// name is handed to strcmp() and therefore must not be NULL.
112 ezxml_t ezxml_child(ezxml_t xml, const char *name)
// a NULL tag has no children
114 if (! xml) return NULL;
// walk the sibling chain until a name matches or the chain ends
116 while (xml && strcmp(name, xml->name)) xml = xml->sibling;
121 // returns the Nth tag with the same name in the same subsection or NULL if not
123 ezxml_t ezxml_idx(ezxml_t xml, int idx)
// Follow idx links of the `next` chain, stopping early when the chain ends.
// idx is expected to be non-negative: a negative value does not count down to
// zero, so the walk simply runs off the end of the chain.
125 for (; xml && idx; idx--) xml = xml->next;
129 // returns the value of the requested tag attribute or NULL if not found
// xml->attr is scanned as a NULL terminated list of name/value pairs: names at
// even indexes, the matching value directly after each name.
130 const char *ezxml_attr(ezxml_t xml, const char *attr)
134 if (! xml) return NULL;
// step over pairs until the name matches or the terminating NULL is reached
135 while (xml->attr[i] && strcmp(attr, xml->attr[i])) i += 2;
136 return (xml->attr[i]) ? xml->attr[i + 1] : NULL;
139 // same as ezxml_get but takes an already initialized va_list
// Consumes one (name, index) pair per call and recurses for the next pair.
140 ezxml_t ezxml_vget(ezxml_t xml, va_list ap)
142 char *name = va_arg(ap, char *);
143 int idx = va_arg(ap, int);
// first child called name (NULL if xml is NULL or there is no such child)
145 xml = ezxml_child(xml, name);
// a negative index ends the path; otherwise descend into the idx-th match
146 return (idx < 0) ? xml : ezxml_vget(ezxml_idx(xml, idx), ap);
149 // Traverses the xml tree to retrieve a specific subtag. Takes a variable
150 // length list of tag names and indexes. Final index must be -1. Example:
151 // title = ezxml_get(library, "shelf", 0, "book", 2, "title", -1);
152 // This retrieves the title of the 3rd book on the 1st shelf of library.
153 // Returns NULL if not found.
154 ezxml_t ezxml_get(ezxml_t xml, ...)
// the actual traversal is done by ezxml_vget() on the argument list
160 ret = ezxml_vget(xml, ap);
166 // returns a NULL terminated array of processing instructions for the given
168 const char **ezxml_pi(ezxml_t xml, const char *target)
// shared empty result: an array holding only the terminating NULL
170 static const char *nopi = NULL;
// processing instructions are stored on the root tag, so climb to it first
174 while (xml->parent) xml = xml->parent;
175 root = (ezxml_root_t)xml;
177 if (! root->pi) return &nopi;
// each entry starts with its target name; find the entry for this target
178 while (root->pi[i] && strcmp(target, root->pi[i][0])) i++;
// skip the target name so the caller only sees the instructions themselves
179 return (root->pi[i]) ? (const char **)root->pi[i] + 1 : &nopi;
// Converts \r or \r\n to a single \n. If decode is non-zero, also decodes
// ampersand sequences in place: the five predefined entities (&lt; &gt;
// &quot; &apos; &amp;) and numeric character references (&#N; and &#xH;),
// which are written out as UTF-8. Anything that is not a complete reference,
// and any numeric reference outside 1..0x10FFFF, is left in the text as is.
// The result is never longer than the input. Returns s.
char *ezxml_decode(char *s, int decode)
{
    char *ret = s; // start of the buffer, handed back to the caller
    char *e;       // first character after the digits of a numeric reference
    char *semi;    // the ';' that ends the reference being replaced
    long c, d;     // referenced code point / scratch copy for bit counting
    int b;         // bit count of c, then number of UTF-8 continuation bytes

    for (;;) {
        // skip ahead to the next character that may need rewriting
        while (*s && *s != '\r' && *s != '&') s++;

        if (! *s) return ret;
        else if (*s == '\r') {
            *(s++) = '\n';
            // strlen(s) includes the '\n' being dropped, so the terminating
            // '\0' is moved down together with the rest of the text
            if (*s == '\n') memmove(s, s + 1, strlen(s));
            continue;
        }
        else if (! decode) { s++; continue; }
        else if (! strncmp(s, "&lt;", 4)) *(s++) = '<';
        else if (! strncmp(s, "&gt;", 4)) *(s++) = '>';
        else if (! strncmp(s, "&quot;", 6)) *(s++) = '"';
        else if (! strncmp(s, "&apos;", 6)) *(s++) = '\'';
        else if (! strncmp(s, "&amp;", 5)) s++; // the '&' is already in place
        else if (! strncmp(s, "&#", 2)) {
            if (s[2] == 'x') c = strtol(s + 3, &e, 16);
            else c = strtol(s + 2, &e, 10);

            // Reject missing digits (c == 0), a missing ';', negative values
            // and values beyond the Unicode range. The range check also keeps
            // the shift counts below valid and guarantees that the encoded
            // bytes (at most 4) fit in the space taken by the reference.
            if (c <= 0 || c > 0x10FFFF || *e != ';') { s++; continue; }

            if (c < 0x80) *(s++) = (char)c; // US-ASCII subset
            else { // multi-byte UTF-8 sequence
                for (b = 0, d = c; d; d /= 2) b++; // number of bits in c
                b = (b - 2) / 5; // number of bytes in payload
                *(s++) = (char)(((0xFF << (7 - b)) | (c >> (6 * b))) & 0xFF);
                while (b) *(s++) = (char)(0x80 | ((c >> (6 * --b)) & 0x3F));
            }
        }
        else { s++; continue; }

        // s now sits just after the decoded character(s); close the gap up to
        // and including the ';' (strlen(semi) also covers the final '\0')
        semi = strchr(s, ';');
        memmove(s, semi + 1, strlen(semi));
    }
}
224 // called when parser finds start of new tag
// Allocates a new tag below the current insertion point, links it into the
// tree and makes it the new insertion point (root->cur).
// NOTE(review): the malloc() results below are used without a NULL check.
225 void ezxml_open_tag(ezxml_root_t root, char *name, char **attr)
227 ezxml_t xml = root->cur;
229 if (xml->name) { // not root tag
230 if (xml->child) { // already have sub tags
// append the new tag to the end of the `ordered` chain
232 while (xml->ordered) xml = xml->ordered;
233 xml->ordered = (ezxml_t)malloc(sizeof(struct ezxml));
234 xml->ordered->parent = root->cur;
235 root->cur = xml->ordered;
236 xml = xml->parent->child;
// look along the sibling chain for an existing tag with this name
238 while (strcmp(xml->name, name) && xml->sibling) xml = xml->sibling;
239 if (! strcmp(xml->name, name)) { // already have this tag type
// same-named tags are chained through `next`; append at the end
240 while (xml->next) xml = xml->next;
241 xml = xml->next = root->cur;
// first tag of this name: hang it off the end of the sibling chain
243 else xml = xml->sibling = root->cur;
245 else { // first sub tag
246 xml->child = (ezxml_t)malloc(sizeof(struct ezxml));
247 xml->child->parent = xml;
248 root->cur = xml = xml->child;
251 xml->off = strlen(xml->parent->txt); // offset in parent char content
254 // initialize new tag
257 xml->next = xml->child = xml->sibling = xml->ordered = NULL;
262 // called when parser finds character content between open and closing tag
// Appends the text at s to the character content of the current tag. decode
// is passed through to ezxml_decode().
// NOTE(review): the malloc()/realloc() results below are used unchecked.
263 void ezxml_char_content(ezxml_root_t root, char *s, size_t len, short decode)
265 ezxml_t xml = root->cur;
// nothing to do without a current named tag or without any text
268 if (! xml || ! xml->name || ! len) return;
271 ezxml_decode(s, decode);
// first piece of text for this tag: point straight at s, no copy is made
273 if (! *(xml->txt)) xml->txt = s;
274 else { // allocate our own memory and make a copy
275 l = strlen(xml->txt);
// EZXML_TXTM records that txt is now heap memory owned by the tag
276 if (! (xml->flags & EZXML_TXTM)) {
277 xml->txt = strcpy((char *)malloc(l + len + 1), xml->txt);
278 xml->flags |= EZXML_TXTM;
280 else xml->txt = (char *)realloc((void *)(xml->txt), l + len + 1);
// append the new text after the existing content
281 strcpy(xml->txt + l, s);
285 // called when the parser finds an xml processing instruction
// Splits the instruction in place into target and content and records it in
// root->pi, a NULL terminated list with one NULL terminated entry per target.
// NOTE(review): the malloc()/realloc() results below are used unchecked.
286 void ezxml_proc_inst(ezxml_root_t root, char *s, size_t len)
291 s[len] = '\0'; // null terminate instruction
292 *(s += strcspn(s, EZXML_WS)) = '\0'; // null terminate target
293 s += strspn(s + 1, EZXML_WS) + 1; // skip whitespace after target
// create the (initially empty) list of targets on first use
295 if (! root->pi) *(root->pi = (char ***)malloc(sizeof(char**))) = NULL;
// find the entry whose first element is this target name
297 while (root->pi[i] && strcmp(target, root->pi[i][0])) i++;
298 if (! root->pi[i]) { // new target
// grow the list by one slot and start an entry holding only the target name
299 root->pi = (char ***)realloc(root->pi, sizeof(char **) * (i + 2));
300 root->pi[i] = (char **)malloc(sizeof(char *) * 2);
301 root->pi[i][0] = target;
302 root->pi[i + 1] = NULL; // null terminate lists
303 root->pi[i][1] = (char *)root->pi[i + 1];
// find the end of this target's entry and make room for one more element
306 while (root->pi[i][j]) j++;
307 root->pi[i] = (char **)realloc(root->pi[i], sizeof(char *) * (j + 2));
309 root->pi[i][j + 1] = NULL;
312 // set an error string and return root
// The root is returned as a plain ezxml_t so parser code can write
// `return ezxml_seterr(root, "...")`.
313 ezxml_t ezxml_seterr(ezxml_root_t root, const char *err)
316 return (ezxml_t)root;
319 // parse the given xml string and return an ezxml structure
// The string is modified in place (terminators are written into it and
// entities are decoded), so it must be writable. Returns NULL only when the
// root structure cannot be allocated; every parse error is instead recorded
// on the returned root through ezxml_seterr().
// NOTE(review): `e` is compared against '>' wherever s has run into the
// terminator; it appears to hold the original last character of the input --
// confirm where it is assigned.
// NOTE(review): isalpha()/isspace() are called with a plain char, which is a
// problem for bytes >= 0x80 where char is signed.
320 ezxml_t ezxml_parse_str(char *s, size_t len)
322 ezxml_root_t root = (ezxml_root_t)malloc(sizeof(struct ezxml_root));
323 char *d, **attr, q, e;
// shared empty attribute list for tags without attributes
324 static char *noattr[] = { NULL };
327 if (! root) return NULL;
329 // initialize root tag
330 memset((void *)root, '\0', sizeof (struct ezxml_root));
331 root->xml.attr = noattr;
332 root->cur = (ezxml_t)root;
334 root->err = root->xml.txt = "";
336 if (! len) return ezxml_seterr(root, "root tag missing");
340 while (*s && *s != '<') s++; // find first tag
341 if (! *s) return ezxml_seterr(root, "root tag missing");
347 if (isalpha(*s) || *s == '_' || *s == ':') { // new tag
348 if (! root->cur) return ezxml_seterr(root, "unmatched closing tag");
// the tag name ends at the first whitespace, '/' or '>'
350 s += strcspn(s, EZXML_WS "/>");
351 if (isspace(*s)) *(s++) = '\0';
354 while (*s && *s != '/' && *s != '>') { // new tag attribute
355 while (isspace(*s)) s++;
// room for one more name/value pair plus the terminating NULL
// NOTE(review): the allocation result is used unchecked
357 attr = (char **)((! l) ? malloc(3 * sizeof (char *)) :
358 realloc((void *)attr, (l + 3) * sizeof (char *)));
// the attribute name ends at whitespace, '=', '/' or '>'
361 s += strcspn(s, EZXML_WS "=/>");
362 if (*s == '=' || isspace(*s)) {
// q is the first character after the '=' and any whitespace
364 q = *(s += strspn(s, EZXML_WS "="));
365 if (q == '"' || q == '\'') { // attribute value
// the value runs up to the matching quote character
367 while (*s && *s != q) s++;
368 if (*s) *(s++) = '\0';
371 return ezxml_seterr(root, (q == '"') ?
372 "missing \"" : "missing '");
// attribute values always have their entities decoded
374 ezxml_decode(attr[l + 1], 1);
376 else attr[l + 1] = "";
378 else attr[l + 1] = "";
// advance to the next pair and keep the list NULL terminated
380 attr[(l += 2)] = NULL;
383 if (*s == '/') { // self closing tag
385 if ((*s && *s != '>') || (! *s && e != '>')) {
387 return ezxml_seterr(root, "missing >");
// a self closing tag is opened and closed immediately
389 ezxml_open_tag(root, d, attr);
390 ezxml_close_tag(root);
392 else if (*s == '>' || (! *s && e == '>')) { // open tag
395 ezxml_open_tag(root, d, attr);
400 return ezxml_seterr(root, "missing >");
403 else if (*s == '/') { // close tag
404 if (! root->cur) return ezxml_seterr(root, "unmatched closing tag");
// NOTE(review): the name inside the closing tag is not compared with the
// open tag in the lines visible here
405 ezxml_close_tag(root);
406 while (*s && *s != '>') s++;
407 if (! *s && e != '>') return ezxml_seterr(root, "missing >");
409 else if (! strncmp(s, "!--", 3)) { // comment
// search for "--" directly followed by '>'
410 do { s = strstr(s, "--"); } while (s && *(s += 2) && *s != '>');
411 if (! s || (! *s && e != '>'))
412 return ezxml_seterr(root, "unclosed <!--");
414 else if (! strncmp(s, "![CDATA[", 8)) { // cdata
// cdata is stored as character content without entity decoding
415 if ((s = strstr(s, "]]>")))
416 ezxml_char_content(root, d + 8, (s += 2) - d - 10, 0);
417 else return ezxml_seterr(root, "unclosed <![CDATA[");
419 else if (! strncmp(s, "!DOCTYPE", 8)) { // skip <!DOCTYPE declarations
// l becomes 1 once a '[' is seen; from then on only a ']' followed by
// optional whitespace and '>' ends the declaration
420 for (l = 0; *s && ((! l && *s != '>') || (l && (*s != ']' ||
421 s[strspn(s + 1, EZXML_WS) + 1] != '>')));
422 l = (*s == '[') ? 1 : l) s += strcspn(s + 1, "[]>") + 1;
423 if (! *s && e != '>')
424 return ezxml_seterr(root, "unclosed <!DOCTYPE");
426 else if (*s == '?') { // <?...?> processing instructions
// search for '?' directly followed by '>'
427 do { s = strchr(s, '?'); } while (s && *(++s) && *s != '>');
428 if (! s || (! *s && e != '>'))
429 return ezxml_seterr(root, "unclosed <?");
430 else ezxml_proc_inst(root, d + 1, s - d - 2);
432 else return ezxml_seterr(root, "syntax error");
434 if (! s || ! *s) break;
437 if (*s && *s != '<') { // tag character content
438 while (*s && *s != '<') s++;
// text that is not followed by another tag is not stored
439 if (*s) ezxml_char_content(root, d, s - d, 1);
442 else if (! *s) break;
// a non-NULL insertion point here means some tag was never closed (or, when
// it is still the nameless root, that no tag was found at all)
445 return (root->cur) ? ezxml_seterr(root, (root->cur->name) ? "unclosed tag" :
446 "root tag missing") : (ezxml_t)root;
449 #ifdef CYGPKG_IO_FILEIO
450 // Wrapper for ezxml_parse_str() that accepts a file stream. Reads the entire
451 // stream into memory and then parses it. For xml files, use ezxml_parse_file()
452 // or ezxml_parse_fd()
453 ezxml_t ezxml_parse_fp(FILE *fp)
456 size_t l, len = 0, ps = (size_t)sysconf(_SC_PAGESIZE);
459 if (! (s = (char *)malloc(ps))) return NULL;
461 len += (l = fread((void *)((int)s + len), 1, ps, fp));
462 if (l == ps) s = (char *)realloc((void *)s, len + ps);
463 } while (s && l == ps);
465 if (! s) return NULL;
467 ret = (ezxml_root_t)ezxml_parse_str(s, len);
468 ret->len = -1; // so we know to free s in ezxml_free()
473 // A wrapper for ezxml_parse_str() that accepts a file descriptor. First
474 // attempts to mem map the file. Failing that, reads the file into memory.
475 // Returns NULL on failure.
476 ezxml_t ezxml_parse_fd(int fd)
480 size_t len, ps = (size_t)sysconf(_SC_PAGESIZE);
483 if (fd < 0) return NULL;
486 len = (stat.st_size + ps - 1) & ~(ps - 1); // round up to next page boundary
// a private, writable mapping: the parser modifies the text in place
490 if ((m = mmap(NULL, len, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0)) !=
492 madvise(m, len, MADV_SEQUENTIAL); // optimize for sequential access
// only the real file size is parsed, not the rounded up mapping length
493 ret = (ezxml_root_t)ezxml_parse_str((char *)m, stat.st_size);
494 madvise(m, len, MADV_NORMAL); // put it back to normal
498 else { // mmap failed, read file into memory
499 if (! (m = malloc(stat.st_size))) return NULL;
// NOTE(review): read() returns -1 on error and may return fewer bytes than
// requested; the result is stored in a size_t and used as the length as is
500 len = read(fd, m, stat.st_size);
501 ret = (ezxml_root_t)ezxml_parse_str((char *)m, len);
508 // a wrapper for ezxml_parse_fd that accepts a file name
509 ezxml_t ezxml_parse_file(const char *file)
// a failed open() yields a negative fd, which ezxml_parse_fd() turns into NULL
511 int fd = open(file, O_RDONLY, 0);
512 ezxml_t ret = ezxml_parse_fd(fd);
// the descriptor is closed as soon as parsing is done
514 if (fd >= 0) close(fd);
518 // Encodes ampersand sequences appending the results to dst, reallocating dst
519 // if it's length excedes max. Returns *dst.
520 char *ezxml_ampencode(const char *s, size_t len, char **dst, size_t *dlen,
525 for (e = s + len; s != e; s++) {
526 while (*dlen + 10 > *max) *dst = realloc(*dst, *max += EZXML_BUFSIZE);
529 case '\0': return *dst;
530 case '&': *dlen += sprintf(*dst + *dlen, "&"); break;
531 case '<': *dlen += sprintf(*dst + *dlen, "<"); break;
532 case '>': *dlen += sprintf(*dst + *dlen, ">"); break;
533 case '"': *dlen += sprintf(*dst + *dlen, """); break;
535 if (*s >= ' ' || *s == '\n' || *s == '\t') (*dst)[(*dlen)++] = *s;
536 else *dlen += sprintf(*dst + *dlen, "&#%02d;", *s);
543 // Recursively converts each tag to xml appending it to s. Reallocates s if its
544 // length exceeds max. start is the location of the previous tag in the parent
545 // tag's character content. Returns *s.
// NOTE(review): the realloc() results below are used unchecked.
546 char *ezxml_toxml_r(ezxml_t xml, char **s, size_t *len, size_t *max,
// a tag without a parent has no surrounding character content
550 char *txt = (xml->parent) ? xml->parent->txt : "";
552 // parent character content up to this tag
553 ezxml_ampencode(txt + start, xml->off - start, s, len, max);
555 while (*len + strlen(xml->name) + 4 > *max) // reallocate s
556 *s = realloc(*s, *max += EZXML_BUFSIZE);
558 *len += sprintf(*s + *len, "<%s", xml->name); // open tag
559 for (i = 0; xml->attr[i]; i += 2) { // tag attributes
560 while (*len + strlen(xml->attr[i]) + 7 > *max) // reallocate s
561 *s = realloc(*s, *max += EZXML_BUFSIZE);
// the name is written as is, the value goes through ezxml_ampencode()
563 *len += sprintf(*s + *len, " %s=\"", xml->attr[i]);
564 ezxml_ampencode(xml->attr[i + 1], -1, s, len, max);
565 *len += sprintf(*s + *len, "\"");
568 if (xml->child || *(xml->txt)) { // tag content
569 *len += sprintf(*s + *len, ">");
// with sub tags, the recursion also emits this tag's text between them
570 if (xml->child) ezxml_toxml_r(xml->child, s, len, max, 0);
571 else ezxml_ampencode(xml->txt, -1, s, len, max); // char content
573 while (*len + strlen(xml->name) + 4 > *max) // reallocate s
574 *s = realloc(*s, *max += EZXML_BUFSIZE);
576 *len += sprintf(*s + *len, "</%s>", xml->name); // close tag
578 else *len += sprintf(*s + *len, "/>"); // self closing tag
// continue with the next tag in document order, or finish with the parent
// text that follows this tag
580 if (xml->ordered) return ezxml_toxml_r(xml->ordered, s, len, max, xml->off);
581 return ezxml_ampencode(txt + xml->off, -1, s, len, max);
584 // converts an ezxml structure back to xml, returning it as a string that must
586 char *ezxml_toxml(ezxml_t xml)
588 ezxml_t p = xml->parent;
589 size_t len = 0, max = EZXML_BUFSIZE;
590 char *s = strcpy((char *)malloc(max), "");
592 if (! xml || ! xml->name) return realloc(s, len + 1);
595 ezxml_toxml_r(xml, &s, &len, &max, 0);
598 return realloc(s, len + 1);
601 // free the memory allocated for the ezxml structure
// Recurses through the child and ordered links to release every tag.
602 void ezxml_free(ezxml_t xml)
604 void *m = NULL; // assigned to null to avoid compiler warning
// NOTE(review): the fields read below exist only on the root tag; presumably
// this part runs for the root only -- confirm against the full function
611 m = ((ezxml_root_t)xml)->m;
612 len = ((ezxml_root_t)xml)->len;
// release each processing instruction entry, then the list itself
614 if (((ezxml_root_t)xml)->pi) {
615 for (i = 0; ((ezxml_root_t)xml)->pi[i]; i++)
616 free(((ezxml_root_t)xml)->pi[i]);
617 free(((ezxml_root_t)xml)->pi);
621 ezxml_free(xml->child);
622 ezxml_free(xml->ordered);
// tags without attributes have nothing in attr[0] and own no attribute list
624 if (xml->attr[0]) free((void *)xml->attr);
// txt is only owned by the tag when the EZXML_TXTM flag is set
625 if ((xml->flags & EZXML_TXTM)) free((void *)xml->txt);
// a length of -1 marks a malloc()ed buffer, any other non-zero length a
// mapping of that size
628 if (len == -1) free(m);
630 else if (len) munmap(m, len);
634 // return parser error message or empty string if none
635 const char *ezxml_error(ezxml_t xml)
637 while (xml->parent) xml = xml->parent;
638 return ((ezxml_root_t)xml)->err;
641 #ifdef EZXMLTEST // test harness
// Parses the xml file named on the command line, writes it back out as xml on
// stdout and prints any parser error message on stderr.
643 int main(int argc, char **argv)
// note: the value returned here is the character count reported by fprintf()
648 if (argc != 2) return fprintf(stderr, "usage: %s xmlfile\n", argv[0]);
// NOTE(review): ezxml_parse_file() can return NULL (for example when the file
// cannot be opened); xml is passed on below without being checked
650 xml = ezxml_parse_file(argv[1]);
651 printf("%s", (s = ezxml_toxml(xml)));
652 fprintf(stderr, "%s", ezxml_error(xml));