1 |
// Written in the D programming language. |
---|
2 |
|
---|
3 |
module std2.xml; |
---|
4 |
import std2.encoding; |
---|
5 |
|
---|
6 |
import std.string; |
---|
7 |
|
---|
8 |
private alias find indexOf; |
---|
9 |
private alias rfind lastIndexOf; |
---|
10 |
|
---|
11 |
|
---|
12 |
|
---|
13 |
/** |
---|
14 |
Classes and functions for creating and parsing XML |
---|
15 |
|
---|
16 |
The basic architecture of this module is that there are standalone functions, |
---|
17 |
classes for constructing an XML document from scratch (Tag, Element and |
---|
18 |
Document), and also classes for parsing a pre-existing XML file (ElementParser |
---|
19 |
and DocumentParser). The parsing classes <i>may</i> be used to build a |
---|
20 |
Document, but that is not their primary purpose. The handling capabilities of |
---|
21 |
DocumentParser and ElementParser are sufficiently customizable that you can |
---|
22 |
make them do pretty much whatever you want. |
---|
23 |
|
---|
24 |
Authors: Janice Caron |
---|
25 |
|
---|
26 |
Date: 2008.02.12 - 2008.05.07 |
---|
27 |
|
---|
28 |
License: Public Domain |
---|
29 |
|
---|
30 |
Example: This example creates a DOM (Document Object Model) tree |
---|
31 |
from an XML file. |
---|
32 |
------------------------------------------------------------------------------ |
---|
33 |
import std.xml; |
---|
34 |
import std.stdio; |
---|
35 |
import std.string; |
---|
36 |
|
---|
37 |
// books.xml is used in various samples throughout the Microsoft XML Core |
---|
38 |
// Services (MSXML) SDK. |
---|
39 |
// |
---|
40 |
// See http://msdn2.microsoft.com/en-us/library/ms762271(VS.85).aspx |
---|
41 |
|
---|
42 |
void main() |
---|
43 |
{ |
---|
44 |
string s = cast(string)std.file.read("books.xml"); |
---|
45 |
|
---|
46 |
// Check for well-formedness |
---|
47 |
check(s); |
---|
48 |
|
---|
49 |
// Make a DOM tree |
---|
50 |
auto doc = new Document(s); |
---|
51 |
|
---|
52 |
// Plain-print it |
---|
53 |
writefln(doc); |
---|
54 |
} |
---|
55 |
------------------------------------------------------------------------------ |
---|
56 |
|
---|
57 |
Example: This example does much the same thing, except that the file is |
---|
58 |
deconstructed and reconstructed by hand. This is more work, but the |
---|
59 |
techniques involved offer vastly more power. |
---|
60 |
------------------------------------------------------------------------------ |
---|
61 |
import std.xml; |
---|
62 |
import std.stdio; |
---|
63 |
import std.string; |
---|
64 |
|
---|
65 |
struct Book |
---|
66 |
{ |
---|
67 |
string id; |
---|
68 |
string author; |
---|
69 |
string title; |
---|
70 |
string genre; |
---|
71 |
string price; |
---|
72 |
string pubDate; |
---|
73 |
string description; |
---|
74 |
} |
---|
75 |
|
---|
76 |
void main() |
---|
77 |
{ |
---|
78 |
string s = cast(string)std.file.read("books.xml"); |
---|
79 |
|
---|
80 |
// Check for well-formedness |
---|
81 |
check(s); |
---|
82 |
|
---|
83 |
// Take it apart |
---|
84 |
Book[] books; |
---|
85 |
|
---|
86 |
auto xml = new DocumentParser(s); |
---|
87 |
xml.onStartTag["book"] = (ElementParser xml) |
---|
88 |
{ |
---|
89 |
Book book; |
---|
90 |
book.id = xml.tag.attr["id"]; |
---|
91 |
|
---|
92 |
xml.onEndTag["author"] = (in Element e) { book.author = e.text; }; |
---|
93 |
xml.onEndTag["title"] = (in Element e) { book.title = e.text; }; |
---|
94 |
xml.onEndTag["genre"] = (in Element e) { book.genre = e.text; }; |
---|
95 |
xml.onEndTag["price"] = (in Element e) { book.price = e.text; }; |
---|
96 |
xml.onEndTag["publish-date"] = (in Element e) { book.pubDate = e.text; }; |
---|
97 |
xml.onEndTag["description"] = (in Element e) { book.description = e.text; }; |
---|
98 |
|
---|
99 |
xml.parse(); |
---|
100 |
|
---|
101 |
books ~= book; |
---|
102 |
}; |
---|
103 |
xml.parse(); |
---|
104 |
|
---|
105 |
// Put it back together again; |
---|
106 |
auto doc = new Document(new Tag("catalog")); |
---|
107 |
foreach(book;books) |
---|
108 |
{ |
---|
109 |
auto element = new Element("book"); |
---|
110 |
element.tag.attr["id"] = book.id; |
---|
111 |
|
---|
112 |
element ~= new Element("author", book.author); |
---|
113 |
element ~= new Element("title", book.title); |
---|
114 |
element ~= new Element("genre", book.genre); |
---|
115 |
element ~= new Element("price", book.price); |
---|
116 |
element ~= new Element("publish-date",book.pubDate); |
---|
117 |
element ~= new Element("description", book.description); |
---|
118 |
|
---|
119 |
doc ~= element; |
---|
120 |
} |
---|
121 |
|
---|
122 |
// Pretty-print it |
---|
123 |
writefln(join(doc.pretty(3),"\n")); |
---|
124 |
} |
---|
125 |
------------------------------------------------------------------------------- |
---|
126 |
* Macros: |
---|
127 |
* WIKI=Phobos/StdXml |
---|
128 |
*/ |
---|
129 |
|
---|
130 |
/** |
---|
131 |
* Abstract base class for XML items |
---|
132 |
*/ |
---|
/**
 * Common abstract base for every node kind in the XML tree.
 *
 * Subclasses supply comparison, hashing and string conversion; a default
 * pretty-printer is provided here.
 */
abstract class Item
{
    /// Equality test against another Item of the same type
    abstract override int opEquals(Object o);

    /// Ordering comparison against another Item of the same type
    abstract override int opCmp(Object o);

    /// Hash value of this item
    abstract override hash_t toHash();

    /// String form of this item
    abstract override string toString();

    /**
     * Returns the stripped string form of this item as a list of lines.
     *
     * Params:
     *     indent = number of spaces by which to indent child elements
     *              (not used by this base implementation)
     *
     * Returns: an empty array when the stripped text is empty, otherwise a
     *          one-element array holding it
     */
    string[] pretty(uint indent)
    {
        string trimmed = strip(toString());
        string[] lines;
        if (trimmed.length != 0)
            lines ~= trimmed;
        return lines;
    }

    /// True when the item represents empty XML text
    abstract bool isEmptyXML();
}
162 |
|
---|
163 |
|
---|
164 |
string cdata = "<![CDATA["; |
---|
165 |
|
---|
166 |
/** |
---|
167 |
* Returns true if the character is a character according to the XML standard |
---|
168 |
* |
---|
169 |
* Standards: $(LINK2 http://www.w3.org/TR/1998/REC-xml-19980210, XML 1.0) |
---|
170 |
* |
---|
171 |
* Params: |
---|
172 |
* c = the character to be tested |
---|
173 |
*/ |
---|
bool isChar(dchar c) // rule 2
{
    // Membership test against the CharTable ranges.
    auto inTable = lookup(CharTable, c);
    return inTable;
}
178 |
|
---|
unittest
{
    // Boundary probes around each range of
    // CharTable=[0x9,0x9,0xA,0xA,0xD,0xD,0x20,0xD7FF,0xE000,0xFFFD,
    //            0x10000,0x10FFFF]
    uint[] accepted = [ 0x9, 0xA, 0xD, 0x20, 'J', 0xD7FF, 0xE000, 0xFFFD,
                        0x10000, 0x10FFFF ];
    uint[] rejected = [ 0x8, 0xB, 0xC, 0xE, 0x1F, 0xD800, 0xDFFF, 0xFFFE,
                        0xFFFF, 0x110000 ];

    foreach (cp; accepted)
        assert( isChar(cast(dchar)cp));

    foreach (cp; rejected)
        assert(!isChar(cast(dchar)cp));
}
/**
 * Splits off the longest leading run of s whose elements all match pattern.
 *
 * Params:
 *     s       = input; on return it holds only what follows the matched run
 *     pattern = character pattern handed to inPattern
 *
 * Returns: the matched leading run (possibly empty)
 */
S1 munch(S1, S2)(ref S1 s, S2 pattern)
{
    // Count the leading elements accepted by the pattern.
    size_t matched = 0;
    while (matched < s.length && inPattern(s[matched], pattern))
        ++matched;

    auto head = s[0 .. matched];
    s = s[matched .. $];
    return head;
}
218 |
/** |
---|
219 |
* Returns true if the character is whitespace according to the XML standard |
---|
220 |
* |
---|
221 |
* Only the following characters are considered whitespace in XML - space, tab, |
---|
222 |
* carriage return and linefeed |
---|
223 |
* |
---|
224 |
* Standards: $(LINK2 http://www.w3.org/TR/1998/REC-xml-19980210, XML 1.0) |
---|
225 |
* |
---|
226 |
* Params: |
---|
227 |
* c = the character to be tested |
---|
228 |
*/ |
---|
bool isSpace(dchar c)
{
    // Space, tab, linefeed and carriage return are the only XML whitespace.
    switch (c)
    {
        case '\u0020':
        case '\u0009':
        case '\u000A':
        case '\u000D':
            return true;

        default:
            return false;
    }
}
233 |
|
---|
234 |
/** |
---|
235 |
* Returns true if the character is a digit according to the XML standard |
---|
236 |
* |
---|
237 |
* Standards: $(LINK2 http://www.w3.org/TR/1998/REC-xml-19980210, XML 1.0) |
---|
238 |
* |
---|
239 |
* Params: |
---|
240 |
* c = the character to be tested |
---|
241 |
*/ |
---|
bool isDigit(dchar c)
{
    // Membership test against the DigitTable ranges.
    auto inTable = lookup(DigitTable, c);
    return inTable;
}
246 |
|
---|
247 |
/** |
---|
248 |
* Returns true if the character is a letter according to the XML standard |
---|
249 |
* |
---|
250 |
* Standards: $(LINK2 http://www.w3.org/TR/1998/REC-xml-19980210, XML 1.0) |
---|
251 |
* |
---|
252 |
* Params: |
---|
253 |
* c = the character to be tested |
---|
254 |
*/ |
---|
bool isLetter(dchar c) // rule 84
{
    // A letter is either an ideographic character or a base character.
    if (isIdeographic(c))
        return true;
    return isBaseChar(c);
}
259 |
|
---|
260 |
/** |
---|
261 |
* Returns true if the character is an ideographic character according to the |
---|
262 |
* XML standard |
---|
263 |
* |
---|
264 |
* Standards: $(LINK2 http://www.w3.org/TR/1998/REC-xml-19980210, XML 1.0) |
---|
265 |
* |
---|
266 |
* Params: |
---|
267 |
* c = the character to be tested |
---|
268 |
*/ |
---|
bool isIdeographic(dchar c)
{
    // Membership test against the IdeographicTable ranges.
    auto inTable = lookup(IdeographicTable, c);
    return inTable;
}
273 |
|
---|
274 |
/** |
---|
275 |
* Returns true if the character is a base character according to the XML |
---|
276 |
* standard |
---|
277 |
* |
---|
278 |
* Standards: $(LINK2 http://www.w3.org/TR/1998/REC-xml-19980210, XML 1.0) |
---|
279 |
* |
---|
280 |
* Params: |
---|
281 |
* c = the character to be tested |
---|
282 |
*/ |
---|
bool isBaseChar(dchar c)
{
    // Membership test against the BaseCharTable ranges.
    auto inTable = lookup(BaseCharTable, c);
    return inTable;
}
287 |
|
---|
288 |
/** |
---|
289 |
* Returns true if the character is a combining character according to the |
---|
290 |
* XML standard |
---|
291 |
* |
---|
292 |
* Standards: $(LINK2 http://www.w3.org/TR/1998/REC-xml-19980210, XML 1.0) |
---|
293 |
* |
---|
294 |
* Params: |
---|
295 |
* c = the character to be tested |
---|
296 |
*/ |
---|
bool isCombiningChar(dchar c)
{
    // Membership test against the CombiningCharTable ranges.
    auto inTable = lookup(CombiningCharTable, c);
    return inTable;
}
301 |
|
---|
302 |
/** |
---|
303 |
* Returns true if the character is an extender according to the XML standard |
---|
304 |
* |
---|
305 |
* Standards: $(LINK2 http://www.w3.org/TR/1998/REC-xml-19980210, XML 1.0) |
---|
306 |
* |
---|
307 |
* Params: |
---|
308 |
* c = the character to be tested |
---|
309 |
*/ |
---|
bool isExtender(dchar c)
{
    // Membership test against the ExtenderTable ranges.
    auto inTable = lookup(ExtenderTable, c);
    return inTable;
}
314 |
|
---|
315 |
/** |
---|
316 |
* Encodes a string by replacing all characters which need to be escaped with |
---|
317 |
* appropriate predefined XML entities. |
---|
318 |
* |
---|
319 |
* encode() escapes certain characters (ampersand, quote, apostrophe, less-than |
---|
320 |
* and greater-than), and similarly, decode() unescapes them. These functions |
---|
321 |
* are provided for convenience only. You do not need to use them when using |
---|
322 |
* the std.xml classes, because then all the encoding and decoding will be done |
---|
323 |
* for you automatically. |
---|
324 |
* |
---|
325 |
* If the string is not modified, the original will be returned. |
---|
326 |
* |
---|
327 |
* Standards: $(LINK2 http://www.w3.org/TR/1998/REC-xml-19980210, XML 1.0) |
---|
328 |
* |
---|
329 |
* Params: |
---|
330 |
* s = The string to be encoded |
---|
331 |
* |
---|
332 |
* Returns: The encoded string |
---|
333 |
* |
---|
334 |
* Examples: |
---|
335 |
* -------------- |
---|
336 |
* writefln(encode("a > b")); // writes "a &gt; b" |
---|
337 |
* -------------- |
---|
338 |
*/ |
---|
339 |
/* |
---|
340 |
// |
---|
341 |
//Specialized version of replace, to avoid the boring predictability of std.string.replace always returning a new array. |
---|
342 |
//Replace all occurrences of from with to. |
---|
343 |
//Return original array if from not found. |
---|
344 |
// |
---|
345 |
|
---|
346 |
string replaceIf(string s, dchar from, string to) |
---|
347 |
{ |
---|
348 |
char[] p; |
---|
349 |
int i; |
---|
350 |
size_t istart; |
---|
351 |
|
---|
352 |
istart = 0; |
---|
353 |
while (istart < s.length) |
---|
354 |
{ |
---|
355 |
i = find(s[istart .. s.length], from); |
---|
356 |
if (i == -1) |
---|
357 |
{ |
---|
358 |
if (istart == 0) |
---|
359 |
return s; |
---|
360 |
p ~= s[istart .. s.length]; |
---|
361 |
break; |
---|
362 |
} |
---|
363 |
p ~= s[istart .. istart + i]; |
---|
364 |
p ~= to; |
---|
365 |
istart += i + 1; |
---|
366 |
} |
---|
367 |
return p; |
---|
368 |
} |
---|
369 |
|
---|
370 |
string encodeStdEntity(string s) |
---|
371 |
{ |
---|
372 |
s = replaceIf(s,'&',"&"); |
---|
373 |
s = replaceIf(s,'\"',"""); |
---|
374 |
s = replaceIf(s,'\'',"'"); |
---|
375 |
s = replaceIf(s,'<',"<"); |
---|
376 |
s = replaceIf(s,'>',">"); |
---|
377 |
return s; |
---|
378 |
} |
---|
379 |
*/ |
---|
380 |
|
---|
381 |
/** |
---|
382 |
Specialized version of startsWith. |
---|
383 |
Returns true if s1 begins with s2. An empty s2 never matches, and neither does an s2 longer than s1. |
---|
384 |
*/ |
---|
private bool startsWith(string s1, string s2)
{
    // An empty prefix never matches, nor does one longer than the subject.
    if (s2.length == 0 || s1.length < s2.length)
        return false;

    return s1[0 .. s2.length] == s2;
}
395 |
|
---|
396 |
/* encodeStdEntity suggestion as copied from Digital Mars bug reports issue 3218 */ |
---|
397 |
|
---|
/**
 * Replaces the five characters reserved by XML (", >, <, & and ') with
 * their predefined entities.
 *
 * Params:
 *     src = the text to encode
 *     dst = (optional) scratch buffer; it is grown as needed and, when any
 *           replacement is made, the result is a slice of it
 *
 * Returns: src itself when nothing needed escaping, otherwise the encoded
 *          text
 */
T[] encodeStdEntity(T) (T[] src, T[] dst = null)
{
    // NOTE(review): the output is written through dst, whose element type is
    // T; confirm that the toolchain in use accepts this when T is immutable
    // (i.e. when called with string arguments).
    T[] entity;
    auto s = src.ptr;           // scan position
    auto t = s;                 // start of the run not yet copied to dst
    auto e = s + src.length;    // one past the last element
    size_t index = 0;           // number of elements written to dst so far

    while (s < e)
        switch (*s)
        {
            case '"':
                entity = "&quot;";
                goto common;

            case '>':
                entity = "&gt;";
                goto common;

            case '<':
                entity = "&lt;";
                goto common;

            case '&':
                entity = "&amp;";
                goto common;

            case '\'':
                entity = "&apos;";
                goto common;

            common:
                // Flush the unescaped run that precedes this character,
                // then the entity itself.
                auto len = cast(size_t)(s - t);
                if (dst.length <= index + len + entity.length)
                    dst.length = (dst.length + len + entity.length)
                                 + dst.length / 2;

                dst [index .. index + len] = t [0 .. len];
                index += len;

                dst [index .. index + entity.length] = entity;
                index += entity.length;
                t = ++s;
                break;

            default:
                ++s;
                break;
        }

    // did we change anything?
    if (index)
    {
        // copy tail too
        auto len = cast(size_t)(e - t);
        if (dst.length <= index + len)
            dst.length = index + len;

        dst [index .. index + len] = t [0 .. len];
        return dst [0 .. index + len];
    }

    return src;
}
463 |
|
---|
464 |
|
---|
465 |
|
---|
unittest
{
    // Text without reserved characters is returned as-is, not copied.
    assert(encodeStdEntity("hello") is "hello");

    // Each reserved character maps to its predefined entity.
    assert(encodeStdEntity("a > b") == "a &gt; b");
    assert(encodeStdEntity("a < b") == "a &lt; b");
    assert(encodeStdEntity("don't") == "don&apos;t");
    assert(encodeStdEntity("\"hi\"") == "&quot;hi&quot;");
    assert(encodeStdEntity("cat & dog") == "cat &amp; dog");
}
475 |
|
---|
476 |
/** |
---|
477 |
* Mode to use for decoding. |
---|
478 |
* |
---|
479 |
* $(DDOC_ENUM_MEMBERS NONE) Do not decode |
---|
480 |
* $(DDOC_ENUM_MEMBERS LOOSE) Decode, but ignore errors |
---|
481 |
* $(DDOC_ENUM_MEMBERS STRICT) Decode, and throw exception on error |
---|
482 |
*/ |
---|
enum DecodeMode
{
    NONE,   // do not decode
    LOOSE,  // decode, but ignore errors
    STRICT  // decode, and throw exception on error
}
487 |
|
---|
488 |
/** |
---|
489 |
* Decodes a string by unescaping all predefined XML entities. |
---|
490 |
* |
---|
491 |
* encode() escapes certain characters (ampersand, quote, apostrophe, less-than |
---|
492 |
* and greater-than), and similarly, decode() unescapes them. These functions |
---|
493 |
* are provided for convenience only. You do not need to use them when using |
---|
494 |
* the std.xml classes, because then all the encoding and decoding will be done |
---|
495 |
* for you automatically. |
---|
496 |
* |
---|
497 |
* This function decodes the entities &amp;, &quot;, &apos;, |
---|
498 |
* &lt; and &gt;, |
---|
499 |
* as well as decimal and hexadecimal entities such as &#x20AC; |
---|
500 |
* |
---|
501 |
* If the string does not contain an ampersand, the original will be returned. |
---|
502 |
* |
---|
503 |
* Note that the "mode" parameter can be one of DecodeMode.NONE (do not |
---|
504 |
* decode), DecodeMode.LOOSE (decode, but ignore errors), or DecodeMode.STRICT |
---|
505 |
* (decode, and throw a DecodeException in the event of an error). |
---|
506 |
* |
---|
507 |
* Standards: $(LINK2 http://www.w3.org/TR/1998/REC-xml-19980210, XML 1.0) |
---|
508 |
* |
---|
509 |
* Params: |
---|
510 |
* s = The string to be decoded |
---|
511 |
* mode = (optional) Mode to use for decoding. (Defaults to LOOSE). |
---|
512 |
* |
---|
513 |
* Throws: DecodeException if mode == DecodeMode.STRICT and decode fails |
---|
514 |
* |
---|
515 |
* Returns: The decoded string |
---|
516 |
* |
---|
517 |
* Examples: |
---|
518 |
* -------------- |
---|
519 |
* writefln(decode("a &gt; b")); // writes "a > b" |
---|
520 |
* -------------- |
---|
521 |
*/ |
---|
string decode(string s, DecodeMode mode=DecodeMode.LOOSE)
{
    if (mode == DecodeMode.NONE) return s;

    // Stays empty until the first '&' is met, so that input without any
    // ampersand is returned without being copied.
    char[] buffer;

    for (size_t i=0; i<s.length; ++i)
    {
        char c = s[i];
        if (c != '&')
        {
            if (buffer.length != 0) buffer ~= c;
        }
        else
        {
            if (buffer.length == 0)
            {
                // First ampersand: start the output with everything that
                // preceded it.
                buffer = s.dup;
                buffer.length = i;
            }
            if (startsWith(s[i..$],"&#"))
            {
                // Numeric character reference, decimal or hexadecimal.
                try
                {
                    dchar d;
                    string t = s[i..$];
                    checkCharRef(t, d);
                    std.utf.encode(buffer, d);
                    // t now holds what follows the reference; leave i on
                    // the reference's last character (the loop adds one).
                    i = s.length - t.length - 1;
                }
                catch(Err e)
                {
                    if (mode == DecodeMode.STRICT)
                        throw new DecodeException("Unescaped &");
                    buffer ~= '&';
                }
            }
            // Predefined entities: i is advanced to the entity's last
            // character, i.e. by its length minus one.
            else if (startsWith(s[i..$],"&amp;" )) { buffer ~= '&';  i += 4; }
            else if (startsWith(s[i..$],"&quot;")) { buffer ~= '"';  i += 5; }
            else if (startsWith(s[i..$],"&apos;")) { buffer ~= '\''; i += 5; }
            else if (startsWith(s[i..$],"&lt;"  )) { buffer ~= '<';  i += 3; }
            else if (startsWith(s[i..$],"&gt;"  )) { buffer ~= '>';  i += 3; }
            else
            {
                // Bare ampersand: an error when strict, kept verbatim
                // otherwise.
                if (mode == DecodeMode.STRICT)
                    throw new DecodeException("Unescaped &");
                buffer ~= '&';
            }
        }
    }
    return (buffer.length == 0) ? s : cast(string)buffer;
}
574 |
|
---|
unittest
{
    // Passes only if strict decoding of s throws a DecodeException.
    void assertNot(string s)
    {
        bool b = false;
        try { decode(s,DecodeMode.STRICT); }
        catch (DecodeException e) { b = true; }
        assert(b,s);
    }

    // Assert that things that should work, do
    assert(decode("hello",          DecodeMode.STRICT) is "hello");
    assert(decode("a &gt; b",       DecodeMode.STRICT) == "a > b");
    assert(decode("a &lt; b",       DecodeMode.STRICT) == "a < b");
    assert(decode("don&apos;t",     DecodeMode.STRICT) == "don't");
    assert(decode("&quot;hi&quot;", DecodeMode.STRICT) == "\"hi\"");
    assert(decode("cat &amp; dog",  DecodeMode.STRICT) == "cat & dog");
    assert(decode("&#42;",          DecodeMode.STRICT) == "*");
    assert(decode("&#x2A;",         DecodeMode.STRICT) == "*");
    assert(decode("cat & dog",      DecodeMode.LOOSE) == "cat & dog");
    assert(decode("a &gt b",        DecodeMode.LOOSE) == "a &gt b");
    assert(decode("&#;",            DecodeMode.LOOSE) == "&#;");
    assert(decode("&#x;",           DecodeMode.LOOSE) == "&#x;");
    assert(decode("&#2G;",          DecodeMode.LOOSE) == "&#2G;");
    assert(decode("&#x2G;",         DecodeMode.LOOSE) == "&#x2G;");

    // Assert that things that shouldn't work, don't
    assertNot("cat & dog");
    assertNot("a &gt b");
    assertNot("&#;");
    assertNot("&#x;");
    assertNot("&#2G;");
    assertNot("&#x2G;");
}
609 |
|
---|
610 |
/** |
---|
611 |
* Class representing an XML document. |
---|
612 |
* |
---|
613 |
* Standards: $(LINK2 http://www.w3.org/TR/1998/REC-xml-19980210, XML 1.0) |
---|
614 |
* |
---|
615 |
*/ |
---|
class Document : Element
{
    /**
     * Everything that appears before the root element.
     * Defaults to <?xml version="1.0"?>
     */
    string prolog = "<?xml version=\"1.0\"?>";

    /**
     * Everything that appears after the root element.
     * Defaults to the empty string
     */
    string epilog;

    /**
     * Builds a complete DOM (Document Object Model) tree by parsing XML
     * text.
     *
     * The input MUST be valid XML. This is enforced by DocumentParser's in
     * contract.
     *
     * Params:
     *     s = the complete XML text; must not be empty.
     */
    this(string s)
    in
    {
        assert(s.length != 0);
    }
    body
    {
        auto parser = new DocumentParser(s);
        string rootTagText = parser.tag.tagString;

        this(parser.tag);
        // The prolog is whatever lies in s ahead of the root tag's text
        // (this relies on tagString pointing into s).
        prolog = s[0 .. rootTagText.ptr - s.ptr];
        parse(parser);
        epilog = *parser.s;
    }

    /**
     * Builds a Document whose root element uses the given tag.
     *
     * Params:
     *     tag = the start tag of the document.
     */
    this(Tag tag)
    {
        super(tag);
    }

    /**
     * Tests two Documents for equality: prolog, element content and epilog
     * must all match.
     *
     * Examples:
     * --------------
     * Document d1,d2;
     * if (d1 == d2) { }
     * --------------
     */
    override int opEquals(Object o)
    {
        auto other = toType!(Document)(o);

        if (prolog != other.prolog) return false;
        if (super != cast(Element)other) return false;
        if (epilog != other.epilog) return false;
        return true;
    }

    /**
     * Orders two Documents by prolog, then element content, then epilog.
     *
     * You should rarely need to call this function. It exists so that
     * Documents can be used as associative array keys.
     *
     * Examples:
     * --------------
     * Document d1,d2;
     * if (d1 < d2) { }
     * --------------
     */
    override int opCmp(Object o)
    {
        auto other = toType!(Document)(o);

        if (prolog != other.prolog)
            return prolog < other.prolog ? -1 : 1;

        if (super != cast(Element)other)
            return super < cast(Element)other ? -1 : 1;

        if (epilog != other.epilog)
            return epilog < other.epilog ? -1 : 1;

        return 0;
    }

    /**
     * Returns the hash of a Document
     *
     * You should rarely need to call this function. It exists so that
     * Documents can be used as associative array keys.
     */
    override hash_t toHash()
    {
        auto inner = hash(epilog, super.toHash);
        return hash(prolog, inner);
    }

    /**
     * Returns the complete XML of the document: prolog, root element and
     * epilog concatenated.
     */
    override string toString()
    {
        string root = super.toString;
        return prolog ~ root ~ epilog;
    }
}
732 |
|
---|
733 |
|
---|
734 |
/** |
---|
735 |
* Class representing an XML element. |
---|
736 |
* |
---|
737 |
* Standards: $(LINK2 http://www.w3.org/TR/1998/REC-xml-19980210, XML 1.0) |
---|
738 |
*/ |
---|
739 |
class Element : Item |
---|
740 |
{ |
---|
741 |
Tag tag; /// The start tag of the element |
---|
742 |
Item[] items; /// The element's items |
---|
743 |
Text[] texts; /// The element's text items |
---|
744 |
CData[] cdatas; /// The element's CData items |
---|
745 |
Comment[] comments; /// The element's comments |
---|
746 |
ProcessingInstruction[] pis; /// The element's processing instructions |
---|
747 |
Element[] elements; /// The element's child elements |
---|
748 |
|
---|
749 |
/** |
---|
750 |
* Constructs an Element given a name and a string to be used as a Text |
---|
751 |
* interior. |
---|
752 |
* |
---|
753 |
* Params: |
---|
754 |
* name = the name of the element. |
---|
755 |
* interior = (optional) the string interior. |
---|
756 |
* |
---|
757 |
* Examples: |
---|
758 |
* ------------------------------------------------------- |
---|
759 |
* auto element = new Element("title","Serenity") |
---|
760 |
* // constructs the element <title>Serenity</title> |
---|
761 |
* ------------------------------------------------------- |
---|
762 |
*/ |
---|
this(string name, string interior=null)
{
    this(new Tag(name));

    // No interior text means nothing to append.
    if (interior.length == 0) return;

    opCatAssign(new Text(interior));
}
768 |
|
---|
769 |
/** |
---|
770 |
* Constructs an Element from a Tag. |
---|
771 |
* |
---|
772 |
* Params: |
---|
773 |
* tag = the start or empty tag of the element. |
---|
774 |
*/ |
---|
this(Tag tag_)
{
    // Work on a private copy of the tag; it starts out as an empty tag.
    auto copy = new Tag(tag_.name);
    copy.type = TagType.EMPTY;

    foreach (key, value; tag_.attr)
        copy.attr[key] = value;

    copy.tagString = tag_.tagString;
    this.tag = copy;
}
782 |
|
---|
783 |
/** |
---|
784 |
* Append a text item to the interior of this element |
---|
785 |
* |
---|
786 |
* Params: |
---|
787 |
* item = the item you wish to append. |
---|
788 |
* |
---|
789 |
* Examples: |
---|
790 |
* -------------- |
---|
791 |
* Element element; |
---|
792 |
* element ~= new Text("hello"); |
---|
793 |
* -------------- |
---|
794 |
*/ |
---|
void opCatAssign(Text item)
{
    // Record in the Text-specific list, then in the general item list.
    this.texts ~= item;
    this.appendItem(item);
}
800 |
|
---|
801 |
/** |
---|
802 |
* Append a CData item to the interior of this element |
---|
803 |
* |
---|
804 |
* Params: |
---|
805 |
* item = the item you wish to append. |
---|
806 |
* |
---|
807 |
* Examples: |
---|
808 |
* -------------- |
---|
809 |
* Element element; |
---|
810 |
* element ~= new CData("hello"); |
---|
811 |
* -------------- |
---|
812 |
*/ |
---|
void opCatAssign(CData item)
{
    // Record in the CData-specific list, then in the general item list.
    this.cdatas ~= item;
    this.appendItem(item);
}
818 |
|
---|
819 |
/** |
---|
820 |
* Append a comment to the interior of this element |
---|
821 |
* |
---|
822 |
* Params: |
---|
823 |
* item = the item you wish to append. |
---|
824 |
* |
---|
825 |
* Examples: |
---|
826 |
* -------------- |
---|
827 |
* Element element; |
---|
828 |
* element ~= new Comment("hello"); |
---|
829 |
* -------------- |
---|
830 |
*/ |
---|
void opCatAssign(Comment item)
{
    // Record in the Comment-specific list, then in the general item list.
    this.comments ~= item;
    this.appendItem(item);
}
836 |
|
---|
837 |
/** |
---|
838 |
* Append a processing instruction to the interior of this element |
---|
839 |
* |
---|
840 |
* Params: |
---|
841 |
* item = the item you wish to append. |
---|
842 |
* |
---|
843 |
* Examples: |
---|
844 |
* -------------- |
---|
845 |
* Element element; |
---|
846 |
* element ~= new ProcessingInstruction("hello"); |
---|
847 |
* -------------- |
---|
848 |
*/ |
---|
void opCatAssign(ProcessingInstruction item)
{
    // Record in the processing-instruction list, then in the general item
    // list.
    this.pis ~= item;
    this.appendItem(item);
}
854 |
|
---|
855 |
/** |
---|
856 |
* Append a complete element to the interior of this element |
---|
857 |
* |
---|
858 |
* Params: |
---|
859 |
* item = the item you wish to append. |
---|
860 |
* |
---|
861 |
* Examples: |
---|
862 |
* -------------- |
---|
863 |
* Element element; |
---|
864 |
* Element other = new Element("br"); |
---|
865 |
* element ~= other; |
---|
866 |
* // appends element representing <br /> |
---|
867 |
* -------------- |
---|
868 |
*/ |
---|
void opCatAssign(Element item)
{
    // Record in the child-element list, then in the general item list.
    this.elements ~= item;
    this.appendItem(item);
}
874 |
|
---|
// Adds item to the general item list; the first non-empty item turns an
// EMPTY tag into a START tag.
private void appendItem(Item item)
{
    items ~= item;

    if (tag.type != TagType.EMPTY) return;
    if (item.isEmptyXML) return;

    tag.type = TagType.START;
}
881 |
|
---|
// Fills this element from the parser: every text, CDATA, comment and
// processing instruction is appended as the matching item, and every start
// tag creates a child element that is parsed recursively before being
// appended.
private void parse(ElementParser xml)
{
    xml.onText    = (string content) { this.opCatAssign(new Text(content)); };
    xml.onCData   = (string content) { this.opCatAssign(new CData(content)); };
    xml.onComment = (string content) { this.opCatAssign(new Comment(content)); };
    xml.onPI      = (string content) { this.opCatAssign(new ProcessingInstruction(content)); };

    xml.onStartTag[null] = (ElementParser childParser)
    {
        auto child = new Element(childParser.tag);
        child.parse(childParser);
        this.opCatAssign(child);
    };

    xml.parse();
}
898 |
|
---|
899 |
/** |
---|
900 |
* Compares two Elements for equality |
---|
901 |
* |
---|
902 |
* Examples: |
---|
903 |
* -------------- |
---|
904 |
* Element e1,e2; |
---|
905 |
* if (e1 == e2) { } |
---|
906 |
* -------------- |
---|
907 |
*/ |
---|
// Two Elements compare equal when they hold the same number of items and
// every pair of corresponding items compares equal.
// NOTE(review): the tag is not compared here although toHash() mixes in
// tag.toHash — confirm whether that is intended.
override int opEquals(Object o)
{
    auto element = toType!( Element)(o);

    // Array lengths are size_t; a uint would not hold them on 64-bit targets.
    size_t len = items.length;
    if (len != element.items.length) return false;

    for (size_t i=0; i<len; ++i)
    {
        if (!items[i].opEquals(element.items[i])) return false;
    }
    return true;
}
919 |
|
---|
920 |
/** |
---|
921 |
* Compares two Elements |
---|
922 |
* |
---|
923 |
* You should rarely need to call this function. It exists so that Elements |
---|
924 |
* can be used as associative array keys. |
---|
925 |
* |
---|
926 |
* Examples: |
---|
927 |
* -------------- |
---|
928 |
* Element e1,e2; |
---|
929 |
* if (e1 < e2) { } |
---|
930 |
* -------------- |
---|
931 |
*/ |
---|
// Item-by-item comparison; when one item list is a prefix of the other, the
// shorter one orders first.
override int opCmp(Object o)
{
    auto other = toType!( Element)(o);

    uint idx = 0;
    while (true)
    {
        bool mineDone   = (idx == items.length);
        bool theirsDone = (idx == other.items.length);

        if (mineDone && theirsDone) return 0;
        if (mineDone) return -1;
        if (theirsDone) return 1;

        // The first differing pair of items decides the ordering.
        if (items[idx] != other.items[idx])
            return items[idx].opCmp(other.items[idx]);

        ++idx;
    }
}
944 |
|
---|
945 |
/**
 * Returns the hash of an Element
 *
 * The hash is the tag's hash plus the sum of the hashes of all child items.
 *
 * You should rarely need to call this function. It exists so that Elements
 * can be used as associative array keys.
 */
override hash_t toHash()
{
    hash_t total = tag.toHash;
    foreach(child;items)
    {
        total += child.toHash();
    }
    return total;
}
---|
957 |
|
---|
958 |
/**
 * Returns the decoded interior of an element.
 *
 * The element is assumed to contain text <i>only</i>. So, for
 * example, given XML such as "&lt;title&gt;Good &amp;amp;
 * Bad&lt;/title&gt;", will return "Good &amp; Bad".
 *
 * Params:
 *      mode = (optional) Mode to use for decoding. (Defaults to LOOSE).
 *
 * Throws: DecodeException if decode fails, or if any child item is not a
 *      Text
 */
string text(DecodeMode mode=DecodeMode.LOOSE)
{
    string decoded;
    foreach(child;items)
    {
        auto asText = cast(Text)child;

        // Anything other than a Text child violates the text-only assumption.
        if (asText is null) throw new DecodeException(child.toString);

        decoded ~= decode(asText.toString,mode);
    }
    return decoded;
}
---|
981 |
|
---|
982 |
/**
 * Returns an indented string representation of this item
 *
 * The result is an array of lines. An element which reports isEmptyXML
 * yields a single empty-tag line, an element whose only child is a Text
 * yields a single line holding start tag, text and end tag, and any other
 * element yields its start tag, the indented lines of each child, and its
 * end tag.
 *
 * Params:
 *      indent = (optional) number of spaces by which to indent this
 *          element. Defaults to 2.
 */
override string[] pretty(uint indent=2)
{
    if (isEmptyXML) return [ tag.toEmptyString ];

    // A lone Text child stays on the same line as its enclosing tags.
    if (items.length == 1)
    {
        auto onlyText = cast(Text)(items[0]);
        if (onlyText !is null)
        {
            return [tag.toStartString ~ onlyText.toString ~ tag.toEndString];
        }
    }

    string[] lines = [ tag.toStartString ];
    foreach(child;items)
    {
        foreach(line;child.pretty(indent))
        {
            // Right-justifying into a field that is indent characters wider
            // than the line is what produces the indentation.
            lines ~= rjustify(line,line.length + indent);
        }
    }
    lines ~= tag.toEndString;
    return lines;
}
---|
1015 |
|
---|
1016 |
/**
 * Returns the string representation of an Element
 *
 * If the element reports isEmptyXML, this is the empty-tag form of its
 * tag. Otherwise it is the start tag, followed by the string
 * representation of each child item in order, followed by the end tag.
 *
 * Examples:
 * --------------
 * auto element = new Element("br");
 * writefln(element.toString);
 * --------------
 */
override string toString()
{
    if (isEmptyXML) return tag.toEmptyString;

    string result = tag.toStartString;
    foreach(child;items)
    {
        result ~= child.toString;
    }
    return result ~ tag.toEndString;
}
---|
1034 |
|
---|
1035 |
/**
 * Returns true if the element has no child items, in which case it is
 * written in empty-tag form (e.g. "&lt;br /&gt;") by toString and pretty.
 */
override bool isEmptyXML() { return items.length == 0; }
---|
1036 |
} |
---|
1037 |
|
---|
1038 |
|
---|
1039 |
/**
 * Tag types.
 *
 * $(DDOC_ENUM_MEMBERS START) Used for start tags
 * $(DDOC_ENUM_MEMBERS END) Used for end tags
 * $(DDOC_ENUM_MEMBERS EMPTY) Used for empty tags
 *
 */
enum TagType
{
    START,
    END,
    EMPTY
}
---|
1048 |
|
---|
1049 |
/**
 * Class representing an XML tag.
 *
 * Standards: $(LINK2 http://www.w3.org/TR/1998/REC-xml-19980210, XML 1.0)
 *
 * Attribute values held in $(B attr) are plain (unescaped) text. They are
 * unescaped when a tag is parsed and escaped again when the tag is turned
 * back into a string.
 *
 * The class invariant guarantees
 * <ul>
 * <li> that $(B type) is a valid enum TagType value</li>
 * <li> that $(B name) consists of valid characters</li>
 * <li> that each attribute name consists of valid characters</li>
 * </ul>
 */
class Tag
{
    TagType type = TagType.START;   /// Type of tag
    string name;                    /// Tag name
    string[string] attr;            /// Associative array of attributes
    private string tagString;       // source text of a parsed tag

    invariant()
    {
        string s;
        string t;

        assert(type == TagType.START
            || type == TagType.END
            || type == TagType.EMPTY);

        s = name;
        try { checkName(s,t); }
        catch(Err e) { assert(false,"Invalid tag name:" ~ e.toString); }

        foreach(k,v;attr)
        {
            s = k;
            try { checkName(s,t); }
            catch(Err e)
                { assert(false,"Invalid atrribute name:" ~ e.toString); }
        }
    }

    /**
     * Constructs an instance of Tag with a specified name and type
     *
     * The constructor does not initialize the attributes. To initialize the
     * attributes, you access the $(B attr) member variable.
     *
     * Params:
     *      name = the Tag's name
     *      type = (optional) the Tag's type. If omitted, defaults to
     *          TagType.START.
     *
     * Examples:
     * --------------
     * auto tag = new Tag("img",TagType.EMPTY);
     * tag.attr["src"] = "http://example.com/example.jpg";
     * --------------
     */
    this(string name, TagType type=TagType.START)
    {
        this.name = name;
        this.type = type;
    }

    /* Private constructor (so don't ddoc this!)
     *
     * Constructs a Tag by parsing the string representation, e.g. "<html>".
     *
     * The string is passed by reference, and is advanced over all characters
     * consumed.
     *
     * The second parameter is a dummy parameter only, required solely to
     * distinguish this constructor from the public one.
     *
     * Throws: TagException, carrying the text consumed so far, if the
     * string is not a well-formed tag.
     */
    private this(ref string s, bool dummy)
    {
        tagString = s;
        try
        {
            reqc(s,'<');
            if (optc(s,'/')) type = TagType.END;
            name = munch(s,"^/>"~whitespace);
            munch(s,whitespace);
            while(s.length > 0 && s[0] != '>' && s[0] != '/')
            {
                string key = munch(s,"^="~whitespace);
                munch(s,whitespace);
                reqc(s,'=');
                munch(s,whitespace);
                reqc(s,'"');
                // The text between the quotes is escaped XML; store the
                // plain value so that attr holds what the document means.
                string val = decode(munch(s,"^\""),DecodeMode.LOOSE);
                reqc(s,'"');
                munch(s,whitespace);
                attr[key] = val;
            }
            if (optc(s,'/'))
            {
                // "</name/>" is not a legal tag
                if (type == TagType.END) throw new TagException("");
                type = TagType.EMPTY;
            }
            reqc(s,'>');
            // Trim tagString down to exactly the characters consumed
            tagString.length = (s.ptr - tagString.ptr);
        }
        catch(XMLException e)
        {
            tagString.length = (s.ptr - tagString.ptr);
            throw new TagException(tagString);
        }
    }


    /**
     * Compares two Tags for equality
     *
     * Tags are equal when their names, attributes and types are all equal.
     *
     * You should rarely need to call this function. It exists so that Tags
     * can be used as associative array keys.
     *
     * Examples:
     * --------------
     * Tag tag1,tag2;
     * if (tag1 == tag2) { }
     * --------------
     */
    override int opEquals(Object o)
    {
        auto tag = toType!(Tag)(o);
        return
            (name != tag.name) ? false : (
            (attr != tag.attr) ? false : (
            (type != tag.type) ? false : (
        true )));
    }

    /**
     * Compares two Tags
     *
     * Tags are ordered by name, then by attributes, then by type.
     *
     * Examples:
     * --------------
     * Tag tag1,tag2;
     * if (tag1 < tag2) { }
     * --------------
     */
    override int opCmp(Object o)
    {
        auto tag = toType!(Tag)(o);
        return
            ((name != tag.name) ? ( name < tag.name ? -1 : 1 ) :
            ((attr != tag.attr) ? ( attr < tag.attr ? -1 : 1 ) :
            ((type != tag.type) ? ( type < tag.type ? -1 : 1 ) :
        0 )));
    }

    /**
     * Returns the hash of a Tag
     *
     * The hash is computed from the tag name only.
     *
     * You should rarely need to call this function. It exists so that Tags
     * can be used as associative array keys.
     */
    override hash_t toHash()
    {
        hash_t hash = 0;
        foreach(dchar c;name) hash = hash * 11 + c;
        return hash;
    }

    /**
     * Returns the string representation of a Tag
     *
     * Examples:
     * --------------
     * auto tag = new Tag("book",TagType.START);
     * writefln(tag.toString); // writes "<book>"
     * --------------
     */
    override string toString()
    {
        if (isEmpty) return toEmptyString();
        return (isEnd) ? toEndString() : toStartString();
    }

    private
    {
        // Builds "<name attr="value"..." without the closing bracket.
        string toNonEndString()
        {
            string s = "<" ~ name;
            foreach(key,val;attr)
            {
                // attr holds plain text, so it must be escaped on the way
                // out or the result would not be well-formed XML.
                s ~= format(" %s=\"%s\"",key,encodeStdEntity(val));
            }
            return s;
        }

        string toStartString() { return toNonEndString() ~ ">"; }

        string toEndString() { return "</" ~ name ~ ">"; }

        string toEmptyString() { return toNonEndString() ~ " />"; }
    }

    /**
     * Returns true if the Tag is a start tag
     *
     * Examples:
     * --------------
     * if (tag.isStart) { }
     * --------------
     */
    bool isStart() { return type == TagType.START; }

    /**
     * Returns true if the Tag is an end tag
     *
     * Examples:
     * --------------
     * if (tag.isEnd) { }
     * --------------
     */
    bool isEnd() { return type == TagType.END; }

    /**
     * Returns true if the Tag is an empty tag
     *
     * Examples:
     * --------------
     * if (tag.isEmpty) { }
     * --------------
     */
    bool isEmpty() { return type == TagType.EMPTY; }
}
---|
1276 |
|
---|
1277 |
|
---|
1278 |
/**
 * Class representing a comment
 */
class Comment : Item
{
    private string content;

    /**
     * Construct a comment
     *
     * Params:
     *      content = the body of the comment
     *
     * Throws: CommentException if the comment body is illegal (contains "--"
     * or exactly equals "-")
     *
     * Examples:
     * --------------
     * auto item = new Comment("This is a comment");
     *    // constructs <!--This is a comment-->
     * --------------
     */
    this(string content)
    {
        // "--" may not appear inside a comment, since it is part of the
        // closing delimiter.
        if (content == "-" || content.indexOf("--") != -1)
            throw new CommentException(content);
        this.content = content;
    }

    /**
     * Compares two comments for equality
     *
     * Examples:
     * --------------
     * Comment item1,item2;
     * if (item1 == item2) { }
     * --------------
     */
    override int opEquals(Object o)
    {
        auto item = toType!(Item)(o);
        auto t = cast(Comment)item;
        return t !is null && content == t.content;
    }

    /**
     * Compares two comments
     *
     * Returns a negative value, zero or a positive value according to
     * whether this comment's content is less than, equal to or greater than
     * the other's. An Item of a different kind never compares equal; such
     * Items are ordered by class name.
     *
     * You should rarely need to call this function. It exists so that Comments
     * can be used as associative array keys.
     *
     * Examples:
     * --------------
     * Comment item1,item2;
     * if (item1 < item2) { }
     * --------------
     */
    override int opCmp(Object o)
    {
        auto item = toType!(Item)(o);
        auto t = cast(Comment)item;
        if (t is null)
        {
            // NOTE(review): toType is defined elsewhere and presumably
            // never yields null; the guard is purely defensive.
            if (item is null) return 1;
            return this.classinfo.name < item.classinfo.name ? -1 : 1;
        }
        // Return the int directly: combining it with && would coerce it to
        // bool and lose the sign.
        if (content == t.content) return 0;
        return content < t.content ? -1 : 1;
    }

    /**
     * Returns the hash of a Comment
     *
     * You should rarely need to call this function. It exists so that Comments
     * can be used as associative array keys.
     */
    override hash_t toHash() { return hash(content); }

    /**
     * Returns a string representation of this comment
     */
    override string toString() { return "<!--" ~ content ~ "-->"; }

    override bool isEmptyXML() { return false; } /// Returns false always
}
---|
1358 |
|
---|
1359 |
/**
 * Class representing a Character Data section
 */
class CData : Item
{
    private string content;

    /**
     * Construct a character data section
     *
     * Params:
     *      content = the body of the character data segment
     *
     * Throws: CDataException if the segment body is illegal (contains "]]>")
     *
     * Examples:
     * --------------
     * auto item = new CData("<b>hello</b>");
     *    // constructs <![CDATA[<b>hello</b>]]>
     * --------------
     */
    this(string content)
    {
        if (content.indexOf("]]>") != -1) throw new CDataException(content);
        this.content = content;
    }

    /**
     * Compares two CDatas for equality
     *
     * Examples:
     * --------------
     * CData item1,item2;
     * if (item1 == item2) { }
     * --------------
     */
    override int opEquals(Object o)
    {
        auto item = toType!(Item)(o);
        auto t = cast(CData)item;
        return t !is null && content == t.content;
    }

    /**
     * Compares two CDatas
     *
     * Returns a negative value, zero or a positive value according to
     * whether this section's content is less than, equal to or greater than
     * the other's. An Item of a different kind never compares equal; such
     * Items are ordered by class name.
     *
     * You should rarely need to call this function. It exists so that CDatas
     * can be used as associative array keys.
     *
     * Examples:
     * --------------
     * CData item1,item2;
     * if (item1 < item2) { }
     * --------------
     */
    override int opCmp(Object o)
    {
        auto item = toType!(Item)(o);
        auto t = cast(CData)item;
        if (t is null)
        {
            // NOTE(review): toType is defined elsewhere and presumably
            // never yields null; the guard is purely defensive.
            if (item is null) return 1;
            return this.classinfo.name < item.classinfo.name ? -1 : 1;
        }
        // Return the int directly: combining it with && would coerce it to
        // bool and lose the sign.
        if (content == t.content) return 0;
        return content < t.content ? -1 : 1;
    }

    /**
     * Returns the hash of a CData
     *
     * You should rarely need to call this function. It exists so that CDatas
     * can be used as associative array keys.
     */
    override hash_t toHash() { return hash(content); }

    /**
     * Returns a string representation of this CData section
     */
    override string toString() { return cdata ~ content ~ "]]>"; }

    override bool isEmptyXML() { return false; } /// Returns false always
}
---|
1437 |
|
---|
1438 |
/**
 * Class representing a text (aka Parsed Character Data) section
 */
class Text : Item
{
    private string content;     // held in encoded (escaped) form

    /**
     * Construct a text (aka PCData) section
     *
     * Params:
     *      content = the text. This function encodes the text before
     *      insertion, so it is safe to insert any text
     *
     * Examples:
     * --------------
     * auto item = new Text("a < b");
     *    // constructs a &lt; b
     * --------------
     */
    this(string content)
    {
        this.content = encodeStdEntity(content);
    }

    /**
     * Compares two text sections for equality
     *
     * Examples:
     * --------------
     * Text item1,item2;
     * if (item1 == item2) { }
     * --------------
     */
    override int opEquals(Object o)
    {
        auto item = toType!(Item)(o);
        auto t = cast(Text)item;
        return t !is null && content == t.content;
    }

    /**
     * Compares two text sections
     *
     * Returns a negative value, zero or a positive value according to
     * whether this section's content is less than, equal to or greater than
     * the other's. An Item of a different kind never compares equal; such
     * Items are ordered by class name.
     *
     * You should rarely need to call this function. It exists so that Texts
     * can be used as associative array keys.
     *
     * Examples:
     * --------------
     * Text item1,item2;
     * if (item1 < item2) { }
     * --------------
     */
    override int opCmp(Object o)
    {
        auto item = toType!(Item)(o);
        auto t = cast(Text)item;
        if (t is null)
        {
            // NOTE(review): toType is defined elsewhere and presumably
            // never yields null; the guard is purely defensive.
            if (item is null) return 1;
            return this.classinfo.name < item.classinfo.name ? -1 : 1;
        }
        // Return the int directly: combining it with && would coerce it to
        // bool and lose the sign.
        if (content == t.content) return 0;
        return content < t.content ? -1 : 1;
    }

    /**
     * Returns the hash of a text section
     *
     * You should rarely need to call this function. It exists so that Texts
     * can be used as associative array keys.
     */
    override hash_t toHash() { return hash(content); }

    /**
     * Returns a string representation of this Text section
     */
    override string toString() { return content; }

    /**
     * Returns true if the content is the empty string
     */
    override bool isEmptyXML() { return content.length == 0; }
}
---|
1517 |
|
---|
1518 |
/**
 * Class representing an XML Instruction section
 */
class XMLInstruction : Item
{
    private string content;

    /**
     * Construct an XML Instruction section
     *
     * Params:
     *      content = the body of the instruction segment
     *
     * Throws: XIException if the segment body is illegal (contains ">")
     *
     * Examples:
     * --------------
     * auto item = new XMLInstruction("ATTLIST");
     *    // constructs <!ATTLIST>
     * --------------
     */
    this(string content)
    {
        if (content.indexOf(">") != -1) throw new XIException(content);
        this.content = content;
    }

    /**
     * Compares two XML instructions for equality
     *
     * Examples:
     * --------------
     * XMLInstruction item1,item2;
     * if (item1 == item2) { }
     * --------------
     */
    override int opEquals(Object o)
    {
        auto item = toType!(Item)(o);
        auto t = cast(XMLInstruction)item;
        return t !is null && content == t.content;
    }

    /**
     * Compares two XML instructions
     *
     * Returns a negative value, zero or a positive value according to
     * whether this instruction's content is less than, equal to or greater
     * than the other's. An Item of a different kind never compares equal;
     * such Items are ordered by class name.
     *
     * You should rarely need to call this function. It exists so that
     * XMLInstructions can be used as associative array keys.
     *
     * Examples:
     * --------------
     * XMLInstruction item1,item2;
     * if (item1 < item2) { }
     * --------------
     */
    override int opCmp(Object o)
    {
        auto item = toType!(Item)(o);
        auto t = cast(XMLInstruction)item;
        if (t is null)
        {
            // NOTE(review): toType is defined elsewhere and presumably
            // never yields null; the guard is purely defensive.
            if (item is null) return 1;
            return this.classinfo.name < item.classinfo.name ? -1 : 1;
        }
        // Return the int directly: combining it with && would coerce it to
        // bool and lose the sign.
        if (content == t.content) return 0;
        return content < t.content ? -1 : 1;
    }

    /**
     * Returns the hash of an XMLInstruction
     *
     * You should rarely need to call this function. It exists so that
     * XMLInstructions can be used as associative array keys.
     */
    override hash_t toHash() { return hash(content); }

    /**
     * Returns a string representation of this XMLInstruction
     */
    override string toString() { return "<!" ~ content ~ ">"; }

    override bool isEmptyXML() { return false; } /// Returns false always
}
---|
1596 |
|
---|
1597 |
/**
 * Class representing a Processing Instruction section
 */
class ProcessingInstruction : Item
{
    private string content;

    /**
     * Construct a Processing Instruction section
     *
     * Params:
     *      content = the body of the instruction segment
     *
     * Throws: PIException if the segment body is illegal (contains "?>")
     *
     * Examples:
     * --------------
     * auto item = new ProcessingInstruction("php");
     *    // constructs <?php?>
     * --------------
     */
    this(string content)
    {
        if (content.indexOf("?>") != -1) throw new PIException(content);
        this.content = content;
    }

    /**
     * Compares two processing instructions for equality
     *
     * Examples:
     * --------------
     * ProcessingInstruction item1,item2;
     * if (item1 == item2) { }
     * --------------
     */
    override int opEquals(Object o)
    {
        auto item = toType!(Item)(o);
        auto t = cast(ProcessingInstruction)item;
        return t !is null && content == t.content;
    }

    /**
     * Compares two processing instructions
     *
     * Returns a negative value, zero or a positive value according to
     * whether this instruction's content is less than, equal to or greater
     * than the other's. An Item of a different kind never compares equal;
     * such Items are ordered by class name.
     *
     * You should rarely need to call this function. It exists so that
     * ProcessingInstructions can be used as associative array keys.
     *
     * Examples:
     * --------------
     * ProcessingInstruction item1,item2;
     * if (item1 < item2) { }
     * --------------
     */
    override int opCmp(Object o)
    {
        auto item = toType!(Item)(o);
        auto t = cast(ProcessingInstruction)item;
        if (t is null)
        {
            // NOTE(review): toType is defined elsewhere and presumably
            // never yields null; the guard is purely defensive.
            if (item is null) return 1;
            return this.classinfo.name < item.classinfo.name ? -1 : 1;
        }
        // Return the int directly: combining it with && would coerce it to
        // bool and lose the sign.
        if (content == t.content) return 0;
        return content < t.content ? -1 : 1;
    }

    /**
     * Returns the hash of a ProcessingInstruction
     *
     * You should rarely need to call this function. It exists so that
     * ProcessingInstructions can be used as associative array keys.
     */
    override hash_t toHash() { return hash(content); }

    /**
     * Returns a string representation of this ProcessingInstruction
     */
    override string toString() { return "<?" ~ content ~ "?>"; }

    override bool isEmptyXML() { return false; } /// Returns false always
}
---|
1676 |
|
---|
1677 |
|
---|
1678 |
/**
 * Class for parsing an XML Document.
 *
 * This is a subclass of ElementParser. Most of the useful functions are
 * documented there.
 *
 * Standards: $(LINK2 http://www.w3.org/TR/1998/REC-xml-19980210, XML 1.0)
 *
 * Bugs:
 *      Currently only supports UTF documents.
 *
 *      If there is an encoding attribute in the prolog, it is ignored.
 *
 */
class DocumentParser : ElementParser
{
    string xmlText;

    /**
     * Constructs a DocumentParser.
     *
     * The input to this function MUST be valid XML.
     * This is enforced by the function's in contract, which asserts that
     * the text is non-empty and that check() accepts it.
     *
     * Params:
     *      xmlText_ = the entire XML document as text
     *
     */
    this(string xmlText_)
    in
    {
        assert(xmlText_.length != 0);
        try
        {
            // Well-formedness check; failure is reported below
            check(xmlText_);
        }
        catch (CheckException e)
        {
            // Surface the checker's explanation in the assertion message
            assert(false, "\n" ~ e.toString());
        }
    }
    body
    {
        this.xmlText = xmlText_;

        // The base-class constructor reads through s, so s has to point at
        // the document text before super() runs.
        s = &xmlText;
        super();

        // Parse through the root tag (but not beyond)
        parse();
    }
}
---|
1729 |
|
---|
1730 |
/** |
---|
1731 |
* Class for parsing an XML element. |
---|
1732 |
* |
---|
1733 |
* Standards: $(LINK2 http://www.w3.org/TR/1998/REC-xml-19980210, XML 1.0) |
---|
1734 |
* |
---|
1735 |
* Note that you cannot construct instances of this class directly. You can |
---|
1736 |
* construct a DocumentParser (which is a subclass of ElementParser), but |
---|
1737 |
* otherwise, Instances of ElementParser will be created for you by the |
---|
1738 |
* library, and passed your way via onStartTag handlers. |
---|
1739 |
* |
---|
1740 |
*/ |
---|
1741 |
class ElementParser |
---|
1742 |
{ |
---|
1743 |
alias void delegate(string) Handler; |
---|
1744 |
alias void delegate(in Element element) ElementHandler; |
---|
1745 |
alias void delegate(ElementParser parser) ParserHandler; |
---|
1746 |
|
---|
1747 |
private |
---|
1748 |
{ |
---|
1749 |
Tag tag_; |
---|
1750 |
string elementStart; |
---|
1751 |
string* s; |
---|
1752 |
|
---|
1753 |
Handler commentHandler = null; |
---|
1754 |
Handler cdataHandler = null; |
---|
1755 |
Handler xiHandler = null; |
---|
1756 |
Handler piHandler = null; |
---|
1757 |
Handler rawTextHandler = null; |
---|
1758 |
Handler textHandler = null; |
---|
1759 |
|
---|
1760 |
// Private constructor for start tags |
---|
1761 |
this(ElementParser parent) |
---|
1762 |
{ |
---|
1763 |
s = parent.s; |
---|
1764 |
this(); |
---|
1765 |
tag_ = parent.tag_; |
---|
1766 |
} |
---|
1767 |
|
---|
1768 |
// Private constructor for empty tags |
---|
1769 |
this(Tag tag, string* t) |
---|
1770 |
{ |
---|
1771 |
s = t; |
---|
1772 |
this(); |
---|
1773 |
tag_ = tag; |
---|
1774 |
} |
---|
1775 |
} |
---|
1776 |
|
---|
1777 |
/** |
---|
1778 |
* The Tag at the start of the element being parsed. You can read this to |
---|
1779 |
* determine the tag's name and attributes. |
---|
1780 |
*/ |
---|
1781 |
Tag tag() { return tag_; } |
---|
1782 |
|
---|
1783 |
/** |
---|
1784 |
* Register a handler which will be called whenever a start tag is |
---|
1785 |
* encountered which matches the specified name. You can also pass null as |
---|
1786 |
* the name, in which case the handler will be called for any unmatched |
---|
1787 |
* start tag. |
---|
1788 |
* |
---|
1789 |
* Examples: |
---|
1790 |
* -------------- |
---|
1791 |
* // Call this function whenever a <podcast> start tag is encountered |
---|
1792 |
* onStartTag["podcast"] = (ElementParser xml) |
---|
1793 |
* { |
---|
1794 |
* // Your code here |
---|
1795 |
* // |
---|
1796 |
* // This is a a closure, so code here may reference |
---|
1797 |
* // variables which are outside of this scope |
---|
1798 |
* }; |
---|
1799 |
* |
---|
1800 |
* // call myEpisodeStartHandler (defined elsewhere) whenever an <episode> |
---|
1801 |
* // start tag is encountered |
---|
1802 |
* onStartTag["episode"] = &myEpisodeStartHandler; |
---|
1803 |
* |
---|
1804 |
* // call delegate dg for all other start tags |
---|
1805 |
* onStartTag[null] = dg; |
---|
1806 |
* -------------- |
---|
1807 |
* |
---|
1808 |
* This library will supply your function with a new instance of |
---|
1809 |
* ElementHandler, which may be used to parse inside the element whose |
---|
1810 |
* start tag was just found, or to identify the tag attributes of the |
---|
1811 |
* element, etc. |
---|
1812 |
* |
---|
1813 |
* Note that your function will be called for both start tags and empty |
---|
1814 |
* tags. That is, we make no distinction between <br></br> |
---|
1815 |
* and <br/>. |
---|
1816 |
*/ |
---|
1817 |
ParserHandler[string] onStartTag; |
---|
1818 |
|
---|
1819 |
/** |
---|
1820 |
* Register a handler which will be called whenever an end tag is |
---|
1821 |
* encountered which matches the specified name. You can also pass null as |
---|
1822 |
* the name, in which case the handler will be called for any unmatched |
---|
1823 |
* end tag. |
---|
1824 |
* |
---|
1825 |
* Examples: |
---|
1826 |
* -------------- |
---|
1827 |
* // Call this function whenever a </podcast> end tag is encountered |
---|
1828 |
* onEndTag["podcast"] = (in Element e) |
---|
1829 |
* { |
---|
1830 |
* // Your code here |
---|
1831 |
* // |
---|
1832 |
* // This is a a closure, so code here may reference |
---|
1833 |
* // variables which are outside of this scope |
---|
1834 |
* }; |
---|
1835 |
* |
---|
1836 |
* // call myEpisodeEndHandler (defined elsewhere) whenever an </episode> |
---|
1837 |
* // end tag is encountered |
---|
1838 |
* onEndTag["episode"] = &myEpisodeEndHandler; |
---|
1839 |
* |
---|
1840 |
* // call delegate dg for all other end tags |
---|
1841 |
* onEndTag[null] = dg; |
---|
1842 |
* -------------- |
---|
1843 |
* |
---|
1844 |
* Note that your function will be called for both start tags and empty |
---|
1845 |
* tags. That is, we make no distinction between <br></br> |
---|
1846 |
* and <br/>. |
---|
1847 |
*/ |
---|
1848 |
ElementHandler[string] onEndTag; |
---|
1849 |
|
---|
1850 |
/**
 * Base constructor, available to this class and its subclasses only.
 *
 * Records the text currently referenced by s as elementStart; toString()
 * later slices that snapshot to report how much input has been consumed.
 */
protected this()
{
    string remaining = *s;
    elementStart = remaining;
}
---|
1854 |
|
---|
1855 |
/** |
---|
1856 |
* Register a handler which will be called whenever text is encountered. |
---|
1857 |
* |
---|
1858 |
* Examples: |
---|
1859 |
* -------------- |
---|
1860 |
* // Call this function whenever text is encountered |
---|
1861 |
* onText = (string s) |
---|
1862 |
* { |
---|
1863 |
* // Your code here |
---|
1864 |
* |
---|
1865 |
* // The passed parameter s will have been decoded by the time you see |
---|
1866 |
* // it, and so may contain any character. |
---|
1867 |
* // |
---|
1868 |
* // This is a closure, so code here may reference |
---|
1869 |
* // variables which are outside of this scope |
---|
1870 |
* }; |
---|
1871 |
* -------------- |
---|
1872 |
*/ |
---|
1873 |
void onText(Handler handler)
{
    // parse() hands decoded text to this handler, but only when no raw
    // text handler has been registered.
    textHandler = handler;
}
---|
1874 |
|
---|
1875 |
/** |
---|
1876 |
* Register an alternative handler which will be called whenever text |
---|
1877 |
* is encountered. This differs from onText in that onText will decode |
---|
1878 |
* the text, whereas onTextRaw will not. This allows you to make design |
---|
1879 |
* choices, since onText will be more accurate, but slower, while |
---|
1880 |
* onTextRaw will be faster, but less accurate. Of course, you can |
---|
1881 |
* still call decode() within your handler, if you want, but you'd |
---|
1882 |
* probably want to use onTextRaw only in circumstances where you |
---|
1883 |
* know that decoding is unnecessary. |
---|
1884 |
* |
---|
1885 |
* Examples: |
---|
1886 |
* -------------- |
---|
1887 |
* // Call this function whenever text is encountered |
---|
1888 |
* onTextRaw = (string s) |
---|
1889 |
* { |
---|
1890 |
* // Your code here |
---|
1891 |
* |
---|
1892 |
* // The passed parameter s will NOT have been decoded. |
---|
1893 |
* // |
---|
1894 |
* // This is a closure, so code here may reference |
---|
1895 |
* // variables which are outside of this scope |
---|
1896 |
* }; |
---|
1897 |
* -------------- |
---|
1898 |
*/ |
---|
1899 |
void onTextRaw(Handler handler)
{
    // When set, parse() calls this handler with the undecoded text and
    // does not call the decoded-text handler.
    rawTextHandler = handler;
}
---|
1900 |
|
---|
1901 |
/** |
---|
1902 |
* Register a handler which will be called whenever a character data |
---|
1903 |
* segment is encountered. |
---|
1904 |
* |
---|
1905 |
* Examples: |
---|
1906 |
* -------------- |
---|
1907 |
* // Call this function whenever a CData section is encountered |
---|
1908 |
* onCData = (string s) |
---|
1909 |
* { |
---|
1910 |
* // Your code here |
---|
1911 |
* |
---|
1912 |
* // The passed parameter s does not include the opening <![CDATA[ |
---|
1913 |
* // nor closing ]]> |
---|
1914 |
* // |
---|
1915 |
* // This is a closure, so code here may reference |
---|
1916 |
* // variables which are outside of this scope |
---|
1917 |
* }; |
---|
1918 |
* -------------- |
---|
1919 |
*/ |
---|
1920 |
void onCData(Handler handler)
{
    // Invoked by parse() with the text between "<![CDATA[" and "]]>".
    cdataHandler = handler;
}
---|
1921 |
|
---|
1922 |
/** |
---|
1923 |
* Register a handler which will be called whenever a comment is |
---|
1924 |
* encountered. |
---|
1925 |
* |
---|
1926 |
* Examples: |
---|
1927 |
* -------------- |
---|
1928 |
* // Call this function whenever a comment is encountered |
---|
1929 |
* onComment = (string s) |
---|
1930 |
* { |
---|
1931 |
* // Your code here |
---|
1932 |
* |
---|
1933 |
* // The passed parameter s does not include the opening <!-- nor |
---|
1934 |
* // closing --> |
---|
1935 |
* // |
---|
1936 |
* // This is a closure, so code here may reference |
---|
1937 |
* // variables which are outside of this scope |
---|
1938 |
* }; |
---|
1939 |
* -------------- |
---|
1940 |
*/ |
---|
1941 |
void onComment(Handler handler)
{
    // Invoked by parse() with the text between "<!--" and "-->".
    commentHandler = handler;
}
---|
1942 |
|
---|
1943 |
/** |
---|
1944 |
* Register a handler which will be called whenever a processing |
---|
1945 |
* instruction is encountered. |
---|
1946 |
* |
---|
1947 |
* Examples: |
---|
1948 |
* -------------- |
---|
1949 |
* // Call this function whenever a processing instruction is encountered |
---|
1950 |
* onPI = (string s) |
---|
1951 |
* { |
---|
1952 |
* // Your code here |
---|
1953 |
* |
---|
1954 |
* // The passed parameter s does not include the opening <? nor |
---|
1955 |
* // closing ?> |
---|
1956 |
* // |
---|
1957 |
* // This is a closure, so code here may reference |
---|
1958 |
* // variables which are outside of this scope |
---|
1959 |
* }; |
---|
1960 |
* -------------- |
---|
1961 |
*/ |
---|
1962 |
void onPI(Handler handler)
{
    // Invoked by parse() with the text between "<?" and "?>".
    piHandler = handler;
}
---|
1963 |
|
---|
1964 |
/** |
---|
1965 |
* Register a handler which will be called whenever an XML instruction is |
---|
1966 |
* encountered. |
---|
1967 |
* |
---|
1968 |
* Examples: |
---|
1969 |
* -------------- |
---|
1970 |
* // Call this function whenever an XML instruction is encountered |
---|
1971 |
* // (Note: XML instructions may only occur preceding the root tag of a |
---|
1972 |
* // document). |
---|
1973 |
* onXI = (string s) |
---|
1974 |
* { |
---|
1975 |
* // Your code here |
---|
1976 |
* |
---|
1977 |
* // The passed parameter s does not include the opening <! nor |
---|
1978 |
* // closing > |
---|
1979 |
* // |
---|
1980 |
* // This is a closure, so code here may reference |
---|
1981 |
* // variables which are outside of this scope |
---|
1982 |
* }; |
---|
1983 |
* -------------- |
---|
1984 |
*/ |
---|
1985 |
void onXI(Handler handler)
{
    // Invoked by parse() with the text between "<!" and ">".
    xiHandler = handler;
}
---|
1986 |
|
---|
1987 |
/** |
---|
1988 |
* Parse an XML element. |
---|
1989 |
* |
---|
1990 |
* Parsing will continue until the end of the current element. Any items |
---|
1991 |
* encountered for which a handler has been registered will invoke that |
---|
1992 |
* handler. |
---|
1993 |
* |
---|
1994 |
* Throws: various kinds of XMLException |
---|
1995 |
*/ |
---|
1996 |
void parse()
{
    string t;
    Tag root = tag_;

    // Most recent start tag seen for each element name, so that an end tag
    // can be paired with the start tag it closes.
    Tag[string] startTags;
    if (tag_ !is null) startTags[tag_.name] = tag_;

    while(s.length != 0)
    {
        if (startsWith(*s,"<!--"))
        {
            // Comment: hand over the text between "<!--" and "-->"
            chop(*s,4);
            t = chop(*s,indexOf(*s,"-->"));
            if (commentHandler.funcptr !is null) commentHandler(t);
            chop(*s,3);
        }
        else if (startsWith(*s,"<![CDATA["))
        {
            // CDATA section: hand over the text between the delimiters
            chop(*s,9);
            t = chop(*s,indexOf(*s,"]]>"));
            if (cdataHandler.funcptr !is null) cdataHandler(t);
            chop(*s,3);
        }
        else if (startsWith(*s,"<!"))
        {
            // XML instruction: everything between "<!" and ">"
            chop(*s,2);
            t = chop(*s,indexOf(*s,">"));
            if (xiHandler.funcptr !is null) xiHandler(t);
            chop(*s,1);
        }
        else if (startsWith(*s,"<?"))
        {
            // Processing instruction: everything between "<?" and "?>"
            chop(*s,2);
            t = chop(*s,indexOf(*s,"?>"));
            if (piHandler.funcptr !is null) piHandler(t);
            chop(*s,2);
        }
        else if (startsWith(*s,"<"))
        {
            tag_ = new Tag(*s,true);
            if (root is null)
                return; // Return to constructor of derived class

            if (tag_.isStart)
            {
                startTags[tag_.name] = tag_;

                auto parser = new ElementParser(this);

                // Prefer the handler registered for this name, then the
                // catch-all handler registered under null.
                auto handler = tag_.name in onStartTag;
                if (handler !is null) (*handler)(parser);
                else
                {
                    handler = null in onStartTag;
                    if (handler !is null) (*handler)(parser);
                }
            }
            else if (tag_.isEnd)
            {
                auto startTag = startTags[tag_.name];
                string text;

                // The element's raw content is the stretch of input lying
                // between the end of the start tag and the beginning of
                // the end tag.
                char* p = startTag.tagString.ptr
                    + startTag.tagString.length;
                char* q = tag_.tagString.ptr;
                text = p[0..(q-p)];

                auto element = new Element(startTag);
                if (text.length != 0) element ~= new Text(text);

                auto handler = tag_.name in onEndTag;
                if (handler !is null) (*handler)(element);
                else
                {
                    handler = null in onEndTag;
                    if (handler !is null) (*handler)(element);
                }

                // The end tag of the element being parsed finishes the job
                if (tag_.name == root.name) return;
            }
            else if (tag_.isEmpty)
            {
                Tag startTag = new Tag(tag_.name);

                // An empty-element tag carries its own attributes. Copy
                // them onto the pretend start tag; otherwise the handlers
                // and the Element built below would see none of them.
                foreach (attrName, attrValue; tag_.attr)
                    startTag.attr[attrName] = attrValue;

                // Handle the pretend start tag
                string s2;
                auto parser = new ElementParser(startTag,&s2);
                auto handler1 = startTag.name in onStartTag;
                if (handler1 !is null) (*handler1)(parser);
                else
                {
                    handler1 = null in onStartTag;
                    if (handler1 !is null) (*handler1)(parser);
                }

                // Handle the pretend end tag
                auto element = new Element(startTag);
                auto handler2 = tag_.name in onEndTag;
                if (handler2 !is null) (*handler2)(element);
                else
                {
                    handler2 = null in onEndTag;
                    if (handler2 !is null) (*handler2)(element);
                }
            }
        }
        else
        {
            // Plain text up to the next markup. The raw handler takes
            // priority; the decoding handler is used only without it.
            t = chop(*s,indexOf(*s,"<"));
            if (rawTextHandler.funcptr !is null)
                rawTextHandler(t);
            else if (textHandler.funcptr !is null)
                textHandler(decode(t,DecodeMode.LOOSE));
        }
    }
}
---|
2112 |
|
---|
2113 |
/** |
---|
2114 |
* Returns that part of the element which has already been parsed |
---|
2115 |
*/ |
---|
2116 |
override string toString()
{
    // Whatever is no longer part of *s has been consumed since
    // elementStart was captured.
    return elementStart[0 .. elementStart.length - s.length];
}
---|
2121 |
|
---|
2122 |
} |
---|
2123 |
|
---|
2124 |
private |
---|
2125 |
{ |
---|
2126 |
/*
 * Mixed into each checkXxx function. It relies on the host function having
 * a ref string parameter named s, and gives it:
 *   old    - the input position on entry
 *   fail() - rewind s to old and throw an Err labelled with msg
 */
template Check(string msg)
{
    string old = s;

    // Fail with just the production name
    void fail()
    {
        s = old;
        throw new Err(s,msg);
    }

    // Fail, chaining the lower-level error that caused the failure
    void fail(Err e)
    {
        s = old;
        throw new Err(s,msg,e);
    }

    // Fail with a specific message. The inner error is created before s is
    // rewound, so it records the position at which the problem was found.
    void fail(string msg2)
    {
        Err detail = new Err(s,msg2);
        s = old;
        throw new Err(s,msg,detail);
    }
}
---|
2147 |
|
---|
2148 |
// Consumes one comment, one processing instruction, or one run of
// whitespace, chosen by how s begins.
void checkMisc(ref string s) // rule 27
{
    mixin Check!("Misc");

    try
    {
        if (startsWith(s,"<!--"))
            checkComment(s);
        else if (startsWith(s,"<?"))
            checkPI(s);
        else
            checkSpace(s);
    }
    catch(Err cause) { fail(cause); }
}
---|
2160 |
|
---|
2161 |
// Consumes a whole document: prolog, one element, then any number of
// trailing Misc items.
void checkDocument(ref string s) // rule 1
{
    mixin Check!("Document");

    try
    {
        checkProlog(s);
        checkElement(s);
        star!(checkMisc)(s);
    }
    catch(Err cause)
    {
        fail(cause);
    }
}
---|
2172 |
|
---|
2173 |
// Scans the whole of s and fails at the first code point for which
// isChar() is false. On success s is left untouched.
void checkChars(ref string s) // rule 2
{
    // TO DO - Fix std.utf stride and decode functions, then use those
    // instead

    mixin Check!("Chars");

    int badIndex = -1;
    dchar badChar;
    foreach(int i,dchar d; s)
    {
        if (isChar(d)) continue;
        badChar = d;
        badIndex = i;
        break;
    }
    if (badIndex == -1) return;

    // Advance to the offending character so the error records its position
    s = s[badIndex..$];
    fail(format("invalid character: U+%04X",badChar));
}
---|
2197 |
|
---|
2198 |
// Consumes a run of space, tab, line feed and carriage return characters;
// fails if nothing was consumed.
void checkSpace(ref string s) // rule 3
{
    mixin Check!("Whitespace");

    munch(s,"\u0020\u0009\u000A\u000D");
    bool unchanged = s is old;
    if (unchanged) fail();
}
---|
2204 |
|
---|
2205 |
// Consumes a name from the front of s and returns it in name.
//
// The first character must be '_', ':' or a letter; later characters may
// also be '-', '.', digits, combining characters or extenders. Fails if s
// is empty or does not begin with a valid first character.
void checkName(ref string s, out string name) // rule 5
{
    mixin Check!("Name");

    if (s.length == 0) fail();

    // Default to the whole input: if the loop below never meets a
    // non-name character, the name extends to the end of s. (Starting
    // from 0 would return an empty name and consume nothing.)
    int n = cast(int)s.length;
    foreach(int i,dchar c;s)
    {
        if (c == '_' || c == ':' || isLetter(c)) continue;
        if (i == 0) fail();
        if (c == '-' || c == '.' || isDigit(c)
            || isCombiningChar(c) || isExtender(c)) continue;
        n = i;
        break;
    }
    name = s[0..n];
    s = s[n..$];
}
---|
2223 |
|
---|
2224 |
// Consumes a quoted attribute value. The value may contain references,
// but no '<'. Whichever quote character opens the value must close it.
void checkAttValue(ref string s) // rule 10
{
    mixin Check!("AttValue");

    if (s.length == 0) fail();
    char quote = s[0];
    if (quote != '\u0022' && quote != '\u0027')
        fail("attribute value requires quotes");
    s = s[1..$];

    // Skip ordinary characters, stopping at '<', '&' or the closing quote
    string stopSet = "^<&"~quote;
    while (true)
    {
        munch(s,stopSet);
        if (s.length == 0) fail("unterminated attribute value");
        if (s[0] == '<') fail("< found in attribute value");
        if (s[0] == quote) break;
        // Only '&' remains: it must begin a well-formed reference
        try { checkReference(s); } catch(Err cause) { fail(cause); }
    }
    s = s[1..$];
}
---|
2243 |
|
---|
2244 |
// Consumes character data up to, but not including, the next '&' or '<'.
// Fails if "]]>" is met first.
void checkCharData(ref string s) // rule 14
{
    mixin Check!("CharData");

    for (; s.length != 0; s = s[1..$])
    {
        if (startsWith(s,"&") || startsWith(s,"<")) break;
        if (startsWith(s,"]]>")) fail("]]> found within char data");
    }
}
---|
2256 |
|
---|
2257 |
// Consumes a comment, "<!--" through "-->".
//
// "--" may not occur inside a comment, so the first "--" after the opening
// delimiter has to be the start of the closing "-->".
void checkComment(ref string s) // rule 15
{
    mixin Check!("Comment");

    try { checkLiteral("<!--",s); } catch(Err e) { fail(e); }
    int n = s.indexOf("--");
    if (n == -1) fail("unterminated comment");
    // Skip the comment body. (Keeping s[0..n] instead would retain the
    // body, and the terminator check below could never succeed.)
    s = s[n..$];
    try { checkLiteral("-->",s); } catch(Err e) { fail(e); }
}
---|
2267 |
|
---|
2268 |
// Consumes a processing instruction: "<?" and everything up to and
// including the next "?>".
void checkPI(ref string s) // rule 16
{
    mixin Check!("PI");

    try
    {
        checkLiteral("<?",s);
        checkEnd("?>",s);
    }
    catch(Err cause)
    {
        fail(cause);
    }
}
---|
2279 |
|
---|
2280 |
// Consumes a CDATA section: the opening literal held in cdata and
// everything up to and including the next "]]>".
void checkCDSect(ref string s) // rule 18
{
    mixin Check!("CDSect");

    try
    {
        checkLiteral(cdata,s);
        checkEnd("]]>",s);
    }
    catch(Err cause)
    {
        fail(cause);
    }
}
---|
2291 |
|
---|
2292 |
// Consumes the prolog: an optional XML declaration, any number of Misc
// items, and optionally a DOCTYPE declaration followed by more Misc items.
void checkProlog(ref string s) // rule 22
{
    mixin Check!("Prolog");

    try
    {
        // The XML declaration is optional, so a document may start
        // directly with a comment, a PI, a DOCTYPE or the root element.
        // Processing instructions such as <?xml-stylesheet ...?> also
        // begin with "<?xml", so only whitespace, '?' or end of input
        // after the reserved target marks a declaration. When one is
        // present it must be well formed.
        bool hasDecl = false;
        if (startsWith(s,"<?xml"))
        {
            hasDecl = s.length == 5
                || s[5] == '?'  || s[5] == ' '
                || s[5] == '\t' || s[5] == '\n' || s[5] == '\r';
        }
        if (hasDecl) checkXMLDecl(s);

        star!(checkMisc)(s);
        opt!(seq!(checkDocTypeDecl,star!(checkMisc)))(s);
    }
    catch(Err e) { fail(e); }
}
---|
2304 |
|
---|
2305 |
// Consumes an XML declaration: "<?xml", the version info, optional
// encoding and standalone declarations, optional whitespace, then "?>".
void checkXMLDecl(ref string s) // rule 23
{
    mixin Check!("XMLDecl");

    try
    {
        checkLiteral("<?xml",s);
        checkVersionInfo(s);            // mandatory
        opt!(checkEncodingDecl)(s);
        opt!(checkSDDecl)(s);
        opt!(checkSpace)(s);
        checkLiteral("?>",s);
    }
    catch(Err cause)
    {
        fail(cause);
    }
}
---|
2320 |
|
---|
2321 |
// Consumes whitespace, the word "version", an equals sign and a quoted
// version number.
void checkVersionInfo(ref string s) // rule 24
{
    mixin Check!("VersionInfo");

    try
    {
        checkSpace(s);
        checkLiteral("version",s);
        checkEq(s);
        quoted!(checkVersionNum)(s);
    }
    catch(Err cause)
    {
        fail(cause);
    }
}
---|
2334 |
|
---|
2335 |
// Consumes an equals sign, with optional whitespace on either side.
void checkEq(ref string s) // rule 25
{
    mixin Check!("Eq");

    try
    {
        opt!(checkSpace)(s);
        checkLiteral("=",s);
        opt!(checkSpace)(s);
    }
    catch(Err cause)
    {
        fail(cause);
    }
}
---|
2347 |
|
---|
2348 |
// Consumes a version number: one or more characters drawn from letters,
// digits, '_', '.', ':' and '-'. Fails if nothing was consumed.
void checkVersionNum(ref string s) // rule 26
{
    mixin Check!("VersionNum");

    munch(s,"a-zA-Z0-9_.:-");
    bool unchanged = s is old;
    if (unchanged) fail();
}
---|
2355 |
|
---|
2356 |
// Consumes a DOCTYPE declaration: "<!DOCTYPE" and everything up to and
// including the next ">". The declaration's interior is not examined.
void checkDocTypeDecl(ref string s) // rule 28
{
    mixin Check!("DocTypeDecl");

    try
    {
        checkLiteral("<!DOCTYPE",s);
        //
        // TO DO -- ensure DOCTYPE is well formed
        // (But not yet. That's one of our "future directions")
        //
        checkEnd(">",s);
    }
    catch(Err cause)
    {
        fail(cause);
    }
}
---|
2371 |
|
---|
2372 |
// Consumes a standalone declaration: whitespace, the word "standalone",
// an equals sign, then yes or no in single or double quotes.
void checkSDDecl(ref string s) // rule 32
{
    mixin Check!("SDDecl");

    try
    {
        checkSpace(s);
        checkLiteral("standalone",s);
        checkEq(s);
    }
    catch(Err cause) { fail(cause); }

    bool isYes = startsWith(s,"'yes'") || startsWith(s,"\"yes\"");
    bool isNo  = startsWith(s,"'no'")  || startsWith(s,"\"no\"");
    if (!isYes && !isNo)
        fail("standalone attribute value must be 'yes', \"yes\", 'no' or \"no\"");

    // Step over the quoted value: 5 characters for yes, 4 for no
    int n = isYes ? 5 : 4;
    s = s[n..$];
}
---|
2391 |
|
---|
2392 |
// Consumes one element. If the opening tag is reported as "STag", the
// content and an end tag follow, and the end tag's name must equal the
// start tag's name. Any other tag type needs nothing further.
void checkElement(ref string s) // rule 39
{
    mixin Check!("Element");

    string sname,ename,tagType;
    try { checkTag(s,tagType,sname); } catch(Err cause) { fail(cause); }

    if (tagType != "STag") return;

    // Position of the end tag, kept so that a name mismatch is reported
    // at the end tag itself.
    string endTagPos;
    try
    {
        checkContent(s);
        endTagPos = s;
        checkETag(s,ename);
    }
    catch(Err cause) { fail(cause); }

    if (sname == ename) return;

    s = endTagPos;
    fail("end tag name \"" ~ ename
        ~ "\" differs from start tag name \""~sname~"\"");
}
---|
2417 |
|
---|
2418 |
// rules 40 and 44 |
---|
2419 |
// Consumes a start tag or an empty-element tag and returns its name.
// type is set to "STag" for a tag closed by ">" alone, and to "ETag" when
// a '/' precedes the closing ">" (that is, for an empty-element tag).
void checkTag(ref string s, out string type, out string name)
{
    mixin Check!("Tag");

    try
    {
        type = "STag";
        checkLiteral("<",s);
        checkName(s,name);
        star!(seq!(checkSpace,checkAttribute))(s);
        opt!(checkSpace)(s);

        bool selfClosing = s.length != 0 && s[0] == '/';
        if (selfClosing)
        {
            s = s[1..$];
            type = "ETag";
        }
        checkLiteral(">",s);
    }
    catch(Err cause)
    {
        fail(cause);
    }
}
---|
2439 |
|
---|
2440 |
// Consumes one attribute: a name, an equals sign and a quoted value.
// The name is checked and then discarded.
void checkAttribute(ref string s) // rule 41
{
    mixin Check!("Attribute");

    try
    {
        string attrName;
        checkName(s,attrName);
        checkEq(s);
        checkAttValue(s);
    }
    catch(Err cause)
    {
        fail(cause);
    }
}
---|
2453 |
|
---|
2454 |
// Consumes an end tag: "</", a name (returned in name), optional
// whitespace, then ">".
void checkETag(ref string s, out string name) // rule 42
{
    mixin Check!("ETag");

    try
    {
        checkLiteral("</",s);
        checkName(s,name);
        opt!(checkSpace)(s);
        checkLiteral(">",s);
    }
    catch(Err cause)
    {
        fail(cause);
    }
}
---|
2467 |
|
---|
2468 |
// Consumes element content: any mixture of references, comments,
// processing instructions, CDATA sections, child elements and character
// data. Stops, without consuming it, at the first "</" or at end of input.
void checkContent(ref string s) // rule 43
{
    mixin Check!("Content");

    try
    {
        while (s.length != 0)
        {
            // Move the rewind point up to the start of the current item
            old = s;

            if (startsWith(s,"&"))
                checkReference(s);
            else if (startsWith(s,"<!--"))
                checkComment(s);
            else if (startsWith(s,"<?"))
                checkPI(s);
            else if (startsWith(s,cdata))
                checkCDSect(s);
            else if (startsWith(s,"</"))
                break;
            else if (startsWith(s,"<"))
                checkElement(s);
            else
                checkCharData(s);
        }
    }
    catch(Err cause) { fail(cause); }
}
---|
2488 |
|
---|
2489 |
// Consumes a character reference, "&#" digits ";" or "&#x" hex-digits ";",
// and returns the referenced code point in c. Fails if there are no
// digits, if the value is not a legal XML character, or if the
// terminating ';' is missing.
void checkCharRef(ref string s, out dchar c) // rule 66
{
    mixin Check!("CharRef");

    c = 0;
    try { checkLiteral("&#",s); } catch(Err e) { fail(e); }
    int radix = 10;
    if (s.length != 0 && s[0] == 'x')
    {
        s = s[1..$];
        radix = 16;
    }
    if (s.length == 0) fail("unterminated character reference");
    if (s[0] == ';')
        fail("character reference must have at least one digit");
    while (s.length != 0)
    {
        char d = s[0];

        // Value of the digit; 100 marks "not a digit in any radix"
        int n;
        if (d >= '0' && d <= '9') n = d - '0';
        else if (d >= 'a' && d <= 'f') n = d - 'a' + 10;
        else if (d >= 'A' && d <= 'F') n = d - 'A' + 10;
        else n = 100;

        if (n >= radix) break;
        c *= radix;
        c += n;

        // Reject as soon as the value passes the last Unicode code point.
        // c was at most 0x10FFFF before this digit, so it cannot have
        // wrapped yet; without this check a long digit string could wrap
        // round to a value that looks legal.
        if (c > 0x10FFFF)
            fail(format("U+%04X is not a legal character",c));

        s = s[1..$];
    }
    if (!isChar(c)) fail(format("U+%04X is not a legal character",c));
    if (s.length == 0 || s[0] != ';') fail("expected ;");
    else s = s[1..$];
}
---|
2537 |
|
---|
2538 |
// Consumes a reference: a character reference when s begins with "&#",
// otherwise an entity reference.
void checkReference(ref string s) // rule 67
{
    mixin Check!("Reference");

    try
    {
        if (startsWith(s,"&#"))
        {
            dchar unused;
            checkCharRef(s,unused);
        }
        else
        {
            checkEntityRef(s);
        }
    }
    catch(Err cause) { fail(cause); }
}
---|
2550 |
|
---|
2551 |
// Consumes an entity reference: '&', a name, then ';'. The name is
// checked and then discarded.
void checkEntityRef(ref string s) // rule 68
{
    mixin Check!("EntityRef");

    try
    {
        string entityName;
        checkLiteral("&",s);
        checkName(s,entityName);
        checkLiteral(";",s);
    }
    catch(Err cause)
    {
        fail(cause);
    }
}
---|
2564 |
|
---|
2565 |
// Consumes an encoding name: at least one ASCII letter, followed by any
// number of letters, digits, '_', '.' and '-'.
void checkEncName(ref string s) // rule 81
{
    mixin Check!("EncName");

    // Leading letters; there must be at least one
    munch(s,"a-zA-Z");
    bool noLetters = s is old;
    if (noLetters) fail();

    // Remainder of the name
    munch(s,"a-zA-Z0-9_.-");
}
---|
2573 |
|
---|
2574 |
// Consumes an encoding declaration: whitespace, the word "encoding", an
// equals sign and a quoted encoding name.
void checkEncodingDecl(ref string s) // rule 80
{
    mixin Check!("EncodingDecl");

    try
    {
        checkSpace(s);
        checkLiteral("encoding",s);
        checkEq(s);
        quoted!(checkEncName)(s);
    }
    catch(Err cause)
    {
        fail(cause);
    }
}
---|
2587 |
|
---|
2588 |
// Helper functions |
---|
2589 |
|
---|
2590 |
// Consumes literal from the front of s; fails if s does not begin with it.
void checkLiteral(string literal,ref string s)
{
    mixin Check!("Literal");

    if (startsWith(s,literal))
    {
        s = s[literal.length..$];
        return;
    }
    fail("Expected literal \""~literal~"\"");
}
---|
2597 |
|
---|
2598 |
// Skips forward to the first occurrence of end and consumes it as well.
// Throws if end does not occur anywhere in s.
void checkEnd(string end,ref string s)
{
    // Deliberately no mixin Check here.

    int at = indexOf(s,end);
    if (at == -1)
        throw new Err(s,"Unable to find terminating \""~end~"\"");

    s = s[at..$];
    checkLiteral(end,s);
}
---|
2607 |
|
---|
2608 |
// Metafunctions -- none of these use mixin Check |
---|
2609 |
|
---|
2610 |
// Applies f at most once: a failure of f is swallowed rather than
// propagated.
// NOTE(review): this presumably relies on f rewinding s when it fails, as
// the Check mixin's fail() does - confirm for any f that does not use it.
void opt(alias f)(ref string s)
{
    try
    {
        f(s);
    }
    catch(Err ignored)
    {
        // An absent optional item is not an error
    }
}
---|
2614 |
|
---|
2615 |
// Applies f one or more times: the first application must succeed, and
// any further ones are handled by star.
void plus(alias f)(ref string s)
{
    // The mandatory first occurrence; its failure propagates to the caller
    f(s);

    // Then as many more as will match
    star!(f)(s);
}
---|
2620 |
|
---|
2621 |
// Applies f repeatedly, stopping without error at the first failure of f
// or when s has been used up.
void star(alias f)(ref string s)
{
    for (;;)
    {
        if (s.length == 0) return;
        try
        {
            f(s);
        }
        catch(Err stop)
        {
            return;
        }
    }
}
---|
2629 |
|
---|
2630 |
// Applies f between a matching pair of quotes. Single quotes are used if
// s begins with one; in every other case double quotes are required.
void quoted(alias f)(ref string s)
{
    string quote = startsWith(s,"'") ? "'" : "\"";

    checkLiteral(quote,s);
    f(s);
    checkLiteral(quote,s);
}
---|
2645 |
|
---|
2646 |
// Applies f and then g to the same input. A failure of either one
// propagates to the caller; nothing is rewound here.
void seq(alias f,alias g)(ref string s)
{
    f(s);   // first item
    g(s);   // second item, starting where f stopped
}
---|
2651 |
} |
---|
2652 |
|
---|
2653 |
/** |
---|
2654 |
* Check an entire XML document for well-formedness |
---|
2655 |
* |
---|
2656 |
* Params: |
---|
2657 |
* s = the document to be checked, passed as a string |
---|
2658 |
* |
---|
2659 |
* Throws: CheckException if the document is not well formed |
---|
2660 |
* |
---|
2661 |
* CheckException's toString() method will yield the complete hierarchy of |
---|
2662 |
* parse failure (the XML equivalent of a stack trace), giving the line and |
---|
2663 |
* column number of every failure at every level. |
---|
2664 |
*/ |
---|
2665 |
void check(string s)
{
    // Keep the whole document. s is advanced while checking, but line and
    // column numbers can only be worked out against the complete text.
    string entire = s;

    try
    {
        checkChars(s);
        checkDocument(s);
        if (s.length != 0) throw new Err(s,"Junk found after document");
    }
    catch(Err e)
    {
        // Passing the advanced s here would place "Junk found after
        // document" at line 1, column 1, because by then s holds nothing
        // but the junk itself.
        e.complete(entire);
        throw e;
    }
}
---|
2679 |
|
---|
2680 |
unittest
{
    // The second book closes its genre element with </genres>, so the
    // document is not well formed and check() must reject it.
    string doc = `<?xml version="1.0"?>
<catalog>
<book id="bk101">
<author>Gambardella, Matthew</author>
<title>XML Developer's Guide</title>
<genre>Computer</genre>
<price>44.95</price>
<publish_date>2000-10-01</publish_date>
<description>An in-depth look at creating applications
with XML.</description>
</book>
<book id="bk102">
<author>Ralls, Kim</author>
<title>Midnight Rain</title>
<genre>Fantasy</genres>
<price>5.95</price>
<publish_date>2000-12-16</publish_date>
<description>A former architect battles corporate zombies,
an evil sorceress, and her own childhood to become queen
of the world.</description>
</book>
<book id="bk103">
<author>Corets, Eva</author>
<title>Maeve Ascendant</title>
<genre>Fantasy</genre>
<price>5.95</price>
<publish_date>2000-11-17</publish_date>
<description>After the collapse of a nanotechnology
society in England, the young survivors lay the
foundation for a new society.</description>
</book>
</catalog>`;

    bool rejected = false;
    try
    {
        check(doc);
    }
    catch(CheckException e)
    {
        rejected = true;
        int n = e.toString().indexOf(
            "end tag name \"genres\" differs from start tag name \"genre\"");
        assert(n != -1);
    }
    // check() returning normally would mean the mismatch went unnoticed
    assert(rejected);
}
---|
2725 |
|
---|
2726 |
/** The base class for exceptions thrown by this module */ |
---|
2727 |
class XMLException : Exception
{
    /// Constructs the exception with the given message
    this(string msg)
    {
        super(msg);
    }
}
---|
2728 |
|
---|
2729 |
// Other exceptions |
---|
2730 |
|
---|
2731 |
/// Thrown during Comment constructor |
---|
2732 |
class CommentException : XMLException
{
    private this(string msg) { super(msg); }
}

/// Thrown during CData constructor
class CDataException : XMLException
{
    private this(string msg) { super(msg); }
}

/// Thrown during XMLInstruction constructor
class XIException : XMLException
{
    private this(string msg) { super(msg); }
}

/// Thrown during ProcessingInstruction constructor
class PIException : XMLException
{
    private this(string msg) { super(msg); }
}

/// Thrown during Text constructor
class TextException : XMLException
{
    private this(string msg) { super(msg); }
}

/// Thrown during decode()
class DecodeException : XMLException
{
    private this(string msg) { super(msg); }
}

/// Thrown if comparing with wrong type
class InvalidTypeException : XMLException
{
    private this(string msg) { super(msg); }
}

/// Thrown when parsing for Tags
class TagException : XMLException
{
    private this(string msg) { super(msg); }
}
---|
2762 |
|
---|
2763 |
/** |
---|
2764 |
* Thrown during check() |
---|
2765 |
*/ |
---|
2766 |
class CheckException : XMLException
{
    CheckException err; /// Parent in hierarchy
    private string tail; // input that remained unparsed at the failure
    /**
     * Name of production rule which failed to parse,
     * or specific error message
     */
    string msg;
    uint line = 0; /// Line number at which parse failure occurred
    uint column = 0; /// Column number at which parse failure occurred

    /*
     * Params:
     *  tail = the input remaining at the point of failure
     *  msg  = production name or specific error message
     *  err  = the error this one is chained to, if any
     */
    private this(string tail,string msg,Err err=null)
    {
        // Give the base class the message as well, so that code which
        // sees only an Exception does not get an empty message.
        super(msg);
        this.tail = tail;
        this.msg = msg;
        this.err = err;
    }

    /*
     * Works out line and column for this error and for every error chained
     * through err. entire must be the complete document that was checked,
     * of which tail is a suffix.
     */
    private void complete(string entire)
    {
        // Everything that precedes the failure position
        string head = entire[0..$-tail.length];
        // Index just past the last newline, i.e. start of the failing line
        int n = head.lastIndexOf('\n') + 1;
        line = head.count("\n") + 1;
        // Count columns in code points rather than in UTF-8 code units
        dstring t;
        transcode(head[n..$],t);
        column = t.length + 1;
        if (err !is null) err.complete(entire);
    }

    /**
     * Returns one line per error in the chain. The lines of the chained
     * err come first, and this error's own line comes last.
     */
    override string toString()
    {
        string s;
        if (line != 0) s = format("Line %d, column %d: ",line,column);
        s ~= msg;
        s ~= '\n';
        if (err !is null) s = err.toString ~ s;
        return s;
    }
}
---|
2807 |
|
---|
2808 |
private alias CheckException Err; |
---|
2809 |
|
---|
2810 |
// Private helper functions |
---|
2811 |
|
---|
2812 |
private |
---|
2813 |
{ |
---|
2814 |
/**
 * Casts o to the class type T.
 *
 * Returns: o as a T
 * Throws: InvalidTypeException if the cast yields null
 */
T toType(T)(Object o)
{
    T converted = cast(T)(o);
    if (converted !is null)
    {
        return converted;
    }
    throw new InvalidTypeException("Attempt to compare a "
        ~ T.stringof ~ " with an instance of another type");
}
---|
2824 |
|
---|
2825 |
/**
 * Removes the first n code units from s and returns them.
 *
 * Params:
 *     s = string to consume from; afterwards it holds the remainder
 *     n = number of leading code units to take, or -1 to take all of s
 *         (presumably the "not found" result of an index search -- confirm
 *         against callers)
 *
 * Returns: the removed prefix
 */
string chop(ref string s, int n)
{
    // Keep the slice bound in size_t so s.length is never narrowed to int.
    size_t len = (n == -1) ? s.length : cast(size_t) n;
    string t = s[0..len];
    s = s[len..$];
    return t;
}
---|
2832 |
|
---|
2833 |
/**
 * Optionally consumes one character.
 *
 * If s is non-empty and starts with c, drops that first code unit from s
 * and returns true; otherwise leaves s untouched and returns false.
 */
bool optc(ref string s, char c)
{
    if (s.length == 0 || s[0] != c)
    {
        return false;
    }
    s = s[1..$];
    return true;
}
---|
2839 |
|
---|
2840 |
/**
 * Consumes one required character.
 *
 * Drops the first code unit of s when it equals c.
 *
 * Throws: TagException (with an empty message) if s is empty or does not
 *         start with c
 */
void reqc(ref string s, char c)
{
    bool matches = s.length != 0 && s[0] == c;
    if (!matches) throw new TagException("");
    s = s[1..$];
}
---|
2845 |
|
---|
2846 |
/**
 * Folds the code points of s into a hash value.
 *
 * Params:
 *     s = text to hash; it is iterated as decoded code points (dchar)
 *     h = starting value, which lets several strings be hashed in sequence
 *
 * Returns: the result of applying h = h * 11 + c for each code point c
 */
hash_t hash(string s,hash_t h=0)
{
    hash_t acc = h;
    foreach (dchar ch; s)
    {
        acc *= 11;
        acc += ch;
    }
    return acc;
}
---|
2851 |
|
---|
2852 |
// Definitions from the XML specification |
---|
2853 |
// Code point ranges stored as consecutive (low, high) pairs in ascending
// order, the layout lookup() searches. Presumably the XML "Char"
// production -- confirm against the specification.
dchar[] CharTable=[
    0x9,0x9, 0xA,0xA, 0xD,0xD, 0x20,0xD7FF, 0xE000,0xFFFD,
    0x10000,0x10FFFF
];
---|
2855 |
// Code point ranges stored as consecutive (low, high) pairs in ascending
// order, the layout lookup() searches; a single code point appears as a
// pair with equal bounds. Presumably the XML "BaseChar" production --
// confirm against the specification.
dchar[] BaseCharTable=[
    0x0041,0x005A,0x0061,0x007A,0x00C0,0x00D6,0x00D8,
    0x00F6,0x00F8,0x00FF,0x0100,0x0131,0x0134,0x013E,0x0141,0x0148,0x014A,
    0x017E,0x0180,0x01C3,0x01CD,0x01F0,0x01F4,0x01F5,0x01FA,0x0217,0x0250,
    0x02A8,0x02BB,0x02C1,0x0386,0x0386,0x0388,0x038A,0x038C,0x038C,0x038E,
    0x03A1,0x03A3,0x03CE,0x03D0,0x03D6,0x03DA,0x03DA,0x03DC,0x03DC,0x03DE,
    0x03DE,0x03E0,0x03E0,0x03E2,0x03F3,0x0401,0x040C,0x040E,0x044F,0x0451,
    0x045C,0x045E,0x0481,0x0490,0x04C4,0x04C7,0x04C8,0x04CB,0x04CC,0x04D0,
    0x04EB,0x04EE,0x04F5,0x04F8,0x04F9,0x0531,0x0556,0x0559,0x0559,0x0561,
    0x0586,0x05D0,0x05EA,0x05F0,0x05F2,0x0621,0x063A,0x0641,0x064A,0x0671,
    0x06B7,0x06BA,0x06BE,0x06C0,0x06CE,0x06D0,0x06D3,0x06D5,0x06D5,0x06E5,
    0x06E6,0x0905,0x0939,0x093D,0x093D,0x0958,0x0961,0x0985,0x098C,0x098F,
    0x0990,0x0993,0x09A8,0x09AA,0x09B0,0x09B2,0x09B2,0x09B6,0x09B9,0x09DC,
    0x09DD,0x09DF,0x09E1,0x09F0,0x09F1,0x0A05,0x0A0A,0x0A0F,0x0A10,0x0A13,
    0x0A28,0x0A2A,0x0A30,0x0A32,0x0A33,0x0A35,0x0A36,0x0A38,0x0A39,0x0A59,
    0x0A5C,0x0A5E,0x0A5E,0x0A72,0x0A74,0x0A85,0x0A8B,0x0A8D,0x0A8D,0x0A8F,
    0x0A91,0x0A93,0x0AA8,0x0AAA,0x0AB0,0x0AB2,0x0AB3,0x0AB5,0x0AB9,0x0ABD,
    0x0ABD,0x0AE0,0x0AE0,0x0B05,0x0B0C,0x0B0F,0x0B10,0x0B13,0x0B28,0x0B2A,
    0x0B30,0x0B32,0x0B33,0x0B36,0x0B39,0x0B3D,0x0B3D,0x0B5C,0x0B5D,0x0B5F,
    0x0B61,0x0B85,0x0B8A,0x0B8E,0x0B90,0x0B92,0x0B95,0x0B99,0x0B9A,0x0B9C,
    0x0B9C,0x0B9E,0x0B9F,0x0BA3,0x0BA4,0x0BA8,0x0BAA,0x0BAE,0x0BB5,0x0BB7,
    0x0BB9,0x0C05,0x0C0C,0x0C0E,0x0C10,0x0C12,0x0C28,0x0C2A,0x0C33,0x0C35,
    0x0C39,0x0C60,0x0C61,0x0C85,0x0C8C,0x0C8E,0x0C90,0x0C92,0x0CA8,0x0CAA,
    0x0CB3,0x0CB5,0x0CB9,0x0CDE,0x0CDE,0x0CE0,0x0CE1,0x0D05,0x0D0C,0x0D0E,
    0x0D10,0x0D12,0x0D28,0x0D2A,0x0D39,0x0D60,0x0D61,0x0E01,0x0E2E,0x0E30,
    0x0E30,0x0E32,0x0E33,0x0E40,0x0E45,0x0E81,0x0E82,0x0E84,0x0E84,0x0E87,
    0x0E88,0x0E8A,0x0E8A,0x0E8D,0x0E8D,0x0E94,0x0E97,0x0E99,0x0E9F,0x0EA1,
    0x0EA3,0x0EA5,0x0EA5,0x0EA7,0x0EA7,0x0EAA,0x0EAB,0x0EAD,0x0EAE,0x0EB0,
    0x0EB0,0x0EB2,0x0EB3,0x0EBD,0x0EBD,0x0EC0,0x0EC4,0x0F40,0x0F47,0x0F49,
    0x0F69,0x10A0,0x10C5,0x10D0,0x10F6,0x1100,0x1100,0x1102,0x1103,0x1105,
    0x1107,0x1109,0x1109,0x110B,0x110C,0x110E,0x1112,0x113C,0x113C,0x113E,
    0x113E,0x1140,0x1140,0x114C,0x114C,0x114E,0x114E,0x1150,0x1150,0x1154,
    0x1155,0x1159,0x1159,0x115F,0x1161,0x1163,0x1163,0x1165,0x1165,0x1167,
    0x1167,0x1169,0x1169,0x116D,0x116E,0x1172,0x1173,0x1175,0x1175,0x119E,
    0x119E,0x11A8,0x11A8,0x11AB,0x11AB,0x11AE,0x11AF,0x11B7,0x11B8,0x11BA,
    0x11BA,0x11BC,0x11C2,0x11EB,0x11EB,0x11F0,0x11F0,0x11F9,0x11F9,0x1E00,
    0x1E9B,0x1EA0,0x1EF9,0x1F00,0x1F15,0x1F18,0x1F1D,0x1F20,0x1F45,0x1F48,
    0x1F4D,0x1F50,0x1F57,0x1F59,0x1F59,0x1F5B,0x1F5B,0x1F5D,0x1F5D,0x1F5F,
    0x1F7D,0x1F80,0x1FB4,0x1FB6,0x1FBC,0x1FBE,0x1FBE,0x1FC2,0x1FC4,0x1FC6,
    0x1FCC,0x1FD0,0x1FD3,0x1FD6,0x1FDB,0x1FE0,0x1FEC,0x1FF2,0x1FF4,0x1FF6,
    0x1FFC,0x2126,0x2126,0x212A,0x212B,0x212E,0x212E,0x2180,0x2182,0x3041,
    0x3094,0x30A1,0x30FA,0x3105,0x312C,0xAC00,0xD7A3];
---|
2896 |
// Code point ranges stored as consecutive (low, high) pairs. lookup()
// performs a binary search over the pairs, so they must be listed in
// ascending order: with 0x4E00-0x9FA5 placed first, the search discarded
// that range and never matched U+4E00..U+9FA5.
dchar[] IdeographicTable=[0x3007,0x3007,0x3021,0x3029,0x4E00,0x9FA5];
---|
2897 |
// Code point ranges stored as consecutive (low, high) pairs in ascending
// order, the layout lookup() searches; a single code point appears as a
// pair with equal bounds. Presumably the XML "CombiningChar" production --
// confirm against the specification.
dchar[] CombiningCharTable=[
    0x0300,0x0345,0x0360,0x0361,0x0483,0x0486,
    0x0591,0x05A1,0x05A3,0x05B9,0x05BB,0x05BD,0x05BF,0x05BF,0x05C1,0x05C2,
    0x05C4,0x05C4,0x064B,0x0652,0x0670,0x0670,0x06D6,0x06DC,0x06DD,0x06DF,
    0x06E0,0x06E4,0x06E7,0x06E8,0x06EA,0x06ED,0x0901,0x0903,0x093C,0x093C,
    0x093E,0x094C,0x094D,0x094D,0x0951,0x0954,0x0962,0x0963,0x0981,0x0983,
    0x09BC,0x09BC,0x09BE,0x09BE,0x09BF,0x09BF,0x09C0,0x09C4,0x09C7,0x09C8,
    0x09CB,0x09CD,0x09D7,0x09D7,0x09E2,0x09E3,0x0A02,0x0A02,0x0A3C,0x0A3C,
    0x0A3E,0x0A3E,0x0A3F,0x0A3F,0x0A40,0x0A42,0x0A47,0x0A48,0x0A4B,0x0A4D,
    0x0A70,0x0A71,0x0A81,0x0A83,0x0ABC,0x0ABC,0x0ABE,0x0AC5,0x0AC7,0x0AC9,
    0x0ACB,0x0ACD,0x0B01,0x0B03,0x0B3C,0x0B3C,0x0B3E,0x0B43,0x0B47,0x0B48,
    0x0B4B,0x0B4D,0x0B56,0x0B57,0x0B82,0x0B83,0x0BBE,0x0BC2,0x0BC6,0x0BC8,
    0x0BCA,0x0BCD,0x0BD7,0x0BD7,0x0C01,0x0C03,0x0C3E,0x0C44,0x0C46,0x0C48,
    0x0C4A,0x0C4D,0x0C55,0x0C56,0x0C82,0x0C83,0x0CBE,0x0CC4,0x0CC6,0x0CC8,
    0x0CCA,0x0CCD,0x0CD5,0x0CD6,0x0D02,0x0D03,0x0D3E,0x0D43,0x0D46,0x0D48,
    0x0D4A,0x0D4D,0x0D57,0x0D57,0x0E31,0x0E31,0x0E34,0x0E3A,0x0E47,0x0E4E,
    0x0EB1,0x0EB1,0x0EB4,0x0EB9,0x0EBB,0x0EBC,0x0EC8,0x0ECD,0x0F18,0x0F19,
    0x0F35,0x0F35,0x0F37,0x0F37,0x0F39,0x0F39,0x0F3E,0x0F3E,0x0F3F,0x0F3F,
    0x0F71,0x0F84,0x0F86,0x0F8B,0x0F90,0x0F95,0x0F97,0x0F97,0x0F99,0x0FAD,
    0x0FB1,0x0FB7,0x0FB9,0x0FB9,0x20D0,0x20DC,0x20E1,0x20E1,0x302A,0x302F,
    0x3099,0x3099,0x309A,0x309A];
---|
2917 |
// Code point ranges stored as consecutive (low, high) pairs in ascending
// order, the layout lookup() searches. Presumably the XML "Digit"
// production -- confirm against the specification.
dchar[] DigitTable=[
    0x0030,0x0039, 0x0660,0x0669, 0x06F0,0x06F9, 0x0966,0x096F,
    0x09E6,0x09EF, 0x0A66,0x0A6F, 0x0AE6,0x0AEF, 0x0B66,0x0B6F,
    0x0BE7,0x0BEF, 0x0C66,0x0C6F, 0x0CE6,0x0CEF, 0x0D66,0x0D6F,
    0x0E50,0x0E59, 0x0ED0,0x0ED9, 0x0F20,0x0F29
];
---|
2921 |
// Code point ranges stored as consecutive (low, high) pairs in ascending
// order, the layout lookup() searches; a single code point appears as a
// pair with equal bounds. Presumably the XML "Extender" production --
// confirm against the specification.
dchar[] ExtenderTable=[
    0x00B7,0x00B7, 0x02D0,0x02D0, 0x02D1,0x02D1, 0x0387,0x0387,
    0x0640,0x0640, 0x0E46,0x0E46, 0x0EC6,0x0EC6, 0x3005,0x3005,
    0x3031,0x3035, 0x309D,0x309E, 0x30FC,0x30FE
];
---|
2924 |
|
---|
2925 |
/**
 * Tests whether c lies inside one of the ranges listed in table.
 *
 * Params:
 *     table = consecutive (low, high) pairs with inclusive bounds; the
 *             binary search requires them to be in ascending order and
 *             the length to be even
 *     c     = code point to look for
 *
 * Returns: true if some pair satisfies low <= c <= high, otherwise false
 */
bool lookup(dchar[] table, int c)
{
    while (table.length != 0)
    {
        // Halve the length, then clear the low bit so m always indexes the
        // low bound of a pair. The index stays in size_t so the array
        // length is never narrowed to int.
        size_t m = (table.length >> 1) & ~cast(size_t)1;
        if (c < table[m])
        {
            // c is below this pair: keep only the pairs before it.
            table = table[0..m];
        }
        else if (c > table[m+1])
        {
            // c is above this pair: keep only the pairs after it.
            table = table[m+2..$];
        }
        else return true;
    }
    return false;
}
---|
2942 |
|
---|
2943 |
/**
 * Builds a short, printable preview of the beginning of s.
 *
 * Every code unit outside printable ASCII (0x20..0x7E) is replaced by '.'.
 * Once 40 characters have been collected, "___" is appended and the rest
 * of s is ignored.
 *
 * Returns: the sanitised preview, at most 43 characters long
 */
string startOf(string s)
{
    string r;
    foreach(char c;s)
    {
        // >= 0x7F (not > 0x7F) so that DEL, a control character, is masked
        // together with the non-ASCII code units.
        r ~= (c < 0x20 || c >= 0x7F) ? '.' : c;
        if (r.length >= 40) { r ~= "___"; break; }
    }
    return r;
}
---|
2953 |
|
---|
2954 |
/// Aborts by throwing an XMLException constructed from s (null by default).
void exit(string s=null)
{
    auto failure = new XMLException(s);
    throw failure;
}
---|
2958 |
} |
---|
2959 |
|
---|
2960 |
// Compiled only when the unittest_report version identifier is set: the
// unittest block prints a confirmation line when it runs.
version (unittest_report)
{
    import std.stdio;

    unittest
    {
        writefln("unittest std2.xml passed");
    }
}
---|