| 1 |
// Written in the D programming language. |
|---|
| 2 |
|
|---|
| 3 |
/** |
|---|
| 4 |
Classes and functions for creating and parsing XML |
|---|
| 5 |
|
|---|
| 6 |
The basic architecture of this module is that there are standalone functions, |
|---|
| 7 |
classes for constructing an XML document from scratch (Tag, Element and |
|---|
| 8 |
Document), and also classes for parsing a pre-existing XML file (ElementParser |
|---|
| 9 |
and DocumentParser). The parsing classes <i>may</i> be used to build a |
|---|
| 10 |
Document, but that is not their primary purpose. The handling capabilities of |
|---|
| 11 |
DocumentParser and ElementParser are sufficiently customizable that you can |
|---|
| 12 |
make them do pretty much whatever you want. |
|---|
| 13 |
|
|---|
| 14 |
Example: This example creates a DOM (Document Object Model) tree |
|---|
| 15 |
from an XML file. |
|---|
| 16 |
------------------------------------------------------------------------------ |
|---|
| 17 |
import std.xml; |
|---|
| 18 |
import std.stdio; |
|---|
| 19 |
import std.string; |
|---|
| 20 |
|
|---|
| 21 |
// books.xml is used in various samples throughout the Microsoft XML Core |
|---|
| 22 |
// Services (MSXML) SDK. |
|---|
| 23 |
// |
|---|
| 24 |
// See http://msdn2.microsoft.com/en-us/library/ms762271(VS.85).aspx |
|---|
| 25 |
|
|---|
| 26 |
void main() |
|---|
| 27 |
{ |
|---|
| 28 |
string s = cast(string)std.file.read("books.xml"); |
|---|
| 29 |
|
|---|
| 30 |
// Check for well-formedness |
|---|
| 31 |
check(s); |
|---|
| 32 |
|
|---|
| 33 |
// Make a DOM tree |
|---|
| 34 |
auto doc = new Document(s); |
|---|
| 35 |
|
|---|
| 36 |
// Plain-print it |
|---|
| 37 |
writefln(doc); |
|---|
| 38 |
} |
|---|
| 39 |
------------------------------------------------------------------------------ |
|---|
| 40 |
|
|---|
| 41 |
Example: This example does much the same thing, except that the file is |
|---|
| 42 |
deconstructed and reconstructed by hand. This is more work, but the |
|---|
| 43 |
techniques involved offer vastly more power. |
|---|
| 44 |
------------------------------------------------------------------------------ |
|---|
| 45 |
import std.xml; |
|---|
| 46 |
import std.stdio; |
|---|
| 47 |
import std.string; |
|---|
| 48 |
|
|---|
| 49 |
struct Book |
|---|
| 50 |
{ |
|---|
| 51 |
string id; |
|---|
| 52 |
string author; |
|---|
| 53 |
string title; |
|---|
| 54 |
string genre; |
|---|
| 55 |
string price; |
|---|
| 56 |
string pubDate; |
|---|
| 57 |
string description; |
|---|
| 58 |
} |
|---|
| 59 |
|
|---|
| 60 |
void main() |
|---|
| 61 |
{ |
|---|
| 62 |
string s = cast(string)std.file.read("books.xml"); |
|---|
| 63 |
|
|---|
| 64 |
// Check for well-formedness |
|---|
| 65 |
check(s); |
|---|
| 66 |
|
|---|
| 67 |
// Take it apart |
|---|
| 68 |
Book[] books; |
|---|
| 69 |
|
|---|
| 70 |
auto xml = new DocumentParser(s); |
|---|
| 71 |
xml.onStartTag["book"] = (ElementParser xml) |
|---|
| 72 |
{ |
|---|
| 73 |
Book book; |
|---|
| 74 |
book.id = xml.tag.attr["id"]; |
|---|
| 75 |
|
|---|
| 76 |
xml.onEndTag["author"] = (in Element e) { book.author = e.text; }; |
|---|
| 77 |
xml.onEndTag["title"] = (in Element e) { book.title = e.text; }; |
|---|
| 78 |
xml.onEndTag["genre"] = (in Element e) { book.genre = e.text; }; |
|---|
| 79 |
xml.onEndTag["price"] = (in Element e) { book.price = e.text; }; |
|---|
| 80 |
xml.onEndTag["publish-date"] = (in Element e) { book.pubDate = e.text; }; |
|---|
| 81 |
xml.onEndTag["description"] = (in Element e) { book.description = e.text; }; |
|---|
| 82 |
|
|---|
| 83 |
xml.parse(); |
|---|
| 84 |
|
|---|
| 85 |
books ~= book; |
|---|
| 86 |
}; |
|---|
| 87 |
xml.parse(); |
|---|
| 88 |
|
|---|
| 89 |
// Put it back together again; |
|---|
| 90 |
auto doc = new Document(new Tag("catalog")); |
|---|
| 91 |
foreach(book;books) |
|---|
| 92 |
{ |
|---|
| 93 |
auto element = new Element("book"); |
|---|
| 94 |
element.tag.attr["id"] = book.id; |
|---|
| 95 |
|
|---|
| 96 |
element ~= new Element("author", book.author); |
|---|
| 97 |
element ~= new Element("title", book.title); |
|---|
| 98 |
element ~= new Element("genre", book.genre); |
|---|
| 99 |
element ~= new Element("price", book.price); |
|---|
| 100 |
element ~= new Element("publish-date",book.pubDate); |
|---|
| 101 |
element ~= new Element("description", book.description); |
|---|
| 102 |
|
|---|
| 103 |
doc ~= element; |
|---|
| 104 |
} |
|---|
| 105 |
|
|---|
| 106 |
// Pretty-print it |
|---|
| 107 |
writefln(join(doc.pretty(3),"\n")); |
|---|
| 108 |
} |
|---|
| 109 |
------------------------------------------------------------------------------- |
|---|
| 110 |
Macros: |
|---|
| 111 |
WIKI=Phobos/StdXml |
|---|
| 112 |
|
|---|
| 113 |
Copyright: Copyright Janice Caron 2008 - 2009. |
|---|
| 114 |
License: <a href="http://www.boost.org/LICENSE_1_0.txt">Boost License 1.0</a>. |
|---|
| 115 |
Authors: Janice Caron |
|---|
| 116 |
|
|---|
| 117 |
Copyright Janice Caron 2008 - 2009. |
|---|
| 118 |
Distributed under the Boost Software License, Version 1.0. |
|---|
| 119 |
(See accompanying file LICENSE_1_0.txt or copy at |
|---|
| 120 |
http://www.boost.org/LICENSE_1_0.txt) |
|---|
| 121 |
*/ |
|---|
| 122 |
module orange.xml.PhobosXML; |
|---|
| 123 |
|
|---|
| 124 |
version (Tango) {} |
|---|
| 125 |
else |
|---|
| 126 |
version = Phobos; |
|---|
| 127 |
|
|---|
| 128 |
version (Phobos): |
|---|
| 129 |
|
|---|
| 130 |
mixin(`import std.array; |
|---|
| 131 |
import std.string; |
|---|
| 132 |
import std.encoding; |
|---|
| 133 |
|
|---|
| 134 |
/// The literal text that opens an XML CDATA section.
enum cdata = "<![CDATA["; |
|---|
| 135 |
|
|---|
| 136 |
/**
 * An Element subclass that exposes one tag attribute as a name/value pair.
 *
 * Element.attributes() builds one instance per entry of the owning element's
 * tag.attr table.
 */
final class Attribute : Element
{
    private alias string tstring;
    private tstring name_;
    private tstring value_;

    /**
     * Params:
     *      name = the attribute name; also forwarded to the Element constructor
     *      value = the attribute value
     *      parent = stored in parent_
     */
    this (tstring name, tstring value, Element parent)
    {
        super(name);
        this.name_ = name;
        this.value_ = value;
        this.parent_ = parent;
    }

    /// Returns the attribute name supplied at construction.
    tstring name ()
    {
        return this.name_;
    }

    /// Returns the attribute value supplied at construction.
    tstring value ()
    {
        return this.value_;
    }
}
|---|
| 160 |
|
|---|
| 161 |
/*struct TagProxy |
|---|
| 162 |
{ |
|---|
| 163 |
private alias string tstring; |
|---|
| 164 |
private tstring name_; |
|---|
| 165 |
|
|---|
| 166 |
private static TagProxy opCall (tstring name) |
|---|
| 167 |
{ |
|---|
| 168 |
TagProxy tp; |
|---|
| 169 |
tp.name_ = name; |
|---|
| 170 |
|
|---|
| 171 |
return tp; |
|---|
| 172 |
} |
|---|
| 173 |
|
|---|
| 174 |
tstring name () |
|---|
| 175 |
{ |
|---|
| 176 |
return name_; |
|---|
| 177 |
} |
|---|
| 178 |
}*/ |
|---|
| 179 |
|
|---|
| 180 |
/**
 * Tests whether a code point is a legal XML character.
 *
 * Accepted are tab, line feed, carriage return, U+0020 .. U+D7FF,
 * U+E000 .. U+FFFD and U+10000 .. U+10FFFF.
 *
 * Standards: $(LINK2 http://www.w3.org/TR/1998/REC-xml-19980210, XML 1.0)
 *
 * Params:
 *      c = the character to be tested
 *
 * Returns: true if c is a legal XML character, false otherwise
 */
bool isChar(dchar c) // rule 2
{
    // At or below U+D7FF: everything from space upward, plus the three
    // permitted control characters.
    if (c <= 0xD7FF)
        return c >= 0x20 || c == 0x9 || c == 0xA || c == 0xD;

    // From U+E000 to the end of the Unicode range: everything except
    // U+FFFE and U+FFFF.
    if (c >= 0xE000 && c <= 0x10FFFF)
        return (c & 0x1FFFFE) != 0xFFFE;

    // Surrogate block, or beyond U+10FFFF.
    return false;
}
|---|
| 211 |
|
|---|
| 212 |
unittest
{
//  const CharTable=[0x9,0x9,0xA,0xA,0xD,0xD,0x20,0xD7FF,0xE000,0xFFFD,
//        0x10000,0x10FFFF];

    // Code points that isChar must accept (0x4A is 'J').
    const uint[] accepted = [0x9, 0xA, 0xD, 0x20, 0x4A, 0xD7FF,
                             0xE000, 0xFFFD, 0x10000, 0x10FFFF];

    // Code points that isChar must reject.
    const uint[] rejected = [0x8, 0xB, 0xC, 0xE, 0x1F, 0xD800,
                             0xDFFF, 0xFFFE, 0xFFFF, 0x110000];

    foreach (cp; accepted)
        assert( isChar(cast(dchar)cp));

    foreach (cp; rejected)
        assert(!isChar(cast(dchar)cp));

    // Optional exhaustive comparison against the table-driven lookup.
    debug (stdxml_TestHardcodedChecks)
    {
        foreach (c; 0 .. dchar.max + 1)
            assert(isChar(c) == lookup(CharTable, c));
    }
}
|---|
| 243 |
|
|---|
| 244 |
/**
 * Tests whether a character is XML whitespace.
 *
 * XML recognises exactly four whitespace characters: space, tab, line feed
 * and carriage return.
 *
 * Standards: $(LINK2 http://www.w3.org/TR/1998/REC-xml-19980210, XML 1.0)
 *
 * Params:
 *      c = the character to be tested
 *
 * Returns: true if c is one of the four XML whitespace characters
 */
bool isSpace(dchar c)
{
    switch (c)
    {
        case 0x20: // space
        case 0x09: // tab
        case 0x0A: // line feed
        case 0x0D: // carriage return
            return true;

        default:
            return false;
    }
}
|---|
| 259 |
|
|---|
| 260 |
/**
 * Tests whether a character is a digit according to the XML standard.
 *
 * ASCII digits are recognised directly; any other character is looked up in
 * DigitTable.
 *
 * Standards: $(LINK2 http://www.w3.org/TR/1998/REC-xml-19980210, XML 1.0)
 *
 * Params:
 *      c = the character to be tested
 *
 * Returns: true if c is an XML digit
 */
bool isDigit(dchar c)
{
    // Fast path for '0' .. '9'; the table is only consulted otherwise.
    return (c >= 0x0030 && c <= 0x0039) || lookup(DigitTable,c);
}
|---|
| 275 |
|
|---|
| 276 |
unittest
{
    // Only active with -debug=stdxml_TestHardcodedChecks: compares isDigit
    // with a plain DigitTable lookup for every value in the loop range.
    debug (stdxml_TestHardcodedChecks)
    {
        foreach (c; 0 .. dchar.max + 1)
        {
            const expected = lookup(DigitTable, c);
            assert(isDigit(c) == expected);
        }
    }
}
|---|
| 284 |
|
|---|
| 285 |
/**
 * Tests whether a character is a letter according to the XML standard,
 * that is, either an ideographic character or a base character.
 *
 * Standards: $(LINK2 http://www.w3.org/TR/1998/REC-xml-19980210, XML 1.0)
 *
 * Params:
 *      c = the character to be tested
 *
 * Returns: true if isIdeographic(c) or isBaseChar(c) holds
 */
bool isLetter(dchar c) // rule 84
{
    if (isIdeographic(c))
        return true;

    return isBaseChar(c);
}
|---|
| 297 |
|
|---|
| 298 |
/**
 * Tests whether a character is an ideographic character according to the
 * XML standard: U+3007, U+3021 .. U+3029 or U+4E00 .. U+9FA5.
 *
 * Standards: $(LINK2 http://www.w3.org/TR/1998/REC-xml-19980210, XML 1.0)
 *
 * Params:
 *      c = the character to be tested
 *
 * Returns: true if c lies in one of the ideographic ranges
 */
bool isIdeographic(dchar c)
{
    return c == 0x3007
        || (c >= 0x3021 && c <= 0x3029)
        || (c >= 0x4E00 && c <= 0x9FA5);
}
|---|
| 317 |
|
|---|
| 318 |
unittest
{
    // Boundary values of every range accepted by isIdeographic.
    const dchar[] boundaries = ['\u4E00', '\u9FA5', '\u3007', '\u3021', '\u3029'];

    foreach (ch; boundaries)
        assert(isIdeographic(ch));

    // Optional exhaustive comparison against the table-driven lookup.
    debug (stdxml_TestHardcodedChecks)
    {
        foreach (c; 0 .. dchar.max + 1)
            assert(isIdeographic(c) == lookup(IdeographicTable, c));
    }
}
|---|
| 332 |
|
|---|
| 333 |
/**
 * Tests whether a character is a base character according to the XML
 * standard, by looking it up in BaseCharTable.
 *
 * Standards: $(LINK2 http://www.w3.org/TR/1998/REC-xml-19980210, XML 1.0)
 *
 * Params:
 *      c = the character to be tested
 *
 * Returns: the result of the BaseCharTable lookup
 */
bool isBaseChar(dchar c)
{
    const found = lookup(BaseCharTable,c);
    return found;
}
|---|
| 346 |
|
|---|
| 347 |
/**
 * Tests whether a character is a combining character according to the XML
 * standard, by looking it up in CombiningCharTable.
 *
 * Standards: $(LINK2 http://www.w3.org/TR/1998/REC-xml-19980210, XML 1.0)
 *
 * Params:
 *      c = the character to be tested
 *
 * Returns: the result of the CombiningCharTable lookup
 */
bool isCombiningChar(dchar c)
{
    const found = lookup(CombiningCharTable,c);
    return found;
}
|---|
| 360 |
|
|---|
| 361 |
/**
 * Tests whether a character is an extender according to the XML standard,
 * by looking it up in ExtenderTable.
 *
 * Standards: $(LINK2 http://www.w3.org/TR/1998/REC-xml-19980210, XML 1.0)
 *
 * Params:
 *      c = the character to be tested
 *
 * Returns: the result of the ExtenderTable lookup
 */
bool isExtender(dchar c)
{
    const found = lookup(ExtenderTable,c);
    return found;
}
|---|
| 373 |
|
|---|
| 374 |
/**
 * Encodes a string by replacing all characters which need to be escaped with
 * appropriate predefined XML entities.
 *
 * encode() escapes certain characters (ampersand, quote, apostrophe, less-than
 * and greater-than), and similarly, decode() unescapes them. These functions
 * are provided for convenience only. You do not need to use them when using
 * the std.xml classes, because then all the encoding and decoding will be done
 * for you automatically.
 *
 * If the string is not modified, the original will be returned.
 *
 * Standards: $(LINK2 http://www.w3.org/TR/1998/REC-xml-19980210, XML 1.0)
 *
 * Params:
 *      s = The string to be encoded
 *      buffer = (optional) storage handed to the appender that builds the
 *               result; its length is reset to zero before use
 *
 * Returns: The encoded string
 *
 * Examples:
 * --------------
 * writefln(encode("a > b")); // writes "a &gt; b"
 * --------------
 */
S encode(S)(S s, S buffer = null)
{
    string r;
    size_t lastI;   // index just past the last character already copied
    if (buffer) buffer.length = 0;
    auto result = appender(&buffer);

    foreach (i, c; s)
    {
        // Pick the entity for the five characters that must be escaped;
        // every other character is copied later as part of a slice.
        switch (c)
        {
            case '&':  r = "&amp;";  break;
            case '"':  r = "&quot;"; break;
            case '\'': r = "&apos;"; break;
            case '<':  r = "&lt;";   break;
            case '>':  r = "&gt;";   break;
            default: continue;
        }
        // Flush the untouched run before this character, then the entity.
        result.put(s[lastI .. i]);
        result.put(r);
        lastI = i + 1;
    }

    // Nothing was appended: hand back the caller's string unchanged.
    if (!result.data) return s;
    result.put(s[lastI .. $]);
    return result.data;
}
|---|
| 426 |
|
|---|
| 427 |
unittest
{
    // An input without special characters is returned as the same slice.
    assert(encode("hello") is "hello");

    // Each of the five special characters maps to its predefined entity.
    assert(encode("a > b") == "a &gt; b", encode("a > b"));
    assert(encode("a < b") == "a &lt; b");
    assert(encode("don't") == "don&apos;t");
    assert(encode("\"hi\"") == "&quot;hi&quot;", encode("\"hi\""));
    assert(encode("cat & dog") == "cat &amp; dog");
}
|---|
| 436 |
|
|---|
| 437 |
/**
 * Mode to use for decoding.
 */
enum DecodeMode
{
    NONE,   /// Do not decode
    LOOSE,  /// Decode, but ignore errors
    STRICT  /// Decode, and throw exception on error
}
|---|
| 448 |
|
|---|
| 449 |
/**
 * Decodes a string by unescaping all predefined XML entities.
 *
 * encode() escapes certain characters (ampersand, quote, apostrophe, less-than
 * and greater-than), and similarly, decode() unescapes them. These functions
 * are provided for convenience only. You do not need to use them when using
 * the std.xml classes, because then all the encoding and decoding will be done
 * for you automatically.
 *
 * This function decodes the entities &amp;, &quot;, &apos;, &lt; and &gt;,
 * as well as decimal and hexadecimal character references such as &#x20AC;
 *
 * If the string does not contain an ampersand, the original will be returned.
 *
 * Note that the "mode" parameter can be one of DecodeMode.NONE (do not
 * decode), DecodeMode.LOOSE (decode, but ignore errors), or DecodeMode.STRICT
 * (decode, and throw a DecodeException in the event of an error).
 *
 * Standards: $(LINK2 http://www.w3.org/TR/1998/REC-xml-19980210, XML 1.0)
 *
 * Params:
 *      s = The string to be decoded
 *      mode = (optional) Mode to use for decoding. (Defaults to LOOSE).
 *
 * Throws: DecodeException if mode == DecodeMode.STRICT and decode fails
 *
 * Returns: The decoded string
 *
 * Examples:
 * --------------
 * writefln(decode("a &gt; b")); // writes "a > b"
 * --------------
 */
string decode(string s, DecodeMode mode=DecodeMode.LOOSE)
{
    if (mode == DecodeMode.NONE) return s;

    // The buffer stays empty until the first ampersand is seen, so inputs
    // without any ampersand are returned without copying.
    char[] buffer;
    for (size_t i=0; i<s.length; ++i)
    {
        char c = s[i];
        if (c != '&')
        {
            if (buffer.length != 0) buffer ~= c;
        }
        else
        {
            if (buffer.length == 0)
            {
                // First ampersand: start the copy with everything before it.
                buffer = s[0 .. i].dup;
            }
            if (startsWith(s[i..$],"&#"))
            {
                try
                {
                    dchar d;
                    string t = s[i..$];
                    checkCharRef(t, d);
                    char[4] temp;
                    buffer ~= temp[0 .. std.utf.encode(temp, d)];
                    // t is now the text left after the reference; park i on
                    // the last consumed character (the loop adds one).
                    i = s.length - t.length - 1;
                }
                catch(Err e)
                {
                    if (mode == DecodeMode.STRICT)
                        throw new DecodeException("Unescaped &");
                    buffer ~= '&';
                }
            }
            // Each skip is the entity length minus one, because the loop
            // increment consumes the final character.
            else if (startsWith(s[i..$],"&amp;" )) { buffer ~= '&';  i += 4; }
            else if (startsWith(s[i..$],"&quot;")) { buffer ~= '"';  i += 5; }
            else if (startsWith(s[i..$],"&apos;")) { buffer ~= '\''; i += 5; }
            else if (startsWith(s[i..$],"&lt;"  )) { buffer ~= '<';  i += 3; }
            else if (startsWith(s[i..$],"&gt;"  )) { buffer ~= '>';  i += 3; }
            else
            {
                // A bare ampersand: an error in STRICT mode, kept as-is in
                // LOOSE mode.
                if (mode == DecodeMode.STRICT)
                    throw new DecodeException("Unescaped &");
                buffer ~= '&';
            }
        }
    }
    return (buffer.length == 0) ? s : cast(string)buffer;
}
|---|
| 534 |
|
|---|
| 535 |
unittest
{
    // Asserts that decoding s in STRICT mode throws a DecodeException.
    void assertNot(string s)
    {
        bool b = false;
        try { decode(s,DecodeMode.STRICT); }
        catch (DecodeException e) { b = true; }
        assert(b,s);
    }

    // Assert that things that should work, do
    assert(decode("hello",          DecodeMode.STRICT) is "hello");
    assert(decode("a &gt; b",       DecodeMode.STRICT) == "a > b");
    assert(decode("a &lt; b",       DecodeMode.STRICT) == "a < b");
    assert(decode("don&apos;t",     DecodeMode.STRICT) == "don't");
    assert(decode("&quot;hi&quot;", DecodeMode.STRICT) == "\"hi\"");
    assert(decode("cat &amp; dog",  DecodeMode.STRICT) == "cat & dog");
    assert(decode("&#42;",          DecodeMode.STRICT) == "*");
    assert(decode("&#x2A;",         DecodeMode.STRICT) == "*");

    // LOOSE mode leaves malformed references untouched
    assert(decode("cat & dog",      DecodeMode.LOOSE) == "cat & dog");
    assert(decode("a &gt b",        DecodeMode.LOOSE) == "a &gt b");
    assert(decode("&#;",            DecodeMode.LOOSE) == "&#;");
    assert(decode("&#x;",           DecodeMode.LOOSE) == "&#x;");
    assert(decode("&#2G;",          DecodeMode.LOOSE) == "&#2G;");
    assert(decode("&#x2G;",         DecodeMode.LOOSE) == "&#x2G;");

    // Assert that things that shouldn't work, don't
    assertNot("cat & dog");
    assertNot("a &gt b");
    assertNot("&#;");
    assertNot("&#x;");
    assertNot("&#2G;");
    assertNot("&#x2G;");
}
|---|
| 569 |
|
|---|
| 570 |
/**
 * Class representing an XML document: a root Element together with the text
 * that precedes it (the prolog) and the text that follows it (the epilog).
 *
 * Standards: $(LINK2 http://www.w3.org/TR/1998/REC-xml-19980210, XML 1.0)
 *
 */
class Document : Element
{
    /**
     * All text which occurs before the root element.
     * Defaults to <?xml version="1.0"?>
     */
    string prolog = "<?xml version=\"1.0\"?>";

    /**
     * All text which occurs after the root element.
     * Defaults to the empty string
     */
    string epilog;

    /**
     * Constructs a Document by parsing XML text, building a complete DOM
     * (Document Object Model) tree.
     *
     * The input to this function MUST be valid XML.
     * This is enforced by DocumentParser's in contract.
     *
     * Params:
     *      s = the complete XML text.
     */
    this(string s)
    in
    {
        assert(s.length != 0);
    }
    body
    {
        auto parser = new DocumentParser(s);
        string rootTagText = parser.tag.tagString;

        this(parser.tag);

        // Everything in s that precedes the root tag's text is the prolog.
        const prologLength = rootTagText.ptr - s.ptr;
        prolog = s[0 .. prologLength];

        parse(parser);

        // Whatever the parser has not consumed after parsing is the epilog.
        epilog = *parser.s;
    }

    /**
     * Constructs a Document from a Tag.
     *
     * Params:
     *      tag = the start tag of the document.
     */
    this(const(Tag) tag)
    {
        super(tag);
    }

    const
    {
        /**
         * Compares two Documents for equality: prolog, element content and
         * epilog must all match.
         *
         * Examples:
         * --------------
         * Document d1,d2;
         * if (d1 == d2) { }
         * --------------
         */
        override bool opEquals(Object o)
        {
            const doc = toType!(const Document)(o);

            if (prolog != doc.prolog)
                return false;
            if (super != cast(const Element)doc)
                return false;
            return epilog == doc.epilog;
        }

        /**
         * Compares two Documents, ordering by prolog, then by element
         * content, then by epilog.
         *
         * You should rarely need to call this function. It exists so that
         * Documents can be used as associative array keys.
         *
         * Examples:
         * --------------
         * Document d1,d2;
         * if (d1 < d2) { }
         * --------------
         */
        override int opCmp(Object o)
        {
            const doc = toType!(const Document)(o);

            if (prolog != doc.prolog)
                return prolog < doc.prolog ? -1 : 1;
            if (super != cast(const Element)doc)
                return super < cast(const Element)doc ? -1 : 1;
            if (epilog != doc.epilog)
                return epilog < doc.epilog ? -1 : 1;
            return 0;
        }

        /**
         * Returns the hash of a Document, combining the prolog, the epilog
         * and the hash of the element content.
         *
         * You should rarely need to call this function. It exists so that
         * Documents can be used as associative array keys.
         */
        override hash_t toHash()
        {
            auto inner = hash(epilog,super.toHash);
            return hash(prolog,inner);
        }

        /**
         * Returns the string representation of a Document. (That is, the
         * complete XML of a document).
         */
        override string toString()
        {
            string xmlText = prolog;
            xmlText ~= super.toString;
            xmlText ~= epilog;
            return xmlText;
        }
    }
}
|---|
| 694 |
|
|---|
| 695 |
/** |
|---|
| 696 |
* Class representing an XML element. |
|---|
| 697 |
* |
|---|
| 698 |
* Standards: $(LINK2 http://www.w3.org/TR/1998/REC-xml-19980210, XML 1.0) |
|---|
| 699 |
*/ |
|---|
| 700 |
class Element : Item |
|---|
| 701 |
{ |
|---|
| 702 |
Tag tag; /// The start tag of the element |
|---|
| 703 |
Item[] items; /// The element's items |
|---|
| 704 |
Text[] texts; /// The element's text items |
|---|
| 705 |
CData[] cdatas; /// The element's CData items |
|---|
| 706 |
Comment[] comments; /// The element's comments |
|---|
| 707 |
ProcessingInstruction[] pis; /// The element's processing instructions |
|---|
| 708 |
Element[] elements; /// The element's child elements |
|---|
| 709 |
/// Backing field for the parent property (read and written by parent()).
Element parent_; |
|---|
| 710 |
|
|---|
| 711 |
/**
 * Constructs an Element from a name and, optionally, a string that becomes
 * its Text interior.
 *
 * Params:
 *      name = the name of the element.
 *      interior = (optional) the string interior; no Text item is added
 *                 when it is empty.
 *
 * Examples:
 * -------------------------------------------------------
 * auto element = new Element("title","Serenity")
 *     // constructs the element <title>Serenity</title>
 * -------------------------------------------------------
 */
this(string name, string interior=null)
{
    this(new Tag(name));

    if (interior.length == 0)
        return;

    opCatAssign(new Text(interior));
}
|---|
| 730 |
|
|---|
| 731 |
/**
 * Constructs an Element from a Tag.
 *
 * The element receives its own Tag: the name, attributes and tagString are
 * copied from the argument, and the type starts out as TagType.EMPTY.
 *
 * Params:
 *      tag_ = the start or empty tag of the element.
 */
this(const(Tag) tag_)
{
    auto copy = new Tag(tag_.name);
    copy.type = TagType.EMPTY;

    foreach (key, val; tag_.attr)
        copy.attr[key] = val;

    copy.tagString = tag_.tagString;
    this.tag = copy;
}
|---|
| 744 |
|
|---|
| 745 |
/// Returns the element stored as this element's parent.
Element parent ()
{
    return this.parent_;
}
|---|
| 749 |
|
|---|
| 750 |
/// Stores the given element as this element's parent and returns it.
Element parent (Element parent)
{
    this.parent_ = parent;
    return this.parent_;
}
|---|
| 754 |
|
|---|
| 755 |
/// Returns the name of this element's tag.
string name ()
{
    return this.tag.name;
}
|---|
| 759 |
|
|---|
| 760 |
/// Returns the element's text member.
string value ()
{
    return this.text;
}
|---|
| 764 |
|
|---|
| 765 |
alias elements children; |
|---|
| 766 |
|
|---|
| 767 |
/**
 * Returns a newly built array holding one Attribute per entry of tag.attr,
 * each with this element as its parent.
 */
Attribute[] attributes ()
{
    auto attrs = new Attribute[tag.attr.length];
    size_t filled;

    foreach (key, val; tag.attr)
        attrs[filled++] = new Attribute(key, val, this);

    return attrs;
}
|---|
| 777 |
|
|---|
| 778 |
/// Returns this element itself.
Element query ()
{
    Element self = this;
    return self;
}
|---|
| 782 |
|
|---|
| 783 |
/**
 * Sets an attribute on this element's tag.
 *
 * Params:
 *      prefix = accepted but not used by this implementation
 *      name = the attribute name
 *      value = (optional) the attribute value
 *
 * Returns: this element
 */
Element attribute (string prefix, string name, string value = null)
{
    this.tag.attr[name] = value;
    return this;
}
|---|
| 789 |
|
|---|
| 790 |
/**
 * Appends a text item to the interior of this element.
 *
 * The item is recorded in texts and in the combined items list.
 *
 * Params:
 *      item = the item you wish to append.
 *
 * Examples:
 * --------------
 * Element element;
 * element ~= new Text("hello");
 * --------------
 */
void opCatAssign(Text item)
{
    this.texts ~= item;
    this.appendItem(item);
}
|---|
| 807 |
|
|---|
| 808 |
/**
 * Appends a CData item to the interior of this element.
 *
 * The item is recorded in cdatas and in the combined items list.
 *
 * Params:
 *      item = the item you wish to append.
 *
 * Examples:
 * --------------
 * Element element;
 * element ~= new CData("hello");
 * --------------
 */
void opCatAssign(CData item)
{
    this.cdatas ~= item;
    this.appendItem(item);
}
|---|
| 825 |
|
|---|
| 826 |
/**
 * Appends a comment to the interior of this element.
 *
 * The item is recorded in comments and in the combined items list.
 *
 * Params:
 *      item = the item you wish to append.
 *
 * Examples:
 * --------------
 * Element element;
 * element ~= new Comment("hello");
 * --------------
 */
void opCatAssign(Comment item)
{
    this.comments ~= item;
    this.appendItem(item);
}
|---|
| 843 |
|
|---|
| 844 |
/**
 * Appends a processing instruction to the interior of this element.
 *
 * The item is recorded in pis and in the combined items list.
 *
 * Params:
 *      item = the item you wish to append.
 *
 * Examples:
 * --------------
 * Element element;
 * element ~= new ProcessingInstruction("hello");
 * --------------
 */
void opCatAssign(ProcessingInstruction item)
{
    this.pis ~= item;
    this.appendItem(item);
}
|---|
| 861 |
|
|---|
| 862 |
/**
 * Appends a complete element as the last child of this element.
 *
 * The item is added both to the list of Element children and to the
 * general list of child items.
 *
 * Params:
 *      item = the child element to append.
 *
 * Examples:
 * --------------
 * Element element;
 * Element other = new Element("br");
 * element ~= other;
 *    // appends element representing <br />
 * --------------
 */
void opCatAssign(Element item)
{
    // General, append-ordered list (may promote an empty tag) ...
    appendItem(item);
    // ... then the Element-only list.
    elements ~= item;
}
|---|
| 881 |
|
|---|
| 882 |
// Shared tail of every opCatAssign overload: stores the item in the general
// child list and, if this element was tagged as EMPTY, turns the tag into a
// START tag as soon as a child with non-empty XML is added.
private void appendItem(Item item)
{
    items ~= item;

    // Only an EMPTY tag can need promoting.
    if (tag.type != TagType.EMPTY) return;

    // A child that renders as empty XML leaves the element empty.
    if (item.isEmptyXML) return;

    tag.type = TagType.START;
}
|---|
| 888 |
|
|---|
| 889 |
// Fills this element from the given parser: installs one handler per kind of
// content, each of which wraps what it receives in the matching Item subclass
// and appends it to this element, then runs the parser.
private void parse(ElementParser xml)
{
    xml.onText = (string data) { opCatAssign(new Text(data)); };
    xml.onCData = (string data) { opCatAssign(new CData(data)); };
    xml.onComment = (string data) { opCatAssign(new Comment(data)); };
    xml.onPI = (string data) { opCatAssign(new ProcessingInstruction(data)); };

    // Handler registered under the null key (presumably "any tag name" --
    // confirm against ElementParser): build the child from its tag, let the
    // child read its own content with the parser it was handed, then attach it.
    xml.onStartTag[null] = (ElementParser child)
    {
        auto nested = new Element(child.tag);
        nested.parse(child);
        opCatAssign(nested);
    };

    xml.parse();
}
|---|
| 905 |
|
|---|
| 906 |
/**
 * Compares two Elements for equality
 *
 * Two elements are reported equal when they hold the same number of child
 * items and every pair of items at the same position compares equal.
 *
 * NOTE(review): only the child items are compared; the tag is not, although
 * toHash does include the tag's hash. Confirm that this is intended.
 *
 * Params:
 *      o = the object to compare with; converted to an Element via toType
 *
 * Returns: true if all child items match, false otherwise
 *
 * Examples:
 * --------------
 * Element e1,e2;
 * if (e1 == e2) { }
 * --------------
 */
override bool opEquals(Object o)
{
    const element = toType!(const Element)(o);

    // Array lengths are size_t. Holding them in a uint does not compile on
    // targets where size_t is wider than 32 bits, so use size_t throughout.
    const size_t len = items.length;
    if (len != element.items.length) return false;

    for (size_t i = 0; i < len; ++i)
    {
        if (!items[i].opEquals(element.items[i])) return false;
    }
    return true;
}
|---|
| 926 |
|
|---|
| 927 |
/**
 * Compares two Elements
 *
 * You should rarely need to call this function. It exists so that Elements
 * can be used as associative array keys.
 *
 * The child items are walked in parallel. The first pair that is not equal
 * decides the result (through that item's own opCmp); if one element runs
 * out of items first it is the smaller one.
 *
 * Params:
 *      o = the object to compare with; converted to an Element via toType
 *
 * Returns: 0 if both item lists are exhausted together, -1 if this element
 * runs out first, 1 if the other one does, otherwise the result of comparing
 * the first differing pair of items
 *
 * Examples:
 * --------------
 * Element e1,e2;
 * if (e1 < e2) { }
 * --------------
 */
override int opCmp(Object o)
{
    const element = toType!(const Element)(o);

    // The index is compared against array lengths, which are size_t, so it
    // is a size_t as well rather than a 32-bit uint.
    for (size_t i = 0; ; ++i)
    {
        if (i == items.length && i == element.items.length) return 0;
        if (i == items.length) return -1;
        if (i == element.items.length) return 1;
        if (items[i] != element.items[i])
            return items[i].opCmp(element.items[i]);
    }
}
|---|
| 951 |
|
|---|
| 952 |
/**
 * Returns the hash of an Element
 *
 * The value is the tag's hash plus the hash of every child item.
 *
 * You should rarely need to call this function. It exists so that Elements
 * can be used as associative array keys.
 */
override hash_t toHash()
{
    hash_t total = tag.toHash;
    foreach (child; items)
    {
        total += child.toHash();
    }
    return total;
}
|---|
| 964 |
|
|---|
| 965 |
const |
|---|
| 966 |
{ |
|---|
| 967 |
/**
 * Returns the decoded interior of an element.
 *
 * The element is assumed to contain text <i>only</i>. So, for
 * example, given XML such as "&lt;title&gt;Good &amp;amp;
 * Bad&lt;/title&gt;", will return "Good &amp; Bad".
 *
 * Params:
 *      mode = (optional) Mode to use for decoding. (Defaults to LOOSE).
 *
 * Returns: the decoded text of all child items, concatenated in order
 *
 * Throws: DecodeException if decode fails, or if any child item is not
 * a Text
 */
string text(DecodeMode mode=DecodeMode.LOOSE)
{
    string decoded;
    foreach (child; items)
    {
        Text piece = cast(Text)child;
        if (piece !is null)
        {
            decoded ~= decode(piece.toString, mode);
        }
        else
        {
            // Anything other than Text breaks the "text only" assumption.
            throw new DecodeException(child.toString);
        }
    }
    return decoded;
}
|---|
| 990 |
|
|---|
| 991 |
/**
 * Returns an indented string representation of this item
 *
 * An empty element yields a single line holding its empty tag. An element
 * whose only child is a Text yields a single line with start tag, text and
 * end tag. Otherwise the start tag and end tag each get a line of their
 * own, with every line produced by the children placed in between and
 * shifted right by indent columns.
 *
 * Params:
 *      indent = (optional) number of spaces by which to indent this
 *          element. Defaults to 2.
 *
 * Returns: the lines of the representation, one array entry per line
 */
override string[] pretty(uint indent=2)
{
    if (isEmptyXML || tag.isEmpty)
    {
        return [ tag.toEmptyString ];
    }

    // A lone Text child is kept on the same line as its tags.
    if (items.length == 1)
    {
        Text only = cast(Text)(items[0]);
        if (only !is null)
        {
            return [tag.toStartString ~ only.toString ~ tag.toEndString];
        }
    }

    string[] lines = [ tag.toStartString ];
    foreach (child; items)
    {
        foreach (line; child.pretty(indent))
        {
            // Right-justifying into a field indent columns wider than the
            // line itself prepends the indentation.
            lines ~= rightJustify(line, line.length + indent);
        }
    }
    lines ~= tag.toEndString;
    return lines;
}
|---|
| 1024 |
|
|---|
| 1025 |
/**
 * Returns the string representation of an Element
 *
 * An empty element is written as its empty tag; any other element as its
 * start tag, the string form of each child item in order, and its end tag.
 *
 * Examples:
 * --------------
 * auto element = new Element("br");
 * writefln(element.toString); // writes "<br />"
 * --------------
 */
override string toString()
{
    if (isEmptyXML || tag.isEmpty)
    {
        return tag.toEmptyString;
    }

    string result = tag.toStartString;
    foreach (child; items)
    {
        result ~= child.toString;
    }
    result ~= tag.toEndString;
    return result;
}
|---|
| 1043 |
|
|---|
| 1044 |
/// Returns false always
override bool isEmptyXML()
{
    return false;
}
|---|
| 1045 |
} |
|---|
| 1046 |
} |
|---|
| 1047 |
|
|---|
| 1048 |
/**
 * Tag types.
 */
enum TagType
{
    START, /// Used for start tags
    END,   /// Used for end tags
    EMPTY  /// Used for empty tags
}
|---|
| 1057 |
|
|---|
| 1058 |
/**
 * Class representing an XML tag.
 *
 * Standards: $(LINK2 http://www.w3.org/TR/1998/REC-xml-19980210, XML 1.0)
 *
 * The class invariant guarantees
 * <ul>
 * <li> that $(B type) is a valid enum TagType value</li>
 * <li> that $(B name) consists of valid characters</li>
 * <li> that each attribute name consists of valid characters</li>
 * </ul>
 */
class Tag
{
    TagType type = TagType.START;   /// Type of tag
    string name;                    /// Tag name
    string[string] attr;            /// Associative array of attributes
    private string tagString;       // source text this tag was parsed from

    invariant()
    {
        string s;
        string t;

        assert(type == TagType.START
            || type == TagType.END
            || type == TagType.EMPTY);

        // checkName reports an invalid name by throwing Err.
        s = name;
        try { checkName(s,t); }
        catch(Err e) { assert(false,"Invalid tag name:" ~ e.toString); }

        foreach(k,v;attr)
        {
            s = k;
            try { checkName(s,t); }
            catch(Err e)
                { assert(false,"Invalid atrribute name:" ~ e.toString); }
        }
    }

    /**
     * Constructs an instance of Tag with a specified name and type
     *
     * The constructor does not initialize the attributes. To initialize the
     * attributes, you access the $(B attr) member variable.
     *
     * Params:
     *      name = the Tag's name
     *      type = (optional) the Tag's type. If omitted, defaults to
     *          TagType.START.
     *
     * Examples:
     * --------------
     * auto tag = new Tag("img",TagType.EMPTY);
     * tag.attr["src"] = "http://example.com/example.jpg";
     * --------------
     */
    this(string name, TagType type=TagType.START)
    {
        this.name = name;
        this.type = type;
    }

    /* Private constructor (so don't ddoc this!)
     *
     * Constructs a Tag by parsing the string representation, e.g. "<html>".
     *
     * The string is passed by reference, and is advanced over all characters
     * consumed.
     *
     * The second parameter is a dummy parameter only, required solely to
     * distinguish this constructor from the public one.
     *
     * Attribute values must be enclosed in double quotes; they are stored
     * in decoded form.
     *
     * Throws: TagException carrying the text consumed so far if the input
     * is not a well-formed tag.
     */
    private this(ref string s, bool dummy)
    {
        tagString = s;
        try
        {
            reqc(s,'<');
            if (optc(s,'/')) type = TagType.END;
            name = munch(s,"^/>"~whitespace);
            munch(s,whitespace);
            while(s.length > 0 && s[0] != '>' && s[0] != '/')
            {
                string key = munch(s,"^="~whitespace);
                munch(s,whitespace);
                reqc(s,'=');
                munch(s,whitespace);
                reqc(s,'"');
                string val = decode(munch(s,"^\""), DecodeMode.LOOSE);
                reqc(s,'"');
                munch(s,whitespace);
                attr[key] = val;
            }
            if (optc(s,'/'))
            {
                // "</name/>" is not a legal tag
                if (type == TagType.END) throw new TagException("");
                type = TagType.EMPTY;
            }
            reqc(s,'>');
            // Trim tagString to exactly the characters consumed.
            tagString.length = (s.ptr - tagString.ptr);
        }
        catch(XMLException e)
        {
            tagString.length = (s.ptr - tagString.ptr);
            throw new TagException(tagString);
        }
    }

    const
    {
        /**
         * Compares two Tags for equality
         *
         * Tags are equal when name, attributes and type are all equal.
         *
         * You should rarely need to call this function. It exists so that Tags
         * can be used as associative array keys.
         *
         * Examples:
         * --------------
         * Tag tag1,tag2;
         * if (tag1 == tag2) { }
         * --------------
         */
        override bool opEquals(Object o)
        {
            const tag = toType!(const Tag)(o);
            return
                (name != tag.name) ? false : (
                (attr != tag.attr) ? false : (
                (type != tag.type) ? false : (
            true )));
        }

        /**
         * Compares two Tags
         *
         * The first of name, attributes and type (in that order) that
         * differs decides the result.
         *
         * Examples:
         * --------------
         * Tag tag1,tag2;
         * if (tag1 < tag2) { }
         * --------------
         */
        override int opCmp(Object o)
        {
            const tag = toType!(const Tag)(o);
            return
                ((name != tag.name) ? ( name < tag.name ? -1 : 1 ) :
                ((attr != tag.attr) ? ( attr < tag.attr ? -1 : 1 ) :
                ((type != tag.type) ? ( type < tag.type ? -1 : 1 ) :
            0 )));
        }

        /**
         * Returns the hash of a Tag
         *
         * Only the name contributes to the hash.
         *
         * You should rarely need to call this function. It exists so that Tags
         * can be used as associative array keys.
         */
        override hash_t toHash()
        {
            hash_t hash = 0;
            foreach(dchar c;name) hash = hash * 11 + c;
            return hash;
        }

        /**
         * Returns the string representation of a Tag
         *
         * Examples:
         * --------------
         * auto tag = new Tag("book",TagType.START);
         * writefln(tag.toString); // writes "<book>"
         * --------------
         */
        override string toString()
        {
            if (isEmpty) return toEmptyString();
            return (isEnd) ? toEndString() : toStartString();
        }

        private
        {
            // Builds "<name key="value" ..." without the closing "/>" or ">".
            string toNonEndString()
            {
                string s = "<" ~ name;
                // Attribute values are held decoded (the parsing constructor
                // decodes them), so they must be encoded again on output.
                // Decoding here, as the code used to, wrote raw '&', '<' and
                // '"' into the markup and altered values that contain
                // entity-like text.
                foreach(key,val;attr)
                    s ~= format(" %s=\"%s\"",key,encode(val));
                return s;
            }

            // "<name ...>"
            string toStartString() { return toNonEndString() ~ ">"; }

            // "</name>"
            string toEndString() { return "</" ~ name ~ ">"; }

            // "<name .../>"
            string toEmptyString() { return toNonEndString() ~ "/>"; }
        }

        /**
         * Returns true if the Tag is a start tag
         *
         * Examples:
         * --------------
         * if (tag.isStart) { }
         * --------------
         */
        bool isStart() { return type == TagType.START; }

        /**
         * Returns true if the Tag is an end tag
         *
         * Examples:
         * --------------
         * if (tag.isEnd) { }
         * --------------
         */
        bool isEnd()   { return type == TagType.END;   }

        /**
         * Returns true if the Tag is an empty tag
         *
         * Examples:
         * --------------
         * if (tag.isEmpty) { }
         * --------------
         */
        bool isEmpty() { return type == TagType.EMPTY; }
    }
}
|---|
| 1287 |
|
|---|
| 1288 |
/**
 * Class representing a comment
 */
class Comment : Item
{
    private string content;

    /**
     * Construct a comment
     *
     * Params:
     *      content = the body of the comment
     *
     * Throws: CommentException if the comment body is illegal (contains "--"
     * or exactly equals "-")
     *
     * Examples:
     * --------------
     * auto item = new Comment("This is a comment");
     *    // constructs <!--This is a comment-->
     * --------------
     */
    this(string content)
    {
        // The forbidden sequence is "--" (it would terminate or corrupt the
        // comment). The check used to look for "==", which rejected harmless
        // bodies and let illegal ones through.
        if (content == "-" || content.indexOf("--") != -1)
            throw new CommentException(content);
        this.content = content;
    }

    /**
     * Compares two comments for equality
     *
     * Returns: true if o is a Comment with the same body
     *
     * Examples:
     * --------------
     * Comment item1,item2;
     * if (item1 == item2) { }
     * --------------
     */
    override bool opEquals(Object o)
    {
        const item = toType!(const Item)(o);
        const t = cast(Comment)item;
        return t !is null && content == t.content;
    }

    /**
     * Compares two comments
     *
     * You should rarely need to call this function. It exists so that Comments
     * can be used as associative array keys.
     *
     * Returns: -1, 0 or 1 according to how the two bodies compare. An item
     * that is not a Comment compares as 0.
     *
     * Examples:
     * --------------
     * Comment item1,item2;
     * if (item1 < item2) { }
     * --------------
     */
    override int opCmp(Object o)
    {
        const item = toType!(const Item)(o);
        const t = cast(Comment)item;
        // Not a Comment: 0, as before.
        if (t is null) return 0;
        // The result must stay an int. Combining it with "&&" turned it into
        // a bool, so "less than" (-1) was reported as 1.
        if (content == t.content) return 0;
        return content < t.content ? -1 : 1;
    }

    /**
     * Returns the hash of a Comment
     *
     * You should rarely need to call this function. It exists so that Comments
     * can be used as associative array keys.
     */
    override hash_t toHash() { return hash(content); }

    /**
     * Returns a string representation of this comment
     */
    override const string toString() { return "<!--" ~ content ~ "-->"; }

    override const bool isEmptyXML() { return false; } /// Returns false always
}
|---|
| 1368 |
|
|---|
| 1369 |
/**
 * Class representing a Character Data section
 */
class CData : Item
{
    private string content;

    /**
     * Construct a character data section
     *
     * Params:
     *      content = the body of the character data segment
     *
     * Throws: CDataException if the segment body is illegal (contains "]]>")
     *
     * Examples:
     * --------------
     * auto item = new CData("<b>hello</b>");
     *    // constructs <![CDATA[<b>hello</b>]]>
     * --------------
     */
    this(string content)
    {
        if (content.indexOf("]]>") != -1) throw new CDataException(content);
        this.content = content;
    }

    /**
     * Compares two CDatas for equality
     *
     * Returns: true if o is a CData with the same body
     *
     * Examples:
     * --------------
     * CData item1,item2;
     * if (item1 == item2) { }
     * --------------
     */
    override bool opEquals(Object o)
    {
        const item = toType!(const Item)(o);
        const t = cast(CData)item;
        return t !is null && content == t.content;
    }

    /**
     * Compares two CDatas
     *
     * You should rarely need to call this function. It exists so that CDatas
     * can be used as associative array keys.
     *
     * Returns: -1, 0 or 1 according to how the two bodies compare. An item
     * that is not a CData compares as 0.
     *
     * Examples:
     * --------------
     * CData item1,item2;
     * if (item1 < item2) { }
     * --------------
     */
    override int opCmp(Object o)
    {
        const item = toType!(const Item)(o);
        const t = cast(CData)item;
        // Not a CData: 0, as before.
        if (t is null) return 0;
        // The result must stay an int. Combining it with "&&" turned it into
        // a bool, so "less than" (-1) was reported as 1.
        if (content == t.content) return 0;
        return content < t.content ? -1 : 1;
    }

    /**
     * Returns the hash of a CData
     *
     * You should rarely need to call this function. It exists so that CDatas
     * can be used as associative array keys.
     */
    override hash_t toHash() { return hash(content); }

    /**
     * Returns a string representation of this CData section
     */
    override const string toString() { return cdata ~ content ~ "]]>"; }

    override const bool isEmptyXML() { return false; } /// Returns false always
}
|---|
| 1447 |
|
|---|
| 1448 |
/**
 * Class representing a text (aka Parsed Character Data) section
 */
class Text : Item
{
    private string content;    // stored in encoded form

    /**
     * Construct a text (aka PCData) section
     *
     * Params:
     *      content = the text. This function encodes the text before
     *      insertion, so it is safe to insert any text
     *
     * Examples:
     * --------------
     * auto item = new Text("a < b");
     *    // constructs a &lt; b
     * --------------
     */
    this(string content)
    {
        this.content = encode(content);
    }

    /**
     * Compares two text sections for equality
     *
     * Returns: true if o is a Text with the same (encoded) content
     *
     * Examples:
     * --------------
     * Text item1,item2;
     * if (item1 == item2) { }
     * --------------
     */
    override bool opEquals(Object o)
    {
        const item = toType!(const Item)(o);
        const t = cast(Text)item;
        return t !is null && content == t.content;
    }

    /**
     * Compares two text sections
     *
     * You should rarely need to call this function. It exists so that Texts
     * can be used as associative array keys.
     *
     * Returns: -1, 0 or 1 according to how the two contents compare. An item
     * that is not a Text compares as 0.
     *
     * Examples:
     * --------------
     * Text item1,item2;
     * if (item1 < item2) { }
     * --------------
     */
    override int opCmp(Object o)
    {
        const item = toType!(const Item)(o);
        const t = cast(Text)item;
        // Not a Text: 0, as before.
        if (t is null) return 0;
        // The result must stay an int. Combining it with "&&" turned it into
        // a bool, so "less than" (-1) was reported as 1.
        if (content == t.content) return 0;
        return content < t.content ? -1 : 1;
    }

    /**
     * Returns the hash of a text section
     *
     * You should rarely need to call this function. It exists so that Texts
     * can be used as associative array keys.
     */
    override hash_t toHash() { return hash(content); }

    /**
     * Returns a string representation of this Text section
     */
    override const string toString() { return content; }

    /**
     * Returns true if the content is the empty string
     */
    override const bool isEmptyXML() { return content.length == 0; }
}
|---|
| 1527 |
|
|---|
| 1528 |
/**
 * Class representing an XML Instruction section
 */
class XMLInstruction : Item
{
    private string content;

    /**
     * Construct an XML Instruction section
     *
     * Params:
     *      content = the body of the instruction segment
     *
     * Throws: XIException if the segment body is illegal (contains ">")
     *
     * Examples:
     * --------------
     * auto item = new XMLInstruction("ATTLIST");
     *    // constructs <!ATTLIST>
     * --------------
     */
    this(string content)
    {
        if (content.indexOf(">") != -1) throw new XIException(content);
        this.content = content;
    }

    /**
     * Compares two XML instructions for equality
     *
     * Returns: true if o is an XMLInstruction with the same body
     *
     * Examples:
     * --------------
     * XMLInstruction item1,item2;
     * if (item1 == item2) { }
     * --------------
     */
    override bool opEquals(Object o)
    {
        const item = toType!(const Item)(o);
        const t = cast(XMLInstruction)item;
        return t !is null && content == t.content;
    }

    /**
     * Compares two XML instructions
     *
     * You should rarely need to call this function. It exists so that
     * XMLInstructions can be used as associative array keys.
     *
     * Returns: -1, 0 or 1 according to how the two bodies compare. An item
     * that is not an XMLInstruction compares as 0.
     *
     * Examples:
     * --------------
     * XMLInstruction item1,item2;
     * if (item1 < item2) { }
     * --------------
     */
    override int opCmp(Object o)
    {
        const item = toType!(const Item)(o);
        const t = cast(XMLInstruction)item;
        // Not an XMLInstruction: 0, as before.
        if (t is null) return 0;
        // The result must stay an int. Combining it with "&&" turned it into
        // a bool, so "less than" (-1) was reported as 1.
        if (content == t.content) return 0;
        return content < t.content ? -1 : 1;
    }

    /**
     * Returns the hash of an XMLInstruction
     *
     * You should rarely need to call this function. It exists so that
     * XMLInstructions can be used as associative array keys.
     */
    override hash_t toHash() { return hash(content); }

    /**
     * Returns a string representation of this XMLInstruction
     */
    override const string toString() { return "<!" ~ content ~ ">"; }

    override const bool isEmptyXML() { return false; } /// Returns false always
}
|---|
| 1606 |
|
|---|
| 1607 |
/**
 * Class representing a Processing Instruction section
 */
class ProcessingInstruction : Item
{
    private string content;

    /**
     * Construct a Processing Instruction section
     *
     * Params:
     *      content = the body of the instruction segment
     *
     * Throws: PIException if the segment body is illegal (contains "?>")
     *
     * Examples:
     * --------------
     * auto item = new ProcessingInstruction("php");
     *    // constructs <?php?>
     * --------------
     */
    this(string content)
    {
        if (content.indexOf("?>") != -1) throw new PIException(content);
        this.content = content;
    }

    /**
     * Compares two processing instructions for equality
     *
     * Returns: true if o is a ProcessingInstruction with the same body
     *
     * Examples:
     * --------------
     * ProcessingInstruction item1,item2;
     * if (item1 == item2) { }
     * --------------
     */
    override bool opEquals(Object o)
    {
        const item = toType!(const Item)(o);
        const t = cast(ProcessingInstruction)item;
        return t !is null && content == t.content;
    }

    /**
     * Compares two processing instructions
     *
     * You should rarely need to call this function. It exists so that
     * ProcessingInstructions can be used as associative array keys.
     *
     * Returns: -1, 0 or 1 according to how the two bodies compare. An item
     * that is not a ProcessingInstruction compares as 0.
     *
     * Examples:
     * --------------
     * ProcessingInstruction item1,item2;
     * if (item1 < item2) { }
     * --------------
     */
    override int opCmp(Object o)
    {
        const item = toType!(const Item)(o);
        const t = cast(ProcessingInstruction)item;
        // Not a ProcessingInstruction: 0, as before.
        if (t is null) return 0;
        // The result must stay an int. Combining it with "&&" turned it into
        // a bool, so "less than" (-1) was reported as 1.
        if (content == t.content) return 0;
        return content < t.content ? -1 : 1;
    }

    /**
     * Returns the hash of a ProcessingInstruction
     *
     * You should rarely need to call this function. It exists so that
     * ProcessingInstructions can be used as associative array keys.
     */
    override hash_t toHash() { return hash(content); }

    /**
     * Returns a string representation of this ProcessingInstruction
     */
    override const string toString() { return "<?" ~ content ~ "?>"; }

    override const bool isEmptyXML() { return false; } /// Returns false always
}
|---|
| 1685 |
|
|---|
| 1686 |
/**
 * Abstract base class for XML items
 *
 * Subclasses supply equality, ordering, hashing, the string form and the
 * emptiness test; a default pretty() is provided here.
 */
abstract class Item
{
    /// Compares with another Item of same type for equality
    abstract override bool opEquals(Object o);

    /// Compares with another Item of same type
    abstract override int opCmp(Object o);

    /// Returns the hash of this item
    abstract override hash_t toHash();

    /// Returns a string representation of this item
    abstract override const string toString();

    /**
     * Returns an indented string representation of this item
     *
     * This default yields the stripped result of toString() as a single
     * line, or no lines at all if nothing is left after stripping. The
     * indent argument is not used here.
     *
     * Params:
     *      indent = number of spaces by which to indent child elements
     */
    const string[] pretty(uint indent)
    {
        string trimmed = strip(toString());
        return trimmed.length != 0 ? [ trimmed ] : [];
    }

    /// Returns true if the item represents empty XML text
    abstract const bool isEmptyXML();
}
|---|
| 1718 |
|
|---|
| 1719 |
/** |
|---|
| 1720 |
* Class for parsing an XML Document. |
|---|
| 1721 |
* |
|---|
| 1722 |
* This is a subclass of ElementParser. Most of the useful functions are |
|---|
| 1723 |
* documented there. |
|---|
| 1724 |
* |
|---|
| 1725 |
* Standards: $(LINK2 http://www.w3.org/TR/1998/REC-xml-19980210, XML 1.0) |
|---|
| 1726 |
* |
|---|
| 1727 |
* Bugs: |
|---|
| 1728 |
* Currently only supports UTF documents. |
|---|
| 1729 |
* |
|---|
| 1730 |
* If there is an encoding attribute in the prolog, it is ignored. |
|---|
| 1731 |
* |
|---|
| 1732 |
*/ |
|---|
| 1733 |
class DocumentParser : ElementParser
{
    // Owned copy of the document text; the inherited cursor "s" points at it.
    string xmlText;

    /**
     * Constructs a DocumentParser.
     *
     * The input to this function MUST be valid XML.
     * This is enforced by the function's in contract, which runs check()
     * on the text and turns a CheckException into an assertion failure
     * carrying the exception's description.
     *
     * Params:
     *      xmlText_ = the entire XML document as text
     *
     */
    this(string xmlText_)
    in
    {
        assert(xmlText_.length != 0);
        try
        {
            // Well-formedness check of the whole input
            check(xmlText_);
        }
        catch (CheckException failure)
        {
            // Report the reason the input was rejected
            assert(false, "\n" ~ failure.toString());
        }
    }
    body
    {
        // The base constructor reads *s, so the cursor has to be pointed at
        // the text before super() is invoked.
        xmlText = xmlText_;
        s = &xmlText;
        super();    // Initialize everything
        parse();    // Parse through the root tag (but not beyond)
    }
}
|---|
| 1770 |
|
|---|
| 1771 |
/** |
|---|
| 1772 |
* Class for parsing an XML element. |
|---|
| 1773 |
* |
|---|
| 1774 |
* Standards: $(LINK2 http://www.w3.org/TR/1998/REC-xml-19980210, XML 1.0) |
|---|
| 1775 |
* |
|---|
| 1776 |
* Note that you cannot construct instances of this class directly. You can |
|---|
| 1777 |
* construct a DocumentParser (which is a subclass of ElementParser), but |
|---|
| 1778 |
* otherwise, instances of ElementParser will be created for you by the
|---|
| 1779 |
* library, and passed your way via onStartTag handlers. |
|---|
| 1780 |
* |
|---|
| 1781 |
*/ |
|---|
| 1782 |
class ElementParser |
|---|
| 1783 |
{ |
|---|
| 1784 |
alias void delegate(string) Handler; |
|---|
| 1785 |
alias void delegate(in Element element) ElementHandler; |
|---|
| 1786 |
alias void delegate(ElementParser parser) ParserHandler; |
|---|
| 1787 |
|
|---|
| 1788 |
private
{
    Tag tag_;               // tag most recently associated with this parser
    string elementStart;    // snapshot of *s taken at construction time
    string* s;              // shared cursor into the text still to be parsed

    // Optional callbacks; each stays null until registered through the
    // corresponding on...() method.
    Handler commentHandler, cdataHandler, xiHandler, piHandler,
            rawTextHandler, textHandler;

    // Private constructor for start tags: shares the parent's cursor and
    // starts out with the parent's current tag.
    this(ElementParser parent)
    {
        s = parent.s;       // must be set before this(), which reads *s
        this();
        tag_ = parent.tag_;
    }

    // Private constructor for empty tags: parses the separate string
    // supplied by the caller instead of the parent's text.
    this(Tag tag, string* t)
    {
        s = t;              // must be set before this(), which reads *s
        this();
        tag_ = tag;
    }
}
|---|
| 1817 |
|
|---|
| 1818 |
/** |
|---|
| 1819 |
* The Tag at the start of the element being parsed. You can read this to |
|---|
| 1820 |
* determine the tag's name and attributes. |
|---|
| 1821 |
*/ |
|---|
| 1822 |
const const(Tag) tag()
{
    return tag_;
}
|---|
| 1823 |
|
|---|
| 1824 |
/** |
|---|
| 1825 |
* Register a handler which will be called whenever a start tag is |
|---|
| 1826 |
* encountered which matches the specified name. You can also pass null as |
|---|
| 1827 |
* the name, in which case the handler will be called for any unmatched |
|---|
| 1828 |
* start tag. |
|---|
| 1829 |
* |
|---|
| 1830 |
* Examples: |
|---|
| 1831 |
* -------------- |
|---|
| 1832 |
* // Call this function whenever a <podcast> start tag is encountered |
|---|
| 1833 |
* onStartTag["podcast"] = (ElementParser xml) |
|---|
| 1834 |
* { |
|---|
| 1835 |
* // Your code here |
|---|
| 1836 |
* // |
|---|
| 1837 |
* // This is a closure, so code here may reference
|---|
| 1838 |
* // variables which are outside of this scope |
|---|
| 1839 |
* }; |
|---|
| 1840 |
* |
|---|
| 1841 |
* // call myEpisodeStartHandler (defined elsewhere) whenever an <episode> |
|---|
| 1842 |
* // start tag is encountered |
|---|
| 1843 |
* onStartTag["episode"] = &myEpisodeStartHandler; |
|---|
| 1844 |
* |
|---|
| 1845 |
* // call delegate dg for all other start tags |
|---|
| 1846 |
* onStartTag[null] = dg; |
|---|
| 1847 |
* -------------- |
|---|
| 1848 |
* |
|---|
| 1849 |
* This library will supply your function with a new instance of |
|---|
| 1850 |
* ElementParser, which may be used to parse inside the element whose
|---|
| 1851 |
* start tag was just found, or to identify the tag attributes of the |
|---|
| 1852 |
* element, etc. |
|---|
| 1853 |
* |
|---|
| 1854 |
* Note that your function will be called for both start tags and empty |
|---|
| 1855 |
* tags. That is, we make no distinction between <br></br> |
|---|
| 1856 |
* and <br/>. |
|---|
| 1857 |
*/ |
|---|
| 1858 |
ParserHandler[string] onStartTag; |
|---|
| 1859 |
|
|---|
| 1860 |
/** |
|---|
| 1861 |
* Register a handler which will be called whenever an end tag is |
|---|
| 1862 |
* encountered which matches the specified name. You can also pass null as |
|---|
| 1863 |
* the name, in which case the handler will be called for any unmatched |
|---|
| 1864 |
* end tag. |
|---|
| 1865 |
* |
|---|
| 1866 |
* Examples: |
|---|
| 1867 |
* -------------- |
|---|
| 1868 |
* // Call this function whenever a </podcast> end tag is encountered |
|---|
| 1869 |
* onEndTag["podcast"] = (in Element e) |
|---|
| 1870 |
* { |
|---|
| 1871 |
* // Your code here |
|---|
| 1872 |
* // |
|---|
| 1873 |
* // This is a closure, so code here may reference
|---|
| 1874 |
* // variables which are outside of this scope |
|---|
| 1875 |
* }; |
|---|
| 1876 |
* |
|---|
| 1877 |
* // call myEpisodeEndHandler (defined elsewhere) whenever an </episode> |
|---|
| 1878 |
* // end tag is encountered |
|---|
| 1879 |
* onEndTag["episode"] = &myEpisodeEndHandler; |
|---|
| 1880 |
* |
|---|
| 1881 |
* // call delegate dg for all other end tags |
|---|
| 1882 |
* onEndTag[null] = dg; |
|---|
| 1883 |
* -------------- |
|---|
| 1884 |
* |
|---|
| 1885 |
* Note that your function will be called for both end tags and empty
|---|
| 1886 |
* tags. That is, we make no distinction between <br></br> |
|---|
| 1887 |
* and <br/>. |
|---|
| 1888 |
*/ |
|---|
| 1889 |
ElementHandler[string] onEndTag; |
|---|
| 1890 |
|
|---|
| 1891 |
protected this()
{
    // Remember the text as it stands now so that toString() can later
    // report how much of it has been consumed. Requires "s" to be set.
    elementStart = *s;
}
|---|
| 1895 |
|
|---|
| 1896 |
/** |
|---|
| 1897 |
* Register a handler which will be called whenever text is encountered. |
|---|
| 1898 |
* |
|---|
| 1899 |
* Examples: |
|---|
| 1900 |
* -------------- |
|---|
| 1901 |
* // Call this function whenever text is encountered |
|---|
| 1902 |
* onText = (string s) |
|---|
| 1903 |
* { |
|---|
| 1904 |
* // Your code here |
|---|
| 1905 |
* |
|---|
| 1906 |
* // The passed parameter s will have been decoded by the time you see |
|---|
| 1907 |
* // it, and so may contain any character. |
|---|
| 1908 |
* // |
|---|
| 1909 |
* // This is a closure, so code here may reference
|---|
| 1910 |
* // variables which are outside of this scope |
|---|
| 1911 |
* }; |
|---|
| 1912 |
* -------------- |
|---|
| 1913 |
*/ |
|---|
| 1914 |
void onText(Handler handler)
{
    textHandler = handler;
}
|---|
| 1915 |
|
|---|
| 1916 |
/** |
|---|
| 1917 |
* Register an alternative handler which will be called whenever text |
|---|
| 1918 |
* is encountered. This differs from onText in that onText will decode |
|---|
| 1919 |
* the text, whereas onTextRaw will not. This allows you to make design
|---|
| 1920 |
* choices, since onText will be more accurate, but slower, while |
|---|
| 1921 |
* onTextRaw will be faster, but less accurate. Of course, you can |
|---|
| 1922 |
* still call decode() within your handler, if you want, but you'd |
|---|
| 1923 |
* probably want to use onTextRaw only in circumstances where you |
|---|
| 1924 |
* know that decoding is unnecessary. |
|---|
| 1925 |
* |
|---|
| 1926 |
* Examples: |
|---|
| 1927 |
* -------------- |
|---|
| 1928 |
* // Call this function whenever text is encountered |
|---|
| 1929 |
* onTextRaw = (string s)
|---|
| 1930 |
* { |
|---|
| 1931 |
* // Your code here |
|---|
| 1932 |
* |
|---|
| 1933 |
* // The passed parameter s will NOT have been decoded. |
|---|
| 1934 |
* // |
|---|
| 1935 |
* // This is a closure, so code here may reference
|---|
| 1936 |
* // variables which are outside of this scope |
|---|
| 1937 |
* }; |
|---|
| 1938 |
* -------------- |
|---|
| 1939 |
*/ |
|---|
| 1940 |
void onTextRaw(Handler handler)
{
    rawTextHandler = handler;
}
|---|
| 1941 |
|
|---|
| 1942 |
/** |
|---|
| 1943 |
* Register a handler which will be called whenever a character data |
|---|
| 1944 |
* segment is encountered.
|---|
| 1945 |
* |
|---|
| 1946 |
* Examples: |
|---|
| 1947 |
* -------------- |
|---|
| 1948 |
* // Call this function whenever a CData section is encountered |
|---|
| 1949 |
* onCData = (string s) |
|---|
| 1950 |
* { |
|---|
| 1951 |
* // Your code here |
|---|
| 1952 |
* |
|---|
| 1953 |
* // The passed parameter s does not include the opening <![CDATA[ |
|---|
| 1954 |
* // nor closing ]]> |
|---|
| 1955 |
* // |
|---|
| 1956 |
* // This is a closure, so code here may reference
|---|
| 1957 |
* // variables which are outside of this scope |
|---|
| 1958 |
* }; |
|---|
| 1959 |
* -------------- |
|---|
| 1960 |
*/ |
|---|
| 1961 |
void onCData(Handler handler)
{
    cdataHandler = handler;
}
|---|
| 1962 |
|
|---|
| 1963 |
/** |
|---|
| 1964 |
* Register a handler which will be called whenever a comment is |
|---|
| 1965 |
* encountered. |
|---|
| 1966 |
* |
|---|
| 1967 |
* Examples: |
|---|
| 1968 |
* -------------- |
|---|
| 1969 |
* // Call this function whenever a comment is encountered |
|---|
| 1970 |
* onComment = (string s) |
|---|
| 1971 |
* { |
|---|
| 1972 |
* // Your code here |
|---|
| 1973 |
* |
|---|
| 1974 |
* // The passed parameter s does not include the opening <!-- nor |
|---|
| 1975 |
* // closing --> |
|---|
| 1976 |
* // |
|---|
| 1977 |
* // This is a closure, so code here may reference
|---|
| 1978 |
* // variables which are outside of this scope |
|---|
| 1979 |
* }; |
|---|
| 1980 |
* -------------- |
|---|
| 1981 |
*/ |
|---|
| 1982 |
void onComment(Handler handler)
{
    commentHandler = handler;
}
|---|
| 1983 |
|
|---|
| 1984 |
/** |
|---|
| 1985 |
* Register a handler which will be called whenever a processing |
|---|
| 1986 |
* instruction is encountered. |
|---|
| 1987 |
* |
|---|
| 1988 |
* Examples: |
|---|
| 1989 |
* -------------- |
|---|
| 1990 |
* // Call this function whenever a processing instruction is encountered |
|---|
| 1991 |
* onPI = (string s) |
|---|
| 1992 |
* { |
|---|
| 1993 |
* // Your code here |
|---|
| 1994 |
* |
|---|
| 1995 |
* // The passed parameter s does not include the opening <? nor |
|---|
| 1996 |
* // closing ?> |
|---|
| 1997 |
* // |
|---|
| 1998 |
* // This is a closure, so code here may reference
|---|
| 1999 |
* // variables which are outside of this scope |
|---|
| 2000 |
* }; |
|---|
| 2001 |
* -------------- |
|---|
| 2002 |
*/ |
|---|
| 2003 |
void onPI(Handler handler)
{
    piHandler = handler;
}
|---|
| 2004 |
|
|---|
| 2005 |
/** |
|---|
| 2006 |
* Register a handler which will be called whenever an XML instruction is |
|---|
| 2007 |
* encountered. |
|---|
| 2008 |
* |
|---|
| 2009 |
* Examples: |
|---|
| 2010 |
* -------------- |
|---|
| 2011 |
* // Call this function whenever an XML instruction is encountered |
|---|
| 2012 |
* // (Note: XML instructions may only occur preceding the root tag of a
|---|
| 2013 |
* // document). |
|---|
| 2014 |
* onXI = (string s)
|---|
| 2015 |
* { |
|---|
| 2016 |
* // Your code here |
|---|
| 2017 |
* |
|---|
| 2018 |
* // The passed parameter s does not include the opening <! nor |
|---|
| 2019 |
* // closing > |
|---|
| 2020 |
* // |
|---|
| 2021 |
* // This is a closure, so code here may reference
|---|
| 2022 |
* // variables which are outside of this scope |
|---|
| 2023 |
* }; |
|---|
| 2024 |
* -------------- |
|---|
| 2025 |
*/ |
|---|
| 2026 |
void onXI(Handler handler)
{
    xiHandler = handler;
}
|---|
| 2027 |
|
|---|
| 2028 |
/** |
|---|
| 2029 |
* Parse an XML element. |
|---|
| 2030 |
* |
|---|
| 2031 |
* Parsing will continue until the end of the current element. Any items |
|---|
| 2032 |
* encountered for which a handler has been registered will invoke that |
|---|
| 2033 |
* handler. |
|---|
| 2034 |
* |
|---|
| 2035 |
* Throws: various kinds of XMLException |
|---|
| 2036 |
*/ |
|---|
| 2037 |
void parse()
{
    string t;
    Tag root = tag_;

    // Start tags whose end tags this parser still expects to see, innermost
    // last. A stack is used instead of a table keyed by tag name so that
    // nested elements sharing a name (e.g. <a><a/>..</a> or <a><a>..</a></a>)
    // are paired with the correct start tag, and so that only the end tag
    // that really closes the root element terminates the parse.
    Tag[] openTags;
    if (tag_ !is null) openTags ~= tag_;

    while(s.length != 0)
    {
        if (startsWith(*s,"<!--"))
        {
            // Comment: hand the text between the delimiters to the handler
            chop(*s,4);
            t = chop(*s,indexOf(*s,"-->"));
            if (commentHandler.funcptr !is null) commentHandler(t);
            chop(*s,3);
        }
        else if (startsWith(*s,"<![CDATA["))
        {
            // CDATA section
            chop(*s,9);
            t = chop(*s,indexOf(*s,"]]>"));
            if (cdataHandler.funcptr !is null) cdataHandler(t);
            chop(*s,3);
        }
        else if (startsWith(*s,"<!"))
        {
            // XML instruction (any other "<!" construct)
            chop(*s,2);
            t = chop(*s,indexOf(*s,">"));
            if (xiHandler.funcptr !is null) xiHandler(t);
            chop(*s,1);
        }
        else if (startsWith(*s,"<?"))
        {
            // Processing instruction
            chop(*s,2);
            t = chop(*s,indexOf(*s,"?>"));
            if (piHandler.funcptr !is null) piHandler(t);
            chop(*s,2);
        }
        else if (startsWith(*s,"<"))
        {
            tag_ = new Tag(*s,true);
            if (root is null)
                return; // Return to constructor of derived class

            if (tag_.isStart)
            {
                // Amount of text left before any handler runs; used below
                // to find out whether a handler consumed the element.
                auto remaining = s.length;

                auto parser = new ElementParser(this);

                // Named handler first, then the catch-all (null) handler
                auto handler = tag_.name in onStartTag;
                if (handler !is null) (*handler)(parser);
                else
                {
                    handler = null in onStartTag;
                    if (handler !is null) (*handler)(parser);
                }

                // A handler that called parser.parse() has advanced the
                // shared cursor past the element's end tag, so this parser
                // will never see that end tag. Only an element left
                // unconsumed stays open here.
                if (s.length == remaining) openTags ~= tag_;
            }
            else if (tag_.isEnd)
            {
                // The end tag closes the innermost element still open
                auto startTag = openTags[$-1];
                openTags = openTags[0..$-1];
                string text;

                // Everything between the end of the start tag and the
                // beginning of this end tag, decoded, becomes the text
                immutable(char)* p = startTag.tagString.ptr
                    + startTag.tagString.length;
                immutable(char)* q = tag_.tagString.ptr;
                text = decode(p[0..(q-p)], DecodeMode.LOOSE);

                auto element = new Element(startTag);
                if (text.length != 0) element ~= new Text(text);

                auto handler = tag_.name in onEndTag;
                if (handler !is null) (*handler)(element);
                else
                {
                    handler = null in onEndTag;
                    if (handler !is null) (*handler)(element);
                }

                // Nothing left open: the root element has just been closed
                if (openTags.length == 0) return;
            }
            else if (tag_.isEmpty)
            {
                Tag startTag = new Tag(tag_.name);

                // FIX by hed010gy, for bug 2979
                // http://d.puremagic.com/issues/show_bug.cgi?id=2979
                if (tag_.attr.length > 0)
                      foreach(tn,tv; tag_.attr) startTag.attr[tn]=tv;
                // END FIX

                // Handle the pretend start tag
                string s2;
                auto parser = new ElementParser(startTag,&s2);
                auto handler1 = startTag.name in onStartTag;
                if (handler1 !is null) (*handler1)(parser);
                else
                {
                    handler1 = null in onStartTag;
                    if (handler1 !is null) (*handler1)(parser);
                }

                // Handle the pretend end tag
                auto element = new Element(startTag);
                auto handler2 = tag_.name in onEndTag;
                if (handler2 !is null) (*handler2)(element);
                else
                {
                    handler2 = null in onEndTag;
                    if (handler2 !is null) (*handler2)(element);
                }
            }
        }
        else
        {
            // Character data up to the next markup. The raw handler, when
            // registered, takes precedence over the decoding handler.
            t = chop(*s,indexOf(*s,"<"));
            if (rawTextHandler.funcptr !is null)
                rawTextHandler(t);
            else if (textHandler.funcptr !is null)
                textHandler(decode(t,DecodeMode.LOOSE));
        }
    }
}
|---|
| 2159 |
|
|---|
| 2160 |
/** |
|---|
| 2161 |
* Returns that part of the element which has already been parsed |
|---|
| 2162 |
*/ |
|---|
| 2163 |
const override string toString()
{
    // The consumed prefix is whatever elementStart had beyond what is still
    // left in *s. The count is kept as size_t: storing the difference of two
    // lengths in an int narrows it (and is rejected) on 64-bit targets.
    size_t n = elementStart.length - s.length;
    return elementStart[0..n];
}
|---|
| 2168 |
|
|---|
| 2169 |
} |
|---|
| 2170 |
|
|---|
| 2171 |
private |
|---|
| 2172 |
{ |
|---|
| 2173 |
// Mixin shared by the checkXxx functions. It must be mixed into a scope
// that has a string variable named "s" (the parse position). It records the
// position on entry in "old" and provides fail() overloads that restore "s"
// to that position before throwing an Err labelled with msg.
template Check(string msg)
{
    string old = s;

    // Fail with no further detail.
    void fail()
    {
        s = old;
        throw new Err(s,msg);
    }

    // Fail, chaining the lower-level error that caused the failure.
    void fail(Err cause)
    {
        s = old;
        throw new Err(s,msg,cause);
    }

    // Fail with a detail message. The detail error is built from the
    // current position, before "s" is rewound by fail(Err).
    void fail(string detail)
    {
        fail(new Err(s,detail));
    }
}
|---|
| 2194 |
|
|---|
| 2195 |
// Checks one Misc item: a comment, a processing instruction, or whitespace,
// selected by how the input begins.
void checkMisc(ref string s) // rule 27
{
    mixin Check!("Misc");

    try
    {
        if (s.startsWith("<!--"))
        {
            checkComment(s);
        }
        else if (s.startsWith("<?"))
        {
            checkPI(s);
        }
        else
        {
            checkSpace(s);
        }
    }
    catch(Err err)
    {
        fail(err);
    }
}
|---|
| 2207 |
|
|---|
| 2208 |
// Checks a whole document: the prolog, the root element, then any trailing
// Misc items.
void checkDocument(ref string s) // rule 1
{
    mixin Check!("Document");

    try
    {
        checkProlog(s);
        checkElement(s);
        star!(checkMisc)(s);
    }
    catch(Err err)
    {
        fail(err);
    }
}
|---|
| 2219 |
|
|---|
| 2220 |
// Fails at the first code point of s that isChar() rejects; s is left
// untouched when every code point is accepted.
void checkChars(ref string s) // rule 2
{
    // TO DO - Fix std.utf stride and decode functions, then use those
    // instead

    mixin Check!("Chars");

    dchar offender;
    int where = -1;     // index of the first rejected code point, if any
    foreach(int i,dchar d; s)
    {
        if (isChar(d)) continue;
        offender = d;
        where = i;
        break;
    }

    if (where != -1)
    {
        // Advance to the offending character so the error detail is built
        // from that position.
        s = s[where..$];
        fail(format("invalid character: U+%04X",offender));
    }
}
|---|
| 2244 |
|
|---|
| 2245 |
// Consumes leading whitespace (space, tab, line feed, carriage return) and
// fails if the position did not move.
void checkSpace(ref string s) // rule 3
{
    mixin Check!("Whitespace");

    munch(s,"\u0020\u0009\u000A\u000D");

    // Identity comparison: s is unchanged only when nothing was consumed
    if (s is old)
    {
        fail();
    }
}
|---|
| 2251 |
|
|---|
| 2252 |
// Consumes a Name from the front of s and returns it in name.
//
// The first character must be '_', ':' or a letter; the following ones may
// additionally be '-', '.', digits, combining characters or extenders.
// Fails when s is empty or does not begin with a valid first character.
void checkName(ref string s, out string name) // rule 5
{
    mixin Check!("Name");

    if (s.length == 0) fail();

    // Index one past the last name character. It defaults to the whole
    // input: previously it defaulted to 0, so a name running to the very
    // end of s was reported as empty and nothing was consumed.
    size_t n = s.length;
    foreach(size_t i,dchar c;s)
    {
        if (c == '_' || c == ':' || isLetter(c)) continue;
        if (i == 0) fail();
        if (c == '-' || c == '.' || isDigit(c)
            || isCombiningChar(c) || isExtender(c)) continue;
        n = i;
        break;
    }
    name = s[0..n];
    s = s[n..$];
}
|---|
| 2270 |
|
|---|
| 2271 |
// Checks a quoted attribute value: an opening quote (single or double), any
// text free of '<', with '&' only as the start of a valid reference, and a
// matching closing quote.
void checkAttValue(ref string s) // rule 10
{
    mixin Check!("AttValue");

    if (s.length == 0) fail();

    char quote = s[0];
    if (quote != '\u0022' && quote != '\u0027')
        fail("attribute value requires quotes");
    s = s[1..$];

    while (true)
    {
        // Skip everything that is not '<', '&' or the closing quote
        munch(s,"^<&"~quote);
        if (s.length == 0) fail("unterminated attribute value");
        if (s[0] == '<') fail("< found in attribute value");
        if (s[0] == quote) break;

        // Only '&' remains possible here
        try
        {
            checkReference(s);
        }
        catch(Err err)
        {
            fail(err);
        }
    }

    // Step over the closing quote
    s = s[1..$];
}
|---|
| 2290 |
|
|---|
| 2291 |
// Consumes character data up to (not including) the next '&' or '<', or to
// the end of input. Fails if "]]>" occurs in the data.
void checkCharData(ref string s) // rule 14
{
    mixin Check!("CharData");

    for (; s.length != 0; s = s[1..$])
    {
        if (s.startsWith("&") || s.startsWith("<")) break;
        if (s.startsWith("]]>")) fail("]]> found within char data");
    }
}
|---|
| 2303 |
|
|---|
| 2304 |
// Checks a comment: "<!--", then text up to the first "--", which must be
// the start of the closing "-->".
void checkComment(ref string s) // rule 15
{
    mixin Check!("Comment");

    try
    {
        checkLiteral("<!--",s);
    }
    catch(Err err)
    {
        fail(err);
    }

    // The first "--" after the opener has to belong to the terminator
    auto dashes = s.indexOf("--");
    if (dashes == -1) fail("unterminated comment");
    s = s[dashes..$];

    try
    {
        checkLiteral("-->",s);
    }
    catch(Err err)
    {
        fail(err);
    }
}
|---|
| 2314 |
|
|---|
| 2315 |
// Checks a processing instruction: "<?" followed, via checkEnd, by the
// closing "?>".
void checkPI(ref string s) // rule 16
{
    mixin Check!("PI");

    try
    {
        checkLiteral("<?",s);
        checkEnd("?>",s);
    }
    catch(Err err)
    {
        fail(err);
    }
}
|---|
| 2326 |
|
|---|
| 2327 |
// Checks a CDATA section: the opening literal held in "cdata" followed, via
// checkEnd, by the closing "]]>".
void checkCDSect(ref string s) // rule 18
{
    mixin Check!("CDSect");

    try
    {
        checkLiteral(cdata,s);
        checkEnd("]]>",s);
    }
    catch(Err err)
    {
        fail(err);
    }
}
|---|
| 2338 |
|
|---|
| 2339 |
// Checks the document prolog:
//     prolog ::= XMLDecl? Misc* (doctypedecl Misc*)?
void checkProlog(ref string s) // rule 22
{
    mixin Check!("Prolog");

    try
    {
        // The XML declaration is optional in XML 1.0; requiring it, as was
        // done before, rejected well-formed documents that omit it.
        opt!(checkXMLDecl)(s);
        star!(checkMisc)(s);
        opt!(seq!(checkDocTypeDecl,star!(checkMisc)))(s);
    }
    catch(Err e) { fail(e); }
}
|---|
| 2351 |
|
|---|
| 2352 |
// Checks the XML declaration: "<?xml", the version info, then optional
// encoding declaration, standalone declaration and whitespace, then "?>".
void checkXMLDecl(ref string s) // rule 23
{
    mixin Check!("XMLDecl");

    try
    {
        checkLiteral("<?xml",s);
        checkVersionInfo(s);
        opt!(checkEncodingDecl)(s);
        opt!(checkSDDecl)(s);
        opt!(checkSpace)(s);
        checkLiteral("?>",s);
    }
    catch(Err err)
    {
        fail(err);
    }
}
|---|
| 2367 |
|
|---|
| 2368 |
// Checks the version attribute of the XML declaration: whitespace,
// "version", an equals sign and a quoted version number.
void checkVersionInfo(ref string s) // rule 24
{
    mixin Check!("VersionInfo");

    try
    {
        checkSpace(s);
        checkLiteral("version",s);
        checkEq(s);
        quoted!(checkVersionNum)(s);
    }
    catch(Err err)
    {
        fail(err);
    }
}
|---|
| 2381 |
|
|---|
| 2382 |
// Checks an equals sign with optional whitespace on either side.
void checkEq(ref string s) // rule 25
{
    mixin Check!("Eq");

    try
    {
        opt!(checkSpace)(s);
        checkLiteral("=",s);
        opt!(checkSpace)(s);
    }
    catch(Err err)
    {
        fail(err);
    }
}
|---|
| 2394 |
|
|---|
| 2395 |
// Consumes a version number made of letters, digits, '_', '.', ':' and '-';
// fails if not even one such character is present.
void checkVersionNum(ref string s) // rule 26
{
    mixin Check!("VersionNum");

    munch(s,"a-zA-Z0-9_.:-");

    // Identity comparison: s is unchanged only when nothing was consumed
    if (s is old)
    {
        fail();
    }
}
|---|
| 2402 |
|
|---|
| 2403 |
// Checks a document type declaration. Only the opening "<!DOCTYPE" and a
// closing ">" (via checkEnd) are verified; the contents are not examined.
void checkDocTypeDecl(ref string s) // rule 28
{
    mixin Check!("DocTypeDecl");

    try
    {
        checkLiteral("<!DOCTYPE",s);
        //
        // TO DO -- ensure DOCTYPE is well formed
        // (But not yet. That's one of our "future directions")
        //
        checkEnd(">",s);
    }
    catch(Err err)
    {
        fail(err);
    }
}
|---|
| 2418 |
|
|---|
| 2419 |
// Checks the standalone declaration: whitespace, "standalone", an equals
// sign, and the value yes or no in single or double quotes.
void checkSDDecl(ref string s) // rule 32
{
    mixin Check!("SDDecl");

    try
    {
        checkSpace(s);
        checkLiteral("standalone",s);
        checkEq(s);
    }
    catch(Err err)
    {
        fail(err);
    }

    // Length of the quoted value, quotes included
    int width = 0;
    if (s.startsWith("'yes'") || s.startsWith("\"yes\""))
    {
        width = 5;
    }
    else if (s.startsWith("'no'" ) || s.startsWith("\"no\"" ))
    {
        width = 4;
    }
    else
    {
        fail("standalone attribute value must be 'yes', \"yes\","
            " 'no' or \"no\"");
    }
    s = s[width..$];
}
|---|
| 2438 |
|
|---|
| 2439 |
// Checks one element. When checkTag reports the tag type "STag", the
// content and a matching end tag are checked as well; for any other tag
// type nothing further is done.
void checkElement(ref string s) // rule 39
{
    mixin Check!("Element");

    string sname,ename,kind;
    try
    {
        checkTag(s,kind,sname);
    }
    catch(Err err)
    {
        fail(err);
    }

    if (kind != "STag") return;

    // Position of the end tag, kept for the mismatch error below
    string endTagPos;
    try
    {
        checkContent(s);
        endTagPos = s;
        checkETag(s,ename);
    }
    catch(Err err)
    {
        fail(err);
    }

    if (sname != ename)
    {
        // Point the error detail at the offending end tag
        s = endTagPos;
        fail("end tag name \"" ~ ename
            ~ "\" differs from start tag name \""~sname~"\"");
    }
}
|---|
| 2464 |
|
|---|
| 2465 |
// rules 40 and 44 |
|---|
| 2466 |
// Checks a start tag or an empty-element tag and reports which one it was.
//
// On return, name holds the tag name and type is "STag" for a plain start
// tag or "ETag" when the tag ended in "/>".
void checkTag(ref string s, out string type, out string name)
{
    mixin Check!("Tag");

    try
    {
        type = "STag";
        checkLiteral("<",s);
        checkName(s,name);
        star!(seq!(checkSpace,checkAttribute))(s);
        opt!(checkSpace)(s);

        // A slash just before '>' marks the self-closing form
        immutable selfClosing = s.length != 0 && s[0] == '/';
        if (selfClosing)
        {
            s = s[1..$];
            type = "ETag";
        }
        checkLiteral(">",s);
    }
    catch(Err err)
    {
        fail(err);
    }
}
|---|
| 2486 |
|
|---|
| 2487 |
// Checks one attribute: a name, an equals sign and a quoted value. The
// attribute name is parsed but not returned.
void checkAttribute(ref string s) // rule 41
{
    mixin Check!("Attribute");

    try
    {
        string attrName;
        checkName(s,attrName);
        checkEq(s);
        checkAttValue(s);
    }
    catch(Err err)
    {
        fail(err);
    }
}
|---|
| 2500 |
|
|---|
| 2501 |
// Checks an end tag: "</", a name (returned in name), optional whitespace
// and ">".
void checkETag(ref string s, out string name) // rule 42
{
    mixin Check!("ETag");

    try
    {
        checkLiteral("</",s);
        checkName(s,name);
        opt!(checkSpace)(s);
        checkLiteral(">",s);
    }
    catch(Err err)
    {
        fail(err);
    }
}
|---|
| 2514 |
|
|---|
| 2515 |
// Checks element content: any sequence of references, comments, processing
// instructions, CDATA sections, child elements and character data. Stops
// (without consuming it) at "</", or at the end of input.
void checkContent(ref string s) // rule 43
{
    mixin Check!("Content");

    try
    {
        while (s.length != 0)
        {
            // Move the rewind point forward to the item about to be checked
            old = s;

            if (s.startsWith("&"))
            {
                checkReference(s);
            }
            else if (s.startsWith("<!--"))
            {
                checkComment(s);
            }
            else if (s.startsWith("<?"))
            {
                checkPI(s);
            }
            else if (s.startsWith(cdata))
            {
                checkCDSect(s);
            }
            else if (s.startsWith("</"))
            {
                break;
            }
            else if (s.startsWith("<"))
            {
                checkElement(s);
            }
            else
            {
                checkCharData(s);
            }
        }
    }
    catch(Err err)
    {
        fail(err);
    }
}
|---|
| 2535 |
|
|---|
| 2536 |
// Checks a numeric character reference ("&#ddd;" or "&#xhhh;") and returns
// the referenced code point in c.
//
// Fails when the reference has no digits, is not terminated by ';', or
// denotes a value that isChar() rejects.
void checkCharRef(ref string s, out dchar c) // rule 66
{
    mixin Check!("CharRef");

    c = 0;
    try { checkLiteral("&#",s); } catch(Err e) { fail(e); }

    int radix = 10;
    if (s.length != 0 && s[0] == 'x')
    {
        s = s[1..$];
        radix = 16;
    }
    if (s.length == 0) fail("unterminated character reference");
    if (s[0] == ';')
        fail("character reference must have at least one digit");

    while (s.length != 0)
    {
        immutable char d = s[0];

        // Numeric value of the digit; 100 marks "not a digit" and is
        // >= every supported radix.
        int n;
        if (d >= '0' && d <= '9') n = d - '0';
        else if (d >= 'a' && d <= 'f') n = d - 'a' + 10;
        else if (d >= 'A' && d <= 'F') n = d - 'A' + 10;
        else n = 100;

        if (n >= radix) break;

        // Accumulate with saturation. Without it a long digit string could
        // overflow and wrap around to a legal code point; pinning the value
        // just above U+10FFFF guarantees the isChar() test below fails.
        // c never exceeds 0x110000 here, so the product cannot overflow.
        uint value = cast(uint)c * radix + n;
        c = cast(dchar)(value > 0x10FFFF ? 0x110000 : value);
        s = s[1..$];
    }
    if (!isChar(c)) fail(format("U+%04X is not a legal character",c));
    if (s.length == 0 || s[0] != ';') fail("expected ;");
    else s = s[1..$];
}
|---|
| 2584 |
|
|---|
| 2585 |
// Checks a reference (rule 67): a character reference when the input
// starts with "&#", otherwise an entity reference.
// Any failure from the delegated check is re-reported through fail().
void checkReference(ref string s) // rule 67
{
    mixin Check!("Reference");

    try
    {
        if (s.startsWith("&#"))
        {
            // The decoded code point is not needed here, only validity.
            dchar ignored;
            checkCharRef(s,ignored);
        }
        else
        {
            checkEntityRef(s);
        }
    }
    catch(Err e)
    {
        fail(e);
    }
}
|---|
| 2597 |
|
|---|
| 2598 |
// Checks an entity reference (rule 68): '&', a name, then ';'.
// Any failure from the three steps is re-reported through fail().
void checkEntityRef(ref string s) // rule 68
{
    mixin Check!("EntityRef");

    try
    {
        checkLiteral("&",s);

        // checkName hands the name back; it is not used any further here.
        string name;
        checkName(s,name);

        checkLiteral(";",s);
    }
    catch(Err e)
    {
        fail(e);
    }
}
|---|
| 2611 |
|
|---|
| 2612 |
// Checks an encoding name (rule 81): one or more ASCII letters, followed
// by any run of ASCII letters, digits, '_', '.' and '-'.
void checkEncName(ref string s) // rule 81
{
    mixin Check!("EncName");

    // Leading letters.
    munch(s,"a-zA-Z");

    // `old` is supplied by mixin Check; if s is still the very same slice,
    // no leading letter was consumed.
    immutable bool nothingConsumed = s is old;
    if (nothingConsumed) fail();

    // Remaining name characters.
    munch(s,"a-zA-Z0-9_.-");
}
|---|
| 2620 |
|
|---|
| 2621 |
// Checks an encoding declaration (rule 80): whitespace, the keyword
// "encoding", an equals sign, then a quoted encoding name.
// Any failure from the steps is re-reported through fail().
void checkEncodingDecl(ref string s) // rule 80
{
    mixin Check!("EncodingDecl");

    try
    {
        checkSpace(s);                  // separator before the keyword
        checkLiteral("encoding",s);     // the keyword itself
        checkEq(s);                     // equals sign
        quoted!(checkEncName)(s);       // name in single or double quotes
    }
    catch(Err e)
    {
        fail(e);
    }
}
|---|
| 2634 |
|
|---|
| 2635 |
// Helper functions |
|---|
| 2636 |
|
|---|
| 2637 |
// Requires s to begin with `literal` and removes it from the front of s.
// Reports `Expected literal "<literal>"` through fail() otherwise.
void checkLiteral(string literal,ref string s)
{
    mixin Check!("Literal");

    immutable bool matches = s.startsWith(literal);
    if (!matches) fail("Expected literal \""~literal~"\"");

    s = s[literal.length..$];
}
|---|
| 2644 |
|
|---|
| 2645 |
// Skips everything up to and including the first occurrence of `end`.
//
// Params:
//      end = terminator to search for
//      s   = input text; on success it starts just after the terminator
//
// Throws: Err when `end` does not occur anywhere in s.
void checkEnd(string end,ref string s)
{
    // Deliberately no mixin Check here.

    // Keep the index in whatever type indexOf returns: forcing it into an
    // int truncates (or is rejected by the compiler) on targets where
    // string indices are wider than int.
    auto n = s.indexOf(end);
    if (n == -1) throw new Err(s,"Unable to find terminating \""~end~"\"");
    s = s[n..$];
    checkLiteral(end,s);
}
|---|
| 2654 |
|
|---|
| 2655 |
// Metafunctions -- none of these use mixin Check |
|---|
| 2656 |
|
|---|
| 2657 |
// Metafunction "optional": runs f on s and ignores an Err raised by it.
void opt(alias f)(ref string s)
{
    try
    {
        f(s);
    }
    catch(Err e)
    {
        // An optional production that does not match is not an error.
    }
}
|---|
| 2661 |
|
|---|
| 2662 |
// Metafunction "one or more": f must succeed once (its Err propagates to
// the caller); further repetitions are handled by star.
void plus(alias f)(ref string s)
{
    // Mandatory first match.
    f(s);

    // Optional further matches.
    star!(f)(s);
}
|---|
| 2667 |
|
|---|
| 2668 |
// Metafunction "zero or more": applies f to s repeatedly until the input
// is exhausted, f raises Err, or f succeeds without consuming anything.
void star(alias f)(ref string s)
{
    while (s.length != 0)
    {
        string before = s;

        try { f(s); }
        catch(Err e) { return; }

        // A successful match that consumed nothing would succeed the same
        // way forever; stop instead of spinning.
        if (s is before) return;
    }
}
|---|
| 2676 |
|
|---|
| 2677 |
// Metafunction "quoted": requires f's production to be wrapped in a
// matching pair of quotes. A leading apostrophe selects single quotes;
// anything else is treated as (and must be) a double quote.
void quoted(alias f)(ref string s)
{
    string quote = s.startsWith("'") ? "'" : "\"";

    checkLiteral(quote,s);
    f(s);
    checkLiteral(quote,s);
}
|---|
| 2692 |
|
|---|
| 2693 |
// Metafunction "sequence": applies f and then g to the same input.
// An Err from either one propagates to the caller.
void seq(alias f,alias g)(ref string s)
{
    f(s);   // first production
    g(s);   // second production, on whatever f left
}
|---|
| 2698 |
} |
|---|
| 2699 |
|
|---|
| 2700 |
/**
 * Check an entire XML document for well-formedness
 *
 * Params:
 *      s = the document to be checked, passed as a string
 *
 * Throws: CheckException if the document is not well formed
 *
 * CheckException's toString() method will yield the complete hierarchy of
 * parse failure (the XML equivalent of a stack trace), giving the line and
 * column number of every failure at every level.
 */
void check(string s)
{
    // The checks below consume s as they go, so keep the untouched text:
    // line and column numbers must be computed against the whole document.
    // Passing the remaining tail instead (as happens after a successful
    // checkDocument followed by junk) would always report line 1, column 1.
    string entire = s;

    try
    {
        checkChars(s);
        checkDocument(s);
        if (s.length != 0) throw new Err(s,"Junk found after document");
    }
    catch(Err e)
    {
        e.complete(entire);
        throw e;
    }
}
|---|
| 2726 |
|
|---|
| 2727 |
unittest
{
  version (none) // WHY ARE WE NOT RUNNING THIS UNIT TEST?
  {
    // The second book closes <genre> with </genres>; check() must reject
    // the document and name the mismatching tags.
    try
    {
        check(q"[<?xml version="1.0"?>
<catalog>
<book id="bk101">
<author>Gambardella, Matthew</author>
<title>XML Developer's Guide</title>
<genre>Computer</genre>
<price>44.95</price>
<publish_date>2000-10-01</publish_date>
<description>An in-depth look at creating applications
with XML.</description>
</book>
<book id="bk102">
<author>Ralls, Kim</author>
<title>Midnight Rain</title>
<genre>Fantasy</genres>
<price>5.95</price>
<publish_date>2000-12-16</publish_date>
<description>A former architect battles corporate zombies,
an evil sorceress, and her own childhood to become queen
of the world.</description>
</book>
<book id="bk103">
<author>Corets, Eva</author>
<title>Maeve Ascendant</title>
<genre>Fantasy</genre>
<price>5.95</price>
<publish_date>2000-11-17</publish_date>
<description>After the collapse of a nanotechnology
society in England, the young survivors lay the
foundation for a new society.</description>
</book>
</catalog>
]");
        assert(false);
    }
    catch(CheckException e)
    {
        // Keep the index in indexOf's own type (no narrowing to int) and
        // join the two halves of the message explicitly.
        auto n = e.toString().indexOf("end tag name \"genres\" differs"
            ~ " from start tag name \"genre\"");
        assert(n != -1);
    }
  }
}
|---|
| 2776 |
|
|---|
| 2777 |
unittest
{
    // A small document containing a comment between elements must pass
    // check(); on failure the full error report is the assert message.
    string doc = q"EOS
<?xml version="1.0"?>
<set>
<one>A</one>
<!-- comment -->
<two>B</two>
</set>
EOS";
    try
    {
        check(doc);
    }
    catch (CheckException failure)
    {
        assert(0, failure.toString());
    }
}
|---|
| 2796 |
|
|---|
| 2797 |
unittest
{
    // The ampersands are written as entity references, as well-formed XML
    // requires; the handlers below expect them back as plain '&' both in
    // the attribute value and in the element text.
    string s = q"EOS
<?xml version="1.0" encoding="utf-8"?> <Tests>
<Test thing="What &amp; Up">What &amp; Up Second</Test>
</Tests>
EOS";
    auto xml = new DocumentParser(s);

    xml.onStartTag["Test"] = (ElementParser xml) {
        assert(xml.tag.attr["thing"] == "What & Up");
    };

    xml.onEndTag["Test"] = (in Element e) {
        assert(e.text == "What & Up Second");
    };
    xml.parse();
}
|---|
| 2815 |
|
|---|
| 2816 |
/** The base class for exceptions thrown by this module */
class XMLException : Exception
{
    /// Constructs the exception with the given message.
    this(string msg)
    {
        super(msg);
    }
}
|---|
| 2818 |
|
|---|
| 2819 |
// Other exceptions |
|---|
| 2820 |
|
|---|
| 2821 |
/// Thrown during Comment constructor
class CommentException : XMLException
{
    // Private constructor: only code with access to it can create one.
    private this(string msg)
    {
        super(msg);
    }
}
|---|
| 2824 |
|
|---|
| 2825 |
/// Thrown during CData constructor
class CDataException : XMLException
{
    // Private constructor: only code with access to it can create one.
    private this(string msg)
    {
        super(msg);
    }
}
|---|
| 2828 |
|
|---|
| 2829 |
/// Thrown during XMLInstruction constructor
class XIException : XMLException
{
    // Private constructor: only code with access to it can create one.
    private this(string msg)
    {
        super(msg);
    }
}
|---|
| 2832 |
|
|---|
| 2833 |
/// Thrown during ProcessingInstruction constructor
class PIException : XMLException
{
    // Private constructor: only code with access to it can create one.
    private this(string msg)
    {
        super(msg);
    }
}
|---|
| 2836 |
|
|---|
| 2837 |
/// Thrown during Text constructor
class TextException : XMLException
{
    // Private constructor: only code with access to it can create one.
    private this(string msg)
    {
        super(msg);
    }
}
|---|
| 2840 |
|
|---|
| 2841 |
/// Thrown during decode()
class DecodeException : XMLException
{
    // Private constructor: only code with access to it can create one.
    private this(string msg)
    {
        super(msg);
    }
}
|---|
| 2844 |
|
|---|
| 2845 |
/// Thrown if comparing with wrong type
class InvalidTypeException : XMLException
{
    // Private constructor: only code with access to it can create one.
    private this(string msg)
    {
        super(msg);
    }
}
|---|
| 2848 |
|
|---|
| 2849 |
/// Thrown when parsing for Tags
class TagException : XMLException
{
    // Private constructor: only code with access to it can create one.
    private this(string msg)
    {
        super(msg);
    }
}
|---|
| 2852 |
|
|---|
| 2853 |
/**
 * Thrown during check()
 */
class CheckException : XMLException
{
    CheckException err; /// Parent in hierarchy
    private string tail; // the input that was still unread at the failure
    /**
     * Name of production rule which failed to parse,
     * or specific error message
     */
    string msg;
    uint line = 0; /// Line number at which parse failure occurred
    uint column = 0; /// Column number at which parse failure occurred

    // The base class is constructed with a null message; the text of this
    // exception lives in the msg field declared above.
    private this(string tail,string msg,Err err=null)
    {
        super(null);
        this.tail = tail;
        this.msg = msg;
        this.err = err;
    }

    // Fills in line and column for this exception and, recursively, for
    // every exception chained through err. `entire` must be the complete
    // text of which tail is the trailing part.
    private void complete(string entire)
    {
        // Everything that was consumed before the failure.
        string head = entire[0..$-tail.length];

        // Index just past the last newline (0 when there is none). Kept in
        // lastIndexOf's own type rather than narrowed to int.
        auto n = head.lastIndexOf('\n') + 1;

        // The counts are size_t; narrow explicitly to the declared uint.
        line = cast(uint)(head.count("\n") + 1);

        // Column is counted in code points, not UTF-8 code units.
        dstring t;
        transcode(head[n..$],t);
        column = cast(uint)(t.length + 1);

        if (err !is null) err.complete(entire);
    }

    // Renders the chained exceptions (err first) followed by this one,
    // one per line; the position prefix appears once complete() has run.
    override const string toString()
    {
        string s;
        if (line != 0) s = format("Line %d, column %d: ",line,column);
        s ~= msg;
        s ~= '\n';
        if (err !is null) s = err.toString ~ s;
        return s;
    }
}
|---|
| 2897 |
|
|---|
| 2898 |
// Err is the short module-internal name for CheckException.
private alias CheckException Err; |
|---|
| 2899 |
|
|---|
| 2900 |
// Private helper functions |
|---|
| 2901 |
|
|---|
| 2902 |
private |
|---|
| 2903 |
{ |
|---|
| 2904 |
// Downcasts o to T.
// Throws: InvalidTypeException when the cast yields null.
T toType(T)(Object o)
{
    if (auto converted = cast(T)(o)) return converted;

    throw new InvalidTypeException("Attempt to compare a "
        ~ T.stringof ~ " with an instance of another type");
}
|---|
| 2914 |
|
|---|
| 2915 |
// Removes the first n characters (code units) from s and returns them.
// n == -1 means "everything": s is returned whole and left empty.
string chop(ref string s, int n)
{
    // Work with a size_t cut point instead of writing s.length back into
    // the int parameter, which would narrow the length.
    immutable size_t cut = (n == -1) ? s.length : cast(size_t) n;

    string t = s[0..cut];
    s = s[cut..$];
    return t;
}
|---|
| 2922 |
|
|---|
| 2923 |
// Optional character: if s starts with c, removes it and returns true;
// otherwise leaves s untouched and returns false.
bool optc(ref string s, char c)
{
    if (s.length == 0 || s[0] != c) return false;

    s = s[1..$];
    return true;
}
|---|
| 2929 |
|
|---|
| 2930 |
// Required character: removes c from the front of s.
// Throws: TagException (with an empty message) when s is empty or does
// not start with c.
void reqc(ref string s, char c)
{
    immutable bool matches = s.length != 0 && s[0] == c;
    if (!matches) throw new TagException("");

    s = s[1..$];
}
|---|
| 2935 |
|
|---|
| 2936 |
// Folds the code points of s into h: for each one, h becomes h * 11 plus
// the code point. Passing a previous result as h chains several strings.
hash_t hash(string s,hash_t h=0)
{
    foreach (dchar codePoint; s)
    {
        h *= 11;
        h += codePoint;
    }
    return h;
}
|---|
| 2941 |
|
|---|
| 2942 |
// Definitions from the XML specification |
|---|
| 2943 |
// Code points matching XML 1.0 production [2] Char.
// Layout: consecutive (first,last) inclusive pairs in ascending order,
// which is the form lookup() searches.
immutable int[] CharTable = [
    0x9,0x9,0xA,0xA,0xD,0xD,0x20,0xD7FF,0xE000,0xFFFD,
    0x10000,0x10FFFF];
|---|
| 2945 |
// Code points matching XML 1.0 production [85] BaseChar.
// Layout: consecutive (first,last) inclusive pairs in ascending order,
// which is the form lookup() searches.
immutable int[] BaseCharTable = [
    0x0041,0x005A,0x0061,0x007A,0x00C0,0x00D6,0x00D8,
    0x00F6,0x00F8,0x00FF,0x0100,0x0131,0x0134,0x013E,0x0141,0x0148,0x014A,
    0x017E,0x0180,0x01C3,0x01CD,0x01F0,0x01F4,0x01F5,0x01FA,0x0217,0x0250,
    0x02A8,0x02BB,0x02C1,0x0386,0x0386,0x0388,0x038A,0x038C,0x038C,0x038E,
    0x03A1,0x03A3,0x03CE,0x03D0,0x03D6,0x03DA,0x03DA,0x03DC,0x03DC,0x03DE,
    0x03DE,0x03E0,0x03E0,0x03E2,0x03F3,0x0401,0x040C,0x040E,0x044F,0x0451,
    0x045C,0x045E,0x0481,0x0490,0x04C4,0x04C7,0x04C8,0x04CB,0x04CC,0x04D0,
    0x04EB,0x04EE,0x04F5,0x04F8,0x04F9,0x0531,0x0556,0x0559,0x0559,0x0561,
    0x0586,0x05D0,0x05EA,0x05F0,0x05F2,0x0621,0x063A,0x0641,0x064A,0x0671,
    0x06B7,0x06BA,0x06BE,0x06C0,0x06CE,0x06D0,0x06D3,0x06D5,0x06D5,0x06E5,
    0x06E6,0x0905,0x0939,0x093D,0x093D,0x0958,0x0961,0x0985,0x098C,0x098F,
    0x0990,0x0993,0x09A8,0x09AA,0x09B0,0x09B2,0x09B2,0x09B6,0x09B9,0x09DC,
    0x09DD,0x09DF,0x09E1,0x09F0,0x09F1,0x0A05,0x0A0A,0x0A0F,0x0A10,0x0A13,
    0x0A28,0x0A2A,0x0A30,0x0A32,0x0A33,0x0A35,0x0A36,0x0A38,0x0A39,0x0A59,
    0x0A5C,0x0A5E,0x0A5E,0x0A72,0x0A74,0x0A85,0x0A8B,0x0A8D,0x0A8D,0x0A8F,
    0x0A91,0x0A93,0x0AA8,0x0AAA,0x0AB0,0x0AB2,0x0AB3,0x0AB5,0x0AB9,0x0ABD,
    0x0ABD,0x0AE0,0x0AE0,0x0B05,0x0B0C,0x0B0F,0x0B10,0x0B13,0x0B28,0x0B2A,
    0x0B30,0x0B32,0x0B33,0x0B36,0x0B39,0x0B3D,0x0B3D,0x0B5C,0x0B5D,0x0B5F,
    0x0B61,0x0B85,0x0B8A,0x0B8E,0x0B90,0x0B92,0x0B95,0x0B99,0x0B9A,0x0B9C,
    0x0B9C,0x0B9E,0x0B9F,0x0BA3,0x0BA4,0x0BA8,0x0BAA,0x0BAE,0x0BB5,0x0BB7,
    0x0BB9,0x0C05,0x0C0C,0x0C0E,0x0C10,0x0C12,0x0C28,0x0C2A,0x0C33,0x0C35,
    0x0C39,0x0C60,0x0C61,0x0C85,0x0C8C,0x0C8E,0x0C90,0x0C92,0x0CA8,0x0CAA,
    0x0CB3,0x0CB5,0x0CB9,0x0CDE,0x0CDE,0x0CE0,0x0CE1,0x0D05,0x0D0C,0x0D0E,
    0x0D10,0x0D12,0x0D28,0x0D2A,0x0D39,0x0D60,0x0D61,0x0E01,0x0E2E,0x0E30,
    0x0E30,0x0E32,0x0E33,0x0E40,0x0E45,0x0E81,0x0E82,0x0E84,0x0E84,0x0E87,
    0x0E88,0x0E8A,0x0E8A,0x0E8D,0x0E8D,0x0E94,0x0E97,0x0E99,0x0E9F,0x0EA1,
    0x0EA3,0x0EA5,0x0EA5,0x0EA7,0x0EA7,0x0EAA,0x0EAB,0x0EAD,0x0EAE,0x0EB0,
    0x0EB0,0x0EB2,0x0EB3,0x0EBD,0x0EBD,0x0EC0,0x0EC4,0x0F40,0x0F47,0x0F49,
    0x0F69,0x10A0,0x10C5,0x10D0,0x10F6,0x1100,0x1100,0x1102,0x1103,0x1105,
    0x1107,0x1109,0x1109,0x110B,0x110C,0x110E,0x1112,0x113C,0x113C,0x113E,
    0x113E,0x1140,0x1140,0x114C,0x114C,0x114E,0x114E,0x1150,0x1150,0x1154,
    0x1155,0x1159,0x1159,0x115F,0x1161,0x1163,0x1163,0x1165,0x1165,0x1167,
    0x1167,0x1169,0x1169,0x116D,0x116E,0x1172,0x1173,0x1175,0x1175,0x119E,
    0x119E,0x11A8,0x11A8,0x11AB,0x11AB,0x11AE,0x11AF,0x11B7,0x11B8,0x11BA,
    0x11BA,0x11BC,0x11C2,0x11EB,0x11EB,0x11F0,0x11F0,0x11F9,0x11F9,0x1E00,
    0x1E9B,0x1EA0,0x1EF9,0x1F00,0x1F15,0x1F18,0x1F1D,0x1F20,0x1F45,0x1F48,
    0x1F4D,0x1F50,0x1F57,0x1F59,0x1F59,0x1F5B,0x1F5B,0x1F5D,0x1F5D,0x1F5F,
    0x1F7D,0x1F80,0x1FB4,0x1FB6,0x1FBC,0x1FBE,0x1FBE,0x1FC2,0x1FC4,0x1FC6,
    0x1FCC,0x1FD0,0x1FD3,0x1FD6,0x1FDB,0x1FE0,0x1FEC,0x1FF2,0x1FF4,0x1FF6,
    0x1FFC,0x2126,0x2126,0x212A,0x212B,0x212E,0x212E,0x2180,0x2182,0x3041,
    0x3094,0x30A1,0x30FA,0x3105,0x312C,0xAC00,0xD7A3];
|---|
| 2986 |
// Code points matching XML 1.0 production [86] Ideographic.
// Layout: consecutive (first,last) inclusive pairs in ascending order,
// which is the form lookup() searches.
immutable int[] IdeographicTable = [
    0x3007,0x3007,0x3021,0x3029,0x4E00,0x9FA5];
|---|
| 2987 |
// Code points matching XML 1.0 production [87] CombiningChar.
// Layout: consecutive (first,last) inclusive pairs in ascending order,
// which is the form lookup() searches.
immutable int[] CombiningCharTable = [
    0x0300,0x0345,0x0360,0x0361,0x0483,0x0486,
    0x0591,0x05A1,0x05A3,0x05B9,0x05BB,0x05BD,0x05BF,0x05BF,0x05C1,0x05C2,
    0x05C4,0x05C4,0x064B,0x0652,0x0670,0x0670,0x06D6,0x06DC,0x06DD,0x06DF,
    0x06E0,0x06E4,0x06E7,0x06E8,0x06EA,0x06ED,0x0901,0x0903,0x093C,0x093C,
    0x093E,0x094C,0x094D,0x094D,0x0951,0x0954,0x0962,0x0963,0x0981,0x0983,
    0x09BC,0x09BC,0x09BE,0x09BE,0x09BF,0x09BF,0x09C0,0x09C4,0x09C7,0x09C8,
    0x09CB,0x09CD,0x09D7,0x09D7,0x09E2,0x09E3,0x0A02,0x0A02,0x0A3C,0x0A3C,
    0x0A3E,0x0A3E,0x0A3F,0x0A3F,0x0A40,0x0A42,0x0A47,0x0A48,0x0A4B,0x0A4D,
    0x0A70,0x0A71,0x0A81,0x0A83,0x0ABC,0x0ABC,0x0ABE,0x0AC5,0x0AC7,0x0AC9,
    0x0ACB,0x0ACD,0x0B01,0x0B03,0x0B3C,0x0B3C,0x0B3E,0x0B43,0x0B47,0x0B48,
    0x0B4B,0x0B4D,0x0B56,0x0B57,0x0B82,0x0B83,0x0BBE,0x0BC2,0x0BC6,0x0BC8,
    0x0BCA,0x0BCD,0x0BD7,0x0BD7,0x0C01,0x0C03,0x0C3E,0x0C44,0x0C46,0x0C48,
    0x0C4A,0x0C4D,0x0C55,0x0C56,0x0C82,0x0C83,0x0CBE,0x0CC4,0x0CC6,0x0CC8,
    0x0CCA,0x0CCD,0x0CD5,0x0CD6,0x0D02,0x0D03,0x0D3E,0x0D43,0x0D46,0x0D48,
    0x0D4A,0x0D4D,0x0D57,0x0D57,0x0E31,0x0E31,0x0E34,0x0E3A,0x0E47,0x0E4E,
    0x0EB1,0x0EB1,0x0EB4,0x0EB9,0x0EBB,0x0EBC,0x0EC8,0x0ECD,0x0F18,0x0F19,
    0x0F35,0x0F35,0x0F37,0x0F37,0x0F39,0x0F39,0x0F3E,0x0F3E,0x0F3F,0x0F3F,
    0x0F71,0x0F84,0x0F86,0x0F8B,0x0F90,0x0F95,0x0F97,0x0F97,0x0F99,0x0FAD,
    0x0FB1,0x0FB7,0x0FB9,0x0FB9,0x20D0,0x20DC,0x20E1,0x20E1,0x302A,0x302F,
    0x3099,0x3099,0x309A,0x309A];
|---|
| 3007 |
// Code points matching XML 1.0 production [88] Digit.
// Layout: consecutive (first,last) inclusive pairs in ascending order,
// which is the form lookup() searches.
immutable int[] DigitTable = [
    0x0030,0x0039,0x0660,0x0669,0x06F0,0x06F9,0x0966,
    0x096F,0x09E6,0x09EF,0x0A66,0x0A6F,0x0AE6,0x0AEF,0x0B66,0x0B6F,0x0BE7,
    0x0BEF,0x0C66,0x0C6F,0x0CE6,0x0CEF,0x0D66,0x0D6F,0x0E50,0x0E59,0x0ED0,
    0x0ED9,0x0F20,0x0F29];
|---|
| 3011 |
// Code points matching XML 1.0 production [89] Extender.
// Layout: consecutive (first,last) inclusive pairs in ascending order,
// which is the form lookup() searches.
immutable int[] ExtenderTable = [
    0x00B7,0x00B7,0x02D0,0x02D0,0x02D1,0x02D1,0x0387,
    0x0387,0x0640,0x0640,0x0E46,0x0E46,0x0EC6,0x0EC6,0x3005,0x3005,0x3031,
    0x3035,0x309D,0x309E,0x30FC,0x30FE];
|---|
| 3014 |
|
|---|
| 3015 |
// Binary search for c in a range table.
//
// Params:
//      table = flat array of (first,last) inclusive pairs, sorted ascending;
//              its length must therefore be even
//      c     = value to look for
//
// Returns: true when c lies inside one of the ranges.
bool lookup(const(int)[] table, int c)
{
    // An odd length would make table[m+1] below run off the end.
    assert((table.length & 1) == 0, "range table must hold (first,last) pairs");

    while (table.length != 0)
    {
        // Index of the first element of the middle pair (always even).
        // Kept as size_t: narrowing a length-derived index to int is lossy.
        immutable size_t m = (table.length >> 1) & ~1;

        if (c < table[m])
        {
            // Below the middle range: continue in the lower pairs.
            table = table[0..m];
        }
        else if (c > table[m+1])
        {
            // Above the middle range: continue in the upper pairs.
            table = table[m+2..$];
        }
        else return true;
    }
    return false;
}
|---|
| 3032 |
|
|---|
| 3033 |
// Builds a short, printable preview of s. Every code unit below 0x20 or
// above 0x7F is shown as '.'; once 40 characters have been produced the
// marker "___" is appended and the rest of s is ignored.
string startOf(string s)
{
    string preview;
    foreach (char unit; s)
    {
        immutable bool keep = unit >= 0x20 && unit <= 0x7F;
        preview ~= keep ? unit : '.';

        if (preview.length >= 40)
        {
            preview ~= "___";
            break;
        }
    }
    return preview;
}
|---|
| 3043 |
|
|---|
| 3044 |
// Always throws an XMLException carrying s as its message
// (null when no message is given).
void exit(string s=null)
{
    auto failure = new XMLException(s);
    throw failure;
}
|---|
| 3048 |
}`); |
|---|