module deepmagic.dom.xml.xml;

import deepmagic.dom;
/+
import std.algorithm : count, startsWith;
import std.array;
import std.ascii;
import std.string;
import std.encoding;
+/
// NOTE(review): the std imports above are disabled although this module uses
// names such as appender and startsWith -- presumably deepmagic.dom re-exports
// them; confirm.

/// Opening delimiter of a CDATA section.
enum cdata = "<![CDATA[";

/**
 * Tests whether c is a legal XML character (XML 1.0 production 2, Char).
 *
 * Accepts tab, line feed, carriage return, U+0020..U+D7FF and
 * U+E000..U+10FFFF except U+FFFE and U+FFFF.
 */
bool isChar(dchar c)
{
    if (c <= 0xD7FF)
    {
        if (c >= 0x20)
            return true;
        switch(c)
        {
        case 0xA:
        case 0x9:
        case 0xD:
            return true;
        default:
            return false;
        }
    }
    else if (0xE000 <= c && c <= 0x10FFFF)
    {
        // Masking off the lowest bit maps both U+FFFE and U+FFFF onto 0xFFFE.
        if ((c & 0x1FFFFE) != 0xFFFE) // U+FFFE and U+FFFF
            return true;
    }
    return false;
}

/// Tests whether c is XML whitespace: space, tab, line feed or carriage return.
bool isSpace(dchar c)
{
    return c == '\u0020' || c == '\u0009' || c == '\u000A' || c == '\u000D';
}

/**
 * Tests whether c is a digit: ASCII '0'..'9' is answered directly, anything
 * else is looked up in DigitTable.
 */
bool isDigit(dchar c)
{
    if (c <= 0x0039 && c >= 0x0030)
        return true;
    else
        return lookup(DigitTable,c);
}

/// Tests whether c is a letter, i.e. an ideographic or a base character.
bool isLetter(dchar c) // rule 84
{
    return isIdeographic(c) || isBaseChar(c);
}

/// Tests whether c is U+3007, in U+3021..U+3029 or in U+4E00..U+9FA5.
bool isIdeographic(dchar c)
{
    if (c == 0x3007)
        return true;
    if (c <= 0x3029 && c >= 0x3021 )
        return true;
    if (c <= 0x9FA5 && c >= 0x4E00)
        return true;
    return false;
}

/// Tests whether c falls in one of the ranges of BaseCharTable.
bool isBaseChar(dchar c)
{
    return lookup(BaseCharTable,c);
}

/// Tests whether c falls in one of the ranges of CombiningCharTable.
bool isCombiningChar(dchar c)
{
    return lookup(CombiningCharTable,c);
}

/// Tests whether c falls in one of the ranges of ExtenderTable.
bool isExtender(dchar c)
{
    return lookup(ExtenderTable,c);
}

/**
 * Escapes the five characters that have predefined XML entities.
 *
 * Replaces & with &amp;amp;, " with &amp;quot;, ' with &amp;apos;,
 * &lt; with &amp;lt; and &gt; with &amp;gt;.
 *
 * Params:
 *      s = The string to be encoded
 *
 * Returns: The encoded string, or s itself when nothing had to be escaped
 */
S encode(S)(S s)
{
    string r;
    size_t lastI;
    auto result = appender!S();

    foreach (i, c; s)
    {
        switch (c)
        {
        case '&':  r = "&amp;";  break;
        case '"':  r = "&quot;"; break;
        case '\'': r = "&apos;"; break;
        case '<':  r = "&lt;";   break;
        case '>':  r = "&gt;";   break;
        default: continue;
        }
        // Copy the untouched run since the previous escape, then the entity.
        result.put(s[lastI .. i]);
        result.put(r);
        lastI = i + 1;
    }

    // Nothing was ever appended: hand back the original without copying.
    if (!result.data.ptr) return s;
    result.put(s[lastI .. $]);
    return result.data;
}

/// Selects how decode() treats its input; see decode() for details.
enum DecodeMode
{
    NONE, LOOSE, STRICT
}

/**
 * Decodes a string by unescaping all predefined XML entities.
 *
 * encode() escapes certain characters (ampersand, quote, apostrophe, less-than
 * and greater-than), and similarly, decode() unescapes them. These functions
 * are provided for convenience only. You do not need to use them when using
 * the std.xml classes, because then all the encoding and decoding will be done
 * for you automatically.
 *
 * This function decodes the entities &amp;amp;, &amp;quot;, &amp;apos;,
 * &amp;lt; and &amp;gt;,
 * as well as decimal and hexadecimal entities such as &amp;#x20AC;
 *
 * If the string does not contain an ampersand, the original will be returned.
 *
 * Note that the "mode" parameter can be one of DecodeMode.NONE (do not
 * decode), DecodeMode.LOOSE (decode, but ignore errors), or DecodeMode.STRICT
 * (decode, and throw a DecodeException in the event of an error).
 *
 * Standards: $(LINK2 http://www.w3.org/TR/1998/REC-xml-19980210, XML 1.0)
 *
 * Params:
 *      s = The string to be decoded
 *      mode = (optional) Mode to use for decoding. (Defaults to LOOSE).
 *
 * Throws: DecodeException if mode == DecodeMode.STRICT and decode fails
 *
 * Returns: The decoded string
 *
 * Examples:
 * --------------
 * writefln(decode("a &gt; b")); // writes "a > b"
 * --------------
 */
string decode(string s, DecodeMode mode=DecodeMode.LOOSE)
{
    // Renamed on import so it cannot be confused with this module's encode()
    // and does not depend on the fully qualified name being accessible.
    import std.utf : utfEncode = encode;

    if (mode == DecodeMode.NONE) return s;

    // Stays empty until the first '&' is met, so ampersand-free input is
    // returned as-is without any allocation.
    char[] buffer;
    foreach (ref i; 0 .. s.length)
    {
        char c = s[i];
        if (c != '&')
        {
            if (buffer.length != 0) buffer ~= c;
        }
        else
        {
            if (buffer.length == 0)
            {
                buffer = s[0 .. i].dup;
            }
            if (startsWith(s[i..$],"&#"))
            {
                try
                {
                    dchar d;
                    string t = s[i..$];
                    checkCharRef(t, d);
                    char[4] temp;
                    buffer ~= temp[0 .. utfEncode(temp, d)];
                    // t is what is left after the reference; step onto the
                    // reference's last byte, the loop increment does the rest.
                    i = s.length - t.length - 1;
                }
                catch(Err e)
                {
                    if (mode == DecodeMode.STRICT)
                        throw new DecodeException("Unescaped &");
                    buffer ~= '&';
                }
            }
            // Each skip is the entity length minus one; the loop adds the one.
            else if (startsWith(s[i..$],"&amp;" )) { buffer ~= '&';  i += 4; }
            else if (startsWith(s[i..$],"&quot;")) { buffer ~= '"';  i += 5; }
            else if (startsWith(s[i..$],"&apos;")) { buffer ~= '\''; i += 5; }
            else if (startsWith(s[i..$],"&lt;"  )) { buffer ~= '<';  i += 3; }
            else if (startsWith(s[i..$],"&gt;"  )) { buffer ~= '>';  i += 3; }
            else
            {
                if (mode == DecodeMode.STRICT)
                    throw new DecodeException("Unescaped &");
                buffer ~= '&';
            }
        }
    }
    return (buffer.length == 0) ? s : cast(string)buffer;
}

unittest
{
    void assertNot(string s)
    {
        bool b = false;
        try { decode(s,DecodeMode.STRICT); }
        catch (DecodeException e) { b = true; }
        assert(b,s);
    }

    // Assert that things that should work, do
    auto s = "hello";
    assert(decode(s,                DecodeMode.STRICT) is s);
    assert(decode("a &gt; b",       DecodeMode.STRICT) == "a > b");
    assert(decode("a &lt; b",       DecodeMode.STRICT) == "a < b");
    assert(decode("don&apos;t",     DecodeMode.STRICT) == "don't");
    assert(decode("&quot;hi&quot;", DecodeMode.STRICT) == "\"hi\"");
    assert(decode("cat &amp; dog",  DecodeMode.STRICT) == "cat & dog");
    assert(decode("&#42;",          DecodeMode.STRICT) == "*");
    assert(decode("&#x2A;",         DecodeMode.STRICT) == "*");
    assert(decode("cat & dog",      DecodeMode.LOOSE) == "cat & dog");
    assert(decode("a &gt b",        DecodeMode.LOOSE) == "a &gt b");
    assert(decode("&#;",            DecodeMode.LOOSE) == "&#;");
    assert(decode("&#x;",           DecodeMode.LOOSE) == "&#x;");
    assert(decode("&#2G;",          DecodeMode.LOOSE) == "&#2G;");
    assert(decode("&#x2G;",         DecodeMode.LOOSE) == "&#x2G;");

    // Assert that things that shouldn't work, don't
    assertNot("cat & dog");
    assertNot("a &gt b");
    assertNot("&#;");
    assertNot("&#x;");
    assertNot("&#2G;");
    assertNot("&#x2G;");
}

public
{
    /**
     * Mixin used by the check* functions below.
     *
     * It must be mixed into a function that has a `ref string s` parameter.
     * It records the value of s on entry in `old` and supplies fail()
     * overloads that restore s to `old` before throwing an Err labelled with
     * msg (normally the name of the production being parsed).
     */
    template Check(string msg)
    {
        string old = s;

        /// Rolls s back and throws Err(msg).
        void fail()
        {
            s = old;
            throw new Err(s,msg);
        }

        /// Rolls s back and throws Err(msg) with e chained as its cause.
        void fail(Err e)
        {
            s = old;
            throw new Err(s,msg,e);
        }

        /// Wraps msg2 in an Err located at the current s, then fails with it
        /// as the cause.
        void fail(string msg2)
        {
            fail(new Err(s,msg2));
        }
    }

    /// Consumes one comment, processing instruction or run of whitespace.
    /// Throws: Err, with s restored, if none of them is present.
    void checkMisc(ref string s) // rule 27
    {
        mixin Check!("Misc");

        try
        {
                 if (s.startsWith("<!--")) { checkComment(s); }
            else if (s.startsWith("<?"))   { checkPI(s); }
            else                           { checkSpace(s); }
        }
        catch(Err e) { fail(e); }
    }

    /// Consumes a prolog, one element and any trailing Misc items.
    /// Throws: Err, with s restored, if the prolog or element is malformed.
    void checkDocument(ref string s) // rule 1
    {
        mixin Check!("Document");
        try
        {
            checkProlog(s);
            checkElement(s);
            star!(checkMisc)(s);
        }
        catch(Err e) { fail(e); }
    }

    /// Verifies that every code point of s satisfies isChar(); s itself is
    /// left unchanged.
    /// Throws: Err whose cause points at the first offending character.
    void checkChars(ref string s) // rule 2
    {
        // TO DO - Fix std.utf stride and decode functions, then use those
        // instead

        mixin Check!("Chars");

        dchar c;
        // Byte offset of the first illegal character, or -1 when there is
        // none. Kept pointer-sized so long inputs are not truncated.
        ptrdiff_t n = -1;
        foreach (size_t i, dchar d; s)
        {
            if (!isChar(d))
            {
                c = d;
                n = i;
                break;
            }
        }
        if (n != -1)
        {
            // Advance s so that the inner Err records the failure position;
            // fail() then restores s.
            s = s[n..$];
            fail(format("invalid character: U+%04X",c));
        }
    }

    /// Consumes one or more whitespace characters.
    /// Throws: Err if s does not start with whitespace.
    void checkSpace(ref string s) // rule 3
    {
        mixin Check!("Whitespace");
        munch(s,"\u0020\u0009\u000A\u000D");
        if (s is old) fail();
    }

    /**
     * Consumes a Name from the front of s.
     *
     * Params:
     *      s = input; advanced past the name on success
     *      name = receives the name that was consumed
     *
     * Throws: Err if s is empty or does not start with a letter, '_' or ':'
     */
    void checkName(ref string s, out string name) // rule 5
    {
        mixin Check!("Name");

        if (s.length == 0) fail();
        // Defaults to the whole input so that a name running up to the very
        // end of s is still consumed; the loop shortens it at the first
        // character that cannot be part of a name.
        size_t n = s.length;
        foreach (size_t i, dchar c; s)
        {
            if (c == '_' || c == ':' || isLetter(c)) continue;
            // Everything below is only legal after the first character.
            if (i == 0) fail();
            if (c == '-' || c == '.' || isDigit(c)
                || isCombiningChar(c) || isExtender(c)) continue;
            n = i;
            break;
        }
        name = s[0..n];
        s = s[n..$];
    }

    /// Consumes a quoted attribute value, validating embedded references.
    /// Throws: Err if the quotes are missing or unbalanced, if the value
    /// contains '<', or if a reference inside it is malformed.
    void checkAttValue(ref string s) // rule 10
    {
        mixin Check!("AttValue");

        if (s.length == 0) fail();
        char c = s[0];
        if (c != '\u0022' && c != '\u0027')
            fail("attribute value requires quotes");
        s = s[1..$];
        for(;;)
        {
            // Skip everything up to the next '<', '&' or closing quote.
            munch(s,"^<&"~c);
            if (s.length == 0) fail("unterminated attribute value");
            if (s[0] == '<') fail("< found in attribute value");
            if (s[0] == c) break;
            try { checkReference(s); } catch(Err e) { fail(e); }
        }
        s = s[1..$];
    }

    /// Consumes character data up to the next '&', '<' or the end of input.
    /// Throws: Err if the sequence "]]>" occurs within the data.
    void checkCharData(ref string s) // rule 14
    {
        mixin Check!("CharData");

        while (s.length != 0)
        {
            if (s.startsWith("&")) break;
            if (s.startsWith("<")) break;
            if (s.startsWith("]]>")) fail("]]> found within char data");
            s = s[1..$];
        }
    }

    /// Consumes a comment; the first "--" after the opener must begin "-->".
    /// Throws: Err if the comment is not opened or not terminated properly.
    void checkComment(ref string s) // rule 15
    {
        mixin Check!("Comment");

        try { checkLiteral("<!--",s); } catch(Err e) { fail(e); }
        ptrdiff_t n = s.indexOf("--");
        if (n == -1) fail("unterminated comment");
        s = s[n..$];
        try { checkLiteral("-->",s); } catch(Err e) { fail(e); }
    }

    /// Consumes a processing instruction, from "<?" through the next "?>".
    /// Throws: Err if either delimiter is missing.
    void checkPI(ref string s) // rule 16
    {
        mixin Check!("PI");

        try
        {
            checkLiteral("<?",s);
            checkEnd("?>",s);
        }
        catch(Err e) { fail(e); }
    }

    /// Consumes a CDATA section, from the cdata opener through the next "]]>".
    /// Throws: Err if either delimiter is missing.
    void checkCDSect(ref string s) // rule 18
    {
        mixin Check!("CDSect");

        try
        {
            checkLiteral(cdata,s);
            checkEnd("]]>",s);
        }
        catch(Err e) { fail(e); }
    }

    /// Consumes the prolog: an optional XML declaration, any Misc items and
    /// an optional DOCTYPE declaration followed by more Misc items.
    void checkProlog(ref string s) // rule 22
    {
        mixin Check!("Prolog");

        try
        {
            /* The XML declaration is optional
             * http://www.w3.org/TR/2008/REC-xml-20081126/#NT-prolog
             */
            opt!(checkXMLDecl)(s);

            star!(checkMisc)(s);
            opt!(seq!(checkDocTypeDecl,star!(checkMisc)))(s);
        }
        catch(Err e) { fail(e); }
    }

// Grammar checks, continued (still inside the enclosing public scope).

    /// Consumes an XML declaration: "<?xml", version info, optional encoding
    /// and standalone declarations, optional whitespace and "?>".
    /// Throws: Err, with s restored, if any mandatory part is missing.
    void checkXMLDecl(ref string s) // rule 23
    {
        mixin Check!("XMLDecl");

        try
        {
            checkLiteral("<?xml",s);
            checkVersionInfo(s);
            opt!(checkEncodingDecl)(s);
            opt!(checkSDDecl)(s);
            opt!(checkSpace)(s);
            checkLiteral("?>",s);
        }
        catch(Err e) { fail(e); }
    }

    /// Consumes whitespace, "version", an Eq and a quoted version number.
    void checkVersionInfo(ref string s) // rule 24
    {
        mixin Check!("VersionInfo");

        try
        {
            checkSpace(s);
            checkLiteral("version",s);
            checkEq(s);
            quoted!(checkVersionNum)(s);
        }
        catch(Err e) { fail(e); }
    }

    /// Consumes "=" with optional whitespace on either side.
    void checkEq(ref string s) // rule 25
    {
        mixin Check!("Eq");

        try
        {
            opt!(checkSpace)(s);
            checkLiteral("=",s);
            opt!(checkSpace)(s);
        }
        catch(Err e) { fail(e); }
    }

    /// Consumes one or more characters matching the pattern "a-zA-Z0-9_.:-".
    /// Throws: Err if nothing was consumed.
    void checkVersionNum(ref string s) // rule 26
    {
        mixin Check!("VersionNum");

        munch(s,"a-zA-Z0-9_.:-");
        if (s is old) fail();
    }

    /// Consumes a DOCTYPE declaration, from "<!DOCTYPE" through the next ">".
    /// The content of the declaration is not validated.
    void checkDocTypeDecl(ref string s) // rule 28
    {
        mixin Check!("DocTypeDecl");

        try
        {
            checkLiteral("<!DOCTYPE",s);
            //
            // TO DO -- ensure DOCTYPE is well formed
            // (But not yet. That's one of our "future directions")
            //
            checkEnd(">",s);
        }
        catch(Err e) { fail(e); }
    }

    /// Consumes a standalone declaration whose value is a quoted yes or no.
    /// Throws: Err if the keyword, the Eq or an accepted value is missing.
    void checkSDDecl(ref string s) // rule 32
    {
        mixin Check!("SDDecl");

        try
        {
            checkSpace(s);
            checkLiteral("standalone",s);
            checkEq(s);
        }
        catch(Err e) { fail(e); }

        // Length of the quoted value that was recognised.
        int n = 0;
             if (s.startsWith("'yes'") || s.startsWith("\"yes\"")) n = 5;
        else if (s.startsWith("'no'" ) || s.startsWith("\"no\"" )) n = 4;
        else fail("standalone attribute value must be 'yes', \"yes\","~
            " 'no' or \"no\"");
        s = s[n..$];
    }

    /// Consumes one element. A tag reported by checkTag as "STag" must be
    /// followed by content and an end tag carrying the same name.
    /// Throws: Err on a malformed tag or content, or on mismatched tag names.
    void checkElement(ref string s) // rule 39
    {
        mixin Check!("Element");

        string sname,ename,t;
        try { checkTag(s,t,sname); } catch(Err e) { fail(e); }

        if (t == "STag")
        {
            try
            {
                checkContent(s);
                // Remember where the end tag starts for the error below.
                t = s;
                checkETag(s,ename);
            }
            catch(Err e) { fail(e); }

            if (sname != ename)
            {
                s = t;
                fail("end tag name \"" ~ ename
                    ~ "\" differs from start tag name \""~sname~"\"");
            }
        }
    }

    /**
     * Consumes a start tag or an empty-element tag.
     *
     * Params:
     *      s = input; advanced past the tag on success
     *      type = receives "STag", or "ETag" when the tag ends in "/>"
     *      name = receives the tag name
     */
    // rules 40 and 44
    void checkTag(ref string s, out string type, out string name)
    {
        mixin Check!("Tag");

        try
        {
            type = "STag";
            checkLiteral("<",s);
            checkName(s,name);
            star!(seq!(checkSpace,checkAttribute))(s);
            opt!(checkSpace)(s);
            if (s.length != 0 && s[0] == '/')
            {
                s = s[1..$];
                type = "ETag";
            }
            checkLiteral(">",s);
        }
        catch(Err e) { fail(e); }
    }

    /// Consumes one attribute: a name, an Eq and a quoted value.
    void checkAttribute(ref string s) // rule 41
    {
        mixin Check!("Attribute");

        try
        {
            string name;
            checkName(s,name);
            checkEq(s);
            checkAttValue(s);
        }
        catch(Err e) { fail(e); }
    }

    /// Consumes an end tag and stores its name in `name`.
    void checkETag(ref string s, out string name) // rule 42
    {
        mixin Check!("ETag");

        try
        {
            checkLiteral("</",s);
            checkName(s,name);
            opt!(checkSpace)(s);
            checkLiteral(">",s);
        }
        catch(Err e) { fail(e); }
    }

    /// Consumes element content -- references, comments, processing
    /// instructions, CDATA sections, child elements and character data --
    /// until an end tag ("</") or the end of input is reached.
    void checkContent(ref string s) // rule 43
    {
        mixin Check!("Content");

        try
        {
            while (s.length != 0)
            {
                // Move the rollback point forward with every item consumed.
                old = s;
                     if (s.startsWith("&"))        { checkReference(s); }
                else if (s.startsWith("<!--"))     { checkComment(s); }
                else if (s.startsWith("<?"))       { checkPI(s); }
                else if (s.startsWith(cdata))      { checkCDSect(s); }
                else if (s.startsWith("</"))       { break; }
                else if (s.startsWith("<"))        { checkElement(s); }
                else                               { checkCharData(s); }
            }
        }
        catch(Err e) { fail(e); }
    }

    /**
     * Consumes a decimal or hexadecimal character reference.
     *
     * Params:
     *      s = input; advanced past the reference on success
     *      c = receives the referenced character
     *
     * Throws: Err if the reference has no digits, is not terminated by ';',
     *         or denotes a value for which isChar() is false
     */
    void checkCharRef(ref string s, out dchar c) // rule 66
    {
        mixin Check!("CharRef");

        c = 0;
        try { checkLiteral("&#",s); } catch(Err e) { fail(e); }
        int radix = 10;
        if (s.length != 0 && s[0] == 'x')
        {
            s = s[1..$];
            radix = 16;
        }
        if (s.length == 0) fail("unterminated character reference");
        if (s[0] == ';')
            fail("character reference must have at least one digit");
        while (s.length != 0)
        {
            char d = s[0];
            int n = 0;
            // Falls through from the matched digit down to '0', counting one
            // per step, so n ends up as the digit's numeric value.
            switch(d)
            {
                case 'F','f': ++n;      goto case;
                case 'E','e': ++n;      goto case;
                case 'D','d': ++n;      goto case;
                case 'C','c': ++n;      goto case;
                case 'B','b': ++n;      goto case;
                case 'A','a': ++n;      goto case;
                case '9':     ++n;      goto case;
                case '8':     ++n;      goto case;
                case '7':     ++n;      goto case;
                case '6':     ++n;      goto case;
                case '5':     ++n;      goto case;
                case '4':     ++n;      goto case;
                case '3':     ++n;      goto case;
                case '2':     ++n;      goto case;
                case '1':     ++n;      goto case;
                case '0':     break;
                default: n = 100; break;
            }
            // Also stops on hex letters inside a decimal reference.
            if (n >= radix) break;
            c *= radix;
            c += n;
            // Reject out-of-range values right away: further digits could
            // overflow the accumulator and wrap round to a legal character.
            if (c > 0x10FFFF)
                fail(format("U+%04X is not a legal character",c));
            s = s[1..$];
        }
        if (!isChar(c)) fail(format("U+%04X is not a legal character",c));
        if (s.length == 0 || s[0] != ';') fail("expected ;");
        else s = s[1..$];
    }

    /// Consumes a character reference ("&#...") or an entity reference.
    void checkReference(ref string s) // rule 67
    {
        mixin Check!("Reference");

        try
        {
            dchar c;
            if (s.startsWith("&#")) checkCharRef(s,c);
            else checkEntityRef(s);
        }
        catch(Err e) { fail(e); }
    }

    /// Consumes an entity reference: "&", a name and ";".
    void checkEntityRef(ref string s) // rule 68
    {
        mixin Check!("EntityRef");

        try
        {
            string name;
            checkLiteral("&",s);
            checkName(s,name);
            checkLiteral(";",s);
        }
        catch(Err e) { fail(e); }
    }

    /// Consumes an encoding name: at least one ASCII letter followed by any
    /// number of characters matching "a-zA-Z0-9_.-".
    void checkEncName(ref string s) // rule 81
    {
        mixin Check!("EncName");

        munch(s,"a-zA-Z");
        if (s is old) fail();
        munch(s,"a-zA-Z0-9_.-");
    }

    /// Consumes whitespace, "encoding", an Eq and a quoted encoding name.
    void checkEncodingDecl(ref string s) // rule 80
    {
        mixin Check!("EncodingDecl");

        try
        {
            checkSpace(s);
            checkLiteral("encoding",s);
            checkEq(s);
            quoted!(checkEncName)(s);
        }
        catch(Err e) { fail(e); }
    }

    // Helper functions

    /// Consumes `literal` from the front of s.
    /// Throws: Err if s does not start with it.
    void checkLiteral(string literal,ref string s)
    {
        mixin Check!("Literal");

        if (!s.startsWith(literal)) fail("Expected literal \""~literal~"\"");
        s = s[literal.length..$];
    }

    /// Skips forward to the first occurrence of `end` and consumes it too.
    /// Throws: Err if `end` does not occur in s.
    void checkEnd(string end,ref string s)
    {
        // Deliberately no mixin Check here.

        auto n = s.indexOf(end);
        if (n == -1) throw new Err(s,"Unable to find terminating \""~end~"\"");
        s = s[n..$];
        checkLiteral(end,s);
    }

    // Metafunctions -- none of these use mixin Check

    /// Applies f once and ignores an Err thrown by it.
    void opt(alias f)(ref string s)
    {
        try { f(s); } catch(Err e) {}
    }

    /// Applies f once, letting failure propagate, then as often as possible.
    void plus(alias f)(ref string s)
    {
        f(s);
        star!(f)(s);
    }

    /// Applies f repeatedly until it throws Err or s is exhausted.
    void star(alias f)(ref string s)
    {
        while (s.length != 0)
        {
            try { f(s); }
            catch(Err e) { return; }
        }
    }

    /// Applies f between a pair of matching single or double quotes.
    void quoted(alias f)(ref string s)
    {
        if (s.startsWith("'"))
        {
            checkLiteral("'",s);
            f(s);
            checkLiteral("'",s);
        }
        else
        {
            checkLiteral("\"",s);
            f(s);
            checkLiteral("\"",s);
        }
    }

    /// Applies f and then g.
    void seq(alias f,alias g)(ref string s)
    {
        f(s);
        g(s);
    }
}

/**
 * Check an entire XML document for well-formedness
 *
 * Params:
 *      s = the document to be checked, passed as a string
 *
 * Throws: CheckException if the document is not well formed
 *
 * CheckException's toString() method will yield the complete hierarchy of
 * parse failure (the XML equivalent of a stack trace), giving the line and
 * column number of every failure at every level.
 */
void check(string s)
{
    // The check* helpers advance s by reference, so keep the untouched
    // document around: line and column numbers must be computed against it.
    string entire = s;
    try
    {
        checkChars(s);
        checkDocument(s);
        if (s.length != 0) throw new Err(s,"Junk found after document");
    }
    catch(Err e)
    {
        e.complete(entire);
        throw e;
    }
}

/// Describes one level of a failed well-formedness check; levels are chained
/// through `err`.
class CheckException : XMLException
{
    CheckException err; /// Parent in hierarchy
    private string tail;
    /**
     * Name of production rule which failed to parse,
     * or specific error message
     */
    string msg;
    size_t line = 0; /// Line number at which parse failure occurred
    size_t column = 0; /// Column number at which parse failure occurred

    /**
     * Params:
     *      tail = the unparsed remainder of the input at the point of failure
     *      msg = production name or specific error message
     *      err = (optional) the failure that caused this one
     */
    public this(string tail,string msg,Err err=null)
    {
        super(null);
        this.tail = tail;
        this.msg = msg;
        this.err = err;
    }

    /// Fills in line and column, for this exception and every chained one,
    /// from the length of `tail` relative to the whole document `entire`.
    private void complete(string entire)
    {
        // Everything before the failure position.
        string head = entire[0..$-tail.length];
        ptrdiff_t n = head.lastIndexOf('\n') + 1;
        line = head.count("\n") + 1;
        // Transcode the last line so the column counts code points, not bytes.
        dstring t;
        transcode(head[n..$],t);
        column = t.length + 1;
        if (err !is null) err.complete(entire);
    }

    /// Returns one line per level of the failure chain, with the text of the
    /// chained `err` placed before this level's own line.
    override string toString() const
    {
        string s;
        if (line != 0) s = format("Line %d, column %d: ",line,column);
        s ~= msg;
        s ~= '\n';
        if (err !is null) s = err.toString() ~ s;
        return s;
    }
}

public alias Err = CheckException;

// Private helper functions
public
{
    /// Casts o to T.
    /// Throws: InvalidTypeException if the cast yields null.
    T toType(T)(Object o)
    {
        T t = cast(T)(o);
        if (t is null)
        {
            throw new InvalidTypeException("Attempt to compare a "
                ~ T.stringof ~ " with an instance of another type");
        }
        return t;
    }

    /// Removes the first n bytes from s and returns them; an n equal to
    /// -1 (converted to size_t) means "everything".
    string chop(ref string s, size_t n)
    {
        if (n == -1) n = s.length;
        string t = s[0..n];
        s = s[n..$];
        return t;
    }

    /// Consumes c from the front of s if present; returns whether it did.
    bool optc(ref string s, char c)
    {
        bool b = s.length != 0 && s[0] == c;
        if (b) s = s[1..$];
        return b;
    }

    /// Consumes c from the front of s.
    /// Throws: TagException if s does not start with c.
    void reqc(ref string s, char c)
    {
        if (s.length == 0 || s[0] != c) throw new TagException("");
        s = s[1..$];
    }

    /// Returns the TypeInfo hash of s plus h.
    size_t hash(string s,size_t h=0) @trusted nothrow
    {
        return typeid(s).getHash(&s) + h;
    }

    // Definitions from the XML specification
    //
    // Each table is a flat, ascending list of inclusive [low, high] pairs,
    // which is the layout lookup() expects.
    immutable CharTable=[0x9,0x9,0xA,0xA,0xD,0xD,0x20,0xD7FF,0xE000,0xFFFD,
        0x10000,0x10FFFF];
    immutable BaseCharTable=[0x0041,0x005A,0x0061,0x007A,0x00C0,0x00D6,0x00D8,
        0x00F6,0x00F8,0x00FF,0x0100,0x0131,0x0134,0x013E,0x0141,0x0148,0x014A,
        0x017E,0x0180,0x01C3,0x01CD,0x01F0,0x01F4,0x01F5,0x01FA,0x0217,0x0250,
        0x02A8,0x02BB,0x02C1,0x0386,0x0386,0x0388,0x038A,0x038C,0x038C,0x038E,
        0x03A1,0x03A3,0x03CE,0x03D0,0x03D6,0x03DA,0x03DA,0x03DC,0x03DC,0x03DE,
        0x03DE,0x03E0,0x03E0,0x03E2,0x03F3,0x0401,0x040C,0x040E,0x044F,0x0451,
        0x045C,0x045E,0x0481,0x0490,0x04C4,0x04C7,0x04C8,0x04CB,0x04CC,0x04D0,
        0x04EB,0x04EE,0x04F5,0x04F8,0x04F9,0x0531,0x0556,0x0559,0x0559,0x0561,
        0x0586,0x05D0,0x05EA,0x05F0,0x05F2,0x0621,0x063A,0x0641,0x064A,0x0671,
        0x06B7,0x06BA,0x06BE,0x06C0,0x06CE,0x06D0,0x06D3,0x06D5,0x06D5,0x06E5,
        0x06E6,0x0905,0x0939,0x093D,0x093D,0x0958,0x0961,0x0985,0x098C,0x098F,
        0x0990,0x0993,0x09A8,0x09AA,0x09B0,0x09B2,0x09B2,0x09B6,0x09B9,0x09DC,
        0x09DD,0x09DF,0x09E1,0x09F0,0x09F1,0x0A05,0x0A0A,0x0A0F,0x0A10,0x0A13,
        0x0A28,0x0A2A,0x0A30,0x0A32,0x0A33,0x0A35,0x0A36,0x0A38,0x0A39,0x0A59,
        0x0A5C,0x0A5E,0x0A5E,0x0A72,0x0A74,0x0A85,0x0A8B,0x0A8D,0x0A8D,0x0A8F,
        0x0A91,0x0A93,0x0AA8,0x0AAA,0x0AB0,0x0AB2,0x0AB3,0x0AB5,0x0AB9,0x0ABD,
        0x0ABD,0x0AE0,0x0AE0,0x0B05,0x0B0C,0x0B0F,0x0B10,0x0B13,0x0B28,0x0B2A,
        0x0B30,0x0B32,0x0B33,0x0B36,0x0B39,0x0B3D,0x0B3D,0x0B5C,0x0B5D,0x0B5F,
        0x0B61,0x0B85,0x0B8A,0x0B8E,0x0B90,0x0B92,0x0B95,0x0B99,0x0B9A,0x0B9C,
        0x0B9C,0x0B9E,0x0B9F,0x0BA3,0x0BA4,0x0BA8,0x0BAA,0x0BAE,0x0BB5,0x0BB7,
        0x0BB9,0x0C05,0x0C0C,0x0C0E,0x0C10,0x0C12,0x0C28,0x0C2A,0x0C33,0x0C35,
        0x0C39,0x0C60,0x0C61,0x0C85,0x0C8C,0x0C8E,0x0C90,0x0C92,0x0CA8,0x0CAA,
        0x0CB3,0x0CB5,0x0CB9,0x0CDE,0x0CDE,0x0CE0,0x0CE1,0x0D05,0x0D0C,0x0D0E,
        0x0D10,0x0D12,0x0D28,0x0D2A,0x0D39,0x0D60,0x0D61,0x0E01,0x0E2E,0x0E30,
        0x0E30,0x0E32,0x0E33,0x0E40,0x0E45,0x0E81,0x0E82,0x0E84,0x0E84,0x0E87,
        0x0E88,0x0E8A,0x0E8A,0x0E8D,0x0E8D,0x0E94,0x0E97,0x0E99,0x0E9F,0x0EA1,
        0x0EA3,0x0EA5,0x0EA5,0x0EA7,0x0EA7,0x0EAA,0x0EAB,0x0EAD,0x0EAE,0x0EB0,
        0x0EB0,0x0EB2,0x0EB3,0x0EBD,0x0EBD,0x0EC0,0x0EC4,0x0F40,0x0F47,0x0F49,
        0x0F69,0x10A0,0x10C5,0x10D0,0x10F6,0x1100,0x1100,0x1102,0x1103,0x1105,
        0x1107,0x1109,0x1109,0x110B,0x110C,0x110E,0x1112,0x113C,0x113C,0x113E,
        0x113E,0x1140,0x1140,0x114C,0x114C,0x114E,0x114E,0x1150,0x1150,0x1154,
        0x1155,0x1159,0x1159,0x115F,0x1161,0x1163,0x1163,0x1165,0x1165,0x1167,
        0x1167,0x1169,0x1169,0x116D,0x116E,0x1172,0x1173,0x1175,0x1175,0x119E,
        0x119E,0x11A8,0x11A8,0x11AB,0x11AB,0x11AE,0x11AF,0x11B7,0x11B8,0x11BA,
        0x11BA,0x11BC,0x11C2,0x11EB,0x11EB,0x11F0,0x11F0,0x11F9,0x11F9,0x1E00,
        0x1E9B,0x1EA0,0x1EF9,0x1F00,0x1F15,0x1F18,0x1F1D,0x1F20,0x1F45,0x1F48,
        0x1F4D,0x1F50,0x1F57,0x1F59,0x1F59,0x1F5B,0x1F5B,0x1F5D,0x1F5D,0x1F5F,
        0x1F7D,0x1F80,0x1FB4,0x1FB6,0x1FBC,0x1FBE,0x1FBE,0x1FC2,0x1FC4,0x1FC6,
        0x1FCC,0x1FD0,0x1FD3,0x1FD6,0x1FDB,0x1FE0,0x1FEC,0x1FF2,0x1FF4,0x1FF6,
        0x1FFC,0x2126,0x2126,0x212A,0x212B,0x212E,0x212E,0x2180,0x2182,0x3041,
        0x3094,0x30A1,0x30FA,0x3105,0x312C,0xAC00,0xD7A3];
    immutable IdeographicTable=[0x3007,0x3007,0x3021,0x3029,0x4E00,0x9FA5];
    immutable CombiningCharTable=[0x0300,0x0345,0x0360,0x0361,0x0483,0x0486,
        0x0591,0x05A1,0x05A3,0x05B9,0x05BB,0x05BD,0x05BF,0x05BF,0x05C1,0x05C2,
        0x05C4,0x05C4,0x064B,0x0652,0x0670,0x0670,0x06D6,0x06DC,0x06DD,0x06DF,
        0x06E0,0x06E4,0x06E7,0x06E8,0x06EA,0x06ED,0x0901,0x0903,0x093C,0x093C,
        0x093E,0x094C,0x094D,0x094D,0x0951,0x0954,0x0962,0x0963,0x0981,0x0983,
        0x09BC,0x09BC,0x09BE,0x09BE,0x09BF,0x09BF,0x09C0,0x09C4,0x09C7,0x09C8,
        0x09CB,0x09CD,0x09D7,0x09D7,0x09E2,0x09E3,0x0A02,0x0A02,0x0A3C,0x0A3C,
        0x0A3E,0x0A3E,0x0A3F,0x0A3F,0x0A40,0x0A42,0x0A47,0x0A48,0x0A4B,0x0A4D,
        0x0A70,0x0A71,0x0A81,0x0A83,0x0ABC,0x0ABC,0x0ABE,0x0AC5,0x0AC7,0x0AC9,
        0x0ACB,0x0ACD,0x0B01,0x0B03,0x0B3C,0x0B3C,0x0B3E,0x0B43,0x0B47,0x0B48,
        0x0B4B,0x0B4D,0x0B56,0x0B57,0x0B82,0x0B83,0x0BBE,0x0BC2,0x0BC6,0x0BC8,
        0x0BCA,0x0BCD,0x0BD7,0x0BD7,0x0C01,0x0C03,0x0C3E,0x0C44,0x0C46,0x0C48,
        0x0C4A,0x0C4D,0x0C55,0x0C56,0x0C82,0x0C83,0x0CBE,0x0CC4,0x0CC6,0x0CC8,
        0x0CCA,0x0CCD,0x0CD5,0x0CD6,0x0D02,0x0D03,0x0D3E,0x0D43,0x0D46,0x0D48,
        0x0D4A,0x0D4D,0x0D57,0x0D57,0x0E31,0x0E31,0x0E34,0x0E3A,0x0E47,0x0E4E,
        0x0EB1,0x0EB1,0x0EB4,0x0EB9,0x0EBB,0x0EBC,0x0EC8,0x0ECD,0x0F18,0x0F19,
        0x0F35,0x0F35,0x0F37,0x0F37,0x0F39,0x0F39,0x0F3E,0x0F3E,0x0F3F,0x0F3F,
        0x0F71,0x0F84,0x0F86,0x0F8B,0x0F90,0x0F95,0x0F97,0x0F97,0x0F99,0x0FAD,
        0x0FB1,0x0FB7,0x0FB9,0x0FB9,0x20D0,0x20DC,0x20E1,0x20E1,0x302A,0x302F,
        0x3099,0x3099,0x309A,0x309A];
    immutable DigitTable=[0x0030,0x0039,0x0660,0x0669,0x06F0,0x06F9,0x0966,
        0x096F,0x09E6,0x09EF,0x0A66,0x0A6F,0x0AE6,0x0AEF,0x0B66,0x0B6F,0x0BE7,
        0x0BEF,0x0C66,0x0C6F,0x0CE6,0x0CEF,0x0D66,0x0D6F,0x0E50,0x0E59,0x0ED0,
        0x0ED9,0x0F20,0x0F29];
    immutable ExtenderTable=[0x00B7,0x00B7,0x02D0,0x02D0,0x02D1,0x02D1,0x0387,
        0x0387,0x0640,0x0640,0x0E46,0x0E46,0x0EC6,0x0EC6,0x3005,0x3005,0x3031,
        0x3035,0x309D,0x309E,0x30FC,0x30FE];

    /**
     * Binary search of a range table.
     *
     * Params:
     *      table = flat list of inclusive [low, high] pairs in ascending order
     *      c = the value to look for
     *
     * Returns: true if c lies within one of the pairs
     */
    bool lookup(const(int)[] table, int c)
    {
        while (table.length != 0)
        {
            // Index of the middle pair's low bound: half the length, forced
            // to an even index.
            auto m = (table.length >> 1) & ~1;
            if (c < table[m])
            {
                table = table[0..m];
            }
            else if (c > table[m+1])
            {
                table = table[m+2..$];
            }
            else return true;
        }
        return false;
    }

    /// Returns a printable preview of s: bytes below 0x20 or above 0x7F are
    /// shown as '.', and "___" is appended once 40 bytes have been copied.
    string startOf(string s)
    {
        string r;
        foreach(char c;s)
        {
            r ~= (c < 0x20 || c > 0x7F) ? '.' : c;
            if (r.length >= 40) { r ~= "___"; break; }
        }
        return r;
    }

    /// Throws an XMLException carrying the message s.
    void exit(string s=null)
    {
        throw new XMLException(s);
    }
}