1 module deepmagic.dom.xml.xml;
2 
3 import deepmagic.dom;
4 /+
5 import std.algorithm : count, startsWith;
6 import std.array;
7 import std.ascii;
8 import std.string;
9 import std.encoding;
10 +/
/// Opening delimiter of a CDATA section; used by checkCDSect and checkContent.
enum cdata = "<![CDATA[";
12 
/**
 * Tests whether a code point is accepted as an XML character.
 *
 * Accepted: tab (0x9), line feed (0xA), carriage return (0xD),
 * 0x20 through 0xD7FF, and 0xE000 through 0x10FFFF except U+FFFE and U+FFFF.
 *
 * Params:
 *	  c = the code point to test
 *
 * Returns: true if c is accepted, false otherwise
 */
bool isChar(dchar c)
{
	// Below the surrogate range: everything from space upwards, plus the
	// three permitted control characters.
	if (c <= 0xD7FF)
		return c >= 0x20 || c == 0xA || c == 0x9 || c == 0xD;

	// Above the surrogate range: everything except U+FFFE and U+FFFF
	// (masking off bit 0 maps both onto 0xFFFE).
	if (c >= 0xE000 && c <= 0x10FFFF)
		return (c & 0x1FFFFE) != 0xFFFE;

	return false;
}
36 
/**
 * Tests whether a code point is one of the four whitespace characters
 * recognised here: space, tab, line feed or carriage return.
 *
 * Params:
 *	  c = the code point to test
 *
 * Returns: true if c is whitespace, false otherwise
 */
bool isSpace(dchar c)
{
	switch (c)
	{
	case '\u0020', '\u0009', '\u000A', '\u000D':
		return true;
	default:
		return false;
	}
}
41 
/**
 * Tests whether a code point is a digit.
 *
 * ASCII '0'..'9' are answered directly; every other code point is looked
 * up in DigitTable.
 *
 * Params:
 *	  c = the code point to test
 *
 * Returns: true if c is a digit, false otherwise
 */
bool isDigit(dchar c)
{
	// Fast path for the ASCII digits before falling back to the table.
	immutable bool asciiDigit = c >= 0x0030 && c <= 0x0039;
	return asciiDigit || lookup(DigitTable,c);
}
49 
/**
 * Tests whether a code point is a letter: either ideographic
 * (isIdeographic) or a base character (isBaseChar).
 *
 * Params:
 *	  c = the code point to test
 *
 * Returns: true if c is a letter, false otherwise
 */
bool isLetter(dchar c) // rule 84
{
	// The ideographic test is a few comparisons, so try it first.
	if (isIdeographic(c))
		return true;
	return isBaseChar(c);
}
54 
/**
 * Tests whether a code point is ideographic: U+3007, U+3021..U+3029
 * or U+4E00..U+9FA5.
 *
 * Params:
 *	  c = the code point to test
 *
 * Returns: true if c lies in one of those ranges, false otherwise
 */
bool isIdeographic(dchar c)
{
	return c == 0x3007
		|| (c >= 0x3021 && c <= 0x3029)
		|| (c >= 0x4E00 && c <= 0x9FA5);
}
65 
/**
 * Tests whether a code point falls inside one of the ranges listed in
 * BaseCharTable.
 *
 * Params:
 *	  c = the code point to test
 *
 * Returns: true if c is found in the table, false otherwise
 */
bool isBaseChar(dchar c)
{
	immutable bool listed = lookup(BaseCharTable,c);
	return listed;
}
70 
/**
 * Tests whether a code point falls inside one of the ranges listed in
 * CombiningCharTable.
 *
 * Params:
 *	  c = the code point to test
 *
 * Returns: true if c is found in the table, false otherwise
 */
bool isCombiningChar(dchar c)
{
	immutable bool listed = lookup(CombiningCharTable,c);
	return listed;
}
75 
/**
 * Tests whether a code point falls inside one of the ranges listed in
 * ExtenderTable.
 *
 * Params:
 *	  c = the code point to test
 *
 * Returns: true if c is found in the table, false otherwise
 */
bool isExtender(dchar c)
{
	immutable bool listed = lookup(ExtenderTable,c);
	return listed;
}
80 
/**
 * Escapes the five characters &, ", ', < and > as the entities
 * &amp;amp;, &amp;quot;, &amp;apos;, &amp;lt; and &amp;gt;.
 *
 * If the input contains none of those characters, the input itself is
 * returned and nothing is allocated.
 *
 * Params:
 *	  s = the string to be encoded
 *
 * Returns: the encoded string, or s itself when nothing needed escaping
 */
S encode(S)(S s)
{
	auto output = appender!S();
	size_t copiedUpTo; // index just past the last character already handled

	foreach (idx, ch; s)
	{
		string entity;
		if      (ch == '&')  entity = "&amp;";
		else if (ch == '"')  entity = "&quot;";
		else if (ch == '\'') entity = "&apos;";
		else if (ch == '<')  entity = "&lt;";
		else if (ch == '>')  entity = "&gt;";
		else continue;

		// Flush the untouched run before this character, then the entity.
		output.put(s[copiedUpTo .. idx]);
		output.put(entity);
		copiedUpTo = idx + 1;
	}

	// Nothing was ever written: no character needed escaping.
	if (output.data.ptr is null) return s;

	output.put(s[copiedUpTo .. $]);
	return output.data;
}
108 
/**
 * Selects how decode() treats entity and character references.
 *
 * NONE   - decode() returns its input unchanged.
 * LOOSE  - references are decoded; an ampersand that does not start a
 *		  recognised reference is copied through as-is.
 * STRICT - references are decoded; an ampersand that does not start a
 *		  recognised reference makes decode() throw DecodeException.
 */
enum DecodeMode
{
	NONE, LOOSE, STRICT
}
113 
/**
 * Decodes a string by unescaping all predefined XML entities.
 *
 * encode() escapes certain characters (ampersand, quote, apostrophe, less-than
 * and greater-than), and similarly, decode() unescapes them.
 *
 * This function decodes the entities &amp;amp;, &amp;quot;, &amp;apos;,
 * &amp;lt; and &amp;gt;,
 * as well as decimal and hexadecimal character references such as
 * &amp;#x20AC;
 *
 * If the string does not contain an ampersand, the original will be returned.
 *
 * Note that the "mode" parameter can be one of DecodeMode.NONE (do not
 * decode), DecodeMode.LOOSE (decode, but ignore errors), or DecodeMode.STRICT
 * (decode, and throw a DecodeException in the event of an error).
 *
 * Standards: $(LINK2 http://www.w3.org/TR/1998/REC-xml-19980210, XML 1.0)
 *
 * Params:
 *	  s = The string to be decoded
 *	  mode = (optional) Mode to use for decoding. (Defaults to LOOSE).
 *
 * Throws: DecodeException if mode == DecodeMode.STRICT and decode fails
 *
 * Returns: The decoded string
 *
 * Examples:
 * --------------
 * writefln(decode("a &gt; b")); // writes "a > b"
 * --------------
 */
string decode(string s, DecodeMode mode=DecodeMode.LOOSE)
{
	import std.algorithm.searching : startsWith;
	// Inside this function `encode` means std.utf.encode (dchar -> UTF-8),
	// not the XML-escaping encode() declared in this module.
	import std.utf : encode;

	if (mode == DecodeMode.NONE) return s;

	// Stays empty until the first '&' is met, so that input without any
	// ampersand is returned without copying.
	char[] buffer;
	foreach (ref i; 0 .. s.length)
	{
		char c = s[i];
		if (c != '&')
		{
			if (buffer.length != 0) buffer ~= c;
		}
		else
		{
			if (buffer.length == 0)
			{
				buffer = s[0 .. i].dup;
			}
			if (startsWith(s[i..$],"&#"))
			{
				try
				{
					dchar d;
					string t = s[i..$];
					checkCharRef(t, d);
					char[4] temp;
					// Call the selectively imported function directly: a
					// selective import does not make the fully qualified
					// name std.utf.encode available.
					buffer ~= temp[0 .. encode(temp, d)];
					// t is now the text after the reference; park i on the
					// terminating ';' so the loop increment moves past it.
					i = s.length - t.length - 1;
				}
				catch(Err e)
				{
					if (mode == DecodeMode.STRICT)
						throw new DecodeException("Unescaped &");
					buffer ~= '&';
				}
			}
			// For the named entities, i is advanced to the ';' of the entity.
			else if (startsWith(s[i..$],"&amp;" )) { buffer ~= '&';  i += 4; }
			else if (startsWith(s[i..$],"&quot;")) { buffer ~= '"';  i += 5; }
			else if (startsWith(s[i..$],"&apos;")) { buffer ~= '\''; i += 5; }
			else if (startsWith(s[i..$],"&lt;"  )) { buffer ~= '<';  i += 3; }
			else if (startsWith(s[i..$],"&gt;"  )) { buffer ~= '>';  i += 3; }
			else
			{
				if (mode == DecodeMode.STRICT)
					throw new DecodeException("Unescaped &");
				buffer ~= '&';
			}
		}
	}
	return (buffer.length == 0) ? s : cast(string)buffer;
}
201 
// Exercises decode() in STRICT and LOOSE mode.
unittest
{
	// True when strict decoding of `input` raises DecodeException.
	bool strictlyRejected(string input)
	{
		try
		{
			decode(input, DecodeMode.STRICT);
			return false;
		}
		catch (DecodeException e)
		{
			return true;
		}
	}

	// Without an ampersand the very same string must be handed back.
	auto plain = "hello";
	assert(decode(plain, DecodeMode.STRICT) is plain);

	// Well-formed references decode under STRICT: [input, expected].
	string[2][] wellFormed = [
		["a &gt; b",       "a > b"],
		["a &lt; b",       "a < b"],
		["don&apos;t",     "don't"],
		["&quot;hi&quot;", "\"hi\""],
		["cat &amp; dog",  "cat & dog"],
		["&#42;",          "*"],
		["&#x2A;",         "*"],
	];
	foreach (pair; wellFormed)
		assert(decode(pair[0], DecodeMode.STRICT) == pair[1]);

	// Malformed references: LOOSE hands them back unchanged ...
	string[] malformed = [
		"cat & dog",
		"a &gt b",
		"&#;",
		"&#x;",
		"&#2G;",
		"&#x2G;",
	];
	foreach (bad; malformed)
		assert(decode(bad, DecodeMode.LOOSE) == bad);

	// ... while STRICT must throw for every one of them.
	foreach (bad; malformed)
		assert(strictlyRejected(bad), bad);
}
237 
238 
239 
240 
241 
242 
243 
244 
245 
246 
247 
248 
249 
250 
251 public
252 {
	/**
	 * Failure-handling helpers, mixed into each check function with
	 * `mixin Check!("RuleName")`.
	 *
	 * The mixing-in scope must provide a string variable named `s`.
	 * `old` captures the value of `s` at the point of the mixin; every
	 * fail() overload puts that value back into `s` before throwing an Err
	 * whose message is `msg`.
	 */
	template Check(string msg)
	{
		// Value of s at the point of the mixin, restored on failure.
		string old = s;

		// Fail with just the rule name.
		void fail()
		{
			s = old;
			throw new Err(s,msg);
		}

		// Fail with the rule name, chaining the error from a sub-check.
		void fail(Err e)
		{
			s = old;
			throw new Err(s,msg,e);
		}

		// Fail with a specific message: the inner Err is built first, so it
		// records s at the point of failure, then chained under the rule name.
		void fail(string msg2)
		{
			fail(new Err(s,msg2));
		}
	}
274 
275 	void checkMisc(ref string s) // rule 27
276 	{
277 		mixin Check!("Misc");
278 
279 		try
280 		{
281 				 if (s.startsWith("<!--")) { checkComment(s); }
282 			else if (s.startsWith("<?"))   { checkPI(s); }
283 			else						   { checkSpace(s); }
284 		}
285 		catch(Err e) { fail(e); }
286 	}
287 
288 	void checkDocument(ref string s) // rule 1
289 	{
290 		mixin Check!("Document");
291 		try
292 		{
293 			checkProlog(s);
294 			checkElement(s);
295 			star!(checkMisc)(s);
296 		}
297 		catch(Err e) { fail(e); }
298 	}
299 
300 	void checkChars(ref string s) // rule 2
301 	{
302 		// TO DO - Fix std.utf stride and decode functions, then use those
303 		// instead
304 
305 		mixin Check!("Chars");
306 
307 		dchar c;
308 		int n = -1;
309 		foreach(int i,dchar d; s)
310 		{
311 			if (!isChar(d))
312 			{
313 				c = d;
314 				n = i;
315 				break;
316 			}
317 		}
318 		if (n != -1)
319 		{
320 			s = s[n..$];
321 			fail(format("invalid character: U+%04X",c));
322 		}
323 	}
324 
325 	void checkSpace(ref string s) // rule 3
326 	{
327 		mixin Check!("Whitespace");
328 		munch(s,"\u0020\u0009\u000A\u000D");
329 		if (s is old) fail();
330 	}
331 
332 	void checkName(ref string s, out string name) // rule 5
333 	{
334 		mixin Check!("Name");
335 
336 		if (s.length == 0) fail();
337 		int n;
338 		foreach(int i,dchar c;s)
339 		{
340 			if (c == '_' || c == ':' || isLetter(c)) continue;
341 			if (i == 0) fail();
342 			if (c == '-' || c == '.' || isDigit(c)
343 				|| isCombiningChar(c) || isExtender(c)) continue;
344 			n = i;
345 			break;
346 		}
347 		name = s[0..n];
348 		s = s[n..$];
349 	}
350 
351 	void checkAttValue(ref string s) // rule 10
352 	{
353 		mixin Check!("AttValue");
354 
355 		if (s.length == 0) fail();
356 		char c = s[0];
357 		if (c != '\u0022' && c != '\u0027')
358 			fail("attribute value requires quotes");
359 		s = s[1..$];
360 		for(;;)
361 		{
362 			munch(s,"^<&"~c);
363 			if (s.length == 0) fail("unterminated attribute value");
364 			if (s[0] == '<') fail("< found in attribute value");
365 			if (s[0] == c) break;
366 			try { checkReference(s); } catch(Err e) { fail(e); }
367 		}
368 		s = s[1..$];
369 	}
370 
371 	void checkCharData(ref string s) // rule 14
372 	{
373 		mixin Check!("CharData");
374 
375 		while (s.length != 0)
376 		{
377 			if (s.startsWith("&")) break;
378 			if (s.startsWith("<")) break;
379 			if (s.startsWith("]]>")) fail("]]> found within char data");
380 			s = s[1..$];
381 		}
382 	}
383 
384 	void checkComment(ref string s) // rule 15
385 	{
386 		mixin Check!("Comment");
387 
388 		try { checkLiteral("<!--",s); } catch(Err e) { fail(e); }
389 		ptrdiff_t n = s.indexOf("--");
390 		if (n == -1) fail("unterminated comment");
391 		s = s[n..$];
392 		try { checkLiteral("-->",s); } catch(Err e) { fail(e); }
393 	}
394 
395 	void checkPI(ref string s) // rule 16
396 	{
397 		mixin Check!("PI");
398 
399 		try
400 		{
401 			checkLiteral("<?",s);
402 			checkEnd("?>",s);
403 		}
404 		catch(Err e) { fail(e); }
405 	}
406 
407 	void checkCDSect(ref string s) // rule 18
408 	{
409 		mixin Check!("CDSect");
410 
411 		try
412 		{
413 			checkLiteral(cdata,s);
414 			checkEnd("]]>",s);
415 		}
416 		catch(Err e) { fail(e); }
417 	}
418 
419 	void checkProlog(ref string s) // rule 22
420 	{
421 		mixin Check!("Prolog");
422 
423 		try
424 		{
425 			/* The XML declaration is optional
426 			 * http://www.w3.org/TR/2008/REC-xml-20081126/#NT-prolog
427 			 */
428 			opt!(checkXMLDecl)(s);
429 
430 			star!(checkMisc)(s);
431 			opt!(seq!(checkDocTypeDecl,star!(checkMisc)))(s);
432 		}
433 		catch(Err e) { fail(e); }
434 	}
435 
436 	void checkXMLDecl(ref string s) // rule 23
437 	{
438 		mixin Check!("XMLDecl");
439 
440 		try
441 		{
442 			checkLiteral("<?xml",s);
443 			checkVersionInfo(s);
444 			opt!(checkEncodingDecl)(s);
445 			opt!(checkSDDecl)(s);
446 			opt!(checkSpace)(s);
447 			checkLiteral("?>",s);
448 		}
449 		catch(Err e) { fail(e); }
450 	}
451 
452 	void checkVersionInfo(ref string s) // rule 24
453 	{
454 		mixin Check!("VersionInfo");
455 
456 		try
457 		{
458 			checkSpace(s);
459 			checkLiteral("version",s);
460 			checkEq(s);
461 			quoted!(checkVersionNum)(s);
462 		}
463 		catch(Err e) { fail(e); }
464 	}
465 
466 	void checkEq(ref string s) // rule 25
467 	{
468 		mixin Check!("Eq");
469 
470 		try
471 		{
472 			opt!(checkSpace)(s);
473 			checkLiteral("=",s);
474 			opt!(checkSpace)(s);
475 		}
476 		catch(Err e) { fail(e); }
477 	}
478 
479 	void checkVersionNum(ref string s) // rule 26
480 	{
481 		mixin Check!("VersionNum");
482 
483 		munch(s,"a-zA-Z0-9_.:-");
484 		if (s is old) fail();
485 	}
486 
487 	void checkDocTypeDecl(ref string s) // rule 28
488 	{
489 		mixin Check!("DocTypeDecl");
490 
491 		try
492 		{
493 			checkLiteral("<!DOCTYPE",s);
494 			//
495 			// TO DO -- ensure DOCTYPE is well formed
496 			// (But not yet. That's one of our "future directions")
497 			//
498 			checkEnd(">",s);
499 		}
500 		catch(Err e) { fail(e); }
501 	}
502 
503 	void checkSDDecl(ref string s) // rule 32
504 	{
505 		mixin Check!("SDDecl");
506 
507 		try
508 		{
509 			checkSpace(s);
510 			checkLiteral("standalone",s);
511 			checkEq(s);
512 		}
513 		catch(Err e) { fail(e); }
514 
515 		int n = 0;
516 			 if (s.startsWith("'yes'") || s.startsWith("\"yes\"")) n = 5;
517 		else if (s.startsWith("'no'" ) || s.startsWith("\"no\"" )) n = 4;
518 		else fail("standalone attribute value must be 'yes', \"yes\","~
519 			" 'no' or \"no\"");
520 		s = s[n..$];
521 	}
522 
523 	void checkElement(ref string s) // rule 39
524 	{
525 		mixin Check!("Element");
526 
527 		string sname,ename,t;
528 		try { checkTag(s,t,sname); } catch(Err e) { fail(e); }
529 
530 		if (t == "STag")
531 		{
532 			try
533 			{
534 				checkContent(s);
535 				t = s;
536 				checkETag(s,ename);
537 			}
538 			catch(Err e) { fail(e); }
539 
540 			if (sname != ename)
541 			{
542 				s = t;
543 				fail("end tag name \"" ~ ename
544 					~ "\" differs from start tag name \""~sname~"\"");
545 			}
546 		}
547 	}
548 
549 	// rules 40 and 44
550 	void checkTag(ref string s, out string type, out string name)
551 	{
552 		mixin Check!("Tag");
553 
554 		try
555 		{
556 			type = "STag";
557 			checkLiteral("<",s);
558 			checkName(s,name);
559 			star!(seq!(checkSpace,checkAttribute))(s);
560 			opt!(checkSpace)(s);
561 			if (s.length != 0 && s[0] == '/')
562 			{
563 				s = s[1..$];
564 				type = "ETag";
565 			}
566 			checkLiteral(">",s);
567 		}
568 		catch(Err e) { fail(e); }
569 	}
570 
571 	void checkAttribute(ref string s) // rule 41
572 	{
573 		mixin Check!("Attribute");
574 
575 		try
576 		{
577 			string name;
578 			checkName(s,name);
579 			checkEq(s);
580 			checkAttValue(s);
581 		}
582 		catch(Err e) { fail(e); }
583 	}
584 
585 	void checkETag(ref string s, out string name) // rule 42
586 	{
587 		mixin Check!("ETag");
588 
589 		try
590 		{
591 			checkLiteral("</",s);
592 			checkName(s,name);
593 			opt!(checkSpace)(s);
594 			checkLiteral(">",s);
595 		}
596 		catch(Err e) { fail(e); }
597 	}
598 
599 	void checkContent(ref string s) // rule 43
600 	{
601 		mixin Check!("Content");
602 
603 		try
604 		{
605 			while (s.length != 0)
606 			{
607 				old = s;
608 					 if (s.startsWith("&"))		{ checkReference(s); }
609 				else if (s.startsWith("<!--"))	 { checkComment(s); }
610 				else if (s.startsWith("<?"))	   { checkPI(s); }
611 				else if (s.startsWith(cdata)) { checkCDSect(s); }
612 				else if (s.startsWith("</"))	   { break; }
613 				else if (s.startsWith("<"))		{ checkElement(s); }
614 				else							   { checkCharData(s); }
615 			}
616 		}
617 		catch(Err e) { fail(e); }
618 	}
619 
620 	void checkCharRef(ref string s, out dchar c) // rule 66
621 	{
622 		mixin Check!("CharRef");
623 
624 		c = 0;
625 		try { checkLiteral("&#",s); } catch(Err e) { fail(e); }
626 		int radix = 10;
627 		if (s.length != 0 && s[0] == 'x')
628 		{
629 			s = s[1..$];
630 			radix = 16;
631 		}
632 		if (s.length == 0) fail("unterminated character reference");
633 		if (s[0] == ';')
634 			fail("character reference must have at least one digit");
635 		while (s.length != 0)
636 		{
637 			char d = s[0];
638 			int n = 0;
639 			switch(d)
640 			{
641 				case 'F','f': ++n;	  goto case;
642 				case 'E','e': ++n;	  goto case;
643 				case 'D','d': ++n;	  goto case;
644 				case 'C','c': ++n;	  goto case;
645 				case 'B','b': ++n;	  goto case;
646 				case 'A','a': ++n;	  goto case;
647 				case '9':	 ++n;	  goto case;
648 				case '8':	 ++n;	  goto case;
649 				case '7':	 ++n;	  goto case;
650 				case '6':	 ++n;	  goto case;
651 				case '5':	 ++n;	  goto case;
652 				case '4':	 ++n;	  goto case;
653 				case '3':	 ++n;	  goto case;
654 				case '2':	 ++n;	  goto case;
655 				case '1':	 ++n;	  goto case;
656 				case '0':	 break;
657 				default: n = 100; break;
658 			}
659 			if (n >= radix) break;
660 			c *= radix;
661 			c += n;
662 			s = s[1..$];
663 		}
664 		if (!isChar(c)) fail(format("U+%04X is not a legal character",c));
665 		if (s.length == 0 || s[0] != ';') fail("expected ;");
666 		else s = s[1..$];
667 	}
668 
669 	void checkReference(ref string s) // rule 67
670 	{
671 		mixin Check!("Reference");
672 
673 		try
674 		{
675 			dchar c;
676 			if (s.startsWith("&#")) checkCharRef(s,c);
677 			else checkEntityRef(s);
678 		}
679 		catch(Err e) { fail(e); }
680 	}
681 
682 	void checkEntityRef(ref string s) // rule 68
683 	{
684 		mixin Check!("EntityRef");
685 
686 		try
687 		{
688 			string name;
689 			checkLiteral("&",s);
690 			checkName(s,name);
691 			checkLiteral(";",s);
692 		}
693 		catch(Err e) { fail(e); }
694 	}
695 
696 	void checkEncName(ref string s) // rule 81
697 	{
698 		mixin Check!("EncName");
699 
700 		munch(s,"a-zA-Z");
701 		if (s is old) fail();
702 		munch(s,"a-zA-Z0-9_.-");
703 	}
704 
705 	void checkEncodingDecl(ref string s) // rule 80
706 	{
707 		mixin Check!("EncodingDecl");
708 
709 		try
710 		{
711 			checkSpace(s);
712 			checkLiteral("encoding",s);
713 			checkEq(s);
714 			quoted!(checkEncName)(s);
715 		}
716 		catch(Err e) { fail(e); }
717 	}
718 
719 	// Helper functions
720 
721 	void checkLiteral(string literal,ref string s)
722 	{
723 		mixin Check!("Literal");
724 
725 		if (!s.startsWith(literal)) fail("Expected literal \""~literal~"\"");
726 		s = s[literal.length..$];
727 	}
728 
729 	void checkEnd(string end,ref string s)
730 	{
731 		// Deliberately no mixin Check here.
732 
733 		auto n = s.indexOf(end);
734 		if (n == -1) throw new Err(s,"Unable to find terminating \""~end~"\"");
735 		s = s[n..$];
736 		checkLiteral(end,s);
737 	}
738 
739 	// Metafunctions -- none of these use mixin Check
740 
741 	void opt(alias f)(ref string s)
742 	{
743 		try { f(s); } catch(Err e) {}
744 	}
745 
746 	void plus(alias f)(ref string s)
747 	{
748 		f(s);
749 		star!(f)(s);
750 	}
751 
752 	void star(alias f)(ref string s)
753 	{
754 		while (s.length != 0)
755 		{
756 			try { f(s); }
757 			catch(Err e) { return; }
758 		}
759 	}
760 
761 	void quoted(alias f)(ref string s)
762 	{
763 		if (s.startsWith("'"))
764 		{
765 			checkLiteral("'",s);
766 			f(s);
767 			checkLiteral("'",s);
768 		}
769 		else
770 		{
771 			checkLiteral("\"",s);
772 			f(s);
773 			checkLiteral("\"",s);
774 		}
775 	}
776 
777 	void seq(alias f,alias g)(ref string s)
778 	{
779 		f(s);
780 		g(s);
781 	}
782 }
783 
/**
 * Check an entire XML document for well-formedness
 *
 * Params:
 *	  s = the document to be checked, passed as a string
 *
 * Throws: CheckException if the document is not well formed
 *
 * CheckException's toString() method will yield the complete hierarchy of
 * parse failure (the XML equivalent of a stack trace), giving the line and
 * column number of every failure at every level.
 */
void check(string s)
{
	// The check functions consume s as they go. Keep the complete document
	// so that line and column numbers are always computed against it; using
	// the partly consumed s made "Junk found after document" report
	// line 1, column 1 regardless of where the junk was.
	string entire = s;

	try
	{
		checkChars(s);
		checkDocument(s);
		if (s.length != 0) throw new Err(s,"Junk found after document");
	}
	catch(Err e)
	{
		e.complete(entire);
		throw e;
	}
}
810 
/**
 * Error thrown by the check functions. Instances form a chain through
 * `err`, one link per grammar rule that failed.
 */
class CheckException : XMLException
{
	CheckException err; /// Parent in hierarchy
	private string tail; // input that was still unparsed at the failure
	/**
	 * Name of production rule which failed to parse,
	 * or specific error message
	 */
	string msg;
	size_t line = 0; /// Line number at which parse failure occurred
	size_t column = 0; /// Column number at which parse failure occurred

	/**
	 * Params:
	 *	  tail = the input remaining at the point of failure
	 *	  msg = rule name or specific error message
	 *	  err = (optional) error to chain to
	 */
	public this(string tail,string msg,Err err=null)
	{
		super(null);
		this.err = err;
		this.msg = msg;
		this.tail = tail;
	}

	// Fills in line and column for this error and every chained error,
	// given the complete text that was being checked.
	private void complete(string entire)
	{
		// Everything ahead of the unparsed tail had already been consumed.
		string consumed = entire[0..$-tail.length];
		line = consumed.count("\n") + 1;

		// The column is counted in code points on the current line.
		ptrdiff_t lineStart = consumed.lastIndexOf('\n') + 1;
		dstring decoded;
		transcode(consumed[lineStart..$],decoded);
		column = decoded.length + 1;

		if (err !is null)
			err.complete(entire);
	}

	/**
	 * Returns: one line for this error, preceded by the lines of every
	 * chained error; each line carries its position once complete() has
	 * filled it in
	 */
	override string toString() const
	{
		string text = (line != 0)
			? format("Line %d, column %d: ",line,column)
			: null;
		text ~= msg;
		text ~= '\n';
		return (err is null) ? text : err.toString() ~ text;
	}
}
852 
/// Short name for CheckException, used throughout the check functions.
public alias Err = CheckException;
854 
855 // Private helper functions
856 public
857 {
858 	T toType(T)(Object o)
859 	{
860 		T t = cast(T)(o);
861 		if (t is null)
862 		{
863 			throw new InvalidTypeException("Attempt to compare a "
864 				~ T.stringof ~ " with an instance of another type");
865 		}
866 		return t;
867 	}
868 
869 	string chop(ref string s, size_t n)
870 	{
871 		if (n == -1) n = s.length;
872 		string t = s[0..n];
873 		s = s[n..$];
874 		return t;
875 	}
876 
877 	bool optc(ref string s, char c)
878 	{
879 		bool b = s.length != 0 && s[0] == c;
880 		if (b) s = s[1..$];
881 		return b;
882 	}
883 
884 	void reqc(ref string s, char c)
885 	{
886 		if (s.length == 0 || s[0] != c) throw new TagException("");
887 		s = s[1..$];
888 	}
889 
890 	size_t hash(string s,size_t h=0) @trusted nothrow
891 	{
892 		return typeid(s).getHash(&s) + h;
893 	}
894 
895 	// Definitions from the XML specification
	// Code point ranges stored as consecutive (low, high) pairs, the layout
	// lookup() reads. Not referenced elsewhere in this part of the file
	// (isChar uses direct comparisons instead).
	immutable CharTable=[0x9,0x9,0xA,0xA,0xD,0xD,0x20,0xD7FF,0xE000,0xFFFD,
		0x10000,0x10FFFF];
	// Base character ranges, stored as consecutive (low, high) pairs in
	// ascending order; searched by isBaseChar via lookup().
	immutable BaseCharTable=[0x0041,0x005A,0x0061,0x007A,0x00C0,0x00D6,0x00D8,
		0x00F6,0x00F8,0x00FF,0x0100,0x0131,0x0134,0x013E,0x0141,0x0148,0x014A,
		0x017E,0x0180,0x01C3,0x01CD,0x01F0,0x01F4,0x01F5,0x01FA,0x0217,0x0250,
		0x02A8,0x02BB,0x02C1,0x0386,0x0386,0x0388,0x038A,0x038C,0x038C,0x038E,
		0x03A1,0x03A3,0x03CE,0x03D0,0x03D6,0x03DA,0x03DA,0x03DC,0x03DC,0x03DE,
		0x03DE,0x03E0,0x03E0,0x03E2,0x03F3,0x0401,0x040C,0x040E,0x044F,0x0451,
		0x045C,0x045E,0x0481,0x0490,0x04C4,0x04C7,0x04C8,0x04CB,0x04CC,0x04D0,
		0x04EB,0x04EE,0x04F5,0x04F8,0x04F9,0x0531,0x0556,0x0559,0x0559,0x0561,
		0x0586,0x05D0,0x05EA,0x05F0,0x05F2,0x0621,0x063A,0x0641,0x064A,0x0671,
		0x06B7,0x06BA,0x06BE,0x06C0,0x06CE,0x06D0,0x06D3,0x06D5,0x06D5,0x06E5,
		0x06E6,0x0905,0x0939,0x093D,0x093D,0x0958,0x0961,0x0985,0x098C,0x098F,
		0x0990,0x0993,0x09A8,0x09AA,0x09B0,0x09B2,0x09B2,0x09B6,0x09B9,0x09DC,
		0x09DD,0x09DF,0x09E1,0x09F0,0x09F1,0x0A05,0x0A0A,0x0A0F,0x0A10,0x0A13,
		0x0A28,0x0A2A,0x0A30,0x0A32,0x0A33,0x0A35,0x0A36,0x0A38,0x0A39,0x0A59,
		0x0A5C,0x0A5E,0x0A5E,0x0A72,0x0A74,0x0A85,0x0A8B,0x0A8D,0x0A8D,0x0A8F,
		0x0A91,0x0A93,0x0AA8,0x0AAA,0x0AB0,0x0AB2,0x0AB3,0x0AB5,0x0AB9,0x0ABD,
		0x0ABD,0x0AE0,0x0AE0,0x0B05,0x0B0C,0x0B0F,0x0B10,0x0B13,0x0B28,0x0B2A,
		0x0B30,0x0B32,0x0B33,0x0B36,0x0B39,0x0B3D,0x0B3D,0x0B5C,0x0B5D,0x0B5F,
		0x0B61,0x0B85,0x0B8A,0x0B8E,0x0B90,0x0B92,0x0B95,0x0B99,0x0B9A,0x0B9C,
		0x0B9C,0x0B9E,0x0B9F,0x0BA3,0x0BA4,0x0BA8,0x0BAA,0x0BAE,0x0BB5,0x0BB7,
		0x0BB9,0x0C05,0x0C0C,0x0C0E,0x0C10,0x0C12,0x0C28,0x0C2A,0x0C33,0x0C35,
		0x0C39,0x0C60,0x0C61,0x0C85,0x0C8C,0x0C8E,0x0C90,0x0C92,0x0CA8,0x0CAA,
		0x0CB3,0x0CB5,0x0CB9,0x0CDE,0x0CDE,0x0CE0,0x0CE1,0x0D05,0x0D0C,0x0D0E,
		0x0D10,0x0D12,0x0D28,0x0D2A,0x0D39,0x0D60,0x0D61,0x0E01,0x0E2E,0x0E30,
		0x0E30,0x0E32,0x0E33,0x0E40,0x0E45,0x0E81,0x0E82,0x0E84,0x0E84,0x0E87,
		0x0E88,0x0E8A,0x0E8A,0x0E8D,0x0E8D,0x0E94,0x0E97,0x0E99,0x0E9F,0x0EA1,
		0x0EA3,0x0EA5,0x0EA5,0x0EA7,0x0EA7,0x0EAA,0x0EAB,0x0EAD,0x0EAE,0x0EB0,
		0x0EB0,0x0EB2,0x0EB3,0x0EBD,0x0EBD,0x0EC0,0x0EC4,0x0F40,0x0F47,0x0F49,
		0x0F69,0x10A0,0x10C5,0x10D0,0x10F6,0x1100,0x1100,0x1102,0x1103,0x1105,
		0x1107,0x1109,0x1109,0x110B,0x110C,0x110E,0x1112,0x113C,0x113C,0x113E,
		0x113E,0x1140,0x1140,0x114C,0x114C,0x114E,0x114E,0x1150,0x1150,0x1154,
		0x1155,0x1159,0x1159,0x115F,0x1161,0x1163,0x1163,0x1165,0x1165,0x1167,
		0x1167,0x1169,0x1169,0x116D,0x116E,0x1172,0x1173,0x1175,0x1175,0x119E,
		0x119E,0x11A8,0x11A8,0x11AB,0x11AB,0x11AE,0x11AF,0x11B7,0x11B8,0x11BA,
		0x11BA,0x11BC,0x11C2,0x11EB,0x11EB,0x11F0,0x11F0,0x11F9,0x11F9,0x1E00,
		0x1E9B,0x1EA0,0x1EF9,0x1F00,0x1F15,0x1F18,0x1F1D,0x1F20,0x1F45,0x1F48,
		0x1F4D,0x1F50,0x1F57,0x1F59,0x1F59,0x1F5B,0x1F5B,0x1F5D,0x1F5D,0x1F5F,
		0x1F7D,0x1F80,0x1FB4,0x1FB6,0x1FBC,0x1FBE,0x1FBE,0x1FC2,0x1FC4,0x1FC6,
		0x1FCC,0x1FD0,0x1FD3,0x1FD6,0x1FDB,0x1FE0,0x1FEC,0x1FF2,0x1FF4,0x1FF6,
		0x1FFC,0x2126,0x2126,0x212A,0x212B,0x212E,0x212E,0x2180,0x2182,0x3041,
		0x3094,0x30A1,0x30FA,0x3105,0x312C,0xAC00,0xD7A3];
	// Ideographic ranges as (low, high) pairs. Not referenced elsewhere in
	// this part of the file (isIdeographic uses direct comparisons instead).
	immutable IdeographicTable=[0x3007,0x3007,0x3021,0x3029,0x4E00,0x9FA5];
	// Combining character ranges, stored as consecutive (low, high) pairs in
	// ascending order; searched by isCombiningChar via lookup().
	immutable CombiningCharTable=[0x0300,0x0345,0x0360,0x0361,0x0483,0x0486,
		0x0591,0x05A1,0x05A3,0x05B9,0x05BB,0x05BD,0x05BF,0x05BF,0x05C1,0x05C2,
		0x05C4,0x05C4,0x064B,0x0652,0x0670,0x0670,0x06D6,0x06DC,0x06DD,0x06DF,
		0x06E0,0x06E4,0x06E7,0x06E8,0x06EA,0x06ED,0x0901,0x0903,0x093C,0x093C,
		0x093E,0x094C,0x094D,0x094D,0x0951,0x0954,0x0962,0x0963,0x0981,0x0983,
		0x09BC,0x09BC,0x09BE,0x09BE,0x09BF,0x09BF,0x09C0,0x09C4,0x09C7,0x09C8,
		0x09CB,0x09CD,0x09D7,0x09D7,0x09E2,0x09E3,0x0A02,0x0A02,0x0A3C,0x0A3C,
		0x0A3E,0x0A3E,0x0A3F,0x0A3F,0x0A40,0x0A42,0x0A47,0x0A48,0x0A4B,0x0A4D,
		0x0A70,0x0A71,0x0A81,0x0A83,0x0ABC,0x0ABC,0x0ABE,0x0AC5,0x0AC7,0x0AC9,
		0x0ACB,0x0ACD,0x0B01,0x0B03,0x0B3C,0x0B3C,0x0B3E,0x0B43,0x0B47,0x0B48,
		0x0B4B,0x0B4D,0x0B56,0x0B57,0x0B82,0x0B83,0x0BBE,0x0BC2,0x0BC6,0x0BC8,
		0x0BCA,0x0BCD,0x0BD7,0x0BD7,0x0C01,0x0C03,0x0C3E,0x0C44,0x0C46,0x0C48,
		0x0C4A,0x0C4D,0x0C55,0x0C56,0x0C82,0x0C83,0x0CBE,0x0CC4,0x0CC6,0x0CC8,
		0x0CCA,0x0CCD,0x0CD5,0x0CD6,0x0D02,0x0D03,0x0D3E,0x0D43,0x0D46,0x0D48,
		0x0D4A,0x0D4D,0x0D57,0x0D57,0x0E31,0x0E31,0x0E34,0x0E3A,0x0E47,0x0E4E,
		0x0EB1,0x0EB1,0x0EB4,0x0EB9,0x0EBB,0x0EBC,0x0EC8,0x0ECD,0x0F18,0x0F19,
		0x0F35,0x0F35,0x0F37,0x0F37,0x0F39,0x0F39,0x0F3E,0x0F3E,0x0F3F,0x0F3F,
		0x0F71,0x0F84,0x0F86,0x0F8B,0x0F90,0x0F95,0x0F97,0x0F97,0x0F99,0x0FAD,
		0x0FB1,0x0FB7,0x0FB9,0x0FB9,0x20D0,0x20DC,0x20E1,0x20E1,0x302A,0x302F,
		0x3099,0x3099,0x309A,0x309A];
	// Digit ranges, stored as consecutive (low, high) pairs in ascending
	// order; searched by isDigit via lookup().
	immutable DigitTable=[0x0030,0x0039,0x0660,0x0669,0x06F0,0x06F9,0x0966,
		0x096F,0x09E6,0x09EF,0x0A66,0x0A6F,0x0AE6,0x0AEF,0x0B66,0x0B6F,0x0BE7,
		0x0BEF,0x0C66,0x0C6F,0x0CE6,0x0CEF,0x0D66,0x0D6F,0x0E50,0x0E59,0x0ED0,
		0x0ED9,0x0F20,0x0F29];
	// Extender ranges, stored as consecutive (low, high) pairs in ascending
	// order; searched by isExtender via lookup().
	immutable ExtenderTable=[0x00B7,0x00B7,0x02D0,0x02D0,0x02D1,0x02D1,0x0387,
		0x0387,0x0640,0x0640,0x0E46,0x0E46,0x0EC6,0x0EC6,0x3005,0x3005,0x3031,
		0x3035,0x309D,0x309E,0x30FC,0x30FE];
967 
968 	bool lookup(const(int)[] table, int c)
969 	{
970 		while (table.length != 0)
971 		{
972 			auto m = (table.length >> 1) & ~1;
973 			if (c < table[m])
974 			{
975 				table = table[0..m];
976 			}
977 			else if (c > table[m+1])
978 			{
979 				table = table[m+2..$];
980 			}
981 			else return true;
982 		}
983 		return false;
984 	}
985 
986 	string startOf(string s)
987 	{
988 		string r;
989 		foreach(char c;s)
990 		{
991 			r ~= (c < 0x20 || c > 0x7F) ? '.' : c;
992 			if (r.length >= 40) { r ~= "___"; break; }
993 		}
994 		return r;
995 	}
996 
997 	void exit(string s=null)
998 	{
999 		throw new XMLException(s);
1000 	}
1001 }
1002