1 module deepmagic.dom.xml.element_parser;
2 
3 import deepmagic.dom;
4 
/**
 * Event-driven parser for the contents of one XML element.
 *
 * The parser walks the text referenced by the shared cursor `s`, removing
 * each item from the front of that text as it is recognised, and invokes
 * whichever handler has been registered for the item (comment, CDATA
 * section, processing instruction, XML instruction, text, start tag or end
 * tag). Child parsers handed to start-tag handlers share the same cursor,
 * so input consumed by a child is not seen again by its parent.
 */
class ElementParser
{
	/// Callback receiving a piece of text (comment body, CDATA, text, ...).
	alias Handler = void delegate(string);
	/// Callback receiving the Element built when an end tag is reached.
	alias ElementHandler = void delegate(in Element element);
	/// Callback receiving a parser positioned just after a start tag.
	alias ParserHandler = void delegate(ElementParser parser);

	public
	{
		// Most recently read tag. Initially the tag supplied to the
		// constructor; parse() overwrites it with every tag it reads.
		Tag tag_;
		// Snapshot of *s taken when this parser was constructed; used by
		// toString() to work out how much input has been consumed since.
		string elementStart;
		// Shared cursor: points at the string holding the unparsed input.
		string* s;

		Handler commentHandler = null;
		Handler cdataHandler = null;
		Handler xiHandler = null;
		Handler piHandler = null;
		Handler rawTextHandler = null;
		Handler textHandler = null;

		// Constructor used by parse() for start tags. The new parser shares
		// the parent's cursor and starts out with the parent's current tag.
		// (Intended for internal use, although declared in a public block.)
		this(ElementParser parent)
		{
			s = parent.s;
			this();
			tag_ = parent.tag_;
		}

		// Constructor used by parse() for empty tags. `t` must point at the
		// text to parse and must remain valid for as long as this parser is
		// used. (Intended for internal use, although declared in a public
		// block.)
		this(Tag tag, string* t)
		{
			s = t;
			this();
			tag_ = tag;
		}
	}

	/**
	 * The Tag at the start of the element being parsed. You can read this to
	 * determine the tag's name and attributes.
	 *
	 * Note that parse() stores every tag it reads in the same field, so once
	 * parse() has been called on this parser the value returned here is the
	 * most recently read tag.
	 */
	@property const(Tag) tag() const { return tag_; }

	/**
	 * Register a handler which will be called whenever a start tag is
	 * encountered which matches the specified name. You can also pass null as
	 * the name, in which case the handler will be called for any unmatched
	 * start tag.
	 *
	 * Examples:
	 * --------------
	 * // Call this function whenever a <podcast> start tag is encountered
	 * onStartTag["podcast"] = (ElementParser xml)
	 * {
	 *	 // Your code here
	 *	 //
	 *	 // This is a closure, so code here may reference
	 *	 // variables which are outside of this scope
	 * };
	 *
	 * // call myEpisodeStartHandler (defined elsewhere) whenever an <episode>
	 * // start tag is encountered
	 * onStartTag["episode"] = &myEpisodeStartHandler;
	 *
	 * // call delegate dg for all other start tags
	 * onStartTag[null] = dg;
	 * --------------
	 *
	 * This library will supply your function with a new instance of
	 * ElementParser, which may be used to parse inside the element whose
	 * start tag was just found, or to identify the tag attributes of the
	 * element, etc.
	 *
	 * Note that your function will be called for both start tags and empty
	 * tags. That is, we make no distinction between &lt;br&gt;&lt;/br&gt;
	 * and &lt;br/&gt;.
	 */
	ParserHandler[string] onStartTag;

	/**
	 * Register a handler which will be called whenever an end tag is
	 * encountered which matches the specified name. You can also pass null as
	 * the name, in which case the handler will be called for any unmatched
	 * end tag.
	 *
	 * Examples:
	 * --------------
	 * // Call this function whenever a </podcast> end tag is encountered
	 * onEndTag["podcast"] = (in Element e)
	 * {
	 *	 // Your code here
	 *	 //
	 *	 // This is a closure, so code here may reference
	 *	 // variables which are outside of this scope
	 * };
	 *
	 * // call myEpisodeEndHandler (defined elsewhere) whenever an </episode>
	 * // end tag is encountered
	 * onEndTag["episode"] = &myEpisodeEndHandler;
	 *
	 * // call delegate dg for all other end tags
	 * onEndTag[null] = dg;
	 * --------------
	 *
	 * Note that your function will be called for both end tags and empty
	 * tags. That is, we make no distinction between &lt;br&gt;&lt;/br&gt;
	 * and &lt;br/&gt;.
	 */
	ElementHandler[string] onEndTag;

	/**
	 * Records the current input as the start of this element.
	 *
	 * The cursor `s` must already point at a valid string when this runs;
	 * the other constructors assign it before delegating here.
	 */
	protected this()
	{
		elementStart = *s;
	}

	/**
	 * Register a handler which will be called whenever text is encountered.
	 *
	 * The handler is only used when no raw text handler has been registered
	 * with onTextRaw; if both are set, only the raw handler is called.
	 *
	 * Examples:
	 * --------------
	 * // Call this function whenever text is encountered
	 * onText = (string s)
	 * {
	 *	 // Your code here
	 *
	 *	 // The passed parameter s will have been decoded by the time you see
	 *	 // it, and so may contain any character.
	 *	 //
	 *	 // This is a closure, so code here may reference
	 *	 // variables which are outside of this scope
	 * };
	 * --------------
	 */
	@property void onText(Handler handler) { textHandler = handler; }

	/**
	 * Register an alternative handler which will be called whenever text
	 * is encountered. This differs from onText in that onText will decode
	 * the text, whereas onTextRaw will not. This allows you to make design
	 * choices, since onText will be more accurate, but slower, while
	 * onTextRaw will be faster, but less accurate. Of course, you can
	 * still call decode() within your handler, if you want, but you'd
	 * probably want to use onTextRaw only in circumstances where you
	 * know that decoding is unnecessary.
	 *
	 * When a raw handler is registered it takes precedence: the handler
	 * registered with onText is not called.
	 *
	 * Examples:
	 * --------------
	 * // Call this function whenever text is encountered
	 * onTextRaw = (string s)
	 * {
	 *	 // Your code here
	 *
	 *	 // The passed parameter s will NOT have been decoded.
	 *	 //
	 *	 // This is a closure, so code here may reference
	 *	 // variables which are outside of this scope
	 * };
	 * --------------
	 */
	void onTextRaw(Handler handler) { rawTextHandler = handler; }

	/**
	 * Register a handler which will be called whenever a character data
	 * segment is encountered.
	 *
	 * Examples:
	 * --------------
	 * // Call this function whenever a CData section is encountered
	 * onCData = (string s)
	 * {
	 *	 // Your code here
	 *
	 *	 // The passed parameter s does not include the opening <![CDATA[
	 *	 // nor closing ]]>
	 *	 //
	 *	 // This is a closure, so code here may reference
	 *	 // variables which are outside of this scope
	 * };
	 * --------------
	 */
	@property void onCData(Handler handler) { cdataHandler = handler; }

	/**
	 * Register a handler which will be called whenever a comment is
	 * encountered.
	 *
	 * Examples:
	 * --------------
	 * // Call this function whenever a comment is encountered
	 * onComment = (string s)
	 * {
	 *	 // Your code here
	 *
	 *	 // The passed parameter s does not include the opening <!-- nor
	 *	 // closing -->
	 *	 //
	 *	 // This is a closure, so code here may reference
	 *	 // variables which are outside of this scope
	 * };
	 * --------------
	 */
	@property void onComment(Handler handler) { commentHandler = handler; }

	/**
	 * Register a handler which will be called whenever a processing
	 * instruction is encountered.
	 *
	 * Examples:
	 * --------------
	 * // Call this function whenever a processing instruction is encountered
	 * onPI = (string s)
	 * {
	 *	 // Your code here
	 *
	 *	 // The passed parameter s does not include the opening <? nor
	 *	 // closing ?>
	 *	 //
	 *	 // This is a closure, so code here may reference
	 *	 // variables which are outside of this scope
	 * };
	 * --------------
	 */
	@property void onPI(Handler handler) { piHandler = handler; }

	/**
	 * Register a handler which will be called whenever an XML instruction is
	 * encountered.
	 *
	 * Examples:
	 * --------------
	 * // Call this function whenever an XML instruction is encountered
	 * // (Note: XML instructions may only occur preceding the root tag of a
	 * // document).
	 * onXI = (string s)
	 * {
	 *	 // Your code here
	 *
	 *	 // The passed parameter s does not include the opening <! nor
	 *	 // closing >
	 *	 //
	 *	 // This is a closure, so code here may reference
	 *	 // variables which are outside of this scope
	 * };
	 * --------------
	 */
	@property void onXI(Handler handler) { xiHandler = handler; }

	// Calls the onStartTag handler registered for `name`, falling back to
	// the handler registered under null. Does nothing if neither exists.
	private void dispatchStartTag(string name, ElementParser parser)
	{
		auto handler = name in onStartTag;
		if (handler is null) handler = null in onStartTag;
		if (handler !is null) (*handler)(parser);
	}

	// Calls the onEndTag handler registered for `name`, falling back to
	// the handler registered under null. Does nothing if neither exists.
	private void dispatchEndTag(string name, Element element)
	{
		auto handler = name in onEndTag;
		if (handler is null) handler = null in onEndTag;
		if (handler !is null) (*handler)(element);
	}

	/**
	 * Parse an XML element.
	 *
	 * Parsing will continue until the end of the current element. Any items
	 * encountered for which a handler has been registered will invoke that
	 * handler.
	 *
	 * If this parser was constructed without a tag, parsing stops as soon as
	 * the first tag has been read, leaving that tag available through `tag`.
	 *
	 * Start tags seen by this parser are remembered by name only, so a start
	 * tag replaces any earlier remembered start tag with the same name, and
	 * parsing returns at the first end tag whose name equals the name of the
	 * element's own start tag.
	 *
	 * NOTE(review): the input is assumed to be well-formed. What happens when
	 * a comment, CDATA section, instruction or text run has no terminator
	 * depends on chop() and indexOf(), which are defined elsewhere - confirm.
	 *
	 * Throws: various kinds of XMLException; Exception if an end tag is
	 * encountered for which no start tag has been seen by this parser.
	 */
	void parse()
	{
		string t;
		Tag root = tag_;
		Tag[string] startTags;
		if (tag_ !is null) startTags[tag_.name] = tag_;

		while(s.length != 0)
		{
			if (startsWith(*s,"<!--"))
			{
				// Comment: drop "<!--", take the body, drop "-->".
				chop(*s,4);
				t = chop(*s,indexOf(*s,"-->"));
				if (commentHandler.funcptr !is null) commentHandler(t);
				chop(*s,3);
			}
			else if (startsWith(*s,"<![CDATA["))
			{
				// CDATA: drop "<![CDATA[", take the body, drop "]]>".
				chop(*s,9);
				t = chop(*s,indexOf(*s,"]]>"));
				if (cdataHandler.funcptr !is null) cdataHandler(t);
				chop(*s,3);
			}
			else if (startsWith(*s,"<!"))
			{
				// XML instruction: must be tested after the two longer
				// "<!" prefixes above. Drop "<!", take the body, drop ">".
				chop(*s,2);
				t = chop(*s,indexOf(*s,">"));
				if (xiHandler.funcptr !is null) xiHandler(t);
				chop(*s,1);
			}
			else if (startsWith(*s,"<?"))
			{
				// Processing instruction: drop "<?", take body, drop "?>".
				chop(*s,2);
				t = chop(*s,indexOf(*s,"?>"));
				if (piHandler.funcptr !is null) piHandler(t);
				chop(*s,2);
			}
			else if (startsWith(*s,"<"))
			{
				tag_ = new Tag(*s,true);
				if (root is null)
					return; // Return to constructor of derived class

				if (tag_.isStart)
				{
					// Remember the start tag so the matching end tag can
					// recover the text between the two.
					startTags[tag_.name] = tag_;

					// The child parser shares this parser's cursor.
					dispatchStartTag(tag_.name, new ElementParser(this));
				}
				else if (tag_.isEnd)
				{
					// Look the start tag up with `in` rather than indexing,
					// so that a stray end tag is reported as an ordinary
					// exception instead of an out-of-range error.
					auto known = tag_.name in startTags;
					if (known is null)
						throw new Exception("End tag </" ~ tag_.name
							~ "> has no matching start tag");
					auto startTag = *known;

					// The element's raw content is the text lying between
					// the end of the start tag and the start of the end tag.
					immutable(char)* p = startTag.tagString.ptr
						+ startTag.tagString.length;
					immutable(char)* q = tag_.tagString.ptr;
					string text = decode(p[0..(q-p)], DecodeMode.LOOSE);

					auto element = new Element(startTag);
					if (text.length != 0) element ~= new Text(text);

					dispatchEndTag(tag_.name, element);

					if (tag_.name == root.name) return;
				}
				else if (tag_.isEmpty)
				{
					Tag startTag = new Tag(tag_.name);

					// FIX by hed010gy, for bug 2979
					// http://d.puremagic.com/issues/show_bug.cgi?id=2979
					if (tag_.attr.length > 0)
						foreach(tn,tv; tag_.attr) startTag.attr[tn]=tv;
					// END FIX

					// Handle the pretend start tag. The child parser gets an
					// empty input of its own. That input is allocated on the
					// GC heap because the handler may keep the parser after
					// this function has returned; pointing it at a local
					// variable would leave it dangling.
					string[] emptyInput = new string[](1);
					auto parser = new ElementParser(startTag,&emptyInput[0]);
					dispatchStartTag(startTag.name, parser);

					// Handle the pretend end tag
					auto element = new Element(startTag);
					dispatchEndTag(tag_.name, element);
				}
			}
			else
			{
				// Plain text up to the next '<'. The raw handler, when set,
				// takes precedence over the decoding handler.
				t = chop(*s,indexOf(*s,"<"));
				if (rawTextHandler.funcptr !is null)
					rawTextHandler(t);
				else if (textHandler.funcptr !is null)
					textHandler(decode(t,DecodeMode.LOOSE));
			}
		}
	}

	/**
	 * Returns that part of the element which has already been parsed, i.e.
	 * the leading portion of the input seen at construction time that is no
	 * longer present in the remaining input.
	 */
	override string toString() const
	{
		assert(elementStart.length >= s.length);
		return elementStart[0 .. elementStart.length - s.length];
	}

}