module deepmagic.dom.xml.element_parser;

import deepmagic.dom;

/**
 * Callback-driven parser for the content of one XML element.
 *
 * Handlers are registered through onStartTag, onEndTag, onText, onTextRaw,
 * onCData, onComment, onPI and onXI; parse() then walks the remaining input
 * and invokes whichever handlers apply.
 */
class ElementParser
{
    alias Handler = void delegate(string);
    alias ElementHandler = void delegate(in Element element);
    alias ParserHandler = void delegate(ElementParser parser);

    public
    {
        /// Tag that opened the element being parsed; parse() overwrites it
        /// with every tag it encounters.
        Tag tag_;

        /// Snapshot of the remaining input taken at construction time;
        /// toString() slices it to report what has been consumed so far.
        string elementStart;

        /// Pointer to the not-yet-consumed input. A child parser created for
        /// a start tag shares this pointer with its parent.
        string* s;

        Handler commentHandler = null;
        Handler cdataHandler = null;
        Handler xiHandler = null;
        Handler piHandler = null;
        Handler rawTextHandler = null;
        Handler textHandler = null;

        // Constructor used by parse() for start tags: the child shares the
        // parent's input pointer and starts from the parent's current tag.
        this(ElementParser parent)
        {
            s = parent.s;
            this();
            tag_ = parent.tag_;
        }

        // Constructor used by parse() for empty tags: the child is given its
        // own input string (empty in parse()) and the synthesized start tag.
        this(Tag tag, string* t)
        {
            s = t;
            this();
            tag_ = tag;
        }
    }

    /**
     * The Tag at the start of the element being parsed. You can read this to
     * determine the tag's name and attributes.
     */
    @property const(Tag) tag() const { return tag_; }

    /**
     * Register a handler which will be called whenever a start tag is
     * encountered which matches the specified name. You can also pass null as
     * the name, in which case the handler will be called for any unmatched
     * start tag.
     *
     * Examples:
     * --------------
     * // Call this function whenever a <podcast> start tag is encountered
     * onStartTag["podcast"] = (ElementParser xml)
     * {
     *     // Your code here
     *     //
     *     // This is a closure, so code here may reference
     *     // variables which are outside of this scope
     * };
     *
     * // call myEpisodeStartHandler (defined elsewhere) whenever an <episode>
     * // start tag is encountered
     * onStartTag["episode"] = &myEpisodeStartHandler;
     *
     * // call delegate dg for all other start tags
     * onStartTag[null] = dg;
     * --------------
     *
     * This library will supply your function with a new instance of
     * ElementParser, which may be used to parse inside the element whose
     * start tag was just found, or to identify the tag attributes of the
     * element, etc.
     *
     * Note that your function will be called for both start tags and empty
     * tags. That is, we make no distinction between <br></br>
     * and <br/>.
     */
    ParserHandler[string] onStartTag;

    /**
     * Register a handler which will be called whenever an end tag is
     * encountered which matches the specified name. You can also pass null as
     * the name, in which case the handler will be called for any unmatched
     * end tag.
     *
     * Examples:
     * --------------
     * // Call this function whenever a </podcast> end tag is encountered
     * onEndTag["podcast"] = (in Element e)
     * {
     *     // Your code here
     *     //
     *     // This is a closure, so code here may reference
     *     // variables which are outside of this scope
     * };
     *
     * // call myEpisodeEndHandler (defined elsewhere) whenever an </episode>
     * // end tag is encountered
     * onEndTag["episode"] = &myEpisodeEndHandler;
     *
     * // call delegate dg for all other end tags
     * onEndTag[null] = dg;
     * --------------
     *
     * Note that your function will be called for both end tags and empty
     * tags. That is, we make no distinction between <br></br>
     * and <br/>.
     */
    ElementHandler[string] onEndTag;

    /// Records the input remaining at construction time so that toString()
    /// can later report how much of it has been consumed.
    protected this()
    {
        elementStart = *s;
    }

    /**
     * Register a handler which will be called whenever text is encountered.
     *
     * If a raw text handler has also been registered with onTextRaw, parse()
     * calls only the raw handler and this one is skipped.
     *
     * Examples:
     * --------------
     * // Call this function whenever text is encountered
     * onText = (string s)
     * {
     *     // Your code here
     *
     *     // The passed parameter s will have been decoded by the time you see
     *     // it, and so may contain any character.
     *     //
     *     // This is a closure, so code here may reference
     *     // variables which are outside of this scope
     * };
     * --------------
     */
    @property void onText(Handler handler) { textHandler = handler; }

    /**
     * Register an alternative handler which will be called whenever text
     * is encountered. This differs from onText in that onText will decode
     * the text, whereas onTextRaw will not. This allows you to make design
     * choices, since onText will be more accurate, but slower, while
     * onTextRaw will be faster, but less accurate. Of course, you can
     * still call decode() within your handler, if you want, but you'd
     * probably want to use onTextRaw only in circumstances where you
     * know that decoding is unnecessary.
     *
     * When both a raw and a decoding text handler are registered, parse()
     * calls only the raw one.
     *
     * Examples:
     * --------------
     * // Call this function whenever text is encountered
     * onTextRaw = (string s)
     * {
     *     // Your code here
     *
     *     // The passed parameter s will NOT have been decoded.
     *     //
     *     // This is a closure, so code here may reference
     *     // variables which are outside of this scope
     * };
     * --------------
     */
    @property void onTextRaw(Handler handler) { rawTextHandler = handler; }

    /**
     * Register a handler which will be called whenever a character data
     * segment is encountered.
     *
     * Examples:
     * --------------
     * // Call this function whenever a CData section is encountered
     * onCData = (string s)
     * {
     *     // Your code here
     *
     *     // The passed parameter s does not include the opening <![CDATA[
     *     // nor closing ]]>
     *     //
     *     // This is a closure, so code here may reference
     *     // variables which are outside of this scope
     * };
     * --------------
     */
    @property void onCData(Handler handler) { cdataHandler = handler; }

    /**
     * Register a handler which will be called whenever a comment is
     * encountered.
     *
     * Examples:
     * --------------
     * // Call this function whenever a comment is encountered
     * onComment = (string s)
     * {
     *     // Your code here
     *
     *     // The passed parameter s does not include the opening <!-- nor
     *     // closing -->
     *     //
     *     // This is a closure, so code here may reference
     *     // variables which are outside of this scope
     * };
     * --------------
     */
    @property void onComment(Handler handler) { commentHandler = handler; }

    /**
     * Register a handler which will be called whenever a processing
     * instruction is encountered.
     *
     * Examples:
     * --------------
     * // Call this function whenever a processing instruction is encountered
     * onPI = (string s)
     * {
     *     // Your code here
     *
     *     // The passed parameter s does not include the opening <? nor
     *     // closing ?>
     *     //
     *     // This is a closure, so code here may reference
     *     // variables which are outside of this scope
     * };
     * --------------
     */
    @property void onPI(Handler handler) { piHandler = handler; }

    /**
     * Register a handler which will be called whenever an XML instruction is
     * encountered, i.e. markup starting with <! that is neither a comment
     * nor a CDATA section.
     *
     * Examples:
     * --------------
     * // Call this function whenever an XML instruction is encountered
     * // (Note: XML instructions may only occur preceding the root tag of a
     * // document).
     * onXI = (string s)
     * {
     *     // Your code here
     *
     *     // The passed parameter s does not include the opening <! nor
     *     // closing >
     *     //
     *     // This is a closure, so code here may reference
     *     // variables which are outside of this scope
     * };
     * --------------
     */
    @property void onXI(Handler handler) { xiHandler = handler; }

    /**
     * Parse an XML element.
     *
     * Parsing will continue until the end of the current element. Any items
     * encountered for which a handler has been registered will invoke that
     * handler.
     *
     * If this parser was constructed without a tag (tag_ is null), parse()
     * returns right after reading the first tag it meets.
     *
     * Throws: NOTE(review): the original comment promised "various kinds of
     * XMLException"; presumably these come from Tag's constructor or from
     * decode(), neither of which is visible in this file - confirm. An end
     * tag whose name has no recorded start tag fails the associative-array
     * lookup below.
     */
    void parse()
    {
        // Call the start-tag handler registered for `name`, falling back to
        // the catch-all registered under the null key; do nothing if neither
        // exists.
        void dispatchStart(string name, ElementParser child)
        {
            auto handler = name in onStartTag;
            if (handler is null) handler = null in onStartTag;
            if (handler !is null) (*handler)(child);
        }

        // Same lookup as dispatchStart, but for end-tag handlers.
        void dispatchEnd(string name, Element element)
        {
            auto handler = name in onEndTag;
            if (handler is null) handler = null in onEndTag;
            if (handler !is null) (*handler)(element);
        }

        string t;
        Tag root = tag_;

        // Most recent start tag seen for each element name; used to recover
        // the text between a start tag and its end tag.
        Tag[string] startTags;
        if (tag_ !is null) startTags[tag_.name] = tag_;

        while (s.length != 0)
        {
            if (startsWith(*s, "<!--"))
            {
                chop(*s, 4);
                t = chop(*s, indexOf(*s, "-->"));
                if (commentHandler.funcptr !is null) commentHandler(t);
                chop(*s, 3);
            }
            else if (startsWith(*s, "<![CDATA["))
            {
                chop(*s, 9);
                t = chop(*s, indexOf(*s, "]]>"));
                if (cdataHandler.funcptr !is null) cdataHandler(t);
                chop(*s, 3);
            }
            else if (startsWith(*s, "<!"))
            {
                chop(*s, 2);
                t = chop(*s, indexOf(*s, ">"));
                if (xiHandler.funcptr !is null) xiHandler(t);
                chop(*s, 1);
            }
            else if (startsWith(*s, "<?"))
            {
                chop(*s, 2);
                t = chop(*s, indexOf(*s, "?>"));
                if (piHandler.funcptr !is null) piHandler(t);
                chop(*s, 2);
            }
            else if (startsWith(*s, "<"))
            {
                tag_ = new Tag(*s, true);
                if (root is null)
                    return; // Return to constructor of derived class

                if (tag_.isStart)
                {
                    startTags[tag_.name] = tag_;

                    // The child parser shares this parser's input pointer,
                    // so anything a handler parses through it is consumed
                    // here as well.
                    auto parser = new ElementParser(this);
                    dispatchStart(tag_.name, parser);
                }
                else if (tag_.isEnd)
                {
                    auto startTag = startTags[tag_.name];

                    // The element's text is whatever lies between the end of
                    // the start tag's source slice and the beginning of the
                    // end tag's source slice.
                    immutable(char)* p = startTag.tagString.ptr
                        + startTag.tagString.length;
                    immutable(char)* q = tag_.tagString.ptr;
                    string text = decode(p[0 .. (q - p)], DecodeMode.LOOSE);

                    auto element = new Element(startTag);
                    if (text.length != 0) element ~= new Text(text);

                    dispatchEnd(tag_.name, element);

                    // Reaching the end tag named like the root ends parsing.
                    if (tag_.name == root.name) return;
                }
                else if (tag_.isEmpty)
                {
                    Tag startTag = new Tag(tag_.name);

                    // FIX by hed010gy, for bug 2979
                    // http://d.puremagic.com/issues/show_bug.cgi?id=2979
                    // Carry the empty tag's attributes over to the
                    // synthesized start tag.
                    foreach (tn, tv; tag_.attr) startTag.attr[tn] = tv;
                    // END FIX

                    // Handle the pretend start tag; the child parser gets an
                    // empty input string of its own.
                    string s2;
                    auto parser = new ElementParser(startTag, &s2);
                    dispatchStart(startTag.name, parser);

                    // Handle the pretend end tag
                    auto element = new Element(startTag);
                    dispatchEnd(tag_.name, element);
                }
            }
            else
            {
                t = chop(*s, indexOf(*s, "<"));

                // The raw handler takes precedence; the decoding handler is
                // only used when no raw handler is registered.
                if (rawTextHandler.funcptr !is null)
                    rawTextHandler(t);
                else if (textHandler.funcptr !is null)
                    textHandler(decode(t, DecodeMode.LOOSE));
            }
        }
    }

    /**
     * Returns that part of the element which has already been parsed
     */
    override string toString() const
    {
        assert(elementStart.length >= s.length);
        return elementStart[0 .. elementStart.length - s.length];
    }

}