htmlparser.js 6.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205
  /**
   * @license Copyright (c) 2003-2015, CKSource - Frederico Knabben. All rights reserved.
   * For licensing, see LICENSE.md or http://ckeditor.com/license
   */
  /**
   * Provides an "event like" system to parse strings of HTML data.
   *
   *     var parser = new CKEDITOR.htmlParser();
   *     parser.onTagOpen = function( tagName, attributes, selfClosing ) {
   *       alert( tagName );
   *     };
   *     parser.parse( '<p>Some <b>text</b>.</p>' ); // Alerts 'p', 'b'.
   *
   * @class
   * @constructor Creates a htmlParser class instance.
   */
  CKEDITOR.htmlParser = function() {
    this._ = {
      // Tokenizer used by parse(). One match is one of, in alternation order:
      // a closing tag (group 1), a comment (group 2), or an opening tag
      // (groups 3-5: name, raw attributes, optional trailing slash).
      // The regex is global, so it carries `lastIndex` state between exec() calls.
      htmlPartsRegex: /<(?:(?:\/([^>]+)>)|(?:!--([\S|\s]*?)-->)|(?:([^\/\s>]+)((?:\s+[\w\-:.]+(?:\s*=\s*?(?:(?:"[^"]*")|(?:'[^']*')|[^\s"'\/>]+))?)*)[\S\s]*?(\/?)>))/g
    };
  };

  ( function() {
    // Splits the raw attributes part of an opening tag into name/value pairs.
    // Groups: 1 = name, 2 = double-quoted value, 3 = single-quoted value,
    // 4 = unquoted value. Shared by all parser instances and global (stateful).
    var attribsRegex = /([\w\-:.]+)(?:(?:\s*=\s*(?:(?:"([^"]*)")|(?:'([^']*)')|([^\s>]+)))|(?=\s|$))/g,
      // Boolean attributes: when they carry no value, their own name is used as the value.
      emptyAttribs = { checked: 1, compact: 1, declare: 1, defer: 1, disabled: 1, ismap: 1, multiple: 1, nohref: 1, noresize: 1, noshade: 1, nowrap: 1, readonly: 1, selected: 1 },
      hasOwn = Object.prototype.hasOwnProperty;

    CKEDITOR.htmlParser.prototype = {
      /**
       * Function to be fired when a tag opener is found. This function
       * should be overridden when using this class.
       *
       *     var parser = new CKEDITOR.htmlParser();
       *     parser.onTagOpen = function( tagName, attributes, selfClosing ) {
       *       alert( tagName ); // e.g. 'b'
       *     };
       *     parser.parse( '<!-- Example --><b>Hello</b>' );
       *
       * @param {String} tagName The tag name. The name is guaranteed to be lowercased.
       * @param {Object} attributes An object containing all tag attributes. Each
       * property in this object represents an attribute name (lowercased) and its
       * value is the attribute value.
       * @param {Boolean} selfClosing `true` if the tag closes itself, `false` if it doesn't.
       */
      onTagOpen: function() {},

      /**
       * Function to be fired when a tag closer is found. This function
       * should be overridden when using this class.
       *
       *     var parser = new CKEDITOR.htmlParser();
       *     parser.onTagClose = function( tagName ) {
       *       alert( tagName ); // 'b'
       *     };
       *     parser.parse( '<!-- Example --><b>Hello</b>' );
       *
       * @param {String} tagName The tag name. The name is guaranteed to be lowercased.
       */
      onTagClose: function() {},

      /**
       * Function to be fired when text is found. This function
       * should be overridden when using this class.
       *
       *     var parser = new CKEDITOR.htmlParser();
       *     parser.onText = function( text ) {
       *       alert( text ); // 'Hello'
       *     };
       *     parser.parse( '<!-- Example --><b>Hello</b>' );
       *
       * @param {String} text The text found.
       */
      onText: function() {},

      /**
       * Function to be fired when a CDATA section is found, i.e. the raw content
       * of an element listed in `CKEDITOR.dtd.$cdata`. This function
       * should be overridden when using this class.
       *
       *     var parser = new CKEDITOR.htmlParser();
       *     parser.onCDATA = function( cdata ) {
       *       alert( cdata ); // 'var hello;'
       *     };
       *     parser.parse( '<script>var hello;</script>' );
       *
       * @param {String} cdata The CDATA that has been found.
       */
      onCDATA: function() {},

      /**
       * Function to be fired when a comment is found. This function
       * should be overridden when using this class.
       *
       *     var parser = new CKEDITOR.htmlParser();
       *     parser.onComment = function( comment ) {
       *       alert( comment ); // ' Example '
       *     };
       *     parser.parse( '<!-- Example --><b>Hello</b>' );
       *
       * @param {String} comment The comment text.
       */
      onComment: function() {},

      /**
       * Parses text, looking for HTML tokens, like tag openers or closers,
       * or comments. This function fires the onTagOpen, onTagClose, onText,
       * onCDATA and onComment functions during its execution.
       *
       *     var parser = new CKEDITOR.htmlParser();
       *     // The onTagOpen, onTagClose, onText, onCDATA and onComment functions
       *     // should be overridden at this point.
       *     parser.parse( '<!-- Example --><b>Hello</b>' );
       *
       * @param {String} html The HTML to be parsed.
       */
      parse: function( html ) {
        var partsRegex = this._.htmlPartsRegex,
          parts, tagName, text,
          attribs, attribMatch, attribsPart, selfClosing,
          attName, attValue,
          nextIndex = 0,
          cdata; // The collected data inside a CDATA section.

        // The regex is global and lives on the instance. If a previous parse()
        // was aborted by an exception thrown from a callback, `lastIndex` is left
        // pointing into the old input. Always start scanning from the beginning.
        partsRegex.lastIndex = 0;

        while ( ( parts = partsRegex.exec( html ) ) ) {
          var tagIndex = parts.index;

          // Anything between the previous token and this one is plain text
          // (or raw CDATA content when inside a CDATA element).
          if ( tagIndex > nextIndex ) {
            text = html.substring( nextIndex, tagIndex );

            if ( cdata )
              cdata.push( text );
            else
              this.onText( text );
          }

          nextIndex = partsRegex.lastIndex;

          // "parts" is an array with the following items:
          //   0 : The entire match for opening/closing tags and comments.
          //   1 : Group filled with the tag name for closing tags.
          //   2 : Group filled with the comment text.
          //   3 : Group filled with the tag name for opening tags.
          //   4 : Group filled with the attributes part of opening tags.
          //   5 : Group filled with '/' for self-closing opening tags.

          // Closing tag
          if ( ( tagName = parts[ 1 ] ) ) {
            tagName = tagName.toLowerCase();

            if ( cdata && CKEDITOR.dtd.$cdata[ tagName ] ) {
              // Send the CDATA data.
              this.onCDATA( cdata.join( '' ) );
              cdata = null;
            }

            // Inside a CDATA section, a closer of a non-CDATA element falls
            // through and is stored as raw data below.
            if ( !cdata ) {
              this.onTagClose( tagName );
              continue;
            }
          }

          // If CDATA is enabled, just save the raw match.
          if ( cdata ) {
            cdata.push( parts[ 0 ] );
            continue;
          }

          // Opening tag
          if ( ( tagName = parts[ 3 ] ) ) {
            tagName = tagName.toLowerCase();

            // There are some tag names that can break things, so let's
            // simply ignore them when parsing. (#5224)
            if ( /="/.test( tagName ) )
              continue;

            attribs = {};
            attribsPart = parts[ 4 ];
            selfClosing = !!parts[ 5 ];

            if ( attribsPart ) {
              // Shared global regex: make sure no stale position survives from
              // an attribute loop that was interrupted by an exception.
              attribsRegex.lastIndex = 0;

              while ( ( attribMatch = attribsRegex.exec( attribsPart ) ) ) {
                attName = attribMatch[ 1 ].toLowerCase();
                attValue = attribMatch[ 2 ] || attribMatch[ 3 ] || attribMatch[ 4 ] || '';

                // Own-property check: a plain lookup would also hit inherited
                // members such as "constructor" and treat them as boolean attributes.
                if ( !attValue && hasOwn.call( emptyAttribs, attName ) )
                  attribs[ attName ] = attName;
                else
                  attribs[ attName ] = CKEDITOR.tools.htmlDecodeAttr( attValue );
              }
            }

            this.onTagOpen( tagName, attribs, selfClosing );

            // Open CDATA mode when finding the appropriate tags.
            if ( !cdata && CKEDITOR.dtd.$cdata[ tagName ] )
              cdata = [];

            continue;
          }

          // Comment
          if ( ( tagName = parts[ 2 ] ) )
            this.onComment( tagName );
        }

        // Whatever follows the last token is reported as text.
        // NOTE(review): if the input ends while a CDATA element is still open,
        // the pieces collected in `cdata` are not sent to onCDATA; only the
        // trailing remainder is reported, through onText.
        if ( html.length > nextIndex )
          this.onText( html.substring( nextIndex, html.length ) );
      }
    };
  } )();