/** * Not type-checking this file because it's mostly vendor code. */ /*! * HTML Parser By John Resig (ejohn.org) * Modified by Juriy "kangax" Zaytsev * Original code by Erik Arvidsson (MPL-1.1 OR Apache-2.0 OR GPL-2.0-or-later) * http://erik.eae.net/simplehtmlparser/simplehtmlparser.js */ import { makeMap, no } from 'shared/util' import { isNonPhrasingTag } from 'web/compiler/util' import { unicodeRegExp } from 'core/util/lang' import { ASTAttr, CompilerOptions } from 'types/compiler' // Regular Expressions for parsing tags and attributes const attribute = /^\s*([^\s"'<>\/=]+)(?:\s*(=)\s*(?:"([^"]*)"+|'([^']*)'+|([^\s"'=<>`]+)))?/ const dynamicArgAttribute = /^\s*((?:v-[\w-]+:|@|:|#)\[[^=]+?\][^\s"'<>\/=]*)(?:\s*(=)\s*(?:"([^"]*)"+|'([^']*)'+|([^\s"'=<>`]+)))?/ const ncname = `[a-zA-Z_][\\-\\.0-9_a-zA-Z${unicodeRegExp.source}]*` const qnameCapture = `((?:${ncname}\\:)?${ncname})` const startTagOpen = new RegExp(`^<${qnameCapture}`) const startTagClose = /^\s*(\/?)>/ const endTag = new RegExp(`^<\\/${qnameCapture}[^>]*>`) const doctype = /^]+>/i // #7298: escape - to avoid being passed as HTML comment when inlined in page const comment = /^', '"': '"', '&': '&', ' ': '\n', ' ': '\t', ''': "'" } const encodedAttr = /&(?:lt|gt|quot|amp|#39);/g const encodedAttrWithNewLines = /&(?:lt|gt|quot|amp|#39|#10|#9);/g // #5992 const isIgnoreNewlineTag = makeMap('pre,textarea', true) const shouldIgnoreFirstNewline = (tag, html) => tag && isIgnoreNewlineTag(tag) && html[0] === '\n' function decodeAttr(value, shouldDecodeNewlines) { const re = shouldDecodeNewlines ? encodedAttrWithNewLines : encodedAttr return value.replace(re, match => decodingMap[match]) } export interface HTMLParserOptions extends CompilerOptions { start?: ( tag: string, attrs: ASTAttr[], unary: boolean, start: number, end: number ) => void end?: (tag: string, start: number, end: number) => void chars?: (text: string, start?: number, end?: number) => void comment?: (content: string, start: number, end: number) => void } export function parseHTML(html, options: HTMLParserOptions) { const stack: any[] = [] const expectHTML = options.expectHTML const isUnaryTag = options.isUnaryTag || no const canBeLeftOpenTag = options.canBeLeftOpenTag || no let index = 0 let last, lastTag while (html) { last = html // Make sure we're not in a plaintext content element like script/style if (!lastTag || !isPlainTextElement(lastTag)) { let textEnd = html.indexOf('<') if (textEnd === 0) { // Comment: if (comment.test(html)) { const commentEnd = html.indexOf('-->') if (commentEnd >= 0) { if (options.shouldKeepComment && options.comment) { options.comment( html.substring(4, commentEnd), index, index + commentEnd + 3 ) } advance(commentEnd + 3) continue } } // http://en.wikipedia.org/wiki/Conditional_comment#Downlevel-revealed_conditional_comment if (conditionalComment.test(html)) { const conditionalEnd = html.indexOf(']>') if (conditionalEnd >= 0) { advance(conditionalEnd + 2) continue } } // Doctype: const doctypeMatch = html.match(doctype) if (doctypeMatch) { advance(doctypeMatch[0].length) continue } // End tag: const endTagMatch = html.match(endTag) if (endTagMatch) { const curIndex = index advance(endTagMatch[0].length) parseEndTag(endTagMatch[1], curIndex, index) continue } // Start tag: const startTagMatch = parseStartTag() if (startTagMatch) { handleStartTag(startTagMatch) if (shouldIgnoreFirstNewline(startTagMatch.tagName, html)) { advance(1) } continue } } let text, rest, next if (textEnd >= 0) { rest = html.slice(textEnd) while ( !endTag.test(rest) && !startTagOpen.test(rest) && !comment.test(rest) && !conditionalComment.test(rest) ) { // < in plain text, be forgiving and treat it as text next = rest.indexOf('<', 1) if (next < 0) break textEnd += next rest = html.slice(textEnd) } text = html.substring(0, textEnd) } if (textEnd < 0) { text = html } if (text) { advance(text.length) } if (options.chars && text) { options.chars(text, index - text.length, index) } } else { let endTagLength = 0 const stackedTag = lastTag.toLowerCase() const reStackedTag = reCache[stackedTag] || (reCache[stackedTag] = new RegExp( '([\\s\\S]*?)(]*>)', 'i' )) const rest = html.replace(reStackedTag, function (all, text, endTag) { endTagLength = endTag.length if (!isPlainTextElement(stackedTag) && stackedTag !== 'noscript') { text = text .replace(//g, '$1') // #7298 .replace(//g, '$1') } if (shouldIgnoreFirstNewline(stackedTag, text)) { text = text.slice(1) } if (options.chars) { options.chars(text) } return '' }) index += html.length - rest.length html = rest parseEndTag(stackedTag, index - endTagLength, index) } if (html === last) { options.chars && options.chars(html) if (__DEV__ && !stack.length && options.warn) { options.warn(`Mal-formatted tag at end of template: "${html}"`, { start: index + html.length }) } break } } // Clean up any remaining tags parseEndTag() function advance(n) { index += n html = html.substring(n) } function parseStartTag() { const start = html.match(startTagOpen) if (start) { const match: any = { tagName: start[1], attrs: [], start: index } advance(start[0].length) let end, attr while ( !(end = html.match(startTagClose)) && (attr = html.match(dynamicArgAttribute) || html.match(attribute)) ) { attr.start = index advance(attr[0].length) attr.end = index match.attrs.push(attr) } if (end) { match.unarySlash = end[1] advance(end[0].length) match.end = index return match } } } function handleStartTag(match) { const tagName = match.tagName const unarySlash = match.unarySlash if (expectHTML) { if (lastTag === 'p' && isNonPhrasingTag(tagName)) { parseEndTag(lastTag) } if (canBeLeftOpenTag(tagName) && lastTag === tagName) { parseEndTag(tagName) } } const unary = isUnaryTag(tagName) || !!unarySlash const l = match.attrs.length const attrs: ASTAttr[] = new Array(l) for (let i = 0; i < l; i++) { const args = match.attrs[i] const value = args[3] || args[4] || args[5] || '' const shouldDecodeNewlines = tagName === 'a' && args[1] === 'href' ? options.shouldDecodeNewlinesForHref : options.shouldDecodeNewlines attrs[i] = { name: args[1], value: decodeAttr(value, shouldDecodeNewlines) } if (__DEV__ && options.outputSourceRange) { attrs[i].start = args.start + args[0].match(/^\s*/).length attrs[i].end = args.end } } if (!unary) { stack.push({ tag: tagName, lowerCasedTag: tagName.toLowerCase(), attrs: attrs, start: match.start, end: match.end }) lastTag = tagName } if (options.start) { options.start(tagName, attrs, unary, match.start, match.end) } } function parseEndTag(tagName?: any, start?: any, end?: any) { let pos, lowerCasedTagName if (start == null) start = index if (end == null) end = index // Find the closest opened tag of the same type if (tagName) { lowerCasedTagName = tagName.toLowerCase() for (pos = stack.length - 1; pos >= 0; pos--) { if (stack[pos].lowerCasedTag === lowerCasedTagName) { break } } } else { // If no tag name is provided, clean shop pos = 0 } if (pos >= 0) { // Close all the open elements, up the stack for (let i = stack.length - 1; i >= pos; i--) { if (__DEV__ && (i > pos || !tagName) && options.warn) { options.warn(`tag <${stack[i].tag}> has no matching end tag.`, { start: stack[i].start, end: stack[i].end }) } if (options.end) { options.end(stack[i].tag, start, end) } } // Remove the open elements from the stack stack.length = pos lastTag = pos && stack[pos - 1].tag } else if (lowerCasedTagName === 'br') { if (options.start) { options.start(tagName, [], true, start, end) } } else if (lowerCasedTagName === 'p') { if (options.start) { options.start(tagName, [], false, start, end) } if (options.end) { options.end(tagName, start, end) } } } }