diff --git a/src/utils/htmlParser/format.ts b/src/utils/htmlParser/format.ts
new file mode 100644
index 00000000..2b409903
--- /dev/null
+++ b/src/utils/htmlParser/format.ts
@@ -0,0 +1,47 @@
+import { HTMLNode, CommentOrTextAST, ElementAST, AST } from './types'
+
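+// Split a string at the first occurrence of a separator,
+// e.g. splitHead('class="red box"', '=') -> ['class', '"red box"'],
+// and splitHead('disabled', '=') -> ['disabled'] when no separator is found.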
+export const splitHead = (str: string, sep: string) => {
+ const idx = str.indexOf(sep)
+ if (idx === -1) return [str]
+ return [str.slice(0, idx), str.slice(idx + sep.length)]
+}
+
+const unquote = (str: string) => {
+ const car = str.charAt(0)
+ const end = str.length - 1
+ const isQuoteStart = car === '"' || car === "'"
+ if (isQuoteStart && car === str.charAt(end)) {
+ return str.slice(1, end)
+ }
+ return str
+}
+
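+// Turn raw attribute strings into { key, value } pairs, e.g.
+// ['class="red box"', 'disabled'] -> [{ key: 'class', value: 'red box' }, { key: 'disabled', value: null }]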
+const formatAttributes = (attributes: string[]) => {
+ return attributes.map(attribute => {
+ const parts = splitHead(attribute.trim(), '=')
+ const key = parts[0]
+ const value = typeof parts[1] === 'string' ? unquote(parts[1]) : null
+ return { key, value }
+ })
+}
+
+export const format = (nodes: HTMLNode[]): AST[] => {
+ return nodes.map(node => {
+ if (node.type === 'element') {
+ const children = format(node.children)
+ const item: ElementAST = {
+ type: 'element',
+ tagName: node.tagName.toLowerCase(),
+ attributes: formatAttributes(node.attributes),
+ children,
+ }
+ return item
+ }
+
+ const item: CommentOrTextAST = {
+ type: node.type,
+ content: node.content,
+ }
+ return item
+ })
+}
\ No newline at end of file
diff --git a/src/utils/htmlParser/index.ts b/src/utils/htmlParser/index.ts
new file mode 100644
index 00000000..9852a72f
--- /dev/null
+++ b/src/utils/htmlParser/index.ts
@@ -0,0 +1,15 @@
+// Reference: https://github.com/andrejewski/himalaya, rewritten in TypeScript with some features simplified
+
+import { lexer } from './lexer'
+import { parser } from './parser'
+import { format } from './format'
+import { toHTML } from './stringify'
+import type { AST } from './types'
+
+export const toAST = (str: string) => {
+ const tokens = lexer(str)
+ const nodes = parser(tokens)
+ return format(nodes)
+}
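+
+// Usage sketch (round-trip example):
+//   const ast = toAST('<div class="box">hello</div>')
+//   toHTML(ast)  // => "<div class='box'>hello</div>"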
+
+export { toHTML }
+export type { AST }
\ No newline at end of file
diff --git a/src/utils/htmlParser/lexer.ts b/src/utils/htmlParser/lexer.ts
new file mode 100644
index 00000000..a80175ba
--- /dev/null
+++ b/src/utils/htmlParser/lexer.ts
@@ -0,0 +1,276 @@
+import startsWith from 'lodash/startsWith'
+import endsWith from 'lodash/endsWith'
+import { Token } from './types'
+import { childlessTags } from './tags'
+
+interface State {
+ str: string;
+ position: number;
+ tokens: Token[];
+}
+
+const jumpPosition = (state: State, end: number) => {
+  const len = end - state.position
+  movePosition(state, len)
+}
+
+const movePosition = (state: State, len: number) => {
+  state.position = state.position + len
+}
+
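+// Find the index of the next '<' that actually starts a tag (followed by '/',
+// '!' or an alphanumeric character); returns -1 if there is none.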
+const findTextEnd = (str: string, index: number) => {
+ const isEnd = false
+ while (!isEnd) {
+ const textEnd = str.indexOf('<', index)
+ if (textEnd === -1) {
+ return textEnd
+ }
+ const char = str.charAt(textEnd + 1)
+ if (char === '/' || char === '!' || /[A-Za-z0-9]/.test(char)) {
+ return textEnd
+ }
+ index = textEnd + 1
+ }
+ return -1
+}
+
+const lexText = (state: State) => {
+ const { str } = state
+ let textEnd = findTextEnd(str, state.position)
+ if (textEnd === state.position) return
+ if (textEnd === -1) {
+ textEnd = str.length
+ }
+
+ const content = str.slice(state.position, textEnd)
+ jumpPosition(state, textEnd)
+
+ state.tokens.push({
+ type: 'text',
+ content,
+ })
+}
+
+const lexComment = (state: State) => {
+ const { str } = state
+
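+  // skip the leading '<!--' (4 characters)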
+  movePosition(state, 4)
+ let contentEnd = str.indexOf('-->', state.position)
+ let commentEnd = contentEnd + 3
+ if (contentEnd === -1) {
+ contentEnd = commentEnd = str.length
+ }
+
+ const content = str.slice(state.position, contentEnd)
+ jumpPosition(state, commentEnd)
+
+ state.tokens.push({
+ type: 'comment',
+ content,
+ })
+}
+
+const lexTagName = (state: State) => {
+ const { str } = state
+ const len = str.length
+ let start = state.position
+
+ while (start < len) {
+ const char = str.charAt(start)
+ const isTagChar = !(/\s/.test(char) || char === '/' || char === '>')
+ if (isTagChar) break
+ start++
+ }
+
+ let end = start + 1
+ while (end < len) {
+ const char = str.charAt(end)
+ const isTagChar = !(/\s/.test(char) || char === '/' || char === '>')
+ if (!isTagChar) break
+ end++
+ }
+
+ jumpPosition(state, end)
+ const tagName = str.slice(start, end)
+ state.tokens.push({
+ type: 'tag',
+ content: tagName
+ })
+ return tagName
+}
+
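+// Collect attribute tokens. Quoted values may contain whitespace, and an '='
+// separated from its key or value by spaces is re-joined, e.g. the raw words
+// ['class', '=', '"red"'] or ['class=', '"red"'] both become the attribute 'class="red"'.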
+const lexTagAttributes = (state: State) => {
+ const { str, tokens } = state
+ let cursor = state.position
+ let quote = null
+ let wordBegin = cursor
+ const words = []
+ const len = str.length
+ while (cursor < len) {
+ const char = str.charAt(cursor)
+ if (quote) {
+ const isQuoteEnd = char === quote
+ if (isQuoteEnd) quote = null
+ cursor++
+ continue
+ }
+
+ const isTagEnd = char === '/' || char === '>'
+ if (isTagEnd) {
+ if (cursor !== wordBegin) words.push(str.slice(wordBegin, cursor))
+ break
+ }
+
+ const isWordEnd = /\s/.test(char)
+ if (isWordEnd) {
+ if (cursor !== wordBegin) words.push(str.slice(wordBegin, cursor))
+ wordBegin = cursor + 1
+ cursor++
+ continue
+ }
+
+ const isQuoteStart = char === '\'' || char === '"'
+ if (isQuoteStart) {
+ quote = char
+ cursor++
+ continue
+ }
+
+ cursor++
+ }
+ jumpPosition(state, cursor)
+
+ const type = 'attribute'
+ for (let i = 0; i < words.length; i++) {
+ const word = words[i]
+
+ const isNotPair = word.indexOf('=') === -1
+ if (isNotPair) {
+ const secondWord = words[i + 1]
+ if (secondWord && startsWith(secondWord, '=')) {
+ if (secondWord.length > 1) {
+ const newWord = word + secondWord
+ tokens.push({ type, content: newWord })
+ i += 1
+ continue
+ }
+ const thirdWord = words[i + 2]
+ i += 1
+ if (thirdWord) {
+ const newWord = word + '=' + thirdWord
+ tokens.push({ type, content: newWord })
+ i += 1
+ continue
+ }
+ }
+ }
+ if (endsWith(word, '=')) {
+ const secondWord = words[i + 1]
+ if (secondWord && secondWord.indexOf('=') === -1) {
+ const newWord = word + secondWord
+ tokens.push({ type, content: newWord })
+ i += 1
+ continue
+ }
+
+ const newWord = word.slice(0, -1)
+ tokens.push({ type, content: newWord })
+ continue
+ }
+
+ tokens.push({ type, content: word })
+ }
+}
+
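+// For childless tags (style/script/template), scan forward to the matching
+// closing tag and keep everything in between as a single text token, so that
+// '<' characters inside e.g. a script body are not lexed as markup.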
+const lexSkipTag = (tagName: string, state: State) => {
+ const { str, tokens } = state
+ const safeTagName = tagName.toLowerCase()
+ const len = str.length
+ let index = state.position
+
+ while (index < len) {
+    const nextTag = str.indexOf(`</${safeTagName}`, index)
+ if (nextTag === -1) {
+ lexText(state)
+ break
+ }
+
+    const tagState: State = {
+ str,
+ position: state.position,
+ tokens: [],
+ }
+ jumpPosition(tagState, nextTag)
+ const name = lexTag(tagState)
+ if (safeTagName !== name.toLowerCase()) {
+ index = tagState.position
+ continue
+ }
+
+ if (nextTag !== state.position) {
+ const textStart = state.position
+ jumpPosition(state, nextTag)
+ tokens.push({
+ type: 'text',
+ content: str.slice(textStart, nextTag),
+ })
+ }
+
+ tokens.push(...tagState.tokens)
+ jumpPosition(state, tagState.position)
+ break
+ }
+}
+
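+// Lex one tag: emits a tag-start token, a tag token with the tag name, any
+// attribute tokens, and a tag-end token. 'close' is true on tag-start for
+// '</...' and on tag-end for a self-closing '.../>'.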
+const lexTag = (state: State) => {
+ const { str } = state
+ const secondChar = str.charAt(state.position + 1)
+ const tagStartClose = secondChar === '/'
+  movePosition(state, tagStartClose ? 2 : 1)
+ state.tokens.push({
+ type: 'tag-start',
+ close: tagStartClose,
+ })
+
+ const tagName = lexTagName(state)
+ lexTagAttributes(state)
+
+ const firstChar = str.charAt(state.position)
+ const tagEndClose = firstChar === '/'
+  movePosition(state, tagEndClose ? 2 : 1)
+ state.tokens.push({
+ type: 'tag-end',
+ close: tagEndClose,
+ })
+ return tagName
+}
+
+const lex = (state: State) => {
+ const str = state.str
+ const len = str.length
+
+ while (state.position < len) {
+ const start = state.position
+ lexText(state)
+
+ if (state.position === start) {
+ const isComment = startsWith(str, '!--', start + 1)
+ if (isComment) lexComment(state)
+ else {
+ const tagName = lexTag(state)
+ const safeTag = tagName.toLowerCase()
+ if (childlessTags.includes(safeTag)) lexSkipTag(tagName, state)
+ }
+ }
+ }
+}
+
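+// Tokenize an HTML string into a flat stream of text, comment and tag tokens.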
+export const lexer = (str: string): Token[] => {
+  const state: State = {
+ str,
+ position: 0,
+ tokens: [],
+ }
+ lex(state)
+ return state.tokens
+}
\ No newline at end of file
diff --git a/src/utils/htmlParser/parser.ts b/src/utils/htmlParser/parser.ts
new file mode 100644
index 00000000..6c835627
--- /dev/null
+++ b/src/utils/htmlParser/parser.ts
@@ -0,0 +1,129 @@
+import { Token, HTMLNode, TagToken, NormalElement, TagEndToken, AttributeToken, TextToken } from './types'
+import { closingTags, closingTagAncestorBreakers, voidTags } from './tags'
+
+interface StackItem {
+ tagName: string | null;
+ children: HTMLNode[];
+}
+
+interface State {
+ stack: StackItem[];
+ cursor: number;
+ tokens: Token[];
+}
+
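+// Build a tree of HTMLNodes from the flat token stream, tracking open elements on a stack.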
+export const parser = (tokens: Token[]) => {
+ const root: StackItem = { tagName: null, children: [] }
+ const state: State = { tokens, cursor: 0, stack: [root] }
+ parse(state)
+ return root.children
+}
+
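+// Returns true when an "ancestor breaker" (e.g. <ul> for <li>) sits closer to the
+// top of the stack than the nearest open tag of the same name, in which case a new
+// tag should NOT implicitly close that earlier one (an <li> inside a nested <ul>
+// leaves the outer <li> open).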
+export const hasTerminalParent = (tagName: string, stack: StackItem[]) => {
+ const tagParents = closingTagAncestorBreakers[tagName]
+ if (tagParents) {
+ let currentIndex = stack.length - 1
+ while (currentIndex >= 0) {
+ const parentTagName = stack[currentIndex].tagName
+ if (parentTagName === tagName) break
+      if (parentTagName && tagParents.includes(parentTagName)) return true
+ currentIndex--
+ }
+ }
+ return false
+}
+
+export const rewindStack = (stack: StackItem[], newLength: number) => {
+ stack.splice(newLength)
+}
+
+export const parse = (state: State) => {
+ const { stack, tokens } = state
+ let { cursor } = state
+ let nodes = stack[stack.length - 1].children
+ const len = tokens.length
+
+ while (cursor < len) {
+ const token = tokens[cursor]
+ if (token.type !== 'tag-start') {
+ nodes.push(token as TextToken)
+ cursor++
+ continue
+ }
+
+ const tagToken = tokens[++cursor] as TagToken
+ cursor++
+ const tagName = tagToken.content.toLowerCase()
+ if (token.close) {
+ let index = stack.length
+ let shouldRewind = false
+ while (--index > -1) {
+ if (stack[index].tagName === tagName) {
+ shouldRewind = true
+ break
+ }
+ }
+ while (cursor < len) {
+ if (tokens[cursor].type !== 'tag-end') break
+ cursor++
+ }
+ if (shouldRewind) {
+ rewindStack(stack, index)
+ break
+ }
+ else continue
+ }
+
+ const isClosingTag = closingTags.includes(tagName)
+ let shouldRewindToAutoClose = isClosingTag
+ if (shouldRewindToAutoClose) {
+ shouldRewindToAutoClose = !hasTerminalParent(tagName, stack)
+ }
+
+ if (shouldRewindToAutoClose) {
+ let currentIndex = stack.length - 1
+ while (currentIndex > 0) {
+ if (tagName === stack[currentIndex].tagName) {
+ rewindStack(stack, currentIndex)
+ const previousIndex = currentIndex - 1
+ nodes = stack[previousIndex].children
+ break
+ }
+ currentIndex = currentIndex - 1
+ }
+ }
+
+    const attributes: string[] = []
+ let tagEndToken: TagEndToken | undefined
+ while (cursor < len) {
+ const _token = tokens[cursor]
+ if (_token.type === 'tag-end') {
+ tagEndToken = _token
+ break
+ }
+ attributes.push((_token as AttributeToken).content)
+ cursor++
+ }
+
+ if (!tagEndToken) break
+
+ cursor++
+ const children: HTMLNode[] = []
+ const elementNode: NormalElement = {
+ type: 'element',
+ tagName: tagToken.content,
+ attributes,
+ children,
+ }
+ nodes.push(elementNode)
+
+ const hasChildren = !(tagEndToken.close || voidTags.includes(tagName))
+ if (hasChildren) {
+      stack.push({ tagName, children })
+ const innerState = { tokens, cursor, stack }
+ parse(innerState)
+ cursor = innerState.cursor
+ }
+ }
+ state.cursor = cursor
+}
\ No newline at end of file
diff --git a/src/utils/htmlParser/stringify.ts b/src/utils/htmlParser/stringify.ts
new file mode 100644
index 00000000..82ec2aa5
--- /dev/null
+++ b/src/utils/htmlParser/stringify.ts
@@ -0,0 +1,28 @@
+import { AST, ElementAST, ElementAttribute } from './types'
+import { voidTags } from './tags'
+
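+// e.g. formatAttributes([{ key: 'class', value: 'box' }, { key: 'disabled', value: null }])
+// returns " class='box' disabled" (note the leading space; single quotes are used
+// unless the value itself contains one).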
+export const formatAttributes = (attributes: ElementAttribute[]) => {
+ return attributes.reduce((attrs, attribute) => {
+ const { key, value } = attribute
+ if (value === null) return `${attrs} ${key}`
+    if (key === 'style' && !value) return attrs
+
+ const quoteEscape = value.indexOf('\'') !== -1
+ const quote = quoteEscape ? '"' : '\''
+ return `${attrs} ${key}=${quote}${value}${quote}`
+ }, '')
+}
+
+export const toHTML = (tree: AST[]) => {
+ const htmlStrings: string[] = tree.map(node => {
+ if (node.type === 'text') return node.content
+    if (node.type === 'comment') return `<!--${node.content}-->`
+
+ const { tagName, attributes, children } = node as ElementAST
+ const isSelfClosing = voidTags.includes(tagName.toLowerCase())
+
+ if (isSelfClosing) return `<${tagName}${formatAttributes(attributes)}>`
+    return `<${tagName}${formatAttributes(attributes)}>${toHTML(children)}</${tagName}>`
+ })
+ return htmlStrings.join('')
+}
\ No newline at end of file
diff --git a/src/utils/htmlParser/tags.ts b/src/utils/htmlParser/tags.ts
new file mode 100644
index 00000000..d0bff44a
--- /dev/null
+++ b/src/utils/htmlParser/tags.ts
@@ -0,0 +1,16 @@
+export const childlessTags = ['style', 'script', 'template']
+
+export const closingTags = ['html', 'head', 'body', 'p', 'dt', 'dd', 'li', 'option', 'thead', 'th', 'tbody', 'tr', 'td', 'tfoot', 'colgroup']
+
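+// Ancestors that stop implicit closing: e.g. an <li> opened inside a nested <ul>
+// does not auto-close an <li> that is still open outside that <ul>.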
+export const closingTagAncestorBreakers: { [key: string]: string[] } = {
+ li: ['ul', 'ol', 'menu'],
+ dt: ['dl'],
+ dd: ['dl'],
+ tbody: ['table'],
+ thead: ['table'],
+ tfoot: ['table'],
+ tr: ['table'],
+ td: ['table'],
+}
+
+export const voidTags = ['!doctype', 'area', 'base', 'br', 'col', 'command', 'embed', 'hr', 'img', 'input', 'keygen', 'link', 'meta', 'param', 'source', 'track', 'wbr']
\ No newline at end of file
diff --git a/src/utils/htmlParser/types.ts b/src/utils/htmlParser/types.ts
new file mode 100644
index 00000000..93f198f7
--- /dev/null
+++ b/src/utils/htmlParser/types.ts
@@ -0,0 +1,69 @@
+export interface ElementAttribute {
+ key: string;
+ value: string | null;
+}
+
+export interface CommentElement {
+ type: 'comment';
+ content: string;
+}
+
+export interface TextElement {
+ type: 'text';
+ content: string;
+}
+
+export interface NormalElement {
+ type: 'element';
+ tagName: string;
+ children: HTMLNode[];
+ attributes: string[];
+}
+
+export type HTMLNode = CommentElement | TextElement | NormalElement
+
+export interface ElementAST {
+ type: 'element';
+ tagName: string;
+ children: AST[];
+ attributes: ElementAttribute[];
+}
+
+export interface CommentOrTextAST {
+ type: 'comment' | 'text';
+ content: string;
+}
+
+export type AST = CommentOrTextAST | ElementAST
+
+export interface TagStartToken {
+ type: 'tag-start';
+ close: boolean;
+}
+
+export interface TagEndToken {
+ type: 'tag-end';
+ close: boolean;
+}
+
+export interface TagToken {
+ type: 'tag';
+ content: string;
+}
+
+export interface TextToken {
+ type: 'text';
+ content: string;
+}
+
+export interface CommentToken {
+ type: 'comment';
+ content: string;
+}
+
+export interface AttributeToken {
+ type: 'attribute';
+ content: string;
+}
+
+export type Token = TagStartToken | TagEndToken | TagToken | TextToken | CommentToken | AttributeToken