syncthing/vendor/github.com/a8m/mark/parser.go

437 lines
9.9 KiB
Go

package mark
import (
"regexp"
"strings"
"unicode"
"unicode/utf8"
)
// parse holds the state of the parser.
type parse struct {
Nodes []Node
lex Lexer
options *Options
tr *parse
output string
peekCount int
token [3]item // three-token lookahead for parser
links map[string]*DefLinkNode // Deflink parsing, used RefLinks
renderFn map[NodeType]RenderFn // Custom overridden fns
}
// Return new parser
func newParse(input string, opts *Options) *parse {
return &parse{
lex: lex(input),
options: opts,
links: make(map[string]*DefLinkNode),
renderFn: make(map[NodeType]RenderFn),
}
}
// parse convert the raw text to Nodeparse.
func (p *parse) parse() {
Loop:
for {
var n Node
switch t := p.peek(); t.typ {
case itemEOF, itemError:
break Loop
case itemNewLine:
p.next()
case itemHr:
n = p.newHr(p.next().pos)
case itemHTML:
t = p.next()
n = p.newHTML(t.pos, t.val)
case itemDefLink:
n = p.parseDefLink()
case itemHeading, itemLHeading:
n = p.parseHeading()
case itemCodeBlock, itemGfmCodeBlock:
n = p.parseCodeBlock()
case itemList:
n = p.parseList()
case itemTable, itemLpTable:
n = p.parseTable()
case itemBlockQuote:
n = p.parseBlockQuote()
case itemIndent:
space := p.next()
// If it isn't followed by itemText
if p.peek().typ != itemText {
continue
}
p.backup2(space)
fallthrough
// itemText
default:
tmp := p.newParagraph(t.pos)
tmp.Nodes = p.parseText(p.next().val + p.scanLines())
n = tmp
}
if n != nil {
p.append(n)
}
}
}
// Root getter
func (p *parse) root() *parse {
if p.tr == nil {
return p
}
return p.tr.root()
}
// Render parse nodes to the wanted output
func (p *parse) render() {
var output string
for i, node := range p.Nodes {
// If there's a custom render function, use it instead.
if fn, ok := p.renderFn[node.Type()]; ok {
output = fn(node)
} else {
output = node.Render()
}
p.output += output
if output != "" && i != len(p.Nodes)-1 {
p.output += "\n"
}
}
}
// append new node to nodes-list
func (p *parse) append(n Node) {
p.Nodes = append(p.Nodes, n)
}
// next returns the next token
func (p *parse) next() item {
if p.peekCount > 0 {
p.peekCount--
} else {
p.token[0] = p.lex.nextItem()
}
return p.token[p.peekCount]
}
// peek returns but does not consume the next token.
func (p *parse) peek() item {
if p.peekCount > 0 {
return p.token[p.peekCount-1]
}
p.peekCount = 1
p.token[0] = p.lex.nextItem()
return p.token[0]
}
// backup backs the input stream tp one token
func (p *parse) backup() {
p.peekCount++
}
// backup2 backs the input stream up two tokens.
// The zeroth token is already there.
func (p *parse) backup2(t1 item) {
p.token[1] = t1
p.peekCount = 2
}
// parseText
func (p *parse) parseText(input string) (nodes []Node) {
// Trim whitespaces that not a line-break
input = regexp.MustCompile(`(?m)^ +| +(\n|$)`).ReplaceAllStringFunc(input, func(s string) string {
if reBr.MatchString(s) {
return s
}
return strings.Replace(s, " ", "", -1)
})
l := lexInline(input)
for token := range l.items {
var node Node
switch token.typ {
case itemBr:
node = p.newBr(token.pos)
case itemStrong, itemItalic, itemStrike, itemCode:
node = p.parseEmphasis(token.typ, token.pos, token.val)
case itemLink, itemAutoLink, itemGfmLink:
var title, href string
var text []Node
if token.typ == itemLink {
match := reLink.FindStringSubmatch(token.val)
text = p.parseText(match[1])
href, title = match[2], match[3]
} else {
var match []string
if token.typ == itemGfmLink {
match = reGfmLink.FindStringSubmatch(token.val)
} else {
match = reAutoLink.FindStringSubmatch(token.val)
}
href = match[1]
text = append(text, p.newText(token.pos, match[1]))
}
node = p.newLink(token.pos, title, href, text...)
case itemImage:
match := reImage.FindStringSubmatch(token.val)
node = p.newImage(token.pos, match[3], match[2], match[1])
case itemRefLink, itemRefImage:
match := reRefLink.FindStringSubmatch(token.val)
text, ref := match[1], match[2]
if ref == "" {
ref = text
}
if token.typ == itemRefLink {
node = p.newRefLink(token.typ, token.pos, token.val, ref, p.parseText(text))
} else {
node = p.newRefImage(token.typ, token.pos, token.val, ref, text)
}
case itemHTML:
node = p.newHTML(token.pos, token.val)
default:
node = p.newText(token.pos, token.val)
}
nodes = append(nodes, node)
}
return nodes
}
// parse inline emphasis
func (p *parse) parseEmphasis(typ itemType, pos Pos, val string) *EmphasisNode {
var re *regexp.Regexp
switch typ {
case itemStrike:
re = reStrike
case itemStrong:
re = reStrong
case itemCode:
re = reCode
case itemItalic:
re = reItalic
}
node := p.newEmphasis(pos, typ)
match := re.FindStringSubmatch(val)
text := match[len(match)-1]
if text == "" {
text = match[1]
}
node.Nodes = p.parseText(text)
return node
}
// parse heading block
func (p *parse) parseHeading() (node *HeadingNode) {
token := p.next()
level := 1
var text string
if token.typ == itemHeading {
match := reHeading.FindStringSubmatch(token.val)
level, text = len(match[1]), match[2]
} else {
match := reLHeading.FindStringSubmatch(token.val)
// using equal signs for first-level, and dashes for second-level.
text = match[1]
if match[2] == "-" {
level = 2
}
}
node = p.newHeading(token.pos, level, text)
node.Nodes = p.parseText(text)
return
}
func (p *parse) parseDefLink() *DefLinkNode {
token := p.next()
match := reDefLink.FindStringSubmatch(token.val)
name := strings.ToLower(match[1])
// name(lowercase), href, title
n := p.newDefLink(token.pos, name, match[2], match[3])
// store in links
links := p.root().links
if _, ok := links[name]; !ok {
links[name] = n
}
return n
}
// parse codeBlock
func (p *parse) parseCodeBlock() *CodeNode {
var lang, text string
token := p.next()
if token.typ == itemGfmCodeBlock {
codeStart := reGfmCode.FindStringSubmatch(token.val)
lang = codeStart[3]
text = token.val[len(codeStart[0]):]
} else {
text = reCodeBlock.trim(token.val, "")
}
return p.newCode(token.pos, lang, text)
}
func (p *parse) parseBlockQuote() (n *BlockQuoteNode) {
token := p.next()
// replacer
re := regexp.MustCompile(`(?m)^ *> ?`)
raw := re.ReplaceAllString(token.val, "")
// TODO(a8m): doesn't work right now with defLink(inside the blockQuote)
tr := &parse{lex: lex(raw), tr: p}
tr.parse()
n = p.newBlockQuote(token.pos)
n.Nodes = tr.Nodes
return
}
// parse list
func (p *parse) parseList() *ListNode {
token := p.next()
list := p.newList(token.pos, isDigit(token.val))
Loop:
for {
switch token = p.peek(); token.typ {
case itemLooseItem, itemListItem:
list.append(p.parseListItem())
default:
break Loop
}
}
return list
}
// parse listItem
func (p *parse) parseListItem() *ListItemNode {
token := p.next()
item := p.newListItem(token.pos)
token.val = strings.TrimSpace(token.val)
if p.isTaskItem(token.val) {
item.Nodes = p.parseTaskItem(token)
return item
}
tr := &parse{lex: lex(token.val), tr: p}
tr.parse()
for _, node := range tr.Nodes {
// wrap with paragraph only when it's a loose item
if n, ok := node.(*ParagraphNode); ok && token.typ == itemListItem {
item.Nodes = append(item.Nodes, n.Nodes...)
} else {
item.append(node)
}
}
return item
}
// parseTaskItem parses list item as a task item.
func (p *parse) parseTaskItem(token item) []Node {
checkbox := p.newCheckbox(token.pos, token.val[1] == 'x')
token.val = strings.TrimSpace(token.val[3:])
return append([]Node{checkbox}, p.parseText(token.val)...)
}
// isTaskItem tests if the given string is list task item.
func (p *parse) isTaskItem(s string) bool {
if len(s) < 5 || s[0] != '[' || (s[1] != 'x' && s[1] != ' ') || s[2] != ']' {
return false
}
return "" != strings.TrimSpace(s[3:])
}
// parse table
func (p *parse) parseTable() *TableNode {
table := p.newTable(p.next().pos)
// Align [ None, Left, Right, ... ]
// Header [ Cells: [ ... ] ]
// Data: [ Rows: [ Cells: [ ... ] ] ]
rows := struct {
Align []AlignType
Header []item
Cells [][]item
}{}
Loop:
for i := 0; ; {
switch token := p.next(); token.typ {
case itemTableRow:
i++
if i > 2 {
rows.Cells = append(rows.Cells, []item{})
}
case itemTableCell:
// Header
if i == 1 {
rows.Header = append(rows.Header, token)
// Alignment
} else if i == 2 {
rows.Align = append(rows.Align, parseAlign(token.val))
// Data
} else {
pos := i - 3
rows.Cells[pos] = append(rows.Cells[pos], token)
}
default:
p.backup()
break Loop
}
}
// Tranform to nodes
table.append(p.parseCells(Header, rows.Header, rows.Align))
// Table body
for _, row := range rows.Cells {
table.append(p.parseCells(Data, row, rows.Align))
}
return table
}
// parse cells and return new row
func (p *parse) parseCells(kind int, items []item, align []AlignType) *RowNode {
var row *RowNode
for i, item := range items {
if i == 0 {
row = p.newRow(item.pos)
}
cell := p.newCell(item.pos, kind, align[i])
cell.Nodes = p.parseText(item.val)
row.append(cell)
}
return row
}
// Used to consume lines(itemText) for a continues paragraphs
func (p *parse) scanLines() (s string) {
for {
tkn := p.next()
if tkn.typ == itemText || tkn.typ == itemIndent {
s += tkn.val
} else if tkn.typ == itemNewLine {
if t := p.peek().typ; t != itemText && t != itemIndent {
p.backup2(tkn)
break
}
s += tkn.val
} else {
p.backup()
break
}
}
return
}
// get align-string and return the align type of it
func parseAlign(s string) (typ AlignType) {
sfx, pfx := strings.HasSuffix(s, ":"), strings.HasPrefix(s, ":")
switch {
case sfx && pfx:
typ = Center
case sfx:
typ = Right
case pfx:
typ = Left
}
return
}
// test if given string is digit
func isDigit(s string) bool {
r, _ := utf8.DecodeRuneInString(s)
return unicode.IsDigit(r)
}