syncthing/vendor/github.com/a8m/mark/parser.go
Jakob Borg 916ec63af6 cmd/stdiscosrv: New discovery server (fixes #4618)
This is a new revision of the discovery server. Relevant changes and
non-changes:

- Protocol towards clients is unchanged.

- Recommended large scale design is still to be deployed nehind nginx (I
  tested, and it's still a lot faster at terminating TLS).

- Database backend is leveldb again, only. It scales enough, is easy to
  setup, and we don't need any backend to take care of.

- Server supports replication. This is a simple TCP channel - protect it
  with a firewall when deploying over the internet. (We deploy this within
  the same datacenter, and with firewall.) Any incoming client announces
  are sent over the replication channel(s) to other peer discosrvs.
  Incoming replication changes are applied to the database as if they came
  from clients, but without the TLS/certificate overhead.

- Metrics are exposed using the prometheus library, when enabled.

- The database values and replication protocol is protobuf, because JSON
  was quite CPU intensive when I tried that and benchmarked it.

- The "Retry-After" value for failed lookups gets slowly increased from
  a default of 120 seconds, by 5 seconds for each failed lookup,
  independently by each discosrv. This lowers the query load over time for
  clients that are never seen. The Retry-After maxes out at 3600 after a
  couple of weeks of this increase. The number of failed lookups is
  stored in the database, now and then (avoiding making each lookup a
  database put).

All in all this means clients can be pointed towards a cluster using
just multiple A / AAAA records to gain both load sharing and redundancy
(if one is down, clients will talk to the remaining ones).

GitHub-Pull-Request: https://github.com/syncthing/syncthing/pull/4648
2018-01-14 08:52:31 +00:00

437 lines
9.9 KiB
Go

package mark
import (
"regexp"
"strings"
"unicode"
"unicode/utf8"
)
// parse holds the state of the parser.
type parse struct {
Nodes []Node
lex Lexer
options *Options
tr *parse
output string
peekCount int
token [3]item // three-token lookahead for parser
links map[string]*DefLinkNode // Deflink parsing, used RefLinks
renderFn map[NodeType]RenderFn // Custom overridden fns
}
// Return new parser
func newParse(input string, opts *Options) *parse {
return &parse{
lex: lex(input),
options: opts,
links: make(map[string]*DefLinkNode),
renderFn: make(map[NodeType]RenderFn),
}
}
// parse convert the raw text to Nodeparse.
func (p *parse) parse() {
Loop:
for {
var n Node
switch t := p.peek(); t.typ {
case itemEOF, itemError:
break Loop
case itemNewLine:
p.next()
case itemHr:
n = p.newHr(p.next().pos)
case itemHTML:
t = p.next()
n = p.newHTML(t.pos, t.val)
case itemDefLink:
n = p.parseDefLink()
case itemHeading, itemLHeading:
n = p.parseHeading()
case itemCodeBlock, itemGfmCodeBlock:
n = p.parseCodeBlock()
case itemList:
n = p.parseList()
case itemTable, itemLpTable:
n = p.parseTable()
case itemBlockQuote:
n = p.parseBlockQuote()
case itemIndent:
space := p.next()
// If it isn't followed by itemText
if p.peek().typ != itemText {
continue
}
p.backup2(space)
fallthrough
// itemText
default:
tmp := p.newParagraph(t.pos)
tmp.Nodes = p.parseText(p.next().val + p.scanLines())
n = tmp
}
if n != nil {
p.append(n)
}
}
}
// Root getter
func (p *parse) root() *parse {
if p.tr == nil {
return p
}
return p.tr.root()
}
// Render parse nodes to the wanted output
func (p *parse) render() {
var output string
for i, node := range p.Nodes {
// If there's a custom render function, use it instead.
if fn, ok := p.renderFn[node.Type()]; ok {
output = fn(node)
} else {
output = node.Render()
}
p.output += output
if output != "" && i != len(p.Nodes)-1 {
p.output += "\n"
}
}
}
// append new node to nodes-list
func (p *parse) append(n Node) {
p.Nodes = append(p.Nodes, n)
}
// next returns the next token
func (p *parse) next() item {
if p.peekCount > 0 {
p.peekCount--
} else {
p.token[0] = p.lex.nextItem()
}
return p.token[p.peekCount]
}
// peek returns but does not consume the next token.
func (p *parse) peek() item {
if p.peekCount > 0 {
return p.token[p.peekCount-1]
}
p.peekCount = 1
p.token[0] = p.lex.nextItem()
return p.token[0]
}
// backup backs the input stream tp one token
func (p *parse) backup() {
p.peekCount++
}
// backup2 backs the input stream up two tokens.
// The zeroth token is already there.
func (p *parse) backup2(t1 item) {
p.token[1] = t1
p.peekCount = 2
}
// parseText
func (p *parse) parseText(input string) (nodes []Node) {
// Trim whitespaces that not a line-break
input = regexp.MustCompile(`(?m)^ +| +(\n|$)`).ReplaceAllStringFunc(input, func(s string) string {
if reBr.MatchString(s) {
return s
}
return strings.Replace(s, " ", "", -1)
})
l := lexInline(input)
for token := range l.items {
var node Node
switch token.typ {
case itemBr:
node = p.newBr(token.pos)
case itemStrong, itemItalic, itemStrike, itemCode:
node = p.parseEmphasis(token.typ, token.pos, token.val)
case itemLink, itemAutoLink, itemGfmLink:
var title, href string
var text []Node
if token.typ == itemLink {
match := reLink.FindStringSubmatch(token.val)
text = p.parseText(match[1])
href, title = match[2], match[3]
} else {
var match []string
if token.typ == itemGfmLink {
match = reGfmLink.FindStringSubmatch(token.val)
} else {
match = reAutoLink.FindStringSubmatch(token.val)
}
href = match[1]
text = append(text, p.newText(token.pos, match[1]))
}
node = p.newLink(token.pos, title, href, text...)
case itemImage:
match := reImage.FindStringSubmatch(token.val)
node = p.newImage(token.pos, match[3], match[2], match[1])
case itemRefLink, itemRefImage:
match := reRefLink.FindStringSubmatch(token.val)
text, ref := match[1], match[2]
if ref == "" {
ref = text
}
if token.typ == itemRefLink {
node = p.newRefLink(token.typ, token.pos, token.val, ref, p.parseText(text))
} else {
node = p.newRefImage(token.typ, token.pos, token.val, ref, text)
}
case itemHTML:
node = p.newHTML(token.pos, token.val)
default:
node = p.newText(token.pos, token.val)
}
nodes = append(nodes, node)
}
return nodes
}
// parse inline emphasis
func (p *parse) parseEmphasis(typ itemType, pos Pos, val string) *EmphasisNode {
var re *regexp.Regexp
switch typ {
case itemStrike:
re = reStrike
case itemStrong:
re = reStrong
case itemCode:
re = reCode
case itemItalic:
re = reItalic
}
node := p.newEmphasis(pos, typ)
match := re.FindStringSubmatch(val)
text := match[len(match)-1]
if text == "" {
text = match[1]
}
node.Nodes = p.parseText(text)
return node
}
// parse heading block
func (p *parse) parseHeading() (node *HeadingNode) {
token := p.next()
level := 1
var text string
if token.typ == itemHeading {
match := reHeading.FindStringSubmatch(token.val)
level, text = len(match[1]), match[2]
} else {
match := reLHeading.FindStringSubmatch(token.val)
// using equal signs for first-level, and dashes for second-level.
text = match[1]
if match[2] == "-" {
level = 2
}
}
node = p.newHeading(token.pos, level, text)
node.Nodes = p.parseText(text)
return
}
func (p *parse) parseDefLink() *DefLinkNode {
token := p.next()
match := reDefLink.FindStringSubmatch(token.val)
name := strings.ToLower(match[1])
// name(lowercase), href, title
n := p.newDefLink(token.pos, name, match[2], match[3])
// store in links
links := p.root().links
if _, ok := links[name]; !ok {
links[name] = n
}
return n
}
// parse codeBlock
func (p *parse) parseCodeBlock() *CodeNode {
var lang, text string
token := p.next()
if token.typ == itemGfmCodeBlock {
codeStart := reGfmCode.FindStringSubmatch(token.val)
lang = codeStart[3]
text = token.val[len(codeStart[0]):]
} else {
text = reCodeBlock.trim(token.val, "")
}
return p.newCode(token.pos, lang, text)
}
func (p *parse) parseBlockQuote() (n *BlockQuoteNode) {
token := p.next()
// replacer
re := regexp.MustCompile(`(?m)^ *> ?`)
raw := re.ReplaceAllString(token.val, "")
// TODO(a8m): doesn't work right now with defLink(inside the blockQuote)
tr := &parse{lex: lex(raw), tr: p}
tr.parse()
n = p.newBlockQuote(token.pos)
n.Nodes = tr.Nodes
return
}
// parse list
func (p *parse) parseList() *ListNode {
token := p.next()
list := p.newList(token.pos, isDigit(token.val))
Loop:
for {
switch token = p.peek(); token.typ {
case itemLooseItem, itemListItem:
list.append(p.parseListItem())
default:
break Loop
}
}
return list
}
// parse listItem
func (p *parse) parseListItem() *ListItemNode {
token := p.next()
item := p.newListItem(token.pos)
token.val = strings.TrimSpace(token.val)
if p.isTaskItem(token.val) {
item.Nodes = p.parseTaskItem(token)
return item
}
tr := &parse{lex: lex(token.val), tr: p}
tr.parse()
for _, node := range tr.Nodes {
// wrap with paragraph only when it's a loose item
if n, ok := node.(*ParagraphNode); ok && token.typ == itemListItem {
item.Nodes = append(item.Nodes, n.Nodes...)
} else {
item.append(node)
}
}
return item
}
// parseTaskItem parses list item as a task item.
func (p *parse) parseTaskItem(token item) []Node {
checkbox := p.newCheckbox(token.pos, token.val[1] == 'x')
token.val = strings.TrimSpace(token.val[3:])
return append([]Node{checkbox}, p.parseText(token.val)...)
}
// isTaskItem tests if the given string is list task item.
func (p *parse) isTaskItem(s string) bool {
if len(s) < 5 || s[0] != '[' || (s[1] != 'x' && s[1] != ' ') || s[2] != ']' {
return false
}
return "" != strings.TrimSpace(s[3:])
}
// parse table
func (p *parse) parseTable() *TableNode {
table := p.newTable(p.next().pos)
// Align [ None, Left, Right, ... ]
// Header [ Cells: [ ... ] ]
// Data: [ Rows: [ Cells: [ ... ] ] ]
rows := struct {
Align []AlignType
Header []item
Cells [][]item
}{}
Loop:
for i := 0; ; {
switch token := p.next(); token.typ {
case itemTableRow:
i++
if i > 2 {
rows.Cells = append(rows.Cells, []item{})
}
case itemTableCell:
// Header
if i == 1 {
rows.Header = append(rows.Header, token)
// Alignment
} else if i == 2 {
rows.Align = append(rows.Align, parseAlign(token.val))
// Data
} else {
pos := i - 3
rows.Cells[pos] = append(rows.Cells[pos], token)
}
default:
p.backup()
break Loop
}
}
// Tranform to nodes
table.append(p.parseCells(Header, rows.Header, rows.Align))
// Table body
for _, row := range rows.Cells {
table.append(p.parseCells(Data, row, rows.Align))
}
return table
}
// parse cells and return new row
func (p *parse) parseCells(kind int, items []item, align []AlignType) *RowNode {
var row *RowNode
for i, item := range items {
if i == 0 {
row = p.newRow(item.pos)
}
cell := p.newCell(item.pos, kind, align[i])
cell.Nodes = p.parseText(item.val)
row.append(cell)
}
return row
}
// Used to consume lines(itemText) for a continues paragraphs
func (p *parse) scanLines() (s string) {
for {
tkn := p.next()
if tkn.typ == itemText || tkn.typ == itemIndent {
s += tkn.val
} else if tkn.typ == itemNewLine {
if t := p.peek().typ; t != itemText && t != itemIndent {
p.backup2(tkn)
break
}
s += tkn.val
} else {
p.backup()
break
}
}
return
}
// get align-string and return the align type of it
func parseAlign(s string) (typ AlignType) {
sfx, pfx := strings.HasSuffix(s, ":"), strings.HasPrefix(s, ":")
switch {
case sfx && pfx:
typ = Center
case sfx:
typ = Right
case pfx:
typ = Left
}
return
}
// test if given string is digit
func isDigit(s string) bool {
r, _ := utf8.DecodeRuneInString(s)
return unicode.IsDigit(r)
}