2019-07-31 11:39:38 +05:30
|
|
|
// Copyright 2015-2019 Brett Vickers.
|
2017-01-10 04:21:47 +05:30
|
|
|
// Use of this source code is governed by a BSD-style
|
|
|
|
// license that can be found in the LICENSE file.
|
|
|
|
|
|
|
|
package etree
|
|
|
|
|
|
|
|
import (
|
2019-07-31 11:39:38 +05:30
|
|
|
"bufio"
|
2017-01-10 04:21:47 +05:30
|
|
|
"io"
|
|
|
|
"strings"
|
2019-07-31 11:39:38 +05:30
|
|
|
"unicode/utf8"
|
2017-01-10 04:21:47 +05:30
|
|
|
)
|
|
|
|
|
|
|
|
// stack is a minimal last-in-first-out container of arbitrary values.
// The zero value is an empty stack ready for use.
type stack struct {
	data []interface{}
}

// empty reports whether the stack holds no values.
func (s *stack) empty() bool {
	return len(s.data) == 0
}

// push places value on top of the stack.
func (s *stack) push(value interface{}) {
	s.data = append(s.data, value)
}

// pop removes the top value from the stack and returns it. Calling pop
// on an empty stack panics with an index-out-of-range error.
func (s *stack) pop() interface{} {
	top := len(s.data) - 1
	value := s.data[top]
	// Clear the vacated slot so the backing array no longer references
	// the popped value.
	s.data[top] = nil
	s.data = s.data[:top]
	return value
}

// peek returns the top value without removing it. Calling peek on an
// empty stack panics with an index-out-of-range error.
func (s *stack) peek() interface{} {
	top := len(s.data) - 1
	return s.data[top]
}
|
|
|
|
|
|
|
|
// fifo is a first-in-first-out queue backed by a circular buffer.
// head indexes the oldest stored value and tail indexes the slot the
// next value will be written to. The zero value is an empty queue.
type fifo struct {
	data       []interface{}
	head, tail int
}

// add appends value to the back of the queue, enlarging the buffer
// first when fewer than two free slots remain.
func (f *fifo) add(value interface{}) {
	if f.len()+1 >= len(f.data) {
		f.grow()
	}
	f.data[f.tail] = value
	// Advance tail, wrapping to the start of the buffer at the end.
	f.tail = (f.tail + 1) % len(f.data)
}

// remove takes the oldest value off the front of the queue and returns
// it. It does not check for emptiness itself.
func (f *fifo) remove() interface{} {
	value := f.data[f.head]
	// Drop the buffer's reference to the removed value.
	f.data[f.head] = nil
	f.head = (f.head + 1) % len(f.data)
	return value
}

// len returns the number of values currently stored in the queue.
func (f *fifo) len() int {
	n := f.tail - f.head
	if n < 0 {
		// The stored region wraps around the end of the buffer.
		n += len(f.data)
	}
	return n
}

// grow doubles the buffer capacity (starting at 4) and moves the stored
// values to the front of the new buffer in queue order.
func (f *fifo) grow() {
	size := 2 * len(f.data)
	if size == 0 {
		size = 4
	}
	count := f.len()
	buf := make([]interface{}, size)
	if f.head <= f.tail {
		// Contiguous region: a single copy suffices.
		copy(buf, f.data[f.head:f.tail])
	} else {
		// Wrapped region: copy the head segment, then the tail segment.
		moved := copy(buf, f.data[f.head:])
		copy(buf[moved:], f.data[:f.tail])
	}
	f.data, f.head, f.tail = buf, 0, count
}
|
|
|
|
|
|
|
|
// countReader is a pass-through io.Reader that keeps a running total of
// the bytes obtained from the reader it wraps.
type countReader struct {
	r     io.Reader
	bytes int64
}

// newCountReader wraps r in a countReader whose byte total starts at zero.
func newCountReader(r io.Reader) *countReader {
	cr := new(countReader)
	cr.r = r
	return cr
}

// Read forwards the call to the wrapped reader and adds the number of
// bytes it reported to the running total, whether or not an error was
// also returned.
func (cr *countReader) Read(p []byte) (n int, err error) {
	n, err = cr.r.Read(p)
	cr.bytes += int64(n)
	return n, err
}
|
|
|
|
|
|
|
|
// countWriter is a pass-through io.Writer that keeps a running total of
// the bytes accepted by the writer it wraps.
type countWriter struct {
	w     io.Writer
	bytes int64
}

// newCountWriter wraps w in a countWriter whose byte total starts at zero.
func newCountWriter(w io.Writer) *countWriter {
	cw := new(countWriter)
	cw.w = w
	return cw
}

// Write forwards the call to the wrapped writer and adds the number of
// bytes it reported as written to the running total, whether or not an
// error was also returned.
func (cw *countWriter) Write(p []byte) (n int, err error) {
	n, err = cw.w.Write(p)
	cw.bytes += int64(n)
	return n, err
}
|
|
|
|
|
|
|
|
// isWhitespace reports whether every byte of s is a space, tab, line
// feed or carriage return. The empty string is considered whitespace.
func isWhitespace(s string) bool {
	for i := 0; i < len(s); i++ {
		switch s[i] {
		case ' ', '\t', '\n', '\r':
			// Whitespace byte; keep scanning.
		default:
			return false
		}
	}
	return true
}
|
|
|
|
|
|
|
|
// spaceMatch reports whether namespace a matches namespace b. An empty
// a acts as a wildcard that matches any b; otherwise the two must be
// identical.
func spaceMatch(a, b string) bool {
	return a == "" || a == b
}
|
|
|
|
|
|
|
|
// spaceDecompose splits a "namespace:tag" identifier at its first ':'
// and returns the text before it as space and the text after it as key.
// When str contains no ':', space is empty and key is str itself.
func spaceDecompose(str string) (space, key string) {
	if i := strings.IndexByte(str, ':'); i >= 0 {
		return str[:i], str[i+1:]
	}
	return "", str
}
|
|
|
|
|
2019-07-31 11:39:38 +05:30
|
|
|
// Source strings used by indentCRLF and indentLF. Each is a CRLF newline
// followed by a run of one indent character. The indent functions slice
// a prefix of the source string when the requested indent fits inside
// it and only allocate a longer string when it does not, so both runs
// are kept at the same length (16 characters).
const (
	indentSpaces = "\r\n                "
	indentTabs   = "\r\n\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t"
)
|
|
|
|
|
2019-07-31 11:39:38 +05:30
|
|
|
// indentCRLF returns a CRLF newline followed by n indent characters.
// source must be a CRLF followed by at least one indent character; the
// character at index 2 is the one that gets repeated. A negative n
// yields just the CRLF.
func indentCRLF(n int, source string) string {
	if n < 0 {
		return source[:2]
	}
	if n < len(source)-1 {
		// The requested indent fits inside source; slice it out.
		return source[:n+2]
	}
	// source is too short; extend it with more indent characters.
	extra := n - len(source) + 2
	return source + strings.Repeat(source[2:3], extra)
}
|
|
|
|
|
|
|
|
// indentLF returns a LF newline followed by n indent characters.
// source must be a CRLF followed by at least one indent character; the
// leading CR is skipped and the character at index 2 is the one that
// gets repeated. A negative n yields just the LF.
func indentLF(n int, source string) string {
	if n < 0 {
		return source[1:2]
	}
	if n < len(source)-1 {
		// The requested indent fits inside source; slice it out.
		return source[1 : n+2]
	}
	// source is too short; extend it with more indent characters.
	extra := n - len(source) + 2
	return source[1:] + strings.Repeat(source[2:3], extra)
}
|
|
|
|
|
|
|
|
// nextIndex searches s for sep beginning at byte position offset and
// returns the index, relative to the start of s, of the first match.
// It returns -1 when sep does not occur at or after offset.
func nextIndex(s, sep string, offset int) int {
	if i := strings.Index(s[offset:], sep); i >= 0 {
		return offset + i
	}
	return -1
}
|
|
|
|
|
|
|
|
// isInteger reports whether s is a decimal integer: an optional leading
// '-' followed by one or more ASCII digits. The empty string and a lone
// "-" are not integers and yield false.
func isInteger(s string) bool {
	start := 0
	if len(s) > 0 && s[0] == '-' {
		start = 1
	}
	// At least one digit must follow the optional sign.
	if start == len(s) {
		return false
	}
	for i := start; i < len(s); i++ {
		if s[i] < '0' || s[i] > '9' {
			return false
		}
	}
	return true
}
|
2019-07-31 11:39:38 +05:30
|
|
|
|
|
|
|
// escapeMode selects which characters escapeString replaces.
type escapeMode byte

const (
	// escapeNormal escapes & < > ' and ".
	escapeNormal escapeMode = iota
	// escapeCanonicalText escapes & < > and carriage return.
	escapeCanonicalText
	// escapeCanonicalAttr escapes & < " tab, line feed and carriage
	// return.
	escapeCanonicalAttr
)

// escapeString writes an escaped version of s to w, replacing the
// characters selected by mode m with XML entity or character
// references. Bytes that do not decode to a valid UTF-8 sequence, and
// runes outside the range accepted by isInCharacterRange, are replaced
// with the Unicode replacement character U+FFFD. Everything else is
// copied through unchanged.
//
// Write errors are not reported here; bufio.Writer retains the first
// error and returns it from later writes and from Flush.
func escapeString(w *bufio.Writer, s string, m escapeMode) {
	var esc string
	last := 0 // start of the pending run of bytes that need no escaping
	for i := 0; i < len(s); {
		r, width := utf8.DecodeRuneInString(s[i:])
		i += width
		switch r {
		case '&':
			esc = "&amp;"
		case '<':
			esc = "&lt;"
		case '>':
			if m == escapeCanonicalAttr {
				continue
			}
			esc = "&gt;"
		case '\'':
			if m != escapeNormal {
				continue
			}
			esc = "&apos;"
		case '"':
			if m == escapeCanonicalText {
				continue
			}
			esc = "&quot;"
		case '\t':
			if m != escapeCanonicalAttr {
				continue
			}
			esc = "&#x9;"
		case '\n':
			if m != escapeCanonicalAttr {
				continue
			}
			esc = "&#xA;"
		case '\r':
			if m == escapeNormal {
				continue
			}
			esc = "&#xD;"
		default:
			// A decoded U+FFFD of width 1 means the byte was not valid
			// UTF-8; a genuine U+FFFD in the input is three bytes wide
			// and passes through untouched.
			if !isInCharacterRange(r) || (r == 0xFFFD && width == 1) {
				esc = "\uFFFD"
				break
			}
			continue
		}
		// Flush the unescaped run that precedes this rune, then the
		// replacement for the rune itself.
		w.WriteString(s[last : i-width])
		w.WriteString(esc)
		last = i
	}
	w.WriteString(s[last:])
}

// isInCharacterRange reports whether r falls in one of the ranges
// #x9, #xA, #xD, [#x20-#xD7FF], [#xE000-#xFFFD] or [#x10000-#x10FFFF].
func isInCharacterRange(r rune) bool {
	return r == 0x09 ||
		r == 0x0A ||
		r == 0x0D ||
		(r >= 0x20 && r <= 0xD7FF) ||
		(r >= 0xE000 && r <= 0xFFFD) ||
		(r >= 0x10000 && r <= 0x10FFFF)
}
|