65aaa607ab
Change made by: - running "gvt fetch" on each of the packages mentioned in Godeps/Godeps.json - `rm -rf Godeps` - tweaking the build scripts to not mention Godeps - tweaking the build scripts to test `./lib/...`, `./cmd/...` explicitly (to avoid testing vendor) - tweaking the build scripts to not juggle GOPATH for Godeps and instead set GO15VENDOREXPERIMENT. This also results in some updated packages at the same time I bet. Building with Go 1.3 and 1.4 still *works* but won't use our vendored dependencies - the user needs to have the actual packages in their GOPATH then, which they'll get with a normal "go get". Building with Go 1.6+ will get our vendored dependencies by default even when not using our build script, which is nice. By doing this we gain some freedom in that we can pick and choose manually what to include in vendor, as it's not based on just dependency analysis of our own code. This is also a risk as we might pick up dependencies we are unaware of, as the build may work locally with those packages present in GOPATH. On the other hand the build server will detect this as it has no packages in its GOPATH beyond what is included in the repo. Recommended tool to manage dependencies is github.com/FiloSottile/gvt.
276 lines
7.0 KiB
Go
276 lines
7.0 KiB
Go
// Copyright 2011 The Go Authors. All rights reserved.
|
|
// Use of this source code is governed by a BSD-style
|
|
// license that can be found in the LICENSE file.
|
|
|
|
package norm
|
|
|
|
import (
|
|
"bufio"
|
|
"bytes"
|
|
"fmt"
|
|
"regexp"
|
|
"runtime"
|
|
"strconv"
|
|
"strings"
|
|
"sync"
|
|
"testing"
|
|
"time"
|
|
"unicode/utf8"
|
|
|
|
"golang.org/x/text/internal/gen"
|
|
"golang.org/x/text/internal/testtext"
|
|
)
|
|
|
|
// once ensures loadTestData is invoked at most once, no matter how many
// tests call skipShort.
var once sync.Once
|
|
|
|
func skipShort(t *testing.T) {
|
|
testtext.SkipIfNotLong(t)
|
|
|
|
once.Do(func() { loadTestData(t) })
|
|
}
|
|
|
|
// This regression test runs the test set in NormalizationTest.txt
|
|
// (taken from http://www.unicode.org/Public/<unicode.Version>/ucd/).
|
|
//
|
|
// NormalizationTest.txt has form:
|
|
// @Part0 # Specific cases
|
|
// #
|
|
// 1E0A;1E0A;0044 0307;1E0A;0044 0307; # (Ḋ; Ḋ; D◌̇; Ḋ; D◌̇; ) LATIN CAPITAL LETTER D WITH DOT ABOVE
|
|
// 1E0C;1E0C;0044 0323;1E0C;0044 0323; # (Ḍ; Ḍ; D◌̣; Ḍ; D◌̣; ) LATIN CAPITAL LETTER D WITH DOT BELOW
|
|
//
|
|
// Each test has 5 columns (c1, c2, c3, c4, c5), where
|
|
// (c1, c2, c3, c4, c5) == (c1, NFC(c1), NFD(c1), NFKC(c1), NFKD(c1))
|
|
//
|
|
// CONFORMANCE:
|
|
// 1. The following invariants must be true for all conformant implementations
|
|
//
|
|
// NFC
|
|
// c2 == NFC(c1) == NFC(c2) == NFC(c3)
|
|
// c4 == NFC(c4) == NFC(c5)
|
|
//
|
|
// NFD
|
|
// c3 == NFD(c1) == NFD(c2) == NFD(c3)
|
|
// c5 == NFD(c4) == NFD(c5)
|
|
//
|
|
// NFKC
|
|
// c4 == NFKC(c1) == NFKC(c2) == NFKC(c3) == NFKC(c4) == NFKC(c5)
|
|
//
|
|
// NFKD
|
|
// c5 == NFKD(c1) == NFKD(c2) == NFKD(c3) == NFKD(c4) == NFKD(c5)
|
|
//
|
|
// 2. For every code point X assigned in this version of Unicode that is not
|
|
// specifically listed in Part 1, the following invariants must be true
|
|
// for all conformant implementations:
|
|
//
|
|
// X == NFC(X) == NFD(X) == NFKC(X) == NFKD(X)
|
|
//
|
|
|
|
// Column types: indices into Test.cols, in the order the columns appear on
// a line of NormalizationTest.txt.
const (
	cRaw        = iota // c1: the source string
	cNFC               // c2: NFC(c1)
	cNFD               // c3: NFD(c1)
	cNFKC              // c4: NFKC(c1)
	cNFKD              // c5: NFKD(c1)
	cMaxColumns        // number of columns; sizes Test.cols
)
|
|
|
|
// Holds data from NormalizationTest.txt
|
|
// part lists the sections read by loadTestData, in the order their @Part
// headers were encountered. Test.partnr indexes into this slice.
var part []Part
|
|
|
|
type Part struct {
|
|
name string
|
|
number int
|
|
tests []Test
|
|
}
|
|
|
|
type Test struct {
|
|
name string
|
|
partnr int
|
|
number int
|
|
r rune // used for character by character test
|
|
cols [cMaxColumns]string // Each has 5 entries, see below.
|
|
}
|
|
|
|
func (t Test) Name() string {
|
|
if t.number < 0 {
|
|
return part[t.partnr].name
|
|
}
|
|
return fmt.Sprintf("%s:%d", part[t.partnr].name, t.number)
|
|
}
|
|
|
|
// partRe matches a section header such as "@Part0 # Specific cases". Group 1
// is the single-digit part number and group 2 the text after "# ".
var partRe = regexp.MustCompile(`@Part(\d) # (.*)$`)
|
|
// testRe matches a test line: five ';'-terminated columns made of digits,
// the letters A-F and spaces (groups 1-5), followed by " # " and a free-form
// comment (group 6).
var testRe = regexp.MustCompile(`^` + strings.Repeat(`([\dA-F ]+);`, 5) + ` # (.*)$`)
|
|
|
|
// counter is the running number handed to each test line as loadTestData
// reads it; it keeps counting across parts.
var counter int
|
|
|
|
// Load the data form NormalizationTest.txt
|
|
func loadTestData(t *testing.T) {
|
|
f := gen.OpenUCDFile("NormalizationTest.txt")
|
|
defer f.Close()
|
|
scanner := bufio.NewScanner(f)
|
|
for scanner.Scan() {
|
|
line := scanner.Text()
|
|
if len(line) == 0 || line[0] == '#' {
|
|
continue
|
|
}
|
|
m := partRe.FindStringSubmatch(line)
|
|
if m != nil {
|
|
if len(m) < 3 {
|
|
t.Fatal("Failed to parse Part: ", line)
|
|
}
|
|
i, err := strconv.Atoi(m[1])
|
|
if err != nil {
|
|
t.Fatal(err)
|
|
}
|
|
name := m[2]
|
|
part = append(part, Part{name: name[:len(name)-1], number: i})
|
|
continue
|
|
}
|
|
m = testRe.FindStringSubmatch(line)
|
|
if m == nil || len(m) < 7 {
|
|
t.Fatalf(`Failed to parse: "%s" result: %#v`, line, m)
|
|
}
|
|
test := Test{name: m[6], partnr: len(part) - 1, number: counter}
|
|
counter++
|
|
for j := 1; j < len(m)-1; j++ {
|
|
for _, split := range strings.Split(m[j], " ") {
|
|
r, err := strconv.ParseUint(split, 16, 64)
|
|
if err != nil {
|
|
t.Fatal(err)
|
|
}
|
|
if test.r == 0 {
|
|
// save for CharacterByCharacterTests
|
|
test.r = rune(r)
|
|
}
|
|
var buf [utf8.UTFMax]byte
|
|
sz := utf8.EncodeRune(buf[:], rune(r))
|
|
test.cols[j-1] += string(buf[:sz])
|
|
}
|
|
}
|
|
part := &part[len(part)-1]
|
|
part.tests = append(part.tests, test)
|
|
}
|
|
if scanner.Err() != nil {
|
|
t.Fatal(scanner.Err())
|
|
}
|
|
}
|
|
|
|
func cmpResult(t *testing.T, tc *Test, name string, f Form, gold, test, result string) {
|
|
if gold != result {
|
|
t.Errorf("%s:%s: %s(%+q)=%+q; want %+q: %s",
|
|
tc.Name(), name, fstr[f], test, result, gold, tc.name)
|
|
}
|
|
}
|
|
|
|
func cmpIsNormal(t *testing.T, tc *Test, name string, f Form, test string, result, want bool) {
|
|
if result != want {
|
|
t.Errorf("%s:%s: %s(%+q)=%v; want %v", tc.Name(), name, fstr[f], test, result, want)
|
|
}
|
|
}
|
|
|
|
func doTest(t *testing.T, tc *Test, f Form, gold, test string) {
|
|
testb := []byte(test)
|
|
result := f.Bytes(testb)
|
|
cmpResult(t, tc, "Bytes", f, gold, test, string(result))
|
|
|
|
sresult := f.String(test)
|
|
cmpResult(t, tc, "String", f, gold, test, sresult)
|
|
|
|
acc := []byte{}
|
|
i := Iter{}
|
|
i.InitString(f, test)
|
|
for !i.Done() {
|
|
acc = append(acc, i.Next()...)
|
|
}
|
|
cmpResult(t, tc, "Iter.Next", f, gold, test, string(acc))
|
|
|
|
buf := make([]byte, 128)
|
|
acc = nil
|
|
for p := 0; p < len(testb); {
|
|
nDst, nSrc, _ := f.Transform(buf, testb[p:], true)
|
|
acc = append(acc, buf[:nDst]...)
|
|
p += nSrc
|
|
}
|
|
cmpResult(t, tc, "Transform", f, gold, test, string(acc))
|
|
|
|
for i := range test {
|
|
out := f.Append(f.Bytes([]byte(test[:i])), []byte(test[i:])...)
|
|
cmpResult(t, tc, fmt.Sprintf(":Append:%d", i), f, gold, test, string(out))
|
|
}
|
|
cmpIsNormal(t, tc, "IsNormal", f, test, f.IsNormal([]byte(test)), test == gold)
|
|
cmpIsNormal(t, tc, "IsNormalString", f, test, f.IsNormalString(test), test == gold)
|
|
}
|
|
|
|
func doConformanceTests(t *testing.T, tc *Test, partn int) {
|
|
for i := 0; i <= 2; i++ {
|
|
doTest(t, tc, NFC, tc.cols[1], tc.cols[i])
|
|
doTest(t, tc, NFD, tc.cols[2], tc.cols[i])
|
|
doTest(t, tc, NFKC, tc.cols[3], tc.cols[i])
|
|
doTest(t, tc, NFKD, tc.cols[4], tc.cols[i])
|
|
}
|
|
for i := 3; i <= 4; i++ {
|
|
doTest(t, tc, NFC, tc.cols[3], tc.cols[i])
|
|
doTest(t, tc, NFD, tc.cols[4], tc.cols[i])
|
|
doTest(t, tc, NFKC, tc.cols[3], tc.cols[i])
|
|
doTest(t, tc, NFKD, tc.cols[4], tc.cols[i])
|
|
}
|
|
}
|
|
|
|
func TestCharacterByCharacter(t *testing.T) {
|
|
skipShort(t)
|
|
tests := part[1].tests
|
|
var last rune = 0
|
|
for i := 0; i <= len(tests); i++ { // last one is special case
|
|
var r rune
|
|
if i == len(tests) {
|
|
r = 0x2FA1E // Don't have to go to 0x10FFFF
|
|
} else {
|
|
r = tests[i].r
|
|
}
|
|
for last++; last < r; last++ {
|
|
// Check all characters that were not explicitly listed in the test.
|
|
tc := &Test{partnr: 1, number: -1}
|
|
char := string(last)
|
|
doTest(t, tc, NFC, char, char)
|
|
doTest(t, tc, NFD, char, char)
|
|
doTest(t, tc, NFKC, char, char)
|
|
doTest(t, tc, NFKD, char, char)
|
|
}
|
|
if i < len(tests) {
|
|
doConformanceTests(t, &tests[i], 1)
|
|
}
|
|
}
|
|
}
|
|
|
|
func TestStandardTests(t *testing.T) {
|
|
skipShort(t)
|
|
for _, j := range []int{0, 2, 3} {
|
|
for _, test := range part[j].tests {
|
|
doConformanceTests(t, &test, j)
|
|
}
|
|
}
|
|
}
|
|
|
|
// TestPerformance verifies that normalization is O(n). If any of the
|
|
// code does not properly check for maxCombiningChars, normalization
|
|
// may exhibit O(n**2) behavior.
|
|
func TestPerformance(t *testing.T) {
|
|
skipShort(t)
|
|
runtime.GOMAXPROCS(2)
|
|
success := make(chan bool, 1)
|
|
go func() {
|
|
buf := bytes.Repeat([]byte("\u035D"), 1024*1024)
|
|
buf = append(buf, "\u035B"...)
|
|
NFC.Append(nil, buf...)
|
|
success <- true
|
|
}()
|
|
timeout := time.After(1 * time.Second)
|
|
select {
|
|
case <-success:
|
|
// test completed before the timeout
|
|
case <-timeout:
|
|
t.Errorf(`unexpectedly long time to complete PerformanceTest`)
|
|
}
|
|
}
|