mirror of
https://github.com/genuinetools/reg.git
synced 2024-09-19 16:51:01 -04:00
67bc3ef6c3
Signed-off-by: Jess Frazelle <acidburn@microsoft.com>
1317 lines
30 KiB
Go
1317 lines
30 KiB
Go
// Copyright 2013 The Go Authors. All rights reserved.
|
|
// Use of this source code is governed by a BSD-style
|
|
// license that can be found in the LICENSE file.
|
|
|
|
package transform
|
|
|
|
import (
|
|
"bytes"
|
|
"errors"
|
|
"fmt"
|
|
"io/ioutil"
|
|
"strconv"
|
|
"strings"
|
|
"testing"
|
|
"time"
|
|
"unicode/utf8"
|
|
|
|
"golang.org/x/text/internal/testtext"
|
|
)
|
|
|
|
type lowerCaseASCII struct{ NopResetter }
|
|
|
|
func (lowerCaseASCII) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
|
|
n := len(src)
|
|
if n > len(dst) {
|
|
n, err = len(dst), ErrShortDst
|
|
}
|
|
for i, c := range src[:n] {
|
|
if 'A' <= c && c <= 'Z' {
|
|
c += 'a' - 'A'
|
|
}
|
|
dst[i] = c
|
|
}
|
|
return n, n, err
|
|
}
|
|
|
|
// lowerCaseASCIILookahead lowercases the string and reports ErrShortSrc as long
|
|
// as the input is not atEOF.
|
|
type lowerCaseASCIILookahead struct{ NopResetter }
|
|
|
|
func (lowerCaseASCIILookahead) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
|
|
n := len(src)
|
|
if n > len(dst) {
|
|
n, err = len(dst), ErrShortDst
|
|
}
|
|
for i, c := range src[:n] {
|
|
if 'A' <= c && c <= 'Z' {
|
|
c += 'a' - 'A'
|
|
}
|
|
dst[i] = c
|
|
}
|
|
if !atEOF {
|
|
err = ErrShortSrc
|
|
}
|
|
return n, n, err
|
|
}
|
|
|
|
var errYouMentionedX = errors.New("you mentioned X")
|
|
|
|
type dontMentionX struct{ NopResetter }
|
|
|
|
func (dontMentionX) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
|
|
n := len(src)
|
|
if n > len(dst) {
|
|
n, err = len(dst), ErrShortDst
|
|
}
|
|
for i, c := range src[:n] {
|
|
if c == 'X' {
|
|
return i, i, errYouMentionedX
|
|
}
|
|
dst[i] = c
|
|
}
|
|
return n, n, err
|
|
}
|
|
|
|
var errAtEnd = errors.New("error after all text")
|
|
|
|
type errorAtEnd struct{ NopResetter }
|
|
|
|
func (errorAtEnd) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
|
|
n := copy(dst, src)
|
|
if n < len(src) {
|
|
return n, n, ErrShortDst
|
|
}
|
|
if atEOF {
|
|
return n, n, errAtEnd
|
|
}
|
|
return n, n, nil
|
|
}
|
|
|
|
type replaceWithConstant struct {
|
|
replacement string
|
|
written int
|
|
}
|
|
|
|
func (t *replaceWithConstant) Reset() {
|
|
t.written = 0
|
|
}
|
|
|
|
func (t *replaceWithConstant) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
|
|
if atEOF {
|
|
nDst = copy(dst, t.replacement[t.written:])
|
|
t.written += nDst
|
|
if t.written < len(t.replacement) {
|
|
err = ErrShortDst
|
|
}
|
|
}
|
|
return nDst, len(src), err
|
|
}
|
|
|
|
type addAnXAtTheEnd struct{ NopResetter }
|
|
|
|
func (addAnXAtTheEnd) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
|
|
n := copy(dst, src)
|
|
if n < len(src) {
|
|
return n, n, ErrShortDst
|
|
}
|
|
if !atEOF {
|
|
return n, n, nil
|
|
}
|
|
if len(dst) == n {
|
|
return n, n, ErrShortDst
|
|
}
|
|
dst[n] = 'X'
|
|
return n + 1, n, nil
|
|
}
|
|
|
|
// doublerAtEOF is a strange Transformer that transforms "this" to "tthhiiss",
|
|
// but only if atEOF is true.
|
|
type doublerAtEOF struct{ NopResetter }
|
|
|
|
func (doublerAtEOF) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
|
|
if !atEOF {
|
|
return 0, 0, ErrShortSrc
|
|
}
|
|
for i, c := range src {
|
|
if 2*i+2 >= len(dst) {
|
|
return 2 * i, i, ErrShortDst
|
|
}
|
|
dst[2*i+0] = c
|
|
dst[2*i+1] = c
|
|
}
|
|
return 2 * len(src), len(src), nil
|
|
}
|
|
|
|
// rleDecode and rleEncode implement a toy run-length encoding: "aabbbbbbbbbb"
|
|
// is encoded as "2a10b". The decoding is assumed to not contain any numbers.
|
|
|
|
type rleDecode struct{ NopResetter }
|
|
|
|
func (rleDecode) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
|
|
loop:
|
|
for len(src) > 0 {
|
|
n := 0
|
|
for i, c := range src {
|
|
if '0' <= c && c <= '9' {
|
|
n = 10*n + int(c-'0')
|
|
continue
|
|
}
|
|
if i == 0 {
|
|
return nDst, nSrc, errors.New("rleDecode: bad input")
|
|
}
|
|
if n > len(dst) {
|
|
return nDst, nSrc, ErrShortDst
|
|
}
|
|
for j := 0; j < n; j++ {
|
|
dst[j] = c
|
|
}
|
|
dst, src = dst[n:], src[i+1:]
|
|
nDst, nSrc = nDst+n, nSrc+i+1
|
|
continue loop
|
|
}
|
|
if atEOF {
|
|
return nDst, nSrc, errors.New("rleDecode: bad input")
|
|
}
|
|
return nDst, nSrc, ErrShortSrc
|
|
}
|
|
return nDst, nSrc, nil
|
|
}
|
|
|
|
type rleEncode struct {
|
|
NopResetter
|
|
|
|
// allowStutter means that "xxxxxxxx" can be encoded as "5x3x"
|
|
// instead of always as "8x".
|
|
allowStutter bool
|
|
}
|
|
|
|
func (e rleEncode) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
|
|
for len(src) > 0 {
|
|
n, c0 := len(src), src[0]
|
|
for i, c := range src[1:] {
|
|
if c != c0 {
|
|
n = i + 1
|
|
break
|
|
}
|
|
}
|
|
if n == len(src) && !atEOF && !e.allowStutter {
|
|
return nDst, nSrc, ErrShortSrc
|
|
}
|
|
s := strconv.Itoa(n)
|
|
if len(s) >= len(dst) {
|
|
return nDst, nSrc, ErrShortDst
|
|
}
|
|
copy(dst, s)
|
|
dst[len(s)] = c0
|
|
dst, src = dst[len(s)+1:], src[n:]
|
|
nDst, nSrc = nDst+len(s)+1, nSrc+n
|
|
}
|
|
return nDst, nSrc, nil
|
|
}
|
|
|
|
// trickler consumes all input bytes, but writes a single byte at a time to dst.
|
|
type trickler []byte
|
|
|
|
func (t *trickler) Reset() {
|
|
*t = nil
|
|
}
|
|
|
|
func (t *trickler) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
|
|
*t = append(*t, src...)
|
|
if len(*t) == 0 {
|
|
return 0, 0, nil
|
|
}
|
|
if len(dst) == 0 {
|
|
return 0, len(src), ErrShortDst
|
|
}
|
|
dst[0] = (*t)[0]
|
|
*t = (*t)[1:]
|
|
if len(*t) > 0 {
|
|
err = ErrShortDst
|
|
}
|
|
return 1, len(src), err
|
|
}
|
|
|
|
// delayedTrickler is like trickler, but delays writing output to dst. This is
|
|
// highly unlikely to be relevant in practice, but it seems like a good idea
|
|
// to have some tolerance as long as progress can be detected.
|
|
type delayedTrickler []byte
|
|
|
|
func (t *delayedTrickler) Reset() {
|
|
*t = nil
|
|
}
|
|
|
|
func (t *delayedTrickler) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
|
|
if len(*t) > 0 && len(dst) > 0 {
|
|
dst[0] = (*t)[0]
|
|
*t = (*t)[1:]
|
|
nDst = 1
|
|
}
|
|
*t = append(*t, src...)
|
|
if len(*t) > 0 {
|
|
err = ErrShortDst
|
|
}
|
|
return nDst, len(src), err
|
|
}
|
|
|
|
type testCase struct {
|
|
desc string
|
|
t Transformer
|
|
src string
|
|
dstSize int
|
|
srcSize int
|
|
ioSize int
|
|
wantStr string
|
|
wantErr error
|
|
wantIter int // number of iterations taken; 0 means we don't care.
|
|
}
|
|
|
|
func (t testCase) String() string {
|
|
return tstr(t.t) + "; " + t.desc
|
|
}
|
|
|
|
func tstr(t Transformer) string {
|
|
if stringer, ok := t.(fmt.Stringer); ok {
|
|
return stringer.String()
|
|
}
|
|
s := fmt.Sprintf("%T", t)
|
|
return s[1+strings.Index(s, "."):]
|
|
}
|
|
|
|
func (c chain) String() string {
|
|
buf := &bytes.Buffer{}
|
|
buf.WriteString("Chain(")
|
|
for i, l := range c.link[:len(c.link)-1] {
|
|
if i != 0 {
|
|
fmt.Fprint(buf, ", ")
|
|
}
|
|
buf.WriteString(tstr(l.t))
|
|
}
|
|
buf.WriteString(")")
|
|
return buf.String()
|
|
}
|
|
|
|
var testCases = []testCase{
|
|
{
|
|
desc: "empty",
|
|
t: lowerCaseASCII{},
|
|
src: "",
|
|
dstSize: 100,
|
|
srcSize: 100,
|
|
wantStr: "",
|
|
},
|
|
|
|
{
|
|
desc: "basic",
|
|
t: lowerCaseASCII{},
|
|
src: "Hello WORLD.",
|
|
dstSize: 100,
|
|
srcSize: 100,
|
|
wantStr: "hello world.",
|
|
},
|
|
|
|
{
|
|
desc: "small dst",
|
|
t: lowerCaseASCII{},
|
|
src: "Hello WORLD.",
|
|
dstSize: 3,
|
|
srcSize: 100,
|
|
wantStr: "hello world.",
|
|
},
|
|
|
|
{
|
|
desc: "small src",
|
|
t: lowerCaseASCII{},
|
|
src: "Hello WORLD.",
|
|
dstSize: 100,
|
|
srcSize: 4,
|
|
wantStr: "hello world.",
|
|
},
|
|
|
|
{
|
|
desc: "small buffers",
|
|
t: lowerCaseASCII{},
|
|
src: "Hello WORLD.",
|
|
dstSize: 3,
|
|
srcSize: 4,
|
|
wantStr: "hello world.",
|
|
},
|
|
|
|
{
|
|
desc: "very small buffers",
|
|
t: lowerCaseASCII{},
|
|
src: "Hello WORLD.",
|
|
dstSize: 1,
|
|
srcSize: 1,
|
|
wantStr: "hello world.",
|
|
},
|
|
|
|
{
|
|
desc: "small dst with lookahead",
|
|
t: lowerCaseASCIILookahead{},
|
|
src: "Hello WORLD.",
|
|
dstSize: 3,
|
|
srcSize: 100,
|
|
wantStr: "hello world.",
|
|
},
|
|
|
|
{
|
|
desc: "small src with lookahead",
|
|
t: lowerCaseASCIILookahead{},
|
|
src: "Hello WORLD.",
|
|
dstSize: 100,
|
|
srcSize: 4,
|
|
wantStr: "hello world.",
|
|
},
|
|
|
|
{
|
|
desc: "small buffers with lookahead",
|
|
t: lowerCaseASCIILookahead{},
|
|
src: "Hello WORLD.",
|
|
dstSize: 3,
|
|
srcSize: 4,
|
|
wantStr: "hello world.",
|
|
},
|
|
|
|
{
|
|
desc: "very small buffers with lookahead",
|
|
t: lowerCaseASCIILookahead{},
|
|
src: "Hello WORLD.",
|
|
dstSize: 1,
|
|
srcSize: 2,
|
|
wantStr: "hello world.",
|
|
},
|
|
|
|
{
|
|
desc: "user error",
|
|
t: dontMentionX{},
|
|
src: "The First Rule of Transform Club: don't mention Mister X, ever.",
|
|
dstSize: 100,
|
|
srcSize: 100,
|
|
wantStr: "The First Rule of Transform Club: don't mention Mister ",
|
|
wantErr: errYouMentionedX,
|
|
},
|
|
|
|
{
|
|
desc: "user error at end",
|
|
t: errorAtEnd{},
|
|
src: "All goes well until it doesn't.",
|
|
dstSize: 100,
|
|
srcSize: 100,
|
|
wantStr: "All goes well until it doesn't.",
|
|
wantErr: errAtEnd,
|
|
},
|
|
|
|
{
|
|
desc: "user error at end, incremental",
|
|
t: errorAtEnd{},
|
|
src: "All goes well until it doesn't.",
|
|
dstSize: 10,
|
|
srcSize: 10,
|
|
wantStr: "All goes well until it doesn't.",
|
|
wantErr: errAtEnd,
|
|
},
|
|
|
|
{
|
|
desc: "replace entire non-empty string with one byte",
|
|
t: &replaceWithConstant{replacement: "X"},
|
|
src: "none of this will be copied",
|
|
dstSize: 1,
|
|
srcSize: 10,
|
|
wantStr: "X",
|
|
},
|
|
|
|
{
|
|
desc: "replace entire empty string with one byte",
|
|
t: &replaceWithConstant{replacement: "X"},
|
|
src: "",
|
|
dstSize: 1,
|
|
srcSize: 10,
|
|
wantStr: "X",
|
|
},
|
|
|
|
{
|
|
desc: "replace entire empty string with seven bytes",
|
|
t: &replaceWithConstant{replacement: "ABCDEFG"},
|
|
src: "",
|
|
dstSize: 3,
|
|
srcSize: 10,
|
|
wantStr: "ABCDEFG",
|
|
},
|
|
|
|
{
|
|
desc: "add an X (initialBufSize-1)",
|
|
t: addAnXAtTheEnd{},
|
|
src: aaa[:initialBufSize-1],
|
|
dstSize: 10,
|
|
srcSize: 10,
|
|
wantStr: aaa[:initialBufSize-1] + "X",
|
|
},
|
|
|
|
{
|
|
desc: "add an X (initialBufSize+0)",
|
|
t: addAnXAtTheEnd{},
|
|
src: aaa[:initialBufSize+0],
|
|
dstSize: 10,
|
|
srcSize: 10,
|
|
wantStr: aaa[:initialBufSize+0] + "X",
|
|
},
|
|
|
|
{
|
|
desc: "add an X (initialBufSize+1)",
|
|
t: addAnXAtTheEnd{},
|
|
src: aaa[:initialBufSize+1],
|
|
dstSize: 10,
|
|
srcSize: 10,
|
|
wantStr: aaa[:initialBufSize+1] + "X",
|
|
},
|
|
|
|
{
|
|
desc: "small buffers",
|
|
t: dontMentionX{},
|
|
src: "The First Rule of Transform Club: don't mention Mister X, ever.",
|
|
dstSize: 10,
|
|
srcSize: 10,
|
|
wantStr: "The First Rule of Transform Club: don't mention Mister ",
|
|
wantErr: errYouMentionedX,
|
|
},
|
|
|
|
{
|
|
desc: "very small buffers",
|
|
t: dontMentionX{},
|
|
src: "The First Rule of Transform Club: don't mention Mister X, ever.",
|
|
dstSize: 1,
|
|
srcSize: 1,
|
|
wantStr: "The First Rule of Transform Club: don't mention Mister ",
|
|
wantErr: errYouMentionedX,
|
|
},
|
|
|
|
{
|
|
desc: "only transform at EOF",
|
|
t: doublerAtEOF{},
|
|
src: "this",
|
|
dstSize: 100,
|
|
srcSize: 100,
|
|
wantStr: "tthhiiss",
|
|
},
|
|
|
|
{
|
|
desc: "basic",
|
|
t: rleDecode{},
|
|
src: "1a2b3c10d11e0f1g",
|
|
dstSize: 100,
|
|
srcSize: 100,
|
|
wantStr: "abbcccddddddddddeeeeeeeeeeeg",
|
|
},
|
|
|
|
{
|
|
desc: "long",
|
|
t: rleDecode{},
|
|
src: "12a23b34c45d56e99z",
|
|
dstSize: 100,
|
|
srcSize: 100,
|
|
wantStr: strings.Repeat("a", 12) +
|
|
strings.Repeat("b", 23) +
|
|
strings.Repeat("c", 34) +
|
|
strings.Repeat("d", 45) +
|
|
strings.Repeat("e", 56) +
|
|
strings.Repeat("z", 99),
|
|
},
|
|
|
|
{
|
|
desc: "tight buffers",
|
|
t: rleDecode{},
|
|
src: "1a2b3c10d11e0f1g",
|
|
dstSize: 11,
|
|
srcSize: 3,
|
|
wantStr: "abbcccddddddddddeeeeeeeeeeeg",
|
|
},
|
|
|
|
{
|
|
desc: "short dst",
|
|
t: rleDecode{},
|
|
src: "1a2b3c10d11e0f1g",
|
|
dstSize: 10,
|
|
srcSize: 3,
|
|
wantStr: "abbcccdddddddddd",
|
|
wantErr: ErrShortDst,
|
|
},
|
|
|
|
{
|
|
desc: "short src",
|
|
t: rleDecode{},
|
|
src: "1a2b3c10d11e0f1g",
|
|
dstSize: 11,
|
|
srcSize: 2,
|
|
ioSize: 2,
|
|
wantStr: "abbccc",
|
|
wantErr: ErrShortSrc,
|
|
},
|
|
|
|
{
|
|
desc: "basic",
|
|
t: rleEncode{},
|
|
src: "abbcccddddddddddeeeeeeeeeeeg",
|
|
dstSize: 100,
|
|
srcSize: 100,
|
|
wantStr: "1a2b3c10d11e1g",
|
|
},
|
|
|
|
{
|
|
desc: "long",
|
|
t: rleEncode{},
|
|
src: strings.Repeat("a", 12) +
|
|
strings.Repeat("b", 23) +
|
|
strings.Repeat("c", 34) +
|
|
strings.Repeat("d", 45) +
|
|
strings.Repeat("e", 56) +
|
|
strings.Repeat("z", 99),
|
|
dstSize: 100,
|
|
srcSize: 100,
|
|
wantStr: "12a23b34c45d56e99z",
|
|
},
|
|
|
|
{
|
|
desc: "tight buffers",
|
|
t: rleEncode{},
|
|
src: "abbcccddddddddddeeeeeeeeeeeg",
|
|
dstSize: 3,
|
|
srcSize: 12,
|
|
wantStr: "1a2b3c10d11e1g",
|
|
},
|
|
|
|
{
|
|
desc: "short dst",
|
|
t: rleEncode{},
|
|
src: "abbcccddddddddddeeeeeeeeeeeg",
|
|
dstSize: 2,
|
|
srcSize: 12,
|
|
wantStr: "1a2b3c",
|
|
wantErr: ErrShortDst,
|
|
},
|
|
|
|
{
|
|
desc: "short src",
|
|
t: rleEncode{},
|
|
src: "abbcccddddddddddeeeeeeeeeeeg",
|
|
dstSize: 3,
|
|
srcSize: 11,
|
|
ioSize: 11,
|
|
wantStr: "1a2b3c10d",
|
|
wantErr: ErrShortSrc,
|
|
},
|
|
|
|
{
|
|
desc: "allowStutter = false",
|
|
t: rleEncode{allowStutter: false},
|
|
src: "aaaabbbbbbbbccccddddd",
|
|
dstSize: 10,
|
|
srcSize: 10,
|
|
wantStr: "4a8b4c5d",
|
|
},
|
|
|
|
{
|
|
desc: "allowStutter = true",
|
|
t: rleEncode{allowStutter: true},
|
|
src: "aaaabbbbbbbbccccddddd",
|
|
dstSize: 10,
|
|
srcSize: 10,
|
|
ioSize: 10,
|
|
wantStr: "4a6b2b4c4d1d",
|
|
},
|
|
|
|
{
|
|
desc: "trickler",
|
|
t: &trickler{},
|
|
src: "abcdefghijklm",
|
|
dstSize: 3,
|
|
srcSize: 15,
|
|
wantStr: "abcdefghijklm",
|
|
},
|
|
|
|
{
|
|
desc: "delayedTrickler",
|
|
t: &delayedTrickler{},
|
|
src: "abcdefghijklm",
|
|
dstSize: 3,
|
|
srcSize: 15,
|
|
wantStr: "abcdefghijklm",
|
|
},
|
|
}
|
|
|
|
func TestReader(t *testing.T) {
|
|
for _, tc := range testCases {
|
|
testtext.Run(t, tc.desc, func(t *testing.T) {
|
|
r := NewReader(strings.NewReader(tc.src), tc.t)
|
|
// Differently sized dst and src buffers are not part of the
|
|
// exported API. We override them manually.
|
|
r.dst = make([]byte, tc.dstSize)
|
|
r.src = make([]byte, tc.srcSize)
|
|
got, err := ioutil.ReadAll(r)
|
|
str := string(got)
|
|
if str != tc.wantStr || err != tc.wantErr {
|
|
t.Errorf("\ngot %q, %v\nwant %q, %v", str, err, tc.wantStr, tc.wantErr)
|
|
}
|
|
})
|
|
}
|
|
}
|
|
|
|
func TestWriter(t *testing.T) {
|
|
tests := append(testCases, chainTests()...)
|
|
for _, tc := range tests {
|
|
sizes := []int{1, 2, 3, 4, 5, 10, 100, 1000}
|
|
if tc.ioSize > 0 {
|
|
sizes = []int{tc.ioSize}
|
|
}
|
|
for _, sz := range sizes {
|
|
testtext.Run(t, fmt.Sprintf("%s/%d", tc.desc, sz), func(t *testing.T) {
|
|
bb := &bytes.Buffer{}
|
|
w := NewWriter(bb, tc.t)
|
|
// Differently sized dst and src buffers are not part of the
|
|
// exported API. We override them manually.
|
|
w.dst = make([]byte, tc.dstSize)
|
|
w.src = make([]byte, tc.srcSize)
|
|
src := make([]byte, sz)
|
|
var err error
|
|
for b := tc.src; len(b) > 0 && err == nil; {
|
|
n := copy(src, b)
|
|
b = b[n:]
|
|
m := 0
|
|
m, err = w.Write(src[:n])
|
|
if m != n && err == nil {
|
|
t.Errorf("did not consume all bytes %d < %d", m, n)
|
|
}
|
|
}
|
|
if err == nil {
|
|
err = w.Close()
|
|
}
|
|
str := bb.String()
|
|
if str != tc.wantStr || err != tc.wantErr {
|
|
t.Errorf("\ngot %q, %v\nwant %q, %v", str, err, tc.wantStr, tc.wantErr)
|
|
}
|
|
})
|
|
}
|
|
}
|
|
}
|
|
|
|
func TestNop(t *testing.T) {
|
|
testCases := []struct {
|
|
str string
|
|
dstSize int
|
|
err error
|
|
}{
|
|
{"", 0, nil},
|
|
{"", 10, nil},
|
|
{"a", 0, ErrShortDst},
|
|
{"a", 1, nil},
|
|
{"a", 10, nil},
|
|
}
|
|
for i, tc := range testCases {
|
|
dst := make([]byte, tc.dstSize)
|
|
nDst, nSrc, err := Nop.Transform(dst, []byte(tc.str), true)
|
|
want := tc.str
|
|
if tc.dstSize < len(want) {
|
|
want = want[:tc.dstSize]
|
|
}
|
|
if got := string(dst[:nDst]); got != want || err != tc.err || nSrc != nDst {
|
|
t.Errorf("%d:\ngot %q, %d, %v\nwant %q, %d, %v", i, got, nSrc, err, want, nDst, tc.err)
|
|
}
|
|
}
|
|
}
|
|
|
|
func TestDiscard(t *testing.T) {
|
|
testCases := []struct {
|
|
str string
|
|
dstSize int
|
|
}{
|
|
{"", 0},
|
|
{"", 10},
|
|
{"a", 0},
|
|
{"ab", 10},
|
|
}
|
|
for i, tc := range testCases {
|
|
nDst, nSrc, err := Discard.Transform(make([]byte, tc.dstSize), []byte(tc.str), true)
|
|
if nDst != 0 || nSrc != len(tc.str) || err != nil {
|
|
t.Errorf("%d:\ngot %q, %d, %v\nwant 0, %d, nil", i, nDst, nSrc, err, len(tc.str))
|
|
}
|
|
}
|
|
}
|
|
|
|
// mkChain creates a Chain transformer. x must be alternating between transformer
|
|
// and bufSize, like T, (sz, T)*
|
|
func mkChain(x ...interface{}) *chain {
|
|
t := []Transformer{}
|
|
for i := 0; i < len(x); i += 2 {
|
|
t = append(t, x[i].(Transformer))
|
|
}
|
|
c := Chain(t...).(*chain)
|
|
for i, j := 1, 1; i < len(x); i, j = i+2, j+1 {
|
|
c.link[j].b = make([]byte, x[i].(int))
|
|
}
|
|
return c
|
|
}
|
|
|
|
func chainTests() []testCase {
|
|
return []testCase{
|
|
{
|
|
desc: "nil error",
|
|
t: mkChain(rleEncode{}, 100, lowerCaseASCII{}),
|
|
src: "ABB",
|
|
dstSize: 100,
|
|
srcSize: 100,
|
|
wantStr: "1a2b",
|
|
wantErr: nil,
|
|
wantIter: 1,
|
|
},
|
|
|
|
{
|
|
desc: "short dst buffer",
|
|
t: mkChain(lowerCaseASCII{}, 3, rleDecode{}),
|
|
src: "1a2b3c10d11e0f1g",
|
|
dstSize: 10,
|
|
srcSize: 3,
|
|
wantStr: "abbcccdddddddddd",
|
|
wantErr: ErrShortDst,
|
|
},
|
|
|
|
{
|
|
desc: "short internal dst buffer",
|
|
t: mkChain(lowerCaseASCII{}, 3, rleDecode{}, 10, Nop),
|
|
src: "1a2b3c10d11e0f1g",
|
|
dstSize: 100,
|
|
srcSize: 3,
|
|
wantStr: "abbcccdddddddddd",
|
|
wantErr: errShortInternal,
|
|
},
|
|
|
|
{
|
|
desc: "short internal dst buffer from input",
|
|
t: mkChain(rleDecode{}, 10, Nop),
|
|
src: "1a2b3c10d11e0f1g",
|
|
dstSize: 100,
|
|
srcSize: 3,
|
|
wantStr: "abbcccdddddddddd",
|
|
wantErr: errShortInternal,
|
|
},
|
|
|
|
{
|
|
desc: "empty short internal dst buffer",
|
|
t: mkChain(lowerCaseASCII{}, 3, rleDecode{}, 10, Nop),
|
|
src: "4a7b11e0f1g",
|
|
dstSize: 100,
|
|
srcSize: 3,
|
|
wantStr: "aaaabbbbbbb",
|
|
wantErr: errShortInternal,
|
|
},
|
|
|
|
{
|
|
desc: "empty short internal dst buffer from input",
|
|
t: mkChain(rleDecode{}, 10, Nop),
|
|
src: "4a7b11e0f1g",
|
|
dstSize: 100,
|
|
srcSize: 3,
|
|
wantStr: "aaaabbbbbbb",
|
|
wantErr: errShortInternal,
|
|
},
|
|
|
|
{
|
|
desc: "short internal src buffer after full dst buffer",
|
|
t: mkChain(Nop, 5, rleEncode{}, 10, Nop),
|
|
src: "cccccddddd",
|
|
dstSize: 100,
|
|
srcSize: 100,
|
|
wantStr: "",
|
|
wantErr: errShortInternal,
|
|
wantIter: 1,
|
|
},
|
|
|
|
{
|
|
desc: "short internal src buffer after short dst buffer; test lastFull",
|
|
t: mkChain(rleDecode{}, 5, rleEncode{}, 4, Nop),
|
|
src: "2a1b4c6d",
|
|
dstSize: 100,
|
|
srcSize: 100,
|
|
wantStr: "2a1b",
|
|
wantErr: errShortInternal,
|
|
},
|
|
|
|
{
|
|
desc: "short internal src buffer after successful complete fill",
|
|
t: mkChain(Nop, 3, rleDecode{}),
|
|
src: "123a4b",
|
|
dstSize: 4,
|
|
srcSize: 3,
|
|
wantStr: "",
|
|
wantErr: errShortInternal,
|
|
wantIter: 1,
|
|
},
|
|
|
|
{
|
|
desc: "short internal src buffer after short dst buffer; test lastFull",
|
|
t: mkChain(rleDecode{}, 5, rleEncode{}),
|
|
src: "2a1b4c6d",
|
|
dstSize: 4,
|
|
srcSize: 100,
|
|
wantStr: "2a1b",
|
|
wantErr: errShortInternal,
|
|
},
|
|
|
|
{
|
|
desc: "short src buffer",
|
|
t: mkChain(rleEncode{}, 5, Nop),
|
|
src: "abbcccddddeeeee",
|
|
dstSize: 4,
|
|
srcSize: 4,
|
|
ioSize: 4,
|
|
wantStr: "1a2b3c",
|
|
wantErr: ErrShortSrc,
|
|
},
|
|
|
|
{
|
|
desc: "process all in one go",
|
|
t: mkChain(rleEncode{}, 5, Nop),
|
|
src: "abbcccddddeeeeeffffff",
|
|
dstSize: 100,
|
|
srcSize: 100,
|
|
wantStr: "1a2b3c4d5e6f",
|
|
wantErr: nil,
|
|
wantIter: 1,
|
|
},
|
|
|
|
{
|
|
desc: "complete processing downstream after error",
|
|
t: mkChain(dontMentionX{}, 2, rleDecode{}, 5, Nop),
|
|
src: "3a4b5eX",
|
|
dstSize: 100,
|
|
srcSize: 100,
|
|
ioSize: 100,
|
|
wantStr: "aaabbbbeeeee",
|
|
wantErr: errYouMentionedX,
|
|
},
|
|
|
|
{
|
|
desc: "return downstream fatal errors first (followed by short dst)",
|
|
t: mkChain(dontMentionX{}, 8, rleDecode{}, 4, Nop),
|
|
src: "3a4b5eX",
|
|
dstSize: 100,
|
|
srcSize: 100,
|
|
ioSize: 100,
|
|
wantStr: "aaabbbb",
|
|
wantErr: errShortInternal,
|
|
},
|
|
|
|
{
|
|
desc: "return downstream fatal errors first (followed by short src)",
|
|
t: mkChain(dontMentionX{}, 5, Nop, 1, rleDecode{}),
|
|
src: "1a5bX",
|
|
dstSize: 100,
|
|
srcSize: 100,
|
|
ioSize: 100,
|
|
wantStr: "",
|
|
wantErr: errShortInternal,
|
|
},
|
|
|
|
{
|
|
desc: "short internal",
|
|
t: mkChain(Nop, 11, rleEncode{}, 3, Nop),
|
|
src: "abbcccddddddddddeeeeeeeeeeeg",
|
|
dstSize: 3,
|
|
srcSize: 100,
|
|
wantStr: "1a2b3c10d",
|
|
wantErr: errShortInternal,
|
|
},
|
|
}
|
|
}
|
|
|
|
func doTransform(tc testCase) (res string, iter int, err error) {
|
|
tc.t.Reset()
|
|
dst := make([]byte, tc.dstSize)
|
|
out, in := make([]byte, 0, 2*len(tc.src)), []byte(tc.src)
|
|
for {
|
|
iter++
|
|
src, atEOF := in, true
|
|
if len(src) > tc.srcSize {
|
|
src, atEOF = src[:tc.srcSize], false
|
|
}
|
|
nDst, nSrc, err := tc.t.Transform(dst, src, atEOF)
|
|
out = append(out, dst[:nDst]...)
|
|
in = in[nSrc:]
|
|
switch {
|
|
case err == nil && len(in) != 0:
|
|
case err == ErrShortSrc && nSrc > 0:
|
|
case err == ErrShortDst && (nDst > 0 || nSrc > 0):
|
|
default:
|
|
return string(out), iter, err
|
|
}
|
|
}
|
|
}
|
|
|
|
func TestChain(t *testing.T) {
|
|
if c, ok := Chain().(nop); !ok {
|
|
t.Errorf("empty chain: %v; want Nop", c)
|
|
}
|
|
|
|
// Test Chain for a single Transformer.
|
|
for _, tc := range testCases {
|
|
tc.t = Chain(tc.t)
|
|
str, _, err := doTransform(tc)
|
|
if str != tc.wantStr || err != tc.wantErr {
|
|
t.Errorf("%s:\ngot %q, %v\nwant %q, %v", tc, str, err, tc.wantStr, tc.wantErr)
|
|
}
|
|
}
|
|
|
|
tests := chainTests()
|
|
sizes := []int{1, 2, 3, 4, 5, 7, 10, 100, 1000}
|
|
addTest := func(tc testCase, t *chain) {
|
|
if t.link[0].t != tc.t && tc.wantErr == ErrShortSrc {
|
|
tc.wantErr = errShortInternal
|
|
}
|
|
if t.link[len(t.link)-2].t != tc.t && tc.wantErr == ErrShortDst {
|
|
tc.wantErr = errShortInternal
|
|
}
|
|
tc.t = t
|
|
tests = append(tests, tc)
|
|
}
|
|
for _, tc := range testCases {
|
|
for _, sz := range sizes {
|
|
tt := tc
|
|
tt.dstSize = sz
|
|
addTest(tt, mkChain(tc.t, tc.dstSize, Nop))
|
|
addTest(tt, mkChain(tc.t, tc.dstSize, Nop, 2, Nop))
|
|
addTest(tt, mkChain(Nop, tc.srcSize, tc.t, tc.dstSize, Nop))
|
|
if sz >= tc.dstSize && (tc.wantErr != ErrShortDst || sz == tc.dstSize) {
|
|
addTest(tt, mkChain(Nop, tc.srcSize, tc.t))
|
|
addTest(tt, mkChain(Nop, 100, Nop, tc.srcSize, tc.t))
|
|
}
|
|
}
|
|
}
|
|
for _, tc := range testCases {
|
|
tt := tc
|
|
tt.dstSize = 1
|
|
tt.wantStr = ""
|
|
addTest(tt, mkChain(tc.t, tc.dstSize, Discard))
|
|
addTest(tt, mkChain(Nop, tc.srcSize, tc.t, tc.dstSize, Discard))
|
|
addTest(tt, mkChain(Nop, tc.srcSize, tc.t, tc.dstSize, Nop, tc.dstSize, Discard))
|
|
}
|
|
for _, tc := range testCases {
|
|
tt := tc
|
|
tt.dstSize = 100
|
|
tt.wantStr = strings.Replace(tc.src, "0f", "", -1)
|
|
// Chain encoders and decoders.
|
|
if _, ok := tc.t.(rleEncode); ok && tc.wantErr == nil {
|
|
addTest(tt, mkChain(tc.t, tc.dstSize, Nop, 1000, rleDecode{}))
|
|
addTest(tt, mkChain(tc.t, tc.dstSize, Nop, tc.dstSize, rleDecode{}))
|
|
addTest(tt, mkChain(Nop, tc.srcSize, tc.t, tc.dstSize, Nop, 100, rleDecode{}))
|
|
// decoding needs larger destinations
|
|
addTest(tt, mkChain(Nop, tc.srcSize, tc.t, tc.dstSize, rleDecode{}, 100, Nop))
|
|
addTest(tt, mkChain(Nop, tc.srcSize, tc.t, tc.dstSize, Nop, 100, rleDecode{}, 100, Nop))
|
|
} else if _, ok := tc.t.(rleDecode); ok && tc.wantErr == nil {
|
|
// The internal buffer size may need to be the sum of the maximum segment
|
|
// size of the two encoders!
|
|
addTest(tt, mkChain(tc.t, 2*tc.dstSize, rleEncode{}))
|
|
addTest(tt, mkChain(tc.t, tc.dstSize, Nop, 101, rleEncode{}))
|
|
addTest(tt, mkChain(Nop, tc.srcSize, tc.t, tc.dstSize, Nop, 100, rleEncode{}))
|
|
addTest(tt, mkChain(Nop, tc.srcSize, tc.t, tc.dstSize, Nop, 200, rleEncode{}, 100, Nop))
|
|
}
|
|
}
|
|
for _, tc := range tests {
|
|
str, iter, err := doTransform(tc)
|
|
mi := tc.wantIter != 0 && tc.wantIter != iter
|
|
if str != tc.wantStr || err != tc.wantErr || mi {
|
|
t.Errorf("%s:\ngot iter:%d, %q, %v\nwant iter:%d, %q, %v", tc, iter, str, err, tc.wantIter, tc.wantStr, tc.wantErr)
|
|
}
|
|
break
|
|
}
|
|
}
|
|
|
|
func TestRemoveFunc(t *testing.T) {
|
|
filter := RemoveFunc(func(r rune) bool {
|
|
return strings.IndexRune("ab\u0300\u1234,", r) != -1
|
|
})
|
|
tests := []testCase{
|
|
{
|
|
src: ",",
|
|
wantStr: "",
|
|
},
|
|
|
|
{
|
|
src: "c",
|
|
wantStr: "c",
|
|
},
|
|
|
|
{
|
|
src: "\u2345",
|
|
wantStr: "\u2345",
|
|
},
|
|
|
|
{
|
|
src: "tschüß",
|
|
wantStr: "tschüß",
|
|
},
|
|
|
|
{
|
|
src: ",до,свидания,",
|
|
wantStr: "досвидания",
|
|
},
|
|
|
|
{
|
|
src: "a\xbd\xb2=\xbc ⌘",
|
|
wantStr: "\uFFFD\uFFFD=\uFFFD ⌘",
|
|
},
|
|
|
|
{
|
|
// If we didn't replace illegal bytes with RuneError, the result
|
|
// would be \u0300 or the code would need to be more complex.
|
|
src: "\xcc\u0300\x80",
|
|
wantStr: "\uFFFD\uFFFD",
|
|
},
|
|
|
|
{
|
|
src: "\xcc\u0300\x80",
|
|
dstSize: 3,
|
|
wantStr: "\uFFFD\uFFFD",
|
|
wantIter: 2,
|
|
},
|
|
|
|
{
|
|
// Test a long buffer greater than the internal buffer size
|
|
src: "hello\xcc\xcc\xccworld",
|
|
srcSize: 13,
|
|
wantStr: "hello\uFFFD\uFFFD\uFFFDworld",
|
|
wantIter: 1,
|
|
},
|
|
|
|
{
|
|
src: "\u2345",
|
|
dstSize: 2,
|
|
wantStr: "",
|
|
wantErr: ErrShortDst,
|
|
},
|
|
|
|
{
|
|
src: "\xcc",
|
|
dstSize: 2,
|
|
wantStr: "",
|
|
wantErr: ErrShortDst,
|
|
},
|
|
|
|
{
|
|
src: "\u0300",
|
|
dstSize: 2,
|
|
srcSize: 1,
|
|
wantStr: "",
|
|
wantErr: ErrShortSrc,
|
|
},
|
|
|
|
{
|
|
t: RemoveFunc(func(r rune) bool {
|
|
return r == utf8.RuneError
|
|
}),
|
|
src: "\xcc\u0300\x80",
|
|
wantStr: "\u0300",
|
|
},
|
|
}
|
|
|
|
for _, tc := range tests {
|
|
tc.desc = tc.src
|
|
if tc.t == nil {
|
|
tc.t = filter
|
|
}
|
|
if tc.dstSize == 0 {
|
|
tc.dstSize = 100
|
|
}
|
|
if tc.srcSize == 0 {
|
|
tc.srcSize = 100
|
|
}
|
|
str, iter, err := doTransform(tc)
|
|
mi := tc.wantIter != 0 && tc.wantIter != iter
|
|
if str != tc.wantStr || err != tc.wantErr || mi {
|
|
t.Errorf("%+q:\ngot iter:%d, %+q, %v\nwant iter:%d, %+q, %v", tc.src, iter, str, err, tc.wantIter, tc.wantStr, tc.wantErr)
|
|
}
|
|
|
|
tc.src = str
|
|
idem, _, _ := doTransform(tc)
|
|
if str != idem {
|
|
t.Errorf("%+q: found %+q; want %+q", tc.src, idem, str)
|
|
}
|
|
}
|
|
}
|
|
|
|
func testString(t *testing.T, f func(Transformer, string) (string, int, error)) {
|
|
for _, tt := range append(testCases, chainTests()...) {
|
|
if tt.desc == "allowStutter = true" {
|
|
// We don't have control over the buffer size, so we eliminate tests
|
|
// that depend on a specific buffer size being set.
|
|
continue
|
|
}
|
|
if tt.wantErr == ErrShortDst || tt.wantErr == ErrShortSrc {
|
|
// The result string will be different.
|
|
continue
|
|
}
|
|
testtext.Run(t, tt.desc, func(t *testing.T) {
|
|
got, n, err := f(tt.t, tt.src)
|
|
if tt.wantErr != err {
|
|
t.Errorf("error: got %v; want %v", err, tt.wantErr)
|
|
}
|
|
// Check that err == nil implies that n == len(tt.src). Note that vice
|
|
// versa isn't necessarily true.
|
|
if err == nil && n != len(tt.src) {
|
|
t.Errorf("err == nil: got %d bytes, want %d", n, err)
|
|
}
|
|
if got != tt.wantStr {
|
|
t.Errorf("string: got %q; want %q", got, tt.wantStr)
|
|
}
|
|
})
|
|
}
|
|
}
|
|
|
|
func TestBytes(t *testing.T) {
|
|
testString(t, func(z Transformer, s string) (string, int, error) {
|
|
b, n, err := Bytes(z, []byte(s))
|
|
return string(b), n, err
|
|
})
|
|
}
|
|
|
|
func TestAppend(t *testing.T) {
|
|
// Create a bunch of subtests for different buffer sizes.
|
|
testCases := [][]byte{
|
|
nil,
|
|
make([]byte, 0, 0),
|
|
make([]byte, 0, 1),
|
|
make([]byte, 1, 1),
|
|
make([]byte, 1, 5),
|
|
make([]byte, 100, 100),
|
|
make([]byte, 100, 200),
|
|
}
|
|
for _, tc := range testCases {
|
|
testString(t, func(z Transformer, s string) (string, int, error) {
|
|
b, n, err := Append(z, tc, []byte(s))
|
|
return string(b[len(tc):]), n, err
|
|
})
|
|
}
|
|
}
|
|
|
|
func TestString(t *testing.T) {
|
|
testtext.Run(t, "transform", func(t *testing.T) { testString(t, String) })
|
|
|
|
// Overrun the internal destination buffer.
|
|
for i, s := range []string{
|
|
aaa[:1*initialBufSize-1],
|
|
aaa[:1*initialBufSize+0],
|
|
aaa[:1*initialBufSize+1],
|
|
AAA[:1*initialBufSize-1],
|
|
AAA[:1*initialBufSize+0],
|
|
AAA[:1*initialBufSize+1],
|
|
AAA[:2*initialBufSize-1],
|
|
AAA[:2*initialBufSize+0],
|
|
AAA[:2*initialBufSize+1],
|
|
aaa[:1*initialBufSize-2] + "A",
|
|
aaa[:1*initialBufSize-1] + "A",
|
|
aaa[:1*initialBufSize+0] + "A",
|
|
aaa[:1*initialBufSize+1] + "A",
|
|
} {
|
|
testtext.Run(t, fmt.Sprint("dst buffer test using lower/", i), func(t *testing.T) {
|
|
got, _, _ := String(lowerCaseASCII{}, s)
|
|
if want := strings.ToLower(s); got != want {
|
|
t.Errorf("got %s (%d); want %s (%d)", got, len(got), want, len(want))
|
|
}
|
|
})
|
|
}
|
|
|
|
// Overrun the internal source buffer.
|
|
for i, s := range []string{
|
|
aaa[:1*initialBufSize-1],
|
|
aaa[:1*initialBufSize+0],
|
|
aaa[:1*initialBufSize+1],
|
|
aaa[:2*initialBufSize+1],
|
|
aaa[:2*initialBufSize+0],
|
|
aaa[:2*initialBufSize+1],
|
|
} {
|
|
testtext.Run(t, fmt.Sprint("src buffer test using rleEncode/", i), func(t *testing.T) {
|
|
got, _, _ := String(rleEncode{}, s)
|
|
if want := fmt.Sprintf("%da", len(s)); got != want {
|
|
t.Errorf("got %s (%d); want %s (%d)", got, len(got), want, len(want))
|
|
}
|
|
})
|
|
}
|
|
|
|
// Test allocations for non-changing strings.
|
|
// Note we still need to allocate a single buffer.
|
|
for i, s := range []string{
|
|
"",
|
|
"123456789",
|
|
aaa[:initialBufSize-1],
|
|
aaa[:initialBufSize+0],
|
|
aaa[:initialBufSize+1],
|
|
aaa[:10*initialBufSize],
|
|
} {
|
|
testtext.Run(t, fmt.Sprint("alloc/", i), func(t *testing.T) {
|
|
if n := testtext.AllocsPerRun(5, func() { String(&lowerCaseASCIILookahead{}, s) }); n > 1 {
|
|
t.Errorf("#allocs was %f; want 1", n)
|
|
}
|
|
})
|
|
}
|
|
}
|
|
|
|
// TestBytesAllocation tests that buffer growth stays limited with the trickler
|
|
// transformer, which behaves oddly but within spec. In case buffer growth is
|
|
// not correctly handled, the test will either panic with a failed allocation or
|
|
// thrash. To ensure the tests terminate under the last condition, we time out
|
|
// after some sufficiently long period of time.
|
|
func TestBytesAllocation(t *testing.T) {
|
|
done := make(chan bool)
|
|
go func() {
|
|
in := bytes.Repeat([]byte{'a'}, 1000)
|
|
tr := trickler(make([]byte, 1))
|
|
Bytes(&tr, in)
|
|
done <- true
|
|
}()
|
|
select {
|
|
case <-done:
|
|
case <-time.After(3 * time.Second):
|
|
t.Error("time out, likely due to excessive allocation")
|
|
}
|
|
}
|
|
|
|
// TestStringAllocation tests that buffer growth stays limited with the trickler
|
|
// transformer, which behaves oddly but within spec. In case buffer growth is
|
|
// not correctly handled, the test will either panic with a failed allocation or
|
|
// thrash. To ensure the tests terminate under the last condition, we time out
|
|
// after some sufficiently long period of time.
|
|
func TestStringAllocation(t *testing.T) {
|
|
done := make(chan bool)
|
|
go func() {
|
|
tr := trickler(make([]byte, 1))
|
|
String(&tr, aaa[:1000])
|
|
done <- true
|
|
}()
|
|
select {
|
|
case <-done:
|
|
case <-time.After(3 * time.Second):
|
|
t.Error("time out, likely due to excessive allocation")
|
|
}
|
|
}
|
|
|
|
func BenchmarkStringLowerEmpty(b *testing.B) {
|
|
for i := 0; i < b.N; i++ {
|
|
String(&lowerCaseASCIILookahead{}, "")
|
|
}
|
|
}
|
|
|
|
func BenchmarkStringLowerIdentical(b *testing.B) {
|
|
for i := 0; i < b.N; i++ {
|
|
String(&lowerCaseASCIILookahead{}, aaa[:4096])
|
|
}
|
|
}
|
|
|
|
func BenchmarkStringLowerChanged(b *testing.B) {
|
|
for i := 0; i < b.N; i++ {
|
|
String(&lowerCaseASCIILookahead{}, AAA[:4096])
|
|
}
|
|
}
|
|
|
|
// aaa and AAA are 4096-byte strings of 'a' and 'A' respectively; the tests
// slice them to obtain inputs of a chosen length.
var (
	aaa = strings.Repeat("a", 4096)
	AAA = strings.ToUpper(aaa)
)
|