Source File
transform.go
Belonging Package
vendor/golang.org/x/text/transform
// Copyright 2013 The Go Authors. All rights reserved.// Use of this source code is governed by a BSD-style// license that can be found in the LICENSE file.// Package transform provides reader and writer wrappers that transform the// bytes passing through as well as various transformations. Example// transformations provided by other packages include normalization and// conversion between character sets.package transform // import "golang.org/x/text/transform"import ()var (// ErrShortDst means that the destination buffer was too short to// receive all of the transformed bytes.ErrShortDst = errors.New("transform: short destination buffer")// ErrShortSrc means that the source buffer has insufficient data to// complete the transformation.ErrShortSrc = errors.New("transform: short source buffer")// ErrEndOfSpan means that the input and output (the transformed input)// are not identical.ErrEndOfSpan = errors.New("transform: input and output are not identical")// errInconsistentByteCount means that Transform returned success (nil// error) but also returned nSrc inconsistent with the src argument.errInconsistentByteCount = errors.New("transform: inconsistent byte count returned")// errShortInternal means that an internal buffer is not large enough// to make progress and the Transform operation must be aborted.errShortInternal = errors.New("transform: short internal buffer"))// Transformer transforms bytes.type Transformer interface {// Transform writes to dst the transformed bytes read from src, and// returns the number of dst bytes written and src bytes read. The// atEOF argument tells whether src represents the last bytes of the// input.//// Callers should always process the nDst bytes produced and account// for the nSrc bytes consumed before considering the error err.//// A nil error means that all of the transformed bytes (whether freshly// transformed from src or left over from previous Transform calls)// were written to dst. A nil error can be returned regardless of// whether atEOF is true. If err is nil then nSrc must equal len(src);// the converse is not necessarily true.//// ErrShortDst means that dst was too short to receive all of the// transformed bytes. ErrShortSrc means that src had insufficient data// to complete the transformation. If both conditions apply, then// either error may be returned. Other than the error conditions listed// here, implementations are free to report other errors that arise.Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error)// Reset resets the state and allows a Transformer to be reused.Reset()}// SpanningTransformer extends the Transformer interface with a Span method// that determines how much of the input already conforms to the Transformer.type SpanningTransformer interface {Transformer// Span returns a position in src such that transforming src[:n] results in// identical output src[:n] for these bytes. It does not necessarily return// the largest such n. The atEOF argument tells whether src represents the// last bytes of the input.//// Callers should always account for the n bytes consumed before// considering the error err.//// A nil error means that all input bytes are known to be identical to the// output produced by the Transformer. A nil error can be returned// regardless of whether atEOF is true. If err is nil, then n must// equal len(src); the converse is not necessarily true.//// ErrEndOfSpan means that the Transformer output may differ from the// input after n bytes. Note that n may be len(src), meaning that the output// would contain additional bytes after otherwise identical output.// ErrShortSrc means that src had insufficient data to determine whether the// remaining bytes would change. Other than the error conditions listed// here, implementations are free to report other errors that arise.//// Calling Span can modify the Transformer state as a side effect. In// effect, it does the transformation just as calling Transform would, only// without copying to a destination buffer and only up to a point it can// determine the input and output bytes are the same. This is obviously more// limited than calling Transform, but can be more efficient in terms of// copying and allocating buffers. Calls to Span and Transform may be// interleaved.Span(src []byte, atEOF bool) (n int, err error)}// NopResetter can be embedded by implementations of Transformer to add a nop// Reset method.type NopResetter struct{}// Reset implements the Reset method of the Transformer interface.func (NopResetter) () {}// Reader wraps another io.Reader by transforming the bytes read.type Reader struct {r io.Readert Transformererr error// dst[dst0:dst1] contains bytes that have been transformed by t but// not yet copied out via Read.dst []bytedst0, dst1 int// src[src0:src1] contains bytes that have been read from r but not// yet transformed through t.src []bytesrc0, src1 int// transformComplete is whether the transformation is complete,// regardless of whether or not it was successful.transformComplete bool}const defaultBufSize = 4096// NewReader returns a new Reader that wraps r by transforming the bytes read// via t. It calls Reset on t.func ( io.Reader, Transformer) *Reader {.Reset()return &Reader{r: ,t: ,dst: make([]byte, defaultBufSize),src: make([]byte, defaultBufSize),}}// Read implements the io.Reader interface.func ( *Reader) ( []byte) (int, error) {, := 0, error(nil)for {// Copy out any transformed bytes and return the final error if we are done.if .dst0 != .dst1 {= copy(, .dst[.dst0:.dst1]).dst0 +=if .dst0 == .dst1 && .transformComplete {return , .err}return , nil} else if .transformComplete {return 0, .err}// Try to transform some source bytes, or to flush the transformer if we// are out of source bytes. We do this even if r.r.Read returned an error.// As the io.Reader documentation says, "process the n > 0 bytes returned// before considering the error".if .src0 != .src1 || .err != nil {.dst0 = 0.dst1, , = .t.Transform(.dst, .src[.src0:.src1], .err == io.EOF).src0 +=switch {case == nil:if .src0 != .src1 {.err = errInconsistentByteCount}// The Transform call was successful; we are complete if we// cannot read more bytes into src..transformComplete = .err != nilcontinuecase == ErrShortDst && (.dst1 != 0 || != 0):// Make room in dst by copying out, and try again.continuecase == ErrShortSrc && .src1-.src0 != len(.src) && .err == nil:// Read more bytes into src via the code below, and try again.default:.transformComplete = true// The reader error (r.err) takes precedence over the// transformer error (err) unless r.err is nil or io.EOF.if .err == nil || .err == io.EOF {.err =}continue}}// Move any untransformed source bytes to the start of the buffer// and read more bytes.if .src0 != 0 {.src0, .src1 = 0, copy(.src, .src[.src0:.src1])}, .err = .r.Read(.src[.src1:]).src1 +=}}// TODO: implement ReadByte (and ReadRune??).// Writer wraps another io.Writer by transforming the bytes read.// The user needs to call Close to flush unwritten bytes that may// be buffered.type Writer struct {w io.Writert Transformerdst []byte// src[:n] contains bytes that have not yet passed through t.src []byten int}// NewWriter returns a new Writer that wraps w by transforming the bytes written// via t. It calls Reset on t.func ( io.Writer, Transformer) *Writer {.Reset()return &Writer{w: ,t: ,dst: make([]byte, defaultBufSize),src: make([]byte, defaultBufSize),}}// Write implements the io.Writer interface. If there are not enough// bytes available to complete a Transform, the bytes will be buffered// for the next write. Call Close to convert the remaining bytes.func ( *Writer) ( []byte) ( int, error) {:=if .n > 0 {// Append bytes from data to the last remainder.// TODO: limit the amount copied on first try.= copy(.src[.n:], ).n +== .src[:.n]}for {, , := .t.Transform(.dst, , false)if , := .w.Write(.dst[:]); != nil {return ,}= [:]if .n == 0 {+=} else if len() <= {// Enough bytes from w.src have been consumed. We make src point// to data instead to reduce the copying..n = 0-= len()= [:]if < len() && ( == nil || == ErrShortSrc) {continue}}switch {case ErrShortDst:// This error is okay as long as we are making progress.if > 0 || > 0 {continue}case ErrShortSrc:if len() < len(.src) {:= copy(.src, )// If w.n > 0, bytes from data were already copied to w.src and n// was already set to the number of bytes consumed.if .n == 0 {+=}.n == nil} else if > 0 || > 0 {// Not enough buffer to store the remainder. Keep processing as// long as there is progress. Without this case, transforms that// require a lookahead larger than the buffer may result in an// error. This is not something one may expect to be common in// practice, but it may occur when buffers are set to small// sizes during testing.continue}case nil:if .n > 0 {= errInconsistentByteCount}}return ,}}// Close implements the io.Closer interface.func ( *Writer) () error {:= .src[:.n]for {, , := .t.Transform(.dst, , true)if , := .w.Write(.dst[:]); != nil {return}if != ErrShortDst {return}= [:]}}type nop struct{ NopResetter }func (nop) (, []byte, bool) (, int, error) {:= copy(, )if < len() {= ErrShortDst}return , ,}func (nop) ( []byte, bool) ( int, error) {return len(), nil}type discard struct{ NopResetter }func (discard) (, []byte, bool) (, int, error) {return 0, len(), nil}var (// Discard is a Transformer for which all Transform calls succeed// by consuming all bytes and writing nothing.Discard Transformer = discard{}// Nop is a SpanningTransformer that copies src to dst.Nop SpanningTransformer = nop{})// chain is a sequence of links. A chain with N Transformers has N+1 links and// N+1 buffers. Of those N+1 buffers, the first and last are the src and dst// buffers given to chain.Transform and the middle N-1 buffers are intermediate// buffers owned by the chain. The i'th link transforms bytes from the i'th// buffer chain.link[i].b at read offset chain.link[i].p to the i+1'th buffer// chain.link[i+1].b at write offset chain.link[i+1].n, for i in [0, N).type chain struct {link []linkerr error// errStart is the index at which the error occurred plus 1. Processing// errStart at this level at the next call to Transform. As long as// errStart > 0, chain will not consume any more source bytes.errStart int}func ( *chain) ( int, error) {if := + 1; > .errStart {.errStart =.err =}}type link struct {t Transformer// b[p:n] holds the bytes to be transformed by t.b []bytep intn int}func ( *link) () []byte {return .b[.p:.n]}func ( *link) () []byte {return .b[.n:]}// Chain returns a Transformer that applies t in sequence.func ( ...Transformer) Transformer {if len() == 0 {return nop{}}:= &chain{link: make([]link, len()+1)}for , := range {.link[].t =}// Allocate intermediate buffers.:= make([][defaultBufSize]byte, len()-1)for := range {.link[+1].b = [][:]}return}// Reset resets the state of Chain. It calls Reset on all the Transformers.func ( *chain) () {for , := range .link {if .t != nil {.t.Reset()}.link[].p, .link[].n = 0, 0}}// TODO: make chain use Span (is going to be fun to implement!)// Transform applies the transformers of c in sequence.func ( *chain) (, []byte, bool) (, int, error) {// Set up src and dst in the chain.:= &.link[0]:= &.link[len(.link)-1].b, .p, .n = , 0, len().b, .n = , 0var , bool // for detecting progress// i is the index of the next Transformer to apply, for i in [low, high].// low is the lowest index for which c.link[low] may still produce bytes.// high is the highest index for which c.link[high] has a Transformer.// The error returned by Transform determines whether to increase or// decrease i. We try to completely fill a buffer before converting it.for , , := .errStart, .errStart, len(.link)-2; <= && <= ; {, := &.link[], &.link[+1], , := .t.Transform(.dst(), .src(), && == ).n +=.p +=if > 0 && .p == .n {.p, .n = 0, 0}, = , falseswitch {case ErrShortDst:// Process the destination buffer next. Return if we are already// at the high index.if == {return .n, .p, ErrShortDst}if .n != 0 {++// If the Transformer at the next index is not able to process any// source bytes there is nothing that can be done to make progress// and the bytes will remain unprocessed. lastFull is used to// detect this and break out of the loop with a fatal error.= truecontinue}// The destination buffer was too small, but is completely empty.// Return a fatal error as this transformation can never complete..fatalError(, errShortInternal)case ErrShortSrc:if == 0 {// Save ErrShortSrc in err. All other errors take precedence.= ErrShortSrcbreak}// Source bytes were depleted before filling up the destination buffer.// Verify we made some progress, move the remaining bytes to the errStart// and try to get more source bytes.if && == 0 || .n-.p == len(.b) {// There were not enough source bytes to proceed while the source// buffer cannot hold any more bytes. Return a fatal error as this// transformation can never complete..fatalError(, errShortInternal)break}// in.b is an internal buffer and we can make progress..p, .n = 0, copy(.b, .src())fallthroughcase nil:// if i == low, we have depleted the bytes at index i or any lower levels.// In that case we increase low and i. In all other cases we decrease i to// fetch more bytes before proceeding to the next index.if > {--continue}default:.fatalError(, )}// Exhausted level low or fatal error: increase low and continue// to process the bytes accepted so far.++=}// If c.errStart > 0, this means we found a fatal error. We will clear// all upstream buffers. At this point, no more progress can be made// downstream, as Transform would have bailed while handling ErrShortDst.if .errStart > 0 {for := 1; < .errStart; ++ {.link[].p, .link[].n = 0, 0}, .errStart, .err = .err, 0, nil}return .n, .p,}// Deprecated: Use runes.Remove instead.func ( func( rune) bool) Transformer {return removeF()}type removeF func(r rune) boolfunc (removeF) () {}// Transform implements the Transformer interface.func ( removeF) (, []byte, bool) (, int, error) {for , := rune(0), 0; len() > 0; = [:] {if = rune([0]); < utf8.RuneSelf {= 1} else {, = utf8.DecodeRune()if == 1 {// Invalid rune.if ! && !utf8.FullRune() {= ErrShortSrcbreak}// We replace illegal bytes with RuneError. Not doing so might// otherwise turn a sequence of invalid UTF-8 into valid UTF-8.// The resulting byte sequence may subsequently contain runes// for which t(r) is true that were passed unnoticed.if !() {if +3 > len() {= ErrShortDstbreak}+= copy([:], "\uFFFD")}++continue}}if !() {if + > len() {= ErrShortDstbreak}+= copy([:], [:])}+=}return}// grow returns a new []byte that is longer than b, and copies the first n bytes// of b to the start of the new slice.func grow( []byte, int) []byte {:= len()if <= 32 {= 64} else if <= 256 {*= 2} else {+= >> 1}:= make([]byte, )copy(, [:])return}const initialBufSize = 128// String returns a string with the result of converting s[:n] using t, where// n <= len(s). If err == nil, n will be len(s). It calls Reset on t.func ( Transformer, string) ( string, int, error) {.Reset()if == "" {// Fast path for the common case for empty input. Results in about a// 86% reduction of running time for BenchmarkStringLowerEmpty.if , , := .Transform(nil, nil, true); == nil {return "", 0, nil}}// Allocate only once. Note that both dst and src escape when passed to// Transform.:= [2 * initialBufSize]byte{}:= [:initialBufSize:initialBufSize]:= [initialBufSize : 2*initialBufSize]// The input string s is transformed in multiple chunks (starting with a// chunk size of initialBufSize). nDst and nSrc are per-chunk (or// per-Transform-call) indexes, pDst and pSrc are overall indexes., := 0, 0, := 0, 0// pPrefix is the length of a common prefix: the first pPrefix bytes of the// result will equal the first pPrefix bytes of s. It is not guaranteed to// be the largest such value, but if pPrefix, len(result) and len(s) are// all equal after the final transform (i.e. calling Transform with atEOF// being true returned nil error) then we don't need to allocate a new// result string.:= 0for {// Invariant: pDst == pPrefix && pSrc == pPrefix.:= copy(, [:]), , = .Transform(, [:], + == len())+=+=// TODO: let transformers implement an optional Spanner interface, akin// to norm's QuickSpan. This would even allow us to avoid any allocation.if !bytes.Equal([:], [:]) {break}=if == ErrShortDst {// A buffer can only be short if a transformer modifies its input.break} else if == ErrShortSrc {if == 0 {// No progress was made.break}// Equal so far and !atEOF, so continue checking.} else if != nil || == len() {return string([:]), ,}}// Post-condition: pDst == pPrefix + nDst && pSrc == pPrefix + nSrc.// We have transformed the first pSrc bytes of the input s to become pDst// transformed bytes. Those transformed bytes are discontiguous: the first// pPrefix of them equal s[:pPrefix] and the last nDst of them equal// dst[:nDst]. We copy them around, into a new dst buffer if necessary, so// that they become one contiguous slice: dst[:pDst].if != 0 {:=if > len() {= make([]byte, len()+-)}copy([:], [:])copy([:], [:])=}// Prevent duplicate Transform calls with atEOF being true at the end of// the input. Also return if we have an unrecoverable error.if ( == nil && == len()) ||( != nil && != ErrShortDst && != ErrShortSrc) {return string([:]), ,}// Transform the remaining input, growing dst and src buffers as necessary.for {:= copy(, [:]):= + == len(), , := .Transform([:], [:], )+=+=// If we got ErrShortDst or ErrShortSrc, do not grow as long as we can// make progress. This may avoid excessive allocations.if == ErrShortDst {if == 0 {= grow(, )}} else if == ErrShortSrc {if {return string([:]), ,}if == 0 {= grow(, 0)}} else if != nil || == len() {return string([:]), ,}}}// Bytes returns a new byte slice with the result of converting b[:n] using t,// where n <= len(b). If err == nil, n will be len(b). It calls Reset on t.func ( Transformer, []byte) ( []byte, int, error) {return doAppend(, 0, make([]byte, len()), )}// Append appends the result of converting src[:n] using t to dst, where// n <= len(src), If err == nil, n will be len(src). It calls Reset on t.func ( Transformer, , []byte) ( []byte, int, error) {if len() == cap() {:= len() + len() // It is okay for this to be 0.:= make([]byte, )= [:copy(, )]}return doAppend(, len(), [:cap()], )}func doAppend( Transformer, int, , []byte) ( []byte, int, error) {.Reset():= 0for {, , := .Transform([:], [:], true)+=+=if != ErrShortDst {return [:], ,}// Grow the destination buffer, but do not grow as long as we can make// progress. This may avoid excessive allocations.if == 0 {= grow(, )}}}
![]() |
The pages are generated with Golds v0.7.9-preview. (GOOS=linux GOARCH=amd64) Golds is a Go 101 project developed by Tapir Liu. PR and bug reports are welcome and can be submitted to the issue list. Please follow @zigo_101 (reachable from the left QR code) to get the latest news of Golds. |