diff options
author | Joe Tsai <joetsai@digital-static.net> | 2023-06-22 11:10:18 -0700 |
---|---|---|
committer | Joseph Tsai <joetsai@digital-static.net> | 2023-06-27 17:27:30 +0000 |
commit | 4f36f7e4dd921be455810cc925ca122d71381156 (patch) | |
tree | 85418a7a59baeb15042b2e9b61bb1526677d9e0b | |
parent | 8008c0840ff09c492434e67de443ee478957fb8f (diff) | |
download | go-4f36f7e4dd921be455810cc925ca122d71381156.tar.gz go-4f36f7e4dd921be455810cc925ca122d71381156.zip |
encoding: document that base32 and base64 do not use UTF-8
The invention of base32 and base64 predates the invention of UTF-8
and was never meant to output valid UTF-8.
By default, the output is always valid ASCII (and thus valid UTF-8)
except when the user specifies an alphabet or padding value
that is larger than '\x7f'. If that is done,
then the exact byte symbol is used rather than the UTF-8 encoding.
Fixes #60689
Change-Id: I4ec88d974ec0658ad1a578bbd65a809e27c73ea7
Reviewed-on: https://go-review.googlesource.com/c/go/+/505237
Run-TryBot: Joseph Tsai <joetsai@digital-static.net>
Reviewed-by: Ian Lance Taylor <iant@google.com>
Reviewed-by: Dmitri Shuralyov <dmitshur@google.com>
TryBot-Result: Gopher Robot <gobot@golang.org>
-rw-r--r-- | src/encoding/base32/base32.go | 5 | ||||
-rw-r--r-- | src/encoding/base64/base64.go | 5 |
2 files changed, 8 insertions, 2 deletions
diff --git a/src/encoding/base32/base32.go b/src/encoding/base32/base32.go index 41d343aaac..3dc37b0aa7 100644 --- a/src/encoding/base32/base32.go +++ b/src/encoding/base32/base32.go @@ -50,7 +50,8 @@ const encodeStd = "ABCDEFGHIJKLMNOPQRSTUVWXYZ234567" const encodeHex = "0123456789ABCDEFGHIJKLMNOPQRSTUV" // NewEncoding returns a new Encoding defined by the given alphabet, -// which must be a 32-byte string. +// which must be a 32-byte string. The alphabet is treated as sequence +// of byte values without any special treatment for multi-byte UTF-8. func NewEncoding(encoder string) *Encoding { if len(encoder) != 32 { panic("encoding alphabet is not 32-bytes long") @@ -80,6 +81,8 @@ var HexEncoding = NewEncoding(encodeHex) // The padding character must not be '\r' or '\n', must not // be contained in the encoding's alphabet and must be a rune equal or // below '\xff'. +// Padding characters above '\x7f' are encoded as their exact byte value +// rather than using the UTF-8 representation of the codepoint. func (enc Encoding) WithPadding(padding rune) *Encoding { if padding == '\r' || padding == '\n' || padding > 0xff { panic("invalid padding") diff --git a/src/encoding/base64/base64.go b/src/encoding/base64/base64.go index 0e12d90d29..6aa8a15bdc 100644 --- a/src/encoding/base64/base64.go +++ b/src/encoding/base64/base64.go @@ -54,7 +54,8 @@ const encodeURL = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz012345678 // NewEncoding returns a new padded Encoding defined by the given alphabet, // which must be a 64-byte string that does not contain the padding character -// or CR / LF ('\r', '\n'). +// or CR / LF ('\r', '\n'). The alphabet is treated as sequence of byte values +// without any special treatment for multi-byte UTF-8. // The resulting Encoding uses the default padding character ('='), // which may be changed or disabled via WithPadding. func NewEncoding(encoder string) *Encoding { @@ -83,6 +84,8 @@ func NewEncoding(encoder string) *Encoding { // The padding character must not be '\r' or '\n', must not // be contained in the encoding's alphabet and must be a rune equal or // below '\xff'. +// Padding characters above '\x7f' are encoded as their exact byte value +// rather than using the UTF-8 representation of the codepoint. func (enc Encoding) WithPadding(padding rune) *Encoding { if padding == '\r' || padding == '\n' || padding > 0xff { panic("invalid padding") |