package toml
import (
"bytes"
"encoding"
"encoding/json"
"fmt"
"io"
"io/fs"
"math"
"os"
"reflect"
"strconv"
"strings"
"time"
)
// Unmarshaler is the interface implemented by objects that can unmarshal a
// TOML description of themselves.
type Unmarshaler interface {
UnmarshalTOML(any) error
}
// Unmarshal decodes the contents of data in TOML format into a pointer v.
//
// See [Decoder] for a description of the decoding process.
func Unmarshal(data []byte, v any) error {
_, err := NewDecoder(bytes.NewReader(data)).Decode(v)
return err
}
// Decode the TOML data in to the pointer v.
//
// See [Decoder] for a description of the decoding process.
func Decode(data string, v any) (MetaData, error) {
return NewDecoder(strings.NewReader(data)).Decode(v)
}
// DecodeFile reads the contents of a file and decodes it with [Decode].
func DecodeFile(path string, v any) (MetaData, error) {
fp, err := os.Open(path)
if err != nil {
return MetaData{}, err
}
defer fp.Close()
return NewDecoder(fp).Decode(v)
}
// DecodeFS reads the contents of a file from [fs.FS] and decodes it with
// [Decode].
func DecodeFS(fsys fs.FS, path string, v any) (MetaData, error) {
fp, err := fsys.Open(path)
if err != nil {
return MetaData{}, err
}
defer fp.Close()
return NewDecoder(fp).Decode(v)
}
// Primitive is a TOML value that hasn't been decoded into a Go value.
//
// This type can be used for any value, which will cause decoding to be delayed.
// You can use [PrimitiveDecode] to "manually" decode these values.
//
// NOTE: The underlying representation of a `Primitive` value is subject to
// change. Do not rely on it.
//
// NOTE: Primitive values are still parsed, so using them will only avoid the
// overhead of reflection. They can be useful when you don't know the exact type
// of TOML data until runtime.
type Primitive struct {
undecoded any
context Key
}
// The significand precision for float32 and float64 is 24 and 53 bits; this is
// the range a natural number can be stored in a float without loss of data.
const (
maxSafeFloat32Int = 16777215 // 2^24-1
maxSafeFloat64Int = int64(9007199254740991) // 2^53-1
)
// Decoder decodes TOML data.
//
// TOML tables correspond to Go structs or maps; they can be used
// interchangeably, but structs offer better type safety.
//
// TOML table arrays correspond to either a slice of structs or a slice of maps.
//
// TOML datetimes correspond to [time.Time]. Local datetimes are parsed in the
// local timezone.
//
// [time.Duration] types are treated as nanoseconds if the TOML value is an
// integer, or they're parsed with time.ParseDuration() if they're strings.
//
// All other TOML types (float, string, int, bool and array) correspond to the
// obvious Go types.
//
// An exception to the above rules is if a type implements the TextUnmarshaler
// interface, in which case any primitive TOML value (floats, strings, integers,
// booleans, datetimes) will be converted to a []byte and given to the value's
// UnmarshalText method. See the Unmarshaler example for a demonstration with
// email addresses.
//
// # Key mapping
//
// TOML keys can map to either keys in a Go map or field names in a Go struct.
// The special `toml` struct tag can be used to map TOML keys to struct fields
// that don't match the key name exactly (see the example). A case insensitive
// match to struct names will be tried if an exact match can't be found.
//
// The mapping between TOML values and Go values is loose. That is, there may
// exist TOML values that cannot be placed into your representation, and there
// may be parts of your representation that do not correspond to TOML values.
// This loose mapping can be made stricter by using the IsDefined and/or
// Undecoded methods on the MetaData returned.
//
// This decoder does not handle cyclic types. Decode will not terminate if a
// cyclic type is passed.
type Decoder struct {
r io.Reader
}
// NewDecoder creates a new Decoder.
func NewDecoder(r io.Reader) *Decoder {
return &Decoder{r: r}
}
var (
unmarshalToml = reflect.TypeOf((*Unmarshaler)(nil)).Elem()
unmarshalText = reflect.TypeOf((*encoding.TextUnmarshaler)(nil)).Elem()
primitiveType = reflect.TypeOf((*Primitive)(nil)).Elem()
)
// Decode TOML data in to the pointer `v`.
func (dec *Decoder) Decode(v any) (MetaData, error) {
rv := reflect.ValueOf(v)
if rv.Kind() != reflect.Ptr {
s := "%q"
if reflect.TypeOf(v) == nil {
s = "%v"
}
return MetaData{}, fmt.Errorf("toml: cannot decode to non-pointer "+s, reflect.TypeOf(v))
}
if rv.IsNil() {
return MetaData{}, fmt.Errorf("toml: cannot decode to nil value of %q", reflect.TypeOf(v))
}
// Check if this is a supported type: struct, map, any, or something that
// implements UnmarshalTOML or UnmarshalText.
rv = indirect(rv)
rt := rv.Type()
if rv.Kind() != reflect.Struct && rv.Kind() != reflect.Map &&
!(rv.Kind() == reflect.Interface && rv.NumMethod() == 0) &&
!rt.Implements(unmarshalToml) && !rt.Implements(unmarshalText) {
return MetaData{}, fmt.Errorf("toml: cannot decode to type %s", rt)
}
// TODO: parser should read from io.Reader? Or at the very least, make it
// read from []byte rather than string
data, err := io.ReadAll(dec.r)
if err != nil {
return MetaData{}, err
}
p, err := parse(string(data))
if err != nil {
return MetaData{}, err
}
md := MetaData{
mapping: p.mapping,
keyInfo: p.keyInfo,
keys: p.ordered,
decoded: make(map[string]struct{}, len(p.ordered)),
context: nil,
data: data,
}
return md, md.unify(p.mapping, rv)
}
// PrimitiveDecode is just like the other Decode* functions, except it decodes a
// TOML value that has already been parsed. Valid primitive values can *only* be
// obtained from values filled by the decoder functions, including this method.
// (i.e., v may contain more [Primitive] values.)
//
// Meta data for primitive values is included in the meta data returned by the
// Decode* functions with one exception: keys returned by the Undecoded method
// will only reflect keys that were decoded. Namely, any keys hidden behind a
// Primitive will be considered undecoded. Executing this method will update the
// undecoded keys in the meta data. (See the example.)
func (md *MetaData) PrimitiveDecode(primValue Primitive, v any) error {
md.context = primValue.context
defer func() { md.context = nil }()
return md.unify(primValue.undecoded, rvalue(v))
}
// markDecodedRecursive is a helper to mark any key under the given tmap as
// decoded, recursing as needed
func markDecodedRecursive(md *MetaData, tmap map[string]any) {
for key := range tmap {
md.decoded[md.context.add(key).String()] = struct{}{}
if tmap, ok := tmap[key].(map[string]any); ok {
md.context = append(md.context, key)
markDecodedRecursive(md, tmap)
md.context = md.context[0 : len(md.context)-1]
}
if tarr, ok := tmap[key].([]map[string]any); ok {
for _, elm := range tarr {
md.context = append(md.context, key)
markDecodedRecursive(md, elm)
md.context = md.context[0 : len(md.context)-1]
}
}
}
}
// unify performs a sort of type unification based on the structure of `rv`,
// which is the client representation.
//
// Any type mismatch produces an error. Finding a type that we don't know
// how to handle produces an unsupported type error.
func (md *MetaData) unify(data any, rv reflect.Value) error {
// Special case. Look for a `Primitive` value.
// TODO: #76 would make this superfluous after implemented.
if rv.Type() == primitiveType {
// Save the undecoded data and the key context into the primitive
// value.
context := make(Key, len(md.context))
copy(context, md.context)
rv.Set(reflect.ValueOf(Primitive{
undecoded: data,
context: context,
}))
return nil
}
rvi := rv.Interface()
if v, ok := rvi.(Unmarshaler); ok {
err := v.UnmarshalTOML(data)
if err != nil {
return md.parseErr(err)
}
// Assume the Unmarshaler decoded everything, so mark all keys under
// this table as decoded.
if tmap, ok := data.(map[string]any); ok {
markDecodedRecursive(md, tmap)
}
if aot, ok := data.([]map[string]any); ok {
for _, tmap := range aot {
markDecodedRecursive(md, tmap)
}
}
return nil
}
if v, ok := rvi.(encoding.TextUnmarshaler); ok {
return md.unifyText(data, v)
}
// TODO:
// The behavior here is incorrect whenever a Go type satisfies the
// encoding.TextUnmarshaler interface but also corresponds to a TOML hash or
// array. In particular, the unmarshaler should only be applied to primitive
// TOML values. But at this point, it will be applied to all kinds of values
// and produce an incorrect error whenever those values are hashes or arrays
// (including arrays of tables).
k := rv.Kind()
if k >= reflect.Int && k <= reflect.Uint64 {
return md.unifyInt(data, rv)
}
switch k {
case reflect.Struct:
return md.unifyStruct(data, rv)
case reflect.Map:
return md.unifyMap(data, rv)
case reflect.Array:
return md.unifyArray(data, rv)
case reflect.Slice:
return md.unifySlice(data, rv)
case reflect.String:
return md.unifyString(data, rv)
case reflect.Bool:
return md.unifyBool(data, rv)
case reflect.Interface:
if rv.NumMethod() > 0 { /// Only empty interfaces are supported.
return md.e("unsupported type %s", rv.Type())
}
return md.unifyAnything(data, rv)
case reflect.Float32, reflect.Float64:
return md.unifyFloat64(data, rv)
}
return md.e("unsupported type %s", rv.Kind())
}
func (md *MetaData) unifyStruct(mapping any, rv reflect.Value) error {
tmap, ok := mapping.(map[string]any)
if !ok {
if mapping == nil {
return nil
}
return md.e("type mismatch for %s: expected table but found %s", rv.Type().String(), fmtType(mapping))
}
for key, datum := range tmap {
var f *field
fields := cachedTypeFields(rv.Type())
for i := range fields {
ff := &fields[i]
if ff.name == key {
f = ff
break
}
if f == nil && strings.EqualFold(ff.name, key) {
f = ff
}
}
if f != nil {
subv := rv
for _, i := range f.index {
subv = indirect(subv.Field(i))
}
if isUnifiable(subv) {
md.decoded[md.context.add(key).String()] = struct{}{}
md.context = append(md.context, key)
err := md.unify(datum, subv)
if err != nil {
return err
}
md.context = md.context[0 : len(md.context)-1]
} else if f.name != "" {
return md.e("cannot write unexported field %s.%s", rv.Type().String(), f.name)
}
}
}
return nil
}
func (md *MetaData) unifyMap(mapping any, rv reflect.Value) error {
keyType := rv.Type().Key().Kind()
if keyType != reflect.String && keyType != reflect.Interface {
return fmt.Errorf("toml: cannot decode to a map with non-string key type (%s in %q)",
keyType, rv.Type())
}
tmap, ok := mapping.(map[string]any)
if !ok {
if tmap == nil {
return nil
}
return md.badtype("map", mapping)
}
if rv.IsNil() {
rv.Set(reflect.MakeMap(rv.Type()))
}
for k, v := range tmap {
md.decoded[md.context.add(k).String()] = struct{}{}
md.context = append(md.context, k)
rvval := reflect.Indirect(reflect.New(rv.Type().Elem()))
err := md.unify(v, indirect(rvval))
if err != nil {
return err
}
md.context = md.context[0 : len(md.context)-1]
rvkey := indirect(reflect.New(rv.Type().Key()))
switch keyType {
case reflect.Interface:
rvkey.Set(reflect.ValueOf(k))
case reflect.String:
rvkey.SetString(k)
}
rv.SetMapIndex(rvkey, rvval)
}
return nil
}
func (md *MetaData) unifyArray(data any, rv reflect.Value) error {
datav := reflect.ValueOf(data)
if datav.Kind() != reflect.Slice {
if !datav.IsValid() {
return nil
}
return md.badtype("slice", data)
}
if l := datav.Len(); l != rv.Len() {
return md.e("expected array length %d; got TOML array of length %d", rv.Len(), l)
}
return md.unifySliceArray(datav, rv)
}
func (md *MetaData) unifySlice(data any, rv reflect.Value) error {
datav := reflect.ValueOf(data)
if datav.Kind() != reflect.Slice {
if !datav.IsValid() {
return nil
}
return md.badtype("slice", data)
}
n := datav.Len()
if rv.IsNil() || rv.Cap() < n {
rv.Set(reflect.MakeSlice(rv.Type(), n, n))
}
rv.SetLen(n)
return md.unifySliceArray(datav, rv)
}
func (md *MetaData) unifySliceArray(data, rv reflect.Value) error {
l := data.Len()
for i := 0; i < l; i++ {
err := md.unify(data.Index(i).Interface(), indirect(rv.Index(i)))
if err != nil {
return err
}
}
return nil
}
func (md *MetaData) unifyString(data any, rv reflect.Value) error {
_, ok := rv.Interface().(json.Number)
if ok {
if i, ok := data.(int64); ok {
rv.SetString(strconv.FormatInt(i, 10))
} else if f, ok := data.(float64); ok {
rv.SetString(strconv.FormatFloat(f, 'g', -1, 64))
} else {
return md.badtype("string", data)
}
return nil
}
if s, ok := data.(string); ok {
rv.SetString(s)
return nil
}
return md.badtype("string", data)
}
func (md *MetaData) unifyFloat64(data any, rv reflect.Value) error {
rvk := rv.Kind()
if num, ok := data.(float64); ok {
switch rvk {
case reflect.Float32:
if num < -math.MaxFloat32 || num > math.MaxFloat32 {
return md.parseErr(errParseRange{i: num, size: rvk.String()})
}
fallthrough
case reflect.Float64:
rv.SetFloat(num)
default:
panic("bug")
}
return nil
}
if num, ok := data.(int64); ok {
if (rvk == reflect.Float32 && (num < -maxSafeFloat32Int || num > maxSafeFloat32Int)) ||
(rvk == reflect.Float64 && (num < -maxSafeFloat64Int || num > maxSafeFloat64Int)) {
return md.parseErr(errUnsafeFloat{i: num, size: rvk.String()})
}
rv.SetFloat(float64(num))
return nil
}
return md.badtype("float", data)
}
func (md *MetaData) unifyInt(data any, rv reflect.Value) error {
_, ok := rv.Interface().(time.Duration)
if ok {
// Parse as string duration, and fall back to regular integer parsing
// (as nanosecond) if this is not a string.
if s, ok := data.(string); ok {
dur, err := time.ParseDuration(s)
if err != nil {
return md.parseErr(errParseDuration{s})
}
rv.SetInt(int64(dur))
return nil
}
}
num, ok := data.(int64)
if !ok {
return md.badtype("integer", data)
}
rvk := rv.Kind()
switch {
case rvk >= reflect.Int && rvk <= reflect.Int64:
if (rvk == reflect.Int8 && (num < math.MinInt8 || num > math.MaxInt8)) ||
(rvk == reflect.Int16 && (num < math.MinInt16 || num > math.MaxInt16)) ||
(rvk == reflect.Int32 && (num < math.MinInt32 || num > math.MaxInt32)) {
return md.parseErr(errParseRange{i: num, size: rvk.String()})
}
rv.SetInt(num)
case rvk >= reflect.Uint && rvk <= reflect.Uint64:
unum := uint64(num)
if rvk == reflect.Uint8 && (num < 0 || unum > math.MaxUint8) ||
rvk == reflect.Uint16 && (num < 0 || unum > math.MaxUint16) ||
rvk == reflect.Uint32 && (num < 0 || unum > math.MaxUint32) {
return md.parseErr(errParseRange{i: num, size: rvk.String()})
}
rv.SetUint(unum)
default:
panic("unreachable")
}
return nil
}
func (md *MetaData) unifyBool(data any, rv reflect.Value) error {
if b, ok := data.(bool); ok {
rv.SetBool(b)
return nil
}
return md.badtype("boolean", data)
}
func (md *MetaData) unifyAnything(data any, rv reflect.Value) error {
rv.Set(reflect.ValueOf(data))
return nil
}
func (md *MetaData) unifyText(data any, v encoding.TextUnmarshaler) error {
var s string
switch sdata := data.(type) {
case Marshaler:
text, err := sdata.MarshalTOML()
if err != nil {
return err
}
s = string(text)
case encoding.TextMarshaler:
text, err := sdata.MarshalText()
if err != nil {
return err
}
s = string(text)
case fmt.Stringer:
s = sdata.String()
case string:
s = sdata
case bool:
s = fmt.Sprintf("%v", sdata)
case int64:
s = fmt.Sprintf("%d", sdata)
case float64:
s = fmt.Sprintf("%f", sdata)
default:
return md.badtype("primitive (string-like)", data)
}
if err := v.UnmarshalText([]byte(s)); err != nil {
return md.parseErr(err)
}
return nil
}
func (md *MetaData) badtype(dst string, data any) error {
return md.e("incompatible types: TOML value has type %s; destination has type %s", fmtType(data), dst)
}
func (md *MetaData) parseErr(err error) error {
k := md.context.String()
d := string(md.data)
return ParseError{
Message: err.Error(),
err: err,
LastKey: k,
Position: md.keyInfo[k].pos.withCol(d),
Line: md.keyInfo[k].pos.Line,
input: d,
}
}
func (md *MetaData) e(format string, args ...any) error {
f := "toml: "
if len(md.context) > 0 {
f = fmt.Sprintf("toml: (last key %q): ", md.context)
p := md.keyInfo[md.context.String()].pos
if p.Line > 0 {
f = fmt.Sprintf("toml: line %d (last key %q): ", p.Line, md.context)
}
}
return fmt.Errorf(f+format, args...)
}
// rvalue returns a reflect.Value of `v`. All pointers are resolved.
func rvalue(v any) reflect.Value {
return indirect(reflect.ValueOf(v))
}
// indirect returns the value pointed to by a pointer.
//
// Pointers are followed until the value is not a pointer. New values are
// allocated for each nil pointer.
//
// An exception to this rule is if the value satisfies an interface of interest
// to us (like encoding.TextUnmarshaler).
func indirect(v reflect.Value) reflect.Value {
if v.Kind() != reflect.Ptr {
if v.CanSet() {
pv := v.Addr()
pvi := pv.Interface()
if _, ok := pvi.(encoding.TextUnmarshaler); ok {
return pv
}
if _, ok := pvi.(Unmarshaler); ok {
return pv
}
}
return v
}
if v.IsNil() {
v.Set(reflect.New(v.Type().Elem()))
}
return indirect(reflect.Indirect(v))
}
func isUnifiable(rv reflect.Value) bool {
if rv.CanSet() {
return true
}
rvi := rv.Interface()
if _, ok := rvi.(encoding.TextUnmarshaler); ok {
return true
}
if _, ok := rvi.(Unmarshaler); ok {
return true
}
return false
}
// fmt %T with "interface {}" replaced with "any", which is far more readable.
func fmtType(t any) string {
return strings.ReplaceAll(fmt.Sprintf("%T", t), "interface {}", "any")
}
package toml
import (
"encoding"
"io"
)
// TextMarshaler is an alias for encoding.TextMarshaler.
//
// Deprecated: use encoding.TextMarshaler
type TextMarshaler encoding.TextMarshaler
// TextUnmarshaler is an alias for encoding.TextUnmarshaler.
//
// Deprecated: use encoding.TextUnmarshaler
type TextUnmarshaler encoding.TextUnmarshaler
// DecodeReader is an alias for NewDecoder(r).Decode(v).
//
// Deprecated: use NewDecoder(reader).Decode(&value).
func DecodeReader(r io.Reader, v any) (MetaData, error) { return NewDecoder(r).Decode(v) }
// PrimitiveDecode is an alias for MetaData.PrimitiveDecode().
//
// Deprecated: use MetaData.PrimitiveDecode.
func PrimitiveDecode(primValue Primitive, v any) error {
md := MetaData{decoded: make(map[string]struct{})}
return md.unify(primValue.undecoded, rvalue(v))
}
package toml
import (
"bufio"
"bytes"
"encoding"
"encoding/json"
"errors"
"fmt"
"io"
"math"
"reflect"
"sort"
"strconv"
"strings"
"time"
"github.com/BurntSushi/toml/internal"
)
type tomlEncodeError struct{ error }
var (
errArrayNilElement = errors.New("toml: cannot encode array with nil element")
errNonString = errors.New("toml: cannot encode a map with non-string key type")
errNoKey = errors.New("toml: top-level values must be Go maps or structs")
errAnything = errors.New("") // used in testing
)
var dblQuotedReplacer = strings.NewReplacer(
"\"", "\\\"",
"\\", "\\\\",
"\x00", `\u0000`,
"\x01", `\u0001`,
"\x02", `\u0002`,
"\x03", `\u0003`,
"\x04", `\u0004`,
"\x05", `\u0005`,
"\x06", `\u0006`,
"\x07", `\u0007`,
"\b", `\b`,
"\t", `\t`,
"\n", `\n`,
"\x0b", `\u000b`,
"\f", `\f`,
"\r", `\r`,
"\x0e", `\u000e`,
"\x0f", `\u000f`,
"\x10", `\u0010`,
"\x11", `\u0011`,
"\x12", `\u0012`,
"\x13", `\u0013`,
"\x14", `\u0014`,
"\x15", `\u0015`,
"\x16", `\u0016`,
"\x17", `\u0017`,
"\x18", `\u0018`,
"\x19", `\u0019`,
"\x1a", `\u001a`,
"\x1b", `\u001b`,
"\x1c", `\u001c`,
"\x1d", `\u001d`,
"\x1e", `\u001e`,
"\x1f", `\u001f`,
"\x7f", `\u007f`,
)
var (
marshalToml = reflect.TypeOf((*Marshaler)(nil)).Elem()
marshalText = reflect.TypeOf((*encoding.TextMarshaler)(nil)).Elem()
timeType = reflect.TypeOf((*time.Time)(nil)).Elem()
)
// Marshaler is the interface implemented by types that can marshal themselves
// into valid TOML.
type Marshaler interface {
MarshalTOML() ([]byte, error)
}
// Marshal returns a TOML representation of the Go value.
//
// See [Encoder] for a description of the encoding process.
func Marshal(v any) ([]byte, error) {
buff := new(bytes.Buffer)
if err := NewEncoder(buff).Encode(v); err != nil {
return nil, err
}
return buff.Bytes(), nil
}
// Encoder encodes a Go to a TOML document.
//
// The mapping between Go values and TOML values should be precisely the same as
// for [Decode].
//
// time.Time is encoded as a RFC 3339 string, and time.Duration as its string
// representation.
//
// The [Marshaler] and [encoding.TextMarshaler] interfaces are supported to
// encoding the value as custom TOML.
//
// If you want to write arbitrary binary data then you will need to use
// something like base64 since TOML does not have any binary types.
//
// When encoding TOML hashes (Go maps or structs), keys without any sub-hashes
// are encoded first.
//
// Go maps will be sorted alphabetically by key for deterministic output.
//
// The toml struct tag can be used to provide the key name; if omitted the
// struct field name will be used. If the "omitempty" option is present the
// following value will be skipped:
//
// - arrays, slices, maps, and string with len of 0
// - struct with all zero values
// - bool false
//
// If omitzero is given all int and float types with a value of 0 will be
// skipped.
//
// Encoding Go values without a corresponding TOML representation will return an
// error. Examples of this includes maps with non-string keys, slices with nil
// elements, embedded non-struct types, and nested slices containing maps or
// structs. (e.g. [][]map[string]string is not allowed but []map[string]string
// is okay, as is []map[string][]string).
//
// NOTE: only exported keys are encoded due to the use of reflection. Unexported
// keys are silently discarded.
type Encoder struct {
Indent string // string for a single indentation level; default is two spaces.
hasWritten bool // written any output to w yet?
w *bufio.Writer
}
// NewEncoder create a new Encoder.
func NewEncoder(w io.Writer) *Encoder {
return &Encoder{w: bufio.NewWriter(w), Indent: " "}
}
// Encode writes a TOML representation of the Go value to the [Encoder]'s writer.
//
// An error is returned if the value given cannot be encoded to a valid TOML
// document.
func (enc *Encoder) Encode(v any) error {
rv := eindirect(reflect.ValueOf(v))
err := enc.safeEncode(Key([]string{}), rv)
if err != nil {
return err
}
return enc.w.Flush()
}
func (enc *Encoder) safeEncode(key Key, rv reflect.Value) (err error) {
defer func() {
if r := recover(); r != nil {
if terr, ok := r.(tomlEncodeError); ok {
err = terr.error
return
}
panic(r)
}
}()
enc.encode(key, rv)
return nil
}
func (enc *Encoder) encode(key Key, rv reflect.Value) {
// If we can marshal the type to text, then we use that. This prevents the
// encoder for handling these types as generic structs (or whatever the
// underlying type of a TextMarshaler is).
switch {
case isMarshaler(rv):
enc.writeKeyValue(key, rv, false)
return
case rv.Type() == primitiveType: // TODO: #76 would make this superfluous after implemented.
enc.encode(key, reflect.ValueOf(rv.Interface().(Primitive).undecoded))
return
}
k := rv.Kind()
switch k {
case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32,
reflect.Int64,
reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32,
reflect.Uint64,
reflect.Float32, reflect.Float64, reflect.String, reflect.Bool:
enc.writeKeyValue(key, rv, false)
case reflect.Array, reflect.Slice:
if typeEqual(tomlArrayHash, tomlTypeOfGo(rv)) {
enc.eArrayOfTables(key, rv)
} else {
enc.writeKeyValue(key, rv, false)
}
case reflect.Interface:
if rv.IsNil() {
return
}
enc.encode(key, rv.Elem())
case reflect.Map:
if rv.IsNil() {
return
}
enc.eTable(key, rv)
case reflect.Ptr:
if rv.IsNil() {
return
}
enc.encode(key, rv.Elem())
case reflect.Struct:
enc.eTable(key, rv)
default:
encPanic(fmt.Errorf("unsupported type for key '%s': %s", key, k))
}
}
// eElement encodes any value that can be an array element.
func (enc *Encoder) eElement(rv reflect.Value) {
switch v := rv.Interface().(type) {
case time.Time: // Using TextMarshaler adds extra quotes, which we don't want.
format := time.RFC3339Nano
switch v.Location() {
case internal.LocalDatetime:
format = "2006-01-02T15:04:05.999999999"
case internal.LocalDate:
format = "2006-01-02"
case internal.LocalTime:
format = "15:04:05.999999999"
}
switch v.Location() {
default:
enc.write(v.Format(format))
case internal.LocalDatetime, internal.LocalDate, internal.LocalTime:
enc.write(v.In(time.UTC).Format(format))
}
return
case Marshaler:
s, err := v.MarshalTOML()
if err != nil {
encPanic(err)
}
if s == nil {
encPanic(errors.New("MarshalTOML returned nil and no error"))
}
enc.w.Write(s)
return
case encoding.TextMarshaler:
s, err := v.MarshalText()
if err != nil {
encPanic(err)
}
if s == nil {
encPanic(errors.New("MarshalText returned nil and no error"))
}
enc.writeQuoted(string(s))
return
case time.Duration:
enc.writeQuoted(v.String())
return
case json.Number:
n, _ := rv.Interface().(json.Number)
if n == "" { /// Useful zero value.
enc.w.WriteByte('0')
return
} else if v, err := n.Int64(); err == nil {
enc.eElement(reflect.ValueOf(v))
return
} else if v, err := n.Float64(); err == nil {
enc.eElement(reflect.ValueOf(v))
return
}
encPanic(fmt.Errorf("unable to convert %q to int64 or float64", n))
}
switch rv.Kind() {
case reflect.Ptr:
enc.eElement(rv.Elem())
return
case reflect.String:
enc.writeQuoted(rv.String())
case reflect.Bool:
enc.write(strconv.FormatBool(rv.Bool()))
case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64:
enc.write(strconv.FormatInt(rv.Int(), 10))
case reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64:
enc.write(strconv.FormatUint(rv.Uint(), 10))
case reflect.Float32:
f := rv.Float()
if math.IsNaN(f) {
if math.Signbit(f) {
enc.write("-")
}
enc.write("nan")
} else if math.IsInf(f, 0) {
if math.Signbit(f) {
enc.write("-")
}
enc.write("inf")
} else {
enc.write(floatAddDecimal(strconv.FormatFloat(f, 'g', -1, 32)))
}
case reflect.Float64:
f := rv.Float()
if math.IsNaN(f) {
if math.Signbit(f) {
enc.write("-")
}
enc.write("nan")
} else if math.IsInf(f, 0) {
if math.Signbit(f) {
enc.write("-")
}
enc.write("inf")
} else {
enc.write(floatAddDecimal(strconv.FormatFloat(f, 'g', -1, 64)))
}
case reflect.Array, reflect.Slice:
enc.eArrayOrSliceElement(rv)
case reflect.Struct:
enc.eStruct(nil, rv, true)
case reflect.Map:
enc.eMap(nil, rv, true)
case reflect.Interface:
enc.eElement(rv.Elem())
default:
encPanic(fmt.Errorf("unexpected type: %s", fmtType(rv.Interface())))
}
}
// By the TOML spec, all floats must have a decimal with at least one number on
// either side.
func floatAddDecimal(fstr string) string {
for _, c := range fstr {
if c == 'e' { // Exponent syntax
return fstr
}
if c == '.' {
return fstr
}
}
return fstr + ".0"
}
func (enc *Encoder) writeQuoted(s string) {
enc.write(`"` + dblQuotedReplacer.Replace(s) + `"`)
}
func (enc *Encoder) eArrayOrSliceElement(rv reflect.Value) {
length := rv.Len()
enc.write("[")
for i := 0; i < length; i++ {
elem := eindirect(rv.Index(i))
enc.eElement(elem)
if i != length-1 {
enc.write(", ")
}
}
enc.write("]")
}
func (enc *Encoder) eArrayOfTables(key Key, rv reflect.Value) {
if len(key) == 0 {
encPanic(errNoKey)
}
for i := 0; i < rv.Len(); i++ {
trv := eindirect(rv.Index(i))
if isNil(trv) {
continue
}
enc.newline()
enc.writef("%s[[%s]]", enc.indentStr(key), key)
enc.newline()
enc.eMapOrStruct(key, trv, false)
}
}
func (enc *Encoder) eTable(key Key, rv reflect.Value) {
if len(key) == 1 {
// Output an extra newline between top-level tables.
// (The newline isn't written if nothing else has been written though.)
enc.newline()
}
if len(key) > 0 {
enc.writef("%s[%s]", enc.indentStr(key), key)
enc.newline()
}
enc.eMapOrStruct(key, rv, false)
}
func (enc *Encoder) eMapOrStruct(key Key, rv reflect.Value, inline bool) {
switch rv.Kind() {
case reflect.Map:
enc.eMap(key, rv, inline)
case reflect.Struct:
enc.eStruct(key, rv, inline)
default:
// Should never happen?
panic("eTable: unhandled reflect.Value Kind: " + rv.Kind().String())
}
}
func (enc *Encoder) eMap(key Key, rv reflect.Value, inline bool) {
rt := rv.Type()
if rt.Key().Kind() != reflect.String {
encPanic(errNonString)
}
// Sort keys so that we have deterministic output. And write keys directly
// underneath this key first, before writing sub-structs or sub-maps.
var mapKeysDirect, mapKeysSub []reflect.Value
for _, mapKey := range rv.MapKeys() {
if typeIsTable(tomlTypeOfGo(eindirect(rv.MapIndex(mapKey)))) {
mapKeysSub = append(mapKeysSub, mapKey)
} else {
mapKeysDirect = append(mapKeysDirect, mapKey)
}
}
writeMapKeys := func(mapKeys []reflect.Value, trailC bool) {
sort.Slice(mapKeys, func(i, j int) bool { return mapKeys[i].String() < mapKeys[j].String() })
for i, mapKey := range mapKeys {
val := eindirect(rv.MapIndex(mapKey))
if isNil(val) {
continue
}
if inline {
enc.writeKeyValue(Key{mapKey.String()}, val, true)
if trailC || i != len(mapKeys)-1 {
enc.write(", ")
}
} else {
enc.encode(key.add(mapKey.String()), val)
}
}
}
if inline {
enc.write("{")
}
writeMapKeys(mapKeysDirect, len(mapKeysSub) > 0)
writeMapKeys(mapKeysSub, false)
if inline {
enc.write("}")
}
}
func pointerTo(t reflect.Type) reflect.Type {
if t.Kind() == reflect.Ptr {
return pointerTo(t.Elem())
}
return t
}
func (enc *Encoder) eStruct(key Key, rv reflect.Value, inline bool) {
// Write keys for fields directly under this key first, because if we write
// a field that creates a new table then all keys under it will be in that
// table (not the one we're writing here).
//
// Fields is a [][]int: for fieldsDirect this always has one entry (the
// struct index). For fieldsSub it contains two entries: the parent field
// index from tv, and the field indexes for the fields of the sub.
var (
rt = rv.Type()
fieldsDirect, fieldsSub [][]int
addFields func(rt reflect.Type, rv reflect.Value, start []int)
)
addFields = func(rt reflect.Type, rv reflect.Value, start []int) {
for i := 0; i < rt.NumField(); i++ {
f := rt.Field(i)
isEmbed := f.Anonymous && pointerTo(f.Type).Kind() == reflect.Struct
if f.PkgPath != "" && !isEmbed { /// Skip unexported fields.
continue
}
opts := getOptions(f.Tag)
if opts.skip {
continue
}
frv := eindirect(rv.Field(i))
// Need to make a copy because ... ehm, I don't know why... I guess
// allocating a new array can cause it to fail(?)
//
// Done for: https://github.com/BurntSushi/toml/issues/430
// Previously only on 32bit for: https://github.com/BurntSushi/toml/issues/314
copyStart := make([]int, len(start))
copy(copyStart, start)
start = copyStart
// Treat anonymous struct fields with tag names as though they are
// not anonymous, like encoding/json does.
//
// Non-struct anonymous fields use the normal encoding logic.
if isEmbed {
if getOptions(f.Tag).name == "" && frv.Kind() == reflect.Struct {
addFields(frv.Type(), frv, append(start, f.Index...))
continue
}
}
if typeIsTable(tomlTypeOfGo(frv)) {
fieldsSub = append(fieldsSub, append(start, f.Index...))
} else {
fieldsDirect = append(fieldsDirect, append(start, f.Index...))
}
}
}
addFields(rt, rv, nil)
writeFields := func(fields [][]int, totalFields int) {
for _, fieldIndex := range fields {
fieldType := rt.FieldByIndex(fieldIndex)
fieldVal := rv.FieldByIndex(fieldIndex)
opts := getOptions(fieldType.Tag)
if opts.skip {
continue
}
if opts.omitempty && isEmpty(fieldVal) {
continue
}
fieldVal = eindirect(fieldVal)
if isNil(fieldVal) { /// Don't write anything for nil fields.
continue
}
keyName := fieldType.Name
if opts.name != "" {
keyName = opts.name
}
if opts.omitzero && isZero(fieldVal) {
continue
}
if inline {
enc.writeKeyValue(Key{keyName}, fieldVal, true)
if fieldIndex[0] != totalFields-1 {
enc.write(", ")
}
} else {
enc.encode(key.add(keyName), fieldVal)
}
}
}
if inline {
enc.write("{")
}
l := len(fieldsDirect) + len(fieldsSub)
writeFields(fieldsDirect, l)
writeFields(fieldsSub, l)
if inline {
enc.write("}")
}
}
// tomlTypeOfGo returns the TOML type name of the Go value's type.
//
// It is used to determine whether the types of array elements are mixed (which
// is forbidden). If the Go value is nil, then it is illegal for it to be an
// array element, and valueIsNil is returned as true.
//
// The type may be `nil`, which means no concrete TOML type could be found.
func tomlTypeOfGo(rv reflect.Value) tomlType {
if isNil(rv) || !rv.IsValid() {
return nil
}
if rv.Kind() == reflect.Struct {
if rv.Type() == timeType {
return tomlDatetime
}
if isMarshaler(rv) {
return tomlString
}
return tomlHash
}
if isMarshaler(rv) {
return tomlString
}
switch rv.Kind() {
case reflect.Bool:
return tomlBool
case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32,
reflect.Int64,
reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32,
reflect.Uint64:
return tomlInteger
case reflect.Float32, reflect.Float64:
return tomlFloat
case reflect.Array, reflect.Slice:
if isTableArray(rv) {
return tomlArrayHash
}
return tomlArray
case reflect.Ptr, reflect.Interface:
return tomlTypeOfGo(rv.Elem())
case reflect.String:
return tomlString
case reflect.Map:
return tomlHash
default:
encPanic(errors.New("unsupported type: " + rv.Kind().String()))
panic("unreachable")
}
}
func isMarshaler(rv reflect.Value) bool {
return rv.Type().Implements(marshalText) || rv.Type().Implements(marshalToml)
}
// isTableArray reports if all entries in the array or slice are a table.
func isTableArray(arr reflect.Value) bool {
if isNil(arr) || !arr.IsValid() || arr.Len() == 0 {
return false
}
ret := true
for i := 0; i < arr.Len(); i++ {
tt := tomlTypeOfGo(eindirect(arr.Index(i)))
// Don't allow nil.
if tt == nil {
encPanic(errArrayNilElement)
}
if ret && !typeEqual(tomlHash, tt) {
ret = false
}
}
return ret
}
type tagOptions struct {
skip bool // "-"
name string
omitempty bool
omitzero bool
}
func getOptions(tag reflect.StructTag) tagOptions {
t := tag.Get("toml")
if t == "-" {
return tagOptions{skip: true}
}
var opts tagOptions
parts := strings.Split(t, ",")
opts.name = parts[0]
for _, s := range parts[1:] {
switch s {
case "omitempty":
opts.omitempty = true
case "omitzero":
opts.omitzero = true
}
}
return opts
}
func isZero(rv reflect.Value) bool {
switch rv.Kind() {
case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64:
return rv.Int() == 0
case reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64:
return rv.Uint() == 0
case reflect.Float32, reflect.Float64:
return rv.Float() == 0.0
}
return false
}
func isEmpty(rv reflect.Value) bool {
switch rv.Kind() {
case reflect.Array, reflect.Slice, reflect.Map, reflect.String:
return rv.Len() == 0
case reflect.Struct:
if rv.Type().Comparable() {
return reflect.Zero(rv.Type()).Interface() == rv.Interface()
}
// Need to also check if all the fields are empty, otherwise something
// like this with uncomparable types will always return true:
//
// type a struct{ field b }
// type b struct{ s []string }
// s := a{field: b{s: []string{"AAA"}}}
for i := 0; i < rv.NumField(); i++ {
if !isEmpty(rv.Field(i)) {
return false
}
}
return true
case reflect.Bool:
return !rv.Bool()
case reflect.Ptr:
return rv.IsNil()
}
return false
}
func (enc *Encoder) newline() {
if enc.hasWritten {
enc.write("\n")
}
}
// Write a key/value pair:
//
// key = <any value>
//
// This is also used for "k = v" in inline tables; so something like this will
// be written in three calls:
//
// ┌───────────────────┐
// │ ┌───┐ ┌────┐│
// v v v v vv
// key = {k = 1, k2 = 2}
func (enc *Encoder) writeKeyValue(key Key, val reflect.Value, inline bool) {
/// Marshaler used on top-level document; call eElement() to just call
/// Marshal{TOML,Text}.
if len(key) == 0 {
enc.eElement(val)
return
}
enc.writef("%s%s = ", enc.indentStr(key), key.maybeQuoted(len(key)-1))
enc.eElement(val)
if !inline {
enc.newline()
}
}
func (enc *Encoder) write(s string) {
_, err := enc.w.WriteString(s)
if err != nil {
encPanic(err)
}
enc.hasWritten = true
}
func (enc *Encoder) writef(format string, v ...any) {
_, err := fmt.Fprintf(enc.w, format, v...)
if err != nil {
encPanic(err)
}
enc.hasWritten = true
}
func (enc *Encoder) indentStr(key Key) string {
return strings.Repeat(enc.Indent, len(key)-1)
}
func encPanic(err error) {
panic(tomlEncodeError{err})
}
// Resolve any level of pointers to the actual value (e.g. **string → string).
func eindirect(v reflect.Value) reflect.Value {
if v.Kind() != reflect.Ptr && v.Kind() != reflect.Interface {
if isMarshaler(v) {
return v
}
if v.CanAddr() { /// Special case for marshalers; see #358.
if pv := v.Addr(); isMarshaler(pv) {
return pv
}
}
return v
}
if v.IsNil() {
return v
}
return eindirect(v.Elem())
}
func isNil(rv reflect.Value) bool {
switch rv.Kind() {
case reflect.Interface, reflect.Map, reflect.Ptr, reflect.Slice:
return rv.IsNil()
default:
return false
}
}
package toml
import (
"fmt"
"strings"
)
// ParseError is returned when there is an error parsing the TOML syntax such as
// invalid syntax, duplicate keys, etc.
//
// In addition to the error message itself, you can also print detailed location
// information with context by using [ErrorWithPosition]:
//
// toml: error: Key 'fruit' was already created and cannot be used as an array.
//
// At line 4, column 2-7:
//
// 2 | fruit = []
// 3 |
// 4 | [[fruit]] # Not allowed
// ^^^^^
//
// [ErrorWithUsage] can be used to print the above with some more detailed usage
// guidance:
//
// toml: error: newlines not allowed within inline tables
//
// At line 1, column 18:
//
// 1 | x = [{ key = 42 #
// ^
//
// Error help:
//
// Inline tables must always be on a single line:
//
// table = {key = 42, second = 43}
//
// It is invalid to split them over multiple lines like so:
//
// # INVALID
// table = {
// key = 42,
// second = 43
// }
//
// Use regular for this:
//
// [table]
// key = 42
// second = 43
type ParseError struct {
Message string // Short technical message.
Usage string // Longer message with usage guidance; may be blank.
Position Position // Position of the error
LastKey string // Last parsed key, may be blank.
// Line the error occurred.
//
// Deprecated: use [Position].
Line int
err error
input string
}
// Position of an error.
type Position struct {
Line int // Line number, starting at 1.
Col int // Error column, starting at 1.
Start int // Start of error, as byte offset starting at 0.
Len int // Length of the error in bytes.
}
func (p Position) withCol(tomlFile string) Position {
var (
pos int
lines = strings.Split(tomlFile, "\n")
)
for i := range lines {
ll := len(lines[i]) + 1 // +1 for the removed newline
if pos+ll >= p.Start {
p.Col = p.Start - pos + 1
if p.Col < 1 { // Should never happen, but just in case.
p.Col = 1
}
break
}
pos += ll
}
return p
}
func (pe ParseError) Error() string {
if pe.LastKey == "" {
return fmt.Sprintf("toml: line %d: %s", pe.Position.Line, pe.Message)
}
return fmt.Sprintf("toml: line %d (last key %q): %s",
pe.Position.Line, pe.LastKey, pe.Message)
}
// ErrorWithPosition returns the error with detailed location context.
//
// See the documentation on [ParseError].
func (pe ParseError) ErrorWithPosition() string {
if pe.input == "" { // Should never happen, but just in case.
return pe.Error()
}
// TODO: don't show control characters as literals? This may not show up
// well everywhere.
var (
lines = strings.Split(pe.input, "\n")
b = new(strings.Builder)
)
if pe.Position.Len == 1 {
fmt.Fprintf(b, "toml: error: %s\n\nAt line %d, column %d:\n\n",
pe.Message, pe.Position.Line, pe.Position.Col)
} else {
fmt.Fprintf(b, "toml: error: %s\n\nAt line %d, column %d-%d:\n\n",
pe.Message, pe.Position.Line, pe.Position.Col, pe.Position.Col+pe.Position.Len-1)
}
if pe.Position.Line > 2 {
fmt.Fprintf(b, "% 7d | %s\n", pe.Position.Line-2, expandTab(lines[pe.Position.Line-3]))
}
if pe.Position.Line > 1 {
fmt.Fprintf(b, "% 7d | %s\n", pe.Position.Line-1, expandTab(lines[pe.Position.Line-2]))
}
/// Expand tabs, so that the ^^^s are at the correct position, but leave
/// "column 10-13" intact. Adjusting this to the visual column would be
/// better, but we don't know the tabsize of the user in their editor, which
/// can be 8, 4, 2, or something else. We can't know. So leaving it as the
/// character index is probably the "most correct".
expanded := expandTab(lines[pe.Position.Line-1])
diff := len(expanded) - len(lines[pe.Position.Line-1])
fmt.Fprintf(b, "% 7d | %s\n", pe.Position.Line, expanded)
fmt.Fprintf(b, "% 10s%s%s\n", "", strings.Repeat(" ", pe.Position.Col-1+diff), strings.Repeat("^", pe.Position.Len))
return b.String()
}
// ErrorWithUsage returns the error with detailed location context and usage
// guidance.
//
// See the documentation on [ParseError].
func (pe ParseError) ErrorWithUsage() string {
m := pe.ErrorWithPosition()
if u, ok := pe.err.(interface{ Usage() string }); ok && u.Usage() != "" {
lines := strings.Split(strings.TrimSpace(u.Usage()), "\n")
for i := range lines {
if lines[i] != "" {
lines[i] = " " + lines[i]
}
}
return m + "Error help:\n\n" + strings.Join(lines, "\n") + "\n"
}
return m
}
func expandTab(s string) string {
var (
b strings.Builder
l int
fill = func(n int) string {
b := make([]byte, n)
for i := range b {
b[i] = ' '
}
return string(b)
}
)
b.Grow(len(s))
for _, r := range s {
switch r {
case '\t':
tw := 8 - l%8
b.WriteString(fill(tw))
l += tw
default:
b.WriteRune(r)
l += 1
}
}
return b.String()
}
type (
errLexControl struct{ r rune }
errLexEscape struct{ r rune }
errLexUTF8 struct{ b byte }
errParseDate struct{ v string }
errLexInlineTableNL struct{}
errLexStringNL struct{}
errParseRange struct {
i any // int or float
size string // "int64", "uint16", etc.
}
errUnsafeFloat struct {
i interface{} // float32 or float64
size string // "float32" or "float64"
}
errParseDuration struct{ d string }
)
func (e errLexControl) Error() string {
return fmt.Sprintf("TOML files cannot contain control characters: '0x%02x'", e.r)
}
func (e errLexControl) Usage() string { return "" }
func (e errLexEscape) Error() string { return fmt.Sprintf(`invalid escape in string '\%c'`, e.r) }
func (e errLexEscape) Usage() string { return usageEscape }
func (e errLexUTF8) Error() string { return fmt.Sprintf("invalid UTF-8 byte: 0x%02x", e.b) }
func (e errLexUTF8) Usage() string { return "" }
func (e errParseDate) Error() string { return fmt.Sprintf("invalid datetime: %q", e.v) }
func (e errParseDate) Usage() string { return usageDate }
func (e errLexInlineTableNL) Error() string { return "newlines not allowed within inline tables" }
func (e errLexInlineTableNL) Usage() string { return usageInlineNewline }
func (e errLexStringNL) Error() string { return "strings cannot contain newlines" }
func (e errLexStringNL) Usage() string { return usageStringNewline }
func (e errParseRange) Error() string { return fmt.Sprintf("%v is out of range for %s", e.i, e.size) }
func (e errParseRange) Usage() string { return usageIntOverflow }
func (e errUnsafeFloat) Error() string {
return fmt.Sprintf("%v is out of the safe %s range", e.i, e.size)
}
func (e errUnsafeFloat) Usage() string { return usageUnsafeFloat }
func (e errParseDuration) Error() string { return fmt.Sprintf("invalid duration: %q", e.d) }
func (e errParseDuration) Usage() string { return usageDuration }
const usageEscape = `
A '\' inside a "-delimited string is interpreted as an escape character.
The following escape sequences are supported:
\b, \t, \n, \f, \r, \", \\, \uXXXX, and \UXXXXXXXX
To prevent a '\' from being recognized as an escape character, use either:
- a ' or '''-delimited string; escape characters aren't processed in them; or
- write two backslashes to get a single backslash: '\\'.
If you're trying to add a Windows path (e.g. "C:\Users\martin") then using '/'
instead of '\' will usually also work: "C:/Users/martin".
`
const usageInlineNewline = `
Inline tables must always be on a single line:
table = {key = 42, second = 43}
It is invalid to split them over multiple lines like so:
# INVALID
table = {
key = 42,
second = 43
}
Use regular for this:
[table]
key = 42
second = 43
`
const usageStringNewline = `
Strings must always be on a single line, and cannot span more than one line:
# INVALID
string = "Hello,
world!"
Instead use """ or ''' to split strings over multiple lines:
string = """Hello,
world!"""
`
const usageIntOverflow = `
This number is too large; this may be an error in the TOML, but it can also be a
bug in the program that uses too small of an integer.
The maximum and minimum values are:
size │ lowest │ highest
───────┼────────────────┼──────────────
int8 │ -128 │ 127
int16 │ -32,768 │ 32,767
int32 │ -2,147,483,648 │ 2,147,483,647
int64 │ -9.2 × 10¹⁷ │ 9.2 × 10¹⁷
uint8 │ 0 │ 255
uint16 │ 0 │ 65,535
uint32 │ 0 │ 4,294,967,295
uint64 │ 0 │ 1.8 × 10¹⁸
int refers to int32 on 32-bit systems and int64 on 64-bit systems.
`
const usageUnsafeFloat = `
This number is outside of the "safe" range for floating point numbers; whole
(non-fractional) numbers outside the below range can not always be represented
accurately in a float, leading to some loss of accuracy.
Explicitly mark a number as a fractional unit by adding ".0", which will incur
some loss of accuracy; for example:
f = 2_000_000_000.0
Accuracy ranges:
float32 = 16,777,215
float64 = 9,007,199,254,740,991
`
const usageDuration = `
A duration must be as "number<unit>", without any spaces. Valid units are:
ns nanoseconds (billionth of a second)
us, µs microseconds (millionth of a second)
ms milliseconds (thousands of a second)
s seconds
m minutes
h hours
You can combine multiple units; for example "5m10s" for 5 minutes and 10
seconds.
`
const usageDate = `
A TOML datetime must be in one of the following formats:
2006-01-02T15:04:05Z07:00 Date and time, with timezone.
2006-01-02T15:04:05 Date and time, but without timezone.
2006-01-02 Date without a time or timezone.
15:04:05 Just a time, without any timezone.
Seconds may optionally have a fraction, up to nanosecond precision:
15:04:05.123
15:04:05.856018510
`
// TOML 1.1:
// The seconds part in times is optional, and may be omitted:
// 2006-01-02T15:04Z07:00
// 2006-01-02T15:04
// 15:04
package internal
import "time"
// Timezones used for local datetime, date, and time TOML types.
//
// The exact way times and dates without a timezone should be interpreted is not
// well-defined in the TOML specification and left to the implementation. These
// defaults to current local timezone offset of the computer, but this can be
// changed by changing these variables before decoding.
//
// TODO:
// Ideally we'd like to offer people the ability to configure the used timezone
// by setting Decoder.Timezone and Encoder.Timezone; however, this is a bit
// tricky: the reason we use three different variables for this is to support
// round-tripping – without these specific TZ names we wouldn't know which
// format to use.
//
// There isn't a good way to encode this right now though, and passing this sort
// of information also ties in to various related issues such as string format
// encoding, encoding of comments, etc.
//
// So, for the time being, just put this in internal until we can write a good
// comprehensive API for doing all of this.
//
// The reason they're exported is because they're referred from in e.g.
// internal/tag.
//
// Note that this behaviour is valid according to the TOML spec as the exact
// behaviour is left up to implementations.
var (
localOffset = func() int { _, o := time.Now().Zone(); return o }()
LocalDatetime = time.FixedZone("datetime-local", localOffset)
LocalDate = time.FixedZone("date-local", localOffset)
LocalTime = time.FixedZone("time-local", localOffset)
)
package toml
import (
"fmt"
"reflect"
"runtime"
"strings"
"unicode"
"unicode/utf8"
)
type itemType int
const (
itemError itemType = iota
itemEOF
itemText
itemString
itemStringEsc
itemRawString
itemMultilineString
itemRawMultilineString
itemBool
itemInteger
itemFloat
itemDatetime
itemArray // the start of an array
itemArrayEnd
itemTableStart
itemTableEnd
itemArrayTableStart
itemArrayTableEnd
itemKeyStart
itemKeyEnd
itemCommentStart
itemInlineTableStart
itemInlineTableEnd
)
const eof = 0
type stateFn func(lx *lexer) stateFn
func (p Position) String() string {
return fmt.Sprintf("at line %d; start %d; length %d", p.Line, p.Start, p.Len)
}
type lexer struct {
input string
start int
pos int
line int
state stateFn
items chan item
tomlNext bool
esc bool
// Allow for backing up up to 4 runes. This is necessary because TOML
// contains 3-rune tokens (""" and ''').
prevWidths [4]int
nprev int // how many of prevWidths are in use
atEOF bool // If we emit an eof, we can still back up, but it is not OK to call next again.
// A stack of state functions used to maintain context.
//
// The idea is to reuse parts of the state machine in various places. For
// example, values can appear at the top level or within arbitrarily nested
// arrays. The last state on the stack is used after a value has been lexed.
// Similarly for comments.
stack []stateFn
}
type item struct {
typ itemType
val string
err error
pos Position
}
func (lx *lexer) nextItem() item {
for {
select {
case item := <-lx.items:
return item
default:
lx.state = lx.state(lx)
//fmt.Printf(" STATE %-24s current: %-10s stack: %s\n", lx.state, lx.current(), lx.stack)
}
}
}
func lex(input string, tomlNext bool) *lexer {
lx := &lexer{
input: input,
state: lexTop,
items: make(chan item, 10),
stack: make([]stateFn, 0, 10),
line: 1,
tomlNext: tomlNext,
}
return lx
}
func (lx *lexer) push(state stateFn) {
lx.stack = append(lx.stack, state)
}
func (lx *lexer) pop() stateFn {
if len(lx.stack) == 0 {
panic("BUG in lexer: no states to pop")
}
last := lx.stack[len(lx.stack)-1]
lx.stack = lx.stack[0 : len(lx.stack)-1]
return last
}
func (lx *lexer) current() string {
return lx.input[lx.start:lx.pos]
}
func (lx lexer) getPos() Position {
p := Position{
Line: lx.line,
Start: lx.start,
Len: lx.pos - lx.start,
}
if p.Len <= 0 {
p.Len = 1
}
return p
}
func (lx *lexer) emit(typ itemType) {
// Needed for multiline strings ending with an incomplete UTF-8 sequence.
if lx.start > lx.pos {
lx.error(errLexUTF8{lx.input[lx.pos]})
return
}
lx.items <- item{typ: typ, pos: lx.getPos(), val: lx.current()}
lx.start = lx.pos
}
func (lx *lexer) emitTrim(typ itemType) {
lx.items <- item{typ: typ, pos: lx.getPos(), val: strings.TrimSpace(lx.current())}
lx.start = lx.pos
}
func (lx *lexer) next() (r rune) {
if lx.atEOF {
panic("BUG in lexer: next called after EOF")
}
if lx.pos >= len(lx.input) {
lx.atEOF = true
return eof
}
if lx.input[lx.pos] == '\n' {
lx.line++
}
lx.prevWidths[3] = lx.prevWidths[2]
lx.prevWidths[2] = lx.prevWidths[1]
lx.prevWidths[1] = lx.prevWidths[0]
if lx.nprev < 4 {
lx.nprev++
}
r, w := utf8.DecodeRuneInString(lx.input[lx.pos:])
if r == utf8.RuneError && w == 1 {
lx.error(errLexUTF8{lx.input[lx.pos]})
return utf8.RuneError
}
// Note: don't use peek() here, as this calls next().
if isControl(r) || (r == '\r' && (len(lx.input)-1 == lx.pos || lx.input[lx.pos+1] != '\n')) {
lx.errorControlChar(r)
return utf8.RuneError
}
lx.prevWidths[0] = w
lx.pos += w
return r
}
// ignore skips over the pending input before this point.
func (lx *lexer) ignore() {
lx.start = lx.pos
}
// backup steps back one rune. Can be called 4 times between calls to next.
func (lx *lexer) backup() {
if lx.atEOF {
lx.atEOF = false
return
}
if lx.nprev < 1 {
panic("BUG in lexer: backed up too far")
}
w := lx.prevWidths[0]
lx.prevWidths[0] = lx.prevWidths[1]
lx.prevWidths[1] = lx.prevWidths[2]
lx.prevWidths[2] = lx.prevWidths[3]
lx.nprev--
lx.pos -= w
if lx.pos < len(lx.input) && lx.input[lx.pos] == '\n' {
lx.line--
}
}
// accept consumes the next rune if it's equal to `valid`.
func (lx *lexer) accept(valid rune) bool {
if lx.next() == valid {
return true
}
lx.backup()
return false
}
// peek returns but does not consume the next rune in the input.
func (lx *lexer) peek() rune {
r := lx.next()
lx.backup()
return r
}
// skip ignores all input that matches the given predicate.
func (lx *lexer) skip(pred func(rune) bool) {
for {
r := lx.next()
if pred(r) {
continue
}
lx.backup()
lx.ignore()
return
}
}
// error stops all lexing by emitting an error and returning `nil`.
//
// Note that any value that is a character is escaped if it's a special
// character (newlines, tabs, etc.).
func (lx *lexer) error(err error) stateFn {
if lx.atEOF {
return lx.errorPrevLine(err)
}
lx.items <- item{typ: itemError, pos: lx.getPos(), err: err}
return nil
}
// errorfPrevline is like error(), but sets the position to the last column of
// the previous line.
//
// This is so that unexpected EOF or NL errors don't show on a new blank line.
func (lx *lexer) errorPrevLine(err error) stateFn {
pos := lx.getPos()
pos.Line--
pos.Len = 1
pos.Start = lx.pos - 1
lx.items <- item{typ: itemError, pos: pos, err: err}
return nil
}
// errorPos is like error(), but allows explicitly setting the position.
func (lx *lexer) errorPos(start, length int, err error) stateFn {
pos := lx.getPos()
pos.Start = start
pos.Len = length
lx.items <- item{typ: itemError, pos: pos, err: err}
return nil
}
// errorf is like error, and creates a new error.
func (lx *lexer) errorf(format string, values ...any) stateFn {
if lx.atEOF {
pos := lx.getPos()
if lx.pos >= 1 && lx.input[lx.pos-1] == '\n' {
pos.Line--
}
pos.Len = 1
pos.Start = lx.pos - 1
lx.items <- item{typ: itemError, pos: pos, err: fmt.Errorf(format, values...)}
return nil
}
lx.items <- item{typ: itemError, pos: lx.getPos(), err: fmt.Errorf(format, values...)}
return nil
}
func (lx *lexer) errorControlChar(cc rune) stateFn {
return lx.errorPos(lx.pos-1, 1, errLexControl{cc})
}
// lexTop consumes elements at the top level of TOML data.
func lexTop(lx *lexer) stateFn {
r := lx.next()
if isWhitespace(r) || isNL(r) {
return lexSkip(lx, lexTop)
}
switch r {
case '#':
lx.push(lexTop)
return lexCommentStart
case '[':
return lexTableStart
case eof:
if lx.pos > lx.start {
// TODO: never reached? I think this can only occur on a bug in the
// lexer(?)
return lx.errorf("unexpected EOF")
}
lx.emit(itemEOF)
return nil
}
// At this point, the only valid item can be a key, so we back up
// and let the key lexer do the rest.
lx.backup()
lx.push(lexTopEnd)
return lexKeyStart
}
// lexTopEnd is entered whenever a top-level item has been consumed. (A value
// or a table.) It must see only whitespace, and will turn back to lexTop
// upon a newline. If it sees EOF, it will quit the lexer successfully.
func lexTopEnd(lx *lexer) stateFn {
r := lx.next()
switch {
case r == '#':
// a comment will read to a newline for us.
lx.push(lexTop)
return lexCommentStart
case isWhitespace(r):
return lexTopEnd
case isNL(r):
lx.ignore()
return lexTop
case r == eof:
lx.emit(itemEOF)
return nil
}
return lx.errorf("expected a top-level item to end with a newline, comment, or EOF, but got %q instead", r)
}
// lexTable lexes the beginning of a table. Namely, it makes sure that
// it starts with a character other than '.' and ']'.
// It assumes that '[' has already been consumed.
// It also handles the case that this is an item in an array of tables.
// e.g., '[[name]]'.
func lexTableStart(lx *lexer) stateFn {
if lx.peek() == '[' {
lx.next()
lx.emit(itemArrayTableStart)
lx.push(lexArrayTableEnd)
} else {
lx.emit(itemTableStart)
lx.push(lexTableEnd)
}
return lexTableNameStart
}
func lexTableEnd(lx *lexer) stateFn {
lx.emit(itemTableEnd)
return lexTopEnd
}
func lexArrayTableEnd(lx *lexer) stateFn {
if r := lx.next(); r != ']' {
return lx.errorf("expected end of table array name delimiter ']', but got %q instead", r)
}
lx.emit(itemArrayTableEnd)
return lexTopEnd
}
func lexTableNameStart(lx *lexer) stateFn {
lx.skip(isWhitespace)
switch r := lx.peek(); {
case r == ']' || r == eof:
return lx.errorf("unexpected end of table name (table names cannot be empty)")
case r == '.':
return lx.errorf("unexpected table separator (table names cannot be empty)")
case r == '"' || r == '\'':
lx.ignore()
lx.push(lexTableNameEnd)
return lexQuotedName
default:
lx.push(lexTableNameEnd)
return lexBareName
}
}
// lexTableNameEnd reads the end of a piece of a table name, optionally
// consuming whitespace.
func lexTableNameEnd(lx *lexer) stateFn {
lx.skip(isWhitespace)
switch r := lx.next(); {
case r == '.':
lx.ignore()
return lexTableNameStart
case r == ']':
return lx.pop()
default:
return lx.errorf("expected '.' or ']' to end table name, but got %q instead", r)
}
}
// lexBareName lexes one part of a key or table.
//
// It assumes that at least one valid character for the table has already been
// read.
//
// Lexes only one part, e.g. only 'a' inside 'a.b'.
func lexBareName(lx *lexer) stateFn {
r := lx.next()
if isBareKeyChar(r, lx.tomlNext) {
return lexBareName
}
lx.backup()
lx.emit(itemText)
return lx.pop()
}
// lexQuotedName lexes one part of a quoted key or table name. It assumes that
// it starts lexing at the quote itself (" or ').
//
// Lexes only one part, e.g. only '"a"' inside '"a".b'.
func lexQuotedName(lx *lexer) stateFn {
r := lx.next()
switch {
case r == '"':
lx.ignore() // ignore the '"'
return lexString
case r == '\'':
lx.ignore() // ignore the "'"
return lexRawString
// TODO: I don't think any of the below conditions can ever be reached?
case isWhitespace(r):
return lexSkip(lx, lexValue)
case r == eof:
return lx.errorf("unexpected EOF; expected value")
default:
return lx.errorf("expected value but found %q instead", r)
}
}
// lexKeyStart consumes all key parts until a '='.
func lexKeyStart(lx *lexer) stateFn {
lx.skip(isWhitespace)
switch r := lx.peek(); {
case r == '=' || r == eof:
return lx.errorf("unexpected '=': key name appears blank")
case r == '.':
return lx.errorf("unexpected '.': keys cannot start with a '.'")
case r == '"' || r == '\'':
lx.ignore()
fallthrough
default: // Bare key
lx.emit(itemKeyStart)
return lexKeyNameStart
}
}
func lexKeyNameStart(lx *lexer) stateFn {
lx.skip(isWhitespace)
switch r := lx.peek(); {
default:
lx.push(lexKeyEnd)
return lexBareName
case r == '"' || r == '\'':
lx.ignore()
lx.push(lexKeyEnd)
return lexQuotedName
// TODO: I think these can never be reached?
case r == '=' || r == eof:
return lx.errorf("unexpected '='")
case r == '.':
return lx.errorf("unexpected '.'")
}
}
// lexKeyEnd consumes the end of a key and trims whitespace (up to the key
// separator).
func lexKeyEnd(lx *lexer) stateFn {
lx.skip(isWhitespace)
switch r := lx.next(); {
case isWhitespace(r):
return lexSkip(lx, lexKeyEnd)
case r == eof: // TODO: never reached
return lx.errorf("unexpected EOF; expected key separator '='")
case r == '.':
lx.ignore()
return lexKeyNameStart
case r == '=':
lx.emit(itemKeyEnd)
return lexSkip(lx, lexValue)
default:
if r == '\n' {
return lx.errorPrevLine(fmt.Errorf("expected '.' or '=', but got %q instead", r))
}
return lx.errorf("expected '.' or '=', but got %q instead", r)
}
}
// lexValue starts the consumption of a value anywhere a value is expected.
// lexValue will ignore whitespace.
// After a value is lexed, the last state on the next is popped and returned.
func lexValue(lx *lexer) stateFn {
// We allow whitespace to precede a value, but NOT newlines.
// In array syntax, the array states are responsible for ignoring newlines.
r := lx.next()
switch {
case isWhitespace(r):
return lexSkip(lx, lexValue)
case isDigit(r):
lx.backup() // avoid an extra state and use the same as above
return lexNumberOrDateStart
}
switch r {
case '[':
lx.ignore()
lx.emit(itemArray)
return lexArrayValue
case '{':
lx.ignore()
lx.emit(itemInlineTableStart)
return lexInlineTableValue
case '"':
if lx.accept('"') {
if lx.accept('"') {
lx.ignore() // Ignore """
return lexMultilineString
}
lx.backup()
}
lx.ignore() // ignore the '"'
return lexString
case '\'':
if lx.accept('\'') {
if lx.accept('\'') {
lx.ignore() // Ignore """
return lexMultilineRawString
}
lx.backup()
}
lx.ignore() // ignore the "'"
return lexRawString
case '.': // special error case, be kind to users
return lx.errorf("floats must start with a digit, not '.'")
case 'i', 'n':
if (lx.accept('n') && lx.accept('f')) || (lx.accept('a') && lx.accept('n')) {
lx.emit(itemFloat)
return lx.pop()
}
case '-', '+':
return lexDecimalNumberStart
}
if unicode.IsLetter(r) {
// Be permissive here; lexBool will give a nice error if the
// user wrote something like
// x = foo
// (i.e. not 'true' or 'false' but is something else word-like.)
lx.backup()
return lexBool
}
if r == eof {
return lx.errorf("unexpected EOF; expected value")
}
if r == '\n' {
return lx.errorPrevLine(fmt.Errorf("expected value but found %q instead", r))
}
return lx.errorf("expected value but found %q instead", r)
}
// lexArrayValue consumes one value in an array. It assumes that '[' or ','
// have already been consumed. All whitespace and newlines are ignored.
func lexArrayValue(lx *lexer) stateFn {
r := lx.next()
switch {
case isWhitespace(r) || isNL(r):
return lexSkip(lx, lexArrayValue)
case r == '#':
lx.push(lexArrayValue)
return lexCommentStart
case r == ',':
return lx.errorf("unexpected comma")
case r == ']':
return lexArrayEnd
}
lx.backup()
lx.push(lexArrayValueEnd)
return lexValue
}
// lexArrayValueEnd consumes everything between the end of an array value and
// the next value (or the end of the array): it ignores whitespace and newlines
// and expects either a ',' or a ']'.
func lexArrayValueEnd(lx *lexer) stateFn {
switch r := lx.next(); {
case isWhitespace(r) || isNL(r):
return lexSkip(lx, lexArrayValueEnd)
case r == '#':
lx.push(lexArrayValueEnd)
return lexCommentStart
case r == ',':
lx.ignore()
return lexArrayValue // move on to the next value
case r == ']':
return lexArrayEnd
default:
return lx.errorf("expected a comma (',') or array terminator (']'), but got %s", runeOrEOF(r))
}
}
// lexArrayEnd finishes the lexing of an array.
// It assumes that a ']' has just been consumed.
func lexArrayEnd(lx *lexer) stateFn {
lx.ignore()
lx.emit(itemArrayEnd)
return lx.pop()
}
// lexInlineTableValue consumes one key/value pair in an inline table.
// It assumes that '{' or ',' have already been consumed. Whitespace is ignored.
func lexInlineTableValue(lx *lexer) stateFn {
r := lx.next()
switch {
case isWhitespace(r):
return lexSkip(lx, lexInlineTableValue)
case isNL(r):
if lx.tomlNext {
return lexSkip(lx, lexInlineTableValue)
}
return lx.errorPrevLine(errLexInlineTableNL{})
case r == '#':
lx.push(lexInlineTableValue)
return lexCommentStart
case r == ',':
return lx.errorf("unexpected comma")
case r == '}':
return lexInlineTableEnd
}
lx.backup()
lx.push(lexInlineTableValueEnd)
return lexKeyStart
}
// lexInlineTableValueEnd consumes everything between the end of an inline table
// key/value pair and the next pair (or the end of the table):
// it ignores whitespace and expects either a ',' or a '}'.
func lexInlineTableValueEnd(lx *lexer) stateFn {
switch r := lx.next(); {
case isWhitespace(r):
return lexSkip(lx, lexInlineTableValueEnd)
case isNL(r):
if lx.tomlNext {
return lexSkip(lx, lexInlineTableValueEnd)
}
return lx.errorPrevLine(errLexInlineTableNL{})
case r == '#':
lx.push(lexInlineTableValueEnd)
return lexCommentStart
case r == ',':
lx.ignore()
lx.skip(isWhitespace)
if lx.peek() == '}' {
if lx.tomlNext {
return lexInlineTableValueEnd
}
return lx.errorf("trailing comma not allowed in inline tables")
}
return lexInlineTableValue
case r == '}':
return lexInlineTableEnd
default:
return lx.errorf("expected a comma or an inline table terminator '}', but got %s instead", runeOrEOF(r))
}
}
func runeOrEOF(r rune) string {
if r == eof {
return "end of file"
}
return "'" + string(r) + "'"
}
// lexInlineTableEnd finishes the lexing of an inline table.
// It assumes that a '}' has just been consumed.
func lexInlineTableEnd(lx *lexer) stateFn {
lx.ignore()
lx.emit(itemInlineTableEnd)
return lx.pop()
}
// lexString consumes the inner contents of a string. It assumes that the
// beginning '"' has already been consumed and ignored.
func lexString(lx *lexer) stateFn {
r := lx.next()
switch {
case r == eof:
return lx.errorf(`unexpected EOF; expected '"'`)
case isNL(r):
return lx.errorPrevLine(errLexStringNL{})
case r == '\\':
lx.push(lexString)
return lexStringEscape
case r == '"':
lx.backup()
if lx.esc {
lx.esc = false
lx.emit(itemStringEsc)
} else {
lx.emit(itemString)
}
lx.next()
lx.ignore()
return lx.pop()
}
return lexString
}
// lexMultilineString consumes the inner contents of a string. It assumes that
// the beginning '"""' has already been consumed and ignored.
func lexMultilineString(lx *lexer) stateFn {
r := lx.next()
switch r {
default:
return lexMultilineString
case eof:
return lx.errorf(`unexpected EOF; expected '"""'`)
case '\\':
return lexMultilineStringEscape
case '"':
/// Found " → try to read two more "".
if lx.accept('"') {
if lx.accept('"') {
/// Peek ahead: the string can contain " and "", including at the
/// end: """str"""""
/// 6 or more at the end, however, is an error.
if lx.peek() == '"' {
/// Check if we already lexed 5 's; if so we have 6 now, and
/// that's just too many man!
///
/// Second check is for the edge case:
///
/// two quotes allowed.
/// vv
/// """lol \""""""
/// ^^ ^^^---- closing three
/// escaped
///
/// But ugly, but it works
if strings.HasSuffix(lx.current(), `"""""`) && !strings.HasSuffix(lx.current(), `\"""""`) {
return lx.errorf(`unexpected '""""""'`)
}
lx.backup()
lx.backup()
return lexMultilineString
}
lx.backup() /// backup: don't include the """ in the item.
lx.backup()
lx.backup()
lx.esc = false
lx.emit(itemMultilineString)
lx.next() /// Read over ''' again and discard it.
lx.next()
lx.next()
lx.ignore()
return lx.pop()
}
lx.backup()
}
return lexMultilineString
}
}
// lexRawString consumes a raw string. Nothing can be escaped in such a string.
// It assumes that the beginning "'" has already been consumed and ignored.
func lexRawString(lx *lexer) stateFn {
r := lx.next()
switch {
default:
return lexRawString
case r == eof:
return lx.errorf(`unexpected EOF; expected "'"`)
case isNL(r):
return lx.errorPrevLine(errLexStringNL{})
case r == '\'':
lx.backup()
lx.emit(itemRawString)
lx.next()
lx.ignore()
return lx.pop()
}
}
// lexMultilineRawString consumes a raw string. Nothing can be escaped in such a
// string. It assumes that the beginning triple-' has already been consumed and
// ignored.
func lexMultilineRawString(lx *lexer) stateFn {
r := lx.next()
switch r {
default:
return lexMultilineRawString
case eof:
return lx.errorf(`unexpected EOF; expected "'''"`)
case '\'':
/// Found ' → try to read two more ''.
if lx.accept('\'') {
if lx.accept('\'') {
/// Peek ahead: the string can contain ' and '', including at the
/// end: '''str'''''
/// 6 or more at the end, however, is an error.
if lx.peek() == '\'' {
/// Check if we already lexed 5 's; if so we have 6 now, and
/// that's just too many man!
if strings.HasSuffix(lx.current(), "'''''") {
return lx.errorf(`unexpected "''''''"`)
}
lx.backup()
lx.backup()
return lexMultilineRawString
}
lx.backup() /// backup: don't include the ''' in the item.
lx.backup()
lx.backup()
lx.emit(itemRawMultilineString)
lx.next() /// Read over ''' again and discard it.
lx.next()
lx.next()
lx.ignore()
return lx.pop()
}
lx.backup()
}
return lexMultilineRawString
}
}
// lexMultilineStringEscape consumes an escaped character. It assumes that the
// preceding '\\' has already been consumed.
func lexMultilineStringEscape(lx *lexer) stateFn {
if isNL(lx.next()) { /// \ escaping newline.
return lexMultilineString
}
lx.backup()
lx.push(lexMultilineString)
return lexStringEscape(lx)
}
func lexStringEscape(lx *lexer) stateFn {
lx.esc = true
r := lx.next()
switch r {
case 'e':
if !lx.tomlNext {
return lx.error(errLexEscape{r})
}
fallthrough
case 'b':
fallthrough
case 't':
fallthrough
case 'n':
fallthrough
case 'f':
fallthrough
case 'r':
fallthrough
case '"':
fallthrough
case ' ', '\t':
// Inside """ .. """ strings you can use \ to escape newlines, and any
// amount of whitespace can be between the \ and \n.
fallthrough
case '\\':
return lx.pop()
case 'x':
if !lx.tomlNext {
return lx.error(errLexEscape{r})
}
return lexHexEscape
case 'u':
return lexShortUnicodeEscape
case 'U':
return lexLongUnicodeEscape
}
return lx.error(errLexEscape{r})
}
func lexHexEscape(lx *lexer) stateFn {
var r rune
for i := 0; i < 2; i++ {
r = lx.next()
if !isHex(r) {
return lx.errorf(`expected two hexadecimal digits after '\x', but got %q instead`, lx.current())
}
}
return lx.pop()
}
func lexShortUnicodeEscape(lx *lexer) stateFn {
var r rune
for i := 0; i < 4; i++ {
r = lx.next()
if !isHex(r) {
return lx.errorf(`expected four hexadecimal digits after '\u', but got %q instead`, lx.current())
}
}
return lx.pop()
}
func lexLongUnicodeEscape(lx *lexer) stateFn {
var r rune
for i := 0; i < 8; i++ {
r = lx.next()
if !isHex(r) {
return lx.errorf(`expected eight hexadecimal digits after '\U', but got %q instead`, lx.current())
}
}
return lx.pop()
}
// lexNumberOrDateStart processes the first character of a value which begins
// with a digit. It exists to catch values starting with '0', so that
// lexBaseNumberOrDate can differentiate base prefixed integers from other
// types.
func lexNumberOrDateStart(lx *lexer) stateFn {
if lx.next() == '0' {
return lexBaseNumberOrDate
}
return lexNumberOrDate
}
// lexNumberOrDate consumes either an integer, float or datetime.
func lexNumberOrDate(lx *lexer) stateFn {
r := lx.next()
if isDigit(r) {
return lexNumberOrDate
}
switch r {
case '-', ':':
return lexDatetime
case '_':
return lexDecimalNumber
case '.', 'e', 'E':
return lexFloat
}
lx.backup()
lx.emit(itemInteger)
return lx.pop()
}
// lexDatetime consumes a Datetime, to a first approximation.
// The parser validates that it matches one of the accepted formats.
func lexDatetime(lx *lexer) stateFn {
r := lx.next()
if isDigit(r) {
return lexDatetime
}
switch r {
case '-', ':', 'T', 't', ' ', '.', 'Z', 'z', '+':
return lexDatetime
}
lx.backup()
lx.emitTrim(itemDatetime)
return lx.pop()
}
// lexHexInteger consumes a hexadecimal integer after seeing the '0x' prefix.
func lexHexInteger(lx *lexer) stateFn {
r := lx.next()
if isHex(r) {
return lexHexInteger
}
switch r {
case '_':
return lexHexInteger
}
lx.backup()
lx.emit(itemInteger)
return lx.pop()
}
// lexOctalInteger consumes an octal integer after seeing the '0o' prefix.
func lexOctalInteger(lx *lexer) stateFn {
r := lx.next()
if isOctal(r) {
return lexOctalInteger
}
switch r {
case '_':
return lexOctalInteger
}
lx.backup()
lx.emit(itemInteger)
return lx.pop()
}
// lexBinaryInteger consumes a binary integer after seeing the '0b' prefix.
func lexBinaryInteger(lx *lexer) stateFn {
r := lx.next()
if isBinary(r) {
return lexBinaryInteger
}
switch r {
case '_':
return lexBinaryInteger
}
lx.backup()
lx.emit(itemInteger)
return lx.pop()
}
// lexDecimalNumber consumes a decimal float or integer.
func lexDecimalNumber(lx *lexer) stateFn {
r := lx.next()
if isDigit(r) {
return lexDecimalNumber
}
switch r {
case '.', 'e', 'E':
return lexFloat
case '_':
return lexDecimalNumber
}
lx.backup()
lx.emit(itemInteger)
return lx.pop()
}
// lexDecimalNumber consumes the first digit of a number beginning with a sign.
// It assumes the sign has already been consumed. Values which start with a sign
// are only allowed to be decimal integers or floats.
//
// The special "nan" and "inf" values are also recognized.
func lexDecimalNumberStart(lx *lexer) stateFn {
r := lx.next()
// Special error cases to give users better error messages
switch r {
case 'i':
if !lx.accept('n') || !lx.accept('f') {
return lx.errorf("invalid float: '%s'", lx.current())
}
lx.emit(itemFloat)
return lx.pop()
case 'n':
if !lx.accept('a') || !lx.accept('n') {
return lx.errorf("invalid float: '%s'", lx.current())
}
lx.emit(itemFloat)
return lx.pop()
case '0':
p := lx.peek()
switch p {
case 'b', 'o', 'x':
return lx.errorf("cannot use sign with non-decimal numbers: '%s%c'", lx.current(), p)
}
case '.':
return lx.errorf("floats must start with a digit, not '.'")
}
if isDigit(r) {
return lexDecimalNumber
}
return lx.errorf("expected a digit but got %q", r)
}
// lexBaseNumberOrDate differentiates between the possible values which
// start with '0'. It assumes that before reaching this state, the initial '0'
// has been consumed.
func lexBaseNumberOrDate(lx *lexer) stateFn {
r := lx.next()
// Note: All datetimes start with at least two digits, so we don't
// handle date characters (':', '-', etc.) here.
if isDigit(r) {
return lexNumberOrDate
}
switch r {
case '_':
// Can only be decimal, because there can't be an underscore
// between the '0' and the base designator, and dates can't
// contain underscores.
return lexDecimalNumber
case '.', 'e', 'E':
return lexFloat
case 'b':
r = lx.peek()
if !isBinary(r) {
lx.errorf("not a binary number: '%s%c'", lx.current(), r)
}
return lexBinaryInteger
case 'o':
r = lx.peek()
if !isOctal(r) {
lx.errorf("not an octal number: '%s%c'", lx.current(), r)
}
return lexOctalInteger
case 'x':
r = lx.peek()
if !isHex(r) {
lx.errorf("not a hexadecimal number: '%s%c'", lx.current(), r)
}
return lexHexInteger
}
lx.backup()
lx.emit(itemInteger)
return lx.pop()
}
// lexFloat consumes the elements of a float. It allows any sequence of
// float-like characters, so floats emitted by the lexer are only a first
// approximation and must be validated by the parser.
func lexFloat(lx *lexer) stateFn {
r := lx.next()
if isDigit(r) {
return lexFloat
}
switch r {
case '_', '.', '-', '+', 'e', 'E':
return lexFloat
}
lx.backup()
lx.emit(itemFloat)
return lx.pop()
}
// lexBool consumes a bool string: 'true' or 'false.
func lexBool(lx *lexer) stateFn {
var rs []rune
for {
r := lx.next()
if !unicode.IsLetter(r) {
lx.backup()
break
}
rs = append(rs, r)
}
s := string(rs)
switch s {
case "true", "false":
lx.emit(itemBool)
return lx.pop()
}
return lx.errorf("expected value but found %q instead", s)
}
// lexCommentStart begins the lexing of a comment. It will emit
// itemCommentStart and consume no characters, passing control to lexComment.
func lexCommentStart(lx *lexer) stateFn {
lx.ignore()
lx.emit(itemCommentStart)
return lexComment
}
// lexComment lexes an entire comment. It assumes that '#' has been consumed.
// It will consume *up to* the first newline character, and pass control
// back to the last state on the stack.
func lexComment(lx *lexer) stateFn {
switch r := lx.next(); {
case isNL(r) || r == eof:
lx.backup()
lx.emit(itemText)
return lx.pop()
default:
return lexComment
}
}
// lexSkip ignores all slurped input and moves on to the next state.
func lexSkip(lx *lexer, nextState stateFn) stateFn {
lx.ignore()
return nextState
}
func (s stateFn) String() string {
if s == nil {
return "<nil>"
}
name := runtime.FuncForPC(reflect.ValueOf(s).Pointer()).Name()
if i := strings.LastIndexByte(name, '.'); i > -1 {
name = name[i+1:]
}
return name + "()"
}
func (itype itemType) String() string {
switch itype {
case itemError:
return "Error"
case itemEOF:
return "EOF"
case itemText:
return "Text"
case itemString, itemStringEsc, itemRawString, itemMultilineString, itemRawMultilineString:
return "String"
case itemBool:
return "Bool"
case itemInteger:
return "Integer"
case itemFloat:
return "Float"
case itemDatetime:
return "DateTime"
case itemArray:
return "Array"
case itemArrayEnd:
return "ArrayEnd"
case itemTableStart:
return "TableStart"
case itemTableEnd:
return "TableEnd"
case itemArrayTableStart:
return "ArrayTableStart"
case itemArrayTableEnd:
return "ArrayTableEnd"
case itemKeyStart:
return "KeyStart"
case itemKeyEnd:
return "KeyEnd"
case itemCommentStart:
return "CommentStart"
case itemInlineTableStart:
return "InlineTableStart"
case itemInlineTableEnd:
return "InlineTableEnd"
}
panic(fmt.Sprintf("BUG: Unknown type '%d'.", int(itype)))
}
func (item item) String() string {
return fmt.Sprintf("(%s, %s)", item.typ, item.val)
}
func isWhitespace(r rune) bool { return r == '\t' || r == ' ' }
func isNL(r rune) bool { return r == '\n' || r == '\r' }
func isControl(r rune) bool { // Control characters except \t, \r, \n
switch r {
case '\t', '\r', '\n':
return false
default:
return (r >= 0x00 && r <= 0x1f) || r == 0x7f
}
}
func isDigit(r rune) bool { return r >= '0' && r <= '9' }
func isBinary(r rune) bool { return r == '0' || r == '1' }
func isOctal(r rune) bool { return r >= '0' && r <= '7' }
func isHex(r rune) bool { return (r >= '0' && r <= '9') || (r|0x20 >= 'a' && r|0x20 <= 'f') }
func isBareKeyChar(r rune, tomlNext bool) bool {
return (r >= 'A' && r <= 'Z') || (r >= 'a' && r <= 'z') ||
(r >= '0' && r <= '9') || r == '_' || r == '-'
}
package toml
import (
"strings"
)
// MetaData allows access to meta information about TOML data that's not
// accessible otherwise.
//
// It allows checking if a key is defined in the TOML data, whether any keys
// were undecoded, and the TOML type of a key.
type MetaData struct {
context Key // Used only during decoding.
keyInfo map[string]keyInfo
mapping map[string]any
keys []Key
decoded map[string]struct{}
data []byte // Input file; for errors.
}
// IsDefined reports if the key exists in the TOML data.
//
// The key should be specified hierarchically, for example to access the TOML
// key "a.b.c" you would use IsDefined("a", "b", "c"). Keys are case sensitive.
//
// Returns false for an empty key.
func (md *MetaData) IsDefined(key ...string) bool {
if len(key) == 0 {
return false
}
var (
hash map[string]any
ok bool
hashOrVal any = md.mapping
)
for _, k := range key {
if hash, ok = hashOrVal.(map[string]any); !ok {
return false
}
if hashOrVal, ok = hash[k]; !ok {
return false
}
}
return true
}
// Type returns a string representation of the type of the key specified.
//
// Type will return the empty string if given an empty key or a key that does
// not exist. Keys are case sensitive.
func (md *MetaData) Type(key ...string) string {
if ki, ok := md.keyInfo[Key(key).String()]; ok {
return ki.tomlType.typeString()
}
return ""
}
// Keys returns a slice of every key in the TOML data, including key groups.
//
// Each key is itself a slice, where the first element is the top of the
// hierarchy and the last is the most specific. The list will have the same
// order as the keys appeared in the TOML data.
//
// All keys returned are non-empty.
func (md *MetaData) Keys() []Key {
return md.keys
}
// Undecoded returns all keys that have not been decoded in the order in which
// they appear in the original TOML document.
//
// This includes keys that haven't been decoded because of a [Primitive] value.
// Once the Primitive value is decoded, the keys will be considered decoded.
//
// Also note that decoding into an empty interface will result in no decoding,
// and so no keys will be considered decoded.
//
// In this sense, the Undecoded keys correspond to keys in the TOML document
// that do not have a concrete type in your representation.
func (md *MetaData) Undecoded() []Key {
undecoded := make([]Key, 0, len(md.keys))
for _, key := range md.keys {
if _, ok := md.decoded[key.String()]; !ok {
undecoded = append(undecoded, key)
}
}
return undecoded
}
// Key represents any TOML key, including key groups. Use [MetaData.Keys] to get
// values of this type.
type Key []string
func (k Key) String() string {
// This is called quite often, so it's a bit funky to make it faster.
var b strings.Builder
b.Grow(len(k) * 25)
outer:
for i, kk := range k {
if i > 0 {
b.WriteByte('.')
}
if kk == "" {
b.WriteString(`""`)
} else {
for _, r := range kk {
// "Inline" isBareKeyChar
if !((r >= 'A' && r <= 'Z') || (r >= 'a' && r <= 'z') || (r >= '0' && r <= '9') || r == '_' || r == '-') {
b.WriteByte('"')
b.WriteString(dblQuotedReplacer.Replace(kk))
b.WriteByte('"')
continue outer
}
}
b.WriteString(kk)
}
}
return b.String()
}
func (k Key) maybeQuoted(i int) string {
if k[i] == "" {
return `""`
}
for _, r := range k[i] {
if (r >= 'A' && r <= 'Z') || (r >= 'a' && r <= 'z') || (r >= '0' && r <= '9') || r == '_' || r == '-' {
continue
}
return `"` + dblQuotedReplacer.Replace(k[i]) + `"`
}
return k[i]
}
// Like append(), but only increase the cap by 1.
func (k Key) add(piece string) Key {
newKey := make(Key, len(k)+1)
copy(newKey, k)
newKey[len(k)] = piece
return newKey
}
func (k Key) parent() Key { return k[:len(k)-1] } // all except the last piece.
func (k Key) last() string { return k[len(k)-1] } // last piece of this key.
package ossfuzz
import (
"bytes"
"fmt"
"github.com/BurntSushi/toml"
)
func FuzzToml(data []byte) int {
if len(data) >= 2048 {
return 0
}
var v any
_, err := toml.Decode(string(data), &v)
if err != nil {
return 0
}
buf := new(bytes.Buffer)
err = toml.NewEncoder(buf).Encode(v)
if err != nil {
panic(fmt.Sprintf("failed to encode decoded document: %s", err))
}
var v2 any
_, err = toml.Decode(buf.String(), &v2)
if err != nil {
panic(fmt.Sprintf("failed round trip: %s", err))
}
return 1
}
package toml
import (
"fmt"
"math"
"os"
"strconv"
"strings"
"time"
"unicode/utf8"
"github.com/BurntSushi/toml/internal"
)
type parser struct {
lx *lexer
context Key // Full key for the current hash in scope.
currentKey string // Base key name for everything except hashes.
pos Position // Current position in the TOML file.
tomlNext bool
ordered []Key // List of keys in the order that they appear in the TOML data.
keyInfo map[string]keyInfo // Map keyname → info about the TOML key.
mapping map[string]any // Map keyname → key value.
implicits map[string]struct{} // Record implicit keys (e.g. "key.group.names").
}
type keyInfo struct {
pos Position
tomlType tomlType
}
func parse(data string) (p *parser, err error) {
_, tomlNext := os.LookupEnv("BURNTSUSHI_TOML_110")
defer func() {
if r := recover(); r != nil {
if pErr, ok := r.(ParseError); ok {
pErr.input = data
err = pErr
return
}
panic(r)
}
}()
// Read over BOM; do this here as the lexer calls utf8.DecodeRuneInString()
// which mangles stuff. UTF-16 BOM isn't strictly valid, but some tools add
// it anyway.
if strings.HasPrefix(data, "\xff\xfe") || strings.HasPrefix(data, "\xfe\xff") { // UTF-16
data = data[2:]
} else if strings.HasPrefix(data, "\xef\xbb\xbf") { // UTF-8
data = data[3:]
}
// Examine first few bytes for NULL bytes; this probably means it's a UTF-16
// file (second byte in surrogate pair being NULL). Again, do this here to
// avoid having to deal with UTF-8/16 stuff in the lexer.
ex := 6
if len(data) < 6 {
ex = len(data)
}
if i := strings.IndexRune(data[:ex], 0); i > -1 {
return nil, ParseError{
Message: "files cannot contain NULL bytes; probably using UTF-16; TOML files must be UTF-8",
Position: Position{Line: 1, Col: 1, Start: i, Len: 1},
Line: 1,
input: data,
}
}
p = &parser{
keyInfo: make(map[string]keyInfo),
mapping: make(map[string]any),
lx: lex(data, tomlNext),
ordered: make([]Key, 0),
implicits: make(map[string]struct{}),
tomlNext: tomlNext,
}
for {
item := p.next()
if item.typ == itemEOF {
break
}
p.topLevel(item)
}
return p, nil
}
func (p *parser) panicErr(it item, err error) {
panic(ParseError{
Message: err.Error(),
err: err,
Position: it.pos.withCol(p.lx.input),
Line: it.pos.Len,
LastKey: p.current(),
})
}
func (p *parser) panicItemf(it item, format string, v ...any) {
panic(ParseError{
Message: fmt.Sprintf(format, v...),
Position: it.pos.withCol(p.lx.input),
Line: it.pos.Len,
LastKey: p.current(),
})
}
func (p *parser) panicf(format string, v ...any) {
panic(ParseError{
Message: fmt.Sprintf(format, v...),
Position: p.pos.withCol(p.lx.input),
Line: p.pos.Line,
LastKey: p.current(),
})
}
func (p *parser) next() item {
it := p.lx.nextItem()
//fmt.Printf("ITEM %-18s line %-3d │ %q\n", it.typ, it.pos.Line, it.val)
if it.typ == itemError {
if it.err != nil {
panic(ParseError{
Message: it.err.Error(),
err: it.err,
Position: it.pos.withCol(p.lx.input),
Line: it.pos.Line,
LastKey: p.current(),
})
}
p.panicItemf(it, "%s", it.val)
}
return it
}
func (p *parser) nextPos() item {
it := p.next()
p.pos = it.pos
return it
}
func (p *parser) bug(format string, v ...any) {
panic(fmt.Sprintf("BUG: "+format+"\n\n", v...))
}
func (p *parser) expect(typ itemType) item {
it := p.next()
p.assertEqual(typ, it.typ)
return it
}
func (p *parser) assertEqual(expected, got itemType) {
if expected != got {
p.bug("Expected '%s' but got '%s'.", expected, got)
}
}
func (p *parser) topLevel(item item) {
switch item.typ {
case itemCommentStart: // # ..
p.expect(itemText)
case itemTableStart: // [ .. ]
name := p.nextPos()
var key Key
for ; name.typ != itemTableEnd && name.typ != itemEOF; name = p.next() {
key = append(key, p.keyString(name))
}
p.assertEqual(itemTableEnd, name.typ)
p.addContext(key, false)
p.setType("", tomlHash, item.pos)
p.ordered = append(p.ordered, key)
case itemArrayTableStart: // [[ .. ]]
name := p.nextPos()
var key Key
for ; name.typ != itemArrayTableEnd && name.typ != itemEOF; name = p.next() {
key = append(key, p.keyString(name))
}
p.assertEqual(itemArrayTableEnd, name.typ)
p.addContext(key, true)
p.setType("", tomlArrayHash, item.pos)
p.ordered = append(p.ordered, key)
case itemKeyStart: // key = ..
outerContext := p.context
/// Read all the key parts (e.g. 'a' and 'b' in 'a.b')
k := p.nextPos()
var key Key
for ; k.typ != itemKeyEnd && k.typ != itemEOF; k = p.next() {
key = append(key, p.keyString(k))
}
p.assertEqual(itemKeyEnd, k.typ)
/// The current key is the last part.
p.currentKey = key.last()
/// All the other parts (if any) are the context; need to set each part
/// as implicit.
context := key.parent()
for i := range context {
p.addImplicitContext(append(p.context, context[i:i+1]...))
}
p.ordered = append(p.ordered, p.context.add(p.currentKey))
/// Set value.
vItem := p.next()
val, typ := p.value(vItem, false)
p.setValue(p.currentKey, val)
p.setType(p.currentKey, typ, vItem.pos)
/// Remove the context we added (preserving any context from [tbl] lines).
p.context = outerContext
p.currentKey = ""
default:
p.bug("Unexpected type at top level: %s", item.typ)
}
}
// Gets a string for a key (or part of a key in a table name).
func (p *parser) keyString(it item) string {
switch it.typ {
case itemText:
return it.val
case itemString, itemStringEsc, itemMultilineString,
itemRawString, itemRawMultilineString:
s, _ := p.value(it, false)
return s.(string)
default:
p.bug("Unexpected key type: %s", it.typ)
}
panic("unreachable")
}
var datetimeRepl = strings.NewReplacer(
"z", "Z",
"t", "T",
" ", "T")
// value translates an expected value from the lexer into a Go value wrapped
// as an empty interface.
func (p *parser) value(it item, parentIsArray bool) (any, tomlType) {
switch it.typ {
case itemString:
return it.val, p.typeOfPrimitive(it)
case itemStringEsc:
return p.replaceEscapes(it, it.val), p.typeOfPrimitive(it)
case itemMultilineString:
return p.replaceEscapes(it, p.stripEscapedNewlines(stripFirstNewline(it.val))), p.typeOfPrimitive(it)
case itemRawString:
return it.val, p.typeOfPrimitive(it)
case itemRawMultilineString:
return stripFirstNewline(it.val), p.typeOfPrimitive(it)
case itemInteger:
return p.valueInteger(it)
case itemFloat:
return p.valueFloat(it)
case itemBool:
switch it.val {
case "true":
return true, p.typeOfPrimitive(it)
case "false":
return false, p.typeOfPrimitive(it)
default:
p.bug("Expected boolean value, but got '%s'.", it.val)
}
case itemDatetime:
return p.valueDatetime(it)
case itemArray:
return p.valueArray(it)
case itemInlineTableStart:
return p.valueInlineTable(it, parentIsArray)
default:
p.bug("Unexpected value type: %s", it.typ)
}
panic("unreachable")
}
func (p *parser) valueInteger(it item) (any, tomlType) {
if !numUnderscoresOK(it.val) {
p.panicItemf(it, "Invalid integer %q: underscores must be surrounded by digits", it.val)
}
if numHasLeadingZero(it.val) {
p.panicItemf(it, "Invalid integer %q: cannot have leading zeroes", it.val)
}
num, err := strconv.ParseInt(it.val, 0, 64)
if err != nil {
// Distinguish integer values. Normally, it'd be a bug if the lexer
// provides an invalid integer, but it's possible that the number is
// out of range of valid values (which the lexer cannot determine).
// So mark the former as a bug but the latter as a legitimate user
// error.
if e, ok := err.(*strconv.NumError); ok && e.Err == strconv.ErrRange {
p.panicErr(it, errParseRange{i: it.val, size: "int64"})
} else {
p.bug("Expected integer value, but got '%s'.", it.val)
}
}
return num, p.typeOfPrimitive(it)
}
func (p *parser) valueFloat(it item) (any, tomlType) {
parts := strings.FieldsFunc(it.val, func(r rune) bool {
switch r {
case '.', 'e', 'E':
return true
}
return false
})
for _, part := range parts {
if !numUnderscoresOK(part) {
p.panicItemf(it, "Invalid float %q: underscores must be surrounded by digits", it.val)
}
}
if len(parts) > 0 && numHasLeadingZero(parts[0]) {
p.panicItemf(it, "Invalid float %q: cannot have leading zeroes", it.val)
}
if !numPeriodsOK(it.val) {
// As a special case, numbers like '123.' or '1.e2',
// which are valid as far as Go/strconv are concerned,
// must be rejected because TOML says that a fractional
// part consists of '.' followed by 1+ digits.
p.panicItemf(it, "Invalid float %q: '.' must be followed by one or more digits", it.val)
}
val := strings.Replace(it.val, "_", "", -1)
signbit := false
if val == "+nan" || val == "-nan" {
signbit = val == "-nan"
val = "nan"
}
num, err := strconv.ParseFloat(val, 64)
if err != nil {
if e, ok := err.(*strconv.NumError); ok && e.Err == strconv.ErrRange {
p.panicErr(it, errParseRange{i: it.val, size: "float64"})
} else {
p.panicItemf(it, "Invalid float value: %q", it.val)
}
}
if signbit {
num = math.Copysign(num, -1)
}
return num, p.typeOfPrimitive(it)
}
var dtTypes = []struct {
fmt string
zone *time.Location
next bool
}{
{time.RFC3339Nano, time.Local, false},
{"2006-01-02T15:04:05.999999999", internal.LocalDatetime, false},
{"2006-01-02", internal.LocalDate, false},
{"15:04:05.999999999", internal.LocalTime, false},
// tomlNext
{"2006-01-02T15:04Z07:00", time.Local, true},
{"2006-01-02T15:04", internal.LocalDatetime, true},
{"15:04", internal.LocalTime, true},
}
func (p *parser) valueDatetime(it item) (any, tomlType) {
it.val = datetimeRepl.Replace(it.val)
var (
t time.Time
ok bool
err error
)
for _, dt := range dtTypes {
if dt.next && !p.tomlNext {
continue
}
t, err = time.ParseInLocation(dt.fmt, it.val, dt.zone)
if err == nil {
if missingLeadingZero(it.val, dt.fmt) {
p.panicErr(it, errParseDate{it.val})
}
ok = true
break
}
}
if !ok {
p.panicErr(it, errParseDate{it.val})
}
return t, p.typeOfPrimitive(it)
}
// Go's time.Parse() will accept numbers without a leading zero; there isn't any
// way to require it. https://github.com/golang/go/issues/29911
//
// Depend on the fact that the separators (- and :) should always be at the same
// location.
func missingLeadingZero(d, l string) bool {
for i, c := range []byte(l) {
if c == '.' || c == 'Z' {
return false
}
if (c < '0' || c > '9') && d[i] != c {
return true
}
}
return false
}
func (p *parser) valueArray(it item) (any, tomlType) {
p.setType(p.currentKey, tomlArray, it.pos)
var (
// Initialize to a non-nil slice to make it consistent with how S = []
// decodes into a non-nil slice inside something like struct { S
// []string }. See #338
array = make([]any, 0, 2)
)
for it = p.next(); it.typ != itemArrayEnd; it = p.next() {
if it.typ == itemCommentStart {
p.expect(itemText)
continue
}
val, typ := p.value(it, true)
array = append(array, val)
// XXX: type isn't used here, we need it to record the accurate type
// information.
//
// Not entirely sure how to best store this; could use "key[0]",
// "key[1]" notation, or maybe store it on the Array type?
_ = typ
}
return array, tomlArray
}
func (p *parser) valueInlineTable(it item, parentIsArray bool) (any, tomlType) {
var (
topHash = make(map[string]any)
outerContext = p.context
outerKey = p.currentKey
)
p.context = append(p.context, p.currentKey)
prevContext := p.context
p.currentKey = ""
p.addImplicit(p.context)
p.addContext(p.context, parentIsArray)
/// Loop over all table key/value pairs.
for it := p.next(); it.typ != itemInlineTableEnd; it = p.next() {
if it.typ == itemCommentStart {
p.expect(itemText)
continue
}
/// Read all key parts.
k := p.nextPos()
var key Key
for ; k.typ != itemKeyEnd && k.typ != itemEOF; k = p.next() {
key = append(key, p.keyString(k))
}
p.assertEqual(itemKeyEnd, k.typ)
/// The current key is the last part.
p.currentKey = key.last()
/// All the other parts (if any) are the context; need to set each part
/// as implicit.
context := key.parent()
for i := range context {
p.addImplicitContext(append(p.context, context[i:i+1]...))
}
p.ordered = append(p.ordered, p.context.add(p.currentKey))
/// Set the value.
val, typ := p.value(p.next(), false)
p.setValue(p.currentKey, val)
p.setType(p.currentKey, typ, it.pos)
hash := topHash
for _, c := range context {
h, ok := hash[c]
if !ok {
h = make(map[string]any)
hash[c] = h
}
hash, ok = h.(map[string]any)
if !ok {
p.panicf("%q is not a table", p.context)
}
}
hash[p.currentKey] = val
/// Restore context.
p.context = prevContext
}
p.context = outerContext
p.currentKey = outerKey
return topHash, tomlHash
}
// numHasLeadingZero checks if this number has leading zeroes, allowing for '0',
// +/- signs, and base prefixes.
func numHasLeadingZero(s string) bool {
if len(s) > 1 && s[0] == '0' && !(s[1] == 'b' || s[1] == 'o' || s[1] == 'x') { // Allow 0b, 0o, 0x
return true
}
if len(s) > 2 && (s[0] == '-' || s[0] == '+') && s[1] == '0' {
return true
}
return false
}
// numUnderscoresOK checks whether each underscore in s is surrounded by
// characters that are not underscores.
func numUnderscoresOK(s string) bool {
switch s {
case "nan", "+nan", "-nan", "inf", "-inf", "+inf":
return true
}
accept := false
for _, r := range s {
if r == '_' {
if !accept {
return false
}
}
// isHex is a superset of all the permissible characters surrounding an
// underscore.
accept = isHex(r)
}
return accept
}
// numPeriodsOK checks whether every period in s is followed by a digit.
func numPeriodsOK(s string) bool {
period := false
for _, r := range s {
if period && !isDigit(r) {
return false
}
period = r == '.'
}
return !period
}
// Set the current context of the parser, where the context is either a hash or
// an array of hashes, depending on the value of the `array` parameter.
//
// Establishing the context also makes sure that the key isn't a duplicate, and
// will create implicit hashes automatically.
func (p *parser) addContext(key Key, array bool) {
/// Always start at the top level and drill down for our context.
hashContext := p.mapping
keyContext := make(Key, 0, len(key)-1)
/// We only need implicit hashes for the parents.
for _, k := range key.parent() {
_, ok := hashContext[k]
keyContext = append(keyContext, k)
// No key? Make an implicit hash and move on.
if !ok {
p.addImplicit(keyContext)
hashContext[k] = make(map[string]any)
}
// If the hash context is actually an array of tables, then set
// the hash context to the last element in that array.
//
// Otherwise, it better be a table, since this MUST be a key group (by
// virtue of it not being the last element in a key).
switch t := hashContext[k].(type) {
case []map[string]any:
hashContext = t[len(t)-1]
case map[string]any:
hashContext = t
default:
p.panicf("Key '%s' was already created as a hash.", keyContext)
}
}
p.context = keyContext
if array {
// If this is the first element for this array, then allocate a new
// list of tables for it.
k := key.last()
if _, ok := hashContext[k]; !ok {
hashContext[k] = make([]map[string]any, 0, 4)
}
// Add a new table. But make sure the key hasn't already been used
// for something else.
if hash, ok := hashContext[k].([]map[string]any); ok {
hashContext[k] = append(hash, make(map[string]any))
} else {
p.panicf("Key '%s' was already created and cannot be used as an array.", key)
}
} else {
p.setValue(key.last(), make(map[string]any))
}
p.context = append(p.context, key.last())
}
// setValue sets the given key to the given value in the current context.
// It will make sure that the key hasn't already been defined, account for
// implicit key groups.
func (p *parser) setValue(key string, value any) {
var (
tmpHash any
ok bool
hash = p.mapping
keyContext = make(Key, 0, len(p.context)+1)
)
for _, k := range p.context {
keyContext = append(keyContext, k)
if tmpHash, ok = hash[k]; !ok {
p.bug("Context for key '%s' has not been established.", keyContext)
}
switch t := tmpHash.(type) {
case []map[string]any:
// The context is a table of hashes. Pick the most recent table
// defined as the current hash.
hash = t[len(t)-1]
case map[string]any:
hash = t
default:
p.panicf("Key '%s' has already been defined.", keyContext)
}
}
keyContext = append(keyContext, key)
if _, ok := hash[key]; ok {
// Normally redefining keys isn't allowed, but the key could have been
// defined implicitly and it's allowed to be redefined concretely. (See
// the `valid/implicit-and-explicit-after.toml` in toml-test)
//
// But we have to make sure to stop marking it as an implicit. (So that
// another redefinition provokes an error.)
//
// Note that since it has already been defined (as a hash), we don't
// want to overwrite it. So our business is done.
if p.isArray(keyContext) {
p.removeImplicit(keyContext)
hash[key] = value
return
}
if p.isImplicit(keyContext) {
p.removeImplicit(keyContext)
return
}
// Otherwise, we have a concrete key trying to override a previous key,
// which is *always* wrong.
p.panicf("Key '%s' has already been defined.", keyContext)
}
hash[key] = value
}
// setType sets the type of a particular value at a given key. It should be
// called immediately AFTER setValue.
//
// Note that if `key` is empty, then the type given will be applied to the
// current context (which is either a table or an array of tables).
func (p *parser) setType(key string, typ tomlType, pos Position) {
keyContext := make(Key, 0, len(p.context)+1)
keyContext = append(keyContext, p.context...)
if len(key) > 0 { // allow type setting for hashes
keyContext = append(keyContext, key)
}
// Special case to make empty keys ("" = 1) work.
// Without it it will set "" rather than `""`.
// TODO: why is this needed? And why is this only needed here?
if len(keyContext) == 0 {
keyContext = Key{""}
}
p.keyInfo[keyContext.String()] = keyInfo{tomlType: typ, pos: pos}
}
// Implicit keys need to be created when tables are implied in "a.b.c.d = 1" and
// "[a.b.c]" (the "a", "b", and "c" hashes are never created explicitly).
func (p *parser) addImplicit(key Key) { p.implicits[key.String()] = struct{}{} }
func (p *parser) removeImplicit(key Key) { delete(p.implicits, key.String()) }
func (p *parser) isImplicit(key Key) bool { _, ok := p.implicits[key.String()]; return ok }
func (p *parser) isArray(key Key) bool { return p.keyInfo[key.String()].tomlType == tomlArray }
func (p *parser) addImplicitContext(key Key) { p.addImplicit(key); p.addContext(key, false) }
// current returns the full key name of the current context.
func (p *parser) current() string {
if len(p.currentKey) == 0 {
return p.context.String()
}
if len(p.context) == 0 {
return p.currentKey
}
return fmt.Sprintf("%s.%s", p.context, p.currentKey)
}
func stripFirstNewline(s string) string {
if len(s) > 0 && s[0] == '\n' {
return s[1:]
}
if len(s) > 1 && s[0] == '\r' && s[1] == '\n' {
return s[2:]
}
return s
}
// stripEscapedNewlines removes whitespace after line-ending backslashes in
// multiline strings.
//
// A line-ending backslash is an unescaped \ followed only by whitespace until
// the next newline. After a line-ending backslash, all whitespace is removed
// until the next non-whitespace character.
func (p *parser) stripEscapedNewlines(s string) string {
var (
b strings.Builder
i int
)
b.Grow(len(s))
for {
ix := strings.Index(s[i:], `\`)
if ix < 0 {
b.WriteString(s)
return b.String()
}
i += ix
if len(s) > i+1 && s[i+1] == '\\' {
// Escaped backslash.
i += 2
continue
}
// Scan until the next non-whitespace.
j := i + 1
whitespaceLoop:
for ; j < len(s); j++ {
switch s[j] {
case ' ', '\t', '\r', '\n':
default:
break whitespaceLoop
}
}
if j == i+1 {
// Not a whitespace escape.
i++
continue
}
if !strings.Contains(s[i:j], "\n") {
// This is not a line-ending backslash. (It's a bad escape sequence,
// but we can let replaceEscapes catch it.)
i++
continue
}
b.WriteString(s[:i])
s = s[j:]
i = 0
}
}
func (p *parser) replaceEscapes(it item, str string) string {
var (
b strings.Builder
skip = 0
)
b.Grow(len(str))
for i, c := range str {
if skip > 0 {
skip--
continue
}
if c != '\\' {
b.WriteRune(c)
continue
}
if i >= len(str) {
p.bug("Escape sequence at end of string.")
return ""
}
switch str[i+1] {
default:
p.bug("Expected valid escape code after \\, but got %q.", str[i+1])
case ' ', '\t':
p.panicItemf(it, "invalid escape: '\\%c'", str[i+1])
case 'b':
b.WriteByte(0x08)
skip = 1
case 't':
b.WriteByte(0x09)
skip = 1
case 'n':
b.WriteByte(0x0a)
skip = 1
case 'f':
b.WriteByte(0x0c)
skip = 1
case 'r':
b.WriteByte(0x0d)
skip = 1
case 'e':
if p.tomlNext {
b.WriteByte(0x1b)
skip = 1
}
case '"':
b.WriteByte(0x22)
skip = 1
case '\\':
b.WriteByte(0x5c)
skip = 1
// The lexer guarantees the correct number of characters are present;
// don't need to check here.
case 'x':
if p.tomlNext {
escaped := p.asciiEscapeToUnicode(it, str[i+2:i+4])
b.WriteRune(escaped)
skip = 3
}
case 'u':
escaped := p.asciiEscapeToUnicode(it, str[i+2:i+6])
b.WriteRune(escaped)
skip = 5
case 'U':
escaped := p.asciiEscapeToUnicode(it, str[i+2:i+10])
b.WriteRune(escaped)
skip = 9
}
}
return b.String()
}
func (p *parser) asciiEscapeToUnicode(it item, s string) rune {
hex, err := strconv.ParseUint(strings.ToLower(s), 16, 32)
if err != nil {
p.bug("Could not parse '%s' as a hexadecimal number, but the lexer claims it's OK: %s", s, err)
}
if !utf8.ValidRune(rune(hex)) {
p.panicItemf(it, "Escaped character '\\u%s' is not valid UTF-8.", s)
}
return rune(hex)
}
package toml
// Struct field handling is adapted from code in encoding/json:
//
// Copyright 2010 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the Go distribution.
import (
"reflect"
"sort"
"sync"
)
// A field represents a single field found in a struct.
type field struct {
name string // the name of the field (`toml` tag included)
tag bool // whether field has a `toml` tag
index []int // represents the depth of an anonymous field
typ reflect.Type // the type of the field
}
// byName sorts field by name, breaking ties with depth,
// then breaking ties with "name came from toml tag", then
// breaking ties with index sequence.
type byName []field
func (x byName) Len() int { return len(x) }
func (x byName) Swap(i, j int) { x[i], x[j] = x[j], x[i] }
func (x byName) Less(i, j int) bool {
if x[i].name != x[j].name {
return x[i].name < x[j].name
}
if len(x[i].index) != len(x[j].index) {
return len(x[i].index) < len(x[j].index)
}
if x[i].tag != x[j].tag {
return x[i].tag
}
return byIndex(x).Less(i, j)
}
// byIndex sorts field by index sequence.
type byIndex []field
func (x byIndex) Len() int { return len(x) }
func (x byIndex) Swap(i, j int) { x[i], x[j] = x[j], x[i] }
func (x byIndex) Less(i, j int) bool {
for k, xik := range x[i].index {
if k >= len(x[j].index) {
return false
}
if xik != x[j].index[k] {
return xik < x[j].index[k]
}
}
return len(x[i].index) < len(x[j].index)
}
// typeFields returns a list of fields that TOML should recognize for the given
// type. The algorithm is breadth-first search over the set of structs to
// include - the top struct and then any reachable anonymous structs.
func typeFields(t reflect.Type) []field {
// Anonymous fields to explore at the current level and the next.
current := []field{}
next := []field{{typ: t}}
// Count of queued names for current level and the next.
var count map[reflect.Type]int
var nextCount map[reflect.Type]int
// Types already visited at an earlier level.
visited := map[reflect.Type]bool{}
// Fields found.
var fields []field
for len(next) > 0 {
current, next = next, current[:0]
count, nextCount = nextCount, map[reflect.Type]int{}
for _, f := range current {
if visited[f.typ] {
continue
}
visited[f.typ] = true
// Scan f.typ for fields to include.
for i := 0; i < f.typ.NumField(); i++ {
sf := f.typ.Field(i)
if sf.PkgPath != "" && !sf.Anonymous { // unexported
continue
}
opts := getOptions(sf.Tag)
if opts.skip {
continue
}
index := make([]int, len(f.index)+1)
copy(index, f.index)
index[len(f.index)] = i
ft := sf.Type
if ft.Name() == "" && ft.Kind() == reflect.Ptr {
// Follow pointer.
ft = ft.Elem()
}
// Record found field and index sequence.
if opts.name != "" || !sf.Anonymous || ft.Kind() != reflect.Struct {
tagged := opts.name != ""
name := opts.name
if name == "" {
name = sf.Name
}
fields = append(fields, field{name, tagged, index, ft})
if count[f.typ] > 1 {
// If there were multiple instances, add a second,
// so that the annihilation code will see a duplicate.
// It only cares about the distinction between 1 or 2,
// so don't bother generating any more copies.
fields = append(fields, fields[len(fields)-1])
}
continue
}
// Record new anonymous struct to explore in next round.
nextCount[ft]++
if nextCount[ft] == 1 {
f := field{name: ft.Name(), index: index, typ: ft}
next = append(next, f)
}
}
}
}
sort.Sort(byName(fields))
// Delete all fields that are hidden by the Go rules for embedded fields,
// except that fields with TOML tags are promoted.
// The fields are sorted in primary order of name, secondary order
// of field index length. Loop over names; for each name, delete
// hidden fields by choosing the one dominant field that survives.
out := fields[:0]
for advance, i := 0, 0; i < len(fields); i += advance {
// One iteration per name.
// Find the sequence of fields with the name of this first field.
fi := fields[i]
name := fi.name
for advance = 1; i+advance < len(fields); advance++ {
fj := fields[i+advance]
if fj.name != name {
break
}
}
if advance == 1 { // Only one field with this name
out = append(out, fi)
continue
}
dominant, ok := dominantField(fields[i : i+advance])
if ok {
out = append(out, dominant)
}
}
fields = out
sort.Sort(byIndex(fields))
return fields
}
// dominantField looks through the fields, all of which are known to
// have the same name, to find the single field that dominates the
// others using Go's embedding rules, modified by the presence of
// TOML tags. If there are multiple top-level fields, the boolean
// will be false: This condition is an error in Go and we skip all
// the fields.
func dominantField(fields []field) (field, bool) {
// The fields are sorted in increasing index-length order. The winner
// must therefore be one with the shortest index length. Drop all
// longer entries, which is easy: just truncate the slice.
length := len(fields[0].index)
tagged := -1 // Index of first tagged field.
for i, f := range fields {
if len(f.index) > length {
fields = fields[:i]
break
}
if f.tag {
if tagged >= 0 {
// Multiple tagged fields at the same level: conflict.
// Return no field.
return field{}, false
}
tagged = i
}
}
if tagged >= 0 {
return fields[tagged], true
}
// All remaining fields have the same length. If there's more than one,
// we have a conflict (two fields named "X" at the same level) and we
// return no field.
if len(fields) > 1 {
return field{}, false
}
return fields[0], true
}
var fieldCache struct {
sync.RWMutex
m map[reflect.Type][]field
}
// cachedTypeFields is like typeFields but uses a cache to avoid repeated work.
func cachedTypeFields(t reflect.Type) []field {
fieldCache.RLock()
f := fieldCache.m[t]
fieldCache.RUnlock()
if f != nil {
return f
}
// Compute fields without lock.
// Might duplicate effort but won't hold other computations back.
f = typeFields(t)
if f == nil {
f = []field{}
}
fieldCache.Lock()
if fieldCache.m == nil {
fieldCache.m = map[reflect.Type][]field{}
}
fieldCache.m[t] = f
fieldCache.Unlock()
return f
}
package toml
// tomlType represents any Go type that corresponds to a TOML type.
// While the first draft of the TOML spec has a simplistic type system that
// probably doesn't need this level of sophistication, we seem to be militating
// toward adding real composite types.
type tomlType interface {
typeString() string
}
// typeEqual accepts any two types and returns true if they are equal.
func typeEqual(t1, t2 tomlType) bool {
if t1 == nil || t2 == nil {
return false
}
return t1.typeString() == t2.typeString()
}
func typeIsTable(t tomlType) bool {
return typeEqual(t, tomlHash) || typeEqual(t, tomlArrayHash)
}
type tomlBaseType string
func (btype tomlBaseType) typeString() string { return string(btype) }
func (btype tomlBaseType) String() string { return btype.typeString() }
var (
tomlInteger tomlBaseType = "Integer"
tomlFloat tomlBaseType = "Float"
tomlDatetime tomlBaseType = "Datetime"
tomlString tomlBaseType = "String"
tomlBool tomlBaseType = "Bool"
tomlArray tomlBaseType = "Array"
tomlHash tomlBaseType = "Hash"
tomlArrayHash tomlBaseType = "ArrayHash"
)
// typeOfPrimitive returns a tomlType of any primitive value in TOML.
// Primitive values are: Integer, Float, Datetime, String and Bool.
//
// Passing a lexer item other than the following will cause a BUG message
// to occur: itemString, itemBool, itemInteger, itemFloat, itemDatetime.
func (p *parser) typeOfPrimitive(lexItem item) tomlType {
switch lexItem.typ {
case itemInteger:
return tomlInteger
case itemFloat:
return tomlFloat
case itemDatetime:
return tomlDatetime
case itemString, itemStringEsc:
return tomlString
case itemMultilineString:
return tomlString
case itemRawString:
return tomlString
case itemRawMultilineString:
return tomlString
case itemBool:
return tomlBool
}
p.bug("Cannot infer primitive type of lex item '%s'.", lexItem)
panic("unreachable")
}