// Copyright 2020-2025 The NATS Authors // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. //go:build gofuzz package conf func Fuzz(data []byte) int { _, err := Parse(string(data)) if err != nil { return 0 } return 1 }
// Copyright 2013-2024 The NATS Authors // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. // Customized heavily from // https://github.com/BurntSushi/toml/blob/master/lex.go, which is based on // Rob Pike's talk: http://cuddle.googlecode.com/hg/talk/lex.html // The format supported is less restrictive than today's formats. // Supports mixed Arrays [], nested Maps {}, multiple comment types (# and //) // Also supports key value assignments using '=' or ':' or whiteSpace() // e.g. foo = 2, foo : 2, foo 2 // maps can be assigned with no key separator as well // semicolons as value terminators in key/value assignments are optional // // see lex_test.go for more examples. 
package conf

import (
	"encoding/hex"
	"fmt"
	"strings"
	"unicode"
	"unicode/utf8"
)

// itemType identifies the kind of token emitted by the lexer.
type itemType int

const (
	itemError itemType = iota
	itemNIL   // used in the parser to indicate no type
	itemEOF
	itemKey
	itemText
	itemString
	itemBool
	itemInteger
	itemFloat
	itemDatetime
	itemArrayStart
	itemArrayEnd
	itemMapStart
	itemMapEnd
	itemCommentStart
	itemVariable
	itemInclude
)

// Character constants used by the state functions. Note that several
// characters are intentionally aliased (e.g. '{' is both mapStart and
// topOptStart; ',' is both arrayValTerm and mapValTerm).
const (
	eof               = 0
	mapStart          = '{'
	mapEnd            = '}'
	keySepEqual       = '='
	keySepColon       = ':'
	arrayStart        = '['
	arrayEnd          = ']'
	arrayValTerm      = ','
	mapValTerm        = ','
	commentHashStart  = '#'
	commentSlashStart = '/'
	dqStringStart     = '"'
	dqStringEnd       = '"'
	sqStringStart     = '\''
	sqStringEnd       = '\''
	optValTerm        = ';'
	topOptStart       = '{'
	topOptValTerm     = ','
	topOptTerm        = '}'
	blockStart        = '('
	blockEnd          = ')'
	mapEndString      = string(mapEnd)
)

// stateFn is a lexer state. Each state consumes some input and returns the
// next state, or nil to stop the lexer (after emitting itemEOF or itemError).
type stateFn func(lx *lexer) stateFn

// lexer holds the scanning state while tokenizing a configuration string.
type lexer struct {
	input string    // full text being lexed
	start int       // byte offset where the current item begins
	pos   int       // byte offset of the next rune to read
	width int       // byte width of the last rune read (for backup)
	line  int       // current line number, 1-based
	state stateFn   // next state to run
	items chan item // buffered channel of emitted items

	// A stack of state functions used to maintain context.
	// The idea is to reuse parts of the state machine in various places.
	// For example, values can appear at the top level or within arbitrarily
	// nested arrays. The last state on the stack is used after a value has
	// been lexed. Similarly for comments.
	stack []stateFn

	// Used for processing escapable substrings in double-quoted and raw strings
	stringParts   []string
	stringStateFn stateFn

	// lstart is the start position of the current line.
	lstart int

	// ilstart is the start position of the line from the current item.
	ilstart int
}

// item is a single token: its type, text, and position (line plus column
// offset within that line).
type item struct {
	typ  itemType
	val  string
	line int
	pos  int
}

// nextItem returns the next token, running the state machine just enough to
// produce one. The lexer is pull-driven: states run only when the items
// channel is empty.
func (lx *lexer) nextItem() item {
	for {
		select {
		case item := <-lx.items:
			return item
		default:
			lx.state = lx.state(lx)
		}
	}
}

// lex creates a lexer for input, starting in the lexTop state.
func lex(input string) *lexer {
	lx := &lexer{
		input:       input,
		state:       lexTop,
		line:        1,
		items:       make(chan item, 10),
		stack:       make([]stateFn, 0, 10),
		stringParts: []string{},
	}
	return lx
}

// push saves a state to return to after a nested construct is lexed.
func (lx *lexer) push(state stateFn) {
	lx.stack = append(lx.stack, state)
}

// pop returns the most recently pushed state. Popping an empty stack is a
// lexer bug and is surfaced as an error item.
func (lx *lexer) pop() stateFn {
	if len(lx.stack) == 0 {
		return lx.errorf("BUG in lexer: no states to pop.")
	}
	li := len(lx.stack) - 1
	last := lx.stack[li]
	lx.stack = lx.stack[0:li]
	return last
}

// emit sends the pending input [start, pos) — prefixed by any accumulated
// escaped string parts — as an item of the given type.
func (lx *lexer) emit(typ itemType) {
	val := strings.Join(lx.stringParts, "") + lx.input[lx.start:lx.pos]
	// Position of item in line where it started.
	pos := lx.pos - lx.ilstart - len(val)
	lx.items <- item{typ, val, lx.line, pos}
	lx.start = lx.pos
	lx.ilstart = lx.lstart
}

// emitString emits an itemString, joining any escaped parts accumulated via
// addStringPart/addCurrentStringPart and resetting them.
func (lx *lexer) emitString() {
	var finalString string
	if len(lx.stringParts) > 0 {
		finalString = strings.Join(lx.stringParts, "") + lx.input[lx.start:lx.pos]
		lx.stringParts = []string{}
	} else {
		finalString = lx.input[lx.start:lx.pos]
	}
	// Position of string in line where it started.
	pos := lx.pos - lx.ilstart - len(finalString)
	lx.items <- item{itemString, finalString, lx.line, pos}
	lx.start = lx.pos
	lx.ilstart = lx.lstart
}

// addCurrentStringPart stashes the raw input up to pos-offset as a string
// part; offset skips trailing characters such as the backslash of an escape.
func (lx *lexer) addCurrentStringPart(offset int) {
	lx.stringParts = append(lx.stringParts, lx.input[lx.start:lx.pos-offset])
	lx.start = lx.pos
}

// addStringPart stashes an already-decoded escape replacement and resumes
// whichever string state (lexString or lexDubQuotedString) was active.
func (lx *lexer) addStringPart(s string) stateFn {
	lx.stringParts = append(lx.stringParts, s)
	lx.start = lx.pos
	return lx.stringStateFn
}

// hasEscapedParts reports whether escape processing has stashed any parts.
func (lx *lexer) hasEscapedParts() bool {
	return len(lx.stringParts) > 0
}

// next consumes and returns the next rune, advancing pos and tracking line
// starts. Returns eof at end of input.
func (lx *lexer) next() (r rune) {
	if lx.pos >= len(lx.input) {
		lx.width = 0
		return eof
	}
	if lx.input[lx.pos] == '\n' {
		lx.line++
		// Mark start position of current line.
		lx.lstart = lx.pos
	}
	r, lx.width = utf8.DecodeRuneInString(lx.input[lx.pos:])
	lx.pos += lx.width
	return r
}

// ignore skips over the pending input before this point.
func (lx *lexer) ignore() {
	lx.start = lx.pos
	lx.ilstart = lx.lstart
}

// backup steps back one rune. Can be called only once per call of next.
func (lx *lexer) backup() {
	lx.pos -= lx.width
	if lx.pos < len(lx.input) && lx.input[lx.pos] == '\n' {
		lx.line--
	}
}

// peek returns but does not consume the next rune in the input.
func (lx *lexer) peek() rune {
	r := lx.next()
	lx.backup()
	return r
}

// errorf stops all lexing by emitting an error and returning `nil`.
// Note that any value that is a character is escaped if it's a special
// character (new lines, tabs, etc.).
func (lx *lexer) errorf(format string, values ...any) stateFn {
	for i, value := range values {
		if v, ok := value.(rune); ok {
			values[i] = escapeSpecial(v)
		}
	}
	// Position of error in current line.
	pos := lx.pos - lx.lstart
	lx.items <- item{
		itemError,
		fmt.Sprintf(format, values...),
		lx.line,
		pos,
	}
	return nil
}

// lexTop consumes elements at the top level of data structure.
func lexTop(lx *lexer) stateFn {
	r := lx.next()
	if unicode.IsSpace(r) {
		return lexSkip(lx, lexTop)
	}
	switch r {
	case topOptStart:
		lx.push(lexTop)
		return lexSkip(lx, lexBlockStart)
	case commentHashStart:
		lx.push(lexTop)
		return lexCommentStart
	case commentSlashStart:
		// "//" begins a comment; a lone '/' falls through to the eof
		// case and is treated as the start of a key.
		rn := lx.next()
		if rn == commentSlashStart {
			lx.push(lexTop)
			return lexCommentStart
		}
		lx.backup()
		fallthrough
	case eof:
		if lx.pos > lx.start {
			return lx.errorf("Unexpected EOF.")
		}
		lx.emit(itemEOF)
		return nil
	}
	// At this point, the only valid item can be a key, so we back up
	// and let the key lexer do the rest.
	lx.backup()
	lx.push(lexTopValueEnd)
	return lexKeyStart
}

// lexTopValueEnd is entered whenever a top-level value has been consumed.
// It must see only whitespace, and will turn back to lexTop upon a new line.
// If it sees EOF, it will quit the lexer successfully.
func lexTopValueEnd(lx *lexer) stateFn {
	r := lx.next()
	switch {
	case r == commentHashStart:
		// a comment will read to a new line for us.
		lx.push(lexTop)
		return lexCommentStart
	case r == commentSlashStart:
		rn := lx.next()
		if rn == commentSlashStart {
			lx.push(lexTop)
			return lexCommentStart
		}
		lx.backup()
		fallthrough
	case isWhitespace(r):
		return lexTopValueEnd
	case isNL(r) || r == eof || r == optValTerm ||
		r == topOptValTerm || r == topOptTerm:
		lx.ignore()
		return lexTop
	}
	return lx.errorf("Expected a top-level value to end with a new line, "+
		"comment or EOF, but got '%v' instead.", r)
}

// lexBlockStart consumes keys inside a '{...}' block. Like lexTop but a '}'
// terminates the block (popping back to the saved state), and nested '{'
// starts another block level.
func lexBlockStart(lx *lexer) stateFn {
	r := lx.next()
	if unicode.IsSpace(r) {
		return lexSkip(lx, lexBlockStart)
	}
	switch r {
	case topOptStart:
		lx.push(lexBlockEnd)
		return lexSkip(lx, lexBlockStart)
	case topOptTerm:
		lx.ignore()
		return lx.pop()
	case commentHashStart:
		lx.push(lexBlockStart)
		return lexCommentStart
	case commentSlashStart:
		rn := lx.next()
		if rn == commentSlashStart {
			lx.push(lexBlockStart)
			return lexCommentStart
		}
		lx.backup()
		fallthrough
	case eof:
		if lx.pos > lx.start {
			return lx.errorf("Unexpected EOF.")
		}
		lx.emit(itemEOF)
		return nil
	}
	// At this point, the only valid item can be a key, so we back up
	// and let the key lexer do the rest.
	lx.backup()
	lx.push(lexBlockValueEnd)
	return lexKeyStart
}

// lexBlockValueEnd is entered whenever a block-level value has been consumed.
// It must see only whitespace, and will turn back to lexBlockStart upon a new line.
// If it sees EOF, it will quit the lexer successfully.
func lexBlockValueEnd(lx *lexer) stateFn {
	r := lx.next()
	switch {
	case r == commentHashStart:
		// a comment will read to a new line for us.
		lx.push(lexBlockValueEnd)
		return lexCommentStart
	case r == commentSlashStart:
		rn := lx.next()
		if rn == commentSlashStart {
			lx.push(lexBlockValueEnd)
			return lexCommentStart
		}
		lx.backup()
		fallthrough
	case isWhitespace(r):
		return lexBlockValueEnd
	case isNL(r) || r == optValTerm || r == topOptValTerm:
		lx.ignore()
		return lexBlockStart
	case r == topOptTerm:
		// Re-read the '}' in lexBlockEnd so it can pop the block.
		lx.backup()
		return lexBlockEnd
	}
	return lx.errorf("Expected a block-level value to end with a new line, "+
		"comment or EOF, but got '%v' instead.", r)
}

// lexBlockEnd is entered whenever a block-level value has been consumed.
// It must see only whitespace, and will turn back to lexTop upon a "}".
func lexBlockEnd(lx *lexer) stateFn {
	r := lx.next()
	switch {
	case r == commentHashStart:
		// a comment will read to a new line for us.
		lx.push(lexBlockStart)
		return lexCommentStart
	case r == commentSlashStart:
		rn := lx.next()
		if rn == commentSlashStart {
			lx.push(lexBlockStart)
			return lexCommentStart
		}
		lx.backup()
		fallthrough
	case isNL(r) || isWhitespace(r):
		return lexBlockEnd
	case r == optValTerm || r == topOptValTerm:
		lx.ignore()
		return lexBlockStart
	case r == topOptTerm:
		lx.ignore()
		return lx.pop()
	}
	return lx.errorf("Expected a block-level to end with a '}', but got '%v' instead.", r)
}

// lexKeyStart consumes a key name up until the first non-whitespace character.
// lexKeyStart will ignore whitespace. It will also eat enclosing quotes.
func lexKeyStart(lx *lexer) stateFn {
	r := lx.peek()
	switch {
	case isKeySeparator(r):
		return lx.errorf("Unexpected key separator '%v'", r)
	case unicode.IsSpace(r):
		lx.next()
		return lexSkip(lx, lexKeyStart)
	case r == dqStringStart:
		lx.next()
		return lexSkip(lx, lexDubQuotedKey)
	case r == sqStringStart:
		lx.next()
		return lexSkip(lx, lexQuotedKey)
	}
	lx.ignore()
	lx.next()
	return lexKey
}

// lexDubQuotedKey consumes the text of a key between quotes.
func lexDubQuotedKey(lx *lexer) stateFn {
	r := lx.peek()
	if r == dqStringEnd {
		lx.emit(itemKey)
		lx.next()
		return lexSkip(lx, lexKeyEnd)
	} else if r == eof {
		if lx.pos > lx.start {
			return lx.errorf("Unexpected EOF.")
		}
		lx.emit(itemEOF)
		return nil
	}
	lx.next()
	return lexDubQuotedKey
}

// lexQuotedKey consumes the text of a key between single quotes.
func lexQuotedKey(lx *lexer) stateFn {
	r := lx.peek()
	if r == sqStringEnd {
		lx.emit(itemKey)
		lx.next()
		return lexSkip(lx, lexKeyEnd)
	} else if r == eof {
		if lx.pos > lx.start {
			return lx.errorf("Unexpected EOF.")
		}
		lx.emit(itemEOF)
		return nil
	}
	lx.next()
	return lexQuotedKey
}

// keyCheckKeyword will check for reserved keywords as the key value when the key is
// separated with a space. Currently the only keyword is "include" (matched
// case-insensitively); anything else is emitted as a normal key.
func (lx *lexer) keyCheckKeyword(fallThrough, push stateFn) stateFn {
	key := strings.ToLower(lx.input[lx.start:lx.pos])
	switch key {
	case "include":
		lx.ignore()
		if push != nil {
			lx.push(push)
		}
		return lexIncludeStart
	}
	lx.emit(itemKey)
	return fallThrough
}

// lexIncludeStart will consume the whitespace until the start of the value.
func lexIncludeStart(lx *lexer) stateFn {
	r := lx.next()
	if isWhitespace(r) {
		return lexSkip(lx, lexIncludeStart)
	}
	lx.backup()
	return lexInclude
}

// lexIncludeQuotedString consumes the inner contents of a string. It assumes that the
// beginning single quote has already been consumed and ignored. It will not interpret
// any internal contents.
func lexIncludeQuotedString(lx *lexer) stateFn {
	r := lx.next()
	switch {
	case r == sqStringEnd:
		lx.backup()
		lx.emit(itemInclude)
		lx.next()
		lx.ignore()
		return lx.pop()
	case r == eof:
		return lx.errorf("Unexpected EOF in quoted include")
	}
	return lexIncludeQuotedString
}

// lexIncludeDubQuotedString consumes the inner contents of a string. It assumes that the
// beginning '"' has already been consumed and ignored. It will not interpret any
// internal contents.
func lexIncludeDubQuotedString(lx *lexer) stateFn {
	r := lx.next()
	switch {
	case r == dqStringEnd:
		lx.backup()
		lx.emit(itemInclude)
		lx.next()
		lx.ignore()
		return lx.pop()
	case r == eof:
		return lx.errorf("Unexpected EOF in double quoted include")
	}
	return lexIncludeDubQuotedString
}

// lexIncludeString consumes the inner contents of a raw string.
func lexIncludeString(lx *lexer) stateFn {
	r := lx.next()
	switch {
	case isNL(r) || r == eof || r == optValTerm || r == mapEnd || isWhitespace(r):
		lx.backup()
		lx.emit(itemInclude)
		return lx.pop()
	case r == sqStringEnd:
		lx.backup()
		lx.emit(itemInclude)
		lx.next()
		lx.ignore()
		return lx.pop()
	}
	return lexIncludeString
}

// lexInclude will consume the include value.
func lexInclude(lx *lexer) stateFn {
	r := lx.next()
	switch {
	case r == sqStringStart:
		lx.ignore() // ignore the " or '
		return lexIncludeQuotedString
	case r == dqStringStart:
		lx.ignore() // ignore the " or '
		return lexIncludeDubQuotedString
	case r == arrayStart:
		return lx.errorf("Expected include value but found start of an array")
	case r == mapStart:
		return lx.errorf("Expected include value but found start of a map")
	case r == blockStart:
		return lx.errorf("Expected include value but found start of a block")
	case unicode.IsDigit(r), r == '-':
		return lx.errorf("Expected include value but found start of a number")
	case r == '\\':
		return lx.errorf("Expected include value but found escape sequence")
	case isNL(r):
		return lx.errorf("Expected include value but found new line")
	}
	lx.backup()
	return lexIncludeString
}

// lexKey consumes the text of a key. Assumes that the first character (which
// is not whitespace) has already been consumed.
func lexKey(lx *lexer) stateFn {
	r := lx.peek()
	if unicode.IsSpace(r) {
		// Spaces signal we could be looking at a keyword, e.g. include.
		// Keywords will eat the keyword and set the appropriate return stateFn.
		return lx.keyCheckKeyword(lexKeyEnd, nil)
	} else if isKeySeparator(r) || r == eof {
		lx.emit(itemKey)
		return lexKeyEnd
	}
	lx.next()
	return lexKey
}

// lexKeyEnd consumes the end of a key (up to the key separator).
// Assumes that the first whitespace character after a key (or the '=' or ':'
// separator) has NOT been consumed.
func lexKeyEnd(lx *lexer) stateFn {
	r := lx.next()
	switch {
	case unicode.IsSpace(r):
		return lexSkip(lx, lexKeyEnd)
	case isKeySeparator(r):
		return lexSkip(lx, lexValue)
	case r == eof:
		lx.emit(itemEOF)
		return nil
	}
	// We start the value here
	lx.backup()
	return lexValue
}

// lexValue starts the consumption of a value anywhere a value is expected.
// lexValue will ignore whitespace.
// After a value is lexed, the last state on the next is popped and returned.
func lexValue(lx *lexer) stateFn {
	// We allow whitespace to precede a value, but NOT new lines.
	// In array syntax, the array states are responsible for ignoring new lines.
	r := lx.next()
	if isWhitespace(r) {
		return lexSkip(lx, lexValue)
	}
	switch {
	case r == arrayStart:
		lx.ignore()
		lx.emit(itemArrayStart)
		return lexArrayValue
	case r == mapStart:
		lx.ignore()
		lx.emit(itemMapStart)
		return lexMapKeyStart
	case r == sqStringStart:
		lx.ignore() // ignore the " or '
		return lexQuotedString
	case r == dqStringStart:
		lx.ignore() // ignore the " or '
		lx.stringStateFn = lexDubQuotedString
		return lexDubQuotedString
	case r == '-':
		return lexNegNumberStart
	case r == blockStart:
		lx.ignore()
		return lexBlock
	case unicode.IsDigit(r):
		lx.backup() // avoid an extra state and use the same as above
		return lexNumberOrDateOrStringOrIPStart
	case r == '.': // special error case, be kind to users
		return lx.errorf("Floats must start with a digit")
	case isNL(r):
		return lx.errorf("Expected value but found new line")
	}
	// Anything else is a bare (unquoted) string.
	lx.backup()
	lx.stringStateFn = lexString
	return lexString
}

// lexArrayValue consumes one value in an array. It assumes that '[' or ','
// have already been consumed. All whitespace and new lines are ignored.
func lexArrayValue(lx *lexer) stateFn {
	r := lx.next()
	switch {
	case unicode.IsSpace(r):
		return lexSkip(lx, lexArrayValue)
	case r == commentHashStart:
		lx.push(lexArrayValue)
		return lexCommentStart
	case r == commentSlashStart:
		rn := lx.next()
		if rn == commentSlashStart {
			lx.push(lexArrayValue)
			return lexCommentStart
		}
		lx.backup()
		fallthrough
	case r == arrayValTerm:
		return lx.errorf("Unexpected array value terminator '%v'.", arrayValTerm)
	case r == arrayEnd:
		return lexArrayEnd
	}
	lx.backup()
	lx.push(lexArrayValueEnd)
	return lexValue
}

// lexArrayValueEnd consumes the cruft between values of an array. Namely,
// it ignores whitespace and expects either a ',' or a ']'.
func lexArrayValueEnd(lx *lexer) stateFn {
	r := lx.next()
	switch {
	case isWhitespace(r):
		return lexSkip(lx, lexArrayValueEnd)
	case r == commentHashStart:
		lx.push(lexArrayValueEnd)
		return lexCommentStart
	case r == commentSlashStart:
		rn := lx.next()
		if rn == commentSlashStart {
			lx.push(lexArrayValueEnd)
			return lexCommentStart
		}
		lx.backup()
		fallthrough
	case r == arrayValTerm || isNL(r):
		return lexSkip(lx, lexArrayValue) // Move onto next
	case r == arrayEnd:
		return lexArrayEnd
	}
	return lx.errorf("Expected an array value terminator %q or an array "+
		"terminator %q, but got '%v' instead.", arrayValTerm, arrayEnd, r)
}

// lexArrayEnd finishes the lexing of an array. It assumes that a ']' has
// just been consumed.
func lexArrayEnd(lx *lexer) stateFn {
	lx.ignore()
	lx.emit(itemArrayEnd)
	return lx.pop()
}

// lexMapKeyStart consumes a key name up until the first non-whitespace
// character.
// lexMapKeyStart will ignore whitespace.
func lexMapKeyStart(lx *lexer) stateFn {
	r := lx.peek()
	switch {
	case isKeySeparator(r):
		return lx.errorf("Unexpected key separator '%v'.", r)
	case r == arrayEnd:
		return lx.errorf("Unexpected array end '%v' processing map.", r)
	case unicode.IsSpace(r):
		lx.next()
		return lexSkip(lx, lexMapKeyStart)
	case r == mapEnd:
		lx.next()
		return lexSkip(lx, lexMapEnd)
	case r == commentHashStart:
		lx.next()
		lx.push(lexMapKeyStart)
		return lexCommentStart
	case r == commentSlashStart:
		lx.next()
		rn := lx.next()
		if rn == commentSlashStart {
			lx.push(lexMapKeyStart)
			return lexCommentStart
		}
		// Lone '/': back up and treat it as the start of a bare key.
		lx.backup()
	case r == sqStringStart:
		lx.next()
		return lexSkip(lx, lexMapQuotedKey)
	case r == dqStringStart:
		lx.next()
		return lexSkip(lx, lexMapDubQuotedKey)
	case r == eof:
		return lx.errorf("Unexpected EOF processing map.")
	}
	lx.ignore()
	lx.next()
	return lexMapKey
}

// lexMapQuotedKey consumes the text of a key between single quotes.
func lexMapQuotedKey(lx *lexer) stateFn {
	if r := lx.peek(); r == eof {
		return lx.errorf("Unexpected EOF processing quoted map key.")
	} else if r == sqStringEnd {
		lx.emit(itemKey)
		lx.next()
		return lexSkip(lx, lexMapKeyEnd)
	}
	lx.next()
	return lexMapQuotedKey
}

// lexMapDubQuotedKey consumes the text of a key between double quotes.
func lexMapDubQuotedKey(lx *lexer) stateFn {
	if r := lx.peek(); r == eof {
		return lx.errorf("Unexpected EOF processing double quoted map key.")
	} else if r == dqStringEnd {
		lx.emit(itemKey)
		lx.next()
		return lexSkip(lx, lexMapKeyEnd)
	}
	lx.next()
	return lexMapDubQuotedKey
}

// lexMapKey consumes the text of a key. Assumes that the first character (which
// is not whitespace) has already been consumed.
func lexMapKey(lx *lexer) stateFn {
	if r := lx.peek(); r == eof {
		return lx.errorf("Unexpected EOF processing map key.")
	} else if unicode.IsSpace(r) {
		// Spaces signal we could be looking at a keyword, e.g. include.
		// Keywords will eat the keyword and set the appropriate return stateFn.
		return lx.keyCheckKeyword(lexMapKeyEnd, lexMapValueEnd)
	} else if isKeySeparator(r) {
		lx.emit(itemKey)
		return lexMapKeyEnd
	}
	lx.next()
	return lexMapKey
}

// lexMapKeyEnd consumes the end of a key (up to the key separator).
// Assumes that the first whitespace character after a key (or the '='
// separator) has NOT been consumed.
func lexMapKeyEnd(lx *lexer) stateFn {
	r := lx.next()
	switch {
	case unicode.IsSpace(r):
		return lexSkip(lx, lexMapKeyEnd)
	case isKeySeparator(r):
		return lexSkip(lx, lexMapValue)
	}
	// We start the value here
	lx.backup()
	return lexMapValue
}

// lexMapValue consumes one value in a map. It assumes that '{' or ','
// have already been consumed. All whitespace and new lines are ignored.
// Map values can be separated by ',' or simple NLs.
func lexMapValue(lx *lexer) stateFn {
	r := lx.next()
	switch {
	case unicode.IsSpace(r):
		return lexSkip(lx, lexMapValue)
	case r == mapValTerm:
		return lx.errorf("Unexpected map value terminator %q.", mapValTerm)
	case r == mapEnd:
		return lexSkip(lx, lexMapEnd)
	}
	lx.backup()
	lx.push(lexMapValueEnd)
	return lexValue
}

// lexMapValueEnd consumes the cruft between values of a map. Namely,
// it ignores whitespace and expects either a ',' or a '}'.
func lexMapValueEnd(lx *lexer) stateFn {
	r := lx.next()
	switch {
	case isWhitespace(r):
		return lexSkip(lx, lexMapValueEnd)
	case r == commentHashStart:
		lx.push(lexMapValueEnd)
		return lexCommentStart
	case r == commentSlashStart:
		rn := lx.next()
		if rn == commentSlashStart {
			lx.push(lexMapValueEnd)
			return lexCommentStart
		}
		lx.backup()
		fallthrough
	case r == optValTerm || r == mapValTerm || isNL(r):
		return lexSkip(lx, lexMapKeyStart) // Move onto next
	case r == mapEnd:
		return lexSkip(lx, lexMapEnd)
	}
	return lx.errorf("Expected a map value terminator %q or a map "+
		"terminator %q, but got '%v' instead.", mapValTerm, mapEnd, r)
}

// lexMapEnd finishes the lexing of a map. It assumes that a '}' has
// just been consumed.
func lexMapEnd(lx *lexer) stateFn {
	lx.ignore()
	lx.emit(itemMapEnd)
	return lx.pop()
}

// Checks if the unquoted string was actually a boolean
func (lx *lexer) isBool() bool {
	str := strings.ToLower(lx.input[lx.start:lx.pos])
	return str == "true" || str == "false" ||
		str == "on" || str == "off" ||
		str == "yes" || str == "no"
}

// Check if the unquoted string is a variable reference, starting with $.
// Note: on a match this also advances lx.start past the '$' so the emitted
// variable name does not include it.
func (lx *lexer) isVariable() bool {
	if lx.start >= len(lx.input) {
		return false
	}
	if lx.input[lx.start] == '$' {
		lx.start += 1
		return true
	}
	return false
}

// lexQuotedString consumes the inner contents of a string. It assumes that the
// beginning single quote has already been consumed and ignored. It will not
// interpret any internal contents.
func lexQuotedString(lx *lexer) stateFn {
	r := lx.next()
	switch {
	case r == sqStringEnd:
		lx.backup()
		lx.emit(itemString)
		lx.next()
		lx.ignore()
		return lx.pop()
	case r == eof:
		if lx.pos > lx.start {
			return lx.errorf("Unexpected EOF.")
		}
		lx.emit(itemEOF)
		return nil
	}
	return lexQuotedString
}

// lexDubQuotedString consumes the inner contents of a string. It assumes that the
// beginning '"' has already been consumed and ignored. Backslash escapes are
// processed via lexStringEscape.
func lexDubQuotedString(lx *lexer) stateFn {
	r := lx.next()
	switch {
	case r == '\\':
		// Stash the literal text up to (not including) the backslash.
		lx.addCurrentStringPart(1)
		return lexStringEscape
	case r == dqStringEnd:
		lx.backup()
		lx.emitString()
		lx.next()
		lx.ignore()
		return lx.pop()
	case r == eof:
		if lx.pos > lx.start {
			return lx.errorf("Unexpected EOF.")
		}
		lx.emit(itemEOF)
		return nil
	}
	return lexDubQuotedString
}

// lexString consumes the inner contents of a raw string.
func lexString(lx *lexer) stateFn {
	r := lx.next()
	switch {
	case r == '\\':
		lx.addCurrentStringPart(1)
		return lexStringEscape
	// Termination of non-quoted strings
	case isNL(r) || r == eof || r == optValTerm ||
		r == arrayValTerm || r == arrayEnd || r == mapEnd ||
		isWhitespace(r):
		lx.backup()
		if lx.hasEscapedParts() {
			lx.emitString()
		} else if lx.isBool() {
			lx.emit(itemBool)
		} else if lx.isVariable() {
			lx.emit(itemVariable)
		} else {
			lx.emitString()
		}
		return lx.pop()
	case r == sqStringEnd:
		lx.backup()
		lx.emitString()
		lx.next()
		lx.ignore()
		return lx.pop()
	}
	return lexString
}

// lexBlock consumes the inner contents as a string. It assumes that the
// beginning '(' has already been consumed and ignored. It will continue
// processing until it finds a ')' on a new line by itself.
func lexBlock(lx *lexer) stateFn {
	r := lx.next()
	switch {
	case r == blockEnd:
		lx.backup()
		lx.backup()

		// Looking for a ')' character on a line by itself, if the previous
		// character isn't a new line, then break so we keep processing the block.
		if lx.next() != '\n' {
			lx.next()
			break
		}
		lx.next()

		// Make sure the next character is a new line or an eof. We want a ')' on a
		// bare line by itself.
		switch lx.next() {
		case '\n', eof:
			lx.backup()
			lx.backup()
			lx.emit(itemString)
			lx.next()
			lx.ignore()
			return lx.pop()
		}
		lx.backup()
	case r == eof:
		return lx.errorf("Unexpected EOF processing block.")
	}
	return lexBlock
}

// lexStringEscape consumes an escaped character. It assumes that the preceding
// '\\' has already been consumed.
func lexStringEscape(lx *lexer) stateFn {
	r := lx.next()
	switch r {
	case 'x':
		return lexStringBinary
	case 't':
		return lx.addStringPart("\t")
	case 'n':
		return lx.addStringPart("\n")
	case 'r':
		return lx.addStringPart("\r")
	case '"':
		return lx.addStringPart("\"")
	case '\\':
		return lx.addStringPart("\\")
	}
	return lx.errorf("Invalid escape character '%v'. Only the following "+
		"escape characters are allowed: \\xXX, \\t, \\n, \\r, \\\", \\\\.", r)
}

// lexStringBinary consumes two hexadecimal digits following '\x'. It assumes
// that the '\x' has already been consumed.
func lexStringBinary(lx *lexer) stateFn {
	r := lx.next()
	if isNL(r) {
		return lx.errorf("Expected two hexadecimal digits after '\\x', but hit end of line")
	}
	r = lx.next()
	if isNL(r) {
		return lx.errorf("Expected two hexadecimal digits after '\\x', but hit end of line")
	}
	offset := lx.pos - 2
	byteString, err := hex.DecodeString(lx.input[offset:lx.pos])
	if err != nil {
		return lx.errorf("Expected two hexadecimal digits after '\\x', but got '%s'", lx.input[offset:lx.pos])
	}
	lx.addStringPart(string(byteString))
	return lx.stringStateFn
}

// lexNumberOrDateOrStringOrIPStart consumes either a (positive)
// integer, a float, a datetime, or IP, or String that started with a
// number. It assumes that NO negative sign has been consumed, that
// is triggered above.
func lexNumberOrDateOrStringOrIPStart(lx *lexer) stateFn {
	r := lx.next()
	if !unicode.IsDigit(r) {
		if r == '.' {
			return lx.errorf("Floats must start with a digit, not '.'.")
		}
		return lx.errorf("Expected a digit but got '%v'.", r)
	}
	return lexNumberOrDateOrStringOrIP
}

// lexNumberOrDateOrStringOrIP consumes either a (positive) integer,
// float, datetime, IP or string without quotes that starts with a
// number.
func lexNumberOrDateOrStringOrIP(lx *lexer) stateFn {
	r := lx.next()
	switch {
	case r == '-':
		// "YYYY-" switches us into datetime parsing.
		if lx.pos-lx.start != 5 {
			return lx.errorf("All ISO8601 dates must be in full Zulu form.")
		}
		return lexDateAfterYear
	case unicode.IsDigit(r):
		return lexNumberOrDateOrStringOrIP
	case r == '.':
		// Assume float at first, but could be IP
		return lexFloatStart
	case isNumberSuffix(r):
		return lexConvenientNumber
	case !(isNL(r) || r == eof || r == mapEnd || r == optValTerm ||
		r == mapValTerm || isWhitespace(r) || unicode.IsDigit(r)):
		// Treat it as a string value once we get a rune that
		// is not a number.
		lx.stringStateFn = lexString
		return lexString
	}
	lx.backup()
	lx.emit(itemInteger)
	return lx.pop()
}

// lexConvenientNumber is when we have a suffix, e.g. 1k or 1Mb
func lexConvenientNumber(lx *lexer) stateFn {
	r := lx.next()
	switch {
	case r == 'b' || r == 'B' || r == 'i' || r == 'I':
		// Allow suffixes like "Kb", "KiB".
		return lexConvenientNumber
	}
	lx.backup()
	if isNL(r) || r == eof || r == mapEnd || r == optValTerm ||
		r == mapValTerm || isWhitespace(r) || unicode.IsDigit(r) {
		lx.emit(itemInteger)
		return lx.pop()
	}
	// This is not a number, so treat it as a string.
	lx.stringStateFn = lexString
	return lexString
}

// lexDateAfterYear consumes a full Zulu Datetime in ISO8601 format.
// It assumes that "YYYY-" has already been consumed.
func lexDateAfterYear(lx *lexer) stateFn {
	formats := []rune{
		// digits are '0'.
		// everything else is direct equality.
		'0', '0', '-', '0', '0',
		'T', '0', '0', ':', '0',
		'0', ':', '0', '0', 'Z',
	}
	for _, f := range formats {
		r := lx.next()
		if f == '0' {
			if !unicode.IsDigit(r) {
				return lx.errorf("Expected digit in ISO8601 datetime, "+
					"but found '%v' instead.", r)
			}
		} else if f != r {
			return lx.errorf("Expected '%v' in ISO8601 datetime, "+
				"but found '%v' instead.", f, r)
		}
	}
	lx.emit(itemDatetime)
	return lx.pop()
}

// lexNegNumberStart consumes either an integer or a float. It assumes that a
// negative sign has already been read, but that *no* digits have been consumed.
// lexNegNumberStart will move to the appropriate integer or float states.
func lexNegNumberStart(lx *lexer) stateFn {
	// we MUST see a digit. Even floats have to start with a digit.
	r := lx.next()
	if !unicode.IsDigit(r) {
		if r == '.' {
			return lx.errorf("Floats must start with a digit, not '.'.")
		}
		return lx.errorf("Expected a digit but got '%v'.", r)
	}
	return lexNegNumber
}

// lexNegNumber consumes a negative integer or a float after seeing the first digit.
func lexNegNumber(lx *lexer) stateFn {
	r := lx.next()
	switch {
	case unicode.IsDigit(r):
		return lexNegNumber
	case r == '.':
		return lexFloatStart
	case isNumberSuffix(r):
		return lexConvenientNumber
	}
	lx.backup()
	lx.emit(itemInteger)
	return lx.pop()
}

// lexFloatStart starts the consumption of digits of a float after a '.'.
// Namely, at least one digit is required.
func lexFloatStart(lx *lexer) stateFn {
	r := lx.next()
	if !unicode.IsDigit(r) {
		return lx.errorf("Floats must have a digit after the '.', but got "+
			"'%v' instead.", r)
	}
	return lexFloat
}

// lexFloat consumes the digits of a float after a '.'.
// Assumes that one digit has been consumed after a '.' already.
func lexFloat(lx *lexer) stateFn {
	r := lx.next()
	if unicode.IsDigit(r) {
		return lexFloat
	}
	// Not a digit, if its another '.', need to see if we falsely assumed a float.
	if r == '.' {
		return lexIPAddr
	}
	lx.backup()
	lx.emit(itemFloat)
	return lx.pop()
}

// lexIPAddr consumes IP addrs, like 127.0.0.1:4222
func lexIPAddr(lx *lexer) stateFn {
	r := lx.next()
	if unicode.IsDigit(r) || r == '.' || r == ':' || r == '-' {
		return lexIPAddr
	}
	lx.backup()
	lx.emit(itemString)
	return lx.pop()
}

// lexCommentStart begins the lexing of a comment. It will emit
// itemCommentStart and consume no characters, passing control to lexComment.
func lexCommentStart(lx *lexer) stateFn {
	lx.ignore()
	lx.emit(itemCommentStart)
	return lexComment
}

// lexComment lexes an entire comment. It assumes that '#' has been consumed.
// It will consume *up to* the first new line character, and pass control
// back to the last state on the stack.
func lexComment(lx *lexer) stateFn {
	r := lx.peek()
	if isNL(r) || r == eof {
		lx.emit(itemText)
		return lx.pop()
	}
	lx.next()
	return lexComment
}

// lexSkip ignores all slurped input and moves on to the next state.
func lexSkip(lx *lexer, nextState stateFn) stateFn { return func(lx *lexer) stateFn { lx.ignore() return nextState } } // Tests to see if we have a number suffix func isNumberSuffix(r rune) bool { return r == 'k' || r == 'K' || r == 'm' || r == 'M' || r == 'g' || r == 'G' || r == 't' || r == 'T' || r == 'p' || r == 'P' || r == 'e' || r == 'E' } // Tests for both key separators func isKeySeparator(r rune) bool { return r == keySepEqual || r == keySepColon } // isWhitespace returns true if `r` is a whitespace character according // to the spec. func isWhitespace(r rune) bool { return r == '\t' || r == ' ' } func isNL(r rune) bool { return r == '\n' || r == '\r' } func (itype itemType) String() string { switch itype { case itemError: return "Error" case itemNIL: return "NIL" case itemEOF: return "EOF" case itemText: return "Text" case itemString: return "String" case itemBool: return "Bool" case itemInteger: return "Integer" case itemFloat: return "Float" case itemDatetime: return "DateTime" case itemKey: return "Key" case itemArrayStart: return "ArrayStart" case itemArrayEnd: return "ArrayEnd" case itemMapStart: return "MapStart" case itemMapEnd: return "MapEnd" case itemCommentStart: return "CommentStart" case itemVariable: return "Variable" case itemInclude: return "Include" } panic(fmt.Sprintf("BUG: Unknown type '%s'.", itype.String())) } func (item item) String() string { return fmt.Sprintf("(%s, '%s', %d, %d)", item.typ.String(), item.val, item.line, item.pos) } func escapeSpecial(c rune) string { switch c { case '\n': return "\\n" } return string(c) }
// Copyright 2013-2025 The NATS Authors
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

// Package conf supports a configuration file format used by gnatsd. It is
// a flexible format that combines the best of traditional
// configuration formats and newer styles such as JSON and YAML.
package conf

// The format supported is less restrictive than today's formats.
// Supports mixed Arrays [], nested Maps {}, multiple comment types (# and //)
// Also supports key value assignments using '=' or ':' or whiteSpace()
// e.g. foo = 2, foo : 2, foo 2
// maps can be assigned with no key separator as well
// semicolons as value terminators in key/value assignments are optional
//
// see parse_test.go for more examples.

import (
	"crypto/sha256"
	"encoding/json"
	"fmt"
	"os"
	"path/filepath"
	"strconv"
	"strings"
	"time"
	"unicode"
)

// _EMPTY_ is the canonical empty string used throughout this package.
const _EMPTY_ = ""

// parser holds the state of a single parse run: the resulting top-level
// mapping, the lexer feeding it items, and stacks tracking nested
// map/array contexts and pending keys.
type parser struct {
	// mapping is the top-level result map being populated.
	mapping map[string]any
	// lx is the lexer producing items for this run.
	lx *lexer

	// The current scoped context, can be array or map
	ctx any

	// stack of contexts, either map or array/slice stack
	ctxs []any

	// Keys stack
	keys []string

	// Keys stack as items
	ikeys []item

	// The config file path, empty by default.
	fp string

	// pedantic reports error when configuration is not correct.
	pedantic bool
}

// Parse will return a map of keys to any, although concrete types
// underly them. The values supported are string, bool, int64, float64, DateTime.
// Arrays and nested Maps are also supported.
func Parse(data string) (map[string]any, error) { p, err := parse(data, "", false) if err != nil { return nil, err } return p.mapping, nil } // ParseWithChecks is equivalent to Parse but runs in pedantic mode. func ParseWithChecks(data string) (map[string]any, error) { p, err := parse(data, "", true) if err != nil { return nil, err } return p.mapping, nil } // ParseFile is a helper to open file, etc. and parse the contents. func ParseFile(fp string) (map[string]any, error) { data, err := os.ReadFile(fp) if err != nil { return nil, fmt.Errorf("error opening config file: %v", err) } p, err := parse(string(data), fp, false) if err != nil { return nil, err } return p.mapping, nil } // ParseFileWithChecks is equivalent to ParseFile but runs in pedantic mode. func ParseFileWithChecks(fp string) (map[string]any, error) { data, err := os.ReadFile(fp) if err != nil { return nil, err } p, err := parse(string(data), fp, true) if err != nil { return nil, err } return p.mapping, nil } // cleanupUsedEnvVars will recursively remove all already used // environment variables which might be in the parsed tree. func cleanupUsedEnvVars(m map[string]any) { for k, v := range m { t := v.(*token) if t.usedVariable { delete(m, k) continue } // Cleanup any other env var that is still in the map. if tm, ok := t.value.(map[string]any); ok { cleanupUsedEnvVars(tm) } } } // ParseFileWithChecksDigest returns the processed config and a digest // that represents the configuration. func ParseFileWithChecksDigest(fp string) (map[string]any, string, error) { data, err := os.ReadFile(fp) if err != nil { return nil, _EMPTY_, err } p, err := parse(string(data), fp, true) if err != nil { return nil, _EMPTY_, err } // Filter out any environment variables before taking the digest. 
cleanupUsedEnvVars(p.mapping) digest := sha256.New() e := json.NewEncoder(digest) err = e.Encode(p.mapping) if err != nil { return nil, _EMPTY_, err } return p.mapping, fmt.Sprintf("sha256:%x", digest.Sum(nil)), nil } type token struct { item item value any usedVariable bool sourceFile string } func (t *token) MarshalJSON() ([]byte, error) { return json.Marshal(t.value) } func (t *token) Value() any { return t.value } func (t *token) Line() int { return t.item.line } func (t *token) IsUsedVariable() bool { return t.usedVariable } func (t *token) SourceFile() string { return t.sourceFile } func (t *token) Position() int { return t.item.pos } func parse(data, fp string, pedantic bool) (p *parser, err error) { p = &parser{ mapping: make(map[string]any), lx: lex(data), ctxs: make([]any, 0, 4), keys: make([]string, 0, 4), ikeys: make([]item, 0, 4), fp: filepath.Dir(fp), pedantic: pedantic, } p.pushContext(p.mapping) var prevItem item for { it := p.next() if it.typ == itemEOF { // Here we allow the final character to be a bracket '}' // in order to support JSON like configurations. 
if prevItem.typ == itemKey && prevItem.val != mapEndString { return nil, fmt.Errorf("config is invalid (%s:%d:%d)", fp, it.line, it.pos) } break } prevItem = it if err := p.processItem(it, fp); err != nil { return nil, err } } return p, nil } func (p *parser) next() item { return p.lx.nextItem() } func (p *parser) pushContext(ctx any) { p.ctxs = append(p.ctxs, ctx) p.ctx = ctx } func (p *parser) popContext() any { if len(p.ctxs) == 0 { panic("BUG in parser, context stack empty") } li := len(p.ctxs) - 1 last := p.ctxs[li] p.ctxs = p.ctxs[0:li] p.ctx = p.ctxs[len(p.ctxs)-1] return last } func (p *parser) pushKey(key string) { p.keys = append(p.keys, key) } func (p *parser) popKey() string { if len(p.keys) == 0 { panic("BUG in parser, keys stack empty") } li := len(p.keys) - 1 last := p.keys[li] p.keys = p.keys[0:li] return last } func (p *parser) pushItemKey(key item) { p.ikeys = append(p.ikeys, key) } func (p *parser) popItemKey() item { if len(p.ikeys) == 0 { panic("BUG in parser, item keys stack empty") } li := len(p.ikeys) - 1 last := p.ikeys[li] p.ikeys = p.ikeys[0:li] return last } func (p *parser) processItem(it item, fp string) error { setValue := func(it item, v any) { if p.pedantic { p.setValue(&token{it, v, false, fp}) } else { p.setValue(v) } } switch it.typ { case itemError: return fmt.Errorf("Parse error on line %d: '%s'", it.line, it.val) case itemKey: // Keep track of the keys as items and strings, // we do this in order to be able to still support // includes without many breaking changes. p.pushKey(it.val) if p.pedantic { p.pushItemKey(it) } case itemMapStart: newCtx := make(map[string]any) p.pushContext(newCtx) case itemMapEnd: setValue(it, p.popContext()) case itemString: // FIXME(dlc) sanitize string? 
setValue(it, it.val) case itemInteger: lastDigit := 0 for _, r := range it.val { if !unicode.IsDigit(r) && r != '-' { break } lastDigit++ } numStr := it.val[:lastDigit] num, err := strconv.ParseInt(numStr, 10, 64) if err != nil { if e, ok := err.(*strconv.NumError); ok && e.Err == strconv.ErrRange { return fmt.Errorf("integer '%s' is out of the range", it.val) } return fmt.Errorf("expected integer, but got '%s'", it.val) } // Process a suffix suffix := strings.ToLower(strings.TrimSpace(it.val[lastDigit:])) switch suffix { case "": setValue(it, num) case "k": setValue(it, num*1000) case "kb", "ki", "kib": setValue(it, num*1024) case "m": setValue(it, num*1000*1000) case "mb", "mi", "mib": setValue(it, num*1024*1024) case "g": setValue(it, num*1000*1000*1000) case "gb", "gi", "gib": setValue(it, num*1024*1024*1024) case "t": setValue(it, num*1000*1000*1000*1000) case "tb", "ti", "tib": setValue(it, num*1024*1024*1024*1024) case "p": setValue(it, num*1000*1000*1000*1000*1000) case "pb", "pi", "pib": setValue(it, num*1024*1024*1024*1024*1024) case "e": setValue(it, num*1000*1000*1000*1000*1000*1000) case "eb", "ei", "eib": setValue(it, num*1024*1024*1024*1024*1024*1024) } case itemFloat: num, err := strconv.ParseFloat(it.val, 64) if err != nil { if e, ok := err.(*strconv.NumError); ok && e.Err == strconv.ErrRange { return fmt.Errorf("float '%s' is out of the range", it.val) } return fmt.Errorf("expected float, but got '%s'", it.val) } setValue(it, num) case itemBool: switch strings.ToLower(it.val) { case "true", "yes", "on": setValue(it, true) case "false", "no", "off": setValue(it, false) default: return fmt.Errorf("expected boolean value, but got '%s'", it.val) } case itemDatetime: dt, err := time.Parse("2006-01-02T15:04:05Z", it.val) if err != nil { return fmt.Errorf( "expected Zulu formatted DateTime, but got '%s'", it.val) } setValue(it, dt) case itemArrayStart: var array = make([]any, 0) p.pushContext(array) case itemArrayEnd: array := p.ctx p.popContext() 
setValue(it, array) case itemVariable: value, found, err := p.lookupVariable(it.val) if err != nil { return fmt.Errorf("variable reference for '%s' on line %d could not be parsed: %s", it.val, it.line, err) } if !found { return fmt.Errorf("variable reference for '%s' on line %d can not be found", it.val, it.line) } if p.pedantic { switch tk := value.(type) { case *token: // Mark the looked up variable as used, and make // the variable reference become handled as a token. tk.usedVariable = true p.setValue(&token{it, tk.Value(), false, fp}) default: // Special case to add position context to bcrypt references. p.setValue(&token{it, value, false, fp}) } } else { p.setValue(value) } case itemInclude: var ( m map[string]any err error ) if p.pedantic { m, err = ParseFileWithChecks(filepath.Join(p.fp, it.val)) } else { m, err = ParseFile(filepath.Join(p.fp, it.val)) } if err != nil { return fmt.Errorf("error parsing include file '%s', %v", it.val, err) } for k, v := range m { p.pushKey(k) if p.pedantic { switch tk := v.(type) { case *token: p.pushItemKey(tk.item) } } p.setValue(v) } } return nil } // Used to map an environment value into a temporary map to pass to secondary Parse call. const pkey = "pk" // We special case raw strings here that are bcrypt'd. This allows us not to force quoting the strings const bcryptPrefix = "2a$" // lookupVariable will lookup a variable reference. It will use block scoping on keys // it has seen before, with the top level scoping being the environment variables. We // ignore array contexts and only process the map contexts.. // // Returns true for ok if it finds something, similar to map. func (p *parser) lookupVariable(varReference string) (any, bool, error) { // Do special check to see if it is a raw bcrypt string. if strings.HasPrefix(varReference, bcryptPrefix) { return "$" + varReference, true, nil } // Loop through contexts currently on the stack. 
for i := len(p.ctxs) - 1; i >= 0; i-- { ctx := p.ctxs[i] // Process if it is a map context if m, ok := ctx.(map[string]any); ok { if v, ok := m[varReference]; ok { return v, ok, nil } } } // If we are here, we have exhausted our context maps and still not found anything. // Parse from the environment. if vStr, ok := os.LookupEnv(varReference); ok { // Everything we get here will be a string value, so we need to process as a parser would. if vmap, err := Parse(fmt.Sprintf("%s=%s", pkey, vStr)); err == nil { v, ok := vmap[pkey] return v, ok, nil } else { return nil, false, err } } return nil, false, nil } func (p *parser) setValue(val any) { // Test to see if we are on an array or a map // Array processing if ctx, ok := p.ctx.([]any); ok { p.ctx = append(ctx, val) p.ctxs[len(p.ctxs)-1] = p.ctx } // Map processing if ctx, ok := p.ctx.(map[string]any); ok { key := p.popKey() if p.pedantic { // Change the position to the beginning of the key // since more useful when reporting errors. switch v := val.(type) { case *token: it := p.popItemKey() v.item.pos = it.pos v.item.line = it.line ctx[key] = v } } else { // FIXME(dlc), make sure to error if redefining same key? ctx[key] = val } } }