package jsonparser const absMinInt64 = 1 << 63 const maxInt64 = 1<<63 - 1 const maxUint64 = 1<<64 - 1 // About 2x faster then strconv.ParseInt because it only supports base 10, which is enough for JSON func parseInt(bytes []byte) (v int64, ok bool, overflow bool) { if len(bytes) == 0 { return 0, false, false } var neg bool = false if bytes[0] == '-' { neg = true bytes = bytes[1:] } var n uint64 = 0 for _, c := range bytes { if c < '0' || c > '9' { return 0, false, false } if n > maxUint64/10 { return 0, false, true } n *= 10 n1 := n + uint64(c-'0') if n1 < n { return 0, false, true } n = n1 } if n > maxInt64 { if neg && n == absMinInt64 { return -absMinInt64, true, false } return 0, false, true } if neg { return -int64(n), true, false } else { return int64(n), true, false } }
// +build !appengine,!appenginevm package jsonparser import ( "reflect" "strconv" "unsafe" "runtime" ) // // The reason for using *[]byte rather than []byte in parameters is an optimization. As of Go 1.6, // the compiler cannot perfectly inline the function when using a non-pointer slice. That is, // the non-pointer []byte parameter version is slower than if its function body is manually // inlined, whereas the pointer []byte version is equally fast to the manually inlined // version. Instruction count in assembly taken from "go tool compile" confirms this difference. // // TODO: Remove hack after Go 1.7 release // func equalStr(b *[]byte, s string) bool { return *(*string)(unsafe.Pointer(b)) == s } func parseFloat(b *[]byte) (float64, error) { return strconv.ParseFloat(*(*string)(unsafe.Pointer(b)), 64) } // A hack until issue golang/go#2632 is fixed. // See: https://github.com/golang/go/issues/2632 func bytesToString(b *[]byte) string { return *(*string)(unsafe.Pointer(b)) } func StringToBytes(s string) []byte { b := make([]byte, 0, 0) bh := (*reflect.SliceHeader)(unsafe.Pointer(&b)) sh := (*reflect.StringHeader)(unsafe.Pointer(&s)) bh.Data = sh.Data bh.Cap = sh.Len bh.Len = sh.Len runtime.KeepAlive(s) return b }
package jsonparser import ( "bytes" "unicode/utf8" ) // JSON Unicode stuff: see https://tools.ietf.org/html/rfc7159#section-7 const supplementalPlanesOffset = 0x10000 const highSurrogateOffset = 0xD800 const lowSurrogateOffset = 0xDC00 const basicMultilingualPlaneReservedOffset = 0xDFFF const basicMultilingualPlaneOffset = 0xFFFF func combineUTF16Surrogates(high, low rune) rune { return supplementalPlanesOffset + (high-highSurrogateOffset)<<10 + (low - lowSurrogateOffset) } const badHex = -1 func h2I(c byte) int { switch { case c >= '0' && c <= '9': return int(c - '0') case c >= 'A' && c <= 'F': return int(c - 'A' + 10) case c >= 'a' && c <= 'f': return int(c - 'a' + 10) } return badHex } // decodeSingleUnicodeEscape decodes a single \uXXXX escape sequence. The prefix \u is assumed to be present and // is not checked. // In JSON, these escapes can either come alone or as part of "UTF16 surrogate pairs" that must be handled together. // This function only handles one; decodeUnicodeEscape handles this more complex case. func decodeSingleUnicodeEscape(in []byte) (rune, bool) { // We need at least 6 characters total if len(in) < 6 { return utf8.RuneError, false } // Convert hex to decimal h1, h2, h3, h4 := h2I(in[2]), h2I(in[3]), h2I(in[4]), h2I(in[5]) if h1 == badHex || h2 == badHex || h3 == badHex || h4 == badHex { return utf8.RuneError, false } // Compose the hex digits return rune(h1<<12 + h2<<8 + h3<<4 + h4), true } // isUTF16EncodedRune checks if a rune is in the range for non-BMP characters, // which is used to describe UTF16 chars. // Source: https://en.wikipedia.org/wiki/Plane_(Unicode)#Basic_Multilingual_Plane func isUTF16EncodedRune(r rune) bool { return highSurrogateOffset <= r && r <= basicMultilingualPlaneReservedOffset } func decodeUnicodeEscape(in []byte) (rune, int) { if r, ok := decodeSingleUnicodeEscape(in); !ok { // Invalid Unicode escape return utf8.RuneError, -1 } else if r <= basicMultilingualPlaneOffset && !isUTF16EncodedRune(r) { // Valid Unicode escape in Basic Multilingual Plane return r, 6 } else if r2, ok := decodeSingleUnicodeEscape(in[6:]); !ok { // Note: previous decodeSingleUnicodeEscape success guarantees at least 6 bytes remain // UTF16 "high surrogate" without manditory valid following Unicode escape for the "low surrogate" return utf8.RuneError, -1 } else if r2 < lowSurrogateOffset { // Invalid UTF16 "low surrogate" return utf8.RuneError, -1 } else { // Valid UTF16 surrogate pair return combineUTF16Surrogates(r, r2), 12 } } // backslashCharEscapeTable: when '\X' is found for some byte X, it is to be replaced with backslashCharEscapeTable[X] var backslashCharEscapeTable = [...]byte{ '"': '"', '\\': '\\', '/': '/', 'b': '\b', 'f': '\f', 'n': '\n', 'r': '\r', 't': '\t', } // unescapeToUTF8 unescapes the single escape sequence starting at 'in' into 'out' and returns // how many characters were consumed from 'in' and emitted into 'out'. // If a valid escape sequence does not appear as a prefix of 'in', (-1, -1) to signal the error. func unescapeToUTF8(in, out []byte) (inLen int, outLen int) { if len(in) < 2 || in[0] != '\\' { // Invalid escape due to insufficient characters for any escape or no initial backslash return -1, -1 } // https://tools.ietf.org/html/rfc7159#section-7 switch e := in[1]; e { case '"', '\\', '/', 'b', 'f', 'n', 'r', 't': // Valid basic 2-character escapes (use lookup table) out[0] = backslashCharEscapeTable[e] return 2, 1 case 'u': // Unicode escape if r, inLen := decodeUnicodeEscape(in); inLen == -1 { // Invalid Unicode escape return -1, -1 } else { // Valid Unicode escape; re-encode as UTF8 outLen := utf8.EncodeRune(out, r) return inLen, outLen } } return -1, -1 } // unescape unescapes the string contained in 'in' and returns it as a slice. // If 'in' contains no escaped characters: // Returns 'in'. // Else, if 'out' is of sufficient capacity (guaranteed if cap(out) >= len(in)): // 'out' is used to build the unescaped string and is returned with no extra allocation // Else: // A new slice is allocated and returned. func Unescape(in, out []byte) ([]byte, error) { firstBackslash := bytes.IndexByte(in, '\\') if firstBackslash == -1 { return in, nil } // Get a buffer of sufficient size (allocate if needed) if cap(out) < len(in) { out = make([]byte, len(in)) } else { out = out[0:len(in)] } // Copy the first sequence of unescaped bytes to the output and obtain a buffer pointer (subslice) copy(out, in[:firstBackslash]) in = in[firstBackslash:] buf := out[firstBackslash:] for len(in) > 0 { // Unescape the next escaped character inLen, bufLen := unescapeToUTF8(in, buf) if inLen == -1 { return nil, MalformedStringEscapeError } in = in[inLen:] buf = buf[bufLen:] // Copy everything up until the next backslash nextBackslash := bytes.IndexByte(in, '\\') if nextBackslash == -1 { copy(buf, in) buf = buf[len(in):] break } else { copy(buf, in[:nextBackslash]) buf = buf[nextBackslash:] in = in[nextBackslash:] } } // Trim the out buffer to the amount that was actually emitted return out[:len(out)-len(buf)], nil }
package jsonparser func FuzzParseString(data []byte) int { r, err := ParseString(data) if err != nil || r == "" { return 0 } return 1 } func FuzzEachKey(data []byte) int { paths := [][]string{ {"name"}, {"order"}, {"nested", "a"}, {"nested", "b"}, {"nested2", "a"}, {"nested", "nested3", "b"}, {"arr", "[1]", "b"}, {"arrInt", "[3]"}, {"arrInt", "[5]"}, {"nested"}, {"arr", "["}, {"a\n", "b\n"}, } EachKey(data, func(idx int, value []byte, vt ValueType, err error) {}, paths...) return 1 } func FuzzDelete(data []byte) int { Delete(data, "test") return 1 } func FuzzSet(data []byte) int { _, err := Set(data, []byte(`"new value"`), "test") if err != nil { return 0 } return 1 } func FuzzObjectEach(data []byte) int { _ = ObjectEach(data, func(key, value []byte, valueType ValueType, off int) error { return nil }) return 1 } func FuzzParseFloat(data []byte) int { _, err := ParseFloat(data) if err != nil { return 0 } return 1 } func FuzzParseInt(data []byte) int { _, err := ParseInt(data) if err != nil { return 0 } return 1 } func FuzzParseBool(data []byte) int { _, err := ParseBoolean(data) if err != nil { return 0 } return 1 } func FuzzTokenStart(data []byte) int { _ = tokenStart(data) return 1 } func FuzzGetString(data []byte) int { _, err := GetString(data, "test") if err != nil { return 0 } return 1 } func FuzzGetFloat(data []byte) int { _, err := GetFloat(data, "test") if err != nil { return 0 } return 1 } func FuzzGetInt(data []byte) int { _, err := GetInt(data, "test") if err != nil { return 0 } return 1 } func FuzzGetBoolean(data []byte) int { _, err := GetBoolean(data, "test") if err != nil { return 0 } return 1 } func FuzzGetUnsafeString(data []byte) int { _, err := GetUnsafeString(data, "test") if err != nil { return 0 } return 1 }
package jsonparser import ( "bytes" "errors" "fmt" "strconv" ) // Errors var ( KeyPathNotFoundError = errors.New("Key path not found") UnknownValueTypeError = errors.New("Unknown value type") MalformedJsonError = errors.New("Malformed JSON error") MalformedStringError = errors.New("Value is string, but can't find closing '\"' symbol") MalformedArrayError = errors.New("Value is array, but can't find closing ']' symbol") MalformedObjectError = errors.New("Value looks like object, but can't find closing '}' symbol") MalformedValueError = errors.New("Value looks like Number/Boolean/None, but can't find its end: ',' or '}' symbol") OverflowIntegerError = errors.New("Value is number, but overflowed while parsing") MalformedStringEscapeError = errors.New("Encountered an invalid escape sequence in a string") NullValueError = errors.New("Value is null") ) // How much stack space to allocate for unescaping JSON strings; if a string longer // than this needs to be escaped, it will result in a heap allocation const unescapeStackBufSize = 64 func tokenEnd(data []byte) int { for i, c := range data { switch c { case ' ', '\n', '\r', '\t', ',', '}', ']': return i } } return len(data) } func findTokenStart(data []byte, token byte) int { for i := len(data) - 1; i >= 0; i-- { switch data[i] { case token: return i case '[', '{': return 0 } } return 0 } func findKeyStart(data []byte, key string) (int, error) { i := nextToken(data) if i == -1 { return i, KeyPathNotFoundError } ln := len(data) if ln > 0 && (data[i] == '{' || data[i] == '[') { i += 1 } var stackbuf [unescapeStackBufSize]byte // stack-allocated array for allocation-free unescaping of small strings if ku, err := Unescape(StringToBytes(key), stackbuf[:]); err == nil { key = bytesToString(&ku) } for i < ln { switch data[i] { case '"': i++ keyBegin := i strEnd, keyEscaped := stringEnd(data[i:]) if strEnd == -1 { break } i += strEnd keyEnd := i - 1 valueOffset := nextToken(data[i:]) if valueOffset == -1 { break } i += valueOffset // if string is a key, and key level match k := data[keyBegin:keyEnd] // for unescape: if there are no escape sequences, this is cheap; if there are, it is a // bit more expensive, but causes no allocations unless len(key) > unescapeStackBufSize if keyEscaped { if ku, err := Unescape(k, stackbuf[:]); err != nil { break } else { k = ku } } if data[i] == ':' && len(key) == len(k) && bytesToString(&k) == key { return keyBegin - 1, nil } case '[': end := blockEnd(data[i:], data[i], ']') if end != -1 { i = i + end } case '{': end := blockEnd(data[i:], data[i], '}') if end != -1 { i = i + end } } i++ } return -1, KeyPathNotFoundError } func tokenStart(data []byte) int { for i := len(data) - 1; i >= 0; i-- { switch data[i] { case '\n', '\r', '\t', ',', '{', '[': return i } } return 0 } // Find position of next character which is not whitespace func nextToken(data []byte) int { for i, c := range data { switch c { case ' ', '\n', '\r', '\t': continue default: return i } } return -1 } // Find position of last character which is not whitespace func lastToken(data []byte) int { for i := len(data) - 1; i >= 0; i-- { switch data[i] { case ' ', '\n', '\r', '\t': continue default: return i } } return -1 } // Tries to find the end of string // Support if string contains escaped quote symbols. func stringEnd(data []byte) (int, bool) { escaped := false for i, c := range data { if c == '"' { if !escaped { return i + 1, false } else { j := i - 1 for { if j < 0 || data[j] != '\\' { return i + 1, true // even number of backslashes } j-- if j < 0 || data[j] != '\\' { break // odd number of backslashes } j-- } } } else if c == '\\' { escaped = true } } return -1, escaped } // Find end of the data structure, array or object. // For array openSym and closeSym will be '[' and ']', for object '{' and '}' func blockEnd(data []byte, openSym byte, closeSym byte) int { level := 0 i := 0 ln := len(data) for i < ln { switch data[i] { case '"': // If inside string, skip it se, _ := stringEnd(data[i+1:]) if se == -1 { return -1 } i += se case openSym: // If open symbol, increase level level++ case closeSym: // If close symbol, increase level level-- // If we have returned to the original level, we're done if level == 0 { return i + 1 } } i++ } return -1 } func searchKeys(data []byte, keys ...string) int { keyLevel := 0 level := 0 i := 0 ln := len(data) lk := len(keys) lastMatched := true if lk == 0 { return 0 } var stackbuf [unescapeStackBufSize]byte // stack-allocated array for allocation-free unescaping of small strings for i < ln { switch data[i] { case '"': i++ keyBegin := i strEnd, keyEscaped := stringEnd(data[i:]) if strEnd == -1 { return -1 } i += strEnd keyEnd := i - 1 valueOffset := nextToken(data[i:]) if valueOffset == -1 { return -1 } i += valueOffset // if string is a key if data[i] == ':' { if level < 1 { return -1 } key := data[keyBegin:keyEnd] // for unescape: if there are no escape sequences, this is cheap; if there are, it is a // bit more expensive, but causes no allocations unless len(key) > unescapeStackBufSize var keyUnesc []byte if !keyEscaped { keyUnesc = key } else if ku, err := Unescape(key, stackbuf[:]); err != nil { return -1 } else { keyUnesc = ku } if level <= len(keys) { if equalStr(&keyUnesc, keys[level-1]) { lastMatched = true // if key level match if keyLevel == level-1 { keyLevel++ // If we found all keys in path if keyLevel == lk { return i + 1 } } } else { lastMatched = false } } else { return -1 } } else { i-- } case '{': // in case parent key is matched then only we will increase the level otherwise can directly // can move to the end of this block if !lastMatched { end := blockEnd(data[i:], '{', '}') if end == -1 { return -1 } i += end - 1 } else { level++ } case '}': level-- if level == keyLevel { keyLevel-- } case '[': // If we want to get array element by index if keyLevel == level && keys[level][0] == '[' { keyLen := len(keys[level]) if keyLen < 3 || keys[level][0] != '[' || keys[level][keyLen-1] != ']' { return -1 } aIdx, err := strconv.Atoi(keys[level][1 : keyLen-1]) if err != nil { return -1 } var curIdx int var valueFound []byte var valueOffset int curI := i ArrayEach(data[i:], func(value []byte, dataType ValueType, offset int, err error) { if curIdx == aIdx { valueFound = value valueOffset = offset if dataType == String { valueOffset = valueOffset - 2 valueFound = data[curI+valueOffset : curI+valueOffset+len(value)+2] } } curIdx += 1 }) if valueFound == nil { return -1 } else { subIndex := searchKeys(valueFound, keys[level+1:]...) if subIndex < 0 { return -1 } return i + valueOffset + subIndex } } else { // Do not search for keys inside arrays if arraySkip := blockEnd(data[i:], '[', ']'); arraySkip == -1 { return -1 } else { i += arraySkip - 1 } } case ':': // If encountered, JSON data is malformed return -1 } i++ } return -1 } func sameTree(p1, p2 []string) bool { minLen := len(p1) if len(p2) < minLen { minLen = len(p2) } for pi_1, p_1 := range p1[:minLen] { if p2[pi_1] != p_1 { return false } } return true } const stackArraySize = 128 func EachKey(data []byte, cb func(int, []byte, ValueType, error), paths ...[]string) int { var x struct{} var level, pathsMatched, i int ln := len(data) pathFlags := make([]bool, stackArraySize)[:] if len(paths) > cap(pathFlags) { pathFlags = make([]bool, len(paths))[:] } pathFlags = pathFlags[0:len(paths)] var maxPath int for _, p := range paths { if len(p) > maxPath { maxPath = len(p) } } pathsBuf := make([]string, stackArraySize)[:] if maxPath > cap(pathsBuf) { pathsBuf = make([]string, maxPath)[:] } pathsBuf = pathsBuf[0:maxPath] for i < ln { switch data[i] { case '"': i++ keyBegin := i strEnd, keyEscaped := stringEnd(data[i:]) if strEnd == -1 { return -1 } i += strEnd keyEnd := i - 1 valueOffset := nextToken(data[i:]) if valueOffset == -1 { return -1 } i += valueOffset // if string is a key, and key level match if data[i] == ':' { match := -1 key := data[keyBegin:keyEnd] // for unescape: if there are no escape sequences, this is cheap; if there are, it is a // bit more expensive, but causes no allocations unless len(key) > unescapeStackBufSize var keyUnesc []byte if !keyEscaped { keyUnesc = key } else { var stackbuf [unescapeStackBufSize]byte if ku, err := Unescape(key, stackbuf[:]); err != nil { return -1 } else { keyUnesc = ku } } if maxPath >= level { if level < 1 { cb(-1, nil, Unknown, MalformedJsonError) return -1 } pathsBuf[level-1] = bytesToString(&keyUnesc) for pi, p := range paths { if len(p) != level || pathFlags[pi] || !equalStr(&keyUnesc, p[level-1]) || !sameTree(p, pathsBuf[:level]) { continue } match = pi pathsMatched++ pathFlags[pi] = true v, dt, _, e := Get(data[i+1:]) cb(pi, v, dt, e) if pathsMatched == len(paths) { break } } if pathsMatched == len(paths) { return i } } if match == -1 { tokenOffset := nextToken(data[i+1:]) i += tokenOffset if data[i] == '{' { blockSkip := blockEnd(data[i:], '{', '}') i += blockSkip + 1 } } if i < ln { switch data[i] { case '{', '}', '[', '"': i-- } } } else { i-- } case '{': level++ case '}': level-- case '[': var ok bool arrIdxFlags := make(map[int]struct{}) pIdxFlags := make([]bool, stackArraySize)[:] if len(paths) > cap(pIdxFlags) { pIdxFlags = make([]bool, len(paths))[:] } pIdxFlags = pIdxFlags[0:len(paths)] if level < 0 { cb(-1, nil, Unknown, MalformedJsonError) return -1 } for pi, p := range paths { if len(p) < level+1 || pathFlags[pi] || p[level][0] != '[' || !sameTree(p, pathsBuf[:level]) { continue } if len(p[level]) >= 2 { aIdx, _ := strconv.Atoi(p[level][1 : len(p[level])-1]) arrIdxFlags[aIdx] = x pIdxFlags[pi] = true } } if len(arrIdxFlags) > 0 { level++ var curIdx int arrOff, _ := ArrayEach(data[i:], func(value []byte, dataType ValueType, offset int, err error) { if _, ok = arrIdxFlags[curIdx]; ok { for pi, p := range paths { if pIdxFlags[pi] { aIdx, _ := strconv.Atoi(p[level-1][1 : len(p[level-1])-1]) if curIdx == aIdx { of := searchKeys(value, p[level:]...) pathsMatched++ pathFlags[pi] = true if of != -1 { v, dt, _, e := Get(value[of:]) cb(pi, v, dt, e) } } } } } curIdx += 1 }) if pathsMatched == len(paths) { return i } i += arrOff - 1 } else { // Do not search for keys inside arrays if arraySkip := blockEnd(data[i:], '[', ']'); arraySkip == -1 { return -1 } else { i += arraySkip - 1 } } case ']': level-- } i++ } return -1 } // Data types available in valid JSON data. type ValueType int const ( NotExist = ValueType(iota) String Number Object Array Boolean Null Unknown ) func (vt ValueType) String() string { switch vt { case NotExist: return "non-existent" case String: return "string" case Number: return "number" case Object: return "object" case Array: return "array" case Boolean: return "boolean" case Null: return "null" default: return "unknown" } } var ( trueLiteral = []byte("true") falseLiteral = []byte("false") nullLiteral = []byte("null") ) func createInsertComponent(keys []string, setValue []byte, comma, object bool) []byte { isIndex := string(keys[0][0]) == "[" offset := 0 lk := calcAllocateSpace(keys, setValue, comma, object) buffer := make([]byte, lk, lk) if comma { offset += WriteToBuffer(buffer[offset:], ",") } if isIndex && !comma { offset += WriteToBuffer(buffer[offset:], "[") } else { if object { offset += WriteToBuffer(buffer[offset:], "{") } if !isIndex { offset += WriteToBuffer(buffer[offset:], "\"") offset += WriteToBuffer(buffer[offset:], keys[0]) offset += WriteToBuffer(buffer[offset:], "\":") } } for i := 1; i < len(keys); i++ { if string(keys[i][0]) == "[" { offset += WriteToBuffer(buffer[offset:], "[") } else { offset += WriteToBuffer(buffer[offset:], "{\"") offset += WriteToBuffer(buffer[offset:], keys[i]) offset += WriteToBuffer(buffer[offset:], "\":") } } offset += WriteToBuffer(buffer[offset:], string(setValue)) for i := len(keys) - 1; i > 0; i-- { if string(keys[i][0]) == "[" { offset += WriteToBuffer(buffer[offset:], "]") } else { offset += WriteToBuffer(buffer[offset:], "}") } } if isIndex && !comma { offset += WriteToBuffer(buffer[offset:], "]") } if object && !isIndex { offset += WriteToBuffer(buffer[offset:], "}") } return buffer } func calcAllocateSpace(keys []string, setValue []byte, comma, object bool) int { isIndex := string(keys[0][0]) == "[" lk := 0 if comma { // , lk += 1 } if isIndex && !comma { // [] lk += 2 } else { if object { // { lk += 1 } if !isIndex { // "keys[0]" lk += len(keys[0]) + 3 } } lk += len(setValue) for i := 1; i < len(keys); i++ { if string(keys[i][0]) == "[" { // [] lk += 2 } else { // {"keys[i]":setValue} lk += len(keys[i]) + 5 } } if object && !isIndex { // } lk += 1 } return lk } func WriteToBuffer(buffer []byte, str string) int { copy(buffer, str) return len(str) } /* Del - Receives existing data structure, path to delete. Returns: `data` - return modified data */ func Delete(data []byte, keys ...string) []byte { lk := len(keys) if lk == 0 { return data[:0] } array := false if len(keys[lk-1]) > 0 && string(keys[lk-1][0]) == "[" { array = true } var startOffset, keyOffset int endOffset := len(data) var err error if !array { if len(keys) > 1 { _, _, startOffset, endOffset, err = internalGet(data, keys[:lk-1]...) if err == KeyPathNotFoundError { // problem parsing the data return data } } keyOffset, err = findKeyStart(data[startOffset:endOffset], keys[lk-1]) if err == KeyPathNotFoundError { // problem parsing the data return data } keyOffset += startOffset _, _, _, subEndOffset, _ := internalGet(data[startOffset:endOffset], keys[lk-1]) endOffset = startOffset + subEndOffset tokEnd := tokenEnd(data[endOffset:]) tokStart := findTokenStart(data[:keyOffset], ","[0]) if data[endOffset+tokEnd] == ","[0] { endOffset += tokEnd + 1 } else if data[endOffset+tokEnd] == " "[0] && len(data) > endOffset+tokEnd+1 && data[endOffset+tokEnd+1] == ","[0] { endOffset += tokEnd + 2 } else if data[endOffset+tokEnd] == "}"[0] && data[tokStart] == ","[0] { keyOffset = tokStart } } else { _, _, keyOffset, endOffset, err = internalGet(data, keys...) if err == KeyPathNotFoundError { // problem parsing the data return data } tokEnd := tokenEnd(data[endOffset:]) tokStart := findTokenStart(data[:keyOffset], ","[0]) if data[endOffset+tokEnd] == ","[0] { endOffset += tokEnd + 1 } else if data[endOffset+tokEnd] == "]"[0] && data[tokStart] == ","[0] { keyOffset = tokStart } } // We need to remove remaining trailing comma if we delete las element in the object prevTok := lastToken(data[:keyOffset]) remainedValue := data[endOffset:] var newOffset int if nextToken(remainedValue) > -1 && remainedValue[nextToken(remainedValue)] == '}' && data[prevTok] == ',' { newOffset = prevTok } else { newOffset = prevTok + 1 } // We have to make a copy here if we don't want to mangle the original data, because byte slices are // accessed by reference and not by value dataCopy := make([]byte, len(data)) copy(dataCopy, data) data = append(dataCopy[:newOffset], dataCopy[endOffset:]...) return data } /* Set - Receives existing data structure, path to set, and data to set at that key. Returns: `value` - modified byte array `err` - On any parsing error */ func Set(data []byte, setValue []byte, keys ...string) (value []byte, err error) { // ensure keys are set if len(keys) == 0 { return nil, KeyPathNotFoundError } _, _, startOffset, endOffset, err := internalGet(data, keys...) if err != nil { if err != KeyPathNotFoundError { // problem parsing the data return nil, err } // full path doesnt exist // does any subpath exist? var depth int for i := range keys { _, _, start, end, sErr := internalGet(data, keys[:i+1]...) if sErr != nil { break } else { endOffset = end startOffset = start depth++ } } comma := true object := false if endOffset == -1 { firstToken := nextToken(data) // We can't set a top-level key if data isn't an object if firstToken < 0 || data[firstToken] != '{' { return nil, KeyPathNotFoundError } // Don't need a comma if the input is an empty object secondToken := firstToken + 1 + nextToken(data[firstToken+1:]) if data[secondToken] == '}' { comma = false } // Set the top level key at the end (accounting for any trailing whitespace) // This assumes last token is valid like '}', could check and return error endOffset = lastToken(data) } depthOffset := endOffset if depth != 0 { // if subpath is a non-empty object, add to it // or if subpath is a non-empty array, add to it if (data[startOffset] == '{' && data[startOffset+1+nextToken(data[startOffset+1:])] != '}') || (data[startOffset] == '[' && data[startOffset+1+nextToken(data[startOffset+1:])] == '{') && keys[depth:][0][0] == 91 { depthOffset-- startOffset = depthOffset // otherwise, over-write it with a new object } else { comma = false object = true } } else { startOffset = depthOffset } value = append(data[:startOffset], append(createInsertComponent(keys[depth:], setValue, comma, object), data[depthOffset:]...)...) } else { // path currently exists startComponent := data[:startOffset] endComponent := data[endOffset:] value = make([]byte, len(startComponent)+len(endComponent)+len(setValue)) newEndOffset := startOffset + len(setValue) copy(value[0:startOffset], startComponent) copy(value[startOffset:newEndOffset], setValue) copy(value[newEndOffset:], endComponent) } return value, nil } func getType(data []byte, offset int) ([]byte, ValueType, int, error) { var dataType ValueType endOffset := offset // if string value if data[offset] == '"' { dataType = String if idx, _ := stringEnd(data[offset+1:]); idx != -1 { endOffset += idx + 1 } else { return nil, dataType, offset, MalformedStringError } } else if data[offset] == '[' { // if array value dataType = Array // break label, for stopping nested loops endOffset = blockEnd(data[offset:], '[', ']') if endOffset == -1 { return nil, dataType, offset, MalformedArrayError } endOffset += offset } else if data[offset] == '{' { // if object value dataType = Object // break label, for stopping nested loops endOffset = blockEnd(data[offset:], '{', '}') if endOffset == -1 { return nil, dataType, offset, MalformedObjectError } endOffset += offset } else { // Number, Boolean or None end := tokenEnd(data[endOffset:]) if end == -1 { return nil, dataType, offset, MalformedValueError } value := data[offset : endOffset+end] switch data[offset] { case 't', 'f': // true or false if bytes.Equal(value, trueLiteral) || bytes.Equal(value, falseLiteral) { dataType = Boolean } else { return nil, Unknown, offset, UnknownValueTypeError } case 'u', 'n': // undefined or null if bytes.Equal(value, nullLiteral) { dataType = Null } else { return nil, Unknown, offset, UnknownValueTypeError } case '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '-': dataType = Number default: return nil, Unknown, offset, UnknownValueTypeError } endOffset += end } return data[offset:endOffset], dataType, endOffset, nil } /* Get - Receives data structure, and key path to extract value from. Returns: `value` - Pointer to original data structure containing key value, or just empty slice if nothing found or error `dataType` - Can be: `NotExist`, `String`, `Number`, `Object`, `Array`, `Boolean` or `Null` `offset` - Offset from provided data structure where key value ends. Used mostly internally, for example for `ArrayEach` helper. `err` - If key not found or any other parsing issue it should return error. If key not found it also sets `dataType` to `NotExist` Accept multiple keys to specify path to JSON value (in case of quering nested structures). If no keys provided it will try to extract closest JSON value (simple ones or object/array), useful for reading streams or arrays, see `ArrayEach` implementation. */ func Get(data []byte, keys ...string) (value []byte, dataType ValueType, offset int, err error) { a, b, _, d, e := internalGet(data, keys...) return a, b, d, e } func internalGet(data []byte, keys ...string) (value []byte, dataType ValueType, offset, endOffset int, err error) { if len(keys) > 0 { if offset = searchKeys(data, keys...); offset == -1 { return nil, NotExist, -1, -1, KeyPathNotFoundError } } // Go to closest value nO := nextToken(data[offset:]) if nO == -1 { return nil, NotExist, offset, -1, MalformedJsonError } offset += nO value, dataType, endOffset, err = getType(data, offset) if err != nil { return value, dataType, offset, endOffset, err } // Strip quotes from string values if dataType == String { value = value[1 : len(value)-1] } return value[:len(value):len(value)], dataType, offset, endOffset, nil } // ArrayEach is used when iterating arrays, accepts a callback function with the same return arguments as `Get`. func ArrayEach(data []byte, cb func(value []byte, dataType ValueType, offset int, err error), keys ...string) (offset int, err error) { if len(data) == 0 { return -1, MalformedObjectError } nT := nextToken(data) if nT == -1 { return -1, MalformedJsonError } offset = nT + 1 if len(keys) > 0 { if offset = searchKeys(data, keys...); offset == -1 { return offset, KeyPathNotFoundError } // Go to closest value nO := nextToken(data[offset:]) if nO == -1 { return offset, MalformedJsonError } offset += nO if data[offset] != '[' { return offset, MalformedArrayError } offset++ } nO := nextToken(data[offset:]) if nO == -1 { return offset, MalformedJsonError } offset += nO if data[offset] == ']' { return offset, nil } for true { v, t, o, e := Get(data[offset:]) if e != nil { return offset, e } if o == 0 { break } if t != NotExist { cb(v, t, offset+o-len(v), e) } if e != nil { break } offset += o skipToToken := nextToken(data[offset:]) if skipToToken == -1 { return offset, MalformedArrayError } offset += skipToToken if data[offset] == ']' { break } if data[offset] != ',' { return offset, MalformedArrayError } offset++ } return offset, nil } // ObjectEach iterates over the key-value pairs of a JSON object, invoking a given callback for each such entry func ObjectEach(data []byte, callback func(key []byte, value []byte, dataType ValueType, offset int) error, keys ...string) (err error) { offset := 0 // Descend to the desired key, if requested if len(keys) > 0 { if off := searchKeys(data, keys...); off == -1 { return KeyPathNotFoundError } else { offset = off } } // Validate and skip past opening brace if off := nextToken(data[offset:]); off == -1 { return MalformedObjectError } else if offset += off; data[offset] != '{' { return MalformedObjectError } else { offset++ } // Skip to the first token inside the object, or stop if we find the ending brace if off := nextToken(data[offset:]); off == -1 { return MalformedJsonError } else if offset += off; data[offset] == '}' { return nil } // Loop pre-condition: data[offset] points to what should be either the next entry's key, or the closing brace (if it's anything else, the JSON is malformed) for offset < len(data) { // Step 1: find the next key var key []byte // Check what the the next token is: start of string, end of object, or something else (error) switch data[offset] { case '"': offset++ // accept as string and skip opening quote case '}': return nil // we found the end of the object; stop and return success default: return MalformedObjectError } // Find the end of the key string var keyEscaped bool if off, esc := stringEnd(data[offset:]); off == -1 { return MalformedJsonError } else { key, keyEscaped = data[offset:offset+off-1], esc offset += off } // Unescape the string if needed if keyEscaped { var stackbuf [unescapeStackBufSize]byte // stack-allocated array for allocation-free unescaping of small strings if keyUnescaped, err := Unescape(key, stackbuf[:]); err != nil { return MalformedStringEscapeError } else { key = keyUnescaped } } // Step 2: skip the colon if off := nextToken(data[offset:]); off == -1 { return MalformedJsonError } else if offset += off; data[offset] != ':' { return MalformedJsonError } else { offset++ } // Step 3: find the associated value, then invoke the callback if value, valueType, off, err := Get(data[offset:]); err != nil { return err } else if err := callback(key, value, valueType, offset+off); err != nil { // Invoke the callback here! return err } else { offset += off } // Step 4: skip over the next comma to the following token, or stop if we hit the ending brace if off := nextToken(data[offset:]); off == -1 { return MalformedArrayError } else { offset += off switch data[offset] { case '}': return nil // Stop if we hit the close brace case ',': offset++ // Ignore the comma default: return MalformedObjectError } } // Skip to the next token after the comma if off := nextToken(data[offset:]); off == -1 { return MalformedArrayError } else { offset += off } } return MalformedObjectError // we shouldn't get here; it's expected that we will return via finding the ending brace } // GetUnsafeString returns the value retrieved by `Get`, use creates string without memory allocation by mapping string to slice memory. It does not handle escape symbols. func GetUnsafeString(data []byte, keys ...string) (val string, err error) { v, _, _, e := Get(data, keys...) if e != nil { return "", e } return bytesToString(&v), nil } // GetString returns the value retrieved by `Get`, cast to a string if possible, trying to properly handle escape and utf8 symbols // If key data type do not match, it will return an error. func GetString(data []byte, keys ...string) (val string, err error) { v, t, _, e := Get(data, keys...) if e != nil { return "", e } if t != String { if t == Null { return "", NullValueError } return "", fmt.Errorf("Value is not a string: %s", string(v)) } // If no escapes return raw content if bytes.IndexByte(v, '\\') == -1 { return string(v), nil } return ParseString(v) } // GetFloat returns the value retrieved by `Get`, cast to a float64 if possible. // The offset is the same as in `Get`. // If key data type do not match, it will return an error. func GetFloat(data []byte, keys ...string) (val float64, err error) { v, t, _, e := Get(data, keys...) if e != nil { return 0, e } if t != Number { if t == Null { return 0, NullValueError } return 0, fmt.Errorf("Value is not a number: %s", string(v)) } return ParseFloat(v) } // GetInt returns the value retrieved by `Get`, cast to a int64 if possible. // If key data type do not match, it will return an error. func GetInt(data []byte, keys ...string) (val int64, err error) { v, t, _, e := Get(data, keys...) if e != nil { return 0, e } if t != Number { if t == Null { return 0, NullValueError } return 0, fmt.Errorf("Value is not a number: %s", string(v)) } return ParseInt(v) } // GetBoolean returns the value retrieved by `Get`, cast to a bool if possible. // The offset is the same as in `Get`. // If key data type do not match, it will return error. func GetBoolean(data []byte, keys ...string) (val bool, err error) { v, t, _, e := Get(data, keys...) if e != nil { return false, e } if t != Boolean { if t == Null { return false, NullValueError } return false, fmt.Errorf("Value is not a boolean: %s", string(v)) } return ParseBoolean(v) } // ParseBoolean parses a Boolean ValueType into a Go bool (not particularly useful, but here for completeness) func ParseBoolean(b []byte) (bool, error) { switch { case bytes.Equal(b, trueLiteral): return true, nil case bytes.Equal(b, falseLiteral): return false, nil default: return false, MalformedValueError } } // ParseString parses a String ValueType into a Go string (the main parsing work is unescaping the JSON string) func ParseString(b []byte) (string, error) { var stackbuf [unescapeStackBufSize]byte // stack-allocated array for allocation-free unescaping of small strings if bU, err := Unescape(b, stackbuf[:]); err != nil { return "", MalformedValueError } else { return string(bU), nil } } // ParseNumber parses a Number ValueType into a Go float64 func ParseFloat(b []byte) (float64, error) { if v, err := parseFloat(&b); err != nil { return 0, MalformedValueError } else { return v, nil } } // ParseInt parses a Number ValueType into a Go int64 func ParseInt(b []byte) (int64, error) { if v, ok, overflow := parseInt(b); !ok { if overflow { return 0, OverflowIntegerError } return 0, MalformedValueError } else { return v, nil } }