12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043104410451046104710481049105010511052105310541055105610571058105910601061106210631064106510661067106810691070107110721073107410751076107710781079108010811082108310841085108610871088108910901091109210931094109510961097109810991100110111021103110411051106110711081109111011111112111311141115111611171118111911201121112211231124112511261127112811291130113111321133113411351136113711381139114011411142114311441145114611471148114911501151115211531154115511561157115811591160116111621163116411651166116711681169117011711172117311741175117611771178117911801181118211831184118511861187118811891190119111921193119411951196119711981199120012011202120312041205120612071208120912101211121212131214121512161217121812191220122112221223122412251226122712281229123012311232123312341235123612371238123912401241124212431244 |
- // Package jlexer contains a JSON lexer implementation.
- //
- // It is expected that it is mostly used with generated parser code, so the interface is tuned
- // for a parser that knows what kind of data is expected.
- package jlexer
- import (
- "bytes"
- "encoding/base64"
- "encoding/json"
- "errors"
- "fmt"
- "io"
- "strconv"
- "unicode"
- "unicode/utf16"
- "unicode/utf8"
- "github.com/josharian/intern"
- )
// tokenKind determines type of a token.
type tokenKind byte

const (
	tokenUndef  tokenKind = iota // No token scanned yet.
	tokenDelim                   // Delimiter: one of '{', '}', '[' or ']'.
	tokenString                  // A string literal, e.g. "abc\u1234".
	tokenNumber                  // Number literal, e.g. 1.5e5.
	tokenBool                    // Boolean literal: true or false.
	tokenNull                    // null keyword.
)

// token describes a single token: type, position in the input and value.
type token struct {
	kind            tokenKind // Type of a token.
	boolValue       bool      // Value if a boolean literal token.
	byteValueCloned bool      // true if byteValue was allocated and does not refer to original json body.
	byteValue       []byte    // Raw value of a token (string contents without quotes, or number text).
	delimValue      byte      // Delimiter character when kind == tokenDelim; 0 otherwise.
}
// Lexer is a JSON lexer: it iterates over JSON tokens in a byte slice.
//
// The zero value plus a Data slice is ready for use; tokens are produced
// lazily via FetchToken and consumed by the typed accessor methods.
type Lexer struct {
	Data []byte // Input data given to the lexer.

	start int   // Start of the current token in Data.
	pos   int   // Current unscanned position in the input stream.
	token token // Last scanned token, if token.kind != tokenUndef.

	firstElement bool // Whether current element is the first in array or an object.
	wantSep      byte // A comma or a colon character, which need to occur before a token.

	UseMultipleErrors bool          // If we want to use multiple errors instead of stopping at the first one.
	fatalError        error         // Fatal error occurred during lexing. It is usually a syntax error.
	multipleErrors    []*LexerError // Semantic errors occurred during lexing. Marshalling will be continued after finding this errors.
}
// FetchToken scans the input for the next token.
//
// It skips leading whitespace, enforces any pending separator (comma/colon
// requested via WantComma/WantColon), and dispatches on the first
// significant character to the appropriate fetch* helper. On end of input
// it records io.EOF as the fatal error.
func (r *Lexer) FetchToken() {
	r.token.kind = tokenUndef
	r.start = r.pos

	// Check if r.Data has r.pos element.
	// If it doesn't, it means corrupted input data.
	if len(r.Data) < r.pos {
		r.errParse("Unexpected end of data")
		return
	}
	// Determine the type of a token by skipping whitespace and reading the
	// first character. NOTE: r.pos is advanced in lockstep with the range
	// iteration only for the whitespace/separator cases; every other case
	// returns.
	for _, c := range r.Data[r.pos:] {
		switch c {
		case ':', ',':
			if r.wantSep == c {
				r.pos++
				r.start++
				r.wantSep = 0
			} else {
				// NOTE(review): errSyntax does not return here, so scanning
				// continues; if the loop runs off the end, the syntax error
				// is overwritten by io.EOF below — confirm this is intended.
				r.errSyntax()
			}

		case ' ', '\t', '\r', '\n':
			r.pos++
			r.start++

		case '"':
			if r.wantSep != 0 {
				r.errSyntax()
			}

			r.token.kind = tokenString
			r.fetchString()
			return

		case '{', '[':
			if r.wantSep != 0 {
				r.errSyntax()
			}
			r.firstElement = true
			r.token.kind = tokenDelim
			r.token.delimValue = r.Data[r.pos]
			r.pos++
			return

		case '}', ']':
			// A closing delimiter is valid right after an opening one
			// (empty container) or after a comma-separated element.
			if !r.firstElement && (r.wantSep != ',') {
				r.errSyntax()
			}
			r.wantSep = 0
			r.token.kind = tokenDelim
			r.token.delimValue = r.Data[r.pos]
			r.pos++
			return

		case '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '-':
			if r.wantSep != 0 {
				r.errSyntax()
			}

			r.token.kind = tokenNumber
			r.fetchNumber()
			return

		case 'n':
			if r.wantSep != 0 {
				r.errSyntax()
			}

			r.token.kind = tokenNull
			r.fetchNull()
			return

		case 't':
			if r.wantSep != 0 {
				r.errSyntax()
			}

			r.token.kind = tokenBool
			r.token.boolValue = true
			r.fetchTrue()
			return

		case 'f':
			if r.wantSep != 0 {
				r.errSyntax()
			}

			r.token.kind = tokenBool
			r.token.boolValue = false
			r.fetchFalse()
			return

		default:
			r.errSyntax()
			return
		}
	}
	r.fatalError = io.EOF
	return
}
// isTokenEnd reports whether c may legally follow a non-delimiter token:
// whitespace, a structural delimiter, or a separator character.
func isTokenEnd(c byte) bool {
	switch c {
	case ' ', '\t', '\r', '\n', '[', ']', '{', '}', ',', ':':
		return true
	}
	return false
}
// fetchNull fetches and checks remaining bytes of null keyword.
// The leading 'n' was already seen by FetchToken; on mismatch the position
// is rewound so error reporting points at the keyword start.
func (r *Lexer) fetchNull() {
	r.pos += 4
	if r.pos > len(r.Data) ||
		r.Data[r.pos-3] != 'u' ||
		r.Data[r.pos-2] != 'l' ||
		r.Data[r.pos-1] != 'l' ||
		(r.pos != len(r.Data) && !isTokenEnd(r.Data[r.pos])) {

		r.pos -= 4
		r.errSyntax()
	}
}

// fetchTrue fetches and checks remaining bytes of true keyword.
// The leading 't' was already seen by FetchToken.
func (r *Lexer) fetchTrue() {
	r.pos += 4
	if r.pos > len(r.Data) ||
		r.Data[r.pos-3] != 'r' ||
		r.Data[r.pos-2] != 'u' ||
		r.Data[r.pos-1] != 'e' ||
		(r.pos != len(r.Data) && !isTokenEnd(r.Data[r.pos])) {

		r.pos -= 4
		r.errSyntax()
	}
}

// fetchFalse fetches and checks remaining bytes of false keyword.
// The leading 'f' was already seen by FetchToken.
func (r *Lexer) fetchFalse() {
	r.pos += 5
	if r.pos > len(r.Data) ||
		r.Data[r.pos-4] != 'a' ||
		r.Data[r.pos-3] != 'l' ||
		r.Data[r.pos-2] != 's' ||
		r.Data[r.pos-1] != 'e' ||
		(r.pos != len(r.Data) && !isTokenEnd(r.Data[r.pos])) {

		r.pos -= 5
		r.errSyntax()
	}
}
// fetchNumber scans a number literal token.
//
// The first character (digit or '-') was already accepted by FetchToken.
// The scan is permissive relative to strict JSON grammar (e.g. it does not
// reject leading zeros); exact validation happens later in strconv parsing.
// On success r.token.byteValue holds the full number text.
func (r *Lexer) fetchNumber() {
	hasE := false
	afterE := false // true immediately after 'e'/'E', where a sign is allowed
	hasDot := false

	r.pos++
	for i, c := range r.Data[r.pos:] {
		switch {
		case c >= '0' && c <= '9':
			afterE = false
		case c == '.' && !hasDot:
			hasDot = true
		case (c == 'e' || c == 'E') && !hasE:
			hasE = true
			hasDot = true // no '.' allowed in the exponent part
			afterE = true
		case (c == '+' || c == '-') && afterE:
			afterE = false
		default:
			r.pos += i
			if !isTokenEnd(c) {
				r.errSyntax()
			} else {
				r.token.byteValue = r.Data[r.start:r.pos]
			}
			return
		}
	}

	// Number runs to the end of input.
	r.pos = len(r.Data)
	r.token.byteValue = r.Data[r.start:]
}
// findStringLen tries to scan into the string literal for ending quote char
// to determine required size.
//
// The size will be exact if no escapes are present and may be inexact if
// there are escaped chars. When no unescaped closing quote exists,
// isValid is false and length is the total number of bytes scanned
// (including any bytes skipped past escaped quotes), so callers can advance
// to the end of the unterminated literal for error reporting.
func findStringLen(data []byte) (isValid bool, length int) {
	for {
		idx := bytes.IndexByte(data, '"')
		if idx == -1 {
			// Fix: include bytes already skipped in previous iterations;
			// returning only len(data) under-reported the scanned length
			// whenever escaped quotes preceded the end of input.
			return false, length + len(data)
		}
		if idx == 0 || data[idx-1] != '\\' {
			return true, length + idx
		}

		// count \\\\\\\ sequences. even number of slashes means quote is not really escaped
		cnt := 1
		for idx-cnt-1 >= 0 && data[idx-cnt-1] == '\\' {
			cnt++
		}
		if cnt%2 == 0 {
			return true, length + idx
		}
		length += idx + 1
		data = data[idx+1:]
	}
}
// unescapeStringToken performs unescaping of string token.
// If no escaping is needed, original string is returned, otherwise - a new
// one allocated. byteValueCloned is set when a new buffer was allocated so
// later consumers know byteValue no longer aliases r.Data.
func (r *Lexer) unescapeStringToken() (err error) {
	data := r.token.byteValue
	var unescapedData []byte

	for {
		i := bytes.IndexByte(data, '\\')
		if i == -1 {
			break
		}

		escapedRune, escapedBytes, err := decodeEscape(data[i:])
		if err != nil {
			r.errParse(err.Error())
			return err
		}

		// Lazily allocate the output buffer on the first escape found.
		if unescapedData == nil {
			unescapedData = make([]byte, 0, len(r.token.byteValue))
		}

		var d [4]byte
		s := utf8.EncodeRune(d[:], escapedRune)

		unescapedData = append(unescapedData, data[:i]...)
		unescapedData = append(unescapedData, d[:s]...)

		data = data[i+escapedBytes:]
	}

	if unescapedData != nil {
		r.token.byteValue = append(unescapedData, data...)
		r.token.byteValueCloned = true
	}
	return
}
// getu4 decodes \uXXXX from the beginning of s, returning the hex value,
// or it returns -1 if s is too short or not a valid \u escape.
func getu4(s []byte) rune {
	if len(s) < 6 || s[0] != '\\' || s[1] != 'u' {
		return -1
	}

	var val rune
	for _, c := range s[2:6] {
		val <<= 4
		switch {
		case c >= '0' && c <= '9':
			val |= rune(c - '0')
		case c >= 'a' && c <= 'f':
			val |= rune(c - 'a' + 10)
		case c >= 'A' && c <= 'F':
			val |= rune(c - 'A' + 10)
		default:
			return -1
		}
	}
	return val
}
// decodeEscape processes a single escape sequence at the start of data and
// returns the decoded rune and the number of bytes consumed.
//
// Simple escapes (\" \/ \\ \b \f \n \r \t) consume 2 bytes. \uXXXX consumes
// 6 bytes, or 12 when the value is the first half of a valid UTF-16
// surrogate pair; an unpaired surrogate decodes to U+FFFD per the usual
// JSON handling. An unrecognized escape yields an error.
func decodeEscape(data []byte) (decoded rune, bytesProcessed int, err error) {
	if len(data) < 2 {
		return 0, 0, errors.New("incorrect escape symbol \\ at the end of token")
	}

	c := data[1]
	switch c {
	case '"', '/', '\\':
		return rune(c), 2, nil
	case 'b':
		return '\b', 2, nil
	case 'f':
		return '\f', 2, nil
	case 'n':
		return '\n', 2, nil
	case 'r':
		return '\r', 2, nil
	case 't':
		return '\t', 2, nil
	case 'u':
		rr := getu4(data)
		if rr < 0 {
			return 0, 0, errors.New("incorrectly escaped \\uXXXX sequence")
		}

		read := 6
		if utf16.IsSurrogate(rr) {
			// getu4 safely returns -1 if data[read:] is too short, which
			// makes DecodeRune produce the replacement char below.
			rr1 := getu4(data[read:])
			if dec := utf16.DecodeRune(rr, rr1); dec != unicode.ReplacementChar {
				read += 6
				rr = dec
			} else {
				rr = unicode.ReplacementChar
			}
		}
		return rr, read, nil
	}

	return 0, 0, errors.New("incorrectly escaped bytes")
}
// fetchString scans a string literal token.
//
// The opening quote was already seen by FetchToken. byteValue is set to the
// raw contents between the quotes (escapes are NOT processed here; see
// unescapeStringToken).
func (r *Lexer) fetchString() {
	r.pos++
	data := r.Data[r.pos:]

	isValid, length := findStringLen(data)
	if !isValid {
		r.pos += length
		r.errParse("unterminated string literal")
		return
	}
	r.token.byteValue = data[:length]
	r.pos += length + 1 // skip closing '"' as well
}
// scanToken scans the next token if no token is currently available in the lexer.
func (r *Lexer) scanToken() {
	if r.token.kind != tokenUndef || r.fatalError != nil {
		return
	}

	r.FetchToken()
}

// consume resets the current token to allow scanning the next one.
// boolValue/byteValue are left stale on purpose; kind == tokenUndef marks
// them as invalid.
func (r *Lexer) consume() {
	r.token.kind = tokenUndef
	r.token.byteValueCloned = false
	r.token.delimValue = 0
}

// Ok returns true if no error (including io.EOF) was encountered during scanning.
func (r *Lexer) Ok() bool {
	return r.fatalError == nil
}
// maxErrorContextLen bounds how much of the input is quoted in error messages.
const maxErrorContextLen = 13

// errParse records a fatal parse error at the current position, quoting a
// short context snippet from the input. A no-op if a fatal error is already set.
func (r *Lexer) errParse(what string) {
	if r.fatalError == nil {
		var str string
		if len(r.Data)-r.pos <= maxErrorContextLen {
			str = string(r.Data)
		} else {
			str = string(r.Data[r.pos:r.pos+maxErrorContextLen-3]) + "..."
		}
		r.fatalError = &LexerError{
			Reason: what,
			Offset: r.pos,
			Data:   str,
		}
	}
}

// errSyntax records a generic fatal syntax error at the current position.
func (r *Lexer) errSyntax() {
	r.errParse("syntax error")
}
// errInvalidToken reports that the current token is not what the caller
// expected.
//
// With UseMultipleErrors set, the error is recorded as non-fatal: the lexer
// rewinds to the token start, skips the offending value entirely, and — when
// an array/object was expected — fabricates the matching closing delimiter
// so that generated parsing code can continue. Otherwise a fatal error with
// a snippet of the token value is recorded.
func (r *Lexer) errInvalidToken(expected string) {
	if r.fatalError != nil {
		return
	}
	if r.UseMultipleErrors {
		r.pos = r.start
		r.consume()
		r.SkipRecursive()
		switch expected {
		case "[":
			r.token.delimValue = ']'
			r.token.kind = tokenDelim
		case "{":
			r.token.delimValue = '}'
			r.token.kind = tokenDelim
		}
		r.addNonfatalError(&LexerError{
			Reason: fmt.Sprintf("expected %s", expected),
			Offset: r.start,
			Data:   string(r.Data[r.start:r.pos]),
		})
		return
	}

	var str string
	if len(r.token.byteValue) <= maxErrorContextLen {
		str = string(r.token.byteValue)
	} else {
		str = string(r.token.byteValue[:maxErrorContextLen-3]) + "..."
	}
	r.fatalError = &LexerError{
		Reason: fmt.Sprintf("expected %s", expected),
		Offset: r.pos,
		Data:   str,
	}
}

// GetPos returns the current unscanned position in the input stream.
func (r *Lexer) GetPos() int {
	return r.pos
}
// Delim consumes a token and verifies that it is the given delimiter.
func (r *Lexer) Delim(c byte) {
	if r.token.kind == tokenUndef && r.Ok() {
		r.FetchToken()
	}

	if !r.Ok() || r.token.delimValue != c {
		r.consume() // errInvalidToken can change token if UseMultipleErrors is enabled.
		r.errInvalidToken(string([]byte{c}))
	} else {
		r.consume()
	}
}

// IsDelim returns true if there was no scanning error and next token is the
// given delimiter. Note that it also returns true on a scanning error, so
// loops of the form `for !r.IsDelim(']')` terminate once an error occurs.
func (r *Lexer) IsDelim(c byte) bool {
	if r.token.kind == tokenUndef && r.Ok() {
		r.FetchToken()
	}
	return !r.Ok() || r.token.delimValue == c
}

// Null verifies that the next token is null and consumes it.
func (r *Lexer) Null() {
	if r.token.kind == tokenUndef && r.Ok() {
		r.FetchToken()
	}
	if !r.Ok() || r.token.kind != tokenNull {
		r.errInvalidToken("null")
	}
	r.consume()
}

// IsNull returns true if the next token is a null keyword.
func (r *Lexer) IsNull() bool {
	if r.token.kind == tokenUndef && r.Ok() {
		r.FetchToken()
	}
	return r.Ok() && r.token.kind == tokenNull
}

// Skip skips a single token.
func (r *Lexer) Skip() {
	if r.token.kind == tokenUndef && r.Ok() {
		r.FetchToken()
	}
	r.consume()
}
// SkipRecursive skips next array or object completely, or just skips a single token if not
// an array/object.
//
// Note: no syntax validation is performed on the skipped data (beyond the
// json.Valid check applied to the whole skipped span at the end).
func (r *Lexer) SkipRecursive() {
	r.scanToken()
	var start, end byte
	startPos := r.start

	switch r.token.delimValue {
	case '{':
		start, end = '{', '}'
	case '[':
		start, end = '[', ']'
	default:
		// Not a container: skipping the single token is enough.
		r.consume()
		return
	}

	r.consume()

	level := 1
	inQuotes := false
	wasEscape := false

	// Scan raw bytes, tracking nesting depth and whether we are inside a
	// string literal (delimiters inside strings must not affect depth).
	for i, c := range r.Data[r.pos:] {
		switch {
		case c == start && !inQuotes:
			level++
		case c == end && !inQuotes:
			level--
			if level == 0 {
				r.pos += i + 1
				if !json.Valid(r.Data[startPos:r.pos]) {
					r.pos = len(r.Data)
					// NOTE(review): at this point r.pos == len(r.Data), so
					// Offset is the input length and Data is empty — confirm
					// this is the intended error shape.
					r.fatalError = &LexerError{
						Reason: "skipped array/object json value is invalid",
						Offset: r.pos,
						Data:   string(r.Data[r.pos:]),
					}
				}
				return
			}
		case c == '\\' && inQuotes:
			// Toggle so that "\\\"" is handled: an even run of backslashes
			// leaves the following quote unescaped.
			wasEscape = !wasEscape
			continue
		case c == '"' && inQuotes:
			// A quote preceded by an unpaired backslash stays inside the string.
			inQuotes = wasEscape
		case c == '"':
			inQuotes = true
		}
		wasEscape = false
	}
	r.pos = len(r.Data)
	r.fatalError = &LexerError{
		Reason: "EOF reached while skipping array/object or token",
		Offset: r.pos,
		Data:   string(r.Data[r.pos:]),
	}
}
// Raw fetches the next item recursively as a data slice.
// The returned slice aliases r.Data and must not outlive it.
func (r *Lexer) Raw() []byte {
	r.SkipRecursive()
	if !r.Ok() {
		return nil
	}
	return r.Data[r.start:r.pos]
}

// IsStart returns whether the lexer is positioned at the start
// of an input string.
func (r *Lexer) IsStart() bool {
	return r.pos == 0
}

// Consumed reads all remaining bytes from the input, publishing an error if
// there is anything but whitespace remaining.
func (r *Lexer) Consumed() {
	if r.pos > len(r.Data) || !r.Ok() {
		return
	}

	for _, c := range r.Data[r.pos:] {
		if c != ' ' && c != '\t' && c != '\r' && c != '\n' {
			r.AddError(&LexerError{
				Reason: "invalid character '" + string(c) + "' after top-level value",
				Offset: r.pos,
				Data:   string(r.Data[r.pos:]),
			})
			return
		}

		r.pos++
		r.start++
	}
}
- func (r *Lexer) unsafeString(skipUnescape bool) (string, []byte) {
- if r.token.kind == tokenUndef && r.Ok() {
- r.FetchToken()
- }
- if !r.Ok() || r.token.kind != tokenString {
- r.errInvalidToken("string")
- return "", nil
- }
- if !skipUnescape {
- if err := r.unescapeStringToken(); err != nil {
- r.errInvalidToken("string")
- return "", nil
- }
- }
- bytes := r.token.byteValue
- ret := bytesToStr(r.token.byteValue)
- r.consume()
- return ret, bytes
- }
// UnsafeString returns the string value if the token is a string literal.
//
// Warning: returned string may point to the input buffer, so the string should not outlive
// the input buffer. Intended pattern of usage is as an argument to a switch statement.
func (r *Lexer) UnsafeString() string {
	ret, _ := r.unsafeString(false)
	return ret
}

// UnsafeBytes returns the byte slice if the token is a string literal.
// The same aliasing warning as for UnsafeString applies.
func (r *Lexer) UnsafeBytes() []byte {
	_, ret := r.unsafeString(false)
	return ret
}

// UnsafeFieldName returns current member name string token.
// With skipUnescape set, escape sequences are left unprocessed (safe only
// when the field name is known to contain no escapes).
func (r *Lexer) UnsafeFieldName(skipUnescape bool) string {
	ret, _ := r.unsafeString(skipUnescape)
	return ret
}
// String reads a string literal and returns it as an owned Go string.
func (r *Lexer) String() string {
	if r.token.kind == tokenUndef && r.Ok() {
		r.FetchToken()
	}
	if !r.Ok() || r.token.kind != tokenString {
		r.errInvalidToken("string")
		return ""
	}
	if err := r.unescapeStringToken(); err != nil {
		r.errInvalidToken("string")
		return ""
	}
	var ret string
	if r.token.byteValueCloned {
		// The unescape step already allocated a private buffer; reuse it
		// without another copy.
		ret = bytesToStr(r.token.byteValue)
	} else {
		// byteValue still aliases r.Data — copy so the result is independent.
		ret = string(r.token.byteValue)
	}
	r.consume()
	return ret
}

// StringIntern reads a string literal, and performs string interning on it.
func (r *Lexer) StringIntern() string {
	if r.token.kind == tokenUndef && r.Ok() {
		r.FetchToken()
	}
	if !r.Ok() || r.token.kind != tokenString {
		r.errInvalidToken("string")
		return ""
	}
	if err := r.unescapeStringToken(); err != nil {
		r.errInvalidToken("string")
		return ""
	}
	ret := intern.Bytes(r.token.byteValue)
	r.consume()
	return ret
}
// Bytes reads a string literal and base64 decodes it into a byte slice.
// Uses the standard (padded) base64 alphabet.
func (r *Lexer) Bytes() []byte {
	if r.token.kind == tokenUndef && r.Ok() {
		r.FetchToken()
	}
	if !r.Ok() || r.token.kind != tokenString {
		r.errInvalidToken("string")
		return nil
	}
	if err := r.unescapeStringToken(); err != nil {
		r.errInvalidToken("string")
		return nil
	}
	ret := make([]byte, base64.StdEncoding.DecodedLen(len(r.token.byteValue)))
	n, err := base64.StdEncoding.Decode(ret, r.token.byteValue)
	if err != nil {
		// NOTE(review): unlike other errors, this LexerError carries no
		// Offset/Data context, and the token is not consumed — confirm intended.
		r.fatalError = &LexerError{
			Reason: err.Error(),
		}
		return nil
	}

	r.consume()
	return ret[:n]
}
// Bool reads a true or false boolean keyword.
func (r *Lexer) Bool() bool {
	if r.token.kind == tokenUndef && r.Ok() {
		r.FetchToken()
	}
	if !r.Ok() || r.token.kind != tokenBool {
		r.errInvalidToken("bool")
		return false
	}
	ret := r.token.boolValue
	r.consume()
	return ret
}

// number reads a number token and returns its raw text (zero-copy view over
// r.Data; callers pass it straight to strconv).
func (r *Lexer) number() string {
	if r.token.kind == tokenUndef && r.Ok() {
		r.FetchToken()
	}
	if !r.Ok() || r.token.kind != tokenNumber {
		r.errInvalidToken("number")
		return ""
	}
	ret := bytesToStr(r.token.byteValue)
	r.consume()
	return ret
}
- func (r *Lexer) Uint8() uint8 {
- s := r.number()
- if !r.Ok() {
- return 0
- }
- n, err := strconv.ParseUint(s, 10, 8)
- if err != nil {
- r.addNonfatalError(&LexerError{
- Offset: r.start,
- Reason: err.Error(),
- Data: s,
- })
- }
- return uint8(n)
- }
- func (r *Lexer) Uint16() uint16 {
- s := r.number()
- if !r.Ok() {
- return 0
- }
- n, err := strconv.ParseUint(s, 10, 16)
- if err != nil {
- r.addNonfatalError(&LexerError{
- Offset: r.start,
- Reason: err.Error(),
- Data: s,
- })
- }
- return uint16(n)
- }
- func (r *Lexer) Uint32() uint32 {
- s := r.number()
- if !r.Ok() {
- return 0
- }
- n, err := strconv.ParseUint(s, 10, 32)
- if err != nil {
- r.addNonfatalError(&LexerError{
- Offset: r.start,
- Reason: err.Error(),
- Data: s,
- })
- }
- return uint32(n)
- }
- func (r *Lexer) Uint64() uint64 {
- s := r.number()
- if !r.Ok() {
- return 0
- }
- n, err := strconv.ParseUint(s, 10, 64)
- if err != nil {
- r.addNonfatalError(&LexerError{
- Offset: r.start,
- Reason: err.Error(),
- Data: s,
- })
- }
- return n
- }
- func (r *Lexer) Uint() uint {
- return uint(r.Uint64())
- }
- func (r *Lexer) Int8() int8 {
- s := r.number()
- if !r.Ok() {
- return 0
- }
- n, err := strconv.ParseInt(s, 10, 8)
- if err != nil {
- r.addNonfatalError(&LexerError{
- Offset: r.start,
- Reason: err.Error(),
- Data: s,
- })
- }
- return int8(n)
- }
- func (r *Lexer) Int16() int16 {
- s := r.number()
- if !r.Ok() {
- return 0
- }
- n, err := strconv.ParseInt(s, 10, 16)
- if err != nil {
- r.addNonfatalError(&LexerError{
- Offset: r.start,
- Reason: err.Error(),
- Data: s,
- })
- }
- return int16(n)
- }
- func (r *Lexer) Int32() int32 {
- s := r.number()
- if !r.Ok() {
- return 0
- }
- n, err := strconv.ParseInt(s, 10, 32)
- if err != nil {
- r.addNonfatalError(&LexerError{
- Offset: r.start,
- Reason: err.Error(),
- Data: s,
- })
- }
- return int32(n)
- }
- func (r *Lexer) Int64() int64 {
- s := r.number()
- if !r.Ok() {
- return 0
- }
- n, err := strconv.ParseInt(s, 10, 64)
- if err != nil {
- r.addNonfatalError(&LexerError{
- Offset: r.start,
- Reason: err.Error(),
- Data: s,
- })
- }
- return n
- }
- func (r *Lexer) Int() int {
- return int(r.Int64())
- }
- func (r *Lexer) Uint8Str() uint8 {
- s, b := r.unsafeString(false)
- if !r.Ok() {
- return 0
- }
- n, err := strconv.ParseUint(s, 10, 8)
- if err != nil {
- r.addNonfatalError(&LexerError{
- Offset: r.start,
- Reason: err.Error(),
- Data: string(b),
- })
- }
- return uint8(n)
- }
- func (r *Lexer) Uint16Str() uint16 {
- s, b := r.unsafeString(false)
- if !r.Ok() {
- return 0
- }
- n, err := strconv.ParseUint(s, 10, 16)
- if err != nil {
- r.addNonfatalError(&LexerError{
- Offset: r.start,
- Reason: err.Error(),
- Data: string(b),
- })
- }
- return uint16(n)
- }
- func (r *Lexer) Uint32Str() uint32 {
- s, b := r.unsafeString(false)
- if !r.Ok() {
- return 0
- }
- n, err := strconv.ParseUint(s, 10, 32)
- if err != nil {
- r.addNonfatalError(&LexerError{
- Offset: r.start,
- Reason: err.Error(),
- Data: string(b),
- })
- }
- return uint32(n)
- }
- func (r *Lexer) Uint64Str() uint64 {
- s, b := r.unsafeString(false)
- if !r.Ok() {
- return 0
- }
- n, err := strconv.ParseUint(s, 10, 64)
- if err != nil {
- r.addNonfatalError(&LexerError{
- Offset: r.start,
- Reason: err.Error(),
- Data: string(b),
- })
- }
- return n
- }
- func (r *Lexer) UintStr() uint {
- return uint(r.Uint64Str())
- }
- func (r *Lexer) UintptrStr() uintptr {
- return uintptr(r.Uint64Str())
- }
- func (r *Lexer) Int8Str() int8 {
- s, b := r.unsafeString(false)
- if !r.Ok() {
- return 0
- }
- n, err := strconv.ParseInt(s, 10, 8)
- if err != nil {
- r.addNonfatalError(&LexerError{
- Offset: r.start,
- Reason: err.Error(),
- Data: string(b),
- })
- }
- return int8(n)
- }
- func (r *Lexer) Int16Str() int16 {
- s, b := r.unsafeString(false)
- if !r.Ok() {
- return 0
- }
- n, err := strconv.ParseInt(s, 10, 16)
- if err != nil {
- r.addNonfatalError(&LexerError{
- Offset: r.start,
- Reason: err.Error(),
- Data: string(b),
- })
- }
- return int16(n)
- }
- func (r *Lexer) Int32Str() int32 {
- s, b := r.unsafeString(false)
- if !r.Ok() {
- return 0
- }
- n, err := strconv.ParseInt(s, 10, 32)
- if err != nil {
- r.addNonfatalError(&LexerError{
- Offset: r.start,
- Reason: err.Error(),
- Data: string(b),
- })
- }
- return int32(n)
- }
- func (r *Lexer) Int64Str() int64 {
- s, b := r.unsafeString(false)
- if !r.Ok() {
- return 0
- }
- n, err := strconv.ParseInt(s, 10, 64)
- if err != nil {
- r.addNonfatalError(&LexerError{
- Offset: r.start,
- Reason: err.Error(),
- Data: string(b),
- })
- }
- return n
- }
- func (r *Lexer) IntStr() int {
- return int(r.Int64Str())
- }
- func (r *Lexer) Float32() float32 {
- s := r.number()
- if !r.Ok() {
- return 0
- }
- n, err := strconv.ParseFloat(s, 32)
- if err != nil {
- r.addNonfatalError(&LexerError{
- Offset: r.start,
- Reason: err.Error(),
- Data: s,
- })
- }
- return float32(n)
- }
- func (r *Lexer) Float32Str() float32 {
- s, b := r.unsafeString(false)
- if !r.Ok() {
- return 0
- }
- n, err := strconv.ParseFloat(s, 32)
- if err != nil {
- r.addNonfatalError(&LexerError{
- Offset: r.start,
- Reason: err.Error(),
- Data: string(b),
- })
- }
- return float32(n)
- }
- func (r *Lexer) Float64() float64 {
- s := r.number()
- if !r.Ok() {
- return 0
- }
- n, err := strconv.ParseFloat(s, 64)
- if err != nil {
- r.addNonfatalError(&LexerError{
- Offset: r.start,
- Reason: err.Error(),
- Data: s,
- })
- }
- return n
- }
- func (r *Lexer) Float64Str() float64 {
- s, b := r.unsafeString(false)
- if !r.Ok() {
- return 0
- }
- n, err := strconv.ParseFloat(s, 64)
- if err != nil {
- r.addNonfatalError(&LexerError{
- Offset: r.start,
- Reason: err.Error(),
- Data: string(b),
- })
- }
- return n
- }
// Error returns the fatal error recorded so far, or nil.
func (r *Lexer) Error() error {
	return r.fatalError
}

// AddError records e as the fatal error unless one is already set.
func (r *Lexer) AddError(e error) {
	if r.fatalError == nil {
		r.fatalError = e
	}
}

// AddNonFatalError records e as a non-fatal error at the current token span.
func (r *Lexer) AddNonFatalError(e error) {
	r.addNonfatalError(&LexerError{
		Offset: r.start,
		Data:   string(r.Data[r.start:r.pos]),
		Reason: e.Error(),
	})
}

// addNonfatalError appends err to the non-fatal error list when
// UseMultipleErrors is set (deduplicating consecutive errors at the same
// offset); otherwise it becomes the fatal error.
func (r *Lexer) addNonfatalError(err *LexerError) {
	if r.UseMultipleErrors {
		// We don't want to add errors with the same offset.
		if len(r.multipleErrors) != 0 && r.multipleErrors[len(r.multipleErrors)-1].Offset == err.Offset {
			return
		}
		r.multipleErrors = append(r.multipleErrors, err)
		return
	}
	r.fatalError = err
}

// GetNonFatalErrors returns all non-fatal errors collected so far.
func (r *Lexer) GetNonFatalErrors() []*LexerError {
	return r.multipleErrors
}
// JsonNumber fetches a json.Number from the 'encoding/json' package.
// String, number and null tokens are all accepted; anything else is a
// syntax error. A null token yields the empty json.Number.
func (r *Lexer) JsonNumber() json.Number {
	if r.token.kind == tokenUndef && r.Ok() {
		r.FetchToken()
	}
	if !r.Ok() {
		r.errInvalidToken("json.Number")
		return json.Number("")
	}

	switch r.token.kind {
	case tokenString:
		return json.Number(r.String())
	case tokenNumber:
		return json.Number(r.Raw())
	case tokenNull:
		r.Null()
		return json.Number("")
	default:
		r.errSyntax()
		return json.Number("")
	}
}
// Interface fetches an interface{} analogous to the 'encoding/json' package:
// strings become string, numbers float64, booleans bool, null nil, objects
// map[string]interface{} and arrays []interface{} (recursively).
func (r *Lexer) Interface() interface{} {
	if r.token.kind == tokenUndef && r.Ok() {
		r.FetchToken()
	}

	if !r.Ok() {
		return nil
	}
	switch r.token.kind {
	case tokenString:
		return r.String()
	case tokenNumber:
		return r.Float64()
	case tokenBool:
		return r.Bool()
	case tokenNull:
		r.Null()
		return nil
	}

	if r.token.delimValue == '{' {
		r.consume()

		ret := map[string]interface{}{}
		for !r.IsDelim('}') {
			key := r.String()
			r.WantColon()
			ret[key] = r.Interface()
			r.WantComma()
		}
		r.Delim('}')

		if r.Ok() {
			return ret
		} else {
			return nil
		}
	} else if r.token.delimValue == '[' {
		r.consume()

		ret := []interface{}{}
		for !r.IsDelim(']') {
			ret = append(ret, r.Interface())
			r.WantComma()
		}
		r.Delim(']')

		if r.Ok() {
			return ret
		} else {
			return nil
		}
	}
	r.errSyntax()
	return nil
}
// WantComma requires a comma to be present before fetching next token.
func (r *Lexer) WantComma() {
	r.wantSep = ','
	r.firstElement = false
}

// WantColon requires a colon to be present before fetching next token.
func (r *Lexer) WantColon() {
	r.wantSep = ':'
	r.firstElement = false
}
|