123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668 |
- // Copyright 2012-2014 Charles Banning. All rights reserved.
- // Use of this source code is governed by a BSD-style
- // license that can be found in the LICENSE file
- // keyvalues.go: Extract values from an arbitrary XML doc. Tag path can include wildcard characters.
- package mxj
- import (
- "errors"
- "fmt"
- "strconv"
- "strings"
- )
- // ----------------------------- get everything FOR a single key -------------------------
// minArraySize is the smallest initial capacity ever used for result slices.
const minArraySize = 32

// defaultArraySize is the initial capacity given to result slices; it is only
// changed through SetArraySize and never drops below minArraySize.
var defaultArraySize = minArraySize

// SetArraySize adjusts the initial buffer capacity used for the values returned from
// ValuesForKey() and ValuesForPath(). Raising it can significantly reduce the number of
// allocate-and-copy cycles for large data sets.
// A 'size' that is not larger than the built-in minimum (32) resets the capacity to that minimum.
// Returns the buffer size that is now in effect.
func SetArraySize(size int) int {
	defaultArraySize = minArraySize
	if size > minArraySize {
		defaultArraySize = size
	}
	return defaultArraySize
}
- // ValuesForKey return all values in Map, 'mv', associated with a 'key'. If len(returned_values) == 0, then no match.
- // On error, the returned slice is 'nil'. NOTE: 'key' can be wildcard, "*".
- // 'subkeys' (optional) are "key:val[:type]" strings representing attributes or elements in a list.
- // - By default 'val' is of type string. "key:val:bool" and "key:val:float" to coerce them.
- // - For attributes prefix the label with the attribute prefix character, by default a
- // hyphen, '-', e.g., "-seq:3". (See SetAttrPrefix function.)
- // - If the 'key' refers to a list, then "key:value" could select a list member of the list.
- // - The subkey can be wildcarded - "key:*" - to require that it's there with some value.
- // - If a subkey is preceeded with the '!' character, the key:value[:type] entry is treated as an
- // exclusion critera - e.g., "!author:William T. Gaddis".
- // - If val contains ":" symbol, use SetFieldSeparator to a unused symbol, perhaps "|".
- func (mv Map) ValuesForKey(key string, subkeys ...string) ([]interface{}, error) {
- m := map[string]interface{}(mv)
- var subKeyMap map[string]interface{}
- if len(subkeys) > 0 {
- var err error
- subKeyMap, err = getSubKeyMap(subkeys...)
- if err != nil {
- return nil, err
- }
- }
- ret := make([]interface{}, 0, defaultArraySize)
- var cnt int
- hasKey(m, key, &ret, &cnt, subKeyMap)
- return ret[:cnt], nil
- }
- var KeyNotExistError = errors.New("Key does not exist")
- // ValueForKey is a wrapper on ValuesForKey. It returns the first member of []interface{}, if any.
- // If there is no value, "nil, nil" is returned.
- func (mv Map) ValueForKey(key string, subkeys ...string) (interface{}, error) {
- vals, err := mv.ValuesForKey(key, subkeys...)
- if err != nil {
- return nil, err
- }
- if len(vals) == 0 {
- return nil, KeyNotExistError
- }
- return vals[0], nil
- }
- // hasKey - if the map 'key' exists append it to array
- // if it doesn't do nothing except scan array and map values
- func hasKey(iv interface{}, key string, ret *[]interface{}, cnt *int, subkeys map[string]interface{}) {
- // func hasKey(iv interface{}, key string, ret *[]interface{}, subkeys map[string]interface{}) {
- switch iv.(type) {
- case map[string]interface{}:
- vv := iv.(map[string]interface{})
- // see if the current value is of interest
- if v, ok := vv[key]; ok {
- switch v.(type) {
- case map[string]interface{}:
- if hasSubKeys(v, subkeys) {
- *ret = append(*ret, v)
- *cnt++
- }
- case []interface{}:
- for _, av := range v.([]interface{}) {
- if hasSubKeys(av, subkeys) {
- *ret = append(*ret, av)
- *cnt++
- }
- }
- default:
- if len(subkeys) == 0 {
- *ret = append(*ret, v)
- *cnt++
- }
- }
- }
- // wildcard case
- if key == "*" {
- for _, v := range vv {
- switch v.(type) {
- case map[string]interface{}:
- if hasSubKeys(v, subkeys) {
- *ret = append(*ret, v)
- *cnt++
- }
- case []interface{}:
- for _, av := range v.([]interface{}) {
- if hasSubKeys(av, subkeys) {
- *ret = append(*ret, av)
- *cnt++
- }
- }
- default:
- if len(subkeys) == 0 {
- *ret = append(*ret, v)
- *cnt++
- }
- }
- }
- }
- // scan the rest
- for _, v := range vv {
- hasKey(v, key, ret, cnt, subkeys)
- }
- case []interface{}:
- for _, v := range iv.([]interface{}) {
- hasKey(v, key, ret, cnt, subkeys)
- }
- }
- }
- // ----------------------- get everything for a node in the Map ---------------------------
- // Allow indexed arrays in "path" specification. (Request from Abhijit Kadam - abhijitk100@gmail.com.)
- // 2014.04.28 - implementation note.
- // Implemented as a wrapper of (old)ValuesForPath() because we need look-ahead logic to handle expansion
- // of wildcards and unindexed arrays. Embedding such logic into valuesForKeyPath() would have made the
- // code much more complicated; this wrapper is straightforward, easy to debug, and doesn't add significant overhead.
- // ValuesForPatb retrieves all values for a path from the Map. If len(returned_values) == 0, then no match.
- // On error, the returned array is 'nil'.
- // 'path' is a dot-separated path of key values.
- // - If a node in the path is '*', then everything beyond is walked.
- // - 'path' can contain indexed array references, such as, "*.data[1]" and "msgs[2].data[0].field" -
- // even "*[2].*[0].field".
- // 'subkeys' (optional) are "key:val[:type]" strings representing attributes or elements in a list.
- // - By default 'val' is of type string. "key:val:bool" and "key:val:float" to coerce them.
- // - For attributes prefix the label with the attribute prefix character, by default a
- // hyphen, '-', e.g., "-seq:3". (See SetAttrPrefix function.)
- // - If the 'path' refers to a list, then "tag:value" would return member of the list.
- // - The subkey can be wildcarded - "key:*" - to require that it's there with some value.
- // - If a subkey is preceeded with the '!' character, the key:value[:type] entry is treated as an
- // exclusion critera - e.g., "!author:William T. Gaddis".
- // - If val contains ":" symbol, use SetFieldSeparator to a unused symbol, perhaps "|".
- func (mv Map) ValuesForPath(path string, subkeys ...string) ([]interface{}, error) {
- // If there are no array indexes in path, use legacy ValuesForPath() logic.
- if strings.Index(path, "[") < 0 {
- return mv.oldValuesForPath(path, subkeys...)
- }
- var subKeyMap map[string]interface{}
- if len(subkeys) > 0 {
- var err error
- subKeyMap, err = getSubKeyMap(subkeys...)
- if err != nil {
- return nil, err
- }
- }
- keys, kerr := parsePath(path)
- if kerr != nil {
- return nil, kerr
- }
- vals, verr := valuesForArray(keys, mv)
- if verr != nil {
- return nil, verr // Vals may be nil, but return empty array.
- }
- // Need to handle subkeys ... only return members of vals that satisfy conditions.
- retvals := make([]interface{}, 0)
- for _, v := range vals {
- if hasSubKeys(v, subKeyMap) {
- retvals = append(retvals, v)
- }
- }
- return retvals, nil
- }
- func valuesForArray(keys []*key, m Map) ([]interface{}, error) {
- var tmppath string
- var haveFirst bool
- var vals []interface{}
- var verr error
- lastkey := len(keys) - 1
- for i := 0; i <= lastkey; i++ {
- if !haveFirst {
- tmppath = keys[i].name
- haveFirst = true
- } else {
- tmppath += "." + keys[i].name
- }
- // Look-ahead: explode wildcards and unindexed arrays.
- // Need to handle un-indexed list recursively:
- // e.g., path is "stuff.data[0]" rather than "stuff[0].data[0]".
- // Need to treat it as "stuff[0].data[0]", "stuff[1].data[0]", ...
- if !keys[i].isArray && i < lastkey && keys[i+1].isArray {
- // Can't pass subkeys because we may not be at literal end of path.
- vv, vverr := m.oldValuesForPath(tmppath)
- if vverr != nil {
- return nil, vverr
- }
- for _, v := range vv {
- // See if we can walk the value.
- am, ok := v.(map[string]interface{})
- if !ok {
- continue
- }
- // Work the backend.
- nvals, nvalserr := valuesForArray(keys[i+1:], Map(am))
- if nvalserr != nil {
- return nil, nvalserr
- }
- vals = append(vals, nvals...)
- }
- break // have recursed the whole path - return
- }
- if keys[i].isArray || i == lastkey {
- // Don't pass subkeys because may not be at literal end of path.
- vals, verr = m.oldValuesForPath(tmppath)
- } else {
- continue
- }
- if verr != nil {
- return nil, verr
- }
- if i == lastkey && !keys[i].isArray {
- break
- }
- // Now we're looking at an array - supposedly.
- // Is index in range of vals?
- if len(vals) <= keys[i].position {
- vals = nil
- break
- }
- // Return the array member of interest, if at end of path.
- if i == lastkey {
- vals = vals[keys[i].position:(keys[i].position + 1)]
- break
- }
- // Extract the array member of interest.
- am := vals[keys[i].position:(keys[i].position + 1)]
- // must be a map[string]interface{} value so we can keep walking the path
- amm, ok := am[0].(map[string]interface{})
- if !ok {
- vals = nil
- break
- }
- m = Map(amm)
- haveFirst = false
- }
- return vals, nil
- }
// key is one parsed segment of a dot-separated path.
type key struct {
	name     string // segment text before any '['
	isArray  bool   // true when the segment carried an "[index]" suffix
	position int    // the parsed index; meaningful only when isArray is true
}

// parsePath splits the dot-separated path 's' into keys. Empty segments (as produced
// by leading, trailing or doubled dots) are skipped. A segment of the form "name[N]"
// becomes an array key with position N; only the first bracket pair of a segment is
// examined.
// An error is returned if a segment has a '[' with no following ']', or if the index
// is not a non-negative integer that fits in 32 bits.
func parsePath(s string) ([]*key, error) {
	segments := strings.Split(s, ".")
	ret := make([]*key, 0, len(segments))

	for _, seg := range segments {
		if seg == "" {
			continue
		}

		open := strings.Index(seg, "[")
		if open < 0 {
			ret = append(ret, &key{name: seg})
			continue
		}

		// Everything between the first '[' and the next ']' is the index.
		rest := seg[open+1:]
		end := strings.Index(rest, "]")
		if end < 0 {
			return nil, fmt.Errorf("no right bracket on key index: %s", seg)
		}
		index := rest[:end]

		// convert the index to an int value
		pos, nerr := strconv.ParseInt(index, 10, 32)
		if nerr != nil {
			return nil, fmt.Errorf("cannot convert index to int value: %s", index)
		}
		// A negative position can never address a list member and would make
		// slice expressions on the result list panic.
		if pos < 0 {
			return nil, fmt.Errorf("negative key index: %s", seg)
		}

		ret = append(ret, &key{name: seg[:open], isArray: true, position: int(pos)})
	}

	return ret, nil
}
- // legacy ValuesForPath() - now wrapped to handle special case of indexed arrays in 'path'.
- func (mv Map) oldValuesForPath(path string, subkeys ...string) ([]interface{}, error) {
- m := map[string]interface{}(mv)
- var subKeyMap map[string]interface{}
- if len(subkeys) > 0 {
- var err error
- subKeyMap, err = getSubKeyMap(subkeys...)
- if err != nil {
- return nil, err
- }
- }
- keys := strings.Split(path, ".")
- if keys[len(keys)-1] == "" {
- keys = keys[:len(keys)-1]
- }
- ivals := make([]interface{}, 0, defaultArraySize)
- var cnt int
- valuesForKeyPath(&ivals, &cnt, m, keys, subKeyMap)
- return ivals[:cnt], nil
- }
- func valuesForKeyPath(ret *[]interface{}, cnt *int, m interface{}, keys []string, subkeys map[string]interface{}) {
- lenKeys := len(keys)
- // load 'm' values into 'ret'
- // expand any lists
- if lenKeys == 0 {
- switch m.(type) {
- case map[string]interface{}:
- if subkeys != nil {
- if ok := hasSubKeys(m, subkeys); !ok {
- return
- }
- }
- *ret = append(*ret, m)
- *cnt++
- case []interface{}:
- for i, v := range m.([]interface{}) {
- if subkeys != nil {
- if ok := hasSubKeys(v, subkeys); !ok {
- continue // only load list members with subkeys
- }
- }
- *ret = append(*ret, (m.([]interface{}))[i])
- *cnt++
- }
- default:
- if subkeys != nil {
- return // must be map[string]interface{} if there are subkeys
- }
- *ret = append(*ret, m)
- *cnt++
- }
- return
- }
- // key of interest
- key := keys[0]
- switch key {
- case "*": // wildcard - scan all values
- switch m.(type) {
- case map[string]interface{}:
- for _, v := range m.(map[string]interface{}) {
- // valuesForKeyPath(ret, v, keys[1:], subkeys)
- valuesForKeyPath(ret, cnt, v, keys[1:], subkeys)
- }
- case []interface{}:
- for _, v := range m.([]interface{}) {
- switch v.(type) {
- // flatten out a list of maps - keys are processed
- case map[string]interface{}:
- for _, vv := range v.(map[string]interface{}) {
- // valuesForKeyPath(ret, vv, keys[1:], subkeys)
- valuesForKeyPath(ret, cnt, vv, keys[1:], subkeys)
- }
- default:
- // valuesForKeyPath(ret, v, keys[1:], subkeys)
- valuesForKeyPath(ret, cnt, v, keys[1:], subkeys)
- }
- }
- }
- default: // key - must be map[string]interface{}
- switch m.(type) {
- case map[string]interface{}:
- if v, ok := m.(map[string]interface{})[key]; ok {
- // valuesForKeyPath(ret, v, keys[1:], subkeys)
- valuesForKeyPath(ret, cnt, v, keys[1:], subkeys)
- }
- case []interface{}: // may be buried in list
- for _, v := range m.([]interface{}) {
- switch v.(type) {
- case map[string]interface{}:
- if vv, ok := v.(map[string]interface{})[key]; ok {
- // valuesForKeyPath(ret, vv, keys[1:], subkeys)
- valuesForKeyPath(ret, cnt, vv, keys[1:], subkeys)
- }
- }
- }
- }
- }
- }
// hasSubKeys reports whether 'v' satisfies every name:value condition in 'subkeys'.
//   - With no subkeys, every value qualifies.
//   - Otherwise 'v' must be a map[string]interface{} value; anything else fails.
//   - A condition value may be a string, bool or float64 and matches only a member of the
//     same type holding the same value.
//   - The string value "*" is a wildcard: the member merely has to exist.
//   - A name prefixed with '!' is a NOT-key: the member must exist and must not hold the
//     given value. With the wildcard ("!name:*") the member must not exist at all.
func hasSubKeys(v interface{}, subkeys map[string]interface{}) bool {
	if len(subkeys) == 0 {
		return true
	}

	// not a map[string]interface{} value, can't have subkeys
	mv, isMap := v.(map[string]interface{})
	if !isMap {
		return false
	}

	// do all subKey name:value pairs match?
	for skey, sval := range subkeys {
		// HasPrefix, unlike slicing skey[:1], is safe for an empty subkey name.
		isNotKey := strings.HasPrefix(skey, "!")
		if isNotKey {
			skey = skey[1:]
		}
		wildcard := false
		if s, ok := sval.(string); ok && s == "*" {
			wildcard = true
		}

		vv, present := mv[skey]
		if !present {
			// key not there - only acceptable for "!name:*"
			if isNotKey && wildcard {
				continue
			}
			return false
		}

		if wildcard {
			if isNotKey { // key is there, and we don't want it
				return false
			}
			continue
		}

		// Typed comparison: the member must have the same dynamic type as the
		// condition value for the two to be considered equal.
		matched := false
		switch want := sval.(type) {
		case string:
			got, ok := vv.(string)
			matched = ok && got == want
		case bool:
			got, ok := vv.(bool)
			matched = ok && got == want
		case float64:
			got, ok := vv.(float64)
			matched = ok && got == want
		}

		// A plain key must match; a NOT-key must not.
		if matched == isNotKey {
			return false
		}
	}

	// all subkeys matched
	return true
}
- // Generate map of key:value entries as map[string]string.
- // 'kv' arguments are "name:value" pairs: attribute keys are designated with prepended hyphen, '-'.
- // If len(kv) == 0, the return is (nil, nil).
- func getSubKeyMap(kv ...string) (map[string]interface{}, error) {
- if len(kv) == 0 {
- return nil, nil
- }
- m := make(map[string]interface{}, 0)
- for _, v := range kv {
- vv := strings.Split(v, fieldSep)
- switch len(vv) {
- case 2:
- m[vv[0]] = interface{}(vv[1])
- case 3:
- switch vv[2] {
- case "string", "char", "text":
- m[vv[0]] = interface{}(vv[1])
- case "bool", "boolean":
- // ParseBool treats "1"==true & "0"==false
- b, err := strconv.ParseBool(vv[1])
- if err != nil {
- return nil, fmt.Errorf("can't convert subkey value to bool: %s", vv[1])
- }
- m[vv[0]] = interface{}(b)
- case "float", "float64", "num", "number", "numeric":
- f, err := strconv.ParseFloat(vv[1], 64)
- if err != nil {
- return nil, fmt.Errorf("can't convert subkey value to float: %s", vv[1])
- }
- m[vv[0]] = interface{}(f)
- default:
- return nil, fmt.Errorf("unknown subkey conversion spec: %s", v)
- }
- default:
- return nil, fmt.Errorf("unknown subkey spec: %s", v)
- }
- }
- return m, nil
- }
- // ------------------------------- END of valuesFor ... ----------------------------
- // ----------------------- locate where a key value is in the tree -------------------
- //----------------------------- find all paths to a key --------------------------------
- // PathsForKey returns all paths through Map, 'mv', (in dot-notation) that terminate with the specified key.
- // Results can be used with ValuesForPath.
- func (mv Map) PathsForKey(key string) []string {
- m := map[string]interface{}(mv)
- breadbasket := make(map[string]bool, 0)
- breadcrumbs := ""
- hasKeyPath(breadcrumbs, m, key, breadbasket)
- if len(breadbasket) == 0 {
- return nil
- }
- // unpack map keys to return
- res := make([]string, len(breadbasket))
- var i int
- for k := range breadbasket {
- res[i] = k
- i++
- }
- return res
- }
- // PathForKeyShortest extracts the shortest path from all possible paths - from PathsForKey() - in Map, 'mv'..
- // Paths are strings using dot-notation.
- func (mv Map) PathForKeyShortest(key string) string {
- paths := mv.PathsForKey(key)
- lp := len(paths)
- if lp == 0 {
- return ""
- }
- if lp == 1 {
- return paths[0]
- }
- shortest := paths[0]
- shortestLen := len(strings.Split(shortest, "."))
- for i := 1; i < len(paths); i++ {
- vlen := len(strings.Split(paths[i], "."))
- if vlen < shortestLen {
- shortest = paths[i]
- shortestLen = vlen
- }
- }
- return shortest
- }
// hasKeyPath is a breadcrumber: it walks 'iv' and saves in 'basket' every dot-notation
// trail that ends at the prescribed 'key'. 'crumbs' is the trail that led to 'iv'.
// Maps extend the trail with each member's name; lists are walked without changing the
// trail; any other value ends the walk.
func hasKeyPath(crumbs string, iv interface{}, key string, basket map[string]bool) {
	switch node := iv.(type) {
	case map[string]interface{}:
		// Every new breadcrumb starts from the trail we already have.
		prefix := ""
		if crumbs != "" {
			prefix = crumbs + "."
		}
		if _, hit := node[key]; hit {
			basket[prefix+key] = true
		}
		// walk on down the path, key could occur again at deeper node
		for name, child := range node {
			hasKeyPath(prefix+name, child, key, basket)
		}
	case []interface{}:
		// crumb-trail doesn't change, pass it on
		for _, member := range node {
			hasKeyPath(crumbs, member, key, basket)
		}
	}
}
- var PathNotExistError = errors.New("Path does not exist")
- // ValueForPath wraps ValuesFor Path and returns the first value returned.
- // If no value is found it returns 'nil' and PathNotExistError.
- func (mv Map) ValueForPath(path string) (interface{}, error) {
- vals, err := mv.ValuesForPath(path)
- if err != nil {
- return nil, err
- }
- if len(vals) == 0 {
- return nil, PathNotExistError
- }
- return vals[0], nil
- }
- // ValuesForPathString returns the first found value for the path as a string.
- func (mv Map) ValueForPathString(path string) (string, error) {
- vals, err := mv.ValuesForPath(path)
- if err != nil {
- return "", err
- }
- if len(vals) == 0 {
- return "", errors.New("ValueForPath: path not found")
- }
- val := vals[0]
- return fmt.Sprintf("%v", val), nil
- }
- // ValueOrEmptyForPathString returns the first found value for the path as a string.
- // If the path is not found then it returns an empty string.
- func (mv Map) ValueOrEmptyForPathString(path string) string {
- str, _ := mv.ValueForPathString(path)
- return str
- }
|