xmlseq.go 27 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844
  1. // Copyright 2012-2016, 2019 Charles Banning. All rights reserved.
  2. // Use of this source code is governed by a BSD-style
  3. // license that can be found in the LICENSE file
  4. // xmlseq.go - version of xml.go with sequence # injection on Decoding and sorting on Encoding.
  5. // Also, handles comments, directives and process instructions.
  6. package mxj
  7. import (
  8. "bytes"
  9. "encoding/xml"
  10. "errors"
  11. "fmt"
  12. "io"
  13. "sort"
  14. "strings"
  15. )
  16. // MapSeq is like Map but contains seqencing indices to allow recovering the original order of
  17. // the XML elements when the map[string]interface{} is marshaled. Element attributes are
  18. // stored as a map["#attr"]map[<attr_key>]map[string]interface{}{"#text":"<value>", "#seq":<attr_index>}
  19. // value instead of denoting the keys with a prefix character. Also, comments, directives and
  20. // process instructions are preserved.
  21. type MapSeq map[string]interface{}
  22. // NoRoot is returned by NewXmlSeq, etc., when a comment, directive or procinstr element is parsed
  23. // in the XML data stream and the element is not contained in an XML object with a root element.
  24. var NoRoot = errors.New("no root key")
  25. var NO_ROOT = NoRoot // maintain backwards compatibility
  26. // ------------------- NewMapXmlSeq & NewMapXmlSeqReader ... -------------------------
  27. // NewMapXmlSeq converts a XML doc into a MapSeq value with elements id'd with decoding sequence key represented
  28. // as map["#seq"]<int value>.
  29. // If the optional argument 'cast' is 'true', then values will be converted to boolean or float64 if possible.
  30. // NOTE: "#seq" key/value pairs are removed on encoding with msv.Xml() / msv.XmlIndent().
  31. // • attributes are a map - map["#attr"]map["attr_key"]map[string]interface{}{"#text":<aval>, "#seq":<num>}
  32. // • all simple elements are decoded as map["#text"]interface{} with a "#seq" k:v pair, as well.
  33. // • lists always decode as map["list_tag"][]map[string]interface{} where the array elements are maps that
  34. // include a "#seq" k:v pair based on sequence they are decoded. Thus, XML like:
  35. // <doc>
  36. // <ltag>value 1</ltag>
  37. // <newtag>value 2</newtag>
  38. // <ltag>value 3</ltag>
  39. // </doc>
  40. // is decoded as:
  41. // doc :
  42. // ltag :[[]interface{}]
  43. // [item: 0]
  44. // #seq :[int] 0
  45. // #text :[string] value 1
  46. // [item: 1]
  47. // #seq :[int] 2
  48. // #text :[string] value 3
  49. // newtag :
  50. // #seq :[int] 1
  51. // #text :[string] value 2
  52. // It will encode in proper sequence even though the MapSeq representation merges all "ltag" elements in an array.
  53. // • comments - "<!--comment-->" - are decoded as map["#comment"]map["#text"]"cmnt_text" with a "#seq" k:v pair.
  54. // • directives - "<!text>" - are decoded as map["#directive"]map[#text"]"directive_text" with a "#seq" k:v pair.
  55. // • process instructions - "<?instr?>" - are decoded as map["#procinst"]interface{} where the #procinst value
  56. // is of map[string]interface{} type with the following keys: #target, #inst, and #seq.
  57. // • comments, directives, and procinsts that are NOT part of a document with a root key will be returned as
  58. // map[string]interface{} and the error value 'NoRoot'.
  59. // • note: "<![CDATA[" syntax is lost in xml.Decode parser - and is not handled here, either.
  60. // and: "\r\n" is converted to "\n"
  61. //
  62. // NOTES:
  63. // 1. The 'xmlVal' will be parsed looking for an xml.StartElement, xml.Comment, etc., so BOM and other
  64. // extraneous xml.CharData will be ignored unless io.EOF is reached first.
  65. // 2. CoerceKeysToLower() is NOT recognized, since the intent here is to eventually call m.XmlSeq() to
  66. // re-encode the message in its original structure.
  67. // 3. If CoerceKeysToSnakeCase() has been called, then all key values will be converted to snake case.
  68. //
  69. // NAME SPACES:
  70. // 1. Keys in the MapSeq value that are parsed from a <name space prefix>:<local name> tag preserve the
  71. // "<prefix>:" notation rather than stripping it as with NewMapXml().
  72. // 2. Attribute keys for name space prefix declarations preserve "xmlns:<prefix>" notation.
  73. //
  74. // ERRORS:
  75. // 1. If a NoRoot error, "no root key," is returned, check the initial map key for a "#comment",
  76. // "#directive" or #procinst" key.
  77. func NewMapXmlSeq(xmlVal []byte, cast ...bool) (MapSeq, error) {
  78. var r bool
  79. if len(cast) == 1 {
  80. r = cast[0]
  81. }
  82. return xmlSeqToMap(xmlVal, r)
  83. }
  84. // NewMpaXmlSeqReader returns next XML doc from an io.Reader as a MapSeq value.
  85. // NOTES:
  86. // 1. The 'xmlReader' will be parsed looking for an xml.StartElement, xml.Comment, etc., so BOM and other
  87. // extraneous xml.CharData will be ignored unless io.EOF is reached first.
  88. // 2. CoerceKeysToLower() is NOT recognized, since the intent here is to eventually call m.XmlSeq() to
  89. // re-encode the message in its original structure.
  90. // 3. If CoerceKeysToSnakeCase() has been called, then all key values will be converted to snake case.
  91. //
  92. // ERRORS:
  93. // 1. If a NoRoot error, "no root key," is returned, check the initial map key for a "#comment",
  94. // "#directive" or #procinst" key.
  95. func NewMapXmlSeqReader(xmlReader io.Reader, cast ...bool) (MapSeq, error) {
  96. var r bool
  97. if len(cast) == 1 {
  98. r = cast[0]
  99. }
  100. // We need to put an *os.File reader in a ByteReader or the xml.NewDecoder
  101. // will wrap it in a bufio.Reader and seek on the file beyond where the
  102. // xml.Decoder parses!
  103. if _, ok := xmlReader.(io.ByteReader); !ok {
  104. xmlReader = myByteReader(xmlReader) // see code at EOF
  105. }
  106. // build the map
  107. return xmlSeqReaderToMap(xmlReader, r)
  108. }
  109. // NewMapXmlSeqReaderRaw returns the next XML doc from an io.Reader as a MapSeq value.
  110. // Returns MapSeq value, slice with the raw XML, and any error.
  111. // NOTES:
  112. // 1. Due to the implementation of xml.Decoder, the raw XML off the reader is buffered to []byte
  113. // using a ByteReader. If the io.Reader is an os.File, there may be significant performance impact.
  114. // See the examples - getmetrics1.go through getmetrics4.go - for comparative use cases on a large
  115. // data set. If the io.Reader is wrapping a []byte value in-memory, however, such as http.Request.Body
  116. // you CAN use it to efficiently unmarshal a XML doc and retrieve the raw XML in a single call.
  117. // 2. The 'raw' return value may be larger than the XML text value.
  118. // 3. The 'xmlReader' will be parsed looking for an xml.StartElement, xml.Comment, etc., so BOM and other
  119. // extraneous xml.CharData will be ignored unless io.EOF is reached first.
  120. // 4. CoerceKeysToLower() is NOT recognized, since the intent here is to eventually call m.XmlSeq() to
  121. // re-encode the message in its original structure.
  122. // 5. If CoerceKeysToSnakeCase() has been called, then all key values will be converted to snake case.
  123. //
  124. // ERRORS:
  125. // 1. If a NoRoot error, "no root key," is returned, check if the initial map key is "#comment",
  126. // "#directive" or #procinst" key.
  127. func NewMapXmlSeqReaderRaw(xmlReader io.Reader, cast ...bool) (MapSeq, []byte, error) {
  128. var r bool
  129. if len(cast) == 1 {
  130. r = cast[0]
  131. }
  132. // create TeeReader so we can retrieve raw XML
  133. buf := make([]byte, 0)
  134. wb := bytes.NewBuffer(buf)
  135. trdr := myTeeReader(xmlReader, wb)
  136. m, err := xmlSeqReaderToMap(trdr, r)
  137. // retrieve the raw XML that was decoded
  138. b := wb.Bytes()
  139. // err may be NoRoot
  140. return m, b, err
  141. }
  142. // xmlSeqReaderToMap() - parse a XML io.Reader to a map[string]interface{} value
  143. func xmlSeqReaderToMap(rdr io.Reader, r bool) (map[string]interface{}, error) {
  144. // parse the Reader
  145. p := xml.NewDecoder(rdr)
  146. if CustomDecoder != nil {
  147. useCustomDecoder(p)
  148. } else {
  149. p.CharsetReader = XmlCharsetReader
  150. }
  151. return xmlSeqToMapParser("", nil, p, r)
  152. }
  153. // xmlSeqToMap - convert a XML doc into map[string]interface{} value
  154. func xmlSeqToMap(doc []byte, r bool) (map[string]interface{}, error) {
  155. b := bytes.NewReader(doc)
  156. p := xml.NewDecoder(b)
  157. if CustomDecoder != nil {
  158. useCustomDecoder(p)
  159. } else {
  160. p.CharsetReader = XmlCharsetReader
  161. }
  162. return xmlSeqToMapParser("", nil, p, r)
  163. }
  164. // ===================================== where the work happens =============================
  165. // xmlSeqToMapParser - load a 'clean' XML doc into a map[string]interface{} directly.
  166. // Add #seq tag value for each element decoded - to be used for Encoding later.
  167. func xmlSeqToMapParser(skey string, a []xml.Attr, p *xml.Decoder, r bool) (map[string]interface{}, error) {
  168. if snakeCaseKeys {
  169. skey = strings.Replace(skey, "-", "_", -1)
  170. }
  171. // NOTE: all attributes and sub-elements parsed into 'na', 'na' is returned as value for 'skey' in 'n'.
  172. var n, na map[string]interface{}
  173. var seq int // for including seq num when decoding
  174. // Allocate maps and load attributes, if any.
  175. // NOTE: on entry from NewMapXml(), etc., skey=="", and we fall through
  176. // to get StartElement then recurse with skey==xml.StartElement.Name.Local
  177. // where we begin allocating map[string]interface{} values 'n' and 'na'.
  178. if skey != "" {
  179. // 'n' only needs one slot - save call to runtime•hashGrow()
  180. // 'na' we don't know
  181. n = make(map[string]interface{}, 1)
  182. na = make(map[string]interface{})
  183. if len(a) > 0 {
  184. // xml.Attr is decoded into: map["#attr"]map[<attr_label>]interface{}
  185. // where interface{} is map[string]interface{}{"#text":<attr_val>, "#seq":<attr_seq>}
  186. aa := make(map[string]interface{}, len(a))
  187. for i, v := range a {
  188. if snakeCaseKeys {
  189. v.Name.Local = strings.Replace(v.Name.Local, "-", "_", -1)
  190. }
  191. if len(v.Name.Space) > 0 {
  192. aa[v.Name.Space+`:`+v.Name.Local] = map[string]interface{}{"#text": cast(v.Value, r, ""), "#seq": i}
  193. } else {
  194. aa[v.Name.Local] = map[string]interface{}{"#text": cast(v.Value, r, ""), "#seq": i}
  195. }
  196. }
  197. na["#attr"] = aa
  198. }
  199. }
  200. // Return XMPP <stream:stream> message.
  201. if handleXMPPStreamTag && skey == "stream:stream" {
  202. n[skey] = na
  203. return n, nil
  204. }
  205. for {
  206. t, err := p.RawToken()
  207. if err != nil {
  208. if err != io.EOF {
  209. return nil, errors.New("xml.Decoder.Token() - " + err.Error())
  210. }
  211. return nil, err
  212. }
  213. switch t.(type) {
  214. case xml.StartElement:
  215. tt := t.(xml.StartElement)
  216. // First call to xmlSeqToMapParser() doesn't pass xml.StartElement - the map key.
  217. // So when the loop is first entered, the first token is the root tag along
  218. // with any attributes, which we process here.
  219. //
  220. // Subsequent calls to xmlSeqToMapParser() will pass in tag+attributes for
  221. // processing before getting the next token which is the element value,
  222. // which is done above.
  223. if skey == "" {
  224. if len(tt.Name.Space) > 0 {
  225. return xmlSeqToMapParser(tt.Name.Space+`:`+tt.Name.Local, tt.Attr, p, r)
  226. } else {
  227. return xmlSeqToMapParser(tt.Name.Local, tt.Attr, p, r)
  228. }
  229. }
  230. // If not initializing the map, parse the element.
  231. // len(nn) == 1, necessarily - it is just an 'n'.
  232. var nn map[string]interface{}
  233. if len(tt.Name.Space) > 0 {
  234. nn, err = xmlSeqToMapParser(tt.Name.Space+`:`+tt.Name.Local, tt.Attr, p, r)
  235. } else {
  236. nn, err = xmlSeqToMapParser(tt.Name.Local, tt.Attr, p, r)
  237. }
  238. if err != nil {
  239. return nil, err
  240. }
  241. // The nn map[string]interface{} value is a na[nn_key] value.
  242. // We need to see if nn_key already exists - means we're parsing a list.
  243. // This may require converting na[nn_key] value into []interface{} type.
  244. // First, extract the key:val for the map - it's a singleton.
  245. var key string
  246. var val interface{}
  247. for key, val = range nn {
  248. break
  249. }
  250. // add "#seq" k:v pair -
  251. // Sequence number included even in list elements - this should allow us
  252. // to properly resequence even something goofy like:
  253. // <list>item 1</list>
  254. // <subelement>item 2</subelement>
  255. // <list>item 3</list>
  256. // where all the "list" subelements are decoded into an array.
  257. switch val.(type) {
  258. case map[string]interface{}:
  259. val.(map[string]interface{})["#seq"] = seq
  260. seq++
  261. case interface{}: // a non-nil simple element: string, float64, bool
  262. v := map[string]interface{}{"#text": val, "#seq": seq}
  263. seq++
  264. val = v
  265. }
  266. // 'na' holding sub-elements of n.
  267. // See if 'key' already exists.
  268. // If 'key' exists, then this is a list, if not just add key:val to na.
  269. if v, ok := na[key]; ok {
  270. var a []interface{}
  271. switch v.(type) {
  272. case []interface{}:
  273. a = v.([]interface{})
  274. default: // anything else - note: v.(type) != nil
  275. a = []interface{}{v}
  276. }
  277. a = append(a, val)
  278. na[key] = a
  279. } else {
  280. na[key] = val // save it as a singleton
  281. }
  282. case xml.EndElement:
  283. if skey != "" {
  284. tt := t.(xml.EndElement)
  285. if snakeCaseKeys {
  286. tt.Name.Local = strings.Replace(tt.Name.Local, "-", "_", -1)
  287. }
  288. var name string
  289. if len(tt.Name.Space) > 0 {
  290. name = tt.Name.Space + `:` + tt.Name.Local
  291. } else {
  292. name = tt.Name.Local
  293. }
  294. if skey != name {
  295. return nil, fmt.Errorf("element %s not properly terminated, got %s at #%d",
  296. skey, name, p.InputOffset())
  297. }
  298. }
  299. // len(n) > 0 if this is a simple element w/o xml.Attrs - see xml.CharData case.
  300. if len(n) == 0 {
  301. // If len(na)==0 we have an empty element == "";
  302. // it has no xml.Attr nor xml.CharData.
  303. // Empty element content will be map["etag"]map["#text"]""
  304. // after #seq injection - map["etag"]map["#seq"]seq - after return.
  305. if len(na) > 0 {
  306. n[skey] = na
  307. } else {
  308. n[skey] = "" // empty element
  309. }
  310. }
  311. return n, nil
  312. case xml.CharData:
  313. // clean up possible noise
  314. tt := strings.Trim(string(t.(xml.CharData)), "\t\r\b\n ")
  315. if skey == "" {
  316. // per Adrian (http://www.adrianlungu.com/) catch stray text
  317. // in decoder stream -
  318. // https://github.com/clbanning/mxj/pull/14#issuecomment-182816374
  319. // NOTE: CharSetReader must be set to non-UTF-8 CharSet or you'll get
  320. // a p.Token() decoding error when the BOM is UTF-16 or UTF-32.
  321. continue
  322. }
  323. if len(tt) > 0 {
  324. // every simple element is a #text and has #seq associated with it
  325. na["#text"] = cast(tt, r, "")
  326. na["#seq"] = seq
  327. seq++
  328. }
  329. case xml.Comment:
  330. if n == nil { // no root 'key'
  331. n = map[string]interface{}{"#comment": string(t.(xml.Comment))}
  332. return n, NoRoot
  333. }
  334. cm := make(map[string]interface{}, 2)
  335. cm["#text"] = string(t.(xml.Comment))
  336. cm["#seq"] = seq
  337. seq++
  338. na["#comment"] = cm
  339. case xml.Directive:
  340. if n == nil { // no root 'key'
  341. n = map[string]interface{}{"#directive": string(t.(xml.Directive))}
  342. return n, NoRoot
  343. }
  344. dm := make(map[string]interface{}, 2)
  345. dm["#text"] = string(t.(xml.Directive))
  346. dm["#seq"] = seq
  347. seq++
  348. na["#directive"] = dm
  349. case xml.ProcInst:
  350. if n == nil {
  351. na = map[string]interface{}{"#target": t.(xml.ProcInst).Target, "#inst": string(t.(xml.ProcInst).Inst)}
  352. n = map[string]interface{}{"#procinst": na}
  353. return n, NoRoot
  354. }
  355. pm := make(map[string]interface{}, 3)
  356. pm["#target"] = t.(xml.ProcInst).Target
  357. pm["#inst"] = string(t.(xml.ProcInst).Inst)
  358. pm["#seq"] = seq
  359. seq++
  360. na["#procinst"] = pm
  361. default:
  362. // noop - shouldn't ever get here, now, since we handle all token types
  363. }
  364. }
  365. }
  366. // ------------------ END: NewMapXml & NewMapXmlReader -------------------------
  367. // --------------------- mv.XmlSeq & mv.XmlSeqWriter -------------------------
  368. // Xml encodes a MapSeq as XML with elements sorted on #seq. The companion of NewMapXmlSeq().
  369. // The following rules apply.
  370. // - The "#seq" key value is used to seqence the subelements or attributes only.
  371. // - The "#attr" map key identifies the map of attribute map[string]interface{} values with "#text" key.
  372. // - The "#comment" map key identifies a comment in the value "#text" map entry - <!--comment-->.
  373. // - The "#directive" map key identifies a directive in the value "#text" map entry - <!directive>.
  374. // - The "#procinst" map key identifies a process instruction in the value "#target" and "#inst"
  375. // map entries - <?target inst?>.
  376. // - Value type encoding:
  377. // > string, bool, float64, int, int32, int64, float32: per "%v" formating
  378. // > []bool, []uint8: by casting to string
  379. // > structures, etc.: handed to xml.Marshal() - if there is an error, the element
  380. // value is "UNKNOWN"
  381. // - Elements with only attribute values or are null are terminated using "/>" unless XmlGoEmptyElemSystax() called.
  382. // - If len(mv) == 1 and no rootTag is provided, then the map key is used as the root tag, possible.
  383. // Thus, `{ "key":"value" }` encodes as "<key>value</key>".
  384. func (mv MapSeq) Xml(rootTag ...string) ([]byte, error) {
  385. m := map[string]interface{}(mv)
  386. var err error
  387. s := new(string)
  388. p := new(pretty) // just a stub
  389. if len(m) == 1 && len(rootTag) == 0 {
  390. for key, value := range m {
  391. // if it's an array, see if all values are map[string]interface{}
  392. // we force a new root tag if we'll end up with no key:value in the list
  393. // so: key:[string_val, bool:true] --> <doc><key>string_val</key><bool>true</bool></doc>
  394. switch value.(type) {
  395. case []interface{}:
  396. for _, v := range value.([]interface{}) {
  397. switch v.(type) {
  398. case map[string]interface{}: // noop
  399. default: // anything else
  400. err = mapToXmlSeqIndent(false, s, DefaultRootTag, m, p)
  401. goto done
  402. }
  403. }
  404. }
  405. err = mapToXmlSeqIndent(false, s, key, value, p)
  406. }
  407. } else if len(rootTag) == 1 {
  408. err = mapToXmlSeqIndent(false, s, rootTag[0], m, p)
  409. } else {
  410. err = mapToXmlSeqIndent(false, s, DefaultRootTag, m, p)
  411. }
  412. done:
  413. return []byte(*s), err
  414. }
  415. // The following implementation is provided only for symmetry with NewMapXmlReader[Raw]
  416. // The names will also provide a key for the number of return arguments.
  417. // XmlWriter Writes the MapSeq value as XML on the Writer.
  418. // See MapSeq.Xml() for encoding rules.
  419. func (mv MapSeq) XmlWriter(xmlWriter io.Writer, rootTag ...string) error {
  420. x, err := mv.Xml(rootTag...)
  421. if err != nil {
  422. return err
  423. }
  424. _, err = xmlWriter.Write(x)
  425. return err
  426. }
  427. // XmlWriteRaw writes the MapSeq value as XML on the Writer. []byte is the raw XML that was written.
  428. // See Map.XmlSeq() for encoding rules.
  429. /*
  430. func (mv MapSeq) XmlWriterRaw(xmlWriter io.Writer, rootTag ...string) ([]byte, error) {
  431. x, err := mv.Xml(rootTag...)
  432. if err != nil {
  433. return x, err
  434. }
  435. _, err = xmlWriter.Write(x)
  436. return x, err
  437. }
  438. */
  439. // XmlIndentWriter writes the MapSeq value as pretty XML on the Writer.
  440. // See MapSeq.Xml() for encoding rules.
  441. func (mv MapSeq) XmlIndentWriter(xmlWriter io.Writer, prefix, indent string, rootTag ...string) error {
  442. x, err := mv.XmlIndent(prefix, indent, rootTag...)
  443. if err != nil {
  444. return err
  445. }
  446. _, err = xmlWriter.Write(x)
  447. return err
  448. }
  449. // XmlIndentWriterRaw writes the Map as pretty XML on the Writer. []byte is the raw XML that was written.
  450. // See Map.XmlSeq() for encoding rules.
  451. /*
  452. func (mv MapSeq) XmlIndentWriterRaw(xmlWriter io.Writer, prefix, indent string, rootTag ...string) ([]byte, error) {
  453. x, err := mv.XmlSeqIndent(prefix, indent, rootTag...)
  454. if err != nil {
  455. return x, err
  456. }
  457. _, err = xmlWriter.Write(x)
  458. return x, err
  459. }
  460. */
  461. // -------------------- END: mv.Xml & mv.XmlWriter -------------------------------
  462. // ---------------------- XmlSeqIndent ----------------------------
  463. // XmlIndent encodes a map[string]interface{} as a pretty XML string.
  464. // See MapSeq.XmlSeq() for encoding rules.
  465. func (mv MapSeq) XmlIndent(prefix, indent string, rootTag ...string) ([]byte, error) {
  466. m := map[string]interface{}(mv)
  467. var err error
  468. s := new(string)
  469. p := new(pretty)
  470. p.indent = indent
  471. p.padding = prefix
  472. if len(m) == 1 && len(rootTag) == 0 {
  473. // this can extract the key for the single map element
  474. // use it if it isn't a key for a list
  475. for key, value := range m {
  476. if _, ok := value.([]interface{}); ok {
  477. err = mapToXmlSeqIndent(true, s, DefaultRootTag, m, p)
  478. } else {
  479. err = mapToXmlSeqIndent(true, s, key, value, p)
  480. }
  481. }
  482. } else if len(rootTag) == 1 {
  483. err = mapToXmlSeqIndent(true, s, rootTag[0], m, p)
  484. } else {
  485. err = mapToXmlSeqIndent(true, s, DefaultRootTag, m, p)
  486. }
  487. return []byte(*s), err
  488. }
  489. // where the work actually happens
  490. // returns an error if an attribute is not atomic
  491. func mapToXmlSeqIndent(doIndent bool, s *string, key string, value interface{}, pp *pretty) error {
  492. var endTag bool
  493. var isSimple bool
  494. var noEndTag bool
  495. var elen int
  496. var ss string
  497. p := &pretty{pp.indent, pp.cnt, pp.padding, pp.mapDepth, pp.start}
  498. switch value.(type) {
  499. case map[string]interface{}, []byte, string, float64, bool, int, int32, int64, float32:
  500. if doIndent {
  501. *s += p.padding
  502. }
  503. if key != "#comment" && key != "#directive" && key != "#procinst" {
  504. *s += `<` + key
  505. }
  506. }
  507. switch value.(type) {
  508. case map[string]interface{}:
  509. val := value.(map[string]interface{})
  510. if key == "#comment" {
  511. *s += `<!--` + val["#text"].(string) + `-->`
  512. noEndTag = true
  513. break
  514. }
  515. if key == "#directive" {
  516. *s += `<!` + val["#text"].(string) + `>`
  517. noEndTag = true
  518. break
  519. }
  520. if key == "#procinst" {
  521. *s += `<?` + val["#target"].(string) + ` ` + val["#inst"].(string) + `?>`
  522. noEndTag = true
  523. break
  524. }
  525. haveAttrs := false
  526. // process attributes first
  527. if v, ok := val["#attr"].(map[string]interface{}); ok {
  528. // First, unroll the map[string]interface{} into a []keyval array.
  529. // Then sequence it.
  530. kv := make([]keyval, len(v))
  531. n := 0
  532. for ak, av := range v {
  533. kv[n] = keyval{ak, av}
  534. n++
  535. }
  536. sort.Sort(elemListSeq(kv))
  537. // Now encode the attributes in original decoding sequence, using keyval array.
  538. for _, a := range kv {
  539. vv := a.v.(map[string]interface{})
  540. switch vv["#text"].(type) {
  541. case string:
  542. if xmlEscapeChars {
  543. ss = escapeChars(vv["#text"].(string))
  544. } else {
  545. ss = vv["#text"].(string)
  546. }
  547. *s += ` ` + a.k + `="` + ss + `"`
  548. case float64, bool, int, int32, int64, float32:
  549. *s += ` ` + a.k + `="` + fmt.Sprintf("%v", vv["#text"]) + `"`
  550. case []byte:
  551. if xmlEscapeChars {
  552. ss = escapeChars(string(vv["#text"].([]byte)))
  553. } else {
  554. ss = string(vv["#text"].([]byte))
  555. }
  556. *s += ` ` + a.k + `="` + ss + `"`
  557. default:
  558. return fmt.Errorf("invalid attribute value for: %s", a.k)
  559. }
  560. }
  561. haveAttrs = true
  562. }
  563. // simple element?
  564. // every map value has, at least, "#seq" and, perhaps, "#text" and/or "#attr"
  565. _, seqOK := val["#seq"] // have key
  566. if v, ok := val["#text"]; ok && ((len(val) == 3 && haveAttrs) || (len(val) == 2 && !haveAttrs)) && seqOK {
  567. if stmp, ok := v.(string); ok && stmp != "" {
  568. if xmlEscapeChars {
  569. stmp = escapeChars(stmp)
  570. }
  571. *s += ">" + stmp
  572. endTag = true
  573. elen = 1
  574. }
  575. isSimple = true
  576. break
  577. } else if !ok && ((len(val) == 2 && haveAttrs) || (len(val) == 1 && !haveAttrs)) && seqOK {
  578. // here no #text but have #seq or #seq+#attr
  579. endTag = false
  580. break
  581. }
  582. // we now need to sequence everything except attributes
  583. // 'kv' will hold everything that needs to be written
  584. kv := make([]keyval, 0)
  585. for k, v := range val {
  586. if k == "#attr" { // already processed
  587. continue
  588. }
  589. if k == "#seq" { // ignore - just for sorting
  590. continue
  591. }
  592. switch v.(type) {
  593. case []interface{}:
  594. // unwind the array as separate entries
  595. for _, vv := range v.([]interface{}) {
  596. kv = append(kv, keyval{k, vv})
  597. }
  598. default:
  599. kv = append(kv, keyval{k, v})
  600. }
  601. }
  602. // close tag with possible attributes
  603. *s += ">"
  604. if doIndent {
  605. *s += "\n"
  606. }
  607. // something more complex
  608. p.mapDepth++
  609. sort.Sort(elemListSeq(kv))
  610. i := 0
  611. for _, v := range kv {
  612. switch v.v.(type) {
  613. case []interface{}:
  614. default:
  615. if i == 0 && doIndent {
  616. p.Indent()
  617. }
  618. }
  619. i++
  620. if err := mapToXmlSeqIndent(doIndent, s, v.k, v.v, p); err != nil {
  621. return err
  622. }
  623. switch v.v.(type) {
  624. case []interface{}: // handled in []interface{} case
  625. default:
  626. if doIndent {
  627. p.Outdent()
  628. }
  629. }
  630. i--
  631. }
  632. p.mapDepth--
  633. endTag = true
  634. elen = 1 // we do have some content other than attrs
  635. case []interface{}:
  636. for _, v := range value.([]interface{}) {
  637. if doIndent {
  638. p.Indent()
  639. }
  640. if err := mapToXmlSeqIndent(doIndent, s, key, v, p); err != nil {
  641. return err
  642. }
  643. if doIndent {
  644. p.Outdent()
  645. }
  646. }
  647. return nil
  648. case nil:
  649. // terminate the tag
  650. if doIndent {
  651. *s += p.padding
  652. }
  653. *s += "<" + key
  654. endTag, isSimple = true, true
  655. break
  656. default: // handle anything - even goofy stuff
  657. elen = 0
  658. switch value.(type) {
  659. case string:
  660. if xmlEscapeChars {
  661. ss = escapeChars(value.(string))
  662. } else {
  663. ss = value.(string)
  664. }
  665. elen = len(ss)
  666. if elen > 0 {
  667. *s += ">" + ss
  668. }
  669. case float64, bool, int, int32, int64, float32:
  670. v := fmt.Sprintf("%v", value)
  671. elen = len(v)
  672. if elen > 0 {
  673. *s += ">" + v
  674. }
  675. case []byte: // NOTE: byte is just an alias for uint8
  676. // similar to how xml.Marshal handles []byte structure members
  677. if xmlEscapeChars {
  678. ss = escapeChars(string(value.([]byte)))
  679. } else {
  680. ss = string(value.([]byte))
  681. }
  682. elen = len(ss)
  683. if elen > 0 {
  684. *s += ">" + ss
  685. }
  686. default:
  687. var v []byte
  688. var err error
  689. if doIndent {
  690. v, err = xml.MarshalIndent(value, p.padding, p.indent)
  691. } else {
  692. v, err = xml.Marshal(value)
  693. }
  694. if err != nil {
  695. *s += ">UNKNOWN"
  696. } else {
  697. elen = len(v)
  698. if elen > 0 {
  699. *s += string(v)
  700. }
  701. }
  702. }
  703. isSimple = true
  704. endTag = true
  705. }
  706. if endTag && !noEndTag {
  707. if doIndent {
  708. if !isSimple {
  709. *s += p.padding
  710. }
  711. }
  712. switch value.(type) {
  713. case map[string]interface{}, []byte, string, float64, bool, int, int32, int64, float32:
  714. if elen > 0 || useGoXmlEmptyElemSyntax {
  715. if elen == 0 {
  716. *s += ">"
  717. }
  718. *s += `</` + key + ">"
  719. } else {
  720. *s += `/>`
  721. }
  722. }
  723. } else if !noEndTag {
  724. if useGoXmlEmptyElemSyntax {
  725. *s += `</` + key + ">"
  726. // *s += "></" + key + ">"
  727. } else {
  728. *s += "/>"
  729. }
  730. }
  731. if doIndent {
  732. if p.cnt > p.start {
  733. *s += "\n"
  734. }
  735. p.Outdent()
  736. }
  737. return nil
  738. }
  739. // the element sort implementation
  740. type keyval struct {
  741. k string
  742. v interface{}
  743. }
  744. type elemListSeq []keyval
  745. func (e elemListSeq) Len() int {
  746. return len(e)
  747. }
  748. func (e elemListSeq) Swap(i, j int) {
  749. e[i], e[j] = e[j], e[i]
  750. }
  751. func (e elemListSeq) Less(i, j int) bool {
  752. var iseq, jseq int
  753. var fiseq, fjseq float64
  754. var ok bool
  755. if iseq, ok = e[i].v.(map[string]interface{})["#seq"].(int); !ok {
  756. if fiseq, ok = e[i].v.(map[string]interface{})["#seq"].(float64); ok {
  757. iseq = int(fiseq)
  758. } else {
  759. iseq = 9999999
  760. }
  761. }
  762. if jseq, ok = e[j].v.(map[string]interface{})["#seq"].(int); !ok {
  763. if fjseq, ok = e[j].v.(map[string]interface{})["#seq"].(float64); ok {
  764. jseq = int(fjseq)
  765. } else {
  766. jseq = 9999999
  767. }
  768. }
  769. return iseq <= jseq
  770. }
  771. // =============== https://groups.google.com/forum/#!topic/golang-nuts/lHPOHD-8qio
  772. // BeautifyXml (re)formats an XML doc similar to Map.XmlIndent().
  773. // It preserves comments, directives and process instructions,
  774. func BeautifyXml(b []byte, prefix, indent string) ([]byte, error) {
  775. x, err := NewMapXmlSeq(b)
  776. if err != nil {
  777. return nil, err
  778. }
  779. return x.XmlIndent(prefix, indent)
  780. }