xmlseq.go 28 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902
  1. // Copyright 2012-2016, 2019 Charles Banning. All rights reserved.
  2. // Use of this source code is governed by a BSD-style
  3. // license that can be found in the LICENSE file
  4. // xmlseq.go - version of xml.go with sequence # injection on Decoding and sorting on Encoding.
  5. // Also, handles comments, directives and process instructions.
  6. package mxj
  7. import (
  8. "bytes"
  9. "encoding/xml"
  10. "errors"
  11. "fmt"
  12. "io"
  13. "regexp"
  14. "sort"
  15. "strings"
  16. )
  17. // MapSeq is like Map but contains sequencing indices to allow recovering the original order of
  18. // the XML elements when the map[string]interface{} is marshaled. Element attributes are
  19. // stored as a map["#attr"]map[<attr_key>]map[string]interface{}{"#text":"<value>", "#seq":<attr_index>}
  20. // value instead of denoting the keys with a prefix character. Also, comments, directives and
  21. // process instructions are preserved.
  22. type MapSeq map[string]interface{}
  23. // NoRoot is returned by NewMapXmlSeq, etc., when a comment, directive or procinst element is parsed
  24. // in the XML data stream and the element is not contained in an XML object with a root element.
  25. var NoRoot = errors.New("no root key")
  26. var NO_ROOT = NoRoot // NO_ROOT is an alias of NoRoot, kept only to maintain backwards compatibility.
  27. // ------------------- NewMapXmlSeq & NewMapXmlSeqReader ... -------------------------
  28. // NewMapXmlSeq converts a XML doc into a MapSeq value with elements id'd with decoding sequence key represented
  29. // as map["#seq"]<int value>.
  30. // If the optional argument 'cast' is 'true', then values will be converted to boolean or float64 if possible.
  31. // NOTE: "#seq" key/value pairs are removed on encoding with msv.Xml() / msv.XmlIndent().
  32. // • attributes are a map - map["#attr"]map["attr_key"]map[string]interface{}{"#text":<aval>, "#seq":<num>}
  33. // • all simple elements are decoded as map["#text"]interface{} with a "#seq" k:v pair, as well.
  34. // • lists always decode as map["list_tag"][]map[string]interface{} where the array elements are maps that
  35. // include a "#seq" k:v pair based on sequence they are decoded. Thus, XML like:
  36. // <doc>
  37. // <ltag>value 1</ltag>
  38. // <newtag>value 2</newtag>
  39. // <ltag>value 3</ltag>
  40. // </doc>
  41. // is decoded as:
  42. // doc :
  43. // ltag :[[]interface{}]
  44. // [item: 0]
  45. // #seq :[int] 0
  46. // #text :[string] value 1
  47. // [item: 1]
  48. // #seq :[int] 2
  49. // #text :[string] value 3
  50. // newtag :
  51. // #seq :[int] 1
  52. // #text :[string] value 2
  53. // It will encode in proper sequence even though the MapSeq representation merges all "ltag" elements in an array.
  54. // • comments - "<!--comment-->" - are decoded as map["#comment"]map["#text"]"cmnt_text" with a "#seq" k:v pair.
  55. // • directives - "<!text>" - are decoded as map["#directive"]map["#text"]"directive_text" with a "#seq" k:v pair.
  56. // • process instructions - "<?instr?>" - are decoded as map["#procinst"]interface{} where the #procinst value
  57. // is of map[string]interface{} type with the following keys: #target, #inst, and #seq.
  58. // • comments, directives, and procinsts that are NOT part of a document with a root key will be returned as
  59. // map[string]interface{} and the error value 'NoRoot'.
  60. // • note: "<![CDATA[" syntax is lost in xml.Decode parser - and is not handled here, either.
  61. // and: "\r\n" is converted to "\n"
  62. //
  63. // NOTES:
  64. // 1. The 'xmlVal' will be parsed looking for an xml.StartElement, xml.Comment, etc., so BOM and other
  65. // extraneous xml.CharData will be ignored unless io.EOF is reached first.
  66. // 2. CoerceKeysToLower() is NOT recognized, since the intent here is to eventually call m.XmlSeq() to
  67. // re-encode the message in its original structure.
  68. // 3. If CoerceKeysToSnakeCase() has been called, then all key values will be converted to snake case.
  69. //
  70. // NAME SPACES:
  71. // 1. Keys in the MapSeq value that are parsed from a <name space prefix>:<local name> tag preserve the
  72. // "<prefix>:" notation rather than stripping it as with NewMapXml().
  73. // 2. Attribute keys for name space prefix declarations preserve "xmlns:<prefix>" notation.
  74. //
  75. // ERRORS:
  76. // 1. If a NoRoot error, "no root key," is returned, check the initial map key for a "#comment",
  77. // "#directive" or "#procinst" key.
  78. // 2. Unmarshaling an XML doc that is formatted using the whitespace character, " ", will error, since
  79. // Decoder.RawToken treats such occurrences as significant. See NewMapFormattedXmlSeq().
  80. func NewMapXmlSeq(xmlVal []byte, cast ...bool) (MapSeq, error) {
  81. var r bool
  82. if len(cast) == 1 {
  83. r = cast[0]
  84. }
  85. return xmlSeqToMap(xmlVal, r)
  86. }
  87. // NewMapFormattedXmlSeq performs the same as NewMapXmlSeq but is useful for processing XML objects that
  88. // are formatted using the whitespace character, " ". (The stdlib xml.Decoder, by default, treats all
  89. // whitespace as significant; Decoder.Token() and Decoder.RawToken() will return strings of one or more
  90. // whitespace characters and without alphanumeric or punctuation characters as xml.CharData values.)
  91. //
  92. // If you're processing such XML, then this will convert all occurrences of whitespace-only strings
  93. // into an empty string, "", prior to parsing the XML - irrespective of whether the occurrence is
  94. // formatting or is an actual element value.
  95. func NewMapFormattedXmlSeq(xmlVal []byte, cast ...bool) (MapSeq, error) {
  96. var c bool
  97. if len(cast) == 1 {
  98. c = cast[0]
  99. }
  100. // Per PR #104 - clean out formatting characters so they don't show up in Decoder.RawToken() stream.
  101. // NOTE: Also replaces element values that are solely comprised of formatting/whitespace characters
  102. // with empty string, "".
  103. r := regexp.MustCompile(`>[\n\t\r ]*<`)
  104. xmlVal = r.ReplaceAll(xmlVal, []byte("><"))
  105. return xmlSeqToMap(xmlVal, c)
  106. }
  107. // NewMpaXmlSeqReader returns next XML doc from an io.Reader as a MapSeq value.
  108. // NOTES:
  109. // 1. The 'xmlReader' will be parsed looking for an xml.StartElement, xml.Comment, etc., so BOM and other
  110. // extraneous xml.CharData will be ignored unless io.EOF is reached first.
  111. // 2. CoerceKeysToLower() is NOT recognized, since the intent here is to eventually call m.XmlSeq() to
  112. // re-encode the message in its original structure.
  113. // 3. If CoerceKeysToSnakeCase() has been called, then all key values will be converted to snake case.
  114. //
  115. // ERRORS:
  116. // 1. If a NoRoot error, "no root key," is returned, check the initial map key for a "#comment",
  117. // "#directive" or #procinst" key.
  118. func NewMapXmlSeqReader(xmlReader io.Reader, cast ...bool) (MapSeq, error) {
  119. var r bool
  120. if len(cast) == 1 {
  121. r = cast[0]
  122. }
  123. // We need to put an *os.File reader in a ByteReader or the xml.NewDecoder
  124. // will wrap it in a bufio.Reader and seek on the file beyond where the
  125. // xml.Decoder parses!
  126. if _, ok := xmlReader.(io.ByteReader); !ok {
  127. xmlReader = myByteReader(xmlReader) // see code at EOF
  128. }
  129. // build the map
  130. return xmlSeqReaderToMap(xmlReader, r)
  131. }
  132. // NewMapXmlSeqReaderRaw returns the next XML doc from an io.Reader as a MapSeq value.
  133. // Returns MapSeq value, slice with the raw XML, and any error.
  134. // NOTES:
  135. // 1. Due to the implementation of xml.Decoder, the raw XML off the reader is buffered to []byte
  136. // using a ByteReader. If the io.Reader is an os.File, there may be significant performance impact.
  137. // See the examples - getmetrics1.go through getmetrics4.go - for comparative use cases on a large
  138. // data set. If the io.Reader is wrapping a []byte value in-memory, however, such as http.Request.Body
  139. // you CAN use it to efficiently unmarshal a XML doc and retrieve the raw XML in a single call.
  140. // 2. The 'raw' return value may be larger than the XML text value.
  141. // 3. The 'xmlReader' will be parsed looking for an xml.StartElement, xml.Comment, etc., so BOM and other
  142. // extraneous xml.CharData will be ignored unless io.EOF is reached first.
  143. // 4. CoerceKeysToLower() is NOT recognized, since the intent here is to eventually call m.XmlSeq() to
  144. // re-encode the message in its original structure.
  145. // 5. If CoerceKeysToSnakeCase() has been called, then all key values will be converted to snake case.
  146. //
  147. // ERRORS:
  148. // 1. If a NoRoot error, "no root key," is returned, check if the initial map key is "#comment",
  149. // "#directive" or "#procinst" key.
  150. func NewMapXmlSeqReaderRaw(xmlReader io.Reader, cast ...bool) (MapSeq, []byte, error) {
  151. var r bool
  152. if len(cast) == 1 {
  153. r = cast[0]
  154. }
  155. // create TeeReader so we can retrieve raw XML
  156. buf := make([]byte, 0)
  157. wb := bytes.NewBuffer(buf)
  158. trdr := myTeeReader(xmlReader, wb)
  159. m, err := xmlSeqReaderToMap(trdr, r)
  160. // retrieve the raw XML that was decoded
  161. b := wb.Bytes()
  162. // err may be NoRoot
  163. return m, b, err
  164. }
  165. // xmlSeqReaderToMap() - parse a XML io.Reader to a map[string]interface{} value
  166. func xmlSeqReaderToMap(rdr io.Reader, r bool) (map[string]interface{}, error) {
  167. // parse the Reader
  168. p := xml.NewDecoder(rdr)
  169. if CustomDecoder != nil {
  170. useCustomDecoder(p)
  171. } else {
  172. p.CharsetReader = XmlCharsetReader
  173. }
  174. return xmlSeqToMapParser("", nil, p, r)
  175. }
  176. // xmlSeqToMap - convert a XML doc into map[string]interface{} value
  177. func xmlSeqToMap(doc []byte, r bool) (map[string]interface{}, error) {
  178. b := bytes.NewReader(doc)
  179. p := xml.NewDecoder(b)
  180. if CustomDecoder != nil {
  181. useCustomDecoder(p)
  182. } else {
  183. p.CharsetReader = XmlCharsetReader
  184. }
  185. return xmlSeqToMapParser("", nil, p, r)
  186. }
  187. // ===================================== where the work happens =============================
  188. // xmlSeqToMapParser - load a 'clean' XML doc into a map[string]interface{} directly.
  189. // Add #seq tag value for each element decoded - to be used for Encoding later.
  190. func xmlSeqToMapParser(skey string, a []xml.Attr, p *xml.Decoder, r bool) (map[string]interface{}, error) {
  191. if snakeCaseKeys {
  192. skey = strings.Replace(skey, "-", "_", -1)
  193. }
  194. // NOTE: all attributes and sub-elements parsed into 'na', 'na' is returned as value for 'skey' in 'n'.
  195. var n, na map[string]interface{}
  196. var seq int // for including seq num when decoding
  197. // Allocate maps and load attributes, if any.
  198. // NOTE: on entry from NewMapXml(), etc., skey=="", and we fall through
  199. // to get StartElement then recurse with skey==xml.StartElement.Name.Local
  200. // where we begin allocating map[string]interface{} values 'n' and 'na'.
  201. if skey != "" {
  202. // 'n' only needs one slot - save call to runtime•hashGrow()
  203. // 'na' we don't know
  204. n = make(map[string]interface{}, 1)
  205. na = make(map[string]interface{})
  206. if len(a) > 0 {
  207. // xml.Attr is decoded into: map["#attr"]map[<attr_label>]interface{}
  208. // where interface{} is map[string]interface{}{"#text":<attr_val>, "#seq":<attr_seq>}
  209. aa := make(map[string]interface{}, len(a))
  210. for i, v := range a {
  211. if snakeCaseKeys {
  212. v.Name.Local = strings.Replace(v.Name.Local, "-", "_", -1)
  213. }
  214. if xmlEscapeCharsDecoder { // per issue#84
  215. v.Value = escapeChars(v.Value)
  216. }
  217. if len(v.Name.Space) > 0 {
  218. aa[v.Name.Space+`:`+v.Name.Local] = map[string]interface{}{textK: cast(v.Value, r, ""), seqK: i}
  219. } else {
  220. aa[v.Name.Local] = map[string]interface{}{textK: cast(v.Value, r, ""), seqK: i}
  221. }
  222. }
  223. na[attrK] = aa
  224. }
  225. }
  226. // Return XMPP <stream:stream> message.
  227. if handleXMPPStreamTag && skey == "stream:stream" {
  228. n[skey] = na
  229. return n, nil
  230. }
  231. for {
  232. t, err := p.RawToken()
  233. if err != nil {
  234. if err != io.EOF {
  235. return nil, errors.New("xml.Decoder.Token() - " + err.Error())
  236. }
  237. return nil, err
  238. }
  239. switch t.(type) {
  240. case xml.StartElement:
  241. tt := t.(xml.StartElement)
  242. // First call to xmlSeqToMapParser() doesn't pass xml.StartElement - the map key.
  243. // So when the loop is first entered, the first token is the root tag along
  244. // with any attributes, which we process here.
  245. //
  246. // Subsequent calls to xmlSeqToMapParser() will pass in tag+attributes for
  247. // processing before getting the next token which is the element value,
  248. // which is done above.
  249. if skey == "" {
  250. if len(tt.Name.Space) > 0 {
  251. return xmlSeqToMapParser(tt.Name.Space+`:`+tt.Name.Local, tt.Attr, p, r)
  252. } else {
  253. return xmlSeqToMapParser(tt.Name.Local, tt.Attr, p, r)
  254. }
  255. }
  256. // If not initializing the map, parse the element.
  257. // len(nn) == 1, necessarily - it is just an 'n'.
  258. var nn map[string]interface{}
  259. if len(tt.Name.Space) > 0 {
  260. nn, err = xmlSeqToMapParser(tt.Name.Space+`:`+tt.Name.Local, tt.Attr, p, r)
  261. } else {
  262. nn, err = xmlSeqToMapParser(tt.Name.Local, tt.Attr, p, r)
  263. }
  264. if err != nil {
  265. return nil, err
  266. }
  267. // The nn map[string]interface{} value is a na[nn_key] value.
  268. // We need to see if nn_key already exists - means we're parsing a list.
  269. // This may require converting na[nn_key] value into []interface{} type.
  270. // First, extract the key:val for the map - it's a singleton.
  271. var key string
  272. var val interface{}
  273. for key, val = range nn {
  274. break
  275. }
  276. // add "#seq" k:v pair -
  277. // Sequence number included even in list elements - this should allow us
  278. // to properly resequence even something goofy like:
  279. // <list>item 1</list>
  280. // <subelement>item 2</subelement>
  281. // <list>item 3</list>
  282. // where all the "list" subelements are decoded into an array.
  283. switch val.(type) {
  284. case map[string]interface{}:
  285. val.(map[string]interface{})[seqK] = seq
  286. seq++
  287. case interface{}: // a non-nil simple element: string, float64, bool
  288. v := map[string]interface{}{textK: val, seqK: seq}
  289. seq++
  290. val = v
  291. }
  292. // 'na' holding sub-elements of n.
  293. // See if 'key' already exists.
  294. // If 'key' exists, then this is a list, if not just add key:val to na.
  295. if v, ok := na[key]; ok {
  296. var a []interface{}
  297. switch v.(type) {
  298. case []interface{}:
  299. a = v.([]interface{})
  300. default: // anything else - note: v.(type) != nil
  301. a = []interface{}{v}
  302. }
  303. a = append(a, val)
  304. na[key] = a
  305. } else {
  306. na[key] = val // save it as a singleton
  307. }
  308. case xml.EndElement:
  309. if skey != "" {
  310. tt := t.(xml.EndElement)
  311. if snakeCaseKeys {
  312. tt.Name.Local = strings.Replace(tt.Name.Local, "-", "_", -1)
  313. }
  314. var name string
  315. if len(tt.Name.Space) > 0 {
  316. name = tt.Name.Space + `:` + tt.Name.Local
  317. } else {
  318. name = tt.Name.Local
  319. }
  320. if skey != name {
  321. return nil, fmt.Errorf("element %s not properly terminated, got %s at #%d",
  322. skey, name, p.InputOffset())
  323. }
  324. }
  325. // len(n) > 0 if this is a simple element w/o xml.Attrs - see xml.CharData case.
  326. if len(n) == 0 {
  327. // If len(na)==0 we have an empty element == "";
  328. // it has no xml.Attr nor xml.CharData.
  329. // Empty element content will be map["etag"]map["#text"]""
  330. // after #seq injection - map["etag"]map["#seq"]seq - after return.
  331. if len(na) > 0 {
  332. n[skey] = na
  333. } else {
  334. n[skey] = "" // empty element
  335. }
  336. }
  337. return n, nil
  338. case xml.CharData:
  339. // clean up possible noise
  340. tt := strings.Trim(string(t.(xml.CharData)), trimRunes)
  341. if xmlEscapeCharsDecoder { // issue#84
  342. tt = escapeChars(tt)
  343. }
  344. if skey == "" {
  345. // per Adrian (http://www.adrianlungu.com/) catch stray text
  346. // in decoder stream -
  347. // https://github.com/clbanning/mxj/pull/14#issuecomment-182816374
  348. // NOTE: CharSetReader must be set to non-UTF-8 CharSet or you'll get
  349. // a p.Token() decoding error when the BOM is UTF-16 or UTF-32.
  350. continue
  351. }
  352. if len(tt) > 0 {
  353. // every simple element is a #text and has #seq associated with it
  354. na[textK] = cast(tt, r, "")
  355. na[seqK] = seq
  356. seq++
  357. }
  358. case xml.Comment:
  359. if n == nil { // no root 'key'
  360. n = map[string]interface{}{commentK: string(t.(xml.Comment))}
  361. return n, NoRoot
  362. }
  363. cm := make(map[string]interface{}, 2)
  364. cm[textK] = string(t.(xml.Comment))
  365. cm[seqK] = seq
  366. seq++
  367. na[commentK] = cm
  368. case xml.Directive:
  369. if n == nil { // no root 'key'
  370. n = map[string]interface{}{directiveK: string(t.(xml.Directive))}
  371. return n, NoRoot
  372. }
  373. dm := make(map[string]interface{}, 2)
  374. dm[textK] = string(t.(xml.Directive))
  375. dm[seqK] = seq
  376. seq++
  377. na[directiveK] = dm
  378. case xml.ProcInst:
  379. if n == nil {
  380. na = map[string]interface{}{targetK: t.(xml.ProcInst).Target, instK: string(t.(xml.ProcInst).Inst)}
  381. n = map[string]interface{}{procinstK: na}
  382. return n, NoRoot
  383. }
  384. pm := make(map[string]interface{}, 3)
  385. pm[targetK] = t.(xml.ProcInst).Target
  386. pm[instK] = string(t.(xml.ProcInst).Inst)
  387. pm[seqK] = seq
  388. seq++
  389. na[procinstK] = pm
  390. default:
  391. // noop - shouldn't ever get here, now, since we handle all token types
  392. }
  393. }
  394. }
  395. // ------------------ END: NewMapXml & NewMapXmlReader -------------------------
  396. // --------------------- mv.XmlSeq & mv.XmlSeqWriter -------------------------
  397. // Xml encodes a MapSeq as XML with elements sorted on #seq. The companion of NewMapXmlSeq().
  398. // The following rules apply.
  399. // - The "#seq" key value is used to seqence the subelements or attributes only.
  400. // - The "#attr" map key identifies the map of attribute map[string]interface{} values with "#text" key.
  401. // - The "#comment" map key identifies a comment in the value "#text" map entry - <!--comment-->.
  402. // - The "#directive" map key identifies a directive in the value "#text" map entry - <!directive>.
  403. // - The "#procinst" map key identifies a process instruction in the value "#target" and "#inst"
  404. // map entries - <?target inst?>.
  405. // - Value type encoding:
  406. // > string, bool, float64, int, int32, int64, float32: per "%v" formating
  407. // > []bool, []uint8: by casting to string
  408. // > structures, etc.: handed to xml.Marshal() - if there is an error, the element
  409. // value is "UNKNOWN"
  410. // - Elements with only attribute values or are null are terminated using "/>" unless XmlGoEmptyElemSystax() called.
  411. // - If len(mv) == 1 and no rootTag is provided, then the map key is used as the root tag, possible.
  412. // Thus, `{ "key":"value" }` encodes as "<key>value</key>".
  413. func (mv MapSeq) Xml(rootTag ...string) ([]byte, error) {
  414. m := map[string]interface{}(mv)
  415. var err error
  416. s := new(string)
  417. p := new(pretty) // just a stub
  418. if len(m) == 1 && len(rootTag) == 0 {
  419. for key, value := range m {
  420. // if it's an array, see if all values are map[string]interface{}
  421. // we force a new root tag if we'll end up with no key:value in the list
  422. // so: key:[string_val, bool:true] --> <doc><key>string_val</key><bool>true</bool></doc>
  423. switch value.(type) {
  424. case []interface{}:
  425. for _, v := range value.([]interface{}) {
  426. switch v.(type) {
  427. case map[string]interface{}: // noop
  428. default: // anything else
  429. err = mapToXmlSeqIndent(false, s, DefaultRootTag, m, p)
  430. goto done
  431. }
  432. }
  433. }
  434. err = mapToXmlSeqIndent(false, s, key, value, p)
  435. }
  436. } else if len(rootTag) == 1 {
  437. err = mapToXmlSeqIndent(false, s, rootTag[0], m, p)
  438. } else {
  439. err = mapToXmlSeqIndent(false, s, DefaultRootTag, m, p)
  440. }
  441. done:
  442. if xmlCheckIsValid {
  443. d := xml.NewDecoder(bytes.NewReader([]byte(*s)))
  444. for {
  445. _, err = d.Token()
  446. if err == io.EOF {
  447. err = nil
  448. break
  449. } else if err != nil {
  450. return nil, err
  451. }
  452. }
  453. }
  454. return []byte(*s), err
  455. }
  456. // The following implementation is provided only for symmetry with NewMapXmlReader[Raw]
  457. // The names will also provide a key for the number of return arguments.
  458. // XmlWriter Writes the MapSeq value as XML on the Writer.
  459. // See MapSeq.Xml() for encoding rules.
  460. func (mv MapSeq) XmlWriter(xmlWriter io.Writer, rootTag ...string) error {
  461. x, err := mv.Xml(rootTag...)
  462. if err != nil {
  463. return err
  464. }
  465. _, err = xmlWriter.Write(x)
  466. return err
  467. }
  468. // XmlWriteRaw writes the MapSeq value as XML on the Writer. []byte is the raw XML that was written.
  469. // See Map.XmlSeq() for encoding rules.
  470. /*
  471. func (mv MapSeq) XmlWriterRaw(xmlWriter io.Writer, rootTag ...string) ([]byte, error) {
  472. x, err := mv.Xml(rootTag...)
  473. if err != nil {
  474. return x, err
  475. }
  476. _, err = xmlWriter.Write(x)
  477. return x, err
  478. }
  479. */
  480. // XmlIndentWriter writes the MapSeq value as pretty XML on the Writer.
  481. // See MapSeq.Xml() for encoding rules.
  482. func (mv MapSeq) XmlIndentWriter(xmlWriter io.Writer, prefix, indent string, rootTag ...string) error {
  483. x, err := mv.XmlIndent(prefix, indent, rootTag...)
  484. if err != nil {
  485. return err
  486. }
  487. _, err = xmlWriter.Write(x)
  488. return err
  489. }
  490. // XmlIndentWriterRaw writes the Map as pretty XML on the Writer. []byte is the raw XML that was written.
  491. // See Map.XmlSeq() for encoding rules.
  492. /*
  493. func (mv MapSeq) XmlIndentWriterRaw(xmlWriter io.Writer, prefix, indent string, rootTag ...string) ([]byte, error) {
  494. x, err := mv.XmlSeqIndent(prefix, indent, rootTag...)
  495. if err != nil {
  496. return x, err
  497. }
  498. _, err = xmlWriter.Write(x)
  499. return x, err
  500. }
  501. */
  502. // -------------------- END: mv.Xml & mv.XmlWriter -------------------------------
  503. // ---------------------- XmlSeqIndent ----------------------------
  504. // XmlIndent encodes a map[string]interface{} as a pretty XML string.
  505. // See MapSeq.XmlSeq() for encoding rules.
  506. func (mv MapSeq) XmlIndent(prefix, indent string, rootTag ...string) ([]byte, error) {
  507. m := map[string]interface{}(mv)
  508. var err error
  509. s := new(string)
  510. p := new(pretty)
  511. p.indent = indent
  512. p.padding = prefix
  513. if len(m) == 1 && len(rootTag) == 0 {
  514. // this can extract the key for the single map element
  515. // use it if it isn't a key for a list
  516. for key, value := range m {
  517. if _, ok := value.([]interface{}); ok {
  518. err = mapToXmlSeqIndent(true, s, DefaultRootTag, m, p)
  519. } else {
  520. err = mapToXmlSeqIndent(true, s, key, value, p)
  521. }
  522. }
  523. } else if len(rootTag) == 1 {
  524. err = mapToXmlSeqIndent(true, s, rootTag[0], m, p)
  525. } else {
  526. err = mapToXmlSeqIndent(true, s, DefaultRootTag, m, p)
  527. }
  528. if xmlCheckIsValid {
  529. if _, err = NewMapXml([]byte(*s)); err != nil {
  530. return nil, err
  531. }
  532. d := xml.NewDecoder(bytes.NewReader([]byte(*s)))
  533. for {
  534. _, err = d.Token()
  535. if err == io.EOF {
  536. err = nil
  537. break
  538. } else if err != nil {
  539. return nil, err
  540. }
  541. }
  542. }
  543. return []byte(*s), err
  544. }
  545. // where the work actually happens
  546. // returns an error if an attribute is not atomic
  547. func mapToXmlSeqIndent(doIndent bool, s *string, key string, value interface{}, pp *pretty) error {
  548. var endTag bool
  549. var isSimple bool
  550. var noEndTag bool
  551. var elen int
  552. var ss string
  553. p := &pretty{pp.indent, pp.cnt, pp.padding, pp.mapDepth, pp.start}
  554. switch value.(type) {
  555. case map[string]interface{}, []byte, string, float64, bool, int, int32, int64, float32:
  556. if doIndent {
  557. *s += p.padding
  558. }
  559. if key != commentK && key != directiveK && key != procinstK {
  560. *s += `<` + key
  561. }
  562. }
  563. switch value.(type) {
  564. case map[string]interface{}:
  565. val := value.(map[string]interface{})
  566. if key == commentK {
  567. *s += `<!--` + val[textK].(string) + `-->`
  568. noEndTag = true
  569. break
  570. }
  571. if key == directiveK {
  572. *s += `<!` + val[textK].(string) + `>`
  573. noEndTag = true
  574. break
  575. }
  576. if key == procinstK {
  577. *s += `<?` + val[targetK].(string) + ` ` + val[instK].(string) + `?>`
  578. noEndTag = true
  579. break
  580. }
  581. haveAttrs := false
  582. // process attributes first
  583. if v, ok := val[attrK].(map[string]interface{}); ok {
  584. // First, unroll the map[string]interface{} into a []keyval array.
  585. // Then sequence it.
  586. kv := make([]keyval, len(v))
  587. n := 0
  588. for ak, av := range v {
  589. kv[n] = keyval{ak, av}
  590. n++
  591. }
  592. sort.Sort(elemListSeq(kv))
  593. // Now encode the attributes in original decoding sequence, using keyval array.
  594. for _, a := range kv {
  595. vv := a.v.(map[string]interface{})
  596. switch vv[textK].(type) {
  597. case string:
  598. if xmlEscapeChars {
  599. ss = escapeChars(vv[textK].(string))
  600. } else {
  601. ss = vv[textK].(string)
  602. }
  603. *s += ` ` + a.k + `="` + ss + `"`
  604. case float64, bool, int, int32, int64, float32:
  605. *s += ` ` + a.k + `="` + fmt.Sprintf("%v", vv[textK]) + `"`
  606. case []byte:
  607. if xmlEscapeChars {
  608. ss = escapeChars(string(vv[textK].([]byte)))
  609. } else {
  610. ss = string(vv[textK].([]byte))
  611. }
  612. *s += ` ` + a.k + `="` + ss + `"`
  613. default:
  614. return fmt.Errorf("invalid attribute value for: %s", a.k)
  615. }
  616. }
  617. haveAttrs = true
  618. }
  619. // simple element?
  620. // every map value has, at least, "#seq" and, perhaps, "#text" and/or "#attr"
  621. _, seqOK := val[seqK] // have key
  622. if v, ok := val[textK]; ok && ((len(val) == 3 && haveAttrs) || (len(val) == 2 && !haveAttrs)) && seqOK {
  623. if stmp, ok := v.(string); ok && stmp != "" {
  624. if xmlEscapeChars {
  625. stmp = escapeChars(stmp)
  626. }
  627. *s += ">" + stmp
  628. endTag = true
  629. elen = 1
  630. }
  631. isSimple = true
  632. break
  633. } else if !ok && ((len(val) == 2 && haveAttrs) || (len(val) == 1 && !haveAttrs)) && seqOK {
  634. // here no #text but have #seq or #seq+#attr
  635. endTag = false
  636. break
  637. }
  638. // we now need to sequence everything except attributes
  639. // 'kv' will hold everything that needs to be written
  640. kv := make([]keyval, 0)
  641. for k, v := range val {
  642. if k == attrK { // already processed
  643. continue
  644. }
  645. if k == seqK { // ignore - just for sorting
  646. continue
  647. }
  648. switch v.(type) {
  649. case []interface{}:
  650. // unwind the array as separate entries
  651. for _, vv := range v.([]interface{}) {
  652. kv = append(kv, keyval{k, vv})
  653. }
  654. default:
  655. kv = append(kv, keyval{k, v})
  656. }
  657. }
  658. // close tag with possible attributes
  659. *s += ">"
  660. if doIndent {
  661. *s += "\n"
  662. }
  663. // something more complex
  664. p.mapDepth++
  665. sort.Sort(elemListSeq(kv))
  666. i := 0
  667. for _, v := range kv {
  668. switch v.v.(type) {
  669. case []interface{}:
  670. default:
  671. if i == 0 && doIndent {
  672. p.Indent()
  673. }
  674. }
  675. i++
  676. if err := mapToXmlSeqIndent(doIndent, s, v.k, v.v, p); err != nil {
  677. return err
  678. }
  679. switch v.v.(type) {
  680. case []interface{}: // handled in []interface{} case
  681. default:
  682. if doIndent {
  683. p.Outdent()
  684. }
  685. }
  686. i--
  687. }
  688. p.mapDepth--
  689. endTag = true
  690. elen = 1 // we do have some content other than attrs
  691. case []interface{}:
  692. for _, v := range value.([]interface{}) {
  693. if doIndent {
  694. p.Indent()
  695. }
  696. if err := mapToXmlSeqIndent(doIndent, s, key, v, p); err != nil {
  697. return err
  698. }
  699. if doIndent {
  700. p.Outdent()
  701. }
  702. }
  703. return nil
  704. case nil:
  705. // terminate the tag
  706. if doIndent {
  707. *s += p.padding
  708. }
  709. *s += "<" + key
  710. endTag, isSimple = true, true
  711. break
  712. default: // handle anything - even goofy stuff
  713. elen = 0
  714. switch value.(type) {
  715. case string:
  716. if xmlEscapeChars {
  717. ss = escapeChars(value.(string))
  718. } else {
  719. ss = value.(string)
  720. }
  721. elen = len(ss)
  722. if elen > 0 {
  723. *s += ">" + ss
  724. }
  725. case float64, bool, int, int32, int64, float32:
  726. v := fmt.Sprintf("%v", value)
  727. elen = len(v)
  728. if elen > 0 {
  729. *s += ">" + v
  730. }
  731. case []byte: // NOTE: byte is just an alias for uint8
  732. // similar to how xml.Marshal handles []byte structure members
  733. if xmlEscapeChars {
  734. ss = escapeChars(string(value.([]byte)))
  735. } else {
  736. ss = string(value.([]byte))
  737. }
  738. elen = len(ss)
  739. if elen > 0 {
  740. *s += ">" + ss
  741. }
  742. default:
  743. var v []byte
  744. var err error
  745. if doIndent {
  746. v, err = xml.MarshalIndent(value, p.padding, p.indent)
  747. } else {
  748. v, err = xml.Marshal(value)
  749. }
  750. if err != nil {
  751. *s += ">UNKNOWN"
  752. } else {
  753. elen = len(v)
  754. if elen > 0 {
  755. *s += string(v)
  756. }
  757. }
  758. }
  759. isSimple = true
  760. endTag = true
  761. }
  762. if endTag && !noEndTag {
  763. if doIndent {
  764. if !isSimple {
  765. *s += p.padding
  766. }
  767. }
  768. switch value.(type) {
  769. case map[string]interface{}, []byte, string, float64, bool, int, int32, int64, float32:
  770. if elen > 0 || useGoXmlEmptyElemSyntax {
  771. if elen == 0 {
  772. *s += ">"
  773. }
  774. *s += `</` + key + ">"
  775. } else {
  776. *s += `/>`
  777. }
  778. }
  779. } else if !noEndTag {
  780. if useGoXmlEmptyElemSyntax {
  781. *s += `</` + key + ">"
  782. // *s += "></" + key + ">"
  783. } else {
  784. *s += "/>"
  785. }
  786. }
  787. if doIndent {
  788. if p.cnt > p.start {
  789. *s += "\n"
  790. }
  791. p.Outdent()
  792. }
  793. return nil
  794. }
  795. // the element sort implementation
  796. type keyval struct {
  797. k string
  798. v interface{}
  799. }
  800. type elemListSeq []keyval
  801. func (e elemListSeq) Len() int {
  802. return len(e)
  803. }
  804. func (e elemListSeq) Swap(i, j int) {
  805. e[i], e[j] = e[j], e[i]
  806. }
  807. func (e elemListSeq) Less(i, j int) bool {
  808. var iseq, jseq int
  809. var fiseq, fjseq float64
  810. var ok bool
  811. if iseq, ok = e[i].v.(map[string]interface{})[seqK].(int); !ok {
  812. if fiseq, ok = e[i].v.(map[string]interface{})[seqK].(float64); ok {
  813. iseq = int(fiseq)
  814. } else {
  815. iseq = 9999999
  816. }
  817. }
  818. if jseq, ok = e[j].v.(map[string]interface{})[seqK].(int); !ok {
  819. if fjseq, ok = e[j].v.(map[string]interface{})[seqK].(float64); ok {
  820. jseq = int(fjseq)
  821. } else {
  822. jseq = 9999999
  823. }
  824. }
  825. return iseq <= jseq
  826. }
  827. // =============== https://groups.google.com/forum/#!topic/golang-nuts/lHPOHD-8qio
  828. // BeautifyXml (re)formats an XML doc similar to Map.XmlIndent().
  829. // It preserves comments, directives and process instructions,
  830. func BeautifyXml(b []byte, prefix, indent string) ([]byte, error) {
  831. x, err := NewMapXmlSeq(b)
  832. if err != nil {
  833. return nil, err
  834. }
  835. return x.XmlIndent(prefix, indent)
  836. }