gcharset.go 3.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111
  1. // Copyright GoFrame Author(https://goframe.org). All Rights Reserved.
  2. //
  3. // This Source Code Form is subject to the terms of the MIT License.
  4. // If a copy of the MIT was not distributed with this file,
  5. // You can obtain one at https://github.com/gogf/gf.
  6. // Package gcharset implements character-set conversion functionality.
  7. //
  8. // Supported Character Set:
  9. //
  10. // Chinese : GBK/GB18030/GB2312/Big5
  11. //
  12. // Japanese: EUCJP/ISO2022JP/ShiftJIS
  13. //
  14. // Korean : EUCKR
  15. //
  16. // Unicode : UTF-8/UTF-16/UTF-16BE/UTF-16LE
  17. //
  18. // Other : macintosh/IBM*/Windows*/ISO-*
  19. package gcharset
  20. import (
  21. "bytes"
  22. "github.com/gogf/gf/errors/gcode"
  23. "github.com/gogf/gf/errors/gerror"
  24. "io/ioutil"
  25. "golang.org/x/text/encoding"
  26. "golang.org/x/text/encoding/ianaindex"
  27. "golang.org/x/text/transform"
  28. )
  // charsetAlias maps alternative charset spellings to the canonical name that
  // getEncoding hands to the IANA index lookup. Keys are matched exactly
  // (case-sensitively), which is why both upper- and lower-case spellings are
  // listed.
  var charsetAlias = map[string]string{
  	"GB2312":   "HZ-GB-2312",
  	"gb2312":   "HZ-GB-2312",
  	"HZGB2312": "HZ-GB-2312",
  	"hzgb2312": "HZ-GB-2312",
  }
  38. // Supported returns whether charset <charset> is supported.
  39. func Supported(charset string) bool {
  40. return getEncoding(charset) != nil
  41. }
  42. // Convert converts <src> charset encoding from <srcCharset> to <dstCharset>,
  43. // and returns the converted string.
  44. // It returns <src> as <dst> if it fails converting.
  45. func Convert(dstCharset string, srcCharset string, src string) (dst string, err error) {
  46. if dstCharset == srcCharset {
  47. return src, nil
  48. }
  49. dst = src
  50. // Converting <src> to UTF-8.
  51. if srcCharset != "UTF-8" {
  52. if e := getEncoding(srcCharset); e != nil {
  53. tmp, err := ioutil.ReadAll(
  54. transform.NewReader(bytes.NewReader([]byte(src)), e.NewDecoder()),
  55. )
  56. if err != nil {
  57. return "", gerror.WrapCodef(gcode.CodeInternalError, err, "%s to utf8 failed", srcCharset)
  58. }
  59. src = string(tmp)
  60. } else {
  61. return dst, gerror.NewCodef(gcode.CodeInvalidParameter, "unsupported srcCharset: %s", srcCharset)
  62. }
  63. }
  64. // Do the converting from UTF-8 to <dstCharset>.
  65. if dstCharset != "UTF-8" {
  66. if e := getEncoding(dstCharset); e != nil {
  67. tmp, err := ioutil.ReadAll(
  68. transform.NewReader(bytes.NewReader([]byte(src)), e.NewEncoder()),
  69. )
  70. if err != nil {
  71. return "", gerror.WrapCodef(gcode.CodeInternalError, err, "utf to %s failed", dstCharset)
  72. }
  73. dst = string(tmp)
  74. } else {
  75. return dst, gerror.NewCodef(gcode.CodeInvalidParameter, "unsupported dstCharset: %s", dstCharset)
  76. }
  77. } else {
  78. dst = src
  79. }
  80. return dst, nil
  81. }
  82. // ToUTF8 converts <src> charset encoding from <srcCharset> to UTF-8 ,
  83. // and returns the converted string.
  84. func ToUTF8(srcCharset string, src string) (dst string, err error) {
  85. return Convert("UTF-8", srcCharset, src)
  86. }
  87. // UTF8To converts <src> charset encoding from UTF-8 to <dstCharset>,
  88. // and returns the converted string.
  89. func UTF8To(dstCharset string, src string) (dst string, err error) {
  90. return Convert(dstCharset, "UTF-8", src)
  91. }
  92. // getEncoding returns the encoding.Encoding interface object for <charset>.
  93. // It returns nil if <charset> is not supported.
  94. func getEncoding(charset string) encoding.Encoding {
  95. if c, ok := charsetAlias[charset]; ok {
  96. charset = c
  97. }
  98. if e, err := ianaindex.MIB.Encoding(charset); err == nil && e != nil {
  99. return e
  100. }
  101. return nil
  102. }