id.go 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392
  1. // Package xid is a globally unique id generator suited for web scale
  2. //
  3. // Xid is using Mongo Object ID algorithm to generate globally unique ids:
  4. // https://docs.mongodb.org/manual/reference/object-id/
  5. //
  6. // - 4-byte value representing the seconds since the Unix epoch,
  7. // - 3-byte machine identifier,
  8. // - 2-byte process id, and
  9. // - 3-byte counter, starting with a random value.
  10. //
  11. // The binary representation of the id is compatible with Mongo 12 bytes Object IDs.
  12. // The string representation is using base32 hex (w/o padding) for better space efficiency
  13. // when stored in that form (20 bytes). The hex variant of base32 is used to retain the
  14. // sortable property of the id.
  15. //
  16. // Xid doesn't use base64 because case sensitivity and the 2 non alphanum chars may be an
  17. // issue when transported as a string between various systems. Base36 wasn't retained either
  18. // because 1/ it's not standard 2/ the resulting size is not predictable (not bit aligned)
  19. // and 3/ it would not remain sortable. To validate a base32 `xid`, expect a 20 chars long,
  20. // all lowercase sequence of `a` to `v` letters and `0` to `9` numbers (`[0-9a-v]{20}`).
  21. //
  22. // UUID is 16 bytes (128 bits), snowflake is 8 bytes (64 bits), xid stands in between
  23. // with 12 bytes with a more compact string representation ready for the web and no
  24. // required configuration or central generation server.
  25. //
  26. // Features:
  27. //
  28. // - Size: 12 bytes (96 bits), smaller than UUID, larger than snowflake
  29. // - Base32 hex encoded by default (16 bytes storage when transported as printable string)
  30. // - Non configured, you don't need set a unique machine and/or data center id
  31. // - K-ordered
  32. // - Embedded time with 1 second precision
  33. // - Unicity guaranteed for 16,777,216 (24 bits) unique ids per second and per host/process
  34. //
  35. // Best used with xlog's RequestIDHandler (https://godoc.org/github.com/rs/xlog#RequestIDHandler).
  36. //
  37. // References:
  38. //
  39. // - http://www.slideshare.net/davegardnerisme/unique-id-generation-in-distributed-systems
  40. // - https://en.wikipedia.org/wiki/Universally_unique_identifier
  41. // - https://blog.twitter.com/2010/announcing-snowflake
  42. package xid
  43. import (
  44. "bytes"
  45. "crypto/md5"
  46. "crypto/rand"
  47. "database/sql/driver"
  48. "encoding/binary"
  49. "fmt"
  50. "hash/crc32"
  51. "io/ioutil"
  52. "os"
  53. "sort"
  54. "sync/atomic"
  55. "time"
  56. "unsafe"
  57. )
  58. // Code inspired from mgo/bson ObjectId
  59. // ID represents a unique request id
  60. type ID [rawLen]byte
  61. const (
  62. encodedLen = 20 // string encoded len
  63. rawLen = 12 // binary raw len
  64. // encoding stores a custom version of the base32 encoding with lower case
  65. // letters.
  66. encoding = "0123456789abcdefghijklmnopqrstuv"
  67. )
  68. var (
  69. // objectIDCounter is atomically incremented when generating a new ObjectId
  70. // using NewObjectId() function. It's used as a counter part of an id.
  71. // This id is initialized with a random value.
  72. objectIDCounter = randInt()
  73. // machineId stores machine id generated once and used in subsequent calls
  74. // to NewObjectId function.
  75. machineID = readMachineID()
  76. // pid stores the current process id
  77. pid = os.Getpid()
  78. nilID ID
  79. // dec is the decoding map for base32 encoding
  80. dec [256]byte
  81. )
  82. func init() {
  83. for i := 0; i < len(dec); i++ {
  84. dec[i] = 0xFF
  85. }
  86. for i := 0; i < len(encoding); i++ {
  87. dec[encoding[i]] = byte(i)
  88. }
  89. // If /proc/self/cpuset exists and is not /, we can assume that we are in a
  90. // form of container and use the content of cpuset xor-ed with the PID in
  91. // order get a reasonable machine global unique PID.
  92. b, err := ioutil.ReadFile("/proc/self/cpuset")
  93. if err == nil && len(b) > 1 {
  94. pid ^= int(crc32.ChecksumIEEE(b))
  95. }
  96. }
  97. // readMachineId generates machine id and puts it into the machineId global
  98. // variable. If this function fails to get the hostname, it will cause
  99. // a runtime error.
  100. func readMachineID() []byte {
  101. id := make([]byte, 3)
  102. hid, err := readPlatformMachineID()
  103. if err != nil || len(hid) == 0 {
  104. hid, err = os.Hostname()
  105. }
  106. if err == nil && len(hid) != 0 {
  107. hw := md5.New()
  108. hw.Write([]byte(hid))
  109. copy(id, hw.Sum(nil))
  110. } else {
  111. // Fallback to rand number if machine id can't be gathered
  112. if _, randErr := rand.Reader.Read(id); randErr != nil {
  113. panic(fmt.Errorf("xid: cannot get hostname nor generate a random number: %v; %v", err, randErr))
  114. }
  115. }
  116. return id
  117. }
  118. // randInt generates a random uint32
  119. func randInt() uint32 {
  120. b := make([]byte, 3)
  121. if _, err := rand.Reader.Read(b); err != nil {
  122. panic(fmt.Errorf("xid: cannot generate random number: %v;", err))
  123. }
  124. return uint32(b[0])<<16 | uint32(b[1])<<8 | uint32(b[2])
  125. }
  126. // New generates a globally unique ID
  127. func New() ID {
  128. return NewWithTime(time.Now())
  129. }
  130. // NewWithTime generates a globally unique ID with the passed in time
  131. func NewWithTime(t time.Time) ID {
  132. var id ID
  133. // Timestamp, 4 bytes, big endian
  134. binary.BigEndian.PutUint32(id[:], uint32(t.Unix()))
  135. // Machine, first 3 bytes of md5(hostname)
  136. id[4] = machineID[0]
  137. id[5] = machineID[1]
  138. id[6] = machineID[2]
  139. // Pid, 2 bytes, specs don't specify endianness, but we use big endian.
  140. id[7] = byte(pid >> 8)
  141. id[8] = byte(pid)
  142. // Increment, 3 bytes, big endian
  143. i := atomic.AddUint32(&objectIDCounter, 1)
  144. id[9] = byte(i >> 16)
  145. id[10] = byte(i >> 8)
  146. id[11] = byte(i)
  147. return id
  148. }
  149. // FromString reads an ID from its string representation
  150. func FromString(id string) (ID, error) {
  151. i := &ID{}
  152. err := i.UnmarshalText([]byte(id))
  153. return *i, err
  154. }
  155. // String returns a base32 hex lowercased with no padding representation of the id (char set is 0-9, a-v).
  156. func (id ID) String() string {
  157. text := make([]byte, encodedLen)
  158. encode(text, id[:])
  159. return *(*string)(unsafe.Pointer(&text))
  160. }
  161. // Encode encodes the id using base32 encoding, writing 20 bytes to dst and return it.
  162. func (id ID) Encode(dst []byte) []byte {
  163. encode(dst, id[:])
  164. return dst
  165. }
  166. // MarshalText implements encoding/text TextMarshaler interface
  167. func (id ID) MarshalText() ([]byte, error) {
  168. text := make([]byte, encodedLen)
  169. encode(text, id[:])
  170. return text, nil
  171. }
  172. // MarshalJSON implements encoding/json Marshaler interface
  173. func (id ID) MarshalJSON() ([]byte, error) {
  174. if id.IsNil() {
  175. return []byte("null"), nil
  176. }
  177. text := make([]byte, encodedLen+2)
  178. encode(text[1:encodedLen+1], id[:])
  179. text[0], text[encodedLen+1] = '"', '"'
  180. return text, nil
  181. }
  182. // encode by unrolling the stdlib base32 algorithm + removing all safe checks
  183. func encode(dst, id []byte) {
  184. _ = dst[19]
  185. _ = id[11]
  186. dst[19] = encoding[(id[11]<<4)&0x1F]
  187. dst[18] = encoding[(id[11]>>1)&0x1F]
  188. dst[17] = encoding[(id[11]>>6)&0x1F|(id[10]<<2)&0x1F]
  189. dst[16] = encoding[id[10]>>3]
  190. dst[15] = encoding[id[9]&0x1F]
  191. dst[14] = encoding[(id[9]>>5)|(id[8]<<3)&0x1F]
  192. dst[13] = encoding[(id[8]>>2)&0x1F]
  193. dst[12] = encoding[id[8]>>7|(id[7]<<1)&0x1F]
  194. dst[11] = encoding[(id[7]>>4)&0x1F|(id[6]<<4)&0x1F]
  195. dst[10] = encoding[(id[6]>>1)&0x1F]
  196. dst[9] = encoding[(id[6]>>6)&0x1F|(id[5]<<2)&0x1F]
  197. dst[8] = encoding[id[5]>>3]
  198. dst[7] = encoding[id[4]&0x1F]
  199. dst[6] = encoding[id[4]>>5|(id[3]<<3)&0x1F]
  200. dst[5] = encoding[(id[3]>>2)&0x1F]
  201. dst[4] = encoding[id[3]>>7|(id[2]<<1)&0x1F]
  202. dst[3] = encoding[(id[2]>>4)&0x1F|(id[1]<<4)&0x1F]
  203. dst[2] = encoding[(id[1]>>1)&0x1F]
  204. dst[1] = encoding[(id[1]>>6)&0x1F|(id[0]<<2)&0x1F]
  205. dst[0] = encoding[id[0]>>3]
  206. }
  207. // UnmarshalText implements encoding/text TextUnmarshaler interface
  208. func (id *ID) UnmarshalText(text []byte) error {
  209. if len(text) != encodedLen {
  210. return ErrInvalidID
  211. }
  212. for _, c := range text {
  213. if dec[c] == 0xFF {
  214. return ErrInvalidID
  215. }
  216. }
  217. if !decode(id, text) {
  218. return ErrInvalidID
  219. }
  220. return nil
  221. }
  222. // UnmarshalJSON implements encoding/json Unmarshaler interface
  223. func (id *ID) UnmarshalJSON(b []byte) error {
  224. s := string(b)
  225. if s == "null" {
  226. *id = nilID
  227. return nil
  228. }
  229. // Check the slice length to prevent panic on passing it to UnmarshalText()
  230. if len(b) < 2 {
  231. return ErrInvalidID
  232. }
  233. return id.UnmarshalText(b[1 : len(b)-1])
  234. }
  235. // decode by unrolling the stdlib base32 algorithm + customized safe check.
  236. func decode(id *ID, src []byte) bool {
  237. _ = src[19]
  238. _ = id[11]
  239. id[11] = dec[src[17]]<<6 | dec[src[18]]<<1 | dec[src[19]]>>4
  240. id[10] = dec[src[16]]<<3 | dec[src[17]]>>2
  241. id[9] = dec[src[14]]<<5 | dec[src[15]]
  242. id[8] = dec[src[12]]<<7 | dec[src[13]]<<2 | dec[src[14]]>>3
  243. id[7] = dec[src[11]]<<4 | dec[src[12]]>>1
  244. id[6] = dec[src[9]]<<6 | dec[src[10]]<<1 | dec[src[11]]>>4
  245. id[5] = dec[src[8]]<<3 | dec[src[9]]>>2
  246. id[4] = dec[src[6]]<<5 | dec[src[7]]
  247. id[3] = dec[src[4]]<<7 | dec[src[5]]<<2 | dec[src[6]]>>3
  248. id[2] = dec[src[3]]<<4 | dec[src[4]]>>1
  249. id[1] = dec[src[1]]<<6 | dec[src[2]]<<1 | dec[src[3]]>>4
  250. id[0] = dec[src[0]]<<3 | dec[src[1]]>>2
  251. // Validate that there are no discarer bits (padding) in src that would
  252. // cause the string-encoded id not to equal src.
  253. var check [4]byte
  254. check[3] = encoding[(id[11]<<4)&0x1F]
  255. check[2] = encoding[(id[11]>>1)&0x1F]
  256. check[1] = encoding[(id[11]>>6)&0x1F|(id[10]<<2)&0x1F]
  257. check[0] = encoding[id[10]>>3]
  258. return bytes.Equal([]byte(src[16:20]), check[:])
  259. }
  260. // Time returns the timestamp part of the id.
  261. // It's a runtime error to call this method with an invalid id.
  262. func (id ID) Time() time.Time {
  263. // First 4 bytes of ObjectId is 32-bit big-endian seconds from epoch.
  264. secs := int64(binary.BigEndian.Uint32(id[0:4]))
  265. return time.Unix(secs, 0)
  266. }
  267. // Machine returns the 3-byte machine id part of the id.
  268. // It's a runtime error to call this method with an invalid id.
  269. func (id ID) Machine() []byte {
  270. return id[4:7]
  271. }
  272. // Pid returns the process id part of the id.
  273. // It's a runtime error to call this method with an invalid id.
  274. func (id ID) Pid() uint16 {
  275. return binary.BigEndian.Uint16(id[7:9])
  276. }
  277. // Counter returns the incrementing value part of the id.
  278. // It's a runtime error to call this method with an invalid id.
  279. func (id ID) Counter() int32 {
  280. b := id[9:12]
  281. // Counter is stored as big-endian 3-byte value
  282. return int32(uint32(b[0])<<16 | uint32(b[1])<<8 | uint32(b[2]))
  283. }
  284. // Value implements the driver.Valuer interface.
  285. func (id ID) Value() (driver.Value, error) {
  286. if id.IsNil() {
  287. return nil, nil
  288. }
  289. b, err := id.MarshalText()
  290. return string(b), err
  291. }
  292. // Scan implements the sql.Scanner interface.
  293. func (id *ID) Scan(value interface{}) (err error) {
  294. switch val := value.(type) {
  295. case string:
  296. return id.UnmarshalText([]byte(val))
  297. case []byte:
  298. return id.UnmarshalText(val)
  299. case nil:
  300. *id = nilID
  301. return nil
  302. default:
  303. return fmt.Errorf("xid: scanning unsupported type: %T", value)
  304. }
  305. }
  306. // IsNil Returns true if this is a "nil" ID
  307. func (id ID) IsNil() bool {
  308. return id == nilID
  309. }
  310. // NilID returns a zero value for `xid.ID`.
  311. func NilID() ID {
  312. return nilID
  313. }
  314. // Bytes returns the byte array representation of `ID`
  315. func (id ID) Bytes() []byte {
  316. return id[:]
  317. }
  318. // FromBytes convert the byte array representation of `ID` back to `ID`
  319. func FromBytes(b []byte) (ID, error) {
  320. var id ID
  321. if len(b) != rawLen {
  322. return id, ErrInvalidID
  323. }
  324. copy(id[:], b)
  325. return id, nil
  326. }
  327. // Compare returns an integer comparing two IDs. It behaves just like `bytes.Compare`.
  328. // The result will be 0 if two IDs are identical, -1 if current id is less than the other one,
  329. // and 1 if current id is greater than the other.
  330. func (id ID) Compare(other ID) int {
  331. return bytes.Compare(id[:], other[:])
  332. }
  333. type sorter []ID
  334. func (s sorter) Len() int {
  335. return len(s)
  336. }
  337. func (s sorter) Less(i, j int) bool {
  338. return s[i].Compare(s[j]) < 0
  339. }
  340. func (s sorter) Swap(i, j int) {
  341. s[i], s[j] = s[j], s[i]
  342. }
  343. // Sort sorts an array of IDs inplace.
  344. // It works by wrapping `[]ID` and use `sort.Sort`.
  345. func Sort(ids []ID) {
  346. sort.Sort(sorter(ids))
  347. }