_packer.pyx 9.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289
  1. #coding: utf-8
  2. #cython: embedsignature=True
  3. from cpython cimport *
  4. from libc.stdlib cimport malloc, free
  5. from libc.string cimport *
  6. from libc.limits cimport *
  7. from libc.stdint cimport *
  8. from erlpack.types import Atom
  # Nesting depth budget handed to `_pack`; each level of container nesting consumes one unit.
  cdef int DEFAULT_RECURSE_LIMIT = 256
  # A buffer that has grown to at least this many bytes (2 MiB) is released once packing finishes.
  cdef size_t BIG_BUF_SIZE = 2 * 1024 * 1024
  # Number of bytes (1 MiB) allocated for the output buffer the first time it is needed.
  cdef size_t INITIAL_BUFFER_SIZE = 1024 * 1024
  # 2**32 - 1, the largest unsigned 32-bit value; upper bound accepted for container and string sizes.
  cdef size_t MAX_SIZE = 0xFFFFFFFF
  # C encoder interface. The encoder in this file treats a non-zero return from any of these functions
  # as a failure, and -1 specifically as an out-of-memory condition.
  cdef extern from "../../cpp/encoder.h":
      # Output buffer shared by all append functions.
      struct erlpack_buffer:
          char *buf               # start of the encoded bytes
          size_t length           # number of bytes written so far
          size_t allocated_size   # capacity of `buf` in bytes

      # Scalars and constants.
      int erlpack_append_int(erlpack_buffer *pk, int d)
      int erlpack_append_nil(erlpack_buffer *pk)
      int erlpack_append_true(erlpack_buffer *pk)
      int erlpack_append_false(erlpack_buffer *pk)
      int erlpack_append_version(erlpack_buffer *pk)
      int erlpack_append_small_integer(erlpack_buffer *pk, unsigned char d)
      int erlpack_append_integer(erlpack_buffer *pk, int32_t d)
      int erlpack_append_unsigned_long_long(erlpack_buffer *pk, unsigned long long d)
      int erlpack_append_long_long(erlpack_buffer *pk, long long d)
      int erlpack_append_double(erlpack_buffer *pk, double f)

      # Length-prefixed byte payloads.
      int erlpack_append_atom(erlpack_buffer *pk, const char *bytes, unsigned int size)
      int erlpack_append_binary(erlpack_buffer *pk, const char *bytes, unsigned int size)
      int erlpack_append_string(erlpack_buffer *pk, const char *bytes, unsigned int size)

      # Container headers; the elements are appended separately by the caller.
      int erlpack_append_tuple_header(erlpack_buffer *pk, size_t size)
      int erlpack_append_nil_ext(erlpack_buffer *pk)
      int erlpack_append_list_header(erlpack_buffer *pk, size_t size)
      int erlpack_append_map_header(erlpack_buffer *pk, size_t size)
  class EncodingError(Exception):
      """
      Error raised by the encoder, e.g. when a value is nested deeper than the recursion limit.
      """
  cdef class ErlangTermEncoder(object):
      """
      Serializes Python objects into a byte string through the `erlpack_append_*` C functions.

      Supported inputs are None, booleans, integers, floats, `Atom`, byte strings, unicode strings, tuples,
      lists, dicts, objects exposing `__erlpack__()`, and anything the optional `encode_hook` can convert into
      one of those. The output buffer is owned by the instance and reused between `pack` calls.
      """
      cdef erlpack_buffer pk
      # Borrowed pointers into `_encoding_bytes` / `_unicode_errors_bytes`; NULL when no encoding is configured.
      cdef char *_encoding
      cdef char *_unicode_errors
      # Owning references that keep the memory behind the two pointers above alive.
      cdef object _encoding_bytes
      cdef object _unicode_errors_bytes
      cdef object _unicode_type
      cdef object _encode_hook

      def __cinit__(self):
          self.pk.buf = NULL
          self.pk.length = 0
          self.pk.allocated_size = 0

      def __init__(self, encoding='utf-8', unicode_errors='strict', unicode_type='binary', encode_hook=None):
          """
          Configures how unicode strings and otherwise unsupported objects are encoded.

          :param encoding: Codec name used for unicode strings. None disables encoding; unicode strings are
              then packed as a list of code points.
          :param unicode_errors: Error handler name passed to the codec. Ignored when `encoding` is None.
          :param unicode_type: 'binary' or 'str'; selects which append function receives the encoded bytes.
          :param encode_hook: Optional callable invoked with objects that have no built-in encoding; its
              non-None result is packed in place of the object.
          :raises TypeError: If `encoding` or `unicode_errors` is not a string.
          """
          if encoding is None:
              self._encoding_bytes = None
              self._unicode_errors_bytes = None
              self._encoding = NULL
              self._unicode_errors = NULL
          else:
              if isinstance(encoding, unicode):
                  encoding = encoding.encode('ascii')
              if isinstance(unicode_errors, unicode):
                  unicode_errors = unicode_errors.encode('ascii')
              # PyString_AsString returns a pointer into the string object itself, so the objects must be
              # retained on the instance; otherwise a temporary produced by `.encode()` above is released
              # when this method returns and the stored pointers dangle.
              self._encoding_bytes = encoding
              self._unicode_errors_bytes = unicode_errors
              self._encoding = PyString_AsString(self._encoding_bytes)
              self._unicode_errors = PyString_AsString(self._unicode_errors_bytes)

          self._unicode_type = unicode_type
          self._encode_hook = encode_hook

      cdef _ensure_buf(self):
          """
          Ensures that a buffer is available to be written to when serializing data.

          If there is no buffer, allocate one sized to `INITIAL_BUFFER_SIZE`. If allocation
          fails, raise a MemoryError. An existing buffer is kept and only its length is reset.
          """
          if self.pk.buf != NULL:
              self.pk.length = 0
              return

          self.pk.buf = <char*> malloc(INITIAL_BUFFER_SIZE)
          if self.pk.buf == NULL:
              raise MemoryError('Unable to allocate buffer')

          self.pk.allocated_size = INITIAL_BUFFER_SIZE
          self.pk.length = 0

      cdef _free_big_buf(self):
          """
          If the buffer is larger than `BIG_BUF_SIZE`, free it, so that packing large data does not hold onto
          the big buffer after the serialization is complete.
          """
          if self.pk.allocated_size >= BIG_BUF_SIZE:
              free(self.pk.buf)
              self.pk.buf = NULL
              self.pk.length = 0
              self.pk.allocated_size = 0

      def __dealloc__(self):
          if self.pk.buf != NULL:
              free(self.pk.buf)

      cdef int _pack(self, object o, int nest_limit=DEFAULT_RECURSE_LIMIT) except? -1:
          """
          Appends the encoding of `o` to the buffer, recursing into containers.

          Returns 0 on success, otherwise the non-zero code of the first `erlpack_append_*` call that failed.
          The function is declared `except? -1` so that a bare -1 coming from the C encoder is handed back to
          `pack` as a return value (where it becomes a MemoryError) instead of being taken for a pending
          Python exception.

          :raises EncodingError: If `nest_limit` is exhausted.
          :raises ValueError: If a container or byte string is larger than `MAX_SIZE`.
          :raises NotImplementedError: If `o` has no encoding and the encode hook does not supply one.
          """
          cdef int ret
          cdef long long llval
          cdef unsigned long long ullval
          cdef double doubleval
          cdef size_t sizeval
          cdef dict d
          cdef object obj
          cdef object val

          if nest_limit < 0:
              raise EncodingError('Exceeded recursion limit')

          if o is None:
              ret = erlpack_append_nil(&self.pk)

          # Booleans are tested by identity before the integer branch, which would otherwise accept them.
          elif o is True:
              ret = erlpack_append_true(&self.pk)

          elif o is False:
              ret = erlpack_append_false(&self.pk)

          elif PyLong_Check(o) or PyInt_Check(o):
              if 0 <= o <= 255:
                  ret = erlpack_append_small_integer(&self.pk, <unsigned char> o)
              elif -2147483648 <= o <= 2147483647:
                  ret = erlpack_append_integer(&self.pk, <int32_t> o)
              elif o > 0:
                  # The assignment raises OverflowError when `o` does not fit the C type.
                  ullval = o
                  ret = erlpack_append_unsigned_long_long(&self.pk, ullval)
              else:
                  llval = o
                  ret = erlpack_append_long_long(&self.pk, llval)

          elif PyFloat_Check(o):
              doubleval = o
              ret = erlpack_append_double(&self.pk, doubleval)

          elif PyObject_IsInstance(o, Atom):
              val = str(o)
              ret = erlpack_append_atom(&self.pk, PyString_AS_STRING(val), PyString_GET_SIZE(val))

          elif PyString_Check(o):
              # Checked like every other size so the value is never truncated when narrowed to unsigned int.
              sizeval = PyString_GET_SIZE(o)
              if sizeval > MAX_SIZE:
                  raise ValueError('binary is too large')
              ret = erlpack_append_binary(&self.pk, PyString_AS_STRING(o), <unsigned int> sizeval)

          elif PyUnicode_Check(o):
              ret = self._encode_unicode(o)

          elif PyTuple_Check(o):
              sizeval = PyTuple_Size(o)
              if sizeval > MAX_SIZE:
                  raise ValueError('tuple is too large')

              ret = erlpack_append_tuple_header(&self.pk, sizeval)
              if ret != 0:
                  return ret

              for item in o:
                  ret = self._pack(item, nest_limit - 1)
                  if ret != 0:
                      return ret

          elif PyList_Check(o):
              sizeval = PyList_Size(o)
              if sizeval == 0:
                  ret = erlpack_append_nil_ext(&self.pk)
              else:
                  if sizeval > MAX_SIZE:
                      raise ValueError("list is too large")

                  ret = erlpack_append_list_header(&self.pk, sizeval)
                  if ret != 0:
                      return ret

                  for item in o:
                      ret = self._pack(item, nest_limit - 1)
                      if ret != 0:
                          return ret

                  # A non-empty list is closed with the same marker that encodes the empty list.
                  ret = erlpack_append_nil_ext(&self.pk)

          elif PyDict_CheckExact(o):
              d = <dict> o
              sizeval = PyDict_Size(d)
              if sizeval > MAX_SIZE:
                  raise ValueError("dict is too large")

              ret = erlpack_append_map_header(&self.pk, sizeval)
              if ret != 0:
                  return ret

              for k, v in d.iteritems():
                  ret = self._pack(k, nest_limit - 1)
                  if ret != 0:
                      return ret
                  ret = self._pack(v, nest_limit - 1)
                  if ret != 0:
                      return ret

          # For user dict types, safer to use .items() # via msgpack-python
          elif PyDict_Check(o):
              sizeval = PyDict_Size(o)
              if sizeval > MAX_SIZE:
                  raise ValueError("dict is too large")

              ret = erlpack_append_map_header(&self.pk, sizeval)
              if ret != 0:
                  return ret

              for k, v in o.items():
                  ret = self._pack(k, nest_limit - 1)
                  if ret != 0:
                      return ret
                  ret = self._pack(v, nest_limit - 1)
                  if ret != 0:
                      return ret

          elif PyObject_HasAttrString(o, '__erlpack__'):
              obj = o.__erlpack__()
              return self._pack(obj, nest_limit - 1)

          else:
              if self._encode_hook:
                  obj = self._encode_hook(o)
                  # A None result means the hook declined; fall through to the error below.
                  if obj is not None:
                      return self._pack(obj, nest_limit - 1)

              raise NotImplementedError('Unable to serialize %r' % (o,))

          return ret

      cdef int _encode_unicode(self, object obj) except? -1:
          """
          Appends a unicode string to the buffer according to the configured encoding and unicode type.

          Without an encoding the string is packed as a list of its code points. Otherwise it is encoded and
          appended as a binary or a string depending on `unicode_type`. Returns the append function's code.

          :raises ValueError: If the encoded string exceeds the size limit of the selected unicode type.
          :raises TypeError: If `unicode_type` is neither 'binary' nor 'str'.
          """
          cdef object st
          cdef size_t size

          if self._encoding == NULL:
              return self._pack([ord(x) for x in obj])

          st = PyUnicode_AsEncodedString(obj, self._encoding, self._unicode_errors)
          size = PyString_Size(st)

          if self._unicode_type == 'binary':
              if size > MAX_SIZE:
                  raise ValueError('unicode string is too large using unicode type binary')
              return erlpack_append_binary(&self.pk, PyString_AS_STRING(st), <unsigned int> size)

          elif self._unicode_type == 'str':
              # NOTE(review): 0xFFF (4095) may be a typo for 0xFFFF if the C encoder writes a 16-bit length
              # here; confirm against encoder.h before raising the limit.
              if size > 0xFFF:
                  raise ValueError('unicode string is too large using unicode type str')
              return erlpack_append_string(&self.pk, PyString_AS_STRING(st), <unsigned int> size)

          else:
              raise TypeError('Unknown unicode encoding type %s' % (self._unicode_type,))

      cpdef pack(self, object obj):
          """
          Encodes `obj`, preceded by the version marker, and returns the result as a byte string.

          :raises MemoryError: If the buffer cannot be allocated or an append function returns -1.
          :raises TypeError: If an append function returns any other non-zero code.
          """
          cdef int ret

          self._ensure_buf()
          try:
              ret = erlpack_append_version(&self.pk)
              if ret == -1:
                  raise MemoryError

              ret = self._pack(obj, DEFAULT_RECURSE_LIMIT)
              if ret == -1:
                  raise MemoryError
              elif ret:  # should not happen.
                  raise TypeError('_pack returned code(%s)' % ret)

              return PyBytes_FromStringAndSize(self.pk.buf, self.pk.length)
          finally:
              # Runs on failure too, so an oversized buffer is not retained after an aborted pack.
              self._free_big_buf()
  222. buf = PyBytes_FromStringAndSize(self.pk.buf, self.pk.length)
  223. self._free_big_buf()
  224. return buf