diff --git a/asyncpg/introspection.py b/asyncpg/introspection.py index 4554629a..a9050d99 100644 --- a/asyncpg/introspection.py +++ b/asyncpg/introspection.py @@ -7,8 +7,8 @@ INTRO_LOOKUP_TYPES = '''\ WITH RECURSIVE typeinfo_tree( - oid, ns, name, kind, basetype, elemtype, range_subtype, - elem_has_bin_input, elem_has_bin_output, attrtypoids, attrnames, depth) + oid, ns, name, kind, basetype, has_bin_io, elemtype, elemdelim, + range_subtype, elem_has_bin_io, attrtypoids, attrnames, depth) AS ( WITH composite_attrs AS ( @@ -58,10 +58,23 @@ ELSE NULL END) AS basetype, + t.typreceive::oid != 0 AND t.typsend::oid != 0 + AS has_bin_io, t.typelem AS elemtype, + elem_t.typdelim AS elemdelim, range_t.rngsubtype AS range_subtype, - elem_t.typreceive::oid != 0 AS elem_has_bin_input, - elem_t.typsend::oid != 0 AS elem_has_bin_output, + (CASE WHEN t.typtype = 'r' THEN + (SELECT + range_elem_t.typreceive::oid != 0 AND + range_elem_t.typsend::oid != 0 + FROM + pg_catalog.pg_type AS range_elem_t + WHERE + range_elem_t.oid = range_t.rngsubtype) + ELSE + elem_t.typreceive::oid != 0 AND + elem_t.typsend::oid != 0 + END) AS elem_has_bin_io, (CASE WHEN t.typtype = 'c' THEN (SELECT ca.typoids FROM composite_attrs AS ca @@ -91,8 +104,8 @@ ) SELECT - ti.oid, ti.ns, ti.name, ti.kind, ti.basetype, ti.elemtype, - ti.range_subtype, ti.elem_has_bin_input, ti.elem_has_bin_output, + ti.oid, ti.ns, ti.name, ti.kind, ti.basetype, ti.has_bin_io, + ti.elemtype, ti.elemdelim, ti.range_subtype, ti.elem_has_bin_io, ti.attrtypoids, ti.attrnames, 0 FROM typeinfo AS ti @@ -102,8 +115,8 @@ UNION ALL SELECT - ti.oid, ti.ns, ti.name, ti.kind, ti.basetype, ti.elemtype, - ti.range_subtype, ti.elem_has_bin_input, ti.elem_has_bin_output, + ti.oid, ti.ns, ti.name, ti.kind, ti.basetype, ti.has_bin_io, + ti.elemtype, ti.elemdelim, ti.range_subtype, ti.elem_has_bin_io, ti.attrtypoids, ti.attrnames, tt.depth + 1 FROM typeinfo ti, @@ -126,8 +139,8 @@ # Prior to 9.2 PostgreSQL did not have range types. 
INTRO_LOOKUP_TYPES_91 = '''\ WITH RECURSIVE typeinfo_tree( - oid, ns, name, kind, basetype, elemtype, range_subtype, - elem_has_bin_input, elem_has_bin_output, attrtypoids, attrnames, depth) + oid, ns, name, kind, basetype, has_bin_io, elemtype, elemdelim, + range_subtype, elem_has_bin_io, attrtypoids, attrnames, depth) AS ( WITH composite_attrs AS ( @@ -177,10 +190,14 @@ ELSE NULL END) AS basetype, + t.typreceive::oid != 0 AND t.typsend::oid != 0 + AS has_bin_io, t.typelem AS elemtype, + elem_t.typdelim AS elemdelim, NULL::oid AS range_subtype, - elem_t.typreceive::oid != 0 AS elem_has_bin_input, - elem_t.typsend::oid != 0 AS elem_has_bin_output, + elem_t.typreceive::oid != 0 AND + elem_t.typsend::oid != 0 + AS elem_has_bin_io, (CASE WHEN t.typtype = 'c' THEN (SELECT ca.typoids FROM composite_attrs AS ca @@ -207,8 +224,8 @@ ) SELECT - ti.oid, ti.ns, ti.name, ti.kind, ti.basetype, ti.elemtype, - ti.range_subtype, ti.elem_has_bin_input, ti.elem_has_bin_output, + ti.oid, ti.ns, ti.name, ti.kind, ti.basetype, ti.has_bin_io, + ti.elemtype, ti.elemdelim, ti.range_subtype, ti.elem_has_bin_io, ti.attrtypoids, ti.attrnames, 0 FROM typeinfo AS ti @@ -218,8 +235,8 @@ UNION ALL SELECT - ti.oid, ti.ns, ti.name, ti.kind, ti.basetype, ti.elemtype, - ti.range_subtype, ti.elem_has_bin_input, ti.elem_has_bin_output, + ti.oid, ti.ns, ti.name, ti.kind, ti.basetype, ti.has_bin_io, + ti.elemtype, ti.elemdelim, ti.range_subtype, ti.elem_has_bin_io, ti.attrtypoids, ti.attrnames, tt.depth + 1 FROM typeinfo ti, diff --git a/asyncpg/protocol/codecs/array.pyx b/asyncpg/protocol/codecs/array.pyx index 1d4044fc..5c13b2e8 100644 --- a/asyncpg/protocol/codecs/array.pyx +++ b/asyncpg/protocol/codecs/array.pyx @@ -10,6 +10,9 @@ from collections.abc import Container as ContainerABC DEF ARRAY_MAXDIM = 6 # defined in postgresql/src/includes/c.h +# "NULL" +cdef Py_UCS4 *APG_NULL = [0x004E, 0x0055, 0x004C, 0x004C, 0x0000] + ctypedef object (*encode_func_ex)(ConnectionSettings settings, WriteBuffer buf, 
@@ -146,7 +149,7 @@ cdef inline array_decode(ConnectionSettings settings, FastReadBuffer buf, list result int i int32_t elem_len - int64_t elem_count = 1 + int32_t elem_count = 1 FastReadBuffer elem_buf = FastReadBuffer.new() int32_t dims[ARRAY_MAXDIM] Codec elem_codec @@ -173,12 +176,12 @@ cdef inline array_decode(ConnectionSettings settings, FastReadBuffer buf, for i in range(ndims): dims[i] = hton.unpack_int32(buf.read(4)) - elem_count *= dims[i] # Ignore the lower bound information buf.read(4) if ndims == 1: # Fast path for flat arrays + elem_count = dims[0] result = cpython.PyList_New(elem_count) for i in range(elem_count): @@ -312,6 +315,416 @@ cdef inline _nested_array_decode(ConnectionSettings settings, return result +cdef textarray_decode(ConnectionSettings settings, FastReadBuffer buf, + decode_func_ex decoder, const void *decoder_arg, + Py_UCS4 typdelim): + cdef: + Py_UCS4 *array_text + str s + + # Make a copy of array data since we will be mutating it for + # the purposes of element decoding. 
+ s = text_decode(settings, buf) + array_text = PyUnicode_AsUCS4Copy(s) + + try: + return _textarray_decode( + settings, array_text, decoder, decoder_arg, typdelim) + except ValueError as e: + raise ValueError( + 'malformed array literal {!r}: {}'.format(s, e.args[0])) + finally: + PyMem_Free(array_text) + + +cdef _textarray_decode(ConnectionSettings settings, + Py_UCS4 *array_text, + decode_func_ex decoder, + const void *decoder_arg, + Py_UCS4 typdelim): + + cdef: + bytearray array_bytes + list result + list new_stride + Py_UCS4 *ptr + int32_t ndims = 0 + int32_t ubound = 0 + int32_t lbound = 0 + int32_t dims[ARRAY_MAXDIM] + int32_t inferred_dims[ARRAY_MAXDIM] + int32_t inferred_ndims = 0 + void *strides[ARRAY_MAXDIM] + int32_t indexes[ARRAY_MAXDIM] + int32_t nest_level = 0 + int32_t item_level = 0 + bint end_of_array = False + + bint end_of_item = False + bint has_quoting = False + bint strip_spaces = False + bint in_quotes = False + Py_UCS4 *item_start + Py_UCS4 *item_ptr + Py_UCS4 *item_end + + int i + object item + str item_text + FastReadBuffer item_buf = FastReadBuffer.new() + char *pg_item_str + ssize_t pg_item_len + + ptr = array_text + + while True: + while apg_ascii_isspace(ptr[0]): + ptr += 1 + + if ptr[0] != '[': + # Finished parsing dimensions spec. + break + + ptr += 1 # '[' + + if ndims > ARRAY_MAXDIM: + raise ValueError( + 'number of array dimensions ({}) exceed the ' + 'maximum expected ({})'.format(ndims, ARRAY_MAXDIM)) + + ptr = apg_parse_int32(ptr, &ubound) + if ptr == NULL: + raise ValueError('missing array dimension value') + + if ptr[0] == ':': + ptr += 1 + lbound = ubound + + # [lower:upper] spec. We disregard the lbound for decoding. 
+ ptr = apg_parse_int32(ptr, &ubound) + if ptr == NULL: + raise ValueError('missing array dimension value') + else: + lbound = 1 + + if ptr[0] != ']': + raise ValueError('missing \']\' after array dimensions') + + ptr += 1 # ']' + + dims[ndims] = ubound - lbound + 1 + ndims += 1 + + if ndims != 0: + # If dimensions were given, the '=' token is expected. + if ptr[0] != '=': + raise ValueError('missing \'=\' after array dimensions') + + ptr += 1 # '=' + + # Skip any whitespace after the '=', whitespace + # before was consumed in the above loop. + while apg_ascii_isspace(ptr[0]): + ptr += 1 + + # Infer the dimensions from the brace structure in the + # array literal body, and check that it matches the explicit + # spec. This also validates that the array literal is sane. + _infer_array_dims(ptr, typdelim, inferred_dims, &inferred_ndims) + + if inferred_ndims != ndims: + raise ValueError( + 'specified array dimensions do not match array content') + + for i in range(ndims): + if inferred_dims[i] != dims[i]: + raise ValueError( + 'specified array dimensions do not match array content') + else: + # Infer the dimensions from the brace structure in the array literal + # body. This also validates that the array literal is sane. + _infer_array_dims(ptr, typdelim, dims, &ndims) + + while not end_of_array: + # We iterate over the literal character by character + # and modify the string in-place removing the array-specific + # quoting and determining the boundaries of each element. + end_of_item = has_quoting = in_quotes = False + strip_spaces = True + + # Pointers to array element start, end, and the current pointer + # tracking the position where characters are written when + # escaping is folded. 
+ item_start = item_end = item_ptr = ptr + item_level = 0 + + while not end_of_item: + if ptr[0] == '"': + in_quotes = not in_quotes + if in_quotes: + strip_spaces = False + else: + item_end = item_ptr + has_quoting = True + + elif ptr[0] == '\\': + # Quoted character, collapse the backslash. + ptr += 1 + has_quoting = True + item_ptr[0] = ptr[0] + item_ptr += 1 + strip_spaces = False + item_end = item_ptr + + elif in_quotes: + # Consume the string until we see the closing quote. + item_ptr[0] = ptr[0] + item_ptr += 1 + + elif ptr[0] == '{': + # Nesting level increase. + nest_level += 1 + + indexes[nest_level - 1] = 0 + new_stride = cpython.PyList_New(dims[nest_level - 1]) + strides[nest_level - 1] = \ + (new_stride) + + if nest_level > 1: + cpython.Py_INCREF(new_stride) + cpython.PyList_SET_ITEM( + strides[nest_level - 2], + indexes[nest_level - 2], + new_stride) + else: + result = new_stride + + elif ptr[0] == '}': + if item_level == 0: + # Make sure we keep track of which nesting + # level the item belongs to, as the loop + # will continue to consume closing braces + # until the delimiter or the end of input. + item_level = nest_level + + nest_level -= 1 + + if nest_level == 0: + end_of_array = end_of_item = True + + elif ptr[0] == typdelim: + # Array element delimiter, + end_of_item = True + if item_level == 0: + item_level = nest_level + + elif apg_ascii_isspace(ptr[0]): + if not strip_spaces: + item_ptr[0] = ptr[0] + item_ptr += 1 + # Ignore the leading literal whitespace. + + else: + item_ptr[0] = ptr[0] + item_ptr += 1 + strip_spaces = False + item_end = item_ptr + + ptr += 1 + + # end while not end_of_item + + if item_end == item_start: + # Empty array + continue + + item_end[0] = '\0' + + if not has_quoting and apg_strcasecmp(item_start, APG_NULL) == 0: + # NULL element. + item = None + else: + # XXX: find a way to avoid the redundant encode/decode + # cycle here. 
+ item_text = PyUnicode_FromKindAndData( + PyUnicode_4BYTE_KIND, + item_start, + item_end - item_start) + + # Prepare the element buffer and call the text decoder + # for the element type. + as_pg_string_and_size( + settings, item_text, &pg_item_str, &pg_item_len) + item_buf.buf = pg_item_str + item_buf.len = pg_item_len + item = decoder(settings, item_buf, decoder_arg) + + # Place the decoded element in the array. + cpython.Py_INCREF(item) + cpython.PyList_SET_ITEM( + strides[item_level - 1], + indexes[item_level - 1], + item) + + indexes[nest_level - 1] += 1 + + return result + + +cdef enum _ArrayParseState: + APS_START = 1 + APS_STRIDE_STARTED = 2 + APS_STRIDE_DONE = 3 + APS_STRIDE_DELIMITED = 4 + APS_ELEM_STARTED = 5 + APS_ELEM_DELIMITED = 6 + + +cdef _UnexpectedCharacter(const Py_UCS4 *array_text, const Py_UCS4 *ptr): + return ValueError('unexpected character {!r} at position {}'.format( + cpython.PyUnicode_FromOrdinal(ptr[0]), ptr - array_text + 1)) + + +cdef _infer_array_dims(const Py_UCS4 *array_text, + Py_UCS4 typdelim, + int32_t *dims, + int32_t *ndims): + cdef: + const Py_UCS4 *ptr = array_text + int i + int nest_level = 0 + bint end_of_array = False + bint end_of_item = False + bint in_quotes = False + bint array_is_empty = True + int stride_len[ARRAY_MAXDIM] + int prev_stride_len[ARRAY_MAXDIM] + _ArrayParseState parse_state = APS_START + + for i in range(ARRAY_MAXDIM): + dims[i] = prev_stride_len[i] = 0 + stride_len[i] = 1 + + while not end_of_array: + end_of_item = False + + while not end_of_item: + if ptr[0] == '\0': + raise ValueError('unexpected end of string') + + elif ptr[0] == '"': + if (parse_state not in (APS_STRIDE_STARTED, + APS_ELEM_DELIMITED) and + not (parse_state == APS_ELEM_STARTED and in_quotes)): + raise _UnexpectedCharacter(array_text, ptr) + + in_quotes = not in_quotes + if in_quotes: + parse_state = APS_ELEM_STARTED + array_is_empty = False + + elif ptr[0] == '\\': + if parse_state not in (APS_STRIDE_STARTED, + APS_ELEM_STARTED, + 
APS_ELEM_DELIMITED): + raise _UnexpectedCharacter(array_text, ptr) + + parse_state = APS_ELEM_STARTED + array_is_empty = False + + if ptr[1] != '\0': + ptr += 1 + else: + raise ValueError('unexpected end of string') + + elif in_quotes: + # Ignore everything inside the quotes. + pass + + elif ptr[0] == '{': + if parse_state not in (APS_START, + APS_STRIDE_STARTED, + APS_STRIDE_DELIMITED): + raise _UnexpectedCharacter(array_text, ptr) + + parse_state = APS_STRIDE_STARTED + if nest_level >= ARRAY_MAXDIM: + raise ValueError( + 'number of array dimensions ({}) exceed the ' + 'maximum expected ({})'.format( + nest_level, ARRAY_MAXDIM)) + + dims[nest_level] = 0 + nest_level += 1 + if ndims[0] < nest_level: + ndims[0] = nest_level + + elif ptr[0] == '}': + if (parse_state not in (APS_ELEM_STARTED, APS_STRIDE_DONE) and + not (nest_level == 1 and + parse_state == APS_STRIDE_STARTED)): + raise _UnexpectedCharacter(array_text, ptr) + + parse_state = APS_STRIDE_DONE + + if nest_level == 0: + raise _UnexpectedCharacter(array_text, ptr) + + nest_level -= 1 + + if (prev_stride_len[nest_level] != 0 and + stride_len[nest_level] != prev_stride_len[nest_level]): + raise ValueError( + 'inconsistent sub-array dimensions' + ' at position {}'.format( + ptr - array_text + 1)) + + prev_stride_len[nest_level] = stride_len[nest_level] + stride_len[nest_level] = 1 + if nest_level == 0: + end_of_array = end_of_item = True + else: + dims[nest_level - 1] += 1 + + elif ptr[0] == typdelim: + if parse_state not in (APS_ELEM_STARTED, APS_STRIDE_DONE): + raise _UnexpectedCharacter(array_text, ptr) + + if parse_state == APS_STRIDE_DONE: + parse_state = APS_STRIDE_DELIMITED + else: + parse_state = APS_ELEM_DELIMITED + end_of_item = True + stride_len[nest_level - 1] += 1 + + elif not apg_ascii_isspace(ptr[0]): + if parse_state not in (APS_STRIDE_STARTED, + APS_ELEM_STARTED, + APS_ELEM_DELIMITED): + raise _UnexpectedCharacter(array_text, ptr) + + parse_state = APS_ELEM_STARTED + array_is_empty = False + + 
if not end_of_item: + ptr += 1 + + if not array_is_empty: + dims[ndims[0] - 1] += 1 + + ptr += 1 + + # only whitespace is allowed after the closing brace + while ptr[0] != '\0': + if not apg_ascii_isspace(ptr[0]): + raise _UnexpectedCharacter(array_text, ptr) + + ptr += 1 + + if array_is_empty: + ndims[0] = 0 + + cdef int4_encode_ex(ConnectionSettings settings, WriteBuffer buf, object obj, const void *arg): return int4_encode(settings, buf, obj) diff --git a/asyncpg/protocol/codecs/base.pxd b/asyncpg/protocol/codecs/base.pxd index a650b0b8..fe5d7b01 100644 --- a/asyncpg/protocol/codecs/base.pxd +++ b/asyncpg/protocol/codecs/base.pxd @@ -32,6 +32,7 @@ cdef enum CodecType: cdef enum CodecFormat: + PG_FORMAT_ANY = -1 PG_FORMAT_TEXT = 0 PG_FORMAT_BINARY = 1 @@ -55,6 +56,7 @@ cdef class Codec: # arrays Codec element_codec + Py_UCS4 element_delimiter # composite types tuple element_type_oids @@ -70,7 +72,8 @@ cdef class Codec: encode_func c_encoder, decode_func c_decoder, object py_encoder, object py_decoder, Codec element_codec, tuple element_type_oids, - object element_names, list element_codecs) + object element_names, list element_codecs, + Py_UCS4 element_delimiter) cdef encode_scalar(self, ConnectionSettings settings, WriteBuffer buf, object obj) @@ -91,6 +94,9 @@ cdef class Codec: cdef decode_array(self, ConnectionSettings settings, FastReadBuffer buf) + cdef decode_array_text(self, ConnectionSettings settings, + FastReadBuffer buf) + cdef decode_range(self, ConnectionSettings settings, FastReadBuffer buf) cdef decode_composite(self, ConnectionSettings settings, @@ -118,7 +124,8 @@ cdef class Codec: cdef Codec new_array_codec(uint32_t oid, str name, str schema, - Codec element_codec) + Codec element_codec, + Py_UCS4 element_delimiter) @staticmethod cdef Codec new_range_codec(uint32_t oid, @@ -149,4 +156,4 @@ cdef class DataCodecConfig: dict _type_codecs_cache dict _local_type_codecs - cdef inline Codec get_codec(self, uint32_t oid) + cdef inline Codec 
get_codec(self, uint32_t oid, CodecFormat format) diff --git a/asyncpg/protocol/codecs/base.pyx b/asyncpg/protocol/codecs/base.pyx index 9122faf4..2da74249 100644 --- a/asyncpg/protocol/codecs/base.pyx +++ b/asyncpg/protocol/codecs/base.pyx @@ -5,7 +5,8 @@ # the Apache 2.0 License: http://www.apache.org/licenses/LICENSE-2.0 -cdef void* codec_map[MAXSUPPORTEDOID + 1] +cdef void* binary_codec_map[MAXSUPPORTEDOID + 1] +cdef void* text_codec_map[MAXSUPPORTEDOID + 1] cdef dict TYPE_CODECS_CACHE = {} cdef dict EXTRA_CODECS = {} @@ -22,7 +23,8 @@ cdef class Codec: encode_func c_encoder, decode_func c_decoder, object py_encoder, object py_decoder, Codec element_codec, tuple element_type_oids, - object element_names, list element_codecs): + object element_names, list element_codecs, + Py_UCS4 element_delimiter): self.name = name self.schema = schema @@ -36,6 +38,7 @@ cdef class Codec: self.element_codec = element_codec self.element_type_oids = element_type_oids self.element_codecs = element_codecs + self.element_delimiter = element_delimiter if element_names is not None: self.element_names = record.ApgRecordDesc_New( @@ -48,7 +51,10 @@ cdef class Codec: self.decoder = &self.decode_scalar elif type == CODEC_ARRAY: self.encoder = &self.encode_array - self.decoder = &self.decode_array + if format == PG_FORMAT_BINARY: + self.decoder = &self.decode_array + else: + self.decoder = &self.decode_array_text elif type == CODEC_RANGE: self.encoder = &self.encode_range self.decoder = &self.decode_range @@ -71,7 +77,7 @@ cdef class Codec: self.py_encoder, self.py_decoder, self.element_codec, self.element_type_oids, self.element_names, - self.element_codecs) + self.element_codecs, self.element_delimiter) return codec @@ -134,6 +140,12 @@ cdef class Codec: return array_decode(settings, buf, codec_decode_func_ex, (self.element_codec)) + cdef decode_array_text(self, ConnectionSettings settings, + FastReadBuffer buf): + return textarray_decode(settings, buf, codec_decode_func_ex, + 
(self.element_codec), + self.element_delimiter) + cdef decode_range(self, ConnectionSettings settings, FastReadBuffer buf): return range_decode(settings, buf, codec_decode_func_ex, (self.element_codec)) @@ -241,11 +253,13 @@ cdef class Codec: cdef Codec new_array_codec(uint32_t oid, str name, str schema, - Codec element_codec): + Codec element_codec, + Py_UCS4 element_delimiter): cdef Codec codec codec = Codec(oid) - codec.init(name, schema, 'array', CODEC_ARRAY, PG_FORMAT_BINARY, - NULL, NULL, None, None, element_codec, None, None, None) + codec.init(name, schema, 'array', CODEC_ARRAY, element_codec.format, + NULL, NULL, None, None, element_codec, None, None, None, + element_delimiter) return codec @staticmethod @@ -256,7 +270,8 @@ cdef class Codec: cdef Codec codec codec = Codec(oid) codec.init(name, schema, 'range', CODEC_RANGE, PG_FORMAT_BINARY, - NULL, NULL, None, None, element_codec, None, None, None) + NULL, NULL, None, None, element_codec, None, None, None, + 0) return codec @staticmethod @@ -270,7 +285,7 @@ cdef class Codec: codec = Codec(oid) codec.init(name, schema, 'composite', CODEC_COMPOSITE, PG_FORMAT_BINARY, NULL, NULL, None, None, None, - element_type_oids, element_names, element_codecs) + element_type_oids, element_names, element_codecs, 0) return codec @staticmethod @@ -284,7 +299,7 @@ cdef class Codec: cdef Codec codec codec = Codec(oid) codec.init(name, schema, kind, CODEC_PY, format, NULL, NULL, - encoder, decoder, None, None, None, None) + encoder, decoder, None, None, None, None, 0) return codec @@ -313,11 +328,22 @@ cdef class DataCodecConfig: cdef: Codec elem_codec list comp_elem_codecs + CodecFormat format + CodecFormat elem_format + bint has_text_elements + Py_UCS4 elem_delim for ti in types: oid = ti['oid'] - if self.get_codec(oid) is not None: + if not ti['has_bin_io']: + format = PG_FORMAT_TEXT + else: + format = PG_FORMAT_BINARY + + has_text_elements = False + + if self.get_codec(oid, format) is not None: continue name = ti['name'] 
@@ -338,14 +364,21 @@ cdef class DataCodecConfig: name = name[1:] name = '{}[]'.format(name) - elem_codec = self.get_codec(array_element_oid) + if ti['elem_has_bin_io']: + elem_format = PG_FORMAT_BINARY + else: + elem_format = PG_FORMAT_TEXT + elem_codec = self.get_codec(array_element_oid, elem_format) if elem_codec is None: raise RuntimeError( 'no codec for array element type {}'.format( array_element_oid)) - self._type_codecs_cache[oid] = \ - Codec.new_array_codec(oid, name, schema, elem_codec) + elem_delim = ti['elemdelim'][0] + + self._type_codecs_cache[oid, elem_format] = \ + Codec.new_array_codec( + oid, name, schema, elem_codec, elem_delim) elif ti['kind'] == b'c': if not comp_type_attrs: @@ -358,7 +391,10 @@ cdef class DataCodecConfig: comp_elem_codecs = [] for typoid in comp_type_attrs: - elem_codec = self.get_codec(typoid) + elem_codec = self.get_codec(typoid, PG_FORMAT_BINARY) + if elem_codec is None: + elem_codec = self.get_codec(typoid, PG_FORMAT_TEXT) + has_text_elements = True if elem_codec is None: raise RuntimeError( 'no codec for composite attribute type {}'.format( @@ -369,7 +405,10 @@ cdef class DataCodecConfig: for i, attrname in enumerate(ti['attrnames']): element_names[attrname] = i - self._type_codecs_cache[oid] = \ + if has_text_elements: + format = PG_FORMAT_TEXT + + self._type_codecs_cache[oid, format] = \ Codec.new_composite_codec( oid, name, schema, comp_elem_codecs, comp_type_attrs, @@ -383,12 +422,12 @@ cdef class DataCodecConfig: 'type record missing base type for domain {}'.format( oid)) - elem_codec = self.get_codec(base_type) + elem_codec = self.get_codec(base_type, format) if elem_codec is None: raise RuntimeError( 'no codec for domain base type {}'.format(base_type)) - self._type_codecs_cache[oid] = elem_codec + self._type_codecs_cache[oid, format] = elem_codec elif ti['kind'] == b'r': # Range type @@ -398,13 +437,17 @@ cdef class DataCodecConfig: 'type record missing base type for range {}'.format( oid)) - elem_codec = 
self.get_codec(range_subtype_oid) + if ti['elem_has_bin_io']: + elem_format = PG_FORMAT_BINARY + else: + elem_format = PG_FORMAT_TEXT + elem_codec = self.get_codec(range_subtype_oid, elem_format) if elem_codec is None: raise RuntimeError( 'no codec for range element type {}'.format( range_subtype_oid)) - self._type_codecs_cache[oid] = \ + self._type_codecs_cache[oid, elem_format] = \ Codec.new_range_codec(oid, name, schema, elem_codec) else: @@ -429,13 +472,13 @@ cdef class DataCodecConfig: def add_python_codec(self, typeoid, typename, typeschema, typekind, encoder, decoder, binary): - if self.get_codec(typeoid) is not None: + format = PG_FORMAT_BINARY if binary else PG_FORMAT_TEXT + + if self.get_codec(typeoid, format) is not None: raise ValueError('cannot override codec for type {}'.format( typeoid)) - format = PG_FORMAT_BINARY if binary else PG_FORMAT_TEXT - - self._local_type_codecs[typeoid] = \ + self._local_type_codecs[typeoid, format] = \ Codec.new_python_codec(typeoid, typename, typeschema, typekind, encoder, decoder, format) @@ -445,57 +488,65 @@ cdef class DataCodecConfig: Codec codec Codec target_codec - if self.get_codec(typeoid) is not None: - raise ValueError('cannot override codec for type {}'.format( - typeoid)) + for format in (PG_FORMAT_BINARY, PG_FORMAT_TEXT): + if self.get_codec(typeoid, format) is not None: + raise ValueError('cannot override codec for type {}'.format( + typeoid)) - if isinstance(alias_to, int): - target_codec = self.get_codec(alias_to) - else: - target_codec = get_extra_codec(alias_to) + if isinstance(alias_to, int): + target_codec = self.get_codec(alias_to, format) + else: + target_codec = get_extra_codec(alias_to, format) - if target_codec is None: - raise ValueError('unknown alias target: {}'.format(alias_to)) + if target_codec is None: + continue - codec = target_codec.copy() - codec.oid = typeoid - codec.name = typename - codec.schema = typeschema - codec.kind = typekind + codec = target_codec.copy() + codec.oid = typeoid 
+ codec.name = typename + codec.schema = typeschema + codec.kind = typekind - self._local_type_codecs[typeoid] = codec + self._local_type_codecs[typeoid, format] = codec + + if ((typeoid, PG_FORMAT_BINARY) not in self._local_type_codecs and + (typeoid, PG_FORMAT_TEXT) not in self._local_type_codecs): + raise ValueError('unknown alias target: {}'.format(alias_to)) def clear_type_cache(self): self._type_codecs_cache.clear() - cdef inline Codec get_codec(self, uint32_t oid): + cdef inline Codec get_codec(self, uint32_t oid, CodecFormat format): cdef Codec codec - codec = get_core_codec(oid) + codec = get_core_codec(oid, format) if codec is not None: return codec try: - return self._type_codecs_cache[oid] + return self._type_codecs_cache[oid, format] except KeyError: try: - return self._local_type_codecs[oid] + return self._local_type_codecs[oid, format] except KeyError: return None -cdef inline Codec get_core_codec(uint32_t oid): +cdef inline Codec get_core_codec(uint32_t oid, CodecFormat format): cdef void *ptr if oid > MAXSUPPORTEDOID: return None - ptr = codec_map[oid] + if format == PG_FORMAT_BINARY: + ptr = binary_codec_map[oid] + else: + ptr = text_codec_map[oid] if ptr is NULL: return None return ptr cdef inline int has_core_codec(uint32_t oid): - return codec_map[oid] != NULL + return binary_codec_map[oid] != NULL or text_codec_map[oid] != NULL cdef register_core_codec(uint32_t oid, @@ -518,9 +569,13 @@ cdef register_core_codec(uint32_t oid, codec = Codec(oid) codec.init(name, 'pg_catalog', kind, CODEC_C, format, encode, - decode, None, None, None, None, None, None) + decode, None, None, None, None, None, None, 0) cpython.Py_INCREF(codec) # immortalize - codec_map[oid] = codec + + if format == PG_FORMAT_BINARY: + binary_codec_map[oid] = codec + else: + text_codec_map[oid] = codec cdef register_extra_codec(str name, @@ -535,9 +590,9 @@ cdef register_extra_codec(str name, codec = Codec(INVALIDOID) codec.init(name, None, kind, CODEC_C, format, encode, - decode, 
None, None, None, None, None, None) - EXTRA_CODECS[name] = codec + decode, None, None, None, None, None, None, 0) + EXTRA_CODECS[name, format] = codec -cdef inline Codec get_extra_codec(str name): - return EXTRA_CODECS.get(name) +cdef inline Codec get_extra_codec(str name, CodecFormat format): + return EXTRA_CODECS.get((name, format)) diff --git a/asyncpg/protocol/codecs/record.pyx b/asyncpg/protocol/codecs/record.pyx index 6912f930..291a0b13 100644 --- a/asyncpg/protocol/codecs/record.pyx +++ b/asyncpg/protocol/codecs/record.pyx @@ -37,7 +37,8 @@ cdef anonymous_record_decode(ConnectionSettings settings, FastReadBuffer buf): elem_codec = settings.get_data_codec(elem_typ) if elem_codec is None or not elem_codec.has_decoder(): raise RuntimeError( - 'no decoder for type OID {}'.format(elem_typ)) + 'no decoder for composite type element in ' + 'position {} of type OID {}'.format(i, elem_typ)) elem = elem_codec.decode(settings, elem_buf.slice_from(buf, elem_len)) diff --git a/asyncpg/protocol/codecs/textutils.pyx b/asyncpg/protocol/codecs/textutils.pyx new file mode 100644 index 00000000..1a09c179 --- /dev/null +++ b/asyncpg/protocol/codecs/textutils.pyx @@ -0,0 +1,72 @@ +# Copyright (C) 2016-present the asyncpg authors and contributors +# +# +# This module is part of asyncpg and is released under +# the Apache 2.0 License: http://www.apache.org/licenses/LICENSE-2.0 + + + +cdef int apg_strcasecmp(const Py_UCS4 *s1, const Py_UCS4 *s2): + cdef: + uint32_t c1 + uint32_t c2 + int i = 0 + + while True: + c1 = s1[i] + c2 = s2[i] + + if c1 != c2: + if c1 >= 'A' and c1 <= 'Z': + c1 += 'a' - 'A' + if c2 >= 'A' and c2 <= 'Z': + c2 += 'a' - 'A' + + if c1 != c2: + return c1 - c2 + + if c1 == 0 or c2 == 0: + break + + i += 1 + + return 0 + + +cdef inline bint apg_ascii_isspace(Py_UCS4 ch): + return ( + ch == ' ' or + ch == '\n' or + ch == '\r' or + ch == '\t' or + ch == '\v' or + ch == '\f' + ) + + +cdef Py_UCS4 *apg_parse_int32(Py_UCS4 *buf, int32_t *num): + cdef: + Py_UCS4 *p + 
int32_t n = 0 + int32_t neg = 0 + + if buf[0] == '-': + neg = 1 + buf += 1 + elif buf[0] == '+': + buf += 1 + + p = buf + while p[0] >= '0' and p[0] <= '9': + n = 10 * n - (p[0] - '0') + p += 1 + + if p == buf: + return NULL + + if not neg: + n = -n + + num[0] = n + + return p diff --git a/asyncpg/protocol/protocol.pyx b/asyncpg/protocol/protocol.pyx index 51bbf9d9..6fdad835 100644 --- a/asyncpg/protocol/protocol.pyx +++ b/asyncpg/protocol/protocol.pyx @@ -24,7 +24,8 @@ from asyncpg.protocol.python cimport ( PyMem_Malloc, PyMem_Realloc, PyMem_Calloc, PyMem_Free, PyMemoryView_GET_BUFFER, PyMemoryView_Check, PyUnicode_AsUTF8AndSize, PyByteArray_AsString, - PyByteArray_Check) + PyByteArray_Check, PyUnicode_AsUCS4Copy, + PyUnicode_FromKindAndData, PyUnicode_4BYTE_KIND) from cpython cimport PyBuffer_FillInfo, PyBytes_AsString @@ -42,6 +43,7 @@ include "settings.pyx" include "buffer.pyx" include "codecs/base.pyx" +include "codecs/textutils.pyx" # String types. Need to go first, as other codecs may rely on # text decoding/encoding. 
diff --git a/asyncpg/protocol/python.pxd b/asyncpg/protocol/python.pxd index ea71b8fe..869eed3e 100644 --- a/asyncpg/protocol/python.pxd +++ b/asyncpg/protocol/python.pxd @@ -20,3 +20,8 @@ cdef extern from "Python.h": char* PyUnicode_AsUTF8AndSize(object unicode, ssize_t *size) except NULL char* PyByteArray_AsString(object) + Py_UCS4* PyUnicode_AsUCS4Copy(object) + object PyUnicode_FromKindAndData( + int kind, const void *buffer, Py_ssize_t size) + + int PyUnicode_4BYTE_KIND diff --git a/asyncpg/protocol/settings.pxd b/asyncpg/protocol/settings.pxd index ca9624ee..b50fe14d 100644 --- a/asyncpg/protocol/settings.pxd +++ b/asyncpg/protocol/settings.pxd @@ -22,4 +22,4 @@ cdef class ConnectionSettings: decoder, binary) cpdef inline set_builtin_type_codec( self, typeoid, typename, typeschema, typekind, alias_to) - cpdef inline Codec get_data_codec(self, uint32_t oid) + cpdef inline Codec get_data_codec(self, uint32_t oid, CodecFormat format=*) diff --git a/asyncpg/protocol/settings.pyx b/asyncpg/protocol/settings.pyx index aef492c3..9360c459 100644 --- a/asyncpg/protocol/settings.pyx +++ b/asyncpg/protocol/settings.pyx @@ -42,8 +42,15 @@ cdef class ConnectionSettings: self._data_codecs.set_builtin_type_codec(typeoid, typename, typeschema, typekind, alias_to) - cpdef inline Codec get_data_codec(self, uint32_t oid): - return self._data_codecs.get_codec(oid) + cpdef inline Codec get_data_codec(self, uint32_t oid, + CodecFormat format=PG_FORMAT_ANY): + if format == PG_FORMAT_ANY: + codec = self._data_codecs.get_codec(oid, PG_FORMAT_BINARY) + if codec is None: + codec = self._data_codecs.get_codec(oid, PG_FORMAT_TEXT) + return codec + else: + return self._data_codecs.get_codec(oid, format) def __getattr__(self, name): if not name.startswith('_'): diff --git a/tests/test_codecs.py b/tests/test_codecs.py index aa7e9028..070bec8d 100644 --- a/tests/test_codecs.py +++ b/tests/test_codecs.py @@ -917,7 +917,6 @@ async def test_table_as_composite(self): DROP TABLE tab; ''') - 
@unittest.expectedFailure async def test_relacl_array_type(self): await self.con.execute(r''' CREATE USER """u1'";