Skip to content

Commit b2e3802

Browse files
gh-113732: Fix support of QUOTE_NOTNULL and QUOTE_STRINGS in csv.reader
1 parent 88d0464 commit b2e3802

File tree

4 files changed

+54
-18
lines changed

4 files changed

+54
-18
lines changed

Doc/whatsnew/3.12.rst

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -690,7 +690,7 @@ csv
690690

691691
* Add :const:`csv.QUOTE_NOTNULL` and :const:`csv.QUOTE_STRINGS` flags to
692692
provide finer grained control of ``None`` and empty strings by
693-
:class:`csv.writer` objects.
693+
:class:`~csv.reader` and :class:`csv.writer` objects.
694694

695695
dis
696696
---

Lib/test/test_csv.py

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -347,17 +347,39 @@ def test_read_quoting(self):
347347
# will this fail where locale uses comma for decimals?
348348
self._read_test([',3,"5",7.3, 9'], [['', 3, '5', 7.3, 9]],
349349
quoting=csv.QUOTE_NONNUMERIC)
350+
self._read_test([',3,"5",7.3, 9'], [[None, '3', '5', '7.3', ' 9']],
351+
quoting=csv.QUOTE_NOTNULL)
352+
self._read_test([',3,"5",7.3, 9'], [[None, 3, '5', 7.3, 9]],
353+
quoting=csv.QUOTE_STRINGS)
354+
355+
self._read_test([',,"",'], [['', '', '', '']])
356+
self._read_test([',,"",'], [['', '', '', '']],
357+
quoting=csv.QUOTE_NONNUMERIC)
358+
self._read_test([',,"",'], [[None, None, '', None]],
359+
quoting=csv.QUOTE_NOTNULL)
360+
self._read_test([',,"",'], [[None, None, '', None]],
361+
quoting=csv.QUOTE_STRINGS)
362+
350363
self._read_test(['"a\nb", 7'], [['a\nb', ' 7']])
351364
self.assertRaises(ValueError, self._read_test,
352365
['abc,3'], [[]],
353366
quoting=csv.QUOTE_NONNUMERIC)
367+
self.assertRaises(ValueError, self._read_test,
368+
['abc,3'], [[]],
369+
quoting=csv.QUOTE_STRINGS)
354370
self._read_test(['1,@,3,@,5'], [['1', ',3,', '5']], quotechar='@')
355371
self._read_test(['1,\0,3,\0,5'], [['1', ',3,', '5']], quotechar='\0')
356372

357373
def test_read_skipinitialspace(self):
358374
self._read_test(['no space, space, spaces,\ttab'],
359375
[['no space', 'space', 'spaces', '\ttab']],
360376
skipinitialspace=True)
377+
self._read_test([' , , '],
378+
[['', '', '']],
379+
skipinitialspace=True)
380+
self._read_test([' , , '],
381+
[[None, None, None]],
382+
skipinitialspace=True, quoting=csv.QUOTE_STRINGS)
361383

362384
def test_read_bigfield(self):
363385
# This exercises the buffer realloc functionality and field size
Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
Fix support of :data:`~csv.QUOTE_NOTNULL` and :data:`~csv.QUOTE_STRINGS` in
2+
:func:`csv.reader`.

Modules/_csv.c

Lines changed: 29 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -133,7 +133,7 @@ typedef struct {
133133
Py_UCS4 *field; /* temporary buffer */
134134
Py_ssize_t field_size; /* size of allocated buffer */
135135
Py_ssize_t field_len; /* length of current field */
136-
int numeric_field; /* treat field as numeric */
136+
int unquoted_field;
137137
unsigned long line_num; /* Source-file line number */
138138
} ReaderObj;
139139

@@ -607,22 +607,33 @@ _call_dialect(_csvstate *module_state, PyObject *dialect_inst, PyObject *kwargs)
607607
static int
608608
parse_save_field(ReaderObj *self)
609609
{
610+
int quoting = self->dialect->quoting;
610611
PyObject *field;
611612

612-
field = PyUnicode_FromKindAndData(PyUnicode_4BYTE_KIND,
613-
(void *) self->field, self->field_len);
614-
if (field == NULL)
615-
return -1;
616-
self->field_len = 0;
617-
if (self->numeric_field) {
618-
PyObject *tmp;
619-
620-
self->numeric_field = 0;
621-
tmp = PyNumber_Float(field);
622-
Py_DECREF(field);
623-
if (tmp == NULL)
613+
if (self->unquoted_field &&
614+
self->field_len == 0 &&
615+
(quoting == QUOTE_NOTNULL || quoting == QUOTE_STRINGS))
616+
{
617+
field = Py_NewRef(Py_None);
618+
}
619+
else {
620+
field = PyUnicode_FromKindAndData(PyUnicode_4BYTE_KIND,
621+
(void *) self->field, self->field_len);
622+
if (field == NULL)
624623
return -1;
625-
field = tmp;
624+
if (self->unquoted_field &&
625+
self->field_len != 0 &&
626+
(quoting == QUOTE_NONNUMERIC || quoting == QUOTE_STRINGS))
627+
{
628+
PyObject *tmp;
629+
630+
tmp = PyNumber_Float(field);
631+
Py_DECREF(field);
632+
if (tmp == NULL)
633+
return -1;
634+
field = tmp;
635+
}
636+
self->field_len = 0;
626637
}
627638
if (PyList_Append(self->fields, field) < 0) {
628639
Py_DECREF(field);
@@ -684,6 +695,7 @@ parse_process_char(ReaderObj *self, _csvstate *module_state, Py_UCS4 c)
684695
/* fallthru */
685696
case START_FIELD:
686697
/* expecting field */
698+
self->unquoted_field = 1;
687699
if (c == '\n' || c == '\r' || c == EOL) {
688700
/* save empty field - return [fields] */
689701
if (parse_save_field(self) < 0)
@@ -693,10 +705,12 @@ parse_process_char(ReaderObj *self, _csvstate *module_state, Py_UCS4 c)
693705
else if (c == dialect->quotechar &&
694706
dialect->quoting != QUOTE_NONE) {
695707
/* start quoted field */
708+
self->unquoted_field = 0;
696709
self->state = IN_QUOTED_FIELD;
697710
}
698711
else if (c == dialect->escapechar) {
699712
/* possible escaped character */
713+
self->unquoted_field = 0;
700714
self->state = ESCAPED_CHAR;
701715
}
702716
else if (c == ' ' && dialect->skipinitialspace)
@@ -709,8 +723,6 @@ parse_process_char(ReaderObj *self, _csvstate *module_state, Py_UCS4 c)
709723
}
710724
else {
711725
/* begin new unquoted field */
712-
if (dialect->quoting == QUOTE_NONNUMERIC)
713-
self->numeric_field = 1;
714726
if (parse_add_char(self, module_state, c) < 0)
715727
return -1;
716728
self->state = IN_FIELD;
@@ -854,7 +866,7 @@ parse_reset(ReaderObj *self)
854866
return -1;
855867
self->field_len = 0;
856868
self->state = START_RECORD;
857-
self->numeric_field = 0;
869+
self->unquoted_field = 0;
858870
return 0;
859871
}
860872

0 commit comments

Comments
 (0)