Skip to content

Commit ea30a28

Browse files
gh-113732: Fix support of QUOTE_NOTNULL and QUOTE_STRINGS in csv.reader (GH-113738)
1 parent 58f883b commit ea30a28

File tree

4 files changed

+57
-18
lines changed

4 files changed

+57
-18
lines changed

Doc/whatsnew/3.12.rst

+1-1
Original file line numberDiff line numberDiff line change
@@ -690,7 +690,7 @@ csv
690690

691691
* Add :const:`csv.QUOTE_NOTNULL` and :const:`csv.QUOTE_STRINGS` flags to
692692
provide finer grained control of ``None`` and empty strings by
693-
:class:`csv.writer` objects.
693+
:class:`~csv.reader` and :class:`~csv.writer` objects.
694694

695695
dis
696696
---

Lib/test/test_csv.py

+25
Original file line numberDiff line numberDiff line change
@@ -392,17 +392,42 @@ def test_read_quoting(self):
392392
# will this fail where locale uses comma for decimals?
393393
self._read_test([',3,"5",7.3, 9'], [['', 3, '5', 7.3, 9]],
394394
quoting=csv.QUOTE_NONNUMERIC)
395+
self._read_test([',3,"5",7.3, 9'], [[None, '3', '5', '7.3', ' 9']],
396+
quoting=csv.QUOTE_NOTNULL)
397+
self._read_test([',3,"5",7.3, 9'], [[None, 3, '5', 7.3, 9]],
398+
quoting=csv.QUOTE_STRINGS)
399+
400+
self._read_test([',,"",'], [['', '', '', '']])
401+
self._read_test([',,"",'], [['', '', '', '']],
402+
quoting=csv.QUOTE_NONNUMERIC)
403+
self._read_test([',,"",'], [[None, None, '', None]],
404+
quoting=csv.QUOTE_NOTNULL)
405+
self._read_test([',,"",'], [[None, None, '', None]],
406+
quoting=csv.QUOTE_STRINGS)
407+
395408
self._read_test(['"a\nb", 7'], [['a\nb', ' 7']])
396409
self.assertRaises(ValueError, self._read_test,
397410
['abc,3'], [[]],
398411
quoting=csv.QUOTE_NONNUMERIC)
412+
self.assertRaises(ValueError, self._read_test,
413+
['abc,3'], [[]],
414+
quoting=csv.QUOTE_STRINGS)
399415
self._read_test(['1,@,3,@,5'], [['1', ',3,', '5']], quotechar='@')
400416
self._read_test(['1,\0,3,\0,5'], [['1', ',3,', '5']], quotechar='\0')
401417

402418
def test_read_skipinitialspace(self):
403419
self._read_test(['no space, space, spaces,\ttab'],
404420
[['no space', 'space', 'spaces', '\ttab']],
405421
skipinitialspace=True)
422+
self._read_test([' , , '],
423+
[['', '', '']],
424+
skipinitialspace=True)
425+
self._read_test([' , , '],
426+
[[None, None, None]],
427+
skipinitialspace=True, quoting=csv.QUOTE_NOTNULL)
428+
self._read_test([' , , '],
429+
[[None, None, None]],
430+
skipinitialspace=True, quoting=csv.QUOTE_STRINGS)
406431

407432
def test_read_bigfield(self):
408433
# This exercises the buffer realloc functionality and field size
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
Fix support of :data:`~csv.QUOTE_NOTNULL` and :data:`~csv.QUOTE_STRINGS` in
2+
:func:`csv.reader`.

Modules/_csv.c

+29-17
Original file line numberDiff line numberDiff line change
@@ -131,7 +131,7 @@ typedef struct {
131131
Py_UCS4 *field; /* temporary buffer */
132132
Py_ssize_t field_size; /* size of allocated buffer */
133133
Py_ssize_t field_len; /* length of current field */
134-
int numeric_field; /* treat field as numeric */
134+
bool unquoted_field; /* true if no quotes around the current field */
135135
unsigned long line_num; /* Source-file line number */
136136
} ReaderObj;
137137

@@ -644,22 +644,33 @@ _call_dialect(_csvstate *module_state, PyObject *dialect_inst, PyObject *kwargs)
644644
static int
645645
parse_save_field(ReaderObj *self)
646646
{
647+
int quoting = self->dialect->quoting;
647648
PyObject *field;
648649

649-
field = PyUnicode_FromKindAndData(PyUnicode_4BYTE_KIND,
650-
(void *) self->field, self->field_len);
651-
if (field == NULL)
652-
return -1;
653-
self->field_len = 0;
654-
if (self->numeric_field) {
655-
PyObject *tmp;
656-
657-
self->numeric_field = 0;
658-
tmp = PyNumber_Float(field);
659-
Py_DECREF(field);
660-
if (tmp == NULL)
650+
if (self->unquoted_field &&
651+
self->field_len == 0 &&
652+
(quoting == QUOTE_NOTNULL || quoting == QUOTE_STRINGS))
653+
{
654+
field = Py_NewRef(Py_None);
655+
}
656+
else {
657+
field = PyUnicode_FromKindAndData(PyUnicode_4BYTE_KIND,
658+
(void *) self->field, self->field_len);
659+
if (field == NULL) {
661660
return -1;
662-
field = tmp;
661+
}
662+
if (self->unquoted_field &&
663+
self->field_len != 0 &&
664+
(quoting == QUOTE_NONNUMERIC || quoting == QUOTE_STRINGS))
665+
{
666+
PyObject *tmp = PyNumber_Float(field);
667+
Py_DECREF(field);
668+
if (tmp == NULL) {
669+
return -1;
670+
}
671+
field = tmp;
672+
}
673+
self->field_len = 0;
663674
}
664675
if (PyList_Append(self->fields, field) < 0) {
665676
Py_DECREF(field);
@@ -721,6 +732,7 @@ parse_process_char(ReaderObj *self, _csvstate *module_state, Py_UCS4 c)
721732
/* fallthru */
722733
case START_FIELD:
723734
/* expecting field */
735+
self->unquoted_field = true;
724736
if (c == '\n' || c == '\r' || c == EOL) {
725737
/* save empty field - return [fields] */
726738
if (parse_save_field(self) < 0)
@@ -730,10 +742,12 @@ parse_process_char(ReaderObj *self, _csvstate *module_state, Py_UCS4 c)
730742
else if (c == dialect->quotechar &&
731743
dialect->quoting != QUOTE_NONE) {
732744
/* start quoted field */
745+
self->unquoted_field = false;
733746
self->state = IN_QUOTED_FIELD;
734747
}
735748
else if (c == dialect->escapechar) {
736749
/* possible escaped character */
750+
self->unquoted_field = false;
737751
self->state = ESCAPED_CHAR;
738752
}
739753
else if (c == ' ' && dialect->skipinitialspace)
@@ -746,8 +760,6 @@ parse_process_char(ReaderObj *self, _csvstate *module_state, Py_UCS4 c)
746760
}
747761
else {
748762
/* begin new unquoted field */
749-
if (dialect->quoting == QUOTE_NONNUMERIC)
750-
self->numeric_field = 1;
751763
if (parse_add_char(self, module_state, c) < 0)
752764
return -1;
753765
self->state = IN_FIELD;
@@ -892,7 +904,7 @@ parse_reset(ReaderObj *self)
892904
return -1;
893905
self->field_len = 0;
894906
self->state = START_RECORD;
895-
self->numeric_field = 0;
907+
self->unquoted_field = false;
896908
return 0;
897909
}
898910

0 commit comments

Comments
 (0)