From 22aee9a9bd99ac609fcbd038a06d3d48196a9a5d Mon Sep 17 00:00:00 2001
From: sobolevn
Date: Mon, 19 Sep 2022 12:19:21 +0300
Subject: [PATCH] gh-94808: Cover `PyUnicode_Count` in CAPI

---
Review notes (text between the "---" line and the diffstat is not part
of the commit message):

* test_count() calls _testcapi.unicode_count(), the wrapper around
  PyUnicode_Count() that this patch adds to Modules/_testcapi/unicode.c.
* The wrapper parses its arguments with "UUnn", so the TypeError cases
  under "# wrong args" are raised by PyArg_ParseTuple() and never reach
  PyUnicode_Count().
* The second `if` in unicode_count() omits braces while the first one
  uses them; PEP 7 asks for braces on every branch in new code.
* Line breaks and indentation were restored by convention. Column
  alignment inside context lines should be checked against the tree
  before applying.

 Lib/test/test_unicode.py    | 38 +++++++++++++++++++++++++++++++++++++
 Modules/_testcapi/unicode.c | 21 ++++++++++++++++++++
 2 files changed, 59 insertions(+)

diff --git a/Lib/test/test_unicode.py b/Lib/test/test_unicode.py
index 63bccb72e04646..30faaaf83bec96 100644
--- a/Lib/test/test_unicode.py
+++ b/Lib/test/test_unicode.py
@@ -2945,6 +2945,44 @@ def test_asutf8andsize(self):
         self.assertEqual(unicode_asutf8andsize(nonbmp), (b'\xf4\x8f\xbf\xbf', 4))
         self.assertRaises(UnicodeEncodeError, unicode_asutf8andsize, 'a\ud800b\udfffc')
 
+    # Test PyUnicode_Count()
+    @support.cpython_only
+    @unittest.skipIf(_testcapi is None, 'need _testcapi module')
+    def test_count(self):
+        from _testcapi import unicode_count
+
+        st = 'abcabd'
+        self.assertEqual(unicode_count(st, 'a', 0, len(st)), 2)
+        self.assertEqual(unicode_count(st, 'ab', 0, len(st)), 2)
+        self.assertEqual(unicode_count(st, 'abc', 0, len(st)), 1)
+        self.assertEqual(unicode_count(st, 'а', 0, len(st)), 0)  # cyrillic "a"
+        # start < end
+        self.assertEqual(unicode_count(st, 'a', 3, len(st)), 1)
+        self.assertEqual(unicode_count(st, 'a', 4, len(st)), 0)
+        self.assertEqual(unicode_count(st, 'a', 0, sys.maxsize), 2)
+        # start >= end
+        self.assertEqual(unicode_count(st, 'abc', 0, 0), 0)
+        self.assertEqual(unicode_count(st, 'a', 3, 2), 0)
+        self.assertEqual(unicode_count(st, 'a', sys.maxsize, 5), 0)
+        # negative
+        self.assertEqual(unicode_count(st, 'ab', -len(st), -1), 2)
+        self.assertEqual(unicode_count(st, 'a', -len(st), -3), 1)
+        # wrong args
+        self.assertRaises(TypeError, unicode_count, 'a', 'a')
+        self.assertRaises(TypeError, unicode_count, 'a', 'a', 1)
+        self.assertRaises(TypeError, unicode_count, 1, 'a', 0, 1)
+        self.assertRaises(TypeError, unicode_count, 'a', 1, 0, 1)
+        # empty string
+        self.assertEqual(unicode_count('abc', '', 0, 3), 4)
+        self.assertEqual(unicode_count('abc', '', 1, 3), 3)
+        self.assertEqual(unicode_count('', '', 0, 1), 1)
+        self.assertEqual(unicode_count('', 'a', 0, 1), 0)
+        # different unicode kinds
+        for uni in "\xa1", "\u8000\u8080", "\ud800\udc02", "\U0001f100\U0001f1f1":
+            for ch in uni:
+                self.assertEqual(unicode_count(uni, ch, 0, len(uni)), 1)
+                self.assertEqual(unicode_count(st, ch, 0, len(st)), 0)
+
     # Test PyUnicode_FindChar()
     @support.cpython_only
     @unittest.skipIf(_testcapi is None, 'need _testcapi module')
diff --git a/Modules/_testcapi/unicode.c b/Modules/_testcapi/unicode.c
index d0f1e2abdc8259..d5c4a9e5b95ec6 100644
--- a/Modules/_testcapi/unicode.c
+++ b/Modules/_testcapi/unicode.c
@@ -223,6 +223,26 @@ unicode_asutf8andsize(PyObject *self, PyObject *args)
     return Py_BuildValue("(Nn)", result, utf8_len);
 }
 
+static PyObject *
+unicode_count(PyObject *self, PyObject *args)
+{
+    PyObject *str;
+    PyObject *substr;
+    Py_ssize_t result;
+    Py_ssize_t start, end;
+
+    if (!PyArg_ParseTuple(args, "UUnn:unicode_count", &str, &substr,
+                          &start, &end)) {
+        return NULL;
+    }
+
+    result = PyUnicode_Count(str, substr, start, end);
+    if (result == -1)
+        return NULL;
+    else
+        return PyLong_FromSsize_t(result);
+}
+
 static PyObject *
 unicode_findchar(PyObject *self, PyObject *args)
 {
@@ -696,6 +716,7 @@ static PyMethodDef TestMethods[] = {
     {"unicode_asucs4", unicode_asucs4, METH_VARARGS},
     {"unicode_asutf8", unicode_asutf8, METH_VARARGS},
     {"unicode_asutf8andsize", unicode_asutf8andsize, METH_VARARGS},
+    {"unicode_count", unicode_count, METH_VARARGS},
     {"unicode_findchar", unicode_findchar, METH_VARARGS},
     {"unicode_copycharacters", unicode_copycharacters, METH_VARARGS},
     {NULL},