Skip to content

Commit e63d7da

Browse files
authored
gh-94808: Cover PyUnicode_Count in CAPI (#96929)
1 parent e39ae6b commit e63d7da

File tree

2 files changed

+59
-0
lines changed

2 files changed

+59
-0
lines changed

Lib/test/test_unicode.py

+38
Original file line numberDiff line numberDiff line change
@@ -2945,6 +2945,44 @@ def test_asutf8andsize(self):
29452945
self.assertEqual(unicode_asutf8andsize(nonbmp), (b'\xf4\x8f\xbf\xbf', 4))
29462946
self.assertRaises(UnicodeEncodeError, unicode_asutf8andsize, 'a\ud800b\udfffc')
29472947

2948+
# Test PyUnicode_Count()
2949+
@support.cpython_only
@unittest.skipIf(_testcapi is None, 'need _testcapi module')
def test_count(self):
    # Drives the C-level PyUnicode_Count() through the
    # _testcapi.unicode_count(str, substr, start, end) wrapper.
    from _testcapi import unicode_count

    st = 'abcabd'
    size = len(st)

    # Each row: (string, substring, start, end, expected count).
    cases = (
        # whole string
        (st, 'a', 0, size, 2),
        (st, 'ab', 0, size, 2),
        (st, 'abc', 0, size, 1),
        (st, 'а', 0, size, 0),  # cyrillic "a"
        # start < end
        (st, 'a', 3, size, 1),
        (st, 'a', 4, size, 0),
        (st, 'a', 0, sys.maxsize, 2),
        # start >= end
        (st, 'abc', 0, 0, 0),
        (st, 'a', 3, 2, 0),
        (st, 'a', sys.maxsize, 5, 0),
        # negative
        (st, 'ab', -size, -1, 2),
        (st, 'a', -size, -3, 1),
        # empty string
        ('abc', '', 0, 3, 4),
        ('abc', '', 1, 3, 3),
        ('', '', 0, 1, 1),
        ('', 'a', 0, 1, 0),
    )
    for string, substr, start, end, expected in cases:
        self.assertEqual(unicode_count(string, substr, start, end), expected)

    # wrong args: too few arguments, or a non-str where a str is required
    bad_calls = (
        ('a', 'a'),
        ('a', 'a', 1),
        (1, 'a', 0, 1),
        ('a', 1, 0, 1),
    )
    for bad_args in bad_calls:
        self.assertRaises(TypeError, unicode_count, *bad_args)

    # different unicode kinds
    samples = ("\xa1", "\u8000\u8080", "\ud800\udc02", "\U0001f100\U0001f1f1")
    for uni in samples:
        for ch in uni:
            # Every character is unique within its own sample string ...
            self.assertEqual(unicode_count(uni, ch, 0, len(uni)), 1)
            # ... and none of them occurs in the ASCII-only string.
            self.assertEqual(unicode_count(st, ch, 0, size), 0)
2985+
29482986
# Test PyUnicode_FindChar()
29492987
@support.cpython_only
29502988
@unittest.skipIf(_testcapi is None, 'need _testcapi module')

Modules/_testcapi/unicode.c

+21
Original file line numberDiff line numberDiff line change
@@ -223,6 +223,26 @@ unicode_asutf8andsize(PyObject *self, PyObject *args)
223223
return Py_BuildValue("(Nn)", result, utf8_len);
224224
}
225225

226+
/* Test wrapper exposing PyUnicode_Count() to Python.
 *
 * Expects exactly four arguments: two str objects (the string to search
 * and the substring to look for) followed by the start and end indices.
 * Returns the count as a Python int, or NULL when argument parsing fails
 * or PyUnicode_Count() reports an error.
 */
static PyObject *
unicode_count(PyObject *self, PyObject *args)
{
    PyObject *haystack;
    PyObject *needle;
    Py_ssize_t start, end;
    Py_ssize_t count;

    if (!PyArg_ParseTuple(args, "UUnn:unicode_count",
                          &haystack, &needle, &start, &end)) {
        return NULL;
    }

    count = PyUnicode_Count(haystack, needle, start, end);
    if (count == -1) {
        /* -1 is the error return of PyUnicode_Count(); propagate it. */
        return NULL;
    }
    return PyLong_FromSsize_t(count);
}
245+
226246
static PyObject *
227247
unicode_findchar(PyObject *self, PyObject *args)
228248
{
@@ -696,6 +716,7 @@ static PyMethodDef TestMethods[] = {
696716
{"unicode_asucs4", unicode_asucs4, METH_VARARGS},
697717
{"unicode_asutf8", unicode_asutf8, METH_VARARGS},
698718
{"unicode_asutf8andsize", unicode_asutf8andsize, METH_VARARGS},
719+
{"unicode_count", unicode_count, METH_VARARGS},
699720
{"unicode_findchar", unicode_findchar, METH_VARARGS},
700721
{"unicode_copycharacters", unicode_copycharacters, METH_VARARGS},
701722
{NULL},

0 commit comments

Comments
 (0)