apache · kaustuvnandy · Sep 3, 2025 · Sep 3, 2025 · Sep 4, 2025 · Sep 5, 2025
diff --git a/mkdocs/docs/recipe-count.md b/mkdocs/docs/recipe-count.md
@@ -0,0 +1,39 @@
+---
+title: Count Recipe
+---
+
+# Counting Rows in an Iceberg Table
+
+This recipe demonstrates how to use the `count()` method to efficiently count the rows in an Iceberg table with PyIceberg.
+
+## Basic Usage
+
+To count all rows in a table:
+
+```python
+from pyiceberg.catalog import load_catalog
+
+catalog = load_catalog("default")
+table = catalog.load_table("default.cities")
+
+row_count = table.scan().count()
+print(f"Total rows in table: {row_count}")
+```
+
+## Count with a Filter
+
+To count only rows matching a filter:
+
+```python
+from pyiceberg.expressions import EqualTo
+
+count = table.scan(row_filter=EqualTo("city", "Amsterdam")).count()
+print(f"Rows with city == 'Amsterdam': {count}")
+```
+
+## Notes
+- The `count()` method works for both catalog and static tables.
+- Filters can be applied using the `scan` API for more granular counts.
+- Deleted records are excluded from the count.
+
+For more details, see the [API documentation](api.md).
diff --git a/tests/table/test_count.py b/tests/table/test_count.py
@@ -0,0 +1,58 @@
+import pytest
+from unittest.mock import MagicMock, Mock, patch
+from pyiceberg.table import DataScan
+from pyiceberg.expressions import AlwaysTrue
+
+class DummyFile:  # Test double for a data file; it carries only a record count.
+    def __init__(self, record_count):
+        self.record_count = record_count
+
+class DummyTask:
+    """Scan-task test double exposing ``file``, ``residual`` and ``delete_files``."""
+    def __init__(self, record_count, residual=None, delete_files=None):
+        self.file, self.delete_files = DummyFile(record_count), delete_files or []
+        self.residual = AlwaysTrue() if residual is None else residual
+
+def test_count_basic():
+    """One task with an AlwaysTrue residual and no delete files yields its record count."""
+    expected_rows = 42
+
+    # Spec'd against DataScan so only attributes that exist on a real scan are allowed.
+    scan = Mock(spec=DataScan)
+    scan.plan_files = MagicMock(
+        return_value=[DummyTask(expected_rows, residual=AlwaysTrue(), delete_files=[])]
+    )
+
+    # Run the real DataScan.count implementation against the mocked scan.
+    row_total = DataScan.count(scan)
+    assert row_total == expected_rows
+
+def test_count_empty():
+    """A scan whose plan_files() returns no tasks reports zero rows."""
+    # Spec'd against DataScan so only attributes that exist on a real scan are allowed.
+    scan = Mock(spec=DataScan)
+
+    # plan_files() is stubbed to hand back an empty task list.
+    no_tasks = []
+    scan.plan_files = MagicMock(return_value=no_tasks)
+
+    # Run the real DataScan.count implementation against the mocked scan.
+    row_total = DataScan.count(scan)
+    assert row_total == 0
+
+def test_count_large():
+    """Record counts from two planned tasks add up to a single total."""
+    rows_per_file = 500000
+    file_count = 2
+
+    # Spec'd against DataScan so only attributes that exist on a real scan are allowed.
+    scan = Mock(spec=DataScan)
+    scan.plan_files = MagicMock(
+        return_value=[
+            DummyTask(rows_per_file, residual=AlwaysTrue(), delete_files=[])
+            for _ in range(file_count)
+        ]
+    )
+
+    # Run the real DataScan.count implementation against the mocked scan.
+    assert DataScan.count(scan) == 1000000