
Commit 046dc85

Dataframe v2: reference docs (#7820)
Add a reference page for the dataframe APIs. It's still very barebones at this point because #7819 makes it very difficult to write snippets for this. But it is literally infinitely better than what's there right now: nothing.

- DNM: requires #7817
- Closes #7828
1 parent 3cc5370 commit 046dc85

3 files changed (+100 lines, -1 line)


docs/content/reference/dataframes.md

Lines changed: 40 additions & 1 deletion
@@ -3,4 +3,43 @@ title: Dataframes
order: 300
---

-Incoming.
+Rerun, at its core, is a database. As such, you can always get your data back in the form of tables (also known as dataframes, or records, or batches...).

This can be achieved in three different ways, depending on your needs:
* using the dataframe API, currently available in [Python](https://ref.rerun.io/docs/python/stable/common/dataframe/) and [Rust](https://docs.rs/rerun/latest/rerun/dataframe/index.html),
* using the [blueprint API](../concepts/blueprint) to configure a [dataframe view](types/views/dataframe_view) from code,
* or simply setting up a [dataframe view](types/views/dataframe_view) manually in the UI.

This page is meant as a reference to get you up and running with these different solutions as quickly as possible.
For an in-depth introduction to the dataframe API and the possible workflows it enables, check out [our Getting Started guide](../getting-started/data-out) or one of the accompanying [How-Tos](../howto/dataframe-api).

> We'll need an RRD file to query. Either use one of yours, or grab some of the example ones, e.g.:
> ```
> curl 'https://app.rerun.io/version/latest/examples/dna.rrd' -o - > /tmp/dna.rrd
> ```

### Using the dataframe API

The following snippet demonstrates how to query the first 10 rows in a Rerun recording:

snippet: reference/dataframe_query

Check out the API reference to learn more about all the ways that data can be searched and filtered; a short, hedged filtering sketch follows the links below:
* [🐍 Python API reference](https://ref.rerun.io/docs/python/stable/common/dataframe/)
* [🐍 Python example](https://github.com/rerun-io/rerun/blob/c00a9f649fd4463f91620e8e2eac11355b245ac5/examples/python/dataframe_query/dataframe_query.py)
* [🦀 Rust API reference](https://docs.rs/crate/rerun/latest)
* [🦀 Rust example](https://github.com/rerun-io/rerun/blob/c00a9f649fd4463f91620e8e2eac11355b245ac5/examples/rust/dataframe_query/src/main.rs)
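
For a quick taste of that filtering, here is a minimal sketch, not part of this commit: the entity path and index name are made up, and `filter_range_sequence` is an assumed helper name, so double-check both against the Python API reference linked above.

```python
import sys

import rerun as rr

recording = rr.dataframe.load_recording(sys.argv[1])

# Hypothetical: restrict the view to one entity subtree, indexed by a sequence timeline.
view = recording.view(index="frame_nr", contents="/world/points/**")

# Assumed helper name: keep only rows whose index falls within [0, 100].
view = view.filter_range_sequence(0, 100)

batches = view.select()
while (row := batches.read_next_batch()) is not None:
    print(row)
```
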
### Using the blueprint API to configure a dataframe view

TODO(cmc): incoming.

Check out the blueprint API reference to learn more about all the ways that data can be searched and filtered; a tentative sketch follows the link below:
* [🐍 Python blueprint API reference](https://ref.rerun.io/docs/python/latest/common/blueprint_apis/)
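
Until that section lands, here is a minimal, unofficial sketch of what configuring such a view from Python might look like. It assumes the blueprint API exposes a `DataframeView` view type (see the reference linked above); the application id and origin are illustrative only.

```python
import rerun as rr
import rerun.blueprint as rrb

rr.init("rerun_example_dataframe_blueprint", spawn=True)

# A blueprint consisting of a single dataframe view rooted at the recording root.
blueprint = rrb.Blueprint(
    rrb.DataframeView(origin="/"),
)

# Send the blueprint to the viewer started by `spawn=True`.
rr.send_blueprint(blueprint)
```
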
### Setting up a dataframe view manually in the UI

TODO(cmc): incoming.
Lines changed: 18 additions & 0 deletions
@@ -0,0 +1,18 @@
"""Query and display the first 10 rows of a recording."""

import sys

import rerun as rr

path_to_rrd = sys.argv[1]

recording = rr.dataframe.load_recording(path_to_rrd)
view = recording.view(index="log_time", contents="/**")
batches = view.select()

for _ in range(10):
    row = batches.read_next_batch()
    if row is None:
        break
    # Each row is a `RecordBatch`, which can be easily passed around across different data ecosystems.
    print(row)
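
As a possible follow-up to the snippet above (not part of this commit): assuming each batch is a standard pyarrow `RecordBatch`, as the comment suggests, it can be handed straight to other ecosystems, for example pandas (which must be installed separately).

```python
import sys

import rerun as rr

recording = rr.dataframe.load_recording(sys.argv[1])
view = recording.view(index="log_time", contents="/**")

batch = view.select().read_next_batch()
if batch is not None:
    # `to_pandas()` is standard pyarrow.
    print(batch.to_pandas().head())
```
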
Lines changed: 42 additions & 0 deletions
@@ -0,0 +1,42 @@
//! Query and display the first 10 rows of a recording.

#![allow(clippy::unwrap_used)]

use rerun::{
    dataframe::{QueryCache, QueryEngine, QueryExpression, SparseFillStrategy, Timeline},
    ChunkStore, ChunkStoreConfig, VersionPolicy,
};

fn main() -> Result<(), Box<dyn std::error::Error>> {
    let args = std::env::args().collect::<Vec<_>>();

    let path_to_rrd = &args[1];
    let timeline = Timeline::log_time();

    let stores = ChunkStore::from_rrd_filepath(
        &ChunkStoreConfig::DEFAULT,
        path_to_rrd,
        VersionPolicy::Warn,
    )?;
    let (_, store) = stores.first_key_value().unwrap();

    let query_cache = QueryCache::new(store);
    let query_engine = QueryEngine {
        store,
        cache: &query_cache,
    };

    let query = QueryExpression {
        filtered_index: Some(timeline),
        sparse_fill_strategy: SparseFillStrategy::LatestAtGlobal,
        ..Default::default()
    };

    let query_handle = query_engine.query(query.clone());
    for row in query_handle.batch_iter().take(10) {
        // Each row is a `RecordBatch`, which can be easily passed around across different data ecosystems.
        println!("{row}");
    }

    Ok(())
}
