Skip to content

Commit 9093ac6

Browse files
authored
Add a query planner for OxQL (#7152)
- Add types to represent the plan tree and the currently-supported plan tree nodes. These mostly correspond to the existing query AST nodes, but include information about the expected schema for the input and output tables, along with the query AST nodes that "implement" that transformation. - Add an explicit node for computing deltas from a cumulative timeseries, automatically after the node for fetching its data from the DB. This is currently implicitly done after fetching the data, but will be part of an explicit plan step going forward. The ultimate goal is to push that into the database itself where possible. - Adds methods to optimize a query plan, which currently includes the predicate-pushdown and limit-pushdown tricks we already do to limit the amount of data we get from the database. Adds some tests to verify behavior of these optimizations, in particular that they don't change the _planned_ output of the query itself. - Add pretty-printing of the plan tree, and include a way to show that in the OxQL shell. - Add detection of full table scans. Use the planner in OxQL queries, _only to verify them_ and check that there are no scans. The queries themselves are executed in the original method today.
1 parent 017830d commit 9093ac6

File tree

32 files changed

+4395
-32
lines changed

32 files changed

+4395
-32
lines changed

Cargo.lock

Lines changed: 8 additions & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -650,6 +650,7 @@ tar = "0.4"
650650
tempfile = "3.10"
651651
term = "0.7"
652652
termios = "0.3"
653+
termtree = "0.5.1"
653654
textwrap = "0.16.1"
654655
test-strategy = "0.3.1"
655656
thiserror = "1.0"

nexus/tests/integration_tests/metrics.rs

Lines changed: 11 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -449,7 +449,8 @@ async fn test_instance_watcher_metrics(
449449
const STATE_STARTING: &str = "starting";
450450
const STATE_RUNNING: &str = "running";
451451
const STATE_STOPPING: &str = "stopping";
452-
const OXQL_QUERY: &str = "get virtual_machine:check";
452+
const OXQL_QUERY: &str = "get virtual_machine:check | \
453+
filter timestamp > @2000-01-01";
453454

454455
let client = &cptestctx.external_client;
455456
let internal_client = &cptestctx.internal_client;
@@ -706,7 +707,13 @@ async fn test_project_timeseries_query(
706707
// fields are. This is helpful generally, but here it would be better if
707708
// we could say something more like "you can't query this timeseries from
708709
// this endpoint"
709-
assert_eq!(result.message, "The filter expression contains identifiers that are not valid for its input timeseries. Invalid identifiers: [\"project_id\", \"silo_id\"], timeseries fields: {\"datum\", \"metric_name\", \"target_name\", \"timestamp\"}");
710+
const EXPECTED_ERROR_MESSAGE: &str = "\
711+
The filter expression refers to \
712+
identifiers that are not valid for its input \
713+
table \"integration_target:integration_metric\". \
714+
Invalid identifiers: [\"silo_id\", \"project_id\"], \
715+
valid identifiers: [\"datum\", \"metric_name\", \"target_name\", \"timestamp\"]";
716+
assert!(result.message.ends_with(EXPECTED_ERROR_MESSAGE));
710717

711718
// nonexistent project
712719
let url = "/v1/timeseries/query?project=nonexistent";
@@ -871,7 +878,8 @@ async fn test_mgs_metrics(
871878
return;
872879
}
873880

874-
let query = format!("get {metric_name}");
881+
let query =
882+
format!("get {metric_name} | filter timestamp > @2000-01-01");
875883

876884
// MGS polls SP sensor data once every second. It's possible that, when
877885
// we triggered Oximeter to collect samples from MGS, it may not have

oximeter/db/Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,7 @@ slog-async.workspace = true
4343
slog-dtrace.workspace = true
4444
slog-term.workspace = true
4545
strum.workspace = true
46+
termtree.workspace = true
4647
thiserror.workspace = true
4748
tokio-util.workspace = true
4849
usdt.workspace = true

oximeter/db/src/client/oxql.rs

Lines changed: 47 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@ use crate::oxql::ast::table_ops::filter;
1515
use crate::oxql::ast::table_ops::filter::Filter;
1616
use crate::oxql::ast::table_ops::limit::Limit;
1717
use crate::oxql::ast::table_ops::limit::LimitKind;
18+
use crate::oxql::Query;
1819
use crate::query::field_table_name;
1920
use crate::Error;
2021
use crate::Metric;
@@ -113,6 +114,34 @@ struct ConsistentKeyGroup {
113114
}
114115

115116
impl Client {
117+
/// Build a query plan for the OxQL query.
118+
pub async fn plan_oxql_query(
119+
&self,
120+
query: impl AsRef<str>,
121+
) -> Result<oxql::plan::Plan, Error> {
122+
let query = query.as_ref();
123+
let parsed_query = oxql::Query::new(query)?;
124+
self.build_query_plan(&parsed_query).await
125+
}
126+
127+
/// Build a query plan for the OxQL query.
128+
async fn build_query_plan(
129+
&self,
130+
query: &Query,
131+
) -> Result<oxql::plan::Plan, Error> {
132+
let referenced_timeseries = query.all_timeseries_names();
133+
let mut schema = BTreeMap::new();
134+
for name in referenced_timeseries.into_iter() {
135+
let Some(sch) = self.schema_for_timeseries(name).await? else {
136+
return Err(Error::TimeseriesNotFound(name.to_string()));
137+
};
138+
schema.insert(name.clone(), sch);
139+
}
140+
let plan =
141+
oxql::plan::Plan::new(query.parsed_query().clone(), &schema)?;
142+
Ok(plan)
143+
}
144+
116145
/// Run a OxQL query.
117146
pub async fn oxql_query(
118147
&self,
@@ -132,6 +161,15 @@ impl Client {
132161
// See https://github.com/oxidecomputer/omicron/issues/5298.
133162
let query = query.as_ref();
134163
let parsed_query = oxql::Query::new(query)?;
164+
let plan = self.build_query_plan(&parsed_query).await?;
165+
if plan.requires_full_table_scan() {
166+
return Err(Error::Oxql(anyhow::anyhow!(
167+
"This query requires at least one full table scan. \
168+
Please rewrite the query to filter either the fields \
169+
or timestamps, in order to reduce the amount of data \
170+
fetched from the database."
171+
)));
172+
}
135173
let query_id = Uuid::new_v4();
136174
let query_log =
137175
self.log.new(slog::o!("query_id" => query_id.to_string()));
@@ -837,12 +875,12 @@ impl Client {
837875
// return.
838876
//
839877
// This is used to ensure that we never go above the limit in
840-
// `MAX_RESULT_SIZE`. That restricts the _total_ number of rows we want
841-
// to retch from the database. So we set our limit to be one more than
842-
// the remainder on our allotment. If we get exactly as many as we set
843-
// in the limit, then we fail the query because there are more rows that
844-
// _would_ be returned. We don't know how many more, but there is at
845-
// least 1 that pushes us over the limit. This prevents tricky
878+
// `MAX_DATABASE_ROWS`. That restricts the _total_ number of rows we
879+
// want to retch from the database. So we set our limit to be one more
880+
// than the remainder on our allotment. If we get exactly as many as we
881+
// set in the limit, then we fail the query because there are more row
882+
// that _would_ be returned. We don't know how many more, but there is
883+
// at least 1 that pushes us over the limit. This prevents tricky
846884
// TOCTOU-like bugs where we need to check the limit twice, and improves
847885
// performance, since we don't return much more than we could possibly
848886
// handle.
@@ -1293,7 +1331,9 @@ mod tests {
12931331
#[tokio::test]
12941332
async fn test_get_entire_table() {
12951333
let ctx = setup_oxql_test("test_get_entire_table").await;
1296-
let query = "get some_target:some_metric";
1334+
// We need _some_ filter here to avoid a provable full-table scan.
1335+
let query =
1336+
"get some_target:some_metric | filter timestamp > @2020-01-01";
12971337
let result = ctx
12981338
.client
12991339
.oxql_query(query)

oximeter/db/src/native/mod.rs

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -239,6 +239,11 @@ pub enum Error {
239239

240240
#[error("Expected an empty data block")]
241241
ExpectedEmptyDataBlock,
242+
243+
#[error(
244+
"A query unexpectedly resulted in an empty data block; query: {query}"
245+
)]
246+
UnexpectedEmptyBlock { query: String },
242247
}
243248

244249
impl Error {

oximeter/db/src/oxql/ast/literal.rs

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@ use chrono::DateTime;
1313
use chrono::Utc;
1414
use oximeter::FieldType;
1515
use oximeter::FieldValue;
16+
use oxql_types::point::DataType;
1617
use regex::Regex;
1718
use std::borrow::Borrow;
1819
use std::fmt;
@@ -35,6 +36,20 @@ pub enum Literal {
3536
}
3637

3738
impl Literal {
39+
// Return the name of this literal's type as a string.
40+
pub(crate) fn type_name(&self) -> &'static str {
41+
match self {
42+
Literal::Integer(_) => "Integer",
43+
Literal::Double(_) => "Double",
44+
Literal::String(_) => "String",
45+
Literal::Boolean(_) => "Boolean",
46+
Literal::Uuid(_) => "Uuid",
47+
Literal::Duration(_) => "Duration",
48+
Literal::Timestamp(_) => "Timestamp",
49+
Literal::IpAddr(_) => "IpAddr",
50+
}
51+
}
52+
3853
// Format the literal as a safe, typed string for ClickHouse.
3954
pub(crate) fn as_db_safe_string(&self) -> String {
4055
match self {
@@ -93,6 +108,22 @@ impl Literal {
93108
}
94109
}
95110

111+
// Return true if this literal can be compared to a datum of the provided
112+
// type.
113+
pub(crate) fn is_compatible_with_datum(&self, data_type: DataType) -> bool {
114+
match (self, data_type) {
115+
(Literal::Integer(_), DataType::Integer)
116+
| (Literal::Double(_), DataType::Double)
117+
| (Literal::String(_), DataType::String)
118+
| (Literal::Boolean(_), DataType::Boolean)
119+
| (Literal::Duration(_), DataType::Integer)
120+
| (Literal::Duration(_), DataType::Double)
121+
| (Literal::Timestamp(_), DataType::Integer)
122+
| (Literal::Timestamp(_), DataType::Double) => true,
123+
(_, _) => false,
124+
}
125+
}
126+
96127
/// Apply the comparison op between self and the provided field.
97128
///
98129
/// Return None if the comparison cannot be applied, either because the type

oximeter/db/src/oxql/ast/mod.rs

Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,9 @@
66
77
// Copyright 2024 Oxide Computer Company
88

9+
use std::collections::BTreeSet;
10+
use std::fmt;
11+
912
use chrono::DateTime;
1013
use chrono::Utc;
1114
use oximeter::TimeseriesName;
@@ -26,12 +29,35 @@ pub struct Query {
2629
ops: Vec<TableOp>,
2730
}
2831

32+
impl fmt::Display for Query {
33+
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
34+
let n_ops = self.ops.len();
35+
for (i, op) in self.ops.iter().enumerate() {
36+
write!(f, "{op}")?;
37+
if i < n_ops - 1 {
38+
write!(f, " | ")?;
39+
}
40+
}
41+
Ok(())
42+
}
43+
}
44+
2945
impl Query {
3046
// Return the first operation in the query, which is always a form of `get`.
3147
fn first_op(&self) -> &TableOp {
3248
self.ops.first().expect("Should have parsed at least 1 operation")
3349
}
3450

51+
/// Iterate over the table operations.
52+
pub(crate) fn table_ops(
53+
&self,
54+
) -> impl ExactSizeIterator<Item = &'_ TableOp> + '_ {
55+
self.ops.iter()
56+
}
57+
58+
/// Return the name of the first referenced timeseries.
59+
///
60+
/// This is from the first `get`, which might be from a subquery.
3561
pub(crate) fn timeseries_name(&self) -> &TimeseriesName {
3662
match self.first_op() {
3763
TableOp::Basic(BasicTableOp::Get(n)) => n,
@@ -42,6 +68,33 @@ impl Query {
4268
}
4369
}
4470

71+
/// Return _all_ timeseries names referred to by get table operations.
72+
pub(crate) fn all_timeseries_names(&self) -> BTreeSet<&TimeseriesName> {
73+
let mut set = BTreeSet::new();
74+
self.all_timeseries_names_impl(&mut set);
75+
set
76+
}
77+
78+
// Add all timeseries names to the provided set, recursing into subqueries.
79+
fn all_timeseries_names_impl<'a>(
80+
&'a self,
81+
set: &mut BTreeSet<&'a TimeseriesName>,
82+
) {
83+
for op in self.ops.iter() {
84+
match op {
85+
TableOp::Basic(BasicTableOp::Get(name)) => {
86+
set.insert(name);
87+
}
88+
TableOp::Basic(_) => {}
89+
TableOp::Grouped(GroupedTableOp { ops }) => {
90+
for query in ops.iter() {
91+
query.all_timeseries_names_impl(set);
92+
}
93+
}
94+
}
95+
}
96+
}
97+
4598
// Check that this query (and any subqueries) start with a get table op, and
4699
// that there are no following get operations. I.e., we have:
47100
//

oximeter/db/src/oxql/ast/table_ops/align.rs

Lines changed: 18 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@ use oxql_types::point::Values;
1919
use oxql_types::Alignment;
2020
use oxql_types::Table;
2121
use oxql_types::Timeseries;
22+
use std::fmt;
2223
use std::time::Duration;
2324

2425
// The maximum factor by which an alignment operation may upsample data.
@@ -68,7 +69,7 @@ fn verify_max_upsampling_ratio(
6869
///
6970
/// Alignment is used to produce data at the defined timestamps, so that samples
7071
/// from multiple timeseries may be combined or correlated in meaningful ways.
71-
#[derive(Clone, Debug, PartialEq)]
72+
#[derive(Clone, Copy, Debug, PartialEq)]
7273
pub struct Align {
7374
/// The alignment method, used to describe how data over the input period
7475
/// is used to generate an output sample.
@@ -87,6 +88,12 @@ pub struct Align {
8788
pub period: Duration,
8889
}
8990

91+
impl std::fmt::Display for Align {
92+
fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
93+
write!(f, "{}({:?})", self.method, self.period)
94+
}
95+
}
96+
9097
impl Align {
9198
// Apply the alignment function to the set of tables.
9299
pub(crate) fn apply(
@@ -108,7 +115,7 @@ impl Align {
108115
}
109116

110117
/// An alignment method.
111-
#[derive(Clone, Debug, PartialEq)]
118+
#[derive(Clone, Copy, Debug, PartialEq)]
112119
pub enum AlignmentMethod {
113120
/// Alignment is done by interpolating the output data at the specified
114121
/// period.
@@ -118,6 +125,15 @@ pub enum AlignmentMethod {
118125
MeanWithin,
119126
}
120127

128+
impl fmt::Display for AlignmentMethod {
129+
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
130+
match self {
131+
AlignmentMethod::Interpolate => write!(f, "interpolate"),
132+
AlignmentMethod::MeanWithin => write!(f, "mean_within"),
133+
}
134+
}
135+
}
136+
121137
// Align the timeseries in a table by computing the average within each output
122138
// period.
123139
fn align_mean_within(

0 commit comments

Comments
 (0)