Skip to content

Commit 17fe504

Browse files
authored
Support filtering specific sqllogictests identified by line number (#16029)
* Support filtering specific sqllogictests identified by line number * Add license header * Try parsing in different dialects * Add test filtering example to README.md * Improve Filter doc comment * Factor out statement_is_skippable into its own function * Add example about how filters work in the doc comments
1 parent ce835da commit 17fe504

File tree

4 files changed

+207
-17
lines changed

4 files changed

+207
-17
lines changed

datafusion/sqllogictest/README.md

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -156,6 +156,14 @@ sqllogictests also supports `cargo test` style substring matches on file names t
156156
cargo test --test sqllogictests -- information
157157
```
158158

159+
Additionally, executing specific tests within a file is also supported. Tests are identified by line number within
160+
the .slt file; for example, the following command will run the test in line `709` for file `information.slt` along
161+
with any other preparatory statements:
162+
163+
```shell
164+
cargo test --test sqllogictests -- information:709
165+
```
166+
159167
## Running tests: Postgres compatibility
160168

161169
Test files that start with prefix `pg_compat_` verify compatibility

datafusion/sqllogictest/bin/sqllogictests.rs

Lines changed: 27 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -20,8 +20,8 @@ use datafusion::common::instant::Instant;
2020
use datafusion::common::utils::get_available_parallelism;
2121
use datafusion::common::{exec_err, DataFusionError, Result};
2222
use datafusion_sqllogictest::{
23-
df_value_validator, read_dir_recursive, setup_scratch_dir, value_normalizer,
24-
DataFusion, TestContext,
23+
df_value_validator, read_dir_recursive, setup_scratch_dir, should_skip_file,
24+
should_skip_record, value_normalizer, DataFusion, Filter, TestContext,
2525
};
2626
use futures::stream::StreamExt;
2727
use indicatif::{
@@ -135,12 +135,19 @@ async fn run_tests() -> Result<()> {
135135

136136
let m_clone = m.clone();
137137
let m_style_clone = m_style.clone();
138+
let filters = options.filters.clone();
138139

139140
SpawnedTask::spawn(async move {
140141
match (options.postgres_runner, options.complete) {
141142
(false, false) => {
142-
run_test_file(test_file, validator, m_clone, m_style_clone)
143-
.await?
143+
run_test_file(
144+
test_file,
145+
validator,
146+
m_clone,
147+
m_style_clone,
148+
filters.as_ref(),
149+
)
150+
.await?
144151
}
145152
(false, true) => {
146153
run_complete_file(test_file, validator, m_clone, m_style_clone)
@@ -152,6 +159,7 @@ async fn run_tests() -> Result<()> {
152159
validator,
153160
m_clone,
154161
m_style_clone,
162+
filters.as_ref(),
155163
)
156164
.await?
157165
}
@@ -207,6 +215,7 @@ async fn run_test_file(
207215
validator: Validator,
208216
mp: MultiProgress,
209217
mp_style: ProgressStyle,
218+
filters: &[Filter],
210219
) -> Result<()> {
211220
let TestFile {
212221
path,
@@ -235,14 +244,15 @@ async fn run_test_file(
235244
runner.with_column_validator(strict_column_validator);
236245
runner.with_normalizer(value_normalizer);
237246
runner.with_validator(validator);
238-
let result = run_file_in_runner(path, runner).await;
247+
let result = run_file_in_runner(path, runner, filters).await;
239248
pb.finish_and_clear();
240249
result
241250
}
242251

243252
async fn run_file_in_runner<D: AsyncDB, M: MakeConnection<Conn = D>>(
244253
path: PathBuf,
245254
mut runner: sqllogictest::Runner<D, M>,
255+
filters: &[Filter],
246256
) -> Result<()> {
247257
let path = path.canonicalize()?;
248258
let records =
@@ -252,6 +262,9 @@ async fn run_file_in_runner<D: AsyncDB, M: MakeConnection<Conn = D>>(
252262
if let Record::Halt { .. } = record {
253263
break;
254264
}
265+
if should_skip_record::<D>(&record, filters) {
266+
continue;
267+
}
255268
if let Err(err) = runner.run_async(record).await {
256269
errs.push(format!("{err}"));
257270
}
@@ -318,6 +331,7 @@ async fn run_test_file_with_postgres(
318331
validator: Validator,
319332
mp: MultiProgress,
320333
mp_style: ProgressStyle,
334+
filters: &[Filter],
321335
) -> Result<()> {
322336
use datafusion_sqllogictest::Postgres;
323337
let TestFile {
@@ -339,7 +353,7 @@ async fn run_test_file_with_postgres(
339353
runner.with_column_validator(strict_column_validator);
340354
runner.with_normalizer(value_normalizer);
341355
runner.with_validator(validator);
342-
let result = run_file_in_runner(path, runner).await;
356+
let result = run_file_in_runner(path, runner, filters).await;
343357
pb.finish_and_clear();
344358
result
345359
}
@@ -350,6 +364,7 @@ async fn run_test_file_with_postgres(
350364
_validator: Validator,
351365
_mp: MultiProgress,
352366
_mp_style: ProgressStyle,
367+
_filters: &[Filter],
353368
) -> Result<()> {
354369
use datafusion::common::plan_err;
355370
plan_err!("Can not run with postgres as postgres feature is not enabled")
@@ -569,8 +584,11 @@ struct Options {
569584
#[clap(long, env = "INCLUDE_TPCH", help = "Include tpch files")]
570585
include_tpch: bool,
571586

572-
#[clap(action, help = "test filter (substring match on filenames)")]
573-
filters: Vec<String>,
587+
#[clap(
588+
action,
589+
help = "test filter (substring match on filenames with optional :{line_number} suffix)"
590+
)]
591+
filters: Vec<Filter>,
574592

575593
#[clap(
576594
long,
@@ -623,15 +641,7 @@ impl Options {
623641
/// filter and that does a substring match on each input. returns
624642
/// true f this path should be run
625643
fn check_test_file(&self, path: &Path) -> bool {
626-
if self.filters.is_empty() {
627-
return true;
628-
}
629-
630-
// otherwise check if any filter matches
631-
let path_string = path.to_string_lossy();
632-
self.filters
633-
.iter()
634-
.any(|filter| path_string.contains(filter))
644+
!should_skip_file(path, &self.filters)
635645
}
636646

637647
/// Postgres runner executes only tests in files with specific names or in
Lines changed: 170 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,170 @@
1+
// Licensed to the Apache Software Foundation (ASF) under one
2+
// or more contributor license agreements. See the NOTICE file
3+
// distributed with this work for additional information
4+
// regarding copyright ownership. The ASF licenses this file
5+
// to you under the Apache License, Version 2.0 (the
6+
// "License"); you may not use this file except in compliance
7+
// with the License. You may obtain a copy of the License at
8+
//
9+
// http://www.apache.org/licenses/LICENSE-2.0
10+
//
11+
// Unless required by applicable law or agreed to in writing,
12+
// software distributed under the License is distributed on an
13+
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14+
// KIND, either express or implied. See the License for the
15+
// specific language governing permissions and limitations
16+
// under the License.
17+
18+
use datafusion::sql::parser::{DFParserBuilder, Statement};
19+
use sqllogictest::{AsyncDB, Record};
20+
use sqlparser::ast::{SetExpr, Statement as SqlStatement};
21+
use sqlparser::dialect::dialect_from_str;
22+
use std::path::Path;
23+
use std::str::FromStr;
24+
25+
/// Filter specification that determines whether a certain sqllogictest record in
26+
/// a certain file should be filtered. In order for a [`Filter`] to match a test case:
27+
///
28+
/// - The test must belong to a file whose absolute path contains the `file_substring` substring.
29+
/// - If a `line_number` is specified, the test must be declared in that same line number.
30+
///
31+
/// If a [`Filter`] matches a specific test case, then the record is executed, if there's
32+
/// no match, the record is skipped.
33+
///
34+
/// Filters can be parsed from strings of the form `<file_name>:line_number`. For example,
35+
/// `foo.slt:100` matches any test whose name contains `foo.slt` and the test starts on line
36+
/// number 100.
37+
#[derive(Debug, Clone)]
38+
pub struct Filter {
39+
file_substring: String,
40+
line_number: Option<u32>,
41+
}
42+
43+
impl FromStr for Filter {
44+
type Err = String;
45+
46+
fn from_str(s: &str) -> Result<Self, Self::Err> {
47+
let parts: Vec<&str> = s.rsplitn(2, ':').collect();
48+
if parts.len() == 2 {
49+
match parts[0].parse::<u32>() {
50+
Ok(line) => Ok(Filter {
51+
file_substring: parts[1].to_string(),
52+
line_number: Some(line),
53+
}),
54+
Err(_) => Err(format!("Cannot parse line number from '{s}'")),
55+
}
56+
} else {
57+
Ok(Filter {
58+
file_substring: s.to_string(),
59+
line_number: None,
60+
})
61+
}
62+
}
63+
}
64+
65+
/// Given a list of [`Filter`]s, determines if the whole file in the provided
66+
/// path can be skipped.
67+
///
68+
/// - If there's at least 1 filter whose file name is a substring of the provided path,
69+
/// it returns true.
70+
/// - If the provided filter list is empty, it returns false.
71+
pub fn should_skip_file(path: &Path, filters: &[Filter]) -> bool {
72+
if filters.is_empty() {
73+
return false;
74+
}
75+
76+
let path_string = path.to_string_lossy();
77+
for filter in filters {
78+
if path_string.contains(&filter.file_substring) {
79+
return false;
80+
}
81+
}
82+
true
83+
}
84+
85+
/// Determines whether a certain sqllogictest record should be skipped given the provided
86+
/// filters.
87+
///
88+
/// If there's at least 1 matching filter, or the filter list is empty, it returns false.
89+
///
90+
/// There are certain records that will never be skipped even if they are not matched
91+
/// by any filters, like CREATE TABLE, INSERT INTO, DROP or SELECT * INTO statements,
92+
/// as they populate tables necessary for other tests to work.
93+
pub fn should_skip_record<D: AsyncDB>(
94+
record: &Record<D::ColumnType>,
95+
filters: &[Filter],
96+
) -> bool {
97+
if filters.is_empty() {
98+
return false;
99+
}
100+
101+
let (sql, loc) = match record {
102+
Record::Statement { sql, loc, .. } => (sql, loc),
103+
Record::Query { sql, loc, .. } => (sql, loc),
104+
_ => return false,
105+
};
106+
107+
let statement = if let Some(statement) = parse_or_none(sql, "Postgres") {
108+
statement
109+
} else if let Some(statement) = parse_or_none(sql, "generic") {
110+
statement
111+
} else {
112+
return false;
113+
};
114+
115+
if !statement_is_skippable(&statement) {
116+
return false;
117+
}
118+
119+
for filter in filters {
120+
if !loc.file().contains(&filter.file_substring) {
121+
continue;
122+
}
123+
if let Some(line_num) = filter.line_number {
124+
if loc.line() != line_num {
125+
continue;
126+
}
127+
}
128+
129+
// This filter matches both file name substring and the exact
130+
// line number (if one was provided), so don't skip it.
131+
return false;
132+
}
133+
134+
true
135+
}
136+
137+
fn statement_is_skippable(statement: &Statement) -> bool {
138+
// Only SQL statements can be skipped.
139+
let Statement::Statement(sql_stmt) = statement else {
140+
return false;
141+
};
142+
143+
// Cannot skip SELECT INTO statements, as they can also create tables
144+
// that further test cases will use.
145+
if let SqlStatement::Query(v) = sql_stmt.as_ref() {
146+
if let SetExpr::Select(v) = v.body.as_ref() {
147+
if v.into.is_some() {
148+
return false;
149+
}
150+
}
151+
}
152+
153+
// Only SELECT and EXPLAIN statements can be skipped, as any other
154+
// statement might be populating tables that future test cases will use.
155+
matches!(
156+
sql_stmt.as_ref(),
157+
SqlStatement::Query(_) | SqlStatement::Explain { .. }
158+
)
159+
}
160+
161+
fn parse_or_none(sql: &str, dialect: &str) -> Option<Statement> {
162+
let Ok(Ok(Some(statement))) = DFParserBuilder::new(sql)
163+
.with_dialect(dialect_from_str(dialect).unwrap().as_ref())
164+
.build()
165+
.map(|mut v| v.parse_statements().map(|mut v| v.pop_front()))
166+
else {
167+
return None;
168+
};
169+
Some(statement)
170+
}

datafusion/sqllogictest/src/lib.rs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -38,8 +38,10 @@ pub use engines::DataFusion;
3838
#[cfg(feature = "postgres")]
3939
pub use engines::Postgres;
4040

41+
mod filters;
4142
mod test_context;
4243
mod util;
4344

45+
pub use filters::*;
4446
pub use test_context::TestContext;
4547
pub use util::*;

0 commit comments

Comments
 (0)