Skip to content
Merged
Show file tree
Hide file tree
Changes from 8 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion src/query/ee/tests/it/inverted_index/pruning.rs
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,7 @@ async fn apply_block_pruning(
let segment_locs = table_snapshot.segments.clone();
let segment_locs = create_segment_location_vector(segment_locs, None);

FusePruner::create(&ctx, dal, schema, push_down, bloom_index_cols, None)?
FusePruner::create(&ctx, dal, schema, push_down, bloom_index_cols, vec![], None)?
.read_pruning(segment_locs)
.await
}
Expand Down
4 changes: 2 additions & 2 deletions src/query/functions/src/scalars/hash.rs
Original file line number Diff line number Diff line change
Expand Up @@ -250,13 +250,13 @@ where for<'a> T::ScalarRef<'a>: DFHash {
);
}

struct CityHasher64 {
pub struct CityHasher64 {
seed: u64,
value: u64,
}

impl CityHasher64 {
fn with_seed(s: u64) -> Self {
pub fn with_seed(s: u64) -> Self {
Self { seed: s, value: 0 }
}
}
Expand Down
2 changes: 2 additions & 0 deletions src/query/functions/src/scalars/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,8 @@ mod vector;
pub use comparison::ALL_COMP_FUNC_NAMES;
use databend_functions_scalar_arithmetic::arithmetic;
use databend_functions_scalar_numeric_basic_arithmetic::register_numeric_basic_arithmetic;
pub use hash::CityHasher64;
pub use hash::DFHash;
pub use string::ALL_STRING_FUNC_NAMES;

pub fn register(registry: &mut FunctionRegistry) {
Expand Down
4 changes: 2 additions & 2 deletions src/query/service/src/test_kits/block_writer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -109,7 +109,6 @@ impl<'a> BlockWriter<'a> {
bloom_filter_index_size,
None,
None,
None,
Compression::Lz4Raw,
Some(Utc::now()),
);
Expand All @@ -130,7 +129,8 @@ impl<'a> BlockWriter<'a> {
let bloom_index_cols = BloomIndexColumns::All;
let bloom_columns_map =
bloom_index_cols.bloom_index_fields(schema.clone(), BloomIndex::supported_type)?;
let mut builder = BloomIndexBuilder::create(FunctionContext::default(), bloom_columns_map);
let mut builder =
BloomIndexBuilder::create(FunctionContext::default(), bloom_columns_map, &[])?;
builder.add_block(block)?;
let maybe_bloom_index = builder.finalize()?;
if let Some(bloom_index) = maybe_bloom_index {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -334,7 +334,6 @@ fn build_test_segment_info(
bloom_filter_index_location: Some(location_gen.block_bloom_index_location(&block_uuid)),
bloom_filter_index_size: 0,
inverted_index_size: None,
ngram_filter_index_size: None,
virtual_block_meta: None,
compression: Compression::Lz4,
create_on: Some(Utc::now()),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -77,7 +77,6 @@ async fn test_recluster_mutator_block_select() -> Result<()> {
0,
None,
None,
None,
meta::Compression::Lz4Raw,
Some(Utc::now()),
));
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -773,7 +773,6 @@ impl CompactSegmentTestFixture {
0,
None,
None,
None,
Compression::Lz4Raw,
Some(Utc::now()),
);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -104,7 +104,6 @@ fn test_to_partitions() -> Result<()> {
bloom_filter_size,
None,
None,
None,
meta::Compression::Lz4Raw,
Some(Utc::now()),
));
Expand Down
2 changes: 1 addition & 1 deletion src/query/service/tests/it/storages/fuse/pruning.rs
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,7 @@ async fn apply_block_pruning(
let ctx: Arc<dyn TableContext> = ctx;
let segment_locs = table_snapshot.segments.clone();
let segment_locs = create_segment_location_vector(segment_locs, None);
FusePruner::create(&ctx, op, schema, push_down, bloom_index_cols, None)?
FusePruner::create(&ctx, op, schema, push_down, bloom_index_cols, vec![], None)?
.read_pruning(segment_locs)
.await
.map(|v| v.into_iter().map(|(_, v)| v).collect())
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,7 @@ async fn apply_snapshot_pruning(
schema.clone(),
push_down,
bloom_index_cols,
vec![],
None,
)?);

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,7 @@ async fn apply_snapshot_pruning(
schema,
push_down,
bloom_index_cols,
vec![],
None,
)?);

Expand Down
1 change: 0 additions & 1 deletion src/query/service/tests/it/storages/fuse/statistics.rs
Original file line number Diff line number Diff line change
Expand Up @@ -628,7 +628,6 @@ fn test_reduce_block_meta() -> databend_common_exception::Result<()> {
bloom_filter_index_size,
None,
None,
None,
Compression::Lz4Raw,
Some(Utc::now()),
);
Expand Down
41 changes: 37 additions & 4 deletions src/query/sql/src/planner/binder/ddl/index.rs
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,8 @@ use crate::MetadataRef;
use crate::RefreshAggregatingIndexRewriter;
use crate::SUPPORTED_AGGREGATING_INDEX_FUNCTIONS;

const MAXIMUM_BLOOM_BITMAP_SIZE: usize = 128 * 1024 * 1024;

// valid values for inverted index option tokenizer
static INDEX_TOKENIZER_VALUES: LazyLock<HashSet<&'static str>> = LazyLock::new(|| {
let mut r = HashSet::new();
Expand Down Expand Up @@ -580,13 +582,44 @@ impl Binder {
let value = val.to_lowercase();
match key.as_str() {
"gram_size" => {
if value.parse::<u32>().is_err() {
return Err(ErrorCode::IndexOptionInvalid(format!(
"value `{value}` is not a legal number",
)));
match value.parse::<usize>() {
Ok(num) => {
if num == 0 {
return Err(ErrorCode::IndexOptionInvalid(
"`gram_size` cannot be 0",
));
}
}
Err(_) => {
return Err(ErrorCode::IndexOptionInvalid(format!(
"value `{value}` is not a legal number",
)));
}
}
options.insert("gram_size".to_string(), value);
}
"bitmap_size" => {
match value.parse::<usize>() {
Ok(num) => {
if num == 0 {
return Err(ErrorCode::IndexOptionInvalid(
"`bitmap_size` cannot be 0",
));
}
if num > MAXIMUM_BLOOM_BITMAP_SIZE {
return Err(ErrorCode::IndexOptionInvalid(format!(
"bitmap_size: `{num}` is too large (bitmap_size is maximum: {MAXIMUM_BLOOM_BITMAP_SIZE})",
)));
}
}
Err(_) => {
return Err(ErrorCode::IndexOptionInvalid(format!(
"value `{value}` is not a legal number",
)));
}
}
options.insert("bitmap_size".to_string(), value);
}
_ => {
return Err(ErrorCode::IndexOptionInvalid(format!(
"index option `{key}` is invalid key for create ngram index statement",
Expand Down
Loading
Loading