Skip to content

panic from subtraction overflow during merge, multi-valued fastfield #1151

@shikhar

Description

@shikhar

Describe the bug

  • What did you do?

Ran indexing process, which periodically indexes a batch of documents and commits.

  • What happened?
The application panicked (crashed).
Message:  attempt to subtract with overflow
Location: /Users/sbhushan/.cargo/registry/src/github.1485827954.workers.dev-1ecc6299db9ec823/tantivy-0.16.0/src/fastfield/multivalued/reader.rs:71

  ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ BACKTRACE ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
                                ⋮ 12 frames hidden ⋮
  13: core::panicking::panic::hbb13bbcfbaa2890b
      at /rustc/a178d0322ce20e33eac124758e837cbd80a6f633/library/core/src/panicking.rs:50
  14: tantivy::fastfield::multivalued::reader::MultiValuedFastFieldReader<Item>::num_vals::heaf368d0a6f14b43
      at /Users/sbhushan/.cargo/registry/src/github.1485827954.workers.dev-1ecc6299db9ec823/tantivy-0.16.0/src/fastfield/multivalued/reader.rs:71
        69 │     pub fn num_vals(&self, doc: DocId) -> usize {
        70 │         let range = self.range(doc);
        71 >         (range.end - range.start) as usize
        72 │     }
        73 │
  15: <tantivy::fastfield::multivalued::reader::MultiValuedFastFieldReader<Item> as tantivy::fastfield::MultiValueLength>::get_len::h231cf36bdce28e4d
      at /Users/sbhushan/.cargo/registry/src/github.1485827954.workers.dev-1ecc6299db9ec823/tantivy-0.16.0/src/fastfield/multivalued/reader.rs:83
        81 │ impl<Item: FastValue> MultiValueLength for MultiValuedFastFieldReader<Item> {
        82 │     fn get_len(&self, doc_id: DocId) -> u64 {
        83 >         self.num_vals(doc_id) as u64
        84 │     }
        85 │
  16: tantivy::indexer::merger::IndexMerger::write_1_n_fast_field_idx_generic::h7b089a49f7c55428
      at /Users/sbhushan/.cargo/registry/src/github.1485827954.workers.dev-1ecc6299db9ec823/tantivy-0.16.0/src/indexer/merger.rs:510
       508 │                 for doc in 0u32..reader.max_doc() {
       509 │                     if delete_bitset.is_alive(doc) {
       510 >                         let num_vals = u64s_reader.get_len(doc) as u64;
       511 │                         total_num_vals += num_vals;
       512 │                     }
  17: tantivy::indexer::merger::IndexMerger::write_multi_value_fast_field_idx::hff15ed17f4f18c09
      at /Users/sbhushan/.cargo/registry/src/github.1485827954.workers.dev-1ecc6299db9ec823/tantivy-0.16.0/src/indexer/merger.rs:565
       563 │         }).collect::<Vec<_>>();
       564 │
       565 >         Self::write_1_n_fast_field_idx_generic(
       566 │             field,
       567 │             fast_field_serializer,
  18: tantivy::indexer::merger::IndexMerger::write_multi_fast_field::h8a8cf09448a3fc71
      at /Users/sbhushan/.cargo/registry/src/github.1485827954.workers.dev-1ecc6299db9ec823/tantivy-0.16.0/src/indexer/merger.rs:657
       655 │         // First we merge the idx fast field.
       656 │         let offsets =
       657 >             self.write_multi_value_fast_field_idx(field, fast_field_serializer, doc_id_mapping)?;
       658 │
       659 │         let mut min_value = u64::max_value();
  19: tantivy::indexer::merger::IndexMerger::write_fast_fields::h6be8ac9b0cab83c0
      at /Users/sbhushan/.cargo/registry/src/github.1485827954.workers.dev-1ecc6299db9ec823/tantivy-0.16.0/src/indexer/merger.rs:298
       296 │                     }
       297 │                     Some(Cardinality::MultiValues) => {
       298 >                         self.write_multi_fast_field(field, fast_field_serializer, doc_id_mapping)?;
       299 │                     }
       300 │                     None => {}
  20: tantivy::indexer::merger::IndexMerger::write::h08cf0320d357591c
      at /Users/sbhushan/.cargo/registry/src/github.1485827954.workers.dev-1ecc6299db9ec823/tantivy-0.16.0/src/indexer/merger.rs:1099
      1097 │             &doc_id_mapping,
      1098 │         )?;
      1099 >         self.write_fast_fields(
      1100 │             serializer.get_fast_field_serializer(),
      1101 │             term_ord_mappings,
  21: tantivy::indexer::segment_updater::merge::hdc3230d728afdd61
      at /Users/sbhushan/.cargo/registry/src/github.1485827954.workers.dev-1ecc6299db9ec823/tantivy-0.16.0/src/indexer/segment_updater.rs:142
       140 │     let segment_serializer = SegmentSerializer::for_segment(merged_segment.clone(), true)?;
       141 │
       142 >     let num_docs = merger.write(segment_serializer)?;
       143 │
       144 │     let merged_segment_id = merged_segment.id();
  22: tantivy::indexer::segment_updater::SegmentUpdater::start_merge::{{closure}}::h92bac4c09cef47c4
      at /Users/sbhushan/.cargo/registry/src/github.1485827954.workers.dev-1ecc6299db9ec823/tantivy-0.16.0/src/indexer/segment_updater.rs:498
       496 │             // as well as which segment is currently in merge and therefore should not be
       497 │             // candidate for another merge.
       498 >             match merge(
       499 │                 &segment_updater.index,
       500 │                 segment_entries,
  23: <core::future::from_generator::GenFuture<T> as core::future::future::Future>::poll::h0b9ec7c8a2a6a500
      at /rustc/a178d0322ce20e33eac124758e837cbd80a6f633/library/core/src/future/mod.rs:80
  24: <futures_task::future_obj::LocalFutureObj<T> as core::future::future::Future>::poll::h82b8c2584ca1d3c0
      at /Users/sbhushan/.cargo/registry/src/github.1485827954.workers.dev-1ecc6299db9ec823/futures-task-0.3.17/src/future_obj.rs:84
        82 │     #[inline]
        83 │     fn poll(mut self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll<T> {
        84 >         unsafe { Pin::new_unchecked(&mut *self.future).poll(cx) }
        85 │     }
        86 │ }
  25: <futures_task::future_obj::FutureObj<T> as core::future::future::Future>::poll::ha493c3e4cc50bed8
      at /Users/sbhushan/.cargo/registry/src/github.1485827954.workers.dev-1ecc6299db9ec823/futures-task-0.3.17/src/future_obj.rs:127
       125 │     #[inline]
       126 │     fn poll(mut self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll<T> {
       127 >         Pin::new(&mut self.0).poll(cx)
       128 │     }
       129 │ }
  26: futures_util::future::future::FutureExt::poll_unpin::h2db2c56d1c89396f
      at /Users/sbhushan/.cargo/registry/src/github.1485827954.workers.dev-1ecc6299db9ec823/futures-util-0.3.17/src/future/future/mod.rs:562
       560 │         Self: Unpin,
       561 │     {
       562 >         Pin::new(self).poll(cx)
       563 │     }
       564 │
  27: futures_executor::thread_pool::Task::run::h469ec9cc1fc5fa80
      at /Users/sbhushan/.cargo/registry/src/github.1485827954.workers.dev-1ecc6299db9ec823/futures-executor-0.3.17/src/thread_pool.rs:322
       320 │
       321 │             loop {
       322 >                 let res = future.poll_unpin(&mut cx);
       323 │                 match res {
       324 │                     Poll::Pending => {}
  28: futures_executor::thread_pool::PoolState::work::haf842fd80f3d5b25
      at /Users/sbhushan/.cargo/registry/src/github.1485827954.workers.dev-1ecc6299db9ec823/futures-executor-0.3.17/src/thread_pool.rs:154
       152 │             let msg = self.rx.lock().unwrap().recv().unwrap();
       153 │             match msg {
       154 >                 Message::Run(task) => task.run(),
       155 │                 Message::Close => break,
       156 │             }
  29: futures_executor::thread_pool::ThreadPoolBuilder::create::{{closure}}::h4de4015d663fd980
      at /Users/sbhushan/.cargo/registry/src/github.1485827954.workers.dev-1ecc6299db9ec823/futures-executor-0.3.17/src/thread_pool.rs:284
       282 │                 thread_builder = thread_builder.stack_size(self.stack_size);
       283 │             }
       284 >             thread_builder.spawn(move || state.work(counter, after_start, before_stop))?;
       285 │         }
       286 │         Ok(pool)
                                ⋮ 13 frames hidden ⋮
  • What was expected?

No panic in merge.

Which version of tantivy are you using?

0.16

To Reproduce

If your bug is deterministic, can you give a minimal reproducing code?
Some bugs are not deterministic. Can you describe with precision in which context it happened?

Not deterministic, once the index is in this state it reproduces but not sure what caused it to get into such a state. Rebuilding the index fixes the problem. If I see a pattern to when the index is entering such a state, I will note here. It has happened twice so far.

If this is possible, can you share your code?

Afraid not, for now. Full disclosure: this is happening when using a custom Directory implementation, there is a chance that could be implicated. I will try to reproduce with a regular MMapDirectory, but wanted to report the issue in case a bug if apparent to tantivy devs based on the stacktrace.

Metadata

Metadata

Assignees

No one assigned

    Labels

    No labels
    No labels

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions