RUST-536 Eliminate redundant clones #377

Merged
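One recurring pattern in the benchmark changes below: call sites that used to pass `self.doc.clone()` now pass `&self.doc`, which works because the insert APIs accept borrowed values. A minimal sketch of that pattern, using a hypothetical `insert_one` stand-in (not the driver's actual signature), assuming an `impl Borrow<T>` parameter:

```rust
use std::borrow::Borrow;

// Hypothetical stand-in for a driver method that accepts either an owned
// document or a reference, so callers never have to clone just to satisfy it.
fn insert_one<T: std::fmt::Debug>(doc: impl Borrow<T>) {
    println!("inserting {:?}", doc.borrow());
}

fn main() {
    let doc = String::from("{ \"hello\": \"world\" }");

    // Before: an owned value was required, so hot loops cloned every iteration.
    insert_one::<String>(doc.clone());

    // After: a borrow suffices; the same document can be inserted repeatedly
    // without copying it each time.
    for _ in 0..3 {
        insert_one::<String>(&doc);
    }
}
```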
7 changes: 4 additions & 3 deletions benchmarks/Cargo.toml
@@ -6,7 +6,7 @@ edition = "2018"

[features]
default = ["tokio-runtime"]
tokio-runtime = ["tokio/fs", "tokio/macros", "tokio/rt-core", "tokio/rt-threaded"]
tokio-runtime = ["tokio/fs", "tokio/macros", "tokio/rt", "tokio/rt-multi-thread", "tokio-stream"]
async-std-runtime = ["async-std"]

[dependencies]
@@ -16,8 +16,9 @@ lazy_static = "1.4.0"
clap = "2.33.3"
indicatif = "0.15.0"
async-trait = "0.1.41"
tokio = { version = "0.2.23", features = ["sync"] }
tokio = { version = "1.6", features = ["sync"] }
tokio-stream = { version = "0.1.6", features = ["io-util"], optional = true }
# "unstable" feature is needed for `spawn_blocking`, which is only used in task setup
async-std = { version = "=1.6.2", optional = true, features = ["attributes", "unstable"] }
async-std = { version = "1.9.0", optional = true, features = ["attributes", "unstable"] }
futures = "0.3.8"
anyhow = "1.0.34"
4 changes: 2 additions & 2 deletions benchmarks/src/bench/find_many.rs
@@ -2,7 +2,7 @@ use std::{convert::TryInto, path::PathBuf};

use anyhow::{bail, Result};
use futures::stream::StreamExt;
use mongodb::{bson::Bson, Client, Collection, Database};
use mongodb::{Client, Collection, Database, bson::{Bson, Document}};
use serde_json::Value;

use crate::{
@@ -12,7 +12,7 @@ use crate::{

pub struct FindManyBenchmark {
db: Database,
coll: Collection,
coll: Collection<Document>,
}

// Specifies the options to `FindManyBenchmark::setup` operation.
9 changes: 2 additions & 7 deletions benchmarks/src/bench/find_one.rs
@@ -1,12 +1,7 @@
use std::{convert::TryInto, path::PathBuf};

use anyhow::{bail, Result};
use mongodb::{
bson::{doc, Bson},
Client,
Collection,
Database,
};
use mongodb::{Client, Collection, Database, bson::{Bson, Document, doc}};
use serde_json::Value;

use crate::{
@@ -17,7 +12,7 @@ use crate::{
pub struct FindOneBenchmark {
db: Database,
num_iter: usize,
coll: Collection,
coll: Collection<Document>,
}

// Specifies the options to a `FindOneBenchmark::setup` operation.
8 changes: 4 additions & 4 deletions benchmarks/src/bench/insert_many.rs
@@ -14,7 +14,7 @@ use crate::bench::{Benchmark, COLL_NAME, DATABASE_NAME};
pub struct InsertManyBenchmark {
db: Database,
num_copies: usize,
coll: Collection,
coll: Collection<Document>,
doc: Document,
}

@@ -45,7 +45,7 @@ impl Benchmark for InsertManyBenchmark {
let mut file = spawn_blocking_and_await!(File::open(options.path))?;
let json: Value = spawn_blocking_and_await!(serde_json::from_reader(&mut file))?;

let coll = db.collection(&COLL_NAME);
let coll = db.collection(COLL_NAME.as_str());

Ok(InsertManyBenchmark {
db,
@@ -60,13 +60,13 @@

async fn before_task(&mut self) -> Result<()> {
self.coll.drop(None).await?;
self.db.create_collection(&COLL_NAME, None).await?;
self.db.create_collection(COLL_NAME.as_str(), None).await?;

Ok(())
}

async fn do_task(&self) -> Result<()> {
let insertions = vec![self.doc.clone(); self.num_copies];
let insertions = vec![&self.doc; self.num_copies];
self.coll.insert_many(insertions, None).await?;

Ok(())
6 changes: 3 additions & 3 deletions benchmarks/src/bench/insert_one.rs
@@ -14,7 +14,7 @@ use crate::bench::{Benchmark, COLL_NAME, DATABASE_NAME};
pub struct InsertOneBenchmark {
db: Database,
num_iter: usize,
coll: Collection,
coll: Collection<Document>,
doc: Document,
}

@@ -60,14 +60,14 @@ impl Benchmark for InsertOneBenchmark {

async fn before_task(&mut self) -> Result<()> {
self.coll.drop(None).await?;
self.db.create_collection(&COLL_NAME, None).await?;
self.db.create_collection(COLL_NAME.as_str(), None).await?;

Ok(())
}

async fn do_task(&self) -> Result<()> {
for _ in 0..self.num_iter {
self.coll.insert_one(self.doc.clone(), None).await?;
self.coll.insert_one(&self.doc, None).await?;
}

Ok(())
4 changes: 2 additions & 2 deletions benchmarks/src/bench/json_multi_export.rs
@@ -2,7 +2,7 @@ use std::path::PathBuf;

use anyhow::Result;
use futures::stream::{FuturesUnordered, StreamExt, TryStreamExt};
use mongodb::{bson::doc, Client, Collection, Database};
use mongodb::{Client, Collection, Database, bson::{Document, doc}};

use crate::{
bench::{parse_json_file_to_documents, Benchmark, COLL_NAME, DATABASE_NAME},
@@ -13,7 +13,7 @@ const TOTAL_FILES: usize = 100;

pub struct JsonMultiExportBenchmark {
db: Database,
coll: Collection,
coll: Collection<Document>,
}

// Specifies the options to a `JsonMultiExportBenchmark::setup` operation.
8 changes: 4 additions & 4 deletions benchmarks/src/bench/json_multi_import.rs
@@ -2,7 +2,7 @@ use std::path::PathBuf;

use anyhow::Result;
use futures::stream::{FuturesUnordered, StreamExt};
use mongodb::{options::InsertManyOptions, Client, Collection, Database};
use mongodb::{Client, Collection, Database, bson::Document, options::InsertManyOptions};

use crate::{
bench::{parse_json_file_to_documents, Benchmark, COLL_NAME, DATABASE_NAME},
@@ -13,7 +13,7 @@ const TOTAL_FILES: usize = 100;

pub struct JsonMultiImportBenchmark {
db: Database,
coll: Collection,
coll: Collection<Document>,
path: PathBuf,
}

@@ -43,7 +43,7 @@ impl Benchmark for JsonMultiImportBenchmark {

async fn before_task(&mut self) -> Result<()> {
self.coll.drop(None).await?;
self.db.create_collection(&COLL_NAME, None).await?;
self.db.create_collection(COLL_NAME.as_str(), None).await?;

Ok(())
}
@@ -68,7 +68,7 @@

docs.append(&mut new_docs);

let opts = Some(InsertManyOptions::builder().ordered(Some(false)).build());
let opts = Some(InsertManyOptions::builder().ordered(false).build());
coll_ref.insert_many(docs, opts).await.unwrap();
});
}
10 changes: 9 additions & 1 deletion benchmarks/src/fs.rs
@@ -59,6 +59,14 @@ impl BufReader {
}

pub(crate) fn lines(self) -> impl Stream<Item = std::io::Result<String>> {
self.inner.lines()
#[cfg(feature = "tokio-runtime")]
{
tokio_stream::wrappers::LinesStream::new(self.inner.lines())
}

#[cfg(feature = "async-std-runtime")]
{
self.inner.lines()
}
}
}
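For context on the `lines()` change above: in tokio 1.x the `Lines` type no longer implements `Stream` itself, so it is wrapped in `tokio_stream::wrappers::LinesStream` (hence the new `tokio-stream` dependency with its `io-util` feature). A minimal sketch of the same adaptation, assuming tokio 1.x with the `macros`, `rt-multi-thread`, and `io-util` features enabled:

```rust
use tokio::io::{AsyncBufReadExt, BufReader};
use tokio_stream::{wrappers::LinesStream, StreamExt};

#[tokio::main]
async fn main() -> std::io::Result<()> {
    // Any AsyncRead works; a byte slice keeps the example self-contained.
    let reader = BufReader::new(&b"first\nsecond\nthird\n"[..]);

    // tokio's Lines is not a Stream on its own in 1.x; LinesStream adapts it.
    let mut lines = LinesStream::new(reader.lines());

    while let Some(line) = lines.next().await {
        println!("{}", line?);
    }
    Ok(())
}
```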
8 changes: 6 additions & 2 deletions benchmarks/src/main.rs
@@ -37,7 +37,6 @@ use crate::bench::{
json_multi_export::JsonMultiExportBenchmark,
json_multi_import::JsonMultiImportBenchmark,
run_command::RunCommandBenchmark,
TARGET_ITERATION_COUNT,
};

lazy_static! {
@@ -52,7 +51,12 @@ fn get_nth_percentile(durations: &[Duration], n: f64) -> Duration {
fn score_test(durations: Vec<Duration>, name: &str, task_size: f64, more_info: bool) -> f64 {
let median = get_nth_percentile(&durations, 50.0);
let score = task_size / (median.as_millis() as f64 / 1000.0);
println!("TEST: {} -- Score: {}\n", name, score);
println!(
"TEST: {} -- Score: {}, Median Iteration Time: {:.3}ms\n",
name,
score,
median.as_secs_f64()
);

if more_info {
println!(
45 changes: 26 additions & 19 deletions src/bson_util/mod.rs
@@ -5,7 +5,7 @@ use std::{convert::TryFrom, time::Duration};
use serde::{de::Error, ser, Deserialize, Deserializer, Serialize, Serializer};

use crate::{
bson::{doc, oid::ObjectId, Binary, Bson, Document, JavaScriptCodeWithScope, Regex},
bson::{doc, Binary, Bson, Document, JavaScriptCodeWithScope, Regex},
error::{ErrorKind, Result},
};

@@ -31,11 +31,6 @@ pub(crate) fn get_u64(val: &Bson) -> Option<u64> {
}
}

pub(crate) fn add_id(doc: &mut Document) {
doc.entry("_id".to_string())
.or_insert_with(|| Bson::ObjectId(ObjectId::new()));
}

pub(crate) fn to_bson_array(docs: &[Document]) -> Bson {
Bson::Array(docs.iter().map(|doc| Bson::Document(doc.clone())).collect())
}
@@ -171,7 +166,7 @@
.ok_or_else(|| D::Error::custom(format!("could not deserialize u64 from {:?}", bson)))
}

pub fn doc_size_bytes(doc: &Document) -> usize {
pub fn doc_size_bytes(doc: &Document) -> u64 {

Author's comment: This was changed to always use a 64-bit integer in case we're on a 32-bit system and a large document is encountered.
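To illustrate the concern with hypothetical numbers (not taken from the driver): on a 32-bit target `usize` is 32 bits wide, so adding up the sizes of many sizeable documents could wrap, while the same arithmetic in `u64` cannot realistically overflow:

```rust
// Hypothetical illustration of why the byte counts are computed as u64:
// on a 32-bit system, usize is only 32 bits wide.
fn main() {
    let doc_size: u64 = 16 * 1024 * 1024; // pretend each document is 16 MiB
    let count: u64 = 300;                 // and a batch contains 300 of them

    let total = doc_size * count; // 5_033_164_800 bytes, more than u32::MAX
    println!("total batch size: {} bytes", total);
    println!("fits in a 32-bit usize? {}", total <= u32::MAX as u64);
}
```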

//
// * i32 length prefix (4 bytes)
// * for each element:
@@ -182,19 +177,19 @@ pub fn doc_size_bytes(doc: &Document) -> usize {
// * null terminator (1 byte)
4 + doc
.into_iter()
.map(|(key, val)| 1 + key.len() + 1 + size_bytes(val))
.sum::<usize>()
.map(|(key, val)| 1 + key.len() as u64 + 1 + size_bytes(val))
.sum::<u64>()
+ 1
}

pub fn size_bytes(val: &Bson) -> usize {
pub fn size_bytes(val: &Bson) -> u64 {
match val {
Bson::Double(_) => 8,
//
// * length prefix (4 bytes)
// * number of UTF-8 bytes
// * null terminator (1 byte)
Bson::String(s) => 4 + s.len() + 1,
Bson::String(s) => 4 + s.len() as u64 + 1,
// An array is serialized as a document with the keys "0", "1", "2", etc., so the size of
// an array is:
//
@@ -210,7 +205,7 @@
.iter()
.enumerate()
.map(|(i, val)| 1 + num_decimal_digits(i) + 1 + size_bytes(val))
.sum::<usize>()
.sum::<u64>()
+ 1
}
Bson::Document(doc) => doc_size_bytes(doc),
@@ -220,21 +215,21 @@
// * number of UTF-8 bytes
// * null terminator (1 byte)
Bson::RegularExpression(Regex { pattern, options }) => {
pattern.len() + 1 + options.len() + 1
pattern.len() as u64 + 1 + options.len() as u64 + 1
}
//
// * length prefix (4 bytes)
// * number of UTF-8 bytes
// * null terminator (1 byte)
Bson::JavaScriptCode(code) => 4 + code.len() + 1,
Bson::JavaScriptCode(code) => 4 + code.len() as u64 + 1,
//
// * i32 length prefix (4 bytes)
// * i32 length prefix for code (4 bytes)
// * number of UTF-8 bytes in code
// * null terminator for code (1 byte)
// * length of document
Bson::JavaScriptCodeWithScope(JavaScriptCodeWithScope { code, scope }) => {
4 + 4 + code.len() + 1 + doc_size_bytes(scope)
4 + 4 + code.len() as u64 + 1 + doc_size_bytes(scope)
}
Bson::Int32(_) => 4,
Bson::Int64(_) => 8,
@@ -243,14 +238,14 @@
// * i32 length prefix (4 bytes)
// * subtype (1 byte)
// * number of bytes
Bson::Binary(Binary { bytes, .. }) => 4 + 1 + bytes.len(),
Bson::Binary(Binary { bytes, .. }) => 4 + 1 + bytes.len() as u64,
Bson::ObjectId(_) => 12,
Bson::DateTime(_) => 8,
//
// * i32 length prefix (4 bytes)
// * subtype (1 byte)
// * number of UTF-8 bytes
Bson::Symbol(s) => 4 + 1 + s.len(),
Bson::Symbol(s) => 4 + 1 + s.len() as u64,
Bson::Decimal128(..) => 128 / 8,
Bson::Undefined | Bson::MaxKey | Bson::MinKey => 0,
// DbPointer doesn't have public details exposed by the BSON library, but it comprises of a
@@ -267,7 +262,19 @@
}
}

fn num_decimal_digits(n: usize) -> usize {
/// The size in bytes of the provided document's entry in a BSON array at the given index.
pub(crate) fn array_entry_size_bytes(index: usize, doc: &Document) -> u64 {
//
// * type (1 byte)
// * number of decimal digits in key
// * null terminator for the key (1 byte)
// * size of value
1 + num_decimal_digits(index) + 1 + doc_size_bytes(&doc)
}

/// The number of digits in `n` in base 10.
/// Useful for calculating the size of an array entry in BSON.
fn num_decimal_digits(n: usize) -> u64 {
let mut digits = 1;
let mut curr = 10;

@@ -332,6 +339,6 @@ mod test {
let mut serialized_bytes = Vec::new();
doc.to_writer(&mut serialized_bytes).unwrap();

assert_eq!(size_bytes, serialized_bytes.len());
assert_eq!(size_bytes, serialized_bytes.len() as u64);
}
}