diff --git a/Cargo.lock b/Cargo.lock index 58079307e12..c084db2057d 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -207,6 +207,15 @@ dependencies = [ "syn", ] +[[package]] +name = "atoi" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "616896e05fc0e2649463a93a15183c6a16bf03413a7af88ef1285ddedfa9cda5" +dependencies = [ + "num-traits", +] + [[package]] name = "atomic-waker" version = "1.0.0" @@ -1107,12 +1116,11 @@ name = "git-commitgraph" version = "0.7.0" dependencies = [ "bstr", - "byteorder", - "filebuffer", "git-chunk", "git-features 0.18.0", "git-hash 0.8.0", "git-testtools", + "memmap2", "serde", "thiserror", ] @@ -1227,6 +1235,18 @@ dependencies = [ [[package]] name = "git-index" version = "0.0.0" +dependencies = [ + "atoi", + "bstr", + "filetime", + "git-features 0.18.0", + "git-hash 0.8.0", + "git-testtools", + "memmap2", + "quick-error", + "serde", + "smallvec", +] [[package]] name = "git-lock" @@ -1350,12 +1370,10 @@ name = "git-pack" version = "0.16.0" dependencies = [ "bstr", - "byteorder", "bytesize", "clru", "common_macros", "dashmap 5.0.0", - "filebuffer", "git-chunk", "git-diff 0.12.0", "git-features 0.18.0", @@ -1365,6 +1383,7 @@ dependencies = [ "git-tempfile 1.0.3", "git-testtools", "git-traverse 0.11.0", + "memmap2", "os_str_bytes 6.0.0", "parking_lot", "serde", @@ -1468,7 +1487,6 @@ dependencies = [ name = "git-ref" version = "0.10.0" dependencies = [ - "filebuffer", "git-actor 0.7.0", "git-features 0.18.0", "git-hash 0.8.0", @@ -1478,6 +1496,7 @@ dependencies = [ "git-tempfile 1.0.3", "git-testtools", "git-validate 0.5.3", + "memmap2", "nom", "os_str_bytes 6.0.0", "quick-error", @@ -1664,6 +1683,10 @@ dependencies = [ "quick-error", ] +[[package]] +name = "git-worktree" +version = "0.0.0" + [[package]] name = "git2" version = "0.13.25" @@ -2067,6 +2090,15 @@ version = "2.4.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "308cc39be01b73d0d18f82a0e7b2a3df85245f84af96fdddc5d202d27e47b86a" +[[package]] +name = "memmap2" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4647a11b578fead29cdbb34d4adef8dd3dc35b876c9c6d5240d83f205abfe96e" +dependencies = [ + "libc", +] + [[package]] name = "memoffset" version = "0.6.5" diff --git a/Cargo.toml b/Cargo.toml index 8eaafb121e5..3f9c7a9e2ff 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -115,6 +115,7 @@ members = [ "git-diff", "git-traverse", "git-index", + "git-worktree", "git-packetline", "git-transport", "git-protocol", diff --git a/Makefile b/Makefile index fc7ce20cea0..8e8227296f7 100644 --- a/Makefile +++ b/Makefile @@ -82,6 +82,7 @@ check: ## Build all code in suitable configurations && cargo check cd git-object && cargo check --all-features \ && cargo check --features verbose-object-parsing-errors + cd git-index && cargo check --features serde1 cd git-actor && cargo check --features serde1 cd git-pack && cargo check --features serde1 \ && cargo check --features pack-cache-lru-static \ @@ -139,6 +140,8 @@ unit-tests: ## run all unit tests cd git-object && cargo test && cargo test --features verbose-object-parsing-errors cd git-pack && cargo test --features internal-testing-to-avoid-being-run-by-cargo-test-all \ && cargo test --features "internal-testing-git-features-parallel" + cd git-index && cargo test --features internal-testing-to-avoid-being-run-by-cargo-test-all \ + && cargo test --features "internal-testing-git-features-parallel" cd 
git-packetline && cargo test \ && cargo test --features blocking-io,maybe-async/is_sync --test blocking-packetline \ && cargo test --features "async-io" --test async-packetline diff --git a/README.md b/README.md index 8fa45c9586d..d268f20120d 100644 --- a/README.md +++ b/README.md @@ -83,9 +83,9 @@ Follow linked crate name for detailed status. Please note that all crates follow * [git-repository](https://github.com/Byron/gitoxide/blob/main/crate-status.md#git-repository) * `gitoxide-core` * **very early** -* **idea** * [git-index](https://github.com/Byron/gitoxide/blob/main/crate-status.md#git-index) - * git-status +* **idea** + * [git-worktree](https://github.com/Byron/gitoxide/blob/main/crate-status.md#git-worktree) * [git-tui](https://github.com/Byron/gitoxide/blob/main/crate-status.md#git-tui) * [git-bundle](https://github.com/Byron/gitoxide/blob/main/crate-status.md#git-bundle) @@ -240,14 +240,9 @@ Provide a CLI to for the most basic user journey: * [ ] `gix tool open-remote` open the URL of the remote, possibly after applying known transformations to go from `ssh` to `https`. * [ ] Open up SQL for git using [sqlite virtual tables](https://github.com/rusqlite/rusqlite/blob/master/tests/vtab.rs). Check out gitqlite - as well. What would an MVP look like? Maybe even something that could ship with gitoxide. + as well. What would an MVP look like? Maybe even something that could ship with gitoxide. See [this go implementation as example](https://github.com/filhodanuvem/gitql). * [ ] A truly awesome history rewriter which makes it easy to understand what happened while avoiding all pitfalls. Think BFG, but more awesome, if that's possible. * [ ] `git-tui` should learn a lot from [fossil-scm] regarding the presentation of data. Maybe [this](https://github.com/Lutetium-Vanadium/requestty/) can be used for prompts. Probably [magit] has a lot to offer, too. -* [ ] Can markdown be used as database so issue-trackers along with meta-data could just be markdown files which are mostly human-editable? Could user interfaces - be meta-data aware and just hide the meta-data chunks which are now editable in the GUI itself? Doing this would make conflicts easier to resolve than an `sqlite` - database. - * ~~A git-backend for `sqlite` which should allow embedding sqlite databases into git repositories, which in turn can be used for bug-trackers, wikis or other - features, making for a fully distributed github like experience, maybe.~~ ### Ideas for Spin-Offs @@ -259,6 +254,18 @@ Provide a CLI to for the most basic user journey: * [ ] A [syncthing] like client/server application. This is to demonstrate how lower-level crates can be combined into custom applications that use only part of git's technology to achieve their very own thing. Watch out for big file support, multi-device cross-syncing, the possibility for untrusted destinations using full-encryption, case-insensitive and sensitive filesystems, and extended file attributes as well as ignore files. +* An event-based database that uses commit messages to store deltas, while occasionally aggregating the actual state in a tree. Of course it's distributed by nature, allowing + people to work offline. + - It's abstracted to completely hide the actual data model behind it, allowing for all kinds of things to be implemented on top. 
+ - Commits probably need a nanosecond component for the timestamp, which can be added via custom header field. + - having recording all changes allows for perfect merging, both on the client or on the server, while keeping a natural audit log which makes it useful for mission critical + databases in business. + * **Applications** + - Can markdown be used as database so issue-trackers along with meta-data could just be markdown files which are mostly human-editable? Could user interfaces + be meta-data aware and just hide the meta-data chunks which are now editable in the GUI itself? Doing this would make conflicts easier to resolve than an `sqlite` + database. + - A time tracker - simple data, very likely naturally conflict free, and interesting to see it in terms of teams or companies using it with maybe GitHub as Backing for authentication. + - How about supporting multiple different trackers, as in different remotes? [syncthing]: https://github.com/syncthing/syncthing [fossil-scm]: https://www.fossil-scm.org diff --git a/crate-status.md b/crate-status.md index b6883bea75e..2a13e1796ac 100644 --- a/crate-status.md +++ b/crate-status.md @@ -206,10 +206,35 @@ Check out the [performance discussion][git-traverse-performance] as well. * [x] API documentation * [ ] Some examples +### git-worktree +* handle the working tree/checkout +* manage multiple worktrees +* deal with exclude specifications, like .gitignore and other exclude files. + ### git-index -* read and write a git-index file - * non-sparse - * sparse (search for [`sparse index` here](https://github.blog/2021-08-16-highlights-from-git-2-33/)) +* read + * [ ] V2 + * [ ] V3 + * [ ] V4 + * optional threading + * [ ] concurrent loading of index extensions + * [ ] threaded cache entry reading +* `stat` update + * [ ] optional threaded `stat` based on thread_cost (aka preload) +* [ ] handling of `.gitignore` and system file exclude configuration +* [ ] handle potential races +* extensions + * [ ] TREE for speeding up tree generation + * [ ] REUC resolving undo + * [ ] UNTR untracked cache + * [ ] FSMN file system monitor cache V1 and V2 + * [ ] EOIE end of index entry + * [ ] IEOT index entry offset table + * [ ] link base indices to take information from, split index + * [ ] sdir sparse directory entries +* additinoal support + * [ ] non-sparse + * [ ] sparse (search for [`sparse index` here](https://github.blog/2021-08-16-highlights-from-git-2-33/)) * add and remove entries * [x] API documentation * [ ] Some examples diff --git a/etc/check-package-size.sh b/etc/check-package-size.sh index eab9d152b6c..8ec4c8f25c6 100755 --- a/etc/check-package-size.sh +++ b/etc/check-package-size.sh @@ -18,12 +18,13 @@ echo "in root: gitoxide CLI" #indent cargo diet -n --package-size-limit 25KB - fails right now because of dotted profile.dev.package (enter cargo-smart-release && indent cargo diet -n --package-size-limit 85KB) (enter git-actor && indent cargo diet -n --package-size-limit 5KB) +(enter git-index && indent cargo diet -n --package-size-limit 15KB) (enter git-tempfile && indent cargo diet -n --package-size-limit 25KB) (enter git-lock && indent cargo diet -n --package-size-limit 15KB) (enter git-config && indent cargo diet -n --package-size-limit 65KB) (enter git-hash && indent cargo diet -n --package-size-limit 10KB) (enter git-chunk && indent cargo diet -n --package-size-limit 10KB) -(enter git-features && indent cargo diet -n --package-size-limit 35KB) +(enter git-features && indent cargo diet -n --package-size-limit 
40KB) (enter git-ref && indent cargo diet -n --package-size-limit 50KB) (enter git-diff && indent cargo diet -n --package-size-limit 10KB) (enter git-traverse && indent cargo diet -n --package-size-limit 10KB) diff --git a/git-commitgraph/Cargo.toml b/git-commitgraph/Cargo.toml index c31ec7c0bbe..5ba7a3f06b8 100644 --- a/git-commitgraph/Cargo.toml +++ b/git-commitgraph/Cargo.toml @@ -21,8 +21,7 @@ git-hash = { version ="^0.8.0", path = "../git-hash" } git-chunk = { version ="^0.2.0", path = "../git-chunk" } bstr = { version = "0.2.13", default-features = false, features = ["std"] } -byteorder = "1.2.3" -filebuffer = "0.4.0" +memmap2 = "0.5.0" serde = { version = "1.0.114", optional = true, default-features = false, features = ["derive"] } thiserror = "1.0.26" diff --git a/git-commitgraph/src/file/commit.rs b/git-commitgraph/src/file/commit.rs index 8e89b4b030d..10e45dc12ab 100644 --- a/git-commitgraph/src/file/commit.rs +++ b/git-commitgraph/src/file/commit.rs @@ -5,8 +5,6 @@ use std::{ slice::Chunks, }; -use byteorder::{BigEndian, ByteOrder}; - use crate::{ file::{self, File, EXTENDED_EDGES_MASK, LAST_EXTENDED_EDGE_MASK, NO_PARENT}, graph, @@ -38,6 +36,11 @@ pub struct Commit<'a> { root_tree_id: &'a git_hash::oid, } +#[inline] +fn read_u32(b: &[u8]) -> u32 { + u32::from_be_bytes(b.try_into().unwrap()) +} + impl<'a> Commit<'a> { pub(crate) fn new(file: &'a File, pos: file::Position) -> Self { let bytes = file.commit_data_bytes(pos); @@ -45,10 +48,11 @@ impl<'a> Commit<'a> { file, pos, root_tree_id: git_hash::oid::from_bytes_unchecked(&bytes[..file.hash_len]), - parent1: ParentEdge::from_raw(BigEndian::read_u32(&bytes[file.hash_len..][..4])), - parent2: ParentEdge::from_raw(BigEndian::read_u32(&bytes[file.hash_len + 4..][..4])), - generation: BigEndian::read_u32(&bytes[file.hash_len + 8..][..4]) >> 2, - commit_timestamp: BigEndian::read_u64(&bytes[file.hash_len + 8..][..8]) & 0x0003_ffff_ffff, + parent1: ParentEdge::from_raw(read_u32(&bytes[file.hash_len..][..4])), + parent2: ParentEdge::from_raw(read_u32(&bytes[file.hash_len + 4..][..4])), + generation: read_u32(&bytes[file.hash_len + 8..][..4]) >> 2, + commit_timestamp: u64::from_be_bytes(bytes[file.hash_len + 8..][..8].try_into().unwrap()) + & 0x0003_ffff_ffff, } } @@ -173,7 +177,7 @@ impl<'a> Iterator for ParentIterator<'a> { }, ParentIteratorState::Extra(mut chunks) => { if let Some(chunk) = chunks.next() { - let extra_edge = BigEndian::read_u32(chunk); + let extra_edge = read_u32(chunk); match ExtraEdge::from_raw(extra_edge) { ExtraEdge::Internal(pos) => { self.state = ParentIteratorState::Extra(chunks); diff --git a/git-commitgraph/src/file/init.rs b/git-commitgraph/src/file/init.rs index 5d9c03de9f5..1fd4516f24d 100644 --- a/git-commitgraph/src/file/init.rs +++ b/git-commitgraph/src/file/init.rs @@ -4,8 +4,7 @@ use std::{ }; use bstr::ByteSlice; -use byteorder::{BigEndian, ByteOrder}; -use filebuffer::FileBuffer; +use memmap2::Mmap; use crate::file::{ ChunkId, File, BASE_GRAPHS_LIST_CHUNK_ID, COMMIT_DATA_CHUNK_ID, COMMIT_DATA_ENTRY_SIZE_SANS_HASH, @@ -66,10 +65,18 @@ impl TryFrom<&Path> for File { type Error = Error; fn try_from(path: &Path) -> Result { - let data = FileBuffer::open(path).map_err(|e| Error::Io { - err: e, - path: path.to_owned(), - })?; + let data = std::fs::File::open(path) + .and_then(|file| { + // SAFETY: we have to take the risk of somebody changing the file underneath. Git never writes into the same file. 
+ #[allow(unsafe_code)] + unsafe { + Mmap::map(&file) + } + }) + .map_err(|e| Error::Io { + err: e, + path: path.to_owned(), + })?; let data_size = data.len(); if data_size < MIN_FILE_SIZE { return Err(Error::Corrupt( @@ -241,7 +248,7 @@ impl TryFrom<&Path> for File { fn read_fan(d: &[u8]) -> ([u32; FAN_LEN], usize) { let mut fan = [0; FAN_LEN]; for (c, f) in d.chunks(4).zip(fan.iter_mut()) { - *f = BigEndian::read_u32(c); + *f = u32::from_be_bytes(c.try_into().unwrap()); } (fan, FAN_LEN * 4) } diff --git a/git-commitgraph/src/file/mod.rs b/git-commitgraph/src/file/mod.rs index 617f5cf0a0a..861ead20cbf 100644 --- a/git-commitgraph/src/file/mod.rs +++ b/git-commitgraph/src/file/mod.rs @@ -6,7 +6,7 @@ use std::{ path::PathBuf, }; -use filebuffer::FileBuffer; +use memmap2::Mmap; pub use self::{commit::Commit, init::Error}; @@ -42,7 +42,7 @@ pub struct File { base_graph_count: u8, base_graphs_list_offset: Option, commit_data_offset: usize, - data: FileBuffer, + data: Mmap, extra_edges_list_range: Option>, fan: [u32; FAN_LEN], oid_lookup_offset: usize, diff --git a/git-commitgraph/src/lib.rs b/git-commitgraph/src/lib.rs index cd516a3f507..b943050f22c 100644 --- a/git-commitgraph/src/lib.rs +++ b/git-commitgraph/src/lib.rs @@ -7,8 +7,7 @@ //! As generating the full commit graph from scratch can take some time, git may write new commits //! to separate [files][file::File] instead of overwriting the original file. //! Eventually, git will merge these files together as the number of files grows. -#![forbid(unsafe_code)] -#![deny(rust_2018_idioms, missing_docs)] +#![deny(unsafe_code, rust_2018_idioms, missing_docs)] pub mod file; pub mod graph; diff --git a/git-features/src/decode.rs b/git-features/src/decode.rs new file mode 100644 index 00000000000..0df38710ddb --- /dev/null +++ b/git-features/src/decode.rs @@ -0,0 +1,38 @@ +use std::io::Read; + +/// Decode variable int numbers from a `Read` implementation. +/// +/// Note: currently overflow checks are only done in debug mode. +#[inline] +pub fn leb64_from_read(mut r: impl Read) -> Result<(u64, usize), std::io::Error> { + let mut b = [0u8; 1]; + let mut i = 0; + r.read_exact(&mut b)?; + i += 1; + let mut value = b[0] as u64 & 0x7f; + while b[0] & 0x80 != 0 { + r.read_exact(&mut b)?; + i += 1; + debug_assert!(i <= 10, "Would overflow value at 11th iteration"); + value += 1; + value = (value << 7) + (b[0] as u64 & 0x7f) + } + Ok((value, i)) +} + +/// Decode variable int numbers. +#[inline] +pub fn leb64(d: &[u8]) -> (u64, usize) { + let mut i = 0; + let mut c = d[i]; + i += 1; + let mut value = c as u64 & 0x7f; + while c & 0x80 != 0 { + c = d[i]; + i += 1; + debug_assert!(i <= 10, "Would overflow value at 11th iteration"); + value += 1; + value = (value << 7) + (c as u64 & 0x7f) + } + (value, i) +} diff --git a/git-features/src/lib.rs b/git-features/src/lib.rs index 2321b72f12d..ebeef93bbe8 100644 --- a/git-features/src/lib.rs +++ b/git-features/src/lib.rs @@ -11,6 +11,8 @@ /// pub mod cache; +/// +pub mod decode; pub mod fs; pub mod hash; pub mod interrupt; diff --git a/git-pack/src/data/output/in_order.rs b/git-features/src/parallel/in_order.rs similarity index 90% rename from git-pack/src/data/output/in_order.rs rename to git-features/src/parallel/in_order.rs index 378218bafb6..2d5de0a0729 100644 --- a/git-pack/src/data/output/in_order.rs +++ b/git-features/src/parallel/in_order.rs @@ -1,7 +1,7 @@ use std::{cmp::Ordering, collections::BTreeMap}; -/// A counter for chunks to be able to put them back into original order later. 
-pub type ChunkId = usize; +/// A counter for items that are in sequence, to be able to put them back into original order later. +pub type SequenceId = usize; /// An iterator which olds iterated items with a **sequential** ID starting at 0 long enough to dispense them in order. /// @@ -10,14 +10,14 @@ pub type ChunkId = usize; pub struct InOrderIter { /// The iterator yielding the out-of-order elements we are to yield in order. pub inner: I, - store: BTreeMap, - next_chunk: ChunkId, + store: BTreeMap, + next_chunk: SequenceId, is_done: bool, } impl From for InOrderIter where - I: Iterator>, + I: Iterator>, { fn from(iter: I) -> Self { InOrderIter { @@ -31,7 +31,7 @@ where impl Iterator for InOrderIter where - I: Iterator>, + I: Iterator>, { type Item = Result; diff --git a/git-features/src/parallel/in_parallel.rs b/git-features/src/parallel/in_parallel.rs index 55f8935c40b..7eab2087662 100644 --- a/git-features/src/parallel/in_parallel.rs +++ b/git-features/src/parallel/in_parallel.rs @@ -10,6 +10,17 @@ pub fn join(left: impl FnOnce() -> O1 + Send, right: impl Fn .unwrap() } +/// Runs `f` with a scope to be used for spawning threads that will not outlive the function call. +/// That way it's possible to handle threads without needing the 'static lifetime for data they interact with. +/// +/// Note that the threads should not rely on actual parallelism as threading might be turned off entirely. +pub fn threads<'env, F, R>(f: F) -> std::thread::Result +where + F: FnOnce(&crossbeam_utils::thread::Scope<'env>) -> R, +{ + crossbeam_utils::thread::scope(f) +} + /// Read items from `input` and `consume` them in multiple threads, /// whose output output is collected by a `reducer`. Its task is to /// aggregate these outputs into the final result returned by this function with the benefit of not having to be thread-safe. diff --git a/git-features/src/parallel/mod.rs b/git-features/src/parallel/mod.rs index d3873b4392c..ebdedd3f308 100644 --- a/git-features/src/parallel/mod.rs +++ b/git-features/src/parallel/mod.rs @@ -35,11 +35,14 @@ #[cfg(feature = "parallel")] mod in_parallel; #[cfg(feature = "parallel")] -pub use in_parallel::{in_parallel, join}; +pub use in_parallel::{in_parallel, join, threads}; mod serial; #[cfg(not(feature = "parallel"))] -pub use serial::{in_parallel, join}; +pub use serial::{in_parallel, join, threads}; + +mod in_order; +pub use in_order::{InOrderIter, SequenceId}; mod eager_iter; pub use eager_iter::{EagerIter, EagerIterIf}; @@ -111,7 +114,7 @@ pub fn optimize_chunk_size_and_thread_limit( /// Always returns 1, available when the `parallel` feature toggle is unset. #[cfg(not(feature = "parallel"))] -fn num_threads(_thread_limit: Option) -> usize { +pub fn num_threads(_thread_limit: Option) -> usize { 1 } @@ -119,7 +122,7 @@ fn num_threads(_thread_limit: Option) -> usize { /// /// Only available with the `parallel` feature toggle set. #[cfg(feature = "parallel")] -fn num_threads(thread_limit: Option) -> usize { +pub fn num_threads(thread_limit: Option) -> usize { let logical_cores = num_cpus::get(); thread_limit .map(|l| if l == 0 { logical_cores } else { l }) diff --git a/git-features/src/parallel/serial.rs b/git-features/src/parallel/serial.rs index ddc6c3ef589..eb92aa0a724 100644 --- a/git-features/src/parallel/serial.rs +++ b/git-features/src/parallel/serial.rs @@ -1,10 +1,63 @@ use crate::parallel::Reduce; -/// Runs `left` and then `right`, one after another, returning their output when both are done. 
#[cfg(not(feature = "parallel"))] -pub fn join(left: impl FnOnce() -> O1, right: impl FnOnce() -> O2) -> (O1, O2) { - (left(), right()) +mod not_parallel { + /// Runs `left` and then `right`, one after another, returning their output when both are done. + pub fn join(left: impl FnOnce() -> O1, right: impl FnOnce() -> O2) -> (O1, O2) { + (left(), right()) + } + + /// A scope for spawning threads. + pub struct Scope<'env> { + _marker: std::marker::PhantomData<&'env mut &'env ()>, + } + + #[allow(unsafe_code)] + unsafe impl Sync for Scope<'_> {} + + impl<'env> Scope<'env> { + pub fn spawn<'scope, F, T>(&'scope self, f: F) -> ScopedJoinHandle<'scope, T> + where + F: FnOnce(&Scope<'env>) -> T, + F: Send + 'env, + T: Send + 'env, + { + ScopedJoinHandle { + result: f(self), + _marker: Default::default(), + } + } + } + + /// Runs `f` with a scope to be used for spawning threads that will not outlive the function call. + /// Note that this implementation will run the spawned functions immediately. + pub fn threads<'env, F, R>(f: F) -> std::thread::Result + where + F: FnOnce(&Scope<'env>) -> R, + { + Ok(f(&Scope { + _marker: Default::default(), + })) + } + + /// A handle that can be used to join its scoped thread. + /// + /// This struct is created by the [`Scope::spawn`] method and the + /// [`ScopedThreadBuilder::spawn`] method. + pub struct ScopedJoinHandle<'scope, T> { + /// Holds the result of the inner closure. + result: T, + _marker: std::marker::PhantomData<&'scope mut &'scope ()>, + } + + impl ScopedJoinHandle<'_, T> { + pub fn join(self) -> std::thread::Result { + Ok(self.result) + } + } } +#[cfg(not(feature = "parallel"))] +pub use not_parallel::{join, threads, Scope, ScopedJoinHandle}; /// Read items from `input` and `consume` them in a single thread, producing an output to be collected by a `reducer`, /// whose task is to aggregate these outputs into the final result returned by this function. diff --git a/git-pack/tests/pack/data/output/in_order_iter.rs b/git-features/tests/parallel/in_order_iter.rs similarity index 97% rename from git-pack/tests/pack/data/output/in_order_iter.rs rename to git-features/tests/parallel/in_order_iter.rs index 5ddc0199c5d..2daad4a071a 100644 --- a/git-pack/tests/pack/data/output/in_order_iter.rs +++ b/git-features/tests/parallel/in_order_iter.rs @@ -1,6 +1,6 @@ use std::convert::Infallible; -use git_odb::pack::data::output::InOrderIter; +use git_features::parallel::InOrderIter; #[test] fn in_order_stays_in_order() { diff --git a/git-features/tests/parallel/mod.rs b/git-features/tests/parallel/mod.rs index e70bbcca092..95801c6b588 100644 --- a/git-features/tests/parallel/mod.rs +++ b/git-features/tests/parallel/mod.rs @@ -1,6 +1,8 @@ //! 
Tests that are working similarly in parallel and serial mode use git_features::parallel; +mod in_order_iter; + #[derive(Default)] struct Adder { count: usize, diff --git a/git-index/Cargo.toml b/git-index/Cargo.toml index fda89b07c02..d0729a13831 100644 --- a/git-index/Cargo.toml +++ b/git-index/Cargo.toml @@ -5,11 +5,42 @@ repository = "https://github.com/Byron/gitoxide" license = "MIT/Apache-2.0" description = "A WIP crate of the gitoxide project dedicated implementing the git index file" authors = ["Sebastian Thiel "] -edition = "2018" +edition = "2021" [lib] doctest = false +test = true + +[[test]] +name = "multi-threaded" +path = "tests/index-multi-threaded.rs" +required-features = ["internal-testing-git-features-parallel"] + +[[test]] +name = "single-threaded" +path = "tests/index-single-threaded.rs" +required-features = ["internal-testing-to-avoid-being-run-by-cargo-test-all"] + +[features] +serde1 = ["serde"] + +internal-testing-git-features-parallel = ["git-features/parallel"] +internal-testing-to-avoid-being-run-by-cargo-test-all = [] # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html [dependencies] +git-features = { version ="^0.18.0", path = "../git-features", features = ["rustsha1"] } +git-hash = { version ="^0.8.0", path = "../git-hash" } + +quick-error = "2.0.0" +memmap2 = "0.5.0" +filetime = "0.2.15" +bstr = { version = "0.2.13", default-features = false, features = ["std"]} + +serde = { version = "1.0.114", optional = true, default-features = false, features = ["derive"] } +smallvec = "1.7.0" +atoi = "0.4.0" + +[dev-dependencies] +git-testtools = { path = "../tests/tools"} diff --git a/git-index/README.md b/git-index/README.md new file mode 100644 index 00000000000..f239be693c0 --- /dev/null +++ b/git-index/README.md @@ -0,0 +1,11 @@ + +#### Test fixtures + +Most of the test indices are snatched directly from the unit test suite of `git` itself, usually by running something like the following + +```shell + ./t1700-split-index.sh -r 2 --debug +``` + +Then one finds all test state and the index in particular in `trash directory/t1700-split-index/.git/index` and can possibly copy it over and use as fixture. +The preferred way is to find a test of interest, and use its setup code within one of our own fixture scripts that are executed once to generate the file of interest. 
diff --git a/git-index/src/decode/entries.rs b/git-index/src/decode/entries.rs new file mode 100644 index 00000000000..2b937c0cf51 --- /dev/null +++ b/git-index/src/decode/entries.rs @@ -0,0 +1,182 @@ +use std::ops::Range; + +use crate::{ + decode::{self, header}, + entry, + util::{read_u32, split_at_byte_exclusive, split_at_pos}, + Entry, Version, +}; + +/// a guess directly from git sources +pub const AVERAGE_V4_DELTA_PATH_LEN_IN_BYTES: usize = 80; + +pub struct Outcome { + pub is_sparse: bool, +} + +pub fn estimate_path_storage_requirements_in_bytes( + num_entries: u32, + on_disk_size: usize, + offset_to_extensions: Option, + object_hash: git_hash::Kind, + version: Version, +) -> usize { + const fn on_disk_entry_sans_path(object_hash: git_hash::Kind) -> usize { + 8 + // ctime + 8 + // mtime + (4 * 6) + // various stat fields + 2 + // flag, ignore extended flag as we'd rather overallocate a bit + object_hash.len_in_bytes() + } + match version { + Version::V3 | Version::V2 => { + let size_of_entries_block = offset_to_extensions.unwrap_or(on_disk_size); + size_of_entries_block + .saturating_sub(num_entries as usize * on_disk_entry_sans_path(object_hash)) + .saturating_sub(header::SIZE) + } + Version::V4 => num_entries as usize * AVERAGE_V4_DELTA_PATH_LEN_IN_BYTES, + } +} + +/// Note that `data` must point to the beginning of the entries, right past the header. +pub fn load_chunk<'a>( + mut data: &'a [u8], + entries: &mut Vec, + path_backing: &mut Vec, + num_entries: u32, + object_hash: git_hash::Kind, + version: Version, +) -> Result<(Outcome, &'a [u8]), decode::Error> { + let mut is_sparse = false; + let has_delta_paths = version == Version::V4; + let mut prev_path = None; + let mut delta_buf = Vec::::with_capacity(AVERAGE_V4_DELTA_PATH_LEN_IN_BYTES); + + for idx in 0..num_entries { + let (entry, remaining) = load_one( + data, + path_backing, + object_hash.len_in_bytes(), + has_delta_paths, + prev_path, + ) + .ok_or(decode::Error::Entry(idx))?; + + data = remaining; + if entry::mode::is_sparse(entry.stat.mode) { + is_sparse = true; + } + // TODO: entries are actually in an intrusive collection, with path as key. Could be set for us. This affects 'ignore_case' which we + // also don't yet handle but probably could, maybe even smartly with the collection. + // For now it's unclear to me how they access the index, they could iterate quickly, and have fast access by path. 
+ entries.push(entry); + prev_path = entries.last().map(|e| (e.path.clone(), &mut delta_buf)); + } + + Ok((Outcome { is_sparse }, data)) +} + +/// Note that `prev_path` is only useful if the version is V4 +fn load_one<'a>( + data: &'a [u8], + path_backing: &mut Vec, + hash_len: usize, + has_delta_paths: bool, + prev_path_and_buf: Option<(Range, &mut Vec)>, +) -> Option<(Entry, &'a [u8])> { + let (ctime_secs, data) = read_u32(data)?; + let (ctime_nsecs, data) = read_u32(data)?; + let (mtime_secs, data) = read_u32(data)?; + let (mtime_nsecs, data) = read_u32(data)?; + let (dev, data) = read_u32(data)?; + let (ino, data) = read_u32(data)?; + let (mode, data) = read_u32(data)?; + let (uid, data) = read_u32(data)?; + let (gid, data) = read_u32(data)?; + let (size, data) = read_u32(data)?; + let (hash, data) = split_at_pos(data, hash_len)?; + let (flags, data) = read_u16(data)?; + let flags = flags as u32; + let (flags, data) = if flags & entry::flags::EXTENDED == entry::flags::EXTENDED { + let (extended_flags, data) = read_u16(data)?; + let extended_flags: u32 = (extended_flags as u32) << 16; + const ALL_KNOWN_EXTENDED_FLAGS: u32 = entry::flags::INTENT_TO_ADD | entry::flags::SKIP_WORKTREE; + assert_eq!( + extended_flags & !ALL_KNOWN_EXTENDED_FLAGS, + 0, + "BUG: encountered unknown extended bitflags in {:b}", + extended_flags + ); + (flags | extended_flags, data) + } else { + (flags, data) + }; + + let start = path_backing.len(); + let data = if has_delta_paths { + let (strip_len, consumed) = git_features::decode::leb64(data); + let data = &data[consumed..]; + if let Some((prev_path, buf)) = prev_path_and_buf { + let end = prev_path.end.checked_sub(strip_len.try_into().ok()?)?; + let copy_len = end.checked_sub(prev_path.start)?; + if copy_len > 0 { + buf.resize(copy_len, 0); + buf.copy_from_slice(&path_backing[prev_path.start..end]); + path_backing.extend_from_slice(buf); + } + } + + let (path, data) = split_at_byte_exclusive(data, 0)?; + path_backing.extend_from_slice(path); + + data + } else { + let (path, data) = if (flags & entry::mask::PATH_LEN) == entry::mask::PATH_LEN { + split_at_byte_exclusive(data, 0)? + } else { + let path_len = (flags & entry::mask::PATH_LEN) as usize; + split_at_pos(data, path_len)? + }; + + path_backing.extend_from_slice(path); + skip_padding(data) + }; + let path_range = start..path_backing.len(); + + Some(( + Entry { + stat: entry::Stat { + ctime: entry::Time { + secs: ctime_secs, + nsecs: ctime_nsecs, + }, + mtime: entry::Time { + secs: mtime_secs, + nsecs: mtime_nsecs, + }, + dev, + ino, + mode, + uid, + gid, + size, + }, + id: git_hash::ObjectId::from(hash), + flags: flags & !entry::mask::PATH_LEN, + path: path_range, + }, + data, + )) +} + +#[inline] +fn skip_padding(data: &[u8]) -> &[u8] { + let skip = data.iter().take_while(|b| **b == 0).count(); + &data[skip..] +} + +#[inline] +fn read_u16(data: &[u8]) -> Option<(u16, &[u8])> { + split_at_pos(data, 2).map(|(num, data)| (u16::from_be_bytes(num.try_into().unwrap()), data)) +} diff --git a/git-index/src/decode/header.rs b/git-index/src/decode/header.rs new file mode 100644 index 00000000000..097807dd511 --- /dev/null +++ b/git-index/src/decode/header.rs @@ -0,0 +1,49 @@ +pub(crate) const SIZE: usize = 4 /*signature*/ + 4 /*version*/ + 4 /* num entries */; + +use crate::{util::from_be_u32, Version}; + +mod error { + use quick_error::quick_error; + + quick_error! 
{ + #[derive(Debug)] + pub enum Error { + Corrupt(message: &'static str) { + display("{}", message) + } + UnsupportedVersion(version: u32) { + display("Index version {} is not supported", version) + } + } + } +} + +pub use error::Error; + +pub(crate) fn decode(data: &[u8], object_hash: git_hash::Kind) -> Result<(crate::Version, u32, &[u8]), Error> { + if data.len() < (3 * 4) + object_hash.len_in_bytes() { + return Err(Error::Corrupt( + "File is too small even for header with zero entries and smallest hash", + )); + } + + const SIGNATURE: &[u8] = b"DIRC"; + let (signature, data) = data.split_at(4); + if signature != SIGNATURE { + return Err(Error::Corrupt( + "Signature mismatch - this doesn't claim to be a header file", + )); + } + + let (version, data) = data.split_at(4); + let version = match from_be_u32(version) { + 2 => Version::V2, + 3 => Version::V3, + 4 => Version::V4, + unknown => return Err(Error::UnsupportedVersion(unknown)), + }; + let (entries, data) = data.split_at(4); + let entries = from_be_u32(entries); + + Ok((version, entries, data)) +} diff --git a/git-index/src/decode/mod.rs b/git-index/src/decode/mod.rs new file mode 100644 index 00000000000..9e81877609d --- /dev/null +++ b/git-index/src/decode/mod.rs @@ -0,0 +1,246 @@ +use filetime::FileTime; + +use crate::{extension, Entry, State, Version}; + +mod entries; +pub mod header; + +mod error { + use quick_error::quick_error; + + use crate::decode; + + quick_error! { + #[derive(Debug)] + pub enum Error { + Header(err: decode::header::Error) { + display("The header could not be decoded") + source(err) + from() + } + Entry(index: u32) { + display("Could not parse entry at index {}", index) + } + UnexpectedTrailerLength { expected: usize, actual: usize } { + display("Index trailer should have been {} bytes long, but was {}", expected, actual) + } + } + } +} +pub use error::Error; +use git_features::parallel::InOrderIter; + +#[derive(Default)] +pub struct Options { + pub object_hash: git_hash::Kind, + /// If Some(_), we are allowed to use more than one thread. If Some(N), use no more than N threads. If Some(0)|None, use as many threads + /// as there are physical cores. + /// + /// This applies to loading extensions in parallel to entries if the common EOIE extension is available. + /// It also allows to use multiple threads for loading entries if the IEOT extension is present. + pub thread_limit: Option, + /// The minimum size in bytes to load extensions in their own thread, assuming there is enough `num_threads` available. 
+ pub min_extension_block_in_bytes_for_threading: usize, +} + +impl State { + pub fn from_bytes( + data: &[u8], + timestamp: FileTime, + Options { + object_hash, + thread_limit, + min_extension_block_in_bytes_for_threading, + }: Options, + ) -> Result<(Self, git_hash::ObjectId), Error> { + let (version, num_entries, post_header_data) = header::decode(data, object_hash)?; + let start_of_extensions = extension::end_of_index_entry::decode(data, object_hash); + + let mut num_threads = git_features::parallel::num_threads(thread_limit); + let path_backing_buffer_size = entries::estimate_path_storage_requirements_in_bytes( + num_entries, + data.len(), + start_of_extensions, + object_hash, + version, + ); + + let (entries, ext, data) = match start_of_extensions { + Some(offset) if num_threads > 1 => { + let extensions_data = &data[offset..]; + let index_offsets_table = extension::index_entry_offset_table::find(extensions_data, object_hash); + let (entries_res, (ext, data)) = git_features::parallel::threads(|scope| { + let extension_loading = + (extensions_data.len() > min_extension_block_in_bytes_for_threading).then({ + num_threads -= 1; + || scope.spawn(|_| extension::decode::all(extensions_data, object_hash)) + }); + let entries_res = match index_offsets_table { + Some(entry_offsets) => { + let chunk_size = (entry_offsets.len() as f32 / num_threads as f32).ceil() as usize; + let num_chunks = entry_offsets.chunks(chunk_size).count(); + let mut threads = Vec::with_capacity(num_chunks); + for (id, chunks) in entry_offsets.chunks(chunk_size).enumerate() { + let chunks = chunks.to_vec(); + threads.push(scope.spawn(move |_| { + let num_entries_for_chunks = + chunks.iter().map(|c| c.num_entries).sum::() as usize; + let mut entries = Vec::with_capacity(num_entries_for_chunks); + let path_backing_buffer_size_for_chunks = + entries::estimate_path_storage_requirements_in_bytes( + num_entries_for_chunks as u32, + data.len() / num_chunks, + start_of_extensions.map(|ofs| ofs / num_chunks), + object_hash, + version, + ); + let mut path_backing = Vec::with_capacity(path_backing_buffer_size_for_chunks); + let mut is_sparse = false; + for offset in chunks { + let ( + entries::Outcome { + is_sparse: chunk_is_sparse, + }, + _data, + ) = entries::load_chunk( + &data[offset.from_beginning_of_file as usize..], + &mut entries, + &mut path_backing, + offset.num_entries, + object_hash, + version, + )?; + is_sparse |= chunk_is_sparse; + } + Ok::<_, Error>(( + id, + EntriesOutcome { + entries, + path_backing, + is_sparse, + }, + )) + })); + } + let mut results = + InOrderIter::from(threads.into_iter().map(|thread| thread.join().unwrap())); + let mut acc = results.next().expect("have at least two results, one per thread"); + // We explicitly don't adjust the reserve in acc and rather allow for more copying + // to happens as vectors grow to keep the peak memory size low. + // NOTE: one day, we might use a memory pool for paths. We could encode the block of memory + // in some bytes in the path offset. That way there is more indirection/slower access + // to the path, but it would save time here. + // As it stands, `git` is definitely more efficient at this and probably uses less memory too. + // Maybe benchmarks can tell if that is noticeable later at 200/400GB/s memory bandwidth, or maybe just + // 100GB/s on a single core. 
+ while let (Ok(lhs), Some(res)) = (acc.as_mut(), results.next()) { + match res { + Ok(rhs) => { + lhs.is_sparse |= rhs.is_sparse; + let ofs = lhs.path_backing.len(); + lhs.path_backing.extend(rhs.path_backing); + lhs.entries.extend(rhs.entries.into_iter().map(|mut e| { + e.path.start += ofs; + e.path.end += ofs; + e + })); + } + Err(err) => { + acc = Err(err); + } + } + } + acc.map(|acc| (acc, &data[data.len() - object_hash.len_in_bytes()..])) + } + None => load_entries( + post_header_data, + path_backing_buffer_size, + num_entries, + object_hash, + version, + ), + }; + let ext_res = extension_loading + .map(|thread| thread.join().unwrap()) + .unwrap_or_else(|| extension::decode::all(extensions_data, object_hash)); + (entries_res, ext_res) + }) + .unwrap(); // this unwrap is for panics - if these happened we are done anyway. + (entries_res?.0, ext, data) + } + None | Some(_) => { + let (entries, data) = load_entries( + post_header_data, + path_backing_buffer_size, + num_entries, + object_hash, + version, + )?; + let (ext, data) = extension::decode::all(data, object_hash); + (entries, ext, data) + } + }; + + if data.len() != object_hash.len_in_bytes() { + return Err(Error::UnexpectedTrailerLength { + expected: object_hash.len_in_bytes(), + actual: data.len(), + }); + } + + let checksum = git_hash::ObjectId::from(data); + let EntriesOutcome { + entries, + path_backing, + is_sparse, + } = entries; + let extension::decode::Outcome { cache_tree } = ext; + + Ok(( + State { + timestamp, + version, + cache_tree, + entries, + path_backing, + is_sparse, + }, + checksum, + )) + } +} + +struct EntriesOutcome { + pub entries: Vec, + pub path_backing: Vec, + pub is_sparse: bool, +} + +fn load_entries( + post_header_data: &[u8], + path_backing_buffer_size: usize, + num_entries: u32, + object_hash: git_hash::Kind, + version: Version, +) -> Result<(EntriesOutcome, &[u8]), Error> { + let mut entries = Vec::with_capacity(num_entries as usize); + let mut path_backing = Vec::with_capacity(path_backing_buffer_size); + entries::load_chunk( + post_header_data, + &mut entries, + &mut path_backing, + num_entries, + object_hash, + version, + ) + .map(|(entries::Outcome { is_sparse }, data): (entries::Outcome, &[u8])| { + ( + EntriesOutcome { + entries, + path_backing, + is_sparse, + }, + data, + ) + }) +} diff --git a/git-index/src/entry.rs b/git-index/src/entry.rs new file mode 100644 index 00000000000..741b2bf56d3 --- /dev/null +++ b/git-index/src/entry.rs @@ -0,0 +1,45 @@ +pub(crate) mod mode { + const S_IFDIR: u32 = 0o040000; + pub fn is_sparse(mode: u32) -> bool { + mode == S_IFDIR + } +} + +pub(crate) mod flags { + pub const EXTENDED: u32 = 0x4000; + pub const INTENT_TO_ADD: u32 = 1 << 29; + pub const SKIP_WORKTREE: u32 = 1 << 30; +} + +pub(crate) mod mask { + pub const PATH_LEN: u32 = 0x0fff; +} + +pub struct Time { + pub secs: u32, + pub nsecs: u32, +} + +pub struct Stat { + pub mtime: Time, + pub ctime: Time, + pub dev: u32, + pub ino: u32, + pub mode: u32, + pub uid: u32, + pub gid: u32, + /// The size of bytes on disk. 
Capped to u32 so files bigger than that will need thorough checking (and hopefully never make it) + pub size: u32, +} + +mod access { + use bstr::{BStr, ByteSlice}; + + use crate::{Entry, State}; + + impl Entry { + pub fn path<'a>(&self, state: &'a State) -> &'a BStr { + (&state.path_backing[self.path.clone()]).as_bstr() + } + } +} diff --git a/git-index/src/extension/decode.rs b/git-index/src/extension/decode.rs new file mode 100644 index 00000000000..af28cc537ea --- /dev/null +++ b/git-index/src/extension/decode.rs @@ -0,0 +1,31 @@ +use crate::{extension, extension::Signature, util::from_be_u32}; + +pub fn header(data: &[u8]) -> (Signature, u32, &[u8]) { + let (signature, data) = data.split_at(4); + let (size, data) = data.split_at(4); + (signature.try_into().unwrap(), from_be_u32(size), data) +} + +pub fn all(maybe_beginning_of_extensions: &[u8], object_hash: git_hash::Kind) -> (Outcome, &[u8]) { + extension::Iter::new_without_checksum(maybe_beginning_of_extensions, object_hash) + .map(|mut ext_iter| { + let mut ext = Outcome::default(); + for (signature, ext_data) in ext_iter.by_ref() { + match signature { + extension::tree::SIGNATURE => { + ext.cache_tree = extension::tree::decode(ext_data, object_hash); + } + extension::end_of_index_entry::SIGNATURE => {} // skip already done + extension::index_entry_offset_table::SIGNATURE => {} // not relevant/obtained already + _unknown => {} // skip unknown extensions, too + } + } + (ext, &maybe_beginning_of_extensions[ext_iter.consumed..]) + }) + .unwrap_or_else(|| (Outcome::default(), maybe_beginning_of_extensions)) +} + +#[derive(Default)] +pub struct Outcome { + pub cache_tree: Option, +} diff --git a/git-index/src/extension/end_of_index_entry.rs b/git-index/src/extension/end_of_index_entry.rs new file mode 100644 index 00000000000..c44d15b295c --- /dev/null +++ b/git-index/src/extension/end_of_index_entry.rs @@ -0,0 +1,47 @@ +use crate::{decode::header, extension, extension::Signature, util::from_be_u32}; + +pub const SIGNATURE: Signature = *b"EOIE"; +pub const SIZE: usize = 4 /* offset to extensions */ + git_hash::Kind::Sha1.len_in_bytes(); +pub const SIZE_WITH_HEADER: usize = crate::extension::MIN_SIZE + SIZE; + +pub fn decode(data: &[u8], object_hash: git_hash::Kind) -> Option { + let hash_len = object_hash.len_in_bytes(); + if data.len() < SIZE_WITH_HEADER + hash_len { + return None; + } + + let start_of_eoie = data.len() - SIZE_WITH_HEADER - hash_len; + let ext_data = &data[start_of_eoie..data.len() - hash_len]; + + let (signature, ext_size, ext_data) = extension::decode::header(ext_data); + if signature != SIGNATURE || ext_size as usize != SIZE { + return None; + } + + let (offset, checksum) = ext_data.split_at(4); + let offset = from_be_u32(offset) as usize; + if offset < header::SIZE || offset > start_of_eoie || checksum.len() != git_hash::Kind::Sha1.len_in_bytes() { + return None; + } + + let mut hasher = git_features::hash::hasher(git_hash::Kind::Sha1); + let mut last_chunk = None; + for (signature, chunk) in extension::Iter::new(&data[offset..data.len() - SIZE_WITH_HEADER - hash_len]) { + hasher.update(&signature); + hasher.update(&(chunk.len() as u32).to_be_bytes()); + last_chunk = Some(chunk); + } + + if hasher.digest() != checksum { + return None; + } + // The last-to-this chunk ends where ours starts + if last_chunk + .map(|s| s.as_ptr_range().end != (&data[start_of_eoie]) as *const _) + .unwrap_or(true) + { + return None; + } + + Some(offset) +} diff --git a/git-index/src/extension/iter.rs b/git-index/src/extension/iter.rs 
new file mode 100644 index 00000000000..5e3d2dd136a --- /dev/null +++ b/git-index/src/extension/iter.rs @@ -0,0 +1,53 @@ +use crate::{extension, extension::Iter, util::from_be_u32}; + +impl<'a> Iter<'a> { + pub fn new(data_at_beginning_of_extensions_and_truncated: &'a [u8]) -> Self { + Iter { + data: data_at_beginning_of_extensions_and_truncated, + consumed: 0, + } + } + + pub fn new_without_checksum( + data_at_beginning_of_extensions: &'a [u8], + object_hash: git_hash::Kind, + ) -> Option { + let end = data_at_beginning_of_extensions + .len() + .checked_sub(object_hash.len_in_bytes())?; + Iter { + data: &data_at_beginning_of_extensions[..end], + consumed: 0, + } + .into() + } +} + +impl<'a> Iterator for Iter<'a> { + type Item = (extension::Signature, &'a [u8]); + + fn next(&mut self) -> Option { + if self.data.len() < 4 + 4 { + return None; + } + + let (signature, data) = self.data.split_at(4); + let (size, data) = data.split_at(4); + self.data = data; + self.consumed += 4 + 4; + + let size = from_be_u32(size) as usize; + + match data.get(..size) { + Some(ext_data) => { + self.data = &data[size..]; + self.consumed += size; + Some((signature.try_into().unwrap(), ext_data)) + } + None => { + self.data = &[]; + None + } + } + } +} diff --git a/git-index/src/extension/mod.rs b/git-index/src/extension/mod.rs new file mode 100644 index 00000000000..995b471d3ca --- /dev/null +++ b/git-index/src/extension/mod.rs @@ -0,0 +1,75 @@ +use smallvec::SmallVec; + +const MIN_SIZE: usize = 4 /* signature */ + 4 /* size */; + +pub type Signature = [u8; 4]; + +pub struct Iter<'a> { + data: &'a [u8], + pub consumed: usize, +} + +/// A structure to associate object ids of a tree with sections in the index entries list. +/// +/// It allows to more quickly build trees by avoiding as it can quickly re-use portions of the index and its associated tree ids +/// if there wa sno change to them. Portions of this tree are invalidated as the index is changed. +pub struct Tree { + name: SmallVec<[u8; 23]>, + /// Only set if there are any entries in the index we are associated with. + id: Option, + children: Vec, +} + +mod iter; + +pub(crate) mod decode; + +pub(crate) mod tree; + +pub(crate) mod end_of_index_entry; + +pub(crate) mod index_entry_offset_table { + use crate::{extension, extension::Signature, util::read_u32}; + + #[derive(Debug, Clone, Copy)] + pub struct Offset { + pub from_beginning_of_file: u32, + pub num_entries: u32, + } + + pub const SIGNATURE: Signature = *b"IEOT"; + + pub fn decode(data: &[u8]) -> Option> { + let (version, mut data) = read_u32(data)?; + match version { + 1 => {} + _unknown => return None, + } + + let entry_size = 4 + 4; + let num_offsets = data.len() / entry_size; + if num_offsets == 0 || data.len() % entry_size != 0 { + return None; + } + + let mut out = Vec::with_capacity(entry_size); + for _ in 0..num_offsets { + let (offset, chunk) = read_u32(data)?; + let (num_entries, chunk) = read_u32(chunk)?; + out.push(Offset { + from_beginning_of_file: offset, + num_entries, + }); + data = chunk; + } + debug_assert!(data.is_empty()); + + out.into() + } + + pub fn find(extensions: &[u8], object_hash: git_hash::Kind) -> Option> { + extension::Iter::new_without_checksum(extensions, object_hash)? 
+ .find_map(|(sig, ext_data)| (sig == SIGNATURE).then(|| ext_data)) + .and_then(decode) + } +} diff --git a/git-index/src/extension/tree.rs b/git-index/src/extension/tree.rs new file mode 100644 index 00000000000..f7d97fbefe2 --- /dev/null +++ b/git-index/src/extension/tree.rs @@ -0,0 +1,73 @@ +use git_hash::ObjectId; + +use crate::{ + extension::{Signature, Tree}, + util::split_at_byte_exclusive, +}; + +pub const SIGNATURE: Signature = *b"TREE"; + +pub struct NodeId { + /// The id of the directory tree of the associated tree object. + id: git_hash::ObjectId, + /// The amount of non-tree entries contained within, and definitely not zero. + entry_count: u32, +} + +/// A recursive data structure +pub fn decode(data: &[u8], object_hash: git_hash::Kind) -> Option { + let (tree, data) = one_recursive(data, object_hash.len_in_bytes())?; + assert!( + data.is_empty(), + "BUG: should fully consume the entire tree extension chunk, got {} left", + data.len() + ); + Some(tree) +} + +pub fn one_recursive(data: &[u8], hash_len: usize) -> Option<(Tree, &[u8])> { + let (path, data) = split_at_byte_exclusive(data, 0)?; + + let (entry_count, data) = split_at_byte_exclusive(data, b' ')?; + let entry_count: u32 = atoi::atoi(entry_count)?; + + let (subtree_count, mut data) = split_at_byte_exclusive(data, b'\n')?; + let subtree_count: usize = atoi::atoi(subtree_count)?; + + let node_id = (entry_count != 0) + .then(|| { + (data.len() >= hash_len).then(|| { + let (hash, rest) = data.split_at(hash_len); + data = rest; + ObjectId::from(hash) + }) + }) + .flatten() + .map(|id| NodeId { id, entry_count }); + + let mut subtrees = Vec::with_capacity(subtree_count); + for _ in 0..subtree_count { + let (tree, rest) = one_recursive(data, hash_len)?; + subtrees.push(tree); + data = rest; + } + + Some(( + Tree { + id: node_id, + name: path.into(), + children: subtrees, + }, + data, + )) +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn size_of_tree() { + assert_eq!(std::mem::size_of::(), 88); + } +} diff --git a/git-index/src/file.rs b/git-index/src/file.rs new file mode 100644 index 00000000000..6f087752a17 --- /dev/null +++ b/git-index/src/file.rs @@ -0,0 +1,66 @@ +mod impls { + use std::ops::{Deref, DerefMut}; + + use crate::{File, State}; + + impl Deref for File { + type Target = State; + + fn deref(&self) -> &Self::Target { + &self.state + } + } + + impl DerefMut for File { + fn deref_mut(&mut self) -> &mut Self::Target { + &mut self.state + } + } +} + +pub mod init { + #![allow(unused)] + + use std::path::{Path, PathBuf}; + + use memmap2::Mmap; + + use crate::{decode, extension, File, State}; + + mod error { + use quick_error::quick_error; + + quick_error! { + #[derive(Debug)] + pub enum Error { + Io(err: std::io::Error) { + display("An IO error occurred while opening the index") + source(err) + from() + } + Decode(err: crate::decode::Error) { + display("The file could not be decoded") + source(err) + from() + } + } + } + } + pub use error::Error; + + impl File { + pub fn at(path: impl Into, options: decode::Options) -> Result { + let path = path.into(); + let (data, mtime) = { + // SAFETY: we have to take the risk of somebody changing the file underneath. Git never writes into the same file. + let file = std::fs::File::open(&path)?; + #[allow(unsafe_code)] + let data = unsafe { Mmap::map(&file)? 
}; + (data, filetime::FileTime::from_last_modification_time(&file.metadata()?)) + }; + + let (state, checksum) = State::from_bytes(&data, mtime, options)?; + Ok(File { state, path, checksum }) + } + } +} diff --git a/git-index/src/lib.rs b/git-index/src/lib.rs index d7a83e4f525..d7724084981 100644 --- a/git-index/src/lib.rs +++ b/git-index/src/lib.rs @@ -1 +1,121 @@ -#![forbid(unsafe_code, rust_2018_idioms)] +#![deny(unsafe_code, missing_docs, rust_2018_idioms)] +#![allow(missing_docs, dead_code)] + +use std::{ops::Range, path::PathBuf}; + +use filetime::FileTime; + +pub mod file; + +pub(crate) mod extension; + +pub mod entry; + +mod access { + use crate::{Entry, State, Version}; + + impl State { + pub fn version(&self) -> Version { + self.version + } + + pub fn entries(&self) -> &[Entry] { + &self.entries + } + } +} + +pub mod decode; + +/// All known versions of a git index file. +#[derive(PartialEq, Eq, Debug, Hash, Ord, PartialOrd, Clone, Copy)] +#[cfg_attr(feature = "serde1", derive(serde::Serialize, serde::Deserialize))] +pub enum Version { + /// Supports entries and various extensions. + V2 = 2, + /// Adds support for additional flags for each entry. + V3 = 3, + /// Supports deltified entry paths. + V4 = 4, +} + +/// An entry in the index, identifying a non-tree item on disk. +pub struct Entry { + pub stat: entry::Stat, + pub id: git_hash::ObjectId, + pub flags: u32, + path: Range, +} + +/// An index file whose state was read from a file on disk. +pub struct File { + pub state: State, + pub path: PathBuf, + /// The checksum of all bytes prior to the checksum itself. + pub checksum: git_hash::ObjectId, +} + +/// An in-memory cache of a fully parsed git index file. +/// +/// As opposed to a snapshot, it's meant to be altered and eventually be written back to disk or converted into a tree. +/// We treat index and its state synonymous. +pub struct State { + /// The time at which the state was created, indicating its freshness compared to other files on disk. + /// + /// Note that on platforms that only have a precisions of a second for this time, we will treat all entries with the + /// same timestamp as this as potentially changed, checking more thoroughly if a change actually happened. + timestamp: FileTime, + version: Version, + cache_tree: Option, + entries: Vec, + /// A memory area keeping all index paths, in full length, independently of the index version. + path_backing: Vec, + /// True if one entry in the index has a special marker mode + is_sparse: bool, +} + +pub(crate) mod util { + #[inline] + pub fn read_u32(data: &[u8]) -> Option<(u32, &[u8])> { + split_at_pos(data, 4).map(|(num, data)| (u32::from_be_bytes(num.try_into().unwrap()), data)) + } + + #[inline] + pub fn from_be_u32(b: &[u8]) -> u32 { + u32::from_be_bytes(b.try_into().unwrap()) + } + + #[inline] + pub fn split_at_byte_exclusive(data: &[u8], byte: u8) -> Option<(&[u8], &[u8])> { + if data.len() < 2 { + return None; + } + data.iter().enumerate().find_map(|(idx, b)| { + (*b == byte).then(|| { + if idx == 0 { + (&[] as &[u8], &data[1..]) + } else { + let (a, b) = data.split_at(idx); + (a, &b[1..]) + } + }) + }) + } + + #[inline] + pub fn split_at_pos(data: &[u8], pos: usize) -> Option<(&[u8], &[u8])> { + if data.len() < pos { + return None; + } + data.split_at(pos).into() + } +} + +#[test] +fn size_of_entry() { + assert_eq!(std::mem::size_of::(), 80); + + // the reason we have our own time is half the size. 
+ assert_eq!(std::mem::size_of::(), 8); + assert_eq!(std::mem::size_of::(), 16); +} diff --git a/git-index/tests/fixtures/make_index/v2.sh b/git-index/tests/fixtures/make_index/v2.sh new file mode 100644 index 00000000000..a7ab127a393 --- /dev/null +++ b/git-index/tests/fixtures/make_index/v2.sh @@ -0,0 +1,11 @@ +#!/bin/bash +set -eu -o pipefail + +export GIT_INDEX_VERSION=2 +git init -q +git config commit.gpgsign false +git config index.threads 2 + +touch a +git add a +git commit -m "empty" diff --git a/git-index/tests/fixtures/make_index/v2_more_files.sh b/git-index/tests/fixtures/make_index/v2_more_files.sh new file mode 100644 index 00000000000..83a3583e6d0 --- /dev/null +++ b/git-index/tests/fixtures/make_index/v2_more_files.sh @@ -0,0 +1,14 @@ +#!/bin/bash +set -eu -o pipefail + +export GIT_INDEX_VERSION=2; +git init -q +git config commit.gpgsign false +git config index.threads 1 + +touch a b c +mkdir d +(cd d && touch a b c) + +git add . +git commit -m "empty" diff --git a/git-index/tests/fixtures/make_index/v4_more_files_IEOT.sh b/git-index/tests/fixtures/make_index/v4_more_files_IEOT.sh new file mode 100644 index 00000000000..9dff63b5b4d --- /dev/null +++ b/git-index/tests/fixtures/make_index/v4_more_files_IEOT.sh @@ -0,0 +1,15 @@ +#!/bin/bash +set -eu -o pipefail + +export GIT_INDEX_VERSION=4 +git init -q +git config commit.gpgsign false +git config index.threads 2 + +touch a b c +mkdir d +(cd d && touch a b c && mkdir last && cd last && touch 123 34 6) +touch x + +git add . +git commit -m "empty" diff --git a/git-index/tests/index-multi-threaded.rs b/git-index/tests/index-multi-threaded.rs new file mode 100644 index 00000000000..58bd56b4864 --- /dev/null +++ b/git-index/tests/index-multi-threaded.rs @@ -0,0 +1 @@ +mod index; diff --git a/git-index/tests/index-single-threaded.rs b/git-index/tests/index-single-threaded.rs new file mode 100644 index 00000000000..58bd56b4864 --- /dev/null +++ b/git-index/tests/index-single-threaded.rs @@ -0,0 +1 @@ +mod index; diff --git a/git-index/tests/index/file/mod.rs b/git-index/tests/index/file/mod.rs new file mode 100644 index 00000000000..64ef01058d7 --- /dev/null +++ b/git-index/tests/index/file/mod.rs @@ -0,0 +1,75 @@ +mod init { + use git_index::Version; + use git_testtools::hex_to_id; + + fn file(name: &str) -> git_index::File { + git_index::File::at(crate::index::fixture_path(name), git_index::decode::Options::default()).unwrap() + } + fn file_opt(name: &str, opts: git_index::decode::Options) -> git_index::File { + git_index::File::at(crate::index::fixture_path(name), opts).unwrap() + } + + #[test] + fn read_v2_with_single_entry_tree_and_eoie_ext() { + let file_disallow_threaded_loading = file_opt( + "v2", + git_index::decode::Options { + min_extension_block_in_bytes_for_threading: 100000, + ..Default::default() + }, + ); + for file in [file("v2"), file_disallow_threaded_loading] { + assert_eq!(file.version(), Version::V2); + + assert_eq!(file.entries().len(), 1); + + let entry = &file.entries()[0]; + assert_eq!(entry.id, hex_to_id("e69de29bb2d1d6434b8b29ae775ad8c2e48c5391")); + assert_eq!(entry.path(&file.state), "a"); + } + } + + #[test] + fn read_v2_with_multiple_entries_without_eoie_ext() { + let file = file("v2_more_files"); + assert_eq!(file.version(), Version::V2); + + assert_eq!(file.entries().len(), 6); + for (idx, path) in ["a", "b", "c", "d/a", "d/b", "d/c"].into_iter().enumerate() { + let e = &file.entries()[idx]; + assert_eq!(e.path(&file), path); + assert_eq!(e.id, hex_to_id("e69de29bb2d1d6434b8b29ae775ad8c2e48c5391")) + 
} + } + + #[test] + #[ignore] + fn read_without_any_extension() {} + + #[test] + fn read_v4_with_delta_paths_and_ieot_ext() { + let file = file("v4_more_files_IEOT"); + assert_eq!(file.version(), Version::V4); + + assert_eq!(file.entries().len(), 10); + for (idx, path) in [ + "a", + "b", + "c", + "d/a", + "d/b", + "d/c", + "d/last/123", + "d/last/34", + "d/last/6", + "x", + ] + .into_iter() + .enumerate() + { + let e = &file.entries()[idx]; + assert_eq!(e.path(&file), path); + assert_eq!(e.id, hex_to_id("e69de29bb2d1d6434b8b29ae775ad8c2e48c5391")) + } + } +} diff --git a/git-index/tests/index/mod.rs b/git-index/tests/index/mod.rs new file mode 100644 index 00000000000..714788653f4 --- /dev/null +++ b/git-index/tests/index/mod.rs @@ -0,0 +1,18 @@ +use std::path::{Path, PathBuf}; + +mod file; + +pub fn fixture_path(name: &str) -> PathBuf { + let dir = git_testtools::scripted_fixture_repo_read_only(Path::new("make_index").join(name).with_extension("sh")) + .expect("script works"); + dir.join(".git").join("index") +} + +#[test] +fn size_of_entry() { + assert_eq!(std::mem::size_of::(), 80); + + // the reason we have our own time is half the size. + assert_eq!(std::mem::size_of::(), 8); + assert_eq!(std::mem::size_of::(), 16); +} diff --git a/git-odb/src/store_impls/dynamic/verify.rs b/git-odb/src/store_impls/dynamic/verify.rs index a4a43ab332b..3b36f64476f 100644 --- a/git-odb/src/store_impls/dynamic/verify.rs +++ b/git-odb/src/store_impls/dynamic/verify.rs @@ -1,7 +1,7 @@ -use std::time::Instant; use std::{ ops::Deref, sync::atomic::{AtomicBool, Ordering}, + time::Instant, }; use git_features::progress::{MessageLevel, Progress}; diff --git a/git-pack/Cargo.toml b/git-pack/Cargo.toml index b5cc1c26181..dcf64ace9bb 100644 --- a/git-pack/Cargo.toml +++ b/git-pack/Cargo.toml @@ -42,8 +42,7 @@ git-diff = { version ="^0.12.0", path = "../git-diff" } git-tempfile = { version ="^1.0.0", path = "../git-tempfile" } smallvec = "1.3.0" -filebuffer = "0.4.0" -byteorder = "1.2.3" +memmap2 = "0.5.0" serde = { version = "1.0.114", optional = true, default-features = false, features = ["derive"] } bytesize = "1.0.1" os_str_bytes = "6.0.0" diff --git a/git-pack/src/bundle/write/mod.rs b/git-pack/src/bundle/write/mod.rs index 8bdd6b7ac61..1abbd43f7fd 100644 --- a/git-pack/src/bundle/write/mod.rs +++ b/git-pack/src/bundle/write/mod.rs @@ -4,7 +4,6 @@ use std::{ sync::{atomic::AtomicBool, Arc}, }; -use filebuffer::FileBuffer; use git_features::{interrupt, progress, progress::Progress}; use git_tempfile::{handle::Writable, AutoRemove, ContainingDirectory}; @@ -295,7 +294,9 @@ impl crate::Bundle { fn new_pack_file_resolver( data_file: Arc>>, ) -> io::Result) -> Option<()> + Send + Clone> { - let mapped_file = Arc::new(FileBuffer::open(data_file.lock().with_mut(|f| f.path().to_owned())?)?); + let mapped_file = Arc::new(crate::mmap::read_only( + &data_file.lock().with_mut(|f| f.path().to_owned())?, + )?); let pack_data_lookup = move |range: std::ops::Range, out: &mut Vec| -> Option<()> { mapped_file .get(range.start as usize..range.end as usize) diff --git a/git-pack/src/data/entry/decode.rs b/git-pack/src/data/entry/decode.rs index e01b2cf50ea..646d5ffea18 100644 --- a/git-pack/src/data/entry/decode.rs +++ b/git-pack/src/data/entry/decode.rs @@ -1,5 +1,7 @@ use std::io; +use git_features::decode::{leb64, leb64_from_read}; + use super::{BLOB, COMMIT, OFS_DELTA, REF_DELTA, TAG, TREE}; use crate::data; @@ -16,7 +18,7 @@ impl data::Entry { use crate::data::entry::Header::*; let object = match type_id { OFS_DELTA => { - 
-                let (distance, leb_bytes) = leb64decode(&d[consumed..]);
+                let (distance, leb_bytes) = leb64(&d[consumed..]);
                 let delta = OfsDelta {
                     base_distance: distance,
                 };
@@ -54,7 +56,7 @@ impl data::Entry {
         use crate::data::entry::Header::*;
         let object = match type_id {
             OFS_DELTA => {
-                let (distance, leb_bytes) = streaming_leb64decode(&mut r)?;
+                let (distance, leb_bytes) = leb64_from_read(&mut r)?;
                 let delta = OfsDelta {
                     base_distance: distance,
                 };
@@ -85,37 +87,6 @@ impl data::Entry {
     }
 }
 
-#[inline]
-fn streaming_leb64decode(mut r: impl io::Read) -> Result<(u64, usize), io::Error> {
-    let mut b = [0u8; 1];
-    let mut i = 0;
-    r.read_exact(&mut b)?;
-    i += 1;
-    let mut value = b[0] as u64 & 0x7f;
-    while b[0] & 0x80 != 0 {
-        r.read_exact(&mut b)?;
-        i += 1;
-        value += 1;
-        value = (value << 7) + (b[0] as u64 & 0x7f)
-    }
-    Ok((value, i))
-}
-
-#[inline]
-fn leb64decode(d: &[u8]) -> (u64, usize) {
-    let mut i = 0;
-    let mut c = d[i];
-    i += 1;
-    let mut value = c as u64 & 0x7f;
-    while c & 0x80 != 0 {
-        c = d[i];
-        i += 1;
-        value += 1;
-        value = (value << 7) + (c as u64 & 0x7f)
-    }
-    (value, i)
-}
-
 #[inline]
 fn streaming_parse_header_info(mut read: impl io::Read) -> Result<(u8, u64, usize), io::Error> {
     let mut byte = [0u8; 1];
diff --git a/git-pack/src/data/entry/header.rs b/git-pack/src/data/entry/header.rs
index df25a500e05..5856086de2c 100644
--- a/git-pack/src/data/entry/header.rs
+++ b/git-pack/src/data/entry/header.rs
@@ -102,21 +102,11 @@ impl Header {
                 out.write_all(oid.as_slice())?;
                 written += oid.as_slice().len();
             }
-            OfsDelta { mut base_distance } => {
+            OfsDelta { base_distance } => {
                 let mut buf = [0u8; 10];
-                let mut bytes_written = 1;
-                buf[buf.len() - 1] = base_distance as u8 & 0b0111_1111;
-                for out in buf.iter_mut().rev().skip(1) {
-                    base_distance >>= 7;
-                    if base_distance == 0 {
-                        break;
-                    }
-                    base_distance -= 1;
-                    *out = 0b1000_0000 | (base_distance as u8 & 0b0111_1111);
-                    bytes_written += 1;
-                }
-                out.write_all(&buf[buf.len() - bytes_written..])?;
-                written += bytes_written;
+                let buf = leb64_encode(*base_distance, &mut buf);
+                out.write_all(buf)?;
+                written += buf.len();
             }
             Blob | Tree | Commit | Tag => {}
         }
@@ -129,3 +119,32 @@ impl Header {
             .expect("io::sink() to never fail")
     }
 }
+
+#[inline]
+fn leb64_encode(mut n: u64, buf: &mut [u8; 10]) -> &[u8] {
+    let mut bytes_written = 1;
+    buf[buf.len() - 1] = n as u8 & 0b0111_1111;
+    for out in buf.iter_mut().rev().skip(1) {
+        n >>= 7;
+        if n == 0 {
+            break;
+        }
+        n -= 1;
+        *out = 0b1000_0000 | (n as u8 & 0b0111_1111);
+        bytes_written += 1;
+    }
+    debug_assert_eq!(n, 0, "BUG: buffer must be large enough to hold a 64 bit integer");
+    &buf[buf.len() - bytes_written..]
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn leb64_encode_max_int() {
+        let mut buf = [0u8; 10];
+        let buf = leb64_encode(u64::MAX, &mut buf);
+        assert_eq!(buf.len(), 10, "10 bytes should be used when 64bits are encoded");
+    }
+}
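The varint used here is not plain LEB128: the encoder subtracts one per continuation byte, and the decoder adds it back, which is what makes git's offset-delta encoding unambiguous for multi-byte values. A minimal round-trip sketch; `decode` mirrors the `leb64decode` deleted from decode.rs above, on the assumption that `git_features::decode::leb64` behaves the same way:

```rust
// Round-trip sketch for git's offset-delta varint. `encode` mirrors
// `leb64_encode` above, `decode` mirrors the removed `leb64decode`.
fn encode(mut n: u64, buf: &mut [u8; 10]) -> &[u8] {
    let mut bytes_written = 1;
    buf[9] = n as u8 & 0x7f;
    for out in buf.iter_mut().rev().skip(1) {
        n >>= 7;
        if n == 0 {
            break;
        }
        n -= 1; // the off-by-one that distinguishes this from plain LEB128
        *out = 0x80 | (n as u8 & 0x7f);
        bytes_written += 1;
    }
    &buf[10 - bytes_written..]
}

fn decode(d: &[u8]) -> (u64, usize) {
    let mut i = 0;
    let mut c = d[i];
    i += 1;
    let mut value = c as u64 & 0x7f;
    while c & 0x80 != 0 {
        c = d[i];
        i += 1;
        value += 1; // undo the encoder's subtraction
        value = (value << 7) + (c as u64 & 0x7f);
    }
    (value, i)
}

fn main() {
    for n in [0, 127, 128, 16511, u64::MAX] {
        let mut buf = [0u8; 10];
        let encoded = encode(n, &mut buf);
        assert_eq!(decode(encoded), (n, encoded.len()));
    }
}
```

Encoding from the back of a fixed 10-byte buffer avoids a reversal pass, and 10 bytes are exactly enough for `u64::MAX`, which is what the test above asserts.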
diff --git a/git-pack/src/data/file/init.rs b/git-pack/src/data/file/init.rs
index ca7e3af5c86..390ad255f44 100644
--- a/git-pack/src/data/file/init.rs
+++ b/git-pack/src/data/file/init.rs
@@ -1,7 +1,5 @@
 use std::{convert::TryInto, path::Path};
 
-use filebuffer::FileBuffer;
-
 use crate::data;
 
 /// Instantiation
@@ -18,7 +16,7 @@ impl data::File {
         use crate::data::header::N32_SIZE;
         let hash_len = object_hash.len_in_bytes();
 
-        let data = FileBuffer::open(path).map_err(|e| data::header::decode::Error::Io {
+        let data = crate::mmap::read_only(path).map_err(|e| data::header::decode::Error::Io {
             source: e,
             path: path.to_owned(),
         })?;
diff --git a/git-pack/src/data/header.rs b/git-pack/src/data/header.rs
index 0ae297e80c2..348a4ca24ec 100644
--- a/git-pack/src/data/header.rs
+++ b/git-pack/src/data/header.rs
@@ -1,5 +1,3 @@
-use byteorder::{BigEndian, ByteOrder};
-
 use crate::data;
 
 pub(crate) const N32_SIZE: usize = std::mem::size_of::<u32>();
@@ -11,13 +9,13 @@ pub fn decode(data: &[u8; 12]) -> Result<(data::Version, u32), decode::Error> {
         return Err(decode::Error::Corrupt("Pack data type not recognized".into()));
     }
     ofs += N32_SIZE;
-    let kind = match BigEndian::read_u32(&data[ofs..ofs + N32_SIZE]) {
+    let kind = match crate::read_u32(&data[ofs..ofs + N32_SIZE]) {
         2 => data::Version::V2,
         3 => data::Version::V3,
         v => return Err(decode::Error::UnsupportedVersion(v)),
     };
     ofs += N32_SIZE;
-    let num_objects = BigEndian::read_u32(&data[ofs..ofs + N32_SIZE]);
+    let num_objects = crate::read_u32(&data[ofs..ofs + N32_SIZE]);
 
     Ok((kind, num_objects))
 }
diff --git a/git-pack/src/data/mod.rs b/git-pack/src/data/mod.rs
index 56d1adf0a03..d20a5538c65 100644
--- a/git-pack/src/data/mod.rs
+++ b/git-pack/src/data/mod.rs
@@ -7,7 +7,7 @@ pub type Offset = u64;
 /// An identifier to uniquely identify all packs loaded within a known context or namespace.
 pub type Id = u32;
 
-use filebuffer::FileBuffer;
+use memmap2::Mmap;
 
 /// A representation of a full- or delta-object within a pack
 #[derive(PartialEq, Eq, Debug, Hash, Ord, PartialOrd, Clone)]
@@ -62,7 +62,7 @@ impl Default for Version {
 
 /// A pack data file
 pub struct File {
-    data: FileBuffer,
+    data: Mmap,
     path: std::path::PathBuf,
     /// A value to represent this pack uniquely when used with cache lookup, or a way to identify this pack by its location on disk.
     /// The same location on disk should yield the same id.
diff --git a/git-pack/src/data/output/entry/iter_from_counts.rs b/git-pack/src/data/output/entry/iter_from_counts.rs
index 1c92123e8d2..2071212cad8 100644
--- a/git-pack/src/data/output/entry/iter_from_counts.rs
+++ b/git-pack/src/data/output/entry/iter_from_counts.rs
@@ -1,8 +1,8 @@
 use std::{cmp::Ordering, sync::Arc};
 
-use git_features::{parallel, progress::Progress};
+use git_features::{parallel, parallel::SequenceId, progress::Progress};
 
-use crate::data::{output, output::ChunkId};
+use crate::data::output;
 
 /// Given a known list of object `counts`, calculate entries ready to be put into a data pack.
 ///
@@ -44,7 +44,7 @@ pub fn iter_from_counts(
         thread_limit,
         chunk_size,
     }: Options,
-) -> impl Iterator<Item = Result<(ChunkId, Vec<output::Entry>), Error<Find::Error>>>
+) -> impl Iterator<Item = Result<(SequenceId, Vec<output::Entry>), Error<Find::Error>>>
        + parallel::reduce::Finalize<Reduce = reduce::Statistics<Error<Find::Error>>>
 where
     Find: crate::Find + Send + Clone + 'static,
@@ -152,7 +152,7 @@ where
         },
         {
             let counts = Arc::clone(&counts);
-            move |(chunk_id, chunk_range): (ChunkId, std::ops::Range<usize>), (buf, progress)| {
+            move |(chunk_id, chunk_range): (SequenceId, std::ops::Range<usize>), (buf, progress)| {
                 let mut out = Vec::new();
                 let chunk = &counts[chunk_range];
                 let mut stats = Outcome::default();
@@ -276,9 +276,9 @@ mod util {
 mod reduce {
     use std::marker::PhantomData;
 
-    use git_features::parallel;
+    use git_features::{parallel, parallel::SequenceId};
 
-    use super::{ChunkId, Outcome};
+    use super::Outcome;
     use crate::data::output;
 
     pub struct Statistics<E> {
@@ -296,8 +296,8 @@ mod reduce {
     }
 
     impl<Error> parallel::Reduce for Statistics<Error> {
-        type Input = Result<(ChunkId, Vec<output::Entry>, Outcome), Error>;
-        type FeedProduce = (ChunkId, Vec<output::Entry>);
+        type Input = Result<(SequenceId, Vec<output::Entry>, Outcome), Error>;
+        type FeedProduce = (SequenceId, Vec<output::Entry>);
         type Output = Outcome;
         type Error = Error;
diff --git a/git-pack/src/data/output/mod.rs b/git-pack/src/data/output/mod.rs
index bae9342d1f2..0c3e6bfdcec 100644
--- a/git-pack/src/data/output/mod.rs
+++ b/git-pack/src/data/output/mod.rs
@@ -39,6 +39,3 @@ pub mod entry;
 
 ///
 pub mod bytes;
-
-mod in_order;
-pub use in_order::{ChunkId, InOrderIter};
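`ChunkId` and `InOrderIter` are removed here because they now live in `git-features` as `parallel::SequenceId` and `parallel::InOrderIter` (see the import changes above). A minimal consumer-side sketch, assuming `SequenceId` is a `usize` sequence number and `InOrderIter` buffers results until the next id in the sequence arrives:

```rust
use git_features::parallel::InOrderIter;

fn main() {
    // Worker results may arrive out of order; each carries its sequence id.
    let out_of_order = vec![
        Ok::<_, std::convert::Infallible>((1usize, vec!["c", "d"])),
        Ok((0usize, vec!["a", "b"])),
    ];
    let ordered: Vec<_> = InOrderIter::from(out_of_order.into_iter())
        .collect::<Result<Vec<_>, _>>()
        .expect("no worker failed")
        .into_iter()
        .flatten()
        .collect();
    assert_eq!(ordered, ["a", "b", "c", "d"]);
}
```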
diff --git a/git-pack/src/index/access.rs b/git-pack/src/index/access.rs
index c535377d672..aaa3319f9b4 100644
--- a/git-pack/src/index/access.rs
+++ b/git-pack/src/index/access.rs
@@ -1,7 +1,5 @@
 use std::mem::size_of;
 
-use byteorder::{BigEndian, ByteOrder};
-
 use crate::{
     data,
     index::{self, EntryIndex, FAN_LEN},
@@ -39,7 +37,7 @@ impl index::File {
                 let (ofs, oid) = c.split_at(N32_SIZE);
                 Entry {
                     oid: git_hash::ObjectId::from(oid),
-                    pack_offset: BigEndian::read_u32(ofs) as u64,
+                    pack_offset: crate::read_u32(ofs) as u64,
                     crc32: None,
                 }
             }),
@@ -59,7 +57,7 @@ impl index::File {
                 .map(move |(oid, crc32, ofs32)| Entry {
                     oid: git_hash::ObjectId::from(oid),
                     pack_offset: self.pack_offset_from_offset_v2(ofs32, pack64_offset),
-                    crc32: Some(BigEndian::read_u32(crc32)),
+                    crc32: Some(crate::read_u32(crc32)),
                 }),
            _ => panic!("Cannot use iter_v2() on index of type {:?}", self.version),
        }
@@ -94,7 +92,7 @@ impl index::File {
             }
             index::Version::V1 => {
                 let start = V1_HEADER_SIZE + index * (N32_SIZE + self.hash_len);
-                BigEndian::read_u32(&self.data[start..][..N32_SIZE]) as u64
+                crate::read_u32(&self.data[start..][..N32_SIZE]) as u64
             }
         }
     }
@@ -110,7 +108,7 @@ impl index::File {
         match self.version {
             index::Version::V2 => {
                 let start = self.offset_crc32_v2() + index * N32_SIZE;
-                Some(BigEndian::read_u32(&self.data[start..start + N32_SIZE]))
+                Some(crate::read_u32(&self.data[start..start + N32_SIZE]))
             }
             index::Version::V1 => None,
         }
@@ -153,14 +151,13 @@ impl index::File {
         let mut ofs: Vec<_> = match self.version {
             index::Version::V1 => self.iter().map(|e| e.pack_offset).collect(),
             index::Version::V2 => {
-                let mut v = Vec::with_capacity(self.num_objects as usize);
-                let mut ofs32 = &self.data[self.offset_pack_offset_v2()..];
-                let pack_offset_64 = self.offset_pack_offset64_v2();
-                for _ in 0..self.num_objects {
-                    v.push(self.pack_offset_from_offset_v2(ofs32, pack_offset_64));
-                    ofs32 = &ofs32[4..];
-                }
-                v
+                let offset32_start = &self.data[self.offset_pack_offset_v2()..];
+                let pack_offset_64_start = self.offset_pack_offset64_v2();
+                offset32_start
+                    .chunks(N32_SIZE)
+                    .take(self.num_objects as usize)
+                    .map(|offset| self.pack_offset_from_offset_v2(offset, pack_offset_64_start))
+                    .collect()
             }
         };
         ofs.sort_unstable();
@@ -185,10 +182,10 @@ impl index::File {
     #[inline]
     fn pack_offset_from_offset_v2(&self, offset: &[u8], pack64_offset: usize) -> data::Offset {
         debug_assert_eq!(self.version, index::Version::V2);
-        let ofs32 = BigEndian::read_u32(offset);
+        let ofs32 = crate::read_u32(offset);
         if (ofs32 & N32_HIGH_BIT) == N32_HIGH_BIT {
             let from = pack64_offset + (ofs32 ^ N32_HIGH_BIT) as usize * N64_SIZE;
-            BigEndian::read_u64(&self.data[from..][..N64_SIZE])
+            crate::read_u64(&self.data[from..][..N64_SIZE])
         } else {
             ofs32 as u64
         }
diff --git a/git-pack/src/index/init.rs b/git-pack/src/index/init.rs
index 8f55c90f0ed..78b225ad665 100644
--- a/git-pack/src/index/init.rs
+++ b/git-pack/src/index/init.rs
@@ -1,8 +1,5 @@
 use std::{mem::size_of, path::Path};
 
-use byteorder::{BigEndian, ByteOrder};
-use filebuffer::FileBuffer;
-
 use crate::index::{self, Version, FAN_LEN, V2_SIGNATURE};
 
 /// Returned by [`index::File::at()`].
@@ -33,7 +30,7 @@ impl index::File {
     }
 
     fn at_inner(path: &Path, object_hash: git_hash::Kind) -> Result<index::File, Error> {
-        let data = FileBuffer::open(&path).map_err(|source| Error::Io {
+        let data = crate::mmap::read_only(path).map_err(|source| Error::Io {
             source,
             path: path.to_owned(),
         })?;
@@ -58,7 +55,7 @@ impl index::File {
         let d = {
             if let Version::V2 = kind {
                 let (vd, dr) = d.split_at(N32_SIZE);
-                let version = BigEndian::read_u32(vd);
+                let version = crate::read_u32(vd);
                 if version != Version::V2 as u32 {
                     return Err(Error::UnsupportedVersion { version });
                 }
@@ -88,7 +85,7 @@ impl index::File {
 fn read_fan(d: &[u8]) -> ([u32; FAN_LEN], usize) {
     let mut fan = [0; FAN_LEN];
     for (c, f) in d.chunks(N32_SIZE).zip(fan.iter_mut()) {
-        *f = BigEndian::read_u32(c);
+        *f = crate::read_u32(c);
     }
     (fan, FAN_LEN * N32_SIZE)
 }
izip { }; } -use filebuffer::FileBuffer; +use memmap2::Mmap; /// The version of an index file #[derive(PartialEq, Eq, Ord, PartialOrd, Debug, Hash, Clone, Copy)] @@ -106,7 +106,7 @@ const FAN_LEN: usize = 256; /// A representation of a pack index file pub struct File { - data: FileBuffer, + data: Mmap, path: std::path::PathBuf, version: Version, num_objects: u32, diff --git a/git-pack/src/index/write/encode.rs b/git-pack/src/index/write/encode.rs index feb3428ddf9..95dc6a45455 100644 --- a/git-pack/src/index/write/encode.rs +++ b/git-pack/src/index/write/encode.rs @@ -3,7 +3,6 @@ use std::{cmp::Ordering, io}; pub(crate) const LARGE_OFFSET_THRESHOLD: u64 = 0x7fff_ffff; pub(crate) const HIGH_BIT: u32 = 0x8000_0000; -use byteorder::{BigEndian, WriteBytesExt}; use git_features::{ hash, progress::{self, Progress}, @@ -35,7 +34,7 @@ pub(crate) fn write_to( hash::Write::new(out, kind.hash()), )); out.write_all(V2_SIGNATURE)?; - out.write_u32::(kind as u32)?; + out.write_all(&(kind as u32).to_be_bytes())?; progress.init(Some(4), progress::steps()); let start = std::time::Instant::now(); @@ -43,7 +42,7 @@ pub(crate) fn write_to( let fan_out = fanout(entries_sorted_by_oid.iter().map(|e| e.data.id.first_byte())); for value in fan_out { - out.write_u32::(value)?; + out.write_all(&value.to_be_bytes())?; } progress.inc(); @@ -55,7 +54,7 @@ pub(crate) fn write_to( progress.inc(); let _info = progress.add_child("writing crc32"); for entry in &entries_sorted_by_oid { - out.write_u32::(entry.data.crc32)?; + out.write_all(&entry.data.crc32.to_be_bytes())?; } progress.inc(); @@ -63,7 +62,7 @@ pub(crate) fn write_to( { let mut offsets64 = Vec::::new(); for entry in &entries_sorted_by_oid { - out.write_u32::(if entry.offset > LARGE_OFFSET_THRESHOLD { + let offset: u32 = if entry.offset > LARGE_OFFSET_THRESHOLD { assert!( offsets64.len() < LARGE_OFFSET_THRESHOLD as usize, "Encoding breakdown - way too many 64bit offsets" @@ -72,10 +71,11 @@ pub(crate) fn write_to( ((offsets64.len() - 1) as u32) | HIGH_BIT } else { entry.offset as u32 - })?; + }; + out.write_all(&offset.to_be_bytes())?; } for value in offsets64 { - out.write_u64::(value)?; + out.write_all(&value.to_be_bytes())?; } } diff --git a/git-pack/src/lib.rs b/git-pack/src/lib.rs index 4b81c8e6460..35f4e6f3c0c 100755 --- a/git-pack/src/lib.rs +++ b/git-pack/src/lib.rs @@ -32,6 +32,7 @@ pub mod cache; pub mod data; mod find_traits; + pub use find_traits::{Find, FindExt}; /// @@ -41,3 +42,28 @@ pub mod multi_index; /// pub mod verify; + +mod mmap { + use std::path::Path; + + pub fn read_only(path: &Path) -> std::io::Result { + let file = std::fs::File::open(path)?; + // SAFETY: we have to take the risk of somebody changing the file underneath. Git never writes into the same file. 
diff --git a/git-pack/src/lib.rs b/git-pack/src/lib.rs
index 4b81c8e6460..35f4e6f3c0c 100755
--- a/git-pack/src/lib.rs
+++ b/git-pack/src/lib.rs
@@ -32,6 +32,7 @@ pub mod cache;
 pub mod data;
 
 mod find_traits;
+
 pub use find_traits::{Find, FindExt};
 
 ///
@@ -41,3 +42,28 @@ pub mod multi_index;
 
 ///
 pub mod verify;
+
+mod mmap {
+    use std::path::Path;
+
+    pub fn read_only(path: &Path) -> std::io::Result<memmap2::Mmap> {
+        let file = std::fs::File::open(path)?;
+        // SAFETY: we have to take the risk of somebody changing the file underneath. Git never writes into the same file.
+        #[allow(unsafe_code)]
+        unsafe {
+            memmap2::Mmap::map(&file)
+        }
+    }
+}
+
+use std::convert::TryInto;
+
+#[inline]
+fn read_u32(b: &[u8]) -> u32 {
+    u32::from_be_bytes(b.try_into().unwrap())
+}
+
+#[inline]
+fn read_u64(b: &[u8]) -> u64 {
+    u64::from_be_bytes(b.try_into().unwrap())
+}
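The new `mmap` module plus the `read_u32`/`read_u64` helpers replace `filebuffer` and `byteorder` in one go: `memmap2::Mmap` derefs to `&[u8]`, so the rest of the crate can keep slicing. A standalone sketch of the same pattern, with a hypothetical pack path and the version field at byte offset 4 as decoded in data/header.rs above:

```rust
use std::{convert::TryInto, fs, io, path::Path};

fn read_u32(b: &[u8]) -> u32 {
    u32::from_be_bytes(b.try_into().expect("exactly 4 bytes"))
}

// Map a pack file read-only and decode its big-endian version field,
// mirroring what `data::File::at` does via `crate::mmap::read_only`.
fn pack_version(path: &Path) -> io::Result<u32> {
    let file = fs::File::open(path)?;
    // SAFETY: the same trade-off the patch documents - the file must not be
    // truncated while mapped; git replaces packs atomically instead of
    // rewriting them in place.
    let data = unsafe { memmap2::Mmap::map(&file)? };
    Ok(read_u32(&data[4..8])) // bytes 0..4 are the b"PACK" magic
}

fn main() -> io::Result<()> {
    // Hypothetical pack path, for illustration only.
    let version = pack_version(Path::new(".git/objects/pack/pack-1234.pack"))?;
    println!("pack version: {}", version);
    Ok(())
}
```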
diff --git a/git-pack/src/multi_index/access.rs b/git-pack/src/multi_index/access.rs
index 081873fe4f8..42956ba0001 100644
--- a/git-pack/src/multi_index/access.rs
+++ b/git-pack/src/multi_index/access.rs
@@ -1,7 +1,5 @@
 use std::path::{Path, PathBuf};
 
-use byteorder::{BigEndian, ByteOrder};
-
 use crate::{
     data,
     multi_index::{EntryIndex, File, PackIndex, Version},
@@ -102,15 +100,15 @@ impl File {
 
         const HIGH_BIT: u32 = 1 << 31;
 
-        let pack_index = BigEndian::read_u32(&self.data[start..][..4]);
+        let pack_index = crate::read_u32(&self.data[start..][..4]);
         let offset = &self.data[start + 4..][..4];
-        let ofs32 = BigEndian::read_u32(offset);
+        let ofs32 = crate::read_u32(offset);
         let pack_offset = if (ofs32 & HIGH_BIT) == HIGH_BIT {
             // We determine if large offsets are actually larger than 4GB and if not, we don't use the high-bit to signal anything
             // but allow the presence of the large-offset chunk to signal what's happening.
             if let Some(offsets_64) = self.large_offsets_ofs {
                 let from = offsets_64 + (ofs32 ^ HIGH_BIT) as usize * 8;
-                BigEndian::read_u64(&self.data[from..][..8])
+                crate::read_u64(&self.data[from..][..8])
             } else {
                 ofs32 as u64
             }
diff --git a/git-pack/src/multi_index/chunk.rs b/git-pack/src/multi_index/chunk.rs
index 8d6c734a556..5aeae5fe3f2 100644
--- a/git-pack/src/multi_index/chunk.rs
+++ b/git-pack/src/multi_index/chunk.rs
@@ -108,8 +108,6 @@ pub mod index_names {
 pub mod fanout {
     use std::convert::TryInto;
 
-    use byteorder::{BigEndian, WriteBytesExt};
-
     use crate::multi_index;
 
     /// The size of the fanout table
@@ -138,7 +136,7 @@ pub mod fanout {
         let fanout = crate::index::write::encode::fanout(sorted_entries.iter().map(|e| e.id.first_byte()));
 
         for value in fanout {
-            out.write_u32::<BigEndian>(value)?;
+            out.write_all(&value.to_be_bytes())?;
         }
         Ok(())
     }
@@ -178,8 +176,6 @@ pub mod lookup {
 pub mod offsets {
     use std::{convert::TryInto, ops::Range};
 
-    use byteorder::{BigEndian, WriteBytesExt};
-
     use crate::multi_index;
 
     /// The id uniquely identifying the offsets table.
@@ -199,7 +195,7 @@ pub mod offsets {
         let mut num_large_offsets = 0u32;
 
         for entry in sorted_entries {
-            out.write_u32::<BigEndian>(entry.pack_index)?;
+            out.write_all(&entry.pack_index.to_be_bytes())?;
 
             let offset: u32 = if large_offsets_needed {
                 if entry.pack_offset > LARGE_OFFSET_THRESHOLD {
@@ -215,7 +211,7 @@ pub mod offsets {
                     .try_into()
                     .expect("without large offsets, pack-offset fits u32")
             };
-            out.write_u32::<BigEndian>(offset)?;
+            out.write_all(&offset.to_be_bytes())?;
         }
         Ok(())
     }
@@ -231,8 +227,6 @@ pub mod offsets {
 pub mod large_offsets {
     use std::ops::Range;
 
-    use byteorder::{BigEndian, WriteBytesExt};
-
     use crate::{index::write::encode::LARGE_OFFSET_THRESHOLD, multi_index};
 
     /// The id uniquely identifying the large offsets table (with 64 bit offsets)
@@ -267,7 +261,7 @@ pub mod large_offsets {
             .iter()
             .filter_map(|e| (e.pack_offset > LARGE_OFFSET_THRESHOLD).then(|| e.pack_offset))
         {
-            out.write_u64::<BigEndian>(offset)?;
+            out.write_all(&offset.to_be_bytes())?;
             num_large_offsets = num_large_offsets
                 .checked_sub(1)
                 .expect("BUG: wrote more offsets than previously found");
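Both the index writer (encode.rs above) and the multi-index chunks rely on the same 31-bit offset trick: offsets up to `LARGE_OFFSET_THRESHOLD` are stored directly as `u32`, anything larger stores a position within a separate table of 64-bit offsets, flagged by the high bit. A self-contained sketch of that scheme:

```rust
const LARGE_OFFSET_THRESHOLD: u64 = 0x7fff_ffff;
const HIGH_BIT: u32 = 0x8000_0000;

// Small offsets are stored directly; large ones store an index into a
// side table of u64 offsets, with the high bit set as a marker.
fn encode_offset(pack_offset: u64, offsets64: &mut Vec<u64>) -> u32 {
    if pack_offset > LARGE_OFFSET_THRESHOLD {
        offsets64.push(pack_offset);
        ((offsets64.len() - 1) as u32) | HIGH_BIT
    } else {
        pack_offset as u32
    }
}

fn decode_offset(ofs32: u32, offsets64: &[u64]) -> u64 {
    if ofs32 & HIGH_BIT == HIGH_BIT {
        offsets64[(ofs32 ^ HIGH_BIT) as usize]
    } else {
        ofs32 as u64
    }
}

fn main() {
    let mut large = Vec::new();
    let small = encode_offset(42, &mut large);
    let big = encode_offset(5 * 1024 * 1024 * 1024, &mut large); // 5 GiB
    assert_eq!(decode_offset(small, &large), 42);
    assert_eq!(decode_offset(big, &large), 5 * 1024 * 1024 * 1024);
}
```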
diff --git a/git-pack/src/multi_index/init.rs b/git-pack/src/multi_index/init.rs
index c3ad6445b30..ca1afb22d24 100644
--- a/git-pack/src/multi_index/init.rs
+++ b/git-pack/src/multi_index/init.rs
@@ -1,8 +1,5 @@
 use std::{convert::TryFrom, path::Path};
 
-use byteorder::{BigEndian, ByteOrder};
-use filebuffer::FileBuffer;
-
 use crate::multi_index::{chunk, File, Version};
 
 mod error {
@@ -52,7 +49,7 @@ impl TryFrom<&Path> for File {
     type Error = Error;
 
     fn try_from(path: &Path) -> Result<Self, Self::Error> {
-        let data = FileBuffer::open(path).map_err(|source| Error::Io {
+        let data = crate::mmap::read_only(path).map_err(|source| Error::Io {
             source,
             path: path.to_owned(),
         })?;
@@ -91,7 +88,7 @@ impl TryFrom<&Path> for File {
 
             let (_num_base_files, data) = data.split_at(1); // TODO: handle base files once it's clear what this does
             let (num_indices, _) = data.split_at(4);
-            let num_indices = BigEndian::read_u32(num_indices);
+            let num_indices = crate::read_u32(num_indices);
 
             (version, object_hash, num_chunks, num_indices)
         };
diff --git a/git-pack/src/multi_index/mod.rs b/git-pack/src/multi_index/mod.rs
index 31532b61dbe..ac84ea95b30 100644
--- a/git-pack/src/multi_index/mod.rs
+++ b/git-pack/src/multi_index/mod.rs
@@ -1,6 +1,6 @@
 use std::path::PathBuf;
 
-use filebuffer::FileBuffer;
+use memmap2::Mmap;
 
 /// Known multi-index file versions
 #[derive(PartialEq, Eq, Ord, PartialOrd, Debug, Hash, Clone, Copy)]
@@ -25,7 +25,7 @@ pub type EntryIndex = u32;
 /// A representation of an index file for multiple packs at the same time, typically stored in a file
 /// named 'multi-pack-index'.
 pub struct File {
-    data: FileBuffer,
+    data: Mmap,
     path: std::path::PathBuf,
     version: Version,
     hash_len: usize,
diff --git a/git-pack/src/multi_index/write.rs b/git-pack/src/multi_index/write.rs
index 04e18d4e900..9de5f50489b 100644
--- a/git-pack/src/multi_index/write.rs
+++ b/git-pack/src/multi_index/write.rs
@@ -5,7 +5,6 @@ use std::{
     time::{Instant, SystemTime},
 };
 
-use byteorder::{BigEndian, WriteBytesExt};
 use git_features::progress::Progress;
 
 use crate::multi_index;
@@ -217,7 +216,7 @@ impl multi_index::File {
         out.write_all(&[object_hash as u8])?;
         out.write_all(&[num_chunks])?;
         out.write_all(&[0])?; /* unused number of base files */
-        out.write_u32::<BigEndian>(num_indices)?;
+        out.write_all(&num_indices.to_be_bytes())?;
 
         Ok(Self::HEADER_LEN)
     }
diff --git a/git-pack/tests/pack/data/output/count_and_entries.rs b/git-pack/tests/pack/data/output/count_and_entries.rs
index 4cad9f8aea7..e20616fa9f1 100644
--- a/git-pack/tests/pack/data/output/count_and_entries.rs
+++ b/git-pack/tests/pack/data/output/count_and_entries.rs
@@ -1,6 +1,9 @@
 use std::{convert::Infallible, sync::atomic::AtomicBool};
 
-use git_features::{parallel::reduce::Finalize, progress};
+use git_features::{
+    parallel::{reduce::Finalize, InOrderIter},
+    progress,
+};
 use git_odb::{compound, pack, pack::FindExt};
 use git_pack::data::{
     output,
@@ -291,7 +294,7 @@ fn traversals() -> crate::Result {
             ..Default::default()
         },
     );
-    let entries: Vec<_> = output::InOrderIter::from(entries_iter.by_ref())
+    let entries: Vec<_> = InOrderIter::from(entries_iter.by_ref())
         .collect::<Result<Vec<_>, _>>()?
         .into_iter()
         .flatten()
diff --git a/git-pack/tests/pack/data/output/mod.rs b/git-pack/tests/pack/data/output/mod.rs
index 73e63a0d132..fdf93cfb5ea 100644
--- a/git-pack/tests/pack/data/output/mod.rs
+++ b/git-pack/tests/pack/data/output/mod.rs
@@ -44,4 +44,3 @@ fn db(kind: DbKind) -> crate::Result {
 }
 
 mod count_and_entries;
-mod in_order_iter;
diff --git a/git-pack/tests/pack/index.rs b/git-pack/tests/pack/index.rs
index 94fde45d56e..441b5f9fe88 100644
--- a/git-pack/tests/pack/index.rs
+++ b/git-pack/tests/pack/index.rs
@@ -76,7 +76,6 @@ mod file {
     mod any {
         use std::{fs, io, sync::atomic::AtomicBool};
 
-        use filebuffer::FileBuffer;
         use git_features::progress;
         use git_odb::pack;
         use git_pack::data::{input, EntryRange};
@@ -89,8 +88,10 @@ mod file {
             for compressed in &[input::EntryDataMode::Crc32, input::EntryDataMode::KeepAndCrc32] {
                 for (index_path, data_path) in V2_PACKS_AND_INDICES {
                     let resolve = {
-                        let buf =
-                            git_features::threading::OwnShared::new(FileBuffer::open(fixture_path(data_path))?);
+                        let buf = git_features::threading::OwnShared::new({
+                            let file = std::fs::File::open(fixture_path(data_path))?;
+                            unsafe { memmap2::Mmap::map(&file)? }
+                        });
                         move |entry: EntryRange, out: &mut Vec<u8>| {
                             buf.get(entry.start as usize..entry.end as usize)
                                 .map(|slice| out.copy_from_slice(slice))
diff --git a/git-ref/Cargo.toml b/git-ref/Cargo.toml
index 97e7621496a..979ec377091 100644
--- a/git-ref/Cargo.toml
+++ b/git-ref/Cargo.toml
@@ -38,7 +38,7 @@ serde = { version = "1.0.114", optional = true, default-features = false, featur
 os_str_bytes = "6.0.0"
 
 # packed refs
-filebuffer = "0.4.0"
+memmap2 = "0.5.0"
 
 [dev-dependencies]
 git-testtools = { path = "../tests/tools" }
diff --git a/git-ref/src/lib.rs b/git-ref/src/lib.rs
index da554a12b15..c19e75cc355 100644
--- a/git-ref/src/lib.rs
+++ b/git-ref/src/lib.rs
@@ -16,8 +16,7 @@
 //! * references are stored in a single human-readable file, along with their targets if they are symbolic.
 //! * **ref-table**
 //!   * supersedes all of the above to allow handling hundreds of thousands of references.
-#![forbid(unsafe_code)]
-#![deny(missing_docs, rust_2018_idioms)]
+#![deny(unsafe_code, missing_docs, rust_2018_idioms)]
 
 use std::borrow::Cow;
diff --git a/git-ref/src/store/packed/buffer.rs b/git-ref/src/store/packed/buffer.rs
index 8ff97f1f174..912ebe85747 100644
--- a/git-ref/src/store/packed/buffer.rs
+++ b/git-ref/src/store/packed/buffer.rs
@@ -19,7 +19,7 @@ impl AsRef<[u8]> for packed::Backing {
 pub mod open {
     use std::path::PathBuf;
 
-    use filebuffer::FileBuffer;
+    use memmap2::Mmap;
 
     use crate::store_impl::packed;
 
@@ -35,7 +35,13 @@ pub mod open {
             let backing = if std::fs::metadata(&path)?.len() <= use_memory_map_if_larger_than_bytes {
                 packed::Backing::InMemory(std::fs::read(&path)?)
             } else {
-                packed::Backing::Mapped(FileBuffer::open(&path)?)
+                packed::Backing::Mapped(
+                    // SAFETY: we have to take the risk of somebody changing the file underneath. Git never writes into the same file.
+                    #[allow(unsafe_code)]
+                    unsafe {
+                        Mmap::map(&std::fs::File::open(&path)?)?
+                    },
+                )
             };
 
             let (offset, sorted) = {
diff --git a/git-ref/src/store/packed/mod.rs b/git-ref/src/store/packed/mod.rs
index db775e5f6ce..45d2d925e59 100644
--- a/git-ref/src/store/packed/mod.rs
+++ b/git-ref/src/store/packed/mod.rs
@@ -1,9 +1,9 @@
 use std::path::PathBuf;
 
-use filebuffer::FileBuffer;
 use git_features::threading::OwnShared;
 use git_hash::ObjectId;
 use git_object::bstr::{BStr, BString};
+use memmap2::Mmap;
 
 use crate::{transaction::RefEdit, FullNameRef};
 
@@ -12,7 +12,7 @@ enum Backing {
     /// The buffer is loaded entirely in memory, along with the `offset` to the first record past the header.
     InMemory(Vec<u8>),
     /// The buffer is mapping the file on disk, along with the offset to the first record past the header
-    Mapped(FileBuffer),
+    Mapped(Mmap),
 }
 
 /// A buffer containing a packed-ref file that is either memory mapped or fully in-memory depending on a cutoff.
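The packed-refs buffer keeps its two-tier strategy, now backed by `memmap2`: files at or below a size cutoff are read into memory outright, larger ones are mapped. A condensed sketch of the pattern with the types reduced to their essentials:

```rust
use std::path::Path;

// Sketch of git-ref's packed-refs backing after the switch to memmap2.
enum Backing {
    InMemory(Vec<u8>),
    Mapped(memmap2::Mmap),
}

impl AsRef<[u8]> for Backing {
    fn as_ref(&self) -> &[u8] {
        match self {
            Backing::InMemory(v) => v,
            Backing::Mapped(m) => m,
        }
    }
}

fn open(path: &Path, use_memory_map_if_larger_than_bytes: u64) -> std::io::Result<Backing> {
    Ok(if std::fs::metadata(path)?.len() <= use_memory_map_if_larger_than_bytes {
        Backing::InMemory(std::fs::read(path)?)
    } else {
        // SAFETY: as elsewhere in the patch, the file may change underneath us,
        // but git replaces packed-refs atomically rather than rewriting in place.
        Backing::Mapped(unsafe { memmap2::Mmap::map(&std::fs::File::open(path)?)? })
    })
}
```

Dispatching through `AsRef<[u8]>` lets all parsing code stay agnostic to which variant it received.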
diff --git a/git-worktree/CHANGELOG.md b/git-worktree/CHANGELOG.md
new file mode 100644
index 00000000000..e48aefbcad3
--- /dev/null
+++ b/git-worktree/CHANGELOG.md
@@ -0,0 +1,30 @@
+# Changelog
+
+All notable changes to this project will be documented in this file.
+
+The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
+and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
+
+## 0.0.0 (2022-01-08)
+
+Reserve the name for a necessary crate of the `gitoxide` project.
+
+### Commit Statistics
+
+<csr-read-only-do-not-edit/>
+
+ - 2 commits contributed to the release.
+ - 0 commits were understood as [conventional](https://www.conventionalcommits.org).
+ - 1 unique issue was worked on: [#293](https://github.com/Byron/gitoxide/issues/293)
+
+### Commit Details
+
+<csr-read-only-do-not-edit/>
+
+<details><summary>view details</summary>
+
+ * **[#293](https://github.com/Byron/gitoxide/issues/293)**
+    - update changelog ([`b3ee7c6`](https://github.com/Byron/gitoxide/commit/b3ee7c6f7553de6bff4934bbdf38f6c6ea2cf349))
+    - preempt the eventual need for a worktree implementation ([`bce67d8`](https://github.com/Byron/gitoxide/commit/bce67d8ec58f78a1fce1c76f7b93d9650f9f550e))
+</details>
+
diff --git a/git-worktree/Cargo.toml b/git-worktree/Cargo.toml
new file mode 100644
index 00000000000..6aa73e98b5e
--- /dev/null
+++ b/git-worktree/Cargo.toml
@@ -0,0 +1,15 @@
+[package]
+name = "git-worktree"
+version = "0.0.0"
+repository = "https://github.com/Byron/gitoxide"
+license = "MIT/Apache-2.0"
+description = "A WIP crate of the gitoxide project dedicated to implementing everything around working trees and git excludes"
+authors = ["Sebastian Thiel <sebastian.thiel@icloud.com>"]
+edition = "2018"
+
+[lib]
+doctest = false
+
+# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
+
+[dependencies]
diff --git a/git-worktree/src/lib.rs b/git-worktree/src/lib.rs
new file mode 100644
index 00000000000..d7a83e4f525
--- /dev/null
+++ b/git-worktree/src/lib.rs
@@ -0,0 +1 @@
+#![forbid(unsafe_code, rust_2018_idioms)]
diff --git a/gitoxide-core/src/pack/create.rs b/gitoxide-core/src/pack/create.rs
index 8a31bef326e..dcd0df23a65 100644
--- a/gitoxide-core/src/pack/create.rs
+++ b/gitoxide-core/src/pack/create.rs
@@ -8,6 +8,7 @@ use git_repository::{
     interrupt,
     objs::bstr::ByteVec,
     odb::{pack, pack::FindExt},
+    parallel::InOrderIter,
     prelude::Finalize,
     progress, traverse, Progress,
 };
@@ -237,7 +238,7 @@ where
     let num_objects = counts.len();
     let mut in_order_entries = {
         let progress = progress.add_child("creating entries");
-        pack::data::output::InOrderIter::from(pack::data::output::entry::iter_from_counts(
+        InOrderIter::from(pack::data::output::entry::iter_from_counts(
            counts,
            handle,
            progress,
diff --git a/gitoxide-core/src/repository.rs b/gitoxide-core/src/repository.rs
index 15d93d3d0da..aad5dfa7944 100644
--- a/gitoxide-core/src/repository.rs
+++ b/gitoxide-core/src/repository.rs
@@ -8,12 +8,12 @@ pub fn init(directory: Option<PathBuf>) -> Result<git_repository::Path> {
 }
 
 pub mod verify {
-    use crate::pack;
-    use crate::OutputFormat;
     use std::{path::PathBuf, sync::atomic::AtomicBool};
 
     use git_repository::Progress;
 
+    use crate::{pack, OutputFormat};
+
     /// A general purpose context for many operations provided here
     pub struct Context {
         /// If set, provide statistics to `out` in the given format
@@ -41,6 +41,7 @@ pub mod verify {
         }: Context,
     ) -> anyhow::Result<()> {
         let repo = git_repository::open(repo)?;
+        #[cfg_attr(not(feature = "serde1"), allow(unused))]
         let outcome = repo.objects.verify_integrity(
             progress,
             should_interrupt,
diff --git a/tests/tools/src/lib.rs b/tests/tools/src/lib.rs
index 78ef564b750..f60ecad5b37 100644
--- a/tests/tools/src/lib.rs
+++ b/tests/tools/src/lib.rs
@@ -19,7 +19,9 @@ pub fn hex_to_id(hex: &str) -> git_hash::ObjectId {
 pub fn fixture_path(path: impl AsRef<Path>) -> PathBuf {
     PathBuf::from("tests").join("fixtures").join(path.as_ref())
 }
-pub fn scripted_fixture_repo_read_only(script_name: &str) -> std::result::Result<PathBuf, Box<dyn std::error::Error>> {
+pub fn scripted_fixture_repo_read_only(
+    script_name: impl AsRef<Path>,
+) -> std::result::Result<PathBuf, Box<dyn std::error::Error>> {
     scripted_fixture_repo_read_only_with_args(script_name, None)
 }
 
@@ -59,7 +61,7 @@ pub fn copy_recursively_into_existing_dir(src_dir: impl AsRef<Path>, dst_dir: im
 /// Returns the directory at which the data is present
 pub fn scripted_fixture_repo_read_only_with_args(
-    script_name: &str,
+    script_name: impl AsRef<Path>,
     args: impl IntoIterator<Item = &'static str>,
 ) -> std::result::Result<PathBuf, Box<dyn std::error::Error>> {
     let script_path = fixture_path(script_name);