From ffee55c18c19c551d58a6d68e1b3feb7618d0455 Mon Sep 17 00:00:00 2001 From: Nicholas Nethercote Date: Tue, 25 Mar 2025 20:52:17 +1100 Subject: [PATCH] rustdoc: Rearrange `Item`/`ItemInner`. The `Item` struct is 48 bytes and contains a `Box`; `ItemInner` is 104 bytes. This is an odd arrangement. Normally you'd have one of the following. - A single large struct, which avoids the allocation for the `Box`, but can result in lots of wasted space in unused parts of a container like `Vec`, `HashSet`, etc. - Or, something like `struct Item(Box)`, which requires the `Box` allocation but gives a very small Item size, which is good for containers like `Vec`. `Item`/`ItemInner` currently gets the worst of both worlds: it always requires a `Box`, but `Item` is also pretty big and so wastes space in containers. It would make sense to push it in one direction or the other. #138916 showed that the first option is a regression for rustdoc, so this commit does the second option, which improves speed and reduces memory usage. --- src/librustdoc/clean/auto_trait.rs | 8 ++--- src/librustdoc/clean/blanket_impl.rs | 8 ++--- src/librustdoc/clean/inline.rs | 14 ++++---- src/librustdoc/clean/mod.rs | 2 +- src/librustdoc/clean/types.rs | 42 +++++++++++++--------- src/librustdoc/formats/cache.rs | 1 - src/librustdoc/json/conversions.rs | 2 +- src/librustdoc/passes/propagate_doc_cfg.rs | 6 ++-- 8 files changed, 46 insertions(+), 37 deletions(-) diff --git a/src/librustdoc/clean/auto_trait.rs b/src/librustdoc/clean/auto_trait.rs index 6ace626fdcd93..a48f5c623cd24 100644 --- a/src/librustdoc/clean/auto_trait.rs +++ b/src/librustdoc/clean/auto_trait.rs @@ -114,8 +114,8 @@ fn synthesize_auto_trait_impl<'tcx>( }; Some(clean::Item { - name: None, inner: Box::new(clean::ItemInner { + name: None, attrs: Default::default(), stability: None, kind: clean::ImplItem(Box::new(clean::Impl { @@ -127,10 +127,10 @@ fn synthesize_auto_trait_impl<'tcx>( polarity, kind: clean::ImplKind::Auto, })), + item_id: clean::ItemId::Auto { trait_: trait_def_id, for_: item_def_id }, + cfg: None, + inline_stmt_id: None, }), - item_id: clean::ItemId::Auto { trait_: trait_def_id, for_: item_def_id }, - cfg: None, - inline_stmt_id: None, }) } diff --git a/src/librustdoc/clean/blanket_impl.rs b/src/librustdoc/clean/blanket_impl.rs index a6d9676dd84ad..89245fee51551 100644 --- a/src/librustdoc/clean/blanket_impl.rs +++ b/src/librustdoc/clean/blanket_impl.rs @@ -83,9 +83,9 @@ pub(crate) fn synthesize_blanket_impls( cx.generated_synthetics.insert((ty.skip_binder(), trait_def_id)); blanket_impls.push(clean::Item { - name: None, - item_id: clean::ItemId::Blanket { impl_id: impl_def_id, for_: item_def_id }, inner: Box::new(clean::ItemInner { + name: None, + item_id: clean::ItemId::Blanket { impl_id: impl_def_id, for_: item_def_id }, attrs: Default::default(), stability: None, kind: clean::ImplItem(Box::new(clean::Impl { @@ -122,9 +122,9 @@ pub(crate) fn synthesize_blanket_impls( None, ))), })), + cfg: None, + inline_stmt_id: None, }), - cfg: None, - inline_stmt_id: None, }); } } diff --git a/src/librustdoc/clean/inline.rs b/src/librustdoc/clean/inline.rs index e973b89b2375b..d9cef28b3d806 100644 --- a/src/librustdoc/clean/inline.rs +++ b/src/librustdoc/clean/inline.rs @@ -151,7 +151,7 @@ pub(crate) fn try_inline( let mut item = crate::clean::generate_item_with_correct_attrs(cx, kind, did, name, import_def_id, None); // The visibility needs to reflect the one from the reexport and not from the "source" DefId. - item.inline_stmt_id = import_def_id; + item.inner.inline_stmt_id = import_def_id; ret.push(item); Some(ret) } @@ -665,11 +665,11 @@ fn build_module_items( // Primitive types can't be inlined so generate an import instead. let prim_ty = clean::PrimitiveType::from(p); items.push(clean::Item { - name: None, - // We can use the item's `DefId` directly since the only information ever used - // from it is `DefId.krate`. - item_id: ItemId::DefId(did), inner: Box::new(clean::ItemInner { + name: None, + // We can use the item's `DefId` directly since the only information ever + // used from it is `DefId.krate`. + item_id: ItemId::DefId(did), attrs: Default::default(), stability: None, kind: clean::ImportItem(clean::Import::new_simple( @@ -689,9 +689,9 @@ fn build_module_items( }, true, )), + cfg: None, + inline_stmt_id: None, }), - cfg: None, - inline_stmt_id: None, }); } else if let Some(i) = try_inline(cx, res, item.ident.name, attrs, visited) { items.extend(i) diff --git a/src/librustdoc/clean/mod.rs b/src/librustdoc/clean/mod.rs index de6dc088176ff..ada370a8d52e2 100644 --- a/src/librustdoc/clean/mod.rs +++ b/src/librustdoc/clean/mod.rs @@ -210,7 +210,7 @@ fn generate_item_with_correct_attrs( let name = renamed.or(Some(name)); let mut item = Item::from_def_id_and_attrs_and_parts(def_id, name, kind, attrs, cfg); - item.inline_stmt_id = import_id; + item.inner.inline_stmt_id = import_id; item } diff --git a/src/librustdoc/clean/types.rs b/src/librustdoc/clean/types.rs index 1207f2f0360f2..356691e71b834 100644 --- a/src/librustdoc/clean/types.rs +++ b/src/librustdoc/clean/types.rs @@ -311,26 +311,31 @@ pub(crate) enum ExternalLocation { /// directly to the AST's concept of an item; it's a strict superset. #[derive(Clone)] pub(crate) struct Item { - /// The name of this item. - /// Optional because not every item has a name, e.g. impls. - pub(crate) name: Option, pub(crate) inner: Box, - pub(crate) item_id: ItemId, - /// This is the `LocalDefId` of the `use` statement if the item was inlined. - /// The crate metadata doesn't hold this information, so the `use` statement - /// always belongs to the current crate. - pub(crate) inline_stmt_id: Option, - pub(crate) cfg: Option>, } +// Why does the `Item`/`ItemInner` split exist? `Vec`s are common, and +// without the split `Item` would be a large type (100+ bytes) which results in +// lots of wasted space in the unused parts of a `Vec`. With the split, +// `Item` is just 8 bytes, and the wasted space is avoided, at the cost of an +// extra allocation per item. This is a performance win. #[derive(Clone)] pub(crate) struct ItemInner { + /// The name of this item. + /// Optional because not every item has a name, e.g. impls. + pub(crate) name: Option, /// Information about this item that is specific to what kind of item it is. /// E.g., struct vs enum vs function. pub(crate) kind: ItemKind, pub(crate) attrs: Attributes, /// The effective stability, filled out by the `propagate-stability` pass. pub(crate) stability: Option, + pub(crate) item_id: ItemId, + /// This is the `LocalDefId` of the `use` statement if the item was inlined. + /// The crate metadata doesn't hold this information, so the `use` statement + /// always belongs to the current crate. + pub(crate) inline_stmt_id: Option, + pub(crate) cfg: Option>, } impl std::ops::Deref for Item { @@ -488,11 +493,15 @@ impl Item { trace!("name={name:?}, def_id={def_id:?} cfg={cfg:?}"); Item { - item_id: def_id.into(), - inner: Box::new(ItemInner { kind, attrs, stability: None }), - name, - cfg, - inline_stmt_id: None, + inner: Box::new(ItemInner { + item_id: def_id.into(), + kind, + attrs, + stability: None, + name, + cfg, + inline_stmt_id: None, + }), } } @@ -2618,13 +2627,14 @@ mod size_asserts { use super::*; // tidy-alphabetical-start - static_assert_size!(Crate, 56); // frequently moved by-value + static_assert_size!(Crate, 16); // frequently moved by-value static_assert_size!(DocFragment, 32); static_assert_size!(GenericArg, 32); static_assert_size!(GenericArgs, 24); static_assert_size!(GenericParamDef, 40); static_assert_size!(Generics, 16); - static_assert_size!(Item, 48); + static_assert_size!(Item, 8); + static_assert_size!(ItemInner, 136); static_assert_size!(ItemKind, 48); static_assert_size!(PathSegment, 32); static_assert_size!(Type, 32); diff --git a/src/librustdoc/formats/cache.rs b/src/librustdoc/formats/cache.rs index 2648641e53e75..f8f7816f5a671 100644 --- a/src/librustdoc/formats/cache.rs +++ b/src/librustdoc/formats/cache.rs @@ -385,7 +385,6 @@ impl DocFolder for CacheBuilder<'_, '_> { // implementations elsewhere. let ret = if let clean::Item { inner: box clean::ItemInner { kind: clean::ImplItem(ref i), .. }, - .. } = item { // Figure out the id of this impl. This may map to a diff --git a/src/librustdoc/json/conversions.rs b/src/librustdoc/json/conversions.rs index a5351b350dd5f..9d8eb70fbe071 100644 --- a/src/librustdoc/json/conversions.rs +++ b/src/librustdoc/json/conversions.rs @@ -43,7 +43,7 @@ impl JsonRenderer<'_> { let attrs = item.attributes(self.tcx, self.cache(), true); let span = item.span(self.tcx); let visibility = item.visibility(self.tcx); - let clean::Item { name, item_id, .. } = item; + let clean::ItemInner { name, item_id, .. } = *item.inner; let id = self.id_from_item(&item); let inner = match item.kind { clean::KeywordItem => return None, diff --git a/src/librustdoc/passes/propagate_doc_cfg.rs b/src/librustdoc/passes/propagate_doc_cfg.rs index 572c9bf7552fa..eddafa9ba8e49 100644 --- a/src/librustdoc/passes/propagate_doc_cfg.rs +++ b/src/librustdoc/passes/propagate_doc_cfg.rs @@ -61,7 +61,7 @@ impl CfgPropagator<'_, '_> { let (_, cfg) = merge_attrs(self.cx, item.attrs.other_attrs.as_slice(), Some((&attrs, None))); - item.cfg = cfg; + item.inner.cfg = cfg; } } @@ -71,7 +71,7 @@ impl DocFolder for CfgPropagator<'_, '_> { self.merge_with_parent_attributes(&mut item); - let new_cfg = match (self.parent_cfg.take(), item.cfg.take()) { + let new_cfg = match (self.parent_cfg.take(), item.inner.cfg.take()) { (None, None) => None, (Some(rc), None) | (None, Some(rc)) => Some(rc), (Some(mut a), Some(b)) => { @@ -81,7 +81,7 @@ impl DocFolder for CfgPropagator<'_, '_> { } }; self.parent_cfg = new_cfg.clone(); - item.cfg = new_cfg; + item.inner.cfg = new_cfg; let old_parent = if let Some(def_id) = item.item_id.as_def_id().and_then(|def_id| def_id.as_local()) {