Skip to content

Commit a0a62ea

Browse files
authored
Prevent sync lookups from reverting to awaiting block (#6443)
* Prevent sync lookups from reverting to awaiting block * Remove stale comment
1 parent da290e8 commit a0a62ea

File tree

4 files changed

+128
-102
lines changed

4 files changed

+128
-102
lines changed

beacon_node/network/src/sync/block_lookups/common.rs

Lines changed: 24 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@ use std::sync::Arc;
1313
use types::blob_sidecar::FixedBlobSidecarList;
1414
use types::{DataColumnSidecarList, SignedBeaconBlock};
1515

16-
use super::single_block_lookup::DownloadResult;
16+
use super::single_block_lookup::{ComponentRequests, DownloadResult};
1717
use super::SingleLookupId;
1818

1919
#[derive(Debug, Copy, Clone)]
@@ -42,7 +42,7 @@ pub trait RequestState<T: BeaconChainTypes> {
4242
&self,
4343
id: Id,
4444
peer_id: PeerId,
45-
downloaded_block: Option<Arc<SignedBeaconBlock<T::EthSpec>>>,
45+
expected_blobs: usize,
4646
cx: &mut SyncNetworkContext<T>,
4747
) -> Result<LookupRequestResult, LookupRequestError>;
4848

@@ -61,7 +61,7 @@ pub trait RequestState<T: BeaconChainTypes> {
6161
fn response_type() -> ResponseType;
6262

6363
/// A getter for the `BlockRequestState` or `BlobRequestState` associated with this trait.
64-
fn request_state_mut(request: &mut SingleBlockLookup<T>) -> &mut Self;
64+
fn request_state_mut(request: &mut SingleBlockLookup<T>) -> Result<&mut Self, &'static str>;
6565

6666
/// A getter for a reference to the `SingleLookupRequestState` associated with this trait.
6767
fn get_state(&self) -> &SingleLookupRequestState<Self::VerifiedResponseType>;
@@ -77,7 +77,7 @@ impl<T: BeaconChainTypes> RequestState<T> for BlockRequestState<T::EthSpec> {
7777
&self,
7878
id: SingleLookupId,
7979
peer_id: PeerId,
80-
_: Option<Arc<SignedBeaconBlock<T::EthSpec>>>,
80+
_: usize,
8181
cx: &mut SyncNetworkContext<T>,
8282
) -> Result<LookupRequestResult, LookupRequestError> {
8383
cx.block_lookup_request(id, peer_id, self.requested_block_root)
@@ -107,8 +107,8 @@ impl<T: BeaconChainTypes> RequestState<T> for BlockRequestState<T::EthSpec> {
107107
fn response_type() -> ResponseType {
108108
ResponseType::Block
109109
}
110-
fn request_state_mut(request: &mut SingleBlockLookup<T>) -> &mut Self {
111-
&mut request.block_request_state
110+
fn request_state_mut(request: &mut SingleBlockLookup<T>) -> Result<&mut Self, &'static str> {
111+
Ok(&mut request.block_request_state)
112112
}
113113
fn get_state(&self) -> &SingleLookupRequestState<Self::VerifiedResponseType> {
114114
&self.state
@@ -125,10 +125,10 @@ impl<T: BeaconChainTypes> RequestState<T> for BlobRequestState<T::EthSpec> {
125125
&self,
126126
id: Id,
127127
peer_id: PeerId,
128-
downloaded_block: Option<Arc<SignedBeaconBlock<T::EthSpec>>>,
128+
expected_blobs: usize,
129129
cx: &mut SyncNetworkContext<T>,
130130
) -> Result<LookupRequestResult, LookupRequestError> {
131-
cx.blob_lookup_request(id, peer_id, self.block_root, downloaded_block)
131+
cx.blob_lookup_request(id, peer_id, self.block_root, expected_blobs)
132132
.map_err(LookupRequestError::SendFailedNetwork)
133133
}
134134

@@ -150,8 +150,13 @@ impl<T: BeaconChainTypes> RequestState<T> for BlobRequestState<T::EthSpec> {
150150
fn response_type() -> ResponseType {
151151
ResponseType::Blob
152152
}
153-
fn request_state_mut(request: &mut SingleBlockLookup<T>) -> &mut Self {
154-
&mut request.blob_request_state
153+
fn request_state_mut(request: &mut SingleBlockLookup<T>) -> Result<&mut Self, &'static str> {
154+
match &mut request.component_requests {
155+
ComponentRequests::WaitingForBlock => Err("waiting for block"),
156+
ComponentRequests::ActiveBlobRequest(request, _) => Ok(request),
157+
ComponentRequests::ActiveCustodyRequest { .. } => Err("expecting custody request"),
158+
ComponentRequests::NotNeeded { .. } => Err("not needed"),
159+
}
155160
}
156161
fn get_state(&self) -> &SingleLookupRequestState<Self::VerifiedResponseType> {
157162
&self.state
@@ -169,10 +174,10 @@ impl<T: BeaconChainTypes> RequestState<T> for CustodyRequestState<T::EthSpec> {
169174
id: Id,
170175
// TODO(das): consider selecting peers that have custody but are in this set
171176
_peer_id: PeerId,
172-
downloaded_block: Option<Arc<SignedBeaconBlock<T::EthSpec>>>,
177+
_: usize,
173178
cx: &mut SyncNetworkContext<T>,
174179
) -> Result<LookupRequestResult, LookupRequestError> {
175-
cx.custody_lookup_request(id, self.block_root, downloaded_block)
180+
cx.custody_lookup_request(id, self.block_root)
176181
.map_err(LookupRequestError::SendFailedNetwork)
177182
}
178183

@@ -200,8 +205,13 @@ impl<T: BeaconChainTypes> RequestState<T> for CustodyRequestState<T::EthSpec> {
200205
fn response_type() -> ResponseType {
201206
ResponseType::CustodyColumn
202207
}
203-
fn request_state_mut(request: &mut SingleBlockLookup<T>) -> &mut Self {
204-
&mut request.custody_request_state
208+
fn request_state_mut(request: &mut SingleBlockLookup<T>) -> Result<&mut Self, &'static str> {
209+
match &mut request.component_requests {
210+
ComponentRequests::WaitingForBlock => Err("waiting for block"),
211+
ComponentRequests::ActiveBlobRequest { .. } => Err("expecting blob request"),
212+
ComponentRequests::ActiveCustodyRequest(request) => Ok(request),
213+
ComponentRequests::NotNeeded { .. } => Err("not needed"),
214+
}
205215
}
206216
fn get_state(&self) -> &SingleLookupRequestState<Self::VerifiedResponseType> {
207217
&self.state

beacon_node/network/src/sync/block_lookups/mod.rs

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -450,7 +450,9 @@ impl<T: BeaconChainTypes> BlockLookups<T> {
450450
};
451451

452452
let block_root = lookup.block_root();
453-
let request_state = R::request_state_mut(lookup).get_state_mut();
453+
let request_state = R::request_state_mut(lookup)
454+
.map_err(|e| LookupRequestError::BadState(e.to_owned()))?
455+
.get_state_mut();
454456

455457
match response {
456458
Ok((response, peer_group, seen_timestamp)) => {
@@ -545,7 +547,9 @@ impl<T: BeaconChainTypes> BlockLookups<T> {
545547
};
546548

547549
let block_root = lookup.block_root();
548-
let request_state = R::request_state_mut(lookup).get_state_mut();
550+
let request_state = R::request_state_mut(lookup)
551+
.map_err(|e| LookupRequestError::BadState(e.to_owned()))?
552+
.get_state_mut();
549553

550554
debug!(
551555
self.log,

beacon_node/network/src/sync/block_lookups/single_block_lookup.rs

Lines changed: 97 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@ use crate::sync::network_context::{
44
LookupRequestResult, PeerGroup, ReqId, RpcRequestSendError, SendErrorProcessor,
55
SyncNetworkContext,
66
};
7-
use beacon_chain::BeaconChainTypes;
7+
use beacon_chain::{BeaconChainTypes, BlockProcessStatus};
88
use derivative::Derivative;
99
use lighthouse_network::service::api_types::Id;
1010
use rand::seq::IteratorRandom;
@@ -62,8 +62,7 @@ pub enum LookupRequestError {
6262
pub struct SingleBlockLookup<T: BeaconChainTypes> {
6363
pub id: Id,
6464
pub block_request_state: BlockRequestState<T::EthSpec>,
65-
pub blob_request_state: BlobRequestState<T::EthSpec>,
66-
pub custody_request_state: CustodyRequestState<T::EthSpec>,
65+
pub component_requests: ComponentRequests<T::EthSpec>,
6766
/// Peers that claim to have imported this set of block components
6867
#[derivative(Debug(format_with = "fmt_peer_set_as_len"))]
6968
peers: HashSet<PeerId>,
@@ -72,6 +71,16 @@ pub struct SingleBlockLookup<T: BeaconChainTypes> {
7271
created: Instant,
7372
}
7473

74+
#[derive(Debug)]
75+
pub(crate) enum ComponentRequests<E: EthSpec> {
76+
WaitingForBlock,
77+
ActiveBlobRequest(BlobRequestState<E>, usize),
78+
ActiveCustodyRequest(CustodyRequestState<E>),
79+
// When printing in debug this state display the reason why it's not needed
80+
#[allow(dead_code)]
81+
NotNeeded(&'static str),
82+
}
83+
7584
impl<T: BeaconChainTypes> SingleBlockLookup<T> {
7685
pub fn new(
7786
requested_block_root: Hash256,
@@ -82,8 +91,7 @@ impl<T: BeaconChainTypes> SingleBlockLookup<T> {
8291
Self {
8392
id,
8493
block_request_state: BlockRequestState::new(requested_block_root),
85-
blob_request_state: BlobRequestState::new(requested_block_root),
86-
custody_request_state: CustodyRequestState::new(requested_block_root),
94+
component_requests: ComponentRequests::WaitingForBlock,
8795
peers: HashSet::from_iter(peers.iter().copied()),
8896
block_root: requested_block_root,
8997
awaiting_parent,
@@ -150,16 +158,28 @@ impl<T: BeaconChainTypes> SingleBlockLookup<T> {
150158
/// Returns true if the block has already been downloaded.
151159
pub fn all_components_processed(&self) -> bool {
152160
self.block_request_state.state.is_processed()
153-
&& self.blob_request_state.state.is_processed()
154-
&& self.custody_request_state.state.is_processed()
161+
&& match &self.component_requests {
162+
ComponentRequests::WaitingForBlock => false,
163+
ComponentRequests::ActiveBlobRequest(request, _) => request.state.is_processed(),
164+
ComponentRequests::ActiveCustodyRequest(request) => request.state.is_processed(),
165+
ComponentRequests::NotNeeded { .. } => true,
166+
}
155167
}
156168

157169
/// Returns true if this request is expecting some event to make progress
158170
pub fn is_awaiting_event(&self) -> bool {
159171
self.awaiting_parent.is_some()
160172
|| self.block_request_state.state.is_awaiting_event()
161-
|| self.blob_request_state.state.is_awaiting_event()
162-
|| self.custody_request_state.state.is_awaiting_event()
173+
|| match &self.component_requests {
174+
ComponentRequests::WaitingForBlock => true,
175+
ComponentRequests::ActiveBlobRequest(request, _) => {
176+
request.state.is_awaiting_event()
177+
}
178+
ComponentRequests::ActiveCustodyRequest(request) => {
179+
request.state.is_awaiting_event()
180+
}
181+
ComponentRequests::NotNeeded { .. } => false,
182+
}
163183
}
164184

165185
/// Makes progress on all requests of this lookup. Any error is not recoverable and must result
@@ -169,9 +189,66 @@ impl<T: BeaconChainTypes> SingleBlockLookup<T> {
169189
cx: &mut SyncNetworkContext<T>,
170190
) -> Result<LookupResult, LookupRequestError> {
171191
// TODO: Check what's necessary to download, specially for blobs
172-
self.continue_request::<BlockRequestState<T::EthSpec>>(cx)?;
173-
self.continue_request::<BlobRequestState<T::EthSpec>>(cx)?;
174-
self.continue_request::<CustodyRequestState<T::EthSpec>>(cx)?;
192+
self.continue_request::<BlockRequestState<T::EthSpec>>(cx, 0)?;
193+
194+
if let ComponentRequests::WaitingForBlock = self.component_requests {
195+
let downloaded_block = self
196+
.block_request_state
197+
.state
198+
.peek_downloaded_data()
199+
.cloned();
200+
201+
if let Some(block) = downloaded_block.or_else(|| {
202+
// If the block is already being processed or fully validated, retrieve how many blobs
203+
// it expects. Consider any stage of the block. If the block root has been validated, we
204+
// can assert that this is the correct value of `blob_kzg_commitments_count`.
205+
match cx.chain.get_block_process_status(&self.block_root) {
206+
BlockProcessStatus::Unknown => None,
207+
BlockProcessStatus::NotValidated(block)
208+
| BlockProcessStatus::ExecutionValidated(block) => Some(block.clone()),
209+
}
210+
}) {
211+
let expected_blobs = block.num_expected_blobs();
212+
let block_epoch = block.slot().epoch(T::EthSpec::slots_per_epoch());
213+
if expected_blobs == 0 {
214+
self.component_requests = ComponentRequests::NotNeeded("no data");
215+
}
216+
if cx.chain.should_fetch_blobs(block_epoch) {
217+
self.component_requests = ComponentRequests::ActiveBlobRequest(
218+
BlobRequestState::new(self.block_root),
219+
expected_blobs,
220+
);
221+
} else if cx.chain.should_fetch_custody_columns(block_epoch) {
222+
self.component_requests = ComponentRequests::ActiveCustodyRequest(
223+
CustodyRequestState::new(self.block_root),
224+
);
225+
} else {
226+
self.component_requests = ComponentRequests::NotNeeded("outside da window");
227+
}
228+
} else {
229+
// Wait to download the block before downloading blobs. Then we can be sure that the
230+
// block has data, so there's no need to do "blind" requests for all possible blobs and
231+
// latter handle the case where if the peer sent no blobs, penalize.
232+
//
233+
// Lookup sync event safety: Reaching this code means that a block is not in any pre-import
234+
// cache nor in the request state of this lookup. Therefore, the block must either: (1) not
235+
// be downloaded yet or (2) the block is already imported into the fork-choice.
236+
// In case (1) the lookup must either successfully download the block or get dropped.
237+
// In case (2) the block will be downloaded, processed, reach `DuplicateFullyImported`
238+
// and get dropped as completed.
239+
}
240+
}
241+
242+
match &self.component_requests {
243+
ComponentRequests::WaitingForBlock => {} // do nothing
244+
ComponentRequests::ActiveBlobRequest(_, expected_blobs) => {
245+
self.continue_request::<BlobRequestState<T::EthSpec>>(cx, *expected_blobs)?
246+
}
247+
ComponentRequests::ActiveCustodyRequest(_) => {
248+
self.continue_request::<CustodyRequestState<T::EthSpec>>(cx, 0)?
249+
}
250+
ComponentRequests::NotNeeded { .. } => {} // do nothing
251+
}
175252

176253
// If all components of this lookup are already processed, there will be no future events
177254
// that can make progress so it must be dropped. Consider the lookup completed.
@@ -187,15 +264,12 @@ impl<T: BeaconChainTypes> SingleBlockLookup<T> {
187264
fn continue_request<R: RequestState<T>>(
188265
&mut self,
189266
cx: &mut SyncNetworkContext<T>,
267+
expected_blobs: usize,
190268
) -> Result<(), LookupRequestError> {
191269
let id = self.id;
192270
let awaiting_parent = self.awaiting_parent.is_some();
193-
let downloaded_block = self
194-
.block_request_state
195-
.state
196-
.peek_downloaded_data()
197-
.cloned();
198-
let request = R::request_state_mut(self);
271+
let request =
272+
R::request_state_mut(self).map_err(|e| LookupRequestError::BadState(e.to_owned()))?;
199273

200274
// Attempt to progress awaiting downloads
201275
if request.get_state().is_awaiting_download() {
@@ -214,13 +288,16 @@ impl<T: BeaconChainTypes> SingleBlockLookup<T> {
214288
// not receive any new peers for some time it will be dropped. If it receives a new
215289
// peer it must attempt to make progress.
216290
R::request_state_mut(self)
291+
.map_err(|e| LookupRequestError::BadState(e.to_owned()))?
217292
.get_state_mut()
218293
.update_awaiting_download_status("no peers");
219294
return Ok(());
220295
};
221296

222-
let request = R::request_state_mut(self);
223-
match request.make_request(id, peer_id, downloaded_block, cx)? {
297+
let request = R::request_state_mut(self)
298+
.map_err(|e| LookupRequestError::BadState(e.to_owned()))?;
299+
300+
match request.make_request(id, peer_id, expected_blobs, cx)? {
224301
LookupRequestResult::RequestSent(req_id) => {
225302
// Lookup sync event safety: If make_request returns `RequestSent`, we are
226303
// guaranteed that `BlockLookups::on_download_response` will be called exactly

beacon_node/network/src/sync/network_context.rs

Lines changed: 1 addition & 66 deletions
Original file line numberDiff line numberDiff line change
@@ -632,45 +632,8 @@ impl<T: BeaconChainTypes> SyncNetworkContext<T> {
632632
lookup_id: SingleLookupId,
633633
peer_id: PeerId,
634634
block_root: Hash256,
635-
downloaded_block: Option<Arc<SignedBeaconBlock<T::EthSpec>>>,
635+
expected_blobs: usize,
636636
) -> Result<LookupRequestResult, RpcRequestSendError> {
637-
let Some(block) = downloaded_block.or_else(|| {
638-
// If the block is already being processed or fully validated, retrieve how many blobs
639-
// it expects. Consider any stage of the block. If the block root has been validated, we
640-
// can assert that this is the correct value of `blob_kzg_commitments_count`.
641-
match self.chain.get_block_process_status(&block_root) {
642-
BlockProcessStatus::Unknown => None,
643-
BlockProcessStatus::NotValidated(block)
644-
| BlockProcessStatus::ExecutionValidated(block) => Some(block.clone()),
645-
}
646-
}) else {
647-
// Wait to download the block before downloading blobs. Then we can be sure that the
648-
// block has data, so there's no need to do "blind" requests for all possible blobs and
649-
// latter handle the case where if the peer sent no blobs, penalize.
650-
// - if `downloaded_block_expected_blobs` is Some = block is downloading or processing.
651-
// - if `num_expected_blobs` returns Some = block is processed.
652-
//
653-
// Lookup sync event safety: Reaching this code means that a block is not in any pre-import
654-
// cache nor in the request state of this lookup. Therefore, the block must either: (1) not
655-
// be downloaded yet or (2) the block is already imported into the fork-choice.
656-
// In case (1) the lookup must either successfully download the block or get dropped.
657-
// In case (2) the block will be downloaded, processed, reach `DuplicateFullyImported`
658-
// and get dropped as completed.
659-
return Ok(LookupRequestResult::Pending("waiting for block download"));
660-
};
661-
let expected_blobs = block.num_expected_blobs();
662-
let block_epoch = block.slot().epoch(T::EthSpec::slots_per_epoch());
663-
664-
// Check if we are in deneb, before peerdas and inside da window
665-
if !self.chain.should_fetch_blobs(block_epoch) {
666-
return Ok(LookupRequestResult::NoRequestNeeded("blobs not required"));
667-
}
668-
669-
// No data required for this block
670-
if expected_blobs == 0 {
671-
return Ok(LookupRequestResult::NoRequestNeeded("no data"));
672-
}
673-
674637
let imported_blob_indexes = self
675638
.chain
676639
.data_availability_checker
@@ -760,35 +723,7 @@ impl<T: BeaconChainTypes> SyncNetworkContext<T> {
760723
&mut self,
761724
lookup_id: SingleLookupId,
762725
block_root: Hash256,
763-
downloaded_block: Option<Arc<SignedBeaconBlock<T::EthSpec>>>,
764726
) -> Result<LookupRequestResult, RpcRequestSendError> {
765-
let Some(block) =
766-
downloaded_block.or_else(|| match self.chain.get_block_process_status(&block_root) {
767-
BlockProcessStatus::Unknown => None,
768-
BlockProcessStatus::NotValidated(block)
769-
| BlockProcessStatus::ExecutionValidated(block) => Some(block.clone()),
770-
})
771-
else {
772-
// Wait to download the block before downloading columns. Then we can be sure that the
773-
// block has data, so there's no need to do "blind" requests for all possible columns and
774-
// latter handle the case where if the peer sent no columns, penalize.
775-
// - if `downloaded_block_expected_blobs` is Some = block is downloading or processing.
776-
// - if `num_expected_blobs` returns Some = block is processed.
777-
return Ok(LookupRequestResult::Pending("waiting for block download"));
778-
};
779-
let expected_blobs = block.num_expected_blobs();
780-
let block_epoch = block.slot().epoch(T::EthSpec::slots_per_epoch());
781-
782-
// Check if we are into peerdas and inside da window
783-
if !self.chain.should_fetch_custody_columns(block_epoch) {
784-
return Ok(LookupRequestResult::NoRequestNeeded("columns not required"));
785-
}
786-
787-
// No data required for this block
788-
if expected_blobs == 0 {
789-
return Ok(LookupRequestResult::NoRequestNeeded("no data"));
790-
}
791-
792727
let custody_indexes_imported = self
793728
.chain
794729
.data_availability_checker

0 commit comments

Comments
 (0)