Skip to content

Commit 567ac64

Browse files
committed
Revert attribution of failures
1 parent 74e6b57 commit 567ac64

File tree

1 file changed

+86
-43
lines changed

1 file changed

+86
-43
lines changed

lightning/src/ln/onion_utils.rs

Lines changed: 86 additions & 43 deletions
Original file line numberDiff line numberDiff line change
@@ -979,6 +979,8 @@ mod fuzzy_onion_utils {
979979
pub(crate) onion_error_code: Option<LocalHTLCFailureReason>,
980980
#[cfg(any(test, feature = "_test_utils"))]
981981
pub(crate) onion_error_data: Option<Vec<u8>>,
982+
#[cfg(test)]
983+
pub(crate) attribution_failed_channel: Option<u64>,
982984
}
983985
}
984986
#[cfg(fuzzing)]
@@ -1053,6 +1055,8 @@ where
10531055
onion_error_code: None,
10541056
#[cfg(any(test, feature = "_test_utils"))]
10551057
onion_error_data: None,
1058+
#[cfg(test)]
1059+
attribution_failed_channel: None,
10561060
};
10571061
}
10581062

@@ -1120,6 +1124,9 @@ where
11201124
// the first 20 hops. Determine the number of hops to be used for attribution data.
11211125
let attributable_hop_count = usize::min(path.hops.len(), MAX_HOPS);
11221126

1127+
// Keep track of the first hop for which the attribution data failed to check out.
1128+
let mut attribution_failed_channel = None;
1129+
11231130
// Handle packed channel/node updates for passing back for the route handler
11241131
let mut iter = nontrampolines.chain(trampolines.into_iter().flatten()).enumerate().peekable();
11251132
while let Some((route_hop_idx, (route_hop_option, shared_secret))) = iter.next() {
@@ -1191,44 +1198,53 @@ where
11911198

11921199
let um = gen_um_from_shared_secret(shared_secret.as_ref());
11931200

1194-
// Check attr error HMACs if present.
1195-
if let Some(ref mut attribution_data) = encrypted_packet.attribution_data {
1196-
// Only consider hops in the regular path for attribution data. Failures in a blinded path are not attributable.
1197-
if route_hop_idx < attributable_hop_count {
1198-
// Calculate position relative to the last attributable hop. The last attributable hop is at position 0.
1199-
// The failure node does not need to come from the last attributable hop, but we need to look at the
1200-
// chain of HMACs that does include all data up to the last attributable hop. For a more nearby failure,
1201-
// the verified HMACs will include some zero padding data. Failures beyond the last attributable hop
1202-
// will not be attributable.
1203-
let position = attributable_hop_count - route_hop_idx - 1;
1204-
let hold_time = attribution_data.verify(
1205-
&encrypted_packet.data,
1206-
shared_secret.as_ref(),
1207-
position,
1208-
);
1209-
if let Some(hold_time) = hold_time {
1210-
hop_hold_times.push(hold_time);
1211-
1212-
log_debug!(logger, "Htlc hold time at pos {}: {} ms", route_hop_idx, hold_time);
1213-
1214-
// Shift attribution data to prepare for processing the next hop.
1215-
attribution_data.shift_left();
1216-
} else {
1217-
res = Some(FailureLearnings {
1218-
network_update: None,
1219-
short_channel_id: route_hop.short_channel_id(),
1220-
payment_failed_permanently: false,
1221-
failed_within_blinded_path: false,
1222-
});
1223-
1224-
log_debug!(
1225-
logger,
1226-
"Invalid HMAC in attribution data for node at pos {}",
1227-
route_hop_idx
1201+
// Only check attribution when an attribution data failure has not yet occurred.
1202+
if attribution_failed_channel.is_none() {
1203+
// Check attr error HMACs if present.
1204+
if let Some(ref mut attribution_data) = encrypted_packet.attribution_data {
1205+
// Only consider hops in the regular path for attribution data. Failures in a blinded path are not
1206+
// attributable.
1207+
if route_hop_idx < attributable_hop_count {
1208+
// Calculate position relative to the last attributable hop. The last attributable hop is at
1209+
// position 0. The failure node does not need to come from the last attributable hop, but we need to
1210+
// look at the chain of HMACs that does include all data up to the last attributable hop. For a more
1211+
// nearby failure, the verified HMACs will include some zero padding data. Failures beyond the last
1212+
// attributable hop will not be attributable.
1213+
let position = attributable_hop_count - route_hop_idx - 1;
1214+
let hold_time = attribution_data.verify(
1215+
&encrypted_packet.data,
1216+
shared_secret.as_ref(),
1217+
position,
12281218
);
1219+
if let Some(hold_time) = hold_time {
1220+
hop_hold_times.push(hold_time);
1221+
1222+
log_debug!(
1223+
logger,
1224+
"Htlc hold time at pos {}: {} ms",
1225+
route_hop_idx,
1226+
hold_time
1227+
);
12291228

1230-
break;
1229+
// Shift attribution data to prepare for processing the next hop.
1230+
attribution_data.shift_left();
1231+
} else {
1232+
// Store the failing hop, but continue processing the failure for the remaining hops. During the
1233+
// upgrade period, it may happen that nodes along the way drop attribution data. If the legacy
1234+
// failure is still valid, it should be processed normally.
1235+
attribution_failed_channel = route_hop.short_channel_id();
1236+
1237+
log_debug!(
1238+
logger,
1239+
"Invalid HMAC in attribution data for node at pos {}",
1240+
route_hop_idx
1241+
);
1242+
}
12311243
}
1244+
} else {
1245+
// When no attribution data is provided at all, blame the first hop when the failing node turns out to
1246+
// be unindentifiable.
1247+
attribution_failed_channel = route_hop.short_channel_id();
12321248
}
12331249
}
12341250

@@ -1421,14 +1437,17 @@ where
14211437
onion_error_code: _error_code_ret,
14221438
#[cfg(any(test, feature = "_test_utils"))]
14231439
onion_error_data: _error_packet_ret,
1440+
#[cfg(test)]
1441+
attribution_failed_channel,
14241442
}
14251443
} else {
14261444
// only not set either packet unparseable or hmac does not match with any
14271445
// payment not retryable only when garbage is from the final node
14281446
log_warn!(
14291447
logger,
1430-
"Non-attributable failure encountered on route {}",
1431-
path.hops.iter().map(|h| h.pubkey.to_string()).collect::<Vec<_>>().join("->")
1448+
"Non-attributable failure encountered on route {}. Attributation data failed for channel {}",
1449+
path.hops.iter().map(|h| h.pubkey.to_string()).collect::<Vec<_>>().join("->"),
1450+
attribution_failed_channel.unwrap_or_default(),
14321451
);
14331452

14341453
DecodedOnionFailure {
@@ -1441,6 +1460,8 @@ where
14411460
onion_error_code: None,
14421461
#[cfg(any(test, feature = "_test_utils"))]
14431462
onion_error_data: None,
1463+
#[cfg(test)]
1464+
attribution_failed_channel,
14441465
}
14451466
}
14461467
}
@@ -2046,6 +2067,8 @@ impl HTLCFailReason {
20462067
onion_error_code: Some(*failure_reason),
20472068
#[cfg(any(test, feature = "_test_utils"))]
20482069
onion_error_data: Some(data.clone()),
2070+
#[cfg(test)]
2071+
attribution_failed_channel: None,
20492072
}
20502073
} else {
20512074
unreachable!();
@@ -2787,6 +2810,7 @@ fn process_failure_packet(
27872810

27882811
#[cfg(test)]
27892812
mod tests {
2813+
use core::iter;
27902814
use std::sync::Arc;
27912815

27922816
use crate::io;
@@ -3130,18 +3154,32 @@ mod tests {
31303154

31313155
for mutation_type in failure_mutations
31323156
.chain(attribution_data_mutations.map(MutationType::AttributionData))
3157+
.chain(iter::once(MutationType::DropAttributionData))
31333158
{
3159+
// If the mutation is in the attribution data and not in the failure message itself, the invalid
3160+
// attribution data should be ignored and the failure should still surface.
3161+
let failure_ok = matches!(mutation_type, MutationType::DropAttributionData)
3162+
|| matches!(mutation_type, MutationType::AttributionData(_));
3163+
31343164
let mutation = Mutation { node: mutating_node, mutation_type };
31353165
let decrypted_failure =
31363166
test_attributable_failure_packet_onion_with_mutation(Some(mutation));
31373167

3138-
if decrypted_failure.onion_error_code
3139-
== Some(LocalHTLCFailureReason::IncorrectPaymentDetails)
3140-
{
3168+
if failure_ok {
3169+
assert_eq!(
3170+
decrypted_failure.onion_error_code,
3171+
Some(LocalHTLCFailureReason::IncorrectPaymentDetails)
3172+
);
31413173
continue;
31423174
}
31433175

3144-
assert!(decrypted_failure.short_channel_id == Some(mutating_node as u64));
3176+
// Currently attribution data isn't used yet to identify the failing node, because this would hinder the
3177+
// upgrade path.
3178+
assert!(decrypted_failure.short_channel_id.is_none());
3179+
3180+
// Assert that attribution data is interpreted correctly via a test-only field.
3181+
assert!(decrypted_failure.attribution_failed_channel == Some(mutating_node as u64));
3182+
31453183
assert_eq!(decrypted_failure.hold_times, [5, 4, 3, 2, 1][..mutating_node]);
31463184
}
31473185
}
@@ -3165,6 +3203,7 @@ mod tests {
31653203
enum MutationType {
31663204
FailureMessage(usize),
31673205
AttributionData(AttributionDataMutationType),
3206+
DropAttributionData,
31683207
}
31693208

31703209
struct Mutation {
@@ -3252,6 +3291,10 @@ mod tests {
32523291
// Mutate hold times.
32533292
packet.attribution_data.as_mut().unwrap().hmacs[*i] ^= 1;
32543293
},
3294+
MutationType::DropAttributionData => {
3295+
// Drop attribution data completely. This simulates a node that does not support the feature.
3296+
packet.attribution_data = None;
3297+
},
32553298
}
32563299
};
32573300

@@ -3560,7 +3603,7 @@ mod tests {
35603603
// With attributable failures, it should still be possible to identify the failing node.
35613604
let logger: TestLogger = TestLogger::new();
35623605
let decrypted_failure = test_failure_attribution(&logger, onion_error_packet);
3563-
assert_eq!(decrypted_failure.short_channel_id, Some(0));
3606+
assert_eq!(decrypted_failure.attribution_failed_channel, Some(0));
35643607
}
35653608

35663609
#[test]
@@ -3628,7 +3671,7 @@ mod tests {
36283671
// Expect attribution up to hop 20.
36293672
let expected_failed_chan =
36303673
if failure_pos < MAX_HOPS { Some(failure_pos as u64) } else { None };
3631-
assert_eq!(decrypted_failure.short_channel_id, expected_failed_chan);
3674+
assert_eq!(decrypted_failure.attribution_failed_channel, expected_failed_chan);
36323675
}
36333676
}
36343677

0 commit comments

Comments
 (0)