Skip to content

Commit 58b4feb

Browse files
authored
sctp: optimize packet marshalling (#364)
As discussed in #360, the marshalling code has become a bottleneck in high-bandwidth sending situations. I found two places that had a big effect on performance; the hot path for this situation is marshalling packets with exactly one data chunk in them. After this PR, marshalling is largely dominated by the CRC32 calculation, which is... not easy to speed up.
1 parent 0f98498 commit 58b4feb

File tree

4 files changed

+17
-17
lines changed

4 files changed

+17
-17
lines changed

sctp/CHANGELOG.md

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,8 @@
33
## Unreleased
44

55
* Performance improvements
6-
* The lock for the internal association was contended badly because marshaling was done while still in a critical section and also tokio was scheduling tasks badly[#363](https://github.com/webrtc-rs/webrtc/pull/363)
6+
* Reuse as many allocations as possible when marshaling [#364](https://github.com/webrtc-rs/webrtc/pull/364)
7+
* The lock for the internal association was contended badly because marshaling was done while still in a critical section and also tokio was scheduling tasks badly [#363](https://github.com/webrtc-rs/webrtc/pull/363)
78

89
## v0.7.0
910

sctp/src/chunk/chunk_payload_data.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -233,7 +233,7 @@ impl Chunk for ChunkPayloadData {
233233
writer.put_u16(self.stream_identifier);
234234
writer.put_u16(self.stream_sequence_number);
235235
writer.put_u32(self.payload_type as u32);
236-
writer.extend(self.user_data.clone());
236+
writer.extend_from_slice(&self.user_data);
237237

238238
Ok(writer.len())
239239
}

sctp/src/packet.rs

Lines changed: 11 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,6 @@ use crate::util::*;
2020

2121
use crate::chunk::chunk_unknown::ChunkUnknown;
2222
use bytes::{Buf, BufMut, Bytes, BytesMut};
23-
use crc::{Crc, CRC_32_ISCSI};
2423
use std::fmt;
2524

2625
///Packet represents an SCTP packet, defined in https://tools.ietf.org/html/rfc4960#section-3
@@ -155,30 +154,29 @@ impl Packet {
155154
writer.put_u16(self.destination_port);
156155
writer.put_u32(self.verification_tag);
157156

157+
// This is where the checksum will be written
158+
let checksum_pos = writer.len();
159+
writer.extend_from_slice(&[0, 0, 0, 0]);
160+
158161
// Populate chunks
159-
let mut raw = BytesMut::new();
160162
for c in &self.chunks {
161-
let chunk_raw = c.marshal()?;
162-
raw.extend(chunk_raw);
163+
c.marshal_to(writer)?;
163164

164-
let padding_needed = get_padding_size(raw.len());
165+
let padding_needed = get_padding_size(writer.len());
165166
if padding_needed != 0 {
166-
raw.extend(vec![0u8; padding_needed]);
167+
// padding needed if < 4 because we pad to 4
168+
writer.extend_from_slice(&[0u8; 16][..padding_needed]);
167169
}
168170
}
169-
let raw = raw.freeze();
170171

171-
let hasher = Crc::<u32>::new(&CRC_32_ISCSI);
172-
let mut digest = hasher.digest();
172+
let mut digest = ISCSI_CRC.digest();
173173
digest.update(writer);
174-
digest.update(&FOUR_ZEROES);
175-
digest.update(&raw[..]);
176174
let checksum = digest.finalize();
177175

178176
// Checksum is already in BigEndian
179177
// Using LittleEndian stops it from being flipped
180-
writer.put_u32_le(checksum);
181-
writer.extend(raw);
178+
let checksum_place = &mut writer[checksum_pos..checksum_pos + 4];
179+
checksum_place.copy_from_slice(&checksum.to_le_bytes());
182180

183181
Ok(writer.len())
184182
}

sctp/src/util.rs

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -11,10 +11,11 @@ pub(crate) fn get_padding_size(len: usize) -> usize {
1111
/// We need to use it for the checksum and don't want to allocate/clear each time.
1212
pub(crate) static FOUR_ZEROES: Bytes = Bytes::from_static(&[0, 0, 0, 0]);
1313

14+
pub(crate) const ISCSI_CRC: Crc<u32> = Crc::<u32>::new(&CRC_32_ISCSI);
15+
1416
/// Fastest way to do a crc32 without allocating.
1517
pub(crate) fn generate_packet_checksum(raw: &Bytes) -> u32 {
16-
let hasher = Crc::<u32>::new(&CRC_32_ISCSI);
17-
let mut digest = hasher.digest();
18+
let mut digest = ISCSI_CRC.digest();
1819
digest.update(&raw[0..8]);
1920
digest.update(&FOUR_ZEROES[..]);
2021
digest.update(&raw[12..]);

0 commit comments

Comments
 (0)