Skip to content

Commit 36bfe3a

Browse files
rustyrussellNicolasDorier
authored andcommitted
gossipd: handle overflowing query properly (avoid slow 100% CPU reports)
Don't do this: (gdb) bt #0 0x00007f37ae667c40 in ?? () from /lib/x86_64-linux-gnu/libz.so.1 #1 0x00007f37ae668b38 in ?? () from /lib/x86_64-linux-gnu/libz.so.1 #2 0x00007f37ae669907 in deflate () from /lib/x86_64-linux-gnu/libz.so.1 #3 0x00007f37ae674c65 in compress2 () from /lib/x86_64-linux-gnu/libz.so.1 #4 0x000000000040cfe3 in zencode_scids (ctx=0xc1f118, scids=0x2599bc49 "\a\325{", len=176320) at gossipd/gossipd.c:218 ElementsProject#5 0x000000000040d0b3 in encode_short_channel_ids_end (encoded=0x7fff8f98d9f0, max_bytes=65490) at gossipd/gossipd.c:236 ElementsProject#6 0x000000000040dd28 in queue_channel_ranges (peer=0x3868fc8, first_blocknum=17290511, number_of_blocks=8) at gossipd/gossipd.c:576 ElementsProject#7 0x000000000040ddc6 in queue_channel_ranges (peer=0x3868fc8, first_blocknum=17290511, number_of_blocks=16) at gossipd/gossipd.c:595 ElementsProject#8 0x000000000040ddee in queue_channel_ranges (peer=0x3868fc8, first_blocknum=17290495, number_of_blocks=32) at gossipd/gossipd.c:596 ElementsProject#9 0x000000000040ddc6 in queue_channel_ranges (peer=0x3868fc8, first_blocknum=17290495, number_of_blocks=64) at gossipd/gossipd.c:595 ElementsProject#10 0x000000000040ddee in queue_channel_ranges (peer=0x3868fc8, first_blocknum=17290431, number_of_blocks=128) at gossipd/gossipd.c:596 ElementsProject#11 0x000000000040ddc6 in queue_channel_ranges (peer=0x3868fc8, first_blocknum=17290431, number_of_blocks=256) at gossipd/gossipd.c:595 ElementsProject#12 0x000000000040ddc6 in queue_channel_ranges (peer=0x3868fc8, first_blocknum=17290431, number_of_blocks=512) at gossipd/gossipd.c:595 ElementsProject#13 0x000000000040ddc6 in queue_channel_ranges (peer=0x3868fc8, first_blocknum=17290431, number_of_blocks=1024) at gossipd/gossipd.c:595 ElementsProject#14 0x000000000040ddee in queue_channel_ranges (peer=0x3868fc8, first_blocknum=17289408, number_of_blocks=2047) at gossipd/gossipd.c:596 ElementsProject#15 0x000000000040ddc6 in queue_channel_ranges (peer=0x3868fc8, first_blocknum=17289408, number_of_blocks=4095) at gossipd/gossipd.c:595 ElementsProject#16 0x000000000040ddc6 in queue_channel_ranges (peer=0x3868fc8, first_blocknum=17289408, number_of_blocks=8191) at gossipd/gossipd.c:595 ElementsProject#17 0x000000000040ddc6 in queue_channel_ranges (peer=0x3868fc8, first_blocknum=17289408, number_of_blocks=16382) at gossipd/gossipd.c:595 ElementsProject#18 0x000000000040ddc6 in queue_channel_ranges (peer=0x3868fc8, first_blocknum=17289408, number_of_blocks=32764) at gossipd/gossipd.c:595 ElementsProject#19 0x000000000040ddc6 in queue_channel_ranges (peer=0x3868fc8, first_blocknum=17289408, number_of_blocks=65528) at gossipd/gossipd.c:595 ElementsProject#20 0x000000000040ddc6 in queue_channel_ranges (peer=0x3868fc8, first_blocknum=17289408, number_of_blocks=131056) at gossipd/gossipd.c:595 ElementsProject#21 0x000000000040ddc6 in queue_channel_ranges (peer=0x3868fc8, first_blocknum=17289408, number_of_blocks=262112) at gossipd/gossipd.c:595 ElementsProject#22 0x000000000040ddc6 in queue_channel_ranges (peer=0x3868fc8, first_blocknum=17289408, number_of_blocks=524225) at gossipd/gossipd.c:595 ElementsProject#23 0x000000000040ddc6 in queue_channel_ranges (peer=0x3868fc8, first_blocknum=17289408, number_of_blocks=1048450) at gossipd/gossipd.c:595 ElementsProject#24 0x000000000040ddc6 in queue_channel_ranges (peer=0x3868fc8, first_blocknum=17289408, number_of_blocks=2096900) at gossipd/gossipd.c:595 ElementsProject#25 0x000000000040ddc6 in queue_channel_ranges (peer=0x3868fc8, first_blocknum=17289408, number_of_blocks=4193801) at gossipd/gossipd.c:595 ElementsProject#26 0x000000000040ddc6 in queue_channel_ranges (peer=0x3868fc8, first_blocknum=17289408, number_of_blocks=8387603) at gossipd/gossipd.c:595 ElementsProject#27 0x000000000040ddc6 in queue_channel_ranges (peer=0x3868fc8, first_blocknum=17289408, number_of_blocks=16775207) at gossipd/gossipd.c:595 ElementsProject#28 0x000000000040ddee in queue_channel_ranges (peer=0x3868fc8, first_blocknum=514201, number_of_blocks=33550414) at gossipd/gossipd.c:596 ElementsProject#29 0x000000000040ddc6 in queue_channel_ranges (peer=0x3868fc8, first_blocknum=514201, number_of_blocks=67100829) at gossipd/gossipd.c:595 ElementsProject#30 0x000000000040ddc6 in queue_channel_ranges (peer=0x3868fc8, first_blocknum=514201, number_of_blocks=134201659) at gossipd/gossipd.c:595 ElementsProject#31 0x000000000040ddc6 in queue_channel_ranges (peer=0x3868fc8, first_blocknum=514201, number_of_blocks=268403318) at gossipd/gossipd.c:595 ElementsProject#32 0x000000000040ddc6 in queue_channel_ranges (peer=0x3868fc8, first_blocknum=514201, number_of_blocks=536806636) at gossipd/gossipd.c:595 ElementsProject#33 0x000000000040ddc6 in queue_channel_ranges (peer=0x3868fc8, first_blocknum=514201, number_of_blocks=1073613273) at gossipd/gossipd.c:595 ElementsProject#34 0x000000000040ddc6 in queue_channel_ranges (peer=0x3868fc8, first_blocknum=514201, number_of_blocks=2147226547) at gossipd/gossipd.c:595 ElementsProject#35 0x000000000040ddc6 in queue_channel_ranges (peer=0x3868fc8, first_blocknum=514201, number_of_blocks=4294453094) at gossipd/gossipd.c:595 ElementsProject#36 0x000000000040df26 in handle_query_channel_range (peer=0x3868fc8, msg=0x37e0678 "\001\ao\342\214\n\266\361\263r\301\246\242F\256c\367O\223\036\203e\341Z\b\234h\326\031") at gossipd/gossipd.c:625 The cause was that converting a block number to an scid truncates it at 24 bits. When we look through the index from (truncated number) to (real end number) we get every channel, which is too large to encode, so we iterate again. This fixes both that problem, and also the issue that we'd end up dividing into many empty sections until we get to the highest block number. Instead, we just tack the empty blocks on to then end of the final query. (My initial version requested 0xFFFFFFFE blocks, but the dev code which records what blocks were returned can't make a bitmap that big on 32 bit). Reported-by: George Vaccaro Signed-off-by: Rusty Russell <[email protected]>
1 parent a49f6ce commit 36bfe3a

File tree

3 files changed

+89
-16
lines changed

3 files changed

+89
-16
lines changed

CHANGELOG.md

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,38 @@ All notable changes to this project will be documented in this file.
44
The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/)
55
and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.html).
66

7+
<<<<<<< HEAD
8+
=======
9+
## [Unreleased]
10+
11+
12+
### Added
13+
14+
- plugins: fully enabled, and ready for you to write some!
15+
- lightning-cli: `help <cmd>` finds man pages even if `make install` not run.
16+
- JSON API: `waitsendpay` now has an `erring_direction` field.
17+
- JSON API: `listpeers` now has a `direction` field in `channels`.
18+
- JSON API: `listchannels` now takes a `source` option to filter by node id.
19+
20+
### Changed
21+
22+
- The `short_channel_id` separator has been changed to be `x` to match the specification.
23+
24+
### Deprecated
25+
26+
Note: You should always set `allow-deprecated-apis=false` to test for
27+
changes.
28+
29+
### Removed
30+
31+
### Fixed
32+
33+
- Protocol: handling `query_channel_range` for large numbers of blocks
34+
(eg. 4 billion) was slow due to a bug.
35+
36+
### Security
37+
38+
>>>>>>> 0ba547ee... gossipd: handle overflowing query properly (avoid slow 100% CPU reports)
739
## [0.6.3] - 2019-01-09: "The Smallblock Conspiracy"
840

941
This release named by @molxyz and [@ctrlbreak](https://twitter.com/ctrlbreak).

gossipd/gossipd.c

Lines changed: 44 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -659,9 +659,14 @@ static void reply_channel_range(struct peer *peer,
659659
/*~ When we need to send an array of channels, it might go over our 64k packet
660660
* size. If it doesn't, we recurse, splitting in two, etc. Each message
661661
* indicates what blocks it contains, so the recipient knows when we're
662-
* finished. */
662+
* finished.
663+
*
664+
* tail_blocks is the empty blocks at the end, in case they asked for all
665+
* blocks to 4 billion.
666+
*/
663667
static void queue_channel_ranges(struct peer *peer,
664-
u32 first_blocknum, u32 number_of_blocks)
668+
u32 first_blocknum, u32 number_of_blocks,
669+
u32 tail_blocks)
665670
{
666671
struct routing_state *rstate = peer->daemon->rstate;
667672
u8 *encoded = encode_short_channel_ids_start(tmpctx);
@@ -704,7 +709,8 @@ static void queue_channel_ranges(struct peer *peer,
704709

705710
/* If we can encode that, fine: send it */
706711
if (encode_short_channel_ids_end(&encoded, max_encoded_bytes)) {
707-
reply_channel_range(peer, first_blocknum, number_of_blocks,
712+
reply_channel_range(peer, first_blocknum,
713+
number_of_blocks + tail_blocks,
708714
encoded);
709715
return;
710716
}
@@ -717,22 +723,26 @@ static void queue_channel_ranges(struct peer *peer,
717723
first_blocknum);
718724
return;
719725
}
720-
status_debug("queue_channel_ranges full: splitting %u+%u and %u+%u",
726+
status_debug("queue_channel_ranges full: splitting %u+%u and %u+%u(+%u)",
721727
first_blocknum,
722728
number_of_blocks / 2,
723729
first_blocknum + number_of_blocks / 2,
724-
number_of_blocks - number_of_blocks / 2);
725-
queue_channel_ranges(peer, first_blocknum, number_of_blocks / 2);
730+
number_of_blocks - number_of_blocks / 2,
731+
tail_blocks);
732+
queue_channel_ranges(peer, first_blocknum, number_of_blocks / 2, 0);
726733
queue_channel_ranges(peer, first_blocknum + number_of_blocks / 2,
727-
number_of_blocks - number_of_blocks / 2);
734+
number_of_blocks - number_of_blocks / 2,
735+
tail_blocks);
728736
}
729737

730738
/*~ The peer can ask for all channels is a series of blocks. We reply with one
731739
* or more messages containing the short_channel_ids. */
732740
static u8 *handle_query_channel_range(struct peer *peer, const u8 *msg)
733741
{
742+
struct routing_state *rstate = peer->daemon->rstate;
734743
struct bitcoin_blkid chain_hash;
735-
u32 first_blocknum, number_of_blocks;
744+
u32 first_blocknum, number_of_blocks, tail_blocks;
745+
struct short_channel_id last_scid;
736746

737747
if (!fromwire_query_channel_range(msg, &chain_hash,
738748
&first_blocknum, &number_of_blocks)) {
@@ -751,14 +761,25 @@ static u8 *handle_query_channel_range(struct peer *peer, const u8 *msg)
751761
return NULL;
752762
}
753763

754-
/* This checks for 32-bit overflow! */
755-
if (first_blocknum + number_of_blocks < first_blocknum) {
756-
return towire_errorfmt(peer, NULL,
757-
"query_channel_range overflow %u+%u",
758-
first_blocknum, number_of_blocks);
759-
}
760-
761-
queue_channel_ranges(peer, first_blocknum, number_of_blocks);
764+
/* If they ask for number_of_blocks UINTMAX, and we have to divide
765+
* and conquer, we'll do a lot of unnecessary work. Cap it at the
766+
* last value we have, then send an empty reply. */
767+
if (uintmap_last(&rstate->chanmap, &last_scid.u64)) {
768+
u32 last_block = short_channel_id_blocknum(&last_scid);
769+
770+
/* u64 here avoids overflow on number_of_blocks
771+
UINTMAX for example */
772+
if ((u64)first_blocknum + number_of_blocks > last_block) {
773+
tail_blocks = first_blocknum + number_of_blocks
774+
- last_block - 1;
775+
number_of_blocks -= tail_blocks;
776+
} else
777+
tail_blocks = 0;
778+
} else
779+
tail_blocks = 0;
780+
781+
queue_channel_ranges(peer, first_blocknum, number_of_blocks,
782+
tail_blocks);
762783
return NULL;
763784
}
764785

@@ -2292,6 +2313,13 @@ static struct io_plan *query_channel_range(struct io_conn *conn,
22922313
goto fail;
22932314
}
22942315

2316+
/* Check for overflow on 32-bit machines! */
2317+
if (BITMAP_NWORDS(number_of_blocks) < number_of_blocks / BITMAP_WORD_BITS) {
2318+
status_broken("query_channel_range: huge number_of_blocks (%u) not supported",
2319+
number_of_blocks);
2320+
goto fail;
2321+
}
2322+
22952323
status_debug("sending query_channel_range for blocks %u+%u",
22962324
first_blocknum, number_of_blocks);
22972325
msg = towire_query_channel_range(NULL, &daemon->chain_hash,

tests/test_gossip.py

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -640,6 +640,9 @@ def test_gossip_query_channel_range(node_factory, bitcoind):
640640
first=0,
641641
num=1000000)
642642

643+
# Turns out it sends: 0+53, 53+26, 79+13, 92+7, 99+3, 102+2, 104+1, 105+999895
644+
l1.daemon.wait_for_logs([r'\[IN\] 0108'] * 8)
645+
643646
# It should definitely have split
644647
assert ret['final_first_block'] != 0 or ret['final_num_blocks'] != 1000000
645648
assert ret['final_complete']
@@ -648,6 +651,16 @@ def test_gossip_query_channel_range(node_factory, bitcoind):
648651
assert ret['short_channel_ids'][1] == scid23
649652
l2.daemon.wait_for_log('queue_channel_ranges full: splitting')
650653

654+
# Test overflow case doesn't split forever; should still only get 8 for this
655+
ret = l1.rpc.dev_query_channel_range(id=l2.info['id'],
656+
first=1,
657+
num=429496000)
658+
l1.daemon.wait_for_logs([r'\[IN\] 0108'] * 8)
659+
660+
# And no more!
661+
time.sleep(1)
662+
assert not l1.daemon.is_in_log(r'\[IN\] 0108', start=l1.daemon.logsearch_start)
663+
651664
# This should actually be large enough for zlib to kick in!
652665
l3.fund_channel(l4, 10**5)
653666
bitcoind.generate_block(5)

0 commit comments

Comments
 (0)