diff --git a/extension/pytree/pytree.h b/extension/pytree/pytree.h index b0f2ca1fab6..6bceaf9e917 100644 --- a/extension/pytree/pytree.h +++ b/extension/pytree/pytree.h @@ -14,6 +14,7 @@ #include #include #include +#include #include #include @@ -60,7 +61,7 @@ struct Key { std::variant repr_; public: - Key() {} + Key() = default; /*implicit*/ Key(KeyInt key) : repr_(key) {} /*implicit*/ Key(KeyStr key) : repr_(std::move(key)) {} @@ -131,7 +132,7 @@ struct ContainerHandle { using leaf_type = T; std::unique_ptr handle; - ContainerHandle() {} + ContainerHandle() = default; template ContainerHandle(Args... args) @@ -427,6 +428,22 @@ struct arr { return data_[idx]; } + T& at(size_t idx) { + if (idx >= size()) { + throw std::out_of_range( + "bounds check failed in pytree arr at index " + std::to_string(idx)); + } + return data_[idx]; + } + + const T& at(size_t idx) const { + if (idx >= size()) { + throw std::out_of_range( + "bounds check failed in pytree arr at index " + std::to_string(idx)); + } + return data_[idx]; + } + inline T* data() { return data_.get(); } @@ -458,7 +475,7 @@ struct arr { inline size_t read_number(const StrTreeSpec& spec, size_t& read_idx) { size_t num = 0; - while (isdigit(spec[read_idx])) { + while (isdigit(spec.at(read_idx))) { num = 10 * num + (spec[read_idx] - '0'); read_idx++; } @@ -470,19 +487,22 @@ inline arr read_node_layout(const StrTreeSpec& spec, size_t& read_idx) { arr ret(child_num); size_t child_idx = 0; - while (spec[read_idx] == Config::kChildrenDataSep) { + while (spec.at(read_idx) == Config::kChildrenDataSep) { ++read_idx; - ret[child_idx++] = read_number(spec, read_idx); + ret.at(child_idx++) = read_number(spec, read_idx); } return ret; } +// spec_data comes from pre_parse, which guarantees 1) +// spec_data.size() == spec.size() and 2) contents of spec_data are +// in-bounds indices for spec, so we omit bounds checks for spec_data. template TreeSpec from_str_internal( const StrTreeSpec& spec, size_t read_idx, const arr& spec_data) { - const auto kind_char = spec[read_idx]; + const auto kind_char = spec.at(read_idx); switch (kind_char) { case Config::kTuple: case Config::kNamedTuple: @@ -496,7 +516,7 @@ TreeSpec from_str_internal( } else if (Config::kCustom == kind_char) { kind = Kind::Custom; read_idx++; - assert(spec[read_idx] == '('); + assert(spec.at(read_idx) == '('); auto type_str_end = spec_data[read_idx]; read_idx++; custom_type = spec.substr(read_idx, type_str_end - read_idx); @@ -515,10 +535,15 @@ TreeSpec from_str_internal( size_t leaves_offset = 0; if (size > 0) { - while (spec[read_idx] != Config::kNodeDataEnd) { + while (spec.at(read_idx) != Config::kNodeDataEnd) { // NOLINTNEXTLINE auto next_delim_idx = spec_data[read_idx]; read_idx++; + if (child_idx >= size) { + throw std::out_of_range( + "bounds check failed writing to pytree item at index " + + std::to_string(child_idx)); + } c->items[child_idx] = from_str_internal(spec, read_idx, spec_data); read_idx = next_delim_idx; @@ -541,11 +566,16 @@ TreeSpec from_str_internal( size_t leaves_offset = 0; if (size > 0) { - while (spec[read_idx] != Config::kNodeDataEnd) { + while (spec.at(read_idx) != Config::kNodeDataEnd) { // NOLINTNEXTLINE auto next_delim_idx = spec_data[read_idx]; read_idx++; - if (spec[read_idx] == Config::kDictStrKeyQuote) { + if (child_idx >= size) { + throw std::out_of_range( + "bounds check failed decoding pytree dict at index " + + std::to_string(child_idx)); + } + if (spec.at(read_idx) == Config::kDictStrKeyQuote) { auto key_delim_idx = spec_data[read_idx]; read_idx++; const size_t key_len = key_delim_idx - read_idx; @@ -562,7 +592,7 @@ TreeSpec from_str_internal( c->items[child_idx] = from_str_internal(spec, read_idx, spec_data); read_idx = next_delim_idx; - leaves_offset += layout[child_idx++]; + leaves_offset += layout.at(child_idx++); } } else { read_idx++; @@ -605,7 +635,9 @@ struct stack final { } }; +// We guarantee indicies in the result are in bounds. inline arr pre_parse(const StrTreeSpec& spec) { + // Invariant: indices in stack are in bounds. stack> stack; size_t i = 0; const size_t size = spec.size(); @@ -627,11 +659,16 @@ inline arr pre_parse(const StrTreeSpec& spec) { case Config::kDictStrKeyQuote: { size_t idx = i; i++; - while (spec[i] != Config::kDictStrKeyQuote) { + while (spec.at(i) != Config::kDictStrKeyQuote) { i++; } - ret[idx] = i; - ret[i] = idx; + if (i >= size) { + throw std::out_of_range( + "bounds check failed while parsing dictionary key at index " + + std::to_string(i)); + } + ret.at(idx) = i; + ret.at(i) = idx; break; } case Config::kChildrenSep: { diff --git a/extension/pytree/test/test_pytree.cpp b/extension/pytree/test/test_pytree.cpp index 885481b87cd..cde8b3c2d64 100644 --- a/extension/pytree/test/test_pytree.cpp +++ b/extension/pytree/test/test_pytree.cpp @@ -22,6 +22,7 @@ using Leaf = int32_t; TEST(PyTreeTest, ArrBasic) { arr x(5); ASSERT_EQ(x.size(), 5); + EXPECT_THROW(x.at(5), std::out_of_range); for (int ii = 0; ii < x.size(); ++ii) { x[ii] = 2 * ii; } @@ -197,3 +198,22 @@ TEST(pytree, FlattenNestedDict) { ASSERT_EQ(*leaves[i], items[i]); } } + +TEST(pytree, EmptySpec) { + Leaf items[1] = {9}; + EXPECT_THROW(unflatten("", items), std::out_of_range); +} + +TEST(pytree, BoundsCheckListLayout) { + // Malformed: layout one child, have two + std::string spec = "L1#1($,$)"; + Leaf items[2] = {11, 12}; + EXPECT_THROW(unflatten(spec, items), std::out_of_range); +} + +TEST(pytree, BoundsCheckDictLayout) { + // Malformed: layout one child, have two. + std::string spec = "D1#1('key0':$,'key1':$)"; + Leaf items[2] = {11, 12}; + EXPECT_THROW(unflatten(spec, items), std::out_of_range); +}