diff --git a/extension/flat_tensor/serialize/flat_tensor_header.cpp b/extension/flat_tensor/serialize/flat_tensor_header.cpp
new file mode 100644
index 00000000000..fe1db318e97
--- /dev/null
+++ b/extension/flat_tensor/serialize/flat_tensor_header.cpp
@@ -0,0 +1,114 @@
+/*
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+
+#include <executorch/extension/flat_tensor/serialize/flat_tensor_header.h>
+
+#include <cinttypes>
+#include <cstring>
+
+#include <executorch/runtime/core/error.h>
+#include <executorch/runtime/core/result.h>
+
+#pragma clang diagnostic ignored "-Wdeprecated"
+
+namespace executorch {
+using runtime::Error;
+using runtime::Result;
+namespace extension {
+namespace {
+
+/// The expected location of the header length field relative to the beginning
+/// of the header.
+static constexpr size_t kHeaderLengthOffset = FlatTensorHeader::kMagicSize;
+
+/// The expected location of the flatbuffer_offset field relative to the
+/// beginning of the header.
+static constexpr size_t kHeaderFlatbufferStartOffset =
+    kHeaderLengthOffset + sizeof(uint32_t);
+
+/// The expected location of the flatbuffer_size field relative to the beginning
+/// of the header.
+static constexpr size_t kHeaderFlatbufferSizeOffset =
+    kHeaderFlatbufferStartOffset + sizeof(uint64_t);
+
+/// The expected location of the segment_base_offset field relative to the
+/// beginning of the header.
+static constexpr size_t kHeaderSegmentBaseOffsetOffset =
+    kHeaderFlatbufferSizeOffset + sizeof(uint64_t);
+
+/// The expected location of the segment_data_size field relative to the
+/// beginning of the header.
+static constexpr size_t kHeaderSegmentDataSizeOffset =
+    kHeaderSegmentBaseOffsetOffset + sizeof(uint64_t);
+
+/**
+ * The size of the header that covers the fields known of by this version of
+ * the code. It's ok for a header to be larger as long as the fields stay in
+ * the same place, but this code will ignore any new fields.
+ */
+static constexpr size_t kMinimumHeaderLength =
+    kHeaderSegmentDataSizeOffset + sizeof(uint64_t);
+
+/// Interprets the 4 bytes at `data` as a little-endian uint32_t.
+uint32_t GetUInt32LE(const uint8_t* data) {
+  return (uint32_t)data[0] | ((uint32_t)data[1] << 8) |
+      ((uint32_t)data[2] << 16) | ((uint32_t)data[3] << 24);
+}
+
+/// Interprets the 8 bytes at `data` as a little-endian uint64_t.
+uint64_t GetUInt64LE(const uint8_t* data) {
+  return (uint64_t)data[0] | ((uint64_t)data[1] << 8) |
+      ((uint64_t)data[2] << 16) | ((uint64_t)data[3] << 24) |
+      ((uint64_t)data[4] << 32) | ((uint64_t)data[5] << 40) |
+      ((uint64_t)data[6] << 48) | ((uint64_t)data[7] << 56);
+}
+
+} // namespace
+
+/* static */ Result<FlatTensorHeader> FlatTensorHeader::Parse(
+    const void* data,
+    size_t size) {
+  if (data == nullptr || size < FlatTensorHeader::kNumHeadBytes) {
+    return Error::InvalidArgument;
+  }
+  const uint8_t* header = reinterpret_cast<const uint8_t*>(data);
+
+  // Check magic bytes.
+  if (std::memcmp(
+          header, FlatTensorHeader::kMagic, FlatTensorHeader::kMagicSize) !=
+      0) {
+    return Error::NotFound;
+  }
+
+  // Check header length.
+  uint32_t header_length = GetUInt32LE(header + kHeaderLengthOffset);
+  if (header_length < kMinimumHeaderLength) {
+    ET_LOG(
+        Error,
+        "FlatTensor header length %" PRIu32 " < %zu",
+        header_length,
+        kMinimumHeaderLength);
+    return Error::InvalidExternalData;
+  }
+
+  // The header is present and apparently valid.
+  return FlatTensorHeader{
+      /*flatbuffer_offset=*/GetUInt64LE(header + kHeaderFlatbufferStartOffset),
+      /*flatbuffer_size=*/GetUInt64LE(header + kHeaderFlatbufferSizeOffset),
+      /*segment_base_offset=*/
+      GetUInt64LE(header + kHeaderSegmentBaseOffsetOffset),
+      /*segment_data_size=*/GetUInt64LE(header + kHeaderSegmentDataSizeOffset),
+  };
+}
+
+// Define storage for the static.
+// @lint-ignore CLANGTIDY facebook-hte-CArray
+constexpr char FlatTensorHeader::kMagic[kMagicSize];
+
+} // namespace extension
+} // namespace executorch
diff --git a/extension/flat_tensor/serialize/flat_tensor_header.h b/extension/flat_tensor/serialize/flat_tensor_header.h
new file mode 100644
index 00000000000..08b87a5aa4c
--- /dev/null
+++ b/extension/flat_tensor/serialize/flat_tensor_header.h
@@ -0,0 +1,70 @@
+/*
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+
+#pragma once
+
+#include <executorch/runtime/core/result.h>
+
+namespace executorch {
+namespace extension {
+
+/**
+ * A FlatTensor header found at the beginning of a flat_tensor-serialized blob.
+ */
+struct FlatTensorHeader {
+  /**
+   * To find the header, callers should provide at least this many bytes of the
+   * head of the serialized FlatTensor data.
+   */
+  static constexpr size_t kNumHeadBytes = 64;
+
+  /**
+   * The magic bytes that identify the header. This should be in sync with
+   * the magic in executorch/extension/flat_tensor/serialize/serialize.py
+   *
+   * This is the canonical definition of the expected value. If the header
+   * layout ever changes in a compatibility-breaking way, increment the digits
+   * in the magic. But, doing so will prevent older binaries from recognizing
+   * the presence of the header. The compatibility-preserving way to make
+   * changes is to increase the header's length field and add new fields at the
+   * end.
+   */
+  static constexpr size_t kMagicSize = 4;
+  // @lint-ignore CLANGTIDY facebook-hte-CArray
+  static constexpr char kMagic[kMagicSize] = {'F', 'H', '0', '1'};
+
+  /**
+   * Look for and parse a FlatTensorHeader in the provided data.
+   *
+   * @param[in] data The contents of the beginning of the serialized binary
+   *     FlatTensor data, starting at offset 0 (i.e., the head of the file).
+   * @param[in] size Length of `data` in bytes. Must be >= kNumHeadBytes or this
+   *     call will fail.
+   *
+   * @returns a FlatTensorHeader if the header was found and is valid. Returns
+   *     Error::InvalidArgument if size was too short, Error::NotFound if the
+   *     magic bytes did not match, or Error::InvalidExternalData if the
+   *     header's length field was too small.
+   */
+  static runtime::Result<FlatTensorHeader> Parse(const void* data, size_t size);
+
+  /// Offset of the FlatTensor flatbuffer in the serialized binary.
+  uint64_t flatbuffer_offset;
+
+  /// Size of the flatbuffer in bytes.
+  uint64_t flatbuffer_size;
+
+  /// The offset in bytes of the first segment.
+  uint64_t segment_base_offset;
+
+  /// Size of all the segment data, in bytes.
+  uint64_t segment_data_size;
+};
+
+} // namespace extension
+} // namespace executorch
diff --git a/extension/flat_tensor/serialize/serialize.py b/extension/flat_tensor/serialize/serialize.py
index 6a07892eb5d..8857f1f5bbc 100644
--- a/extension/flat_tensor/serialize/serialize.py
+++ b/extension/flat_tensor/serialize/serialize.py
@@ -93,6 +93,8 @@ class FlatTensorConfig:
 class FlatTensorHeader:
     # Class constants.
     # The magic bytes that should be at the beginning of the header.
+    # This should be in sync with the magic in
+    # executorch/extension/flat_tensor/serialize/flat_tensor_header.h
     EXPECTED_MAGIC: ClassVar[bytes] = b"FH01"
     EXPECTED_LENGTH: ClassVar[int] = (
         # Header magic
diff --git a/extension/flat_tensor/serialize/targets.bzl b/extension/flat_tensor/serialize/targets.bzl
index be0460147a3..cdafba55d99 100644
--- a/extension/flat_tensor/serialize/targets.bzl
+++ b/extension/flat_tensor/serialize/targets.bzl
@@ -34,3 +34,11 @@ def define_common_targets():
         },
         exported_external_deps = ["flatbuffers-api"],
     )
+
+    runtime.cxx_library(
+        name = "flat_tensor_header",
+        srcs = ["flat_tensor_header.cpp"],
+        exported_headers = ["flat_tensor_header.h"],
+        visibility = ["//executorch/..."],
+        exported_deps = ["//executorch/runtime/core:core"],
+    )
diff --git a/extension/flat_tensor/test/TARGETS b/extension/flat_tensor/test/TARGETS
index 6f708ae8489..c9989b67554 100644
--- a/extension/flat_tensor/test/TARGETS
+++ b/extension/flat_tensor/test/TARGETS
@@ -1,7 +1,13 @@
+# Any targets that should be shared between fbcode and xplat must be defined in
+# targets.bzl. This file can contain fbcode-only targets.
+
 load("@fbcode_macros//build_defs:python_unittest.bzl", "python_unittest")
+load(":targets.bzl", "define_common_targets")
 
 oncall("executorch")
 
+define_common_targets()
+
 python_unittest(
     name = "serialize",
     srcs = [
diff --git a/extension/flat_tensor/test/flat_tensor_header_test.cpp b/extension/flat_tensor/test/flat_tensor_header_test.cpp
new file mode 100644
index 00000000000..d8d95686515
--- /dev/null
+++ b/extension/flat_tensor/test/flat_tensor_header_test.cpp
@@ -0,0 +1,91 @@
+/*
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+
+#include <executorch/extension/flat_tensor/serialize/flat_tensor_header.h>
+
+#include <gtest/gtest.h>
+
+#include <executorch/runtime/core/result.h>
+#include <executorch/runtime/platform/runtime.h>
+
+using namespace ::testing;
+using executorch::extension::FlatTensorHeader;
+using executorch::runtime::Error;
+using executorch::runtime::Result;
+
+class FlatTensorHeaderTest : public ::testing::Test {
+ protected:
+  void SetUp() override {
+    // Since these tests cause ET_LOG to be called, the PAL must be initialized
+    // first.
+    executorch::runtime::runtime_init();
+  }
+};
+
+/**
+ * An example, valid flat_tensor header.
+ *
+ * This data is intentionally fragile. If the header layout or magic changes,
+ * this test data must change too. The layout of the header is a contract, not
+ * an implementation detail.
+ */
+// clang-format off
+// @lint-ignore CLANGTIDY facebook-hte-CArray
+constexpr char kExampleHeaderData[] = {
+  // Magic bytes
+  'F', 'H', '0', '1',
+  // uint32_t header size (little endian)
+  0x28, 0x00, 0x00, 0x00,
+  // uint64_t flatbuffer_offset
+  0x71, 0x61, 0x51, 0x41, 0x31, 0x21, 0x11, 0x01,
+  // uint64_t flatbuffer_size
+  0x72, 0x62, 0x52, 0x42, 0x32, 0x22, 0x12, 0x02,
+  // uint64_t segment_base_offset
+  0x73, 0x63, 0x53, 0x43, 0x33, 0x23, 0x13, 0x03,
+  // uint64_t segment_data_size
+  0x74, 0x64, 0x54, 0x44, 0x34, 0x24, 0x14, 0x04,
+};
+// clang-format on
+
+constexpr uint64_t kExampleFlatbufferOffset = 0x0111213141516171;
+constexpr uint64_t kExampleFlatbufferSize = 0x0212223242526272;
+constexpr uint64_t kExampleSegmentBaseOffset = 0x0313233343536373;
+constexpr uint64_t kExampleSegmentDataSize = 0x0414243444546474;
+
+/**
+ * Returns fake serialized FlatTensor data that contains kExampleHeaderData at
+ * the expected offset.
+ */
+std::vector<uint8_t> CreateExampleFlatTensorHeader() {
+  // Allocate memory representing the FlatTensor header.
+  std::vector<uint8_t> ret(FlatTensorHeader::kNumHeadBytes);
+  // Write non-zeros into it to make it more obvious if we read outside the
+  // header.
+  memset(ret.data(), 0x55, ret.size());
+  // Copy the example header into the right offset.
+  memcpy(ret.data(), kExampleHeaderData, sizeof(kExampleHeaderData));
+  return ret;
+}
+
+TEST_F(FlatTensorHeaderTest, ValidHeaderParsesCorrectly) {
+  std::vector<uint8_t> flat_tensor = CreateExampleFlatTensorHeader();
+
+  Result<FlatTensorHeader> header =
+      FlatTensorHeader::Parse(flat_tensor.data(), flat_tensor.size());
+
+  // The header should be present.
+  ASSERT_EQ(header.error(), Error::Ok);
+
+  // Since each byte of these fields is unique, success demonstrates that the
+  // endian-to-int conversion is correct and looks at the expected bytes of the
+  // header.
+  EXPECT_EQ(header->flatbuffer_offset, kExampleFlatbufferOffset);
+  EXPECT_EQ(header->flatbuffer_size, kExampleFlatbufferSize);
+  EXPECT_EQ(header->segment_base_offset, kExampleSegmentBaseOffset);
+  EXPECT_EQ(header->segment_data_size, kExampleSegmentDataSize);
+}
diff --git a/extension/flat_tensor/test/targets.bzl b/extension/flat_tensor/test/targets.bzl
new file mode 100644
index 00000000000..0c08106a5c5
--- /dev/null
+++ b/extension/flat_tensor/test/targets.bzl
@@ -0,0 +1,18 @@
+load("@fbsource//xplat/executorch/build:runtime_wrapper.bzl", "runtime")
+
+def define_common_targets():
+    """Defines targets that should be shared between fbcode and xplat.
+
+    The directory containing this targets.bzl file should also contain both
+    TARGETS and BUCK files that call this function.
+    """
+
+    runtime.cxx_test(
+        name = "flat_tensor_header_test",
+        srcs = [
+            "flat_tensor_header_test.cpp",
+        ],
+        deps = [
+            "//executorch/extension/flat_tensor/serialize:flat_tensor_header",
+        ],
+    )
diff --git a/runtime/core/error.h b/runtime/core/error.h
index 4babb4d148f..cdf6303a650 100644
--- a/runtime/core/error.h
+++ b/runtime/core/error.h
@@ -79,6 +79,9 @@ enum class Error : error_code_t {
   /// Error caused by the contents of a program.
   InvalidProgram = 0x23,
 
+  /// Error caused by the contents of external data.
+  InvalidExternalData = 0x24,
+
   /*
    * Delegate errors.
    */