Skip to content

Initial framework of an ethos-u runtime backend #3 #659

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 25 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
25 commits
Select commit Hold shift + click to select a range
225c115
Initial Ethos-U runtime backend
robell Oct 3, 2023
a7aa848
Fixed error messages on runtime init
robell Oct 4, 2023
f95fead
lintrunner cleanup
robell Oct 4, 2023
524afc6
Add ArmBackend to example scripts
robell Oct 4, 2023
2bfdb5f
Add delegate test and FVP output
robell Oct 4, 2023
8267612
Fix delegate runner patch
robell Oct 4, 2023
6b398fe
cmake compiler and log behaviour fixing
robell Oct 4, 2023
6ca9b26
Minimal example of AoT with ArmPartitioner+Vela
robell Oct 5, 2023
f40ab5f
Generate pte for delegate test on the fly
robell Oct 5, 2023
d10d620
Added support for variable input output patterns
robell Oct 5, 2023
b812898
Handle multiple delegate inputs with SRAM offsets
robell Oct 5, 2023
44a46a1
Add TOSA ref model and Vela dependencies
robell Oct 5, 2023
cb16e68
Cleanup from lintrunner and other bits of tidyup
robell Oct 5, 2023
8aa80d0
Removed ethos u driver build and cmsis dependency
robell Oct 5, 2023
3a5fd4f
renamed lib ethos_u to executorch_delegate_ethos_u
robell Oct 5, 2023
e676620
lintfix
robell Oct 5, 2023
4b1125e
tidied delegate_runner output
robell Oct 5, 2023
7afc5e4
Fixed some merge issues
robell Oct 6, 2023
e340b5c
Test fixes for compiler output choice
robell Oct 6, 2023
6b7a18a
review fixes
robell Oct 6, 2023
c453e43
review feedback/improvements
robell Oct 6, 2023
1917b54
tidy up example scripts
robell Oct 6, 2023
20bf2eb
further review comments
robell Oct 6, 2023
468d6fc
revised path for ethosu_minimal
robell Oct 6, 2023
709a688
lintfix
robell Oct 6, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions .gitmodules
Original file line number Diff line number Diff line change
Expand Up @@ -43,3 +43,6 @@
[submodule "examples/demo-apps/android/jni/third-party/fbjni"]
path = examples/demo-apps/android/jni/third-party/fbjni
url = https://github.com/facebookincubator/fbjni.git
[submodule "backends/arm/third-party/ethos-u-core-driver"]
path = backends/arm/third-party/ethos-u-core-driver
url = https://git.mlplatform.org/ml/ethos-u/ethos-u-core-driver.git
7 changes: 7 additions & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -334,6 +334,13 @@ if(EXECUTORCH_BUILD_QNN)
add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/examples/qualcomm)
endif()

# Arm baremetal backend (Cortex-M and Ethos-U). Disabled unless the user opts
# in; when enabled, the backend's own CMakeLists.txt under backends/arm is
# added to the build.
option(
  EXECUTORCH_BUILD_ARM_BAREMETAL
  "Build the Arm Baremetal flow for Cortex-M and Ethos-U"
  OFF
)
if(EXECUTORCH_BUILD_ARM_BAREMETAL)
  add_subdirectory("${CMAKE_CURRENT_SOURCE_DIR}/backends/arm")
endif()

# Add selective build subdirectory
if(BUILD_SELECTIVE_BUILD_TEST)
add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/examples/selective_build)
Expand Down
36 changes: 36 additions & 0 deletions backends/arm/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
# Copyright 2023 Arm Limited and/or its affiliates.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.
cmake_minimum_required(VERSION 3.19)

# Ask CMake to write compile_commands.json for this build.
set(CMAKE_EXPORT_COMPILE_COMMANDS ON)

# Fall back to the directory two levels above this file when the caller has
# not already told us where the executorch sources live.
if(NOT EXECUTORCH_ROOT)
  set(EXECUTORCH_ROOT "${CMAKE_CURRENT_SOURCE_DIR}/../..")
endif()

include("${EXECUTORCH_ROOT}/build/Utils.cmake")

# Headers are included relative to the parent of the executorch checkout.
set(_common_include_directories "${EXECUTORCH_ROOT}/..")

# Provides DRIVER_ETHOSU_INCLUDE_DIR (Ethos-U core driver headers).
include(cmake/Dependencies.cmake)

# Absolute path of the single runtime source that makes up the delegate.
set(_arm_baremetal_sources
    "${EXECUTORCH_ROOT}/backends/arm/runtime/ArmBackendEthosU.cpp"
)

# Static delegate library; both include roots are PUBLIC so that anything
# linking against it sees the same headers.
add_library(executorch_delegate_ethos_u STATIC ${_arm_baremetal_sources})
target_include_directories(
  executorch_delegate_ethos_u
  PUBLIC ${_common_include_directories} ${DRIVER_ETHOSU_INCLUDE_DIR}
)
113 changes: 100 additions & 13 deletions backends/arm/arm_backend.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,8 @@
import logging
import operator
import os
import struct
import subprocess
import tempfile
from typing import final, List

Expand Down Expand Up @@ -136,13 +138,89 @@ def dbg_tosa_dump(tosa_fb, path):
fb = tosa_fb.serialize()
js = tosa_fb.writeJson(filename)

f = open(path + filename, "wb")
f.write(fb)
f.close()
with open(path + filename, "wb") as f:
f.write(fb)

f = open(path + "desc.json", "w")
f.write(js)
f.close()
with open(path + "desc.json", "w") as f:
f.write(js)


def _vela_pad16(length):
    """Return how many zero bytes round `length` up to a multiple of 16."""
    return 15 - (length - 1) % 16


def _vela_block(name, length, payload):
    """Frame one vela_bin_stream block.

    Layout: 16-byte NUL-padded name (truncated to 15 chars so it is always
    NUL terminated), 16-byte little-endian length field, then `payload`
    (which the caller has already padded to a 16-byte boundary).
    """
    block_name = bytes(name, "utf8")[:15]
    block_name = block_name + b"\x00" * (16 - len(block_name))
    return block_name + length.to_bytes(16, "little") + payload


# Output to Vela with current file-based compilation
# WARNING: if this changes, the runtime reader also needs to change
def vela_compile(tosa_fb):
    """Compile a TOSA flatbuffer with Vela and repack the result as a byte stream.

    The serialized TOSA graph is written to a temporary directory, the `vela`
    command line tool is run on it there, and every array in the resulting
    ``output/out_sg0_vela.npz`` is emitted as a named block, followed by a
    zero-filled ``scratch_data`` block.

    Args:
        tosa_fb: object exposing ``serialize()`` returning the TOSA flatbuffer
            bytes.

    Returns:
        bytes: ``vela_bin_stream`` header + blocks + ``vela_end_stream`` footer.

    Raises:
        FileNotFoundError: if the ``vela`` executable is not on PATH.
        subprocess.CalledProcessError: if ``vela`` exits with a non-zero status.
        RuntimeError: if Vela reports more than one output offset.
        AssertionError: if an input/output shape has more than 4 dimensions.
    """
    with tempfile.TemporaryDirectory() as tmpdir:
        tosaname = "out.tosa"
        with open(os.path.join(tmpdir, tosaname), "wb") as f:
            f.write(tosa_fb.serialize())

        # Invoke vela from inside the temp dir. An argument list plus cwd=
        # avoids building a shell command line, so unusual characters in the
        # temp path cannot break (or inject into) the command.
        subprocess.run(
            ["vela", "--accelerator-config", "ethos-u55-128", tosaname],
            cwd=tmpdir,
            check=True,
        )

        np_path = os.path.join(tmpdir, "output", "out_sg0_vela.npz")
        blocks = []
        with np.load(np_path, allow_pickle=False) as data:
            # Emit the NPZ regions as:
            #  - 16 byte block name null terminated string (padded to 16 if name shorter)
            #  - 4 bytes of int32 block length and 12 bytes of 0's
            #  - block data (padded to 16 byte alignment at end)
            # Repeat for all blocks
            for key in data.keys():
                if key in ("input_shape", "output_shape"):
                    shapes = data[key]
                    # Encode a struct of int len; and one or more int x,y,z,w shape;
                    parts = [struct.pack("<i", len(shapes))]
                    for shape in shapes:
                        assert len(shape) <= 4
                        padded_shape = shape.tolist() + [0] * (4 - len(shape))
                        parts.append(struct.pack("<iiii", *padded_shape))
                    block_data = b"".join(parts)
                elif key in ("input_offset", "output_offset"):
                    offsets = data[key]
                    if key == "output_offset" and len(offsets) > 1:
                        raise RuntimeError(
                            "Currently only support one output in Vela ArmBackend"
                        )
                    parts = [struct.pack("<i", len(offsets))]
                    for offset in offsets:
                        parts.append(struct.pack("<i", offset))
                    block_data = b"".join(parts)
                else:
                    block_data = data[key].tobytes()

                # We need the actual unpadded block lengths for hw setup, so
                # record the length before padding the data to 16 bytes.
                unpadded_length = len(block_data)
                block_data = block_data + b"\x00" * _vela_pad16(unpadded_length)
                blocks.append(_vela_block(key, unpadded_length, block_data))

            # Add a block for scratch, inputs and outputs
            # scratch shape is a 1 element array giving us size in bytes;
            # unlike the blocks above, the recorded length is the padded size.
            scratch_length = data["scratch_shape"][0].item()
            scratch_length = scratch_length + _vela_pad16(scratch_length)
            blocks.append(
                _vela_block("scratch_data", scratch_length, b"\x00" * scratch_length)
            )
            # TODO are these already in scratch shape? look to be
            # input_shape * input_elem_size
            # output_shape * output_elem_size
            # input_offset and output_offset specify the location these arrays are written from base of scratch

        # return 16 byte VELA bin header + blocks + footer
        header = bytes("vela_bin_stream", "utf-8") + b"\x00"
        footer = bytes("vela_end_stream", "utf-8") + b"\x00"
        return header + b"".join(blocks) + footer


def dbg_fail(node, tosa_fb, path):
Expand Down Expand Up @@ -237,14 +315,13 @@ def preprocess( # noqa: C901
# if a debug/test build capture output files from TOSA stage
path = None
debug_output = False
output_format = "vela"
for spec in compile_spec:
if spec.key == "debug_tosa_path":
path = spec.value.decode()
debug_output = True

# in non debug builds we still pass files to vela
if path is None:
path = tempfile.mkdtemp(prefix="arm_tosa_")
if spec.key == "output_format":
output_format = spec.value.decode()

# Converted output for this subgraph, serializer needs path early as it emits
# const data directly. Path created and data written only in debug builds.
Expand Down Expand Up @@ -890,6 +967,16 @@ def preprocess( # noqa: C901
if debug_output is True:
dbg_tosa_dump(tosa_fb, path)

# Serialize and return the tosa flatbuffer
fb = tosa_fb.serialize()
return PreprocessResult(processed_bytes=bytes(fb))
# Serialize and return the program. While we have always produced TOSA
# output as an intermediate, some flows compile to device binaries in
# preprocess and some consume TOSA fb directly.
if output_format == "vela":
# Emit vela_bin_stream format
binary = vela_compile(tosa_fb)
elif output_format == "tosa":
# Emit TOSA flatbuffer
binary = bytes(tosa_fb.serialize())
else:
raise RuntimeError(f"Unknown format {output_format}")

return PreprocessResult(processed_bytes=binary)
10 changes: 10 additions & 0 deletions backends/arm/cmake/Dependencies.cmake
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
# Copyright 2023 Arm Limited and/or its affiliates.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.

# Root of the vendored third-party sources, relative to the directory whose
# CMakeLists.txt includes this file.
set(THIRD_PARTY_ROOT "${CMAKE_CURRENT_SOURCE_DIR}/third-party")

# Ethos-U driver
# Only the header location is published here. The including CMakeLists.txt
# attaches it to executorch_delegate_ethos_u with target_include_directories(),
# so no directory-scoped include_directories() is issued: that would add the
# path to every target defined in this directory and below.
set(DRIVER_ETHOSU_INCLUDE_DIR "${THIRD_PARTY_ROOT}/ethos-u-core-driver/include")
53 changes: 53 additions & 0 deletions backends/arm/cmake/build.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
#!/bin/bash
# Copyright 2023 Arm Limited and/or its affiliates.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.
set -e

#
# Setup toolchain
#
# Directory containing this script; toolchain.sh and the unpacked compiler
# live next to it.
BASEDIR=$(realpath "$(dirname "$0")")
echo "building using build.sh in $BASEDIR"

# `uname -m` is used rather than `uname -i`: -i is non-portable and prints
# "unknown" on many distributions, which would break the toolchain path.
ARCH=$(uname -m)
GCCPATH=${BASEDIR}/arm-gnu-toolchain-12.3.rel1-${ARCH}-arm-none-eabi/bin/

echo $GCCPATH
if test -d "${GCCPATH}"; then
    echo Using exising compiler ${GCCPATH}
else
    # Download and unpack the cross compiler next to this script.
    pushd "${BASEDIR}/"
    ./toolchain.sh
    popd
fi
export PATH=${PATH}:${GCCPATH}

echo building with $(arm-none-eabi-gcc -v 2>&1 | grep "^gcc")


#
# Prepare and run clean build
#
# NOTE: the paths below are relative, so the script is expected to be run
# from the repository root.
rm -rf buck-out/ build/lib/ cmake-out/
rm -rf cmake-corstone
mkdir cmake-corstone
cd cmake-corstone

cmake -DFLATC_EXECUTABLE=flatc \
      -DEXECUTORCH_BUILD_XNNPACK=OFF \
      -DEXECUTORCH_BUILD_HOST_TARGETS=OFF \
      -DEXECUTORCH_BUILD_ARM_BAREMETAL=ON \
      -DCMAKE_SYSTEM_PROCESSOR=cortex-m55+nodsp+nofp \
      -DETHOSU_TARGET_NPU_CONFIG=ethos-u55-128 \
      --toolchain backends/arm/cmake/arm-none-eabi-gcc.cmake \
      -DCMAKE_BUILD_TYPE=Release \
      -DEXECUTORCH_ENABLE_LOGGING_RELEASE_MODE=ON \
      ..

cd ..
# The delegate library target is named executorch_delegate_ethos_u in
# backends/arm/CMakeLists.txt (it is no longer called ethos_u).
# NOTE(review): ethosu_core_driver is not defined by any CMake code visible
# here - confirm that target still exists.
cmake --build cmake-corstone -j9 --target executorch_delegate_ethos_u ethosu_core_driver executorch portable_ops_lib portable_kernels
12 changes: 12 additions & 0 deletions backends/arm/cmake/toolchain.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
#!/bin/bash
# Copyright 2023 Arm Limited and/or its affiliates.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.
set -e

# Cross compiler for Arm baremetal (e.g. Corstone-300 FVP or silicon)
# `uname -m` is used rather than `uname -i`: -i is non-portable and prints
# "unknown" on many distributions.
ARCH=$(uname -m)
TOOLCHAIN=arm-gnu-toolchain-12.3.rel1-${ARCH}-arm-none-eabi

# --fail makes curl exit non-zero on an HTTP error instead of saving the error
# page as gcc.tar.xz; --location follows redirects.
curl --fail --location -o gcc.tar.xz "https://armkeil.blob.core.windows.net/developer/Files/downloads/gnu/12.3.rel1/binrel/${TOOLCHAIN}.tar.xz"
tar xf gcc.tar.xz

# Use the directory that was actually downloaded for this host architecture
# (previously hard-coded to aarch64). This export only reaches the caller's
# environment when the script is sourced rather than executed.
export PATH=${PATH}:$(cd "${TOOLCHAIN}/bin/" && pwd)
Loading