Skip to content
Closed
Show file tree
Hide file tree
Changes from 2 commits
Commits
Show all changes
25 commits
Select commit Hold shift + click to select a range
521124b
[NOMERGE] stub out tests with codegen issues temporarily
robell Oct 3, 2023
2a6dce5
Initial Ethos-U runtime backend
robell Oct 3, 2023
2083a71
Fixed error messages on runtime init
robell Oct 4, 2023
f5cff0b
lintrunner cleanup
robell Oct 4, 2023
a1fbdb4
Enable logging in Release mode
digantdesai Oct 4, 2023
918c798
Move arm example dir under backends
digantdesai Oct 4, 2023
b2a395a
Add option specify a list of ops
digantdesai Oct 4, 2023
862e510
Add softmax as another toy model
digantdesai Oct 4, 2023
69c132d
patches and utils for setting up baremetal stack for cs300
digantdesai Oct 4, 2023
e87db01
Add example script to run on cpu
digantdesai Oct 4, 2023
afd8080
Add ArmBackend to example scripts
robell Oct 4, 2023
a29523f
Add delegate test and FVP output
robell Oct 4, 2023
8efd2e3
align builds on same cmake toolchain
robell Oct 4, 2023
52c1c09
Fix delegate runner patch
robell Oct 4, 2023
8ae524c
cmake compiler and log behaviour fixing
robell Oct 4, 2023
a64e835
Minimal example of AoT with ArmPartitioner+Vela
robell Oct 5, 2023
ec03f3c
Generate pte for delegate test on the fly
robell Oct 5, 2023
1ba71d7
Added support for variable input output patterns
robell Oct 5, 2023
f29715e
Handle multiple delegate inputs with SRAM offsets
robell Oct 5, 2023
3b35ff6
Add TOSA ref model and Vela dependencies
robell Oct 5, 2023
94c598e
Cleanup from lintrunner and other bits of tidyup
robell Oct 5, 2023
683d428
Removed ethos u driver build and cmsis dependency
robell Oct 5, 2023
3292199
renamed lib ethos_u to executorch_delegate_ethos_u
robell Oct 5, 2023
e6ede01
lintfix
robell Oct 5, 2023
76a393a
tidied delegate_runner output
robell Oct 5, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions .gitmodules
Original file line number Diff line number Diff line change
Expand Up @@ -37,3 +37,9 @@
[submodule "examples/third-party/llama"]
path = examples/third-party/llama
url = https://github.com/facebookresearch/llama.git
[submodule "backends/arm/third-party/ethos-u-core-driver"]
path = backends/arm/third-party/ethos-u-core-driver
url = https://git.mlplatform.org/ml/ethos-u/ethos-u-core-driver.git
[submodule "backends/arm/third-party/cmsis"]
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What is this used for?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

These are essentially the driver headers for ethos-u we need to link against.

I've also added a build of the ethos-u driver in executorch cmake (ethosu_core_driver target) but I moved to link against the demo app library build of it and can remove the target.

We still need this small submodule for the header/interface. It is fortunately small, but unfortunately it pulls in CMSIS (which is huge) for a few platform defines - this is the area where we discussed having a build script pull these in. Given ./run has a checkout of core_platform and core_software, I'll look at moving to those and removing these submodules.

Copy link
Collaborator Author

@robell robell Oct 5, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I've removed most of this: we now just have the small ethos-u-core-driver submodule for headers, we don't build it in tree, and CMSIS is removed. The driver builds in the driver/app repo, and that app links the two together.

As it's only 25 files, I suggest we keep the driver as a submodule to keep things simple until there's a more general "selective submodule" approach.

path = backends/arm/third-party/cmsis
url = https://github.com/ARM-software/CMSIS_5.git
8 changes: 8 additions & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -105,6 +105,10 @@ if(BUILD_SELECTIVE_BUILD_TEST)
option(SELECT_OPS_YAML "Register all the ops from a given yaml file" OFF)
endif()

# Opt-in switch (default OFF) for the Arm baremetal flow targeting Cortex-M and
# Ethos-U. When ON, the backends/arm subdirectory is added to the build.
option(EXECUTORCH_BUILD_ARM_BAREMETAL
"Build the Arm Baremetal flow for Cortex-M and Ethos-U" OFF)

# Build xnn_executor_runner which depends on XNNPACK
option(EXECUTORCH_BUILD_XNNPACK
"Build xnn_executor_runner which depends on XNNPACK" OFF)
Expand Down Expand Up @@ -303,6 +307,10 @@ if(EXECUTORCH_BUILD_XNNPACK)
add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/backends/xnnpack)
endif()

# Add the Arm baremetal backend subdirectory only when the
# EXECUTORCH_BUILD_ARM_BAREMETAL option has been switched on.
if(EXECUTORCH_BUILD_ARM_BAREMETAL)
add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/backends/arm)
endif()

# Add selective build subdirectory
if(BUILD_SELECTIVE_BUILD_TEST)
add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/examples/selective_build)
Expand Down
25 changes: 25 additions & 0 deletions backends/arm/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
# Copyright 2023 Arm Limited and/or its affiliates.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.
cmake_minimum_required(VERSION 3.19)

# Ask CMake to write compile_commands.json for this build.
set(CMAKE_EXPORT_COMPILE_COMMANDS ON)

# Source root directory for executorch: two levels above this directory unless
# the including project already provided a value.
if(NOT EXECUTORCH_ROOT)
  set(EXECUTORCH_ROOT ${CMAKE_CURRENT_SOURCE_DIR}/../..)
endif()

include(${EXECUTORCH_ROOT}/build/Utils.cmake)

# Headers are included relative to the directory that contains the executorch
# checkout, hence the extra "..".
set(_common_include_directories ${EXECUTORCH_ROOT}/..)
# NOTE: defined here but not applied to any target in this file.
set(_common_compile_options -Wno-deprecated-declarations)

# Adds the Ethos-U driver and defines DRIVER_ETHOSU_INCLUDE_DIR.
include(cmake/Dependencies.cmake)

# Static library containing the Ethos-U runtime backend.
set(_arm_baremetal_sources
    "${EXECUTORCH_ROOT}/backends/arm/runtime/ArmBackendEthosU.cpp")
add_library(ethos_u STATIC ${_arm_baremetal_sources})
target_include_directories(
  ethos_u PUBLIC ${_common_include_directories} ${DRIVER_ETHOSU_INCLUDE_DIR})
69 changes: 63 additions & 6 deletions backends/arm/arm_backend.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
import operator
import os
import tempfile
import subprocess
from typing import final, List

import numpy as np
Expand Down Expand Up @@ -142,6 +143,64 @@ def dbg_tosa_dump(tosa_fb, path):
f.write(js)
f.close()

# Output to Vela with current file-based compilation
# WARNING: if this changes, the runtime reader also needs to change
def vela_compile(tosa_fb):
    """Compile a TOSA flatbuffer with Vela and repackage the result as one binary.

    The serialized graph is written to a temporary directory, the ``vela``
    command line tool is run on it for the ethos-u55-128 configuration, and the
    arrays of the resulting ``output/out_sg0_vela.npz`` are packed into a stream:

    - 16 byte header ``vela_bin_stream`` (null terminated)
    - one block per NPZ entry, in ``data.keys()`` order, followed by a
      zero-filled ``scratch_data`` block
    - 16 byte footer ``vela_end_stream`` (null terminated)

    Each block is laid out as:

    - 16 byte null terminated name (truncated to 15 bytes, zero padded)
    - 16 byte little-endian length field (i.e. a 4 byte int32 length followed
      by 12 zero bytes for any realistic size)
    - the block data, zero padded to a multiple of 16 bytes

    Args:
        tosa_fb: serializer object whose ``serialize()`` returns the TOSA
            flatbuffer as a bytes-like object.

    Returns:
        bytes: the packed stream described above.

    Raises:
        subprocess.CalledProcessError: if ``vela`` exits with a non-zero status.
    """

    def pad_len(size):
        # Number of zero bytes needed to round `size` up to a multiple of 16.
        return 15 - (size - 1) % 16

    def pack_name(name):
        # 16 byte, null terminated, zero padded block name.
        label = bytes(name, "utf8")[:15]
        return label + b"\x00" * (16 - len(label))

    with tempfile.TemporaryDirectory() as tmpdir:
        print(f"compiling to Vela in {tmpdir}")

        tosaname = "out.tosa"
        # Context manager guarantees the file is flushed and closed before
        # vela reads it, even if the write raises.
        with open(os.path.join(tmpdir, tosaname), "wb") as f:
            f.write(tosa_fb.serialize())

        # invoke vela
        # TODO target ethos-u55-128
        # Run inside tmpdir via cwd= with an argument list instead of a
        # "cd ...; vela ..." shell string, so the temp path never needs quoting.
        subprocess.run(
            ["vela", "--accelerator-config", "ethos-u55-128", tosaname],
            cwd=tmpdir,
            check=True,
        )

        np_path = os.path.join(tmpdir, "output", "out_sg0_vela.npz")
        chunks = []
        with np.load(np_path, allow_pickle=False) as data:
            # Emit the NPZ regions as:
            #  - 16 byte block name null terminated string (padded to 16 if name shorter)
            #  - 4 bytes of int32 block length and 12 bytes of 0's
            #  - block data (padded to 16 byte alignment at end)
            # Repeat for all blocks
            for key in data.keys():
                block_data = data[key].tobytes()
                # We need the actual unpadded block lengths for hw setup
                block_length = len(block_data).to_bytes(16, "little")
                # pad block data to multiple of 16 bytes
                block_data = block_data + b"\x00" * pad_len(len(block_data))
                chunks.append(pack_name(key) + block_length + block_data)

            # Add a block for scratch, inputs and outputs
            # scratch shape is a 1 element array giving us size in bytes
            scratch_name = pack_name("scratch_data")
            scratch_length = data["scratch_shape"][0].item()
            print(f"scratch length = {scratch_length}")
            # Unlike the NPZ blocks, the scratch block records its padded length.
            scratch_length = scratch_length + pad_len(scratch_length)
            scratch_data = b"\x00" * scratch_length
            scratch_field = scratch_length.to_bytes(16, "little")
            print(
                f"lengths {len(scratch_name)} {len(scratch_field)} {len(scratch_data)}"
            )
            chunks.append(scratch_name + scratch_field + scratch_data)
            # TODO are these already in scratch shape? look to be
            # input_shape * input_elem_size
            # output_shape * output_elem_size
            # input_offset and output_offset specify the location these arrays are written from base of scratch

        # return 16 byte VELA bin header + blocks + footer
        header = bytes("vela_bin_stream", "utf-8") + b"\x00"
        footer = bytes("vela_end_stream", "utf-8") + b"\x00"
        return header + b"".join(chunks) + footer

def dbg_fail(node, tosa_fb, path):
dbg_tosa_dump(tosa_fb, path)
Expand Down Expand Up @@ -240,10 +299,6 @@ def preprocess( # noqa: C901
path = spec.value.decode()
debug_output = True

# in non debug builds we still pass files to vela
if path is None:
path = tempfile.mkdtemp(prefix="arm_tosa_")

# Converted output for this subgraph, serializer needs path early as it emits
# const data directly. Path created and data written only in debug builds.
tosa_fb = ts.TosaSerializer(path)
Expand Down Expand Up @@ -881,5 +936,7 @@ def preprocess( # noqa: C901
dbg_tosa_dump(tosa_fb, path)

# Serialize and return the tosa flatbuffer
fb = tosa_fb.serialize()
return PreprocessResult(processed_bytes=bytes(fb))
# fb = bytes(tosa_fb.serialize())
binary = vela_compile(tosa_fb)

return PreprocessResult(processed_bytes=binary)
12 changes: 12 additions & 0 deletions backends/arm/cmake/Dependencies.cmake
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
# Copyright 2023 Arm Limited and/or its affiliates.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.

# This file is pulled in with include(), so CMAKE_CURRENT_SOURCE_DIR is the
# directory of the including CMakeLists.txt (backends/arm), not this cmake/
# directory.
set(THIRD_PARTY_ROOT "${CMAKE_CURRENT_SOURCE_DIR}/third-party")

# Ethos-U driver
set(DRIVER_ETHOSU_SOURCE_DIR "${THIRD_PARTY_ROOT}/ethos-u-core-driver")
set(DRIVER_ETHOSU_INCLUDE_DIR "${DRIVER_ETHOSU_SOURCE_DIR}/include")

# The driver is a git submodule; fail with an actionable message when it has
# not been checked out instead of a bare add_subdirectory() error.
if(NOT EXISTS "${DRIVER_ETHOSU_SOURCE_DIR}/CMakeLists.txt")
  message(
    FATAL_ERROR
      "Ethos-U core driver not found at ${DRIVER_ETHOSU_SOURCE_DIR}. "
      "Run: git submodule update --init backends/arm/third-party/ethos-u-core-driver"
  )
endif()

add_subdirectory("${DRIVER_ETHOSU_SOURCE_DIR}")

# No directory-scope include_directories() here: consumers get the driver
# headers through target_include_directories(... ${DRIVER_ETHOSU_INCLUDE_DIR})
# on the targets that need them.
90 changes: 90 additions & 0 deletions backends/arm/cmake/arm-none-eabi-gcc.cmake
Original file line number Diff line number Diff line change
@@ -0,0 +1,90 @@
# Copyright 2023 Arm Limited and/or its affiliates.
Copy link
Contributor

@digantdesai digantdesai Oct 4, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is this the same as core_platform/cmake/toolchain/arm-none-eabi-gcc.cmake, except that it will respect command-line variables?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'd anticipate further changes and cleaning but it was derived from there, yes.

It has a similar override, but for M55, our only currently supported target. Because of the way the toolchain file is constructed, some of CMake's general ./configure-like steps were invoking it without an appropriate value, so the default prevents the -mfpu flag (which is set by default for an M4 target) from messing things up.

#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.

# Target CPU selection.
# TARGET_CPU is the knob this toolchain file reads. When it is not given but
# the caller passed -DCMAKE_SYSTEM_PROCESSOR=<cpu>, seed TARGET_CPU from that
# value instead of silently replacing it with the cortex-m4 default, which
# would also select the wrong -mcpu/-mfpu/-mfloat-abi flags further down.
if(NOT DEFINED TARGET_CPU AND CMAKE_SYSTEM_PROCESSOR)
  set(TARGET_CPU "${CMAKE_SYSTEM_PROCESSOR}" CACHE STRING "Target CPU")
endif()
set(TARGET_CPU "cortex-m4" CACHE STRING "Target CPU")
string(TOLOWER "${TARGET_CPU}" CMAKE_SYSTEM_PROCESSOR)

# try_compile() re-reads this file in a scratch project that does not inherit
# the cache; forward TARGET_CPU so compiler checks use the same CPU flags.
list(APPEND CMAKE_TRY_COMPILE_PLATFORM_VARIABLES TARGET_CPU)
list(REMOVE_DUPLICATES CMAKE_TRY_COMPILE_PLATFORM_VARIABLES)

# Bare-metal target: "Generic" means there is no operating system.
set(CMAKE_SYSTEM_NAME Generic)

# GNU Arm embedded tools, looked up by name.
set(CMAKE_C_COMPILER "arm-none-eabi-gcc")
set(CMAKE_CXX_COMPILER "arm-none-eabi-g++")
set(CMAKE_ASM_COMPILER "arm-none-eabi-gcc")
set(CMAKE_LINKER "arm-none-eabi-ld")

# Executables are emitted as .elf files; compiler checks build a static library
# so that no link step is needed during configuration.
set(CMAKE_EXECUTABLE_SUFFIX ".elf")
set(CMAKE_TRY_COMPILE_TARGET_TYPE STATIC_LIBRARY)

# Programs are never searched in the find root path; libraries and headers are
# searched only there.
set(CMAKE_FIND_ROOT_PATH_MODE_PROGRAM NEVER)
set(CMAKE_FIND_ROOT_PATH_MODE_LIBRARY ONLY)
set(CMAKE_FIND_ROOT_PATH_MODE_INCLUDE ONLY)

# Select C/C++ version
set(CMAKE_C_STANDARD 11)
set(CMAKE_CXX_STANDARD 14)

# CPU name handed to GCC: the system processor with cortex-m85 mapped to
# cortex-m55 (presumably because the compiler lacks an m85 target - TODO confirm).
string(REPLACE "cortex-m85" "cortex-m55" GCC_CPU ${CMAKE_SYSTEM_PROCESSOR})

# Compile options
# CPU and instruction set.
add_compile_options(-mcpu=${GCC_CPU} -mthumb)
# DWARF 3 debug information in Debug configurations only.
add_compile_options("$<$<CONFIG:DEBUG>:-gdwarf-3>")
# C++ sources only: no unwind tables, RTTI or exceptions.
add_compile_options(
  "$<$<COMPILE_LANGUAGE:CXX>:-fno-unwind-tables;-fno-rtti;-fno-exceptions>")
# Place each data item and function in its own section.
add_compile_options(-fdata-sections -ffunction-sections)

# Compile defines
# NDEBUG is defined for every configuration except Debug.
add_compile_definitions("$<$<NOT:$<CONFIG:DEBUG>>:NDEBUG>")

# Link options
# Same CPU selection as for compilation, plus the nosys specs file.
add_link_options(-mcpu=${GCC_CPU} -mthumb --specs=nosys.specs)

# Set floating point unit
# An explicit "+fp" / "+nofp" feature suffix on the processor name takes
# precedence; otherwise the float ABI follows the core. Cortex-M4/M7 also get
# an explicit -mfpu setting; every unrecognised processor falls back to soft.
if(CMAKE_SYSTEM_PROCESSOR MATCHES "\\+fp")
  set(FLOAT hard)
elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "\\+nofp")
  set(FLOAT soft)
elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "cortex-m(33|55|85)(\\+|$)")
  set(FLOAT hard)
elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "cortex-m(4|7)(\\+|$)")
  set(FLOAT hard)
  set(FPU_CONFIG "fpv4-sp-d16")
  add_compile_options(-mfpu=${FPU_CONFIG})
  add_link_options(-mfpu=${FPU_CONFIG})
else()
  set(FLOAT soft)
endif()

# Apply the selected float ABI to both compile and link steps.
if(FLOAT)
  add_compile_options(-mfloat-abi=${FLOAT})
  add_link_options(-mfloat-abi=${FLOAT})
endif()

# --nmagic and --gc-sections are handed to the linker through CMake's LINKER:
# prefix. --gc-sections complements the -fdata-sections/-ffunction-sections
# compile options so that unreferenced sections can be discarded.
add_link_options(LINKER:--nmagic,--gc-sections)

# Compilation warnings
# The commented-out entries are warnings that are currently not enabled; only
# the two -Wno-* suppressions at the end of the list are active.
add_compile_options(
# -Wall
# -Wextra

# -Wcast-align
# -Wdouble-promotion
# -Wformat
# -Wmissing-field-initializers
# -Wnull-dereference
# -Wredundant-decls
# -Wshadow
# -Wswitch
# -Wswitch-default
# -Wunused
-Wno-redundant-decls
-Wno-psabi
)
53 changes: 53 additions & 0 deletions backends/arm/cmake/build.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
#!/bin/bash
# Copyright 2023 Arm Limited and/or its affiliates.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.
set -e

# Cross-builds the Arm baremetal (Cortex-M55 / Ethos-U55) libraries into
# ./cmake-corstone. Run it from the executorch repository root: the cleanup,
# the build directory and the toolchain file are all relative to the current
# directory.

#
# Setup toolchain
#
BASEDIR=$(realpath "$(dirname "$0")")
echo "building using build.sh in $BASEDIR"

# Refuse to run anywhere but the repository root, so the rm -rf below can
# never delete unrelated directories.
TOOLCHAIN_FILE=backends/arm/cmake/arm-none-eabi-gcc.cmake
if ! test -f "${TOOLCHAIN_FILE}"; then
    echo "error: ${TOOLCHAIN_FILE} not found; run this script from the executorch repository root" >&2
    exit 1
fi

ARCH=$(uname -i)
GCCPATH=${BASEDIR}/arm-gnu-toolchain-12.3.rel1-${ARCH}-arm-none-eabi/bin/

echo $GCCPATH
if test -d "${GCCPATH}"; then
    echo Using exising compiler ${GCCPATH}
else
    # toolchain.sh downloads and unpacks into its own directory
    pushd "${BASEDIR}/"
    ./toolchain.sh
    popd
fi
export PATH=${PATH}:${GCCPATH}

echo building with `arm-none-eabi-gcc -v 2>&1 | grep "^gcc"`


#
# Prepare and run clean build
#
rm -rf buck-out/ build/lib/ cmake-out/
rm -rf cmake-corstone
mkdir cmake-corstone
cd cmake-corstone

#cmake -DBUCK2=buck2 ..

#cmake --toolchain backends/arm/cmake/arm-none-eabi-gcc.cmake ..
# TARGET_CPU is the variable the toolchain file reads to pick -mcpu and the
# float ABI; without it the toolchain file falls back to its own default CPU
# regardless of CMAKE_SYSTEM_PROCESSOR.
cmake -DFLATC_EXECUTABLE=flatc \
      -DEXECUTORCH_BUILD_XNNPACK=OFF \
      -DEXECUTORCH_BUILD_HOST_TARGETS=OFF \
      -DEXECUTORCH_BUILD_ARM_BAREMETAL=ON \
      -DTARGET_CPU=cortex-m55+nodsp+nofp \
      -DCMAKE_SYSTEM_PROCESSOR=cortex-m55+nodsp+nofp \
      -DETHOSU_TARGET_NPU_CONFIG=ethos-u55-128 \
      --toolchain "${TOOLCHAIN_FILE}" \
      -DCMAKE_BUILD_TYPE=Release \
      -DEXECUTORCH_ENABLE_LOGGING_RELEASE_MODE=ON \
      ..

cd ..
cmake --build cmake-corstone -j9 --target ethos_u ethosu_core_driver executorch portable_ops_lib portable_kernels
12 changes: 12 additions & 0 deletions backends/arm/cmake/toolchain.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
#!/bin/bash
# Copyright 2023 Arm Limited and/or its affiliates.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.
set -e

# Cross compiler for Arm baremetal (e.g. Corstone-300 FVP or silicon).
# Downloads and unpacks the GNU Arm toolchain for the host architecture into
# the current directory.
ARCH=$(uname -i)
TOOLCHAIN=arm-gnu-toolchain-12.3.rel1-${ARCH}-arm-none-eabi

# --fail makes curl exit non-zero on an HTTP error (so set -e stops here
# instead of handing an error page to tar); --location follows redirects.
curl --fail --location -o gcc.tar.xz "https://armkeil.blob.core.windows.net/developer/Files/downloads/gnu/12.3.rel1/binrel/${TOOLCHAIN}.tar.xz"
tar xf gcc.tar.xz

# Use the same ${ARCH} as the download rather than a hard-coded aarch64 path.
# NOTE: this export only reaches the caller when the script is sourced.
export PATH="${PATH}:$(pwd)/${TOOLCHAIN}/bin"
Loading