Skip to content

Commit f93bc1e

Browse files
committed
[XNNPACK][Weights Cache] Initial Weights Cache Design with NamedDataMap
Pull Request resolved: #9154 XNNWeightsCache Design with NamedDataMap. The intent of the weights cache is for tensors to be loaded (via name) through the named data map. APIs to be used by XNNCompiler: - load_unpacked_data - Takes in a string name (tensor name). The weights cache loads the data for this string from the named data map and returns the pointer. It also creates a mapping of this pointer to the name which is later used by the XNNPACK's internal weight cache implementation - free_unpacked_data - Frees all the unpacked data loaded from NamedDataMap. This is only safe to call after xnn_create_runtime has been called. This is because create_runtime takes unpacked data pointers and packs them into a separate buffer. - a couple getter methods - get_packed_data_names - get_unpacked_data_names - get_num_packed_data - get() (gets the xnn_weights_cache object) Internal APIs used by XNNPACK Library - look_up - takes a cache key (weight and bias pointers) and looks up the offset to the packed weight if it exists - look_up_or_insert - takes a cache key and pointer to packed weights and looks up the offset if it exists, or inserts a new packed weight into the cache and returns that offset - offset_to_addr - gets offset and returns address to packed pointer - reserve_space - returns memory address with appropriate size for XNNPACK to populate with packed weights (I want to use the runtime_allocator for this but I don't think we have the right sizes, so for now we are just using a string buffer and resizing it) - is_finalized - since this cache doesn't necessarily need to care about a finalized state we always return true. - delete_cache - deletes cache ghstack-source-id: 271090605 @exported-using-ghexport Differential Revision: [D70885917](https://our.internmc.facebook.com/intern/diff/D70885917/)
1 parent d83495c commit f93bc1e

File tree

6 files changed

+691
-1
lines changed

6 files changed

+691
-1
lines changed
Lines changed: 227 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,227 @@
1+
/*
2+
* Copyright (c) Meta Platforms, Inc. and affiliates.
3+
* All rights reserved.
4+
*
5+
* This source code is licensed under the BSD-style license found in the
6+
* LICENSE file in the root directory of this source tree.
7+
*/
8+
9+
#include <executorch/backends/xnnpack/runtime/XNNWeightsCache.h>
10+
#include <executorch/runtime/core/memory_allocator.h>
11+
#include <executorch/runtime/core/error.h>
12+
#include <sys/stat.h>
13+
#include <xnnpack.h>
14+
namespace executorch {
15+
namespace backends {
16+
namespace xnnpack {
17+
namespace delegate {
18+
19+
using executorch::runtime::MemoryAllocator;
20+
using executorch::runtime::NamedDataMap;
21+
22+
/**
 * Constructs an empty weights cache and installs the callbacks of the
 * xnn_weights_cache_provider so that XNNPACK calls back into this object.
 *
 * XNNPACK invokes every callback with a `void*` context. The static member
 * functions take an `XNNWeightsCache*`, so each one is wrapped in a
 * capture-less lambda with the exact signature the provider expects; the
 * lambda converts the context and forwards the call. Casting the member
 * function pointers to the provider's pointer types instead would make every
 * call go through a pointer of a mismatched function type, which is undefined
 * behaviour.
 */
XNNWeightsCache::XNNWeightsCache()
    : runtime_allocator_(nullptr),
      named_data_map_(nullptr),
      is_finalized_(false) {
  weights_cache_.context = this;

  weights_cache_.look_up =
      [](void* context,
         const xnn_weights_cache_look_up_key* cache_key) -> size_t {
    return XNNWeightsCache::look_up(
        static_cast<XNNWeightsCache*>(context), cache_key);
  };

  weights_cache_.reserve_space = [](void* context, size_t n) -> void* {
    return XNNWeightsCache::reserve_space(
        static_cast<XNNWeightsCache*>(context), n);
  };

  weights_cache_.look_up_or_insert =
      [](void* context,
         const xnn_weights_cache_look_up_key* cache_key,
         void* ptr,
         size_t size) -> size_t {
    return XNNWeightsCache::look_up_or_insert(
        static_cast<XNNWeightsCache*>(context), cache_key, ptr, size);
  };

  weights_cache_.is_finalized = [](void* context) -> bool {
    return XNNWeightsCache::is_finalized(
        static_cast<XNNWeightsCache*>(context));
  };

  weights_cache_.offset_to_addr = [](void* context, size_t offset) -> void* {
    return XNNWeightsCache::offset_to_addr(
        static_cast<XNNWeightsCache*>(context), offset);
  };

  weights_cache_.delete_cache = [](void* context) -> enum xnn_status {
    return XNNWeightsCache::delete_cache(
        static_cast<XNNWeightsCache*>(context));
  };
}
38+
39+
/**
 * Prepares the cache for the next runtime creation: stores the allocator and
 * the named data map to use, and marks the cache as not finalized.
 *
 * @param[in] runtime_allocator Allocator stored for this runtime.
 * @param[in] named_data_map Map that load_unpacked_data reads from.
 * @return Always Error::Ok.
 */
Error XNNWeightsCache::initialize_for_runtime(
    MemoryAllocator* runtime_allocator,
    const NamedDataMap* named_data_map) {
  // A new runtime is about to be built, so the cache is open again.
  this->is_finalized_ = false;
  this->named_data_map_ = named_data_map;
  this->runtime_allocator_ = runtime_allocator;
  return Error::Ok;
}
50+
51+
/**
 * Finalizes the cache once the runtime has been created.
 *
 * Frees every unpacked buffer that was loaded, forgets the pointer-to-name
 * mapping, and for each packed entry touched while building the current
 * runtime bumps its reference count and clears its in_current_runtime flag.
 *
 * @return The names of the packed entries used by the runtime just built.
 */
Result<std::vector<std::string>> XNNWeightsCache::finalize_for_runtime() {
  is_finalized_ = true;

  // The unpacked inputs have been packed by create_runtime already, so the
  // original buffers are no longer needed.
  for (size_t i = 0; i < unpacked_data_.size(); ++i) {
    unpacked_data_[i].Free();
  }
  unpacked_data_.clear();
  unpacked_data_to_name_.clear();

  // Collect the entries this runtime uses and account for the new user.
  std::vector<std::string> names_in_runtime;
  for (auto it = name_to_packed_data_metadata_.begin();
       it != name_to_packed_data_metadata_.end();
       ++it) {
    PackedDataMeta& meta = it->second;
    if (!meta.in_current_runtime) {
      continue;
    }
    meta.ref_count++;
    meta.in_current_runtime = false;
    names_in_runtime.push_back(it->first);
  }

  return names_in_runtime;
}
75+
76+
77+
/**
 * Loads the named data from the named data map, keeps the returned buffer
 * alive inside this cache, and records which name the data pointer belongs to.
 *
 * @param[in] name The name of the data to load.
 * @return Pointer to the loaded bytes, or Error::InvalidExternalData when the
 *     named data map could not provide the data.
 */
Result<const uint8_t*> XNNWeightsCache::load_unpacked_data(
    const std::string& name) {
  Result<FreeableBuffer> lookup = named_data_map_->get_data(name.c_str());
  if (lookup.ok()) {
    FreeableBuffer& buffer = lookup.get();
    // Read the address before the buffer is moved into the cache.
    const uint8_t* bytes = static_cast<const uint8_t*>(buffer.data());
    unpacked_data_.push_back(std::move(buffer));
    unpacked_data_to_name_[bytes] = name;
    return bytes;
  }

  ET_LOG(Error, "Failed to load constant data for key %s", name.c_str());
  return Error::InvalidExternalData;
}
89+
90+
Error XNNWeightsCache::delete_packed_data(const std::vector<std::string>& packed_data_names){
91+
if (!is_finalized_){
92+
ET_LOG(Error, "Error, attempted to delete packed data from the cache but the cache is not finalized");
93+
return Error::InvalidArgument;
94+
}
95+
for (const std::string& name : packed_data_names){
96+
auto entry = name_to_packed_data_metadata_.find(name);
97+
if (entry == name_to_packed_data_metadata_.end()){
98+
ET_LOG(Error, "Error, attempted to deleted packed data: %s, from the cache but it wasn't found", name.c_str());
99+
return Error::InvalidArgument;
100+
} else {
101+
entry->second.ref_count--;
102+
if (entry->second.ref_count == 0) {
103+
void* packed_data_ptr = packed_data_ptrs_[entry->second.offset];
104+
// Erase the key/value from the map frees the pointer holding the packed data
105+
packed_pointer_to_container_.erase(packed_data_ptr);
106+
// remove the pointer from the packed_data_ptrs_
107+
packed_data_ptrs_[entry->second.offset] = nullptr;
108+
// Erase the name to packed metadata entry
109+
name_to_packed_data_metadata_.erase(entry->first);
110+
}
111+
}
112+
}
113+
114+
return Error::Ok;
115+
}
116+
117+
118+
/**
 * Looks up the packed data that corresponds to the given cache key.
 *
 * The packed entry name is the registered name of the kernel pointer,
 * followed by the registered name of the bias pointer when the key has a bias
 * whose pointer is registered. A hit also flags the entry as used by the
 * runtime currently being created.
 *
 * @param[in] context The cache to search.
 * @param[in] cache_key Key holding the unpacked kernel and bias pointers.
 * @return The offset of the packed data, or SIZE_MAX if the kernel pointer is
 *     not registered or nothing has been packed under the resulting name.
 */
size_t XNNWeightsCache::look_up(
    XNNWeightsCache* context,
    const xnn_weights_cache_look_up_key* cache_key) {
  const auto& names_by_pointer = context->unpacked_data_to_name_;

  // The kernel pointer must have been loaded through this cache.
  const auto kernel_it = names_by_pointer.find(cache_key->kernel);
  if (kernel_it == names_by_pointer.end()) {
    return SIZE_MAX;
  }

  // Build the combined "<weight><bias>" name.
  std::string packed_name = kernel_it->second;
  if (cache_key->bias != nullptr) {
    const auto bias_it = names_by_pointer.find(cache_key->bias);
    if (bias_it != names_by_pointer.end()) {
      packed_name.append(bias_it->second);
    }
  }

  // Has anything been packed under that name yet?
  auto& packed_metadata = context->name_to_packed_data_metadata_;
  const auto packed_it = packed_metadata.find(packed_name);
  if (packed_it == packed_metadata.end()) {
    return SIZE_MAX;
  }

  PackedDataMeta& meta = packed_it->second;
  meta.in_current_runtime = true;
  return meta.offset;
}
149+
150+
/**
 * Reserves `n` bytes, aligned to kPackedAllocationAlignment, for XNNPACK to
 * write packed weights into.
 *
 * The memory is backed by a std::string that is over-allocated by one
 * alignment unit and stored in packed_pointer_to_container_, keyed by the
 * aligned address that is returned.
 *
 * @param[in] context The cache that owns the reserved memory.
 * @param[in] n Number of bytes XNNPACK needs.
 * @return Aligned address with at least `n` usable bytes behind it.
 */
void* XNNWeightsCache::reserve_space(XNNWeightsCache* context, size_t n) {
  // TODO: the runtime allocator stored on the cache is not used here yet.
  const size_t alignment = kPackedAllocationAlignment;

  std::string data_container;
  // Over-allocate so that an aligned address with n bytes behind it always
  // exists inside the buffer.
  data_container.resize(n + alignment);

  // Round the start of the buffer up to the next multiple of the alignment.
  // Unsigned arithmetic keeps the remainder well defined for any address.
  const uintptr_t base = reinterpret_cast<uintptr_t>(data_container.data());
  const uintptr_t aligned = (base + alignment - 1) / alignment * alignment;
  void* aligned_space = reinterpret_cast<void*>(aligned);

  // NOTE(review): this relies on the moved-from string handing over its heap
  // buffer so that aligned_space stays valid. That should hold here because
  // the buffer is always at least `alignment` bytes — confirm.
  context->packed_pointer_to_container_[aligned_space] =
      std::move(data_container);
  return aligned_space;
}
164+
165+
/**
 * Returns the offset of the packed data for `cache_key`, registering `ptr` as
 * a new packed entry when the key is not cached yet.
 *
 * @param[in] context The cache to search and update.
 * @param[in] cache_key Key holding the unpacked kernel and bias pointers.
 * @param[in] ptr Freshly packed data for this key.
 * @param[in] size Size in bytes of the packed data at `ptr`.
 * @return The offset of the cached packed data. SIZE_MAX if the key is cached
 *     but its bytes differ from the data at `ptr`.
 */
size_t XNNWeightsCache::look_up_or_insert(
    XNNWeightsCache* context,
    const xnn_weights_cache_look_up_key* cache_key,
    void* ptr,
    size_t size) {
  const size_t offset = look_up(context, cache_key);

  if (offset != SIZE_MAX) {
    void* saved_ptr = offset_to_addr(context, offset);
    if (0 == memcmp(ptr, saved_ptr, size)) {
      // The cached copy is the one that will be used, so the buffer that was
      // reserved for this duplicate can be released instead of being kept
      // until the whole cache is destroyed.
      // NOTE(review): assumes XNNPACK does not touch `ptr` again after a hit.
      if (ptr != saved_ptr) {
        context->packed_pointer_to_container_.erase(ptr);
      }
      return offset;
    }
    // Failure, cache is out of date
    return SIZE_MAX;
  }

  // Not cached yet: the new packed pointer goes at the end of the list.
  const size_t next_offset = context->packed_data_ptrs_.size();
  const auto& names_by_pointer = context->unpacked_data_to_name_;
  const auto entry = names_by_pointer.find(cache_key->kernel);

  if (entry != names_by_pointer.end()) {
    // Same naming scheme as look_up: weight name followed by the bias name.
    std::string weight_bias_name = entry->second;
    if (cache_key->bias != nullptr) {
      const auto bias_entry = names_by_pointer.find(cache_key->bias);
      if (bias_entry != names_by_pointer.end()) {
        weight_bias_name.append(bias_entry->second);
      }
    }
    PackedDataMeta& packed_data_metadata =
        context->name_to_packed_data_metadata_[weight_bias_name];
    packed_data_metadata.offset = next_offset;
    // ref_count is only incremented after finalizing for runtime
    packed_data_metadata.ref_count = 0;
    packed_data_metadata.in_current_runtime = true;
  } else {
    ET_LOG(
        Info,
        "Warning: Unpacked weight and bias were not registered with names, "
        "this will add new cache entries for packed data and may affect performance.");
  }
  context->packed_data_ptrs_.push_back(ptr);

  return next_offset;
}
211+
212+
/**
 * Reports the finalized flag stored on the given cache.
 *
 * @param[in] context The cache to query.
 * @return The current value of the cache's is_finalized_ flag.
 */
bool XNNWeightsCache::is_finalized(XNNWeightsCache* context) {
  const bool finalized = context->is_finalized_;
  return finalized;
}
215+
216+
/**
 * Translates an offset returned by look_up / look_up_or_insert into the
 * address of the packed data. The offset is not range checked.
 *
 * @param[in] context The cache that owns the packed data.
 * @param[in] offset Index into the cache's list of packed pointers.
 * @return The packed data pointer stored at that index.
 */
void* XNNWeightsCache::offset_to_addr(XNNWeightsCache* context, size_t offset) {
  const std::vector<void*>& packed_pointers = context->packed_data_ptrs_;
  return packed_pointers[offset];
}
219+
220+
/**
 * Delete callback of the weights cache provider. It does nothing with the
 * cache and reports success.
 *
 * @param[in] context The cache; unused.
 * @return Always xnn_status_success.
 */
enum xnn_status XNNWeightsCache::delete_cache(XNNWeightsCache* context) {
  (void)context;
  return xnn_status_success;
}
223+
224+
} // namespace delegate
225+
} // namespace xnnpack
226+
} // namespace backends
227+
} // namespace executorch
Lines changed: 166 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,166 @@
1+
/*
2+
* Copyright (c) Meta Platforms, Inc. and affiliates.
3+
* All rights reserved.
4+
*
5+
* This source code is licensed under the BSD-style license found in the
6+
* LICENSE file in the root directory of this source tree.
7+
*/
8+
9+
#pragma once
10+
11+
#include <xnnpack.h>
12+
13+
#include <executorch/runtime/executor/pte_data_map.h>
14+
#include <executorch/runtime/core/memory_allocator.h>
15+
#include <executorch/runtime/core/error.h>
16+
#include <executorch/runtime/core/result.h>
17+
#include <string>
18+
#include <array>
19+
#include <unordered_map>
20+
21+
namespace executorch {
22+
namespace backends {
23+
namespace xnnpack {
24+
namespace delegate {
25+
26+
using executorch::runtime::MemoryAllocator;
27+
using executorch::runtime::NamedDataMap;
28+
using executorch::runtime::Error;
29+
using executorch::runtime::Result;
30+
using executorch::runtime::FreeableBuffer;
31+
32+
/**
 * Bookkeeping kept for every named piece of packed data in the cache.
 */
struct PackedDataMeta {
  // Index of the packed data pointer in the cache's list of packed pointers.
  size_t offset;
  // How many xnn_runtime_t instances use this packed data.
  size_t ref_count;
  // Set when the packed data was inserted or looked up while the runtime
  // currently being created was built.
  bool in_current_runtime;
};
40+
41+
class XNNWeightsCache {
42+
public:
43+
XNNWeightsCache();
44+
45+
/**
46+
* Initializes the XNNWeightsCache for the next xnn_create_runtime
47+
*/
48+
Error initialize_for_runtime(
49+
MemoryAllocator* runtime_allocator,
50+
const NamedDataMap* named_data_map);
51+
52+
/**
53+
* Finalizes the weights cache after the weights have been packed
54+
* in xnn_create_runtime.
55+
*
56+
* This should only be called after creating the runtime. Returns
57+
* the name of all the packed weights used by this runtime
58+
*/
59+
Result<std::vector<std::string>> finalize_for_runtime();
60+
61+
// Taken from XNN_ALLOCATION_ALIGNMENT in xnnpack/common.h
62+
static const size_t kPackedAllocationAlignment = 64;
63+
64+
/**
65+
* Returns XNNPACK's underlying weights_cache pointer
66+
*/
67+
inline xnn_weights_cache_t get() {
68+
return (xnn_weights_cache_t)&weights_cache_;
69+
}
70+
71+
/**
72+
* Returns the number of unpacked data
73+
*/
74+
inline size_t get_num_unpacked_data(){
75+
return unpacked_data_.size();
76+
};
77+
78+
/**
79+
* Returns the names of all unpacked data
80+
*/
81+
inline std::vector<std::string> get_unpacked_data_names(){
82+
std::vector<std::string> names;
83+
for (const auto& pair : unpacked_data_to_name_) {
84+
names.push_back(pair.second);
85+
}
86+
return names;
87+
};
88+
89+
/**
90+
* Returns the packed data names
91+
*/
92+
inline std::vector<std::string> get_packed_data_names(){
93+
std::vector<std::string> names;
94+
for (const auto& pair : name_to_packed_data_metadata_) {
95+
names.push_back(pair.first);
96+
}
97+
return names;
98+
};
99+
100+
101+
/**
102+
* Loads unpacked named data from the NamedDataMap into this XNNWeightsCache
103+
* and returns a pointer to the unpacked data. This unpacked data is given
104+
* to XNNPACK's define_tensor APIs, and used as the cache key for look_up_or_insert.
105+
* @param[in] name The name of the data to load
106+
* @param[out] out the pointer to the unpacked data that was loaded
107+
*/
108+
Result<const uint8_t*> load_unpacked_data(const std::string& name);
109+
110+
/**
111+
* Deletes the packed data associated with the names given.
112+
* Decrements the ref_count if the packed data is used by other
113+
* models
114+
*
115+
*/
116+
Error delete_packed_data(const std::vector<std::string>& packed_names);
117+
118+
119+
private:
120+
// Runtime Allocator used to reserve memory for packed weights
121+
MemoryAllocator* runtime_allocator_;
122+
123+
// Named Data Map used to load named data
124+
const NamedDataMap* named_data_map_;
125+
126+
// Map of unpacked pointers to the data name
127+
std::unordered_map<const void*, std::string> unpacked_data_to_name_;
128+
// Map of data names to offset into the packed data
129+
std::unordered_map<std::string, PackedDataMeta> name_to_packed_data_metadata_;
130+
// Vector holding list of pointers to the packed data
131+
std::vector<void*> packed_data_ptrs_;
132+
// vector holding list of strings which are containers for packed_data_ptrs
133+
std::unordered_map<void*, std::string> packed_pointer_to_container_;
134+
// Vector hodling list of unpacked freeable buffers
135+
std::vector<FreeableBuffer> unpacked_data_;
136+
// xnnpack's weight cache provider
137+
xnn_weights_cache_provider weights_cache_;
138+
// whether or not the weight cache is finalized
139+
bool is_finalized_;
140+
141+
// Function pointers to override XNNPACK's default xnn_weights_cache_provider
142+
// functions.
143+
static size_t look_up(
144+
XNNWeightsCache* context,
145+
const xnn_weights_cache_look_up_key* cache_key);
146+
147+
static void* reserve_space(XNNWeightsCache* context, size_t n);
148+
149+
static size_t look_up_or_insert(
150+
XNNWeightsCache* context,
151+
const xnn_weights_cache_look_up_key* cache_key,
152+
void* ptr,
153+
size_t size);
154+
155+
static bool is_finalized(XNNWeightsCache* context);
156+
157+
static void* offset_to_addr(XNNWeightsCache* context, size_t offset);
158+
159+
static enum xnn_status delete_cache(XNNWeightsCache* context);
160+
161+
};
162+
163+
} // namespace delegate
164+
} // namespace xnnpack
165+
} // namespace backends
166+
} // namespace executorch

0 commit comments

Comments
 (0)