pytorch
diff --git a/‎backends/xnnpack/runtime/XNNWeightsCache.cpp
Lines changed: 227 additions & 0 deletions b/‎backends/xnnpack/runtime/XNNWeightsCache.cpp
Lines changed: 227 additions & 0 deletions
diff --git a/‎backends/xnnpack/runtime/XNNWeightsCache.h
Lines changed: 166 additions & 0 deletions b/‎backends/xnnpack/runtime/XNNWeightsCache.h
Lines changed: 166 additions & 0 deletions
@@ -0,0 +1,227 @@
+/*
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+
+#include <executorch/backends/xnnpack/runtime/XNNWeightsCache.h>
+#include <executorch/runtime/core/memory_allocator.h>
+#include <executorch/runtime/core/error.h>
+#include <sys/stat.h>
+#include <xnnpack.h>
+namespace executorch {
+namespace backends {
+namespace xnnpack {
+namespace delegate {
+
+using executorch::runtime::MemoryAllocator;
+using executorch::runtime::NamedDataMap;
+
+/**
+ * Constructs an empty, non-finalized cache and fills in the
+ * xnn_weights_cache_provider callback table with this class's static
+ * callbacks.
+ *
+ * The pointer members and the finalized flag are given defined values here so
+ * that querying the cache before initialize_for_runtime() does not read
+ * indeterminate memory.
+ */
+XNNWeightsCache::XNNWeightsCache()
+    : runtime_allocator_(nullptr),
+      named_data_map_(nullptr),
+      is_finalized_(false) {
+  // `context` is set to this instance; presumably XNNPACK passes it back as
+  // the first argument of every callback below -- confirm against xnnpack.h.
+  weights_cache_.context = this;
+
+  // The static callbacks take XNNWeightsCache* where the provider table is
+  // typed with void*, so each one is cast to the table's function-pointer
+  // type.
+  weights_cache_.look_up =
+      reinterpret_cast<size_t (*)(void*, const xnn_weights_cache_look_up_key*)>(
+          XNNWeightsCache::look_up);
+  weights_cache_.reserve_space =
+      reinterpret_cast<void* (*)(void*, size_t)>(
+          XNNWeightsCache::reserve_space);
+  weights_cache_.look_up_or_insert = reinterpret_cast<size_t (*)(
+      void*, const xnn_weights_cache_look_up_key*, void*, size_t)>(
+      XNNWeightsCache::look_up_or_insert);
+  weights_cache_.is_finalized =
+      reinterpret_cast<bool (*)(void*)>(XNNWeightsCache::is_finalized);
+  weights_cache_.offset_to_addr =
+      reinterpret_cast<void* (*)(void*, size_t)>(
+          XNNWeightsCache::offset_to_addr);
+  weights_cache_.delete_cache =
+      reinterpret_cast<enum xnn_status (*)(void*)>(
+          XNNWeightsCache::delete_cache);
+}
+
+/**
+ * Prepares the cache for the next runtime creation.
+ *
+ * Stores the allocator and named-data source on the cache and marks the
+ * cache as not finalized.
+ *
+ * @param[in] runtime_allocator Stored in runtime_allocator_.
+ * @param[in] named_data_map Stored in named_data_map_; load_unpacked_data()
+ *     reads from it.
+ * @return Always Error::Ok.
+ */
+Error XNNWeightsCache::initialize_for_runtime(
+    MemoryAllocator* runtime_allocator,
+    const NamedDataMap* named_data_map) {
+  is_finalized_ = false;
+  named_data_map_ = named_data_map;
+  runtime_allocator_ = runtime_allocator;
+  return Error::Ok;
+}
+
+/**
+ * Marks the cache finalized, releases every unpacked buffer, and bumps the
+ * reference count of each packed entry flagged as belonging to the runtime
+ * that was just created.
+ *
+ * @return The names of the packed entries whose in_current_runtime flag was
+ *     set; the flag is cleared on each of them.
+ */
+Result<std::vector<std::string>> XNNWeightsCache::finalize_for_runtime() {
+  is_finalized_ = true;
+
+  // The unpacked buffers are released here on the expectation that packing
+  // has already happened during runtime creation.
+  for (auto it = unpacked_data_.begin(); it != unpacked_data_.end(); ++it) {
+    it->Free();
+  }
+  unpacked_data_.clear();
+  unpacked_data_to_name_.clear();
+
+  // Collect the entries touched while building this runtime and count the
+  // runtime as one more user of each.
+  std::vector<std::string> names_in_runtime;
+  for (auto& [name, meta] : name_to_packed_data_metadata_) {
+    if (!meta.in_current_runtime) {
+      continue;
+    }
+    ++meta.ref_count;
+    meta.in_current_runtime = false;
+    names_in_runtime.push_back(name);
+  }
+
+  return names_in_runtime;
+}
+
+
+/**
+ * Loads the named data from the NamedDataMap, keeps the returned buffer alive
+ * in unpacked_data_, and records the pointer -> name mapping used later to
+ * build cache keys.
+ *
+ * @param[in] name Key to request from the NamedDataMap.
+ * @return Pointer to the loaded bytes on success; Error::InvalidState if no
+ *     NamedDataMap has been set; Error::InvalidExternalData if the map could
+ *     not provide the data.
+ */
+Result<const uint8_t*> XNNWeightsCache::load_unpacked_data(
+    const std::string& name) {
+  // Guard against use before initialize_for_runtime() or after it was given a
+  // null map; dereferencing below would otherwise crash.
+  if (named_data_map_ == nullptr) {
+    ET_LOG(
+        Error,
+        "Failed to load constant data for key %s: no NamedDataMap is set",
+        name.c_str());
+    return Error::InvalidState;
+  }
+
+  Result<FreeableBuffer> named_data = named_data_map_->get_data(name.c_str());
+  if (!named_data.ok()) {
+    ET_LOG(Error, "Failed to load constant data for key %s", name.c_str());
+    return Error::InvalidExternalData;
+  }
+
+  // Read the data pointer before the buffer is moved into unpacked_data_.
+  FreeableBuffer& buffer = named_data.get();
+  const uint8_t* data_pointer = static_cast<const uint8_t*>(buffer.data());
+  unpacked_data_.push_back(std::move(buffer));
+  unpacked_data_to_name_[data_pointer] = name;
+
+  return data_pointer;
+}
+
+/**
+ * Drops one reference from each named packed entry and frees the entries
+ * whose reference count reaches zero.
+ *
+ * Freeing an entry erases its backing container from
+ * packed_pointer_to_container_, nulls its slot in packed_data_ptrs_, and
+ * removes its metadata.
+ *
+ * @param[in] packed_data_names Names of the packed entries to release.
+ * @return Error::Ok on success; Error::InvalidArgument if the cache is not
+ *     finalized or a name is not present. Entries processed before a missing
+ *     name stay released.
+ */
+Error XNNWeightsCache::delete_packed_data(
+    const std::vector<std::string>& packed_data_names) {
+  if (!is_finalized_) {
+    ET_LOG(Error, "Error, attempted to delete packed data from the cache but the cache is not finalized");
+    return Error::InvalidArgument;
+  }
+
+  for (const std::string& name : packed_data_names) {
+    auto entry = name_to_packed_data_metadata_.find(name);
+    if (entry == name_to_packed_data_metadata_.end()) {
+      ET_LOG(Error, "Error, attempted to deleted packed data: %s, from the cache but it wasn't found", name.c_str());
+      return Error::InvalidArgument;
+    }
+
+    PackedDataMeta& meta = entry->second;
+    // ref_count is unsigned: never decrement past zero, which would wrap and
+    // pin the entry in the cache forever.
+    if (meta.ref_count > 0) {
+      --meta.ref_count;
+    }
+    if (meta.ref_count != 0) {
+      continue;
+    }
+
+    if (meta.offset < packed_data_ptrs_.size()) {
+      void* packed_data_ptr = packed_data_ptrs_[meta.offset];
+      // Erasing the container releases the memory holding the packed data.
+      packed_pointer_to_container_.erase(packed_data_ptr);
+      // Keep the slot so the offsets of the other entries stay valid.
+      packed_data_ptrs_[meta.offset] = nullptr;
+    }
+    // Erase by iterator: erasing by `entry->first` would pass a reference to
+    // the key stored inside the node being destroyed.
+    name_to_packed_data_metadata_.erase(entry);
+  }
+
+  return Error::Ok;
+}
+
+
+/**
+ * Looks up the packed entry for a kernel/bias pointer pair.
+ *
+ * The entry name is the registered name of the kernel pointer, with the
+ * registered name of the bias pointer appended when the bias is non-null and
+ * registered. On a hit the entry is flagged as used by the runtime currently
+ * being created.
+ *
+ * @param[in] context The cache instance.
+ * @param[in] cache_key Key whose `kernel` and `bias` pointers are looked up.
+ * @return The entry's offset, or SIZE_MAX if the kernel pointer is not
+ *     registered or no packed entry exists under the derived name.
+ */
+size_t XNNWeightsCache::look_up(
+    XNNWeightsCache* context,
+    const xnn_weights_cache_look_up_key* cache_key) {
+  const auto& names_by_ptr = context->unpacked_data_to_name_;
+
+  // The kernel pointer must have been registered with a name.
+  const auto weight_it = names_by_ptr.find(cache_key->kernel);
+  if (weight_it == names_by_ptr.end()) {
+    return SIZE_MAX;
+  }
+
+  // Derive the packed-entry name: weight name, plus bias name when known.
+  std::string packed_name = weight_it->second;
+  if (cache_key->bias != nullptr) {
+    const auto bias_it = names_by_ptr.find(cache_key->bias);
+    if (bias_it != names_by_ptr.end()) {
+      packed_name.append(bias_it->second);
+    }
+  }
+
+  // Has data already been packed under this name?
+  auto& packed_meta = context->name_to_packed_data_metadata_;
+  const auto packed_it = packed_meta.find(packed_name);
+  if (packed_it == packed_meta.end()) {
+    return SIZE_MAX;
+  }
+
+  packed_it->second.in_current_runtime = true;
+  return packed_it->second.offset;
+}
+
+/**
+ * Reserves `n` bytes, aligned to kPackedAllocationAlignment, for packed
+ * weights.
+ *
+ * The memory is backed by a std::string stored in
+ * packed_pointer_to_container_, keyed by the aligned pointer that is
+ * returned; delete_packed_data() releases it by erasing that entry.
+ * runtime_allocator_ is not used here.
+ *
+ * @param[in] context The cache instance.
+ * @param[in] n Number of usable bytes requested.
+ * @return Pointer to the start of the aligned region.
+ */
+void* XNNWeightsCache::reserve_space(XNNWeightsCache* context, size_t n) {
+  constexpr size_t kAlignment = kPackedAllocationAlignment;
+  static_assert(
+      kAlignment != 0 && (kAlignment & (kAlignment - 1)) == 0,
+      "kPackedAllocationAlignment must be a power of two");
+
+  // Over-allocate by one alignment unit so an aligned region of n bytes
+  // always fits, wherever the buffer happens to start.
+  std::string data_container;
+  data_container.resize(n + kAlignment);
+
+  // Round the start address up to the next multiple of kAlignment. Unsigned
+  // arithmetic avoids the signed-modulo pitfalls of intptr_t.
+  const std::uintptr_t base =
+      reinterpret_cast<std::uintptr_t>(data_container.data());
+  const size_t padding =
+      static_cast<size_t>((kAlignment - (base % kAlignment)) % kAlignment);
+  void* aligned_space = data_container.data() + padding;
+
+  // NOTE(review): this relies on moving a std::string keeping its heap
+  // buffer address. The length is at least kAlignment bytes, which is beyond
+  // the small-string capacity of the common standard libraries -- confirm
+  // for the target toolchains.
+  context->packed_pointer_to_container_[aligned_space] =
+      std::move(data_container);
+  return aligned_space;
+}
+
+/**
+ * Returns the offset of an existing packed entry matching `cache_key`, or
+ * registers `ptr` as a new packed entry.
+ *
+ * When look_up() finds an entry, the first `size` bytes at `ptr` are compared
+ * with the stored packed data: a match returns the stored offset, a mismatch
+ * returns SIZE_MAX. Otherwise `ptr` is appended to packed_data_ptrs_ and, if
+ * the kernel pointer has a registered name, metadata is recorded under the
+ * weight(+bias) name with ref_count 0 (finalize_for_runtime() increments it).
+ *
+ * @param[in] context The cache instance.
+ * @param[in] cache_key Key whose `kernel` and `bias` pointers identify the data.
+ * @param[in] ptr Pointer to the packed data.
+ * @param[in] size Number of bytes compared against an existing entry.
+ * @return Offset of the matching or newly added entry, or SIZE_MAX when an
+ *     existing entry's contents differ.
+ */
+size_t XNNWeightsCache::look_up_or_insert(
+    XNNWeightsCache* context,
+    const xnn_weights_cache_look_up_key* cache_key,
+    void* ptr,
+    size_t size) {
+  const size_t cached_offset = context->look_up(context, cache_key);
+  if (cached_offset != SIZE_MAX) {
+    const void* cached_ptr = context->offset_to_addr(context, cached_offset);
+    // A content mismatch means the cached entry is out of date.
+    return memcmp(ptr, cached_ptr, size) == 0 ? cached_offset : SIZE_MAX;
+  }
+
+  // Not cached yet: the new entry takes the next free slot.
+  const size_t new_offset = context->packed_data_ptrs_.size();
+
+  const auto& names_by_ptr = context->unpacked_data_to_name_;
+  const auto weight_it = names_by_ptr.find(cache_key->kernel);
+  if (weight_it == names_by_ptr.end()) {
+    // No registered name: the pointer is still stored below, but without
+    // metadata look_up() cannot find it again.
+    ET_LOG(
+      Info, 
+      "Warning: Unpacked weight and bias were not registered with names, "
+      "this will add new cache entries for packed data and may affect performance."
+    );
+  } else {
+    std::string packed_name = weight_it->second;
+    if (cache_key->bias != nullptr) {
+      const auto bias_it = names_by_ptr.find(cache_key->bias);
+      if (bias_it != names_by_ptr.end()) {
+        packed_name.append(bias_it->second);
+      }
+    }
+
+    PackedDataMeta meta{};
+    meta.offset = new_offset;
+    // ref_count is only incremented after finalizing for runtime
+    meta.ref_count = 0;
+    meta.in_current_runtime = true;
+    context->name_to_packed_data_metadata_[packed_name] = meta;
+  }
+
+  context->packed_data_ptrs_.push_back(ptr);
+  return new_offset;
+}
+
+/**
+ * Reports whether the cache is currently finalized.
+ *
+ * @param[in] context The cache instance.
+ * @return The value of the instance's is_finalized_ flag.
+ */
+bool XNNWeightsCache::is_finalized(XNNWeightsCache* context) {
+  const bool finalized = context->is_finalized_;
+  return finalized;
+}
+
+/**
+ * Translates an offset returned by look_up()/look_up_or_insert() into the
+ * packed-data pointer stored at that index.
+ *
+ * No bounds check is performed. The slot is nullptr if the entry was released
+ * by delete_packed_data().
+ *
+ * @param[in] context The cache instance.
+ * @param[in] offset Index into packed_data_ptrs_.
+ * @return The pointer stored at `offset`.
+ */
+void* XNNWeightsCache::offset_to_addr(XNNWeightsCache* context, size_t offset) {
+  const std::vector<void*>& packed_ptrs = context->packed_data_ptrs_;
+  return packed_ptrs[offset];
+}
+
+/**
+ * Delete callback for the provider table. It does nothing and reports
+ * success; packed data is released through delete_packed_data() instead.
+ *
+ * @param[in] context The cache instance (unused).
+ * @return Always xnn_status_success.
+ */
+enum xnn_status XNNWeightsCache::delete_cache(XNNWeightsCache* context) {
+  (void)context;
+  return xnn_status_success;
+}
+
+} // namespace delegate
+} // namespace xnnpack
+} // namespace backends
+} // namespace executorch
@@ -0,0 +1,166 @@
+/*
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+
+#pragma once
+
+#include <xnnpack.h>
+
+#include <executorch/runtime/executor/pte_data_map.h>
+#include <executorch/runtime/core/memory_allocator.h>
+#include <executorch/runtime/core/error.h>
+#include <executorch/runtime/core/result.h>
+
+#include <array>
+#include <cstddef>
+#include <cstdint>
+#include <string>
+#include <unordered_map>
+#include <vector>
+
+namespace executorch {
+namespace backends {
+namespace xnnpack {
+namespace delegate {
+
+using executorch::runtime::MemoryAllocator;
+using executorch::runtime::NamedDataMap;
+using executorch::runtime::Error;
+using executorch::runtime::Result;
+using executorch::runtime::FreeableBuffer;
+
+// Bookkeeping for one named packed-data entry held by XNNWeightsCache.
+// NOTE: XNNWeightsCache::look_up_or_insert fills this struct with designated
+// initializers, so it must stay an aggregate and keep this field order.
+struct PackedDataMeta {
+  // Index of the packed data's pointer in XNNWeightsCache::packed_data_ptrs_;
+  // this is the value returned to XNNPACK as the cache offset.
+  size_t offset;
+  // Count number of xnn_runtime_t this packed data is used in.
+  // Incremented in finalize_for_runtime(), decremented in delete_packed_data().
+  size_t ref_count;
+  // true if this packed data was inserted or looked up for the
+  // current runtime being created; cleared by finalize_for_runtime().
+  bool in_current_runtime;
+};
+
+/**
+ * Weights cache provider for XNNPACK.
+ *
+ * Unpacked named data is loaded through a NamedDataMap and registered by
+ * pointer. Data packed by XNNPACK is stored under the name(s) of the unpacked
+ * data it came from, and reference-counted per runtime that uses it.
+ *
+ * NOTE(review): the constructor stores `this` in weights_cache_.context, so a
+ * copied or moved instance would leave that pointer referring to the old
+ * object. Consider deleting the copy/move operations if no caller needs them.
+ */
+class XNNWeightsCache {
+ public:
+  XNNWeightsCache();
+
+  /**
+   * Initializes the XNNWeightsCache for the next xnn_create_runtime.
+   *
+   * @param[in] runtime_allocator Allocator stored on the cache.
+   * @param[in] named_data_map Source used by load_unpacked_data().
+   */
+  Error initialize_for_runtime(
+      MemoryAllocator* runtime_allocator,
+      const NamedDataMap* named_data_map);
+
+  /**
+   * Finalizes the weights cache after the weights have been packed
+   * in xnn_create_runtime.
+   *
+   * This should only be called after creating the runtime. Returns
+   * the names of all the packed weights used by this runtime.
+   */
+  Result<std::vector<std::string>> finalize_for_runtime();
+
+  // Taken from XNN_ALLOCATION_ALIGNMENT in xnnpack/common.h
+  static const size_t kPackedAllocationAlignment = 64;
+
+  /**
+   * Returns XNNPACK's underlying weights_cache pointer.
+   */
+  xnn_weights_cache_t get() {
+    return (xnn_weights_cache_t)&weights_cache_;
+  }
+
+  /**
+   * Returns the number of unpacked data buffers currently held.
+   */
+  size_t get_num_unpacked_data() const {
+    return unpacked_data_.size();
+  }
+
+  /**
+   * Returns the names of all unpacked data, in unspecified order.
+   */
+  std::vector<std::string> get_unpacked_data_names() const {
+    std::vector<std::string> names;
+    names.reserve(unpacked_data_to_name_.size());
+    for (const auto& pair : unpacked_data_to_name_) {
+      names.push_back(pair.second);
+    }
+    return names;
+  }
+
+  /**
+   * Returns the names of all packed data, in unspecified order.
+   */
+  std::vector<std::string> get_packed_data_names() const {
+    std::vector<std::string> names;
+    names.reserve(name_to_packed_data_metadata_.size());
+    for (const auto& pair : name_to_packed_data_metadata_) {
+      names.push_back(pair.first);
+    }
+    return names;
+  }
+
+  /**
+   * Loads unpacked named data from the NamedDataMap into this XNNWeightsCache
+   * and returns a pointer to the unpacked data. This unpacked data is given
+   * to XNNPACK's define_tensor APIs, and used as the cache key for
+   * look_up_or_insert.
+   *
+   * @param[in] name The name of the data to load
+   * @return A pointer to the unpacked data that was loaded, or an error if
+   *     the data could not be loaded
+   */
+  Result<const uint8_t*> load_unpacked_data(const std::string& name);
+
+  /**
+   * Releases the packed data associated with the given names.
+   *
+   * Each named entry's ref_count is decremented; the packed data is deleted
+   * only once no runtime references it any more.
+   *
+   * @param[in] packed_names Names returned by finalize_for_runtime()
+   */
+  Error delete_packed_data(const std::vector<std::string>& packed_names);
+
+ private:
+  // Runtime Allocator used to reserve memory for packed weights
+  MemoryAllocator* runtime_allocator_ = nullptr;
+
+  // Named Data Map used to load named data
+  const NamedDataMap* named_data_map_ = nullptr;
+
+  // Map of unpacked pointers to the data name
+  std::unordered_map<const void*, std::string> unpacked_data_to_name_;
+  // Map of data names to offset into the packed data
+  std::unordered_map<std::string, PackedDataMeta> name_to_packed_data_metadata_;
+  // Vector holding list of pointers to the packed data
+  std::vector<void*> packed_data_ptrs_;
+  // Map from each packed data pointer to the string that owns its memory
+  std::unordered_map<void*, std::string> packed_pointer_to_container_;
+  // Vector holding list of unpacked freeable buffers
+  std::vector<FreeableBuffer> unpacked_data_;
+  // xnnpack's weight cache provider
+  xnn_weights_cache_provider weights_cache_{};
+  // whether or not the weight cache is finalized
+  bool is_finalized_ = false;
+
+  // Function pointers to override XNNPACK's default xnn_weights_cache_provider
+  // functions.
+  static size_t look_up(
+      XNNWeightsCache* context,
+      const xnn_weights_cache_look_up_key* cache_key);
+
+  static void* reserve_space(XNNWeightsCache* context, size_t n);
+
+  static size_t look_up_or_insert(
+      XNNWeightsCache* context,
+      const xnn_weights_cache_look_up_key* cache_key,
+      void* ptr,
+      size_t size);
+
+  static bool is_finalized(XNNWeightsCache* context);
+
+  static void* offset_to_addr(XNNWeightsCache* context, size_t offset);
+
+  static enum xnn_status delete_cache(XNNWeightsCache* context);
+};
+
+} // namespace delegate
+} // namespace xnnpack
+} // namespace backends
+} // namespace executorch