diff --git a/src/gpu.cpp b/src/gpu.cpp
index 00a711d0951..06c7c29b4d9 100644
--- a/src/gpu.cpp
+++ b/src/gpu.cpp
@@ -3734,7 +3734,7 @@ int VulkanDevice::create_pipeline_layout(int push_constant_count, VkDescriptorSe
     return 0;
 }
 
-int VulkanDevice::create_pipeline(VkShaderModule shader_module, VkPipelineLayout pipeline_layout, const std::vector<vk_specialization_type>& specializations, uint32_t subgroup_size, VkPipeline* pipeline) const
+int VulkanDevice::create_pipeline(VkShaderModule shader_module, VkPipelineLayout pipeline_layout, const std::vector<vk_specialization_type>& specializations, uint32_t subgroup_size, VkPipelineCache* vk_pipeline_cache, VkPipeline* pipeline) const
 {
     const int specialization_count = specializations.size();
 
@@ -3792,7 +3792,11 @@ int VulkanDevice::create_pipeline(VkShaderModule shader_module, VkPipelineLayout
     computePipelineCreateInfo.basePipelineHandle = 0;
     computePipelineCreateInfo.basePipelineIndex = 0;
 
-    VkResult ret = vkCreateComputePipelines(d->device, 0, 1, &computePipelineCreateInfo, 0, pipeline);
+    VkResult ret;
+    if (vk_pipeline_cache)
+        ret = vkCreateComputePipelines(d->device, *vk_pipeline_cache, 1, &computePipelineCreateInfo, 0, pipeline);
+    else
+        ret = vkCreateComputePipelines(d->device, VK_NULL_HANDLE, 1, &computePipelineCreateInfo, 0, pipeline);
     if (ret != VK_SUCCESS)
     {
         NCNN_LOGE("vkCreateComputePipelines failed %d", ret);
@@ -3801,6 +3805,40 @@ int VulkanDevice::create_pipeline(VkShaderModule shader_module, VkPipelineLayout
     return 0;
 }
 
+int VulkanDevice::create_empty_pipeline_cache(VkPipelineCache* vk_pipeline_cache) const
+{
+    VkPipelineCacheCreateInfo info;
+    info.sType = VK_STRUCTURE_TYPE_PIPELINE_CACHE_CREATE_INFO;
+    info.pNext = 0;
+    info.flags = VK_PIPELINE_CACHE_CREATE_EXTERNALLY_SYNCHRONIZED_BIT;
+    info.initialDataSize = 0;
+    info.pInitialData = 0;
+    VkResult ret = vkCreatePipelineCache(d->device, &info, 0, vk_pipeline_cache);
+    if (ret != VK_SUCCESS)
+    {
+        NCNN_LOGE("vkCreatePipelineCache failed %d", ret);
+        return -1;
+    }
+
+    return 0;
+}
+
+int VulkanDevice::create_pipeline_cache_with_data(const void* initial_data, size_t data_size, VkPipelineCache* vk_pipeline_cache) const
+{
+    VkPipelineCacheCreateInfo info;
+    info.sType = VK_STRUCTURE_TYPE_PIPELINE_CACHE_CREATE_INFO;
+    info.pNext = 0;
+    info.flags = VK_PIPELINE_CACHE_CREATE_EXTERNALLY_SYNCHRONIZED_BIT;
+    info.initialDataSize = data_size;
+    info.pInitialData = initial_data;
+    VkResult ret = vkCreatePipelineCache(d->device, &info, 0, vk_pipeline_cache);
+    if (ret != VK_SUCCESS)
+    {
+        NCNN_LOGE("vkCreatePipelineCache failed %d", ret);
+        return -1;
+    }
+
+    return 0;
+}
+
 int VulkanDevice::create_descriptor_update_template(int binding_count, const int* binding_types, VkDescriptorSetLayout descriptorset_layout, VkPipelineLayout pipeline_layout, VkDescriptorUpdateTemplateKHR* descriptor_update_template) const
 {
diff --git a/src/gpu.h b/src/gpu.h
index 7863b2e21a4..5037922d0b8 100644
--- a/src/gpu.h
+++ b/src/gpu.h
@@ -419,7 +419,10 @@ class NCNN_EXPORT VulkanDevice
     // helper for creating pipeline
     int create_descriptorset_layout(int binding_count, const int* binding_types, VkDescriptorSetLayout* descriptorset_layout) const;
     int create_pipeline_layout(int push_constant_count, VkDescriptorSetLayout descriptorset_layout, VkPipelineLayout* pipeline_layout) const;
-    int create_pipeline(VkShaderModule shader_module, VkPipelineLayout pipeline_layout, const std::vector<vk_specialization_type>& specializations, uint32_t subgroup_size, VkPipeline* pipeline) const;
+    int create_pipeline(VkShaderModule shader_module, VkPipelineLayout pipeline_layout, const std::vector<vk_specialization_type>& specializations, uint32_t subgroup_size, VkPipelineCache* vk_pipeline_cache, VkPipeline* pipeline) const;
+    int create_empty_pipeline_cache(VkPipelineCache* vk_pipeline_cache) const;
+    int create_pipeline_cache_with_data(const void* initial_data, size_t data_size, VkPipelineCache* vk_pipeline_cache) const;
+
     int create_descriptor_update_template(int binding_count, const int* binding_types, VkDescriptorSetLayout descriptorset_layout, VkPipelineLayout pipeline_layout, VkDescriptorUpdateTemplateKHR* descriptor_update_template) const;
 
     uint32_t find_memory_index(uint32_t memory_type_bits, VkFlags required, VkFlags preferred, VkFlags preferred_not) const;
diff --git a/src/pipelinecache.cpp b/src/pipelinecache.cpp
index 1bd27451440..4dfda99b694 100644
--- a/src/pipelinecache.cpp
+++ b/src/pipelinecache.cpp
@@ -5,8 +5,21 @@
 #include "gpu.h"
 
+#include <algorithm>
+#include <cerrno>
+#include <cstdio>
+#include <cstdlib>
+#include <cstring>
+#include <map>
+#include <string>
+
+#ifdef _WIN32
+#include <windows.h>
+#include <direct.h>
+#else
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <dirent.h>
+#include <unistd.h>
+#endif
 
 namespace ncnn {
-
 #if NCNN_VULKAN
 // https://en.wikipedia.org/wiki/MurmurHash
 static uint32_t murmur3_32(const uint32_t* data, int size)
@@ -51,9 +64,114 @@ static uint32_t fnv1a_32(const uint8_t* data, int size)
     return h;
 }
 
+static int atomic_rename(const char* old_path, const char* new_path)
+{
+#ifdef _WIN32
+    if (MoveFileExA(old_path, new_path, MOVEFILE_REPLACE_EXISTING | MOVEFILE_WRITE_THROUGH))
+        return 0;
+    return -1;
+#else
+    return std::rename(old_path, new_path);
+#endif // _WIN32
+}
+
+static int make_dir(const std::string& dirpath)
+{
+    if (dirpath.empty())
+        return -1;
+
+    std::string dir = dirpath;
+
+#ifdef _WIN32
+    for (size_t i = 0; i < dir.size(); i++)
+    {
+        if (dir[i] == '/')
+            dir[i] = '\\';
+    }
+
+    size_t start = (dir.size() > 2 && dir[1] == ':') ? 3 : 0;
+
+    for (size_t i = start; i <= dir.size(); i++)
+    {
+        if (i == dir.size() || dir[i] == '\\')
+        {
+            char tmp = dir[i];
+            dir[i] = '\0';
+            if (_mkdir(dir.c_str()) != 0 && errno != EEXIST)
+            {
+                return -1;
+            }
+            dir[i] = tmp;
+        }
+    }
+#else
+    size_t start = dir[0] == '/' ? 1 : 0;
+
+    for (size_t i = start; i <= dir.size(); i++)
+    {
+        if (i == dir.size() || dir[i] == '/')
+        {
+            char tmp = dir[i];
+            dir[i] = '\0';
+            if (mkdir(dir.c_str(), 0755) != 0 && errno != EEXIST)
+            {
+                return -1;
+            }
+            dir[i] = tmp;
+        }
+    }
+#endif
+
+    return 0;
+}
+
+static constexpr uint32_t spv_cache_magic()
+{
+    return ('S' | 'P' << 8 | 'V' << 16 | 'C' << 24);
+}
+
+enum class PipelineCacheIOResult
+{
+    Success,
+    FileFailure,
+    InvalidFile,
+    InvalidCache,
+    DataCorruption,
+    CreationFailure,
+};
+
 class PipelineCachePrivate
 {
 public:
+    static constexpr uint32_t CURRENT_SPV_CACHE_HEADER_VERSION = 1;
+    static constexpr uint32_t CURRENT_PIPELINE_CACHE_VERSION = 1;
+
+    PipelineCachePrivate()
+    {
+#ifdef _WIN32
+        shader_cache_dir = std::string(getenv("LOCALAPPDATA") ? getenv("LOCALAPPDATA") : ".") + "/ncnn/shadercache";
+#else
+        shader_cache_dir = std::string(getenv("HOME") ? getenv("HOME") : ".") + "/.ncnn/shadercache";
+#endif
+    }
+
+    struct pipeline_cache_prefix_header
+    {
+        uint32_t magic;
+        uint32_t version;
+        uint32_t data_size;
+        uint32_t data_hash_fnv1a; // fnv1a hash
+
+        uint32_t vendor_id;
+        uint32_t device_id;
+        uint32_t driver_version;
+        uint32_t driver_abi;
+
+        uint8_t uuid[VK_UUID_SIZE];
+
+        uint32_t reserved[4];
+    };
+
     // digest -> artifact
     struct pipeline_cache_digest
     {
@@ -110,9 +228,78 @@ class PipelineCachePrivate
 
         ShaderInfo shader_info; // TODO use pointer ?
     };
 
+    struct spv_cache_header
+    {
+        uint32_t magic;          // magic number, 'SPVC' in host endian
+        uint32_t header_version; // version of the cache header format
+        uint32_t ncnn_version;   // ncnn version when the cache is created
+        // if ncnn is upgraded and glslang, the shader code or the preprocessing steps change,
+        // we want the cache to be invalidated
+
+        uint32_t spv_size;          // size of spv binary data
+        uint32_t data_hash_fnv1a;   // hash of spv binary data using fnv1a
+        uint32_t data_hash_murmur3; // second hash of spv binary data using murmur3
+
+        // since a driver update or device switch may change the supported extensions
+        // and the defines added to the shader code, we verify that the cache is valid for the current device
+        uint32_t vendor_id;
+        uint32_t device_id;
+        uint32_t driver_version;
+        uint8_t uuid[VK_UUID_SIZE];
+        uint32_t reserved[4]; // reserved for future use, must be zero
+    };
+
     mutable std::vector<pipeline_cache_digest> cache_digests;
     mutable std::vector<pipeline_cache_artifact> cache_artifacts;
+    mutable std::map<uint64_t, std::vector<uint32_t> > spv_code_cache;
+    mutable VkPipelineCache pipeline_cache = VK_NULL_HANDLE;
     mutable Mutex cache_lock;
+    mutable std::string shader_cache_dir;
+
+    int load_spv_code_cache_from_disk(const VulkanDevice& device, uint64_t shader_key) const;
+    PipelineCacheIOResult try_load_pipeline_cache_from_disk(const VulkanDevice* vkdev, const char* path);
+    int save_spv_code_cache_to_disk(uint64_t shader_key, const VulkanDevice& device, const std::vector<uint32_t>& spirv) const;
+
+    static constexpr uint32_t vk_pipeline_cache_header_magic()
+    {
+        return ('V' | 'P' << 8 | 'C' << 16 | 'H' << 24); // Vulkan Pipeline Cache Header
+    }
+
+    static bool validate_pipeline_cache_header(const pipeline_cache_prefix_header& header, const VkPhysicalDeviceProperties& physical_device_properties)
+    {
+        if (header.magic != vk_pipeline_cache_header_magic())
+            return false;
+        if (header.vendor_id != physical_device_properties.vendorID)
+            return false;
+        if (header.device_id != physical_device_properties.deviceID)
+            return false;
+        if (header.driver_version != physical_device_properties.driverVersion)
+            return false;
+        if (header.driver_abi != sizeof(void*))
+            return false;
+        if (memcmp(header.uuid, physical_device_properties.pipelineCacheUUID, VK_UUID_SIZE) != 0)
+            return false;
+        return true;
+    }
+
+    static bool validate_spv_code_cache(const spv_cache_header& header, const VkPhysicalDeviceProperties& physical_device_properties)
+    {
+        if (header.magic != spv_cache_magic())
+            return false;
+        if (header.header_version != CURRENT_SPV_CACHE_HEADER_VERSION)
+            return false;
+        if (header.vendor_id != physical_device_properties.vendorID)
+            return false;
+        if (header.device_id != physical_device_properties.deviceID)
+            return false;
+        if (header.driver_version != physical_device_properties.driverVersion)
+            return false;
+        if (header.spv_size % 4 != 0)
+            return false;
+        if (memcmp(header.uuid, physical_device_properties.pipelineCacheUUID, VK_UUID_SIZE) != 0)
+            return false;
+        return true;
+    }
 };
 
 PipelineCachePrivate::pipeline_cache_digest::pipeline_cache_digest(const uint32_t* spv_data, size_t spv_data_size, const std::vector<vk_specialization_type>& specializations,
@@ -133,18 +320,36 @@ PipelineCachePrivate::pipeline_cache_digest::pipeline_cache_digest(const uint32_
     specializations_fnv1a = fnv1a_32((const uint8_t*)specializations.data(), specialization_count * sizeof(vk_specialization_type));
 }
 
+static uint32_t encode_opt_bits(const Option& opt)
+{
+    return 0 << 7
+           | opt.use_fp16_packed << 6
+           | opt.use_fp16_storage << 5
+           | opt.use_fp16_arithmetic << 4
+           | opt.use_int8_storage << 3
+           | opt.use_int8_arithmetic << 2;
+}
+
+static uint64_t shader_spv_key(int shader_type_index, const Option& opt)
+{
+    // TODO: if the shader code is changed, using shader_type_index alone is not enough
+    return static_cast<uint64_t>(shader_type_index) << 32
+           | static_cast<uint64_t>(opt.use_fp16_uniform) << 31
+           | static_cast<uint64_t>(opt.use_int8_uniform) << 30
+           | static_cast<uint64_t>(opt.use_int8_packed) << 29
+           | static_cast<uint64_t>(opt.use_subgroup_ops) << 28
+           | static_cast<uint64_t>(opt.use_shader_pack8) << 27
+           | static_cast<uint64_t>(opt.use_shader_local_memory) << 26
+           | encode_opt_bits(opt);
+}
+
 PipelineCachePrivate::pipeline_cache_digest::pipeline_cache_digest(int _shader_type_index, const Option& opt, const std::vector<vk_specialization_type>& specializations,
         uint32_t _local_size_x, uint32_t _local_size_y, uint32_t _local_size_z, uint32_t _subgroup_size)
 {
     shader_type_index = _shader_type_index;
 
     // encode opt
-    opt_bits = 0 << 7
-               | opt.use_fp16_packed << 6
-               | opt.use_fp16_storage << 5
-               | opt.use_fp16_arithmetic << 4
-               | opt.use_int8_storage << 3
-               | opt.use_int8_arithmetic << 2;
+    opt_bits = encode_opt_bits(opt);
 
     local_size_x = _local_size_x;
     local_size_y = _local_size_y;
@@ -216,6 +421,14 @@ void PipelineCache::clear()
         }
     }
 
+    if (d->pipeline_cache)
+    {
+        vkDestroyPipelineCache(vkdev->vkdevice(), d->pipeline_cache, 0);
+        d->pipeline_cache = VK_NULL_HANDLE;
+    }
+
+    d->spv_code_cache.clear();
+
     d->cache_digests.clear();
     d->cache_artifacts.clear();
 }
@@ -334,8 +547,6 @@ int PipelineCache::get_pipeline(int shader_type_index, const Option& opt, const
             *descriptor_update_template = cc.descriptor_update_template;
             shader_info = cc.shader_info;
 
-            // NCNN_LOGE("get_pipeline hit %d", last_digest_index);
-
             return 0;
         }
     }
@@ -381,20 +592,322 @@ int PipelineCache::get_pipeline(int shader_type_index, const Option& opt, const
     return 0;
 }
 
+int PipelineCachePrivate::load_spv_code_cache_from_disk(const VulkanDevice& device, uint64_t shader_key) const
+{
+    std::string cachepath = shader_cache_dir + "/" + std::to_string(shader_key) + ".spvcache";
+
+    FILE* fp = fopen(cachepath.c_str(), "rb");
+    if (!fp)
+    {
+        return -1;
+    }
+
+    spv_cache_header header;
+    if (fread(&header, sizeof(header), 1, fp) != 1)
+    {
+        NCNN_LOGE("load_spv_code_cache_from_disk fread header failed");
+        fclose(fp);
+        return -1;
+    }
+
+    if (!validate_spv_code_cache(header, device.info.physicalDeviceProperties()))
+    {
+        NCNN_LOGE("load_spv_code_cache_from_disk validate_spv_code_cache failed");
+        fclose(fp);
+        return -1;
+    }
+
+    std::vector<uint32_t> spirv;
+    spirv.resize(header.spv_size / 4);
+    size_t nread = fread(spirv.data(), 1, header.spv_size, fp);
+    fclose(fp);
+
+    if (nread != header.spv_size)
+    {
+        NCNN_LOGE("load_spv_code_cache_from_disk fread spirv data failed %zu != %u", nread, header.spv_size);
+        return -1;
+    }
+
+    uint32_t hash_fnv1a = fnv1a_32(reinterpret_cast<const uint8_t*>(spirv.data()), header.spv_size);
+    if (hash_fnv1a != header.data_hash_fnv1a)
+    {
+        NCNN_LOGE("load_spv_code_cache_from_disk data hash1 mismatch %x != %x", hash_fnv1a, header.data_hash_fnv1a);
+        return -1;
+    }
+
+    uint32_t hash_murmur3 = murmur3_32(spirv.data(), spirv.size());
+    if (hash_murmur3 != header.data_hash_murmur3)
+    {
+        NCNN_LOGE("load_spv_code_cache_from_disk data hash2 mismatch %x != %x", hash_murmur3, header.data_hash_murmur3);
+        return -1;
+    }
+
+    spv_code_cache[shader_key] = std::move(spirv);
+    return 0;
+}
+
+PipelineCacheIOResult PipelineCachePrivate::try_load_pipeline_cache_from_disk(const VulkanDevice* vkdev, const char* path)
+{
+    FILE* file = fopen(path, "rb");
+    if (!file)
+    {
+        return PipelineCacheIOResult::FileFailure;
+    }
+
+    fseek(file, 0, SEEK_END);
+    long pos = ftell(file);
+    if (pos == -1L)
+    {
+        fclose(file);
+        return PipelineCacheIOResult::FileFailure;
+    }
+    size_t file_size = static_cast<size_t>(pos);
+    rewind(file);
+
+    if (file_size < sizeof(pipeline_cache_prefix_header))
+    {
+        fclose(file);
+        return PipelineCacheIOResult::InvalidFile;
+    }
+
+    std::vector<uint8_t> buffer(file_size - sizeof(pipeline_cache_prefix_header));
+    pipeline_cache_prefix_header header;
+    if (fread(&header, sizeof(pipeline_cache_prefix_header), 1, file) != 1)
+    {
+        fclose(file);
+        return PipelineCacheIOResult::InvalidFile;
+    }
+    if (fread(buffer.data(), 1, file_size - sizeof(pipeline_cache_prefix_header), file) != file_size - sizeof(PipelineCachePrivate::pipeline_cache_prefix_header))
+    {
+        fclose(file);
+        return PipelineCacheIOResult::DataCorruption;
+    }
+    fclose(file);
+
+    if (header.magic != vk_pipeline_cache_header_magic())
+    {
+        return PipelineCacheIOResult::InvalidCache;
+    }
+
+    if (header.version != CURRENT_PIPELINE_CACHE_VERSION)
+    {
+        return PipelineCacheIOResult::InvalidCache;
+    }
+
+    void* cache_data_begin = buffer.data();
+    const VkPhysicalDeviceProperties& device_properties = vkdev->info.physicalDeviceProperties();
+    if (!validate_pipeline_cache_header(header, device_properties))
+    {
+        return PipelineCacheIOResult::InvalidCache;
+    }
+
+    size_t cache_data_size = header.data_size;
+    if (cache_data_size == 0 || cache_data_size > buffer.size())
+    {
+        return PipelineCacheIOResult::DataCorruption;
+    }
+
+    uint32_t hash = fnv1a_32(reinterpret_cast<const uint8_t*>(cache_data_begin), cache_data_size);
+    if (hash != header.data_hash_fnv1a)
+    {
+        return PipelineCacheIOResult::DataCorruption;
+    }
+
+    if (vkdev->create_pipeline_cache_with_data(cache_data_begin, cache_data_size, &pipeline_cache) != 0)
+    {
+        return PipelineCacheIOResult::CreationFailure;
+    }
+
+    return PipelineCacheIOResult::Success;
+}
+
+int PipelineCachePrivate::save_spv_code_cache_to_disk(uint64_t shader_key, const VulkanDevice& device, const std::vector<uint32_t>& spirv) const
+{
+    std::string cachepath = shader_cache_dir + "/" + std::to_string(shader_key) + ".spvcache";
+    std::string tmp_cachepath = cachepath + ".tmp";
+
+    make_dir(shader_cache_dir);
+
+    FILE* fp = fopen(tmp_cachepath.c_str(), "wb");
+    if (!fp)
+    {
+        NCNN_LOGE("save_spv_code_cache_to_disk fopen %s failed", tmp_cachepath.c_str());
+        return -1;
+    }
+
+    spv_cache_header header;
+    header.magic = spv_cache_magic();
+    header.header_version = CURRENT_SPV_CACHE_HEADER_VERSION;
+    header.ncnn_version = 0;
+    header.spv_size = spirv.size() * sizeof(uint32_t);
+
+    header.data_hash_fnv1a = fnv1a_32((const uint8_t*)spirv.data(), header.spv_size);   // fnv1a hash
+    header.data_hash_murmur3 = murmur3_32((const uint32_t*)spirv.data(), spirv.size()); // murmur3 hash
+
+    const VkPhysicalDeviceProperties& physical_device_properties = device.info.physicalDeviceProperties();
+    header.vendor_id = physical_device_properties.vendorID;
+    header.device_id = physical_device_properties.deviceID;
+    header.driver_version = physical_device_properties.driverVersion;
+    memcpy(header.uuid, physical_device_properties.pipelineCacheUUID, VK_UUID_SIZE);
+    memset(header.reserved, 0, sizeof(header.reserved));
+
+    if (fwrite(&header, sizeof(header), 1, fp) != 1)
+    {
+        NCNN_LOGE("save_spv_code_cache_to_disk fwrite header failed");
+        fclose(fp);
+        return -1;
+    }
+
+    if (fwrite(spirv.data(), sizeof(uint32_t), spirv.size(), fp) != spirv.size())
+    {
+        NCNN_LOGE("save_spv_code_cache_to_disk fwrite spirv data failed");
+        fclose(fp);
+        return -1;
+    }
+
+    fclose(fp);
+
+    if (atomic_rename(tmp_cachepath.c_str(), cachepath.c_str()) != 0)
+    {
+        NCNN_LOGE("save_spv_code_cache_to_disk rename %s to %s failed", tmp_cachepath.c_str(), cachepath.c_str());
+        return -1;
+    }
+
+    return 0;
+}
+
+int PipelineCache::load_pipeline_cache(const char* path) const
+{
+    MutexLockGuard lock(d->cache_lock);
+    if (d->pipeline_cache != VK_NULL_HANDLE)
+    {
+        NCNN_LOGE("a valid pipeline cache already exists, stop loading");
+        return 0;
+    }
+    PipelineCacheIOResult result = d->try_load_pipeline_cache_from_disk(vkdev, path);
+    if (result == PipelineCacheIOResult::Success)
+        return 0;
+
+    switch (result)
+    {
+    case PipelineCacheIOResult::FileFailure:
+        NCNN_LOGE("Failed to open pipeline cache file: %s", path);
+        break;
+    case PipelineCacheIOResult::InvalidFile:
+        NCNN_LOGE("File %s is not a valid pipeline cache file", path);
+        break;
+    case PipelineCacheIOResult::InvalidCache:
+        NCNN_LOGE("The cache in file %s is not valid for the current platform", path);
+        break;
+    case PipelineCacheIOResult::DataCorruption:
+        NCNN_LOGE("Data in file %s is corrupted", path);
+        break;
+    case PipelineCacheIOResult::CreationFailure:
+        NCNN_LOGE("Failed to create pipeline cache from data in file %s", path);
+        break;
+    default:
+        break;
+    }
+
+    NCNN_LOGE("Failed to load pipeline cache from file %s, falling back to an empty pipeline cache", path);
+    if (vkdev->create_empty_pipeline_cache(&d->pipeline_cache) != 0)
+    {
+        NCNN_LOGE("Failed to create pipeline cache");
+        return -1;
+    }
+
+    return 0;
+}
+
+int PipelineCache::save_pipeline_cache(const char* path) const
+{
+    MutexLockGuard lock(d->cache_lock);
+    if (d->pipeline_cache == VK_NULL_HANDLE)
+        return 0;
+
+    size_t cache_data_size;
+    if (vkGetPipelineCacheData(vkdev->vkdevice(), d->pipeline_cache, &cache_data_size, nullptr) != VK_SUCCESS)
+    {
+        NCNN_LOGE("Failed to get pipeline cache data");
+        return -1;
+    }
+
+    std::vector<uint8_t> buffer(cache_data_size);
+    if (vkGetPipelineCacheData(vkdev->vkdevice(), d->pipeline_cache, &cache_data_size, buffer.data()) != VK_SUCCESS)
+    {
+        NCNN_LOGE("Failed to get pipeline cache data");
+        return -1;
+    }
+
+    const VkPhysicalDeviceProperties& device_properties = vkdev->info.physicalDeviceProperties();
+
+    PipelineCachePrivate::pipeline_cache_prefix_header header = {};
+    header.vendor_id = device_properties.vendorID;
+    header.device_id = device_properties.deviceID;
+    header.driver_version = device_properties.driverVersion;
+    header.driver_abi = sizeof(void*);
+    header.version = PipelineCachePrivate::CURRENT_PIPELINE_CACHE_VERSION;
+    std::copy_n(device_properties.pipelineCacheUUID, VK_UUID_SIZE, header.uuid);
+    header.data_size = cache_data_size;
+    header.magic = PipelineCachePrivate::vk_pipeline_cache_header_magic();
+
+    header.data_hash_fnv1a = fnv1a_32(reinterpret_cast<const uint8_t*>(buffer.data()), cache_data_size); // fnv1a hash
+
+    std::string expected_path = path;
+    std::string temp_file_path = expected_path + ".tmp";
+    FILE* file = fopen(temp_file_path.c_str(), "wb");
+    if (!file)
+    {
+        NCNN_LOGE("Failed to open temporary file %s for writing pipeline cache", temp_file_path.c_str());
+        return -1;
+    }
+
+    size_t header_bytes_written = fwrite(&header, 1, sizeof(PipelineCachePrivate::pipeline_cache_prefix_header), file);
+    size_t data_bytes_written = fwrite(buffer.data(), 1, cache_data_size, file);
+    if (header_bytes_written != sizeof(PipelineCachePrivate::pipeline_cache_prefix_header) || data_bytes_written != cache_data_size)
+    {
+        NCNN_LOGE("Failed to write pipeline cache data to file %s", temp_file_path.c_str());
+        fclose(file);
+        return -1;
+    }
+
+    fclose(file);
+
+    if (atomic_rename(temp_file_path.c_str(), expected_path.c_str()) != 0)
+    {
+        NCNN_LOGE("Failed to rename file %s to %s", temp_file_path.c_str(), path);
+        return -1;
+    }
+
+    return 0;
+}
+
 int PipelineCache::create_shader_module(int shader_type_index, const Option& opt, uint32_t local_size_x, uint32_t local_size_y, uint32_t local_size_z,
                                         VkShaderModule* _shader_module, ShaderInfo& si) const
 {
+    const uint32_t* spv_data = nullptr;
+    size_t spv_data_size = 0;
+    uint64_t key = shader_spv_key(shader_type_index, opt);
+
     std::vector<uint32_t> spirv;
-    int retc = compile_spirv_module(shader_type_index, opt, spirv);
-    if (retc != 0)
+    if (d->spv_code_cache.find(key) != d->spv_code_cache.end() || d->load_spv_code_cache_from_disk(*vkdev, key) == 0)
     {
-        NCNN_LOGE("compile_spirv_module failed %d", retc);
-        return -1;
+        const std::vector<uint32_t>& spirv_cache = d->spv_code_cache[key];
+        spv_data = spirv_cache.data();
+        spv_data_size = spirv_cache.size() * 4;
     }
+    else
+    {
+        int retc = compile_spirv_module(shader_type_index, opt, spirv);
+        if (retc != 0)
+        {
+            NCNN_LOGE("compile_spirv_module failed %d", retc);
+            return -1;
+        }
 
-    const uint32_t* spv_data = spirv.data();
-    size_t spv_data_size = spirv.size() * 4;
+        d->spv_code_cache[key] = spirv;
+        int ret = d->save_spv_code_cache_to_disk(key, *vkdev, spirv);
+        if (ret != 0)
+        {
+            NCNN_LOGE("save_spv_code_cache_to_disk failed");
+        }
+
+        spv_data = spirv.data();
+        spv_data_size = spirv.size() * 4;
+    }
 
     int ret = resolve_shader_info(spv_data, spv_data_size, si);
     if (ret != 0)
@@ -445,7 +958,13 @@ int PipelineCache::new_pipeline(VkShaderModule shader_module, const ShaderInfo&
     if (ret != 0)
         goto ERROR_PipelineCache;
 
-    ret = vkdev->create_pipeline(shader_module, pipeline_layout, specializations, subgroup_size, &pipeline);
+    if (!d->pipeline_cache)
+    {
+        ret = vkdev->create_empty_pipeline_cache(&d->pipeline_cache);
+        if (ret != 0)
+            NCNN_LOGE("vkdev->create_empty_pipeline_cache failed, not using pipeline cache");
+    }
+    ret = vkdev->create_pipeline(shader_module, pipeline_layout, specializations, subgroup_size, &d->pipeline_cache, &pipeline);
     if (ret != 0)
         goto ERROR_PipelineCache;
 
@@ -491,6 +1010,76 @@ int PipelineCache::new_pipeline(VkShaderModule shader_module, const ShaderInfo&
     return -1;
 }
 
+void PipelineCache::set_shader_cache_dir(const char* dir)
+{
+    MutexLockGuard lock(d->cache_lock);
+    d->shader_cache_dir = dir;
+}
+
+static bool clear_directory(const std::string& path)
+{
+#ifdef _WIN32
+    WIN32_FIND_DATAA findData;
+    HANDLE hFind = FindFirstFileA((path + "\\*").c_str(), &findData);
+    if (hFind == INVALID_HANDLE_VALUE)
+        return false;
+
+    do
+    {
+        std::string name = findData.cFileName;
+        if (name == "." || name == "..")
+            continue;
+
+        std::string fullPath = path + "\\" + name;
+        if (findData.dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY)
+        {
+            clear_directory(fullPath);
+            RemoveDirectoryA(fullPath.c_str());
+        }
+        else
+        {
+            DeleteFileA(fullPath.c_str());
+        }
+    } while (FindNextFileA(hFind, &findData));
+
+    FindClose(hFind);
+    return true;
+#else
+    DIR* dir = opendir(path.c_str());
+    if (!dir)
+        return false;
+
+    struct dirent* entry;
+    while ((entry = readdir(dir)) != nullptr)
+    {
+        std::string name = entry->d_name;
+        if (name == "." || name == "..")
+            continue;
+
+        std::string fullPath = path + "/" + name;
+        struct stat st;
+        if (stat(fullPath.c_str(), &st) == 0)
+        {
+            if (S_ISDIR(st.st_mode))
+            {
+                clear_directory(fullPath);
+                rmdir(fullPath.c_str());
+            }
+            else
+            {
+                unlink(fullPath.c_str());
+            }
+        }
+    }
+    closedir(dir);
+    return true;
+#endif
+}
+
+int PipelineCache::clear_shader_cache() const
+{
+    MutexLockGuard lock(d->cache_lock);
+    d->spv_code_cache.clear();
+
+    if (clear_directory(d->shader_cache_dir))
+        return 0;
+    return -1;
+}
+
 #endif // NCNN_VULKAN
 
 } // namespace ncnn
diff --git a/src/pipelinecache.h b/src/pipelinecache.h
index b93c0cfd8f0..f4a58efbd72 100644
--- a/src/pipelinecache.h
+++ b/src/pipelinecache.h
@@ -24,6 +24,10 @@ class NCNN_EXPORT PipelineCache
 
     void clear();
 
+    void set_shader_cache_dir(const char* dir);
+
+    int clear_shader_cache() const;
+
     int get_pipeline(const uint32_t* spv_data, size_t spv_data_size, const std::vector<vk_specialization_type>& specializations,
                      uint32_t local_size_x, uint32_t local_size_y, uint32_t local_size_z, uint32_t subgroup_size,
                      VkShaderModule* shader_module,
@@ -42,6 +46,10 @@ class NCNN_EXPORT PipelineCache
                      VkDescriptorUpdateTemplateKHR* descriptor_update_template,
                      ShaderInfo& shader_info) const;
 
+    int load_pipeline_cache(const char* path) const;
+
+    int save_pipeline_cache(const char* path) const;
+
 protected:
     int create_shader_module(int shader_type_index, const Option& opt, uint32_t local_size_x, uint32_t local_size_y, uint32_t local_size_z,
diff --git a/src/simplevk.h b/src/simplevk.h
index c2c7060dd1d..7507b15be42 100644
--- a/src/simplevk.h
+++ b/src/simplevk.h
@@ -1097,6 +1097,12 @@ typedef enum VkCommandBufferResetFlagBits
 } VkCommandBufferResetFlagBits;
 typedef VkFlags VkCommandBufferResetFlags;
 
+typedef enum VkPipelineCacheCreateFlagBits {
+    VK_PIPELINE_CACHE_CREATE_EXTERNALLY_SYNCHRONIZED_BIT = 0x00000001,
+    VK_PIPELINE_CACHE_CREATE_INTERNALLY_SYNCHRONIZED_MERGE_BIT_KHR = 0x00000008,
+    VK_PIPELINE_CACHE_CREATE_EXTERNALLY_SYNCHRONIZED_BIT_EXT = VK_PIPELINE_CACHE_CREATE_EXTERNALLY_SYNCHRONIZED_BIT,
+} VkPipelineCacheCreateFlagBits;
+
 typedef struct VkApplicationInfo
 {
     VkStructureType sType;
diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt
index 5a0940e88c6..86e6203e2a2 100644
--- a/tests/CMakeLists.txt
+++ b/tests/CMakeLists.txt
@@ -65,6 +65,7 @@ ncnn_add_test(paramdict)
 
 if(NCNN_VULKAN)
     ncnn_add_test(command)
+    ncnn_add_test(pipeline_cache)
 endif()
 
 if(CMAKE_SYSTEM_NAME STREQUAL "Emscripten")
diff --git a/tests/test_pipeline_cache.cpp b/tests/test_pipeline_cache.cpp
new file mode 100644
index 00000000000..ed0a7dcf014
--- /dev/null
+++ b/tests/test_pipeline_cache.cpp
@@ -0,0 +1,366 @@
+#include "benchmark.h"
+#include "testutil.h"
+#include "layer.h"
+#include "mat.h"
+#include "option.h"
+#include "gpu.h"
+#include "pipelinecache.h"
+
+#include <cstdio>
+#include <future>
+#ifdef _WIN32
+#include <io.h>
+#else
+#include <unistd.h>
+#endif
+
+void random_truncate_file(const char* filename, size_t new_size)
+{
+    FILE* fp = fopen(filename, "rb+");
+    if (!fp) return;
+#ifdef _WIN32
+    int fd = _fileno(fp);
+    _chsize(fd, new_size);
+#else
+    int fd = fileno(fp);
+    ftruncate(fd, new_size);
+#endif
+    fclose(fp);
+}
+
+void corrupt_file(const char* filename)
+{
+    int mode = RandomInt(0, 10000) % 3;
+    if (mode == 0)
+    {
+        if (remove(filename) != 0)
+            fprintf(stderr, "Failed to remove file %s\n", filename);
+        return;
+    }
+    if (mode == 1)
+    {
+        // empty file
+        FILE* f = fopen(filename, "wb");
+        if (!f) return;
+        fclose(f);
+        return;
+    }
+    // truncate to a random size between 1 and the original file size
+    FILE* fp = fopen(filename, "rb");
+    if (!fp) return;
+    fseek(fp, 0, SEEK_END);
+    long file_size = ftell(fp);
+    fclose(fp);
+
+    if (file_size <= 0) return;
+
+    size_t new_size = (size_t)(RandomInt(0, 10000) % file_size + 1);
+    random_truncate_file(filename, new_size);
+}
+
+bool test_pipeline_creation(const ncnn::Option& opt, double* build_time = nullptr, int layer_type_index = 0)
+{
+    const ncnn::VulkanDevice* vkdev = ncnn::get_gpu_device(0);
+    ncnn::Pipeline pipeline(vkdev);
+    double start = ncnn::get_current_time();
+    int ret = pipeline.create(0, opt, std::vector<ncnn::vk_specialization_type> {1});
+    double end = ncnn::get_current_time();
+    if (build_time) *build_time = end - start;
+    if (ret != 0) return false;
+    return true;
+}
+
+bool pipeline_cache_test_basic_creation()
+{
+    fprintf(stdout, "Start basic test\n");
+    ncnn::create_gpu_instance();
+    ncnn::VulkanDevice* vkdev = ncnn::get_gpu_device(0);
+    const int options[][6] = {
+        {0, 0, 0, 0, 0, 0},
+    };
+
+    ncnn::Option opt{};
+    opt.num_threads = 1;
+    opt.use_packing_layout = options[0][0];
+    opt.use_fp16_packed = options[0][1];
+    opt.use_fp16_storage = options[0][2];
+    opt.use_fp16_arithmetic = options[0][3];
+    opt.use_bf16_storage = options[0][4];
+    opt.use_shader_pack8 = options[0][5];
+
+    double duration_1;
+    if (vkdev->get_pipeline_cache()->clear_shader_cache() != 0)
+    {
+        fprintf(stderr, "clear shader cache failed\n");
+        ncnn::destroy_gpu_instance();
+        return false;
+    }
+    if (!test_pipeline_creation(opt, &duration_1))
+    {
+        fprintf(stderr, "pipeline creation without cache failed\n");
+        ncnn::destroy_gpu_instance();
+        return false;
+    }
+    fprintf(stdout, "pipeline cache test creation time (without cache): %.2f ms\n", duration_1);
+    if (vkdev->get_pipeline_cache()->save_pipeline_cache("vk_pipeline_cache") != 0)
+    {
+        fprintf(stderr, "save pipeline cache failed\n");
+        ncnn::destroy_gpu_instance();
+        return false;
+    }
+
+    ncnn::destroy_gpu_instance();
+
+    ncnn::create_gpu_instance();
+
+    int ret = ncnn::get_gpu_device(0)->get_pipeline_cache()->load_pipeline_cache("vk_pipeline_cache");
+    if (ret != 0)
+    {
+        fprintf(stderr, "load pipeline cache failed\n");
+        ncnn::destroy_gpu_instance();
+        return false;
+    }
+    double duration_2;
+    if (!test_pipeline_creation(opt, &duration_2))
+    {
+        fprintf(stderr, "pipeline creation with cache failed\n");
+        ncnn::destroy_gpu_instance();
+        return false;
+    }
+    fprintf(stdout, "pipeline cache test creation time (with cache): %.2f ms\n", duration_2);
+    remove("vk_pipeline_cache");
+    ncnn::destroy_gpu_instance();
+    return true;
+}
+
+bool pipeline_cache_test_corrupted_cache_file()
+{
+    fprintf(stdout, "Start file corruption test\n");
+    // first create and save a cache file
+    ncnn::create_gpu_instance();
+    ncnn::VulkanDevice* vkdev = ncnn::get_gpu_device(0);
+    const int options[][6] = {
+        {0, 0, 0, 0, 0, 0},
+    };
+
+    ncnn::Option opt{};
+    opt.num_threads = 1;
+    opt.use_packing_layout = options[0][0];
+    opt.use_fp16_packed = options[0][1];
+    opt.use_fp16_storage = options[0][2];
+    opt.use_fp16_arithmetic = options[0][3];
+    opt.use_bf16_storage = options[0][4];
+    opt.use_shader_pack8 = options[0][5];
+
+    if (vkdev->get_pipeline_cache()->clear_shader_cache() != 0)
+    {
+        fprintf(stderr, "clear shader cache failed\n");
+        ncnn::destroy_gpu_instance();
+        return false;
+    }
+    double duration_1;
+    if (!test_pipeline_creation(opt, &duration_1))
+    {
+        fprintf(stderr, "pipeline creation without cache failed\n");
+        ncnn::destroy_gpu_instance();
+        return false;
+    }
+
+    fprintf(stdout, "pipeline cache test creation time (without cache): %.2f ms\n", duration_1);
+    if (vkdev->get_pipeline_cache()->save_pipeline_cache("vk_pipeline_cache") != 0)
+    {
+        fprintf(stderr, "save pipeline cache failed\n");
+        ncnn::destroy_gpu_instance();
+        return false;
+    }
+    ncnn::destroy_gpu_instance();
+
+    corrupt_file("vk_pipeline_cache");
+
+    ncnn::create_gpu_instance();
+    int ret = ncnn::get_gpu_device(0)->get_pipeline_cache()->load_pipeline_cache("vk_pipeline_cache");
+    if (ret)
+    {
+        fprintf(stderr, "load cache after file corruption failed\n");
+        ncnn::destroy_gpu_instance();
+        return false;
+    }
+    double duration_2;
+    if (!test_pipeline_creation(opt, &duration_2))
+    {
+        fprintf(stderr, "pipeline creation after cache corruption failed\n");
+        ncnn::destroy_gpu_instance();
+        return false;
+    }
+    fprintf(stdout, "pipeline cache test creation time (after cache corruption): %.2f ms\n", duration_2);
+    remove("vk_pipeline_cache");
+    ncnn::destroy_gpu_instance();
+    return true;
+}
+
+bool pipeline_cache_test_multithread_creation()
+{
+    fprintf(stdout, "Start multi-thread test\n");
+
+    ncnn::create_gpu_instance();
+    ncnn::VulkanDevice* vkdev = ncnn::get_gpu_device(0);
+
+    ncnn::Option opt{};
+    opt.num_threads = 1;
+    opt.use_packing_layout = 0;
+    opt.use_fp16_packed = 1;
+    opt.use_fp16_storage = 0;
+    opt.use_fp16_arithmetic = 0;
+    opt.use_bf16_storage = 1;
+    opt.use_shader_pack8 = 0;
+
+    if (vkdev->get_pipeline_cache()->clear_shader_cache() != 0)
+    {
+        fprintf(stderr, "clear shader cache failed\n");
+        ncnn::destroy_gpu_instance();
+        return false;
+    }
+    double duration;
+    if (!test_pipeline_creation(opt, &duration))
+    {
+        fprintf(stderr, "pipeline creation failed before multi-thread test\n");
+        ncnn::destroy_gpu_instance();
+        return false;
+    }
+    if (vkdev->get_pipeline_cache()->save_pipeline_cache("vk_pipeline_cache") != 0)
+    {
+        fprintf(stderr, "save pipeline cache failed\n");
+        ncnn::destroy_gpu_instance();
+        return false;
+    }
+    ncnn::destroy_gpu_instance();
+
+    ncnn::create_gpu_instance();
+    vkdev = ncnn::get_gpu_device(0);
+    if (vkdev->get_pipeline_cache()->load_pipeline_cache("vk_pipeline_cache") != 0)
+    {
+        fprintf(stderr, "load pipeline cache failed\n");
+        ncnn::destroy_gpu_instance();
+        return false;
+    }
+
+    const int thread_count = 8;
+    std::vector<std::future<bool> > futures;
+    for (int i = 0; i < thread_count; i++)
+    {
+        futures.emplace_back(std::async(std::launch::async, [&opt]() {
+            return test_pipeline_creation(opt, nullptr);
+        }));
+    }
+
+    bool all_ok = true;
+    for (auto& fut : futures)
+    {
+        if (!fut.get())
+            all_ok = false;
+    }
+
+    remove("vk_pipeline_cache");
+    ncnn::destroy_gpu_instance();
+
+    if (!all_ok)
+    {
+        fprintf(stderr, "multi-thread pipeline creation failed\n");
+        return false;
+    }
+
+    fprintf(stdout, "multi-thread pipeline creation passed\n");
+    return true;
+}
+
+bool pipeline_cache_test_multithread_save()
+{
+    fprintf(stdout, "Start multi-thread save test\n");
+
+    ncnn::create_gpu_instance();
+    ncnn::VulkanDevice* vkdev = ncnn::get_gpu_device(0);
+
+    ncnn::Option opt{};
+    opt.num_threads = 1;
+    opt.use_packing_layout = 0;
+    opt.use_fp16_packed = 0;
+    opt.use_fp16_storage = 0;
+    opt.use_fp16_arithmetic = 0;
+    opt.use_bf16_storage = 0;
+    opt.use_shader_pack8 = 0;
+
+    if (vkdev->get_pipeline_cache()->clear_shader_cache() != 0)
+    {
+        fprintf(stderr, "clear shader cache failed\n");
+        ncnn::destroy_gpu_instance();
+        return false;
+    }
+
+    if (!test_pipeline_creation(opt, nullptr))
+    {
+        fprintf(stderr, "pipeline creation failed before multi-thread save test\n");
+        ncnn::destroy_gpu_instance();
+        return false;
+    }
+
+    const int thread_count = 8;
+    std::vector<std::future<int> > futures;
+    for (int i = 0; i < thread_count; i++)
+    {
+        futures.emplace_back(std::async(std::launch::async, [vkdev]() {
+            return vkdev->get_pipeline_cache()->save_pipeline_cache("vk_pipeline_cache");
+        }));
+    }
+
+    bool all_ok = true;
+    for (auto& fut : futures)
+    {
+        if (fut.get() != 0)
+            all_ok = false;
+    }
+
+    ncnn::destroy_gpu_instance();
+
+    if (!all_ok)
+    {
+        fprintf(stderr, "multi-thread save_pipeline_cache had errors\n");
+        return false;
+    }
+
+    ncnn::create_gpu_instance();
+    vkdev = ncnn::get_gpu_device(0);
+    int ret = vkdev->get_pipeline_cache()->load_pipeline_cache("vk_pipeline_cache");
+    remove("vk_pipeline_cache");
+    ncnn::destroy_gpu_instance();
+
+    if (ret != 0)
+    {
+        fprintf(stderr, "cache file after multi-thread save is invalid\n");
+        return false;
+    }
+
+    fprintf(stdout, "multi-thread save_pipeline_cache passed\n");
+    return true;
+}
+
+int main()
+{
+    SRAND(7767517);
+    if (!pipeline_cache_test_basic_creation())
+    {
+        fprintf(stderr, "pipeline cache basic test failed\n");
+        return -1;
+    }
+    if (!pipeline_cache_test_corrupted_cache_file())
+    {
+        fprintf(stderr, "pipeline cache corrupted file test failed\n");
+        return -1;
+    }
+    if (!pipeline_cache_test_multithread_creation())
+    {
+        fprintf(stderr, "pipeline cache multi-thread creation test failed\n");
+        return -1;
+    }
+    if (!pipeline_cache_test_multithread_save())
+    {
+        fprintf(stderr, "pipeline cache multi-thread save test failed\n");
+        return -1;
+    }
+    fprintf(stdout, "All pipeline cache tests passed\n");
+    return 0;
+}
\ No newline at end of file
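
Usage note (not part of the patch): the sketch below shows one way an application might drive the new PipelineCache API, assuming the default device-owned PipelineCache returned by VulkanDevice::get_pipeline_cache() is used; the cache file name is illustrative. load_pipeline_cache() falls back to an empty VkPipelineCache when the file is missing or invalid, so it can be called unconditionally at startup, and save_pipeline_cache() writes the driver blob prefixed with the device/driver header that is validated on the next load. Compiled SPIR-V is cached separately on disk (by default under ~/.ncnn/shadercache or %LOCALAPPDATA%/ncnn/shadercache, per the constructor in this patch).

    #include "gpu.h"
    #include "pipelinecache.h"

    int main()
    {
        ncnn::create_gpu_instance();

        // The device-owned cache; load/save/clear_shader_cache are const methods.
        const ncnn::PipelineCache* cache = ncnn::get_gpu_device(0)->get_pipeline_cache();

        // Seed the pipeline cache from a previous run; returns 0 even when the
        // file is absent or corrupted (an empty cache is created instead).
        cache->load_pipeline_cache("vk_pipeline_cache.bin");

        // ... create ncnn::Net / ncnn::Pipeline objects and run Vulkan inference ...

        // Persist the accumulated VkPipelineCache data for the next process.
        cache->save_pipeline_cache("vk_pipeline_cache.bin");

        ncnn::destroy_gpu_instance();
        return 0;
    }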