Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
511 changes: 511 additions & 0 deletions src/allocator.cpp

Large diffs are not rendered by default.

26 changes: 26 additions & 0 deletions src/allocator.h
Original file line number Diff line number Diff line change
Expand Up @@ -288,6 +288,7 @@ class NCNN_EXPORT VkAllocator
VkBuffer create_buffer(size_t size, VkBufferUsageFlags usage);
VkDeviceMemory allocate_memory(size_t size, uint32_t memory_type_index);
VkDeviceMemory allocate_dedicated_memory(size_t size, uint32_t memory_type_index, VkImage image, VkBuffer buffer);
VkDeviceMemory allocate_import_host_memory(size_t size, uint32_t memory_type_index, void* host_ptr);

VkImage create_image(int width, int height, int depth, VkFormat format, VkImageTiling tiling, VkImageUsageFlags usage);
VkImageView create_imageview(VkImage image, VkFormat format);
Expand Down Expand Up @@ -342,6 +343,31 @@ class NCNN_EXPORT VkWeightAllocator : public VkAllocator
VkWeightAllocatorPrivate* const d;
};

class VkHostAllocatorPrivate;

// VkAllocator subclass; NetPrivate::upload_model() picks it as the weight
// allocator when opt.use_weights_in_host_memory is set.
// NOTE(review): judging by the name it keeps allocations in host memory, but
// the implementation is not visible from this header - confirm in allocator.cpp.
class NCNN_EXPORT VkHostAllocator : public VkAllocator
{
public:
    // vkdev                 device handed to the allocator on construction
    // preferred_block_size  defaults to 8M (8 * 1024 * 1024)
    explicit VkHostAllocator(const VulkanDevice* vkdev, size_t preferred_block_size = 8 * 1024 * 1024);
    virtual ~VkHostAllocator();

    // release all blocks immediately
    virtual void clear();

    // buffer flavour of the allocate / free pair
    virtual VkBufferMemory* fastMalloc(size_t size);
    virtual void fastFree(VkBufferMemory* ptr);

    // image flavour of the allocate / free pair
    virtual VkImageMemory* fastMalloc(int w, int h, int c, size_t elemsize, int elempack);
    virtual void fastFree(VkImageMemory* ptr);

private:
    // copy construction and copy assignment are private, so outside code
    // cannot copy this allocator
    VkHostAllocator(const VkHostAllocator&);
    VkHostAllocator& operator=(const VkHostAllocator&);

    // opaque implementation state; the pointer itself is const
    VkHostAllocatorPrivate* const d;
};

class VkStagingAllocatorPrivate;
class NCNN_EXPORT VkStagingAllocator : public VkAllocator
{
Expand Down
32 changes: 32 additions & 0 deletions src/gpu.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -345,6 +345,7 @@ class GpuInfoPrivate
int support_VK_KHR_zero_initialize_workgroup_memory;
int support_VK_EXT_buffer_device_address;
int support_VK_EXT_descriptor_indexing;
int support_VK_EXT_external_memory_host;
int support_VK_EXT_memory_budget;
int support_VK_EXT_memory_priority;
int support_VK_EXT_queue_family_foreign;
Expand Down Expand Up @@ -390,6 +391,7 @@ class GpuInfoPrivate
VkPhysicalDeviceSubgroupProperties querySubgroupProperties;
VkPhysicalDeviceDriverPropertiesKHR queryDriverProperties;
VkPhysicalDeviceSubgroupSizeControlPropertiesEXT querySubgroupSizeControlProperties;
VkPhysicalDeviceExternalMemoryHostPropertiesEXT queryExternalMemoryHostProperties;
VkPhysicalDeviceCooperativeMatrix2PropertiesNV queryCooperativeMatrix2PropertiesNV;
VkPhysicalDeviceCooperativeVectorPropertiesNV queryCooperativeVectorPropertiesNV;

Expand Down Expand Up @@ -660,6 +662,7 @@ int GpuInfoPrivate::query_extensions()
support_VK_KHR_zero_initialize_workgroup_memory = 0;
support_VK_EXT_buffer_device_address = 0;
support_VK_EXT_descriptor_indexing = 0;
support_VK_EXT_external_memory_host = 0;
support_VK_EXT_memory_budget = 0;
support_VK_EXT_memory_priority = 0;
support_VK_EXT_queue_family_foreign = 0;
Expand Down Expand Up @@ -746,6 +749,8 @@ int GpuInfoPrivate::query_extensions()
support_VK_EXT_buffer_device_address = exp.specVersion;
else if (strcmp(exp.extensionName, "VK_EXT_descriptor_indexing") == 0)
support_VK_EXT_descriptor_indexing = exp.specVersion;
else if (strcmp(exp.extensionName, "VK_EXT_external_memory_host") == 0)
support_VK_EXT_external_memory_host = exp.specVersion;
else if (strcmp(exp.extensionName, "VK_EXT_memory_budget") == 0)
support_VK_EXT_memory_budget = exp.specVersion;
else if (strcmp(exp.extensionName, "VK_EXT_memory_priority") == 0)
Expand Down Expand Up @@ -1140,6 +1145,16 @@ void GpuInfoPrivate::query_extension_properties()
queryExtensionProperties = &querySubgroupSizeControlProperties;
}

// query external memory host
// Reset the properties struct to all zeros and tag it with its structure type.
memset(&queryExternalMemoryHostProperties, 0, sizeof(queryExternalMemoryHostProperties));
queryExternalMemoryHostProperties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_EXTERNAL_MEMORY_HOST_PROPERTIES_EXT;
queryExternalMemoryHostProperties.pNext = 0;
// Only when the extension was detected: push this struct onto the front of the
// pNext chain headed by queryExtensionProperties. Otherwise it stays zeroed and
// unlinked.
if (support_VK_EXT_external_memory_host)
{
queryExternalMemoryHostProperties.pNext = queryExtensionProperties;
queryExtensionProperties = &queryExternalMemoryHostProperties;
}

// query nv cooperative matrix2
memset(&queryCooperativeMatrix2PropertiesNV, 0, sizeof(queryCooperativeMatrix2PropertiesNV));
queryCooperativeMatrix2PropertiesNV.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_COOPERATIVE_MATRIX_2_PROPERTIES_NV;
Expand Down Expand Up @@ -1920,6 +1935,11 @@ int GpuInfo::support_VK_EXT_descriptor_indexing() const
return d->support_VK_EXT_descriptor_indexing;
}

// Returns the cached VK_EXT_external_memory_host flag from the private data:
// 0 after the reset in query_extensions(), or the extension's specVersion once
// the extension name has been matched there.
int GpuInfo::support_VK_EXT_external_memory_host() const
{
    const int spec_version = d->support_VK_EXT_external_memory_host;
    return spec_version;
}

int GpuInfo::support_VK_EXT_memory_budget() const
{
return d->support_VK_EXT_memory_budget;
Expand Down Expand Up @@ -2127,6 +2147,11 @@ const VkPhysicalDeviceSubgroupSizeControlPropertiesEXT& GpuInfo::querySubgroupSi
return d->querySubgroupSizeControlProperties;
}

// Read-only access to the VkPhysicalDeviceExternalMemoryHostPropertiesEXT
// struct stored in the private data. The struct is zero-filled apart from sType
// and is linked into the extension property chain only when
// VK_EXT_external_memory_host is supported.
// NOTE(review): presumably populated by the later properties query - confirm.
const VkPhysicalDeviceExternalMemoryHostPropertiesEXT& GpuInfo::queryExternalMemoryHostProperties() const
{
    const VkPhysicalDeviceExternalMemoryHostPropertiesEXT& host_props = d->queryExternalMemoryHostProperties;
    return host_props;
}

const std::vector<VkCooperativeMatrixPropertiesKHR>& GpuInfo::queryCooperativeMatrixSubProperties() const
{
return d->queryCooperativeMatrixSubProperties;
Expand Down Expand Up @@ -3500,6 +3525,8 @@ VulkanDevice::VulkanDevice(int device_index)
enabledExtensions.push_back("VK_EXT_buffer_device_address");
if (info.support_VK_EXT_descriptor_indexing())
enabledExtensions.push_back("VK_EXT_descriptor_indexing");
if (info.support_VK_EXT_external_memory_host())
enabledExtensions.push_back("VK_EXT_external_memory_host");
if (info.support_VK_EXT_memory_budget())
enabledExtensions.push_back("VK_EXT_memory_budget");
if (info.support_VK_EXT_memory_priority())
Expand Down Expand Up @@ -4545,6 +4572,11 @@ int VulkanDevice::init_device_extension()
vkGetBufferDeviceAddressEXT = (PFN_vkGetBufferDeviceAddressEXT)vkGetDeviceProcAddr(d->device, "vkGetBufferDeviceAddressEXT");
}

// VK_EXT_external_memory_host: look up the device-level entry point through
// vkGetDeviceProcAddr, but only when the device info reports the extension.
// NOTE(review): the member is not assigned on the unsupported path in this
// hunk - confirm it is initialized elsewhere before any caller tests it.
if (info.support_VK_EXT_external_memory_host())
{
vkGetMemoryHostPointerPropertiesEXT = (PFN_vkGetMemoryHostPointerPropertiesEXT)vkGetDeviceProcAddr(d->device, "vkGetMemoryHostPointerPropertiesEXT");
}

#if __ANDROID_API__ >= 26
if (info.support_VK_ANDROID_external_memory_android_hardware_buffer())
{
Expand Down
5 changes: 5 additions & 0 deletions src/gpu.h
Original file line number Diff line number Diff line change
Expand Up @@ -338,6 +338,7 @@ class NCNN_EXPORT GpuInfo
int support_VK_KHR_zero_initialize_workgroup_memory() const;
int support_VK_EXT_buffer_device_address() const;
int support_VK_EXT_descriptor_indexing() const;
int support_VK_EXT_external_memory_host() const;
int support_VK_EXT_memory_budget() const;
int support_VK_EXT_memory_priority() const;
int support_VK_EXT_queue_family_foreign() const;
Expand Down Expand Up @@ -385,6 +386,7 @@ class NCNN_EXPORT GpuInfo
const VkPhysicalDeviceShaderIntegerDotProductProperties& queryShaderIntegerDotProductProperties() const;
const VkPhysicalDeviceSubgroupProperties& querySubgroupProperties() const;
const VkPhysicalDeviceSubgroupSizeControlPropertiesEXT& querySubgroupSizeControlProperties() const;
const VkPhysicalDeviceExternalMemoryHostPropertiesEXT& queryExternalMemoryHostProperties() const;

// extension sub properties
const std::vector<VkCooperativeMatrixPropertiesKHR>& queryCooperativeMatrixSubProperties() const;
Expand Down Expand Up @@ -511,6 +513,9 @@ class NCNN_EXPORT VulkanDevice
// VK_EXT_buffer_device_address
PFN_vkGetBufferDeviceAddressEXT vkGetBufferDeviceAddressEXT;

// VK_EXT_external_memory_host
PFN_vkGetMemoryHostPointerPropertiesEXT vkGetMemoryHostPointerPropertiesEXT;

#if __ANDROID_API__ >= 26
// VK_ANDROID_external_memory_android_hardware_buffer
PFN_vkGetAndroidHardwareBufferPropertiesANDROID vkGetAndroidHardwareBufferPropertiesANDROID;
Expand Down
9 changes: 8 additions & 1 deletion src/net.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -130,7 +130,14 @@ int NetPrivate::upload_model()
// create gpu device allocator if null
if (!weight_vkallocator)
{
weight_vkallocator = new VkWeightAllocator(vkdev);
if (opt.use_weights_in_host_memory)
{
weight_vkallocator = new VkHostAllocator(vkdev);
}
else
{
weight_vkallocator = new VkWeightAllocator(vkdev);
}
}
if (!weight_staging_vkallocator)
{
Expand Down
2 changes: 1 addition & 1 deletion src/option.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ Option::Option()
use_tensor_storage = false;
use_reserved_1p = false;

use_reserved_2 = false;
use_weights_in_host_memory = false;

flush_denormals = 3;

Expand Down
2 changes: 1 addition & 1 deletion src/option.h
Original file line number Diff line number Diff line change
Expand Up @@ -110,7 +110,7 @@ class NCNN_EXPORT Option
bool use_tensor_storage;

bool use_reserved_1p;
bool use_reserved_2;
bool use_weights_in_host_memory;

// enable DAZ(Denormals-Are-Zero) and FTZ(Flush-To-Zero)
// default value is 3
Expand Down
27 changes: 27 additions & 0 deletions src/vulkan_header_fix.h
Original file line number Diff line number Diff line change
Expand Up @@ -1671,4 +1671,31 @@ typedef struct VkPhysicalDeviceVulkanMemoryModelFeatures
typedef VkPhysicalDeviceVulkanMemoryModelFeatures VkPhysicalDeviceVulkanMemoryModelFeaturesKHR;
#endif // VK_KHR_vulkan_memory_model

// Fallback declarations for VK_EXT_external_memory_host. Compiled only when the
// Vulkan headers in use have not already defined the extension macro.
// NOTE(review): the numeric values and struct layouts below are expected to
// mirror the official Vulkan header - verify against the registry on changes.
// NOTE(review): no fallback for the host-allocation handle-type enum bits is
// declared here; confirm code using this extension still builds against
// headers old enough to take this branch.
#ifndef VK_EXT_external_memory_host
#define VK_EXT_external_memory_host 1
// sType values for the three structures declared below.
#define VK_STRUCTURE_TYPE_IMPORT_MEMORY_HOST_POINTER_INFO_EXT (VkStructureType)1000178000
#define VK_STRUCTURE_TYPE_MEMORY_HOST_POINTER_PROPERTIES_EXT (VkStructureType)1000178001
#define VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_EXTERNAL_MEMORY_HOST_PROPERTIES_EXT (VkStructureType)1000178002
// Input structure carrying a handle type and a host pointer.
typedef struct VkImportMemoryHostPointerInfoEXT
{
VkStructureType sType;
const void* pNext;
VkExternalMemoryHandleTypeFlagBits handleType;
void* pHostPointer;
} VkImportMemoryHostPointerInfoEXT;
// Output structure of vkGetMemoryHostPointerPropertiesEXT (see the function
// pointer type at the end of this block).
typedef struct VkMemoryHostPointerPropertiesEXT
{
VkStructureType sType;
void* pNext;
uint32_t memoryTypeBits;
} VkMemoryHostPointerPropertiesEXT;
// Physical-device properties structure for this extension; exposes the minimum
// alignment for imported host pointers.
typedef struct VkPhysicalDeviceExternalMemoryHostPropertiesEXT
{
VkStructureType sType;
void* pNext;
VkDeviceSize minImportedHostPointerAlignment;
} VkPhysicalDeviceExternalMemoryHostPropertiesEXT;
// Function pointer type for the vkGetMemoryHostPointerPropertiesEXT entry point.
typedef VkResult(VKAPI_PTR* PFN_vkGetMemoryHostPointerPropertiesEXT)(VkDevice device, VkExternalMemoryHandleTypeFlagBits handleType, const void* pHostPointer, VkMemoryHostPointerPropertiesEXT* pMemoryHostPointerProperties);
#endif // VK_EXT_external_memory_host

#endif // NCNN_VULKAN_HEADER_FIX_H
Loading