Skip to content

Commit 39e6556

Browse files
authored
Merge pull request #447 from clEsperanto/fix-macos-leak
minimize clBuildProgram calls
2 parents 5c835a7 + e5f6b8e commit 39e6556

File tree

4 files changed

+164
-8
lines changed

4 files changed

+164
-8
lines changed

clic/include/device.hpp

Lines changed: 86 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -4,13 +4,72 @@
44
#include "clic.hpp"
55

66
#include <iostream>
7+
#include <list>
78
#include <memory>
89
#include <sstream>
910
#include <unordered_map>
1011

1112
namespace cle
1213
{
1314

15+
/**
 * @brief Bounded least-recently-used (LRU) cache of programs keyed by string
 *
 * Programs are stored type-erased as std::shared_ptr<void>. At most
 * MAX_PROGRAM_CACHE_SIZE entries are kept; inserting a new key into a full
 * cache evicts the entry that was used least recently.
 *
 * program_lru orders the keys from least recently used (front) to most
 * recently used (back). Each map entry remembers the position of its key in
 * that list so it can be refreshed without searching the list.
 *
 * NOTE(review): the entries hold iterators into program_lru, so a copied
 * Cache would still refer to the source object's list. Hold a Cache through
 * a pointer instead of copying it.
 */
struct Cache
{
  static constexpr size_t MAX_PROGRAM_CACHE_SIZE = 64;

  // Cached program together with the position of its key in the LRU list
  struct Entry
  {
    std::shared_ptr<void>            program;
    std::list<std::string>::iterator lru_it;
  };

  std::unordered_map<std::string, Entry> program_cache;
  std::list<std::string>                 program_lru;

  Cache() { program_cache.reserve(MAX_PROGRAM_CACHE_SIZE); }

  ~Cache() = default;

  /**
   * @brief Insert a program, or replace the program stored under an existing key
   * @param key identifier of the program
   * @param program type-erased program handle to store
   *
   * The key becomes the most recently used one. When the key is new and the
   * cache already holds MAX_PROGRAM_CACHE_SIZE entries, the least recently
   * used entry is removed first.
   */
  auto
  cacheProgram(const std::string & key, const std::shared_ptr<void> & program) -> void
  {
    const auto found = program_cache.find(key);
    if (found != program_cache.end())
    {
      markMostRecentlyUsed(found->second);
      found->second.program = program;
      return;
    }
    if (program_cache.size() >= MAX_PROGRAM_CACHE_SIZE && !program_lru.empty())
    {
      // The front of the list is the least recently used key. Erase the map
      // entry first: the key reference is only valid until pop_front().
      program_cache.erase(program_lru.front());
      program_lru.pop_front();
    }
    program_lru.push_back(key);
    program_cache.emplace(key, Entry{ program, std::prev(program_lru.end()) });
  }

  /**
   * @brief Look up a program and mark it as most recently used
   * @param key identifier of the program
   * @return the stored program, or nullptr when the key is not cached
   */
  auto
  getCachedProgram(const std::string & key) -> std::shared_ptr<void>
  {
    const auto found = program_cache.find(key);
    if (found == program_cache.end())
    {
      return nullptr;
    }
    markMostRecentlyUsed(found->second);
    return found->second.program;
  }

private:
  // Relink the entry's list node to the back of program_lru. splice() moves
  // the existing node, so nothing is allocated or copied and entry.lru_it
  // keeps pointing at the same (now last) element.
  auto
  markMostRecentlyUsed(Entry & entry) -> void
  {
    program_lru.splice(program_lru.end(), program_lru, entry.lru_it);
  }
};
72+
1473
/**
1574
* @brief Device class
1675
* This class is used to manage devices
@@ -145,6 +204,12 @@ class Device
145204
[[nodiscard]] virtual auto
146205
getLocalMemorySize() const -> size_t = 0;
147206

207+
[[nodiscard]] virtual auto
208+
getProgramFromCache(const std::string & key) const -> std::shared_ptr<void> = 0;
209+
210+
virtual auto
211+
addProgramToCache(const std::string & key, std::shared_ptr<void> program) -> void = 0;
212+
148213
/**
149214
* @brief operator << for Device::Type
150215
*/
@@ -412,10 +477,18 @@ class OpenCLDevice : public Device
412477
[[nodiscard]] auto
413478
getLocalMemorySize() const -> size_t override;
414479

480+
481+
[[nodiscard]] auto
482+
getProgramFromCache(const std::string & key) const -> std::shared_ptr<void> override;
483+
484+
auto
485+
addProgramToCache(const std::string & key, std::shared_ptr<void> program) -> void override;
486+
415487
private:
416488
std::shared_ptr<Ressources> clRessources = nullptr;
417489
std::shared_ptr<Context> clContext = nullptr;
418490
std::shared_ptr<CommandQueue> clCommandQueue = nullptr;
491+
std::shared_ptr<Cache> cache = std::make_shared<Cache>();
419492
bool initialized = false;
420493
bool waitFinish = false;
421494
};
@@ -582,13 +655,20 @@ class CUDADevice : public Device
582655
[[nodiscard]] auto
583656
getLocalMemorySize() const -> size_t override;
584657

658+
[[nodiscard]] auto
659+
getProgramFromCache(const std::string & key) const -> std::shared_ptr<void> override;
660+
661+
auto
662+
addProgramToCache(const std::string & key, std::shared_ptr<void> program) -> void override;
663+
585664
private:
586-
int cudaDeviceIndex;
587-
CUdevice cudaDevice;
588-
CUcontext cudaContext;
589-
CUstream cudaStream;
590-
bool initialized = false;
591-
bool waitFinish = false;
665+
int cudaDeviceIndex;
666+
CUdevice cudaDevice;
667+
CUcontext cudaContext;
668+
CUstream cudaStream;
669+
shared_ptr<void> cache = std::make_shared<Cache>();
670+
bool initialized = false;
671+
bool waitFinish = false;
592672
};
593673
#endif // USE_CUDA
594674

clic/src/cudadevice.cpp

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -243,6 +243,19 @@ CUDADevice::getInfoExtended() const -> std::string
243243
return getInfo();
244244
}
245245

246+
auto
247+
CUDADevice::getProgramFromCache(const std::string & key) const -> std::shared_ptr<void>
248+
{
249+
return cache->getCachedProgram(key);
250+
}
251+
252+
auto
253+
CUDADevice::addProgramToCache(const std::string & key, std::shared_ptr<void> program) -> void
254+
{
255+
cache->cacheProgram(key, program);
256+
}
257+
258+
246259
#endif // USE_CUDA
247260

248261
} // namespace cle

clic/src/openclbackend.cpp

Lines changed: 53 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
#include "backend.hpp"
22
#include "cle_preamble_cl.h"
3+
#include <list>
34

45
#include <unordered_map>
56

@@ -1261,6 +1262,49 @@ CreateProgramFromSource(const Device::Pointer & device, const std::string & kern
12611262
}
12621263
#endif
12631264

1265+
1266+
// static constexpr size_t MAX_PROGRAM_CACHE_SIZE = 64;
1267+
// static std::unordered_map<std::string, std::shared_ptr<void>> program_cache;
1268+
// static std::list<std::string> program_lru;
1269+
1270+
// void
1271+
// cacheProgram(const std::string & key, std::shared_ptr<void> program)
1272+
// {
1273+
// std::cout << "caching program: " << key << std::endl;
1274+
// if (program_cache.find(key) != program_cache.end())
1275+
// {
1276+
// // Program already exists, update LRU
1277+
// program_lru.remove(key);
1278+
// program_lru.push_back(key);
1279+
// std::cout << "\tProgram already cached, updating LRU." << std::endl;
1280+
// return;
1281+
// }
1282+
// if (program_cache.size() >= MAX_PROGRAM_CACHE_SIZE)
1283+
// {
1284+
// // Remove oldest
1285+
// auto oldest = program_lru.front();
1286+
// program_lru.pop_front();
1287+
// program_cache.erase(oldest);
1288+
// std::cout << "\tCache full, removing oldest program" << std::endl;
1289+
// }
1290+
// program_cache[key] = program;
1291+
// program_lru.push_back(key);
1292+
// std::cout << program_cache.size() << " programs cached." << std::endl;
1293+
// return;
1294+
// }
1295+
1296+
// std::shared_ptr<void>
1297+
// getCachedProgram(const std::string & key)
1298+
// {
1299+
// std::cout << "fetching program from cache: " << key << std::endl;
1300+
// auto it = program_cache.find(key);
1301+
// if (it != program_cache.end())
1302+
// {
1303+
// return it->second;
1304+
// }
1305+
// return nullptr;
1306+
// }
1307+
12641308
auto
12651309
OpenCLBackend::buildKernel(const Device::Pointer & device,
12661310
const std::string & kernel_source,
@@ -1272,13 +1316,17 @@ OpenCLBackend::buildKernel(const Device::Pointer & device,
12721316
cl_int err;
12731317
auto opencl_device = std::dynamic_pointer_cast<const OpenCLDevice>(device);
12741318
const auto use_cache = is_cache_enabled();
1319+
// std::shared_ptr<void> program = nullptr;
12751320

12761321
std::hash<std::string> hasher;
12771322
const auto source_hash = std::to_string(hasher(kernel_source));
12781323
const auto device_hash = std::to_string(hasher(opencl_device->getInfo()));
12791324

1280-
std::shared_ptr<void> program = nullptr;
1281-
if (use_cache)
1325+
// fetch the internal cache to avoid rebuilding
1326+
const auto cache_key = device_hash + "_" + source_hash;
1327+
auto program = device->getProgramFromCache(cache_key);
1328+
1329+
if (program == nullptr && use_cache)
12821330
{
12831331
program = loadProgramFromCache(device, device_hash, source_hash);
12841332
}
@@ -1291,6 +1339,9 @@ OpenCLBackend::buildKernel(const Device::Pointer & device,
12911339
saveBinaryToCache(device_hash, source_hash, program, device);
12921340
}
12931341
}
1342+
1343+
device->addProgramToCache(cache_key, program);
1344+
12941345
auto ocl_kernel = clCreateKernel(reinterpret_cast<cl_program>(program.get()), kernel_name.c_str(), &err);
12951346
if (err != CL_SUCCESS)
12961347
{

clic/src/opencldevice.cpp

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -434,6 +434,18 @@ OpenCLDevice::getInfoExtended() const -> std::string
434434
return result.str();
435435
}
436436

437+
auto
438+
OpenCLDevice::getProgramFromCache(const std::string & key) const -> std::shared_ptr<void>
439+
{
440+
return cache->getCachedProgram(key);
441+
}
442+
443+
auto
444+
OpenCLDevice::addProgramToCache(const std::string & key, std::shared_ptr<void> program) -> void
445+
{
446+
cache->cacheProgram(key, program);
447+
}
448+
437449

438450
#endif // USE_OPENCL
439451

0 commit comments

Comments
 (0)