
Commit 2d840ea

Merge pull request #2596 from wangkuiyi/memory_cpu_allocator
Memory CPU allocator
2 parents 9a41f82 + 9490d24 commit 2d840ea

17 files changed, +521 -36 lines changed

CMakeLists.txt

Lines changed: 2 additions & 0 deletions
@@ -27,6 +27,7 @@ if(NOT CMAKE_CROSSCOMPILING)
 endif(NOT CMAKE_CROSSCOMPILING)
 find_package(Git REQUIRED)
 find_package(Threads REQUIRED)
+find_package(Boost QUIET)

 include(simd)

@@ -110,6 +111,7 @@ include_directories("${PROJ_ROOT}")
 include_directories("${PROJ_ROOT}/paddle/cuda/include")
 include_directories("${CMAKE_CURRENT_BINARY_DIR}/proto")
 include_directories("${CMAKE_CURRENT_BINARY_DIR}/go/pserver/cclient")
+include_directories(${Boost_INCLUDE_DIRS})

 set(EXTERNAL_LIBS
     ${GFLAGS_LIBRARIES}

paddle/CMakeLists.txt

Lines changed: 1 addition & 8 deletions
@@ -11,15 +11,8 @@ add_subdirectory(scripts)
 add_subdirectory(optimizer)
 add_subdirectory(strings)

-# Do not build go directory until go cmake is working smoothly.
-# if(CMAKE_Go_COMPILER)
-#   add_subdirectory(go)
-# endif()
-
-find_package(Boost QUIET)
-
 if(Boost_FOUND)
-  include_directories(${Boost_INCLUDE_DIRS})
+  add_subdirectory(memory)
   add_subdirectory(platform)
   add_subdirectory(framework)
 endif()

paddle/memory/.clang-format

Lines changed: 5 additions & 0 deletions
@@ -0,0 +1,5 @@
+---
+Language: Cpp
+BasedOnStyle: Google
+Standard: Cpp11
+...

paddle/memory/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
@@ -0,0 +1 @@
+add_subdirectory(detail)

paddle/memory/README.md

Lines changed: 1 addition & 0 deletions
@@ -97,6 +97,7 @@ class BuddyAllocator {
   struct Block {
     size_t size;
     Block* left, right;
+    size_t index;  // allocator id
   };
   ...
 };
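
Note: the new `index` field tags each block with the id of the allocator that issued it. The following is a hypothetical sketch (not code from this commit; all names below are made up for illustration) of how a free routine could use that tag to hand a block back to its owner:

// Hypothetical sketch only: route a freed block back to the allocator
// recorded in Block::index. Not part of this commit.
#include <cstddef>
#include <vector>

struct Block {
  size_t size;
  Block *left, *right;
  size_t index;  // allocator id
};

struct PoolAllocator {
  void ReturnBlock(Block* b) { /* merge b back into this pool's free list */ }
};

std::vector<PoolAllocator> g_allocators;  // one entry per allocator id

void FreeBlock(Block* b) {
  g_allocators[b->index].ReturnBlock(b);  // owner identified by index
}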

paddle/memory/detail/CMakeLists.txt

Lines changed: 7 additions & 0 deletions

@@ -0,0 +1,7 @@
+if(${WITH_GPU})
+  nv_library(system_allocator SRCS system_allocator.cc DEPS gflags)
+  nv_test(system_allocator_test SRCS system_allocator_test.cc DEPS system_allocator gflags)
+else(${WITH_GPU})
+  cc_library(system_allocator SRCS system_allocator.cc DEPS gflags)
+  cc_test(system_allocator_test SRCS system_allocator_test.cc DEPS system_allocator gflags)
+endif(${WITH_GPU})

paddle/memory/detail/buddy_allocator.cc

Lines changed: 35 additions & 0 deletions

@@ -0,0 +1,35 @@
+/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#pragma once
+
+#include "paddle/memory/detail/buddy_allocator.h"
+
+namespace paddle {
+namespace memory {
+namespace detail {
+
+BuddyAllocator::BuddyAllocator(size_t pool_size, size_t max_pools,
+                               SystemAllocator* system_allocator)
+    : pool_size_(pool_size),
+      max_pools_(max_pools),
+      system_allocator_(system_allocator) {
+  PADDLE_ASSERT(pool_size > 0);
+  PADDLE_ASSERT(max_pools > 0);
+  PADDLE_ASSERT(system_allocator != nullptr);
+}
+
+}  // namespace detail
+}  // namespace memory
+}  // namespace paddle

paddle/memory/detail/buddy_allocator.h

Lines changed: 86 additions & 0 deletions

@@ -0,0 +1,86 @@
+/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#pragma once
+
+#include "paddle/memory/detail/system_allocator.h"
+
+#include <mutex>
+#include <vector>
+
+namespace paddle {
+namespace memory {
+namespace detail {
+
+class BuddyAllocator {
+ public:
+  BuddyAllocator(size_t pool_size, size_t max_pools,
+                 SystemAllocator* system_allocator);
+  ~BuddyAllocator();
+
+  void* Alloc(size_t size);
+  void Free(void*);
+  size_t Used();
+
+ private:
+  struct Block {
+    size_t size_;
+    Block* left_;   // left buddy
+    Block* right_;  // right buddy
+  };
+
+  // Initially, there is only one pool.  If an Alloc cannot find enough
+  // memory in the existing pools and there are fewer than max_num_pools_
+  // pools, create a new pool by calling system_allocator_->Alloc(pool_size_).
+  std::vector<void*> pools_;
+
+  size_t pool_size_;      // the size of each pool;
+  size_t max_num_pools_;  // the maximum number of pools;
+
+  SystemAllocator* system_allocator_;
+
+  std::mutex mutex_;
+
+  // Disable copy and assignment.
+  BuddyAllocator(const BuddyAllocator&) = delete;
+  BuddyAllocator& operator=(const BuddyAllocator&) = delete;
+};
+
+BuddyAllocator<CPUAllocator>* GetCPUBuddyAllocator() {
+  static BuddyAllocator<CPUAllocator>* a = nullptr;
+  if (a == nullptr) {
+    a = new BuddyAllocator<CPUAllocator>();
+  }
+  return a;
+}
+
+#ifndef PADDLE_ONLY_CPU  // The following code is for CUDA.
+
+BuddyAllocator<GPUAllocator>* GetGPUBuddyAllocator(int gpu_id) {
+  static BuddyAllocator<GPUAllocator>** as = NULL;
+  if (as == NULL) {
+    int gpu_num = platform::GetDeviceCount();
+    as = new BuddyAllocator<GPUAllocator>*[gpu_num];
+    for (int gpu = 0; gpu < gpu_num; gpu++) {
+      as[gpu] = new BuddyAllocator<GPUAllocator>();
+    }
+  }
+  return as[gpu_id];
+}
+
+#endif  // PADDLE_ONLY_CPU
+
+}  // namespace detail
+}  // namespace memory
+}  // namespace paddle
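
For reference, a minimal sketch of how the interface declared above is meant to be used (hypothetical: the pool sizes are made up, and this commit itself only adds the constructor definition, so Alloc/Free/Used are not yet implemented):

// Hypothetical usage sketch of the BuddyAllocator interface; sizes are arbitrary.
#include "paddle/memory/detail/buddy_allocator.h"
#include "paddle/memory/detail/system_allocator.h"

using namespace paddle::memory::detail;

void Example() {
  CPUAllocator cpu;                             // system-level backing allocator
  BuddyAllocator buddy(1 << 20 /* pool_size */, // each pool is 1 MB (made-up value)
                       8 /* max_pools */, &cpu);
  void* p = buddy.Alloc(256);  // carve 256 bytes out of the buddy pools
  buddy.Free(p);
}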

paddle/memory/detail/system_allocator.cc

Lines changed: 90 additions & 0 deletions

@@ -0,0 +1,90 @@
+/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#include "paddle/memory/detail/system_allocator.h"
+
+#include <stdlib.h>    // for malloc and free
+#include <sys/mman.h>  // for mlock and munlock
+
+#include "gflags/gflags.h"
+#include "paddle/platform/assert.h"
+#include "paddle/platform/cuda.h"
+
+// If use_pinned_memory is true, CPUAllocator calls mlock, which
+// returns pinned and locked memory as staging areas for data exchange
+// between host and device.  Allocating too much pinned memory would
+// reduce the amount of memory available to the system for paging, so
+// use_pinned_memory defaults to false.
+DEFINE_bool(use_pinned_memory, false,
+            "If set, allocate cpu/gpu pinned memory.");
+
+namespace paddle {
+namespace memory {
+namespace detail {
+
+void* CPUAllocator::Alloc(size_t size) {
+  // According to http://www.cplusplus.com/reference/cstdlib/malloc/,
+  // malloc might not return nullptr if size is zero, but the returned
+  // pointer shall not be dereferenced -- so we make it nullptr.
+  if (size <= 0) return nullptr;
+
+  void* p = malloc(size);
+  if (p != nullptr && FLAGS_use_pinned_memory) {
+    mlock(p, size);
+  }
+  return p;
+}
+
+void CPUAllocator::Free(void* p, size_t size) {
+  if (p != nullptr && FLAGS_use_pinned_memory) {
+    munlock(p, size);
+  }
+  free(p);
+}
+
+#ifndef PADDLE_ONLY_CPU
+
+void* GPUAllocator::Alloc(size_t size) {
+  // CUDA documentation doesn't explain if cudaMalloc returns nullptr
+  // if size is 0.  We just make sure it does.
+  if (size <= 0) {
+    return nullptr;
+  }
+
+  void* p = 0;
+  cudaError_t result =
+      FLAGS_use_pinned_memory ? cudaMallocHost(&p, size) : cudaMalloc(&p, size);
+  if (result != cudaSuccess) {
+    cudaGetLastError();  // clear error if there is any.
+  }
+  return result == cudaSuccess ? p : nullptr;
+}
+
+void GPUAllocator::Free(void* p, size_t size) {
+  // Purposefully allow cudaErrorCudartUnloading, because
+  // that is returned if you ever call cudaFree after the
+  // driver has already shut down.  This happens only if the
+  // process is terminating, in which case we don't care if
+  // cudaFree succeeds.
+  cudaError_t err = FLAGS_use_pinned_memory ? cudaFreeHost(p) : cudaFree(p);
+  if (err != cudaErrorCudartUnloading) {
+    platform::throw_on_error(err, "cudaFree{Host} failed");
+  }
+}
+
+#endif  // PADDLE_ONLY_CPU
+
+}  // namespace detail
+}  // namespace memory
+}  // namespace paddle
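
The detail/CMakeLists.txt above registers a system_allocator_test target, but its source is not shown in this excerpt. A minimal gtest-style sketch of how CPUAllocator might be exercised could look like the following (illustrative only, assuming a gtest harness; this is not the actual test file from the commit):

// Illustrative test sketch; system_allocator_test.cc itself is not in this excerpt.
#include "paddle/memory/detail/system_allocator.h"
#include "gtest/gtest.h"

TEST(CPUAllocator, AllocAndFree) {
  paddle::memory::detail::CPUAllocator a;
  void* p = a.Alloc(4096);  // malloc-backed allocation
  EXPECT_NE(p, nullptr);
  a.Free(p, 4096);          // size is passed so Free can munlock pinned memory
}

TEST(CPUAllocator, ZeroSizeReturnsNull) {
  paddle::memory::detail::CPUAllocator a;
  EXPECT_EQ(a.Alloc(0), nullptr);  // Alloc treats size == 0 as a null allocation
}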

paddle/memory/detail/system_allocator.h

Lines changed: 53 additions & 0 deletions

@@ -0,0 +1,53 @@
+/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#pragma once
+
+#include <stddef.h>  // for size_t
+
+namespace paddle {
+namespace memory {
+namespace detail {
+
+// SystemAllocator is the parent class of CPUAllocator and
+// GPUAllocator.  A BuddyAllocator object uses a SystemAllocator*
+// pointing to the underlying system allocator.  An alternative to
+// this class hierarchy is to pass a system allocator class to
+// BuddyAllocator as a template parameter.  That approach would make
+// BuddyAllocator a class template, and its very complicated
+// algorithm would make buddy_allocator.h messy.
class SystemAllocator {
+ public:
+  virtual ~SystemAllocator() {}
+  virtual void* Alloc(size_t size) = 0;
+  virtual void Free(void* p, size_t size) = 0;
+};
+
+class CPUAllocator : public SystemAllocator {
+ public:
+  virtual void* Alloc(size_t size);
+  virtual void Free(void* p, size_t size);
+};
+
+#ifndef PADDLE_ONLY_CPU
+class GPUAllocator : public SystemAllocator {
+ public:
+  virtual void* Alloc(size_t size);
+  virtual void Free(void* p, size_t size);
+};
+#endif  // PADDLE_ONLY_CPU
+
+}  // namespace detail
+}  // namespace memory
+}  // namespace paddle
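
The header comment contrasts this virtual-interface design with passing the system allocator as a template parameter. A sketch of what that rejected alternative would look like (illustrative only, not part of this commit) helps show why it was avoided: the entire buddy algorithm would have to live in the header.

// Hypothetical sketch of the rejected template-parameter design.
#include <cstddef>

template <typename SystemAllocatorT>
class TemplatedBuddyAllocator {
 public:
  void* Alloc(size_t size) {
    // Statically dispatched: every call is bound to SystemAllocatorT at
    // compile time, so all buddy logic must be visible in this header,
    // which is what the comment above calls "messy".
    return system_allocator_.Alloc(size);
  }
  void Free(void* p, size_t size) { system_allocator_.Free(p, size); }

 private:
  SystemAllocatorT system_allocator_;
};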
