| 1 | +/* SPDX-License-Identifier: GPL-2.0 */ |
| 2 | + |
| 3 | +#ifndef _LINUX_OBJPOOL_H |
| 4 | +#define _LINUX_OBJPOOL_H |
| 5 | + |
| 6 | +#include <linux/types.h> |
| 7 | +#include <linux/refcount.h> |
| 8 | + |
| 9 | +/* |
| 10 | + * objpool: ring-array based lockless MPMC queue |
| 11 | + * |
| 12 | + * |
| 13 | + * |
| 14 | + * objpool is a scalable implementation of a high-performance queue |
| 15 | + * for object allocation and reclamation, such as kretprobe instances. |
| 16 | + * |
| 17 | + * By leveraging a percpu ring-array to mitigate hot spots of memory |
| 18 | + * contention, it delivers near-linear scalability for highly parallel |
| 19 | + * scenarios. The objpool is best suited for the following cases: |
| 20 | + * 1) Memory allocation or reclamation is prohibited or too expensive |
| 21 | + * 2) Consumers are of different priorities, such as irqs and threads |
| 22 | + * |
| 23 | + * Limitations: |
| 24 | + * 1) Maximum objects (capacity) is fixed after objpool creation |
| 25 | + * 2) All pre-allocated objects are managed in percpu ring array, |
| 26 | + * which consumes more memory than linked lists |
| 27 | + */ |
| 28 | + |
| 29 | +/** |
| 30 | + * struct objpool_slot - percpu ring array of objpool |
| 31 | + * @head: head sequence of the local ring array (to retrieve at) |
| 32 | + * @tail: tail sequence of the local ring array (to append at) |
| 33 | + * @last: the last sequence number marked as ready for retrieve |
| 34 | + * @mask: bits mask for modulo capacity to compute array indexes |
| 35 | + * @entries: object entries on this slot |
| 36 | + * |
| 37 | + * Represents a cpu-local array-based ring buffer; its size is specified |
| 38 | + * during initialization of the object pool. On NUMA systems the percpu |
| 39 | + * slot is allocated from local memory and kept compact in contiguous |
| 40 | + * memory: the objects assigned to a CPU are stored right after the |
| 41 | + * body of its objpool_slot. |
| 42 | + * |
| 43 | + * The real size of the ring array is far smaller than the value range |
| 44 | + * of head and tail, which are typed as uint32_t: [0, 2^32). So only the |
| 45 | + * lower bits (mask) of head and tail are used as the actual position in |
| 46 | + * the ring array. In general the ring array acts like a small sliding |
| 47 | + * window that keeps moving forward within [0, 2^32), wrapping around. |
| 48 | + */ |
| 49 | +struct objpool_slot { |
| 50 | + uint32_t head; |
| 51 | + uint32_t tail; |
| 52 | + uint32_t last; |
| 53 | + uint32_t mask; |
| 54 | + void *entries[]; |
| 55 | +} __packed; |
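/*
 * Illustration only (not part of the objpool API): a minimal sketch of how
 * a head/tail sequence number maps onto the ring array, assuming mask is
 * capacity - 1 with capacity a power of two, as the comment above describes.
 * The helper name __objpool_entry_of() is hypothetical.
 */
static inline void **__objpool_entry_of(struct objpool_slot *slot, uint32_t seq)
{
	/* only the lower bits select the array index; the 32-bit sequence
	 * wraps around [0, 2^32) while the window keeps sliding forward */
	return &slot->entries[seq & slot->mask];
}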
| 56 | + |
| 57 | +struct objpool_head; |
| 58 | + |
| 59 | +/* |
| 60 | + * caller-specified callback for object initial setup, it's only called |
| 61 | + * once for each object (just after the memory allocation of the object) |
| 62 | + */ |
| 63 | +typedef int (*objpool_init_obj_cb)(void *obj, void *context); |
| 64 | + |
| 65 | +/* caller-specified cleanup callback for objpool destruction */ |
| 66 | +typedef int (*objpool_fini_cb)(struct objpool_head *head, void *context); |
| 67 | + |
| 68 | +/** |
| 69 | + * struct objpool_head - object pooling metadata |
| 70 | + * @obj_size: object size, aligned to sizeof(void *) |
| 71 | + * @nr_objs: total objs (to be pre-allocated with objpool) |
| 72 | + * @nr_cpus: local copy of nr_cpu_ids |
| 73 | + * @capacity: max objs can be managed by one objpool_slot |
| 74 | + * @gfp: gfp flags for kmalloc & vmalloc |
| 75 | + * @ref: refcount of objpool |
| 76 | + * @flags: flags for objpool management |
| 77 | + * @cpu_slots: pointer to the array of objpool_slot |
| 78 | + * @release: resource cleanup callback |
| 79 | + * @context: caller-provided context |
| 80 | + */ |
| 81 | +struct objpool_head { |
| 82 | + int obj_size; |
| 83 | + int nr_objs; |
| 84 | + int nr_cpus; |
| 85 | + int capacity; |
| 86 | + gfp_t gfp; |
| 87 | + refcount_t ref; |
| 88 | + unsigned long flags; |
| 89 | + struct objpool_slot **cpu_slots; |
| 90 | + objpool_fini_cb release; |
| 91 | + void *context; |
| 92 | +}; |
| 93 | + |
| 94 | +#define OBJPOOL_NR_OBJECT_MAX (1UL << 24) /* maximum number of total objects */ |
| 95 | +#define OBJPOOL_OBJECT_SIZE_MAX (1UL << 16) /* maximum size of an object */ |
| 96 | + |
| 97 | +/** |
| 98 | + * objpool_init() - initialize objpool and pre-allocated objects |
| 99 | + * @pool: the object pool to be initialized, declared by caller |
| 100 | + * @nr_objs: total objects to be pre-allocated by this object pool |
| 101 | + * @object_size: size of an object (should be > 0) |
| 102 | + * @gfp: flags for memory allocation (via kmalloc or vmalloc) |
| 103 | + * @context: user context for object initialization callback |
| 104 | + * @objinit: object initialization callback for extra setup |
| 105 | + * @release: cleanup callback for extra cleanup task |
| 106 | + * |
| 107 | + * return value: 0 for success, otherwise error code |
| 108 | + * |
| 109 | + * All pre-allocated objects are zeroed after memory allocation. The |
| 110 | + * caller can do extra initialization in the objinit callback, which |
| 111 | + * is called just after slot allocation and only once for each object. |
| 112 | + * After that the objpool won't touch any content of the objects. It |
| 113 | + * is the caller's duty to perform reinitialization after each pop |
| 114 | + * (object allocation) or to do clearance before each push (object |
| 115 | + * reclamation). |
| 116 | + */ |
| 117 | +int objpool_init(struct objpool_head *pool, int nr_objs, int object_size, |
| 118 | + gfp_t gfp, void *context, objpool_init_obj_cb objinit, |
| 119 | + objpool_fini_cb release); |
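/*
 * Usage sketch (illustration only, not part of this header): creating a
 * pool of 128 fixed-size objects. struct my_obj, my_obj_init(), my_pool
 * and my_pool_setup() are hypothetical names made up for the example;
 * GFP_KERNEL comes from the usual kernel gfp headers.
 */
struct my_obj {
	int id;
	char payload[64];
};

static int my_obj_init(void *obj, void *context)
{
	struct my_obj *o = obj;

	/* objects arrive zeroed; only extra per-object setup is done here */
	o->id = -1;
	return 0;
}

static struct objpool_head my_pool;

static int my_pool_setup(void)
{
	/* no user context and no release callback for this simple case */
	return objpool_init(&my_pool, 128, sizeof(struct my_obj),
			    GFP_KERNEL, NULL, my_obj_init, NULL);
}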
| 120 | + |
| 121 | +/** |
| 122 | + * objpool_pop() - allocate an object from objpool |
| 123 | + * @pool: object pool |
| 124 | + * |
| 125 | + * return value: object ptr or NULL if failed |
| 126 | + */ |
| 127 | +void *objpool_pop(struct objpool_head *pool); |
| 128 | + |
| 129 | +/** |
| 130 | + * objpool_push() - reclaim the object and return it to the objpool |
| 131 | + * @obj: object ptr to be pushed to objpool |
| 132 | + * @pool: object pool |
| 133 | + * |
| 134 | + * return: 0 or error code (it fails only when the user tries to push |
| 135 | + * the same object multiple times or objects that don't belong to the objpool) |
| 136 | + */ |
| 137 | +int objpool_push(void *obj, struct objpool_head *pool); |
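/*
 * Usage sketch (illustration only): a typical pop/use/push cycle, reusing
 * the hypothetical my_pool and struct my_obj from the objpool_init()
 * example above.
 */
static void my_obj_use_one(void)
{
	struct my_obj *o = objpool_pop(&my_pool);

	if (!o)
		return;	/* pool is temporarily empty */

	/* the pool never reinitializes objects between uses, so reset
	 * whatever state this path depends on before using the object */
	o->id = 0;

	/* ... work with the object ... */

	/* hand the very same pointer back to the pool for reuse */
	objpool_push(o, &my_pool);
}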
| 138 | + |
| 139 | +/** |
| 140 | + * objpool_drop() - discard the object and deref objpool |
| 141 | + * @obj: object ptr to be discarded |
| 142 | + * @pool: object pool |
| 143 | + * |
| 144 | + * return: 0 if objpool was released; -EAGAIN if there are still |
| 145 | + * outstanding objects |
| 146 | + * |
| 147 | + * objpool_drop is normally for the release of outstanding objects |
| 148 | + * after objpool cleanup (objpool_fini). Consider this example: a |
| 149 | + * kretprobe is unregistered and objpool_fini() is called to release |
| 150 | + * all remaining objects, but there are still objects being used by |
| 151 | + * unfinished kretprobes (e.g. inside a blockable function such as |
| 152 | + * sys_accept). Only when the last outstanding object is dropped via |
| 153 | + * objpool_drop() can the whole objpool be released. |
| 154 | + */ |
| 155 | +int objpool_drop(void *obj, struct objpool_head *pool); |
| 156 | + |
| 157 | +/** |
| 158 | + * objpool_free() - forcibly release the objpool (all objects are freed) |
| 159 | + * @pool: object pool to be released |
| 160 | + */ |
| 161 | +void objpool_free(struct objpool_head *pool); |
| 162 | + |
| 163 | +/** |
| 164 | + * objpool_fini() - deref object pool (also releasing unused objects) |
| 165 | + * @pool: object pool to be dereferenced |
| 166 | + * |
| 167 | + * objpool_fini() will try to release all remaining free objects and |
| 168 | + * then drop an extra reference of the objpool. If all objects have |
| 169 | + * already been returned to the objpool (so-called synchronous use |
| 170 | + * cases), the objpool itself is freed as well. But if there are |
| 171 | + * still outstanding objects (so-called asynchronous use cases, such |
| 172 | + * as a blockable kretprobe), the objpool won't be released until |
| 173 | + * all the outstanding objects are dropped. The caller must ensure |
| 174 | + * there are no concurrent objpool_push() calls in flight. Normally |
| 175 | + * RCU is required to make sure all ongoing objpool_push() calls |
| 176 | + * have finished before calling objpool_fini(), as test_objpool, |
| 177 | + * kretprobe and rethook do. |
| 178 | + */ |
| 179 | +void objpool_fini(struct objpool_head *pool); |
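/*
 * Teardown sketch (illustration only): the asynchronous pattern described
 * above, reusing the hypothetical my_pool and struct my_obj from earlier
 * examples. The caller must make sure no objpool_push() can still be
 * running (e.g. by waiting for an RCU grace period) before calling
 * objpool_fini(); any object still outstanding afterwards is released
 * with objpool_drop(), and the pool memory goes away with the last drop.
 */
static void my_pool_teardown(void)
{
	/* releases all free objects and drops the pool's own reference */
	objpool_fini(&my_pool);
}

static void my_obj_finish(struct my_obj *o)
{
	/* after objpool_fini(), outstanding objects are not pushed back
	 * but dropped; the last drop also frees the objpool itself */
	objpool_drop(o, &my_pool);
}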
| 180 | + |
| 181 | +#endif /* _LINUX_OBJPOOL_H */ |