00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032
00033
00034
00035
00036
00037
00038
00039
00040
00041
00042
00043
00044
00045
00046
00047
00048
00049
00050
00051
00052
00053
00054 #include "w.h"
00055 #include "mem_block.h"
00056 #include "atomic_ops.h"
00057 #include <cstdlib>
00058 #include <stdio.h>
00059 #include <algorithm>
00060 #ifdef __linux
00061 #include <malloc.h>
00062 #endif
00063
00064
00065 #undef assert
00066 void assert_failed(const char *desc, const char *f, int l) {
00067 fprintf(stdout, "Assertion failed: %s at line %d file %s ", desc, l,f);
00068 w_assert0(0);
00069 }
00070 #define assert(x) if (!(x)) assert_failed(#x, __FILE__, __LINE__);
00071
00072 #define TEMPLATE_ARGS chip_size, chip_count, block_size
00073
00074 #ifdef __GNUC__
00075 #if defined(__x86_64) || defined(i386) || defined(__i386__)
00076 #define NEED_POPC64 0
00077 #elif defined(__sparcv9)
00078 #define NEED_POPC64 0
00079 #else
00080 #define NEED_POPC64 1
00081 #endif
00082 #else // !defined(__GNUC__)
00083 #define NEED_POPC64 1
00084 #endif
00085
00086 namespace memory_block {
00087 #if 0
00088 }
00089 #endif
00090
00091 #if NEED_POPC64==1
00092
00093 typedef unsigned long long u64;
00094 static inline
00095 long popc64(u64 x) {
00096 u64 k1 = 0x5555555555555555ull;
00097 u64 k2 = 0x3333333333333333ull;
00098 u64 k4 = 0x0f0f0f0f0f0f0f0full;
00099 u64 kf = 0x0101010101010101ull;
00100 x = x - ((x >> 1) & k1);
00101 x = (x & k2) + ((x >> 2) & k2);
00102 x = (x + (x >> 4)) & k4 ;
00103 x = (x * kf) >> 56;
00104 return x;
00105 }
00106 #endif
00107
00108 size_t block_bits::_popc(bitmap bm) {
00109 #if NEED_POPC64==1
00110 #warning "using home-brew popc routine"
00111 return popc64(bm);
00112 #elif defined(__x86_64) || defined(i386) || defined(__i386__)
00113
00114 return __builtin_popcountll(bm);
00115 #elif defined(__sparcv9)
00116 #warning "Using gcc inline asm to access sparcv9 'popc' instruction"
00117 long rval;
00118 __asm__("popc %[in], %[out]" : [out] "=r"(rval) : [in] "r"(x));
00119 return rval;
00120 #else
00121 #error "NEED_POPC64: Platform not supported"
00122 #endif
00123 }
00124
00125 block_bits::block_bits(size_t chip_count)
00126 : _usable_chips(create_mask(chip_count))
00127 , _zombie_chips(0)
00128 {
00129 assert(chip_count <= 8*sizeof(bitmap));
00130 }
00131
00132 size_t block_bits::acquire_contiguous(size_t chip_count) {
00133 (void) chip_count;
00134
00135
00136
00137
00138
00139
00140
00141
00142
00143
00144 bitmap one_bit = _usable_chips &- _usable_chips;
00145 size_t index = _popc(one_bit-1);
00146 if(index < 8*sizeof(bitmap)) {
00147
00148 assert(index < chip_count);
00149 _usable_chips ^= one_bit;
00150 }
00151 else {
00152
00153 assert(index == 8*sizeof(bitmap));
00154 }
00155
00156 return index;
00157 }
00158
00159 void block_bits::release_contiguous(size_t index, size_t chip_count) {
00160
00161 (void) chip_count;
00162 assert(index < chip_count);
00163 bitmap to_free = bitmap(1) << index;
00164 assert(! (to_free & *usable_chips()));
00165 #ifdef I_WISH
00166 bitmap was_free = __sync_fetch_and_or(&_zombie_chips, to_free);
00167 #else
00168 membar_exit();
00169 bitmap volatile* ptr = &_zombie_chips;
00170 bitmap ov = *ptr;
00171 while(1) {
00172 bitmap nv = ov | to_free;
00173 bitmap cv = atomic_cas_64(ptr, ov, nv);
00174 if(cv == ov)
00175 break;
00176 ov = cv;
00177 }
00178 bitmap was_free = ov;
00179 #endif
00180 (void) was_free;
00181
00182 assert( ! (was_free & to_free));
00183 }
00184
00185 block_bits::bitmap block_bits::create_mask(size_t bits_set) {
00186
00187 return ~bitmap(0) >> (8*sizeof(bitmap) - bits_set);
00188 }
00189
00190 void block_bits::recycle() {
00191
00192
00193
00194
00195
00196
00197
00198
00199 bitmap newly_usable = *&_zombie_chips;
00200 _usable_chips |= newly_usable;
00201 #ifdef I_WISH
00202 __sync_xor_and_fetch(&_zombie_chips, newly_usable);
00203 #else
00204 membar_exit();
00205 bitmap volatile* ptr = &_zombie_chips;
00206 bitmap ov = *ptr;
00207 while(1) {
00208 bitmap nv = ov ^ newly_usable;
00209 bitmap cv = atomic_cas_64(ptr, ov, nv);
00210 if(cv == ov)
00211 break;
00212 ov = cv;
00213 }
00214 #endif
00215 }
00216
00217 void* block::acquire_chip(size_t chip_size, size_t chip_count, size_t ) {
00218 size_t index = _bits.acquire_contiguous(chip_count);
00219 return (index < chip_count)? _get(index, chip_size) : 0;
00220 }
00221
00222 void block::release_chip(void* ptr, size_t chip_size, size_t chip_count, size_t block_size)
00223 {
00224
00225
00226
00227
00228
00229
00230 union { void* v; size_t n; block* b; char* c; } u = {ptr}, v=u;
00231 u.n &= -block_size;
00232 size_t offset = v.c - u.b->_data;
00233 size_t idx = offset/chip_size;
00234 assert(u.b->_data + idx*chip_size == ptr);
00235 u.b->_bits.release_contiguous(idx, chip_count);
00236 }
00237
00238 char* block::_get(size_t index, size_t chip_size) {
00239 return _data + index*chip_size;
00240 }
00241
00242 block::block(size_t chip_size, size_t chip_count, size_t block_size)
00243 : _bits(chip_count)
00244 , _owner(0)
00245 , _next(0)
00246 {
00247
00248 char* end_of_block = _get(0, chip_size)-sizeof(this)+block_size;
00249 char* end_of_chips = _get(chip_count, chip_size);
00250 (void) end_of_block;
00251 (void) end_of_chips;
00252 assert(end_of_chips <= end_of_block);
00253
00254
00255
00256
00257
00258
00259 }
00260
00261 void* block_list::acquire(size_t chip_size, size_t chip_count, size_t block_size)
00262 {
00263 if(void* ptr = _tail->acquire_chip(TEMPLATE_ARGS)) {
00264 _hit_count++;
00265 return ptr;
00266 }
00267
00268
00269 return _slow_acquire(TEMPLATE_ARGS);
00270 }
00271
00272
00273 block_list::block_list(block_pool* pool, size_t chip_size, size_t chip_count, size_t block_size)
00274 : _fake_block(TEMPLATE_ARGS)
00275 , _tail(&_fake_block)
00276 , _pool(pool)
00277 , _hit_count(0)
00278 , _avg_hit_rate(0)
00279 {
00280
00281
00282
00283
00284
00285
00286
00287
00288
00289
00290 _fake_block._bits._usable_chips = 0;
00291 _fake_block._bits._zombie_chips = 0;
00292 }
00293
00294
00295 void* block_list::_slow_acquire(size_t chip_size,
00296 size_t chip_count, size_t block_size)
00297 {
00298 _change_blocks(TEMPLATE_ARGS);
00299 return acquire(TEMPLATE_ARGS);
00300 }
00301
00302 block* block_list::acquire_block(size_t block_size)
00303 {
00304 union { block* b; uintptr_t n; } u = {_pool->acquire_block(this)};
00305 (void) block_size;
00306 assert((u.n & -block_size) == u.n);
00307 return u.b;
00308
00309 }
00310 void block_list::_change_blocks(size_t chip_size,
00311 size_t chip_count, size_t block_size)
00312 {
00313 (void) chip_size;
00314
00315
00316 if(_tail == &_fake_block) {
00317 _tail = acquire_block(block_size);
00318 _tail->_next = _tail;
00319 return;
00320 }
00321
00322
00323
00324
00325
00326
00327
00328
00329
00330
00331
00332
00333
00334
00335 static double const decay_rate = 1./5;
00336
00337 size_t const max_available = chip_count - std::max((int)(.1*chip_count), 1);
00338
00339 size_t const min_allocated = (chip_count+1)/2;
00340
00341 _avg_hit_rate = _hit_count*(1-decay_rate) + _avg_hit_rate*decay_rate;
00342 if(_hit_count < min_allocated && _avg_hit_rate < min_allocated) {
00343
00344 block* new_block = acquire_block(block_size);
00345 new_block->_next = _tail->_next;
00346 _tail = _tail->_next = new_block;
00347 }
00348 else {
00349
00350 block* prev = 0;
00351 block* cur = _tail;
00352 block* next;
00353 while(1) {
00354 next = cur->_next;
00355 next->recycle();
00356 if(next->_bits.usable_count() <= max_available)
00357 break;
00358
00359
00360 if(prev) {
00361 assert(prev != cur);
00362 assert(cur->_bits.usable_count() > max_available);
00363 assert(next->_bits.usable_count() > max_available);
00364 prev->_next = next;
00365 _pool->release_block(cur);
00366 cur = prev;
00367 }
00368
00369
00370 if(next == _tail) break;
00371
00372 prev = cur;
00373 cur = next;
00374 }
00375
00376
00377 _tail = cur;
00378 }
00379
00380 _hit_count = 0;
00381 }
00382
00383 block_list::~block_list() {
00384
00385 if(_tail == &_fake_block) return;
00386
00387
00388 block* cur = _tail->_next;
00389 _tail->_next = 0;
00390
00391
00392 while( (cur=_pool->release_block(cur)) ) ;
00393 }
00394
00395
00396 }