gcc/libstdc++-v3/include/ext/mt_allocator.h

865 lines
26 KiB
C
Raw Normal View History

// MT-optimized allocator -*- C++ -*-
// Copyright (C) 2003 Free Software Foundation, Inc.
//
// This file is part of the GNU ISO C++ Library. This library is free
// software; you can redistribute it and/or modify it under the
// terms of the GNU General Public License as published by the
// Free Software Foundation; either version 2, or (at your option)
// any later version.
// This library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
// You should have received a copy of the GNU General Public License along
// with this library; see the file COPYING. If not, write to the Free
// Software Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307,
// USA.
// As a special exception, you may use this file as part of a free software
// library without restriction. Specifically, if other files instantiate
// templates or use macros or inline functions from this file, or you compile
// this file and link it with other files to produce an executable, this
// file does not by itself cause the resulting executable to be covered by
// the GNU General Public License. This exception does not however
// invalidate any other reasons why the executable file might be covered by
// the GNU General Public License.
/** @file ext/mt_allocator.h
* This file is a GNU extension to the Standard C++ Library.
* You should only include this header if you are using GCC 3 or later.
*/
#ifndef _MT_ALLOCATOR_H
#define _MT_ALLOCATOR_H 1
#include <cstdlib>
#include <bits/functexcept.h>
#include <bits/stl_threads.h>
#include <bits/atomicity.h>
#include <bits/allocator_traits.h>
namespace __gnu_cxx
{
/**
* This is a fixed size (power of 2) allocator which - when compiled
* with thread support - will maintain one freelist per size per thread
* plus a "global" one. Steps are taken to limit the per thread freelist
* sizes (by returning excess back to "global").
*
* Usage examples:
* @code
* vector<int, __gnu_cxx::__mt_alloc<0> > v1;
*
* typedef std::__allocator<char, __gnu_cxx::__mt_alloc<0> > string_alloc;
* std::basic_string<char, std::char_traits<char>, string_alloc> s1;
* @endcode
*/
template<int __inst>
class __mt_alloc
{
private:
/*
* We need to create the initial lists and set up some variables
* before we can answer to the first request for memory.
* The initialization of these variables is done at file scope
* below class declaration.
*/
#ifdef __GTHREADS
static __gthread_once_t _S_once_mt;
#endif
static bool _S_initialized;
/*
* Using short int as type for the binmap implies we are never caching
* blocks larger than 65535 with this allocator
*/
typedef unsigned short int binmap_type;
static binmap_type* _S_binmap;
static void _S_init();
/*
* Variables used to "tune" the behavior of the allocator, assigned
* and explained in detail below.
*/
static size_t _S_max_bytes;
static size_t _S_chunk_size;
static size_t _S_max_threads;
static size_t _S_no_of_bins;
static size_t _S_freelist_headroom;
/*
* Each requesting thread is assigned an id ranging from 1 to
* _S_max_threads. Thread id 0 is used as a global memory pool.
* In order to get constant performance on the thread assignment
* routine, we keep a list of free ids. When a thread first requests
* memory we remove the first record in this list and stores the address
* in a __gthread_key. When initializing the __gthread_key
* we specify a destructor. When this destructor (i.e. the thread dies)
* is called, we return the thread id to the back of this list.
*/
#ifdef __GTHREADS
struct thread_record
{
/*
* Points to next free thread id record. NULL if last record in list.
*/
thread_record* next;
/*
* Thread id ranging from 1 to _S_max_threads.
*/
size_t id;
};
static thread_record* _S_thread_freelist_first;
static thread_record* _S_thread_freelist_last;
static __gthread_mutex_t _S_thread_freelist_mutex;
static void _S_thread_key_destr(void* freelist_pos);
static __gthread_key_t _S_thread_key;
static size_t _S_get_thread_id();
#endif
struct block_record
{
/*
* Points to the next block_record for its thread_id.
*/
block_record* next;
/*
* The thread id of the thread which has requested this block.
* All blocks are initially "owned" by global pool thread id 0.
*/
size_t thread_id;
};
struct bin_record
{
/*
* An "array" of pointers to the first/last free block for each
* thread id. Memory to these "arrays" is allocated in _S_init()
* for _S_max_threads + global pool 0.
*/
block_record** first;
block_record** last;
/*
* An "array" of counters used to keep track of the amount of blocks
* that are on the freelist/used for each thread id.
* Memory to these "arrays" is allocated in _S_init()
* for _S_max_threads + global pool 0.
*/
size_t* free;
size_t* used;
/*
* Each bin has its own mutex which is used to ensure data integrity
* while changing "ownership" on a block.
* The mutex is initialized in _S_init().
*/
#ifdef __GTHREADS
__gthread_mutex_t* mutex;
#endif
};
/*
* An "array" of bin_records each of which represents a specific
* power of 2 size. Memory to this "array" is allocated in _S_init().
*/
static bin_record* _S_bin;
public:
static void*
allocate(size_t __n)
{
/*
* Requests larger than _S_max_bytes are handled by
* malloc/free directly
*/
if (__n > _S_max_bytes)
{
void* __ret = malloc(__n);
if (!__ret)
__throw_bad_alloc();
return __ret;
}
/*
* Although the test in __gthread_once() would suffice, we
* wrap test of the once condition in our own unlocked
* check. This saves one function call to pthread_once()
* (which itself only tests for the once value unlocked anyway
* and immediately returns if set)
*/
if (!_S_initialized)
{
#ifdef __GTHREADS
if (__gthread_active_p())
__gthread_once(&_S_once_mt, _S_init);
else
#endif
{
_S_max_threads = 0;
_S_init();
}
}
/*
* Round up to power of 2 and figure out which bin to use
*/
size_t bin = _S_binmap[__n];
#ifdef __GTHREADS
size_t thread_id = _S_get_thread_id();
#else
size_t thread_id = 0;
#endif
block_record* block;
/*
* Find out if we have blocks on our freelist.
* If so, go ahead and use them directly without
* having to lock anything.
*/
if (_S_bin[bin].first[thread_id] == NULL)
{
/*
* Are we using threads?
* - Yes, lock and check if there are free blocks on the global
* list (and if not add new ones), get the first one
* and change owner.
* - No, all operations are made directly to global pool 0
* no need to lock or change ownership but check for free
* blocks on global list (and if not add new ones) and
* get the first one.
*/
#ifdef __GTHREADS
if (__gthread_active_p())
{
__gthread_mutex_lock(_S_bin[bin].mutex);
if (_S_bin[bin].first[0] == NULL)
{
_S_bin[bin].first[0] =
(block_record*)malloc(_S_chunk_size);
if (!_S_bin[bin].first[0])
{
__gthread_mutex_unlock(_S_bin[bin].mutex);
__throw_bad_alloc();
}
size_t bin_t = 1 << bin;
size_t block_count =
_S_chunk_size /(bin_t + sizeof(block_record));
_S_bin[bin].free[0] = block_count;
block_count--;
block = _S_bin[bin].first[0];
while (block_count > 0)
{
block->next = (block_record*)((char*)block +
(bin_t + sizeof(block_record)));
block = block->next;
block_count--;
}
block->next = NULL;
_S_bin[bin].last[0] = block;
}
block = _S_bin[bin].first[0];
/*
* Remove from list and count down the available counter on
* global pool 0.
*/
_S_bin[bin].first[0] = _S_bin[bin].first[0]->next;
_S_bin[bin].free[0]--;
__gthread_mutex_unlock(_S_bin[bin].mutex);
/*
* Now that we have removed the block from the global
* freelist we can change owner and update the used
* counter for this thread without locking.
*/
block->thread_id = thread_id;
_S_bin[bin].used[thread_id]++;
}
else
#endif
{
_S_bin[bin].first[0] = (block_record*)malloc(_S_chunk_size);
if (!_S_bin[bin].first[0])
__throw_bad_alloc();
size_t bin_t = 1 << bin;
size_t block_count =
_S_chunk_size / (bin_t + sizeof(block_record));
_S_bin[bin].free[0] = block_count;
block_count--;
block = _S_bin[bin].first[0];
while (block_count > 0)
{
block->next = (block_record*)((char*)block +
(bin_t + sizeof(block_record)));
block = block->next;
block_count--;
}
block->next = NULL;
_S_bin[bin].last[0] = block;
block = _S_bin[bin].first[0];
/*
* Remove from list and count down the available counter on
* global pool 0 and increase it's used counter.
*/
_S_bin[bin].first[0] = _S_bin[bin].first[0]->next;
_S_bin[bin].free[0]--;
_S_bin[bin].used[0]++;
}
}
else
{
/*
* "Default" operation - we have blocks on our own freelist
* grab the first record and update the counters.
*/
block = _S_bin[bin].first[thread_id];
_S_bin[bin].first[thread_id] = _S_bin[bin].first[thread_id]->next;
_S_bin[bin].free[thread_id]--;
_S_bin[bin].used[thread_id]++;
}
return (void*)((char*)block + sizeof(block_record));
}
static void
deallocate(void* __p, size_t __n)
{
/*
* Requests larger than _S_max_bytes are handled by
* malloc/free directly
*/
if (__n > _S_max_bytes)
{
free(__p);
return;
}
/*
* Round up to power of 2 and figure out which bin to use
*/
size_t bin = _S_binmap[__n];
#ifdef __GTHREADS
size_t thread_id = _S_get_thread_id();
#else
size_t thread_id = 0;
#endif
block_record* block = (block_record*)((char*)__p
- sizeof(block_record));
/*
* This block will always be at the back of a list and thus
* we set its next pointer to NULL.
*/
block->next = NULL;
#ifdef __GTHREADS
if (__gthread_active_p())
{
/*
* Calculate the number of records to remove from our freelist
*/
int remove = _S_bin[bin].free[thread_id] -
(_S_bin[bin].used[thread_id] / _S_freelist_headroom);
/*
* The calculation above will almost always tell us to
* remove one or two records at a time, but this creates
* too much contention when locking and therefore we
* wait until the number of records is "high enough".
*/
if (remove > (int)(100 * (_S_no_of_bins - bin)) &&
remove > (int)(_S_bin[bin].free[thread_id] /
_S_freelist_headroom))
{
__gthread_mutex_lock(_S_bin[bin].mutex);
while (remove > 0)
{
if (_S_bin[bin].first[0] == NULL)
_S_bin[bin].first[0] = _S_bin[bin].first[thread_id];
else
_S_bin[bin].last[0]->next = _S_bin[bin].first[thread_id];
_S_bin[bin].last[0] = _S_bin[bin].first[thread_id];
_S_bin[bin].first[thread_id] =
_S_bin[bin].first[thread_id]->next;
_S_bin[bin].free[0]++;
_S_bin[bin].free[thread_id]--;
remove--;
}
_S_bin[bin].last[0]->next = NULL;
__gthread_mutex_unlock(_S_bin[bin].mutex);
}
/*
* Did we allocate this block?
* - Yes, return it to our freelist
* - No, return it to global pool
*/
if (thread_id == block->thread_id)
{
if (_S_bin[bin].first[thread_id] == NULL)
_S_bin[bin].first[thread_id] = block;
else
_S_bin[bin].last[thread_id]->next = block;
_S_bin[bin].last[thread_id] = block;
_S_bin[bin].free[thread_id]++;
_S_bin[bin].used[thread_id]--;
}
else
{
__gthread_mutex_lock(_S_bin[bin].mutex);
if (_S_bin[bin].first[0] == NULL)
_S_bin[bin].first[0] = block;
else
_S_bin[bin].last[0]->next = block;
_S_bin[bin].last[0] = block;
_S_bin[bin].free[0]++;
_S_bin[bin].used[block->thread_id]--;
__gthread_mutex_unlock(_S_bin[bin].mutex);
}
}
else
#endif
{
/*
* Single threaded application - return to global pool
*/
if (_S_bin[bin].first[0] == NULL)
_S_bin[bin].first[0] = block;
else
_S_bin[bin].last[0]->next = block;
_S_bin[bin].last[0] = block;
_S_bin[bin].free[0]++;
_S_bin[bin].used[0]--;
}
}
};
template<int __inst>
void
__mt_alloc<__inst>::
_S_init()
{
/*
* Calculate the number of bins required based on _S_max_bytes,
* _S_no_of_bins is initialized to 1 below.
*/
{
size_t bin_t = 1;
while (_S_max_bytes > bin_t)
{
bin_t = bin_t << 1;
_S_no_of_bins++;
}
}
/*
* Setup the bin map for quick lookup of the relevant bin
*/
_S_binmap = (binmap_type*)
malloc ((_S_max_bytes + 1) * sizeof(binmap_type));
if (!_S_binmap)
__throw_bad_alloc();
binmap_type* bp_t = _S_binmap;
binmap_type bin_max_t = 1;
binmap_type bin_t = 0;
for (binmap_type ct = 0; ct <= _S_max_bytes; ct++)
{
if (ct > bin_max_t)
{
bin_max_t <<= 1;
bin_t++;
}
*bp_t++ = bin_t;
}
/*
* If __gthread_active_p() create and initialize the list of
* free thread ids. Single threaded applications use thread id 0
* directly and have no need for this.
*/
#ifdef __GTHREADS
if (__gthread_active_p())
{
_S_thread_freelist_first =
(thread_record*)malloc(sizeof(thread_record) * _S_max_threads);
if (!_S_thread_freelist_first)
__throw_bad_alloc();
/*
* NOTE! The first assignable thread id is 1 since the global
* pool uses id 0
*/
size_t i;
for (i = 1; i < _S_max_threads; i++)
{
_S_thread_freelist_first[i - 1].next =
&_S_thread_freelist_first[i];
_S_thread_freelist_first[i - 1].id = i;
}
/*
* Set last record and pointer to this
*/
_S_thread_freelist_first[i - 1].next = NULL;
_S_thread_freelist_first[i - 1].id = i;
_S_thread_freelist_last = &_S_thread_freelist_first[i - 1];
/*
* Initialize per thread key to hold pointer to
* _S_thread_freelist NOTE! Here's an ugly workaround - if
* _S_thread_key_destr is not explicitly called at least
* once it won't be linked into the application. This is the
* behavior of template methods and __gthread_key_create()
* takes only a pointer to the function and does not cause
* the compiler to create an instance.
*/
_S_thread_key_destr(NULL);
__gthread_key_create(&_S_thread_key, _S_thread_key_destr);
}
#endif
/*
* Initialize _S_bin and its members
*/
_S_bin = (bin_record*)malloc(sizeof(bin_record) * _S_no_of_bins);
if (!_S_bin)
__throw_bad_alloc();
for (size_t bin = 0; bin < _S_no_of_bins; bin++)
{
_S_bin[bin].first = (block_record**)
malloc(sizeof(block_record*) * (_S_max_threads + 1));
if (!_S_bin[bin].first)
__throw_bad_alloc();
_S_bin[bin].last = (block_record**)
malloc(sizeof(block_record*) * (_S_max_threads + 1));
if (!_S_bin[bin].last)
__throw_bad_alloc();
_S_bin[bin].free = (size_t*)
malloc(sizeof(size_t) * (_S_max_threads + 1));
if (!_S_bin[bin].free)
__throw_bad_alloc();
_S_bin[bin].used = (size_t*)
malloc(sizeof(size_t) * (_S_max_threads + 1));
if (!_S_bin[bin].used)
__throw_bad_alloc();
/*
* Ugly workaround of what at the time of writing seems to be
* a parser problem - see PR c++/9779 for more info.
*/
#ifdef __GTHREADS
size_t s = sizeof(__gthread_mutex_t);
_S_bin[bin].mutex = (__gthread_mutex_t*)malloc(s);
if (!_S_bin[bin].mutex)
__throw_bad_alloc();
#ifdef __GTHREAD_MUTEX_INIT
{
// Do not copy a POSIX/gthr mutex once in use.
__gthread_mutex_t __tmp = __GTHREAD_MUTEX_INIT;
*_S_bin[bin].mutex = __tmp;
}
#else
{ __GTHREAD_MUTEX_INIT_FUNCTION (_S_bin[bin].mutex); }
#endif
#endif
for (size_t thread = 0; thread <= _S_max_threads; thread++)
{
_S_bin[bin].first[thread] = NULL;
_S_bin[bin].last[thread] = NULL;
_S_bin[bin].free[thread] = 0;
_S_bin[bin].used[thread] = 0;
}
}
_S_initialized = true;
}
#ifdef __GTHREADS
template<int __inst>
void
__mt_alloc<__inst>::
_S_thread_key_destr(void* freelist_pos)
{
/*
* This is due to the ugly workaround mentioned in _S_init()
*/
if (freelist_pos == NULL)
return;
/*
* If the thread - when it dies - still have records on its
* freelist we return them to the global pool here.
*/
for (size_t bin = 0; bin < _S_no_of_bins; bin++)
{
block_record* block =
_S_bin[bin].first[((thread_record*)freelist_pos)->id];
if (block != NULL)
{
__gthread_mutex_lock(_S_bin[bin].mutex);
while (block != NULL)
{
if (_S_bin[bin].first[0] == NULL)
_S_bin[bin].first[0] = block;
else
_S_bin[bin].last[0]->next = block;
_S_bin[bin].last[0] = block;
block = block->next;
_S_bin[bin].free[0]++;
}
_S_bin[bin].last[0]->next = NULL;
__gthread_mutex_unlock(_S_bin[bin].mutex);
}
}
/*
* Return this thread id record to thread_freelist
*/
__gthread_mutex_lock(&_S_thread_freelist_mutex);
_S_thread_freelist_last->next = (thread_record*)freelist_pos;
_S_thread_freelist_last = (thread_record*)freelist_pos;
_S_thread_freelist_last->next = NULL;
__gthread_mutex_unlock(&_S_thread_freelist_mutex);
}
template<int __inst>
size_t
__mt_alloc<__inst>::
_S_get_thread_id()
{
/*
* If we have thread support and it's active we check the thread
* key value and return it's id or if it's not set we take the
* first record from _S_thread_freelist and sets the key and
* returns it's id.
*/
if (__gthread_active_p())
{
thread_record* freelist_pos;
if ((freelist_pos =
(thread_record*)__gthread_getspecific(_S_thread_key)) == NULL)
{
__gthread_mutex_lock(&_S_thread_freelist_mutex);
/*
* Since _S_max_threads must be larger than the
* theoretical max number of threads of the OS the list
* can never be empty.
*/
freelist_pos = _S_thread_freelist_first;
_S_thread_freelist_first = _S_thread_freelist_first->next;
__gthread_mutex_unlock(&_S_thread_freelist_mutex);
__gthread_setspecific(_S_thread_key, (void*)freelist_pos);
/*
* Since thread_ids may/will be reused (espcially in
* producer/consumer applications) we make sure that the
* list pointers and free counter is reset BUT as the
* "old" thread may still be owner of some memory (which
* is referred to by other threads and thus not freed)
* we don't reset the used counter.
*/
for (size_t bin = 0; bin < _S_no_of_bins; bin++)
{
_S_bin[bin].first[freelist_pos->id] = NULL;
_S_bin[bin].last[freelist_pos->id] = NULL;
_S_bin[bin].free[freelist_pos->id] = 0;
}
}
return freelist_pos->id;
}
/*
* Otherwise (no thread support or inactive) all requests are
* served from the global pool 0.
*/
return 0;
}
template<int __inst> __gthread_once_t
__mt_alloc<__inst>::_S_once_mt = __GTHREAD_ONCE_INIT;
#endif
template<int __inst> bool
__mt_alloc<__inst>::_S_initialized = false;
template<int __inst> typename __mt_alloc<__inst>::binmap_type*
__mt_alloc<__inst>::_S_binmap = NULL;
/*
* Allocation requests (after round-up to power of 2) below this
* value will be handled by the allocator. A raw malloc/free() call
* will be used for requests larger than this value.
*/
template<int __inst> size_t
__mt_alloc<__inst>::_S_max_bytes = 128;
/*
* In order to avoid fragmenting and minimize the number of malloc()
* calls we always request new memory using this value. Based on
* previous discussions on the libstdc++ mailing list we have
* choosen the value below. See
* http://gcc.gnu.org/ml/libstdc++/2001-07/msg00077.html
*/
template<int __inst> size_t
__mt_alloc<__inst>::_S_chunk_size = 4096 - 4 * sizeof(void*);
/*
* The maximum number of supported threads. Our Linux 2.4.18 reports
* 4070 in /proc/sys/kernel/threads-max
*/
template<int __inst> size_t
__mt_alloc<__inst>::_S_max_threads = 4096;
/*
* Actual value calculated in _S_init()
*/
template<int __inst> size_t
__mt_alloc<__inst>::_S_no_of_bins = 1;
/*
* Each time a deallocation occurs in a threaded application we make
* sure that there are no more than _S_freelist_headroom % of used
* memory on the freelist. If the number of additional records is
* more than _S_freelist_headroom % of the freelist, we move these
* records back to the global pool.
*/
template<int __inst> size_t
__mt_alloc<__inst>::_S_freelist_headroom = 10;
/*
* Actual initialization in _S_init()
*/
#ifdef __GTHREADS
template<int __inst> typename __mt_alloc<__inst>::thread_record*
__mt_alloc<__inst>::_S_thread_freelist_first = NULL;
template<int __inst> typename __mt_alloc<__inst>::thread_record*
__mt_alloc<__inst>::_S_thread_freelist_last = NULL;
template<int __inst> __gthread_mutex_t
__mt_alloc<__inst>::_S_thread_freelist_mutex = __GTHREAD_MUTEX_INIT;
/*
* Actual initialization in _S_init()
*/
template<int __inst> __gthread_key_t
__mt_alloc<__inst>::_S_thread_key;
#endif
template<int __inst> typename __mt_alloc<__inst>::bin_record*
__mt_alloc<__inst>::_S_bin = NULL;
template<int __inst>
inline bool
operator==(const __mt_alloc<__inst>&, const __mt_alloc<__inst>&)
{ return true; }
template<int __inst>
inline bool
operator!=(const __mt_alloc<__inst>&, const __mt_alloc<__inst>&)
{ return false; }
} // namespace __gnu_cxx
namespace std
{
template<typename _Tp, int __inst>
struct _Alloc_traits<_Tp, __gnu_cxx::__mt_alloc<__inst> >
{
static const bool _S_instanceless = true;
typedef __gnu_cxx:: __mt_alloc<__inst> base_alloc_type;
typedef __simple_alloc<_Tp, base_alloc_type> _Alloc_type;
typedef __allocator<_Tp, base_alloc_type> allocator_type;
};
template<typename _Tp, typename _Tp1, int __inst>
struct _Alloc_traits<_Tp,
__allocator<_Tp1, __gnu_cxx::__mt_alloc<__inst> > >
{
static const bool _S_instanceless = true;
typedef __gnu_cxx:: __mt_alloc<__inst> base_alloc_type;
typedef __simple_alloc<_Tp, base_alloc_type> _Alloc_type;
typedef __allocator<_Tp, base_alloc_type> allocator_type;
};
} // namespace std
#endif