memory.c 7.17 KB

Raw Blame History Permalink

/**
 * \file This holds all stufff related our memory managent.
 * I try the best as far as I can to reduce memory fragmentation
 * and unneccessary calls to alloc and free.
 *
 * To achive this I try an approach described here as "Quick Fit".
 * http://www.flounder.com/memory_allocation.htm
 *
 * The basic idea is to keep allocated memory segments and don't free
 * them again. Instead I will put them in a tree indexed by their size.
 * To get new memory I first have a look in the tree if there is
 * a fitting memory segment. Fitting mean, larger or exactly the size
 * I need. If there is one, use it. If not create a new one using
 * usual malloc approach.
 * I won't split the reagions at all because most likely they will be
 * free soon again. This way I might waste some memory, so I have to
 * keep an eye on this.
 *
 * Right now I don't build an upper limit for allocation. The limit
 * still is the system memory itself.
 *
 * This is not implemented as a class because it will be used in the
 * process of object creation.
 *
 * The data structure is a balanced tree with size as key.
 * Under the size key is a list of elements of the same size.
 *
 * \author	Georg Hopp
 *
 * \copyright
 * Copyright © 2014 Georg Hopp
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */

#define _GNU_SOURCE

#include <stdio.h>

#include <pthread.h>
#include <stdlib.h>
#include <string.h>
#include <search.h>
#include <unistd.h>
#include <stdint.h>

#include "tr/memory.h"


struct memSegment
{
	size_t   ref_count;
    size_t   size;
	int      idx;
    void   * ptr;

    struct memSegment * next;
};

static
struct memSegment *
newElement(size_t size, int idx)
{
    struct memSegment * element = malloc(size);

	element->ref_count = 1;
    element->size      = size;
    element->idx       = idx;
    element->ptr       = (void*)element + sizeof(struct memSegment);

    element->next      = NULL;

    return element;
}

#ifdef MEM_OPT
/**
 * insert element in tree
 */
static
inline
struct memSegment *
insertElement(struct memSegment ** stack, struct memSegment * element)
{
	element->next = *stack;
	*stack        = element;

	return element;
}

static
inline
struct memSegment *
deleteElement(struct memSegment ** stack)
{
	struct memSegment * del_node = *stack;

	if (*stack) {
		*stack = (*stack)->next;
	}

	return del_node;
}

#define TR_MAX_MEM_IDX  1024

struct memSegment * segments[TR_MAX_MEM_IDX] = {};
pthread_mutex_t TR_memop_lock = PTHREAD_MUTEX_INITIALIZER;

static
inline
void
segmentFree(struct memSegment * segment, int depth)
{
    while (NULL != segment) {
        struct memSegment * next = segment->next;
        free(segment);
        segment = next;
    }
}
#endif

void *
TR_reference(void * mem)
{
	struct memSegment * seg = (mem - sizeof(struct memSegment));

	seg->ref_count++;

	return mem;
}

/*
 * This tries to reflect the memory management behaviour of the
 * GNU version of malloc. For other versions this might need
 * to be changed to be optimal.
 *
 * However, GNU malloc keeps separate pools for each power of
 * 2 memory size up to page size. So one page consists all of
 * memory blocks of the same sizei (a power of 2).
 *
 * Also as far as I understand the smallest allocatable block is
 * 8 bytes. At least the adresses are alwayse a multiple of 8.
 *
 * So lets say page size is 4096. There is nothing allocated
 * right now. We allocate a block of 8 bytes. This will request
 * a memory page from the OS. Then define it as a page containing
 * 8 byte blocks and return the address of the first one of these.
 * Any subsequent call to malloc for 8 bytes will return one of the
 * blocks within this page as long as there are some left.
 *
 * So what we do here is up to page size round the request size up
 * to the next power of 2 >= 8.
 * Sizes greater then pagesize will be round up to the next
 * multiple of pagesize. As far as I understand these are not
 * pooled anyway.
 *
 * For now this assumes we are on a little endian machine.
 */
void *
TR_malloc(size_t size)
{
	struct memSegment * seg         = NULL;
	long                psize       = sysconf(_SC_PAGESIZE);
	static int          psize_width = 0;
	int                 idx;

	if (psize_width == 0) psize_width = bitwidth(psize);

	size += sizeof(struct memSegment);

#define MIN_BITS   8

    if (size >= psize) {
        idx = size / psize;

        if (0 != (size % psize)) {
            // size if not a multiple of pagesize so bring it to one.
            size = (idx + 1) * psize;
            idx++;
        }

        idx += psize_width - MIN_BITS;
    } else {
        if (size <= 1 << (MIN_BITS - 1)) {
            size = 1 << (MIN_BITS - 1);
            idx = 0;
        } else {
            size_t mask;

            idx   = bitwidth(size);
            mask  = (1 << (idx + 1)) - 1;
            idx  -= (MIN_BITS - 1);

            if (size != (size & ~(mask >> 1))) {
                size = (size << 1) & ~mask;
                idx++;
            }
        }
    }

#undef MIN_BITS

#ifdef MEM_OPT
	if (idx < TR_MAX_MEM_IDX) {
		pthread_mutex_lock(&TR_memop_lock);
		seg = deleteElement(&(segments[idx]));
		pthread_mutex_unlock(&TR_memop_lock);
	} else
#endif
	{
		idx = -1;
	}

	if (NULL == seg) {
		seg = newElement(size, idx);
	}

	return seg->ptr;
}

/**
 * this is a really memory wasting solution....just to be able to
 * use calloc, which might be faster then malloc/memset solution.
 *
 * Maybe this is a bad idea, as we need to memset the buffer anyway
 * if it comes from our tree, which hopefully should be the majority
 * of cases.
 */
void *
TR_calloc(size_t nmemb, size_t size)
{
	size_t   _size = nmemb * size;
	void   * mem   = TR_malloc(_size);

	memset(mem, 0, _size);

	return mem;
}

void
TR_free(void ** mem)
{
	if (NULL != *mem) {
		struct memSegment * seg = (*mem - sizeof(struct memSegment));

		if (1 < seg->ref_count) {
			seg->ref_count--;
		} else {
#ifdef MEM_OPT
			if (-1 != seg->idx) {
				pthread_mutex_lock(&TR_memop_lock);
				insertElement(&(segments[seg->idx]), seg);
				pthread_mutex_unlock(&TR_memop_lock);
			} else
#endif
			{
				free(seg);
			}
		}

		*mem = NULL;
	}
}

size_t
TR_getSize(void * mem)
{
	struct memSegment * segment;

	if (NULL == mem) {
		return 0;
	}

	segment = (struct memSegment *)(mem - sizeof(struct memSegment));
	return segment->size;
}

void
TR_cleanup()
{
#ifdef MEM_OPT
	int i;

	for (i=0; i<TR_MAX_MEM_IDX; i++) {
		while(segments[i]) {
			struct memSegment * next = segments[i]->next;
			free(segments[i]);
			segments[i] = next;
		}
	}
#endif
}

char *
TR_strdup(const char * src)
{
	char * dup;

	if (NULL == src) {
		return NULL;
	}

	dup = TR_malloc(strlen(src)+1);
	strcpy(dup, src);

	return dup;
}

// vim: set ts=4 sw=4: