blob: 6c413ef45286ae6e5a4a96440159fe5fad750971 [file] [log] [blame]
#include "median.h"
#include "hex.h"
#include <strings.h>
#include <stdio.h>
// The structure of the data buffer is this:
// ----------------------------------------------------------------------------
// | Metadata | BlockData | Data * nMembers | BlockData | Data * nMembers .....
// ----------------------------------------------------------------------------
// Where the number of blocks is nLevels and stored in Metadata, and nMembers
// is stored in Metadata as well.
// The buffer is cast into this struct.
typedef struct metadata {
unsigned int gid; // The next available global id
size_t nLevels; // Number of buffers available.
size_t nMembers; // The number of members in each block.
size_t currentBufferIdx; // The index of the currently active buffer.
unsigned char data[0]; // The start of the data.
} metadata_t;
typedef struct bufferdata {
unsigned int id; // Block identity.
size_t level; // The level of this buffer.
size_t occupancy; // Number of occupants for this buffer.
median_data_t data[0]; // The data for this buffer.
} blockdata_t;
// ----------------------------------------------------------------------------
// Helper functions.
//
// This turns epsilon into a suggested size for each block.
size_t calculate_nMembers(double epsilon) {
return (size_t) ((1.0 / epsilon) + 1);
}
// This turns maxN into the number of levels needed. It's ceiling(log2(maxN)).
size_t calculate_nLevels(size_t maxN) {
size_t nLevels = 1;
while((maxN = (maxN >> 1))) nLevels++;
return nLevels;
}
// This returns the blocksize.
size_t blocksize(metadata_t * m) {
return sizeof(blockdata_t) + m->nMembers * sizeof(median_data_t);
}
// This returns the size of the entire buffer.
size_t buffersize(metadata_t * m) {
return sizeof(metadata_t) + m->nLevels * blocksize(m);
}
// Returns the first byte address which is out of bounds.
unsigned char * out_of_bounds(metadata_t * m) {
return m->data + buffersize(m) - sizeof(blockdata_t);
}
// Get the current block.
blockdata_t * get_current_block(metadata_t *m) {
size_t offset = m->currentBufferIdx * blocksize(m);
return (blockdata_t *)(m->data + offset);
}
// Does the current block have space?
unsigned int current_block_has_space(metadata_t* m) {
return (get_current_block(m)->occupancy < m->nMembers);
}
// This will only be called in a safe situation. It's an internal function.
// Does not bound check!
// Do not call it unless you know what you are doing.
void insert_data_into_current_block(metadata_t *m, median_data_t data) {
blockdata_t * b = get_current_block(m);
b->data[b->occupancy++] = data;
}
// Check if there is space in some block and if there is, then
// make that block the current block.
unsigned int occupy_available_empty_block(metadata_t * m) {
size_t idx = 0;
for(unsigned char * ptr = (unsigned char *) m->data; // Start of the data blocks.
ptr <= (m->data + buffersize(m) - blocksize(m)); // The next block will fit into the buffer.
ptr += blocksize(m)) { // Advance by a block
blockdata_t * b = (blockdata_t *)ptr;
if(b->occupancy < m->nMembers) {
// Empty block found!
m->currentBufferIdx = idx;
return 1;
} else {
idx++;
}
}
return 0;
}
unsigned int create_empty_block(metadata_t * m) {
// TODO
return 0;
}
// Shift the current block.
unsigned int shift_current_block(metadata_t * m) {
if(!occupy_available_empty_block(m)) {
if(create_empty_block(m)) {
// Now there should be an empty block!
// So occupy it.
occupy_available_empty_block(m);
} else {
// Stuck we are, block not.
return 0;
}
}
return 1;
}
// ----------------------------------------------------------------------------
// Implementation of the interface functions (specified in median.h).
median_error_t median_suggest_buffer_size(double epsilon, size_t maxN, size_t *suggested_size){
// First check for sane parameters.
// Check that maxN is not zero.
if (maxN <= 0) return MEDIAN_ERROR_INVALID_PARAMETERS;
// If n is too large, (i.e. epsilon is too small), you are looking for too fine an approximation.
// This is not the right algorithm. Maybe consider random sampling instead.
if (calculate_nMembers(epsilon) > 100000) return MEDIAN_ERROR_INVALID_PARAMETERS;
// Calculate the required buffer size for the right nLevels and nMembers
// value.
metadata_t m;
m.nLevels = calculate_nLevels(maxN);
m.nMembers = calculate_nMembers(epsilon);
*suggested_size = buffersize(&m);
// Things worked. Return MEDIAN_OK
return MEDIAN_OK;
}
median_error_t median_init_buffer(void * buffer, size_t buffer_size, double epsilon, size_t maxN, median_buffer_t *initialized_buffer){
// Initialize.
size_t expectedSize = 0;
median_error_t error = MEDIAN_OK;
// First run median_suggest_buffer_size
if ((error = median_suggest_buffer_size(epsilon,maxN,&expectedSize)) != MEDIAN_OK) return error;
// Ensure that the buffer is large enough.
if (expectedSize > buffer_size) return MEDIAN_ERROR_BUFFER_TOO_SMALL;
// Cast the buffer as metadata_t so that we can initialize the metadata block.
metadata_t * m = (metadata_t *)buffer;
// Initialize the metadata block.
m->gid = 1;
m->nMembers = calculate_nMembers(epsilon);
m->nLevels = calculate_nLevels(maxN);
m->currentBufferIdx = 0;
// Zero all the data following the metadata block.
// Strictly this should not be necessary, but I'm a coward!
bzero(m->data, buffersize(m) - sizeof(metadata_t));
// cast the data part as an array of blockdata_t fronted blocks.
// and initialize each of the blocks.
for(unsigned char * ptr = (unsigned char *) m->data; // Start of the data blocks.
ptr <= (m->data + buffersize(m) - blocksize(m)); // The next block will fit into the buffer.
ptr += blocksize(m)) { // Advance by a block
blockdata_t * t = (blockdata_t *) ptr;
t->id = (m->gid)++;
t->level = 0;
t->occupancy = 0;
}
// Populate the return value.
*initialized_buffer = (median_buffer_t)buffer;
return error;
}
// Insert data into the buffer.
// We could have made this more concise and pretty if we used short-circuit
// execution of the boolean condition..
//
// if (current_block_has_space(m) || shift_current_block(m)) {
// insert_data_into_current_block(m,data);
// return MEDIAN_OK;
// } else {
// return MEDIAN_ERROR_MAX_N_EXCEEDED;
// }
//
// But this code is harder to read, because of the side
// effect issue.
median_error_t median_insert_data(median_buffer_t buffer,
median_data_t data){
metadata_t * m = (metadata_t *) buffer;
// If there is space in the current block..
if(current_block_has_space(m)) {
// Insert data into the current block. This
// call is to an unchecked method, which will
// never fail.
insert_data_into_current_block(m,data);
} else {
// Current block is full. So try and shift it.
if(shift_current_block(m)) {
// The current block has shifted successfully.
// So we can now call the unchecked method again.
insert_data_into_current_block(m,data);
} else {
// Failed to shift block. Stuck we are.
return MEDIAN_ERROR_MAX_N_EXCEEDED;
}
}
// Everything worked. So return MEDIAN_OK.
return MEDIAN_OK;
}
median_error_t median_output_median(const median_buffer_t buffer,
median_data_t *approximate_median){
return MEDIAN_OK;
}
// Dump the state to stderr.
median_error_t median_dump_stderr(const median_buffer_t buffer){
// First print out all the metadata.
metadata_t * m = (metadata_t *) buffer;
blockdata_t * current = get_current_block(m);
fprintf(stderr, "Metadata:\n Next GID:%d\n nMembers:%d\n nLevels:%d\n CurrentBlock:%d\n",m->gid,(unsigned)m->nMembers,(unsigned)m->nLevels, current->id);
// Now dump the blocks.
for(unsigned char * ptr = (unsigned char *) m->data; // Start of the data blocks.
ptr <= (m->data + buffersize(m) - blocksize(m)); // The next block will fit into the buffer.
ptr += blocksize(m)) { // Advance by a block
blockdata_t * b = (blockdata_t *)ptr;
fprintf(stderr, "Block:%d\n Level:%d\n Occupancy:%d\n", b->id,(unsigned)b->level,(unsigned)b->occupancy);
// For every block, dump the first 8 keys in the block.
stderr_hexdmp("Data:",(unsigned char *)(b->data),8*sizeof(median_data_t));
}
// Done, return MEDIAN_OK.
return MEDIAN_OK;
}