Merge branch 'develop' of sso://edge/median-approx into develop
diff --git a/Makefile b/Makefile index d124a2e..27fbc86 100644 --- a/Makefile +++ b/Makefile
@@ -1,4 +1,5 @@ -SRCS = median.c +SRCS = median.c \ + hex.c MAIN = median.main.c OBJS = $(patsubst %.c,%.o,$(SRCS)) MAINO = $(patsubst %.c,%.o,$(MAIN))
diff --git a/hex.c b/hex.c new file mode 100644 index 0000000..cc42430 --- /dev/null +++ b/hex.c
@@ -0,0 +1,27 @@ +#include "hex.h" +#include <stdio.h> + +hexdmp_error_t stderr_hexdmp(const char * description, const unsigned char * data_ptr, size_t datasz){ + if(description) { + fprintf(stderr,"%s:\n", description); + } + if(datasz == 0) { + fprintf(stderr,"No Data, Size is 0"); + return HEXDMP_OK; + } + if(!data_ptr) { + fprintf(stderr,"No data pointer"); + return HEXDMP_ERROR; + } + for(size_t i = 0; i < datasz; i += 16) { + // First print the offset. + fprintf(stderr, "%08x", (unsigned) i); + // Now print the next (at most) 16 characters. + for(size_t j = 0; (j < 16) && (i+j) < datasz; j++) { + fprintf(stderr, " %02x", data_ptr[i+j]); + } + // Print a newline. + fprintf(stderr, "\n"); + } + return HEXDMP_OK; +}
diff --git a/hex.h b/hex.h new file mode 100644 index 0000000..d47281e --- /dev/null +++ b/hex.h
@@ -0,0 +1,11 @@ +#ifndef __MEDIAN_HEX_H_INCLUDED__ +#define __MEDIAN_HEX_H_INCLUDED__ +#include <strings.h> // size_t +typedef int hexdmp_error_t; +static const hexdmp_error_t HEXDMP_OK = 0; +static const hexdmp_error_t HEXDMP_ERROR = -1; + +// The function produces an output to stderr similar to the "hexdump" +// application. Useful in debugging things. +hexdmp_error_t stderr_hexdmp(const char * description, const unsigned char * data_ptr, size_t datasz); +#endif // __MEDIAN_HEX_H_INCLUDED__
diff --git a/median.c b/median.c index 3cc991a..9a6f183 100644 --- a/median.c +++ b/median.c
@@ -1,47 +1,126 @@ #include "median.h" +#include "hex.h" #include <strings.h> +#include <stdio.h> +// The structure of the data buffer is this: +// ---------------------------------------------------------------------------- +// | Metadata | BlockData | Data * nMembers | BlockData | Data * nMembers ..... +// ---------------------------------------------------------------------------- +// Where the number of blocks is nLevels and stored in Metadata, and nMembers +// is stored in Metadata as well. // The buffer is cast into this struct. typedef struct metadata { - size_t bufferSize; // Number of bytes available for use. + unsigned int gid; // The next available global id size_t nLevels; // Number of buffers available. + size_t nMembers; // The number of members in each block. size_t currentBufferIdx; // The index of the currently active buffer. unsigned char data[0]; // The start of the data. } metadata_t; -typedef uint32_t parity_t; - typedef struct bufferdata { - parity_t parity; // The parity of this buffer. + unsigned int id; // Block identity. size_t level; // The level of this buffer. size_t occupancy; // Number of occupants for this buffer. -} bufferdata_t; + median_data_t data[0]; // The data for this buffer. +} blockdata_t; -static const parity_t PARITY_NONE = 0; // The parity is none. -static const parity_t PARITY_LEFT = 1; // The buffer has LEFT parity. -static const parity_t PARITY_RIGHT = 2; // The buffer has RIGHT parity. +// ---------------------------------------------------------------------------- +// Helper functions. +// +// This turns epsilon into a suggested size for each block. +size_t calculate_nMembers(double epsilon) { + return (size_t) ((1.0 / epsilon) + 1); +} -median_error_t median_suggest_buffer_size(double epsilon, size_t maxN, size_t *suggested_size){ - // First check for sane parameters. - if (maxN <= 0) return MEDIAN_ERROR_INVALID_PARAMETERS; - - // First compute the size of each buffer. - size_t n = (1.0/epsilon)+ 1; - // If n is too large, you are looking for too fine an approximation. - // This is not the right algorithm. Maybe consider random sampling instead. - if (n > 100000) return MEDIAN_ERROR_INVALID_PARAMETERS; - - // Compute the log of maxN using a divide by 2 loop. - // There is no need to make this more complicated at this stage. It's not - // a deep loop function. +// This turns maxN into the number of levels needed. It's ceiling(log2(maxN)). +size_t calculate_nLevels(size_t maxN) { size_t nLevels = 1; while((maxN = (maxN >> 1))) nLevels++; + return nLevels; +} - // This is the total bytes needed. - // One metadata block. nLevels bufferdata_t blockes. - // and n * nLevels median_data_t elements. - *suggested_size = n * nLevels * sizeof(median_data_t) + nLevels * sizeof(bufferdata_t) + sizeof(metadata_t); +// This returns the blocksize. +size_t blocksize(metadata_t * m) { + return sizeof(blockdata_t) + m->nMembers * sizeof(median_data_t); +} + +// This returns the size of the entire buffer. +size_t buffersize(metadata_t * m) { + return sizeof(metadata_t) + m->nLevels * blocksize(m); +} + +// Returns the first byte address which is out of bounds. +unsigned char * out_of_bounds(metadata_t * m) { + return m->data + buffersize(m) - sizeof(blockdata_t); +} + +// Get the current block. +blockdata_t * get_current_block(metadata_t *m) { + size_t offset = m->currentBufferIdx * blocksize(m); + return (blockdata_t *)(m->data + offset); +} +// Does the current block have space? +unsigned int current_block_has_space(metadata_t* m) { + return (get_current_block(m)->occupancy < m->nMembers); +} + +// This will only be called in a safe situation. It's an internal function. +// Does not bound check! +// Do not call it unless you know what you are doing. +void insert_data_into_current_block(metadata_t *m, median_data_t data) { + blockdata_t * b = get_current_block(m); + b->data[b->occupancy++] = data; +} + +unsigned int occupy_available_empty_block(metadata_t * m) { + size_t idx = 0; + for(unsigned char * ptr = (unsigned char *) m->data; // Start of the data blocks. + ptr <= (m->data + buffersize(m) - blocksize(m)); // The next block will fit into the buffer. + ptr += blocksize(m)) { // Advance by a block + blockdata_t * b = (blockdata_t *)ptr; + if(b->occupancy < m->nMembers) { + // Empty block found! + m->currentBufferIdx = idx; + return 1; + } else { + idx++; + } + } + return 0; +} + +void create_empty_block(metadata_t * m) { + // TODO +} + +// Shift the current block. +void shift_current_block(metadata_t * m) { + if(!occupy_available_empty_block(m)) { + create_empty_block(m); + } +} + + +// ---------------------------------------------------------------------------- +// Implementation of the interface functions (specified in median.h). +median_error_t median_suggest_buffer_size(double epsilon, size_t maxN, size_t *suggested_size){ + // First check for sane parameters. + + // Check that maxN is not zero. + if (maxN <= 0) return MEDIAN_ERROR_INVALID_PARAMETERS; + + // If n is too large, (i.e. epsilon is too small), you are looking for too fine an approximation. + // This is not the right algorithm. Maybe consider random sampling instead. + if (calculate_nMembers(epsilon) > 100000) return MEDIAN_ERROR_INVALID_PARAMETERS; + + // Calculate the required buffer size for the right nLevels and nMembers + // value. + metadata_t m; + m.nLevels = calculate_nLevels(maxN); + m.nMembers = calculate_nMembers(epsilon); + *suggested_size = buffersize(&m); // Things worked. Return MEDIAN_OK return MEDIAN_OK; @@ -56,22 +135,26 @@ if ((error = median_suggest_buffer_size(epsilon,maxN,&expectedSize)) != MEDIAN_OK) return error; // Ensure that the buffer is large enough. if (expectedSize > buffer_size) return MEDIAN_ERROR_BUFFER_TOO_SMALL; - // Cast the buffer as metadata_t so that we can initialize the metadata block. metadata_t * m = (metadata_t *)buffer; // Initialize the metadata block. - m->bufferSize = buffer_size - sizeof(metadata_t); - m->nLevels = m->bufferSize/((maxN * sizeof(median_data_t)) + sizeof(bufferdata_t)); + m->gid = 1; + m->nMembers = calculate_nMembers(epsilon); + m->nLevels = calculate_nLevels(maxN); m->currentBufferIdx = 0; - bzero(m->data, m->bufferSize); + // Zero all the data following the metadata block. + // Strictly this should not be necessary, but I'm a coward! + bzero(m->data, buffersize(m) - sizeof(metadata_t)); - // cast the data part as an array of bufferdata_t blocks. + // cast the data part as an array of blockdata_t fronted blocks. // and initialize each of the blocks. - bufferdata_t * t = (bufferdata_t *) m->data; - for(size_t i = 0; i < m->nLevels; i++) { - t[i].parity = PARITY_NONE; - t[i].level = 0; - t[i].occupancy = 0; + for(unsigned char * ptr = (unsigned char *) m->data; // Start of the data blocks. + ptr <= (m->data + buffersize(m) - blocksize(m)); // The next block will fit into the buffer. + ptr += blocksize(m)) { // Advance by a block + blockdata_t * t = (blockdata_t *) ptr; + t->id = (m->gid)++; + t->level = 0; + t->occupancy = 0; } // Populate the return value. @@ -79,11 +162,35 @@ return error; } -median_error_t median_insert_data(median_buffer_t buffer, median_data_t data){ - // First locate the empty buffer. + +median_error_t median_insert_data(median_buffer_t buffer, + median_data_t data){ + metadata_t * m = (metadata_t *) buffer; + if(current_block_has_space(m)) { + insert_data_into_current_block(m,data); + } else { + shift_current_block(m); + return median_insert_data(buffer,data); + } return MEDIAN_OK; } -median_error_t median_output_median(median_buffer_t buffer, median_data_t *approximate_median){ +median_error_t median_output_median(const median_buffer_t buffer, + median_data_t *approximate_median){ + return MEDIAN_OK; +} + +median_error_t median_dump_stderr(const median_buffer_t buffer){ + metadata_t * m = (metadata_t *) buffer; + blockdata_t * current = get_current_block(m); + fprintf(stderr, "Metadata:\n Next GID:%d\n nMembers:%d\n nLevels:%d\n",m->gid,(unsigned)m->nMembers,(unsigned)m->nLevels); + fprintf(stderr, "Current Block:%d\n", current->id); + for(unsigned char * ptr = (unsigned char *) m->data; // Start of the data blocks. + ptr <= (m->data + buffersize(m) - blocksize(m)); // The next block will fit into the buffer. + ptr += blocksize(m)) { // Advance by a block + blockdata_t * b = (blockdata_t *)ptr; + fprintf(stderr, "Block:%d\n Level:%d\n Occupancy:%d\n", b->id,(unsigned)b->level,(unsigned)b->occupancy); + stderr_hexdmp("Data:",(unsigned char *)(b->data),256); + } return MEDIAN_OK; }
diff --git a/median.h b/median.h index 0ea0495..c62fbbd 100644 --- a/median.h +++ b/median.h
@@ -108,6 +108,9 @@ // return an error. // If it does return an error, the error will be // MEDIAN_ERROR_INVALID_PARAMETERS -median_error_t median_output_median(median_buffer_t buffer, median_data_t *approximate_median); +median_error_t median_output_median(const median_buffer_t buffer, median_data_t *approximate_median); + +// This hexdumps the state of the buffer to stderr so that it can help debug if needed. +median_error_t median_dump_stderr(const median_buffer_t buffer); #endif // ._MONITORING_MEDIAN>H_INCLUDED__
diff --git a/median.main.c b/median.main.c index 59c8a53..1aef5c7 100644 --- a/median.main.c +++ b/median.main.c
@@ -1,7 +1,7 @@ -#include <stdio.h> -#include <stdlib.h> -#include <assert.h> +#include <stdlib.h> // size_t +#include <assert.h> // assert #include "median.h" +#include "hex.h" // A sinple main routine meant as an example, // and also to unit test most of the intersting @@ -11,8 +11,6 @@ // we need to build the data structure. size_t n = 0; assert(median_suggest_buffer_size(.001, 1000, &n) == MEDIAN_OK); - // Print the number out. - printf("%ld\n", n); // Allocate the buffer of the required size. And then // initialize it (median_init_buffer). @@ -20,6 +18,14 @@ median_buffer_t buf; assert(median_init_buffer(b, n, .001, 1000, &buf) == MEDIAN_OK); + // Insert some data. + for(median_data_t i = 1; i < 3010; i++) { + assert(median_insert_data(buf,i) == MEDIAN_OK); + } + + // Dump it. + assert(median_dump_stderr(buf) == MEDIAN_OK); + // All done, return 0 return 0; }