Merge branch 'develop' of sso://edge/median-approx into develop
diff --git a/Makefile b/Makefile
index d124a2e..27fbc86 100644
--- a/Makefile
+++ b/Makefile
@@ -1,4 +1,5 @@
-SRCS = median.c
+SRCS = median.c \
+ hex.c
MAIN = median.main.c
OBJS = $(patsubst %.c,%.o,$(SRCS))
MAINO = $(patsubst %.c,%.o,$(MAIN))
diff --git a/hex.c b/hex.c
new file mode 100644
index 0000000..cc42430
--- /dev/null
+++ b/hex.c
@@ -0,0 +1,27 @@
+#include "hex.h"
+#include <stdio.h>
+
+hexdmp_error_t stderr_hexdmp(const char * description, const unsigned char * data_ptr, size_t datasz){
+ if(description) {
+ fprintf(stderr,"%s:\n", description);
+ }
+ if(datasz == 0) {
+ fprintf(stderr,"No Data, Size is 0");
+ return HEXDMP_OK;
+ }
+ if(!data_ptr) {
+ fprintf(stderr,"No data pointer");
+ return HEXDMP_ERROR;
+ }
+ for(size_t i = 0; i < datasz; i += 16) {
+ // First print the offset.
+ fprintf(stderr, "%08x", (unsigned) i);
+ // Now print the next (at most) 16 characters.
+ for(size_t j = 0; (j < 16) && (i+j) < datasz; j++) {
+ fprintf(stderr, " %02x", data_ptr[i+j]);
+ }
+ // Print a newline.
+ fprintf(stderr, "\n");
+ }
+ return HEXDMP_OK;
+}
diff --git a/hex.h b/hex.h
new file mode 100644
index 0000000..d47281e
--- /dev/null
+++ b/hex.h
@@ -0,0 +1,11 @@
+#ifndef __MEDIAN_HEX_H_INCLUDED__
+#define __MEDIAN_HEX_H_INCLUDED__
+#include <strings.h> // size_t
+typedef int hexdmp_error_t;
+static const hexdmp_error_t HEXDMP_OK = 0;
+static const hexdmp_error_t HEXDMP_ERROR = -1;
+
+// The function produces an output to stderr similar to the "hexdump"
+// application. Useful in debugging things.
+hexdmp_error_t stderr_hexdmp(const char * description, const unsigned char * data_ptr, size_t datasz);
+#endif // __MEDIAN_HEX_H_INCLUDED__
diff --git a/median.c b/median.c
index 3cc991a..9a6f183 100644
--- a/median.c
+++ b/median.c
@@ -1,47 +1,126 @@
#include "median.h"
+#include "hex.h"
#include <strings.h>
+#include <stdio.h>
+// The structure of the data buffer is this:
+// ----------------------------------------------------------------------------
+// | Metadata | BlockData | Data * nMembers | BlockData | Data * nMembers .....
+// ----------------------------------------------------------------------------
+// Where the number of blocks is nLevels and stored in Metadata, and nMembers
+// is stored in Metadata as well.
// The buffer is cast into this struct.
typedef struct metadata {
- size_t bufferSize; // Number of bytes available for use.
+ unsigned int gid; // The next available global id
size_t nLevels; // Number of buffers available.
+ size_t nMembers; // The number of members in each block.
size_t currentBufferIdx; // The index of the currently active buffer.
unsigned char data[0]; // The start of the data.
} metadata_t;
-typedef uint32_t parity_t;
-
typedef struct bufferdata {
- parity_t parity; // The parity of this buffer.
+ unsigned int id; // Block identity.
size_t level; // The level of this buffer.
size_t occupancy; // Number of occupants for this buffer.
-} bufferdata_t;
+ median_data_t data[0]; // The data for this buffer.
+} blockdata_t;
-static const parity_t PARITY_NONE = 0; // The parity is none.
-static const parity_t PARITY_LEFT = 1; // The buffer has LEFT parity.
-static const parity_t PARITY_RIGHT = 2; // The buffer has RIGHT parity.
+// ----------------------------------------------------------------------------
+// Helper functions.
+//
+// This turns epsilon into a suggested size for each block.
+size_t calculate_nMembers(double epsilon) {
+ return (size_t) ((1.0 / epsilon) + 1);
+}
-median_error_t median_suggest_buffer_size(double epsilon, size_t maxN, size_t *suggested_size){
- // First check for sane parameters.
- if (maxN <= 0) return MEDIAN_ERROR_INVALID_PARAMETERS;
-
- // First compute the size of each buffer.
- size_t n = (1.0/epsilon)+ 1;
- // If n is too large, you are looking for too fine an approximation.
- // This is not the right algorithm. Maybe consider random sampling instead.
- if (n > 100000) return MEDIAN_ERROR_INVALID_PARAMETERS;
-
- // Compute the log of maxN using a divide by 2 loop.
- // There is no need to make this more complicated at this stage. It's not
- // a deep loop function.
+// This turns maxN into the number of levels needed. It's ceiling(log2(maxN)).
+size_t calculate_nLevels(size_t maxN) {
size_t nLevels = 1;
while((maxN = (maxN >> 1))) nLevels++;
+ return nLevels;
+}
- // This is the total bytes needed.
- // One metadata block. nLevels bufferdata_t blockes.
- // and n * nLevels median_data_t elements.
- *suggested_size = n * nLevels * sizeof(median_data_t) + nLevels * sizeof(bufferdata_t) + sizeof(metadata_t);
+// This returns the blocksize.
+size_t blocksize(metadata_t * m) {
+ return sizeof(blockdata_t) + m->nMembers * sizeof(median_data_t);
+}
+
+// This returns the size of the entire buffer.
+size_t buffersize(metadata_t * m) {
+ return sizeof(metadata_t) + m->nLevels * blocksize(m);
+}
+
+// Returns the first byte address which is out of bounds.
+unsigned char * out_of_bounds(metadata_t * m) {
+ return m->data + buffersize(m) - sizeof(blockdata_t);
+}
+
+// Get the current block.
+blockdata_t * get_current_block(metadata_t *m) {
+ size_t offset = m->currentBufferIdx * blocksize(m);
+ return (blockdata_t *)(m->data + offset);
+}
+// Does the current block have space?
+unsigned int current_block_has_space(metadata_t* m) {
+ return (get_current_block(m)->occupancy < m->nMembers);
+}
+
+// This will only be called in a safe situation. It's an internal function.
+// Does not bound check!
+// Do not call it unless you know what you are doing.
+void insert_data_into_current_block(metadata_t *m, median_data_t data) {
+ blockdata_t * b = get_current_block(m);
+ b->data[b->occupancy++] = data;
+}
+
+unsigned int occupy_available_empty_block(metadata_t * m) {
+ size_t idx = 0;
+ for(unsigned char * ptr = (unsigned char *) m->data; // Start of the data blocks.
+ ptr <= (m->data + buffersize(m) - blocksize(m)); // The next block will fit into the buffer.
+ ptr += blocksize(m)) { // Advance by a block
+ blockdata_t * b = (blockdata_t *)ptr;
+ if(b->occupancy < m->nMembers) {
+ // Empty block found!
+ m->currentBufferIdx = idx;
+ return 1;
+ } else {
+ idx++;
+ }
+ }
+ return 0;
+}
+
+void create_empty_block(metadata_t * m) {
+ // TODO
+}
+
+// Shift the current block.
+void shift_current_block(metadata_t * m) {
+ if(!occupy_available_empty_block(m)) {
+ create_empty_block(m);
+ }
+}
+
+
+// ----------------------------------------------------------------------------
+// Implementation of the interface functions (specified in median.h).
+median_error_t median_suggest_buffer_size(double epsilon, size_t maxN, size_t *suggested_size){
+ // First check for sane parameters.
+
+ // Check that maxN is not zero.
+ if (maxN <= 0) return MEDIAN_ERROR_INVALID_PARAMETERS;
+
+ // If n is too large, (i.e. epsilon is too small), you are looking for too fine an approximation.
+ // This is not the right algorithm. Maybe consider random sampling instead.
+ if (calculate_nMembers(epsilon) > 100000) return MEDIAN_ERROR_INVALID_PARAMETERS;
+
+ // Calculate the required buffer size for the right nLevels and nMembers
+ // value.
+ metadata_t m;
+ m.nLevels = calculate_nLevels(maxN);
+ m.nMembers = calculate_nMembers(epsilon);
+ *suggested_size = buffersize(&m);
// Things worked. Return MEDIAN_OK
return MEDIAN_OK;
@@ -56,22 +135,26 @@
if ((error = median_suggest_buffer_size(epsilon,maxN,&expectedSize)) != MEDIAN_OK) return error;
// Ensure that the buffer is large enough.
if (expectedSize > buffer_size) return MEDIAN_ERROR_BUFFER_TOO_SMALL;
-
// Cast the buffer as metadata_t so that we can initialize the metadata block.
metadata_t * m = (metadata_t *)buffer;
// Initialize the metadata block.
- m->bufferSize = buffer_size - sizeof(metadata_t);
- m->nLevels = m->bufferSize/((maxN * sizeof(median_data_t)) + sizeof(bufferdata_t));
+ m->gid = 1;
+ m->nMembers = calculate_nMembers(epsilon);
+ m->nLevels = calculate_nLevels(maxN);
m->currentBufferIdx = 0;
- bzero(m->data, m->bufferSize);
+ // Zero all the data following the metadata block.
+ // Strictly this should not be necessary, but I'm a coward!
+ bzero(m->data, buffersize(m) - sizeof(metadata_t));
- // cast the data part as an array of bufferdata_t blocks.
+ // cast the data part as an array of blockdata_t fronted blocks.
// and initialize each of the blocks.
- bufferdata_t * t = (bufferdata_t *) m->data;
- for(size_t i = 0; i < m->nLevels; i++) {
- t[i].parity = PARITY_NONE;
- t[i].level = 0;
- t[i].occupancy = 0;
+ for(unsigned char * ptr = (unsigned char *) m->data; // Start of the data blocks.
+ ptr <= (m->data + buffersize(m) - blocksize(m)); // The next block will fit into the buffer.
+ ptr += blocksize(m)) { // Advance by a block
+ blockdata_t * t = (blockdata_t *) ptr;
+ t->id = (m->gid)++;
+ t->level = 0;
+ t->occupancy = 0;
}
// Populate the return value.
@@ -79,11 +162,35 @@
return error;
}
-median_error_t median_insert_data(median_buffer_t buffer, median_data_t data){
- // First locate the empty buffer.
+
+median_error_t median_insert_data(median_buffer_t buffer,
+ median_data_t data){
+ metadata_t * m = (metadata_t *) buffer;
+ if(current_block_has_space(m)) {
+ insert_data_into_current_block(m,data);
+ } else {
+ shift_current_block(m);
+ return median_insert_data(buffer,data);
+ }
return MEDIAN_OK;
}
-median_error_t median_output_median(median_buffer_t buffer, median_data_t *approximate_median){
+median_error_t median_output_median(const median_buffer_t buffer,
+ median_data_t *approximate_median){
+ return MEDIAN_OK;
+}
+
+median_error_t median_dump_stderr(const median_buffer_t buffer){
+ metadata_t * m = (metadata_t *) buffer;
+ blockdata_t * current = get_current_block(m);
+ fprintf(stderr, "Metadata:\n Next GID:%d\n nMembers:%d\n nLevels:%d\n",m->gid,(unsigned)m->nMembers,(unsigned)m->nLevels);
+ fprintf(stderr, "Current Block:%d\n", current->id);
+ for(unsigned char * ptr = (unsigned char *) m->data; // Start of the data blocks.
+ ptr <= (m->data + buffersize(m) - blocksize(m)); // The next block will fit into the buffer.
+ ptr += blocksize(m)) { // Advance by a block
+ blockdata_t * b = (blockdata_t *)ptr;
+ fprintf(stderr, "Block:%d\n Level:%d\n Occupancy:%d\n", b->id,(unsigned)b->level,(unsigned)b->occupancy);
+ stderr_hexdmp("Data:",(unsigned char *)(b->data),256);
+ }
return MEDIAN_OK;
}
diff --git a/median.h b/median.h
index 0ea0495..c62fbbd 100644
--- a/median.h
+++ b/median.h
@@ -108,6 +108,9 @@
// return an error.
// If it does return an error, the error will be
// MEDIAN_ERROR_INVALID_PARAMETERS
-median_error_t median_output_median(median_buffer_t buffer, median_data_t *approximate_median);
+median_error_t median_output_median(const median_buffer_t buffer, median_data_t *approximate_median);
+
+// This hexdumps the state of the buffer to stderr so that it can help debug if needed.
+median_error_t median_dump_stderr(const median_buffer_t buffer);
#endif // ._MONITORING_MEDIAN>H_INCLUDED__
diff --git a/median.main.c b/median.main.c
index 59c8a53..1aef5c7 100644
--- a/median.main.c
+++ b/median.main.c
@@ -1,7 +1,7 @@
-#include <stdio.h>
-#include <stdlib.h>
-#include <assert.h>
+#include <stdlib.h> // size_t
+#include <assert.h> // assert
#include "median.h"
+#include "hex.h"
// A sinple main routine meant as an example,
// and also to unit test most of the intersting
@@ -11,8 +11,6 @@
// we need to build the data structure.
size_t n = 0;
assert(median_suggest_buffer_size(.001, 1000, &n) == MEDIAN_OK);
- // Print the number out.
- printf("%ld\n", n);
// Allocate the buffer of the required size. And then
// initialize it (median_init_buffer).
@@ -20,6 +18,14 @@
median_buffer_t buf;
assert(median_init_buffer(b, n, .001, 1000, &buf) == MEDIAN_OK);
+ // Insert some data.
+ for(median_data_t i = 1; i < 3010; i++) {
+ assert(median_insert_data(buf,i) == MEDIAN_OK);
+ }
+
+ // Dump it.
+ assert(median_dump_stderr(buf) == MEDIAN_OK);
+
// All done, return 0
return 0;
}