Intermediate commit.
So that those who are interested can read through the code!
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..3878e37
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,3 @@
+*.o
+*.d
+*.main
diff --git a/Makefile b/Makefile
new file mode 100644
index 0000000..29ea20d
--- /dev/null
+++ b/Makefile
@@ -0,0 +1,17 @@
+SRCS = median.c
+MAIN = median.main.c
+OBJS = $(patsubst %.c,%.o,$(SRCS))
+MAINO = $(patsubst %.c,%.o,$(MAIN))
+DEPS = $(patsubst %.c,%.d,$(SRCS) $(MAIN))
+EXECS = $(patsubst %.c,%,$(MAIN))
+all : $(EXECS)
+clean :
+ rm -f $(OBJS) $(DEPS) $(MAINO)
+%.o : %.c
+ gcc -MD -c $<
+%.d : %.c
+ gcc -MD -c $<
+%.main : %.main.o $(OBJS)
+ gcc $< $(OBJS) -o $@
+
+-include $(DEPS)
diff --git a/median.c b/median.c
new file mode 100644
index 0000000..face1ef
--- /dev/null
+++ b/median.c
@@ -0,0 +1,48 @@
+#include "median.h"
+#include <strings.h>
+
+typedef struct metadata {
+ size_t buffer_size;
+ size_t nLevels;
+ unsigned char data[0];
+} metadata_t;
+
+median_error_t median_suggest_buffer_size(double epsilon, size_t maxN, size_t *suggested_size){
+ // First check for sane parameters.
+ if (maxN <= 0) return MEDIAN_ERROR_INVALID_PARAMETERS;
+ // First compute the size of each buffer.
+ size_t n = (1.0/epsilon)+ 1;
+ // If n is too large, you are looking for too fine an approximation.
+ // This is not the right algorithm. Maybe consider random sampling instead.
+ if (n > 100000) return MEDIAN_ERROR_INVALID_PARAMETERS;
+ // Compute the log of maxN using a divide by 2 loop.
+ // There is no need to make this more complicated at this stage. It's not
+ // a deep loop function.
+ size_t nLevels = 1;
+ while(maxN = (maxN >> 1)) nLevels++;
+ // What is the size of the metadata structure
+ size_t extra = sizeof(metadata_t);
+ // This is the total.
+ *suggested_size = n * nLevels + extra;
+
+ return MEDIAN_ERROR_OK;
+}
+median_error_t median_init_buffer(void * buffer, size_t buffer_size, double epsilon, size_t maxN, median_buffer_t *initialized_buffer){
+ // Initialize.
+ size_t expected_size = 0;
+ median_error_t error = MEDIAN_ERROR_OK;;
+ // First run median_suggest_buffer_size
+ if ((error = median_suggest_buffer_size(epsilon,maxN,&expected_size)) != MEDIAN_ERROR_OK) return error;
+ // Ensure that the buffer is large enough.
+ if (expected_size > buffer_size) return MEDIAN_ERROR_BUFFER_TOO_SMALL;
+ metadata_t * m = (metadata_t *)buffer;
+
+ m->buffer_size = buffer_size - sizeof(metadata_t);
+ m->nLevels = m->buffer_size/(maxN * sizeof(median_data_t));
+ bzero(m->data, m->buffer_size);
+
+ return error;
+}
+
+
+
diff --git a/median.h b/median.h
new file mode 100644
index 0000000..90a3650
--- /dev/null
+++ b/median.h
@@ -0,0 +1,113 @@
+#ifndef __MONITORING_MEDIAN_H_INCLUDED__
+#define __MONITORING_MEDIAN_H_INCLUDED__
+#include <stdint.h>
+#include <stddef.h>
+
+// ------------------------------------------------------------------------
+// The core phylosophy of the interface is this.
+// The code and data structure are completely separated.
+// There are no mallocs in this codebase.
+// This is so that when we have a long running process which is using this
+// these methods to maintain statistics, there is no memory allocation or
+// fragmentation going on. This means that the statistics are always valid
+// and we do not have to figure out how report them in panic situations.
+// Other than memory corruption issues, the state of the buffer will always
+// be good, and we will never be in a state where an update could not occur
+// because of memory pressure.
+// ------------------------------------------------------------------------
+
+// The ErrorCode returned by all
+// functions in this file.
+typedef int32_t median_error_t;
+
+// A buffer which contains all the state held
+// within an approximate median calculator.
+typedef void * median_buffer_t;
+
+// A single data element. For now this is defined
+// to be a 64 bit integer. But in principle this could be
+// anything at all.
+typedef uint64_t median_data_t;
+
+// All the error codes.
+// No Error.
+static median_error_t MEDIAN_ERROR_OK = 0;
+
+// The parameters are meaningless (for example, asking for neg values
+// for epsilon).
+static median_error_t MEDIAN_ERROR_INVALID_PARAMETERS = -1;
+
+// The buffer allocated is too small for the parameters requested.
+static median_error_t MEDIAN_ERROR_BUFFER_TOO_SMALL = -2;
+
+// Overflow because MaxN has been exceeded.
+static median_error_t MEDIAN_ERROR_MAX_N_EXCEEDED = -3;
+
+// Indicates that the size of the largest possible dataset is not known.
+static size_t MEDIAN_MAX_N_UNKNOWN = 0;
+
+// The default size of the largest possible dataset.
+static size_t MEDIAN_MAX_N_DEFAULT = 1024*1024*1024; // 1G.
+
+// This routine returns a suggested size for a buffer to hold all the
+// state needed for a median calculation. Here epsilon is the permissible error.
+// maxN is the largest dataset that we anticipate. If maxN is set to
+// MEDIAN_MAX_N_UNKNOWN (or zero), then we assume MEDIAN_MAX_N_DEFAULT to be the value of
+// maxN. The value return parameter is conventionally the last one. In this
+// case, it contains the suggested_size of the buffer, assuming that the return
+// value is MEDIAN_ERROR_OK.
+// In all reasonable calls, the returned value will be
+// MEDIAN_ERROR_OK.
+median_error_t median_suggest_buffer_size(double epsilon, size_t maxN, size_t *suggested_size);
+
+// This routine will init the buffer. It will check if the buffer meets the
+// size restriction returned by #median_suggest_buffer_size(). If not, it will
+// exit with MEDIAN_ERROR_BUFFER_TOO_SMALL.
+// The method can also exit with MEDIAN_ERR)R_INVALID_PARAMETERS in case there
+// are some meaningless parameters passed in the call.
+// We assume that a buffer of length buffer_size (in bytes) has been allocated and is
+// available at the pointer specified by buffer.
+// The parameters to median_suggest_buffer_size are passed into the data
+// initialization routine, and the init routine will store them inside the data
+// structure, so that the values can be used during the operation
+// of the data structure.
+// The value return parameter initialized_buffer is identical to the buffer
+// variable (i.e. *initialized_buffer == buffer), but is appropriately cast
+// to indicate that it is initialized.
+// In all reasonable calls, the returned value will be
+// MEDIAN_ERROR_OK.
+median_error_t median_init_buffer(void * buffer, size_t buffer_size, double epsilon, size_t maxN, median_buffer_t *initialized_buffer);
+
+// This routine inserts a data point into the median computation. It's
+// implemented to be really cheap. However, it is not always constant time.
+// There can be invocations of this function which take a bit longer, but
+// in no case can it become onerous. The runtime is ammortizable to a constant
+// per invocation.
+// The first parameter, buffer, is the pointer to the buffer.
+// The second parameter, data, is the data to be inserted.
+// If the data inserted exceeds MaxN, an error might be returned. In such a
+// case, MEDIAN_ERROR_MAX_N_EXCEEDED will be returned. There is no guarantee
+// that the error will be returned as soon as MaxN is exceeded. But no error
+// will be returned until it is.
+// If the parameters do not make sense, for example buffer is 0x0,
+// then MEDIAN_ERROR_INVALID_PARAMETERS will be returned.
+// In all reasonable calls, the returned value will be
+// MEDIAN_ERROR_OK.
+median_error_t median_insert_data(median_buffer_t buffer, median_data_t data);
+
+// This routine returns the approximate median of the data seen thusfar.
+// This is implemented to be really cheap as well, but not as cheap as the
+// median_insert_data() function. So it should be used a bit less than the other
+// case.
+// The first parameter, buffer, is the buffer which contains the inserted data.
+// The second parameter, approximate_median, is a value return parameter which
+// will, on exit, contain the value of the approximate median.
+// The only condition under which this method will return an error is if one of
+// the parameters is invalid (for example, approximate_median is null).
+// By and large, if there is no bug in the calling code, this method should not
+// return an error.
+// If it does return an error, the error will be
+// MEDIAN_ERROR_INVALID_PARAMETERS
+median_error_t median_output_median(median_buffer_t buffer, median_data_t *approximate_median);
+
+#endif // ._MONITORING_MEDIAN>H_INCLUDED__
diff --git a/median.main.c b/median.main.c
new file mode 100644
index 0000000..4879c04
--- /dev/null
+++ b/median.main.c
@@ -0,0 +1,9 @@
+#include <stdio.h>
+#include "median.h"
+
+int main(int argc, char * argv[]) {
+ size_t n = 0;
+ median_suggest_buffer_size(.001, 1000, &n);
+ printf("%ld\n", n);
+ return 0;
+}