blob: 90a3650cc970ecc97bcf8916c9643129364310da [file] [log] [blame]
#ifndef __MONITORING_MEDIAN_H_INCLUDED__
#define __MONITORING_MEDIAN_H_INCLUDED__
#include <stdint.h>
#include <stddef.h>
// ------------------------------------------------------------------------
// The core phylosophy of the interface is this.
// The code and data structure are completely separated.
// There are no mallocs in this codebase.
// This is so that when we have a long running process which is using this
// these methods to maintain statistics, there is no memory allocation or
// fragmentation going on. This means that the statistics are always valid
// and we do not have to figure out how report them in panic situations.
// Other than memory corruption issues, the state of the buffer will always
// be good, and we will never be in a state where an update could not occur
// because of memory pressure.
// ------------------------------------------------------------------------
// The ErrorCode returned by all
// functions in this file.
typedef int32_t median_error_t;
// A buffer which contains all the state held
// within an approximate median calculator.
typedef void * median_buffer_t;
// A single data element. For now this is defined
// to be a 64 bit integer. But in principle this could be
// anything at all.
typedef uint64_t median_data_t;
// All the error codes.
// No Error.
static median_error_t MEDIAN_ERROR_OK = 0;
// The parameters are meaningless (for example, asking for neg values
// for epsilon).
static median_error_t MEDIAN_ERROR_INVALID_PARAMETERS = -1;
// The buffer allocated is too small for the parameters requested.
static median_error_t MEDIAN_ERROR_BUFFER_TOO_SMALL = -2;
// Overflow because MaxN has been exceeded.
static median_error_t MEDIAN_ERROR_MAX_N_EXCEEDED = -3;
// Indicates that the size of the largest possible dataset is not known.
static size_t MEDIAN_MAX_N_UNKNOWN = 0;
// The default size of the largest possible dataset.
static size_t MEDIAN_MAX_N_DEFAULT = 1024*1024*1024; // 1G.
// This routine returns a suggested size for a buffer to hold all the
// state needed for a median calculation. Here epsilon is the permissible error.
// maxN is the largest dataset that we anticipate. If maxN is set to
// MEDIAN_MAX_N_UNKNOWN (or zero), then we assume MEDIAN_MAX_N_DEFAULT to be the value of
// maxN. The value return parameter is conventionally the last one. In this
// case, it contains the suggested_size of the buffer, assuming that the return
// value is MEDIAN_ERROR_OK.
// In all reasonable calls, the returned value will be
// MEDIAN_ERROR_OK.
median_error_t median_suggest_buffer_size(double epsilon, size_t maxN, size_t *suggested_size);
// This routine will init the buffer. It will check if the buffer meets the
// size restriction returned by #median_suggest_buffer_size(). If not, it will
// exit with MEDIAN_ERROR_BUFFER_TOO_SMALL.
// The method can also exit with MEDIAN_ERR)R_INVALID_PARAMETERS in case there
// are some meaningless parameters passed in the call.
// We assume that a buffer of length buffer_size (in bytes) has been allocated and is
// available at the pointer specified by buffer.
// The parameters to median_suggest_buffer_size are passed into the data
// initialization routine, and the init routine will store them inside the data
// structure, so that the values can be used during the operation
// of the data structure.
// The value return parameter initialized_buffer is identical to the buffer
// variable (i.e. *initialized_buffer == buffer), but is appropriately cast
// to indicate that it is initialized.
// In all reasonable calls, the returned value will be
// MEDIAN_ERROR_OK.
median_error_t median_init_buffer(void * buffer, size_t buffer_size, double epsilon, size_t maxN, median_buffer_t *initialized_buffer);
// This routine inserts a data point into the median computation. It's
// implemented to be really cheap. However, it is not always constant time.
// There can be invocations of this function which take a bit longer, but
// in no case can it become onerous. The runtime is ammortizable to a constant
// per invocation.
// The first parameter, buffer, is the pointer to the buffer.
// The second parameter, data, is the data to be inserted.
// If the data inserted exceeds MaxN, an error might be returned. In such a
// case, MEDIAN_ERROR_MAX_N_EXCEEDED will be returned. There is no guarantee
// that the error will be returned as soon as MaxN is exceeded. But no error
// will be returned until it is.
// If the parameters do not make sense, for example buffer is 0x0,
// then MEDIAN_ERROR_INVALID_PARAMETERS will be returned.
// In all reasonable calls, the returned value will be
// MEDIAN_ERROR_OK.
median_error_t median_insert_data(median_buffer_t buffer, median_data_t data);
// This routine returns the approximate median of the data seen thusfar.
// This is implemented to be really cheap as well, but not as cheap as the
// median_insert_data() function. So it should be used a bit less than the other
// case.
// The first parameter, buffer, is the buffer which contains the inserted data.
// The second parameter, approximate_median, is a value return parameter which
// will, on exit, contain the value of the approximate median.
// The only condition under which this method will return an error is if one of
// the parameters is invalid (for example, approximate_median is null).
// By and large, if there is no bug in the calling code, this method should not
// return an error.
// If it does return an error, the error will be
// MEDIAN_ERROR_INVALID_PARAMETERS
median_error_t median_output_median(median_buffer_t buffer, median_data_t *approximate_median);
#endif // ._MONITORING_MEDIAN>H_INCLUDED__