adaptive packer (needs to be improved), and packed vec matcher

10 years ago · 98f8984f38
parent a5f18e9f84
commit 98f8984f38
4 changed files with 343 additions and 57 deletions
--- a/main.c
+++ b/main.c
@ -4,55 +4,81 @@
 #include "src/vec_match.h"
-#define DATA_LEN 11
+#define DATA_LEN 16
 #define REF_LEN 8
 static float reference[DATA_LEN] = {
-	0, 10, 20, 30, 40, 50, 40, 30, 20, 10, 0
+	//0, 10, 20, 30, 40, 50, 40, 30, 20, 10, 0
 	0, 15.7, 0, 0, 0.1, 0.2, 0.1, 10, 24.242, 0, 0, 2, 0.2, 0.4, 0.5, 0
 };
 static float data[DATA_LEN] = {
-	0, 10, 20, 30, 40, 50, 50, 40, 30, 30, 10
+	//0, 10, 20, 30, 40, 50, 50, 35, 15, 15, 0
 	0, 15.7, 0, 0, 0.1, 0.2, 0.1, 10, 24.242, 0, 0, 2, 0.2, 0.4, 0.5, 0
 };
 static float ref_p[REF_LEN];
 static float data_cprs[16] = {
 	0, 15.7, 0, 0, 0.1, 0.2, 0.1, 10, 24.242, 0, 0, 0.1, 0.2, 0.4, 0.5, 0
 };
-static float data_cprs2[16];
+//static float data_cprs[16] = {
 //	0, 15.7, 0, 0, 0.1, 0.2, 0.1, 10, 24.242, 0, 0, 2, 0.2, 0.4, 0.5, 0
 //};
-static float data_cprs3[16];
+//static float data_packed[16];
 int main(void)
 {
-	for (int i = 0; i < 16; i++) {
+/*	for (int i = 0; i < 16; i++) {
 		printf("%.1f, ", data_cprs[i]);
 	}
 	printf("\n");
-	int len = vec_pack(data_cprs2, 16, data_cprs, 16, 2);
+	int pklen = vec_pack(data_packed, 16, data_cprs, 16, 2);
-	printf("compressed len = %d\n", len);
+	printf("compressed len = %d\n", pklen);
-	for (int i = 0; i < len; i++) {
+	for (int i = 0; i < pklen; i++) {
-		printf("%.1f, ", data_cprs2[i]);
+		printf("%.1f, ", data_packed[i]);
 	}
 	printf("\n");
-	len = vec_unpack(data_cprs3, 16, data_cprs2, len);
+	pack_walker_t pw;
 	pw_init(&pw, data_packed, pklen);
 	for(int i=0; i<16; i++) {
 		printf("Value at %d is %.1f\n", i, pw_get(&pw, i));
 	}
 	int i = 8;
 	printf("Value at %d is %.1f\n", i, pw_get(&pw, i));
 	i = 7;
 	printf("Value at %d is %.1f\n", i, pw_get(&pw, i));
 	i = 1;
 	printf("Value at %d is %.1f\n", i, pw_get(&pw, i));
 	i = 15;
 	printf("Value at %d is %.1f\n", i, pw_get(&pw, i));
 	i = 11;
 	printf("Value at %d is %.1f\n", i, pw_get(&pw, i));
 	i = 12;
 	printf("Value at %d is %.1f\n", i, pw_get(&pw, i));*/
 /*	len = vec_unpack(data_cprs3, 16, data_cprs2, len);
 	printf("unpacked len = %d\n", len);
 	for (int i = 0; i < len; i++) {
 		printf("%.1f, ", data_cprs3[i]);
 	}
 	printf("\n");
 */
-
+	//return 0;
 	return 0;
 	vec_match_cfg_t cfg = {
 		.length = DATA_LEN,
 		.drift_x = 1,
-		.offset_y = 5,
+		.offset_y = 1,
 		.abs_threshold = 0.1
 	};
@ -60,14 +86,14 @@ int main(void)
 	printf("REF:  ");
 	for (int i = 0; i < DATA_LEN; i++) {
-		printf("%.0f, ", reference[i]);
+		printf("%.1f, ", reference[i]);
 	}
 	printf("\n");
 	printf("MEAS: ");
 	for (int i = 0; i < DATA_LEN; i++) {
-		printf("%.0f, ", data[i]);
+		printf("%.1f, ", data[i]);
 	}
 	printf("\n");
@ -80,4 +106,28 @@ int main(void)
 	printf("\n");
 	printf("Error rate: ENV %.2f, ABS %.2f\n", env_e, abs_e);
 	float thr;
 	int ref_pack_len = vec_pack_fit(ref_p, REF_LEN, reference, DATA_LEN, &thr);
 	printf("Reference packed with zero threshold %.1f to %d items.\n", thr, ref_pack_len);
 	printf("REF packed:  ");
 	for (int i = 0; i < ref_pack_len; i++) {
 		printf("%.1f, ", ref_p[i]);
 	}
 	printf("\n");
 	pack_walker_t pw;
 	pw_init(&pw, ref_p, ref_pack_len);
 	printf("Value #8 = %.1f\n", pw_get(&pw, 8));
 	printf("Value #7 = %.1f\n", pw_get(&pw, 7));
 	printf("Value #15 = %.1f\n", pw_get(&pw, 15));
 	printf("Trying packed match\n");
 	ok = vec_match_packed(data, ref_p, ref_pack_len, &cfg, &env_e, &abs_e);
 	printf("%s", ok ? "MATCH OK" : "MATCH FAILED");
 	printf("\n");
 	printf("Error rate: ENV %.2f, ABS %.2f\n", env_e, abs_e);
 }
--- a/main.d
+++ b/main.d
@ -14,7 +14,10 @@ main.out: src/vec_match.c /usr/include/stdc-predef.h \
 /usr/include/bits/select.h /usr/include/bits/sigset.h \
 /usr/include/bits/time.h /usr/include/sys/sysmacros.h \
 /usr/include/bits/pthreadtypes.h /usr/include/alloca.h \
- /usr/include/bits/stdlib-float.h \
+ /usr/include/bits/stdlib-float.h /usr/include/stdio.h \
 /usr/include/libio.h /usr/include/_G_config.h /usr/include/wchar.h \
 /usr/lib/gcc/x86_64-unknown-linux-gnu/5.3.0/include/stdarg.h \
 /usr/include/bits/stdio_lim.h /usr/include/bits/sys_errlist.h \
 /usr/lib/gcc/x86_64-unknown-linux-gnu/5.3.0/include/float.h \
 /usr/include/math.h /usr/include/bits/math-vector.h \
 /usr/include/bits/libm-simd-decl-stubs.h /usr/include/bits/huge_val.h \
--- a/src/vec_match.c
+++ b/src/vec_match.c
@ -1,21 +1,22 @@
 #include <stdint.h>
 #include <stdbool.h>
 #include <stdlib.h>
 #include <stdio.h>
 #include <float.h>
 #include <math.h>
 #include "vec_match.h"
-#define SQUARE(a) ((a)*(a))
+#define SQ(a) ((a)*(a))
-#define F2ZEROES(f) roundf(-(f))
+#define IS_FZERO(f) ((f) < 0.0f)
-#define ZEROES2F(z) (0.0f - z)
+#define F2ZERO(f) roundf(-(f))
 #define ZERO2F(z) (0.0f - z)
-bool vec_match_do(const float *data, const float *ref,
+bool vec_match(const float *data, const float *ref,
-				  const vec_match_cfg_t *cfg,
+			   const vec_match_cfg_t *cfg,
-				  float *fuzzy_match_error, float *abs_match_error,
+			   float *fuzzy_match_error, float *abs_match_error)
 				  bool packed)
 {
 	int a, b;
@ -23,16 +24,24 @@ bool vec_match_do(const float *data, const float *ref,
 	float env_err = 0;
 	float abs_err = 0;
-	for (int i = 0; i < (int)cfg->length; i++) {
+	for (uint32_t i = 0; i < cfg->length; i++) {
 		float peak = FLT_MIN;
 		float base = FLT_MAX;
-		// find highest value in the surrounding drift_x points
+		// bounds for base and peak search
-		a = i - cfg->drift_x;
+		if (i < cfg->drift_x) {
-		b = i + cfg->drift_x;
+			a = 0;
-		if (a < 0) a = 0;
+		} else {
-		if (b >= (int)cfg->length) b = cfg->length - 1;
+			a = i - cfg->drift_x;
 		}
 		if (i + cfg->drift_x >= cfg->length) {
 			b = cfg->length - 1;
 		} else {
 			b = i + cfg->drift_x;
 		}
 		// find base and peak
 		for (int j = a; j <= b; j++) {
 			if (peak < ref[j]) peak = ref[j];
 			if (base > ref[j]) base = ref[j];
@ -42,9 +51,10 @@ bool vec_match_do(const float *data, const float *ref,
 		peak += cfg->offset_y; // add abs threshold on top
 		base -= cfg->offset_y;
 		// ignore abs threshold difference (float precision error)
 		if (fabs(ref[i] - data[i]) > cfg->abs_threshold) {
-			abs_err += SQUARE(ref[i] - data[i]);
+			abs_err += SQ(ref[i] - data[i]);
 		}
@ -54,8 +64,8 @@ bool vec_match_do(const float *data, const float *ref,
 		} else {
 			//printf("data[%d] out of range: %f, [%f ; %f]\n", i, data[i], base, peak);
-			if (data[i] < base) env_err += SQUARE(base - data[i]);
+			if (data[i] < base) env_err += SQ(base - data[i]);
-			if (data[i] > peak) env_err += SQUARE(data[i] - peak);
+			if (data[i] > peak) env_err += SQ(data[i] - peak);
 			err_cnt++;
 		}
@ -69,22 +79,83 @@ bool vec_match_do(const float *data, const float *ref,
 }
-
+bool vec_match_packed(const float *data, const float *ref_packed, uint32_t ref_p_len,
-bool vec_match(const float *data, const float *ref, const vec_match_cfg_t *cfg,
+					  const vec_match_cfg_t *cfg,
-			   float *fuzzy_match_error, float *abs_match_error)
+					  float *fuzzy_match_error, float *abs_match_error)
 {
-	return vec_match_do(data, ref, cfg, fuzzy_match_error, abs_match_error, false);
+	uint32_t a, b;
-}
+	float f; // tmp float
 	pack_walker_t w; // walker
 	pw_init(&w, ref_packed, ref_p_len);
-bool vec_match_packed(const float *data, const float *ref, const vec_match_cfg_t *cfg,
+	int err_cnt = 0;
-					  float *fuzzy_match_error, float *abs_match_error)
+	float env_err = 0;
-{
+	float abs_err = 0;
-	return vec_match_do(data, ref, cfg, fuzzy_match_error, abs_match_error, true);
+
 	float ref_at;
 	for (uint32_t i = 0; i < cfg->length; i++) {
 		float peak = FLT_MIN;
 		float base = FLT_MAX;
 		// bounds for base and peak search
 		if (i < cfg->drift_x) {
 			a = 0;
 		} else {
 			a = i - cfg->drift_x;
 		}
 		if (i + cfg->drift_x >= cfg->length) {
 			b = cfg->length - 1;
 		} else {
 			b = i + cfg->drift_x;
 		}
 		// find base and peak
 		for (uint32_t j = a; j <= b; j++) {
 			f = pw_get(&w, j);
 			if (peak < f) peak = f;
 			if (base > f) base = f;
 		}
 		ref_at = pw_get(&w, i);
 		// apply drift_y
 		peak += cfg->offset_y; // add abs threshold on top
 		base -= cfg->offset_y;
 		// ignore abs threshold difference (float precision error)
 		if (fabs(ref_at - data[i]) > cfg->abs_threshold) {
 			abs_err += SQ(ref_at - data[i]);
 		}
 		if (data[i] >= (base - cfg->abs_threshold) && data[i] <= (peak + cfg->abs_threshold)) {
 			// within limits
 			continue;
 		} else {
 			//printf("data[%d] out of range: %f, [%f ; %f]\n", i, data[i], base, peak);
 			if (data[i] < base) env_err += SQ(base - data[i]);
 			if (data[i] > peak) env_err += SQ(data[i] - peak);
 			err_cnt++;
 		}
 	}
 	// write error values to provided fields
 	if (fuzzy_match_error != NULL) *fuzzy_match_error = env_err;
 	if (abs_match_error != NULL) *abs_match_error = abs_err;
 	return err_cnt == 0;
 }
 // ---- PACKING UTILS ----
 uint32_t vec_pack(float *result, uint32_t result_capacity,
 				  const float *data, uint32_t data_length, float threshold)
@ -96,10 +167,11 @@ uint32_t vec_pack(float *result, uint32_t result_capacity,
 		if (data[i] < threshold) {
 			zeroes++;
 		} else {
 			// not a zero
 			// write zero marker to result
 			if (zeroes) {
 				if (result_len < result_capacity) {
-					result[result_len] = ZEROES2F(zeroes); // float and negative
+					result[result_len] = ZERO2F(zeroes); // float and negative
 				}
 				zeroes = 0;
@ -128,6 +200,27 @@ uint32_t vec_pack(float *result, uint32_t result_capacity,
 uint32_t vec_pack_fit(float *result, uint32_t result_capacity,
 				  const float *data, uint32_t data_length, float *threshold_p)
 {
 	float thr = 0;
 	uint32_t ref_pack_len;
 	// TODO use smarter algorithm
 	while(true) {
 		ref_pack_len = vec_pack(result, result_capacity, data, data_length, thr);
 		printf("try %f -> %d\n", thr, ref_pack_len);//FIXME remove
 		if (ref_pack_len <= result_capacity) {
 			if (threshold_p != NULL) *threshold_p = thr;
 			return result_capacity;
 		}
 		thr += 0.1f;
 	}
 }
 uint32_t vec_unpack(float *result, uint32_t result_capacity,
 					const float *compr_data, uint32_t compr_length)
@ -135,8 +228,8 @@ uint32_t vec_unpack(float *result, uint32_t result_capacity,
 	uint32_t idx = 0;
 	for (uint32_t i = 0; i < compr_length; i++) {
-		if (compr_data[i] < 0) {
+		if (IS_FZERO(compr_data[i])) {
-			uint32_t zeroes = F2ZEROES(compr_data[i]);
+			uint32_t zeroes = F2ZERO(compr_data[i]);
 			for (uint32_t j = 0; j < zeroes; j++) {
 				if (idx < result_capacity) {
 					result[idx] = 0;
@ -154,3 +247,96 @@ uint32_t vec_unpack(float *result, uint32_t result_capacity,
 	return idx;
 }
 // ---- PACK WALKER CODE ----
 // init the pack walker struct
 void pw_init(pack_walker_t *wlkr, const float *packed_vec, uint32_t packed_len)
 {
 	wlkr->p_vec = packed_vec;
 	wlkr->p_length = packed_len;
 	wlkr->real_idx = 0;
 	wlkr->p_idx = 0;
 	wlkr->p_zero_j = 0;
 	wlkr->p_zero_n = 0;
 }
 // rewind the struct to first entry, handle leading zero(s)
 void pw_rewind(pack_walker_t *w)
 {
 	w->real_idx = 0;
 	w->p_idx = 0;
 	if (IS_FZERO(w->p_vec[0])) {
 		// multi-zero
 		w->p_zero_n = F2ZERO(w->p_vec[0]);
 		w->p_zero_j = 1;
 	} else {
 		w->p_zero_j = 0;
 		w->p_zero_n = 0;
 	}
 }
 // get value at index in packed vector
 float pw_get(pack_walker_t *w, uint32_t idx)
 {
 	if (idx < w->real_idx && idx > w->real_idx / 2) {
 		// backtrack
 		while (w->real_idx > idx) {
 			if (w->p_zero_n && w->p_zero_j > 1) { // multi-zero value
 				w->p_zero_j--;
 			} else {
 				// go to previous
 				w->p_idx--;
 				if (IS_FZERO(w->p_vec[w->p_idx])) {
 					// multi-zero
 					w->p_zero_n = F2ZERO(w->p_vec[w->p_idx]);
 					w->p_zero_j = w->p_zero_n;
 				} else {
 					w->p_zero_n = 0; // no zeros
 				}
 			}
 			w->real_idx--;
 		}
 	} else {
 		// forward
 		if (idx < w->real_idx || idx == 0) {
 			pw_rewind(w);
 		}
 		// add until reached
 		while (w->real_idx < idx && w->p_idx < w->p_length) {
 			if (w->p_zero_n && w->p_zero_j < w->p_zero_n) { // multi-zero value
 				w->p_zero_j++;
 			} else {
 				// go to next
 				w->p_idx++;
 				if (IS_FZERO(w->p_vec[w->p_idx])) {
 					// multi-zero
 					w->p_zero_n = F2ZERO(w->p_vec[w->p_idx]);
 					w->p_zero_j = 1;
 				} else {
 					w->p_zero_n = 0; // no zeros
 				}
 			}
 			w->real_idx++;
 		}
 	}
 	// handle overflow. underflow impossible <- index is unsigned
 	if (w->p_idx >= w->p_length) {
 		w->p_idx = w->p_length - 1;
 		return 0;
 	}
 	return w->p_zero_n ? 0 : w->p_vec[w->p_idx];
 }
--- a/src/vec_match.h
+++ b/src/vec_match.h
@ -12,6 +12,38 @@ typedef struct {
 } vec_match_cfg_t;
 typedef struct {
 	const float *p_vec;
 	uint32_t p_length;
 	uint32_t real_idx; // public index
 	uint32_t p_idx;
 	uint32_t p_zero_j;
 	uint32_t p_zero_n;
 } pack_walker_t;
 /**
 * Populate a packed vector walker (used to read values without unpacking)
 *
 * @param wlkr pack walker instance to populate
 * @param packed_vec vec to walk
 * @param packed_len packed vec len
 */
 void pw_init(pack_walker_t *wlkr, const float *packed_vec, uint32_t packed_len);
 /**
 * Get a value from the packed vect.
 *
 * Note: random access is slower than sequential
 *
 * @param w walker
 * @param idx unpacked vector index to read
 * @return value. Returns 0 if index is out of bounds.
 */
 float pw_get(pack_walker_t *w, uint32_t idx);
 /**
 * Match signal to reference, allowing for some offser and noise
 *
@ -22,18 +54,16 @@ typedef struct {
 * @param abs_match_error error metric calculated from raw data (can be used if envelope match passes)
 * @return envelope match status (match using drift and offset)
 */
-bool vec_match(const float *data, const float *ref, const vec_match_cfg_t *cfg, float *fuzzy_match_error, float *abs_match_error);
+bool vec_match(const float *data, const float *ref, const vec_match_cfg_t *cfg,
 			   float *fuzzy_match_error, float *abs_match_error);
 /**
- * Match vectors of positive numbers.
+ * Match vector against a packed reference vector (without unpacking).
 * Negative number indicates how many consecutive elements are zero (hence the compression).
 *
 * 1024-long vector [12, 0, ...] would be [12, -1023]
 *
 * Params otherwise the same as vec_match()
 */
-bool vec_match_packed(const float *data, const float *ref, const vec_match_cfg_t *cfg, float *fuzzy_match_error, float *abs_match_error);
+bool vec_match_packed(const float *data, const float *ref_packed, uint32_t ref_p_len,
 					  const vec_match_cfg_t *cfg, float *fuzzy_match_error, float *abs_match_error);
 /**
@ -51,7 +81,9 @@ bool vec_match_packed(const float *data, const float *ref, const vec_match_cfg_t
 * @param threshold max value to be considered zero in the compression
 * @return length of result vector
 */
-uint32_t vec_pack(float *result, uint32_t result_capacity, const float *data, uint32_t length, float threshold);
+uint32_t vec_pack(float *result, uint32_t result_capacity,
 				  const float *data, uint32_t length,
 				  float threshold);
 /**
@ -66,4 +98,19 @@ uint32_t vec_pack(float *result, uint32_t result_capacity, const float *data, ui
 * @param compr_length compressed data vector length
 * @return
 */
-uint32_t vec_unpack(float *result, uint32_t result_capacity, const float *compr_data, uint32_t compr_length);
+uint32_t vec_unpack(float *result, uint32_t result_capacity,
 					const float *compr_data, uint32_t compr_length);
 /**
 * Pack to given length, adjusting threshold automatically.
 *
 * @param result result array
 * @param result_capacity result max capacity
 * @param data data to compress
 * @param data_length data length
 * @param threshold_p field to store the used threshold, can be NULL
 * @return real result size
 */
 uint32_t vec_pack_fit(float *result, uint32_t result_capacity,
 					  const float *data, uint32_t data_length, float *threshold_p);