commit 50a26664e364e246d36573befb18d59af58fd5bb
Author: Ondřej Hruška
Date:   Tue Jun 9 22:42:32 2020 +0200

    Initial

diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..00303f2
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,7 @@
+cmake-build-debug/
+CMakeCache.txt
+*.o
+*.out
+*~
+.idea/
+
diff --git a/CMakeLists.txt b/CMakeLists.txt
new file mode 100644
index 0000000..13ee121
--- /dev/null
+++ b/CMakeLists.txt
@@ -0,0 +1,16 @@
+# Unit tests project
+
+cmake_minimum_required(VERSION 3.10)
+# All sources are plain C, so the C standard is selected and no C++ compiler is requested
+set(CMAKE_C_STANDARD 99)
+SET(CMAKE_BUILD_TYPE Debug)
+
+project(test C)
+
+set(SOURCES prefix_match.c test/main.c test/test_framework.c)
+
+add_executable(test ${SOURCES})
+
+target_include_directories(test
+    PRIVATE "." "test"
+)
diff --git a/LICENSE.txt b/LICENSE.txt
new file mode 100644
index 0000000..00927c7
--- /dev/null
+++ b/LICENSE.txt
@@ -0,0 +1,19 @@
+Copyright (c) 2020 Ondřej Hruška
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
diff --git a/Makefile b/Makefile
new file mode 100644
index 0000000..5661ef4
--- /dev/null
+++ b/Makefile
@@ -0,0 +1,9 @@
+.PHONY: build run
+
+build: a.out
+# The sources are prerequisites; without them a.out is never rebuilt after an edit
+a.out: prefix_match.c prefix_match.h test/main.c test/test_framework.c test/test_framework.h
+	cc prefix_match.c test/main.c test/test_framework.c -I. -Itest -Wall -Wextra -pedantic -Og
+
+run: a.out
+	./a.out
diff --git a/prefix_match.c b/prefix_match.c
new file mode 100644
index 0000000..82a2c82
--- /dev/null
+++ b/prefix_match.c
@@ -0,0 +1,182 @@
+#include <stdbool.h>
+#include <string.h>
+#include <strings.h>
+#include "prefix_match.h"
+
+/**
+ * Match a value against a NULL-terminated list of options.
+ *
+ * An option equal to the value wins wherever it sits in the list. Failing that,
+ * and unless PREFIXMATCH_NOABBREV is set, the value may be a prefix of exactly
+ * one option; a prefix shared by several options is ambiguous.
+ *
+ * @return index of the matched option; -1 on mismatch, ambiguity or NULL input
+ */
+int prefix_match(const char *value, const char **options, int flags) {
+    if (!value || !options) return -1;
+
+    // PREFIXMATCH_MULTI_PARTIAL makes no sense for a single word and is ignored
+    bool case_sensitive = 0 != (flags & PREFIXMATCH_CASE_SENSITIVE);
+    bool can_abbrev = 0 == (flags & PREFIXMATCH_NOABBREV);
+
+    int (*cmpfn) (const char *, const char *) = case_sensitive ? strcmp : strcasecmp;
+    int (*ncmpfn) (const char *, const char *, size_t) = case_sensitive ? strncmp : strncasecmp;
+
+    size_t input_len = strlen(value);
+    int result = -1;
+    bool ambiguous = false;
+    for (int counter = 0; NULL != options[counter]; counter++) {
+        const char *option = options[counter];
+        if (cmpfn(option, value) == 0) {
+            return counter; // full exact match
+        }
+        if (can_abbrev && ncmpfn(value, option, input_len) == 0) {
+            if (result == -1) {
+                result = counter; // first partial match
+            } else {
+                // Keep scanning instead of bailing out: an exact match
+                // further down the list must still be able to win.
+                ambiguous = true;
+            }
+        }
+    }
+    return ambiguous ? -1 : result;
+}
+
+/**
+ * Measure the word that starts at `word`.
+ *
+ * @return number of characters before the first delimiter or the terminating
+ *         null byte; 0 when `word` is NULL, empty or starts with a delimiter
+ */
+size_t pm_word_len(const char *word, const char *delims) {
+    if (!word) return 0;
+    if (!delims) return strlen(word); // no delimiters - the whole string is one word
+    // strcspn() reports 0 for a leading delimiter; the former hand-written scan
+    // treated 0 as "nothing found yet" and returned a later position instead.
+    return strcspn(word, delims);
+}
+
+/**
+ * Count the words of a sentence, i.e. its runs of non-delimiter characters.
+ *
+ * @return number of words; 0 when either argument is NULL
+ */
+size_t pm_count_words(const char *sentence, const char *delims) {
+    if (!sentence || !delims) return 0;
+
+    size_t n = 0;
+    bool in_word = false;
+    for (const char *p = sentence; *p != '\0'; p++) {
+        bool is_delim = NULL != strchr(delims, *p);
+        if (!is_delim && !in_word) n++; // first character of a new word
+        in_word = !is_delim;
+    }
+    return n;
+}
+
+/**
+ * Compare the (optionally abbreviated) sentence `a` with the full sentence `b`, word by word.
+ * Runs of delimiters, including leading and trailing ones, are skipped in both sentences.
+ *
+ * @return 1 - match; 0 - no match (also for NULL `a`/`b`, or NULL `delims` unless the strings are equal);
+ *         2 - `a` ran out of words before `b` did (only with PREFIXMATCH_MULTI_PARTIAL)
+ */
+int pm_multipart_test(const char *a, const char* b, const char *delims, int flags) {
+    if (!a || !b) return 0;
+
+    bool case_sensitive = 0 != (flags & PREFIXMATCH_CASE_SENSITIVE);
+    bool can_abbrev = 0 == (flags & PREFIXMATCH_NOABBREV);
+    bool multi_partial = 0 != (flags & PREFIXMATCH_MULTI_PARTIAL);
+
+    int (*cmpfn) (const char *, const char *) = case_sensitive ? strcmp : strcasecmp;
+    int (*ncmpfn) (const char *, const char *, size_t) = case_sensitive ? strncmp : strncasecmp;
+
+    // lazy shortcut first...
+    if (0 == cmpfn(a, b)) return 1; // full match
+    if (!delims) return 0; // the sentences cannot be split into words
+
+    const char *word_a = a;
+    const char *word_b = b;
+    while (1) {
+        // advance past leading delims, if any
+        word_a += strspn(word_a, delims);
+        word_b += strspn(word_b, delims);
+
+        if (*word_a == '\0') {
+            if (*word_b == '\0') return 1; // both ended at the same number of words
+            return multi_partial ? 2 : 0; // `a` covers only the leading words of `b`
+        }
+        if (*word_b == '\0') return 0; // `a` has more words than `b`
+
+        // find end of the words
+        size_t word_a_len = pm_word_len(word_a, delims);
+        size_t word_b_len = pm_word_len(word_b, delims);
+
+        if (word_a_len > word_b_len || (!can_abbrev && word_a_len != word_b_len)) {
+            return 0; // no match
+        }
+        if (0 != ncmpfn(word_a, word_b, word_a_len)) {
+            return 0; // words differ
+        }
+
+        word_a += word_a_len;
+        word_b += word_b_len;
+    }
+}
+
+/**
+ * Match a multi-word value against a NULL-terminated list of multi-word options.
+ *
+ * Precedence: an option with exactly the same words wins wherever it sits in the
+ * list; next a single option whose words are all abbreviated by the value; last,
+ * with PREFIXMATCH_MULTI_PARTIAL, an option of which only the leading words are given.
+ *
+ * @return index of the matched option; -1 on mismatch, ambiguity or NULL value/options
+ */
+int prefix_multipart_match(const char *value, const char **options, const char* delims, int flags) {
+    if (!value || !options) return -1;
+
+    bool multi_partial = 0 != (flags & PREFIXMATCH_MULTI_PARTIAL);
+    flags &= ~PREFIXMATCH_MULTI_PARTIAL; // turn it off for passing to the test fn
+    bool can_abbrev = 0 == (flags & PREFIXMATCH_NOABBREV);
+
+    int result = -1;
+    bool ambiguous = false;
+    int result_partial = -1;
+    int result_partial_nwords = 0;
+    for (int counter = 0; NULL != options[counter]; counter++) {
+        const char *option = options[counter];
+        if (pm_multipart_test(value, option, delims, flags | PREFIXMATCH_NOABBREV)) {
+            return counter; // full exact match
+        }
+        // NOTE(review): PREFIXMATCH_NOABBREV also switches off the MULTI_PARTIAL
+        // stage below, as it always did - confirm that this is intended.
+        if (!can_abbrev) continue;
+
+        if (pm_multipart_test(value, option, delims, flags)) {
+            if (result == -1) {
+                result = counter; // first partial match in all words
+            } else {
+                // Keep scanning instead of bailing out: an exact match
+                // further down the list must still be able to win.
+                ambiguous = true;
+            }
+        } else if (multi_partial && 2 == pm_multipart_test(value, option, delims, flags | PREFIXMATCH_MULTI_PARTIAL)) {
+            // NOTE(review): kept unchanged - a later option with more words replaces the
+            // candidate, even after a tie was flagged with -2; confirm the intended rule.
+            int nwords = (int) pm_count_words(option, delims);
+            if (result_partial == -1 || result_partial_nwords < nwords) {
+                result_partial = counter; // first partial match
+                result_partial_nwords = nwords;
+            } else {
+                result_partial = -2;
+            }
+        }
+    }
+
+    if (ambiguous) return -1;
+    if (result != -1) return result;
+    if (result_partial >= 0) return result_partial;
+    return -1;
+}
diff --git a/prefix_match.h b/prefix_match.h
new file mode 100644
index 0000000..943aa69
--- /dev/null
+++ b/prefix_match.h
@@ -0,0 +1,77 @@
+/**
+ * Prefix Match
+ *
+ * Match input value to a list of options, allowing non-ambiguous abbreviation and partial matching.
+ * This library was designed for command recognition in interactive consoles and command interfaces.
+ *
+ * Created on 2020/06/09 by Ondřej Hruška
+ */
+// The guard has no leading underscore: names like _PREFIX_MATCH_H are reserved for the implementation
+#ifndef PREFIX_MATCH_H
+#define PREFIX_MATCH_H
+
+#include <stddef.h>
+
+/** Use case-sensitive matching */
+#define PREFIXMATCH_CASE_SENSITIVE 1
+/** Forbid abbreviations */
+#define PREFIXMATCH_NOABBREV 2
+/** Allow matching fewer words, if unambiguous */
+#define PREFIXMATCH_MULTI_PARTIAL 4
+
+/**
+ * Recognize (optionally abbreviated) input
+ *
+ * @param[in] value - tested value
+ * @param[in] options - options to match against; the list ends with a NULL entry
+ * @param[in] flags - matching options (bitmask) - accepts PREFIXMATCH_CASE_SENSITIVE and PREFIXMATCH_NOABBREV
+ * @return index of the matched option, -1 on mismatch, ambiguous match or NULL value/options
+ */
+int prefix_match(const char *value, const char **options, int flags);
+
+/**
+ * Recognize input consisting of one or more (optionally abbreviated) words
+ *
+ * @param[in] value - tested value
+ * @param[in] options - options to match against (list ending with a NULL entry), multi-word options separated by the listed delimiters
+ * @param[in] delims - string with a list of possible delimiters (like for strtok)
+ * @param[in] flags - matching options (bitmask) - accepts all options
+ * @return index of the matched option, -1 on mismatch, ambiguous match or NULL value/options
+ */
+int prefix_multipart_match(const char *value, const char **options, const char* delims, int flags);
+
+// useful internal functions exported for possible re-use
+
+/**
+ * Test if two word sentences match, with individual words optionally allowed to be abbreviated.
+ *
+ * @internal
+ * @param[in] a - tested (optionally abbreviated) sentence
+ * @param[in] b - full sentence
+ * @param[in] delims - list of possible delimiters; the same list is used for both sentences
+ * @param[in] flags - matching options (bitmask) - accepts all options
+ * @return 1-match; 0-no match; 2-partial (some words) match, if the PREFIXMATCH_MULTI_PARTIAL flag is set
+ */
+int pm_multipart_test(const char *a, const char* b, const char *delims, int flags);
+
+/**
+ * Count words in a "sentence", delimited by any of the given set of delimiters.
+ *
+ * @internal
+ * @param[in] sentence - one or multi-word string
+ * @param[in] delims - delimiters accepted
+ * @return number of words
+ */
+size_t pm_count_words(const char *sentence, const char *delims);
+
+/**
+ * Measure word length
+ *
+ * @internal
+ * @param[in] word - start of a word that ends with either one of the delimiters, or a null byte.
+ * @param[in] delims - delimiters accepted
+ * @return word length
+ */
+size_t pm_word_len(const char *word, const char *delims);
+
+#endif // prefix_match.h include guard
diff --git a/test/main.c b/test/main.c
new file mode 100644
index 0000000..03e9b16
--- /dev/null
+++ b/test/main.c
@@ -0,0 +1,163 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdbool.h>
+
+#include "prefix_match.h"
+#include "test_framework.h"
+
+// --- test cases ---
+
+// Single-word lookup: exact hits, unique prefixes, ambiguous prefixes and case handling
+static bool test_prefix_match(void) {
+    const char *options[] = {
+        "ahoj",//0
+        "ahojahoj",//1
+        "ble",//2
+        "citron",//3
+        "foo",//4
+        "foo bar",//5
+        "foo baz",//6
+        "foo bcz",//7
+        "eps",//8
+        "eps set",//9
+        "eps set config",//10
+        "eps get config",//11
+        "eps hk2",//12
+        "eps hk2 vi",//13
+        "eps hk2 out",//14
+        "delete from table users",//15
+        "delete,from;,,,,table sessions",//16
+        NULL
+    };
+
+    check_eq(prefix_match("", options, 0), -1);
+    check_eq(prefix_match("b", options, 0), 2);
+    check_eq(prefix_match("c", options, 0), 3);
+    check_eq(prefix_match("citron", options, 0), 3);
+    check_eq(prefix_match("foo", options, 0), 4);
+    check_eq(prefix_match("foo ", options, 0), -1);
+    check_eq(prefix_match("foo bc", options, 0), 7);
+    check_eq(prefix_match("foo bcz", options, 0), 7);
+
+    check_eq(prefix_match("fOO bcz", options, PREFIXMATCH_CASE_SENSITIVE), -1); // CS
+    check_eq(prefix_match("fOO bcz", options, 0), 7); // CI
+
+    check_eq(prefix_match("ahoj", options, 0), 0);
+    check_eq(prefix_match("ahoja", options, 0), 1);
+
+    return true;
+}
+
+// Sentence-to-sentence comparison: delimiter runs, abbreviated words and the "partial" result 2
+static bool test_multipart_test(void) {
+    check_eq(pm_multipart_test("ahoj", "ahoj", " ", 0), 1);
+    check_eq(pm_multipart_test("ah", "ahoj", " ", 0), 1);
+    check_eq(pm_multipart_test("xxxx", "ahoj", " ", 0), 0);
+
+    check_eq(pm_multipart_test("", "ahoj", " ", 0), 0);
+    check_eq(pm_multipart_test("", "", " ", 0), 1);
+
+    check_eq(pm_multipart_test("multi part", "multi part", " ", 0), 1);
+    check_eq(pm_multipart_test("multi", "multi part", " ", 0), 0);
+    check_eq(pm_multipart_test("multi part", "multi", " ", 0), 0);
+    check_eq(pm_multipart_test("multi part", "multi dog", " ", 0), 0);
+    check_eq(pm_multipart_test("multi part", "multi part", " ", 0), 1);
+    check_eq(pm_multipart_test(" multi part ", "multi part", " ", 0), 1);
+    check_eq(pm_multipart_test("multi ,; part", "multi,part", ",; ", 0), 1);
+
+    check_eq(pm_multipart_test("m p", "multi part", " ", 0), 1);
+    check_eq(pm_multipart_test("mu pa", "multi part", " ", 0), 1);
+    check_eq(pm_multipart_test("mu pp", "multi part", " ", 0), 0);
+    check_eq(pm_multipart_test("m ", "multi part", " ", PREFIXMATCH_MULTI_PARTIAL), 2);
+
+    check_eq(pm_multipart_test("v l s h", "very long sentence here", " ", 0), 1);
+    check_eq(pm_multipart_test("v l s h", "very long sentence here too", " ", 0), 0);
+    check_eq(pm_multipart_test("v l", "very long sentence here too", " ", 0), 0);
+    check_eq(pm_multipart_test("v l", "very long sentence here too", " ", PREFIXMATCH_MULTI_PARTIAL), 2);
+
+    check_eq(pm_multipart_test("v l s h", "very long sentence here", " ", PREFIXMATCH_MULTI_PARTIAL | PREFIXMATCH_CASE_SENSITIVE), 1);
+    check_eq(pm_multipart_test("v l s h", "very long sentence here too", " ", PREFIXMATCH_MULTI_PARTIAL), 2);
+
+    return true;
+}
+
+// Option lookup with multi-word input, with and without PREFIXMATCH_MULTI_PARTIAL
+static bool test_multipart_match(void) {
+    const char *options[] = {
+        "ahoj",//0
+        "foo",//1
+        "foo bar",//2
+        "foo baz",//3
+        "foo bcz",//4
+        "eps", // 5
+        "eps set",//6
+        "eps set config",//7
+        "eps get config",//8
+        "eps hk2",//9
+        "eps hk2 vi", //10
+        "eps hk2 out",//11
+        "delete from table users",//12
+        "delete,from;,,,,table sessions",//13
+        NULL
+    };
+
+    check_eq(prefix_multipart_match("", options, " ", 0), -1);
+    check_eq(prefix_multipart_match("x", options, " ", 0), -1);
+
+    check_eq(prefix_multipart_match("ahoj", options, " ", 0), 0);
+    check_eq(prefix_multipart_match("a", options, " ", 0), 0);
+    check_eq(prefix_multipart_match("ah", options, " ", 0), 0);
+
+    check_eq(prefix_multipart_match("foo", options, " ", 0), 1);
+    check_eq(prefix_multipart_match("f", options, " ", 0), 1);
+
+    check_eq(prefix_multipart_match("f b", options, " ", 0), -1);
+    check_eq(prefix_multipart_match("f b", options, " ", PREFIXMATCH_MULTI_PARTIAL), -1);
+    check_eq(prefix_multipart_match("f bc", options, " ", 0), 4);
+
+    check_eq(prefix_multipart_match("e", options, " ", 0), 5);
+    check_eq(prefix_multipart_match("eps", options, " ", 0), 5);
+    check_eq(prefix_multipart_match("eps banana", options, " ", 0), -1);
+    check_eq(prefix_multipart_match("eps banana", options, " ", PREFIXMATCH_MULTI_PARTIAL), -1);
+
+    check_eq(prefix_multipart_match("eps set", options, " ", 0), 6);
+    check_eq(prefix_multipart_match("e s", options, " ", 0), 6);
+    check_eq(prefix_multipart_match("e set", options, " ", 0), 6);
+    check_eq(prefix_multipart_match("eps s", options, " ", 0), 6);
+    check_eq(prefix_multipart_match("eps s", options, " ", 0), 6);
+
+    check_eq(prefix_multipart_match("e s c", options, " ", 0), 7);
+    check_eq(prefix_multipart_match("e g c", options, " ", 0), 8);
+
+    // there is only one get command
+
+    check_eq(prefix_multipart_match("eps get", options, " ", 0), -1);
+    check_eq(prefix_multipart_match("eps get", options, " ", PREFIXMATCH_MULTI_PARTIAL), 8);
+
+    check_eq(prefix_multipart_match("e g", options, " ", 0), -1);
+    check_eq(prefix_multipart_match("e g", options, " ", PREFIXMATCH_MULTI_PARTIAL), 8);
+
+    check_eq(prefix_multipart_match("epx get", options, " ", 0), -1);
+    check_eq(prefix_multipart_match("epx get", options, " ", PREFIXMATCH_MULTI_PARTIAL), -1);
+
+    check_eq(prefix_multipart_match("d f t u", options, " ", 0), 12);
+    check_eq(prefix_multipart_match("d f t s", options, ",; ", 0), 13);
+
+    return true;
+}
+
+// --- test launcher ---
+
+static struct Test tests[] = {
+    {"prefix_match", test_prefix_match},
+    {"prefix_multipart_test", test_multipart_test},
+    {"prefix_multipart_match", test_multipart_match},
+    {NULL, NULL}
+};
+
+// Runs every registered test; failures are only printed, the exit code is always 0
+int main(void) {
+    run_tests(tests, "main");
+    return 0;
+}
diff --git a/test/test_framework.c b/test/test_framework.c
new file mode 100644
index 0000000..3ba03e4
--- /dev/null
+++ b/test/test_framework.c
@@ -0,0 +1,29 @@
+#include <stdio.h>
+#include "test_framework.h"
+
+/**
+ * Run every test of a list that ends with an entry whose name is NULL, and print a summary.
+ *
+ * @param[in] tests - tests to run; NULL or an empty list runs nothing
+ * @param[in] module_name - label used in the printed messages
+ */
+void run_tests(struct Test *tests, const char *module_name) {
+    printf("Running tests module \"%s\"...\n\n", module_name);
+
+    int passed = 0, failed = 0;
+    // The terminator is checked before the first call too: a do-while would call
+    // the terminator's NULL function pointer when the list is empty.
+    for (struct Test *t = tests; t != NULL && t->name != NULL; t++) {
+        printf("Running test \"%s\"...\n", t->name);
+
+        if (t->func()) {
+            printf("Test %s \x1b[32mPASSED\x1b[m\n", t->name);
+            passed++;
+        } else {
+            printf("Test %s \x1b[31mFAILED\x1b[m\n", t->name);
+            failed++;
+        }
+    }
+
+    printf("\nTests module \"%s\" done. %d passed, %d failed.\n\n", module_name, passed, failed);
+}
diff --git a/test/test_framework.h b/test/test_framework.h
new file mode 100644
index 0000000..db92ffb
--- /dev/null
+++ b/test/test_framework.h
@@ -0,0 +1,100 @@
+/**
+ * shared defines and utils for tests
+ *
+ * Created on 2020/05/12.
+ */
+
+#ifndef TESTS_TEST_FRAMEWORK_H
+#define TESTS_TEST_FRAMEWORK_H
+
+#include <math.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#define XSTR(s) STR(s)
+#define STR(s) #s
+// Signature of a test case; the check macros below make it return false on the first failure
+typedef bool(*testfn_t)(void);
+// One entry of a test list; the list ends with an entry whose name is NULL
+struct Test {
+    const char *name;
+    testfn_t func;
+};
+// Runs the tests of a list and prints the results
+void run_tests(struct Test *tests, const char *module_name);
+
+// Compare arrays element by element; _good must be an array, not a pointer (its element count comes from sizeof)
+#define check_array(_var, _good) \
+    do { \
+        for (int i = 0; i < (int)(sizeof(_good) / sizeof((_good)[0])); i++) { \
+            if ((_var)[i] != (_good)[i]) { \
+                printf("%s:%d \x1b[31mAssert failed\x1b[m: arrays differ\n\x1b[37;1m n# exp act\x1b[m\n", __func__, __LINE__); \
+                for (i = 0; i < (int)(sizeof(_good) / sizeof((_good)[0])); i++) { \
+                    if ((_var)[i] == (_good)[i]) { \
+                        printf(" %2d - 0x%02x -\n", i, (_var)[i]); \
+                    } else { \
+                        printf(" %2d \x1b[32m0x%02x \x1b[31m0x%02x\x1b[m\n", i, (_good)[i], (_var)[i]); \
+                    } \
+                } \
+                return false; \
+            } \
+        } \
+    } while(0)
+// Fail the calling test unless the condition holds
+#define check(_cond) \
+    do { \
+        if (!(_cond)) { \
+            printf("%s:%d \x1b[31mAssert failed\x1b[m: %s\n", __func__, __LINE__, STR(_cond)); \
+            return false; \
+        } \
+    } while(0)
+// Compare two int values; like all check_eq* macros it evaluates its arguments again when printing a failure
+#define check_eq(_var, _good) \
+    do { \
+        if ((_var) != (_good)) { \
+            printf("%s:%d \x1b[31mAssert failed\x1b[m: %s == %s\n", __func__, __LINE__, STR(_var), STR(_good)); \
+            printf(" \x1b[31m%d (0x%02x)\x1b[m != \x1b[32m%d (0x%02x)\x1b[m\n", (int)(_var), (unsigned)(_var), (int)(_good), (unsigned)(_good)); \
+            return false; \
+        } \
+    } while(0)
+// Compare two unsigned long values (printed with %lu, not %ld)
+#define check_eq_ul(_var, _good) \
+    do { \
+        if ((_var) != (_good)) { \
+            printf("%s:%d \x1b[31mAssert failed\x1b[m: %s == %s\n", __func__, __LINE__, STR(_var), STR(_good)); \
+            printf(" \x1b[31m%lu (0x%02lx)\x1b[m != \x1b[32m%lu (0x%02lx)\x1b[m\n", (unsigned long)(_var), (unsigned long)(_var), (unsigned long)(_good), (unsigned long)(_good)); \
+            return false; \
+        } \
+    } while(0)
+// Compare two long values
+#define check_eq_l(_var, _good) \
+    do { \
+        if ((_var) != (_good)) { \
+            printf("%s:%d \x1b[31mAssert failed\x1b[m: %s == %s\n", __func__, __LINE__, STR(_var), STR(_good)); \
+            printf(" \x1b[31m%ld\x1b[m != \x1b[32m%ld\x1b[m\n", (long)(_var), (long)(_good)); \
+            return false; \
+        } \
+    } while(0)
+// Compare two floating-point values for exact equality
+#define check_eq_f(_var, _good) \
+    do { \
+        if ((_var) != (_good)) { \
+            printf("%s:%d \x1b[31mAssert failed\x1b[m: %s == %s\n", __func__, __LINE__, STR(_var), STR(_good)); \
+            printf(" \x1b[31m%f\x1b[m != \x1b[32m%f\x1b[m\n", (double)(_var), (double)(_good)); \
+            return false; \
+        } \
+    } while(0)
+// Compare two floating-point values, tolerating an absolute difference of up to thr
+#define check_eq_f_safe(_var, _good, thr) \
+    do { \
+        if (fabs((double)((_var) - (_good))) > (thr)) { \
+            printf("%s:%d \x1b[31mAssert failed\x1b[m: %s == %s\n", __func__, __LINE__, STR(_var), STR(_good)); \
+            printf(" \x1b[31m%f\x1b[m != \x1b[32m%f\x1b[m\n", (double)(_var), (double)(_good)); \
+            return false; \
+        } \
+    } while(0)
+
+
+#endif //TESTS_TEST_FRAMEWORK_H