From abb1d5cb1bd545e7ac0cdcf44701e2f586924aab Mon Sep 17 00:00:00 2001
From: Unbewohnte
Date: Fri, 20 Jan 2023 20:26:40 +0300
Subject: [PATCH] Levenshtein string distance

---
Review notes (this section is ignored when the patch is applied):
 * Line breaks were restored. The leading whitespace of the unchanged
   context lines in testing/test.c is assumed to be four-space indentation;
   confirm against the tree or apply with --ignore-whitespace.
 * The blob ids on the index lines of the two new files predate the revised
   file contents below.

 src/strings/levenshtein.c | 79 +++++++++++++++++++++++++++++++++++++++
 src/strings/levenshtein.h | 10 +++++
 testing/test.c            | 32 ++++++++++++++++
 3 files changed, 121 insertions(+)
 create mode 100644 src/strings/levenshtein.c
 create mode 100644 src/strings/levenshtein.h

diff --git a/src/strings/levenshtein.c b/src/strings/levenshtein.c
new file mode 100644
index 0000000..c37edae
--- /dev/null
+++ b/src/strings/levenshtein.c
@@ -0,0 +1,79 @@
+#include "levenshtein.h"
+
+#include <limits.h>
+#include <stdlib.h>
+#include <string.h>
+
+// Returns the smallest of the three given values
+static int min3(int a, int b, int c) {
+    int smallest = (a < b) ? a : b;
+
+    return (c < smallest) ? c : smallest;
+}
+
+// Calculates the Levenshtein (edit) distance between the NUL-terminated
+// strings str1 and str2, comparing them byte by byte.
+//
+// Returns the minimum number of single-byte insertions, deletions and
+// substitutions that turn str1 into str2, or -1 if either pointer is NULL,
+// a string is too long for the result to fit in an int, or the working
+// buffer cannot be allocated.
+int levenshtein_distance(const char* str1, const char* str2) {
+    if (str1 == NULL || str2 == NULL) {
+        return -1;
+    }
+
+    size_t len1 = strlen(str1);
+    size_t len2 = strlen(str2);
+
+    // The distance never exceeds the longer length, so bounding both
+    // lengths keeps every value stored in row (plus one) within int range
+    if (len1 >= (size_t) INT_MAX || len2 >= (size_t) INT_MAX) {
+        return -1;
+    }
+
+    // Turning a string into an empty one (or back) costs its whole length
+    if (len1 == 0) {
+        return (int) len2;
+    }
+
+    if (len2 == 0) {
+        return (int) len1;
+    }
+
+    if (strcmp(str1, str2) == 0) {
+        return 0;
+    }
+
+    // row[j] holds the distance between the first j bytes of str1 and the
+    // prefix of str2 processed so far; one row is enough because each cell
+    // only depends on its left, upper and upper-left neighbours
+    int* row = calloc(len1 + 1, sizeof *row);
+    if (row == NULL) {
+        return -1;
+    }
+
+    for (size_t j = 0; j <= len1; j++) {
+        row[j] = (int) j;
+    }
+
+    for (size_t i = 1; i <= len2; i++) {
+        // Value of row[0] from the previous pass: the upper-left neighbour
+        int diagonal = row[0];
+        row[0] = (int) i;
+
+        for (size_t j = 1; j <= len1; j++) {
+            // Save the upper neighbour before it is overwritten
+            int above = row[j];
+            int cost = (str1[j - 1] == str2[i - 1]) ? 0 : 1;
+
+            row[j] = min3(above + 1, row[j - 1] + 1, diagonal + cost);
+            diagonal = above;
+        }
+    }
+
+    int distance = row[len1];
+    free(row);
+
+    return distance;
+}
diff --git a/src/strings/levenshtein.h b/src/strings/levenshtein.h
new file mode 100644
index 0000000..9f8d942
--- /dev/null
+++ b/src/strings/levenshtein.h
@@ -0,0 +1,10 @@
+#ifndef LEVENSHTEIN_H
+#define LEVENSHTEIN_H
+
+#include <string.h>
+
+// Calculates Levenshtein distance for str1 and str2;
+// returns -1 if an argument is NULL, a string is too long or memory runs out
+int levenshtein_distance(const char* str1, const char* str2);
+
+#endif // LEVENSHTEIN_H
diff --git a/testing/test.c b/testing/test.c
index bb31709..6771bc7 100644
--- a/testing/test.c
+++ b/testing/test.c
@@ -26,6 +26,7 @@ THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR I
 #include "../src/math/vector.h"
 #include "../src/math/misc.h"
 #include "../src/datastruct/cvec.h"
+#include "../src/strings/levenshtein.h"
 
 int test_rng() {
     lcg(76);
@@ -302,6 +303,29 @@ int test_datastruct() {
     return EXIT_SUCCESS;
 }
 
+int test_levenshtein() {
+    const char* str1 = "Fellow";
+    const char* str2 = "Fella";
+
+    int distance = levenshtein_distance(str1, str2);
+
+    if (distance != 2) {
+        printf("[ERROR] Invalid string distance between \"%s\" and \"%s\": expected to be %d; got %d\n", str1, str2, 2, distance);
+        return EXIT_FAILURE;
+    }
+
+    return EXIT_SUCCESS;
+}
+
+int test_strings() {
+    if (test_levenshtein() == EXIT_FAILURE) {
+        printf("[ERROR] Levenshtein test failed\n");
+        return EXIT_FAILURE;
+    }
+
+    return EXIT_SUCCESS;
+}
+
 int main() {
     // rng
     printf("[INFO] Testing rng...\n");
@@ -359,5 +383,13 @@ int main() {
         printf("[INFO] Datastruct test passed\n\n");
     }
 
+    // strings
+    printf("[INFO] Testing strings...\n");
+    if (test_strings() == EXIT_FAILURE) {
+        printf("[INFO] Strings test failed\n\n");
+    } else {
+        printf("[INFO] Strings test passed\n\n");
+    }
+
     return EXIT_SUCCESS;
 }
\ No newline at end of file