C++ Utilities 5.27.0
Useful C++ classes and routines such as argument parser, IO and conversion utilities
Loading...
Searching...
No Matches
levenshtein.cpp
Go to the documentation of this file.
1#include "./levenshtein.h"
2
3#include "./math.h"
4#include "./multiarray.h"
5
6#include <iostream>
7#include <limits>
8#include <memory>
9
10using namespace std;
11
12namespace CppUtilities {
13
15
17using DistanceArray = MultiArray<size_t, NoneOwningMultiArray, size_t, size_t>;
18
31void initDistanceArray(DistanceArray &distanceArray, const size_t size1, const size_t size2)
32{
33 const auto maxDistance(size1 + size2);
34 // ignore warning about null pointer dereference for now (which is *likely* not correct)
35#ifdef __GNUC__
36#pragma GCC diagnostic push
37#pragma GCC diagnostic ignored "-Wnull-dereference"
38#endif
39 distanceArray.at(0, 0) = maxDistance;
40#ifdef __GNUC__
41#pragma GCC diagnostic pop
42#endif
43 for (size_t i = 0; i <= size1; ++i) {
44 distanceArray.at(i + 1, 1) = i;
45 distanceArray.at(i + 1, 0) = maxDistance;
46 }
47 for (size_t i = 0; i <= size2; ++i) {
48 distanceArray.at(1, i + 1) = i;
49 distanceArray.at(0, i + 1) = maxDistance;
50 }
51}
52
55size_t performDamerauLevenshteinAlgorithm(
56 DistanceArray &distanceArray, const char *const str1, const size_t size1, const char *const str2, const size_t size2)
57{
58 size_t dist1[std::numeric_limits<unsigned char>::max() + 1] = { 0 };
59 for (size_t index1 = 1; index1 <= size1; ++index1) {
60 size_t dist2 = 0;
61 for (size_t index2 = 1; index2 <= size2; ++index2) {
62 const size_t substitution((str1[index1 - 1] == str2[index2 - 1]) ? 0 : 1);
63 const size_t transposition1(dist1[static_cast<unsigned char>(str2[index2 - 1])]);
64 const size_t transposition2(dist2);
65 if (!substitution) {
66 dist2 = index2;
67 }
68 // clang-format off
69 distanceArray.at(index1 + 1, index2 + 1) = CppUtilities::min(
70 distanceArray.at(index1, index2) + substitution, // substitution
71 distanceArray.at(index1 + 1, index2) + 1, // insertion
72 distanceArray.at(index1, index2 + 1) + 1, // deletion
73 distanceArray.at(transposition1, transposition2) + (index1 - transposition1 - 1) + 1 + (index2 - transposition2 - 1) // transposition
74 );
75 // clang-format on
76 }
77 dist1[static_cast<int>(str1[index1 - 1])] = index1;
78 }
79 return distanceArray.at(size1 + 1, size2 + 1);
80}
81
83template <typename DistanceArray>
84size_t performDamerauLevenshteinAlgorithmAllocatingOnHeap(
85 DistanceArray &distanceArray, const char *const str1, const size_t size1, const char *const str2, const size_t size2)
86{
87 std::vector<size_t> buffer(distanceArray.totalSize());
88 distanceArray.buffer() = buffer.data();
89 initDistanceArray(distanceArray, size1, size2);
90 return performDamerauLevenshteinAlgorithm(distanceArray, str1, size1, str2, size2);
91}
92
95template <typename DistanceArray>
96size_t performDamerauLevenshteinAlgorithmAllocatingOnStack(
97 DistanceArray &distanceArray, const char *const str1, const size_t size1, const char *const str2, const size_t size2)
98{
99 size_t buffer[128] = { 0 };
100 distanceArray.buffer() = buffer;
101 initDistanceArray(distanceArray, size1, size2);
102 return performDamerauLevenshteinAlgorithm(distanceArray, str1, size1, str2, size2);
103}
104
106
125std::size_t computeDamerauLevenshteinDistance(const char *const str1, const size_t size1, const char *const str2, const size_t size2)
126{
127 // allocate distance array
128 auto distanceArray(makeNoneOwningMultiArray<std::size_t>(size1 + 2, size2 + 2));
129 if (distanceArray.totalSize() <= 128) {
130 return performDamerauLevenshteinAlgorithmAllocatingOnStack(distanceArray, str1, size1, str2, size2);
131 } else {
132 return performDamerauLevenshteinAlgorithmAllocatingOnHeap(distanceArray, str1, size1, str2, size2);
133 }
134}
135
136} // namespace CppUtilities
Contains all utilities provides by the c++utilities library.
auto makeNoneOwningMultiArray(DimensionSizes... dimensionSizes)
Constructs a new N-dimensional array using a caller-managed buffer as underlying container....
Definition multiarray.h:185
constexpr T min(T first, T second)
Returns the smallest of the given items.
Definition math.h:88
CPP_UTILITIES_EXPORT std::size_t computeDamerauLevenshteinDistance(const char *str1, std::size_t size1, const char *str2, std::size_t size2)
STL namespace.
constexpr int i