C++ Utilities 5.31.1
Useful C++ classes and routines such as argument parser, IO and conversion utilities
Loading...
Searching...
No Matches
levenshtein.cpp
Go to the documentation of this file.
1#include "./levenshtein.h"
2
3#include "./math.h"
4#include "./multiarray.h"
5
6#include <limits>
7
8using namespace std;
9
10namespace CppUtilities {
11
13
16
29void initDistanceArray(DistanceArray &distanceArray, const size_t size1, const size_t size2)
30{
31 const auto maxDistance(size1 + size2);
32 // ignore warning about null pointer dereference for now (which is *likely* not correct)
33#ifdef __GNUC__
34#pragma GCC diagnostic push
35#pragma GCC diagnostic ignored "-Wnull-dereference"
36#endif
37 distanceArray.at(0, 0) = maxDistance;
38#ifdef __GNUC__
39#pragma GCC diagnostic pop
40#endif
41 for (size_t i = 0; i <= size1; ++i) {
42 distanceArray.at(i + 1, 1) = i;
43 distanceArray.at(i + 1, 0) = maxDistance;
44 }
45 for (size_t i = 0; i <= size2; ++i) {
46 distanceArray.at(1, i + 1) = i;
47 distanceArray.at(0, i + 1) = maxDistance;
48 }
49}
50
53size_t performDamerauLevenshteinAlgorithm(
54 DistanceArray &distanceArray, const char *const str1, const size_t size1, const char *const str2, const size_t size2)
55{
56 size_t dist1[std::numeric_limits<unsigned char>::max() + 1] = { 0 };
57 for (size_t index1 = 1; index1 <= size1; ++index1) {
58 size_t dist2 = 0;
59 for (size_t index2 = 1; index2 <= size2; ++index2) {
60 const size_t substitution((str1[index1 - 1] == str2[index2 - 1]) ? 0 : 1);
61 const size_t transposition1(dist1[static_cast<unsigned char>(str2[index2 - 1])]);
62 const size_t transposition2(dist2);
63 if (!substitution) {
64 dist2 = index2;
65 }
66 // clang-format off
67 distanceArray.at(index1 + 1, index2 + 1) = CppUtilities::min(
68 distanceArray.at(index1, index2) + substitution, // substitution
69 distanceArray.at(index1 + 1, index2) + 1, // insertion
70 distanceArray.at(index1, index2 + 1) + 1, // deletion
71 distanceArray.at(transposition1, transposition2) + (index1 - transposition1 - 1) + 1 + (index2 - transposition2 - 1) // transposition
72 );
73 // clang-format on
74 }
75 dist1[static_cast<int>(str1[index1 - 1])] = index1;
76 }
77 return distanceArray.at(size1 + 1, size2 + 1);
78}
79
81template <typename DistanceArray>
82size_t performDamerauLevenshteinAlgorithmAllocatingOnHeap(
83 DistanceArray &distanceArray, const char *const str1, const size_t size1, const char *const str2, const size_t size2)
84{
85 std::vector<size_t> buffer(distanceArray.totalSize());
86 distanceArray.buffer() = buffer.data();
87 initDistanceArray(distanceArray, size1, size2);
88 return performDamerauLevenshteinAlgorithm(distanceArray, str1, size1, str2, size2);
89}
90
93template <typename DistanceArray>
94size_t performDamerauLevenshteinAlgorithmAllocatingOnStack(
95 DistanceArray &distanceArray, const char *const str1, const size_t size1, const char *const str2, const size_t size2)
96{
97 size_t buffer[128] = { 0 };
98 distanceArray.buffer() = buffer;
99 initDistanceArray(distanceArray, size1, size2);
100 return performDamerauLevenshteinAlgorithm(distanceArray, str1, size1, str2, size2);
101}
102
104
123std::size_t computeDamerauLevenshteinDistance(const char *const str1, const size_t size1, const char *const str2, const size_t size2)
124{
125 // allocate distance array
126 auto distanceArray(makeNoneOwningMultiArray<std::size_t>(size1 + 2, size2 + 2));
127 if (distanceArray.totalSize() <= 128) {
128 return performDamerauLevenshteinAlgorithmAllocatingOnStack(distanceArray, str1, size1, str2, size2);
129 } else {
130 return performDamerauLevenshteinAlgorithmAllocatingOnHeap(distanceArray, str1, size1, str2, size2);
131 }
132}
133
134} // namespace CppUtilities
The MultiArray class provides an N-dimensional array.
Definition multiarray.h:76
Contains all utilities provided by the c++utilities library.
auto makeNoneOwningMultiArray(DimensionSizes... dimensionSizes)
Constructs a new N-dimensional array using a caller-managed buffer as underlying container....
Definition multiarray.h:185
constexpr T min(T first, T second)
Returns the smallest of the given items.
Definition math.h:84
CPP_UTILITIES_EXPORT std::size_t computeDamerauLevenshteinDistance(const char *str1, std::size_t size1, const char *str2, std::size_t size2)
STL namespace.
constexpr int i