Coverage Report

Created: 2025-06-13 06:18

/src/gdal/port/cpl_levenshtein.cpp
Line
Count
Source (jump to first uncovered line)
1
// SPDX-License-Identifier: MIT
2
// Copyright (c) 2019, Guilherme Agostinelli
3
// Ported from https://github.com/guilhermeagostinelli/levenshtein/blob/master/levenshtein.cpp
4
5
#include "cpl_levenshtein.h"
6
7
#include <algorithm>
8
#include <cstring>
9
#include <limits>
10
#include <vector>
11
12
/** Computes the Levenshtein distance between 2 words.
13
 *
14
 * If transpositionAllowed = true, then it is the Damerau-Levenshtein distance
15
 *
16
 * Return SIZE_T_MAX in case of error.
17
 */
18
size_t CPLLevenshteinDistance(const char *word1, const char *word2,
19
                              bool transpositionAllowed)
20
0
{
21
0
    const size_t size1 = strlen(word1);
22
0
    const size_t size2 = strlen(word2);
23
24
    // If one of the words has zero length, the distance is equal to the size of the other word.
25
0
    if (size1 == 0)
26
0
        return size2;
27
0
    if (size2 == 0)
28
0
        return size1;
29
30
    // Would cause enormous amount of memory to be allocated
31
0
    if (size1 >= 32768 || size2 >= 32768)
32
0
    {
33
0
        return strcmp(word1, word2) == 0 ? 0
34
0
                                         : std::numeric_limits<size_t>::max();
35
0
    }
36
37
    // Verification matrix i.e. 2D array which will store the calculated distance.
38
0
    const size_t dimFastSize = size2 + 1;
39
0
    std::vector<unsigned short> verif;
40
0
    try
41
0
    {
42
0
        verif.resize((size1 + 1) * dimFastSize);
43
0
    }
44
0
    catch (const std::exception &)
45
0
    {
46
0
        return std::numeric_limits<size_t>::max();
47
0
    }
48
49
0
    const auto verifVal = [&verif, dimFastSize](size_t i,
50
0
                                                size_t j) -> unsigned short &
51
0
    { return verif[i * dimFastSize + j]; };
52
53
    // Sets the first row and the first column of the verification matrix with the numerical order from 0 to the length of each word.
54
0
    for (size_t i = 0; i <= size1; i++)
55
0
        verifVal(i, 0) = static_cast<unsigned short>(i);
56
0
    for (size_t j = 0; j <= size2; j++)
57
0
        verifVal(0, j) = static_cast<unsigned short>(j);
58
59
    // Verification step / matrix filling.
60
0
    for (size_t i = 1; i <= size1; i++)
61
0
    {
62
0
        for (size_t j = 1; j <= size2; j++)
63
0
        {
64
            // Sets the modification cost.
65
            // 0 means no modification (i.e. equal letters) and 1 means that a modification is needed (i.e. unequal letters).
66
0
            const int cost = (word2[j - 1] == word1[i - 1]) ? 0 : 1;
67
68
            // Sets the current position of the matrix as the minimum value between a (deletion), b (insertion) and c (substitution).
69
            // a = the upper adjacent value plus 1: verif[i - 1][j] + 1
70
            // b = the left adjacent value plus 1: verif[i][j - 1] + 1
71
            // c = the upper left adjacent value plus the modification cost: verif[i - 1][j - 1] + cost
72
0
            verifVal(i, j) = std::min(
73
0
                std::min(static_cast<unsigned short>(verifVal(i - 1, j) + 1),
74
0
                         static_cast<unsigned short>(verifVal(i, j - 1) + 1)),
75
0
                static_cast<unsigned short>(verifVal(i - 1, j - 1) + cost));
76
77
            // Cf https://en.wikipedia.org/wiki/Damerau%E2%80%93Levenshtein_distance#Optimal_string_alignment_distance
78
            // (note that in the Wikipedia page, a[] and b[] are indexed from 1...
79
0
            if (transpositionAllowed && i > 1 && j > 1 &&
80
0
                word1[i - 1] == word2[j - 2] && word1[i - 2] == word2[j - 1])
81
0
            {
82
0
                verifVal(i, j) = std::min(
83
0
                    verifVal(i, j),
84
0
                    static_cast<unsigned short>(verifVal(i - 2, j - 2) + 1));
85
0
            }
86
0
        }
87
0
    }
88
89
    // The last position of the matrix will contain the Levenshtein distance.
90
0
    return verifVal(size1, size2);
91
0
}