Code Implementation of Levenshtein Distance

In the previous article, I mentioned that I chose Levenshtein distance as the similarity measure for fuzzy matching. At the time I simply used an existing Python library to compute it, which turned out not to be fast enough in practice.

Over the past few days I have been staying at home, so I took the time to write several implementations of Levenshtein distance in Kotlin.

What is Levenshtein Distance

See Wikipedia for more information; the definition below is quoted from there.

In information theory, linguistics and computer science, the Levenshtein distance is a string metric for measuring the difference between two sequences. Informally, the Levenshtein distance between two words is the minimum number of single-character edits (insertions, deletions or substitutions) required to change one word into the other. It is named after the Soviet mathematician Vladimir Levenshtein, who considered this distance in 1965.

Levenshtein distance may also be referred to as edit distance, although that term may also denote a larger family of distance metrics. It is closely related to pairwise string alignments.

Implementation

Recursive

Kotlin listing: the plain recursive implementation.

class LevenshteinRecursiveUtils {

    companion object {

        /**
         * Computes the Levenshtein distance between [originString] and [targetString]
         * using plain recursion.
         *
         * No intermediate result is cached, so the same sub-problems are solved again
         * on every call; this variant is only practical for short inputs.
         *
         * @param originString the string to start from; must not be null
         * @param targetString the string to arrive at; must not be null
         * @return the minimum number of single-character insertions, deletions and
         * substitutions that turn [originString] into [targetString]
         * @throws InvalidParameterException if either argument is null
         */
        @Throws(InvalidParameterException::class)
        fun compare(originString: String?, targetString: String?): Int {
            val origin = originString
                ?: throw InvalidParameterException("Origin string should not be null! ")
            val target = targetString
                ?: throw InvalidParameterException("Target string should not be null! ")

            return when {
                // Turning an empty string into the other one takes one edit per character.
                origin.isEmpty() -> target.length
                target.isEmpty() -> origin.length
                else -> distanceOfPrefixes(origin, target, origin.lastIndex, target.lastIndex)
            }
        }

        /**
         * Distance between `origin[0..originEnd]` and `target[0..targetEnd]` (both inclusive).
         * An end index of -1 stands for the empty prefix.
         */
        private fun distanceOfPrefixes(origin: String, target: String, originEnd: Int, targetEnd: Int): Int =
            when {
                // One side is exhausted: the rest of the other side is all insertions/deletions.
                originEnd < 0 -> maxOf(targetEnd + 1, 0)
                targetEnd < 0 -> maxOf(originEnd + 1, 0)

                // Equal last characters cost nothing; drop them from both prefixes.
                origin[originEnd] == target[targetEnd] ->
                    distanceOfPrefixes(origin, target, originEnd - 1, targetEnd - 1)

                // Otherwise pay one edit and take the cheapest of the three possible moves.
                else -> 1 + minOf(
                    distanceOfPrefixes(origin, target, originEnd, targetEnd - 1),
                    distanceOfPrefixes(origin, target, originEnd - 1, targetEnd),
                    distanceOfPrefixes(origin, target, originEnd - 1, targetEnd - 1)
                )
            }
    }
}

Recursive With Memo

Kotlin listing: the recursive implementation with a memo table.

class LevenshteinRecursiveWithMemoUtils {

    companion object {

        /**
         * Computes the Levenshtein distance between [originString] and [targetString]
         * using recursion plus a memo table, so each pair of prefix end positions is
         * solved at most once.
         *
         * @param originString the string to start from; must not be null
         * @param targetString the string to arrive at; must not be null
         * @return the minimum number of single-character insertions, deletions and
         * substitutions that turn [originString] into [targetString]
         * @throws InvalidParameterException if either argument is null
         */
        @Throws(InvalidParameterException::class)
        fun compare(originString: String?, targetString: String?): Int {
            // Throw the exception type that @Throws declares (it is still an Exception,
            // so callers catching Exception keep working).
            if (originString == null) {
                throw InvalidParameterException("Origin string should not be null! ")
            }
            if (targetString == null) {
                throw InvalidParameterException("Target string should not be null! ")
            }
            if (originString.isEmpty()) {
                return targetString.length
            }
            if (targetString.isEmpty()) {
                return originString.length
            }

            // memo[i][j] caches the distance between origin[0..i] and target[0..j];
            // -1 marks "not computed yet" (a real distance is never negative).
            val memoWithPositionList = Array(originString.length) { IntArray(targetString.length) { -1 } }

            return compareWithPositionWithMemo(
                originString,
                targetString,
                originString.length - 1,
                targetString.length - 1,
                memoWithPositionList
            )
        }

        /**
         * Distance between `originString[0..positionOfOriginString]` and
         * `targetString[0..positionOfTargetString]` (both inclusive). A position of -1
         * stands for the empty prefix. Results are stored in [memoWithPositionList].
         */
        private fun compareWithPositionWithMemo(
            originString: String,
            targetString: String,
            positionOfOriginString: Int,
            positionOfTargetString: Int,
            memoWithPositionList: Array<IntArray>
        ): Int {
            // One side is exhausted: the rest of the other side is all insertions/deletions.
            if (positionOfOriginString < 0) {
                return maxOf(positionOfTargetString + 1, 0)
            }
            if (positionOfTargetString < 0) {
                return maxOf(positionOfOriginString + 1, 0)
            }

            val cached = memoWithPositionList[positionOfOriginString][positionOfTargetString]
            if (cached != -1) {
                return cached
            }

            val distance =
                if (originString[positionOfOriginString] == targetString[positionOfTargetString]) {
                    // Equal last characters cost nothing; drop them from both prefixes.
                    compareWithPositionWithMemo(
                        originString,
                        targetString,
                        positionOfOriginString - 1,
                        positionOfTargetString - 1,
                        memoWithPositionList
                    )
                } else {
                    // Pay one edit and take the cheapest of insertion, deletion, substitution.
                    1 + minOf(
                        compareWithPositionWithMemo(
                            originString,
                            targetString,
                            positionOfOriginString,
                            positionOfTargetString - 1,
                            memoWithPositionList
                        ),
                        compareWithPositionWithMemo(
                            originString,
                            targetString,
                            positionOfOriginString - 1,
                            positionOfTargetString,
                            memoWithPositionList
                        ),
                        compareWithPositionWithMemo(
                            originString,
                            targetString,
                            positionOfOriginString - 1,
                            positionOfTargetString - 1,
                            memoWithPositionList
                        )
                    )
                }

            // Cache the result for BOTH branches, so runs of matching characters are
            // not walked again each time this cell is revisited.
            memoWithPositionList[positionOfOriginString][positionOfTargetString] = distance
            return distance
        }
    }
}

Dynamic Programming

Kotlin listing: the bottom-up dynamic-programming implementation.

class LevenshteinDpUtils {

    companion object {

        /**
         * Computes the Levenshtein distance between [originString] and [targetString]
         * bottom-up with a dynamic-programming table.
         *
         * @param originString the string to start from; must not be null
         * @param targetString the string to arrive at; must not be null
         * @return the minimum number of single-character insertions, deletions and
         * substitutions that turn [originString] into [targetString]
         * @throws InvalidParameterException if either argument is null
         */
        @Throws(InvalidParameterException::class)
        fun compare(originString: String?, targetString: String?): Int {
            if (originString == null) {
                throw InvalidParameterException("Origin string should not be null! ")
            }
            if (targetString == null) {
                throw InvalidParameterException("Target string should not be null! ")
            }
            if (originString.isEmpty()) {
                return targetString.length
            }
            if (targetString.isEmpty()) {
                return originString.length
            }

            val originLength = originString.length
            val targetLength = targetString.length

            // distances[i][j] = distance between the first i characters of the origin and
            // the first j characters of the target. Row 0 and column 0 represent the empty
            // prefix; giving them their own cells keeps the first real row/column correct
            // (e.g. "a" -> "ab" must be 1, which a table without them cannot express by
            // looking only at the last character of the prefix).
            val distances = Array(originLength + 1) { IntArray(targetLength + 1) }
            for (originCount in 0..originLength) {
                distances[originCount][0] = originCount // delete every origin character
            }
            for (targetCount in 0..targetLength) {
                distances[0][targetCount] = targetCount // insert every target character
            }

            for (originCount in 1..originLength) {
                for (targetCount in 1..targetLength) {
                    distances[originCount][targetCount] =
                        if (originString[originCount - 1] == targetString[targetCount - 1]) {
                            // Equal last characters cost nothing.
                            distances[originCount - 1][targetCount - 1]
                        } else {
                            // Cheapest of insertion, deletion and substitution, plus one edit.
                            1 + minOf(
                                distances[originCount][targetCount - 1],
                                distances[originCount - 1][targetCount],
                                distances[originCount - 1][targetCount - 1]
                            )
                        }
                }
            }
            return distances[originLength][targetLength]
        }
    }
}