Finding editing distance
Problem
Given two strings, return boolean True/False, if they are only one edit apart.
Edit can be insert/delete/update of only one character in the string.
Eg.:
True:
xyz, xz
xyz, xyk
xy, xyz
False:
xyz, xyz
xyz, xzy
x, xyz
In comparing two strings there are two cases to be considered.
Answer
Case 1: two string are in the same length
In this case, we can compare each letter in the strings sequentially and record any mismatches during the comparison. The number of matching counts should be Length(input)−1.
In comparing two strings there are two cases to be considered.
Answer
Case 1: two string are in the same length
In this case, we can compare each letter in the strings sequentially and record any mismatches during the comparison. The number of matching counts should be Length(input)−1.
Case 2: two string are not in the same length (UPDATE 2022-06-14: removed the subcase)
In this case, we just examine the letters sequentially. When a mismatch happens, we record the mismatch and only increase the next position of the longer string leaving the position of the short string as it is. It is to skip the mismatching letter in the longer string to find the matching one coming next.
The overall running time of this algorithm is O(N).
Here is the complete code in C++.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <string> | |
#include <stdlib.h> | |
#include <iostream> | |
using namespace std; | |
bool is_one_edit_distance(string s1, string s2) | |
{ | |
string left = s1, right = s2; | |
int mismatch = 0; | |
int lpos =0, rpos =0; | |
int llen, rlen; | |
if (abs((int)(s1.length() - s2.length())) >1) | |
return false; | |
if(s1== s2) | |
return false; | |
if (s1.length() > s2.length()) | |
{ | |
left = s2; | |
right = s1; | |
} | |
llen = left.length(); | |
rlen = right.length(); | |
while(lpos < llen && rpos <rlen) | |
{ | |
if (left[lpos] != right[rpos]) | |
{ | |
if (llen == rlen) | |
lpos++; | |
mismatch++; | |
rpos++; | |
} else { | |
lpos++; | |
rpos++; | |
} | |
} | |
return (mismatch==1) || (mismatch == 0 && lpos == llen && rpos < rlen); | |
} | |
int main() | |
{ | |
cout <<" xyz, xz = " << is_one_edit_distance("xyz", "xz") <<endl; | |
cout <<" xyz, xyk = " << is_one_edit_distance("xyz", "xyk")<<endl; | |
cout <<" xy, xyz = " << is_one_edit_distance("xy", "xyz")<<endl; | |
cout <<" xyz, xyz = " << is_one_edit_distance("xyz", "xyz")<<endl; | |
cout <<" xyz, xzy = " << is_one_edit_distance("xyz", "xzy")<<endl; | |
cout <<" x, xyz = " << is_one_edit_distance("x", "xyz")<<endl; | |
return 1; | |
} |
Practice statistics:
Planning: 8.47 mins
Coding : 8.55 mins
Validation: 8.11 mins
Note: Made a couple of typos found by IDE and made a critical mistake in logic.
UPDATE(2022-06-14): Solve the problem again and found that the original algorithm has the necessary sub-case. Simplified the algorithm into two cases.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# code | |
def compare_equal_length(left, right): | |
lpos = 0 | |
rpos = 0 | |
matching = 0 | |
while lpos < len(left): | |
if left[lpos] == right[rpos]: | |
matching = matching + 1 | |
lpos = lpos + 1 | |
rpos = rpos + 1 | |
# only one char is mismatching | |
return matching == len(left) - 1 | |
def compare_one_smaller(left, right): # longer = xyz shorter = xz | |
longer = left if len(left) > len(right) else right | |
shorter = left if len(left) != len(longer) else right | |
allowed_mismatch = 1 | |
matching = 0 | |
lpos = 0 | |
spos = 0 | |
while lpos < len(longer) and spos < len(shorter): # lpos = 1 spos =1 matching = 1 allowed_mismatch = 1 | |
if longer[lpos] == shorter[spos]: # x == x | |
matching = matching + 1 | |
lpos = lpos + 1 | |
spos = spos + 1 | |
else: | |
if allowed_mismatch > 0: | |
# move to next char in longer | |
lpos = lpos + 1 | |
allowed_mismatch = allowed_mismatch - 1 | |
else: # more than 1 chars are mismatching | |
return False | |
return matching == len(longer) - 1 | |
def find_editing_distance(left, right): | |
if len(left) == len(right): | |
# case 1 | |
return compare_equal_length(left, right) | |
elif abs(len(left) - len(right)) == 1: | |
# case 2 | |
return compare_one_smaller(left, right) | |
return False | |
# test | |
left = "xyz" | |
right = "xz" | |
result = find_editing_distance(left, right) | |
print ("{}, {}: editable = {}".format(left, right, result)) | |
left = "xyz" | |
right = "xyk" | |
result = find_editing_distance(left, right) | |
print ("{}, {}: editable = {}".format(left, right, result)) | |
left = "xy" | |
right = "xyz" | |
result = find_editing_distance(left, right) | |
print ("{}, {}: editable = {}".format(left, right, result)) | |
left = "xyz" | |
right = "xyz" | |
result = find_editing_distance(left, right) | |
print ("{}, {}: editable = {}".format(left, right, result)) | |
left = "xyz" | |
right = "xzy" | |
result = find_editing_distance(left, right) | |
print ("{}, {}: editable = {}".format(left, right, result)) | |
left = "x" | |
right = "xyz" | |
result = find_editing_distance(left, right) | |
print ("{}, {}: editable = {}".format(left, right, result)) |
8:00: for algorithm
12:00: coding
08:42: for fixing the silly but critical mistake.
Comments
Post a Comment