Finding the shortest path from start to end word given list of words

Given list of array and start and end word as below:
   ["hot", "hit", "his", "hat"]
Start word: "hot"
End word: "his"

Find the shortest sequence starting the from start and ending the with end word.

The rule is, from the start word, each time you can only change one character,
and only use words from the given set, find out how to get to the end word.
In the above example, the sequence is:
   hot  -> hit -> his
(hat is not used)

for this example:
start = 'his'
end = 'hit'
input = ["his", "has", "hit", "hat"]

we can find this:
his -> has -> hat -> hit.

However: his -> hit is the sorted path.




Answer

1. Brute-frce search via DFS search O(N!) (Please read the second option. Be patient)


To find the possible permutation of words starting from start and ending with end words, we need to first need to define the followings:

1. Condition to check if two words are one edit apart. 

In this question, we assume the length of words are the same. Therefore, only UPDATE will be possible action to transform one word to the other. (e.g. from his -> has. need to update i to a)

2. Ensure the path found is starting with the start word and ends with the end word.

To ensure this, re-arranging the given input such that it starts with the start word and ends with the end word.
It takes O(N) time.

3. Finding the shortest permutation.

Now, we have a list of words starting from the start and ending with the end. 

We can do the depth first search using backtracking and find the possible permutation that satisfies the requirements.

One optimization, we can make is that we only consider the permutation that starts with a start.
I did not think of this condition when I was writing the code. But it is good to bring up.

The overall running time of this algorithm is O(N!)
where N is the number of words

Here is the solution in python with the optimization.

class FindEditablePath:
def find(self, list, start, end):
#rearrange the array such that list[0] = start list[-1] = end
rearranged = [start]
for w in list:
if w != start and w != end:
rearranged.append(w)
#add end
rearranged.append(end)
self.result = None
self.start = start
self.end = end
self.dfs(rearranged, 0, [])
return self.result
def can_transform(self, before, after):
pos = 0
matched = 0
while pos < len(before):
if before[pos] == after[pos]:
matched += 1
pos += 1
return matched >= 2
def dfs(self, list, start, found):
#stop invaild path.
if len(found) > 0 and found[0] != self.start:
return
if start >= len(list) and found[0] == self.start and found[-1] == self.end:
if self.result == None or len(found) < len(self.result):
self.result = found[:]
for i in range(start, len(list)):
prev = found[-1] if len(found) > 0 else None
if prev == None or self.can_transform(prev, list[i]) == True:
found.append(list[i])
self.dfs(list, i+1, found)
found.pop()
finder = FindEditablePath()
input = ["hot", "hit", "his", "hat"]
start = 'hot'
end = 'his'
print("start = {} end = {} output ={}".format(start, end, finder.find(input, start, end)))
input = ["his", "has", "hit", "hat"]
start = 'his'
end = 'hit'
print("start = {} end = {} output ={}".format(start, end, finder.find(input, start, end)))

Practice statistic

16:07: to write up the code (two bugs found after running - exit condition and variable)
05:50: write a test case and fix two bugs.

Did not think of optimization when writing the solution

UPDATE(2021-06-21): Solve the problem again. The initial idea was to use DFS to find the shortest path. I wanted to optimize the rearranging process using the hashtable but make one mistake of using the old indices before swapping the words. This resulted in the wrong answers and it took a while to debug.

def can_move(src, dst):
if len(src) != len(dst):
return False
matching = 0
for i in range(len(src)):
if src[i] == dst[i]:
matching +=1
return matching == len(src) - 1
class FindShortest:
def __init__(self, input):
self.vertices = {}
self.input = input
for i in range(len(input)):
self.vertices[input[i]] = i
self.min_path = []
def swap(self, src, dst):
self.vertices[self.input[src]] = dst
self.vertices[self.input[dst]] = src
tmp = self.input[src]
self.input[src] = self.input[dst]
self.input[dst] = tmp
def find_shortest(self, start, end):
# move the start and end to the first and end of the list respectively.
spos = self.vertices[start]
self.swap(spos, 0)
epos = self.vertices[end]
self.swap(len(self.input) - 1, epos)
self.min_path = []
#path to collect the current path
path = [start]
self.find_path(1, path)
return self.min_path
def find_path(self, next, path):
if next >= len(self.input):
# reach the end replace the min_path if current path is shorter.
if len(self.min_path) == 0 or len(path) < len(self.min_path):
self.min_path.clear()
self.min_path += path
for i in range(next, len(self.input)):
# check if currnt i th is reachable from the currnet node in the path.
cur = path[-1]
if can_move(cur, self.input[i]) == True:
path.append(self.input[i])
self.find_path(i + 1, path=path)
path.pop()
# test
input = ["hot", "hit", "his", "hat"]
finder = FindShortest(input)
start = 'his'
end = 'hit'
result = finder.find_shortest(start, end)
print("shorted path from {} to {}: {}".format(start, end, result))
start = 'hot'
end = 'his'
result = finder.find_shortest(start, end)
print("shorted path from {} to {}: {}".format(start, end, result))

Still, DFS is too inefficient and

2. Shortest path search via BFS O(N2) or less


The better way is to do the BFS to find the shortest path given that the weight of the edge between one word to the other is 1.

In that case, we need an additional Array, A to keep track of the shortest path value for each vertex, Vi in A[$V_{i}]. We also need to keep track of whether each node (or word) had been explored. 

This algorithm will run in O(N) time, where N is the number of words.

One catch is that we need to first build the graph by examining each word and comparing it against other words to see if there can be an edge between two words. This process will take O(N2). However, this can be done once during the initialization.

Here is the better algorithm using BFS:

# code
class Vertex:
def __init__(self, value):
self.edges = []
self.value = value
class Edge:
def __init__(self, v, w):
self.v = v
self.w = w
def can_move(src, dst):
if len(src) != len(dst):
return False
matching = 0
for i in range(len(src)):
if src[i] == dst[i]:
matching +=1
return matching == len(src) - 1
class FindShortestBFS:
def __init__(self, input):
self.vertices = {}
for i in range(len(input)):
self.vertices[input[i]] = Vertex(input[i])
# O (N * N)
for i in input:
v = self.vertices[i]
for j in input:
if can_move(v.value, j) == True:
w = self.vertices[j]
v.edges.append(Edge(i, j))
w.edges.append(Edge(j, i))
def swap(self, src, dst):
self.vertices[self.input[src]] = dst
self.vertices[self.input[dst]] = src
tmp = self.input[src]
self.input[src] = self.input[dst]
self.input[dst] = tmp
def find_shortest(self, start, end):
# set up the shortest path array and visited array
A = {i: None for i in self.vertices}
visited = {i: False for i in self.vertices}
A[start] = 0
path = {}
# add the start node to the queue
s = self.vertices[start]
visited[start] = True
path[start] = [start]
queue = [s]
while len(queue) > 0:
cur = queue.pop(0)
for e in cur.edges:
if visited[e.w] != True:
# calculate the path value
A[e.w] = A[e.v] + 1
path[e.w] = path[e.v] + [e.w]
w = self.vertices[e.w]
queue.append(w)
visited[e.w] = True
return A[end], path[end]
# test
input = ["hot", "hit", "his", "hat"]
finder = FindShortestBFS(input)
start = 'his'
end = 'hit'
score, path = finder.find_shortest(start, end)
print("shorted path from {} to {}: {}-{}".format(start, end, score, path))
start = 'hot'
end = 'his'
result = finder.find_shortest(start, end)
score, path = finder.find_shortest(start, end)
print("shorted path from {} to {}: {}-{}".format(start, end, score, path))

Comments

Popular posts from this blog

Planting flowers with no adjacent flower plots

Find the shorted path from the vertex 0 for given list of vertices.