Finding the string in the sorted array of strings with empty strings

Problem

Given a sorted array of strings that is interspersed with empty strings, write a method to find the location of a given string.

Example

input: find "ball" in  {"at", "", "", "","ball", "", "", "car", "", "". "dad", "",""}

Output: 4


Answer


Binary Search approach, O(N)


This question is close to the classic binary search problem except for the fact that there are empty strings, which fail the binary search.

Basically, we start with the middle of the array and see if the string in the middle, M is the same as the target string.

If not, we check if a string M is earlier or later than the target alphabetically. 
If M is earlier, we search the right side of the array by halving the search space.
Otherwise, we search the left side of the array, which also halves the search space. 

The problem arise, when M is an empty string. When the empty string is encountered, we should search both sides of empty strings since we can not determine which side to search. In this case, we can't halve the search space.

Due to the empty string case, the overall running time can not be O(logN).

However, the running time will be less than O(N) in practice.
Here is the complete code in C++.

#include<vector>
#include<string>
#include<iostream>
using namespace std;
bool compare(string prev, string next)
{
int l = 0, r = 0;
while(l < prev.length() && r < next.length())
{
if (prev[l] != next[r])
return prev[l] < next[r];
l++;
r++;
}
if (l < prev.length())
return prev[l] < next[r-1];
else if (r< next.length())
return prev[l-1] < next[r];
return prev[l-1] < next[r-1];
}
int search(vector<string>& input, string& target, int s, int e)
{
if (s > e)
return INT_MIN;
int h = s+ (e-s)/2;
int found = INT_MIN;
if ( input[h].length() == 0)
{
found = search(input, target, s, h-1);
if (found == INT_MIN)
found = search(input, target, h+1, e);
} else {
if (input[h] == target)
found = h;
else if(compare(input[h], target))
found = search(input, target, h+1, e);
else
found = search(input, target, s, h-1);
}
return found;
}
int main()
{
vector<string> input;
input.push_back("at");
input.push_back("");
input.push_back("");
input.push_back("");
input.push_back("ball");
input.push_back("");
input.push_back("");
input.push_back("car");
input.push_back("");
input.push_back("");
input.push_back("dad");
input.push_back("");
input.push_back("");
string target = "ball";
int found = search(input, target, 0, input.size()-1);
cout<<"output = "<<found<<endl;
return 1;
}



Here is the python version of a solution.

class SearchArray:
def __init__(self, input):
self.list = input
def search_list(self, start, end, target):
if (start > end):
return -1
half = start + (end - start)/2
value = self.list[half]
if value == target:
return half
elif value == "":
left = self.search_list(start, half -1, target)
right = self.search_list(half + 1, end, target)
return max(left, right)
elif target > value:
return self.search_list(half+1, end, target)
else:
return self.search_list(start, half -1 , target)
def search(self, target):
start = 0
end = len(self.list) -1
return self.search_list(start, end, target)
input = ["at", "", "", "","ball", "", "", "car", "", "", "dad", "",""]
client = SearchArray(input)
print('at is at %d' % client.search('at'))
print('ball is at %d' % client.search('ball'))
print('char is at %d' %client.search('car'))
print('dad is at %d' % client.search('dad'))
print('dad is at %d' % client.search('test'))

UPDATE(2022-06-13): Solved the problem again with the binary search.

class Search:
def __init__(self, input):
self.list = input
def binary_search(self, min, max, target):
if min > max:
return None
half = (min + max) // 2
if self.list[half] == target:
return half
elif self.list[half] == "":
# search both side
found = self.binary_search(min, half -1, target)
if found == None:
found = self.binary_search(half + 1, max, target)
return found
elif self.list[half] > target:
return self.binary_search(min, half -1, target)
return self.binary_search(half + 1, max, target)
def search(self, target):
min = 0
max = len(self.list) - 1
return self.binary_search(min, max, target)
# test
input = ["at", "", "", "","ball", "", "", "car", "", "", "dad", "",""]
finder = Search(input)
target = "ball"
found = finder.search(target)
print ("pos for {} = {}".format(target, found))
target = "at"
found = finder.search(target)
print ("pos for {} = {}".format(target, found))
target = "dad"
found = finder.search(target)
print ("pos for {} = {}".format(target, found))
target = "max"
found = finder.search(target)
print ("pos for {} = {}".format(target, found))

5:30: for algorithm
9:56: coding

Comments

Popular posts from this blog

Planting flowers with no adjacent flower plots

Find the shorted path from the vertex 0 for given list of vertices.