Finding the string in the sorted array of strings with empty strings
Problem
Given a sorted array of strings that is interspersed with empty strings, write a method to find the location of a given string.
Example
input: find "ball" in {"at", "", "", "","ball", "", "", "car", "", "". "dad", "",""}
Output: 4
Answer
Binary Search approach, O(N)
This question is close to the classic binary search problem except for the fact that there are empty strings, which fail the binary search.
Basically, we start with the middle of the array and see if the string in the middle, M is the same as the target string.
If not, we check if a string M is earlier or later than the target alphabetically.
If M is earlier, we search the right side of the array by halving the search space.
Otherwise, we search the left side of the array, which also halves the search space.
The problem arise, when M is an empty string. When the empty string is encountered, we should search both sides of empty strings since we can not determine which side to search. In this case, we can't halve the search space.
Due to the empty string case, the overall running time can not be O(logN).
However, the running time will be less than O(N) in practice.
Here is the complete code in C++.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include<vector> | |
#include<string> | |
#include<iostream> | |
using namespace std; | |
bool compare(string prev, string next) | |
{ | |
int l = 0, r = 0; | |
while(l < prev.length() && r < next.length()) | |
{ | |
if (prev[l] != next[r]) | |
return prev[l] < next[r]; | |
l++; | |
r++; | |
} | |
if (l < prev.length()) | |
return prev[l] < next[r-1]; | |
else if (r< next.length()) | |
return prev[l-1] < next[r]; | |
return prev[l-1] < next[r-1]; | |
} | |
int search(vector<string>& input, string& target, int s, int e) | |
{ | |
if (s > e) | |
return INT_MIN; | |
int h = s+ (e-s)/2; | |
int found = INT_MIN; | |
if ( input[h].length() == 0) | |
{ | |
found = search(input, target, s, h-1); | |
if (found == INT_MIN) | |
found = search(input, target, h+1, e); | |
} else { | |
if (input[h] == target) | |
found = h; | |
else if(compare(input[h], target)) | |
found = search(input, target, h+1, e); | |
else | |
found = search(input, target, s, h-1); | |
} | |
return found; | |
} | |
int main() | |
{ | |
vector<string> input; | |
input.push_back("at"); | |
input.push_back(""); | |
input.push_back(""); | |
input.push_back(""); | |
input.push_back("ball"); | |
input.push_back(""); | |
input.push_back(""); | |
input.push_back("car"); | |
input.push_back(""); | |
input.push_back(""); | |
input.push_back("dad"); | |
input.push_back(""); | |
input.push_back(""); | |
string target = "ball"; | |
int found = search(input, target, 0, input.size()-1); | |
cout<<"output = "<<found<<endl; | |
return 1; | |
} |
Here is the python version of a solution.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
class SearchArray: | |
def __init__(self, input): | |
self.list = input | |
def search_list(self, start, end, target): | |
if (start > end): | |
return -1 | |
half = start + (end - start)/2 | |
value = self.list[half] | |
if value == target: | |
return half | |
elif value == "": | |
left = self.search_list(start, half -1, target) | |
right = self.search_list(half + 1, end, target) | |
return max(left, right) | |
elif target > value: | |
return self.search_list(half+1, end, target) | |
else: | |
return self.search_list(start, half -1 , target) | |
def search(self, target): | |
start = 0 | |
end = len(self.list) -1 | |
return self.search_list(start, end, target) | |
input = ["at", "", "", "","ball", "", "", "car", "", "", "dad", "",""] | |
client = SearchArray(input) | |
print('at is at %d' % client.search('at')) | |
print('ball is at %d' % client.search('ball')) | |
print('char is at %d' %client.search('car')) | |
print('dad is at %d' % client.search('dad')) | |
print('dad is at %d' % client.search('test')) |
UPDATE(2022-06-13): Solved the problem again with the binary search.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
class Search: | |
def __init__(self, input): | |
self.list = input | |
def binary_search(self, min, max, target): | |
if min > max: | |
return None | |
half = (min + max) // 2 | |
if self.list[half] == target: | |
return half | |
elif self.list[half] == "": | |
# search both side | |
found = self.binary_search(min, half -1, target) | |
if found == None: | |
found = self.binary_search(half + 1, max, target) | |
return found | |
elif self.list[half] > target: | |
return self.binary_search(min, half -1, target) | |
return self.binary_search(half + 1, max, target) | |
def search(self, target): | |
min = 0 | |
max = len(self.list) - 1 | |
return self.binary_search(min, max, target) | |
# test | |
input = ["at", "", "", "","ball", "", "", "car", "", "", "dad", "",""] | |
finder = Search(input) | |
target = "ball" | |
found = finder.search(target) | |
print ("pos for {} = {}".format(target, found)) | |
target = "at" | |
found = finder.search(target) | |
print ("pos for {} = {}".format(target, found)) | |
target = "dad" | |
found = finder.search(target) | |
print ("pos for {} = {}".format(target, found)) | |
target = "max" | |
found = finder.search(target) | |
print ("pos for {} = {}".format(target, found)) |
5:30: for algorithm
9:56: coding
Comments
Post a Comment