Finding m missing numbers in the Array of size (n - m) [reviewed]
Problem
Array of size (n - m) with numbers from 1..n with m of them missing. Find all of the missing numbers in O(log n). Array is sorted.
example:
n = 8
m = 2
arr = = {1,2,4,5,6,8}
output = {3,7}
n = 8
m = 2
arr = = {2,3,4,5,6,7}
output = {1, 8}
As requested in the question, our algorithm should run in O(logN). It is lucky to know the required running time since it gives an idea of which algorithm you can choose to solve the question.
In many cases, no running time requirement is given. Therefore, keep in mind that we should aim for the fastest possible.
Given O(logN) running time requirement, we can easily think of using binary search. Problem is how we can use it and it is enough to solve the problem. In fact, we need to consider the following three cases.
As shown Figure 1, if numbers are missing in the left side of array, the index, i of the element in the beginning of array is not equal to A[0]−1. It is an easy case to handle, we just need to add missing number from 0 to A[0]-1. (running time O(M),M<N)
UPDATE(2019-07-26): There is typo in Figure 2, it should be 5 instead of 7 in the given array.
As shown Figure 2, if numbers are missing in the right side of array, the index, i of the element in the end of array is not smaller than to N. It is also an easy case to handle, we just need to add missing number from A[0]+1 to N. (running time O(M),M<N)
This case is a tricky case that we need to put most of our efforts. To achieve O(logN) running time, we need to use binary search to reduce the searching space by half. Here is the rough algorithm.
In this algorithm, we only ignore the half of the search space, if there is no missing numbers in the range, which means all integers in the range are consecutive.
As requested in the question, our algorithm should run in O(logN). It is lucky to know the required running time since it gives an idea of which algorithm you can choose to solve the question.
In many cases, no running time requirement is given. Therefore, keep in mind that we should aim for the fastest possible.
Given O(logN) running time requirement, we can easily think of using binary search. Problem is how we can use it and it is enough to solve the problem. In fact, we need to consider the following three cases.
Case 1) Left side of the given array has missing numbers
![]() |
Figure 1. Missing numbers in the left side of array |
As shown Figure 1, if numbers are missing in the left side of array, the index, i of the element in the beginning of array is not equal to A[0]−1. It is an easy case to handle, we just need to add missing number from 0 to A[0]-1. (running time O(M),M<N)
Case 2) Right side of the given array has missing numbers
![]() |
Figure 2. the right size of array has missing number. |
UPDATE(2019-07-26): There is typo in Figure 2, it should be 5 instead of 7 in the given array.
As shown Figure 2, if numbers are missing in the right side of array, the index, i of the element in the end of array is not smaller than to N. It is also an easy case to handle, we just need to add missing number from A[0]+1 to N. (running time O(M),M<N)
Case 3) Numbers are missing among integers in the array
![]() |
Figure 3. Numbers are missing among integers in the array. |
This case is a tricky case that we need to put most of our efforts. To achieve O(logN) running time, we need to use binary search to reduce the searching space by half. Here is the rough algorithm.
Search(s, e, A, m), s = start index, e = end index, A = input array, m = # of missing number { if (s>e or m = missing.size()) return if e-s == 1 // only two elements are left to search Add any missing numbers between A[s] and A[e] return h = middle of s and e if A[h] - A[s] != h -s // numbers are missing between s and h search(s, h, A, m) // search between s and h if A[e] - A[h] != e -h // numbers are missing between h and e search(h, e, A, m) // search between h and e }
In this algorithm, we only ignore the half of the search space, if there is no missing numbers in the range, which means all integers in the range are consecutive.
The running time of this algorithm is O(logN)
Based on the assumption that M is trivial compare the N, the total running time of handing all three cases stated above is as below: O(M)+O(logN)+O(M)≈O(logN)
Here is the complete code running in O(logN)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include<vector> | |
#include<iostream> | |
using namespace std; | |
void search_middle(int s, int e, vector<int> A, vector<int>& missing, int m) | |
{ | |
if (s > e || missing.size() == m) | |
return; | |
if (e-s == 1) | |
{ | |
for (int i = A[s]+1; i < A[e]; i++) | |
missing.push_back(i); | |
return; | |
} | |
int h = s + (e-s)/2; | |
if (A[h] - A[s] != h-s) | |
search_middle(s, h, A, missing, m); | |
if (A[e] - A[h] != e-h) | |
search_middle(h, e, A, missing, m); | |
} | |
vector<int> find_missing_numbers(vector<int>& A, int n, int m) | |
{ | |
vector<int> result; | |
int i; | |
if (A[0] > 1) //missing left of array | |
{ | |
for (i = 1; i < A[0]; i++) | |
result.push_back(i); | |
} | |
if (A[A.size()-1] < n) //missing right of array | |
{ | |
for (i = A[A.size()-1]+1; i<=n; i++) | |
result.push_back(i); | |
} | |
search_middle(0, A.size()-1, A, result, m); | |
return result; | |
} | |
int main() | |
{ | |
int i = 0; | |
vector<int> input; | |
input.push_back(1); | |
input.push_back(2); | |
input.push_back(4); | |
input.push_back(5); | |
input.push_back(6); | |
input.push_back(8); | |
vector<int>result = find_missing_numbers(input, 8, 2); | |
cout<<"result 1 = { "; | |
for (i = 0; i < result.size(); i++) | |
cout<<" "<< result[i] << ", "; | |
cout<<" }"<<endl; | |
vector<int> input2; | |
input2.push_back(2); | |
input2.push_back(3); | |
input2.push_back(4); | |
input2.push_back(5); | |
input2.push_back(6); | |
input2.push_back(7); | |
result = find_missing_numbers(input2, 8, 2); | |
cout<<"result 2 = { "; | |
for (i = 0; i < result.size(); i++) | |
cout<<" "<< result[i] << ", "; | |
cout<<"}"<<endl; | |
return 1; | |
} |
Practice statistics
22:17 : to think up the algorithm and hand-wrote the algorithm (still did not consider the corner cases)
22:50: to write the code in the notepad and rethink the algorithm to consider the corner cases.
6:09: to add the test cases to main()
3:34: to fix up the typos through IDE.
Total time: 54:42 (too long)
UPDATE(2019-07-26): solve this problem again. Could not come up with a complete algorithm in 10 mins.
Here is a solution in python.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
class FindMissingNumbers: | |
def __init__(self): | |
self.missing = [] | |
def find_missing_numbers(self, list, N, M): | |
self.missing = [] | |
#check if there is any preceding missing numbers | |
if list[0] > 1: | |
# there are missing numbers. add 1 to list[0] - 1 to missing | |
for i in range(1, list[0]): | |
self.missing.append(i) | |
#check if there are any trailing missing numbers | |
if N - list[-1] > 0: | |
# add missing number between list[-1] and N | |
for i in range(list[-1]+1, N + 1): | |
self.missing.append(i) | |
#now check for missing numbers in the middle via binary search. | |
s = 0 | |
e = len(list) - 1 | |
self.binary_search(list, s, e, M) | |
return self.missing | |
def binary_search(self, list, s, e, M): | |
if s >= e or len(self.missing) == M: | |
return | |
# there is missig number between s and e | |
if e - s == 1 and e - s != list[e] - list[s]: | |
# add all numbers missing between list[s] and list[e] | |
for i in range(list[s] + 1, list[e]): | |
self.missing.append(i) | |
return | |
half = s + (e -s)//2 | |
# if numbers are missing between s and half | |
if half - s != list[half] - list[s]: | |
self.binary_search(list, s, half, M) | |
# if numbrers are missing between half and e | |
if e - half != list[e] - list[half]: | |
self.binary_search(list, half, e, M) | |
f = FindMissingNumbers() | |
input = [1,2,4,5,6,8] | |
M = 2 | |
N = 8 | |
print ("input = {} N = {} M = {} output = {}".format(input, N, M, f.find_missing_numbers(input, N, M))) | |
input = [2,3,4,5,6,7] | |
M = 2 | |
N = 8 | |
print ("input = {} N = {} M = {} output = {}".format(input, N, M, f.find_missing_numbers(input, N, M))) | |
Practice statistics:
10:00: could not come up with solution.
15:00: to write up the solution.
5:00: to review code and fix two bugs (through running)
- condition to determine missing trailing numbers were wrong
- test code was putting wrong input variable.
Comments
Post a Comment