Produce the non-repeating string from a given string [updated]
Problem
Rearrange characters in a string so that no character repeats twice.
Input: abcccddaaabc
output: cacacbadcdab
First all, given string "abcccddaaabc", we need to understand that we first need to find the longest repeat string, which are "aaaa" or "cccc".
After that we need to put shorter repeated characters between longest string, so that we can produce the non-repeated string.
One more thing we need to keep in mind is that "longest repeated" string is the string is not used to produce the resultant string yet.
This means we need to find new longest repeated string each time. Look at the example in Figure 1.
In iteration 3, the longest repeated strings changes to 'b' from 'a'.
This means we need to keep track of the current longest repeated string in each iteration, which is quite expensive.
Here is the rough algorithm.
1. Sort the input string in ascending order (descending order is ok as well.) - O(NlogN)
2. Count the occurrence of each character and put { char, occurrence count} pair into Max Heap, which put the node with largest occurrence count on the time of the tree.
3. Take two nodes from heap and put two characters into the result string avoiding the repetition.
4. Decrease the occurrence counts of two nodes by 1.
4. put back the nodes if their occurrence count is still positive.
Step 2 - 3 will take O(NlogN), O(N) for iterating over the Max Heap, and O(logN) for extracting the max node and putting back the nodes.
Overall running time is O(NlogN), where N is the length of the input string.
Here is the complete code.
UPDATE(2019-07-20): solved again and first attempt used the hashtable to record the occurrence but
tried to reconstruct the solution in round robin fashion regardless of each character's repetition count.
Problem of this approach is that it is possible that shorter chars can be left last instead of the longest char.
In the second attempt, I used the MaxHeap to keep the {char: repeat count} in descending order.
this approach ensures the longer chars are filled with shorter chars thereby we will end up with the longer chars left last.
Here is python solution:
First attempt:
51:50: failed to come up with a correct and working solution.
Second attempt:
Used a MaxHeap and took 30 mins to write the working code.
Finding the pattern
First all, given string "abcccddaaabc", we need to understand that we first need to find the longest repeat string, which are "aaaa" or "cccc".
After that we need to put shorter repeated characters between longest string, so that we can produce the non-repeated string.
One more thing we need to keep in mind is that "longest repeated" string is the string is not used to produce the resultant string yet.
This means we need to find new longest repeated string each time. Look at the example in Figure 1.
![]() |
Figure 1: Sample algorithm |
This means we need to keep track of the current longest repeated string in each iteration, which is quite expensive.
Here is the rough algorithm.
1. Sort the input string in ascending order (descending order is ok as well.) - O(NlogN)
2. Count the occurrence of each character and put { char, occurrence count} pair into Max Heap, which put the node with largest occurrence count on the time of the tree.
3. Take two nodes from heap and put two characters into the result string avoiding the repetition.
4. Decrease the occurrence counts of two nodes by 1.
4. put back the nodes if their occurrence count is still positive.
Step 2 - 3 will take O(NlogN), O(N) for iterating over the Max Heap, and O(logN) for extracting the max node and putting back the nodes.
Overall running time is O(NlogN), where N is the length of the input string.
Here is the complete code.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include<string> | |
#include<algorithm> | |
#include<iostream> | |
#include<vector> | |
#include<math.h> | |
using namespace std; | |
struct node { | |
char c; | |
int n; | |
node(char ch, int num): c(ch), n(num){} | |
}; | |
class MaxHeap{ | |
public: | |
void insert(node * n) | |
{ | |
list.push_back(n); | |
BubbleUp(list.size()-1); | |
} | |
node * extract() | |
{ | |
node * r; | |
if (list.size() ==0) | |
return 0; | |
r = list[0]; | |
swap(0, list.size()-1); | |
list.pop_back(); | |
Heapify(0); | |
return r; | |
} | |
int size() {return list.size();} | |
private: | |
int left(int p) {return 2*p+1;} | |
int right(int p) {return 2*p+2;} | |
int parent(int c) {return floor((double)(c/2));} | |
bool IsParent(int p, int c) | |
{ | |
return list[p]->n > list[c]->n; | |
} | |
void Heapify(int p) | |
{ | |
int largest = p; | |
int l = left(p); | |
int r = right(p); | |
if (l < list.size() && !IsParent(largest, l)) | |
largest = l; | |
if (r < list.size() && !IsParent(largest, r)) | |
largest = r; | |
if (p != largest) | |
{ | |
swap(p, largest); | |
Heapify(largest); | |
} | |
} | |
void BubbleUp(int c) | |
{ | |
if (c == 0) | |
return; | |
int p = parent(c); | |
if (!IsParent(p, c)) | |
{ | |
swap(p, c); | |
BubbleUp(p); | |
} | |
} | |
void swap(int s, int d) | |
{ | |
node* t = list[s]; | |
list[s] = list[d]; | |
list[d] = t; | |
} | |
vector<node *> list; | |
}; | |
string create_non_repeating_string(string input) | |
{ | |
MaxHeap heap; | |
sort(input.begin(), input.end()); | |
string result; | |
char cur = ' '; | |
int count = 0; | |
for (int i =0; i < input.length(); i++) | |
{ | |
if (cur == ' ') | |
{ | |
cur = input[i]; | |
count = 1; | |
} else if (cur == input[i]) | |
{ | |
count++; | |
} else { | |
heap.insert(new node(cur, count)); | |
cur = input[i]; | |
count = 1; | |
} | |
if (i == input.length()-1) | |
{ | |
heap.insert(new node(cur, count)); | |
} | |
} | |
while(heap.size()) | |
{ | |
char last; | |
node * first = heap.extract(); | |
node * second = heap.extract(); | |
if (!second && (first->n > 1 || last == first->c)) | |
return ""; | |
if (last == first->c) | |
{ | |
result.push_back(second->c); | |
second->n--; | |
result.push_back(first->c); | |
first->n--; | |
last= first->c; | |
} else { | |
result.push_back(first->c); | |
first->n--; | |
last= first->c; | |
if (second) | |
{ | |
result.push_back(second->c); | |
second->n--; | |
last= second->c; | |
} | |
} | |
if (first && first->n > 0) | |
heap.insert(first); | |
if (second && second->n > 0) | |
heap.insert(second); | |
} | |
return result; | |
} | |
int main() | |
{ | |
string input="abcccddaaabc"; | |
string result = create_non_repeating_string(input); | |
if ( result.length() == 0) | |
cout<<"Can't find the non-repeated string"<<endl; | |
else | |
cout<<"Non-repeated string = "<<result<<endl; | |
return 1; | |
} |
Practice statistics
13:00 to think up the algorithm
14:12 to hand-wrote the solution.
14:12: to write the solution in the IDE.
13:05: to write up the max heap code. (got the bubbleUp() method wrong).
UPDATE(2019-07-20): solved again and first attempt used the hashtable to record the occurrence but
tried to reconstruct the solution in round robin fashion regardless of each character's repetition count.
Problem of this approach is that it is possible that shorter chars can be left last instead of the longest char.
In the second attempt, I used the MaxHeap to keep the {char: repeat count} in descending order.
this approach ensures the longer chars are filled with shorter chars thereby we will end up with the longer chars left last.
Here is python solution:
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
''' | |
Rearrange characters in a string so that no character repeats twice. | |
Input: abcccddaaabc | |
output: cacacbadcdab | |
First attempt: | |
Easist way. read the input and put it in a hashtable with counter | |
Read over hash table in a loop and find places to place char. | |
This way had flaw since it is not guarantee that what's left was the longest one. We need to use up longer strings first. | |
51:50: to write up code. | |
First attempt was over time and partially in correct. | |
Need to use Heap to keep the longest char and fill it with second longest char. | |
''' | |
class MaxHeap: | |
def __init__(self): | |
self.list = [None] | |
def extract(self): | |
if len(self.list) <= 1: | |
return None | |
value = self.list[1] | |
self.swap(1, len(self.list) -1 ) | |
self.list.pop() | |
self.bubbleDown(1) | |
return value | |
def get_length(self): | |
return len(self.list) - 1 | |
def add(self, value): | |
self.list.append(value) | |
self.bubbleUp(len(self.list) - 1) | |
def left(self, p): | |
return 2 * p | |
def right(self, p): | |
return 2 * p + 1 | |
def parent(self, c): | |
return c//2 | |
def swap(self, l, r): | |
tmp = self.list[l] | |
self.list[l] = self.list[r] | |
self.list[r] = tmp | |
def is_child(self, p, c): | |
return self.list[p][1] > self.list[c][1] | |
def bubbleDown(self, parent): | |
if parent >= len(self.list) - 1: | |
return | |
left = self.left(parent) | |
if left > len(self.list) -1: | |
return | |
right = self.right(parent) | |
to_swap = left if right > len(self.list) -1 or self.is_child(left, right) == True else right | |
if self.is_child(parent, to_swap) != True: | |
self.swap(parent, to_swap) | |
self.bubbleDown(to_swap) | |
def bubbleUp(self, child): | |
if child <= 1: | |
return | |
parent = self.parent(child) | |
if self.is_child(parent, child)!= True: | |
self.swap(parent, child) | |
self.bubbleUp(parent) | |
def rearrage(input): | |
dic = {} | |
for c in input: | |
dic[c] = 1 if c not in dic else dic[c] + 1 | |
heap = MaxHeap() | |
for k in dic: | |
heap.add([k, dic[k]]) | |
result = [] | |
last_char = None | |
while heap.get_length() > 0: | |
first = heap.extract() | |
second = heap.extract() | |
if second == None: | |
if first[1] > 1 or first[0] == last_char: | |
return [] | |
result.append(first[0]) | |
first[1] -= 1 | |
elif last_char == first[0]: | |
result.append(second[0]) | |
second[1] -= 1 | |
else: | |
result.append(first[0]) | |
first[1] -= 1 | |
last_char = result[-1] | |
if first[1] > 0: | |
heap.add(first) | |
if second != None and second[1] > 0: | |
heap.add(second) | |
return "".join(result) | |
input = 'abcccddaaabc' | |
print("input = {}, output = {}".format(input, rearrage(input))) |
51:50: failed to come up with a correct and working solution.
Second attempt:
Used a MaxHeap and took 30 mins to write the working code.
Comments
Post a Comment