# Sorting and Searching

10.1 Sorted Merge: You are given two sorted arrays, A and B, where A has a large enough buffer at the end to hold B. Write a method to merge B into A in sorted order.
• Order 2n, with O(n) extra space
```c++ void sorted_merge(vector &one, vector &two) { int iter1 = 0, iter2 = 0; int count = 0; vector temp(one.size()); while (one.at(iter1) != 0 && (iter1 < one.size() || iter2 < two.size())) { if (one.at(iter1) < two.at(iter2)) temp.at(count++) = one.at(iter1++);
else if (one.at(iter1) >= two.at(iter2))
temp.at(count++) = two.at(iter2++);
}
while (one.at(iter1) != 0 && iter1 < one.size()) temp.at(count++) = one.at(iter1++); while (iter2 < two.size()) temp.at(count++) = two.at(iter2++);
for (int i = 0; i < temp.size(); i++) one.at(i) = temp.at(i); }
int main() { vector one = { 1,2,3,4 }; one.resize(10); vector two = { -1,2,3,10,6,20 };
sorted_merge(one, two);
print(one);
}
* O(N) time, O(1) Space, best as we can get
* Simpler too. We just decrement from the end, and look for the greatest elements.
```c++
void sorted_merge(vector<int> &one, vector<int> &two) {
if (two.empty()) return;
// find one end position
int iter1, iter2 = two.size() - 1;
for (int i = 0; i < one.size(); i++) {
if (one.at(i) == 0) {
iter1 = i - 1;
break;
}
}
int count = one.size() - 1;
while (iter1 >= 0 && iter1 >= 0) {
if (one.at(iter1) > two.at(iter2))
one.at(count--) = one.at(iter1--);
else if (one.at(iter1) <= two.at(iter2))
one.at(count--) = two.at(iter2--);
}
while (iter1 >= 0) one.at(count--) = one.at(iter1--);
while (iter2 >= 0) one.at(count--) = two.at(iter2--);
}
int main()
{
vector<int> one = { -3 ,-1, 4, 10 };
one.resize(8);
vector<int> two = { -10, 5,6,7 };
sorted_merge(one, two);
print(one);
one = { 1,2,3,4 };
one.resize(10);
two = { -1,2,3,10,20,30 };
sorted_merge(one, two);
print(one);
}
10.2 Group Anagrams: Write a method to sort an array of strings so that all the anagrams are next to each other.
• O(NlogN) time?, O(N) extra space
• One way of doing this is first sort the strings, and pair them into a map.
• Then we can sort the map by its second value, then return its first value.
typedef std::pair<std::string, string> p;
struct Comp {
bool operator()(const p &left, const p &right) const {
return left.first < right.second;
}
};
void group_anagrams(vector<string> &strings) {
map<string, string> word_anagram;
for (int i = 0; i < strings.size(); i++) {
string cpy = strings.at(i);
sort(strings.at(i).begin(), strings.at(i).end());
word_anagram.insert( { { cpy }, { strings.at(i) } });
}
// dump into vector in order to make it sortable
vector<pair<string, string>> cpy(word_anagram.begin(), word_anagram.end());
sort(cpy.begin(), cpy.end(), Comp());
int count = 0;
for (auto &i : cpy) {
strings.at(count++) = i.first;
}
}
int main() {
vector<string> words = {"could", "cloud", "dance", "caned", "own", "now", "clams", "calms", "wings", "swing", "a" };
group_anagrams(words);
print(words);
}
10.3 Search in Rotated Array: Given a sorted array of n integers that has been rotated an unknown number of times, write code to find an element in the array. You may assume that the array was originally sorted in increasing order.
EXAMPLE
Input:find 5 in { 15, 16, 19, 20, 25, 1, 3, 4, 5, 7,10, 14 }
Output: 8 (the index of 5 in the array)
• O(logN) time, with O(N) extra space (recursive)
• Algorithm: Find focal point in logn (minimum), preform binary search of left or right half.
// finding min element in O(logN)
int find_focal(vector<int> &ar, int low, int high) {
// not rotated
if (ar.begin() < ar.end()[-1]) return 0;
else {
int middle = low + ((high - low) / 2);
// min element = next element is smaller || prev element is larger
if (ar.at(middle + 1) < ar.at(middle))
return middle + 1;
if (ar.at(middle - 1) > ar.at(middle))
return middle;
// search right
else if (ar.at(low) < ar.at(middle)) {
find_focal(ar, middle + 1, high);
}
else {
find_focal(ar, low, middle - 1);
}
}
}
int binary_search(vector<int> &ar, int low, int high, int target) {
if (high < low) return INT_MIN;
int middle = low + ((high - low) / 2);
if (ar.at(middle) == target) return middle;
// search right
else if (ar.at(middle) < target) {
binary_search(ar, middle + 1, high, target);
}
else {
binary_search(ar, low, middle - 1, target);
}
}
int find_rotated(vector<int> &ar, int findthis) {
if (ar.empty()) return INT_MIN;
// 1) Find focal point (min element)
int focal = find_focal(ar, 0, ar.size() - 1);
if (ar.at(focal) == findthis) return focal;
// search right
if (findthis > ar.at(focal) && findthis <= ar.end()[-1]) {
return binary_search(ar, focal + 1, ar.size() - 1, findthis);
}
// search left
else {
return binary_search(ar, 0, focal - 1, findthis);
}
}
int main() {
vector<int> ar = { 15, 16, 19, 20, 25, 1, 3, 4, 5, 7,10, 14 };
cout << find_rotated(ar, 5) << endl;
ar = { 20, 25, 1, 3, 4, 5, 7,10, 14, 15, 16, 17, 18 };
cout << find_rotated(ar, 5) << endl;
}
10.4 Sorted Search, No Size: You are given an array-like data structure Listy which lacks a size method. It does, however, have an elementAt (i) method that returns the element at index i in 0(1) time. If i is beyond the bounds of the data structure, it returns -1. (For this reason, the data structure only supports positive integers.) Given a Listy which contains sorted, positive integers, find the index at which an element x occurs. If x occurs multiple times, you may return any index.
• O(logN) time, O(N) space
• Algorithm: Find length in LogN, then preform binary search.
class Listy {
public:
Listy() {}
int at(int i) {
if (i > stuff.size() - 1)
return -1;
else return stuff.at(i);
}
void push_back(int i) {
stuff.push_back(i);
}
private:
vector<int> stuff;
};
int binary_search(Listy &ar, int low, int high, int target) {
if (high < low) return INT_MIN;
int middle = low + ((high - low) / 2);
if (ar.at(middle) == target) return middle;
// search right
else if (ar.at(middle) < target) {
binary_search(ar, middle + 1, high, target);
}
else {
binary_search(ar, low, middle - 1, target);
}
}
int binary_search_length(Listy &ar, int low, int high) {
if (high < low) return INT_MIN;
int middle = low + ((high - low) / 2);
if (ar.at(middle) == -1 && ar.at(middle - 1) != -1)
return middle;
// search right
else if (ar.at(middle) != -1) {
binary_search_length(ar, middle + 1, high);
}
else {
binary_search_length(ar, low, middle - 1);
}
}
int find_max_power(Listy &ar) {
int count = 0;
int cur_length = pow(2, count++);
while (ar.at(cur_length) != -1) {
cur_length = pow(2, count++);
}
return count - 1;
}
int no_size_search(Listy &ar, int target) {
if (ar.at(0) == -1) return 0;
int max_pow = find_max_power(ar);
int low = pow(2, max_pow - 1);
int high = pow(2, max_pow);
int size = binary_search_length(ar, low, high);
return binary_search(ar, 0, size - 1, target);
}
int main() {
Listy ar;
for (int i = 0; i < 93893; i++)
ar.push_back(i);
cout << no_size_search(ar, 239) << endl;
}
10.5 Sparse Search: Given a sorted array of strings that is interspersed with empty strings, write a method to find the location of a given string.
EXAMPLE
Input: ball, {"at", "", "", "", "ball", "", "", "car", "", "", "dad", "", ""}
Output: 4
• Worst case order O(n), Avg. O(logN), O(N) space
• Algorithm: Just ignore the empty strings. If you find an empty string, then preform a radial search outwards from that location until you find a non-empty string. This works because all be need is the closest thing to compare the target string with the middle. Technically, either the right or left string from the space could work, because either one will notify us whether we need to search right or left. We just chose the faster alternative.
int binary_search(vector<string> &strs, int low, int high, string target) {
if (high < low) return -1;
int middle = low + ((high - low) / 2);
if (strs.at(middle).empty()) {
int left = middle - 1;
int right = middle + 1;
while (left > low && right < high) {
if (strs.at(left).empty())
left--;
else {
middle = left;
break;
}
if (strs.at(right).empty())
right++;
else {
middle = right;
break;
}
}
}
if (strs.at(middle) == target) return middle;
// search right
else if (strs.at(middle) < target) {
binary_search(strs, middle + 1, high, target);
}
else {
binary_search(strs, low, middle - 1, target);
}
}
int sparse_search(vector<string> &strs, string str) {
if (strs.empty() || str == "") return -1;
return binary_search(strs, 0, strs.size() - 1, str);
}
int main() {
vector<string> strings = { "at", "", "", "", "ball", "", "", "car", "", "", "dad", "", "" };
cout << sparse_search(strings, "ball") << endl;
}
10.6 Sort Big File: Imagine you have a 20 GB file with one string per line. Explain how you would sort the file.
• Thoughts when I have no idea:
• Cant fit in RAM
• Assuming 4 GB of RAM, would take in 4GB of data at a time, and then group the data into 5 different containers of disk. The amount of containers that I will need will be = ceil(26/5), where Container 1 contains words that start with {a,b,c,d,e}, Container 2 = {f,g,h,i,j}, ..., and Container 6 = {w,x,y,z}.
• Quick sort all containers (in-place alg, no extra memory used).
• Combine containers by on disk sequentially in their preserved order.
• Solution:
• Bring in X amount of data into X amount of memory and sort each chuck in the file. Then preform inplace_merge on each check until the entire array is sorted.
• For example, if you have 8 pages, and your memory can take in 2 pages at a time. Then sort the first sequential 2, until you reach the end of the pages.
• Then sort each sequential 4 by merging the two together.
• Finally sort each sequential 8 by merging the 4 together.
• To understand why we can merge more than 4 pages at a time we have to know how inplace merge works. In place merge compares at most two elements together (2 pages). Since the elements are sorted, we just have to look at the top of the stack of the two stacks. We select the pages one by one and place them into disk at the same time in a sorted manner.
10.7 Missing Int: Given an input file with four billion non-negative integers, provide an algorithm to generate an integer that is not contained in the file. Assume you have 1 GB of memory available for this task. FOLLOW UP What if you have only 10MB of memory? Assume that all the values are distinct and we now have no more than one billion non-negative integers.
• Brainstorm:
• Assuming 4 byte (32 bit) unsigned integers that represent 2^32 possible things. [0 - 2^32]
• 4 billion integers have to consume 128,000,000,000 bits or 16 Billion bytes which is roughly = 16x10^12/1x10^9 = 16 GB of memory. Memory space is insufficient to represent all numbers individually.
• Idea: Perform external merge sort (above). Build 16 hash tables, that each contain n/2 space (500MB) that each store the contents of the digits. I chose to do this as a preprocessing step. Then I would read in the hash tables one by one from disk, and iterate to find the first missing positive in O(N) time.
• Solution:
• Use a bit vector to that hashes the value of the unsigned integer into the bitset.
• 1 GB of memory can hold 2^30 bytes or 2^30 x 8 bits (8 billion) distinguishable things. Because we only have 4 billion elements, these can all hash to the bit vector that contains 4 billion boolean positions. Duplicates can be handled as well because we only need to flip the bit if it is true.
• Then, iterate through the array, and find the first missing index.
• This will take O(N) space, and O(N) time.
template<size_t size>
unsigned long int getFirstMissingNumber(bitset<size> bits) {
for (int i = 0; i < bits.size(); i++) {
if (!bits[i]) return i;
}
}
int main() {
srand(time(nullptr));
unsigned long int integers = 4000000000;
// (pow(2, 30) * 8) - 1 = 1GB = 8 billion bits
// 4294967295 (2^32-1) = unsigned long
bitset<ULONG_MAX> bits;
bits.reset(); // set to zero
for (unsigned long int i = 1; i <= integers + 1; i++) {
unsigned long int r = rand() % ULONG_MAX;
bits.set(r - 1, true);
}
cout << getFirstMissingNumber(bits) << endl;
}
10.8 Find Duplicates: You have an array with all the numbers from 1 to N, where N is at most 32,000. The array may have duplicate entries and you do not know what N is. With only 4 kilobytes of memory available, how would you print all duplicate elements in the array?
• Brainstorm:
• Given 32000 numbers each of which are 32 bit integers, we have a total of 1024000 bits of information, and insufficient 4kb = 2^12 bits = 4096 bits of memory. (250x fewer).
• Store in 500 hash tables. That contain the digits as well as the frequencies of the digits.
• Merge the hash tables with external merge sort.
• Print out frequencies stored.
• Solution:
• Bit vector.
10.9 Sorted Matrix Search: Given an M x N matrix in which each row and each column is sorted in ascending order, write a method to find an element.
• Approach:
• The big idea in this problem was recognizing that a 2d array can be read as a single dimension array and vise-versa. A sorted matrix down the diagonal also implies that we can aline each row next to each other sequentially and have a single sorted array.
struct MyException : public exception {
const char* what() const throw() {
return "Vector was empty";
}
};
pair<int, int> d1_to_d2(int index_d1, int dim) {
int row = index_d1 / dim;
int col = index_d1 % dim;
return make_pair(row, col);
}
pair<int, int> matrix_search(vector<vector<int>> &nxnMatrix, int find, int low, int high) {
if (high > low)
{
int middle = low + ((high - low) / 2);
pair<int, int> d2 = d1_to_d2(middle, nxnMatrix.size());
if (nxnMatrix.at(d2.first).at(d2.second) == find) {
return d2;
}
else if (nxnMatrix.at(d2.first).at(d2.second) > find) {
//search left
return matrix_search(nxnMatrix, find, low, middle - 1);
}
else {
//search right
return matrix_search(nxnMatrix, find, middle + 1, high);
}
}
return make_pair(INT_MIN, INT_MIN);
}
pair<int, int> matrix_search(vector<vector<int>> &nxnMatrix, int find) {
if (nxnMatrix.empty()) {
throw MyException();
}
return matrix_search(nxnMatrix, find, 0, (nxnMatrix.size() * nxnMatrix.at(0).size()) - 1);
}
int main() {
vector<vector<int>> matrix = {
{ 1, 2, 3, 4, 5, 6 },
{ 7, 8, 9, 10,11,12 },
{ 13,14,15,16,17,18 },
{ 19,20,21,22,23,24 },
{ 25,26,27,28,29,30 },
{ 31,32,33,34,35,36 }
};
pair<int, int> loc = matrix_search(matrix, 30);
cout << loc.first << " " << loc.second << endl;
pause();
}