Directory Sum

Problem 1: The idea I used was to maintain the max while removing duplicates, and converting back to the original string..

int solution(int X) {
    int max = INT_MIN;
    string str = to_string(X);
    string str_cpy = str;
    bool removed = false;
    for (int i = 1; i < str.length(); i++) {
        if (str.at(i - 1) == str.at(i)) {
            removed = true;
            str.erase(str.begin() + (i - 1), str.begin() + i);
            if (atoi(str.c_str()) > max) {
                max = atoi(str.c_str());
            }
            str = str_cpy;
            while (i < str.length() && str.at(i - 1) == str.at(i)) {
                i++;
            }
        }
    }
    if (!removed) return X;
    return max;
}


int main() {
    cout << solution(223336226) << endl;
    pause();
    cout << solution(123456789) << endl;
    pause();
}

Problem 2: http://pastebin.com/TCty5ggg Bad implementation -> not O(n) time!

  • The idea: Consider the following example:

dir1        
    dir11    
    dir12    
        picture.jpeg
        dir121
        file.txt
dir2        
    file2.gif

We can arrange the following tables through a preprocessing step

line    name    space
0       dir1        0
1       dir11    1
2       dir12    1
4       dir121    2
6       dir2        0

line     space    
3        2    
7        1
  • The basic idea is for each img file, we match the closest line that is less that is at least 1 less then the img line that also contains 1 less space than it.

  • We continue looking for 1 less space until all the spaces are exhausted (root directory is reached).

class dir_sum {
public:
    // preprocess file
    dir_sum(string &str) {
        std::istringstream stream(str);
        string line;

        int line_num = 0;
        bool is_img = false;
        while (std::getline(stream, line)) {
            cout << line << endl;
            int num_levels = 0;
            for (int i = 0; i < line.length(); i++) {
                while (line.at(i) == ' ') {
                    num_levels++;
                    i++;
                }
                if (line.at(i) == '.' && line.at(i + 1) != 't') {
                    img.push_back(make_pair(line_num, num_levels));
                    is_img = true;
                    break;
                }
            }
            if (!is_img) {
                dir.insert({ { line_num },
                { make_pair(line.substr(num_levels, line.length()),
                    num_levels) } });
                is_img = false;
            }
            line_num++;
        }
        max_abs_path();
    }


    void max_abs_path() {
        for (int i = 0; i < img.size(); i++) {
            int line_num = img.at(i).first;
            int num_spaces = img.at(i).second;
            if (num_spaces == 0) {
                final_sum++;
                continue;
            }
            int back = 1;
            bool checkNext = true;
            while (checkNext) {
                auto found = dir.find(line_num - back);
                if (found == dir.end() || found->second.second != num_spaces - 1) {
                    back++;
                    continue;
                }
                else {
                    checkNext = false;
                    final_sum += found->second.first.length() + 1;

                    int num_dir_spaces = found->second.second;
                    int num_dir_lines = found->first;
                    int back2 = 1;
                    while (num_dir_spaces != 0) {
                        auto found2 = dir.find(num_dir_lines - back2);
                        if (found2 == dir.end() || found2->second.second != num_dir_spaces - 1) {
                            back2++;
                            continue;
                        }
                        else {
                            final_sum += found2->second.first.length() + 1;
                            num_dir_spaces = found2->second.second;
                            num_dir_lines = found2->first;
                        }
                    }
                }
            }
        }
    }

    int get_sum() {
        return final_sum % 1000000007;
    }

private:
    int final_sum;
    //            line,     name,   space
    unordered_map<int, pair<string, int>> dir;
    //          line,level
    vector<pair<int, int>> img;
};

void test() {

    /*** TEST 1 ***/
    std::string test1 = "dir1\n dir11\n dir12\n  picture.jpeg\n  dir121\n  file1.txt\ndir2\n file2.gif\n";
    dir_sum one(test1);
    cout << one.get_sum() << endl;
    pause();

    /*** TEST 2 ***/
    std::string test2 = "dir1\n dir11\n  dir111\n   dir1111\n    a.png\n    b.jpeg\n    c.gif\n    d.png\n";
    dir_sum two(test2);
    cout << two.get_sum() << endl;
    pause();

    /*** TEST 3 ***/
    std::string test3 = "a.png\nb.jpeg\nc.gif\nd.png\n";
    dir_sum three(test3);
    cout << three.get_sum() << endl;
    pause();

    /*** TEST 4 ***/
    std::string test4 = "a.png\ndir1\nb.jpeg\ndir2\nc.gif\nd.png\n";
    dir_sum four(test4);
    cout << four.get_sum() << endl;
    pause();

    /*** TEST 5 ***/
    std::string test5 = "a.png\ndir1\n dir11\n  dir111\n   dir1111\nb.jpeg\ndir2\nc.gif\nd.png\n";
    dir_sum five(test5);
    cout << five.get_sum() << endl;
    pause();
}

int main() {
    test();
}

Problem 2: Attempt 2

  • More simplified. Uses a stack to mimic tree structure.

  • To understand this we first should recognize a few things:

    • One, a file system is effectively a tree. A graph too, but more simply, a tree. Our algorithm simplifies a lot this way.

For example, this file structure is effectively:

dir
    subdir1
        file1.ext
        subsubdir1
    subdir2
        subsubdir2
            file2.ext
  • Next, what distinguishes a node and its children are the number of tabs in the tree of each line.

0 dir
1    subdir1
2        file1.ext
2        subsubdir1
1    subdir2
2        subsubdir2
3            file2.ext
  • Notice that the child of every parent is 1 tab more than it. Using this as our guiding principle, we can model our tree.

  • So the solution is just iterating through the entire tree, and keeping track of the largest path for a file.

  • A file is distinguished with a '.'

  • Because our 'tree' is a txt file, we'll be modeling our tree using a stack, which is just effective as using recursion.

    • Our stack begins with the root directory, and we'll be adding elements to our stack in a pre-order fashion. That is, elements continue to be added, until we find that the next child is not a child of the previous top element. We do this by comparing the number of tabs of the current node, to the top of the stack.

    • In the process, we collect the size on the current file path. And if the node is a file, we update the current max path.

    • Otherwise, if our node is not a child, we'll have to 'recurse back', until we find a position where it is. That is, our stack can only grow in the number of tabs it has. For example, we can have 1->2->3->4... but not 1->2->2. The second while loop in the code takes care of this for us.

      • For example, consider that we are in subsubdir1 in the tree above. In other to get to subdir2, it would not make sense just to recurse back once and go to subdir1. We'll have to continue recursing back until subdir2 tabs are > dir1.

int get_num_tabs(string &in)
{
    int count = 0;
    for (char c : in) {
        if (c == '\t') count++;
        else break; // first non-occurance = none left
    }
    return count;
}

bool isFile(string &in) 
{
    for (auto i = in.rbegin(); i != in.rend(); ++i)
        if (*i == '.') return true;
    return false;
}

void sanatize_input(string &str)
{
    for (int i = 0; i < str.length(); i++) {
        if (str[i] == ' ') str.erase(0, 1);
    }
}

int lengthLongestPath(string input)
{
    stringstream ss(input);
    cout << ss.str() << endl;
    string next_line;

    stack<pair<int, string>> s;
    getline(ss, next_line);
    int num_tabs = get_num_tabs(next_line);
    s.push(make_pair(num_tabs, next_line));

    int cur_length = 3; // dir
    int maxx = 0;

    while (getline(ss, next_line)) {
        sanatize_input(next_line);
        num_tabs = get_num_tabs(next_line);
        if (num_tabs - 1 == s.top().first) {
            s.push(make_pair(num_tabs, next_line));
            cur_length += s.top().second.length() - num_tabs + 1;
            if (isFile(next_line)) 
                maxx = max(maxx, cur_length);
        }
        else {
            while (!s.empty() && num_tabs - 1 != s.top().first) {
                cur_length = cur_length - (s.top().second.length() - s.top().first) - 1;
                s.pop();
            }
            s.push(make_pair(num_tabs, next_line));
            cur_length += s.top().second.length() - num_tabs + 1;
        }
    }

    return maxx;
}

int main()
{
    /*
    0dir
    1    subdir1
    2        file1.ext
    2        subsubdir1
    1    subdir2
    2        subsubdir2
    3            file2.ext
    */
    // return : "dir/subdir2/subsubdir2/file2.ext"

    string input = "dir\n\tsubdir1\n\t\tfile1.ext\n\t\tsubsubdir1\n\tsubdir2\n\t\tsubsubdir2\n\t\t\tfile2.ext";
    //input = "dir\n     file.txt";
    //input = "dir\n\tsubdir1\n\t\tfile1.ext\n\t\tsubsubdir1\n\tsubdir2\n\tsubdir3\n\tsubdir4\n\t\tsubsubdir2\n\t\t\tfile2.ext";
    cout << lengthLongestPath(input);

}

Problem 2: Attempt 3

Similar idea as before, just cleaned up. The algorithm comes down to knowing which position in the current file is by using the number of tabs as a guiding principle.

Complexity: O(n) time and space

import re


class Solution:
    def lengthLongestPath(self, input):
        """
        :type input: str
        :rtype: int
        """

        if not input: return 0

        # split by tab delimitor
        tree = re.split('(\t|\n)', input)

        # clean for simplier processing later
        tree = [elm for elm in tree if elm != '\n' and elm != '']

        # begin the preorder + 1 for dir + '/'
        s = [(len(tree[0]) + 1, 0)]
        abs_max = 0

        # iterate through elements
        iter = 1
        while iter < len(tree):
            file = tree[iter]
            parent_len = s[-1][0]
            parent_lvl = s[-1][1]

            tab_cnt = 0
            while iter < len(tree) and file == '\t':
                iter += 1
                file = tree[iter]
                tab_cnt += 1

            # step into the correct parent of the tree
            while tab_cnt != parent_lvl + 1:
                s.pop()
                parent_len = s[-1][0]
                parent_lvl = s[-1][1]

            if '.' not in file:  # is directory, add 1 for '/'
                s.append((parent_len + len(file) + 1, tab_cnt))
            else:  # is file, no need to recurse down
                abs_max = max(abs_max, parent_len + len(file))
            iter += 1

        return abs_max

Last updated