140 Word Break II

Given a string s and a dictionary of words dict, add spaces in s to construct a sentence where each word is a valid dictionary word.

Return all such possible sentences.

For example, given

s = "catsanddog",
dict = ["cat", "cats", "and", "sand", "dog"].

A solution is ["cats and dog", "cat sand dog"]

  • Algorithm (I am so close!):

    • First I find the intersection of the string and the dictionary words. We can do this with std::string::find. In this process, I also keep the location. I need to do this because I want to be able to preserve the order of the string itself.

    • Now when I sort the string by its value pair (location), which is an NlogN operation, I preserve the original order of the string with the all the found words.

  • Next, I group all the words that are substrings of one another. However, I stop the first instance a substring isnt found. This just means that in the process of finding the words, we found an over lap between two words.

  • The next step would be to group words from the vector in a clever way that resembles possible sentences. I cannot simply pair cat and and together, although they appear in different groups.

template<typename T, typename T2>
void print_v_pair(vector<pair<T, T2>> &ar) {
    for (auto i : ar) {
        cout << i.first << " ";
    }
    cout << endl;
    for (auto i : ar) {
        cout << i.second << " ";
    }
    cout << endl;
    pause();
}

struct Comp {
    typedef pair<string, size_t> P;
    bool operator() (const P &first, const P &second) {
        return first.second < second.second;
    }
};


bool isSubstr(string &one, string &two) {
    if (one.find(two) == string::npos && two.find(one) == string::npos)
        return false;
    return true;
}

vector<pair<string, size_t>> intersect(string s, unordered_set<string>& wordDict) {
    // getting extra digit for location of string that I can sort later
    // that why I can maintain the order of the string
    vector<pair<string, size_t>> both;
    for (auto i : wordDict) {
        size_t found = s.find(i);
        if (found != string::npos)
            both.push_back(make_pair(i, found));
    }
    return both;
}

vector<vector<string>> group_substrings(vector<pair<string, size_t>> & strs) {
    vector<vector<string>> groups;
    vector<string> temp;
    for (int i = strs.size() - 1; i >= 0; i--) {
        string cur = strs.at(i).first;
        temp.push_back(cur);
        // move back from current string to check other potential substrings
        for (int j = i - 1; j >= 0; j--) {
            if (isSubstr(strs.at(j).first, cur)) {
                temp.push_back(strs.at(j).first);
            }
            else {
                i = j + 1;
                break;
            }
        }
        groups.push_back(temp);
        temp.clear();
        if (i == 1) break;
    }

    return groups;
}

vector<string> wordBreak(string s, unordered_set<string>& wordDict) {

    vector<pair<string, size_t>> both = intersect(s, wordDict);
    sort(both.begin(), both.end(), Comp());
    print_v_pair(both);

    vector<vector<string>> grouped = group_substrings(both);
    print2d(grouped);

    for (int i = grouped.size(); i >= 0; i--) {

    }

    vector<string> sol;
    return sol;
}


int main() {
    unordered_set<string> map = {
        "and", "sand", "cat", "cats", "dog",
    };

    print(wordBreak("catsanddog", map));
}

Last updated