题意

You are given a string, s, and a list of words, words, that are all of the same length. Find all starting indices of substring(s) in s that is a concatenation of each word in words exactly once and without any intervening characters.

For example, given:

s: "barfoothefoobarman"

words: ["foo", "bar"]

You should return the indices: [0,9].

(order does not matter).

Subscribe to see which companies asked this question

大意是:给定一个字符串 s 和一个单词数组 words,找出 s 中所有满足条件的子串的起始下标——该子串必须由 words 中的全部单词各使用一次、首尾相连拼接而成,中间不能夹杂其他字符,单词的先后顺序不限。其中所有单词的长度相同,并且数组中的单词可以重复。

思路

最简单的思路是逐个位置遍历字符串:对每个起始位置,先判断该处是否是某个单词(这需要到单词数组中查找),然后继续依次匹配其余单词,因此时间复杂度约为(字符串的长度 × 单词数组中单词的个数)。这种方法虽然容易理解,但开销比较大,需要注意的细节也比较多。参考了一些代码之后,我发现了几种更好的方法,包括双 map、使用队列、使用 Trie 树等。

实现

我的实现(最简单容易理解)

/**
 * Brute-force solution: try every start index and check the window there.
 *
 * For each start position the next words.size() chunks of word length are
 * counted in a scratch map and compared against how often each word occurs
 * in `words`. Because exactly words.size() chunks are consumed and no chunk
 * may exceed its allowed count, a window that survives the check uses every
 * word exactly once.
 *
 * Cost is roughly O(|s| * |words| * word_len).
 *
 * @param s      text to search
 * @param words  words to concatenate; all are expected to have the length of
 *               words[0], duplicates are allowed
 *
 * @return start indices in ascending order; empty when `words` is empty, the
 *         words are zero-length, or `s` is shorter than the concatenation
 */
std::vector<int> findSubstring1(std::string s, std::vector<std::string>& words) {
    std::vector<int> result;
    if (words.empty() || words[0].empty()) {
        return result;
    }

    const std::size_t word_len = words[0].length();
    const std::size_t total_len = word_len * words.size();
    if (s.length() < total_len) {
        return result;
    }

    // How many times each word has to appear in a matching window.
    std::unordered_map<std::string, int> required;
    for (const std::string& word : words) {
        ++required[word];
    }

    // Hoisted out of the loop so the buckets are reused between windows.
    std::unordered_map<std::string, int> seen;
    for (std::size_t start = 0; start + total_len <= s.length(); ++start) {
        seen.clear();
        bool is_valid = true;
        for (std::size_t k = 0; k < words.size(); ++k) {
            const std::string chunk = s.substr(start + k * word_len, word_len);
            const auto need = required.find(chunk);
            // Unknown chunk, or one more copy than `words` provides.
            if (need == required.end() || ++seen[chunk] > need->second) {
                is_valid = false;
                break;
            }
        }
        if (is_valid) {
            result.push_back(static_cast<int>(start));
        }
    }
    return result;
}

双map(最基础的优化)

/**
 * Sliding-window solution backed by two hash maps.
 *
 * `dict` holds how often each word occurs in `words`; `tdict` holds how often
 * each word occurs in the current window. For every offset i in
 * [0, word length) the string is walked in word-sized steps while `left`
 * tracks the start of the current window of consecutive dictionary words.
 *
 * @param s      text to search
 * @param words  words to concatenate; all are expected to have the length of
 *               words[0], duplicates are allowed
 *
 * @return start indices of every matching window, grouped by offset (so not
 *         necessarily ascending); empty when `s` or `words` is empty
 */
std::vector<int> findSubstring2(std::string s, std::vector<std::string>& words) {
    std::vector<int> ans;
    const int n = static_cast<int>(s.size());
    const int cnt = static_cast<int>(words.size());
    if (n <= 0 || cnt <= 0) {
        return ans;
    }

    // Required multiplicity of every word.
    std::unordered_map<std::string, int> dict;
    for (const std::string& word : words) {
        ++dict[word];
    }
    const int wl = static_cast<int>(words[0].length());

    for (int i = 0; i < wl; ++i) {
        int left = i;   // start index of the current window
        int count = 0;  // words in the window that count towards a match
        std::unordered_map<std::string, int> tdict;

        for (int j = i; j <= n - wl; j += wl) {
            const std::string str = s.substr(j, wl);
            const auto required = dict.find(str);

            if (required == dict.end()) {
                // Not a dictionary word: no window can span this chunk, so
                // restart right after it. `left` may be well behind `j` here
                // (it only advances one word after a match), so it must be
                // set from `j` rather than stepped by one word.
                count = 0;
                tdict.clear();
                left = j + wl;
                continue;
            }

            ++tdict[str];
            if (tdict[str] <= required->second) {
                ++count;
            } else {
                // One copy too many: drop words from the left until the
                // surplus copy of `str` has left the window.
                while (tdict[str] > required->second) {
                    const std::string str1 = s.substr(left, wl);
                    --tdict[str1];
                    if (tdict[str1] < dict.at(str1)) {
                        --count;
                    }
                    left += wl;
                }
            }

            if (count == cnt) {
                // Full match; record it, then slide one word to the right.
                ans.push_back(left);
                --tdict[s.substr(left, wl)];
                --count;
                left += wl;
            }
        }
    }
    return ans;
}

使用队列

/**
 * Queue-based solution.
 *
 * Every distinct word owns a queue with one slot per copy of that word in
 * `words`. A slot holds the index in `s` where that copy was last seen, or -1
 * if it has not been seen yet. The front of a queue is therefore the oldest
 * remembered occurrence of the word, which is enough to tell whether taking
 * the word again still fits into the current run of consecutive words.
 *
 * @param s      text to search
 * @param words  words to concatenate; all are expected to have the length of
 *               words[0], duplicates are allowed
 *
 * @return start indices of every matching window, grouped by offset (so not
 *         necessarily ascending); empty when `s` or `words` is empty or `s`
 *         is shorter than one word
 */
using wordItr = std::unordered_map<std::string, std::queue<int>>;

std::vector<int> findSubstring3(std::string s, std::vector<std::string>& words) {
    std::vector<int> res;
    if (words.empty() || s.empty()) {
        return res;
    }

    const int wordlen = static_cast<int>(words[0].size());
    const int wordCnt = static_cast<int>(words.size());
    const int slen = static_cast<int>(s.size());
    if (wordlen == 0 || slen < wordlen) {
        return res;
    }

    // Pristine table: one -1 slot per copy of each word.
    wordItr initial;
    for (const std::string& word : words) {
        initial[word].push(-1);
    }

    for (int i = 0; i < wordlen; ++i) {
        int curWordCnt = 0;          // length (in words) of the current run
        wordItr wordHash = initial;  // fresh positions for this offset

        for (int j = i; j + wordlen <= slen; j += wordlen) {
            const auto it = wordHash.find(s.substr(j, wordlen));
            if (it == wordHash.end()) {
                // Not a dictionary word: the run is broken. Stale positions
                // left in the queues lie before `j` and are recognised as
                // outside the run by the distance check below.
                curWordCnt = 0;
                continue;
            }

            std::queue<int>& positions = it->second;
            const int lastPos = positions.front();
            if (lastPos == -1 || curWordCnt * wordlen < j - lastPos) {
                // This copy was never used, or its oldest use lies before the
                // start of the current run: the word simply extends the run.
                ++curWordCnt;
            } else {
                // The oldest use is inside the run, so the run has to restart
                // just after it; that leaves (j - lastPos) / wordlen words,
                // the current one included.
                curWordCnt = (j - lastPos) / wordlen;
            }

            // Replace the oldest remembered position with the current one.
            positions.pop();
            positions.push(j);

            if (curWordCnt == wordCnt) {
                res.push_back(j - wordlen * (wordCnt - 1));
            }
        }
    }
    return res;
}

Trie树

/**
 * Trie-based solution.
 *
 * Instead of hashing every chunk, the words are stored in a trie over the
 * letters 'a'..'z' and a candidate window is matched by walking the trie
 * character by character.
 */

/** One trie node: 26 child links plus a counter of words ending here. */
class TrieNode {
public:
    TrieNode* child[26];  // child[c - 'a']; nullptr when there is no such edge
    int cnt;              // copies of the word ending here that are still unused

    TrieNode() : child(), cnt(0) {}

    // A node owns its whole subtree.
    ~TrieNode() {
        for (TrieNode* next : child) {
            delete next;
        }
    }

    // Copying would make two nodes own the same children.
    TrieNode(const TrieNode&) = delete;
    TrieNode& operator=(const TrieNode&) = delete;
};

/** Trie over lowercase ASCII words; owns all of its nodes. */
class Trie {
    TrieNode* root;

public:
    Trie() : root(new TrieNode()) {}
    ~Trie() { delete root; }

    Trie(const Trie&) = delete;
    Trie& operator=(const Trie&) = delete;

    TrieNode* getRoot() {
        return root;
    }

    /** Inserts every word of `words` (see addWord). */
    void buildTrie(const std::vector<std::string>& words) {
        for (const std::string& word : words) {
            addWord(word);
        }
    }

    /**
     * Inserts one word, incrementing the counter on its final node.
     * A word containing anything outside 'a'..'z' cannot be indexed into the
     * 26-slot nodes and is not inserted at all.
     */
    void addWord(const std::string& word) {
        // Validate first so a rejected word leaves no partial path behind.
        for (const char c : word) {
            if (c < 'a' || c > 'z') {
                return;
            }
        }
        TrieNode* cur = root;
        for (const char c : word) {
            const int slot = c - 'a';
            if (!cur->child[slot]) {
                cur->child[slot] = new TrieNode();
            }
            cur = cur->child[slot];
        }
        cur->cnt++;
    }
};

// Trie consulted by isSubString1; findSubstring4 installs one for the
// duration of each call.
Trie* trie;

/**
 * Checks recursively whether s[start, end) can be split into words that are
 * still available in the global `trie`.
 *
 * Walks the trie from the root; whenever a node with an unused word is
 * reached, that word is taken (counter decremented), the remainder of the
 * range is matched recursively, and the counter is restored afterwards so
 * the trie is left unchanged for the next window.
 *
 * @param s      text being searched
 * @param start  first index of the range (inclusive)
 * @param end    end of the range (exclusive); must not exceed s.size()
 *
 * @return true if the whole range is covered by available words; false
 *         otherwise, including for an empty range, when no trie is installed,
 *         or when a character outside 'a'..'z' is met
 */
bool isSubString1(std::string& s, int start, int end) {
    if (trie == nullptr) {
        return false;
    }
    TrieNode* node = trie->getRoot();
    for (int i = start; i < end; i++) {
        const int idx = s[i] - 'a';
        if (idx < 0 || idx >= 26 || !node->child[idx]) {
            return false;
        }
        node = node->child[idx];
        // A word ends here and at least one copy of it is still unused.
        if (node->cnt > 0) {
            node->cnt--;  // mark as used
            const bool matched = (i + 1 == end) || isSubString1(s, i + 1, end);
            node->cnt++;  // mark as unused again
            if (matched) {
                return true;
            }
        }
    }
    return false;
}

/**
 * Finds every start index in `s` of a concatenation of all `words`, matching
 * each candidate window against a trie of the words.
 *
 * The trie lives only for the duration of the call: the global `trie` pointer
 * is pointed at it while searching and restored to its previous value on
 * return. Only lowercase ASCII is supported; words containing other
 * characters are not inserted, so no window is reported in that case.
 *
 * @param s      text to search
 * @param words  words to concatenate; all are expected to have the length of
 *               words[0], duplicates are allowed
 *
 * @return start indices in ascending order; empty when `words` is empty, the
 *         words are zero-length, or `s` is shorter than the concatenation
 */
std::vector<int> findSubstring4(std::string s, std::vector<std::string>& words) {
    std::vector<int> result;
    if (words.empty() || words[0].empty()) {
        return result;
    }
    const std::size_t length = words[0].size() * words.size();
    if (s.length() < length) {
        return result;
    }

    Trie local;
    local.buildTrie(words);

    // Installs `local` as the active trie and restores the previous pointer
    // on every exit path.
    struct ActiveTrie {
        Trie* previous;
        explicit ActiveTrie(Trie* current) : previous(trie) { trie = current; }
        ~ActiveTrie() { trie = previous; }
    } scope(&local);

    const int span = static_cast<int>(length);
    const int last_start = static_cast<int>(s.length() - length);
    // The window starting at last_start ends exactly at the end of `s`, so it
    // must be tested as well.
    for (int i = 0; i <= last_start; i++) {
        if (isSubString1(s, i, i + span)) {
            result.push_back(i);
        }
    }
    return result;
}

总结

我觉得无论是什么方法,都逃不掉对字符串的遍历,对单词的匹配,就是看这个过程可以进行多大的优化。

Substring with Concatenation of All Words 题解的更多相关文章

  1. 【leetcode】Substring with Concatenation of All Words

    Substring with Concatenation of All Words You are given a string, S, and a list of words, L, that ar ...

  2. LeetCode - 30. Substring with Concatenation of All Words

    30. Substring with Concatenation of All Words Problem's Link --------------------------------------- ...

  3. leetcode面试准备: Substring with Concatenation of All Words

    leetcode面试准备: Substring with Concatenation of All Words 1 题目 You are given a string, s, and a list o ...

  4. [LeetCode] 30. Substring with Concatenation of All Words 解题思路 - Java

    You are given a string, s, and a list of words, words, that are all of the same length. Find all sta ...

  5. [Leetcode][Python]30: Substring with Concatenation of All Words

    # -*- coding: utf8 -*-'''__author__ = 'dabay.wang@gmail.com' 30: Substring with Concatenation of All ...

  6. leetcode-algorithms-30 Substring with Concatenation of All Words

    leetcode-algorithms-30 Substring with Concatenation of All Words You are given a string, s, and a li ...

  7. LeetCode: Substring with Concatenation of All Words 解题报告

    Substring with Concatenation of All Words You are given a string, S, and a list of words, L, that ar ...

  8. leetCode 30.Substring with Concatenation of All Words (words中全部子串相连) 解题思路和方法

    Substring with Concatenation of All Words You are given a string, s, and a list of words, words, tha ...

  9. LeetCode HashTable 30 Substring with Concatenation of All Words

    You are given a string, s, and a list of words, words, that are all of the same length. Find all sta ...

随机推荐

  1. cout如何输出十六进制

    http://blog.csdn.net/okadler0518/article/details/4962340 cout<<hex<<i<<endl; //输出十 ...

  2. 33 Introducing the Go Race Detector

    Introducing the Go Race Detector 26 June 2013 Introduction Race conditions are among the most insidi ...

  3. git —— 分支

    git中每一个分支相当于一个时间线 并列且相互平行 控制用指针控制~ 1.第一种创建命令: $ git branch 分支名称 —— 创建分支 $ git checkout 分支名称 —— 切换分支 ...

  4. Android Studio配置opencv

    安装过程参考:http://www.cnblogs.com/tail/p/4618476.html demo参考:http://blog.csdn.net/gao_chun/article/detai ...

  5. js中的事件委托或是事件代理

    JavaScript(jQuery)中的事件委托 https://www.cnblogs.com/zhoushengxiu/p/5703095.html js中的事件委托或是事件代理详解 https: ...

  6. Excel根据单元格内容设置整行颜色

    1. 选择需要设置的区域,条件格式中找到“新建规则” 2. 弹出窗口中选择“使用公式确定要设置格式的单元格”一项.填写公式如下: =IF(OR($D1="已完成",$D1=&quo ...

  7. CF474D. Flowers

    D. Flowers time limit per test 1.5 seconds memory limit per test 256 megabytes input standard input ...

  8. jQuery类名添加click方法

    通过$("").jQuery为元素添加点击事件,在使用类的情况下,可以使用$(e.target).attr('title');获得被点击元素的属性值. 示例代码如下 $(" ...

  9. css如何画出类似原生的线条?

    做前端的程序猿特别是移动端的,都很头疼遇到一种情况就是需要div底部加一个线条但是 1px的效果很粗,跟设计案上的不符合. 我的一个伙伴查找出来的一个解决方法: 在需要加上的线条的地方加上一个div ...

  10. xmanager

    [root@upright91 run]# ./runBenchmark.sh updbtpcc.properties sqlTableCreates Exception in thread &quo ...