1078-Bigram 分词

Raphael Liu Lv10

给出第一个词 first 和第二个词 second,考虑在某些文本 text 中可能以 "first second third"
形式出现的情况,其中 second 紧随 first 出现,third 紧随 second 出现。

对于每种这样的情况,将第三个词 “third“ 添加到答案中,并返回答案。

示例 1:

**输入:** text = "alice is a good girl she is a good student", first = "a", second = "good"
**输出:** ["girl","student"]

示例 2:

**输入:** text = "we will we will rock you", first = "we", second = "will"
**输出:** ["we","rock"]

提示:

  • 1 <= text.length <= 1000
  • text 由小写英文字母和空格组成
  • text 中的所有单词之间都由 单个空格字符 分隔
  • 1 <= first.length, second.length <= 10
  • firstsecond 由小写英文字母组成

方法一:遍历

思路与算法

我们将文本 text 按空格分割成单词数组 words,然后遍历 words 数组,如果一个单词的前两个单词分别按顺序等于 first 和 second,则该单词符合第三个单词 third 的定义, 将其加入结果中。

代码

[sol1-C]
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
char ** findOcurrences(char * text, char * first, char * second, int* returnSize){
int s = 0, e = 0, len = strlen(text);
char **words = (char **)malloc(sizeof(char *) * len);
memset(words, 0, sizeof(char *) * len);
int nwords = 0;
while (true) {
while (s < len && text[s] == ' ') {
s++;
}
if (s >= len) {
break;
}
e = s + 1;
while (e < len && text[e] != ' ') {
e++;
}
words[nwords++] = strndup(text + s, e - s);
s = e + 1;
}
char **ret = (char **)malloc(sizeof(char *) * (nwords + 1));
memset(ret, 0, sizeof(char *) * (nwords + 1));
int nret = 0;
for (int i = 2; i < nwords; i++) {
if (strcmp(words[i - 2], first) == 0 && strcmp(words[i - 1], second) == 0) {
ret[nret++] = strdup(words[i]);
}
}
for (int i = 0; i < nwords; i++) {
free(words[i]);
}
free(words);
*returnSize = nret;
return ret;
}
[sol1-C++]
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
class Solution {
public:
vector<string> findOcurrences(string text, string first, string second) {
vector<string> words;
int s = 0, e = 0, len = text.length();
while (true) {
while (s < len && text[s] == ' ') {
s++;
}
if (s >= len) {
break;
}
e = s + 1;
while (e < len && text[e] != ' ') {
e++;
}
words.push_back(text.substr(s, e - s));
s = e + 1;
}
vector<string> ret;
for (int i = 2; i < words.size(); i++) {
if (words[i - 2] == first && words[i - 1] == second) {
ret.push_back(words[i]);
}
}
return ret;
}
};
[sol1-Java]
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
class Solution {
public String[] findOcurrences(String text, String first, String second) {
String[] words = text.split(" ");
List<String> list = new ArrayList<String>();
for (int i = 2; i < words.length; i++) {
if (words[i - 2].equals(first) && words[i - 1].equals(second)) {
list.add(words[i]);
}
}
int size = list.size();
String[] ret = new String[size];
for (int i = 0; i < size; i++) {
ret[i] = list.get(i);
}
return ret;
}
}
[sol1-C#]
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
public class Solution {
public string[] FindOcurrences(string text, string first, string second) {
string[] words = text.Split(" ");
IList<string> list = new List<string>();
for (int i = 2; i < words.Length; i++) {
if (words[i - 2].Equals(first) && words[i - 1].Equals(second)) {
list.Add(words[i]);
}
}
int size = list.Count;
string[] ret = new string[size];
for (int i = 0; i < size; i++) {
ret[i] = list[i];
}
return ret;
}
}
[sol1-Python3]
1
2
3
4
class Solution:
def findOcurrences(self, text: str, first: str, second: str) -> List[str]:
words = text.split()
return [words[i] for i in range(2, len(words)) if words[i - 2] == first and words[i - 1] == second]
[sol1-Golang]
1
2
3
4
5
6
7
8
9
func findOcurrences(text, first, second string) (ans []string) {
words := strings.Split(text, " ")
for i := 2; i < len(words); i++ {
if words[i-2] == first && words[i-1] == second {
ans = append(ans, words[i])
}
}
return
}
[sol1-JavaScript]
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
var findOcurrences = function(text, first, second) {
const words = text.split(" ");
const list = [];
for (let i = 2; i < words.length; i++) {
if (words[i - 2] === first && words[i - 1] === second) {
list.push(words[i]);
}
}
const size = list.length;
const ret = Array(size).fill('');
for (let i = 0; i < size; i++) {
ret[i] = list[i];
}
return ret;
};

复杂度分析

  • 时间复杂度:O(N),其中 N 为 text 的长度。分割 text 需要 O(N),words 每个元素最多访问两次,需要 O(N),所以总的时间复杂度为 O(N)。

  • 空间复杂度:O(N)。需要 O(N) 的空间来保存 words 数组。

 Comments
On this page
1078-Bigram 分词