說明
今日的一些高階程式語言對於字串的處理支援越來越強大(例如Java、Perl等),不過字串搜尋本身仍是個值得
探討的課題,在這邊以Boyer- Moore法來說明如何進行字串說明,這個方法快且原理簡潔易懂。
解法
字串搜尋本身不難,使用暴力法也可以求解,但如何快速搜尋字串就不簡單了,傳統的字串搜尋是從關鍵字與字串的開頭
開始比對,例如 Knuth-Morris-Pratt
演算法
字串搜尋,這個方法也不錯,不過要花時間在公式計算上;Boyer-Moore字串核對改由關鍵字的後面開始核對字串,並製作前進表,如果比對不符合則依
前進表中的值前進至下一個核對處,假設是p好了,然後比對字串中p-n+1至p的值是否與關鍵字相同。

那麼前進表該如何前進,舉個實際的例子,如果要在字串中搜尋JUST這個字串,則可能遇到的幾個情況如下所示:
依照這個例子,可以決定出我們的前進值表如下:
| 其它 |
J |
U |
S |
T |
| 4 |
3 |
2 |
1 |
4(match?) |
如果關鍵字中有重複出現的字元,則前進值就會有兩個以上的值,此時則取前進值較小的值,如此就不會跳過可能的位置,例如texture這個關鍵字,t的前
進值應該取後面的3而不是取前面的7。
#include <stdio.h> #include <stdlib.h> #include <string.h> #define SKIP_TABLE_SIZE 256 #define STRING_LENGTH 80
void table(int*, char*); // 建立前進表 int search(int* skip, int, char*, char*); // 搜尋關鍵字 void substring(char*, char*, int, int); // 取出子字串
int main(void) { int skip[SKIP_TABLE_SIZE]; char str_input[STRING_LENGTH]; char str_key[STRING_LENGTH]; char tmp[STRING_LENGTH] = {'\0'}; printf("請輸入字串:"); gets(str_input); printf("請輸入搜尋關鍵字:"); gets(str_key);
int m = strlen(str_input); // 計算字串長度 int n = strlen(str_key); table(skip, str_key); int p = search(skip, n-1, str_input, str_key);
while(p != -1) { substring(str_input, tmp, p, m); printf("%s\n", tmp); p = search(skip, p+n+1, str_input, str_key); }
printf("\n");
return 0; }
void table(int* skip, char *key) { int n = strlen(key); int k; for(k = 0; k < SKIP_TABLE_SIZE; k++) skip[k] = n; for(k = 0; k < n - 1; k++) skip[key[k]] = n - k - 1; }
int search(int* skip, int p, char* input, char* key) { char tmp[STRING_LENGTH] = {'\0'};
int m = strlen(input); int n = strlen(key);
while(p < m) { substring(input, tmp, p-n+1, p); if(!strcmp(tmp, key)) // 比較兩字串是否相同 return p-n+1; p += skip[input[p]]; }
return -1; }
void substring(char *text, char* tmp, int s, int e) { int i, j; for(i = s, j = 0; i <= e; i++, j++) tmp[j] = text[i]; tmp[j] = '\0'; }
import java.util.*; import java.io.*;
public class StringMatcher implements Iterable<String>, Iterator<String>{ private int[] skip = new int[256]; private String str; private String key; private int pos;
public StringMatcher(String str, String key) { this.str = str; this.key = key; for(int k = 0; k < 256; k++) skip[k] = key.length(); for(int k = 0; k < key.length() - 1; k++) skip[key.charAt(k)] = key.length() - k - 1; pos = search(key.length()-1, str, key); } public Iterator<String> iterator() { return this; }
private int search(int p, String str, String key) { int tp = p; while(tp < str.length() && ! str.substring(tp-key.length()+1, tp+1).equals(key)) { tp += skip[str.charAt(tp)]; } return tp-key.length()+1; }
public boolean hasNext() { return pos < str.length() - 1; }
public String next() { String tmp = str.substring(pos); pos = search(pos+key.length()+1, str, key); return tmp; }
public void remove() { throw new RuntimeException("Not supported"); } public static void main(String[] args) throws IOException { BufferedReader bufReader = new BufferedReader( new InputStreamReader(System.in));
System.out.print("請輸入字串:"); String str = bufReader.readLine(); System.out.print("請輸入搜尋關鍵字:"); String key = bufReader.readLine();
for(String s : new StringMatcher(str, key)) { System.out.println(s); } } }
class StringMatcher: def __init__(self, str, key): self.__str = str self.__key = key self.__skip = [len(key) for k in range(256)] for k in range(len(key) - 1): self.__skip[ord(key[k])] = len(key) -k - 1 self.__pos = self.__search(len(key), str, key)
def __iter__(self): return self
def __search(self, p, input, key): tp = p while(tp < len(input) and input[tp-len(key) + 1: tp + 1] != key): tp += self.__skip[ord(input[tp])] return tp - len(key) + 1
def __next__(self): if not self.__hasNext(): raise StopIteration tmp = self.__str[self.__pos:] self.__pos = self.__search( self.__pos + len(self.__key) + 1, str, self.__key) return tmp
def __hasNext(self): return self.__pos < len(self.__str) - 1
str = input("請輸入字串:") key = input("請輸入搜尋關鍵字:") for s in StringMatcher(str, key): print(s)
class StringMatcher(val str: String, val key: String) extends Iterator[String] { private val skip = { val table = (for(k <- 0 to 256) yield key.length).toArray for(k <- 0 until key.length - 1) { table(key.charAt(k)) = key.length - k - 1 } table } private var pos = search(key.length - 1, str, key) private def search(p: Int, str: String, key: String) = { var tp = p while(tp < str.length && str.substring(tp - key.length + 1, tp + 1) != key) { tp += skip(str.charAt(tp)) } tp - key.length + 1 } def hasNext() = pos < str.length - 1 def next() = { val tmp = str.substring(pos) pos = search(pos + key.length + 1, str, key) tmp } }
print("請輸入字串:") val str = readLine print("請輸入搜尋關鍵字:") val key = readLine for(s <- new StringMatcher(str, key)) { println(s) }
# encoding: Big5 class StringMatcher def initialize(str, key) @str = str @key = key @skip = Array.new(256, key.length) (@key.length - 1).times { |k| @skip[key[k].ord] = key.length - k - 1 } @pos = search(key.length, str, key) end def search(p, input, key) tp = p while tp < input.length && input[(tp - key.length + 1)..(tp)] != key tp += @skip[input[tp].ord] end tp - key.length + 1 end private :search def each while @pos < @str.length - 1 tmp = @str[@pos..-1] @pos = search(@pos + @key.length, @str, @key) yield(tmp) end end end
print "請輸入字串:" str = gets.chomp print "請輸入搜尋關鍵字:" key = gets.chomp StringMatcher.new(str, key).each { |s| puts s }
|
|