String matching algorithms

using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Diagnostics;

namespace NaiveStringMatcher
{
class Program
{
static void Main(string[] args)
{
string str = @”BANAN BANBAN BANBANANA BANAN BANAN BANANAN BANANANANANANANANAN”;

string pattern = “NANAN”;

float numTries = 10000;

List shifts = null;

Stopwatch stopWatch = new Stopwatch();

stopWatch.Start();

for (int i = 0; i < numTries; i++ )
shifts = NaiveStringMatcher(str, pattern);

stopWatch.Stop();

Trace.WriteLine(String.Format("Naive string matcher {0}", stopWatch.ElapsedMilliseconds / numTries));

foreach (var item in shifts)
{
Trace.WriteLine(item);
}

stopWatch.Restart();

for (int i = 0; i < numTries; i++)
shifts = Knuth_Morris_Pratt(str, pattern);

stopWatch.Stop();
Trace.WriteLine(String.Format("Knuth_Morris_Pratt {0}", stopWatch.ElapsedMilliseconds / numTries));

foreach (var item in shifts)
{
Trace.WriteLine(item);
}

Console.ReadKey();
}

///

/// The naive algorithm has one phase: it does a search.
/// It does that search much less efficiently in the worst case than the KMP search phase.
///
/// The Naive String Matcher algorithm takes time O((n – m + 1)m, and this bound is tight in the worst case.
///

/// text
/// pattern
/// shifts
static List NaiveStringMatcher(string text, string pattern)
{
int textLength = text.Length;
int patternLength = pattern.Length;

List shifts = new List();

for (int s = 0; s <= textLength – patternLength; s++)
{
//if we meet a character which is equal to the first
//character of the pattern we should take a chance and
//try to compare with other characters of the pattern
if (text[s] == pattern[0])
{
int i = 1;
while (i < patternLength)
{
if (text[s + i] == pattern[i])
i++;
//in case we are unhappy we have to try again from the position of s
else break;
}
if (i == patternLength)
{
shifts.Add(s);
}
}
}

return shifts;
}

///

/// The KMP algorithm has two phases: first it builds a table, and then it does a search,
/// directed by the contents of the table.
/// KMP is only a win if you have enormous strings and you are doing lots of searches that allow you to re-use an
/// already-built table. You need to amortize away the huge cost of building the table by doing lots of searches
/// using that table.
///
/// Preprocessing time is Θ(m) and matching time is Θ(n).
/// m is the pattern length
/// n is the text length
///

/// text
/// pattern
/// shifts
static List Knuth_Morris_Pratt(string text, string pattern)
{

int patternLength = pattern.Length;
int textLength = text.Length;

List shifts = new List();

int[] pi = ComputePrefixFunction(pattern);

int matchedSymNum = 0;

//Start scanning the text from the first symbol
for (int i = 0; i 0 && pattern[matchedSymNum] != text[i])
matchedSymNum = pi[matchedSymNum – 1];

//Scanning for the first match or
//Checking whether the symbol of the pattern at the calculated position
//is equal to the symbol of the text at the iterator’s position
//and increasing the number of matching symbols
if (pattern[matchedSymNum] == text[i])
matchedSymNum++;

//When the number of the matches is equal to the pattern length
//it means that we get a shift
if (matchedSymNum == patternLength)
{
shifts.Add(i – patternLength + 1);
matchedSymNum = pi[matchedSymNum – 1];
}

}

return shifts;
}

///

/// Builds a table with values of lengths beetwen matches
///
/// For example:
/// ababaca
/// 0012301
///

///
///
static int[] ComputePrefixFunction(string pattern)
{
int j;

int[] pi = new int[pattern.Length];

///The first value of pi[] is zero by default
//so we start scanning from 1
for (int i = 1; i < pi.Length; i++)
{
j = 0;
//If we find a match we start scanning inside the loop
//increasing the counter of matches
while((i < pi.Length) && (pattern[i] == pattern[j]) )
{
j++;
pi[i++] = j;
}
}

return pi;
}
}
}