forked from sba1/simplemail
-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathboyermoore.c
181 lines (156 loc) · 3.9 KB
/
boyermoore.c
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
/**
* This is an implementation of the (turbo) bayer/moore string search algorithm.
* Based upon the code presented at http://www-igm.univ-mlv.fr/~lecroq/string/.
*
* @file boyermoore.c
*/
#include <limits.h>
#include <stdlib.h>
#include "boyermoore.h"
#include "codesets.h"
#include "debug.h"
#define MAX(a,b) ((a)>(b)?(a):(b))
static void suffixes(char *x, int m, unsigned short *suff)
{
int g, i;
int f = 0; /* This is to silent the compiler */
suff[m - 1] = m;
g = m - 1;
for (i = m - 2; i >= 0; --i)
{
if (i > g && suff[i + m - 1 - f] < i - g)
suff[i] = suff[i + m - 1 - f];
else {
if (i < g)
g = i;
f = i;
while (g >= 0 && x[g] == x[g + m - 1 - f])
--g;
suff[i] = f - g;
}
}
}
/**
* Computes the good suffix skip table.
*
* @param x
* @param m
* @param suff
* @param bmGs
*/
static void preBmGs(char *x, int m, unsigned short suff[], unsigned short bmGs[])
{
int i, j;
suffixes(x, m, suff);
for (i = 0; i < m; ++i)
bmGs[i] = m;
j = 0;
for (i = m - 1; i >= 0; --i)
if (suff[i] == i + 1)
for (; j < m - 1 - i; ++j)
if (bmGs[j] == m)
bmGs[j] = m - 1 - i;
for (i = 0; i <= m - 2; ++i)
bmGs[m - 1 - suff[i]] = m - 1 - i;
}
/**
* @brief The context data structure of the boyermoore search
*/
struct boyermoore_context
{
/** @brief The bad character skip table */
unsigned short skip_table[UCHAR_MAX];
/** @brief The good suffix table */
unsigned short *good_suffix_table;
char *pat; /**! @brief pattern to be searched for */
int plen; /**! @brief length of the pattern */
};
/**
* Creates the boyermoore context for a given pattern and
* length.
*
* @param p
* @param plen
* @return
*/
struct boyermoore_context *boyermoore_create_context(char *p, int plen)
{
struct boyermoore_context *context;
unsigned short *next;
unsigned short *suff;
int i;
if (!(context = (struct boyermoore_context*)malloc(sizeof(*context))))
return NULL;
if (!(context->good_suffix_table = next = (unsigned short*)malloc((plen + 1)*sizeof(context->good_suffix_table[0]))))
{
free(context);
return NULL;
}
if (!(suff = (unsigned short*)(unsigned short*)malloc((plen + 1)*sizeof(suff[0]))))
{
free(next);
free(context);
return NULL;
}
context->pat = p;
context->plen = plen;
/* Prepare bad character skip table */
for (i = 0; i < sizeof(context->skip_table)/sizeof(context->skip_table[0]); i++)
context->skip_table[i] = plen;
for (i = 0; i < plen - 1; i++)
context->skip_table[(unsigned char)p[i]] = plen - i - 1;
preBmGs(p, plen, suff, next);
free(suff);
return context;
}
/**
* Creates the boyermoore context.
*
* @param context
*/
void boyermoore_delete_context(struct boyermoore_context *context)
{
if (context)
{
free(context->good_suffix_table);
free(context);
}
}
/**
* Performs the boyermoore algorithm.
*
* @param context the context
* @param str string to be searched through
* @param n number of bytes to be searches through
* @param callback function that is called for every hit. If callback returns 0,
* the search is aborted.
* @param user_data data pointer that is fed into the callback function.
*
* @return the position of the last found pattern or -1 if the pattern could not be found.
*/
int boyermoore(struct boyermoore_context *context, char *str, int n, bm_callback callback, void *user_data)
{
int i, j;
int rc = -1;
unsigned short *bmBc = context->skip_table;
unsigned short *bmGs = context->good_suffix_table;
int m = context->plen;
char *substr = context->pat;
/* Searching */
j = 0;
while (j <= n - m)
{
for (i = m - 1; i >= 0 && substr[i] == str[i + j]; --i);
if (i < 0)
{
rc = j;
if (!callback || !callback(substr,j,user_data))
return rc;
j += bmGs[0];
} else
{
j += MAX(bmGs[i], bmBc[(unsigned char)str[i + j]] - m + 1 + i);
}
}
return rc;
}