I'm trying to make a quick and simple signature detection program in C. It should read a binary file (.exe, ELF, a library, etc...) and search for binary data (sometimes strings, sometimes bytes);
I have a simple test program in C:
#include <stdio.h>
#include <unistd.h>
const char *str = "TestingOneTwoThree";
int main()
{
while(1)
{
fprintf(stdout, "%s %ld\n", str, (long)getpid());
sleep(1);
}
}
Here is the horspool algorithm I'm using. I adapted it directly from the wikipedia pseudocode found here: https://en.wikipedia.org/wiki/Boyer%E2%80%93Moore%E2%80%93Horspool_algorithm
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#define HORSPOOL_COUNT 256
#define BLOCK_SIZE 1024
#define MAX(a, b) a > b ? a : b
ssize_t horspool_find(const char *buf, size_t buflen, const char *egg, size_t egglen)
{
int table[HORSPOOL_COUNT];
ssize_t shift = 0, i, tmp;
for(i = 0; i < HORSPOOL_COUNT; ++i)
{
table[i] = (int)egglen;
}
for(i = 0; i < egglen - 1; ++i)
{
table[(int)egg[i]] = egglen - i - 1;
}
while(shift <= buflen - egglen)
{
i = egglen - 1;
while(buf[shift + i] == egg[i])
{
if(i == 0)
{
return shift;
}
i--;
}
shift += MAX(1, table[(int)buf[shift + egglen - 1]]);
}
return -1;
}
char *readfile(const char *filename, size_t *size)
{
int ch;
size_t used = 0, allocated = 0;
char *buf = NULL, *tmp = NULL;
FILE *f;
if((f = fopen(filename, "rb")) == NULL)
{
if(size) *size = 0;
return perror("fopen"), NULL;
}
while((ch=fgetc(f)) != EOF)
{
if(used >= allocated)
{
allocated += BLOCK_SIZE;
tmp = realloc(buf, allocated);
if(tmp == NULL)
{
free(buf);
if(size) *size = 0;
fclose(f);
return perror("realloc"), NULL;
}
buf = tmp;
}
buf[used++] = (char)ch;
}
fclose(f);
if(size) *size = used;
return realloc(buf, used);
}
ssize_t naivealg_find(const char *buf, size_t buflen, const char *find, size_t findlen)
{
size_t i, j, diff = buflen - findlen;
for(i = 0; i < diff; ++i)
{
for(j = 0; j < findlen; ++j)
{
if(buf[i+j] != find[j])
{
break;
}
}
if(j == findlen)
{
return (ssize_t)i;
}
}
return -1;
}
int main()
{
size_t size;
char *buf = readfile("./a.out", &size);
char *pat = "TestingOneTwoThree";
ssize_t pos1 = horspool_find(buf, size, pat, strlen(pat));
ssize_t pos2 = naivealg_find(buf, size, pat, strlen(pat));
fprintf(stdout, "Offsets: %zd ~ %zd\n", pos1, pos2);
return 0;
}
Output is something along the lines of:
Offsets: -1 ~ 2052
Notes:
buf
and egg
parameters.Code was using a signed char
and with binary data, from time to time, would index incorrectly with a negative index.
// table[(int)buf[shift + egglen - 1]]
table[(unsigned char )buf[shift + egglen - 1]]
This problem also exists in the the egg
pattern.
// table[(int) egg[i]] = egglen - i - 1;
table[(unsigned char) egg[i]] = egglen - i - 1;
Other sign issues occur when buflen < egglen
// while (shift <= buflen - egglen)
// change to avoid underflow
while (shift + egglen <= buflen)
Also consider opening the file in binary and:
ssize_t shift,i; --> size_t shift,i;
int table[HORSPOOL_COUNT]; -- > size_t table[HORSPOOL_COUNT];
Add ()
s to #define MAX(a, b) (((a) > (b)) ? (a) : (b))
この記事はインターネットから収集されたものであり、転載の際にはソースを示してください。
侵害の場合は、連絡してください[email protected]
コメントを追加