/*
 * partextract.c - print the part of a file that lies between a start token
 * and an end token (both tokens included).
 *
 * Provenance: recovered from the patch "add trimmer; partextract.c",
 * commit 900aa95d4dc4254cf54328c9466b801a38fa540d by Florian Pritz,
 * Tue, 16 Oct 2012 15:08:18 +0200 (Signed-off-by: Florian Pritz).
 * The patch created the file with mode 100755 and with the interpreter
 * line "#!/usr/bin/tcc -run -O2" as its first line; re-add that line at the
 * very top if the file should stay directly executable through tcc.
 */
#include <stdio.h>
#include <string.h>

/*
 * Advance a streaming token match by one input byte.
 *
 * token   - the NUL-terminated token being searched for
 * matched - number of leading token bytes matched by the input seen so far;
 *           must be smaller than strlen(token)
 * c       - the next input byte
 *
 * Returns the new number of matched leading bytes: the length of the longest
 * prefix of `token` that is a suffix of "the bytes matched so far followed
 * by c".  Unlike a plain "reset to 0 on mismatch", this keeps partial matches
 * alive, so e.g. the token "ab" is found in the input "aab".
 */
static size_t advance_match(const char *token, size_t matched, char c)
{
	size_t keep;

	/* Try the longest candidate first; keep == matched + 1 is the plain
	 * "next byte matches" case. */
	for (keep = matched + 1; keep > 0; keep--) {
		if (token[keep - 1] != c) {
			continue;
		}
		/* The keep-1 bytes before c in the input are the tail of the
		 * already matched prefix, i.e. token[matched+1-keep .. matched). */
		if (memcmp(token, token + (matched + 1 - keep), keep - 1) == 0) {
			return keep;
		}
	}

	return 0;
}

/*
 * Usage: partextract <file> <start token> <end token>
 *
 * Copies to stdout everything from the first occurrence of the start token
 * up to and including the following end token.  Nothing is printed before
 * the start token has been seen completely.
 *
 * Returns 0 on success and 1 on a usage error, when the file cannot be
 * opened, or when reading it fails.
 */
int main(int argc, char const* argv[])
{
	const char *beginning;
	const char *ending;
	size_t beginning_found = 0, beginning_length;
	size_t ending_found = 0, ending_length;

	// TODO: getopt to enable this
	int multimatching = 0;

	FILE *fp;
	size_t got;
	size_t i;
	char buf[4096];
	int status = 0;

	if (argc != 4) {
		puts("Usage: partextract <file> <start token> <end token>");
		puts("");
		puts("partextract outputs everything between the start and end token");
		puts("(including the tokens). Only the first match will be output");
		puts("unless multimatching is enabled and overlapping tokens will be ignored.");
		return 1;
	}

	beginning = argv[2];
	ending = argv[3];

	beginning_length = strlen(beginning);
	ending_length = strlen(ending);

	fp = fopen(argv[1], "rb");
	if (fp == NULL) {
		perror(argv[1]);
		return 1;
	}

	// ending_found == ending_length means the (single) match is complete, so
	// there is no reason to keep reading.  Looping on fread()'s result rather
	// than on feof() also terminates when a read error occurs.
	while (ending_found != ending_length
	       && (got = fread(buf, sizeof(char), sizeof(buf), fp)) > 0) {
		for (i = 0; i < got && ending_found != ending_length; i++) {
			// find the starting token
			if (beginning_found != beginning_length) {
				beginning_found = advance_match(beginning, beginning_found, buf[i]);
				if (beginning_found == beginning_length) {
					// We only start outputting once the complete start
					// token was seen; print it as a whole so it is part
					// of the output.  Its last byte is not fed to the
					// end-token matcher: tokens may not overlap.
					fputs(beginning, stdout);
				}
				continue;
			}

			// output until we hit the ending token
			fputc(buf[i], stdout);
			ending_found = advance_match(ending, ending_found, buf[i]);

			if (multimatching && ending_found == ending_length) {
				// start over and look for the next start token
				beginning_found = 0;
				ending_found = 0;
			}
		}
	}

	if (ferror(fp)) {
		perror(argv[1]);
		status = 1;
	}

	fclose(fp);

	return status;
}