add trimmer; partextract.c

Signed-off-by: Florian Pritz <bluewind@xinu.at>
author: Florian Pritz <bluewind@xinu.at> 2012-10-16 15:08:18 +0200
committer: Florian Pritz <bluewind@xinu.at> 2012-10-16 15:08:18 +0200
commit: 900aa95d4dc4254cf54328c9466b801a38fa540d (patch)
tree: 328eca1ba834478933b3482c967fea245e1e6da9 /partextract.c
parent: 27052abe40c471807fc3fe2d97b775ac4ec9c44c (diff)
download: bin-900aa95d4dc4254cf54328c9466b801a38fa540d.tar.gz
bin-900aa95d4dc4254cf54328c9466b801a38fa540d.tar.xz
1 files changed, 84 insertions, 0 deletions
diff --git a/partextract.c b/partextract.c
new file mode 100755
index 0000000..ad5e55b
--- /dev/null
+++ b/partextract.c
@@ -0,0 +1,84 @@
+#!/usr/bin/tcc -run -O2
+#include <stdio.h>
+#include <string.h>
+
+/* Feed byte c to a match of tok that has `matched` (< strlen(tok)) bytes so far
+ * and return the new match length. On a mismatch it falls back to the longest
+ * shorter match instead of 0, so "ab" is still found in "aab". */
+static size_t advance(const char *tok, size_t matched, char c)
+{
+	for (size_t k = matched + 1; k > 0; k--) {
+		if (tok[k - 1] == c && memcmp(tok, tok + matched + 1 - k, k - 1) == 0) {
+			return k;
+		}
+	}
+	return 0;
+}
+
+/* partextract <file> <start token> <end token>: write the first part of <file>
+ * running from the start token through the end token (both included) to
+ * stdout. Returns 0 on success, 1 on a usage, open or read error. */
+int main(int argc, char const* argv[])
+{
+	const char *beginning, *ending;
+	size_t beginning_found = 0, beginning_length;
+	size_t ending_found = 0, ending_length;
+	// TODO: getopt to enable this
+	int multimatching = 0;
+	FILE *fp;
+	size_t nread, i;
+	char buf[4096];
+	int done, failed;
+
+	if (argc != 4) {
+		puts("Usage: partextract <file> <start token> <end token>");
+		puts("");
+		puts("partextract outputs everything between the start and end token");
+		puts("(including the tokens). Only the first match will be output");
+		puts("unless multimatching is enabled and overlapping tokens will be ignored.");
+		return 1;
+	}
+
+	beginning = argv[2];
+	ending = argv[3];
+	beginning_length = strlen(beginning);
+	ending_length = strlen(ending);
+	done = (ending_length == 0); // an empty end token yields no output at all
+
+	fp = fopen(argv[1], "rb");
+	if (fp == NULL) {
+		perror(argv[1]);
+		return 1;
+	}
+
+	// Loop on fread's result instead of feof(): after a read error feof() never
+	// becomes true, so the loop would spin forever (e.g. a directory on Linux).
+	while (!done && (nread = fread(buf, 1, sizeof(buf), fp)) > 0) {
+		for (i = 0; i < nread && !done; i++) {
+			if (beginning_found != beginning_length) {
+				// still looking for the start token; print it whole once complete
+				beginning_found = advance(beginning, beginning_found, buf[i]);
+				if (beginning_found == beginning_length) {
+					fputs(beginning, stdout);
+				}
+				continue;
+			}
+
+			// output until we hit the ending token
+			fputc(buf[i], stdout);
+			ending_found = advance(ending, ending_found, buf[i]);
+			if (ending_found == ending_length && multimatching) {
+				beginning_found = ending_found = 0;
+			} else if (ending_found == ending_length) {
+				done = 1; // first match is complete, skip the rest of the file
+			}
+		}
+	}
+
+	failed = ferror(fp) != 0;
+	if (failed) {
+		perror(argv[1]);
+	}
+	fclose(fp);
+	return failed;
+}
author	Florian Pritz <bluewind@xinu.at>	2012-10-16 15:08:18 +0200
committer	Florian Pritz <bluewind@xinu.at>	2012-10-16 15:08:18 +0200
commit	900aa95d4dc4254cf54328c9466b801a38fa540d (patch)
tree	328eca1ba834478933b3482c967fea245e1e6da9 /partextract.c
parent	27052abe40c471807fc3fe2d97b775ac4ec9c44c (diff)
download	bin-900aa95d4dc4254cf54328c9466b801a38fa540d.tar.gz bin-900aa95d4dc4254cf54328c9466b801a38fa540d.tar.xz