#!/usr/bin/tcc -run -O2
#include <stdio.h>
#include <string.h>

/*
 * partextract FILE START_TOKEN END_TOKEN
 *
 * Copies to stdout everything in FILE from the first occurrence of
 * START_TOKEN up to and including the next occurrence of END_TOKEN that
 * begins after START_TOKEN (the two tokens are never allowed to overlap).
 * The file is processed as raw bytes, so binary input is fine.
 *
 * Exit status: 0 on success (even when nothing matched), 1 on a usage
 * error, when FILE cannot be opened or read, or when stdout cannot be
 * written.
 */

/*
 * Advance an incremental token match by one input byte.
 *
 * token   - NUL-terminated token being searched for (must be non-empty)
 * matched - number of leading bytes of token that the most recent input
 *           already matches; must be strictly less than strlen(token)
 * c       - the next input byte
 *
 * Returns the length of the longest prefix of token that is a suffix of
 * the input consumed so far (the previously matched bytes followed by c).
 * Falling back to that prefix instead of to zero is what lets "ab" be
 * found in "aab". The fallback search is quadratic in the token length in
 * the worst case, which is acceptable for short command-line tokens and
 * avoids allocating a failure table.
 */
static size_t advance_match(const char *token, size_t matched, char c)
{
    size_t k;

    for (k = matched + 1; k > 0; k--) {
        /* Candidate: token[0..k-1] against the last k consumed bytes,
         * i.e. token[matched+1-k .. matched-1] followed by c. */
        if (token[k - 1] == c &&
            memcmp(token, token + (matched + 1 - k), k - 1) == 0) {
            return k;
        }
    }
    return 0;
}

int main(int argc, char const *argv[])
{
    const char *beginning;
    const char *ending;
    size_t beginning_length, ending_length;
    size_t beginning_found = 0; /* bytes of the start token matched so far */
    size_t ending_found = 0;    /* bytes of the end token matched so far */
    // TODO: getopt to enable this
    int multimatching = 0;
    int done;
    int status = 0;
    FILE *fp;
    char buf[4096];
    size_t count, i;

    if (argc != 4) {
        puts("Usage: partextract FILE START_TOKEN END_TOKEN");
        puts("");
        puts("partextract outputs everything between the start and end token");
        puts("(including the tokens). Only the first match will be output");
        puts("unless multimatching is enabled and overlapping tokens will be ignored.");
        return 1;
    }

    beginning = argv[2];
    ending = argv[3];
    beginning_length = strlen(beginning);
    ending_length = strlen(ending);

    fp = fopen(argv[1], "rb");
    if (fp == NULL) {
        perror(argv[1]);
        return 1;
    }

    /* An empty end token counts as already matched, so nothing is ever
     * extracted. An empty start token counts as matched before the first
     * byte, so output starts at the beginning of the file. */
    done = (ending_length == 0);

    /* Loop on fread's return value rather than on feof(): after a read
     * error feof() stays false, so testing it would spin forever. */
    while (!done && (count = fread(buf, 1, sizeof buf, fp)) > 0) {
        for (i = 0; i < count && !done; i++) {
            char c = buf[i];

            if (beginning_found != beginning_length) {
                /* Still looking for the start token. */
                beginning_found = advance_match(beginning, beginning_found, c);
                if (beginning_found == beginning_length) {
                    /* Tokens may not overlap: the end-token search starts
                     * with the byte after the start token. The whole start
                     * token is emitted here, so this byte must not be
                     * written again below. */
                    ending_found = 0;
                    fputs(beginning, stdout);
                }
                continue;
            }

            /* Inside a match: echo the byte and watch for the end token. */
            fputc(c, stdout);
            ending_found = advance_match(ending, ending_found, c);
            if (ending_found == ending_length) {
                if (multimatching) {
                    /* Re-arm and look for the next start token. */
                    beginning_found = 0;
                    ending_found = 0;
                } else {
                    /* Only the first match is wanted; stop reading. */
                    done = 1;
                }
            }
        }
    }

    if (ferror(fp)) {
        perror(argv[1]);
        status = 1;
    }
    fclose(fp);

    /* Surface write failures (closed pipe, full disk, ...) to the caller. */
    if (fflush(stdout) == EOF || ferror(stdout)) {
        perror("stdout");
        status = 1;
    }

    return status;
}