summaryrefslogtreecommitdiffstats
path: root/split-kochabo.sh
diff options
context:
space:
mode:
Diffstat (limited to 'split-kochabo.sh')
-rwxr-xr-xsplit-kochabo.sh48
1 files changed, 48 insertions, 0 deletions
diff --git a/split-kochabo.sh b/split-kochabo.sh
new file mode 100755
index 0000000..8f30f32
--- /dev/null
+++ b/split-kochabo.sh
@@ -0,0 +1,48 @@
+#!/bin/bash
+#
+# This script extracts the individual recipes from the PDFs that kochabo.at
+# mails to its customers.
+# Recipes from 2013 have all-uppercase names and need extra processing of the file name; see the commented-out stage in get_name().
+#
+
+set -e
+
+extract_pages() {
+ local first=$1
+ local last=$2
+ local output=$3
+ shift 3
+
+ gs -dNOPAUSE -dQUIET -dBATCH -dFirstPage=$first -dLastPage=$last -sOutputFile="$output" -sDEVICE=pdfwrite "$@"
+}
+
+get_name() { # print the text used as file name for the recipe PDF "$1": the lines kept from above the marker, joined by spaces
+  pdftotext "$1" - | grep -B10 "Nährwertangaben pro Person" | sed '/[1-5]$/d; $d' | paste -sd " " -
+  # For older files with all-uppercase names, append this title-casing stage to the pipeline above:
+  #| awk '{print tolower($0)}' | sed 's/\b\(.\)/\u\1/g; s#\bMit\b#mit#g; s#\bUnd\b#und#g; s#\bAuf\b#auf#g; s#\s\+$##;'
+}
+
+mkdir -p extracted
+
+# For each input PDF: skip the first $skip_pages pages, then split the next ten pages into five two-page PDFs.
+skip_pages=4
+for file; do
+  for i in 1 2 3 4 5; do
+    src=rezept-$i.pdf
+    extract_pages $((skip_pages + (i - 1) * 2 + 1)) $((skip_pages + (i - 1) * 2 + 2)) "$src" "$file"
+    name=$(get_name "$src")
+    name=${name//\//-} # a "/" in the extracted text would otherwise be taken as a directory separator by mv
+    if [[ -z $name ]]; then
+      rm -f -- "$src" # no name found: discard; -f so a missing file does not abort the run under set -e
+      continue
+    fi
+
+    mv -- "$src" "extracted/$name.pdf"
+    pdfimages -j "extracted/$name.pdf" "extracted/$name"
+    rm -f extracted/*-{001,002,003,004}.{jpg,ppm} # drop extracted images 001-004; image 000 is kept below
+    mv "extracted/$name-000.jpg" "extracted/$name.jpg"
+  done
+done
+
+cd extracted
+generate-kochabo-index.sh