summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Florian Pritz <bluewind@xinu.at> 2014-02-13 21:32:22 +0100
committer: Florian Pritz <bluewind@xinu.at> 2014-02-13 21:32:22 +0100
commit: f6c6b964fc8750fe46cf1d53d067ff135ec59708 (patch)
tree: af1cbad7845d80939dd5983db3ea9c005b647cf0
parent: 457cbccf334faa04f790d4654af364c243c18e94 (diff)
download: bin-f6c6b964fc8750fe46cf1d53d067ff135ec59708.tar.gz
download: bin-f6c6b964fc8750fe46cf1d53d067ff135ec59708.tar.xz
add split-kochabo.sh
Signed-off-by: Florian Pritz <bluewind@xinu.at>
-rwxr-xr-x generate-kochabo-index.sh 79
-rwxr-xr-x split-kochabo.sh 48
2 files changed, 127 insertions, 0 deletions
diff --git a/generate-kochabo-index.sh b/generate-kochabo-index.sh
new file mode 100755
index 0000000..2bc6c2c
--- /dev/null
+++ b/generate-kochabo-index.sh
@@ -0,0 +1,79 @@
+#!/bin/bash
+
+output="index.html"
+
+cat <<EOF >"$output"
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN" "http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">
+<html xmlns="http://www.w3.org/1999/xhtml">
+
+<head>
+ <title></title>
+ <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
+ <style type="text/css">
+ html {
+ min-height: 100%;
+ background-color: #fff;
+ }
+
+ * {
+ margin: 0;
+ padding: 0;
+ }
+
+ a {
+ text-decoration: none;
+ color: #048;
+ }
+
+ a:hover {
+ color: #06C;
+ }
+
+ img {
+ border:0;
+ width: 220px;
+ }
+
+ .thumbnail {
+ line-height: 0;
+ }
+
+ div.thumbnail {
+ display: inline-block;
+ text-align: center;
+ vertical-align: middle;
+ width: 232px;
+ padding: 20px 0;
+ }
+
+ div.thumbnail p {
+ line-height: 1em;
+ padding-top: 10px;
+ }
+
+ img.thumbnail {
+ margin:2px;
+ border: 1px solid #fff;
+ }
+
+ body {
+ color: #444;
+ font-family: "Bitstream Vera Sans", Monospace;
+ margin-top: 30px;
+ text-align: center;
+ }
+ </style>
+</head>
+
+<body>
+EOF
+
+# Prints $1 with the characters that are special in HTML text and in quoted
+# attribute values replaced by entities, so that a file name containing
+# e.g. an apostrophe or an ampersand cannot break the generated markup.
+html_escape() {
+  printf '%s' "$1" \
+    | sed -e 's/&/\&amp;/g' -e 's/</\&lt;/g' -e 's/>/\&gt;/g' \
+      -e 's/"/\&quot;/g' -e "s/'/\&#39;/g"
+}
+
+# Append one thumbnail entry per JPEG in the current directory. Each entry
+# links to the PDF with the same base name and shows that name as caption.
+# NOTE: names are HTML-escaped only, not percent-encoded; a '#', '?' or '%'
+# in a file name would still produce a broken link.
+for i in *.jpg; do
+  # Without any match the pattern is passed through literally; skip it
+  # instead of writing a bogus "*" entry.
+  [[ -e $i ]] || continue
+
+  n=$(html_escape "${i%.jpg}")
+  # <img> is self-closed and carries alt text, as the XHTML doctype requires.
+  printf '%s\n' "<div class='thumbnail'><a href='$n.pdf'><img src='$n.jpg' alt='$n' /></a><p>$n</p></div>" >> "$output"
+done
+
+# Finish the document: close <body> and <html>, one tag per line.
+printf '%s\n' '</body>' '</html>' >>"$output"
diff --git a/split-kochabo.sh b/split-kochabo.sh
new file mode 100755
index 0000000..8f30f32
--- /dev/null
+++ b/split-kochabo.sh
@@ -0,0 +1,48 @@
+#!/bin/bash
+#
+# This script can extract recipes from the PDFs mailed to customers of
+# kochabo.at.
+# Recipes from 2013 need extra processing for the file name in get_name().
+#
+# Usage: split-kochabo.sh FILE.pdf...
+#
+# Each recipe found is stored as extracted/<name>.pdf together with a
+# thumbnail extracted/<name>.jpg (paths relative to the current directory).
+# Afterwards generate-kochabo-index.sh is run inside extracted/.
+# Commands used: gs, pdftotext, pdfimages and generate-kochabo-index.sh,
+# all looked up via PATH.
+#
+
+# Abort on the first failing command.
+# NOTE(review): do not add "-o pipefail" without reworking get_name(): its
+# grep exits non-zero when nothing matches, and the main loop relies on
+# simply getting an empty name back in that case.
+set -e
+
+# Copies an inclusive range of pages into a new PDF using Ghostscript's
+# pdfwrite device.
+#
+# Arguments:
+#   $1  - first page of the range (passed to gs as -dFirstPage)
+#   $2  - last page of the range (passed to gs as -dLastPage)
+#   $3  - path of the PDF to write
+#   $4+ - remaining arguments, handed to gs unchanged (the input file(s))
+# Returns: the exit status of gs.
+extract_pages() {
+  local first=$1
+  local last=$2
+  local output=$3
+  shift 3
+
+  # Each option is one quoted word so that no value is word-split or globbed.
+  gs -dNOPAUSE -dQUIET -dBATCH \
+    "-dFirstPage=$first" "-dLastPage=$last" \
+    "-sOutputFile=$output" -sDEVICE=pdfwrite "$@"
+}
+
+get_name() {
+ pdftotext "$1" /dev/stdout | grep -B10 "Nährwertangaben pro Person" | sed '/[1-5]$/d; $d' | paste -sd " "
+ # needed for older files with all uppercase names
+ #| awk '{print tolower($0)}' sed 's/\b\(.\)/\u\1/g; s#\bMit\b#mit#g; s#\bUnd\b#und#g; s#\bAuf\b#auf#g; s#\s\+$##;'
+}
+
+mkdir -p extracted
+
+skip_pages=4
+for file; do
+ for i in 1 2 3 4 5; do
+ extract_pages $((skip_pages+((i-1)*2)+1)) $((skip_pages+((i-1)*2)+2)) rezept-$i.pdf "$file"
+ src=rezept-$i.pdf
+ name="$(get_name $src)"
+
+ if [[ -z $name ]]; then
+ rm "rezept-$i.pdf"
+ continue
+ fi
+
+ mv "$src" "extracted/$name.pdf"
+
+ pdfimages -j "extracted/$name.pdf" "extracted/$name"
+ rm -f extracted/*-{001,002,003,004}.{jpg,ppm}
+ mv "extracted/$name-000.jpg" "extracted/$name.jpg"
+ done
+done
+
+cd extracted
+generate-kochabo-index.sh