diff options
author | Florian Pritz <bluewind@xinu.at> | 2014-02-13 21:32:22 +0100 |
---|---|---|
committer | Florian Pritz <bluewind@xinu.at> | 2014-02-13 21:32:22 +0100 |
commit | f6c6b964fc8750fe46cf1d53d067ff135ec59708 (patch) | |
tree | af1cbad7845d80939dd5983db3ea9c005b647cf0 | |
parent | 457cbccf334faa04f790d4654af364c243c18e94 (diff) | |
download | bin-f6c6b964fc8750fe46cf1d53d067ff135ec59708.tar.gz bin-f6c6b964fc8750fe46cf1d53d067ff135ec59708.tar.xz |
add split-kochabo.sh
Signed-off-by: Florian Pritz <bluewind@xinu.at>
-rwxr-xr-x | generate-kochabo-index.sh | 79 | ||||
-rwxr-xr-x | split-kochabo.sh | 48 |
2 files changed, 127 insertions, 0 deletions
# diff --git a/generate-kochabo-index.sh b/generate-kochabo-index.sh
# new file mode 100755
#!/bin/bash
#
# Writes index.html in the current directory: a thumbnail gallery with one
# entry per *.jpg file found there. Each entry shows the image, links to the
# PDF with the same base name and prints that base name as the caption.

output="index.html"

# html_escape STRING
# Prints STRING with the characters that are special in HTML text and in
# quoted attribute values replaced by entities, so that file names
# containing & < > ' or " cannot break the generated markup.
# sed is used instead of ${var//pat/rep} because the meaning of "&" in the
# replacement part of that expansion differs between bash versions.
html_escape() {
  printf '%s' "$1" \
    | sed -e 's/&/\&amp;/g' \
      -e 's/</\&lt;/g' \
      -e 's/>/\&gt;/g' \
      -e "s/'/\&#39;/g" \
      -e 's/"/\&quot;/g'
}

# Prints the static document head (doctype, stylesheet, opening <body>).
# The heredoc delimiter is quoted: the text is emitted literally.
write_index_header() {
  cat <<'EOF'
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN" "http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">
<html xmlns="http://www.w3.org/1999/xhtml">

<head>
  <title></title>
  <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
  <style type="text/css">
  html {
    min-height: 100%;
    background-color: #fff;
  }

  * {
    margin: 0;
    padding: 0;
  }

  a {
    text-decoration: none;
    color: #048;
  }

  a:hover {
    color: #06C;
  }

  img {
    border:0;
    width: 220px;
  }

  .thumbnail {
    line-height: 0;
  }

  div.thumbnail {
    display: inline-block;
    text-align: center;
    vertical-align: middle;
    width: 232px;
    padding: 20px 0;
  }

  div.thumbnail p {
    line-height: 1em;
    padding-top: 10px;
  }

  img.thumbnail {
    margin:2px;
    border: 1px solid #fff;
  }

  body {
    color: #444;
    font-family: "Bitstream Vera Sans", Monospace;
    margin-top: 30px;
    text-align: center;
  }
  </style>
</head>

<body>
EOF
}

# Prints the closing tags of the document.
write_index_footer() {
  cat <<'EOF'
</body>
</html>
EOF
}

# generate_index
# (Re)creates "$output" from the *.jpg files in the current directory.
generate_index() {
  local jpg name

  write_index_header >"$output"

  for jpg in *.jpg; do
    # Without any match the glob stays the literal "*.jpg"; skip it instead
    # of emitting an entry for a file named "*".
    [[ -e $jpg ]] || continue
    name=$(html_escape "${jpg%.jpg}")
    printf "<div class='thumbnail'><a href='%s.pdf'><img src='%s.jpg' alt='%s' /></a><p>%s</p></div>\n" \
      "$name" "$name" "$name" "$name" >>"$output"
  done

  write_index_footer >>"$output"
}

generate_index

# diff --git a/split-kochabo.sh b/split-kochabo.sh
# new file mode 100755
#!/bin/bash
#
# This script can extract recipes from the PDFs mailed to customers of
# kochabo.at.
# Recipes from 2013 need extra processing for the file name in get_name().
#

# Abort on the first failing command. pipefail is deliberately NOT enabled:
# get_name() depends on "grep found nothing" being a non-fatal outcome that
# simply yields an empty name.
set -e

# Unmatched globs expand to nothing, so the image handling below never sees
# a literal pattern.
shopt -s nullglob

# extract_pages FIRST LAST OUTPUT INPUT...
# Writes pages FIRST..LAST (inclusive) of the INPUT PDF(s) to the
# PDF file OUTPUT using Ghostscript's pdfwrite device.
extract_pages() {
  local first=$1
  local last=$2
  local output=$3
  shift 3

  gs -dNOPAUSE -dQUIET -dBATCH "-dFirstPage=$first" "-dLastPage=$last" \
    "-sOutputFile=$output" -sDEVICE=pdfwrite "$@"
}

# get_name PDF
# Prints the recipe title found in PDF as a single line: the (up to) 10 text
# lines preceding "Nährwertangaben pro Person", minus lines ending in a digit
# 1-5 and minus the marker line itself, joined by spaces.
# Prints an empty line when the marker text is not found.
get_name() {
  pdftotext "$1" /dev/stdout \
    | grep -B10 "Nährwertangaben pro Person" \
    | sed '/[1-5]$/d; $d' \
    | paste -sd " "
  # Needed for older files with all uppercase names; append to the pipeline:
  #   | awk '{print tolower($0)}' \
  #   | sed 's/\b\(.\)/\u\1/g; s#\bMit\b#mit#g; s#\bUnd\b#und#g; s#\bAuf\b#auf#g; s#\s\+$##;'
}

mkdir -p extracted

# Number of leading pages before the first recipe; every recipe then
# occupies two consecutive pages.
skip_pages=4
for file; do
  for i in 1 2 3 4 5; do
    first_page=$((skip_pages + (i - 1) * 2 + 1))
    src=rezept-$i.pdf
    extract_pages "$first_page" "$((first_page + 1))" "$src" "$file"

    name=$(get_name "$src")
    # A "/" in the title would be treated as a directory separator and make
    # the mv/pdfimages calls below fail; replace it.
    name=${name//\//-}

    if [[ -z $name ]]; then
      # No recipe title on these pages: discard the extracted pages.
      rm -- "$src"
      continue
    fi

    mv -- "$src" "extracted/$name.pdf"

    pdfimages -j "extracted/$name.pdf" "extracted/$name"

    # Keep the lowest-numbered JPEG as the thumbnail (this is NAME-000.jpg
    # whenever the first embedded image is a JPEG) and drop every other
    # image pdfimages wrote for this recipe.
    images=("extracted/$name"-[0-9][0-9][0-9].jpg)
    if ((${#images[@]} > 0)); then
      mv -- "${images[0]}" "extracted/$name.jpg"
    else
      printf 'warning: no JPEG image found in %s\n' "extracted/$name.pdf" >&2
    fi
    rm -f -- "extracted/$name"-[0-9][0-9][0-9].{jpg,ppm,pbm}
  done
done

# Build the gallery for everything in extracted/; the generator script is
# looked up via PATH.
cd extracted
generate-kochabo-index.sh