summaryrefslogtreecommitdiffstats
path: root/urlencode
diff options
context:
space:
mode:
Diffstat (limited to 'urlencode')
-rwxr-xr-xurlencode125
1 files changed, 125 insertions, 0 deletions
diff --git a/urlencode b/urlencode
new file mode 100755
index 0000000..c472ab8
--- /dev/null
+++ b/urlencode
@@ -0,0 +1,125 @@
+:
+##########################################################################
+# Title : urlencode - encode URL data
+# Author : Heiner Steven (heiner.steven@odn.de)
+# Date : 2000-03-15
+# Requires : awk
+# Categories : File Conversion, WWW, CGI
+# SCCS-Id. : @(#) urlencode 1.4 06/10/29
+##########################################################################
+# Description
+# Encode data according to
+# RFC 1738: "Uniform Resource Locators (URL)" and
+# RFC 1866: "Hypertext Markup Language - 2.0" (HTML)
+#
+# This encoding is used e.g. for the MIME type
+# "application/x-www-form-urlencoded"
+#
+# Notes
+# o The default behaviour is not to encode the line endings. This
+# may not be what was intended, because the result will be
+# multiple lines of output (which cannot be used in a URL or an
+# HTTP "POST" request). If the desired output should be one
+# line, use the "-l" option.
+#
+# o The "-l" option assumes that the end-of-line is denoted by
+# the character LF (ASCII 10). This is not true for text files
+# from Windows, where a line ends with the two characters CR LF
+# (ASCII 13 10), or from classic Mac OS (a single CR, ASCII 13).
+# We use this for symmetry; data processed in the following way:
+# cat | urlencode -l | urldecode -l
+# should (and will) result in the original data
+#
+# o Large lines (or binary files) will break many AWK
+# implementations. If you get the message
+# awk: record `...' too long
+# record number xxx
+# consider using GNU AWK (gawk).
+#
+# o urlencode will always terminate its output with an EOL
+# character
+#
+# Thanks to Stefan Brozinski for pointing out a bug related to non-standard
+# locales.
+#
+# See also
+# urldecode
+##########################################################################
+
+PN=`basename "$0"` # Program name (base name of $0); prefixes usage text and messages
+VER='1.4' # Version number printed in the usage text
+
+: ${AWK=awk} # awk command to run; only defaults to "awk" when AWK is unset
+
+Usage () {    # Print name, version and calling syntax on stderr, then exit 1
+    echo "$PN - encode URL data, $VER
+usage: $PN [-l] [file ...]
+ -l: encode line endings (result will be one line of output)
+
+The default is to encode each input line on its own." 1>&2
+    exit 1
+}
+
+Msg () {
+ for MsgLine
+ do echo "$PN: $MsgLine" >&2
+ done
+}
+
+Fatal () { Msg "$@"; exit 1; } # Print the given messages with Msg, then exit with status 1
+
+# Parse the options with the built-in "getopts" rather than with
+# set -- `getopt ...`: the latter makes the shell split and glob-expand
+# every argument again, so a file name containing white space or
+# wildcard characters would never reach awk intact. Diagnostics of
+# getopts are discarded; an invalid option shows up as "?" and is
+# answered with the usage text, just like "-h".
+EncodeEOL=no                    # "yes" if line endings are to be encoded (-l)
+while getopts hl Option 2>/dev/null
+do
+    case "$Option" in
+        l)  EncodeEOL=yes;;
+        *)  Usage;;             # -h or an invalid option
+    esac
+done
+shift `expr $OPTIND - 1`        # leave only the file names in "$@"
+
+LC_ALL=C; LANG=C; export LC_ALL LANG    # LC_ALL overrides LANG, so set both
+$AWK '
+    BEGIN {
+        # Written for very basic awk implementations: a character is
+        # converted to its code with the lookup table ord[], and the
+        # two-digit hexadecimal output is built from hextab[] without
+        # the printf("%02X") feature. ord[] covers the codes 1..255 only.
+
+        EOL = "%0A"             # "end of line" string (encoded)
+        split ("1 2 3 4 5 6 7 8 9 A B C D E F", hextab, " ")
+        hextab [0] = 0
+        for ( i=1; i<=255; ++i ) ord [ sprintf ("%c", i) "" ] = i + 0
+        if ("'"$EncodeEOL"'" == "yes") EncodeEOL = 1; else EncodeEOL = 0
+    }
+    {
+        encoded = ""
+        for ( i=1; i<=length ($0); ++i ) {
+            c = substr ($0, i, 1)
+            if ( c ~ /[a-zA-Z0-9.-]/ ) {
+                encoded = encoded c         # safe character
+            } else if ( c == " " ) {
+                encoded = encoded "+"       # special handling
+            } else {
+                # unsafe character, encode it as a two-digit hex-number
+                lo = ord [c] % 16
+                hi = int (ord [c] / 16);
+                encoded = encoded "%" hextab [hi] hextab [lo]
+            }
+        }
+        if ( EncodeEOL ) {
+            printf ("%s", encoded EOL)
+        } else {
+            print encoded
+        }
+    }
+    END {
+        #if ( EncodeEOL ) print ""
+    }
+' "$@"