123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126 |
- # Taken from http://www.shelldorado.com/scripts/cmds/urlencode
- ##########################################################################
- # Title : urlencode - encode URL data
- # Author : Heiner Steven (heiner.steven@odn.de)
- # Date : 2000-03-15
- # Requires : awk
- # Categories : File Conversion, WWW, CGI
- # SCCS-Id. : @(#) urlencode 1.4 06/10/29
- ##########################################################################
- # Description
- # Encode data according to
- # RFC 1738: "Uniform Resource Locators (URL)" and
- # RFC 1866: "Hypertext Markup Language - 2.0" (HTML)
- #
- # This encoding is used e.g. for the MIME type
- # "application/x-www-form-urlencoded"
- #
- # Notes
- # o The default behaviour is not to encode the line endings. This
- # may not be what was intended, because the result will be
- # multiple lines of output (which cannot be used in a URL or an
- # HTTP "POST" request). If the desired output should be one
- # line, use the "-l" option.
- #
- # o The "-l" option assumes that the end-of-line is denoted by
- # the character LF (ASCII 10). This is not true for Windows
- # systems, where the end of a line is denoted by the two
- # characters CR LF (ASCII 13 10), or for classic Mac OS, which
- # uses a single CR (ASCII 13).
- # We use this for symmetry; data processed in the following way:
- # cat | urlencode -l | urldecode -l
- # should (and will) result in the original data
- #
- # o Large lines (or binary files) will break many AWK
- # implementations. If you get the message
- # awk: record `...' too long
- # record number xxx
- # consider using GNU AWK (gawk).
- #
- # o urlencode will always terminate its output with an EOL
- # character
- #
- # Thanks to Stefan Brozinski for pointing out a bug related to non-standard
- # locales.
- #
- # See also
- # urldecode
- ##########################################################################
# Program name (used as the prefix of all messages) and script version.
PN=$(basename "$0")
VER=1.4

# awk implementation to run.  A value of AWK that is already set (for
# example inherited from the environment) is kept; otherwise plain "awk".
AWK=${AWK-awk}
# Usage
# Print the usage summary (program name $PN, version $VER) on standard
# error and terminate the script with exit status 1.
Usage () {
    printf '%s\n' \
        "$PN - encode URL data, $VER" \
        "usage: $PN [-l] [file ...]" \
        "-l: encode line endings (result will be one line of output)" \
        "The default is to encode each input line on its own." >&2
    exit 1
}
# Msg MESSAGE...
# Write every MESSAGE argument to standard error, one per line, each
# prefixed with the program name ($PN) and a colon.  Prints nothing when
# called without arguments.
Msg () {
    for MsgLine
    do
        # printf rather than echo: several echo implementations expand
        # backslash escapes in their arguments, which would garble a
        # message containing e.g. "\n" or "\c".
        printf '%s\n' "$PN: $MsgLine" >&2
    done
}
# Fatal MESSAGE...
# Report every MESSAGE through Msg, then abort the script with exit
# status 1.
Fatal () {
    Msg "$@"
    exit 1
}
# ParseOptions ARG...
# Parse the leading command line options of ARG... with the POSIX
# "getopts" builtin.
#
# Sets:
#   EncodeEOL  "yes" if -l was given, otherwise "no"
#   OptCount   number of leading arguments consumed by option parsing
#              (including a terminating "--"); the caller shifts them away
#
# -h and any unknown option call Usage, which is expected to terminate
# the script.
#
# Why not "set -- `getopt ...`": the unquoted command substitution is
# word-split and glob-expanded, so file names containing white space or
# wildcard characters were broken apart, and the exit status seen by
# "|| Usage" was that of "set", never that of getopt, so invalid options
# could pass unnoticed.  getopts works on the original arguments and needs
# no external program.  Options must precede the first file name, as
# documented in the usage message.
ParseOptions () {
    EncodeEOL=no
    OPTIND=1                    # restart parsing on every call
    # The leading ":" keeps getopts silent; Usage reports the problem.
    while getopts :hl Opt
    do
        case "$Opt" in
            l)  EncodeEOL=yes;;
            h)  Usage;;
            *)  Usage;;         # unknown option
        esac
    done
    OptCount=$((OPTIND - 1))
}

ParseOptions "$@"
shift "$OptCount"               # "$@" now holds only the file names
# UrlEncode [file ...]
# URL-encode the named files (or standard input when none is given) and
# write the result to standard output.
#
#   - letters, digits, "." and "-" are copied unchanged
#   - a space becomes "+"
#   - every other character becomes "%XX" (two upper-case hex digits)
#
# Reads:
#   AWK        awk implementation to run (default: awk)
#   EncodeEOL  "yes": replace each line end with "%0A", producing a single
#              line of output without a trailing newline;
#              anything else: keep one output line per input line
#
# The exit status is that of awk.
UrlEncode () {
    # The encoder must see single bytes and ASCII-only character ranges.
    # Setting LANG alone is not enough: LC_ALL (and LC_CTYPE/LC_COLLATE)
    # take precedence over LANG, so a UTF-8 LC_ALL inherited from the
    # environment made multibyte-aware awk implementations encode
    # characters instead of bytes.  LC_ALL=C overrides all of them.
    LANG=C LC_ALL=C ${AWK-awk} '
    BEGIN {
        # We assume an awk implementation that is just plain dumb.
        # A character is converted to its numeric value with the table
        # ord[], and the two hexadecimal digits are looked up in
        # hextab[], so that printf("%02X") is not needed.
        EOL = "%0A"             # "end of line" string (encoded)
        split ("1 2 3 4 5 6 7 8 9 A B C D E F", hextab, " ")
        hextab [0] = 0
        for ( i=1; i<=255; ++i ) ord [ sprintf ("%c", i) "" ] = i + 0
        # The shell inserts the value of EncodeEOL into the program text.
        if ("'"${EncodeEOL-no}"'" == "yes") EncodeEOL = 1; else EncodeEOL = 0
    }
    {
        encoded = ""
        len = length ($0)       # loop invariant, computed once per line
        for ( i=1; i<=len; ++i ) {
            c = substr ($0, i, 1)
            if ( c ~ /[a-zA-Z0-9.-]/ ) {
                encoded = encoded c         # safe character
            } else if ( c == " " ) {
                encoded = encoded "+"       # special handling
            } else {
                # unsafe character, encode it as a two-digit hex number
                lo = ord [c] % 16
                hi = int (ord [c] / 16)
                encoded = encoded "%" hextab [hi] hextab [lo]
            }
        }
        if ( EncodeEOL ) {
            # No newline is written here: the line end itself is encoded.
            printf ("%s", encoded EOL)
        } else {
            print encoded
        }
    }
    ' "$@"
}

UrlEncode "$@"
|