urlencode.awk 3.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126
  1. # Taken from http://www.shelldorado.com/scripts/cmds/urlencode
  2. ##########################################################################
  3. # Title : urlencode - encode URL data
  4. # Author : Heiner Steven (heiner.steven@odn.de)
  5. # Date : 2000-03-15
  6. # Requires : awk
  7. # Categories : File Conversion, WWW, CGI
  8. # SCCS-Id. : @(#) urlencode 1.4 06/10/29
  9. ##########################################################################
  10. # Description
  11. # Encode data according to
  12. # RFC 1738: "Uniform Resource Locators (URL)" and
  13. # RFC 1866: "Hypertext Markup Language - 2.0" (HTML)
  14. #
  15. # This encoding is used e.g. for the MIME type
  16. # "application/x-www-form-urlencoded"
  17. #
  18. # Notes
  19. # o The default behaviour is not to encode the line endings. This
  20. # may not be what was intended, because the result will be
  21. # multiple lines of output (which cannot be used in a URL or an
  22. # HTTP "POST" request). If the desired output should be one
  23. # line, use the "-l" option.
  24. #
  25. # o The "-l" option assumes that the end-of-line is denoted by
  26. # the character LF (ASCII 10). This is not true for Windows,
  27. # where a line ends with the two characters CR LF (ASCII 13 10),
  28. # or for classic Mac OS, where it ends with a single CR (ASCII 13).
  29. # We use this for symmetry; data processed in the following way:
  30. # cat | urlencode -l | urldecode -l
  31. # should (and will) result in the original data
  32. #
  33. # o Large lines (or binary files) will break many AWK
  34. # implementations. If you get the message
  35. # awk: record `...' too long
  36. # record number xxx
  37. # consider using GNU AWK (gawk).
  38. #
  39. # o urlencode will always terminate its output with an EOL
  40. # character
  41. #
  42. # Thanks to Stefan Brozinski for pointing out a bug related to non-standard
  43. # locales.
  44. #
  45. # See also
  46. # urldecode
  47. ##########################################################################
  # Program name (last path component of $0), used as a message prefix.
  PN=$(basename "$0")
  # Version string shown in the usage message.
  VER=1.4
  # awk implementation to run; a value already set by the caller wins.
  AWK=${AWK-awk}
  # Usage - write the help text to standard error and exit with status 1.
  # Reads the globals PN (program name) and VER (version).
  Usage ()
  {
      {
          echo "$PN - encode URL data, $VER"
          echo "usage: $PN [-l] [file ...]"
          echo "-l: encode line endings (result will be one line of output)"
          echo "The default is to encode each input line on its own."
      } >&2
      exit 1
  }
  # Msg - print each argument on its own line to standard error,
  # prefixed with the program name ("$PN: ").
  Msg ()
  {
      while [ $# -gt 0 ]
      do
          echo "$PN: $1" >&2
          shift
      done
  }
  # Fatal - report the given message lines via Msg, then exit with status 1.
  Fatal ()
  {
      Msg "$@"
      exit 1
  }
  # Parse the command line.
  #
  # The POSIX "getopts" builtin is used instead of re-setting the
  # positional parameters from the output of the external "getopt"
  # command: that output is subject to word splitting and pathname
  # expansion, so file names containing blanks or wildcard characters
  # would be broken apart before they reach awk.
  #
  # The leading ":" in the option string selects silent mode: getopts
  # prints no diagnostic of its own and reports an unknown option as "?",
  # which is answered with the usage message (as is "-h").
  #
  # Options must precede the file names; "--" ends option processing.
  EncodeEOL=no
  while getopts :hl Opt
  do
      case "$Opt" in
          l)  EncodeEOL=yes;;
          h)  Usage;;
          *)  Usage;;                 # unknown option
      esac
  done
  # Drop the parsed options; what remains are the file names (if any).
  shift $((OPTIND - 1))
  # Force the "C" locale so that awk handles its input byte by byte.
  # LANG alone is not sufficient: it has the lowest priority of the locale
  # variables, so an LC_ALL or LC_CTYPE inherited from the caller would
  # still select a multibyte locale. In such a locale a character can be
  # missing from the ord[] table below and be encoded incorrectly.
  LANG=C
  LC_ALL=C
  export LANG LC_ALL

  # Encode every line of the named files (or of standard input if there
  # are none). Reads the shell variables AWK (awk command) and EncodeEOL
  # ("yes": append "%0A" to each line and emit no newline, so the result
  # is a single line; anything else: one output line per input line).
  $AWK '
      BEGIN {
          # We assume an awk implementation that is just plain dumb.
          # We will convert a character to its ASCII value with the
          # table ord[], and produce two-digit hexadecimal output
          # without the printf("%02X") feature.
          EOL = "%0A"                 # "end of line" string (encoded)

          # hextab[n] is the hexadecimal digit for n (0..15); split()
          # fills indices 1..15, index 0 is added by hand.
          split ("1 2 3 4 5 6 7 8 9 A B C D E F", hextab, " ")
          hextab [0] = 0

          # ord[c] is the numeric code (1..255) of the character c.
          for ( i=1; i<=255; ++i ) ord [ sprintf ("%c", i) "" ] = i + 0

          # The shell inserts the value of its EncodeEOL variable here.
          if ("'"$EncodeEOL"'" == "yes") EncodeEOL = 1; else EncodeEOL = 0
      }
      {
          encoded = ""
          len = length ($0)           # computed once per record
          for ( i=1; i<=len; ++i ) {
              c = substr ($0, i, 1)
              if ( c ~ /[a-zA-Z0-9.-]/ ) {
                  encoded = encoded c         # safe character
              } else if ( c == " " ) {
                  encoded = encoded "+"       # special handling
              } else {
                  # unsafe character, encode it as a two-digit hex-number
                  lo = ord [c] % 16
                  hi = int (ord [c] / 16)
                  encoded = encoded "%" hextab [hi] hextab [lo]
              }
          }
          if ( EncodeEOL ) {
              # No newline is written in this mode; the line ending is
              # represented by the encoded EOL string only.
              printf ("%s", encoded EOL)
          } else {
              print encoded
          }
      }
  ' "$@"