summary refs log tree commit diff
path: root/scripts/web
diff options
context:
space:
mode:
Diffstat (limited to 'scripts/web')
-rwxr-xr-x scripts/web 148
1 files changed, 148 insertions, 0 deletions
diff --git a/scripts/web b/scripts/web
new file mode 100755
index 0000000..382b18e
--- /dev/null
+++ b/scripts/web
@@ -0,0 +1,148 @@
+#! /bin/sh
+## The web sucks. It is a mighty dismal kludge built out of a thousand
+## tiny dismal kludges all band-aided together, and now these bottom-line
+## clueless pinheads who never heard of "TCP handshake" want to run
+## *commerce* over the damn thing. Ye godz. Welcome to TV of the next
+## century -- six million channels of worthless shit to choose from, and
+## about as much security as today's cable industry!
+##
+## Having grown mightily tired of pain in the ass browsers, I decided
+## to build the minimalist client. It doesn't handle POST, just GETs, but
+## the majority of cgi forms handlers apparently ignore the method anyway.
+## A distinct advantage is that it *doesn't* pass on any other information
+## to the server, like Referer: or info about your local machine such as
+## Netscum tries to!
+##
+## Since the first version, this has become the *almost*-minimalist client,
+## but it saves a lot of typing now. And with netcat as its backend, it's
+## totally the balls. Don't have netcat? Get it here in /src/hacks!
+## _H* 950824, updated 951009 et seq.
+##
+## args: hostname [port]. You feed it the filename-parts of URLs.
+## In the loop, HOST, PORT, and SAVE do the right things; a null line
+## gets the previous spec again [useful for initial timeouts]; EOF to exit.
+## Relative URLs behave like a "cd" to wherever the last slash appears, or
+## just use the last component with the saved preceding "directory" part.
+## "\" clears the "filename" part and asks for just the "directory", and
+## ".." goes up one "directory" level while retaining the "filename" part.
+## Play around; you'll get used to it.
+
+if test "$1" = "" ; then
+ echo Needs hostname arg. >&2
+ exit 1
+fi
+umask 022
+
+# optional PATH fixup
+# PATH=${HOME}:${PATH} ; export PATH
+
+test "${PAGER}" || PAGER=more # expanded unquoted later so it may carry options
+BACKEND="nc -v -w 15" # expanded unquoted on purpose: must split into words
+TMPAGE=/tmp/web$$ # holds the most recently fetched page
+host="$1"
+port="80"
+if test "$2" != "" ; then
+ port="$2"
+fi
+
+spec="/"
+specD="/"
+specF=''
+saving=''
+
+# be vaguely smart about temp file usage. Use your own homedir if you're
+# paranoid about someone symlink-racing your shell script, jeez.
+rm -f "${TMPAGE}"
+test -f "${TMPAGE}" && echo "Can't use ${TMPAGE}" && exit 1
+
+# get loopy. Prompts use printf because "echo -n" ain't portable (an SV echo
+# wants "\c" instead); "read -r" keeps a typed "\" literal for the case below.
+while printf '%s ' "${specD}${specF}" && read -r spec ; do
+ case $spec in
+ HOST)
+ printf 'New host: '
+ read -r host
+ continue
+ ;;
+ PORT)
+ printf 'New port: '
+ read -r port
+ continue
+ ;;
+ SAVE)
+ printf 'Save file: '
+ read -r saving
+# if we've already got a page, save it
+ test "${saving}" && test -f "${TMPAGE}" &&
+ echo "=== ${host}:${specD}${specF} ===" >> "$saving" &&
+ cat "${TMPAGE}" >> "$saving" && echo '' >> "$saving"
+ continue
+ ;;
+# changing the logic a bit here. Keep a state-concept of "current dir"
+# and "current file". Dir is /foo/bar/ ; file is "baz" or null.
+# leading slash: create whole new state.
+ /*)
+ specF=$(printf '%s\n' "${spec}" | sed 's|.*/||')
+ specD=$(printf '%s\n' "${spec}" | sed 's|\(.*/\).*|\1|')
+ spec="${specD}${specF}"
+ ;;
+# embedded slash: adding to the path. "file" part can be blank, too
+ */*)
+ specF=$(printf '%s\n' "${spec}" | sed 's|.*/||')
+ specD=$(printf '%s\n' "${specD}${spec}" | sed 's|\(.*/\).*|\1|')
+ ;;
+# dotdot: jump "up" one level and just reprompt [confirms what it did...]
+ ..)
+ specD=$(printf '%s\n' "${specD}" | sed 's|\(.*/\)..*/|\1|')
+ continue
+ ;;
+# blank line: do nothing, which will re-get the current one
+ '')
+ ;;
+# hack-quoted blank line: "\" means just zero out "file" part
+ '\')
+ specF=''
+ ;;
+# sigh
+ '?')
+ echo Help yourself. Read the script fer krissake.
+ continue
+ ;;
+# anything else is taken as a "file" part
+ *)
+ specF=${spec}
+ ;;
+ esac
+
+# now put it together and stuff it down a connection. Some lame non-unix
+# http servers assume they'll never get simple-query format, and wait till
+# an extra newline arrives. If you're up against one of these, change
+# below to printf 'GET %s\n\n' "${spec}" | $BACKEND ...
+ spec="${specD}${specF}"
+ printf 'GET %s\n' "${spec}" | $BACKEND "$host" "$port" > "${TMPAGE}"
+ ${PAGER} "${TMPAGE}"
+
+# save in a format that still shows the URLs we hit after a de-html run
+ if test "${saving}" ; then
+ echo "=== ${host}:${spec} ===" >> "$saving"
+ cat "${TMPAGE}" >> "$saving"
+ echo '' >> "$saving"
+ fi
+done
+rm -f "${TMPAGE}"
+exit 0
+
+#######
+# Encoding notes, finally from RFC 1738:
+# %XX -- hex-encode of special chars
+# special chars allowed unencoded in a URL (besides alphanumerics): $_-.+!*'(),
+# relative names *not* described, but obviously used all over the place
+# transport://user:pass@host:port/path/name?query-string
+# wais: port 210, //host:port/database?search or /database/type/file?
+# cgi-bin/script?arg1=foo&arg2=bar&... scripts have to parse xxx&yyy&zzz
+# ISMAP imagemap stuff: /bin/foobar.map?xxx,yyy -- have to guess at coords!
+# local access-ctl files: ncsa: .htaccess ; cern: .www_acl
+#######
+# SEARCH ENGINES: fortunately, all are GET forms or at least work that way...
+# multi-word args for most cases: foo+bar
+# See 'websearch' for concise results of this research...