#! /bin/sh
## The web sucks.  It is a mighty dismal kludge built out of a thousand
## tiny dismal kludges all band-aided together, and now these bottom-line
## clueless pinheads who never heard of "TCP handshake" want to run
## *commerce* over the damn thing.  Ye godz.  Welcome to TV of the next
## century -- six million channels of worthless shit to choose from, and
## about as much security as today's cable industry!
##
## Having grown mightily tired of pain in the ass browsers, I decided
## to build the minimalist client.  It doesn't handle POST, just GETs, but
## the majority of cgi forms handlers apparently ignore the method anyway.
## A distinct advantage is that it *doesn't* pass on any other information
## to the server, like Referer: or info about your local machine such as
## Netscum tries to!
##
## Since the first version, this has become the *almost*-minimalist client,
## but it saves a lot of typing now.  And with netcat as its backend, it's
## totally the balls.  Don't have netcat?  Get it here in /src/hacks!
## _H* 950824, updated 951009 et seq.
##
## args: hostname [port].  You feed it the filename-parts of URLs.
## In the loop, HOST, PORT, and SAVE do the right things; a null line
## gets the previous spec again [useful for initial timeouts]; EOF to exit.
## Relative URLs behave like a "cd" to wherever the last slash appears, or
## just use the last component with the saved preceding "directory" part.
## "\" clears the "filename" part and asks for just the "directory", and
## ".." goes up one "directory" level while retaining the "filename" part.
## Play around; you'll get used to it.

# A hostname is mandatory; without one there is nothing to connect to.
[ -n "$1" ] || {
  echo Needs hostname arg.
  exit 1
}
umask 022

# optional PATH fixup
# PATH=${HOME}:${PATH} ; export PATH

# Session defaults.  PAGER is honored when already set; BACKEND is kept as a
# single string on purpose so it word-splits into "nc" plus its flags at the
# point of use.  The port falls back to 80 when no second arg is given.
test "${PAGER}" || PAGER=more
BACKEND="nc -v -w 15"
host="$1"
port="${2:-80}"

# Browsing state: the last full request, its "directory" and "file" halves,
# and the file pages get appended to (empty means not saving).
spec="/"
specD="/"
specF=''
saving=''

# Fetched pages land in ${TMPAGE}.  A fixed /tmp/web$$ name can be guessed and
# pre-planted as a symlink, so the page file lives inside a directory only we
# can enter: mktemp -d where available, otherwise an atomic mkdir that fails
# outright if the name is already taken.  TMPAGE itself is deliberately NOT
# created here -- the SAVE command tests for its existence to decide whether
# a page has been fetched yet.
WEBTMPD=`mktemp -d /tmp/web.XXXXXX 2>/dev/null` || {
  WEBTMPD=/tmp/web$$
  mkdir -m 700 "${WEBTMPD}" || { echo "Can't use ${WEBTMPD}" ; exit 1 ; }
}
TMPAGE="${WEBTMPD}/page"

# Remove the scratch directory on any exit, and turn HUP/TERM into a normal
# exit so the cleanup runs for them too.  INT is left alone so that ^C in the
# backend or the pager behaves exactly as it did before.
trap 'rm -rf "${WEBTMPD}"' 0
trap 'exit 1' 1 15

# Append the page currently held in ${TMPAGE} to the save file, under a
# "=== host:path ===" banner so the URLs we hit still show after a de-html
# run.  $1 is the path part to print in the banner.
save_page () {
  printf '%s\n' "=== ${host}:$1 ===" >> "${saving}" &&
    cat "${TMPAGE}" >> "${saving}" &&
    echo '' >> "${saving}"
}

# get loopy.  Each pass prompts with the current "directory" + "file", reads
# one line, and either handles a keyword (HOST, PORT, SAVE, "..", "?") and
# reprompts, or updates specD/specF and fetches the result.  EOF ends it.
# printf stands in for the non-portable "echo -n", and "read -r" keeps
# backslashes literal so a lone "\" really reaches its case arm below rather
# than being swallowed as a line continuation.
while printf '%s ' "${specD}${specF}" && read -r spec ; do
  case "${spec}" in
  HOST)
    printf 'New host: '
    read -r host
    continue
  ;;
  PORT)
    printf 'New port: '
    read -r port
    continue
  ;;
  SAVE)
    printf 'Save file: '
    read -r saving
# if we've already got a page, save it
    test "${saving}" && test -f "${TMPAGE}" && save_page "${specD}${specF}"
    continue
  ;;
# changing the logic a bit here.  Keep a state-concept of "current dir"
# and "current file".  Dir is /foo/bar/ ; file is "baz" or null.
# leading slash: create whole new state.
  /*)
    specF=${spec##*/}
    specD=${spec%/*}/
  ;;
# embedded slash: adding to the path.  "file" part can be blank, too
  */*)
    specF=${spec##*/}
    specD=${specD}${spec%/*}/
  ;;
# dotdot: jump "up" one level and just reprompt [confirms what it did...]
  ..)
    specD=$(printf '%s\n' "${specD}" | sed 's|\(.*/\)..*/|\1|')
    continue
  ;;
# blank line: do nothing, which will re-get the current one
  '')
  ;;
# hack-quoted blank line: "\" means just zero out "file" part
  '\')
    specF=''
  ;;
# sigh
  '?')
    echo Help yourself.  Read the script fer krissake.
    continue
  ;;
# anything else is taken as a "file" part
  *)
    specF=${spec}
  ;;
  esac

# now put it together and stuff it down a connection.  Some lame non-unix
# http servers assume they'll never get simple-query format, and wait till
# an extra newline arrives.  If you're up against one of these, change the
# format below to 'GET %s\n\n'.
# $BACKEND and ${PAGER} stay unquoted on purpose: both may carry flags that
# have to split into separate words.
  spec="${specD}${specF}"
  printf 'GET %s\n' "${spec}" | $BACKEND "${host}" "${port}" > "${TMPAGE}"
  ${PAGER} "${TMPAGE}"

# save in a format that still shows the URLs we hit after a de-html run
  if test "${saving}" ; then
    save_page "${spec}"
  fi
done
rm -f "${TMPAGE}"
exit 0

#######
# Encoding notes, finally from RFC 1738:
# %XX -- hex-encode of special chars
# allowed alphas in a URL: $_-.+!*'(),
# relative names *not* described, but obviously used all over the place
# transport://user:pass@host:port/path/name?query-string
# wais: port 210, //host:port/database?search or /database/type/file?
# cgi-bin/script?arg1=foo&arg2=bar&...  scripts have to parse xxx&yyy&zzz
# ISMAP imagemap stuff: /bin/foobar.map?xxx,yyy -- have to guess at coords!
# local access-ctl files: ncsa: .htaccess ; cern: .www_acl
#######
# SEARCH ENGINES: fortunately, all are GET forms or at least work that way...
# multi-word args for most cases: foo+bar
# See 'websearch' for concise results of this research...