diff options
Diffstat (limited to 'scripts/websearch')
-rwxr-xr-x | scripts/websearch | 77 |
1 files changed, 0 insertions, 77 deletions
diff --git a/scripts/websearch b/scripts/websearch
deleted file mode 100755
index 60c3a33..0000000
--- a/scripts/websearch
+++ /dev/null
@@ -1,77 +0,0 @@
-#! /bin/sh
-## Hit the major search engines. Hose the [large] output to a file!
-## autoconverts multiple arguments into the right format for given servers --
-## usually worda+wordb, with certain lame exceptions like dejanews.
-## Extracting and post-sorting the URLs is highly recommended...
-##
-## Altavista currently handled by a separate script; may merge at some point.
-##
-## _H* original 950824, updated 951218 and 960209
-
-test "${1}" = "" && echo 'Needs argument[s] to search for!' && exit 1
-PLUSARG="`echo $* | sed 's/ /+/g'`"
-PIPEARG="`echo ${PLUSARG} | sed 's/+/|/g'`"
-IFILE=/tmp/.webq.$$
-
-# Don't have "nc"? Get "netcat" from avian.org and add it to your toolkit.
-doquery () {
- echo GET "$1" | nc -v -i 1 -w 30 "$2" "$3"
-}
-
-# changed since original: now supplying port numbers and separator lines...
-
-echo "=== Yahoo ==="
-doquery "/bin/search?p=${PLUSARG}&n=300&w=w&s=a" search.yahoo.com 80
-
-echo '' ; echo "=== Webcrawler ==="
-doquery "/cgi-bin/WebQuery?searchText=${PLUSARG}&maxHits=300" webcrawler.com 80
-
-# the infoseek lamers want "registration" before they do a real search, but...
-echo '' ; echo "=== Infoseek ==="
-echo " is broken."
-# doquery "WW/IS/Titles?qt=${PLUSARG}" www2.infoseek.com 80
-# ... which doesn't work cuz their lame server wants the extra newlines, WITH
-# CRLF pairs ferkrissake. Fuck 'em for now, they're hopelessly broken. If
-# you want to play, the basic idea and query formats follow.
-# echo "GET /WW/IS/Titles?qt=${PLUSARG}" > $IFILE
-# echo "" >> $IFILE
-# nc -v -w 30 guide-p.infoseek.com 80 < $IFILE
-
-# this is kinda flakey; might have to do twice??
-echo '' ; echo "=== Opentext ==="
-doquery "/omw/simplesearch?SearchFor=${PLUSARG}&mode=phrase" \
- search.opentext.com 80
-
-# looks like inktomi will only take hits=100, or defaults back to 30
-# we try to suppress all the stupid rating dots here, too
-echo '' ; echo "=== Inktomi ==="
-doquery "/query/?query=${PLUSARG}&hits=100" ink3.cs.berkeley.edu 1234 | \
- sed '/^<IMG ALT.*inktomi.*\.gif">$/d'
-
-#djnews lame shit limits hits to 120 and has nonstandard format
-echo '' ; echo "=== Dejanews ==="
-doquery "/cgi-bin/nph-dnquery?query=${PIPEARG}+maxhits=110+format=terse+defaultOp=AND" \
- smithers.dejanews.com 80
-
-# OLD lycos: used to work until they fucking BROKE it...
-# doquery "/cgi-bin/pursuit?query=${PLUSARG}&maxhits=300&terse=1" \
-# query5.lycos.cs.cmu.edu 80
-# NEW lycos: wants the User-agent field present in query or it returns nothing
-# 960206: webmaster@lycos duly bitched at
-# 960208: reply received; here's how we will now handle it:
-echo \
-"GET /cgi-bin/pursuit?query=${PLUSARG}&maxhits=300&terse=terse&matchmode=and&minscore=.5 HTTP/1.x" \
- > $IFILE
-echo "User-agent: *FUCK OFF*" >> $IFILE
-echo "Why: go ask todd@pointcom.com (Todd Whitney)" >> $IFILE
-echo '' >> $IFILE
-echo '' ; echo "=== Lycos ==="
-nc -v -i 1 -w 30 twelve.srv.lycos.com 80 < $IFILE
-
-rm -f $IFILE
-exit 0
-
-# CURRENTLY BROKEN [?]
-# infoseek
-
-# some args need to be redone to ensure whatever "and" mode applies