Name: Anonymous 2013-09-03 1:32
[rem][/rem]
#!/bin/sh
# Download threads of one dis.4chan.org board into a directory named after
# the board.
#
# Usage: <script> board [ num-threads [ delay ] ]
#   board        board name; used in the download URLs and as the name of
#                the output directory (required)
#   num-threads  only process this many lines from the top of subject.txt
#                (default: every line)
#   delay        seconds to sleep after each thread download (default: 2)

# ${0##*/} strips the directory part without spawning basename, and unlike
# an unquoted `basename $0` it is safe for paths containing whitespace.
script=${0##*/}
# Exit status of the whole run; later stages set it to 1 when a thread is
# skipped or fails to download.
status=0
board=$1
num_threads=$2
# 2 seconds is generous enough
delay=${3:-2}
if [ -z "${board}" ]
then
  echo "${script}: Usage: ${script} board [ num-threads [ delay ] ]"
  exit 1
fi
# Make sure the output directory exists. -p keeps re-runs quiet when the
# directory is already there.
mkdir -p "${board}"
# Fetch the board's subject index into ./subject.txt.
# -O pins the output name: without it wget will not overwrite an existing
# subject.txt and stores the fresh copy as subject.txt.1, so the following
# stages would silently parse the stale file from an earlier run.
if ! wget "https://dis.4chan.org/${board}/subject.txt" -O subject.txt
then
  echo "Couldn't get the subject page."
  exit 1
fi
# Build the work lists from ./subject.txt:
#   threads.tt                 4th "<>"-separated field of every line read
#   wellformed-threads.tt      entries that consist only of digits
#   non-wellformed-threads.tt  every other entry
# Arguments:
#   $1  number of lines to take from the top of subject.txt;
#       empty means the whole file
extract_thread_ids() {
  # head gets the top $1 threads; sed extracts the thread number.
  # Lines that do not match the sed pattern pass through unchanged and are
  # sorted out by the digit check below.
  if [ -z "$1" ]
  then
    cat subject.txt
  else
    head -n "$1" subject.txt
  fi \
    | sed 's/[^<]*<>[^<]*<>[^<]*<>\([^<]*\)<>.*/\1/' \
    > threads.tt

  # Check the validity of the thread number: it must be all digits. The
  # value is later placed into a URL and a file name, so this also keeps
  # hostile values away from the download command.
  # grep -E with a plain "+" is POSIX; the BRE spelling "\+" is a GNU
  # extension and not portable under /bin/sh.
  grep -E '^[0-9]+$' threads.tt > wellformed-threads.tt
  grep -Ev '^[0-9]+$' threads.tt > non-wellformed-threads.tt
}

extract_thread_ids "${num_threads}"
# Download every thread listed in wellformed-threads.tt (one thread number
# per line) to ${board}/<thread>.html, sleeping ${delay} seconds after each
# attempt. Failed downloads are echoed and logged to errors.tt.
# Globals read: board, delay
download_threads() {
  # errors.tt is only ever appended to below, so drop any log left by an
  # earlier run; otherwise old failures would be reported again and force
  # a non-zero exit status even when this run succeeds.
  rm -f errors.tt

  # The list is read on fd 3 so that stdin stays untouched for wget/sleep.
  while IFS= read -r thread <&3
  do
    echo "Downloading thread ${board}/${thread}:"
    if ! wget "https://dis.4chan.org/read/${board}/${thread}" -O "${board}/${thread}.html"
    then
      echo "Error downloading thread ${board}/${thread}" | tee -a errors.tt
    fi
    sleep "${delay}"
  done 3< wellformed-threads.tt
}

download_threads
# Print a heading, the contents of a list file and a blank line, and mark the
# run as failed -- but only when the list file exists and is non-empty.
# Arguments:
#   $1  heading line to print
#   $2  list file to check and dump
# Globals written: status (set to 1 when something is reported)
report_if_nonempty() {
  [ -s "$2" ] || return 0
  echo "$1"
  cat "$2"
  echo
  status=1
}

# Thread entries that were rejected by the digits-only check.
report_if_nonempty \
  "These threads could not be downloaded because the subject was messed up." \
  non-wellformed-threads.tt

# Threads whose download failed.
report_if_nonempty \
  "These threads could not be downloaded." \
  errors.tt

exit ${status}