mirror of
https://git.davidovski.xyz/dot.git
synced 2024-08-15 00:43:28 +00:00
initial commit
This commit is contained in:
commit
01ced0b7ce
184 changed files with 35358 additions and 0 deletions
43
scripts/reddit-scrape
Executable file
43
scripts/reddit-scrape
Executable file
|
@ -0,0 +1,43 @@
|
|||
#!/bin/sh
# reddit-scrape: download the images of every image post in a subreddit listing.
#
# Usage: reddit-scrape SUBREDDIT [SORT] [TOP_TIME]
#   SUBREDDIT  subreddit name without the "r/" prefix (required)
#   SORT       listing path segment, e.g. hot, new, top (default: hot)
#   TOP_TIME   passed through as the "t" query parameter (default: empty)
#
# Each image is saved as ./SUBREDDIT/<title>_<SUBREDDIT>_<id>.<ext> and the
# post title is printed to stdout as its download is started.
# Requires wget and jq. Exits 2 on bad usage, 1 on a failed listing fetch.

#cfg
useragent="Love by u/gadelat"
timeout=60

subreddit=${1:-}
sort=${2:-hot}
top_time=${3:-}

if [ -z "$subreddit" ]; then
  printf 'Usage: %s SUBREDDIT [SORT] [TOP_TIME]\n' "${0##*/}" >&2
  exit 2
fi

for tool in wget jq; do
  if ! command -v "$tool" >/dev/null 2>&1; then
    printf '%s: required tool not found: %s\n' "${0##*/}" "$tool" >&2
    exit 1
  fi
done

# Print the JSON listing at URL $1 on stdout; non-zero status if wget fails.
fetch_listing() {
  wget -T "$timeout" -U "$useragent" -q -O - "$1"
}

mkdir -p -- "$subreddit" || exit 1

# A literal tab, used as the field separator between jq and `read`.
tab=$(printf '\t')

url="https://www.reddit.com/r/$subreddit/$sort/.json?raw_json=1&t=$top_time"

while :; do
  if ! content=$(fetch_listing "$url"); then
    printf '%s: failed to fetch %s\n' "${0##*/}" "$url" >&2
    wait # let downloads that were already started finish
    exit 1
  fi

  # One line per image post: id <TAB> image-url <TAB> title.
  # Emitting all three fields from a single jq pass keeps them paired even
  # when a title contains odd characters; tabs/newlines inside a title are
  # flattened to a space so a post can never span or split a line.
  # Posts without a preview URL are skipped instead of yielding "null".
  posts=$(printf '%s' "$content" | jq -r '
    .data.children[]?
    | select(.data.post_hint | test("image")?)
    | select(.data.preview.images[0].source.url != null)
    | [ .data.id,
        .data.preview.images[0].source.url,
        (.data.title // "" | gsub("[\t\r\n]+"; " ")) ]
    | join("\t")')

  wait # prevent spawning too many processes: finish the previous page first

  # The here-document (rather than a pipe) keeps this loop in the current
  # shell, so the background jobs it starts are visible to `wait`.
  while IFS="$tab" read -r id img title; do
    [ -n "$id" ] || continue

    # Drop the query string before taking the extension.
    img_path=${img%%\?*}
    ext=${img_path##*.}

    # A title must not introduce path components: strip a leading slash and
    # turn the remaining slashes into spaces.
    safe_title=$(printf '%s\n' "$title" | sed 's|^/||; s|/| |g')

    printf '%s\n' "$title"

    # -nc: do not re-download a file that already exists.
    # Certificate checking is left enabled; the listing request above already
    # relies on it. POSIX redirection is used because `&>` is a bashism that
    # /bin/sh may parse as "background, then redirect nothing".
    wget -T "$timeout" -U "$useragent" -nv -nc \
      -O "$subreddit/${safe_title}_${subreddit}_$id.$ext" "$img" \
      >/dev/null 2>&1 &
  done <<EOF
$posts
EOF

  # `after` is the pagination cursor; it is absent/null on the last page.
  after=$(printf '%s' "$content" | jq -r '.data.after // empty')
  if [ -z "$after" ]; then
    break
  fi

  url="https://www.reddit.com/r/$subreddit/$sort/.json?count=200&after=$after&raw_json=1&t=$top_time"
done

# Do not exit while the last page's downloads are still running.
wait
|
Loading…
Add table
Add a link
Reference in a new issue