mirror of
https://git.wownero.com/alibaba/twitter-bot.git
synced 2024-08-15 01:03:27 +00:00
Add 'twitter'
This commit is contained in:
parent
8469f73f40
commit
c30212f44f
1 changed files with 74 additions and 0 deletions
74
twitter
Normal file
74
twitter
Normal file
|
@ -0,0 +1,74 @@
|
||||||
|
import requests
from bs4 import BeautifulSoup
from urllib.parse import urljoin
from urllib.request import urlretrieve
from sqlite3 import Error  # NOTE(review): unused in this file as shown — confirm before removing
import numpy as np  # NOTE(review): unused in this file as shown — confirm before removing
import tweepy
import os


# Twitter API credentials — fill these in before running.
CONSUMER_KEY = 'INSERT HERE'
CONSUMER_SECRET = 'INSERT HERE'
ACCESS_KEY = 'INSERT HERE'
ACCESS_SECRET = 'INSERT HERE'

# Build a single authenticated client. The original constructed two
# identical OAuthHandler/API pairs back to back; one client suffices.
# Both historical module-level names are kept so any external callers
# referencing either `twitter_API` or `api` keep working.
auth = tweepy.OAuthHandler(CONSUMER_KEY, CONSUMER_SECRET)
auth.set_access_token(ACCESS_KEY, ACCESS_SECRET)

twitter_API = tweepy.API(auth)
api = twitter_API
def scraper3(url="https://suchwow.xyz/",
             imageroot=r'C:\Users\31622\Documents\twitterbot\images'):
    """Scrape image posts from *url*, tweet each new image, and collect metadata.

    For every ``div.card`` on the page: derive the full-size image path from
    the thumbnail, download it into *imageroot* (skipping images already on
    disk — an existing file is treated as "already tweeted"), post it to
    Twitter via the module-level ``api`` client, and record the post.

    Parameters
    ----------
    url : str
        Site to scrape. Defaults to the original hard-coded address.
    imageroot : str
        Directory where downloaded images are stored. Defaults to the
        original hard-coded Windows path.

    Returns
    -------
    dict
        Maps each numeric post id to a dict with keys ``postid``,
        ``imagename``, ``title``, ``submitter``, ``size`` (the
        ``urlretrieve`` return value) and ``tweetid``.
    """
    page = requests.get(url)
    soup = BeautifulSoup(page.content, 'html.parser')

    results = {}

    def get_fullsize_path(thumbnail):
        # "foo.thumb.png" -> "foo.png": drop the second-to-last dot segment,
        # which the site uses to mark thumbnail variants.
        parts = thumbnail.split(".")
        parts.pop(-2)
        return '.'.join(parts)

    def save_image(imageurl, filename):
        # Refuse to re-download: an existing file means the post was
        # already handled on a previous run.
        target = os.path.join(imageroot, filename)
        if os.path.isfile(target):
            raise FileExistsError(target)
        return urlretrieve(urljoin(url, imageurl), target)

    for card in soup.select('div.card'):
        thumbnail = card.img['src']
        imagepath = get_fullsize_path(thumbnail)
        imagename = imagepath.split('/')[-1]

        # Title is in the first p element of the card; this seems consistent,
        # though p.title / p.subtitle selectors make it more robust.
        title = card.select_one('p.title').get_text().strip()
        submitter = card.select_one('p.subtitle').get_text().strip()

        # Numerical id of the post — last path segment of the card's link.
        postid = int(card.a['href'].split('/')[-1])

        try:
            size = save_image(imagepath, imagename)
        except FileExistsError:
            continue  # already downloaded/tweeted — skip to the next card
        except Exception:
            # Best-effort scrape: one bad card must not abort the whole run.
            continue

        tweetid = api.update_with_media(status=title + " | Credits to: " + submitter + " | #Wownero $WOW #wow #cryptocurrency #privacy #memecoin #doge #shitcoin ", filename=os.path.join(imageroot, imagename))

        print("Tweet sent!")

        results[postid] = dict(
            postid=postid,
            imagename=imagename,
            title=title,
            submitter=submitter,
            size=size,
            tweetid=tweetid,
        )

    return results
|
|
||||||
|
scraper3()
|
Loading…
Reference in a new issue