#!/usr/bin/env python3
"""Print the Steam friend-message log and download linked user images.

Reconstructed from the patch "initial commit"
(22db73364df6210b00cb0e8aaca797f68fd232d3, mwiegand, 2022-04-10), which
created this script as ``app.py`` (mode 100755).

Credentials are read from the ``STEAM_USERNAME`` and ``STEAM_PASSWORD``
environment variables; captcha, e-mail and 2FA codes are prompted for on
stdin when Steam asks for them.
"""

# https://steam.readthedocs.io/en/latest/api/steam.webauth.html

from datetime import datetime, timezone
from os import environ
import re
import sys
from urllib.parse import urlparse

from bs4 import BeautifulSoup
import pytz
import requests
import steam.webauth as wa

# Canonical tz database spelling of the zone the log timestamps are given in.
pdt = pytz.timezone("US/Pacific")

ACCOUNT_DATA_URL = 'https://help.steampowered.com/en/accountdata'
DATE_FORMAT = '%b %d, %Y @ %I:%M%p'
URL_PATTERN = re.compile(r'(https?://\S+)')
DOWNLOAD_TIMEOUT = 30  # seconds


# LOGIN

def login(user, password):
    """Log ``user`` in, answering every challenge Steam raises.

    The login is retried until it succeeds, so a captcha followed by an
    e-mail or 2FA challenge is handled (a retry issued from inside a single
    ``except`` handler would not be covered by its sibling handlers).

    Args:
        user: a ``steam.webauth.WebAuth`` instance.
        password: the account password.

    Returns:
        Whatever ``user.login`` returns on success.

    Raises:
        wa.LoginIncorrect: the credentials were rejected.
    """
    answers = {'password': password}
    while True:
        try:
            return user.login(**answers)
        except wa.LoginIncorrect:
            # Kept ahead of the captcha handler, as in the original: a wrong
            # password must abort instead of being retried, even when the
            # raised exception would also match a later handler.
            raise
        except wa.CaptchaRequired:
            print(user.captcha_url)
            answers['captcha'] = input("Captcha: ")
        except wa.EmailCodeRequired:
            answers['email_code'] = input("Email Code: ")
        except wa.TwoFactorCodeRequired:
            answers['twofactor_code'] = input("2FA Code: ")


# CRAWL

def parse_timestamp(text):
    """Convert a log timestamp such as ``Apr 10, 2022 @ 5:21pm PDT``.

    Returns a timezone-aware ``datetime`` in US/Pacific.

    Raises:
        ValueError: the text does not end in ``PDT``/``PST`` or the rest
            does not match ``DATE_FORMAT``.
    """
    stamp, _, zone = text.strip().rpartition(' ')
    if zone not in ('PDT', 'PST'):
        raise ValueError(f'unexpected timezone in timestamp: {text!r}')
    naive = datetime.strptime(stamp, DATE_FORMAT)
    # pytz zones must be attached with localize(); replace(tzinfo=...) would
    # silently use the zone's historical LMT offset. The abbreviation picks
    # the right side of the ambiguous hour when DST ends.
    return pdt.localize(naive, is_dst=(zone == 'PDT'))


def download_images(text):
    """Save every ``steamuserimages`` URL found in ``text`` to the CWD.

    The file name is the URL path with ``/`` replaced by ``_``. Failed
    downloads are reported on stderr and skipped so one dead link does not
    abort the whole crawl.
    """
    for url_string in URL_PATTERN.findall(text):
        url = urlparse(url_string)
        if not url.netloc.startswith('steamuserimages'):
            continue
        filename = url.path.strip('/').replace('/', '_')
        if not filename:
            continue
        try:
            response = requests.get(url_string, timeout=DOWNLOAD_TIMEOUT)
            response.raise_for_status()
        except requests.RequestException as exc:
            print(f'download failed for {url_string}: {exc}', file=sys.stderr)
            continue
        with open(filename, "wb") as f:
            f.write(response.content)


def parse_trs(trs):
    """Print one line per message row and download the images it links.

    Each row is expected to hold four ``<td>`` cells: sender, recipient,
    timestamp and message text. Rows with fewer cells (e.g. a header row)
    are skipped.
    """
    for tr in trs:
        tds = tr.find_all('td')
        if len(tds) < 4:
            continue
        from_name = tds[0].text
        to_name = tds[1].text
        date = parse_timestamp(tds[2].text)
        text = tds[3].text
        print(f'({date}) {from_name} -> {to_name}: {text}')
        # download steamuserimages
        download_images(text)


def crawl(session):
    """Walk all pages of the friend-message log using ``session``.

    ``session`` is the authenticated HTTP session from ``WebAuth``.

    Raises:
        RuntimeError: the first page does not contain the message table.
    """
    # get first page
    r = session.get(f'{ACCOUNT_DATA_URL}/GetFriendMessagesLog')
    soup = BeautifulSoup(r.text, 'html.parser')

    account_data_table = soup.find(id='AccountDataTable_1')
    if account_data_table is None:
        raise RuntimeError('AccountDataTable_1 not found in the response')
    # The first row of the table is skipped, as in the original script.
    parse_trs(account_data_table.find_all('tr')[1:])

    # A log that fits on one page has no "load more" element.
    load_more = soup.find(class_='AccountDataLoadMore')
    continue_value = load_more.get('data-continuevalue') if load_more else None

    # get further pages
    # NOTE(review): assumes the endpoint marks the last page with a missing
    # or empty 'continue' value - confirm against a real response.
    while continue_value:
        r = session.get(
            f'{ACCOUNT_DATA_URL}/AjaxLoadMoreData/',
            params={'url': 'GetFriendMessagesLog', 'continue': continue_value},
        )
        payload = r.json()
        html = payload.get('html') or ''
        parse_trs(BeautifulSoup(html, 'html.parser').find_all('tr'))
        next_value = payload.get('continue')
        if next_value == continue_value:
            # Same cursor again: stop instead of looping forever.
            break
        continue_value = next_value


def main():
    """Log in with the credentials from the environment and dump the log."""
    user = wa.WebAuth(environ['STEAM_USERNAME'])
    login(user, environ['STEAM_PASSWORD'])
    crawl(user.session)


if __name__ == '__main__':
    main()