initial commit

mwiegand 2022-04-10 17:21:30 +02:00
commit 22db73364d

app.py Executable file

@@ -0,0 +1,73 @@
#!/usr/bin/env python3
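# Export the friend messages log from the Steam help pages and save any
# linked steamuserimages attachments to the working directory.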
# LOGIN
# https://steam.readthedocs.io/en/latest/api/steam.webauth.html
import steam.webauth as wa
from os import environ
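# credentials are read from the STEAM_USERNAME / STEAM_PASSWORD environment variables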
user = wa.WebAuth(environ['STEAM_USERNAME'])
try:
    user.login(environ['STEAM_PASSWORD'])
except wa.LoginIncorrect:
    raise
except wa.CaptchaRequired:
    print(user.captcha_url)
    user.login(password=environ['STEAM_PASSWORD'], captcha=input("Captcha: "))
except wa.EmailCodeRequired:
    user.login(email_code=input("Email Code: "))
except wa.TwoFactorCodeRequired:
    user.login(twofactor_code=input("2FA Code: "))
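# a successful login leaves user.session as an authenticated requests session,
# which the accountdata pages below are fetched with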
# CRAWL
from bs4 import BeautifulSoup
from datetime import datetime
import pytz
import re
from urllib.parse import urlparse
import requests
pdt = pytz.timezone("US/Pacific")
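# each table row is one message: sender, recipient, timestamp, text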
def parse_trs(trs):
    for tr in trs:
        tds = tr.find_all('td')
        from_url = tds[0].find('a')['href']  # profile links, currently unused
        from_name = tds[0].text
        to_url = tds[1].find('a')['href']
        to_name = tds[1].text
        # localize() instead of replace(tzinfo=...): pytz zones yield wrong offsets with replace()
        date = pdt.localize(datetime.strptime(tds[2].text, '%b %d, %Y @ %I:%M%p PDT'))
        text = tds[3].text
        print(f'({date}) {from_name} -> {to_name}: {text}')
        # download steamuserimages
        for url_string in re.findall(r'(https?://\S+)', text):
            url = urlparse(url_string)
            if url.netloc.startswith('steamuserimages'):
                response = requests.get(url_string)
                # flatten the URL path into a filename, e.g. /ugc/1/2/ -> ugc_1_2
                with open(url.path.strip('/').replace('/', '_'), "wb") as f:
                    f.write(response.content)
# get first page
r = user.session.get('https://help.steampowered.com/en/accountdata/GetFriendMessagesLog')
soup = BeautifulSoup(r.text, 'html.parser')
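# the "Load More" button carries the continuation token for the AJAX endpoint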
continue_value = soup.find(class_='AccountDataLoadMore')['data-continuevalue']
account_data_table = soup.find(id='AccountDataTable_1')
trs = account_data_table.find_all('tr')[1:]
parse_trs(trs)
# get further pages
while True:
    r = user.session.get(f'https://help.steampowered.com/en/accountdata/AjaxLoadMoreData/?url=GetFriendMessagesLog&continue={continue_value}')
    data = r.json()
    trs = BeautifulSoup(data['html'], 'html.parser').find_all('tr')
    parse_trs(trs)
    # assumption: the last page omits the continuation token, so stop then
    continue_value = data.get('continue')
    if not continue_value:
        break
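# assumed invocation: STEAM_USERNAME=... STEAM_PASSWORD=... ./app.py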