Compare commits
2 commits
98ef086160...aaa35f20c2
| Author | SHA1 | Date |
|---|---|---|
| | aaa35f20c2 | |
| | 9845e9068e | |
1 changed file with 52 additions and 13 deletions
@@ -92,8 +92,21 @@ import re
 from urllib.parse import urlparse
 import requests
 from hashlib import sha3_256
+import pg8000
 
 pdt = pytz.timezone("Us/Pacific")
+db = pg8000.connect(
+    database=environ['DB_NAME'],
+    user=environ['DB_USER'],
+    password=environ['DB_PASSWORD'],
+)
+
+def query(query, **params):
+    cursor = db.cursor()
+    cursor.paramstyle = "named"
+    cursor.execute(query, params)
+    db.commit()
+    return cursor
 
 def parse_trs(trs):
     for tr in trs:
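The new `query()` helper above just forwards its keyword arguments to `cursor.execute`, so callers pair `:name` placeholders in the SQL with matching keyword arguments. A minimal illustrative call (the checksum value is a placeholder, and the `messages` table only appears in the next hunk, not in this one):

```python
# Illustrative use of the query() helper added above; the checksum value is a
# placeholder and the messages table is the one referenced in the next hunk.
already_stored = query(
    'SELECT 1 FROM messages WHERE checksum = :checksum',
    checksum='0' * 64,
).rowcount
print(already_stored)
```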
@@ -103,19 +116,42 @@ def parse_trs(trs):
         to_url = tds[1].find('a')['href']
         to_name = tds[1].text
         date = datetime.strptime(tds[2].text, '%b %d, %Y @ %I:%M%p PDT').replace(tzinfo=pdt)
-        text = tds[3].text
+        message = tds[3].text
         checksum = sha3_256(
-            (from_url + to_url + str(date.timestamp()) + text).encode()
+            (from_url + to_url + str(date.timestamp()) + message).encode()
         ).hexdigest()
-        print(f'(#{checksum}@{date}) {from_name} -> {to_name}: {text}')
+        print(f'(#{checksum}@{date}) {from_name} -> {to_name}: {message}')
 
-        # download steamuserimages
-        for url_string in re.findall(r'(https?://\S+)', text):
-            url = urlparse(url_string)
-            if url.netloc.startswith('steamuserimages'):
-                response = requests.get(url_string)
-                with open(url.path.strip('/').replace('/', '_'), "wb") as f:
-                    f.write(response.content)
+        if query(
+            '''
+            SELECT 1 FROM messages
+            WHERE checksum = :checksum
+            ''',
+            checksum=checksum,
+        ).rowcount:
+            print(f'message {checksum} already exists')
+        else:
+            print(f'adding new message {checksum}')
+            query(
+                '''
+                INSERT INTO messages (checksum, from_url, from_name, to_url, to_name, date, message)
+                VALUES(:checksum, :from_url, :from_name, :to_url, :to_name, :date, :message)
+                ''',
+                checksum=checksum,
+                from_url=from_url,
+                from_name=from_name,
+                to_url=to_url,
+                to_name=to_name,
+                date=date,
+                message=message,
+            )
+            # download steamuserimages
+            for url_string in re.findall(r'(https?://\S+)', message):
+                url = urlparse(url_string)
+                if url.netloc.startswith('steamuserimages'):
+                    response = requests.get(url_string)
+                    with open(url.path.strip('/').replace('/', '_'), "wb") as f:
+                        f.write(response.content)
 
 # get first page
 
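The SELECT and INSERT above refer to a `messages` table whose definition is not part of this diff. A possible shape for it, sketched with the new `query()` helper, with the column types and the checksum primary key being assumptions rather than anything this commit defines:

```python
# Hypothetical schema for the messages table used above; column types and the
# primary key choice are assumptions, not part of this commit.
query(
    '''
    CREATE TABLE IF NOT EXISTS messages (
        checksum  text PRIMARY KEY,
        from_url  text,
        from_name text,
        to_url    text,
        to_name   text,
        date      timestamptz,
        message   text
    )
    '''
)
```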
@@ -134,9 +170,12 @@ while True:
     print('getting next page')
     r = user.session.get(f'https://help.steampowered.com/en/accountdata/AjaxLoadMoreData/?url=GetFriendMessagesLog&continue={continue_value}')
     continue_value = r.json()['continue']
-    html = r.json()['html']
-    trs = BeautifulSoup(html, 'html.parser').find_all('tr')
-    parse_trs(trs)
+    if continue_value:
+        html = r.json()['html']
+        trs = BeautifulSoup(html, 'html.parser').find_all('tr')
+        parse_trs(trs)
+    else:
+        break
 
 # CLOSE
 
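Because the checksum computed in `parse_trs` is a deterministic hash of the message fields, re-running the paging loop is effectively idempotent: messages stored on an earlier run match the SELECT and are skipped rather than inserted twice. A standalone illustration with made-up values:

```python
from hashlib import sha3_256

# Identical (from_url, to_url, timestamp, message) input always hashes to the
# same digest, which is what lets the SELECT find rows from earlier runs.
first = sha3_256(('url_a' + 'url_b' + '1700000000.0' + 'hello').encode()).hexdigest()
second = sha3_256(('url_a' + 'url_b' + '1700000000.0' + 'hello').encode()).hexdigest()
assert first == second
```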