diff --git a/steam_chat_logger.py b/steam_chat_logger.py
index ea4bed3..27f13f1 100755
--- a/steam_chat_logger.py
+++ b/steam_chat_logger.py
@@ -92,8 +92,25 @@ import re
 from urllib.parse import urlparse
 import requests
 from hashlib import sha3_256
+import pg8000
 
 pdt = pytz.timezone("Us/Pacific")
+# NOTE(review): `credentials` and `app` are not defined in this hunk -- confirm they exist earlier in the file.
+db = pg8000.connect(
+    host=credentials[app]['host'],
+    port=credentials[app]['port'],
+    database=credentials[app]['database'],
+    user=credentials[app]['user'],
+    password=credentials[app]['password'],
+)
+
+def query(query, **params):
+    """Execute `query` with named `params` on the shared `db` connection, commit, and return the cursor."""
+    cursor = db.cursor()
+    cursor.paramstyle = "named"
+    cursor.execute(query, params)
+    db.commit()
+    return cursor
 
 def parse_trs(trs):
     for tr in trs:
@@ -103,19 +120,42 @@ def parse_trs(trs):
         to_url = tds[1].find('a')['href']
         to_name = tds[1].text
         date = datetime.strptime(tds[2].text, '%b %d, %Y @ %I:%M%p PDT').replace(tzinfo=pdt)
-        text = tds[3].text
+        message = tds[3].text
         checksum = sha3_256(
-            (from_url + to_url + str(date.timestamp()) + text).encode()
+            (from_url + to_url + str(date.timestamp()) + message).encode()
         ).hexdigest()
-        print(f'(#{checksum}@{date}) {from_name} -> {to_name}: {text}')
+        print(f'(#{checksum}@{date}) {from_name} -> {to_name}: {message}')
 
-        # download steamuserimages
-        for url_string in re.findall(r'(https?://\S+)', text):
-            url = urlparse(url_string)
-            if url.netloc.startswith('steamuserimages'):
-                response = requests.get(url_string)
-                with open(url.path.strip('/').replace('/', '_'), "wb") as f:
-                    f.write(response.content)
+        if query(
+            '''
+            SELECT 1 FROM messages
+            WHERE checksum = :checksum
+            ''',
+            checksum=checksum,
+        ).rowcount:
+            print(f'message {checksum} already exists')
+        else:
+            print(f'adding new message {checksum}')
+            query(
+                '''
+                INSERT INTO messages (checksum, from_url, from_name, to_url, to_name, date, message)
+                VALUES(:checksum, :from_url, :from_name, :to_url, :to_name, :date, :message)
+                ''',
+                checksum=checksum,
+                from_url=from_url,
+                from_name=from_name,
+                to_url=to_url,
+                to_name=to_name,
+                date=date,
+                message=message,
+            )
+            # download steamuserimages
+            for url_string in re.findall(r'(https?://\S+)', message):
+                url = urlparse(url_string)
+                if url.netloc.startswith('steamuserimages'):
+                    response = requests.get(url_string)
+                    with open(url.path.strip('/').replace('/', '_'), "wb") as f:
+                        f.write(response.content)
 
 
 # get first page