Compare commits
2 commits
98ef086160...aaa35f20c2
| Author | SHA1 | Date |
|---|---|---|
| | aaa35f20c2 | |
| | 9845e9068e | |
1 changed file with 52 additions and 13 deletions
@@ -92,8 +92,21 @@ import re
 from urllib.parse import urlparse
 import requests
 from hashlib import sha3_256
+import pg8000
 
 pdt = pytz.timezone("Us/Pacific")
+db = pg8000.connect(
+    database=environ['DB_NAME'],
+    user=environ['DB_USER'],
+    password=environ['DB_PASSWORD'],
+)
+
+def query(query, **params):
+    cursor = db.cursor()
+    cursor.paramstyle = "named"
+    cursor.execute(query, params)
+    db.commit()
+    return cursor
 
 def parse_trs(trs):
     for tr in trs:
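The new `query()` helper above just forwards its keyword arguments to `cursor.execute`, so callers pair `:name` placeholders in the SQL with matching keyword arguments. A minimal illustrative call (the checksum value is a placeholder, and the `messages` table only appears in the next hunk, not in this one):

```python
# Illustrative use of the query() helper added above; the checksum value is a
# placeholder and the messages table is the one referenced in the next hunk.
already_stored = query(
    'SELECT 1 FROM messages WHERE checksum = :checksum',
    checksum='0' * 64,
).rowcount
print(already_stored)
```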
@@ -103,19 +116,42 @@ def parse_trs(trs):
         to_url = tds[1].find('a')['href']
         to_name = tds[1].text
         date = datetime.strptime(tds[2].text, '%b %d, %Y @ %I:%M%p PDT').replace(tzinfo=pdt)
-        text = tds[3].text
+        message = tds[3].text
         checksum = sha3_256(
-            (from_url + to_url + str(date.timestamp()) + text).encode()
+            (from_url + to_url + str(date.timestamp()) + message).encode()
         ).hexdigest()
-        print(f'(#{checksum}@{date}) {from_name} -> {to_name}: {text}')
+        print(f'(#{checksum}@{date}) {from_name} -> {to_name}: {message}')
 
-        # download steamuserimages
-        for url_string in re.findall(r'(https?://\S+)', text):
-            url = urlparse(url_string)
-            if url.netloc.startswith('steamuserimages'):
-                response = requests.get(url_string)
-                with open(url.path.strip('/').replace('/', '_'), "wb") as f:
-                    f.write(response.content)
+        if query(
+            '''
+            SELECT 1 FROM messages
+            WHERE checksum = :checksum
+            ''',
+            checksum=checksum,
+        ).rowcount:
+            print(f'message {checksum} already exists')
+        else:
+            print(f'adding new message {checksum}')
+            query(
+                '''
+                INSERT INTO messages (checksum, from_url, from_name, to_url, to_name, date, message)
+                VALUES(:checksum, :from_url, :from_name, :to_url, :to_name, :date, :message)
+                ''',
+                checksum=checksum,
+                from_url=from_url,
+                from_name=from_name,
+                to_url=to_url,
+                to_name=to_name,
+                date=date,
+                message=message,
+            )
+            # download steamuserimages
+            for url_string in re.findall(r'(https?://\S+)', message):
+                url = urlparse(url_string)
+                if url.netloc.startswith('steamuserimages'):
+                    response = requests.get(url_string)
+                    with open(url.path.strip('/').replace('/', '_'), "wb") as f:
+                        f.write(response.content)
 
 # get first page
 
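The SELECT and INSERT above refer to a `messages` table whose definition is not part of this diff. A possible shape for it, sketched with the new `query()` helper, with the column types and the checksum primary key being assumptions rather than anything this commit defines:

```python
# Hypothetical schema for the messages table used above; column types and the
# primary key choice are assumptions, not part of this commit.
query(
    '''
    CREATE TABLE IF NOT EXISTS messages (
        checksum  text PRIMARY KEY,
        from_url  text,
        from_name text,
        to_url    text,
        to_name   text,
        date      timestamptz,
        message   text
    )
    '''
)
```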
@@ -134,9 +170,12 @@ while True:
     print('getting next page')
     r = user.session.get(f'https://help.steampowered.com/en/accountdata/AjaxLoadMoreData/?url=GetFriendMessagesLog&continue={continue_value}')
     continue_value = r.json()['continue']
-    html = r.json()['html']
-    trs = BeautifulSoup(html, 'html.parser').find_all('tr')
-    parse_trs(trs)
+    if continue_value:
+        html = r.json()['html']
+        trs = BeautifulSoup(html, 'html.parser').find_all('tr')
+        parse_trs(trs)
+    else:
+        break
 
 # CLOSE
 
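Because the checksum computed in `parse_trs` is a deterministic hash of the message fields, re-running the paging loop is effectively idempotent: messages stored on an earlier run match the SELECT and are skipped rather than inserted twice. A standalone illustration with made-up values:

```python
from hashlib import sha3_256

# Identical (from_url, to_url, timestamp, message) input always hashes to the
# same digest, which is what lets the SELECT find rows from earlier runs.
first = sha3_256(('url_a' + 'url_b' + '1700000000.0' + 'hello').encode()).hexdigest()
second = sha3_256(('url_a' + 'url_b' + '1700000000.0' + 'hello').encode()).hexdigest()
assert first == second
```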