initial commit

This commit is contained in:
mwiegand 2023-04-21 18:35:26 +02:00
commit 22e982d49d
No known key found for this signature in database
3 changed files with 124 additions and 0 deletions

13
.envrc Normal file
View file

@ -0,0 +1,13 @@
# direnv hook: keep a project-local virtualenv in .venv in sync with the
# interpreter pyenv currently selects, then install the project requirements.

# Throw the venv away when its python no longer resolves to the same binary
# as the pyenv-selected python (i.e. the python version was switched).
if [ -f .venv/bin/python ]; then
  if [ "$(realpath .venv/bin/python)" != "$(realpath "$(pyenv which python)")" ]; then
    echo "rebuilding venv für new python version"
    rm -rf .venv
  fi
fi

# Make sure the venv exists and is active for the rest of this script.
python3 -m venv .venv
source .venv/bin/activate

# Expose the venv executables and the project's own bin/ directory on PATH.
PATH_add .venv/bin
PATH_add bin

# Upgrade the packaging tools first, then the project dependencies;
# --require-virtualenv makes pip refuse to run outside a virtualenv.
python3 -m pip --require-virtualenv --quiet install --upgrade pip wheel
python3 -m pip --require-virtualenv --quiet install --upgrade -r requirements.txt

1
requirements.txt Normal file
View file

@ -0,0 +1 @@
requests

110
scrape.py Executable file
View file

@ -0,0 +1,110 @@
#!/usr/bin/env python3
import os
from os.path import exists, splitext
from shutil import copyfileobj

import requests
# Endpoint and application id sent with every collection query.
SITE_URL = 'https://www.spektorsthesaurus.com'
QUERY_URL = f'{SITE_URL}/_api/cloud-data/v1/wix-data/collections/query'
APP_ID = 'e3c84d19-bfb6-4299-824a-3236a027d528'
# Maximum number of items requested per query (no further pages are fetched).
PAGE_LIMIT = 999


def query_collection(session, session_id, collection_name, filters, sort_field):
    """Query one data collection and return its ``items`` list.

    Args:
        session: ``requests.Session`` used to send the request.
        session_id: value of the ``svSession`` cookie to send along.
        collection_name: name of the collection, e.g. ``'tblSongs'``.
        filters: list of filter clauses that are combined with ``$and``.
        sort_field: field name the result is sorted by (ascending).

    Returns:
        The ``items`` list from the JSON response.

    Raises:
        requests.HTTPError: if the server answers with an error status.
    """
    response = session.post(
        QUERY_URL,
        headers={'Accept': 'application/json'},
        cookies={'svSession': session_id},
        json={
            'collectionName': collection_name,
            'dataQuery': {
                'filter': {'$and': filters},
                'sort': [{'fieldName': sort_field, 'order': 'ASC'}],
                'paging': {'offset': 0, 'limit': PAGE_LIMIT},
                'fields': [],
            },
            'options': {},
            'includeReferencedItems': [],
            'segment': 'LIVE',
            'appId': APP_ID,
        },
    )
    response.raise_for_status()
    return response.json()['items']


def bootleg_filename(song_name, event_name, url):
    """Return the local file name for a bootleg recording.

    The name is ``"<song> - <event><ext>"`` where ``<ext>`` is the extension
    of *url*. Path separators are replaced by ``_`` so that a song or event
    name containing one cannot point into another directory.
    """
    _, extension = splitext(url)
    filename = f"{song_name} - {event_name}{extension}"
    for separator in (os.sep, os.altsep):
        if separator:
            filename = filename.replace(separator, '_')
    return filename


def download(url, filename):
    """Stream *url* into *filename*.

    The data is first written to ``<filename>.part`` and only renamed once
    the transfer finished, so an interrupted run never leaves a truncated
    file that a later run would mistake for a complete download.

    Raises:
        requests.HTTPError: if the server answers with an error status.
    """
    partial = f'{filename}.part'
    with requests.get(url, stream=True) as stream:
        # Without this check an HTML error page would be stored as media.
        stream.raise_for_status()
        # Have urllib3 undo any Content-Encoding (gzip/deflate) while reading.
        stream.raw.decode_content = True
        with open(partial, 'wb') as file:
            copyfileobj(stream.raw, file)
    os.replace(partial, filename)


def main():
    """Download every bootleg of every non-aborted live performance."""
    session = requests.Session()
    # Load the song page once to obtain the svSession cookie, which is then
    # sent explicitly with each query.
    session.get(f'{SITE_URL}/songs')
    session_id = session.cookies.get_dict()['svSession']

    songs = query_collection(session, session_id, 'tblSongs', [], 'song_name')
    for song in songs:
        print(song['song_name'])
        # live performances
        performances = query_collection(
            session,
            session_id,
            'tblSongLivePerfs',
            [
                {'songId': {'$eq': song['song_id']}},
                {'songInfo': {'$ne': 'Aborted'}},
            ],
            'eventDate',
        )
        for performance in performances:
            if 'bootleg' not in performance:
                continue
            filename = bootleg_filename(
                song['song_name'],
                performance['eventName'],
                performance['bootleg'],
            )
            if exists(filename):
                print(filename, 'exists')
                continue
            print('downloading', filename)
            try:
                download(performance['bootleg'], filename)
            except requests.HTTPError as error:
                # One dead link should not abort the whole scrape.
                print('failed', filename, error)


if __name__ == '__main__':
    main()