spektors_thesaurus_scraper/scrape.py
2023-04-21 18:40:21 +02:00

110 lines
3 KiB
Python
Executable file

#!/usr/bin/env python3
import os
from os.path import exists, splitext
from shutil import copyfileobj
from urllib.parse import urlsplit

import requests
# Page that is fetched first so the server hands out the `svSession` cookie.
SONGS_PAGE_URL = 'https://www.spektorsthesaurus.com/songs'
# Collection query endpoint used for both the song list and the performances.
API_URL = 'https://www.spektorsthesaurus.com/_api/cloud-data/v1/wix-data/collections/query'
APP_ID = 'e3c84d19-bfb6-4299-824a-3236a027d528'
# Seconds to wait for a connection / for the next bytes of a response.
# Without a timeout a stalled connection would block the script forever.
REQUEST_TIMEOUT = 60


def build_query(collection_name: str, filters: list, sort_field: str) -> dict:
    """Return the JSON payload for one collection query.

    Args:
        collection_name: Name of the collection to query (e.g. ``tblSongs``).
        filters: Conditions placed under the ``$and`` filter; may be empty.
        sort_field: Field the results are sorted by, in ascending order.

    Returns:
        The request body as a plain ``dict``.
    """
    return {
        'collectionName': collection_name,
        'dataQuery': {
            'filter': {
                '$and': filters,
            },
            'sort': [
                {
                    'fieldName': sort_field,
                    'order': 'ASC',
                },
            ],
            'paging': {
                'offset': 0,
                'limit': 999,
            },
            'fields': [],
        },
        'options': {},
        'includeReferencedItems': [],
        'segment': 'LIVE',
        'appId': APP_ID,
    }


def query_collection(
    session: 'requests.Session',
    session_id: str,
    collection_name: str,
    filters: list,
    sort_field: str,
) -> list:
    """POST a collection query and return the ``items`` of the JSON reply.

    Args:
        session: Session used to send the request.
        session_id: Value of the ``svSession`` cookie sent with the request.
        collection_name: Name of the collection to query.
        filters: Conditions placed under the ``$and`` filter.
        sort_field: Field the results are sorted by, in ascending order.

    Returns:
        The value stored under ``items`` in the response body.

    Raises:
        requests.HTTPError: If the server answers with an error status.
    """
    response = session.post(
        API_URL,
        headers={
            'Accept': 'application/json',
        },
        cookies={
            'svSession': session_id,
        },
        json=build_query(collection_name, filters, sort_field),
        timeout=REQUEST_TIMEOUT,
    )
    response.raise_for_status()
    return response.json()['items']


def bootleg_filename(song_name: str, event_name: str, url: str) -> str:
    """Return the local file name for a recording.

    The name is ``"<song> - <event><extension>"`` with every ``/`` replaced
    by ``|`` so it cannot be mistaken for a directory separator.

    Args:
        song_name: Name of the song.
        event_name: Name of the event the song was performed at.
        url: Download URL; only its path part supplies the extension, so a
            query string or fragment never leaks into the file name.

    Returns:
        The file name, relative to the current working directory.
    """
    _, extension = splitext(urlsplit(url).path)
    return f"{song_name} - {event_name}{extension}".replace('/', '|')


def download(session: 'requests.Session', url: str, filename: str) -> None:
    """Stream ``url`` into ``filename`` without ever leaving a partial file.

    The body is written to ``<filename>.part`` and renamed only after the
    transfer finished. This matters because callers skip files that already
    exist: a truncated file under the final name would never be retried.

    Args:
        session: Session used to send the request.
        url: Address of the file to download.
        filename: Destination path.

    Raises:
        requests.HTTPError: If the server answers with an error status; in
            that case nothing is written under ``filename``.
    """
    partial = filename + '.part'
    try:
        with session.get(url, stream=True, timeout=REQUEST_TIMEOUT) as stream:
            # Do not store an HTML/JSON error page as if it were the media.
            stream.raise_for_status()
            # The raw stream yields the bytes as sent on the wire; ask it to
            # undo gzip/deflate transfer compression before writing to disk.
            stream.raw.decode_content = True
            with open(partial, 'wb') as file:
                copyfileobj(stream.raw, file)
        os.replace(partial, filename)
    finally:
        # Only still present when the transfer or the rename did not finish.
        if exists(partial):
            os.remove(partial)


def main() -> None:
    """Download every available recording of every song.

    For each song (sorted by name) the non-aborted live performances are
    queried (sorted by event date) and each performance with a ``bootleg``
    URL is saved into the current directory, unless the file already exists.

    Raises:
        RuntimeError: If the site did not set the ``svSession`` cookie.
        requests.RequestException: On connection problems or failed queries.
    """
    with requests.Session() as session:
        # The response body is irrelevant; the request is made for its cookie.
        session.get(SONGS_PAGE_URL, timeout=REQUEST_TIMEOUT)
        session_id = session.cookies.get_dict().get('svSession')
        if session_id is None:
            raise RuntimeError(
                f"no 'svSession' cookie was set by {SONGS_PAGE_URL}"
            )

        songs = query_collection(session, session_id, 'tblSongs', [], 'song_name')
        for song in songs:
            print(song['song_name'])
            # live performances
            performances = query_collection(
                session,
                session_id,
                'tblSongLivePerfs',
                [
                    {
                        'songId': {
                            '$eq': song['song_id'],
                        },
                    },
                    {
                        'songInfo': {
                            '$ne': 'Aborted',
                        },
                    },
                ],
                'eventDate',
            )
            for performance in performances:
                # The key may be absent, or present but empty/None.
                url = performance.get('bootleg')
                if not url:
                    continue
                filename = bootleg_filename(
                    song['song_name'], performance['eventName'], url
                )
                if exists(filename):
                    print(filename, 'exists')
                    continue
                print('downloading', filename)
                try:
                    download(session, url, filename)
                except requests.HTTPError as error:
                    # One unavailable recording should not stop the others.
                    print('failed', filename, error)


if __name__ == '__main__':
    main()