initial commit
This commit is contained in:
commit
22e982d49d
3 changed files with 124 additions and 0 deletions
13
.envrc
Normal file
13
.envrc
Normal file
|
@ -0,0 +1,13 @@
|
|||
# direnv configuration: maintain a project-local virtualenv in .venv, put it
# (and ./bin) on PATH, and keep the packages from requirements.txt installed.

# Rebuild the venv when its interpreter is no longer the one pyenv selects.
# The comparison only runs when pyenv is available and can resolve a python;
# otherwise the inner realpath would yield an empty string, the inequality
# would always hold, and the venv would be deleted on every load.
if test -f .venv/bin/python && command -v pyenv >/dev/null 2>&1
then
    pyenv_python="$(pyenv which python 2>/dev/null)"
    if test -n "$pyenv_python" \
        && test "$(realpath .venv/bin/python)" != "$(realpath "$pyenv_python")"
    then
        echo "rebuilding venv for new python version"
        rm -rf .venv
    fi
fi

# Create the venv (re-running this on an existing .venv keeps its packages).
python3 -m venv .venv
source .venv/bin/activate
PATH_add .venv/bin
PATH_add bin

# --require-virtualenv makes pip refuse to run outside the venv, so a failed
# activation cannot end up installing into another environment.
python3 -m pip --require-virtualenv --quiet install --upgrade pip wheel
python3 -m pip --require-virtualenv --quiet install --upgrade -r requirements.txt
|
||||
|
1
requirements.txt
Normal file
1
requirements.txt
Normal file
|
@ -0,0 +1 @@
|
|||
requests
|
110
scrape.py
Executable file
110
scrape.py
Executable file
|
@ -0,0 +1,110 @@
|
|||
#!/usr/bin/env python3
|
||||
|
||||
from os import replace
from os.path import exists, splitext
from shutil import copyfileobj
from urllib.parse import urlsplit

import requests
|
||||
|
||||
|
||||
SONGS_PAGE_URL = 'https://www.spektorsthesaurus.com/songs'
API_URL = 'https://www.spektorsthesaurus.com/_api/cloud-data/v1/wix-data/collections/query'
APP_ID = 'e3c84d19-bfb6-4299-824a-3236a027d528'

# Maximum number of items requested per query; results beyond this many are
# not fetched (no follow-up page is requested).
PAGE_LIMIT = 999

# Seconds to wait for a connection / for the next bytes of a response, so a
# stalled server cannot hang the script forever.
REQUEST_TIMEOUT = 30


def safe_filename(name):
    """Return *name* with path separators replaced by underscores.

    Song and event names come from the remote API; a ``/`` (or ``\\``) in
    one of them would otherwise be interpreted as a directory boundary when
    the file is opened.
    """
    return name.replace('/', '_').replace('\\', '_')


def bootleg_filename(song_name, event_name, bootleg_url):
    """Build the local file name for one bootleg recording.

    The name is ``"<song_name> - <event_name><extension>"``, where the
    extension is taken from the path component of *bootleg_url* only, so a
    query string or fragment does not leak into it.
    """
    _, extension = splitext(urlsplit(bootleg_url).path)
    return safe_filename(f"{song_name} - {event_name}{extension}")


def query_collection(session, session_id, collection_name, filters, sort_field):
    """Query one data collection and return the list under its ``items`` key.

    Args:
        session: ``requests.Session`` used to send the request.
        session_id: value of the ``svSession`` cookie to send along.
        collection_name: name of the collection, e.g. ``'tblSongs'``.
        filters: list of filter clauses, combined under ``$and``.
        sort_field: field name to sort by, ascending.

    Raises:
        requests.HTTPError: if the server answers with an error status.
    """
    response = session.post(
        API_URL,
        headers={
            'Accept': 'application/json',
        },
        cookies={
            'svSession': session_id,
        },
        json={
            'collectionName': collection_name,
            'dataQuery': {
                'filter': {
                    '$and': filters,
                },
                'sort': [
                    {
                        'fieldName': sort_field,
                        'order': 'ASC',
                    },
                ],
                'paging': {
                    'offset': 0,
                    'limit': PAGE_LIMIT,
                },
                'fields': [],
            },
            'options': {},
            'includeReferencedItems': [],
            'segment': 'LIVE',
            'appId': APP_ID,
        },
        timeout=REQUEST_TIMEOUT,
    )
    response.raise_for_status()
    return response.json()['items']


def download(url, filename):
    """Stream *url* into *filename*.

    The data is first written to ``filename + '.part'`` and only renamed to
    *filename* once the transfer has finished, so an interrupted run never
    leaves a truncated file that a later run would mistake for a finished
    download.

    Raises:
        requests.HTTPError: if the server answers with an error status
            (nothing is written in that case).
    """
    partial = filename + '.part'
    with requests.get(url, stream=True, timeout=REQUEST_TIMEOUT) as stream:
        stream.raise_for_status()
        # The raw stream is not decoded by default; without this a gzip- or
        # deflate-encoded response would be saved in its compressed form.
        stream.raw.decode_content = True
        with open(partial, 'wb') as file:
            copyfileobj(stream.raw, file)
    replace(partial, filename)


def main():
    """Download every bootleg of every song into the current directory."""
    session = requests.Session()

    # Loading the songs page makes the server set the svSession cookie that
    # the data queries below send along.
    response = session.get(SONGS_PAGE_URL, timeout=REQUEST_TIMEOUT)
    response.raise_for_status()
    session_id = session.cookies.get_dict()['svSession']

    songs = query_collection(session, session_id, 'tblSongs', [], 'song_name')

    for song in songs:
        print(song['song_name'])

        # live performances
        performances = query_collection(
            session,
            session_id,
            'tblSongLivePerfs',
            [
                {'songId': {'$eq': song['song_id']}},
                {'songInfo': {'$ne': 'Aborted'}},
            ],
            'eventDate',
        )

        for performance in performances:
            # Skip performances without a recording (key missing or empty).
            bootleg_url = performance.get('bootleg')
            if not bootleg_url:
                continue

            filename = bootleg_filename(
                song['song_name'], performance['eventName'], bootleg_url
            )

            if exists(filename):
                print(filename, 'exists')
                continue

            print('downloading', filename)
            download(bootleg_url, filename)


if __name__ == '__main__':
    main()
|
Loading…
Reference in a new issue