commit 22e982d49d93e07d7c72dd75f67448e4c1b2fe25
Author: mwiegand
Date:   Fri Apr 21 18:35:26 2023 +0200

    initial commit

diff --git a/.envrc b/.envrc
new file mode 100644
index 0000000..aade2ee
--- /dev/null
+++ b/.envrc
@@ -0,0 +1,13 @@
+if test -f .venv/bin/python && test "$(realpath .venv/bin/python)" != "$(realpath "$(pyenv which python)")"
+then
+    echo "rebuilding venv für new python version"
+    rm -rf .venv
+fi
+
+python3 -m venv .venv
+source .venv/bin/activate
+PATH_add .venv/bin
+PATH_add bin
+python3 -m pip --require-virtualenv --quiet install --upgrade pip wheel
+python3 -m pip --require-virtualenv --quiet install --upgrade -r requirements.txt
+
diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 0000000..f229360
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1 @@
+requests
diff --git a/scrape.py b/scrape.py
new file mode 100755
index 0000000..5fa0280
--- /dev/null
+++ b/scrape.py
@@ -0,0 +1,162 @@
+#!/usr/bin/env python3
+"""Download bootleg recordings of live performances from spektorsthesaurus.com.
+
+Queries the site's Wix data API for every song and for each song's live
+performances, then saves every performance that carries a ``bootleg`` URL
+into the current directory as ``<song name> - <event name><extension>``.
+Files that already exist are skipped, so an interrupted run can be resumed.
+"""
+
+import os
+from os.path import exists, splitext
+from urllib.parse import urlsplit
+
+import requests
+
+SITE_URL = 'https://www.spektorsthesaurus.com'
+QUERY_URL = SITE_URL + '/_api/cloud-data/v1/wix-data/collections/query'
+APP_ID = 'e3c84d19-bfb6-4299-824a-3236a027d528'
+SESSION_COOKIE = 'svSession'
+# (connect, read) timeouts in seconds so a stalled server cannot hang the run.
+TIMEOUT = (10, 60)
+CHUNK_SIZE = 64 * 1024
+
+
+def open_session():
+    """Visit the songs page and return ``(session, session_id)``.
+
+    The API calls send the ``svSession`` cookie that this page visit sets.
+
+    Raises:
+        RuntimeError: if the response did not set the session cookie.
+    """
+    session = requests.Session()
+    response = session.get(SITE_URL + '/songs', timeout=TIMEOUT)
+    session_id = session.cookies.get_dict().get(SESSION_COOKIE)
+    if session_id is None:
+        raise RuntimeError(
+            f'no {SESSION_COOKIE} cookie received '
+            f'(HTTP {response.status_code} from {response.url})'
+        )
+    return session, session_id
+
+
+def query_collection(session, session_id, collection, filters, sort_field):
+    """Return the ``items`` of one data-collection query.
+
+    Args:
+        session: the ``requests.Session`` returned by ``open_session``.
+        session_id: value sent as the ``svSession`` cookie.
+        collection: collection name, e.g. ``'tblSongs'``.
+        filters: list of filter clauses combined with ``$and``.
+        sort_field: field the result is sorted by, ascending.
+
+    Raises:
+        requests.HTTPError: if the API answers with an error status.
+    """
+    response = session.post(
+        QUERY_URL,
+        headers={'Accept': 'application/json'},
+        cookies={SESSION_COOKIE: session_id},
+        json={
+            'collectionName': collection,
+            'dataQuery': {
+                'filter': {'$and': filters},
+                'sort': [{'fieldName': sort_field, 'order': 'ASC'}],
+                'paging': {'offset': 0, 'limit': 999},
+                'fields': [],
+            },
+            'options': {},
+            'includeReferencedItems': [],
+            'segment': 'LIVE',
+            'appId': APP_ID,
+        },
+        timeout=TIMEOUT,
+    )
+    response.raise_for_status()
+    return response.json()['items']
+
+
+def bootleg_filename(song_name, event_name, url):
+    """Return the local file name for one bootleg.
+
+    The extension is taken from the URL path only, so a query string or
+    fragment never leaks into the name, and path separators inside the
+    song or event name are replaced so the file stays in the current
+    directory.
+    """
+    _, extension = splitext(urlsplit(url).path)
+    filename = f'{song_name} - {event_name}{extension}'
+    for separator in filter(None, (os.sep, os.altsep)):
+        filename = filename.replace(separator, '_')
+    return filename
+
+
+def download(url, filename):
+    """Stream ``url`` to ``filename`` without leaving a partial file behind.
+
+    Data is written to ``<filename>.part`` and renamed only once the
+    transfer is complete, because an existing ``filename`` is treated as a
+    finished download on later runs.
+
+    Raises:
+        requests.RequestException: on HTTP error status or network failure.
+        OSError: if the file cannot be written.
+    """
+    partial = filename + '.part'
+    try:
+        with requests.get(url, stream=True, timeout=TIMEOUT) as response:
+            # Otherwise an HTTP error page would be saved as the recording.
+            response.raise_for_status()
+            with open(partial, 'wb') as file:
+                # iter_content undoes any gzip/deflate Content-Encoding.
+                for chunk in response.iter_content(chunk_size=CHUNK_SIZE):
+                    file.write(chunk)
+        os.replace(partial, filename)
+    finally:
+        # After a successful rename there is nothing left to remove.
+        if exists(partial):
+            os.remove(partial)
+
+
+def main():
+    """Download every bootleg that is not already present locally."""
+    session, session_id = open_session()
+
+    songs = query_collection(session, session_id, 'tblSongs', [], 'song_name')
+    for song in songs:
+        print(song['song_name'])
+
+        # live performances, excluding aborted ones
+        performances = query_collection(
+            session,
+            session_id,
+            'tblSongLivePerfs',
+            [
+                {'songId': {'$eq': song['song_id']}},
+                {'songInfo': {'$ne': 'Aborted'}},
+            ],
+            'eventDate',
+        )
+        for performance in performances:
+            url = performance.get('bootleg')
+            if not url:
+                continue
+
+            filename = bootleg_filename(
+                song['song_name'], performance['eventName'], url,
+            )
+            if exists(filename):
+                print(filename, 'exists')
+                continue
+
+            print('downloading', filename)
+            try:
+                download(url, filename)
+            except (requests.RequestException, OSError) as error:
+                # One broken link must not abort the remaining downloads.
+                print('failed', filename, error)
+
+
+if __name__ == '__main__':
+    main()