Compare commits

...

9 Commits

Author SHA1 Message Date
Ventilaar
72af6b6126 Handle mass websub subscriptions with added statistics. General cleanup 2024-04-18 23:36:45 +02:00
Ventilaar
8bf8e08af3 Forgot admin imports 2024-04-18 00:59:46 +02:00
Ventilaar
236b56915b Handle WebSub endpoint renewing. Basic code for XML parsing (not implemented yet) 2024-04-18 00:56:22 +02:00
Ventilaar
ac0243a783 Quick key rename title_slug 2024-04-17 12:24:14 +02:00
Ventilaar
bb78c97d52 Do not store websub posted raw data as str 2024-04-10 11:25:05 +02:00
Ventilaar
7ccb827a9c hotfix the hotfix of the hotfix 2024-04-09 13:01:23 +02:00
Ventilaar
9c0e4fb63c Hotfix the websub hotfix. Add button to easily monitor websub callbacks. Clean stuck websub requests after 3 days 2024-04-09 12:56:57 +02:00
Ventilaar
75d42ad3cd Websub callback domain hotfix 2024-04-09 12:16:47 +02:00
Ventilaar
4fa0ee2c68 Hotfix channel sorting 2024-04-09 12:11:14 +02:00
16 changed files with 274 additions and 70 deletions

View File

@@ -1,4 +1,4 @@
name: Generate release
name: Generate docker image
on:
release:
@@ -22,13 +22,4 @@ jobs:
uses: docker/build-push-action@v5
with:
push: true
tags: git.ventilaar.nl/ventilaar/ayta:latest
- name: Update worker server
uses: appleboy/ssh-action@v1.0.3
with:
host: 192.168.66.109
username: root
key: ${{ secrets.SERVER_KEY }}
port: 22
script: /root/update_worker.sh
tags: git.ventilaar.nl/ventilaar/ayta:latest

View File

@@ -0,0 +1,18 @@
name: Update worker server
on:
release:
types: [published]
jobs:
build-and-publish:
runs-on: ubuntu-latest
steps:
- name: Update worker server
uses: appleboy/ssh-action@v1.0.3
with:
host: 192.168.66.109
username: root
key: ${{ secrets.SERVER_KEY }}
port: 22
script: /root/update_worker.sh

View File

@@ -9,15 +9,24 @@ def create_app(test_config=None):
config = {'MONGO_CONNECTION': os.environ.get('AYTA_MONGOCONNECTION', 'mongodb://root:example@192.168.66.140:27017'),
'OIDC_PROVIDER': os.environ.get('AYTA_OIDC_PROVIDER', 'https://auth.ventilaar.nl'),
'OIDC_ID': os.environ.get('AYTA_OIDC_ID', 'ayta'),
'CACHE_TYPE': os.environ.get('AYTA_CACHETYPE', 'SimpleCache'),
'CACHE_DEFAULT_TIMEOUT': int(os.environ.get('AYTA_CACHETIMEOUT', 6)),
'SECRET_KEY': os.environ.get('AYTA_SECRETKEY', secrets.token_hex(32)),
'DEBUG': bool(os.environ.get('AYTA_DEBUG', False)),
'DOMAIN': os.environ.get('AYTA_DOMAIN', 'https://testing.mashallah.nl'),
'CELERY': dict(broker_url=str(os.environ.get('AYTA_CELERYBROKER', 'amqp://guest:guest@192.168.66.140:5672/')),
task_ignore_result=True,)
'CELERY': {'broker_url': str(os.environ.get('AYTA_CELERYBROKER', 'amqp://guest:guest@192.168.66.140:5672/'))}
}
# Static Flask configuration options
config['CELERY']['task_ignore_result'] = True
config['CACHE_TYPE'] = 'SimpleCache'
config['SECRET_KEY'] = secrets.token_bytes(32)
# Celery Periodic tasks
config['CELERY']['beat_schedule'] = {}
config['CELERY']['beat_schedule']['Renew WebSub endpoints'] = {'task': 'ayta.tasks.websub_renew_expiring', 'schedule': 4000}
#config['CELERY']['beat_schedule']['Process WebSub data'] = {'task': 'ayta.tasks.websub_process_data', 'schedule': 6}
app = Flask(__name__)
app.config.from_mapping(config)

View File

@@ -2,7 +2,7 @@ from flask import Blueprint, render_template, request, redirect, url_for, flash
from ..nosql import get_nosql
from ..dlp import checkChannelId, getChannelInfo
from ..decorators import login_required
from ..tasks import subscribe_websub_callback, unsubscribe_websub_callback
from ..tasks import websub_subscribe_callback, websub_unsubscribe_callback
from datetime import datetime
from secrets import token_urlsafe
@@ -71,15 +71,15 @@ def channel(channelId):
value = request.form.get('value', None)
if task == 'subscribe-websub':
task = subscribe_websub_callback.delay(channelId)
task = websub_subscribe_callback.delay(channelId)
flash(f"Started task {task.id}")
return redirect(url_for('admin.channel', channelId=channelId))
if task == 'update-value':
if key == 'active':
if key in ['active', 'websub']:
value = True if value else False
if key == 'added_date':
if key in ['added_date']:
value = datetime.strptime(value, '%Y-%m-%d')
get_nosql().update_channel_key(channelId, key, value)
@@ -109,29 +109,41 @@ def run(runId):
@bp.route('/websub', methods=['GET', 'POST'])
@login_required
def websub():
render = {}
if request.method == 'POST':
task = request.form.get('task', None)
value = request.form.get('value', None)
if task == 'unsubscribe':
channelId = get_nosql().websub_getCallback(value).get('channel')
task = unsubscribe_websub_callback.delay(value, channelId)
task = websub_unsubscribe_callback.delay(value)
flash(f"Started task {task.id}")
return redirect(url_for('admin.websub'))
elif task == 'clean-retired':
get_nosql().websub_cleanRetired()
return redirect(url_for('admin.websub'))
elif task == 'unsubscribe-callbacks':
for callbackId in get_nosql().websub_getCallbacks():
websub_unsubscribe_callback.delay(callbackId)
flash(f"Started unsubscribe tasks for all callbacks")
return redirect(url_for('admin.websub'))
elif task == 'subscribe-channels':
for channelId in get_nosql().list_all_channels(websub=True):
websub_subscribe_callback.delay(channelId)
flash(f'Started subscribe tasks for activated channels')
return redirect(url_for('admin.websub'))
callbackIds = get_nosql().websub_getCallbacks()
callbacks = {}
render['stats'] = get_nosql().websub_statistics()
for callbackId in callbackIds:
callbacks[callbackId] = get_nosql().websub_getCallback(callbackId)
return render_template('admin/websub.html', callbacks=callbacks)
return render_template('admin/websub.html', callbacks=callbacks, render=render)
@bp.route('/reports', methods=['GET', 'POST'])
@login_required

View File

@@ -33,7 +33,7 @@ def websub(cap):
return challenge
if get_nosql().websub_existsCallback(cap):
if not get_nosql().websub_savePost(cap, str(request.data)):
if not get_nosql().websub_savePost(cap, request.data):
return abort(500)
return '', 202

View File

@@ -34,7 +34,7 @@ def channel(channelId):
for videoId in videoIds:
videos.append(get_nosql().get_video_info(videoId, limited=True))
videos = sorted(videos, key=lambda x: x.get('upload_date'), reverse=True)
videos = sorted(videos, key=lambda x: x.get('upload_date', '19700101'), reverse=True)
return render_template('channel/channel.html', channel=channelInfo, videos=videos)

View File

@@ -36,4 +36,9 @@ def base():
render['info'] = get_nosql().get_video_info(vGet)
render['params'] = request.args.get('v')
if render['info']['_status'] != 'available':
flash(render['info'].get('_status_description', 'Video unavailable because of technical errors. Come back later.'))
return redirect(url_for('index.base'))
return render_template('watch/index.html', render=render)

View File

@@ -142,12 +142,14 @@ class Mango:
# channel operations #
##########################################
def list_all_channels(self, active=False):
def list_all_channels(self, active=False, websub=False):
""" Returns a SET of YouTube channel ID's; Depending on given positional BOOL only active channels or everything"""
search_terms = {}
if active:
search_terms['active'] = True
elif websub:
search_terms['websub'] = True
channels = []
for channel in self.channels.find(search_terms, {'id': 1}):
@@ -169,9 +171,6 @@ class Mango:
def get_channel_info(self, channelId):
return self.channels.find_one({'id': channelId})
def update_channel_state(self, channelId, state):
self.channels.update_one({'id': channelId}, {"$set": {"active": bool(state)}})
return True
def update_channel_key(self, channelId, key, value):
self.channels.update_one({'id': channelId}, {"$set": {key: value}})
@@ -205,7 +204,7 @@ class Mango:
def get_recent_videos(self, count=99):
""" Returns a SET of YouTube video ID's which have been added last to the info_json collection """
result = self.info_json.find({}, {'_id': 0, 'id': 1}, sort=[('_id', pymongo.DESCENDING)]).limit(count)
result = self.info_json.find({'_status': 'available'}, {'_id': 0, 'id': 1}, sort=[('_id', pymongo.DESCENDING)]).limit(count)
ids = []
@@ -216,7 +215,7 @@ class Mango:
def get_video_info(self, videoId, limited=False):
if limited:
projection = {'_id': 1, 'id': 1, 'title': 1, 'upload_date': 1, 'description': 1, 'channel_id': 1, 'uploader': 1, 'epoch': 1, 'title_slug': 1}
projection = {'_id': 1, 'id': 1, 'title': 1, 'upload_date': 1, 'description': 1, 'channel_id': 1, 'uploader': 1, 'epoch': 1, '_title_slug': 1}
else:
projection = {}
@@ -294,22 +293,22 @@ class Mango:
status = status.get('status')
if status in ['requesting']:
self.websub_callbacks.update_one({'id': callbackId}, {'$set': {'status': 'active', 'activation_time': current_time(object=True), 'lease': lease}})
self.websub_callbacks.update_one({'id': callbackId}, {'$set': {'status': 'active', 'activation_time': current_time(object=True), 'lease': int(lease)}})
return True
return False
def websub_existsCallback(self, callbackId):
status = self.websub_callbacks.find_one({'id': callbackId}, {'status': 1})
def websub_existsCallback(self, callbackId, channel=False):
if channel:
query = {'channel': callbackId, 'status': {'$in': ['requesting', 'active', 'retiring']}}
else:
query = {'id': callbackId, 'status': {'$in': ['requesting', 'active', 'retiring']}}
status = self.websub_callbacks.find_one(query, {'id': 1, 'status': 1})
if not status:
return False
status = status.get('status')
if status:
return status.get('id')
if status in ['requesting', 'active', 'retiring']:
return True
return False
def websub_retiringCallback(self, callbackId):
@@ -330,7 +329,7 @@ class Mango:
def websub_getCallback(self, callbackId):
return self.websub_callbacks.find_one({'id': callbackId})
def websub_getCallbacks(self, channelId=''):
def websub_getCallbacks(self, channelId=None):
callbacks = []
if channelId:
@@ -338,7 +337,6 @@ class Mango:
else:
filter = {}
for callback in self.websub_callbacks.find(filter, {'id': 1}):
callbacks.append(callback['id'])
@@ -347,12 +345,34 @@ class Mango:
def websub_savePost(self, callbackId, data):
return self.websub_data.insert_one({'callback_id': callbackId, 'state': 'unprocessed', 'received_time': current_time(object=True), 'raw_data': data}).inserted_id
def websub_cleanRetired(self, days=3):
def websub_getFirstPostData(self):
data = self.websub_data.find_one({'state': 'unprocessed'}, {'_id': 1, 'raw_data': 1}, sort=[('received_time', 1)])
if not data:
return None
self.websub_data.update_one({'_id': data['_id']}, {'$set': {'state': 'processing'}})
return (data.get('_id'), data.get('raw_data'))
def websub_deletePostProcessing(self, _id):
self.websub_data.delete_one({'_id': _id})
def websub_cleanRetired(self, days=1):
days = self.datetime.utcnow() - self.timedelta(days=days)
self.websub_callbacks.delete_many({'status': 'retired', 'retired_time': {'$lt': days}})
self.websub_callbacks.delete_many({'status': 'requesting', 'requesting_time': {'$lt': days}})
return True
def websub_statistics(self):
stats = {}
stats['unprocessed_data'] = self.websub_data.count_documents({'state': 'unprocessed'})
stats['active_callbacks'] = self.websub_callbacks.count_documents({'status': 'active'})
return stats
##########################################
# POSTER FUNCTIONS #

View File

@@ -1,4 +1,10 @@
class OIDC():
"""
This function class is nothing more than a nonce and state store for security in the authentication mechanism.
Additionally this class provides the function to generate redirect url's and check bearer tokens on their validity as well as caching jwt signing keys.
Fairly barebones and should be 100% secure. (famous last words)
This is made for form posted JWT's. While not the most secure it is the most easy way to implement. Moving on to a code based solution might be preferred in the future.
"""
def __init__(self, app=None):
self.states = {}
self.nonces = {}
@@ -15,6 +21,7 @@ class OIDC():
self.client_id = config['OIDC_ID']
self.provider = config['OIDC_PROVIDER']
self.domain = config['DOMAIN']
self.window = 120 # the time window to allow states and nonces in seconds
if self.provider[:8] != 'https://' or self.provider[-1] == '/':
print('Incorrect OIDC provider URI', flush=True)
@@ -27,12 +34,12 @@ class OIDC():
self.jwks_manager = jwt.PyJWKClient(jwks_uri)
#################################
#######################################################
def state_maintenance(self):
from datetime import datetime
pivot = datetime.now().timestamp() - 120
pivot = datetime.now().timestamp() - self.window
expired_states = [state for state, timestamp in self.states.items() if timestamp <= pivot]
@@ -61,12 +68,14 @@ class OIDC():
return False
#################################
#######################################################
# Same code as above but a different store for nonces #
#######################################################
def nonce_maintenance(self):
from datetime import datetime
pivot = datetime.now().timestamp() - 120
pivot = datetime.now().timestamp() - self.window
expired_nonces = [nonce for nonce, timestamp in self.nonces.items() if timestamp <= pivot]
@@ -95,7 +104,7 @@ class OIDC():
return False
#################################
#######################################################
def generate_redirect(self):
return str(f'{self.authorize_uri}'

View File

@@ -1,38 +1,57 @@
from celery import shared_task
from flask import current_app
##########################################
# CELERY TASKS #
##########################################
@shared_task()
def subscribe_websub_callback(channelId):
def websub_subscribe_callback(channelId):
import requests
from .nosql import get_nosql
callbackId = get_nosql().websub_newCallback(channelId)
# check if a callback already exists for channel
answer = get_nosql().websub_existsCallback(channelId, channel=True)
if not answer:
callbackId = get_nosql().websub_newCallback(channelId)
else:
callbackId = answer
url = 'https://pubsubhubbub.appspot.com/subscribe'
data = {
'hub.callback': f'https://{current_app.config["DOMAIN"]}/api/websub//{callbackId}',
'hub.callback': f'{current_app.config["DOMAIN"]}/api/websub/{callbackId}',
'hub.topic': f'https://www.youtube.com/xml/feeds/videos.xml?channel_id={channelId}',
'hub.verify': 'async',
'hub.mode': 'subscribe',
'hub.verify_token': '',
'hub.secret': '',
'hub.lease_numbers': '86400',
'hub.lease_numbers': '432000',
}
get_nosql().websub_requestingCallback(callbackId)
response = requests.post(url, data=data)
if response.status_code == 202:
return True
# maybe handle errors?
return False
@shared_task()
def unsubscribe_websub_callback(callbackId, channelId):
def websub_unsubscribe_callback(callbackId):
import requests
from .nosql import get_nosql
answer = get_nosql().websub_existsCallback(callbackId)
if not answer:
return False
channelId = get_nosql().websub_getCallback(callbackId).get('channel')
url = 'https://pubsubhubbub.appspot.com/subscribe'
data = {'hub.callback': f'https://{current_app.config["DOMAIN"]}/api/websub/{callbackId}',
data = {'hub.callback': f'{current_app.config["DOMAIN"]}/api/websub/{callbackId}',
'hub.topic': f'https://www.youtube.com/xml/feeds/videos.xml?channel_id={channelId}',
'hub.verify': 'async',
'hub.mode': 'unsubscribe'
@@ -44,4 +63,92 @@ def unsubscribe_websub_callback(callbackId, channelId):
if response.status_code == 202:
return True
return False
# maybe handle errors?
return False
@shared_task()
def websub_process_data():
from .nosql import get_nosql
while True:
data = get_nosql().websub_getFirstPostData()
if not data:
break
_id, data = data
parsed = do_parse_data(data)
if not parsed:
get_nosql().websub_deletePostProcessing(_id)
state, channelId, videoId = parsed
# do things
get_nosql().websub_deletePostProcessing(_id)
@shared_task()
def websub_renew_expiring(hours=6):
from .nosql import get_nosql
from datetime import datetime, timedelta
count = 0
for callbackId in get_nosql().websub_getCallbacks():
data = get_nosql().websub_getCallback(callbackId)
if data.get('status') not in ['active']: # callback not active
continue
pivot = datetime.utcnow() + timedelta(hours=hours) # hours past now
expires = data.get('activation_time') + timedelta(seconds=data.get('lease')) # callback expires at
if pivot <= expires: # expiration happens after n hours fron now
continue # skip callback
# expiration happens within n hours
websub_subscribe_callback.delay(data.get('channel'))
# limit amount of subscribe requests to spread out the requests over time
count = count + 1
if count >= 16:
break
##########################################
# TASK MODULES #
##########################################
def do_parse_data(data):
import xml.etree.ElementTree as ET
data = data.decode('utf-8')
try:
root = ET.fromstring(data)
except ET.ParseError:
print('Not XML')
return False
yt = any(child.tag.startswith('{http://www.youtube.com/xml/schemas/2015}') for child in root.iter())
at = any(child.tag.startswith('{http://purl.org/atompub/tombstones/1.0}') for child in root.iter())
if yt and not at:
# Video published
state = 'added'
ns = {'yt': 'http://www.youtube.com/xml/schemas/2015', '': 'http://www.w3.org/2005/Atom'}
entry = root.find('.//{http://www.w3.org/2005/Atom}entry')
videoId = entry.find('./yt:videoId', ns).text
channelId = entry.find('./yt:channelId', ns).text
elif not yt and at:
# Video hidden
state = 'removed'
ns = {'at': 'http://purl.org/atompub/tombstones/1.0', '': 'http://www.w3.org/2005/Atom'}
deleted_entry = root.find('.//{http://purl.org/atompub/tombstones/1.0}deleted-entry')
videoId = deleted_entry.attrib['ref'].split(':')[-1]
channelId = deleted_entry.find('./at:by/uri', ns).text.split('/')[-1]
else:
print('Unknown xml')
return False
return (state, channelId, videoId)

View File

@@ -19,7 +19,7 @@
{% for item in channelInfo %}
<form method="POST">
<div class="input-field">
<span class="supporting-text">{{ item }}</span>
<span class="supporting-text mb-2">{{ item }}</span>
<input class="validate" type="text" value="{{ item }}" name="key" hidden>
</div>

View File

@@ -4,14 +4,9 @@
{% block content %}
<div class="row">
<div class="col s12 l11">
<div class="col s12">
<h4>WebSub administration page</h4>
</div>
<div class="col s12 l1 m-5">
<form method="POST">
<input title="Prunes all retired callbacks, but keeps last 3 days" type="submit" value="clean-retired" name="task">
</form>
</div>
</div>
<div class="divider"></div>
<div class="row">
@@ -19,6 +14,43 @@
<h5>WebSub options</h5>
</div>
</div>
<div class="row">
<div class="col s12 l4 m-4">
<div class="card">
<div class="card-content">
<span class="card-title">Direct actions</span>
<form method="post" onsubmit="return confirm('Are you sure?');">
<button class="btn mb-2 green" type="submit" name="task" value="subscribe-channels">Subscribe channels</button>
<br>
<span class="supporting-text">Send WebSub subscription request for all activated channels. (This will renew existing ones as well)</span>
</form>
<form class="mt-4" method="post" onsubmit="return confirm('Are you sure?');">
<button class="btn mb-2 red" type="submit" name="task" value="unsubscribe-callbacks">Unsubscribe channels</button>
<br>
<span class="supporting-text">Send WebSub unsubscription request for all activated endpoints. (This will only unsubscribe, not disable)</span>
</form>
<form class="mt-4" method="post" onsubmit="return confirm('Are you sure?');">
<button class="btn mb-2" type="submit" name="task" value="clean-retired">Clean retired</button>
<br>
<span class="supporting-text">Prunes all retired callbacks, but keeps until last day</span>
</form>
</div>
</div>
</div>
<div class="col s12 l4 m-4">
<div class="card">
<div class="card-content">
<span class="card-title">Statistics</span>
<h6>Unprocessed callback datapoints</h6>
<p>{{ render['stats']['unprocessed_data'] }}</p>
<h6>Active callbacks</h6>
<p>{{ render['stats']['active_callbacks'] }}</p>
<h6>Something</h6>
<p>Blah</p>
</div>
</div>
</div>
</div>
<div class="divider"></div>
<div class="row">
<div class="col s6 l9">
@@ -50,6 +82,7 @@
{% for callback in callbacks %}
<tr class="filterable">
<td>
<a target="_blank" rel="noopener noreferrer" href="https://pubsubhubbub.appspot.com/subscription-details?hub.callback={{ config['DOMAIN'] }}/api/websub/{{ callbacks[callback].get('id') }}&hub.topic=https://www.youtube.com/xml/feeds/videos.xml?channel_id={{ callbacks[callback].get('channel') }}"><button class="btn-small waves-effect waves-light" title="Information on Pubsubhubbub (external link)"></button></a>
<form method="post">
<input type="text" value="{{ callbacks[callback].get('id') }}" name="value" hidden>
<button class="btn-small waves-effect waves-light" type="submit" name="task" value="unsubscribe" title="Send unsubscribe request to hub" {% if callbacks[callback].get('status') != 'active' %}disabled{% endif %}>🗑️</button>

View File

@@ -25,7 +25,7 @@
<div class="card medium black-text">
<a href="{{ url_for('watch.base') }}?v={{ video.get('id') }}">
<div class="card-image">
<img loading="lazy" src="https://archive.ventilaar.net/videos/automatic/{{ video.get('channel_id') }}/{{ video.get('id') }}/{{ video.get('title_slug') }}.jpg">
<img loading="lazy" src="https://archive.ventilaar.net/videos/automatic/{{ video.get('channel_id') }}/{{ video.get('id') }}/{{ video.get('_title_slug') }}.jpg">
</div>
</a>
<div class="card-content activator">

View File

@@ -25,7 +25,7 @@
<div class="card medium black-text">
<a href="{{ url_for('watch.base') }}?v={{ video.get('id') }}">
<div class="card-image">
<img loading="lazy" src="https://archive.ventilaar.net/videos/automatic/{{ video.get('channel_id') }}/{{ video.get('id') }}/{{ video.get('title_slug') }}.jpg">
<img loading="lazy" src="https://archive.ventilaar.net/videos/automatic/{{ video.get('channel_id') }}/{{ video.get('id') }}/{{ video.get('_title_slug') }}.jpg">
</div>
</a>
<div class="card-content activator">

View File

@@ -25,7 +25,7 @@
<div class="card medium black-text">
<a href="{{ url_for('watch.base') }}?v={{ video.get('id') }}">
<div class="card-image">
<img loading="lazy" src="https://archive.ventilaar.net/videos/automatic/{{ video.get('channel_id') }}/{{ video.get('id') }}/{{ video.get('title_slug') }}.jpg">
<img loading="lazy" src="https://archive.ventilaar.net/videos/automatic/{{ video.get('channel_id') }}/{{ video.get('id') }}/{{ video.get('_title_slug') }}.jpg">
</div>
</a>
<div class="card-content activator">

View File

@@ -14,8 +14,8 @@
<div class="row">
<div class="col s12 mt-4 center-align">
<video controls class="responsive-video">
<source src="https://archive.ventilaar.net/videos/automatic/{{ render.get('info').get('channel_id') }}/{{ render.get('info').get('id') }}/{{ render.get('info').get('title_slug') }}.mp4">
<source src="https://archive.ventilaar.net/videos/automatic/{{ render.get('info').get('channel_id') }}/{{ render.get('info').get('id') }}/{{ render.get('info').get('title_slug') }}.webm">
<source src="https://archive.ventilaar.net/videos/automatic/{{ render.get('info').get('channel_id') }}/{{ render.get('info').get('id') }}/{{ render.get('info').get('_title_slug') }}.mp4">
<source src="https://archive.ventilaar.net/videos/automatic/{{ render.get('info').get('channel_id') }}/{{ render.get('info').get('id') }}/{{ render.get('info').get('_title_slug') }}.webm">
Your browser does not support the video tag.
</video>
</div>