Compare commits

..

9 Commits

Author SHA1 Message Date
Ventilaar
8bf8e08af3 Forgot admin imports 2024-04-18 00:59:46 +02:00
Ventilaar
236b56915b Handle WebSub endpoint renewing. Basic code for XML parsing (not implemented yet) 2024-04-18 00:56:22 +02:00
Ventilaar
ac0243a783 Quick key rename title_slug 2024-04-17 12:24:14 +02:00
Ventilaar
bb78c97d52 Do not store websub posted raw data as str 2024-04-10 11:25:05 +02:00
Ventilaar
7ccb827a9c hotfix the hotfix of the hotfix 2024-04-09 13:01:23 +02:00
Ventilaar
9c0e4fb63c Hotfix the websub hotfix. Add button to easily monitor websub callbacks. Clean stuck websub requests after 3 days 2024-04-09 12:56:57 +02:00
Ventilaar
75d42ad3cd Websub callback domain hotfix 2024-04-09 12:16:47 +02:00
Ventilaar
4fa0ee2c68 Hotfix channel sorting 2024-04-09 12:11:14 +02:00
Ventilaar
7e06c8673b Update PyJWT requirement 2024-04-06 23:27:18 +02:00
14 changed files with 160 additions and 39 deletions

View File

@@ -48,7 +48,7 @@ Extra functionality for further development of features.
- [x] Video reporting functionality - [x] Video reporting functionality
- [x] Ability (for external applications) to queue up video ids for download - [x] Ability (for external applications) to queue up video ids for download
- [x] Add websub requesting and receiving ability. (not fully usable yet without celery tasks) - [x] Add websub requesting and receiving ability. (not fully usable yet without celery tasks)
- [] OIDC or Webauthn logins instead of static argon2 passwords - [x] OIDC or Webauthn logins instead of static argon2 passwords
### Stage 3 ### Stage 3
Mainly focused on retiring the cronjob based scripts and moving it to celery based tasks Mainly focused on retiring the cronjob based scripts and moving it to celery based tasks

View File

@@ -9,15 +9,24 @@ def create_app(test_config=None):
config = {'MONGO_CONNECTION': os.environ.get('AYTA_MONGOCONNECTION', 'mongodb://root:example@192.168.66.140:27017'), config = {'MONGO_CONNECTION': os.environ.get('AYTA_MONGOCONNECTION', 'mongodb://root:example@192.168.66.140:27017'),
'OIDC_PROVIDER': os.environ.get('AYTA_OIDC_PROVIDER', 'https://auth.ventilaar.nl'), 'OIDC_PROVIDER': os.environ.get('AYTA_OIDC_PROVIDER', 'https://auth.ventilaar.nl'),
'OIDC_ID': os.environ.get('AYTA_OIDC_ID', 'ayta'), 'OIDC_ID': os.environ.get('AYTA_OIDC_ID', 'ayta'),
'CACHE_TYPE': os.environ.get('AYTA_CACHETYPE', 'SimpleCache'),
'CACHE_DEFAULT_TIMEOUT': int(os.environ.get('AYTA_CACHETIMEOUT', 6)), 'CACHE_DEFAULT_TIMEOUT': int(os.environ.get('AYTA_CACHETIMEOUT', 6)),
'SECRET_KEY': os.environ.get('AYTA_SECRETKEY', secrets.token_hex(32)),
'DEBUG': bool(os.environ.get('AYTA_DEBUG', False)), 'DEBUG': bool(os.environ.get('AYTA_DEBUG', False)),
'DOMAIN': os.environ.get('AYTA_DOMAIN', 'https://testing.mashallah.nl'), 'DOMAIN': os.environ.get('AYTA_DOMAIN', 'https://testing.mashallah.nl'),
'CELERY': dict(broker_url=str(os.environ.get('AYTA_CELERYBROKER', 'amqp://guest:guest@192.168.66.140:5672/')), 'CELERY': {'broker_url': str(os.environ.get('AYTA_CELERYBROKER', 'amqp://guest:guest@192.168.66.140:5672/'))}
task_ignore_result=True,)
} }
# Static Flask configuration options
config['CELERY']['task_ignore_result'] = True
config['CACHE_TYPE'] = 'SimpleCache'
config['SECRET_KEY'] = secrets.token_bytes(32)
# Celery Periodic tasks
config['CELERY']['beat_schedule'] = {}
config['CELERY']['beat_schedule']['Renew WebSub endpoints'] = {'task': 'ayta.tasks.websub_renew_expiring', 'schedule': 4000}
#config['CELERY']['beat_schedule']['Process WebSub data'] = {'task': 'ayta.tasks.websub_process_data', 'schedule': 6}
app = Flask(__name__) app = Flask(__name__)
app.config.from_mapping(config) app.config.from_mapping(config)

View File

@@ -2,7 +2,7 @@ from flask import Blueprint, render_template, request, redirect, url_for, flash
from ..nosql import get_nosql from ..nosql import get_nosql
from ..dlp import checkChannelId, getChannelInfo from ..dlp import checkChannelId, getChannelInfo
from ..decorators import login_required from ..decorators import login_required
from ..tasks import subscribe_websub_callback, unsubscribe_websub_callback from ..tasks import websub_subscribe_callback, websub_unsubscribe_callback
from datetime import datetime from datetime import datetime
from secrets import token_urlsafe from secrets import token_urlsafe
@@ -71,7 +71,7 @@ def channel(channelId):
value = request.form.get('value', None) value = request.form.get('value', None)
if task == 'subscribe-websub': if task == 'subscribe-websub':
task = subscribe_websub_callback.delay(channelId) task = websub_subscribe_callback.delay(channelId)
flash(f"Started task {task.id}") flash(f"Started task {task.id}")
return redirect(url_for('admin.channel', channelId=channelId)) return redirect(url_for('admin.channel', channelId=channelId))
@@ -114,9 +114,7 @@ def websub():
value = request.form.get('value', None) value = request.form.get('value', None)
if task == 'unsubscribe': if task == 'unsubscribe':
channelId = get_nosql().websub_getCallback(value).get('channel') task = websub_unsubscribe_callback.delay(value)
task = unsubscribe_websub_callback.delay(value, channelId)
flash(f"Started task {task.id}") flash(f"Started task {task.id}")
return redirect(url_for('admin.websub')) return redirect(url_for('admin.websub'))

View File

@@ -33,7 +33,7 @@ def websub(cap):
return challenge return challenge
if get_nosql().websub_existsCallback(cap): if get_nosql().websub_existsCallback(cap):
if not get_nosql().websub_savePost(cap, str(request.data)): if not get_nosql().websub_savePost(cap, request.data):
return abort(500) return abort(500)
return '', 202 return '', 202

View File

@@ -34,7 +34,7 @@ def channel(channelId):
for videoId in videoIds: for videoId in videoIds:
videos.append(get_nosql().get_video_info(videoId, limited=True)) videos.append(get_nosql().get_video_info(videoId, limited=True))
videos = sorted(videos, key=lambda x: x.get('upload_date'), reverse=True) videos = sorted(videos, key=lambda x: x.get('upload_date', '19700101'), reverse=True)
return render_template('channel/channel.html', channel=channelInfo, videos=videos) return render_template('channel/channel.html', channel=channelInfo, videos=videos)

View File

@@ -36,4 +36,9 @@ def base():
render['info'] = get_nosql().get_video_info(vGet) render['info'] = get_nosql().get_video_info(vGet)
render['params'] = request.args.get('v') render['params'] = request.args.get('v')
if render['info']['_status'] != 'available':
flash(render['info'].get('_status_description', 'Video unavailable because of technical errors. Come back later.'))
return redirect(url_for('index.base'))
return render_template('watch/index.html', render=render) return render_template('watch/index.html', render=render)

View File

@@ -205,7 +205,7 @@ class Mango:
def get_recent_videos(self, count=99): def get_recent_videos(self, count=99):
""" Returns a SET of YouTube video ID's which have been added last to the info_json collection """ """ Returns a SET of YouTube video ID's which have been added last to the info_json collection """
result = self.info_json.find({}, {'_id': 0, 'id': 1}, sort=[('_id', pymongo.DESCENDING)]).limit(count) result = self.info_json.find({'_status': 'available'}, {'_id': 0, 'id': 1}, sort=[('_id', pymongo.DESCENDING)]).limit(count)
ids = [] ids = []
@@ -216,7 +216,7 @@ class Mango:
def get_video_info(self, videoId, limited=False): def get_video_info(self, videoId, limited=False):
if limited: if limited:
projection = {'_id': 1, 'id': 1, 'title': 1, 'upload_date': 1, 'description': 1, 'channel_id': 1, 'uploader': 1, 'epoch': 1, 'title_slug': 1} projection = {'_id': 1, 'id': 1, 'title': 1, 'upload_date': 1, 'description': 1, 'channel_id': 1, 'uploader': 1, 'epoch': 1, '_title_slug': 1}
else: else:
projection = {} projection = {}
@@ -294,21 +294,24 @@ class Mango:
status = status.get('status') status = status.get('status')
if status in ['requesting']: if status in ['requesting']:
self.websub_callbacks.update_one({'id': callbackId}, {'$set': {'status': 'active', 'activation_time': current_time(object=True), 'lease': lease}}) self.websub_callbacks.update_one({'id': callbackId}, {'$set': {'status': 'active', 'activation_time': current_time(object=True), 'lease': int(lease)}})
return True return True
return False return False
def websub_existsCallback(self, callbackId): def websub_existsCallback(self, callbackId, channel=False):
status = self.websub_callbacks.find_one({'id': callbackId}, {'status': 1}) if channel:
query = {'channel': callbackId}
else:
query = {'id': callbackId}
status = self.websub_callbacks.find_one(query, {'id': 1, 'status': 1})
if not status: if not status:
return False return False
status = status.get('status') if status.get('status') in ['requesting', 'active', 'retiring']:
return status.get('id')
if status in ['requesting', 'active', 'retiring']:
return True
return False return False
@@ -347,10 +350,24 @@ class Mango:
def websub_savePost(self, callbackId, data): def websub_savePost(self, callbackId, data):
return self.websub_data.insert_one({'callback_id': callbackId, 'state': 'unprocessed', 'received_time': current_time(object=True), 'raw_data': data}).inserted_id return self.websub_data.insert_one({'callback_id': callbackId, 'state': 'unprocessed', 'received_time': current_time(object=True), 'raw_data': data}).inserted_id
def websub_getFirstPostData(self):
    """Claim the oldest unprocessed WebSub post and hand it to the caller.

    The document is selected (oldest ``received_time`` first) and flipped
    from state 'unprocessed' to 'processing' in ONE atomic operation, so two
    workers polling at the same moment can never claim the same post.

    Returns:
        A ``(_id, raw_data)`` tuple for the claimed document, or ``None``
        when no document with state 'unprocessed' exists.
    """
    # find_one_and_update returns the document as it was BEFORE the update
    # by default, which is all we need: only _id and raw_data are projected.
    data = self.websub_data.find_one_and_update(
        {'state': 'unprocessed'},
        {'$set': {'state': 'processing'}},
        projection={'_id': 1, 'raw_data': 1},
        sort=[('received_time', 1)],
    )

    if not data:
        return None

    return (data.get('_id'), data.get('raw_data'))
def websub_deletePostProcessing(self, _id):
self.websub_data.delete_one({'_id': _id})
def websub_cleanRetired(self, days=3): def websub_cleanRetired(self, days=3):
days = self.datetime.utcnow() - self.timedelta(days=days) days = self.datetime.utcnow() - self.timedelta(days=days)
self.websub_callbacks.delete_many({'status': 'retired', 'retired_time': {'$lt': days}}) self.websub_callbacks.delete_many({'status': 'retired', 'retired_time': {'$lt': days}})
self.websub_callbacks.delete_many({'status': 'requesting', 'requesting_time': {'$lt': days}})
return True return True

View File

@@ -1,22 +1,32 @@
from celery import shared_task from celery import shared_task
from flask import current_app from flask import current_app
##########################################
# CELERY TASKS #
##########################################
@shared_task() @shared_task()
def subscribe_websub_callback(channelId): def websub_subscribe_callback(channelId):
import requests import requests
from .nosql import get_nosql from .nosql import get_nosql
# check if a callback already exists for channel
answer = get_nosql().websub_existsCallback(channelId, channel=True)
if not answer:
callbackId = get_nosql().websub_newCallback(channelId) callbackId = get_nosql().websub_newCallback(channelId)
else:
callbackId = answer
url = 'https://pubsubhubbub.appspot.com/subscribe' url = 'https://pubsubhubbub.appspot.com/subscribe'
data = { data = {
'hub.callback': f'https://{current_app.config["DOMAIN"]}/api/websub//{callbackId}', 'hub.callback': f'{current_app.config["DOMAIN"]}/api/websub/{callbackId}',
'hub.topic': f'https://www.youtube.com/xml/feeds/videos.xml?channel_id={channelId}', 'hub.topic': f'https://www.youtube.com/xml/feeds/videos.xml?channel_id={channelId}',
'hub.verify': 'async', 'hub.verify': 'async',
'hub.mode': 'subscribe', 'hub.mode': 'subscribe',
'hub.verify_token': '', 'hub.verify_token': '',
'hub.secret': '', 'hub.secret': '',
'hub.lease_numbers': '86400', 'hub.lease_numbers': '432000',
} }
get_nosql().websub_requestingCallback(callbackId) get_nosql().websub_requestingCallback(callbackId)
@@ -27,12 +37,19 @@ def subscribe_websub_callback(channelId):
return False return False
@shared_task() @shared_task()
def unsubscribe_websub_callback(callbackId, channelId): def websub_unsubscribe_callback(callbackId):
import requests import requests
from .nosql import get_nosql from .nosql import get_nosql
answer = get_nosql().websub_existsCallback(callbackId)
if not answer:
return False
channelId = get_nosql().websub_getCallback(callbackId).get('channel')
url = 'https://pubsubhubbub.appspot.com/subscribe' url = 'https://pubsubhubbub.appspot.com/subscribe'
data = {'hub.callback': f'https://{current_app.config["DOMAIN"]}/api/websub/{callbackId}', data = {'hub.callback': f'{current_app.config["DOMAIN"]}/api/websub/{callbackId}',
'hub.topic': f'https://www.youtube.com/xml/feeds/videos.xml?channel_id={channelId}', 'hub.topic': f'https://www.youtube.com/xml/feeds/videos.xml?channel_id={channelId}',
'hub.verify': 'async', 'hub.verify': 'async',
'hub.mode': 'unsubscribe' 'hub.mode': 'unsubscribe'
@@ -45,3 +62,78 @@ def unsubscribe_websub_callback(callbackId, channelId):
return True return True
return False return False
@shared_task()
def websub_process_data():
    """Drain the queue of stored WebSub posts.

    Repeatedly claims the oldest unprocessed post, parses it with
    ``do_parse_data`` and deletes it afterwards. Posts that cannot be parsed
    are deleted and skipped. The loop ends when no unprocessed post is left.
    """
    from .nosql import get_nosql

    while True:
        data = get_nosql().websub_getFirstPostData()

        if not data:
            break

        _id, data = data
        parsed = do_parse_data(data)

        if not parsed:
            # Unparsable payload: drop it and move on. Without the
            # `continue` the tuple-unpack below would run on `False` and
            # raise TypeError, aborting the whole task.
            get_nosql().websub_deletePostProcessing(_id)
            continue

        # Parsed values are not acted upon yet; unpacking validates the shape.
        state, channelId, videoId = parsed

        get_nosql().websub_deletePostProcessing(_id)
@shared_task()
def websub_renew_expiring(hours=6):
    """Queue a re-subscription for every WebSub callback whose lease ends soon.

    Args:
        hours: Look-ahead window. A callback is renewed when its lease
            (``activation_time`` + ``lease`` seconds) ends within this many
            hours from now, or has already ended.
    """
    from .nosql import get_nosql
    from datetime import datetime, timedelta

    # Latest expiry moment that still counts as "expiring". It must lie in
    # the FUTURE (now + window); using now - window would only renew leases
    # that have already been dead for `hours`. Computed once, outside the loop.
    # Kept naive-UTC like the stored timestamps this is compared against
    # (NOTE(review): assumes activation_time is naive UTC - confirm).
    pivot = datetime.utcnow() + timedelta(hours=hours)

    for callbackId in get_nosql().websub_getCallbacks():
        data = get_nosql().websub_getCallback(callbackId)
        if not data:
            continue

        activated = data.get('activation_time')
        lease = data.get('lease')
        if activated is None or lease is None:
            # No recorded activation/lease (e.g. never confirmed by the hub):
            # there is nothing to compute an expiry from.
            continue

        # int() tolerates records whose lease was stored as a string.
        expires = activated + timedelta(seconds=int(lease))

        if expires > pivot:  # lease ends after the look-ahead window, nothing to do yet
            continue

        print(f'{callbackId} should be renewed')
        websub_subscribe_callback.delay(data.get('channel'))
##########################################
# TASK MODULES #
##########################################
def do_parse_data(data):
    """Parse the body of a YouTube WebSub notification.

    Two feed shapes are recognised:

    * a feed containing elements in the YouTube namespace (and none in the
      tombstone namespace) -> a video was published/updated, state 'added';
    * a feed containing elements in the Atom tombstone namespace (and none
      in the YouTube namespace) -> a video was removed, state 'removed'.

    Args:
        data: Notification body as ``bytes`` (as received) or ``str``.

    Returns:
        ``(state, channelId, videoId)`` on success, or ``False`` when the
        body is not XML, matches neither shape, or lacks the expected
        elements.
    """
    import xml.etree.ElementTree as ET

    yt_ns = 'http://www.youtube.com/xml/schemas/2015'
    at_ns = 'http://purl.org/atompub/tombstones/1.0'
    atom_ns = 'http://www.w3.org/2005/Atom'

    try:
        # The parser takes bytes directly (and honours the XML encoding
        # declaration), so no manual decode that could raise on bad bytes.
        root = ET.fromstring(data)
    except (ET.ParseError, TypeError, ValueError):
        print('Not XML')
        return False

    # One pass over the tree; non-string tags are ignored.
    tags = [el.tag for el in root.iter() if isinstance(el.tag, str)]
    yt = any(tag.startswith('{' + yt_ns + '}') for tag in tags)
    at = any(tag.startswith('{' + at_ns + '}') for tag in tags)

    videoId = None
    channelId = None

    if yt and not at:
        # Video published
        state = 'added'
        entry = root.find('.//{' + atom_ns + '}entry')
        if entry is not None:
            videoId = entry.findtext('./{' + yt_ns + '}videoId')
            channelId = entry.findtext('./{' + yt_ns + '}channelId')
    elif not yt and at:
        # Video hidden
        state = 'removed'
        deleted_entry = root.find('.//{' + at_ns + '}deleted-entry')
        if deleted_entry is not None:
            # ref looks like "yt:video:<id>"; the id is the last segment.
            ref = deleted_entry.get('ref')
            if ref:
                videoId = ref.split(':')[-1]
            # The channel id is the last path segment of the author uri.
            uri = deleted_entry.findtext('./{' + at_ns + '}by/{' + atom_ns + '}uri')
            if uri:
                channelId = uri.split('/')[-1]
    else:
        print('Unknown xml')
        return False

    if not videoId or not channelId:
        # Recognised namespace but the expected elements are missing.
        print('Unknown xml')
        return False

    return (state, channelId, videoId)

View File

@@ -50,6 +50,7 @@
{% for callback in callbacks %} {% for callback in callbacks %}
<tr class="filterable"> <tr class="filterable">
<td> <td>
<a target="_blank" rel="noopener noreferrer" href="https://pubsubhubbub.appspot.com/subscription-details?hub.callback={{ config['DOMAIN'] }}/api/websub/{{ callbacks[callback].get('id') }}&hub.topic=https://www.youtube.com/xml/feeds/videos.xml?channel_id={{ callbacks[callback].get('channel') }}"><button class="btn-small waves-effect waves-light" title="Information on Pubsubhubbub (external link)"></button></a>
<form method="post"> <form method="post">
<input type="text" value="{{ callbacks[callback].get('id') }}" name="value" hidden> <input type="text" value="{{ callbacks[callback].get('id') }}" name="value" hidden>
<button class="btn-small waves-effect waves-light" type="submit" name="task" value="unsubscribe" title="Send unsubscribe request to hub" {% if callbacks[callback].get('status') != 'active' %}disabled{% endif %}>🗑️</button> <button class="btn-small waves-effect waves-light" type="submit" name="task" value="unsubscribe" title="Send unsubscribe request to hub" {% if callbacks[callback].get('status') != 'active' %}disabled{% endif %}>🗑️</button>

View File

@@ -25,7 +25,7 @@
<div class="card medium black-text"> <div class="card medium black-text">
<a href="{{ url_for('watch.base') }}?v={{ video.get('id') }}"> <a href="{{ url_for('watch.base') }}?v={{ video.get('id') }}">
<div class="card-image"> <div class="card-image">
<img loading="lazy" src="https://archive.ventilaar.net/videos/automatic/{{ video.get('channel_id') }}/{{ video.get('id') }}/{{ video.get('title_slug') }}.jpg"> <img loading="lazy" src="https://archive.ventilaar.net/videos/automatic/{{ video.get('channel_id') }}/{{ video.get('id') }}/{{ video.get('_title_slug') }}.jpg">
</div> </div>
</a> </a>
<div class="card-content activator"> <div class="card-content activator">

View File

@@ -25,7 +25,7 @@
<div class="card medium black-text"> <div class="card medium black-text">
<a href="{{ url_for('watch.base') }}?v={{ video.get('id') }}"> <a href="{{ url_for('watch.base') }}?v={{ video.get('id') }}">
<div class="card-image"> <div class="card-image">
<img loading="lazy" src="https://archive.ventilaar.net/videos/automatic/{{ video.get('channel_id') }}/{{ video.get('id') }}/{{ video.get('title_slug') }}.jpg"> <img loading="lazy" src="https://archive.ventilaar.net/videos/automatic/{{ video.get('channel_id') }}/{{ video.get('id') }}/{{ video.get('_title_slug') }}.jpg">
</div> </div>
</a> </a>
<div class="card-content activator"> <div class="card-content activator">

View File

@@ -25,7 +25,7 @@
<div class="card medium black-text"> <div class="card medium black-text">
<a href="{{ url_for('watch.base') }}?v={{ video.get('id') }}"> <a href="{{ url_for('watch.base') }}?v={{ video.get('id') }}">
<div class="card-image"> <div class="card-image">
<img loading="lazy" src="https://archive.ventilaar.net/videos/automatic/{{ video.get('channel_id') }}/{{ video.get('id') }}/{{ video.get('title_slug') }}.jpg"> <img loading="lazy" src="https://archive.ventilaar.net/videos/automatic/{{ video.get('channel_id') }}/{{ video.get('id') }}/{{ video.get('_title_slug') }}.jpg">
</div> </div>
</a> </a>
<div class="card-content activator"> <div class="card-content activator">

View File

@@ -14,8 +14,8 @@
<div class="row"> <div class="row">
<div class="col s12 mt-4 center-align"> <div class="col s12 mt-4 center-align">
<video controls class="responsive-video"> <video controls class="responsive-video">
<source src="https://archive.ventilaar.net/videos/automatic/{{ render.get('info').get('channel_id') }}/{{ render.get('info').get('id') }}/{{ render.get('info').get('title_slug') }}.mp4"> <source src="https://archive.ventilaar.net/videos/automatic/{{ render.get('info').get('channel_id') }}/{{ render.get('info').get('id') }}/{{ render.get('info').get('_title_slug') }}.mp4">
<source src="https://archive.ventilaar.net/videos/automatic/{{ render.get('info').get('channel_id') }}/{{ render.get('info').get('id') }}/{{ render.get('info').get('title_slug') }}.webm"> <source src="https://archive.ventilaar.net/videos/automatic/{{ render.get('info').get('channel_id') }}/{{ render.get('info').get('id') }}/{{ render.get('info').get('_title_slug') }}.webm">
Your browser does not support the video tag. Your browser does not support the video tag.
</video> </video>
</div> </div>

View File

@@ -5,8 +5,7 @@ flask-caching
flask-limiter flask-limiter
pymongo pymongo
yt-dlp yt-dlp
argon2-cffi
gunicorn gunicorn
celery celery
sqlalchemy sqlalchemy
pyjwt pyjwt[crypto]