You've already forked amazing-ytdlp-archive
Compare commits
14 Commits
Author | SHA1 | Date | |
---|---|---|---|
![]() |
20e5793cd8 | ||
![]() |
282b895170 | ||
![]() |
38f6f04260 | ||
![]() |
43e6c00787 | ||
![]() |
d42030dcbc | ||
![]() |
5530179558 | ||
![]() |
1186d236f2 | ||
![]() |
5a4726ac10 | ||
![]() |
46bde82d32 | ||
![]() |
6c681d6b07 | ||
![]() |
0d5d233e90 | ||
![]() |
548a4860fc | ||
![]() |
da333ab4f6 | ||
![]() |
f2b01033ea |
@@ -1,8 +1,7 @@
|
||||
FROM python:3-alpine
|
||||
RUN apk update && apk add python3-dev gcc libc-dev libffi-dev && rm -rf /var/cache/apk/*
|
||||
FROM python:3.12-alpine
|
||||
WORKDIR /app
|
||||
COPY requirements.txt /app
|
||||
RUN pip install --no-cache-dir -r requirements.txt
|
||||
COPY . /app
|
||||
EXPOSE 8000
|
||||
CMD ["gunicorn", "--bind", "0.0.0.0:8000", "ayta:create_app()"]
|
||||
CMD ["gunicorn", "--bind", "0.0.0.0:8000", "--workers", "1", "ayta:create_app()"]
|
@@ -6,7 +6,7 @@ current cronjob yt-dlp archive service.
|
||||
Partially inspired by [hobune](https://github.com/rebane2001/hobune). While that project is amazing on its own, it's just not scalable.
|
||||
|
||||
## The idea
|
||||
Having over 250k videos, scaling the current cronjob yt-dlp archive task is just really hard. Filetypes change, things get partially downloaded and such.
|
||||
Having over 350k videos, scaling the current cronjob yt-dlp archive task is just really hard. Filetypes change, things get partially downloaded and such.
|
||||
Partially yt-dlp is to blame because it's a package that needs to change all the time. But with this some changes are not accounted for.
|
||||
yt-dlp will still do the downloads. But a flask frontend will be developed to make all downloaded videos easily indexable.
|
||||
For it to be quick (unlike hobune) a database has to be implemented. This could get solved by a static site generator type of software, but that is not my choice.
|
||||
@@ -54,7 +54,7 @@ Extra functionality for further development of features.
|
||||
Mainly focused on retiring the cronjob based scripts and moving it to celery based tasks
|
||||
- [ ] manage videos by ID's instead of per channel basis
|
||||
- [ ] download videos from queue
|
||||
- [ ] Manage websub callbacks
|
||||
- [x] Manage websub callbacks
|
||||
|
||||
### Stage 4
|
||||
Mongodb finally has its limitations.
|
||||
|
@@ -2,7 +2,7 @@ from flask import Blueprint, render_template, request, redirect, url_for, flash,
|
||||
from ..nosql import get_nosql
|
||||
from ..dlp import checkChannelId, getChannelInfo
|
||||
from ..decorators import login_required
|
||||
from ..tasks import test_sleep, websub_subscribe_callback, websub_unsubscribe_callback, video_download
|
||||
from ..tasks import test_sleep, websub_subscribe_callback, websub_unsubscribe_callback, video_download, video_queue, playlist_to_queue
|
||||
from datetime import datetime
|
||||
from secrets import token_urlsafe
|
||||
|
||||
@@ -30,6 +30,9 @@ def channels():
|
||||
generic = {}
|
||||
|
||||
if request.method == 'POST':
|
||||
task = request.form.get('task', None)
|
||||
|
||||
if task == 'subscribe-websub':
|
||||
channelId = request.form.get('channel_id', None)
|
||||
originalName = request.form.get('original_name', None)
|
||||
addedDate = request.form.get('added_date', None)
|
||||
@@ -47,11 +50,15 @@ def channels():
|
||||
|
||||
return redirect(url_for('admin.channel', channelId=channelId))
|
||||
|
||||
elif task == 'playlist-queue':
|
||||
task = playlist_to_queue.delay()
|
||||
flash(f'Task playlist-queue has been queued: {task.id}')
|
||||
|
||||
generic['currentDate'] = datetime.utcnow()
|
||||
channelIds = get_nosql().list_all_channels()
|
||||
|
||||
for channelId in channelIds:
|
||||
channels[channelId] = get_nosql().get_channel_info(channelId)
|
||||
channels[channelId] = get_nosql().get_channel_info(channelId, limited=True)
|
||||
channels[channelId]['video_count'] = get_nosql().get_channel_videos_count(channelId)
|
||||
|
||||
return render_template('admin/channels.html', channels=channels, generic=generic)
|
||||
@@ -190,14 +197,16 @@ def queue():
|
||||
flash(f'Cleaned retired endpoints')
|
||||
|
||||
elif task == 'manual-queue':
|
||||
if not get_nosql().check_exists(value):
|
||||
direct = request.form.get('direct', None)
|
||||
|
||||
if direct:
|
||||
task = video_download.delay(value)
|
||||
flash(f"Started task {task.id}")
|
||||
else:
|
||||
get_nosql().queue_insertQueue(value, 'webui')
|
||||
flash(f'Added to queue: {value}')
|
||||
else:
|
||||
flash(f'This video ID already exists in the archive: {value}')
|
||||
|
||||
elif task == 'delete-queue':
|
||||
get_nosql().queue_deleteQueue(value)
|
||||
@@ -207,6 +216,12 @@ def queue():
|
||||
get_nosql().queue_emptyQueue()
|
||||
flash(f'Queue has been emptied')
|
||||
|
||||
elif task == 'queue-run-once':
|
||||
value = int(value) if value.isdigit() else 1
|
||||
for x in range(value):
|
||||
task = video_queue.delay()
|
||||
flash(f'Task has been started on the oldest queued item: {task.id}')
|
||||
|
||||
return redirect(url_for('admin.queue'))
|
||||
|
||||
endpoints = get_nosql().queue_getEndpoints()
|
||||
@@ -243,7 +258,7 @@ def users():
|
||||
return render_template('admin/users.html', users=users)
|
||||
|
||||
@bp.route('/workers', methods=['GET', 'POST'])
|
||||
#@login_required
|
||||
@login_required
|
||||
def workers():
|
||||
if request.method == 'POST':
|
||||
task = request.form.get('task', None)
|
||||
@@ -253,4 +268,5 @@ def workers():
|
||||
celery = current_app.extensions.get('celery')
|
||||
|
||||
tasks = celery.control.inspect().active()
|
||||
return render_template('admin/workers.html', tasks=tasks)
|
||||
reserved = celery.control.inspect().reserved()
|
||||
return render_template('admin/workers.html', tasks=tasks, reserved=reserved)
|
@@ -11,7 +11,7 @@ def base():
|
||||
channelIds = get_nosql().list_all_channels()
|
||||
|
||||
for channelId in channelIds:
|
||||
channel = get_nosql().get_channel_info(channelId)
|
||||
channel = get_nosql().get_channel_info(channelId, limited=True)
|
||||
channel['video_count'] = get_nosql().get_channel_videos_count(channelId)
|
||||
channels.append(channel)
|
||||
|
||||
|
@@ -168,8 +168,12 @@ class Mango:
|
||||
ids.append(video['id'])
|
||||
return tuple(ids)
|
||||
|
||||
def get_channel_info(self, channelId):
|
||||
return self.channels.find_one({'id': channelId})
|
||||
def get_channel_info(self, channelId, limited=False):
    """Return the stored channel document whose ``id`` equals ``channelId``.

    Args:
        channelId: Value matched against the ``id`` field of ``self.channels``.
        limited: When true, the ``playlist`` field is excluded from the
            returned document; when false, the full document is requested.

    Returns:
        Whatever ``self.channels.find_one`` returns for the query (the
        matching document, or ``None`` when nothing matches).
    """
    # An empty projection dict is NOT "no projection": PyMongo substitutes
    # {'_id': 1} for it, so only the _id would come back. Pass None to
    # request the complete document when no field has to be excluded.
    projection = {'playlist': 0} if limited else None

    return self.channels.find_one({'id': channelId}, projection)
|
||||
|
||||
|
||||
def update_channel_key(self, channelId, key, value):
|
||||
@@ -411,14 +415,17 @@ class Mango:
|
||||
##########################################
|
||||
|
||||
def queue_insertQueue(self, videoId, endpointId=None):
|
||||
# if no document exists
|
||||
if not self.download_queue.count_documents({'id': videoId}) >= 1:
|
||||
self.download_queue.insert_one({'id': videoId, 'endpoint': endpointId, 'created_time': current_time(object=True), 'status': 'queued'}).inserted_id
|
||||
return True
|
||||
|
||||
# key already in queue
|
||||
# if already queued
|
||||
if self.download_queue.count_documents({'id': videoId}) >= 1:
|
||||
return False
|
||||
|
||||
# if already in archive
|
||||
if self.check_exists(videoId):
|
||||
return False
|
||||
|
||||
# add to queue
|
||||
return self.download_queue.insert_one({'id': videoId, 'endpoint': endpointId, 'created_time': current_time(object=True), 'status': 'queued'}).inserted_id
|
||||
|
||||
def queue_deleteQueue(self, videoId):
|
||||
if self.download_queue.delete_one({'id': videoId}):
|
||||
return True
|
||||
@@ -430,6 +437,21 @@ class Mango:
|
||||
def queue_emptyQueue(self):
|
||||
return self.download_queue.delete_many({})
|
||||
|
||||
def queue_setFailed(self, videoId):
    """Flag the queue entry of ``videoId`` as failed.

    Runs one ``update_one`` on ``self.download_queue`` that sets ``status``
    to ``'failed'`` on the document whose ``id`` equals ``videoId`` and
    hands back the result of that call unchanged.
    """
    selector = {'id': videoId}
    change = {'$set': {'status': 'failed'}}
    outcome = self.download_queue.update_one(selector, change)
    return outcome
|
||||
|
||||
def queue_getNext(self):
    """Claim the next queued download and return its queue document.

    The entry with the smallest ``created_time`` among those whose
    ``status`` is ``'queued'`` is switched to ``'working'`` and returned.
    Entries whose video already exists in the archive
    (``self.check_exists``) are deleted from the queue and the next entry
    is tried.

    Returns:
        The queue document as it was before being claimed (so its
        ``status`` still reads ``'queued'``), or ``None`` when no queued
        entry is left.
    """
    while True:
        # Find and mark in ONE server-side operation. A separate find_one
        # followed by update_one lets two workers pick the same entry when
        # several queue tasks are started at once.
        queueItem = self.download_queue.find_one_and_update(
            {'status': 'queued'},
            {'$set': {'status': 'working'}},
            sort=[('created_time', 1)],  # oldest entry first
        )

        if queueItem is None:
            return None

        # Already archived: drop the stale queue entry and look further.
        if self.check_exists(queueItem['id']):
            self.queue_deleteQueue(queueItem['id'])
            continue

        return queueItem
|
||||
|
||||
##########################################
|
||||
# HELPER FUNCTIONS #
|
||||
##########################################
|
||||
@@ -454,6 +476,3 @@ def clean_info_json(originalInfo, format='dict'):
|
||||
else:
|
||||
print('The requested output format is not supported!')
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
mango = Mango('mongodb://root:example@192.168.66.140:27017')
|
@@ -4,6 +4,7 @@ class OIDC():
|
||||
Additionally this class provides the function to generate redirect url's and check bearer tokens on their validity as well as caching jwt signing keys.
|
||||
Fairly barebones and should be 100% secure. (famous last words)
|
||||
This is made for form posted JWT's. While not the most secure it is the most easy way to implement. Moving on to a code based solution might be preferred in the future.
|
||||
The nonce and state store is in memory, so only one instance can be used at a time until central key caching is implemented.
|
||||
"""
|
||||
def __init__(self, app=None):
|
||||
self.states = {}
|
||||
@@ -151,7 +152,6 @@ class OIDC():
|
||||
|
||||
# Any exception (invalid JWT, invalid formatting etc...) must return False
|
||||
except Exception as e:
|
||||
print(e, flush=True)
|
||||
return False
|
||||
|
||||
# Double check if given token is really requested by us by matching the nonce in the signed key
|
||||
|
@@ -24,6 +24,26 @@ def video_download(videoId):
|
||||
return False
|
||||
return True
|
||||
|
||||
@shared_task()
def video_queue():
    """Take the next entry from the download queue and download it.

    Asks ``queue_getNext()`` for a queue entry and runs ``video_download``
    on its ``id``.

    Returns:
        ``None`` when the queue yields nothing, ``True`` when the download
        succeeded (the entry is then removed from the queue), ``False``
        when it did not (the entry is then marked failed).

    Raises:
        Whatever ``video_download`` raises; the entry is marked failed
        before the exception is passed on.
    """
    from .nosql import get_nosql

    queueItem = get_nosql().queue_getNext()
    if not queueItem:
        return None

    videoId = queueItem['id']

    try:
        downloaded = video_download(videoId)
    except Exception:
        # queue_getNext() already switched the entry to 'working'. Without
        # this it would keep that status after a crash and never be picked
        # up or shown as failed.
        get_nosql().queue_setFailed(videoId)
        raise

    if downloaded:
        get_nosql().queue_deleteQueue(videoId)
        return True

    get_nosql().queue_setFailed(videoId)
    return False
|
||||
|
||||
@shared_task()
|
||||
def websub_subscribe_callback(channelId):
|
||||
@@ -144,6 +164,19 @@ def websub_renew_expiring(hours=6):
|
||||
if count >= 256:
|
||||
break
|
||||
|
||||
@shared_task()
def playlist_to_queue():
    """Put every playlist entry of every active channel on the download queue.

    For each channel returned by ``list_all_channels(active=True)`` the
    channel document is loaded and the ``id`` of each item under
    ``playlist -> entries`` is passed to ``queue_insertQueue`` with the
    endpoint label ``'Playlist mirroring'``.
    """
    from .nosql import get_nosql

    channels = get_nosql().list_all_channels(active=True)

    for channel in channels:
        info = get_nosql().get_channel_info(channel)

        # A channel without a stored document, without a 'playlist' key or
        # without 'entries' is skipped instead of raising, so one incomplete
        # channel does not stop the remaining channels from being mirrored.
        # NOTE(review): assumes such incomplete channel documents can occur
        # (e.g. a channel that has not been indexed yet) - confirm.
        playlist = (info or {}).get('playlist') or {}

        for item in playlist.get('entries') or []:
            videoId = item['id']
            get_nosql().queue_insertQueue(videoId, 'Playlist mirroring')
|
||||
|
||||
##########################################
|
||||
# TASK MODULES #
|
||||
##########################################
|
||||
|
@@ -15,6 +15,18 @@
|
||||
</div>
|
||||
</div>
|
||||
<div class="row">
|
||||
<div class="col s12 l4 m-4">
|
||||
<div class="card">
|
||||
<div class="card-content">
|
||||
<span class="card-title">Direct actions</span>
|
||||
<form class="mt-4" method="post">
|
||||
<button class="btn mb-2 green" type="submit" name="task" value="playlist-queue">Playlist to Queue</button>
|
||||
<br>
|
||||
<span class="supporting-text">Forcerun playlist to queue task</span>
|
||||
</form>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
<div class="col s12 l4 m-4">
|
||||
<div class="card">
|
||||
<div class="card-content">
|
||||
@@ -38,7 +50,7 @@
|
||||
});
|
||||
</script>
|
||||
</div>
|
||||
<button class="btn mt-4" type="submit" name="action" value="add_channel">Add</button>
|
||||
<button class="btn mt-4" type="submit" name="task" value="add_channel">Add</button>
|
||||
</form>
|
||||
</div>
|
||||
</div>
|
||||
|
@@ -29,7 +29,12 @@
|
||||
<br>
|
||||
<span class="supporting-text">Prunes all deactivated endpoints, but keeps last 3 days</span>
|
||||
</form>
|
||||
|
||||
<form class="mt-4 input-field" method="post" onsubmit="return confirm('Are you sure?');">
|
||||
<input type="number" style="width: 80px" value="1" name="value" min="1" max="99">
|
||||
<button class="btn mb-2 green" type="submit" name="task" value="queue-run-once">Download oldest queued</button>
|
||||
<br>
|
||||
<span class="supporting-text">Will download the oldest queued video ID</span>
|
||||
</form>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
@@ -147,7 +152,12 @@
|
||||
<td>
|
||||
<form method="post">
|
||||
<input type="text" value="{{ id.get('id') }}" name="value" hidden>
|
||||
<button class="btn-small waves-effect waves-light" type="submit" name="task" value="delete-queue" title="Delete from queue" {% if id.get('status') != 'queued' %}disabled{% endif %}>🗑️</button>
|
||||
<button class="btn-small waves-effect waves-light" type="submit" name="task" value="delete-queue" title="Delete from queue" {% if id.get('status') == 'working' %}disabled{% endif %}>🗑️</button>
|
||||
</form>
|
||||
<form method="post">
|
||||
<input type="text" value="{{ id.get('id') }}" name="value" hidden>
|
||||
<button class="btn-small waves-effect waves-light" type="submit" name="task" value="run-download" title="Run download task" disabled>⏩</button>
|
||||
<!-- This function will not work until the download queue and video download process is rewritten -->
|
||||
</form>
|
||||
</td>
|
||||
<td>{{ id.get('id') }}</td>
|
||||
|
@@ -19,8 +19,40 @@
|
||||
</form>
|
||||
<div class="divider"></div>
|
||||
<div class="row">
|
||||
<div class="col s12">
|
||||
<h6>Current workers</h6>
|
||||
<div class="col s12 m-4">
|
||||
<h5>Reserved tasks per worker</h5>
|
||||
<p>Usually 4 tasks per worker</p>
|
||||
{% if reserved is none %}
|
||||
<h6>No workers with reserved tasks, are there any workers with stuck tasks or are they even online?</h6>
|
||||
{% else %}
|
||||
{% for worker in reserved %}
|
||||
<span>{{ worker }}</span>
|
||||
<table class="striped highlight responsive-table" style=" border: 1px solid black;">
|
||||
<thead>
|
||||
<tr>
|
||||
<th>ID</th>
|
||||
<th>Task</th>
|
||||
<th>Arguments</th>
|
||||
</tr>
|
||||
</thead>
|
||||
<tbody>
|
||||
{% for task in reserved[worker] %}
|
||||
<tr>
|
||||
<td>{{ task.get('id') }}</td>
|
||||
<td>{{ task.get('name') }}</td>
|
||||
<td>{{ task.get('args') }} {{ task.get('kwargs') }}</td>
|
||||
</tr>
|
||||
{% endfor %}
|
||||
</tbody>
|
||||
</table>
|
||||
{% endfor %}
|
||||
{% endif %}
|
||||
</div>
|
||||
<div class="col s12 m-4">
|
||||
<h5>Current workers and processing tasks</h5>
|
||||
{% if tasks is none %}
|
||||
<h6>No workers with running tasks, are there any workers with stuck tasks or are they even online?</h6>
|
||||
{% else %}
|
||||
{% for worker in tasks %}
|
||||
<span>{{ worker }}</span>
|
||||
<table class="striped highlight responsive-table" style=" border: 1px solid black;">
|
||||
@@ -35,13 +67,14 @@
|
||||
{% for task in tasks[worker] %}
|
||||
<tr>
|
||||
<td>{{ task.get('id') }}</td>
|
||||
<td>{{ task.get('type') }}</td>
|
||||
<td>{{ task.get('name') }}</td>
|
||||
<td>{{ task.get('time_start')|epoch_time }}</td>
|
||||
</tr>
|
||||
{% endfor %}
|
||||
</tbody>
|
||||
</table>
|
||||
{% endfor %}
|
||||
{% endif %}
|
||||
</div>
|
||||
</div>
|
||||
{% endblock %}
|
@@ -25,12 +25,24 @@
|
||||
</li>
|
||||
<li class="collection-item">
|
||||
<a href="{{ url_for('channel.channel', channelId='UCIcgBZ9hEJxHv6r_jDYOMqg') }}"><span class="title">Unus Annus</span></a>
|
||||
<p>Reason: This channel does not exist. (Self removed)</p>
|
||||
<p>Reason: This channel does not exist.</p>
|
||||
</li>
|
||||
<li class="collection-item">
|
||||
<a href="{{ url_for('channel.channel', channelId='UCz1s8aJYSQuaXJCtEi-VWRA') }}"><span class="title">Dutch Legion</span></a>
|
||||
<p>Reason: This account has been terminated due to multiple or severe violations of YouTube's policy prohibiting hate speech.</p>
|
||||
</li>
|
||||
<li class="collection-item">
|
||||
<a href="{{ url_for('channel.channel', channelId='UC91-8aNaRbp71UMEb_34ryg') }}"><span class="title">RBMK5000</span></a>
|
||||
<p>Reason: This channel does not exist.</p>
|
||||
</li>
|
||||
<li class="collection-item">
|
||||
<a href="{{ url_for('channel.channel', channelId='UCoPSAT64vfXlulyWd_dPE3Q') }}"><span class="title">Evilfisher2</span></a>
|
||||
<p>Reason: This channel was removed because it violated our Community Guidelines.</p>
|
||||
</li>
|
||||
<li class="collection-item">
|
||||
<a href="{{ url_for('channel.channel', channelId='UCZXkvavD2YKnFCzCkZ-bNPw') }}"><span class="title">mrabhy</span></a>
|
||||
<p>Reason: This channel was removed because it violated our Community Guidelines.</p>
|
||||
</li>
|
||||
</ul>
|
||||
</div>
|
||||
<div class="col s12 l6 center-align">
|
||||
@@ -45,6 +57,18 @@
|
||||
</li>
|
||||
<li class="collection-item">
|
||||
<a href="{{ url_for('channel.channel', channelId='UCtfg1tENiu3SgGMZVduFmTg') }}"><span class="title">FiberNinja</span></a>
|
||||
<p>Reason: This channel was removed because it violated our Community Guidelines.</p>
|
||||
</li>
|
||||
<li class="collection-item">
|
||||
<a href="{{ url_for('channel.channel', channelId='UCv4VkfbX8YfqodF-4coEEfQ') }}"><span class="title">James Somerton</span></a>
|
||||
<p>Reason: This channel does not exist.</p>
|
||||
</li>
|
||||
<li class="collection-item">
|
||||
<a href="{{ url_for('channel.channel', channelId='UC8XH9kpilkuss4bVeRZD1kw') }}"><span class="title">Plagued Moth</span></a>
|
||||
<p>Reason: This channel was removed because it violated our Community Guidelines.</p>
|
||||
</li>
|
||||
<li class="collection-item">
|
||||
<a href="{{ url_for('channel.channel', channelId='UCxZTTWP0QN7-ch2wW1QeFwg') }}"><span class="title">CowOfTheSea</span></a>
|
||||
<p>Reason: This channel was removed because it violated our Community Guidelines.</p>
|
||||
</li>
|
||||
</ul>
|
||||
|
@@ -3,9 +3,12 @@
|
||||
flask
|
||||
flask-caching
|
||||
flask-limiter
|
||||
flask-sqlalchemy
|
||||
flask-migrate
|
||||
pymongo
|
||||
yt-dlp
|
||||
gunicorn
|
||||
celery
|
||||
sqlalchemy
|
||||
requests
|
||||
pyjwt[crypto]
|
Reference in New Issue
Block a user