Compare commits

..

4 Commits

Author     SHA1        Message                        Date
Ventilaar  5a4726ac10  Add queue download function    2025-01-18 22:20:17 +01:00
           All checks were successful: Update worker server / build-and-publish (release) in 9s; Generate docker image / build-and-publish (release) in 1m3s
Ventilaar  46bde82d32  Hotfix shared state issue      2024-12-07 14:58:52 +01:00
           All checks were successful: Update worker server / build-and-publish (release) in 12s; Generate docker image / build-and-publish (release) in 19s
Ventilaar  6c681d6b07  Uhhh                           2024-12-05 22:20:55 +01:00
           All checks were successful: Update worker server / build-and-publish (release) in 9s; Generate docker image / build-and-publish (release) in 49s
Ventilaar  0d5d233e90  Cleanup and documentation      2024-12-05 22:15:42 +01:00
           All checks were successful: Generate docker image / build-and-publish (release) in 19s; Update worker server / build-and-publish (release) in 20s
8 changed files with 72 additions and 18 deletions

View File

@@ -4,4 +4,4 @@ COPY requirements.txt /app
 RUN pip install --no-cache-dir -r requirements.txt
 COPY . /app
 EXPOSE 8000
-CMD ["gunicorn", "--bind", "0.0.0.0:8000", "ayta:create_app()"]
+CMD ["gunicorn", "--bind", "0.0.0.0:8000", "--workers", "1", "ayta:create_app()"]
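
The CMD change pins gunicorn to a single worker process, which lines up with the "Hotfix shared state issue" commit above: with multiple workers, module-level state lives in separate processes and is not shared between requests. The entry point it loads, ayta:create_app(), is a standard Flask application factory; the factory itself is not part of this diff, so the sketch below is a hypothetical outline and every name in it is an assumption.

```python
# Hypothetical sketch of the "ayta:create_app()" factory that gunicorn imports.
# Nothing here is taken from the diff except the factory entry point itself.
from flask import Flask

def create_app():
    app = Flask(__name__)
    # the real factory presumably wires up MongoDB, Celery and the admin/channel
    # blueprints that appear elsewhere in this compare
    return app
```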

View File

@@ -6,7 +6,7 @@ current cronjob yt-dlp archive service.
 Partially inspired by [hobune](https://github.com/rebane2001/hobune). While that project is amazing on its own, it's just not scalable.
 ## The idea
-Having over 250k videos, scaling the current cronjob yt-dlp archive task is just really hard. File types change, things get partially downloaded, and so on.
+Having over 350k videos, scaling the current cronjob yt-dlp archive task is just really hard. File types change, things get partially downloaded, and so on.
 yt-dlp is partially to blame, since it's a package that needs to change all the time, and some of those changes are not accounted for.
 yt-dlp will still do the downloads, but a Flask frontend will be developed to make all downloaded videos easily indexable.
 For it to be quick (unlike hobune) a database has to be implemented. This could be solved by a static-site-generator type of software, but that is not my choice.
@@ -54,7 +54,7 @@ Extra functionality for further development of features.
 Mainly focused on retiring the cronjob-based scripts and moving them to Celery-based tasks.
 - [ ] manage videos by IDs instead of on a per-channel basis
 - [ ] download videos from queue
-- [ ] Manage websub callbacks
+- [x] Manage websub callbacks
 ### Stage 4
 MongoDB finally has its limitations.
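
The "The idea" section above boils down to: keep yt-dlp for the downloads, but store the metadata in a database so a Flask frontend can index hundreds of thousands of videos quickly. A minimal sketch of that lookup path under assumed names (the real database, collection and field names are not shown in this compare):

```python
# Minimal sketch of the "database-backed Flask frontend" idea from the README.
# The connection string, database name, collection name and fields are assumptions.
from flask import Flask, abort, jsonify
from pymongo import MongoClient

app = Flask(__name__)
videos = MongoClient("mongodb://localhost:27017")["archive"]["videos"]

@app.route("/video/<video_id>")
def video(video_id):
    # one indexed lookup instead of walking the download directory on every request
    doc = videos.find_one({"id": video_id}, {"_id": 0})
    if doc is None:
        abort(404)
    return jsonify(doc)
```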

View File

@@ -2,7 +2,7 @@ from flask import Blueprint, render_template, request, redirect, url_for, flash,
 from ..nosql import get_nosql
 from ..dlp import checkChannelId, getChannelInfo
 from ..decorators import login_required
-from ..tasks import test_sleep, websub_subscribe_callback, websub_unsubscribe_callback, video_download
+from ..tasks import test_sleep, websub_subscribe_callback, websub_unsubscribe_callback, video_download, video_queue
 from datetime import datetime
 from secrets import token_urlsafe
@@ -190,14 +190,16 @@ def queue():
             flash(f'Cleaned retired endpoints')
         elif task == 'manual-queue':
-            direct = request.form.get('direct', None)
-            if direct:
-                task = video_download.delay(value)
-                flash(f"Started task {task.id}")
+            if not get_nosql().check_exists(value):
+                direct = request.form.get('direct', None)
+                if direct:
+                    task = video_download.delay(value)
+                    flash(f"Started task {task.id}")
+                else:
+                    get_nosql().queue_insertQueue(value, 'webui')
+                    flash(f'Added to queue: {value}')
             else:
-                get_nosql().queue_insertQueue(value, 'webui')
-                flash(f'Added to queue: {value}')
+                flash(f'This video ID already exists in the archive: {value}')
         elif task == 'delete-queue':
             get_nosql().queue_deleteQueue(value)
@@ -205,7 +207,7 @@ def queue():
         elif task == 'empty-queue':
             get_nosql().queue_emptyQueue()
-            flash(f'Queue has been emptied')
+            flash(f'Queue has been emptied')
         return redirect(url_for('admin.queue'))
@@ -249,6 +251,8 @@ def workers():
         task = request.form.get('task', None)
         if task == 'test-sleep':
             test_sleep.delay()
+        elif task == 'queue-single':
+            video_queue.delay()
         celery = current_app.extensions.get('celery')
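
In the reworked manual-queue branch, a video ID is only accepted if check_exists() reports it is not already archived; the "direct" form field then decides between firing video_download immediately via Celery and inserting the ID into the Mongo-backed queue. Below is a hypothetical way to drive that form from a script using the requests library listed in requirements.txt; the base URL, the /admin/queue path, the login handling and the "value" field name are assumptions, while "task" and "direct" are the form fields the route actually reads.

```python
# Hypothetical client for the admin queue form; URL, auth and the "value" field
# name are assumptions, "task" and "direct" come from the route shown above.
import requests

BASE = "http://localhost:8000"  # assumed deployment address
session = requests.Session()    # a real deployment would authenticate first (login_required)

# queue a placeholder video ID for later download (no "direct" field, so it goes into the Mongo queue)
session.post(f"{BASE}/admin/queue", data={"task": "manual-queue", "value": "dQw4w9WgXcQ"})

# download immediately via Celery instead of queueing
session.post(f"{BASE}/admin/queue", data={"task": "manual-queue", "value": "dQw4w9WgXcQ", "direct": "on"})
```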

View File

@@ -429,7 +429,15 @@ class Mango:
     def queue_emptyQueue(self):
         return self.download_queue.delete_many({})
+    def queue_getNext(self, newest=False):
+        sort = []
+        if newest:
+            sort = [( 'created_time', pymongo.DESCENDING )]
+        return self.download_queue.find_one({}, sort=sort)
 ##########################################
 # HELPER FUNCTIONS #
 ##########################################
@@ -453,7 +461,4 @@ def clean_info_json(originalInfo, format='dict'):
         return json.dumps(originalInfo)
     else:
         print('The requested output format is not supported!')
-if __name__ == '__main__':
-    mango = Mango('mongodb://root:example@192.168.66.140:27017')
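
The new queue_getNext() helper leans on a pymongo detail: an empty sort list is falsy, so find_one({}, sort=[]) applies no ordering and returns documents in natural order, which in practice is roughly insertion order, i.e. the oldest queued entry; with newest=True it sorts on created_time descending instead. A usage sketch, reusing the connection string from the removed __main__ block (the ayta.nosql import path is an assumption based on the "ayta:create_app()" gunicorn target):

```python
# Usage sketch for Mango.queue_getNext(); the import path is assumed.
from ayta.nosql import Mango

mango = Mango('mongodb://root:example@192.168.66.140:27017')

oldest = mango.queue_getNext()             # no sort: natural order, roughly the oldest queued document
newest = mango.queue_getNext(newest=True)  # sort on created_time descending: most recently queued
if oldest is not None:
    print(oldest['id'])                    # the video_queue task expects an 'id' field on queue documents
```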

View File

@@ -24,6 +24,25 @@ def video_download(videoId):
         return False
     return True
+@shared_task()
+def video_queue():
+    """
+    Gets the oldest video ID from the queue and runs video_download() on it.
+    """
+    from .nosql import get_nosql
+    videoId = get_nosql().queue_getNext()
+    if videoId:
+        videoId = videoId['id']
+    else:
+        return None
+    if video_download(videoId):
+        get_nosql().queue_deleteQueue(videoId)
+        return True
+    else:
+        return False
 @shared_task()
 def websub_subscribe_callback(channelId):
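
Note that video_queue() calls video_download(videoId) directly rather than with .delay(): the download runs synchronously inside the same worker invocation, and the queue entry is only deleted when it returns True. A short sketch of the difference between the two call styles; the video ID is a placeholder and the ayta.tasks import path is an assumption:

```python
# Sketch of synchronous vs. deferred Celery task invocation; import path assumed.
from ayta.tasks import video_download, video_queue

ok = video_download("dQw4w9WgXcQ")             # plain call: runs here and now, returns True/False
result = video_download.delay("dQw4w9WgXcQ")   # .delay(): only enqueues the task, returns an AsyncResult
print(result.id)

video_queue.delay()  # what the new "queue-single" button triggers: pop one ID from the queue and download it
```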

View File

@@ -16,6 +16,7 @@
     </div>
     <form method="POST">
         <input title="test-sleep" type="submit" value="test-sleep" name="task">
+        <input title="queue-single" type="submit" value="queue-single" name="task">
     </form>
     <div class="divider"></div>
     <div class="row">

View File

@@ -25,12 +25,24 @@
     </li>
     <li class="collection-item">
         <a href="{{ url_for('channel.channel', channelId='UCIcgBZ9hEJxHv6r_jDYOMqg') }}"><span class="title">Unus Annus</span></a>
-        <p>Reason: This channel does not exist. (Self removed)</p>
+        <p>Reason: This channel does not exist.</p>
     </li>
     <li class="collection-item">
         <a href="{{ url_for('channel.channel', channelId='UCz1s8aJYSQuaXJCtEi-VWRA') }}"><span class="title">Dutch Legion</span></a>
         <p>Reason: This account has been terminated due to multiple or severe violations of YouTube's policy prohibiting hate speech.</p>
     </li>
+    <li class="collection-item">
+        <a href="{{ url_for('channel.channel', channelId='UC91-8aNaRbp71UMEb_34ryg') }}"><span class="title">RBMK5000</span></a>
+        <p>Reason: This channel does not exist.</p>
+    </li>
+    <li class="collection-item">
+        <a href="{{ url_for('channel.channel', channelId='UCoPSAT64vfXlulyWd_dPE3Q') }}"><span class="title">Evilfisher2</span></a>
+        <p>Reason: This channel was removed because it violated our Community Guidelines.</p>
+    </li>
+    <li class="collection-item">
+        <a href="{{ url_for('channel.channel', channelId='UCZXkvavD2YKnFCzCkZ-bNPw') }}"><span class="title">mrabhy</span></a>
+        <p>Reason: This channel was removed because it violated our Community Guidelines.</p>
+    </li>
 </ul>
 </div>
 <div class="col s12 l6 center-align">
@@ -45,6 +57,18 @@
     </li>
     <li class="collection-item">
         <a href="{{ url_for('channel.channel', channelId='UCtfg1tENiu3SgGMZVduFmTg') }}"><span class="title">FiberNinja</span></a>
         <p>Reason: This channel was removed because it violated our Community Guidelines.</p>
     </li>
+    <li class="collection-item">
+        <a href="{{ url_for('channel.channel', channelId='UCv4VkfbX8YfqodF-4coEEfQ') }}"><span class="title">James Somerton</span></a>
+        <p>Reason: This channel does not exist.</p>
+    </li>
+    <li class="collection-item">
+        <a href="{{ url_for('channel.channel', channelId='UC8XH9kpilkuss4bVeRZD1kw') }}"><span class="title">Plagued Moth</span></a>
+        <p>Reason: This channel was removed because it violated our Community Guidelines.</p>
+    </li>
+    <li class="collection-item">
+        <a href="{{ url_for('channel.channel', channelId='UCxZTTWP0QN7-ch2wW1QeFwg') }}"><span class="title">CowOfTheSea</span></a>
+        <p>Reason: This channel was removed because it violated our Community Guidelines.</p>
+    </li>
 </ul>

View File

@@ -8,4 +8,5 @@ yt-dlp
gunicorn
celery
sqlalchemy
requests
pyjwt[crypto]