many things changed

This commit is contained in:
Ventilaar 2024-03-20 22:44:02 +01:00
parent e264a346a5
commit 69bf7026dd
No known key found for this signature in database
43 changed files with 151 additions and 1079 deletions

View File

@ -1,11 +1,8 @@
name: Generate release
on:
push:
tags:
- 'v*'
branches:
- master
release:
types: [published]
jobs:
build-and-publish:
@ -25,4 +22,13 @@ jobs:
uses: docker/build-push-action@v5
with:
push: true
tags: git.ventilaar.nl/ventilaar/ayta:latest
tags: git.ventilaar.nl/ventilaar/ayta:latest
- name: Update worker server
uses: appleboy/ssh-action@v1.0.3
with:
host: 192.168.66.109
username: root
key: ${{ secrets.SERVER_KEY }}
port: 22
script: /root/update_worker.sh

View File

@ -1,7 +1,7 @@
def create_app(test_config=None):
import os, secrets
from flask import Flask
from ayta.extensions import limiter, caching
from ayta.extensions import limiter, caching, celery_init_app
from werkzeug.middleware.proxy_fix import ProxyFix
from . import filters
@ -14,7 +14,9 @@ def create_app(test_config=None):
'CACHE_DEFAULT_TIMEOUT': int(os.environ.get('AYTA_CACHETIMEOUT', 6)),
'SECRET_KEY': os.environ.get('AYTA_SECRETKEY', secrets.token_hex(32)),
'DEBUG': bool(os.environ.get('AYTA_DEBUG', False)),
'MATRIX_TOKEN': bool(os.environ.get('AYTA_MATRIXTOKEN', None))
'DOMAIN': os.environ.get('AYTA_DOMAIN', 'testing.mashallah.nl'),
'CELERY': dict(broker_url=str(os.environ.get('AYTA_CELERYBROKER', 'amqp://guest:guest@192.168.66.140:5672/')),
task_ignore_result=True,)
}
app = Flask(__name__)
@ -22,6 +24,7 @@ def create_app(test_config=None):
limiter.init_app(app)
caching.init_app(app)
celery_init_app(app)
app.wsgi_app = ProxyFix(app.wsgi_app, x_for=1)

View File

@ -3,6 +3,7 @@ from ..nosql import get_nosql
from ..s3 import get_s3
from ..dlp import checkChannelId, getChannelInfo
from ..decorators import login_required
from ..tasks import subscribe_websub_callback, unsubscribe_websub_callback
from datetime import datetime
import requests
@ -17,7 +18,9 @@ def base():
@login_required
def system():
if request.method == 'POST':
pass
task = request.form.get('task', None)
if task == 'update-value':
pass
return render_template('admin/system.html')
@ -40,7 +43,8 @@ def channels():
channelId, originalName = getChannelInfo(channelId, ('channel_id', 'uploader'))
if not get_nosql().insert_new_channel(channelId, originalName, addedDate):
return 'Error inserting new channel, you probably made a mistake somewhere'
flash('Error inserting new channel, you probably made a mistake somewhere')
return redirect(url_for('admin.channels'))
return redirect(url_for('admin.channel', channelId=channelId))
@ -56,11 +60,22 @@ def channels():
@bp.route('/channel/<channelId>', methods=['GET', 'POST'])
@login_required
def channel(channelId):
channelInfo = get_nosql().get_channel_info(channelId)
if not channelInfo:
flash('That channel ID does not exist in the system')
return redirect(url_for('admin.channels'))
if request.method == 'POST':
task = request.form.get('task', None)
key = request.form.get('key', None)
value = request.form.get('value', None)
if task == 'subscribe-websub':
task = subscribe_websub_callback.delay(channelId)
flash(f"Started task {task.id}")
return redirect(url_for('admin.channel', channelId=channelId))
if task == 'update-value':
if key == 'active':
value = True if value else False
@ -69,11 +84,7 @@ def channel(channelId):
value = datetime.strptime(value, '%Y-%m-%d')
get_nosql().update_channel_key(channelId, key, value)
channelInfo = get_nosql().get_channel_info(channelId)
if not channelInfo:
return 'That channel ID does not exist in the system'
return redirect(url_for('admin.channel', channelId=channelId))
return render_template('admin/channel.html', channelInfo=channelInfo)
@ -84,9 +95,8 @@ def runs():
task = request.form.get('task', None)
if task == 'clean_runs':
get_nosql().clean_runs()
else:
pass
return redirect(url_for('admin.runs'))
runs = reversed(list(get_nosql().get_runs()))
return render_template('admin/runs.html', runs=runs)
@ -106,15 +116,15 @@ def websub():
if task == 'unsubscribe':
channelId = get_nosql().websub_getCallback(value).get('channel')
data = {'hub.callback': f'https://testing.ventilaar.net/websub/c/{value}',
'hub.topic': f'https://www.youtube.com/xml/feeds/videos.xml?channel_id={channelId}',
'hub.verify': 'async',
'hub.mode': 'unsubscribe'
}
requests.post('https://pubsubhubbub.appspot.com/subscribe', data=data)
task = unsubscribe_websub_callback.delay(value, channelId)
flash(f"Started task {task.id}")
return redirect(url_for('admin.websub'))
elif task == 'clean-retired':
get_nosql().websub_cleanRetired()
return redirect(url_for('admin.websub'))
callbackIds = get_nosql().websub_getCallbacks()
callbacks = {}
@ -134,6 +144,7 @@ def reports():
if task == 'close':
get_nosql().close_report(value)
flash(f'Report closed {value}')
return redirect(url_for('admin.reports'))
reports = get_nosql().list_reports()

View File

@ -29,21 +29,25 @@ def login():
if current_app.config.get('DEBUG'):
session['username'] = 'admin'
flash('You have been logged in')
return redirect(url_for('admin.base'))
return redirect(request.args.get('next', url_for('admin.base')))
if not password:
flash('Password was empty')
return 'password required!'
return redirect(url_for('auth.login'))
try:
ph = PasswordHasher()
if ph.verify(corr, password):
session['username'] = 'admin'
flash('You have been logged in')
return redirect(url_for('admin.base'))
return redirect(request.args.get('next', url_for('admin.base')))
except VerifyMismatchError:
flash('Wrong password')
return redirect(url_for('auth.login'))
except:
flash('Something went wrong')
return redirect(url_for('auth.login'))
return render_template('login.html')

View File

@ -1,4 +1,4 @@
from flask import Blueprint, render_template
from flask import Blueprint, render_template, flash, url_for, redirect
from ..nosql import get_nosql
from ..s3 import get_s3
from ..extensions import caching, caching_unless
@ -23,7 +23,8 @@ def channel(channelId):
channelInfo = get_nosql().get_channel_info(channelId)
if not channelInfo:
return 'That channel ID does not exist in the system'
flash('That channel ID does not exist in the system')
return redirect(url_for('channel.base'))
videoIds = get_nosql().get_channel_videoIds(channelId)

View File

@ -12,3 +12,8 @@ def base():
@caching.cached(unless=caching_unless)
def help():
return render_template('help.html')
@bp.route('robots.txt', methods=['GET'])
@caching.cached(unless=caching_unless)
def robots():
return render_template('robots.txt')

View File

@ -1,4 +1,4 @@
from flask import Blueprint, render_template, request, flash
from flask import Blueprint, render_template, request, flash, redirect, url_for
from ..nosql import get_nosql
from ..extensions import caching, caching_v_parameter, caching_unless
@ -10,10 +10,12 @@ def base():
vGet = request.args.get('v')
if not vGet:
return 'Missing v argument', 400
flash('Thats not how it works pal')
return redirect(url_for('index.base'))
if not get_nosql().check_exists(vGet):
return 'The requested video is not in the archive', 404
flash('The requested video is not in the archive')
return redirect(url_for('index.base'))
render = {}
@ -22,12 +24,15 @@ def base():
if reason not in ['auto-video', 'metadata', 'illegal']:
flash('Invalid report reason')
return redirect(url_for('watch.base', v=vGet))
else:
reportId = get_nosql().insert_report(vGet, reason)
if reportId:
flash(f'Report has been received: {reportId}')
return redirect(url_for('watch.base', v=vGet))
else:
flash('Something went wrong with reporting')
return redirect(url_for('watch.base', v=vGet))
render['info'] = get_nosql().get_video_info(vGet)
render['params'] = request.args.get('v')

View File

@ -35,6 +35,6 @@ def callback(cap):
if get_nosql().websub_existsCallback(cap):
if not get_nosql().websub_savePost(cap, str(request.data)):
return abort(500)
return '', 204
return '', 202
return abort(404)

View File

@ -2,10 +2,10 @@ import yt_dlp
def checkChannelId(channelId):
if len(channelId) < 24: # channelId lengths are 24 characters
if len(channelId) <= 23: # channelId lengths are 24 characters
return False
if len(channelId) > 25: # But some are 25, idk why
if len(channelId) >= 26: # But some are 25, idk why
return False
if channelId[0:2] not in ['UC', 'UU']:

View File

@ -3,8 +3,21 @@ from flask_limiter.util import get_remote_address
from flask_caching import Cache
from flask import request, session
from flask import Flask, request, session
from celery import Celery, Task
def celery_init_app(app: Flask) -> Celery:
    """Create a Celery application tied to the given Flask application.

    The Celery app is named after ``app.name``, configured from
    ``app.config["CELERY"]``, registered through ``set_default()`` and stored
    in ``app.extensions["celery"]``.

    :param app: Flask application whose config contains a ``CELERY`` entry.
    :return: the configured Celery application.
    """
    class FlaskTask(Task):
        # Run each task body inside the Flask application context so task
        # code can rely on context-bound objects such as current_app.
        def __call__(self, *args: object, **kwargs: object) -> object:
            with app.app_context():
                return self.run(*args, **kwargs)

    celery_app = Celery(app.name, task_cls=FlaskTask)
    celery_app.config_from_object(app.config["CELERY"])
    celery_app.set_default()
    app.extensions["celery"] = celery_app
    return celery_app
def caching_unless(*args, **kwargs):
# if it is not a get request
@ -25,11 +38,11 @@ def caching_unless(*args, **kwargs):
def caching_v_parameter(*args, **kwargs):
return request.args.get('v')
limiter = Limiter(
get_remote_address,
default_limits=['86400 per day', '3600 per hour'],
storage_uri="memory://",
)
caching = Cache()
caching = Cache()

View File

@ -293,13 +293,16 @@ class Mango:
status = status.get('status')
if status in ['requesting', 'active']:
if status in ['requesting', 'active', 'retiring']:
return True
return False
def websub_retiringCallback(self, callbackId):
return self.websub_callbacks.update_one({'id': callbackId}, {'$set': {'status': 'retiring', 'retiring_time': current_time(object=True)}})
def websub_retireCallback(self, callbackId):
return self.websub_callbacks.update_one({'id': callbackId}, {'$set': {'status': 'retired', 'retiring_time': current_time(object=True)}})
return self.websub_callbacks.update_one({'id': callbackId}, {'$set': {'status': 'retired', 'retired_time': current_time(object=True)}})
def websub_deleteCallback(self, callbackId):
return self.websub_callbacks.delete_one({'id': callbackId})

47
ayta/tasks.py Normal file
View File

@ -0,0 +1,47 @@
from celery import shared_task
from flask import current_app
@shared_task()
def subscribe_websub_callback(channelId):
    """Ask the hub to subscribe us to a channel's YouTube upload feed.

    A new callback is created in the database for the channel, marked as
    requesting, and a subscribe request pointing at that callback is posted
    to the hub.

    :param channelId: YouTube channel ID whose feed should be subscribed to.
    :return: True when the hub answers with HTTP 202, False otherwise.
    :raises requests.RequestException: if the hub cannot be reached or does
        not answer within the timeout.
    """
    import requests
    from .nosql import get_nosql

    callbackId = get_nosql().websub_newCallback(channelId)

    url = 'https://pubsubhubbub.appspot.com/subscribe'
    data = {
        'hub.callback': f'https://{current_app.config["DOMAIN"]}/websub/c/{callbackId}',
        'hub.topic': f'https://www.youtube.com/xml/feeds/videos.xml?channel_id={channelId}',
        'hub.verify': 'async',
        'hub.mode': 'subscribe',
        'hub.verify_token': '',
        'hub.secret': '',
        # The WebSub parameter for the requested lease duration is
        # hub.lease_seconds; the former 'hub.lease_numbers' key is not part
        # of the protocol, so the requested one-day lease was never sent.
        'hub.lease_seconds': '86400',
    }

    get_nosql().websub_requestingCallback(callbackId)

    # Bound the request so an unresponsive hub cannot block a worker forever.
    response = requests.post(url, data=data, timeout=30)

    return response.status_code == 202
@shared_task()
def unsubscribe_websub_callback(callbackId, channelId):
    """Ask the hub to drop the subscription behind an existing callback.

    The callback is marked as retiring in the database and an unsubscribe
    request for the channel's feed is posted to the hub.

    :param callbackId: ID of the callback whose subscription should end.
    :param channelId: YouTube channel ID the callback was subscribed to.
    :return: True when the hub answers with HTTP 202, False otherwise.
    :raises requests.RequestException: if the hub cannot be reached or does
        not answer within the timeout.
    """
    import requests
    from .nosql import get_nosql

    url = 'https://pubsubhubbub.appspot.com/subscribe'
    data = {
        'hub.callback': f'https://{current_app.config["DOMAIN"]}/websub/c/{callbackId}',
        'hub.topic': f'https://www.youtube.com/xml/feeds/videos.xml?channel_id={channelId}',
        'hub.verify': 'async',
        'hub.mode': 'unsubscribe',
    }

    get_nosql().websub_retiringCallback(callbackId)

    # Bound the request so an unresponsive hub cannot block a worker forever.
    response = requests.post(url, data=data, timeout=30)

    return response.status_code == 202

View File

@ -4,10 +4,15 @@
{% block content %}
<div class="row">
<div class="col s12">
<div class="col s12 l11">
<h4>{{ channelInfo.original_name }} administration page</h4>
<p>The update actions below directly apply to the database!</p>
</div>
<div class="col s12 l1 m-5">
<form method="POST">
<input title="Requests callback URL from youtube API" type="submit" value="subscribe-websub" name="task">
</form>
</div>
</div>
<div class="row">
<div class="col s12 l4">

View File

@ -43,30 +43,6 @@
</div>
</div>
</div>
<div class="col s12 l4 m-4">
<div class="card green">
<div class="card-content white-text">
<span class="card-title">Placeholder</span>
<p>I am a very simple card. I am good at containing small bits of information. I am convenient because I require little markup to use effectively.</p>
</div>
<div class="card-action">
<a href="#">This is a link</a>
<a href="#">This is a link</a>
</div>
</div>
</div>
<div class="col s12 l4 m-4">
<div class="card green">
<div class="card-content white-text">
<span class="card-title">Placeholder</span>
<p>I am a very simple card. I am good at containing small bits of information. I am convenient because I require little markup to use effectively.</p>
</div>
<div class="card-action">
<a href="#">This is a link</a>
<a href="#">This is a link</a>
</div>
</div>
</div>
</div>
<div class="divider"></div>
<div class="row">

View File

@ -19,32 +19,6 @@
<h5>Report options</h5>
</div>
</div>
<div class="row">
<div class="col s12 l4 m-4">
<div class="card green">
<div class="card-content white-text">
<span class="card-title">Placeholder</span>
<p>I am a very simple card. I am good at containing small bits of information. I am convenient because I require little markup to use effectively.</p>
</div>
<div class="card-action">
<a href="#">This is a link</a>
<a href="#">This is a link</a>
</div>
</div>
</div>
<div class="col s12 l4 m-4">
<div class="card green">
<div class="card-content white-text">
<span class="card-title">Placeholder</span>
<p>I am a very simple card. I am good at containing small bits of information. I am convenient because I require little markup to use effectively.</p>
</div>
<div class="card-action">
<a href="#">This is a link</a>
<a href="#">This is a link</a>
</div>
</div>
</div>
</div>
<div class="divider"></div>
<div class="row">
<div class="col s6 l9">

View File

@ -21,7 +21,7 @@
<form method="POST">
<div class="input-field">
<span class="supporting-text">Enable WebSub</span>
<input class="validate" type="text" value="{{ item }}" name="key" hidden>
<input class="validate" type="text" value="websub" name="key" hidden>
</div>
<div class="input-field m-4">
<div class="switch">

View File

@ -19,32 +19,6 @@
<h5>WebSub options</h5>
</div>
</div>
<div class="row">
<div class="col s12 l4 m-4">
<div class="card green">
<div class="card-content white-text">
<span class="card-title">Placeholder</span>
<p>I am a very simple card. I am good at containing small bits of information. I am convenient because I require little markup to use effectively.</p>
</div>
<div class="card-action">
<a href="#">This is a link</a>
<a href="#">This is a link</a>
</div>
</div>
</div>
<div class="col s12 l4 m-4">
<div class="card green">
<div class="card-content white-text">
<span class="card-title">Placeholder</span>
<p>I am a very simple card. I am good at containing small bits of information. I am convenient because I require little markup to use effectively.</p>
</div>
<div class="card-action">
<a href="#">This is a link</a>
<a href="#">This is a link</a>
</div>
</div>
</div>
</div>
<div class="divider"></div>
<div class="row">
<div class="col s6 l9">
@ -68,6 +42,7 @@
<th>requesting_time</th>
<th>activation_time</th>
<th>retiring_time</th>
<th>retired_time</th>
<th>lease</th>
</tr>
</thead>
@ -87,6 +62,7 @@
<td>{{ callbacks[callback].get('requesting_time') }}</td>
<td>{{ callbacks[callback].get('activation_time') }}</td>
<td>{{ callbacks[callback].get('retiring_time') }}</td>
<td>{{ callbacks[callback].get('retired_time') }}</td>
<td>{{ callbacks[callback].get('lease') }}</td>
</tr>
{% endfor %}

View File

@ -1,37 +0,0 @@
tgtoken = 'xxx'
chatid = xxx
def send_messagen(service, message, silent):
    """Send a message to Telegram and terminate the process.

    Uses the module-level ``tgtoken`` and ``chatid`` values. The process
    exits with status 1 when Telegram does not answer with HTTP 200 and with
    status 0 otherwise.

    :param service: name of the sending service, shown in bold.
    :param message: message text, shown as a code block.
    :param silent: passed to Telegram as ``disable_notification``.
    """
    # sys.exit is used instead of the bare exit() helper, which is only
    # injected by the site module and is meant for interactive sessions.
    import sys
    import requests

    apirurl = f'https://api.telegram.org/bot{tgtoken}/sendMessage'
    outtext = f'Message from: <b>{service}</b>\n<code>{message}</code>'
    options = {'chat_id': chatid, 'text': outtext, 'parse_mode': 'HTML', 'disable_notification': silent}

    # Bound the request so a stalled connection cannot hang the calling job.
    r = requests.post(apirurl, json=options, timeout=30)

    if r.status_code != 200:
        print(f"Error Sending message to telegram servers, got http status code {r.status_code}")
        sys.exit(1)
    sys.exit()
def main():
    """Parse the command line and forward the message to send_messagen.

    Expects exactly three arguments: service, message and a silent flag,
    where '1' means silent. Any other argument count prints the usage text.
    """
    import sys

    cli_args = sys.argv[1:]
    if len(cli_args) == 3:
        service, message, silent_flag = cli_args
        send_messagen(service, message, silent_flag == '1')
        return

    print('Usage: scriptname.py "service" "message" "Silent: 1/0"')
if __name__ == "__main__":
main()

View File

@ -1,18 +0,0 @@
# _______ _ ______ _______
#|\ /|( ___ )( \ ( __ \ |\ /|( ____ )
#| ) ( || ( ) || ( | ( \ ) | ) ( || ( )|
#| (___) || | | || | | | ) | | | | || (____)|
#| ___ || | | || | | | | | | | | || _____)
#| ( ) || | | || | | | ) | | | | || (
#| ) ( || (___) || (____/\| (__/ ) | (___) || )
#|/ \|(_______)(_______/(______/ (_______)|/
#
# Did you search for a video that got removed?
# Goto the search page to find it back again at https://archive.ventilaar.net/search/
#
# below are listed video id's that can NOT be archived be it a
# 1. private video
# 2. copyright claimed and thus blocked
# 3. paid video
#
# And since this hobby turned out to be useful to lots of people, please consider donating via monero 83YQWgHU3n6XybsquTy8mMincFz5rwGvF79293NrvWtrR6NNwKSsNYPBHpq1qRcKxjTkkmermZbiBTaJ81MtTGty8B1uNAF

View File

@ -1,19 +0,0 @@
##########################
# README #
##########################
#
# Want something archived?
# Contact me at: archive (at) ventilaar (d0t) nl
# To easily search for a desired video go to the search page https://archive.ventilaar.net/search/
#
##########################
# CHANNELS #
##########################
#
# Please include personal and edgy channels that are likely to get taken down. Please no corporations with large amount of video's like Buzzfeed(needs archiving? lol) or Linus Tech Tips(just too large and big)
#
# Format looks like this
# # Channelname / dateaddedtoarchive
# channel link
#
# And since this hobby turned out to be useful to lots of people, please consider donating via monero 83YQWgHU3n6XybsquTy8mMincFz5rwGvF79293NrvWtrR6NNwKSsNYPBHpq1qRcKxjTkkmermZbiBTaJ81MtTGty8B1uNAF

View File

@ -1,61 +0,0 @@
# Set Output
# /var/www/archive.ventilaar.net/videos/channelname/YYYYMMDD/ID/humantitle.extension
--output "/var/www/archive.ventilaar.net/videos/automatic/%(channel_id)s(%(uploader)s)/%(upload_date)s/%(id)s/%(title)s.%(ext)s"
# Set Archive
# Insert video ID's that already have been downloaded
--download-archive "/var/www/archive.ventilaar.net/goodstuff/archive.txt"
# Channel List
# Grab channels and videos from this list
--batch-file "/var/www/archive.ventilaar.net/goodstuff/channels.txt"
# Filter Out Ongoing Livestream
# Do this so that if a livestream goes on for 24h or more, this script will not stay active during the stream
--match-filter "!is_live"
# Set Format
# 720pMP4/720pMP4+128kM4A/360pmp4, bestvideo+audio as last so that we always have a video and audio stream
--format "22/136+140/18/bestvideo[ext=mp4]+bestaudio[ext=m4a]/best"
--prefer-ffmpeg
--merge-output-format "mp4"
# Add Metadata
# Adds metadata to video file and just downloads more metadata
--add-metadata
--write-thumbnail
--write-info-json
# Subs
# Download all available real subs only in srt/vtt
--write-sub
--all-subs
--sub-format "vtt"
# Limit Speed and Requests
# So that we don't get CAPTCHAS and overload our network
--sleep-interval "5"
--max-sleep-interval "10"
--limit-rate "16M"
# Good options
# Arguments speak for themselves
--no-cache-dir
--ignore-errors
--geo-bypass
--retries 2
--no-continue
#test(bypasses age gate, but playlist listing does not work)
#--user-agent "Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)"
# Bypass Limit
# Go to youtube in a browser, copy the abuse cookie to this file et voila
--cookies "/var/www/archive.ventilaar.net/goodstuff/cookie.jar"
# youtube-dlp
--no-sponskrub
# test
--exec "python3 /var/www/archive.ventilaar.net/search/add_db.py %(filepath)q -c %(channel_id)q -d %(upload_date)q -i=%(id)q -e %(ext)q"

View File

@ -1,64 +0,0 @@
# Set Output
# /var/www/archive.ventilaar.net/videos/channelname/YYYYMMDD/ID/humantitle.extension
--output "/var/www/archive.ventilaar.net/videos/manual/%(id)s/%(title)s.%(ext)s"
# Set Archive
# Insert video ID's that already have been downloaded
--download-archive "/var/www/archive.ventilaar.net/goodstuff/archive.txt"
# Filter Out Ongoing Livestream
# Do this so that if a livestream goes on for 24h or more, this script will not stay active during the stream
--match-filter "!is_live"
# Set Format
# 720pMP4/720pMP4+128kM4A/360pmp4, bestvideo+audio as last so that we always have a video and audio stream
--format "22/136+140/18/bestvideo[ext=mp4]+bestaudio[ext=m4a]/best"
--prefer-ffmpeg
--merge-output-format "mp4"
# Add Metadata
# Adds metadata to video file and just downloads more metadata
--add-metadata
--write-thumbnail
--write-info-json
# Subs
# Download all available real subs only in srt/vtt
--write-sub
--all-subs
--sub-format "vtt"
# Limit Speed and Requests
# So that we don't get CAPTCHAS and overload our network
--limit-rate "16M"
--sleep-interval "5"
--max-sleep-interval "10"
# Good options
# Arguments speak for themselves
--no-cache-dir
--ignore-errors
--geo-bypass
--retries 2
--no-continue
#test (bypasses age gate, but playlist listing does not work)
#--user-agent "Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)"
# Bypass Limit
# Go to youtube in a browser, copy the abuse cookie to this file et voila
--cookies "/var/www/archive.ventilaar.net/goodstuff/cookie.jar"
# youtube-dlp
--no-sponskrub
# do not run the script. that one is only for automatic manual you can search for yourself
#--exec "python /var/www/archive.ventilaar.net/goodstuff/db/add_db.py -c %(channel_id)q -n %(uploader)q -d %(upload_date)q -i %(id)q -t %(title)q -e %(ext)q"
##########################
# PLAYLIST #
##########################
# The only playlist, intended for one video instead of a channel. Like a livestream, compilation or memes
https://www.youtube.com/playlist?list=PLaF89kHAz45s1_hqMMQuzS-6gZ4cidmfy

View File

@ -1,8 +0,0 @@
# Netscape HTTP Cookie File
# This file is generated by yt-dlp. Do not edit.
.youtube.com TRUE / FALSE 2145916800 CONSENT YES+cb.20210328-17-p0.en+FX+351
.youtube.com TRUE / FALSE 2147483647 GOOGLE_ABUSE_EXEMPTION ID=d75ce9a52dc4b333:TM=1618262296:C=r:IP=134.19.179.163-:S=APGng0u-ZJZWFH2kNJH0ds-kxYaDetBuLA
.youtube.com TRUE / TRUE 1636522686 GPS 1
.youtube.com TRUE / TRUE 1652052127 VISITOR_INFO1_LIVE Jx2J9exBFv4
.youtube.com TRUE / TRUE 0 YSC ij9SBx7PmbE

View File

@ -1,76 +0,0 @@
<!---
DONT LOOK AT MY CODE, IT's SHITTY
I'm NOT A QUALIFIED WEB DEVELOPER
IF IT WORKS, IT WORKS, DON't TOUCH IT
--->
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="utf-8" />
<meta name="description" content="My casual website with no ads at all.">
<meta name="robots" content="no follow">
<meta name="viewport" content="width=device-width, initial-scale=1, shrink-to-fit=no">
<title>archive · ventilaar.net</title>
<link href="https://cdn.ventilaar.net/assets/css/bootstrap.min.css" rel="stylesheet">
</head>
<body class="bg-dark">
<!-- Navigation -->
<nav class="navbar navbar-expand navbar-dark bg-dark static-top">
<div class="container">
<a class="navbar-brand" href="https://ventilaar.net/">Ventilaar.net</a>
<ul class="navbar-nav ">
<li class="nav-item">
<a class="nav-link" href="https://ventilaar.net/">Home</a>
</li>
<!---
<li class="nav-item">
<a class="nav-link" href="https://ventilaar.net/vitas.html">Vitas</a>
</li>
<li class="nav-item">
<a class="nav-link" href="https://venitlaar.net/minecraft.html">Minecraft</a>
</li>
<li class="nav-item">
<a class="nav-link" href="https://ventilaar.net/blog.html">Blog</a>
</li>
<li class="nav-item">
<a class="nav-link" href="https://files.ventilaar.net/">Files</a>
</li>
--->
<li class="nav-item active">
<a class="nav-link" href="/">Archive</a>
</li>
<li class="nav-item">
<a class="nav-link" href="/search/">Search</a>
</li>
</ul>
</div>
</nav>
<!-- Page Content -->
<div class="container text-light">
<div class="row">
<div class="col-lg-12 text-center">
<h1 class="mt-5">How did I do it?</h1>
<p class="lead">One word: Youtube-DL(p)</p>
<p>Basically I made a script to automatically download video's from channels every 6 hours</p>
<p>Split up to automatic and manual. Automatic archives channels, including new uploads. Manual archives set video's in <a href="https://www.youtube.com/playlist?list=PLaF89kHAz45s1_hqMMQuzS-6gZ4cidmfy">this </a> playlist</p>
<p>Want me to add a channel?<a href="/goodstuff/channels.txt"> Find contact info here</a></p>
<p>For more technical people, the files to make this possible are linked below</p>
<a href="/goodstuff/run.sh">run.sh </a><a href="/goodstuff/config_manual.conf">config_manual.conf </a><a href="/goodstuff/config_automatic.conf">config_automatic.conf </a><a href="/goodstuff/channels.txt">channels.txt </a><a href="/goodstuff/archive.txt">archive.txt </a><a href="/goodstuff/lastlog.txt">lastlog.txt</a><a></a>
<br>
<br>
<!---
<div class="alert alert-warning" role="alert">
Some channels are missing a handful of video's. This is a known problem. Unfortunately the maintainters at Youtube-DL<a href="https://github.com/ytdl-org/youtube-dl/issues/21121"> do not want to fix</a> this bug.<br>
Update: Actually I started using yt-dlp which has more features than youtube-dl, including a fix for this bug.
</div>
--->
<div class="alert alert-info" role="alert">
Video streaming problems? Just download the video! This archive isn't meant for streaming anyway, hence the lower quality. Be happy that there is something archived
</div>
</div>
</div>
</div>
</body>
</html>

View File

@ -1,2 +0,0 @@
--- It is 2021/11/10 06:08:01 ---
--- Stopped at 2021/11/10 08:29:34 ---

View File

@ -1,48 +0,0 @@
#!/bin/sh
# TODO, split manual into a cronjob that runs hourly/every 3 hours, currently main archive runs every 6 hours
echo "--- It is $(date "+%Y/%m/%d %H:%M:%S") ---"
# Check for lockfile
if [ ! -f "/var/www/archive.ventilaar.net/goodstuff/lockfile" ]; then
# Create lock file
/bin/touch /var/www/archive.ventilaar.net/goodstuff/lockfile
# Update youtube-dl before executing main process
/usr/local/bin/yt-dlp -U
#hotfix, after update the executable changes permission, of course this runs as root, why do you ask?
chmod +x /usr/local/bin/yt-dlp
# Run youtube-dl with config gile
/usr/local/bin/yt-dlp --config-location /var/www/archive.ventilaar.net/goodstuff/config_manual.conf
/usr/local/bin/yt-dlp --config-location /var/www/archive.ventilaar.net/goodstuff/config_automatic.conf
# Create sitemap
#/var/www/archive.ventilaar.net/sitemap.sh
# Send message if there are errors
# not using cat but tail, because cat refuses because the file is still in use, tail does not
# update: holy shit, i just remembered that i can just use the exitcode of yt-dlp instead of this junk, but wont fix lol
errors=$(/usr/bin/tail -c 999999 /var/www/archive.ventilaar.net/goodstuff/lastlog.txt | grep "ERROR: " | grep -v "ERROR: Premieres" | wc -l)
if [ $errors != "0" ]; then
echo "There were $errors errors, tryting to send message"
/usr/bin/python3 /var/www/archive.ventilaar.net/goodstuff/.send_to_telegram.py "debian-archive" "Youtube mirror script finished with $errors errors!" 0
fi
# generate php statistics and copy database for php
/usr/bin/python3 /var/www/archive.ventilaar.net/search/gen_stats.py
cp -f /var/www/archive.ventilaar.net/search/videos.db /var/www/archive.ventilaar.net/search/copy.db
# Remove lock file
/bin/rm -f /var/www/archive.ventilaar.net/goodstuff/lockfile
else
# Oof, a lock file exists, something went wrong
echo "Script already running!"
echo "This could mean that a big channel is currently being archived."
/usr/bin/python3 /var/www/archive.ventilaar.net/goodstuff/.send_to_telegram.py "debian-archive" "Youtube mirror script started while the previous one was still going, sum tin won?" 0
fi
echo "--- Stopped at $(date "+%Y/%m/%d %H:%M:%S") ---"

View File

@ -1,35 +0,0 @@
# This prints the channel id of channels that are double in the folder that it is ran in
import os
def get_channel_id(folder):
    """Return the channel ID prefix of a 'channel_id(channel_name)' folder.

    The ID ends at the first '(' so that channel names which themselves
    contain parentheses still resolve to the same ID.

    :param folder: folder name to inspect.
    :return: everything before the first '('.
    :raises ValueError: if the folder name contains no '('.
    """
    return folder[:folder.index('(')]
def get_folders():
    """Return the names of all directories in the current working directory."""
    return [entry for entry in os.listdir() if os.path.isdir(entry)]
def main():
    """Print every channel ID that is shared by more than one folder.

    Output lines have the form '<channel_id>: <number of folders>' and are
    printed in the order the IDs were first encountered.
    """
    from collections import Counter

    count = Counter(get_channel_id(folder) for folder in get_folders())

    for channel_id, occurrences in count.items():
        if occurrences > 1:
            print(f'{channel_id}: {occurrences}')
if __name__ == "__main__":
main()

View File

@ -1,71 +0,0 @@
import json
import os
loc = 'xxx.json'
allowed_childs = {'id',
'title',
'description',
'upload_date',
'uploader',
'uploader_id',
'channel_id',
'duration',
'view_count',
'age_limit',
'categories',
'tags',
'playable_in_embed',
'is_live',
'was_live',
'like_count',
'channel',
'availability',
'duration_string',
'asr',
'format_id',
'format_note',
'fps',
'height',
'quality',
'tbr',
'width',
'language',
'language_preference',
'ext',
'vcodec',
'acodec',
'dynamic_range',
'video_ext',
'audio_ext',
'vbr',
'abr',
'format',
'resolution',
'filesize_approx',
'fulltitle',
'epoch'
}
def read_file(location):
    """Load and return the JSON document stored at ``location``."""
    with open(location, 'r') as handle:
        parsed = json.load(handle)
    return parsed
def generate_new(data):
    """Return a new dict holding only the entries of ``data`` whose key is in ``allowed_childs``."""
    return {key: data[key] for key in data if key in allowed_childs}
def rename_file(location, ext):
    """Rename the file at ``location`` by appending '.<ext>' to its name.

    :param location: path of the file to rename.
    :param ext: suffix to append, without the leading dot.
    """
    # Operate on the path that was passed in rather than the module-level
    # ``loc``, so the function does what its signature promises.
    os.rename(location, f'{location}.{ext}')
def write_json(data, location):
    """Serialise ``data`` as JSON and write it to ``location``, replacing the file.

    :param data: JSON-serialisable object.
    :param location: path of the file to write.
    """
    # write() emits the document in one call; writelines() on a str would
    # iterate over it and write it one character at a time.
    with open(location, 'w') as file:
        file.write(json.dumps(data))
if __name__ == '__main__':
old = read_file(loc)
new = generate_new(old)
rename_file(loc, 'bek')
write_json(new, loc)

View File

@ -1,120 +0,0 @@
# this script reads the directory structure and compares it with the videos table in the database, inserting every video that is missing
import sqlite3
import os
import glob
#root = 'W:\\archive.ventilaar.net\\videos\\automatic' # if on windows
root = '/var/www/archive.ventilaar.net/videos/automatic' # if on archive container
extensions = ['mp4', 'webm', 'mkv']
class Mydb:
    """Small wrapper around the SQLite ``videos`` table.

    :param path: database file to open. Defaults to '/root/videos.db'; the
        original file also noted
        '/var/www/archive.ventilaar.net/goodstuff/db/videos.db' as an
        alternative location.
    """

    def __init__(self, path='/root/videos.db'):
        self.conn = sqlite3.connect(path)
        self.cur = self.conn.cursor()

    def insert_video(self, dic):
        """Insert one video row and commit.

        :param dic: mapping with the keys channel_id, channel_name,
            upload_date, video_id, video_title and video_ext.
        """
        query = ''' INSERT INTO videos (channel_id, channel_name, upload_date, video_id, video_title, video_ext) VALUES (?, ?, ?, ?, ?, ?) '''
        data = (dic['channel_id'], dic['channel_name'], dic['upload_date'], dic['video_id'], dic['video_title'], dic['video_ext'])
        self.cur.execute(query, data)
        self.conn.commit()

    def check_video_id(self, vid):
        """Return True if the video ID exists in the table, False if not."""
        query = ''' SELECT video_id FROM videos WHERE video_id = ? '''
        data = [vid]
        self.cur.execute(query, data)
        return bool(self.cur.fetchone())

    def __del__(self):
        # __del__ also runs when __init__ failed before self.conn was set,
        # so only commit and close a connection that actually exists.
        conn = getattr(self, 'conn', None)
        if conn is not None:
            conn.commit()
            conn.close()
def list_channels():
    """Return the directory entries of ``root``.

    Entries are expected to be channel folders named in
    'channel_id(channel_name)' format.
    """
    return os.listdir(root)
def list_upload_dates(channel):
    """Return the upload-date folder names of a channel, without 'NA'.

    :param channel: channel folder name below ``root``.
    :return: list of entry names in the channel folder, excluding 'NA'.
    """
    # Build a filtered list instead of removing entries from the list that
    # is being iterated, which makes the loop skip elements.
    return [date for date in os.listdir(f'{root}/{channel}') if date != 'NA']
def list_video_ids(channel, date):
    """Return the directory entries of ``root/channel/date``.

    Callers treat each returned entry as a video ID folder.
    """
    return os.listdir(f'{root}/{channel}/{date}')
def list_video(channel, date, video_id):
    """Return (video_title, ext) for the first video file in a video folder.

    A file counts as a video when its extension is listed in ``extensions``.
    Returns None when the folder contains no such file.
    """
    folder = f'{root}/{channel}/{date}/{video_id}'
    candidates = (split_video(name) for name in os.listdir(folder))
    return next((parts for parts in candidates if parts[1] in extensions), None)
def split_video(video):
    """Split a file name at its last dot.

    :param video: file name such as 'My.Title.mp4'.
    :return: tuple ``(video_title, video_ext)``; for a name without any dot
        the title is '' and the whole name is returned as the extension.
    """
    title, _, ext = video.rpartition('.')
    return title, ext
def split_channel(channel):
    """Split a 'channel_id(channel_name)' folder name into (channel_id, channel_name).

    The ID is everything before the first '('; the name is the text between
    that '(' and the last ')'. Raises ValueError when either is missing.
    """
    open_at = channel.index("(")
    close_at = channel.rindex(")")
    return channel[:open_at], channel[open_at + 1:close_at]
def main():
    """Walk the archive tree and insert every video missing from the database.

    Iterates channel -> upload date -> video id below ``root`` and inserts a
    row through the module-level ``db`` object for each video id that
    ``db.check_video_id`` does not know yet. Stops with exit status 1 as soon
    as the video file of a folder cannot be determined.
    """
    allchannels = list_channels()
    total_channels = len(allchannels)
    # Manual resume knob: set to N to skip the first N channels of the listing.
    skip_channels = 0
    for channel_progress, channel in enumerate(allchannels, start=1):
        print(f'Currently working on channel {channel_progress}/{total_channels} - {channel}')
        if skip_channels > 0:
            print(f'Skipping {channel}')
            skip_channels = skip_channels - 1
            continue
        for upload_date in list_upload_dates(channel):
            for video_id in list_video_ids(channel, upload_date):
                try:
                    video_title, video_ext = list_video(channel, upload_date, video_id)
                except Exception as err:
                    # Report the underlying error and fail with a non-zero
                    # status; Ctrl-C is no longer swallowed by this handler.
                    print(f'exception on {channel, upload_date, video_id}: {err!r}')
                    raise SystemExit(1)
                channel_id, channel_name = split_channel(channel)
                if db.check_video_id(video_id):
                    continue
                # Fresh dict per video so no stale value can leak between rows.
                db.insert_video({
                    'upload_date': upload_date,
                    'video_id': video_id,
                    'video_title': video_title,
                    'video_ext': video_ext,
                    'channel_id': channel_id,
                    'channel_name': channel_name,
                })
                print(f'{video_id} by {channel_name} added to table')


if __name__ == "__main__":
    db = Mydb()
    main()

View File

@ -1,22 +0,0 @@
#removes the video id's in the archive.txt file in the same folder as the script
#lists date folders and grabs id's from them
import os
# Date folders are the directories in the working directory whose name
# contains '20' (e.g. 20210314); plain files are ignored so that a file name
# containing '20' cannot crash the os.listdir() call below.
clean_dirs = [x for x in os.listdir() if '20' in x and os.path.isdir(x)]
# Every sub-folder of a date folder is named after a video id; archive.txt
# stores them as 'youtube <id>'.
ids = [f'youtube {iddir}' for datedir in clean_dirs for iddir in os.listdir(datedir)]
id_lookup = set(ids)  # constant-time membership test per archive line
with open('archive.txt', 'r') as file:
    lines = file.readlines()
# Rewrite the file, keeping only the lines whose id was not found on disk.
with open('archive.txt', 'w') as file:
    file.writelines(line for line in lines if line.strip("\n") not in id_lookup)
print(f'Finished removing id\'s from archive.txt. Counted {len(ids)} id\'s to remove')

View File

@ -1,21 +0,0 @@
This folder contains some files making the static file storage a bit more databasey.
## database
The database stores the following values in a videos table:
```
channel_id channel_name upload_date video_id video_title video_ext
```
It is not normalized, because I do not want to blow my brains out yet. People change their usernames, and YouTube changes video_ids even though they should be static, and more bullshit like that.
I do not want to go scripting all edge-cases. So the drawback for now is that this uses a lot more storage on disk.
## search
With those values you can reconstruct the HTTP URL, so as you might have guessed I need a search script that will search through the db and return the HTTP path to the client.
Maybe a cgi script?
## add_db.py
This script will be run after a new video has been downloaded by yt-dlp. It will add the values listed above to the database.
## delete_db.py
You can run this with a video_id as the argument. It will then delete the entry from the database.
##

View File

@ -1,79 +0,0 @@
# this script requires the options listed below and then checks if the video_id exists in table. if not, it adds it
# TODO: THE TITLE THAT YT-DLP PASSES THROUGH IS UTF8 COMPATIBLE, HOWEVER THAT IS NOT WHAT IS STORED ON DISK. REVERT BACK TO THE OUTPUT URL AND SPLIT THAT UP INTO THE DIFFERENT FIELDS MANUALLY
import argparse
import sqlite3
# --exec "python3 /var/www/archive.ventilaar.net/search/add_db.py %(filepath)q -c %(channel_id)q -d %(upload_date)q -i %(id)q -e %(ext)q"
def parse_args():
    """Parse the command line that yt-dlp's ``--exec`` hook passes in.

    :returns: namespace with ``path``, ``channel_id``, ``upload_date``,
        ``video_id`` and ``video_extension``.
    :raises SystemExit: (from argparse) when an argument is missing.
    """
    parser = argparse.ArgumentParser(description='Add a downloaded video to the videos database. All options are required.')
    parser.add_argument('path')
    # The rest of the script reads every one of these, so let argparse reject
    # an incomplete command line instead of failing later on a None value.
    parser.add_argument('-c', '--channel_id', required=True)
    parser.add_argument('-d', '--upload_date', required=True)
    parser.add_argument('-i', '--video_id', required=True)
    parser.add_argument('-e', '--video_extension', required=True)
    return parser.parse_args()
class Mydb:
    """Small wrapper around the SQLite ``videos`` table used by add_db.py.

    The connection and a single cursor are opened on construction and
    committed and closed again when the object is garbage collected.
    """

    def __init__(self, path='/var/www/archive.ventilaar.net/search/videos.db'):
        """Open the database.

        :param path: SQLite database file. Defaults to the production
            location; pass e.g. ``'videos.db'`` or ``':memory:'`` to work on
            another database.
        """
        # Set first so __del__ stays safe when connect() raises.
        self.conn = None
        self.conn = sqlite3.connect(path)
        self.cur = self.conn.cursor()

    def insert_video(self, args, filen, uploader):
        """Insert one row built from the parsed arguments.

        :param args: object with ``channel_id``, ``upload_date``,
            ``video_id`` and ``video_extension`` attributes.
        :param filen: value stored in the ``video_title`` column.
        :param uploader: value stored in the ``channel_name`` column.
        """
        query = ''' INSERT INTO videos (channel_id, channel_name, upload_date, video_id, video_title, video_ext) VALUES (?, ?, ?, ?, ?, ?) '''
        data = (args.channel_id, uploader, args.upload_date, args.video_id, filen, args.video_extension)
        self.cur.execute(query, data)
        self.conn.commit()

    def check_video_id(self, vid):
        """
        returns true if video id exists in table, false if not
        """
        query = ''' SELECT video_id FROM videos WHERE video_id = ? '''
        data = [vid]
        self.cur.execute(query, data)
        return bool(self.cur.fetchone())

    def __del__(self):
        # The connection is missing when __init__ never got that far.
        conn = getattr(self, 'conn', None)
        if conn is None:
            return
        conn.commit()
        conn.close()
def get_filename(opts):
    """Return the file name of ``opts.path`` without directory and extension.

    Only the last dot-separated part is dropped, so dots inside the title are
    kept. A file name without any dot yields an empty string.

    :param opts: object with a ``path`` attribute using '/' separators.
    """
    basename = opts.path.split('/')[-1]
    # rpartition cuts at the last dot; [0] is everything in front of it.
    return basename.rpartition('.')[0]
def get_uploader(opts):
    """Extract the channel name from the channel folder in ``opts.path``.

    The folder is taken from position 6 of the '/'-separated path and is
    expected to look like 'channel_id(channel_name)'; the text between the
    first '(' and the last ')' is returned.
    """
    segments = opts.path.split('/')
    channel_dir = segments[6]
    start = channel_dir.index('(') + 1
    end = channel_dir.rindex(')')
    return channel_dir[start:end]
def main(args):
    """Add the video described by *args* to the table unless it is present.

    Uses the module-level ``db`` object. Exits with status 1 when the video
    id is already stored.
    """
    title = get_filename(args)
    uploader = get_uploader(args)
    already_known = db.check_video_id(args.video_id)
    if already_known:
        print('ERROR: This video ID was in the table. What went wrong man?')
        exit(1)
    db.insert_video(args, title, uploader)
    print('SUCCESS: Video metadata added to database')


if __name__ == "__main__":
    options = parse_args()  # fetch the command line arguments
    db = Mydb()
    main(options)

Binary file not shown.

Before

Width:  |  Height:  |  Size: 530 B

Binary file not shown.

Before

Width:  |  Height:  |  Size: 642 B

Binary file not shown.

Before

Width:  |  Height:  |  Size: 602 B

View File

@ -1,34 +0,0 @@
import argparse
import sqlite3
def parse_args():
    """Parse the command line: one positional ``vid_id`` argument."""
    cli = argparse.ArgumentParser(description='Remove a video entry from the database')
    cli.add_argument('vid_id', help='youtube video id')
    parsed = cli.parse_args()
    return parsed
class Mydb:
    """Small wrapper around the SQLite ``videos`` table used by delete_db.py.

    The connection and a single cursor are opened on construction and
    committed and closed again when the object is garbage collected.
    """

    def __init__(self, path='/var/www/archive.ventilaar.net/search/videos.db'):
        """Open the database.

        :param path: SQLite database file. Defaults to the production
            location; pass e.g. ``'videos.db'`` or ``':memory:'`` to work on
            another database.
        """
        # Set first so __del__ stays safe when connect() raises.
        self.conn = None
        self.conn = sqlite3.connect(path)
        self.cur = self.conn.cursor()

    def delete_video(self, v_id):
        """Delete every row whose ``video_id`` equals *v_id* and commit."""
        query = ''' DELETE from videos WHERE video_id=? '''
        data = [v_id]
        self.cur.execute(query, data)
        self.conn.commit()

    def __del__(self):
        # The connection is missing when __init__ never got that far.
        conn = getattr(self, 'conn', None)
        if conn is None:
            return
        conn.commit()
        conn.close()
def main(args):
    """Delete the row(s) for ``args.vid_id`` through the module-level ``db``."""
    target = args.vid_id
    db.delete_video(target)
    print(f'{target} should be deleted')


if __name__ == "__main__":
    options = parse_args()  # fetch the command line arguments
    db = Mydb()
    main(options)

View File

@ -1,40 +0,0 @@
import sqlite3
class Mydb:
    """Read-only helper for statistics over the SQLite ``videos`` table.

    The connection and a single cursor are opened on construction and closed
    again when the object is garbage collected.
    """

    def __init__(self, path='/var/www/archive.ventilaar.net/search/videos.db'):
        """Open the database.

        :param path: SQLite database file. Defaults to the production
            location; pass e.g. ``'videos.db'`` or ``':memory:'`` to work on
            another database.
        """
        # Set first so __del__ stays safe when connect() raises.
        self.conn = None
        self.conn = sqlite3.connect(path)
        self.cur = self.conn.cursor()

    def calc_total_videos(self):
        """Return the number of distinct ``video_id`` values."""
        query = ''' SELECT count(DISTINCT video_id) FROM videos '''
        self.cur.execute(query)
        return self.cur.fetchone()[0]

    def calc_total_channels(self):
        """Return the number of distinct ``channel_id`` values."""
        query = ''' SELECT count(DISTINCT channel_id) FROM videos '''
        self.cur.execute(query)
        return self.cur.fetchone()[0]

    def __del__(self):
        # The connection is missing when __init__ never got that far.
        conn = getattr(self, 'conn', None)
        if conn is not None:
            conn.close()
def main():
    """Regenerate stats.php with the current video and channel totals.

    Reads both totals through the module-level ``db`` object and writes a
    small PHP snippet that prints them when no search was posted.
    """
    total_videos = db.calc_total_videos()
    total_channels = db.calc_total_channels()
    php_lines = [
        '<?php',
        '# THIS FILE IS DYNAMICALLY GENERATED BY gen_stats.py',
        "if(!ISSET($_POST['q'])){",
        f' $total_videos = {total_videos};',
        f' $total_channels = {total_channels};',
        " echo '<p>Total videos: '.$total_videos.'</p>';",
        " echo '<p>Total channels: '.$total_channels.'</p>';",
        '}',
        '?>',
    ]
    # Newline between the lines, none after the closing tag.
    content = '\n'.join(php_lines)
    with open('/var/www/archive.ventilaar.net/search/stats.php', 'w') as file:
        file.write(content)


if __name__ == "__main__":
    db = Mydb()
    main()

View File

@ -1,73 +0,0 @@
<!---
DONT LOOK AT MY CODE, IT's SHITTY
I'm NOT A QUALIFIED WEB DEVELOPER
IF IT WORKS, IT WORKS, DON't TOUCH IT
--->
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="utf-8" />
<meta name="description" content="Archiving youtube videos in mass(and in LQ)">
<meta name="viewport" content="width=device-width, initial-scale=1, shrink-to-fit=no">
<title>archive · ventilaar.net</title>
<link href="https://cdn.ventilaar.net/assets/css/bootstrap.min.css" rel="stylesheet">
</head>
<body class="bg-dark">
<!-- Navigation -->
<nav class="navbar navbar-expand navbar-dark bg-dark static-top">
<div class="container">
<a class="navbar-brand" href="https://www.ventilaar.nl/">Ventilaar.net</a>
<ul class="navbar-nav ">
<li class="nav-item">
<a class="nav-link" href="https://ventilaar.net/">Home</a>
</li>
<li class="nav-item">
<a class="nav-link" href="/">Archive</a>
</li>
<li class="nav-item active">
<a class="nav-link" href="/search/">Search</a>
</li>
</ul>
</div>
</nav>
<!-- Page Content -->
<div class="container text-light">
<div class="row">
<div class="col-lg-12 text-center">
<div>
<h1 class="mt-5">Search the archive</h1>
<p class="lead">"A big archive needs a search function." -Sun Tzu</p>
<p>Use the form below to search through the automatic channel archive.</p>
<p class="small">The manual <a href="https://www.youtube.com/playlist?list=PLaF89kHAz45s1_hqMMQuzS-6gZ4cidmfy">playlist</a> is not being indexed here. For that look <a href="https://archive.ventilaar.net/videos/manual/">here</a> instead.</p>
</div>
<hr>
<div>
<form method="POST" action=''>
<div class="form-group">
<label for="forminput">Text to search for</label>
<input name="q" class="form-control" id="forminput" type="text">
</div>
<div class="form-group">
<label for="formcriteria">Search criteria</label>
<select class="form-control" id="formcriteria" name="c">
<option value="video_title">Video Title</option>
<option value="channel_name">Channel Name</option>
<option value="channel_id">Channel ID</option>
<option value="video_id">Youtube video ID (the 11 character ID)</option>
<option value="upload_date">Upload date(YYYYMMDD)</option>
</select>
</div>
<button class="btn btn-success">Search</button>
</form>
</div>
<br>
<hr>
<?php include'stats.php' ?>
<?php include'search_do.php' ?>
</div>
</div>
</div>
</body>
</html>

View File

@ -1,67 +0,0 @@
<?php
// Search handler included by the search page. When the form was posted it
// looks up matching rows in the `videos` table and prints them as a table.
if (isset($_POST['q'])) {
    $keyword = is_string($_POST['q']) ? $_POST['q'] : '';
    $field = (isset($_POST['c']) && is_string($_POST['c'])) ? $_POST['c'] : '';

    // A column name cannot be bound as a statement parameter, so only the
    // columns offered by the form are accepted before it enters the SQL text.
    $allowed_fields = array('video_title', 'channel_name', 'channel_id', 'video_id', 'upload_date');
    if (!in_array($field, $allowed_fields, true)) {
        die('<div class="alert alert-danger" role="alert">Unknown search criteria!</div>');
    }
    if (strlen($keyword) < 3) {
        die('<div class="alert alert-danger" role="alert">Search term shoud be at least 3 characters!</div>');
    }
    if ((strlen($keyword) < 6) && ($field == 'upload_date')) {
        die('<div class="alert alert-danger" role="alert">Please include at least year and month. Example: 201708</div>');
    }

    // The SQLite3 constructor throws on failure, so "or die" never fired.
    try {
        $conn = new SQLite3('/var/www/archive.ventilaar.net/search/copy.db', SQLITE3_OPEN_READONLY);
    } catch (Exception $e) {
        die('<div class="alert alert-danger" role="alert">Unable to open database!</div>');
    }

    // The search term is bound as a parameter and never concatenated into SQL.
    $stmt = $conn->prepare('SELECT * FROM videos WHERE ' . $field . ' LIKE :keyword ORDER BY channel_name, upload_date DESC;');
    if ($stmt === false) {
        die('<div class="alert alert-danger" role="alert">Search failed!</div>');
    }
    $stmt->bindValue(':keyword', '%' . $keyword . '%', SQLITE3_TEXT);
    $query = $stmt->execute();
    if ($query === false) {
        die('<div class="alert alert-danger" role="alert">Search failed!</div>');
    }

    // Escapes database values before they are written into the HTML output.
    $esc = function ($text) {
        return htmlspecialchars((string) $text, ENT_QUOTES, 'UTF-8');
    };

    echo '
<div>
<h2>Video Title</h2>
<p style="color: #007bff;">Video title url = Raw archived files</p>
<p><img src="assets/play24.png" /> = Play video direct</p>
<p><img src="assets/youtube24.png" /> = Play video on Youtube</p>
<table class="table table-dark table-bordered">
<thead>
<tr>
<th scope="col">Video Title</th>
<th scope="col">Upload Date</th>
<th scope="col">Video ID</th>
<th scope="col">Channel name</th>
<th scope="col">Channel ID</th>
</tr>
</thead>
<tbody>';

    while ($fetch = $query->fetchArray(SQLITE3_ASSOC)) {
        // Each path segment is URL-encoded so titles containing '#', '?' or
        // spaces still produce a working link.
        $channelpath = 'https://archive.ventilaar.net/videos/automatic/' . rawurlencode($fetch['channel_id'] . '(' . $fetch['channel_name'] . ')') . '/';
        $idpath = $channelpath . rawurlencode($fetch['upload_date']) . '/' . rawurlencode($fetch['video_id']) . '/';
        $videopath = $idpath . rawurlencode($fetch['video_title'] . '.' . $fetch['video_ext']);
        $uploaddate = date("Y M d", strtotime($fetch['upload_date']));
        $youtubeurl = 'https://youtu.be/' . rawurlencode($fetch['video_id']);
        echo
        '<tr>
<td>
<a href="' . $esc($idpath) . '">' . $esc($fetch['video_title']) . '</a>
<a href="' . $esc($videopath) . '"><img src="assets/play24.png" /></a>
</td>
<td>' . $esc($uploaddate) . '</td>
<td>
' . $esc($fetch['video_id']) . '
<a href="' . $esc($youtubeurl) . '"><img src="assets/youtube24.png" /></a>
</td>
<td>' . $esc($fetch['channel_name']) . '</td>
<td>' . $esc($fetch['channel_id']) . '</td>
</tr>';
    }
    // Close the table as well; the original left <tbody> and <table> open.
    echo '</tbody></table></div>';
    $conn->close();
}
?>

View File

@ -1,66 +0,0 @@
<?php
// Search handler included by the search page. When the form was posted it
// looks up matching rows in the `videos` table and prints them as a table.
if (isset($_POST['q'])) {
    // Explicit checks instead of "or die": that form also rejected the valid
    // input "0" and ended the page without any message.
    $keyword = is_string($_POST['q']) ? $_POST['q'] : '';
    $field = (isset($_POST['c']) && is_string($_POST['c'])) ? $_POST['c'] : '';

    // A column name cannot be bound as a statement parameter (binding it
    // compares against the literal string), so it is checked against the
    // columns offered by the form and then placed into the SQL text.
    $allowed_fields = array('video_title', 'channel_name', 'channel_id', 'video_id', 'upload_date');
    if (!in_array($field, $allowed_fields, true)) {
        die('<div class="alert alert-danger" role="alert">Unknown search criteria!</div>');
    }
    if (strlen($keyword) < 3) {
        die('<div class="alert alert-danger" role="alert">Search term shoud be at least 3 characters!</div>');
    }

    // The SQLite3 constructor throws on failure, so "or die" never fired.
    try {
        $db = new SQLite3('/var/www/archive.ventilaar.net/search/copy.db', SQLITE3_OPEN_READONLY);
    } catch (Exception $e) {
        die('<div class="alert alert-danger" role="alert">Unable to open database!</div>');
    }

    // The placeholder must not be quoted: ':wrd' inside quotes is a plain
    // string literal and is never replaced by the bound value.
    $stmt_h = $db->prepare('SELECT * FROM videos WHERE ' . $field . ' LIKE :wrd ORDER BY channel_name, upload_date DESC;');
    if ($stmt_h === false) {
        die('<div class="alert alert-danger" role="alert">Search failed!</div>');
    }
    $stmt_h->bindValue(':wrd', '%' . $keyword . '%', SQLITE3_TEXT);
    $res = $stmt_h->execute();
    if ($res === false) {
        die('<div class="alert alert-danger" role="alert">Search failed!</div>');
    }

    // Escapes database values before they are written into the HTML output.
    $esc = function ($text) {
        return htmlspecialchars((string) $text, ENT_QUOTES, 'UTF-8');
    };

    echo '
<div>
<h2>Video Title</h2>
<p style="color: #007bff;">Video title url = Raw archived files</p>
<p><img src="assets/play24.png" /> = Play video direct</p>
<p><img src="assets/youtube24.png" /> = Play video on Youtube</p>
<table class="table table-dark table-bordered">
<thead>
<tr>
<th scope="col">Video Title</th>
<th scope="col">Upload Date (YYYYMMDD)</th>
<th scope="col">Video ID</th>
<th scope="col">Channel name</th>
<th scope="col">Channel ID</th>
</tr>
</thead>
<tbody>';

    while ($row = $res->fetchArray(SQLITE3_ASSOC)) {
        // Each path segment is URL-encoded so titles containing '#', '?' or
        // spaces still produce a working link.
        $channelpath = 'https://archive.ventilaar.net/videos/automatic/' . rawurlencode($row['channel_id'] . '(' . $row['channel_name'] . ')') . '/';
        $idpath = $channelpath . rawurlencode($row['upload_date']) . '/' . rawurlencode($row['video_id']) . '/';
        $videopath = $idpath . rawurlencode($row['video_title'] . '.' . $row['video_ext']);
        $youtubeurl = 'https://youtu.be/' . rawurlencode($row['video_id']);
        echo
        '<tr>
<td>
<a href="' . $esc($idpath) . '">' . $esc($row['video_title']) . '</a>
<a href="' . $esc($videopath) . '"><img src="assets/play24.png" /></a>
</td>
<td>' . $esc($row['upload_date']) . '</td>
<td>
' . $esc($row['video_id']) . '
<a href="' . $esc($youtubeurl) . '"><img src="assets/youtube24.png" /></a>
</td>
<td>' . $esc($row['channel_name']) . '</td>
<td>' . $esc($row['channel_id']) . '</td>
</tr>';
    }
    // Close the table as well; the original left <tbody> and <table> open.
    echo '</tbody></table></div>';
    $db->close();
}
?>

Binary file not shown.

4
make_celery.py Normal file
View File

@ -0,0 +1,4 @@
import ayta
# Build the Flask application through the package's application factory.
flask_app = ayta.create_app()
# Module-level handle on the object stored under the "celery" extension key.
# NOTE(review): presumably registered by celery_init_app(app) inside
# create_app() and used as the Celery worker entry point; confirm in ayta/extensions.py.
celery_app = flask_app.extensions["celery"]