many things changed
This commit is contained in:
parent
e264a346a5
commit
69bf7026dd
|
@ -1,11 +1,8 @@
|
|||
name: Generate release
|
||||
|
||||
on:
|
||||
push:
|
||||
tags:
|
||||
- 'v*'
|
||||
branches:
|
||||
- master
|
||||
release:
|
||||
types: [published]
|
||||
|
||||
jobs:
|
||||
build-and-publish:
|
||||
|
@ -25,4 +22,13 @@ jobs:
|
|||
uses: docker/build-push-action@v5
|
||||
with:
|
||||
push: true
|
||||
tags: git.ventilaar.nl/ventilaar/ayta:latest
|
||||
tags: git.ventilaar.nl/ventilaar/ayta:latest
|
||||
|
||||
- name: Update worker server
|
||||
uses: appleboy/ssh-action@v1.0.3
|
||||
with:
|
||||
host: 192.168.66.109
|
||||
username: root
|
||||
key: ${{ secrets.SERVER_KEY }}
|
||||
port: 22
|
||||
script: /root/update_worker.sh
|
|
@ -1,7 +1,7 @@
|
|||
def create_app(test_config=None):
|
||||
import os, secrets
|
||||
from flask import Flask
|
||||
from ayta.extensions import limiter, caching
|
||||
from ayta.extensions import limiter, caching, celery_init_app
|
||||
from werkzeug.middleware.proxy_fix import ProxyFix
|
||||
|
||||
from . import filters
|
||||
|
@ -14,7 +14,9 @@ def create_app(test_config=None):
|
|||
'CACHE_DEFAULT_TIMEOUT': int(os.environ.get('AYTA_CACHETIMEOUT', 6)),
|
||||
'SECRET_KEY': os.environ.get('AYTA_SECRETKEY', secrets.token_hex(32)),
|
||||
'DEBUG': bool(os.environ.get('AYTA_DEBUG', False)),
|
||||
'MATRIX_TOKEN': bool(os.environ.get('AYTA_MATRIXTOKEN', None))
|
||||
'DOMAIN': os.environ.get('AYTA_DOMAIN', 'testing.mashallah.nl'),
|
||||
'CELERY': dict(broker_url=str(os.environ.get('AYTA_CELERYBROKER', 'amqp://guest:guest@192.168.66.140:5672/')),
|
||||
task_ignore_result=True,)
|
||||
}
|
||||
|
||||
app = Flask(__name__)
|
||||
|
@ -22,6 +24,7 @@ def create_app(test_config=None):
|
|||
|
||||
limiter.init_app(app)
|
||||
caching.init_app(app)
|
||||
celery_init_app(app)
|
||||
|
||||
app.wsgi_app = ProxyFix(app.wsgi_app, x_for=1)
|
||||
|
||||
|
|
|
@ -3,6 +3,7 @@ from ..nosql import get_nosql
|
|||
from ..s3 import get_s3
|
||||
from ..dlp import checkChannelId, getChannelInfo
|
||||
from ..decorators import login_required
|
||||
from ..tasks import subscribe_websub_callback, unsubscribe_websub_callback
|
||||
from datetime import datetime
|
||||
import requests
|
||||
|
||||
|
@ -17,7 +18,9 @@ def base():
|
|||
@login_required
|
||||
def system():
|
||||
if request.method == 'POST':
|
||||
pass
|
||||
task = request.form.get('task', None)
|
||||
if task == 'update-value':
|
||||
pass
|
||||
|
||||
return render_template('admin/system.html')
|
||||
|
||||
|
@ -40,7 +43,8 @@ def channels():
|
|||
channelId, originalName = getChannelInfo(channelId, ('channel_id', 'uploader'))
|
||||
|
||||
if not get_nosql().insert_new_channel(channelId, originalName, addedDate):
|
||||
return 'Error inserting new channel, you probably made a mistake somewhere'
|
||||
flash('Error inserting new channel, you probably made a mistake somewhere')
|
||||
return redirect(url_for('admin.channels'))
|
||||
|
||||
return redirect(url_for('admin.channel', channelId=channelId))
|
||||
|
||||
|
@ -56,11 +60,22 @@ def channels():
|
|||
@bp.route('/channel/<channelId>', methods=['GET', 'POST'])
|
||||
@login_required
|
||||
def channel(channelId):
|
||||
channelInfo = get_nosql().get_channel_info(channelId)
|
||||
|
||||
if not channelInfo:
|
||||
flash('That channel ID does not exist in the system')
|
||||
return redirect(url_for('admin.channels'))
|
||||
|
||||
if request.method == 'POST':
|
||||
task = request.form.get('task', None)
|
||||
key = request.form.get('key', None)
|
||||
value = request.form.get('value', None)
|
||||
|
||||
if task == 'subscribe-websub':
|
||||
task = subscribe_websub_callback.delay(channelId)
|
||||
flash(f"Started task {task.id}")
|
||||
return redirect(url_for('admin.channel', channelId=channelId))
|
||||
|
||||
if task == 'update-value':
|
||||
if key == 'active':
|
||||
value = True if value else False
|
||||
|
@ -69,11 +84,7 @@ def channel(channelId):
|
|||
value = datetime.strptime(value, '%Y-%m-%d')
|
||||
|
||||
get_nosql().update_channel_key(channelId, key, value)
|
||||
|
||||
channelInfo = get_nosql().get_channel_info(channelId)
|
||||
|
||||
if not channelInfo:
|
||||
return 'That channel ID does not exist in the system'
|
||||
return redirect(url_for('admin.channel', channelId=channelId))
|
||||
|
||||
return render_template('admin/channel.html', channelInfo=channelInfo)
|
||||
|
||||
|
@ -84,9 +95,8 @@ def runs():
|
|||
task = request.form.get('task', None)
|
||||
if task == 'clean_runs':
|
||||
get_nosql().clean_runs()
|
||||
else:
|
||||
pass
|
||||
|
||||
return redirect(url_for('admin.runs'))
|
||||
|
||||
|
||||
runs = reversed(list(get_nosql().get_runs()))
|
||||
return render_template('admin/runs.html', runs=runs)
|
||||
|
@ -106,15 +116,15 @@ def websub():
|
|||
|
||||
if task == 'unsubscribe':
|
||||
channelId = get_nosql().websub_getCallback(value).get('channel')
|
||||
data = {'hub.callback': f'https://testing.ventilaar.net/websub/c/{value}',
|
||||
'hub.topic': f'https://www.youtube.com/xml/feeds/videos.xml?channel_id={channelId}',
|
||||
'hub.verify': 'async',
|
||||
'hub.mode': 'unsubscribe'
|
||||
}
|
||||
requests.post('https://pubsubhubbub.appspot.com/subscribe', data=data)
|
||||
|
||||
task = unsubscribe_websub_callback.delay(value, channelId)
|
||||
|
||||
flash(f"Started task {task.id}")
|
||||
return redirect(url_for('admin.websub'))
|
||||
|
||||
elif task == 'clean-retired':
|
||||
get_nosql().websub_cleanRetired()
|
||||
return redirect(url_for('admin.websub'))
|
||||
|
||||
callbackIds = get_nosql().websub_getCallbacks()
|
||||
callbacks = {}
|
||||
|
@ -134,6 +144,7 @@ def reports():
|
|||
if task == 'close':
|
||||
get_nosql().close_report(value)
|
||||
flash(f'Report closed {value}')
|
||||
return redirect(url_for('admin.reports'))
|
||||
|
||||
reports = get_nosql().list_reports()
|
||||
|
||||
|
|
|
@ -29,21 +29,25 @@ def login():
|
|||
if current_app.config.get('DEBUG'):
|
||||
session['username'] = 'admin'
|
||||
flash('You have been logged in')
|
||||
return redirect(url_for('admin.base'))
|
||||
return redirect(request.args.get('next', url_for('admin.base')))
|
||||
|
||||
if not password:
|
||||
flash('Password was empty')
|
||||
return 'password required!'
|
||||
return redirect(url_for('auth.login'))
|
||||
|
||||
try:
|
||||
ph = PasswordHasher()
|
||||
if ph.verify(corr, password):
|
||||
session['username'] = 'admin'
|
||||
flash('You have been logged in')
|
||||
return redirect(url_for('admin.base'))
|
||||
|
||||
return redirect(request.args.get('next', url_for('admin.base')))
|
||||
|
||||
except VerifyMismatchError:
|
||||
flash('Wrong password')
|
||||
return redirect(url_for('auth.login'))
|
||||
except:
|
||||
flash('Something went wrong')
|
||||
return redirect(url_for('auth.login'))
|
||||
|
||||
return render_template('login.html')
|
|
@ -1,4 +1,4 @@
|
|||
from flask import Blueprint, render_template
|
||||
from flask import Blueprint, render_template, flash, url_for, redirect
|
||||
from ..nosql import get_nosql
|
||||
from ..s3 import get_s3
|
||||
from ..extensions import caching, caching_unless
|
||||
|
@ -23,7 +23,8 @@ def channel(channelId):
|
|||
channelInfo = get_nosql().get_channel_info(channelId)
|
||||
|
||||
if not channelInfo:
|
||||
return 'That channel ID does not exist in the system'
|
||||
flash('That channel ID does not exist in the system')
|
||||
return redirect(url_for('channel.base'))
|
||||
|
||||
videoIds = get_nosql().get_channel_videoIds(channelId)
|
||||
|
||||
|
|
|
@ -12,3 +12,8 @@ def base():
|
|||
@caching.cached(unless=caching_unless)
|
||||
def help():
|
||||
return render_template('help.html')
|
||||
|
||||
# Route rules must begin with a slash: without a blueprint url_prefix the
# bare 'robots.txt' rule is rejected when the route is registered, and with
# a prefix the leading slash is normalised away, so '/robots.txt' is safe
# in both setups.
@bp.route('/robots.txt', methods=['GET'])
@caching.cached(unless=caching_unless)
def robots():
    """Serve the crawler policy by rendering the ``robots.txt`` template."""
    return render_template('robots.txt')
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
from flask import Blueprint, render_template, request, flash
|
||||
from flask import Blueprint, render_template, request, flash, redirect, url_for
|
||||
from ..nosql import get_nosql
|
||||
from ..extensions import caching, caching_v_parameter, caching_unless
|
||||
|
||||
|
@ -10,10 +10,12 @@ def base():
|
|||
vGet = request.args.get('v')
|
||||
|
||||
if not vGet:
|
||||
return 'Missing v argument', 400
|
||||
flash('Thats not how it works pal')
|
||||
return redirect(url_for('index.base'))
|
||||
|
||||
if not get_nosql().check_exists(vGet):
|
||||
return 'The requested video is not in the archive', 404
|
||||
flash('The requested video is not in the archive')
|
||||
return redirect(url_for('index.base'))
|
||||
|
||||
render = {}
|
||||
|
||||
|
@ -22,12 +24,15 @@ def base():
|
|||
|
||||
if reason not in ['auto-video', 'metadata', 'illegal']:
|
||||
flash('Invalid report reason')
|
||||
return redirect(url_for('watch.base', v=vGet))
|
||||
else:
|
||||
reportId = get_nosql().insert_report(vGet, reason)
|
||||
if reportId:
|
||||
flash(f'Report has been received: {reportId}')
|
||||
return redirect(url_for('watch.base', v=vGet))
|
||||
else:
|
||||
flash('Something went wrong with reporting')
|
||||
return redirect(url_for('watch.base', v=vGet))
|
||||
|
||||
render['info'] = get_nosql().get_video_info(vGet)
|
||||
render['params'] = request.args.get('v')
|
||||
|
|
|
@ -35,6 +35,6 @@ def callback(cap):
|
|||
if get_nosql().websub_existsCallback(cap):
|
||||
if not get_nosql().websub_savePost(cap, str(request.data)):
|
||||
return abort(500)
|
||||
return '', 204
|
||||
return '', 202
|
||||
|
||||
return abort(404)
|
|
@ -2,10 +2,10 @@ import yt_dlp
|
|||
|
||||
|
||||
def checkChannelId(channelId):
|
||||
if len(channelId) < 24: # channelId lengths are 24 characters
|
||||
if len(channelId) <= 23: # channelId lengths are 24 characters
|
||||
return False
|
||||
|
||||
if len(channelId) > 25: # But some are 25, idk why
|
||||
if len(channelId) >= 26: # But some are 25, idk why
|
||||
return False
|
||||
|
||||
if channelId[0:2] not in ['UC', 'UU']:
|
||||
|
|
|
@ -3,8 +3,21 @@ from flask_limiter.util import get_remote_address
|
|||
|
||||
from flask_caching import Cache
|
||||
|
||||
from flask import request, session
|
||||
from flask import Flask, request, session
|
||||
|
||||
from celery import Celery, Task
|
||||
|
||||
def celery_init_app(app: Flask) -> Celery:
    """Build a Celery application tied to the given Flask application.

    The Celery instance is named after ``app.name``, configured from
    ``app.config["CELERY"]``, installed as the default Celery app and
    stored under ``app.extensions["celery"]``.

    Returns the configured Celery instance.
    """

    class FlaskTask(Task):
        """Task base class whose body runs inside the Flask app context."""

        def __call__(self, *args: object, **kwargs: object) -> object:
            # Push an application context so the task body can use
            # context-bound objects such as ``current_app``.
            with app.app_context():
                return self.run(*args, **kwargs)

    worker = Celery(app.name, task_cls=FlaskTask)
    worker.config_from_object(app.config["CELERY"])
    worker.set_default()

    app.extensions["celery"] = worker
    return worker
|
||||
|
||||
def caching_unless(*args, **kwargs):
|
||||
# if it is not a get request
|
||||
|
@ -25,11 +38,11 @@ def caching_unless(*args, **kwargs):
|
|||
def caching_v_parameter(*args, **kwargs):
    """Return the ``v`` query-string argument of the current request.

    Positional and keyword arguments are accepted but ignored.
    """
    video_param = request.args.get('v')
    return video_param
|
||||
|
||||
|
||||
limiter = Limiter(
|
||||
get_remote_address,
|
||||
default_limits=['86400 per day', '3600 per hour'],
|
||||
storage_uri="memory://",
|
||||
)
|
||||
|
||||
caching = Cache()
|
||||
caching = Cache()
|
||||
|
||||
|
|
|
@ -293,13 +293,16 @@ class Mango:
|
|||
|
||||
status = status.get('status')
|
||||
|
||||
if status in ['requesting', 'active']:
|
||||
if status in ['requesting', 'active', 'retiring']:
|
||||
return True
|
||||
|
||||
return False
|
||||
|
||||
def websub_retiringCallback(self, callbackId):
    """Flag a callback as ``retiring`` and record ``retiring_time``.

    Returns the result of ``update_one`` on ``self.websub_callbacks``.
    """
    selector = {'id': callbackId}
    change = {'$set': {'status': 'retiring', 'retiring_time': current_time(object=True)}}
    return self.websub_callbacks.update_one(selector, change)
|
||||
|
||||
def websub_retireCallback(self, callbackId):
    """Flag a callback as ``retired`` and record ``retired_time``.

    The timestamp is written to ``retired_time`` (not ``retiring_time``) so
    that the moment unsubscription was requested, stored by
    ``websub_retiringCallback``, is not overwritten when retirement
    completes.

    Returns the result of ``update_one`` on ``self.websub_callbacks``.
    """
    return self.websub_callbacks.update_one(
        {'id': callbackId},
        {'$set': {'status': 'retired', 'retired_time': current_time(object=True)}},
    )
|
||||
|
||||
def websub_deleteCallback(self, callbackId):
    """Delete the callback document whose ``id`` equals ``callbackId``.

    Returns the result of ``delete_one`` on ``self.websub_callbacks``.
    """
    selector = {'id': callbackId}
    return self.websub_callbacks.delete_one(selector)
|
||||
|
|
|
@ -0,0 +1,47 @@
|
|||
from celery import shared_task
|
||||
from flask import current_app
|
||||
|
||||
@shared_task()
def subscribe_websub_callback(channelId):
    """Ask the PubSubHubbub hub to subscribe us to a channel's video feed.

    A new callback id is created through ``websub_newCallback``, marked as
    requesting through ``websub_requestingCallback``, and then a subscribe
    request for the channel's feed is posted to the hub. The callback URL is
    built from the ``DOMAIN`` setting of the current Flask application.

    Args:
        channelId: channel id inserted into the feed topic URL.

    Returns:
        True when the hub answers with HTTP 202, False otherwise.

    Raises:
        requests.RequestException: when the hub cannot be reached or does
            not answer within the timeout.
    """
    # Imported lazily, as in the rest of this module.
    import requests
    from .nosql import get_nosql

    callbackId = get_nosql().websub_newCallback(channelId)

    url = 'https://pubsubhubbub.appspot.com/subscribe'
    data = {
        'hub.callback': f'https://{current_app.config["DOMAIN"]}/websub/c/{callbackId}',
        'hub.topic': f'https://www.youtube.com/xml/feeds/videos.xml?channel_id={channelId}',
        'hub.verify': 'async',
        'hub.mode': 'subscribe',
        'hub.verify_token': '',
        'hub.secret': '',
        # The WebSub parameter is named hub.lease_seconds; the previous
        # 'hub.lease_numbers' key is not a defined parameter, so the
        # requested lease never reached the hub.
        'hub.lease_seconds': '86400',
    }

    # Record the state change before contacting the hub, as before.
    get_nosql().websub_requestingCallback(callbackId)

    # A timeout keeps a stalled hub from blocking the worker indefinitely.
    response = requests.post(url, data=data, timeout=30)

    return response.status_code == 202
|
||||
|
||||
@shared_task()
def unsubscribe_websub_callback(callbackId, channelId):
    """Ask the PubSubHubbub hub to drop the subscription behind a callback.

    The callback is first marked as retiring through
    ``websub_retiringCallback``; then an unsubscribe request for the
    channel's feed is posted to the hub. The callback URL is built from the
    ``DOMAIN`` setting of the current Flask application.

    Args:
        callbackId: id of the callback, used as the last URL path segment.
        channelId: channel id inserted into the feed topic URL.

    Returns:
        True when the hub answers with HTTP 202, False otherwise.

    Raises:
        requests.RequestException: when the hub cannot be reached or does
            not answer within the timeout.
    """
    # Imported lazily, as in the rest of this module.
    import requests
    from .nosql import get_nosql

    url = 'https://pubsubhubbub.appspot.com/subscribe'
    data = {
        'hub.callback': f'https://{current_app.config["DOMAIN"]}/websub/c/{callbackId}',
        'hub.topic': f'https://www.youtube.com/xml/feeds/videos.xml?channel_id={channelId}',
        'hub.verify': 'async',
        'hub.mode': 'unsubscribe',
    }

    # Record the state change before contacting the hub, as before.
    get_nosql().websub_retiringCallback(callbackId)

    # A timeout keeps a stalled hub from blocking the worker indefinitely.
    response = requests.post(url, data=data, timeout=30)

    return response.status_code == 202
|
|
@ -4,10 +4,15 @@
|
|||
|
||||
{% block content %}
|
||||
<div class="row">
|
||||
<div class="col s12">
|
||||
<div class="col s12 l11">
|
||||
<h4>{{ channelInfo.original_name }} administration page</h4>
|
||||
<p>The update actions below directly apply to the database!</p>
|
||||
</div>
|
||||
<div class="col s12 l1 m-5">
|
||||
<form method="POST">
|
||||
<input title="Requests callback URL from youtube API" type="submit" value="subscribe-websub" name="task">
|
||||
</form>
|
||||
</div>
|
||||
</div>
|
||||
<div class="row">
|
||||
<div class="col s12 l4">
|
||||
|
|
|
@ -43,30 +43,6 @@
|
|||
</div>
|
||||
</div>
|
||||
</div>
|
||||
<div class="col s12 l4 m-4">
|
||||
<div class="card green">
|
||||
<div class="card-content white-text">
|
||||
<span class="card-title">Placeholder</span>
|
||||
<p>I am a very simple card. I am good at containing small bits of information. I am convenient because I require little markup to use effectively.</p>
|
||||
</div>
|
||||
<div class="card-action">
|
||||
<a href="#">This is a link</a>
|
||||
<a href="#">This is a link</a>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
<div class="col s12 l4 m-4">
|
||||
<div class="card green">
|
||||
<div class="card-content white-text">
|
||||
<span class="card-title">Placeholder</span>
|
||||
<p>I am a very simple card. I am good at containing small bits of information. I am convenient because I require little markup to use effectively.</p>
|
||||
</div>
|
||||
<div class="card-action">
|
||||
<a href="#">This is a link</a>
|
||||
<a href="#">This is a link</a>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
<div class="divider"></div>
|
||||
<div class="row">
|
||||
|
|
|
@ -19,32 +19,6 @@
|
|||
<h5>Report options</h5>
|
||||
</div>
|
||||
</div>
|
||||
<div class="row">
|
||||
<div class="col s12 l4 m-4">
|
||||
<div class="card green">
|
||||
<div class="card-content white-text">
|
||||
<span class="card-title">Placeholder</span>
|
||||
<p>I am a very simple card. I am good at containing small bits of information. I am convenient because I require little markup to use effectively.</p>
|
||||
</div>
|
||||
<div class="card-action">
|
||||
<a href="#">This is a link</a>
|
||||
<a href="#">This is a link</a>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
<div class="col s12 l4 m-4">
|
||||
<div class="card green">
|
||||
<div class="card-content white-text">
|
||||
<span class="card-title">Placeholder</span>
|
||||
<p>I am a very simple card. I am good at containing small bits of information. I am convenient because I require little markup to use effectively.</p>
|
||||
</div>
|
||||
<div class="card-action">
|
||||
<a href="#">This is a link</a>
|
||||
<a href="#">This is a link</a>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
<div class="divider"></div>
|
||||
<div class="row">
|
||||
<div class="col s6 l9">
|
||||
|
|
|
@ -21,7 +21,7 @@
|
|||
<form method="POST">
|
||||
<div class="input-field">
|
||||
<span class="supporting-text">Enable WebSub</span>
|
||||
<input class="validate" type="text" value="{{ item }}" name="key" hidden>
|
||||
<input class="validate" type="text" value="websub" name="key" hidden>
|
||||
</div>
|
||||
<div class="input-field m-4">
|
||||
<div class="switch">
|
||||
|
|
|
@ -19,32 +19,6 @@
|
|||
<h5>WebSub options</h5>
|
||||
</div>
|
||||
</div>
|
||||
<div class="row">
|
||||
<div class="col s12 l4 m-4">
|
||||
<div class="card green">
|
||||
<div class="card-content white-text">
|
||||
<span class="card-title">Placeholder</span>
|
||||
<p>I am a very simple card. I am good at containing small bits of information. I am convenient because I require little markup to use effectively.</p>
|
||||
</div>
|
||||
<div class="card-action">
|
||||
<a href="#">This is a link</a>
|
||||
<a href="#">This is a link</a>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
<div class="col s12 l4 m-4">
|
||||
<div class="card green">
|
||||
<div class="card-content white-text">
|
||||
<span class="card-title">Placeholder</span>
|
||||
<p>I am a very simple card. I am good at containing small bits of information. I am convenient because I require little markup to use effectively.</p>
|
||||
</div>
|
||||
<div class="card-action">
|
||||
<a href="#">This is a link</a>
|
||||
<a href="#">This is a link</a>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
<div class="divider"></div>
|
||||
<div class="row">
|
||||
<div class="col s6 l9">
|
||||
|
@ -68,6 +42,7 @@
|
|||
<th>requesting_time</th>
|
||||
<th>activation_time</th>
|
||||
<th>retiring_time</th>
|
||||
<th>retired_time</th>
|
||||
<th>lease</th>
|
||||
</tr>
|
||||
</thead>
|
||||
|
@ -87,6 +62,7 @@
|
|||
<td>{{ callbacks[callback].get('requesting_time') }}</td>
|
||||
<td>{{ callbacks[callback].get('activation_time') }}</td>
|
||||
<td>{{ callbacks[callback].get('retiring_time') }}</td>
|
||||
<td>{{ callbacks[callback].get('retired_time') }}</td>
|
||||
<td>{{ callbacks[callback].get('lease') }}</td>
|
||||
</tr>
|
||||
{% endfor %}
|
||||
|
|
|
@ -1,37 +0,0 @@
|
|||
tgtoken = 'xxx'
|
||||
chatid = xxx
|
||||
|
||||
def send_messagen(service, message, silent):
    """Send a notification through the Telegram bot API, then exit.

    The bot token and chat id come from the module-level ``tgtoken`` and
    ``chatid`` values. This function never returns: the process exits with
    status 1 when Telegram does not answer with HTTP 200, and with a
    success status otherwise.

    Args:
        service: name shown in bold as the message origin.
        message: message text, shown in a code block.
        silent: passed as ``disable_notification``.

    Raises:
        requests.RequestException: when the API cannot be reached or does
            not answer within the timeout.
    """
    import html
    import sys

    import requests

    apirurl = f'https://api.telegram.org/bot{tgtoken}/sendMessage'

    # The message is sent with parse_mode HTML, so <, > and & in the
    # caller's text must be escaped or they are interpreted as markup.
    safe_service = html.escape(str(service), quote=False)
    safe_message = html.escape(str(message), quote=False)
    outtext = f'Message from: <b>{safe_service}</b>\n<code>{safe_message}</code>'

    options = {'chat_id': chatid, 'text': outtext, 'parse_mode': 'HTML', 'disable_notification': silent}

    # A timeout keeps an unreachable API from hanging the calling cron job.
    r = requests.post(apirurl, json=options, timeout=30)

    if r.status_code != 200:
        print(f"Error Sending message to telegram servers, got http status code {r.status_code}")
        # sys.exit is used instead of the exit() helper, which is injected by
        # the site module and is not guaranteed to exist.
        sys.exit(1)

    sys.exit()
|
||||
|
||||
def main():
    """Parse the command line and forward it to ``send_messagen``.

    Exactly three arguments are expected: service, message and a silent
    flag. The flag enables silent delivery only when it is the string
    ``'1'``. With any other argument count a usage line is printed.
    """
    import sys

    cli_args = sys.argv[1:]

    # Guard clause: wrong argument count only prints the usage text.
    if len(cli_args) != 3:
        print('Usage: scriptname.py "service" "message" "Silent: 1/0"')
        return

    service, message, silent_flag = cli_args
    send_messagen(service, message, silent_flag == '1')
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
|
@ -1,18 +0,0 @@
|
|||
# _______ _ ______ _______
|
||||
#|\ /|( ___ )( \ ( __ \ |\ /|( ____ )
|
||||
#| ) ( || ( ) || ( | ( \ ) | ) ( || ( )|
|
||||
#| (___) || | | || | | | ) | | | | || (____)|
|
||||
#| ___ || | | || | | | | | | | | || _____)
|
||||
#| ( ) || | | || | | | ) | | | | || (
|
||||
#| ) ( || (___) || (____/\| (__/ ) | (___) || )
|
||||
#|/ \|(_______)(_______/(______/ (_______)|/
|
||||
#
|
||||
# Did you search for a video that got removed?
|
||||
# Goto the search page to find it back again at https://archive.ventilaar.net/search/
|
||||
#
|
||||
# below are listed video id's that can NOT be archived be it a
|
||||
# 1. private video
|
||||
# 2. copyright claimed and thus blocked
|
||||
# 3. paid video
|
||||
#
|
||||
# And since this hobby turned out to be useful to lots of people, please consider donating via monero 83YQWgHU3n6XybsquTy8mMincFz5rwGvF79293NrvWtrR6NNwKSsNYPBHpq1qRcKxjTkkmermZbiBTaJ81MtTGty8B1uNAF
|
|
@ -1,19 +0,0 @@
|
|||
##########################
|
||||
# README #
|
||||
##########################
|
||||
#
|
||||
# Want something archived?
|
||||
# Contact me at: archive (at) ventilaar (d0t) nl
|
||||
# To easily search for a desired video go to the search page https://archive.ventilaar.net/search/
|
||||
#
|
||||
##########################
|
||||
# CHANNELS #
|
||||
##########################
|
||||
#
|
||||
# Please include personal and edgy channels that are likely to get taken down. Please no corporations with large amount of video's like Buzzfeed(needs archiving? lol) or Linus Tech Tips(just too large and big)
|
||||
#
|
||||
# Format looks like this
|
||||
# # Channelname / dateaddedtoarchive
|
||||
# channel link
|
||||
#
|
||||
# And since this hobby turned out to be useful to lots of people, please consider donating via monero 83YQWgHU3n6XybsquTy8mMincFz5rwGvF79293NrvWtrR6NNwKSsNYPBHpq1qRcKxjTkkmermZbiBTaJ81MtTGty8B1uNAF
|
|
@ -1,61 +0,0 @@
|
|||
# Set Output
|
||||
# /var/www/archive.ventilaar.net/videos/channelname/YYYYMMDD/ID/humantitle.extension
|
||||
--output "/var/www/archive.ventilaar.net/videos/automatic/%(channel_id)s(%(uploader)s)/%(upload_date)s/%(id)s/%(title)s.%(ext)s"
|
||||
|
||||
# Set Archive
|
||||
# Insert video ID's that already have been downloaded
|
||||
--download-archive "/var/www/archive.ventilaar.net/goodstuff/archive.txt"
|
||||
|
||||
# Channel List
|
||||
# Grab channels and videos from this list
|
||||
--batch-file "/var/www/archive.ventilaar.net/goodstuff/channels.txt"
|
||||
|
||||
# Filter Out Ongoing Livestream
|
||||
# Do this so that if a livestream goes on for 24h or more, this script will not stay active during the stream
|
||||
--match-filter "!is_live"
|
||||
|
||||
# Set Format
|
||||
# 720pMP4/720pMP4+128kM4A/360pmp4, bestvideo+audio as last so that we always have a video and audio stream
|
||||
--format "22/136+140/18/bestvideo[ext=mp4]+bestaudio[ext=m4a]/best"
|
||||
--prefer-ffmpeg
|
||||
--merge-output-format "mp4"
|
||||
|
||||
# Add Metadata
|
||||
# Adds metadata to video file and just downloads more metadata
|
||||
--add-metadata
|
||||
--write-thumbnail
|
||||
--write-info-json
|
||||
|
||||
# Subs
|
||||
# Download all available real subs only in srt/vtt
|
||||
--write-sub
|
||||
--all-subs
|
||||
--sub-format "vtt"
|
||||
|
||||
# Limit Speed and Requests
|
||||
# So that we don't get CAPTCHAS and overload our network
|
||||
--sleep-interval "5"
|
||||
--max-sleep-interval "10"
|
||||
--limit-rate "16M"
|
||||
|
||||
|
||||
# Good options
|
||||
# Arguments speak for themselves
|
||||
--no-cache-dir
|
||||
--ignore-errors
|
||||
--geo-bypass
|
||||
--retries 2
|
||||
--no-continue
|
||||
|
||||
#test(bypasses age gate, but playlist listing does not work)
|
||||
#--user-agent "Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)"
|
||||
|
||||
# Bypass Limit
|
||||
# Go to youtube in a browser, copy the abuse cookie to this file et voila
|
||||
--cookies "/var/www/archive.ventilaar.net/goodstuff/cookie.jar"
|
||||
|
||||
# youtube-dlp
|
||||
--no-sponskrub
|
||||
|
||||
# test
|
||||
--exec "python3 /var/www/archive.ventilaar.net/search/add_db.py %(filepath)q -c %(channel_id)q -d %(upload_date)q -i=%(id)q -e %(ext)q"
|
|
@ -1,64 +0,0 @@
|
|||
# Set Output
|
||||
# /var/www/archive.ventilaar.net/videos/channelname/YYYYMMDD/ID/humantitle.extension
|
||||
--output "/var/www/archive.ventilaar.net/videos/manual/%(id)s/%(title)s.%(ext)s"
|
||||
|
||||
# Set Archive
|
||||
# Insert video ID's that already have been downloaded
|
||||
--download-archive "/var/www/archive.ventilaar.net/goodstuff/archive.txt"
|
||||
|
||||
# Filter Out Ongoing Livestream
|
||||
# Do this so that if a livestream goes on for 24h or more, this script will not stay active during the stream
|
||||
--match-filter "!is_live"
|
||||
|
||||
# Set Format
|
||||
# 720pMP4/720pMP4+128kM4A/360pmp4, bestvideo+audio as last so that we always have a video and audio stream
|
||||
--format "22/136+140/18/bestvideo[ext=mp4]+bestaudio[ext=m4a]/best"
|
||||
--prefer-ffmpeg
|
||||
--merge-output-format "mp4"
|
||||
|
||||
# Add Metadata
|
||||
# Adds metadata to video file and just downloads more metadata
|
||||
--add-metadata
|
||||
--write-thumbnail
|
||||
--write-info-json
|
||||
|
||||
# Subs
|
||||
# Download all available real subs only in srt/vtt
|
||||
--write-sub
|
||||
--all-subs
|
||||
--sub-format "vtt"
|
||||
|
||||
# Limit Speed and Requests
|
||||
# So that we don't get CAPTCHAS and overload our network
|
||||
--limit-rate "16M"
|
||||
--sleep-interval "5"
|
||||
--max-sleep-interval "10"
|
||||
|
||||
|
||||
# Good options
|
||||
# Arguments speak for themselves
|
||||
--no-cache-dir
|
||||
--ignore-errors
|
||||
--geo-bypass
|
||||
--retries 2
|
||||
--no-continue
|
||||
|
||||
#test (bypasses age gate, but playlist listing does not work)
|
||||
#--user-agent "Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)"
|
||||
|
||||
# Bypass Limit
|
||||
# Go to youtube in a browser, copy the abuse cookie to this file et voila
|
||||
--cookies "/var/www/archive.ventilaar.net/goodstuff/cookie.jar"
|
||||
|
||||
# youtube-dlp
|
||||
--no-sponskrub
|
||||
|
||||
# do not run the script. that one is only for automatic manual you can search for yourself
|
||||
#--exec "python /var/www/archive.ventilaar.net/goodstuff/db/add_db.py -c %(channel_id)q -n %(uploader)q -d %(upload_date)q -i %(id)q -t %(title)q -e %(ext)q"
|
||||
|
||||
##########################
|
||||
# PLAYLIST #
|
||||
##########################
|
||||
|
||||
# The only playlist, intended for one video instead of a channel. Like a livestream, compilation or memes
|
||||
https://www.youtube.com/playlist?list=PLaF89kHAz45s1_hqMMQuzS-6gZ4cidmfy
|
|
@ -1,8 +0,0 @@
|
|||
# Netscape HTTP Cookie File
|
||||
# This file is generated by yt-dlp. Do not edit.
|
||||
|
||||
.youtube.com TRUE / FALSE 2145916800 CONSENT YES+cb.20210328-17-p0.en+FX+351
|
||||
.youtube.com TRUE / FALSE 2147483647 GOOGLE_ABUSE_EXEMPTION ID=d75ce9a52dc4b333:TM=1618262296:C=r:IP=134.19.179.163-:S=APGng0u-ZJZWFH2kNJH0ds-kxYaDetBuLA
|
||||
.youtube.com TRUE / TRUE 1636522686 GPS 1
|
||||
.youtube.com TRUE / TRUE 1652052127 VISITOR_INFO1_LIVE Jx2J9exBFv4
|
||||
.youtube.com TRUE / TRUE 0 YSC ij9SBx7PmbE
|
|
@ -1,76 +0,0 @@
|
|||
<!---
|
||||
DONT LOOK AT MY CODE, IT's SHITTY
|
||||
I'm NOT A QUALIFIED WEB DEVELOPER
|
||||
IF IT WORKS, IT WORKS, DON't TOUCH IT
|
||||
--->
|
||||
|
||||
<!DOCTYPE html>
|
||||
<html lang="en">
|
||||
<head>
|
||||
<meta charset="utf-8" />
|
||||
<meta name="description" content="My casual website with no ads at all.">
|
||||
<meta name="robots" content="nofollow">
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1, shrink-to-fit=no">
|
||||
<title>archive · ventilaar.net</title>
|
||||
<link href="https://cdn.ventilaar.net/assets/css/bootstrap.min.css" rel="stylesheet">
|
||||
</head>
|
||||
<body class="bg-dark">
|
||||
<!-- Navigation -->
|
||||
<nav class="navbar navbar-expand navbar-dark bg-dark static-top">
|
||||
<div class="container">
|
||||
<a class="navbar-brand" href="https://ventilaar.net/">Ventilaar.net</a>
|
||||
<ul class="navbar-nav ">
|
||||
<li class="nav-item">
|
||||
<a class="nav-link" href="https://ventilaar.net/">Home</a>
|
||||
</li>
|
||||
<!---
|
||||
<li class="nav-item">
|
||||
<a class="nav-link" href="https://ventilaar.net/vitas.html">Vitas</a>
|
||||
</li>
|
||||
<li class="nav-item">
|
||||
<a class="nav-link" href="https://venitlaar.net/minecraft.html">Minecraft</a>
|
||||
</li>
|
||||
<li class="nav-item">
|
||||
<a class="nav-link" href="https://ventilaar.net/blog.html">Blog</a>
|
||||
</li>
|
||||
<li class="nav-item">
|
||||
<a class="nav-link" href="https://files.ventilaar.net/">Files</a>
|
||||
</li>
|
||||
--->
|
||||
<li class="nav-item active">
|
||||
<a class="nav-link" href="/">Archive</a>
|
||||
</li>
|
||||
<li class="nav-item">
|
||||
<a class="nav-link" href="/search/">Search</a>
|
||||
</li>
|
||||
</ul>
|
||||
</div>
|
||||
</nav>
|
||||
<!-- Page Content -->
|
||||
<div class="container text-light">
|
||||
<div class="row">
|
||||
<div class="col-lg-12 text-center">
|
||||
<h1 class="mt-5">How did I do it?</h1>
|
||||
<p class="lead">One word: Youtube-DL(p)</p>
|
||||
<p>Basically I made a script to automatically download video's from channels every 6 hours</p>
|
||||
<p>Split up to automatic and manual. Automatic archives channels, including new uploads. Manual archives set video's in <a href="https://www.youtube.com/playlist?list=PLaF89kHAz45s1_hqMMQuzS-6gZ4cidmfy">this </a> playlist</p>
|
||||
<p>Want me to add a channel?<a href="/goodstuff/channels.txt"> Find contact info here</a></p>
|
||||
<p>For more technical people, the files to make this possible are linked below</p>
|
||||
<a href="/goodstuff/run.sh">run.sh </a><a href="/goodstuff/config_manual.conf">config_manual.conf </a><a href="/goodstuff/config_automatic.conf">config_automatic.conf </a><a href="/goodstuff/channels.txt">channels.txt </a><a href="/goodstuff/archive.txt">archive.txt </a><a href="/goodstuff/lastlog.txt">lastlog.txt</a><a></a>
|
||||
<br>
|
||||
<br>
|
||||
<!---
|
||||
<div class="alert alert-warning" role="alert">
|
||||
Some channels are missing a handful of videos. This is a known problem. Unfortunately the maintainers at Youtube-DL<a href="https://github.com/ytdl-org/youtube-dl/issues/21121"> do not want to fix</a> this bug.<br>
|
||||
Update: Actually I started using yt-dlp which has more features than youtube-dl, including a fix for this bug.
|
||||
</div>
|
||||
--->
|
||||
<div class="alert alert-info" role="alert">
|
||||
Video streaming problems? Just download the video! This archive isn't meant for streaming anyway, hence the lower quality. Be happy that there is something archived
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</body>
|
||||
</html>
|
||||
|
|
@ -1,2 +0,0 @@
|
|||
--- It is 2021/11/10 06:08:01 ---
|
||||
--- Stopped at 2021/11/10 08:29:34 ---
|
|
@ -1,48 +0,0 @@
|
|||
#!/bin/sh
# Archive run: updates yt-dlp, downloads the manual and automatic sets,
# reports errors found in the last log via Telegram, and refreshes the
# search statistics. A lock file prevents overlapping runs.

# TODO: split manual into a cronjob that runs hourly/every 3 hours; currently the main archive runs every 6 hours

GOODSTUFF=/var/www/archive.ventilaar.net/goodstuff
SEARCH=/var/www/archive.ventilaar.net/search
LOCKFILE="$GOODSTUFF/lockfile"
NOTIFY="$GOODSTUFF/.send_to_telegram.py"

echo "--- It is $(date "+%Y/%m/%d %H:%M:%S") ---"

# Take the lock atomically: with noclobber (set -C) the redirection fails if
# the file already exists, so there is no gap between "check" and "create"
# in which a second run could slip through.
if ( set -C; : > "$LOCKFILE" ) 2>/dev/null; then
	# Update yt-dlp before executing main process
	/usr/local/bin/yt-dlp -U

	# hotfix: after an update the executable loses its permission bits
	chmod +x /usr/local/bin/yt-dlp

	# Run yt-dlp with each config file
	/usr/local/bin/yt-dlp --config-location "$GOODSTUFF/config_manual.conf"
	/usr/local/bin/yt-dlp --config-location "$GOODSTUFF/config_automatic.conf"

	# Create sitemap
	#/var/www/archive.ventilaar.net/sitemap.sh

	# Send message if there are errors
	# tail is used instead of cat because the log file is still being written
	# NOTE: the exit code of yt-dlp could replace this log scraping
	errors=$(/usr/bin/tail -c 999999 "$GOODSTUFF/lastlog.txt" | grep "ERROR: " | grep -vc "ERROR: Premieres")
	# Quoted and compared numerically so an empty or padded count cannot
	# break the test expression.
	if [ "${errors:-0}" -ne 0 ]; then
		echo "There were $errors errors, tryting to send message"
		/usr/bin/python3 "$NOTIFY" "debian-archive" "Youtube mirror script finished with $errors errors!" 0
	fi

	# generate php statistics and copy database for php
	/usr/bin/python3 "$SEARCH/gen_stats.py"
	cp -f "$SEARCH/videos.db" "$SEARCH/copy.db"

	# Remove lock file
	/bin/rm -f "$LOCKFILE"
else
	# A lock file exists (or could not be created): do not start a second run
	echo "Script already running!"
	echo "This could mean that a big channel is currently being archived."
	/usr/bin/python3 "$NOTIFY" "debian-archive" "Youtube mirror script started while the previous one was still going, sum tin won?" 0
fi

echo "--- Stopped at $(date "+%Y/%m/%d %H:%M:%S") ---"
|
||||
|
|
@ -1,35 +0,0 @@
|
|||
# This prints the channel id of channels that are double in the folder that it is ran in
|
||||
import os
|
||||
|
||||
def get_channel_id(folder):
    """Return the channel id prefix of a folder name.

    The id is everything before the first ``(`` of ``folder`` (folder names
    are presumably ``channel_id(channel_name)`` -- confirm against the
    archive layout). The first parenthesis is used, not the last, so that a
    channel name which itself contains parentheses does not end up in the id.

    Raises:
        ValueError: if ``folder`` contains no ``(``.
    """
    return folder[:folder.index('(')]
|
||||
|
||||
def get_folders():
    """Return the names of all directories in the current working directory."""
    return [entry for entry in os.listdir() if os.path.isdir(entry)]
|
||||
|
||||
def main():
    """Print ``<channel id>: <count>`` for every id shared by more than one folder."""
    tally = {}
    for folder in get_folders():
        channel = get_channel_id(folder)
        tally[channel] = tally.get(channel, 0) + 1

    for channel, seen in tally.items():
        if seen > 1:
            print(f'{channel}: {seen}')


if __name__ == "__main__":
    main()
|
|
@ -1,71 +0,0 @@
|
|||
import json
|
||||
import os
|
||||
|
||||
loc = 'xxx.json'
|
||||
# Metadata keys that generate_new() copies into the trimmed JSON; every other key is dropped.
allowed_childs = {'id',
|
||||
'title',
|
||||
'description',
|
||||
'upload_date',
|
||||
'uploader',
|
||||
'uploader_id',
|
||||
'channel_id',
|
||||
'duration',
|
||||
'view_count',
|
||||
'age_limit',
|
||||
'categories',
|
||||
'tags',
|
||||
'playable_in_embed',
|
||||
'is_live',
|
||||
'was_live',
|
||||
'like_count',
|
||||
'channel',
|
||||
'availability',
|
||||
'duration_string',
|
||||
'asr',
|
||||
'format_id',
|
||||
'format_note',
|
||||
'fps',
|
||||
'height',
|
||||
'quality',
|
||||
'tbr',
|
||||
'width',
|
||||
'language',
|
||||
'language_preference',
|
||||
'ext',
|
||||
'vcodec',
|
||||
'acodec',
|
||||
'dynamic_range',
|
||||
'video_ext',
|
||||
'audio_ext',
|
||||
'vbr',
|
||||
'abr',
|
||||
'format',
|
||||
'resolution',
|
||||
'filesize_approx',
|
||||
'fulltitle',
|
||||
'epoch'
|
||||
}
|
||||
|
||||
def read_file(location):
    """Parse the JSON document stored at ``location`` and return the result."""
    with open(location, 'r') as handle:
        parsed = json.load(handle)
    return parsed
|
||||
|
||||
def generate_new(data):
    """Return a new dict holding only the entries of ``data`` whose key is in ``allowed_childs``."""
    return {key: data[key] for key in data if key in allowed_childs}
|
||||
|
||||
def rename_file(location, ext):
    """Rename the file at ``location`` by appending ``.<ext>`` to its name.

    The ``location`` argument is what gets renamed; the previous version
    ignored it and always renamed the module-level ``loc`` path.

    Raises:
        OSError: if the rename fails (for example, ``location`` is missing).
    """
    os.rename(location, f'{location}.{ext}')
|
||||
|
||||
def write_json(data, location):
    """Serialize ``data`` as JSON into the file at ``location``, replacing its content.

    ``json.dump`` streams straight into the file; the previous
    ``writelines(json.dumps(...))`` iterated over the resulting string and
    wrote it one character at a time.
    """
    with open(location, 'w') as file:
        json.dump(data, file)
|
||||
|
||||
if __name__ == '__main__':
    # Trim the metadata, move the untouched original aside with a '.bek'
    # suffix, then write the trimmed version under the original name.
    trimmed = generate_new(read_file(loc))
    rename_file(loc, 'bek')
    write_json(trimmed, loc)
|
|
@ -1,120 +0,0 @@
|
|||
# this script reads the directory structure and compares it with the videos table, adding every video that is missing
|
||||
|
||||
import sqlite3
|
||||
import os
|
||||
import glob
|
||||
|
||||
#root = 'W:\\archive.ventilaar.net\\videos\\automatic' # if on windows
|
||||
root = '/var/www/archive.ventilaar.net/videos/automatic' # if on archive container
|
||||
extensions = ['mp4', 'webm', 'mkv']
|
||||
|
||||
|
||||
class Mydb:
    """Small wrapper around the sqlite ``videos`` table used while re-indexing."""

    def __init__(self):
        """Open the database file and keep one cursor for all queries."""
        self.conn = sqlite3.connect('/root/videos.db')
        #self.conn = sqlite3.connect('/var/www/archive.ventilaar.net/goodstuff/db/videos.db')
        self.cur = self.conn.cursor()

    def insert_video(self, dic):
        """Insert one row taken from the mapping ``dic`` and commit right away."""
        columns = ('channel_id', 'channel_name', 'upload_date', 'video_id', 'video_title', 'video_ext')
        values = tuple(dic[column] for column in columns)
        self.cur.execute(''' INSERT INTO videos (channel_id, channel_name, upload_date, video_id, video_title, video_ext) VALUES (?, ?, ?, ?, ?, ?) ''', values)
        self.conn.commit()

    def check_video_id(self, vid):
        """Return True when a row with video id ``vid`` exists, False otherwise."""
        self.cur.execute(''' SELECT video_id FROM videos WHERE video_id = ? ''', [vid])
        match = self.cur.fetchone()
        return match is not None

    def __del__(self):
        """Commit anything pending and close the connection when the object is collected."""
        self.conn.commit()
        self.conn.close()
|
||||
|
||||
def list_channels():
    """Return the entries of ``root``, expected to be named 'channel_id(channel_name)'."""
    channels = os.listdir(root)
    return channels
|
||||
|
||||
def list_upload_dates(channel):
    """Return the entries of the channel folder, leaving out the 'NA' entry."""
    return [entry for entry in os.listdir(f'{root}/{channel}') if entry != 'NA']
|
||||
|
||||
def list_video_ids(channel, date):
    """Return the entries of the ``date`` folder of ``channel`` (one per video id)."""
    date_folder = f'{root}/{channel}/{date}'
    return os.listdir(date_folder)
|
||||
|
||||
def list_video(channel, date, video_id):
    """Return ``(video_title, ext)`` of the first file with a known video extension.

    Returns None when the video folder holds no file whose extension is
    listed in ``extensions``.
    """
    for name in os.listdir(f'{root}/{channel}/{date}/{video_id}'):
        candidate = split_video(name)
        if candidate[1] in extensions:
            return candidate
    return None
|
||||
|
||||
def split_video(video):
    """Split a file name on its last dot and return the tuple ``(video_title, video_ext)``.

    A name without any dot yields an empty title and the whole name as extension.
    """
    stem, _, ext = video.rpartition('.')
    return stem, ext
|
||||
|
||||
def split_channel(channel):
    """Split a 'channel_id(channel_name)' folder name into ``(channel_id, channel_name)``.

    The id ends at the first ``(``; the name runs up to the last ``)``.
    Raises ValueError when either parenthesis is missing.
    """
    open_at = channel.index("(")
    close_at = channel.rindex(")")
    return channel[:open_at], channel[open_at + 1:close_at]
|
||||
|
||||
def main():
    """Walk the archive below ``root`` and insert every video missing from the table.

    For each channel folder, each upload-date folder and each video-id folder
    the video file is looked up; videos whose id is not yet known to the
    module-level ``db`` are inserted. Progress is printed per channel.

    Raises:
        SystemExit: with status 1 when a video folder cannot be read or holds
            no recognised video file.
    """
    allchannels = list_channels()
    total_channels = len(allchannels)

    # Number of leading channels to skip; raise it to start further down the list.
    skip_channels = 0

    for channel_progress, channel in enumerate(allchannels, start=1):
        print(f'Currently working on channel {channel_progress}/{total_channels} - {channel}')

        if skip_channels > 0:
            print(f'Skipping {channel}')
            skip_channels -= 1
            continue

        for upload_date in list_upload_dates(channel):
            for video_id in list_video_ids(channel, upload_date):
                try:
                    # list_video() returns None when no video file is found,
                    # which makes the unpacking raise TypeError.
                    video_title, video_ext = list_video(channel, upload_date, video_id)
                except (OSError, TypeError):
                    print(f'exception on {channel, upload_date, video_id}')
                    # Exit with a failure status instead of reporting success.
                    raise SystemExit(1)

                channel_id, channel_name = split_channel(channel)

                if db.check_video_id(video_id):
                    continue

                db.insert_video({
                    'upload_date': upload_date,
                    'video_id': video_id,
                    'video_title': video_title,
                    'video_ext': video_ext,
                    'channel_id': channel_id,
                    'channel_name': channel_name,
                })
                print(f'{video_id} by {channel_name} added to table')


if __name__ == "__main__":
    db = Mydb()
    main()
|
|
@ -1,22 +0,0 @@
|
|||
#removes the video id's in the archive.txt file in the same folder as the script
|
||||
#lists date folders and grabs id's from them
|
||||
|
||||
import os
|
||||
|
||||
# Date folders are recognised by a '20' in their name; plain files that happen
# to match are ignored so os.listdir() below cannot fail on them.
clean_dirs = [x for x in os.listdir() if '20' in x and os.path.isdir(x)]

# Every video-id folder becomes one 'youtube <id>' key, the form compared
# against the lines of archive.txt. A set keeps the membership test cheap.
ids = {f'youtube {iddir}' for datedir in clean_dirs for iddir in os.listdir(datedir)}

with open('archive.txt', 'r') as file:
    lines = file.readlines()

# Rewrite archive.txt, keeping only the lines that are not in the collected ids.
with open('archive.txt', 'w') as file:
    file.writelines(line for line in lines if line.strip("\n") not in ids)

print(f'Finished removing id\'s from archive.txt. Counted {len(ids)} id\'s to remove')
|
|
@ -1,21 +0,0 @@
|
|||
This folder contains some files making the static file storage a bit more databasey.
|
||||
|
||||
## database
|
||||
The database stores the following values in a videos table:
|
||||
```
|
||||
channel_id channel_name upload_date video_id video_title video_ext
|
||||
```
|
||||
It is not normalized, because I do not want to blow my brains out yet. People change their usernames and youtube changes video_ids even though they should be static and more bullshit like that.
|
||||
I do not want to go scripting all edge-cases. So the drawback for now is that this uses a lot more storage on disk.
|
||||
|
||||
## search
|
||||
With those values you can reconstruct the http url, so as you might have guessed I need a search script that will search through the db and return the http path to the client.
|
||||
Maybe a cgi script?
|
||||
|
||||
## add_db.py
|
||||
This script will be run after a new video has been downloaded by yt-dlp. It will add the values listed above to the database.
|
||||
|
||||
## delete_db.py
|
||||
You can run this with a video_id as the argument. It will then delete the entry from the database.
|
||||
|
||||
##
|
|
@ -1,79 +0,0 @@
|
|||
# this script requires the options listed below and then checks if the video_id exists in table. if not, it adds it
|
||||
|
||||
# TODO: THE TITLE THAT YT-DLP PASSES TROUGH IS UTF8 COMPATIBLE, HOWEVER THAT IS NOT WHAT IS STORED ON DISK. REVERT BACK TO THE OUTPUT URL AND SPLIT THAT UP INTO THE DIFFERENT FIELDS MANUALLY
|
||||
|
||||
import argparse
|
||||
import sqlite3
|
||||
|
||||
# --exec "python3 /var/www/archive.ventilaar.net/search/add_db.py %(filepath)q -c %(channel_id)q -d %(upload_date)q -i %(id)q -e %(ext)q"
|
||||
|
||||
def parse_args():
    """Parse the command line handed over by the yt-dlp ``--exec`` hook.

    Expects the positional file path plus the channel id, upload date, video
    id and extension options. The options are marked required so that a
    missing one is rejected here instead of reaching the database as None.

    Returns:
        argparse.Namespace with ``path``, ``channel_id``, ``upload_date``,
        ``video_id`` and ``video_extension``.

    Raises:
        SystemExit: raised by argparse when an argument is missing or invalid.
    """
    parser = argparse.ArgumentParser(description='All "optional" options are required! If not passed, the script might break!')
    parser.add_argument('path')
    parser.add_argument('-c', '--channel_id', required=True)
    parser.add_argument('-d', '--upload_date', required=True)
    parser.add_argument('-i', '--video_id', required=True)
    parser.add_argument('-e', '--video_extension', required=True)

    return parser.parse_args()
|
||||
|
||||
|
||||
class Mydb:
    """Small wrapper around the sqlite ``videos`` table used when a download finishes."""

    def __init__(self):
        """Open the search database and keep one cursor for all queries."""
        #self.conn = sqlite3.connect('videos.db')
        self.conn = sqlite3.connect('/var/www/archive.ventilaar.net/search/videos.db')
        self.cur = self.conn.cursor()

    def insert_video(self, args, filen, uploader):
        """Insert one row built from the parsed ``args``, the title ``filen`` and ``uploader``, then commit."""
        row = (
            args.channel_id,
            uploader,
            args.upload_date,
            args.video_id,
            filen,
            args.video_extension,
        )
        self.cur.execute(''' INSERT INTO videos (channel_id, channel_name, upload_date, video_id, video_title, video_ext) VALUES (?, ?, ?, ?, ?, ?) ''', row)
        self.conn.commit()

    def check_video_id(self, vid):
        """Return True when a row with video id ``vid`` exists, False otherwise."""
        self.cur.execute(''' SELECT video_id FROM videos WHERE video_id = ? ''', [vid])
        match = self.cur.fetchone()
        return match is not None

    def __del__(self):
        """Commit anything pending and close the connection when the object is collected."""
        self.conn.commit()
        self.conn.close()
|
||||
|
||||
def get_filename(opts):
    """Return the last component of ``opts.path`` without its final extension.

    A file name without any dot yields an empty string.
    """
    basename = opts.path.split('/')[-1]
    stem, _, _ = basename.rpartition('.')
    return stem
|
||||
|
||||
def get_uploader(opts):
    """Return the text between the first ``(`` and the last ``)`` of path component 6.

    ``opts.path`` is split on '/', and the element at index 6 is taken as the
    channel folder, so the path must have the depth this index assumes.
    """
    channel_folder = opts.path.split('/')[6]
    name_start = channel_folder.index('(') + 1
    name_end = channel_folder.rindex(')')
    return channel_folder[name_start:name_end]
|
||||
|
||||
|
||||
def main(args):
    """Add the downloaded video to the table, or exit with status 1 when its id is already present."""
    title = get_filename(args)
    uploader = get_uploader(args)

    if not db.check_video_id(args.video_id):
        db.insert_video(args, title, uploader)
        print('SUCCESS: Video metadata added to database')
        return

    print('ERROR: This video ID was in the table. What went wrong man?')
    exit(1)


if __name__ == "__main__":
    options = parse_args()  # fetch the arguments
    db = Mydb()
    main(options)
|
Binary file not shown.
Before Width: | Height: | Size: 530 B |
Binary file not shown.
Before Width: | Height: | Size: 642 B |
Binary file not shown.
Before Width: | Height: | Size: 602 B |
|
@ -1,34 +0,0 @@
|
|||
import argparse
|
||||
import sqlite3
|
||||
|
||||
def parse_args():
    """Parse the command line, which takes a single positional youtube video id."""
    cli = argparse.ArgumentParser(description='Remove a video entry from the database')
    cli.add_argument('vid_id', help='youtube video id')
    parsed = cli.parse_args()
    return parsed
|
||||
|
||||
|
||||
class Mydb:
    """Small wrapper around the sqlite ``videos`` table used for deleting entries."""

    def __init__(self):
        """Open the search database and keep one cursor for all queries."""
        #self.conn = sqlite3.connect('videos.db')
        self.conn = sqlite3.connect('/var/www/archive.ventilaar.net/search/videos.db')
        self.cur = self.conn.cursor()

    def delete_video(self, v_id):
        """Delete every row whose video id equals ``v_id`` and commit."""
        self.cur.execute(''' DELETE from videos WHERE video_id=? ''', [v_id])
        self.conn.commit()

    def __del__(self):
        """Commit anything pending and close the connection when the object is collected."""
        self.conn.commit()
        self.conn.close()
|
||||
|
||||
def main(args):
    """Delete the video named on the command line and report it."""
    target = args.vid_id
    db.delete_video(target)
    print(f'{target} should be deleted')


if __name__ == "__main__":
    options = parse_args()  # fetch the arguments
    db = Mydb()
    main(options)
|
|
@ -1,40 +0,0 @@
|
|||
import sqlite3
|
||||
|
||||
class Mydb:
    """Read-only helper that computes totals over the sqlite ``videos`` table."""

    def __init__(self):
        """Open the search database and keep one cursor for all queries."""
        #self.conn = sqlite3.connect('videos.db')
        self.conn = sqlite3.connect('/var/www/archive.ventilaar.net/search/videos.db')
        self.cur = self.conn.cursor()

    def calc_total_videos(self):
        """Return the number of distinct video ids in the table."""
        self.cur.execute(''' SELECT count(DISTINCT video_id) FROM videos ''')
        (total,) = self.cur.fetchone()
        return total

    def calc_total_channels(self):
        """Return the number of distinct channel ids in the table."""
        self.cur.execute(''' SELECT count(DISTINCT channel_id) FROM videos ''')
        (total,) = self.cur.fetchone()
        return total

    def __del__(self):
        """Close the connection when the object is collected."""
        self.conn.close()
|
||||
|
||||
def main():
    """Regenerate stats.php with the current video and channel totals."""
    total_videos = db.calc_total_videos()
    total_channels = db.calc_total_channels()

    # The generated PHP only prints the totals when no search was posted.
    php_lines = [
        '<?php\n',
        '# THIS FILE IS DYNAMICALLY GENERATED BY gen_stats.py\n',
        "if(!ISSET($_POST['q'])){\n",
        f' $total_videos = {total_videos};\n',
        f' $total_channels = {total_channels};\n',
        " echo '<p>Total videos: '.$total_videos.'</p>';\n",
        " echo '<p>Total channels: '.$total_channels.'</p>';\n",
        '}\n',
        '?>',
    ]

    with open('/var/www/archive.ventilaar.net/search/stats.php', 'w') as file:
        file.write(''.join(php_lines))


if __name__ == "__main__":
    db = Mydb()
    main()
|
|
@ -1,73 +0,0 @@
|
|||
<!---
|
||||
DONT LOOK AT MY CODE, IT's SHITTY
|
||||
I'm NOT A QUALIFIED WEB DEVELOPER
|
||||
IF IT WORKS, IT WORKS, DON't TOUCH IT
|
||||
--->
|
||||
|
||||
<!DOCTYPE html>
|
||||
<html lang="en">
|
||||
<head>
|
||||
<meta charset="utf-8" />
|
||||
<meta name="description" content="Archiving youtube videos in mass(and in LQ)">
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1, shrink-to-fit=no">
|
||||
<title>archive · ventilaar.net</title>
|
||||
<link href="https://cdn.ventilaar.net/assets/css/bootstrap.min.css" rel="stylesheet">
|
||||
</head>
|
||||
<body class="bg-dark">
|
||||
<!-- Navigation -->
|
||||
<nav class="navbar navbar-expand navbar-dark bg-dark static-top">
|
||||
<div class="container">
|
||||
<a class="navbar-brand" href="https://www.ventilaar.nl/">Ventilaar.net</a>
|
||||
<ul class="navbar-nav ">
|
||||
<li class="nav-item">
|
||||
<a class="nav-link" href="https://ventilaar.net/">Home</a>
|
||||
</li>
|
||||
<li class="nav-item">
|
||||
<a class="nav-link" href="/">Archive</a>
|
||||
</li>
|
||||
<li class="nav-item active">
|
||||
<a class="nav-link" href="/search/">Search</a>
|
||||
</li>
|
||||
</ul>
|
||||
</div>
|
||||
</nav>
|
||||
<!-- Page Content -->
|
||||
<div class="container text-light">
|
||||
<div class="row">
|
||||
<div class="col-lg-12 text-center">
|
||||
<div>
|
||||
<h1 class="mt-5">Search the archive</h1>
|
||||
<p class="lead">"A big archive needs a search function." -Sun Tzu</p>
|
||||
<p>Use the form below to search through the automatic channel archive.</p>
|
||||
<p class="small">The manual <a href="https://www.youtube.com/playlist?list=PLaF89kHAz45s1_hqMMQuzS-6gZ4cidmfy">playlist</a> is not being indexed here. For that look <a href="https://archive.ventilaar.net/videos/manual/">here</a> instead.</p>
|
||||
</div>
|
||||
<hr>
|
||||
<div>
|
||||
<form method="POST" action=''>
|
||||
<div class="form-group">
|
||||
<label for="forminput">Text to search for</label>
|
||||
<input name="q" class="form-control" id="forminput" type="text">
|
||||
</div>
|
||||
<div class="form-group">
|
||||
<label for="formcriteria">Search criteria</label>
|
||||
<select class="form-control" id="formcriteria" name="c">
|
||||
<option value="video_title">Video Title</option>
|
||||
<option value="channel_name">Channel Name</option>
|
||||
<option value="channel_id">Channel ID</option>
|
||||
<option value="video_id">Youtube video ID (the 11 character ID)</option>
|
||||
<option value="upload_date">Upload date(YYYYMMDD)</option>
|
||||
</select>
|
||||
</div>
|
||||
<button class="btn btn-success">Search</button>
|
||||
</form>
|
||||
</div>
|
||||
<br>
|
||||
<hr>
|
||||
<?php include'stats.php' ?>
|
||||
<?php include'search_do.php' ?>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</body>
|
||||
</html>
|
||||
|
|
@ -1,67 +0,0 @@
|
|||
<?php
// Search handler included by the search page; it only runs when the form was posted.
if (isset($_POST['q'])) {
    // Both inputs come straight from the client: accept plain strings only.
    $keyword = is_string($_POST['q']) ? $_POST['q'] : '';
    $field = (isset($_POST['c']) && is_string($_POST['c'])) ? $_POST['c'] : '';

    // A column name cannot be bound as a statement parameter, so it is only
    // placed into the query after matching this fixed list of columns.
    $searchable = array('video_title', 'channel_name', 'channel_id', 'video_id', 'upload_date');
    if (!in_array($field, $searchable, true)) {
        die('<div class="alert alert-danger" role="alert">Unknown search criteria!</div>');
    }

    if (strlen($keyword) < 3) {
        die('<div class="alert alert-danger" role="alert">Search term shoud be at least 3 characters!</div>');
    }

    if ((strlen($keyword) < 6) && ($field == 'upload_date')) {
        die('<div class="alert alert-danger" role="alert">Please include at least year and month. Example: 201708</div>');
    }

    // The SQLite3 constructor throws on failure instead of returning false.
    try {
        $conn = new SQLite3('/var/www/archive.ventilaar.net/search/copy.db');
    } catch (Exception $e) {
        die('<div class="alert alert-danger" role="alert">Unable to open database!</div>');
    }

    // The search term is bound as a parameter and never concatenated into the SQL.
    $stmt = $conn->prepare('SELECT * FROM `videos` WHERE `' . $field . '` LIKE :keyword ORDER BY channel_name, upload_date DESC;');
    $query = false;
    if ($stmt) {
        $stmt->bindValue(':keyword', '%' . $keyword . '%', SQLITE3_TEXT);
        $query = $stmt->execute();
    }
    if (!$query) {
        die('<div class="alert alert-danger" role="alert">The search could not be executed.</div>');
    }

    // Escapes a database value for use in HTML text and attribute values.
    $esc = function ($value) {
        return htmlspecialchars((string) $value, ENT_QUOTES, 'UTF-8');
    };

    echo '
<div>
<h2>Video Title</h2>
<p style="color: #007bff;">Video title url = Raw archived files</p>
<p><img src="assets/play24.png" /> = Play video direct</p>
<p><img src="assets/youtube24.png" /> = Play video on Youtube</p>
<table class="table table-dark table-bordered">
<thead>
<tr>
<th scope="col">Video Title</th>
<th scope="col">Upload Date</th>
<th scope="col">Video ID</th>
<th scope="col">Channel name</th>
<th scope="col">Channel ID</th>
</tr>
</thead>
<tbody>';

    while ($fetch = $query->fetchArray()) {
        // Rebuild the archive URLs from the stored path components.
        $channelpath = 'https://archive.ventilaar.net/videos/automatic/' . $fetch['channel_id'] . '(' . $fetch['channel_name'] . ')/';
        $idpath = $channelpath . $fetch['upload_date'] . '/' . $fetch['video_id'] . '/';
        $videopath = $idpath . $fetch['video_title'] . '.' . $fetch['video_ext'];
        $uploaddate = date("Y M d", strtotime($fetch['upload_date']));
        $youtubeurl = 'https://youtu.be/' . $fetch['video_id'];

        echo '<tr>
<td>
<a href="' . $esc($idpath) . '">' . $esc($fetch['video_title']) . '</a>
<a href="' . $esc($videopath) . '"><img src="assets/play24.png" /></a>
</td>
<td>' . $esc($uploaddate) . '</td>
<td>
' . $esc($fetch['video_id']) . '
<a href="' . $esc($youtubeurl) . '"><img src="assets/youtube24.png" /></a>
</td>
<td>' . $esc($fetch['channel_name']) . '</td>
<td>' . $esc($fetch['channel_id']) . '</td>
</tr>';
    }

    // Close the table before the wrapping div.
    echo '</tbody></table></div>';
}
?>
|
||||
|
|
@ -1,66 +0,0 @@
|
|||
<?php
// Search handler included by the search page; it only runs when the form was posted.
if (isset($_POST['q'])) {
    // Both inputs come straight from the client: accept plain strings only.
    $keyword = is_string($_POST['q']) ? $_POST['q'] : '';
    $field = (isset($_POST['c']) && is_string($_POST['c'])) ? $_POST['c'] : '';

    // A column name cannot be bound as a statement parameter (binding it
    // compares a string literal instead of the column), so it is only placed
    // into the query after matching this fixed list of columns.
    $searchable = array('video_title', 'channel_name', 'channel_id', 'video_id', 'upload_date');
    if (!in_array($field, $searchable, true)) {
        die('<div class="alert alert-danger" role="alert">Unknown search criteria!</div>');
    }

    if (strlen($keyword) < 3) {
        die('<div class="alert alert-danger" role="alert">Search term shoud be at least 3 characters!</div>');
    }

    // The SQLite3 constructor throws on failure instead of returning false.
    try {
        $db = new SQLite3('/var/www/archive.ventilaar.net/search/copy.db');
    } catch (Exception $e) {
        die('<div class="alert alert-danger" role="alert">Unable to open database!</div>');
    }

    // The placeholder must not be quoted, otherwise it is a plain string literal.
    $stmt_h = $db->prepare('SELECT * FROM `videos` WHERE `' . $field . '` LIKE :wrd ORDER BY channel_name, upload_date DESC;');
    $res = false;
    if ($stmt_h) {
        $stmt_h->bindValue(':wrd', '%' . $keyword . '%', SQLITE3_TEXT);
        $res = $stmt_h->execute();
    }
    if (!$res) {
        die('<div class="alert alert-danger" role="alert">The search could not be executed.</div>');
    }

    // Escapes a database value for use in HTML text and attribute values.
    $esc = function ($value) {
        return htmlspecialchars((string) $value, ENT_QUOTES, 'UTF-8');
    };

    echo '
<div>
<h2>Video Title</h2>
<p style="color: #007bff;">Video title url = Raw archived files</p>
<p><img src="assets/play24.png" /> = Play video direct</p>
<p><img src="assets/youtube24.png" /> = Play video on Youtube</p>
<table class="table table-dark table-bordered">
<thead>
<tr>
<th scope="col">Video Title</th>
<th scope="col">Upload Date (YYYYMMDD)</th>
<th scope="col">Video ID</th>
<th scope="col">Channel name</th>
<th scope="col">Channel ID</th>
</tr>
</thead>
<tbody>';

    while ($row = $res->fetchArray()) {
        // Rebuild the archive URLs from the stored path components.
        $channelpath = 'https://archive.ventilaar.net/videos/automatic/' . $row['channel_id'] . '(' . $row['channel_name'] . ')/';
        $idpath = $channelpath . $row['upload_date'] . '/' . $row['video_id'] . '/';
        $videopath = $idpath . $row['video_title'] . '.' . $row['video_ext'];
        $youtubeurl = 'https://youtu.be/' . $row['video_id'];

        echo '<tr>
<td>
<a href="' . $esc($idpath) . '">' . $esc($row['video_title']) . '</a>
<a href="' . $esc($videopath) . '"><img src="assets/play24.png" /></a>
</td>
<td>' . $esc($row['upload_date']) . '</td>
<td>
' . $esc($row['video_id']) . '
<a href="' . $esc($youtubeurl) . '"><img src="assets/youtube24.png" /></a>
</td>
<td>' . $esc($row['channel_name']) . '</td>
<td>' . $esc($row['channel_id']) . '</td>
</tr>';
    }

    // Close the table before the wrapping div.
    echo '</tbody></table></div>';
    $db->close();
}
?>
|
||||
|
Binary file not shown.
|
@ -0,0 +1,4 @@
|
|||
import ayta
|
||||
|
||||
# Build the Flask application through the package's application factory.
flask_app = ayta.create_app()
|
||||
# Expose the object stored under the "celery" key of the Flask app's extensions
# (presumably registered by celery_init_app() inside create_app -- confirm in ayta.extensions).
celery_app = flask_app.extensions["celery"]
|
Loading…
Reference in New Issue