Compare commits

...

1 Commit

Author SHA1 Message Date
Ventilaar
570ac88b99 Reimplement orphaned video listing and more accurate stats
Some checks failed
Update worker server / build-and-publish (release) Has been cancelled
Generate docker image / build-and-publish (release) Successful in 19s
2025-02-11 22:55:21 +01:00

View File

@@ -193,7 +193,9 @@ class Mango:
def get_orphaned_videos(self):
    """
    Return a TUPLE of YouTube video IDs that have no matching channel.

    Only info_json documents whose '_status' is 'available' are considered.
    A video is reported when its 'channel_id' does not equal the 'id' of any
    document in the channels collection, or when it has no 'channel_id' at
    all. SLOW OPERATION: every channel and every available video is fetched.
    """
    # The join is done in Python on purpose. The earlier server-side
    # aggregation ($match available -> $lookup on channels -> keep rows with
    # an empty lookup result) took too long to process and timed out.
    # Memory usage and data transfer are not a concern here.
    known_channels = {doc['id'] for doc in self.channels.find({}, {'_id': 0, 'id': 1})}
    videos = self.info_json.find({'_status': 'available'}, {'_id': 0, 'channel_id': 1, 'id': 1})
    # .get() so a video document without a channel_id counts as orphaned
    # instead of raising KeyError and aborting the whole listing.
    return tuple(video['id'] for video in videos if video.get('channel_id') not in known_channels)
def get_recent_videos(self, count=99):
""" Returns a SET of YouTube video ID's which have been added last to the info_json collection """
@@ -458,9 +473,9 @@ class Mango:
def statistics_counts(self):
    """
    Return a dict of human-readable count strings.

    Keys and what they count:
      'videos'         - info_json documents whose '_status' is 'available'
      'channels'       - all documents in the channels collection
      'download_queue' - download_queue documents whose 'status' is 'queued'
    """
    # Each collection is counted exactly once, with its final filter, so no
    # count_documents() round trip is spent on a value that gets overwritten.
    return {
        'videos': f"{self.info_json.count_documents({'_status': 'available'})} videos in the archive",
        'channels': f'{self.channels.count_documents({})} channels in the system',
        'download_queue': f"{self.download_queue.count_documents({'status': 'queued'})} queued videos for download",
    }