You've already forked amazing-ytdlp-archive
Compare commits
1 Commits
Author | SHA1 | Date | |
---|---|---|---|
![]() |
570ac88b99 |
@@ -193,7 +193,9 @@ class Mango:
|
||||
|
||||
def get_orphaned_videos(self):
|
||||
""" Returns a tuple of YouTube video IDs which have info_jsons in the collection but for which no permanent channel is defined. SLOW OPERATION """
|
||||
# Ok lemme explain. Perform inner join from channel collection on channel_id key. match only the fields which are empty. return video id
|
||||
|
||||
# The following code is commented out because the query took too long to process, causing the operation to time out
|
||||
"""# Ok lemme explain. Perform inner join from channel collection on channel_id key. match only the fields which are empty. return video id
|
||||
pipeline = [{'$match': {'_status': 'available'}},
|
||||
{'$lookup': {'from': 'channels', 'localField': 'channel_id', 'foreignField': 'id', 'as': 'channel'}},
|
||||
{'$match': {'channel': {'$size': 0}}},{'$project': {'id': 1}},
|
||||
@@ -202,7 +204,20 @@ class Mango:
|
||||
results = self.info_json.aggregate(pipeline)
|
||||
ids = [result['id'] for result in results]
|
||||
|
||||
return tuple(ids)
|
||||
return tuple(ids)"""
|
||||
|
||||
# Reimplementing the query in Python instead, as I do not care about memory usage or data transfer
|
||||
channels = self.channels.find({}, {'_id': 0, 'id': 1})
|
||||
videos = self.info_json.find({'_status': 'available'}, {'_id': 0, 'channel_id': 1, 'id': 1})
|
||||
|
||||
channels = set([x['id'] for x in channels])
|
||||
orphaned = []
|
||||
|
||||
for item in videos:
|
||||
if item['channel_id'] not in channels:
|
||||
orphaned.append(item['id'])
|
||||
|
||||
return tuple(orphaned)
|
||||
|
||||
def get_recent_videos(self, count=99):
|
||||
""" Returns a SET of YouTube video ID's which have been added last to the info_json collection """
|
||||
@@ -458,9 +473,9 @@ class Mango:
|
||||
def statistics_counts(self):
|
||||
counts = {}
|
||||
|
||||
counts['videos'] = f'{self.info_json.count_documents({})} videos in the archive'
|
||||
counts['videos'] = f"{self.info_json.count_documents({'_status': 'available'})} videos in the archive"
|
||||
counts['channels'] = f'{self.channels.count_documents({})} channels in the system'
|
||||
counts['download_queue'] = f'{self.download_queue.count_documents({})} queued videos for download'
|
||||
counts['download_queue'] = f"{self.download_queue.count_documents({'status': 'queued'})} queued videos for download"
|
||||
|
||||
return counts
|
||||
|
||||
|
Reference in New Issue
Block a user