You've already forked amazing-ytdlp-archive
Compare commits
1 Commits
Author | SHA1 | Date | |
---|---|---|---|
![]() |
570ac88b99 |
@@ -193,7 +193,9 @@ class Mango:
|
|||||||
|
|
||||||
def get_orphaned_videos(self):
|
def get_orphaned_videos(self):
|
||||||
""" Returns a SET of YouTube video ID's which have info_jsons in the collection but no permanent channel is defined. SLOW OPERATION """
|
""" Returns a SET of YouTube video ID's which have info_jsons in the collection but no permanent channel is defined. SLOW OPERATION """
|
||||||
# Ok lemme explain. Perform inner join from channel collection on channel_id key. match only the fields which are empty. return video id
|
|
||||||
|
# The following code is commented out because the query took too long to process, causing the operation to time out
|
||||||
|
"""# Ok lemme explain. Perform inner join from channel collection on channel_id key. match only the fields which are empty. return video id
|
||||||
pipeline = [{'$match': {'_status': 'available'}},
|
pipeline = [{'$match': {'_status': 'available'}},
|
||||||
{'$lookup': {'from': 'channels', 'localField': 'channel_id', 'foreignField': 'id', 'as': 'channel'}},
|
{'$lookup': {'from': 'channels', 'localField': 'channel_id', 'foreignField': 'id', 'as': 'channel'}},
|
||||||
{'$match': {'channel': {'$size': 0}}},{'$project': {'id': 1}},
|
{'$match': {'channel': {'$size': 0}}},{'$project': {'id': 1}},
|
||||||
@@ -202,7 +204,20 @@ class Mango:
|
|||||||
results = self.info_json.aggregate(pipeline)
|
results = self.info_json.aggregate(pipeline)
|
||||||
ids = [result['id'] for result in results]
|
ids = [result['id'] for result in results]
|
||||||
|
|
||||||
return tuple(ids)
|
return tuple(ids)"""
|
||||||
|
|
||||||
|
# Reimplementing the query in Python instead, as I do not care about memory usage or data transfer
|
||||||
|
channels = self.channels.find({}, {'_id': 0, 'id': 1})
|
||||||
|
videos = self.info_json.find({'_status': 'available'}, {'_id': 0, 'channel_id': 1, 'id': 1})
|
||||||
|
|
||||||
|
channels = set([x['id'] for x in channels])
|
||||||
|
orphaned = []
|
||||||
|
|
||||||
|
for item in videos:
|
||||||
|
if item['channel_id'] not in channels:
|
||||||
|
orphaned.append(item['id'])
|
||||||
|
|
||||||
|
return tuple(orphaned)
|
||||||
|
|
||||||
def get_recent_videos(self, count=99):
|
def get_recent_videos(self, count=99):
|
||||||
""" Returns a SET of YouTube video ID's which have been added last to the info_json collection """
|
""" Returns a SET of YouTube video ID's which have been added last to the info_json collection """
|
||||||
@@ -458,9 +473,9 @@ class Mango:
|
|||||||
def statistics_counts(self):
|
def statistics_counts(self):
|
||||||
counts = {}
|
counts = {}
|
||||||
|
|
||||||
counts['videos'] = f'{self.info_json.count_documents({})} videos in the archive'
|
counts['videos'] = f"{self.info_json.count_documents({'_status': 'available'})} videos in the archive"
|
||||||
counts['channels'] = f'{self.channels.count_documents({})} channels in the system'
|
counts['channels'] = f'{self.channels.count_documents({})} channels in the system'
|
||||||
counts['download_queue'] = f'{self.download_queue.count_documents({})} queued videos for download'
|
counts['download_queue'] = f"{self.download_queue.count_documents({'status': 'queued'})} queued videos for download"
|
||||||
|
|
||||||
return counts
|
return counts
|
||||||
|
|
||||||
|
Reference in New Issue
Block a user