From a0e07d31616102ac905c0519474d2c01db7ee392 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Wed, 3 Sep 2014 12:41:05 +0200 Subject: [PATCH] [youtube] Move cache into its own module --- test/test_cache.py | 59 +++++++++++++++++++++ youtube_dl/YoutubeDL.py | 4 +- youtube_dl/__init__.py | 21 ++------ youtube_dl/cache.py | 93 +++++++++++++++++++++++++++++++++ youtube_dl/extractor/youtube.py | 46 +++------------- youtube_dl/utils.py | 6 --- 6 files changed, 166 insertions(+), 63 deletions(-) create mode 100644 test/test_cache.py create mode 100644 youtube_dl/cache.py diff --git a/test/test_cache.py b/test/test_cache.py new file mode 100644 index 0000000000..dbefad109e --- /dev/null +++ b/test/test_cache.py @@ -0,0 +1,59 @@ +#!/usr/bin/env python +# coding: utf-8 + +from __future__ import unicode_literals + +import shutil + +# Allow direct execution +import os +import sys +import unittest +sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) + + +from test.helper import FakeYDL +from youtube_dl.cache import Cache + + +def _is_empty(d): + return not bool(os.listdir(d)) + + +def _mkdir(d): + if not os.path.exists(d): + os.mkdir(d) + + +class TestCache(unittest.TestCase): + def setUp(self): + TEST_DIR = os.path.dirname(os.path.abspath(__file__)) + TESTDATA_DIR = os.path.join(TEST_DIR, 'testdata') + _mkdir(TESTDATA_DIR) + self.test_dir = os.path.join(TESTDATA_DIR, 'cache_test') + self.tearDown() + + def tearDown(self): + if os.path.exists(self.test_dir): + shutil.rmtree(self.test_dir) + + def test_cache(self): + ydl = FakeYDL({ + 'cachedir': self.test_dir, + }) + c = Cache(ydl) + obj = {'x': 1, 'y': ['ä', '\\a', True]} + self.assertEqual(c.load('test_cache', 'k'), None) + c.store('test_cache', 'k', obj) + self.assertEqual(c.load('test_cache', 'k2'), None) + self.assertFalse(_is_empty(self.test_dir)) + self.assertEqual(c.load('test_cache', 'k'), obj) + self.assertEqual(c.load('test_cache', 'y'), None) + 
self.assertEqual(c.load('test_cache2', 'k'), None) + c.remove() + self.assertFalse(os.path.exists(self.test_dir)) + self.assertEqual(c.load('test_cache', 'k'), None) + + +if __name__ == '__main__': + unittest.main() diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index 98639e004c..553bf559b3 100755 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -57,6 +57,7 @@ from .utils import ( YoutubeDLHandler, prepend_extension, ) +from .cache import Cache from .extractor import get_info_extractor, gen_extractors from .downloader import get_suitable_downloader from .postprocessor import FFmpegMergerPP @@ -133,7 +134,7 @@ class YoutubeDL(object): daterange: A DateRange object, download only if the upload_date is in the range. skip_download: Skip the actual download of the video file cachedir: Location of the cache files in the filesystem. - None to disable filesystem cache. + False to disable filesystem cache. noplaylist: Download single video instead of a playlist if in doubt. age_limit: An integer representing the user's age in years. Unsuitable videos for the given age are skipped. 
@@ -195,6 +196,7 @@ class YoutubeDL(object): self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)] self._err_file = sys.stderr self.params = params + self.cache = Cache(self) if params.get('bidi_workaround', False): try: diff --git a/youtube_dl/__init__.py b/youtube_dl/__init__.py index bf616e3b60..6b5187fb61 100644 --- a/youtube_dl/__init__.py +++ b/youtube_dl/__init__.py @@ -84,7 +84,6 @@ import optparse import os import random import shlex -import shutil import sys @@ -96,7 +95,6 @@ from .utils import ( decodeOption, get_term_width, DownloadError, - get_cachedir, MaxDownloadsReached, preferredencoding, read_batch_urls, @@ -518,10 +516,10 @@ def parseOpts(overrideArguments=None): filesystem.add_option('--cookies', dest='cookiefile', metavar='FILE', help='file to read cookies from and dump cookie jar in') filesystem.add_option( - '--cache-dir', dest='cachedir', default=get_cachedir(), metavar='DIR', + '--cache-dir', dest='cachedir', default=None, metavar='DIR', help='Location in the filesystem where youtube-dl can store some downloaded information permanently. By default $XDG_CACHE_HOME/youtube-dl or ~/.cache/youtube-dl . 
At the moment, only YouTube player files (for videos with obfuscated signatures) are cached, but that may change.') filesystem.add_option( - '--no-cache-dir', action='store_const', const=None, dest='cachedir', + '--no-cache-dir', action='store_const', const=False, dest='cachedir', help='Disable filesystem caching') filesystem.add_option( '--rm-cache-dir', action='store_true', dest='rm_cachedir', @@ -872,20 +870,7 @@ def _real_main(argv=None): # Remove cache dir if opts.rm_cachedir: - if opts.cachedir is None: - ydl.to_screen(u'No cache dir specified (Did you combine --no-cache-dir and --rm-cache-dir?)') - else: - if ('.cache' not in opts.cachedir) or ('youtube-dl' not in opts.cachedir): - ydl.to_screen(u'Not removing directory %s - this does not look like a cache dir' % opts.cachedir) - retcode = 141 - else: - ydl.to_screen( - u'Removing cache dir %s .' % opts.cachedir, - skip_eol=True) - if os.path.exists(opts.cachedir): - ydl.to_screen(u'.', skip_eol=True) - shutil.rmtree(opts.cachedir) - ydl.to_screen(u'.') + ydl.cache.remove() # Maybe do nothing if (len(all_urls) < 1) and (opts.load_info_filename is None): diff --git a/youtube_dl/cache.py b/youtube_dl/cache.py new file mode 100644 index 0000000000..6cae53d221 --- /dev/null +++ b/youtube_dl/cache.py @@ -0,0 +1,93 @@ +from __future__ import unicode_literals + +import errno +import io +import json +import os +import re +import shutil +import traceback + +from .utils import ( + write_json_file, +) + + +class Cache(object): + def __init__(self, ydl): + self._ydl = ydl + + def _get_root_dir(self): + res = self._ydl.params.get('cachedir') + if res is None: + cache_root = os.environ.get('XDG_CACHE_HOME', '~/.cache') + res = os.path.join(cache_root, 'youtube-dl') + return os.path.expanduser(res) + + def _get_cache_fn(self, section, key, dtype): + assert re.match(r'^[a-zA-Z0-9_-]+$', section) + assert re.match(r'^[a-zA-Z0-9_-]+$', key) + return os.path.join( + self._get_root_dir(), section, '%s.%s' % (key, dtype)) + + 
@property + def enabled(self): + return self._ydl.params.get('cachedir') is not False + + def store(self, section, key, data, dtype='json'): + assert dtype in ('json',) + + if not self.enabled: + return + + fn = self._get_cache_fn(section, key, dtype) + try: + try: + os.makedirs(os.path.dirname(fn)) + except OSError as ose: + if ose.errno != errno.EEXIST: + raise + write_json_file(data, fn) + except Exception: + tb = traceback.format_exc() + self._ydl.report_warning( + 'Writing cache to %r failed: %s' % (fn, tb)) + + def load(self, section, key, dtype='json', default=None): + assert dtype in ('json',) + + if not self.enabled: + return default + + cache_fn = self._get_cache_fn(section, key, dtype) + try: + try: + with io.open(cache_fn, 'r', encoding='utf-8') as cachef: + return json.load(cachef) + except ValueError: + try: + file_size = os.path.getsize(cache_fn) + except (OSError, IOError) as oe: + file_size = str(oe) + self._ydl.report_warning( + 'Cache retrieval from %s failed (%s)' % (cache_fn, file_size)) + except IOError: + pass # No cache available + + return default + + def remove(self): + if not self.enabled: + self._ydl.to_screen('Cache is disabled (Did you combine --no-cache-dir and --rm-cache-dir?)') + return + + cachedir = self._get_root_dir() + if not any((term in cachedir) for term in ('cache', 'tmp')): + raise Exception('Not removing directory %s - this does not look like a cache dir' % cachedir) + + self._ydl.to_screen( + 'Removing cache dir %s .' 
% cachedir, skip_eol=True) + if os.path.exists(cachedir): + self._ydl.to_screen('.', skip_eol=True) + shutil.rmtree(cachedir) + self._ydl.to_screen('.') diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 08a04737c3..70f670682e 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -1,7 +1,5 @@ # coding: utf-8 -import errno -import io import itertools import json import os.path @@ -21,7 +19,6 @@ from ..utils import ( compat_str, clean_html, - get_cachedir, get_element_by_id, get_element_by_attribute, ExtractorError, @@ -30,7 +27,6 @@ from ..utils import ( unescapeHTML, unified_strdate, orderedSet, - write_json_file, uppercase_escape, ) @@ -435,26 +431,10 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): func_id = '%s_%s_%s' % ( player_type, player_id, self._signature_cache_id(example_sig)) assert os.path.basename(func_id) == func_id - cache_dir = get_cachedir(self._downloader.params) - cache_enabled = cache_dir is not None - if cache_enabled: - cache_fn = os.path.join(os.path.expanduser(cache_dir), - u'youtube-sigfuncs', - func_id + '.json') - try: - with io.open(cache_fn, 'r', encoding='utf-8') as cachef: - cache_spec = json.load(cachef) - return lambda s: u''.join(s[i] for i in cache_spec) - except IOError: - pass # No cache available - except ValueError: - try: - file_size = os.path.getsize(cache_fn) - except (OSError, IOError) as oe: - file_size = str(oe) - self._downloader.report_warning( - u'Cache %s failed (%s)' % (cache_fn, file_size)) + cache_spec = self._downloader.cache.load(u'youtube-sigfuncs', func_id) + if cache_spec is not None: + return lambda s: u''.join(s[i] for i in cache_spec) if player_type == 'js': code = self._download_webpage( @@ -472,22 +452,12 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): else: assert False, 'Invalid player type %r' % player_type - if cache_enabled: - try: - test_string = u''.join(map(compat_chr, 
range(len(example_sig)))) - cache_res = res(test_string) - cache_spec = [ord(c) for c in cache_res] - try: - os.makedirs(os.path.dirname(cache_fn)) - except OSError as ose: - if ose.errno != errno.EEXIST: - raise - write_json_file(cache_spec, cache_fn) - except Exception: - tb = traceback.format_exc() - self._downloader.report_warning( - u'Writing cache to %r failed: %s' % (cache_fn, tb)) + if cache_spec is None: + test_string = u''.join(map(compat_chr, range(len(example_sig)))) + cache_res = res(test_string) + cache_spec = [ord(c) for c in cache_res] + self._downloader.cache.store(u'youtube-sigfuncs', func_id, cache_spec) return res def _print_sig_code(self, func, example_sig): diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index 3846dfdcad..0bc410e91b 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -1076,12 +1076,6 @@ def intlist_to_bytes(xs): return bytes(xs) -def get_cachedir(params={}): - cache_root = os.environ.get('XDG_CACHE_HOME', - os.path.expanduser('~/.cache')) - return params.get('cachedir', os.path.join(cache_root, 'youtube-dl')) - - # Cross-platform file locking if sys.platform == 'win32': import ctypes.wintypes