qobuz-dl/qobuz_dl/utils.py

191 lines
6.4 KiB
Python

import re
import string
import os
import logging
import time
from mutagen.mp3 import EasyMP3
from mutagen.flac import FLAC
logger = logging.getLogger(__name__)
EXTENSIONS = (".mp3", ".flac")
class PartialFormatter(string.Formatter):
def __init__(self, missing="n/a", bad_fmt="n/a"):
self.missing, self.bad_fmt = missing, bad_fmt
def get_field(self, field_name, args, kwargs):
try:
val = super(PartialFormatter, self).get_field(field_name, args, kwargs)
except (KeyError, AttributeError):
val = None, field_name
return val
def format_field(self, value, spec):
if not value:
return self.missing
try:
return super(PartialFormatter, self).format_field(value, spec)
except ValueError:
if self.bad_fmt:
return self.bad_fmt
raise
def make_m3u(pl_directory):
track_list = ["#EXTM3U"]
rel_folder = os.path.basename(os.path.normpath(pl_directory))
pl_name = rel_folder + ".m3u"
for local, dirs, files in os.walk(pl_directory):
dirs.sort()
audio_rel_files = [
os.path.join(os.path.basename(os.path.normpath(local)), file_)
for file_ in files
if os.path.splitext(file_)[-1] in EXTENSIONS
]
audio_files = [
os.path.abspath(os.path.join(local, file_))
for file_ in files
if os.path.splitext(file_)[-1] in EXTENSIONS
]
if not audio_files or len(audio_files) != len(audio_rel_files):
continue
for audio_rel_file, audio_file in zip(audio_rel_files, audio_files):
try:
pl_item = (
EasyMP3(audio_file) if ".mp3" in audio_file else FLAC(audio_file)
)
title = pl_item["TITLE"][0]
artist = pl_item["ARTIST"][0]
length = int(pl_item.info.length)
index = "#EXTINF:{}, {} - {}\n{}".format(
length, artist, title, audio_rel_file
)
except: # noqa
continue
track_list.append(index)
if len(track_list) > 1:
with open(os.path.join(pl_directory, pl_name), "w") as pl:
pl.write("\n\n".join(track_list))
def smart_discography_filter(
contents: list, save_space: bool = False, skip_extras: bool = False
) -> list:
"""When downloading some artists' discography, many random and spam-like
albums can get downloaded. This helps filter those out to just get the good stuff.
This function removes:
* albums by other artists, which may contain a feature from the requested artist
* duplicate albums in different qualities
* (optionally) removes collector's, deluxe, live albums
:param list contents: contents returned by qobuz API
:param bool save_space: choose highest bit depth, lowest sampling rate
:param bool remove_extras: remove albums with extra material (i.e. live, deluxe,...)
:returns: filtered items list
"""
# for debugging
def print_album(album: dict) -> None:
logger.debug(
f"{album['title']} - {album.get('version', '~~')} "
"({album['maximum_bit_depth']}/{album['maximum_sampling_rate']}"
" by {album['artist']['name']}) {album['id']}"
)
TYPE_REGEXES = {
"remaster": r"(?i)(re)?master(ed)?",
"extra": r"(?i)(anniversary|deluxe|live|collector|demo|expanded)",
}
def is_type(album_t: str, album: dict) -> bool:
"""Check if album is of type `album_t`"""
version = album.get("version", "")
title = album.get("title", "")
regex = TYPE_REGEXES[album_t]
return re.search(regex, f"{title} {version}") is not None
def essence(album: dict) -> str:
"""Ignore text in parens/brackets, return all lowercase.
Used to group two albums that may be named similarly, but not exactly
the same.
"""
r = re.match(r"([^\(]+)(?:\s*[\(\[][^\)][\)\]])*", album)
return r.group(1).strip().lower()
requested_artist = contents[0]["name"]
items = [item["albums"]["items"] for item in contents][0]
# use dicts to group duplicate albums together by title
title_grouped = dict()
for item in items:
title_ = essence(item["title"])
if title_ not in title_grouped: # ?
# if (t := essence(item["title"])) not in title_grouped:
title_grouped[title_] = []
title_grouped[title_].append(item)
items = []
for albums in title_grouped.values():
best_bit_depth = max(a["maximum_bit_depth"] for a in albums)
get_best = min if save_space else max
best_sampling_rate = get_best(
a["maximum_sampling_rate"]
for a in albums
if a["maximum_bit_depth"] == best_bit_depth
)
remaster_exists = any(is_type("remaster", a) for a in albums)
def is_valid(album: dict) -> bool:
return (
album["maximum_bit_depth"] == best_bit_depth
and album["maximum_sampling_rate"] == best_sampling_rate
and album["artist"]["name"] == requested_artist
and not ( # states that are not allowed
(remaster_exists and not is_type("remaster", album))
or (skip_extras and is_type("extra", album))
)
)
filtered = tuple(filter(is_valid, albums))
# most of the time, len is 0 or 1.
# if greater, it is a complete duplicate,
# so it doesn't matter which is chosen
if len(filtered) >= 1:
items.append(filtered[0])
return items
def format_duration(duration):
return time.strftime("%H:%M:%S", time.gmtime(duration))
def create_and_return_dir(directory):
fix = os.path.normpath(directory)
os.makedirs(fix, exist_ok=True)
return fix
def get_url_info(url):
"""Returns the type of the url and the id.
Compatible with urls of the form:
https://www.qobuz.com/us-en/{type}/{name}/{id}
https://open.qobuz.com/{type}/{id}
https://play.qobuz.com/{type}/{id}
/us-en/{type}/-/{id}
"""
r = re.search(
r"(?:https:\/\/(?:w{3}|open|play)\.qobuz\.com)?(?:\/[a-z]{2}-[a-z]{2})"
r"?\/(album|artist|track|playlist|label)(?:\/[-\w\d]+)?\/([\w\d]+)",
url,
)
return r.groups()