import logging import mimetypes import os import pathlib from typing import Callable, Iterable, Optional, Tuple from pip._internal.models.candidate import InstallationCandidate from pip._internal.models.link import Link from pip._internal.utils.urls import path_to_url, url_to_path from pip._internal.vcs import is_url logger = logging.getLogger(__name__) FoundCandidates = Iterable[InstallationCandidate] FoundLinks = Iterable[Link] CandidatesFromPage = Callable[[Link], Iterable[InstallationCandidate]] PageValidator = Callable[[Link], bool] class LinkSource: @property def link(self) -> Optional[Link]: """Returns the underlying link, if there's one.""" raise NotImplementedError() def page_candidates(self) -> FoundCandidates: """Candidates found by parsing an archive listing HTML file.""" raise NotImplementedError() def file_links(self) -> FoundLinks: """Links found by specifying archives directly.""" raise NotImplementedError() def _is_html_file(file_url: str) -> bool: return mimetypes.guess_type(file_url, strict=False)[0] == "text/html" class _FlatDirectorySource(LinkSource): """Link source specified by ``--find-links=``. This looks the content of the directory, and returns: * ``page_candidates``: Links listed on each HTML file in the directory. * ``file_candidates``: Archives in the directory. """ def __init__( self, candidates_from_page: CandidatesFromPage, path: str, ) -> None: self._candidates_from_page = candidates_from_page self._path = pathlib.Path(os.path.realpath(path)) @property def link(self) -> Optional[Link]: return None def page_candidates(self) -> FoundCandidates: for path in self._path.iterdir(): url = path_to_url(str(path)) if not _is_html_file(url): continue yield from self._candidates_from_page(Link(url)) def file_links(self) -> FoundLinks: for path in self._path.iterdir(): url = path_to_url(str(path)) if _is_html_file(url): continue yield Link(url) class _LocalFileSource(LinkSource): """``--find-links=`` or ``--[extra-]index-url=``. If a URL is supplied, it must be a ``file:`` URL. If a path is supplied to the option, it is converted to a URL first. This returns: * ``page_candidates``: Links listed on an HTML file. * ``file_candidates``: The non-HTML file. """ def __init__( self, candidates_from_page: CandidatesFromPage, link: Link, ) -> None: self._candidates_from_page = candidates_from_page self._link = link @property def link(self) -> Optional[Link]: return self._link def page_candidates(self) -> FoundCandidates: if not _is_html_file(self._link.url): return yield from self._candidates_from_page(self._link) def file_links(self) -> FoundLinks: if _is_html_file(self._link.url): return yield self._link class _RemoteFileSource(LinkSource): """``--find-links=`` or ``--[extra-]index-url=``. This returns: * ``page_candidates``: Links listed on an HTML file. * ``file_candidates``: The non-HTML file. """ def __init__( self, candidates_from_page: CandidatesFromPage, page_validator: PageValidator, link: Link, ) -> None: self._candidates_from_page = candidates_from_page self._page_validator = page_validator self._link = link @property def link(self) -> Optional[Link]: return self._link def page_candidates(self) -> FoundCandidates: if not self._page_validator(self._link): return yield from self._candidates_from_page(self._link) def file_links(self) -> FoundLinks: yield self._link class _IndexDirectorySource(LinkSource): """``--[extra-]index-url=``. This is treated like a remote URL; ``candidates_from_page`` contains logic for this by appending ``index.html`` to the link. """ def __init__( self, candidates_from_page: CandidatesFromPage, link: Link, ) -> None: self._candidates_from_page = candidates_from_page self._link = link @property def link(self) -> Optional[Link]: return self._link def page_candidates(self) -> FoundCandidates: yield from self._candidates_from_page(self._link) def file_links(self) -> FoundLinks: return () def build_source( location: str, *, candidates_from_page: CandidatesFromPage, page_validator: PageValidator, expand_dir: bool, cache_link_parsing: bool, ) -> Tuple[Optional[str], Optional[LinkSource]]: path: Optional[str] = None url: Optional[str] = None if os.path.exists(location): # Is a local path. url = path_to_url(location) path = location elif location.startswith("file:"): # A file: URL. url = location path = url_to_path(location) elif is_url(location): url = location if url is None: msg = ( "Location '%s' is ignored: " "it is either a non-existing path or lacks a specific scheme." ) logger.warning(msg, location) return (None, None) if path is None: source: LinkSource = _RemoteFileSource( candidates_from_page=candidates_from_page, page_validator=page_validator, link=Link(url, cache_link_parsing=cache_link_parsing), ) return (url, source) if os.path.isdir(path): if expand_dir: source = _FlatDirectorySource( candidates_from_page=candidates_from_page, path=path, ) else: source = _IndexDirectorySource( candidates_from_page=candidates_from_page, link=Link(url, cache_link_parsing=cache_link_parsing), ) return (url, source) elif os.path.isfile(path): source = _LocalFileSource( candidates_from_page=candidates_from_page, link=Link(url, cache_link_parsing=cache_link_parsing), ) return (url, source) logger.warning( "Location '%s' is ignored: it is neither a file nor a directory.", location, ) return (url, None)