84 lines
3.0 KiB
Python
84 lines
3.0 KiB
Python
from __future__ import annotations
|
|
|
|
from email.message import Message
|
|
import re
|
|
from pathlib import Path
|
|
from urllib.parse import unquote, urlparse
|
|
|
|
import httpx
|
|
|
|
from app.config import settings
|
|
from app.storage.repository import Repository
|
|
from app.utils.validators import validate_url_host
|
|
|
|
|
|
# Matches each run of one or more characters that are NOT an ASCII letter,
# a digit, or one of ".", "_", "+", "-". Used below to replace such runs
# when building file names.
SAFE_FILE_RE = re.compile(r"[^a-zA-Z0-9._+-]+")
|
|
|
|
|
|
def _sanitize_file_name(value: str, fallback: str = "package.deb") -> str:
    """Reduce *value* to a conservative file name, or return *fallback*.

    Every run of characters matched by ``SAFE_FILE_RE`` is replaced with a
    single ``-``; leading and trailing ``-`` and ``.`` characters are then
    stripped. If nothing is left, *fallback* is returned instead.
    """
    collapsed = SAFE_FILE_RE.sub("-", value)
    trimmed = collapsed.strip("-.")
    if trimmed:
        return trimmed
    return fallback
|
|
|
|
|
|
def _safe_url_file_name(url: str) -> str:
    """Derive a sanitized file name from the path component of *url*.

    The last component of the percent-decoded path is the candidate name;
    ``"package.deb"`` is used when the path has no last component. When the
    candidate is literally ``"download"`` and the raw path has at least
    three non-empty segments, the last three segments are joined with ``-``
    so the name keeps some identifying context. The result is passed
    through ``_sanitize_file_name``.
    """
    url_path = urlparse(url).path
    candidate = Path(unquote(url_path)).name
    if not candidate:
        candidate = "package.deb"
    elif candidate == "download":
        # Segments come from the raw (still percent-encoded) path.
        segments = list(filter(None, url_path.split("/")))
        if len(segments) >= 3:
            candidate = "-".join(segments[-3:])
    return _sanitize_file_name(candidate)
|
|
|
|
|
|
def _content_disposition_file_name(value: str) -> str:
|
|
if not value:
|
|
return ""
|
|
|
|
message = Message()
|
|
message["content-disposition"] = value
|
|
return _sanitize_file_name(message.get_filename() or "", fallback="")
|
|
|
|
|
|
def _response_file_name(url: str, response: httpx.Response) -> str:
    """Choose the local file name for a downloaded *response*.

    The Content-Disposition file name is preferred. If it yields nothing,
    the name is derived from ``response.url`` (or from *url* when that
    stringifies to an empty value). When the Content-Type is
    ``application/vnd.debian.binary-package`` and the chosen name does not
    already end in ``.deb`` (case-insensitive), ``.deb`` is appended.
    """
    headers = response.headers

    file_name = _content_disposition_file_name(headers.get("content-disposition", ""))
    if not file_name:
        file_name = _safe_url_file_name(str(response.url) or url)

    # Only the media type matters; drop any ";charset=..." style parameters.
    media_type, _, _ = headers.get("content-type", "").partition(";")
    media_type = media_type.strip().lower()

    is_deb_payload = media_type == "application/vnd.debian.binary-package"
    if is_deb_payload and not file_name.lower().endswith(".deb"):
        file_name = f"{file_name}.deb"

    return file_name
|
|
|
|
|
|
class Downloader:
    """Download package files into ``settings.cache_dir`` for one task.

    Progress messages are recorded through ``repository.add_log`` under the
    given ``task_id``.
    """

    def __init__(self, repository: Repository, task_id: str) -> None:
        """Store the log sink (*repository*) and the *task_id* to log under."""
        self.repository = repository
        self.task_id = task_id

    def download(self, url: str) -> Path:
        """Stream *url* into the cache directory and return the saved path.

        The body is first written to a sibling ``<name>.part`` file and only
        renamed to its final name once the whole stream has been written, so
        a failure mid-transfer never leaves a truncated file under the real
        name. The partial file is removed on any failure.

        Raises:
            httpx.HTTPStatusError: the server answered with an error status.
            ValueError: the response looks like a text/HTML page rather than
                a package file (see ``_validate_response``).
            Whatever ``validate_url_host`` raises for a disallowed host
            (defined outside this block).
        """
        validate_url_host(url, settings.allowed_download_hosts)
        settings.cache_dir.mkdir(parents=True, exist_ok=True)

        self.repository.add_log(self.task_id, "info", f"Downloading {url}")
        # NOTE(review): redirects are followed by httpx before
        # _validate_response checks the final host, so intermediate hops are
        # contacted without being validated here -- confirm this is acceptable.
        with httpx.stream("GET", url, follow_redirects=True, timeout=120) as response:
            response.raise_for_status()
            self._validate_response(url, response)
            destination = settings.cache_dir / _response_file_name(url, response)
            partial = destination.with_name(f"{destination.name}.part")
            try:
                with partial.open("wb") as handle:
                    for chunk in response.iter_bytes():
                        handle.write(chunk)
                # Publish the file only once it is complete.
                partial.replace(destination)
            except BaseException:
                # Clean up on every failure (including interrupts), then
                # re-raise so the caller still sees the original error.
                partial.unlink(missing_ok=True)
                raise

        self.repository.add_log(self.task_id, "info", f"Downloaded to {destination}")
        return destination

    def _validate_response(self, requested_url: str, response: httpx.Response) -> None:
        """Reject responses that should not be saved as a package.

        Re-validates the host of the final (post-redirect) URL against
        ``settings.allowed_download_hosts``, then raises ``ValueError`` when
        the Content-Type media type is ``text/html`` or ``text/plain``.
        """
        final_url = str(response.url)
        validate_url_host(final_url, settings.allowed_download_hosts)

        # Compare only the media type, ignoring parameters such as charset.
        content_type = response.headers.get("content-type", "").split(";", 1)[0].strip().lower()
        if content_type in {"text/html", "text/plain"}:
            raise ValueError(
                "download did not return a package file "
                f"(requested {requested_url}, final {final_url}, content-type {content_type or 'unknown'})"
            )
|