from __future__ import annotations

from email.message import Message
import re
from pathlib import Path
from urllib.parse import unquote, urlparse

import httpx

from app.config import settings
from app.storage.repository import Repository
from app.utils.validators import validate_url_host

# Every run of characters outside this allow-list is collapsed into one "-".
SAFE_FILE_RE = re.compile(r"[^a-zA-Z0-9._+-]+")


def _sanitize_file_name(value: str, fallback: str = "package.deb") -> str:
    """Reduce *value* to a conservative file-name alphabet.

    Runs of disallowed characters (including path separators) become a single
    "-", and leading/trailing "-" and "." are stripped.

    Args:
        value: Candidate file name; may be empty.
        fallback: Returned when nothing usable remains after sanitizing.

    Returns:
        The sanitized name, or *fallback* if the result would be empty.
    """
    name = SAFE_FILE_RE.sub("-", value).strip("-.")
    return name or fallback


def _safe_url_file_name(url: str) -> str:
    """Derive a sanitized file name from the path component of *url*.

    Args:
        url: The URL whose last path segment names the file.

    Returns:
        A sanitized file name; "package.deb" when the path has no final segment.
    """
    parsed = urlparse(url)
    name = Path(unquote(parsed.path)).name or "package.deb"
    if name == "download":
        # A bare ".../download" segment says nothing about the file, so use the
        # last three path segments instead when that many exist.
        parts = [part for part in parsed.path.split("/") if part]
        if len(parts) >= 3:
            name = "-".join(parts[-3:])
    return _sanitize_file_name(name)


def _content_disposition_file_name(value: str) -> str:
    """Extract and sanitize the filename from a Content-Disposition value.

    Args:
        value: Raw header value; may be empty.

    Returns:
        The sanitized filename, or "" when the header is empty or carries no
        usable filename.
    """
    if not value:
        return ""
    # Delegate header-parameter parsing to the stdlib email machinery.
    message = Message()
    message["content-disposition"] = value
    return _sanitize_file_name(message.get_filename() or "", fallback="")


def _media_type(response: httpx.Response) -> str:
    """Return the response Content-Type without parameters, lower-cased.

    Returns "" when the header is absent.
    """
    return response.headers.get("content-type", "").split(";", 1)[0].strip().lower()


def _response_file_name(url: str, response: httpx.Response) -> str:
    """Choose the local file name for a downloaded response.

    The Content-Disposition filename is preferred; otherwise the name is
    derived from the final response URL (or *url* if that is empty). A ".deb"
    suffix is appended when the server declares a Debian package but the
    chosen name does not already end in ".deb".

    Args:
        url: The originally requested URL.
        response: The response being saved.

    Returns:
        A non-empty, sanitized file name.
    """
    name = _content_disposition_file_name(response.headers.get("content-disposition", ""))
    if not name:
        name = _safe_url_file_name(str(response.url) or url)
    is_deb = _media_type(response) == "application/vnd.debian.binary-package"
    if is_deb and not name.lower().endswith(".deb"):
        name = f"{name}.deb"
    return name


class Downloader:
    """Download a package file into the cache directory for one task."""

    def __init__(self, repository: Repository, task_id: str) -> None:
        """Store the repository used for task logging and the task identifier."""
        self.repository = repository
        self.task_id = task_id

    def download(self, url: str) -> Path:
        """Download *url* into ``settings.cache_dir`` and return the file path.

        The body is streamed into a task-specific ``.part`` file beside the
        destination and renamed into place only after the whole stream has
        been written, so a failed transfer never leaves a truncated package
        under the final name.

        Args:
            url: The URL to fetch; its host is checked against
                ``settings.allowed_download_hosts`` before the request.

        Returns:
            Path of the downloaded file inside the cache directory.

        Raises:
            httpx.HTTPStatusError: If the response has an error status
                (via ``raise_for_status``).
            ValueError: If the response looks like a text/HTML page instead of
                a package (see ``_validate_response``).
            Exception: Whatever ``validate_url_host`` raises for a rejected
                host, and any I/O or transport error hit while streaming.
        """
        validate_url_host(url, settings.allowed_download_hosts)
        settings.cache_dir.mkdir(parents=True, exist_ok=True)
        self.repository.add_log(self.task_id, "info", f"Downloading {url}")

        # NOTE(review): with follow_redirects=True the intermediate redirect
        # hops are requested before any host check runs; only the final URL is
        # validated (in _validate_response). Confirm that is acceptable.
        with httpx.stream("GET", url, follow_redirects=True, timeout=120) as response:
            response.raise_for_status()
            self._validate_response(url, response)
            destination = settings.cache_dir / _response_file_name(url, response)

            # Include the task id so tasks fetching the same file name do not
            # share a partial file.
            task_tag = _sanitize_file_name(str(self.task_id), fallback="task")
            partial = destination.with_name(f"{destination.name}.{task_tag}.part")
            try:
                with partial.open("wb") as handle:
                    for chunk in response.iter_bytes():
                        handle.write(chunk)
                # Publish under the final name only once the file is complete.
                partial.replace(destination)
            except BaseException:
                # Drop the incomplete file, then let the original error propagate.
                partial.unlink(missing_ok=True)
                raise

        self.repository.add_log(self.task_id, "info", f"Downloaded to {destination}")
        return destination

    def _validate_response(self, requested_url: str, response: httpx.Response) -> None:
        """Reject responses that should not be saved as a package.

        Re-checks the host of the final (post-redirect) URL and refuses
        ``text/html`` and ``text/plain`` bodies.

        Args:
            requested_url: The URL originally requested, used in the error text.
            response: The response to inspect.

        Raises:
            ValueError: If the content type is ``text/html`` or ``text/plain``.
            Exception: Whatever ``validate_url_host`` raises for a rejected host.
        """
        final_url = str(response.url)
        validate_url_host(final_url, settings.allowed_download_hosts)
        content_type = _media_type(response)
        if content_type in {"text/html", "text/plain"}:
            raise ValueError(
                "download did not return a package file "
                f"(requested {requested_url}, final {final_url}, content-type {content_type})"
            )