Files
InstallerRobot/agent/app/core/downloader.py
2026-05-26 15:43:56 +07:00

84 lines
3.0 KiB
Python

from __future__ import annotations
from email.message import Message
import re
from pathlib import Path
from urllib.parse import unquote, urlparse
import httpx
from app.config import settings
from app.storage.repository import Repository
from app.utils.validators import validate_url_host
SAFE_FILE_RE = re.compile(r"[^a-zA-Z0-9._+-]+")
def _sanitize_file_name(value: str, fallback: str = "package.deb") -> str:
name = SAFE_FILE_RE.sub("-", value).strip("-.")
return name or fallback
def _safe_url_file_name(url: str) -> str:
    """Derive a sanitized file name from the path component of *url*.

    The last segment of the percent-decoded path is used, or ``package.deb``
    when the path has no final segment.  When that segment is exactly
    ``download`` and the path has at least three non-empty segments, the
    last three segments are joined with ``-`` instead, so the result carries
    more than the generic word ``download``.

    The chosen name is always passed through ``_sanitize_file_name``.
    """
    parsed = urlparse(url)
    name = Path(unquote(parsed.path)).name or "package.deb"
    if name == "download":
        # Decode each segment *after* splitting: an escaped "/" then cannot
        # create extra segments, and escapes such as "%2B" become "+" instead
        # of being mangled into "-2B" by the sanitizer.
        parts = [unquote(part) for part in parsed.path.split("/") if part]
        if len(parts) >= 3:
            name = "-".join(parts[-3:])
    return _sanitize_file_name(name)
def _content_disposition_file_name(value: str) -> str:
    """Extract a sanitized file name from a Content-Disposition header value.

    Returns an empty string when *value* is empty, when the header carries
    no file name, or when nothing remains of the name after sanitizing.
    """
    if value:
        # Reuse the stdlib e-mail header parser to read the file name.
        header = Message()
        header["content-disposition"] = value
        raw_name = header.get_filename()
        return _sanitize_file_name(raw_name if raw_name else "", fallback="")
    return ""
def _response_file_name(url: str, response: httpx.Response) -> str:
    """Choose the local file name for a downloaded response.

    The name from the ``content-disposition`` header is preferred; when it
    yields nothing, the name is derived from the response URL (or from *url*
    if that is empty).  A ``.deb`` suffix is appended when the response is
    declared as ``application/vnd.debian.binary-package`` and the name does
    not already end in ``.deb`` (case-insensitively).
    """
    disposition = response.headers.get("content-disposition", "")
    name = _content_disposition_file_name(disposition)
    if not name:
        name = _safe_url_file_name(str(response.url) or url)

    # Drop any parameters (e.g. "; charset=...") and normalise the media type.
    raw_type = response.headers.get("content-type", "")
    media_type = raw_type.partition(";")[0].strip().lower()

    is_deb_payload = media_type == "application/vnd.debian.binary-package"
    if is_deb_payload and not name.lower().endswith(".deb"):
        return f"{name}.deb"
    return name
class Downloader:
    """Download a package file into the local cache on behalf of one task.

    Progress messages are recorded through ``repository.add_log`` under the
    given ``task_id``.
    """

    def __init__(self, repository: Repository, task_id: str) -> None:
        """Remember the repository used for logging and the owning task id."""
        self.repository = repository
        self.task_id = task_id

    def download(self, url: str) -> Path:
        """Fetch *url* into ``settings.cache_dir`` and return the saved path.

        The body is streamed into a sibling ``<name>.part`` file and moved
        onto the final name only after the whole body has been received, so
        a failed or interrupted transfer never leaves a truncated file under
        the final package name.

        Raises:
            httpx.HTTPStatusError: The server answered with an error status.
            ValueError: The response is an HTML or plain-text page rather
                than a package file.

        Errors raised by ``validate_url_host`` propagate unchanged.
        """
        validate_url_host(url, settings.allowed_download_hosts)
        settings.cache_dir.mkdir(parents=True, exist_ok=True)
        self.repository.add_log(self.task_id, "info", f"Downloading {url}")
        # NOTE(review): redirects are followed automatically, so only the
        # requested URL and the final URL are checked against the allow-list;
        # intermediate hops are not validated.
        with httpx.stream("GET", url, follow_redirects=True, timeout=120) as response:
            response.raise_for_status()
            self._validate_response(url, response)
            destination = settings.cache_dir / _response_file_name(url, response)
            partial = destination.with_name(f"{destination.name}.part")
            try:
                with partial.open("wb") as handle:
                    for chunk in response.iter_bytes():
                        handle.write(chunk)
                # Only a completely received body reaches the final name.
                partial.replace(destination)
            finally:
                # No-op after a successful replace; removes the leftover
                # partial file when the transfer failed.
                partial.unlink(missing_ok=True)
        self.repository.add_log(self.task_id, "info", f"Downloaded to {destination}")
        return destination

    def _validate_response(self, requested_url: str, response: httpx.Response) -> None:
        """Reject responses from disallowed hosts or with a text payload.

        The final URL (after any redirects) is checked against
        ``settings.allowed_download_hosts``, and a ``text/html`` or
        ``text/plain`` content type is treated as "not a package file".

        Raises:
            ValueError: The content type is ``text/html`` or ``text/plain``.
        """
        final_url = str(response.url)
        validate_url_host(final_url, settings.allowed_download_hosts)
        content_type = response.headers.get("content-type", "").split(";", 1)[0].strip().lower()
        if content_type in {"text/html", "text/plain"}:
            raise ValueError(
                "download did not return a package file "
                f"(requested {requested_url}, final {final_url}, content-type {content_type or 'unknown'})"
            )