"""Open-source reCAPTCHA v2 audio-challenge solver (X13, Tier 1).

Pure open-source, zero-API-cost: switch the reCAPTCHA widget to its **audio**
challenge, download the mp3, transcribe it with a **local Whisper** model
(``faster-whisper``), and submit the transcript. This is the well-known
"Buster"-style technique. It is intentionally a *best-effort* solver —
reCAPTCHA actively fights audio solving, so a non-trivial failure rate is
expected and handled by the Tier-2 human fallback (INV-CF3), never hidden.

Model is loaded lazily and cached; ``WHISPER_MODEL`` (default ``small``) and
``WHISPER_DEVICE`` (default ``cpu``) tune it. The dependency is optional — if
``faster-whisper`` isn't installed, ``transcribe_audio`` raises a clear error
so the caller falls back to a human solve rather than crashing the service.
"""

from __future__ import annotations

import asyncio
import logging
import os
import tempfile

import httpx

# Module-scoped logger, named after this module's import path.
logger = logging.getLogger(__name__)

# Whisper settings are read from the environment once, at import time.
_WHISPER_MODEL_NAME = os.getenv("WHISPER_MODEL", "small")
_WHISPER_DEVICE = os.getenv("WHISPER_DEVICE", "cpu")

# Process-wide model cache; stays ``None`` until ``_get_model`` first loads it.
_model = None


class AudioSolveUnavailable(RuntimeError):
    """Signal that local audio solving is impossible in this environment.

    Raised when the optional ``faster-whisper`` package cannot be imported,
    so callers can fall back to another solving path instead of crashing.
    """


def _get_model():
    """Return the shared Whisper model, loading it on first use.

    The loaded instance is stored in the module-level ``_model`` so later
    calls return it without loading again. ``faster_whisper`` is imported
    here rather than at module import time, which keeps it optional.

    Returns:
        The cached ``faster_whisper.WhisperModel`` instance.

    Raises:
        AudioSolveUnavailable: If ``faster_whisper`` cannot be imported.
    """
    global _model
    if _model is None:
        try:
            from faster_whisper import WhisperModel  # type: ignore
        except ImportError as import_error:
            # Message (Hebrew): faster-whisper is not installed, so reCAPTCHA
            # audio cannot be solved locally; install it or rely on the human
            # (VNC) fallback.
            raise AudioSolveUnavailable(
                "faster-whisper אינו מותקן — לא ניתן לפתור reCAPTCHA אודיו מקומית. "
                "התקן `pip install faster-whisper` או הסתמך על fallback אנושי (VNC)."
            ) from import_error
        logger.info(
            "loading whisper model %s on %s", _WHISPER_MODEL_NAME, _WHISPER_DEVICE
        )
        # Only the model name and device are configurable; the compute type
        # is fixed to int8.
        _model = WhisperModel(
            _WHISPER_MODEL_NAME,
            device=_WHISPER_DEVICE,
            compute_type="int8",
        )
    return _model


async def download_audio(audio_url: str) -> bytes:
    """Fetch the audio challenge at ``audio_url`` and return its raw bytes.

    Redirects are followed and the request uses a 30-second timeout.

    Raises:
        httpx.HTTPStatusError: If the response carries an error status
            (raised by ``raise_for_status``).
    """
    async with httpx.AsyncClient(timeout=30, follow_redirects=True) as client:
        response = await client.get(audio_url)
        response.raise_for_status()
        payload = response.content
    return payload


def transcribe_audio(mp3_bytes: bytes) -> str:
    """Transcribe a reCAPTCHA audio clip to its (English) digit/word phrase.

    The clip is written to a temporary ``.mp3`` file, transcribed by the
    cached model with the language forced to English, and the text is
    normalised to lower-case alphanumeric words separated by single spaces.

    Args:
        mp3_bytes: Raw bytes of the downloaded audio challenge.

    Returns:
        The normalised transcript; an empty string if no text was produced.

    Raises:
        AudioSolveUnavailable: If the local model isn't installed.
    """
    model = _get_model()
    # delete=False plus an explicit unlink: the handle is closed before the
    # model opens the file by name. A still-open NamedTemporaryFile cannot be
    # re-opened by name on Windows, so it must not be held during transcription.
    tmp = tempfile.NamedTemporaryFile(suffix=".mp3", delete=False)
    try:
        with tmp:
            tmp.write(mp3_bytes)
        # reCAPTCHA audio is English regardless of page locale.
        segments, _info = model.transcribe(tmp.name, language="en")
        # Join inside the try so the file still exists while the segments
        # are consumed (faster-whisper yields them lazily).
        text = " ".join(seg.text for seg in segments).strip()
    finally:
        try:
            os.unlink(tmp.name)
        except OSError:
            # Best-effort cleanup; a leftover temp file must not mask the result.
            logger.debug("could not remove temp audio file %s", tmp.name)
    # Normalise: reCAPTCHA expects the bare phrase, lower-case, no punctuation.
    cleaned = "".join(ch for ch in text.lower() if ch.isalnum() or ch.isspace())
    return " ".join(cleaned.split())


async def solve_from_audio_url(audio_url: str) -> str:
    """Download an audio-challenge URL and return its normalised transcript.

    ``transcribe_audio`` is synchronous and CPU-bound, so it is run in the
    event loop's default executor; calling it inline would stall every other
    coroutine for the duration of the transcription.

    Args:
        audio_url: URL of the audio challenge to fetch.

    Returns:
        The transcript as returned by ``transcribe_audio``.

    Raises:
        AudioSolveUnavailable: Propagated from ``transcribe_audio`` when the
            local model isn't installed.
    """
    mp3 = await download_audio(audio_url)
    loop = asyncio.get_running_loop()
    # NOTE(review): concurrent first calls may each load the model in their
    # own worker thread; consider guarding ``_get_model`` with a lock.
    return await loop.run_in_executor(None, transcribe_audio, mp3)