Skip to content

YouTube Tools

enhancedtoolkits.youtube.YouTubeTools

YouTubeTools(rate_limit_delay: float = 0.5, timeout: int = 30, max_retries: int = 3, add_instructions: bool = True, **kwargs)

Bases: StrictToolkit

YouTube metadata + transcript tools.

  • Metadata uses YouTube oEmbed.
  • Transcripts use youtube-transcript-api.

All tools return JSON strings.

Parameters:

Name Type Description Default
rate_limit_delay float

Delay between outbound requests (seconds); values are clamped to the range 0.1–5.0.

0.5
timeout int

HTTP request timeout (seconds); values are clamped to the range 5–120.

30
max_retries int

Retry attempts for transcript fetches; values are clamped to the range 1–10.

3
add_instructions bool

Whether to attach LLM usage instructions.

True
Source code in src/enhancedtoolkits/youtube.py
def __init__(
    self,
    rate_limit_delay: float = 0.5,
    timeout: int = 30,
    max_retries: int = 3,
    add_instructions: bool = True,
    **kwargs,
):
    """Configure request limits and register the YouTube tool methods.

    Out-of-range numeric settings are clamped rather than rejected.

    Args:
        rate_limit_delay: Delay between outbound requests in seconds;
            clamped to the range 0.1-5.0.
        timeout: HTTP request timeout in seconds; clamped to 5-120.
        max_retries: Retry attempts for transcript fetches; clamped to 1-10.
        add_instructions: Whether to attach LLM usage instructions.
        **kwargs: Forwarded unchanged to the parent initializer.
    """
    # Clamp every numeric setting into its supported window.
    self.rate_limit_delay = float(max(0.1, min(5.0, rate_limit_delay)))
    self.timeout = int(max(5, min(120, timeout)))
    self.max_retries = int(max(1, min(10, max_retries)))
    self._last_request_time = 0.0

    if add_instructions:
        instructions = self.get_llm_usage_instructions()
    else:
        instructions = ""

    super().__init__(
        name="enhanced_youtube_tools",
        instructions=instructions,
        add_instructions=add_instructions,
        **kwargs,
    )

    # Registration order is kept: core tools first, transcript listings last.
    for tool in (
        self.fetch_youtube_video_metadata,
        self.fetch_youtube_video_transcript,
        self.extract_youtube_video_id,
        self.fetch_comprehensive_youtube_video_info,
        self.fetch_available_youtube_transcripts,
        self.fetch_youtube_transcript_languages,
    ):
        self.register(tool)

    startup_message = (
        "Enhanced YouTube Tools initialized - "
        f"Rate Limit: {self.rate_limit_delay}s, Timeout: {self.timeout}s"
    )
    log_info(startup_message)

Functions

fetch_youtube_video_metadata

fetch_youtube_video_metadata(video_url: str) -> str

Fetch basic metadata for a YouTube video via oEmbed.

Source code in src/enhancedtoolkits/youtube.py
def fetch_youtube_video_metadata(self, video_url: str) -> str:
    """Return oEmbed-based metadata for a YouTube video as a JSON string.

    Args:
        video_url: Video reference passed to ``_extract_video_id``.

    Returns:
        The enhanced oEmbed metadata, serialized by
        ``_format_json_response``.

    Raises:
        YouTubeValidationError: Re-raised unchanged from the helpers.
        YouTubeDataError: Re-raised unchanged, or raised to wrap any other
            unexpected exception.
    """
    try:
        resolved_id = self._extract_video_id(video_url)
        # Throttle only after the input has been resolved to a video id.
        self._apply_rate_limit()

        raw_oembed = self._fetch_oembed_data(resolved_id)
        return self._format_json_response(
            self._enhance_oembed_metadata(raw_oembed, resolved_id, video_url)
        )
    except (YouTubeValidationError, YouTubeDataError):
        # Domain errors already carry a meaningful message.
        raise
    except Exception as exc:  # pylint: disable=broad-exception-caught
        failure_note = (
            f"Unexpected error getting metadata for {video_url}: {exc}"
        )
        log_error(failure_note)
        raise YouTubeDataError(
            f"Failed to get video metadata: {exc}"
        ) from exc

fetch_youtube_video_transcript

fetch_youtube_video_transcript(video_url: str, language: str = 'en', auto_generated: bool = True) -> str

Fetch a transcript (if available) with optional language preference.

Source code in src/enhancedtoolkits/youtube.py
def fetch_youtube_video_transcript(
    self, video_url: str, language: str = "en", auto_generated: bool = True
) -> str:
    """Fetch a transcript (if available) with optional language preference.

    Args:
        video_url: Video reference passed to ``_extract_video_id``.
        language: Preferred language code. It is stripped and lower-cased;
            an empty, whitespace-only or ``None`` value falls back to
            ``"en"``.
        auto_generated: Coerced to ``bool`` and forwarded to
            ``_fetch_transcript_with_retry``.

    Returns:
        The transcript payload serialized by ``_format_json_response``.

    Raises:
        YouTubeValidationError: Re-raised unchanged from the helpers.
        YouTubeDataError: Re-raised unchanged, or raised to wrap any other
            unexpected exception.
    """
    try:
        video_id = self._extract_video_id(video_url)

        # Normalise first and fall back afterwards: a whitespace-only value
        # is truthy, so `(language or "en").strip()` would yield "" and skip
        # the default.
        normalized_language = (language or "").strip().lower() or "en"

        self._apply_rate_limit()
        transcript = self._fetch_transcript_with_retry(
            video_id=video_id,
            language=normalized_language,
            auto_generated=bool(auto_generated),
        )

        return self._format_json_response(transcript)
    except (YouTubeValidationError, YouTubeDataError):
        raise
    except Exception as e:  # pylint: disable=broad-exception-caught
        log_error(
            f"Unexpected error getting transcript for {video_url}: {e}"
        )
        raise YouTubeDataError(
            f"Failed to get video transcript: {e}"
        ) from e

fetch_available_youtube_transcripts

fetch_available_youtube_transcripts(video_url: str) -> str

List available transcript tracks for a video.

Source code in src/enhancedtoolkits/youtube.py
def fetch_available_youtube_transcripts(self, video_url: str) -> str:
    """Describe every transcript track of a video as a JSON string.

    The payload holds ``video_id``, ``manual_transcripts``,
    ``auto_generated_transcripts``, ``translatable_transcripts`` and a
    ``timestamp``. If the track listing fails with ``TranscriptsDisabled``,
    ``NoTranscriptFound`` or ``VideoUnavailable``, the lists stay empty and
    a ``message`` is added (plus a ``note`` for ``TranscriptsDisabled``).

    Args:
        video_url: Video reference passed to ``_extract_video_id``.

    Returns:
        The payload serialized by ``_format_json_response``.

    Raises:
        YouTubeValidationError: Re-raised unchanged from the helpers.
        YouTubeDataError: Re-raised unchanged, or raised to wrap any other
            unexpected exception.
    """
    try:
        resolved_id = self._extract_video_id(video_url)
        self._apply_rate_limit()

        api = YouTubeTranscriptApi()

        payload: dict[str, Any] = {
            "video_id": resolved_id,
            "manual_transcripts": [],
            "auto_generated_transcripts": [],
            "translatable_transcripts": [],
            "timestamp": datetime.now().isoformat(),
        }

        try:
            tracks = api.list(resolved_id)
        except (
            TranscriptsDisabled,
            NoTranscriptFound,
            VideoUnavailable,
        ) as unavailable:
            # Report "nothing available" as data instead of raising.
            payload["message"] = f"No transcripts available: {unavailable}"
            if isinstance(unavailable, TranscriptsDisabled):
                payload["note"] = (
                    "TranscriptsDisabled can be a false-positive on some cloud "
                    "environments (IP-based blocking/challenges). If it works locally, "
                    "try a different egress IP or use the library's ProxyConfig."
                )
            return self._format_json_response(payload)

        for track in tracks:
            translatable = bool(getattr(track, "is_translatable", False))
            entry = {
                "language": track.language,
                "language_code": track.language_code,
                "is_generated": track.is_generated,
                "is_translatable": translatable,
            }

            # Each track lands in exactly one of the two origin buckets.
            bucket = (
                "auto_generated_transcripts"
                if track.is_generated
                else "manual_transcripts"
            )
            payload[bucket].append(entry)

            # Translatable tracks are also listed separately.
            if translatable:
                payload["translatable_transcripts"].append(entry)

        return self._format_json_response(payload)

    except (YouTubeValidationError, YouTubeDataError):
        raise
    except Exception as exc:  # pylint: disable=broad-exception-caught
        log_error(
            f"Unexpected error getting available transcripts for {video_url}: {exc}"
        )
        raise YouTubeDataError(
            f"Failed to get available transcripts: {exc}"
        ) from exc

fetch_youtube_transcript_languages

fetch_youtube_transcript_languages(video_url: str) -> str

Return a simplified list of available transcript language codes.

Source code in src/enhancedtoolkits/youtube.py
def fetch_youtube_transcript_languages(self, video_url: str) -> str:
    """Return a simplified list of available transcript language codes.

    Built on top of ``fetch_available_youtube_transcripts``: the language
    codes of the manual and auto-generated tracks are merged, de-duplicated
    and sorted.

    Args:
        video_url: Video reference forwarded to
            ``fetch_available_youtube_transcripts``.

    Returns:
        A JSON string with ``video_id``, ``available_languages``,
        ``common_languages_available`` (the entries of
        ``self.COMMON_LANGUAGES`` that are available, in that order) and
        ``timestamp``. Any non-empty ``error``, ``message`` or ``note``
        from the underlying listing is forwarded.

    Raises:
        YouTubeValidationError: Re-raised unchanged from the helpers.
        YouTubeDataError: Re-raised unchanged, or raised to wrap any other
            unexpected exception.
    """
    try:
        transcripts = json.loads(
            self.fetch_available_youtube_transcripts(video_url)
        )

        language_codes: set[str] = set()
        for key in ("manual_transcripts", "auto_generated_transcripts"):
            for item in transcripts.get(key, []) or []:
                code = item.get("language_code")
                if code:
                    language_codes.add(code)

        result: dict[str, Any] = {
            "video_id": transcripts.get("video_id"),
            "available_languages": sorted(language_codes),
            "common_languages_available": [
                lang
                for lang in self.COMMON_LANGUAGES
                if lang in language_codes
            ],
            "timestamp": datetime.now().isoformat(),
        }

        # The listing reports failures under "message"/"note", so forward
        # those alongside "error"; otherwise an empty list has no
        # explanation.
        for diagnostic_key in ("error", "message", "note"):
            diagnostic = transcripts.get(diagnostic_key)
            if diagnostic:
                result[diagnostic_key] = diagnostic

        return self._format_json_response(result)
    except (YouTubeValidationError, YouTubeDataError):
        raise
    except Exception as e:  # pylint: disable=broad-exception-caught
        log_error(
            f"Unexpected error getting transcript languages for {video_url}: {e}"
        )
        raise YouTubeDataError(
            f"Failed to get transcript languages: {e}"
        ) from e

extract_youtube_video_id

extract_youtube_video_id(video_url: str) -> str

Extract a YouTube video id from a URL (or accept a raw id).

Source code in src/enhancedtoolkits/youtube.py
def extract_youtube_video_id(self, video_url: str) -> str:
    """Resolve a video reference to its id and return it as a JSON string.

    Args:
        video_url: A YouTube URL or raw id, passed to
            ``_extract_video_id``.

    Returns:
        A JSON string with ``video_id`` and a ``timestamp``.

    Raises:
        YouTubeValidationError: Re-raised unchanged from the helper.
        YouTubeDataError: Re-raised unchanged, or raised to wrap any other
            unexpected exception.
    """
    try:
        # Dict values are evaluated in order: id first, then the timestamp.
        payload = {
            "video_id": self._extract_video_id(video_url),
            "timestamp": datetime.now().isoformat(),
        }
        return self._format_json_response(payload)
    except (YouTubeValidationError, YouTubeDataError):
        raise
    except Exception as exc:  # pylint: disable=broad-exception-caught
        failure_note = (
            f"Unexpected error extracting video id for {video_url}: {exc}"
        )
        log_error(failure_note)
        raise YouTubeDataError(f"Failed to extract video id: {exc}") from exc

fetch_comprehensive_youtube_video_info

fetch_comprehensive_youtube_video_info(video_url: str, include_transcript: bool = False) -> str

Combine oEmbed metadata and optional transcript info into one response.

Source code in src/enhancedtoolkits/youtube.py
def fetch_comprehensive_youtube_video_info(
    self, video_url: str, include_transcript: bool = False
) -> str:
    """Combine oEmbed metadata and optional transcript info into one response.

    Args:
        video_url: Video reference passed to ``_extract_video_id`` and to
            the other tool methods.
        include_transcript: When true, also add ``transcript_info`` (the
            available language lists) and, if any language exists, a
            ``transcript`` (English if available, otherwise the first
            listed language).

    Returns:
        A JSON string with ``video_id``, ``video_url``, ``metadata`` and
        ``timestamp``, plus the transcript fields described above.
        Transcript retrieval is best-effort: on failure an ``error`` entry
        is added to ``transcript_info`` and any language data already
        collected is kept.

    Raises:
        YouTubeValidationError: Re-raised unchanged from the id/metadata
            steps.
        YouTubeDataError: Re-raised unchanged, or raised to wrap any other
            unexpected exception outside the transcript step.
    """
    try:
        video_id = self._extract_video_id(video_url)

        metadata = json.loads(self.fetch_youtube_video_metadata(video_url))

        result: dict[str, Any] = {
            "video_id": video_id,
            "video_url": video_url,
            "metadata": metadata,
            "timestamp": datetime.now().isoformat(),
        }

        if include_transcript:
            # languages + best-effort transcript
            try:
                languages = json.loads(
                    self.fetch_youtube_transcript_languages(video_url)
                )
                result["transcript_info"] = {
                    "available_languages": languages.get(
                        "available_languages", []
                    ),
                    "common_languages_available": languages.get(
                        "common_languages_available", []
                    ),
                }

                # Fetch English if available, else first available language.
                preferred: Optional[str] = None
                available = languages.get("available_languages", []) or []
                if "en" in available:
                    preferred = "en"
                elif available:
                    preferred = available[0]

                if preferred:
                    result["transcript"] = json.loads(
                        self.fetch_youtube_video_transcript(
                            video_url, preferred
                        )
                    )
            except (
                Exception
            ) as e:  # pylint: disable=broad-exception-caught
                log_warning(f"Could not add transcript data: {e}")
                # Add the error without discarding language lists that were
                # collected before the failure.
                result.setdefault("transcript_info", {})["error"] = str(e)

        return self._format_json_response(result)

    except (YouTubeValidationError, YouTubeDataError):
        raise
    except Exception as e:  # pylint: disable=broad-exception-caught
        log_error(
            f"Unexpected error getting video info for {video_url}: {e}"
        )
        raise YouTubeDataError(
            f"Failed to get comprehensive video info: {e}"
        ) from e

get_llm_usage_instructions staticmethod

get_llm_usage_instructions() -> str

Return short, text-first usage instructions for the YouTube tools.

Source code in src/enhancedtoolkits/youtube.py
    @staticmethod
    def get_llm_usage_instructions() -> str:
        """Return short, text-first usage instructions for the YouTube tools."""
        return """
<youtube_tools>
YouTube metadata (oEmbed) + transcripts (youtube-transcript-api)

GOAL
- YouTube metadata (oEmbed) + transcripts (youtube-transcript-api). All tools return JSON strings.

CORE TOOLS
- fetch_youtube_video_metadata(video_url)
- extract_youtube_video_id(video_url)
- fetch_comprehensive_youtube_video_info(video_url, include_transcript=False)

TRANSCRIPT TOOLS
- fetch_available_youtube_transcripts(video_url)
- fetch_youtube_transcript_languages(video_url)
- fetch_youtube_video_transcript(video_url, language='en', auto_generated=True)

LIMITATIONS
- oEmbed does NOT provide: views, likes, upload date, duration.

CONTEXT-SIZE RULES (IMPORTANT)
- Transcripts can be very large; fetch only when needed.
- Do not dump full transcripts into the final answer; summarize + quote short excerpts.

NOTES
- If transcripts work locally but fail on servers with TranscriptsDisabled, your egress IP may be blocked/challenged.
</youtube_tools>
"""