Skip to content

YouTube Tools

enhancedtoolkits.youtube.EnhancedYouTubeTools

EnhancedYouTubeTools(rate_limit_delay: float = 0.5, timeout: int = 30, max_retries: int = 3, **kwargs)

Bases: StrictToolkit

Enhanced YouTube Tools v2.0

A production-ready YouTube toolkit with comprehensive error handling, input validation, transcript support, and enhanced metadata extraction.

Parameters:

Name Type Description Default
rate_limit_delay float

Delay between API requests in seconds

0.5
timeout int

Request timeout in seconds

30
max_retries int

Maximum number of retry attempts

3
Source code in src/enhancedtoolkits/youtube.py
def __init__(
    self,
    rate_limit_delay: float = 0.5,  # 500ms between requests
    timeout: int = 30,
    max_retries: int = 3,
    **kwargs,
):
    """
    Initialize Enhanced YouTube Tools.

    Out-of-range settings are clamped rather than rejected:
    ``rate_limit_delay`` to 0.1-5.0 seconds, ``timeout`` to 5-120 seconds
    and ``max_retries`` to 1-10 attempts.

    Args:
        rate_limit_delay: Delay between API requests in seconds
        timeout: Request timeout in seconds
        max_retries: Maximum number of retry attempts
        **kwargs: Extra keyword arguments forwarded to the base class
            constructor
    """
    # Set before the base constructor is invoked (order kept from the
    # original implementation).
    self.add_instructions = True
    self.instructions = EnhancedYouTubeTools.get_llm_usage_instructions()

    super().__init__(name="enhanced_youtube_tools", **kwargs)

    # Configuration (clamped to the supported ranges)
    self.rate_limit_delay = max(0.1, min(5.0, rate_limit_delay))
    self.timeout = max(5, min(120, timeout))
    self.max_retries = max(1, min(10, max_retries))
    self.last_request_time = 0.0

    # Register methods
    self.register(self.fetch_youtube_video_metadata)
    self.register(self.fetch_youtube_video_transcript)
    self.register(self.extract_youtube_video_id)
    self.register(self.fetch_comprehensive_youtube_video_info)

    # Register backward compatibility methods
    self.register(self.legacy_fetch_youtube_video_metadata)
    self.register(self.legacy_fetch_youtube_video_transcript)

    # Transcript listing tools are only exposed when the optional
    # transcript dependency was importable.
    if TRANSCRIPT_API_AVAILABLE:
        self.register(self.fetch_available_youtube_transcripts)
        self.register(self.fetch_youtube_transcript_languages)

    # Log the effective (clamped) values, not the raw arguments, so the
    # message reflects the configuration that is really in use.
    log_info(
        f"Enhanced YouTube Tools initialized - Rate Limit: {self.rate_limit_delay}s, "
        f"Timeout: {self.timeout}s, Transcript API: {TRANSCRIPT_API_AVAILABLE}"
    )

Functions

fetch_youtube_video_metadata

fetch_youtube_video_metadata(video_url: str) -> str

Retrieve comprehensive metadata for a YouTube video.

Parameters:

Name Type Description Default
video_url str

The URL of the YouTube video

required

Returns:

Type Description
str

JSON string containing video metadata

Raises:

Type Description
YouTubeValidationError

If URL is invalid

YouTubeDataError

If metadata cannot be retrieved

Source code in src/enhancedtoolkits/youtube.py
def fetch_youtube_video_metadata(self, video_url: str) -> str:
    """
    Retrieve comprehensive metadata for a YouTube video.

    The video ID is extracted from the URL, the rate limit is applied, the
    oEmbed data is fetched and enhanced, and the result is serialized.

    Args:
        video_url: The URL of the YouTube video

    Returns:
        JSON string containing video metadata

    Raises:
        YouTubeValidationError: If URL is invalid
        YouTubeDataError: If metadata cannot be retrieved; unexpected
            errors are wrapped and chained as the cause
    """
    try:
        video_id = self._extract_video_id(video_url)
        log_debug(f"Getting metadata for video: {video_id}")

        # Apply rate limiting
        self._apply_rate_limit()

        # Get metadata from YouTube oEmbed API
        metadata = self._fetch_oembed_data(video_id)

        # Enhance metadata with additional information
        enhanced_metadata = self._enhance_metadata(metadata, video_id, video_url)

        return self._format_json_response(enhanced_metadata)

    except (YouTubeValidationError, YouTubeDataError):
        # Domain errors already carry a meaningful message; pass them through.
        raise
    except Exception as e:
        log_error(f"Unexpected error getting metadata for {video_url}: {e}")
        # Chain explicitly so the original traceback is kept as the cause.
        raise YouTubeDataError(f"Failed to get video metadata: {e}") from e

fetch_youtube_video_transcript

fetch_youtube_video_transcript(video_url: str, language: str = 'en', auto_generated: bool = True) -> str

Retrieve transcript for a YouTube video with language support.

Parameters:

Name Type Description Default
video_url str

The URL of the YouTube video

required
language str

Preferred language code (e.g., 'en', 'es', 'fr')

'en'
auto_generated bool

Whether to include auto-generated transcripts

True

Returns:

Type Description
str

JSON string containing transcript data

Raises:

Type Description
YouTubeValidationError

If URL is invalid

YouTubeDataError

If transcript cannot be retrieved

Source code in src/enhancedtoolkits/youtube.py
def fetch_youtube_video_transcript(
    self, video_url: str, language: str = "en", auto_generated: bool = True
) -> str:
    """
    Retrieve transcript for a YouTube video with language support.

    Args:
        video_url: The URL of the YouTube video
        language: Preferred language code (e.g., 'en', 'es', 'fr')
        auto_generated: Whether to include auto-generated transcripts

    Returns:
        JSON string containing transcript data

    Raises:
        YouTubeValidationError: If URL is invalid
        YouTubeDataError: If the transcript API is not available or the
            transcript cannot be retrieved; unexpected errors are wrapped
            and chained as the cause
    """
    try:
        # Fail fast when the optional transcript dependency is missing.
        if not TRANSCRIPT_API_AVAILABLE:
            raise YouTubeDataError(
                "Transcript API not available. Install youtube-transcript-api package."
            )

        video_id = self._extract_video_id(video_url)
        log_debug(f"Getting transcript for video: {video_id} (language: {language})")

        # Apply rate limiting
        self._apply_rate_limit()

        # Get transcript with retry logic
        transcript_data = self._fetch_transcript_with_retry(
            video_id, language, auto_generated
        )

        return self._format_json_response(transcript_data)

    except (YouTubeValidationError, YouTubeDataError):
        # Domain errors already carry a meaningful message; pass them through.
        raise
    except Exception as e:
        log_error(f"Unexpected error getting transcript for {video_url}: {e}")
        # Chain explicitly so the original traceback is kept as the cause.
        raise YouTubeDataError(f"Failed to get video transcript: {e}") from e

fetch_available_youtube_transcripts

fetch_available_youtube_transcripts(video_url: str) -> str

Get list of available transcript languages for a video.

Parameters:

Name Type Description Default
video_url str

The URL of the YouTube video

required

Returns:

Type Description
str

JSON string containing available transcript languages

Raises:

Type Description
YouTubeValidationError

If URL is invalid

YouTubeDataError

If transcript info cannot be retrieved

Source code in src/enhancedtoolkits/youtube.py
def fetch_available_youtube_transcripts(self, video_url: str) -> str:
    """
    Get list of available transcript languages for a video.

    The JSON payload always contains ``video_id``, ``manual_transcripts``,
    ``auto_generated_transcripts``, ``translatable_transcripts`` and
    ``timestamp``. When the video has no transcripts (or they are
    disabled) the three lists are empty and the payload additionally
    carries ``available_transcripts`` (empty list, kept for backward
    compatibility) and an explanatory ``message``.

    Args:
        video_url: The URL of the YouTube video

    Returns:
        JSON string containing available transcript languages

    Raises:
        YouTubeValidationError: If URL is invalid
        YouTubeDataError: If the transcript API is not available or
            transcript info cannot be retrieved; unexpected errors are
            wrapped and chained as the cause
    """
    try:
        if not TRANSCRIPT_API_AVAILABLE:
            raise YouTubeDataError(
                "Transcript API not available. Install youtube-transcript-api package."
            )

        video_id = self._extract_video_id(video_url)
        log_debug(f"Getting available transcripts for video: {video_id}")

        # Apply rate limiting
        self._apply_rate_limit()

        # The availability flag was verified above; only the API handle
        # itself still needs a defensive check.
        if YouTubeTranscriptApi is None:
            raise YouTubeDataError("Transcript API not available")

        try:
            transcript_list = YouTubeTranscriptApi.list_transcripts(video_id)

            available_transcripts = {
                "video_id": video_id,
                "manual_transcripts": [],
                "auto_generated_transcripts": [],
                "translatable_transcripts": [],
            }

            for transcript in transcript_list:
                transcript_info = {
                    "language": transcript.language,
                    "language_code": transcript.language_code,
                    "is_generated": transcript.is_generated,
                    "is_translatable": transcript.is_translatable,
                }

                # Every transcript lands in exactly one of the
                # manual/auto-generated buckets ...
                bucket = (
                    "auto_generated_transcripts"
                    if transcript.is_generated
                    else "manual_transcripts"
                )
                available_transcripts[bucket].append(transcript_info)

                # ... and additionally in the translatable bucket when it
                # can be translated.
                if transcript.is_translatable:
                    available_transcripts["translatable_transcripts"].append(
                        transcript_info
                    )

            available_transcripts["timestamp"] = datetime.now().isoformat()
            return self._format_json_response(available_transcripts)

        except (TranscriptsDisabled, NoTranscriptFound) as e:
            # Report "nothing available" with the same keys as the success
            # payload so consumers can read either response uniformly.
            return self._format_json_response(
                {
                    "video_id": video_id,
                    "manual_transcripts": [],
                    "auto_generated_transcripts": [],
                    "translatable_transcripts": [],
                    "available_transcripts": [],
                    "message": f"No transcripts available: {e}",
                    "timestamp": datetime.now().isoformat(),
                }
            )

    except (YouTubeValidationError, YouTubeDataError):
        # Domain errors already carry a meaningful message; pass them through.
        raise
    except Exception as e:
        log_error(
            f"Unexpected error getting available transcripts for {video_url}: {e}"
        )
        # Chain explicitly so the original traceback is kept as the cause.
        raise YouTubeDataError(f"Failed to get available transcripts: {e}") from e

fetch_youtube_transcript_languages

fetch_youtube_transcript_languages(video_url: str) -> str

Get simplified list of available transcript language codes.

Parameters:

Name Type Description Default
video_url str

The URL of the YouTube video

required

Returns:

Type Description
str

JSON string containing language codes

Raises:

Type Description
YouTubeValidationError

If URL is invalid

YouTubeDataError

If language info cannot be retrieved

Source code in src/enhancedtoolkits/youtube.py
def fetch_youtube_transcript_languages(self, video_url: str) -> str:
    """
    Get simplified list of available transcript language codes.

    Builds on fetch_available_youtube_transcripts() and merges the
    language codes of the manual and auto-generated transcripts into one
    sorted, de-duplicated list.

    Args:
        video_url: The URL of the YouTube video

    Returns:
        JSON string containing ``video_id``, ``available_languages``
        (sorted codes), ``common_languages_available`` (the entries of
        ``self.COMMON_LANGUAGES`` that are available, in that attribute's
        order) and ``timestamp``

    Raises:
        YouTubeValidationError: If URL is invalid
        YouTubeDataError: If language info cannot be retrieved; unexpected
            errors are wrapped and chained as the cause
    """
    try:
        transcripts_data = self.fetch_available_youtube_transcripts(video_url)
        transcripts = json.loads(transcripts_data)

        # Collect all language codes; a set removes codes that exist both
        # as manual and as auto-generated transcript.
        language_codes = set()
        for transcript_type in ("manual_transcripts", "auto_generated_transcripts"):
            language_codes.update(
                transcript["language_code"]
                for transcript in transcripts.get(transcript_type, [])
            )

        result = {
            "video_id": transcripts.get("video_id"),
            "available_languages": sorted(language_codes),
            "common_languages_available": [
                lang for lang in self.COMMON_LANGUAGES if lang in language_codes
            ],
            "timestamp": datetime.now().isoformat(),
        }

        return self._format_json_response(result)

    except (YouTubeValidationError, YouTubeDataError):
        # Domain errors already carry a meaningful message; pass them through.
        raise
    except Exception as e:
        log_error(
            f"Unexpected error getting transcript languages for {video_url}: {e}"
        )
        # Chain explicitly so the original traceback is kept as the cause.
        raise YouTubeDataError(f"Failed to get transcript languages: {e}") from e

extract_youtube_video_id

extract_youtube_video_id(video_url: str) -> str

Extract video ID from YouTube URL (public method).

Parameters:

Name Type Description Default
video_url str

The URL of the YouTube video

required

Returns:

Type Description
str

Video ID string

Raises:

Type Description
YouTubeValidationError

If URL is invalid or video ID cannot be extracted

Source code in src/enhancedtoolkits/youtube.py
def extract_youtube_video_id(self, video_url: str) -> str:
    """
    Public entry point for pulling the video ID out of a YouTube URL.

    Delegates to the internal ``_extract_video_id`` helper.

    Args:
        video_url: The URL of the YouTube video

    Returns:
        Video ID string

    Raises:
        YouTubeValidationError: If URL is invalid or video ID cannot be extracted
    """
    video_id = self._extract_video_id(video_url)
    return video_id

fetch_comprehensive_youtube_video_info

fetch_comprehensive_youtube_video_info(video_url: str, include_transcript: bool = False) -> str

Get comprehensive video information including metadata and optionally transcript.

Parameters:

Name Type Description Default
video_url str

The URL of the YouTube video

required
include_transcript bool

Whether to include transcript data

False

Returns:

Type Description
str

JSON string containing comprehensive video information

Raises:

Type Description
YouTubeValidationError

If URL is invalid

YouTubeDataError

If video info cannot be retrieved

Source code in src/enhancedtoolkits/youtube.py
def fetch_comprehensive_youtube_video_info(self, video_url: str, include_transcript: bool = False) -> str:
    """
    Get comprehensive video information including metadata and optionally transcript.

    Transcript handling is best-effort: problems while collecting
    transcript data are recorded in the result (``transcript_info.error``
    or ``transcript_error``) instead of failing the whole call. The
    transcript body is only fetched when an English ("en") transcript is
    listed as available.

    Args:
        video_url: The URL of the YouTube video
        include_transcript: Whether to include transcript data

    Returns:
        JSON string containing comprehensive video information

    Raises:
        YouTubeValidationError: If URL is invalid
        YouTubeDataError: If video info cannot be retrieved; unexpected
            errors are wrapped and chained as the cause
    """
    try:
        video_id = self._extract_video_id(video_url)
        log_debug(f"Getting comprehensive info for video: {video_id}")

        # Get metadata
        metadata_str = self.fetch_youtube_video_metadata(video_url)
        metadata = json.loads(metadata_str)

        comprehensive_info = {
            "video_id": video_id,
            "video_url": video_url,
            "metadata": metadata,
            "timestamp": datetime.now().isoformat(),
        }

        if include_transcript:
            if TRANSCRIPT_API_AVAILABLE:
                try:
                    # Get available languages first
                    languages_str = self.fetch_youtube_transcript_languages(video_url)
                    languages_data = json.loads(languages_str)
                    available_languages = languages_data.get("available_languages", [])

                    comprehensive_info["transcript_info"] = {
                        "available_languages": available_languages,
                        "common_languages_available": languages_data.get(
                            "common_languages_available", []
                        ),
                    }

                    # Try to get English transcript if available
                    if "en" in available_languages:
                        try:
                            transcript_str = self.fetch_youtube_video_transcript(video_url, "en")
                            comprehensive_info["transcript"] = json.loads(transcript_str)
                        except Exception as e:
                            # Deliberate best-effort: keep the metadata and
                            # report the transcript failure alongside it.
                            log_warning(f"Could not get English transcript: {e}")
                            comprehensive_info["transcript_error"] = str(e)

                except Exception as e:
                    log_warning(f"Could not get transcript info: {e}")
                    comprehensive_info["transcript_info"] = {"error": str(e)}
            else:
                # The caller asked for transcript data; say why it is
                # missing instead of silently dropping the request.
                comprehensive_info["transcript_info"] = {
                    "error": "Transcript API not available. Install youtube-transcript-api package."
                }

        return self._format_json_response(comprehensive_info)

    except (YouTubeValidationError, YouTubeDataError):
        # Domain errors already carry a meaningful message; pass them through.
        raise
    except Exception as e:
        log_error(f"Unexpected error getting video info for {video_url}: {e}")
        # Chain explicitly so the original traceback is kept as the cause.
        raise YouTubeDataError(f"Failed to get comprehensive video info: {e}") from e

legacy_fetch_youtube_video_metadata

legacy_fetch_youtube_video_metadata(video_url: str) -> str

Legacy method for backward compatibility. Use fetch_youtube_video_metadata() instead.

Source code in src/enhancedtoolkits/youtube.py
def legacy_fetch_youtube_video_metadata(self, video_url: str) -> str:
    """
    Backward-compatible alias kept for older callers.

    Prefer fetch_youtube_video_metadata(); this method simply forwards
    ``video_url`` to it and returns its result unchanged.
    """
    metadata_json = self.fetch_youtube_video_metadata(video_url)
    return metadata_json

legacy_fetch_youtube_video_transcript

legacy_fetch_youtube_video_transcript(video_url: str) -> str

Legacy method for backward compatibility. Use fetch_youtube_video_transcript() instead.

Source code in src/enhancedtoolkits/youtube.py
def legacy_fetch_youtube_video_transcript(self, video_url: str) -> str:
    """
    Legacy method for backward compatibility.
    Use fetch_youtube_video_transcript() instead.

    Calls fetch_youtube_video_transcript() with its default settings and
    reduces the JSON payload to plain text.

    Args:
        video_url: The URL of the YouTube video

    Returns:
        The ``transcript_text`` value when present, otherwise the
        space-joined ``text`` of all ``segments``, otherwise the literal
        "No transcript text available".

    Raises:
        Exception: If the transcript cannot be fetched or parsed. The
            generic type is kept for legacy callers; the original error
            is attached as the cause.
    """
    try:
        transcript_data = json.loads(self.fetch_youtube_video_transcript(video_url))

        # Return just the text for backward compatibility
        if "transcript_text" in transcript_data:
            return transcript_data["transcript_text"]
        if "segments" in transcript_data:
            return " ".join(
                segment.get("text", "") for segment in transcript_data["segments"]
            )
        return "No transcript text available"

    except Exception as e:
        # Chain explicitly so the original traceback is kept as the cause.
        raise Exception(f"Error getting video transcript: {e}") from e

legacy_extract_youtube_video_id

legacy_extract_youtube_video_id(youtube_url: str) -> str

Legacy method for backward compatibility. Use extract_youtube_video_id() instead.

Source code in src/enhancedtoolkits/youtube.py
def legacy_extract_youtube_video_id(self, youtube_url: str) -> str:
    """
    Backward-compatible alias kept for older callers.

    Prefer extract_youtube_video_id(); this method forwards
    ``youtube_url`` to the internal ``_extract_video_id`` helper and
    returns its result unchanged.
    """
    video_id = self._extract_video_id(youtube_url)
    return video_id

get_llm_usage_instructions staticmethod

get_llm_usage_instructions() -> str

Returns a set of detailed instructions for LLMs on how to use each tool in EnhancedYouTubeTools. Each instruction includes the method name, description, parameters, types, and example values.

Source code in src/enhancedtoolkits/youtube.py
    @staticmethod
    def get_llm_usage_instructions() -> str:
        """
        Returns a set of detailed instructions for LLMs on how to use each tool in EnhancedYouTubeTools.
        Each instruction includes the method name, description, parameters, types, and example values.
        """
        instructions = """
<youtube_tools_instructions>
*** YouTube Tools Instructions ***

By leveraging the following set of tools, you can retrieve comprehensive metadata, transcripts, and video information from YouTube. These tools empower you to deliver accurate, real-time video intelligence and content extraction with ease. Here are the detailed instructions for using the set of tools:

- Use fetch_youtube_video_metadata to retrieve metadata for a YouTube video.
   Parameters:
      - video_url (str): The URL of the YouTube video, e.g., "https://www.youtube.com/watch?v=dQw4w9WgXcQ".

- Use fetch_youtube_video_transcript to retrieve the transcript for a YouTube video (requires youtube-transcript-api).
   Parameters:
      - video_url (str): The URL of the YouTube video, e.g., "https://youtu.be/dQw4w9WgXcQ".
      - language (str, optional): Preferred language code, e.g., "en", "es", "fr" (default: "en").
      - auto_generated (bool, optional): Whether to include auto-generated transcripts (default: True).

- Use fetch_available_youtube_transcripts to list available transcript languages for a video (requires youtube-transcript-api).
   Parameters:
      - video_url (str): The URL of the YouTube video.

- Use fetch_youtube_transcript_languages to get a simplified list of available transcript language codes (requires youtube-transcript-api).
   Parameters:
      - video_url (str): The URL of the YouTube video.

- Use extract_youtube_video_id to extract the video ID from a YouTube URL.
   Parameters:
      - video_url (str): The URL of the YouTube video.

- Use fetch_comprehensive_youtube_video_info to get comprehensive video information, including metadata and optionally transcript.
   Parameters:
      - video_url (str): The URL of the YouTube video.
      - include_transcript (bool, optional): Whether to include transcript data (default: False).

- Use legacy_fetch_youtube_video_metadata for backward compatibility (same as fetch_youtube_video_metadata).
   Parameters:
      - video_url (str): The URL of the YouTube video.

- Use legacy_fetch_youtube_video_transcript for backward compatibility (same as fetch_youtube_video_transcript).
   Parameters:
      - video_url (str): The URL of the YouTube video.

Notes:
- Transcript-related tools (fetch_youtube_video_transcript, fetch_available_youtube_transcripts, fetch_youtube_transcript_languages) require the youtube-transcript-api package to be installed.
- The language parameter for transcripts should be a valid language code, e.g., "en" for English, "es" for Spanish.
- The auto_generated parameter controls whether to include auto-generated transcripts.
</youtube_tools_instructions>
"""
        return instructions