YouTubeTools(rate_limit_delay: float = 0.5, timeout: int = 30, max_retries: int = 3, add_instructions: bool = True, **kwargs)
Bases: StrictToolkit
YouTube metadata + transcript tools.
- Metadata uses YouTube oEmbed.
- Transcripts use youtube-transcript-api.
All tools return JSON strings.
Parameters:
| Name | Type | Description | Default |
| --- | --- | --- | --- |
| rate_limit_delay | float | Delay between outbound requests (seconds). | 0.5 |
| timeout | int | HTTP request timeout (seconds). | 30 |
| max_retries | int | Retry attempts for transcript fetches. | 3 |
| add_instructions | bool | Whether to attach LLM usage instructions. | True |
Source code in src/enhancedtoolkits/youtube.py
def __init__(
    self,
    rate_limit_delay: float = 0.5,
    timeout: int = 30,
    max_retries: int = 3,
    add_instructions: bool = True,
    **kwargs,
):
    """Set up the YouTube toolkit and register its public tools.

    Every numeric setting is clamped into a fixed range before it is stored.

    Args:
        rate_limit_delay: Delay between outbound requests (seconds);
            clamped to the range 0.1 - 5.0.
        timeout: HTTP request timeout (seconds); clamped to 5 - 120.
        max_retries: Retry attempts for transcript fetches; clamped to
            1 - 10.
        add_instructions: Whether to attach LLM usage instructions. When
            false, an empty instruction string is passed to the base class.
        **kwargs: Forwarded unchanged to the base class initializer.
    """
    # Clamp: apply the upper bound first, then the lower bound.
    capped_delay = min(5.0, rate_limit_delay)
    self.rate_limit_delay = float(max(0.1, capped_delay))

    capped_timeout = min(120, timeout)
    self.timeout = int(max(5, capped_timeout))

    capped_retries = min(10, max_retries)
    self.max_retries = int(max(1, capped_retries))

    self._last_request_time = 0.0

    if add_instructions:
        instructions = self.get_llm_usage_instructions()
    else:
        instructions = ""

    super().__init__(
        name="enhanced_youtube_tools",
        instructions=instructions,
        add_instructions=add_instructions,
        **kwargs,
    )

    # Register methods (same order as they are exposed to callers).
    for tool in (
        self.fetch_youtube_video_metadata,
        self.fetch_youtube_video_transcript,
        self.extract_youtube_video_id,
        self.fetch_comprehensive_youtube_video_info,
        self.fetch_available_youtube_transcripts,
        self.fetch_youtube_transcript_languages,
    ):
        self.register(tool)

    startup_message = (
        "Enhanced YouTube Tools initialized - "
        f"Rate Limit: {self.rate_limit_delay}s, "
        f"Timeout: {self.timeout}s"
    )
    log_info(startup_message)
fetch_youtube_video_metadata(video_url: str) -> str
Fetch basic metadata for a YouTube video via oEmbed.
Source code in src/enhancedtoolkits/youtube.py
def fetch_youtube_video_metadata(self, video_url: str) -> str:
    """Fetch basic metadata for a YouTube video via oEmbed.

    Args:
        video_url: Video reference handed to ``_extract_video_id``.

    Returns:
        The enhanced oEmbed metadata, serialized by
        ``_format_json_response``.

    Raises:
        YouTubeValidationError: Re-raised unchanged from the helpers.
        YouTubeDataError: Re-raised unchanged, or raised to wrap any other
            exception (which is logged first).
    """
    try:
        resolved_id = self._extract_video_id(video_url)
        self._apply_rate_limit()
        raw_oembed = self._fetch_oembed_data(resolved_id)
        enriched = self._enhance_oembed_metadata(
            raw_oembed, resolved_id, video_url
        )
        return self._format_json_response(enriched)
    except (YouTubeValidationError, YouTubeDataError):
        # Domain errors already carry a useful message; pass them through.
        raise
    except Exception as exc:  # pylint: disable=broad-exception-caught
        log_error(
            f"Unexpected error getting metadata for {video_url}: {exc}"
        )
        raise YouTubeDataError(
            f"Failed to get video metadata: {exc}"
        ) from exc
fetch_youtube_video_transcript(video_url: str, language: str = 'en', auto_generated: bool = True) -> str
Fetch a transcript (if available) with optional language preference.
Source code in src/enhancedtoolkits/youtube.py
def fetch_youtube_video_transcript(
    self, video_url: str, language: str = "en", auto_generated: bool = True
) -> str:
    """Fetch a transcript (if available) with optional language preference.

    Args:
        video_url: Video reference handed to ``_extract_video_id``.
        language: Preferred language code. It is stripped and lower-cased;
            ``None``, an empty string, or a whitespace-only string all fall
            back to ``"en"``.
        auto_generated: Coerced to ``bool`` and forwarded to
            ``_fetch_transcript_with_retry``.

    Returns:
        The transcript payload serialized by ``_format_json_response``.

    Raises:
        YouTubeValidationError: Re-raised unchanged from the helpers.
        YouTubeDataError: Re-raised unchanged, or raised to wrap any other
            exception (which is logged first).
    """
    try:
        video_id = self._extract_video_id(video_url)
        self._apply_rate_limit()
        # Normalize first, then fall back: a blank value such as "  " would
        # otherwise reach the fetcher as an empty language code.
        normalized_language = (language or "").strip().lower() or "en"
        transcript = self._fetch_transcript_with_retry(
            video_id=video_id,
            language=normalized_language,
            auto_generated=bool(auto_generated),
        )
        return self._format_json_response(transcript)
    except (YouTubeValidationError, YouTubeDataError):
        raise
    except Exception as e:  # pylint: disable=broad-exception-caught
        log_error(
            f"Unexpected error getting transcript for {video_url}: {e}"
        )
        raise YouTubeDataError(
            f"Failed to get video transcript: {e}"
        ) from e
fetch_available_youtube_transcripts(video_url: str) -> str
List available transcript tracks for a video.
Source code in src/enhancedtoolkits/youtube.py
def fetch_available_youtube_transcripts(self, video_url: str) -> str:
    """List available transcript tracks for a video.

    Tracks are grouped into manual and auto-generated lists; translatable
    tracks are additionally listed under ``translatable_transcripts``.
    When the transcript listing raises ``TranscriptsDisabled``,
    ``NoTranscriptFound`` or ``VideoUnavailable``, the response keeps the
    empty lists and gains a ``message`` (plus a ``note`` for
    ``TranscriptsDisabled``) instead of raising.

    Args:
        video_url: Video reference handed to ``_extract_video_id``.

    Returns:
        The listing serialized by ``_format_json_response``.

    Raises:
        YouTubeValidationError: Re-raised unchanged from the helpers.
        YouTubeDataError: Re-raised unchanged, or raised to wrap any other
            exception (which is logged first).
    """
    try:
        video_id = self._extract_video_id(video_url)
        self._apply_rate_limit()
        api_client = YouTubeTranscriptApi()
        listing: dict[str, Any] = {
            "video_id": video_id,
            "manual_transcripts": [],
            "auto_generated_transcripts": [],
            "translatable_transcripts": [],
            "timestamp": datetime.now().isoformat(),
        }

        try:
            tracks = api_client.list(video_id)
        except (
            TranscriptsDisabled,
            NoTranscriptFound,
            VideoUnavailable,
        ) as unavailable:
            # Expected "nothing to list" outcomes are reported in-band.
            listing["message"] = f"No transcripts available: {unavailable}"
            if isinstance(unavailable, TranscriptsDisabled):
                listing["note"] = (
                    "TranscriptsDisabled can be a false-positive on some cloud "
                    "environments (IP-based blocking/challenges). If it works locally, "
                    "try a different egress IP or use the library's ProxyConfig."
                )
            return self._format_json_response(listing)

        for track in tracks:
            translatable = bool(getattr(track, "is_translatable", False))
            entry = {
                "language": track.language,
                "language_code": track.language_code,
                "is_generated": track.is_generated,
                "is_translatable": translatable,
            }
            bucket = (
                "auto_generated_transcripts"
                if track.is_generated
                else "manual_transcripts"
            )
            listing[bucket].append(entry)
            if translatable:
                listing["translatable_transcripts"].append(entry)

        return self._format_json_response(listing)
    except (YouTubeValidationError, YouTubeDataError):
        raise
    except Exception as exc:  # pylint: disable=broad-exception-caught
        log_error(
            f"Unexpected error getting available transcripts for {video_url}: {exc}"
        )
        raise YouTubeDataError(
            f"Failed to get available transcripts: {exc}"
        ) from exc
fetch_youtube_transcript_languages(video_url: str) -> str
Return a simplified list of available transcript language codes.
Source code in src/enhancedtoolkits/youtube.py
def fetch_youtube_transcript_languages(self, video_url: str) -> str:
    """Return a simplified list of available transcript language codes.

    Builds on ``fetch_available_youtube_transcripts``: language codes from
    the manual and auto-generated lists are de-duplicated and sorted, and
    the subset that appears in ``self.COMMON_LANGUAGES`` is reported
    separately (in ``COMMON_LANGUAGES`` order).

    Diagnostic keys present in the underlying listing (``error``,
    ``message``, ``note``) are copied into the response so that callers can
    tell *why* the language list is empty.

    Args:
        video_url: Video reference forwarded to
            ``fetch_available_youtube_transcripts``.

    Returns:
        The summary serialized by ``_format_json_response``.

    Raises:
        YouTubeValidationError: Re-raised unchanged.
        YouTubeDataError: Re-raised unchanged, or raised to wrap any other
            exception (which is logged first).
    """
    try:
        transcripts = json.loads(
            self.fetch_available_youtube_transcripts(video_url)
        )

        language_codes: set[str] = set()
        for key in ("manual_transcripts", "auto_generated_transcripts"):
            # `or []` guards against an explicit null in the payload.
            for item in transcripts.get(key, []) or []:
                code = item.get("language_code")
                if code:
                    language_codes.add(code)

        result: dict[str, Any] = {
            "video_id": transcripts.get("video_id"),
            "available_languages": sorted(language_codes),
            "common_languages_available": [
                lang
                for lang in self.COMMON_LANGUAGES
                if lang in language_codes
            ],
            "timestamp": datetime.now().isoformat(),
        }

        # Preserve upstream diagnostics. The transcript listing reports
        # "no transcripts" conditions under "message"/"note"; "error" is
        # kept for capability errors (e.g., missing list_transcripts).
        for diagnostic_key in ("error", "message", "note"):
            diagnostic = transcripts.get(diagnostic_key)
            if diagnostic:
                result[diagnostic_key] = diagnostic

        return self._format_json_response(result)
    except (YouTubeValidationError, YouTubeDataError):
        raise
    except Exception as e:  # pylint: disable=broad-exception-caught
        log_error(
            f"Unexpected error getting transcript languages for {video_url}: {e}"
        )
        raise YouTubeDataError(
            f"Failed to get transcript languages: {e}"
        ) from e
extract_youtube_video_id(video_url: str) -> str
Extract a YouTube video id from a URL (or accept a raw id).
Source code in src/enhancedtoolkits/youtube.py
def extract_youtube_video_id(self, video_url: str) -> str:
    """Extract a YouTube video id from a URL (or accept a raw id).

    Args:
        video_url: Value handed to ``_extract_video_id``.

    Returns:
        A payload with ``video_id`` and an ISO-format ``timestamp``,
        serialized by ``_format_json_response``.

    Raises:
        YouTubeValidationError: Re-raised unchanged from the helper.
        YouTubeDataError: Re-raised unchanged, or raised to wrap any other
            exception (which is logged first).
    """
    try:
        payload = {
            "video_id": self._extract_video_id(video_url),
            "timestamp": datetime.now().isoformat(),
        }
        return self._format_json_response(payload)
    except (YouTubeValidationError, YouTubeDataError):
        raise
    except Exception as exc:  # pylint: disable=broad-exception-caught
        log_error(
            f"Unexpected error extracting video id for {video_url}: {exc}"
        )
        raise YouTubeDataError(
            f"Failed to extract video id: {exc}"
        ) from exc
fetch_comprehensive_youtube_video_info(video_url: str, include_transcript: bool = False) -> str
Combine oEmbed metadata and optional transcript info into one response.
Source code in src/enhancedtoolkits/youtube.py
def fetch_comprehensive_youtube_video_info(
    self, video_url: str, include_transcript: bool = False
) -> str:
    """Combine oEmbed metadata and optional transcript info into one response.

    Transcript handling is best-effort: any failure while collecting
    languages or the transcript is logged as a warning and recorded under
    ``transcript_info["error"]`` instead of failing the whole call.
    Language information gathered before the failure is kept.

    Args:
        video_url: Video reference handed to the other tools.
        include_transcript: When true, add ``transcript_info`` (available
            languages) and, if any language is available, ``transcript``.
            English is preferred; otherwise the first available language
            is used.

    Returns:
        The combined payload serialized by ``_format_json_response``.

    Raises:
        YouTubeValidationError: Re-raised unchanged (id extraction or
            metadata fetch).
        YouTubeDataError: Re-raised unchanged, or raised to wrap any other
            exception (which is logged first).
    """
    try:
        video_id = self._extract_video_id(video_url)
        metadata = json.loads(self.fetch_youtube_video_metadata(video_url))

        result: dict[str, Any] = {
            "video_id": video_id,
            "video_url": video_url,
            "metadata": metadata,
            "timestamp": datetime.now().isoformat(),
        }

        if include_transcript:
            # languages + best-effort transcript
            try:
                languages = json.loads(
                    self.fetch_youtube_transcript_languages(video_url)
                )
                available = languages.get("available_languages", []) or []
                result["transcript_info"] = {
                    "available_languages": languages.get(
                        "available_languages", []
                    ),
                    "common_languages_available": languages.get(
                        "common_languages_available", []
                    ),
                }

                # Fetch English if available, else first available language.
                preferred: Optional[str] = None
                if "en" in available:
                    preferred = "en"
                elif available:
                    preferred = available[0]

                if preferred:
                    result["transcript"] = json.loads(
                        self.fetch_youtube_video_transcript(
                            video_url, preferred
                        )
                    )
            except (
                Exception
            ) as e:  # pylint: disable=broad-exception-caught
                log_warning(f"Could not add transcript data: {e}")
                # Attach the error without discarding language info that
                # was already collected before the failure.
                result.setdefault("transcript_info", {})["error"] = str(e)

        return self._format_json_response(result)
    except (YouTubeValidationError, YouTubeDataError):
        raise
    except Exception as e:  # pylint: disable=broad-exception-caught
        log_error(
            f"Unexpected error getting video info for {video_url}: {e}"
        )
        raise YouTubeDataError(
            f"Failed to get comprehensive video info: {e}"
        ) from e
get_llm_usage_instructions() -> str
Return short, text-first usage instructions for the YouTube tools.
Source code in src/enhancedtoolkits/youtube.py
| @staticmethod
def get_llm_usage_instructions() -> str:
"""Return short, text-first usage instructions for the YouTube tools."""
return """
<youtube_tools>
YouTube metadata (oEmbed) + transcripts (youtube-transcript-api)
GOAL
- YouTube metadata (oEmbed) + transcripts (youtube-transcript-api). All tools return JSON strings.
CORE TOOLS
- fetch_youtube_video_metadata(video_url)
- extract_youtube_video_id(video_url)
- fetch_comprehensive_youtube_video_info(video_url, include_transcript=False)
TRANSCRIPT TOOLS
- fetch_available_youtube_transcripts(video_url)
- fetch_youtube_transcript_languages(video_url)
- fetch_youtube_video_transcript(video_url, language='en', auto_generated=True)
LIMITATIONS
- oEmbed does NOT provide: views, likes, upload date, duration.
CONTEXT-SIZE RULES (IMPORTANT)
- Transcripts can be very large; fetch only when needed.
- Do not dump full transcripts into the final answer; summarize + quote short excerpts.
NOTES
- If transcripts work locally but fail on servers with TranscriptsDisabled, your egress IP may be blocked/challenged.
</youtube_tools>
"""
|