Skip to content

Files Tools

enhancedtoolkits.files.FilesTools

FilesTools(base_dir: Optional[Path] = None, **kwargs)

Bases: StrictToolkit

Secure toolkit for file operations with comprehensive security controls.

Source code in src/enhancedtoolkits/files.py
def __init__(self, base_dir: Optional[Path] = None, **kwargs):
    """Initialize the secure file toolkit, sandboxed to `base_dir` (CWD by default)."""
    # Resolve the sandbox root once; all later path checks are relative to it.
    root = base_dir if base_dir else Path.cwd()
    self.base_dir = Path(root).resolve()
    self.add_instructions = True
    self.instructions = FilesTools.get_llm_usage_instructions()

    super().__init__(name="secure_files_toolkit", **kwargs)

    # Expose each public file operation as a callable tool.
    for tool in (
        self.read_file_lines_chunk,
        self.replace_file_lines_chunk,
        self.insert_lines_into_file_chunk,
        self.delete_lines_from_file_chunk,
        self.save_file_with_validation,
        self.retrieve_file_metadata,
        self.list_files_with_pattern,
        self.search_files_by_name_regex,
        self.search_file_contents_by_regex,
    ):
        self.register(tool)
Functions

read_file_lines_chunk

read_file_lines_chunk(file_name: str, chunk_size: int = 100, offset: int = 0) -> str

Read a chunk of lines from a file with security validation.

Source code in src/enhancedtoolkits/files.py
def read_file_lines_chunk(
    self, file_name: str, chunk_size: int = 100, offset: int = 0
) -> str:
    """Return up to `chunk_size` lines starting at 0-based `offset` as JSON.

    The payload carries the lines plus metadata: start line, count,
    total line count, an EOF flag, and a timestamp.
    """
    try:
        self._validate_inputs(
            file_name, chunk_size=chunk_size, offset=offset
        )
        path = self._secure_resolve_path(file_name)

        # Read under the file lock so the chunk and the line count agree.
        with self._secure_file_lock(path, "r") as handle:
            chunk = self._stream_read_lines(handle, offset, chunk_size)
            line_total = self._count_lines_efficiently(path)

        payload = {
            "operation": "read_file_chunk",
            "result": chunk,
            "metadata": {
                "start_line": offset,
                "lines_read": len(chunk),
                "total_lines": line_total,
                # EOF when this chunk reaches (or passes) the last line.
                "eof": offset + len(chunk) >= line_total,
                "timestamp": self._timestamp(),
            },
        }
        log_info(f"Read {len(chunk)} lines from {file_name}")
        return self._safe_json(payload)
    except Exception as exc:  # pylint: disable=broad-exception-caught
        return self._error_response("read_file_chunk", file_name, exc)

replace_file_lines_chunk

replace_file_lines_chunk(file_name: str, new_lines: List[str], offset: int, length: int) -> str

Replace lines with security validation and atomic operations.

Source code in src/enhancedtoolkits/files.py
def replace_file_lines_chunk(
    self, file_name: str, new_lines: List[str], offset: int, length: int
) -> str:
    """Replace `length` lines starting at 0-based `offset` with `new_lines`.

    The edit is written to a temporary file and swapped in with
    os.replace() so readers never observe a partially-written file.

    Args:
        file_name: File to edit, resolved securely under `base_dir`.
        new_lines: Replacement lines.
        offset: 0-based index of the first line to replace.
        length: Number of original lines to replace.

    Returns:
        JSON string with operation, result, and metadata (sanitized
        error payload on failure).
    """
    tmp_name = None
    try:
        self._validate_inputs(
            file_name, new_lines=new_lines, offset=offset, length=length
        )
        file_path = self._secure_resolve_path(file_name)

        # Create the temp file beside the target so os.replace() is a
        # same-filesystem atomic rename (a cross-device rename raises EXDEV).
        with tempfile.NamedTemporaryFile(
            mode="w", delete=False, suffix=".tmp", dir=file_path.parent
        ) as tmp_file:
            tmp_name = tmp_file.name
            with self._secure_file_lock(file_path, "r") as src:
                self._atomic_edit(src, tmp_file, new_lines, offset, length)
            tmp_file.flush()
            os.fsync(tmp_file.fileno())

        # Rename only after the temp file is closed and durable; replacing
        # a still-open file is non-portable and risks unflushed data.
        os.replace(tmp_name, file_path)
        tmp_name = None  # renamed into place; nothing left to clean up

        result = {
            "operation": "edit_file_chunk",
            "result": f"Replaced {length} lines at offset {offset}",
            "metadata": {"timestamp": self._timestamp()},
        }
        log_info(
            f"Edited {file_name}: replaced {length} lines at {offset}"
        )
        return self._safe_json(result)
    except Exception as exc:  # pylint: disable=broad-exception-caught
        return self._error_response("edit_file_chunk", file_name, exc)
    finally:
        # Don't leak the temp file if any step failed before the rename.
        if tmp_name is not None:
            try:
                os.unlink(tmp_name)
            except OSError:
                pass

insert_lines_into_file_chunk

insert_lines_into_file_chunk(file_name: str, new_lines: List[str], offset: int) -> str

Insert lines with security validation.

Source code in src/enhancedtoolkits/files.py
def insert_lines_into_file_chunk(
    self, file_name: str, new_lines: List[str], offset: int
) -> str:
    """Insert `new_lines` at 0-based line `offset` atomically.

    Writes to a temporary file and swaps it in with os.replace() so
    readers never observe a partially-written file.

    Args:
        file_name: File to edit, resolved securely under `base_dir`.
        new_lines: Lines to insert.
        offset: 0-based line index at which to insert.

    Returns:
        JSON string with operation, result, and metadata (sanitized
        error payload on failure).
    """
    tmp_name = None
    try:
        self._validate_inputs(
            file_name, new_lines=new_lines, offset=offset
        )
        file_path = self._secure_resolve_path(file_name)

        # Temp file lives beside the target so os.replace() is a
        # same-filesystem atomic rename (cross-device renames raise EXDEV).
        with tempfile.NamedTemporaryFile(
            mode="w", delete=False, suffix=".tmp", dir=file_path.parent
        ) as tmp_file:
            tmp_name = tmp_file.name
            with self._secure_file_lock(file_path, "r") as src:
                self._atomic_insert(src, tmp_file, new_lines, offset)
            tmp_file.flush()
            os.fsync(tmp_file.fileno())

        # Rename only after the temp file is closed and synced; replacing
        # a still-open file is non-portable and risks unflushed data.
        os.replace(tmp_name, file_path)
        tmp_name = None  # renamed into place; nothing left to clean up

        result = {
            "operation": "insert_file_chunk",
            "result": f"Inserted {len(new_lines)} lines at offset {offset}",
            "metadata": {"timestamp": self._timestamp()},
        }
        log_info(f"Inserted {len(new_lines)} lines in {file_name}")
        return self._safe_json(result)
    except Exception as exc:  # pylint: disable=broad-exception-caught
        return self._error_response("insert_file_chunk", file_name, exc)
    finally:
        # Don't leak the temp file if any step failed before the rename.
        if tmp_name is not None:
            try:
                os.unlink(tmp_name)
            except OSError:
                pass

delete_lines_from_file_chunk

delete_lines_from_file_chunk(file_name: str, offset: int, length: int) -> str

Delete lines with security validation.

Source code in src/enhancedtoolkits/files.py
def delete_lines_from_file_chunk(
    self, file_name: str, offset: int, length: int
) -> str:
    """Delete `length` lines starting at 0-based `offset` atomically.

    Writes the surviving lines to a temporary file and swaps it in with
    os.replace() so readers never observe a partially-written file.

    Args:
        file_name: File to edit, resolved securely under `base_dir`.
        offset: 0-based index of the first line to delete.
        length: Number of lines to delete.

    Returns:
        JSON string with operation, result, and metadata (sanitized
        error payload on failure).
    """
    tmp_name = None
    try:
        self._validate_inputs(file_name, offset=offset, length=length)
        file_path = self._secure_resolve_path(file_name)

        # Temp file lives beside the target so os.replace() is a
        # same-filesystem atomic rename (cross-device renames raise EXDEV).
        with tempfile.NamedTemporaryFile(
            mode="w", delete=False, suffix=".tmp", dir=file_path.parent
        ) as tmp_file:
            tmp_name = tmp_file.name
            with self._secure_file_lock(file_path, "r") as src:
                self._atomic_delete(src, tmp_file, offset, length)
            tmp_file.flush()
            os.fsync(tmp_file.fileno())

        # Rename only after the temp file is closed and synced; replacing
        # a still-open file is non-portable and risks unflushed data.
        os.replace(tmp_name, file_path)
        tmp_name = None  # renamed into place; nothing left to clean up

        result = {
            "operation": "delete_file_chunk",
            "result": f"Deleted {length} lines at offset {offset}",
            "metadata": {"timestamp": self._timestamp()},
        }
        log_info(f"Deleted {length} lines from {file_name}")
        return self._safe_json(result)
    except Exception as exc:  # pylint: disable=broad-exception-caught
        return self._error_response("delete_file_chunk", file_name, exc)
    finally:
        # Don't leak the temp file if any step failed before the rename.
        if tmp_name is not None:
            try:
                os.unlink(tmp_name)
            except OSError:
                pass

save_file_with_validation

save_file_with_validation(contents: str, file_name: str, overwrite: bool = True) -> str

Save file with security validation.

Source code in src/enhancedtoolkits/files.py
def save_file_with_validation(
    self, contents: str, file_name: str, overwrite: bool = True
) -> str:
    """Save `contents` to `file_name` atomically with security validation.

    Writes to a temporary file in the destination directory, fsyncs it,
    then swaps it in with os.replace() so readers never observe a
    partially-written file.

    Args:
        contents: Full text to write.
        file_name: Destination path, resolved securely under `base_dir`.
        overwrite: When False, refuse to clobber an existing file.

    Returns:
        JSON string with operation, result, and metadata (sanitized
        error payload on failure).

    Raises:
        Nothing to the caller — all errors are captured and returned as
        a JSON error payload.
    """
    tmp_name = None
    try:
        self._validate_inputs(file_name, contents=contents)
        file_path = self._secure_resolve_path(file_name)

        if file_path.exists() and not overwrite:
            raise FileOperationError("File exists and overwrite is False")

        file_path.parent.mkdir(parents=True, exist_ok=True)

        # Same-directory temp file keeps os.replace() on one filesystem.
        with tempfile.NamedTemporaryFile(
            mode="w", delete=False, suffix=".tmp", dir=file_path.parent
        ) as tmp_file:
            tmp_name = tmp_file.name
            tmp_file.write(contents)
            tmp_file.flush()
            os.fsync(tmp_file.fileno())

        os.replace(tmp_name, file_path)
        tmp_name = None  # renamed into place; nothing left to clean up

        result = {
            "operation": "save_file",
            "result": f"Saved file {file_name}",
            "metadata": {
                "file_size": file_path.stat().st_size,
                "timestamp": self._timestamp(),
            },
        }
        log_info(f"Saved file {file_name}")
        return self._safe_json(result)
    except Exception as exc:  # pylint: disable=broad-exception-caught
        return self._error_response("save_file", file_name, exc)
    finally:
        # delete=False means WE own cleanup: remove the temp file if any
        # step failed before the rename (the original leaked it).
        if tmp_name is not None:
            try:
                os.unlink(tmp_name)
            except OSError:
                pass

retrieve_file_metadata

retrieve_file_metadata(file_name: str) -> str

Get file metadata with security validation.

Source code in src/enhancedtoolkits/files.py
def retrieve_file_metadata(self, file_name: str) -> str:
    """Return line count, byte size, and MIME type for a file as JSON."""
    try:
        self._validate_inputs(file_name)
        path = self._secure_resolve_path(file_name)

        info = path.stat()
        # MIME type is guessed from the name's extension only.
        guessed_type, _ = mimetypes.guess_type(file_name)

        payload = {
            "operation": "get_file_metadata",
            "result": {
                "file_name": file_name,
                "total_lines": self._count_lines_efficiently(path),
                "file_size": info.st_size,
                "mime_type": guessed_type,
            },
            "metadata": {"timestamp": self._timestamp()},
        }
        return self._safe_json(payload)
    except Exception as exc:  # pylint: disable=broad-exception-caught
        return self._error_response("get_file_metadata", file_name, exc)

list_files_with_pattern

list_files_with_pattern(pattern: str = '**/*') -> str

List files with security validation.

Source code in src/enhancedtoolkits/files.py
def list_files_with_pattern(self, pattern: str = "**/*") -> str:
    """List safe files under `base_dir` matching a glob pattern.

    Args:
        pattern: Glob pattern relative to `base_dir` ("**/*" recurses).

    Returns:
        JSON string with the sorted relative paths (capped at 1000
        entries) and metadata.
    """
    max_results = 1000
    try:
        files = []
        for candidate in self.base_dir.glob(pattern):
            if candidate.is_file() and self._is_safe_file(candidate):
                files.append(str(candidate.relative_to(self.base_dir)))
                # `>=` caps the list at exactly max_results; the original
                # `>` off-by-one allowed 1001 entries through. Matches the
                # `>=` convention used by the search methods.
                if len(files) >= max_results:
                    break

        result = {
            "operation": "list_files",
            "result": sorted(files),
            "metadata": {
                "file_count": len(files),
                "timestamp": self._timestamp(),
            },
        }
        return self._safe_json(result)
    except Exception as exc:  # pylint: disable=broad-exception-caught
        return self._error_response("list_files", str(self.base_dir), exc)

search_files_by_name_regex

search_files_by_name_regex(regex_pattern: str, recursive: bool = True, max_results: int = 1000) -> str

Search for files with names matching a regex pattern with security validation.

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| `regex_pattern` | `str` | Regular expression pattern to match against file names. | *required* |
| `recursive` | `bool` | Whether to search recursively in subdirectories. | `True` |
| `max_results` | `int` | Maximum number of results to return. | `1000` |

Returns:

| Name | Type | Description |
| --- | --- | --- |
| `str` | `str` | JSON string containing the search results and metadata. |

Source code in src/enhancedtoolkits/files.py
def search_files_by_name_regex(  # pylint: disable=too-many-nested-blocks
    self,
    regex_pattern: str,
    recursive: bool = True,
    max_results: int = 1000,
) -> str:
    """
    Find files whose names match a regex pattern, with security validation.

    Args:
        regex_pattern (str): Regular expression tested against each file name.
        recursive (bool, optional): Whether to search recursively in
            subdirectories. Defaults to True.
        max_results (int, optional): Hard cap on the number of results.
            Defaults to 1000.

    Returns:
        str: JSON string containing the search results and metadata
    """
    try:
        # Reject unsafe/invalid patterns before compiling.
        self._validate_regex(regex_pattern)
        matcher = re.compile(regex_pattern)

        # Build one lazy stream of (path, name) candidates so a single
        # capped loop handles both the recursive and flat cases.
        if recursive:
            walker = (
                (Path(root) / name, name)
                for root, _, names in os.walk(self.base_dir)
                for name in names
            )
        else:
            walker = (
                (entry, entry.name)
                for entry in self.base_dir.iterdir()
                if entry.is_file()
            )

        files = []
        for full_path, name in walker:
            if self._is_safe_file(full_path) and matcher.search(name):
                files.append(str(full_path.relative_to(self.base_dir)))
                if len(files) >= max_results:  # Limit results
                    break

        result = {
            "operation": "search_files_by_name",
            "result": sorted(files),
            "metadata": {
                "file_count": len(files),
                "pattern": regex_pattern,
                "recursive": recursive,
                "timestamp": self._timestamp(),
            },
        }
        log_info(
            f"Found {len(files)} files matching pattern '{regex_pattern}'"
        )
        return self._safe_json(result)
    except Exception as exc:  # pylint: disable=broad-exception-caught
        return self._error_response(
            "search_files_by_name", str(self.base_dir), exc
        )

search_file_contents_by_regex

search_file_contents_by_regex(regex_pattern: str, file_pattern: str = '**/*', recursive: bool = False, max_files: int = 100, max_matches: int = 1000, context_lines: int = 2) -> str

Search for content inside files matching a regex pattern with security validation.

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| `regex_pattern` | `str` | Regular expression pattern to match against file content. | *required* |
| `file_pattern` | `str` | Glob pattern to filter files. | `'**/*'` |
| `recursive` | `bool` | Whether to search recursively in subdirectories. | `False` |
| `max_files` | `int` | Maximum number of files to search. | `100` |
| `max_matches` | `int` | Maximum number of matches to return. | `1000` |
| `context_lines` | `int` | Number of context lines to include before and after each match. | `2` |

Returns:

| Name | Type | Description |
| --- | --- | --- |
| `str` | `str` | JSON string containing the search results and metadata. |

Source code in src/enhancedtoolkits/files.py
def search_file_contents_by_regex(  # pylint: disable=too-many-arguments,too-many-positional-arguments,too-many-locals,too-many-branches,too-many-nested-blocks
    self,
    regex_pattern: str,
    file_pattern: str = "**/*",
    recursive: bool = False,
    max_files: int = 100,
    max_matches: int = 1000,
    context_lines: int = 2,
) -> str:
    """
    Search for content inside files matching a regex pattern with security validation.

    Args:
        regex_pattern (str): Regular expression pattern to match against file content.
        file_pattern (str, optional): Glob pattern to filter files.
            Defaults to "**/*".
        recursive (bool, optional): Whether to search recursively in subdirectories.
            Defaults to False.
        max_files (int, optional): Maximum number of files to search.
            Defaults to 100.
        max_matches (int, optional): Maximum number of matches to return.
            Defaults to 1000.
        context_lines (int, optional): Number of context lines to include before and
            after match. Defaults to 2.

    Returns:
        str: JSON string containing the search results and metadata
    """
    try:
        # Validate regex pattern
        self._validate_regex(regex_pattern)

        pattern = re.compile(regex_pattern)

        matches = []
        files_searched = 0
        total_matches = 0

        # Get files matching the file pattern
        if not recursive and "**" in file_pattern:
            # "**" makes glob recurse; strip the recursive component so a
            # non-recursive search stays in base_dir itself. Note that a
            # plain "**" -> "*" substitution would turn "**/*" into "*/*",
            # which matches only files in immediate subdirectories and
            # skips base_dir entirely.
            file_pattern = (
                file_pattern.replace("**/", "").replace("**", "*") or "*"
            )
        file_paths = self.base_dir.glob(file_pattern)

        for file_path in file_paths:
            if not (file_path.is_file() and self._is_safe_file(file_path)):
                continue

            # Stop before searching file max_files+1 so the reported
            # files_searched count stays accurate.
            if files_searched >= max_files:
                break
            files_searched += 1

            rel_path = str(file_path.relative_to(self.base_dir))

            # Search inside the file
            try:
                with self._secure_file_lock(file_path, "r") as f:
                    lines = f.readlines()

                file_matches = []
                for i, line in enumerate(lines):
                    if pattern.search(line):
                        # Clamp the context window to the file bounds.
                        start = max(0, i - context_lines)
                        end = min(len(lines), i + context_lines + 1)

                        context = {
                            "line_number": i + 1,
                            "content": line.rstrip("\n\r"),
                            "context": [
                                lines[j].rstrip("\n\r")
                                for j in range(start, end)
                            ],
                        }

                        file_matches.append(context)
                        total_matches += 1

                        if total_matches >= max_matches:
                            break

                if file_matches:
                    matches.append(
                        {"file": rel_path, "matches": file_matches}
                    )

                if total_matches >= max_matches:
                    break

            except (
                Exception
            ) as file_error:  # pylint: disable=broad-exception-caught
                # Skip files with errors
                log_error(f"Error searching file {rel_path}: {file_error}")
                continue

        result = {
            "operation": "search_inside_files",
            "result": matches,
            "metadata": {
                "files_searched": files_searched,
                "total_matches": total_matches,
                "pattern": regex_pattern,
                "recursive": recursive,
                "timestamp": self._timestamp(),
            },
        }
        log_info(
            f"Found {total_matches} matches in {files_searched} files for pattern "
            f"'{regex_pattern}'"
        )
        return self._safe_json(result)
    except Exception as exc:  # pylint: disable=broad-exception-caught
        return self._error_response(
            "search_inside_files", str(self.base_dir), exc
        )

get_llm_usage_instructions staticmethod

get_llm_usage_instructions() -> str

Return precise, structured instructions for LLM tool calling.

Source code in src/enhancedtoolkits/files.py
    @staticmethod
    def get_llm_usage_instructions() -> str:
        """Return precise, structured instructions for LLM tool calling."""
        instructions = """
<file_tools_instructions>
Secure sandboxed file operations (read/search/edit/write)

GOAL
- Perform safe, constrained file operations within `base_dir`.
- Keep outputs small via chunked reads/searches.

RETURN FORMAT (ALWAYS)
- Every method returns a JSON string with:
  - operation: str
  - result: payload or null
  - metadata: includes timestamp; on failure includes metadata.error (sanitized)

SAFETY / SCOPE
- All paths resolve under `base_dir`.
- No path traversal, no symlinks, extension allowlist enforced.

TOOLS (PREFER SMALL OUTPUTS)
- read_file_lines_chunk(file_name, chunk_size=100, offset=0)  # offset is 0-based
- replace_file_lines_chunk(file_name, new_lines, offset, length)
- insert_lines_into_file_chunk(file_name, new_lines, offset)
- delete_lines_from_file_chunk(file_name, offset, length)
- save_file_with_validation(contents, file_name, overwrite=True)
- retrieve_file_metadata(file_name)
- list_files_with_pattern(pattern="**/*")  # capped
- search_files_by_name_regex(regex_pattern, recursive=True, max_results=1000)
- search_file_contents_by_regex(regex_pattern, file_pattern="**/*", recursive=False, max_files=100, max_matches=1000, context_lines=2)

CONTEXT-SIZE RULES (IMPORTANT)
- Never read an entire large file; use small chunks and targeted searches.
- For edits: read a small region → edit that region → re-read only that region to verify.
- When returning results to the user, summarize instead of pasting large JSON blocks.

</file_tools_instructions>
"""
        return instructions