"""Regex search in files tool."""


from __future__ import annotations

import locale
import re
from pathlib import Path

from harzoo.agent.kernel.tool import Tool, ToolResult

# Version identifier for this tool module (a date-formatted string).
# It is defined after the imports because a ``from __future__`` statement
# must precede every other statement in the module (only the docstring,
# comments and blank lines may come first); placing the assignment above it
# is a SyntaxError.
TOOL_VERSION = "2026-05-22"


def resolve_tool_path(path: str) -> Path:
    """Turn a user-supplied path string into an absolute, resolved ``Path``.

    A leading ``~`` is expanded first. A path that is still relative after
    expansion is anchored at the current working directory. The result is
    passed through ``Path.resolve()``.
    """
    expanded = Path(path).expanduser()
    anchored = expanded if expanded.is_absolute() else Path.cwd() / expanded
    return anchored.resolve()


def decode_bytes_prefer_default(data: bytes) -> tuple[str, str]:
    """Decode *data*, trying the locale's preferred encoding before UTF-8.

    The preferred encoding name is lower-cased and stripped, and the aliases
    ``utf8`` / ``cp65001`` are treated as ``utf-8``. When the preferred
    encoding is empty or already UTF-8, only UTF-8 is attempted.

    Returns:
        A ``(text, encoding)`` pair, where ``encoding`` is the name of the
        codec that succeeded. If every candidate fails, the bytes are decoded
        as UTF-8 with replacement characters and the encoding is reported as
        ``"utf-8 (replace)"``.
    """
    preferred = (locale.getpreferredencoding(False) or "").strip().lower()
    if preferred in ("utf8", "cp65001"):
        preferred = "utf-8"

    # UTF-8 is always a candidate; a different locale default goes first.
    candidates = ["utf-8"]
    if preferred and preferred != "utf-8":
        candidates.insert(0, preferred)

    for candidate in candidates:
        try:
            decoded = data.decode(candidate)
        except (UnicodeDecodeError, LookupError):
            # Wrong encoding for these bytes, or a codec name Python does
            # not know: move on to the next candidate.
            continue
        return decoded, candidate

    return data.decode("utf-8", errors="replace"), "utf-8 (replace)"


def read_file_text(path: Path) -> tuple[str, str]:
    """Read *path* as bytes and decode it to text.

    Returns the ``(text, encoding)`` pair produced by
    ``decode_bytes_prefer_default``. Errors raised while reading the file
    (e.g. ``OSError``) propagate to the caller.
    """
    raw = path.read_bytes()
    return decode_bytes_prefer_default(raw)


class GrepTool(Tool):
    """Tool that searches one file, or every file under a directory, with a regex.

    Three output modes are supported:

    * ``content`` - one ``"<file>:<line>: <stripped line>"`` entry per
      matching line (also used for any unrecognised ``output_mode``).
    * ``files_with_matches`` - the path of every file containing a match.
    * ``count`` - per-file and total number of regex matches.
    """

    name = "Grep"
    description = "Search for regex pattern in files."
    parameters = {
        "properties": {
            "pattern": {"type": "string", "description": "Regex pattern"},
            "path": {"type": "string", "description": "Root path to search", "default": "."},
            "glob": {"type": "string", "description": "File glob filter", "default": "*"},
            "output_mode": {"type": "string", "enum": ["content", "files_with_matches", "count"], "default": "content"},
            "head_limit": {"type": "integer", "description": "Max results", "default": 100},
            "-i": {"type": "boolean", "description": "Case-insensitive", "default": False},
        },
        "required": ["pattern"],
    }

    def execute(self, pattern: str, path: str = ".", glob: str = "*", output_mode: str = "content", head_limit: int = 100, **kwargs) -> ToolResult:
        """Run the search and return the matches as a ``ToolResult``.

        Args:
            pattern: Regular expression to search for.
            path: File to search, or directory to search recursively.
            glob: Glob passed to ``Path.rglob`` when *path* is a directory;
                ignored when *path* is a single file.
            output_mode: ``"content"``, ``"files_with_matches"`` or
                ``"count"``; any other value behaves like ``"content"``.
            head_limit: Maximum number of entries returned in ``content`` and
                ``files_with_matches`` modes. Values <= 0 yield no entries.
                Not applied in ``count`` mode.
            **kwargs: ``"-i"`` (truthy) enables case-insensitive matching.

        Returns:
            On success, a payload of ``{"matches", "count", "output_mode"}``,
            or ``{"counts", "total", "output_mode"}`` in ``count`` mode.
            On failure, a result with code ``PATH_NOT_FOUND``,
            ``INVALID_REGEX`` or ``TOOL_EXCEPTION`` (any unexpected error).
        """
        try:
            base = resolve_tool_path(path)
            if not base.exists():
                return ToolResult.failure(f"Path not found: {path}", code="PATH_NOT_FOUND")

            flags = re.IGNORECASE if kwargs.get("-i") else 0
            try:
                cre = re.compile(pattern, flags)
            except re.error:
                return ToolResult.failure(f"Invalid regex: {pattern}", code="INVALID_REGEX")

            if base.is_file():
                files = [base]
            else:
                # rglob yields entries in filesystem order, which varies
                # between runs and machines. Sorting by string path (the same
                # key count mode sorts on) makes the output, and what survives
                # head_limit truncation, reproducible.
                files = sorted((f for f in base.rglob(glob) if f.is_file()), key=str)

            if output_mode == "count":
                file_counts: dict[str, int] = {}
                total_count = 0
                for fp in files:
                    try:
                        text, _enc = read_file_text(fp)
                    except OSError:
                        # Best effort: unreadable files are skipped.
                        continue
                    n = sum(1 for _ in cre.finditer(text))
                    if n:
                        file_counts[str(fp)] = n
                        total_count += n
                return ToolResult.success(
                    {"counts": dict(sorted(file_counts.items())), "total": total_count, "output_mode": output_mode}
                )

            # A non-positive limit means "no results"; checking it up front
            # avoids returning one entry before the limit test is reached.
            limit = max(head_limit, 0)
            matches: list[str] = []
            if limit:
                for fp in files:
                    try:
                        text, _enc = read_file_text(fp)
                    except OSError:
                        # Best effort: unreadable files are skipped.
                        continue
                    if output_mode == "files_with_matches":
                        if cre.search(text):
                            matches.append(str(fp))
                    else:
                        room = limit - len(matches)
                        matches.extend(self._matching_lines(cre, fp, text, room))
                    if len(matches) >= limit:
                        break
            return ToolResult.success({"matches": matches, "count": len(matches), "output_mode": output_mode})
        except Exception as e:
            # Tool boundary: report any unexpected error as a failed result
            # instead of letting it propagate to the caller.
            return ToolResult.failure(str(e), code="TOOL_EXCEPTION")

    @staticmethod
    def _matching_lines(cre: re.Pattern[str], fp: Path, text: str, room: int) -> list[str]:
        """Return up to *room* ``"<file>:<line>: <stripped line>"`` entries for *text*.

        Lines are split only on ``\\r\\n``, ``\\r`` and ``\\n``.
        ``str.splitlines()`` is deliberately not used: it also breaks on form
        feed, vertical tab, NEL, U+2028/U+2029 and others, which would shift
        the reported line numbers away from what editors and grep show.
        """
        lines = re.split(r"\r\n|\r|\n", text)
        # A trailing newline leaves an empty final element; drop it so a
        # terminated last line is not followed by a phantom empty line.
        if lines and not lines[-1]:
            lines.pop()

        hits: list[str] = []
        for lineno, line in enumerate(lines, 1):
            if cre.search(line):
                hits.append(f"{fp}:{lineno}: {line.strip()}")
                if len(hits) >= room:
                    break
        return hits


# Module-level handle to the tool class defined above.
# NOTE(review): presumably looked up by a tool loader/registry under the
# name ``TOOL`` - confirm against the code that imports this module.
TOOL = GrepTool
