3 files changed, 324 insertions, 0 deletions
diff --git a/README.md b/README.md
index 7ccc620..2206092 100644
--- a/README.md
+++ b/README.md
@@ -204,6 +204,40 @@ model switch hyperstack1/openai/gpt-oss-120b
 
 Pi sends subsequent requests to the new model ID immediately; the provider base URL stays the same.
 
+## Extensions
+
+Custom extensions live in `pi/agent/extensions/` and are loaded automatically via the `~/.pi` symlink.
+
+| Extension | Purpose |
+|-----------|---------|
+| `web-search` | `web_search` and `web_fetch` tools — DuckDuckGo search + page fetching, no API key |
+| `ask-mode` | `/ask` command — restricts the model to read-only exploration tools |
+| `loop-scheduler` | `/loop` command — re-sends a prompt on a recurring interval |
+| `inline-bash` | `!{cmd}` syntax — expands shell output inline before sending to the model |
+| `session-name` | Auto-names sessions from the first message |
+| `modal-editor` | Opens an external editor (`$VISUAL`) for composing long prompts |
+| `handoff` | Compacts and hands off context to a fresh session |
+| `fresh-subagent` | Spawns a sub-agent in a clean context for isolated tasks |
+| `reload-runtime` | `/reload-runtime` command — hot-reloads extensions without restarting Pi |
+| `nemotron-tool-repair` | Repairs malformed tool calls from Nemotron models |
+| `taskwarrior-plan-mode` | Integrates Taskwarrior task management into Pi sessions |
+
+### Web search
+
+The `web-search` extension registers two LLM-callable tools:
+
+- **`web_search`** — searches DuckDuckGo and returns up to 8 results (title, URL, snippet)
+- **`web_fetch`** — fetches a URL and returns up to 12,000 characters of readable text
+
+Example prompts:
+
+```
+Search for the vLLM 0.9.0 changelog
+Find the Qwen3-Coder model card and summarize the recommended vLLM flags
+```
+
+No API key or account required. Uses DuckDuckGo's free HTML endpoint.
+
 ## Single-VM setup
 
 A single VM can be deployed with the default config (GPT-OSS 120B):
diff --git a/pi/agent/extensions/web-search/README.md b/pi/agent/extensions/web-search/README.md
new file mode 100644
index 0000000..c7b77b3
--- /dev/null
+++ b/pi/agent/extensions/web-search/README.md
@@ -0,0 +1,42 @@
+# web-search
+
+Pi.dev extension that gives the LLM two tools for consulting the web during coding sessions.
+
+## Tools
+
+### `web_search`
+
+Searches DuckDuckGo (no API key, no account required) and returns up to 8 results with
+titles, URLs, and snippets. Use when the model needs current documentation, release notes,
+library APIs, or any information not in its training data.
+
+### `web_fetch`
+
+Fetches the full text of a URL. Strips `<script>`, `<style>`, `<nav>`, `<header>`, and
+`<footer>` blocks, collapses whitespace, and truncates to 12,000 characters. Use after
+`web_search` to read the complete content of a result page.
+
+## Usage
+
+The tools are registered automatically when the extension loads. Just ask the model to
+look something up:
+
+```
+Search for the vLLM changelog for version 0.9.0
+```
+
+```
+Find the Qwen3 model card on HuggingFace and summarize the recommended vLLM flags
+```
+
+## Backend
+
+Uses DuckDuckGo's free HTML endpoint (`https://html.duckduckgo.com/html/`). No API key,
+no rate-limit registration, and no personally identifying headers are sent. HTTP requests
+time out after 15 seconds.
+
+## Limitations
+
+- DuckDuckGo HTML scraping may break if DDG changes its page structure.
+- Pages that require JavaScript rendering return little or no content.
+- Results are in English (`kl=us-en`).
diff --git a/pi/agent/extensions/web-search/index.ts b/pi/agent/extensions/web-search/index.ts
new file mode 100644
index 0000000..8c7b532
--- /dev/null
+++ b/pi/agent/extensions/web-search/index.ts
@@ -0,0 +1,248 @@
+import * as https from "node:https";
+import * as http from "node:http";
+import { URL } from "node:url";
+import { Type } from "@sinclair/typebox";
+import type { ExtensionAPI } from "@mariozechner/pi-coding-agent";
+
+/** Upper bound on the number of hits returned for one search query. */
+const MAX_RESULTS = 8;
+
+/** Character budget for the text extracted from a fetched page. */
+const MAX_PAGE_CHARS = 12_000;
+
+/** Timeout, in milliseconds, applied to each outgoing HTTP request. */
+const REQUEST_TIMEOUT_MS = 15_000;
+
+interface SearchResult {
+	title: string; // Link text of the hit with markup stripped.
+	url: string; // Target URL of the hit.
+	snippet: string; // Summary text for the hit; empty when none was found.
+}
+
+/**
+ * Fetch an http(s) URL and resolve with its body decoded as UTF-8. Follows at
+ * most `maxRedirects` redirects (relative `Location` values are resolved
+ * against the current URL). Rejects on an invalid URL, a non-http(s) scheme,
+ * too many redirects, a non-2xx status, a socket error, or a timeout.
+ */
+function fetchUrl(url: string, extraHeaders: Record<string, string> = {}, maxRedirects = 5): Promise<string> {
+	return new Promise((resolve, reject) => {
+		const parsed = new URL(url);
+		if (parsed.protocol !== "https:" && parsed.protocol !== "http:") {
+			// Any other scheme would otherwise be sent as plain HTTP to port 80.
+			throw new Error(`Unsupported protocol "${parsed.protocol}" for ${url}`);
+		}
+		const transport = parsed.protocol === "https:" ? https : http;
+		const options = {
+			hostname: parsed.hostname,
+			port: parsed.port || (parsed.protocol === "https:" ? 443 : 80),
+			path: parsed.pathname + parsed.search,
+			method: "GET",
+			headers: {
+				"User-Agent": "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36",
+				Accept: "text/html,application/xhtml+xml,*/*;q=0.8",
+				"Accept-Language": "en-US,en;q=0.9",
+				...extraHeaders, // Caller-supplied headers override the defaults above.
+			},
+		};
+
+		const req = transport.request(options, (res) => {
+			const status = res.statusCode ?? 0;
+			const location = res.headers.location;
+			if (status >= 300 && status < 400 && location) {
+				res.resume(); // Discard the redirect body so the socket is released.
+				if (maxRedirects <= 0) return reject(new Error(`Too many redirects for ${url}`));
+				// Building the target inside a promise turns a malformed Location into a rejection.
+				Promise.resolve()
+					.then(() => fetchUrl(new URL(location, parsed).href, extraHeaders, maxRedirects - 1))
+					.then(resolve, reject);
+				return;
+			}
+			if (status < 200 || status >= 300) {
+				res.resume();
+				return reject(new Error(`HTTP ${res.statusCode} for ${url}`));
+			}
+			const chunks: Buffer[] = [];
+			res.on("data", (chunk: Buffer) => chunks.push(chunk));
+			res.on("end", () => resolve(Buffer.concat(chunks).toString("utf-8")));
+			res.on("error", reject);
+		});
+
+		req.setTimeout(REQUEST_TIMEOUT_MS, () => {
+			reject(new Error(`Timeout fetching ${url}`));
+			req.destroy();
+		});
+		req.on("error", reject);
+		req.end();
+	});
+}
+
+/**
+ * Search DuckDuckGo through its HTML endpoint (no API key required) and parse
+ * up to MAX_RESULTS hits (title, target URL, snippet) from the response HTML.
+ * Rejects with whatever fetchUrl rejects with (network error, timeout, non-2xx).
+ */
+async function searchDuckDuckGo(query: string): Promise<SearchResult[]> {
+	const params = new URLSearchParams({ q: query, kl: "us-en" });
+	const html = await fetchUrl(`https://html.duckduckgo.com/html/?${params}`, {
+		// DuckDuckGo HTML endpoint requires an Accept header to avoid redirects.
+		Accept: "text/html",
+	});
+
+	const results: SearchResult[] = [];
+
+	// Each result block looks like:
+	//   <div class="result__body">
+	//     <a class="result__a" href="...">Title</a>
+	//     <a class="result__snippet">Snippet text</a>
+	//   </div>
+	// The href on result__a is a DDG redirect; the real URL is in its `uddg=` query
+	// param. The `|$` alternative keeps the last block when no end marker follows.
+	const resultBlockRe = /<div class="result__body"[\s\S]*?(?=<div class="result__body"|<\/div><!--end-results-->|$)/g;
+	const titleRe = /<a[^>]*class="result__a"[^>]*href="([^"]*)"[^>]*>([\s\S]*?)<\/a>/;
+	const snippetRe = /<a[^>]*class="result__snippet"[^>]*>([\s\S]*?)<\/a>/;
+
+	let block: RegExpExecArray | null;
+	while ((block = resultBlockRe.exec(html)) !== null && results.length < MAX_RESULTS) {
+		const blockHtml = block[0];
+		const titleMatch = titleRe.exec(blockHtml);
+		if (!titleMatch) continue;
+
+		// Attribute values are HTML-escaped (e.g. `&amp;` between query parameters).
+		const rawHref = decodeHtmlEntities(titleMatch[1]);
+		const rawTitle = titleMatch[2].replace(/<[^>]+>/g, "").trim();
+
+		// Resolve the real URL from the DDG redirect link.
+		let realUrl = rawHref;
+		try {
+			const hrefUrl = new URL(rawHref.startsWith("//") ? `https:${rawHref}` : rawHref);
+			// searchParams.get() already percent-decodes; decoding again corrupts or throws on "%".
+			const uddg = hrefUrl.searchParams.get("uddg");
+			if (uddg) realUrl = uddg;
+		} catch {
+			// Keep rawHref if URL parsing fails.
+		}
+
+		const snippetMatch = snippetRe.exec(blockHtml);
+		const rawSnippet = snippetMatch ? snippetMatch[1].replace(/<[^>]+>/g, "").trim() : "";
+
+		if (!rawTitle && !rawSnippet) continue;
+
+		results.push({
+			title: decodeHtmlEntities(rawTitle),
+			url: realUrl,
+			snippet: decodeHtmlEntities(rawSnippet),
+		});
+	}
+
+	return results;
+}
+
+/**
+ * Decode common named entities and numeric character references in one pass, so
+ * decoded output is never re-scanned (`&amp;lt;` yields `&lt;`, not `<`).
+ */
+function decodeHtmlEntities(text: string): string {
+	const named: Record<string, string> = { amp: "&", lt: "<", gt: ">", quot: '"', apos: "'", nbsp: " " };
+	return text.replace(/&(?:#(\d+)|#[xX]([0-9a-fA-F]+)|(amp|lt|gt|quot|apos|nbsp));/g, (entity: string, dec?: string, hex?: string, name?: string) => {
+		if (name) return named[name] ?? entity;
+		const code = dec ? parseInt(dec, 10) : parseInt(hex ?? "", 16);
+		return code <= 0x10ffff ? String.fromCodePoint(code) : entity; // out-of-range references stay as written
+	});
+}
+
+/**
+ * Fetch a web page and reduce it to readable plain text: drops script, style,
+ * nav, header and footer blocks, strips the remaining tags, decodes HTML
+ * entities, collapses whitespace runs, and truncates to MAX_PAGE_CHARS.
+ */
+async function fetchPage(url: string): Promise<string> {
+	const html = await fetchUrl(url);
+
+	const stripped = html
+		.replace(/<script[\s\S]*?<\/script>/gi, " ")
+		.replace(/<style[\s\S]*?<\/style>/gi, " ")
+		.replace(/<nav[\s\S]*?<\/nav>/gi, " ")
+		.replace(/<header[\s\S]*?<\/header>/gi, " ")
+		.replace(/<footer[\s\S]*?<\/footer>/gi, " ")
+		.replace(/<[^>]+>/g, " ");
+	// Decode only after tag stripping so escaped markup (`&lt;b&gt;`) survives as text.
+	const cleaned = decodeHtmlEntities(stripped).replace(/\s{2,}/g, " ").trim();
+
+	if (cleaned.length <= MAX_PAGE_CHARS) return cleaned;
+	return cleaned.slice(0, MAX_PAGE_CHARS) + `\n\n[... truncated at ${MAX_PAGE_CHARS} chars]`;
+}
+
+/** Render hits as a numbered plain-text list for the LLM, or a fixed notice when empty. */
+function formatResults(results: SearchResult[]): string {
+	if (results.length === 0) return "No results found.";
+
+	const entries: string[] = [];
+	results.forEach((hit, index) => {
+		entries.push(`${index + 1}. ${hit.title}\n   URL: ${hit.url}\n   ${hit.snippet}`);
+	});
+	return entries.join("\n\n");
+}
+
+/** Extension entry point: registers the `web_search` and `web_fetch` tools on `pi`. */
+export default function webSearchExtension(pi: ExtensionAPI): void {
+	// Failures are returned as `isError` results; this turns any thrown value into text.
+	const messageOf = (err: unknown): string => (err instanceof Error ? err.message : String(err));
+
+	const searchParameters = Type.Object({
+		query: Type.String({ description: "The search query to look up on DuckDuckGo" }),
+	});
+	const fetchParameters = Type.Object({
+		url: Type.String({ description: "The full URL to fetch (must start with http:// or https://)" }),
+	});
+
+	// web_search: run a DuckDuckGo query and hand back the formatted hit list.
+	pi.registerTool({
+		name: "web_search",
+		label: "Web Search",
+		description:
+			"Search the web using DuckDuckGo (no API key required). Returns up to 8 results with titles, URLs, and snippets. Use this when you need current information, documentation, or anything not in your training data.",
+		promptSnippet: "Search the web for current information",
+		parameters: searchParameters,
+		execute: async (_toolCallId, params, _signal) => {
+			const { query } = params;
+			try {
+				const results = await searchDuckDuckGo(query);
+				const text = formatResults(results);
+				return { content: [{ type: "text", text }], details: { query, resultCount: results.length, results } };
+			} catch (err) {
+				const error = messageOf(err);
+				return {
+					content: [{ type: "text", text: `Search failed: ${error}` }],
+					details: { query, error },
+					isError: true,
+				};
+			}
+		},
+	});
+
+	// web_fetch: download one URL and hand back its extracted text, typically
+	// to read a page that web_search surfaced.
+	pi.registerTool({
+		name: "web_fetch",
+		label: "Web Fetch",
+		description:
+			"Fetch the text content of a specific URL. Use after web_search to read the full content of a result page. Returns up to 12,000 characters of readable text.",
+		promptSnippet: "Fetch and read a specific URL",
+		parameters: fetchParameters,
+		execute: async (_toolCallId, params, _signal) => {
+			const { url } = params;
+			try {
+				const text = await fetchPage(url);
+				return { content: [{ type: "text", text }], details: { url, length: text.length } };
+			} catch (err) {
+				const error = messageOf(err);
+				return {
+					content: [{ type: "text", text: `Fetch failed: ${error}` }],
+					details: { url, error },
+					isError: true,
+				};
+			}
+		},
+	});
+}