summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r-- README.md 34
-rw-r--r-- pi/agent/extensions/web-search/README.md 42
-rw-r--r-- pi/agent/extensions/web-search/index.ts 248
3 files changed, 324 insertions, 0 deletions
diff --git a/README.md b/README.md
index 7ccc620..2206092 100644
--- a/README.md
+++ b/README.md
@@ -204,6 +204,40 @@ model switch hyperstack1/openai/gpt-oss-120b
Pi sends subsequent requests to the new model ID immediately; the provider base URL stays the same.
+## Extensions
+
+Custom extensions live in `pi/agent/extensions/` and are loaded automatically via the `~/.pi` symlink.
+
+| Extension | Purpose |
+|-----------|---------|
+| `web-search` | `web_search` and `web_fetch` tools — DuckDuckGo search + page fetching, no API key |
+| `ask-mode` | `/ask` command — restricts the model to read-only exploration tools |
+| `loop-scheduler` | `/loop` command — re-sends a prompt on a recurring interval |
+| `inline-bash` | `!{cmd}` syntax — expands shell output inline before sending to the model |
+| `session-name` | Auto-names sessions from the first message |
+| `modal-editor` | Opens an external editor (`$VISUAL`) for composing long prompts |
+| `handoff` | Compacts and hands off context to a fresh session |
+| `fresh-subagent` | Spawns a sub-agent in a clean context for isolated tasks |
+| `reload-runtime` | `/reload-runtime` command — hot-reloads extensions without restarting Pi |
+| `nemotron-tool-repair` | Repairs malformed tool calls from Nemotron models |
+| `taskwarrior-plan-mode` | Integrates Taskwarrior task management into Pi sessions |
+
+### Web search
+
+The `web-search` extension registers two LLM-callable tools:
+
+- **`web_search`** — searches DuckDuckGo and returns up to 8 results (title, URL, snippet)
+- **`web_fetch`** — fetches a URL and returns up to 12,000 characters of readable text
+
+Example prompts:
+
+```
+Search for the vLLM 0.9.0 changelog
+Find the Qwen3-Coder model card and summarize the recommended vLLM flags
+```
+
+No API key or account required. Uses DuckDuckGo's free HTML endpoint.
+
## Single-VM setup
A single VM can be deployed with the default config (GPT-OSS 120B):
diff --git a/pi/agent/extensions/web-search/README.md b/pi/agent/extensions/web-search/README.md
new file mode 100644
index 0000000..c7b77b3
--- /dev/null
+++ b/pi/agent/extensions/web-search/README.md
@@ -0,0 +1,42 @@
+# web-search
+
+Pi.dev extension that gives the LLM two tools for consulting the web during coding sessions.
+
+## Tools
+
+### `web_search`
+
+Searches DuckDuckGo (no API key, no account required) and returns up to 8 results with
+titles, URLs, and snippets. Use when the model needs current documentation, release notes,
+library APIs, or any information not in its training data.
+
+### `web_fetch`
+
+Fetches the full text of a URL. Strips `<script>`, `<style>`, `<nav>`, `<header>`, and
+`<footer>` blocks, collapses whitespace, and truncates to 12,000 characters. Use after
+`web_search` to read the complete content of a result page.
+
+## Usage
+
+The tools are registered automatically when the extension loads. Just ask the model to
+look something up:
+
+```
+Search for the vLLM changelog for version 0.9.0
+```
+
+```
+Find the Qwen3 model card on HuggingFace and summarize the recommended vLLM flags
+```
+
+## Backend
+
+Uses DuckDuckGo's free HTML endpoint (`https://html.duckduckgo.com/html/`). No API key
+or rate-limit registration is required, and no personally identifying headers are sent:
+requests carry only a generic browser `User-Agent`, `Accept`, and `Accept-Language`.
+HTTP requests time out after 15 seconds.
+
+## Limitations
+
+- DuckDuckGo HTML scraping may break if DDG changes its page structure.
+- Pages that require JavaScript rendering return little or no content.
+- Results are in English (`kl=us-en`).
diff --git a/pi/agent/extensions/web-search/index.ts b/pi/agent/extensions/web-search/index.ts
new file mode 100644
index 0000000..8c7b532
--- /dev/null
+++ b/pi/agent/extensions/web-search/index.ts
@@ -0,0 +1,248 @@
+import * as https from "node:https";
+import * as http from "node:http";
+import { URL } from "node:url";
+import { Type } from "@sinclair/typebox";
+import type { ExtensionAPI } from "@mariozechner/pi-coding-agent";
+
+// Upper bound on how many parsed results searchDuckDuckGo collects for one query.
+const MAX_RESULTS = 8;
+
+// Page text longer than this many characters is cut off by fetchPage, which appends a truncation notice.
+const MAX_PAGE_CHARS = 12000;
+
+// Value in milliseconds handed to req.setTimeout() for every outgoing HTTP(S) request.
+const REQUEST_TIMEOUT_MS = 15000;
+
+interface SearchResult { // One search hit as produced by searchDuckDuckGo.
+ title: string; // Result title with markup stripped and entities decoded.
+ url: string; // Target URL; taken from the `uddg` parameter of the DDG redirect link when present.
+ snippet: string; // Snippet text with markup stripped; empty when the result block has none.
+}
+
+/**
+ * Fetch a URL over HTTP or HTTPS and return the response body as a UTF-8 string.
+ *
+ * Redirects (3xx responses carrying a `Location` header) are followed up to
+ * `maxRedirects` times. Relative `Location` values are resolved against the URL
+ * that produced them.
+ *
+ * @param url - Absolute `http:` or `https:` URL to request.
+ * @param extraHeaders - Headers merged over the built-in browser-like defaults.
+ * @param maxRedirects - How many further redirect hops may still be followed.
+ * @returns A promise for the response body.
+ * @throws Rejects on a malformed or non-HTTP(S) URL, when the redirect budget
+ *   is exhausted, on a non-2xx final status, on timeout, or on a transport error.
+ */
+function fetchUrl(
+  url: string,
+  extraHeaders: Record<string, string> = {},
+  maxRedirects = 5,
+): Promise<string> {
+  return new Promise((resolve, reject) => {
+    // A malformed URL makes `new URL` throw, which rejects this promise.
+    const parsed = new URL(url);
+    const isHttps = parsed.protocol === "https:";
+    if (!isHttps && parsed.protocol !== "http:") {
+      // Without this guard, schemes such as file: would be sent over plain HTTP.
+      reject(new Error(`Unsupported protocol ${parsed.protocol} for ${url}`));
+      return;
+    }
+    const transport = isHttps ? https : http;
+
+    const options = {
+      hostname: parsed.hostname,
+      port: parsed.port || (isHttps ? 443 : 80),
+      path: parsed.pathname + parsed.search,
+      method: "GET",
+      headers: {
+        "User-Agent":
+          "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36",
+        Accept: "text/html,application/xhtml+xml,*/*;q=0.8",
+        "Accept-Language": "en-US,en;q=0.9",
+        ...extraHeaders,
+      },
+    };
+
+    const req = transport.request(options, (res) => {
+      const status = res.statusCode;
+      const location = res.headers.location;
+
+      if (status && status >= 300 && status < 400 && location) {
+        // Discard the redirect body so the socket can be released.
+        res.resume();
+        if (maxRedirects <= 0) {
+          // Bounding the hop count stops redirect loops from recursing forever.
+          reject(new Error(`Too many redirects for ${url}`));
+          return;
+        }
+        let target: string;
+        try {
+          // `Location` may be relative; resolve it against the current URL.
+          target = new URL(location, url).toString();
+        } catch {
+          reject(new Error(`Invalid redirect location ${location} for ${url}`));
+          return;
+        }
+        fetchUrl(target, extraHeaders, maxRedirects - 1).then(resolve, reject);
+        return;
+      }
+
+      if (!status || status < 200 || status >= 300) {
+        res.resume();
+        reject(new Error(`HTTP ${status} for ${url}`));
+        return;
+      }
+
+      const chunks: Buffer[] = [];
+      res.on("data", (chunk: Buffer) => chunks.push(chunk));
+      res.on("end", () => resolve(Buffer.concat(chunks).toString("utf-8")));
+      res.on("error", reject);
+    });
+
+    req.setTimeout(REQUEST_TIMEOUT_MS, () => {
+      // Destroying with an error surfaces it through the "error" handler below.
+      req.destroy(new Error(`Timeout fetching ${url}`));
+    });
+
+    req.on("error", reject);
+    req.end();
+  });
+}
+
+/**
+ * Search DuckDuckGo through its HTML endpoint (no API key required) and scrape
+ * result titles, target URLs, and snippets out of the returned markup.
+ *
+ * @param query - Free-text search query.
+ * @returns Up to MAX_RESULTS results in page order; empty when nothing parses.
+ * @throws Rejects with whatever error fetchUrl rejects with.
+ */
+async function searchDuckDuckGo(query: string): Promise<SearchResult[]> {
+  const params = new URLSearchParams({ q: query, kl: "us-en" });
+  const html = await fetchUrl(`https://html.duckduckgo.com/html/?${params}`, {
+    // DuckDuckGo HTML endpoint requires an Accept header to avoid redirects.
+    Accept: "text/html",
+  });
+
+  const results: SearchResult[] = [];
+
+  // Each result block looks like:
+  //   <div class="... result__body">
+  //     <a class="result__a" href="...">Title</a>
+  //     <a class="result__snippet">Snippet text</a>
+  //   </div>
+  // The href on result__a is a DDG redirect; the real URL is in the href
+  // query param `uddg=`.
+  //
+  // The container may carry additional classes, so `result__body` is matched
+  // as one class among several. A block ends at the next container, at the
+  // end-of-results marker, or at the end of the document, so the final result
+  // is kept even when the marker is missing.
+  const resultBlockRe =
+    /<div[^>]*\bclass="[^"]*\bresult__body\b[^"]*"[\s\S]*?(?=<div[^>]*\bclass="[^"]*\bresult__body\b[^"]*"|<\/div><!--end-results-->|$)/g;
+  const titleRe = /<a[^>]*class="result__a"[^>]*href="([^"]*)"[^>]*>([\s\S]*?)<\/a>/;
+  const snippetRe = /<a[^>]*class="result__snippet"[^>]*>([\s\S]*?)<\/a>/;
+
+  let block: RegExpExecArray | null;
+  while ((block = resultBlockRe.exec(html)) !== null && results.length < MAX_RESULTS) {
+    const blockHtml = block[0];
+
+    const titleMatch = titleRe.exec(blockHtml);
+    if (!titleMatch) continue;
+
+    // Attribute values are HTML-escaped (e.g. `&amp;` between query params).
+    const rawHref = decodeHtmlEntities(titleMatch[1]);
+    const rawTitle = titleMatch[2].replace(/<[^>]+>/g, "").trim();
+
+    // Resolve the real URL from the DDG redirect link.
+    let realUrl = rawHref;
+    try {
+      const hrefUrl = new URL(rawHref.startsWith("//") ? `https:${rawHref}` : rawHref);
+      // searchParams.get() already percent-decodes the value; decoding it a
+      // second time would corrupt targets that contain an encoded "%".
+      const uddg = hrefUrl.searchParams.get("uddg");
+      if (uddg) realUrl = uddg;
+    } catch {
+      // Keep rawHref if URL parsing fails.
+    }
+
+    const snippetMatch = snippetRe.exec(blockHtml);
+    const rawSnippet = snippetMatch
+      ? snippetMatch[1].replace(/<[^>]+>/g, "").trim()
+      : "";
+
+    // A block with neither title nor snippet text carries no information.
+    if (!rawTitle && !rawSnippet) continue;
+
+    results.push({
+      title: decodeHtmlEntities(rawTitle),
+      url: realUrl,
+      snippet: decodeHtmlEntities(rawSnippet),
+    });
+  }
+
+  return results;
+}
+
+/**
+ * Decode HTML character references in scraped text.
+ *
+ * Handles the named references `&amp;`, `&lt;`, `&gt;`, `&quot;`, `&apos;` and
+ * `&nbsp;` (decoded to a plain space), plus any decimal (`&#39;`) or
+ * hexadecimal (`&#x27;`) numeric reference. Unknown or out-of-range references
+ * are left untouched.
+ *
+ * Decoding happens in a single pass, so text that is itself escaped
+ * (`&amp;lt;`) is decoded exactly once (`&lt;`) rather than twice (`<`).
+ *
+ * @param text - Text that may contain HTML character references.
+ * @returns The text with recognised references replaced by their characters.
+ */
+function decodeHtmlEntities(text: string): string {
+  const named = new Map<string, string>([
+    ["amp", "&"],
+    ["lt", "<"],
+    ["gt", ">"],
+    ["quot", '"'],
+    ["apos", "'"],
+    ["nbsp", " "],
+  ]);
+
+  return text.replace(
+    /&(#[xX][0-9a-fA-F]+|#[0-9]+|[A-Za-z][A-Za-z0-9]*);/g,
+    (entity: string, body: string): string => {
+      if (body[0] !== "#") {
+        const replacement = named.get(body);
+        return replacement === undefined ? entity : replacement;
+      }
+
+      const isHex = body[1] === "x" || body[1] === "X";
+      const codePoint = isHex ? parseInt(body.slice(2), 16) : parseInt(body.slice(1), 10);
+
+      // Reject NUL, surrogates, and values beyond the Unicode range, which
+      // String.fromCodePoint would either throw on or turn into garbage.
+      const isSurrogate = codePoint >= 0xd800 && codePoint <= 0xdfff;
+      if (!(codePoint > 0 && codePoint <= 0x10ffff) || isSurrogate) return entity;
+
+      return String.fromCodePoint(codePoint);
+    },
+  );
+}
+
+/**
+ * Fetch a web page and reduce it to readable plain text.
+ *
+ * Removes HTML comments and the `<script>`, `<style>`, `<nav>`, `<header>` and
+ * `<footer>` elements together with their content, strips all remaining tags,
+ * decodes character references, collapses runs of whitespace, and truncates
+ * the result to MAX_PAGE_CHARS characters.
+ *
+ * @param url - URL of the page to fetch.
+ * @returns The extracted text, with a truncation notice appended when cut.
+ * @throws Rejects with whatever error fetchUrl rejects with.
+ */
+async function fetchPage(url: string): Promise<string> {
+  const html = await fetchUrl(url);
+
+  // Drop non-content regions first. The lookahead after each tag name keeps
+  // e.g. `<nav` from also matching an unrelated `<navbar-x>` element.
+  const withoutTags = html
+    .replace(/<!--[\s\S]*?-->/g, " ")
+    .replace(/<script(?=[\s/>])[\s\S]*?<\/script\s*>/gi, " ")
+    .replace(/<style(?=[\s/>])[\s\S]*?<\/style\s*>/gi, " ")
+    .replace(/<nav(?=[\s/>])[\s\S]*?<\/nav\s*>/gi, " ")
+    .replace(/<header(?=[\s/>])[\s\S]*?<\/header\s*>/gi, " ")
+    .replace(/<footer(?=[\s/>])[\s\S]*?<\/footer\s*>/gi, " ")
+    .replace(/<[^>]+>/g, " ");
+
+  // Decode entities only after tags are gone, so escaped markup such as
+  // `&lt;b&gt;` survives as literal text instead of being stripped as a tag.
+  const cleaned = decodeHtmlEntities(withoutTags)
+    .replace(/\s{2,}/g, " ")
+    .trim();
+
+  if (cleaned.length <= MAX_PAGE_CHARS) return cleaned;
+  return cleaned.slice(0, MAX_PAGE_CHARS) + `\n\n[... truncated at ${MAX_PAGE_CHARS} chars]`;
+}
+
+/**
+ * Render search results as a numbered plain-text list for the LLM.
+ * Each entry shows the title, the URL, and the snippet on separate lines;
+ * entries are separated by a blank line.
+ */
+function formatResults(results: SearchResult[]): string {
+  if (!results.length) {
+    return "No results found.";
+  }
+
+  const entries: string[] = [];
+  results.forEach((result, index) => {
+    const position = index + 1;
+    entries.push(`${position}. ${result.title}\n URL: ${result.url}\n ${result.snippet}`);
+  });
+
+  return entries.join("\n\n");
+}
+
+/**
+ * Extension entry point: registers the `web_search` and `web_fetch` tools on
+ * the given extension API.
+ *
+ * Both tools validate their input, never throw, and report failures as a
+ * result object with `isError: true` and a text message describing the error.
+ * The abort signal passed to `execute` is currently not used.
+ *
+ * @param pi - Extension API whose `registerTool` is called once per tool.
+ */
+export default function webSearchExtension(pi: ExtensionAPI): void {
+  // Tool: search the web and return a list of results with titles and snippets.
+  pi.registerTool({
+    name: "web_search",
+    label: "Web Search",
+    description:
+      "Search the web using DuckDuckGo (no API key required). Returns up to 8 results with titles, URLs, and snippets. Use this when you need current information, documentation, or anything not in your training data.",
+    promptSnippet: "Search the web for current information",
+    parameters: Type.Object({
+      query: Type.String({
+        description: "The search query to look up on DuckDuckGo",
+      }),
+    }),
+    async execute(_toolCallId, params, _signal) {
+      try {
+        // An empty query cannot produce results; fail before touching the network.
+        const query = params.query.trim();
+        if (!query) throw new Error("query must not be empty");
+
+        const results = await searchDuckDuckGo(query);
+        return {
+          content: [{ type: "text", text: formatResults(results) }],
+          details: { query: params.query, resultCount: results.length, results },
+        };
+      } catch (err) {
+        const msg = err instanceof Error ? err.message : String(err);
+        return {
+          content: [{ type: "text", text: `Search failed: ${msg}` }],
+          details: { query: params.query, error: msg },
+          isError: true,
+        };
+      }
+    },
+  });
+
+  // Tool: fetch a specific URL and return its text content.
+  // Useful after a web_search to read the full content of a result.
+  pi.registerTool({
+    name: "web_fetch",
+    label: "Web Fetch",
+    description:
+      "Fetch the text content of a specific URL. Use after web_search to read the full content of a result page. Returns up to 12,000 characters of readable text.",
+    promptSnippet: "Fetch and read a specific URL",
+    parameters: Type.Object({
+      url: Type.String({
+        description: "The full URL to fetch (must start with http:// or https://)",
+      }),
+    }),
+    async execute(_toolCallId, params, _signal) {
+      try {
+        // Enforce the documented contract so other schemes (file:, ftp:, ...)
+        // are never handed to the HTTP client.
+        const url = params.url.trim();
+        if (!/^https?:\/\//i.test(url)) {
+          throw new Error("URL must start with http:// or https://");
+        }
+
+        const text = await fetchPage(url);
+        return {
+          content: [{ type: "text", text }],
+          details: { url: params.url, length: text.length },
+        };
+      } catch (err) {
+        const msg = err instanceof Error ? err.message : String(err);
+        return {
+          content: [{ type: "text", text: `Fetch failed: ${msg}` }],
+          details: { url: params.url, error: msg },
+          isError: true,
+        };
+      }
+    },
+  });
+}