package processor

import (
	"fmt"
	"html"
	"os"
	"regexp"
	"strings"
	"time"

	"codeberg.org/snonux/snonux/internal/post"
)

// urlPattern matches http/https URLs in plain text.
// Trailing sentence punctuation is stripped separately by stripURLTrailing.
var urlPattern = regexp.MustCompile(`https?://\S+`)

// txtBuilder turns plain-text (".txt") sources into text posts.
type txtBuilder struct{}

// Plan renders the text file at srcPath to HTML and records the result in the
// returned plan. Any read error is returned together with a zero postPlan.
func (txtBuilder) Plan(srcPath string, ext string) (postPlan, error) {
	// Named "rendered" rather than "html" so the html package is not shadowed.
	rendered, err := processTxt(srcPath)
	if err != nil {
		return postPlan{}, err
	}
	plan := postPlan{srcPath: srcPath, ext: ext}
	plan.textHTML = rendered
	return plan, nil
}

// Commit builds the text post from a previously computed plan. It does no
// I/O and does not use postDir; the second return value is always nil here
// (presumably a list of written files — confirm against the builder contract).
func (txtBuilder) Commit(plan postPlan, postDir string, id string, now time.Time) (*post.Post, []string, error) {
	return &post.Post{
		ID:        id,
		Timestamp: now,
		PostType:  post.TypeText,
		Content:   plan.textHTML,
	}, nil, nil
}

// init registers txtBuilder as the handler for the ".txt" extension.
func init() {
	register(".txt", txtBuilder{})
}

// processTxt reads a plain-text file and wraps each non-empty paragraph in
// <p> tags.
// URLs are automatically converted to clickable links.
// Non-URL text is HTML-escaped to prevent XSS.
//
// An empty (or whitespace-only) file yields a single empty paragraph.
func processTxt(path string) (string, error) {
	data, err := os.ReadFile(path)
	if err != nil {
		return "", fmt.Errorf("read txt %s: %w", path, err)
	}

	// Normalise Windows line endings first; otherwise "\r\n\r\n" never matches
	// the "\n\n" paragraph separator and the whole file becomes one paragraph.
	raw := strings.TrimSpace(strings.ReplaceAll(string(data), "\r\n", "\n"))
	if raw == "" {
		return "<p></p>", nil
	}

	// Split on blank lines to get logical paragraphs.
	paragraphs := strings.Split(raw, "\n\n")

	var sb strings.Builder
	for _, para := range paragraphs {
		trimmed := strings.TrimSpace(para)
		if trimmed == "" {
			continue
		}
		fmt.Fprintf(&sb, "<p>%s</p>\n", formatParagraph(trimmed))
	}
	return sb.String(), nil
}

// formatParagraph formats a single paragraph: auto-links URLs, escapes non-URL
// text, and converts single newlines to <br> line breaks. Lines that are empty
// after trimming are dropped.
func formatParagraph(para string) string {
	lines := strings.Split(para, "\n")
	formatted := make([]string, 0, len(lines))
	for _, line := range lines {
		if t := strings.TrimSpace(line); t != "" {
			formatted = append(formatted, autolinkLine(t))
		}
	}
	return strings.Join(formatted, "<br>\n")
}

// autolinkLine escapes non-URL text and wraps detected URLs in <a> tags.
// Opens in a new tab with rel="noopener noreferrer" for security.
func autolinkLine(line string) string {
	locs := urlPattern.FindAllStringIndex(line, -1)
	if len(locs) == 0 {
		return html.EscapeString(line)
	}

	var sb strings.Builder
	prev := 0
	for _, loc := range locs {
		// Text between the previous match and this one is plain text.
		sb.WriteString(html.EscapeString(line[prev:loc[0]]))

		rawURL := line[loc[0]:loc[1]]
		cleanURL := stripURLTrailing(rawURL)
		trailing := rawURL[len(cleanURL):]

		// The URL is escaped for both the attribute and the link text.
		escaped := html.EscapeString(cleanURL)
		fmt.Fprintf(&sb, `<a href="%s" target="_blank" rel="noopener noreferrer">%s</a>`, escaped, escaped)

		// Punctuation stripped from the URL is still part of the sentence.
		if trailing != "" {
			sb.WriteString(html.EscapeString(trailing))
		}
		prev = loc[1]
	}
	sb.WriteString(html.EscapeString(line[prev:]))
	return sb.String()
}

// stripURLTrailing removes common sentence-ending punctuation from the end of a
// URL match. These characters are valid in URLs but almost never appear there
// at the end in prose (e.g. "Visit https://foo.com." — the "." ends the sentence).
//
// A closing bracket is kept when it is balanced by an opening bracket inside
// the URL, so "https://en.wikipedia.org/wiki/Go_(programming_language)" and
// "http://[::1]" survive intact while "(see https://foo.com)" still loses its
// trailing ")".
func stripURLTrailing(u string) string {
	const cutset = ".,;:!?\"')>]}"
	for len(u) > 0 {
		last := u[len(u)-1]
		if strings.IndexByte(cutset, last) < 0 {
			break
		}

		var opener string
		switch last {
		case ')':
			opener = "("
		case ']':
			opener = "["
		case '}':
			opener = "{"
		}
		// A closer with a matching opener inside the URL belongs to the URL.
		if opener != "" && strings.Count(u, opener) >= strings.Count(u, string(last)) {
			break
		}

		u = u[:len(u)-1]
	}
	return u
}