summaryrefslogtreecommitdiff
path: root/internal/showcase/code_extractor.go
diff options
context:
space:
mode:
author: Paul Buetow <paul@buetow.org> 2025-07-08 23:48:19 +0300
committer: Paul Buetow <paul@buetow.org> 2025-07-08 23:48:19 +0300
commit: 78c8d659e1a642e3938e763065da3a67c35f7183 (patch)
tree: 2012511c8f4ff97445f487c958b2688a384c9f34 /internal/showcase/code_extractor.go
parent: 60c1322272cf43ed5620433753b7db0b9bafcb55 (diff)
feat: improve language detection with shebang parsing
- Always check shebang lines for executable files without extensions
- Detect AWK scripts with .cgi extension (like awksite)
- Support detecting Python, Ruby, Perl, Raku, JavaScript, PHP, Lua via shebang
- Add comprehensive shebang parsing for awk/gawk/mawk variants
- awksite now correctly shows as 72.1% AWK

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>
Diffstat (limited to 'internal/showcase/code_extractor.go')
-rw-r--r-- internal/showcase/code_extractor.go 37
1 files changed, 28 insertions, 9 deletions
diff --git a/internal/showcase/code_extractor.go b/internal/showcase/code_extractor.go
index 4a4a429..518e590 100644
--- a/internal/showcase/code_extractor.go
+++ b/internal/showcase/code_extractor.go
@@ -49,7 +49,7 @@ func extractCodeSnippet(repoPath string, languages []LanguageStats) (string, str
"SQL": {".sql"},
"Make": {"Makefile", "makefile", "GNUmakefile"},
"HCL": {".tf", ".tfvars", ".hcl"},
- "AWK": {".awk"},
+ "AWK": {".awk", ".cgi"}, // .cgi files can be AWK scripts
}
// Get file extensions for the primary language
@@ -100,19 +100,38 @@ func extractCodeSnippet(repoPath string, languages []LanguageStats) (string, str
basename := filepath.Base(path)
ext := filepath.Ext(path)
+ matched := false
for _, validExt := range extensions {
if validExt == basename || (strings.HasPrefix(validExt, ".") && ext == validExt) {
- // Skip test files and generated files
- if !strings.Contains(basename, "_test") &&
- !strings.Contains(basename, ".test.") &&
- !strings.Contains(basename, ".min.") &&
- !strings.Contains(path, "/test/") &&
- !strings.Contains(path, "/tests/") {
- codeFiles = append(codeFiles, path)
- }
+ matched = true
break
}
}
+
+ // For executable files, also check shebang if primary language is AWK and file has .cgi extension
+ if !matched && primaryLang == "AWK" && ext == ".cgi" && info.Mode()&0111 != 0 {
+ if file, err := os.Open(path); err == nil {
+ scanner := bufio.NewScanner(file)
+ if scanner.Scan() {
+ firstLine := scanner.Text()
+ if strings.Contains(firstLine, "awk") || strings.Contains(firstLine, "gawk") {
+ matched = true
+ }
+ }
+ file.Close()
+ }
+ }
+
+ if matched {
+ // Skip test files and generated files
+ if !strings.Contains(basename, "_test") &&
+ !strings.Contains(basename, ".test.") &&
+ !strings.Contains(basename, ".min.") &&
+ !strings.Contains(path, "/test/") &&
+ !strings.Contains(path, "/tests/") {
+ codeFiles = append(codeFiles, path)
+ }
+ }
return nil
})