diff options
| author | Paul Buetow <paul@buetow.org> | 2025-07-08 23:48:19 +0300 |
|---|---|---|
| committer | Paul Buetow <paul@buetow.org> | 2025-07-08 23:48:19 +0300 |
| commit | 78c8d659e1a642e3938e763065da3a67c35f7183 (patch) | |
| tree | 2012511c8f4ff97445f487c958b2688a384c9f34 /internal/showcase | |
| parent | 60c1322272cf43ed5620433753b7db0b9bafcb55 (diff) | |
feat: improve language detection with shebang parsing
- Check the shebang line of executable files whose language could not be determined from their name or extension
- Detect AWK scripts with .cgi extension (like awksite)
- Support detecting Python, JavaScript, Ruby, Perl, Raku, AWK, Shell, PHP, and Lua via shebang
- Add comprehensive shebang parsing for awk/gawk/mawk variants
- awksite now correctly shows as 72.1% AWK
🤖 Generated with [Claude Code](https://claude.ai/code)
Co-Authored-By: Claude <noreply@anthropic.com>
Diffstat (limited to 'internal/showcase')
| mode | file | lines changed |
|---|---|---|
| -rw-r--r-- | internal/showcase/code_extractor.go | 37 |
| -rw-r--r-- | internal/showcase/language_detector.go | 33 |
2 files changed, 61 insertions, 9 deletions
diff --git a/internal/showcase/code_extractor.go b/internal/showcase/code_extractor.go index 4a4a429..518e590 100644 --- a/internal/showcase/code_extractor.go +++ b/internal/showcase/code_extractor.go @@ -49,7 +49,7 @@ func extractCodeSnippet(repoPath string, languages []LanguageStats) (string, str "SQL": {".sql"}, "Make": {"Makefile", "makefile", "GNUmakefile"}, "HCL": {".tf", ".tfvars", ".hcl"}, - "AWK": {".awk"}, + "AWK": {".awk", ".cgi"}, // .cgi files can be AWK scripts } // Get file extensions for the primary language @@ -100,19 +100,38 @@ func extractCodeSnippet(repoPath string, languages []LanguageStats) (string, str basename := filepath.Base(path) ext := filepath.Ext(path) + matched := false for _, validExt := range extensions { if validExt == basename || (strings.HasPrefix(validExt, ".") && ext == validExt) { - // Skip test files and generated files - if !strings.Contains(basename, "_test") && - !strings.Contains(basename, ".test.") && - !strings.Contains(basename, ".min.") && - !strings.Contains(path, "/test/") && - !strings.Contains(path, "/tests/") { - codeFiles = append(codeFiles, path) - } + matched = true break } } + + // For executable files, also check shebang if primary language is AWK and file has .cgi extension + if !matched && primaryLang == "AWK" && ext == ".cgi" && info.Mode()&0111 != 0 { + if file, err := os.Open(path); err == nil { + scanner := bufio.NewScanner(file) + if scanner.Scan() { + firstLine := scanner.Text() + if strings.Contains(firstLine, "awk") || strings.Contains(firstLine, "gawk") { + matched = true + } + } + file.Close() + } + } + + if matched { + // Skip test files and generated files + if !strings.Contains(basename, "_test") && + !strings.Contains(basename, ".test.") && + !strings.Contains(basename, ".min.") && + !strings.Contains(path, "/test/") && + !strings.Contains(path, "/tests/") { + codeFiles = append(codeFiles, path) + } + } return nil }) diff --git a/internal/showcase/language_detector.go 
b/internal/showcase/language_detector.go index b82430d..0f356b8 100644 --- a/internal/showcase/language_detector.go +++ b/internal/showcase/language_detector.go @@ -172,6 +172,39 @@ func detectLanguages(repoPath string) (languages []LanguageStats, documentation } } + // Check shebang for executable files when no language was detected + if language == "" && info.Mode()&0111 != 0 { + if file, err := os.Open(path); err == nil { + scanner := bufio.NewScanner(file) + if scanner.Scan() { + firstLine := scanner.Text() + if strings.HasPrefix(firstLine, "#!") { + // Check for various interpreters in shebang + if strings.Contains(firstLine, "python") { + language = "Python" + } else if strings.Contains(firstLine, "node") { + language = "JavaScript" + } else if strings.Contains(firstLine, "ruby") { + language = "Ruby" + } else if strings.Contains(firstLine, "perl") && !strings.Contains(firstLine, "perl6") { + language = "Perl" + } else if strings.Contains(firstLine, "perl6") || strings.Contains(firstLine, "raku") { + language = "Raku" + } else if strings.Contains(firstLine, "awk") || strings.Contains(firstLine, "gawk") || strings.Contains(firstLine, "mawk") { + language = "AWK" + } else if strings.Contains(firstLine, "sh") || strings.Contains(firstLine, "bash") || strings.Contains(firstLine, "zsh") || strings.Contains(firstLine, "fish") { + language = "Shell" + } else if strings.Contains(firstLine, "php") { + language = "PHP" + } else if strings.Contains(firstLine, "lua") { + language = "Lua" + } + } + } + file.Close() + } + } + // If we identified a language, count its lines if language != "" { lines, err := countFileLines(path) |
