summaryrefslogtreecommitdiff
path: root/internal/batch/processor.go
blob: 0b2017957743d414c5c6edd4101e9794747ed489 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
package batch

import (
	"fmt"
	"os"
	"strings"
)

// WordEntry represents a word with optional translation.
//
// ReadBatchFile fills in whichever side the input line provides and leaves
// the other side as the empty string.
type WordEntry struct {
	Bulgarian   string // Bulgarian word; empty for "= english" lines
	Translation string // English translation; empty for Bulgarian-only lines
	// NeedsTranslation indicates if translation from English to Bulgarian is needed
	NeedsTranslation bool
}

// ReadBatchFile reads the file at filename and parses every non-blank line
// into a WordEntry, preserving file order.
//
// Supported line formats:
//   - Bulgarian word only: "ябълка" (Translation is left empty)
//   - With translation: "ябълка = apple" (both sides provided)
//   - English only: "= apple" (Bulgarian is left empty and NeedsTranslation
//     is set, because the Bulgarian side still has to be produced)
//
// Lines that contain '=' but have nothing after it are skipped. LF, CRLF and
// bare CR are all accepted as line terminators, a leading UTF-8 byte order
// mark is ignored, and surrounding whitespace (including Unicode whitespace
// such as a no-break space) is trimmed from lines and from both sides of '='.
//
// It returns a nil slice when the file holds no entries, and a wrapped error
// when the file cannot be read.
func ReadBatchFile(filename string) ([]WordEntry, error) {
	content, err := os.ReadFile(filename)
	if err != nil {
		return nil, fmt.Errorf("failed to read batch file: %w", err)
	}

	// Some editors prepend a UTF-8 byte order mark; left in place it would
	// become part of the first word.
	text := strings.TrimPrefix(string(content), "\ufeff")

	// Splitting on both CR and LF handles every line-ending convention, and
	// FieldsFunc already drops the empty pieces between "\r" and "\n".
	rawLines := strings.FieldsFunc(text, func(r rune) bool {
		return r == '\n' || r == '\r'
	})

	var entries []WordEntry
	for _, raw := range rawLines {
		if entry, ok := parseBatchLine(raw); ok {
			entries = append(entries, entry)
		}
	}

	return entries, nil
}

// parseBatchLine converts one line of a batch file into a WordEntry. The
// boolean result is false for lines that carry no entry: blank lines and
// lines whose part after '=' is empty.
func parseBatchLine(raw string) (WordEntry, bool) {
	line := strings.TrimSpace(raw)
	if line == "" {
		return WordEntry{}, false
	}

	sep := strings.IndexByte(line, '=')
	if sep < 0 {
		// Bulgarian word only. NeedsTranslation stays false because that flag
		// only covers the English-to-Bulgarian direction.
		return WordEntry{Bulgarian: line}, true
	}

	// Only the first '=' separates the two sides; any later '=' belongs to
	// the English part.
	bulgarian := strings.TrimSpace(line[:sep])
	english := strings.TrimSpace(line[sep+1:])
	if english == "" {
		// Nothing usable after '=': skip the line.
		return WordEntry{}, false
	}

	return WordEntry{
		Bulgarian:        bulgarian,
		Translation:      english,
		NeedsTranslation: bulgarian == "",
	}, true
}

// splitLines splits s into lines on '\n'. Every '\r' in s is dropped, so
// CRLF input produces the same lines as LF input. A trailing newline does
// not yield an extra empty final line, while empty lines elsewhere are kept.
// It returns nil when s contains no line content at all.
//
// The bytes of each line are otherwise passed through untouched, including
// any that are not valid UTF-8.
func splitLines(s string) []string {
	s = strings.ReplaceAll(s, "\r", "")
	if s == "" {
		return nil
	}

	lines := strings.Split(s, "\n")
	// Split reports an empty last element when s ends in "\n" (or is just
	// "\n"); that element is the terminator, not a line.
	if last := len(lines) - 1; lines[last] == "" {
		lines = lines[:last]
	}
	return lines
}

// trimSpace returns s with all leading and trailing ASCII spaces, tabs,
// newlines and carriage returns removed.
//
// Unlike strings.TrimSpace it deliberately leaves other whitespace (vertical
// tab, form feed, no-break space and the rest of Unicode whitespace) in
// place, matching the set this helper has always trimmed.
func trimSpace(s string) string {
	return strings.Trim(s, " \t\n\r")
}

// isSpace reports whether r is an ASCII space, horizontal tab, line feed or
// carriage return. No other character counts as a space here.
func isSpace(r rune) bool {
	switch r {
	case ' ', '\t', '\n', '\r':
		return true
	default:
		return false
	}
}