1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
|
package processor
import (
"context"
"fmt"
"math/rand"
"os"
"path/filepath"
"strings"
"time"
"github.com/spf13/viper"
"codeberg.org/snonux/totalrecall/internal"
"codeberg.org/snonux/totalrecall/internal/anki"
"codeberg.org/snonux/totalrecall/internal/audio"
"codeberg.org/snonux/totalrecall/internal/batch"
"codeberg.org/snonux/totalrecall/internal/cli"
"codeberg.org/snonux/totalrecall/internal/gui"
"codeberg.org/snonux/totalrecall/internal/image"
"codeberg.org/snonux/totalrecall/internal/phonetic"
"codeberg.org/snonux/totalrecall/internal/translation"
)
// Processor handles the main word processing logic
type Processor struct {
flags *cli.Flags
translator *translation.Translator
translationCache *translation.TranslationCache
phoneticFetcher *phonetic.Fetcher
}
// NewProcessor creates a new word processor
func NewProcessor(flags *cli.Flags) *Processor {
apiKey := cli.GetOpenAIKey()
return &Processor{
flags: flags,
translator: translation.NewTranslator(apiKey),
translationCache: translation.NewTranslationCache(),
phoneticFetcher: phonetic.NewFetcher(apiKey),
}
}
// ProcessBatch processes multiple words from a batch file
func (p *Processor) ProcessBatch() error {
entries, err := batch.ReadBatchFile(p.flags.BatchFile)
if err != nil {
return err
}
// Validate words
for _, entry := range entries {
if err := audio.ValidateBulgarianText(entry.Bulgarian); err != nil {
return fmt.Errorf("invalid word '%s': %w", entry.Bulgarian, err)
}
}
// Create output directory
if err := os.MkdirAll(p.flags.OutputDir, 0755); err != nil {
return fmt.Errorf("failed to create output directory: %w", err)
}
// Process each entry
for i, entry := range entries {
fmt.Printf("\nProcessing %d/%d: %s\n", i+1, len(entries), entry.Bulgarian)
if err := p.ProcessWordWithTranslation(entry.Bulgarian, entry.Translation); err != nil {
fmt.Fprintf(os.Stderr, "Error processing '%s': %v\n", entry.Bulgarian, err)
// Continue with next word
}
}
return nil
}
// ProcessSingleWord processes a single word from command line
func (p *Processor) ProcessSingleWord(word string) error {
// Validate word
if err := audio.ValidateBulgarianText(word); err != nil {
return fmt.Errorf("invalid word '%s': %w", word, err)
}
// Create output directory
if err := os.MkdirAll(p.flags.OutputDir, 0755); err != nil {
return fmt.Errorf("failed to create output directory: %w", err)
}
fmt.Printf("\nProcessing: %s\n", word)
return p.ProcessWordWithTranslation(word, "")
}
// ProcessWordWithTranslation processes a word with optional provided translation
func (p *Processor) ProcessWordWithTranslation(word, providedTranslation string) error {
var translationText string
// Use provided translation if available, otherwise translate
if providedTranslation != "" {
translationText = providedTranslation
fmt.Printf(" Using provided translation: %s\n", translationText)
} else {
// Translate the word first
fmt.Printf(" Translating to English...\n")
var err error
translationText, err = p.translator.TranslateWord(word)
if err != nil {
fmt.Printf(" Warning: Translation failed: %v\n", err)
translationText = "" // Continue without translation
} else {
fmt.Printf(" Translation: %s\n", translationText)
}
}
// Store translation for Anki export
if translationText != "" {
p.translationCache.Add(word, translationText)
// Find or create word directory
wordDir := p.findOrCreateWordDirectory(word)
// Save translation to file
if err := translation.SaveTranslation(wordDir, word, translationText); err != nil {
fmt.Printf(" Warning: Failed to save translation: %v\n", err)
}
}
// Generate audio
if !p.flags.SkipAudio {
fmt.Printf(" Generating audio...\n")
if err := p.generateAudio(word); err != nil {
return fmt.Errorf("audio generation failed: %w", err)
}
}
// Download images - pass the translation for better image generation
if !p.flags.SkipImages {
fmt.Printf(" Downloading images...\n")
if err := p.downloadImagesWithTranslation(word, translationText); err != nil {
return fmt.Errorf("image download failed: %w", err)
}
}
// Fetch phonetic information
fmt.Printf(" Fetching phonetic information...\n")
wordDir := p.findCardDirectory(word)
if wordDir != "" {
if err := p.phoneticFetcher.FetchAndSave(word, wordDir); err != nil {
// Don't fail the whole process if phonetic info fails
fmt.Printf(" Warning: Failed to fetch phonetic info: %v\n", err)
} else {
fmt.Printf(" Saved phonetic information\n")
}
}
return nil
}
// generateAudio generates audio files for a word
func (p *Processor) generateAudio(word string) error {
allVoicesList := []string{"alloy", "ash", "ballad", "coral", "echo", "fable", "onyx", "nova", "sage", "shimmer", "verse"}
// Get list of voices to use
var voices []string
if p.flags.AllVoices {
voices = allVoicesList
} else if p.flags.OpenAIVoice != "" {
// Use explicitly specified voice
voices = []string{p.flags.OpenAIVoice}
fmt.Printf(" Using specified voice: %s\n", p.flags.OpenAIVoice)
} else {
// Select a random voice
randomVoice := allVoicesList[rand.Intn(len(allVoicesList))]
voices = []string{randomVoice}
fmt.Printf(" Using random voice: %s\n", randomVoice)
}
// Generate audio for each voice
for i, voice := range voices {
if p.flags.AllVoices {
fmt.Printf(" Generating audio %d/%d (voice: %s)...\n", i+1, len(voices), voice)
}
if err := p.generateAudioWithVoice(word, voice); err != nil {
return fmt.Errorf("failed to generate audio with voice %s: %w", voice, err)
}
}
return nil
}
// generateAudioWithVoice generates audio for a word with a specific voice
func (p *Processor) generateAudioWithVoice(word, voice string) error {
// Generate random speed between 0.90 and 1.00 if not explicitly set
speed := p.flags.OpenAISpeed
if p.flags.OpenAISpeed == 0.9 && !viper.IsSet("audio.openai_speed") {
// Default was used, generate random speed
speed = 0.90 + rand.Float64()*0.10
}
// Create audio provider configuration
providerConfig := &audio.Config{
Provider: "openai",
OutputDir: p.flags.OutputDir,
OutputFormat: p.flags.AudioFormat,
// OpenAI settings
OpenAIKey: cli.GetOpenAIKey(),
OpenAIModel: p.flags.OpenAIModel,
OpenAIVoice: voice,
OpenAISpeed: speed,
OpenAIInstruction: p.flags.OpenAIInstruction,
// Caching
EnableCache: viper.GetBool("audio.enable_cache"),
CacheDir: viper.GetString("audio.cache_dir"),
}
// Set defaults
if providerConfig.CacheDir == "" {
providerConfig.CacheDir = "./.audio_cache"
}
// Use config file values if not overridden by flags
if p.flags.OpenAIModel == "gpt-4o-mini-tts" && viper.IsSet("audio.openai_model") {
providerConfig.OpenAIModel = viper.GetString("audio.openai_model")
}
if p.flags.OpenAISpeed == 0.9 && viper.IsSet("audio.openai_speed") {
providerConfig.OpenAISpeed = viper.GetFloat64("audio.openai_speed")
}
if p.flags.OpenAIInstruction == "" && viper.IsSet("audio.openai_instruction") {
providerConfig.OpenAIInstruction = viper.GetString("audio.openai_instruction")
}
// Create the audio provider
provider, err := audio.NewProvider(providerConfig)
if err != nil {
return err
}
// Generate audio file
ctx := context.Background()
// Find existing card directory or create new one
wordDir := p.findOrCreateWordDirectory(word)
// Add voice name to filename if generating multiple voices
var outputFile string
if p.flags.AllVoices {
outputFile = filepath.Join(wordDir, fmt.Sprintf("audio_%s.%s", voice, p.flags.AudioFormat))
} else {
outputFile = filepath.Join(wordDir, fmt.Sprintf("audio.%s", p.flags.AudioFormat))
}
// Generate the audio
err = provider.GenerateAudio(ctx, word, outputFile)
if err != nil {
return err
}
// Save audio attribution
if err := p.saveAudioAttribution(word, outputFile, providerConfig); err != nil {
fmt.Printf(" Warning: Failed to save audio attribution: %v\n", err)
}
return nil
}
// downloadImagesWithTranslation downloads images for a word
func (p *Processor) downloadImagesWithTranslation(word, translationText string) error {
// Create image searcher based on provider
var searcher image.ImageSearcher
switch p.flags.ImageAPI {
case "openai":
// Create OpenAI image configuration
openaiConfig := &image.OpenAIConfig{
APIKey: cli.GetOpenAIKey(),
Model: p.flags.OpenAIImageModel,
Size: p.flags.OpenAIImageSize,
Quality: p.flags.OpenAIImageQuality,
Style: p.flags.OpenAIImageStyle,
}
// Use config file values if not overridden by flags
if p.flags.OpenAIImageModel == "dall-e-3" && viper.IsSet("image.openai_model") {
openaiConfig.Model = viper.GetString("image.openai_model")
}
if p.flags.OpenAIImageSize == "1024x1024" && viper.IsSet("image.openai_size") {
openaiConfig.Size = viper.GetString("image.openai_size")
}
if p.flags.OpenAIImageQuality == "standard" && viper.IsSet("image.openai_quality") {
openaiConfig.Quality = viper.GetString("image.openai_quality")
}
if p.flags.OpenAIImageStyle == "natural" && viper.IsSet("image.openai_style") {
openaiConfig.Style = viper.GetString("image.openai_style")
}
searcher = image.NewOpenAIClient(openaiConfig)
if openaiConfig.APIKey == "" {
return fmt.Errorf("OpenAI API key is required for image generation")
}
default:
return fmt.Errorf("unknown image provider: %s", p.flags.ImageAPI)
}
// Find existing card directory or create new one
wordDir := p.findOrCreateWordDirectory(word)
// Create downloader
downloadOpts := &image.DownloadOptions{
OutputDir: wordDir,
OverwriteExisting: true,
CreateDir: true,
FileNamePattern: "image",
MaxSizeBytes: 5 * 1024 * 1024, // 5MB
}
downloader := image.NewDownloader(searcher, downloadOpts)
// Create search options with translation if provided
searchOpts := image.DefaultSearchOptions(word)
if translationText != "" {
searchOpts.Translation = translationText
}
// Download single image
ctx := context.Background()
_, path, err := downloader.DownloadBestMatchWithOptions(ctx, searchOpts)
if err != nil {
return err
}
fmt.Printf(" Downloaded: %s\n", path)
// If using OpenAI, save the prompt
if p.flags.ImageAPI == "openai" {
if openaiClient, ok := searcher.(*image.OpenAIClient); ok {
usedPrompt := openaiClient.GetLastPrompt()
if usedPrompt != "" {
promptFile := filepath.Join(wordDir, "image_prompt.txt")
if err := os.WriteFile(promptFile, []byte(usedPrompt), 0644); err != nil {
fmt.Printf(" Warning: Failed to save image prompt: %v\n", err)
}
}
}
}
return nil
}
// GenerateAnkiFile generates the Anki import file
func (p *Processor) GenerateAnkiFile() error {
// Create Anki generator
gen := anki.NewGenerator(&anki.GeneratorOptions{
OutputPath: filepath.Join(p.flags.OutputDir, "anki_import.csv"),
MediaFolder: p.flags.OutputDir,
IncludeHeaders: true,
AudioFormat: p.flags.AudioFormat,
})
// Generate cards from output directory
if err := gen.GenerateFromDirectory(p.flags.OutputDir); err != nil {
return fmt.Errorf("failed to generate cards: %w", err)
}
// Add translations to cards
translations := p.translationCache.GetAll()
for i := range gen.GetCards() {
if translation, ok := translations[gen.GetCards()[i].Bulgarian]; ok {
gen.GetCards()[i].Translation = translation
}
}
if p.flags.AnkiCSV {
// Generate CSV
if err := gen.GenerateCSV(); err != nil {
return fmt.Errorf("failed to generate CSV: %w", err)
}
} else {
// Generate APKG
outputPath := filepath.Join(p.flags.OutputDir, fmt.Sprintf("%s.apkg", internal.SanitizeFilename(p.flags.DeckName)))
if err := gen.GenerateAPKG(outputPath, p.flags.DeckName); err != nil {
return fmt.Errorf("failed to generate APKG: %w", err)
}
}
// Print stats
total, withAudio, withImages := gen.Stats()
fmt.Printf(" Generated %d cards (%d with audio, %d with images)\n",
total, withAudio, withImages)
return nil
}
// RunGUIMode launches the GUI application
func (p *Processor) RunGUIMode() error {
// Create GUI configuration from command line flags and viper config
guiConfig := &gui.Config{
AudioFormat: p.flags.AudioFormat,
ImageProvider: p.flags.ImageAPI,
OpenAIKey: cli.GetOpenAIKey(),
}
// Only set OutputDir if it was explicitly provided via flag
// Check if the outputDir is different from the default
home, _ := os.UserHomeDir()
defaultOutputDir := filepath.Join(home, "Downloads")
if p.flags.OutputDir != defaultOutputDir {
// User explicitly set a different output directory
guiConfig.OutputDir = p.flags.OutputDir
}
// Otherwise, gui.New will use its own default (XDG state directory)
// Create and run GUI application
app := gui.New(guiConfig)
app.Run()
return nil
}
// Helper methods
func (p *Processor) findOrCreateWordDirectory(word string) string {
// Try to find existing directory first
if dir := p.findCardDirectory(word); dir != "" {
return dir
}
// No existing directory, create new one with card ID
cardID := internal.GenerateCardID(word)
wordDir := filepath.Join(p.flags.OutputDir, cardID)
if err := os.MkdirAll(wordDir, 0755); err != nil {
fmt.Printf("Warning: failed to create word directory: %v\n", err)
return p.flags.OutputDir // Fallback to output directory
}
// Save word metadata
metadataFile := filepath.Join(wordDir, "word.txt")
if err := os.WriteFile(metadataFile, []byte(word), 0644); err != nil {
fmt.Printf("Warning: failed to save word metadata: %v\n", err)
}
return wordDir
}
func (p *Processor) findCardDirectory(word string) string {
entries, err := os.ReadDir(p.flags.OutputDir)
if err != nil {
return ""
}
// Look through all directories to find one with matching word.txt
for _, entry := range entries {
if !entry.IsDir() || strings.HasPrefix(entry.Name(), ".") {
continue
}
dirPath := filepath.Join(p.flags.OutputDir, entry.Name())
wordFile := filepath.Join(dirPath, "word.txt")
// Read the word file to check if it matches
if data, err := os.ReadFile(wordFile); err == nil {
storedWord := strings.TrimSpace(string(data))
if storedWord == word {
return dirPath
}
} else {
// Try old format with underscore for backward compatibility
wordFile = filepath.Join(dirPath, "_word.txt")
if data, err := os.ReadFile(wordFile); err == nil {
storedWord := strings.TrimSpace(string(data))
if storedWord == word {
return dirPath
}
}
}
}
return ""
}
func (p *Processor) saveAudioAttribution(word, audioFile string, config *audio.Config) error {
// Create attribution text
attribution := fmt.Sprintf("Audio generated by OpenAI TTS\n\n")
attribution += fmt.Sprintf("Bulgarian word: %s\n", word)
attribution += fmt.Sprintf("Model: %s\n", config.OpenAIModel)
attribution += fmt.Sprintf("Voice: %s\n", config.OpenAIVoice)
attribution += fmt.Sprintf("Speed: %.2f\n", config.OpenAISpeed)
if config.OpenAIInstruction != "" {
attribution += fmt.Sprintf("\nVoice instructions:\n%s\n", config.OpenAIInstruction)
}
// Add preprocessing information
cleanedWord := strings.TrimSpace(word)
punctuationToRemove := []string{"!", "?", ".", ",", ";", ":", "\"", "'", "(", ")", "[", "]", "{", "}", "-", "—", "–"}
for _, punct := range punctuationToRemove {
cleanedWord = strings.ReplaceAll(cleanedWord, punct, "")
}
processedText := fmt.Sprintf("%s...", strings.TrimSpace(cleanedWord))
attribution += fmt.Sprintf("\nProcessed text sent to TTS: %s\n", processedText)
attribution += fmt.Sprintf("\nGenerated at: %s\n", time.Now().Format("2006-01-02 15:04:05"))
// Save to file
attrPath := strings.TrimSuffix(audioFile, filepath.Ext(audioFile)) + "_attribution.txt"
if err := os.WriteFile(attrPath, []byte(attribution), 0644); err != nil {
return fmt.Errorf("failed to write audio attribution file: %w", err)
}
// Also save metadata for GUI display
wordDir := filepath.Dir(audioFile)
metadataFile := filepath.Join(wordDir, "audio_metadata.txt")
metadata := fmt.Sprintf("voice=%s\nspeed=%.2f\n", config.OpenAIVoice, config.OpenAISpeed)
if err := os.WriteFile(metadataFile, []byte(metadata), 0644); err != nil {
// Non-fatal error, just log it
fmt.Printf("Warning: Failed to save audio metadata: %v\n", err)
}
return nil
}
|