diff options
| author | Paul Buetow <paul@buetow.org> | 2025-07-20 22:18:57 +0300 |
|---|---|---|
| committer | Paul Buetow <paul@buetow.org> | 2025-07-20 22:18:57 +0300 |
| commit | e580fb57a29ec3c3f3e180b20cfa6ec28687689b (patch) | |
| tree | de74f04450b830268e4c1644a91acb9fd45c3802 /internal | |
| parent | 9e3328a6aaefe4bd1aa0ec3e8bf6e93d6033180b (diff) | |
Refactor main.go into focused packages
- Reduced main.go from 961 lines to 89 lines (91% reduction)
- Created new packages for better separation of concerns:
  - cli: Command-line interface setup and configuration
  - processor: Core word processing logic and orchestration
  - batch: Batch file processing functionality
  - translation: Bulgarian-to-English translation services
  - models: OpenAI model listing functionality
  - phonetic: Phonetic information fetching
- Each package has clear documentation in doc.go files
- Improved testability and maintainability
- All existing functionality preserved
- All tests passing and build successful
Diffstat (limited to 'internal')
| mode | file | lines changed |
|---|---|---|
| -rw-r--r-- | internal/batch/doc.go | 4 |
| -rw-r--r-- | internal/batch/processor.go | 94 |
| -rw-r--r-- | internal/cli/command.go | 136 |
| -rw-r--r-- | internal/cli/doc.go | 4 |
| -rw-r--r-- | internal/cli/flags.go | 46 |
| -rw-r--r-- | internal/models/doc.go | 4 |
| -rw-r--r-- | internal/models/lister.go | 100 |
| -rw-r--r-- | internal/phonetic/doc.go | 4 |
| -rw-r--r-- | internal/phonetic/fetcher.go | 83 |
| -rw-r--r-- | internal/processor/doc.go | 5 |
| -rw-r--r-- | internal/processor/processor.go | 519 |
| -rw-r--r-- | internal/translation/doc.go | 4 |
| -rw-r--r-- | internal/translation/translator.go | 103 |
13 files changed, 1106 insertions, 0 deletions
diff --git a/internal/batch/doc.go b/internal/batch/doc.go new file mode 100644 index 0000000..33f3b19 --- /dev/null +++ b/internal/batch/doc.go @@ -0,0 +1,4 @@ +// Package batch handles batch processing of Bulgarian words from files. +// It supports reading word lists with optional translations in the format: +// "bulgarian_word" or "bulgarian_word = english_translation" +package batch diff --git a/internal/batch/processor.go b/internal/batch/processor.go new file mode 100644 index 0000000..75a38aa --- /dev/null +++ b/internal/batch/processor.go @@ -0,0 +1,94 @@ +package batch + +import ( + "fmt" + "os" + "strings" +) + +// WordEntry represents a word with optional translation +type WordEntry struct { + Bulgarian string + Translation string +} + +// ReadBatchFile reads words from a file and returns WordEntry slice +// Supports formats: +// - Bulgarian word only: "ябълка" +// - With translation: "ябълка = apple" +func ReadBatchFile(filename string) ([]WordEntry, error) { + content, err := os.ReadFile(filename) + if err != nil { + return nil, fmt.Errorf("failed to read batch file: %w", err) + } + + var entries []WordEntry + lines := string(content) + + for _, line := range splitLines(lines) { + if line = trimSpace(line); line != "" { + // Check if line contains '=' for bulgarian = english format + if strings.Contains(line, "=") { + parts := strings.SplitN(line, "=", 2) + if len(parts) == 2 { + bulgarian := strings.TrimSpace(parts[0]) + english := strings.TrimSpace(parts[1]) + if bulgarian != "" { + entries = append(entries, WordEntry{ + Bulgarian: bulgarian, + Translation: english, + }) + } + } + } else { + // Just a bulgarian word + entries = append(entries, WordEntry{ + Bulgarian: line, + Translation: "", + }) + } + } + } + + return entries, nil +} + +// splitLines splits a string by newlines +func splitLines(s string) []string { + var lines []string + current := "" + for _, r := range s { + if r == '\n' { + lines = append(lines, current) + current = "" + } else 
if r != '\r' { + current += string(r) + } + } + if current != "" { + lines = append(lines, current) + } + return lines +} + +// trimSpace trims whitespace from string +func trimSpace(s string) string { + start := 0 + end := len(s) + + // Trim from start + for start < end && isSpace(rune(s[start])) { + start++ + } + + // Trim from end + for end > start && isSpace(rune(s[end-1])) { + end-- + } + + return s[start:end] +} + +func isSpace(r rune) bool { + return r == ' ' || r == '\t' || r == '\n' || r == '\r' +} diff --git a/internal/cli/command.go b/internal/cli/command.go new file mode 100644 index 0000000..6f9f1c0 --- /dev/null +++ b/internal/cli/command.go @@ -0,0 +1,136 @@ +package cli + +import ( + "fmt" + "os" + "path/filepath" + + "github.com/spf13/cobra" + "github.com/spf13/viper" + + "codeberg.org/snonux/totalrecall/internal" +) + +// CreateRootCommand creates and configures the root cobra command +func CreateRootCommand(flags *Flags) *cobra.Command { + rootCmd := &cobra.Command{ + Use: "totalrecall [word]", + Short: "Bulgarian Anki Flashcard Generator", + Long: `totalrecall generates Anki flashcard materials from Bulgarian words. + +It creates audio pronunciation files using OpenAI TTS and downloads +representative images from web search APIs. 
+ +Examples: + totalrecall # Launch interactive GUI (default) + totalrecall ябълка # Generate materials for "apple" via CLI + totalrecall --batch words.txt # Process multiple words from file + totalrecall --gui # Explicitly launch GUI mode`, + Args: cobra.MaximumNArgs(1), + Version: internal.Version, + } + + // Set up flags + setupFlags(rootCmd, flags) + + return rootCmd +} + +func setupFlags(cmd *cobra.Command, flags *Flags) { + // Set default output directory + home, _ := os.UserHomeDir() + defaultOutputDir := filepath.Join(home, "Downloads") + + // Global flags + cmd.PersistentFlags().StringVar(&flags.CfgFile, "config", "", "config file (default is $HOME/.totalrecall.yaml)") + + // Local flags + cmd.Flags().StringVarP(&flags.OutputDir, "output", "o", defaultOutputDir, "Output directory") + cmd.Flags().StringVarP(&flags.AudioFormat, "format", "f", flags.AudioFormat, "Audio format (wav or mp3)") + cmd.Flags().StringVar(&flags.ImageAPI, "image-api", flags.ImageAPI, "Image source (only openai supported)") + cmd.Flags().StringVar(&flags.BatchFile, "batch", "", "Process words from file (one per line)") + cmd.Flags().BoolVar(&flags.SkipAudio, "skip-audio", false, "Skip audio generation") + cmd.Flags().BoolVar(&flags.SkipImages, "skip-images", false, "Skip image download") + cmd.Flags().BoolVar(&flags.GenerateAnki, "anki", false, "Generate Anki import file (APKG format by default, use --anki-csv for legacy CSV)") + cmd.Flags().BoolVar(&flags.AnkiCSV, "anki-csv", false, "Generate legacy CSV format instead of APKG when using --anki") + cmd.Flags().StringVar(&flags.DeckName, "deck-name", flags.DeckName, "Deck name for APKG export") + cmd.Flags().BoolVar(&flags.ListModels, "list-models", false, "List available OpenAI models for the current API key") + cmd.Flags().BoolVar(&flags.AllVoices, "all-voices", false, "Generate audio in all available voices (creates multiple files)") + cmd.Flags().BoolVar(&flags.GUIMode, "gui", false, "Launch interactive GUI mode") + + // OpenAI 
flags + cmd.Flags().StringVar(&flags.OpenAIModel, "openai-model", flags.OpenAIModel, "OpenAI TTS model: tts-1, tts-1-hd, gpt-4o-mini-tts") + cmd.Flags().StringVar(&flags.OpenAIVoice, "openai-voice", "", "OpenAI voice: alloy, ash, ballad, coral, echo, fable, onyx, nova, sage, shimmer, verse (default: random)") + cmd.Flags().Float64Var(&flags.OpenAISpeed, "openai-speed", flags.OpenAISpeed, "OpenAI speech speed (0.25 to 4.0, may be ignored by gpt-4o-mini-tts)") + cmd.Flags().StringVar(&flags.OpenAIInstruction, "openai-instruction", "", "Voice instructions for gpt-4o-mini-tts model (e.g., 'speak slowly with a Bulgarian accent')") + + // OpenAI Image Generation flags + cmd.Flags().StringVar(&flags.OpenAIImageModel, "openai-image-model", flags.OpenAIImageModel, "OpenAI image model: dall-e-2 or dall-e-3") + cmd.Flags().StringVar(&flags.OpenAIImageSize, "openai-image-size", flags.OpenAIImageSize, "Image size: 256x256, 512x512, 1024x1024 (dall-e-3: also 1024x1792, 1792x1024)") + cmd.Flags().StringVar(&flags.OpenAIImageQuality, "openai-image-quality", flags.OpenAIImageQuality, "Image quality: standard or hd (dall-e-3 only)") + cmd.Flags().StringVar(&flags.OpenAIImageStyle, "openai-image-style", flags.OpenAIImageStyle, "Image style: natural or vivid (dall-e-3 only)") + + // Bind flags to viper + bindFlagsToViper(cmd) +} + +func bindFlagsToViper(cmd *cobra.Command) { + viper.BindPFlag("audio.provider", cmd.Flags().Lookup("audio-provider")) + viper.BindPFlag("audio.voice", cmd.Flags().Lookup("voice")) + viper.BindPFlag("audio.format", cmd.Flags().Lookup("format")) + viper.BindPFlag("audio.pitch", cmd.Flags().Lookup("pitch")) + viper.BindPFlag("audio.amplitude", cmd.Flags().Lookup("amplitude")) + viper.BindPFlag("audio.word_gap", cmd.Flags().Lookup("word-gap")) + viper.BindPFlag("audio.openai_model", cmd.Flags().Lookup("openai-model")) + viper.BindPFlag("audio.openai_voice", cmd.Flags().Lookup("openai-voice")) + viper.BindPFlag("audio.openai_speed", 
cmd.Flags().Lookup("openai-speed")) + viper.BindPFlag("audio.openai_instruction", cmd.Flags().Lookup("openai-instruction")) + viper.BindPFlag("output.directory", cmd.Flags().Lookup("output")) + viper.BindPFlag("image.provider", cmd.Flags().Lookup("image-api")) + // Bind OpenAI image flags + viper.BindPFlag("image.openai_model", cmd.Flags().Lookup("openai-image-model")) + viper.BindPFlag("image.openai_size", cmd.Flags().Lookup("openai-image-size")) + viper.BindPFlag("image.openai_quality", cmd.Flags().Lookup("openai-image-quality")) + viper.BindPFlag("image.openai_style", cmd.Flags().Lookup("openai-image-style")) +} + +// InitConfig initializes viper configuration +func InitConfig(cfgFile string) { + if cfgFile != "" { + // Use config file from the flag + viper.SetConfigFile(cfgFile) + } else { + // Find home directory + home, err := os.UserHomeDir() + if err != nil { + fmt.Fprintf(os.Stderr, "Error getting home directory: %v\n", err) + return + } + + // Search config in home directory with name ".totalrecall" (without extension) + viper.AddConfigPath(home) + viper.AddConfigPath(".") + viper.SetConfigType("yaml") + viper.SetConfigName(".totalrecall") + } + + // Environment variables + viper.SetEnvPrefix("TOTALRECALL") + viper.AutomaticEnv() + + // Read config file + if err := viper.ReadInConfig(); err == nil { + fmt.Fprintln(os.Stderr, "Using config file:", viper.ConfigFileUsed()) + } +} + +// GetOpenAIKey retrieves the OpenAI API key from environment or config +func GetOpenAIKey() string { + // First check environment variable + if key := os.Getenv("OPENAI_API_KEY"); key != "" { + return key + } + + // Then check config file + return viper.GetString("audio.openai_key") +} diff --git a/internal/cli/doc.go b/internal/cli/doc.go new file mode 100644 index 0000000..6ed189b --- /dev/null +++ b/internal/cli/doc.go @@ -0,0 +1,4 @@ +// Package cli provides command-line interface setup and configuration +// for the totalrecall application. 
It handles flag parsing, command +// creation, and configuration management using cobra and viper. +package cli diff --git a/internal/cli/flags.go b/internal/cli/flags.go new file mode 100644 index 0000000..64575a0 --- /dev/null +++ b/internal/cli/flags.go @@ -0,0 +1,46 @@ +package cli + +// Flags holds all command-line flag values +type Flags struct { + // General flags + CfgFile string + OutputDir string + AudioFormat string + ImageAPI string + BatchFile string + SkipAudio bool + SkipImages bool + GenerateAnki bool + AnkiCSV bool + DeckName string + ListModels bool + AllVoices bool + GUIMode bool + + // OpenAI flags + OpenAIModel string + OpenAIVoice string + OpenAISpeed float64 + OpenAIInstruction string + + // OpenAI Image flags + OpenAIImageModel string + OpenAIImageSize string + OpenAIImageQuality string + OpenAIImageStyle string +} + +// NewFlags creates a new Flags instance with default values +func NewFlags() *Flags { + return &Flags{ + AudioFormat: "mp3", + ImageAPI: "openai", + DeckName: "Bulgarian Vocabulary", + OpenAIModel: "gpt-4o-mini-tts", + OpenAISpeed: 0.9, + OpenAIImageModel: "dall-e-3", + OpenAIImageSize: "1024x1024", + OpenAIImageQuality: "standard", + OpenAIImageStyle: "natural", + } +} diff --git a/internal/models/doc.go b/internal/models/doc.go new file mode 100644 index 0000000..116a04a --- /dev/null +++ b/internal/models/doc.go @@ -0,0 +1,4 @@ +// Package models provides functionality for listing and categorizing +// available OpenAI models. It helps users discover which TTS, image +// generation, and chat models are available with their API key. 
+package models diff --git a/internal/models/lister.go b/internal/models/lister.go new file mode 100644 index 0000000..bb383bc --- /dev/null +++ b/internal/models/lister.go @@ -0,0 +1,100 @@ +package models + +import ( + "context" + "fmt" + "sort" + "strings" + + "github.com/sashabaranov/go-openai" +) + +// Lister handles listing available OpenAI models +type Lister struct { + apiKey string + client *openai.Client +} + +// NewLister creates a new model lister +func NewLister(apiKey string) *Lister { + return &Lister{ + apiKey: apiKey, + client: openai.NewClient(apiKey), + } +} + +// ListAvailableModels lists all available OpenAI models categorized by type +func (l *Lister) ListAvailableModels() error { + if l.apiKey == "" { + return fmt.Errorf("OpenAI API key not found. Set OPENAI_API_KEY environment variable or configure in .totalrecall.yaml") + } + + // List models + ctx := context.Background() + models, err := l.client.ListModels(ctx) + if err != nil { + return fmt.Errorf("failed to list models: %w", err) + } + + // Categorize models + ttsModels := []string{} + imageModels := []string{} + chatModels := []string{} + + for _, model := range models.Models { + modelID := model.ID + if strings.Contains(modelID, "tts") || strings.Contains(modelID, "audio") { + ttsModels = append(ttsModels, modelID) + } else if strings.Contains(modelID, "dall-e") { + imageModels = append(imageModels, modelID) + } else if strings.Contains(modelID, "gpt") || strings.Contains(modelID, "chat") { + chatModels = append(chatModels, modelID) + } + } + + // Sort models + sort.Strings(ttsModels) + sort.Strings(imageModels) + sort.Strings(chatModels) + + // Print models + fmt.Println("Available OpenAI Models:") + fmt.Println("\nText-to-Speech (TTS) Models:") + if len(ttsModels) == 0 { + fmt.Println(" No TTS models found") + } else { + for _, model := range ttsModels { + fmt.Printf(" %s\n", model) + } + } + + fmt.Println("\nImage Generation Models:") + if len(imageModels) == 0 { + fmt.Println(" No 
image models found") + } else { + for _, model := range imageModels { + fmt.Printf(" %s\n", model) + } + } + + fmt.Println("\nChat/Translation Models (for Bulgarian translation):") + if len(chatModels) > 10 { + // Show only relevant models + relevantModels := []string{} + for _, model := range chatModels { + if strings.Contains(model, "gpt-4") || strings.Contains(model, "gpt-3.5") { + relevantModels = append(relevantModels, model) + } + } + for _, model := range relevantModels { + fmt.Printf(" %s\n", model) + } + fmt.Printf(" ... and %d more models\n", len(chatModels)-len(relevantModels)) + } else { + for _, model := range chatModels { + fmt.Printf(" %s\n", model) + } + } + + return nil +} diff --git a/internal/phonetic/doc.go b/internal/phonetic/doc.go new file mode 100644 index 0000000..9209f41 --- /dev/null +++ b/internal/phonetic/doc.go @@ -0,0 +1,4 @@ +// Package phonetic provides functionality for fetching detailed phonetic +// information about Bulgarian words using OpenAI's GPT models. It generates +// IPA transcriptions with detailed explanations for language learners. 
+package phonetic diff --git a/internal/phonetic/fetcher.go b/internal/phonetic/fetcher.go new file mode 100644 index 0000000..7b168c4 --- /dev/null +++ b/internal/phonetic/fetcher.go @@ -0,0 +1,83 @@ +package phonetic + +import ( + "context" + "fmt" + "os" + "path/filepath" + "strings" + "time" + + "github.com/sashabaranov/go-openai" +) + +// Fetcher handles fetching phonetic information for Bulgarian words +type Fetcher struct { + apiKey string + client *openai.Client +} + +// NewFetcher creates a new phonetic information fetcher +func NewFetcher(apiKey string) *Fetcher { + return &Fetcher{ + apiKey: apiKey, + client: openai.NewClient(apiKey), + } +} + +// FetchAndSave fetches phonetic information for a word and saves it to the word directory +func (f *Fetcher) FetchAndSave(word, wordDir string) error { + if f.apiKey == "" { + return fmt.Errorf("OpenAI API key not configured") + } + + ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second) + defer cancel() + + req := openai.ChatCompletionRequest{ + Model: openai.GPT4o, + Messages: []openai.ChatCompletionMessage{ + { + Role: openai.ChatMessageRoleSystem, + Content: "You are a Bulgarian language expert helping language learners understand pronunciation. Provide detailed phonetic information using the International Phonetic Alphabet (IPA). For each IPA symbol used, give concrete examples of how it sounds using familiar English words or sounds when possible.", + }, + { + Role: openai.ChatMessageRoleUser, + Content: fmt.Sprintf(`For the Bulgarian word '%s': +1. Provide the complete IPA transcription +2. Break down EACH phonetic symbol used in the transcription +3. 
For EVERY symbol, explain how it's pronounced with examples: + - If similar to an English sound, give English word examples + - If not in English, describe tongue/mouth position or compare to similar sounds + - Include stress marks and explain which syllable is stressed + +Example format: +Word: [IPA transcription] +• /p/ - like 'p' in English 'pot' +• /a/ - like 'a' in 'father' +• /ˈ/ - stress mark (following syllable is stressed)`, word), + }, + }, + Temperature: 0.3, + MaxTokens: 500, + } + + resp, err := f.client.CreateChatCompletion(ctx, req) + if err != nil { + return fmt.Errorf("OpenAI API error: %w", err) + } + + if len(resp.Choices) == 0 || resp.Choices[0].Message.Content == "" { + return fmt.Errorf("no response from OpenAI") + } + + phoneticInfo := strings.TrimSpace(resp.Choices[0].Message.Content) + + // Save phonetic info to file + phoneticFile := filepath.Join(wordDir, "phonetic.txt") + if err := os.WriteFile(phoneticFile, []byte(phoneticInfo), 0644); err != nil { + return fmt.Errorf("failed to write phonetic file: %w", err) + } + + return nil +} diff --git a/internal/processor/doc.go b/internal/processor/doc.go new file mode 100644 index 0000000..6738816 --- /dev/null +++ b/internal/processor/doc.go @@ -0,0 +1,5 @@ +// Package processor contains the core business logic for processing Bulgarian +// words. It orchestrates audio generation, image downloading, translation, +// phonetic information fetching, and Anki file generation. This package +// serves as the main coordinator between all other components. 
+package processor diff --git a/internal/processor/processor.go b/internal/processor/processor.go new file mode 100644 index 0000000..be4a5e6 --- /dev/null +++ b/internal/processor/processor.go @@ -0,0 +1,519 @@ +package processor + +import ( + "context" + "fmt" + "math/rand" + "os" + "path/filepath" + "strings" + "time" + + "github.com/spf13/viper" + + "codeberg.org/snonux/totalrecall/internal" + "codeberg.org/snonux/totalrecall/internal/anki" + "codeberg.org/snonux/totalrecall/internal/audio" + "codeberg.org/snonux/totalrecall/internal/batch" + "codeberg.org/snonux/totalrecall/internal/cli" + "codeberg.org/snonux/totalrecall/internal/gui" + "codeberg.org/snonux/totalrecall/internal/image" + "codeberg.org/snonux/totalrecall/internal/phonetic" + "codeberg.org/snonux/totalrecall/internal/translation" +) + +// Processor handles the main word processing logic +type Processor struct { + flags *cli.Flags + translator *translation.Translator + translationCache *translation.TranslationCache + phoneticFetcher *phonetic.Fetcher +} + +// NewProcessor creates a new word processor +func NewProcessor(flags *cli.Flags) *Processor { + apiKey := cli.GetOpenAIKey() + return &Processor{ + flags: flags, + translator: translation.NewTranslator(apiKey), + translationCache: translation.NewTranslationCache(), + phoneticFetcher: phonetic.NewFetcher(apiKey), + } +} + +// ProcessBatch processes multiple words from a batch file +func (p *Processor) ProcessBatch() error { + entries, err := batch.ReadBatchFile(p.flags.BatchFile) + if err != nil { + return err + } + + // Validate words + for _, entry := range entries { + if err := audio.ValidateBulgarianText(entry.Bulgarian); err != nil { + return fmt.Errorf("invalid word '%s': %w", entry.Bulgarian, err) + } + } + + // Create output directory + if err := os.MkdirAll(p.flags.OutputDir, 0755); err != nil { + return fmt.Errorf("failed to create output directory: %w", err) + } + + // Process each entry + for i, entry := range entries { + 
fmt.Printf("\nProcessing %d/%d: %s\n", i+1, len(entries), entry.Bulgarian) + + if err := p.ProcessWordWithTranslation(entry.Bulgarian, entry.Translation); err != nil { + fmt.Fprintf(os.Stderr, "Error processing '%s': %v\n", entry.Bulgarian, err) + // Continue with next word + } + } + + return nil +} + +// ProcessSingleWord processes a single word from command line +func (p *Processor) ProcessSingleWord(word string) error { + // Validate word + if err := audio.ValidateBulgarianText(word); err != nil { + return fmt.Errorf("invalid word '%s': %w", word, err) + } + + // Create output directory + if err := os.MkdirAll(p.flags.OutputDir, 0755); err != nil { + return fmt.Errorf("failed to create output directory: %w", err) + } + + fmt.Printf("\nProcessing: %s\n", word) + return p.ProcessWordWithTranslation(word, "") +} + +// ProcessWordWithTranslation processes a word with optional provided translation +func (p *Processor) ProcessWordWithTranslation(word, providedTranslation string) error { + var translationText string + + // Use provided translation if available, otherwise translate + if providedTranslation != "" { + translationText = providedTranslation + fmt.Printf(" Using provided translation: %s\n", translationText) + } else { + // Translate the word first + fmt.Printf(" Translating to English...\n") + var err error + translationText, err = p.translator.TranslateWord(word) + if err != nil { + fmt.Printf(" Warning: Translation failed: %v\n", err) + translationText = "" // Continue without translation + } else { + fmt.Printf(" Translation: %s\n", translationText) + } + } + + // Store translation for Anki export + if translationText != "" { + p.translationCache.Add(word, translationText) + + // Find or create word directory + wordDir := p.findOrCreateWordDirectory(word) + + // Save translation to file + if err := translation.SaveTranslation(wordDir, word, translationText); err != nil { + fmt.Printf(" Warning: Failed to save translation: %v\n", err) + } + } + + // 
Generate audio + if !p.flags.SkipAudio { + fmt.Printf(" Generating audio...\n") + if err := p.generateAudio(word); err != nil { + return fmt.Errorf("audio generation failed: %w", err) + } + } + + // Download images - pass the translation for better image generation + if !p.flags.SkipImages { + fmt.Printf(" Downloading images...\n") + if err := p.downloadImagesWithTranslation(word, translationText); err != nil { + return fmt.Errorf("image download failed: %w", err) + } + } + + // Fetch phonetic information + fmt.Printf(" Fetching phonetic information...\n") + wordDir := p.findCardDirectory(word) + if wordDir != "" { + if err := p.phoneticFetcher.FetchAndSave(word, wordDir); err != nil { + // Don't fail the whole process if phonetic info fails + fmt.Printf(" Warning: Failed to fetch phonetic info: %v\n", err) + } else { + fmt.Printf(" Saved phonetic information\n") + } + } + + return nil +} + +// generateAudio generates audio files for a word +func (p *Processor) generateAudio(word string) error { + allVoicesList := []string{"alloy", "ash", "ballad", "coral", "echo", "fable", "onyx", "nova", "sage", "shimmer", "verse"} + + // Get list of voices to use + var voices []string + if p.flags.AllVoices { + voices = allVoicesList + } else if p.flags.OpenAIVoice != "" { + // Use explicitly specified voice + voices = []string{p.flags.OpenAIVoice} + fmt.Printf(" Using specified voice: %s\n", p.flags.OpenAIVoice) + } else { + // Select a random voice + randomVoice := allVoicesList[rand.Intn(len(allVoicesList))] + voices = []string{randomVoice} + fmt.Printf(" Using random voice: %s\n", randomVoice) + } + + // Generate audio for each voice + for i, voice := range voices { + if p.flags.AllVoices { + fmt.Printf(" Generating audio %d/%d (voice: %s)...\n", i+1, len(voices), voice) + } + if err := p.generateAudioWithVoice(word, voice); err != nil { + return fmt.Errorf("failed to generate audio with voice %s: %w", voice, err) + } + } + + return nil +} + +// generateAudioWithVoice 
generates audio for a word with a specific voice +func (p *Processor) generateAudioWithVoice(word, voice string) error { + // Generate random speed between 0.90 and 1.00 if not explicitly set + speed := p.flags.OpenAISpeed + if p.flags.OpenAISpeed == 0.9 && !viper.IsSet("audio.openai_speed") { + // Default was used, generate random speed + speed = 0.90 + rand.Float64()*0.10 + } + + // Create audio provider configuration + providerConfig := &audio.Config{ + Provider: "openai", + OutputDir: p.flags.OutputDir, + OutputFormat: p.flags.AudioFormat, + + // OpenAI settings + OpenAIKey: cli.GetOpenAIKey(), + OpenAIModel: p.flags.OpenAIModel, + OpenAIVoice: voice, + OpenAISpeed: speed, + OpenAIInstruction: p.flags.OpenAIInstruction, + + // Caching + EnableCache: viper.GetBool("audio.enable_cache"), + CacheDir: viper.GetString("audio.cache_dir"), + } + + // Set defaults + if providerConfig.CacheDir == "" { + providerConfig.CacheDir = "./.audio_cache" + } + + // Use config file values if not overridden by flags + if p.flags.OpenAIModel == "gpt-4o-mini-tts" && viper.IsSet("audio.openai_model") { + providerConfig.OpenAIModel = viper.GetString("audio.openai_model") + } + if p.flags.OpenAISpeed == 0.9 && viper.IsSet("audio.openai_speed") { + providerConfig.OpenAISpeed = viper.GetFloat64("audio.openai_speed") + } + if p.flags.OpenAIInstruction == "" && viper.IsSet("audio.openai_instruction") { + providerConfig.OpenAIInstruction = viper.GetString("audio.openai_instruction") + } + + // Create the audio provider + provider, err := audio.NewProvider(providerConfig) + if err != nil { + return err + } + + // Generate audio file + ctx := context.Background() + + // Find existing card directory or create new one + wordDir := p.findOrCreateWordDirectory(word) + + // Add voice name to filename if generating multiple voices + var outputFile string + if p.flags.AllVoices { + outputFile = filepath.Join(wordDir, fmt.Sprintf("audio_%s.%s", voice, p.flags.AudioFormat)) + } else { + outputFile = 
filepath.Join(wordDir, fmt.Sprintf("audio.%s", p.flags.AudioFormat)) + } + + // Generate the audio + err = provider.GenerateAudio(ctx, word, outputFile) + if err != nil { + return err + } + + // Save audio attribution + if err := p.saveAudioAttribution(word, outputFile, providerConfig); err != nil { + fmt.Printf(" Warning: Failed to save audio attribution: %v\n", err) + } + + return nil +} + +// downloadImagesWithTranslation downloads images for a word +func (p *Processor) downloadImagesWithTranslation(word, translationText string) error { + // Create image searcher based on provider + var searcher image.ImageSearcher + + switch p.flags.ImageAPI { + case "openai": + // Create OpenAI image configuration + openaiConfig := &image.OpenAIConfig{ + APIKey: cli.GetOpenAIKey(), + Model: p.flags.OpenAIImageModel, + Size: p.flags.OpenAIImageSize, + Quality: p.flags.OpenAIImageQuality, + Style: p.flags.OpenAIImageStyle, + } + + // Use config file values if not overridden by flags + if p.flags.OpenAIImageModel == "dall-e-3" && viper.IsSet("image.openai_model") { + openaiConfig.Model = viper.GetString("image.openai_model") + } + if p.flags.OpenAIImageSize == "1024x1024" && viper.IsSet("image.openai_size") { + openaiConfig.Size = viper.GetString("image.openai_size") + } + if p.flags.OpenAIImageQuality == "standard" && viper.IsSet("image.openai_quality") { + openaiConfig.Quality = viper.GetString("image.openai_quality") + } + if p.flags.OpenAIImageStyle == "natural" && viper.IsSet("image.openai_style") { + openaiConfig.Style = viper.GetString("image.openai_style") + } + + searcher = image.NewOpenAIClient(openaiConfig) + if openaiConfig.APIKey == "" { + return fmt.Errorf("OpenAI API key is required for image generation") + } + + default: + return fmt.Errorf("unknown image provider: %s", p.flags.ImageAPI) + } + + // Find existing card directory or create new one + wordDir := p.findOrCreateWordDirectory(word) + + // Create downloader + downloadOpts := &image.DownloadOptions{ + 
OutputDir: wordDir, + OverwriteExisting: true, + CreateDir: true, + FileNamePattern: "image", + MaxSizeBytes: 5 * 1024 * 1024, // 5MB + } + + downloader := image.NewDownloader(searcher, downloadOpts) + + // Create search options with translation if provided + searchOpts := image.DefaultSearchOptions(word) + if translationText != "" { + searchOpts.Translation = translationText + } + + // Download single image + ctx := context.Background() + _, path, err := downloader.DownloadBestMatchWithOptions(ctx, searchOpts) + if err != nil { + return err + } + fmt.Printf(" Downloaded: %s\n", path) + + // If using OpenAI, save the prompt + if p.flags.ImageAPI == "openai" { + if openaiClient, ok := searcher.(*image.OpenAIClient); ok { + usedPrompt := openaiClient.GetLastPrompt() + if usedPrompt != "" { + promptFile := filepath.Join(wordDir, "image_prompt.txt") + if err := os.WriteFile(promptFile, []byte(usedPrompt), 0644); err != nil { + fmt.Printf(" Warning: Failed to save image prompt: %v\n", err) + } + } + } + } + + return nil +} + +// GenerateAnkiFile generates the Anki import file +func (p *Processor) GenerateAnkiFile() error { + // Create Anki generator + gen := anki.NewGenerator(&anki.GeneratorOptions{ + OutputPath: filepath.Join(p.flags.OutputDir, "anki_import.csv"), + MediaFolder: p.flags.OutputDir, + IncludeHeaders: true, + AudioFormat: p.flags.AudioFormat, + }) + + // Generate cards from output directory + if err := gen.GenerateFromDirectory(p.flags.OutputDir); err != nil { + return fmt.Errorf("failed to generate cards: %w", err) + } + + // Add translations to cards + translations := p.translationCache.GetAll() + for i := range gen.GetCards() { + if translation, ok := translations[gen.GetCards()[i].Bulgarian]; ok { + gen.GetCards()[i].Translation = translation + } + } + + if p.flags.AnkiCSV { + // Generate CSV + if err := gen.GenerateCSV(); err != nil { + return fmt.Errorf("failed to generate CSV: %w", err) + } + } else { + // Generate APKG + outputPath := 
filepath.Join(p.flags.OutputDir, fmt.Sprintf("%s.apkg", internal.SanitizeFilename(p.flags.DeckName))) + if err := gen.GenerateAPKG(outputPath, p.flags.DeckName); err != nil { + return fmt.Errorf("failed to generate APKG: %w", err) + } + } + + // Print stats + total, withAudio, withImages := gen.Stats() + fmt.Printf(" Generated %d cards (%d with audio, %d with images)\n", + total, withAudio, withImages) + + return nil +} + +// RunGUIMode launches the GUI application +func (p *Processor) RunGUIMode() error { + // Create GUI configuration from command line flags and viper config + guiConfig := &gui.Config{ + AudioFormat: p.flags.AudioFormat, + ImageProvider: p.flags.ImageAPI, + OpenAIKey: cli.GetOpenAIKey(), + } + + // Only set OutputDir if it was explicitly provided via flag + // Check if the outputDir is different from the default + home, _ := os.UserHomeDir() + defaultOutputDir := filepath.Join(home, "Downloads") + if p.flags.OutputDir != defaultOutputDir { + // User explicitly set a different output directory + guiConfig.OutputDir = p.flags.OutputDir + } + // Otherwise, gui.New will use its own default (XDG state directory) + + // Create and run GUI application + app := gui.New(guiConfig) + app.Run() + + return nil +} + +// Helper methods + +func (p *Processor) findOrCreateWordDirectory(word string) string { + // Try to find existing directory first + if dir := p.findCardDirectory(word); dir != "" { + return dir + } + + // No existing directory, create new one with card ID + cardID := internal.GenerateCardID(word) + wordDir := filepath.Join(p.flags.OutputDir, cardID) + if err := os.MkdirAll(wordDir, 0755); err != nil { + fmt.Printf("Warning: failed to create word directory: %v\n", err) + return p.flags.OutputDir // Fallback to output directory + } + + // Save word metadata + metadataFile := filepath.Join(wordDir, "word.txt") + if err := os.WriteFile(metadataFile, []byte(word), 0644); err != nil { + fmt.Printf("Warning: failed to save word metadata: %v\n", err) + } 
+ + return wordDir +} + +func (p *Processor) findCardDirectory(word string) string { + entries, err := os.ReadDir(p.flags.OutputDir) + if err != nil { + return "" + } + + // Look through all directories to find one with matching word.txt + for _, entry := range entries { + if !entry.IsDir() || strings.HasPrefix(entry.Name(), ".") { + continue + } + + dirPath := filepath.Join(p.flags.OutputDir, entry.Name()) + wordFile := filepath.Join(dirPath, "word.txt") + + // Read the word file to check if it matches + if data, err := os.ReadFile(wordFile); err == nil { + storedWord := strings.TrimSpace(string(data)) + if storedWord == word { + return dirPath + } + } else { + // Try old format with underscore for backward compatibility + wordFile = filepath.Join(dirPath, "_word.txt") + if data, err := os.ReadFile(wordFile); err == nil { + storedWord := strings.TrimSpace(string(data)) + if storedWord == word { + return dirPath + } + } + } + } + + return "" +} + +func (p *Processor) saveAudioAttribution(word, audioFile string, config *audio.Config) error { + // Create attribution text + attribution := fmt.Sprintf("Audio generated by OpenAI TTS\n\n") + attribution += fmt.Sprintf("Bulgarian word: %s\n", word) + attribution += fmt.Sprintf("Model: %s\n", config.OpenAIModel) + attribution += fmt.Sprintf("Voice: %s\n", config.OpenAIVoice) + attribution += fmt.Sprintf("Speed: %.2f\n", config.OpenAISpeed) + + if config.OpenAIInstruction != "" { + attribution += fmt.Sprintf("\nVoice instructions:\n%s\n", config.OpenAIInstruction) + } + + // Add preprocessing information + cleanedWord := strings.TrimSpace(word) + punctuationToRemove := []string{"!", "?", ".", ",", ";", ":", "\"", "'", "(", ")", "[", "]", "{", "}", "-", "—", "–"} + for _, punct := range punctuationToRemove { + cleanedWord = strings.ReplaceAll(cleanedWord, punct, "") + } + processedText := fmt.Sprintf("%s...", strings.TrimSpace(cleanedWord)) + attribution += fmt.Sprintf("\nProcessed text sent to TTS: %s\n", processedText) + + 
attribution += fmt.Sprintf("\nGenerated at: %s\n", time.Now().Format("2006-01-02 15:04:05")) + + // Save to file + attrPath := strings.TrimSuffix(audioFile, filepath.Ext(audioFile)) + "_attribution.txt" + if err := os.WriteFile(attrPath, []byte(attribution), 0644); err != nil { + return fmt.Errorf("failed to write audio attribution file: %w", err) + } + + // Also save metadata for GUI display + wordDir := filepath.Dir(audioFile) + metadataFile := filepath.Join(wordDir, "audio_metadata.txt") + metadata := fmt.Sprintf("voice=%s\nspeed=%.2f\n", config.OpenAIVoice, config.OpenAISpeed) + if err := os.WriteFile(metadataFile, []byte(metadata), 0644); err != nil { + // Non-fatal error, just log it + fmt.Printf("Warning: Failed to save audio metadata: %v\n", err) + } + + return nil +} diff --git a/internal/translation/doc.go b/internal/translation/doc.go new file mode 100644 index 0000000..fac31ff --- /dev/null +++ b/internal/translation/doc.go @@ -0,0 +1,4 @@ +// Package translation provides Bulgarian to English translation services +// using the OpenAI API. It includes translation caching for batch operations +// and file persistence for translated words. 
+package translation diff --git a/internal/translation/translator.go b/internal/translation/translator.go new file mode 100644 index 0000000..ab3a879 --- /dev/null +++ b/internal/translation/translator.go @@ -0,0 +1,103 @@ +package translation + +import ( + "context" + "fmt" + "os" + "path/filepath" + "strings" + + "github.com/sashabaranov/go-openai" +) + +// Translator handles Bulgarian to English translation +type Translator struct { + apiKey string + client *openai.Client +} + +// NewTranslator creates a new translator instance +func NewTranslator(apiKey string) *Translator { + return &Translator{ + apiKey: apiKey, + client: openai.NewClient(apiKey), + } +} + +// TranslateWord translates a Bulgarian word to English +func (t *Translator) TranslateWord(word string) (string, error) { + if t.apiKey == "" { + return "", fmt.Errorf("OpenAI API key not found") + } + + ctx := context.Background() + + req := openai.ChatCompletionRequest{ + Model: openai.GPT4oMini, + Messages: []openai.ChatCompletionMessage{ + { + Role: openai.ChatMessageRoleUser, + Content: fmt.Sprintf("Translate the Bulgarian word '%s' to English. 
Respond with only the English translation, nothing else.", word), + }, + }, + MaxTokens: 50, + Temperature: 0.3, + } + + resp, err := t.client.CreateChatCompletion(ctx, req) + if err != nil { + return "", fmt.Errorf("OpenAI API error: %w", err) + } + + if len(resp.Choices) == 0 { + return "", fmt.Errorf("no translation returned") + } + + translation := strings.TrimSpace(resp.Choices[0].Message.Content) + return translation, nil +} + +// SaveTranslation saves the translation to a file in the word directory +func SaveTranslation(wordDir, word, translation string) error { + outputFile := filepath.Join(wordDir, "translation.txt") + content := fmt.Sprintf("%s = %s\n", word, translation) + + if err := os.WriteFile(outputFile, []byte(content), 0644); err != nil { + return fmt.Errorf("failed to write translation file: %w", err) + } + + return nil +} + +// TranslationCache stores translations in memory for batch operations +type TranslationCache struct { + translations map[string]string +} + +// NewTranslationCache creates a new translation cache +func NewTranslationCache() *TranslationCache { + return &TranslationCache{ + translations: make(map[string]string), + } +} + +// Add adds a translation to the cache +func (tc *TranslationCache) Add(word, translation string) { + tc.translations[word] = translation +} + +// Get retrieves a translation from the cache +func (tc *TranslationCache) Get(word string) (string, bool) { + translation, ok := tc.translations[word] + return translation, ok +} + +// GetAll returns all cached translations +func (tc *TranslationCache) GetAll() map[string]string { + // Return a copy to prevent external modification + result := make(map[string]string) + for k, v := range tc.translations { + result[k] = v + } + return result +} |
