diff options
| author | Paul Buetow <paul@buetow.org> | 2025-07-18 23:18:45 +0300 |
|---|---|---|
| committer | Paul Buetow <paul@buetow.org> | 2025-07-18 23:18:45 +0300 |
| commit | 8779de2c97e445acf82e8422c899fdefa4649bda (patch) | |
| tree | b24d270a4e655c66059034cfdf0713e3bf3d32f5 /internal/gui | |
| parent | f3c1b568ac28e211d218f0b33ccb85cf782ce248 (diff) | |
feat: multiple improvements to GUI and codebase
- Add random voice speed between 0.90 and 1.00 for more natural audio
- Display voice and speed info in GUI audio player
- Implement automatic retry loading for missing files (checks every 2 seconds)
- Fix voice/speed info persistence during audio playback
- Remove image caching functionality for cleaner codebase
- Rename prompt.txt to image_prompt.txt for clarity
- Fix GUI to recognize newly added cards during runtime (rescan on navigation)
- Update README to reflect removed image cache
These changes improve the user experience by making the audio more natural,
providing better feedback about audio generation parameters, and ensuring
the GUI stays synchronized with externally added cards.
🤖 Generated with [Claude Code](https://claude.ai/code)
Co-Authored-By: Claude <noreply@anthropic.com>
Diffstat (limited to 'internal/gui')
| -rw-r--r-- | internal/gui/app.go | 15 | ||||
| -rw-r--r-- | internal/gui/audio_player.go | 45 | ||||
| -rw-r--r-- | internal/gui/generator.go | 23 | ||||
| -rw-r--r-- | internal/gui/navigation.go | 163 |
4 files changed, 225 insertions, 21 deletions
diff --git a/internal/gui/app.go b/internal/gui/app.go index 28abb7b..da66287 100644 --- a/internal/gui/app.go +++ b/internal/gui/app.go @@ -65,6 +65,7 @@ type Application struct { currentWordIndex int deleteConfirming bool // Track if we're in delete confirmation mode wordChangeTimer *time.Timer // Timer for detecting word changes + fileCheckTicker *time.Ticker // Ticker for checking missing files // Word processing queue queue *WordQueue @@ -88,7 +89,6 @@ type Config struct { OutputDir string AudioFormat string ImageProvider string - EnableCache bool OpenAIKey string } @@ -98,7 +98,6 @@ func DefaultConfig() *Config { OutputDir: "./anki_cards", AudioFormat: "mp3", ImageProvider: "openai", - EnableCache: true, } } @@ -135,7 +134,7 @@ func New(config *Config) *Application { OpenAIVoice: "nova", OpenAISpeed: 0.9, OpenAIInstruction: "You are speaking Bulgarian language (Π±ΡΠ»Π³Π°ΡΡΠΊΠΈ Π΅Π·ΠΈΠΊ). Pronounce the Bulgarian text with authentic Bulgarian phonetics, not Russian. Speak slowly and clearly for language learners.", - EnableCache: config.EnableCache, + EnableCache: true, CacheDir: "./.audio_cache", } @@ -371,6 +370,10 @@ func (a *Application) setupUI() { // Add the tooltip layer to enable tooltips a.window.SetContent(fynetooltip.AddWindowToolTipLayer(content, a.window.Canvas())) a.window.SetOnClosed(func() { + // Stop file check ticker + if a.fileCheckTicker != nil { + a.fileCheckTicker.Stop() + } a.cancel() a.queue.Stop() a.wg.Wait() @@ -1080,6 +1083,12 @@ func (a *Application) showError(err error) { } func (a *Application) clearUI() { + // Stop file check ticker when clearing UI + if a.fileCheckTicker != nil { + a.fileCheckTicker.Stop() + a.fileCheckTicker = nil + } + a.imageDisplay.Clear() a.audioPlayer.Clear() // Don't clear the word input or translation entry - they should stay populated diff --git a/internal/gui/audio_player.go b/internal/gui/audio_player.go index 8df4461..38494c5 100644 --- a/internal/gui/audio_player.go +++ b/internal/gui/audio_player.go 
@@ -2,9 +2,11 @@ package gui import ( "fmt" + "os" "os/exec" "path/filepath" "runtime" + "strings" "fyne.io/fyne/v2" "fyne.io/fyne/v2/container" @@ -23,9 +25,10 @@ type AudioPlayer struct { stopButton *ttwidget.Button statusLabel *widget.Label - audioFile string - isPlaying bool - playCmd *exec.Cmd + audioFile string + isPlaying bool + playCmd *exec.Cmd + voiceInfo string // Stores voice and speed info } // NewAudioPlayer creates a new audio player widget @@ -71,7 +74,34 @@ func (p *AudioPlayer) SetAudioFile(audioFile string) { if audioFile != "" { p.playButton.Enable() - p.statusLabel.SetText(fmt.Sprintf("Audio: %s", filepath.Base(audioFile))) + + // Try to load voice metadata + wordDir := filepath.Dir(audioFile) + metadataFile := filepath.Join(wordDir, "audio_metadata.txt") + voice := "" + speed := "" + + if data, err := os.ReadFile(metadataFile); err == nil { + lines := strings.Split(string(data), "\n") + for _, line := range lines { + if strings.HasPrefix(line, "voice=") { + voice = strings.TrimPrefix(line, "voice=") + } else if strings.HasPrefix(line, "speed=") { + speed = strings.TrimPrefix(line, "speed=") + } + } + } + + // Store voice info + if voice != "" && speed != "" { + p.voiceInfo = fmt.Sprintf(" (voice: %s, speed: %s)", voice, speed) + } else { + p.voiceInfo = "" + } + + // Format status text with voice and speed info + statusText := fmt.Sprintf("Audio: %s%s", filepath.Base(audioFile), p.voiceInfo) + p.statusLabel.SetText(statusText) } else { p.Clear() } @@ -82,6 +112,7 @@ func (p *AudioPlayer) Clear() { p.onStop() // Stop any playing audio p.audioFile = "" p.isPlaying = false + p.voiceInfo = "" p.playButton.Disable() p.stopButton.Disable() p.statusLabel.SetText("No audio loaded") @@ -108,7 +139,7 @@ func (p *AudioPlayer) onPlay() { p.isPlaying = true p.playButton.SetIcon(theme.MediaPauseIcon()) p.stopButton.Enable() - p.statusLabel.SetText("Playing: " + filepath.Base(p.audioFile)) + p.statusLabel.SetText(fmt.Sprintf("Playing: %s%s", 
filepath.Base(p.audioFile), p.voiceInfo)) } // onStop handles stop button click @@ -121,7 +152,7 @@ func (p *AudioPlayer) onStop() { p.isPlaying = false p.playButton.SetIcon(theme.MediaPlayIcon()) p.stopButton.Disable() - p.statusLabel.SetText("Stopped: " + filepath.Base(p.audioFile)) + p.statusLabel.SetText(fmt.Sprintf("Stopped: %s%s", filepath.Base(p.audioFile), p.voiceInfo)) } // Play triggers audio playback @@ -174,7 +205,7 @@ func (p *AudioPlayer) startPlayback() error { p.isPlaying = false p.playButton.SetIcon(theme.MediaPlayIcon()) p.stopButton.Disable() - p.statusLabel.SetText("Finished: " + filepath.Base(p.audioFile)) + p.statusLabel.SetText(fmt.Sprintf("Finished: %s%s", filepath.Base(p.audioFile), p.voiceInfo)) }) } }() diff --git a/internal/gui/generator.go b/internal/gui/generator.go index ca9d854..92ead40 100644 --- a/internal/gui/generator.go +++ b/internal/gui/generator.go @@ -91,8 +91,12 @@ func (a *Application) generateAudio(word string) (string, error) { rand.Seed(time.Now().UnixNano()) voice := allVoices[rand.Intn(len(allVoices))] - // Update audio config with random voice + // Generate random speed between 0.90 and 1.00 + speed := 0.90 + rand.Float64()*0.10 + + // Update audio config with random voice and speed a.audioConfig.OpenAIVoice = voice + a.audioConfig.OpenAISpeed = speed // Create audio provider provider, err := audio.NewProvider(a.audioConfig) @@ -127,11 +131,18 @@ func (a *Application) generateAudio(word string) (string, error) { } // Save audio attribution - if err := a.saveAudioAttribution(word, outputFile, voice); err != nil { + if err := a.saveAudioAttribution(word, outputFile, voice, speed); err != nil { // Non-fatal error, just log it fmt.Printf("Warning: Failed to save audio attribution: %v\n", err) } + // Save voice metadata for GUI display + metadataFile := filepath.Join(wordDir, "audio_metadata.txt") + metadata := fmt.Sprintf("voice=%s\nspeed=%.2f\n", voice, speed) + if err := os.WriteFile(metadataFile, []byte(metadata), 
0644); err != nil { + fmt.Printf("Warning: Failed to save audio metadata: %v\n", err) + } + return outputFile, nil } @@ -154,8 +165,6 @@ func (a *Application) generateImagesWithPrompt(word string, customPrompt string, Size: "512x512", // Half of 1024x1024 Quality: "standard", Style: "natural", - CacheDir: "./.image_cache", - EnableCache: a.config.EnableCache, } searcher = image.NewOpenAIClient(openaiConfig) @@ -216,7 +225,7 @@ func (a *Application) generateImagesWithPrompt(word string, customPrompt string, usedPrompt := openaiClient.GetLastPrompt() if usedPrompt != "" { // Save the prompt to disk immediately for this word - promptFile := filepath.Join(wordDir, "prompt.txt") + promptFile := filepath.Join(wordDir, "image_prompt.txt") os.WriteFile(promptFile, []byte(usedPrompt), 0644) // Only update UI if this word is still the current word @@ -237,12 +246,12 @@ func (a *Application) generateImagesWithPrompt(word string, customPrompt string, } // saveAudioAttribution saves attribution info for generated audio -func (a *Application) saveAudioAttribution(word, audioFile, voice string) error { +func (a *Application) saveAudioAttribution(word, audioFile, voice string, speed float64) error { attribution := fmt.Sprintf("Audio generated by OpenAI TTS\n\n") attribution += fmt.Sprintf("Bulgarian word: %s\n", word) attribution += fmt.Sprintf("Model: %s\n", a.audioConfig.OpenAIModel) attribution += fmt.Sprintf("Voice: %s\n", voice) - attribution += fmt.Sprintf("Speed: %.2f\n", a.audioConfig.OpenAISpeed) + attribution += fmt.Sprintf("Speed: %.2f\n", speed) if a.audioConfig.OpenAIInstruction != "" { attribution += fmt.Sprintf("\nVoice instructions:\n%s\n", a.audioConfig.OpenAIInstruction) diff --git a/internal/gui/navigation.go b/internal/gui/navigation.go index 5a41d7f..a73e4d0 100644 --- a/internal/gui/navigation.go +++ b/internal/gui/navigation.go @@ -197,12 +197,32 @@ func (a *Application) getAllAvailableWords() []string { // onPrevWord loads the previous word func (a 
*Application) onPrevWord() { + // Store current word before rescanning + currentWord := a.currentWord + + // Rescan to pick up any new cards added externally + a.scanExistingWords() + allWords := a.getAllAvailableWords() if len(allWords) == 0 { return } - newIndex := a.currentWordIndex - 1 + // Find current word's new index after rescan + currentIndex := -1 + for i, word := range allWords { + if word == currentWord { + currentIndex = i + break + } + } + + // If current word not found, use the stored index + if currentIndex == -1 { + currentIndex = a.currentWordIndex + } + + newIndex := currentIndex - 1 // Wrap around to the end if at beginning if newIndex < 0 { newIndex = len(allWords) - 1 @@ -213,12 +233,32 @@ func (a *Application) onPrevWord() { // onNextWord loads the next word func (a *Application) onNextWord() { + // Store current word before rescanning + currentWord := a.currentWord + + // Rescan to pick up any new cards added externally + a.scanExistingWords() + allWords := a.getAllAvailableWords() if len(allWords) == 0 { return } - newIndex := a.currentWordIndex + 1 + // Find current word's new index after rescan + currentIndex := -1 + for i, word := range allWords { + if word == currentWord { + currentIndex = i + break + } + } + + // If current word not found, use the stored index + if currentIndex == -1 { + currentIndex = a.currentWordIndex + } + + newIndex := currentIndex + 1 // Wrap around to the beginning if at end if newIndex >= len(allWords) { newIndex = 0 @@ -229,6 +269,12 @@ func (a *Application) onNextWord() { // loadWordByIndex loads a word by its index in the combined word list func (a *Application) loadWordByIndex(index int) { + // Stop any existing file check ticker + if a.fileCheckTicker != nil { + a.fileCheckTicker.Stop() + a.fileCheckTicker = nil + } + allWords := a.getAllAvailableWords() if index < 0 || index >= len(allWords) { return @@ -269,7 +315,7 @@ func (a *Application) loadWordByIndex(index int) { // Load image prompt from disk if 
it exists if wordDir := a.findCardDirectory(word); wordDir != "" { - promptFile := filepath.Join(wordDir, "prompt.txt") + promptFile := filepath.Join(wordDir, "image_prompt.txt") if data, err := os.ReadFile(promptFile); err == nil { prompt := strings.TrimSpace(string(data)) a.imagePromptEntry.SetText(prompt) @@ -297,6 +343,9 @@ func (a *Application) loadWordByIndex(index int) { if hasContent { a.setActionButtonsEnabled(true) } + + // Start ticker to check for missing files + a.startFileCheckTicker() } // loadExistingFiles loads existing files for a word @@ -326,7 +375,7 @@ func (a *Application) loadExistingFiles(word string) { } // Load image prompt file - promptFile := filepath.Join(wordDir, "prompt.txt") + promptFile := filepath.Join(wordDir, "image_prompt.txt") if data, err := os.ReadFile(promptFile); err == nil { prompt := strings.TrimSpace(string(data)) fmt.Printf("Loaded prompt from file: %s\n", promptFile) @@ -404,6 +453,112 @@ func (a *Application) loadExistingFiles(word string) { }) } +// startFileCheckTicker starts a ticker to check for missing files +func (a *Application) startFileCheckTicker() { + // Create ticker that checks every 2 seconds + a.fileCheckTicker = time.NewTicker(2 * time.Second) + + go func() { + for range a.fileCheckTicker.C { + // Only check files for the current word + a.mu.Lock() + currentWord := a.currentWord + a.mu.Unlock() + + if currentWord != "" { + a.checkForMissingFiles(currentWord) + } + } + }() +} + +// checkForMissingFiles checks for missing files and attempts to load them +func (a *Application) checkForMissingFiles(word string) { + // Find the card directory for this word + wordDir := a.findCardDirectory(word) + if wordDir == "" { + return + } + + // Check for missing audio file + if a.currentAudioFile == "" { + audioFile := filepath.Join(wordDir, fmt.Sprintf("audio.%s", a.config.AudioFormat)) + if _, err := os.Stat(audioFile); err == nil { + a.currentAudioFile = audioFile + fyne.Do(func() { + 
a.audioPlayer.SetAudioFile(audioFile) + a.updateStatus(fmt.Sprintf("Found audio file for %s", word)) + }) + } + } + + // Check for missing image file + if a.currentImage == "" { + patterns := []string{"image.jpg", "image.png"} + for _, pattern := range patterns { + imagePath := filepath.Join(wordDir, pattern) + if _, err := os.Stat(imagePath); err == nil { + a.currentImage = imagePath + fyne.Do(func() { + a.imageDisplay.SetImages([]string{imagePath}) + a.updateStatus(fmt.Sprintf("Found image file for %s", word)) + }) + break + } + } + } + + // Check for missing translation + if a.currentTranslation == "" { + translationFile := filepath.Join(wordDir, "translation.txt") + if data, err := os.ReadFile(translationFile); err == nil { + content := string(data) + parts := strings.Split(content, "=") + if len(parts) >= 2 { + a.currentTranslation = strings.TrimSpace(parts[1]) + fyne.Do(func() { + a.translationEntry.SetText(a.currentTranslation) + a.updateStatus(fmt.Sprintf("Found translation for %s", word)) + }) + } + } + } + + // Check for missing prompt + currentPrompt := a.imagePromptEntry.Text + if currentPrompt == "" { + promptFile := filepath.Join(wordDir, "image_prompt.txt") + if data, err := os.ReadFile(promptFile); err == nil { + prompt := strings.TrimSpace(string(data)) + fyne.Do(func() { + a.imagePromptEntry.SetText(prompt) + a.updateStatus(fmt.Sprintf("Found prompt for %s", word)) + }) + } + } + + // Check for missing phonetic info + currentPhonetic := a.phoneticDisplay.Text + if currentPhonetic == "" { + phoneticFile := filepath.Join(wordDir, "phonetic.txt") + if data, err := os.ReadFile(phoneticFile); err == nil { + phoneticInfo := string(data) + fyne.Do(func() { + a.phoneticDisplay.SetText(phoneticInfo) + a.updateStatus(fmt.Sprintf("Found phonetic info for %s", word)) + }) + } + } + + // Update action buttons if we now have content + hasContent := a.currentAudioFile != "" || a.currentImage != "" || a.currentTranslation != "" + if hasContent { + fyne.Do(func() 
{ + a.setActionButtonsEnabled(true) + }) + } +} + // onDelete moves the current word's files to trash bin func (a *Application) onDelete() { if a.currentWord == "" { |
