diff options
| author | Paul Buetow <paul@buetow.org> | 2025-07-19 22:27:13 +0300 |
|---|---|---|
| committer | Paul Buetow <paul@buetow.org> | 2025-07-19 22:27:13 +0300 |
| commit | 44cf5eee8fba096496f0704cec44fd436ecb5c2e (patch) | |
| tree | 5596a40ebaeb3e30f2bb210b321a573110f1a997 | |
| parent | d663a312ff5f45513a23adf9eb9ffb29377bbad9 (diff) | |
feat: update default voice to alloy with speed 0.98
- Changed default voice from 'nova' to 'alloy'
- Changed default speed from 0.9 to 0.98 for better clarity
- GUI now uses default voice/speed for first generation
- Regeneration still uses random voice/speed for variety
🤖 Generated with [Claude Code](https://claude.ai/code)
Co-Authored-By: Claude <noreply@anthropic.com>
| -rw-r--r-- | internal/audio/provider.go | 4 | ||||
| -rw-r--r-- | internal/gui/generator.go | 40 |
2 files changed, 32 insertions, 12 deletions
diff --git a/internal/audio/provider.go b/internal/audio/provider.go index fd47ef4..53b08c1 100644 --- a/internal/audio/provider.go +++ b/internal/audio/provider.go @@ -42,8 +42,8 @@ func DefaultProviderConfig() *Config { OutputDir: "./", OutputFormat: "mp3", OpenAIModel: "gpt-4o-mini-tts", // New model with voice instructions support - OpenAIVoice: "nova", - OpenAISpeed: 0.9, // Slightly slower for clarity (note: may be ignored by gpt-4o-mini-tts) + OpenAIVoice: "alloy", + OpenAISpeed: 0.98, // Default speed for clarity OpenAIInstruction: "You are speaking Bulgarian language (български език). Pronounce the Bulgarian text with authentic Bulgarian phonetics, not Russian. Speak slowly and clearly for language learners.", EnableCache: true, CacheDir: "./.audio_cache", diff --git a/internal/gui/generator.go b/internal/gui/generator.go index 92ead40..08785ed 100644 --- a/internal/gui/generator.go +++ b/internal/gui/generator.go @@ -84,17 +84,37 @@ func (a *Application) translateEnglishToBulgarian(word string) (string, error) { // generateAudio generates audio for a word func (a *Application) generateAudio(word string) (string, error) { - // Get available voices - allVoices := []string{"alloy", "ash", "ballad", "coral", "echo", "fable", "onyx", "nova", "sage", "shimmer", "verse"} + // Check if this is a regeneration by looking for existing audio file + wordDir := a.findCardDirectory(word) + isRegeneration := false + if wordDir != "" { + audioFile := filepath.Join(wordDir, fmt.Sprintf("audio.%s", a.config.AudioFormat)) + if _, err := os.Stat(audioFile); err == nil { + isRegeneration = true + } + } - // Select a random voice - rand.Seed(time.Now().UnixNano()) - voice := allVoices[rand.Intn(len(allVoices))] + // For regeneration, use random voice and speed; otherwise use defaults + var voice string + var speed float64 - // Generate random speed between 0.90 and 1.00 - speed := 0.90 + rand.Float64()*0.10 + if isRegeneration { + // Get available voices + allVoices 
:= []string{"alloy", "ash", "ballad", "coral", "echo", "fable", "onyx", "nova", "sage", "shimmer", "verse"} + + // Select a random voice + rand.Seed(time.Now().UnixNano()) + voice = allVoices[rand.Intn(len(allVoices))] + + // Generate random speed between 0.90 and 1.00 + speed = 0.90 + rand.Float64()*0.10 + } else { + // Use defaults for first generation + voice = "alloy" + speed = 0.98 + } - // Update audio config with random voice and speed + // Update audio config with selected voice and speed a.audioConfig.OpenAIVoice = voice a.audioConfig.OpenAISpeed = speed @@ -104,8 +124,8 @@ func (a *Application) generateAudio(word string) (string, error) { return "", err } - // Find existing card directory or create new one - wordDir := a.findCardDirectory(word) + // Find existing card directory or create new one again after provider creation + wordDir = a.findCardDirectory(word) if wordDir == "" { // No existing directory, create new one with card ID cardID := internal.GenerateCardID(word) |
