From 44cf5eee8fba096496f0704cec44fd436ecb5c2e Mon Sep 17 00:00:00 2001 From: Paul Buetow Date: Sat, 19 Jul 2025 22:27:13 +0300 Subject: feat: update default voice to alloy with speed 0.98 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Changed default voice from 'nova' to 'alloy' - Changed default speed from 0.9 to 0.98 for better clarity - GUI now uses default voice/speed for first generation - Regeneration still uses random voice/speed for variety 🤖 Generated with [Claude Code](https://claude.ai/code) Co-Authored-By: Claude --- internal/audio/provider.go | 4 ++-- internal/gui/generator.go | 40 ++++++++++++++++++++++++++++++---------- 2 files changed, 32 insertions(+), 12 deletions(-) (limited to 'internal') diff --git a/internal/audio/provider.go b/internal/audio/provider.go index fd47ef4..53b08c1 100644 --- a/internal/audio/provider.go +++ b/internal/audio/provider.go @@ -42,8 +42,8 @@ func DefaultProviderConfig() *Config { OutputDir: "./", OutputFormat: "mp3", OpenAIModel: "gpt-4o-mini-tts", // New model with voice instructions support - OpenAIVoice: "nova", - OpenAISpeed: 0.9, // Slightly slower for clarity (note: may be ignored by gpt-4o-mini-tts) + OpenAIVoice: "alloy", + OpenAISpeed: 0.98, // Default speed for clarity OpenAIInstruction: "You are speaking Bulgarian language (български език). Pronounce the Bulgarian text with authentic Bulgarian phonetics, not Russian. 
Speak slowly and clearly for language learners.", EnableCache: true, CacheDir: "./.audio_cache", diff --git a/internal/gui/generator.go b/internal/gui/generator.go index 92ead40..08785ed 100644 --- a/internal/gui/generator.go +++ b/internal/gui/generator.go @@ -84,17 +84,37 @@ func (a *Application) translateEnglishToBulgarian(word string) (string, error) { // generateAudio generates audio for a word func (a *Application) generateAudio(word string) (string, error) { - // Get available voices - allVoices := []string{"alloy", "ash", "ballad", "coral", "echo", "fable", "onyx", "nova", "sage", "shimmer", "verse"} + // Check if this is a regeneration by looking for existing audio file + wordDir := a.findCardDirectory(word) + isRegeneration := false + if wordDir != "" { + audioFile := filepath.Join(wordDir, fmt.Sprintf("audio.%s", a.config.AudioFormat)) + if _, err := os.Stat(audioFile); err == nil { + isRegeneration = true + } + } - // Select a random voice - rand.Seed(time.Now().UnixNano()) - voice := allVoices[rand.Intn(len(allVoices))] + // For regeneration, use random voice and speed; otherwise use defaults + var voice string + var speed float64 - // Generate random speed between 0.90 and 1.00 - speed := 0.90 + rand.Float64()*0.10 + if isRegeneration { + // Get available voices + allVoices := []string{"alloy", "ash", "ballad", "coral", "echo", "fable", "onyx", "nova", "sage", "shimmer", "verse"} + + // Select a random voice + rand.Seed(time.Now().UnixNano()) + voice = allVoices[rand.Intn(len(allVoices))] + + // Generate random speed between 0.90 and 1.00 + speed = 0.90 + rand.Float64()*0.10 + } else { + // Use defaults for first generation + voice = "alloy" + speed = 0.98 + } - // Update audio config with random voice and speed + // Update audio config with selected voice and speed a.audioConfig.OpenAIVoice = voice a.audioConfig.OpenAISpeed = speed @@ -104,8 +124,8 @@ func (a *Application) generateAudio(word string) (string, error) { return "", err } - // Find 
existing card directory or create new one - wordDir := a.findCardDirectory(word) + // Find existing card directory or create new one again after provider creation + wordDir = a.findCardDirectory(word) if wordDir == "" { // No existing directory, create new one with card ID cardID := internal.GenerateCardID(word) -- cgit v1.2.3