From 44cf5eee8fba096496f0704cec44fd436ecb5c2e Mon Sep 17 00:00:00 2001
From: Paul Buetow <paul@buetow.org>
Date: Sat, 19 Jul 2025 22:27:13 +0300
Subject: feat: update default voice to alloy with speed 0.98
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- Changed default voice from 'nova' to 'alloy'
- Changed default speed from 0.9 to 0.98 for better clarity
- GUI now uses default voice/speed for first generation
- Regeneration still uses random voice/speed for variety

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>
---
 internal/audio/provider.go |  4 ++--
 internal/gui/generator.go  | 40 ++++++++++++++++++++++++++++++----------
 2 files changed, 32 insertions(+), 12 deletions(-)

(limited to 'internal')

diff --git a/internal/audio/provider.go b/internal/audio/provider.go
index fd47ef4..53b08c1 100644
--- a/internal/audio/provider.go
+++ b/internal/audio/provider.go
@@ -42,8 +42,8 @@ func DefaultProviderConfig() *Config {
 		OutputDir:       "./",
 		OutputFormat:    "mp3",
 		OpenAIModel:       "gpt-4o-mini-tts", // New model with voice instructions support
-		OpenAIVoice:       "nova",
-		OpenAISpeed:       0.9, // Slightly slower for clarity (note: may be ignored by gpt-4o-mini-tts)
+		OpenAIVoice:       "alloy",
+		OpenAISpeed:       0.98, // Default speed for clarity
 		OpenAIInstruction: "You are speaking Bulgarian language (български език). Pronounce the Bulgarian text with authentic Bulgarian phonetics, not Russian. Speak slowly and clearly for language learners.",
 		EnableCache:     true,
 		CacheDir:        "./.audio_cache",
diff --git a/internal/gui/generator.go b/internal/gui/generator.go
index 92ead40..08785ed 100644
--- a/internal/gui/generator.go
+++ b/internal/gui/generator.go
@@ -84,17 +84,37 @@ func (a *Application) translateEnglishToBulgarian(word string) (string, error) {
 
 // generateAudio generates audio for a word
 func (a *Application) generateAudio(word string) (string, error) {
-	// Get available voices
-	allVoices := []string{"alloy", "ash", "ballad", "coral", "echo", "fable", "onyx", "nova", "sage", "shimmer", "verse"}
+	// Check if this is a regeneration by looking for existing audio file
+	wordDir := a.findCardDirectory(word)
+	isRegeneration := false
+	if wordDir != "" {
+		audioFile := filepath.Join(wordDir, fmt.Sprintf("audio.%s", a.config.AudioFormat))
+		if _, err := os.Stat(audioFile); err == nil {
+			isRegeneration = true
+		}
+	}
 	
-	// Select a random voice
-	rand.Seed(time.Now().UnixNano())
-	voice := allVoices[rand.Intn(len(allVoices))]
+	// For regeneration, use random voice and speed; otherwise use defaults
+	var voice string
+	var speed float64
 	
-	// Generate random speed between 0.90 and 1.00
-	speed := 0.90 + rand.Float64()*0.10
+	if isRegeneration {
+		// Get available voices
+		allVoices := []string{"alloy", "ash", "ballad", "coral", "echo", "fable", "onyx", "nova", "sage", "shimmer", "verse"}
+		
+		// Select a random voice
+		rand.Seed(time.Now().UnixNano())
+		voice = allVoices[rand.Intn(len(allVoices))]
+		
+		// Generate random speed between 0.90 and 1.00
+		speed = 0.90 + rand.Float64()*0.10
+	} else {
+		// Use defaults for first generation
+		voice = "alloy"
+		speed = 0.98
+	}
 	
-	// Update audio config with random voice and speed
+	// Update audio config with selected voice and speed
 	a.audioConfig.OpenAIVoice = voice
 	a.audioConfig.OpenAISpeed = speed
 	
@@ -104,8 +124,8 @@ func (a *Application) generateAudio(word string) (string, error) {
 		return "", err
 	}
 	
-	// Find existing card directory or create new one
-	wordDir := a.findCardDirectory(word)
+	// Look up the card directory again after provider creation; create a new one if none exists
+	wordDir = a.findCardDirectory(word)
 	if wordDir == "" {
 		// No existing directory, create new one with card ID
 		cardID := internal.GenerateCardID(word)
-- 
cgit v1.2.3