summary | refs | log | tree | commit | diff
path: root/internal
diff options
context:
space:
mode:
author: Paul Buetow <paul@buetow.org> 2025-07-19 22:27:13 +0300
committer: Paul Buetow <paul@buetow.org> 2025-07-19 22:27:13 +0300
commit: 44cf5eee8fba096496f0704cec44fd436ecb5c2e (patch)
tree: 5596a40ebaeb3e30f2bb210b321a573110f1a997 /internal
parent: d663a312ff5f45513a23adf9eb9ffb29377bbad9 (diff)
feat: update default voice to alloy with speed 0.98
- Changed default voice from 'nova' to 'alloy'
- Changed default speed from 0.9 to 0.98 for better clarity
- GUI now uses default voice/speed for first generation
- Regeneration still uses random voice/speed for variety

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>
Diffstat (limited to 'internal')
-rw-r--r-- internal/audio/provider.go 4
-rw-r--r-- internal/gui/generator.go 40
2 files changed, 32 insertions, 12 deletions
diff --git a/internal/audio/provider.go b/internal/audio/provider.go
index fd47ef4..53b08c1 100644
--- a/internal/audio/provider.go
+++ b/internal/audio/provider.go
@@ -42,8 +42,8 @@ func DefaultProviderConfig() *Config {
OutputDir: "./",
OutputFormat: "mp3",
OpenAIModel: "gpt-4o-mini-tts", // New model with voice instructions support
- OpenAIVoice: "nova",
- OpenAISpeed: 0.9, // Slightly slower for clarity (note: may be ignored by gpt-4o-mini-tts)
+ OpenAIVoice: "alloy",
+ OpenAISpeed: 0.98, // Default speed for clarity
OpenAIInstruction: "You are speaking Bulgarian language (български език). Pronounce the Bulgarian text with authentic Bulgarian phonetics, not Russian. Speak slowly and clearly for language learners.",
EnableCache: true,
CacheDir: "./.audio_cache",
diff --git a/internal/gui/generator.go b/internal/gui/generator.go
index 92ead40..08785ed 100644
--- a/internal/gui/generator.go
+++ b/internal/gui/generator.go
@@ -84,17 +84,37 @@ func (a *Application) translateEnglishToBulgarian(word string) (string, error) {
// generateAudio generates audio for a word
func (a *Application) generateAudio(word string) (string, error) {
- // Get available voices
- allVoices := []string{"alloy", "ash", "ballad", "coral", "echo", "fable", "onyx", "nova", "sage", "shimmer", "verse"}
+ // Check if this is a regeneration by looking for existing audio file
+ wordDir := a.findCardDirectory(word)
+ isRegeneration := false
+ if wordDir != "" {
+ audioFile := filepath.Join(wordDir, fmt.Sprintf("audio.%s", a.config.AudioFormat))
+ if _, err := os.Stat(audioFile); err == nil {
+ isRegeneration = true
+ }
+ }
- // Select a random voice
- rand.Seed(time.Now().UnixNano())
- voice := allVoices[rand.Intn(len(allVoices))]
+ // For regeneration, use random voice and speed; otherwise use defaults
+ var voice string
+ var speed float64
- // Generate random speed between 0.90 and 1.00
- speed := 0.90 + rand.Float64()*0.10
+ if isRegeneration {
+ // Get available voices
+ allVoices := []string{"alloy", "ash", "ballad", "coral", "echo", "fable", "onyx", "nova", "sage", "shimmer", "verse"}
+
+ // Select a random voice
+ rand.Seed(time.Now().UnixNano())
+ voice = allVoices[rand.Intn(len(allVoices))]
+
+ // Generate random speed between 0.90 and 1.00
+ speed = 0.90 + rand.Float64()*0.10
+ } else {
+ // Use defaults for first generation
+ voice = "alloy"
+ speed = 0.98
+ }
- // Update audio config with random voice and speed
+ // Update audio config with selected voice and speed
a.audioConfig.OpenAIVoice = voice
a.audioConfig.OpenAISpeed = speed
@@ -104,8 +124,8 @@ func (a *Application) generateAudio(word string) (string, error) {
return "", err
}
- // Find existing card directory or create new one
- wordDir := a.findCardDirectory(word)
+ // Find existing card directory or create new one again after provider creation
+ wordDir = a.findCardDirectory(word)
if wordDir == "" {
// No existing directory, create new one with card ID
cardID := internal.GenerateCardID(word)