diff options
| author | Paul Buetow <paul@buetow.org> | 2025-07-17 15:35:03 +0300 |
|---|---|---|
| committer | Paul Buetow <paul@buetow.org> | 2025-07-17 15:35:03 +0300 |
| commit | f6477f82dc79d17e9ee3193c81dca2db884a7119 (patch) | |
| tree | 3424d2342ba976791dc3bb8b108dfdd290018175 | |
| parent | 094447b570c5c5a7c751e0e60279cfa08e945755 (diff) | |
add phonetic alphabet
| -rw-r--r-- | internal/gui/app.go | 184 | ||||
| -rw-r--r-- | internal/gui/generator.go | 13 | ||||
| -rw-r--r-- | internal/gui/navigation.go | 23 | ||||
| -rw-r--r-- | internal/image/openai.go | 21 | ||||
| -rw-r--r-- | internal/image/search.go | 1 |
5 files changed, 214 insertions, 28 deletions
diff --git a/internal/gui/app.go b/internal/gui/app.go index a586d73..45eec63 100644 --- a/internal/gui/app.go +++ b/internal/gui/app.go @@ -7,6 +7,7 @@ import ( "path/filepath" "strings" "sync" + "time" "fyne.io/fyne/v2" "fyne.io/fyne/v2/app" @@ -15,6 +16,7 @@ import ( "fyne.io/fyne/v2/layout" "fyne.io/fyne/v2/storage" "fyne.io/fyne/v2/widget" + "github.com/sashabaranov/go-openai" "codeberg.org/snonux/totalrecall/internal" "codeberg.org/snonux/totalrecall/internal/anki" @@ -33,10 +35,10 @@ type Application struct { imageDisplay *ImageDisplay audioPlayer *AudioPlayer translationEntry *widget.Entry - progressBar *widget.ProgressBar statusLabel *widget.Label queueStatusLabel *widget.Label imagePromptEntry *widget.Entry + phoneticDisplay *widget.Label // Navigation buttons prevWordBtn *widget.Button @@ -189,6 +191,10 @@ func (a *Application) setupUI() { a.imagePromptEntry = widget.NewMultiLineEntry() a.imagePromptEntry.SetPlaceHolder("Custom image prompt (optional)...") a.imagePromptEntry.Wrapping = fyne.TextWrapWord // Enable word wrapping + a.imagePromptEntry.OnChanged = func(text string) { + // Save the image prompt immediately when changed + a.saveImagePrompt() + } // Create container for image and prompt with proper sizing promptContainer := container.NewBorder( @@ -206,9 +212,33 @@ func (a *Application) setupUI() { ) imageSection.SetOffset(0.5) // Equal 50/50 split - displaySection := container.NewBorder( + // Create phonetic display section + a.phoneticDisplay = widget.NewLabel("Phonetic information will appear here...") + a.phoneticDisplay.Wrapping = fyne.TextWrapWord + + // Set minimum size for phonetic display (reduced to ~5 lines of text) + // Assuming ~20 pixels per line with standard font + phoneticScroll := container.NewScroll(a.phoneticDisplay) + phoneticScroll.SetMinSize(fyne.NewSize(0, 100)) + + phoneticContainer := container.NewBorder( + widget.NewLabel("Phonetic Information:"), nil, + nil, + nil, + phoneticScroll, + ) + + // Create a container for 
audio player and phonetic info + audioPhoneticSection := container.NewVSplit( + phoneticContainer, a.audioPlayer, + ) + audioPhoneticSection.SetOffset(0.5) // Equal split between phonetic and audio + + displaySection := container.NewBorder( + nil, + audioPhoneticSection, nil, nil, imageSection, ) @@ -235,8 +265,6 @@ func (a *Application) setupUI() { ) // Create status section - a.progressBar = widget.NewProgressBar() - a.progressBar.Hide() a.statusLabel = widget.NewLabel("Ready") a.queueStatusLabel = widget.NewLabel("Queue: Empty") a.queueStatusLabel.TextStyle = fyne.TextStyle{Italic: true} @@ -244,7 +272,6 @@ func (a *Application) setupUI() { statusSection := container.NewBorder( nil, nil, nil, nil, container.NewVBox( - a.progressBar, a.statusLabel, widget.NewSeparator(), a.queueStatusLabel, @@ -434,7 +461,13 @@ func (a *Application) generateMaterials(word string) { // Get custom prompt from UI customPrompt := a.imagePromptEntry.Text - imageFile, err := a.generateImagesWithPrompt(word, customPrompt) + // Pass the current translation to avoid re-translating + translation := a.currentTranslation + if translation == "" { + // Use the text from translationEntry if currentTranslation is not set + translation = strings.TrimSpace(a.translationEntry.Text) + } + imageFile, err := a.generateImagesWithPrompt(word, customPrompt, translation) a.decrementProcessing() // Image processing ends if err != nil { @@ -477,8 +510,10 @@ func (a *Application) onKeepAndContinue() { count := len(a.savedCards) a.mu.Unlock() - // Save translation file for future navigation + // Save translation, prompt, and phonetic files for future navigation a.saveTranslation() + a.saveImagePrompt() + a.savePhoneticInfo() // Rescan existing words to include the new one a.scanExistingWords() @@ -528,7 +563,13 @@ func (a *Application) onRegenerateImage() { defer a.wg.Done() defer a.decrementProcessing() // Image processing ends - imageFile, err := a.generateImagesWithPrompt(a.currentWord, customPrompt) + // 
Use the current translation to avoid re-translating + translation := a.currentTranslation + if translation == "" { + // Use the text from translationEntry if currentTranslation is not set + translation = strings.TrimSpace(a.translationEntry.Text) + } + imageFile, err := a.generateImagesWithPrompt(a.currentWord, customPrompt, translation) if err != nil { fyne.Do(func() { a.showError(fmt.Errorf("Image regeneration failed: %w", err)) @@ -687,13 +728,11 @@ func (a *Application) setActionButtonsEnabled(enabled bool) { } func (a *Application) showProgress(message string) { - a.progressBar.Show() - a.progressBar.SetValue(0.1) // Start at 10% a.statusLabel.SetText(message) } func (a *Application) hideProgress() { - a.progressBar.Hide() + // Progress bar removed - nothing to hide } func (a *Application) updateStatus(message string) { @@ -710,6 +749,7 @@ func (a *Application) clearUI() { a.audioPlayer.Clear() // Don't clear the word input or translation entry - they should stay populated a.imagePromptEntry.SetText("") + a.phoneticDisplay.SetText("Phonetic information will appear here...") a.setActionButtonsEnabled(false) } @@ -780,10 +820,39 @@ func (a *Application) processWordJob(job *WordJob) { } a.mu.Unlock() + // Start fetching phonetic information concurrently + phoneticDone := make(chan struct{}) + go func() { + defer close(phoneticDone) + + fyne.Do(func() { + a.incrementProcessing() // Phonetic processing starts + }) + + phoneticInfo, err := a.getPhoneticInfo(job.Word) + if err != nil { + // Log error but don't fail the job - phonetic info is optional + fmt.Printf("Warning: Failed to get phonetic info: %v\n", err) + phoneticInfo = "Failed to fetch phonetic information" + } + + // Update UI with phonetic info if this is still the current job + a.mu.Lock() + if a.currentJobID == job.ID { + fyne.Do(func() { + a.phoneticDisplay.SetText(phoneticInfo) + // Save phonetic info to file + a.savePhoneticInfo() + }) + } + a.mu.Unlock() + + a.decrementProcessing() // Phonetic 
processing ends + }() + // Generate audio fyne.Do(func() { a.updateStatus(fmt.Sprintf("Generating audio for '%s'...", job.Word)) - a.progressBar.SetValue(0.4) a.incrementProcessing() // Audio processing starts }) @@ -811,12 +880,12 @@ func (a *Application) processWordJob(job *WordJob) { // Generate images fyne.Do(func() { a.updateStatus(fmt.Sprintf("Downloading images for '%s'...", job.Word)) - a.progressBar.SetValue(0.7) a.incrementProcessing() // Image processing starts }) // Use the custom prompt from the job - imageFile, err := a.generateImagesWithPrompt(job.Word, job.CustomPrompt) + // The translation variable already contains the correct translation (either from job or translated) + imageFile, err := a.generateImagesWithPrompt(job.Word, job.CustomPrompt, translation) a.decrementProcessing() // Image processing ends if err != nil { @@ -825,9 +894,11 @@ func (a *Application) processWordJob(job *WordJob) { return } + // Wait for phonetic fetching to complete before finalizing + <-phoneticDone + // Mark job as completed fyne.Do(func() { - a.progressBar.SetValue(0.95) a.updateStatus(fmt.Sprintf("Finalizing '%s'...", job.Word)) }) @@ -1074,3 +1145,86 @@ func (a *Application) saveTranslation() { } } +// saveImagePrompt saves the current image prompt to a file +func (a *Application) saveImagePrompt() { + if a.currentWord != "" && a.imagePromptEntry.Text != "" { + filename := sanitizeFilename(a.currentWord) + promptFile := filepath.Join(a.config.OutputDir, fmt.Sprintf("%s_prompt.txt", filename)) + os.WriteFile(promptFile, []byte(a.imagePromptEntry.Text), 0644) + } +} + +// savePhoneticInfo saves the phonetic information to a file +func (a *Application) savePhoneticInfo() { + phoneticText := a.phoneticDisplay.Text + if a.currentWord != "" && phoneticText != "" && + phoneticText != "Failed to fetch phonetic information" && + phoneticText != "Phonetic information will appear here..." 
{ + filename := sanitizeFilename(a.currentWord) + phoneticFile := filepath.Join(a.config.OutputDir, fmt.Sprintf("%s_phonetic.txt", filename)) + os.WriteFile(phoneticFile, []byte(phoneticText), 0644) + } +} + +// loadPhoneticInfo loads phonetic information from a file if it exists +func (a *Application) loadPhoneticInfo(word string) { + filename := sanitizeFilename(word) + phoneticFile := filepath.Join(a.config.OutputDir, fmt.Sprintf("%s_phonetic.txt", filename)) + + if data, err := os.ReadFile(phoneticFile); err == nil { + a.phoneticDisplay.SetText(string(data)) + } +} + +// getPhoneticInfo fetches phonetic information for a Bulgarian word using OpenAI GPT-4o +func (a *Application) getPhoneticInfo(word string) (string, error) { + if a.config.OpenAIKey == "" { + return "", fmt.Errorf("OpenAI API key not configured") + } + + client := openai.NewClient(a.config.OpenAIKey) + + ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second) + defer cancel() + + req := openai.ChatCompletionRequest{ + Model: openai.GPT4o, + Messages: []openai.ChatCompletionMessage{ + { + Role: openai.ChatMessageRoleSystem, + Content: "You are a Bulgarian language expert helping language learners understand pronunciation. Provide detailed phonetic information using the International Phonetic Alphabet (IPA). For each IPA symbol used, give concrete examples of how it sounds using familiar English words or sounds when possible.", + }, + { + Role: openai.ChatMessageRoleUser, + Content: fmt.Sprintf(`For the Bulgarian word '%s': +1. Provide the complete IPA transcription +2. Break down EACH phonetic symbol used in the transcription +3. 
For EVERY symbol, explain how it's pronounced with examples: + - If similar to an English sound, give English word examples + - If not in English, describe tongue/mouth position or compare to similar sounds + - Include stress marks and explain which syllable is stressed + +Example format: +Word: [IPA transcription] +• /p/ - like 'p' in English 'pot' +• /a/ - like 'a' in 'father' +• /ˈ/ - stress mark (following syllable is stressed) +etc.`, word), + }, + }, + Temperature: 0.3, + MaxTokens: 800, + } + + resp, err := client.CreateChatCompletion(ctx, req) + if err != nil { + return "", fmt.Errorf("failed to get phonetic info: %w", err) + } + + if len(resp.Choices) == 0 { + return "", fmt.Errorf("no response from OpenAI") + } + + return resp.Choices[0].Message.Content, nil +} + diff --git a/internal/gui/generator.go b/internal/gui/generator.go index 0d30f79..6d51359 100644 --- a/internal/gui/generator.go +++ b/internal/gui/generator.go @@ -120,11 +120,11 @@ func (a *Application) generateAudio(word string) (string, error) { // generateImages downloads images for a word func (a *Application) generateImages(word string) (string, error) { - return a.generateImagesWithPrompt(word, "") + return a.generateImagesWithPrompt(word, "", "") } -// generateImagesWithPrompt downloads a single image for a word with optional custom prompt -func (a *Application) generateImagesWithPrompt(word string, customPrompt string) (string, error) { +// generateImagesWithPrompt downloads a single image for a word with optional custom prompt and translation +func (a *Application) generateImagesWithPrompt(word string, customPrompt string, translation string) (string, error) { // Create image searcher based on provider var searcher image.ImageSearcher var err error @@ -155,17 +155,20 @@ func (a *Application) generateImagesWithPrompt(word string, customPrompt string) OutputDir: a.config.OutputDir, OverwriteExisting: true, CreateDir: true, - FileNamePattern: "{word}_{index}", + FileNamePattern: "{word}", 
MaxSizeBytes: 5 * 1024 * 1024, // 5MB } downloader := image.NewDownloader(searcher, downloadOpts) - // Create search options with custom prompt if provided + // Create search options with custom prompt and translation if provided searchOpts := image.DefaultSearchOptions(word) if customPrompt != "" { searchOpts.CustomPrompt = customPrompt } + if translation != "" { + searchOpts.Translation = translation + } // Download single image _, path, err := downloader.DownloadBestMatchWithOptions(a.ctx, searchOpts) diff --git a/internal/gui/navigation.go b/internal/gui/navigation.go index 59c656e..62a326e 100644 --- a/internal/gui/navigation.go +++ b/internal/gui/navigation.go @@ -187,6 +187,8 @@ func (a *Application) loadWordByIndex(index int) { if job.ImageFile != "" { a.imageDisplay.SetImages([]string{job.ImageFile}) } + // Load phonetic info from disk if it exists + a.loadPhoneticInfo(word) a.updateStatus(fmt.Sprintf("Loaded from queue: %s", word)) }) @@ -225,6 +227,24 @@ func (a *Application) loadExistingFiles(word string) { } } + // Load image prompt file + promptFile := filepath.Join(a.config.OutputDir, fmt.Sprintf("%s_prompt.txt", sanitized)) + if data, err := os.ReadFile(promptFile); err == nil { + prompt := strings.TrimSpace(string(data)) + fyne.Do(func() { + a.imagePromptEntry.SetText(prompt) + }) + } + + // Load phonetic information + phoneticFile := filepath.Join(a.config.OutputDir, fmt.Sprintf("%s_phonetic.txt", sanitized)) + if data, err := os.ReadFile(phoneticFile); err == nil { + phoneticInfo := string(data) + fyne.Do(func() { + a.phoneticDisplay.SetText(phoneticInfo) + }) + } + // Load audio file audioFile := filepath.Join(a.config.OutputDir, fmt.Sprintf("%s.%s", sanitized, a.config.AudioFormat)) if _, err := os.Stat(audioFile); err == nil { @@ -240,8 +260,6 @@ func (a *Application) loadExistingFiles(word string) { patterns := []string{ fmt.Sprintf("%s.jpg", sanitized), fmt.Sprintf("%s.png", sanitized), - fmt.Sprintf("%s_0.jpg", sanitized), - 
fmt.Sprintf("%s_0.png", sanitized), fmt.Sprintf("%s_1.jpg", sanitized), fmt.Sprintf("%s_1.png", sanitized), } @@ -345,6 +363,7 @@ func (a *Application) deleteCurrentWord() { fmt.Sprintf("%s_*.jpg", sanitized), fmt.Sprintf("%s_*.png", sanitized), fmt.Sprintf("%s_translation.txt", sanitized), + fmt.Sprintf("%s_prompt.txt", sanitized), fmt.Sprintf("%s_attribution.txt", sanitized), fmt.Sprintf("%s_*_attribution.txt", sanitized), } diff --git a/internal/image/openai.go b/internal/image/openai.go index ac66a45..c00ae5a 100644 --- a/internal/image/openai.go +++ b/internal/image/openai.go @@ -115,12 +115,21 @@ func (c *OpenAIClient) Search(ctx context.Context, opts *SearchOptions) ([]Searc } } - // Translate Bulgarian word to English for better results - translatedWord, err := c.translateBulgarianToEnglish(ctx, opts.Query) - if err != nil { - // If translation fails, fall back to using the original word - fmt.Printf("Translation failed: %v, using original word\n", err) - translatedWord = opts.Query + // Use provided translation if available, otherwise translate Bulgarian word to English + var translatedWord string + if opts.Translation != "" { + // Use the translation that was already provided (from UI or user input) + translatedWord = opts.Translation + fmt.Printf("Using provided translation: %s -> %s\n", opts.Query, translatedWord) + } else { + // Translate Bulgarian word to English for better results + var err error + translatedWord, err = c.translateBulgarianToEnglish(ctx, opts.Query) + if err != nil { + // If translation fails, fall back to using the original word + fmt.Printf("Translation failed: %v, using original word\n", err) + translatedWord = opts.Query + } } // Create prompt - use custom if provided, otherwise generate educational prompt diff --git a/internal/image/search.go b/internal/image/search.go index 800a114..540e0a1 100644 --- a/internal/image/search.go +++ b/internal/image/search.go @@ -20,6 +20,7 @@ type SearchResult struct { // SearchOptions 
configures the image search type SearchOptions struct { Query string // Search query (Bulgarian word) + Translation string // English translation (if already available) Language string // Language code (default: "bg") SafeSearch bool // Enable safe search filtering PerPage int // Number of results per page |
