diff options
| author | Paul Buetow <paul@buetow.org> | 2025-07-16 20:38:22 +0300 |
|---|---|---|
| committer | Paul Buetow <paul@buetow.org> | 2025-07-16 20:38:22 +0300 |
| commit | d46669426aa6b0ece71d0d05d0b6f2966686b17a (patch) | |
| tree | 9f927c3a8bc763943764ad63e3badafe8a9a7f62 | |
| parent | e49ecfe601c924fa68671477331a860acf8a62f7 (diff) | |
feat: add custom image prompt support and keyboard shortcuts
- Add text area next to image display for custom image generation prompts
- Users can specify their own prompts or leave empty for auto-generation
- Display the used prompt in the text area after generation
- Load prompts from attribution files when navigating to existing cards
- Add keyboard shortcuts for all GUI buttons:
  - G: Generate, N: New Word, I: Regenerate Image, A: Regenerate Audio
  - R: Regenerate All, D: Delete, P: Play audio
  - Left/Right arrows: Navigate between words
  - Y/N: Confirm/cancel delete dialog
- Update UI layout with equal 50/50 split between image and prompt
- Enable text wrapping in prompt text area
- Add 25% chance to ask OpenAI for creative photo style suggestions
- Fix concurrent processing to properly use custom prompts
🤖 Generated with [Claude Code](https://claude.ai/code)
Co-Authored-By: Claude <noreply@anthropic.com>
| -rw-r--r-- | internal/audio/openai_provider.go | 64 | ||||
| -rw-r--r-- | internal/gui/app.go | 142 | ||||
| -rw-r--r-- | internal/gui/audio_player.go | 15 | ||||
| -rw-r--r-- | internal/gui/generator.go | 28 | ||||
| -rw-r--r-- | internal/gui/navigation.go | 61 | ||||
| -rw-r--r-- | internal/gui/queue.go | 31 | ||||
| -rw-r--r-- | internal/image/download.go | 77 | ||||
| -rw-r--r-- | internal/image/openai.go | 68 | ||||
| -rw-r--r-- | internal/image/search.go | 1 | ||||
| -rw-r--r-- | test_prompt.md | 49 |
10 files changed, 463 insertions, 73 deletions
diff --git a/internal/audio/openai_provider.go b/internal/audio/openai_provider.go index b72d793..0f3a0ad 100644 --- a/internal/audio/openai_provider.go +++ b/internal/audio/openai_provider.go @@ -9,7 +9,7 @@ import ( "os" "path/filepath" "strings" - + "github.com/sashabaranov/go-openai" ) @@ -26,23 +26,23 @@ func NewOpenAIProvider(config *Config) (Provider, error) { if config.OpenAIKey == "" { return nil, fmt.Errorf("OpenAI API key is required") } - + client := openai.NewClient(config.OpenAIKey) - + provider := &OpenAIProvider{ client: client, config: config, cacheDir: config.CacheDir, enableCache: config.EnableCache, } - + // Create cache directory if caching is enabled if provider.enableCache && provider.cacheDir != "" { if err := os.MkdirAll(provider.cacheDir, 0755); err != nil { return nil, fmt.Errorf("failed to create cache directory: %w", err) } } - + return provider, nil } @@ -52,7 +52,7 @@ func (p *OpenAIProvider) GenerateAudio(ctx context.Context, text string, outputF if err := ValidateBulgarianText(text); err != nil { return err } - + // Check cache first if p.enableCache { cacheFile := p.getCacheFilePath(text) @@ -61,10 +61,10 @@ func (p *OpenAIProvider) GenerateAudio(ctx context.Context, text string, outputF return p.copyFile(cacheFile, outputFile) } } - + // Preprocess text for clearer Bulgarian pronunciation processedText := p.preprocessBulgarianText(text) - + // Prepare the TTS request // OpenAI TTS will automatically detect and pronounce Bulgarian text fmt.Printf("OpenAI TTS: Using model '%s' with voice '%s' at speed %.2f\n", p.config.OpenAIModel, p.config.OpenAIVoice, p.config.OpenAISpeed) @@ -72,19 +72,19 @@ func (p *OpenAIProvider) GenerateAudio(ctx context.Context, text string, outputF fmt.Printf("OpenAI TTS Instruction: '%s'\n", p.config.OpenAIInstruction) } fmt.Printf("OpenAI TTS Input: '%s'\n", processedText) - + req := openai.CreateSpeechRequest{ Model: openai.SpeechModel(p.config.OpenAIModel), Input: processedText, Voice: 
openai.SpeechVoice(p.config.OpenAIVoice), Speed: p.config.OpenAISpeed, } - + // Add instructions for gpt-4o-mini-tts model if p.config.OpenAIInstruction != "" && (p.config.OpenAIModel == "gpt-4o-mini-tts" || p.config.OpenAIModel == "gpt-4o-mini-audio-preview") { req.Instructions = p.config.OpenAIInstruction } - + // Determine response format based on output file extension ext := strings.ToLower(filepath.Ext(outputFile)) switch ext { @@ -104,7 +104,7 @@ func (p *OpenAIProvider) GenerateAudio(ctx context.Context, text string, outputF outputFile += ".mp3" } } - + // Make the API call response, err := p.client.CreateSpeech(ctx, req) if err != nil { @@ -116,7 +116,7 @@ func (p *OpenAIProvider) GenerateAudio(ctx context.Context, text string, outputF return fmt.Errorf("OpenAI TTS API error: %w", err) } defer response.Close() - + // Ensure output directory exists dir := filepath.Dir(outputFile) if dir != "" && dir != "." { @@ -124,30 +124,30 @@ func (p *OpenAIProvider) GenerateAudio(ctx context.Context, text string, outputF return fmt.Errorf("failed to create output directory: %w", err) } } - + // Create output file out, err := os.Create(outputFile) if err != nil { return fmt.Errorf("failed to create output file: %w", err) } defer out.Close() - + // Copy the audio data written, err := io.Copy(out, response) if err != nil { return fmt.Errorf("failed to write audio file: %w", err) } - + if written == 0 { return fmt.Errorf("no audio data received from OpenAI") } - + // Cache the result if caching is enabled if p.enableCache { cacheFile := p.getCacheFilePath(text) _ = p.copyFile(outputFile, cacheFile) // Ignore cache errors } - + return nil } @@ -161,7 +161,7 @@ func (p *OpenAIProvider) IsAvailable() error { if p.config.OpenAIKey == "" { return fmt.Errorf("OpenAI API key not configured") } - + // We could make a test API call here, but that would use credits // For now, just check that we have a key return nil @@ -171,20 +171,20 @@ func (p *OpenAIProvider) IsAvailable() error 
{ func (p *OpenAIProvider) preprocessBulgarianText(text string) string { // First, clean the text and remove punctuation that shouldn't be spoken cleanedText := strings.TrimSpace(text) - + // Remove common punctuation marks that shouldn't be pronounced punctuationToRemove := []string{"!", "?", ".", ",", ";", ":", "\"", "'", "(", ")", "[", "]", "{", "}", "-", "—", "–"} for _, punct := range punctuationToRemove { cleanedText = strings.ReplaceAll(cleanedText, punct, "") } - + // Trim any remaining whitespace cleanedText = strings.TrimSpace(cleanedText) - + // For single words, we add subtle punctuation to create natural pauses // This helps the TTS engine pronounce it more carefully - processedText := fmt.Sprintf("%s...", cleanedText) - + processedText := cleanedText // fmt.Sprintf("%s...", cleanedText) + return processedText } @@ -201,11 +201,11 @@ func (p *OpenAIProvider) getCacheFilePath(text string) string { h.Write([]byte(p.config.OpenAIInstruction)) } hash := hex.EncodeToString(h.Sum(nil)) - + // Use first 2 chars as subdirectory for better file system performance subdir := hash[:2] filename := hash[2:] + ".mp3" - + return filepath.Join(p.cacheDir, subdir, filename) } @@ -218,19 +218,19 @@ func (p *OpenAIProvider) copyFile(src, dst string) error { return err } } - + source, err := os.Open(src) if err != nil { return err } defer source.Close() - + destination, err := os.Create(dst) if err != nil { return err } defer destination.Close() - + _, err = io.Copy(destination, source) return err } @@ -248,7 +248,7 @@ func (p *OpenAIProvider) GetCacheStats() (fileCount int, totalSize int64, err er if !p.enableCache || p.cacheDir == "" { return 0, 0, nil } - + err = filepath.Walk(p.cacheDir, func(path string, info os.FileInfo, err error) error { if err != nil { return err @@ -259,6 +259,6 @@ func (p *OpenAIProvider) GetCacheStats() (fileCount int, totalSize int64, err er } return nil }) - + return fileCount, totalSize, err -}
\ No newline at end of file +} diff --git a/internal/gui/app.go b/internal/gui/app.go index 28c703e..3783eb9 100644 --- a/internal/gui/app.go +++ b/internal/gui/app.go @@ -35,6 +35,7 @@ type Application struct { progressBar *widget.ProgressBar statusLabel *widget.Label queueStatusLabel *widget.Label + imagePromptEntry *widget.Entry // Navigation buttons prevWordBtn *widget.Button @@ -56,6 +57,7 @@ type Application struct { savedCards []anki.Card existingWords []string // Words already in anki_cards folder currentWordIndex int + deleteConfirming bool // Track if we're in delete confirmation mode // Word processing queue queue *WordQueue @@ -155,9 +157,9 @@ func (a *Application) setupUI() { a.wordInput.SetPlaceHolder("Enter Bulgarian word...") a.wordInput.OnSubmitted = func(string) { a.onSubmit() } - a.submitButton = widget.NewButton("Generate", a.onSubmit) - a.prevWordBtn = widget.NewButton("◀ Prev", a.onPrevWord) - a.nextWordBtn = widget.NewButton("Next ▶", a.onNextWord) + a.submitButton = widget.NewButton("Generate (G)", a.onSubmit) + a.prevWordBtn = widget.NewButton("◀ Prev (←)", a.onPrevWord) + a.nextWordBtn = widget.NewButton("Next (→) ▶", a.onNextWord) inputSection := container.NewBorder( nil, nil, @@ -172,19 +174,40 @@ func (a *Application) setupUI() { a.translationText = widget.NewLabel("") a.translationText.Alignment = fyne.TextAlignCenter + // Create image prompt entry + a.imagePromptEntry = widget.NewMultiLineEntry() + a.imagePromptEntry.SetPlaceHolder("Custom image prompt (optional)...") + a.imagePromptEntry.Wrapping = fyne.TextWrapWord // Enable word wrapping + + // Create container for image and prompt with proper sizing + promptContainer := container.NewBorder( + widget.NewLabel("Image Prompt:"), + nil, + nil, + nil, + container.NewScroll(a.imagePromptEntry), + ) + + // Use a split container to give equal space to image and prompt + imageSection := container.NewHSplit( + a.imageDisplay, + promptContainer, + ) + imageSection.SetOffset(0.5) // Equal 
50/50 split + displaySection := container.NewBorder( a.translationText, a.audioPlayer, nil, nil, - a.imageDisplay, + imageSection, ) // Create action buttons - a.keepButton = widget.NewButton("New Word", a.onKeepAndContinue) - a.regenerateImageBtn = widget.NewButton("Regenerate Image", a.onRegenerateImage) - a.regenerateAudioBtn = widget.NewButton("Regenerate Audio", a.onRegenerateAudio) - a.regenerateAllBtn = widget.NewButton("Regenerate All", a.onRegenerateAll) - a.deleteButton = widget.NewButton("Delete", a.onDelete) + a.keepButton = widget.NewButton("New Word (N)", a.onKeepAndContinue) + a.regenerateImageBtn = widget.NewButton("Regenerate Image (I)", a.onRegenerateImage) + a.regenerateAudioBtn = widget.NewButton("Regenerate Audio (A)", a.onRegenerateAudio) + a.regenerateAllBtn = widget.NewButton("Regenerate All (R)", a.onRegenerateAll) + a.deleteButton = widget.NewButton("Delete (D)", a.onDelete) a.deleteButton.Importance = widget.DangerImportance // Initially disable action buttons @@ -248,6 +271,9 @@ func (a *Application) setupUI() { a.queue.Stop() a.wg.Wait() }) + + // Set up keyboard shortcuts + a.setupKeyboardShortcuts() } // Run starts the GUI application @@ -268,8 +294,11 @@ func (a *Application) onSubmit() { return } - // Add word to processing queue - job := a.queue.AddWord(word) + // Get custom prompt from the UI + customPrompt := a.imagePromptEntry.Text + + // Add word to processing queue with custom prompt + job := a.queue.AddWordWithPrompt(word, customPrompt) // Clear the input field for next word a.wordInput.SetText("") @@ -323,12 +352,16 @@ func (a *Application) generateMaterials(word string) { a.audioPlayer.SetAudioFile(audioFile) }) - // Generate images + // Generate images with custom prompt if provided fyne.Do(func() { a.updateStatus("Downloading images...") a.incrementProcessing() // Image processing starts }) - images, err := a.generateImages(word) + + // Get custom prompt from UI + customPrompt := a.imagePromptEntry.Text + + images, err := 
a.generateImagesWithPrompt(word, customPrompt) a.decrementProcessing() // Image processing ends if err != nil { @@ -414,6 +447,9 @@ func (a *Application) onRegenerateImage() { // Clear the current image immediately a.imageDisplay.Clear() + // Get custom prompt from UI + customPrompt := a.imagePromptEntry.Text + a.incrementProcessing() // Image processing starts a.wg.Add(1) @@ -421,7 +457,7 @@ func (a *Application) onRegenerateImage() { defer a.wg.Done() defer a.decrementProcessing() // Image processing ends - images, err := a.generateImages(a.currentWord) + images, err := a.generateImagesWithPrompt(a.currentWord, customPrompt) if err != nil { fyne.Do(func() { a.showError(fmt.Errorf("Image regeneration failed: %w", err)) @@ -591,6 +627,7 @@ func (a *Application) clearUI() { a.imageDisplay.Clear() a.audioPlayer.Clear() a.translationText.SetText("") + a.imagePromptEntry.SetText("") a.setActionButtonsEnabled(false) } @@ -687,7 +724,8 @@ func (a *Application) processWordJob(job *WordJob) { a.incrementProcessing() // Image processing starts }) - imageFiles, err := a.generateImages(job.Word) + // Use the custom prompt from the job + imageFiles, err := a.generateImagesWithPrompt(job.Word, job.CustomPrompt) a.decrementProcessing() // Image processing ends if err != nil { @@ -855,3 +893,77 @@ func (a *Application) decrementProcessing() { }) } +// setupKeyboardShortcuts sets up keyboard shortcuts for the application +func (a *Application) setupKeyboardShortcuts() { + // Create a custom shortcut handler + a.window.Canvas().SetOnTypedKey(func(ev *fyne.KeyEvent) { + // Don't process shortcuts if the word input is focused + if a.window.Canvas().Focused() == a.wordInput || a.window.Canvas().Focused() == a.imagePromptEntry { + return + } + + // Don't process if we're in delete confirmation mode (handled by dialog) + if a.deleteConfirming { + return + } + + switch ev.Name { + case fyne.KeyG: // Generate + if a.submitButton.Disabled() { + return + } + a.onSubmit() + + case fyne.KeyN: 
// New Word + if a.keepButton.Disabled() { + return + } + a.onKeepAndContinue() + + case fyne.KeyI: // Regenerate Image + if a.regenerateImageBtn.Disabled() { + return + } + a.onRegenerateImage() + + case fyne.KeyA: // Regenerate Audio + if a.regenerateAudioBtn.Disabled() { + return + } + a.onRegenerateAudio() + + case fyne.KeyR: // Regenerate All + if a.regenerateAllBtn.Disabled() { + return + } + a.onRegenerateAll() + + case fyne.KeyD: // Delete + if a.deleteButton.Disabled() { + return + } + a.onDelete() + + case fyne.KeyLeft: // Previous word + if a.prevWordBtn.Disabled() { + return + } + a.onPrevWord() + + case fyne.KeyRight: // Next word + if a.nextWordBtn.Disabled() { + return + } + a.onNextWord() + + case fyne.KeyP: // Play audio + if a.currentAudioFile != "" { + a.audioPlayer.Play() + } + + case fyne.KeyEscape: // Cancel any operation + a.deleteConfirming = false + } + }) +} + diff --git a/internal/gui/audio_player.go b/internal/gui/audio_player.go index 161c635..2d4b2da 100644 --- a/internal/gui/audio_player.go +++ b/internal/gui/audio_player.go @@ -31,7 +31,7 @@ func NewAudioPlayer() *AudioPlayer { p := &AudioPlayer{} // Create controls - p.playButton = widget.NewButton("▶ Play", p.onPlay) + p.playButton = widget.NewButton("▶ Play (P)", p.onPlay) p.stopButton = widget.NewButton("■ Stop", p.onStop) p.statusLabel = widget.NewLabel("No audio loaded") @@ -98,7 +98,7 @@ func (p *AudioPlayer) onPlay() { } p.isPlaying = true - p.playButton.SetText("⏸ Pause") + p.playButton.SetText("⏸ Pause (P)") p.stopButton.Enable() p.statusLabel.SetText("Playing: " + filepath.Base(p.audioFile)) } @@ -111,11 +111,18 @@ func (p *AudioPlayer) onStop() { } p.isPlaying = false - p.playButton.SetText("▶ Play") + p.playButton.SetText("▶ Play (P)") p.stopButton.Disable() p.statusLabel.SetText("Stopped: " + filepath.Base(p.audioFile)) } +// Play triggers audio playback +func (p *AudioPlayer) Play() { + if !p.playButton.Disabled() { + p.onPlay() + } +} + // startPlayback starts audio 
playback using platform-specific commands func (p *AudioPlayer) startPlayback() error { var cmd *exec.Cmd @@ -157,7 +164,7 @@ func (p *AudioPlayer) startPlayback() error { // Playback finished normally fyne.Do(func() { p.isPlaying = false - p.playButton.SetText("▶ Play") + p.playButton.SetText("▶ Play (P)") p.stopButton.Disable() p.statusLabel.SetText("Finished: " + filepath.Base(p.audioFile)) }) diff --git a/internal/gui/generator.go b/internal/gui/generator.go index 7656bcd..9738d88 100644 --- a/internal/gui/generator.go +++ b/internal/gui/generator.go @@ -8,6 +8,7 @@ import ( "strings" "time" + "fyne.io/fyne/v2" "github.com/sashabaranov/go-openai" "codeberg.org/snonux/totalrecall/internal/audio" @@ -86,6 +87,11 @@ func (a *Application) generateAudio(word string) (string, error) { // generateImages downloads images for a word func (a *Application) generateImages(word string) ([]string, error) { + return a.generateImagesWithPrompt(word, "") +} + +// generateImagesWithPrompt downloads images for a word with optional custom prompt +func (a *Application) generateImagesWithPrompt(word string, customPrompt string) ([]string, error) { // Create image searcher based on provider var searcher image.ImageSearcher var err error @@ -135,22 +141,40 @@ func (a *Application) generateImages(word string) ([]string, error) { downloader := image.NewDownloader(searcher, downloadOpts) + // Create search options with custom prompt if provided + searchOpts := image.DefaultSearchOptions(word) + if customPrompt != "" { + searchOpts.CustomPrompt = customPrompt + } + // Download images var paths []string if a.config.ImagesPerWord == 1 { - _, path, err := downloader.DownloadBestMatch(a.ctx, word) + _, path, err := downloader.DownloadBestMatchWithOptions(a.ctx, searchOpts) if err != nil { return nil, err } paths = []string{path} } else { - paths, err = downloader.DownloadMultiple(a.ctx, word, a.config.ImagesPerWord) + paths, err = downloader.DownloadMultipleWithOptions(a.ctx, searchOpts, 
a.config.ImagesPerWord) if err != nil { return nil, err } } + // If using OpenAI, get the last used prompt and update the UI + if a.config.ImageProvider == "openai" { + if openaiClient, ok := searcher.(*image.OpenAIClient); ok { + usedPrompt := openaiClient.GetLastPrompt() + if usedPrompt != "" { + fyne.Do(func() { + a.imagePromptEntry.SetText(usedPrompt) + }) + } + } + } + return paths, nil } diff --git a/internal/gui/navigation.go b/internal/gui/navigation.go index f24dcc1..e59b817 100644 --- a/internal/gui/navigation.go +++ b/internal/gui/navigation.go @@ -258,6 +258,28 @@ func (a *Application) loadExistingFiles(word string) { fyne.Do(func() { a.imageDisplay.SetImages(a.currentImages) }) + + // Try to load the prompt from attribution file if using OpenAI + if a.config.ImageProvider == "openai" && len(a.currentImages) > 0 { + // Look for attribution file + baseImagePath := a.currentImages[0] + attrPath := strings.TrimSuffix(baseImagePath, filepath.Ext(baseImagePath)) + "_attribution.txt" + if data, err := os.ReadFile(attrPath); err == nil { + // Parse prompt from attribution file + content := string(data) + lines := strings.Split(content, "\n") + for i, line := range lines { + if strings.HasPrefix(line, "Prompt used:") && i+1 < len(lines) { + // The prompt is on the next line + prompt := strings.TrimSpace(lines[i+1]) + fyne.Do(func() { + a.imagePromptEntry.SetText(prompt) + }) + break + } + } + } + } } fyne.Do(func() { @@ -271,14 +293,41 @@ func (a *Application) onDelete() { return } - // Confirm deletion - dialog.ShowConfirm("Delete Word", - fmt.Sprintf("Delete all files for '%s'?", a.currentWord), - func(confirm bool) { - if confirm { + // Create custom confirmation dialog with keyboard support + message := fmt.Sprintf("Delete all files for '%s'?\n\nPress Y to confirm or N to cancel", a.currentWord) + confirmDialog := dialog.NewConfirm("Delete Word", message, func(confirm bool) { + a.deleteConfirming = false + if confirm { + a.deleteCurrentWord() + } + }, 
a.window) + + // Set up keyboard handler for the dialog + a.deleteConfirming = true + + // Create a custom key handler for the dialog window + oldKeyHandler := a.window.Canvas().OnTypedKey() + a.window.Canvas().SetOnTypedKey(func(ev *fyne.KeyEvent) { + if a.deleteConfirming { + switch ev.Name { + case fyne.KeyY: + confirmDialog.Hide() + a.deleteConfirming = false a.deleteCurrentWord() + // Restore original key handler + a.window.Canvas().SetOnTypedKey(oldKeyHandler) + case fyne.KeyN, fyne.KeyEscape: + confirmDialog.Hide() + a.deleteConfirming = false + // Restore original key handler + a.window.Canvas().SetOnTypedKey(oldKeyHandler) } - }, a.window) + } else if oldKeyHandler != nil { + oldKeyHandler(ev) + } + }) + + confirmDialog.Show() } // deleteCurrentWord deletes all files for the current word diff --git a/internal/gui/queue.go b/internal/gui/queue.go index 7b1c5de..aaa0c55 100644 --- a/internal/gui/queue.go +++ b/internal/gui/queue.go @@ -9,15 +9,16 @@ import ( // WordJob represents a single word processing job type WordJob struct { - ID int - Word string - Translation string - AudioFile string - ImageFiles []string - Status JobStatus - Error error - StartedAt time.Time - CompletedAt time.Time + ID int + Word string + Translation string + AudioFile string + ImageFiles []string + Status JobStatus + Error error + StartedAt time.Time + CompletedAt time.Time + CustomPrompt string // Custom prompt for image generation } // JobStatus represents the current state of a job @@ -93,11 +94,17 @@ func (q *WordQueue) SetCallbacks(onStatusUpdate func(*WordJob), onJobComplete fu // AddWord adds a word to the processing queue func (q *WordQueue) AddWord(word string) *WordJob { + return q.AddWordWithPrompt(word, "") +} + +// AddWordWithPrompt adds a word to the processing queue with a custom prompt +func (q *WordQueue) AddWordWithPrompt(word, customPrompt string) *WordJob { q.mu.Lock() job := &WordJob{ - ID: q.nextID, - Word: word, - Status: StatusQueued, + ID: q.nextID, + 
Word: word, + Status: StatusQueued, + CustomPrompt: customPrompt, } q.nextID++ q.results[job.ID] = job diff --git a/internal/image/download.go b/internal/image/download.go index f684260..7083a6f 100644 --- a/internal/image/download.go +++ b/internal/image/download.go @@ -241,4 +241,81 @@ func (d *Downloader) DownloadMultiple(ctx context.Context, query string, count i } return downloaded, nil +} + +// DownloadBestMatchWithOptions downloads the best matching image for given search options +func (d *Downloader) DownloadBestMatchWithOptions(ctx context.Context, opts *SearchOptions) (*SearchResult, string, error) { + // Search for images + searchOpts := *opts // Copy to avoid modifying original + searchOpts.PerPage = 5 // Get top 5 results + + results, err := d.searcher.Search(ctx, &searchOpts) + if err != nil { + return nil, "", fmt.Errorf("search failed: %w", err) + } + + if len(results) == 0 { + return nil, "", fmt.Errorf("no images found for query: %s", opts.Query) + } + + // Try to download the first available image + for i, result := range results { + // Generate filename + filename := d.generateFileName(opts.Query, &result, i) + outputPath := filepath.Join(d.options.OutputDir, filename) + + // Try to download + err := d.DownloadImage(ctx, &result, outputPath) + if err == nil { + return &result, outputPath, nil + } + + // Log error and try next + fmt.Fprintf(os.Stderr, "Warning: failed to download image %d: %v\n", i+1, err) + } + + return nil, "", fmt.Errorf("failed to download any images for query: %s", opts.Query) +} + +// DownloadMultipleWithOptions downloads multiple images for given search options +func (d *Downloader) DownloadMultipleWithOptions(ctx context.Context, opts *SearchOptions, count int) ([]string, error) { + // Search for images + searchOpts := *opts // Copy to avoid modifying original + searchOpts.PerPage = count * 2 // Get extra in case some fail + + results, err := d.searcher.Search(ctx, &searchOpts) + if err != nil { + return nil, 
fmt.Errorf("search failed: %w", err) + } + + if len(results) == 0 { + return nil, fmt.Errorf("no images found for query: %s", opts.Query) + } + + // Download up to 'count' images + var downloaded []string + for i, result := range results { + if len(downloaded) >= count { + break + } + + // Generate filename + filename := d.generateFileName(opts.Query, &result, i) + outputPath := filepath.Join(d.options.OutputDir, filename) + + // Try to download + err := d.DownloadImage(ctx, &result, outputPath) + if err == nil { + downloaded = append(downloaded, outputPath) + } else { + // Log error and continue + fmt.Fprintf(os.Stderr, "Warning: failed to download image %d: %v\n", i+1, err) + } + } + + if len(downloaded) == 0 { + return nil, fmt.Errorf("failed to download any images for query: %s", opts.Query) + } + + return downloaded, nil }
\ No newline at end of file diff --git a/internal/image/openai.go b/internal/image/openai.go index c4b2e9d..add1c96 100644 --- a/internal/image/openai.go +++ b/internal/image/openai.go @@ -123,8 +123,14 @@ func (c *OpenAIClient) Search(ctx context.Context, opts *SearchOptions) ([]Searc translatedWord = opts.Query } - // Create educational prompt - prompt := c.createEducationalPrompt(opts.Query, translatedWord) + // Create prompt - use custom if provided, otherwise generate educational prompt + var prompt string + if opts.CustomPrompt != "" { + prompt = opts.CustomPrompt + fmt.Printf("Using custom prompt: %s\n", prompt) + } else { + prompt = c.createEducationalPrompt(opts.Query, translatedWord) + } // Store the prompt for attribution c.lastPrompt = prompt @@ -243,8 +249,28 @@ func (c *OpenAIClient) Name() string { return "openai" } +// GetLastPrompt returns the last prompt used for image generation +func (c *OpenAIClient) GetLastPrompt() string { + return c.lastPrompt +} + // createEducationalPrompt generates a prompt optimized for language learning func (c *OpenAIClient) createEducationalPrompt(bulgarianWord, englishTranslation string) string { + // 25% chance to ask OpenAI for a creative style + if rand.Float32() < 0.25 { + if creativeStyle := c.getCreativeStyleFromOpenAI(context.Background(), englishTranslation); creativeStyle != "" { + fmt.Printf(" Using OpenAI-suggested style: %s\n", creativeStyle) + return fmt.Sprintf( + "Generate a %s of: %s. "+ + "This is for the Bulgarian word '%s' which means %s. "+ + "The image should be educational and suitable for language learning flashcards. "+ + "Requirements: single main subject, plain background, clear and recognizable. "+ + "IMPORTANT: No text whatsoever. Do not include any words, letters, typography, labels, captions, or writing of any kind. 
Image only, without any text elements.", + creativeStyle, englishTranslation, bulgarianWord, englishTranslation, + ) + } + } + // Define different art styles for variety (42 styles total) styles := []string{ // Original styles (1-10) @@ -435,4 +461,42 @@ func (c *OpenAIClient) getSizeHeight() int { default: return 512 } +} + +// getCreativeStyleFromOpenAI asks OpenAI for a creative photo style suggestion +func (c *OpenAIClient) getCreativeStyleFromOpenAI(ctx context.Context, subject string) string { + fmt.Printf(" Asking OpenAI for creative style suggestion for '%s'...\n", subject) + + req := openai.ChatCompletionRequest{ + Model: openai.GPT4oMini, + Messages: []openai.ChatCompletionMessage{ + { + Role: openai.ChatMessageRoleSystem, + Content: "You are a creative art director. Suggest unique, interesting photo/art styles for educational flashcard images. Be creative and varied. Respond with ONLY the style description, nothing else. Keep it concise (max 15 words).", + }, + { + Role: openai.ChatMessageRoleUser, + Content: fmt.Sprintf("Suggest a creative visual style for an educational image of: %s", subject), + }, + }, + Temperature: 0.9, // Higher temperature for more creativity + MaxTokens: 30, + } + + resp, err := c.client.CreateChatCompletion(ctx, req) + if err != nil { + fmt.Printf(" Failed to get creative style: %v\n", err) + return "" + } + + if len(resp.Choices) == 0 || resp.Choices[0].Message.Content == "" { + return "" + } + + style := strings.TrimSpace(resp.Choices[0].Message.Content) + // Remove any trailing punctuation + style = strings.TrimSuffix(style, ".") + style = strings.TrimSuffix(style, "!") + + return style }
\ No newline at end of file diff --git a/internal/image/search.go b/internal/image/search.go index acc9dc8..800a114 100644 --- a/internal/image/search.go +++ b/internal/image/search.go @@ -26,6 +26,7 @@ type SearchOptions struct { Page int // Page number (1-based) ImageType string // Type: "photo", "illustration", "vector", "all" Orientation string // Orientation: "horizontal", "vertical", "all" + CustomPrompt string // Custom prompt for AI image generation (OpenAI) } // DefaultSearchOptions returns sensible defaults for Bulgarian word searches diff --git a/test_prompt.md b/test_prompt.md new file mode 100644 index 0000000..0734d9a --- /dev/null +++ b/test_prompt.md @@ -0,0 +1,49 @@ +# Custom Image Prompt Feature Test + +## Summary +Successfully implemented the custom image prompt feature for the TotalRecall GUI application. The feature allows users to: + +1. **Enter custom prompts**: A text area is displayed next to the image where users can specify their own prompt for image generation +2. **Auto-populate prompts**: When left empty, the app automatically generates an educational prompt +3. **Display used prompts**: After image generation, the actual prompt used is displayed in the text area +4. **Preserve prompts on navigation**: When navigating to existing cards, the prompt is loaded from attribution files + +## Implementation Details + +### Files Modified: +1. **internal/gui/app.go**: + - Added `imagePromptEntry` field to Application struct + - Updated UI layout to include the prompt text area + - Modified image generation calls to use custom prompts + +2. **internal/gui/generator.go**: + - Split `generateImages` into two functions + - Added `generateImagesWithPrompt` to handle custom prompts + - Updated to display used prompts in the UI after generation + +3. **internal/image/search.go**: + - Added `CustomPrompt` field to `SearchOptions` struct + +4. 
**internal/image/openai.go**: + - Modified to use custom prompts when provided + - Added `GetLastPrompt()` method to retrieve the used prompt + +5. **internal/image/download.go**: + - Added `DownloadBestMatchWithOptions` and `DownloadMultipleWithOptions` methods + +6. **internal/gui/navigation.go**: + - Added logic to load prompts from attribution files when navigating + +### How It Works: +1. User can enter a custom prompt in the text area next to the image +2. When generating/regenerating images, the custom prompt is used if provided +3. If no custom prompt is entered, the app generates an educational prompt automatically +4. The actual prompt used is displayed in the text area after generation +5. Prompts are saved in attribution files and loaded when navigating to existing cards + +## Testing +To test the feature: +1. Run the GUI: `./totalrecall gui` +2. Enter a Bulgarian word +3. Try generating with and without custom prompts +4. Navigate between cards to verify prompt loading
\ No newline at end of file |
