summaryrefslogtreecommitdiff
path: root/internal
diff options
context:
space:
mode:
author Paul Buetow <paul@buetow.org> 2025-07-16 20:38:22 +0300
committer Paul Buetow <paul@buetow.org> 2025-07-16 20:38:22 +0300
commit d46669426aa6b0ece71d0d05d0b6f2966686b17a (patch)
tree 9f927c3a8bc763943764ad63e3badafe8a9a7f62 /internal
parent e49ecfe601c924fa68671477331a860acf8a62f7 (diff)
feat: add custom image prompt support and keyboard shortcuts
- Add text area next to image display for custom image generation prompts
- Users can specify their own prompts or leave empty for auto-generation
- Display the used prompt in the text area after generation
- Load prompts from attribution files when navigating to existing cards
- Add keyboard shortcuts for all GUI buttons:
  - G: Generate, N: New Word, I: Regenerate Image, A: Regenerate Audio
  - R: Regenerate All, D: Delete, P: Play audio
  - Left/Right arrows: Navigate between words
  - Y/N: Confirm/cancel delete dialog
- Update UI layout with equal 50/50 split between image and prompt
- Enable text wrapping in prompt text area
- Add 25% chance to ask OpenAI for creative photo style suggestions
- Fix concurrent processing to properly use custom prompts

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>
Diffstat (limited to 'internal')
-rw-r--r-- internal/audio/openai_provider.go 64
-rw-r--r-- internal/gui/app.go 142
-rw-r--r-- internal/gui/audio_player.go 15
-rw-r--r-- internal/gui/generator.go 28
-rw-r--r-- internal/gui/navigation.go 61
-rw-r--r-- internal/gui/queue.go 31
-rw-r--r-- internal/image/download.go 77
-rw-r--r-- internal/image/openai.go 68
-rw-r--r-- internal/image/search.go 1
9 files changed, 414 insertions, 73 deletions
diff --git a/internal/audio/openai_provider.go b/internal/audio/openai_provider.go
index b72d793..0f3a0ad 100644
--- a/internal/audio/openai_provider.go
+++ b/internal/audio/openai_provider.go
@@ -9,7 +9,7 @@ import (
"os"
"path/filepath"
"strings"
-
+
"github.com/sashabaranov/go-openai"
)
@@ -26,23 +26,23 @@ func NewOpenAIProvider(config *Config) (Provider, error) {
if config.OpenAIKey == "" {
return nil, fmt.Errorf("OpenAI API key is required")
}
-
+
client := openai.NewClient(config.OpenAIKey)
-
+
provider := &OpenAIProvider{
client: client,
config: config,
cacheDir: config.CacheDir,
enableCache: config.EnableCache,
}
-
+
// Create cache directory if caching is enabled
if provider.enableCache && provider.cacheDir != "" {
if err := os.MkdirAll(provider.cacheDir, 0755); err != nil {
return nil, fmt.Errorf("failed to create cache directory: %w", err)
}
}
-
+
return provider, nil
}
@@ -52,7 +52,7 @@ func (p *OpenAIProvider) GenerateAudio(ctx context.Context, text string, outputF
if err := ValidateBulgarianText(text); err != nil {
return err
}
-
+
// Check cache first
if p.enableCache {
cacheFile := p.getCacheFilePath(text)
@@ -61,10 +61,10 @@ func (p *OpenAIProvider) GenerateAudio(ctx context.Context, text string, outputF
return p.copyFile(cacheFile, outputFile)
}
}
-
+
// Preprocess text for clearer Bulgarian pronunciation
processedText := p.preprocessBulgarianText(text)
-
+
// Prepare the TTS request
// OpenAI TTS will automatically detect and pronounce Bulgarian text
fmt.Printf("OpenAI TTS: Using model '%s' with voice '%s' at speed %.2f\n", p.config.OpenAIModel, p.config.OpenAIVoice, p.config.OpenAISpeed)
@@ -72,19 +72,19 @@ func (p *OpenAIProvider) GenerateAudio(ctx context.Context, text string, outputF
fmt.Printf("OpenAI TTS Instruction: '%s'\n", p.config.OpenAIInstruction)
}
fmt.Printf("OpenAI TTS Input: '%s'\n", processedText)
-
+
req := openai.CreateSpeechRequest{
Model: openai.SpeechModel(p.config.OpenAIModel),
Input: processedText,
Voice: openai.SpeechVoice(p.config.OpenAIVoice),
Speed: p.config.OpenAISpeed,
}
-
+
// Add instructions for gpt-4o-mini-tts model
if p.config.OpenAIInstruction != "" && (p.config.OpenAIModel == "gpt-4o-mini-tts" || p.config.OpenAIModel == "gpt-4o-mini-audio-preview") {
req.Instructions = p.config.OpenAIInstruction
}
-
+
// Determine response format based on output file extension
ext := strings.ToLower(filepath.Ext(outputFile))
switch ext {
@@ -104,7 +104,7 @@ func (p *OpenAIProvider) GenerateAudio(ctx context.Context, text string, outputF
outputFile += ".mp3"
}
}
-
+
// Make the API call
response, err := p.client.CreateSpeech(ctx, req)
if err != nil {
@@ -116,7 +116,7 @@ func (p *OpenAIProvider) GenerateAudio(ctx context.Context, text string, outputF
return fmt.Errorf("OpenAI TTS API error: %w", err)
}
defer response.Close()
-
+
// Ensure output directory exists
dir := filepath.Dir(outputFile)
if dir != "" && dir != "." {
@@ -124,30 +124,30 @@ func (p *OpenAIProvider) GenerateAudio(ctx context.Context, text string, outputF
return fmt.Errorf("failed to create output directory: %w", err)
}
}
-
+
// Create output file
out, err := os.Create(outputFile)
if err != nil {
return fmt.Errorf("failed to create output file: %w", err)
}
defer out.Close()
-
+
// Copy the audio data
written, err := io.Copy(out, response)
if err != nil {
return fmt.Errorf("failed to write audio file: %w", err)
}
-
+
if written == 0 {
return fmt.Errorf("no audio data received from OpenAI")
}
-
+
// Cache the result if caching is enabled
if p.enableCache {
cacheFile := p.getCacheFilePath(text)
_ = p.copyFile(outputFile, cacheFile) // Ignore cache errors
}
-
+
return nil
}
@@ -161,7 +161,7 @@ func (p *OpenAIProvider) IsAvailable() error {
if p.config.OpenAIKey == "" {
return fmt.Errorf("OpenAI API key not configured")
}
-
+
// We could make a test API call here, but that would use credits
// For now, just check that we have a key
return nil
@@ -171,20 +171,20 @@ func (p *OpenAIProvider) IsAvailable() error {
func (p *OpenAIProvider) preprocessBulgarianText(text string) string {
// First, clean the text and remove punctuation that shouldn't be spoken
cleanedText := strings.TrimSpace(text)
-
+
// Remove common punctuation marks that shouldn't be pronounced
punctuationToRemove := []string{"!", "?", ".", ",", ";", ":", "\"", "'", "(", ")", "[", "]", "{", "}", "-", "—", "–"}
for _, punct := range punctuationToRemove {
cleanedText = strings.ReplaceAll(cleanedText, punct, "")
}
-
+
// Trim any remaining whitespace
cleanedText = strings.TrimSpace(cleanedText)
-
+
// For single words, we add subtle punctuation to create natural pauses
// This helps the TTS engine pronounce it more carefully
- processedText := fmt.Sprintf("%s...", cleanedText)
-
+ processedText := cleanedText // fmt.Sprintf("%s...", cleanedText)
+
return processedText
}
@@ -201,11 +201,11 @@ func (p *OpenAIProvider) getCacheFilePath(text string) string {
h.Write([]byte(p.config.OpenAIInstruction))
}
hash := hex.EncodeToString(h.Sum(nil))
-
+
// Use first 2 chars as subdirectory for better file system performance
subdir := hash[:2]
filename := hash[2:] + ".mp3"
-
+
return filepath.Join(p.cacheDir, subdir, filename)
}
@@ -218,19 +218,19 @@ func (p *OpenAIProvider) copyFile(src, dst string) error {
return err
}
}
-
+
source, err := os.Open(src)
if err != nil {
return err
}
defer source.Close()
-
+
destination, err := os.Create(dst)
if err != nil {
return err
}
defer destination.Close()
-
+
_, err = io.Copy(destination, source)
return err
}
@@ -248,7 +248,7 @@ func (p *OpenAIProvider) GetCacheStats() (fileCount int, totalSize int64, err er
if !p.enableCache || p.cacheDir == "" {
return 0, 0, nil
}
-
+
err = filepath.Walk(p.cacheDir, func(path string, info os.FileInfo, err error) error {
if err != nil {
return err
@@ -259,6 +259,6 @@ func (p *OpenAIProvider) GetCacheStats() (fileCount int, totalSize int64, err er
}
return nil
})
-
+
return fileCount, totalSize, err
-} \ No newline at end of file
+}
diff --git a/internal/gui/app.go b/internal/gui/app.go
index 28c703e..3783eb9 100644
--- a/internal/gui/app.go
+++ b/internal/gui/app.go
@@ -35,6 +35,7 @@ type Application struct {
progressBar *widget.ProgressBar
statusLabel *widget.Label
queueStatusLabel *widget.Label
+ imagePromptEntry *widget.Entry
// Navigation buttons
prevWordBtn *widget.Button
@@ -56,6 +57,7 @@ type Application struct {
savedCards []anki.Card
existingWords []string // Words already in anki_cards folder
currentWordIndex int
+ deleteConfirming bool // Track if we're in delete confirmation mode
// Word processing queue
queue *WordQueue
@@ -155,9 +157,9 @@ func (a *Application) setupUI() {
a.wordInput.SetPlaceHolder("Enter Bulgarian word...")
a.wordInput.OnSubmitted = func(string) { a.onSubmit() }
- a.submitButton = widget.NewButton("Generate", a.onSubmit)
- a.prevWordBtn = widget.NewButton("◀ Prev", a.onPrevWord)
- a.nextWordBtn = widget.NewButton("Next ▶", a.onNextWord)
+ a.submitButton = widget.NewButton("Generate (G)", a.onSubmit)
+ a.prevWordBtn = widget.NewButton("◀ Prev (←)", a.onPrevWord)
+ a.nextWordBtn = widget.NewButton("Next (→) ▶", a.onNextWord)
inputSection := container.NewBorder(
nil, nil,
@@ -172,19 +174,40 @@ func (a *Application) setupUI() {
a.translationText = widget.NewLabel("")
a.translationText.Alignment = fyne.TextAlignCenter
+ // Create image prompt entry
+ a.imagePromptEntry = widget.NewMultiLineEntry()
+ a.imagePromptEntry.SetPlaceHolder("Custom image prompt (optional)...")
+ a.imagePromptEntry.Wrapping = fyne.TextWrapWord // Enable word wrapping
+
+ // Create container for image and prompt with proper sizing
+ promptContainer := container.NewBorder(
+ widget.NewLabel("Image Prompt:"),
+ nil,
+ nil,
+ nil,
+ container.NewScroll(a.imagePromptEntry),
+ )
+
+ // Use a split container to give equal space to image and prompt
+ imageSection := container.NewHSplit(
+ a.imageDisplay,
+ promptContainer,
+ )
+ imageSection.SetOffset(0.5) // Equal 50/50 split
+
displaySection := container.NewBorder(
a.translationText,
a.audioPlayer,
nil, nil,
- a.imageDisplay,
+ imageSection,
)
// Create action buttons
- a.keepButton = widget.NewButton("New Word", a.onKeepAndContinue)
- a.regenerateImageBtn = widget.NewButton("Regenerate Image", a.onRegenerateImage)
- a.regenerateAudioBtn = widget.NewButton("Regenerate Audio", a.onRegenerateAudio)
- a.regenerateAllBtn = widget.NewButton("Regenerate All", a.onRegenerateAll)
- a.deleteButton = widget.NewButton("Delete", a.onDelete)
+ a.keepButton = widget.NewButton("New Word (N)", a.onKeepAndContinue)
+ a.regenerateImageBtn = widget.NewButton("Regenerate Image (I)", a.onRegenerateImage)
+ a.regenerateAudioBtn = widget.NewButton("Regenerate Audio (A)", a.onRegenerateAudio)
+ a.regenerateAllBtn = widget.NewButton("Regenerate All (R)", a.onRegenerateAll)
+ a.deleteButton = widget.NewButton("Delete (D)", a.onDelete)
a.deleteButton.Importance = widget.DangerImportance
// Initially disable action buttons
@@ -248,6 +271,9 @@ func (a *Application) setupUI() {
a.queue.Stop()
a.wg.Wait()
})
+
+ // Set up keyboard shortcuts
+ a.setupKeyboardShortcuts()
}
// Run starts the GUI application
@@ -268,8 +294,11 @@ func (a *Application) onSubmit() {
return
}
- // Add word to processing queue
- job := a.queue.AddWord(word)
+ // Get custom prompt from the UI
+ customPrompt := a.imagePromptEntry.Text
+
+ // Add word to processing queue with custom prompt
+ job := a.queue.AddWordWithPrompt(word, customPrompt)
// Clear the input field for next word
a.wordInput.SetText("")
@@ -323,12 +352,16 @@ func (a *Application) generateMaterials(word string) {
a.audioPlayer.SetAudioFile(audioFile)
})
- // Generate images
+ // Generate images with custom prompt if provided
fyne.Do(func() {
a.updateStatus("Downloading images...")
a.incrementProcessing() // Image processing starts
})
- images, err := a.generateImages(word)
+
+ // Get custom prompt from UI
+ customPrompt := a.imagePromptEntry.Text
+
+ images, err := a.generateImagesWithPrompt(word, customPrompt)
a.decrementProcessing() // Image processing ends
if err != nil {
@@ -414,6 +447,9 @@ func (a *Application) onRegenerateImage() {
// Clear the current image immediately
a.imageDisplay.Clear()
+ // Get custom prompt from UI
+ customPrompt := a.imagePromptEntry.Text
+
a.incrementProcessing() // Image processing starts
a.wg.Add(1)
@@ -421,7 +457,7 @@ func (a *Application) onRegenerateImage() {
defer a.wg.Done()
defer a.decrementProcessing() // Image processing ends
- images, err := a.generateImages(a.currentWord)
+ images, err := a.generateImagesWithPrompt(a.currentWord, customPrompt)
if err != nil {
fyne.Do(func() {
a.showError(fmt.Errorf("Image regeneration failed: %w", err))
@@ -591,6 +627,7 @@ func (a *Application) clearUI() {
a.imageDisplay.Clear()
a.audioPlayer.Clear()
a.translationText.SetText("")
+ a.imagePromptEntry.SetText("")
a.setActionButtonsEnabled(false)
}
@@ -687,7 +724,8 @@ func (a *Application) processWordJob(job *WordJob) {
a.incrementProcessing() // Image processing starts
})
- imageFiles, err := a.generateImages(job.Word)
+ // Use the custom prompt from the job
+ imageFiles, err := a.generateImagesWithPrompt(job.Word, job.CustomPrompt)
a.decrementProcessing() // Image processing ends
if err != nil {
@@ -855,3 +893,77 @@ func (a *Application) decrementProcessing() {
})
}
+// setupKeyboardShortcuts installs the window-level key handler that maps
+// single key presses to the same actions as the GUI buttons.
+//
+// Shortcuts are ignored while either text entry (the word input or the image
+// prompt) has focus, so that typing is never interpreted as a command, and
+// while a delete confirmation is pending. A shortcut that mirrors a button
+// does nothing while that button is disabled.
+//
+// Escape is intentionally not handled here: this handler returns early
+// whenever deleteConfirming is set, so there is nothing left for it to cancel.
+// Y/N/Escape during a delete confirmation are processed by the handler that
+// onDelete installs.
+func (a *Application) setupKeyboardShortcuts() {
+	a.window.Canvas().SetOnTypedKey(func(ev *fyne.KeyEvent) {
+		// Leave key presses alone while the user is typing in a text entry.
+		focused := a.window.Canvas().Focused()
+		if focused == a.wordInput || focused == a.imagePromptEntry {
+			return
+		}
+
+		// Don't process if we're in delete confirmation mode (handled by dialog)
+		if a.deleteConfirming {
+			return
+		}
+
+		// trigger runs action only while the button it mirrors is enabled.
+		trigger := func(btn *widget.Button, action func()) {
+			if !btn.Disabled() {
+				action()
+			}
+		}
+
+		switch ev.Name {
+		case fyne.KeyG: // Generate
+			trigger(a.submitButton, a.onSubmit)
+
+		case fyne.KeyN: // New Word
+			trigger(a.keepButton, a.onKeepAndContinue)
+
+		case fyne.KeyI: // Regenerate Image
+			trigger(a.regenerateImageBtn, a.onRegenerateImage)
+
+		case fyne.KeyA: // Regenerate Audio
+			trigger(a.regenerateAudioBtn, a.onRegenerateAudio)
+
+		case fyne.KeyR: // Regenerate All
+			trigger(a.regenerateAllBtn, a.onRegenerateAll)
+
+		case fyne.KeyD: // Delete
+			trigger(a.deleteButton, a.onDelete)
+
+		case fyne.KeyLeft: // Previous word
+			trigger(a.prevWordBtn, a.onPrevWord)
+
+		case fyne.KeyRight: // Next word
+			trigger(a.nextWordBtn, a.onNextWord)
+
+		case fyne.KeyP: // Play audio
+			// Only meaningful once an audio file has been loaded.
+			if a.currentAudioFile != "" {
+				a.audioPlayer.Play()
+			}
+		}
+	})
+}
+
diff --git a/internal/gui/audio_player.go b/internal/gui/audio_player.go
index 161c635..2d4b2da 100644
--- a/internal/gui/audio_player.go
+++ b/internal/gui/audio_player.go
@@ -31,7 +31,7 @@ func NewAudioPlayer() *AudioPlayer {
p := &AudioPlayer{}
// Create controls
- p.playButton = widget.NewButton("▶ Play", p.onPlay)
+ p.playButton = widget.NewButton("▶ Play (P)", p.onPlay)
p.stopButton = widget.NewButton("■ Stop", p.onStop)
p.statusLabel = widget.NewLabel("No audio loaded")
@@ -98,7 +98,7 @@ func (p *AudioPlayer) onPlay() {
}
p.isPlaying = true
- p.playButton.SetText("⏸ Pause")
+ p.playButton.SetText("⏸ Pause (P)")
p.stopButton.Enable()
p.statusLabel.SetText("Playing: " + filepath.Base(p.audioFile))
}
@@ -111,11 +111,18 @@ func (p *AudioPlayer) onStop() {
}
p.isPlaying = false
- p.playButton.SetText("▶ Play")
+ p.playButton.SetText("▶ Play (P)")
p.stopButton.Disable()
p.statusLabel.SetText("Stopped: " + filepath.Base(p.audioFile))
}
+// Play starts playback through the same handler the play button uses.
+// It does nothing while the play button is disabled.
+func (p *AudioPlayer) Play() {
+	if p.playButton.Disabled() {
+		return
+	}
+	p.onPlay()
+}
+
// startPlayback starts audio playback using platform-specific commands
func (p *AudioPlayer) startPlayback() error {
var cmd *exec.Cmd
@@ -157,7 +164,7 @@ func (p *AudioPlayer) startPlayback() error {
// Playback finished normally
fyne.Do(func() {
p.isPlaying = false
- p.playButton.SetText("▶ Play")
+ p.playButton.SetText("▶ Play (P)")
p.stopButton.Disable()
p.statusLabel.SetText("Finished: " + filepath.Base(p.audioFile))
})
diff --git a/internal/gui/generator.go b/internal/gui/generator.go
index 7656bcd..9738d88 100644
--- a/internal/gui/generator.go
+++ b/internal/gui/generator.go
@@ -8,6 +8,7 @@ import (
"strings"
"time"
+ "fyne.io/fyne/v2"
"github.com/sashabaranov/go-openai"
"codeberg.org/snonux/totalrecall/internal/audio"
@@ -86,6 +87,11 @@ func (a *Application) generateAudio(word string) (string, error) {
// generateImages downloads images for a word
func (a *Application) generateImages(word string) ([]string, error) {
+ return a.generateImagesWithPrompt(word, "")
+}
+
+// generateImagesWithPrompt downloads images for a word with optional custom prompt
+func (a *Application) generateImagesWithPrompt(word string, customPrompt string) ([]string, error) {
// Create image searcher based on provider
var searcher image.ImageSearcher
var err error
@@ -135,22 +141,40 @@ func (a *Application) generateImages(word string) ([]string, error) {
downloader := image.NewDownloader(searcher, downloadOpts)
+ // Create search options with custom prompt if provided
+ searchOpts := image.DefaultSearchOptions(word)
+ if customPrompt != "" {
+ searchOpts.CustomPrompt = customPrompt
+ }
+
// Download images
var paths []string
if a.config.ImagesPerWord == 1 {
- _, path, err := downloader.DownloadBestMatch(a.ctx, word)
+ _, path, err := downloader.DownloadBestMatchWithOptions(a.ctx, searchOpts)
if err != nil {
return nil, err
}
paths = []string{path}
} else {
- paths, err = downloader.DownloadMultiple(a.ctx, word, a.config.ImagesPerWord)
+ paths, err = downloader.DownloadMultipleWithOptions(a.ctx, searchOpts, a.config.ImagesPerWord)
if err != nil {
return nil, err
}
}
+ // If using OpenAI, get the last used prompt and update the UI
+ if a.config.ImageProvider == "openai" {
+ if openaiClient, ok := searcher.(*image.OpenAIClient); ok {
+ usedPrompt := openaiClient.GetLastPrompt()
+ if usedPrompt != "" {
+ fyne.Do(func() {
+ a.imagePromptEntry.SetText(usedPrompt)
+ })
+ }
+ }
+ }
+
return paths, nil
}
diff --git a/internal/gui/navigation.go b/internal/gui/navigation.go
index f24dcc1..e59b817 100644
--- a/internal/gui/navigation.go
+++ b/internal/gui/navigation.go
@@ -258,6 +258,28 @@ func (a *Application) loadExistingFiles(word string) {
fyne.Do(func() {
a.imageDisplay.SetImages(a.currentImages)
})
+
+ // Try to load the prompt from attribution file if using OpenAI
+ if a.config.ImageProvider == "openai" && len(a.currentImages) > 0 {
+ // Look for attribution file
+ baseImagePath := a.currentImages[0]
+ attrPath := strings.TrimSuffix(baseImagePath, filepath.Ext(baseImagePath)) + "_attribution.txt"
+ if data, err := os.ReadFile(attrPath); err == nil {
+ // Parse prompt from attribution file
+ content := string(data)
+ lines := strings.Split(content, "\n")
+ for i, line := range lines {
+ if strings.HasPrefix(line, "Prompt used:") && i+1 < len(lines) {
+ // The prompt is on the next line
+ prompt := strings.TrimSpace(lines[i+1])
+ fyne.Do(func() {
+ a.imagePromptEntry.SetText(prompt)
+ })
+ break
+ }
+ }
+ }
+ }
}
fyne.Do(func() {
@@ -271,14 +293,41 @@ func (a *Application) onDelete() {
return
}
- // Confirm deletion
- dialog.ShowConfirm("Delete Word",
- fmt.Sprintf("Delete all files for '%s'?", a.currentWord),
- func(confirm bool) {
- if confirm {
+ // Create custom confirmation dialog with keyboard support
+ message := fmt.Sprintf("Delete all files for '%s'?\n\nPress Y to confirm or N to cancel", a.currentWord)
+ confirmDialog := dialog.NewConfirm("Delete Word", message, func(confirm bool) {
+ a.deleteConfirming = false
+ if confirm {
+ a.deleteCurrentWord()
+ }
+ }, a.window)
+
+ // Set up keyboard handler for the dialog
+ a.deleteConfirming = true
+
+ // Create a custom key handler for the dialog window
+ oldKeyHandler := a.window.Canvas().OnTypedKey()
+ a.window.Canvas().SetOnTypedKey(func(ev *fyne.KeyEvent) {
+ if a.deleteConfirming {
+ switch ev.Name {
+ case fyne.KeyY:
+ confirmDialog.Hide()
+ a.deleteConfirming = false
a.deleteCurrentWord()
+ // Restore original key handler
+ a.window.Canvas().SetOnTypedKey(oldKeyHandler)
+ case fyne.KeyN, fyne.KeyEscape:
+ confirmDialog.Hide()
+ a.deleteConfirming = false
+ // Restore original key handler
+ a.window.Canvas().SetOnTypedKey(oldKeyHandler)
}
- }, a.window)
+ } else if oldKeyHandler != nil {
+ oldKeyHandler(ev)
+ }
+ })
+
+ confirmDialog.Show()
}
// deleteCurrentWord deletes all files for the current word
diff --git a/internal/gui/queue.go b/internal/gui/queue.go
index 7b1c5de..aaa0c55 100644
--- a/internal/gui/queue.go
+++ b/internal/gui/queue.go
@@ -9,15 +9,16 @@ import (
// WordJob represents a single word processing job
type WordJob struct {
- ID int
- Word string
- Translation string
- AudioFile string
- ImageFiles []string
- Status JobStatus
- Error error
- StartedAt time.Time
- CompletedAt time.Time
+ ID int
+ Word string
+ Translation string
+ AudioFile string
+ ImageFiles []string
+ Status JobStatus
+ Error error
+ StartedAt time.Time
+ CompletedAt time.Time
+ CustomPrompt string // Custom prompt for image generation
}
// JobStatus represents the current state of a job
@@ -93,11 +94,17 @@ func (q *WordQueue) SetCallbacks(onStatusUpdate func(*WordJob), onJobComplete fu
// AddWord adds a word to the processing queue
func (q *WordQueue) AddWord(word string) *WordJob {
+ return q.AddWordWithPrompt(word, "")
+}
+
+// AddWordWithPrompt adds a word to the processing queue with a custom prompt
+func (q *WordQueue) AddWordWithPrompt(word, customPrompt string) *WordJob {
q.mu.Lock()
job := &WordJob{
- ID: q.nextID,
- Word: word,
- Status: StatusQueued,
+ ID: q.nextID,
+ Word: word,
+ Status: StatusQueued,
+ CustomPrompt: customPrompt,
}
q.nextID++
q.results[job.ID] = job
diff --git a/internal/image/download.go b/internal/image/download.go
index f684260..7083a6f 100644
--- a/internal/image/download.go
+++ b/internal/image/download.go
@@ -241,4 +241,81 @@ func (d *Downloader) DownloadMultiple(ctx context.Context, query string, count i
}
return downloaded, nil
+}
+
+// DownloadBestMatchWithOptions searches with the given options and downloads
+// the first result that can be fetched successfully.
+//
+// opts is not modified: the search runs on a copy with PerPage forced to 5 so
+// that later results can serve as fallbacks when a download fails. On success
+// it returns the result that was downloaded and the path it was written to.
+// An error is returned when opts is nil, the search fails, nothing is found,
+// or every candidate fails to download.
+func (d *Downloader) DownloadBestMatchWithOptions(ctx context.Context, opts *SearchOptions) (*SearchResult, string, error) {
+	// Guard against a nil pointer instead of panicking on the copy below.
+	if opts == nil {
+		return nil, "", fmt.Errorf("search options are required")
+	}
+
+	// Search on a copy so the caller's options stay untouched.
+	searchOpts := *opts
+	searchOpts.PerPage = 5 // Get top 5 results
+
+	results, err := d.searcher.Search(ctx, &searchOpts)
+	if err != nil {
+		return nil, "", fmt.Errorf("search failed: %w", err)
+	}
+
+	if len(results) == 0 {
+		return nil, "", fmt.Errorf("no images found for query: %s", opts.Query)
+	}
+
+	// Walk the results in order and stop at the first successful download.
+	for i := range results {
+		// Work on a per-iteration copy so the returned pointer never refers to
+		// a shared loop variable or to the results slice.
+		result := results[i]
+
+		filename := d.generateFileName(opts.Query, &result, i)
+		outputPath := filepath.Join(d.options.OutputDir, filename)
+
+		if err := d.DownloadImage(ctx, &result, outputPath); err != nil {
+			// Log error and try next
+			fmt.Fprintf(os.Stderr, "Warning: failed to download image %d: %v\n", i+1, err)
+			continue
+		}
+
+		return &result, outputPath, nil
+	}
+
+	return nil, "", fmt.Errorf("failed to download any images for query: %s", opts.Query)
+}
+
+// DownloadMultipleWithOptions searches with the given options and downloads up
+// to count of the results, skipping any that fail to download.
+//
+// opts is not modified: the search runs on a copy whose PerPage is set to
+// count*2 so that failed downloads can be replaced by later results. The
+// returned slice holds the paths of the images that were written, in result
+// order, and may be shorter than count. An error is returned when opts is nil,
+// count is not positive, the search fails or finds nothing, or no image could
+// be downloaded.
+func (d *Downloader) DownloadMultipleWithOptions(ctx context.Context, opts *SearchOptions, count int) ([]string, error) {
+	// Guard against a nil pointer instead of panicking on the copy below.
+	if opts == nil {
+		return nil, fmt.Errorf("search options are required")
+	}
+	// A non-positive count can never yield a download; fail before issuing a
+	// search that would only end in a misleading "failed to download" error.
+	if count <= 0 {
+		return nil, fmt.Errorf("image count must be positive, got %d", count)
+	}
+
+	// Search on a copy so the caller's options stay untouched.
+	searchOpts := *opts
+	searchOpts.PerPage = count * 2 // Get extra in case some fail
+
+	results, err := d.searcher.Search(ctx, &searchOpts)
+	if err != nil {
+		return nil, fmt.Errorf("search failed: %w", err)
+	}
+
+	if len(results) == 0 {
+		return nil, fmt.Errorf("no images found for query: %s", opts.Query)
+	}
+
+	// Download up to 'count' images
+	var downloaded []string
+	for i := range results {
+		if len(downloaded) >= count {
+			break
+		}
+
+		// Per-iteration copy; its address is only used within this iteration.
+		result := results[i]
+
+		filename := d.generateFileName(opts.Query, &result, i)
+		outputPath := filepath.Join(d.options.OutputDir, filename)
+
+		if err := d.DownloadImage(ctx, &result, outputPath); err != nil {
+			// Log error and continue
+			fmt.Fprintf(os.Stderr, "Warning: failed to download image %d: %v\n", i+1, err)
+			continue
+		}
+
+		downloaded = append(downloaded, outputPath)
+	}
+
+	if len(downloaded) == 0 {
+		return nil, fmt.Errorf("failed to download any images for query: %s", opts.Query)
+	}
+
+	return downloaded, nil
} \ No newline at end of file
diff --git a/internal/image/openai.go b/internal/image/openai.go
index c4b2e9d..add1c96 100644
--- a/internal/image/openai.go
+++ b/internal/image/openai.go
@@ -123,8 +123,14 @@ func (c *OpenAIClient) Search(ctx context.Context, opts *SearchOptions) ([]Searc
translatedWord = opts.Query
}
- // Create educational prompt
- prompt := c.createEducationalPrompt(opts.Query, translatedWord)
+ // Create prompt - use custom if provided, otherwise generate educational prompt
+ var prompt string
+ if opts.CustomPrompt != "" {
+ prompt = opts.CustomPrompt
+ fmt.Printf("Using custom prompt: %s\n", prompt)
+ } else {
+ prompt = c.createEducationalPrompt(opts.Query, translatedWord)
+ }
// Store the prompt for attribution
c.lastPrompt = prompt
@@ -243,8 +249,28 @@ func (c *OpenAIClient) Name() string {
return "openai"
}
+// GetLastPrompt reports the prompt text most recently stored in the client,
+// which Search records before generating an image.
+func (c *OpenAIClient) GetLastPrompt() string {
+	prompt := c.lastPrompt
+	return prompt
+}
+
// createEducationalPrompt generates a prompt optimized for language learning
func (c *OpenAIClient) createEducationalPrompt(bulgarianWord, englishTranslation string) string {
+ // 25% chance to ask OpenAI for a creative style
+ if rand.Float32() < 0.25 {
+ if creativeStyle := c.getCreativeStyleFromOpenAI(context.Background(), englishTranslation); creativeStyle != "" {
+ fmt.Printf(" Using OpenAI-suggested style: %s\n", creativeStyle)
+ return fmt.Sprintf(
+ "Generate a %s of: %s. "+
+ "This is for the Bulgarian word '%s' which means %s. "+
+ "The image should be educational and suitable for language learning flashcards. "+
+ "Requirements: single main subject, plain background, clear and recognizable. "+
+ "IMPORTANT: No text whatsoever. Do not include any words, letters, typography, labels, captions, or writing of any kind. Image only, without any text elements.",
+ creativeStyle, englishTranslation, bulgarianWord, englishTranslation,
+ )
+ }
+ }
+
// Define different art styles for variety (42 styles total)
styles := []string{
// Original styles (1-10)
@@ -435,4 +461,42 @@ func (c *OpenAIClient) getSizeHeight() int {
default:
return 512
}
+}
+
+// getCreativeStyleFromOpenAI asks the chat completion API for a short visual
+// style description to use when generating an image of subject.
+//
+// It returns the suggested style with surrounding whitespace and all trailing
+// '.' / '!' characters removed, so that it can be embedded mid-sentence in an
+// image prompt. It returns "" when the request fails or the reply is empty;
+// failures are printed to stdout rather than returned.
+func (c *OpenAIClient) getCreativeStyleFromOpenAI(ctx context.Context, subject string) string {
+	fmt.Printf(" Asking OpenAI for creative style suggestion for '%s'...\n", subject)
+
+	req := openai.ChatCompletionRequest{
+		Model: openai.GPT4oMini,
+		Messages: []openai.ChatCompletionMessage{
+			{
+				Role:    openai.ChatMessageRoleSystem,
+				Content: "You are a creative art director. Suggest unique, interesting photo/art styles for educational flashcard images. Be creative and varied. Respond with ONLY the style description, nothing else. Keep it concise (max 15 words).",
+			},
+			{
+				Role:    openai.ChatMessageRoleUser,
+				Content: fmt.Sprintf("Suggest a creative visual style for an educational image of: %s", subject),
+			},
+		},
+		Temperature: 0.9, // Higher temperature for more creativity
+		MaxTokens:   30,
+	}
+
+	resp, err := c.client.CreateChatCompletion(ctx, req)
+	if err != nil {
+		fmt.Printf(" Failed to get creative style: %v\n", err)
+		return ""
+	}
+
+	if len(resp.Choices) == 0 {
+		return ""
+	}
+
+	// Strip every trailing '.' and '!' (not just one of each) and any space
+	// left in front of them, e.g. "watercolor..." or "watercolor !".
+	style := strings.TrimSpace(resp.Choices[0].Message.Content)
+	style = strings.TrimRight(style, ".!")
+
+	return strings.TrimSpace(style)
} \ No newline at end of file
diff --git a/internal/image/search.go b/internal/image/search.go
index acc9dc8..800a114 100644
--- a/internal/image/search.go
+++ b/internal/image/search.go
@@ -26,6 +26,7 @@ type SearchOptions struct {
Page int // Page number (1-based)
ImageType string // Type: "photo", "illustration", "vector", "all"
Orientation string // Orientation: "horizontal", "vertical", "all"
+ CustomPrompt string // Custom prompt for AI image generation (OpenAI)
}
// DefaultSearchOptions returns sensible defaults for Bulgarian word searches