diff options
| author | Paul Buetow <paul@buetow.org> | 2025-07-18 23:18:45 +0300 |
|---|---|---|
| committer | Paul Buetow <paul@buetow.org> | 2025-07-18 23:18:45 +0300 |
| commit | 8779de2c97e445acf82e8422c899fdefa4649bda (patch) | |
| tree | b24d270a4e655c66059034cfdf0713e3bf3d32f5 | |
| parent | f3c1b568ac28e211d218f0b33ccb85cf782ce248 (diff) | |
feat: multiple improvements to GUI and codebase
- Add random voice speed between 0.90-1.00 for more natural audio
- Display voice and speed info in GUI audio player
- Implement automatic retry loading for missing files (checks every 2 seconds)
- Fix voice/speed info persistence during audio playback
- Remove image caching functionality for cleaner codebase
- Rename prompt.txt to image_prompt.txt for clarity
- Fix GUI to recognize newly added cards during runtime (rescan on navigation)
- Update README to reflect removed image cache
These changes improve the user experience by making the audio more natural,
providing better feedback about audio generation parameters, and ensuring
the GUI stays synchronized with externally added cards.
🤖 Generated with [Claude Code](https://claude.ai/code)
Co-Authored-By: Claude <noreply@anthropic.com>
| -rw-r--r-- | README.md | 6 | ||||
| -rw-r--r-- | cmd/totalrecall/main.go | 41 | ||||
| -rw-r--r-- | internal/gui/app.go | 15 | ||||
| -rw-r--r-- | internal/gui/audio_player.go | 45 | ||||
| -rw-r--r-- | internal/gui/generator.go | 23 | ||||
| -rw-r--r-- | internal/gui/navigation.go | 163 | ||||
| -rw-r--r-- | internal/image/openai.go | 410 |
7 files changed, 360 insertions, 343 deletions
@@ -21,7 +21,7 @@ It has mainly been vibe coded using Claude Code CLI. - Batch processing of multiple words - Anki-compatible CSV export with translations - Random voice variants and speech speed -- Audio and image caching to save API costs +- Audio caching to save API costs ## Installation @@ -121,10 +121,6 @@ image: openai_size: "512x512" # Size: 256x256, 512x512, 1024x1024 openai_quality: "standard" # Quality: standard or hd (dall-e-3 only) openai_style: "natural" # Style: natural or vivid (dall-e-3 only) - - # Caching - enable_cache: true - cache_dir: "./.image_cache" output: directory: ./anki_cards diff --git a/cmd/totalrecall/main.go b/cmd/totalrecall/main.go index dbcacf2..db1a94e 100644 --- a/cmd/totalrecall/main.go +++ b/cmd/totalrecall/main.go @@ -334,6 +334,13 @@ func generateAudio(word string) error { } func generateAudioWithVoice(word, voice string) error { + // Generate random speed between 0.90 and 1.00 if not explicitly set + speed := openAISpeed + if openAISpeed == 0.9 && !viper.IsSet("audio.openai_speed") { + // Default was used, generate random speed + speed = 0.90 + rand.Float64()*0.10 + } + // Create audio provider configuration providerConfig := &audio.Config{ Provider: "openai", @@ -344,7 +351,7 @@ func generateAudioWithVoice(word, voice string) error { OpenAIKey: getOpenAIKey(), OpenAIModel: openAIModel, OpenAIVoice: voice, - OpenAISpeed: openAISpeed, + OpenAISpeed: speed, OpenAIInstruction: openAIInstruction, // Caching @@ -437,8 +444,6 @@ func downloadImagesWithTranslation(word, translation string) error { Size: openAIImageSize, Quality: openAIImageQuality, Style: openAIImageStyle, - CacheDir: viper.GetString("image.cache_dir"), - EnableCache: viper.GetBool("image.enable_cache"), } // Use config file values if not overridden by flags @@ -455,13 +460,6 @@ func downloadImagesWithTranslation(word, translation string) error { openaiConfig.Style = viper.GetString("image.openai_style") } - // Set defaults - if openaiConfig.CacheDir == "" { - 
openaiConfig.CacheDir = "./.image_cache" - } - if !viper.IsSet("image.enable_cache") { - openaiConfig.EnableCache = true - } searcher = image.NewOpenAIClient(openaiConfig) if openaiConfig.APIKey == "" { @@ -514,6 +512,19 @@ func downloadImagesWithTranslation(word, translation string) error { } fmt.Printf(" Downloaded: %s\n", path) + // If using OpenAI, save the prompt + if imageAPI == "openai" { + if openaiClient, ok := searcher.(*image.OpenAIClient); ok { + usedPrompt := openaiClient.GetLastPrompt() + if usedPrompt != "" { + promptFile := filepath.Join(wordDir, "image_prompt.txt") + if err := os.WriteFile(promptFile, []byte(usedPrompt), 0644); err != nil { + fmt.Printf(" Warning: Failed to save image prompt: %v\n", err) + } + } + } + } + return nil } @@ -825,6 +836,15 @@ func saveAudioAttribution(word, audioFile string, config *audio.Config) error { return fmt.Errorf("failed to write audio attribution file: %w", err) } + // Also save metadata for GUI display + wordDir := filepath.Dir(audioFile) + metadataFile := filepath.Join(wordDir, "audio_metadata.txt") + metadata := fmt.Sprintf("voice=%s\nspeed=%.2f\n", config.OpenAIVoice, config.OpenAISpeed) + if err := os.WriteFile(metadataFile, []byte(metadata), 0644); err != nil { + // Non-fatal error, just log it + fmt.Printf("Warning: Failed to save audio metadata: %v\n", err) + } + return nil } @@ -834,7 +854,6 @@ func runGUIMode() error { OutputDir: outputDir, AudioFormat: audioFormat, ImageProvider: imageAPI, - EnableCache: viper.GetBool("cache.enable"), OpenAIKey: getOpenAIKey(), } diff --git a/internal/gui/app.go b/internal/gui/app.go index 28abb7b..da66287 100644 --- a/internal/gui/app.go +++ b/internal/gui/app.go @@ -65,6 +65,7 @@ type Application struct { currentWordIndex int deleteConfirming bool // Track if we're in delete confirmation mode wordChangeTimer *time.Timer // Timer for detecting word changes + fileCheckTicker *time.Ticker // Ticker for checking missing files // Word processing queue queue *WordQueue 
@@ -88,7 +89,6 @@ type Config struct { OutputDir string AudioFormat string ImageProvider string - EnableCache bool OpenAIKey string } @@ -98,7 +98,6 @@ func DefaultConfig() *Config { OutputDir: "./anki_cards", AudioFormat: "mp3", ImageProvider: "openai", - EnableCache: true, } } @@ -135,7 +134,7 @@ func New(config *Config) *Application { OpenAIVoice: "nova", OpenAISpeed: 0.9, OpenAIInstruction: "You are speaking Bulgarian language (български език). Pronounce the Bulgarian text with authentic Bulgarian phonetics, not Russian. Speak slowly and clearly for language learners.", - EnableCache: config.EnableCache, + EnableCache: true, CacheDir: "./.audio_cache", } @@ -371,6 +370,10 @@ func (a *Application) setupUI() { // Add the tooltip layer to enable tooltips a.window.SetContent(fynetooltip.AddWindowToolTipLayer(content, a.window.Canvas())) a.window.SetOnClosed(func() { + // Stop file check ticker + if a.fileCheckTicker != nil { + a.fileCheckTicker.Stop() + } a.cancel() a.queue.Stop() a.wg.Wait() @@ -1080,6 +1083,12 @@ func (a *Application) showError(err error) { } func (a *Application) clearUI() { + // Stop file check ticker when clearing UI + if a.fileCheckTicker != nil { + a.fileCheckTicker.Stop() + a.fileCheckTicker = nil + } + a.imageDisplay.Clear() a.audioPlayer.Clear() // Don't clear the word input or translation entry - they should stay populated diff --git a/internal/gui/audio_player.go b/internal/gui/audio_player.go index 8df4461..38494c5 100644 --- a/internal/gui/audio_player.go +++ b/internal/gui/audio_player.go @@ -2,9 +2,11 @@ package gui import ( "fmt" + "os" "os/exec" "path/filepath" "runtime" + "strings" "fyne.io/fyne/v2" "fyne.io/fyne/v2/container" @@ -23,9 +25,10 @@ type AudioPlayer struct { stopButton *ttwidget.Button statusLabel *widget.Label - audioFile string - isPlaying bool - playCmd *exec.Cmd + audioFile string + isPlaying bool + playCmd *exec.Cmd + voiceInfo string // Stores voice and speed info } // NewAudioPlayer creates a new 
audio player widget @@ -71,7 +74,34 @@ func (p *AudioPlayer) SetAudioFile(audioFile string) { if audioFile != "" { p.playButton.Enable() - p.statusLabel.SetText(fmt.Sprintf("Audio: %s", filepath.Base(audioFile))) + + // Try to load voice metadata + wordDir := filepath.Dir(audioFile) + metadataFile := filepath.Join(wordDir, "audio_metadata.txt") + voice := "" + speed := "" + + if data, err := os.ReadFile(metadataFile); err == nil { + lines := strings.Split(string(data), "\n") + for _, line := range lines { + if strings.HasPrefix(line, "voice=") { + voice = strings.TrimPrefix(line, "voice=") + } else if strings.HasPrefix(line, "speed=") { + speed = strings.TrimPrefix(line, "speed=") + } + } + } + + // Store voice info + if voice != "" && speed != "" { + p.voiceInfo = fmt.Sprintf(" (voice: %s, speed: %s)", voice, speed) + } else { + p.voiceInfo = "" + } + + // Format status text with voice and speed info + statusText := fmt.Sprintf("Audio: %s%s", filepath.Base(audioFile), p.voiceInfo) + p.statusLabel.SetText(statusText) } else { p.Clear() } @@ -82,6 +112,7 @@ func (p *AudioPlayer) Clear() { p.onStop() // Stop any playing audio p.audioFile = "" p.isPlaying = false + p.voiceInfo = "" p.playButton.Disable() p.stopButton.Disable() p.statusLabel.SetText("No audio loaded") @@ -108,7 +139,7 @@ func (p *AudioPlayer) onPlay() { p.isPlaying = true p.playButton.SetIcon(theme.MediaPauseIcon()) p.stopButton.Enable() - p.statusLabel.SetText("Playing: " + filepath.Base(p.audioFile)) + p.statusLabel.SetText(fmt.Sprintf("Playing: %s%s", filepath.Base(p.audioFile), p.voiceInfo)) } // onStop handles stop button click @@ -121,7 +152,7 @@ func (p *AudioPlayer) onStop() { p.isPlaying = false p.playButton.SetIcon(theme.MediaPlayIcon()) p.stopButton.Disable() - p.statusLabel.SetText("Stopped: " + filepath.Base(p.audioFile)) + p.statusLabel.SetText(fmt.Sprintf("Stopped: %s%s", filepath.Base(p.audioFile), p.voiceInfo)) } // Play triggers audio playback @@ -174,7 +205,7 @@ func (p *AudioPlayer) 
startPlayback() error { p.isPlaying = false p.playButton.SetIcon(theme.MediaPlayIcon()) p.stopButton.Disable() - p.statusLabel.SetText("Finished: " + filepath.Base(p.audioFile)) + p.statusLabel.SetText(fmt.Sprintf("Finished: %s%s", filepath.Base(p.audioFile), p.voiceInfo)) }) } }() diff --git a/internal/gui/generator.go b/internal/gui/generator.go index ca9d854..92ead40 100644 --- a/internal/gui/generator.go +++ b/internal/gui/generator.go @@ -91,8 +91,12 @@ func (a *Application) generateAudio(word string) (string, error) { rand.Seed(time.Now().UnixNano()) voice := allVoices[rand.Intn(len(allVoices))] - // Update audio config with random voice + // Generate random speed between 0.90 and 1.00 + speed := 0.90 + rand.Float64()*0.10 + + // Update audio config with random voice and speed a.audioConfig.OpenAIVoice = voice + a.audioConfig.OpenAISpeed = speed // Create audio provider provider, err := audio.NewProvider(a.audioConfig) @@ -127,11 +131,18 @@ func (a *Application) generateAudio(word string) (string, error) { } // Save audio attribution - if err := a.saveAudioAttribution(word, outputFile, voice); err != nil { + if err := a.saveAudioAttribution(word, outputFile, voice, speed); err != nil { // Non-fatal error, just log it fmt.Printf("Warning: Failed to save audio attribution: %v\n", err) } + // Save voice metadata for GUI display + metadataFile := filepath.Join(wordDir, "audio_metadata.txt") + metadata := fmt.Sprintf("voice=%s\nspeed=%.2f\n", voice, speed) + if err := os.WriteFile(metadataFile, []byte(metadata), 0644); err != nil { + fmt.Printf("Warning: Failed to save audio metadata: %v\n", err) + } + return outputFile, nil } @@ -154,8 +165,6 @@ func (a *Application) generateImagesWithPrompt(word string, customPrompt string, Size: "512x512", // Half of 1024x1024 Quality: "standard", Style: "natural", - CacheDir: "./.image_cache", - EnableCache: a.config.EnableCache, } searcher = image.NewOpenAIClient(openaiConfig) @@ -216,7 +225,7 @@ func (a *Application) 
generateImagesWithPrompt(word string, customPrompt string, usedPrompt := openaiClient.GetLastPrompt() if usedPrompt != "" { // Save the prompt to disk immediately for this word - promptFile := filepath.Join(wordDir, "prompt.txt") + promptFile := filepath.Join(wordDir, "image_prompt.txt") os.WriteFile(promptFile, []byte(usedPrompt), 0644) // Only update UI if this word is still the current word @@ -237,12 +246,12 @@ func (a *Application) generateImagesWithPrompt(word string, customPrompt string, } // saveAudioAttribution saves attribution info for generated audio -func (a *Application) saveAudioAttribution(word, audioFile, voice string) error { +func (a *Application) saveAudioAttribution(word, audioFile, voice string, speed float64) error { attribution := fmt.Sprintf("Audio generated by OpenAI TTS\n\n") attribution += fmt.Sprintf("Bulgarian word: %s\n", word) attribution += fmt.Sprintf("Model: %s\n", a.audioConfig.OpenAIModel) attribution += fmt.Sprintf("Voice: %s\n", voice) - attribution += fmt.Sprintf("Speed: %.2f\n", a.audioConfig.OpenAISpeed) + attribution += fmt.Sprintf("Speed: %.2f\n", speed) if a.audioConfig.OpenAIInstruction != "" { attribution += fmt.Sprintf("\nVoice instructions:\n%s\n", a.audioConfig.OpenAIInstruction) diff --git a/internal/gui/navigation.go b/internal/gui/navigation.go index 5a41d7f..a73e4d0 100644 --- a/internal/gui/navigation.go +++ b/internal/gui/navigation.go @@ -197,12 +197,32 @@ func (a *Application) getAllAvailableWords() []string { // onPrevWord loads the previous word func (a *Application) onPrevWord() { + // Store current word before rescanning + currentWord := a.currentWord + + // Rescan to pick up any new cards added externally + a.scanExistingWords() + allWords := a.getAllAvailableWords() if len(allWords) == 0 { return } - newIndex := a.currentWordIndex - 1 + // Find current word's new index after rescan + currentIndex := -1 + for i, word := range allWords { + if word == currentWord { + currentIndex = i + break + } + } + + 
// If current word not found, use the stored index + if currentIndex == -1 { + currentIndex = a.currentWordIndex + } + + newIndex := currentIndex - 1 // Wrap around to the end if at beginning if newIndex < 0 { newIndex = len(allWords) - 1 @@ -213,12 +233,32 @@ func (a *Application) onPrevWord() { // onNextWord loads the next word func (a *Application) onNextWord() { + // Store current word before rescanning + currentWord := a.currentWord + + // Rescan to pick up any new cards added externally + a.scanExistingWords() + allWords := a.getAllAvailableWords() if len(allWords) == 0 { return } - newIndex := a.currentWordIndex + 1 + // Find current word's new index after rescan + currentIndex := -1 + for i, word := range allWords { + if word == currentWord { + currentIndex = i + break + } + } + + // If current word not found, use the stored index + if currentIndex == -1 { + currentIndex = a.currentWordIndex + } + + newIndex := currentIndex + 1 // Wrap around to the beginning if at end if newIndex >= len(allWords) { newIndex = 0 @@ -229,6 +269,12 @@ func (a *Application) onNextWord() { // loadWordByIndex loads a word by its index in the combined word list func (a *Application) loadWordByIndex(index int) { + // Stop any existing file check ticker + if a.fileCheckTicker != nil { + a.fileCheckTicker.Stop() + a.fileCheckTicker = nil + } + allWords := a.getAllAvailableWords() if index < 0 || index >= len(allWords) { return @@ -269,7 +315,7 @@ func (a *Application) loadWordByIndex(index int) { // Load image prompt from disk if it exists if wordDir := a.findCardDirectory(word); wordDir != "" { - promptFile := filepath.Join(wordDir, "prompt.txt") + promptFile := filepath.Join(wordDir, "image_prompt.txt") if data, err := os.ReadFile(promptFile); err == nil { prompt := strings.TrimSpace(string(data)) a.imagePromptEntry.SetText(prompt) @@ -297,6 +343,9 @@ func (a *Application) loadWordByIndex(index int) { if hasContent { a.setActionButtonsEnabled(true) } + + // Start ticker to check 
for missing files + a.startFileCheckTicker() } // loadExistingFiles loads existing files for a word @@ -326,7 +375,7 @@ func (a *Application) loadExistingFiles(word string) { } // Load image prompt file - promptFile := filepath.Join(wordDir, "prompt.txt") + promptFile := filepath.Join(wordDir, "image_prompt.txt") if data, err := os.ReadFile(promptFile); err == nil { prompt := strings.TrimSpace(string(data)) fmt.Printf("Loaded prompt from file: %s\n", promptFile) @@ -404,6 +453,112 @@ func (a *Application) loadExistingFiles(word string) { }) } +// startFileCheckTicker starts a ticker to check for missing files +func (a *Application) startFileCheckTicker() { + // Create ticker that checks every 2 seconds + a.fileCheckTicker = time.NewTicker(2 * time.Second) + + go func() { + for range a.fileCheckTicker.C { + // Only check files for the current word + a.mu.Lock() + currentWord := a.currentWord + a.mu.Unlock() + + if currentWord != "" { + a.checkForMissingFiles(currentWord) + } + } + }() +} + +// checkForMissingFiles checks for missing files and attempts to load them +func (a *Application) checkForMissingFiles(word string) { + // Find the card directory for this word + wordDir := a.findCardDirectory(word) + if wordDir == "" { + return + } + + // Check for missing audio file + if a.currentAudioFile == "" { + audioFile := filepath.Join(wordDir, fmt.Sprintf("audio.%s", a.config.AudioFormat)) + if _, err := os.Stat(audioFile); err == nil { + a.currentAudioFile = audioFile + fyne.Do(func() { + a.audioPlayer.SetAudioFile(audioFile) + a.updateStatus(fmt.Sprintf("Found audio file for %s", word)) + }) + } + } + + // Check for missing image file + if a.currentImage == "" { + patterns := []string{"image.jpg", "image.png"} + for _, pattern := range patterns { + imagePath := filepath.Join(wordDir, pattern) + if _, err := os.Stat(imagePath); err == nil { + a.currentImage = imagePath + fyne.Do(func() { + a.imageDisplay.SetImages([]string{imagePath}) + 
a.updateStatus(fmt.Sprintf("Found image file for %s", word)) + }) + break + } + } + } + + // Check for missing translation + if a.currentTranslation == "" { + translationFile := filepath.Join(wordDir, "translation.txt") + if data, err := os.ReadFile(translationFile); err == nil { + content := string(data) + parts := strings.Split(content, "=") + if len(parts) >= 2 { + a.currentTranslation = strings.TrimSpace(parts[1]) + fyne.Do(func() { + a.translationEntry.SetText(a.currentTranslation) + a.updateStatus(fmt.Sprintf("Found translation for %s", word)) + }) + } + } + } + + // Check for missing prompt + currentPrompt := a.imagePromptEntry.Text + if currentPrompt == "" { + promptFile := filepath.Join(wordDir, "image_prompt.txt") + if data, err := os.ReadFile(promptFile); err == nil { + prompt := strings.TrimSpace(string(data)) + fyne.Do(func() { + a.imagePromptEntry.SetText(prompt) + a.updateStatus(fmt.Sprintf("Found prompt for %s", word)) + }) + } + } + + // Check for missing phonetic info + currentPhonetic := a.phoneticDisplay.Text + if currentPhonetic == "" { + phoneticFile := filepath.Join(wordDir, "phonetic.txt") + if data, err := os.ReadFile(phoneticFile); err == nil { + phoneticInfo := string(data) + fyne.Do(func() { + a.phoneticDisplay.SetText(phoneticInfo) + a.updateStatus(fmt.Sprintf("Found phonetic info for %s", word)) + }) + } + } + + // Update action buttons if we now have content + hasContent := a.currentAudioFile != "" || a.currentImage != "" || a.currentTranslation != "" + if hasContent { + fyne.Do(func() { + a.setActionButtonsEnabled(true) + }) + } +} + // onDelete moves the current word's files to trash bin func (a *Application) onDelete() { if a.currentWord == "" { diff --git a/internal/image/openai.go b/internal/image/openai.go index 6a2494a..637b8a1 100644 --- a/internal/image/openai.go +++ b/internal/image/openai.go @@ -8,8 +8,6 @@ import ( "io" "math/rand" "net/http" - "os" - "path/filepath" "strings" "time" @@ -18,26 +16,22 @@ import ( // 
OpenAIClient implements ImageSearcher for OpenAI DALL-E image generation type OpenAIClient struct { - client *openai.Client - apiKey string - model string // dall-e-2 or dall-e-3 - size string // 256x256, 512x512, 1024x1024 - quality string // standard or hd (dall-e-3 only) - style string // natural or vivid (dall-e-3 only) - cacheDir string - enableCache bool - lastPrompt string // Store the last used prompt for attribution + client *openai.Client + apiKey string + model string // dall-e-2 or dall-e-3 + size string // 256x256, 512x512, 1024x1024 + quality string // standard or hd (dall-e-3 only) + style string // natural or vivid (dall-e-3 only) + lastPrompt string // Store the last used prompt for attribution } // OpenAIConfig holds configuration for the OpenAI image provider type OpenAIConfig struct { - APIKey string - Model string - Size string - Quality string - Style string - CacheDir string - EnableCache bool + APIKey string + Model string + Size string + Quality string + Style string } // NewOpenAIClient creates a new OpenAI DALL-E client @@ -62,24 +56,14 @@ func NewOpenAIClient(config *OpenAIConfig) *OpenAIClient { if config.Style == "" { config.Style = "natural" } - if config.CacheDir == "" { - config.CacheDir = "./.image_cache" - } oc := &OpenAIClient{ - client: client, - apiKey: config.APIKey, - model: config.Model, - size: config.Size, - quality: config.Quality, - style: config.Style, - cacheDir: config.CacheDir, - enableCache: config.EnableCache, - } - - // Create cache directory if caching is enabled - if oc.enableCache && oc.cacheDir != "" { - os.MkdirAll(oc.cacheDir, 0755) + client: client, + apiKey: config.APIKey, + model: config.Model, + size: config.Size, + quality: config.Quality, + style: config.Style, } return oc @@ -95,26 +79,6 @@ func (c *OpenAIClient) Search(ctx context.Context, opts *SearchOptions) ([]Searc } } - // Check cache first - if c.enableCache { - cacheFile := c.getCacheFilePath(opts.Query) - if info, err := os.Stat(cacheFile); 
err == nil && info.Size() > 0 { - // Return cached result - fmt.Printf("Using cached image for '%s'\n", opts.Query) - result := SearchResult{ - ID: c.generateImageID(opts.Query), - URL: cacheFile, - ThumbnailURL: cacheFile, - Width: c.getSizeWidth(), - Height: c.getSizeHeight(), - Description: fmt.Sprintf("Generated image for %s", opts.Query), - Attribution: "Generated by OpenAI DALL-E", - Source: "openai", - } - return []SearchResult{result}, nil - } - } - // Use provided translation if available, otherwise translate Bulgarian word to English var translatedWord string if opts.Translation != "" { @@ -196,16 +160,6 @@ func (c *OpenAIClient) Search(ctx context.Context, opts *SearchOptions) ([]Searc // Get the generated image URL imageURL := resp.Data[0].URL - // Download and cache the image if caching is enabled - if c.enableCache { - cacheFile := c.getCacheFilePath(opts.Query) - if err := c.downloadAndCache(ctx, imageURL, cacheFile); err == nil { - // Update URL to point to cached file - imageURL = cacheFile - } - // Continue even if caching fails - } - // Create result result := SearchResult{ ID: c.generateImageID(opts.Query), @@ -223,16 +177,7 @@ func (c *OpenAIClient) Search(ctx context.Context, opts *SearchOptions) ([]Searc // Download downloads an image from the given URL func (c *OpenAIClient) Download(ctx context.Context, url string) (io.ReadCloser, error) { - // If it's a local cached file (not an HTTP/HTTPS URL), open it directly - if !strings.HasPrefix(url, "http://") && !strings.HasPrefix(url, "https://") { - file, err := os.Open(url) - if err != nil { - return nil, fmt.Errorf("failed to open cached file: %w", err) - } - return file, nil - } - - // Otherwise download from URL + // Download from URL req, err := http.NewRequestWithContext(ctx, "GET", url, nil) if err != nil { return nil, err @@ -284,12 +229,77 @@ func (c *OpenAIClient) createEducationalPrompt(bulgarianWord, englishTranslation scene = "" } - // Get styles from file or generate them - styles, 
err := c.getArtisticStyles(context.Background()) - if err != nil { - // If we can't get styles, return an error by returning empty prompt - fmt.Printf(" ERROR: Failed to load artistic styles: %v\n", err) - return "" + // Define artistic styles + styles := []string{ + "Photorealism", "Hyperrealism", "Surrealism", "Impressionism", + "Minimalism", "Pop Art", "Art Nouveau", "Digital Art", + "Watercolor", "Oil Painting", "Pencil Sketch", "Ink Drawing", + "3D Rendering", "Low Poly Art", "Pixel Art", "Vector Art", + "Collage", "Mixed Media", "Contemporary Art", "Abstract Expressionism", + "Cubism", "Pointillism", "Fauvism", "Art Deco", + "Baroque", "Renaissance", "Romanticism", "Realism", + "Post-Impressionism", "Expressionism", "Constructivism", "Suprematism", + "Dadaism", "Futurism", "Op Art", "Kinetic Art", + "Street Art", "Graffiti Art", "Installation Art", "Land Art", + "Conceptual Art", "Performance Art", "Video Art", "Net Art", + "Generative Art", "Algorithmic Art", "Fractal Art", "Glitch Art", + "Vaporwave", "Synthwave", "Cyberpunk", "Steampunk", + "Fantasy Art", "Science Fiction Art", "Horror Art", "Gothic Art", + "Anime", "Manga", "Comic Book Art", "Cartoon", + "Caricature", "Editorial Illustration", "Children's Book Illustration", "Fashion Illustration", + "Architectural Rendering", "Technical Illustration", "Scientific Illustration", "Medical Illustration", + "Botanical Illustration", "Zoological Illustration", "Astronomical Art", "Paleoart", + "Infographic", "Data Visualization", "Typography Art", "Calligraphy", + "Mosaic", "Stained Glass", "Tapestry", "Embroidery", + "Sculpture", "Ceramics", "Pottery", "Glass Art", + "Metalwork", "Jewelry Design", "Woodcarving", "Paper Art", + "Origami", "Kirigami", "Quilling", "Book Art", + "Photography", "Documentary Photography", "Portrait Photography", "Landscape Photography", + "Macro Photography", "Aerial Photography", "Underwater Photography", "Astrophotography", + "Film Noir", "Vintage Photography", "Polaroid", 
"Double Exposure", + "HDR Photography", "Long Exposure", "Tilt-Shift", "Infrared Photography", + "Monochrome", "Sepia Tone", "Cross-Processing", "Cyanotype", + "Folk Art", "Outsider Art", "Naive Art", "Aboriginal Art", + "African Art", "Asian Art", "Islamic Art", "Celtic Art", + "Byzantine Art", "Medieval Art", "Pre-Columbian Art", "Ancient Egyptian Art", + "Ancient Greek Art", "Ancient Roman Art", "Cave Painting", "Petroglyphs", + "Bauhaus", "De Stijl", "Vienna Secession", "Arts and Crafts Movement", + "Prairie School", "International Style", "Brutalism", "Deconstructivism", + "Parametric Design", "Biomimicry", "Sustainable Design", "Universal Design", + "Retro Futurism", "Dieselpunk", "Atompunk", "Biopunk", + "Afrofuturism", "Solarpunk", "Post-Apocalyptic", "Dystopian Art", + "Psychedelic Art", "Visionary Art", "Lowbrow Art", "Outsider Art", + "Trompe-l'oeil", "Anamorphic Art", "Optical Illusion", "Impossible Objects", + "Sacred Geometry", "Mandala", "Yantra", "Celtic Knots", + "Stippling", "Hatching", "Cross-Hatching", "Scumbling", + "Impasto", "Glazing", "Scumbling", "Sgraffito", + "Encaustic", "Fresco", "Tempera", "Gouache", + "Pastel", "Charcoal", "Conte", "Silverpoint", + "Linocut", "Woodcut", "Etching", "Lithography", + "Screen Printing", "Monotype", "Collagraph", "Digital Print", + "Augmented Reality Art", "Virtual Reality Art", "Interactive Art", "Projection Mapping", + "Light Art", "Neon Art", "Holographic Art", "Laser Art", + "Sound Art", "Bio Art", "Eco Art", "Social Practice Art", + "Relational Aesthetics", "Participatory Art", "Community Art", "Activist Art", + "Feminist Art", "Queer Art", "Postcolonial Art", "Decolonial Art", + "Metamodernism", "Post-Internet Art", "Post-Digital Art", "New Aesthetic", + "Speculative Design", "Critical Design", "Design Fiction", "Adversarial Design", + "Transitional Design", "Transformation Design", "Service Design", "Experience Design", + "Slow Design", "Emotional Design", "Inclusive Design", "Regenerative Design", 
+ "Biophilic Design", "Cradle to Cradle", "Circular Design", "Zero Waste Design", + "Modular Design", "Open Design", "Co-Design", "Participatory Design", + "Flat Design", "Material Design", "Neumorphism", "Glassmorphism", + "Maximalism", "Eclecticism", "Kitsch", "Camp", + "Wabi-Sabi", "Hygge", "Lagom", "Ikigai", + "Feng Shui", "Vastu Shastra", "Sacred Architecture", "Organic Architecture", + "Vernacular Architecture", "Adaptive Reuse", "Green Architecture", "Living Architecture", + "Kinetic Architecture", "Responsive Architecture", "Parametric Architecture", "Algorithmic Architecture", + "Blob Architecture", "Deconstructivist Architecture", "High-Tech Architecture", "Neo-Futurism", + "Critical Regionalism", "Tropical Modernism", "Desert Modernism", "Scandinavian Design", + "Japanese Design", "Italian Design", "German Design", "Dutch Design", + "Memphis Group", "Radical Design", "Anti-Design", "Superstudio", + "Archigram", "Metabolism", "Structuralism", "Postmodernism", + "Minimalist Photography", "Conceptual Photography", "Staged Photography", "Candid Photography", } // Shuffle the styles to avoid bias @@ -303,7 +313,7 @@ func (c *OpenAIClient) createEducationalPrompt(bulgarianWord, englishTranslation // Define prompt components in order of importance var prompt string - + if scene != "" { // Full prompt with scene fullPrompt := fmt.Sprintf( @@ -313,7 +323,7 @@ func (c *OpenAIClient) createEducationalPrompt(bulgarianWord, englishTranslation "IMPORTANT: No text whatsoever. Do not include any words, letters, typography, labels, captions, or writing of any kind. Image only, without any text elements.", selectedStyle, scene, ) - + // Check if full prompt exceeds 1000 characters if len(fullPrompt) > 1000 { // Try without the IMPORTANT notice @@ -323,7 +333,7 @@ func (c *OpenAIClient) createEducationalPrompt(bulgarianWord, englishTranslation "Requirements: The main subject must be clearly visible, easily recognizable, and prominent in the image. 
It should occupy the central area with sharp focus and proper lighting.", selectedStyle, scene, ) - + // If still too long, truncate the scene if len(prompt) > 1000 { // Truncate scene to fit within limit @@ -355,12 +365,12 @@ func (c *OpenAIClient) createEducationalPrompt(bulgarianWord, englishTranslation selectedStyle, englishTranslation, englishTranslation, ) } - + // Final check to ensure prompt is within 1000 characters if len(prompt) > 1000 { prompt = prompt[:997] + "..." } - + return prompt } @@ -432,60 +442,14 @@ func (c *OpenAIClient) generateSceneDescription(ctx context.Context, bulgarianWo return scene, nil } -// getCacheFilePath generates a cache file path for the given word -func (c *OpenAIClient) getCacheFilePath(word string) string { - // Create a hash of the word and settings - h := md5.New() - h.Write([]byte(word)) - h.Write([]byte(c.model)) - h.Write([]byte(c.size)) - h.Write([]byte(c.quality)) - h.Write([]byte(c.style)) - hash := hex.EncodeToString(h.Sum(nil)) - - // Use first 2 chars as subdirectory for better file system performance - subdir := hash[:2] - filename := hash[2:] + ".png" - - return filepath.Join(c.cacheDir, subdir, filename) -} - -// downloadAndCache downloads an image and saves it to the cache -func (c *OpenAIClient) downloadAndCache(ctx context.Context, url, cacheFile string) error { - // Ensure directory exists - dir := filepath.Dir(cacheFile) - if err := os.MkdirAll(dir, 0755); err != nil { - return err - } - - // Download the image - resp, err := c.Download(ctx, url) - if err != nil { - return err - } - defer resp.Close() - - // Create the cache file - out, err := os.Create(cacheFile) - if err != nil { - return err - } - defer out.Close() - - // Copy the data - _, err = io.Copy(out, resp) - return err -} - -// generateImageID creates a unique ID for the image +// generateImageID generates a unique ID for the image func (c *OpenAIClient) generateImageID(word string) string { - h := md5.New() - h.Write([]byte(word)) - 
h.Write([]byte(c.model)) - return "openai_" + hex.EncodeToString(h.Sum(nil))[:8] + // Create hash of the word for unique ID + hash := md5.Sum([]byte(word)) + return hex.EncodeToString(hash[:])[:8] } -// getSizeWidth returns the width based on the size setting +// getSizeWidth returns the width based on the configured size func (c *OpenAIClient) getSizeWidth() int { switch c.size { case "256x256": @@ -494,179 +458,13 @@ func (c *OpenAIClient) getSizeWidth() int { return 512 case "1024x1024": return 1024 - case "1024x1792", "1792x1024": // DALL-E 3 sizes - if strings.HasPrefix(c.size, "1024") { - return 1024 - } - return 1792 default: - return 512 - } -} - -// getSizeHeight returns the height based on the size setting -func (c *OpenAIClient) getSizeHeight() int { - switch c.size { - case "256x256": - return 256 - case "512x512": - return 512 - case "1024x1024": return 1024 - case "1024x1792": - return 1792 - case "1792x1024": - return 1024 - default: - return 512 } } -// getArtisticStyles loads artistic styles from cache or generates them via OpenAI -func (c *OpenAIClient) getArtisticStyles(ctx context.Context) ([]string, error) { - // Define the styles cache file path - stylesFile := filepath.Join(c.cacheDir, "artistic_styles.txt") - - // Check if file exists and is less than a week old - fileInfo, err := os.Stat(stylesFile) - needsRegeneration := false - - if err != nil { - if os.IsNotExist(err) { - needsRegeneration = true - fmt.Println(" Artistic styles file not found, will generate new styles") - } else { - return nil, fmt.Errorf("error checking styles file: %w", err) - } - } else { - // Check if file is older than a week - weekAgo := time.Now().Add(-7 * 24 * time.Hour) - if fileInfo.ModTime().Before(weekAgo) { - needsRegeneration = true - fmt.Println(" Artistic styles file is older than a week, will regenerate") - } - } - - // If we need to regenerate, do it - if needsRegeneration { - styles, err := c.generateArtisticStyles(ctx) - if err != nil { - return nil, 
fmt.Errorf("failed to generate artistic styles: %w", err) - } - - // Save to file - if err := c.saveStylesToFile(stylesFile, styles); err != nil { - // Log error but continue with generated styles - fmt.Printf(" Warning: Could not save styles to file: %v\n", err) - } - - return styles, nil - } - - // Load from file - return c.loadStylesFromFile(stylesFile) -} - -// generateArtisticStyles asks OpenAI to generate a list of artistic styles -func (c *OpenAIClient) generateArtisticStyles(ctx context.Context) ([]string, error) { - fmt.Println(" Generating artistic styles via OpenAI...") - - req := openai.ChatCompletionRequest{ - Model: openai.GPT4oMini, - Messages: []openai.ChatCompletionMessage{ - { - Role: openai.ChatMessageRoleSystem, - Content: "You are an art expert helping to create diverse visual styles for educational flashcards. Generate exactly 42 different artistic styles that could be used for images. Include a mix of: photography styles (macro, portrait, landscape, etc.), traditional art techniques (watercolor, oil painting, pencil sketch, etc.), digital art styles (3D render, pixel art, vector illustration, etc.), artistic movements (impressionist, pop art, art deco, etc.), and other creative visual approaches. Each style should be concise (2-5 words) and distinct from the others. Format your response as a simple list with one style per line, no numbers or bullets.", - }, - { - Role: openai.ChatMessageRoleUser, - Content: "Please generate 42 diverse artistic styles for creating educational images. 
Include various photography types, painting techniques, illustration styles, and artistic movements.", - }, - }, - Temperature: 0.8, - MaxTokens: 500, - } - - resp, err := c.client.CreateChatCompletion(ctx, req) - if err != nil { - return nil, fmt.Errorf("OpenAI API error: %w", err) - } - - if len(resp.Choices) == 0 || resp.Choices[0].Message.Content == "" { - return nil, fmt.Errorf("no response from OpenAI") - } - - // Parse the response into lines - content := strings.TrimSpace(resp.Choices[0].Message.Content) - lines := strings.Split(content, "\n") - - // Clean up and filter valid styles - var styles []string - for _, line := range lines { - style := strings.TrimSpace(line) - // Remove any numbering or bullets - style = strings.TrimPrefix(style, "- ") - style = strings.TrimPrefix(style, "* ") - style = strings.TrimPrefix(style, "• ") - // Remove numbers like "1. " or "42. " - if idx := strings.Index(style, ". "); idx > 0 && idx <= 3 { - style = style[idx+2:] - } - style = strings.TrimSpace(style) - - if style != "" && len(style) <= 50 { // Reasonable length check - styles = append(styles, style) - } - } - - // Ensure we have at least some styles - if len(styles) < 10 { - return nil, fmt.Errorf("insufficient styles generated (got %d, need at least 10)", len(styles)) - } - - fmt.Printf(" Generated %d artistic styles\n", len(styles)) - return styles, nil -} - -// saveStylesToFile saves the styles to a file -func (c *OpenAIClient) saveStylesToFile(filename string, styles []string) error { - // Ensure directory exists - dir := filepath.Dir(filename) - if err := os.MkdirAll(dir, 0755); err != nil { - return fmt.Errorf("failed to create directory: %w", err) - } - - // Write styles to file - content := strings.Join(styles, "\n") - if err := os.WriteFile(filename, []byte(content), 0644); err != nil { - return fmt.Errorf("failed to write file: %w", err) - } - - fmt.Printf(" Saved %d styles to %s\n", len(styles), filename) - return nil -} - -// loadStylesFromFile loads 
styles from a file -func (c *OpenAIClient) loadStylesFromFile(filename string) ([]string, error) { - data, err := os.ReadFile(filename) - if err != nil { - return nil, fmt.Errorf("failed to read file: %w", err) - } - - // Parse lines - lines := strings.Split(string(data), "\n") - var styles []string - for _, line := range lines { - style := strings.TrimSpace(line) - if style != "" { - styles = append(styles, style) - } - } - - if len(styles) == 0 { - return nil, fmt.Errorf("no styles found in file") - } - - fmt.Printf(" Loaded %d styles from cache\n", len(styles)) - return styles, nil +// getSizeHeight returns the height based on the configured size +func (c *OpenAIClient) getSizeHeight() int { + // All DALL-E sizes are square + return c.getSizeWidth() } |
