diff options
| author | Paul Buetow <paul@buetow.org> | 2025-07-17 13:49:23 +0300 |
|---|---|---|
| committer | Paul Buetow <paul@buetow.org> | 2025-07-17 13:49:23 +0300 |
| commit | 6d73daa3abf06b34bb80868571d6d6be464f1c62 (patch) | |
| tree | b01007d9273359ac62301364e49416d976e28e8b | |
| parent | b2b007699b2a42ed86970f59d597034679265e91 (diff) | |
feat: add contextual scene generation for flashcard images (v0.2.0)
- Generate meaningful scenes for each Bulgarian word using OpenAI
- Scene descriptions use English words for DALL-E compatibility
- Creates more engaging and memorable flashcards with context
- Bump version to v0.2.0
🤖 Generated with [Claude Code](https://claude.ai/code)
Co-Authored-By: Claude <noreply@anthropic.com>
| -rw-r--r-- | internal/image/openai.go | 68 | ||||
| -rw-r--r-- | internal/version.go | 2 | ||||
| -rwxr-xr-x | test_scene_generation.sh | 20 |
3 files changed, 88 insertions, 2 deletions
diff --git a/internal/image/openai.go b/internal/image/openai.go index f055ec1..ac66a45 100644 --- a/internal/image/openai.go +++ b/internal/image/openai.go @@ -256,10 +256,29 @@ func (c *OpenAIClient) GetLastPrompt() string { // createEducationalPrompt generates a prompt optimized for language learning func (c *OpenAIClient) createEducationalPrompt(bulgarianWord, englishTranslation string) string { + // Generate a scene description for the word + scene, err := c.generateSceneDescription(context.Background(), bulgarianWord, englishTranslation) + if err != nil { + fmt.Printf(" Failed to generate scene: %v, using basic prompt\n", err) + scene = "" + } + // 25% chance to ask OpenAI for a creative style if rand.Float32() < 0.25 { if creativeStyle := c.getCreativeStyleFromOpenAI(context.Background(), englishTranslation); creativeStyle != "" { fmt.Printf(" Using OpenAI-suggested style: %s\n", creativeStyle) + + // If we have a scene, incorporate it + if scene != "" { + return fmt.Sprintf( + "Generate %s depicting: %s. "+ + "The image should be educational and suitable for language learning flashcards. "+ + "Requirements: clear and recognizable, focus on the main subject. "+ + "IMPORTANT: No text whatsoever. Do not include any words, letters, typography, labels, captions, or writing of any kind. Image only, without any text elements.", + creativeStyle, scene, + ) + } + return fmt.Sprintf( "Generate %s of %s. "+ "The image should be educational and suitable for language learning flashcards. "+ @@ -329,7 +348,18 @@ func (c *OpenAIClient) createEducationalPrompt(bulgarianWord, englishTranslation selectedStyle := styles[rand.Intn(len(styles))] fmt.Printf(" Using image style: %s\n", selectedStyle) - // Create a simple, clear prompt for educational images + // Create prompt with scene if available + if scene != "" { + return fmt.Sprintf( + "Generate a %s depicting: %s. "+ + "The image should be educational and suitable for language learning flashcards. 
"+ + "Requirements: clear and recognizable, focus on the main subject. "+ + "IMPORTANT: No text whatsoever. Do not include any words, letters, typography, labels, captions, or writing of any kind. Image only, without any text elements.", + selectedStyle, scene, + ) + } + + // Fallback to basic prompt if no scene return fmt.Sprintf( "Generate a %s of %s. "+ "The image should be educational and suitable for language learning flashcards. "+ @@ -371,6 +401,42 @@ func (c *OpenAIClient) translateBulgarianToEnglish(ctx context.Context, word str return translation, nil } +// generateSceneDescription generates a contextual scene description for the word +func (c *OpenAIClient) generateSceneDescription(ctx context.Context, bulgarianWord, englishTranslation string) (string, error) { + // Use OpenAI to generate a scene description + fmt.Printf("OpenAI Scene Generation: Creating scene for '%s' (%s)\n", bulgarianWord, englishTranslation) + + req := openai.ChatCompletionRequest{ + Model: openai.GPT4oMini, + Messages: []openai.ChatCompletionMessage{ + { + Role: openai.ChatMessageRoleSystem, + Content: "You are helping create educational flashcards for language learning. Generate a brief, vivid scene description that incorporates the given English word in a memorable, contextual way. The scene should be visually interesting and help with memory retention. Keep it to 1-2 sentences, focusing on visual elements that can be illustrated. IMPORTANT: Use the English word in your scene description, NOT the Bulgarian word.", + }, + { + Role: openai.ChatMessageRoleUser, + Content: fmt.Sprintf("Create a scene description for the English word '%s' (Bulgarian: %s) that would make a memorable flashcard image. 
Use the English word '%s' in the scene, not the Bulgarian word.", englishTranslation, bulgarianWord, englishTranslation), + }, + }, + Temperature: 0.7, // Balanced temperature for creativity with consistency + MaxTokens: 100, + } + + resp, err := c.client.CreateChatCompletion(ctx, req) + if err != nil { + return "", fmt.Errorf("scene generation failed: %w", err) + } + + if len(resp.Choices) == 0 || resp.Choices[0].Message.Content == "" { + return "", fmt.Errorf("no scene description received") + } + + scene := strings.TrimSpace(resp.Choices[0].Message.Content) + fmt.Printf("Generated scene: %s\n", scene) + + return scene, nil +} + // getCacheFilePath generates a cache file path for the given word func (c *OpenAIClient) getCacheFilePath(word string) string { // Create a hash of the word and settings diff --git a/internal/version.go b/internal/version.go index 0894830..1595476 100644 --- a/internal/version.go +++ b/internal/version.go @@ -1,3 +1,3 @@ package internal -const Version = "0.1.0" +const Version = "0.2.0" diff --git a/test_scene_generation.sh b/test_scene_generation.sh new file mode 100755 index 0000000..0355167 --- /dev/null +++ b/test_scene_generation.sh @@ -0,0 +1,20 @@ +#!/bin/bash + +# Test scene generation with Bulgarian words +echo "Testing scene generation for Bulgarian flashcards..." + +# Test words +words=("ябълка" "котка" "куче" "хляб" "вода" "книга") + +for word in "${words[@]}"; do + echo "" + echo "====================" + echo "Testing word: $word" + echo "====================" + go run ./cmd/totalrecall "$word" --provider openai + echo "" + echo "Press Enter to continue to next word..." + read +done + +echo "Scene generation test complete!"
\ No newline at end of file |
