diff options
| author | Paul Buetow <paul@buetow.org> | 2025-07-15 23:42:32 +0300 |
|---|---|---|
| committer | Paul Buetow <paul@buetow.org> | 2025-07-15 23:42:32 +0300 |
| commit | b105333c061ea165b3b79317415cbb8b9cfb7c75 (patch) | |
| tree | c2682cc156c372d85ab52d514df4316ceda9071d | |
| parent | 61529facc2c5321de9f0ab9123cb1de25bcab62c (diff) | |
feat: add English translations and detailed attribution files
- Automatic Bulgarian to English translation for all words
- Save translations to separate _translation.txt files
- Include translations in Anki CSV export
- Add detailed attribution files for audio and images:
  - Audio: model, voice, speed, instructions, processed text
  - Image: model, size, quality, style, full prompt used
- Expand image styles to 42 different options (including superhero comic, yoga, etc.)
- Improve image prompts to strongly avoid text generation
- Fix image overwrite issue - now overwrites existing files instead of failing
🤖 Generated with [Claude Code](https://claude.ai/code)
Co-Authored-By: Claude <noreply@anthropic.com>
| -rw-r--r-- | README.md | 15 | ||||
| -rw-r--r-- | cmd/totalrecall/main.go | 129 | ||||
| -rw-r--r-- | internal/anki/generator.go | 5 | ||||
| -rw-r--r-- | internal/image/openai.go | 62 |
4 files changed, 200 insertions, 11 deletions
@@ -11,12 +11,15 @@ It has mainly been vibe coded using Claude Code CLI. - Audio generation using **OpenAI TTS**: High-quality, natural-sounding voices (requires API key) - Random voice selection by default for variety - Option to generate in all 11 available voices +- Automatic Bulgarian to English translation + - Saves translations to separate text files + - Includes translations in Anki CSV export - Image search and generation: - **Pixabay**: Free stock photo search (optional API key) - **Unsplash**: High-quality photo search (requires API key) - **OpenAI DALL-E**: AI-generated educational images with random art styles (requires API key) - Batch processing of multiple words -- Anki-compatible CSV export +- Anki-compatible CSV export with translations - Configurable voice variants and speech speed - Support for WAV and MP3 audio formats - Audio and image caching to save API costs @@ -206,6 +209,16 @@ Create a text file with one Bulgarian word per line: вода ``` +### Output Files +For each word, the tool generates: +- `word.mp3` - Audio pronunciation (random voice) +- `word_translation.txt` - English translation +- `word_1.jpg`, `word_2.jpg`, etc. - Generated images +- `anki_import.csv` - Anki import file (when using --anki flag) + +With `--all-voices` flag: +- `word_alloy.mp3`, `word_nova.mp3`, etc. - Audio in all 11 voices + ## Anki Import 1. 
Generate materials with the `--anki` flag diff --git a/cmd/totalrecall/main.go b/cmd/totalrecall/main.go index 2a43d51..658301d 100644 --- a/cmd/totalrecall/main.go +++ b/cmd/totalrecall/main.go @@ -212,6 +212,22 @@ func runCommand(cmd *cobra.Command, args []string) error { } func processWord(word string) error { + // Translate the word first + fmt.Printf(" Translating to English...\n") + translation, err := translateWord(word) + if err != nil { + fmt.Printf(" Warning: Translation failed: %v\n", err) + translation = "" // Continue without translation + } else { + fmt.Printf(" Translation: %s\n", translation) + // Store translation for Anki export + wordTranslations[word] = translation + // Save translation to file + if err := saveTranslation(word, translation); err != nil { + fmt.Printf(" Warning: Failed to save translation: %v\n", err) + } + } + // Generate audio if !skipAudio { fmt.Printf(" Generating audio...\n") @@ -311,13 +327,25 @@ func generateAudioWithVoice(word, voice string) error { filename := sanitizeFilename(word) // Add voice name to filename if generating multiple voices + var outputFile string if allVoices { - outputFile := filepath.Join(outputDir, fmt.Sprintf("%s_%s.%s", filename, voice, audioFormat)) - return provider.GenerateAudio(ctx, word, outputFile) + outputFile = filepath.Join(outputDir, fmt.Sprintf("%s_%s.%s", filename, voice, audioFormat)) + } else { + outputFile = filepath.Join(outputDir, fmt.Sprintf("%s.%s", filename, audioFormat)) } - outputFile := filepath.Join(outputDir, fmt.Sprintf("%s.%s", filename, audioFormat)) - return provider.GenerateAudio(ctx, word, outputFile) + // Generate the audio + err = provider.GenerateAudio(ctx, word, outputFile) + if err != nil { + return err + } + + // Save audio attribution + if err := saveAudioAttribution(word, outputFile, providerConfig); err != nil { + fmt.Printf(" Warning: Failed to save audio attribution: %v\n", err) + } + + return nil } func downloadImages(word string) error { @@ -388,7 +416,7 
@@ func downloadImages(word string) error { // Create downloader downloadOpts := &image.DownloadOptions{ OutputDir: outputDir, - OverwriteExisting: false, + OverwriteExisting: true, // Allow overwriting existing files CreateDir: true, FileNamePattern: "{word}_{index}", MaxSizeBytes: 5 * 1024 * 1024, // 5MB @@ -490,6 +518,13 @@ func generateAnkiCSV() error { return fmt.Errorf("failed to generate cards: %w", err) } + // Add translations to cards + for i := range gen.GetCards() { + if translation, ok := wordTranslations[gen.GetCards()[i].Bulgarian]; ok { + gen.GetCards()[i].Translation = translation + } + } + // Generate CSV if err := gen.GenerateCSV(); err != nil { return fmt.Errorf("failed to generate CSV: %w", err) @@ -593,6 +628,90 @@ func listAvailableModels() error { return nil } +func translateWord(word string) (string, error) { + // Use OpenAI to translate Bulgarian to English + apiKey := getOpenAIKey() + if apiKey == "" { + return "", fmt.Errorf("OpenAI API key not found") + } + + client := openai.NewClient(apiKey) + ctx := context.Background() + + req := openai.ChatCompletionRequest{ + Model: openai.GPT4oMini, + Messages: []openai.ChatCompletionMessage{ + { + Role: openai.ChatMessageRoleUser, + Content: fmt.Sprintf("Translate the Bulgarian word '%s' to English. 
Respond with only the English translation, nothing else.", word), + }, + }, + MaxTokens: 50, + Temperature: 0.3, + } + + resp, err := client.CreateChatCompletion(ctx, req) + if err != nil { + return "", fmt.Errorf("OpenAI API error: %w", err) + } + + if len(resp.Choices) == 0 { + return "", fmt.Errorf("no translation returned") + } + + translation := strings.TrimSpace(resp.Choices[0].Message.Content) + return translation, nil +} + +func saveTranslation(word, translation string) error { + // Save translation to a text file + filename := sanitizeFilename(word) + outputFile := filepath.Join(outputDir, fmt.Sprintf("%s_translation.txt", filename)) + + content := fmt.Sprintf("%s = %s\n", word, translation) + + if err := os.WriteFile(outputFile, []byte(content), 0644); err != nil { + return fmt.Errorf("failed to write translation file: %w", err) + } + + return nil +} + +// Global map to store translations for Anki export +var wordTranslations = make(map[string]string) + +func saveAudioAttribution(word, audioFile string, config *audio.Config) error { + // Create attribution text + attribution := fmt.Sprintf("Audio generated by OpenAI TTS\n\n") + attribution += fmt.Sprintf("Bulgarian word: %s\n", word) + attribution += fmt.Sprintf("Model: %s\n", config.OpenAIModel) + attribution += fmt.Sprintf("Voice: %s\n", config.OpenAIVoice) + attribution += fmt.Sprintf("Speed: %.2f\n", config.OpenAISpeed) + + if config.OpenAIInstruction != "" { + attribution += fmt.Sprintf("\nVoice instructions:\n%s\n", config.OpenAIInstruction) + } + + // Add preprocessing information + cleanedWord := strings.TrimSpace(word) + punctuationToRemove := []string{"!", "?", ".", ",", ";", ":", "\"", "'", "(", ")", "[", "]", "{", "}", "-", "—", "–"} + for _, punct := range punctuationToRemove { + cleanedWord = strings.ReplaceAll(cleanedWord, punct, "") + } + processedText := fmt.Sprintf("%s...", strings.TrimSpace(cleanedWord)) + attribution += fmt.Sprintf("\nProcessed text sent to TTS: %s\n", processedText) + 
+ attribution += fmt.Sprintf("\nGenerated at: %s\n", time.Now().Format("2006-01-02 15:04:05")) + + // Save to file + attrPath := strings.TrimSuffix(audioFile, filepath.Ext(audioFile)) + "_attribution.txt" + if err := os.WriteFile(attrPath, []byte(attribution), 0644); err != nil { + return fmt.Errorf("failed to write audio attribution file: %w", err) + } + + return nil +} + func main() { if err := rootCmd.Execute(); err != nil { os.Exit(1) diff --git a/internal/anki/generator.go b/internal/anki/generator.go index 22221f3..ad41903 100644 --- a/internal/anki/generator.go +++ b/internal/anki/generator.go @@ -59,6 +59,11 @@ func (g *Generator) AddCard(card Card) { g.cards = append(g.cards, card) } +// GetCards returns a slice of all cards for modification +func (g *Generator) GetCards() []Card { + return g.cards +} + // GenerateCSV creates a CSV file for Anki import func (g *Generator) GenerateCSV() error { // Create output file diff --git a/internal/image/openai.go b/internal/image/openai.go index ee05c80..c4b2e9d 100644 --- a/internal/image/openai.go +++ b/internal/image/openai.go @@ -11,6 +11,7 @@ import ( "os" "path/filepath" "strings" + "time" "github.com/sashabaranov/go-openai" ) @@ -25,6 +26,7 @@ type OpenAIClient struct { style string // natural or vivid (dall-e-3 only) cacheDir string enableCache bool + lastPrompt string // Store the last used prompt for attribution } // OpenAIConfig holds configuration for the OpenAI image provider @@ -124,6 +126,9 @@ func (c *OpenAIClient) Search(ctx context.Context, opts *SearchOptions) ([]Searc // Create educational prompt prompt := c.createEducationalPrompt(opts.Query, translatedWord) + // Store the prompt for attribution + c.lastPrompt = prompt + // Log the prompt to stdout for debugging fmt.Printf("OpenAI Image Generation Prompt: %s\n", prompt) fmt.Printf("OpenAI Image Generation: Using model '%s' with size '%s'\n", c.model, c.size) @@ -221,7 +226,16 @@ func (c *OpenAIClient) Download(ctx context.Context, url string) 
(io.ReadCloser, // GetAttribution returns the required attribution text func (c *OpenAIClient) GetAttribution(result *SearchResult) string { - return "Image generated by OpenAI DALL-E" + attribution := fmt.Sprintf("Image generated by OpenAI DALL-E\n\n") + attribution += fmt.Sprintf("Model: %s\n", c.model) + attribution += fmt.Sprintf("Size: %s\n", c.size) + if c.model == "dall-e-3" { + attribution += fmt.Sprintf("Quality: %s\n", c.quality) + attribution += fmt.Sprintf("Style: %s\n", c.style) + } + attribution += fmt.Sprintf("\nPrompt used:\n%s\n", c.lastPrompt) + attribution += fmt.Sprintf("\nGenerated at: %s\n", time.Now().Format("2006-01-02 15:04:05")) + return attribution } // Name returns the name of the provider @@ -231,8 +245,9 @@ func (c *OpenAIClient) Name() string { // createEducationalPrompt generates a prompt optimized for language learning func (c *OpenAIClient) createEducationalPrompt(bulgarianWord, englishTranslation string) string { - // Define different art styles for variety + // Define different art styles for variety (42 styles total) styles := []string{ + // Original styles (1-10) "photorealistic, high quality photograph", "detailed digital illustration, clean vector art style", "vibrant cartoon style, animated movie quality", @@ -243,9 +258,46 @@ func (c *OpenAIClient) createEducationalPrompt(bulgarianWord, englishTranslation "oil painting, classical art style", "paper cut art, layered craft style", "isometric illustration, technical drawing style", - "superhero comic book style, dynamic action pose, bold colors", - "yoga/wellness illustration style, peaceful zen aesthetic", + + // Requested styles (11-13) + "superhero comic book style, dynamic action pose, bold colors, Marvel/DC aesthetic", + "super-realistic person practicing yoga, serene wellness photography", "cute illustration with cats interacting with the subject, whimsical cat-themed", + + // Additional artistic styles (14-25) + "impressionist painting style, Monet-inspired 
brushstrokes", + "art nouveau style, decorative organic forms, Mucha-inspired", + "pop art style, Andy Warhol inspired, bold contrasting colors", + "Japanese ukiyo-e woodblock print style, traditional aesthetic", + "steampunk illustration, Victorian era mechanical elements", + "cyberpunk neon aesthetic, futuristic digital art", + "medieval illuminated manuscript style, gold leaf details", + "graffiti street art style, urban spray paint aesthetic", + "stained glass window art, cathedral-inspired design", + "mosaic tile art style, Byzantine-inspired patterns", + "art deco style, geometric patterns, 1920s aesthetic", + "surrealist style, Salvador Dali inspired dreamlike quality", + + // Photography styles (26-32) + "macro photography style, extreme close-up detail", + "vintage polaroid photograph, retro instant camera aesthetic", + "film noir style, dramatic black and white photography", + "golden hour photography, warm sunset lighting", + "underwater photography style, ethereal aquatic atmosphere", + "aerial drone photography, bird's eye view perspective", + "long exposure photography, motion blur effects", + + // Modern digital styles (33-42) + "vaporwave aesthetic, 80s-90s retro digital art", + "low poly 3D art style, geometric simplified forms", + "pixel art style, 8-bit retro video game aesthetic", + "glitch art style, digital distortion effects", + "double exposure photography, artistic overlay effect", + "tilt-shift photography, miniature world effect", + "infrared photography style, otherworldly color palette", + "holographic iridescent style, rainbow prismatic effects", + "origami paper folding art style, geometric paper craft", + "chalk art style, sidewalk drawing aesthetic", } // Select a random style @@ -258,7 +310,7 @@ func (c *OpenAIClient) createEducationalPrompt(bulgarianWord, englishTranslation "This is for the Bulgarian word '%s' which means %s. "+ "The image should be educational and suitable for language learning flashcards. 
"+ "Requirements: single main subject, plain background, clear and recognizable. "+ - "No text, labels, or writing in the image.", + "IMPORTANT: No text whatsoever. Do not include any words, letters, typography, labels, captions, or writing of any kind. Image only, without any text elements.", selectedStyle, englishTranslation, bulgarianWord, englishTranslation, ) } |
