summary refs log tree commit diff
diff options
context:
space:
mode:
author Paul Buetow <paul@buetow.org> 2025-07-15 23:42:32 +0300
committer Paul Buetow <paul@buetow.org> 2025-07-15 23:42:32 +0300
commit b105333c061ea165b3b79317415cbb8b9cfb7c75 (patch)
tree c2682cc156c372d85ab52d514df4316ceda9071d
parent 61529facc2c5321de9f0ab9123cb1de25bcab62c (diff)
feat: add English translations and detailed attribution files
- Automatic Bulgarian to English translation for all words
- Save translations to separate _translation.txt files
- Include translations in Anki CSV export
- Add detailed attribution files for audio and images:
  - Audio: model, voice, speed, instructions, processed text
  - Image: model, size, quality, style, full prompt used
- Expand image styles to 42 different options (including superhero comic, yoga, etc.)
- Improve image prompts to strongly avoid text generation
- Fix image overwrite issue - now overwrites existing files instead of failing

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>
-rw-r--r-- README.md 15
-rw-r--r-- cmd/totalrecall/main.go 129
-rw-r--r-- internal/anki/generator.go 5
-rw-r--r-- internal/image/openai.go 62
4 files changed, 200 insertions, 11 deletions
diff --git a/README.md b/README.md
index 7177b66..58ee15a 100644
--- a/README.md
+++ b/README.md
@@ -11,12 +11,15 @@ It has mainly been vibe coded using Claude Code CLI.
- Audio generation using **OpenAI TTS**: High-quality, natural-sounding voices (requires API key)
- Random voice selection by default for variety
- Option to generate in all 11 available voices
+- Automatic Bulgarian to English translation
+ - Saves translations to separate text files
+ - Includes translations in Anki CSV export
- Image search and generation:
- **Pixabay**: Free stock photo search (optional API key)
- **Unsplash**: High-quality photo search (requires API key)
- **OpenAI DALL-E**: AI-generated educational images with random art styles (requires API key)
- Batch processing of multiple words
-- Anki-compatible CSV export
+- Anki-compatible CSV export with translations
- Configurable voice variants and speech speed
- Support for WAV and MP3 audio formats
- Audio and image caching to save API costs
@@ -206,6 +209,16 @@ Create a text file with one Bulgarian word per line:
вода
```
+### Output Files
+For each word, the tool generates:
+- `word.mp3` - Audio pronunciation (random voice)
+- `word_translation.txt` - English translation
+- `word_1.jpg`, `word_2.jpg`, etc. - Generated images
+- `anki_import.csv` - Anki import file (when using --anki flag)
+
+With `--all-voices` flag:
+- `word_alloy.mp3`, `word_nova.mp3`, etc. - Audio in all 11 voices
+
## Anki Import
1. Generate materials with the `--anki` flag
diff --git a/cmd/totalrecall/main.go b/cmd/totalrecall/main.go
index 2a43d51..658301d 100644
--- a/cmd/totalrecall/main.go
+++ b/cmd/totalrecall/main.go
@@ -212,6 +212,22 @@ func runCommand(cmd *cobra.Command, args []string) error {
}
func processWord(word string) error {
+ // Translate the word first
+ fmt.Printf(" Translating to English...\n")
+ translation, err := translateWord(word)
+ if err != nil {
+ fmt.Printf(" Warning: Translation failed: %v\n", err)
+ translation = "" // Continue without translation
+ } else {
+ fmt.Printf(" Translation: %s\n", translation)
+ // Store translation for Anki export
+ wordTranslations[word] = translation
+ // Save translation to file
+ if err := saveTranslation(word, translation); err != nil {
+ fmt.Printf(" Warning: Failed to save translation: %v\n", err)
+ }
+ }
+
// Generate audio
if !skipAudio {
fmt.Printf(" Generating audio...\n")
@@ -311,13 +327,25 @@ func generateAudioWithVoice(word, voice string) error {
filename := sanitizeFilename(word)
// Add voice name to filename if generating multiple voices
+ var outputFile string
if allVoices {
- outputFile := filepath.Join(outputDir, fmt.Sprintf("%s_%s.%s", filename, voice, audioFormat))
- return provider.GenerateAudio(ctx, word, outputFile)
+ outputFile = filepath.Join(outputDir, fmt.Sprintf("%s_%s.%s", filename, voice, audioFormat))
+ } else {
+ outputFile = filepath.Join(outputDir, fmt.Sprintf("%s.%s", filename, audioFormat))
}
- outputFile := filepath.Join(outputDir, fmt.Sprintf("%s.%s", filename, audioFormat))
- return provider.GenerateAudio(ctx, word, outputFile)
+ // Generate the audio
+ err = provider.GenerateAudio(ctx, word, outputFile)
+ if err != nil {
+ return err
+ }
+
+ // Save audio attribution
+ if err := saveAudioAttribution(word, outputFile, providerConfig); err != nil {
+ fmt.Printf(" Warning: Failed to save audio attribution: %v\n", err)
+ }
+
+ return nil
}
func downloadImages(word string) error {
@@ -388,7 +416,7 @@ func downloadImages(word string) error {
// Create downloader
downloadOpts := &image.DownloadOptions{
OutputDir: outputDir,
- OverwriteExisting: false,
+ OverwriteExisting: true, // Allow overwriting existing files
CreateDir: true,
FileNamePattern: "{word}_{index}",
MaxSizeBytes: 5 * 1024 * 1024, // 5MB
@@ -490,6 +518,13 @@ func generateAnkiCSV() error {
return fmt.Errorf("failed to generate cards: %w", err)
}
+ // Add translations to cards
+ for i := range gen.GetCards() {
+ if translation, ok := wordTranslations[gen.GetCards()[i].Bulgarian]; ok {
+ gen.GetCards()[i].Translation = translation
+ }
+ }
+
// Generate CSV
if err := gen.GenerateCSV(); err != nil {
return fmt.Errorf("failed to generate CSV: %w", err)
@@ -593,6 +628,90 @@ func listAvailableModels() error {
return nil
}
+func translateWord(word string) (string, error) {
+ // Use OpenAI to translate Bulgarian to English
+ apiKey := getOpenAIKey()
+ if apiKey == "" {
+ return "", fmt.Errorf("OpenAI API key not found")
+ }
+
+ client := openai.NewClient(apiKey)
+ ctx := context.Background()
+
+ req := openai.ChatCompletionRequest{
+ Model: openai.GPT4oMini,
+ Messages: []openai.ChatCompletionMessage{
+ {
+ Role: openai.ChatMessageRoleUser,
+ Content: fmt.Sprintf("Translate the Bulgarian word '%s' to English. Respond with only the English translation, nothing else.", word),
+ },
+ },
+ MaxTokens: 50,
+ Temperature: 0.3,
+ }
+
+ resp, err := client.CreateChatCompletion(ctx, req)
+ if err != nil {
+ return "", fmt.Errorf("OpenAI API error: %w", err)
+ }
+
+ if len(resp.Choices) == 0 {
+ return "", fmt.Errorf("no translation returned")
+ }
+
+ translation := strings.TrimSpace(resp.Choices[0].Message.Content)
+ return translation, nil
+}
+
+func saveTranslation(word, translation string) error {
+ // Save translation to a text file
+ filename := sanitizeFilename(word)
+ outputFile := filepath.Join(outputDir, fmt.Sprintf("%s_translation.txt", filename))
+
+ content := fmt.Sprintf("%s = %s\n", word, translation)
+
+ if err := os.WriteFile(outputFile, []byte(content), 0644); err != nil {
+ return fmt.Errorf("failed to write translation file: %w", err)
+ }
+
+ return nil
+}
+
+// Global map to store translations for Anki export
+var wordTranslations = make(map[string]string)
+
+func saveAudioAttribution(word, audioFile string, config *audio.Config) error {
+ // Create attribution text
+ attribution := fmt.Sprintf("Audio generated by OpenAI TTS\n\n")
+ attribution += fmt.Sprintf("Bulgarian word: %s\n", word)
+ attribution += fmt.Sprintf("Model: %s\n", config.OpenAIModel)
+ attribution += fmt.Sprintf("Voice: %s\n", config.OpenAIVoice)
+ attribution += fmt.Sprintf("Speed: %.2f\n", config.OpenAISpeed)
+
+ if config.OpenAIInstruction != "" {
+ attribution += fmt.Sprintf("\nVoice instructions:\n%s\n", config.OpenAIInstruction)
+ }
+
+ // Add preprocessing information
+ cleanedWord := strings.TrimSpace(word)
+ punctuationToRemove := []string{"!", "?", ".", ",", ";", ":", "\"", "'", "(", ")", "[", "]", "{", "}", "-", "—", "–"}
+ for _, punct := range punctuationToRemove {
+ cleanedWord = strings.ReplaceAll(cleanedWord, punct, "")
+ }
+ processedText := fmt.Sprintf("%s...", strings.TrimSpace(cleanedWord))
+ attribution += fmt.Sprintf("\nProcessed text sent to TTS: %s\n", processedText)
+
+ attribution += fmt.Sprintf("\nGenerated at: %s\n", time.Now().Format("2006-01-02 15:04:05"))
+
+ // Save to file
+ attrPath := strings.TrimSuffix(audioFile, filepath.Ext(audioFile)) + "_attribution.txt"
+ if err := os.WriteFile(attrPath, []byte(attribution), 0644); err != nil {
+ return fmt.Errorf("failed to write audio attribution file: %w", err)
+ }
+
+ return nil
+}
+
func main() {
if err := rootCmd.Execute(); err != nil {
os.Exit(1)
diff --git a/internal/anki/generator.go b/internal/anki/generator.go
index 22221f3..ad41903 100644
--- a/internal/anki/generator.go
+++ b/internal/anki/generator.go
@@ -59,6 +59,11 @@ func (g *Generator) AddCard(card Card) {
g.cards = append(g.cards, card)
}
+// GetCards returns a slice of all cards for modification
+func (g *Generator) GetCards() []Card {
+ return g.cards
+}
+
// GenerateCSV creates a CSV file for Anki import
func (g *Generator) GenerateCSV() error {
// Create output file
diff --git a/internal/image/openai.go b/internal/image/openai.go
index ee05c80..c4b2e9d 100644
--- a/internal/image/openai.go
+++ b/internal/image/openai.go
@@ -11,6 +11,7 @@ import (
"os"
"path/filepath"
"strings"
+ "time"
"github.com/sashabaranov/go-openai"
)
@@ -25,6 +26,7 @@ type OpenAIClient struct {
style string // natural or vivid (dall-e-3 only)
cacheDir string
enableCache bool
+ lastPrompt string // Store the last used prompt for attribution
}
// OpenAIConfig holds configuration for the OpenAI image provider
@@ -124,6 +126,9 @@ func (c *OpenAIClient) Search(ctx context.Context, opts *SearchOptions) ([]Searc
// Create educational prompt
prompt := c.createEducationalPrompt(opts.Query, translatedWord)
+ // Store the prompt for attribution
+ c.lastPrompt = prompt
+
// Log the prompt to stdout for debugging
fmt.Printf("OpenAI Image Generation Prompt: %s\n", prompt)
fmt.Printf("OpenAI Image Generation: Using model '%s' with size '%s'\n", c.model, c.size)
@@ -221,7 +226,16 @@ func (c *OpenAIClient) Download(ctx context.Context, url string) (io.ReadCloser,
// GetAttribution returns the required attribution text
func (c *OpenAIClient) GetAttribution(result *SearchResult) string {
- return "Image generated by OpenAI DALL-E"
+ attribution := fmt.Sprintf("Image generated by OpenAI DALL-E\n\n")
+ attribution += fmt.Sprintf("Model: %s\n", c.model)
+ attribution += fmt.Sprintf("Size: %s\n", c.size)
+ if c.model == "dall-e-3" {
+ attribution += fmt.Sprintf("Quality: %s\n", c.quality)
+ attribution += fmt.Sprintf("Style: %s\n", c.style)
+ }
+ attribution += fmt.Sprintf("\nPrompt used:\n%s\n", c.lastPrompt)
+ attribution += fmt.Sprintf("\nGenerated at: %s\n", time.Now().Format("2006-01-02 15:04:05"))
+ return attribution
}
// Name returns the name of the provider
@@ -231,8 +245,9 @@ func (c *OpenAIClient) Name() string {
// createEducationalPrompt generates a prompt optimized for language learning
func (c *OpenAIClient) createEducationalPrompt(bulgarianWord, englishTranslation string) string {
- // Define different art styles for variety
+ // Define different art styles for variety (42 styles total)
styles := []string{
+ // Original styles (1-10)
"photorealistic, high quality photograph",
"detailed digital illustration, clean vector art style",
"vibrant cartoon style, animated movie quality",
@@ -243,9 +258,46 @@ func (c *OpenAIClient) createEducationalPrompt(bulgarianWord, englishTranslation
"oil painting, classical art style",
"paper cut art, layered craft style",
"isometric illustration, technical drawing style",
- "superhero comic book style, dynamic action pose, bold colors",
- "yoga/wellness illustration style, peaceful zen aesthetic",
+
+ // Requested styles (11-13)
+ "superhero comic book style, dynamic action pose, bold colors, Marvel/DC aesthetic",
+ "super-realistic person practicing yoga, serene wellness photography",
"cute illustration with cats interacting with the subject, whimsical cat-themed",
+
+ // Additional artistic styles (14-25)
+ "impressionist painting style, Monet-inspired brushstrokes",
+ "art nouveau style, decorative organic forms, Mucha-inspired",
+ "pop art style, Andy Warhol inspired, bold contrasting colors",
+ "Japanese ukiyo-e woodblock print style, traditional aesthetic",
+ "steampunk illustration, Victorian era mechanical elements",
+ "cyberpunk neon aesthetic, futuristic digital art",
+ "medieval illuminated manuscript style, gold leaf details",
+ "graffiti street art style, urban spray paint aesthetic",
+ "stained glass window art, cathedral-inspired design",
+ "mosaic tile art style, Byzantine-inspired patterns",
+ "art deco style, geometric patterns, 1920s aesthetic",
+ "surrealist style, Salvador Dali inspired dreamlike quality",
+
+ // Photography styles (26-32)
+ "macro photography style, extreme close-up detail",
+ "vintage polaroid photograph, retro instant camera aesthetic",
+ "film noir style, dramatic black and white photography",
+ "golden hour photography, warm sunset lighting",
+ "underwater photography style, ethereal aquatic atmosphere",
+ "aerial drone photography, bird's eye view perspective",
+ "long exposure photography, motion blur effects",
+
+ // Modern digital styles (33-42)
+ "vaporwave aesthetic, 80s-90s retro digital art",
+ "low poly 3D art style, geometric simplified forms",
+ "pixel art style, 8-bit retro video game aesthetic",
+ "glitch art style, digital distortion effects",
+ "double exposure photography, artistic overlay effect",
+ "tilt-shift photography, miniature world effect",
+ "infrared photography style, otherworldly color palette",
+ "holographic iridescent style, rainbow prismatic effects",
+ "origami paper folding art style, geometric paper craft",
+ "chalk art style, sidewalk drawing aesthetic",
}
// Select a random style
@@ -258,7 +310,7 @@ func (c *OpenAIClient) createEducationalPrompt(bulgarianWord, englishTranslation
"This is for the Bulgarian word '%s' which means %s. "+
"The image should be educational and suitable for language learning flashcards. "+
"Requirements: single main subject, plain background, clear and recognizable. "+
- "No text, labels, or writing in the image.",
+ "IMPORTANT: No text whatsoever. Do not include any words, letters, typography, labels, captions, or writing of any kind. Image only, without any text elements.",
selectedStyle, englishTranslation, bulgarianWord, englishTranslation,
)
}