package resolver import ( "encoding/json" "log" "math/rand" "net" "os" "path/filepath" "strings" "sync" "time" ) // DNSResolver provides thread-safe reverse DNS resolution with caching and rate limiting type DNSResolver struct { cache map[string]*DNSCacheEntry // IP -> cache entry with metadata mu sync.RWMutex rateLimiter chan struct{} // Channel for rate limiting DNS lookups cacheFile string // Path to persistent cache file } // DNSCacheEntry represents a cached DNS resolution with timestamp type DNSCacheEntry struct { IP string `json:"ip"` Hostname string `json:"hostname"` CachedAt time.Time `json:"cached_at"` LastRetry time.Time `json:"last_retry,omitempty"` // Last retry attempt for failed lookups RetryCount int `json:"retry_count,omitempty"` // Number of retry attempts } const ( // CacheTTL is the default time-to-live for cache entries (7 days) CacheTTL = 7 * 24 * time.Hour // FailedLookupRetryInterval is how often to retry failed DNS lookups (1 minute) FailedLookupRetryInterval = 1 * time.Minute // MaxRetries is the maximum number of retries for failed lookups MaxRetries = 5 ) // NewDNSResolver creates a new DNS resolver with an empty cache // Rate limited to ~100 DNS lookups per second to avoid overwhelming DNS servers // Loads persistent cache from disk if available func NewDNSResolver() *DNSResolver { // Determine cache file location cacheDir := os.Getenv("HOME") if cacheDir == "" { cacheDir = "/tmp" } cacheFile := filepath.Join(cacheDir, ".epimetheus-dns-cache.json") resolver := &DNSResolver{ cache: make(map[string]*DNSCacheEntry), rateLimiter: make(chan struct{}, 10), // Allow 10 concurrent lookups cacheFile: cacheFile, } // Load persistent cache from disk resolver.loadCache() // Clean expired entries on startup resolver.cleanExpiredEntries() // Start rate limiter goroutine that releases tokens every 10ms (100 per second) go func() { ticker := time.NewTicker(10 * time.Millisecond) // 100 requests/second defer ticker.Stop() for range ticker.C { select { case resolver.rateLimiter <- struct{}{}: default: // Channel full, skip this tick } } }() return resolver } // ResolveIP attempts to resolve an IP address to a hostname // Returns (hostname, true) if successful, ("", false) if resolution failed or not an IP // Results are cached to avoid repeated DNS lookups // Failed lookups are retried after FailedLookupRetryInterval func (r *DNSResolver) ResolveIP(ip string) (string, bool) { // Validate it's an IP address first if !isIPAddress(ip) { return "", false } // Check cache first (read lock) r.mu.RLock() entry, cached := r.cache[ip] r.mu.RUnlock() if cached { // Check if this is a failed lookup that should be retried if entry.Hostname == "" && entry.RetryCount < MaxRetries { // Check if enough time has passed since last retry if time.Since(entry.LastRetry) >= FailedLookupRetryInterval { // Retry the lookup hostname := r.retryLookup(ip, entry) return hostname, hostname != "" } } // Return cached result return entry.Hostname, entry.Hostname != "" } // Cache miss - resolve and cache the result hostname := r.resolveAndCache(ip) return hostname, hostname != "" } // resolveAndCache performs DNS lookup and caches the result // Returns hostname on success, empty string on failure // Rate limited to prevent overwhelming DNS servers func (r *DNSResolver) resolveAndCache(ip string) string { // Wait for rate limiter token <-r.rateLimiter // Perform reverse DNS lookup names, err := net.LookupAddr(ip) var hostname string now := time.Now() entry := &DNSCacheEntry{ IP: ip, CachedAt: now, } if err == nil && len(names) > 0 { // Take first result and remove trailing dot hostname = strings.TrimSuffix(names[0], ".") entry.Hostname = hostname } else { // Failed lookup - log and set up for retry if err != nil { log.Printf("⚠️ DNS resolution failed for %s: %v (will retry)", ip, err) } else { log.Printf("⚠️ DNS resolution failed for %s: no PTR record found (will retry)", ip) } entry.LastRetry = now entry.RetryCount = 0 } // Cache the result (write lock) r.mu.Lock() r.cache[ip] = entry r.mu.Unlock() return hostname } // retryLookup retries a failed DNS lookup func (r *DNSResolver) retryLookup(ip string, oldEntry *DNSCacheEntry) string { // Wait for rate limiter token <-r.rateLimiter // Perform reverse DNS lookup names, err := net.LookupAddr(ip) var hostname string now := time.Now() r.mu.Lock() defer r.mu.Unlock() if err == nil && len(names) > 0 { // Successful retry! hostname = strings.TrimSuffix(names[0], ".") log.Printf("✅ DNS retry successful for %s -> %s (after %d attempts)", ip, hostname, oldEntry.RetryCount+1) r.cache[ip] = &DNSCacheEntry{ IP: ip, Hostname: hostname, CachedAt: now, } } else { // Still failed - increment retry count newRetryCount := oldEntry.RetryCount + 1 if err != nil { log.Printf("⚠️ DNS retry %d/%d failed for %s: %v", newRetryCount, MaxRetries, ip, err) } else { log.Printf("⚠️ DNS retry %d/%d failed for %s: no PTR record found", newRetryCount, MaxRetries, ip) } r.cache[ip] = &DNSCacheEntry{ IP: ip, Hostname: "", CachedAt: oldEntry.CachedAt, LastRetry: now, RetryCount: newRetryCount, } // Log if we've exhausted all retries if newRetryCount >= MaxRetries { log.Printf("❌ Giving up on DNS resolution for %s after %d attempts", ip, MaxRetries) } } return hostname } // isIPAddress checks if a string is a valid IP address func isIPAddress(s string) bool { return net.ParseIP(s) != nil } // GetCacheSize returns the current cache size (useful for debugging/testing) func (r *DNSResolver) GetCacheSize() int { r.mu.RLock() defer r.mu.RUnlock() return len(r.cache) } // loadCache loads the persistent DNS cache from disk func (r *DNSResolver) loadCache() { data, err := os.ReadFile(r.cacheFile) if err != nil { // Cache file doesn't exist or can't be read - start with empty cache return } var entries []DNSCacheEntry if err := json.Unmarshal(data, &entries); err != nil { // Invalid cache file - start with empty cache return } r.mu.Lock() defer r.mu.Unlock() // Load entries into cache for i := range entries { r.cache[entries[i].IP] = &entries[i] } } // SaveCache saves the current DNS cache to disk func (r *DNSResolver) SaveCache() error { r.mu.RLock() entries := make([]DNSCacheEntry, 0, len(r.cache)) for _, entry := range r.cache { entries = append(entries, *entry) } r.mu.RUnlock() data, err := json.MarshalIndent(entries, "", " ") if err != nil { return err } return os.WriteFile(r.cacheFile, data, 0644) } // cleanExpiredEntries removes entries older than CacheTTL // Adds randomization to prevent all entries expiring at once func (r *DNSResolver) cleanExpiredEntries() { r.mu.Lock() defer r.mu.Unlock() now := time.Now() for ip, entry := range r.cache { // Add random jitter (±20%) to TTL to prevent thundering herd jitter := time.Duration(float64(CacheTTL) * 0.2 * (0.5 - rand.Float64() * 2)) effectiveTTL := CacheTTL + jitter age := now.Sub(entry.CachedAt) if age > effectiveTTL { delete(r.cache, ip) } } }