move extract URLs to types.Entry

author: Paul Buetow <paul@buetow.org> 2024-10-24 10:25:00 +0300
committer: Paul Buetow <paul@buetow.org> 2024-10-24 10:25:00 +0300
commit: 9a68ca0461bdf6d093213a218fbf255678018a16 (patch)
tree: fb955dbeffd8d028c50652131ec9090982d9338b /internal
parent: dde64be2724fd4e407315627943dd8dd37e5adf0 (diff)
4 files changed, 89 insertions, 92 deletions
diff --git a/internal/entry/entry.go b/internal/entry/entry.go
index 99f7ee3..34ce5f0 100644
--- a/internal/entry/entry.go
+++ b/internal/entry/entry.go
@@ -4,6 +4,7 @@ import (
 	"errors"
 	"fmt"
 	"os"
+	"regexp"
 	"strings"
 	"time"
 
@@ -126,3 +127,15 @@ func (e Entry) Edit() error {
 	}
 	return nil
 }
+
+// ExtractURLs finds all occurrences of URLs starting with "http://", "https://" or "ftp://" in the entry's content.
+func (e Entry) ExtractURLs() []string {
+	content, _ := e.Content()
+	return extractURLs(content)
+}
+
+func extractURLs(input string) []string {
+	urlPattern := `(http://|https://|ftp://)[^\s]+`
+	re := regexp.MustCompile(urlPattern)
+	return re.FindAllString(input, -1)
+}
diff --git a/internal/entry/entry_test.go b/internal/entry/entry_test.go
index 0ec8166..43c2243 100644
--- a/internal/entry/entry_test.go
+++ b/internal/entry/entry_test.go
@@ -2,6 +2,9 @@ package entry
 
 import (
 	"fmt"
+	"regexp"
+	"slices"
+	"strings"
 	"testing"
 
 	"codeberg.org/snonux/gos/internal/timestamp"
@@ -36,3 +39,76 @@ func TestEntry(t *testing.T) {
 		}
 	}
 }
+
+func TestExtractTwoURLs(t *testing.T) {
+	text := `Hello world https://foo.zone
+	Hello universe http://world.universe test 123`
+
+	urls := extractURLs(text)
+	if len(urls) != 2 {
+		t.Errorf("expected 2 URLs, but got %d", len(urls))
+	}
+
+	if !slices.Contains(urls, "https://foo.zone") {
+		t.Errorf("expected 'https://foo.zone' in the URL list, but got %v", urls)
+	}
+	if !slices.Contains(urls, "http://world.universe") {
+		t.Errorf("expected 'http://world.universe' in the URL list, but got %v", urls)
+	}
+}
+
+func TestExtractURLs(t *testing.T) {
+	urls := []string{
+		"http://foo.zone",
+		"http://foo.zone/",
+		"http://foo.zone?foo=bar",
+		"http://foo.zone/?foo=bar",
+		"http://foo.zone/?foo=bar",
+		"http://foo.zone/hurs?foo=bar",
+		"http://foo.zone?foo=bar&baz=bay",
+	}
+
+	for _, url := range urls {
+		text := fmt.Sprintf("Hello world %s Hello World", url)
+		found := extractURLs(text)
+		if len(found) != 1 {
+			t.Errorf("expected 1 URL, but got %d for text '%s'", len(found), text)
+		}
+		if found[0] != url {
+			t.Errorf("expected URL '%s', but got '%s' for text '%s'", url, found[0], text)
+		}
+	}
+}
+
+func FuzzExtractURLs(f *testing.F) {
+	f.Add("/path?myjfa=lwsr4imj&dgqeg=m3uwwsak")
+	f.Add("/?amfbm=bwzqu46m&xheuh=nv588d98")
+	f.Add("?tuupm=reng2p1y&cbjot=0g5qvpty")
+	f.Add("/path?qmcok=f%20w4tfp7g&awsnq=sjizuore&owdix=8s2dmqsv")
+	f.Add("?zwilf=868o24x1&fiwmp=1d5aqbvo&irhhr=xar7qbq7&eetpy=scmi9s8i")
+	f.Add("/path?mwhbm=psinstn6&nsjic=pfu0wnk9&lbmrz=5bixkhdt")
+	f.Add("/path?owbwo=67mkjiz2")
+	f.Add("/path?ohvxi=esy5qvml&zlvzt=2yi4q4ef&cnich=sgc8sahs")
+	f.Add("/path?codsl=fpwfto6j")
+	f.Add("tvdus=fhlhlh1y")
+	f.Add("/foo.txt")
+	noWhitespace := regexp.MustCompile(`\s+`)
+
+	f.Fuzz(func(t *testing.T, urlPath string) {
+		urlPath = noWhitespace.ReplaceAllString(strings.TrimSpace(urlPath), "%20")
+		baseURLs := []string{"https://foo.zone", "http://foo.zone", "ftp://foo.zone"}
+		for _, baseURL := range baseURLs {
+			fullURL := fmt.Sprintf("%s%s", baseURL, urlPath)
+			text := fmt.Sprintf("Hello world %s Hello World", fullURL)
+			found := extractURLs(text)
+			if len(found) != 1 {
+				t.Errorf("expected 1 URL '%s', but got %d for text '%s'",
+					fullURL, len(found), text)
+			}
+			if found[0] != fullURL {
+				t.Errorf("expected URL '%s', but got '%s' for text '%s'",
+					fullURL, found[0], text)
+			}
+		}
+	})
+}
diff --git a/internal/platforms/linkedin/escapes.go b/internal/platforms/linkedin/escapes.go
index 31110a0..5f803a5 100644
--- a/internal/platforms/linkedin/escapes.go
+++ b/internal/platforms/linkedin/escapes.go
@@ -1,7 +1,6 @@
 package linkedin
 
 import (
-	"regexp"
 	"strings"
 )
 
@@ -38,17 +37,3 @@ func escapeLinkedInText(input string) string {
 
 	return builder.String()
 }
-
-// extractURLs finds all occurrences of URLs starting with "http://" or "https://" in a given string.
-func extractURLs(input string) []string {
-	// Regular expression pattern to match URLs starting with http:// or https://
-	urlPattern := `(http://|https://|ftp://)[^\s]+`
-
-	// Compile the regular expression
-	re := regexp.MustCompile(urlPattern)
-
-	// Find all matches in the input string
-	urls := re.FindAllString(input, -1)
-
-	return urls
-}
diff --git a/internal/platforms/linkedin/escapes_test.go b/internal/platforms/linkedin/escapes_test.go
index f93b260..b1f9203 100644
--- a/internal/platforms/linkedin/escapes_test.go
+++ b/internal/platforms/linkedin/escapes_test.go
@@ -1,10 +1,6 @@
 package linkedin
 
 import (
-	"fmt"
-	"regexp"
-	"slices"
-	"strings"
 	"testing"
 )
 
@@ -17,76 +13,3 @@ func TestLinkedInEscapes(t *testing.T) {
 		t.Errorf("expected '%s' but got '%s'", expected, escaped)
 	}
 }
-
-func TestLinkedInTwoURLsExtract(t *testing.T) {
-	text := `Hello world https://foo.zone
-	Hello universe http://world.universe test 123`
-
-	urls := extractURLs(text)
-	if len(urls) != 2 {
-		t.Errorf("expected 2 URLs, but got %d", len(urls))
-	}
-
-	if !slices.Contains(urls, "https://foo.zone") {
-		t.Errorf("expected 'https://foo.zone' in the URL list, but got %v", urls)
-	}
-	if !slices.Contains(urls, "http://world.universe") {
-		t.Errorf("expected 'http://world.universe' in the URL list, but got %v", urls)
-	}
-}
-
-func TestLinkedInURLExtract(t *testing.T) {
-	urls := []string{
-		"http://foo.zone",
-		"http://foo.zone/",
-		"http://foo.zone?foo=bar",
-		"http://foo.zone/?foo=bar",
-		"http://foo.zone/?foo=bar",
-		"http://foo.zone/hurs?foo=bar",
-		"http://foo.zone?foo=bar&baz=bay",
-	}
-
-	for _, url := range urls {
-		text := fmt.Sprintf("Hello world %s Hello World", url)
-		found := extractURLs(text)
-		if len(found) != 1 {
-			t.Errorf("expected 1 URL, but got %d for text '%s'", len(found), text)
-		}
-		if found[0] != url {
-			t.Errorf("expected URL '%s', but got '%s' for text '%s'", url, found[0], text)
-		}
-	}
-}
-
-func FuzzLinkedInURLExtract(f *testing.F) {
-	f.Add("/path?myjfa=lwsr4imj&dgqeg=m3uwwsak")
-	f.Add("/?amfbm=bwzqu46m&xheuh=nv588d98")
-	f.Add("?tuupm=reng2p1y&cbjot=0g5qvpty")
-	f.Add("/path?qmcok=f%20w4tfp7g&awsnq=sjizuore&owdix=8s2dmqsv")
-	f.Add("?zwilf=868o24x1&fiwmp=1d5aqbvo&irhhr=xar7qbq7&eetpy=scmi9s8i")
-	f.Add("/path?mwhbm=psinstn6&nsjic=pfu0wnk9&lbmrz=5bixkhdt")
-	f.Add("/path?owbwo=67mkjiz2")
-	f.Add("/path?ohvxi=esy5qvml&zlvzt=2yi4q4ef&cnich=sgc8sahs")
-	f.Add("/path?codsl=fpwfto6j")
-	f.Add("tvdus=fhlhlh1y")
-	f.Add("/foo.txt")
-	noWhitespace := regexp.MustCompile(`\s+`)
-
-	f.Fuzz(func(t *testing.T, urlPath string) {
-		urlPath = noWhitespace.ReplaceAllString(strings.TrimSpace(urlPath), "%20")
-		baseURLs := []string{"https://foo.zone", "http://foo.zone", "ftp://foo.zone"}
-		for _, baseURL := range baseURLs {
-			fullURL := fmt.Sprintf("%s%s", baseURL, urlPath)
-			text := fmt.Sprintf("Hello world %s Hello World", fullURL)
-			found := extractURLs(text)
-			if len(found) != 1 {
-				t.Errorf("expected 1 URL '%s', but got %d for text '%s'",
-					fullURL, len(found), text)
-			}
-			if found[0] != fullURL {
-				t.Errorf("expected URL '%s', but got '%s' for text '%s'",
-					fullURL, found[0], text)
-			}
-		}
-	})
-}
author	Paul Buetow <paul@buetow.org>	2024-10-24 10:25:00 +0300
committer	Paul Buetow <paul@buetow.org>	2024-10-24 10:25:00 +0300
commit	9a68ca0461bdf6d093213a218fbf255678018a16 (patch)
tree	fb955dbeffd8d028c50652131ec9090982d9338b /internal
parent	dde64be2724fd4e407315627943dd8dd37e5adf0 (diff)