summary | refs | log | tree | commit | diff
path: root/internal/platforms
diff options
context:
space:
mode:
author Paul Buetow <paul@buetow.org> 2024-10-24 10:25:00 +0300
committer Paul Buetow <paul@buetow.org> 2024-10-24 10:25:00 +0300
commit 9a68ca0461bdf6d093213a218fbf255678018a16 (patch)
tree fb955dbeffd8d028c50652131ec9090982d9338b /internal/platforms
parent dde64be2724fd4e407315627943dd8dd37e5adf0 (diff)
move extract URLs to types.Entry
Diffstat (limited to 'internal/platforms')
-rw-r--r-- internal/platforms/linkedin/escapes.go 15
-rw-r--r-- internal/platforms/linkedin/escapes_test.go 77
2 files changed, 0 insertions, 92 deletions
diff --git a/internal/platforms/linkedin/escapes.go b/internal/platforms/linkedin/escapes.go
index 31110a0..5f803a5 100644
--- a/internal/platforms/linkedin/escapes.go
+++ b/internal/platforms/linkedin/escapes.go
@@ -1,7 +1,6 @@
package linkedin
import (
- "regexp"
"strings"
)
@@ -38,17 +37,3 @@ func escapeLinkedInText(input string) string {
return builder.String()
}
-
-// extractURLs finds all occurrences of URLs starting with "http://" or "https://" in a given string.
-func extractURLs(input string) []string {
- // Regular expression pattern to match URLs starting with http:// or https://
- urlPattern := `(http://|https://|ftp://)[^\s]+`
-
- // Compile the regular expression
- re := regexp.MustCompile(urlPattern)
-
- // Find all matches in the input string
- urls := re.FindAllString(input, -1)
-
- return urls
-}
diff --git a/internal/platforms/linkedin/escapes_test.go b/internal/platforms/linkedin/escapes_test.go
index f93b260..b1f9203 100644
--- a/internal/platforms/linkedin/escapes_test.go
+++ b/internal/platforms/linkedin/escapes_test.go
@@ -1,10 +1,6 @@
package linkedin
import (
- "fmt"
- "regexp"
- "slices"
- "strings"
"testing"
)
@@ -17,76 +13,3 @@ func TestLinkedInEscapes(t *testing.T) {
t.Errorf("expected '%s' but got '%s'", expected, escaped)
}
}
-
-func TestLinkedInTwoURLsExtract(t *testing.T) {
- text := `Hello world https://foo.zone
- Hello universe http://world.universe test 123`
-
- urls := extractURLs(text)
- if len(urls) != 2 {
- t.Errorf("expected 2 URLs, but got %d", len(urls))
- }
-
- if !slices.Contains(urls, "https://foo.zone") {
- t.Errorf("expected 'https://foo.zone' in the URL list, but got %v", urls)
- }
- if !slices.Contains(urls, "http://world.universe") {
- t.Errorf("expected 'http://world.universe' in the URL list, but got %v", urls)
- }
-}
-
-func TestLinkedInURLExtract(t *testing.T) {
- urls := []string{
- "http://foo.zone",
- "http://foo.zone/",
- "http://foo.zone?foo=bar",
- "http://foo.zone/?foo=bar",
- "http://foo.zone/?foo=bar",
- "http://foo.zone/hurs?foo=bar",
- "http://foo.zone?foo=bar&baz=bay",
- }
-
- for _, url := range urls {
- text := fmt.Sprintf("Hello world %s Hello World", url)
- found := extractURLs(text)
- if len(found) != 1 {
- t.Errorf("expected 1 URL, but got %d for text '%s'", len(found), text)
- }
- if found[0] != url {
- t.Errorf("expected URL '%s', but got '%s' for text '%s'", url, found[0], text)
- }
- }
-}
-
-func FuzzLinkedInURLExtract(f *testing.F) {
- f.Add("/path?myjfa=lwsr4imj&dgqeg=m3uwwsak")
- f.Add("/?amfbm=bwzqu46m&xheuh=nv588d98")
- f.Add("?tuupm=reng2p1y&cbjot=0g5qvpty")
- f.Add("/path?qmcok=f%20w4tfp7g&awsnq=sjizuore&owdix=8s2dmqsv")
- f.Add("?zwilf=868o24x1&fiwmp=1d5aqbvo&irhhr=xar7qbq7&eetpy=scmi9s8i")
- f.Add("/path?mwhbm=psinstn6&nsjic=pfu0wnk9&lbmrz=5bixkhdt")
- f.Add("/path?owbwo=67mkjiz2")
- f.Add("/path?ohvxi=esy5qvml&zlvzt=2yi4q4ef&cnich=sgc8sahs")
- f.Add("/path?codsl=fpwfto6j")
- f.Add("tvdus=fhlhlh1y")
- f.Add("/foo.txt")
- noWhitespace := regexp.MustCompile(`\s+`)
-
- f.Fuzz(func(t *testing.T, urlPath string) {
- urlPath = noWhitespace.ReplaceAllString(strings.TrimSpace(urlPath), "%20")
- baseURLs := []string{"https://foo.zone", "http://foo.zone", "ftp://foo.zone"}
- for _, baseURL := range baseURLs {
- fullURL := fmt.Sprintf("%s%s", baseURL, urlPath)
- text := fmt.Sprintf("Hello world %s Hello World", fullURL)
- found := extractURLs(text)
- if len(found) != 1 {
- t.Errorf("expected 1 URL '%s', but got %d for text '%s'",
- fullURL, len(found), text)
- }
- if found[0] != fullURL {
- t.Errorf("expected URL '%s', but got '%s' for text '%s'",
- fullURL, found[0], text)
- }
- }
- })
-}