diff options
| author | Paul Buetow <paul@buetow.org> | 2024-10-24 10:25:00 +0300 |
|---|---|---|
| committer | Paul Buetow <paul@buetow.org> | 2024-10-24 10:25:00 +0300 |
| commit | 9a68ca0461bdf6d093213a218fbf255678018a16 (patch) | |
| tree | fb955dbeffd8d028c50652131ec9090982d9338b /internal/platforms | |
| parent | dde64be2724fd4e407315627943dd8dd37e5adf0 (diff) | |
move extract URLs to types.Entry
Diffstat (limited to 'internal/platforms')
| -rw-r--r-- | internal/platforms/linkedin/escapes.go | 15 | ||||
| -rw-r--r-- | internal/platforms/linkedin/escapes_test.go | 77 |
2 files changed, 0 insertions, 92 deletions
diff --git a/internal/platforms/linkedin/escapes.go b/internal/platforms/linkedin/escapes.go index 31110a0..5f803a5 100644 --- a/internal/platforms/linkedin/escapes.go +++ b/internal/platforms/linkedin/escapes.go @@ -1,7 +1,6 @@ package linkedin import ( - "regexp" "strings" ) @@ -38,17 +37,3 @@ func escapeLinkedInText(input string) string { return builder.String() } - -// extractURLs finds all occurrences of URLs starting with "http://" or "https://" in a given string. -func extractURLs(input string) []string { - // Regular expression pattern to match URLs starting with http:// or https:// - urlPattern := `(http://|https://|ftp://)[^\s]+` - - // Compile the regular expression - re := regexp.MustCompile(urlPattern) - - // Find all matches in the input string - urls := re.FindAllString(input, -1) - - return urls -} diff --git a/internal/platforms/linkedin/escapes_test.go b/internal/platforms/linkedin/escapes_test.go index f93b260..b1f9203 100644 --- a/internal/platforms/linkedin/escapes_test.go +++ b/internal/platforms/linkedin/escapes_test.go @@ -1,10 +1,6 @@ package linkedin import ( - "fmt" - "regexp" - "slices" - "strings" "testing" ) @@ -17,76 +13,3 @@ func TestLinkedInEscapes(t *testing.T) { t.Errorf("expected '%s' but got '%s'", expected, escaped) } } - -func TestLinkedInTwoURLsExtract(t *testing.T) { - text := `Hello world https://foo.zone - Hello universe http://world.universe test 123` - - urls := extractURLs(text) - if len(urls) != 2 { - t.Errorf("expected 2 URLs, but got %d", len(urls)) - } - - if !slices.Contains(urls, "https://foo.zone") { - t.Errorf("expected 'https://foo.zone' in the URL list, but got %v", urls) - } - if !slices.Contains(urls, "http://world.universe") { - t.Errorf("expected 'http://world.universe' in the URL list, but got %v", urls) - } -} - -func TestLinkedInURLExtract(t *testing.T) { - urls := []string{ - "http://foo.zone", - "http://foo.zone/", - "http://foo.zone?foo=bar", - "http://foo.zone/?foo=bar", - "http://foo.zone/?foo=bar", - "http://foo.zone/hurs?foo=bar", - "http://foo.zone?foo=bar&baz=bay", - } - - for _, url := range urls { - text := fmt.Sprintf("Hello world %s Hello World", url) - found := extractURLs(text) - if len(found) != 1 { - t.Errorf("expected 1 URL, but got %d for text '%s'", len(found), text) - } - if found[0] != url { - t.Errorf("expected URL '%s', but got '%s' for text '%s'", url, found[0], text) - } - } -} - -func FuzzLinkedInURLExtract(f *testing.F) { - f.Add("/path?myjfa=lwsr4imj&dgqeg=m3uwwsak") - f.Add("/?amfbm=bwzqu46m&xheuh=nv588d98") - f.Add("?tuupm=reng2p1y&cbjot=0g5qvpty") - f.Add("/path?qmcok=f%20w4tfp7g&awsnq=sjizuore&owdix=8s2dmqsv") - f.Add("?zwilf=868o24x1&fiwmp=1d5aqbvo&irhhr=xar7qbq7&eetpy=scmi9s8i") - f.Add("/path?mwhbm=psinstn6&nsjic=pfu0wnk9&lbmrz=5bixkhdt") - f.Add("/path?owbwo=67mkjiz2") - f.Add("/path?ohvxi=esy5qvml&zlvzt=2yi4q4ef&cnich=sgc8sahs") - f.Add("/path?codsl=fpwfto6j") - f.Add("tvdus=fhlhlh1y") - f.Add("/foo.txt") - noWhitespace := regexp.MustCompile(`\s+`) - - f.Fuzz(func(t *testing.T, urlPath string) { - urlPath = noWhitespace.ReplaceAllString(strings.TrimSpace(urlPath), "%20") - baseURLs := []string{"https://foo.zone", "http://foo.zone", "ftp://foo.zone"} - for _, baseURL := range baseURLs { - fullURL := fmt.Sprintf("%s%s", baseURL, urlPath) - text := fmt.Sprintf("Hello world %s Hello World", fullURL) - found := extractURLs(text) - if len(found) != 1 { - t.Errorf("expected 1 URL '%s', but got %d for text '%s'", - fullURL, len(found), text) - } - if found[0] != fullURL { - t.Errorf("expected URL '%s', but got '%s' for text '%s'", - fullURL, found[0], text) - } - } - }) -} |
