summary | refs | log | tree | commit | diff
path: root/internal/platforms
diff options
context:
space:
mode:
author Paul Buetow <paul@buetow.org> 2024-11-03 12:39:22 +0200
committer Paul Buetow <paul@buetow.org> 2024-11-03 12:39:22 +0200
commit 43b812e037a43a3295873971181f9cd004bea2c2 (patch)
tree 1aefc094f7a4ae2b9f1480d70c0f691ab32308eb /internal/platforms
parent 355794f0b55ac7e6ea81efbefa438fd176ea0de6 (diff)
can also fetch first image for the preview
Diffstat (limited to 'internal/platforms')
-rw-r--r-- internal/platforms/linkedin/preview.go 114
-rw-r--r-- internal/platforms/linkedin/preview_test.go 26
2 files changed, 110 insertions, 30 deletions
diff --git a/internal/platforms/linkedin/preview.go b/internal/platforms/linkedin/preview.go
index 1c0e5a7..03bf7e6 100644
--- a/internal/platforms/linkedin/preview.go
+++ b/internal/platforms/linkedin/preview.go
@@ -6,26 +6,30 @@ import (
"fmt"
"log"
"net/http"
+ "net/url"
"golang.org/x/net/html"
)
-var errNoTitleElementFound = errors.New("no title element found")
+var (
+ errNoTitleElementFound = errors.New("no title element found")
+ errNoImageElementFound = errors.New("no image element found")
+)
type preview struct {
- title, url string
+ title, imageURL, url string
}
func NewPreview(ctx context.Context, urls []string) (preview, error) {
if len(urls) == 0 {
return preview{}, nil
}
- title, err := fetchHTMLTitle(ctx, urls[0])
+ title, imageURL, err := fetchHTMLTitleAndFirstImage(ctx, urls[0])
if errors.Is(err, errNoTitleElementFound) || (err == nil && title == "") {
log.Println("Setting title to", urls[0])
title = urls[0]
}
- return preview{title: title, url: urls[0]}, err
+ return preview{title: title, imageURL: imageURL, url: urls[0]}, err
}
func (p preview) String() string {
@@ -36,47 +40,97 @@ func (p preview) Empty() bool {
return p.url == ""
}
-func fetchHTMLTitle(ctx context.Context, url string) (string, error) {
- req, err := http.NewRequestWithContext(ctx, http.MethodGet, url, nil)
+// findTitle returns the text of the first <title> element reached in a
+// depth-first walk of the tree rooted at n.
+//
+// It returns errNoTitleElementFound when the tree contains no <title>
+// element, or when the first one has no text.
+func findTitle(n *html.Node) (string, error) {
+	var title string
+	// traverse reports whether a <title> element was reached so that every
+	// level of the recursion stops walking. Without this, a later <title>
+	// (for example one nested inside an inline <svg>) would overwrite the
+	// document title found earlier.
+	var traverse func(*html.Node) bool
+	traverse = func(n *html.Node) bool {
+		if n.Type == html.ElementNode && n.Data == "title" {
+			if n.FirstChild != nil {
+				title = n.FirstChild.Data
+			}
+			return true
+		}
+		for c := n.FirstChild; c != nil; c = c.NextSibling {
+			if traverse(c) {
+				return true
+			}
+		}
+		return false
+	}
+	traverse(n)
+	if title == "" {
+		return "", errNoTitleElementFound
+	}
+	return title, nil
+}
+
+// findFirstImage walks the tree rooted at n depth-first and returns the value
+// of the src attribute of the first <img> element that carries one.
+//
+// It returns errNoImageElementFound when no such element exists. The value is
+// returned exactly as written in the document; it is not resolved here.
+func findFirstImage(n *html.Node) (string, error) {
+	var search func(node *html.Node) (string, bool)
+	search = func(node *html.Node) (string, bool) {
+		isImg := node.Type == html.ElementNode && node.Data == "img"
+		if isImg {
+			for i := range node.Attr {
+				if node.Attr[i].Key == "src" {
+					return node.Attr[i].Val, true
+				}
+			}
+		}
+		// An <img> without a src attribute falls through, so the walk goes on.
+		for child := node.FirstChild; child != nil; child = child.NextSibling {
+			if src, ok := search(child); ok {
+				return src, true
+			}
+		}
+		return "", false
+	}
+
+	src, ok := search(n)
+	if !ok {
+		return "", errNoImageElementFound
+	}
+	return src, nil
+}
+
+func resolveURL(baseURL, rawURL string) (string, error) {
+ base, err := url.Parse(baseURL)
+ if err != nil {
+ return "", fmt.Errorf("failed to parse base URL: %w", err)
+ }
+ u, err := url.Parse(rawURL)
if err != nil {
- return "", fmt.Errorf("failed to create request: %w", err)
+ return "", fmt.Errorf("failed to parse raw URL: %w", err)
}
+ return base.ResolveReference(u).String(), nil
+}
+func fetchHTMLTitleAndFirstImage(ctx context.Context, url string) (string, string, error) {
+ req, err := http.NewRequestWithContext(ctx, http.MethodGet, url, nil)
+ if err != nil {
+ return "", "", fmt.Errorf("failed to create request: %w", err)
+ }
resp, err := http.DefaultClient.Do(req)
if err != nil {
- return "", fmt.Errorf("failed to get URL: %w", err)
+ return "", "", fmt.Errorf("failed to get URL: %w", err)
}
defer resp.Body.Close()
if resp.StatusCode != http.StatusOK {
- return "", fmt.Errorf("failed to get a successful response: %v", resp.StatusCode)
+ return "", "", fmt.Errorf("failed to get a successful response: %v", resp.StatusCode)
}
-
doc, err := html.Parse(resp.Body)
if err != nil {
- return "", fmt.Errorf("failed to parse HTML: %w", err)
+ return "", "", fmt.Errorf("failed to parse HTML: %w", err)
}
- // Traverse the document and find the <title> tag
- var title string
- var f func(*html.Node)
- f = func(n *html.Node) {
- if n.Type == html.ElementNode && n.Data == "title" {
- if n.FirstChild != nil {
- title = n.FirstChild.Data
- }
- return
- }
- for c := n.FirstChild; c != nil; c = c.NextSibling {
- f(c)
- }
+ var errs error
+ title, err := findTitle(doc)
+ if err != nil {
+ errs = errors.Join(errs, err)
}
- // Call the function to search for the title
- f(doc)
-
- if title == "" {
- return "", errNoTitleElementFound
+ imageURL, err := findFirstImage(doc)
+ if err != nil {
+ errs = errors.Join(errs, err)
+ } else if imageURL != "" {
+ if imageURL, err = resolveURL(url, imageURL); err != nil {
+ errs = errors.Join(errs, err)
+ }
}
- return title, nil
+
+ return title, imageURL, errs
}
diff --git a/internal/platforms/linkedin/preview_test.go b/internal/platforms/linkedin/preview_test.go
new file mode 100644
index 0000000..ea47d08
--- /dev/null
+++ b/internal/platforms/linkedin/preview_test.go
@@ -0,0 +1,26 @@
+package linkedin
+
+import (
+ "context"
+ "testing"
+)
+
+// TestFetchHTMLTitleAndFirstImage fetches a live page and checks the extracted
+// title and the resolved URL of its first image.
+//
+// TODO: Mock the http request, and serve a local HTML page!
+func TestFetchHTMLTitleAndFirstImage(t *testing.T) {
+	// The fetch below goes to the public internet, so let `go test -short`
+	// opt out until the request is mocked.
+	if testing.Short() {
+		t.Skip("skipping test that requires network access in short mode")
+	}
+
+	var (
+		url              = "https://foo.zone/about/"
+		expectedTitle    = "About"
+		expectedImageURL = "https://foo.zone/about/paul.jpg"
+	)
+
+	title, imageURL, err := fetchHTMLTitleAndFirstImage(context.Background(), url)
+	if err != nil {
+		// Both values are expected, so any error is a failure; stopping here
+		// avoids follow-on mismatch messages that only repeat the cause.
+		t.Fatal(err)
+	}
+	if title != expectedTitle {
+		t.Errorf("expected title %q but got %q", expectedTitle, title)
+	}
+	if imageURL != expectedImageURL {
+		t.Errorf("expected imageURL %q but got %q", expectedImageURL, imageURL)
+	}
+}