Browse Source

Better handling of image URLs

master
parent
commit
2d5cc84184
  1. 34
      src/web/images.go
  2. 2
      src/worker/worker.go

34
src/web/images.go

@ -20,9 +20,8 @@ package web
import (
"bytes"
"net/url"
"fmt"
"strings"
"unbewohnte/wecr/logger"
"golang.org/x/net/html"
)
@ -65,27 +64,34 @@ func FindPageImages(pageBody []byte, hostname string) []string {
continue
}
// recheck
for _, attribute := range token.Attr {
if attribute.Key != "src" && attribute.Key != "href" {
continue
}
var imageURLString string
imageURL, err := url.Parse(attribute.Val)
if err != nil {
logger.Error("Failed to parse URL %s: %s", attribute.Val, err)
continue
var imageURL string = attribute.Val
if !strings.Contains(imageURL, hostname) {
// add hostname
if strings.HasPrefix(imageURL, "/") && strings.HasSuffix(hostname, "/") {
imageURL = fmt.Sprintf("%s%s", hostname, imageURL[1:])
} else if !strings.HasPrefix(imageURL, "/") && !strings.HasSuffix(hostname, "/") {
imageURL = fmt.Sprintf("%s/%s", hostname, imageURL)
} else {
imageURL = fmt.Sprintf("%s%s", hostname, imageURL)
}
}
imageURLString = imageURL.String()
if strings.HasPrefix(imageURLString, "//") {
imageURLString = "http:" + imageURLString
imageURL = strings.TrimPrefix(imageURL, "//")
if !strings.HasPrefix(imageURL, "http://") && !strings.HasPrefix(imageURL, "https://") {
// add scheme
imageURL = "http://" + imageURL
}
if hasImageExtention(imageURLString) {
urls = append(urls, imageURLString)
// check for image extension
if hasImageExtention(imageURL) {
urls = append(urls, imageURL)
}
}
}

2
src/worker/worker.go

@ -81,7 +81,7 @@ func (w *Worker) outputImages(baseURL *url.URL, imageLinks []string) {
response, err := http.Get(imageLink)
if err != nil {
logger.Error("Failed to get %s", imageLink)
logger.Error("Failed to get image %s", imageLink)
continue
}

Loading…
Cancel
Save