Browse Source

Better handling of image urls

master
parent
commit
2d5cc84184
  1. 34
      src/web/images.go
  2. 2
      src/worker/worker.go

34
src/web/images.go

@ -20,9 +20,8 @@ package web
import ( import (
"bytes" "bytes"
"net/url" "fmt"
"strings" "strings"
"unbewohnte/wecr/logger"
"golang.org/x/net/html" "golang.org/x/net/html"
) )
@ -65,27 +64,34 @@ func FindPageImages(pageBody []byte, hostname string) []string {
continue continue
} }
// recheck
for _, attribute := range token.Attr { for _, attribute := range token.Attr {
if attribute.Key != "src" && attribute.Key != "href" { if attribute.Key != "src" && attribute.Key != "href" {
continue continue
} }
var imageURLString string var imageURL string = attribute.Val
imageURL, err := url.Parse(attribute.Val) if !strings.Contains(imageURL, hostname) {
if err != nil { // add hostname
logger.Error("Failed to parse URL %s: %s", attribute.Val, err) if strings.HasPrefix(imageURL, "/") && strings.HasSuffix(hostname, "/") {
continue imageURL = fmt.Sprintf("%s%s", hostname, imageURL[1:])
} else if !strings.HasPrefix(imageURL, "/") && !strings.HasSuffix(hostname, "/") {
imageURL = fmt.Sprintf("%s/%s", hostname, imageURL)
} else {
imageURL = fmt.Sprintf("%s%s", hostname, imageURL)
}
} }
imageURLString = imageURL.String()
if strings.HasPrefix(imageURLString, "//") { imageURL = strings.TrimPrefix(imageURL, "//")
imageURLString = "http:" + imageURLString
if !strings.HasPrefix(imageURL, "http://") && !strings.HasPrefix(imageURL, "https://") {
// add scheme
imageURL = "http://" + imageURL
} }
if hasImageExtention(imageURLString) { // check for image extention
urls = append(urls, imageURLString) if hasImageExtention(imageURL) {
urls = append(urls, imageURL)
} }
} }
} }

2
src/worker/worker.go

@ -81,7 +81,7 @@ func (w *Worker) outputImages(baseURL *url.URL, imageLinks []string) {
response, err := http.Get(imageLink) response, err := http.Get(imageLink)
if err != nil { if err != nil {
logger.Error("Failed to get %s", imageLink) logger.Error("Failed to get image %s", imageLink)
continue continue
} }

Loading…
Cancel
Save