diff --git a/src/web/images.go b/src/web/images.go index 88f2e8d..850f1cb 100644 --- a/src/web/images.go +++ b/src/web/images.go @@ -20,9 +20,8 @@ package web import ( "bytes" - "net/url" + "fmt" "strings" - "unbewohnte/wecr/logger" "golang.org/x/net/html" ) @@ -65,27 +64,34 @@ func FindPageImages(pageBody []byte, hostname string) []string { continue } - // recheck for _, attribute := range token.Attr { if attribute.Key != "src" && attribute.Key != "href" { continue } - var imageURLString string - - imageURL, err := url.Parse(attribute.Val) - if err != nil { - logger.Error("Failed to parse URL %s: %s", attribute.Val, err) - continue + var imageURL string = attribute.Val + + if !strings.Contains(imageURL, hostname) { + // add hostname + if strings.HasPrefix(imageURL, "/") && strings.HasSuffix(hostname, "/") { + imageURL = fmt.Sprintf("%s%s", hostname, imageURL[1:]) + } else if !strings.HasPrefix(imageURL, "/") && !strings.HasSuffix(hostname, "/") { + imageURL = fmt.Sprintf("%s/%s", hostname, imageURL) + } else { + imageURL = fmt.Sprintf("%s%s", hostname, imageURL) + } } - imageURLString = imageURL.String() - if strings.HasPrefix(imageURLString, "//") { - imageURLString = "http:" + imageURLString + imageURL = strings.TrimPrefix(imageURL, "//") + + if !strings.HasPrefix(imageURL, "http://") && !strings.HasPrefix(imageURL, "https://") { + // add scheme + imageURL = "http://" + imageURL } - if hasImageExtention(imageURLString) { - urls = append(urls, imageURLString) + // check for image extention + if hasImageExtention(imageURL) { + urls = append(urls, imageURL) } } } diff --git a/src/worker/worker.go b/src/worker/worker.go index 0aa5180..e7cf3e8 100644 --- a/src/worker/worker.go +++ b/src/worker/worker.go @@ -81,7 +81,7 @@ func (w *Worker) outputImages(baseURL *url.URL, imageLinks []string) { response, err := http.Get(imageLink) if err != nil { - logger.Error("Failed to get %s", imageLink) + logger.Error("Failed to get image %s", imageLink) continue }