From 793c2b2a704b6038c70cd5d005ef184424ebcf78 Mon Sep 17 00:00:00 2001 From: Unbewohnte Date: Mon, 23 Jan 2023 16:55:23 +0300 Subject: [PATCH] HUGE FIX: partially fixed abnormal memory consumption via actually closing http connections; Side-effect: no more what I thought was "bandwidth-throttling" --- Makefile | 2 +- src/main.go | 15 ++++++++++----- src/web/requests.go | 15 ++++++++------- src/worker/worker.go | 3 +++ 4 files changed, 22 insertions(+), 13 deletions(-) diff --git a/Makefile b/Makefile index 528ea23..553ec79 100644 --- a/Makefile +++ b/Makefile @@ -20,7 +20,7 @@ WINDIR64:=$(WINDIR)_x64 DARWINDIR64:=$(DARWINDIR)_x64 -all: clean +all: cd $(SRCDIR) && go build && mv $(EXE) .. test: all diff --git a/src/main.go b/src/main.go index 204ac5c..212b956 100644 --- a/src/main.go +++ b/src/main.go @@ -24,6 +24,8 @@ import ( "fmt" "io" "log" + "net/http" + _ "net/http/pprof" "net/url" "os" "os/signal" @@ -37,7 +39,7 @@ import ( "unbewohnte/wecr/worker" ) -const version = "v0.2.1" +const version = "v0.2.2" const ( defaultConfigFile string = "conf.json" @@ -80,9 +82,8 @@ func init() { // set log output logger.SetOutput(os.Stdout) - // and work around random log prints by /x/net library + // make default http logger silent log.SetOutput(io.Discard) - log.SetFlags(0) // parse and process flags flag.Parse() @@ -137,6 +138,10 @@ func init() { // global path to output file outputFilePath = filepath.Join(workingDirectory, *outputFile) + + go func() { + http.ListenAndServe(":8000", nil) + }() } func main() { @@ -321,8 +326,8 @@ func main() { defer outputFile.Close() // prepare channels - jobs := make(chan web.Job, conf.Workers*5) - results := make(chan web.Result, conf.Workers*5) + jobs := make(chan web.Job, conf.Workers) + results := make(chan web.Result, conf.Workers) // create initial jobs for _, initialPage := range conf.InitialPages { diff --git a/src/web/requests.go b/src/web/requests.go index abe6e66..90e6d83 100644 --- a/src/web/requests.go +++ b/src/web/requests.go @@ -27,10 +27,8 @@ import ( // Get page data coming from url with optional user agent and timeout func GetPage(url string, userAgent string, timeOutMs uint64) ([]byte, error) { - // client := &http.Client{} - // client.CheckRedirect = http.DefaultClient.CheckRedirect - // client.Transport = http.DefaultClient.Transport - // client.Timeout = time.Duration(timeOutMs) + http.DefaultClient.CloseIdleConnections() + http.DefaultClient.Timeout = time.Duration(timeOutMs * uint64(time.Millisecond)) req, err := http.NewRequest("GET", url, nil) if err != nil { @@ -38,19 +36,18 @@ func GetPage(url string, userAgent string, timeOutMs uint64) ([]byte, error) { } req.Header.Set("User-Agent", userAgent) - // response, err := client.Do(req) response, err := http.DefaultClient.Do(req) if err != nil { return nil, err } defer response.Body.Close() - responseBody, err := io.ReadAll(response.Body) + pageData, err := io.ReadAll(response.Body) if err != nil { return nil, err } - return responseBody, nil + return pageData, nil } // Fetch file from url and save to file at filePath @@ -65,11 +62,13 @@ func FetchFile(url string, userAgent string, timeOutMs uint64, filePath string) return err } req.Header.Set("User-Agent", userAgent) + req.Close = true response, err := client.Do(req) if err != nil { return nil } + response.Close = true defer response.Body.Close() file, err := os.Create(filePath) @@ -80,5 +79,7 @@ func FetchFile(url string, userAgent string, timeOutMs uint64, filePath string) _, _ = io.Copy(file, response.Body) + client.CloseIdleConnections() + return nil } diff --git a/src/worker/worker.go b/src/worker/worker.go index e93007a..40769e7 100644 --- a/src/worker/worker.go +++ b/src/worker/worker.go @@ -222,6 +222,7 @@ func (w *Worker) Work() { } } } + pageLinks = nil }() // process and output result @@ -336,6 +337,8 @@ func (w *Worker) Work() { if savePage { w.savePage(pageURL, pageData) } + pageData = nil + pageURL = nil // sleep before the next request time.Sleep(time.Duration(w.Conf.Requests.RequestPauseMs * uint64(time.Millisecond)))