
HUGE FIX: partially fixed abnormal memory consumption by actually closing HTTP connections. Side effect: no more of what I thought was "bandwidth throttling".

Branch: master · Tag: v0.2.2
Parent commit: 793c2b2a70

Changed files:
  1. Makefile (2 changes)
  2. src/main.go (15 changes)
  3. src/web/requests.go (15 changes)
  4. src/worker/worker.go (3 changes)
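
The fix leans on a well-known property of Go's net/http: the transport parks finished keep-alive connections in an idle pool, and a crawler that touches many hosts accumulates those pooled connections and their buffers. A minimal sketch of the pattern the diffs below move toward (illustrative code, not from this repository; the URL and timeout are placeholders):

package main

import (
	"fmt"
	"io"
	"net/http"
	"time"
)

// fetch mirrors the shape this commit gives GetPage/FetchFile.
func fetch(url string) ([]byte, error) {
	http.DefaultClient.Timeout = 5 * time.Second

	req, err := http.NewRequest("GET", url, nil)
	if err != nil {
		return nil, err
	}
	// one-shot request: the connection is closed afterwards instead
	// of being parked in the transport's idle pool
	req.Close = true

	resp, err := http.DefaultClient.Do(req)
	if err != nil {
		return nil, err
	}
	// an unclosed body keeps the connection and its buffers alive
	defer resp.Body.Close()

	data, err := io.ReadAll(resp.Body)
	if err != nil {
		return nil, err
	}

	// drop whatever is still idling in the pool
	http.DefaultClient.CloseIdleConnections()
	return data, nil
}

func main() {
	page, err := fetch("https://example.com")
	if err != nil {
		fmt.Println(err)
		return
	}
	fmt.Println(len(page), "bytes")
}

req.Close, http.Client.CloseIdleConnections and the body Close are all standard library calls; the trade-off is that every request pays the TCP/TLS setup cost again.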

Makefile (2 changes)

@@ -20,7 +20,7 @@ WINDIR64:=$(WINDIR)_x64
 DARWINDIR64:=$(DARWINDIR)_x64

-all: clean
+all:
 	cd $(SRCDIR) && go build && mv $(EXE) ..

 test: all

src/main.go (15 changes)

@@ -24,6 +24,8 @@ import (
 	"fmt"
 	"io"
 	"log"
+	"net/http"
+	_ "net/http/pprof"
 	"net/url"
 	"os"
 	"os/signal"
@@ -37,7 +39,7 @@ import (
 	"unbewohnte/wecr/worker"
 )

-const version = "v0.2.1"
+const version = "v0.2.2"

 const (
 	defaultConfigFile string = "conf.json"
@@ -80,9 +82,8 @@ func init() {
 	// set log output
 	logger.SetOutput(os.Stdout)
-	// and work around random log prints by /x/net library
+	// make default http logger silent
 	log.SetOutput(io.Discard)
 	log.SetFlags(0)

 	// parse and process flags
 	flag.Parse()
@@ -137,6 +138,10 @@ func init() {
 	// global path to output file
 	outputFilePath = filepath.Join(workingDirectory, *outputFile)
+
+	go func() {
+		http.ListenAndServe(":8000", nil)
+	}()
 }

func main() {

@@ -321,8 +326,8 @@ func main() {
 	defer outputFile.Close()

 	// prepare channels
-	jobs := make(chan web.Job, conf.Workers*5)
-	results := make(chan web.Result, conf.Workers*5)
+	jobs := make(chan web.Job, conf.Workers)
+	results := make(chan web.Result, conf.Workers)

 	// create initial jobs
 	for _, initialPage := range conf.InitialPages {
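
In the hunks above, the blank import of net/http/pprof registers profiling handlers on http.DefaultServeMux as a side effect, and http.ListenAndServe(":8000", nil) serves exactly that default mux, so the running crawler now exposes /debug/pprof. Assuming it runs locally, the heap behaviour this commit chases can be inspected with the standard tooling:

# interactive heap profile of the running crawler
go tool pprof http://localhost:8000/debug/pprof/heap

# quick plain-text dump of live allocations
curl 'http://localhost:8000/debug/pprof/heap?debug=1'

Shrinking the jobs and results buffers from conf.Workers*5 to conf.Workers also caps how many fetched pages can sit queued in memory at any moment.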

src/web/requests.go (15 changes)

@@ -27,10 +27,8 @@ import (
 // Get page data coming from url with optional user agent and timeout
 func GetPage(url string, userAgent string, timeOutMs uint64) ([]byte, error) {
-	// client := &http.Client{}
-	// client.CheckRedirect = http.DefaultClient.CheckRedirect
-	// client.Transport = http.DefaultClient.Transport
-	// client.Timeout = time.Duration(timeOutMs)
+	http.DefaultClient.CloseIdleConnections()
+	http.DefaultClient.Timeout = time.Duration(timeOutMs * uint64(time.Millisecond))

 	req, err := http.NewRequest("GET", url, nil)
 	if err != nil {
@@ -38,19 +36,18 @@ func GetPage(url string, userAgent string, timeOutMs uint64) ([]byte, error) {
 	}
 	req.Header.Set("User-Agent", userAgent)

-	// response, err := client.Do(req)
 	response, err := http.DefaultClient.Do(req)
 	if err != nil {
 		return nil, err
 	}
 	defer response.Body.Close()

-	responseBody, err := io.ReadAll(response.Body)
+	pageData, err := io.ReadAll(response.Body)
 	if err != nil {
 		return nil, err
 	}

-	return responseBody, nil
+	return pageData, nil
 }

// Fetch file from url and save to file at filePath

@@ -65,11 +62,13 @@ func FetchFile(url string, userAgent string, timeOutMs uint64, filePath string)
 		return err
 	}
 	req.Header.Set("User-Agent", userAgent)
+	req.Close = true

 	response, err := client.Do(req)
 	if err != nil {
 		return nil
 	}
+	response.Close = true
 	defer response.Body.Close()

 	file, err := os.Create(filePath)
@@ -80,5 +79,7 @@ func FetchFile(url string, userAgent string, timeOutMs uint64, filePath string)
 	_, _ = io.Copy(file, response.Body)

+	client.CloseIdleConnections()
+
 	return nil
 }
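
GetPage and FetchFile now trade connection reuse for predictable memory: req.Close = true makes each request one-shot and CloseIdleConnections() empties the pool afterwards. A middle ground, should reuse ever matter again, is a dedicated client whose transport bounds the idle pool instead of disabling it; a sketch with illustrative values, not part of this commit:

package main

import (
	"fmt"
	"net/http"
	"time"
)

// newBoundedClient returns a client whose idle pool is capped,
// so keep-alive reuse stays but cannot grow without limit.
func newBoundedClient(timeout time.Duration) *http.Client {
	return &http.Client{
		Timeout: timeout,
		Transport: &http.Transport{
			MaxIdleConns:        16,               // idle connections across all hosts
			MaxIdleConnsPerHost: 2,                // per host, the usual crawler hot spot
			IdleConnTimeout:     30 * time.Second, // idle connections expire on their own
		},
	}
}

func main() {
	client := newBoundedClient(5 * time.Second)
	resp, err := client.Get("https://example.com")
	if err != nil {
		fmt.Println(err)
		return
	}
	defer resp.Body.Close()
	fmt.Println(resp.Status)
}

Note that mutating http.DefaultClient.Timeout, as GetPage does above, affects every user of the default client process-wide; a dedicated client like this one would avoid that.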

src/worker/worker.go (3 changes)

@@ -222,6 +222,7 @@ func (w *Worker) Work() {
 			}
 		}
 	}
+	pageLinks = nil
 }()

// process and output result

@@ -336,6 +337,8 @@ func (w *Worker) Work() {
 	if savePage {
 		w.savePage(pageURL, pageData)
 	}
+	pageData = nil
+	pageURL = nil

 	// sleep before the next request
 	time.Sleep(time.Duration(w.Conf.Requests.RequestPauseMs * uint64(time.Millisecond)))
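
Both worker.go hunks are about reachability: pageLinks, pageData and pageURL would otherwise stay referenced across the RequestPauseMs sleep, so the garbage collector could not reclaim a finished page's buffers until the next iteration overwrote them. A minimal sketch of the effect (hypothetical names and sizes, not wecr's actual types):

package main

import (
	"fmt"
	"runtime"
	"time"
)

func main() {
	var m runtime.MemStats
	var pageData []byte // long-lived across iterations, like the worker's loop variables
	for i := 0; i < 3; i++ {
		pageData = make([]byte, 32<<20) // stand-in for a fetched page
		fmt.Println("fetched", len(pageData), "bytes")

		// while pageData still points at the buffer it stays reachable
		// through the sleep; nil-ing it releases the reference
		pageData = nil

		runtime.GC() // only to make the effect visible in this sketch
		runtime.ReadMemStats(&m)
		fmt.Printf("heap in use: %d MiB\n", m.HeapInuse>>20)
		time.Sleep(100 * time.Millisecond)
	}
}

The explicit runtime.GC() is just for demonstration; the real worker relies on regular collection cycles running during the pause.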
