Browse Source

HUGE FIX: partially fixed abnormal memory consumption by actually closing HTTP connections. Side effect: what I had mistaken for "bandwidth throttling" no longer occurs.

master v0.2.2
parent
commit
793c2b2a70
  1. 2
      Makefile
  2. 15
      src/main.go
  3. 15
      src/web/requests.go
  4. 3
      src/worker/worker.go

2
Makefile

@ -20,7 +20,7 @@ WINDIR64:=$(WINDIR)_x64
DARWINDIR64:=$(DARWINDIR)_x64 DARWINDIR64:=$(DARWINDIR)_x64
all: clean all:
cd $(SRCDIR) && go build && mv $(EXE) .. cd $(SRCDIR) && go build && mv $(EXE) ..
test: all test: all

15
src/main.go

@ -24,6 +24,8 @@ import (
"fmt" "fmt"
"io" "io"
"log" "log"
"net/http"
_ "net/http/pprof"
"net/url" "net/url"
"os" "os"
"os/signal" "os/signal"
@ -37,7 +39,7 @@ import (
"unbewohnte/wecr/worker" "unbewohnte/wecr/worker"
) )
const version = "v0.2.1" const version = "v0.2.2"
const ( const (
defaultConfigFile string = "conf.json" defaultConfigFile string = "conf.json"
@ -80,9 +82,8 @@ func init() {
// set log output // set log output
logger.SetOutput(os.Stdout) logger.SetOutput(os.Stdout)
// and work around random log prints by /x/net library // make default http logger silent
log.SetOutput(io.Discard) log.SetOutput(io.Discard)
log.SetFlags(0)
// parse and process flags // parse and process flags
flag.Parse() flag.Parse()
@ -137,6 +138,10 @@ func init() {
// global path to output file // global path to output file
outputFilePath = filepath.Join(workingDirectory, *outputFile) outputFilePath = filepath.Join(workingDirectory, *outputFile)
go func() {
http.ListenAndServe(":8000", nil)
}()
} }
func main() { func main() {
@ -321,8 +326,8 @@ func main() {
defer outputFile.Close() defer outputFile.Close()
// prepare channels // prepare channels
jobs := make(chan web.Job, conf.Workers*5) jobs := make(chan web.Job, conf.Workers)
results := make(chan web.Result, conf.Workers*5) results := make(chan web.Result, conf.Workers)
// create initial jobs // create initial jobs
for _, initialPage := range conf.InitialPages { for _, initialPage := range conf.InitialPages {

15
src/web/requests.go

@ -27,10 +27,8 @@ import (
// Get page data coming from url with optional user agent and timeout // Get page data coming from url with optional user agent and timeout
func GetPage(url string, userAgent string, timeOutMs uint64) ([]byte, error) { func GetPage(url string, userAgent string, timeOutMs uint64) ([]byte, error) {
// client := &http.Client{} http.DefaultClient.CloseIdleConnections()
// client.CheckRedirect = http.DefaultClient.CheckRedirect http.DefaultClient.Timeout = time.Duration(timeOutMs * uint64(time.Millisecond))
// client.Transport = http.DefaultClient.Transport
// client.Timeout = time.Duration(timeOutMs)
req, err := http.NewRequest("GET", url, nil) req, err := http.NewRequest("GET", url, nil)
if err != nil { if err != nil {
@ -38,19 +36,18 @@ func GetPage(url string, userAgent string, timeOutMs uint64) ([]byte, error) {
} }
req.Header.Set("User-Agent", userAgent) req.Header.Set("User-Agent", userAgent)
// response, err := client.Do(req)
response, err := http.DefaultClient.Do(req) response, err := http.DefaultClient.Do(req)
if err != nil { if err != nil {
return nil, err return nil, err
} }
defer response.Body.Close() defer response.Body.Close()
responseBody, err := io.ReadAll(response.Body) pageData, err := io.ReadAll(response.Body)
if err != nil { if err != nil {
return nil, err return nil, err
} }
return responseBody, nil return pageData, nil
} }
// Fetch file from url and save to file at filePath // Fetch file from url and save to file at filePath
@ -65,11 +62,13 @@ func FetchFile(url string, userAgent string, timeOutMs uint64, filePath string)
return err return err
} }
req.Header.Set("User-Agent", userAgent) req.Header.Set("User-Agent", userAgent)
req.Close = true
response, err := client.Do(req) response, err := client.Do(req)
if err != nil { if err != nil {
return nil return nil
} }
response.Close = true
defer response.Body.Close() defer response.Body.Close()
file, err := os.Create(filePath) file, err := os.Create(filePath)
@ -80,5 +79,7 @@ func FetchFile(url string, userAgent string, timeOutMs uint64, filePath string)
_, _ = io.Copy(file, response.Body) _, _ = io.Copy(file, response.Body)
client.CloseIdleConnections()
return nil return nil
} }

3
src/worker/worker.go

@ -222,6 +222,7 @@ func (w *Worker) Work() {
} }
} }
} }
pageLinks = nil
}() }()
// process and output result // process and output result
@ -336,6 +337,8 @@ func (w *Worker) Work() {
if savePage { if savePage {
w.savePage(pageURL, pageData) w.savePage(pageURL, pageData)
} }
pageData = nil
pageURL = nil
// sleep before the next request // sleep before the next request
time.Sleep(time.Duration(w.Conf.Requests.RequestPauseMs * uint64(time.Millisecond))) time.Sleep(time.Duration(w.Conf.Requests.RequestPauseMs * uint64(time.Millisecond)))

Loading…
Cancel
Save