|
|
@ -156,17 +156,6 @@ func main() { |
|
|
|
} |
|
|
|
} |
|
|
|
logger.Info("Successfully opened configuration file") |
|
|
|
logger.Info("Successfully opened configuration file") |
|
|
|
|
|
|
|
|
|
|
|
// Prepare global statistics variable
|
|
|
|
|
|
|
|
statistics := worker.Statistics{} |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
// open dashboard if needed
|
|
|
|
|
|
|
|
var board *dashboard.Dashboard = nil |
|
|
|
|
|
|
|
if conf.Dashboard.UseDashboard { |
|
|
|
|
|
|
|
board = dashboard.NewDashboard(conf.Dashboard.Port, conf, &statistics) |
|
|
|
|
|
|
|
go board.Launch() |
|
|
|
|
|
|
|
logger.Info("Launched dashboard at http://localhost:%d", conf.Dashboard.Port) |
|
|
|
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
// sanitize and correct inputs
|
|
|
|
// sanitize and correct inputs
|
|
|
|
if len(conf.InitialPages) == 0 { |
|
|
|
if len(conf.InitialPages) == 0 { |
|
|
|
logger.Error("No initial page URLs have been set") |
|
|
|
logger.Error("No initial page URLs have been set") |
|
|
@ -344,9 +333,6 @@ func main() { |
|
|
|
logger.SetOutput(nil) |
|
|
|
logger.SetOutput(nil) |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
jobs := make(chan web.Job, conf.Workers*5) |
|
|
|
|
|
|
|
results := make(chan web.Result, conf.Workers*5) |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
// create visit queue file if not turned off
|
|
|
|
// create visit queue file if not turned off
|
|
|
|
var visitQueueFile *os.File = nil |
|
|
|
var visitQueueFile *os.File = nil |
|
|
|
if !conf.InMemoryVisitQueue { |
|
|
|
if !conf.InMemoryVisitQueue { |
|
|
@ -363,6 +349,7 @@ func main() { |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
// create initial jobs
|
|
|
|
// create initial jobs
|
|
|
|
|
|
|
|
initialJobs := make(chan web.Job, conf.Workers*5) |
|
|
|
if !conf.InMemoryVisitQueue { |
|
|
|
if !conf.InMemoryVisitQueue { |
|
|
|
for _, initialPage := range conf.InitialPages { |
|
|
|
for _, initialPage := range conf.InitialPages { |
|
|
|
var newJob web.Job = web.Job{ |
|
|
|
var newJob web.Job = web.Job{ |
|
|
@ -379,7 +366,7 @@ func main() { |
|
|
|
visitQueueFile.Seek(0, io.SeekStart) |
|
|
|
visitQueueFile.Seek(0, io.SeekStart) |
|
|
|
} else { |
|
|
|
} else { |
|
|
|
for _, initialPage := range conf.InitialPages { |
|
|
|
for _, initialPage := range conf.InitialPages { |
|
|
|
jobs <- web.Job{ |
|
|
|
initialJobs <- web.Job{ |
|
|
|
URL: initialPage, |
|
|
|
URL: initialPage, |
|
|
|
Search: conf.Search, |
|
|
|
Search: conf.Search, |
|
|
|
Depth: conf.Depth, |
|
|
|
Depth: conf.Depth, |
|
|
@ -387,8 +374,11 @@ func main() { |
|
|
|
} |
|
|
|
} |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
// Prepare global statistics variable
|
|
|
|
|
|
|
|
statistics := worker.Statistics{} |
|
|
|
|
|
|
|
|
|
|
|
// form a worker pool
|
|
|
|
// form a worker pool
|
|
|
|
workerPool := worker.NewWorkerPool(jobs, results, conf.Workers, &worker.WorkerConf{ |
|
|
|
workerPool := worker.NewWorkerPool(initialJobs, conf.Workers, &worker.WorkerConf{ |
|
|
|
Search: &conf.Search, |
|
|
|
Search: &conf.Search, |
|
|
|
Requests: &conf.Requests, |
|
|
|
Requests: &conf.Requests, |
|
|
|
Save: &conf.Save, |
|
|
|
Save: &conf.Save, |
|
|
@ -403,6 +393,14 @@ func main() { |
|
|
|
}, &statistics) |
|
|
|
}, &statistics) |
|
|
|
logger.Info("Created a worker pool with %d workers", conf.Workers) |
|
|
|
logger.Info("Created a worker pool with %d workers", conf.Workers) |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
// open dashboard if needed
|
|
|
|
|
|
|
|
var board *dashboard.Dashboard = nil |
|
|
|
|
|
|
|
if conf.Dashboard.UseDashboard { |
|
|
|
|
|
|
|
board = dashboard.NewDashboard(conf.Dashboard.Port, conf, workerPool) |
|
|
|
|
|
|
|
go board.Launch() |
|
|
|
|
|
|
|
logger.Info("Launched dashboard at http://localhost:%d", conf.Dashboard.Port) |
|
|
|
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
// launch concurrent scraping !
|
|
|
|
// launch concurrent scraping !
|
|
|
|
workerPool.Work() |
|
|
|
workerPool.Work() |
|
|
|
logger.Info("Started scraping...") |
|
|
|
logger.Info("Started scraping...") |
|
|
@ -436,7 +434,4 @@ func main() { |
|
|
|
|
|
|
|
|
|
|
|
// stop workers
|
|
|
|
// stop workers
|
|
|
|
workerPool.Stop() |
|
|
|
workerPool.Stop() |
|
|
|
|
|
|
|
|
|
|
|
// close results channel
|
|
|
|
|
|
|
|
close(results) |
|
|
|
|
|
|
|
} |
|
|
|
} |
|
|
|