Browse Source

Web dashboard foundation

master
parent
commit
6fab9031b1
  1. 30
      src/config/config.go
  2. 75
      src/dashboard/dashboard.go
  3. 104
      src/dashboard/res/index.html
  4. 11783
      src/dashboard/res/static/bootstrap.css
  5. 1
      src/dashboard/res/static/bootstrap.css.map
  6. 0
      src/dashboard/res/static/stylesheet.css
  7. 72
      src/main.go
  8. 25
      src/worker/pool.go
  9. 4
      src/worker/worker.go

30
src/config/config.go

@ -64,18 +64,24 @@ type Logging struct {
LogsFile string `json:"logs_file"`
}
// WebDashboard holds the web dashboard settings, stored under the
// "web_dashboard" key of the configuration file.
type WebDashboard struct {
// UseDashboard tells whether the dashboard should be launched
// ("launch_dashboard" JSON key).
UseDashboard bool `json:"launch_dashboard"`
// Port is the port the dashboard HTTP server listens on ("port" JSON key).
Port uint16 `json:"port"`
}
// Conf is the configuration file structure.
type Conf struct {
Search Search `json:"search"`
Requests Requests `json:"requests"`
Depth uint `json:"depth"`
Workers uint `json:"workers"`
InitialPages []string `json:"initial_pages"`
AllowedDomains []string `json:"allowed_domains"`
BlacklistedDomains []string `json:"blacklisted_domains"`
InMemoryVisitQueue bool `json:"in_memory_visit_queue"`
Save Save `json:"save"`
Logging Logging `json:"logging"`
Search Search `json:"search"`
Requests Requests `json:"requests"`
Depth uint `json:"depth"`
Workers uint `json:"workers"`
InitialPages []string `json:"initial_pages"`
AllowedDomains []string `json:"allowed_domains"`
BlacklistedDomains []string `json:"blacklisted_domains"`
InMemoryVisitQueue bool `json:"in_memory_visit_queue"`
// Dashboard holds the web dashboard settings ("web_dashboard" JSON key).
Dashboard WebDashboard `json:"web_dashboard"`
Save Save `json:"save"`
Logging Logging `json:"logging"`
}
// Default configuration file structure
@ -102,6 +108,10 @@ func Default() *Conf {
AllowedDomains: []string{""},
BlacklistedDomains: []string{""},
InMemoryVisitQueue: false,
Dashboard: WebDashboard{
UseDashboard: true,
Port: 13370,
},
Logging: Logging{
OutputLogs: true,
LogsFile: "logs.log",

75
src/dashboard/dashboard.go

@ -0,0 +1,75 @@
package dashboard
import (
"embed"
"encoding/json"
"fmt"
"html/template"
"io/fs"
"net/http"
"unbewohnte/wecr/config"
"unbewohnte/wecr/worker"
)
// Dashboard wraps the HTTP server that serves the web dashboard.
type Dashboard struct {
// Server is the configured (but not yet started) HTTP server; Launch
// calls ListenAndServe on it.
Server *http.Server
}
// resFS is the embedded "res" directory; NewDashboard serves the HTML page
// and the static assets from it.
//go:embed res
var resFS embed.FS
// PageData pairs a configuration with worker pool statistics.
// NOTE(review): nothing visible in this file uses it (the index template is
// executed with nil data) — presumably reserved as template data; confirm.
type PageData struct {
Conf config.Conf
Stats worker.Statistics
}
// NewDashboard builds the dashboard HTTP server for the given port. The server
// is only configured here; nothing listens until Launch is called.
//
// Routes:
//   - /static/ serves the embedded static assets;
//   - /        serves the embedded index.html page (other unmatched paths get 404);
//   - /stats   serves statistics as indented JSON;
//   - /conf    serves webConf as indented JSON.
//
// statistics and webConf are marshalled anew on every request, so responses
// reflect whatever the pointed-to values hold at that moment.
// NOTE(review): no synchronization is done here — confirm that whoever writes
// to statistics tolerates these concurrent reads.
//
// Returns nil if the embedded "res" directory cannot be opened.
func NewDashboard(port uint16, webConf *config.Conf, statistics *worker.Statistics) *Dashboard {
	mux := http.NewServeMux()

	res, err := fs.Sub(resFS, "res")
	if err != nil {
		return nil
	}

	// Parse the page templates once instead of on every request. A parse
	// failure is remembered and reported to clients as a 500 rather than as
	// an empty 200 response.
	pages, pagesErr := template.ParseFS(res, "*.html")

	mux.Handle("/static/", http.FileServer(http.FS(res)))

	mux.HandleFunc("/", func(w http.ResponseWriter, req *http.Request) {
		// "/" is ServeMux's catch-all pattern; only the root itself is the
		// dashboard page.
		if req.URL.Path != "/" {
			http.NotFound(w, req)
			return
		}
		if pagesErr != nil {
			http.Error(w, "Failed to load dashboard page", http.StatusInternalServerError)
			return
		}
		if err := pages.ExecuteTemplate(w, "index.html", nil); err != nil {
			http.Error(w, "Failed to render dashboard page", http.StatusInternalServerError)
		}
	})

	mux.HandleFunc("/stats", func(w http.ResponseWriter, req *http.Request) {
		writeIndentedJSON(w, statistics, "Failed to marshal statistics")
	})

	mux.HandleFunc("/conf", func(w http.ResponseWriter, req *http.Request) {
		writeIndentedJSON(w, webConf, "Failed to marshal configuration")
	})

	return &Dashboard{
		Server: &http.Server{
			Addr:    fmt.Sprintf(":%d", port),
			Handler: mux,
		},
	}
}

// writeIndentedJSON marshals value as indented JSON and writes it to w with a
// JSON content type; if marshalling fails it replies with failureMessage and
// status 500 instead.
func writeIndentedJSON(w http.ResponseWriter, value interface{}, failureMessage string) {
	payload, err := json.MarshalIndent(value, "", " ")
	if err != nil {
		http.Error(w, failureMessage, http.StatusInternalServerError)
		return
	}

	w.Header().Set("Content-Type", "application/json")
	// A failed write means the client connection is gone; there is nobody
	// left to report the error to.
	_, _ = w.Write(payload)
}
// Launch starts serving the dashboard and blocks until the server stops,
// returning the error reported by http.Server.ListenAndServe.
//
// NewDashboard returns nil when it cannot open its embedded resources, so a
// nil Dashboard (or one without a Server) yields an error here instead of a
// nil-pointer panic.
func (board *Dashboard) Launch() error {
	if board == nil || board.Server == nil {
		return fmt.Errorf("dashboard is not initialized")
	}

	return board.Server.ListenAndServe()
}

104
src/dashboard/res/index.html

@ -0,0 +1,104 @@
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="utf-8">
<title>Wecr dashboard</title>
<!-- <link rel="icon" href="/static/icon.png"> -->
<link rel="stylesheet" href="/static/bootstrap.css">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
</head>
<body class="d-flex flex-column h-100">
<div class="container">
<header class="d-flex flex-wrap justify-content-center py-3 mb-4 border-bottom">
<a href="/" class="d-flex align-items-center mb-3 mb-md-0 me-md-auto text-dark text-decoration-none">
<svg class="bi me-2" width="40" height="32">
<use xlink:href="#bootstrap"></use>
</svg>
<strong class="fs-4">Wecr</strong>
</a>
<ul class="nav nav-pills">
<li class="nav-item"><a href="/stats" class="nav-link">Stats</a></li>
<li class="nav-item"><a href="/conf" class="nav-link">Config</a></li>
</ul>
</header>
</div>
<div class="container">
<h1>Dashboard</h1>
<h2>Statistics</h2>
<div id="statistics">
<ol class="list-group list-group-numbered">
<li class="list-group-item d-flex justify-content-between align-items-start">
<div class="ms-2 me-auto">
<div class="fw-bold">Pages visited</div>
</div>
<span class="badge bg-primary rounded-pill" id="pages_visited">0</span>
</li>
<li class="list-group-item d-flex justify-content-between align-items-start">
<div class="ms-2 me-auto">
<div class="fw-bold">Matches found</div>
</div>
<span class="badge bg-primary rounded-pill" id="matches_found">0</span>
</li>
<li class="list-group-item d-flex justify-content-between align-items-start">
<div class="ms-2 me-auto">
<div class="fw-bold">Pages saved</div>
</div>
<span class="badge bg-primary rounded-pill" id="pages_saved">0</span>
</li>
<li class="list-group-item d-flex justify-content-between align-items-start">
<div class="ms-2 me-auto">
<div class="fw-bold">Start time</div>
</div>
<span class="badge bg-primary rounded-pill" id="start_time_unix">0</span>
</li>
<li class="list-group-item d-flex justify-content-between align-items-start">
<div class="ms-2 me-auto">
<div class="fw-bold">Stopped</div>
</div>
<span class="badge bg-primary rounded-pill" id="stopped">false</span>
</li>
</ol>
</div>
<!-- <h2>Configuration</h2>
<pre id="configuration"></pre> -->
</div>
</body>
<script>
// Dashboard polling script: once the page has loaded, keep the statistics
// badges in sync with the /stats endpoint.
//
// The handler is assigned to window.onload rather than invoked on the spot;
// the previous trailing "()" ran the function immediately and stored its
// return value (undefined) as the onload handler.
window.onload = function () {
    const pagesVisitedOut = document.getElementById("pages_visited");
    const matchesFoundOut = document.getElementById("matches_found");
    const pagesSavedOut = document.getElementById("pages_saved");
    const startTimeOut = document.getElementById("start_time_unix");
    const stoppedOut = document.getElementById("stopped");

    const refreshIntervalMs = 650;

    // Fetch the current statistics and write them into the badges.
    function updateStatistics() {
        fetch("/stats")
            .then((response) => {
                // The server answers with a plain-text error body on failure;
                // do not try to parse that as JSON.
                if (!response.ok) {
                    throw new Error("unexpected /stats response status: " + response.status);
                }
                return response.json();
            })
            .then((statistics) => {
                pagesVisitedOut.innerText = statistics.pages_visited;
                matchesFoundOut.innerText = statistics.matches_found;
                pagesSavedOut.innerText = statistics.pages_saved;
                // start_time_unix is in seconds, Date expects milliseconds
                startTimeOut.innerText = new Date(1000 * statistics.start_time_unix);
                stoppedOut.innerText = statistics.stopped;
            })
            .catch((error) => {
                // Keep the last shown values and keep polling; the dashboard
                // server may simply be unreachable for the moment.
                console.error("Failed to update statistics:", error);
            });
    }

    // Show real values right away instead of waiting for the first tick.
    updateStatistics();
    setInterval(updateStatistics, refreshIntervalMs);

    // Disabled together with the commented-out "configuration" element:
    // let confOutput = document.getElementById("configuration");
    // // update config just in case
    // fetch("/conf")
    // .then((response) => response.text())
    // .then((response_text) => JSON.parse(response_text))
    // .then((config) => {
    // confOutput.innerText = "Configuration: \n" + JSON.stringify(config);
    // });
};
</script>
</html>

11783
src/dashboard/res/static/bootstrap.css vendored

File diff suppressed because it is too large Load Diff

1
src/dashboard/res/static/bootstrap.css.map

File diff suppressed because one or more lines are too long

0
src/dashboard/res/static/stylesheet.css

72
src/main.go

@ -32,6 +32,7 @@ import (
"sync"
"time"
"unbewohnte/wecr/config"
"unbewohnte/wecr/dashboard"
"unbewohnte/wecr/logger"
"unbewohnte/wecr/queue"
"unbewohnte/wecr/utilities"
@ -39,7 +40,7 @@ import (
"unbewohnte/wecr/worker"
)
const version = "v0.2.5"
const version = "v0.3.0"
const (
defaultConfigFile string = "conf.json"
@ -164,28 +165,15 @@ func main() {
}
logger.Info("Successfully opened configuration file")
// create logs if needed
if conf.Logging.OutputLogs {
if conf.Logging.LogsFile != "" {
// output logs to a file
logFile, err := os.Create(filepath.Join(workingDirectory, conf.Logging.LogsFile))
if err != nil {
logger.Error("Failed to create logs file: %s", err)
return
}
defer logFile.Close()
// Prepare global statistics variable
statistics := worker.Statistics{}
logger.Info("Outputting logs to %s", conf.Logging.LogsFile)
logger.SetOutput(logFile)
} else {
// output logs to stdout
logger.Info("Outputting logs to stdout")
logger.SetOutput(os.Stdout)
}
} else {
// no logging needed
logger.Info("No further logs will be outputted")
logger.SetOutput(nil)
// open dashboard if needed
var board *dashboard.Dashboard = nil
if conf.Dashboard.UseDashboard {
board = dashboard.NewDashboard(conf.Dashboard.Port, conf, &statistics)
go board.Launch()
logger.Info("Launched dashboard at http://localhost:%d", conf.Dashboard.Port)
}
// sanitize and correct inputs
@ -335,6 +323,30 @@ func main() {
}
defer outputFile.Close()
// create logs if needed
if conf.Logging.OutputLogs {
if conf.Logging.LogsFile != "" {
// output logs to a file
logFile, err := os.Create(filepath.Join(workingDirectory, conf.Logging.LogsFile))
if err != nil {
logger.Error("Failed to create logs file: %s", err)
return
}
defer logFile.Close()
logger.Info("Outputting logs to %s", conf.Logging.LogsFile)
logger.SetOutput(logFile)
} else {
// output logs to stdout
logger.Info("Outputting logs to stdout")
logger.SetOutput(os.Stdout)
}
} else {
// no logging needed
logger.Info("No further logs will be outputted")
logger.SetOutput(nil)
}
jobs := make(chan web.Job, conf.Workers*5)
results := make(chan web.Result, conf.Workers*5)
@ -379,7 +391,7 @@ func main() {
}
// form a worker pool
workerPool := worker.NewWorkerPool(jobs, results, conf.Workers, worker.WorkerConf{
workerPool := worker.NewWorkerPool(jobs, results, conf.Workers, &worker.WorkerConf{
Requests: conf.Requests,
Save: conf.Save,
BlacklistedDomains: conf.BlacklistedDomains,
@ -388,7 +400,7 @@ func main() {
VisitQueue: visitQueueFile,
Lock: &sync.Mutex{},
},
})
}, &statistics)
logger.Info("Created a worker pool with %d workers", conf.Workers)
// set up graceful shutdown
@ -417,15 +429,15 @@ func main() {
for {
time.Sleep(time.Second)
timeSince := time.Since(workerPool.Stats.StartTime).Round(time.Second)
timeSince := time.Since(time.Unix(int64(statistics.StartTimeUnix), 0)).Round(time.Second)
fmt.Fprintf(os.Stdout, "\r[%s] %d pages visited; %d pages saved; %d matches (%d pages/sec)",
timeSince.String(),
workerPool.Stats.PagesVisited,
workerPool.Stats.PagesSaved,
workerPool.Stats.MatchesFound,
workerPool.Stats.PagesVisited-lastPagesVisited,
statistics.PagesVisited,
statistics.PagesSaved,
statistics.MatchesFound,
statistics.PagesVisited-lastPagesVisited,
)
lastPagesVisited = workerPool.Stats.PagesVisited
lastPagesVisited = statistics.PagesVisited
}
}()
}

25
src/worker/pool.go

@ -32,10 +32,11 @@ type visited struct {
// Statistics holds the whole worker pool's statistics. The JSON tags are the
// keys under which the dashboard's /stats endpoint exposes the values.
type Statistics struct {
PagesVisited uint64
MatchesFound uint64
PagesSaved uint64
StartTime time.Time
PagesVisited uint64 `json:"pages_visited"`
MatchesFound uint64 `json:"matches_found"`
PagesSaved uint64 `json:"pages_saved"`
// StartTimeUnix is the Unix time, in seconds, set by Pool.Work.
StartTimeUnix uint64 `json:"start_time_unix"`
// Stopped is set to true by Pool.Stop and reset to false by Pool.Work.
Stopped bool `json:"stopped"`
}
// Web-Worker pool
@ -43,11 +44,11 @@ type Pool struct {
workersCount uint
workers []*Worker
visited visited
Stats Statistics
Stats *Statistics
}
// Create a new worker pool
func NewWorkerPool(jobs chan web.Job, results chan web.Result, workerCount uint, workerConf WorkerConf) *Pool {
func NewWorkerPool(jobs chan web.Job, results chan web.Result, workerCount uint, workerConf *WorkerConf, stats *Statistics) *Pool {
var newPool Pool = Pool{
workersCount: workerCount,
workers: nil,
@ -55,16 +56,12 @@ func NewWorkerPool(jobs chan web.Job, results chan web.Result, workerCount uint,
URLs: nil,
Lock: sync.Mutex{},
},
Stats: Statistics{
StartTime: time.Time{},
PagesVisited: 0,
MatchesFound: 0,
},
Stats: stats,
}
var i uint
for i = 0; i < workerCount; i++ {
newWorker := NewWorker(jobs, results, workerConf, &newPool.visited, &newPool.Stats)
newWorker := NewWorker(jobs, results, workerConf, &newPool.visited, newPool.Stats)
newPool.workers = append(newPool.workers, &newWorker)
}
@ -73,7 +70,8 @@ func NewWorkerPool(jobs chan web.Job, results chan web.Result, workerCount uint,
// Notify all workers in pool to start scraping
func (p *Pool) Work() {
p.Stats.StartTime = time.Now()
p.Stats.StartTimeUnix = uint64(time.Now().Unix())
p.Stats.Stopped = false
for _, worker := range p.workers {
worker.Stopped = false
@ -83,6 +81,7 @@ func (p *Pool) Work() {
// Notify all workers in pool to stop scraping
func (p *Pool) Stop() {
p.Stats.Stopped = true
for _, worker := range p.workers {
worker.Stopped = true
}

4
src/worker/worker.go

@ -51,14 +51,14 @@ type WorkerConf struct {
type Worker struct {
Jobs chan web.Job
Results chan web.Result
Conf WorkerConf
// Conf is a pointer to the worker configuration handed to NewWorker.
Conf *WorkerConf
// visited and stats are pointers handed in by the pool that creates the
// worker (see NewWorkerPool), so all of its workers share them.
visited *visited
stats *Statistics
// Stopped is toggled by Pool.Work (false) and Pool.Stop (true).
Stopped bool
}
// Create a new worker
func NewWorker(jobs chan web.Job, results chan web.Result, conf WorkerConf, visited *visited, stats *Statistics) Worker {
func NewWorker(jobs chan web.Job, results chan web.Result, conf *WorkerConf, visited *visited, stats *Statistics) Worker {
return Worker{
Jobs: jobs,
Results: results,

Loading…
Cancel
Save