Kasianov Nikolai Alekseevich
2 years ago
7 changed files with 117 additions and 17 deletions
@ -0,0 +1,78 @@
|
||||
/* |
||||
Wecr - crawl the web for data |
||||
Copyright (C) 2023 Kasyanov Nikolay Alexeyevich (Unbewohnte) |
||||
|
||||
This program is free software: you can redistribute it and/or modify |
||||
it under the terms of the GNU Affero General Public License as published by |
||||
the Free Software Foundation, either version 3 of the License, or |
||||
(at your option) any later version. |
||||
|
||||
This program is distributed in the hope that it will be useful, |
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of |
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
||||
GNU Affero General Public License for more details. |
||||
|
||||
You should have received a copy of the GNU Affero General Public License |
||||
along with this program. If not, see <https://www.gnu.org/licenses/>.
|
||||
*/ |
||||
|
||||
package utilities |
||||
|
||||
import ( |
||||
"encoding/json" |
||||
"fmt" |
||||
"io" |
||||
"os" |
||||
"unbewohnte/wecr/web" |
||||
) |
||||
|
||||
// Extracts data from the output JSON file and puts it in a new file with separators between each entry
|
||||
func ExtractDataFromOutput(inputFilename string, outputFilename string, separator string, keepDuplicates bool) error { |
||||
inputFile, err := os.Open(inputFilename) |
||||
if err != nil { |
||||
return err |
||||
} |
||||
defer inputFile.Close() |
||||
|
||||
outputFile, err := os.Create(outputFilename) |
||||
if err != nil { |
||||
return err |
||||
} |
||||
defer outputFile.Close() |
||||
|
||||
var processedData []string |
||||
|
||||
decoder := json.NewDecoder(inputFile) |
||||
for { |
||||
var result web.Result |
||||
|
||||
err := decoder.Decode(&result) |
||||
if err == io.EOF { |
||||
break |
||||
} |
||||
if err != nil { |
||||
return err |
||||
} |
||||
|
||||
for _, dataEntry := range result.Data { |
||||
var skip = false |
||||
if !keepDuplicates { |
||||
for _, processedEntry := range processedData { |
||||
if dataEntry == processedEntry { |
||||
skip = true |
||||
break |
||||
} |
||||
} |
||||
|
||||
if skip { |
||||
continue |
||||
} |
||||
processedData = append(processedData, dataEntry) |
||||
} |
||||
|
||||
outputFile.WriteString(fmt.Sprintf("%s%s", dataEntry, separator)) |
||||
} |
||||
} |
||||
|
||||
return nil |
||||
} |
Loading…
Reference in new issue