forked from cblgh/lieu
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathcli.go
132 lines (123 loc) · 3.53 KB
/
cli.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
package main
import (
"bufio"
"fmt"
"lieu/crawler"
"lieu/database"
"lieu/ingest"
"lieu/server"
"lieu/util"
"os"
"strings"
)
const help = `Lieu: neighbourhood search engine
Commands
- precrawl (scrapes config's general.url for a list of links: <li> elements containing an anchor <a> tag)
- crawl (start crawler, crawls all urls in config's crawler.webring file. outputs to stdout)
- ingest (ingest crawled data, generates database)
- search (interactive cli for searching the database)
- host (hosts search engine over http)
Example:
lieu precrawl > data/webring.txt
lieu crawl > data/source.txt
lieu ingest
lieu host
See the configuration file lieu.toml or
https://github.com/cblgh/lieu for more information.
`
func main() {
exists := util.CheckFileExists("lieu.toml")
if !exists {
fmt.Println("lieu: can't find config, saving an example config in the working directory")
util.WriteMockConfig()
fmt.Println("lieu: lieu.toml written to disk")
util.Exit()
}
config := util.ReadConfig()
var cmd string
if len(os.Args) > 1 {
cmd = os.Args[1]
} else {
cmd = "help"
}
switch cmd {
case "help":
fmt.Println(help)
case "precrawl":
if config.General.URL == "https://example.com/" {
fmt.Println("lieu: the url is not set (example.com)")
util.Exit()
}
crawler.Precrawl(config)
case "crawl":
exists := util.CheckFileExists(config.Crawler.Webring)
if !exists {
fmt.Printf("lieu: webring file %s does not exist\n", config.Crawler.Webring)
util.Exit()
}
sourceLen := len(util.ReadList(config.Crawler.Webring, "\n"))
if sourceLen == 0 {
fmt.Printf("lieu: nothing to crawl; the webring file %s is empty\n", config.Crawler.Webring)
util.Exit()
}
crawler.Crawl(config)
case "ingest":
exists := util.CheckFileExists(config.Data.Source)
if !exists {
fmt.Printf("lieu: data source %s does not exist\n", config.Data.Source)
fmt.Println("lieu: try running `lieu crawl`")
util.Exit()
}
sourceLen := len(util.ReadList(config.Data.Source, "\n"))
if sourceLen == 0 {
fmt.Printf("lieu: nothing to ingest; data source %s is empty\n", config.Data.Source)
fmt.Println("lieu: try running `lieu crawl`")
util.Exit()
}
fmt.Println("lieu: creating a new database & initiating ingestion")
ingest.Ingest(config)
case "search":
exists := util.CheckFileExists(config.Data.Database)
if !exists {
util.DatabaseDoesNotExist(config.Data.Database)
}
interactiveMode(config.Data.Database)
case "random":
exists := util.CheckFileExists(config.Data.Database)
if !exists {
util.DatabaseDoesNotExist(config.Data.Database)
}
db := database.InitDB(config.Data.Database)
fmt.Println(database.GetRandomPage(db))
case "host":
exists := util.CheckFileExists(config.Data.Database)
if !exists {
util.DatabaseDoesNotExist(config.Data.Database)
}
open := util.CheckPortOpen(config.General.Port)
if !open {
fmt.Printf("lieu: port %d is not open; try another one\n", config.General.Port)
util.Exit()
}
server.Serve(config)
default:
fmt.Println("Lieu: no such command, currently. Try `lieu help`")
}
}
func interactiveMode(databasePath string) {
db := database.InitDB(databasePath)
reader := bufio.NewReader(os.Stdin)
for {
fmt.Printf("> ")
input, err := reader.ReadString('\n')
util.Check(err)
input = strings.TrimSuffix(input, "\n")
pages := database.SearchWordsByScore(db, util.Inflect(strings.Fields(input)))
for _, pageData := range pages {
fmt.Println(pageData.URL)
if len(pageData.About) > 0 {
fmt.Println(pageData.About)
}
}
}
}