lint, vet cleanup; added ignores for binaries
zpeters committed Oct 14, 2020
1 parent b050d53 commit 600a6be
Showing 4 changed files with 26 additions and 21 deletions.
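
Most of the churn below is mechanical: golint expects initialisms such as URL and HTML to keep one case throughout a Go identifier, so Url and Html are flagged. A minimal illustration of the warning and the lint-clean spelling (the page type below is invented for this note, not taken from the repo):

    // golint flags mixed-case initialisms in identifiers:
    type page struct {
        HtmlBody []byte // "struct field HtmlBody should be HTMLBody"
        Url      string // "struct field Url should be URL"
    }

    // The lint-clean version keeps the whole initialism uppercase:
    type cleanPage struct {
        HTMLBody []byte
        URL      string
    }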
.gitignore: 5 additions & 0 deletions

@@ -15,4 +15,9 @@
 # vendor/
 coverage.txt
 
+# ignore binaries
+stashbox
+stashbox.exe
 
+# ignore default archive location
+stashDb/
cmd/stashbox/main.go: 1 addition & 1 deletion

@@ -63,7 +63,7 @@ func main() {
 		panic(err)
 	}
 
-	err = c.AddUrl(*url)
+	err = c.AddURL(*url)
 	if err != nil {
 		panic(err)
 	}
pkg/crawler/crawler.go: 16 additions & 16 deletions

@@ -23,9 +23,9 @@ import (

 // Site ...
 type Site struct {
-	HtmlBody []byte
+	HTMLBody []byte
 	TextBody []byte
-	Url      string
+	URL      string
 	Title    string
 }

@@ -38,7 +38,7 @@ type Crawler struct {

 // Error Messages
 var (
-	errNoTitleInHtml = errors.New("No title tag in HTML response")
+	errNoTitleInHTML = errors.New("No title tag in HTML response")
 	// regular expression from: https://mathiasbynens.be/demo/url-regex,
 	// by @imme_emosol
 	urlRegExp, _ = regexp.Compile(`^(https|http)://(-\.)?([^\s/?\.#-]+\.?)+(/[^\s]*)?$`)
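
AddURL (later in this diff) checks candidate URLs against urlRegExp before queuing them. A standalone sketch of what the pattern accepts, reusing the exact expression above; the program itself is illustrative and not part of the commit:

    package main

    import (
        "fmt"
        "regexp"
    )

    func main() {
        urlRegExp := regexp.MustCompile(`^(https|http)://(-\.)?([^\s/?\.#-]+\.?)+(/[^\s]*)?$`)
        fmt.Println(urlRegExp.MatchString("https://example.com/page")) // true
        fmt.Println(urlRegExp.MatchString("ftp://example.com"))        // false: only http and https pass
        fmt.Println(urlRegExp.MatchString("http://bad host.com"))      // false: whitespace is rejected
    }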
@@ -57,7 +57,7 @@ func (c *Crawler) Save() error {

 	// save all sites one by one
 	for _, s := range c.Sites {
-		fmt.Printf("Saving %s...\n", s.Url)
+		fmt.Printf("Saving %s...\n", s.URL)
 		if err := c.saveSite(s); err != nil {
 			return err
 		}
@@ -68,7 +68,7 @@

 func (c *Crawler) saveSite(s Site) error {
 	dateTime := dateTimeFileName()
-	domainSubPath, err := buildPath(c.Archive, s.Url)
+	domainSubPath, err := buildPath(c.Archive, s.URL)
 	if err != nil {
 		return err
 	}
@@ -81,7 +81,7 @@ func (c *Crawler) saveSite(s Site) error {
 	// save the html
 	htmlFileName := fmt.Sprintf("%s.html", dateTime)
 	htmlSavePath := path.Join(domainSubPath, htmlFileName)
-	err = ioutil.WriteFile(htmlSavePath, s.HtmlBody, 0600)
+	err = ioutil.WriteFile(htmlSavePath, s.HTMLBody, 0600)
 	if err != nil {
 		return err
 	}
@@ -97,7 +97,7 @@ func (c *Crawler) saveSite(s Site) error {
 	// save the pdf
 	pdfFileName := fmt.Sprintf("%s.pdf", dateTime)
 	pdfSavePath := path.Join(domainSubPath, pdfFileName)
-	if err := generatePDF(pdfSavePath, s.Url); err != nil {
+	if err := generatePDF(pdfSavePath, s.URL); err != nil {
 		return err
 	}
 	return nil
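
Pieced together, saveSite writes one timestamped file per format into a per-domain subdirectory of c.Archive (the text body is handled in the collapsed region between the hunks above). Assuming buildPath keys the subdirectory on the URL's host, and given the stashDb/ default from .gitignore, a capture of https://example.com would land roughly as follows (the timestamp format constant isn't shown in this diff, so the names are placeholders):

    stashDb/
        example.com/
            <timestamp>.html   raw page (s.HTMLBody)
            <timestamp>.txt    extracted text (s.TextBody)
            <timestamp>.pdf    rendered by generatePDF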
@@ -128,8 +128,8 @@ func dateTimeFileName() string {
 	return t.Format(timestamp)
 }
 
-// AddUrl ...
-func (c *Crawler) AddUrl(url string) error {
+// AddURL will add the url to our list of urls
+func (c *Crawler) AddURL(url string) error {
 	url = strings.TrimSpace(url)
 	if len(url) == 0 {
 		return errors.New("URL can't be empty or only containing space")
@@ -155,10 +155,10 @@ func createSiteFilename(url string, htmlBody []byte) (string, error) {
 	forbiddenCharactersWindows := [...]rune{'/', '<', '>', ':', '"', '\\', '|', '?', '*'}
 	reservedFilenamesWindows := [...]string{"CON", "PRN", "AUX", "NUL", "COM1", "COM2", "COM3", "COM4", "COM5", "COM6", "COM7", "COM8", "COM9", "LPT1", "LPT2", "LPT3", "LPT4", "LPT5", "LPT6", "LPT7", "LPT8", "LPT9"}
 
-	title, err := getHtmlTitle(htmlBody)
+	title, err := getHTMLTitle(htmlBody)
 
 	// if there is no title, do old way of creating hash
-	if err == errNoTitleInHtml {
+	if err == errNoTitleInHTML {
 		h := sha256.New()
 		_, err = io.WriteString(h, url)
 		if err != nil {
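
The two arrays open createSiteFilename, but the hunk ends before their use. One plausible way to apply them when turning a page title into a safe filename (sanitizeTitle is a hypothetical helper written for this note, not the repo's code):

    package main

    import (
        "fmt"
        "strings"
    )

    // sanitizeTitle drops characters Windows forbids in filenames and
    // suffixes reserved device names, mirroring the two arrays above
    // (the reserved list here is abbreviated; the diff also covers
    // COM1-COM9 and LPT1-LPT9).
    func sanitizeTitle(title string) string {
        const forbidden = `/<>:"\|?*`
        var b strings.Builder
        for _, r := range title {
            if !strings.ContainsRune(forbidden, r) {
                b.WriteRune(r)
            }
        }
        name := b.String()
        for _, reserved := range []string{"CON", "PRN", "AUX", "NUL", "COM1", "LPT1"} {
            if strings.EqualFold(name, reserved) {
                return name + "_"
            }
        }
        return name
    }

    func main() {
        fmt.Println(sanitizeTitle(`a/b:c?`)) // prints "abc"
    }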
@@ -194,7 +194,7 @@ func (c *Crawler) Crawl() error {

 		var site Site
 
-		htmlBody, err := getHtmlBody(u)
+		htmlBody, err := getHTMLBody(u)
 		if err != nil {
 			return err
 		}
@@ -212,15 +212,15 @@
 		}
 
 		site.TextBody = textBody
-		site.Url = u
+		site.URL = u
 
 		c.Sites = append(c.Sites, site)
 	}
 
 	return nil
 }
 
-func getHtmlTitle(body []byte) (title string, err error) {
+func getHTMLTitle(body []byte) (title string, err error) {
 	// HTML DOM Document
 
 	r := bytes.NewReader(body)
@@ -232,13 +232,13 @@ func getHtmlTitle(body []byte) (title string, err error) {
 	titleTag := doc.Find("title").First()
 
 	if titleTag.Size() == 0 {
-		return "", errNoTitleInHtml
+		return "", errNoTitleInHTML
 	}
 
 	return titleTag.Text(), nil
 }
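
doc is queried here without its construction, which sits in the collapsed lines between this hunk and the previous one. Find("title") is goquery's API (github.com/PuerkitoBio/goquery), so the elided step presumably resembles this sketch rather than being the verbatim source:

    // Presumed shape of the collapsed parsing step in getHTMLTitle:
    doc, err := goquery.NewDocumentFromReader(r)
    if err != nil {
        return "", err
    }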

-func getHtmlBody(url string) (body []byte, err error) {
+func getHTMLBody(url string) (body []byte, err error) {
 	// #nosec - gosec will detect this as a G107 error
 	// the point of this function *is* to accept a variable URL
 	resp, err := http.Get(url)
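
The hunk stops at the http.Get call; the remainder of getHTMLBody presumably drains and returns the response body, matching the function's named return values. A hedged sketch of that standard pattern, not the verbatim source:

    // Presumed continuation after the Get call:
    if err != nil {
        return nil, err
    }
    defer func() {
        if cerr := resp.Body.Close(); cerr != nil && err == nil {
            err = cerr
        }
    }()
    body, err = ioutil.ReadAll(resp.Body)
    return body, err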
pkg/crawler/crawler_test.go: 4 additions & 4 deletions

@@ -10,9 +10,9 @@ func TestGetHtmlTitle(t *testing.T) {
 	const url = "https://github.com/zpeters/stashbox"
 	const want = "GitHub - zpeters/stashbox: Your personal Internet Archive"
 
-	body, err := getHtmlBody(url)
+	body, err := getHTMLBody(url)
 	handleErr(t, err)
-	title, err := getHtmlTitle(body)
+	title, err := getHTMLTitle(body)
 	handleErr(t, err)
 	if title != want {
 		t.Errorf("Wrong title found. Want: %s, Got : %s", want, title)
@@ -28,7 +28,7 @@ func TestAddUrl(t *testing.T) {

 	for i := 1; i <= count; i++ {
 		url := "https://www.github.com" + strconv.Itoa(i)
-		err = c.AddUrl(url)
+		err = c.AddURL(url)
 		if err != nil {
 			t.Errorf("Test case for url: '" + url + "' failed; it should pass; error:" + err.Error())
 		}
@@ -65,7 +65,7 @@ func TestCrawl(t *testing.T) {
 	}
 
 	for _, s := range crawlSites {
-		err = c.AddUrl(s)
+		err = c.AddURL(s)
 		handleErr(t, err)
 	}
 	err = c.Crawl()
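
Read together, the tests exercise the renamed public surface in its natural order: AddURL, then Crawl, then Save. A minimal end-to-end sketch under stated assumptions: the zero-value Crawler with an Archive field mirrors the c.Archive and c.Sites usage visible in this diff, and the import path is inferred from the repo layout; the package may provide a constructor this diff doesn't show.

    package main

    import (
        "log"

        "github.com/zpeters/stashbox/pkg/crawler"
    )

    func main() {
        // Assumed construction; Archive is the on-disk archive root.
        c := crawler.Crawler{Archive: "stashDb"}

        if err := c.AddURL("https://github.com/zpeters/stashbox"); err != nil {
            log.Fatal(err)
        }
        if err := c.Crawl(); err != nil { // fetch HTML, title, and text for each URL
            log.Fatal(err)
        }
        if err := c.Save(); err != nil { // write the html/txt/pdf captures under stashDb/
            log.Fatal(err)
        }
    }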
