From 7606107a4a4108470bcab6d8cd7f111562babde6 Mon Sep 17 00:00:00 2001
From: Zach Peters
Date: Wed, 14 Oct 2020 20:32:45 -0500
Subject: [PATCH] added some test coverage

---
 pkg/crawler/crawler.go      | 15 +++------
 pkg/crawler/crawler_test.go | 63 ++++++++++++++++++++++++++++++++++---
 2 files changed, 62 insertions(+), 16 deletions(-)

diff --git a/pkg/crawler/crawler.go b/pkg/crawler/crawler.go
index be12003..9949fdf 100644
--- a/pkg/crawler/crawler.go
+++ b/pkg/crawler/crawler.go
@@ -44,16 +44,17 @@ var (
 	urlRegExp, _ = regexp.Compile(`^(https|http)://(-\.)?([^\s/?\.#-]+\.?)+(/[^\s]*)?$`)
 )
 
-// NewCrawler ...
 func NewCrawler(archive string) (Crawler, error) {
 	return Crawler{
 		Archive: archive,
 	}, nil
 }
 
-// Save ...
 func (c *Crawler) Save() error {
-	ensureArchive(c.Archive)
+	err := os.MkdirAll(c.Archive, 0700)
+	if err != nil {
+		return err
+	}
 
 	// save all sites one by one
 	for _, s := range c.Sites {
@@ -187,7 +188,6 @@ func createSiteFilename(url string, htmlBody []byte) (string, error) {
 	return title, nil
 }
 
-// Crawl ...
 func (c *Crawler) Crawl() error {
 	for _, u := range c.Urls {
 		fmt.Printf("Crawling %s...\n", u)
@@ -264,13 +264,6 @@ func getTextBody(htmlBody []byte) (body []byte, err error) {
 	return []byte(text), nil
 }
 
-func ensureArchive(p string) {
-	err := os.MkdirAll(p, 0700)
-	if err != nil {
-		panic(err)
-	}
-}
-
 func generatePDF(path, url string) error {
 	pdfg, err := wkhtmltopdf.NewPDFGenerator()
 	if err != nil {
diff --git a/pkg/crawler/crawler_test.go b/pkg/crawler/crawler_test.go
index 90258da..d58faa4 100644
--- a/pkg/crawler/crawler_test.go
+++ b/pkg/crawler/crawler_test.go
@@ -1,11 +1,51 @@
 package crawler
 
 import (
+	"errors"
+	"io/ioutil"
+	"os"
+	"path/filepath"
 	"strconv"
 	"testing"
 	"time"
+
+	"github.com/stretchr/testify/require"
 )
 
+func TestSave(t *testing.T) {
+	// Set up the test environment
+	tempDir := os.TempDir()
+	archivePath := filepath.Join(tempDir, "STASHBOX")
+	defer os.RemoveAll(archivePath)
+
+	// Set up our crawler
+	c, err := NewCrawler(archivePath)
+	require.NoError(t, err)
+
+	// Add some urls
+	err = c.AddURL("http://google.com")
+	require.NoError(t, err)
+	err = c.AddURL("https://thehelpfulhacker.net")
+	require.NoError(t, err)
+
+	// Crawl the sites
+	err = c.Crawl()
+	require.NoError(t, err)
+
+	// Save the sites
+	err = c.Save()
+	require.NoError(t, err)
+
+	// Get the contents of the archivePath on the file system
+	files, err := ioutil.ReadDir(archivePath)
+	require.NoError(t, err)
+
+	// there should be two domain folders
+	require.Len(t, files, 2)
+
+	// TODO add some more sophisticated testing
+}
+
 func TestGetHtmlTitle(t *testing.T) {
 	const url = "https://github.com/zpeters/stashbox"
 	const want = "GitHub - zpeters/stashbox: Your personal Internet Archive"
@@ -39,11 +79,24 @@ func TestAddUrl(t *testing.T) {
 }
 
 func TestBuildPath(t *testing.T) {
-	p, err := buildPath("./StashDB", "http://www.google.com/a/test.html")
-	handleErr(t, err)
-	expected := "StashDB/www.google.com/a/test.html"
-	if p != expected {
-		t.Errorf("expected: %s actual: %s", expected, p)
+	var tests = []struct {
+		inputDir       string
+		inputURL       string
+		expectedOutput string
+		expectedError  error
+	}{
+		{"./StashDB", "http://www.google.com/a/test.html", "StashDB/www.google.com/a/test.html", nil},
+		// See https://golang.org/src/net/url/url_test.go "parseRequestURLTests"
+		{"./AnotherDB", " http://foo.com", "", errors.New("parse \" http://foo.com\": first path segment in URL cannot contain colon")},
+	}
+	for _, tt := range tests {
+		actual, err := buildPath(tt.inputDir, tt.inputURL)
+		require.Equal(t, tt.expectedOutput, actual)
+		if tt.expectedError == nil {
+			require.NoError(t, err)
+		} else {
+			require.EqualError(t, err, tt.expectedError.Error())
+		}
 	}
 }
 