Commit 7606107
added some test coverage
zpeters committed Oct 15, 2020
1 parent 600a6be
Showing 2 changed files with 62 additions and 16 deletions.
15 changes: 4 additions & 11 deletions pkg/crawler/crawler.go
@@ -44,16 +44,17 @@ var (
 	urlRegExp, _ = regexp.Compile(`^(https|http)://(-\.)?([^\s/?\.#-]+\.?)+(/[^\s]*)?$`)
 )
 
-// NewCrawler ...
 func NewCrawler(archive string) (Crawler, error) {
 	return Crawler{
 		Archive: archive,
 	}, nil
 }
 
-// Save ...
 func (c *Crawler) Save() error {
-	ensureArchive(c.Archive)
+	err := os.MkdirAll(c.Archive, 0700)
+	if err != nil {
+		panic(err)
+	}
 
 	// save all sites one by one
 	for _, s := range c.Sites {
@@ -187,7 +188,6 @@ func createSiteFilename(url string, htmlBody []byte) (string, error) {
 	return title, nil
 }
 
-// Crawl ...
 func (c *Crawler) Crawl() error {
 	for _, u := range c.Urls {
 		fmt.Printf("Crawling %s...\n", u)
@@ -264,13 +264,6 @@ func getTextBody(htmlBody []byte) (body []byte, err error) {
 	return []byte(text), nil
 }
 
-func ensureArchive(p string) {
-	err := os.MkdirAll(p, 0700)
-	if err != nil {
-		panic(err)
-	}
-}
-
 func generatePDF(path, url string) error {
 	pdfg, err := wkhtmltopdf.NewPDFGenerator()
 	if err != nil {
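
Note on the Save change: the inlined os.MkdirAll keeps the panic(err) behavior of the deleted ensureArchive helper, even though Save already returns an error. A natural follow-up, sketched here as a hypothetical diff that is not part of this commit, would be to propagate the failure instead (fmt is already imported by crawler.go):

    -	err := os.MkdirAll(c.Archive, 0700)
    -	if err != nil {
    -		panic(err)
    -	}
    +	// Return the error so callers of Save can handle an unwritable
    +	// archive path themselves instead of crashing the process.
    +	if err := os.MkdirAll(c.Archive, 0700); err != nil {
    +		return fmt.Errorf("creating archive %s: %w", c.Archive, err)
    +	}
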
63 changes: 58 additions & 5 deletions pkg/crawler/crawler_test.go
@@ -1,11 +1,51 @@
 package crawler
 
 import (
+	"errors"
+	"io/ioutil"
+	"os"
+	"path"
+	"strconv"
 	"testing"
+	"time"
 
 	"github.com/stretchr/testify/require"
 )
 
+func TestSave(t *testing.T) {
+	// Setup the test environment
+	tempDir := os.TempDir()
+	archivePath := path.Join(tempDir, "STASHBOX")
+	defer os.RemoveAll(archivePath)
+
+	// Setup our crawler
+	c, err := NewCrawler(archivePath)
+	require.NoError(t, err)
+
+	// Add some urls
+	err = c.AddURL("http://google.com")
+	require.NoError(t, err)
+	err = c.AddURL("https://thehelpfulhacker.net")
+	require.NoError(t, err)
+
+	// Crawl the sites
+	err = c.Crawl()
+	require.NoError(t, err)
+
+	// Save the sites
+	err = c.Save()
+	require.NoError(t, err)
+
+	// Get the contents of the archivePath on the file system
+	files, err := ioutil.ReadDir(archivePath)
+	require.NoError(t, err)
+
+	// there should be two domain folders
+	require.Len(t, files, 2)
+
+	// TODO add some more sophisticated testing
+}
+
 func TestGetHtmlTitle(t *testing.T) {
 	const url = "https://github.com/zpeters/stashbox"
 	const want = "GitHub - zpeters/stashbox: Your personal Internet Archive"
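
One caveat on the new TestSave setup: path.Join(os.TempDir(), "STASHBOX") is a fixed path, so two overlapping runs of the suite on one machine would trip over each other. Go 1.15's t.TempDir() is one alternative; a minimal sketch, not part of this commit:

    // Sketch: t.TempDir() hands each test a unique directory and deletes
    // it automatically, so the explicit defer os.RemoveAll can go away.
    archivePath := path.Join(t.TempDir(), "STASHBOX")
    c, err := NewCrawler(archivePath)
    require.NoError(t, err)
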
@@ -39,11 +79,24 @@ func TestAddUrl(t *testing.T) {
 }
 
 func TestBuildPath(t *testing.T) {
-	p, err := buildPath("./StashDB", "http://www.google.com/a/test.html")
-	handleErr(t, err)
-	expected := "StashDB/www.google.com/a/test.html"
-	if p != expected {
-		t.Errorf("expected: %s actual: %s", expected, p)
+	var tests = []struct {
+		inputDir       string
+		inputURL       string
+		expectedOutput string
+		expectedError  error
+	}{
+		{"./StashDB", "http://www.google.com/a/test.html", "StashDB/www.google.com/a/test.html", nil},
+		// See https://golang.org/src/net/url/url_test.go "parseRequestURLTests"
+		{"./AnotherDB", " http://foo.com", "", errors.New("parse \" http://foo.com\": first path segment in URL cannot contain colon")},
+	}
+	for _, tt := range tests {
+		actual, err := buildPath(tt.inputDir, tt.inputURL)
+		require.Equal(t, tt.expectedOutput, actual)
+		if tt.expectedError == nil {
+			require.NoError(t, err)
+		} else {
+			require.Equal(t, tt.expectedError.Error(), err.Error())
+		}
 	}
 }
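
A side note on the new error assertion: comparing err.Error() strings works, but the exact text belongs to net/url and has changed across Go releases. testify's require.EqualError expresses the same check more directly; a sketch against the same tests table:

    // Sketch: EqualError compares err.Error() to the expected string and
    // fails with a readable message when the text drifts.
    if tt.expectedError == nil {
    	require.NoError(t, err)
    } else {
    	require.EqualError(t, err, tt.expectedError.Error())
    }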

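As for the TODO in TestSave, one incremental step would be asserting which domain folders were created rather than just counting them. The names below are hypothetical: they assume the crawler keys folders by URL host, and a redirect (say, to a www. host) would change them:

    // Hypothetical extension of TestSave: check the folder names, not
    // just the count. The expected hosts are assumptions, noted above.
    var names []string
    for _, f := range files {
    	names = append(names, f.Name())
    }
    require.ElementsMatch(t, []string{"google.com", "thehelpfulhacker.net"}, names)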
