diff --git a/crawler/crawler/crawler.go b/crawler/crawler/crawler.go index fb6f0a98..a0dcb7a4 100644 --- a/crawler/crawler/crawler.go +++ b/crawler/crawler/crawler.go @@ -344,8 +344,7 @@ func getRemoteFile(data []byte, fileRawURL string, pa PA, domain Domain) (public parser := publiccode.NewParser() parser.Strict = false parser.RemoteBaseURL = strings.TrimRight(fileRawURL, viper.GetString("CRAWLED_FILENAME")) - - err := parser.ParseInDomain(data, domain.Host, domain.BasicAuth) + err := parser.ParseInDomain(data, domain.Host, domain.UseTokenFor, domain.BasicAuth) if err != nil { log.Errorf("Error parsing publiccode.yml for %s.", fileRawURL) return *parser, err diff --git a/crawler/crawler/domain.go b/crawler/crawler/domain.go index 08bf9403..dd259121 100644 --- a/crawler/crawler/domain.go +++ b/crawler/crawler/domain.go @@ -14,8 +14,9 @@ import ( // Domain is a single code hosting service. type Domain struct { // Domains.yml data - Host string `yaml:"host"` - BasicAuth []string `yaml:"basic-auth"` + Host string `yaml:"host"` + UseTokenFor []string `yaml:"use-token-for"` + BasicAuth []string `yaml:"basic-auth"` } // API returns a Domain without tld. diff --git a/crawler/crawler/saveToElasticsearch.go b/crawler/crawler/saveToElasticsearch.go index 93d563b4..3d1f3540 100644 --- a/crawler/crawler/saveToElasticsearch.go +++ b/crawler/crawler/saveToElasticsearch.go @@ -42,7 +42,7 @@ func (c *Crawler) saveToES(repo Repository, activityIndex float64, vitality []in parser := pcode.NewParser() parser.Strict = false parser.RemoteBaseURL = strings.TrimRight(repo.FileRawURL, viper.GetString("CRAWLED_FILENAME")) - err := parser.ParseInDomain(data, repo.Domain.Host, repo.Domain.BasicAuth) + err := parser.ParseInDomain(data, repo.Domain.Host, repo.Domain.UseTokenFor, repo.Domain.BasicAuth) if err != nil { log.Errorf("Error parsing publiccode.yml: %v", err) } diff --git a/crawler/domains.yml.example b/crawler/domains.yml.example index b9e2e445..91dc68df 100644 --- a/crawler/domains.yml.example +++ b/crawler/domains.yml.example @@ -7,5 +7,9 @@ # - "" - host: "github.com" + use-token-for: + - "github.com" + - "api.github.com" + - "raw.githubusercontent.com" #basic-auth: # - "" diff --git a/crawler/go.mod b/crawler/go.mod index 36cca60a..277cb5fc 100644 --- a/crawler/go.mod +++ b/crawler/go.mod @@ -6,7 +6,7 @@ require ( github.com/ghodss/yaml v1.0.0 github.com/icza/dyno v0.0.0-20200205103839-49cb13720835 github.com/inconshreveable/mousetrap v1.0.0 // indirect - github.com/italia/publiccode-parser-go v1.1.0 + github.com/italia/publiccode-parser-go v1.1.1 github.com/mailru/easyjson v0.7.1 // indirect github.com/mattn/go-runewidth v0.0.9 // indirect github.com/mitchellh/mapstructure v1.3.2 // indirect diff --git a/crawler/go.sum b/crawler/go.sum index bfd31e8d..a45cd7a9 100644 --- a/crawler/go.sum +++ b/crawler/go.sum @@ -237,6 +237,8 @@ github.com/italia/publiccode-parser-go v1.0.1 h1:/aQc+/WNQXQKVFTw8CCj7W8XsE7/GnY github.com/italia/publiccode-parser-go v1.0.1/go.mod h1:cSfzmMsPUbAmbcXFNTuGYLhHk4Jgi3jhLjOYuMlnfOg= github.com/italia/publiccode-parser-go v1.1.0 h1:3Ejo5IXjSWi5WvC34tvmcg6zY8dqjObNqTnStEr1mms= github.com/italia/publiccode-parser-go v1.1.0/go.mod h1:cSfzmMsPUbAmbcXFNTuGYLhHk4Jgi3jhLjOYuMlnfOg= +github.com/italia/publiccode-parser-go v1.1.1 h1:Piu9/PbPej3MSog0uORFIJIpk2YOzL2NTGoe0Vasdzc= +github.com/italia/publiccode-parser-go v1.1.1/go.mod h1:cSfzmMsPUbAmbcXFNTuGYLhHk4Jgi3jhLjOYuMlnfOg= github.com/jbenet/go-context v0.0.0-20150711004518-d14ea06fba99 h1:BQSFePA1RWJOlocH6Fxy8MmwDt+yVQYULKfN0RoTN8A= github.com/jbenet/go-context v0.0.0-20150711004518-d14ea06fba99/go.mod h1:1lJo3i6rXxKeerYnT8Nvf0QmHCRC1n8sfWVwXF2Frvo= github.com/jessevdk/go-flags v1.4.0/go.mod h1:4FA24M0QyGHXBuZZK/XkWh8h0e1EYbRYJSGM75WSRxI=