Skip to content

Commit

Permalink
Merge pull request #11 from krakendio/empty_user_agent
Browse files Browse the repository at this point in the history
flag to reject requests with empty user-agent added
  • Loading branch information
kpacha authored Feb 2, 2023
2 parents dd4c1ed + fc81650 commit 2051d65
Show file tree
Hide file tree
Showing 2 changed files with 38 additions and 13 deletions.
25 changes: 14 additions & 11 deletions detector.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,10 +9,11 @@ import (

// Config defines the behaviour of the detector
type Config struct {
Denylist []string `json:"deny"`
Allowlist []string `json:"allow"`
Patterns []string `json:"patterns"`
CacheSize int `json:"cache_size"`
Denylist []string `json:"deny"`
Allowlist []string `json:"allow"`
Patterns []string `json:"patterns"`
CacheSize int `json:"cache_size"`
RejectIfEmpty bool `json:"empty_user_agent_is_bot"`
}

// DetectorFunc is a func that checks if a request was made by a bot
Expand Down Expand Up @@ -48,25 +49,27 @@ func NewDetector(cfg Config) (*Detector, error) {
patterns[i] = rp
}
return &Detector{
deny: deny,
allow: allow,
patterns: patterns,
deny: deny,
allow: allow,
patterns: patterns,
rejectIfEmpty: cfg.RejectIfEmpty,
}, nil
}

// Detector is a struct able to detect bot-made requests
type Detector struct {
deny map[string]struct{}
allow map[string]struct{}
patterns []*regexp.Regexp
deny map[string]struct{}
allow map[string]struct{}
patterns []*regexp.Regexp
rejectIfEmpty bool
}

// IsBot returns true if the request was made by a bot
func (d *Detector) IsBot(r *http.Request) bool {
userAgent := r.Header.Get("User-Agent")

if userAgent == "" {
return false
return d.rejectIfEmpty
}
if _, ok := d.allow[userAgent]; ok {
return false
Expand Down
26 changes: 24 additions & 2 deletions detector_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,28 @@ import (
"testing"
)

// TestNew_rejectEmptyHeader checks that a detector built with RejectIfEmpty
// enabled reports a request carrying an empty User-Agent header as a bot.
func TestNew_rejectEmptyHeader(t *testing.T) {
	d, err := New(Config{
		Denylist:  []string{"a", "b"},
		Allowlist: []string{"c", "Pingdom.com_bot_version_1.1"},
		Patterns: []string{
			`(Pingdom.com_bot_version_)(\d+)\.(\d+)`,
			`(facebookexternalhit)/(\d+)\.(\d+)`,
		},
		RejectIfEmpty: true,
	})
	if err != nil {
		// Without a detector there is nothing left to verify.
		t.Fatal(err)
	}

	req, err := http.NewRequest("GET", "http://example.com", http.NoBody)
	if err != nil {
		t.Fatal(err)
	}
	// Explicitly send an empty User-Agent value.
	req.Header.Add("User-Agent", "")
	if !d(req) {
		t.Error("req with empty User-Agent has not been detected as a bot")
	}
}

func TestNew_noLRU(t *testing.T) {
d, err := New(Config{
Denylist: []string{"a", "b"},
Expand Down Expand Up @@ -52,7 +74,7 @@ func testDetection(f DetectorFunc) error {
"c",
"Pingdom.com_bot_version_1.1",
} {
req, _ := http.NewRequest("GET", "http://example.com", nil)
req, _ := http.NewRequest("GET", "http://example.com", http.NoBody)
req.Header.Add("User-Agent", ua)
if f(req) {
return fmt.Errorf("the req #%d has been detected as a bot: %s", i, ua)
Expand All @@ -65,7 +87,7 @@ func testDetection(f DetectorFunc) error {
"facebookexternalhit/1.1",
"Pingdom.com_bot_version_1.2",
} {
req, _ := http.NewRequest("GET", "http://example.com", nil)
req, _ := http.NewRequest("GET", "http://example.com", http.NoBody)
req.Header.Add("User-Agent", ua)
if !f(req) {
return fmt.Errorf("the req #%d has not been detected as a bot: %s", i, ua)
Expand Down

0 comments on commit 2051d65

Please sign in to comment.