Skip to content

Commit

Permalink
Merge branch 'wip-i255-single-match'
Browse files Browse the repository at this point in the history
  • Loading branch information
mna committed Jun 14, 2021
2 parents e1f2d60 + b3953ef commit a9cfd67
Show file tree
Hide file tree
Showing 4 changed files with 156 additions and 1 deletion.
20 changes: 20 additions & 0 deletions bench_traversal_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@ package goquery

import (
"testing"

"github.com/andybalholm/cascadia"
)

func BenchmarkFind(b *testing.B) {
Expand Down Expand Up @@ -800,3 +802,21 @@ func BenchmarkClosestNodes(b *testing.B) {
b.Fatalf("want 2, got %d", n)
}
}

// BenchmarkSingleMatcher runs FindMatcher on the shared test document with a
// cascadia-compiled `div` matcher, first as-is ("multi") and then wrapped by
// SingleMatcher ("single"), so the two can be compared side by side.
func BenchmarkSingleMatcher(b *testing.B) {
	doc := Doc()
	all := cascadia.MustCompile(`div`)
	first := SingleMatcher(all)
	b.ResetTimer()

	// The sub-benchmarks run in slice order: "multi", then "single".
	cases := []struct {
		name    string
		matcher Matcher
	}{
		{name: "multi", matcher: all},
		{name: "single", matcher: first},
	}
	for _, c := range cases {
		c := c // per-iteration copy for the closure (needed before Go 1.22)
		b.Run(c.name, func(b *testing.B) {
			for i := 0; i < b.N; i++ {
				_ = doc.FindMatcher(c.matcher)
			}
		})
	}
}
28 changes: 28 additions & 0 deletions example_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -80,3 +80,31 @@ func ExampleNewDocumentFromReader_string() {

// Output: Header
}

// ExampleSingle contrasts a plain selector string, which selects every
// matching node, with goquery.Single, which selects the first match only.
func ExampleSingle() {
	const markup = `
<html>
<body>
<div>1</div>
<div>2</div>
<div>3</div>
</body>
</html>
`
	doc, err := goquery.NewDocumentFromReader(strings.NewReader(markup))
	if err != nil {
		log.Fatal(err)
	}

	// A selector string passed to Find selects all three <div> elements.
	everyDiv := doc.Find("div")
	fmt.Println(everyDiv.Text())

	// The Matcher returned by goquery.Single stops at the first <div>.
	firstDiv := doc.FindMatcher(goquery.Single("div"))
	fmt.Println(firstDiv.Text())

	// Output:
	// 123
	// 1
}
64 changes: 63 additions & 1 deletion type.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,6 @@ import (
"net/url"

"github.com/andybalholm/cascadia"

"golang.org/x/net/html"
)

Expand Down Expand Up @@ -122,6 +121,45 @@ type Matcher interface {
Filter([]*html.Node) []*html.Node
}

// Single compiles the selector string into a Matcher that selects at most one
// node: the first one that matches the selector.
//
// Selection.Find and the other functions that take a selector string to
// select nodes normally use every match for that selector. With the Matcher
// returned by Single, the search stops once the first match is found, so the
// following two statements are semantically equivalent:
//
//	sel1 := doc.Find("a").First()
//	sel2 := doc.FindMatcher(goquery.Single("a"))
//
// The form using Single is optimized and can be much faster on large
// documents.
//
// Only the MatchAll method differs from that of a standard Matcher. The
// single-match limit therefore applies only to Selection methods that use the
// Matcher to select nodes. Methods that use it to filter a Selection, or to
// check whether a node matches, see the unmodified behaviour: for example,
// FilterMatcher(Single("div")) still yields every "div" that was already in
// the Selection.
func Single(selector string) Matcher {
	compiled := compileMatcher(selector)
	return singleMatcher{Matcher: compiled}
}

// SingleMatcher returns a Matcher that matches the same nodes as m, but that
// stops after the first match is found. If m was itself produced by Single or
// SingleMatcher, it is returned unchanged rather than wrapped a second time.
//
// See the documentation of function Single for more details.
func SingleMatcher(m Matcher) Matcher {
	switch m.(type) {
	case singleMatcher:
		// Already limited to a single match; nothing to add.
		return m
	default:
		return singleMatcher{Matcher: m}
	}
}

// compileMatcher compiles the selector string s and returns
// the corresponding Matcher. If s is an invalid selector string,
// it returns a Matcher that fails all matches.
Expand All @@ -133,6 +171,30 @@ func compileMatcher(s string) Matcher {
return cs
}

// singleMatcher wraps a Matcher and overrides its MatchAll method so that at
// most the first match is returned. All other Matcher methods are promoted
// unchanged from the embedded value.
type singleMatcher struct {
Matcher
}

// MatchAll returns a slice holding only the first node matched by the wrapped
// Matcher for n, or nil when there is no match.
func (m singleMatcher) MatchAll(n *html.Node) []*html.Node {
	// firstMatcher is the optional capability that allows stopping at the
	// first match (cascadia-compiled matchers all provide it).
	type firstMatcher interface {
		MatchFirst(*html.Node) *html.Node
	}

	fm, canStopEarly := m.Matcher.(firstMatcher)
	if !canStopEarly {
		// Fallback for matchers without MatchFirst (e.g. test mocks):
		// collect every match, then keep the first one. The full slice
		// expression caps the capacity at 1 so that appending to the result
		// cannot overwrite the rest of the backing array.
		all := m.Matcher.MatchAll(n)
		if len(all) == 0 {
			return nil
		}
		return all[:1:1]
	}

	// Optimized path: ask the matcher for its first match only.
	if first := fm.MatchFirst(n); first != nil {
		return []*html.Node{first}
	}
	return nil
}

// invalidMatcher is a Matcher that always fails to match. It carries no
// state, hence the empty struct.
type invalidMatcher struct{}

Expand Down
45 changes: 45 additions & 0 deletions type_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ import (
"strings"
"testing"

"github.com/andybalholm/cascadia"
"golang.org/x/net/html"
)

Expand Down Expand Up @@ -208,3 +209,47 @@ func TestIssue103(t *testing.T) {
}
t.Log(text)
}

// TestSingle checks that Single and SingleMatcher limit node selection to the
// first match, and that they leave filtering untouched.
func TestSingle(t *testing.T) {
	const src = `
<html>
<body>
<div class="b">1</div>
<div class="a">2</div>
<div class="a">3</div>
<p class="b">4</p>
</body>
</html>
`
	doc, err := NewDocumentFromReader(strings.NewReader(src))
	if err != nil {
		t.Fatal(err)
	}

	// Selecting with Single keeps the first <div> only.
	if got := doc.FindMatcher(Single("div")).Text(); got != "1" {
		t.Fatalf("want %q, got %q", "1", got)
	}

	// Verify semantic equivalence of Find(...).First() and Single(...).
	viaFirst := doc.Find("div").First().Text()
	viaSingle := doc.FindMatcher(Single("div")).Text()
	if viaFirst != viaSingle {
		t.Fatalf("want sel1 to equal sel2")
	}

	// Used as a filter on an existing selection, Single has no effect: both
	// ".a" divs are kept.
	filtered := doc.Find("div").FilterMatcher(Single(".a"))
	if got := filtered.Text(); got != "23" {
		t.Fatalf("want %q, got %q", "23", got)
	}

	// AddMatcher selects nodes, so only the first ".a" node is added to the
	// two ".b" nodes.
	classA := cascadia.MustCompile(".a")
	classB := cascadia.MustCompile(".b")
	combined := doc.FindMatcher(classB).AddMatcher(SingleMatcher(classA))
	if got := combined.Text(); got != "142" {
		t.Fatalf("want %q, got %q", "142", got)
	}
}

0 comments on commit a9cfd67

Please sign in to comment.