Skip to content

Commit

Permalink
fix score boosting issues
Browse files Browse the repository at this point in the history
While investigating the behavior of several queries using
the query boost capability a few issues were found.

First, an optimization in the boolean query, would replace
the boolean searcher with internal "should" searcher, in
the case where it was the only one (no "must" or "must not").
This optimization is valid, but as implemented today, it looses
track of the users boost.  As this seems to be a minor
optimization, it is temporarily disabled to allow for correct
boosting behavior.

Second, the boolean searcher, which by default will use a
composite scorer (sum constituent scores), was ignoring the
user requested boost value.  A new boosting variant of the
composite scorer is introduced.  This variant first sums
the scores, then multiplies by the boost.

Third, the numeric range searcher was defaulting to a constant
score of 1.0, and it too ignored the requested boost value.  The
code has been updated to return a constant score equal to the
boost (1.0 * boost).
  • Loading branch information
mschoch committed Nov 10, 2021
1 parent 6208d09 commit fc82ea1
Show file tree
Hide file tree
Showing 3 changed files with 143 additions and 12 deletions.
11 changes: 6 additions & 5 deletions query.go
Original file line number Diff line number Diff line change
Expand Up @@ -208,9 +208,10 @@ func (q *BooleanQuery) Searcher(i search.Reader, options search.SearcherOptions)
if mustSearcher == nil && shouldSearcher == nil && mustNotSearcher == nil {
// if all 3 are nil, return MatchNone
return searcher.NewMatchNoneSearcher(i, options)
} else if mustSearcher == nil && shouldSearcher != nil && mustNotSearcher == nil {
// optimization, if only should searcher, just return it instead
return shouldSearcher, nil
// } else if mustSearcher == nil && shouldSearcher != nil && mustNotSearcher == nil {
// DISABLED optimization, if only should searcher, just return it instead
// While logically correct, returning the shouldSearcher looses the desired boost.
// return shouldSearcher, nil
} else if mustSearcher == nil && shouldSearcher == nil && mustNotSearcher != nil {
// if only mustNotSearcher, start with MatchAll
var err error
Expand All @@ -221,7 +222,7 @@ func (q *BooleanQuery) Searcher(i search.Reader, options search.SearcherOptions)
}

if q.scorer == nil {
q.scorer = similarity.NewCompositeSumScorer()
q.scorer = similarity.NewCompositeSumScorerWithBoost(q.boost.Value())
}

return searcher.NewBooleanSearcher(mustSearcher, shouldSearcher, mustNotSearcher, q.scorer, options)
Expand Down Expand Up @@ -1143,7 +1144,7 @@ func (q *NumericRangeQuery) Searcher(i search.Reader, options search.SearcherOpt
field = options.DefaultSearchField
}
if q.scorer == nil {
q.scorer = similarity.ConstantScorer(1)
q.scorer = similarity.ConstantScorer(q.boost.Value())
}
return searcher.NewNumericRangeSearcher(i, q.min, q.max, q.inclusiveMin, q.inclusiveMax, field,
q.boost.Value(), q.scorer, similarity.NewCompositeSumScorer(), options)
Expand Down
35 changes: 28 additions & 7 deletions search/similarity/composite.go
Original file line number Diff line number Diff line change
Expand Up @@ -14,20 +14,32 @@

package similarity

import "github.com/blugelabs/bluge/search"
import (
"github.com/blugelabs/bluge/search"
)

type CompositeSumScorer struct{}
type CompositeSumScorer struct {
boost float64
}

func NewCompositeSumScorer() *CompositeSumScorer {
return &CompositeSumScorer{}
return &CompositeSumScorer{
boost: 1.0,
}
}

func NewCompositeSumScorerWithBoost(boost float64) *CompositeSumScorer {
return &CompositeSumScorer{
boost: boost,
}
}

func (c *CompositeSumScorer) ScoreComposite(constituents []*search.DocumentMatch) float64 {
var rv float64
for _, constituent := range constituents {
rv += constituent.Score
}
return rv
return rv * c.boost
}

func (c *CompositeSumScorer) ExplainComposite(constituents []*search.DocumentMatch) *search.Explanation {
Expand All @@ -37,7 +49,16 @@ func (c *CompositeSumScorer) ExplainComposite(constituents []*search.DocumentMat
sum += constituent.Score
children = append(children, constituent.Explanation)
}
return search.NewExplanation(sum,
"sum of:",
children...)
if c.boost == 1 {
return search.NewExplanation(sum,
"sum of:",
children...)
}

return search.NewExplanation(sum*c.boost,
"computed as boost * sum",
search.NewExplanation(c.boost, "boost"),
search.NewExplanation(sum,
"sum of:",
children...))
}
109 changes: 109 additions & 0 deletions search_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -1271,3 +1271,112 @@ func TestAllMatchesWithAggregationIssue31(t *testing.T) {
// should not panic with the fix
request.AddAggregation("score", aggregations.MaxStartingAt(search.DocumentScore(), 0))
}

func TestNumericRangeSearchBoost(t *testing.T) {
tmpIndexPath := createTmpIndexPath(t)
defer cleanupTmpIndexPath(t, tmpIndexPath)

config := DefaultConfig(tmpIndexPath)
indexWriter, err := OpenWriter(config)
if err != nil {
t.Fatal(err)
}

doc := NewDocument("doc").
AddField(NewNumericField("age", 25.0))

batch := NewBatch()
batch.Update(doc.ID(), doc)

if err = indexWriter.Batch(batch); err != nil {
t.Fatal(err)
}

indexReader, err := indexWriter.Reader()
if err != nil {
t.Fatalf("error getting index reader: %v", err)
}

// numeric range query with boost 5.0
q := NewNumericRangeQuery(20, 30).SetField("age").SetBoost(5.0)
sr := NewTopNSearch(10, q)
res, err := indexReader.Search(context.Background(), sr)
if err != nil {
t.Fatal(err)
}

next, err := res.Next()
if err != nil {
t.Fatalf("error getting first hit")
}
if next == nil {
t.Fatalf("expected at least one hit")
}
if next.Score != 5.0 {
t.Fatalf("expected score to be 5.0, got %f", next.Score)
}

err = indexReader.Close()
if err != nil {
t.Fatal(err)
}
err = indexWriter.Close()
if err != nil {
t.Fatal(err)
}
}

func TestBooleanSearchBoost(t *testing.T) {
tmpIndexPath := createTmpIndexPath(t)
defer cleanupTmpIndexPath(t, tmpIndexPath)

config := DefaultConfig(tmpIndexPath)
indexWriter, err := OpenWriter(config)
if err != nil {
t.Fatal(err)
}

doc := NewDocument("doc").
AddField(NewNumericField("age", 25.0))

batch := NewBatch()
batch.Update(doc.ID(), doc)

if err = indexWriter.Batch(batch); err != nil {
t.Fatal(err)
}

indexReader, err := indexWriter.Reader()
if err != nil {
t.Fatalf("error getting index reader: %v", err)
}

// numeric range query with no boost
nrq := NewNumericRangeQuery(20, 30).SetField("age")
bq := NewBooleanQuery().AddMust(nrq).SetBoost(3.0)
sr := NewTopNSearch(10, bq)
res, err := indexReader.Search(context.Background(), sr)
if err != nil {
t.Fatal(err)
}

next, err := res.Next()
if err != nil {
t.Fatalf("error getting first hit")
}
if next == nil {
t.Fatalf("expected at least one hit")
}
if next.Score != 3.0 {
t.Fatalf("expected score to be 3.0, got %f", next.Score)
}

err = indexReader.Close()
if err != nil {
t.Fatal(err)
}
err = indexWriter.Close()
if err != nil {
t.Fatal(err)
}
}

0 comments on commit fc82ea1

Please sign in to comment.