From fc82ea160875dabf7d774f207a9e49fce05eeb06 Mon Sep 17 00:00:00 2001 From: Marty Schoch Date: Wed, 10 Nov 2021 16:20:26 -0500 Subject: [PATCH] fix score boosting issues While investigating the behavior of several queries using the query boost capability a few issues were found. First, an optimization in the boolean query, would replace the boolean searcher with internal "should" searcher, in the case where it was the only one (no "must" or "must not"). This optimization is valid, but as implemented today, it looses track of the users boost. As this seems to be a minor optimization, it is temporarily disabled to allow for correct boosting behavior. Second, the boolean searcher, which by default will use a composite scorer (sum constituent scores), was ignoring the user requested boost value. A new boosting variant of the composite scorer is introduced. This variant first sums the scores, then multiplies by the boost. Third, the numeric range searcher was defaulting to a constant score of 1.0, and it too ignored the requested boost value. The code has been updated to return a constant score equal to the boost (1.0 * boost). --- query.go | 11 ++-- search/similarity/composite.go | 35 ++++++++--- search_test.go | 109 +++++++++++++++++++++++++++++++++ 3 files changed, 143 insertions(+), 12 deletions(-) diff --git a/query.go b/query.go index 4404ea4..f9a6156 100644 --- a/query.go +++ b/query.go @@ -208,9 +208,10 @@ func (q *BooleanQuery) Searcher(i search.Reader, options search.SearcherOptions) if mustSearcher == nil && shouldSearcher == nil && mustNotSearcher == nil { // if all 3 are nil, return MatchNone return searcher.NewMatchNoneSearcher(i, options) - } else if mustSearcher == nil && shouldSearcher != nil && mustNotSearcher == nil { - // optimization, if only should searcher, just return it instead - return shouldSearcher, nil + // } else if mustSearcher == nil && shouldSearcher != nil && mustNotSearcher == nil { + // DISABLED optimization, if only should searcher, just return it instead + // While logically correct, returning the shouldSearcher looses the desired boost. + // return shouldSearcher, nil } else if mustSearcher == nil && shouldSearcher == nil && mustNotSearcher != nil { // if only mustNotSearcher, start with MatchAll var err error @@ -221,7 +222,7 @@ func (q *BooleanQuery) Searcher(i search.Reader, options search.SearcherOptions) } if q.scorer == nil { - q.scorer = similarity.NewCompositeSumScorer() + q.scorer = similarity.NewCompositeSumScorerWithBoost(q.boost.Value()) } return searcher.NewBooleanSearcher(mustSearcher, shouldSearcher, mustNotSearcher, q.scorer, options) @@ -1143,7 +1144,7 @@ func (q *NumericRangeQuery) Searcher(i search.Reader, options search.SearcherOpt field = options.DefaultSearchField } if q.scorer == nil { - q.scorer = similarity.ConstantScorer(1) + q.scorer = similarity.ConstantScorer(q.boost.Value()) } return searcher.NewNumericRangeSearcher(i, q.min, q.max, q.inclusiveMin, q.inclusiveMax, field, q.boost.Value(), q.scorer, similarity.NewCompositeSumScorer(), options) diff --git a/search/similarity/composite.go b/search/similarity/composite.go index fb7d0b3..574b022 100644 --- a/search/similarity/composite.go +++ b/search/similarity/composite.go @@ -14,12 +14,24 @@ package similarity -import "github.com/blugelabs/bluge/search" +import ( + "github.com/blugelabs/bluge/search" +) -type CompositeSumScorer struct{} +type CompositeSumScorer struct { + boost float64 +} func NewCompositeSumScorer() *CompositeSumScorer { - return &CompositeSumScorer{} + return &CompositeSumScorer{ + boost: 1.0, + } +} + +func NewCompositeSumScorerWithBoost(boost float64) *CompositeSumScorer { + return &CompositeSumScorer{ + boost: boost, + } } func (c *CompositeSumScorer) ScoreComposite(constituents []*search.DocumentMatch) float64 { @@ -27,7 +39,7 @@ func (c *CompositeSumScorer) ScoreComposite(constituents []*search.DocumentMatch for _, constituent := range constituents { rv += constituent.Score } - return rv + return rv * c.boost } func (c *CompositeSumScorer) ExplainComposite(constituents []*search.DocumentMatch) *search.Explanation { @@ -37,7 +49,16 @@ func (c *CompositeSumScorer) ExplainComposite(constituents []*search.DocumentMat sum += constituent.Score children = append(children, constituent.Explanation) } - return search.NewExplanation(sum, - "sum of:", - children...) + if c.boost == 1 { + return search.NewExplanation(sum, + "sum of:", + children...) + } + + return search.NewExplanation(sum*c.boost, + "computed as boost * sum", + search.NewExplanation(c.boost, "boost"), + search.NewExplanation(sum, + "sum of:", + children...)) } diff --git a/search_test.go b/search_test.go index b9054c8..ef1da5b 100644 --- a/search_test.go +++ b/search_test.go @@ -1271,3 +1271,112 @@ func TestAllMatchesWithAggregationIssue31(t *testing.T) { // should not panic with the fix request.AddAggregation("score", aggregations.MaxStartingAt(search.DocumentScore(), 0)) } + +func TestNumericRangeSearchBoost(t *testing.T) { + tmpIndexPath := createTmpIndexPath(t) + defer cleanupTmpIndexPath(t, tmpIndexPath) + + config := DefaultConfig(tmpIndexPath) + indexWriter, err := OpenWriter(config) + if err != nil { + t.Fatal(err) + } + + doc := NewDocument("doc"). + AddField(NewNumericField("age", 25.0)) + + batch := NewBatch() + batch.Update(doc.ID(), doc) + + if err = indexWriter.Batch(batch); err != nil { + t.Fatal(err) + } + + indexReader, err := indexWriter.Reader() + if err != nil { + t.Fatalf("error getting index reader: %v", err) + } + + // numeric range query with boost 5.0 + q := NewNumericRangeQuery(20, 30).SetField("age").SetBoost(5.0) + sr := NewTopNSearch(10, q) + res, err := indexReader.Search(context.Background(), sr) + if err != nil { + t.Fatal(err) + } + + next, err := res.Next() + if err != nil { + t.Fatalf("error getting first hit") + } + if next == nil { + t.Fatalf("expected at least one hit") + } + if next.Score != 5.0 { + t.Fatalf("expected score to be 5.0, got %f", next.Score) + } + + err = indexReader.Close() + if err != nil { + t.Fatal(err) + } + err = indexWriter.Close() + if err != nil { + t.Fatal(err) + } +} + +func TestBooleanSearchBoost(t *testing.T) { + tmpIndexPath := createTmpIndexPath(t) + defer cleanupTmpIndexPath(t, tmpIndexPath) + + config := DefaultConfig(tmpIndexPath) + indexWriter, err := OpenWriter(config) + if err != nil { + t.Fatal(err) + } + + doc := NewDocument("doc"). + AddField(NewNumericField("age", 25.0)) + + batch := NewBatch() + batch.Update(doc.ID(), doc) + + if err = indexWriter.Batch(batch); err != nil { + t.Fatal(err) + } + + indexReader, err := indexWriter.Reader() + if err != nil { + t.Fatalf("error getting index reader: %v", err) + } + + // numeric range query with no boost + nrq := NewNumericRangeQuery(20, 30).SetField("age") + bq := NewBooleanQuery().AddMust(nrq).SetBoost(3.0) + sr := NewTopNSearch(10, bq) + res, err := indexReader.Search(context.Background(), sr) + if err != nil { + t.Fatal(err) + } + + next, err := res.Next() + if err != nil { + t.Fatalf("error getting first hit") + } + if next == nil { + t.Fatalf("expected at least one hit") + } + if next.Score != 3.0 { + t.Fatalf("expected score to be 3.0, got %f", next.Score) + } + + err = indexReader.Close() + if err != nil { + t.Fatal(err) + } + err = indexWriter.Close() + if err != nil { + t.Fatal(err) + } +}