diff --git a/fe/fe-core/src/main/java/com/starrocks/statistic/base/PartitionSampler.java b/fe/fe-core/src/main/java/com/starrocks/statistic/base/PartitionSampler.java index 72c86c13ba24b3..2afa6ea253f72a 100644 --- a/fe/fe-core/src/main/java/com/starrocks/statistic/base/PartitionSampler.java +++ b/fe/fe-core/src/main/java/com/starrocks/statistic/base/PartitionSampler.java @@ -27,10 +27,10 @@ import java.util.Map; public class PartitionSampler { - private static final double HIGH_WEIGHT_READ_RATIO = 0.001; - private static final double MEDIUM_HIGH_WEIGHT_READ_RATIO = 0.01; - private static final double MEDIUM_LOW_WEIGHT_READ_RATIO = 0.1; - private static final double LOW_WEIGHT_READ_RATIO = 0.8; + public static final double HIGH_WEIGHT_READ_RATIO = 0.001; + public static final double MEDIUM_HIGH_WEIGHT_READ_RATIO = 0.01; + public static final double MEDIUM_LOW_WEIGHT_READ_RATIO = 0.1; + public static final double LOW_WEIGHT_READ_RATIO = 0.8; private static final long HIGH_WEIGHT_ROWS_THRESHOLD = 10000000L; private static final long MEDIUM_HIGH_WEIGHT_ROWS_THRESHOLD = 1000000L; private static final long MEDIUM_LOW_WEIGHT_ROWS_THRESHOLD = 100000L; @@ -56,26 +56,6 @@ public PartitionSampler(double highSampleRatio, double mediumHighRatio, double m this.sampleRowsLimit = sampleRowLimit; } - public double getHighRatio() { - return highRatio; - } - - public double getMediumHighRatio() { - return mediumHighRatio; - } - - public double getMediumLowRatio() { - return mediumLowRatio; - } - - public double getLowRatio() { - return lowRatio; - } - - public int getMaxSize() { - return maxSize; - } - public long getSampleRowsLimit() { return sampleRowsLimit; } diff --git a/fe/fe-core/src/main/java/com/starrocks/statistic/hyper/HyperStatisticSQLs.java b/fe/fe-core/src/main/java/com/starrocks/statistic/hyper/HyperStatisticSQLs.java index e9eb65cd2fd43d..7e6922b7a38942 100644 --- a/fe/fe-core/src/main/java/com/starrocks/statistic/hyper/HyperStatisticSQLs.java +++ b/fe/fe-core/src/main/java/com/starrocks/statistic/hyper/HyperStatisticSQLs.java @@ -32,6 +32,11 @@ import java.util.Objects; import java.util.stream.Collectors; +import static com.starrocks.statistic.base.PartitionSampler.HIGH_WEIGHT_READ_RATIO; +import static com.starrocks.statistic.base.PartitionSampler.LOW_WEIGHT_READ_RATIO; +import static com.starrocks.statistic.base.PartitionSampler.MEDIUM_HIGH_WEIGHT_READ_RATIO; +import static com.starrocks.statistic.base.PartitionSampler.MEDIUM_LOW_WEIGHT_READ_RATIO; + public class HyperStatisticSQLs { private static final VelocityEngine DEFAULT_VELOCITY_ENGINE; @@ -131,13 +136,13 @@ public static String buildSampleSQL(Database db, Table table, Partition p, List< List groupSQLs = Lists.newArrayList(); StringBuilder sqlBuilder = new StringBuilder(); groupSQLs.add(generateRatioTable(tableName, sampler.getSampleRowsLimit(), info.getHighWeightTablets(), - sampler.getHighRatio(), "t_high")); + HIGH_WEIGHT_READ_RATIO, "t_high")); groupSQLs.add(generateRatioTable(tableName, sampler.getSampleRowsLimit(), info.getMediumHighWeightTablets(), - sampler.getMediumHighRatio(), "t_medium_high")); + MEDIUM_HIGH_WEIGHT_READ_RATIO, "t_medium_high")); groupSQLs.add(generateRatioTable(tableName, sampler.getSampleRowsLimit(), info.getMediumLowWeightTablets(), - sampler.getMediumLowRatio(), "t_medium_low")); + MEDIUM_LOW_WEIGHT_READ_RATIO, "t_medium_low")); groupSQLs.add(generateRatioTable(tableName, sampler.getSampleRowsLimit(), info.getLowWeightTablets(), - sampler.getLowRatio(), "t_low")); + LOW_WEIGHT_READ_RATIO, "t_low")); if (groupSQLs.stream().allMatch(Objects::isNull)) { groupSQLs.add("SELECT * FROM " + tableName + " LIMIT " + Config.statistic_sample_collect_rows); } diff --git a/fe/fe-core/src/test/java/com/starrocks/statistic/hyper/HyperJobTest.java b/fe/fe-core/src/test/java/com/starrocks/statistic/hyper/HyperJobTest.java index bd97dc273b2408..c7baa96ca3c5f4 100644 --- a/fe/fe-core/src/test/java/com/starrocks/statistic/hyper/HyperJobTest.java +++ b/fe/fe-core/src/test/java/com/starrocks/statistic/hyper/HyperJobTest.java @@ -43,6 +43,8 @@ import com.starrocks.statistic.base.PartitionSampler; import com.starrocks.statistic.base.PrimitiveTypeColumnStats; import com.starrocks.statistic.base.SubFieldColumnStats; +import com.starrocks.statistic.sample.SampleInfo; +import com.starrocks.statistic.sample.TabletStats; import com.starrocks.utframe.StarRocksAssert; import mockit.Mock; import mockit.MockUp; @@ -191,6 +193,13 @@ public void testFullJobs() { public void testSampleJobs() { Pair, List> pair = initColumn(List.of("c1", "c2", "c3")); + new MockUp() { + @Mock + public List getMediumHighWeightTablets() { + return List.of(new TabletStats(1, pid, 5000000)); + } + }; + List jobs = HyperQueryJob.createSampleQueryJobs(connectContext, db, table, pair.first, pair.second, List.of(pid), 1, sampler); @@ -201,8 +210,8 @@ public void testSampleJobs() { List sql = jobs.get(1).buildQuerySQL(); Assert.assertEquals(1, sql.size()); - assertContains(sql.get(0), "with base_cte_table as " + - "(SELECT * FROM `test`.`t_struct` LIMIT 200000) "); + assertContains(sql.get(0), "with base_cte_table as ( SELECT * FROM (SELECT * FROM `test`.`t_struct` TABLET(1)" + + " SAMPLE('percent'='1') LIMIT 200000)"); assertContains(sql.get(0), "cast(IFNULL(SUM(CHAR_LENGTH(`c2`)) * 0/ COUNT(*), 0) as BIGINT), " + "hex(hll_serialize(IFNULL(hll_raw(`c2`), hll_empty())))," + " cast((COUNT(*) - COUNT(`c2`)) * 0 / COUNT(*) as BIGINT), " +