Skip to content

Commit

Permalink
[BugFix] Fix use of incorrect read ratio when building sample SQL
Browse files: browse the repository at this point in the history
Signed-off-by: stephen <[email protected]>
  • Loading branch information
stephen-shelby committed Jan 17, 2025
1 parent 75aa13e commit e394a1b
Show file tree
Hide file tree
Showing 3 changed files with 24 additions and 30 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -27,10 +27,10 @@
import java.util.Map;

public class PartitionSampler {
private static final double HIGH_WEIGHT_READ_RATIO = 0.001;
private static final double MEDIUM_HIGH_WEIGHT_READ_RATIO = 0.01;
private static final double MEDIUM_LOW_WEIGHT_READ_RATIO = 0.1;
private static final double LOW_WEIGHT_READ_RATIO = 0.8;
public static final double HIGH_WEIGHT_READ_RATIO = 0.001;
public static final double MEDIUM_HIGH_WEIGHT_READ_RATIO = 0.01;
public static final double MEDIUM_LOW_WEIGHT_READ_RATIO = 0.1;
public static final double LOW_WEIGHT_READ_RATIO = 0.8;
private static final long HIGH_WEIGHT_ROWS_THRESHOLD = 10000000L;
private static final long MEDIUM_HIGH_WEIGHT_ROWS_THRESHOLD = 1000000L;
private static final long MEDIUM_LOW_WEIGHT_ROWS_THRESHOLD = 100000L;
Expand All @@ -56,26 +56,6 @@ public PartitionSampler(double highSampleRatio, double mediumHighRatio, double m
this.sampleRowsLimit = sampleRowLimit;
}

public double getHighRatio() {
return highRatio;
}

public double getMediumHighRatio() {
return mediumHighRatio;
}

public double getMediumLowRatio() {
return mediumLowRatio;
}

public double getLowRatio() {
return lowRatio;
}

public int getMaxSize() {
return maxSize;
}

public long getSampleRowsLimit() {
return sampleRowsLimit;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,11 @@
import java.util.Objects;
import java.util.stream.Collectors;

import static com.starrocks.statistic.base.PartitionSampler.HIGH_WEIGHT_READ_RATIO;
import static com.starrocks.statistic.base.PartitionSampler.LOW_WEIGHT_READ_RATIO;
import static com.starrocks.statistic.base.PartitionSampler.MEDIUM_HIGH_WEIGHT_READ_RATIO;
import static com.starrocks.statistic.base.PartitionSampler.MEDIUM_LOW_WEIGHT_READ_RATIO;

public class HyperStatisticSQLs {
private static final VelocityEngine DEFAULT_VELOCITY_ENGINE;

Expand Down Expand Up @@ -131,13 +136,13 @@ public static String buildSampleSQL(Database db, Table table, Partition p, List<
List<String> groupSQLs = Lists.newArrayList();
StringBuilder sqlBuilder = new StringBuilder();
groupSQLs.add(generateRatioTable(tableName, sampler.getSampleRowsLimit(), info.getHighWeightTablets(),
sampler.getHighRatio(), "t_high"));
HIGH_WEIGHT_READ_RATIO, "t_high"));
groupSQLs.add(generateRatioTable(tableName, sampler.getSampleRowsLimit(), info.getMediumHighWeightTablets(),
sampler.getMediumHighRatio(), "t_medium_high"));
MEDIUM_HIGH_WEIGHT_READ_RATIO, "t_medium_high"));
groupSQLs.add(generateRatioTable(tableName, sampler.getSampleRowsLimit(), info.getMediumLowWeightTablets(),
sampler.getMediumLowRatio(), "t_medium_low"));
MEDIUM_LOW_WEIGHT_READ_RATIO, "t_medium_low"));
groupSQLs.add(generateRatioTable(tableName, sampler.getSampleRowsLimit(), info.getLowWeightTablets(),
sampler.getLowRatio(), "t_low"));
LOW_WEIGHT_READ_RATIO, "t_low"));
if (groupSQLs.stream().allMatch(Objects::isNull)) {
groupSQLs.add("SELECT * FROM " + tableName + " LIMIT " + Config.statistic_sample_collect_rows);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,8 @@
import com.starrocks.statistic.base.PartitionSampler;
import com.starrocks.statistic.base.PrimitiveTypeColumnStats;
import com.starrocks.statistic.base.SubFieldColumnStats;
import com.starrocks.statistic.sample.SampleInfo;
import com.starrocks.statistic.sample.TabletStats;
import com.starrocks.utframe.StarRocksAssert;
import mockit.Mock;
import mockit.MockUp;
Expand Down Expand Up @@ -191,6 +193,13 @@ public void testFullJobs() {
public void testSampleJobs() {
Pair<List<String>, List<Type>> pair = initColumn(List.of("c1", "c2", "c3"));

new MockUp<SampleInfo>() {
@Mock
public List<TabletStats> getMediumHighWeightTablets() {
return List.of(new TabletStats(1, pid, 5000000));
}
};

List<HyperQueryJob> jobs = HyperQueryJob.createSampleQueryJobs(connectContext, db, table, pair.first,
pair.second, List.of(pid), 1, sampler);

Expand All @@ -201,8 +210,8 @@ public void testSampleJobs() {
List<String> sql = jobs.get(1).buildQuerySQL();
Assert.assertEquals(1, sql.size());

assertContains(sql.get(0), "with base_cte_table as " +
"(SELECT * FROM `test`.`t_struct` LIMIT 200000) ");
assertContains(sql.get(0), "with base_cte_table as ( SELECT * FROM (SELECT * FROM `test`.`t_struct` TABLET(1)" +
" SAMPLE('percent'='1') LIMIT 200000)");
assertContains(sql.get(0), "cast(IFNULL(SUM(CHAR_LENGTH(`c2`)) * 0/ COUNT(*), 0) as BIGINT), " +
"hex(hll_serialize(IFNULL(hll_raw(`c2`), hll_empty())))," +
" cast((COUNT(*) - COUNT(`c2`)) * 0 / COUNT(*) as BIGINT), " +
Expand Down

0 comments on commit e394a1b

Please sign in to comment.