select * 和 count(*) 结果不一致

【业务影响】用户业务数据不一致
【StarRocks版本】2.2.9
【集群规模】3Fe+5Be
【建表语句】
CREATE TABLE test (
user_id varchar(64) NOT NULL ,
enterprise_id varchar(64) NOT NULL ,
stat_date date NULL ,
point_from varchar(64) NULL ,
point_type bigint(20) NULL ,
points double NULL ,
pd_updated_date datetime NULL,
is_required int(11) NULL ,
created_date datetime NULL ,
created_by varchar(64) NULL ,
updated_date datetime NULL,
updated_by varchar(64) NULL ,
point_subtype varchar(65533) NULL,
resource_type varchar(65533) NULL ,
resource_name varchar(65533) NULL
) ENGINE=OLAP
DUPLICATE KEY(user_id, enterprise_id, stat_date)
COMMENT “OLAP”
PARTITION BY RANGE(stat_date)
(PARTITION p201701 VALUES [(‘2017-01-01’), (‘2017-02-01’)),
PARTITION p201702 VALUES [(‘2017-02-01’), (‘2017-03-01’)),
PARTITION p201703 VALUES [(‘2017-03-01’), (‘2017-04-01’)),
PARTITION p201704 VALUES [(‘2017-04-01’), (‘2017-05-01’)),
PARTITION p201705 VALUES [(‘2017-05-01’), (‘2017-06-01’)),
PARTITION p201706 VALUES [(‘2017-06-01’), (‘2017-07-01’)),
PARTITION p201707 VALUES [(‘2017-07-01’), (‘2017-08-01’)),
PARTITION p201708 VALUES [(‘2017-08-01’), (‘2017-09-01’)),
PARTITION p201709 VALUES [(‘2017-09-01’), (‘2017-10-01’)),
PARTITION p201710 VALUES [(‘2017-10-01’), (‘2017-11-01’)),
PARTITION p201711 VALUES [(‘2017-11-01’), (‘2017-12-01’)),
PARTITION p201712 VALUES [(‘2017-12-01’), (‘2018-01-01’)),
PARTITION p201801 VALUES [(‘2018-01-01’), (‘2018-02-01’)),
PARTITION p201802 VALUES [(‘2018-02-01’), (‘2018-03-01’)),
PARTITION p201803 VALUES [(‘2018-03-01’), (‘2018-04-01’)),
PARTITION p201804 VALUES [(‘2018-04-01’), (‘2018-05-01’)),
PARTITION p201805 VALUES [(‘2018-05-01’), (‘2018-06-01’)),
PARTITION p201806 VALUES [(‘2018-06-01’), (‘2018-07-01’)),
PARTITION p201807 VALUES [(‘2018-07-01’), (‘2018-08-01’)),
PARTITION p201808 VALUES [(‘2018-08-01’), (‘2018-09-01’)),
PARTITION p201809 VALUES [(‘2018-09-01’), (‘2018-10-01’)),
PARTITION p201810 VALUES [(‘2018-10-01’), (‘2018-11-01’)),
PARTITION p201811 VALUES [(‘2018-11-01’), (‘2018-12-01’)),
PARTITION p201812 VALUES [(‘2018-12-01’), (‘2019-01-01’)),
PARTITION p201901 VALUES [(‘2019-01-01’), (‘2019-02-01’)),
PARTITION p201902 VALUES [(‘2019-02-01’), (‘2019-03-01’)),
PARTITION p201903 VALUES [(‘2019-03-01’), (‘2019-04-01’)),
PARTITION p201904 VALUES [(‘2019-04-01’), (‘2019-05-01’)),
PARTITION p201905 VALUES [(‘2019-05-01’), (‘2019-06-01’)),
PARTITION p201906 VALUES [(‘2019-06-01’), (‘2019-07-01’)),
PARTITION p201907 VALUES [(‘2019-07-01’), (‘2019-08-01’)),
PARTITION p201908 VALUES [(‘2019-08-01’), (‘2019-09-01’)),
PARTITION p201909 VALUES [(‘2019-09-01’), (‘2019-10-01’)),
PARTITION p201910 VALUES [(‘2019-10-01’), (‘2019-11-01’)),
PARTITION p201911 VALUES [(‘2019-11-01’), (‘2019-12-01’)),
PARTITION p201912 VALUES [(‘2019-12-01’), (‘2020-01-01’)),
PARTITION p202001 VALUES [(‘2020-01-01’), (‘2020-02-01’)),
PARTITION p202002 VALUES [(‘2020-02-01’), (‘2020-03-01’)),
PARTITION p202003 VALUES [(‘2020-03-01’), (‘2020-04-01’)),
PARTITION p202004 VALUES [(‘2020-04-01’), (‘2020-05-01’)),
PARTITION p202005 VALUES [(‘2020-05-01’), (‘2020-06-01’)),
PARTITION p202006 VALUES [(‘2020-06-01’), (‘2020-07-01’)),
PARTITION p202007 VALUES [(‘2020-07-01’), (‘2020-08-01’)),
PARTITION p202008 VALUES [(‘2020-08-01’), (‘2020-09-01’)),
PARTITION p202009 VALUES [(‘2020-09-01’), (‘2020-10-01’)),
PARTITION p202010 VALUES [(‘2020-10-01’), (‘2020-11-01’)),
PARTITION p202011 VALUES [(‘2020-11-01’), (‘2020-12-01’)),
PARTITION p202012 VALUES [(‘2020-12-01’), (‘2021-01-01’)),
PARTITION p202101 VALUES [(‘2021-01-01’), (‘2021-02-01’)),
PARTITION p202102 VALUES [(‘2021-02-01’), (‘2021-03-01’)),
PARTITION p202103 VALUES [(‘2021-03-01’), (‘2021-04-01’)),
PARTITION p202104 VALUES [(‘2021-04-01’), (‘2021-05-01’)),
PARTITION p202105 VALUES [(‘2021-05-01’), (‘2021-06-01’)),
PARTITION p202106 VALUES [(‘2021-06-01’), (‘2021-07-01’)),
PARTITION p202107 VALUES [(‘2021-07-01’), (‘2021-08-01’)),
PARTITION p202108 VALUES [(‘2021-08-01’), (‘2021-09-01’)),
PARTITION p202109 VALUES [(‘2021-09-01’), (‘2021-10-01’)),
PARTITION p202110 VALUES [(‘2021-10-01’), (‘2021-11-01’)),
PARTITION p202111 VALUES [(‘2021-11-01’), (‘2021-12-01’)),
PARTITION p202112 VALUES [(‘2021-12-01’), (‘2022-01-01’)),
PARTITION p202201 VALUES [(‘2022-01-01’), (‘2022-02-01’)),
PARTITION p202202 VALUES [(‘2022-02-01’), (‘2022-03-01’)),
PARTITION p202203 VALUES [(‘2022-03-01’), (‘2022-04-01’)),
PARTITION p202204 VALUES [(‘2022-04-01’), (‘2022-05-01’)),
PARTITION p202205 VALUES [(‘2022-05-01’), (‘2022-06-01’)),
PARTITION p202206 VALUES [(‘2022-06-01’), (‘2022-07-01’)),
PARTITION p202207 VALUES [(‘2022-07-01’), (‘2022-08-01’)),
PARTITION p202208 VALUES [(‘2022-08-01’), (‘2022-09-01’)),
PARTITION p202209 VALUES [(‘2022-09-01’), (‘2022-10-01’)),
PARTITION p202210 VALUES [(‘2022-10-01’), (‘2022-11-01’)),
PARTITION p202211 VALUES [(‘2022-11-01’), (‘2022-12-01’)),
PARTITION p202212 VALUES [(‘2022-12-01’), (‘2023-01-01’)),
PARTITION p202301 VALUES [(‘2023-01-01’), (‘2023-02-01’)),
PARTITION p202302 VALUES [(‘2023-02-01’), (‘2023-03-01’)))
DISTRIBUTED BY HASH(user_id) BUCKETS 3
PROPERTIES (
“replication_num” = “3”,
“dynamic_partition.enable” = “true”,
“dynamic_partition.time_unit” = “month”,
“dynamic_partition.time_zone” = “Asia/Shanghai”,
“dynamic_partition.start” = “-2147483648”,
“dynamic_partition.end” = “2”,
“dynamic_partition.prefix” = “p”,
“dynamic_partition.buckets” = “3”,
“dynamic_partition.start_day_of_month” = “1”,
“in_memory” = “false”,
“storage_format” = “DEFAULT”
);
【现象1】select * 和 count(*)结果不一致
select * from test
where pt.stat_date between ‘2022-12-01’ and ‘2022-12-26’
and
pt.user_id = ‘9C29459EC5D43EE2E054022128574717’; – 1219条

select count(*) 或 count(1) from test
where pt.stat_date between ‘2022-12-01’ and ‘2022-12-26’
and
pt.user_id = ‘9C29459EC5D43EE2E054022128574717’; – 1251条

【现象2】-- group by不同列时sum结果不一致
select
pt.user_id as userId,
sum( points ) as totalIntegral,
sum( case when point_type = 1 then points else 0 end) as studyIntegral,
sum( case when point_type = 2 then points else 0 end) as smallCourseIntegral,
sum( case when point_type = 3 then points else 0 end) as anchorIntegral,
sum( case when point_type = 4 then points else 0 end) as lecturerIntegral
from
test pt
where
pt.stat_date between ‘2022-12-01’ and ‘2022-12-26’
and
pt.user_id = ‘9C29459EC5D43EE2E054022128574717’
group by user_id,enterprise_id;

– totalIntegral:3020,studyIntegral:2955,其他:0

select
pt.user_id as userId,
sum( points ) as totalIntegral,
sum( case when point_type = 1 then points else 0 end) as studyIntegral,
sum( case when point_type = 2 then points else 0 end) as smallCourseIntegral,
sum( case when point_type = 3 then points else 0 end) as anchorIntegral,
sum( case when point_type = 4 then points else 0 end) as lecturerIntegral
from
test pt
where
pt.stat_date between ‘2022-12-01’ and ‘2022-12-26’
and
pt.user_id = ‘9C29459EC5D43EE2E054022128574717’
group by user_id;

– totalIntegral:3065,studyIntegral:3000

data (887 字节)

原表-select全部.txt (3.7 KB)

原表-select count.txt (4.7 KB)

原表-select两列.txt (3.6 KB)

test-select两列.txt (3.5 KB)

test-select全部.txt (3.6 KB)

这个表,有做过SchemaChange吗?

新建表test04–select全部.txt (3.6 KB)

新建表test04–count(1).txt (4.6 KB)

set enable_tablet_internal_parallel=false; 然后跑一下看看

pp.log (1.2 MB)

pprof_0518.txt (101.9 KB)

pstack_0518_2240.log (1.3 MB)

这个问题还存在吗?

该问题已解决,感谢

commit dd7ca280d73320bb3b97cc1a47437faa1ec5ec8f
Author: meegoo hujie-dlut@qq.com
Date: Wed Apr 6 18:40:48 2022 +0800

Fix schema change bug on duplicate key table when it need final merge (#4811)

Now sorting schema change will write several temporary segment files if memory is not enough, and finally merge temporary segment files into sort segment files. But now, on the duplicate-key tablet, it just renames the temporary segment file.

Signed-off-by: trueeyu <lxhhust350@qq.com>

这个PR修复的,schema change后,动了SortKey,没有做FinalMerge,导致排序出错了

1赞

pstack.log (3.4 MB)

pstack0621-1-96.log (3.7 MB) pstack0621-1-82.log (3.8 MB)

明细表profile_20230626 (30.7 KB)

明细表小数据量profile_20230626.txt (26.7 KB) 明细表大数据量profile_20230626.txt (29.0 KB)

前置表profile_7s.txt (24.9 KB) 大表profile_14s.txt (162.1 KB)

前置表profile_3T分区.txt (24.7 KB) 大表profile_3T分区.txt (125.8 KB)

前置表加索引profile.txt (24.4 KB)