COUNT(distinct did) 和having或者where一起用时过滤不生效

【详述】问题详细描述

SELECT androidId ,count(DISTINCT did) cnt from bt.ods_bt_events where ymd=20230917 GROUP by androidId HAVING cnt > 100;

select * from (SELECT androidId ,count(DISTINCT did) cnt from bt.ods_bt_events where ymd=20230917 GROUP by androidId) as a where cnt > 100;

在mysql客户端命令行上面两个语句的过滤条件无效,cnt < 100的都返回了,
在DBeaer查询界面显示数据量正确,导出到本地文件时过滤条件无效

【背景】
【业务影响】
【StarRocks版本】例如:2.5。10
【集群规模】例如:3fe(1 follower+2observer)+5be(fe与be混部)
【机器信息】CPU虚拟核/内存/网卡,例如:48C/64G/万兆
【联系方式】社区群14-电波, dianbo.ruan@adtiming.com
【附件】

– bt.ods_bt_events definition

CREATE TABLE ods_bt_events (
ymd date NULL COMMENT “根据客户端时间生成的时间”,
uuid varchar(128) NULL COMMENT “用户ID,Server生成的用户ID”,
did varchar(128) NULL COMMENT “设备ID,对应dtype”,
sts bigint(20) NULL COMMENT “服务端时间,单位毫秒”,
event varchar(512) NULL COMMENT “Event名称”,
fit int(11) NULL COMMENT “客户端首次安装App时间,单位秒”,
flt int(11) NULL COMMENT “客户端首次打开App时间,单位秒(uid文件创建时间)”,
zo int(11) NULL COMMENT “时区偏移量,单位分钟.比如北京时区zo=480”,
publisherId int(11) NULL COMMENT “”,
pubAppId int(11) NULL COMMENT “”,
ts bigint(20) NULL COMMENT “客户端时间,单位毫秒”,
dtype int(11) NULL COMMENT “设备ID类型”,
androidId varchar(512) NULL COMMENT “androidId”,
fbid varchar(512) NULL COMMENT “FacebookID”,
lang varchar(512) NULL COMMENT “手机语言code”,
jb int(11) NULL COMMENT “jailbreak状态,0:正常,1:越狱”,
bundle varchar(512) NULL COMMENT “当前App包名”,
make varchar(512) NULL COMMENT “手机生产商”,
brand varchar(512) NULL COMMENT “手机品牌”,
model varchar(512) NULL COMMENT “手机型号”,
serial varchar(512) NULL COMMENT “Androidserial”,
os int(11) NULL COMMENT “系统,0:iOS,1:Android”,
osv varchar(512) NULL COMMENT “系统版本号”,
appk varchar(512) NULL COMMENT “AppKey”,
appv varchar(512) NULL COMMENT “当前App版本号”,
sdkv varchar(512) NULL COMMENT “当前Plugin版本号”,
w int(11) NULL COMMENT “屏幕宽度(像素)”,
h int(11) NULL COMMENT “屏幕高度(像素)”,
contype int(11) NULL COMMENT “当前网络类型ConnectionType”,
carrier varchar(512) NULL COMMENT “运营商名称,NetworkOperatorName”,
mccmnc varchar(512) NULL COMMENT “运营商mcc+mnc,NetworkOperator”,
country varchar(32) NULL COMMENT “服务端国家”,
lcountry varchar(32) NULL COMMENT “[[NSLocalecurrentLocale]localeIdentifier],Locale.getCountry()”,
fm int(11) NULL COMMENT “剩余硬盘空间,单位M”,
battery int(11) NULL COMMENT “剩余电量百分比”,
btch int(11) NULL COMMENT “是否充电中,0:否,1:是”,
lowp int(11) NULL COMMENT “是否低电量模式,0:否,1:是”,
btime bigint(20) NULL COMMENT “开机时间点,单位毫秒”,
ram bigint(20) NULL COMMENT “总RAM大小,单位B”,
vpn int(11) NULL COMMENT “VPN状态,0:OFF,1:ON”,
organic int(11) NULL COMMENT “是否是自然量,0:No,1:Yes”,
isb int(11) NULL COMMENT “issecondbuild,0:No,1:Yes”,
tid varchar(512) NULL COMMENT “Event分组”,
source varchar(512) NULL COMMENT “EventSource,关卡页面传入的参数”,
pos int(11) NULL COMMENT “PositionID”,
posType int(11) NULL COMMENT “PositionType”,
crid int(11) NULL COMMENT “CreativeID”,
ecid int(11) NULL COMMENT “EndCardID”,
data_dur int(11) NULL COMMENT “h5页面停留时长”,
data_page_title varchar(65533) NULL COMMENT “页面标题”,
data_url varchar(65533) NULL COMMENT “页面路径”,
data_referrer_host varchar(65533) NULL COMMENT “前向域名”,
data_referrer varchar(65533) NULL COMMENT “前向地址”,
data_uid varchar(512) NULL COMMENT “快音的帐号id”,
data_ad_type varchar(512) NULL COMMENT “广告类型”,
data_ad_position varchar(512) NULL COMMENT “代码位ID/素材ID”,
data_app_position varchar(512) NULL COMMENT “触发广告的位置”,
data_ad_stage varchar(512) NULL COMMENT “广告环节”,
data_is_success varchar(32) NULL COMMENT “是否成功”,
data_content varchar(65533) NULL COMMENT “广告内容”,
data_error varchar(65533) NULL COMMENT “失败原因”,
data_element_name varchar(512) NULL COMMENT “元素名称”,
data_remarks varchar(65533) NULL COMMENT “备注”,
data_newuser int(11) NULL COMMENT “0和1,1代表为新用户”,
abt int(11) NULL COMMENT “”,
data_ver varchar(512) NULL COMMENT “”,
data_times int(11) NULL COMMENT “”,
sh int(11) NULL COMMENT “”,
data_abt int(11) NULL COMMENT “”,
cpid int(11) NULL COMMENT “Camapign ID”,
mpid int(11) NULL COMMENT “Pos Template ID”,
jsv varchar(512) NULL COMMENT “jssdk版本”,
width int(11) NULL COMMENT “屏幕的宽”,
height int(11) NULL COMMENT “屏幕的高”,
ip varchar(128) NULL COMMENT “客户端ip”,
abp varchar(1000) NULL COMMENT “abtest的具体参数”,
afSource varchar(128) NULL COMMENT “appsflyer的渠道”,
afSourceType varchar(128) NULL COMMENT “渠道类型”,
data json NULL COMMENT “”,
faf json NULL COMMENT “”,
dv int(11) NULL COMMENT “发者模式”,
xp int(11) NULL COMMENT “是否安装了xposed(判定hook):0-未安装,1-已安装”,
ds int(11) NULL COMMENT “是否是系统级多开,0-否,1-是”,
du int(11) NULL COMMENT “是否是用户级多开,0-否,1-是”,
wp int(11) NULL COMMENT “是否使用WIFI代理:0-未使用,1-已使用”,
abi varchar(128) NULL COMMENT “‘arm64-v8a’,‘x86_64’,‘armeabi-v7a’ 等”,
fp varchar(512) NULL COMMENT “指纹信息”,
tp varchar(512) NULL COMMENT “设备类型:tablet-平板电脑,phone-手机”,
bd varchar(512) NULL COMMENT “主板信息”,
tgs varchar(512) NULL COMMENT “Tags信息,如unsigned,debug”,
bdv varchar(512) NULL COMMENT “启动程序版本号”,
ins varchar(512) NULL COMMENT “安装器,指定本app的安装来源”,
INDEX bt_events_bundle (bundle) USING BITMAP COMMENT ‘’
) ENGINE=OLAP
DUPLICATE KEY(ymd, uuid)
COMMENT “OLAP”
PARTITION BY RANGE(ymd)
(
PARTITION p20230823 VALUES [(“2023-08-23”), (“2023-08-24”)),
PARTITION p20230824 VALUES [(“2023-08-24”), (“2023-08-25”)),
PARTITION p20230825 VALUES [(“2023-08-25”), (“2023-08-26”)),
PARTITION p20230826 VALUES [(“2023-08-26”), (“2023-08-27”)),
PARTITION p20230827 VALUES [(“2023-08-27”), (“2023-08-28”)),
PARTITION p20230828 VALUES [(“2023-08-28”), (“2023-08-29”)),
PARTITION p20230829 VALUES [(“2023-08-29”), (“2023-08-30”)),
PARTITION p20230830 VALUES [(“2023-08-30”), (“2023-08-31”)),
PARTITION p20230831 VALUES [(“2023-08-31”), (“2023-09-01”)),
PARTITION p20230901 VALUES [(“2023-09-01”), (“2023-09-02”)),
PARTITION p20230902 VALUES [(“2023-09-02”), (“2023-09-03”)),
PARTITION p20230903 VALUES [(“2023-09-03”), (“2023-09-04”)),
PARTITION p20230904 VALUES [(“2023-09-04”), (“2023-09-05”)),
PARTITION p20230905 VALUES [(“2023-09-05”), (“2023-09-06”)),
PARTITION p20230906 VALUES [(“2023-09-06”), (“2023-09-07”)),
PARTITION p20230907 VALUES [(“2023-09-07”), (“2023-09-08”)),
PARTITION p20230908 VALUES [(“2023-09-08”), (“2023-09-09”)),
PARTITION p20230909 VALUES [(“2023-09-09”), (“2023-09-10”)),
PARTITION p20230910 VALUES [(“2023-09-10”), (“2023-09-11”)),
PARTITION p20230911 VALUES [(“2023-09-11”), (“2023-09-12”)),
PARTITION p20230912 VALUES [(“2023-09-12”), (“2023-09-13”)),
PARTITION p20230913 VALUES [(“2023-09-13”), (“2023-09-14”)),
PARTITION p20230914 VALUES [(“2023-09-14”), (“2023-09-15”)),
PARTITION p20230915 VALUES [(“2023-09-15”), (“2023-09-16”)),
PARTITION p20230916 VALUES [(“2023-09-16”), (“2023-09-17”)),
PARTITION p20230917 VALUES [(“2023-09-17”), (“2023-09-18”)),
PARTITION p20230918 VALUES [(“2023-09-18”), (“2023-09-19”)),
PARTITION p20230919 VALUES [(“2023-09-19”), (“2023-09-20”)),
PARTITION p20230920 VALUES [(“2023-09-20”), (“2023-09-21”)),
PARTITION p20230921 VALUES [(“2023-09-21”), (“2023-09-22”)),
PARTITION p20230922 VALUES [(“2023-09-22”), (“2023-09-23”)),
PARTITION p20230923 VALUES [(“2023-09-23”), (“2023-09-24”)))
DISTRIBUTED BY HASH(uuid) BUCKETS 32
PROPERTIES (
“replication_num” = “2”,
“bloom_filter_columns” = “sts”,
“dynamic_partition.enable” = “true”,
“dynamic_partition.time_unit” = “DAY”,
“dynamic_partition.time_zone” = “UTC”,
“dynamic_partition.start” = “-120”,
“dynamic_partition.end” = “3”,
“dynamic_partition.prefix” = “p”,
“dynamic_partition.buckets” = “16”,
“dynamic_partition.history_partition_num” = “0”,
“in_memory” = “false”,
“storage_format” = “DEFAULT”,
“enable_persistent_index” = “false”,
“compression” = “LZ4”
);

mysql> explain select * from (SELECT androidId ,count(DISTINCT did) as cnt from bt.ods_bt_events where ymd=20230917 GROUP by androidId) as a where cnt > 100;

±--------------------------------------------------------------------------------------------------------------+
| Explain String |
±--------------------------------------------------------------------------------------------------------------+
| PLAN FRAGMENT 0 |
| OUTPUT EXPRS:13: androidId | 93: count |
| PARTITION: UNPARTITIONED |
| |
| RESULT SINK |
| |
| 9:EXCHANGE |
| |
| PLAN FRAGMENT 1 |
| OUTPUT EXPRS: |
| PARTITION: HASH_PARTITIONED: 13: androidId |
| |
| STREAM DATA SINK |
| EXCHANGE ID: 09 |
| UNPARTITIONED |
| |
| 8:AGGREGATE (merge finalize) |
| | output: sum(93: count) |
| | group by: 13: androidId |
| | |
| 7:EXCHANGE |
| |
| PLAN FRAGMENT 2 |
| OUTPUT EXPRS: |
| PARTITION: HASH_PARTITIONED: 13: androidId, 94: cast |
| |
| STREAM DATA SINK |
| EXCHANGE ID: 07 |
| HASH_PARTITIONED: 13: androidId |
| |
| 6:AGGREGATE (update serialize) |
| | STREAMING |
| | output: sum(93: count) |
| | group by: 13: androidId |
| | |
| 5:AGGREGATE (update finalize) |
| | output: multi_distinct_count(3: did) |
| | group by: 13: androidId, 94: cast |
| | |
| 4:EXCHANGE |
| |
| PLAN FRAGMENT 3 |
| OUTPUT EXPRS: |
| PARTITION: RANDOM |
| |
| STREAM DATA SINK |
| EXCHANGE ID: 04 |
| HASH_PARTITIONED: 13: androidId, 94: cast |
| |
| 3:Project |
| | <slot 3> : 3: did |
| | <slot 13> : 13: androidId |
| | <slot 94> : CAST(murmur_hash3_32(3: did) % 512 AS SMALLINT) |
| | |
| 2:AGGREGATE (update serialize) |
| | STREAMING |
| | group by: 13: androidId, 3: did |
| | |
| 1:Project |
| | <slot 3> : 3: did |
| | <slot 13> : 13: androidId |
| | |
| 0:OlapScanNode |
| TABLE: ods_bt_events |
| PREAGGREGATION: ON |
| PREDICATES: 1: ymd = ‘2023-09-17’ |
| partitions=1/124 |
| rollup: ods_bt_events |
| tabletRatio=16/16 |
| tabletList=70469857,70469860,70469863,70469866,70469869,70469872,70469875,70469878,70469881,70469884 … |
| cardinality=92736526 |
| avgRowSize=46.73234 |
| numNodes=0 |
±--------------------------------------------------------------------------------------------------------------+

mysql> explain SELECT androidId ,count(DISTINCT uuid) cnt from bt.ods_bt_events where ymd=20230917 GROUP by androidId HAVING cnt > 100;
±--------------------------------------------------------------------------------------------------------------+
| Explain String |
±--------------------------------------------------------------------------------------------------------------+
| PLAN FRAGMENT 0 |
| OUTPUT EXPRS:13: androidId | 93: count |
| PARTITION: UNPARTITIONED |
| |
| RESULT SINK |
| |
| 6:EXCHANGE |
| |
| PLAN FRAGMENT 1 |
| OUTPUT EXPRS: |
| PARTITION: HASH_PARTITIONED: 13: androidId |
| |
| STREAM DATA SINK |
| EXCHANGE ID: 06 |
| UNPARTITIONED |
| |
| 5:AGGREGATE (merge finalize) |
| | output: count(93: count) |
| | group by: 13: androidId |
| | having: 93: count > 100 |
| | |
| 4:EXCHANGE |
| |
| PLAN FRAGMENT 2 |
| OUTPUT EXPRS: |
| PARTITION: RANDOM |
| |
| STREAM DATA SINK |
| EXCHANGE ID: 04 |
| HASH_PARTITIONED: 13: androidId |
| |
| 3:AGGREGATE (update serialize) |
| | STREAMING |
| | output: count(2: uuid) |
| | group by: 13: androidId |
| | |
| 2:AGGREGATE (update finalize) |
| | group by: 13: androidId, 2: uuid |
| | |
| 1:Project |
| | <slot 2> : 2: uuid |
| | <slot 13> : 13: androidId |
| | |
| 0:OlapScanNode |
| TABLE: ods_bt_events |
| PREAGGREGATION: ON |
| PREDICATES: 1: ymd = ‘2023-09-17’ |
| partitions=1/124 |
| rollup: ods_bt_events |
| tabletRatio=16/16 |
| tabletList=70469857,70469860,70469863,70469866,70469869,70469872,70469875,70469878,70469881,70469884 … |
| cardinality=92736526 |
| avgRowSize=38.603016 |
| numNodes=0 |
±--------------------------------------------------------------------------------------------------------------+

奇怪的点是,该表其他字段没事,和该表一样结构,但是数据量小的也没问题

发现新显现,增加limit 90000就可以了

set enable_distinct_column_bucketization=false 看看有问题吗

测试有效,这是问题,还是说就这么用的?

应该是触发了一个优化的bug,先把这个全局设成false吧

我反馈下,下个版本应该会修复

能提供一个querydump吗

具体要怎么操作?

暂时不用了,我们这应该可以复现

请问这里有具体的修复PR嘛