为了更快的定位您的问题,请提供以下信息,谢谢
【详述】在case when和split一起使用的时候返回了错误的结果
【背景】将字符串转成数组
【业务影响】通过其它方式暂时解决,不清楚是case when的问题还是split的问题,无法判断是否影响
【是否存算分离】
【StarRocks版本】3.1.0
【集群规模】
【机器信息】
【联系方式】社区群23-写一半的曲
【附件】
建表语句:
CREATE TABLE learncomp_active_record
(
id
int(11) NOT NULL COMMENT “”,
unit_id
int(11) NOT NULL ,
gmt_create
datetime NOT NULL ,
origin_uid
int(11) NULL ,
is_active_llm
tinyint(4) NULL,
uname
varchar(255) NULL ,
course_ids
varchar(65533) NULL ,
is_active
tinyint(4) NULL ,
fid_name
varchar(255) NULL ,
source
varchar(10) NULL ,
is_del
tinyint(4) NULL ,
gmt_modified
datetime NULL ,
admin_name
varchar(100) NULL ,
uid
varchar(100) NULL ,
is_active_speech
tinyint(4) NULL
) ENGINE=OLAP
PRIMARY KEY(id
, unit_id
, gmt_create
)
COMMENT “单位课程关系表”
PARTITION BY date_trunc(‘day’, gmt_create)
DISTRIBUTED BY HASH(id
)
ORDER BY(unit_id
)
PROPERTIES (
“replication_num” = “3”,
“in_memory” = “false”,
“enable_persistent_index” = “true”,
“replicated_storage” = “true”,
“storage_medium” = “SSD”,
“compression” = “LZ4”
);
查询sql:
SELECT id,course_ids,char_length(course_ids),if(course_ids is null,1,0),course_id_arr,cardinality(course_id_arr)
FROM (SELECT id,
course_ids,
CASE
WHEN course_ids LIKE ‘%,%’ THEN SPLIT(course_ids, ‘,’)
WHEN course_ids LIKE ‘%,%’ THEN SPLIT(course_ids, ‘,’)
WHEN course_ids LIKE ‘%;%’ THEN SPLIT(course_ids, ‘;’)
WHEN course_ids LIKE ‘%;%’ THEN SPLIT(course_ids, ‘;’)
ELSE
SPLIT(course_ids, ‘,’)
END AS course_id_arr
FROM robot.learncomp_active_record
WHERE gmt_create BETWEEN ‘2024-01-01’ AND ‘2024-07-01’
ORDER BY id) AS t1
WHERE
course_id_arr IS NULL
ORDER BY id
;
返回结果:
执行计划:
PLAN FRAGMENT 0(F01)
Output Exprs:1: id | 7: course_ids | 17: char_length | 18: if | 16: case | 19: cardinality
Input Partition: UNPARTITIONED
RESULT SINK
3:MERGING-EXCHANGE
distribution type: GATHER
cardinality: 178
column statistics:
* id–>[1.0, 2037.0, 0.0, 4.0, 177.51923076923077] ESTIMATE
* gmt_create–>[1.7041248E9, 1.7197632E9, 0.0, 8.0, 177.51923076923077] ESTIMATE
* course_ids–>[-Infinity, Infinity, 0.288404360753221, 32.79385530227948, 177.51923076923077] ESTIMATE
* case–>[-Infinity, Infinity, 0.0, 16.0, 5.0] ESTIMATE
* char_length–>[-Infinity, Infinity, 0.0, 1.0, 1.0] UNKNOWN
* if–>[-Infinity, Infinity, 0.0, 1.0, 1.0] UNKNOWN
* cardinality–>[-Infinity, Infinity, 0.0, 1.0, 1.0] UNKNOWN
PLAN FRAGMENT 1(F00)
Input Partition: RANDOM
OutPut Partition: UNPARTITIONED
OutPut Exchange Id: 03
2:SORT
| order by: [1, INT, false] ASC
| offset: 0
| cardinality: 178
| column statistics:
| * id–>[1.0, 2037.0, 0.0, 4.0, 177.51923076923077] ESTIMATE
| * gmt_create–>[1.7041248E9, 1.7197632E9, 0.0, 8.0, 177.51923076923077] ESTIMATE
| * course_ids–>[-Infinity, Infinity, 0.288404360753221, 32.79385530227948, 177.51923076923077] ESTIMATE
| * case–>[-Infinity, Infinity, 0.0, 16.0, 5.0] ESTIMATE
| * char_length–>[-Infinity, Infinity, 0.0, 1.0, 1.0] UNKNOWN
| * if–>[-Infinity, Infinity, 0.0, 1.0, 1.0] UNKNOWN
| * cardinality–>[-Infinity, Infinity, 0.0, 1.0, 1.0] UNKNOWN
|
1:Project
| output columns:
| 1 <-> [1: id, INT, false]
| 7 <-> [7: course_ids, VARCHAR, true]
| 16 <-> [28: case, ARRAY, true]
| 17 <-> char_length[([7: course_ids, VARCHAR, true]); args: VARCHAR; result: INT; args nullable: true; result nullable: true]
| 18 <-> if[(7: course_ids IS NULL, 1, 0); args: BOOLEAN,TINYINT,TINYINT; result: TINYINT; args nullable: false; result nullable: true]
| 19 <-> cardinality[([28: case, ARRAY, true]); args: INVALID_TYPE; result: INT; args nullable: true; result nullable: true]
| common expressions:
| 20 <-> split[([7: course_ids, VARCHAR, true], ‘,’); args: VARCHAR,VARCHAR; result: ARRAY; args nullable: true; result nullable: true]
| 21 <-> 7: course_ids LIKE ‘%,%’
| 22 <-> 7: course_ids LIKE ‘%,%’
| 23 <-> split[([7: course_ids, VARCHAR, true], ‘,’); args: VARCHAR,VARCHAR; result: ARRAY; args nullable: true; result nullable: true]
| 24 <-> 7: course_ids LIKE ‘%;%’
| 25 <-> split[([7: course_ids, VARCHAR, true], ‘;’); args: VARCHAR,VARCHAR; result: ARRAY; args nullable: true; result nullable: true]
| 26 <-> 7: course_ids LIKE ‘%;%’
| 27 <-> split[([7: course_ids, VARCHAR, true], ‘;’); args: VARCHAR,VARCHAR; result: ARRAY; args nullable: true; result nullable: true]
| 28 <-> CASE WHEN 21: expr THEN 20: split WHEN 22: expr THEN 23: split WHEN 24: expr THEN 25: split WHEN 26: expr THEN 27: split ELSE 20: split END
| cardinality: 178
| column statistics:
| * id–>[1.0, 2037.0, 0.0, 4.0, 177.51923076923077] ESTIMATE
| * course_ids–>[-Infinity, Infinity, 0.288404360753221, 32.79385530227948, 177.51923076923077] ESTIMATE
| * case–>[-Infinity, Infinity, 0.0, 16.0, 5.0] ESTIMATE
| * char_length–>[-Infinity, Infinity, 0.0, 1.0, 1.0] UNKNOWN
| * if–>[-Infinity, Infinity, 0.0, 1.0, 1.0] UNKNOWN
| * cardinality–>[-Infinity, Infinity, 0.0, 1.0, 1.0] UNKNOWN
|
0:OlapScanNode
table: learncomp_active_record, rollup: learncomp_active_record
preAggregation: on
Predicates: array_length(CASE WHEN 7: course_ids LIKE ‘%,%’ THEN split(7: course_ids, ‘,’) WHEN 7: course_ids LIKE ‘%,%’ THEN split(7: course_ids, ‘,’) WHEN 7: course_ids LIKE ‘%;%’ THEN split(7: course_ids, ‘;’) WHEN 7: course_ids LIKE ‘%;%’ THEN split(7: course_ids, ‘;’) ELSE split(7: course_ids, ‘,’) END) IS NULL, [3: gmt_create, DATETIME, false] <= ‘2024-07-01 00:00:00’
partitionsRatio=124/140, tabletsRatio=2728/2728
tabletList=3692173,3692177,3692181,3692185,3692189,3692193,3692197,3692201,3692205,3692209 …
actualRows=1785, avgRowSize=63.793854
cardinality: 178
column statistics:
* id–>[1.0, 2037.0, 0.0, 4.0, 177.51923076923077] ESTIMATE
* gmt_create–>[1.7041248E9, 1.7197632E9, 0.0, 8.0, 177.51923076923077] ESTIMATE
* course_ids–>[-Infinity, Infinity, 0.288404360753221, 32.79385530227948, 177.51923076923077] ESTIMATE
* case–>[-Infinity, Infinity, 0.0, 16.0, 5.0] ESTIMATE
* char_length–>[-Infinity, Infinity, 0.0, 1.0, 1.0] UNKNOWN
* if–>[-Infinity, Infinity, 0.0, 1.0, 1.0] UNKNOWN
* cardinality–>[-Infinity, Infinity, 0.0, 1.0, 1.0] UNKNOWN