【详述】SR 创建 routine load 实时任务后,由于mysql 端存在 DDL(添加索引、添加字段、修改字段) 等操作,从kafka 消费数据到SR 落表时报错(如图),出现丢数据情况,这种该如何解决呢?现在存在大量的实时任务,这个问题变得尤为突出
【导入】mysql -> canal -> kafka -> SR
【背景】实时同步任务需要
【业务影响】严重影响数据服务的数据质量
【StarRocks版本】2.0.5
【集群规模】3fe + 6be
mysql 源表
CREATE TABLE look_up
(
id
int(11) NOT NULL AUTO_INCREMENT,
db_name_prefix
varchar(50) NOT NULL,
table_name_prefix
varchar(50) NOT NULL,
ip
varchar(200) NOT NULL,
test1
varchar(100) DEFAULT NULL,
PRIMARY KEY (id
)
) ENGINE=InnoDB AUTO_INCREMENT=127 DEFAULT CHARSET=utf8;
SR 表
CREATE TABLE ods_test_look_up
(
id
bigint(20) NOT NULL COMMENT “”,
db_name_prefix
varchar(65533) NULL COMMENT “”,
table_name_prefix
varchar(65533) NULL COMMENT “”
) ENGINE=OLAP
UNIQUE KEY(id
)
COMMENT “ods_test_look_up”
DISTRIBUTED BY HASH(id
) BUCKETS 1
PROPERTIES (
“replication_num” = “1”,
“in_memory” = “false”,
“storage_format” = “DEFAULT”
);
1、不指定 json_root
create ROUTINE LOAD ods_test.routine_ods_test_look_up_1 ON ods_test_look_up
COLUMNS (
id,
db_name_prefix,
table_name_prefix
PROPERTIES
(
“strict_mode” = “false”,
“strip_outer_array” = “false”,
“format” = “json”,
“jsonpaths” = “[”$.data.id", “$.data.db_name_prefix”, “$.data.table_name_prefix”]"
)
FROM KAFKA
(
“kafka_broker_list”= “xxx.xxx.xxx.xxx:9092”,
“kafka_topic” = “test_look_up”,
“property.group.id” = “doris_group_ods_tmp_look_up”,
“property.kafka_default_offsets” = “OFFSET_END”
);
mysql 添加数据,
kafka 展示
SR 报错
修改 “strip_outer_array” = “true”
mysql 同样添加数据

SR 报错
在"strict_mode" = “ture” 时,报错类似
2、指定 json_root
create ROUTINE LOAD ods_test.routine_ods_test_look_up_5 ON ods_test_look_up
COLUMNS (
id,
db_name_prefix,
table_name_prefix
)
PROPERTIES
(
“strict_mode” = “false”,
“strip_outer_array” = “false”,
“format” = “json”,
“jsonpaths” = “[”$.id", “$.db_name_prefix”, “$.table_name_prefix”]",
“json_root”= “$.data”
)
FROM KAFKA
(
“kafka_broker_list”= “xxx.xxx.xxx.xxx:9092”,
“kafka_topic” = “test_look_up”,
“property.group.id” = “doris_group_ods_tmp_look_up”,
“property.kafka_default_offsets” = “OFFSET_END”
);
mysql 添加数据
SR 报错
设置 “strip_outer_array”=“true”
mysql 添加数据

SR 同步正常

mysql 添加列(DDL 操作)
kafka 数据
SR 报错
在 “strict_mode” = "true"时 报错类似
即 存在 DDL 操作时 data 为null 导致同步出错