set global prefer_compute_node = true; -- 设置了这个参数后,上面的查询正常了
但是进行hudi表的查询时,又报了其他异常
Job aborted due to stage failure: Task 0 in stage 0.0 failed 1 times, most recent failure: Lost task 0.0 in stage 0.0 (TID 0) (10.60.40.138 executor driver): java.sql.SQLSyntaxErrorException: Failed to open the off-heap table scanner. java exception details: java.io.IOException: Failed to open the hudi MOR slice reader.[com.starrocks.hudi.reader.HudiSliceScanner.open(HudiSliceScanner.java:219)]
at com.mysql.cj.jdbc.exceptions.SQLError.createSQLException(SQLError.java:120)
at com.mysql.cj.jdbc.exceptions.SQLExceptionsMapping.translateException(SQLExceptionsMapping.java:122)
at com.mysql.cj.jdbc.ClientPreparedStatement.executeInternal(ClientPreparedStatement.java:953)
at com.mysql.cj.jdbc.ClientPreparedStatement.executeQuery(ClientPreparedStatement.java:1009)
at org.apache.spark.sql.execution.datasources.jdbc.JDBCRDD.compute(JDBCRDD.scala:358)
at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:373)
at org.apache.spark.rdd.RDD.iterator(RDD.scala:337)
at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52)
at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:373)
at org.apache.spark.rdd.RDD.iterator(RDD.scala:337)
at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52)
at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:373)
at org.apache.spark.rdd.RDD.iterator(RDD.scala:337)
at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52)
at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:373)
at org.apache.spark.rdd.RDD.iterator(RDD.scala:337)
at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52)
at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:373)
at org.apache.spark.rdd.RDD.iterator(RDD.scala:337)
at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52)
at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:373)
at org.apache.spark.rdd.RDD.iterator(RDD.scala:337)
at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
at org.apache.spark.scheduler.Task.run(Task.scala:138)
at org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:506)
at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1510)
at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:509)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617)
at java.lang.Thread.run(Thread.java:745) Driver stacktrace:
starrocks 查看表结构
*************************** 1. row ***************************
Table: cffinrepayagentdb_user_auto_repay_launch_rt
Create Table: -- StarRocks side of the Hudi external table (output of SHOW CREATE TABLE, repaired):
-- smart quotes in PROPERTIES replaced with ASCII quotes so the DDL is re-runnable,
-- and the copy-paste line splits between column name and type rejoined.
CREATE TABLE cffinrepayagentdb_user_auto_repay_launch_rt
(
    _hoodie_commit_time    varchar(1048576)  DEFAULT NULL,
    _hoodie_commit_seqno   varchar(1048576)  DEFAULT NULL,
    _hoodie_record_key     varchar(1048576)  DEFAULT NULL,
    _hoodie_partition_path varchar(1048576)  DEFAULT NULL,
    _hoodie_file_name      varchar(1048576)  DEFAULT NULL,
    id                     bigint(20)        DEFAULT NULL,
    user_id                varchar(1048576)  DEFAULT NULL,
    batch_no               varchar(1048576)  DEFAULT NULL,
    org_channel            varchar(1048576)  DEFAULT NULL,
    product_no             varchar(1048576)  DEFAULT NULL,
    auto_serial_no         varchar(1048576)  DEFAULT NULL,
    detail_serial_no       varchar(1048576)  DEFAULT NULL,
    req_serial_no          varchar(1048576)  DEFAULT NULL,
    launch_way             varchar(1048576)  DEFAULT NULL,
    status                 bigint(20)        DEFAULT NULL,
    roll_order             bigint(20)        DEFAULT NULL,
    asset_type             varchar(1048576)  DEFAULT NULL,
    asset_info             varchar(1048576)  DEFAULT NULL,
    req_user_id            varchar(1048576)  DEFAULT NULL,
    -- decimal128 is StarRocks' display form for high-precision DECIMAL;
    -- maps to Hive's decimal(30,15) on these three amount columns.
    repay_amt              decimal128(30, 15) DEFAULT NULL,
    withhold_amt           decimal128(30, 15) DEFAULT NULL,
    repay_succ_amt         decimal128(30, 15) DEFAULT NULL,
    auto_type              varchar(1048576)  DEFAULT NULL,
    error_code             varchar(1048576)  DEFAULT NULL,
    error_msg              varchar(1048576)  DEFAULT NULL,
    ext                    varchar(1048576)  DEFAULT NULL,
    finish_time            datetime          DEFAULT NULL,
    create_time            datetime          DEFAULT NULL,
    update_time            datetime          DEFAULT NULL,
    _binlog_offset         bigint(20)        DEFAULT NULL,
    _event_type            varchar(1048576)  DEFAULT NULL,
    _event_type_number     bigint(20)        DEFAULT NULL,
    _hoodie_is_deleted     boolean           DEFAULT NULL,
    _db                    varchar(1048576)  DEFAULT NULL,
    _tbl                   varchar(1048576)  DEFAULT NULL,
    _source                varchar(1048576)  DEFAULT NULL,
    _id_mod                int(11)           DEFAULT NULL
)
PARTITION BY ( _db, _tbl, _id_mod )
PROPERTIES ("location" = "hdfs://ns/user/hive/warehouse/fin_basic_data.db/cffinrepayagentdb_user_auto_repay_launch_rt");
1 row in set (2.26 sec)
hive show create table
-- Hive side of the same Hudi MOR table (output of SHOW CREATE TABLE, repaired):
-- all smart quotes (curly single/double quotes) replaced with ASCII ' and ",
-- and the Spark schema JSON — which the paste had split mid-token — rejoined,
-- so the statement is valid, re-runnable HiveQL.
CREATE EXTERNAL TABLE fin_basic_data.cffinrepayagentdb_user_auto_repay_launch_rt
(
    _hoodie_commit_time    string COMMENT '',
    _hoodie_commit_seqno   string COMMENT '',
    _hoodie_record_key     string COMMENT '',
    _hoodie_partition_path string COMMENT '',
    _hoodie_file_name      string COMMENT '',
    ext                    string COMMENT '',
    roll_order             bigint COMMENT '',
    batch_no               string COMMENT '',
    error_msg              string COMMENT '',
    create_time            timestamp COMMENT '',
    detail_serial_no       string COMMENT '',
    org_channel            string COMMENT '',
    product_no             string COMMENT '',
    finish_time            timestamp COMMENT '完成时间',
    auto_type              string COMMENT '',
    asset_info             string COMMENT '',
    update_time            timestamp COMMENT '',
    withhold_amt           decimal(30,15) COMMENT '',
    launch_way             string COMMENT '',
    repay_succ_amt         decimal(30,15) COMMENT '',
    user_id                string COMMENT '用户id',
    asset_type             string COMMENT '',
    auto_serial_no         string COMMENT '',
    error_code             string COMMENT '',
    repay_amt              decimal(30,15) COMMENT '',
    id                     bigint COMMENT '',
    req_user_id            string COMMENT '',
    status                 bigint COMMENT '',
    req_serial_no          string COMMENT '',
    _binlog_offset         bigint COMMENT '',
    _event_type            string COMMENT '',
    _event_type_number     bigint COMMENT '',
    _hoodie_is_deleted     boolean COMMENT '',
    _source                string COMMENT '')
PARTITIONED BY (
    _db     string COMMENT '',
    _tbl    string COMMENT '',
    _id_mod int COMMENT '')
ROW FORMAT SERDE
    'org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe'
WITH SERDEPROPERTIES (
    -- false => this is the realtime (_rt) view of the MOR table, not read-optimized (_ro)
    'hoodie.query.as.ro.table'='false',
    'path'='/user/hive/warehouse/fin_basic_data.db/cffinrepayagentdb_user_auto_repay_launch_rt')
STORED AS INPUTFORMAT
    'org.apache.hudi.hadoop.realtime.HoodieParquetRealtimeInputFormat'
OUTPUTFORMAT
    'org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat'
LOCATION
    'hdfs://ns/user/hive/warehouse/fin_basic_data.db/cffinrepayagentdb_user_auto_repay_launch_rt'
TBLPROPERTIES (
    'dp_last_modified_time'='1703748231536,1703739175815,1703729888625,1703722338354,1703663106063,1703655731057,1703643604965',
    'last_commit_time_sync'='20231228144423814',
    'spark.sql.create.version'='3.2.0-ctrip-1.0.0',
    'spark.sql.sources.provider'='hudi',
    'spark.sql.sources.schema.numPartCols'='3',
    'spark.sql.sources.schema.numParts'='1',
    'spark.sql.sources.schema.part.0'='{"type":"struct","fields":[{"name":"_hoodie_commit_time","type":"string","nullable":true,"metadata":{}},{"name":"_hoodie_commit_seqno","type":"string","nullable":true,"metadata":{}},{"name":"_hoodie_record_key","type":"string","nullable":true,"metadata":{}},{"name":"_hoodie_partition_path","type":"string","nullable":true,"metadata":{}},{"name":"_hoodie_file_name","type":"string","nullable":true,"metadata":{}},{"name":"ext","type":"string","nullable":true,"metadata":{}},{"name":"roll_order","type":"long","nullable":true,"metadata":{}},{"name":"batch_no","type":"string","nullable":true,"metadata":{}},{"name":"error_msg","type":"string","nullable":true,"metadata":{}},{"name":"create_time","type":"timestamp","nullable":true,"metadata":{}},{"name":"detail_serial_no","type":"string","nullable":true,"metadata":{}},{"name":"org_channel","type":"string","nullable":true,"metadata":{}},{"name":"product_no","type":"string","nullable":true,"metadata":{}},{"name":"finish_time","type":"timestamp","nullable":true,"metadata":{}},{"name":"auto_type","type":"string","nullable":true,"metadata":{}},{"name":"asset_info","type":"string","nullable":true,"metadata":{}},{"name":"update_time","type":"timestamp","nullable":true,"metadata":{}},{"name":"withhold_amt","type":"decimal(30,15)","nullable":true,"metadata":{}},{"name":"launch_way","type":"string","nullable":true,"metadata":{}},{"name":"repay_succ_amt","type":"decimal(30,15)","nullable":true,"metadata":{}},{"name":"user_id","type":"string","nullable":true,"metadata":{}},{"name":"asset_type","type":"string","nullable":true,"metadata":{}},{"name":"auto_serial_no","type":"string","nullable":true,"metadata":{}},{"name":"error_code","type":"string","nullable":true,"metadata":{}},{"name":"repay_amt","type":"decimal(30,15)","nullable":true,"metadata":{}},{"name":"id","type":"long","nullable":true,"metadata":{}},{"name":"req_user_id","type":"string","nullable":true,"metadata":{}},{"name":"status","type":"long","nullable":true,"metadata":{}},{"name":"req_serial_no","type":"string","nullable":true,"metadata":{}},{"name":"_binlog_offset","type":"long","nullable":true,"metadata":{}},{"name":"_event_type","type":"string","nullable":true,"metadata":{}},{"name":"_event_type_number","type":"long","nullable":true,"metadata":{}},{"name":"_hoodie_is_deleted","type":"boolean","nullable":true,"metadata":{}},{"name":"_source","type":"string","nullable":true,"metadata":{}},{"name":"_db","type":"string","nullable":true,"metadata":{}},{"name":"_tbl","type":"string","nullable":true,"metadata":{}},{"name":"_id_mod","type":"integer","nullable":false,"metadata":{}}]}',
    'spark.sql.sources.schema.partCol.0'='_db',
    'spark.sql.sources.schema.partCol.1'='_tbl',
    'spark.sql.sources.schema.partCol.2'='_id_mod',
    'transient_lastDdlTime'='1651649240')