常见 Crash / BUG / 优化 查询

BE 因为这个问题挂了,有临时解决办法吗?

  1. Group execution crash

*** Aborted at 1728368458 (unix time) try "date -d @1728368458" if you are using GNU date ***
PC: @          0x49068b4 starrocks::pipeline::HashJoinerFactory::get_builder(int, int)
*** SIGFPE (@0x49068b4) received by PID 20231 (TID 0x7fb68ea42700) from PID 76572852; stack trace: ***
    @     0x7fb6fb82f20b __pthread_once_slow
    @          0xb0b3754 google::(anonymous namespace)::FailureSignalHandler(int, siginfo_t*, void*)
    @     0x7fb6fb838630 (/usr/lib64/libpthread-2.17.so+0xf62f)
    @          0x49068b4 starrocks::pipeline::HashJoinerFactory::get_builder(int, int)
    @          0x4904f5e starrocks::pipeline::HashJoinProbeOperatorFactory::create(int, int)
    @          0x4930803 starrocks::pipeline::Pipeline::instantiate_drivers(starrocks::RuntimeState*)
    @          0x48b9210 starrocks::pipeline::FragmentContext::iterate_pipeline(std::function<void (starrocks::pipeline::Pipeline*)> const&)
    @          0x47f9652 starrocks::pipeline::FragmentExecutor::_prepare_pipeline_driver(starrocks::ExecEnv*, starrocks::pipeline::UnifiedExecPlanFragmentParams const&)
    @          0x47fe7ed starrocks::pipeline::FragmentExecutor::prepare(starrocks::ExecEnv*, starrocks::TExecPlanFragmentParams const&, starrocks::TExecPlanFragmentParams const&)
    @          0x7714203 starrocks::PInternalServiceImplBase<starrocks::PInternalService>::_exec_plan_fragment_by_pipeline(starrocks::TExecPlanFragmentParams const&, starrocks::TExecPlanFragmentParams const&)
    @          0x771ea43 starrocks::PInternalServiceImplBase<starrocks::PInternalService>::_exec_plan_fragment(brpc::Controller*, starrocks::PExecPlanFragmentRequest const*)
    @          0x7726f7b starrocks::PInternalServiceImplBase<starrocks::PInternalService>::_exec_plan_fragment(google::protobuf::RpcController*, starrocks::PExecPlanFragmentRequest const*, starrocks::PExecPlanFragmentResult*, google::protobuf::Closure*)
    @          0x3a50b18 starrocks::PriorityThreadPool::work_thread(int)
    @          0xb070ac7 thread_proxy
    @     0x7fb6fb830ea5 start_thread
    @     0x7fb6faa1bb0d __clone
  1. RowDescriptor Crash

*** SIGABRT (@0x3eb002fe74f) received by PID 3139407 (TID 0x7f5aba7d8640) from PID 3139407; stack trace: ***
    @         0x1c9d9e4a google::(anonymous namespace)::FailureSignalHandler()
    @     0x7f5bba7ba520 (unknown)
    @     0x7f5bba80e9fc pthread_kill
    @     0x7f5bba7ba476 raise
    @     0x7f5bba7a07f3 abort
    @          0xbdd85e1 starrocks::failure_function()
    @         0x1c9cd841 google::LogMessage::Fail()
    @         0x1c9cfe9f google::LogMessage::SendToLog()
    @         0x1c9cd380 google::LogMessage::Flush()
    @         0x1c9d050d google::LogMessageFatal::~LogMessageFatal()
    @          0xc4c7d9a starrocks::RowDescriptor::RowDescriptor()
    @          0xddb7dc7 starrocks::ExecNode::ExecNode()
    @          0xe970ce4 starrocks::ProjectNode::ProjectNode()
    @          0xddc12e0 starrocks::ExecNode::create_vectorized_node()
    @          0xddbf290 starrocks::ExecNode::create_tree_helper()
    @          0xddbec1b starrocks::ExecNode::create_tree()
    @          0xf15d286 starrocks::pipeline::FragmentExecutor::_prepare_exec_plan()
    @          0xf167366 starrocks::pipeline::FragmentExecutor::prepare()
    @         0x18731e03 starrocks::PInternalServiceImplBase<>::_exec_plan_fragment_by_pipeline()
    @         0x187317fa starrocks::PInternalServiceImplBase<>::_exec_plan_fragment()
    @         0x1872b3df starrocks::PInternalServiceImplBase<>::_exec_plan_fragment()
    @         0x18751f5e _ZZN9starrocks24PInternalServiceImplBaseINS_16PInternalServiceEE18exec_plan_fragmentEPN6google8protobuf13RpcControllerEPKNS_24PExecPlanFragmentRequestEPNS_23PExecPlanFragmentResultEPNS4_7ClosureEENKUlvE_clEv
    @         0x1876305c _ZSt13__invoke_implIvRZN9starrocks24PInternalServiceImplBaseINS0_16PInternalServiceEE18exec_plan_fragmentEPN6google8protobuf13RpcControllerEPKNS0_24PExecPlanFragmentRequestEPNS0_23PExecPlanFragmentResultEPNS5_7ClosureEEUlvE_JEET_St14__invoke_otherOT0_DpOT1_
    @         0x1875f639 _ZSt10__invoke_rIvRZN9starrocks24PInternalServiceImplBaseINS0_16PInternalServiceEE18exec_plan_fragmentEPN6google8protobuf13RpcControllerEPKNS0_24PExecPlanFragmentRequestEPNS0_23PExecPlanFragmentResultEPNS5_7ClosureEEUlvE_JEENSt9enable_ifIX16is_invocable_r_vIT_T0_DpT1_EESI_E4typeEOSJ_DpOSK_
    @         0x1875bf0d _ZNSt17_Function_handlerIFvvEZN9starrocks24PInternalServiceImplBaseINS1_16PInternalServiceEE18exec_plan_fragmentEPN6google8protobuf13RpcControllerEPKNS1_24PExecPlanFragmentRequestEPNS1_23PExecPlanFragmentResultEPNS6_7ClosureEEUlvE_E9_M_invokeERKSt9_Any_data
    @          0xbcab5ba std::function<>::operator()()
    @          0xc15c958 starrocks::PriorityThreadPool::work_thread()
    @          0xc1b126d std::__invoke_impl<>()
    @          0xc1b10a5 std::__invoke<>()
    @          0xc1b1039 _ZNKSt12_Mem_fn_baseIMN9starrocks18PriorityThreadPoolEFviELb1EEclIJRPS1_RiEEEDTcl8__invokedtdefpT6_M_pmfspcl7forwardIT_Efp_EEEDpOS9_
    @          0xc1b0fa6 std::__invoke_impl<>()
    @          0xc1b0ee9 _ZSt10__invoke_rIvRSt7_Mem_fnIMN9starrocks18PriorityThreadPoolEFviEEJRPS2_RiEENSt9enable_ifIX16is_invocable_r_vIT_T0_DpT1_EESB_E4typeEOSC_DpOSD_
  1. FE 内存泄漏 TabletCommitInfo

num     #instances         #bytes  class name
----------------------------------------------
   1:      34790980     1391639200  java.util.ArrayList
   2:      11573352      648107712  com.starrocks.transaction.TabletCommitInfo
   3:      11720494      562583712  java.util.HashMap$Node
   4:         55373      162871152  [B
   5:         48370      132573648  [Ljava.util.HashMap$Node;
   6:        788789       75283688  [C
   7:        789455       25262560  java.lang.String
   8:        312276       19985664  com.sun.tools.javac.file.ZipFileIndex$Entry
   9:         59963       18400872  [Ljava.lang.Object;
  10:        189413       12122432  java.util.LinkedHashMap$Entry
  11:        358510       11472320  com.sun.tools.javac.util.List
  12:         64812        9851424  com.starrocks.catalog.Replica
  13:        350890        8421360  java.lang.Long
  • Github Issue:

  • Github Fix PR:

  • Jira

  • 问题版本:

    • 3.1.0 ~ latest

    • 3.2.0 ~ latest

    • 3.3.0 ~ 3.3.8

  • 修复版本:

    • 3.1未修复

    • 3.2未修复

    • 3.3.9+

  • 问题原因:

  • 临时解决办法:

    • fe.conf label_clean_interval_second=1800
  1. arrays_overlap crash

*** Aborted at 1731327492 (unix time) try "date -d @1731327492" if you are using GNU date ***
PC: @          0x6c1cf10 starrocks::ArrayOverlap<(starrocks::LogicalType)17>::_check_column_overlap_nullable(phmap::flat_hash_set<starrocks::Slice, starrocks::PhmapDefaultHashFunc<(starrocks::LogicalType)17, (starrocks::PhmapSeed)0>, phmap::EqualTo<starrocks::Slice>, std::allocato…
*** SIGSEGV (@0x0) received by PID 14351 (TID 0x7f73af977700) from PID 0; stack trace: ***
    @     0x7f7af7cbbe20 __GI___pthread_once
    @          0x7aff560 google::(anonymous namespace)::FailureSignalHandler(int, siginfo_t*, void*)
    @     0x7f7af7cbe5e0 (/usr/lib64/libpthread-2.17.so+0xf5df)
    @          0x6c1cf10 starrocks::ArrayOverlap<(starrocks::LogicalType)17>::_check_column_overlap_nullable(phmap::flat_hash_set<starrocks::Slice, starrocks::PhmapDefaultHashFunc<(starrocks::LogicalType)17, (starrocks::PhmapSeed)0>, phmap::EqualTo<starrocks::Slice>, std::allocato…
    @          0x6c1d3d4 starrocks::ArrayOverlap<(starrocks::LogicalType)17>::_array_overlap_const(starrocks::ArrayOverlapState<phmap::flat_hash_set<starrocks::Slice, starrocks::PhmapDefaultHashFunc<(starrocks::LogicalType)17, (starrocks::PhmapSeed)0>, phmap::EqualTo<starrocks::Sl…
    @          0x6ce7d33 starrocks::StatusOr<std::shared_ptr<starrocks::Column> > starrocks::ArrayFunctions::array_overlap<(starrocks::LogicalType)17>(starrocks::FunctionContext*, std::vector<std::shared_ptr<starrocks::Column>, std::allocator<std::shared_ptr<starrocks::Column> > >…
    @          0x5d885c3 starrocks::VectorizedFunctionCallExpr::evaluate_checked(starrocks::ExprContext*, starrocks::Chunk*)
    @          0x5c6a003 starrocks::VectorizedCastExpr<(starrocks::LogicalType)24, (starrocks::LogicalType)1, false>::evaluate_checked(starrocks::ExprContext*, starrocks::Chunk*)
    @          0x5456df1 starrocks::VectorizedBinaryPredicate<(starrocks::LogicalType)1, starrocks::BinaryPredFunc<std::equal_to<signed char> > >::evaluate_checked(starrocks::ExprContext*, starrocks::Chunk*)
    @          0x525ba3b starrocks::ExprContext::evaluate(starrocks::Expr*, starrocks::Chunk*, unsigned char*)
    @          0x525bebf starrocks::ExprContext::evaluate(starrocks::Chunk*, unsigned char*)
    @          0x3f3fea3 starrocks::eager_prune_eval_conjuncts(std::vector<starrocks::ExprContext*, std::allocator<starrocks::ExprContext*> > const&, starrocks::Chunk*)
    @          0x3f42646 starrocks::ExecNode::eval_conjuncts(std::vector<starrocks::ExprContext*, std::allocator<starrocks::ExprContext*> > const&, starrocks::Chunk*, std::shared_ptr<std::vector<unsigned char, std::allocator<unsigned char> > >*, bool)
    @          0x461cc6f starrocks::pipeline::OlapChunkSource::_read_chunk_from_storage(starrocks::RuntimeState*, starrocks::Chunk*)
    @          0x461d171 starrocks::pipeline::OlapChunkSource::_read_chunk(starrocks::RuntimeState*, std::shared_ptr<starrocks::Chunk>*)
    @          0x4613478 starrocks::pipeline::ChunkSource::buffer_next_batch_chunks_blocking(starrocks::RuntimeState*, unsigned long, starrocks::workgroup::WorkGroup const*)
    @          0x42e53e3 auto starrocks::pipeline::ScanOperator::_trigger_next_scan(starrocks::RuntimeState*, int)::{lambda(auto:1&)#1}::operator()<starrocks::workgroup::YieldContext>(starrocks::workgroup::YieldContext&) const [clone .constprop.0]
    @          0x43e163b starrocks::workgroup::ScanExecutor::worker_thread()
    @          0x38b3123 starrocks::ThreadPool::dispatch_thread()
    @          0x38abb86 starrocks::Thread::supervise_thread(void*)
    @     0x7f7af7cb6e25 start_thread
    @     0x7f7af70bc34d __clone

terminate called after throwing an instance of 'std::runtime_error'
what(): lz4 encode size does not equal when decoding, encode size = 31750, but decode get size = 18446744073709519877, raw size = 47820.
3.1.15 RELEASE (build 5625961)
query_id:4840d96b-b04b-11ef-abac-fa163e214541, fragment_instance:4840d96b-b04b-11ef-abac-fa163e214590
tracker:process consumption: 59241656760
tracker:query_pool consumption: 11697180161
tracker:load consumption: 34667368
tracker:metadata consumption: 790506936
tracker:tablet_metadata consumption: 86288656
tracker:rowset_metadata consumption: 102208699
tracker:segment_metadata consumption: 45077593
tracker:column_metadata consumption: 556931988
tracker:tablet_schema consumption: 7196272
tracker:segment_zonemap consumption: 34206788
tracker:short_key_index consumption: 4135758
tracker:column_zonemap_index consumption: 94802644
tracker:ordinal_index consumption: 377560664
tracker:bitmap_index consumption: 8800
tracker:bloom_filter_index consumption: 0
tracker:compaction consumption: 123385584
tracker:schema_change consumption: 0
tracker:column_pool consumption: 2600578256
tracker:page_cache consumption: 16204554880
tracker:update consumption: 21096945713
tracker:chunk_allocator consumption: 2043140856
tracker:clone consumption: 0
tracker:consistency consumption: 0
tracker:datacache consumption: 0
tracker:replication consumption: 0
*** Aborted at 1733102370 (unix time) try "date -d @1733102370" if you are using GNU date ***
PC: @ 0x2b2696520387 __GI_raise
*** SIGABRT (@0xd59b) received by PID 54683 (TID 0x2b272cf1a700) from PID 54683; stack trace: ***
@ 0x67e5682 google::(anonymous namespace)::FailureSignalHandler()
@ 0x2b26959b5630 (unknown)
@ 0x2b2696520387 __GI_raise
@ 0x2b2696521a78 __GI_abort
@ 0x2a03e28 _ZN9__gnu_cxx27__verbose_terminate_handlerEv.cold
@ 0x8baa7a6 __cxxabiv1::__terminate()
@ 0x8baa811 std::terminate()
@ 0x8baa964 __cxa_throw
@ 0x27b5b3d _ZN9starrocks5serde12_GLOBAL__N_117decode_string_lz4EPKhPvm.cold
@ 0x50317ab starrocks::ColumnVisitorMutableAdapter<>::visit()
@ 0x2bc16fc starrocks::ColumnFactory<>::accept_mutable()
@ 0x5030f38 starrocks::serde::ColumnArraySerde::deserialize()
@ 0x503117d starrocks::ColumnVisitorMutableAdapter<>::visit()
@ 0x34410ac starrocks::ColumnFactory<>::accept_mutable()
@ 0x5030f38 starrocks::serde::ColumnArraySerde::deserialize()
@ 0x38b99dd starrocks::spill::ColumnarSerde::deserialize()
@ 0x38baf26 starrocks::spill::UnorderedInputStream::get_next()
@ 0x38bc64b starrocks::spill::BufferedInputStream::prefetch()
@ 0x38ba467 starrocks::spill::OrderedInputStream::prefetch()
@ 0x384a2c1 _ZNSt17_Function_handlerIFvvEZN9starrocks5spill13SpillerReader15trigger_restoreIRNS2_14IOTaskExecutorERNS2_23ResourceMemTrackerGuardIJSt8weak_ptrINS1_8pipeline12QueryContextEES8_INS2_7SpillerEEEEEEENS1_6StatusEPNS1_12RuntimeStateEOT_OT0_EUlvE_E9_M_invokeERKSt9_Any_data
@ 0x38940d1 starrocks::workgroup::ScanExecutor::worker_thread()
@ 0x2e9087c starrocks::ThreadPool::dispatch_thread()
@ 0x2e89eda starrocks::thread::supervise_thread()
@ 0x2b26959adea5 start_thread
@ 0x2b26965e8b0d __clone
@ 0x0 (unknown)
start time: Mon Dec 2 09:22:09 CST 2024, server uptime: 09:22:09 up 11 days, 12:18, 2 users, load average: 1.70, 11.97, 16.90
Ignored unknown config: prority_networks
SLF4J: Class path contains multiple SLF4J bindings.
SLF4J: Found binding in [jar:file:/opt/software/starrocks/be/lib/jni-packages/starrocks-jdbc-bridge-jar-with-dependencies.jar!/org/slf4j/impl/StaticLoggerBinder.class]
SLF4J: Found binding in [jar:file:/opt/software/starrocks/be/lib/hadoop/common/lib/slf4j-reload4j-1.7.36.jar!/org/slf4j/impl/StaticLoggerBinder.class]
SLF4J: See http://www.slf4j.org/codes.html#multiple_bindings for an explanation.
SLF4J: Actual binding is of type [org.apache.logging.slf4j.Log4jLoggerFactory]
Java HotSpot(TM) 64-Bit Server VM warning: You have loaded library /opt/software/starrocks/be/lib/hadoop/native/libhadoop.so which might have disabled stack guard. The VM will try to fix the stack guard now.
It's highly recommended that you fix the library with 'execstack -c <libfile>', or link it with '-z noexecstack'.

  1. FE TabletCommitInfo 占用大量内存

一般高频导入时容易触发,或是在升级到 3.1+ 版本之后出现

  1. Insert 导致 FE leader 内存泄漏

Insert into values 或是 insert into select from 都会触发

num     #instances         #bytes  class name (module)
-------------------------------------------------------
   1:      71191352     4468153216  [B (java.base@11.0.12)
   2:      72211914     2888476560  java.util.LinkedHashMap$Entry (java.base@11.0.12)
   3:      70306229     1687349496  java.lang.String (java.base@11.0.12)
   4:      18741598     1449018672  [Ljava.util.HashMap$Node; (java.base@11.0.12)
   5:      15373083      860892648  java.util.LinkedHashMap (java.base@11.0.12)
   6:       5490407      401211800  [Ljava.lang.Object; (java.base@11.0.12)
   7:       8264224      396682752  java.util.HashMap (java.base@11.0.12)
   8:       7351345      294053800  com.google.common.collect.HashBasedTable
   9:       7347842      293913680  com.google.common.collect.StandardTable$RowMap
  10:      10005532      240132768  java.lang.Long (java.base@11.0.12)
  11:      14936437      238982992  java.util.LinkedHashMap$LinkedEntrySet (java.base@11.0.12)
  12:       7012005      224384160  java.util.HashMap$Node (java.base@11.0.12)
  13:       1037983      224204328  com.starrocks.load.loadv2.InsertLoadJob
  14:       7784966      186839184  java.util.ArrayList (java.base@11.0.12)
  15:       7347837      176348088  com.google.common.collect.StandardTable$RowMap$EntrySet
  16:       7347526      176340624  com.google.common.collect.StandardTable$ColumnKeySet
  17:        857442      130331184  com.starrocks.catalog.Replica
  18:       7581823      121309168  java.util.LinkedHashMap$LinkedValues (java.base@11.0.12)
  19:       7376764      118028224  java.util.LinkedHashMap$LinkedKeySet (java.base@11.0.12)
  20:       7351345      117621520  com.google.common.collect.HashBasedTable$Factory
  21:       7349067      117585072  com.google.common.collect.AbstractTable$CellSet
  22:       7347526      117560416  com.google.common.collect.Maps$KeySet
  23:       1312210       83981440  java.util.concurrent.ConcurrentHashMap (java.base@11.0.12)
  24:       5212675       83402800  com.google.common.collect.AbstractTable$Values
  25:       1042514       83401312  [Lorg.apache.commons.collections.map.AbstractHashedMap$HashEntry;
  26:       1050189       75613608  com.starrocks.load.EtlStatus$LoadStatistic
  27:         52546       65528392  [I (java.base@11.0.12)
  28:       1050189       58810584  com.starrocks.load.EtlStatus
  1. JDBC 物化视图刷新报错: corrupted partition meta

2023-11-27 09:46:30,449 WARN (pool-23-thread-120|2218) [PartitionBasedMvRefreshProcessor.doMvRefresh():285] Refresh mv mv_user_partition_local failed: java.lang.IllegalStateException: corrupted partition meta
2023-11-27 09:46:30,449 WARN (pool-23-thread-120|2218) [TaskRunExecutor.lambda$executeTaskRun$0():54] failed to execute TaskRun.
java.lang.IllegalStateException: corrupted partition meta
at com.google.common.base.Preconditions.checkState(Preconditions.java:512) ~[spark-dpp-1.0.0.jar:?]
at com.starrocks.connector.ConnectorPartitionTraits$DefaultTraits.getPartitionNameWithPartitionInfo(ConnectorPartitionTraits.java:217) ~[starrocks-fe.jar:?]
at com.starrocks.connector.ConnectorPartitionTraits$DefaultTraits.getUpdatedPartitionNames(ConnectorPartitionTraits.java:239) ~[starrocks-fe.jar:?]
at com.starrocks.catalog.MaterializedView.getUpdatedPartitionNamesOfExternalTable(MaterializedView.java:691) ~[starrocks-fe.jar:?]
at com.starrocks.catalog.MaterializedView.getUpdatedPartitionNamesOfTable(MaterializedView.java:718) ~[starrocks-fe.jar:?]
at com.starrocks.catalog.MaterializedView.getUpdatedPartitionNamesOfTable(MaterializedView.java:534) ~[starrocks-fe.jar:?]
at com.starrocks.scheduler.PartitionBasedMvRefreshProcessor.needToRefreshTable(PartitionBasedMvRefreshProcessor.java:792) ~[starrocks-fe.jar:?]
at com.starrocks.scheduler.PartitionBasedMvRefreshProcessor.isNonPartitionedMVNeedToRefresh(PartitionBasedMvRefreshProcessor.java:814) ~[starrocks-fe.jar:?]
at com.starrocks.scheduler.PartitionBasedMvRefreshProcessor.getPartitionsToRefreshForMaterializedView(PartitionBasedMvRefreshProcessor.java:890) ~[starrocks-fe.jar:?]
at com.starrocks.scheduler.PartitionBasedMvRefreshProcessor.getPartitionsToRefreshForMaterializedView(PartitionBasedMvRefreshProcessor.java:848) ~[starrocks-fe.jar:?]
at com.starrocks.scheduler.PartitionBasedMvRefreshProcessor.doMvRefresh(PartitionBasedMvRefreshProcessor.java:240) ~[starrocks-fe.jar:?]
at com.starrocks.scheduler.PartitionBasedMvRefreshProcessor.processTaskRun(PartitionBasedMvRefreshProcessor.java:192) ~[starrocks-fe.jar:?]
at com.starrocks.scheduler.TaskRun.executeTaskRun(TaskRun.java:190) ~[starrocks-fe.jar:?]
at com.starrocks.scheduler.TaskRunExecutor.lambda$executeTaskRun$0(TaskRunExecutor.java:47) ~[starrocks-fe.jar:?]
at java.util.concurrent.CompletableFuture$AsyncSupply.run(CompletableFuture.java:1700) ~[?:?]
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1128) ~[?:?]
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:628) ~[?:?]
at java.lang.Thread.run(Thread.java:829) ~[?:?]
  1. Rowset::get_segment_iterators crash

*** Aborted at 1697430544 (unix time) try "date -d @1697430544" if you are using GNU date ***
PC: @ 0x7fd69cf50387 __GI_raise
*** SIGABRT (@0x3e800000c2b) received by PID 3115 (TID 0x7fd2383e9700) from PID 3115; stack trace: ***
@ 0x633ed22 google::(anonymous namespace)::FailureSignalHandler()
@ 0x7fd69da05630 (unknown)
@ 0x7fd69cf50387 __GI_raise
@ 0x7fd69cf51a78 __GI_abort
@ 0x27e062c _ZN9__gnu_cxx27__verbose_terminate_handlerEv.cold
@ 0x860bbc6 __cxxabiv1::__terminate()
@ 0x860bc31 std::terminate()
@ 0x860bd84 __cxa_throw
@ 0x24658c0 std::__throw_bad_weak_ptr()
@ 0x2501e2d _ZN9starrocks7Segment13_new_iteratorERKNS_6SchemaERKNS_18SegmentReadOptionsE.cold
@ 0x460a245 starrocks::Segment::new_iterator()
@ 0x4cfbd45 starrocks::Rowset::get_segment_iterators2()
@ 0x46c50fa starrocks::LinkedSchemaChange::generate_delta_column_group_and_cols()
@ 0x46c8f55 starrocks::SchemaChangeHandler::_convert_historical_rowsets()
@ 0x46cb2c9 starrocks::SchemaChangeHandler::_do_process_alter_tablet_v2_normal()
@ 0x46cccbe starrocks::SchemaChangeHandler::_do_process_alter_tablet_v2()
@ 0x46cd82a starrocks::SchemaChangeHandler::process_alter_tablet_v2()
@ 0x465e353 starrocks::EngineAlterTabletTask::execute()
@ 0x44e9d9e starrocks::StorageEngine::execute_task()
@ 0x29c2278 starrocks::run_alter_tablet_task()
@ 0x4f39192 starrocks::ThreadPool::dispatch_thread()
@ 0x4f33c2a starrocks::Thread::supervise_thread()
@ 0x7fd69d9fdea5 start_thread
@ 0x7fd69d018b0d __clone
@ 0x0 (unknown)

*** Aborted at 1737516956 (unix time) try "date -d @1737516956" if you are using GNU date ***
PC: @     0x7ff6576f337f __GI_raise
*** SIGABRT (@0x18bb6) received by PID 101302 (TID 0x7ff591770700) from PID 101302; stack trace: ***
    @          0x5c325c2 google::(anonymous namespace)::FailureSignalHandler()
    @     0x7ff657a93c20 (unknown)
    @     0x7ff6576f337f __GI_raise
    @     0x7ff6576dddb5 __GI_abort
    @          0x2b7ff0e _ZN9__gnu_cxx27__verbose_terminate_handlerEv.cold
    @          0x80cb166 __cxxabiv1::__terminate()
    @          0x80cb1d1 std::terminate()
    @          0x80cb324 __cxa_throw
    @          0x28a27b0 std::__throw_bad_weak_ptr()
    @          0x2925fbd _ZN9starrocks7Segment13_new_iteratorERKNS_10vectorized6SchemaERKNS1_18SegmentReadOptionsE.cold
    @          0x4389735 starrocks::Segment::new_iterator()
    @          0x49e07f2 starrocks::Rowset::get_segment_iterators()
    @          0x4458750 starrocks::vectorized::TabletReader::get_segment_iterators()
    @          0x4458ea1 starrocks::vectorized::TabletReader::_init_collector()
    @          0x445a549 starrocks::vectorized::TabletReader::open()
    @          0x3236e74 starrocks::vectorized::TabletScanner::open()
    @          0x2e359fe starrocks::vectorized::OlapScanNode::_scanner_thread()
    @          0x4a1be00 starrocks::PriorityThreadPool::work_thread()
    @          0x5bf1f87 thread_proxy
    @     0x7ff657a8917a start_thread
    @     0x7ff6577b8df3 __GI___clone
    @                0x0 (unknown)
  1. ForkJoinPool 死锁

"pull-hive-remote-files-1" #761 prio=5 os_prio=0 cpu=41322.65ms elapsed=3199967.09s tid=0x00007fb6e4ac1000 nid=0x2649 in Object.wait()  [0x00007fb6504cb000]
   java.lang.Thread.State: WAITING (on object monitor)
        at java.lang.Object.wait(java.base@11.0.20.1/Native Method)
        - waiting on <no object reference available>
        at java.util.concurrent.ForkJoinTask.externalAwaitDone(java.base@11.0.20.1/ForkJoinTask.java:330)
        - waiting to re-lock in wait() <0x00007fba6fded9c0> (a java.util.stream.ReduceOps$ReduceTask)
        at java.util.concurrent.ForkJoinTask.doInvoke(java.base@11.0.20.1/ForkJoinTask.java:412)
        at java.util.concurrent.ForkJoinTask.invoke(java.base@11.0.20.1/ForkJoinTask.java:736)
        at java.util.stream.ReduceOps$ReduceOp.evaluateParallel(java.base@11.0.20.1/ReduceOps.java:919)
        at java.util.stream.AbstractPipeline.evaluate(java.base@11.0.20.1/AbstractPipeline.java:233)
        at java.util.stream.ReferencePipeline.collect(java.base@11.0.20.1/ReferencePipeline.java:578)
        at org.apache.hadoop.fs.statistics.impl.EvaluatingStatisticsMap.entrySet(EvaluatingStatisticsMap.java:166)
        - locked <0x00007fba6fdeda58> (a org.apache.hadoop.fs.statistics.impl.EvaluatingStatisticsMap)
        at java.util.Collections$UnmodifiableMap.entrySet(java.base@11.0.20.1/Collections.java:1481)
        at org.apache.hadoop.fs.statistics.impl.IOStatisticsBinding.aggregateMaps(IOStatisticsBinding.java:240)
        at org.apache.hadoop.fs.statistics.IOStatisticsSnapshot.aggregate(IOStatisticsSnapshot.java:184)
        - locked <0x00007fba6fdeda90> (a org.apache.hadoop.fs.statistics.IOStatisticsSnapshot)
        at org.apache.hadoop.fs.s3a.Listing$ObjectListingIterator.close(Listing.java:718)
        at org.apache.hadoop.fs.s3a.Listing$FileStatusListingIterator.close(Listing.java:414)
        at org.apache.hadoop.util.functional.RemoteIterators$MaybeClose.close(RemoteIterators.java:744)
        at org.apache.hadoop.util.functional.RemoteIterators$WrappingRemoteIterator.close(RemoteIterators.java:456)
        at org.apache.hadoop.util.functional.RemoteIterators$WrappingRemoteIterator.sourceHasNext(RemoteIterators.java:476)
        at org.apache.hadoop.util.functional.RemoteIterators$MappingRemoteIterator.hasNext(RemoteIterators.java:530)
        at com.starrocks.connector.hive.HiveRemoteFileIO$1.hasNext(HiveRemoteFileIO.java:138)
        at com.starrocks.connector.hive.HiveRemoteFileIO.getRemoteFiles(HiveRemoteFileIO.java:98)
        at com.starrocks.connector.hive.HiveRemoteFileIO.getRemoteFiles(HiveRemoteFileIO.java:67)
        at com.starrocks.connector.CachingRemoteFileIO.loadRemoteFiles(CachingRemoteFileIO.java:87)
        at com.starrocks.connector.CachingRemoteFileIO$Lambda$297/0x00007fb76d6fe4c8.apply(Unknown Source)
        at com.google.common.cache.CacheLoader$FunctionToCacheLoader.load(CacheLoader.java:169)
        at com.google.common.cache.CacheLoader$1.load(CacheLoader.java:192)
        at com.google.common.cache.LocalCache$LoadingValueReference.loadFuture(LocalCache.java:3570)
        at com.google.common.cache.LocalCache$Segment.loadSync(LocalCache.java:2312)
        at com.google.common.cache.LocalCache$Segment.lockedGetOrLoad(LocalCache.java:2189)
        - locked <0x00007fba6fdedcc8> (a com.google.common.cache.LocalCache$StrongAccessEntry)
        at com.google.common.cache.LocalCache$Segment.get(LocalCache.java:2079)
        at com.google.common.cache.LocalCache.get(LocalCache.java:4011)
        at com.google.common.cache.LocalCache.getOrLoad(LocalCache.java:4034)
        at com.google.common.cache.LocalCache$LocalLoadingCache.get(LocalCache.java:5010)
        at com.google.common.cache.LocalCache$LocalLoadingCache.getUnchecked(LocalCache.java:5017)
        at com.starrocks.connector.CachingRemoteFileIO.getRemoteFiles(CachingRemoteFileIO.java:78)
        at com.starrocks.connector.RemoteFileOperations.lambda$getRemoteFiles$0(RemoteFileOperations.java:86)
        at com.starrocks.connector.RemoteFileOperations$Lambda$1716/0x00007fb6d234b500.call(Unknown Source)
        at java.util.concurrent.FutureTask.run(java.base@11.0.20.1/FutureTask.java:264)
        at java.util.concurrent.ThreadPoolExecutor.runWorker(java.base@11.0.20.1/ThreadPoolExecutor.java:1128)
        at java.util.concurrent.ThreadPoolExecutor$Worker.run(java.base@11.0.20.1/ThreadPoolExecutor.java:628)
        at java.lang.Thread.run(java.base@11.0.20.1/Thread.java:829)
"pull-hive-remote-files-1" #761 prio=5 os_prio=0 cpu=41322.65ms elapsed=3199967.09s tid=0x00007fb6e4ac1000 nid=0x2649 in Object.wait()  [0x00007fb6504cb000]
   java.lang.Thread.State: WAITING (on object monitor)
        at java.lang.Object.wait(java.base@11.0.20.1/Native Method)
        - waiting on <no object reference available>
        at java.util.concurrent.ForkJoinTask.externalAwaitDone(java.base@11.0.20.1/ForkJoinTask.java:330)
        - waiting to re-lock in wait() <0x00007fba6fded9c0> (a java.util.stream.ReduceOps$ReduceTask)
        at java.util.concurrent.ForkJoinTask.doInvoke(java.base@11.0.20.1/ForkJoinTask.java:412)
        at java.util.concurrent.ForkJoinTask.invoke(java.base@11.0.20.1/ForkJoinTask.java:736)
        at java.util.stream.ReduceOps$ReduceOp.evaluateParallel(java.base@11.0.20.1/ReduceOps.java:919)
        at java.util.stream.AbstractPipeline.evaluate(java.base@11.0.20.1/AbstractPipeline.java:233)
        at java.util.stream.ReferencePipeline.collect(java.base@11.0.20.1/ReferencePipeline.java:578)
        at org.apache.hadoop.fs.statistics.impl.EvaluatingStatisticsMap.entrySet(EvaluatingStatisticsMap.java:166)
        - locked <0x00007fba6fdeda58> (a org.apache.hadoop.fs.statistics.impl.EvaluatingStatisticsMap)
        at java.util.Collections$UnmodifiableMap.entrySet(java.base@11.0.20.1/Collections.java:1481)
        at org.apache.hadoop.fs.statistics.impl.IOStatisticsBinding.aggregateMaps(IOStatisticsBinding.java:240)
        at org.apache.hadoop.fs.statistics.IOStatisticsSnapshot.aggregate(IOStatisticsSnapshot.java:184)
        - locked <0x00007fba6fdeda90> (a org.apache.hadoop.fs.statistics.IOStatisticsSnapshot)
        at org.apache.hadoop.fs.s3a.Listing$ObjectListingIterator.close(Listing.java:718)
        at org.apache.hadoop.fs.s3a.Listing$FileStatusListingIterator.close(Listing.java:414)
        at org.apache.hadoop.util.functional.RemoteIterators$MaybeClose.close(RemoteIterators.java:744)
        at org.apache.hadoop.util.functional.RemoteIterators$WrappingRemoteIterator.close(RemoteIterators.java:456)
        at org.apache.hadoop.util.functional.RemoteIterators$WrappingRemoteIterator.sourceHasNext(RemoteIterators.java:476)
        at org.apache.hadoop.util.functional.RemoteIterators$MappingRemoteIterator.hasNext(RemoteIterators.java:530)
        at com.starrocks.connector.hive.HiveRemoteFileIO$1.hasNext(HiveRemoteFileIO.java:138)
        at com.starrocks.connector.hive.HiveRemoteFileIO.getRemoteFiles(HiveRemoteFileIO.java:98)
        at com.starrocks.connector.hive.HiveRemoteFileIO.getRemoteFiles(HiveRemoteFileIO.java:67)
        at com.starrocks.connector.CachingRemoteFileIO.loadRemoteFiles(CachingRemoteFileIO.java:87)
        at com.starrocks.connector.CachingRemoteFileIO$Lambda$297/0x00007fb76d6fe4c8.apply(Unknown Source)
        at com.google.common.cache.CacheLoader$FunctionToCacheLoader.load(CacheLoader.java:169)
        at com.google.common.cache.CacheLoader$1.load(CacheLoader.java:192)
        at com.google.common.cache.LocalCache$LoadingValueReference.loadFuture(LocalCache.java:3570)
        at com.google.common.cache.LocalCache$Segment.loadSync(LocalCache.java:2312)
        at com.google.common.cache.LocalCache$Segment.lockedGetOrLoad(LocalCache.java:2189)
        - locked <0x00007fba6fdedcc8> (a com.google.common.cache.LocalCache$StrongAccessEntry)
        at com.google.common.cache.LocalCache$Segment.get(LocalCache.java:2079)
        at com.google.common.cache.LocalCache.get(LocalCache.java:4011)
        at com.google.common.cache.LocalCache.getOrLoad(LocalCache.java:4034)
        at com.google.common.cache.LocalCache$LocalLoadingCache.get(LocalCache.java:5010)
        at com.google.common.cache.LocalCache$LocalLoadingCache.getUnchecked(LocalCache.java:5017)
        at com.starrocks.connector.CachingRemoteFileIO.getRemoteFiles(CachingRemoteFileIO.java:78)
        at com.starrocks.connector.RemoteFileOperations.lambda$getRemoteFiles$0(RemoteFileOperations.java:86)
        at com.starrocks.connector.RemoteFileOperations$Lambda$1716/0x00007fb6d234b500.call(Unknown Source)
        at java.util.concurrent.FutureTask.run(java.base@11.0.20.1/FutureTask.java:264)
        at java.util.concurrent.ThreadPoolExecutor.runWorker(java.base@11.0.20.1/ThreadPoolExecutor.java:1128)
        at java.util.concurrent.ThreadPoolExecutor$Worker.run(java.base@11.0.20.1/ThreadPoolExecutor.java:628)
        at java.lang.Thread.run(java.base@11.0.20.1/Thread.java:829)
  • Github Issue:

  • Github Fix PR:

  • Jira

  • 问题版本:

    • 3.1.0 ~ 3.1.17

    • 3.2.0 ~ 3.2.12

    • 3.3.0 ~ 3.3.8

  • 修复版本:

    • 3.1.18+

    • 3.2.13+

    • 3.3.9+

  • 问题原因:

  • 临时解决办法:

    • Set global cbo_enable_low_cardinality_optimize=false;

    • Set global low_cardinality_optimize_v2=false;

  1. 开启 query cache 带有 having 的查询 crash

*** Aborted at 1712672237 (unix time) try "date -d @1712672237" if you are using GNU date ***
PC: @          0x39c1538 starrocks::UnpackConstColumnBinaryFunction<>::evaluate<>()
*** SIGSEGV (@0x0) received by PID 1650236 (TID 0x7fd6c9ff9640) from PID 0; stack trace: ***
    @          0x5fa5742 google::(anonymous namespace)::FailureSignalHandler()
    @     0x7fd722195520 (unknown)
    @          0x39c1538 starrocks::UnpackConstColumnBinaryFunction<>::evaluate<>()
    @          0x39c180c starrocks::VectorizedBinaryPredicate<>::evaluate_checked()
    @          0x40c0e4e starrocks::VectorizedOrCompoundPredicate::evaluate_checked()
    @          0x40c0e4e starrocks::VectorizedOrCompoundPredicate::evaluate_checked()
    @          0x40c0e4e starrocks::VectorizedOrCompoundPredicate::evaluate_checked()
    @          0x385daa3 starrocks::ExprContext::evaluate()
    @          0x385ddef starrocks::ExprContext::evaluate()
    @          0x28d3432 starrocks::ExecNode::eval_conjuncts()
    @          0x28bf6ff starrocks::pipeline::Operator::eval_conjuncts_and_in_filters()
    @          0x2bfa677 starrocks::pipeline::AggregateBlockingSourceOperator::pull_chunk()
    @          0x2c53915 starrocks::query_cache::ConjugateOperator::pull_chunk()
    @          0x28aa8bc starrocks::query_cache::MultilaneOperator::_pull_chunk_from_lane()
    @          0x28aae25 starrocks::query_cache::MultilaneOperator::pull_chunk()
    @          0x2891f64 starrocks::pipeline::PipelineDriver::process()
    @          0x550125e starrocks::pipeline::GlobalDriverExecutor::_worker_thread()
    @          0x4db08fa starrocks::ThreadPool::dispatch_thread()
    @          0x4dab38a starrocks::Thread::supervise_thread()
    @     0x7fd7221e7ac3 (unknown)
    @     0x7fd722279850 (unknown)
    @                0x0 (unknown)
  • Github Issue:

  • Github Fix PR:

  • Jira

  • 问题版本:

    • 2.5.0 ~ 2.5.20

    • 3.1.0 ~ 3.1.10

    • 3.2.0 ~ 3.2.4

  • 修复版本:

    • 2.5.21+

    • 3.1.11+

    • 3.2.5+

  • 问题原因:

  • 临时解决办法:

    • Set global enable_query_cache=false;
  1. Parquet 查询 Decimal 列 crash

*** Aborted at 1734387117 (unix time) try "date -d @1734387117" if you are using GNU date ***
PC: @          0x7e96840 starrocks::parquet::BinaryToDecimalConverter<(starrocks::LogicalType)49>::convert(std::shared_ptr<starrocks::Column> const&, starrocks::Column*)
*** SIGSEGV (@0x0) received by PID 2866 (TID 0x7fafe599d700) from PID 0; stack trace: ***
    @     0x7fb077ef920b __pthread_once_slow
    @          0xb204694 google::(anonymous namespace)::FailureSignalHandler(int, siginfo_t*, void*)
    @     0x7fb07759c54f os::Linux::chained_handler(int, siginfo_t*, void*)
    @     0x7fb0775a23b8 JVM_handle_linux_signal
    @     0x7fb077593db8 signalHandler(int, siginfo_t*, void*)
    @     0x7fb077f02630 (/usr/lib64/libpthread-2.17.so+0xf62f)
    @          0x7e96840 starrocks::parquet::BinaryToDecimalConverter<(starrocks::LogicalType)49>::convert(std::shared_ptr<starrocks::Column> const&, starrocks::Column*)
    @          0x7e858a7 starrocks::parquet::StatisticsHelper::decode_value_into_column(std::shared_ptr<starrocks::Column> const&, std::vector<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >, std::allocator<std::__cxx11::basic_string<char, std::char@
    @          0x7eb97dd starrocks::parquet::ScalarColumnReader::page_index_zone_map_filter(std::vector<starrocks::ColumnPredicate const*, std::allocator<starrocks::ColumnPredicate const*> > const&, starrocks::SparseRange<unsigned long>*, starrocks::CompoundNodeType, unsigned long@
    @          0x7eb0e8d starrocks::StatusOr<std::optional<starrocks::SparseRange<unsigned long> > > starrocks::parquet::ZoneMapEvaluator<(starrocks::parquet::FilterLevel)1>::operator()<(starrocks::CompoundNodeType)0>(starrocks::PredicateCompoundNode<(starrocks::CompoundNodeType)0@
    @          0x7eaa89a starrocks::parquet::GroupReader::_deal_with_pageindex()
    @          0x7eac030 starrocks::parquet::GroupReader::prepare()
    @          0x7e740be starrocks::parquet::FileReader::_init_group_readers()
    @          0x7e74893 starrocks::parquet::FileReader::init(starrocks::HdfsScannerContext*)
    @          0x7c44992 starrocks::HdfsParquetScanner::do_open(starrocks::RuntimeState*)
    @          0x7c35727 starrocks::HdfsScanner::open(starrocks::RuntimeState*)
    @          0x7ba8ca2 starrocks::connector::HiveDataSource::_init_scanner(starrocks::RuntimeState*)
    @          0x7baad91 starrocks::connector::HiveDataSource::open(starrocks::RuntimeState*)
    @          0x49327dc starrocks::pipeline::ConnectorChunkSource::_open_data_source(starrocks::RuntimeState*, bool*)
    @          0x4932a1d starrocks::pipeline::ConnectorChunkSource::_read_chunk(starrocks::RuntimeState*, std::shared_ptr<starrocks::Chunk>*)
    @          0x4ccc1ea starrocks::pipeline::ChunkSource::buffer_next_batch_chunks_blocking(starrocks::RuntimeState*, unsigned long, starrocks::workgroup::WorkGroup const*)
    @          0x4922a4e auto starrocks::pipeline::ScanOperator::_trigger_next_scan(starrocks::RuntimeState*, int)::{lambda(auto:1&)#1}::operator()<starrocks::workgroup::YieldContext>(starrocks::workgroup::YieldContext&) const [clone .isra.0]
    @          0x4a39e19 starrocks::workgroup::ScanExecutor::worker_thread()
    @          0x3cf933f starrocks::ThreadPool::dispatch_thread()
    @          0x3cf01c0 starrocks::Thread::supervise_thread(void*)
    @     0x7fb077efaea5 start_thread
    @     0x7fb075b2bb0d __clone

这个有临时修复方案吗?

2.4版本不支持,只能升级吗?mysql> set global enable_rbo_table_prune = false;
ERROR 1064 (HY000): Unknown system variable 'enable_rbo_table_prune'

是的,只能升级,2.4版本不推荐使用,升级到2.5版本吧

  1. Left anti join crash

*** Aborted at 1733316816 (unix time) try "date -d @1733316816" if you are using GNU date ***
PC: @          0x37bacdb starrocks::JoinHashMap<>::_ZN9starrocks11JoinHashMapILNS_11LogicalTypeE17ENS_23SerializedJoinBuildFuncENS_23SerializedJoinProbeFuncEE33_probe_from_ht_for_left_anti_joinEPNS_12RuntimeStateERKSt6vectorINS_5SliceESaIS8_EESC_.actor()
*** SIGSEGV (@0x0) received by PID 44699 (TID 0x1465a04da640) from PID 0; stack trace: ***
    @          0x6cb5a62 google::(anonymous namespace)::FailureSignalHandler()
    @     0x14691162b9b9 os::Linux::chained_handler()
    @     0x146911631c7a JVM_handle_linux_signal
    @     0x146911623a4c signalHandler()
    @     0x14691063e6f0 (unknown)
    @          0x37bacdb starrocks::JoinHashMap<>::_ZN9starrocks11JoinHashMapILNS_11LogicalTypeE17ENS_23SerializedJoinBuildFuncENS_23SerializedJoinProbeFuncEE33_probe_from_ht_for_left_anti_joinEPNS_12RuntimeStateERKSt6vectorINS_5SliceESaIS8_EESC_.actor()
    @          0x37a9e5c starrocks::JoinHashMap<>::_probe_coroutine<>()
    @          0x3860e84 starrocks::JoinHashMap<>::probe()
    @          0x37a3d46 starrocks::JoinHashTable::probe()
    @          0x3ce25ca starrocks::HashJoinProber::probe_chunk()
    @          0x3cddad4 starrocks::HashJoiner::_pull_probe_output_chunk()
    @          0x3cddd32 starrocks::HashJoiner::pull_chunk()
    @          0x3b16aa9 starrocks::pipeline::HashJoinProbeOperator::pull_chunk()
    @          0x3ac36e6 starrocks::pipeline::PipelineDriver::process()
    @          0x3ab55bf starrocks::pipeline::GlobalDriverExecutor::_worker_thread()
    @          0x302f5ec starrocks::ThreadPool::dispatch_thread()
    @          0x3028b6a starrocks::Thread::supervise_thread()
  1. 高基数字符串聚合慢

  • Count distinct 单列高基数字符串

  • Group by 单列高基数字符串

  • Multi count distinct 高基数字符串

# c1 是高基数字符串
select count(distinct c1) from t1;
# 或
select xxx from t1 group by c1
# 或
select count(distinct c1), count(distinct c2) from t_10

295. LogMessageFatal

*** Aborted at 1744634903 (unix time) try "date -d @1744634903" if you are using GNU date ***
PC: @ 0x7f74b286d387 __GI_raise
*** SIGABRT (@0x3eb0000859c) received by PID 34204 (TID 0x7f74741fe700) from PID 34204; stack trace: ***
@ 0x6d933a2 google::(anonymous namespace)::FailureSignalHandler()
@ 0x7f74b353c630 (unknown)
@ 0x7f74b286d387 __GI_raise
@ 0x7f74b286ea78 __GI_abort
@ 0x3666087 starrocks::failure_function()
@ 0x6d86d7d google::LogMessage::Fail()
@ 0x6d891ef google::LogMessage::SendToLog()
@ 0x6d868ce google::LogMessage::Flush()
@ 0x6d897f9 google::LogMessageFatal::~LogMessageFatal()
@ 0x54646dd starrocks::DataDir::load()
@ 0x54425eb _ZNSt6thread11_State_implINS_8_InvokerISt5tupleIJZN9starrocks13StorageEngine14load_data_dirsERKSt6vectorIPNS3_7DataDirESaIS7_EEEUlvE_EEEEE6_M_runEv
@ 0x91d36c0 execute_native_thread_routine
@ 0x7f74b3534ea5 start_thread
@ 0x7f74b293596d __clone
@ 0x0 (unknown)

Github Issue:

Github Fix PR:

Jira
问题版本:

3.1.0 ~ latest

3.2.0 ~ latest

3.3.0 ~ latest

3.4.0 ~ latest

修复版本:

问题原因:

临时解决办法:

  1. 检查日志是否存在 load tablets encounter failure 错误:
grep -a 'load tablets encounter failure' be.INFO
  1. 检查日志是否存在 there is failure when scan rockdb tablet metas 错误:
grep -a 'there is failure when scan rockdb tablet metas' be.INFO
  1. 根据检查结果处理:
  • 如果 there is failure when scan rockdb tablet metas 存在,请联系 StarRocks RD 团队处理。

  • 如果 there is failure when scan rockdb tablet metas 不存在,执行以下步骤:

    1. 在 be.conf 文件中添加以下配置:
ignore_load_tablet_failure = true
    2. 保存配置后,重启 BE 服务。
  1. Spill 导致 BE Crash

*** Aborted at 1741768367 (unix time) try "date -d @1741768367" if you are using GNU date ***
PC: @          0x519375e starrocks::ColumnVisitorMutableAdapter<starrocks::ColumnAppendPermutation>::visit(starrocks::FixedLengthColumn<long>*)
*** SIGSEGV (@0x7f5a22e41000) received by PID 25 (TID 0x7f5ac2240640) from PID 585371648; stack trace: ***
    @     0x7f5b741a3ee8 (/usr/lib/x86_64-linux-gnu/libc.so.6+0x99ee7)
    @          0x9af2d89 google::(anonymous namespace)::FailureSignalHandler(int, siginfo_t*, void*)
    @     0x7f5b7414c520 (/usr/lib/x86_64-linux-gnu/libc.so.6+0x4251f)
    @          0x519375e starrocks::ColumnVisitorMutableAdapter<starrocks::ColumnAppendPermutation>::visit(starrocks::FixedLengthColumn<long>*)
    @          0x4fc742d starrocks::ColumnFactory<starrocks::FixedLengthColumnBase<long>, starrocks::FixedLengthColumn<long>, starrocks::Column>::accept_mutable(starrocks::ColumnVisitorMutable*)
    @          0x519028d starrocks::materialize_column_by_permutation(starrocks::Column*, std::vector<std::shared_ptr<starrocks::Column>, std::allocator<std::shared_ptr<starrocks::Column> > > const&, starrocks::array_view<starrocks::PermutationItem> const&)
    @          0x5190536 starrocks::materialize_by_permutation(starrocks::Chunk*, std::vector<std::shared_ptr<starrocks::Chunk>, std::allocator<std::shared_ptr<starrocks::Chunk> > > const&, starrocks::array_view<starrocks::PermutationItem> const&)
    @          0x528c30e starrocks::MergeTwoCursor::merge_sorted_intersected_cursor(starrocks::SortedRun&, starrocks::SortedRun&)
    @          0x528d9a1 starrocks::MergeTwoCursor::merge_sorted_cursor_two_way()
    @          0x528eb8c starrocks::MergeTwoCursor::next()
    @          0x528ec62 std::_Function_handler<bool (std::unique_ptr<starrocks::Chunk, std::default_delete<starrocks::Chunk> >*, bool*), starrocks::MergeTwoCursor::MergeTwoCursor(starrocks::SortDescs const&, std::unique_ptr<starrocks::SimpleChunkSortCursor, std::default_delete<stO2
    @          0x5293947 starrocks::SimpleChunkSortCursor::try_get_next()
    @          0x528bcd4 starrocks::MergeCursorsCascade::try_get_next()
    @          0x5287f5c starrocks::CascadeChunkMerger::get_next(std::unique_ptr<starrocks::Chunk, std::default_delete<starrocks::Chunk> >*, std::atomic<bool>*, bool*)
    @          0x54bb965 starrocks::spill::OrderedInputStream::get_next(starrocks::workgroup::YieldContext&, starrocks::spill::SerdeContext&)
    @          0x612da77 starrocks::StatusOr<std::shared_ptr<starrocks::Chunk> > starrocks::spill::SpillerReader::restore<starrocks::spill::IOTaskExecutor, starrocks::spill::ResourceMemTrackerGuard<std::weak_ptr<starrocks::pipeline::QueryContext>, std::weak_ptr<starrocks::spill::SO2
    @          0x612e03b starrocks::StatusOr<std::shared_ptr<starrocks::Chunk> > starrocks::spill::Spiller::restore<starrocks::spill::IOTaskExecutor, starrocks::spill::ResourceMemTrackerGuard<std::weak_ptr<starrocks::pipeline::QueryContext>, std::weak_ptr<starrocks::spill::SpillerO2
    @          0x611c9dd starrocks::pipeline::SpillableAggregateBlockingSourceOperator::_pull_spilled_chunk(starrocks::RuntimeState*)
    @          0x611cfb6 starrocks::pipeline::SpillableAggregateBlockingSourceOperator::pull_chunk(starrocks::RuntimeState*)
    @          0x4f8e0c0 starrocks::pipeline::PipelineDriver::process(starrocks::RuntimeState*, int)
    @          0x79fa3c3 starrocks::pipeline::GlobalDriverExecutor::_worker_thread()
    @          0x872f223 starrocks::ThreadPool::dispatch_thread()
    @          0x87268e9 starrocks::Thread::supervise_thread(void*)
    @     0x7f5b7419eac3 (/usr/lib/x86_64-linux-gnu/libc.so.6+0x94ac2)
    @     0x7f5b7422fa04 clone

Or

*** Aborted at 1741657717 (unix time) try "date -d @1741657717" if you are using GNU date ***
PC: @     0x7f51ec288aca (/usr/lib/x86_64-linux-gnu/libc.so.6+0x1a0ac9)
*** SIGSEGV (@0x7f51e57ff000) received by PID 25 (TID 0x7f5145bf4640) from PID 18446744073264951296; stack trace: ***
    @     0x7f51ec181ee8 (/usr/lib/x86_64-linux-gnu/libc.so.6+0x99ee7)
    @          0xa16e1c9 google::(anonymous namespace)::FailureSignalHandler(int, siginfo_t*, void*)
    @     0x7f51ec12a520 (/usr/lib/x86_64-linux-gnu/libc.so.6+0x4251f)
    @     0x7f51ec288aca (/usr/lib/x86_64-linux-gnu/libc.so.6+0x1a0ac9)
    @          0x5472aed starrocks::FixedLengthColumnBase<signed char>::append(starrocks::Column const&, unsigned long, unsigned long)
    @          0x53b194a starrocks::Chunk::append(starrocks::Chunk const&, unsigned long, unsigned long)
    @          0x58b5989 starrocks::spill::OrderedMemTable::append(std::shared_ptr<starrocks::Chunk>)
    @          0x5827e8c starrocks::Status starrocks::spill::RawSpillerWriter::spill<starrocks::spill::IOTaskExecutor, starrocks::spill::ResourceMemTrackerGuard<std::weak_ptr<starrocks::pipeline::QueryContext>, std::weak_ptr<starrocks::spill::Spiller> >&>(starrocks::RuntimeState*,¹^Q
    @          0x582a4ad starrocks::Status starrocks::spill::Spiller::spill<starrocks::spill::IOTaskExecutor, starrocks::spill::ResourceMemTrackerGuard<std::weak_ptr<starrocks::pipeline::QueryContext>, std::weak_ptr<starrocks::spill::Spiller> > >(starrocks::RuntimeState*, std::sha¹^Q
    @          0x75cfa3c starrocks::pipeline::SpillProcessOperator::pull_chunk(starrocks::RuntimeState*)
    @          0x5396c5f starrocks::pipeline::PipelineDriver::process(starrocks::RuntimeState*, int)
    @          0x7d94683 starrocks::pipeline::GlobalDriverExecutor::_worker_thread()
    @          0x8aa57d2 starrocks::ThreadPool::dispatch_thread()
    @          0x8a9db09 starrocks::Thread::supervise_thread(void*)
    @     0x7f51ec17cac3 (/usr/lib/x86_64-linux-gnu/libc.so.6+0x94ac2)
    @     0x7f51ec20da04 clone