LLVM IR: why is the IR generated for `std::string` so much more complicated than for `char`?

Source Code:

// Baseline case: the same initialise / reassign / read / return sequence as
// foo_string below, but on a plain built-in `char`.
char foo_char() {
    char state_char = 'l';    // initial value, overwritten on the next line
    state_char = 'r';
    std::cin >> state_char;   // formatted read from standard input into state_char
    return state_char;        // returned by value
}

// Same sequence of statements as foo_char above, but on a std::string:
// construct from a literal, assign another literal, read from standard
// input, and return the object by value.
std::string foo_string() {
    std::string state_string = "ILoveLLVM";   // construction from a C string literal
    state_string = "IDon'tLoveLLVM";          // assignment from a C string literal
    std::cin >> state_string;                 // formatted read from standard input into state_string
    return state_string;                      // the local object itself is returned by value
}

Generated IR:

; IR for foo_char(): the char lives in a single 1-byte stack slot and is
; returned directly as an i8.
; Function Attrs: mustprogress noinline optnone uwtable
define dso_local signext i8 @_Z8foo_charv() #4 !dbg !874 {
entry:
  ; Stack slot for the local variable state_char.
  %state_char = alloca i8, align 1
  call void @llvm.dbg.declare(metadata i8* %state_char, metadata !877, metadata !DIExpression()), !dbg !878
  ; 108 is ASCII 'l' (the initialiser), 114 is ASCII 'r' (the reassignment).
  store i8 108, i8* %state_char, align 1, !dbg !878
  store i8 114, i8* %state_char, align 1, !dbg !879
  ; std::cin >> state_char: operator>> is called with @_ZSt3cin and a pointer to the slot.
  %call = call nonnull align 8 dereferenceable(16) %"class.std::basic_istream"* @_ZStrsIcSt11char_traitsIcEERSt13basic_istreamIT_T0_ES6_RS3_(%"class.std::basic_istream"* nonnull align 8 dereferenceable(16) @_ZSt3cin, i8* nonnull align 1 dereferenceable(1) %state_char), !dbg !880
  ; Reload the (possibly updated) char from the slot and return it.
  %0 = load i8, i8* %state_char, align 1, !dbg !881
  ret i8 %0, !dbg !882
}

declare dso_local nonnull align 8 dereferenceable(16) %"class.std::basic_istream"* @_ZStrsIcSt11char_traitsIcEERSt13basic_istreamIT_T0_ES6_RS3_(%"class.std::basic_istream"* nonnull align 8 dereferenceable(16), i8* nonnull align 1 dereferenceable(1)) #1

; IR for foo_string(): the function returns void and writes the string into
; caller-provided storage passed as the sret argument %agg.result. Calls that
; may throw are emitted as `invoke`, each with an unwind label that points to
; a cleanup landing pad.
; Function Attrs: mustprogress noinline optnone uwtable
define dso_local void @_Z10foo_stringB5cxx11v(%"class.std::__cxx11::basic_string"* noalias sret(%"class.std::__cxx11::basic_string") align 8 %agg.result) #4 personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) !dbg !883 {
entry:
  ; Bookkeeping slots: the result pointer (referenced by the debug info), the
  ; NRVO flag, a temporary allocator for the constructor call, and the
  ; exception pointer / selector saved by the landing pads.
  %result.ptr = alloca i8*, align 8
  %nrvo = alloca i1, align 1
  %ref.tmp = alloca %"class.std::allocator", align 1
  %exn.slot = alloca i8*, align 8
  %ehselector.slot = alloca i32, align 4
  %0 = bitcast %"class.std::__cxx11::basic_string"* %agg.result to i8*
  store i8* %0, i8** %result.ptr, align 8
  ; NRVO flag starts false; it is set to true in %invoke.cont3.
  store i1 false, i1* %nrvo, align 1, !dbg !891
  call void @llvm.dbg.declare(metadata i8** %result.ptr, metadata !892, metadata !DIExpression(DW_OP_deref)), !dbg !893
  ; Construct the temporary std::allocator<char>, then construct the string
  ; directly in %agg.result from @.str (10 bytes; presumably "ILoveLLVM" plus
  ; the terminating NUL - the global itself is not shown here).
  call void @_ZNSaIcEC1Ev(%"class.std::allocator"* nonnull align 1 dereferenceable(1) %ref.tmp) #3, !dbg !894
  invoke void @_ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEC1EPKcRKS3_(%"class.std::__cxx11::basic_string"* nonnull align 8 dereferenceable(32) %agg.result, i8* getelementptr inbounds ([10 x i8], [10 x i8]* @.str, i64 0, i64 0), %"class.std::allocator"* nonnull align 1 dereferenceable(1) %ref.tmp)
          to label %invoke.cont unwind label %lpad, !dbg !894

invoke.cont:                                      ; preds = %entry
  ; Constructor succeeded: destroy the temporary allocator, then perform
  ; state_string = <literal> via basic_string::operator=(const char*) with
  ; @.str.1 (15 bytes; presumably "IDon'tLoveLLVM" plus NUL).
  call void @_ZNSaIcED1Ev(%"class.std::allocator"* nonnull align 1 dereferenceable(1) %ref.tmp) #3, !dbg !893
  %call = invoke nonnull align 8 dereferenceable(32) %"class.std::__cxx11::basic_string"* @_ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEaSEPKc(%"class.std::__cxx11::basic_string"* nonnull align 8 dereferenceable(32) %agg.result, i8* getelementptr inbounds ([15 x i8], [15 x i8]* @.str.1, i64 0, i64 0))
          to label %invoke.cont2 unwind label %lpad1, !dbg !895

invoke.cont2:                                     ; preds = %invoke.cont
  ; std::cin >> state_string: operator>> is called with @_ZSt3cin and the string object.
  %call4 = invoke nonnull align 8 dereferenceable(16) %"class.std::basic_istream"* @_ZStrsIcSt11char_traitsIcESaIcEERSt13basic_istreamIT_T0_ES7_RNSt7__cxx1112basic_stringIS4_S5_T1_EE(%"class.std::basic_istream"* nonnull align 8 dereferenceable(16) @_ZSt3cin, %"class.std::__cxx11::basic_string"* nonnull align 8 dereferenceable(32) %agg.result)
          to label %invoke.cont3 unwind label %lpad1, !dbg !896

invoke.cont3:                                     ; preds = %invoke.cont2
  ; return state_string: mark the in-place object as the returned value and
  ; branch on the flag. The flag was stored as true just above, so the
  ; %nrvo.skipdtor edge is the one taken; the check remains because the
  ; function is compiled optnone.
  store i1 true, i1* %nrvo, align 1, !dbg !897
  %nrvo.val = load i1, i1* %nrvo, align 1, !dbg !898
  br i1 %nrvo.val, label %nrvo.skipdtor, label %nrvo.unused, !dbg !898

lpad:                                             ; preds = %entry
  ; Cleanup when the string constructor throws: save the exception pointer
  ; and selector, destroy the temporary allocator, then resume unwinding.
  %1 = landingpad { i8*, i32 }
          cleanup, !dbg !898
  %2 = extractvalue { i8*, i32 } %1, 0, !dbg !898
  store i8* %2, i8** %exn.slot, align 8, !dbg !898
  %3 = extractvalue { i8*, i32 } %1, 1, !dbg !898
  store i32 %3, i32* %ehselector.slot, align 4, !dbg !898
  call void @_ZNSaIcED1Ev(%"class.std::allocator"* nonnull align 1 dereferenceable(1) %ref.tmp) #3, !dbg !893
  br label %eh.resume, !dbg !893

lpad1:                                            ; preds = %invoke.cont2, %invoke.cont
  ; Cleanup when the assignment or the stream read throws: save the exception
  ; pointer and selector, destroy the already-constructed string, then resume
  ; unwinding.
  %4 = landingpad { i8*, i32 }
          cleanup, !dbg !898
  %5 = extractvalue { i8*, i32 } %4, 0, !dbg !898
  store i8* %5, i8** %exn.slot, align 8, !dbg !898
  %6 = extractvalue { i8*, i32 } %4, 1, !dbg !898
  store i32 %6, i32* %ehselector.slot, align 4, !dbg !898
  call void @_ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEED1Ev(%"class.std::__cxx11::basic_string"* nonnull align 8 dereferenceable(32) %agg.result) #3, !dbg !898
  br label %eh.resume, !dbg !898

nrvo.unused:                                      ; preds = %invoke.cont3
  ; Taken only if the NRVO flag is false: destroy the string in %agg.result.
  call void @_ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEED1Ev(%"class.std::__cxx11::basic_string"* nonnull align 8 dereferenceable(32) %agg.result) #3, !dbg !898
  br label %nrvo.skipdtor, !dbg !898

nrvo.skipdtor:                                    ; preds = %nrvo.unused, %invoke.cont3
  ; Normal exit: the result already lives in %agg.result, so nothing is returned by value.
  ret void, !dbg !898

eh.resume:                                        ; preds = %lpad1, %lpad
  ; Rebuild the { exception pointer, selector } pair from the saved slots and
  ; continue unwinding into the caller.
  %exn = load i8*, i8** %exn.slot, align 8, !dbg !893
  %sel = load i32, i32* %ehselector.slot, align 4, !dbg !893
  %lpad.val = insertvalue { i8*, i32 } undef, i8* %exn, 0, !dbg !893
  %lpad.val5 = insertvalue { i8*, i32 } %lpad.val, i32 %sel, 1, !dbg !893
  resume { i8*, i32 } %lpad.val5, !dbg !893
}

The two source functions, one using `char` and one using `std::string`, look almost identical, yet the IR generated for the `std::string` version is far more complicated. Why is that?

And how should I analyse this kind of IR? For example, what should I do if I want to find the actual value of state_string?

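std::string is not a built-in type. It is a class with some extra tricks built into it, including the short string optimization, and it needs code to handle them. In addition, std::string allocates memory for the array of characters it stores, which a plain char does not do.
This complexity propagates to the IR. In the listing above it shows up as calls to the string's constructor, assignment operator and destructor instead of simple `store` instructions; as `invoke` instructions with `landingpad` cleanup blocks, because those calls may throw and the object must then be destroyed; and as a hidden `sret` pointer (`%agg.result`) through which the string is returned, instead of a value in a register. For the same reason, no single IR value holds the contents of state_string: the object lives in the memory that `%agg.result` points to, and its characters can only be reached through that object.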