Duplicate Function with duplicated Arguments

I'm writing a pass and would like to ask how to duplicate a function
and add duplicated arguments to the copy in LLVM, for
example:

func(int a, char *b) -> func(int a, char *b, int a1, char *b1)

I'm currently stuck on how to use "getOrInsertFunction" and how to handle
"getArgumentList". Please share your opinion; thanks a lot!

James

Hi James,

I'm now writing a pass and I wanna ask a question about how to
duplicate the function and add duplicated arguments in llvm, for
example:

func(int a, char *b) -> func(int a, char *b, int a1, char *b1)

I'm now stuck at using "getOrInsertFunction" and how to handle
"getArgumentList", please share your opinion, thanks a lot!

I have some code which does something similar. I'll send you an example when
I'm in the office again next Monday.

Gr.

Matthijs

Thanks a lot, I appreciate your help.

Regards,
James

Hi James,

I'm now writing a pass and I wanna ask a question about how to
duplicate the function and add duplicated arguments in llvm, for
example:

Here is some example code. This code creates a new function NF that has one
extra argument compared to the original F, named "globals". The variable Globals
points to this extra argument. This function also adds an extra return value
containing the final value of the struct the globals argument points to. I've
flagged the lines that are specific to my pass, and probably not useful for
you, with a leading #.

Also, note that this code destroys the old function. In particular, it uses
    NF->getBasicBlockList().splice(NF->begin(), F->getBasicBlockList());
to move the basic block list (and with it all instructions) from F to NF,
effectively crippling F.

To do this without destroying F, you can look at
lib/Transforms/Utils/CloneFunction.cpp. In particular, the function
CloneFunctionInto looks interesting, though there might be other functions that
can do even more of the work for you.

Hope this helps,

Matthijs

    // Start by computing a new prototype for the function, which is the same as
    // the old function, but has an extra argument.
    const llvm::FunctionType *FTy = F->getFunctionType();

    /* Copy the argument types and add an extra struct */
    std::vector<const llvm::Type*> Params(FTy->param_begin(), FTy->param_end());
    Params.push_back(ArgType);

    // Make a new parameter attribute list (they are immutable) that has the new
    // argument marked as byval. Since the parameter attributes include the
    // return type parameters at index 0, we don't use size() - 1, but just
    // size() as index.
    llvm::PAListPtr PAL = F->getParamAttrs().addAttr(Params.size(), llvm::ParamAttr::ByVal);

    // New return type is simply a struct of the old type and the globals type,
    // or just the globals type if the original was void
    const llvm::Type *RetTy = F->getReturnType();
    const llvm::Type *NRetTy;
    if (RetTy == llvm::Type::VoidTy) {
      NRetTy = ArgType->getElementType();
    } else {
      NRetTy = llvm::StructType::get(RetTy, ArgType->getElementType(), NULL);
    }

    // Create the new function type based on the recomputed parameters.
    llvm::FunctionType *NFTy = llvm::FunctionType::get(NRetTy, Params, FTy->isVarArg());

    // Create the new function body and insert it into the module...
    llvm::Function *NF = llvm::Function::Create(NFTy, F->getLinkage());
    NF->copyAttributesFrom(F);
    NF->setParamAttrs(PAL);
    F->getParent()->getFunctionList().insert(F, NF);
    NF->takeName(F);
    for (llvm::Function::arg_iterator AI = F->arg_begin(), AE = F->arg_end(),
          NAI = NF->arg_begin(); AI != AE; ++AI, ++NAI)
      NAI->takeName(AI);

    llvm::Value *Globals = --NF->arg_end();
    Globals->setName("globals");

# // Prepare our result
# ArgAndGEPs &Result = GlobalStructs[NF];
# Result.first = Globals;
   // Since we have now created the new function, splice the body of the old
    // function right into the new function, leaving the old rotting hulk of the
    // function empty.
    NF->getBasicBlockList().splice(NF->begin(), F->getBasicBlockList());

    // Now, pimp all return instructions
    for(llvm::Function::iterator BI = NF->begin(), BE = NF->end();
          BI != BE; ++BI)
        if (llvm::ReturnInst *RI = llvm::dyn_cast<llvm::ReturnInst>(BI->getTerminator())) {
          // Don't support functions that had multiple return values
          assert(RI->getNumOperands() < 2);
          // Insert a new load instruction to return
          llvm::Value *Load = new llvm::LoadInst(Globals, "globalsret", RI);
          // Return type was void
          if (RetTy == llvm::Type::VoidTy) {
            llvm::ReturnInst::Create(Load, RI);
            RI->getParent()->getInstList().erase(RI);
          } else {
            // Start out with an empty struct
            llvm::Value *Return = llvm::ConstantAggregateZero::get(NRetTy);
            DOUT << "Return: " << *Return->getType();
            // Insert the original return value in field 0
            Return = llvm::InsertValueInst::Create(Return, RI->getOperand(0), 0, "ret", RI);
            DOUT << "Return: " << *Return->getType();
            // Insert the globals return value in field 1
            Return = llvm::InsertValueInst::Create(Return, Load, 1, "ret", RI);
            DOUT << "Return: " << *Return->getType();

            // And update the return instruction
            RI->setOperand(0, Return);
          }
        }

# // Create GEPs in this function
# CreateGEPs(Globals, NF->getEntryBlock().begin(), Result.second);

    DOUT << *NF;

    // Replace all uses of the old arguments with the new arguments
    for (llvm::Function::arg_iterator I = F->arg_begin(), E = F->arg_end(),
           NI = NF->arg_begin(); I != E; ++I, ++NI)
      I->replaceAllUsesWith(NI);

    // Replace all callers
    while (!F->use_empty()) {
      llvm::CallSite CS = llvm::CallSite::get(F->use_back());
      llvm::Instruction *Call = CS.getInstruction();
      llvm::Function *CallingF = Call->getParent()->getParent();

# /* Get the global struct in our caller */
# llvm::Value* CallerGlobals = ModifyFunctionRecursive(CallingF).first;

      // Copy the existing arguments
      std::vector<llvm::Value*> Args;
      Args.reserve(CS.arg_size());
      llvm::CallSite::arg_iterator AI = CS.arg_begin(), AE = CS.arg_end();
      // First, copy regular arguments
      for (unsigned i = 0, e = FTy->getNumParams(); i != e; ++i, ++AI)
        Args.push_back(*AI);
      // Then, insert the new argument
      Args.push_back(CallerGlobals);
      // Lastly, copy any remaining varargs
      for (; AI != AE; ++AI)
        Args.push_back(*AI);

      llvm::Instruction *New;
      llvm::Instruction *Before = Call;
      if (llvm::InvokeInst *II = llvm::dyn_cast<llvm::InvokeInst>(Call)) {
        New = llvm::InvokeInst::Create(NF, II->getNormalDest(), II->getUnwindDest(),
                                 Args.begin(), Args.end(), "", Before);
        llvm::cast<llvm::InvokeInst>(New)->setCallingConv(CS.getCallingConv());
        llvm::cast<llvm::InvokeInst>(New)->setParamAttrs(CS.getParamAttrs());
      } else {
        New = llvm::CallInst::Create(NF, Args.begin(), Args.end(), "", Before);
        llvm::cast<llvm::CallInst>(New)->setCallingConv(CS.getCallingConv());
        llvm::cast<llvm::CallInst>(New)->setParamAttrs(CS.getParamAttrs());
        if (llvm::cast<llvm::CallInst>(Call)->isTailCall())
          llvm::cast<llvm::CallInst>(New)->setTailCall();
      }

      if (Call->hasName())
        New->takeName(Call);
      else
        New->setName(NF->getName() + ".ret");

      llvm::Value *GlobalsRet;
      if (Call->getType() == llvm::Type::VoidTy) {
        // The original function returned nothing, so the new function returns
        // only the globals
        GlobalsRet = New;
      } else {
        // Split the values
        llvm::Value *OrigRet = llvm::ExtractValueInst::Create(New, 0, "origret", Before);
        GlobalsRet = llvm::ExtractValueInst::Create(New, 1, "globalsret", Before);
        // Replace all the uses of the original result
        Call->replaceAllUsesWith(OrigRet);
      }

      // Now, store the globals back
      new llvm::StoreInst(GlobalsRet, CallerGlobals, Before);

      DOUT << " Call " << *Call << " replaced, function is now " << *Call->getParent()->getParent() << "\n";

      // Finally, remove the old call from the program, reducing the use-count of
      // F.
      Call->eraseFromParent();

    }
    // Delete the old function
    F->eraseFromParent();
    return Result;
  }