Skip to content

Commit

Permalink
Browse files Browse the repository at this point in the history
Support for byval aggregate arguments
The ABI for passing byval aggregates as arguments isn't
really documented, but this is what gcc seems to do:

If the aggregate fits in the available argument registers
it will be passed in those.
If it doesn't fit, it will be passed (completely) on the
stack and any arguments after the byval argument are
also passed on the stack.
  • Loading branch information
skristiansson authored and Sebastien Bourdeauducq committed Jul 14, 2012
1 parent 8822bd8 commit bbcbd8a
Show file tree
Hide file tree
Showing 4 changed files with 169 additions and 12 deletions.
3 changes: 3 additions & 0 deletions lib/Target/LM32/LM32CallingConv.td
Expand Up @@ -28,6 +28,9 @@ def CC_LM32 : CallingConv<[
// Promote i8/i16 arguments to i32.
CCIfType<[i8, i16], CCPromoteToType<i32>>,

// Handle byval arguments
CCIfByVal<CCPassByVal<4, 4>>,

// Integer and float arguments are passed in integer registers.
CCIfType<[i32, f32], CCAssignToReg<[R1, R2, R3, R4, R5, R6, R7, R8]>>,

Expand Down
103 changes: 91 additions & 12 deletions lib/Target/LM32/LM32ISelLowering.cpp
Expand Up @@ -502,6 +502,25 @@ SDValue LM32TargetLowering::LowerVASTART(SDValue Op,
// For mips eabi see http://www.cygwin.com/ml/binutils/2003-06/msg00436.html
// Elements may have been used from SparcTargetLowering::LowerArguments.
//===----------------------------------------------------------------------===//
/// HandleByVal - Decide how a byval aggregate argument is passed.
///
/// If the whole aggregate fits in the argument registers still available,
/// those registers are allocated for it here and \p Size is set to 0
/// (nothing is placed on the stack).  If it does not fit, every remaining
/// argument register is confiscated (AllocateReg only fails once the whole
/// register file is exhausted) and \p Size is rounded up to a whole number
/// of words so the aggregate is passed entirely on the stack.  This matches
/// what gcc does for this (undocumented) part of the ABI.
void LM32TargetLowering::HandleByVal(CCState *State, unsigned &Size) const {
  static const unsigned ArgRegList[] = {
    LM32::R1, LM32::R2, LM32::R3, LM32::R4, LM32::R5, LM32::R6, LM32::R7,
    LM32::R8
  };
  // Derive the register count from the table itself so the two can never
  // drift apart.
  const unsigned NumArgRegs = sizeof(ArgRegList)/sizeof(ArgRegList[0]);
  // Number of 4-byte words needed to hold the aggregate, rounded up.
  unsigned NumWords = (Size + 3)/4;
  unsigned NewSize = 0;
  for (unsigned i = 0; i < NumWords; ++i) {
    if (!State->AllocateReg(ArgRegList, NumArgRegs)) {
      // Out of registers: pass the whole (word-aligned) aggregate on the
      // stack instead.  Registers grabbed above stay allocated, which is
      // exactly the "confiscation" the ABI calls for.
      NewSize = NumWords*4;
      break;
    }
  }
  Size = NewSize;
}

/// LM32 call implementation
/// LowerCall - This hook must be implemented to lower calls into the
/// specified DAG. The outgoing arguments to the call are described
Expand Down Expand Up @@ -557,11 +576,14 @@ LowerCall(SDValue Chain, SDValue Callee, CallingConv::ID CallConv,
SmallVector<std::pair<unsigned, SDValue>, 16> RegsToPass;
SmallVector<SDValue, 8> MemOpChains;

unsigned ArgRegEnd = LM32::R0;

// Walk the register/memloc assignments, inserting copies/loads.
// This was based on Sparc but the Sparc code has been updated.
for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
CCValAssign &VA = ArgLocs[i];
SDValue Arg = OutVals[i];
ISD::ArgFlagsTy Flags = Outs[i].Flags;

// Promote the value if needed.
switch (VA.getLocInfo()) {
Expand All @@ -581,7 +603,39 @@ LowerCall(SDValue Chain, SDValue Callee, CallingConv::ID CallConv,
// Arguments that can be passed on register must be kept at
// RegsToPass vector
if (VA.isRegLoc()) {
RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
ArgRegEnd = VA.getLocReg();
RegsToPass.push_back(std::make_pair(ArgRegEnd, Arg));
} else if (Flags.isByVal()) {
unsigned NumWords = (Flags.getByValSize() + 3)/4;
if (NumWords <= (LM32::R8 - ArgRegEnd)) {
// Load byval aggregate into argument registers.
for (unsigned i = 0; i < NumWords; ++i) {
SDValue AddArg = DAG.getNode(ISD::ADD, dl, getPointerTy(), Arg,
DAG.getConstant(i*4, MVT::i32));
SDValue Load = DAG.getLoad(getPointerTy(), dl, Chain, AddArg,
MachinePointerInfo(),
false, false, false, 0);
MemOpChains.push_back(Load.getValue(1));
RegsToPass.push_back(std::make_pair(++ArgRegEnd, Load));
}
continue;
}
// Byval aggregate didn't fit in the argument registers,
// pass it on the stack.
SDValue StackPtr = DAG.getCopyFromReg(Chain, dl, LM32::RSP,
getPointerTy());
int Offset = VA.getLocMemOffset();
Offset += Subtarget->hasSPBias() ? 4 : 0;
SDValue StackOffset = DAG.getIntPtrConstant(Offset);
SDValue Dst = DAG.getNode(ISD::ADD, dl, getPointerTy(), StackPtr,
StackOffset);
SDValue SizeNode = DAG.getConstant(NumWords*4, MVT::i32);
MemOpChains.push_back(DAG.getMemcpy(Chain, dl, Dst, Arg, SizeNode,
Flags.getByValAlign(),
/*isVolatile=*/false,
/*AlwaysInline=*/false,
MachinePointerInfo(0),
MachinePointerInfo(0)));
} else {
assert(VA.isMemLoc());

Expand Down Expand Up @@ -702,6 +756,7 @@ LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
const SmallVectorImpl<ISD::InputArg> &Ins,
DebugLoc dl, SelectionDAG &DAG,
SmallVectorImpl<SDValue> &InVals) const {
SmallVector<SDValue, 8> OutChains;
MachineFunction &MF = DAG.getMachineFunction();
MachineFrameInfo *MFI = MF.getFrameInfo();
LM32FunctionInfo *LM32FI = MF.getInfo<LM32FunctionInfo>();
Expand All @@ -721,6 +776,7 @@ LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool isVarArg,

for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
CCValAssign &VA = ArgLocs[i];
ISD::ArgFlagsTy Flags = Ins[i].Flags;

// Arguments stored on registers
if (VA.isRegLoc()) {
Expand Down Expand Up @@ -760,6 +816,32 @@ LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
}

InVals.push_back(ArgValue);
} else if (Flags.isByVal()) {
unsigned NumWords = (Flags.getByValSize() + 3)/4;
unsigned Size = NumWords*4;
unsigned Align = Flags.getByValAlign();
int FI = 0;
if (NumWords <= (LM32::R8 - ArgRegEnd)) {
// Store the argument registers onto the local stack
FI = MFI->CreateStackObject(Size, Align, false);
for (unsigned i = 0; i < NumWords; ++i) {
unsigned LiveReg = MF.addLiveIn(++ArgRegEnd, LM32::GPRRegisterClass);
SDValue AddArg = DAG.getNode(ISD::ADD, dl, MVT::i32,
DAG.getFrameIndex(FI, getPointerTy()),
DAG.getConstant(i*4, MVT::i32));
OutChains.push_back(DAG.getStore(Chain, dl,
DAG.getRegister(LiveReg, MVT::i32),
AddArg,
MachinePointerInfo(),
false, false, 0));
}
} else {
// Byval arguments didn't fit in registers, mark all as occupied.
ArgRegEnd = LM32::R8;
nextLocMemOffset = VA.getLocMemOffset() + Size;
FI = MFI->CreateFixedObject(Size, VA.getLocMemOffset(), true);
}
InVals.push_back(DAG.getFrameIndex(FI, getPointerTy()));
} else { // VA.isRegLoc()
assert(ArgRegEnd == LM32::R8 &&
"We should have used all argument registers");
Expand Down Expand Up @@ -821,9 +903,6 @@ DEBUG((cast<LoadSDNode>(/* SDNode* */lod.getNode()))->dump());
DEBUG(errs() << "All varargs on stack getVarArgsFrameIndex() to:" <<
LM32FI->getVarArgsFrameIndex() << "\n");
} else {
// Used to acumulate store chains.
std::vector<SDValue> OutChains;

TargetRegisterClass *RC = LM32::GPRRegisterClass;

// We'll save all argument registers not already saved on the stack. Store
Expand Down Expand Up @@ -851,16 +930,16 @@ DEBUG((cast<LoadSDNode>(/* SDNode* */lod.getNode()))->dump());
// which is a value necessary to VASTART.
DEBUG(errs() << "setVarArgsFrameIndex to:" << FI << "\n");
LM32FI->setVarArgsFrameIndex(FI);

// All stores are grouped in one node to allow the matching between
// the size of Ins and InVals. This only happens when on varg functions
if (!OutChains.empty()) {
OutChains.push_back(Chain);
Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
&OutChains[0], OutChains.size());
}
}
}
// All stores are grouped in one node to allow the matching between
// the size of Ins and InVals. This only happens when on varg functions and
// byval arguments
if (!OutChains.empty()) {
OutChains.push_back(Chain);
Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
&OutChains[0], OutChains.size());
}
return Chain;
}

Expand Down
3 changes: 3 additions & 0 deletions lib/Target/LM32/LM32ISelLowering.h
Expand Up @@ -151,6 +151,9 @@ namespace llvm {
const SmallVectorImpl<SDValue> &OutVals,
DebugLoc dl, SelectionDAG &DAG) const;

/// HandleByVal - Target-specific cleanup for ByVal support.
virtual void HandleByVal(CCState *, unsigned &) const;

#if 0
virtual MachineBasicBlock*
EmitCustomShift(MachineInstr *MI, MachineBasicBlock *MBB) const;
Expand Down
72 changes: 72 additions & 0 deletions test/CodeGen/LM32/byval.ll
@@ -0,0 +1,72 @@
; RUN: llc -march=lm32 < %s | FileCheck %s
; NOTE: if the memcpy threshold is adjusted, some of these tests might
; generate a false negative. Adjust them accordingly in that case.
%struct.s0 = type { [8 x i32] }
%struct.s1 = type { [9 x i32] }
%struct.s2 = type { [7 x i32] }

; Test for byval aggregate that fits in arg regs.
; %struct.s0 is 8 words (32 bytes), which exactly fills the eight argument
; registers r1-r8, so no memcpy to the outgoing stack area is expected.
define void @f0_1(%struct.s0* byval %s) {
entry:
call void @f0(%struct.s0* byval %s)
ret void
}

declare void @f0(%struct.s0* byval)
; CHECK: f0_1:
; CHECK: addi sp, sp, -36
; CHECK: sw (sp+4), r1
; CHECK: calli f0
; CHECK: addi sp, sp, 36

; Test for byval aggregate that doesn't fit in arg regs.
; %struct.s1 is 9 words (36 bytes), one more than the eight argument
; registers hold, so the whole aggregate must be memcpy'd to the stack.
define void @f1_1(%struct.s1* byval %s) {
entry:
call void @f1(%struct.s1* byval %s)
ret void
}

declare void @f1(%struct.s1* byval)
; CHECK: f1_1:
; CHECK: addi sp, sp, -40
; CHECK: addi r1, sp, 4
; CHECK: addi r2, sp, 44
; CHECK: addi r3, r0, 36
; CHECK: calli memcpy
; CHECK: calli f1
; CHECK: addi sp, sp, 40


; Test for byval aggregate with trailing argument that fit in arg regs.
; %struct.s2 is 7 words, occupying r1-r7 and leaving r8 free for the
; trailing i32 argument (hence "addi r8, r0, 1" below).
define void @f2_1(%struct.s2* byval %s) {
entry:
call void @f2(%struct.s2* byval %s, i32 1)
ret void
}

declare void @f2(%struct.s2* byval, i32)
; CHECK: f2_1:
; CHECK: addi sp, sp, -32
; CHECK: sw (sp+4), r1
; CHECK: addi r8, r0, 1
; CHECK: calli f2
; CHECK: addi sp, sp, 32

; Test for byval aggregate with trailing argument that doesn't fit in arg regs.
; %struct.s1 is 9 words, so the aggregate goes entirely on the stack and the
; trailing i32 must also be passed on the stack (at sp+40), per the ABI.
define void @f3_1(%struct.s1* byval %s) {
entry:
call void @f3(%struct.s1* byval %s, i32 1)
ret void
}

declare void @f3(%struct.s1* byval, i32)
; CHECK: f3_1:
; CHECK: addi sp, sp, -44
; CHECK: addi r1, sp, 4
; CHECK: addi r2, sp, 48
; CHECK: addi r3, r0, 36
; CHECK: calli memcpy
; CHECK: addi r{{[0-9]+}}, r0, 1
; CHECK: sw (sp+40), r{{[0-9]+}}
; CHECK: calli f3
; CHECK: addi sp, sp, 44

0 comments on commit bbcbd8a

Please sign in to comment.