Skip to content

Commit bbcbd8a

Browse files
skristianssonSebastien Bourdeauducq
authored and
Sebastien Bourdeauducq
committed Jul 14, 2012
Support for byval aggregate arguments
The ABI for passing byval aggregates as arguments isn't really documented, but this is what gcc seems to do: If the aggregate fits in the available argument registers it will be passed in those. If it doesn't fit, it will be passed (completely) on the stack and any arguments after the byval argument are also passed on the stack.
1 parent 8822bd8 commit bbcbd8a

File tree

4 files changed

+169
-12
lines changed

4 files changed

+169
-12
lines changed
 

‎lib/Target/LM32/LM32CallingConv.td

+3
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,9 @@ def CC_LM32 : CallingConv<[
2828
// Promote i8/i16 arguments to i32.
2929
CCIfType<[i8, i16], CCPromoteToType<i32>>,
3030

31+
// Handle byval arguments
32+
CCIfByVal<CCPassByVal<4, 4>>,
33+
3134
// Integer and float arguments are passed in integer registers.
3235
CCIfType<[i32, f32], CCAssignToReg<[R1, R2, R3, R4, R5, R6, R7, R8]>>,
3336

‎lib/Target/LM32/LM32ISelLowering.cpp

+91-12
Original file line numberDiff line numberDiff line change
@@ -502,6 +502,25 @@ SDValue LM32TargetLowering::LowerVASTART(SDValue Op,
502502
// For mips eabi see http://www.cygwin.com/ml/binutils/2003-06/msg00436.html
503503
// Elements may have been used from SparcTargetLowering::LowerArguments.
504504
//===----------------------------------------------------------------------===//
505+
/// HandleByVal - Try to claim one argument register (R1-R8) per 4-byte word of
506+
/// a byval parameter of Size bytes. On return Size is 0 if every word got a
507+
/// register; otherwise it is the word-rounded size to pass on the stack, and registers claimed before the failure stay allocated.
508+
void LM32TargetLowering::HandleByVal(CCState *State, unsigned &Size) const {
509+
static const unsigned ArgRegList[] = {
510+
LM32::R1, LM32::R2, LM32::R3, LM32::R4, LM32::R5, LM32::R6, LM32::R7,
511+
LM32::R8
512+
};
513+
unsigned NumWords = (Size + 3)/4; // Byte size rounded up to whole 4-byte words.
514+
unsigned NewSize = 0; // Remains 0 when the loop never fails to get a register.
515+
for (unsigned i = 0; i < NumWords; ++i) {
516+
if (!State->AllocateReg(ArgRegList, 8)) { // 8 == number of entries in ArgRegList; a zero result is treated as "no register left".
517+
NewSize = NumWords*4; // Report the full word-rounded size, not just the part that did not fit.
518+
break;
519+
}
520+
}
521+
Size = NewSize; // In/out parameter: overwritten with 0 or the word-rounded byte size.
522+
}
523+
505524
/// Monarch call implementation
506525
/// LowerCall - This hook must be implemented to lower calls into the
507526
/// specified DAG. The outgoing arguments to the call are described
@@ -557,11 +576,14 @@ LowerCall(SDValue Chain, SDValue Callee, CallingConv::ID CallConv,
557576
SmallVector<std::pair<unsigned, SDValue>, 16> RegsToPass;
558577
SmallVector<SDValue, 8> MemOpChains;
559578

579+
unsigned ArgRegEnd = LM32::R0;
580+
560581
// Walk the register/memloc assignments, inserting copies/loads.
561582
// This was based on Sparc but the Sparc code has been updated.
562583
for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
563584
CCValAssign &VA = ArgLocs[i];
564585
SDValue Arg = OutVals[i];
586+
ISD::ArgFlagsTy Flags = Outs[i].Flags;
565587

566588
// Promote the value if needed.
567589
switch (VA.getLocInfo()) {
@@ -581,7 +603,39 @@ LowerCall(SDValue Chain, SDValue Callee, CallingConv::ID CallConv,
581603
// Arguments that can be passed on register must be kept at
582604
// RegsToPass vector
583605
if (VA.isRegLoc()) {
584-
RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
606+
ArgRegEnd = VA.getLocReg();
607+
RegsToPass.push_back(std::make_pair(ArgRegEnd, Arg));
608+
} else if (Flags.isByVal()) {
609+
unsigned NumWords = (Flags.getByValSize() + 3)/4;
610+
if (NumWords <= (LM32::R8 - ArgRegEnd)) {
611+
// Load byval aggregate into argument registers.
612+
for (unsigned i = 0; i < NumWords; ++i) {
613+
SDValue AddArg = DAG.getNode(ISD::ADD, dl, getPointerTy(), Arg,
614+
DAG.getConstant(i*4, MVT::i32));
615+
SDValue Load = DAG.getLoad(getPointerTy(), dl, Chain, AddArg,
616+
MachinePointerInfo(),
617+
false, false, false, 0);
618+
MemOpChains.push_back(Load.getValue(1));
619+
RegsToPass.push_back(std::make_pair(++ArgRegEnd, Load));
620+
}
621+
continue;
622+
}
623+
// Byval aggregate didn't fit in the argument registers,
624+
// pass it on the stack.
625+
SDValue StackPtr = DAG.getCopyFromReg(Chain, dl, LM32::RSP,
626+
getPointerTy());
627+
int Offset = VA.getLocMemOffset();
628+
Offset += Subtarget->hasSPBias() ? 4 : 0;
629+
SDValue StackOffset = DAG.getIntPtrConstant(Offset);
630+
SDValue Dst = DAG.getNode(ISD::ADD, dl, getPointerTy(), StackPtr,
631+
StackOffset);
632+
SDValue SizeNode = DAG.getConstant(NumWords*4, MVT::i32);
633+
MemOpChains.push_back(DAG.getMemcpy(Chain, dl, Dst, Arg, SizeNode,
634+
Flags.getByValAlign(),
635+
/*isVolatile=*/false,
636+
/*AlwaysInline=*/false,
637+
MachinePointerInfo(0),
638+
MachinePointerInfo(0)));
585639
} else {
586640
assert(VA.isMemLoc());
587641

@@ -702,6 +756,7 @@ LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
702756
const SmallVectorImpl<ISD::InputArg> &Ins,
703757
DebugLoc dl, SelectionDAG &DAG,
704758
SmallVectorImpl<SDValue> &InVals) const {
759+
SmallVector<SDValue, 8> OutChains;
705760
MachineFunction &MF = DAG.getMachineFunction();
706761
MachineFrameInfo *MFI = MF.getFrameInfo();
707762
LM32FunctionInfo *LM32FI = MF.getInfo<LM32FunctionInfo>();
@@ -721,6 +776,7 @@ LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
721776

722777
for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
723778
CCValAssign &VA = ArgLocs[i];
779+
ISD::ArgFlagsTy Flags = Ins[i].Flags;
724780

725781
// Arguments stored on registers
726782
if (VA.isRegLoc()) {
@@ -760,6 +816,32 @@ LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
760816
}
761817

762818
InVals.push_back(ArgValue);
819+
} else if (Flags.isByVal()) {
820+
unsigned NumWords = (Flags.getByValSize() + 3)/4;
821+
unsigned Size = NumWords*4;
822+
unsigned Align = Flags.getByValAlign();
823+
int FI = 0;
824+
if (NumWords <= (LM32::R8 - ArgRegEnd)) {
825+
// Store the argument registers onto the local stack
826+
FI = MFI->CreateStackObject(Size, Align, false);
827+
for (unsigned i = 0; i < NumWords; ++i) {
828+
unsigned LiveReg = MF.addLiveIn(++ArgRegEnd, LM32::GPRRegisterClass);
829+
SDValue AddArg = DAG.getNode(ISD::ADD, dl, MVT::i32,
830+
DAG.getFrameIndex(FI, getPointerTy()),
831+
DAG.getConstant(i*4, MVT::i32));
832+
OutChains.push_back(DAG.getStore(Chain, dl,
833+
DAG.getRegister(LiveReg, MVT::i32),
834+
AddArg,
835+
MachinePointerInfo(),
836+
false, false, 0));
837+
}
838+
} else {
839+
// Byval arguments didn't fit in registers, mark all as occupied.
840+
ArgRegEnd = LM32::R8;
841+
nextLocMemOffset = VA.getLocMemOffset() + Size;
842+
FI = MFI->CreateFixedObject(Size, VA.getLocMemOffset(), true);
843+
}
844+
InVals.push_back(DAG.getFrameIndex(FI, getPointerTy()));
763845
} else { // VA.isRegLoc()
764846
assert(ArgRegEnd == LM32::R8 &&
765847
"We should have used all argument registers");
@@ -821,9 +903,6 @@ DEBUG((cast<LoadSDNode>(/* SDNode* */lod.getNode()))->dump());
821903
DEBUG(errs() << "All varargs on stack getVarArgsFrameIndex() to:" <<
822904
LM32FI->getVarArgsFrameIndex() << "\n");
823905
} else {
824-
// Used to acumulate store chains.
825-
std::vector<SDValue> OutChains;
826-
827906
TargetRegisterClass *RC = LM32::GPRRegisterClass;
828907

829908
// We'll save all argument registers not already saved on the stack. Store
@@ -851,16 +930,16 @@ DEBUG((cast<LoadSDNode>(/* SDNode* */lod.getNode()))->dump());
851930
// which is a value necessary to VASTART.
852931
DEBUG(errs() << "setVarArgsFrameIndex to:" << FI << "\n");
853932
LM32FI->setVarArgsFrameIndex(FI);
854-
855-
// All stores are grouped in one node to allow the matching between
856-
// the size of Ins and InVals. This only happens when on varg functions
857-
if (!OutChains.empty()) {
858-
OutChains.push_back(Chain);
859-
Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
860-
&OutChains[0], OutChains.size());
861-
}
862933
}
863934
}
935+
// All stores are grouped in one node to allow the matching between
936+
// the size of Ins and InVals. This only happens for vararg functions and
937+
// byval arguments
938+
if (!OutChains.empty()) {
939+
OutChains.push_back(Chain);
940+
Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
941+
&OutChains[0], OutChains.size());
942+
}
864943
return Chain;
865944
}
866945

‎lib/Target/LM32/LM32ISelLowering.h

+3
Original file line numberDiff line numberDiff line change
@@ -151,6 +151,9 @@ namespace llvm {
151151
const SmallVectorImpl<SDValue> &OutVals,
152152
DebugLoc dl, SelectionDAG &DAG) const;
153153

154+
/// HandleByVal - Target-specific cleanup for ByVal support.
155+
virtual void HandleByVal(CCState *, unsigned &) const;
156+
154157
#if 0
155158
virtual MachineBasicBlock*
156159
EmitCustomShift(MachineInstr *MI, MachineBasicBlock *MBB) const;

‎test/CodeGen/LM32/byval.ll

+72
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,72 @@
1+
; RUN: llc -march=lm32 < %s | FileCheck %s
2+
; NOTE: if the memcpy threshold is adjusted, some of those tests might
3+
; generate a false negative. Adjust accordingly in such case.
4+
%struct.s0 = type { [8 x i32] }
5+
%struct.s1 = type { [9 x i32] }
6+
%struct.s2 = type { [7 x i32] }
7+
8+
; Test for byval aggregate that fits in arg regs.
9+
define void @f0_1(%struct.s0* byval %s) {
10+
entry:
11+
call void @f0(%struct.s0* byval %s)
12+
ret void
13+
}
14+
15+
declare void @f0(%struct.s0* byval)
16+
; CHECK: f0_1:
17+
; CHECK: addi sp, sp, -36
18+
; CHECK: sw (sp+4), r1
19+
; CHECK: calli f0
20+
; CHECK: addi sp, sp, 36
21+
22+
; Test for byval aggregate that doesn't fit in arg regs.
23+
define void @f1_1(%struct.s1* byval %s) {
24+
entry:
25+
call void @f1(%struct.s1* byval %s)
26+
ret void
27+
}
28+
29+
declare void @f1(%struct.s1* byval)
30+
; CHECK: f1_1:
31+
; CHECK: addi sp, sp, -40
32+
; CHECK: addi r1, sp, 4
33+
; CHECK: addi r2, sp, 44
34+
; CHECK: addi r3, r0, 36
35+
; CHECK: calli memcpy
36+
; CHECK: calli f1
37+
; CHECK: addi sp, sp, 40
38+
39+
40+
; Test for byval aggregate with trailing argument that fit in arg regs.
41+
define void @f2_1(%struct.s2* byval %s) {
42+
entry:
43+
call void @f2(%struct.s2* byval %s, i32 1)
44+
ret void
45+
}
46+
47+
declare void @f2(%struct.s2* byval, i32)
48+
; CHECK: f2_1:
49+
; CHECK: addi sp, sp, -32
50+
; CHECK: sw (sp+4), r1
51+
; CHECK: addi r8, r0, 1
52+
; CHECK: calli f2
53+
; CHECK: addi sp, sp, 32
54+
55+
; Test for byval aggregate with trailing argument that doesn't fit in arg regs.
56+
define void @f3_1(%struct.s1* byval %s) {
57+
entry:
58+
call void @f3(%struct.s1* byval %s, i32 1)
59+
ret void
60+
}
61+
62+
declare void @f3(%struct.s1* byval, i32)
63+
; CHECK: f3_1:
64+
; CHECK: addi sp, sp, -44
65+
; CHECK: addi r1, sp, 4
66+
; CHECK: addi r2, sp, 48
67+
; CHECK: addi r3, r0, 36
68+
; CHECK: calli memcpy
69+
; CHECK: addi r{{[1-25]}}, r0, 1
70+
; CHECK: sw (sp+40), r{{[1-25]}}
71+
; CHECK: calli f3
72+
; CHECK: addi sp, sp, 44

0 commit comments

Comments
 (0)
Please sign in to comment.