aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJoel Sing <joel@sing.id.au>2023-08-02 20:29:52 +1000
committerJoel Sing <joel@sing.id.au>2023-08-28 17:40:40 +0000
commit19e2e3c291e28d337f155f5f01afa21910431752 (patch)
tree9ad70c49a65908aee1687d000fbf397d4056bb60
parentb2e809bab59a692aa6a69e1bd1d32eeeab4622e3 (diff)
downloadgo-19e2e3c291e28d337f155f5f01afa21910431752.tar.gz
go-19e2e3c291e28d337f155f5f01afa21910431752.zip
cmd/internal/obj/arm64: avoid unnecessary pool literal usage for load/store pairs
Implement better classification for load and store pair operations. This in turn allows us to avoid using pool literals when the offset fits in a 24 bit unsigned immediate. In this case, the offset can be calculated using two add immediate instructions, rather than loading the offset from the pool literal and then adding the offset to the base register. This requires the same number of instructions, however avoids a load from memory and does not require the offset to be stored in the literal pool. Updates #59615 Change-Id: I316ec3d54f1d06ae9d930e98d0c32471775fcb26 Reviewed-on: https://go-review.googlesource.com/c/go/+/515615 Run-TryBot: Joel Sing <joel@sing.id.au> TryBot-Result: Gopher Robot <gobot@golang.org> Reviewed-by: Joedian Reid <joedian@golang.org> Reviewed-by: Cherry Mui <cherryyz@google.com>
-rw-r--r--src/cmd/internal/obj/arm64/asm7.go145
1 files changed, 130 insertions, 15 deletions
diff --git a/src/cmd/internal/obj/arm64/asm7.go b/src/cmd/internal/obj/arm64/asm7.go
index 2e5d84f647..84c6b4d5d4 100644
--- a/src/cmd/internal/obj/arm64/asm7.go
+++ b/src/cmd/internal/obj/arm64/asm7.go
@@ -710,7 +710,8 @@ var optab = []Optab{
{AFLDPQ, C_PQAUTO_16, C_NONE, C_NONE, C_PAIR, C_NONE, 66, 4, REGSP, 0, 0},
{AFLDPQ, C_UAUTO4K, C_NONE, C_NONE, C_PAIR, C_NONE, 74, 8, REGSP, 0, 0},
{AFLDPQ, C_NAUTO4K, C_NONE, C_NONE, C_PAIR, C_NONE, 74, 8, REGSP, 0, 0},
- {AFLDPQ, C_LAUTO, C_NONE, C_NONE, C_PAIR, C_NONE, 75, 12, REGSP, LFROM, 0},
+ {AFLDPQ, C_LAUTO, C_NONE, C_NONE, C_PAIR, C_NONE, 75, 12, REGSP, 0, 0},
+ {AFLDPQ, C_LAUTOPOOL, C_NONE, C_NONE, C_PAIR, C_NONE, 75, 12, REGSP, LFROM, 0},
{AFLDPQ, C_NQOREG_16, C_NONE, C_NONE, C_PAIR, C_NONE, 66, 4, 0, 0, 0},
{AFLDPQ, C_NQOREG_16, C_NONE, C_NONE, C_PAIR, C_NONE, 66, 4, 0, 0, C_XPRE},
{AFLDPQ, C_NQOREG_16, C_NONE, C_NONE, C_PAIR, C_NONE, 66, 4, 0, 0, C_XPOST},
@@ -719,14 +720,16 @@ var optab = []Optab{
{AFLDPQ, C_PQOREG_16, C_NONE, C_NONE, C_PAIR, C_NONE, 66, 4, 0, 0, C_XPOST},
{AFLDPQ, C_UOREG4K, C_NONE, C_NONE, C_PAIR, C_NONE, 74, 8, 0, 0, 0},
{AFLDPQ, C_NOREG4K, C_NONE, C_NONE, C_PAIR, C_NONE, 74, 8, 0, 0, 0},
- {AFLDPQ, C_LOREG, C_NONE, C_NONE, C_PAIR, C_NONE, 75, 12, 0, LFROM, 0},
+ {AFLDPQ, C_LOREG, C_NONE, C_NONE, C_PAIR, C_NONE, 75, 12, 0, 0, 0},
+ {AFLDPQ, C_LOREGPOOL, C_NONE, C_NONE, C_PAIR, C_NONE, 75, 12, 0, LFROM, 0},
{AFLDPQ, C_ADDR, C_NONE, C_NONE, C_PAIR, C_NONE, 88, 12, 0, 0, 0},
{AFSTPQ, C_PAIR, C_NONE, C_NONE, C_NQAUTO_16, C_NONE, 67, 4, REGSP, 0, 0},
{AFSTPQ, C_PAIR, C_NONE, C_NONE, C_PQAUTO_16, C_NONE, 67, 4, REGSP, 0, 0},
{AFSTPQ, C_PAIR, C_NONE, C_NONE, C_UAUTO4K, C_NONE, 76, 8, REGSP, 0, 0},
{AFSTPQ, C_PAIR, C_NONE, C_NONE, C_NAUTO4K, C_NONE, 76, 8, REGSP, 0, 0},
- {AFSTPQ, C_PAIR, C_NONE, C_NONE, C_LAUTO, C_NONE, 77, 12, REGSP, LTO, 0},
+ {AFSTPQ, C_PAIR, C_NONE, C_NONE, C_LAUTO, C_NONE, 77, 12, REGSP, 0, 0},
+ {AFSTPQ, C_PAIR, C_NONE, C_NONE, C_LAUTOPOOL, C_NONE, 77, 12, REGSP, LTO, 0},
{AFSTPQ, C_PAIR, C_NONE, C_NONE, C_NQOREG_16, C_NONE, 67, 4, 0, 0, 0},
{AFSTPQ, C_PAIR, C_NONE, C_NONE, C_NQOREG_16, C_NONE, 67, 4, 0, 0, C_XPRE},
{AFSTPQ, C_PAIR, C_NONE, C_NONE, C_NQOREG_16, C_NONE, 67, 4, 0, 0, C_XPOST},
@@ -735,14 +738,16 @@ var optab = []Optab{
{AFSTPQ, C_PAIR, C_NONE, C_NONE, C_PQOREG_16, C_NONE, 67, 4, 0, 0, C_XPOST},
{AFSTPQ, C_PAIR, C_NONE, C_NONE, C_UOREG4K, C_NONE, 76, 8, 0, 0, 0},
{AFSTPQ, C_PAIR, C_NONE, C_NONE, C_NOREG4K, C_NONE, 76, 8, 0, 0, 0},
- {AFSTPQ, C_PAIR, C_NONE, C_NONE, C_LOREG, C_NONE, 77, 12, 0, LTO, 0},
+ {AFSTPQ, C_PAIR, C_NONE, C_NONE, C_LOREG, C_NONE, 77, 12, 0, 0, 0},
+ {AFSTPQ, C_PAIR, C_NONE, C_NONE, C_LOREGPOOL, C_NONE, 77, 12, 0, LTO, 0},
{AFSTPQ, C_PAIR, C_NONE, C_NONE, C_ADDR, C_NONE, 87, 12, 0, 0, 0},
{ALDP, C_NPAUTO, C_NONE, C_NONE, C_PAIR, C_NONE, 66, 4, REGSP, 0, 0},
{ALDP, C_PPAUTO, C_NONE, C_NONE, C_PAIR, C_NONE, 66, 4, REGSP, 0, 0},
{ALDP, C_UAUTO4K, C_NONE, C_NONE, C_PAIR, C_NONE, 74, 8, REGSP, 0, 0},
{ALDP, C_NAUTO4K, C_NONE, C_NONE, C_PAIR, C_NONE, 74, 8, REGSP, 0, 0},
- {ALDP, C_LAUTO, C_NONE, C_NONE, C_PAIR, C_NONE, 75, 12, REGSP, LFROM, 0},
+ {ALDP, C_LAUTO, C_NONE, C_NONE, C_PAIR, C_NONE, 75, 12, REGSP, 0, 0},
+ {ALDP, C_LAUTOPOOL, C_NONE, C_NONE, C_PAIR, C_NONE, 75, 12, REGSP, LFROM, 0},
{ALDP, C_NPOREG, C_NONE, C_NONE, C_PAIR, C_NONE, 66, 4, 0, 0, 0},
{ALDP, C_NPOREG, C_NONE, C_NONE, C_PAIR, C_NONE, 66, 4, 0, 0, C_XPRE},
{ALDP, C_NPOREG, C_NONE, C_NONE, C_PAIR, C_NONE, 66, 4, 0, 0, C_XPOST},
@@ -751,14 +756,16 @@ var optab = []Optab{
{ALDP, C_PPOREG, C_NONE, C_NONE, C_PAIR, C_NONE, 66, 4, 0, 0, C_XPOST},
{ALDP, C_UOREG4K, C_NONE, C_NONE, C_PAIR, C_NONE, 74, 8, 0, 0, 0},
{ALDP, C_NOREG4K, C_NONE, C_NONE, C_PAIR, C_NONE, 74, 8, 0, 0, 0},
- {ALDP, C_LOREG, C_NONE, C_NONE, C_PAIR, C_NONE, 75, 12, 0, LFROM, 0},
+ {ALDP, C_LOREG, C_NONE, C_NONE, C_PAIR, C_NONE, 75, 12, 0, 0, 0},
+ {ALDP, C_LOREGPOOL, C_NONE, C_NONE, C_PAIR, C_NONE, 75, 12, 0, LFROM, 0},
{ALDP, C_ADDR, C_NONE, C_NONE, C_PAIR, C_NONE, 88, 12, 0, 0, 0},
{ASTP, C_PAIR, C_NONE, C_NONE, C_NPAUTO, C_NONE, 67, 4, REGSP, 0, 0},
{ASTP, C_PAIR, C_NONE, C_NONE, C_PPAUTO, C_NONE, 67, 4, REGSP, 0, 0},
{ASTP, C_PAIR, C_NONE, C_NONE, C_UAUTO4K, C_NONE, 76, 8, REGSP, 0, 0},
{ASTP, C_PAIR, C_NONE, C_NONE, C_NAUTO4K, C_NONE, 76, 8, REGSP, 0, 0},
- {ASTP, C_PAIR, C_NONE, C_NONE, C_LAUTO, C_NONE, 77, 12, REGSP, LTO, 0},
+ {ASTP, C_PAIR, C_NONE, C_NONE, C_LAUTO, C_NONE, 77, 12, REGSP, 0, 0},
+ {ASTP, C_PAIR, C_NONE, C_NONE, C_LAUTOPOOL, C_NONE, 77, 12, REGSP, LTO, 0},
{ASTP, C_PAIR, C_NONE, C_NONE, C_NPOREG, C_NONE, 67, 4, 0, 0, 0},
{ASTP, C_PAIR, C_NONE, C_NONE, C_NPOREG, C_NONE, 67, 4, 0, 0, C_XPRE},
{ASTP, C_PAIR, C_NONE, C_NONE, C_NPOREG, C_NONE, 67, 4, 0, 0, C_XPOST},
@@ -767,7 +774,8 @@ var optab = []Optab{
{ASTP, C_PAIR, C_NONE, C_NONE, C_PPOREG, C_NONE, 67, 4, 0, 0, C_XPOST},
{ASTP, C_PAIR, C_NONE, C_NONE, C_UOREG4K, C_NONE, 76, 8, 0, 0, 0},
{ASTP, C_PAIR, C_NONE, C_NONE, C_NOREG4K, C_NONE, 76, 8, 0, 0, 0},
- {ASTP, C_PAIR, C_NONE, C_NONE, C_LOREG, C_NONE, 77, 12, 0, LTO, 0},
+ {ASTP, C_PAIR, C_NONE, C_NONE, C_LOREG, C_NONE, 77, 12, 0, 0, 0},
+ {ASTP, C_PAIR, C_NONE, C_NONE, C_LOREGPOOL, C_NONE, 77, 12, 0, LTO, 0},
{ASTP, C_PAIR, C_NONE, C_NONE, C_ADDR, C_NONE, 87, 12, 0, 0, 0},
// differ from LDP/STP for C_NSAUTO_4/C_PSAUTO_4/C_NSOREG_4/C_PSOREG_4
@@ -775,7 +783,8 @@ var optab = []Optab{
{ALDPW, C_PSAUTO_4, C_NONE, C_NONE, C_PAIR, C_NONE, 66, 4, REGSP, 0, 0},
{ALDPW, C_UAUTO4K, C_NONE, C_NONE, C_PAIR, C_NONE, 74, 8, REGSP, 0, 0},
{ALDPW, C_NAUTO4K, C_NONE, C_NONE, C_PAIR, C_NONE, 74, 8, REGSP, 0, 0},
- {ALDPW, C_LAUTO, C_NONE, C_NONE, C_PAIR, C_NONE, 75, 12, REGSP, LFROM, 0},
+ {ALDPW, C_LAUTO, C_NONE, C_NONE, C_PAIR, C_NONE, 75, 12, REGSP, 0, 0},
+ {ALDPW, C_LAUTOPOOL, C_NONE, C_NONE, C_PAIR, C_NONE, 75, 12, REGSP, LFROM, 0},
{ALDPW, C_NSOREG_4, C_NONE, C_NONE, C_PAIR, C_NONE, 66, 4, 0, 0, 0},
{ALDPW, C_NSOREG_4, C_NONE, C_NONE, C_PAIR, C_NONE, 66, 4, 0, 0, C_XPRE},
{ALDPW, C_NSOREG_4, C_NONE, C_NONE, C_PAIR, C_NONE, 66, 4, 0, 0, C_XPOST},
@@ -784,14 +793,16 @@ var optab = []Optab{
{ALDPW, C_PSOREG_4, C_NONE, C_NONE, C_PAIR, C_NONE, 66, 4, 0, 0, C_XPOST},
{ALDPW, C_UOREG4K, C_NONE, C_NONE, C_PAIR, C_NONE, 74, 8, 0, 0, 0},
{ALDPW, C_NOREG4K, C_NONE, C_NONE, C_PAIR, C_NONE, 74, 8, 0, 0, 0},
- {ALDPW, C_LOREG, C_NONE, C_NONE, C_PAIR, C_NONE, 75, 12, 0, LFROM, 0},
+ {ALDPW, C_LOREG, C_NONE, C_NONE, C_PAIR, C_NONE, 75, 12, 0, 0, 0},
+ {ALDPW, C_LOREGPOOL, C_NONE, C_NONE, C_PAIR, C_NONE, 75, 12, 0, LFROM, 0},
{ALDPW, C_ADDR, C_NONE, C_NONE, C_PAIR, C_NONE, 88, 12, 0, 0, 0},
{ASTPW, C_PAIR, C_NONE, C_NONE, C_NSAUTO_4, C_NONE, 67, 4, REGSP, 0, 0},
{ASTPW, C_PAIR, C_NONE, C_NONE, C_PSAUTO_4, C_NONE, 67, 4, REGSP, 0, 0},
{ASTPW, C_PAIR, C_NONE, C_NONE, C_UAUTO4K, C_NONE, 76, 8, REGSP, 0, 0},
{ASTPW, C_PAIR, C_NONE, C_NONE, C_NAUTO4K, C_NONE, 76, 8, REGSP, 0, 0},
- {ASTPW, C_PAIR, C_NONE, C_NONE, C_LAUTO, C_NONE, 77, 12, REGSP, LTO, 0},
+ {ASTPW, C_PAIR, C_NONE, C_NONE, C_LAUTO, C_NONE, 77, 12, REGSP, 0, 0},
+ {ASTPW, C_PAIR, C_NONE, C_NONE, C_LAUTOPOOL, C_NONE, 77, 12, REGSP, LTO, 0},
{ASTPW, C_PAIR, C_NONE, C_NONE, C_NSOREG_4, C_NONE, 67, 4, 0, 0, 0},
{ASTPW, C_PAIR, C_NONE, C_NONE, C_NSOREG_4, C_NONE, 67, 4, 0, 0, C_XPRE},
{ASTPW, C_PAIR, C_NONE, C_NONE, C_NSOREG_4, C_NONE, 67, 4, 0, 0, C_XPOST},
@@ -800,7 +811,8 @@ var optab = []Optab{
{ASTPW, C_PAIR, C_NONE, C_NONE, C_PSOREG_4, C_NONE, 67, 4, 0, 0, C_XPOST},
{ASTPW, C_PAIR, C_NONE, C_NONE, C_UOREG4K, C_NONE, 76, 8, 0, 0, 0},
{ASTPW, C_PAIR, C_NONE, C_NONE, C_NOREG4K, C_NONE, 76, 8, 0, 0, 0},
- {ASTPW, C_PAIR, C_NONE, C_NONE, C_LOREG, C_NONE, 77, 12, 0, LTO, 0},
+ {ASTPW, C_PAIR, C_NONE, C_NONE, C_LOREG, C_NONE, 77, 12, 0, 0, 0},
+ {ASTPW, C_PAIR, C_NONE, C_NONE, C_LOREGPOOL, C_NONE, 77, 12, 0, LTO, 0},
{ASTPW, C_PAIR, C_NONE, C_NONE, C_ADDR, C_NONE, 87, 12, 0, 0, 0},
{ASWPD, C_ZREG, C_NONE, C_NONE, C_ZOREG, C_ZREG, 47, 4, 0, 0, 0},
@@ -1479,6 +1491,14 @@ func isNEGop(op obj.As) bool {
return false
}
+func isLoadStorePairOp(op obj.As) bool {
+ switch op {
+ case AFLDPQ, AFSTPQ, ALDP, ASTP, ALDPW, ASTPW:
+ return true
+ }
+ return false
+}
+
func isMOVop(op obj.As) bool {
switch op {
case AMOVB, AMOVBU, AMOVH, AMOVHU, AMOVW, AMOVWU, AMOVD, AFMOVS, AFMOVD, AFMOVQ:
@@ -1984,6 +2004,33 @@ func (c *ctxt7) loadStoreClass(p *obj.Prog, lsc int, v int64) int {
return lsc
}
+// loadStorePairClass reclassifies a load or store pair operation based on its offset.
+func (c *ctxt7) loadStorePairClass(p *obj.Prog, lsc int, v int64) int {
+ // Avoid reclassification of pre/post-indexed loads and stores.
+ if p.Scond == C_XPRE || p.Scond == C_XPOST {
+ return lsc
+ }
+
+ if cmp(C_NAUTO4K, lsc) || cmp(C_NOREG4K, lsc) {
+ return lsc
+ }
+ if cmp(C_UAUTO4K, lsc) || cmp(C_UOREG4K, lsc) {
+ return lsc
+ }
+
+ needsPool := true
+ if v >= 0 && v <= 0xffffff {
+ needsPool = false
+ }
+ if needsPool && cmp(C_LAUTO, lsc) {
+ return C_LAUTOPOOL
+ }
+ if needsPool && cmp(C_LOREG, lsc) {
+ return C_LOREGPOOL
+ }
+ return lsc
+}
+
func (c *ctxt7) aclass(a *obj.Addr) int {
switch a.Type {
case obj.TYPE_NONE:
@@ -2212,6 +2259,10 @@ func (c *ctxt7) oplook(p *obj.Prog) *Optab {
// More specific classification of large offset loads and stores.
a1 = c.loadStoreClass(p, a1, c.instoffset)
}
+ if isLoadStorePairOp(p.As) && (cmp(C_LAUTO, a1) || cmp(C_LOREG, a1)) {
+ // More specific classification of large offset loads and stores.
+ a1 = c.loadStorePairClass(p, a1, c.instoffset)
+ }
}
p.From.Class = int8(a1)
}
@@ -2238,6 +2289,10 @@ func (c *ctxt7) oplook(p *obj.Prog) *Optab {
// More specific classification of large offset loads and stores.
a4 = c.loadStoreClass(p, a4, c.instoffset)
}
+ if isLoadStorePairOp(p.As) && (cmp(C_LAUTO, a4) || cmp(C_LOREG, a4)) {
+ // More specific classification of large offset loads and stores.
+ a4 = c.loadStorePairClass(p, a4, c.instoffset)
+ }
}
p.To.Class = int8(a4)
}
@@ -4035,12 +4090,12 @@ func (c *ctxt7) asmout(p *obj.Prog, o *Optab, out []uint32) {
break
storeusepool:
- if r == REGTMP || p.From.Reg == REGTMP {
- c.ctxt.Diag("REGTMP used in large offset store: %v", p)
- }
if p.Pool == nil {
c.ctxt.Diag("%v: constant is not in pool", p)
}
+ if r == REGTMP || p.From.Reg == REGTMP {
+ c.ctxt.Diag("REGTMP used in large offset store: %v", p)
+ }
o1 = c.omovlit(AMOVD, p, &p.To, REGTMP)
o2 = c.olsxrr(p, int32(c.opstrr(p, p.As, false)), int(p.From.Reg), int(r), REGTMP)
@@ -4831,6 +4886,11 @@ func (c *ctxt7) asmout(p *obj.Prog, o *Optab, out []uint32) {
o2 = c.opldpstp(p, o, 0, REGTMP, rt1, rt2, 1)
case 75:
+ // If offset L fits in a 24 bit unsigned immediate:
+ // add $lo, R, Rtmp
+ // add $hi, Rtmp, Rtmp
+ // ldr (Rtmp), R
+ // Otherwise, use constant pool:
// mov $L, Rtmp (from constant pool)
// add Rtmp, R, Rtmp
// ldp (Rtmp), (R1, R2)
@@ -4844,6 +4904,31 @@ func (c *ctxt7) asmout(p *obj.Prog, o *Optab, out []uint32) {
if rf == obj.REG_NONE {
c.ctxt.Diag("invalid ldp source: %v", p)
}
+
+ v := c.regoff(&p.From)
+ if v >= -4095 && v <= 4095 {
+ c.ctxt.Diag("%v: bad type for offset %d (should be add/sub+ldp)", p, v)
+ }
+
+ hi, lo, err := splitImm24uScaled(v, 0)
+ if err != nil {
+ goto loadpairusepool
+ }
+ if p.Pool != nil {
+ c.ctxt.Diag("%v: unused constant in pool (%v)\n", p, v)
+ }
+ o1 = c.oaddi(p, AADD, lo, REGTMP, int16(rf))
+ o2 = c.oaddi(p, AADD, hi, REGTMP, REGTMP)
+ o3 = c.opldpstp(p, o, 0, REGTMP, rt1, rt2, 1)
+ break
+
+ loadpairusepool:
+ if p.Pool == nil {
+ c.ctxt.Diag("%v: constant is not in pool", p)
+ }
+ if rf == REGTMP || p.From.Reg == REGTMP {
+ c.ctxt.Diag("REGTMP used in large offset load: %v", p)
+ }
o1 = c.omovlit(AMOVD, p, &p.From, REGTMP)
o2 = c.opxrrr(p, AADD, REGTMP, rf, REGTMP, false)
o3 = c.opldpstp(p, o, 0, REGTMP, rt1, rt2, 1)
@@ -4866,6 +4951,11 @@ func (c *ctxt7) asmout(p *obj.Prog, o *Optab, out []uint32) {
o2 = c.opldpstp(p, o, 0, REGTMP, rf1, rf2, 0)
case 77:
+ // If offset L fits in a 24 bit unsigned immediate:
+ // add $lo, R, Rtmp
+ // add $hi, Rtmp, Rtmp
+ // stp (R1, R2), (Rtmp)
+ // Otherwise, use constant pool:
// mov $L, Rtmp (from constant pool)
// add Rtmp, R, Rtmp
// stp (R1, R2), (Rtmp)
@@ -4879,6 +4969,31 @@ func (c *ctxt7) asmout(p *obj.Prog, o *Optab, out []uint32) {
if rt == obj.REG_NONE {
c.ctxt.Diag("invalid stp destination: %v", p)
}
+
+ v := c.regoff(&p.To)
+ if v >= -4095 && v <= 4095 {
+ c.ctxt.Diag("%v: bad type for offset %d (should be add/sub+stp)", p, v)
+ }
+
+ hi, lo, err := splitImm24uScaled(v, 0)
+ if err != nil {
+ goto storepairusepool
+ }
+ if p.Pool != nil {
+ c.ctxt.Diag("%v: unused constant in pool (%v)\n", p, v)
+ }
+ o1 = c.oaddi(p, AADD, lo, REGTMP, int16(rt))
+ o2 = c.oaddi(p, AADD, hi, REGTMP, REGTMP)
+ o3 = c.opldpstp(p, o, 0, REGTMP, rf1, rf2, 0)
+ break
+
+ storepairusepool:
+ if p.Pool == nil {
+ c.ctxt.Diag("%v: constant is not in pool", p)
+ }
+ if rt == REGTMP || p.From.Reg == REGTMP {
+ c.ctxt.Diag("REGTMP used in large offset store: %v", p)
+ }
o1 = c.omovlit(AMOVD, p, &p.To, REGTMP)
o2 = c.opxrrr(p, AADD, REGTMP, rt, REGTMP, false)
o3 = c.opldpstp(p, o, 0, REGTMP, rf1, rf2, 0)