/*
 * Dot Product
 */
float DotProduct(const void *v0, *const void *v1)
{
float dot;
asm __volatile__("
lqc2 vf16,0x0(%1)
lqc2 vf17,0x0(%2)
vaddw.x vf18,vf00,vf00
vmul.xyz vf16,vf16,vf17
vmulax.x ACC,vf18,vf16x
vmadday.x ACC,vf18,vf16y
vmaddz.x vf16,vf18,vf16z
.set noat
qmfc2 $1,vf16
mtc1 $1,%0
.set at
"
: "=f"(dot)
: "r"(v0), "r"(v1)
: "$1"
);
return dot;
}
/*
 * Cross Product
 */
void CrossProduct(void *v0, const void *v1, const void *v2)
{
asm __volatile__("
lqc2 vf16,0x0(%1)
lqc2 vf17,0x0(%2)
vopmula.xyz ACC,vf16,vf17
vopmsub.xyz vf17,vf17,vf16
vsub.w vf17,vf00,vf00 # w = 0
sqc2 vf17,0x0(%0)
"
: /* No Output */
: "r"(v0), "r"(v1), "r"(v2)
: "memory"
);
}
/*
 * Normalize
 */
void Normalize(void *v0, const void *v1)
{
asm __volatile__("
lqc2 vf16,0x0(%1)
vmul.xyz vf17,vf16,vf16
vmulax.w ACC,vf00,vf17x
vmadday.w ACC,vf00,vf17y
vmaddz.w vf17,vf00,vf17z
vrsqrt Q,vf00w,vf17w
vwaitq
vmulq.xyz vf16,vf16,Q
sqc2 vf16,0x0(%0)
"
: /* No Output */
: "r"(v0), "r"(v1)
: "memory"
);
}
/*
 * IntplVector
 */
void IntplVector(void *v0, const void *v1, const void *v2, float t)
{
asm __volatile__("
.set noat
mfc1 $1,%3
qmtc2 $1,vf18
.set at
lqc2 vf17,0x0(%2)
lqc2 vf16,0x0(%1)
vsubx.w vf19,vf00,vf18
vmulax.xyz ACC,vf17,vf18
vmaddw.xyz vf16,vf16,vf19
sqc2 vf16,0x0(%0)
"
: /* No Output */
: "r"(v0), "r"(v1), "r"(v2), "f"(t)
: "memory"
);
}
/*
 * Apply Matrix
 */
void ApplyMatrix(void *v0, const void *m0, const void *v1)
{
asm __volatile__("
lqc2 vf20,0x00(%2)
lqc2 vf16,0x00(%1)
lqc2 vf17,0x10(%1)
lqc2 vf18,0x20(%1)
lqc2 vf19,0x30(%1)
vmulax.xyzw ACC,vf16,vf20
vmadday.xyzw ACC,vf17,vf20
vmaddaz.xyzw ACC,vf18,vf20
vmaddw.xyzw vf20,vf19,vf20
sqc2 vf20,0x00(%0)
"
: /* No Output */
: "r"(v0), "r"(m0), "r"(v1)
: "memory"
);
}
/*
 * Load Identity Matrix
 */
void LoadIdentityMatrix(void *m0)
{
asm __volatile__("
vmr32.xyzw vf18,vf00
sqc2 vf00,0x30(%0)
vmr32.xyzw vf17,vf18
sqc2 vf18,0x20(%0)
vmr32.xyzw vf16,vf17
sqc2 vf17,0x10(%0)
sqc2 vf16,0x00(%0)
"
: /* No Output */
: "r"(m0)
: "memory"
);
}
/*
 * Multiply Matrix
 */
void MulMatrix(void *m0, const void *m1, const void *m2)
{
asm __volatile__("
lqc2 vf16,0x00(%1)
lqc2 vf17,0x10(%1)
lqc2 vf18,0x20(%1)
lqc2 vf19,0x30(%1)
lqc2 vf20,0x00(%2)
lqc2 vf21,0x10(%2)
lqc2 vf22,0x20(%2)
lqc2 vf23,0x30(%2)
vmulax.xyzw ACC,vf20,vf16
vmadday.xyzw ACC,vf21,vf16
vmaddaz.xyzw ACC,vf22,vf16
vmaddw.xyzw vf16,vf23,vf16
vmulax.xyzw ACC,vf20,vf17
vmadday.xyzw ACC,vf21,vf17
vmaddaz.xyzw ACC,vf22,vf17
vmaddw.xyzw vf17,vf23,vf17
vmulax.xyzw ACC,vf20,vf18
vmadday.xyzw ACC,vf21,vf18
vmaddaz.xyzw ACC,vf22,vf18
vmaddw.xyzw vf18,vf23,vf18
vmulax.xyzw ACC,vf20,vf19
vmadday.xyzw ACC,vf21,vf19
vmaddaz.xyzw ACC,vf22,vf19
vmaddw.xyzw vf19,vf23,vf19
sqc2 vf16,0x00(%0)
sqc2 vf17,0x10(%0)
sqc2 vf18,0x20(%0)
sqc2 vf19,0x30(%0)
"
: /* No Output */
: "r"(m0), "r"(m1), "r"(m2)
: "memory"
);
}
/*
 * Transpose
 */
void Transpose(void *m0, const void *m1)
{
asm __volatile__("
lq $8,0x00(%1)
lq $9,0x10(%1)
lq $10,0x20(%1)
lq $11,0x30(%1)
pextlw $12,$9,$8
pextuw $13,$9,$8
pextlw $14,$11,$10
pextuw $15,$11,$10
pcpyld $8,$14,$12
pcpyud $9,$12,$14
pcpyld $10,$15,$13
pcpyud $11,$13,$15
sq $8,0x00(%0)
sq $9,0x10(%0)
sq $10,0x20(%0)
sq $11,0x30(%0)
"
: /* No Output */
: "r"(m0), "r"(m1)
: "$8", "$9", "$10", "$11", "$12", "$13", "$14", "$15", "memory"
);
}
/*
 * Quaternion To Matrix
 */
void QuaternionToMatrix(void *m0, const void *q0)
{
asm __volatile__("
lqc2 vf16,0x0(%1)
#-----------------------------------------------
# m0[0][3] = 0.0f;
# m0[1][3] = 0.0f;
# m0[2][3] = 0.0f;
#-----------------------------------------------
# vf17w = 0
# vf18w = 0
# vf19w = 0
vsub.w vf17,vf17,vf17
vsub.w vf18,vf18,vf18
vsub.w vf19,vf19,vf19
#-----------------------------------------------
# m0[0][0] = 1.0f - 2.0f * (y * y + z * z);
# m0[0][1] = 2.0f * (x * y - w * z);
# m0[0][2] = 2.0f * (x * z + w * y);
#-----------------------------------------------
# vf20xyz = yxx
# vf21xyz = yyz
# vf22xyz = zww
# vf23xyz = zzy
vaddy.x vf20,vf00,vf16y
vaddy.x vf21,vf00,vf16y
vaddz.x vf22,vf00,vf16z
vaddz.x vf23,vf00,vf16z
vaddx.y vf20,vf00,vf16x
vaddy.y vf21,vf00,vf16y
vaddw.y vf22,vf00,vf16w
vaddz.y vf23,vf00,vf16z
vaddx.z vf20,vf00,vf16x
vaddz.z vf21,vf00,vf16z
vaddw.z vf22,vf00,vf16w
vaddy.z vf23,vf00,vf16y
# vf17xyz = (y * y + z * z) (x * y - w * z) (x * z + w * y)
vmula.xyz ACC,vf20,vf21
vmadd.xz vf17,vf22,vf23
vmsub.y vf17,vf22,vf23
#-----------------------------------------------
# m0[1][0] = 2.0f * (x * y + w * z);
# m0[1][1] = 1.0f - 2.0f * (x * x + z * z);
# m0[1][2] = 2.0f * (y * z - w * x);
#-----------------------------------------------
# vf20xyz = yxx -> XxY
# vf21xyz = yyz -> yXz
# vf22xyz = zww -> WZw
# vf23xyz = zzy -> zzX
vaddx.x vf20,vf00,vf16x
vaddy.z vf20,vf00,vf16y
vaddx.y vf21,vf00,vf16x
vaddw.x vf22,vf00,vf16w
vaddz.y vf22,vf00,vf16z
vaddx.z vf23,vf00,vf16x
# vf18xyz = (x * y + w * z) (x * x + z * z) (y * z - w * x)
vmula.xyz ACC,vf20,vf21
vmadd.xy vf18,vf22,vf23
vmsub.z vf18,vf22,vf23
#-----------------------------------------------
# m0[2][0] = 2.0f * (x * z - w * y);
# m0[2][1] = 2.0f * (y * z + w * x);
# m0[2][2] = 1.0f - 2.0f * (x * x + y * y);
#-----------------------------------------------
# vf20xyz = xxy -> xYX
# vf21xyz = yxz -> ZZX
# vf22xyz = wzw -> wWY
# vf23xyz = zzx -> YXY
vaddy.y vf20,vf00,vf16y
vaddx.z vf20,vf00,vf16x
vaddz.x vf21,vf00,vf16z
vaddz.y vf21,vf00,vf16z
vaddx.z vf21,vf00,vf16x
vaddw.y vf22,vf00,vf16w
vaddy.z vf22,vf00,vf16y
vaddy.x vf23,vf00,vf16y
vaddx.y vf23,vf00,vf16x
vaddy.z vf23,vf00,vf16y
# vf19xyz = (x * z - w * y) (y * z + w * x) (x * x + y * y)
vmula.xyz ACC,vf20,vf21
vmadd.yz vf19,vf22,vf23
vmsub.x vf19,vf22,vf23
# vf17xyz *= 2
# vf18xyz *= 2
# vf19xyz *= 2
vadd.xyz vf17,vf17,vf17
vadd.xyz vf18,vf18,vf18
vadd.xyz vf19,vf19,vf19
# vf17x = 1.0f - vf17x
# vf18y = 1.0f - vf18y
# vf19z = 1.0f - vf19z
vsub.x vf17,vf00,vf17
vsub.y vf18,vf00,vf18
vsub.z vf19,vf00,vf19
vaddw.x vf17,vf17,vf00w
vaddw.y vf18,vf18,vf00w
vaddw.z vf19,vf19,vf00w
#---------------------------
sqc2 vf17,0x00(%0)
sqc2 vf18,0x10(%0)
sqc2 vf19,0x20(%0)
sqc2 vf00,0x30(%0)
"
: /* no output */
: "r"(m0) , "r"(q0)
: "memory"
);
}
/*
 * Point Of Intersection Of Face And Line
 */
void PointOfIntersectionOfFaceAndLine(void *v0, const void *v1, const void *v2, const void *face)
{
/*
* (a, b, c) = v1
* (d, e, f) = v2 - v1
* (A, B, C, D) = (Ax + By + Cz + D = 0)
*
* t = -(aA + bB + cC + D) / (Ad + Be + Cf)
*
* (x, y, z) = (a, b, c) + t(d, e, f)
*/
asm __volatile__("
lqc2 vf16,0x0(%1) # v1 (a, b, c)
lqc2 vf17,0x0(%2) # v2
lqc2 vf18,0x0(%3) # face (A B C D)
vsub.xyz vf17,vf17,vf16 # vf17 = (d e f)
vmul.xyz vf19,vf16,vf18 # (aA bB cC)
vmul.xyz vf20,vf17,vf18 # (Ad Be Cf)
#
# vf19.w = -(aA + bB + cC + D)
#
vsuba.w ACC,vf00,vf00
vmsubax.w ACC,vf00,vf19x
vmsubay.w ACC,vf00,vf19y
vmsubaz.w ACC,vf00,vf19z
vmsubw.w vf19,vf00,vf18w
#
# vf20.w = (Ad + Be + Cf)
#
vmulax.w ACC,vf00,vf20x
vmadday.w ACC,vf00,vf20y
vmaddz.w vf20,vf00,vf20z
#
# Q = -(aA + bB + cC + D) / (Ad + Be + Cf)
#
vdiv Q,vf19w,vf20w
#
# vf16 = (a b c) + Q(d e f)
#
vadda.xyz ACC,vf00,vf16
vwaitq
vmaddq.xyz vf16,vf17,Q
sqc2 vf16,0x0(%0)
"
:
: "r"(v0), "r"(v1), "r"(v2), "r"(face)
);
}
/*
 * Foot Of Perpendicular
 */
void FootOfPerpendicular(void *v0, const void *va, const void *vb, const void *vp)
{
asm __volatile__ ("
lqc2 vf16,0x00(%1) # va
lqc2 vf17,0x00(%2) # vb
lqc2 vf18,0x00(%3) # vp
vsub.xyz vf19,vf17,vf16 # v = vb - va
vsub.xyz vf20,vf16,vf18 # vf20 = (va - vp) * v
vmul.xyz vf20,vf20,vf19
vmulax.w ACC,vf00,vf20x # vf20.w = vf20x + vf20y + vf20z
vmadday.w ACC,vf00,vf20y
vmaddz.w vf20,vf00,vf20z
vmul.xyz vf21,vf19,vf19 # vf21 = v^2
vsuba.w ACC,vf00,vf00 # vf21.w = 0 - vf21.x - vf21.y - vf21.z
vmsubax.w ACC,vf00,vf21x
vmsubay.w ACC,vf00,vf21y
vmsubz.w vf21,vf00,vf21z
vdiv Q,vf20w,vf21w # Q = vf20.w / vf21.w
vadda.xyz ACC,vf00,vf16 # vf18 = va + v * Q
vwaitq
vmaddq.xyz vf18,vf19,Q
sqc2 vf18,0x00(%0)
"
: /* No Output */
: "r"(v0), "r"(va), "r"(vb), "r"(vp)
: "memory"
);
}
|
|