/*
 * Transpose a 4x4 matrix of 32-bit elements.
 *
 * m1 points to the 64-byte source matrix (four rows of four 32-bit words)
 * and m0 to the 64-byte destination.  Every element is read before any
 * element is written, so m0 may be the same address as m1.
 *
 * On MIPS builds the work is done with 128-bit quadword loads/stores and
 * the parallel extend/copy instructions (lq/sq/pext?w/pcpy?d); that code
 * only assembles for a CPU that provides those instructions.
 * NOTE(review): the quadword accesses presumably expect 16-byte aligned
 * pointers -- confirm against callers.
 *
 * On every other target a byte-wise C fallback produces the same result.
 */
void Transpose(void *m0, const void *m1)
{
#if defined(__mips__)
    /*
     * The template is built from adjacent string literals: a raw newline
     * inside a single string literal is not valid C.  __asm__ is used
     * instead of asm so the code also builds in strict ISO modes.
     */
    __asm__ __volatile__(
        /* Load the four source rows. */
        "lq      $8,0x00(%1)\n\t"
        "lq      $9,0x10(%1)\n\t"
        "lq      $10,0x20(%1)\n\t"
        "lq      $11,0x30(%1)\n\t"
        /* Interleave the words of rows 0/1 and of rows 2/3. */
        "pextlw  $12,$9,$8\n\t"     /* r0[0] r1[0] r0[1] r1[1] */
        "pextuw  $13,$9,$8\n\t"     /* r0[2] r1[2] r0[3] r1[3] */
        "pextlw  $14,$11,$10\n\t"   /* r2[0] r3[0] r2[1] r3[1] */
        "pextuw  $15,$11,$10\n\t"   /* r2[2] r3[2] r2[3] r3[3] */
        /* Join matching 64-bit halves to form the four columns. */
        "pcpyld  $8,$14,$12\n\t"    /* column 0 */
        "pcpyud  $9,$12,$14\n\t"    /* column 1 */
        "pcpyld  $10,$15,$13\n\t"   /* column 2 */
        "pcpyud  $11,$13,$15\n\t"   /* column 3 */
        /* Store the columns as the rows of the destination. */
        "sq      $8,0x00(%0)\n\t"
        "sq      $9,0x10(%0)\n\t"
        "sq      $10,0x20(%0)\n\t"
        "sq      $11,0x30(%0)\n\t"
        : /* No Output */
        : "r"(m0), "r"(m1)
        : "$8", "$9", "$10", "$11", "$12", "$13", "$14", "$15", "memory"
    );
#else
    enum { DIM = 4, ELEM_BYTES = 4, MATRIX_BYTES = DIM * DIM * ELEM_BYTES };
    const unsigned char *src = (const unsigned char *)m1;
    unsigned char *dst = (unsigned char *)m0;
    unsigned char tmp[MATRIX_BYTES];
    int row, col, b, k;

    /* Build the result in a scratch buffer so that m0 == m1 is safe. */
    for (row = 0; row < DIM; row++) {
        for (col = 0; col < DIM; col++) {
            for (b = 0; b < ELEM_BYTES; b++) {
                tmp[(col * DIM + row) * ELEM_BYTES + b] =
                    src[(row * DIM + col) * ELEM_BYTES + b];
            }
        }
    }

    for (k = 0; k < MATRIX_BYTES; k++) {
        dst[k] = tmp[k];
    }
#endif
}
RSS