1 #include <linux/linkage.h>
4 * Multiply operation for 64 bit integers, for devices with hard multiply
5 * Input : Operand1[H] in Reg r5
6 * Operand1[L] in Reg r6
7 * Operand2[H] in Reg r7
8 * Operand2[L] in Reg r8
9 * Output: Result[H] in Reg r3
14 * Both input numbers are split into 16-bit pieces as follows
18 * + (C * H + D * G) << 16
19 * + (B * H + C * G + D * F) << 32
20 * + (A * H + B * G + C * F + D * E) << 48
22 * Only the low 64 bits of the result are kept
27 .type __muldi3, @function
33 /* Save the input operands on the caller's stack */
39 /* Store all the callee saved registers */
49 /* Load all the 16 bit values for A thru H */
50 lhui r20, r1, 44 /* A */
51 lhui r21, r1, 46 /* B */
52 lhui r22, r1, 48 /* C */
53 lhui r23, r1, 50 /* D */
54 lhui r24, r1, 52 /* E */
55 lhui r25, r1, 54 /* F */
56 lhui r26, r1, 56 /* G */
57 lhui r27, r1, 58 /* H */
59 /* D * H ==> LSB of the result on stack ==> Store1 */
61 swi r9, r1, 36 /* Pos2 and Pos3 */
63 /* Hi (Store1) + C * H + D * G ==> Store2 ==> Pos1 and Pos2 */
64 /* Store the carry generated in position 2 for Pos 3 */
65 lhui r11, r1, 36 /* Pos2 */
66 mul r9, r22, r27 /* C * H */
67 mul r10, r23, r26 /* D * G */
71 addc r12, r12, r0 /* Store the Carry */
72 shi r9, r1, 36 /* Store Pos2 */
75 shi r11, r1, 34 /* Store Pos1 */
77 /* Hi (Store2) + B * H + C * G + D * F ==> Store3 ==> Pos0 and Pos1 */
78 mul r9, r21, r27 /* B * H */
79 mul r10, r22, r26 /* C * G */
80 mul r7, r23, r25 /* D * F */
84 swi r9, r1, 32 /* Pos0 and Pos1 */
86 /* Hi (Store3) + A * H + B * G + C * F + D * E ==> Store3 ==> Pos0 */
87 lhui r11, r1, 32 /* Pos0 */
88 mul r9, r20, r27 /* A * H */
89 mul r10, r21, r26 /* B * G */
90 mul r7, r22, r25 /* C * F */
91 mul r8, r23, r24 /* D * E */
96 sext16 r9, r9 /* Sign extend the MSB */
99 /* Move results to r3 and r4 */
103 lwi r3, r1, 32 /* Hi Part */
104 lwi r4, r1, 36 /* Lo Part */
106 /* Restore Callee saved registers */
116 /* Restore Frame and return */
120 .size __muldi3, . - __muldi3