Home | Sort by: relevance | last modified time | path
    Searched refs:mul_2 (Results 1 - 23 of 23) sorted by relevance

  /src/external/lgpl3/gmp/dist/mpn/x86_64/pentium4/
mul_2.asm 37 include_mpn(`x86_64/bd1/mul_2.asm')
  /src/external/lgpl3/gmp/dist/mpn/x86_64/bd1/
mul_basecase.asm 35 C cycles/limb mul_1 mul_2 mul_3 addmul_2
59 C * Merge faster mul_2, such as the one in the same directory as this file.
96 mov (up), %rax C shared for mul_1 and mul_2
100 mov (vp), v0 C shared for mul_1 and mul_2
101 mul v0 C shared for mul_1 and mul_2
  /src/external/lgpl3/gmp/dist/mpn/x86_64/coreisbr/
mul_basecase.asm 35 C cycles/limb mul_1 mul_2 mul_3 addmul_2
61 C * Consider replacing the 2-way mul_2 code with 4-way code, for a very slight
99 mov (up), %rax C shared for mul_1 and mul_2
103 mov (vp), v0 C shared for mul_1 and mul_2
104 mul v0 C shared for mul_1 and mul_2
sqr_basecase.asm 35 C cycles/limb mul_2 addmul_2 sqr_diag_addlsh1
66 C * The mul_2 loop has a 10 insn common sequence in the loop start and the
mullo_basecase.asm 35 C cycles/limb mul_2 addmul_2
  /src/external/lgpl3/gmp/dist/mpn/x86_64/k8/
mul_basecase.asm 51 C mul_1/mul_2 prologues, saving a LEA (%rip)? It would slow down the
103 jz L(mul_2)
206 C mul_2 for vp[0], vp[1] if vn is even
209 L(mul_2):
mullo_basecase.asm 40 C large trip count. Instead, we should start with mul_2 for any operand
164 jz L(mul_2)
243 L(mul_2):
mulmid_basecase.asm 94 jz L(mul_2)
205 C mul_2 for vp[0], vp[1] if vn is even
208 L(mul_2):
  /src/external/lgpl3/gmp/dist/mpn/m88k/
mul_1.s 94 mul r11,r12,r7 ; r11 = prod_0 mul_2 WB mask_1
98 mul r25,r13,r8 ; r25 = prod_2 mul_5 WB mul_2
  /src/external/lgpl3/gmp/lib/libgmp/arch/ia64/
srcs.mk 206 mul_2.asm mpn/ia64/mul_2.asm \
  /src/external/lgpl3/gmp/lib/libgmp/arch/x86_64/
srcs.mk 208 mul_2.asm mpn/x86_64/mul_2.asm \
  /src/external/lgpl3/gmp/dist/mpn/powerpc64/mode64/p7/
aormul_2.asm 34 C mul_2 addmul_2
  /src/external/lgpl3/gmp/dist/mpn/sparc64/ultrasparct3/
aormul_2.asm 37 C mul_2 addmul_2
  /src/external/lgpl3/gmp/dist/mpn/x86_64/core2/
mul_basecase.asm 36 C cycles/limb mul_1 mul_2 mul_3 addmul_2
129 mov (up), %rax C shared for mul_1 and mul_2
136 mov (vp_param), v0 C shared for mul_1 and mul_2
144 mul v0 C shared for mul_1 and mul_2
mullo_basecase.asm 35 C cycles/limb mul_2 addmul_2
sqr_basecase.asm 36 C cycles/limb mul_2 addmul_2 sqr_diag_addlsh1
  /src/external/lgpl3/gmp/dist/mpn/x86_64/coreihwl/
mul_basecase.asm 35 C cycles/limb mul_1 mul_2 mul_3 addmul_2
mullo_basecase.asm 35 C cycles/limb mul_2 addmul_2
sqr_basecase.asm 35 C cycles/limb mul_2 addmul_2 sqr_diag_addlsh1
  /src/external/lgpl3/gmp/dist/mpn/arm/v6/
sqr_basecase.asm 68 C * The addmul_2 loops here runs well on all cores, but mul_2 runs poorly
  /src/external/lgpl3/gmp/dist/mpn/powerpc64/mode64/p9/
mul_basecase.asm 45 C * Could we schedule loads less in addmul_2/mul_2? That would save some regs
  /src/external/lgpl3/gmp/dist/
gmp-impl.h 1165 #define mpn_mul_2 __MPN(mul_2)
configure 24901 mul_2 mul_3 mul_4 mul_5 mul_6 \
25216 mul_2|addmul_2) tmp_mulfunc="aormul_2" ;;
25410 mul_2|addmul_2) tmp_mulfunc="aormul_2" ;;

Completed in 149 milliseconds