497 lines
8.3 KiB
ArmAsm
497 lines
8.3 KiB
ArmAsm
// Copyright 2012 The Go Authors. All rights reserved.
|
|
// Use of this source code is governed by a BSD-style
|
|
// license that can be found in the LICENSE file.
|
|
|
|
// This code was translated into a form compatible with 6a from the public
|
|
// domain sources in SUPERCOP: http://bench.cr.yp.to/supercop.html
|
|
|
|
// +build amd64,!gccgo
|
|
|
|
// func poly1305(out *[16]byte, m *byte, mlen uint64, key *[32]byte)
|
|
// poly1305: entry point and key setup.
//
// Frame: $224 bytes of locals, 32 bytes of arguments (four 8-byte words).
// Register roles after the argument loads below:
//   DI = out, SI = m (message cursor), DX = mlen (bytes remaining), CX = key.
//
// Stack slots, relative to the realigned SP computed below:
//   0..15     scratch buffer for a padded partial block
//   32..80    saved R11 (original SP), R12-R15, BX, BP
//   88, 96    saved out pointer, saved key pointer
//   104..135  four 8-byte slots used to turn 32-bit words into doubles
//   136..184  seven doubles derived from the first 16 key bytes
TEXT ·poly1305(SB),0,$224-32
|
|
MOVQ out+0(FP),DI
|
|
MOVQ m+8(FP),SI
|
|
MOVQ mlen+16(FP),DX
|
|
MOVQ key+24(FP),CX
|
|
|
|
// Keep the incoming SP in R11, then set SP = (SP & ^31) + 32, which is a
// 32-byte-aligned address at most 32 bytes above the incoming SP.
MOVQ SP,R11
|
|
MOVQ $31,R9
|
|
NOTQ R9
|
|
ANDQ R9,SP
|
|
ADDQ $32,SP
|
|
|
|
// Save the original SP (R11) and six more registers; all are reloaded
// before RET.
// NOTE(review): R12-R15, BX and BP are not otherwise written in this routine;
// the saves look like a carry-over from the SUPERCOP original - confirm.
MOVQ R11,32(SP)
|
|
MOVQ R12,40(SP)
|
|
MOVQ R13,48(SP)
|
|
MOVQ R14,56(SP)
|
|
MOVQ R15,64(SP)
|
|
MOVQ BX,72(SP)
|
|
MOVQ BP,80(SP)
|
|
// Load the x87 control word from ·ROUNDING (defined outside this view).
// NOTE(review): no FSTCW/FLDCW pair saves and restores the previous control
// word anywhere in this routine.
FLDCW ·ROUNDING(SB)
|
|
// Read the first 16 key bytes as four 32-bit words.
MOVL 0(CX),R8
|
|
MOVL 4(CX),R9
|
|
MOVL 8(CX),AX
|
|
MOVL 12(CX),R10
|
|
// Stash out and key pointers; DI and CX are reused as scratch later.
MOVQ DI,88(SP)
|
|
MOVQ CX,96(SP)
|
|
// Write the high 32 bits of the four conversion slots. As IEEE-754 double
// high words these are the exponents of 2^52, 2^84, 2^116 and 2^148, so a slot
// whose low word is w reads back as 2^52+w, 2^84+w*2^32, 2^116+w*2^64,
// 2^148+w*2^96 respectively. These high words are written only here; later
// code overwrites the low words only.
MOVL $0X43300000,108(SP)
|
|
MOVL $0X45300000,116(SP)
|
|
MOVL $0X47300000,124(SP)
|
|
MOVL $0X49300000,132(SP)
|
|
// Mask the key words (presumably the standard Poly1305 clamp of r).
ANDL $0X0FFFFFFF,R8
|
|
ANDL $0X0FFFFFFC,R9
|
|
ANDL $0X0FFFFFFC,AX
|
|
ANDL $0X0FFFFFFC,R10
|
|
MOVL R8,104(SP)
|
|
MOVL R9,112(SP)
|
|
MOVL AX,120(SP)
|
|
MOVL R10,128(SP)
|
|
// Push each slot as a double and subtract the matching ·DOFFSETn constant
// (defined elsewhere; presumably the 2^52/2^84/2^116/2^148 bias, leaving the
// key word scaled to its bit position - confirm).
FMOVD 104(SP), F0
|
|
FSUBD ·DOFFSET0(SB), F0
|
|
FMOVD 112(SP), F0
|
|
FSUBD ·DOFFSET1(SB), F0
|
|
FMOVD 120(SP), F0
|
|
FSUBD ·DOFFSET2(SB), F0
|
|
FMOVD 128(SP), F0
|
|
FSUBD ·DOFFSET3(SB), F0
|
|
// Pop the four values into the table at 136..184(SP):
//   136 = word 0
//   144 = word 1, 152 = word 1 * ·SCALE
//   160 = word 2, 168 = word 2 * ·SCALE
//   176 = word 3, 184 = word 3 * ·SCALE
// (·SCALE is defined elsewhere; presumably 5 * 2^-130 - confirm.)
FXCHD F0, F3
|
|
FMOVDP F0, 136(SP)
|
|
FXCHD F0, F1
|
|
FMOVD F0, 144(SP)
|
|
FMULD ·SCALE(SB), F0
|
|
FMOVDP F0, 152(SP)
|
|
FMOVD F0, 160(SP)
|
|
FMULD ·SCALE(SB), F0
|
|
FMOVDP F0, 168(SP)
|
|
FMOVD F0, 176(SP)
|
|
FMULD ·SCALE(SB), F0
|
|
FMOVDP F0, 184(SP)
|
|
// Push four zeros: the four-part accumulator lives on the x87 stack.
FLDZ
|
|
FLDZ
|
|
FLDZ
|
|
FLDZ
|
|
// Fewer than 16 message bytes (unsigned compare): go straight to the
// partial-block path.
CMPQ DX,$16
|
|
JB ADDATMOST15BYTES
|
|
// Absorb the first full 16-byte block into the (zero) accumulator.
// Reached by fall-through when mlen >= 16.
INITIALATLEAST16BYTES:
|
|
// Load the block as four 32-bit words and drop them into the low halves of
// the conversion slots at 104..128(SP); the high halves still hold the
// exponent words written during setup.
MOVL 12(SI),DI
|
|
MOVL 8(SI),CX
|
|
MOVL 4(SI),R8
|
|
MOVL 0(SI),R9
|
|
MOVL DI,128(SP)
|
|
MOVL CX,120(SP)
|
|
MOVL R8,112(SP)
|
|
MOVL R9,104(SP)
|
|
// Advance the message cursor and reduce the remaining length by one block.
ADDQ $16,SI
|
|
SUBQ $16,DX
|
|
// For each accumulator part: bring it to the top of the x87 stack, add the
// slot read as a double, then subtract the slot's offset constant.
// The top word uses ·DOFFSET3MINUSTWO128 where the partial-block path uses
// ·DOFFSET3; presumably the difference supplies the 2^128 padding bit of a
// full block - confirm against the constant definitions.
FXCHD F0, F3
|
|
FADDD 128(SP), F0
|
|
FSUBD ·DOFFSET3MINUSTWO128(SB), F0
|
|
FXCHD F0, F1
|
|
FADDD 112(SP), F0
|
|
FSUBD ·DOFFSET1(SB), F0
|
|
FXCHD F0, F2
|
|
FADDD 120(SP), F0
|
|
FSUBD ·DOFFSET2(SB), F0
|
|
FXCHD F0, F3
|
|
FADDD 104(SP), F0
|
|
FSUBD ·DOFFSET0(SB), F0
|
|
// If at least another full block remains, fall into the main loop; otherwise
// do one multiply step without loading more input.
CMPQ DX,$16
|
|
JB MULTIPLYADDATMOST15BYTES
|
|
// Main loop: one iteration per full 16-byte block while DX >= 16 on entry.
// Each iteration (1) stages the next block in the conversion slots,
// (2) runs the ·ALPHAn rounding sequence on the accumulator, (3) multiplies by
// the key-derived table at 136..184(SP), and (4) adds the staged block.
// Presumably this computes h = (h * r) + block with partial reduction -
// confirm against the SUPERCOP reference.
MULTIPLYADDATLEAST16BYTES:
|
|
// (1) Stage the next block: four 32-bit words into the low halves of the
// slots at 104..128(SP); advance SI and shrink DX by 16.
MOVL 12(SI),DI
|
|
MOVL 8(SI),CX
|
|
MOVL 4(SI),R8
|
|
MOVL 0(SI),R9
|
|
MOVL DI,128(SP)
|
|
MOVL CX,120(SP)
|
|
MOVL R8,112(SP)
|
|
MOVL R9,104(SP)
|
|
ADDQ $16,SI
|
|
SUBQ $16,DX
|
|
// (2) Rounding sequence. Each ·ALPHAn constant is pushed, has an accumulator
// part added, and is then subtracted again; the result is subtracted from that
// part. Presumably this is the add-then-subtract-a-large-constant idiom that
// splits off the high bits as a carry, relying on the rounding mode from
// FLDCW - confirm. The ·ALPHA130 carry is multiplied by ·SCALE before being
// folded back in.
FMOVD ·ALPHA130(SB), F0
|
|
FADDD F2,F0
|
|
FSUBD ·ALPHA130(SB), F0
|
|
FSUBD F0,F2
|
|
FMULD ·SCALE(SB), F0
|
|
FMOVD ·ALPHA32(SB), F0
|
|
FADDD F2,F0
|
|
FSUBD ·ALPHA32(SB), F0
|
|
FSUBD F0,F2
|
|
FXCHD F0, F2
|
|
FADDDP F0,F1
|
|
FMOVD ·ALPHA64(SB), F0
|
|
FADDD F4,F0
|
|
FSUBD ·ALPHA64(SB), F0
|
|
FSUBD F0,F4
|
|
FMOVD ·ALPHA96(SB), F0
|
|
FADDD F6,F0
|
|
FSUBD ·ALPHA96(SB), F0
|
|
FSUBD F0,F6
|
|
// Fold the split-off values back into neighbouring parts (pops as it goes).
FXCHD F0, F6
|
|
FADDDP F0,F1
|
|
FXCHD F0, F3
|
|
FADDDP F0,F5
|
|
FXCHD F0, F3
|
|
FADDDP F0,F1
|
|
// (3) Multiply by the table: entries at 136/144/160/176(SP) are the key
// words, entries at 152/168/184(SP) are the ·SCALE-multiplied copies.
FMOVD 176(SP), F0
|
|
FMULD F3,F0
|
|
FMOVD 160(SP), F0
|
|
FMULD F4,F0
|
|
FMOVD 144(SP), F0
|
|
FMULD F5,F0
|
|
FMOVD 136(SP), F0
|
|
FMULDP F0,F6
|
|
FMOVD 160(SP), F0
|
|
FMULD F4,F0
|
|
FADDDP F0,F3
|
|
FMOVD 144(SP), F0
|
|
FMULD F4,F0
|
|
FADDDP F0,F2
|
|
FMOVD 136(SP), F0
|
|
FMULD F4,F0
|
|
FADDDP F0,F1
|
|
FMOVD 184(SP), F0
|
|
FMULDP F0,F4
|
|
FXCHD F0, F3
|
|
FADDDP F0,F5
|
|
FMOVD 144(SP), F0
|
|
FMULD F4,F0
|
|
FADDDP F0,F2
|
|
FMOVD 136(SP), F0
|
|
FMULD F4,F0
|
|
FADDDP F0,F1
|
|
FMOVD 184(SP), F0
|
|
FMULD F4,F0
|
|
FADDDP F0,F3
|
|
FMOVD 168(SP), F0
|
|
FMULDP F0,F4
|
|
FXCHD F0, F3
|
|
FADDDP F0,F4
|
|
FMOVD 136(SP), F0
|
|
FMULD F5,F0
|
|
FADDDP F0,F1
|
|
FXCHD F0, F3
|
|
FMOVD 184(SP), F0
|
|
FMULD F5,F0
|
|
FADDDP F0,F3
|
|
FXCHD F0, F1
|
|
FMOVD 168(SP), F0
|
|
FMULD F5,F0
|
|
FADDDP F0,F1
|
|
FMOVD 152(SP), F0
|
|
FMULDP F0,F5
|
|
FXCHD F0, F4
|
|
FADDDP F0,F1
|
|
// Loop test is issued early: the JAE at the bottom consumes these flags.
// Everything in between is x87 exchange/load/arithmetic, which does not write
// the integer condition flags.
CMPQ DX,$16
|
|
// (4) Add the staged block: read each slot as a double, subtract its offset
// constant, and add it into the matching accumulator part.
FXCHD F0, F2
|
|
FMOVD 128(SP), F0
|
|
FSUBD ·DOFFSET3MINUSTWO128(SB), F0
|
|
FADDDP F0,F1
|
|
FXCHD F0, F1
|
|
FMOVD 120(SP), F0
|
|
FSUBD ·DOFFSET2(SB), F0
|
|
FADDDP F0,F1
|
|
FXCHD F0, F3
|
|
FMOVD 112(SP), F0
|
|
FSUBD ·DOFFSET1(SB), F0
|
|
FADDDP F0,F1
|
|
FXCHD F0, F2
|
|
FMOVD 104(SP), F0
|
|
FSUBD ·DOFFSET0(SB), F0
|
|
FADDDP F0,F1
|
|
// Unsigned: repeat while at least 16 bytes remain.
JAE MULTIPLYADDATLEAST16BYTES
|
|
// One rounding-and-multiply step with no new input. Reached when fewer than
// 16 message bytes remain after a full block has been added to the
// accumulator (by jump from the first-block code or by loop exit). Reads no
// message bytes and does not touch SI or DX; falls through into
// ADDATMOST15BYTES.
MULTIPLYADDATMOST15BYTES:
|
|
// Rounding sequence with ·ALPHA130/32/64/96: each constant is pushed, has an
// accumulator part added, and is subtracted again; the result is then
// subtracted from that part. Presumably this splits off carries using the
// rounding mode loaded at entry - confirm. The ·ALPHA130 result is scaled by
// ·SCALE.
FMOVD ·ALPHA130(SB), F0
|
|
FADDD F2,F0
|
|
FSUBD ·ALPHA130(SB), F0
|
|
FSUBD F0,F2
|
|
FMULD ·SCALE(SB), F0
|
|
FMOVD ·ALPHA32(SB), F0
|
|
FADDD F2,F0
|
|
FSUBD ·ALPHA32(SB), F0
|
|
FSUBD F0,F2
|
|
FMOVD ·ALPHA64(SB), F0
|
|
FADDD F5,F0
|
|
FSUBD ·ALPHA64(SB), F0
|
|
FSUBD F0,F5
|
|
FMOVD ·ALPHA96(SB), F0
|
|
FADDD F7,F0
|
|
FSUBD ·ALPHA96(SB), F0
|
|
FSUBD F0,F7
|
|
// Fold the split-off values into neighbouring parts (pops as it goes).
FXCHD F0, F7
|
|
FADDDP F0,F1
|
|
FXCHD F0, F5
|
|
FADDDP F0,F1
|
|
FXCHD F0, F3
|
|
FADDDP F0,F5
|
|
FADDDP F0,F1
|
|
// Multiply by the key-derived table at 136..184(SP) and sum the products.
FMOVD 176(SP), F0
|
|
FMULD F1,F0
|
|
FMOVD 160(SP), F0
|
|
FMULD F2,F0
|
|
FMOVD 144(SP), F0
|
|
FMULD F3,F0
|
|
FMOVD 136(SP), F0
|
|
FMULDP F0,F4
|
|
FMOVD 160(SP), F0
|
|
FMULD F5,F0
|
|
FADDDP F0,F3
|
|
FMOVD 144(SP), F0
|
|
FMULD F5,F0
|
|
FADDDP F0,F2
|
|
FMOVD 136(SP), F0
|
|
FMULD F5,F0
|
|
FADDDP F0,F1
|
|
FMOVD 184(SP), F0
|
|
FMULDP F0,F5
|
|
FXCHD F0, F4
|
|
FADDDP F0,F3
|
|
FMOVD 144(SP), F0
|
|
FMULD F5,F0
|
|
FADDDP F0,F2
|
|
FMOVD 136(SP), F0
|
|
FMULD F5,F0
|
|
FADDDP F0,F1
|
|
FMOVD 184(SP), F0
|
|
FMULD F5,F0
|
|
FADDDP F0,F4
|
|
FMOVD 168(SP), F0
|
|
FMULDP F0,F5
|
|
FXCHD F0, F4
|
|
FADDDP F0,F2
|
|
FMOVD 136(SP), F0
|
|
FMULD F5,F0
|
|
FADDDP F0,F1
|
|
FMOVD 184(SP), F0
|
|
FMULD F5,F0
|
|
FADDDP F0,F4
|
|
FMOVD 168(SP), F0
|
|
FMULD F5,F0
|
|
FADDDP F0,F3
|
|
FMOVD 152(SP), F0
|
|
FMULDP F0,F5
|
|
FXCHD F0, F4
|
|
FADDDP F0,F1
|
|
// Handle the trailing 0..15 message bytes. Every path into this label has
// DX < 16. With DX == 0 nothing is absorbed. Otherwise the remaining bytes are
// copied into a zeroed 16-byte buffer, a single 1 byte is appended, and the
// padded block goes through the same add / round / multiply sequence as a
// full block. Falls through into NOMOREBYTES.
ADDATMOST15BYTES:
|
|
CMPQ DX,$0
|
|
JE NOMOREBYTES
|
|
// Zero the 16-byte scratch buffer at 0(SP).
MOVL $0,0(SP)
|
|
MOVL $0, 4 (SP)
|
|
MOVL $0, 8 (SP)
|
|
MOVL $0, 12 (SP)
|
|
// Copy DX bytes from the message cursor (SI) into the buffer (DI). REP MOVSB
// leaves DI one past the last byte written, so the MOVB places the 1 byte
// directly after the message bytes (offset DX <= 15, inside the buffer).
LEAQ 0(SP),DI
|
|
MOVQ DX,CX
|
|
REP; MOVSB
|
|
MOVB $1,0(DI)
|
|
// The message pointer and length are no longer needed; DI, SI, DX and CX are
// reused to carry the padded block's four 32-bit words into the low halves of
// the conversion slots at 104..128(SP).
MOVL 12 (SP),DI
|
|
MOVL 8 (SP),SI
|
|
MOVL 4 (SP),DX
|
|
MOVL 0(SP),CX
|
|
MOVL DI,128(SP)
|
|
MOVL SI,120(SP)
|
|
MOVL DX,112(SP)
|
|
MOVL CX,104(SP)
|
|
// Add each slot (read as a double) to its accumulator part and subtract the
// slot's offset constant. The top word uses plain ·DOFFSET3 here, not
// ·DOFFSET3MINUSTWO128 as the full-block code does; the padding bit was
// written explicitly into the buffer above.
FXCHD F0, F3
|
|
FADDD 128(SP), F0
|
|
FSUBD ·DOFFSET3(SB), F0
|
|
FXCHD F0, F2
|
|
FADDD 120(SP), F0
|
|
FSUBD ·DOFFSET2(SB), F0
|
|
FXCHD F0, F1
|
|
FADDD 112(SP), F0
|
|
FSUBD ·DOFFSET1(SB), F0
|
|
FXCHD F0, F3
|
|
FADDD 104(SP), F0
|
|
FSUBD ·DOFFSET0(SB), F0
|
|
// Rounding sequence with ·ALPHA130/32/64/96 (push constant, add a part,
// subtract the constant again, subtract the result from the part). Presumably
// this is carry extraction that relies on the x87 rounding mode - confirm. The
// ·ALPHA130 result is scaled by ·SCALE.
FMOVD ·ALPHA130(SB), F0
|
|
FADDD F3,F0
|
|
FSUBD ·ALPHA130(SB), F0
|
|
FSUBD F0,F3
|
|
FMULD ·SCALE(SB), F0
|
|
FMOVD ·ALPHA32(SB), F0
|
|
FADDD F2,F0
|
|
FSUBD ·ALPHA32(SB), F0
|
|
FSUBD F0,F2
|
|
FMOVD ·ALPHA64(SB), F0
|
|
FADDD F6,F0
|
|
FSUBD ·ALPHA64(SB), F0
|
|
FSUBD F0,F6
|
|
FMOVD ·ALPHA96(SB), F0
|
|
FADDD F5,F0
|
|
FSUBD ·ALPHA96(SB), F0
|
|
FSUBD F0,F5
|
|
// Fold the split-off values into neighbouring parts (pops as it goes).
FXCHD F0, F4
|
|
FADDDP F0,F3
|
|
FXCHD F0, F6
|
|
FADDDP F0,F1
|
|
FXCHD F0, F3
|
|
FADDDP F0,F5
|
|
FXCHD F0, F3
|
|
FADDDP F0,F1
|
|
// Multiply by the key-derived table at 136..184(SP) and sum the products.
FMOVD 176(SP), F0
|
|
FMULD F3,F0
|
|
FMOVD 160(SP), F0
|
|
FMULD F4,F0
|
|
FMOVD 144(SP), F0
|
|
FMULD F5,F0
|
|
FMOVD 136(SP), F0
|
|
FMULDP F0,F6
|
|
FMOVD 160(SP), F0
|
|
FMULD F5,F0
|
|
FADDDP F0,F3
|
|
FMOVD 144(SP), F0
|
|
FMULD F5,F0
|
|
FADDDP F0,F2
|
|
FMOVD 136(SP), F0
|
|
FMULD F5,F0
|
|
FADDDP F0,F1
|
|
FMOVD 184(SP), F0
|
|
FMULDP F0,F5
|
|
FXCHD F0, F4
|
|
FADDDP F0,F5
|
|
FMOVD 144(SP), F0
|
|
FMULD F6,F0
|
|
FADDDP F0,F2
|
|
FMOVD 136(SP), F0
|
|
FMULD F6,F0
|
|
FADDDP F0,F1
|
|
FMOVD 184(SP), F0
|
|
FMULD F6,F0
|
|
FADDDP F0,F4
|
|
FMOVD 168(SP), F0
|
|
FMULDP F0,F6
|
|
FXCHD F0, F5
|
|
FADDDP F0,F4
|
|
FMOVD 136(SP), F0
|
|
FMULD F2,F0
|
|
FADDDP F0,F1
|
|
FMOVD 184(SP), F0
|
|
FMULD F2,F0
|
|
FADDDP F0,F5
|
|
FMOVD 168(SP), F0
|
|
FMULD F2,F0
|
|
FADDDP F0,F3
|
|
FMOVD 152(SP), F0
|
|
FMULDP F0,F2
|
|
FXCHD F0, F1
|
|
FADDDP F0,F3
|
|
// Final exchanges reorder the accumulator parts on the x87 stack before
// falling into NOMOREBYTES.
FXCHD F0, F3
|
|
FXCHD F0, F2
|
|
// Finalisation: a last rounding pass over the accumulator, conversion of the
// four floating-point parts to 32-bit integer limbs, a branch-free choice
// between the value and the value plus 5 (presumably the final reduction
// modulo 2^130-5 - confirm), addition of key bytes 16..31, and the 16-byte
// store to out. Ends with the register/SP restore and RET.
NOMOREBYTES:
|
|
// R10 = 0; used below as the zero addend that starts each ADCL carry step.
MOVL $0,R10
|
|
// Rounding sequence with ·ALPHA130/32/64/96, same shape as in the block code.
FMOVD ·ALPHA130(SB), F0
|
|
FADDD F4,F0
|
|
FSUBD ·ALPHA130(SB), F0
|
|
FSUBD F0,F4
|
|
FMULD ·SCALE(SB), F0
|
|
FMOVD ·ALPHA32(SB), F0
|
|
FADDD F2,F0
|
|
FSUBD ·ALPHA32(SB), F0
|
|
FSUBD F0,F2
|
|
FMOVD ·ALPHA64(SB), F0
|
|
FADDD F4,F0
|
|
FSUBD ·ALPHA64(SB), F0
|
|
FSUBD F0,F4
|
|
FMOVD ·ALPHA96(SB), F0
|
|
FADDD F6,F0
|
|
FSUBD ·ALPHA96(SB), F0
|
|
FXCHD F0, F6
|
|
FSUBD F6,F0
|
|
FXCHD F0, F4
|
|
FADDDP F0,F3
|
|
FXCHD F0, F4
|
|
FADDDP F0,F1
|
|
FXCHD F0, F2
|
|
FADDDP F0,F3
|
|
FXCHD F0, F4
|
|
FADDDP F0,F3
|
|
// Add ·HOFFSET0..3 (defined elsewhere) to the four parts. Presumably these
// biases place each part's integer value in the low mantissa bits so it can
// be read back from memory as an integer - confirm.
FXCHD F0, F3
|
|
FADDD ·HOFFSET0(SB), F0
|
|
FXCHD F0, F3
|
|
FADDD ·HOFFSET1(SB), F0
|
|
FXCHD F0, F1
|
|
FADDD ·HOFFSET2(SB), F0
|
|
FXCHD F0, F2
|
|
FADDD ·HOFFSET3(SB), F0
|
|
FXCHD F0, F3
|
|
// Store and pop the four doubles into the 8-byte slots at 104..128(SP).
FMOVDP F0, 104(SP)
|
|
FMOVDP F0, 112(SP)
|
|
FMOVDP F0, 120(SP)
|
|
FMOVDP F0, 128(SP)
|
|
// Take the low 6 bits of each slot's high 32-bit word: the overflow of that
// slot beyond 32 bits, to be carried into the next limb. All four are read
// before any slot is rewritten, because the 64-bit stores below overwrite
// these high words.
MOVL 108(SP),DI
|
|
ANDL $63,DI
|
|
MOVL 116(SP),SI
|
|
ANDL $63,SI
|
|
MOVL 124(SP),DX
|
|
ANDL $63,DX
|
|
MOVL 132(SP),CX
|
|
ANDL $63,CX
|
|
// Carry chain: limb[k] = low word of slot k + overflow of slot k-1 + CF.
// Results are written back as zero-extended 64-bit values; the fifth (top)
// limb goes to 136(SP). MOVL/MOVQ between the ADDL/ADCL steps leave CF intact.
MOVL 112(SP),R8
|
|
ADDL DI,R8
|
|
MOVQ R8,112(SP)
|
|
MOVL 120(SP),DI
|
|
ADCL SI,DI
|
|
MOVQ DI,120(SP)
|
|
MOVL 128(SP),DI
|
|
ADCL DX,DI
|
|
MOVQ DI,128(SP)
|
|
MOVL R10,DI
|
|
ADCL CX,DI
|
|
MOVQ DI,136(SP)
|
|
// Compute (value + 5) limb by limb. SI, DX, CX and R8 keep the unmodified
// limbs 0..3 while 104..128(SP) receive the limbs of the sum.
MOVQ $5,DI
|
|
MOVL 104(SP),SI
|
|
ADDL SI,DI
|
|
MOVQ DI,104(SP)
|
|
MOVL R10,DI
|
|
MOVQ 112(SP),DX
|
|
ADCL DX,DI
|
|
MOVQ DI,112(SP)
|
|
MOVL R10,DI
|
|
MOVQ 120(SP),CX
|
|
ADCL CX,DI
|
|
MOVQ DI,120(SP)
|
|
MOVL R10,DI
|
|
MOVQ 128(SP),R8
|
|
ADCL R8,DI
|
|
MOVQ DI,128(SP)
|
|
// DI = 0xFFFFFFFC + top limb + CF (32-bit). If that sum wraps past 2^32, i.e.
// top limb + CF >= 4, DI is a small value and SARL $16 yields 0; otherwise the
// high bits stay set and SARL $16 yields 0xFFFFFFFF. This assumes the top limb
// is only a few units - TODO confirm. R9 is the complementary mask.
MOVQ $0XFFFFFFFC,DI
|
|
MOVQ 136(SP),R9
|
|
ADCL R9,DI
|
|
SARL $16,DI
|
|
MOVQ DI,R9
|
|
XORL $0XFFFFFFFF,R9
|
|
// Branch-free select per limb:
//   limb = (original limb & DI) | (limb of value+5 & R9).
ANDQ DI,SI
|
|
MOVQ 104(SP),AX
|
|
ANDQ R9,AX
|
|
ORQ AX,SI
|
|
ANDQ DI,DX
|
|
MOVQ 112(SP),AX
|
|
ANDQ R9,AX
|
|
ORQ AX,DX
|
|
ANDQ DI,CX
|
|
MOVQ 120(SP),AX
|
|
ANDQ R9,AX
|
|
ORQ AX,CX
|
|
ANDQ DI,R8
|
|
MOVQ 128(SP),DI
|
|
ANDQ R9,DI
|
|
ORQ DI,R8
|
|
// Reload the out pointer (88(SP)) and key pointer (96(SP)) stored at entry.
MOVQ 88(SP),DI
|
|
MOVQ 96(SP),R9
|
|
// Add key bytes 16..31 as four 32-bit words with carry; the carry out of the
// top word is dropped.
ADDL 16(R9),SI
|
|
ADCL 20(R9),DX
|
|
ADCL 24(R9),CX
|
|
ADCL 28(R9),R8
|
|
// Write the 16-byte result to out.
MOVL SI,0(DI)
|
|
MOVL DX,4(DI)
|
|
MOVL CX,8(DI)
|
|
MOVL R8,12(DI)
|
|
// Reload the registers stored at entry; R11 holds the original SP, which is
// put back last so the 32..80(SP) offsets stay valid until then.
MOVQ 32(SP),R11
|
|
MOVQ 40(SP),R12
|
|
MOVQ 48(SP),R13
|
|
MOVQ 56(SP),R14
|
|
MOVQ 64(SP),R15
|
|
MOVQ 72(SP),BX
|
|
MOVQ 80(SP),BP
|
|
MOVQ R11,SP
|
|
RET
|