mirror of
				https://gogs.blitter.com/RLabs/xs
				synced 2024-08-14 10:26:42 +00:00 
			
		
		
		
	Initial aead/chacha20 support (ChaCha20_12)
Signed-off-by: Russ Magee <rmagee@gmail.com>
This commit is contained in:
		
							parent
							
								
									50e786e549
								
							
						
					
					
						commit
						f3e8383dce
					
				
					 18 changed files with 2592 additions and 2 deletions
				
			
		
							
								
								
									
										1
									
								
								go.mod
									
										
									
									
									
								
							
							
						
						
									
										1
									
								
								go.mod
									
										
									
									
									
								
							|  | @ -9,6 +9,7 @@ require ( | ||||||
| 	blitter.com/go/kyber v0.0.0-20200130200857-6f2021cb88d9 | 	blitter.com/go/kyber v0.0.0-20200130200857-6f2021cb88d9 | ||||||
| 	blitter.com/go/mtwist v1.0.1 // indirect | 	blitter.com/go/mtwist v1.0.1 // indirect | ||||||
| 	blitter.com/go/newhope v0.0.0-20200130200750-192fc08a8aae | 	blitter.com/go/newhope v0.0.0-20200130200750-192fc08a8aae | ||||||
|  | 	github.com/aead/chacha20 v0.0.0-20180709150244-8b13a72661da | ||||||
| 	github.com/jameskeane/bcrypt v0.0.0-20120420032655-c3cd44c1e20f | 	github.com/jameskeane/bcrypt v0.0.0-20120420032655-c3cd44c1e20f | ||||||
| 	github.com/klauspost/cpuid v1.2.2 // indirect | 	github.com/klauspost/cpuid v1.2.2 // indirect | ||||||
| 	github.com/klauspost/reedsolomon v1.9.3 // indirect | 	github.com/klauspost/reedsolomon v1.9.3 // indirect | ||||||
|  |  | ||||||
							
								
								
									
										2
									
								
								go.sum
									
										
									
									
									
								
							
							
						
						
									
										2
									
								
								go.sum
									
										
									
									
									
								
							|  | @ -28,6 +28,8 @@ git.schwanenlied.me/yawning/kyber.git v0.0.0-20180530164001-a270899bd22c h1:SGOx | ||||||
| git.schwanenlied.me/yawning/kyber.git v0.0.0-20180530164001-a270899bd22c/go.mod h1:QrbgzU5EL/1jaMD5pD4Tiikj3R5elPMa+RMwFUTGwQU= | git.schwanenlied.me/yawning/kyber.git v0.0.0-20180530164001-a270899bd22c/go.mod h1:QrbgzU5EL/1jaMD5pD4Tiikj3R5elPMa+RMwFUTGwQU= | ||||||
| git.schwanenlied.me/yawning/newhope.git v0.0.0-20170622154529-9598792ba8f2 h1:89TYv/+wotJ+QWrH5B/yN0pEQutr2V/5za0VoYiVGCM= | git.schwanenlied.me/yawning/newhope.git v0.0.0-20170622154529-9598792ba8f2 h1:89TYv/+wotJ+QWrH5B/yN0pEQutr2V/5za0VoYiVGCM= | ||||||
| git.schwanenlied.me/yawning/newhope.git v0.0.0-20170622154529-9598792ba8f2/go.mod h1:weMqACFGzJs4Ni+K9shsRd02N4LkDrtGlkRxISK+II0= | git.schwanenlied.me/yawning/newhope.git v0.0.0-20170622154529-9598792ba8f2/go.mod h1:weMqACFGzJs4Ni+K9shsRd02N4LkDrtGlkRxISK+II0= | ||||||
|  | github.com/aead/chacha20 v0.0.0-20180709150244-8b13a72661da h1:KjTM2ks9d14ZYCvmHS9iAKVt9AyzRSqNU1qabPih5BY= | ||||||
|  | github.com/aead/chacha20 v0.0.0-20180709150244-8b13a72661da/go.mod h1:eHEWzANqSiWQsof+nXEI9bUVUyV6F53Fp89EuCh2EAA= | ||||||
| github.com/davecgh/go-spew v1.1.0 h1:ZDRjVQ15GmhC3fiQ8ni8+OwkZQO4DARzQgrnXU1Liz8= | github.com/davecgh/go-spew v1.1.0 h1:ZDRjVQ15GmhC3fiQ8ni8+OwkZQO4DARzQgrnXU1Liz8= | ||||||
| github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= | github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= | ||||||
| github.com/jameskeane/bcrypt v0.0.0-20120420032655-c3cd44c1e20f h1:UWGE8Vi+1Agt0lrvnd7UsmvwqWKRzb9byK9iQmsbY0Y= | github.com/jameskeane/bcrypt v0.0.0-20120420032655-c3cd44c1e20f h1:UWGE8Vi+1Agt0lrvnd7UsmvwqWKRzb9byK9iQmsbY0Y= | ||||||
|  |  | ||||||
							
								
								
									
										21
									
								
								vendor/github.com/aead/chacha20/LICENSE
									
										
									
										generated
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										21
									
								
								vendor/github.com/aead/chacha20/LICENSE
									
										
									
										generated
									
									
										vendored
									
									
										Normal file
									
								
							|  | @ -0,0 +1,21 @@ | ||||||
|  | The MIT License (MIT) | ||||||
|  | 
 | ||||||
|  | Copyright (c) 2016 Andreas Auernhammer | ||||||
|  | 
 | ||||||
|  | Permission is hereby granted, free of charge, to any person obtaining a copy | ||||||
|  | of this software and associated documentation files (the "Software"), to deal | ||||||
|  | in the Software without restriction, including without limitation the rights | ||||||
|  | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell | ||||||
|  | copies of the Software, and to permit persons to whom the Software is | ||||||
|  | furnished to do so, subject to the following conditions: | ||||||
|  | 
 | ||||||
|  | The above copyright notice and this permission notice shall be included in all | ||||||
|  | copies or substantial portions of the Software. | ||||||
|  | 
 | ||||||
|  | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | ||||||
|  | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | ||||||
|  | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | ||||||
|  | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | ||||||
|  | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | ||||||
|  | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | ||||||
|  | SOFTWARE. | ||||||
							
								
								
									
										197
									
								
								vendor/github.com/aead/chacha20/chacha/chacha.go
									
										
									
										generated
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										197
									
								
								vendor/github.com/aead/chacha20/chacha/chacha.go
									
										
									
										generated
									
									
										vendored
									
									
										Normal file
									
								
							|  | @ -0,0 +1,197 @@ | ||||||
|  | // Copyright (c) 2016 Andreas Auernhammer. All rights reserved. | ||||||
|  | // Use of this source code is governed by a license that can be | ||||||
|  | // found in the LICENSE file. | ||||||
|  | 
 | ||||||
|  | // Package chacha implements some low-level functions of the | ||||||
|  | // ChaCha cipher family. | ||||||
|  | package chacha // import "github.com/aead/chacha20/chacha" | ||||||
|  | 
 | ||||||
|  | import ( | ||||||
|  | 	"encoding/binary" | ||||||
|  | 	"errors" | ||||||
|  | 	"math" | ||||||
|  | ) | ||||||
|  | 
 | ||||||
|  | const ( | ||||||
|  | 	// NonceSize is the size of the ChaCha20 nonce in bytes. | ||||||
|  | 	NonceSize = 8 | ||||||
|  | 
 | ||||||
|  | 	// INonceSize is the size of the IETF-ChaCha20 nonce in bytes. | ||||||
|  | 	INonceSize = 12 | ||||||
|  | 
 | ||||||
|  | 	// XNonceSize is the size of the XChaCha20 nonce in bytes. | ||||||
|  | 	XNonceSize = 24 | ||||||
|  | 
 | ||||||
|  | 	// KeySize is the size of the key in bytes. | ||||||
|  | 	KeySize = 32 | ||||||
|  | ) | ||||||
|  | 
 | ||||||
|  | var ( | ||||||
|  | 	useSSE2  bool | ||||||
|  | 	useSSSE3 bool | ||||||
|  | 	useAVX   bool | ||||||
|  | 	useAVX2  bool | ||||||
|  | ) | ||||||
|  | 
 | ||||||
|  | var ( | ||||||
|  | 	errKeySize      = errors.New("chacha20/chacha: bad key length") | ||||||
|  | 	errInvalidNonce = errors.New("chacha20/chacha: bad nonce length") | ||||||
|  | ) | ||||||
|  | 
 | ||||||
|  | func setup(state *[64]byte, nonce, key []byte) (err error) { | ||||||
|  | 	if len(key) != KeySize { | ||||||
|  | 		err = errKeySize | ||||||
|  | 		return | ||||||
|  | 	} | ||||||
|  | 	var Nonce [16]byte | ||||||
|  | 	switch len(nonce) { | ||||||
|  | 	case NonceSize: | ||||||
|  | 		copy(Nonce[8:], nonce) | ||||||
|  | 		initialize(state, key, &Nonce) | ||||||
|  | 	case INonceSize: | ||||||
|  | 		copy(Nonce[4:], nonce) | ||||||
|  | 		initialize(state, key, &Nonce) | ||||||
|  | 	case XNonceSize: | ||||||
|  | 		var tmpKey [32]byte | ||||||
|  | 		var hNonce [16]byte | ||||||
|  | 
 | ||||||
|  | 		copy(hNonce[:], nonce[:16]) | ||||||
|  | 		copy(tmpKey[:], key) | ||||||
|  | 		HChaCha20(&tmpKey, &hNonce, &tmpKey) | ||||||
|  | 		copy(Nonce[8:], nonce[16:]) | ||||||
|  | 		initialize(state, tmpKey[:], &Nonce) | ||||||
|  | 
 | ||||||
|  | 		// BUG(aead): An optimizing compiler may remove this zeroing loop as a | ||||||
|  | 		//			  dead store. Deriving the subkey in the caller's key instead of | ||||||
|  | 		//			  tmpKey would overwrite that key (and probably confuse users). | ||||||
|  | 		for i := range tmpKey { | ||||||
|  | 			tmpKey[i] = 0 | ||||||
|  | 		} | ||||||
|  | 	default: | ||||||
|  | 		err = errInvalidNonce | ||||||
|  | 	} | ||||||
|  | 	return | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | // XORKeyStream encrypts or decrypts bytes from src to dst using the given nonce and key. | ||||||
|  | // The length of the nonce determines the version of ChaCha20: | ||||||
|  | // - NonceSize:  ChaCha20/r with a 64 bit nonce and a 2^64 * 64 byte period. | ||||||
|  | // - INonceSize: ChaCha20/r as defined in RFC 7539 and a 2^32 * 64 byte period. | ||||||
|  | // - XNonceSize: XChaCha20/r with a 192 bit nonce and a 2^64 * 64 byte period. | ||||||
|  | // The rounds argument specifies the number of rounds performed for keystream | ||||||
|  | // generation - valid values are 8, 12 or 20. The src and dst may be the same slice | ||||||
|  | // but otherwise should not overlap. This function panics if len(dst) < len(src), | ||||||
|  | // if rounds is invalid, if the key is not 256 bits long, if the nonce is neither 64, 96 nor 192 bits long, or if a 96 bit nonce is used with more than 2^38 bytes of src. | ||||||
|  | func XORKeyStream(dst, src, nonce, key []byte, rounds int) { | ||||||
|  | 	if rounds != 20 && rounds != 12 && rounds != 8 { | ||||||
|  | 		panic("chacha20/chacha: bad number of rounds") | ||||||
|  | 	} | ||||||
|  | 	if len(dst) < len(src) { | ||||||
|  | 		panic("chacha20/chacha: dst buffer is to small") | ||||||
|  | 	} | ||||||
|  | 	if len(nonce) == INonceSize && uint64(len(src)) > (1<<38) { | ||||||
|  | 		panic("chacha20/chacha: src is too large") | ||||||
|  | 	} | ||||||
|  | 
 | ||||||
|  | 	var block, state [64]byte | ||||||
|  | 	if err := setup(&state, nonce, key); err != nil { | ||||||
|  | 		panic(err) | ||||||
|  | 	} | ||||||
|  | 	xorKeyStream(dst, src, &block, &state, rounds) | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | // Cipher implements ChaCha20/r (XChaCha20/r) for a given number of rounds r. | ||||||
|  | type Cipher struct { | ||||||
|  | 	state, block [64]byte | ||||||
|  | 	off          int | ||||||
|  | 	rounds       int // 20 for ChaCha20 | ||||||
|  | 	noncesize    int | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | // NewCipher returns a new *chacha.Cipher implementing the ChaCha20/r or XChaCha20/r | ||||||
|  | // (r = 8, 12 or 20) stream cipher. The nonce must be unique for one key for all time. | ||||||
|  | // The length of the nonce determines the version of ChaCha20: | ||||||
|  | // - NonceSize:  ChaCha20/r with a 64 bit nonce and a 2^64 * 64 byte period. | ||||||
|  | // - INonceSize: ChaCha20/r as defined in RFC 7539 and a 2^32 * 64 byte period. | ||||||
|  | // - XNonceSize: XChaCha20/r with a 192 bit nonce and a 2^64 * 64 byte period. | ||||||
|  | // If the nonce is neither 64, 96 nor 192 bits long, or the key is not 256 bits long, a non-nil error is returned. If rounds is not 8, 12 or 20, NewCipher panics. | ||||||
|  | func NewCipher(nonce, key []byte, rounds int) (*Cipher, error) { | ||||||
|  | 	if rounds != 20 && rounds != 12 && rounds != 8 { | ||||||
|  | 		panic("chacha20/chacha: bad number of rounds") | ||||||
|  | 	} | ||||||
|  | 
 | ||||||
|  | 	c := new(Cipher) | ||||||
|  | 	if err := setup(&(c.state), nonce, key); err != nil { | ||||||
|  | 		return nil, err | ||||||
|  | 	} | ||||||
|  | 	c.rounds = rounds | ||||||
|  | 
 | ||||||
|  | 	if len(nonce) == INonceSize { | ||||||
|  | 		c.noncesize = INonceSize | ||||||
|  | 	} else { | ||||||
|  | 		c.noncesize = NonceSize | ||||||
|  | 	} | ||||||
|  | 
 | ||||||
|  | 	return c, nil | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | // XORKeyStream encrypts or decrypts bytes from src to dst. Src and dst may be the same slice | ||||||
|  | // but otherwise should not overlap. The function panics if len(dst) < len(src) or if processing src would overflow the block counter. | ||||||
|  | func (c *Cipher) XORKeyStream(dst, src []byte) { | ||||||
|  | 	if len(dst) < len(src) { | ||||||
|  | 		panic("chacha20/chacha: dst buffer is to small") | ||||||
|  | 	} | ||||||
|  | 
 | ||||||
|  | 	if c.off > 0 { | ||||||
|  | 		n := len(c.block[c.off:]) | ||||||
|  | 		if len(src) <= n { | ||||||
|  | 			for i, v := range src { | ||||||
|  | 				dst[i] = v ^ c.block[c.off] | ||||||
|  | 				c.off++ | ||||||
|  | 			} | ||||||
|  | 			if c.off == 64 { | ||||||
|  | 				c.off = 0 | ||||||
|  | 			} | ||||||
|  | 			return | ||||||
|  | 		} | ||||||
|  | 
 | ||||||
|  | 		for i, v := range c.block[c.off:] { | ||||||
|  | 			dst[i] = src[i] ^ v | ||||||
|  | 		} | ||||||
|  | 		src = src[n:] | ||||||
|  | 		dst = dst[n:] | ||||||
|  | 		c.off = 0 | ||||||
|  | 	} | ||||||
|  | 
 | ||||||
|  | 	// check for counter overflow | ||||||
|  | 	blocksToXOR := len(src) / 64 | ||||||
|  | 	if len(src)%64 != 0 { | ||||||
|  | 		blocksToXOR++ | ||||||
|  | 	} | ||||||
|  | 	var overflow bool | ||||||
|  | 	if c.noncesize == INonceSize { | ||||||
|  | 		overflow = binary.LittleEndian.Uint32(c.state[48:]) > math.MaxUint32-uint32(blocksToXOR) | ||||||
|  | 	} else { | ||||||
|  | 		overflow = binary.LittleEndian.Uint64(c.state[48:]) > math.MaxUint64-uint64(blocksToXOR) | ||||||
|  | 	} | ||||||
|  | 	if overflow { | ||||||
|  | 		panic("chacha20/chacha: counter overflow") | ||||||
|  | 	} | ||||||
|  | 
 | ||||||
|  | 	c.off += xorKeyStream(dst, src, &(c.block), &(c.state), c.rounds) | ||||||
|  | } | ||||||
|  | 
 | ||||||
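|  | // SetCounter sets the block counter to ctr, skipping to the ctr-th 64 byte block of the key stream. SetCounter(0) resets the cipher. | ||||||
|  | // Unused key stream of the current 64 byte block is always discarded. For 96 bit nonces only the low 32 bits of ctr are used. | ||||||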
|  | func (c *Cipher) SetCounter(ctr uint64) { | ||||||
|  | 	if c.noncesize == INonceSize { | ||||||
|  | 		binary.LittleEndian.PutUint32(c.state[48:], uint32(ctr)) | ||||||
|  | 	} else { | ||||||
|  | 		binary.LittleEndian.PutUint64(c.state[48:], ctr) | ||||||
|  | 	} | ||||||
|  | 	c.off = 0 | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | // HChaCha20 generates 32 pseudo-random bytes from a 128 bit nonce and a 256 bit secret key. | ||||||
|  | // It can be used as a key-derivation-function (KDF). | ||||||
|  | func HChaCha20(out *[32]byte, nonce *[16]byte, key *[32]byte) { hChaCha20(out, nonce, key) } | ||||||
							
								
								
									
										406
									
								
								vendor/github.com/aead/chacha20/chacha/chachaAVX2_amd64.s
									
										
									
										generated
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										406
									
								
								vendor/github.com/aead/chacha20/chacha/chachaAVX2_amd64.s
									
										
									
										generated
									
									
										vendored
									
									
										Normal file
									
								
							|  | @ -0,0 +1,406 @@ | ||||||
|  | // Copyright (c) 2016 Andreas Auernhammer. All rights reserved. | ||||||
|  | // Use of this source code is governed by a license that can be | ||||||
|  | // found in the LICENSE file. | ||||||
|  | 
 | ||||||
|  | // +build amd64,!gccgo,!appengine,!nacl | ||||||
|  | 
 | ||||||
|  | #include "const.s" | ||||||
|  | #include "macro.s" | ||||||
|  | 
 | ||||||
|  | #define TWO 0(SP) | ||||||
|  | #define C16 32(SP) | ||||||
|  | #define C8 64(SP) | ||||||
|  | #define STATE_0 96(SP) | ||||||
|  | #define STATE_1 128(SP) | ||||||
|  | #define STATE_2 160(SP) | ||||||
|  | #define STATE_3 192(SP) | ||||||
|  | #define TMP_0 224(SP) | ||||||
|  | #define TMP_1 256(SP) | ||||||
|  | 
 | ||||||
|  | // func xorKeyStreamAVX2(dst, src []byte, block, state *[64]byte, rounds int) int | ||||||
|  | TEXT ·xorKeyStreamAVX2(SB), 4, $320-80 | ||||||
|  | 	MOVQ dst_base+0(FP), DI | ||||||
|  | 	MOVQ src_base+24(FP), SI | ||||||
|  | 	MOVQ block+48(FP), BX | ||||||
|  | 	MOVQ state+56(FP), AX | ||||||
|  | 	MOVQ rounds+64(FP), DX | ||||||
|  | 	MOVQ src_len+32(FP), CX | ||||||
|  | 
 | ||||||
|  | 	MOVQ SP, R8 | ||||||
|  | 	ADDQ $32, SP | ||||||
|  | 	ANDQ $-32, SP | ||||||
|  | 
 | ||||||
|  | 	VMOVDQU    0(AX), Y2 | ||||||
|  | 	VMOVDQU    32(AX), Y3 | ||||||
|  | 	VPERM2I128 $0x22, Y2, Y0, Y0 | ||||||
|  | 	VPERM2I128 $0x33, Y2, Y1, Y1 | ||||||
|  | 	VPERM2I128 $0x22, Y3, Y2, Y2 | ||||||
|  | 	VPERM2I128 $0x33, Y3, Y3, Y3 | ||||||
|  | 
 | ||||||
|  | 	TESTQ CX, CX | ||||||
|  | 	JZ    done | ||||||
|  | 
 | ||||||
|  | 	VMOVDQU ·one_AVX2<>(SB), Y4 | ||||||
|  | 	VPADDD  Y4, Y3, Y3 | ||||||
|  | 
 | ||||||
|  | 	VMOVDQA Y0, STATE_0 | ||||||
|  | 	VMOVDQA Y1, STATE_1 | ||||||
|  | 	VMOVDQA Y2, STATE_2 | ||||||
|  | 	VMOVDQA Y3, STATE_3 | ||||||
|  | 
 | ||||||
|  | 	VMOVDQU ·rol16_AVX2<>(SB), Y4 | ||||||
|  | 	VMOVDQU ·rol8_AVX2<>(SB), Y5 | ||||||
|  | 	VMOVDQU ·two_AVX2<>(SB), Y6 | ||||||
|  | 	VMOVDQA Y4, Y14 | ||||||
|  | 	VMOVDQA Y5, Y15 | ||||||
|  | 	VMOVDQA Y4, C16 | ||||||
|  | 	VMOVDQA Y5, C8 | ||||||
|  | 	VMOVDQA Y6, TWO | ||||||
|  | 
 | ||||||
|  | 	CMPQ CX, $64 | ||||||
|  | 	JBE  between_0_and_64 | ||||||
|  | 	CMPQ CX, $192 | ||||||
|  | 	JBE  between_64_and_192 | ||||||
|  | 	CMPQ CX, $320 | ||||||
|  | 	JBE  between_192_and_320 | ||||||
|  | 	CMPQ CX, $448 | ||||||
|  | 	JBE  between_320_and_448 | ||||||
|  | 
 | ||||||
|  | at_least_512: | ||||||
|  | 	VMOVDQA Y0, Y4 | ||||||
|  | 	VMOVDQA Y1, Y5 | ||||||
|  | 	VMOVDQA Y2, Y6 | ||||||
|  | 	VPADDQ  TWO, Y3, Y7 | ||||||
|  | 	VMOVDQA Y0, Y8 | ||||||
|  | 	VMOVDQA Y1, Y9 | ||||||
|  | 	VMOVDQA Y2, Y10 | ||||||
|  | 	VPADDQ  TWO, Y7, Y11 | ||||||
|  | 	VMOVDQA Y0, Y12 | ||||||
|  | 	VMOVDQA Y1, Y13 | ||||||
|  | 	VMOVDQA Y2, Y14 | ||||||
|  | 	VPADDQ  TWO, Y11, Y15 | ||||||
|  | 
 | ||||||
|  | 	MOVQ DX, R9 | ||||||
|  | 
 | ||||||
|  | chacha_loop_512: | ||||||
|  | 	VMOVDQA Y8, TMP_0 | ||||||
|  | 	CHACHA_QROUND_AVX(Y0, Y1, Y2, Y3, Y8, C16, C8) | ||||||
|  | 	CHACHA_QROUND_AVX(Y4, Y5, Y6, Y7, Y8, C16, C8) | ||||||
|  | 	VMOVDQA TMP_0, Y8 | ||||||
|  | 	VMOVDQA Y0, TMP_0 | ||||||
|  | 	CHACHA_QROUND_AVX(Y8, Y9, Y10, Y11, Y0, C16, C8) | ||||||
|  | 	CHACHA_QROUND_AVX(Y12, Y13, Y14, Y15, Y0, C16, C8) | ||||||
|  | 	CHACHA_SHUFFLE_AVX(Y1, Y2, Y3) | ||||||
|  | 	CHACHA_SHUFFLE_AVX(Y5, Y6, Y7) | ||||||
|  | 	CHACHA_SHUFFLE_AVX(Y9, Y10, Y11) | ||||||
|  | 	CHACHA_SHUFFLE_AVX(Y13, Y14, Y15) | ||||||
|  | 
 | ||||||
|  | 	CHACHA_QROUND_AVX(Y12, Y13, Y14, Y15, Y0, C16, C8) | ||||||
|  | 	CHACHA_QROUND_AVX(Y8, Y9, Y10, Y11, Y0, C16, C8) | ||||||
|  | 	VMOVDQA TMP_0, Y0 | ||||||
|  | 	VMOVDQA Y8, TMP_0 | ||||||
|  | 	CHACHA_QROUND_AVX(Y4, Y5, Y6, Y7, Y8, C16, C8) | ||||||
|  | 	CHACHA_QROUND_AVX(Y0, Y1, Y2, Y3, Y8, C16, C8) | ||||||
|  | 	VMOVDQA TMP_0, Y8 | ||||||
|  | 	CHACHA_SHUFFLE_AVX(Y3, Y2, Y1) | ||||||
|  | 	CHACHA_SHUFFLE_AVX(Y7, Y6, Y5) | ||||||
|  | 	CHACHA_SHUFFLE_AVX(Y11, Y10, Y9) | ||||||
|  | 	CHACHA_SHUFFLE_AVX(Y15, Y14, Y13) | ||||||
|  | 	SUBQ    $2, R9 | ||||||
|  | 	JA      chacha_loop_512 | ||||||
|  | 
 | ||||||
|  | 	VMOVDQA Y12, TMP_0 | ||||||
|  | 	VMOVDQA Y13, TMP_1 | ||||||
|  | 	VPADDD  STATE_0, Y0, Y0 | ||||||
|  | 	VPADDD  STATE_1, Y1, Y1 | ||||||
|  | 	VPADDD  STATE_2, Y2, Y2 | ||||||
|  | 	VPADDD  STATE_3, Y3, Y3 | ||||||
|  | 	XOR_AVX2(DI, SI, 0, Y0, Y1, Y2, Y3, Y12, Y13) | ||||||
|  | 	VMOVDQA STATE_0, Y0 | ||||||
|  | 	VMOVDQA STATE_1, Y1 | ||||||
|  | 	VMOVDQA STATE_2, Y2 | ||||||
|  | 	VMOVDQA STATE_3, Y3 | ||||||
|  | 	VPADDQ  TWO, Y3, Y3 | ||||||
|  | 
 | ||||||
|  | 	VPADDD Y0, Y4, Y4 | ||||||
|  | 	VPADDD Y1, Y5, Y5 | ||||||
|  | 	VPADDD Y2, Y6, Y6 | ||||||
|  | 	VPADDD Y3, Y7, Y7 | ||||||
|  | 	XOR_AVX2(DI, SI, 128, Y4, Y5, Y6, Y7, Y12, Y13) | ||||||
|  | 	VPADDQ TWO, Y3, Y3 | ||||||
|  | 
 | ||||||
|  | 	VPADDD Y0, Y8, Y8 | ||||||
|  | 	VPADDD Y1, Y9, Y9 | ||||||
|  | 	VPADDD Y2, Y10, Y10 | ||||||
|  | 	VPADDD Y3, Y11, Y11 | ||||||
|  | 	XOR_AVX2(DI, SI, 256, Y8, Y9, Y10, Y11, Y12, Y13) | ||||||
|  | 	VPADDQ TWO, Y3, Y3 | ||||||
|  | 
 | ||||||
|  | 	VPADDD TMP_0, Y0, Y12 | ||||||
|  | 	VPADDD TMP_1, Y1, Y13 | ||||||
|  | 	VPADDD Y2, Y14, Y14 | ||||||
|  | 	VPADDD Y3, Y15, Y15 | ||||||
|  | 	VPADDQ TWO, Y3, Y3 | ||||||
|  | 
 | ||||||
|  | 	CMPQ CX, $512 | ||||||
|  | 	JB   less_than_512 | ||||||
|  | 
 | ||||||
|  | 	XOR_AVX2(DI, SI, 384, Y12, Y13, Y14, Y15, Y4, Y5) | ||||||
|  | 	VMOVDQA Y3, STATE_3 | ||||||
|  | 	ADDQ    $512, SI | ||||||
|  | 	ADDQ    $512, DI | ||||||
|  | 	SUBQ    $512, CX | ||||||
|  | 	CMPQ    CX, $448 | ||||||
|  | 	JA      at_least_512 | ||||||
|  | 
 | ||||||
|  | 	TESTQ CX, CX | ||||||
|  | 	JZ    done | ||||||
|  | 
 | ||||||
|  | 	VMOVDQA C16, Y14 | ||||||
|  | 	VMOVDQA C8, Y15 | ||||||
|  | 
 | ||||||
|  | 	CMPQ CX, $64 | ||||||
|  | 	JBE  between_0_and_64 | ||||||
|  | 	CMPQ CX, $192 | ||||||
|  | 	JBE  between_64_and_192 | ||||||
|  | 	CMPQ CX, $320 | ||||||
|  | 	JBE  between_192_and_320 | ||||||
|  | 	JMP  between_320_and_448 | ||||||
|  | 
 | ||||||
|  | less_than_512: | ||||||
|  | 	XOR_UPPER_AVX2(DI, SI, 384, Y12, Y13, Y14, Y15, Y4, Y5) | ||||||
|  | 	EXTRACT_LOWER(BX, Y12, Y13, Y14, Y15, Y4) | ||||||
|  | 	ADDQ $448, SI | ||||||
|  | 	ADDQ $448, DI | ||||||
|  | 	SUBQ $448, CX | ||||||
|  | 	JMP  finalize | ||||||
|  | 
 | ||||||
|  | between_320_and_448: | ||||||
|  | 	VMOVDQA Y0, Y4 | ||||||
|  | 	VMOVDQA Y1, Y5 | ||||||
|  | 	VMOVDQA Y2, Y6 | ||||||
|  | 	VPADDQ  TWO, Y3, Y7 | ||||||
|  | 	VMOVDQA Y0, Y8 | ||||||
|  | 	VMOVDQA Y1, Y9 | ||||||
|  | 	VMOVDQA Y2, Y10 | ||||||
|  | 	VPADDQ  TWO, Y7, Y11 | ||||||
|  | 
 | ||||||
|  | 	MOVQ DX, R9 | ||||||
|  | 
 | ||||||
|  | chacha_loop_384: | ||||||
|  | 	CHACHA_QROUND_AVX(Y0, Y1, Y2, Y3, Y13, Y14, Y15) | ||||||
|  | 	CHACHA_QROUND_AVX(Y4, Y5, Y6, Y7, Y13, Y14, Y15) | ||||||
|  | 	CHACHA_QROUND_AVX(Y8, Y9, Y10, Y11, Y13, Y14, Y15) | ||||||
|  | 	CHACHA_SHUFFLE_AVX(Y1, Y2, Y3) | ||||||
|  | 	CHACHA_SHUFFLE_AVX(Y5, Y6, Y7) | ||||||
|  | 	CHACHA_SHUFFLE_AVX(Y9, Y10, Y11) | ||||||
|  | 	CHACHA_QROUND_AVX(Y0, Y1, Y2, Y3, Y13, Y14, Y15) | ||||||
|  | 	CHACHA_QROUND_AVX(Y4, Y5, Y6, Y7, Y13, Y14, Y15) | ||||||
|  | 	CHACHA_QROUND_AVX(Y8, Y9, Y10, Y11, Y13, Y14, Y15) | ||||||
|  | 	CHACHA_SHUFFLE_AVX(Y3, Y2, Y1) | ||||||
|  | 	CHACHA_SHUFFLE_AVX(Y7, Y6, Y5) | ||||||
|  | 	CHACHA_SHUFFLE_AVX(Y11, Y10, Y9) | ||||||
|  | 	SUBQ $2, R9 | ||||||
|  | 	JA   chacha_loop_384 | ||||||
|  | 
 | ||||||
|  | 	VPADDD  STATE_0, Y0, Y0 | ||||||
|  | 	VPADDD  STATE_1, Y1, Y1 | ||||||
|  | 	VPADDD  STATE_2, Y2, Y2 | ||||||
|  | 	VPADDD  STATE_3, Y3, Y3 | ||||||
|  | 	XOR_AVX2(DI, SI, 0, Y0, Y1, Y2, Y3, Y12, Y13) | ||||||
|  | 	VMOVDQA STATE_0, Y0 | ||||||
|  | 	VMOVDQA STATE_1, Y1 | ||||||
|  | 	VMOVDQA STATE_2, Y2 | ||||||
|  | 	VMOVDQA STATE_3, Y3 | ||||||
|  | 	VPADDQ  TWO, Y3, Y3 | ||||||
|  | 
 | ||||||
|  | 	VPADDD Y0, Y4, Y4 | ||||||
|  | 	VPADDD Y1, Y5, Y5 | ||||||
|  | 	VPADDD Y2, Y6, Y6 | ||||||
|  | 	VPADDD Y3, Y7, Y7 | ||||||
|  | 	XOR_AVX2(DI, SI, 128, Y4, Y5, Y6, Y7, Y12, Y13) | ||||||
|  | 	VPADDQ TWO, Y3, Y3 | ||||||
|  | 
 | ||||||
|  | 	VPADDD Y0, Y8, Y8 | ||||||
|  | 	VPADDD Y1, Y9, Y9 | ||||||
|  | 	VPADDD Y2, Y10, Y10 | ||||||
|  | 	VPADDD Y3, Y11, Y11 | ||||||
|  | 	VPADDQ TWO, Y3, Y3 | ||||||
|  | 
 | ||||||
|  | 	CMPQ CX, $384 | ||||||
|  | 	JB   less_than_384 | ||||||
|  | 
 | ||||||
|  | 	XOR_AVX2(DI, SI, 256, Y8, Y9, Y10, Y11, Y12, Y13) | ||||||
|  | 	SUBQ  $384, CX | ||||||
|  | 	TESTQ CX, CX | ||||||
|  | 	JE    done | ||||||
|  | 
 | ||||||
|  | 	ADDQ $384, SI | ||||||
|  | 	ADDQ $384, DI | ||||||
|  | 	JMP  between_0_and_64 | ||||||
|  | 
 | ||||||
|  | less_than_384: | ||||||
|  | 	XOR_UPPER_AVX2(DI, SI, 256, Y8, Y9, Y10, Y11, Y12, Y13) | ||||||
|  | 	EXTRACT_LOWER(BX, Y8, Y9, Y10, Y11, Y12) | ||||||
|  | 	ADDQ $320, SI | ||||||
|  | 	ADDQ $320, DI | ||||||
|  | 	SUBQ $320, CX | ||||||
|  | 	JMP  finalize | ||||||
|  | 
 | ||||||
|  | between_192_and_320: | ||||||
|  | 	VMOVDQA Y0, Y4 | ||||||
|  | 	VMOVDQA Y1, Y5 | ||||||
|  | 	VMOVDQA Y2, Y6 | ||||||
|  | 	VMOVDQA Y3, Y7 | ||||||
|  | 	VMOVDQA Y0, Y8 | ||||||
|  | 	VMOVDQA Y1, Y9 | ||||||
|  | 	VMOVDQA Y2, Y10 | ||||||
|  | 	VPADDQ  TWO, Y3, Y11 | ||||||
|  | 
 | ||||||
|  | 	MOVQ DX, R9 | ||||||
|  | 
 | ||||||
|  | chacha_loop_256: | ||||||
|  | 	CHACHA_QROUND_AVX(Y4, Y5, Y6, Y7, Y13, Y14, Y15) | ||||||
|  | 	CHACHA_QROUND_AVX(Y8, Y9, Y10, Y11, Y13, Y14, Y15) | ||||||
|  | 	CHACHA_SHUFFLE_AVX(Y5, Y6, Y7) | ||||||
|  | 	CHACHA_SHUFFLE_AVX(Y9, Y10, Y11) | ||||||
|  | 	CHACHA_QROUND_AVX(Y4, Y5, Y6, Y7, Y13, Y14, Y15) | ||||||
|  | 	CHACHA_QROUND_AVX(Y8, Y9, Y10, Y11, Y13, Y14, Y15) | ||||||
|  | 	CHACHA_SHUFFLE_AVX(Y7, Y6, Y5) | ||||||
|  | 	CHACHA_SHUFFLE_AVX(Y11, Y10, Y9) | ||||||
|  | 	SUBQ $2, R9 | ||||||
|  | 	JA   chacha_loop_256 | ||||||
|  | 
 | ||||||
|  | 	VPADDD Y0, Y4, Y4 | ||||||
|  | 	VPADDD Y1, Y5, Y5 | ||||||
|  | 	VPADDD Y2, Y6, Y6 | ||||||
|  | 	VPADDD Y3, Y7, Y7 | ||||||
|  | 	VPADDQ TWO, Y3, Y3 | ||||||
|  | 	XOR_AVX2(DI, SI, 0, Y4, Y5, Y6, Y7, Y12, Y13) | ||||||
|  | 	VPADDD Y0, Y8, Y8 | ||||||
|  | 	VPADDD Y1, Y9, Y9 | ||||||
|  | 	VPADDD Y2, Y10, Y10 | ||||||
|  | 	VPADDD Y3, Y11, Y11 | ||||||
|  | 	VPADDQ TWO, Y3, Y3 | ||||||
|  | 
 | ||||||
|  | 	CMPQ CX, $256 | ||||||
|  | 	JB   less_than_256 | ||||||
|  | 
 | ||||||
|  | 	XOR_AVX2(DI, SI, 128, Y8, Y9, Y10, Y11, Y12, Y13) | ||||||
|  | 	SUBQ  $256, CX | ||||||
|  | 	TESTQ CX, CX | ||||||
|  | 	JE    done | ||||||
|  | 
 | ||||||
|  | 	ADDQ $256, SI | ||||||
|  | 	ADDQ $256, DI | ||||||
|  | 	JMP  between_0_and_64 | ||||||
|  | 
 | ||||||
|  | less_than_256: | ||||||
|  | 	XOR_UPPER_AVX2(DI, SI, 128, Y8, Y9, Y10, Y11, Y12, Y13) | ||||||
|  | 	EXTRACT_LOWER(BX, Y8, Y9, Y10, Y11, Y12) | ||||||
|  | 	ADDQ $192, SI | ||||||
|  | 	ADDQ $192, DI | ||||||
|  | 	SUBQ $192, CX | ||||||
|  | 	JMP  finalize | ||||||
|  | 
 | ||||||
|  | between_64_and_192: | ||||||
|  | 	VMOVDQA Y0, Y4 | ||||||
|  | 	VMOVDQA Y1, Y5 | ||||||
|  | 	VMOVDQA Y2, Y6 | ||||||
|  | 	VMOVDQA Y3, Y7 | ||||||
|  | 
 | ||||||
|  | 	MOVQ DX, R9 | ||||||
|  | 
 | ||||||
|  | chacha_loop_128: | ||||||
|  | 	CHACHA_QROUND_AVX(Y4, Y5, Y6, Y7, Y13, Y14, Y15) | ||||||
|  | 	CHACHA_SHUFFLE_AVX(Y5, Y6, Y7) | ||||||
|  | 	CHACHA_QROUND_AVX(Y4, Y5, Y6, Y7, Y13, Y14, Y15) | ||||||
|  | 	CHACHA_SHUFFLE_AVX(Y7, Y6, Y5) | ||||||
|  | 	SUBQ $2, R9 | ||||||
|  | 	JA   chacha_loop_128 | ||||||
|  | 
 | ||||||
|  | 	VPADDD Y0, Y4, Y4 | ||||||
|  | 	VPADDD Y1, Y5, Y5 | ||||||
|  | 	VPADDD Y2, Y6, Y6 | ||||||
|  | 	VPADDD Y3, Y7, Y7 | ||||||
|  | 	VPADDQ TWO, Y3, Y3 | ||||||
|  | 
 | ||||||
|  | 	CMPQ CX, $128 | ||||||
|  | 	JB   less_than_128 | ||||||
|  | 
 | ||||||
|  | 	XOR_AVX2(DI, SI, 0, Y4, Y5, Y6, Y7, Y12, Y13) | ||||||
|  | 	SUBQ  $128, CX | ||||||
|  | 	TESTQ CX, CX | ||||||
|  | 	JE    done | ||||||
|  | 
 | ||||||
|  | 	ADDQ $128, SI | ||||||
|  | 	ADDQ $128, DI | ||||||
|  | 	JMP  between_0_and_64 | ||||||
|  | 
 | ||||||
|  | less_than_128: | ||||||
|  | 	XOR_UPPER_AVX2(DI, SI, 0, Y4, Y5, Y6, Y7, Y12, Y13) | ||||||
|  | 	EXTRACT_LOWER(BX, Y4, Y5, Y6, Y7, Y13) | ||||||
|  | 	ADDQ $64, SI | ||||||
|  | 	ADDQ $64, DI | ||||||
|  | 	SUBQ $64, CX | ||||||
|  | 	JMP  finalize | ||||||
|  | 
 | ||||||
|  | between_0_and_64: | ||||||
|  | 	VMOVDQA X0, X4 | ||||||
|  | 	VMOVDQA X1, X5 | ||||||
|  | 	VMOVDQA X2, X6 | ||||||
|  | 	VMOVDQA X3, X7 | ||||||
|  | 
 | ||||||
|  | 	MOVQ DX, R9 | ||||||
|  | 
 | ||||||
|  | chacha_loop_64: | ||||||
|  | 	CHACHA_QROUND_AVX(X4, X5, X6, X7, X13, X14, X15) | ||||||
|  | 	CHACHA_SHUFFLE_AVX(X5, X6, X7) | ||||||
|  | 	CHACHA_QROUND_AVX(X4, X5, X6, X7, X13, X14, X15) | ||||||
|  | 	CHACHA_SHUFFLE_AVX(X7, X6, X5) | ||||||
|  | 	SUBQ $2, R9 | ||||||
|  | 	JA   chacha_loop_64 | ||||||
|  | 
 | ||||||
|  | 	VPADDD  X0, X4, X4 | ||||||
|  | 	VPADDD  X1, X5, X5 | ||||||
|  | 	VPADDD  X2, X6, X6 | ||||||
|  | 	VPADDD  X3, X7, X7 | ||||||
|  | 	VMOVDQU ·one<>(SB), X0 | ||||||
|  | 	VPADDQ  X0, X3, X3 | ||||||
|  | 
 | ||||||
|  | 	CMPQ CX, $64 | ||||||
|  | 	JB   less_than_64 | ||||||
|  | 
 | ||||||
|  | 	XOR_AVX(DI, SI, 0, X4, X5, X6, X7, X13) | ||||||
|  | 	SUBQ $64, CX | ||||||
|  | 	JMP  done | ||||||
|  | 
 | ||||||
|  | less_than_64: | ||||||
|  | 	VMOVDQU X4, 0(BX) | ||||||
|  | 	VMOVDQU X5, 16(BX) | ||||||
|  | 	VMOVDQU X6, 32(BX) | ||||||
|  | 	VMOVDQU X7, 48(BX) | ||||||
|  | 
 | ||||||
|  | finalize: | ||||||
|  | 	XORQ R11, R11 | ||||||
|  | 	XORQ R12, R12 | ||||||
|  | 	MOVQ CX, BP | ||||||
|  | 
 | ||||||
|  | xor_loop: | ||||||
|  | 	MOVB 0(SI), R11 | ||||||
|  | 	MOVB 0(BX), R12 | ||||||
|  | 	XORQ R11, R12 | ||||||
|  | 	MOVB R12, 0(DI) | ||||||
|  | 	INCQ SI | ||||||
|  | 	INCQ BX | ||||||
|  | 	INCQ DI | ||||||
|  | 	DECQ BP | ||||||
|  | 	JA   xor_loop | ||||||
|  | 
 | ||||||
|  | done: | ||||||
|  | 	VMOVDQU X3, 48(AX) | ||||||
|  | 	VZEROUPPER | ||||||
|  | 	MOVQ    R8, SP | ||||||
|  | 	MOVQ    CX, ret+72(FP) | ||||||
|  | 	RET | ||||||
|  | 
 | ||||||
							
								
								
									
										60
									
								
								vendor/github.com/aead/chacha20/chacha/chacha_386.go
									
										
									
										generated
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										60
									
								
								vendor/github.com/aead/chacha20/chacha/chacha_386.go
									
										
									
										generated
									
									
										vendored
									
									
										Normal file
									
								
							|  | @ -0,0 +1,60 @@ | ||||||
|  | // Copyright (c) 2016 Andreas Auernhammer. All rights reserved. | ||||||
|  | // Use of this source code is governed by a license that can be | ||||||
|  | // found in the LICENSE file. | ||||||
|  | 
 | ||||||
|  | // +build 386,!gccgo,!appengine,!nacl | ||||||
|  | 
 | ||||||
|  | package chacha | ||||||
|  | 
 | ||||||
|  | import ( | ||||||
|  | 	"encoding/binary" | ||||||
|  | 
 | ||||||
|  | 	"golang.org/x/sys/cpu" | ||||||
|  | ) | ||||||
|  | 
 | ||||||
|  | func init() { | ||||||
|  | 	useSSE2 = cpu.X86.HasSSE2 | ||||||
|  | 	useSSSE3 = cpu.X86.HasSSSE3 | ||||||
|  | 	useAVX = false | ||||||
|  | 	useAVX2 = false | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | func initialize(state *[64]byte, key []byte, nonce *[16]byte) { | ||||||
|  | 	binary.LittleEndian.PutUint32(state[0:], sigma[0]) | ||||||
|  | 	binary.LittleEndian.PutUint32(state[4:], sigma[1]) | ||||||
|  | 	binary.LittleEndian.PutUint32(state[8:], sigma[2]) | ||||||
|  | 	binary.LittleEndian.PutUint32(state[12:], sigma[3]) | ||||||
|  | 	copy(state[16:], key[:]) | ||||||
|  | 	copy(state[48:], nonce[:]) | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | // This function is implemented in chacha_386.s | ||||||
|  | //go:noescape | ||||||
|  | func hChaCha20SSE2(out *[32]byte, nonce *[16]byte, key *[32]byte) | ||||||
|  | 
 | ||||||
|  | // This function is implemented in chacha_386.s | ||||||
|  | //go:noescape | ||||||
|  | func hChaCha20SSSE3(out *[32]byte, nonce *[16]byte, key *[32]byte) | ||||||
|  | 
 | ||||||
|  | // This function is implemented in chacha_386.s | ||||||
|  | //go:noescape | ||||||
|  | func xorKeyStreamSSE2(dst, src []byte, block, state *[64]byte, rounds int) int | ||||||
|  | 
 | ||||||
|  | func hChaCha20(out *[32]byte, nonce *[16]byte, key *[32]byte) { | ||||||
|  | 	switch { | ||||||
|  | 	case useSSSE3: | ||||||
|  | 		hChaCha20SSSE3(out, nonce, key) | ||||||
|  | 	case useSSE2: | ||||||
|  | 		hChaCha20SSE2(out, nonce, key) | ||||||
|  | 	default: | ||||||
|  | 		hChaCha20Generic(out, nonce, key) | ||||||
|  | 	} | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | func xorKeyStream(dst, src []byte, block, state *[64]byte, rounds int) int { | ||||||
|  | 	if useSSE2 { | ||||||
|  | 		return xorKeyStreamSSE2(dst, src, block, state, rounds) | ||||||
|  | 	} else { | ||||||
|  | 		return xorKeyStreamGeneric(dst, src, block, state, rounds) | ||||||
|  | 	} | ||||||
|  | } | ||||||
							
								
								
									
										163
									
								
								vendor/github.com/aead/chacha20/chacha/chacha_386.s
									
										
									
										generated
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										163
									
								
								vendor/github.com/aead/chacha20/chacha/chacha_386.s
									
										
									
										generated
									
									
										vendored
									
									
										Normal file
									
								
							|  | @ -0,0 +1,163 @@ | ||||||
|  | // Copyright (c) 2016 Andreas Auernhammer. All rights reserved. | ||||||
|  | // Use of this source code is governed by a license that can be | ||||||
|  | // found in the LICENSE file. | ||||||
|  | 
 | ||||||
|  | // +build 386,!gccgo,!appengine,!nacl | ||||||
|  | 
 | ||||||
|  | #include "const.s" | ||||||
|  | #include "macro.s" | ||||||
|  | 
 | ||||||
|  | // FINALIZE xors len bytes from src and block using | ||||||
|  | // the temp. registers t0 and t1 and writes the result | ||||||
|  | // to dst. | ||||||
|  | #define FINALIZE(dst, src, block, len, t0, t1) \ | ||||||
|  | 	XORL t0, t0;       \
 | ||||||
|  | 	XORL t1, t1;       \
 | ||||||
|  | 	FINALIZE_LOOP:;    \
 | ||||||
|  | 	MOVB 0(src), t0;   \
 | ||||||
|  | 	MOVB 0(block), t1; \
 | ||||||
|  | 	XORL t0, t1;       \
 | ||||||
|  | 	MOVB t1, 0(dst);   \
 | ||||||
|  | 	INCL src;          \
 | ||||||
|  | 	INCL block;        \
 | ||||||
|  | 	INCL dst;          \
 | ||||||
|  | 	DECL len;          \
 | ||||||
|  | 	JG   FINALIZE_LOOP \ | ||||||
|  | 
 | ||||||
|  | #define Dst DI | ||||||
|  | #define Nonce AX | ||||||
|  | #define Key BX | ||||||
|  | #define Rounds DX | ||||||
|  | 
 | ||||||
|  | // func hChaCha20SSE2(out *[32]byte, nonce *[16]byte, key *[32]byte) | ||||||
|  | // | ||||||
|  | // Builds the ChaCha state from sigma (row 0), key (rows 1-2) and nonce | ||||||
|  | // (row 3), runs 20 rounds with SSE2 instructions and stores rows 0 and 3 of | ||||||
|  | // the result to out. The initial state is not added back before the store. | ||||||
|  | // Frame $0-12: no locals, three 4-byte pointer arguments. | ||||||
|  | TEXT ·hChaCha20SSE2(SB), 4, $0-12 | ||||||
|  | 	MOVL out+0(FP), Dst | ||||||
|  | 	MOVL nonce+4(FP), Nonce | ||||||
|  | 	MOVL key+8(FP), Key | ||||||
|  | 
 | ||||||
|  | 	// X0..X3 hold the four 16-byte rows of the state. | ||||||
|  | 	MOVOU ·sigma<>(SB), X0 | ||||||
|  | 	MOVOU 0*16(Key), X1 | ||||||
|  | 	MOVOU 1*16(Key), X2 | ||||||
|  | 	MOVOU 0*16(Nonce), X3 | ||||||
|  | 	MOVL  $20, Rounds | ||||||
|  | 
 | ||||||
|  | 	// Each pass performs two rounds: a quarter-round over the rows as loaded, | ||||||
|  | 	// a shuffle of rows 1-3, a second quarter-round, and the inverse shuffle | ||||||
|  | 	// (same macro with v1 and v3 swapped). X4 is the scratch register. | ||||||
|  | chacha_loop: | ||||||
|  | 	CHACHA_QROUND_SSE2(X0, X1, X2, X3, X4) | ||||||
|  | 	CHACHA_SHUFFLE_SSE(X1, X2, X3) | ||||||
|  | 	CHACHA_QROUND_SSE2(X0, X1, X2, X3, X4) | ||||||
|  | 	CHACHA_SHUFFLE_SSE(X3, X2, X1) | ||||||
|  | 	SUBL $2, Rounds | ||||||
|  | 	JNZ  chacha_loop | ||||||
|  | 
 | ||||||
|  | 	// Only the first and last rows form the 32-byte output. | ||||||
|  | 	MOVOU X0, 0*16(Dst) | ||||||
|  | 	MOVOU X3, 1*16(Dst) | ||||||
|  | 	RET | ||||||
|  | 
 | ||||||
|  | // func hChaCha20SSSE3(out *[32]byte, nonce *[16]byte, key *[32]byte) | ||||||
|  | // | ||||||
|  | // Same computation as hChaCha20SSE2 (20 rounds over sigma/key/nonce, rows 0 | ||||||
|  | // and 3 written to out), but the quarter-round macro does its 16- and 8-bit | ||||||
|  | // rotations with PSHUFB using the rol16/rol8 tables. | ||||||
|  | // Frame $0-12: no locals, three 4-byte pointer arguments. | ||||||
|  | TEXT ·hChaCha20SSSE3(SB), 4, $0-12 | ||||||
|  | 	MOVL out+0(FP), Dst | ||||||
|  | 	MOVL nonce+4(FP), Nonce | ||||||
|  | 	MOVL key+8(FP), Key | ||||||
|  | 
 | ||||||
|  | 	// X0..X3 hold the four 16-byte rows of the state. | ||||||
|  | 	MOVOU ·sigma<>(SB), X0 | ||||||
|  | 	MOVOU 0*16(Key), X1 | ||||||
|  | 	MOVOU 1*16(Key), X2 | ||||||
|  | 	MOVOU 0*16(Nonce), X3 | ||||||
|  | 	MOVL  $20, Rounds | ||||||
|  | 
 | ||||||
|  | 	// PSHUFB masks for the 16-bit and 8-bit left rotations, kept in X5/X6 | ||||||
|  | 	// for the whole loop. | ||||||
|  | 	MOVOU ·rol16<>(SB), X5 | ||||||
|  | 	MOVOU ·rol8<>(SB), X6 | ||||||
|  | 
 | ||||||
|  | 	// Two rounds per pass; X4 is the scratch register of the quarter-round. | ||||||
|  | chacha_loop: | ||||||
|  | 	CHACHA_QROUND_SSSE3(X0, X1, X2, X3, X4, X5, X6) | ||||||
|  | 	CHACHA_SHUFFLE_SSE(X1, X2, X3) | ||||||
|  | 	CHACHA_QROUND_SSSE3(X0, X1, X2, X3, X4, X5, X6) | ||||||
|  | 	CHACHA_SHUFFLE_SSE(X3, X2, X1) | ||||||
|  | 	SUBL $2, Rounds | ||||||
|  | 	JNZ  chacha_loop | ||||||
|  | 
 | ||||||
|  | 	// Only the first and last rows form the 32-byte output. | ||||||
|  | 	MOVOU X0, 0*16(Dst) | ||||||
|  | 	MOVOU X3, 1*16(Dst) | ||||||
|  | 	RET | ||||||
|  | 
 | ||||||
|  | #undef Dst | ||||||
|  | #undef Nonce | ||||||
|  | #undef Key | ||||||
|  | #undef Rounds | ||||||
|  | 
 | ||||||
|  | #define State AX | ||||||
|  | #define Dst DI | ||||||
|  | #define Src SI | ||||||
|  | #define Len DX | ||||||
|  | #define Tmp0 BX | ||||||
|  | #define Tmp1 BP | ||||||
|  | 
 | ||||||
|  | // func xorKeyStreamSSE2(dst, src []byte, block, state *[64]byte, rounds int) int | ||||||
|  | // | ||||||
|  | // XORs src with ChaCha keystream into dst, one 64-byte block at a time. | ||||||
|  | // A trailing partial block (fewer than 64 bytes) is handled by storing the | ||||||
|  | // whole keystream block in block and XORing only the remaining bytes; the | ||||||
|  | // function then returns that remaining length, otherwise 0. | ||||||
|  | // The last row of state (X3) is advanced by one per generated block and | ||||||
|  | // written back before returning; the other rows of state are only read. | ||||||
|  | // Frame $0-40: two 12-byte slice headers, two pointers, rounds and the result. | ||||||
|  | TEXT ·xorKeyStreamSSE2(SB), 4, $0-40 | ||||||
|  | 	MOVL dst_base+0(FP), Dst | ||||||
|  | 	MOVL src_base+12(FP), Src | ||||||
|  | 	MOVL state+28(FP), State | ||||||
|  | 	MOVL src_len+16(FP), Len | ||||||
|  | 	MOVL $0, ret+36(FP)       // Number of bytes written to the keystream buffer - 0 iff len mod 64 == 0 | ||||||
|  | 
 | ||||||
|  | 	// X0..X3 keep the input state for the whole call. | ||||||
|  | 	MOVOU 0*16(State), X0 | ||||||
|  | 	MOVOU 1*16(State), X1 | ||||||
|  | 	MOVOU 2*16(State), X2 | ||||||
|  | 	MOVOU 3*16(State), X3 | ||||||
|  | 	TESTL Len, Len | ||||||
|  | 	JZ    DONE | ||||||
|  | 
 | ||||||
|  | 	// Work on a copy of the state in X4..X7; reload the round count per block. | ||||||
|  | GENERATE_KEYSTREAM: | ||||||
|  | 	MOVO X0, X4 | ||||||
|  | 	MOVO X1, X5 | ||||||
|  | 	MOVO X2, X6 | ||||||
|  | 	MOVO X3, X7 | ||||||
|  | 	MOVL rounds+32(FP), Tmp0 | ||||||
|  | 
 | ||||||
|  | 	// Two rounds per pass. X0 serves as the scratch register here, which is | ||||||
|  | 	// why it has to be reloaded from state afterwards. | ||||||
|  | CHACHA_LOOP: | ||||||
|  | 	CHACHA_QROUND_SSE2(X4, X5, X6, X7, X0) | ||||||
|  | 	CHACHA_SHUFFLE_SSE(X5, X6, X7) | ||||||
|  | 	CHACHA_QROUND_SSE2(X4, X5, X6, X7, X0) | ||||||
|  | 	CHACHA_SHUFFLE_SSE(X7, X6, X5) | ||||||
|  | 	SUBL $2, Tmp0 | ||||||
|  | 	JA   CHACHA_LOOP | ||||||
|  | 
 | ||||||
|  | 	// Add the input state to the permuted copy to obtain the keystream block, | ||||||
|  | 	// then bump the counter in X3 with a 64-bit add of the constant one. | ||||||
|  | 	MOVOU 0*16(State), X0 // Restore X0 from state | ||||||
|  | 	PADDL X0, X4 | ||||||
|  | 	PADDL X1, X5 | ||||||
|  | 	PADDL X2, X6 | ||||||
|  | 	PADDL X3, X7 | ||||||
|  | 	MOVOU ·one<>(SB), X0 | ||||||
|  | 	PADDQ X0, X3 | ||||||
|  | 
 | ||||||
|  | 	// Fewer than 64 bytes left: take the partial-block path. | ||||||
|  | 	CMPL Len, $64 | ||||||
|  | 	JL   BUFFER_KEYSTREAM | ||||||
|  | 
 | ||||||
|  | 	// XOR_SSE is defined outside this file; presumably it XORs 64 bytes of | ||||||
|  | 	// Src with X4..X7 into Dst using X0 as scratch - TODO confirm in macro.s. | ||||||
|  | 	XOR_SSE(Dst, Src, 0, X4, X5, X6, X7, X0) | ||||||
|  | 	MOVOU 0*16(State), X0    // Restore X0 from state | ||||||
|  | 	ADDL  $64, Src | ||||||
|  | 	ADDL  $64, Dst | ||||||
|  | 	SUBL  $64, Len | ||||||
|  | 	JZ    DONE | ||||||
|  | 	JMP   GENERATE_KEYSTREAM // There is at least one more plaintext byte | ||||||
|  | 
 | ||||||
|  | 	// The State register is reused to point at block; it is reloaded at DONE. | ||||||
|  | BUFFER_KEYSTREAM: | ||||||
|  | 	MOVL  block+24(FP), State | ||||||
|  | 	MOVOU X4, 0(State) | ||||||
|  | 	MOVOU X5, 16(State) | ||||||
|  | 	MOVOU X6, 32(State) | ||||||
|  | 	MOVOU X7, 48(State) | ||||||
|  | 	MOVL  Len, ret+36(FP)     // Number of bytes written to the keystream buffer - 0 < Len < 64 | ||||||
|  | 	FINALIZE(Dst, Src, State, Len, Tmp0, Tmp1) | ||||||
|  | 
 | ||||||
|  | 	// Persist the advanced counter row. | ||||||
|  | DONE: | ||||||
|  | 	MOVL  state+28(FP), State | ||||||
|  | 	MOVOU X3, 3*16(State) | ||||||
|  | 	RET | ||||||
|  | 
 | ||||||
|  | #undef State | ||||||
|  | #undef Dst | ||||||
|  | #undef Src | ||||||
|  | #undef Len | ||||||
|  | #undef Tmp0 | ||||||
|  | #undef Tmp1 | ||||||
							
								
								
									
										76
									
								
								vendor/github.com/aead/chacha20/chacha/chacha_amd64.go
									
										
									
										generated
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										76
									
								
								vendor/github.com/aead/chacha20/chacha/chacha_amd64.go
									
										
									
										generated
									
									
										vendored
									
									
										Normal file
									
								
							|  | @ -0,0 +1,76 @@ | ||||||
|  | // Copyright (c) 2017 Andreas Auernhammer. All rights reserved. | ||||||
|  | // Use of this source code is governed by a license that can be | ||||||
|  | // found in the LICENSE file. | ||||||
|  | 
 | ||||||
|  | // +build go1.7,amd64,!gccgo,!appengine,!nacl | ||||||
|  | 
 | ||||||
|  | package chacha | ||||||
|  | 
 | ||||||
|  | import "golang.org/x/sys/cpu" | ||||||
|  | 
 | ||||||
|  | func init() { | ||||||
|  | 	useSSE2 = cpu.X86.HasSSE2 | ||||||
|  | 	useSSSE3 = cpu.X86.HasSSSE3 | ||||||
|  | 	useAVX = cpu.X86.HasAVX | ||||||
|  | 	useAVX2 = cpu.X86.HasAVX2 | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | // initialize is the assembly counterpart of the pure Go initialize used on | ||||||
|  | // other platforms (NOTE(review): presumably the same state layout - confirm). | ||||||
|  | // This function is implemented in chacha_amd64.s | ||||||
|  | //go:noescape | ||||||
|  | func initialize(state *[64]byte, key []byte, nonce *[16]byte) | ||||||
|  | 
 | ||||||
|  | // hChaCha20SSE2 is the backend hChaCha20 uses when only useSSE2 is set. | ||||||
|  | // This function is implemented in chacha_amd64.s | ||||||
|  | //go:noescape | ||||||
|  | func hChaCha20SSE2(out *[32]byte, nonce *[16]byte, key *[32]byte) | ||||||
|  | 
 | ||||||
|  | // hChaCha20SSSE3 is the backend hChaCha20 uses when useSSSE3 is set and useAVX is not. | ||||||
|  | // This function is implemented in chacha_amd64.s | ||||||
|  | //go:noescape | ||||||
|  | func hChaCha20SSSE3(out *[32]byte, nonce *[16]byte, key *[32]byte) | ||||||
|  | 
 | ||||||
|  | // hChaCha20AVX is the backend hChaCha20 prefers when useAVX is set. | ||||||
|  | // This function is implemented in chachaAVX2_amd64.s | ||||||
|  | //go:noescape | ||||||
|  | func hChaCha20AVX(out *[32]byte, nonce *[16]byte, key *[32]byte) | ||||||
|  | 
 | ||||||
|  | // xorKeyStreamSSE2 is the backend xorKeyStream uses when only useSSE2 is set. | ||||||
|  | // This function is implemented in chacha_amd64.s | ||||||
|  | //go:noescape | ||||||
|  | func xorKeyStreamSSE2(dst, src []byte, block, state *[64]byte, rounds int) int | ||||||
|  | 
 | ||||||
|  | // xorKeyStreamSSSE3 is the backend xorKeyStream uses when useSSSE3 is the best flag set. | ||||||
|  | // This function is implemented in chacha_amd64.s | ||||||
|  | //go:noescape | ||||||
|  | func xorKeyStreamSSSE3(dst, src []byte, block, state *[64]byte, rounds int) int | ||||||
|  | 
 | ||||||
|  | // xorKeyStreamAVX is the backend xorKeyStream uses when useAVX is set and useAVX2 is not. | ||||||
|  | // This function is implemented in chacha_amd64.s | ||||||
|  | //go:noescape | ||||||
|  | func xorKeyStreamAVX(dst, src []byte, block, state *[64]byte, rounds int) int | ||||||
|  | 
 | ||||||
|  | // xorKeyStreamAVX2 is the backend xorKeyStream prefers when useAVX2 is set. | ||||||
|  | // This function is implemented in chachaAVX2_amd64.s | ||||||
|  | //go:noescape | ||||||
|  | func xorKeyStreamAVX2(dst, src []byte, block, state *[64]byte, rounds int) int | ||||||
|  | 
 | ||||||
|  | func hChaCha20(out *[32]byte, nonce *[16]byte, key *[32]byte) { | ||||||
|  | 	switch { | ||||||
|  | 	case useAVX: | ||||||
|  | 		hChaCha20AVX(out, nonce, key) | ||||||
|  | 	case useSSSE3: | ||||||
|  | 		hChaCha20SSSE3(out, nonce, key) | ||||||
|  | 	case useSSE2: | ||||||
|  | 		hChaCha20SSE2(out, nonce, key) | ||||||
|  | 	default: | ||||||
|  | 		hChaCha20Generic(out, nonce, key) | ||||||
|  | 	} | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | func xorKeyStream(dst, src []byte, block, state *[64]byte, rounds int) int { | ||||||
|  | 	switch { | ||||||
|  | 	case useAVX2: | ||||||
|  | 		return xorKeyStreamAVX2(dst, src, block, state, rounds) | ||||||
|  | 	case useAVX: | ||||||
|  | 		return xorKeyStreamAVX(dst, src, block, state, rounds) | ||||||
|  | 	case useSSSE3: | ||||||
|  | 		return xorKeyStreamSSSE3(dst, src, block, state, rounds) | ||||||
|  | 	case useSSE2: | ||||||
|  | 		return xorKeyStreamSSE2(dst, src, block, state, rounds) | ||||||
|  | 	default: | ||||||
|  | 		return xorKeyStreamGeneric(dst, src, block, state, rounds) | ||||||
|  | 	} | ||||||
|  | } | ||||||
							
								
								
									
										1072
									
								
								vendor/github.com/aead/chacha20/chacha/chacha_amd64.s
									
										
									
										generated
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										1072
									
								
								vendor/github.com/aead/chacha20/chacha/chacha_amd64.s
									
										
									
										generated
									
									
										vendored
									
									
										Normal file
									
								
							
										
											
												File diff suppressed because it is too large
												Load diff
											
										
									
								
							
							
								
								
									
										319
									
								
								vendor/github.com/aead/chacha20/chacha/chacha_generic.go
									
										
									
										generated
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										319
									
								
								vendor/github.com/aead/chacha20/chacha/chacha_generic.go
									
										
									
										generated
									
									
										vendored
									
									
										Normal file
									
								
							|  | @ -0,0 +1,319 @@ | ||||||
|  | // Copyright (c) 2016 Andreas Auernhammer. All rights reserved. | ||||||
|  | // Use of this source code is governed by a license that can be | ||||||
|  | // found in the LICENSE file. | ||||||
|  | 
 | ||||||
|  | package chacha | ||||||
|  | 
 | ||||||
|  | import "encoding/binary" | ||||||
|  | 
 | ||||||
|  | var sigma = [4]uint32{0x61707865, 0x3320646e, 0x79622d32, 0x6b206574} | ||||||
|  | 
 | ||||||
|  | func xorKeyStreamGeneric(dst, src []byte, block, state *[64]byte, rounds int) int { | ||||||
|  | 	for len(src) >= 64 { | ||||||
|  | 		chachaGeneric(block, state, rounds) | ||||||
|  | 
 | ||||||
|  | 		for i, v := range block { | ||||||
|  | 			dst[i] = src[i] ^ v | ||||||
|  | 		} | ||||||
|  | 		src = src[64:] | ||||||
|  | 		dst = dst[64:] | ||||||
|  | 	} | ||||||
|  | 
 | ||||||
|  | 	n := len(src) | ||||||
|  | 	if n > 0 { | ||||||
|  | 		chachaGeneric(block, state, rounds) | ||||||
|  | 		for i, v := range src { | ||||||
|  | 			dst[i] = v ^ block[i] | ||||||
|  | 		} | ||||||
|  | 	} | ||||||
|  | 	return n | ||||||
|  | } | ||||||
|  | 
 | ||||||
// chachaGeneric produces one 64-byte keystream block from state into dst and
// then advances the block counter stored in state.
//
// The state is read as sixteen little-endian 32-bit words. The rounds are
// applied two at a time (four column quarter-rounds followed by four diagonal
// quarter-rounds), so an odd rounds value is effectively rounded up and a
// value <= 0 applies no rounds. The original words are then added to the
// permuted words and the sums are serialised little-endian into dst.
//
// Word 12 of state is incremented by one; when it wraps to zero, word 13 is
// incremented too. The remaining words of state are left untouched.
func chachaGeneric(dst *[64]byte, state *[64]byte, rounds int) {
	var input [16]uint32
	for w := range input {
		input[w] = binary.LittleEndian.Uint32(state[4*w:])
	}
	x := input // working copy that the rounds permute

	// quarter applies the ChaCha quarter-round (rotations 16, 12, 8, 7) to the
	// four working words selected by a, b, c and d.
	quarter := func(a, b, c, d int) {
		x[a] += x[b]
		x[d] ^= x[a]
		x[d] = x[d]<<16 | x[d]>>16
		x[c] += x[d]
		x[b] ^= x[c]
		x[b] = x[b]<<12 | x[b]>>20
		x[a] += x[b]
		x[d] ^= x[a]
		x[d] = x[d]<<8 | x[d]>>24
		x[c] += x[d]
		x[b] ^= x[c]
		x[b] = x[b]<<7 | x[b]>>25
	}

	for done := 0; done < rounds; done += 2 {
		// Column round.
		quarter(0, 4, 8, 12)
		quarter(1, 5, 9, 13)
		quarter(2, 6, 10, 14)
		quarter(3, 7, 11, 15)
		// Diagonal round.
		quarter(0, 5, 10, 15)
		quarter(1, 6, 11, 12)
		quarter(2, 7, 8, 13)
		quarter(3, 4, 9, 14)
	}

	// Advance the counter before dst is written, carrying into word 13 only
	// when word 12 wraps around.
	counter := input[12] + 1
	binary.LittleEndian.PutUint32(state[48:], counter)
	if counter == 0 {
		binary.LittleEndian.PutUint32(state[52:], input[13]+1)
	}

	// Feed-forward: the output uses the counter value the block started with.
	for w := range x {
		binary.LittleEndian.PutUint32(dst[4*w:], x[w]+input[w])
	}
}
|  | 
 | ||||||
|  | func hChaCha20Generic(out *[32]byte, nonce *[16]byte, key *[32]byte) { | ||||||
|  | 	v00 := sigma[0] | ||||||
|  | 	v01 := sigma[1] | ||||||
|  | 	v02 := sigma[2] | ||||||
|  | 	v03 := sigma[3] | ||||||
|  | 	v04 := binary.LittleEndian.Uint32(key[0:]) | ||||||
|  | 	v05 := binary.LittleEndian.Uint32(key[4:]) | ||||||
|  | 	v06 := binary.LittleEndian.Uint32(key[8:]) | ||||||
|  | 	v07 := binary.LittleEndian.Uint32(key[12:]) | ||||||
|  | 	v08 := binary.LittleEndian.Uint32(key[16:]) | ||||||
|  | 	v09 := binary.LittleEndian.Uint32(key[20:]) | ||||||
|  | 	v10 := binary.LittleEndian.Uint32(key[24:]) | ||||||
|  | 	v11 := binary.LittleEndian.Uint32(key[28:]) | ||||||
|  | 	v12 := binary.LittleEndian.Uint32(nonce[0:]) | ||||||
|  | 	v13 := binary.LittleEndian.Uint32(nonce[4:]) | ||||||
|  | 	v14 := binary.LittleEndian.Uint32(nonce[8:]) | ||||||
|  | 	v15 := binary.LittleEndian.Uint32(nonce[12:]) | ||||||
|  | 
 | ||||||
|  | 	for i := 0; i < 20; i += 2 { | ||||||
|  | 		v00 += v04 | ||||||
|  | 		v12 ^= v00 | ||||||
|  | 		v12 = (v12 << 16) | (v12 >> 16) | ||||||
|  | 		v08 += v12 | ||||||
|  | 		v04 ^= v08 | ||||||
|  | 		v04 = (v04 << 12) | (v04 >> 20) | ||||||
|  | 		v00 += v04 | ||||||
|  | 		v12 ^= v00 | ||||||
|  | 		v12 = (v12 << 8) | (v12 >> 24) | ||||||
|  | 		v08 += v12 | ||||||
|  | 		v04 ^= v08 | ||||||
|  | 		v04 = (v04 << 7) | (v04 >> 25) | ||||||
|  | 		v01 += v05 | ||||||
|  | 		v13 ^= v01 | ||||||
|  | 		v13 = (v13 << 16) | (v13 >> 16) | ||||||
|  | 		v09 += v13 | ||||||
|  | 		v05 ^= v09 | ||||||
|  | 		v05 = (v05 << 12) | (v05 >> 20) | ||||||
|  | 		v01 += v05 | ||||||
|  | 		v13 ^= v01 | ||||||
|  | 		v13 = (v13 << 8) | (v13 >> 24) | ||||||
|  | 		v09 += v13 | ||||||
|  | 		v05 ^= v09 | ||||||
|  | 		v05 = (v05 << 7) | (v05 >> 25) | ||||||
|  | 		v02 += v06 | ||||||
|  | 		v14 ^= v02 | ||||||
|  | 		v14 = (v14 << 16) | (v14 >> 16) | ||||||
|  | 		v10 += v14 | ||||||
|  | 		v06 ^= v10 | ||||||
|  | 		v06 = (v06 << 12) | (v06 >> 20) | ||||||
|  | 		v02 += v06 | ||||||
|  | 		v14 ^= v02 | ||||||
|  | 		v14 = (v14 << 8) | (v14 >> 24) | ||||||
|  | 		v10 += v14 | ||||||
|  | 		v06 ^= v10 | ||||||
|  | 		v06 = (v06 << 7) | (v06 >> 25) | ||||||
|  | 		v03 += v07 | ||||||
|  | 		v15 ^= v03 | ||||||
|  | 		v15 = (v15 << 16) | (v15 >> 16) | ||||||
|  | 		v11 += v15 | ||||||
|  | 		v07 ^= v11 | ||||||
|  | 		v07 = (v07 << 12) | (v07 >> 20) | ||||||
|  | 		v03 += v07 | ||||||
|  | 		v15 ^= v03 | ||||||
|  | 		v15 = (v15 << 8) | (v15 >> 24) | ||||||
|  | 		v11 += v15 | ||||||
|  | 		v07 ^= v11 | ||||||
|  | 		v07 = (v07 << 7) | (v07 >> 25) | ||||||
|  | 		v00 += v05 | ||||||
|  | 		v15 ^= v00 | ||||||
|  | 		v15 = (v15 << 16) | (v15 >> 16) | ||||||
|  | 		v10 += v15 | ||||||
|  | 		v05 ^= v10 | ||||||
|  | 		v05 = (v05 << 12) | (v05 >> 20) | ||||||
|  | 		v00 += v05 | ||||||
|  | 		v15 ^= v00 | ||||||
|  | 		v15 = (v15 << 8) | (v15 >> 24) | ||||||
|  | 		v10 += v15 | ||||||
|  | 		v05 ^= v10 | ||||||
|  | 		v05 = (v05 << 7) | (v05 >> 25) | ||||||
|  | 		v01 += v06 | ||||||
|  | 		v12 ^= v01 | ||||||
|  | 		v12 = (v12 << 16) | (v12 >> 16) | ||||||
|  | 		v11 += v12 | ||||||
|  | 		v06 ^= v11 | ||||||
|  | 		v06 = (v06 << 12) | (v06 >> 20) | ||||||
|  | 		v01 += v06 | ||||||
|  | 		v12 ^= v01 | ||||||
|  | 		v12 = (v12 << 8) | (v12 >> 24) | ||||||
|  | 		v11 += v12 | ||||||
|  | 		v06 ^= v11 | ||||||
|  | 		v06 = (v06 << 7) | (v06 >> 25) | ||||||
|  | 		v02 += v07 | ||||||
|  | 		v13 ^= v02 | ||||||
|  | 		v13 = (v13 << 16) | (v13 >> 16) | ||||||
|  | 		v08 += v13 | ||||||
|  | 		v07 ^= v08 | ||||||
|  | 		v07 = (v07 << 12) | (v07 >> 20) | ||||||
|  | 		v02 += v07 | ||||||
|  | 		v13 ^= v02 | ||||||
|  | 		v13 = (v13 << 8) | (v13 >> 24) | ||||||
|  | 		v08 += v13 | ||||||
|  | 		v07 ^= v08 | ||||||
|  | 		v07 = (v07 << 7) | (v07 >> 25) | ||||||
|  | 		v03 += v04 | ||||||
|  | 		v14 ^= v03 | ||||||
|  | 		v14 = (v14 << 16) | (v14 >> 16) | ||||||
|  | 		v09 += v14 | ||||||
|  | 		v04 ^= v09 | ||||||
|  | 		v04 = (v04 << 12) | (v04 >> 20) | ||||||
|  | 		v03 += v04 | ||||||
|  | 		v14 ^= v03 | ||||||
|  | 		v14 = (v14 << 8) | (v14 >> 24) | ||||||
|  | 		v09 += v14 | ||||||
|  | 		v04 ^= v09 | ||||||
|  | 		v04 = (v04 << 7) | (v04 >> 25) | ||||||
|  | 	} | ||||||
|  | 
 | ||||||
|  | 	binary.LittleEndian.PutUint32(out[0:], v00) | ||||||
|  | 	binary.LittleEndian.PutUint32(out[4:], v01) | ||||||
|  | 	binary.LittleEndian.PutUint32(out[8:], v02) | ||||||
|  | 	binary.LittleEndian.PutUint32(out[12:], v03) | ||||||
|  | 	binary.LittleEndian.PutUint32(out[16:], v12) | ||||||
|  | 	binary.LittleEndian.PutUint32(out[20:], v13) | ||||||
|  | 	binary.LittleEndian.PutUint32(out[24:], v14) | ||||||
|  | 	binary.LittleEndian.PutUint32(out[28:], v15) | ||||||
|  | } | ||||||
							
								
								
									
										33
									
								
								vendor/github.com/aead/chacha20/chacha/chacha_ref.go
									
										
									
										generated
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										33
									
								
								vendor/github.com/aead/chacha20/chacha/chacha_ref.go
									
										
									
										generated
									
									
										vendored
									
									
										Normal file
									
								
							|  | @ -0,0 +1,33 @@ | ||||||
|  | // Copyright (c) 2016 Andreas Auernhammer. All rights reserved. | ||||||
|  | // Use of this source code is governed by a license that can be | ||||||
|  | // found in the LICENSE file. | ||||||
|  | 
 | ||||||
|  | // +build !amd64,!386 gccgo appengine nacl | ||||||
|  | 
 | ||||||
|  | package chacha | ||||||
|  | 
 | ||||||
|  | import "encoding/binary" | ||||||
|  | 
 | ||||||
|  | func init() { | ||||||
|  | 	useSSE2 = false | ||||||
|  | 	useSSSE3 = false | ||||||
|  | 	useAVX = false | ||||||
|  | 	useAVX2 = false | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | func initialize(state *[64]byte, key []byte, nonce *[16]byte) { | ||||||
|  | 	binary.LittleEndian.PutUint32(state[0:], sigma[0]) | ||||||
|  | 	binary.LittleEndian.PutUint32(state[4:], sigma[1]) | ||||||
|  | 	binary.LittleEndian.PutUint32(state[8:], sigma[2]) | ||||||
|  | 	binary.LittleEndian.PutUint32(state[12:], sigma[3]) | ||||||
|  | 	copy(state[16:], key[:]) | ||||||
|  | 	copy(state[48:], nonce[:]) | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | func xorKeyStream(dst, src []byte, block, state *[64]byte, rounds int) int { | ||||||
|  | 	return xorKeyStreamGeneric(dst, src, block, state, rounds) | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | func hChaCha20(out *[32]byte, nonce *[16]byte, key *[32]byte) { | ||||||
|  | 	hChaCha20Generic(out, nonce, key) | ||||||
|  | } | ||||||
							
								
								
									
										53
									
								
								vendor/github.com/aead/chacha20/chacha/const.s
									
										
									
										generated
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										53
									
								
								vendor/github.com/aead/chacha20/chacha/const.s
									
										
									
										generated
									
									
										vendored
									
									
										Normal file
									
								
							|  | @ -0,0 +1,53 @@ | ||||||
|  | // Copyright (c) 2018 Andreas Auernhammer. All rights reserved. | ||||||
|  | // Use of this source code is governed by a license that can be | ||||||
|  | // found in the LICENSE file. | ||||||
|  | 
 | ||||||
|  | // +build 386,!gccgo,!appengine,!nacl amd64,!gccgo,!appengine,!nacl | ||||||
|  | 
 | ||||||
|  | #include "textflag.h" | ||||||
|  | 
 | ||||||
|  | DATA ·sigma<>+0x00(SB)/4, $0x61707865 | ||||||
|  | DATA ·sigma<>+0x04(SB)/4, $0x3320646e | ||||||
|  | DATA ·sigma<>+0x08(SB)/4, $0x79622d32 | ||||||
|  | DATA ·sigma<>+0x0C(SB)/4, $0x6b206574 | ||||||
|  | GLOBL ·sigma<>(SB), (NOPTR+RODATA), $16 // The 4 ChaCha initialization constants | ||||||
|  | 
 | ||||||
|  | // SSE2/SSSE3/AVX constants | ||||||
|  | 
 | ||||||
|  | DATA ·one<>+0x00(SB)/8, $1 | ||||||
|  | DATA ·one<>+0x08(SB)/8, $0 | ||||||
|  | GLOBL ·one<>(SB), (NOPTR+RODATA), $16 // The constant 1 as 128 bit value | ||||||
|  | 
 | ||||||
|  | DATA ·rol16<>+0x00(SB)/8, $0x0504070601000302 | ||||||
|  | DATA ·rol16<>+0x08(SB)/8, $0x0D0C0F0E09080B0A | ||||||
|  | GLOBL ·rol16<>(SB), (NOPTR+RODATA), $16 // The PSHUFB 16 bit left rotate constant | ||||||
|  | 
 | ||||||
|  | DATA ·rol8<>+0x00(SB)/8, $0x0605040702010003 | ||||||
|  | DATA ·rol8<>+0x08(SB)/8, $0x0E0D0C0F0A09080B | ||||||
|  | GLOBL ·rol8<>(SB), (NOPTR+RODATA), $16 // The PSHUFB 8 bit left rotate constant | ||||||
|  | 
 | ||||||
|  | // AVX2 constants | ||||||
|  | 
 | ||||||
|  | DATA ·one_AVX2<>+0x00(SB)/8, $0 | ||||||
|  | DATA ·one_AVX2<>+0x08(SB)/8, $0 | ||||||
|  | DATA ·one_AVX2<>+0x10(SB)/8, $1 | ||||||
|  | DATA ·one_AVX2<>+0x18(SB)/8, $0 | ||||||
|  | GLOBL ·one_AVX2<>(SB), (NOPTR+RODATA), $32 // The constant 1 as 256 bit value | ||||||
|  | 
 | ||||||
|  | DATA ·two_AVX2<>+0x00(SB)/8, $2 | ||||||
|  | DATA ·two_AVX2<>+0x08(SB)/8, $0 | ||||||
|  | DATA ·two_AVX2<>+0x10(SB)/8, $2 | ||||||
|  | DATA ·two_AVX2<>+0x18(SB)/8, $0 | ||||||
|  | GLOBL ·two_AVX2<>(SB), (NOPTR+RODATA), $32 | ||||||
|  | 
 | ||||||
|  | DATA ·rol16_AVX2<>+0x00(SB)/8, $0x0504070601000302 | ||||||
|  | DATA ·rol16_AVX2<>+0x08(SB)/8, $0x0D0C0F0E09080B0A | ||||||
|  | DATA ·rol16_AVX2<>+0x10(SB)/8, $0x0504070601000302 | ||||||
|  | DATA ·rol16_AVX2<>+0x18(SB)/8, $0x0D0C0F0E09080B0A | ||||||
|  | GLOBL ·rol16_AVX2<>(SB), (NOPTR+RODATA), $32 // The VPSHUFB 16 bit left rotate constant | ||||||
|  | 
 | ||||||
|  | DATA ·rol8_AVX2<>+0x00(SB)/8, $0x0605040702010003 | ||||||
|  | DATA ·rol8_AVX2<>+0x08(SB)/8, $0x0E0D0C0F0A09080B | ||||||
|  | DATA ·rol8_AVX2<>+0x10(SB)/8, $0x0605040702010003 | ||||||
|  | DATA ·rol8_AVX2<>+0x18(SB)/8, $0x0E0D0C0F0A09080B | ||||||
|  | GLOBL ·rol8_AVX2<>(SB), (NOPTR+RODATA), $32 // The VPSHUFB 8 bit left rotate constant | ||||||
							
								
								
									
										163
									
								
								vendor/github.com/aead/chacha20/chacha/macro.s
									
										
									
										generated
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										163
									
								
								vendor/github.com/aead/chacha20/chacha/macro.s
									
										
									
										generated
									
									
										vendored
									
									
										Normal file
									
								
							|  | @ -0,0 +1,163 @@ | ||||||
|  | // Copyright (c) 2018 Andreas Auernhammer. All rights reserved. | ||||||
|  | // Use of this source code is governed by a license that can be | ||||||
|  | // found in the LICENSE file. | ||||||
|  | 
 | ||||||
|  | // +build 386,!gccgo,!appengine,!nacl amd64,!gccgo,!appengine,!nacl | ||||||
|  | 
 | ||||||
|  | // ROTL_SSE rotates all 4 32 bit values of the XMM register v | ||||||
|  | // left by n bits using SSE2 instructions (0 <= n <= 32). | ||||||
|  | // The XMM register t is used as a temp. register. | ||||||
|  | #define ROTL_SSE(n, t, v) \ | ||||||
|  | 	MOVO  v, t;       \
 | ||||||
|  | 	PSLLL $n, t;      \
 | ||||||
|  | 	PSRLL $(32-n), v; \
 | ||||||
|  | 	PXOR  t, v | ||||||
|  | 
 | ||||||
|  | // ROTL_AVX rotates all 4/8 32 bit values of the AVX/AVX2 register v | ||||||
|  | // left by n bits using AVX/AVX2 instructions (0 <= n <= 32). | ||||||
|  | // The AVX/AVX2 register t is used as a temp. register. | ||||||
|  | #define ROTL_AVX(n, t, v) \ | ||||||
|  | 	VPSLLD $n, v, t;      \
 | ||||||
|  | 	VPSRLD $(32-n), v, v; \
 | ||||||
|  | 	VPXOR  v, t, v | ||||||
|  | 
 | ||||||
|  | // CHACHA_QROUND_SSE2 performs a ChaCha quarter-round using the | ||||||
|  | // 4 XMM registers v0, v1, v2 and v3. It uses only ROTL_SSE for | ||||||
|  | // rotations. The XMM register t is used as a temp. register. | ||||||
|  | #define CHACHA_QROUND_SSE2(v0, v1, v2, v3, t) \ | ||||||
|  | 	PADDL v1, v0;        \
 | ||||||
|  | 	PXOR  v0, v3;        \
 | ||||||
|  | 	ROTL_SSE(16, t, v3); \
 | ||||||
|  | 	PADDL v3, v2;        \
 | ||||||
|  | 	PXOR  v2, v1;        \
 | ||||||
|  | 	ROTL_SSE(12, t, v1); \
 | ||||||
|  | 	PADDL v1, v0;        \
 | ||||||
|  | 	PXOR  v0, v3;        \
 | ||||||
|  | 	ROTL_SSE(8, t, v3);  \
 | ||||||
|  | 	PADDL v3, v2;        \
 | ||||||
|  | 	PXOR  v2, v1;        \
 | ||||||
|  | 	ROTL_SSE(7, t, v1) | ||||||
|  | 
 | ||||||
|  | // CHACHA_QROUND_SSSE3 performs a ChaCha quarter-round using the | ||||||
|  | // 4 XMM registers v0, v1, v2 and v3. It uses PSHUFB for 8/16 bit | ||||||
|  | // rotations. The XMM register t is used as a temp. register. | ||||||
|  | // | ||||||
|  | // r16 holds the PSHUFB constant for a 16 bit left rotate. | ||||||
|  | // r8 holds the PSHUFB constant for an 8 bit left rotate. | ||||||
|  | #define CHACHA_QROUND_SSSE3(v0, v1, v2, v3, t, r16, r8) \ | ||||||
|  | 	PADDL  v1, v0;       \
 | ||||||
|  | 	PXOR   v0, v3;       \
 | ||||||
|  | 	PSHUFB r16, v3;      \
 | ||||||
|  | 	PADDL  v3, v2;       \
 | ||||||
|  | 	PXOR   v2, v1;       \
 | ||||||
|  | 	ROTL_SSE(12, t, v1); \
 | ||||||
|  | 	PADDL  v1, v0;       \
 | ||||||
|  | 	PXOR   v0, v3;       \
 | ||||||
|  | 	PSHUFB r8, v3;       \
 | ||||||
|  | 	PADDL  v3, v2;       \
 | ||||||
|  | 	PXOR   v2, v1;       \
 | ||||||
|  | 	ROTL_SSE(7, t, v1) | ||||||
|  | 
 | ||||||
|  | // CHACHA_QROUND_AVX performs a ChaCha quarter-round using the | ||||||
|  | // 4 AVX/AVX2 registers v0, v1, v2 and v3. It uses VPSHUFB for 8/16 bit | ||||||
|  | // rotations. The AVX/AVX2 register t is used as a temp. register. | ||||||
|  | // | ||||||
|  | // r16 holds the VPSHUFB constant for a 16 bit left rotate. | ||||||
|  | // r8 holds the VPSHUFB constant for an 8 bit left rotate. | ||||||
|  | #define CHACHA_QROUND_AVX(v0, v1, v2, v3, t, r16, r8) \ | ||||||
|  | 	VPADDD  v0, v1, v0;  \
 | ||||||
|  | 	VPXOR   v3, v0, v3;  \
 | ||||||
|  | 	VPSHUFB r16, v3, v3; \
 | ||||||
|  | 	VPADDD  v2, v3, v2;  \
 | ||||||
|  | 	VPXOR   v1, v2, v1;  \
 | ||||||
|  | 	ROTL_AVX(12, t, v1); \
 | ||||||
|  | 	VPADDD  v0, v1, v0;  \
 | ||||||
|  | 	VPXOR   v3, v0, v3;  \
 | ||||||
|  | 	VPSHUFB r8, v3, v3;  \
 | ||||||
|  | 	VPADDD  v2, v3, v2;  \
 | ||||||
|  | 	VPXOR   v1, v2, v1;  \
 | ||||||
|  | 	ROTL_AVX(7, t, v1) | ||||||
|  | 
 | ||||||
|  | // CHACHA_SHUFFLE_SSE performs a ChaCha shuffle using the | ||||||
|  | // 3 XMM registers v1, v2 and v3. The inverse shuffle is | ||||||
|  | // performed by switching v1 and v3: CHACHA_SHUFFLE_SSE(v3, v2, v1). | ||||||
|  | #define CHACHA_SHUFFLE_SSE(v1, v2, v3) \ | ||||||
|  | 	PSHUFL $0x39, v1, v1; \
 | ||||||
|  | 	PSHUFL $0x4E, v2, v2; \
 | ||||||
|  | 	PSHUFL $0x93, v3, v3 | ||||||
|  | 
 | ||||||
|  | // CHACHA_SHUFFLE_AVX performs a ChaCha shuffle using the | ||||||
|  | // 3 AVX/AVX2 registers v1, v2 and v3. The inverse shuffle is | ||||||
|  | // performed by switching v1 and v3: CHACHA_SHUFFLE_AVX(v3, v2, v1). | ||||||
|  | #define CHACHA_SHUFFLE_AVX(v1, v2, v3) \ | ||||||
|  | 	VPSHUFD $0x39, v1, v1; \
 | ||||||
|  | 	VPSHUFD $0x4E, v2, v2; \
 | ||||||
|  | 	VPSHUFD $0x93, v3, v3 | ||||||
|  | 
 | ||||||
|  | // XOR_SSE extracts 4x16 byte vectors from src at | ||||||
|  | // off, xors all vectors with the corresponding XMM | ||||||
|  | // register (v0 - v3) and writes the result to dst | ||||||
|  | // at off. | ||||||
|  | // The XMM register t is used as a temp. register. | ||||||
|  | #define XOR_SSE(dst, src, off, v0, v1, v2, v3, t) \ | ||||||
|  | 	MOVOU 0+off(src), t;  \
 | ||||||
|  | 	PXOR  v0, t;          \
 | ||||||
|  | 	MOVOU t, 0+off(dst);  \
 | ||||||
|  | 	MOVOU 16+off(src), t; \
 | ||||||
|  | 	PXOR  v1, t;          \
 | ||||||
|  | 	MOVOU t, 16+off(dst); \
 | ||||||
|  | 	MOVOU 32+off(src), t; \
 | ||||||
|  | 	PXOR  v2, t;          \
 | ||||||
|  | 	MOVOU t, 32+off(dst); \
 | ||||||
|  | 	MOVOU 48+off(src), t; \
 | ||||||
|  | 	PXOR  v3, t;          \
 | ||||||
|  | 	MOVOU t, 48+off(dst) | ||||||
|  | 
 | ||||||
|  | // XOR_AVX extracts 4x16 byte vectors from src at | ||||||
|  | // off, xors all vectors with the corresponding AVX | ||||||
|  | // register (v0 - v3) and writes the result to dst | ||||||
|  | // at off. | ||||||
|  | // The XMM register t is used as a temp. register. | ||||||
|  | #define XOR_AVX(dst, src, off, v0, v1, v2, v3, t) \ | ||||||
|  | 	VPXOR   0+off(src), v0, t;  \
 | ||||||
|  | 	VMOVDQU t, 0+off(dst);      \
 | ||||||
|  | 	VPXOR   16+off(src), v1, t; \
 | ||||||
|  | 	VMOVDQU t, 16+off(dst);     \
 | ||||||
|  | 	VPXOR   32+off(src), v2, t; \
 | ||||||
|  | 	VMOVDQU t, 32+off(dst);     \
 | ||||||
|  | 	VPXOR   48+off(src), v3, t; \
 | ||||||
|  | 	VMOVDQU t, 48+off(dst) | ||||||
|  | 
 | ||||||
|  | #define XOR_AVX2(dst, src, off, v0, v1, v2, v3, t0, t1) \ | ||||||
|  | 	VMOVDQU    (0+off)(src), t0;  \
 | ||||||
|  | 	VPERM2I128 $32, v1, v0, t1;   \
 | ||||||
|  | 	VPXOR      t0, t1, t0;        \
 | ||||||
|  | 	VMOVDQU    t0, (0+off)(dst);  \
 | ||||||
|  | 	VMOVDQU    (32+off)(src), t0; \
 | ||||||
|  | 	VPERM2I128 $32, v3, v2, t1;   \
 | ||||||
|  | 	VPXOR      t0, t1, t0;        \
 | ||||||
|  | 	VMOVDQU    t0, (32+off)(dst); \
 | ||||||
|  | 	VMOVDQU    (64+off)(src), t0; \
 | ||||||
|  | 	VPERM2I128 $49, v1, v0, t1;   \
 | ||||||
|  | 	VPXOR      t0, t1, t0;        \
 | ||||||
|  | 	VMOVDQU    t0, (64+off)(dst); \
 | ||||||
|  | 	VMOVDQU    (96+off)(src), t0; \
 | ||||||
|  | 	VPERM2I128 $49, v3, v2, t1;   \
 | ||||||
|  | 	VPXOR      t0, t1, t0;        \
 | ||||||
|  | 	VMOVDQU    t0, (96+off)(dst) | ||||||
|  | 
 | ||||||
|  | #define XOR_UPPER_AVX2(dst, src, off, v0, v1, v2, v3, t0, t1) \ | ||||||
|  | 	VMOVDQU    (0+off)(src), t0;  \
 | ||||||
|  | 	VPERM2I128 $32, v1, v0, t1;   \
 | ||||||
|  | 	VPXOR      t0, t1, t0;        \
 | ||||||
|  | 	VMOVDQU    t0, (0+off)(dst);  \
 | ||||||
|  | 	VMOVDQU    (32+off)(src), t0; \
 | ||||||
|  | 	VPERM2I128 $32, v3, v2, t1;   \
 | ||||||
|  | 	VPXOR      t0, t1, t0;        \
 | ||||||
|  | 	VMOVDQU    t0, (32+off)(dst); \
 | ||||||
|  | 
 | ||||||
|  | #define EXTRACT_LOWER(dst, v0, v1, v2, v3, t0) \ | ||||||
|  | 	VPERM2I128 $49, v1, v0, t0; \
 | ||||||
|  | 	VMOVDQU    t0, 0(dst);      \
 | ||||||
|  | 	VPERM2I128 $49, v3, v2, t0; \
 | ||||||
|  | 	VMOVDQU    t0, 32(dst) | ||||||
							
								
								
									
										2
									
								
								vendor/modules.txt
									
										
									
									
										vendored
									
									
								
							
							
						
						
									
										2
									
								
								vendor/modules.txt
									
										
									
									
										vendored
									
									
								
							|  | @ -15,6 +15,8 @@ blitter.com/go/kyber | ||||||
| blitter.com/go/mtwist | blitter.com/go/mtwist | ||||||
| # blitter.com/go/newhope v0.0.0-20200130200750-192fc08a8aae | # blitter.com/go/newhope v0.0.0-20200130200750-192fc08a8aae | ||||||
| blitter.com/go/newhope | blitter.com/go/newhope | ||||||
|  | # github.com/aead/chacha20 v0.0.0-20180709150244-8b13a72661da | ||||||
|  | github.com/aead/chacha20/chacha | ||||||
| # github.com/jameskeane/bcrypt v0.0.0-20120420032655-c3cd44c1e20f | # github.com/jameskeane/bcrypt v0.0.0-20120420032655-c3cd44c1e20f | ||||||
| github.com/jameskeane/bcrypt | github.com/jameskeane/bcrypt | ||||||
| # github.com/klauspost/cpuid v1.2.2 | # github.com/klauspost/cpuid v1.2.2 | ||||||
|  |  | ||||||
							
								
								
									
										2
									
								
								xs/xs.go
									
										
									
									
									
								
							
							
						
						
									
										2
									
								
								xs/xs.go
									
										
									
									
									
								
							|  | @ -624,7 +624,7 @@ func main() { | ||||||
| 
 | 
 | ||||||
| 	flag.BoolVar(&vopt, "v", false, "show version") | 	flag.BoolVar(&vopt, "v", false, "show version") | ||||||
| 	flag.BoolVar(&dbg, "d", false, "debug logging") | 	flag.BoolVar(&dbg, "d", false, "debug logging") | ||||||
| 	flag.StringVar(&cipherAlg, "c", "C_AES_256", "session `cipher` [C_AES_256 | C_TWOFISH_128 | C_BLOWFISH_64 | C_CRYPTMT1]") | 	flag.StringVar(&cipherAlg, "c", "C_AES_256", "session `cipher` [C_AES_256 | C_TWOFISH_128 | C_BLOWFISH_64 | C_CRYPTMT1 | C_CHACHA20_12]") | ||||||
| 	flag.StringVar(&hmacAlg, "m", "H_SHA256", "session `HMAC` [H_SHA256 | H_SHA512]") | 	flag.StringVar(&hmacAlg, "m", "H_SHA256", "session `HMAC` [H_SHA256 | H_SHA512]") | ||||||
| 	flag.StringVar(&kexAlg, "k", "KEX_HERRADURA512", "KEx `alg` [KEX_HERRADURA{256/512/1024/2048} | KEX_KYBER{512/768/1024} | KEX_NEWHOPE | KEX_NEWHOPE_SIMPLE]") | 	flag.StringVar(&kexAlg, "k", "KEX_HERRADURA512", "KEx `alg` [KEX_HERRADURA{256/512/1024/2048} | KEX_KYBER{512/768/1024} | KEX_NEWHOPE | KEX_NEWHOPE_SIMPLE]") | ||||||
| 	flag.StringVar(&kcpMode, "K", "unused", "KCP `alg`, one of [KCP_NONE | KCP_AES | KCP_BLOWFISH | KCP_CAST5 | KCP_SM4 | KCP_SALSA20 | KCP_SIMPLEXOR | KCP_TEA | KCP_3DES | KCP_TWOFISH | KCP_XTEA] to use KCP (github.com/xtaci/kcp-go) reliable UDP instead of TCP") | 	flag.StringVar(&kcpMode, "K", "unused", "KCP `alg`, one of [KCP_NONE | KCP_AES | KCP_BLOWFISH | KCP_CAST5 | KCP_SM4 | KCP_SALSA20 | KCP_SIMPLEXOR | KCP_TEA | KCP_3DES | KCP_TWOFISH | KCP_XTEA] to use KCP (github.com/xtaci/kcp-go) reliable UDP instead of TCP") | ||||||
|  |  | ||||||
|  | @ -21,6 +21,7 @@ import ( | ||||||
| 	"log" | 	"log" | ||||||
| 
 | 
 | ||||||
| 	"blitter.com/go/cryptmt" | 	"blitter.com/go/cryptmt" | ||||||
|  | 	"github.com/aead/chacha20/chacha" | ||||||
| 	"golang.org/x/crypto/blowfish" | 	"golang.org/x/crypto/blowfish" | ||||||
| 	"golang.org/x/crypto/twofish" | 	"golang.org/x/crypto/twofish" | ||||||
| 
 | 
 | ||||||
|  | @ -104,6 +105,18 @@ func (hc Conn) getStream(keymat []byte) (rc cipher.Stream, mc hash.Hash, err err | ||||||
| 	case CAlgCryptMT1: | 	case CAlgCryptMT1: | ||||||
| 		rc = cryptmt.New(nil, nil, keymat) | 		rc = cryptmt.New(nil, nil, keymat) | ||||||
| 		log.Printf("[cipher CRYPTMT1 (%d)]\n", copts) | 		log.Printf("[cipher CRYPTMT1 (%d)]\n", copts) | ||||||
|  | 	case CAlgChaCha20_12: | ||||||
|  | 		keymat = expandKeyMat(keymat, chacha.KeySize) | ||||||
|  | 		key = keymat[0:chacha.KeySize] | ||||||
|  | 		ivlen = chacha.INonceSize | ||||||
|  | 		iv = keymat[chacha.KeySize : chacha.KeySize+ivlen] | ||||||
|  | 		rc, err = chacha.NewCipher(iv, key, 20) | ||||||
|  | 		if err != nil { | ||||||
|  | 			log.Printf("[ChaCha20 config error]\n") | ||||||
|  | 			fmt.Printf("[ChaCha20 config error]\n") | ||||||
|  | 		} | ||||||
|  | 		// TODO: SetCounter() to something derived from key or nonce or extra keymat? | ||||||
|  | 		log.Printf("[cipher CHACHA20_12 (%d)]\n", copts) | ||||||
| 	default: | 	default: | ||||||
| 		log.Printf("[invalid cipher (%d)]\n", copts) | 		log.Printf("[invalid cipher (%d)]\n", copts) | ||||||
| 		fmt.Printf("DOOFUS SET A VALID CIPHER ALG (%d)\n", copts) | 		fmt.Printf("DOOFUS SET A VALID CIPHER ALG (%d)\n", copts) | ||||||
|  |  | ||||||
|  | @ -99,6 +99,7 @@ const ( | ||||||
| 	CAlgTwofish128 // golang.org/x/crypto/twofish | 	CAlgTwofish128 // golang.org/x/crypto/twofish | ||||||
| 	CAlgBlowfish64 // golang.org/x/crypto/blowfish | 	CAlgBlowfish64 // golang.org/x/crypto/blowfish | ||||||
| 	CAlgCryptMT1   //cryptmt using mtwist64 | 	CAlgCryptMT1   //cryptmt using mtwist64 | ||||||
|  | 	CAlgChaCha20_12 | ||||||
| 	CAlgNoneDisallowed | 	CAlgNoneDisallowed | ||||||
| ) | ) | ||||||
| 
 | 
 | ||||||
|  |  | ||||||
							
								
								
									
										10
									
								
								xsnet/net.go
									
										
									
									
									
								
							
							
						
						
									
										10
									
								
								xsnet/net.go
									
										
									
									
									
								
							|  | @ -145,6 +145,8 @@ func (c *CSCipherAlg) String() string { | ||||||
| 		return "C_BLOWFISH_64" | 		return "C_BLOWFISH_64" | ||||||
| 	case CAlgCryptMT1: | 	case CAlgCryptMT1: | ||||||
| 		return "C_CRYPTMT1" | 		return "C_CRYPTMT1" | ||||||
|  | 	case CAlgChaCha20_12: | ||||||
|  | 		return "C_CHACHA20_12" | ||||||
| 	default: | 	default: | ||||||
| 		return "C_ERR_UNK" | 		return "C_ERR_UNK" | ||||||
| 	} | 	} | ||||||
|  | @ -280,6 +282,8 @@ func _new(kexAlg KEXAlg, conn *net.Conn) (hc *Conn, e error) { | ||||||
| 		hc.kex = KEX_HERRADURA512 | 		hc.kex = KEX_HERRADURA512 | ||||||
| 		log.Printf("[KEx alg %d ?? defaults to %d]\n", kexAlg, hc.kex) | 		log.Printf("[KEx alg %d ?? defaults to %d]\n", kexAlg, hc.kex) | ||||||
| 	} | 	} | ||||||
|  | 
 | ||||||
|  | 	//hc.logCipherText = true // !!! DEBUGGING ONLY !!! NEVER DEPLOY this uncommented !!! | ||||||
| 	return | 	return | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
|  | @ -298,7 +302,7 @@ func _new(kexAlg KEXAlg, conn *net.Conn) (hc *Conn, e error) { | ||||||
| // | // | ||||||
| // Session (symmetric) crypto | // Session (symmetric) crypto | ||||||
| // | // | ||||||
| // C_AES_256 C_TWOFISH_128 C_BLOWFISH_128 C_CRYPTMT1 | // C_AES_256 C_TWOFISH_128 C_BLOWFISH_64 C_CRYPTMT1 C_CHACHA20_12 | ||||||
| // | // | ||||||
| // Session HMACs | // Session HMACs | ||||||
| // | // | ||||||
|  | @ -322,6 +326,10 @@ func (hc *Conn) applyConnExtensions(extensions ...string) { | ||||||
| 			log.Println("[extension arg = C_CRYPTMT1]") | 			log.Println("[extension arg = C_CRYPTMT1]") | ||||||
| 			hc.cipheropts &= (0xFFFFFF00) | 			hc.cipheropts &= (0xFFFFFF00) | ||||||
| 			hc.cipheropts |= CAlgCryptMT1 | 			hc.cipheropts |= CAlgCryptMT1 | ||||||
|  | 		case "C_CHACHA20_12": | ||||||
|  | 			log.Println("[extension arg = C_CHACHA20_12]") | ||||||
|  | 			hc.cipheropts &= (0xFFFFFF00) | ||||||
|  | 			hc.cipheropts |= CAlgChaCha20_12 | ||||||
| 		case "H_SHA256": | 		case "H_SHA256": | ||||||
| 			log.Println("[extension arg = H_SHA256]") | 			log.Println("[extension arg = H_SHA256]") | ||||||
| 			hc.cipheropts &= (0xFFFF00FF) | 			hc.cipheropts &= (0xFFFF00FF) | ||||||
|  |  | ||||||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue