mirror of
				https://gogs.blitter.com/RLabs/xs
				synced 2024-08-14 10:26:42 +00:00 
			
		
		
		
	Initial aead/chacha20 support (ChaCha20_12)
Signed-off-by: Russ Magee <rmagee@gmail.com>
This commit is contained in:
		
							parent
							
								
									50e786e549
								
							
						
					
					
						commit
						f3e8383dce
					
				
					 18 changed files with 2592 additions and 2 deletions
				
			
		
							
								
								
									
										1
									
								
								go.mod
									
										
									
									
									
								
							
							
						
						
									
										1
									
								
								go.mod
									
										
									
									
									
								
							|  | @ -9,6 +9,7 @@ require ( | |||
| 	blitter.com/go/kyber v0.0.0-20200130200857-6f2021cb88d9 | ||||
| 	blitter.com/go/mtwist v1.0.1 // indirect | ||||
| 	blitter.com/go/newhope v0.0.0-20200130200750-192fc08a8aae | ||||
| 	github.com/aead/chacha20 v0.0.0-20180709150244-8b13a72661da | ||||
| 	github.com/jameskeane/bcrypt v0.0.0-20120420032655-c3cd44c1e20f | ||||
| 	github.com/klauspost/cpuid v1.2.2 // indirect | ||||
| 	github.com/klauspost/reedsolomon v1.9.3 // indirect | ||||
|  |  | |||
							
								
								
									
										2
									
								
								go.sum
									
										
									
									
									
								
							
							
						
						
									
										2
									
								
								go.sum
									
										
									
									
									
								
							|  | @ -28,6 +28,8 @@ git.schwanenlied.me/yawning/kyber.git v0.0.0-20180530164001-a270899bd22c h1:SGOx | |||
| git.schwanenlied.me/yawning/kyber.git v0.0.0-20180530164001-a270899bd22c/go.mod h1:QrbgzU5EL/1jaMD5pD4Tiikj3R5elPMa+RMwFUTGwQU= | ||||
| git.schwanenlied.me/yawning/newhope.git v0.0.0-20170622154529-9598792ba8f2 h1:89TYv/+wotJ+QWrH5B/yN0pEQutr2V/5za0VoYiVGCM= | ||||
| git.schwanenlied.me/yawning/newhope.git v0.0.0-20170622154529-9598792ba8f2/go.mod h1:weMqACFGzJs4Ni+K9shsRd02N4LkDrtGlkRxISK+II0= | ||||
| github.com/aead/chacha20 v0.0.0-20180709150244-8b13a72661da h1:KjTM2ks9d14ZYCvmHS9iAKVt9AyzRSqNU1qabPih5BY= | ||||
| github.com/aead/chacha20 v0.0.0-20180709150244-8b13a72661da/go.mod h1:eHEWzANqSiWQsof+nXEI9bUVUyV6F53Fp89EuCh2EAA= | ||||
| github.com/davecgh/go-spew v1.1.0 h1:ZDRjVQ15GmhC3fiQ8ni8+OwkZQO4DARzQgrnXU1Liz8= | ||||
| github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= | ||||
| github.com/jameskeane/bcrypt v0.0.0-20120420032655-c3cd44c1e20f h1:UWGE8Vi+1Agt0lrvnd7UsmvwqWKRzb9byK9iQmsbY0Y= | ||||
|  |  | |||
							
								
								
									
										21
									
								
								vendor/github.com/aead/chacha20/LICENSE
									
										
									
										generated
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										21
									
								
								vendor/github.com/aead/chacha20/LICENSE
									
										
									
										generated
									
									
										vendored
									
									
										Normal file
									
								
							|  | @ -0,0 +1,21 @@ | |||
| The MIT License (MIT) | ||||
| 
 | ||||
| Copyright (c) 2016 Andreas Auernhammer | ||||
| 
 | ||||
| Permission is hereby granted, free of charge, to any person obtaining a copy | ||||
| of this software and associated documentation files (the "Software"), to deal | ||||
| in the Software without restriction, including without limitation the rights | ||||
| to use, copy, modify, merge, publish, distribute, sublicense, and/or sell | ||||
| copies of the Software, and to permit persons to whom the Software is | ||||
| furnished to do so, subject to the following conditions: | ||||
| 
 | ||||
| The above copyright notice and this permission notice shall be included in all | ||||
| copies or substantial portions of the Software. | ||||
| 
 | ||||
| THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | ||||
| IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | ||||
| FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | ||||
| AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | ||||
| LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | ||||
| OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | ||||
| SOFTWARE. | ||||
							
								
								
									
										197
									
								
								vendor/github.com/aead/chacha20/chacha/chacha.go
									
										
									
										generated
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										197
									
								
								vendor/github.com/aead/chacha20/chacha/chacha.go
									
										
									
										generated
									
									
										vendored
									
									
										Normal file
									
								
							|  | @ -0,0 +1,197 @@ | |||
| // Copyright (c) 2016 Andreas Auernhammer. All rights reserved. | ||||
| // Use of this source code is governed by a license that can be | ||||
| // found in the LICENSE file. | ||||
| 
 | ||||
| // Package chacha implements some low-level functions of the | ||||
| // ChaCha cipher family. | ||||
| package chacha // import "github.com/aead/chacha20/chacha" | ||||
| 
 | ||||
| import ( | ||||
| 	"encoding/binary" | ||||
| 	"errors" | ||||
| 	"math" | ||||
| ) | ||||
| 
 | ||||
| const ( | ||||
| 	// NonceSize is the size of the ChaCha20 nonce in bytes. | ||||
| 	NonceSize = 8 | ||||
| 
 | ||||
| 	// INonceSize is the size of the IETF-ChaCha20 nonce in bytes. | ||||
| 	INonceSize = 12 | ||||
| 
 | ||||
| 	// XNonceSize is the size of the XChaCha20 nonce in bytes. | ||||
| 	XNonceSize = 24 | ||||
| 
 | ||||
| 	// KeySize is the size of the key in bytes. | ||||
| 	KeySize = 32 | ||||
| ) | ||||
| 
 | ||||
| var ( | ||||
| 	useSSE2  bool | ||||
| 	useSSSE3 bool | ||||
| 	useAVX   bool | ||||
| 	useAVX2  bool | ||||
| ) | ||||
| 
 | ||||
| var ( | ||||
| 	errKeySize      = errors.New("chacha20/chacha: bad key length") | ||||
| 	errInvalidNonce = errors.New("chacha20/chacha: bad nonce length") | ||||
| ) | ||||
| 
 | ||||
| // setup validates key and nonce and fills state through initialize. | ||||
| // | ||||
| // The nonce length selects how the 16-byte block handed to initialize is built: | ||||
| //   - NonceSize:  the nonce is copied to bytes 8..15. | ||||
| //   - INonceSize: the nonce is copied to bytes 4..15. | ||||
| //   - XNonceSize: a sub-key is derived with HChaCha20 from key and the first | ||||
| //     16 nonce bytes; the last 8 nonce bytes are copied to bytes 8..15. | ||||
| // | ||||
| // It returns errKeySize if len(key) != KeySize and errInvalidNonce for any | ||||
| // other nonce length. In both error cases state is not written. | ||||
| func setup(state *[64]byte, nonce, key []byte) (err error) { | ||||
| 	if len(key) != KeySize { | ||||
| 		return errKeySize | ||||
| 	} | ||||
| 
 | ||||
| 	// Bytes not overwritten by the nonce below stay zero. | ||||
| 	var ctrNonce [16]byte | ||||
| 	switch len(nonce) { | ||||
| 	case NonceSize: | ||||
| 		copy(ctrNonce[8:], nonce) | ||||
| 		initialize(state, key, &ctrNonce) | ||||
| 		return nil | ||||
| 
 | ||||
| 	case INonceSize: | ||||
| 		copy(ctrNonce[4:], nonce) | ||||
| 		initialize(state, key, &ctrNonce) | ||||
| 		return nil | ||||
| 
 | ||||
| 	case XNonceSize: | ||||
| 		var ( | ||||
| 			subKey [32]byte | ||||
| 			prefix [16]byte | ||||
| 		) | ||||
| 		// Work on a private copy so the caller's key slice is never modified. | ||||
| 		copy(subKey[:], key) | ||||
| 		copy(prefix[:], nonce[:16]) | ||||
| 		HChaCha20(&subKey, &prefix, &subKey) | ||||
| 		copy(ctrNonce[8:], nonce[16:]) | ||||
| 		initialize(state, subKey[:], &ctrNonce) | ||||
| 
 | ||||
| 		// Best-effort wipe of the derived key. An optimizing compiler may | ||||
| 		// remove this loop because subKey is not read afterwards. | ||||
| 		for i := range subKey { | ||||
| 			subKey[i] = 0 | ||||
| 		} | ||||
| 		return nil | ||||
| 	} | ||||
| 	return errInvalidNonce | ||||
| } | ||||
| 
 | ||||
| // XORKeyStream XORs src with the keystream derived from nonce and key and | ||||
| // writes the result to dst. The length of the nonce determines the variant: | ||||
| // - NonceSize:  ChaCha20/r with a 64 bit nonce and a 2^64 * 64 byte period. | ||||
| // - INonceSize: ChaCha20/r as defined in RFC 7539 and a 2^32 * 64 byte period. | ||||
| // - XNonceSize: XChaCha20/r with a 192 bit nonce and a 2^64 * 64 byte period. | ||||
| // The rounds argument is the number of rounds used for keystream generation | ||||
| // and must be 8, 12 or 20. The src and dst may be the same slice but otherwise | ||||
| // should not overlap. | ||||
| // | ||||
| // This function panics if rounds is invalid, if len(dst) < len(src), if a | ||||
| // 96 bit nonce is combined with more than 2^38 bytes of src, or if setup | ||||
| // rejects the key or nonce length. | ||||
| func XORKeyStream(dst, src, nonce, key []byte, rounds int) { | ||||
| 	switch rounds { | ||||
| 	case 8, 12, 20: | ||||
| 		// supported round counts | ||||
| 	default: | ||||
| 		panic("chacha20/chacha: bad number of rounds") | ||||
| 	} | ||||
| 	if len(src) > len(dst) { | ||||
| 		panic("chacha20/chacha: dst buffer is to small") | ||||
| 	} | ||||
| 	// 2^38 bytes = 2^32 blocks of 64 bytes, the limit stated above for the 96 bit nonce variant. | ||||
| 	if ietf := len(nonce) == INonceSize; ietf && uint64(len(src)) > (1<<38) { | ||||
| 		panic("chacha20/chacha: src is too large") | ||||
| 	} | ||||
| 
 | ||||
| 	var ( | ||||
| 		state [64]byte | ||||
| 		block [64]byte | ||||
| 	) | ||||
| 	err := setup(&state, nonce, key) | ||||
| 	if err != nil { | ||||
| 		panic(err) | ||||
| 	} | ||||
| 	xorKeyStream(dst, src, &block, &state, rounds) | ||||
| } | ||||
| 
 | ||||
| // Cipher implements ChaCha20/r (XChaCha20/r) for a given number of rounds r. | ||||
| type Cipher struct { | ||||
| 	state, block [64]byte | ||||
| 	off          int | ||||
| 	rounds       int // 20 for ChaCha20 | ||||
| 	noncesize    int | ||||
| } | ||||
| 
 | ||||
| // NewCipher returns a new *chacha.Cipher implementing the ChaCha20/r or XChaCha20/r | ||||
| // (r = 8, 12 or 20) stream cipher. The nonce must be unique for one key for all time. | ||||
| // The length of the nonce determines the variant: | ||||
| // - NonceSize:  ChaCha20/r with a 64 bit nonce and a 2^64 * 64 byte period. | ||||
| // - INonceSize: ChaCha20/r as defined in RFC 7539 and a 2^32 * 64 byte period. | ||||
| // - XNonceSize: XChaCha20/r with a 192 bit nonce and a 2^64 * 64 byte period. | ||||
| // A non-nil error is returned if the key is not KeySize bytes long or if the | ||||
| // nonce is neither 64, 96 nor 192 bits long. An unsupported rounds value is | ||||
| // not reported as an error: it panics. | ||||
| func NewCipher(nonce, key []byte, rounds int) (*Cipher, error) { | ||||
| 	switch rounds { | ||||
| 	case 8, 12, 20: | ||||
| 		// supported round counts | ||||
| 	default: | ||||
| 		panic("chacha20/chacha: bad number of rounds") | ||||
| 	} | ||||
| 
 | ||||
| 	// Only the 96 bit nonce variant is tracked separately; every other | ||||
| 	// accepted nonce length is recorded as NonceSize. | ||||
| 	cipher := &Cipher{rounds: rounds, noncesize: NonceSize} | ||||
| 	if len(nonce) == INonceSize { | ||||
| 		cipher.noncesize = INonceSize | ||||
| 	} | ||||
| 
 | ||||
| 	if err := setup(&cipher.state, nonce, key); err != nil { | ||||
| 		return nil, err | ||||
| 	} | ||||
| 	return cipher, nil | ||||
| } | ||||
| 
 | ||||
| // XORKeyStream crypts bytes from src to dst. Src and dst may be the same slice | ||||
| // but otherwise should not overlap. | ||||
| // | ||||
| // Keystream bytes left over from a previous call are consumed first; new | ||||
| // keystream blocks are generated only for the remainder of src. | ||||
| // | ||||
| // The function panics if len(dst) < len(src), or if the block counter cannot | ||||
| // advance by the number of blocks still required without exceeding its | ||||
| // maximum value (32 bit counter for INonceSize, 64 bit counter otherwise). | ||||
| func (c *Cipher) XORKeyStream(dst, src []byte) { | ||||
| 	if len(dst) < len(src) { | ||||
| 		panic("chacha20/chacha: dst buffer is to small") | ||||
| 	} | ||||
| 
 | ||||
| 	// c.off > 0 means c.block still holds unused keystream starting at c.off. | ||||
| 	if c.off > 0 { | ||||
| 		n := len(c.block[c.off:]) | ||||
| 		if len(src) <= n { | ||||
| 			// The buffered keystream covers the whole input. | ||||
| 			for i, v := range src { | ||||
| 				dst[i] = v ^ c.block[c.off] | ||||
| 				c.off++ | ||||
| 			} | ||||
| 			if c.off == 64 { | ||||
| 				c.off = 0 | ||||
| 			} | ||||
| 			return | ||||
| 		} | ||||
| 
 | ||||
| 		// Drain the buffered keystream, then continue with fresh blocks. | ||||
| 		for i, v := range c.block[c.off:] { | ||||
| 			dst[i] = src[i] ^ v | ||||
| 		} | ||||
| 		src = src[n:] | ||||
| 		dst = dst[n:] | ||||
| 		c.off = 0 | ||||
| 	} | ||||
| 
 | ||||
| 	// check for counter overflow | ||||
| 	blocksToXOR := len(src) / 64 | ||||
| 	if len(src)%64 != 0 { | ||||
| 		blocksToXOR++ | ||||
| 	} | ||||
| 	var overflow bool | ||||
| 	if c.noncesize == INonceSize { | ||||
| 		// Compare in 64 bits: converting blocksToXOR to uint32 would silently | ||||
| 		// truncate inputs of 2^32 blocks or more and let the counter wrap. | ||||
| 		ctr := uint64(binary.LittleEndian.Uint32(c.state[48:])) | ||||
| 		overflow = ctr+uint64(blocksToXOR) > math.MaxUint32 | ||||
| 	} else { | ||||
| 		overflow = binary.LittleEndian.Uint64(c.state[48:]) > math.MaxUint64-uint64(blocksToXOR) | ||||
| 	} | ||||
| 	if overflow { | ||||
| 		panic("chacha20/chacha: counter overflow") | ||||
| 	} | ||||
| 
 | ||||
| 	// c.off is 0 here; the return value is added to it as the new offset | ||||
| 	// into c.block. | ||||
| 	c.off += xorKeyStream(dst, src, &(c.block), &(c.state), c.rounds) | ||||
| } | ||||
| 
 | ||||
| // SetCounter sets the block counter to ctr, i.e. it positions the cipher | ||||
| // ctr * 64 bytes into the keystream. SetCounter(0) resets the cipher. | ||||
| // Any unused keystream of the current 64 byte block is always discarded. | ||||
| // For a cipher created with a 96 bit nonce only the low 32 bits of ctr are used. | ||||
| func (c *Cipher) SetCounter(ctr uint64) { | ||||
| 	// Drop whatever is left of the buffered keystream block. | ||||
| 	c.off = 0 | ||||
| 
 | ||||
| 	counter := c.state[48:] | ||||
| 	switch c.noncesize { | ||||
| 	case INonceSize: | ||||
| 		binary.LittleEndian.PutUint32(counter, uint32(ctr)) | ||||
| 	default: | ||||
| 		binary.LittleEndian.PutUint64(counter, ctr) | ||||
| 	} | ||||
| } | ||||
| 
 | ||||
| // HChaCha20 derives 32 pseudo-random bytes from a 256 bit secret key and a | ||||
| // 128 bit nonce and stores them in out. It can serve as a key-derivation-function (KDF). | ||||
| // The work is delegated to the platform-specific hChaCha20. | ||||
| func HChaCha20(out *[32]byte, nonce *[16]byte, key *[32]byte) { | ||||
| 	hChaCha20(out, nonce, key) | ||||
| } | ||||
							
								
								
									
										406
									
								
								vendor/github.com/aead/chacha20/chacha/chachaAVX2_amd64.s
									
										
									
										generated
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										406
									
								
								vendor/github.com/aead/chacha20/chacha/chachaAVX2_amd64.s
									
										
									
										generated
									
									
										vendored
									
									
										Normal file
									
								
							|  | @ -0,0 +1,406 @@ | |||
| // Copyright (c) 2016 Andreas Auernhammer. All rights reserved. | ||||
| // Use of this source code is governed by a license that can be | ||||
| // found in the LICENSE file. | ||||
| 
 | ||||
| // +build amd64,!gccgo,!appengine,!nacl | ||||
| 
 | ||||
| #include "const.s" | ||||
| #include "macro.s" | ||||
| 
 | ||||
| #define TWO 0(SP) | ||||
| #define C16 32(SP) | ||||
| #define C8 64(SP) | ||||
| #define STATE_0 96(SP) | ||||
| #define STATE_1 128(SP) | ||||
| #define STATE_2 160(SP) | ||||
| #define STATE_3 192(SP) | ||||
| #define TMP_0 224(SP) | ||||
| #define TMP_1 256(SP) | ||||
| 
 | ||||
| // func xorKeyStreamAVX2(dst, src []byte, block, state *[64]byte, rounds int) int | ||||
| TEXT ·xorKeyStreamAVX2(SB), 4, $320-80 | ||||
| 	MOVQ dst_base+0(FP), DI | ||||
| 	MOVQ src_base+24(FP), SI | ||||
| 	MOVQ block+48(FP), BX | ||||
| 	MOVQ state+56(FP), AX | ||||
| 	MOVQ rounds+64(FP), DX | ||||
| 	MOVQ src_len+32(FP), CX | ||||
| 
 | ||||
| 	MOVQ SP, R8 | ||||
| 	ADDQ $32, SP | ||||
| 	ANDQ $-32, SP | ||||
| 
 | ||||
| 	VMOVDQU    0(AX), Y2 | ||||
| 	VMOVDQU    32(AX), Y3 | ||||
| 	VPERM2I128 $0x22, Y2, Y0, Y0 | ||||
| 	VPERM2I128 $0x33, Y2, Y1, Y1 | ||||
| 	VPERM2I128 $0x22, Y3, Y2, Y2 | ||||
| 	VPERM2I128 $0x33, Y3, Y3, Y3 | ||||
| 
 | ||||
| 	TESTQ CX, CX | ||||
| 	JZ    done | ||||
| 
 | ||||
| 	VMOVDQU ·one_AVX2<>(SB), Y4 | ||||
| 	VPADDD  Y4, Y3, Y3 | ||||
| 
 | ||||
| 	VMOVDQA Y0, STATE_0 | ||||
| 	VMOVDQA Y1, STATE_1 | ||||
| 	VMOVDQA Y2, STATE_2 | ||||
| 	VMOVDQA Y3, STATE_3 | ||||
| 
 | ||||
| 	VMOVDQU ·rol16_AVX2<>(SB), Y4 | ||||
| 	VMOVDQU ·rol8_AVX2<>(SB), Y5 | ||||
| 	VMOVDQU ·two_AVX2<>(SB), Y6 | ||||
| 	VMOVDQA Y4, Y14 | ||||
| 	VMOVDQA Y5, Y15 | ||||
| 	VMOVDQA Y4, C16 | ||||
| 	VMOVDQA Y5, C8 | ||||
| 	VMOVDQA Y6, TWO | ||||
| 
 | ||||
| 	CMPQ CX, $64 | ||||
| 	JBE  between_0_and_64 | ||||
| 	CMPQ CX, $192 | ||||
| 	JBE  between_64_and_192 | ||||
| 	CMPQ CX, $320 | ||||
| 	JBE  between_192_and_320 | ||||
| 	CMPQ CX, $448 | ||||
| 	JBE  between_320_and_448 | ||||
| 
 | ||||
| at_least_512: | ||||
| 	VMOVDQA Y0, Y4 | ||||
| 	VMOVDQA Y1, Y5 | ||||
| 	VMOVDQA Y2, Y6 | ||||
| 	VPADDQ  TWO, Y3, Y7 | ||||
| 	VMOVDQA Y0, Y8 | ||||
| 	VMOVDQA Y1, Y9 | ||||
| 	VMOVDQA Y2, Y10 | ||||
| 	VPADDQ  TWO, Y7, Y11 | ||||
| 	VMOVDQA Y0, Y12 | ||||
| 	VMOVDQA Y1, Y13 | ||||
| 	VMOVDQA Y2, Y14 | ||||
| 	VPADDQ  TWO, Y11, Y15 | ||||
| 
 | ||||
| 	MOVQ DX, R9 | ||||
| 
 | ||||
| chacha_loop_512: | ||||
| 	VMOVDQA Y8, TMP_0 | ||||
| 	CHACHA_QROUND_AVX(Y0, Y1, Y2, Y3, Y8, C16, C8) | ||||
| 	CHACHA_QROUND_AVX(Y4, Y5, Y6, Y7, Y8, C16, C8) | ||||
| 	VMOVDQA TMP_0, Y8 | ||||
| 	VMOVDQA Y0, TMP_0 | ||||
| 	CHACHA_QROUND_AVX(Y8, Y9, Y10, Y11, Y0, C16, C8) | ||||
| 	CHACHA_QROUND_AVX(Y12, Y13, Y14, Y15, Y0, C16, C8) | ||||
| 	CHACHA_SHUFFLE_AVX(Y1, Y2, Y3) | ||||
| 	CHACHA_SHUFFLE_AVX(Y5, Y6, Y7) | ||||
| 	CHACHA_SHUFFLE_AVX(Y9, Y10, Y11) | ||||
| 	CHACHA_SHUFFLE_AVX(Y13, Y14, Y15) | ||||
| 
 | ||||
| 	CHACHA_QROUND_AVX(Y12, Y13, Y14, Y15, Y0, C16, C8) | ||||
| 	CHACHA_QROUND_AVX(Y8, Y9, Y10, Y11, Y0, C16, C8) | ||||
| 	VMOVDQA TMP_0, Y0 | ||||
| 	VMOVDQA Y8, TMP_0 | ||||
| 	CHACHA_QROUND_AVX(Y4, Y5, Y6, Y7, Y8, C16, C8) | ||||
| 	CHACHA_QROUND_AVX(Y0, Y1, Y2, Y3, Y8, C16, C8) | ||||
| 	VMOVDQA TMP_0, Y8 | ||||
| 	CHACHA_SHUFFLE_AVX(Y3, Y2, Y1) | ||||
| 	CHACHA_SHUFFLE_AVX(Y7, Y6, Y5) | ||||
| 	CHACHA_SHUFFLE_AVX(Y11, Y10, Y9) | ||||
| 	CHACHA_SHUFFLE_AVX(Y15, Y14, Y13) | ||||
| 	SUBQ    $2, R9 | ||||
| 	JA      chacha_loop_512 | ||||
| 
 | ||||
| 	VMOVDQA Y12, TMP_0 | ||||
| 	VMOVDQA Y13, TMP_1 | ||||
| 	VPADDD  STATE_0, Y0, Y0 | ||||
| 	VPADDD  STATE_1, Y1, Y1 | ||||
| 	VPADDD  STATE_2, Y2, Y2 | ||||
| 	VPADDD  STATE_3, Y3, Y3 | ||||
| 	XOR_AVX2(DI, SI, 0, Y0, Y1, Y2, Y3, Y12, Y13) | ||||
| 	VMOVDQA STATE_0, Y0 | ||||
| 	VMOVDQA STATE_1, Y1 | ||||
| 	VMOVDQA STATE_2, Y2 | ||||
| 	VMOVDQA STATE_3, Y3 | ||||
| 	VPADDQ  TWO, Y3, Y3 | ||||
| 
 | ||||
| 	VPADDD Y0, Y4, Y4 | ||||
| 	VPADDD Y1, Y5, Y5 | ||||
| 	VPADDD Y2, Y6, Y6 | ||||
| 	VPADDD Y3, Y7, Y7 | ||||
| 	XOR_AVX2(DI, SI, 128, Y4, Y5, Y6, Y7, Y12, Y13) | ||||
| 	VPADDQ TWO, Y3, Y3 | ||||
| 
 | ||||
| 	VPADDD Y0, Y8, Y8 | ||||
| 	VPADDD Y1, Y9, Y9 | ||||
| 	VPADDD Y2, Y10, Y10 | ||||
| 	VPADDD Y3, Y11, Y11 | ||||
| 	XOR_AVX2(DI, SI, 256, Y8, Y9, Y10, Y11, Y12, Y13) | ||||
| 	VPADDQ TWO, Y3, Y3 | ||||
| 
 | ||||
| 	VPADDD TMP_0, Y0, Y12 | ||||
| 	VPADDD TMP_1, Y1, Y13 | ||||
| 	VPADDD Y2, Y14, Y14 | ||||
| 	VPADDD Y3, Y15, Y15 | ||||
| 	VPADDQ TWO, Y3, Y3 | ||||
| 
 | ||||
| 	CMPQ CX, $512 | ||||
| 	JB   less_than_512 | ||||
| 
 | ||||
| 	XOR_AVX2(DI, SI, 384, Y12, Y13, Y14, Y15, Y4, Y5) | ||||
| 	VMOVDQA Y3, STATE_3 | ||||
| 	ADDQ    $512, SI | ||||
| 	ADDQ    $512, DI | ||||
| 	SUBQ    $512, CX | ||||
| 	CMPQ    CX, $448 | ||||
| 	JA      at_least_512 | ||||
| 
 | ||||
| 	TESTQ CX, CX | ||||
| 	JZ    done | ||||
| 
 | ||||
| 	VMOVDQA C16, Y14 | ||||
| 	VMOVDQA C8, Y15 | ||||
| 
 | ||||
| 	CMPQ CX, $64 | ||||
| 	JBE  between_0_and_64 | ||||
| 	CMPQ CX, $192 | ||||
| 	JBE  between_64_and_192 | ||||
| 	CMPQ CX, $320 | ||||
| 	JBE  between_192_and_320 | ||||
| 	JMP  between_320_and_448 | ||||
| 
 | ||||
| less_than_512: | ||||
| 	XOR_UPPER_AVX2(DI, SI, 384, Y12, Y13, Y14, Y15, Y4, Y5) | ||||
| 	EXTRACT_LOWER(BX, Y12, Y13, Y14, Y15, Y4) | ||||
| 	ADDQ $448, SI | ||||
| 	ADDQ $448, DI | ||||
| 	SUBQ $448, CX | ||||
| 	JMP  finalize | ||||
| 
 | ||||
| between_320_and_448: | ||||
| 	VMOVDQA Y0, Y4 | ||||
| 	VMOVDQA Y1, Y5 | ||||
| 	VMOVDQA Y2, Y6 | ||||
| 	VPADDQ  TWO, Y3, Y7 | ||||
| 	VMOVDQA Y0, Y8 | ||||
| 	VMOVDQA Y1, Y9 | ||||
| 	VMOVDQA Y2, Y10 | ||||
| 	VPADDQ  TWO, Y7, Y11 | ||||
| 
 | ||||
| 	MOVQ DX, R9 | ||||
| 
 | ||||
| chacha_loop_384: | ||||
| 	CHACHA_QROUND_AVX(Y0, Y1, Y2, Y3, Y13, Y14, Y15) | ||||
| 	CHACHA_QROUND_AVX(Y4, Y5, Y6, Y7, Y13, Y14, Y15) | ||||
| 	CHACHA_QROUND_AVX(Y8, Y9, Y10, Y11, Y13, Y14, Y15) | ||||
| 	CHACHA_SHUFFLE_AVX(Y1, Y2, Y3) | ||||
| 	CHACHA_SHUFFLE_AVX(Y5, Y6, Y7) | ||||
| 	CHACHA_SHUFFLE_AVX(Y9, Y10, Y11) | ||||
| 	CHACHA_QROUND_AVX(Y0, Y1, Y2, Y3, Y13, Y14, Y15) | ||||
| 	CHACHA_QROUND_AVX(Y4, Y5, Y6, Y7, Y13, Y14, Y15) | ||||
| 	CHACHA_QROUND_AVX(Y8, Y9, Y10, Y11, Y13, Y14, Y15) | ||||
| 	CHACHA_SHUFFLE_AVX(Y3, Y2, Y1) | ||||
| 	CHACHA_SHUFFLE_AVX(Y7, Y6, Y5) | ||||
| 	CHACHA_SHUFFLE_AVX(Y11, Y10, Y9) | ||||
| 	SUBQ $2, R9 | ||||
| 	JA   chacha_loop_384 | ||||
| 
 | ||||
| 	VPADDD  STATE_0, Y0, Y0 | ||||
| 	VPADDD  STATE_1, Y1, Y1 | ||||
| 	VPADDD  STATE_2, Y2, Y2 | ||||
| 	VPADDD  STATE_3, Y3, Y3 | ||||
| 	XOR_AVX2(DI, SI, 0, Y0, Y1, Y2, Y3, Y12, Y13) | ||||
| 	VMOVDQA STATE_0, Y0 | ||||
| 	VMOVDQA STATE_1, Y1 | ||||
| 	VMOVDQA STATE_2, Y2 | ||||
| 	VMOVDQA STATE_3, Y3 | ||||
| 	VPADDQ  TWO, Y3, Y3 | ||||
| 
 | ||||
| 	VPADDD Y0, Y4, Y4 | ||||
| 	VPADDD Y1, Y5, Y5 | ||||
| 	VPADDD Y2, Y6, Y6 | ||||
| 	VPADDD Y3, Y7, Y7 | ||||
| 	XOR_AVX2(DI, SI, 128, Y4, Y5, Y6, Y7, Y12, Y13) | ||||
| 	VPADDQ TWO, Y3, Y3 | ||||
| 
 | ||||
| 	VPADDD Y0, Y8, Y8 | ||||
| 	VPADDD Y1, Y9, Y9 | ||||
| 	VPADDD Y2, Y10, Y10 | ||||
| 	VPADDD Y3, Y11, Y11 | ||||
| 	VPADDQ TWO, Y3, Y3 | ||||
| 
 | ||||
| 	CMPQ CX, $384 | ||||
| 	JB   less_than_384 | ||||
| 
 | ||||
| 	XOR_AVX2(DI, SI, 256, Y8, Y9, Y10, Y11, Y12, Y13) | ||||
| 	SUBQ  $384, CX | ||||
| 	TESTQ CX, CX | ||||
| 	JE    done | ||||
| 
 | ||||
| 	ADDQ $384, SI | ||||
| 	ADDQ $384, DI | ||||
| 	JMP  between_0_and_64 | ||||
| 
 | ||||
| less_than_384: | ||||
| 	XOR_UPPER_AVX2(DI, SI, 256, Y8, Y9, Y10, Y11, Y12, Y13) | ||||
| 	EXTRACT_LOWER(BX, Y8, Y9, Y10, Y11, Y12) | ||||
| 	ADDQ $320, SI | ||||
| 	ADDQ $320, DI | ||||
| 	SUBQ $320, CX | ||||
| 	JMP  finalize | ||||
| 
 | ||||
| between_192_and_320: | ||||
| 	VMOVDQA Y0, Y4 | ||||
| 	VMOVDQA Y1, Y5 | ||||
| 	VMOVDQA Y2, Y6 | ||||
| 	VMOVDQA Y3, Y7 | ||||
| 	VMOVDQA Y0, Y8 | ||||
| 	VMOVDQA Y1, Y9 | ||||
| 	VMOVDQA Y2, Y10 | ||||
| 	VPADDQ  TWO, Y3, Y11 | ||||
| 
 | ||||
| 	MOVQ DX, R9 | ||||
| 
 | ||||
| chacha_loop_256: | ||||
| 	CHACHA_QROUND_AVX(Y4, Y5, Y6, Y7, Y13, Y14, Y15) | ||||
| 	CHACHA_QROUND_AVX(Y8, Y9, Y10, Y11, Y13, Y14, Y15) | ||||
| 	CHACHA_SHUFFLE_AVX(Y5, Y6, Y7) | ||||
| 	CHACHA_SHUFFLE_AVX(Y9, Y10, Y11) | ||||
| 	CHACHA_QROUND_AVX(Y4, Y5, Y6, Y7, Y13, Y14, Y15) | ||||
| 	CHACHA_QROUND_AVX(Y8, Y9, Y10, Y11, Y13, Y14, Y15) | ||||
| 	CHACHA_SHUFFLE_AVX(Y7, Y6, Y5) | ||||
| 	CHACHA_SHUFFLE_AVX(Y11, Y10, Y9) | ||||
| 	SUBQ $2, R9 | ||||
| 	JA   chacha_loop_256 | ||||
| 
 | ||||
| 	VPADDD Y0, Y4, Y4 | ||||
| 	VPADDD Y1, Y5, Y5 | ||||
| 	VPADDD Y2, Y6, Y6 | ||||
| 	VPADDD Y3, Y7, Y7 | ||||
| 	VPADDQ TWO, Y3, Y3 | ||||
| 	XOR_AVX2(DI, SI, 0, Y4, Y5, Y6, Y7, Y12, Y13) | ||||
| 	VPADDD Y0, Y8, Y8 | ||||
| 	VPADDD Y1, Y9, Y9 | ||||
| 	VPADDD Y2, Y10, Y10 | ||||
| 	VPADDD Y3, Y11, Y11 | ||||
| 	VPADDQ TWO, Y3, Y3 | ||||
| 
 | ||||
| 	CMPQ CX, $256 | ||||
| 	JB   less_than_256 | ||||
| 
 | ||||
| 	XOR_AVX2(DI, SI, 128, Y8, Y9, Y10, Y11, Y12, Y13) | ||||
| 	SUBQ  $256, CX | ||||
| 	TESTQ CX, CX | ||||
| 	JE    done | ||||
| 
 | ||||
| 	ADDQ $256, SI | ||||
| 	ADDQ $256, DI | ||||
| 	JMP  between_0_and_64 | ||||
| 
 | ||||
| less_than_256: | ||||
| 	XOR_UPPER_AVX2(DI, SI, 128, Y8, Y9, Y10, Y11, Y12, Y13) | ||||
| 	EXTRACT_LOWER(BX, Y8, Y9, Y10, Y11, Y12) | ||||
| 	ADDQ $192, SI | ||||
| 	ADDQ $192, DI | ||||
| 	SUBQ $192, CX | ||||
| 	JMP  finalize | ||||
| 
 | ||||
| between_64_and_192: | ||||
| 	VMOVDQA Y0, Y4 | ||||
| 	VMOVDQA Y1, Y5 | ||||
| 	VMOVDQA Y2, Y6 | ||||
| 	VMOVDQA Y3, Y7 | ||||
| 
 | ||||
| 	MOVQ DX, R9 | ||||
| 
 | ||||
| chacha_loop_128: | ||||
| 	CHACHA_QROUND_AVX(Y4, Y5, Y6, Y7, Y13, Y14, Y15) | ||||
| 	CHACHA_SHUFFLE_AVX(Y5, Y6, Y7) | ||||
| 	CHACHA_QROUND_AVX(Y4, Y5, Y6, Y7, Y13, Y14, Y15) | ||||
| 	CHACHA_SHUFFLE_AVX(Y7, Y6, Y5) | ||||
| 	SUBQ $2, R9 | ||||
| 	JA   chacha_loop_128 | ||||
| 
 | ||||
| 	VPADDD Y0, Y4, Y4 | ||||
| 	VPADDD Y1, Y5, Y5 | ||||
| 	VPADDD Y2, Y6, Y6 | ||||
| 	VPADDD Y3, Y7, Y7 | ||||
| 	VPADDQ TWO, Y3, Y3 | ||||
| 
 | ||||
| 	CMPQ CX, $128 | ||||
| 	JB   less_than_128 | ||||
| 
 | ||||
| 	XOR_AVX2(DI, SI, 0, Y4, Y5, Y6, Y7, Y12, Y13) | ||||
| 	SUBQ  $128, CX | ||||
| 	TESTQ CX, CX | ||||
| 	JE    done | ||||
| 
 | ||||
| 	ADDQ $128, SI | ||||
| 	ADDQ $128, DI | ||||
| 	JMP  between_0_and_64 | ||||
| 
 | ||||
| less_than_128: | ||||
| 	XOR_UPPER_AVX2(DI, SI, 0, Y4, Y5, Y6, Y7, Y12, Y13) | ||||
| 	EXTRACT_LOWER(BX, Y4, Y5, Y6, Y7, Y13) | ||||
| 	ADDQ $64, SI | ||||
| 	ADDQ $64, DI | ||||
| 	SUBQ $64, CX | ||||
| 	JMP  finalize | ||||
| 
 | ||||
| between_0_and_64: | ||||
| 	VMOVDQA X0, X4 | ||||
| 	VMOVDQA X1, X5 | ||||
| 	VMOVDQA X2, X6 | ||||
| 	VMOVDQA X3, X7 | ||||
| 
 | ||||
| 	MOVQ DX, R9 | ||||
| 
 | ||||
| chacha_loop_64: | ||||
| 	CHACHA_QROUND_AVX(X4, X5, X6, X7, X13, X14, X15) | ||||
| 	CHACHA_SHUFFLE_AVX(X5, X6, X7) | ||||
| 	CHACHA_QROUND_AVX(X4, X5, X6, X7, X13, X14, X15) | ||||
| 	CHACHA_SHUFFLE_AVX(X7, X6, X5) | ||||
| 	SUBQ $2, R9 | ||||
| 	JA   chacha_loop_64 | ||||
| 
 | ||||
| 	VPADDD  X0, X4, X4 | ||||
| 	VPADDD  X1, X5, X5 | ||||
| 	VPADDD  X2, X6, X6 | ||||
| 	VPADDD  X3, X7, X7 | ||||
| 	VMOVDQU ·one<>(SB), X0 | ||||
| 	VPADDQ  X0, X3, X3 | ||||
| 
 | ||||
| 	CMPQ CX, $64 | ||||
| 	JB   less_than_64 | ||||
| 
 | ||||
| 	XOR_AVX(DI, SI, 0, X4, X5, X6, X7, X13) | ||||
| 	SUBQ $64, CX | ||||
| 	JMP  done | ||||
| 
 | ||||
| less_than_64: | ||||
| 	VMOVDQU X4, 0(BX) | ||||
| 	VMOVDQU X5, 16(BX) | ||||
| 	VMOVDQU X6, 32(BX) | ||||
| 	VMOVDQU X7, 48(BX) | ||||
| 
 | ||||
| finalize: | ||||
| 	XORQ R11, R11 | ||||
| 	XORQ R12, R12 | ||||
| 	MOVQ CX, BP | ||||
| 
 | ||||
| xor_loop: | ||||
| 	MOVB 0(SI), R11 | ||||
| 	MOVB 0(BX), R12 | ||||
| 	XORQ R11, R12 | ||||
| 	MOVB R12, 0(DI) | ||||
| 	INCQ SI | ||||
| 	INCQ BX | ||||
| 	INCQ DI | ||||
| 	DECQ BP | ||||
| 	JA   xor_loop | ||||
| 
 | ||||
| done: | ||||
| 	VMOVDQU X3, 48(AX) | ||||
| 	VZEROUPPER | ||||
| 	MOVQ    R8, SP | ||||
| 	MOVQ    CX, ret+72(FP) | ||||
| 	RET | ||||
| 
 | ||||
							
								
								
									
										60
									
								
								vendor/github.com/aead/chacha20/chacha/chacha_386.go
									
										
									
										generated
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										60
									
								
								vendor/github.com/aead/chacha20/chacha/chacha_386.go
									
										
									
										generated
									
									
										vendored
									
									
										Normal file
									
								
							|  | @ -0,0 +1,60 @@ | |||
| // Copyright (c) 2016 Andreas Auernhammer. All rights reserved. | ||||
| // Use of this source code is governed by a license that can be | ||||
| // found in the LICENSE file. | ||||
| 
 | ||||
| // +build 386,!gccgo,!appengine,!nacl | ||||
| 
 | ||||
| package chacha | ||||
| 
 | ||||
| import ( | ||||
| 	"encoding/binary" | ||||
| 
 | ||||
| 	"golang.org/x/sys/cpu" | ||||
| ) | ||||
| 
 | ||||
| // init records which SIMD code paths may be used on this CPU. | ||||
| func init() { | ||||
| 	// This 386 build file only declares SSE2/SSSE3 routines, so the AVX | ||||
| 	// flags are switched off unconditionally. | ||||
| 	useAVX, useAVX2 = false, false | ||||
| 	useSSE2, useSSSE3 = cpu.X86.HasSSE2, cpu.X86.HasSSSE3 | ||||
| } | ||||
| 
 | ||||
| // initialize fills the 64 byte state: | ||||
| //   bytes  0..15  the four sigma words, little-endian | ||||
| //   bytes 16..    the key | ||||
| //   bytes 48..63  the 16 byte nonce block | ||||
| func initialize(state *[64]byte, key []byte, nonce *[16]byte) { | ||||
| 	for i := 0; i < 4; i++ { | ||||
| 		binary.LittleEndian.PutUint32(state[4*i:], sigma[i]) | ||||
| 	} | ||||
| 	copy(state[16:], key) | ||||
| 	copy(state[48:], nonce[:]) | ||||
| } | ||||
| 
 | ||||
| // This function is implemented in chacha_386.s | ||||
| //go:noescape | ||||
| func hChaCha20SSE2(out *[32]byte, nonce *[16]byte, key *[32]byte) | ||||
| 
 | ||||
| // This function is implemented in chacha_386.s | ||||
| //go:noescape | ||||
| func hChaCha20SSSE3(out *[32]byte, nonce *[16]byte, key *[32]byte) | ||||
| 
 | ||||
| // This function is implemented in chacha_386.s | ||||
| //go:noescape | ||||
| func xorKeyStreamSSE2(dst, src []byte, block, state *[64]byte, rounds int) int | ||||
| 
 | ||||
| // hChaCha20 dispatches to the SSSE3 routine when useSSSE3 is set, otherwise | ||||
| // to the SSE2 routine when useSSE2 is set, and to the generic one as a fallback. | ||||
| func hChaCha20(out *[32]byte, nonce *[16]byte, key *[32]byte) { | ||||
| 	if useSSSE3 { | ||||
| 		hChaCha20SSSE3(out, nonce, key) | ||||
| 		return | ||||
| 	} | ||||
| 	if useSSE2 { | ||||
| 		hChaCha20SSE2(out, nonce, key) | ||||
| 		return | ||||
| 	} | ||||
| 	hChaCha20Generic(out, nonce, key) | ||||
| } | ||||
| 
 | ||||
| // xorKeyStream uses the SSE2 routine when useSSE2 is set and the generic | ||||
| // implementation otherwise, returning whatever the chosen routine returns. | ||||
| func xorKeyStream(dst, src []byte, block, state *[64]byte, rounds int) int { | ||||
| 	if !useSSE2 { | ||||
| 		return xorKeyStreamGeneric(dst, src, block, state, rounds) | ||||
| 	} | ||||
| 	return xorKeyStreamSSE2(dst, src, block, state, rounds) | ||||
| } | ||||
							
								
								
									
										163
									
								
								vendor/github.com/aead/chacha20/chacha/chacha_386.s
									
										
									
										generated
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										163
									
								
								vendor/github.com/aead/chacha20/chacha/chacha_386.s
									
										
									
										generated
									
									
										vendored
									
									
										Normal file
									
								
							|  | @ -0,0 +1,163 @@ | |||
| // Copyright (c) 2016 Andreas Auernhammer. All rights reserved. | ||||
| // Use of this source code is governed by a license that can be | ||||
| // found in the LICENSE file. | ||||
| 
 | ||||
| // +build 386,!gccgo,!appengine,!nacl | ||||
| 
 | ||||
| #include "const.s" | ||||
| #include "macro.s" | ||||
| 
 | ||||
| // FINALIZE xors len bytes from src and block using | ||||
| // the temp. registers t0 and t1 and writes the result | ||||
| // to dst. | ||||
| #define FINALIZE(dst, src, block, len, t0, t1) \ | ||||
| 	XORL t0, t0;       \
 | ||||
| 	XORL t1, t1;       \
 | ||||
| 	FINALIZE_LOOP:;    \
 | ||||
| 	MOVB 0(src), t0;   \
 | ||||
| 	MOVB 0(block), t1; \
 | ||||
| 	XORL t0, t1;       \
 | ||||
| 	MOVB t1, 0(dst);   \
 | ||||
| 	INCL src;          \
 | ||||
| 	INCL block;        \
 | ||||
| 	INCL dst;          \
 | ||||
| 	DECL len;          \
 | ||||
| 	JG   FINALIZE_LOOP \ | ||||
| 
 | ||||
| #define Dst DI | ||||
| #define Nonce AX | ||||
| #define Key BX | ||||
| #define Rounds DX | ||||
| 
 | ||||
| // func hChaCha20SSE2(out *[32]byte, nonce *[16]byte, key *[32]byte) | ||||
| TEXT ·hChaCha20SSE2(SB), 4, $0-12 | ||||
| 	MOVL out+0(FP), Dst | ||||
| 	MOVL nonce+4(FP), Nonce | ||||
| 	MOVL key+8(FP), Key | ||||
| 
 | ||||
| 	MOVOU ·sigma<>(SB), X0 | ||||
| 	MOVOU 0*16(Key), X1 | ||||
| 	MOVOU 1*16(Key), X2 | ||||
| 	MOVOU 0*16(Nonce), X3 | ||||
| 	MOVL  $20, Rounds | ||||
| 
 | ||||
| chacha_loop: | ||||
| 	CHACHA_QROUND_SSE2(X0, X1, X2, X3, X4) | ||||
| 	CHACHA_SHUFFLE_SSE(X1, X2, X3) | ||||
| 	CHACHA_QROUND_SSE2(X0, X1, X2, X3, X4) | ||||
| 	CHACHA_SHUFFLE_SSE(X3, X2, X1) | ||||
| 	SUBL $2, Rounds | ||||
| 	JNZ  chacha_loop | ||||
| 
 | ||||
| 	MOVOU X0, 0*16(Dst) | ||||
| 	MOVOU X3, 1*16(Dst) | ||||
| 	RET | ||||
| 
 | ||||
| // func hChaCha20SSSE3(out *[32]byte, nonce *[16]byte, key *[32]byte) | ||||
| TEXT ·hChaCha20SSSE3(SB), 4, $0-12 | ||||
| 	MOVL out+0(FP), Dst | ||||
| 	MOVL nonce+4(FP), Nonce | ||||
| 	MOVL key+8(FP), Key | ||||
| 
 | ||||
| 	MOVOU ·sigma<>(SB), X0 | ||||
| 	MOVOU 0*16(Key), X1 | ||||
| 	MOVOU 1*16(Key), X2 | ||||
| 	MOVOU 0*16(Nonce), X3 | ||||
| 	MOVL  $20, Rounds | ||||
| 
 | ||||
| 	MOVOU ·rol16<>(SB), X5 | ||||
| 	MOVOU ·rol8<>(SB), X6 | ||||
| 
 | ||||
| chacha_loop: | ||||
| 	CHACHA_QROUND_SSSE3(X0, X1, X2, X3, X4, X5, X6) | ||||
| 	CHACHA_SHUFFLE_SSE(X1, X2, X3) | ||||
| 	CHACHA_QROUND_SSSE3(X0, X1, X2, X3, X4, X5, X6) | ||||
| 	CHACHA_SHUFFLE_SSE(X3, X2, X1) | ||||
| 	SUBL $2, Rounds | ||||
| 	JNZ  chacha_loop | ||||
| 
 | ||||
| 	MOVOU X0, 0*16(Dst) | ||||
| 	MOVOU X3, 1*16(Dst) | ||||
| 	RET | ||||
| 
 | ||||
| #undef Dst | ||||
| #undef Nonce | ||||
| #undef Key | ||||
| #undef Rounds | ||||
| 
 | ||||
| #define State AX | ||||
| #define Dst DI | ||||
| #define Src SI | ||||
| #define Len DX | ||||
| #define Tmp0 BX | ||||
| #define Tmp1 BP | ||||
| 
 | ||||
// xorKeyStreamSSE2 XORs src with ChaCha keystream into dst, one 64-byte block
// at a time, using only SSE2 instructions. It returns 0 when len(src) is a
// multiple of 64; otherwise the final keystream block is also written to
// block and the number of trailing bytes (0 < n < 64) is returned.
// The counter row of state is advanced once per generated block and written
// back before returning.
// Frame $0-40 on a 32-bit target: dst slice at 0, src slice at 12, block at 24,
// state at 28, rounds at 32, result at 36.
| // func xorKeyStreamSSE2(dst, src []byte, block, state *[64]byte, rounds int) int | ||||
| TEXT ·xorKeyStreamSSE2(SB), 4, $0-40 | ||||
| 	MOVL dst_base+0(FP), Dst | ||||
| 	MOVL src_base+12(FP), Src | ||||
| 	MOVL state+28(FP), State | ||||
| 	MOVL src_len+16(FP), Len | ||||
| 	MOVL $0, ret+36(FP)       // Number of bytes written to the keystream buffer - 0 iff len mod 64 == 0 | ||||
| 
 | ||||
// X0-X3 hold the four 16-byte rows of the persistent state for the whole call.
// Empty input skips straight to DONE.
| 	MOVOU 0*16(State), X0 | ||||
| 	MOVOU 1*16(State), X1 | ||||
| 	MOVOU 2*16(State), X2 | ||||
| 	MOVOU 3*16(State), X3 | ||||
| 	TESTL Len, Len | ||||
| 	JZ    DONE | ||||
| 
 | ||||
// Copy the state into the working registers X4-X7 and reload the round count.
| GENERATE_KEYSTREAM: | ||||
| 	MOVO X0, X4 | ||||
| 	MOVO X1, X5 | ||||
| 	MOVO X2, X6 | ||||
| 	MOVO X3, X7 | ||||
| 	MOVL rounds+32(FP), Tmp0 | ||||
| 
 | ||||
// Two rounds per iteration. X0 doubles as the scratch register of the
// quarter-round, so row 0 is clobbered here and reloaded from memory below.
// The loop continues while the remaining round count is still above zero.
| CHACHA_LOOP: | ||||
| 	CHACHA_QROUND_SSE2(X4, X5, X6, X7, X0) | ||||
| 	CHACHA_SHUFFLE_SSE(X5, X6, X7) | ||||
| 	CHACHA_QROUND_SSE2(X4, X5, X6, X7, X0) | ||||
| 	CHACHA_SHUFFLE_SSE(X7, X6, X5) | ||||
| 	SUBL $2, Tmp0 | ||||
| 	JA   CHACHA_LOOP | ||||
| 
 | ||||
// Feed-forward: add the input state to the permuted state, giving the
// keystream block in X4-X7. Then add ·one to X3 with a 64-bit lane add,
// which increments the block counter in the low quadword of row 3.
| 	MOVOU 0*16(State), X0 // Restore X0 from state | ||||
| 	PADDL X0, X4 | ||||
| 	PADDL X1, X5 | ||||
| 	PADDL X2, X6 | ||||
| 	PADDL X3, X7 | ||||
| 	MOVOU ·one<>(SB), X0 | ||||
| 	PADDQ X0, X3 | ||||
| 
 | ||||
// Fewer than 64 bytes left: take the partial-block path.
| 	CMPL Len, $64 | ||||
| 	JL   BUFFER_KEYSTREAM | ||||
| 
 | ||||
// Full block: XOR 64 bytes (X0 is the scratch register again), then advance.
| 	XOR_SSE(Dst, Src, 0, X4, X5, X6, X7, X0) | ||||
| 	MOVOU 0*16(State), X0    // Restore X0 from state | ||||
| 	ADDL  $64, Src | ||||
| 	ADDL  $64, Dst | ||||
| 	SUBL  $64, Len | ||||
| 	JZ    DONE | ||||
| 	JMP   GENERATE_KEYSTREAM // There is at least one more plaintext byte | ||||
| 
 | ||||
// Partial block: the State register is repurposed to point at block, the
// whole keystream block is saved there, and Len is returned to the caller.
// NOTE(review): FINALIZE is defined outside this view; presumably it XORs the
// remaining Len bytes of src with the buffered keystream into dst — confirm.
| BUFFER_KEYSTREAM: | ||||
| 	MOVL  block+24(FP), State | ||||
| 	MOVOU X4, 0(State) | ||||
| 	MOVOU X5, 16(State) | ||||
| 	MOVOU X6, 32(State) | ||||
| 	MOVOU X7, 48(State) | ||||
| 	MOVL  Len, ret+36(FP)     // Number of bytes written to the keystream buffer - 0 < Len < 64 | ||||
| 	FINALIZE(Dst, Src, State, Len, Tmp0, Tmp1) | ||||
| 
 | ||||
// Reload the state pointer (State may point at block here) and persist the
// updated counter row.
| DONE: | ||||
| 	MOVL  state+28(FP), State | ||||
| 	MOVOU X3, 3*16(State) | ||||
| 	RET | ||||
| 
 | ||||
| #undef State | ||||
| #undef Dst | ||||
| #undef Src | ||||
| #undef Len | ||||
| #undef Tmp0 | ||||
| #undef Tmp1 | ||||
							
								
								
									
										76
									
								
								vendor/github.com/aead/chacha20/chacha/chacha_amd64.go
									
										
									
										generated
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										76
									
								
								vendor/github.com/aead/chacha20/chacha/chacha_amd64.go
									
										
									
										generated
									
									
										vendored
									
									
										Normal file
									
								
							|  | @ -0,0 +1,76 @@ | |||
| // Copyright (c) 2017 Andreas Auernhammer. All rights reserved. | ||||
| // Use of this source code is governed by a license that can be | ||||
| // found in the LICENSE file. | ||||
| 
 | ||||
| // +build go1.7,amd64,!gccgo,!appengine,!nacl | ||||
| 
 | ||||
| package chacha | ||||
| 
 | ||||
| import "golang.org/x/sys/cpu" | ||||
| 
 | ||||
| func init() { | ||||
| 	useSSE2 = cpu.X86.HasSSE2 | ||||
| 	useSSSE3 = cpu.X86.HasSSSE3 | ||||
| 	useAVX = cpu.X86.HasAVX | ||||
| 	useAVX2 = cpu.X86.HasAVX2 | ||||
| } | ||||
| 
 | ||||
| // This function is implemented in chacha_amd64.s | ||||
| //go:noescape | ||||
| func initialize(state *[64]byte, key []byte, nonce *[16]byte) | ||||
| 
 | ||||
| // This function is implemented in chacha_amd64.s | ||||
| //go:noescape | ||||
| func hChaCha20SSE2(out *[32]byte, nonce *[16]byte, key *[32]byte) | ||||
| 
 | ||||
| // This function is implemented in chacha_amd64.s | ||||
| //go:noescape | ||||
| func hChaCha20SSSE3(out *[32]byte, nonce *[16]byte, key *[32]byte) | ||||
| 
 | ||||
| // This function is implemented in chachaAVX2_amd64.s | ||||
| //go:noescape | ||||
| func hChaCha20AVX(out *[32]byte, nonce *[16]byte, key *[32]byte) | ||||
| 
 | ||||
| // This function is implemented in chacha_amd64.s | ||||
| //go:noescape | ||||
| func xorKeyStreamSSE2(dst, src []byte, block, state *[64]byte, rounds int) int | ||||
| 
 | ||||
| // This function is implemented in chacha_amd64.s | ||||
| //go:noescape | ||||
| func xorKeyStreamSSSE3(dst, src []byte, block, state *[64]byte, rounds int) int | ||||
| 
 | ||||
| // This function is implemented in chacha_amd64.s | ||||
| //go:noescape | ||||
| func xorKeyStreamAVX(dst, src []byte, block, state *[64]byte, rounds int) int | ||||
| 
 | ||||
| // This function is implemented in chachaAVX2_amd64.s | ||||
| //go:noescape | ||||
| func xorKeyStreamAVX2(dst, src []byte, block, state *[64]byte, rounds int) int | ||||
| 
 | ||||
| func hChaCha20(out *[32]byte, nonce *[16]byte, key *[32]byte) { | ||||
| 	switch { | ||||
| 	case useAVX: | ||||
| 		hChaCha20AVX(out, nonce, key) | ||||
| 	case useSSSE3: | ||||
| 		hChaCha20SSSE3(out, nonce, key) | ||||
| 	case useSSE2: | ||||
| 		hChaCha20SSE2(out, nonce, key) | ||||
| 	default: | ||||
| 		hChaCha20Generic(out, nonce, key) | ||||
| 	} | ||||
| } | ||||
| 
 | ||||
| func xorKeyStream(dst, src []byte, block, state *[64]byte, rounds int) int { | ||||
| 	switch { | ||||
| 	case useAVX2: | ||||
| 		return xorKeyStreamAVX2(dst, src, block, state, rounds) | ||||
| 	case useAVX: | ||||
| 		return xorKeyStreamAVX(dst, src, block, state, rounds) | ||||
| 	case useSSSE3: | ||||
| 		return xorKeyStreamSSSE3(dst, src, block, state, rounds) | ||||
| 	case useSSE2: | ||||
| 		return xorKeyStreamSSE2(dst, src, block, state, rounds) | ||||
| 	default: | ||||
| 		return xorKeyStreamGeneric(dst, src, block, state, rounds) | ||||
| 	} | ||||
| } | ||||
							
								
								
									
										1072
									
								
								vendor/github.com/aead/chacha20/chacha/chacha_amd64.s
									
										
									
										generated
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										1072
									
								
								vendor/github.com/aead/chacha20/chacha/chacha_amd64.s
									
										
									
										generated
									
									
										vendored
									
									
										Normal file
									
								
							
										
											
												File diff suppressed because it is too large
												Load diff
											
										
									
								
							
							
								
								
									
										319
									
								
								vendor/github.com/aead/chacha20/chacha/chacha_generic.go
									
										
									
										generated
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										319
									
								
								vendor/github.com/aead/chacha20/chacha/chacha_generic.go
									
										
									
										generated
									
									
										vendored
									
									
										Normal file
									
								
							|  | @ -0,0 +1,319 @@ | |||
| // Copyright (c) 2016 Andreas Auernhammer. All rights reserved. | ||||
| // Use of this source code is governed by a license that can be | ||||
| // found in the LICENSE file. | ||||
| 
 | ||||
| package chacha | ||||
| 
 | ||||
| import "encoding/binary" | ||||
| 
 | ||||
| var sigma = [4]uint32{0x61707865, 0x3320646e, 0x79622d32, 0x6b206574} | ||||
| 
 | ||||
| func xorKeyStreamGeneric(dst, src []byte, block, state *[64]byte, rounds int) int { | ||||
| 	for len(src) >= 64 { | ||||
| 		chachaGeneric(block, state, rounds) | ||||
| 
 | ||||
| 		for i, v := range block { | ||||
| 			dst[i] = src[i] ^ v | ||||
| 		} | ||||
| 		src = src[64:] | ||||
| 		dst = dst[64:] | ||||
| 	} | ||||
| 
 | ||||
| 	n := len(src) | ||||
| 	if n > 0 { | ||||
| 		chachaGeneric(block, state, rounds) | ||||
| 		for i, v := range src { | ||||
| 			dst[i] = v ^ block[i] | ||||
| 		} | ||||
| 	} | ||||
| 	return n | ||||
| } | ||||
| 
 | ||||
// chachaGenericQuarterRound applies one ChaCha quarter-round to the words
// a, b, c and d and returns the updated words in the same order.
func chachaGenericQuarterRound(a, b, c, d uint32) (uint32, uint32, uint32, uint32) {
	a += b
	d ^= a
	d = d<<16 | d>>16
	c += d
	b ^= c
	b = b<<12 | b>>20
	a += b
	d ^= a
	d = d<<8 | d>>24
	c += d
	b ^= c
	b = b<<7 | b>>25
	return a, b, c, d
}

// chachaGeneric produces one 64-byte keystream block in dst from the 16
// little-endian words held in state, then advances the block counter stored
// in state.
//
// The round loop advances two rounds per iteration (a column round followed
// by a diagonal round) while the iteration counter is below rounds. After the
// rounds the original state words are added to the result. The counter is the
// 64-bit value in words 12 (low) and 13 (high): word 12 is always incremented
// and written back, word 13 only when word 12 wraps to zero.
func chachaGeneric(dst *[64]byte, state *[64]byte, rounds int) {
	// Decode the input words; they are needed again for the final addition.
	var in [16]uint32
	for i := range in {
		in[i] = binary.LittleEndian.Uint32(state[4*i:])
	}

	x0, x1, x2, x3 := in[0], in[1], in[2], in[3]
	x4, x5, x6, x7 := in[4], in[5], in[6], in[7]
	x8, x9, x10, x11 := in[8], in[9], in[10], in[11]
	x12, x13, x14, x15 := in[12], in[13], in[14], in[15]

	for r := 0; r < rounds; r += 2 {
		// Column round.
		x0, x4, x8, x12 = chachaGenericQuarterRound(x0, x4, x8, x12)
		x1, x5, x9, x13 = chachaGenericQuarterRound(x1, x5, x9, x13)
		x2, x6, x10, x14 = chachaGenericQuarterRound(x2, x6, x10, x14)
		x3, x7, x11, x15 = chachaGenericQuarterRound(x3, x7, x11, x15)

		// Diagonal round.
		x0, x5, x10, x15 = chachaGenericQuarterRound(x0, x5, x10, x15)
		x1, x6, x11, x12 = chachaGenericQuarterRound(x1, x6, x11, x12)
		x2, x7, x8, x13 = chachaGenericQuarterRound(x2, x7, x8, x13)
		x3, x4, x9, x14 = chachaGenericQuarterRound(x3, x4, x9, x14)
	}

	mixed := [16]uint32{
		x0, x1, x2, x3, x4, x5, x6, x7,
		x8, x9, x10, x11, x12, x13, x14, x15,
	}

	// Bump the counter in state before dst is written, as the output block
	// must be built from the pre-increment words kept in in[].
	counterLow := in[12] + 1
	binary.LittleEndian.PutUint32(state[48:], counterLow)
	if counterLow == 0 { // low word wrapped: carry into the high word
		binary.LittleEndian.PutUint32(state[52:], in[13]+1)
	}

	// Feed-forward addition and little-endian serialisation.
	for i, w := range mixed {
		binary.LittleEndian.PutUint32(dst[4*i:], w+in[i])
	}
}
| 
 | ||||
| func hChaCha20Generic(out *[32]byte, nonce *[16]byte, key *[32]byte) { | ||||
| 	v00 := sigma[0] | ||||
| 	v01 := sigma[1] | ||||
| 	v02 := sigma[2] | ||||
| 	v03 := sigma[3] | ||||
| 	v04 := binary.LittleEndian.Uint32(key[0:]) | ||||
| 	v05 := binary.LittleEndian.Uint32(key[4:]) | ||||
| 	v06 := binary.LittleEndian.Uint32(key[8:]) | ||||
| 	v07 := binary.LittleEndian.Uint32(key[12:]) | ||||
| 	v08 := binary.LittleEndian.Uint32(key[16:]) | ||||
| 	v09 := binary.LittleEndian.Uint32(key[20:]) | ||||
| 	v10 := binary.LittleEndian.Uint32(key[24:]) | ||||
| 	v11 := binary.LittleEndian.Uint32(key[28:]) | ||||
| 	v12 := binary.LittleEndian.Uint32(nonce[0:]) | ||||
| 	v13 := binary.LittleEndian.Uint32(nonce[4:]) | ||||
| 	v14 := binary.LittleEndian.Uint32(nonce[8:]) | ||||
| 	v15 := binary.LittleEndian.Uint32(nonce[12:]) | ||||
| 
 | ||||
| 	for i := 0; i < 20; i += 2 { | ||||
| 		v00 += v04 | ||||
| 		v12 ^= v00 | ||||
| 		v12 = (v12 << 16) | (v12 >> 16) | ||||
| 		v08 += v12 | ||||
| 		v04 ^= v08 | ||||
| 		v04 = (v04 << 12) | (v04 >> 20) | ||||
| 		v00 += v04 | ||||
| 		v12 ^= v00 | ||||
| 		v12 = (v12 << 8) | (v12 >> 24) | ||||
| 		v08 += v12 | ||||
| 		v04 ^= v08 | ||||
| 		v04 = (v04 << 7) | (v04 >> 25) | ||||
| 		v01 += v05 | ||||
| 		v13 ^= v01 | ||||
| 		v13 = (v13 << 16) | (v13 >> 16) | ||||
| 		v09 += v13 | ||||
| 		v05 ^= v09 | ||||
| 		v05 = (v05 << 12) | (v05 >> 20) | ||||
| 		v01 += v05 | ||||
| 		v13 ^= v01 | ||||
| 		v13 = (v13 << 8) | (v13 >> 24) | ||||
| 		v09 += v13 | ||||
| 		v05 ^= v09 | ||||
| 		v05 = (v05 << 7) | (v05 >> 25) | ||||
| 		v02 += v06 | ||||
| 		v14 ^= v02 | ||||
| 		v14 = (v14 << 16) | (v14 >> 16) | ||||
| 		v10 += v14 | ||||
| 		v06 ^= v10 | ||||
| 		v06 = (v06 << 12) | (v06 >> 20) | ||||
| 		v02 += v06 | ||||
| 		v14 ^= v02 | ||||
| 		v14 = (v14 << 8) | (v14 >> 24) | ||||
| 		v10 += v14 | ||||
| 		v06 ^= v10 | ||||
| 		v06 = (v06 << 7) | (v06 >> 25) | ||||
| 		v03 += v07 | ||||
| 		v15 ^= v03 | ||||
| 		v15 = (v15 << 16) | (v15 >> 16) | ||||
| 		v11 += v15 | ||||
| 		v07 ^= v11 | ||||
| 		v07 = (v07 << 12) | (v07 >> 20) | ||||
| 		v03 += v07 | ||||
| 		v15 ^= v03 | ||||
| 		v15 = (v15 << 8) | (v15 >> 24) | ||||
| 		v11 += v15 | ||||
| 		v07 ^= v11 | ||||
| 		v07 = (v07 << 7) | (v07 >> 25) | ||||
| 		v00 += v05 | ||||
| 		v15 ^= v00 | ||||
| 		v15 = (v15 << 16) | (v15 >> 16) | ||||
| 		v10 += v15 | ||||
| 		v05 ^= v10 | ||||
| 		v05 = (v05 << 12) | (v05 >> 20) | ||||
| 		v00 += v05 | ||||
| 		v15 ^= v00 | ||||
| 		v15 = (v15 << 8) | (v15 >> 24) | ||||
| 		v10 += v15 | ||||
| 		v05 ^= v10 | ||||
| 		v05 = (v05 << 7) | (v05 >> 25) | ||||
| 		v01 += v06 | ||||
| 		v12 ^= v01 | ||||
| 		v12 = (v12 << 16) | (v12 >> 16) | ||||
| 		v11 += v12 | ||||
| 		v06 ^= v11 | ||||
| 		v06 = (v06 << 12) | (v06 >> 20) | ||||
| 		v01 += v06 | ||||
| 		v12 ^= v01 | ||||
| 		v12 = (v12 << 8) | (v12 >> 24) | ||||
| 		v11 += v12 | ||||
| 		v06 ^= v11 | ||||
| 		v06 = (v06 << 7) | (v06 >> 25) | ||||
| 		v02 += v07 | ||||
| 		v13 ^= v02 | ||||
| 		v13 = (v13 << 16) | (v13 >> 16) | ||||
| 		v08 += v13 | ||||
| 		v07 ^= v08 | ||||
| 		v07 = (v07 << 12) | (v07 >> 20) | ||||
| 		v02 += v07 | ||||
| 		v13 ^= v02 | ||||
| 		v13 = (v13 << 8) | (v13 >> 24) | ||||
| 		v08 += v13 | ||||
| 		v07 ^= v08 | ||||
| 		v07 = (v07 << 7) | (v07 >> 25) | ||||
| 		v03 += v04 | ||||
| 		v14 ^= v03 | ||||
| 		v14 = (v14 << 16) | (v14 >> 16) | ||||
| 		v09 += v14 | ||||
| 		v04 ^= v09 | ||||
| 		v04 = (v04 << 12) | (v04 >> 20) | ||||
| 		v03 += v04 | ||||
| 		v14 ^= v03 | ||||
| 		v14 = (v14 << 8) | (v14 >> 24) | ||||
| 		v09 += v14 | ||||
| 		v04 ^= v09 | ||||
| 		v04 = (v04 << 7) | (v04 >> 25) | ||||
| 	} | ||||
| 
 | ||||
| 	binary.LittleEndian.PutUint32(out[0:], v00) | ||||
| 	binary.LittleEndian.PutUint32(out[4:], v01) | ||||
| 	binary.LittleEndian.PutUint32(out[8:], v02) | ||||
| 	binary.LittleEndian.PutUint32(out[12:], v03) | ||||
| 	binary.LittleEndian.PutUint32(out[16:], v12) | ||||
| 	binary.LittleEndian.PutUint32(out[20:], v13) | ||||
| 	binary.LittleEndian.PutUint32(out[24:], v14) | ||||
| 	binary.LittleEndian.PutUint32(out[28:], v15) | ||||
| } | ||||
							
								
								
									
										33
									
								
								vendor/github.com/aead/chacha20/chacha/chacha_ref.go
									
										
									
										generated
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										33
									
								
								vendor/github.com/aead/chacha20/chacha/chacha_ref.go
									
										
									
										generated
									
									
										vendored
									
									
										Normal file
									
								
							|  | @ -0,0 +1,33 @@ | |||
| // Copyright (c) 2016 Andreas Auernhammer. All rights reserved. | ||||
| // Use of this source code is governed by a license that can be | ||||
| // found in the LICENSE file. | ||||
| 
 | ||||
| // +build !amd64,!386 gccgo appengine nacl | ||||
| 
 | ||||
| package chacha | ||||
| 
 | ||||
| import "encoding/binary" | ||||
| 
 | ||||
| func init() { | ||||
| 	useSSE2 = false | ||||
| 	useSSSE3 = false | ||||
| 	useAVX = false | ||||
| 	useAVX2 = false | ||||
| } | ||||
| 
 | ||||
| func initialize(state *[64]byte, key []byte, nonce *[16]byte) { | ||||
| 	binary.LittleEndian.PutUint32(state[0:], sigma[0]) | ||||
| 	binary.LittleEndian.PutUint32(state[4:], sigma[1]) | ||||
| 	binary.LittleEndian.PutUint32(state[8:], sigma[2]) | ||||
| 	binary.LittleEndian.PutUint32(state[12:], sigma[3]) | ||||
| 	copy(state[16:], key[:]) | ||||
| 	copy(state[48:], nonce[:]) | ||||
| } | ||||
| 
 | ||||
| func xorKeyStream(dst, src []byte, block, state *[64]byte, rounds int) int { | ||||
| 	return xorKeyStreamGeneric(dst, src, block, state, rounds) | ||||
| } | ||||
| 
 | ||||
| func hChaCha20(out *[32]byte, nonce *[16]byte, key *[32]byte) { | ||||
| 	hChaCha20Generic(out, nonce, key) | ||||
| } | ||||
							
								
								
									
										53
									
								
								vendor/github.com/aead/chacha20/chacha/const.s
									
										
									
										generated
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										53
									
								
								vendor/github.com/aead/chacha20/chacha/const.s
									
										
									
										generated
									
									
										vendored
									
									
										Normal file
									
								
							|  | @ -0,0 +1,53 @@ | |||
| // Copyright (c) 2018 Andreas Auernhammer. All rights reserved. | ||||
| // Use of this source code is governed by a license that can be | ||||
| // found in the LICENSE file. | ||||
| 
 | ||||
| // +build 386,!gccgo,!appengine,!nacl amd64,!gccgo,!appengine,!nacl | ||||
| 
 | ||||
| #include "textflag.h" | ||||
| 
 | ||||
| DATA ·sigma<>+0x00(SB)/4, $0x61707865 | ||||
| DATA ·sigma<>+0x04(SB)/4, $0x3320646e | ||||
| DATA ·sigma<>+0x08(SB)/4, $0x79622d32 | ||||
| DATA ·sigma<>+0x0C(SB)/4, $0x6b206574 | ||||
| GLOBL ·sigma<>(SB), (NOPTR+RODATA), $16 // The 4 ChaCha initialization constants | ||||
| 
 | ||||
| // SSE2/SSSE3/AVX constants | ||||
| 
 | ||||
| DATA ·one<>+0x00(SB)/8, $1 | ||||
| DATA ·one<>+0x08(SB)/8, $0 | ||||
| GLOBL ·one<>(SB), (NOPTR+RODATA), $16 // The constant 1 as 128 bit value | ||||
| 
 | ||||
| DATA ·rol16<>+0x00(SB)/8, $0x0504070601000302 | ||||
| DATA ·rol16<>+0x08(SB)/8, $0x0D0C0F0E09080B0A | ||||
| GLOBL ·rol16<>(SB), (NOPTR+RODATA), $16 // The PSHUFB 16 bit left rotate constant | ||||
| 
 | ||||
| DATA ·rol8<>+0x00(SB)/8, $0x0605040702010003 | ||||
| DATA ·rol8<>+0x08(SB)/8, $0x0E0D0C0F0A09080B | ||||
| GLOBL ·rol8<>(SB), (NOPTR+RODATA), $16 // The PSHUFB 8 bit left rotate constant | ||||
| 
 | ||||
| // AVX2 constants | ||||
| 
 | ||||
| DATA ·one_AVX2<>+0x00(SB)/8, $0 | ||||
| DATA ·one_AVX2<>+0x08(SB)/8, $0 | ||||
| DATA ·one_AVX2<>+0x10(SB)/8, $1 | ||||
| DATA ·one_AVX2<>+0x18(SB)/8, $0 | ||||
| GLOBL ·one_AVX2<>(SB), (NOPTR+RODATA), $32 // The constant 1 as 256 bit value | ||||
| 
 | ||||
| DATA ·two_AVX2<>+0x00(SB)/8, $2 | ||||
| DATA ·two_AVX2<>+0x08(SB)/8, $0 | ||||
| DATA ·two_AVX2<>+0x10(SB)/8, $2 | ||||
| DATA ·two_AVX2<>+0x18(SB)/8, $0 | ||||
| GLOBL ·two_AVX2<>(SB), (NOPTR+RODATA), $32 | ||||
| 
 | ||||
| DATA ·rol16_AVX2<>+0x00(SB)/8, $0x0504070601000302 | ||||
| DATA ·rol16_AVX2<>+0x08(SB)/8, $0x0D0C0F0E09080B0A | ||||
| DATA ·rol16_AVX2<>+0x10(SB)/8, $0x0504070601000302 | ||||
| DATA ·rol16_AVX2<>+0x18(SB)/8, $0x0D0C0F0E09080B0A | ||||
| GLOBL ·rol16_AVX2<>(SB), (NOPTR+RODATA), $32 // The VPSHUFB 16 bit left rotate constant | ||||
| 
 | ||||
| DATA ·rol8_AVX2<>+0x00(SB)/8, $0x0605040702010003 | ||||
| DATA ·rol8_AVX2<>+0x08(SB)/8, $0x0E0D0C0F0A09080B | ||||
| DATA ·rol8_AVX2<>+0x10(SB)/8, $0x0605040702010003 | ||||
| DATA ·rol8_AVX2<>+0x18(SB)/8, $0x0E0D0C0F0A09080B | ||||
| GLOBL ·rol8_AVX2<>(SB), (NOPTR+RODATA), $32 // The VPSHUFB 8 bit left rotate constant | ||||
							
								
								
									
										163
									
								
								vendor/github.com/aead/chacha20/chacha/macro.s
									
										
									
										generated
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										163
									
								
								vendor/github.com/aead/chacha20/chacha/macro.s
									
										
									
										generated
									
									
										vendored
									
									
										Normal file
									
								
							|  | @ -0,0 +1,163 @@ | |||
| // Copyright (c) 2018 Andreas Auernhammer. All rights reserved. | ||||
| // Use of this source code is governed by a license that can be | ||||
| // found in the LICENSE file. | ||||
| 
 | ||||
| // +build 386,!gccgo,!appengine,!nacl amd64,!gccgo,!appengine,!nacl | ||||
| 
 | ||||
| // ROTL_SSE rotates all 4 32 bit values of the XMM register v | ||||
| // left by n bits using SSE2 instructions (0 <= n <= 32). | ||||
| // The XMM register t is used as a temp. register. | ||||
| #define ROTL_SSE(n, t, v) \ | ||||
| 	MOVO  v, t;       \
 | ||||
| 	PSLLL $n, t;      \
 | ||||
| 	PSRLL $(32-n), v; \
 | ||||
| 	PXOR  t, v | ||||
| 
 | ||||
| // ROTL_AVX rotates all 4/8 32 bit values of the AVX/AVX2 register v | ||||
| // left by n bits using AVX/AVX2 instructions (0 <= n <= 32). | ||||
| // The AVX/AVX2 register t is used as a temp. register. | ||||
| #define ROTL_AVX(n, t, v) \ | ||||
| 	VPSLLD $n, v, t;      \
 | ||||
| 	VPSRLD $(32-n), v, v; \
 | ||||
| 	VPXOR  v, t, v | ||||
| 
 | ||||
| // CHACHA_QROUND_SSE2 performs a ChaCha quarter-round using the | ||||
| // 4 XMM registers v0, v1, v2 and v3. It uses only ROTL_SSE for | ||||
| // rotations. The XMM register t is used as a temp. register. | ||||
| #define CHACHA_QROUND_SSE2(v0, v1, v2, v3, t) \ | ||||
| 	PADDL v1, v0;        \
 | ||||
| 	PXOR  v0, v3;        \
 | ||||
| 	ROTL_SSE(16, t, v3); \
 | ||||
| 	PADDL v3, v2;        \
 | ||||
| 	PXOR  v2, v1;        \
 | ||||
| 	ROTL_SSE(12, t, v1); \
 | ||||
| 	PADDL v1, v0;        \
 | ||||
| 	PXOR  v0, v3;        \
 | ||||
| 	ROTL_SSE(8, t, v3);  \
 | ||||
| 	PADDL v3, v2;        \
 | ||||
| 	PXOR  v2, v1;        \
 | ||||
| 	ROTL_SSE(7, t, v1) | ||||
| 
 | ||||
| // CHACHA_QROUND_SSSE3 performs a ChaCha quarter-round using the | ||||
| // 4 XMM registers v0, v1, v2 and v3. It uses PSHUFB for 8/16 bit | ||||
| // rotations. The XMM register t is used as a temp. register. | ||||
| // | ||||
| // r16 holds the PSHUFB constant for a 16 bit left rotate. | ||||
| // r8 holds the PSHUFB constant for a 8 bit left rotate. | ||||
| #define CHACHA_QROUND_SSSE3(v0, v1, v2, v3, t, r16, r8) \ | ||||
| 	PADDL  v1, v0;       \
 | ||||
| 	PXOR   v0, v3;       \
 | ||||
| 	PSHUFB r16, v3;      \
 | ||||
| 	PADDL  v3, v2;       \
 | ||||
| 	PXOR   v2, v1;       \
 | ||||
| 	ROTL_SSE(12, t, v1); \
 | ||||
| 	PADDL  v1, v0;       \
 | ||||
| 	PXOR   v0, v3;       \
 | ||||
| 	PSHUFB r8, v3;       \
 | ||||
| 	PADDL  v3, v2;       \
 | ||||
| 	PXOR   v2, v1;       \
 | ||||
| 	ROTL_SSE(7, t, v1) | ||||
| 
 | ||||
| // CHACHA_QROUND_AVX performs a ChaCha quarter-round using the | ||||
| // 4 AVX/AVX2 registers v0, v1, v2 and v3. It uses VPSHUFB for 8/16 bit | ||||
| // rotations. The AVX/AVX2 register t is used as a temp. register. | ||||
| // | ||||
| // r16 holds the VPSHUFB constant for a 16 bit left rotate. | ||||
| // r8 holds the VPSHUFB constant for a 8 bit left rotate. | ||||
| #define CHACHA_QROUND_AVX(v0, v1, v2, v3, t, r16, r8) \ | ||||
| 	VPADDD  v0, v1, v0;  \
 | ||||
| 	VPXOR   v3, v0, v3;  \
 | ||||
| 	VPSHUFB r16, v3, v3; \
 | ||||
| 	VPADDD  v2, v3, v2;  \
 | ||||
| 	VPXOR   v1, v2, v1;  \
 | ||||
| 	ROTL_AVX(12, t, v1); \
 | ||||
| 	VPADDD  v0, v1, v0;  \
 | ||||
| 	VPXOR   v3, v0, v3;  \
 | ||||
| 	VPSHUFB r8, v3, v3;  \
 | ||||
| 	VPADDD  v2, v3, v2;  \
 | ||||
| 	VPXOR   v1, v2, v1;  \
 | ||||
| 	ROTL_AVX(7, t, v1) | ||||
| 
 | ||||
| // CHACHA_SHUFFLE_SSE performs a ChaCha shuffle using the | ||||
| // 3 XMM registers v1, v2 and v3. The inverse shuffle is | ||||
| // performed by switching v1 and v3: CHACHA_SHUFFLE_SSE(v3, v2, v1). | ||||
| #define CHACHA_SHUFFLE_SSE(v1, v2, v3) \ | ||||
| 	PSHUFL $0x39, v1, v1; \
 | ||||
| 	PSHUFL $0x4E, v2, v2; \
 | ||||
| 	PSHUFL $0x93, v3, v3 | ||||
| 
 | ||||
| // CHACHA_SHUFFLE_AVX performs a ChaCha shuffle using the | ||||
| // 3 AVX/AVX2 registers v1, v2 and v3. The inverse shuffle is | ||||
| // performed by switching v1 and v3: CHACHA_SHUFFLE_AVX(v3, v2, v1). | ||||
| #define CHACHA_SHUFFLE_AVX(v1, v2, v3) \ | ||||
| 	VPSHUFD $0x39, v1, v1; \
 | ||||
| 	VPSHUFD $0x4E, v2, v2; \
 | ||||
| 	VPSHUFD $0x93, v3, v3 | ||||
| 
 | ||||
| // XOR_SSE extracts 4x16 byte vectors from src at | ||||
| // off, xors all vectors with the corresponding XMM | ||||
| // register (v0 - v3) and writes the result to dst | ||||
| // at off. | ||||
| // The XMM register t is used as a temp. register. | ||||
| #define XOR_SSE(dst, src, off, v0, v1, v2, v3, t) \ | ||||
| 	MOVOU 0+off(src), t;  \
 | ||||
| 	PXOR  v0, t;          \
 | ||||
| 	MOVOU t, 0+off(dst);  \
 | ||||
| 	MOVOU 16+off(src), t; \
 | ||||
| 	PXOR  v1, t;          \
 | ||||
| 	MOVOU t, 16+off(dst); \
 | ||||
| 	MOVOU 32+off(src), t; \
 | ||||
| 	PXOR  v2, t;          \
 | ||||
| 	MOVOU t, 32+off(dst); \
 | ||||
| 	MOVOU 48+off(src), t; \
 | ||||
| 	PXOR  v3, t;          \
 | ||||
| 	MOVOU t, 48+off(dst) | ||||
| 
 | ||||
| // XOR_AVX extracts 4x16 byte vectors from src at | ||||
| // off, xors all vectors with the corresponding AVX | ||||
| // register (v0 - v3) and writes the result to dst | ||||
| // at off. | ||||
| // The XMM register t is used as a temp. register. | ||||
| #define XOR_AVX(dst, src, off, v0, v1, v2, v3, t) \ | ||||
| 	VPXOR   0+off(src), v0, t;  \
 | ||||
| 	VMOVDQU t, 0+off(dst);      \
 | ||||
| 	VPXOR   16+off(src), v1, t; \
 | ||||
| 	VMOVDQU t, 16+off(dst);     \
 | ||||
| 	VPXOR   32+off(src), v2, t; \
 | ||||
| 	VMOVDQU t, 32+off(dst);     \
 | ||||
| 	VPXOR   48+off(src), v3, t; \
 | ||||
| 	VMOVDQU t, 48+off(dst) | ||||
| 
 | ||||
// XOR_AVX2 XORs 128 bytes of src (starting at off) with keystream taken from
// the four 256-bit registers v0 - v3 and writes the result to dst at off.
// Each 32-byte chunk is formed by VPERM2I128 from a register pair: (v0, v1)
// and (v2, v3) with selector $32 for the first 64 bytes, then the same pairs
// with selector $49 for the last 64 bytes.
// NOTE(review): $32 presumably picks the low 128-bit lanes of both registers
// and $49 the high lanes — confirm against the VPERM2I128 documentation.
// t0 and t1 are used as temp. registers.
| #define XOR_AVX2(dst, src, off, v0, v1, v2, v3, t0, t1) \ | ||||
| 	VMOVDQU    (0+off)(src), t0;  \
 | ||||
| 	VPERM2I128 $32, v1, v0, t1;   \
 | ||||
| 	VPXOR      t0, t1, t0;        \
 | ||||
| 	VMOVDQU    t0, (0+off)(dst);  \
 | ||||
| 	VMOVDQU    (32+off)(src), t0; \
 | ||||
| 	VPERM2I128 $32, v3, v2, t1;   \
 | ||||
| 	VPXOR      t0, t1, t0;        \
 | ||||
| 	VMOVDQU    t0, (32+off)(dst); \
 | ||||
| 	VMOVDQU    (64+off)(src), t0; \
 | ||||
| 	VPERM2I128 $49, v1, v0, t1;   \
 | ||||
| 	VPXOR      t0, t1, t0;        \
 | ||||
| 	VMOVDQU    t0, (64+off)(dst); \
 | ||||
| 	VMOVDQU    (96+off)(src), t0; \
 | ||||
| 	VPERM2I128 $49, v3, v2, t1;   \
 | ||||
| 	VPXOR      t0, t1, t0;        \
 | ||||
| 	VMOVDQU    t0, (96+off)(dst) | ||||
| 
 | ||||
// XOR_UPPER_AVX2 XORs 64 bytes of src (starting at off) with keystream taken
// from v0 - v3 and writes the result to dst at off. It performs the same two
// steps as the first half of XOR_AVX2: the register pairs (v0, v1) and
// (v2, v3) are combined by VPERM2I128 with selector $32.
// t0 and t1 are used as temp. registers.
// Note: the last line ends with a continuation backslash, so the macro body
// extends over the blank line that follows it.
| #define XOR_UPPER_AVX2(dst, src, off, v0, v1, v2, v3, t0, t1) \ | ||||
| 	VMOVDQU    (0+off)(src), t0;  \
 | ||||
| 	VPERM2I128 $32, v1, v0, t1;   \
 | ||||
| 	VPXOR      t0, t1, t0;        \
 | ||||
| 	VMOVDQU    t0, (0+off)(dst);  \
 | ||||
| 	VMOVDQU    (32+off)(src), t0; \
 | ||||
| 	VPERM2I128 $32, v3, v2, t1;   \
 | ||||
| 	VPXOR      t0, t1, t0;        \
 | ||||
| 	VMOVDQU    t0, (32+off)(dst); \
 | ||||
| 
 | ||||
// EXTRACT_LOWER stores 64 bytes to dst without XORing them with any input:
// the register pairs (v0, v1) and (v2, v3) are each combined by VPERM2I128
// with selector $49 and written to dst at offsets 0 and 32.
// NOTE(review): $49 is the selector XOR_AVX2 uses for its second 64 bytes;
// presumably this saves that half of the keystream for later use — confirm.
// t0 is used as a temp. register.
| #define EXTRACT_LOWER(dst, v0, v1, v2, v3, t0) \ | ||||
| 	VPERM2I128 $49, v1, v0, t0; \
 | ||||
| 	VMOVDQU    t0, 0(dst);      \
 | ||||
| 	VPERM2I128 $49, v3, v2, t0; \
 | ||||
| 	VMOVDQU    t0, 32(dst) | ||||
							
								
								
									
										2
									
								
								vendor/modules.txt
									
										
									
									
										vendored
									
									
								
							
							
						
						
									
										2
									
								
								vendor/modules.txt
									
										
									
									
										vendored
									
									
								
							|  | @ -15,6 +15,8 @@ blitter.com/go/kyber | |||
| blitter.com/go/mtwist | ||||
| # blitter.com/go/newhope v0.0.0-20200130200750-192fc08a8aae | ||||
| blitter.com/go/newhope | ||||
| # github.com/aead/chacha20 v0.0.0-20180709150244-8b13a72661da | ||||
| github.com/aead/chacha20/chacha | ||||
| # github.com/jameskeane/bcrypt v0.0.0-20120420032655-c3cd44c1e20f | ||||
| github.com/jameskeane/bcrypt | ||||
| # github.com/klauspost/cpuid v1.2.2 | ||||
|  |  | |||
							
								
								
									
										2
									
								
								xs/xs.go
									
										
									
									
									
								
							
							
						
						
									
										2
									
								
								xs/xs.go
									
										
									
									
									
								
							|  | @ -624,7 +624,7 @@ func main() { | |||
| 
 | ||||
| 	flag.BoolVar(&vopt, "v", false, "show version") | ||||
| 	flag.BoolVar(&dbg, "d", false, "debug logging") | ||||
| 	flag.StringVar(&cipherAlg, "c", "C_AES_256", "session `cipher` [C_AES_256 | C_TWOFISH_128 | C_BLOWFISH_64 | C_CRYPTMT1]") | ||||
| 	flag.StringVar(&cipherAlg, "c", "C_AES_256", "session `cipher` [C_AES_256 | C_TWOFISH_128 | C_BLOWFISH_64 | C_CRYPTMT1 | C_CHACHA20_12]") | ||||
| 	flag.StringVar(&hmacAlg, "m", "H_SHA256", "session `HMAC` [H_SHA256 | H_SHA512]") | ||||
| 	flag.StringVar(&kexAlg, "k", "KEX_HERRADURA512", "KEx `alg` [KEX_HERRADURA{256/512/1024/2048} | KEX_KYBER{512/768/1024} | KEX_NEWHOPE | KEX_NEWHOPE_SIMPLE]") | ||||
| 	flag.StringVar(&kcpMode, "K", "unused", "KCP `alg`, one of [KCP_NONE | KCP_AES | KCP_BLOWFISH | KCP_CAST5 | KCP_SM4 | KCP_SALSA20 | KCP_SIMPLEXOR | KCP_TEA | KCP_3DES | KCP_TWOFISH | KCP_XTEA] to use KCP (github.com/xtaci/kcp-go) reliable UDP instead of TCP") | ||||
|  |  | |||
|  | @ -21,6 +21,7 @@ import ( | |||
| 	"log" | ||||
| 
 | ||||
| 	"blitter.com/go/cryptmt" | ||||
| 	"github.com/aead/chacha20/chacha" | ||||
| 	"golang.org/x/crypto/blowfish" | ||||
| 	"golang.org/x/crypto/twofish" | ||||
| 
 | ||||
|  | @ -104,6 +105,18 @@ func (hc Conn) getStream(keymat []byte) (rc cipher.Stream, mc hash.Hash, err err | |||
| 	case CAlgCryptMT1: | ||||
| 		rc = cryptmt.New(nil, nil, keymat) | ||||
| 		log.Printf("[cipher CRYPTMT1 (%d)]\n", copts) | ||||
| 	case CAlgChaCha20_12: | ||||
| 		keymat = expandKeyMat(keymat, chacha.KeySize) | ||||
| 		key = keymat[0:chacha.KeySize] | ||||
| 		ivlen = chacha.INonceSize | ||||
| 		iv = keymat[chacha.KeySize : chacha.KeySize+ivlen] | ||||
| 		rc, err = chacha.NewCipher(iv, key, 20) | ||||
| 		if err != nil { | ||||
| 			log.Printf("[ChaCha20 config error]\n") | ||||
| 			fmt.Printf("[ChaCha20 config error]\n") | ||||
| 		} | ||||
| 		// TODO: SetCounter() to something derived from key or nonce or extra keymat? | ||||
| 		log.Printf("[cipher CHACHA20_12 (%d)]\n", copts) | ||||
| 	default: | ||||
| 		log.Printf("[invalid cipher (%d)]\n", copts) | ||||
| 		fmt.Printf("DOOFUS SET A VALID CIPHER ALG (%d)\n", copts) | ||||
|  |  | |||
|  | @ -99,6 +99,7 @@ const ( | |||
| 	CAlgTwofish128 // golang.org/x/crypto/twofish | ||||
| 	CAlgBlowfish64 // golang.org/x/crypto/blowfish | ||||
| 	CAlgCryptMT1   // cryptmt using mtwist64 | ||||
| 	CAlgChaCha20_12 | ||||
| 	CAlgNoneDisallowed | ||||
| ) | ||||
| 
 | ||||
|  |  | |||
							
								
								
									
										10
									
								
								xsnet/net.go
									
										
									
									
									
								
							
							
						
						
									
										10
									
								
								xsnet/net.go
									
										
									
									
									
								
							|  | @ -145,6 +145,8 @@ func (c *CSCipherAlg) String() string { | |||
| 		return "C_BLOWFISH_64" | ||||
| 	case CAlgCryptMT1: | ||||
| 		return "C_CRYPTMT1" | ||||
| 	case CAlgChaCha20_12: | ||||
| 		return "C_CHACHA20_12" | ||||
| 	default: | ||||
| 		return "C_ERR_UNK" | ||||
| 	} | ||||
|  | @ -280,6 +282,8 @@ func _new(kexAlg KEXAlg, conn *net.Conn) (hc *Conn, e error) { | |||
| 		hc.kex = KEX_HERRADURA512 | ||||
| 		log.Printf("[KEx alg %d ?? defaults to %d]\n", kexAlg, hc.kex) | ||||
| 	} | ||||
| 
 | ||||
| 	//hc.logCipherText = true // !!! DEBUGGING ONLY !!! NEVER DEPLOY this uncommented !!! | ||||
| 	return | ||||
| } | ||||
| 
 | ||||
|  | @ -298,7 +302,7 @@ func _new(kexAlg KEXAlg, conn *net.Conn) (hc *Conn, e error) { | |||
| // | ||||
| // Session (symmetric) crypto | ||||
| // | ||||
| // C_AES_256 C_TWOFISH_128 C_BLOWFISH_128 C_CRYPTMT1 | ||||
| // C_AES_256 C_TWOFISH_128 C_BLOWFISH_64 C_CRYPTMT1 C_CHACHA20_12 | ||||
| // | ||||
| // Session HMACs | ||||
| // | ||||
|  | @ -322,6 +326,10 @@ func (hc *Conn) applyConnExtensions(extensions ...string) { | |||
| 			log.Println("[extension arg = C_CRYPTMT1]") | ||||
| 			hc.cipheropts &= (0xFFFFFF00) | ||||
| 			hc.cipheropts |= CAlgCryptMT1 | ||||
| 		case "C_CHACHA20_12": | ||||
| 			log.Println("[extension arg = C_CHACHA20_12]") | ||||
| 			hc.cipheropts &= (0xFFFFFF00) | ||||
| 			hc.cipheropts |= CAlgChaCha20_12 | ||||
| 		case "H_SHA256": | ||||
| 			log.Println("[extension arg = H_SHA256]") | ||||
| 			hc.cipheropts &= (0xFFFF00FF) | ||||
|  |  | |||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue