mirror of
				https://git.wownero.com/wownero/RandomWOW.git
				synced 2024-08-15 00:23:14 +00:00 
			
		
		
		
	COND_R instruction reworked as CBRANCH
instruction mod field bits reallocated
This commit is contained in:
		
							parent
							
								
									ebddaf671c
								
							
						
					
					
						commit
						ffebc37381
					
				
					 12 changed files with 166 additions and 313 deletions
				
			
		
							
								
								
									
										246
									
								
								doc/program.asm
									
										
									
									
									
								
							
							
						
						
									
										246
									
								
								doc/program.asm
									
										
									
									
									
								
							|  | @ -25,9 +25,9 @@ randomx_isn_6: | |||
| 	; FSQRT_R e3 | ||||
| 	sqrtpd xmm7, xmm7 | ||||
| randomx_isn_7: | ||||
| 	; ISTORE L1[r0-784322734], r3 | ||||
| 	; ISTORE L3[r0-784322734], r3 | ||||
| 	lea eax, [r8d-784322734] | ||||
| 	and eax, 16376 | ||||
| 	and eax, 2097144 | ||||
| 	mov qword ptr [rsi+rax], r11 | ||||
| randomx_isn_8: | ||||
| 	; FMUL_R e1, a1 | ||||
|  | @ -55,8 +55,8 @@ randomx_isn_14: | |||
| 	; FSQRT_R e2 | ||||
| 	sqrtpd xmm6, xmm6 | ||||
| randomx_isn_15: | ||||
| 	; IADD_RS r6, r2, LSH 1 | ||||
| 	lea r14, [r14+r10*2] | ||||
| 	; IADD_RS r6, r2, SHFT 3 | ||||
| 	lea r14, [r14+r10*8] | ||||
| randomx_isn_16: | ||||
| 	; FSUB_M f2, L1[r1-1890725713] | ||||
| 	lea eax, [r9d-1890725713] | ||||
|  | @ -68,22 +68,18 @@ randomx_isn_17: | |||
| 	mov ecx, r11d | ||||
| 	ror r12, cl | ||||
| randomx_isn_18: | ||||
| 	; ISTORE L3[r4+1297827817], r4 | ||||
| 	; ISTORE L1[r4+1297827817], r4 | ||||
| 	lea eax, [r12d+1297827817] | ||||
| 	and eax, 2097144 | ||||
| 	and eax, 16376 | ||||
| 	mov qword ptr [rsi+rax], r12 | ||||
| randomx_isn_19: | ||||
| 	; FMUL_R e1, a2 | ||||
| 	mulpd xmm5, xmm10 | ||||
| randomx_isn_20: | ||||
| 	; COND_R r6, of(r3, 1593588996), LSH 1 | ||||
| 	add r8, 2 | ||||
| 	test r8, 254 | ||||
| 	; CBRANCH 1593588996, COND 3 | ||||
| 	add r8, 1593589004 | ||||
| 	test r8, 1016 | ||||
| 	jz randomx_isn_0 | ||||
| 	xor ecx, ecx | ||||
| 	cmp r11d, 1593588996 | ||||
| 	seto cl | ||||
| 	add r14, rcx | ||||
| randomx_isn_21: | ||||
| 	; IXOR_M r7, L1[r2+1680388681] | ||||
| 	lea eax, [r10d+1680388681] | ||||
|  | @ -98,26 +94,22 @@ randomx_isn_23: | |||
| 	; FMUL_R e2, a0 | ||||
| 	mulpd xmm6, xmm8 | ||||
| randomx_isn_24: | ||||
| 	; COND_R r6, no(r0, 149087159), LSH 6 | ||||
| 	add r8, 64 | ||||
| 	test r8, 8128 | ||||
| 	; CBRANCH 149087159, COND 13 | ||||
| 	add r8, 149087159 | ||||
| 	test r8, 1040384 | ||||
| 	jz randomx_isn_21 | ||||
| 	xor ecx, ecx | ||||
| 	cmp r8d, 149087159 | ||||
| 	setno cl | ||||
| 	add r14, rcx | ||||
| randomx_isn_25: | ||||
| 	; FADD_R f3, a0 | ||||
| 	addpd xmm3, xmm8 | ||||
| randomx_isn_26: | ||||
| 	; IADD_RS r7, r0, LSH 3 | ||||
| 	lea r15, [r15+r8*8] | ||||
| 	; IADD_RS r7, r0, SHFT 2 | ||||
| 	lea r15, [r15+r8*4] | ||||
| randomx_isn_27: | ||||
| 	; IMUL_R r2, r3 | ||||
| 	imul r10, r11 | ||||
| randomx_isn_28: | ||||
| 	; IADD_RS r5, r7, 1345488645, LSH 1 | ||||
| 	lea r13, [r13+r15*2+1345488645] | ||||
| 	; IADD_RS r5, r7, 1345488645, SHFT 3 | ||||
| 	lea r13, [r13+r15*8+1345488645] | ||||
| randomx_isn_29: | ||||
| 	; ISTORE L2[r6-950233266], r2 | ||||
| 	lea eax, [r14d-950233266] | ||||
|  | @ -144,8 +136,8 @@ randomx_isn_35: | |||
| 	; IMUL_R r6, 835132161 | ||||
| 	imul r14, 835132161 | ||||
| randomx_isn_36: | ||||
| 	; IADD_RS r3, r4, LSH 2 | ||||
| 	lea r11, [r11+r12*4] | ||||
| 	; IADD_RS r3, r4, SHFT 0 | ||||
| 	lea r11, [r11+r12*1] | ||||
| randomx_isn_37: | ||||
| 	; ISUB_M r6, L2[r4+1885029796] | ||||
| 	lea eax, [r12d+1885029796] | ||||
|  | @ -173,12 +165,12 @@ randomx_isn_44: | |||
| 	; FADD_R f1, a2 | ||||
| 	addpd xmm1, xmm10 | ||||
| randomx_isn_45: | ||||
| 	; ISTORE L3[r0+1805562386], r5 | ||||
| 	; ISTORE L1[r0+1805562386], r5 | ||||
| 	lea eax, [r8d+1805562386] | ||||
| 	and eax, 2097144 | ||||
| 	and eax, 16376 | ||||
| 	mov qword ptr [rsi+rax], r13 | ||||
| randomx_isn_46: | ||||
| 	; IADD_RS r0, r7, LSH 0 | ||||
| 	; IADD_RS r0, r7, SHFT 0 | ||||
| 	lea r8, [r8+r15*1] | ||||
| randomx_isn_47: | ||||
| 	; IXOR_R r5, r2 | ||||
|  | @ -193,29 +185,21 @@ randomx_isn_50: | |||
| 	; FSUB_R f3, a0 | ||||
| 	subpd xmm3, xmm8 | ||||
| randomx_isn_51: | ||||
| 	; COND_R r2, be(r3, -1975981803), LSH 7 | ||||
| 	add r12, 128 | ||||
| 	test r12, 16256 | ||||
| 	; CBRANCH -1975981803, COND 14 | ||||
| 	add r12, -1975981803 | ||||
| 	test r12, 2080768 | ||||
| 	jz randomx_isn_25 | ||||
| 	xor ecx, ecx | ||||
| 	cmp r11d, -1975981803 | ||||
| 	setbe cl | ||||
| 	add r10, rcx | ||||
| randomx_isn_52: | ||||
| 	; IADD_RS r1, r1, LSH 2 | ||||
| 	; IADD_RS r1, r1, SHFT 2 | ||||
| 	lea r9, [r9+r9*4] | ||||
| randomx_isn_53: | ||||
| 	; FSUB_R f2, a0 | ||||
| 	subpd xmm2, xmm8 | ||||
| randomx_isn_54: | ||||
| 	; COND_R r5, ns(r1, 1917049931), LSH 6 | ||||
| 	add r8, 64 | ||||
| 	test r8, 8128 | ||||
| 	; CBRANCH 1917049931, COND 12 | ||||
| 	add r8, 1917049931 | ||||
| 	test r8, 520192 | ||||
| 	jz randomx_isn_52 | ||||
| 	xor ecx, ecx | ||||
| 	cmp r9d, 1917049931 | ||||
| 	setns cl | ||||
| 	add r13, rcx | ||||
| randomx_isn_55: | ||||
| 	; IXOR_R r2, r3 | ||||
| 	xor r10, r11 | ||||
|  | @ -226,7 +210,7 @@ randomx_isn_57: | |||
| 	; IMUL_R r5, r1 | ||||
| 	imul r13, r9 | ||||
| randomx_isn_58: | ||||
| 	; IADD_RS r5, r1, -999103579, LSH 0 | ||||
| 	; IADD_RS r5, r1, -999103579, SHFT 0 | ||||
| 	lea r13, [r13+r9*1-999103579] | ||||
| randomx_isn_59: | ||||
| 	; FMUL_R e2, a2 | ||||
|  | @ -236,8 +220,8 @@ randomx_isn_60: | |||
| 	mov ecx, r14d | ||||
| 	ror r10, cl | ||||
| randomx_isn_61: | ||||
| 	; IADD_RS r0, r3, LSH 1 | ||||
| 	lea r8, [r8+r11*2] | ||||
| 	; IADD_RS r0, r3, SHFT 3 | ||||
| 	lea r8, [r8+r11*8] | ||||
| randomx_isn_62: | ||||
| 	; FSQRT_R e3 | ||||
| 	sqrtpd xmm7, xmm7 | ||||
|  | @ -259,8 +243,8 @@ randomx_isn_66: | |||
| 	; ISUB_R r4, 841292629 | ||||
| 	sub r12, 841292629 | ||||
| randomx_isn_67: | ||||
| 	; IADD_RS r4, r6, LSH 2 | ||||
| 	lea r12, [r12+r14*4] | ||||
| 	; IADD_RS r4, r6, SHFT 3 | ||||
| 	lea r12, [r12+r14*8] | ||||
| randomx_isn_68: | ||||
| 	; FSUB_M f3, L1[r4+613549729] | ||||
| 	lea eax, [r12d+613549729] | ||||
|  | @ -268,8 +252,8 @@ randomx_isn_68: | |||
| 	cvtdq2pd xmm12, qword ptr [rsi+rax] | ||||
| 	subpd xmm3, xmm12 | ||||
| randomx_isn_69: | ||||
| 	; IADD_RS r6, r4, LSH 0 | ||||
| 	lea r14, [r14+r12*1] | ||||
| 	; IADD_RS r6, r4, SHFT 3 | ||||
| 	lea r14, [r14+r12*8] | ||||
| randomx_isn_70: | ||||
| 	; FSUB_M f1, L1[r5+629563256] | ||||
| 	lea eax, [r13d+629563256] | ||||
|  | @ -286,14 +270,10 @@ randomx_isn_73: | |||
| 	; FMUL_R e0, a0 | ||||
| 	mulpd xmm4, xmm8 | ||||
| randomx_isn_74: | ||||
| 	; COND_R r6, ns(r3, -1200328848), LSH 2 | ||||
| 	add r9, 4 | ||||
| 	test r9, 508 | ||||
| 	; CBRANCH -1200328848, COND 4 | ||||
| 	add r9, -1200328848 | ||||
| 	test r9, 2032 | ||||
| 	jz randomx_isn_55 | ||||
| 	xor ecx, ecx | ||||
| 	cmp r11d, -1200328848 | ||||
| 	setns cl | ||||
| 	add r14, rcx | ||||
| randomx_isn_75: | ||||
| 	; FMUL_R e0, a3 | ||||
| 	mulpd xmm4, xmm11 | ||||
|  | @ -312,12 +292,12 @@ randomx_isn_78: | |||
| 	; FMUL_R e2, a1 | ||||
| 	mulpd xmm6, xmm9 | ||||
| randomx_isn_79: | ||||
| 	; IADD_RS r3, r1, LSH 1 | ||||
| 	lea r11, [r11+r9*2] | ||||
| 	; IADD_RS r3, r1, SHFT 2 | ||||
| 	lea r11, [r11+r9*4] | ||||
| randomx_isn_80: | ||||
| 	; ISTORE L3[r2+1885666804], r4 | ||||
| 	; ISTORE L1[r2+1885666804], r4 | ||||
| 	lea eax, [r10d+1885666804] | ||||
| 	and eax, 2097144 | ||||
| 	and eax, 16376 | ||||
| 	mov qword ptr [rsi+rax], r12 | ||||
| randomx_isn_81: | ||||
| 	; IMULH_R r3, r0 | ||||
|  | @ -348,23 +328,19 @@ randomx_isn_88: | |||
| 	; IMUL_R r1, r3 | ||||
| 	imul r9, r11 | ||||
| randomx_isn_89: | ||||
| 	; COND_R r2, no(r0, -122257389), LSH 6 | ||||
| 	add r8, 64 | ||||
| 	test r8, 8128 | ||||
| 	; CBRANCH -122257389, COND 13 | ||||
| 	add r8, -122249197 | ||||
| 	test r8, 1040384 | ||||
| 	jz randomx_isn_75 | ||||
| 	xor ecx, ecx | ||||
| 	cmp r8d, -122257389 | ||||
| 	setno cl | ||||
| 	add r10, rcx | ||||
| randomx_isn_90: | ||||
| 	; ISTORE L1[r5+228116180], r7 | ||||
| 	lea eax, [r13d+228116180] | ||||
| 	and eax, 16376 | ||||
| 	mov qword ptr [rsi+rax], r15 | ||||
| randomx_isn_91: | ||||
| 	; ISTORE L1[r6+650356254], r5 | ||||
| 	; ISTORE L3[r6+650356254], r5 | ||||
| 	lea eax, [r14d+650356254] | ||||
| 	and eax, 16376 | ||||
| 	and eax, 2097144 | ||||
| 	mov qword ptr [rsi+rax], r13 | ||||
| randomx_isn_92: | ||||
| 	; FSUB_R f2, a0 | ||||
|  | @ -426,8 +402,8 @@ randomx_isn_107: | |||
| 	imul r13 | ||||
| 	mov r14, rdx | ||||
| randomx_isn_108: | ||||
| 	; IADD_RS r7, r0, LSH 1 | ||||
| 	lea r15, [r15+r8*2] | ||||
| 	; IADD_RS r7, r0, SHFT 2 | ||||
| 	lea r15, [r15+r8*4] | ||||
| randomx_isn_109: | ||||
| 	; IMUL_R r6, r5 | ||||
| 	imul r14, r13 | ||||
|  | @ -441,14 +417,14 @@ randomx_isn_111: | |||
| 	cvtdq2pd xmm12, qword ptr [rsi+rax] | ||||
| 	addpd xmm2, xmm12 | ||||
| randomx_isn_112: | ||||
| 	; IADD_RS r0, r3, LSH 0 | ||||
| 	lea r8, [r8+r11*1] | ||||
| 	; IADD_RS r0, r3, SHFT 1 | ||||
| 	lea r8, [r8+r11*2] | ||||
| randomx_isn_113: | ||||
| 	; IADD_RS r3, r4, LSH 1 | ||||
| 	; IADD_RS r3, r4, SHFT 1 | ||||
| 	lea r11, [r11+r12*2] | ||||
| randomx_isn_114: | ||||
| 	; IADD_RS r2, r4, LSH 2 | ||||
| 	lea r10, [r10+r12*4] | ||||
| 	; IADD_RS r2, r4, SHFT 1 | ||||
| 	lea r10, [r10+r12*2] | ||||
| randomx_isn_115: | ||||
| 	; IMUL_M r7, L1[r2-106928748] | ||||
| 	lea eax, [r10d-106928748] | ||||
|  | @ -461,17 +437,17 @@ randomx_isn_117: | |||
| 	; FSUB_R f2, a2 | ||||
| 	subpd xmm2, xmm10 | ||||
| randomx_isn_118: | ||||
| 	; IADD_RS r2, r2, LSH 0 | ||||
| 	lea r10, [r10+r10*1] | ||||
| 	; IADD_RS r2, r2, SHFT 3 | ||||
| 	lea r10, [r10+r10*8] | ||||
| randomx_isn_119: | ||||
| 	; ISUB_R r7, -342152774 | ||||
| 	sub r15, -342152774 | ||||
| randomx_isn_120: | ||||
| 	; IADD_RS r4, r1, LSH 1 | ||||
| 	lea r12, [r12+r9*2] | ||||
| 	; IADD_RS r4, r1, SHFT 0 | ||||
| 	lea r12, [r12+r9*1] | ||||
| randomx_isn_121: | ||||
| 	; IADD_RS r4, r7, LSH 2 | ||||
| 	lea r12, [r12+r15*4] | ||||
| 	; IADD_RS r4, r7, SHFT 3 | ||||
| 	lea r12, [r12+r15*8] | ||||
| randomx_isn_122: | ||||
| 	; FSUB_R f0, a1 | ||||
| 	subpd xmm0, xmm9 | ||||
|  | @ -501,7 +477,7 @@ randomx_isn_128: | |||
| 	; FSUB_R f3, a1 | ||||
| 	subpd xmm3, xmm9 | ||||
| randomx_isn_129: | ||||
| 	; IADD_RS r1, r2, LSH 2 | ||||
| 	; IADD_RS r1, r2, SHFT 2 | ||||
| 	lea r9, [r9+r10*4] | ||||
| randomx_isn_130: | ||||
| 	; FSUB_R f1, a1 | ||||
|  | @ -528,8 +504,8 @@ randomx_isn_136: | |||
| 	; ISUB_R r3, r6 | ||||
| 	sub r11, r14 | ||||
| randomx_isn_137: | ||||
| 	; IADD_RS r4, r1, LSH 0 | ||||
| 	lea r12, [r12+r9*1] | ||||
| 	; IADD_RS r4, r1, SHFT 1 | ||||
| 	lea r12, [r12+r9*2] | ||||
| randomx_isn_138: | ||||
| 	; ISTORE L1[r0+56684410], r0 | ||||
| 	lea eax, [r8d+56684410] | ||||
|  | @ -552,14 +528,10 @@ randomx_isn_142: | |||
| 	; FADD_R f1, a0 | ||||
| 	addpd xmm1, xmm8 | ||||
| randomx_isn_143: | ||||
| 	; COND_R r5, ge(r1, 880467599), LSH 2 | ||||
| 	add r14, 4 | ||||
| 	test r14, 508 | ||||
| 	; CBRANCH 880467599, COND 5 | ||||
| 	add r14, 880467631 | ||||
| 	test r14, 4064 | ||||
| 	jz randomx_isn_110 | ||||
| 	xor ecx, ecx | ||||
| 	cmp r9d, 880467599 | ||||
| 	setge cl | ||||
| 	add r13, rcx | ||||
| randomx_isn_144: | ||||
| 	; FSUB_M f1, L1[r5+1283529302] | ||||
| 	lea eax, [r13d+1283529302] | ||||
|  | @ -570,17 +542,17 @@ randomx_isn_145: | |||
| 	; ISUB_R r5, r3 | ||||
| 	sub r13, r11 | ||||
| randomx_isn_146: | ||||
| 	; IADD_RS r0, r3, LSH 1 | ||||
| 	lea r8, [r8+r11*2] | ||||
| 	; IADD_RS r0, r3, SHFT 3 | ||||
| 	lea r8, [r8+r11*8] | ||||
| randomx_isn_147: | ||||
| 	; IADD_RS r1, r3, LSH 1 | ||||
| 	lea r9, [r9+r11*2] | ||||
| 	; IADD_RS r1, r3, SHFT 2 | ||||
| 	lea r9, [r9+r11*4] | ||||
| randomx_isn_148: | ||||
| 	; FSQRT_R e1 | ||||
| 	sqrtpd xmm5, xmm5 | ||||
| randomx_isn_149: | ||||
| 	; IADD_RS r4, r3, LSH 1 | ||||
| 	lea r12, [r12+r11*2] | ||||
| 	; IADD_RS r4, r3, SHFT 2 | ||||
| 	lea r12, [r12+r11*4] | ||||
| randomx_isn_150: | ||||
| 	; FADD_M f1, L1[r0-1977073973] | ||||
| 	lea eax, [r8d-1977073973] | ||||
|  | @ -588,8 +560,8 @@ randomx_isn_150: | |||
| 	cvtdq2pd xmm12, qword ptr [rsi+rax] | ||||
| 	addpd xmm1, xmm12 | ||||
| randomx_isn_151: | ||||
| 	; IADD_RS r1, r0, LSH 1 | ||||
| 	lea r9, [r9+r8*2] | ||||
| 	; IADD_RS r1, r0, SHFT 3 | ||||
| 	lea r9, [r9+r8*8] | ||||
| randomx_isn_152: | ||||
| 	; FSUB_R f1, a0 | ||||
| 	subpd xmm1, xmm8 | ||||
|  | @ -622,8 +594,8 @@ randomx_isn_158: | |||
| 	and eax, 16376 | ||||
| 	mov qword ptr [rsi+rax], r12 | ||||
| randomx_isn_159: | ||||
| 	; IADD_RS r7, r2, LSH 3 | ||||
| 	lea r15, [r15+r10*8] | ||||
| 	; IADD_RS r7, r2, SHFT 2 | ||||
| 	lea r15, [r15+r10*4] | ||||
| randomx_isn_160: | ||||
| 	; IMUL_RCP r7, 2040763167 | ||||
| 	mov rax, 9705702723791900149 | ||||
|  | @ -632,8 +604,8 @@ randomx_isn_161: | |||
| 	; FADD_R f3, a3 | ||||
| 	addpd xmm3, xmm11 | ||||
| randomx_isn_162: | ||||
| 	; IADD_RS r6, r4, LSH 1 | ||||
| 	lea r14, [r14+r12*2] | ||||
| 	; IADD_RS r6, r4, SHFT 3 | ||||
| 	lea r14, [r14+r12*8] | ||||
| randomx_isn_163: | ||||
| 	; ISWAP_R r3, r5 | ||||
| 	xchg r11, r13 | ||||
|  | @ -697,8 +669,8 @@ randomx_isn_177: | |||
| 	; IMUL_M r3, L3[232968] | ||||
| 	imul r11, qword ptr [rsi+232968] | ||||
| randomx_isn_178: | ||||
| 	; IADD_RS r5, r3, -2108568616, LSH 1 | ||||
| 	lea r13, [r13+r11*2-2108568616] | ||||
| 	; IADD_RS r5, r3, -2108568616, SHFT 0 | ||||
| 	lea r13, [r13+r11*1-2108568616] | ||||
| randomx_isn_179: | ||||
| 	; IADD_M r3, L1[r4+1322108729] | ||||
| 	lea eax, [r12d+1322108729] | ||||
|  | @ -714,8 +686,8 @@ randomx_isn_182: | |||
| 	; FMUL_R e2, a2 | ||||
| 	mulpd xmm6, xmm10 | ||||
| randomx_isn_183: | ||||
| 	; IADD_RS r6, r2, LSH 0 | ||||
| 	lea r14, [r14+r10*1] | ||||
| 	; IADD_RS r6, r2, SHFT 1 | ||||
| 	lea r14, [r14+r10*2] | ||||
| randomx_isn_184: | ||||
| 	; FADD_R f2, a3 | ||||
| 	addpd xmm2, xmm11 | ||||
|  | @ -726,7 +698,7 @@ randomx_isn_186: | |||
| 	; FSCAL_R f3 | ||||
| 	xorps xmm3, xmm15 | ||||
| randomx_isn_187: | ||||
| 	; IADD_RS r6, r6, LSH 3 | ||||
| 	; IADD_RS r6, r6, SHFT 3 | ||||
| 	lea r14, [r14+r14*8] | ||||
| randomx_isn_188: | ||||
| 	; FSCAL_R f2 | ||||
|  | @ -779,8 +751,8 @@ randomx_isn_199: | |||
| 	; FSUB_R f3, a3 | ||||
| 	subpd xmm3, xmm11 | ||||
| randomx_isn_200: | ||||
| 	; IADD_RS r2, r5, LSH 2 | ||||
| 	lea r10, [r10+r13*4] | ||||
| 	; IADD_RS r2, r5, SHFT 0 | ||||
| 	lea r10, [r10+r13*1] | ||||
| randomx_isn_201: | ||||
| 	; ISUB_M r6, L2[r3+376384700] | ||||
| 	lea eax, [r11d+376384700] | ||||
|  | @ -803,14 +775,14 @@ randomx_isn_205: | |||
| 	and eax, 262136 | ||||
| 	add r15, qword ptr [rsi+rax] | ||||
| randomx_isn_206: | ||||
| 	; IADD_RS r3, r5, LSH 0 | ||||
| 	lea r11, [r11+r13*1] | ||||
| 	; IADD_RS r3, r5, SHFT 2 | ||||
| 	lea r11, [r11+r13*4] | ||||
| randomx_isn_207: | ||||
| 	; FSCAL_R f1 | ||||
| 	xorps xmm1, xmm15 | ||||
| randomx_isn_208: | ||||
| 	; IADD_RS r6, r3, LSH 1 | ||||
| 	lea r14, [r14+r11*2] | ||||
| 	; IADD_RS r6, r3, SHFT 0 | ||||
| 	lea r14, [r14+r11*1] | ||||
| randomx_isn_209: | ||||
| 	; FSUB_M f0, L1[r4-557177119] | ||||
| 	lea eax, [r12d-557177119] | ||||
|  | @ -866,18 +838,18 @@ randomx_isn_221: | |||
| 	; IMUL_R r1, r0 | ||||
| 	imul r9, r8 | ||||
| randomx_isn_222: | ||||
| 	; IADD_RS r1, r0, LSH 2 | ||||
| 	; IADD_RS r1, r0, SHFT 2 | ||||
| 	lea r9, [r9+r8*4] | ||||
| randomx_isn_223: | ||||
| 	; FSCAL_R f2 | ||||
| 	xorps xmm2, xmm15 | ||||
| randomx_isn_224: | ||||
| 	; IADD_RS r5, r4, 312567979, LSH 1 | ||||
| 	lea r13, [r13+r12*2+312567979] | ||||
| 	; IADD_RS r5, r4, 312567979, SHFT 3 | ||||
| 	lea r13, [r13+r12*8+312567979] | ||||
| randomx_isn_225: | ||||
| 	; ISTORE L2[r2+260885699], r1 | ||||
| 	; ISTORE L3[r2+260885699], r1 | ||||
| 	lea eax, [r10d+260885699] | ||||
| 	and eax, 262136 | ||||
| 	and eax, 2097144 | ||||
| 	mov qword ptr [rsi+rax], r9 | ||||
| randomx_isn_226: | ||||
| 	; ISUB_R r6, -791575725 | ||||
|  | @ -897,8 +869,8 @@ randomx_isn_229: | |||
| 	; ISWAP_R r0, r6 | ||||
| 	xchg r8, r14 | ||||
| randomx_isn_230: | ||||
| 	; IADD_RS r2, r7, LSH 2 | ||||
| 	lea r10, [r10+r15*4] | ||||
| 	; IADD_RS r2, r7, SHFT 3 | ||||
| 	lea r10, [r10+r15*8] | ||||
| randomx_isn_231: | ||||
| 	; FMUL_R e1, a0 | ||||
| 	mulpd xmm5, xmm8 | ||||
|  | @ -923,8 +895,8 @@ randomx_isn_237: | |||
| 	; FSUB_R f1, a3 | ||||
| 	subpd xmm1, xmm11 | ||||
| randomx_isn_238: | ||||
| 	; IADD_RS r4, r2, LSH 1 | ||||
| 	lea r12, [r12+r10*2] | ||||
| 	; IADD_RS r4, r2, SHFT 0 | ||||
| 	lea r12, [r12+r10*1] | ||||
| randomx_isn_239: | ||||
| 	; IMUL_RCP r7, 3065786637 | ||||
| 	mov rax, 12921343181238534701 | ||||
|  | @ -957,36 +929,32 @@ randomx_isn_246: | |||
| 	and eax, 262136 | ||||
| 	sub r15, qword ptr [rsi+rax] | ||||
| randomx_isn_247: | ||||
| 	; COND_R r2, be(r5, -8545330), LSH 2 | ||||
| 	add r9, 4 | ||||
| 	test r9, 508 | ||||
| 	; CBRANCH -8545330, COND 4 | ||||
| 	add r9, -8545314 | ||||
| 	test r9, 2032 | ||||
| 	jz randomx_isn_223 | ||||
| 	xor ecx, ecx | ||||
| 	cmp r13d, -8545330 | ||||
| 	setbe cl | ||||
| 	add r10, rcx | ||||
| randomx_isn_248: | ||||
| 	; ISTORE L1[r0+1951752498], r5 | ||||
| 	lea eax, [r8d+1951752498] | ||||
| 	and eax, 16376 | ||||
| 	mov qword ptr [rsi+rax], r13 | ||||
| randomx_isn_249: | ||||
| 	; IADD_RS r6, r5, LSH 2 | ||||
| 	lea r14, [r14+r13*4] | ||||
| 	; IADD_RS r6, r5, SHFT 3 | ||||
| 	lea r14, [r14+r13*8] | ||||
| randomx_isn_250: | ||||
| 	; FADD_R f3, a0 | ||||
| 	addpd xmm3, xmm8 | ||||
| randomx_isn_251: | ||||
| 	; IADD_RS r0, r0, LSH 0 | ||||
| 	; IADD_RS r0, r0, SHFT 0 | ||||
| 	lea r8, [r8+r8*1] | ||||
| randomx_isn_252: | ||||
| 	; ISUB_R r4, r2 | ||||
| 	sub r12, r10 | ||||
| randomx_isn_253: | ||||
| 	; IADD_RS r5, r4, 256175395, LSH 0 | ||||
| 	lea r13, [r13+r12*1+256175395] | ||||
| 	; IADD_RS r5, r4, 256175395, SHFT 3 | ||||
| 	lea r13, [r13+r12*8+256175395] | ||||
| randomx_isn_254: | ||||
| 	; IADD_RS r6, r7, LSH 2 | ||||
| 	; IADD_RS r6, r7, SHFT 2 | ||||
| 	lea r14, [r14+r15*4] | ||||
| randomx_isn_255: | ||||
| 	; IROR_R r7, r3 | ||||
|  |  | |||
|  | @ -44,14 +44,12 @@ namespace randomx { | |||
| 	static const char* regScratchpadAddr = "rsi"; | ||||
| 
 | ||||
| 	void AssemblyGeneratorX86::generateProgram(Program& prog) { | ||||
| 		for (unsigned i = 0; i < 8; ++i) { | ||||
| 		for (unsigned i = 0; i < RegistersCount; ++i) { | ||||
| 			registerUsage[i] = -1; | ||||
| 		} | ||||
| 		asmCode.str(std::string()); //clear
 | ||||
| 		for (unsigned i = 0; i < prog.getSize(); ++i) { | ||||
| #if RANDOMX_JUMP | ||||
| 			asmCode << "randomx_isn_" << i << ":" << std::endl; | ||||
| #endif | ||||
| 			Instruction& instr = prog(i); | ||||
| 			instr.src %= RegistersCount; | ||||
| 			instr.dst %= RegistersCount; | ||||
|  | @ -261,7 +259,7 @@ namespace randomx { | |||
| 	void AssemblyGeneratorX86::genAddressRegDst(Instruction& instr, int maskAlign = 8) { | ||||
| 		asmCode << "\tlea eax, [" << regR32[instr.dst] << std::showpos << (int32_t)instr.getImm32() << std::noshowpos << "]" << std::endl; | ||||
| 		int mask; | ||||
| 		if (instr.getModCond()) { | ||||
| 		if (instr.getModCond() < StoreL3Condition) { | ||||
| 			mask = instr.getModMem() ? ScratchpadL1Mask : ScratchpadL2Mask; | ||||
| 		} | ||||
| 		else { | ||||
|  | @ -277,9 +275,9 @@ namespace randomx { | |||
| 	void AssemblyGeneratorX86::h_IADD_RS(Instruction& instr, int i) { | ||||
| 		registerUsage[instr.dst] = i; | ||||
| 		if(instr.dst == RegisterNeedsDisplacement) | ||||
| 			asmCode << "\tlea " << regR[instr.dst] << ", [" << regR[instr.dst] << "+" << regR[instr.src] << "*" << (1 << (instr.getModMem())) << std::showpos << (int32_t)instr.getImm32() << std::noshowpos << "]" << std::endl; | ||||
| 			asmCode << "\tlea " << regR[instr.dst] << ", [" << regR[instr.dst] << "+" << regR[instr.src] << "*" << (1 << (instr.getModShift())) << std::showpos << (int32_t)instr.getImm32() << std::noshowpos << "]" << std::endl; | ||||
| 		else | ||||
| 			asmCode << "\tlea " << regR[instr.dst] << ", [" << regR[instr.dst] << "+" << regR[instr.src] << "*" << (1 << (instr.getModMem())) << "]" << std::endl; | ||||
| 			asmCode << "\tlea " << regR[instr.dst] << ", [" << regR[instr.dst] << "+" << regR[instr.src] << "*" << (1 << (instr.getModShift())) << "]" << std::endl; | ||||
| 		traceint(instr); | ||||
| 	} | ||||
| 
 | ||||
|  | @ -542,55 +540,18 @@ namespace randomx { | |||
| 		tracenop(instr); | ||||
| 	} | ||||
| 
 | ||||
| 	static inline const char* condition(Instruction& instr) { | ||||
| 		switch (instr.getModCond()) | ||||
| 		{ | ||||
| 			case 0: | ||||
| 				return "be"; | ||||
| 			case 1: | ||||
| 				return "a"; | ||||
| 			case 2: | ||||
| 				return "s"; | ||||
| 			case 3: | ||||
| 				return "ns"; | ||||
| 			case 4: | ||||
| 				return "o"; | ||||
| 			case 5: | ||||
| 				return "no"; | ||||
| 			case 6: | ||||
| 				return "l"; | ||||
| 			case 7: | ||||
| 				return "ge"; | ||||
| 			default: | ||||
| 				UNREACHABLE; | ||||
| 		} | ||||
| 	} | ||||
| 
 | ||||
| 	void AssemblyGeneratorX86::handleCondition(Instruction& instr, int i) { | ||||
| 		const int shift = instr.getModShift(); | ||||
| 		const int conditionMask = ((1 << RANDOMX_JUMP_BITS) - 1) << shift; | ||||
| 	void AssemblyGeneratorX86::h_CBRANCH(Instruction& instr, int i) { | ||||
| 		int reg = getConditionRegister(); | ||||
| 		int target = registerUsage[reg] + 1; | ||||
| 		registerUsage[reg] = i; | ||||
| 		asmCode << "\tadd " << regR[reg] << ", " << (1 << shift) << std::endl; | ||||
| 		asmCode << "\ttest " << regR[reg] << ", " << conditionMask << std::endl; | ||||
| 		int shift = instr.getModCond(); | ||||
| 		asmCode << "\tadd " << regR[reg] << ", " << (int32_t)(instr.getImm32() | (1 << shift)) << std::endl; | ||||
| 		asmCode << "\ttest " << regR[reg] << ", " << (ConditionMask << shift) << std::endl; | ||||
| 		asmCode << "\tjz randomx_isn_" << target << std::endl; | ||||
| 		for (unsigned j = 0; j < 8; ++j) { //mark all registers as used
 | ||||
| 		for (unsigned j = 0; j < RegistersCount; ++j) { //mark all registers as used
 | ||||
| 			registerUsage[j] = i; | ||||
| 		} | ||||
| 	} | ||||
| 
 | ||||
| 	void AssemblyGeneratorX86::h_COND_R(Instruction& instr, int i) { | ||||
| #if RANDOMX_JUMP | ||||
| 		handleCondition(instr, i); | ||||
| #endif | ||||
| 		asmCode << "\txor ecx, ecx" << std::endl; | ||||
| 		asmCode << "\tcmp " << regR32[instr.src] << ", " << (int32_t)instr.getImm32() << std::endl; | ||||
| 		asmCode << "\tset" << condition(instr) << " cl" << std::endl; | ||||
| 		asmCode << "\tadd " << regR[instr.dst] << ", rcx" << std::endl; | ||||
| 		traceint(instr); | ||||
| 	} | ||||
| 
 | ||||
| 	void AssemblyGeneratorX86::h_ISTORE(Instruction& instr, int i) { | ||||
| 		genAddressRegDst(instr); | ||||
| 		asmCode << "\tmov qword ptr [" << regScratchpadAddr << "+rax], " << regR[instr.src] << std::endl; | ||||
|  | @ -632,7 +593,7 @@ namespace randomx { | |||
| 		INST_HANDLE(FMUL_R) | ||||
| 		INST_HANDLE(FDIV_M) | ||||
| 		INST_HANDLE(FSQRT_R) | ||||
| 		INST_HANDLE(COND_R) | ||||
| 		INST_HANDLE(CBRANCH) | ||||
| 		INST_HANDLE(CFROUND) | ||||
| 		INST_HANDLE(ISTORE) | ||||
| 		INST_HANDLE(NOP) | ||||
|  |  | |||
|  | @ -44,7 +44,6 @@ namespace randomx { | |||
| 		void genAddressRegDst(Instruction&, int); | ||||
| 		int32_t genAddressImm(Instruction&); | ||||
| 		int getConditionRegister(); | ||||
| 		void handleCondition(Instruction&, int); | ||||
| 		void generateCode(Instruction&, int); | ||||
| 		void traceint(Instruction&); | ||||
| 		void traceflt(Instruction&); | ||||
|  | @ -76,7 +75,7 @@ namespace randomx { | |||
| 		void h_FMUL_R(Instruction&, int); | ||||
| 		void h_FDIV_M(Instruction&, int); | ||||
| 		void h_FSQRT_R(Instruction&, int); | ||||
| 		void h_COND_R(Instruction&, int); | ||||
| 		void h_CBRANCH(Instruction&, int); | ||||
| 		void h_CFROUND(Instruction&, int); | ||||
| 		void h_ISTORE(Instruction&, int); | ||||
| 		void h_NOP(Instruction&, int); | ||||
|  |  | |||
|  | @ -40,13 +40,14 @@ namespace randomx { | |||
| 	static_assert(RANDOMX_SCRATCHPAD_L2 >= RANDOMX_SCRATCHPAD_L1, "RANDOMX_SCRATCHPAD_L2 must be greater than or equal to RANDOMX_SCRATCHPAD_L1."); | ||||
| 	static_assert((RANDOMX_SCRATCHPAD_L1 & (RANDOMX_SCRATCHPAD_L1 - 1)) == 0, "RANDOMX_SCRATCHPAD_L1 must be a power of 2."); | ||||
| 	static_assert(RANDOMX_CACHE_ACCESSES > 1, "RANDOMX_CACHE_ACCESSES must be greater than 1"); | ||||
| 	static_assert(RANDOMX_JUMP_BITS >= 1 && RANDOMX_JUMP_BITS <= 16, "RANDOMX_JUMP_BITS must be an integer in the range 1-16."); | ||||
| 
 | ||||
| 	constexpr int wtSum = RANDOMX_FREQ_IADD_RS + RANDOMX_FREQ_IADD_M + RANDOMX_FREQ_ISUB_R + \ | ||||
| 		RANDOMX_FREQ_ISUB_M + RANDOMX_FREQ_IMUL_R + RANDOMX_FREQ_IMUL_M + RANDOMX_FREQ_IMULH_R + \ | ||||
| 		RANDOMX_FREQ_IMULH_M + RANDOMX_FREQ_ISMULH_R + RANDOMX_FREQ_ISMULH_M + RANDOMX_FREQ_IMUL_RCP + \ | ||||
| 		RANDOMX_FREQ_INEG_R + RANDOMX_FREQ_IXOR_R + RANDOMX_FREQ_IXOR_M + RANDOMX_FREQ_IROR_R + RANDOMX_FREQ_ISWAP_R + \ | ||||
| 		RANDOMX_FREQ_FSWAP_R + RANDOMX_FREQ_FADD_R + RANDOMX_FREQ_FADD_M + RANDOMX_FREQ_FSUB_R + RANDOMX_FREQ_FSUB_M + \ | ||||
| 		RANDOMX_FREQ_FSCAL_R + RANDOMX_FREQ_FMUL_R + RANDOMX_FREQ_FDIV_M + RANDOMX_FREQ_FSQRT_R + RANDOMX_FREQ_COND_R + \ | ||||
| 		RANDOMX_FREQ_FSCAL_R + RANDOMX_FREQ_FMUL_R + RANDOMX_FREQ_FDIV_M + RANDOMX_FREQ_FSQRT_R + RANDOMX_FREQ_CBRANCH + \ | ||||
| 		RANDOMX_FREQ_CFROUND + RANDOMX_FREQ_ISTORE + RANDOMX_FREQ_NOP; | ||||
| 
 | ||||
| 	static_assert(wtSum == 256,	"Sum of instruction frequencies must be 256."); | ||||
|  | @ -59,6 +60,8 @@ namespace randomx { | |||
| 	constexpr uint32_t CacheSize = RANDOMX_ARGON_MEMORY * ArgonBlockSize; | ||||
| 	constexpr uint64_t DatasetSize = RANDOMX_DATASET_BASE_SIZE + RANDOMX_DATASET_EXTRA_SIZE; | ||||
| 	constexpr uint32_t DatasetExtraItems = RANDOMX_DATASET_EXTRA_SIZE / RANDOMX_DATASET_ITEM_SIZE; | ||||
| 	constexpr uint32_t ConditionMask = ((1 << RANDOMX_JUMP_BITS) - 1); | ||||
| 	constexpr int StoreL3Condition = 14; | ||||
| 
 | ||||
| #ifdef TRACE | ||||
| 	constexpr bool trace = true; | ||||
|  | @ -76,8 +79,6 @@ namespace randomx { | |||
| #endif | ||||
| #endif | ||||
| 
 | ||||
| #define RANDOMX_JUMP (RANDOMX_JUMP_BITS > 0) | ||||
| 
 | ||||
| 	using addr_t = uint32_t; | ||||
| 
 | ||||
| 	using int_reg_t = uint64_t; | ||||
|  |  | |||
|  | @ -64,7 +64,7 @@ along with RandomX.  If not, see<http://www.gnu.org/licenses/>. | |||
| //Scratchpad L1 size in bytes. Must be a power of two and less than or equal to RANDOMX_SCRATCHPAD_L2.
 | ||||
| #define RANDOMX_SCRATCHPAD_L1      (16 * 1024) | ||||
| 
 | ||||
| //How many register bits must be zero for a jump condition to be triggered. If set to 0, jumps are disabled.
 | ||||
| //How many register bits must be zero for CBRANCH instruction to jump. Must be an integer in the range 1-16.
 | ||||
| #define RANDOMX_JUMP_BITS          7 | ||||
| 
 | ||||
| /*
 | ||||
|  | @ -100,8 +100,9 @@ Total sum of frequencies must be 256 | |||
| #define RANDOMX_FREQ_FDIV_M         4 | ||||
| #define RANDOMX_FREQ_FSQRT_R        6 | ||||
| 
 | ||||
| #define RANDOMX_FREQ_COND_R         8 | ||||
| #define RANDOMX_FREQ_CBRANCH        8 | ||||
| #define RANDOMX_FREQ_CFROUND        1 | ||||
| 
 | ||||
| #define RANDOMX_FREQ_ISTORE        16 | ||||
| 
 | ||||
| #define RANDOMX_FREQ_NOP            0 | ||||
|  |  | |||
|  | @ -33,7 +33,7 @@ namespace randomx { | |||
| 	} | ||||
| 
 | ||||
| 	void Instruction::genAddressRegDst(std::ostream& os) const { | ||||
| 		if (getModCond()) | ||||
| 		if (getModCond() < StoreL3Condition) | ||||
| 			os << (getModMem() ? "L1" : "L2"); | ||||
| 		else | ||||
| 			os << "L3"; | ||||
|  | @ -49,7 +49,7 @@ namespace randomx { | |||
| 		if(dst == RegisterNeedsDisplacement) { | ||||
| 			os << ", " << (int32_t)getImm32(); | ||||
| 		} | ||||
| 		os << ", LSH " << (int)getModMem() << std::endl; | ||||
| 		os << ", SHFT " << (int)getModShift() << std::endl; | ||||
| 	} | ||||
| 
 | ||||
| 	void Instruction::h_IADD_M(std::ostream& os) const { | ||||
|  | @ -278,8 +278,8 @@ namespace randomx { | |||
| 		} | ||||
| 	} | ||||
| 
 | ||||
| 	void Instruction::h_COND_R(std::ostream& os) const { | ||||
| 		os << "r" << (int)dst << ", " << condition(getModCond()) << "(r" << (int)src << ", " << (int32_t)getImm32() << "), LSH " << (int)(getModShift()) << std::endl; | ||||
| 	void Instruction::h_CBRANCH(std::ostream& os) const { | ||||
| 		os << (int32_t)getImm32() << ", COND " << (int)(getModCond()) << std::endl; | ||||
| 	} | ||||
| 
 | ||||
| 	void  Instruction::h_ISTORE(std::ostream& os) const { | ||||
|  | @ -321,7 +321,7 @@ namespace randomx { | |||
| 		INST_NAME(FMUL_R) | ||||
| 		INST_NAME(FDIV_M) | ||||
| 		INST_NAME(FSQRT_R) | ||||
| 		INST_NAME(COND_R) | ||||
| 		INST_NAME(CBRANCH) | ||||
| 		INST_NAME(CFROUND) | ||||
| 		INST_NAME(ISTORE) | ||||
| 		INST_NAME(NOP) | ||||
|  | @ -354,7 +354,7 @@ namespace randomx { | |||
| 		INST_HANDLE(FMUL_R) | ||||
| 		INST_HANDLE(FDIV_M) | ||||
| 		INST_HANDLE(FSQRT_R) | ||||
| 		INST_HANDLE(COND_R) | ||||
| 		INST_HANDLE(CBRANCH) | ||||
| 		INST_HANDLE(CFROUND) | ||||
| 		INST_HANDLE(ISTORE) | ||||
| 		INST_HANDLE(NOP) | ||||
|  |  | |||
|  | @ -57,7 +57,7 @@ namespace randomx { | |||
| 		constexpr int FMUL_R = 23; | ||||
| 		constexpr int FDIV_M = 24; | ||||
| 		constexpr int FSQRT_R = 25; | ||||
| 		constexpr int COND_R = 26; | ||||
| 		constexpr int CBRANCH = 26; | ||||
| 		constexpr int CFROUND = 27; | ||||
| 		constexpr int ISTORE = 28; | ||||
| 		constexpr int NOP = 29; | ||||
|  | @ -81,11 +81,11 @@ namespace randomx { | |||
| 		int getModMem() const { | ||||
| 			return mod % 4; //bits 0-1
 | ||||
| 		} | ||||
| 		int getModCond() const { | ||||
| 			return (mod >> 2) % 8; //bits 2-4
 | ||||
| 		} | ||||
| 		int getModShift() const { | ||||
| 			return mod >> 5; //bits 5-7
 | ||||
| 			return (mod >> 2) % 4; //bits 2-3
 | ||||
| 		} | ||||
| 		int getModCond() const { | ||||
| 			return mod >> 4; //bits 4-7
 | ||||
| 		} | ||||
| 		void setMod(uint8_t val) { | ||||
| 			mod = val; | ||||
|  | @ -129,7 +129,7 @@ namespace randomx { | |||
| 		void h_FMUL_R(std::ostream&) const; | ||||
| 		void h_FDIV_M(std::ostream&) const; | ||||
| 		void h_FSQRT_R(std::ostream&) const; | ||||
| 		void h_COND_R(std::ostream&) const; | ||||
| 		void h_CBRANCH(std::ostream&) const; | ||||
| 		void h_CFROUND(std::ostream&) const; | ||||
| 		void h_ISTORE(std::ostream&) const; | ||||
| 		void h_NOP(std::ostream&) const; | ||||
|  |  | |||
|  | @ -297,12 +297,10 @@ namespace randomx { | |||
| 	} | ||||
| 
 | ||||
| 	void JitCompilerX86::generateProgramPrologue(Program& prog, ProgramConfiguration& pcfg) { | ||||
| #if RANDOMX_JUMP | ||||
| 		instructionOffsets.clear(); | ||||
| 		for (unsigned i = 0; i < 8; ++i) { | ||||
| 			registerUsage[i] = -1; | ||||
| 		} | ||||
| #endif | ||||
| 		codePos = prologueSize; | ||||
| 		memcpy(code + codePos - 48, &pcfg.eMask, sizeof(pcfg.eMask)); | ||||
| 		emit(REX_XOR_RAX_R64); | ||||
|  | @ -334,9 +332,7 @@ namespace randomx { | |||
| 	} | ||||
| 
 | ||||
| 	void JitCompilerX86::generateCode(Instruction& instr, int i) { | ||||
| #if RANDOMX_JUMP | ||||
| 		instructionOffsets.push_back(codePos); | ||||
| #endif | ||||
| 		auto generator = engine[instr.opcode]; | ||||
| 		(this->*generator)(instr, i); | ||||
| 	} | ||||
|  | @ -457,7 +453,7 @@ namespace randomx { | |||
| 		} | ||||
| 		emit32(instr.getImm32()); | ||||
| 		emitByte(AND_EAX_I); | ||||
| 		if (instr.getModCond()) { | ||||
| 		if (instr.getModCond() < StoreL3Condition) { | ||||
| 			int32_t maskL1 = align16 ? ScratchpadL1Mask16 : ScratchpadL1Mask; | ||||
| 			int32_t maskL2 = align16 ? ScratchpadL2Mask16 : ScratchpadL2Mask; | ||||
| 			emit32(instr.getModMem() ? maskL1 : maskL2); | ||||
|  | @ -478,7 +474,7 @@ namespace randomx { | |||
| 			emitByte(0xac); | ||||
| 		else | ||||
| 			emitByte(0x04 + 8 * instr.dst); | ||||
| 		genSIB(instr.getModMem(), instr.src, instr.dst); | ||||
| 		genSIB(instr.getModShift(), instr.src, instr.dst); | ||||
| 		if (instr.dst == RegisterNeedsDisplacement) | ||||
| 			emit32(instr.getImm32()); | ||||
| 	} | ||||
|  | @ -774,56 +770,10 @@ namespace randomx { | |||
| 		emit(AND_OR_MOV_LDMXCSR); | ||||
| 	} | ||||
| 
 | ||||
| 	static inline uint8_t jumpCondition(Instruction& instr, bool invert = false) { | ||||
| 		switch (instr.getModCond() ^ invert) | ||||
| 		{ | ||||
| 		case 0: | ||||
| 			return 0x76; //jbe
 | ||||
| 		case 1: | ||||
| 			return 0x77; //ja
 | ||||
| 		case 2: | ||||
| 			return 0x78; //js
 | ||||
| 		case 3: | ||||
| 			return 0x79; //jns
 | ||||
| 		case 4: | ||||
| 			return 0x70; //jo
 | ||||
| 		case 5: | ||||
| 			return 0x71; //jno
 | ||||
| 		case 6: | ||||
| 			return 0x7c; //jl
 | ||||
| 		case 7: | ||||
| 			return 0x7d; //jge
 | ||||
| 		} | ||||
| 	} | ||||
| 
 | ||||
| 	static inline uint8_t condition(Instruction& instr) { | ||||
| 		switch (instr.getModCond()) | ||||
| 		{ | ||||
| 		case 0: | ||||
| 			return 0x96; //setbe
 | ||||
| 		case 1: | ||||
| 			return 0x97; //seta
 | ||||
| 		case 2: | ||||
| 			return 0x98; //sets
 | ||||
| 		case 3: | ||||
| 			return 0x99; //setns
 | ||||
| 		case 4: | ||||
| 			return 0x90; //seto
 | ||||
| 		case 5: | ||||
| 			return 0x91; //setno
 | ||||
| 		case 6: | ||||
| 			return 0x9c; //setl
 | ||||
| 		case 7: | ||||
| 			return 0x9d; //setge
 | ||||
| 		default: | ||||
| 			UNREACHABLE; | ||||
| 		} | ||||
| 	} | ||||
| 
 | ||||
| 	int JitCompilerX86::getConditionRegister() { | ||||
| 		int min = INT_MAX; | ||||
| 		int minIndex; | ||||
| 		for (unsigned i = 0; i < 8; ++i) { | ||||
| 		for (unsigned i = 0; i < RegistersCount; ++i) { | ||||
| 			if (registerUsage[i] < min) { | ||||
| 				min = registerUsage[i]; | ||||
| 				minIndex = i; | ||||
|  | @ -832,40 +782,23 @@ namespace randomx { | |||
| 		return minIndex; | ||||
| 	} | ||||
| 
 | ||||
| 	void JitCompilerX86::handleCondition(Instruction& instr, int i) { | ||||
| 		const int shift = instr.getModShift(); | ||||
| 		const int conditionMask = ((1 << RANDOMX_JUMP_BITS) - 1) << shift; | ||||
| 	void JitCompilerX86::h_CBRANCH(Instruction& instr, int i) { | ||||
| 		int reg = getConditionRegister(); | ||||
| 		int target = registerUsage[reg] + 1; | ||||
| 		int shift = instr.getModCond(); | ||||
| 		emit(REX_ADD_I); | ||||
| 		emitByte(0xc0 + reg); | ||||
| 		emit32(1 << shift); | ||||
| 		emit32(instr.getImm32() | (1 << shift)); | ||||
| 		emit(REX_TEST); | ||||
| 		emitByte(0xc0 + reg); | ||||
| 		emit32(conditionMask); | ||||
| 		emit32(ConditionMask << shift); | ||||
| 		emit(JZ); | ||||
| 		emit32(instructionOffsets[target] - (codePos + 4)); | ||||
| 		for (unsigned j = 0; j < 8; ++j) { //mark all registers as used
 | ||||
| 		for (unsigned j = 0; j < RegistersCount; ++j) { //mark all registers as used
 | ||||
| 			registerUsage[j] = i; | ||||
| 		} | ||||
| 	} | ||||
| 
 | ||||
| 	void JitCompilerX86::h_COND_R(Instruction& instr, int i) { | ||||
| #if RANDOMX_JUMP | ||||
| 		handleCondition(instr, i); | ||||
| #endif | ||||
| 		emit(XOR_ECX_ECX); | ||||
| 		emit(REX_CMP_R32I); | ||||
| 		emitByte(0xf8 + instr.src); | ||||
| 		emit32(instr.getImm32()); | ||||
| 		emitByte(0x0f); | ||||
| 		emitByte(condition(instr)); | ||||
| 		emitByte(0xc1); | ||||
| 		emit(REX_ADD_RM); | ||||
| 		emitByte(0xc1 + 8 * instr.dst); | ||||
| 
 | ||||
| 	} | ||||
| 
 | ||||
| 	void JitCompilerX86::h_ISTORE(Instruction& instr, int i) { | ||||
| 		genAddressRegDst(instr); | ||||
| 		emit(REX_MOV_MR); | ||||
|  | @ -907,7 +840,7 @@ namespace randomx { | |||
| 		INST_HANDLE(FMUL_R) | ||||
| 		INST_HANDLE(FDIV_M) | ||||
| 		INST_HANDLE(FSQRT_R) | ||||
| 		INST_HANDLE(COND_R) | ||||
| 		INST_HANDLE(CBRANCH) | ||||
| 		INST_HANDLE(CFROUND) | ||||
| 		INST_HANDLE(ISTORE) | ||||
| 		INST_HANDLE(NOP) | ||||
|  |  | |||
|  | @ -70,8 +70,6 @@ namespace randomx { | |||
| 		void genAddressImm(Instruction&); | ||||
| 		void genSIB(int scale, int index, int base); | ||||
| 
 | ||||
| 		void handleCondition(Instruction&, int); | ||||
| 
 | ||||
| 		void generateCode(Instruction&, int); | ||||
| 		void generateSuperscalarCode(Instruction &, std::vector<uint64_t> &); | ||||
| 
 | ||||
|  | @ -136,7 +134,7 @@ namespace randomx { | |||
| 		void h_FMUL_R(Instruction&, int); | ||||
| 		void h_FDIV_M(Instruction&, int); | ||||
| 		void h_FSQRT_R(Instruction&, int); | ||||
| 		void h_COND_R(Instruction&, int); | ||||
| 		void h_CBRANCH(Instruction&, int); | ||||
| 		void h_CFROUND(Instruction&, int); | ||||
| 		void h_ISTORE(Instruction&, int); | ||||
| 		void h_NOP(Instruction&, int); | ||||
|  |  | |||
|  | @ -229,7 +229,7 @@ int main(int argc, char** argv) { | |||
| 		std::cout << "Calculated result: "; | ||||
| 		result.print(std::cout); | ||||
| 		if (noncesCount == 1000 && seedValue == 0) | ||||
| 			std::cout << "Reference result:  89336a85bf6d1e83eb20fbc92170705ded9b42285b30178ed8e855d65c4c4b69" << std::endl; | ||||
| 			std::cout << "Reference result:  804fed4a3dc4ed12917a210aad295925544e688e28549d7178eb27f412476a10" << std::endl; | ||||
| 		if (!miningMode) { | ||||
| 			std::cout << "Performance: " << 1000 * elapsed / noncesCount << " ms per hash" << std::endl; | ||||
| 		} | ||||
|  |  | |||
|  | @ -180,16 +180,11 @@ namespace randomx { | |||
| 				*ibc.fdst = _mm_sqrt_pd(*ibc.fdst); | ||||
| 			} break; | ||||
| 
 | ||||
| 			case InstructionType::COND_R: { | ||||
| #if RANDOMX_JUMP | ||||
| 				*ibc.creg += (1 << ibc.shift); | ||||
| 				const uint64_t conditionMask = ((1ULL << RANDOMX_JUMP_BITS) - 1) << ibc.shift; | ||||
| 				if ((*ibc.creg & conditionMask) == 0) { | ||||
| 			case InstructionType::CBRANCH: { | ||||
| 				*ibc.isrc += ibc.imm; | ||||
| 				if ((*ibc.isrc & ibc.memMask) == 0) { | ||||
| 					pc = ibc.target; | ||||
| 					break; | ||||
| 				} | ||||
| #endif | ||||
| 				*ibc.idst += condition(ibc.condition, *ibc.isrc, ibc.imm) ? 1 : 0; | ||||
| 			} break; | ||||
| 
 | ||||
| 			case InstructionType::CFROUND: { | ||||
|  | @ -308,12 +303,12 @@ namespace randomx { | |||
| 					ibc.idst = &r[dst]; | ||||
| 					if (dst != RegisterNeedsDisplacement) { | ||||
| 						ibc.isrc = &r[src]; | ||||
| 						ibc.shift = instr.getModMem(); | ||||
| 						ibc.shift = instr.getModShift(); | ||||
| 						ibc.imm = 0; | ||||
| 					} | ||||
| 					else { | ||||
| 						ibc.isrc = &r[src]; | ||||
| 						ibc.shift = instr.getModMem(); | ||||
| 						ibc.shift = instr.getModShift(); | ||||
| 						ibc.imm = signExtend2sCompl(instr.getImm32()); | ||||
| 					} | ||||
| 					registerUsage[dst] = i; | ||||
|  | @ -626,19 +621,16 @@ namespace randomx { | |||
| 					ibc.fdst = &e[dst]; | ||||
| 				} break; | ||||
| 
 | ||||
| 				CASE_REP(COND_R) { | ||||
| 					auto dst = instr.dst % RegistersCount; | ||||
| 					auto src = instr.src % RegistersCount; | ||||
| 					ibc.type = InstructionType::COND_R; | ||||
| 					ibc.idst = &r[dst]; | ||||
| 					ibc.isrc = &r[src]; | ||||
| 					ibc.condition = instr.getModCond(); | ||||
| 					ibc.imm = instr.getImm32(); | ||||
| 				CASE_REP(CBRANCH) { | ||||
| 					ibc.type = InstructionType::CBRANCH; | ||||
| 					//jump condition
 | ||||
| 					int reg = getConditionRegister(registerUsage); | ||||
| 					ibc.isrc = &r[reg]; | ||||
| 					ibc.target = registerUsage[reg]; | ||||
| 					ibc.shift = instr.getModShift(); | ||||
| 					ibc.creg = &r[reg]; | ||||
| 					int shift = instr.getModCond(); | ||||
| 					const uint64_t conditionMask = ConditionMask << instr.getModCond(); | ||||
| 					ibc.imm = signExtend2sCompl(instr.getImm32()) | (1ULL << shift); | ||||
| 					ibc.memMask = ConditionMask << shift; | ||||
| 					for (unsigned j = 0; j < RegistersCount; ++j) { //mark all registers as used
 | ||||
| 						registerUsage[j] = i; | ||||
| 					} | ||||
|  | @ -658,7 +650,7 @@ namespace randomx { | |||
| 					ibc.idst = &r[dst]; | ||||
| 					ibc.isrc = &r[src]; | ||||
| 					ibc.imm = signExtend2sCompl(instr.getImm32()); | ||||
| 					if (instr.getModCond()) | ||||
| 					if (instr.getModCond() < StoreL3Condition) | ||||
| 						ibc.memMask = (instr.getModMem() ? ScratchpadL1Mask : ScratchpadL2Mask); | ||||
| 					else | ||||
| 						ibc.memMask = ScratchpadL3Mask; | ||||
|  |  | |||
|  | @ -41,12 +41,12 @@ namespace randomx { | |||
| 			uint64_t imm; | ||||
| 			int64_t simm; | ||||
| 		}; | ||||
| 		int_reg_t* creg; | ||||
| 		uint16_t condition; | ||||
| 		int16_t target; | ||||
| 		uint32_t memMask; | ||||
| 		uint16_t type; | ||||
| 		uint16_t shift; | ||||
| 		union { | ||||
| 			int16_t target; | ||||
| 			uint16_t shift; | ||||
| 		}; | ||||
| 		uint32_t memMask; | ||||
| 	}; | ||||
| 
 | ||||
| 	template<class Allocator, bool softAes> | ||||
|  |  | |||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue