mirror of
				https://git.wownero.com/wownero/RandomWOW.git
				synced 2024-08-15 00:23:14 +00:00 
			
		
		
		
	FPROUND - variable flag offset
This commit is contained in:
		
							parent
							
								
									e487092f07
								
							
						
					
					
						commit
						c02ee4291d
					
				
					 5 changed files with 20 additions and 8 deletions
				
			
		|  | @ -12,6 +12,7 @@ There are 31 unique instructions divided into 3 groups: | |||
| 
 | ||||
| ## Integer instructions | ||||
| There are 22 integer instructions. They are divided into 3 classes (MATH, DIV, SHIFT) with different B operand selection rules. | ||||
| 
 | ||||
| |# opcodes|instruction|class|signed|A width|B width|C|C width| | ||||
| |-|-|-|-|-|-|-|-| | ||||
| |12|ADD_64|MATH|no|64|64|`A + B`|64| | ||||
|  | @ -55,7 +56,7 @@ The shift/rotate instructions use just the bottom 6 bits of the `B` operand (`im | |||
| There are 5 floating-point instructions. All floating-point instructions are vector instructions that operate on two packed double-precision floating-point values. | ||||
| 
 | ||||
| |# opcodes|instruction|C| | ||||
| |-|-|-|-| | ||||
| |-|-|-| | ||||
| |20|FPADD|`A + B`| | ||||
| |20|FPSUB|`A - B`| | ||||
| |22|FPMUL|`A * B`| | ||||
|  |  | |||
|  | @ -9,6 +9,7 @@ The encoding of each 128-bit instruction word is as follows: | |||
| There are 256 opcodes, which are distributed between 3 groups of instructions. There are 31 distinct operations (each operation can be encoded using multiple opcodes - for example opcodes `0x00` to `0x0d` correspond to integer addition). | ||||
| 
 | ||||
| **Table 1: Instruction groups** | ||||
| 
 | ||||
| |group|# operations|# opcodes|| | ||||
| |---------|-----------------|----|-| | ||||
| |integer (IA)|22|144|56.3%| | ||||
|  | @ -31,8 +32,8 @@ The `A.LOC.W` flag determines the address width when reading operand A from the | |||
| 
 | ||||
| **Table 3: Operand A read address width** | ||||
| 
 | ||||
| |`A.LOC.W`|address width (W) | ||||
| |---------|-|-| | ||||
| |`A.LOC.W`|address width (W)| | ||||
| |---------|-| | ||||
| |0|15 bits (256 KiB)| | ||||
| |1-3|11 bits (16 KiB)| | ||||
| 
 | ||||
|  | @ -125,8 +126,8 @@ The `C.LOC.W` flag determines the address width when writing operand C to the sc | |||
| 
 | ||||
| **Table 10: Operand C write address width** | ||||
| 
 | ||||
| |`C.LOC.W`|address width (W) | ||||
| |---------|-|-| | ||||
| |`C.LOC.W`|address width (W)| | ||||
| |---------|-| | ||||
| |0|15 bits (256 KiB)| | ||||
| |1-3|11 bits (16 KiB)| | ||||
| 
 | ||||
|  |  | |||
|  | @ -466,7 +466,9 @@ namespace RandomX { | |||
| 	void AssemblyGeneratorX86::h_FPROUND(Instruction& instr, int i) { | ||||
| 		genar(instr, i); | ||||
| 		asmCode << "\tmov rcx, rax" << std::endl; | ||||
| 		asmCode << "\tshl eax, 13" << std::endl; | ||||
| 		int rotate = (13 - (instr.imm8 & 63)) & 63; | ||||
| 		if (rotate != 0) | ||||
| 			asmCode << "\trol rax, " << rotate << std::endl; | ||||
| 		asmCode << "\tand eax, 24576" << std::endl; | ||||
| 		asmCode << "\tor eax, 40896" << std::endl; | ||||
| 		asmCode << "\tmov dword ptr [rsp - 8], eax" << std::endl; | ||||
|  |  | |||
|  | @ -574,7 +574,15 @@ namespace RandomX { | |||
| 
 | ||||
| 	void JitCompilerX86::h_FPROUND(Instruction& instr, int i) { | ||||
| 		genar(instr); | ||||
| 		emit(0x00250de0c1c88b48); //mov rcx,rax; shl eax,0xd
 | ||||
| 		emitByte(0x48); | ||||
| 		emit(uint16_t(0xc88b)); //mov rcx,rax
 | ||||
| 		int rotate = (13 - (instr.imm8 & 63)) & 63; | ||||
| 		if (rotate != 0) { | ||||
| 			emitByte(0x48); | ||||
| 			emit(uint16_t(0xc0c1)); //rol rax
 | ||||
| 			emitByte(rotate); | ||||
| 		} | ||||
| 		emit(uint16_t(0x0025)); | ||||
| 		emit(0x00009fc00d000060); //and eax,0x6000; or eax,0x9fc0
 | ||||
| 		emit(0x2454ae0ff8244489); //ldmxcsr DWORD PTR [rsp-0x8]
 | ||||
| 		emitByte(0xf8); | ||||
|  |  | |||
|  | @ -8859,7 +8859,7 @@ rx_body_509: | |||
| 	and ecx, 2047 | ||||
| 	mov rax, qword ptr [rsi+rcx*8] | ||||
| 	mov rcx, rax | ||||
| 	shl eax, 13 | ||||
| 	rol rax, 34 | ||||
| 	and eax, 24576 | ||||
| 	or eax, 40896 | ||||
| 	mov dword ptr [rsp - 8], eax | ||||
|  |  | |||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue