Benchmarking CPUs with a VM

We've all pretty much assumed that the AMD CPUs are fairly comparable to the Pentium IIs in integer applications, and in some cases even faster. But benchmarking my virtual machine on different machines produced some really striking differences in performance. I have tested these benchmarks on the K6-2, K6-3, and Pentium II 266 and 333 CPUs, and they all corroborate the following data:


Virgil (Build 62), 500 million insts:

Processor clock: Pentium II @ 333 mhz

nop : 12.60s, 39.68 mips, 8.39 ccl

lubr : 18.48s, 27.06 mips, 12.31 ccl

lubi : 15.67s, 31.90 mips, 10.44 ccl

luwi : 16.99s, 29.44 mips, 11.31 ccl

ludi : 18.49s, 27.04 mips, 12.32 ccl

add : 18.48s, 27.06 mips, 12.31 ccl

acci : 15.49s, 32.27 mips, 10.32 ccl

inc : 16.97s, 29.46 mips, 11.30 ccl

dec : 16.98s, 29.46 mips, 11.31 ccl


Virgil (Build 62), 500 million insts:

Processor: K6-2 @ 350 mhz

nop : 20.61s, 24.26 mips, 14.43 ccl

lubr : 28.68s, 17.43 mips, 20.08 ccl

lubi : 26.90s, 18.59 mips, 18.83 ccl

luwi : 27.26s, 18.34 mips, 19.08 ccl

ludi : 25.83s, 19.35 mips, 18.08 ccl

add : 31.53s, 15.86 mips, 22.07 ccl

acci : 27.26s, 18.34 mips, 19.08 ccl

inc : 27.26s, 18.34 mips, 19.08 ccl

dec : 27.26s, 18.34 mips, 19.09 ccl


The Intel CPUs blow the AMD CPUs out of the water! The second column is the mips (millions of instructions per second), and the third is the clock cycle latency (ccl) of the instruction. The ccl is the average number of CPU cycles it takes to execute one software (VM) instruction.

For some reason the AMD cpus lag far far behind in this area. Does anyone have any clue as to why this might be? I posted the source to the decoder engine below:


;-----------------------------------------------------------------------
; _execstart - entry point of the interpreter.
; Reads:  _curinst, _instcount, _regset, _execstate
; Writes: _errstate (set to INST_OK here)
; Register roles established for the dispatch loop and the stubs:
;   esi = address of the next bytecode byte
;   ecx = remaining instruction count
;   ebx = address of the register table
;   edi = opcode of the most recently fetched instruction (0 = none yet)
; All general registers are saved with pusha; the matching popa is in
; inst_err.  Execution falls through into _execloop.
; NOTE(review): inst_end decrements ecx before testing it, so an
; _instcount of 0 on entry does not mean "execute nothing" - confirm
; callers never pass 0 (or rely on that on purpose).
;-----------------------------------------------------------------------
_execstart:
        pusha                                   ; save general registers
        xor     edi, edi                        ; no opcode fetched yet: inst_err folds edi into
                                                ; _errstate, so it must not carry the caller's
                                                ; leftover value if we bail out below
        mov     esi, [_curinst]                 ; esi = instruction pointer
        mov     ecx, [_instcount]               ; ecx = instruction counter
        mov     [_errstate], dword INST_OK
        cmp     [_execstate], dword EXEC_CONTINUE
        mov     ebx, dword [_regset]            ; ebx = register table (mov keeps the cmp flags)
        jg      inst_err                        ; state above EXEC_CONTINUE: do not execute


; Dispatch step: fetch the opcode byte at [esi], step esi past it, and jump to
; the handler whose address is entry `opcode` of jumptable (4 bytes per entry).
; edi keeps the zero-extended opcode; inst_err later shifts it into _errstate.
; NOTE(review): jumptable is not defined in this excerpt - confirm its layout.
_execloop:

; The bounds check below is disabled, so nothing in this loop stops esi from
; running past the end of the code.
; cmp esi, [_codemax] ; make sure we don't go past the end of code

; jge code_overrun ; jump to instruction stub if less than max

movzx edi, byte [esi] ; edi = opcode, zero-extended for use as a table index

inc esi ; esi now points at the instruction's operand bytes (if any)

mov eax, [edi*4+jumptable] ; eax = handler address for this opcode

jmp eax ; the stubs shown jump back to inst_end or leave via inst_err

;code_overrun:

; mov [_errstate], dword EXEC_OVERRUN

; mov [_execstate], dword EXEC_EXCEPTION

; jmp inst_err ; jump to end


; inst_end - common tail of the instruction stubs: count the instruction that
; just ran and keep dispatching while ecx is non-zero.  When ecx reaches zero
; execution falls through into inst_err, which is also the normal exit path.
; dec/jnz replaces the legacy `loop` instruction.  dec leaves CF untouched,
; and the remaining arithmetic flags are overwritten by the `inc esi` in
; _execloop before any stub runs, so the stubs see the same state as before.
inst_end:
        dec     ecx                             ; one instruction consumed
        jnz     _execloop                       ; more budget left: dispatch the next one

; inst_err - common exit path of the interpreter.
; Reached by explicit jumps (the state check in _execstart, inst_nsi,
; inst_ext) and by falling through from inst_end once ecx reaches zero.
; Saves esi and ecx back to _curinst / _instcount, stores
; (edi << 8) | _errstate into _errstate, restores the registers saved by
; pusha in _execstart and returns to the caller.
inst_err:
        mov     [_curinst], esi                 ; save current instruction pointer
        mov     [_instcount], ecx               ; save remaining instruction count
        shl     edi, 8                          ; last opcode moves above the low 8 bits
        or      edi, [_errstate]                ; merge in the current error state
        mov     [_errstate], edi                ; publish the combined value
        popa                                    ; restore general registers
        ret


;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

;; JUMPBASE, INSTRUCTION STUBS ;;

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;


; jumpbase labels the start of the instruction stubs; it is not referenced
; anywhere in this excerpt.
jumpbase:


;;==========================================================================;;

; inst_nsi - unsupported instruction: record INST_NOTSUPPORTED in _errstate
; and leave the interpreter through inst_err.
inst_nsi:
        mov     dword [_errstate], INST_NOTSUPPORTED
        jmp     inst_err


;;==========================================================================;;

; inst_ret - labelled "return from procedure call" by the author.  As written
; it rewinds the instruction pointer to the value stored at _codebase and
; continues through inst_end, so execution carries on from the start of the
; code while instruction budget remains.  The EXEC_DONE store is disabled.
inst_ret:
;       mov     [_execstate], byte EXEC_DONE    ; (disabled) execution complete
        mov     esi, dword [_codebase]          ; esi = beginning of the code
        jmp     inst_end                        ; count this instruction and continue


;;==========================================================================;;

; inst_lubi - load unsigned byte immediate.
; Operands: [esi] = register index, [esi+1] = 8-bit immediate.
; Stores the zero-extended immediate into regtable[index] and advances esi
; by 2.  Both operand bytes are fetched with a single word load.
; NOTE(review): the index byte is used unmasked, so values up to 255 index
; the table - confirm the register table is large enough.
inst_lubi:
        movzx   edx, word [esi]                 ; dl = register index, dh = immediate
        movzx   eax, dl                         ; eax = register index
        add     esi, 2                          ; skip both operand bytes
        shr     edx, 8                          ; edx = immediate byte, zero-extended
        mov     [ebx + eax*4], edx              ; regtable[index] = immediate
        jmp     inst_end


;;==========================================================================;;

; inst_luwi - load unsigned word immediate.
; Operands: [esi] = register index, [esi+1] = 16-bit immediate.
; Stores the zero-extended word into regtable[index]; advances esi by 3.
inst_luwi:
        movzx   edx, word [esi + 1]             ; edx = 16-bit immediate, zero-extended
        movzx   eax, byte [esi]                 ; eax = register index
        add     esi, 3                          ; skip index byte + immediate word
        mov     [ebx + eax*4], edx              ; regtable[index] = immediate word
        jmp     inst_end


;;==========================================================================;;

; inst_ludi - load unsigned double word immediate.
; Operands: [esi] = register index, [esi+1] = 32-bit immediate.
; Stores the dword into regtable[index]; advances esi by 5.
inst_ludi:
        mov     edx, dword [esi + 1]            ; edx = 32-bit immediate
        movzx   eax, byte [esi]                 ; eax = register index
        add     esi, 5                          ; skip index byte + immediate dword
        mov     [ebx + eax*4], edx              ; regtable[index] = immediate dword
        jmp     inst_end


;;==========================================================================;;

; inst_lubr - load unsigned byte through a register address.
; Operand: one byte; high nibble = source register, low nibble = destination
; register.  The source register's value is used as a memory address; the
; byte found there is zero-extended and stored into the destination
; register.  Advances esi by 1.
inst_lubr:
        movzx   edx, byte [esi]                 ; edx = packed register pair
        inc     esi                             ; skip the operand byte
        mov     eax, edx
        and     eax, 0x0f                       ; eax = destination index (low 4 bits)
        shr     edx, 4                          ; edx = source index (high 4 bits)
        mov     edx, [ebx + edx*4]              ; edx = address held in the source register
        movzx   edx, byte [edx]                 ; edx = byte at that address
        mov     [ebx + eax*4], edx              ; regtable[dest] = loaded byte
        jmp     inst_end


;;==========================================================================;;

; inst_add - add two registers.
; Operand: one byte holding two 4-bit register indices (low and high
; nibble).  The sum of the two registers is written to regtable[0] (rrx);
; neither source register is modified.  Advances esi by 1.
inst_add:
        movzx   edx, byte [esi]                 ; edx = packed register pair
        inc     esi                             ; skip the operand byte
        mov     eax, edx
        shr     edx, 4                          ; edx = index from the high nibble
        and     eax, 0x0f                       ; eax = index from the low nibble
        mov     edx, [ebx + edx*4]              ; edx = value of the high-nibble register
        mov     eax, [ebx + eax*4]              ; eax = value of the low-nibble register
        add     eax, edx
        mov     [ebx], eax                      ; rrx (regtable[0]) = sum
        jmp     inst_end


;;==========================================================================;;

; inst_inc - increment a register.
; Operand: [esi] = register index.  Adds one to regtable[index] using an
; explicit load / inc / store sequence; advances esi by 1.
inst_inc:
        movzx   eax, byte [esi]                 ; eax = register index
        mov     edx, [ebx + eax*4]              ; edx = current register value
        inc     esi                             ; skip the operand byte
        inc     edx                             ; value + 1
        mov     [ebx + eax*4], edx              ; write it back
        jmp     inst_end


;;==========================================================================;;

; inst_dec - decrement a register.
; Operand: [esi] = register index.  Subtracts one from regtable[index] using
; an explicit load / dec / store sequence; advances esi by 1.
inst_dec:
        movzx   eax, byte [esi]                 ; eax = register index
        mov     edx, [ebx + eax*4]              ; edx = current register value
        inc     esi                             ; skip the operand byte
        dec     edx                             ; value - 1
        mov     [ebx + eax*4], edx              ; write it back
        jmp     inst_end


;;==========================================================================;;

; inst_acci - accumulate immediate.
; Operands: [esi] = register index, [esi+1] = 16-bit immediate.
; Adds the zero-extended immediate to regtable[index] in place; advances
; esi by 3.
inst_acci:
        movzx   edx, word [esi + 1]             ; edx = 16-bit immediate, zero-extended
        movzx   eax, byte [esi]                 ; eax = register index
        add     esi, 3                          ; skip index byte + immediate word
        add     [ebx + eax*4], edx              ; regtable[index] += immediate
        jmp     inst_end


;;==========================================================================;;

; inst_ext - exit: mark execution as finished and leave the interpreter
; through inst_err.  _errstate is not modified here.
; _execstate is written as a full dword because _execstart compares it as a
; dword; a byte-sized store would leave its upper three bytes unchanged.
inst_ext:
        mov     dword [_execstate], EXEC_DONE   ; execution complete
        jmp     inst_err                        ; save state and return to the caller


;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

;; END OF INSTRUCTION STUBS ;;

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;




Comments

  • Because the processor time that any app gets when running under Win95

    varies wildly, it's not generally a good idea to benchmark from

    Windows without taking precautions, such as setting the program

    in exclusive mode, otherwise the CPU time it gets will vary dramatically.

    Try the same test again in DOS mode or true DOS for a concrete

    benchmark, if you did not do so previously.


Sign In or Register to comment.

Howdy, Stranger!

It looks like you're new here. If you want to get involved, click one of these buttons!

Categories