Virgil (Build 62), 500 million insts:
Processor clock: Pentium II @ 333 mhz
nop : 12.60s, 39.68 mips, 8.39 ccl
lubr : 18.48s, 27.06 mips, 12.31 ccl
lubi : 15.67s, 31.90 mips, 10.44 ccl
luwi : 16.99s, 29.44 mips, 11.31 ccl
ludi : 18.49s, 27.04 mips, 12.32 ccl
add : 18.48s, 27.06 mips, 12.31 ccl
acci : 15.49s, 32.27 mips, 10.32 ccl
inc : 16.97s, 29.46 mips, 11.30 ccl
dec : 16.98s, 29.46 mips, 11.31 ccl
Virgil (Build 62), 500 million insts:
Processor: K6-2 @ 350 mhz
nop : 20.61s, 24.26 mips, 14.43 ccl
lubr : 28.68s, 17.43 mips, 20.08 ccl
lubi : 26.90s, 18.59 mips, 18.83 ccl
luwi : 27.26s, 18.34 mips, 19.08 ccl
ludi : 25.83s, 19.35 mips, 18.08 ccl
add : 31.53s, 15.86 mips, 22.07 ccl
acci : 27.26s, 18.34 mips, 19.08 ccl
inc : 27.26s, 18.34 mips, 19.08 ccl
dec : 27.26s, 18.34 mips, 19.09 ccl
The Intel CPUs blow the AMD CPUs out of the water! The second column is the MIPS (millions of instructions per second), and the third is the clock cycle latency (ccl) of the instruction. The ccl is the number of CPU cycles it takes on average to execute one software instruction.
For some reason the AMD CPUs lag far, far behind in this area. Does anyone have any clue as to why this might be? I have posted the source of the decoder engine below:
;------------------------------------------------------------------------------
; _execstart -- bytecode fetch/dispatch loop (NASM syntax, 32-bit code)
;
; Register roles while the loop is running:
;   esi = address of the next bytecode byte to fetch
;   ecx = number of instructions still to execute (counts down)
;   ebx = value loaded from [_regset] (base of the virtual register table)
;   edi = most recently fetched opcode byte, zero-extended
;   eax, edx = scratch, free for the instruction stubs to use
;
; On exit:
;   [_errstate]  = (edi << 8) | [_errstate]
;   [_curinst]   = esi
;   [_instcount] = ecx
;   All general registers are restored by popa; nothing is returned in
;   registers.
;
; NOTE(review): the counter is decremented before it is tested, so entering
; with [_instcount] == 0 wraps around instead of executing nothing (the
; original `loop` behaved the same way) -- confirm callers never pass 0.
;------------------------------------------------------------------------------
_execstart:
        pusha                                   ; save general registers
        xor     edi, edi                        ; no opcode fetched yet: keeps the early exit
                                                ; below from folding the caller's edi into
                                                ; _errstate (must precede cmp: xor writes flags)
        mov     esi, [_curinst]                 ; esi = current instruction pointer
        mov     ecx, [_instcount]               ; ecx = instruction budget
        mov     dword [_errstate], INST_OK
        cmp     dword [_execstate], EXEC_CONTINUE
        mov     ebx, [_regset]                  ; mov leaves the flags from cmp untouched
        jg      inst_err                        ; state above EXEC_CONTINUE (signed): do not run
_execloop:
;       cmp esi, [_codemax] ; make sure we don't go past the end of code
;       jge code_overrun ; jump to instruction stub if less than max
        movzx   edi, byte [esi]                 ; edi = opcode
        inc     esi                             ; step past the opcode byte
        mov     eax, [edi*4+jumptable]          ; eax = address of the stub for this opcode
        jmp     eax
;code_overrun:
;       mov [_errstate], dword EXEC_OVERRUN
;       mov [_execstate], dword EXEC_EXCEPTION
;       jmp inst_err ; jump to end
inst_end:                                       ; stubs jump here after a completed instruction
        dec     ecx                             ; one instruction consumed; dec/jnz replaces the
        jnz     _execloop                       ; legacy `loop` with identical count semantics
inst_err:                                       ; common exit: budget exhausted, error, or exit opcode
        shl     edi, 8                          ; last opcode moves into bits 8 and up
        or      edi, [_errstate]                ; combine with the error code
        mov     [_errstate], edi
        mov     [_curinst], esi                 ; write back the instruction pointer
        mov     [_instcount], ecx               ; write back the remaining budget
        popa                                    ; restore general registers
        ret
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; JUMPBASE, INSTRUCTION STUBS ;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
jumpbase:
;;==========================================================================;;
inst_nsi:                                       ; opcode that has no implementation
        mov     dword [_errstate], INST_NOTSUPPORTED    ; record why execution stops
        jmp     inst_err                        ; leave through the common exit path
;;==========================================================================;;
inst_ret:                                       ; return from procedure call
;       mov [_execstate], byte EXEC_DONE ; execution complete (disabled)
        mov     esi, [_codebase]                ; rewind the fetch pointer to the value in _codebase
        jmp     inst_end                        ; count this instruction; the loop keeps running
;;==========================================================================;;
inst_lubi:                                      ; load unsigned byte immediate
        ; Operands: byte 0 = destination register index, byte 1 = immediate.
        ; NOTE(review): the index is a full byte and is not range-checked
        ; against the register table -- confirm the table is large enough.
        movzx   eax, byte [esi]                 ; eax = destination register index
        movzx   edx, byte [esi+1]               ; edx = immediate, zero-extended to 32 bits
        add     esi, 2                          ; skip both operand bytes
        mov     [ebx+eax*4], edx                ; regtable[eax] = immediate
        jmp     inst_end                        ; instruction complete
;;==========================================================================;;
inst_luwi:                                      ; load unsigned word immediate
        ; Operands: byte 0 = destination register index, bytes 1-2 = immediate.
        movzx   edx, word [esi+1]               ; edx = 16-bit immediate, zero-extended
        movzx   eax, byte [esi]                 ; eax = destination register index
        add     esi, 3                          ; skip the three operand bytes
        mov     [ebx+eax*4], edx                ; regtable[eax] = immediate
        jmp     inst_end                        ; instruction complete
;;==========================================================================;;
inst_ludi:                                      ; load unsigned double word immediate
        ; Operands: byte 0 = destination register index, bytes 1-4 = immediate.
        mov     edx, [esi+1]                    ; edx = 32-bit immediate
        movzx   eax, byte [esi]                 ; eax = destination register index
        add     esi, 5                          ; skip the five operand bytes
        mov     [ebx+eax*4], edx                ; regtable[eax] = immediate
        jmp     inst_end                        ; instruction complete
;;==========================================================================;;
inst_lubr:                                      ; load unsigned byte register address
        ; Operand byte: high nibble = register holding the address to read,
        ;               low nibble  = destination register.
        movzx   edx, byte [esi]                 ; edx = packed operand byte
        inc     esi                             ; skip the operand byte
        mov     eax, edx
        and     eax, 0x0f                       ; eax = destination register index
        shr     edx, 4                          ; edx = address register index
        mov     edx, [ebx+edx*4]                ; edx = address held in that register
        movzx   edx, byte [edx]                 ; edx = byte at that address, zero-extended
        mov     [ebx+eax*4], edx                ; regtable[eax] = loaded byte
        jmp     inst_end                        ; instruction complete
;;==========================================================================;;
inst_add:                                       ; add two registers
        ; Operand byte packs two register indices, one per nibble.
        ; The sum is always written to regtable[0] (rrx).
        movzx   edx, byte [esi]                 ; edx = packed operand byte
        inc     esi                             ; skip the operand byte
        mov     eax, edx
        and     eax, 0x0f                       ; eax = index from the low nibble
        shr     edx, 4                          ; edx = index from the high nibble
        mov     eax, [ebx+eax*4]                ; eax = value of the first register
        add     eax, [ebx+edx*4]                ; eax += value of the second register
        mov     [ebx], eax                      ; regtable[0] = sum
        jmp     inst_end                        ; instruction complete
;;==========================================================================;;
inst_inc:                                       ; increment
        ; Operand: one byte naming the register to increment.
        movzx   eax, byte [esi]                 ; eax = register index
        inc     esi                             ; skip the operand byte
        inc     dword [ebx+eax*4]               ; regtable[eax] += 1, updated in place
        jmp     inst_end                        ; instruction complete
;;==========================================================================;;
inst_dec:                                       ; decrement
        ; Operand: one byte naming the register to decrement.
        movzx   eax, byte [esi]                 ; eax = register index
        inc     esi                             ; skip the operand byte
        dec     dword [ebx+eax*4]               ; regtable[eax] -= 1, updated in place
        jmp     inst_end                        ; instruction complete
;;==========================================================================;;
inst_acci:                                      ; accumulate immediate
        ; Operands: byte 0 = register index, bytes 1-2 = 16-bit immediate.
        movzx   edx, word [esi+1]               ; edx = immediate, zero-extended to 32 bits
        movzx   eax, byte [esi]                 ; eax = register index
        add     esi, 3                          ; skip the three operand bytes
        add     [ebx+eax*4], edx                ; regtable[eax] += immediate
        jmp     inst_end                        ; instruction complete
;;==========================================================================;;
inst_ext:                                       ; exit
        ; _execstate is compared as a dword at _execstart, so store the whole
        ; dword here; a byte-sized store would leave its upper three bytes
        ; unchanged.
        ; NOTE(review): assumes _execstate is declared as a 32-bit variable --
        ; confirm against its definition.
        mov     dword [_execstate], EXEC_DONE   ; mark execution complete
        jmp     inst_err                        ; leave through the common exit (no budget decrement)
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; END OF INSTRUCTION STUBS ;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
It looks like you're new here. If you want to get involved, click one of these buttons!
Comments
varies wildly; it's generally not a good idea to benchmark from
Windows without taking precautions, such as running the program
in exclusive mode. Otherwise the CPU time it gets will vary dramatically.
Try the same test again in DOS mode or true DOS for a concrete
benchmark, if you have not already done so.