Tuesday, September 14, 2010

Misconceptions

Last night I was fiddling around with some really simple code; the object of my program was to test a positive integer, to see if it was odd or even, and then print the result.  There is more than one way to do this, some being more expensive than others.  I thought I would run some tests to see what sort of differences, with regard to program execution time, I could come up with.  I had three methods of checking for whether a number was odd or even. The first one uses the venerable modulus operator (%), as displayed below:
mod.c
#include <stdio.h>
#include <limits.h>

int main(void)
{
    int i;

    for (i = 0; i < INT_MAX; ++i) {
        if (i % 2 == 0) {
            printf("%d is even.\n", i);
        } else {
            printf("%d is odd.\n", i);
        }
    }

   return 0;
}
Next, I tried a bit mask.  Basically, all it does is bitwise and (&) the first bit of the integer and 1:
bit.c
#include <stdio.h>
#include <limits.h>

int main(void)
{
    register int i;

    for (i = 0; i < INT_MAX; ++i) {
        if (i & 1) {
            printf("%d is even.\n", i);
        } else {
            printf("%d is odd.\n", i);
        }
    }

    return 0;
} 
Finally, I exploited the fact the integer division truncates any remainder of a number:
int.c 
#include <stdio.h>
#include <limits.h>

int main(void)
{
    register int i;

    for (i = 0; i < INT_MAX; ++i) {
        if ((i / 2) * 2 == i) {
            printf("%d is even.\n", i);
        } else {
            printf("%d is odd.\n", i);
        }
    }

    return 0;
}
I compiled each one with gcc -O2, and these were my times:

time ./mod > /dev/null
./mod > /dev/null  299.43s user 1.34s system 62% cpu 8:02.40 total

time ./bit > /dev/null
./bit > /dev/null  298.97s user 1.87s system 83% cpu 5:59.71 total

time ./int > /dev/null
./int > /dev/null  304.14s user 1.57s system 61% cpu 8:16.85 total

These times are not all that useful, as each program did not get the same amount of cpu.  Things got a little more interesting though when I recompiled with gcc -O3, and made sure the programs could each get pretty much all of the cpu for the entirety of their run times:

time ./mod > /dev/null         
./mod > /dev/null  296.04s user 1.00s system 99% cpu 4:58.53 total

time ./int > /dev/null
./int > /dev/null  297.81s user 1.04s system 99% cpu 5:00.46 total

time ./bit > /dev/null         
./bit > /dev/null  292.43s user 1.16s system 99% cpu 4:54.79 total

Really not much difference in time at all between the three - they all took around five minutes. Without looking at the assembly for each program, these results might seem a little surprising, especially for mod.c, and bit.c. But look at the assembly for the two (and you won't be surprised about the execution time any longer):

.file   "mod.c"
        .section        .rodata.str1.1,"aMS",@progbits,1
.LC0:
        .string "%d is even.\n"
.LC1:
        .string "%d is odd.\n"
        .text
        .p2align 4,,15
.globl main
        .type   main, @function
main:
.LFB11:
        .cfi_startproc
        pushq   %rbx
        .cfi_def_cfa_offset 16
        xorl    %ebx, %ebx
        .cfi_offset 3, -16
        jmp     .L4
        .p2align 4,,10
        .p2align 3
.L8:
        movl    %ebx, %esi
        xorl    %eax, %eax
        movl    $.LC0, %edi
        addl    $1, %ebx
        call    printf
        cmpl    $2147483647, %ebx
        je      .L7
.L4:
        testb   $1, %bl
        je      .L8
        movl    %ebx, %esi
        xorl    %eax, %eax
        movl    $.LC1, %edi
        addl    $1, %ebx
        call    printf
        cmpl    $2147483647, %ebx
        jne     .L4
.L7:
        xorl    %eax, %eax
        popq    %rbx
        .cfi_def_cfa_offset 8
        ret
        .cfi_endproc
.LFE11:
        .size   main, .-main
        .ident  "GCC: (SUSE Linux) 4.5.0 20100604 [gcc-4_5-branch revision 160292]"
        .section        .comment.SUSE.OPTs,"MS",@progbits,1
        .string "Ospwg"
        .section        .note.GNU-stack,"",@progbits

And bit.c:

.file   "bit.c"
        .section        .rodata.str1.1,"aMS",@progbits,1
.LC0:
        .string "%d is even.\n"
.LC1:
        .string "%d is odd.\n"
        .text
        .p2align 4,,15
.globl main
        .type   main, @function
main:
.LFB11:
        .cfi_startproc
        pushq   %rbx
        .cfi_def_cfa_offset 16
        xorl    %ebx, %ebx
        .cfi_offset 3, -16
        jmp     .L4
        .p2align 4,,10
        .p2align 3
.L8:
        movl    %ebx, %esi
        xorl    %eax, %eax
        movl    $.LC0, %edi
        addl    $1, %ebx
        call    printf
        cmpl    $2147483647, %ebx
        je      .L7
.L4:
        testb   $1, %bl
        jne     .L8
        movl    %ebx, %esi
        xorl    %eax, %eax
        movl    $.LC1, %edi
        addl    $1, %ebx
        call    printf
        cmpl    $2147483647, %ebx
        jne     .L4
.L7:
        xorl    %eax, %eax
        popq    %rbx
        .cfi_def_cfa_offset 8
        ret
        .cfi_endproc
.LFE11:
        .size   main, .-main
        .ident  "GCC: (SUSE Linux) 4.5.0 20100604 [gcc-4_5-branch revision 160292]"
        .section        .comment.SUSE.OPTs,"MS",@progbits,1
        .string "Ospwg"
        .section        .note.GNU-stack,"",@progbits

They are virtually identical; the compiler turned them into almost the same program! In case you have trouble seeing where the differences lie, here is the diff for the two:

diff mod.s bit.s
1c1
<       .file   "mod.c"
---
>       .file   "bit.c"
31c31
<       je      .L8
---
>       jne     .L8

You don't get much closer than that. I won't bother posting the assembly for int.c, (its just a couple more instructions) as the difference in run time is negligible. With a good optimizing compiler, what you wrote might not even end up in the final output.

No comments:

Post a Comment