Bench strcpy

I had a need to parse a huge JSON file, and I was curious how much performance speed up could be had just checking the first character, and hashing everything instead of using strcmp.

Full code can be found here

Results

Platform | strcmp | strcmp with prefix | hash runtime | hash ahead of time
---------|--------|--------------------|--------------|-------------------
1(GCC)   | 1318   | 359                | 1838         | 140
2(Clang) | 32715  | 475                | 2640         | 243
3(GCC)   | 4666   | 2151               | 12923        | 542
3(Clang) | 4592   | 1994               | 13449        | 509

Times are in cycles using rdtsc counter

The platforms tested are

1. Linux Ubuntu 18 | Intel(R) Core(TM) i7-8550U CPU @ 1.8GHz
2. MacOSX 10.14    | Intel(R) Core(TM) i5-5257U @ 2.70GHz
3. Linux Ubuntu 16 | Intel(R) Core(TM) i7-6700K CPU @ 4.00GHz

Strcpy

A basic strcpy benchmark, sets the baseline.

uint64_t
bench_strcmp() {
        const char **str_it = &strings[0];
        uint64_t start = get_time_rdtsc();

        while(*str_it) {
          if(strcmp(*str_it, search_for) == 0) {
            break;
          }
          ++str_it;
        }

        uint64_t end = get_time_rdtsc();

        printf("Found %s\n", *str_it);

        return end - start;
}

Strcpy Prefix Check

Before running strcpy check the first character, as a runtime solution this is the winner.

int
pre_check(uint8_t *a, uint8_t *b) {
        return *a == *b;
}

uint64_t
bench_strcmp_prefix() {
        const char **str_it = &strings[0];
        uint64_t start = get_time_rdtsc();

        uint8_t *search_int = (uint8_t*)search_for;

        const char *str;
        while(str = *str_it++, str) {
                /* check prefix first */
                if(!pre_check((uint8_t*)*str_it, search_int)) {
                    continue;
                }

                /* compare whole string */
                if(strcmp(*str_it, search_for) == 0){
                        break;
                }
        }

        uint64_t end = get_time_rdtsc();

        printf("Found %s\n", *str_it);

        return end - start;
}

Runtme Hashing

Hashing strings and then comparing the hashes, this doesn’t handle collisions.

uint64_t
hash_str(const char *str) {
        uint64_t hash = 5381;
        int c;
        while(c = *str++, c) {
          hash = ((hash << 5) + hash) + c;
        }

        return hash;
}

uint64_t
bench_hash_rt() {
        const char **str_it = &strings[0];
        uint64_t start = get_time_rdtsc();
        uint64_t search_hash = hash_str(search_for);

        while(*str_it) {
                uint64_t hash = hash_str(*str_it);
                if(hash == search_hash) {
                        break;
                }
                ++str_it;
        }

        uint64_t end = get_time_rdtsc();
        
        printf("Found %s\n", *str_it);
        
        return end - start;
}

Ahead Of Time Hashing

Hashing strings upfront. Predictably this is the fastests.

uint64_t
hash_str(const char *str) {
        uint64_t hash = 5381;
        int c;
        while(c = *str++, c) {
          hash = ((hash << 5) + hash) + c;
        }

        return hash;
}

uint64_t
bench_hash_at() {
        /* build hash table */
        uint64_t hash_arr[(sizeof(strings) / sizeof(strings[0]))];
        int count = (sizeof(strings) / sizeof(strings[0]));
        int i;
        for(i = 0; i < count; ++i) {
                if(strings[i] == NULL) {
                        hash_arr[i] = (uint64_t)-1;
                }
                else {
                        hash_arr[i] = hash_str(strings[i]);
                }
        }

        /* search */
        uint64_t *hash_it = &hash_arr[0];
        uint64_t search_hash = hash_str(search_for);

        uint64_t start = get_time_rdtsc();
        
        while(*hash_it != (uint64_t)-1) {
                if(*hash_it == search_hash) {
                        break;
                }

                ++hash_it;
        }

        uint64_t end = get_time_rdtsc();
        
        printf("Found %s\n", strings[hash_it - &hash_arr[0]]);
        
        return end - start;
}