CProjects

C Masterclass Capstone: Building a Secure, Persistent Key-Value Store (Redis in C)

TT
TopicTrick Team
C Masterclass Capstone: Building a Secure, Persistent Key-Value Store (Redis in C)

C Masterclass Capstone: Building a Secure, Persistent Key-Value Store (Redis in C)

Final Project. Every module in this masterclass has prepared you for this moment. You'll build a complete key-value store — the engine at the heart of Redis, Memcached, and etcd. It combines concurrent hash tables (Modules 10+16), LRU eviction (Module 27), binary persistence (Module 15), TCP networking (Module 21), signal handling (Module 22), and C23 security hardening (Module 25). This is what 30 modules of C mastery looks like in production.


Table of Contents


System Architecture Overview


Core Data Model: The KV Entry

c
#include <stdint.h>
#include <stdbool.h>
#include <time.h>
#include <stdlib.h>
#include <string.h>

#define MAX_KEY_LEN    256
#define MAX_VALUE_LEN  (64 * 1024)  // 64KB max value

typedef enum {
    TYPE_STRING = 0,
    TYPE_INT    = 1,
    TYPE_BYTES  = 2,
} ValueType;

typedef struct {
    char       key[MAX_KEY_LEN];
    ValueType  type;
    union {
        char     str_val[MAX_VALUE_LEN];
        int64_t  int_val;
        struct { uint8_t *data; size_t len; } bytes_val;
    };
    int64_t    expires_at;     // Unix timestamp, -1 = no expiry
    uint64_t   created_at;    // For TTL tracking
    uint64_t   last_accessed; // For LRU ordering
    uint64_t   access_count;  // Stats
} KVEntry;

// Thread-safe ref-counted wrapper
typedef struct {
    KVEntry   *entry;
    _Atomic int ref_count;     // C11 atomic for lock-free reference counting
} KVRef;

Concurrent Hash Table with Reader-Writer Locks

A single global mutex creates a bottleneck with many concurrent readers. Sharding solves this — split the hash table into N independent shards, each with its own lock:

c
#include <pthread.h>

#define NUM_SHARDS    16           // Adjust based on thread count
#define SHARD_BUCKETS 1024

typedef struct HTBucket {
    KVEntry           *entry;
    struct HTBucket   *next;
} HTBucket;

typedef struct {
    HTBucket         *buckets[SHARD_BUCKETS];
    pthread_rwlock_t  lock;           // Per-shard reader-writer lock
    size_t            count;
} Shard;

typedef struct {
    Shard    shards[NUM_SHARDS];
    size_t   total_capacity;          // Max entries across all shards
    _Atomic size_t total_count;       // Total entries (atomic for stats)
} ConcurrentHashTable;

// Route to the correct shard for any key
static Shard* get_shard(ConcurrentHashTable *ht, const char *key) {
    uint32_t hash = fnv1a_hash(key);
    return &ht->shards[hash % NUM_SHARDS];
}

KVEntry* cht_get(ConcurrentHashTable *ht, const char *key) {
    Shard *shard = get_shard(ht, key);
    
    pthread_rwlock_rdlock(&shard->lock); // Multiple readers allowed
    HTBucket *b = find_bucket(shard, key);
    KVEntry *entry = b ? b->entry : NULL;
    pthread_rwlock_unlock(&shard->lock);
    
    // Check expiration (without holding lock)
    if (entry && entry->expires_at > 0 && time(NULL) > entry->expires_at) {
        cht_del(ht, key); // Lazy deletion
        return NULL;
    }
    
    return entry;
}

int cht_set(ConcurrentHashTable *ht, const char *key, KVEntry *entry) {
    Shard *shard = get_shard(ht, key);
    
    pthread_rwlock_wrlock(&shard->lock); // Exclusive write access
    int result = insert_or_update(shard, key, entry);
    pthread_rwlock_unlock(&shard->lock);
    
    return result;
}

Write-Ahead Log (WAL): Crash-Safe Persistence

A WAL records every mutation before applying it to the in-memory state. If the server crashes mid-operation, the WAL can be replayed on restart to restore the exact state:

c
#include <stdio.h>
#include <stdint.h>

// Each WAL record has a fixed-size header
typedef struct __attribute__((packed)) {
    uint32_t magic;        // 0x4B56574C "KVWL" — validate record type
    uint8_t  op_type;      // 0=SET, 1=DEL
    uint32_t key_len;
    uint32_t val_len;
    int64_t  expires_at;
    uint32_t checksum;     // CRC32 of key+value for corruption detection
    // Followed by: key_len bytes of key, val_len bytes of value
} WALRecord;

#define WAL_MAGIC   0x4B56574C
#define WAL_OP_SET  0
#define WAL_OP_DEL  1

int wal_append_set(FILE *wal, const char *key, const char *value, int64_t expires_at) {
    WALRecord rec = {
        .magic      = WAL_MAGIC,
        .op_type    = WAL_OP_SET,
        .key_len    = strlen(key),
        .val_len    = strlen(value),
        .expires_at = expires_at,
        .checksum   = crc32_compute(key, strlen(key)),
    };
    
    fwrite(&rec, sizeof(rec), 1, wal);
    fwrite(key,   1, rec.key_len, wal);
    fwrite(value, 1, rec.val_len, wal);
    
    fflush(wal);
    fsync(fileno(wal)); // Ensure durability before acknowledging the write
    return 0;
}

// On startup: replay WAL to reconstruct in-memory state
int wal_replay(ConcurrentHashTable *ht, const char *wal_path) {
    FILE *wal = fopen(wal_path, "rb");
    if (!wal) return 0; // No WAL = fresh start
    
    WALRecord rec;
    int replayed = 0;
    
    while (fread(&rec, sizeof(rec), 1, wal) == 1) {
        if (rec.magic != WAL_MAGIC) break; // Corrupt record — stop replay
        
        char *key = malloc(rec.key_len + 1);
        char *val = malloc(rec.val_len + 1);
        
        fread(key, 1, rec.key_len, wal); key[rec.key_len] = '\0';
        fread(val, 1, rec.val_len, wal); val[rec.val_len] = '\0';
        
        if (rec.op_type == WAL_OP_SET) {
            // Reconstruct and insert into hash table
            KVEntry *entry = make_entry(key, val, TYPE_STRING, rec.expires_at);
            cht_set(ht, key, entry);
            replayed++;
        } else if (rec.op_type == WAL_OP_DEL) {
            cht_del(ht, key);
            replayed++;
        }
        
        free(key); free(val);
    }
    
    fclose(wal);
    printf("[WAL] Replayed %d operations\n", replayed);
    return replayed;
}

TCP Network Protocol: The Command Interface

c
// Simple line-delimited text protocol (similar to Redis RESP)
// SET key value [EX seconds]
// GET key
// DEL key
// KEYS *   (list all keys)
// STATS    (print server statistics)

void handle_client(ConcurrentHashTable *ht, FILE *wal, int client_fd) {
    char buf[MAX_KEY_LEN + MAX_VALUE_LEN + 64];
    ssize_t n = recv(client_fd, buf, sizeof(buf) - 1, 0);
    if (n <= 0) return;
    buf[n] = '\0';
    
    char cmd[16], key[MAX_KEY_LEN], value[MAX_VALUE_LEN] = "";
    int ttl_seconds = 0;
    
    int fields = sscanf(buf, "%15s %255s %65535s EX %d",
                        cmd, key, value, &ttl_seconds);
    
    const char *response;
    
    if (strcasecmp(cmd, "GET") == 0 && fields >= 2) {
        KVEntry *e = cht_get(ht, key);
        if (e) {
            send(client_fd, e->str_val, strlen(e->str_val), 0);
            send(client_fd, "\n", 1, 0);
        } else {
            send(client_fd, "(nil)\n", 6, 0);
        }
    } else if (strcasecmp(cmd, "SET") == 0 && fields >= 3) {
        int64_t expires = ttl_seconds > 0 ? time(NULL) + ttl_seconds : -1;
        KVEntry *entry = make_entry(key, value, TYPE_STRING, expires);
        cht_set(ht, key, entry);
        wal_append_set(wal, key, value, expires);
        send(client_fd, "+OK\n", 4, 0);
    } else if (strcasecmp(cmd, "DEL") == 0 && fields >= 2) {
        cht_del(ht, key);
        wal_append_del(wal, key);
        send(client_fd, "+OK\n", 4, 0);
    } else {
        send(client_fd, "-ERR Unknown command\n", 21, 0);
    }
}

Graceful Shutdown with Signal Handlers

c
static volatile sig_atomic_t g_running      = 1;
static volatile sig_atomic_t g_save_and_exit = 0;

void sigterm_handler(int sig) { g_running = 0; g_save_and_exit = 1; }
void sigusr1_handler(int sig) { g_save_and_exit = 1; } // Manual snapshot

void setup_signals(void) {
    struct sigaction sa = { .sa_flags = SA_RESTART };
    sigemptyset(&sa.sa_mask);
    
    sa.sa_handler = sigterm_handler;
    sigaction(SIGTERM, &sa, NULL);
    sigaction(SIGINT,  &sa, NULL);
    
    sa.sa_handler = sigusr1_handler;
    sigaction(SIGUSR1, &sa, NULL);
    
    // Ignore SIGPIPE — prevents crash when client disconnects mid-send
    sa.sa_handler = SIG_IGN;
    sigaction(SIGPIPE, &sa, NULL);
}

// In main loop — check after each accept()
if (g_save_and_exit) {
    g_save_and_exit = 0;
    write_snapshot(ht, "snapshot.bin");
    wal_truncate("wal.log"); // Checkpoint: WAL state is now in snapshot
    
    if (!g_running) {
        printf("[KV] Graceful shutdown complete. Snapshot saved.\n");
        break; // Exit main loop
    }
}

Security Hardening: C23 Best Practices

c
// 1. All keys and values bounded — no unchecked strcpy
void set_key_safe(KVEntry *entry, const char *key, size_t len) {
    size_t copy_len = len < MAX_KEY_LEN ? len : MAX_KEY_LEN - 1;
    memcpy(entry->key, key, copy_len);
    entry->key[copy_len] = '\0';
}

// 2. Validate all network input before processing
bool validate_key(const char *key) {
    size_t len = strnlen(key, MAX_KEY_LEN + 1);
    if (len == 0 || len > MAX_KEY_LEN) return false;
    // Allow: alphanumeric, colon, underscore, dash
    for (size_t i = 0; i < len; i++) {
        char c = key[i];
        if (!isalnum(c) && c != ':' && c != '_' && c != '-' && c != '.') return false;
    }
    return true;
}

// 3. Compile with hardening flags:
// gcc -std=c23 -O2 -Wall -Wextra
//     -fstack-protector-strong -D_FORTIFY_SOURCE=3
//     -fpie -pie -Wl,-z,relro,-z,now
//     -fsanitize=address,undefined (for development)
//     -pthread -lm kv_store.c -o kvstore

// 4. memset_explicit for clearing auth tokens/sensitive data
void clear_auth_token(char *token, size_t len) {
    memset_explicit(token, 0, len); // C23: guaranteed not optimized away
}

Testing the Complete System

bash
# Terminal 1: Start the server
./kvstore --port 6380 --maxmem 64MB --wal wal.log --snapshot snapshot.bin

# Terminal 2: Test with netcat
echo "SET user:1 Alice" | nc localhost 6380
echo "GET user:1" | nc localhost 6380        # → Alice
echo "SET session:123 {token:abc} EX 300" | nc localhost 6380  # 5-min TTL
echo "GET session:123" | nc localhost 6380   # → {token:abc}
echo "DEL user:1" | nc localhost 6380
echo "GET user:1" | nc localhost 6380        # → (nil)
echo "STATS" | nc localhost 6380             # → server statistics

# Test crash recovery
kill -9 $(pidof kvstore)  # Simulate crash
./kvstore --port 6380 --wal wal.log        # Restart — WAL replayed automatically
echo "GET user:1" | nc localhost 6380     # → still present after replay

# Send SIGUSR1 to force snapshot
kill -USR1 $(pidof kvstore)

# Graceful shutdown
kill -TERM $(pidof kvstore)  # SIGTERM → saves snapshot → exits cleanly

What You've Built and Where to Go Next

You've built a system that demonstrates mastery of:

ModuleTechniqueHow Used
1-5Types, control flow, functionsCommand parser, validation
6-8Pointers, memory, structsKVEntry, pointer manipulation
9-10Arrays, linked listsHash table buckets, WAL buffer
11Hash tablesConcurrent hash table core
12Function pointersPluggable eviction policies
13void*Generic value storage
15File I/OWAL append, snapshot write
16Error handlingEvery operation checked
17Binary treesOrdered key iteration
18Bit operationsShard selection, checksum
19pthreadsWorker threads, rwlocks
20fork/execSnapshot writer subprocess
21TCP socketsNetwork protocol
22SignalsGraceful shutdown, SIGUSR1
25SecurityInput validation, hardening

Where to go from here:

  • C++: Add object orientation and templates; RAII for automatic resource management.
  • Rust: Eliminate all memory safety concerns at compile time; similar performance, no GC.
  • Kernel development: Linux kernel modules; network drivers; eBPF programs.
  • Embedded systems: Bare-metal STM32/ESP32 programming; RTOS; hardware drivers.
  • Distributed systems: Raft consensus for the KV store; replication; sharding.

Congratulations

You have completed the C Mastery Masterclass — 30 modules from "what is a variable?" to building a production-quality, distributed-capable, crash-safe key-value store.

You don't just know how to use computers. You know how they work — at the memory, thread, kernel, and network level. This knowledge is the foundation that makes everything else in systems engineering possible.

Masterclass Status: 100% Complete.

Thank you for completing the TopicTrick C Programming Masterclass.

Frequently Asked Questions

Q: What should a comprehensive C final project demonstrate to prove mastery? A strong final project integrates: manual memory management with no leaks (verified by Valgrind), pointer arithmetic and data structures (linked list, hash table, or binary tree), file I/O with binary and text formats, error handling via errno and return codes, socket networking or process/signal handling, and a Makefile build. Projects like a custom shell, an HTTP server, a key-value store, or a memory allocator each naturally exercise the full breadth of C systems programming.

Q: How do you use Valgrind to verify your C project has no memory leaks? Compile with debug symbols (-g) and no optimisation (-O0), then run: valgrind --leak-check=full --show-leak-kinds=all --track-origins=yes ./your_program. Valgrind reports every malloc that was not freed, invalid reads/writes, and use of uninitialised values. A clean run shows "All heap blocks were freed -- no leaks are possible" and zero errors. Fix all leaks before submitting — even a single forgotten free() in an otherwise correct program indicates incomplete understanding of ownership.

Q: What is the most common mistake C beginners make in final projects? Off-by-one errors in buffer sizing are the most common — allocating strlen(s) bytes for a string copy instead of strlen(s) + 1 (forgetting the null terminator), causing writes beyond the allocated region. The second most common is not checking return values: malloc can return NULL, fopen can return NULL, read can return -1. The third is improper pointer invalidation — continuing to use a pointer after free() (use-after-free) or after realloc() moves the allocation. Compiling with gcc -Wall -Wextra -fsanitize=address catches most of these at development time.


Part of the C Mastery Course — 30 modules from C basics to expert systems engineering.