Occam's razor Archive Pages Categories Tags

Building a Simple Key-Value Store in Go

15 January 2023

Key-value stores are everywhere - from Redis caching your sessions to etcd coordinating your Kubernetes cluster. I wanted to understand how they work under the hood, so I built one from scratch. This post covers the fundamentals: an in-memory store with thread safety, persistence via a write-ahead log, and a simple HTTP API.

This is part 1 of a series. In the next post, we’ll distribute this store across multiple nodes using Raft consensus.


The Interface

Let’s start with the basic operations every KV store needs:

// KVStore is the minimal contract shared by every store in this post.
type KVStore interface {
    // Get returns the value stored under key and whether the key exists.
    Get(key string) (string, bool)
    // Set stores value under key; persistent implementations may fail on I/O.
    Set(key, value string) error
    // Delete removes key; deleting an absent key is not an error.
    Delete(key string) error
    // Close releases underlying resources (e.g. the log file).
    Close() error
}

Simple and familiar. Get returns the value and whether the key exists. Set and Delete can return errors because we’ll add persistence later. Close handles cleanup.


In-Memory Implementation

The simplest implementation uses a map with a mutex for thread safety:

// MemStore is a purely in-memory KVStore backed by a map. A
// sync.RWMutex guards the map so concurrent readers proceed in
// parallel while writers get exclusive access.
type MemStore struct {
    mu   sync.RWMutex
    data map[string]string
}

// NewMemStore returns an empty, ready-to-use MemStore.
func NewMemStore() *MemStore {
    store := &MemStore{data: map[string]string{}}
    return store
}

// Get reports the value stored under key and whether it was present.
func (s *MemStore) Get(key string) (string, bool) {
    s.mu.RLock()
    value, found := s.data[key]
    s.mu.RUnlock()
    return value, found
}

// Set stores value under key. It never fails for the in-memory store;
// the error return exists only to satisfy KVStore.
func (s *MemStore) Set(key, value string) error {
    s.mu.Lock()
    s.data[key] = value
    s.mu.Unlock()
    return nil
}

// Delete removes key; removing an absent key is a no-op.
func (s *MemStore) Delete(key string) error {
    s.mu.Lock()
    delete(s.data, key)
    s.mu.Unlock()
    return nil
}

// Close is a no-op: there is nothing to release.
func (s *MemStore) Close() error { return nil }

We use RWMutex instead of Mutex so multiple readers can access the store concurrently. Writes still require exclusive access.

This works, but there’s a problem: all data is lost when the process exits. For a real database, we need persistence.


Adding Persistence with a Write-Ahead Log

A write-ahead log (WAL) is a simple but powerful technique: before modifying in-memory state, write the operation to disk. On restart, replay the log to reconstruct state.

First, let’s define our log entry format:

// LogEntry is one record in the write-ahead log, encoded as a single
// JSON object per mutation.
type LogEntry struct {
    Op    string `json:"op"`    // "set" or "delete"
    Key   string `json:"key"`
    Value string `json:"value,omitempty"` // omitted for deletes
}

Now the persistent store:

// PersistentStore is a KVStore that records every mutation in an
// append-only JSON log before applying it, so state survives restarts.
type PersistentStore struct {
    mu      sync.RWMutex
    data    map[string]string // in-memory view, rebuilt from the log on startup
    logFile *os.File          // append-only WAL, synced after every write
    encoder *json.Encoder     // writes one JSON LogEntry per mutation
}

// NewPersistentStore opens (or creates) the log at path, replays any
// existing entries to rebuild the in-memory map, and returns a store
// ready for reads and writes.
func NewPersistentStore(path string) (*PersistentStore, error) {
    store := &PersistentStore{data: map[string]string{}}

    // Rebuild state from a previous run, if a log already exists.
    if err := store.replay(path); err != nil {
        return nil, err
    }

    // All future mutations are appended to the same file.
    logFile, err := os.OpenFile(path, os.O_APPEND|os.O_CREATE|os.O_WRONLY, 0644)
    if err != nil {
        return nil, err
    }
    store.logFile = logFile
    store.encoder = json.NewEncoder(logFile)
    return store, nil
}

The replay function reads the log and applies each operation:

// replay reads the log at path (if it exists) and applies each entry
// to the in-memory map, reconstructing the state at last shutdown.
// It runs only from NewPersistentStore, before any concurrent access,
// so no locking is needed.
func (s *PersistentStore) replay(path string) error {
    f, err := os.Open(path)
    if os.IsNotExist(err) {
        // First run: nothing to replay.
        return nil
    }
    if err != nil {
        return err
    }
    defer f.Close()

    dec := json.NewDecoder(f)
    for {
        var entry LogEntry
        err := dec.Decode(&entry)
        if err == io.EOF {
            return nil
        }
        if err != nil {
            // NOTE(review): a crash mid-append can leave a truncated
            // final entry, which makes replay fail here on every
            // restart. A production WAL would truncate the torn tail —
            // TODO confirm whether that's out of scope for this series.
            return err
        }

        // Entries with an unknown Op are silently skipped.
        if entry.Op == "set" {
            s.data[entry.Key] = entry.Value
        } else if entry.Op == "delete" {
            delete(s.data, entry.Key)
        }
    }
}

The write operations now log before modifying state:

// appendAndSync durably records entry in the WAL: it is JSON-encoded,
// appended to the log file, and fsync'd. Callers must hold s.mu for
// writing and must not mutate in-memory state until this succeeds.
func (s *PersistentStore) appendAndSync(entry LogEntry) error {
    if err := s.encoder.Encode(entry); err != nil {
        return err
    }
    // Sync forces the OS to flush its buffers; without it a crash
    // could lose writes we already acknowledged.
    return s.logFile.Sync()
}

// Set durably logs the write, then applies it to the in-memory map.
// If logging fails the map is left untouched, so memory never gets
// ahead of the log.
func (s *PersistentStore) Set(key, value string) error {
    s.mu.Lock()
    defer s.mu.Unlock()

    if err := s.appendAndSync(LogEntry{Op: "set", Key: key, Value: value}); err != nil {
        return err
    }
    s.data[key] = value
    return nil
}

// Delete durably logs the removal, then deletes the key from the map.
// Deleting an absent key still writes a log entry and returns nil.
func (s *PersistentStore) Delete(key string) error {
    s.mu.Lock()
    defer s.mu.Unlock()

    if err := s.appendAndSync(LogEntry{Op: "delete", Key: key}); err != nil {
        return err
    }
    delete(s.data, key)
    return nil
}

The Sync() call is crucial - it forces the OS to flush the write to disk. Without it, data could be lost if the system crashes before the OS buffer is flushed.

Get and Close are straightforward:

// Get reads the key from the in-memory map under a shared lock; the
// log is never consulted on the read path.
func (s *PersistentStore) Get(key string) (string, bool) {
    s.mu.RLock()
    value, found := s.data[key]
    s.mu.RUnlock()
    return value, found
}

// Close closes the underlying log file. It takes the write lock so an
// in-flight Set/Delete cannot race with the file being closed out from
// under its Encode/Sync calls.
func (s *PersistentStore) Close() error {
    s.mu.Lock()
    defer s.mu.Unlock()
    return s.logFile.Close()
}


HTTP API

Let’s expose the store via REST endpoints:

// Server exposes a KVStore over HTTP at /kv/{key}.
type Server struct {
    store KVStore
}

// NewServer wraps store in an HTTP handler.
func NewServer(store KVStore) *Server {
    return &Server{store: store}
}

// ServeHTTP dispatches GET/PUT/DELETE on /kv/{key} to the store.
// A missing key segment gets 400; any other method gets 405.
func (s *Server) ServeHTTP(w http.ResponseWriter, r *http.Request) {
    key := strings.TrimPrefix(r.URL.Path, "/kv/")
    if key == "" {
        http.Error(w, "key required", http.StatusBadRequest)
        return
    }

    switch r.Method {
    case http.MethodGet:
        s.serveGet(w, key)
    case http.MethodPut:
        s.servePut(w, r, key)
    case http.MethodDelete:
        s.serveDelete(w, key)
    default:
        http.Error(w, "method not allowed", http.StatusMethodNotAllowed)
    }
}

// serveGet writes the raw value, or 404 if the key is absent.
func (s *Server) serveGet(w http.ResponseWriter, key string) {
    value, found := s.store.Get(key)
    if !found {
        http.Error(w, "not found", http.StatusNotFound)
        return
    }
    w.Write([]byte(value))
}

// servePut stores the request body verbatim as the value for key.
func (s *Server) servePut(w http.ResponseWriter, r *http.Request, key string) {
    body, err := io.ReadAll(r.Body)
    if err != nil {
        http.Error(w, err.Error(), http.StatusBadRequest)
        return
    }
    if err = s.store.Set(key, string(body)); err != nil {
        http.Error(w, err.Error(), http.StatusInternalServerError)
        return
    }
    w.WriteHeader(http.StatusOK)
}

// serveDelete removes key from the store.
func (s *Server) serveDelete(w http.ResponseWriter, key string) {
    if err := s.store.Delete(key); err != nil {
        http.Error(w, err.Error(), http.StatusInternalServerError)
        return
    }
    w.WriteHeader(http.StatusOK)
}

Wire it up in main:

// main wires a persistent store to an HTTP server on :8080.
func main() {
    store, err := NewPersistentStore("kv.log")
    if err != nil {
        log.Fatal(err)
    }
    // NB: no `defer store.Close()` here — every exit path below goes
    // through log.Fatal, which calls os.Exit and skips deferred calls,
    // so the defer would never run.

    server := NewServer(store)
    http.Handle("/kv/", server)

    log.Println("listening on :8080")
    err = http.ListenAndServe(":8080", nil)
    store.Close() // release the WAL file before exiting
    log.Fatal(err)
}

Now you can interact with it:

# Set a value
curl -X PUT -d "world" http://localhost:8080/kv/hello

# Get it back
curl http://localhost:8080/kv/hello
# world

# Delete it
curl -X DELETE http://localhost:8080/kv/hello


Benchmarks

How much does persistence cost? I benchmarked both implementations with 10,000 sequential writes:

| Implementation | Writes/sec | Latency (p99) |
|----------------|------------|---------------|
| In-Memory      | 2,847,000  | 1.2µs         |
| With WAL       | 12,400     | 890µs         |

The WAL makes writes roughly 230× slower (2,847,000 / 12,400). That’s the cost of fsync - waiting for the disk to confirm the write. In practice, you can batch writes or use a faster disk (NVMe helps significantly).

For reads, both implementations are identical since they just read from the in-memory map.


What’s Next

We have a working single-node KV store with durability. But what happens when that node fails? All your data becomes unavailable.

In the next post, we’ll distribute this store across multiple nodes using Raft consensus. Each write will be replicated to a majority of nodes before being acknowledged, so the system survives node failures.

The interface will stay the same - clients won’t know they’re talking to a distributed system.

blog comments powered by Disqus