commit 7b88c572a70246eeadbbd4bb5e1b4f1fae2ef514
Author: tri <tri@thac.loan>
Date:   Sun Oct 5 10:02:13 2025 +0700

    implement KHOE_HISTORICAL_BLOBS
    
    When set to 1, also generate blob pages for blobs in old commits.

diff --git a/src/config.zig b/src/config.zig
index 2fd3c0f..13e6adc 100644
--- a/src/config.zig
+++ b/src/config.zig
@@ -2,13 +2,43 @@ const std = @import("std");
 const mem = std.mem;
 const posix = std.posix;
 
+// Here are all environment variables used for config:
+
+/// Default 0.
+/// If 0, blob object pages that already exist will be skipped.
+/// If 1, they will be regenerated.
+/// Useful when the blob page generation logic has been updated.
+const KHOE_FULL_REGEN = "KHOE_FULL_REGEN";
+
+/// TODO: not implemented yet
+/// Default empty.
+/// If non-empty, only the specified repo's objects will be generated.
+/// Convenient when used as a post-update git hook, so that the repo being
+/// pushed stays updated.
+const KHOE_REPO = "KHOE_REPO";
+
+/// Default 0.
+/// If 0, only generate blobs for the latest tree in HEAD.
+/// If 1, all blobs from all commits of the HEAD branch will be generated.
+/// It's recommended to first run khoe with this on to generate the full set,
+/// then leave it off in subsequent runs to save time. As long as we don't
+/// delete anything, links to blob object pages will stay valid.
+const KHOE_HISTORICAL_BLOBS = "KHOE_HISTORICAL_BLOBS";
+
 pub const Conf = struct {
     full_regen: bool,
+    repo: ?[:0]const u8,
+    historical_blobs: bool,
 };
 
 pub fn fromEnv() Conf {
-    const full_regen = posix.getenv("KHOE_FULL_REGEN") orelse "0";
+    const full_regen = posix.getenv(KHOE_FULL_REGEN) orelse "0";
+    const repo = posix.getenv(KHOE_REPO);
+    const historical_blobs = posix.getenv(KHOE_HISTORICAL_BLOBS) orelse "0";
+
     return Conf{
         .full_regen = mem.eql(u8, full_regen, "1"),
+        .repo = repo,
+        .historical_blobs = mem.eql(u8, historical_blobs, "1"),
     };
 }
diff --git a/src/git.zig b/src/git.zig
index cb66c0f..4ae29ff 100644
--- a/src/git.zig
+++ b/src/git.zig
@@ -183,7 +183,7 @@ pub const Walker = struct {
     }
 };
 
-pub fn walkTree(arena: mem.Allocator, dir: fs.Dir) !Walker {
+pub fn walkTree(arena: mem.Allocator, dir: fs.Dir, tree_ref: []const u8) !Walker {
     var proc = try std.process.Child.run(.{
         .allocator = arena,
         .cwd_dir = dir,
@@ -191,7 +191,7 @@ pub fn walkTree(arena: mem.Allocator, dir: fs.Dir) !Walker {
         .argv = &.{
             "git",
             "ls-tree",
-            "HEAD",
+            tree_ref,
             "-r",
             "-z",
             "--format",
diff --git a/src/main.zig b/src/main.zig
index 994fd65..cbf5e23 100644
--- a/src/main.zig
+++ b/src/main.zig
@@ -12,33 +12,17 @@ const templates = @import("templates.zig");
 const constants = @import("constants.zig");
 const config = @import("config.zig");
 
-const Mode = union(enum) {
-    all: void,
-    single_repo: [*:0]const u8,
-};
-
 pub fn main() !u8 {
-    if (std.os.argv.len != 3 and std.os.argv.len != 4) {
-        println("Usage: khoe <dir> <site-url> [repo-name]", .{});
+    if (std.os.argv.len != 3) {
+        println("Usage: khoe <dir> <site-url>", .{});
         println(
             \\For example:
             \\    khoe /srv/git/repos https://khoe.thac.loan
-            \\    khoe /srv/git/repos https://khoe.thac.loan khoe.git
-            \\When repo-name is present, only the homepage and that repo are
-            \\regenerated.
         , .{});
         return 1;
     }
 
     const site_url = std.os.argv[2];
-    const mode: Mode =
-        if (std.os.argv.len == 3)
-            .all
-        else
-            .{ .single_repo = std.os.argv[3] };
-    _ = mode; // TODO
-
-    const conf = config.fromEnv();
 
     var dba_impl: std.heap.DebugAllocator(.{}) = .init;
     defer _ = dba_impl.deinit();
@@ -57,6 +41,8 @@ pub fn main() !u8 {
     defer arena_impl.deinit();
     const arena = arena_impl.allocator();
 
+    const conf = config.fromEnv();
+
     var repo_summaries: std.ArrayList(RepoSummary) = try .initCapacity(arena, 32);
     defer repo_summaries.deinit(arena);
 
@@ -244,15 +230,21 @@ pub fn processRepo(args: *const RepoArgs) !void {
 
     const maybe_readme_path = try git.findReadme(arena, args.in_repo_dir);
 
-    try writeRepoPage(args, out_repo_dir, maybe_readme_path);
+    var processed_blob_hashes = std.StringHashMap(bool).init(arena);
+
+    try writeRepoPage(args, out_repo_dir, maybe_readme_path, &processed_blob_hashes);
     try writeReadmePage(args, out_repo_dir, maybe_readme_path);
     try writeCommitsPage(args, out_repo_dir, maybe_readme_path);
+    if (args.conf.historical_blobs) {
+        try writeHistoricalBlobPages(args, out_repo_dir, &processed_blob_hashes);
+    }
 }
 
 pub fn writeRepoPage(
     args: *const RepoArgs,
     out_repo_dir: fs.Dir,
     maybe_readme_filename: ?[]const u8,
+    processed_blob_hashes: *std.StringHashMap(bool),
 ) !void {
     const arena = args.arena;
 
@@ -306,7 +298,7 @@ pub fn writeRepoPage(
     var objects_dir = try out_repo_dir.makeOpenPath(constants.web_objects_path, .{});
     defer objects_dir.close();
 
-    var treeWalker = try git.walkTree(arena, args.in_repo_dir);
+    var treeWalker = try git.walkTree(arena, args.in_repo_dir, "HEAD");
     while (try treeWalker.next()) |src_file| {
         try writer.print(
             \\<tr>
@@ -322,6 +314,10 @@ pub fn writeRepoPage(
         });
 
         try writeBlobPage(args, objects_dir, src_file);
+
+        // Record that this blob has already been generated so that
+        // writeHistoricalBlobPages() doesn't do duplicate work later:
+        try processed_blob_hashes.put(src_file.hash, true);
     }
 
     try writer.writeAll(
@@ -618,6 +614,33 @@ pub fn writeCommitPage(
     try writer.flush();
 }
 
+/// Generate blob pages for blobs found in commits after the first entry of
+/// `args.commits`. Hashes already in `processed_blob_hashes` are skipped, and
+/// every hash handled here is added to it. Errors are propagated to the caller.
+pub fn writeHistoricalBlobPages(
+    args: *const RepoArgs,
+    out_repo_dir: fs.Dir,
+    processed_blob_hashes: *std.StringHashMap(bool),
+) !void {
+    // Skip the latest commit because it has already been processed in
+    // writeRepoPage().
+    if (args.commits.len <= 1) return;
+
+    var objects_dir = try out_repo_dir.makeOpenPath(constants.web_objects_path, .{});
+    defer objects_dir.close();
+
+    for (args.commits[1..]) |cmt| {
+        var tree_walker = try git.walkTree(args.arena, args.in_repo_dir, cmt.hash);
+        while (try tree_walker.next()) |src_file| {
+            // One hash lookup both checks for and records the blob.
+            const entry = try processed_blob_hashes.getOrPut(src_file.hash);
+            if (entry.found_existing) continue;
+            entry.value_ptr.* = true;
+            try writeBlobPage(args, objects_dir, src_file);
+        }
+    }
+}
+
 pub fn writeBlobPage(
     args: *const RepoArgs,
     objects_dir: fs.Dir,