commit 7b88c572a70246eeadbbd4bb5e1b4f1fae2ef514
Author: tri <tri@thac.loan>
Date: Sun Oct 5 10:02:13 2025 +0700
implement KHOE_HISTORICAL_BLOBS
When on, blob object pages are also generated for blobs from older commits, not just the latest tree of HEAD.
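A typical way to use this (reusing the example invocation from the usage text
in main.zig below) is a one-off backfill run with the variable set, followed
by normal runs without it:
    KHOE_HISTORICAL_BLOBS=1 khoe /srv/git/repos https://khoe.thac.loan
    khoe /srv/git/repos https://khoe.thac.loan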
diff --git a/src/config.zig b/src/config.zig
index 2fd3c0f..13e6adc 100644
--- a/src/config.zig
+++ b/src/config.zig
const std = @import("std");
const mem = std.mem;
const posix = std.posix;
+// Here are all environment variables used for config:
+
+/// Default 0.
+/// If 0, blob object pages that already exist will be skipped.
+/// If 1, they will be regenerated.
+/// Useful when the blob page generation logic has been updated.
+const KHOE_FULL_REGEN = "KHOE_FULL_REGEN";
+
+/// TODO: not implemented yet
+/// Default empty.
+/// If non-empty, only the specified repo's objects will be generated.
+/// Convenient when khoe is run from a post-update git hook, so that the repo
+/// being pushed stays up to date.
+const KHOE_REPO = "KHOE_REPO";
+
+/// Default 0.
+/// If 0, only generate blobs for the latest tree in HEAD.
+/// If 1, all blobs from all commits of the HEAD branch will be generated.
+/// It's recommended to first run khoe with this on to generate the full set,
+/// then leave it off in subsequent runs to save time. As long as we don't
+/// delete anything, links to blob object pages will stay valid.
+const KHOE_HISTORICAL_BLOBS = "KHOE_HISTORICAL_BLOBS";
+
pub const Conf = struct {
full_regen: bool,
+ repo: ?[:0]const u8,
+ historical_blobs: bool,
};
pub fn fromEnv() Conf {
- const full_regen = posix.getenv("KHOE_FULL_REGEN") orelse "0";
+ const full_regen = posix.getenv(KHOE_FULL_REGEN) orelse "0";
+ const repo = posix.getenv(KHOE_REPO);
+ const historical_blobs = posix.getenv(KHOE_HISTORICAL_BLOBS) orelse "0";
+
return Conf{
.full_regen = mem.eql(u8, full_regen, "1"),
+ .repo = repo,
+ .historical_blobs = mem.eql(u8, historical_blobs, "1"),
};
}
diff --git a/src/git.zig b/src/git.zig
index cb66c0f..4ae29ff 100644
--- a/src/git.zig
+++ b/src/git.zig
@@ ... @@ pub const Walker = struct {
}
};
-pub fn walkTree(arena: mem.Allocator, dir: fs.Dir) !Walker {
+pub fn walkTree(arena: mem.Allocator, dir: fs.Dir, tree_ref: []const u8) !Walker {
var proc = try std.process.Child.run(.{
.allocator = arena,
.cwd_dir = dir,
@@ ... @@ pub fn walkTree(arena: mem.Allocator, dir: fs.Dir) !Walker {
.argv = &.{
"git",
"ls-tree",
- "HEAD",
+ tree_ref,
"-r",
"-z",
"--format",
diff --git a/src/main.zig b/src/main.zig
index 994fd65..cbf5e23 100644
--- a/src/main.zig
+++ b/src/main.zig
const templates = @import("templates.zig");
const constants = @import("constants.zig");
const config = @import("config.zig");
-const Mode = union(enum) {
- all: void,
- single_repo: [*:0]const u8,
-};
-
pub fn main() !u8 {
- if (std.os.argv.len != 3 and std.os.argv.len != 4) {
- println("Usage: khoe <dir> <site-url> [repo-name]", .{});
+ if (std.os.argv.len != 3) {
+ println("Usage: khoe <dir> <site-url>", .{});
println(
\\For example:
\\ khoe /srv/git/repos https://khoe.thac.loan
- \\ khoe /srv/git/repos https://khoe.thac.loan khoe.git
- \\When repo-name is present, only the homepage and that repo are
- \\regenerated.
, .{});
return 1;
}
const site_url = std.os.argv[2];
- const mode: Mode =
- if (std.os.argv.len == 3)
- .all
- else
- .{ .single_repo = std.os.argv[3] };
- _ = mode; // TODO
-
- const conf = config.fromEnv();
var dba_impl: std.heap.DebugAllocator(.{}) = .init;
defer _ = dba_impl.deinit();
@@ ... @@ pub fn main() !u8 {
defer arena_impl.deinit();
const arena = arena_impl.allocator();
+ const conf = config.fromEnv();
+
var repo_summaries: std.ArrayList(RepoSummary) = try .initCapacity(arena, 32);
defer repo_summaries.deinit(arena);
@@ ... @@ pub fn processRepo(args: *const RepoArgs) !void {
const maybe_readme_path = try git.findReadme(arena, args.in_repo_dir);
- try writeRepoPage(args, out_repo_dir, maybe_readme_path);
+ var processed_blob_hashes = std.StringHashMap(bool).init(arena);
+
+ try writeRepoPage(args, out_repo_dir, maybe_readme_path, &processed_blob_hashes);
try writeReadmePage(args, out_repo_dir, maybe_readme_path);
try writeCommitsPage(args, out_repo_dir, maybe_readme_path);
+ if (args.conf.historical_blobs) {
+ try writeHistoricalBlobPages(args, out_repo_dir, &processed_blob_hashes);
+ }
}
@@ ... @@ pub fn writeRepoPage(
args: *const RepoArgs,
out_repo_dir: fs.Dir,
maybe_readme_filename: ?[]const u8,
+ processed_blob_hashes: *std.StringHashMap(bool),
) !void {
const arena = args.arena;
@@ ... @@ pub fn writeRepoPage(
var objects_dir = try out_repo_dir.makeOpenPath(constants.web_objects_path, .{});
defer objects_dir.close();
- var treeWalker = try git.walkTree(arena, args.in_repo_dir);
+ var treeWalker = try git.walkTree(arena, args.in_repo_dir, "HEAD");
while (try treeWalker.next()) |src_file| {
try writer.print(
\\<tr>
@@ ... @@ pub fn writeRepoPage(
});
try writeBlobPage(args, objects_dir, src_file);
+
+ // Record that this blob's page has already been generated so we don't do
+ // duplicate work later:
+ try processed_blob_hashes.put(src_file.hash, true);
}
try writer.writeAll(
@@ ... @@ pub fn writeCommitPage(
try writer.flush();
}
+/// For each historical commit, iterate over its blobs and generate any pages not yet processed
+pub fn writeHistoricalBlobPages(
+ args: *const RepoArgs,
+ out_repo_dir: fs.Dir,
+ processed_blob_hashes: *std.StringHashMap(bool),
+) !void {
+ const arena = args.arena;
+
+ if (args.commits.len <= 1) return;
+ // Skip the latest commit because it has already been processed in
+ // writeRepoPage().
+ const commits = args.commits[1..];
+
+ var objects_dir = try out_repo_dir.makeOpenPath(constants.web_objects_path, .{});
+ defer objects_dir.close();
+
+ for (commits) |cmt| {
+ var treeWalker = try git.walkTree(arena, args.in_repo_dir, cmt.hash);
+ while (try treeWalker.next()) |src_file| {
+ if (processed_blob_hashes.get(src_file.hash) == null) {
+ try writeBlobPage(args, objects_dir, src_file);
+ try processed_blob_hashes.put(src_file.hash, true);
+ }
+ }
+ }
+}
+
pub fn writeBlobPage(
args: *const RepoArgs,
objects_dir: fs.Dir,