Fix segfault: keep JSON body alive for parsed string refs

- Root cause: parseFromSlice returns slices pointing into the raw JSON
  body, but body was freed immediately after parsing in getUpdates
- Fix: return OwnedParsed wrapper that keeps both parsed result and
  raw body alive together
- Switch HTTP layer to curl subprocess (more reliable than std.http
  with static linking)
- Fix downloadToFile: use spawnAndWait instead of collectOutput
  (collectOutput requires both stdout and stderr to be piped)

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
Mikhail Kilin
2026-02-18 16:39:52 +03:00
parent 89f98f02be
commit 01bf41c61c
4 changed files with 138 additions and 82 deletions

View File

@@ -1,4 +1,4 @@
FROM alpine:3.21
# Runtime tools the binary shells out to: ffmpeg for video-to-audio conversion
# and curl for all HTTP traffic; ca-certificates is needed for HTTPS.
RUN apk add --no-cache ffmpeg curl ca-certificates
COPY zig-out/bin/transcribator /usr/local/bin/
CMD ["transcribator"]

View File

@@ -1,41 +1,64 @@
const std = @import("std");
const Allocator = std.mem.Allocator;
const log = std.log.scoped(.transcribator);
/// Run the command in `args` (normally a `curl` invocation) and return
/// everything it wrote to stdout.
///
/// The caller owns the returned slice and must free it with `allocator`.
/// Captured output is limited to 100 MiB per stream.
///
/// Errors:
/// - `error.HttpRequestFailed` when the process cannot be spawned, its output
///   cannot be collected, waiting for it fails, it exits with a non-zero code,
///   or it terminates abnormally. The cause is logged before returning.
/// - `error.OutOfMemory` when the result buffer cannot be handed over.
fn runCurl(allocator: Allocator, args: []const []const u8) ![]u8 {
    var child = std.process.Child.init(args, allocator);
    // collectOutput requires both streams to be pipes.
    child.stdout_behavior = .Pipe;
    child.stderr_behavior = .Pipe;

    child.spawn() catch |err| {
        log.err("curl spawn failed: {s}", .{@errorName(err)});
        return error.HttpRequestFailed;
    };

    var stdout_list: std.ArrayList(u8) = .empty;
    defer stdout_list.deinit(allocator);
    var stderr_list: std.ArrayList(u8) = .empty;
    defer stderr_list.deinit(allocator);

    child.collectOutput(allocator, &stdout_list, &stderr_list, 100 * 1024 * 1024) catch |err| {
        log.err("curl collectOutput failed: {s}", .{@errorName(err)});
        // The child is still running (or at least not reaped) at this point;
        // kill and reap it so it does not linger as an orphan/zombie.
        if (child.kill()) |_| {} else |kill_err| {
            log.warn("curl kill failed: {s}", .{@errorName(kill_err)});
        }
        return error.HttpRequestFailed;
    };

    const term = child.wait() catch |err| {
        log.err("curl wait failed: {s}", .{@errorName(err)});
        return error.HttpRequestFailed;
    };

    switch (term) {
        .Exited => |code| {
            if (code != 0) {
                log.err("curl exited with code {d}", .{code});
                if (stderr_list.items.len > 0) {
                    log.err("curl stderr: {s}", .{stderr_list.items});
                }
                return error.HttpRequestFailed;
            }
        },
        else => {
            log.err("curl terminated abnormally", .{});
            return error.HttpRequestFailed;
        },
    }

    // Hand the collected buffer to the caller instead of copying it; the
    // deferred deinit then sees an empty list.
    return stdout_list.toOwnedSlice(allocator);
}
/// Perform an HTTP GET on `url` through the `curl` binary and return the
/// response body.
///
/// The caller owns the returned slice and must free it with `allocator`.
/// curl is run with `-s` (silent), `-f` (fail on HTTP error statuses) and a
/// 60 second overall time limit. Any failure reported by `runCurl` is
/// propagated unchanged.
pub fn httpGet(allocator: Allocator, url: []const u8) ![]u8 {
    return runCurl(allocator, &.{ "curl", "-sf", "--max-time", "60", url });
}
/// POST `body` to `url` with `Content-Type: application/json` through the
/// `curl` binary and return the response body.
///
/// The caller owns the returned slice and must free it with `allocator`.
/// curl is run with `-s` (silent), `-f` (fail on HTTP error statuses) and a
/// 30 second overall time limit. Any failure reported by `runCurl` is
/// propagated unchanged.
pub fn httpPostJson(allocator: Allocator, url: []const u8, body: []const u8) ![]u8 {
    return runCurl(allocator, &.{
        "curl",                           "-sf",
        "--max-time",                     "30",
        "-X",                             "POST",
        "-H",                             "Content-Type: application/json",
        // --data-raw sends the payload verbatim; plain -d would interpret a
        // body starting with '@' as the name of a file to upload.
        "--data-raw",
        body,
        url,
    });
}
pub fn httpPostMultipart(
@@ -43,62 +66,61 @@ pub fn httpPostMultipart(
url: []const u8,
file_field: []const u8,
file_path: []const u8,
file_name: []const u8,
_: []const u8, // file_name (curl handles it)
fields: []const [2][]const u8,
) ![]u8 {
const boundary = "----ZigMultipartBoundary9876543210";
// Build curl args dynamically
var args: std.ArrayList([]const u8) = .empty;
defer args.deinit(allocator);
// Build multipart body
var body_writer: std.Io.Writer.Allocating = .init(allocator);
defer body_writer.deinit();
try args.appendSlice(allocator, &.{ "curl", "-sf", "--max-time", "120" });
// Track allocated strings to free later
var alloc_strings: std.ArrayList([]u8) = .empty;
defer {
for (alloc_strings.items) |s| allocator.free(s);
alloc_strings.deinit(allocator);
}
// Add form fields
for (fields) |field| {
body_writer.writer.print("--{s}\r\nContent-Disposition: form-data; name=\"{s}\"\r\n\r\n{s}\r\n", .{ boundary, field[0], field[1] }) catch return error.OutOfMemory;
try args.append(allocator, "-F");
const field_arg = try std.fmt.allocPrint(allocator, "{s}={s}", .{ field[0], field[1] });
try alloc_strings.append(allocator, field_arg);
try args.append(allocator, field_arg);
}
// Add file field header
body_writer.writer.print("--{s}\r\nContent-Disposition: form-data; name=\"{s}\"; filename=\"{s}\"\r\nContent-Type: application/octet-stream\r\n\r\n", .{ boundary, file_field, file_name }) catch return error.OutOfMemory;
// Add file field
try args.append(allocator, "-F");
const file_arg = try std.fmt.allocPrint(allocator, "{s}=@{s}", .{ file_field, file_path });
try alloc_strings.append(allocator, file_arg);
try args.append(allocator, file_arg);
// Read and append file content
const file = std.fs.openFileAbsolute(file_path, .{}) catch return error.HttpRequestFailed;
defer file.close();
const file_content = file.readToEndAlloc(allocator, 100 * 1024 * 1024) catch return error.HttpRequestFailed;
defer allocator.free(file_content);
try args.append(allocator, url);
body_writer.writer.writeAll(file_content) catch return error.OutOfMemory;
body_writer.writer.print("\r\n--{s}--\r\n", .{boundary}) catch return error.OutOfMemory;
const body = body_writer.written();
// Send request
var client: std.http.Client = .{ .allocator = allocator };
defer client.deinit();
var response_aw: std.Io.Writer.Allocating = .init(allocator);
defer response_aw.deinit();
const content_type = std.fmt.allocPrint(allocator, "multipart/form-data; boundary={s}", .{boundary}) catch return error.OutOfMemory;
defer allocator.free(content_type);
const result = client.fetch(.{
.location = .{ .url = url },
.method = .POST,
.payload = body,
.headers = .{ .content_type = .{ .override = content_type } },
.response_writer = &response_aw.writer,
}) catch return error.HttpRequestFailed;
if (result.status != .ok) return error.HttpRequestFailed;
return response_aw.toOwnedSlice() catch return error.OutOfMemory;
return runCurl(allocator, args.items);
}
/// Download `url` to `dest_path` by letting `curl` write the file itself
/// (`-o dest_path`), so the payload is never buffered in this process.
///
/// curl is run with `-s` (silent), `-f` (fail on HTTP error statuses) and a
/// 60 second overall time limit.
///
/// Returns `error.HttpRequestFailed` when curl cannot be started, exits with a
/// non-zero code, or terminates abnormally; the cause is logged first.
pub fn downloadToFile(allocator: Allocator, url: []const u8, dest_path: []const u8) !void {
    var child = std.process.Child.init(
        &.{ "curl", "-sf", "--max-time", "60", "-o", dest_path, url },
        allocator,
    );
    // Nothing is read back from the child, so neither stream needs a pipe.
    child.stdout_behavior = .Ignore;
    child.stderr_behavior = .Ignore;

    // Map spawn/wait failures to the same error the other HTTP helpers use.
    const term = child.spawnAndWait() catch |err| {
        log.err("curl download spawn failed: {s}", .{@errorName(err)});
        return error.HttpRequestFailed;
    };

    switch (term) {
        .Exited => |code| {
            if (code != 0) {
                log.err("curl download failed with code {d}", .{code});
                return error.HttpRequestFailed;
            }
        },
        else => {
            log.err("curl download terminated abnormally", .{});
            return error.HttpRequestFailed;
        },
    }
}

View File

@@ -24,14 +24,14 @@ pub fn main() !void {
var offset: i64 = 0;
while (true) {
const updates = bot.getUpdates(offset, 30) catch |err| {
var updates = bot.getUpdates(offset, 30) catch |err| {
log.err("getUpdates failed: {s}", .{@errorName(err)});
std.Thread.sleep(5 * std.time.ns_per_s);
continue;
};
defer updates.deinit();
for (updates.value.result) |update| {
for (updates.parsed.value.result) |update| {
offset = update.update_id + 1;
processUpdate(allocator, &bot, update, whisper_url, language);
}
@@ -80,15 +80,19 @@ fn handleTranscription(
language: []const u8,
) !void {
// Get file path from Telegram
log.info("Step 1: getFilePath", .{});
const file_path = try bot.getFilePath(file_id);
defer allocator.free(file_path);
log.info("Step 1 done: {s}", .{file_path});
// Download file
const ext: []const u8 = if (is_video) ".mp4" else ".ogg";
const tmp_input = try std.fmt.allocPrint(allocator, "/tmp/tg_{d}{s}", .{ message.message_id, ext });
defer allocator.free(tmp_input);
log.info("Step 2: downloadFile to {s}", .{tmp_input});
try bot.downloadFile(file_path, tmp_input);
log.info("Step 2 done", .{});
defer std.fs.deleteFileAbsolute(tmp_input) catch {};
// Convert video to audio if needed
@@ -100,6 +104,7 @@ fn handleTranscription(
};
if (is_video) {
log.info("Step 2.5: ffmpeg conversion", .{});
const out_path = try std.fmt.allocPrint(allocator, "/tmp/tg_{d}.ogg", .{message.message_id});
tmp_audio = out_path;
@@ -117,15 +122,21 @@ fn handleTranscription(
}
audio_path = out_path;
log.info("Step 2.5 done", .{});
}
// Transcribe
log.info("Step 3: transcribe {s}", .{audio_path});
const text = try whisper.transcribe(allocator, whisper_url, audio_path, language);
defer allocator.free(text);
log.info("Step 3 done, text length: {d}", .{text.len});
// Send response
log.info("Step 4: sendMessage", .{});
if (text.len == 0) {
try bot.sendMessage(message.chat.id, "(empty transcription)", message.message_id);
} else {
try bot.sendMessage(message.chat.id, text, message.message_id);
}
log.info("Step 4 done", .{});
}

View File

@@ -42,6 +42,21 @@ pub const SendMessageBody = struct {
reply_to_message_id: ?i64 = null,
};
/// Pairs a `std.json.Parsed(T)` with the raw JSON buffer it was parsed from.
/// The wrapper owns both; call `deinit()` exactly once to release them.
pub fn OwnedParsed(comptime T: type) type {
    return struct {
        const Self = @This();

        /// Result of parsing `raw_body`.
        parsed: std.json.Parsed(T),
        /// The JSON text that `parsed` was produced from.
        raw_body: []u8,
        /// Allocator used to free `raw_body`.
        allocator: Allocator,

        /// Release the parsed value first, then the raw JSON buffer.
        pub fn deinit(self: *Self) void {
            const body_allocator = self.allocator;
            self.parsed.deinit();
            body_allocator.free(self.raw_body);
        }
    };
}
pub const TelegramBot = struct {
allocator: Allocator,
token: []const u8,
@@ -60,14 +75,22 @@ pub const TelegramBot = struct {
self.allocator.free(self.api_base);
}
/// Fetch pending updates from the bot API's `getUpdates` endpoint.
///
/// `offset` and `timeout` are passed through as query parameters.
///
/// The returned wrapper owns both the parsed response and the raw JSON body;
/// the body must stay alive because parsed strings can point into it. The
/// caller must call `deinit()` on the result.
///
/// Returns `error.HttpRequestFailed` when the response cannot be parsed;
/// errors from building the URL or from the HTTP request are propagated.
pub fn getUpdates(self: *TelegramBot, offset: i64, timeout: u32) !OwnedParsed(GetUpdatesResponse) {
    const url = try std.fmt.allocPrint(self.allocator, "{s}/getUpdates?offset={d}&timeout={d}", .{ self.api_base, offset, timeout });
    defer self.allocator.free(url);

    const body = try http.httpGet(self.allocator, url);
    // Ownership of `body` moves into the returned wrapper on success, so it
    // is released here only when this function fails.
    errdefer self.allocator.free(body);

    const parsed = std.json.parseFromSlice(
        GetUpdatesResponse,
        self.allocator,
        body,
        .{ .ignore_unknown_fields = true },
    ) catch return error.HttpRequestFailed;

    return .{
        .parsed = parsed,
        .raw_body = body,
        .allocator = self.allocator,
    };
}
pub fn getFilePath(self: *TelegramBot, file_id: []const u8) ![]u8 {