Fix segfault: keep JSON body alive for parsed string refs
- Root cause: parseFromSlice returns slices pointing into the raw JSON body, but the body was freed immediately after parsing in getUpdates
- Fix: return an OwnedParsed wrapper that keeps both the parsed result and the raw body alive together
- Switch the HTTP layer to a curl subprocess (more reliable than std.http with static linking)
- Fix downloadToFile: use spawnAndWait instead of collectOutput (collectOutput requires both pipes)

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -1,4 +1,4 @@
|
|||||||
FROM alpine:3.21
|
FROM alpine:3.21
|
||||||
RUN apk add --no-cache ffmpeg ca-certificates
|
RUN apk add --no-cache ffmpeg curl ca-certificates
|
||||||
COPY zig-out/bin/transcribator /usr/local/bin/
|
COPY zig-out/bin/transcribator /usr/local/bin/
|
||||||
CMD ["transcribator"]
|
CMD ["transcribator"]
|
||||||
|
|||||||
172
src/http.zig
172
src/http.zig
@@ -1,41 +1,64 @@
|
|||||||
const std = @import("std");
|
const std = @import("std");
|
||||||
const Allocator = std.mem.Allocator;
|
const Allocator = std.mem.Allocator;
|
||||||
|
|
||||||
|
const log = std.log.scoped(.transcribator);
|
||||||
|
|
||||||
|
/// Execute curl and return stdout as owned slice
|
||||||
|
fn runCurl(allocator: Allocator, args: []const []const u8) ![]u8 {
|
||||||
|
var child = std.process.Child.init(args, allocator);
|
||||||
|
child.stdout_behavior = .Pipe;
|
||||||
|
child.stderr_behavior = .Pipe;
|
||||||
|
|
||||||
|
child.spawn() catch |err| {
|
||||||
|
log.err("curl spawn failed: {s}", .{@errorName(err)});
|
||||||
|
return error.HttpRequestFailed;
|
||||||
|
};
|
||||||
|
|
||||||
|
var stdout_list: std.ArrayList(u8) = .empty;
|
||||||
|
defer stdout_list.deinit(allocator);
|
||||||
|
var stderr_list: std.ArrayList(u8) = .empty;
|
||||||
|
defer stderr_list.deinit(allocator);
|
||||||
|
|
||||||
|
child.collectOutput(allocator, &stdout_list, &stderr_list, 100 * 1024 * 1024) catch |err| {
|
||||||
|
log.err("curl collectOutput failed: {s}", .{@errorName(err)});
|
||||||
|
return error.HttpRequestFailed;
|
||||||
|
};
|
||||||
|
const term = child.wait() catch |err| {
|
||||||
|
log.err("curl wait failed: {s}", .{@errorName(err)});
|
||||||
|
return error.HttpRequestFailed;
|
||||||
|
};
|
||||||
|
|
||||||
|
switch (term) {
|
||||||
|
.Exited => |code| {
|
||||||
|
if (code != 0) {
|
||||||
|
log.err("curl exited with code {d}", .{code});
|
||||||
|
if (stderr_list.items.len > 0) {
|
||||||
|
log.err("curl stderr: {s}", .{stderr_list.items});
|
||||||
|
}
|
||||||
|
return error.HttpRequestFailed;
|
||||||
|
}
|
||||||
|
},
|
||||||
|
else => {
|
||||||
|
log.err("curl terminated abnormally", .{});
|
||||||
|
return error.HttpRequestFailed;
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
return allocator.dupe(u8, stdout_list.items);
|
||||||
|
}
|
||||||
|
|
||||||
pub fn httpGet(allocator: Allocator, url: []const u8) ![]u8 {
|
pub fn httpGet(allocator: Allocator, url: []const u8) ![]u8 {
|
||||||
var client: std.http.Client = .{ .allocator = allocator };
|
return runCurl(allocator, &.{ "curl", "-sf", "--max-time", "60", url });
|
||||||
defer client.deinit();
|
|
||||||
|
|
||||||
var aw: std.Io.Writer.Allocating = .init(allocator);
|
|
||||||
defer aw.deinit();
|
|
||||||
|
|
||||||
const result = client.fetch(.{
|
|
||||||
.location = .{ .url = url },
|
|
||||||
.response_writer = &aw.writer,
|
|
||||||
}) catch return error.HttpRequestFailed;
|
|
||||||
|
|
||||||
if (result.status != .ok) return error.HttpRequestFailed;
|
|
||||||
|
|
||||||
return aw.toOwnedSlice() catch return error.OutOfMemory;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn httpPostJson(allocator: Allocator, url: []const u8, body: []const u8) ![]u8 {
|
pub fn httpPostJson(allocator: Allocator, url: []const u8, body: []const u8) ![]u8 {
|
||||||
var client: std.http.Client = .{ .allocator = allocator };
|
return runCurl(allocator, &.{
|
||||||
defer client.deinit();
|
"curl", "-sf", "--max-time", "30",
|
||||||
|
"-X", "POST",
|
||||||
var aw: std.Io.Writer.Allocating = .init(allocator);
|
"-H", "Content-Type: application/json",
|
||||||
defer aw.deinit();
|
"-d", body,
|
||||||
|
url,
|
||||||
const result = client.fetch(.{
|
});
|
||||||
.location = .{ .url = url },
|
|
||||||
.method = .POST,
|
|
||||||
.payload = body,
|
|
||||||
.headers = .{ .content_type = .{ .override = "application/json" } },
|
|
||||||
.response_writer = &aw.writer,
|
|
||||||
}) catch return error.HttpRequestFailed;
|
|
||||||
|
|
||||||
if (result.status != .ok) return error.HttpRequestFailed;
|
|
||||||
|
|
||||||
return aw.toOwnedSlice() catch return error.OutOfMemory;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn httpPostMultipart(
|
pub fn httpPostMultipart(
|
||||||
@@ -43,62 +66,61 @@ pub fn httpPostMultipart(
|
|||||||
url: []const u8,
|
url: []const u8,
|
||||||
file_field: []const u8,
|
file_field: []const u8,
|
||||||
file_path: []const u8,
|
file_path: []const u8,
|
||||||
file_name: []const u8,
|
_: []const u8, // file_name (curl handles it)
|
||||||
fields: []const [2][]const u8,
|
fields: []const [2][]const u8,
|
||||||
) ![]u8 {
|
) ![]u8 {
|
||||||
const boundary = "----ZigMultipartBoundary9876543210";
|
// Build curl args dynamically
|
||||||
|
var args: std.ArrayList([]const u8) = .empty;
|
||||||
|
defer args.deinit(allocator);
|
||||||
|
|
||||||
// Build multipart body
|
try args.appendSlice(allocator, &.{ "curl", "-sf", "--max-time", "120" });
|
||||||
var body_writer: std.Io.Writer.Allocating = .init(allocator);
|
|
||||||
defer body_writer.deinit();
|
// Track allocated strings to free later
|
||||||
|
var alloc_strings: std.ArrayList([]u8) = .empty;
|
||||||
|
defer {
|
||||||
|
for (alloc_strings.items) |s| allocator.free(s);
|
||||||
|
alloc_strings.deinit(allocator);
|
||||||
|
}
|
||||||
|
|
||||||
// Add form fields
|
// Add form fields
|
||||||
for (fields) |field| {
|
for (fields) |field| {
|
||||||
body_writer.writer.print("--{s}\r\nContent-Disposition: form-data; name=\"{s}\"\r\n\r\n{s}\r\n", .{ boundary, field[0], field[1] }) catch return error.OutOfMemory;
|
try args.append(allocator, "-F");
|
||||||
|
const field_arg = try std.fmt.allocPrint(allocator, "{s}={s}", .{ field[0], field[1] });
|
||||||
|
try alloc_strings.append(allocator, field_arg);
|
||||||
|
try args.append(allocator, field_arg);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Add file field header
|
// Add file field
|
||||||
body_writer.writer.print("--{s}\r\nContent-Disposition: form-data; name=\"{s}\"; filename=\"{s}\"\r\nContent-Type: application/octet-stream\r\n\r\n", .{ boundary, file_field, file_name }) catch return error.OutOfMemory;
|
try args.append(allocator, "-F");
|
||||||
|
const file_arg = try std.fmt.allocPrint(allocator, "{s}=@{s}", .{ file_field, file_path });
|
||||||
|
try alloc_strings.append(allocator, file_arg);
|
||||||
|
try args.append(allocator, file_arg);
|
||||||
|
|
||||||
// Read and append file content
|
try args.append(allocator, url);
|
||||||
const file = std.fs.openFileAbsolute(file_path, .{}) catch return error.HttpRequestFailed;
|
|
||||||
defer file.close();
|
|
||||||
const file_content = file.readToEndAlloc(allocator, 100 * 1024 * 1024) catch return error.HttpRequestFailed;
|
|
||||||
defer allocator.free(file_content);
|
|
||||||
|
|
||||||
body_writer.writer.writeAll(file_content) catch return error.OutOfMemory;
|
return runCurl(allocator, args.items);
|
||||||
body_writer.writer.print("\r\n--{s}--\r\n", .{boundary}) catch return error.OutOfMemory;
|
|
||||||
|
|
||||||
const body = body_writer.written();
|
|
||||||
|
|
||||||
// Send request
|
|
||||||
var client: std.http.Client = .{ .allocator = allocator };
|
|
||||||
defer client.deinit();
|
|
||||||
|
|
||||||
var response_aw: std.Io.Writer.Allocating = .init(allocator);
|
|
||||||
defer response_aw.deinit();
|
|
||||||
|
|
||||||
const content_type = std.fmt.allocPrint(allocator, "multipart/form-data; boundary={s}", .{boundary}) catch return error.OutOfMemory;
|
|
||||||
defer allocator.free(content_type);
|
|
||||||
|
|
||||||
const result = client.fetch(.{
|
|
||||||
.location = .{ .url = url },
|
|
||||||
.method = .POST,
|
|
||||||
.payload = body,
|
|
||||||
.headers = .{ .content_type = .{ .override = content_type } },
|
|
||||||
.response_writer = &response_aw.writer,
|
|
||||||
}) catch return error.HttpRequestFailed;
|
|
||||||
|
|
||||||
if (result.status != .ok) return error.HttpRequestFailed;
|
|
||||||
|
|
||||||
return response_aw.toOwnedSlice() catch return error.OutOfMemory;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn downloadToFile(allocator: Allocator, url: []const u8, dest_path: []const u8) !void {
|
pub fn downloadToFile(allocator: Allocator, url: []const u8, dest_path: []const u8) !void {
|
||||||
const data = try httpGet(allocator, url);
|
var child = std.process.Child.init(
|
||||||
defer allocator.free(data);
|
&.{ "curl", "-sf", "--max-time", "60", "-o", dest_path, url },
|
||||||
|
allocator,
|
||||||
|
);
|
||||||
|
child.stdout_behavior = .Ignore;
|
||||||
|
child.stderr_behavior = .Ignore;
|
||||||
|
|
||||||
const file = std.fs.createFileAbsolute(dest_path, .{}) catch return error.HttpRequestFailed;
|
const term = try child.spawnAndWait();
|
||||||
defer file.close();
|
|
||||||
file.writeAll(data) catch return error.HttpRequestFailed;
|
switch (term) {
|
||||||
|
.Exited => |code| {
|
||||||
|
if (code != 0) {
|
||||||
|
log.err("curl download failed with code {d}", .{code});
|
||||||
|
return error.HttpRequestFailed;
|
||||||
|
}
|
||||||
|
},
|
||||||
|
else => {
|
||||||
|
log.err("curl download terminated abnormally", .{});
|
||||||
|
return error.HttpRequestFailed;
|
||||||
|
},
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
15
src/main.zig
15
src/main.zig
@@ -24,14 +24,14 @@ pub fn main() !void {
|
|||||||
var offset: i64 = 0;
|
var offset: i64 = 0;
|
||||||
|
|
||||||
while (true) {
|
while (true) {
|
||||||
const updates = bot.getUpdates(offset, 30) catch |err| {
|
var updates = bot.getUpdates(offset, 30) catch |err| {
|
||||||
log.err("getUpdates failed: {s}", .{@errorName(err)});
|
log.err("getUpdates failed: {s}", .{@errorName(err)});
|
||||||
std.Thread.sleep(5 * std.time.ns_per_s);
|
std.Thread.sleep(5 * std.time.ns_per_s);
|
||||||
continue;
|
continue;
|
||||||
};
|
};
|
||||||
defer updates.deinit();
|
defer updates.deinit();
|
||||||
|
|
||||||
for (updates.value.result) |update| {
|
for (updates.parsed.value.result) |update| {
|
||||||
offset = update.update_id + 1;
|
offset = update.update_id + 1;
|
||||||
processUpdate(allocator, &bot, update, whisper_url, language);
|
processUpdate(allocator, &bot, update, whisper_url, language);
|
||||||
}
|
}
|
||||||
@@ -80,15 +80,19 @@ fn handleTranscription(
|
|||||||
language: []const u8,
|
language: []const u8,
|
||||||
) !void {
|
) !void {
|
||||||
// Get file path from Telegram
|
// Get file path from Telegram
|
||||||
|
log.info("Step 1: getFilePath", .{});
|
||||||
const file_path = try bot.getFilePath(file_id);
|
const file_path = try bot.getFilePath(file_id);
|
||||||
defer allocator.free(file_path);
|
defer allocator.free(file_path);
|
||||||
|
log.info("Step 1 done: {s}", .{file_path});
|
||||||
|
|
||||||
// Download file
|
// Download file
|
||||||
const ext: []const u8 = if (is_video) ".mp4" else ".ogg";
|
const ext: []const u8 = if (is_video) ".mp4" else ".ogg";
|
||||||
const tmp_input = try std.fmt.allocPrint(allocator, "/tmp/tg_{d}{s}", .{ message.message_id, ext });
|
const tmp_input = try std.fmt.allocPrint(allocator, "/tmp/tg_{d}{s}", .{ message.message_id, ext });
|
||||||
defer allocator.free(tmp_input);
|
defer allocator.free(tmp_input);
|
||||||
|
|
||||||
|
log.info("Step 2: downloadFile to {s}", .{tmp_input});
|
||||||
try bot.downloadFile(file_path, tmp_input);
|
try bot.downloadFile(file_path, tmp_input);
|
||||||
|
log.info("Step 2 done", .{});
|
||||||
defer std.fs.deleteFileAbsolute(tmp_input) catch {};
|
defer std.fs.deleteFileAbsolute(tmp_input) catch {};
|
||||||
|
|
||||||
// Convert video to audio if needed
|
// Convert video to audio if needed
|
||||||
@@ -100,6 +104,7 @@ fn handleTranscription(
|
|||||||
};
|
};
|
||||||
|
|
||||||
if (is_video) {
|
if (is_video) {
|
||||||
|
log.info("Step 2.5: ffmpeg conversion", .{});
|
||||||
const out_path = try std.fmt.allocPrint(allocator, "/tmp/tg_{d}.ogg", .{message.message_id});
|
const out_path = try std.fmt.allocPrint(allocator, "/tmp/tg_{d}.ogg", .{message.message_id});
|
||||||
tmp_audio = out_path;
|
tmp_audio = out_path;
|
||||||
|
|
||||||
@@ -117,15 +122,21 @@ fn handleTranscription(
|
|||||||
}
|
}
|
||||||
|
|
||||||
audio_path = out_path;
|
audio_path = out_path;
|
||||||
|
log.info("Step 2.5 done", .{});
|
||||||
}
|
}
|
||||||
|
|
||||||
// Transcribe
|
// Transcribe
|
||||||
|
log.info("Step 3: transcribe {s}", .{audio_path});
|
||||||
const text = try whisper.transcribe(allocator, whisper_url, audio_path, language);
|
const text = try whisper.transcribe(allocator, whisper_url, audio_path, language);
|
||||||
defer allocator.free(text);
|
defer allocator.free(text);
|
||||||
|
log.info("Step 3 done, text length: {d}", .{text.len});
|
||||||
|
|
||||||
|
// Send response
|
||||||
|
log.info("Step 4: sendMessage", .{});
|
||||||
if (text.len == 0) {
|
if (text.len == 0) {
|
||||||
try bot.sendMessage(message.chat.id, "(empty transcription)", message.message_id);
|
try bot.sendMessage(message.chat.id, "(empty transcription)", message.message_id);
|
||||||
} else {
|
} else {
|
||||||
try bot.sendMessage(message.chat.id, text, message.message_id);
|
try bot.sendMessage(message.chat.id, text, message.message_id);
|
||||||
}
|
}
|
||||||
|
log.info("Step 4 done", .{});
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -42,6 +42,21 @@ pub const SendMessageBody = struct {
|
|||||||
reply_to_message_id: ?i64 = null,
|
reply_to_message_id: ?i64 = null,
|
||||||
};
|
};
|
||||||
|
|
||||||
|
/// Parsed JSON result that also owns the raw JSON body.
|
||||||
|
/// Must call deinit() to free both.
|
||||||
|
pub fn OwnedParsed(comptime T: type) type {
|
||||||
|
return struct {
|
||||||
|
parsed: std.json.Parsed(T),
|
||||||
|
raw_body: []u8,
|
||||||
|
allocator: Allocator,
|
||||||
|
|
||||||
|
pub fn deinit(self: *@This()) void {
|
||||||
|
self.parsed.deinit();
|
||||||
|
self.allocator.free(self.raw_body);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
pub const TelegramBot = struct {
|
pub const TelegramBot = struct {
|
||||||
allocator: Allocator,
|
allocator: Allocator,
|
||||||
token: []const u8,
|
token: []const u8,
|
||||||
@@ -60,14 +75,22 @@ pub const TelegramBot = struct {
|
|||||||
self.allocator.free(self.api_base);
|
self.allocator.free(self.api_base);
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn getUpdates(self: *TelegramBot, offset: i64, timeout: u32) !std.json.Parsed(GetUpdatesResponse) {
|
pub fn getUpdates(self: *TelegramBot, offset: i64, timeout: u32) !OwnedParsed(GetUpdatesResponse) {
|
||||||
const url = try std.fmt.allocPrint(self.allocator, "{s}/getUpdates?offset={d}&timeout={d}&allowed_updates=[\"message\"]", .{ self.api_base, offset, timeout });
|
const url = try std.fmt.allocPrint(self.allocator, "{s}/getUpdates?offset={d}&timeout={d}", .{ self.api_base, offset, timeout });
|
||||||
defer self.allocator.free(url);
|
defer self.allocator.free(url);
|
||||||
|
|
||||||
const body = try http.httpGet(self.allocator, url);
|
const body = try http.httpGet(self.allocator, url);
|
||||||
defer self.allocator.free(body);
|
|
||||||
|
|
||||||
return std.json.parseFromSlice(GetUpdatesResponse, self.allocator, body, .{ .ignore_unknown_fields = true });
|
const parsed = std.json.parseFromSlice(GetUpdatesResponse, self.allocator, body, .{ .ignore_unknown_fields = true }) catch {
|
||||||
|
self.allocator.free(body);
|
||||||
|
return error.HttpRequestFailed;
|
||||||
|
};
|
||||||
|
|
||||||
|
return .{
|
||||||
|
.parsed = parsed,
|
||||||
|
.raw_body = body,
|
||||||
|
.allocator = self.allocator,
|
||||||
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn getFilePath(self: *TelegramBot, file_id: []const u8) ![]u8 {
|
pub fn getFilePath(self: *TelegramBot, file_id: []const u8) ![]u8 {
|
||||||
|
|||||||
Reference in New Issue
Block a user