diff --git a/Dockerfile b/Dockerfile
index a25821c..cf1b35c 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -1,4 +1,4 @@
 FROM alpine:3.21
-RUN apk add --no-cache ffmpeg ca-certificates
+RUN apk add --no-cache ffmpeg curl ca-certificates
 COPY zig-out/bin/transcribator /usr/local/bin/
 CMD ["transcribator"]
diff --git a/src/http.zig b/src/http.zig
index c973817..7ed637f 100644
--- a/src/http.zig
+++ b/src/http.zig
@@ -1,41 +1,88 @@
 const std = @import("std");
 const Allocator = std.mem.Allocator;
 
+const log = std.log.scoped(.transcribator);
+
+/// Runs the curl command line `args` (program name included) as a child
+/// process and returns everything it wrote to stdout.
+///
+/// Caller owns the returned slice and frees it with `allocator`. A failed
+/// spawn, an output read error, or a non-zero/abnormal exit is logged and
+/// reported as `error.HttpRequestFailed`.
+fn runCurl(allocator: Allocator, args: []const []const u8) ![]u8 {
+    var child = std.process.Child.init(args, allocator);
+    child.stdout_behavior = .Pipe;
+    child.stderr_behavior = .Pipe;
+
+    child.spawn() catch |err| {
+        log.err("curl spawn failed: {s}", .{@errorName(err)});
+        return error.HttpRequestFailed;
+    };
+
+    var stdout_list: std.ArrayList(u8) = .empty;
+    defer stdout_list.deinit(allocator);
+    var stderr_list: std.ArrayList(u8) = .empty;
+    defer stderr_list.deinit(allocator);
+
+    child.collectOutput(allocator, &stdout_list, &stderr_list, 100 * 1024 * 1024) catch |err| {
+        log.err("curl collectOutput failed: {s}", .{@errorName(err)});
+        // The child was spawned but never reaped: kill and wait for it so a
+        // failed read leaves neither a zombie process nor open pipes behind.
+        if (child.kill()) |_| {} else |kill_err| {
+            log.err("curl kill failed: {s}", .{@errorName(kill_err)});
+        }
+        return error.HttpRequestFailed;
+    };
+    const term = child.wait() catch |err| {
+        log.err("curl wait failed: {s}", .{@errorName(err)});
+        return error.HttpRequestFailed;
+    };
+
+    switch (term) {
+        .Exited => |code| {
+            if (code != 0) {
+                log.err("curl exited with code {d}", .{code});
+                if (stderr_list.items.len > 0) {
+                    log.err("curl stderr: {s}", .{stderr_list.items});
+                }
+                return error.HttpRequestFailed;
+            }
+        },
+        else => {
+            log.err("curl terminated abnormally", .{});
+            return error.HttpRequestFailed;
+        },
+    }
+
+    // Hand the collected buffer to the caller instead of copying it.
+    return stdout_list.toOwnedSlice(allocator);
+}
+
+/// Fetches `url` with GET and returns the response body (caller frees).
+/// Fails with `error.HttpRequestFailed` after 60 s or on an HTTP error status.
 pub fn httpGet(allocator: Allocator, url: []const u8) ![]u8 {
-    var client: std.http.Client = .{ .allocator = allocator };
-    defer client.deinit();
-
-    var aw: std.Io.Writer.Allocating = .init(allocator);
-    defer aw.deinit();
-
-    const result = client.fetch(.{
-        .location = .{ .url = url },
-        .response_writer = &aw.writer,
-    }) catch return error.HttpRequestFailed;
-
-    if (result.status != .ok) return error.HttpRequestFailed;
-
-    return aw.toOwnedSlice() catch return error.OutOfMemory;
+    // -S keeps curl's error text on stderr despite -s; --globoff stops curl
+    // from expanding '[' ']' '{' '}' in the URL.
+    return runCurl(allocator, &.{ "curl", "-sSf", "--globoff", "--max-time", "60", url });
 }
 
+/// POSTs `body` to `url` as `application/json` and returns the response body
+/// (caller frees). Fails with `error.HttpRequestFailed` after 30 s or on an
+/// HTTP error status.
 pub fn httpPostJson(allocator: Allocator, url: []const u8, body: []const u8) ![]u8 {
-    var client: std.http.Client = .{ .allocator = allocator };
-    defer client.deinit();
-
-    var aw: std.Io.Writer.Allocating = .init(allocator);
-    defer aw.deinit();
-
-    const result = client.fetch(.{
-        .location = .{ .url = url },
-        .method = .POST,
-        .payload = body,
-        .headers = .{ .content_type = .{ .override = "application/json" } },
-        .response_writer = &aw.writer,
-    }) catch return error.HttpRequestFailed;
-
-    if (result.status != .ok) return error.HttpRequestFailed;
-
-    return aw.toOwnedSlice() catch return error.OutOfMemory;
+    // --data-raw sends `body` verbatim; plain -d would read a file when the
+    // body starts with '@'.
+    return runCurl(allocator, &.{
+        "curl", "-sSf", "--globoff", "--max-time", "30",
+        "-X", "POST",
+        "-H", "Content-Type: application/json",
+        "--data-raw", body,
+        url,
+    });
 }
 
+/// POSTs a multipart/form-data request to `url`: each `fields` pair becomes a
+/// text part and the file at `file_path` is uploaded as part `file_field`.
+/// The file-name parameter is ignored; curl names the part after `file_path`.
+/// Returns the response body (caller frees).
 pub fn httpPostMultipart(
@@ -43,62 +90,66 @@ pub fn httpPostMultipart(
     url: []const u8,
     file_field: []const u8,
     file_path: []const u8,
-    file_name: []const u8,
+    _: []const u8, // file_name (curl handles it)
     fields: []const [2][]const u8,
 ) ![]u8 {
-    const boundary = "----ZigMultipartBoundary9876543210";
+    // All argument strings live in one arena that is dropped on return, so
+    // no error path can leak a partially built argument list.
+    var arena = std.heap.ArenaAllocator.init(allocator);
+    defer arena.deinit();
+    const arg_allocator = arena.allocator();
 
-    // Build multipart body
-    var body_writer: std.Io.Writer.Allocating = .init(allocator);
-    defer body_writer.deinit();
+    var args: std.ArrayList([]const u8) = .empty;
+    try args.appendSlice(arg_allocator, &.{ "curl", "-sSf", "--globoff", "--max-time", "120" });
 
     // Add form fields
     for (fields) |field| {
-        body_writer.writer.print("--{s}\r\nContent-Disposition: form-data; name=\"{s}\"\r\n\r\n{s}\r\n", .{ boundary, field[0], field[1] }) catch return error.OutOfMemory;
+        // --form-string sends the value verbatim; -F would treat a leading
+        // '@' or '<' as "read this local file".
+        try args.append(arg_allocator, "--form-string");
+        try args.append(arg_allocator, try std.fmt.allocPrint(arg_allocator, "{s}={s}", .{ field[0], field[1] }));
     }
 
-    // Add file field header
-    body_writer.writer.print("--{s}\r\nContent-Disposition: form-data; name=\"{s}\"; filename=\"{s}\"\r\nContent-Type: application/octet-stream\r\n\r\n", .{ boundary, file_field, file_name }) catch return error.OutOfMemory;
+    // Add file field
+    try args.append(arg_allocator, "-F");
+    try args.append(arg_allocator, try std.fmt.allocPrint(arg_allocator, "{s}=@{s}", .{ file_field, file_path }));
 
-    // Read and append file content
-    const file = std.fs.openFileAbsolute(file_path, .{}) catch return error.HttpRequestFailed;
-    defer file.close();
-    const file_content = file.readToEndAlloc(allocator, 100 * 1024 * 1024) catch return error.HttpRequestFailed;
-    defer allocator.free(file_content);
+    try args.append(arg_allocator, url);
 
-    body_writer.writer.writeAll(file_content) catch return error.OutOfMemory;
-    body_writer.writer.print("\r\n--{s}--\r\n", .{boundary}) catch return error.OutOfMemory;
-
-    const body = body_writer.written();
-
-    // Send request
-    var client: std.http.Client = .{ .allocator = allocator };
-    defer client.deinit();
-
-    var response_aw: std.Io.Writer.Allocating = .init(allocator);
-    defer response_aw.deinit();
-
-    const content_type = std.fmt.allocPrint(allocator, "multipart/form-data; boundary={s}", .{boundary}) catch return error.OutOfMemory;
-    defer allocator.free(content_type);
-
-    const result = client.fetch(.{
-        .location = .{ .url = url },
-        .method = .POST,
-        .payload = body,
-        .headers = .{ .content_type = .{ .override = content_type } },
-        .response_writer = &response_aw.writer,
-    }) catch return error.HttpRequestFailed;
-
-    if (result.status != .ok) return error.HttpRequestFailed;
-
-    return response_aw.toOwnedSlice() catch return error.OutOfMemory;
+    return runCurl(allocator, args.items);
 }
 
+/// Downloads `url` to the file at `dest_path`; curl writes the file itself.
+/// Fails with `error.HttpRequestFailed` when curl cannot be run, exits
+/// non-zero, or terminates abnormally. A file left at `dest_path` by a
+/// failed transfer is removed.
 pub fn downloadToFile(allocator: Allocator, url: []const u8, dest_path: []const u8) !void {
-    const data = try httpGet(allocator, url);
-    defer allocator.free(data);
+    var child = std.process.Child.init(
+        &.{ "curl", "-sf", "--globoff", "--max-time", "60", "-o", dest_path, url },
+        allocator,
+    );
+    child.stdout_behavior = .Ignore;
+    child.stderr_behavior = .Ignore;
 
-    const file = std.fs.createFileAbsolute(dest_path, .{}) catch return error.HttpRequestFailed;
-    defer file.close();
-    file.writeAll(data) catch return error.HttpRequestFailed;
+    const term = child.spawnAndWait() catch |err| {
+        log.err("curl download spawn failed: {s}", .{@errorName(err)});
+        return error.HttpRequestFailed;
+    };
+
+    // curl has run and may have left a truncated file at dest_path; drop it
+    // (best effort) when the transfer is reported as failed below.
+    errdefer std.fs.cwd().deleteFile(dest_path) catch {};
+
+    switch (term) {
+        .Exited => |code| {
+            if (code != 0) {
+                log.err("curl download failed with code {d}", .{code});
+                return error.HttpRequestFailed;
+            }
+        },
+        else => {
+            log.err("curl download terminated abnormally", .{});
+            return error.HttpRequestFailed;
+        },
+    }
 }
diff --git a/src/main.zig b/src/main.zig
index 112d6b1..7cee394 100644
--- a/src/main.zig
+++ b/src/main.zig
@@ -24,14 +24,14 @@ pub fn main() !void {
     var offset: i64 = 0;
 
     while (true) {
-        const updates = bot.getUpdates(offset, 30) catch |err| {
+        var updates = bot.getUpdates(offset, 30) catch |err| {
             log.err("getUpdates failed: {s}", .{@errorName(err)});
             std.Thread.sleep(5 * std.time.ns_per_s);
             continue;
         };
         defer updates.deinit();
 
-        for (updates.value.result) |update| {
+        for (updates.parsed.value.result) |update| {
             offset = update.update_id + 1;
             processUpdate(allocator, &bot, update, whisper_url, language);
         }
@@ -80,15 +80,19 @@ fn handleTranscription(
     language: []const u8,
 ) !void {
     // Get file path from Telegram
+    log.info("Step 1: getFilePath", .{});
     const file_path = try bot.getFilePath(file_id);
     defer allocator.free(file_path);
+    log.info("Step 1 done: {s}", .{file_path});
 
     // Download file
     const ext: []const u8 = if (is_video) ".mp4" else ".ogg";
     const tmp_input = try std.fmt.allocPrint(allocator, "/tmp/tg_{d}{s}", .{ message.message_id, ext });
     defer allocator.free(tmp_input);
 
+    log.info("Step 2: downloadFile to {s}", .{tmp_input});
     try bot.downloadFile(file_path, tmp_input);
+    log.info("Step 2 done", .{});
     defer std.fs.deleteFileAbsolute(tmp_input) catch {};
 
     // Convert video to audio if needed
@@ -100,6 +104,7 @@ fn handleTranscription(
     };
 
     if (is_video) {
+        log.info("Step 2.5: ffmpeg conversion", .{});
         const out_path = try std.fmt.allocPrint(allocator, "/tmp/tg_{d}.ogg", .{message.message_id});
         tmp_audio = out_path;
 
@@ -117,15 +122,21 @@ fn handleTranscription(
         }
 
         audio_path = out_path;
+        log.info("Step 2.5 done", .{});
     }
 
     // Transcribe
+    log.info("Step 3: transcribe {s}", .{audio_path});
     const text = try whisper.transcribe(allocator, whisper_url, audio_path, language);
     defer allocator.free(text);
+    log.info("Step 3 done, text length: {d}", .{text.len});
 
+    // Send response
+    log.info("Step 4: sendMessage", .{});
     if (text.len == 0) {
         try bot.sendMessage(message.chat.id, "(empty transcription)", message.message_id);
     } else {
         try bot.sendMessage(message.chat.id, text, message.message_id);
     }
+    log.info("Step 4 done", .{});
 }
diff --git a/src/telegram.zig b/src/telegram.zig
index 437f60f..b6154d7 100644
--- a/src/telegram.zig
+++ b/src/telegram.zig
@@ -42,6 +42,21 @@ pub const SendMessageBody = struct {
     reply_to_message_id: ?i64 = null,
 };
 
+/// Parsed JSON result that also owns the raw JSON body.
+/// Must call deinit() to free both.
+pub fn OwnedParsed(comptime T: type) type {
+    return struct {
+        parsed: std.json.Parsed(T),
+        raw_body: []u8,
+        allocator: Allocator,
+
+        pub fn deinit(self: *@This()) void {
+            self.parsed.deinit();
+            self.allocator.free(self.raw_body);
+        }
+    };
+}
+
 pub const TelegramBot = struct {
     allocator: Allocator,
     token: []const u8,
@@ -60,14 +75,24 @@ pub const TelegramBot = struct {
         self.allocator.free(self.api_base);
     }
 
-    pub fn getUpdates(self: *TelegramBot, offset: i64, timeout: u32) !std.json.Parsed(GetUpdatesResponse) {
-        const url = try std.fmt.allocPrint(self.allocator, "{s}/getUpdates?offset={d}&timeout={d}&allowed_updates=[\"message\"]", .{ self.api_base, offset, timeout });
+    /// Calls the Bot API `getUpdates` method with `offset` and `timeout` and
+    /// parses the JSON reply. The caller must call `deinit()` on the result.
+    pub fn getUpdates(self: *TelegramBot, offset: i64, timeout: u32) !OwnedParsed(GetUpdatesResponse) {
+        const url = try std.fmt.allocPrint(self.allocator, "{s}/getUpdates?offset={d}&timeout={d}", .{ self.api_base, offset, timeout });
         defer self.allocator.free(url);
 
         const body = try http.httpGet(self.allocator, url);
-        defer self.allocator.free(body);
+        // Not freed on success: with parseFromSlice's default allocation mode
+        // the parsed strings may point into `body`, so the result keeps it.
+        errdefer self.allocator.free(body);
 
-        return std.json.parseFromSlice(GetUpdatesResponse, self.allocator, body, .{ .ignore_unknown_fields = true });
+        const parsed = try std.json.parseFromSlice(GetUpdatesResponse, self.allocator, body, .{ .ignore_unknown_fields = true });
+
+        return .{
+            .parsed = parsed,
+            .raw_body = body,
+            .allocator = self.allocator,
+        };
     }
 
     pub fn getFilePath(self: *TelegramBot, file_id: []const u8) ![]u8 {