Initial commit: Telegram voice/video transcription bot in Zig

Long-polling bot that accepts voice messages and video notes, sends them to the Whisper STT API, and replies with the transcription text.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-18 15:32:04 +03:00
commit 819b28a672
9 changed files with 550 additions and 0 deletions
--- a/src/main.zig
+++ b/src/main.zig
@@ -0,0 +1,131 @@
+const std = @import("std");
+const telegram = @import("telegram.zig");
+const whisper = @import("whisper.zig");
+
+const log = std.log.scoped(.transcribator);
+
+/// Program entry point: reads configuration from the environment, creates the
+/// Telegram bot, and then polls for updates forever, passing every update to
+/// `processUpdate`.
+///
+/// Environment variables:
+///   TELEGRAM_BOT_TOKEN  required; `error.MissingToken` is returned when unset
+///   WHISPER_URL         optional; defaults to "http://whisper.whisper.svc:8000"
+///   WHISPER_LANGUAGE    optional; defaults to "ru"
+pub fn main() !void {
+    var debug_allocator: std.heap.GeneralPurposeAllocator(.{}) = .init;
+    defer _ = debug_allocator.deinit();
+    const allocator = debug_allocator.allocator();
+
+    const token = if (std.posix.getenv("TELEGRAM_BOT_TOKEN")) |value| value else {
+        log.err("TELEGRAM_BOT_TOKEN is not set", .{});
+        return error.MissingToken;
+    };
+    const whisper_url = if (std.posix.getenv("WHISPER_URL")) |value|
+        value
+    else
+        "http://whisper.whisper.svc:8000";
+    const language = if (std.posix.getenv("WHISPER_LANGUAGE")) |value| value else "ru";
+
+    var bot = try telegram.TelegramBot.init(allocator, token);
+    defer bot.deinit();
+
+    log.info("Bot started. Whisper: {s}, language: {s}", .{ whisper_url, language });
+
+    // Passed to every getUpdates call; advanced past each update that has been
+    // handed to processUpdate so it is not requested again.
+    var next_offset: i64 = 0;
+
+    while (true) {
+        // NOTE(review): the second argument is presumably the long-poll timeout
+        // in seconds -- confirm against telegram.zig.
+        if (bot.getUpdates(next_offset, 30)) |batch| {
+            // Released at the end of this branch, i.e. once per poll iteration.
+            defer batch.deinit();
+
+            for (batch.value.result) |update| {
+                next_offset = update.update_id + 1;
+                processUpdate(allocator, &bot, update, whisper_url, language);
+            }
+        } else |err| {
+            // Back off for five seconds before polling again.
+            log.err("getUpdates failed: {s}", .{@errorName(err)});
+            std.Thread.sleep(5 * std.time.ns_per_s);
+        }
+    }
+}
+
+/// Handles a single Telegram update.
+///
+/// Updates without a message, and messages carrying neither a voice message nor
+/// a video note, are ignored. Otherwise the attachment is passed to
+/// `handleTranscription`; if that fails, the error is logged and a
+/// "Transcription failed." reply is attempted. This function never returns an
+/// error: a failure to deliver that reply is logged as well.
+fn processUpdate(
+    allocator: std.mem.Allocator,
+    bot: *telegram.TelegramBot,
+    update: telegram.Update,
+    whisper_url: []const u8,
+    language: []const u8,
+) void {
+    const message = update.message orelse return;
+
+    // A voice attachment takes precedence; a video note is used only when no
+    // voice attachment is present.
+    const fid: []const u8 = if (message.voice) |voice|
+        voice.file_id
+    else if (message.video_note) |vn|
+        vn.file_id
+    else
+        return;
+    // Reaching this point means one of the two was present, so "no voice"
+    // implies "video note".
+    const is_video = message.voice == null;
+
+    const kind: []const u8 = if (is_video) "video_note" else "voice";
+    log.info("Processing {s} message in chat {d}", .{ kind, message.chat.id });
+
+    handleTranscription(allocator, bot, message, fid, is_video, whisper_url, language) catch |err| {
+        log.err("Transcription failed: {s}", .{@errorName(err)});
+        // Best-effort notice to the user; a delivery failure is logged rather
+        // than silently dropped so it can be diagnosed.
+        bot.sendMessage(message.chat.id, "Transcription failed.", message.message_id) catch |send_err| {
+            log.err("Failed to send failure notice: {s}", .{@errorName(send_err)});
+        };
+    };
+}
+
+/// Downloads the Telegram file `file_id`, converts it to Ogg/Opus with ffmpeg
+/// when `is_video` is set, transcribes it with `whisper.transcribe`, and
+/// replies to `message` with the resulting text (or "(empty transcription)"
+/// when the text is empty).
+///
+/// Temporary files are written under /tmp, named after `message.message_id`,
+/// and removed (best effort) on every exit path.
+///
+/// Returns `error.FfmpegFailed` when ffmpeg exits with a non-zero code or does
+/// not exit normally. Errors from the Telegram and Whisper helpers, from
+/// spawning ffmpeg, and from allocation are propagated unchanged.
+fn handleTranscription(
+    allocator: std.mem.Allocator,
+    bot: *telegram.TelegramBot,
+    message: telegram.Message,
+    file_id: []const u8,
+    is_video: bool,
+    whisper_url: []const u8,
+    language: []const u8,
+) !void {
+    // Get file path from Telegram
+    const file_path = try bot.getFilePath(file_id);
+    defer allocator.free(file_path);
+
+    // Download file
+    const ext: []const u8 = if (is_video) ".mp4" else ".ogg";
+    const tmp_input = try std.fmt.allocPrint(allocator, "/tmp/tg_{d}{s}", .{ message.message_id, ext });
+    defer allocator.free(tmp_input);
+
+    // Registered before the download so a partially written file is removed as
+    // well. Defers run in reverse order, so this executes before the free
+    // above and the path is still valid when it is read.
+    defer std.fs.deleteFileAbsolute(tmp_input) catch {};
+    try bot.downloadFile(file_path, tmp_input);
+
+    // Convert video to audio if needed
+    var audio_path = tmp_input;
+    var tmp_audio: ?[]u8 = null;
+    defer if (tmp_audio) |p| {
+        // Delete first, free second: the path bytes must still be alive while
+        // deleteFileAbsolute reads them.
+        std.fs.deleteFileAbsolute(p) catch {};
+        allocator.free(p);
+    };
+
+    if (is_video) {
+        const out_path = try std.fmt.allocPrint(allocator, "/tmp/tg_{d}.ogg", .{message.message_id});
+        tmp_audio = out_path;
+
+        var child = std.process.Child.init(
+            &.{ "ffmpeg", "-y", "-i", tmp_input, "-vn", "-codec:a", "libopus", out_path },
+            allocator,
+        );
+        child.stderr_behavior = .Ignore;
+        child.stdout_behavior = .Ignore;
+        const term = try child.spawnAndWait();
+
+        // `term` is a tagged union and only `.Exited` carries an exit code, so
+        // the active tag is checked before the code is read.
+        switch (term) {
+            .Exited => |code| if (code != 0) {
+                log.err("ffmpeg failed with exit code {d}", .{code});
+                return error.FfmpegFailed;
+            },
+            else => {
+                log.err("ffmpeg terminated abnormally ({s})", .{@tagName(term)});
+                return error.FfmpegFailed;
+            },
+        }
+
+        audio_path = out_path;
+    }
+
+    // Transcribe
+    const text = try whisper.transcribe(allocator, whisper_url, audio_path, language);
+    defer allocator.free(text);
+
+    // An empty result is replaced by a placeholder reply.
+    const reply: []const u8 = if (text.len == 0) "(empty transcription)" else text;
+    try bot.sendMessage(message.chat.id, reply, message.message_id);
+}