// transcribator/src/main.zig

const std = @import("std");
const telegram = @import("telegram.zig");
const whisper = @import("whisper.zig");

const log = std.log.scoped(.transcribator);

/// Program entry point.
///
/// Reads configuration from the environment, creates the Telegram bot and
/// then polls for updates forever, handing each one to `processUpdate`.
///
/// Environment variables:
///   * `TELEGRAM_BOT_TOKEN` — required; `error.MissingToken` is returned when unset.
///   * `WHISPER_URL`        — optional; defaults to "http://whisper.whisper.svc:8000".
///   * `WHISPER_LANGUAGE`   — optional; defaults to "ru".
pub fn main() !void {
    var debug_allocator: std.heap.GeneralPurposeAllocator(.{}) = .init;
    defer _ = debug_allocator.deinit();
    const gpa = debug_allocator.allocator();

    // --- configuration -----------------------------------------------------
    const bot_token = std.posix.getenv("TELEGRAM_BOT_TOKEN") orelse {
        log.err("TELEGRAM_BOT_TOKEN is not set", .{});
        return error.MissingToken;
    };
    const default_whisper_url = "http://whisper.whisper.svc:8000";
    const default_language = "ru";
    const whisper_url = std.posix.getenv("WHISPER_URL") orelse default_whisper_url;
    const language = std.posix.getenv("WHISPER_LANGUAGE") orelse default_language;

    // --- bot setup ---------------------------------------------------------
    var bot = try telegram.TelegramBot.init(gpa, bot_token);
    defer bot.deinit();

    log.info("Bot started. Whisper: {s}, language: {s}", .{ whisper_url, language });

    // --- polling loop ------------------------------------------------------
    // `next_offset` is always one past the last update id seen, so the next
    // getUpdates call does not ask for that update again.
    const retry_delay_ns = 5 * std.time.ns_per_s;
    var next_offset: i64 = 0;

    while (true) {
        // NOTE(review): the second argument (30) is presumably the long-poll
        // timeout in seconds — confirm against telegram.zig.
        var batch = bot.getUpdates(next_offset, 30) catch |err| {
            log.err("getUpdates failed: {s}", .{@errorName(err)});
            // Back off before retrying instead of hammering the API.
            std.Thread.sleep(retry_delay_ns);
            continue;
        };
        // Runs at the end of every loop iteration, not once after the loop.
        defer batch.deinit();

        const received = batch.parsed.value.result;
        for (received) |update| {
            next_offset = update.update_id + 1;
            processUpdate(gpa, &bot, update, whisper_url, language);
        }
    }
}

/// Handles one Telegram update.
///
/// Updates without a message, or whose message carries neither a voice
/// recording nor a video note, are ignored. Otherwise the media is passed to
/// `handleTranscription`. This function never returns an error: a failed
/// transcription is logged and reported to the chat as a reply, and a failure
/// to deliver that reply is logged as well.
fn processUpdate(
    allocator: std.mem.Allocator,
    bot: *telegram.TelegramBot,
    update: telegram.Update,
    whisper_url: []const u8,
    language: []const u8,
) void {
    const message = update.message orelse return;

    // A voice attachment takes precedence over a video note.
    const maybe_file_id: ?[]const u8 = if (message.voice) |voice|
        voice.file_id
    else if (message.video_note) |note|
        note.file_id
    else
        null;
    const file_id = maybe_file_id orelse return;

    // Past the guard above, a missing voice attachment means the id came
    // from the video note.
    const is_video = message.voice == null;

    const kind: []const u8 = if (is_video) "video_note" else "voice";
    log.info("Processing {s} message in chat {d}", .{ kind, message.chat.id });

    handleTranscription(allocator, bot, message, file_id, is_video, whisper_url, language) catch |err| {
        log.err("Transcription failed: {s}", .{@errorName(err)});
        // Notifying the user is best effort, but a failure here must not
        // vanish silently: log it so delivery problems remain diagnosable.
        bot.sendMessage(message.chat.id, "Transcription failed.", message.message_id) catch |send_err| {
            log.err("Failed to report transcription failure to chat {d}: {s}", .{
                message.chat.id,
                @errorName(send_err),
            });
        };
    };
}

/// Best-effort removal of a temporary file given by absolute path.
/// A file that does not exist is not treated as a problem; any other failure
/// is logged as a warning and otherwise ignored.
fn removeTempFile(path: []const u8) void {
    std.fs.deleteFileAbsolute(path) catch |err| switch (err) {
        error.FileNotFound => {},
        else => log.warn("Failed to remove temp file {s}: {s}", .{ path, @errorName(err) }),
    };
}

/// Transcribes the media referenced by `file_id` and replies to `message`
/// with the resulting text.
///
/// Steps:
///   1. Resolve the Telegram file path for `file_id`.
///   2. Download the file to `/tmp/tg_<message_id>.ogg` (or `.mp4` when
///      `is_video` is true).
///   2.5. For video, run ffmpeg to extract the audio track as Opus into
///      `/tmp/tg_<message_id>.ogg`.
///   3. Send the audio to Whisper at `whisper_url` using `language`.
///   4. Reply with the text, or "(empty transcription)" when it is empty.
///
/// All temporary files are removed before returning, on success and on error.
///
/// Errors from the Telegram helpers, `whisper.transcribe`, allocation and
/// process spawning are propagated. An ffmpeg run that does not exit with
/// status 0 (including termination by a signal) yields
/// `error.HttpRequestFailed`; the name is kept so callers see the same error
/// set as before.
fn handleTranscription(
    allocator: std.mem.Allocator,
    bot: *telegram.TelegramBot,
    message: telegram.Message,
    file_id: []const u8,
    is_video: bool,
    whisper_url: []const u8,
    language: []const u8,
) !void {
    // Get file path from Telegram
    log.info("Step 1: getFilePath", .{});
    const file_path = try bot.getFilePath(file_id);
    defer allocator.free(file_path);
    log.info("Step 1 done: {s}", .{file_path});

    // Download file
    const ext: []const u8 = if (is_video) ".mp4" else ".ogg";
    const tmp_input = try std.fmt.allocPrint(allocator, "/tmp/tg_{d}{s}", .{ message.message_id, ext });
    defer allocator.free(tmp_input);
    // Registered before the download so that a partially written file is
    // cleaned up when downloadFile fails. Defers run in reverse order, so the
    // file is removed while `tmp_input` is still allocated.
    defer removeTempFile(tmp_input);

    log.info("Step 2: downloadFile to {s}", .{tmp_input});
    try bot.downloadFile(file_path, tmp_input);
    log.info("Step 2 done", .{});

    // Convert video to audio if needed
    var tmp_audio: ?[]u8 = null;
    defer if (tmp_audio) |path| {
        // Delete first, free second: the path bytes must still be valid when
        // they are handed to the filesystem call.
        removeTempFile(path);
        allocator.free(path);
    };

    if (is_video) {
        log.info("Step 2.5: ffmpeg conversion", .{});
        const out_path = try std.fmt.allocPrint(allocator, "/tmp/tg_{d}.ogg", .{message.message_id});
        tmp_audio = out_path;

        var child = std.process.Child.init(
            &.{ "ffmpeg", "-y", "-i", tmp_input, "-vn", "-codec:a", "libopus", out_path },
            allocator,
        );
        child.stderr_behavior = .Ignore;
        child.stdout_behavior = .Ignore;
        const term = try child.spawnAndWait();

        // `term` is a tagged union; reading `.Exited` is only valid when the
        // process actually exited, so every variant is handled explicitly.
        switch (term) {
            .Exited => |code| {
                if (code != 0) {
                    log.err("ffmpeg failed with exit code {d}", .{code});
                    return error.HttpRequestFailed;
                }
            },
            .Signal, .Stopped, .Unknown => |status| {
                log.err("ffmpeg terminated abnormally ({s}, status {d})", .{ @tagName(term), status });
                return error.HttpRequestFailed;
            },
        }

        log.info("Step 2.5 done", .{});
    }

    // Use the converted audio when a conversion took place, otherwise the
    // downloaded file as-is.
    const audio_path: []const u8 = tmp_audio orelse tmp_input;

    // Transcribe
    log.info("Step 3: transcribe {s}", .{audio_path});
    const text = try whisper.transcribe(allocator, whisper_url, audio_path, language);
    defer allocator.free(text);
    log.info("Step 3 done, text length: {d}", .{text.len});

    // Send response
    log.info("Step 4: sendMessage", .{});
    const reply: []const u8 = if (text.len == 0) "(empty transcription)" else text;
    try bot.sendMessage(message.chat.id, reply, message.message_id);
    log.info("Step 4 done", .{});
}