//! Telegram bot entry point.
//!
//! Voice messages and video notes are transcribed through a Whisper service and
//! the text is sent back as a reply. Plain text messages (anything not starting
//! with `/`) are synthesized through a Piper service and sent back as a voice
//! reply.

const std = @import("std");
const telegram = @import("telegram.zig");
const whisper = @import("whisper.zig");
const piper = @import("piper.zig");

const log = std.log.scoped(.transcribator);

/// Reads configuration from the environment, starts the bot and polls
/// Telegram for updates forever.
///
/// Environment variables:
/// - `TELEGRAM_BOT_TOKEN` (required): returns `error.MissingToken` when unset.
/// - `WHISPER_URL`, `WHISPER_LANGUAGE`, `PIPER_URL` (optional): fall back to
///   the defaults spelled out below.
pub fn main() !void {
    var gpa: std.heap.GeneralPurposeAllocator(.{}) = .init;
    defer _ = gpa.deinit();
    const allocator = gpa.allocator();

    const token = std.posix.getenv("TELEGRAM_BOT_TOKEN") orelse {
        log.err("TELEGRAM_BOT_TOKEN is not set", .{});
        return error.MissingToken;
    };
    const whisper_url = std.posix.getenv("WHISPER_URL") orelse "http://whisper.whisper.svc:8000";
    const language = std.posix.getenv("WHISPER_LANGUAGE") orelse "ru";
    const piper_url = std.posix.getenv("PIPER_URL") orelse "http://piper.piper.svc:5000";

    var bot = try telegram.TelegramBot.init(allocator, token);
    defer bot.deinit();

    log.info("Bot started. Whisper: {s}, language: {s}, Piper: {s}", .{ whisper_url, language, piper_url });

    var offset: i64 = 0;
    while (true) {
        var updates = bot.getUpdates(offset, 30) catch |err| {
            log.err("getUpdates failed: {s}", .{@errorName(err)});
            // Back off before retrying so a persistent failure does not spin.
            std.Thread.sleep(5 * std.time.ns_per_s);
            continue;
        };
        // Runs at the end of every loop iteration, releasing this batch.
        defer updates.deinit();

        for (updates.parsed.value.result) |update| {
            // Advance past this update so it is not requested again.
            offset = update.update_id + 1;
            processUpdate(allocator, &bot, update, whisper_url, language, piper_url);
        }
    }
}

/// Dispatches a single update to the matching handler.
///
/// Priority: voice, then video note, then text. Updates without a message and
/// text that is empty or starts with `/` are ignored. Handler errors are logged
/// and reported to the chat; they never propagate to the caller.
fn processUpdate(
    allocator: std.mem.Allocator,
    bot: *telegram.TelegramBot,
    update: telegram.Update,
    whisper_url: []const u8,
    language: []const u8,
    piper_url: []const u8,
) void {
    const message = update.message orelse return;

    // Voice / video_note → transcription
    if (message.voice) |voice| {
        log.info("Processing voice message in chat {d}", .{message.chat.id});
        handleTranscription(allocator, bot, message, voice.file_id, false, whisper_url, language) catch |err| {
            log.err("Transcription failed: {s}", .{@errorName(err)});
            notifyFailure(bot, message, "Transcription failed.");
        };
        return;
    }

    if (message.video_note) |vn| {
        log.info("Processing video_note message in chat {d}", .{message.chat.id});
        handleTranscription(allocator, bot, message, vn.file_id, true, whisper_url, language) catch |err| {
            log.err("Transcription failed: {s}", .{@errorName(err)});
            notifyFailure(bot, message, "Transcription failed.");
        };
        return;
    }

    // Text message → TTS (skip commands starting with /)
    if (message.text) |text| {
        if (text.len > 0 and text[0] != '/') {
            log.info("Processing TTS for text message in chat {d}", .{message.chat.id});
            handleTTS(allocator, bot, message, text, piper_url) catch |err| {
                log.err("TTS failed: {s}", .{@errorName(err)});
                notifyFailure(bot, message, "TTS failed.");
            };
        }
    }
}

/// Best-effort reply telling the user that processing failed.
/// A failure to deliver the notice is logged instead of being dropped silently.
fn notifyFailure(bot: *telegram.TelegramBot, message: telegram.Message, notice: []const u8) void {
    bot.sendMessage(message.chat.id, notice, message.message_id) catch |err| {
        log.warn("Could not deliver failure notice to chat {d}: {s}", .{ message.chat.id, @errorName(err) });
    };
}

/// Best-effort removal of a temporary file given by absolute path.
/// A missing file is not an error (nothing to clean up); any other failure is
/// logged so leftover files in the temp directory can be diagnosed.
fn removeTempFile(path: []const u8) void {
    std.fs.deleteFileAbsolute(path) catch |err| switch (err) {
        error.FileNotFound => {},
        else => log.warn("Could not remove temporary file {s}: {s}", .{ path, @errorName(err) }),
    };
}

/// Downloads the Telegram file `file_id`, converts it to audio with ffmpeg when
/// `is_video` is set, transcribes it via Whisper and replies with the text.
///
/// Temporary files are written under `/tmp` and named after the message id;
/// they are removed on every exit path, including failures.
///
/// Errors from the Telegram, Whisper and process-spawning calls propagate.
/// An unsuccessful ffmpeg run is reported as `error.HttpRequestFailed`.
fn handleTranscription(
    allocator: std.mem.Allocator,
    bot: *telegram.TelegramBot,
    message: telegram.Message,
    file_id: []const u8,
    is_video: bool,
    whisper_url: []const u8,
    language: []const u8,
) !void {
    // Get file path from Telegram
    log.info("Step 1: getFilePath", .{});
    const file_path = try bot.getFilePath(file_id);
    defer allocator.free(file_path);
    log.info("Step 1 done: {s}", .{file_path});

    // Download file
    const ext: []const u8 = if (is_video) ".mp4" else ".ogg";
    const tmp_input = try std.fmt.allocPrint(allocator, "/tmp/tg_{d}{s}", .{ message.message_id, ext });
    defer allocator.free(tmp_input);
    // Registered before the download so a partially written file is also
    // cleaned up when the download itself fails.
    defer removeTempFile(tmp_input);
    log.info("Step 2: downloadFile to {s}", .{tmp_input});
    try bot.downloadFile(file_path, tmp_input);
    log.info("Step 2 done", .{});

    // Convert video to audio if needed
    var audio_path = tmp_input;
    var tmp_audio: ?[]u8 = null;
    // Evaluated at scope exit, so it sees whatever `tmp_audio` was set to below.
    defer if (tmp_audio) |p| {
        removeTempFile(p);
        allocator.free(p);
    };

    if (is_video) {
        log.info("Step 2.5: ffmpeg conversion", .{});
        const out_path = try std.fmt.allocPrint(allocator, "/tmp/tg_{d}.ogg", .{message.message_id});
        // Hand ownership to the deferred cleanup before anything else can fail.
        tmp_audio = out_path;

        var child = std.process.Child.init(
            &.{ "ffmpeg", "-y", "-i", tmp_input, "-vn", "-codec:a", "libopus", out_path },
            allocator,
        );
        child.stderr_behavior = .Ignore;
        child.stdout_behavior = .Ignore;

        const term = try child.spawnAndWait();
        // `Term` is a tagged union: `.Exited` must only be read when it is the
        // active tag, otherwise a signal-killed ffmpeg would trip a safety
        // panic (or undefined behaviour in unchecked builds).
        switch (term) {
            .Exited => |code| if (code != 0) {
                log.err("ffmpeg failed with exit code {d}", .{code});
                return error.HttpRequestFailed;
            },
            else => {
                log.err("ffmpeg terminated abnormally: {s}", .{@tagName(term)});
                return error.HttpRequestFailed;
            },
        }

        audio_path = out_path;
        log.info("Step 2.5 done", .{});
    }

    // Transcribe
    log.info("Step 3: transcribe {s}", .{audio_path});
    const text = try whisper.transcribe(allocator, whisper_url, audio_path, language);
    defer allocator.free(text);
    log.info("Step 3 done, text length: {d}", .{text.len});

    // Send response
    log.info("Step 4: sendMessage", .{});
    if (text.len == 0) {
        try bot.sendMessage(message.chat.id, "(empty transcription)", message.message_id);
    } else {
        try bot.sendMessage(message.chat.id, text, message.message_id);
    }
    log.info("Step 4 done", .{});
}

/// Synthesizes `text` via Piper and sends the result as a voice reply.
///
/// `piper.synthesize` returns the path of the generated file; this function
/// deletes that file and frees the path on every exit path. Errors from
/// synthesis or from sending the voice message propagate to the caller.
fn handleTTS(
    allocator: std.mem.Allocator,
    bot: *telegram.TelegramBot,
    message: telegram.Message,
    text: []const u8,
    piper_url: []const u8,
) !void {
    log.info("TTS step 1: synthesize", .{});
    const ogg_path = try piper.synthesize(allocator, piper_url, text, message.message_id);
    defer {
        removeTempFile(ogg_path);
        allocator.free(ogg_path);
    }
    log.info("TTS step 1 done: {s}", .{ogg_path});

    log.info("TTS step 2: sendVoice", .{});
    try bot.sendVoice(message.chat.id, ogg_path, message.message_id);
    log.info("TTS step 2 done", .{});
}