Add text-to-speech via Piper TTS

Send text message → Piper TTS → WAV → OGG Opus → voice reply.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-19 13:24:41 +03:00
parent 213eabb026
commit 86acdfab28
4 changed files with 154 additions and 19 deletions
--- a/src/main.zig
+++ b/src/main.zig
@@ -1,6 +1,7 @@
 const std = @import("std");
 const telegram = @import("telegram.zig");
 const whisper = @import("whisper.zig");
+const piper = @import("piper.zig");

 const log = std.log.scoped(.transcribator);

@@ -15,11 +16,12 @@ pub fn main() !void {
    };
    const whisper_url = std.posix.getenv("WHISPER_URL") orelse "http://whisper.whisper.svc:8000";
    const language = std.posix.getenv("WHISPER_LANGUAGE") orelse "ru";
+    const piper_url = std.posix.getenv("PIPER_URL") orelse "http://piper.piper.svc:5000";

    var bot = try telegram.TelegramBot.init(allocator, token);
    defer bot.deinit();

-    log.info("Bot started. Whisper: {s}, language: {s}", .{ whisper_url, language });
+    log.info("Bot started. Whisper: {s}, language: {s}, Piper: {s}", .{ whisper_url, language, piper_url });

    var offset: i64 = 0;

@@ -33,7 +35,7 @@ pub fn main() !void {

        for (updates.parsed.value.result) |update| {
            offset = update.update_id + 1;
-            processUpdate(allocator, &bot, update, whisper_url, language);
+            processUpdate(allocator, &bot, update, whisper_url, language, piper_url);
        }
    }
 }
@@ -44,30 +46,39 @@ fn processUpdate(
    update: telegram.Update,
    whisper_url: []const u8,
    language: []const u8,
+    piper_url: []const u8,
 ) void {
    const message = update.message orelse return;

-    var file_id: ?[]const u8 = null;
-    var is_video = false;
-
+    // Voice / video_note → transcription
    if (message.voice) |voice| {
-        file_id = voice.file_id;
-    } else if (message.video_note) |vn| {
-        file_id = vn.file_id;
-        is_video = true;
+        log.info("Processing voice message in chat {d}", .{message.chat.id});
+        handleTranscription(allocator, bot, message, voice.file_id, false, whisper_url, language) catch |err| {
+            log.err("Transcription failed: {s}", .{@errorName(err)});
+            bot.sendMessage(message.chat.id, "Transcription failed.", message.message_id) catch {};
+        };
+        return;
    }

-    const fid = file_id orelse return;
+    if (message.video_note) |vn| {
+        log.info("Processing video_note message in chat {d}", .{message.chat.id});
+        handleTranscription(allocator, bot, message, vn.file_id, true, whisper_url, language) catch |err| {
+            log.err("Transcription failed: {s}", .{@errorName(err)});
+            bot.sendMessage(message.chat.id, "Transcription failed.", message.message_id) catch {};
+        };
+        return;
+    }

-    log.info("Processing {s} message in chat {d}", .{
-        if (is_video) @as([]const u8, "video_note") else @as([]const u8, "voice"),
-        message.chat.id,
-    });
-
-    handleTranscription(allocator, bot, message, fid, is_video, whisper_url, language) catch |err| {
-        log.err("Transcription failed: {s}", .{@errorName(err)});
-        bot.sendMessage(message.chat.id, "Transcription failed.", message.message_id) catch {};
-    };
+    // Text message → TTS (skip commands starting with /)
+    if (message.text) |text| {
+        if (text.len > 0 and text[0] != '/') {
+            log.info("Processing TTS for text message in chat {d}", .{message.chat.id});
+            handleTTS(allocator, bot, message, text, piper_url) catch |err| {
+                log.err("TTS failed: {s}", .{@errorName(err)});
+                bot.sendMessage(message.chat.id, "TTS failed.", message.message_id) catch {};
+            };
+        }
+    }
 }

 fn handleTranscription(
@@ -140,3 +151,23 @@ fn handleTranscription(
    }
    log.info("Step 4 done", .{});
 }
+
+fn handleTTS(
+    allocator: std.mem.Allocator,
+    bot: *telegram.TelegramBot,
+    message: telegram.Message,
+    text: []const u8,
+    piper_url: []const u8,
+) !void {
+    log.info("TTS step 1: synthesize", .{});
+    const ogg_path = try piper.synthesize(allocator, piper_url, text, message.message_id);
+    defer {
+        std.fs.deleteFileAbsolute(ogg_path) catch {};
+        allocator.free(ogg_path);
+    }
+    log.info("TTS step 1 done: {s}", .{ogg_path});
+
+    log.info("TTS step 2: sendVoice", .{});
+    try bot.sendVoice(message.chat.id, ogg_path, message.message_id);
+    log.info("TTS step 2 done", .{});
+}