Add /speak command for multi-voice TTS in group chats

- /speak <voice> enables TTS with the selected voice (irina, denis, dmitri)
- /speak stop disables TTS in the chat
- Private chats keep always-on TTS behavior
- Add PIPER_VOICES env var for voice-to-URL mapping
- Add chat type field to distinguish private/group chats

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-20 00:56:30 +03:00
parent 88c328b5d0
commit 17eb61b259
3 changed files with 132 additions and 13 deletions
--- a/k8s/transcribator.yaml
+++ b/k8s/transcribator.yaml
@@ -42,6 +42,8 @@ spec:
              value: "ru"
            - name: PIPER_URL
              value: "http://piper.piper.svc:5000"
+            - name: PIPER_VOICES
+              value: "irina=http://piper.piper.svc:5000,denis=http://piper-denis.piper.svc:5000,dmitri=http://piper-dmitri.piper.svc:5000"
          resources:
            requests:
              memory: "32Mi"
--- a/src/main.zig
+++ b/src/main.zig
@@ -5,6 +5,9 @@ const piper = @import("piper.zig");

 const log = std.log.scoped(.transcribator);

+const VoiceMap = std.StringHashMap([]const u8);
+const ActiveChats = std.AutoHashMap(i64, []const u8);
+
 pub fn main() !void {
    var gpa: std.heap.GeneralPurposeAllocator(.{}) = .init;
    defer _ = gpa.deinit();
@@ -16,12 +19,20 @@ pub fn main() !void {
    };
    const whisper_url = std.posix.getenv("WHISPER_URL") orelse "http://whisper.whisper.svc:8000";
    const language = std.posix.getenv("WHISPER_LANGUAGE") orelse "ru";
-    const piper_url = std.posix.getenv("PIPER_URL") orelse "http://piper.piper.svc:5000";
+    const voices_env = std.posix.getenv("PIPER_VOICES") orelse "";
+    const default_piper_url = std.posix.getenv("PIPER_URL") orelse "";
+
+    var voices = VoiceMap.init(allocator);
+    defer voices.deinit();
+    parseVoices(&voices, voices_env);
+
+    var active_chats = ActiveChats.init(allocator);
+    defer active_chats.deinit();

    var bot = try telegram.TelegramBot.init(allocator, token);
    defer bot.deinit();

-    log.info("Bot started. Whisper: {s}, language: {s}, Piper: {s}", .{ whisper_url, language, piper_url });
+    log.info("Bot started. Whisper: {s}, language: {s}, voices: {d}", .{ whisper_url, language, voices.count() });

    var offset: i64 = 0;

@@ -35,18 +46,35 @@ pub fn main() !void {

        for (updates.parsed.value.result) |update| {
            offset = update.update_id + 1;
-            processUpdate(allocator, &bot, update, whisper_url, language, piper_url);
+            processUpdate(allocator, &bot, update, whisper_url, language, &voices, &active_chats, default_piper_url);
        }
    }
 }

+/// Fills `map` from a comma-separated list of `name=url` pairs, e.g.
+/// `irina=http://a:5000,denis=http://b:5000`.
+///
+/// Whitespace around each entry, name and URL is ignored. Entries that are
+/// empty are skipped silently; entries without `=`, or with an empty name or
+/// URL, are skipped with a warning. A failed insertion is logged and parsing
+/// continues with the remaining entries.
+///
+/// Keys and values stored in `map` are slices into `env` (nothing is copied),
+/// so `env` must stay valid for as long as `map` is used.
+fn parseVoices(map: *VoiceMap, env: []const u8) void {
+    const whitespace = " \t\r\n";
+
+    var entries = std.mem.splitScalar(u8, env, ',');
+    while (entries.next()) |raw_entry| {
+        const entry = std.mem.trim(u8, raw_entry, whitespace);
+        // Covers an empty `env` as well as stray or trailing commas.
+        if (entry.len == 0) continue;
+
+        const sep = std.mem.indexOfScalar(u8, entry, '=') orelse {
+            log.warn("Ignoring malformed voice entry (expected name=url): {s}", .{entry});
+            continue;
+        };
+
+        const name = std.mem.trim(u8, entry[0..sep], whitespace);
+        const url = std.mem.trim(u8, entry[sep + 1 ..], whitespace);
+        if (name.len == 0 or url.len == 0) {
+            log.warn("Ignoring voice entry with empty name or URL: {s}", .{entry});
+            continue;
+        }
+
+        map.put(name, url) catch |err| {
+            log.err("Failed to register voice {s}: {s}", .{ name, @errorName(err) });
+        };
+    }
+}
+
+/// Reports whether `chat_type` is exactly the string "private".
+fn isPrivateChat(chat_type: []const u8) bool {
+    const private_type = "private";
+    return std.mem.eql(u8, private_type, chat_type);
+}
+
 fn processUpdate(
    allocator: std.mem.Allocator,
    bot: *telegram.TelegramBot,
    update: telegram.Update,
    whisper_url: []const u8,
    language: []const u8,
-    piper_url: []const u8,
+    voices: *VoiceMap,
+    active_chats: *ActiveChats,
+    default_piper_url: []const u8,
 ) void {
    const message = update.message orelse return;

@@ -69,11 +97,31 @@ fn processUpdate(
        return;
    }

-    // Text message → TTS (skip commands starting with /)
+    // Text message
    if (message.text) |text| {
-        if (text.len > 0 and text[0] != '/') {
+        if (text.len == 0) return;
+
+        // Commands
+        if (text[0] == '/') {
+            handleCommand(allocator, bot, message, text, voices, active_chats);
+            return;
+        }
+
+        // TTS: check active speak mode or private chat default
+        var piper_url: ?[]const u8 = active_chats.get(message.chat.id);
+
+        if (piper_url == null and isPrivateChat(message.chat.@"type")) {
+            if (default_piper_url.len > 0) {
+                piper_url = default_piper_url;
+            } else {
+                var it = voices.valueIterator();
+                if (it.next()) |v| piper_url = v.*;
+            }
+        }
+
+        if (piper_url) |url| {
            log.info("Processing TTS for text message in chat {d}", .{message.chat.id});
-            handleTTS(allocator, bot, message, text, piper_url) catch |err| {
+            handleTTS(allocator, bot, message, text, url) catch |err| {
                log.err("TTS failed: {s}", .{@errorName(err)});
                bot.sendMessage(message.chat.id, "TTS failed.", message.message_id) catch {};
            };
@@ -81,6 +129,72 @@ fn processUpdate(
    }
 }

+/// Handles the `/speak` command; every other command is ignored.
+///
+/// - `/speak <voice>` looks `<voice>` up in `voices` and stores its URL for
+///   this chat in `active_chats`, then confirms in the chat.
+/// - `/speak stop` or a bare `/speak` removes the chat from `active_chats`.
+///   If the chat was not active, the list of available voices is sent instead.
+/// - An unknown voice name is answered with the list of available voices.
+///
+/// A `@botname` suffix directly after the command (`/speak@my_bot denis`) is
+/// skipped. All replies are best-effort: failures are logged, never returned.
+fn handleCommand(
+    allocator: std.mem.Allocator,
+    bot: *telegram.TelegramBot,
+    message: telegram.Message,
+    text: []const u8,
+    voices: *VoiceMap,
+    active_chats: *ActiveChats,
+) void {
+    const command = "/speak";
+    const whitespace = " \t\r\n";
+
+    if (!std.mem.startsWith(u8, text, command)) return;
+
+    var rest = text[command.len..];
+
+    // Require a command boundary so that a different command sharing the
+    // prefix (e.g. "/speakers") is not mistaken for "/speak ers".
+    if (rest.len > 0 and rest[0] != '@' and std.mem.indexOfScalar(u8, whitespace, rest[0]) == null) return;
+
+    // Skip @botname suffix (e.g. /speak@my_bot denis)
+    if (rest.len > 0 and rest[0] == '@') {
+        if (std.mem.indexOfAny(u8, rest, whitespace)) |end| {
+            rest = rest[end..];
+        } else {
+            rest = "";
+        }
+    }
+
+    const arg = std.mem.trim(u8, rest, whitespace);
+
+    // /speak stop or /speak (no args) → disable TTS
+    if (arg.len == 0 or std.mem.eql(u8, arg, "stop")) {
+        if (active_chats.remove(message.chat.id)) {
+            replyToCommand(bot, message, "TTS отключён.");
+        } else {
+            // No active TTS, show available voices
+            const reply = buildVoiceListMessage(allocator, voices, "Доступные голоса: ") catch |err| {
+                log.err("Failed to build voice list: {s}", .{@errorName(err)});
+                return;
+            };
+            defer allocator.free(reply);
+            replyToCommand(bot, message, reply);
+        }
+        return;
+    }
+
+    // /speak <voice> → enable TTS with specified voice
+    const url = voices.get(arg) orelse {
+        const reply = buildVoiceListMessage(allocator, voices, "Неизвестный голос. Доступные: ") catch |err| {
+            log.err("Failed to build voice list: {s}", .{@errorName(err)});
+            return;
+        };
+        defer allocator.free(reply);
+        replyToCommand(bot, message, reply);
+        return;
+    };
+
+    // Only confirm once the voice is actually recorded for this chat.
+    active_chats.put(message.chat.id, url) catch |err| {
+        log.err("Failed to enable TTS for chat {d}: {s}", .{ message.chat.id, @errorName(err) });
+        replyToCommand(bot, message, "TTS failed.");
+        return;
+    };
+
+    const reply = std.fmt.allocPrint(allocator, "TTS включён, голос: {s}", .{arg}) catch |err| {
+        log.err("Failed to format TTS confirmation: {s}", .{@errorName(err)});
+        return;
+    };
+    defer allocator.free(reply);
+    replyToCommand(bot, message, reply);
+}
+
+/// Sends `text` as a reply to `message`; a send failure is logged and
+/// otherwise ignored so command handling never aborts on it.
+fn replyToCommand(bot: *telegram.TelegramBot, message: telegram.Message, text: []const u8) void {
+    bot.sendMessage(message.chat.id, text, message.message_id) catch |err| {
+        log.warn("Failed to send command reply to chat {d}: {s}", .{ message.chat.id, @errorName(err) });
+    };
+}
+
+/// Builds `prefix` followed by every voice name in `voices`, separated by
+/// ", ". The names appear in the map's iteration order.
+///
+/// Caller owns the returned slice and must free it with `allocator`.
+/// Returns an error if growing the buffer fails.
+fn buildVoiceListMessage(allocator: std.mem.Allocator, voices: *VoiceMap, prefix: []const u8) ![]u8 {
+    var message: std.ArrayList(u8) = .empty;
+    // Only an error exit still holds memory here; on success the buffer is
+    // handed to the caller by `toOwnedSlice`.
+    errdefer message.deinit(allocator);
+
+    try message.appendSlice(allocator, prefix);
+
+    var names = voices.keyIterator();
+    var index: usize = 0;
+    while (names.next()) |name| : (index += 1) {
+        if (index > 0) try message.appendSlice(allocator, ", ");
+        try message.appendSlice(allocator, name.*);
+    }
+
+    return message.toOwnedSlice(allocator);
+}
+
 fn handleTranscription(
    allocator: std.mem.Allocator,
    bot: *telegram.TelegramBot,
@@ -138,16 +252,16 @@ fn handleTranscription(

    // Transcribe
    log.info("Step 3: transcribe {s}", .{audio_path});
-    const text = try whisper.transcribe(allocator, whisper_url, audio_path, language);
-    defer allocator.free(text);
-    log.info("Step 3 done, text length: {d}", .{text.len});
+    const transcribed_text = try whisper.transcribe(allocator, whisper_url, audio_path, language);
+    defer allocator.free(transcribed_text);
+    log.info("Step 3 done, text length: {d}", .{transcribed_text.len});

    // Send response
    log.info("Step 4: sendMessage", .{});
-    if (text.len == 0) {
+    if (transcribed_text.len == 0) {
        try bot.sendMessage(message.chat.id, "(empty transcription)", message.message_id);
    } else {
-        try bot.sendMessage(message.chat.id, text, message.message_id);
+        try bot.sendMessage(message.chat.id, transcribed_text, message.message_id);
    }
    log.info("Step 4 done", .{});
 }
--- a/src/telegram.zig
+++ b/src/telegram.zig
@@ -14,7 +14,10 @@ pub const VideoNote = struct {

 pub const Message = struct {
    message_id: i64,
-    chat: struct { id: i64 },
+    chat: struct {
+        id: i64,
+        @"type": []const u8 = "private",
+    },
    text: ?[]const u8 = null,
    voice: ?Voice = null,
    video_note: ?VideoNote = null,