Add /speak command for multi-voice TTS in group chats
All checks were successful
ci/woodpecker/push/deploy Pipeline was successful
All checks were successful
ci/woodpecker/push/deploy Pipeline was successful
- /speak <voice> enables TTS with selected voice (irina, denis, dmitri)
- /speak stop disables TTS in the chat
- Private chats keep always-on TTS behavior
- Add PIPER_VOICES env var for voice-to-URL mapping
- Add chat type field to distinguish private/group chats

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -42,6 +42,8 @@ spec:
|
|||||||
value: "ru"
|
value: "ru"
|
||||||
- name: PIPER_URL
|
- name: PIPER_URL
|
||||||
value: "http://piper.piper.svc:5000"
|
value: "http://piper.piper.svc:5000"
|
||||||
|
- name: PIPER_VOICES
|
||||||
|
value: "irina=http://piper.piper.svc:5000,denis=http://piper-denis.piper.svc:5000,dmitri=http://piper-dmitri.piper.svc:5000"
|
||||||
resources:
|
resources:
|
||||||
requests:
|
requests:
|
||||||
memory: "32Mi"
|
memory: "32Mi"
|
||||||
|
|||||||
138
src/main.zig
138
src/main.zig
@@ -5,6 +5,9 @@ const piper = @import("piper.zig");
|
|||||||
|
|
||||||
const log = std.log.scoped(.transcribator);
|
const log = std.log.scoped(.transcribator);
|
||||||
|
|
||||||
|
const VoiceMap = std.StringHashMap([]const u8);
|
||||||
|
const ActiveChats = std.AutoHashMap(i64, []const u8);
|
||||||
|
|
||||||
pub fn main() !void {
|
pub fn main() !void {
|
||||||
var gpa: std.heap.GeneralPurposeAllocator(.{}) = .init;
|
var gpa: std.heap.GeneralPurposeAllocator(.{}) = .init;
|
||||||
defer _ = gpa.deinit();
|
defer _ = gpa.deinit();
|
||||||
@@ -16,12 +19,20 @@ pub fn main() !void {
|
|||||||
};
|
};
|
||||||
const whisper_url = std.posix.getenv("WHISPER_URL") orelse "http://whisper.whisper.svc:8000";
|
const whisper_url = std.posix.getenv("WHISPER_URL") orelse "http://whisper.whisper.svc:8000";
|
||||||
const language = std.posix.getenv("WHISPER_LANGUAGE") orelse "ru";
|
const language = std.posix.getenv("WHISPER_LANGUAGE") orelse "ru";
|
||||||
const piper_url = std.posix.getenv("PIPER_URL") orelse "http://piper.piper.svc:5000";
|
const voices_env = std.posix.getenv("PIPER_VOICES") orelse "";
|
||||||
|
const default_piper_url = std.posix.getenv("PIPER_URL") orelse "";
|
||||||
|
|
||||||
|
var voices = VoiceMap.init(allocator);
|
||||||
|
defer voices.deinit();
|
||||||
|
parseVoices(&voices, voices_env);
|
||||||
|
|
||||||
|
var active_chats = ActiveChats.init(allocator);
|
||||||
|
defer active_chats.deinit();
|
||||||
|
|
||||||
var bot = try telegram.TelegramBot.init(allocator, token);
|
var bot = try telegram.TelegramBot.init(allocator, token);
|
||||||
defer bot.deinit();
|
defer bot.deinit();
|
||||||
|
|
||||||
log.info("Bot started. Whisper: {s}, language: {s}, Piper: {s}", .{ whisper_url, language, piper_url });
|
log.info("Bot started. Whisper: {s}, language: {s}, voices: {d}", .{ whisper_url, language, voices.count() });
|
||||||
|
|
||||||
var offset: i64 = 0;
|
var offset: i64 = 0;
|
||||||
|
|
||||||
@@ -35,18 +46,35 @@ pub fn main() !void {
|
|||||||
|
|
||||||
for (updates.parsed.value.result) |update| {
|
for (updates.parsed.value.result) |update| {
|
||||||
offset = update.update_id + 1;
|
offset = update.update_id + 1;
|
||||||
processUpdate(allocator, &bot, update, whisper_url, language, piper_url);
|
processUpdate(allocator, &bot, update, whisper_url, language, &voices, &active_chats, default_piper_url);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Parses a comma-separated list of `name=url` pairs (the `PIPER_VOICES`
/// format) and registers every valid pair in `map`.
///
/// Keys and values stored in `map` are slices into `env`; nothing is copied,
/// so `env` must stay alive for as long as `map` is used.
///
/// Whitespace around entries, names and URLs is ignored. Blank entries (for
/// example from a trailing comma) are skipped silently. Entries without `=`,
/// or with an empty name or URL, are skipped with a warning so that a
/// misconfigured voice never ends up in the map with an unusable URL.
/// Insertion failures are logged rather than dropped silently.
fn parseVoices(map: *VoiceMap, env: []const u8) void {
    const whitespace = " \t\r\n";

    var entries = std.mem.splitScalar(u8, env, ',');
    while (entries.next()) |raw_entry| {
        const entry = std.mem.trim(u8, raw_entry, whitespace);
        if (entry.len == 0) continue;

        const sep = std.mem.indexOfScalar(u8, entry, '=') orelse {
            log.warn("Ignoring voice entry without '=': {s}", .{entry});
            continue;
        };

        const name = std.mem.trim(u8, entry[0..sep], whitespace);
        const url = std.mem.trim(u8, entry[sep + 1 ..], whitespace);
        if (name.len == 0 or url.len == 0) {
            log.warn("Ignoring voice entry with empty name or URL: {s}", .{entry});
            continue;
        }

        map.put(name, url) catch |err| {
            log.err("Failed to register voice {s}: {s}", .{ name, @errorName(err) });
        };
    }
}
|
||||||
|
|
||||||
|
/// Reports whether `chat_type` is exactly the string "private".
/// The comparison is byte-wise, so it is case-sensitive.
fn isPrivateChat(chat_type: []const u8) bool {
    const private_kind: []const u8 = "private";
    return std.mem.eql(u8, private_kind, chat_type);
}
|
||||||
|
|
||||||
fn processUpdate(
|
fn processUpdate(
|
||||||
allocator: std.mem.Allocator,
|
allocator: std.mem.Allocator,
|
||||||
bot: *telegram.TelegramBot,
|
bot: *telegram.TelegramBot,
|
||||||
update: telegram.Update,
|
update: telegram.Update,
|
||||||
whisper_url: []const u8,
|
whisper_url: []const u8,
|
||||||
language: []const u8,
|
language: []const u8,
|
||||||
piper_url: []const u8,
|
voices: *VoiceMap,
|
||||||
|
active_chats: *ActiveChats,
|
||||||
|
default_piper_url: []const u8,
|
||||||
) void {
|
) void {
|
||||||
const message = update.message orelse return;
|
const message = update.message orelse return;
|
||||||
|
|
||||||
@@ -69,11 +97,31 @@ fn processUpdate(
|
|||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Text message → TTS (skip commands starting with /)
|
// Text message
|
||||||
if (message.text) |text| {
|
if (message.text) |text| {
|
||||||
if (text.len > 0 and text[0] != '/') {
|
if (text.len == 0) return;
|
||||||
|
|
||||||
|
// Commands
|
||||||
|
if (text[0] == '/') {
|
||||||
|
handleCommand(allocator, bot, message, text, voices, active_chats);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
// TTS: check active speak mode or private chat default
|
||||||
|
var piper_url: ?[]const u8 = active_chats.get(message.chat.id);
|
||||||
|
|
||||||
|
if (piper_url == null and isPrivateChat(message.chat.@"type")) {
|
||||||
|
if (default_piper_url.len > 0) {
|
||||||
|
piper_url = default_piper_url;
|
||||||
|
} else {
|
||||||
|
var it = voices.valueIterator();
|
||||||
|
if (it.next()) |v| piper_url = v.*;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (piper_url) |url| {
|
||||||
log.info("Processing TTS for text message in chat {d}", .{message.chat.id});
|
log.info("Processing TTS for text message in chat {d}", .{message.chat.id});
|
||||||
handleTTS(allocator, bot, message, text, piper_url) catch |err| {
|
handleTTS(allocator, bot, message, text, url) catch |err| {
|
||||||
log.err("TTS failed: {s}", .{@errorName(err)});
|
log.err("TTS failed: {s}", .{@errorName(err)});
|
||||||
bot.sendMessage(message.chat.id, "TTS failed.", message.message_id) catch {};
|
bot.sendMessage(message.chat.id, "TTS failed.", message.message_id) catch {};
|
||||||
};
|
};
|
||||||
@@ -81,6 +129,72 @@ fn processUpdate(
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Handles a bot command received in `message`; only `/speak` is recognised.
///
/// Supported forms (optionally with a bot-name suffix, e.g. `/speak@my_bot denis`):
/// - `/speak <voice>`: looks `<voice>` up in `voices`, stores its URL for the
///   chat in `active_chats` and confirms. If the chat cannot be registered,
///   the failure is logged and reported to the chat instead of a confirmation.
/// - `/speak stop` or bare `/speak`: removes the chat from `active_chats`;
///   if the chat was not registered, replies with the available voices.
/// - `/speak <unknown>`: replies with the available voices.
///
/// Any other command is ignored, including commands that merely begin with
/// `/speak` (such as `/speaker`). Failures are logged; none are returned.
fn handleCommand(
    allocator: std.mem.Allocator,
    bot: *telegram.TelegramBot,
    message: telegram.Message,
    text: []const u8,
    voices: *VoiceMap,
    active_chats: *ActiveChats,
) void {
    const arg = speakCommandArgument(text) orelse return;
    const chat_id = message.chat.id;

    // `/speak stop` or `/speak` without arguments: disable TTS.
    if (arg.len == 0 or std.mem.eql(u8, arg, "stop")) {
        if (active_chats.remove(chat_id)) {
            replyLogged(bot, message, "TTS отключён.");
        } else {
            // Nothing was active, so show what can be enabled instead.
            replyWithVoiceList(allocator, bot, message, voices, "Доступные голоса: ");
        }
        return;
    }

    // `/speak <voice>`: enable TTS with the requested voice.
    const url = voices.get(arg) orelse {
        replyWithVoiceList(allocator, bot, message, voices, "Неизвестный голос. Доступные: ");
        return;
    };

    // Only confirm once the chat is actually registered.
    active_chats.put(chat_id, url) catch |err| {
        log.err("Failed to enable TTS in chat {d}: {s}", .{ chat_id, @errorName(err) });
        replyLogged(bot, message, "Не удалось включить TTS.");
        return;
    };

    const reply = std.fmt.allocPrint(allocator, "TTS включён, голос: {s}", .{arg}) catch |err| {
        log.err("Failed to format /speak reply: {s}", .{@errorName(err)});
        return;
    };
    defer allocator.free(reply);
    replyLogged(bot, message, reply);
}

/// Returns the trimmed argument of a `/speak` command, or null when `text` is
/// not a `/speak` command. An optional `@botname` suffix is skipped.
fn speakCommandArgument(text: []const u8) ?[]const u8 {
    const command = "/speak";
    const whitespace = " \t\r\n";

    if (!std.mem.startsWith(u8, text, command)) return null;
    var rest = text[command.len..];

    if (rest.len > 0) {
        if (rest[0] == '@') {
            // Skip the bot name up to the first whitespace (or the end).
            const name_end = std.mem.indexOfAny(u8, rest, whitespace) orelse rest.len;
            rest = rest[name_end..];
        } else if (std.mem.indexOfScalar(u8, whitespace, rest[0]) == null) {
            // A longer command name that only shares the prefix, e.g. `/speaker`.
            return null;
        }
    }

    return std.mem.trim(u8, rest, whitespace);
}

/// Sends `reply` as a response to `message`, logging a failure instead of dropping it.
fn replyLogged(bot: *telegram.TelegramBot, message: telegram.Message, reply: []const u8) void {
    bot.sendMessage(message.chat.id, reply, message.message_id) catch |err| {
        log.warn("Failed to send reply to chat {d}: {s}", .{ message.chat.id, @errorName(err) });
    };
}

/// Builds the voice list prefixed with `prefix` and sends it as a reply to `message`.
fn replyWithVoiceList(
    allocator: std.mem.Allocator,
    bot: *telegram.TelegramBot,
    message: telegram.Message,
    voices: *VoiceMap,
    prefix: []const u8,
) void {
    const reply = buildVoiceListMessage(allocator, voices, prefix) catch |err| {
        log.err("Failed to build voice list: {s}", .{@errorName(err)});
        return;
    };
    defer allocator.free(reply);
    replyLogged(bot, message, reply);
}
|
||||||
|
|
||||||
|
/// Builds a message consisting of `prefix` followed by the names of all
/// voices in `voices`, separated by ", ".
///
/// Names are listed in ascending byte order so the message is stable
/// regardless of the hash map's internal iteration order. With an empty map
/// the result is just `prefix`.
///
/// Caller owns the returned slice and must free it with `allocator`.
/// Returns an error if allocation fails.
fn buildVoiceListMessage(allocator: std.mem.Allocator, voices: *VoiceMap, prefix: []const u8) ![]u8 {
    // Collect the names first so they can be sorted; the slices still point
    // at the map's keys and are not copied.
    var names: std.ArrayList([]const u8) = .empty;
    defer names.deinit(allocator);
    try names.ensureTotalCapacity(allocator, voices.count());

    var key_it = voices.keyIterator();
    while (key_it.next()) |name| names.appendAssumeCapacity(name.*);
    std.mem.sort([]const u8, names.items, {}, voiceNameLessThan);

    var buf: std.ArrayList(u8) = .empty;
    // Only release the buffer on failure; on success the caller owns it.
    errdefer buf.deinit(allocator);

    try buf.appendSlice(allocator, prefix);
    for (names.items, 0..) |name, i| {
        if (i != 0) try buf.appendSlice(allocator, ", ");
        try buf.appendSlice(allocator, name);
    }
    return buf.toOwnedSlice(allocator);
}

/// Orders voice names by ascending byte value; used for sorting the list.
fn voiceNameLessThan(_: void, lhs: []const u8, rhs: []const u8) bool {
    return std.mem.lessThan(u8, lhs, rhs);
}
|
||||||
|
|
||||||
fn handleTranscription(
|
fn handleTranscription(
|
||||||
allocator: std.mem.Allocator,
|
allocator: std.mem.Allocator,
|
||||||
bot: *telegram.TelegramBot,
|
bot: *telegram.TelegramBot,
|
||||||
@@ -138,16 +252,16 @@ fn handleTranscription(
|
|||||||
|
|
||||||
// Transcribe
|
// Transcribe
|
||||||
log.info("Step 3: transcribe {s}", .{audio_path});
|
log.info("Step 3: transcribe {s}", .{audio_path});
|
||||||
const text = try whisper.transcribe(allocator, whisper_url, audio_path, language);
|
const transcribed_text = try whisper.transcribe(allocator, whisper_url, audio_path, language);
|
||||||
defer allocator.free(text);
|
defer allocator.free(transcribed_text);
|
||||||
log.info("Step 3 done, text length: {d}", .{text.len});
|
log.info("Step 3 done, text length: {d}", .{transcribed_text.len});
|
||||||
|
|
||||||
// Send response
|
// Send response
|
||||||
log.info("Step 4: sendMessage", .{});
|
log.info("Step 4: sendMessage", .{});
|
||||||
if (text.len == 0) {
|
if (transcribed_text.len == 0) {
|
||||||
try bot.sendMessage(message.chat.id, "(empty transcription)", message.message_id);
|
try bot.sendMessage(message.chat.id, "(empty transcription)", message.message_id);
|
||||||
} else {
|
} else {
|
||||||
try bot.sendMessage(message.chat.id, text, message.message_id);
|
try bot.sendMessage(message.chat.id, transcribed_text, message.message_id);
|
||||||
}
|
}
|
||||||
log.info("Step 4 done", .{});
|
log.info("Step 4 done", .{});
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -14,7 +14,10 @@ pub const VideoNote = struct {
|
|||||||
|
|
||||||
pub const Message = struct {
|
pub const Message = struct {
|
||||||
message_id: i64,
|
message_id: i64,
|
||||||
chat: struct { id: i64 },
|
chat: struct {
|
||||||
|
id: i64,
|
||||||
|
@"type": []const u8 = "private",
|
||||||
|
},
|
||||||
text: ?[]const u8 = null,
|
text: ?[]const u8 = null,
|
||||||
voice: ?Voice = null,
|
voice: ?Voice = null,
|
||||||
video_note: ?VideoNote = null,
|
video_note: ?VideoNote = null,
|
||||||
|
|||||||
Reference in New Issue
Block a user