Add text-to-speech via Piper TTS
All checks were successful
ci/woodpecker/push/deploy Pipeline was successful

Send text message → Piper TTS → WAV → OGG Opus → voice reply.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
Mikhail Kilin
2026-02-19 13:24:41 +03:00
parent 213eabb026
commit 86acdfab28
4 changed files with 154 additions and 19 deletions

View File

@@ -1,6 +1,7 @@
const std = @import("std");
const telegram = @import("telegram.zig");
const whisper = @import("whisper.zig");
const piper = @import("piper.zig");
const log = std.log.scoped(.transcribator);
@@ -15,11 +16,12 @@ pub fn main() !void {
};
const whisper_url = std.posix.getenv("WHISPER_URL") orelse "http://whisper.whisper.svc:8000";
const language = std.posix.getenv("WHISPER_LANGUAGE") orelse "ru";
const piper_url = std.posix.getenv("PIPER_URL") orelse "http://piper.piper.svc:5000";
var bot = try telegram.TelegramBot.init(allocator, token);
defer bot.deinit();
log.info("Bot started. Whisper: {s}, language: {s}", .{ whisper_url, language });
log.info("Bot started. Whisper: {s}, language: {s}, Piper: {s}", .{ whisper_url, language, piper_url });
var offset: i64 = 0;
@@ -33,7 +35,7 @@ pub fn main() !void {
for (updates.parsed.value.result) |update| {
offset = update.update_id + 1;
processUpdate(allocator, &bot, update, whisper_url, language);
processUpdate(allocator, &bot, update, whisper_url, language, piper_url);
}
}
}
@@ -44,30 +46,39 @@ fn processUpdate(
update: telegram.Update,
whisper_url: []const u8,
language: []const u8,
piper_url: []const u8,
) void {
const message = update.message orelse return;
var file_id: ?[]const u8 = null;
var is_video = false;
// Voice / video_note → transcription
if (message.voice) |voice| {
file_id = voice.file_id;
} else if (message.video_note) |vn| {
file_id = vn.file_id;
is_video = true;
log.info("Processing voice message in chat {d}", .{message.chat.id});
handleTranscription(allocator, bot, message, voice.file_id, false, whisper_url, language) catch |err| {
log.err("Transcription failed: {s}", .{@errorName(err)});
bot.sendMessage(message.chat.id, "Transcription failed.", message.message_id) catch {};
};
return;
}
const fid = file_id orelse return;
if (message.video_note) |vn| {
log.info("Processing video_note message in chat {d}", .{message.chat.id});
handleTranscription(allocator, bot, message, vn.file_id, true, whisper_url, language) catch |err| {
log.err("Transcription failed: {s}", .{@errorName(err)});
bot.sendMessage(message.chat.id, "Transcription failed.", message.message_id) catch {};
};
return;
}
log.info("Processing {s} message in chat {d}", .{
if (is_video) @as([]const u8, "video_note") else @as([]const u8, "voice"),
message.chat.id,
});
handleTranscription(allocator, bot, message, fid, is_video, whisper_url, language) catch |err| {
log.err("Transcription failed: {s}", .{@errorName(err)});
bot.sendMessage(message.chat.id, "Transcription failed.", message.message_id) catch {};
};
// Text message → TTS (skip commands starting with /)
if (message.text) |text| {
if (text.len > 0 and text[0] != '/') {
log.info("Processing TTS for text message in chat {d}", .{message.chat.id});
handleTTS(allocator, bot, message, text, piper_url) catch |err| {
log.err("TTS failed: {s}", .{@errorName(err)});
bot.sendMessage(message.chat.id, "TTS failed.", message.message_id) catch {};
};
}
}
}
fn handleTranscription(
@@ -140,3 +151,23 @@ fn handleTranscription(
}
log.info("Step 4 done", .{});
}
/// Synthesizes `text` to speech and sends the result as a voice reply to `message`.
///
/// Calls `piper.synthesize` to obtain the path of the generated audio file,
/// then hands that path to `bot.sendVoice`, replying to `message.message_id`
/// in chat `message.chat.id`.
///
/// The returned path is owned by this function: the file is deleted and the
/// path is freed with `allocator` on every exit path after synthesis
/// succeeds, including when `sendVoice` fails.
///
/// Errors from `piper.synthesize` and `bot.sendVoice` are returned to the caller.
fn handleTTS(
    allocator: std.mem.Allocator,
    bot: *telegram.TelegramBot,
    message: telegram.Message,
    text: []const u8,
    piper_url: []const u8,
) !void {
    log.info("TTS step 1: synthesize", .{});
    const ogg_path = try piper.synthesize(allocator, piper_url, text, message.message_id);
    defer {
        // Cleanup stays best-effort (it must not fail the request), but a file
        // that cannot be removed is logged so leftovers can be diagnosed
        // instead of silently accumulating.
        std.fs.deleteFileAbsolute(ogg_path) catch |err| {
            log.warn("Failed to delete temporary TTS file {s}: {s}", .{ ogg_path, @errorName(err) });
        };
        allocator.free(ogg_path);
    }
    log.info("TTS step 1 done: {s}", .{ogg_path});

    log.info("TTS step 2: sendVoice", .{});
    try bot.sendVoice(message.chat.id, ogg_path, message.message_id);
    log.info("TTS step 2 done", .{});
}