Add text-to-speech via Piper TTS
All checks were successful
ci/woodpecker/push/deploy Pipeline was successful

Send text message → Piper TTS → WAV → OGG Opus → voice reply.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
Mikhail Kilin
2026-02-19 13:24:41 +03:00
parent 213eabb026
commit 86acdfab28
4 changed files with 154 additions and 19 deletions

View File

@@ -40,6 +40,8 @@ spec:
value: "http://whisper.whisper.svc:8000" value: "http://whisper.whisper.svc:8000"
- name: WHISPER_LANGUAGE - name: WHISPER_LANGUAGE
value: "ru" value: "ru"
- name: PIPER_URL
value: "http://piper.piper.svc:5000"
resources: resources:
requests: requests:
memory: "32Mi" memory: "32Mi"

View File

@@ -1,6 +1,7 @@
const std = @import("std"); const std = @import("std");
const telegram = @import("telegram.zig"); const telegram = @import("telegram.zig");
const whisper = @import("whisper.zig"); const whisper = @import("whisper.zig");
const piper = @import("piper.zig");
const log = std.log.scoped(.transcribator); const log = std.log.scoped(.transcribator);
@@ -15,11 +16,12 @@ pub fn main() !void {
}; };
const whisper_url = std.posix.getenv("WHISPER_URL") orelse "http://whisper.whisper.svc:8000"; const whisper_url = std.posix.getenv("WHISPER_URL") orelse "http://whisper.whisper.svc:8000";
const language = std.posix.getenv("WHISPER_LANGUAGE") orelse "ru"; const language = std.posix.getenv("WHISPER_LANGUAGE") orelse "ru";
const piper_url = std.posix.getenv("PIPER_URL") orelse "http://piper.piper.svc:5000";
var bot = try telegram.TelegramBot.init(allocator, token); var bot = try telegram.TelegramBot.init(allocator, token);
defer bot.deinit(); defer bot.deinit();
log.info("Bot started. Whisper: {s}, language: {s}", .{ whisper_url, language }); log.info("Bot started. Whisper: {s}, language: {s}, Piper: {s}", .{ whisper_url, language, piper_url });
var offset: i64 = 0; var offset: i64 = 0;
@@ -33,7 +35,7 @@ pub fn main() !void {
for (updates.parsed.value.result) |update| { for (updates.parsed.value.result) |update| {
offset = update.update_id + 1; offset = update.update_id + 1;
processUpdate(allocator, &bot, update, whisper_url, language); processUpdate(allocator, &bot, update, whisper_url, language, piper_url);
} }
} }
} }
@@ -44,30 +46,39 @@ fn processUpdate(
update: telegram.Update, update: telegram.Update,
whisper_url: []const u8, whisper_url: []const u8,
language: []const u8, language: []const u8,
piper_url: []const u8,
) void { ) void {
const message = update.message orelse return; const message = update.message orelse return;
var file_id: ?[]const u8 = null; // Voice / video_note → transcription
var is_video = false;
if (message.voice) |voice| { if (message.voice) |voice| {
file_id = voice.file_id; log.info("Processing voice message in chat {d}", .{message.chat.id});
} else if (message.video_note) |vn| { handleTranscription(allocator, bot, message, voice.file_id, false, whisper_url, language) catch |err| {
file_id = vn.file_id; log.err("Transcription failed: {s}", .{@errorName(err)});
is_video = true; bot.sendMessage(message.chat.id, "Transcription failed.", message.message_id) catch {};
};
return;
} }
const fid = file_id orelse return; if (message.video_note) |vn| {
log.info("Processing video_note message in chat {d}", .{message.chat.id});
handleTranscription(allocator, bot, message, vn.file_id, true, whisper_url, language) catch |err| {
log.err("Transcription failed: {s}", .{@errorName(err)});
bot.sendMessage(message.chat.id, "Transcription failed.", message.message_id) catch {};
};
return;
}
log.info("Processing {s} message in chat {d}", .{ // Text message → TTS (skip commands starting with /)
if (is_video) @as([]const u8, "video_note") else @as([]const u8, "voice"), if (message.text) |text| {
message.chat.id, if (text.len > 0 and text[0] != '/') {
}); log.info("Processing TTS for text message in chat {d}", .{message.chat.id});
handleTTS(allocator, bot, message, text, piper_url) catch |err| {
handleTranscription(allocator, bot, message, fid, is_video, whisper_url, language) catch |err| { log.err("TTS failed: {s}", .{@errorName(err)});
log.err("Transcription failed: {s}", .{@errorName(err)}); bot.sendMessage(message.chat.id, "TTS failed.", message.message_id) catch {};
bot.sendMessage(message.chat.id, "Transcription failed.", message.message_id) catch {}; };
}; }
}
} }
fn handleTranscription( fn handleTranscription(
@@ -140,3 +151,23 @@ fn handleTranscription(
} }
log.info("Step 4 done", .{}); log.info("Step 4 done", .{});
} }
/// Turns a text message into a voice reply.
///
/// Asks `piper.synthesize` for an audio file built from `text`, then sends
/// that file back to the originating chat as a reply to `message`. The file
/// returned by `piper.synthesize` is removed from disk and its path is freed
/// with `allocator` when this function exits, whether or not sending worked.
///
/// Errors from `piper.synthesize` and `bot.sendVoice` are propagated.
fn handleTTS(
    allocator: std.mem.Allocator,
    bot: *telegram.TelegramBot,
    message: telegram.Message,
    text: []const u8,
    piper_url: []const u8,
) !void {
    const chat_id = message.chat.id;
    const reply_to = message.message_id;

    log.info("TTS step 1: synthesize", .{});
    const voice_file = try piper.synthesize(allocator, piper_url, text, reply_to);
    // Defers run in reverse order: the file is deleted first, then its path is freed.
    defer allocator.free(voice_file);
    // Best-effort cleanup; a failed delete must not mask the real result.
    defer std.fs.deleteFileAbsolute(voice_file) catch {};
    log.info("TTS step 1 done: {s}", .{voice_file});

    log.info("TTS step 2: sendVoice", .{});
    try bot.sendVoice(chat_id, voice_file, reply_to);
    log.info("TTS step 2 done", .{});
}

70
src/piper.zig Normal file
View File

@@ -0,0 +1,70 @@
const std = @import("std");
const Allocator = std.mem.Allocator;
const log = std.log.scoped(.transcribator);
/// Synthesizes `text` to speech and returns the path of the resulting OGG file.
///
/// Two external programs are run in sequence:
///   1. `curl` POSTs `text` (as `text/plain`) to `piper_url` and writes the
///      response body to `/tmp/tts_<msg_id>.wav`.
///   2. `ffmpeg` re-encodes that WAV with `libopus` into `/tmp/tts_<msg_id>.ogg`.
///
/// The intermediate WAV is always deleted before returning. On success the
/// caller owns the returned path: it must delete the file and free the slice
/// with `allocator`. On failure the OGG file (if any) is removed here.
///
/// Errors:
///   - `error.HttpRequestFailed` when curl exits non-zero or is killed.
///   - `error.AudioConversionFailed` when ffmpeg exits non-zero or is killed.
///   - allocation and process-spawn errors are propagated unchanged.
pub fn synthesize(allocator: Allocator, piper_url: []const u8, text: []const u8, msg_id: i64) ![]u8 {
    const wav_path = try std.fmt.allocPrint(allocator, "/tmp/tts_{d}.wav", .{msg_id});
    defer {
        // Best-effort: the WAV may not exist if curl failed before writing it.
        std.fs.deleteFileAbsolute(wav_path) catch {};
        allocator.free(wav_path);
    }

    const ogg_path = try std.fmt.allocPrint(allocator, "/tmp/tts_{d}.ogg", .{msg_id});
    errdefer {
        // Only on failure: on success ownership of file and path moves to the caller.
        std.fs.deleteFileAbsolute(ogg_path) catch {};
        allocator.free(ogg_path);
    }

    // POST text to Piper TTS, save WAV
    log.info("Piper TTS: synthesizing {d} chars", .{text.len});
    const curl_argv = [_][]const u8{
        "curl",       "-sf",                      "--max-time", "120",
        "-X",         "POST",                     "-H",         "Content-Type: text/plain",
        "--data-raw", text,                       "-o",         wav_path,
        piper_url,
    };
    if (!try runQuietly(
        allocator,
        &curl_argv,
        "Piper TTS curl failed with exit code {d}",
        "Piper TTS curl terminated abnormally",
    )) {
        return error.HttpRequestFailed;
    }

    // Convert WAV to OGG Opus
    log.info("Piper TTS: converting WAV to OGG", .{});
    const ffmpeg_argv = [_][]const u8{ "ffmpeg", "-y", "-i", wav_path, "-c:a", "libopus", ogg_path };
    if (!try runQuietly(
        allocator,
        &ffmpeg_argv,
        "ffmpeg WAV→OGG failed with exit code {d}",
        "ffmpeg terminated abnormally",
    )) {
        // A transcoding failure is a local problem, not an HTTP one; report it as such.
        return error.AudioConversionFailed;
    }

    return ogg_path;
}

/// Runs `argv` with stdout and stderr discarded and waits for it to finish.
///
/// Returns `true` when the process exited with status 0. Otherwise logs
/// `exit_fmt` (formatted with the exit code) or `abnormal_msg` (for any
/// non-exit termination) and returns `false`. Spawn/wait errors propagate.
fn runQuietly(
    allocator: Allocator,
    argv: []const []const u8,
    comptime exit_fmt: []const u8,
    comptime abnormal_msg: []const u8,
) !bool {
    var child = std.process.Child.init(argv, allocator);
    child.stdout_behavior = .Ignore;
    child.stderr_behavior = .Ignore;

    switch (try child.spawnAndWait()) {
        .Exited => |code| {
            if (code == 0) return true;
            log.err(exit_fmt, .{code});
        },
        else => log.err(abnormal_msg, .{}),
    }
    return false;
}

View File

@@ -15,6 +15,7 @@ pub const VideoNote = struct {
pub const Message = struct { pub const Message = struct {
message_id: i64, message_id: i64,
chat: struct { id: i64 }, chat: struct { id: i64 },
text: ?[]const u8 = null,
voice: ?Voice = null, voice: ?Voice = null,
video_note: ?VideoNote = null, video_note: ?VideoNote = null,
}; };
@@ -134,4 +135,35 @@ pub const TelegramBot = struct {
const resp = http.httpPostJson(self.allocator, url, json_body) catch return; const resp = http.httpPostJson(self.allocator, url, json_body) catch return;
self.allocator.free(resp); self.allocator.free(resp);
} }
/// Posts the file at `ogg_path` to the bot API's `sendVoice` endpoint.
///
/// The request is a multipart POST built by `http.httpPostMultipart`; the
/// file is passed with the names "voice" / "voice.ogg" and the form carries
/// `chat_id` plus, when `reply_to` is non-null, `reply_to_message_id`.
/// The response body is freed without being inspected.
///
/// Allocation failures and any error from `http.httpPostMultipart` are
/// returned to the caller.
pub fn sendVoice(self: *TelegramBot, chat_id: i64, ogg_path: []const u8, reply_to: ?i64) !void {
    const endpoint = try std.fmt.allocPrint(self.allocator, "{s}/sendVoice", .{self.api_base});
    defer self.allocator.free(endpoint);

    const chat_field = try std.fmt.allocPrint(self.allocator, "{d}", .{chat_id});
    defer self.allocator.free(chat_field);

    // Rendered only when a reply target was supplied.
    const reply_field: ?[]u8 = if (reply_to) |target|
        try std.fmt.allocPrint(self.allocator, "{d}", .{target})
    else
        null;
    defer if (reply_field) |owned| self.allocator.free(owned);

    // Fixed-capacity storage for up to two name/value pairs; only the first
    // `used` entries are initialized and handed to the HTTP helper.
    var form: [2][2][]const u8 = undefined;
    form[0] = .{ "chat_id", chat_field };
    var used: usize = 1;
    if (reply_field) |value| {
        form[1] = .{ "reply_to_message_id", value };
        used = 2;
    }

    const response = try http.httpPostMultipart(
        self.allocator,
        endpoint,
        "voice",
        ogg_path,
        "voice.ogg",
        form[0..used],
    );
    self.allocator.free(response);
}
}; };