Add text-to-speech via Piper TTS
All checks were successful
ci/woodpecker/push/deploy Pipeline was successful
All checks were successful
ci/woodpecker/push/deploy Pipeline was successful
Send text message → Piper TTS → WAV → OGG Opus → voice reply. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -40,6 +40,8 @@ spec:
|
||||
value: "http://whisper.whisper.svc:8000"
|
||||
- name: WHISPER_LANGUAGE
|
||||
value: "ru"
|
||||
- name: PIPER_URL
|
||||
value: "http://piper.piper.svc:5000"
|
||||
resources:
|
||||
requests:
|
||||
memory: "32Mi"
|
||||
|
||||
69
src/main.zig
69
src/main.zig
@@ -1,6 +1,7 @@
|
||||
const std = @import("std");
|
||||
const telegram = @import("telegram.zig");
|
||||
const whisper = @import("whisper.zig");
|
||||
const piper = @import("piper.zig");
|
||||
|
||||
const log = std.log.scoped(.transcribator);
|
||||
|
||||
@@ -15,11 +16,12 @@ pub fn main() !void {
|
||||
};
|
||||
const whisper_url = std.posix.getenv("WHISPER_URL") orelse "http://whisper.whisper.svc:8000";
|
||||
const language = std.posix.getenv("WHISPER_LANGUAGE") orelse "ru";
|
||||
const piper_url = std.posix.getenv("PIPER_URL") orelse "http://piper.piper.svc:5000";
|
||||
|
||||
var bot = try telegram.TelegramBot.init(allocator, token);
|
||||
defer bot.deinit();
|
||||
|
||||
log.info("Bot started. Whisper: {s}, language: {s}", .{ whisper_url, language });
|
||||
log.info("Bot started. Whisper: {s}, language: {s}, Piper: {s}", .{ whisper_url, language, piper_url });
|
||||
|
||||
var offset: i64 = 0;
|
||||
|
||||
@@ -33,7 +35,7 @@ pub fn main() !void {
|
||||
|
||||
for (updates.parsed.value.result) |update| {
|
||||
offset = update.update_id + 1;
|
||||
processUpdate(allocator, &bot, update, whisper_url, language);
|
||||
processUpdate(allocator, &bot, update, whisper_url, language, piper_url);
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -44,30 +46,39 @@ fn processUpdate(
|
||||
update: telegram.Update,
|
||||
whisper_url: []const u8,
|
||||
language: []const u8,
|
||||
piper_url: []const u8,
|
||||
) void {
|
||||
const message = update.message orelse return;
|
||||
|
||||
var file_id: ?[]const u8 = null;
|
||||
var is_video = false;
|
||||
|
||||
// Voice / video_note → transcription
|
||||
if (message.voice) |voice| {
|
||||
file_id = voice.file_id;
|
||||
} else if (message.video_note) |vn| {
|
||||
file_id = vn.file_id;
|
||||
is_video = true;
|
||||
log.info("Processing voice message in chat {d}", .{message.chat.id});
|
||||
handleTranscription(allocator, bot, message, voice.file_id, false, whisper_url, language) catch |err| {
|
||||
log.err("Transcription failed: {s}", .{@errorName(err)});
|
||||
bot.sendMessage(message.chat.id, "Transcription failed.", message.message_id) catch {};
|
||||
};
|
||||
return;
|
||||
}
|
||||
|
||||
const fid = file_id orelse return;
|
||||
if (message.video_note) |vn| {
|
||||
log.info("Processing video_note message in chat {d}", .{message.chat.id});
|
||||
handleTranscription(allocator, bot, message, vn.file_id, true, whisper_url, language) catch |err| {
|
||||
log.err("Transcription failed: {s}", .{@errorName(err)});
|
||||
bot.sendMessage(message.chat.id, "Transcription failed.", message.message_id) catch {};
|
||||
};
|
||||
return;
|
||||
}
|
||||
|
||||
log.info("Processing {s} message in chat {d}", .{
|
||||
if (is_video) @as([]const u8, "video_note") else @as([]const u8, "voice"),
|
||||
message.chat.id,
|
||||
});
|
||||
|
||||
handleTranscription(allocator, bot, message, fid, is_video, whisper_url, language) catch |err| {
|
||||
log.err("Transcription failed: {s}", .{@errorName(err)});
|
||||
bot.sendMessage(message.chat.id, "Transcription failed.", message.message_id) catch {};
|
||||
};
|
||||
// Text message → TTS (skip commands starting with /)
|
||||
if (message.text) |text| {
|
||||
if (text.len > 0 and text[0] != '/') {
|
||||
log.info("Processing TTS for text message in chat {d}", .{message.chat.id});
|
||||
handleTTS(allocator, bot, message, text, piper_url) catch |err| {
|
||||
log.err("TTS failed: {s}", .{@errorName(err)});
|
||||
bot.sendMessage(message.chat.id, "TTS failed.", message.message_id) catch {};
|
||||
};
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn handleTranscription(
|
||||
@@ -140,3 +151,23 @@ fn handleTranscription(
|
||||
}
|
||||
log.info("Step 4 done", .{});
|
||||
}
|
||||
|
||||
/// Answers a text message with a synthesized voice reply.
///
/// The text is handed to `piper.synthesize`, which returns the path of an OGG
/// file; that file is sent with `bot.sendVoice` as a reply to `message`.
/// The file is deleted and its path freed on every exit path, whether the
/// upload succeeded or not. Errors from synthesis or upload are returned to
/// the caller.
fn handleTTS(
    allocator: std.mem.Allocator,
    bot: *telegram.TelegramBot,
    message: telegram.Message,
    text: []const u8,
    piper_url: []const u8,
) !void {
    log.info("TTS step 1: synthesize", .{});
    const voice_file = try piper.synthesize(allocator, piper_url, text, message.message_id);
    // Defers run last-in-first-out: the file is removed first, then the
    // path string that names it is released.
    defer allocator.free(voice_file);
    defer std.fs.deleteFileAbsolute(voice_file) catch {};
    log.info("TTS step 1 done: {s}", .{voice_file});

    log.info("TTS step 2: sendVoice", .{});
    try bot.sendVoice(message.chat.id, voice_file, message.message_id);
    log.info("TTS step 2 done", .{});
}
|
||||
|
||||
70
src/piper.zig
Normal file
70
src/piper.zig
Normal file
@@ -0,0 +1,70 @@
|
||||
const std = @import("std");
|
||||
const Allocator = std.mem.Allocator;
|
||||
|
||||
const log = std.log.scoped(.transcribator);
|
||||
|
||||
/// Best-effort removal of a temporary file.
///
/// A file that was never created is not an error; any other failure is logged
/// so leftover files in /tmp do not go unnoticed.
fn removeTempFile(path: []const u8) void {
    std.fs.deleteFileAbsolute(path) catch |err| switch (err) {
        error.FileNotFound => {},
        else => log.warn("could not remove temp file {s}: {s}", .{ path, @errorName(err) }),
    };
}

/// Runs an external tool to completion with stdin, stdout and stderr detached.
///
/// `label` prefixes the error log lines. A non-zero exit status or an abnormal
/// termination is logged and reported as `error.HttpRequestFailed`; spawn
/// errors are returned as-is.
fn runTool(allocator: Allocator, comptime label: []const u8, argv: []const []const u8) !void {
    var child = std.process.Child.init(argv, allocator);
    // Do not let the child inherit the bot's stdin: ffmpeg in particular
    // treats an inherited stdin as interactive input.
    child.stdin_behavior = .Ignore;
    child.stdout_behavior = .Ignore;
    child.stderr_behavior = .Ignore;

    switch (try child.spawnAndWait()) {
        .Exited => |code| if (code != 0) {
            log.err(label ++ " failed with exit code {d}", .{code});
            return error.HttpRequestFailed;
        },
        else => {
            log.err(label ++ " terminated abnormally", .{});
            return error.HttpRequestFailed;
        },
    }
}

/// Synthesizes `text` to speech and returns the path of the resulting OGG Opus file.
///
/// Steps:
///   1. POST `text` as text/plain to `piper_url` with curl, saving the
///      response to /tmp/tts_<msg_id>.wav.
///   2. Convert that WAV to /tmp/tts_<msg_id>.ogg with ffmpeg (libopus).
///
/// The intermediate WAV is always removed before returning. On success the
/// caller owns the returned path: it must delete the file and free the slice
/// with `allocator`. On failure the OGG file (if any) is removed and the path
/// is freed here.
///
/// Errors: allocation and spawn errors are propagated; a failing curl or
/// ffmpeg run yields `error.HttpRequestFailed` (the ffmpeg case keeps this
/// name for compatibility with existing callers).
pub fn synthesize(allocator: Allocator, piper_url: []const u8, text: []const u8, msg_id: i64) ![]u8 {
    const wav_path = try std.fmt.allocPrint(allocator, "/tmp/tts_{d}.wav", .{msg_id});
    defer {
        removeTempFile(wav_path);
        allocator.free(wav_path);
    }

    const ogg_path = try std.fmt.allocPrint(allocator, "/tmp/tts_{d}.ogg", .{msg_id});
    errdefer {
        removeTempFile(ogg_path);
        allocator.free(ogg_path);
    }

    // POST text to Piper TTS, save WAV. Note: text.len is a byte count.
    log.info("Piper TTS: synthesizing {d} chars", .{text.len});
    try runTool(allocator, "Piper TTS curl", &.{
        "curl",                     "-sf",
        "--max-time",               "120",
        "-X",                       "POST",
        "-H",                       "Content-Type: text/plain",
        "--data-raw",               text,
        "-o",                       wav_path,
        piper_url,
    });

    // Convert WAV to OGG Opus.
    log.info("Piper TTS: converting WAV to OGG", .{});
    try runTool(allocator, "ffmpeg WAV→OGG", &.{
        "ffmpeg", "-nostdin", "-y", "-i", wav_path, "-c:a", "libopus", ogg_path,
    });

    return ogg_path;
}
|
||||
@@ -15,6 +15,7 @@ pub const VideoNote = struct {
|
||||
/// An incoming message, reduced to the fields this bot reads.
///
/// Optional fields default to `null`, so a message that lacks them still
/// produces a valid value.
pub const Message = struct {
    /// Identifier of this message.
    message_id: i64,
    /// The chat the message belongs to; only its id is kept.
    chat: struct { id: i64 },
    /// Text content, when the message carries any.
    text: ?[]const u8 = null,
    /// Voice attachment, when the message carries one.
    voice: ?Voice = null,
    /// Video-note attachment, when the message carries one.
    video_note: ?VideoNote = null,
};
|
||||
@@ -134,4 +135,35 @@ pub const TelegramBot = struct {
|
||||
const resp = http.httpPostJson(self.allocator, url, json_body) catch return;
|
||||
self.allocator.free(resp);
|
||||
}
|
||||
|
||||
/// Uploads the file at `ogg_path` to `chat_id` through the `sendVoice` endpoint.
///
/// The request is a multipart form built by `http.httpPostMultipart`: the file
/// goes in the `voice` part under the name "voice.ogg", alongside a `chat_id`
/// field and, when `reply_to` is set, a `reply_to_message_id` field.
/// The response body is discarded. Allocation and HTTP errors are returned.
pub fn sendVoice(self: *TelegramBot, chat_id: i64, ogg_path: []const u8, reply_to: ?i64) !void {
    const endpoint = try std.fmt.allocPrint(self.allocator, "{s}/sendVoice", .{self.api_base});
    defer self.allocator.free(endpoint);

    const chat_value = try std.fmt.allocPrint(self.allocator, "{d}", .{chat_id});
    defer self.allocator.free(chat_value);

    // Render the reply target only when one was requested.
    const reply_value: ?[]u8 = if (reply_to) |id|
        try std.fmt.allocPrint(self.allocator, "{d}", .{id})
    else
        null;
    defer if (reply_value) |owned| self.allocator.free(owned);

    // Fixed-size backing store for the form fields; only the first `used`
    // entries are initialized and passed on.
    var form: [2][2][]const u8 = undefined;
    form[0] = .{ "chat_id", chat_value };
    var used: usize = 1;
    if (reply_value) |rendered| {
        form[1] = .{ "reply_to_message_id", rendered };
        used = 2;
    }

    const body = try http.httpPostMultipart(
        self.allocator,
        endpoint,
        "voice",
        ogg_path,
        "voice.ogg",
        form[0..used],
    );
    self.allocator.free(body);
}
|
||||
};
|
||||
|
||||
Reference in New Issue
Block a user