Add text-to-speech via Piper TTS
All checks were successful
ci/woodpecker/push/deploy Pipeline was successful
All checks were successful
ci/woodpecker/push/deploy Pipeline was successful
Send text message → Piper TTS → WAV → OGG Opus → voice reply.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -40,6 +40,8 @@ spec:
|
|||||||
value: "http://whisper.whisper.svc:8000"
|
value: "http://whisper.whisper.svc:8000"
|
||||||
- name: WHISPER_LANGUAGE
|
- name: WHISPER_LANGUAGE
|
||||||
value: "ru"
|
value: "ru"
|
||||||
|
- name: PIPER_URL
|
||||||
|
value: "http://piper.piper.svc:5000"
|
||||||
resources:
|
resources:
|
||||||
requests:
|
requests:
|
||||||
memory: "32Mi"
|
memory: "32Mi"
|
||||||
|
|||||||
69
src/main.zig
69
src/main.zig
@@ -1,6 +1,7 @@
|
|||||||
const std = @import("std");
|
const std = @import("std");
|
||||||
const telegram = @import("telegram.zig");
|
const telegram = @import("telegram.zig");
|
||||||
const whisper = @import("whisper.zig");
|
const whisper = @import("whisper.zig");
|
||||||
|
const piper = @import("piper.zig");
|
||||||
|
|
||||||
const log = std.log.scoped(.transcribator);
|
const log = std.log.scoped(.transcribator);
|
||||||
|
|
||||||
@@ -15,11 +16,12 @@ pub fn main() !void {
|
|||||||
};
|
};
|
||||||
const whisper_url = std.posix.getenv("WHISPER_URL") orelse "http://whisper.whisper.svc:8000";
|
const whisper_url = std.posix.getenv("WHISPER_URL") orelse "http://whisper.whisper.svc:8000";
|
||||||
const language = std.posix.getenv("WHISPER_LANGUAGE") orelse "ru";
|
const language = std.posix.getenv("WHISPER_LANGUAGE") orelse "ru";
|
||||||
|
const piper_url = std.posix.getenv("PIPER_URL") orelse "http://piper.piper.svc:5000";
|
||||||
|
|
||||||
var bot = try telegram.TelegramBot.init(allocator, token);
|
var bot = try telegram.TelegramBot.init(allocator, token);
|
||||||
defer bot.deinit();
|
defer bot.deinit();
|
||||||
|
|
||||||
log.info("Bot started. Whisper: {s}, language: {s}", .{ whisper_url, language });
|
log.info("Bot started. Whisper: {s}, language: {s}, Piper: {s}", .{ whisper_url, language, piper_url });
|
||||||
|
|
||||||
var offset: i64 = 0;
|
var offset: i64 = 0;
|
||||||
|
|
||||||
@@ -33,7 +35,7 @@ pub fn main() !void {
|
|||||||
|
|
||||||
for (updates.parsed.value.result) |update| {
|
for (updates.parsed.value.result) |update| {
|
||||||
offset = update.update_id + 1;
|
offset = update.update_id + 1;
|
||||||
processUpdate(allocator, &bot, update, whisper_url, language);
|
processUpdate(allocator, &bot, update, whisper_url, language, piper_url);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -44,30 +46,39 @@ fn processUpdate(
|
|||||||
update: telegram.Update,
|
update: telegram.Update,
|
||||||
whisper_url: []const u8,
|
whisper_url: []const u8,
|
||||||
language: []const u8,
|
language: []const u8,
|
||||||
|
piper_url: []const u8,
|
||||||
) void {
|
) void {
|
||||||
const message = update.message orelse return;
|
const message = update.message orelse return;
|
||||||
|
|
||||||
var file_id: ?[]const u8 = null;
|
// Voice / video_note → transcription
|
||||||
var is_video = false;
|
|
||||||
|
|
||||||
if (message.voice) |voice| {
|
if (message.voice) |voice| {
|
||||||
file_id = voice.file_id;
|
log.info("Processing voice message in chat {d}", .{message.chat.id});
|
||||||
} else if (message.video_note) |vn| {
|
handleTranscription(allocator, bot, message, voice.file_id, false, whisper_url, language) catch |err| {
|
||||||
file_id = vn.file_id;
|
|
||||||
is_video = true;
|
|
||||||
}
|
|
||||||
|
|
||||||
const fid = file_id orelse return;
|
|
||||||
|
|
||||||
log.info("Processing {s} message in chat {d}", .{
|
|
||||||
if (is_video) @as([]const u8, "video_note") else @as([]const u8, "voice"),
|
|
||||||
message.chat.id,
|
|
||||||
});
|
|
||||||
|
|
||||||
handleTranscription(allocator, bot, message, fid, is_video, whisper_url, language) catch |err| {
|
|
||||||
log.err("Transcription failed: {s}", .{@errorName(err)});
|
log.err("Transcription failed: {s}", .{@errorName(err)});
|
||||||
bot.sendMessage(message.chat.id, "Transcription failed.", message.message_id) catch {};
|
bot.sendMessage(message.chat.id, "Transcription failed.", message.message_id) catch {};
|
||||||
};
|
};
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (message.video_note) |vn| {
|
||||||
|
log.info("Processing video_note message in chat {d}", .{message.chat.id});
|
||||||
|
handleTranscription(allocator, bot, message, vn.file_id, true, whisper_url, language) catch |err| {
|
||||||
|
log.err("Transcription failed: {s}", .{@errorName(err)});
|
||||||
|
bot.sendMessage(message.chat.id, "Transcription failed.", message.message_id) catch {};
|
||||||
|
};
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Text message → TTS (skip commands starting with /)
|
||||||
|
if (message.text) |text| {
|
||||||
|
if (text.len > 0 and text[0] != '/') {
|
||||||
|
log.info("Processing TTS for text message in chat {d}", .{message.chat.id});
|
||||||
|
handleTTS(allocator, bot, message, text, piper_url) catch |err| {
|
||||||
|
log.err("TTS failed: {s}", .{@errorName(err)});
|
||||||
|
bot.sendMessage(message.chat.id, "TTS failed.", message.message_id) catch {};
|
||||||
|
};
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
fn handleTranscription(
|
fn handleTranscription(
|
||||||
@@ -140,3 +151,23 @@ fn handleTranscription(
|
|||||||
}
|
}
|
||||||
log.info("Step 4 done", .{});
|
log.info("Step 4 done", .{});
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Answers a text message with a synthesized voice reply.
///
/// Asks `piper.synthesize` for an audio file generated from `text`, then
/// sends that file with `bot.sendVoice` as a reply to `message` in the same
/// chat. The path returned by `piper.synthesize` is deleted (best effort) and
/// freed with `allocator` when this function returns, on success or failure.
/// Errors from either step are propagated to the caller.
fn handleTTS(
    allocator: std.mem.Allocator,
    bot: *telegram.TelegramBot,
    message: telegram.Message,
    text: []const u8,
    piper_url: []const u8,
) !void {
    const reply_to = message.message_id;

    log.info("TTS step 1: synthesize", .{});
    const voice_file = try piper.synthesize(allocator, piper_url, text, reply_to);
    // Defers run in reverse order: the file is removed first, then its path is freed.
    defer allocator.free(voice_file);
    defer std.fs.deleteFileAbsolute(voice_file) catch {};
    log.info("TTS step 1 done: {s}", .{voice_file});

    log.info("TTS step 2: sendVoice", .{});
    try bot.sendVoice(message.chat.id, voice_file, reply_to);
    log.info("TTS step 2 done", .{});
}
|
||||||
|
|||||||
70
src/piper.zig
Normal file
70
src/piper.zig
Normal file
@@ -0,0 +1,70 @@
|
|||||||
|
const std = @import("std");
|
||||||
|
const Allocator = std.mem.Allocator;
|
||||||
|
|
||||||
|
const log = std.log.scoped(.transcribator);
|
||||||
|
|
||||||
|
/// Synthesizes `text` to an OGG Opus file and returns its absolute path.
///
/// Two external programs are run in sequence:
///   1. `curl` POSTs `text` to `piper_url` and stores the response body in
///      `/tmp/tts_{msg_id}.wav`.
///   2. `ffmpeg` re-encodes that WAV with libopus into `/tmp/tts_{msg_id}.ogg`.
///
/// The WAV file is removed (best effort) before returning, on every path.
/// On success the caller owns the returned slice and the file it names: the
/// file must be deleted and the slice freed with `allocator`. On failure the
/// OGG file is removed and its path freed here.
///
/// A non-zero or abnormal exit of either program is reported as
/// `error.HttpRequestFailed` (this includes ffmpeg failures). Allocation and
/// process-spawn errors are propagated unchanged.
pub fn synthesize(allocator: Allocator, piper_url: []const u8, text: []const u8, msg_id: i64) ![]u8 {
    const wav_path = try std.fmt.allocPrint(allocator, "/tmp/tts_{d}.wav", .{msg_id});
    // Defers run in reverse order: delete the file, then free its path.
    defer allocator.free(wav_path);
    defer std.fs.deleteFileAbsolute(wav_path) catch {};

    const ogg_path = try std.fmt.allocPrint(allocator, "/tmp/tts_{d}.ogg", .{msg_id});
    errdefer allocator.free(ogg_path);
    errdefer std.fs.deleteFileAbsolute(ogg_path) catch {};

    // Step 1: POST the text to Piper and save the WAV response.
    log.info("Piper TTS: synthesizing {d} chars", .{text.len});
    const curl_argv = [_][]const u8{
        "curl",
        "-sf",
        "--max-time",
        "120",
        "-X",
        "POST",
        "-H",
        "Content-Type: text/plain",
        "--data-raw",
        text,
        "-o",
        wav_path,
        piper_url,
    };
    var curl = std.process.Child.init(&curl_argv, allocator);
    curl.stdout_behavior = .Ignore;
    curl.stderr_behavior = .Ignore;
    switch (try curl.spawnAndWait()) {
        .Exited => |code| if (code != 0) {
            log.err("Piper TTS curl failed with exit code {d}", .{code});
            return error.HttpRequestFailed;
        },
        else => {
            log.err("Piper TTS curl terminated abnormally", .{});
            return error.HttpRequestFailed;
        },
    }

    // Step 2: convert the WAV to OGG Opus.
    log.info("Piper TTS: converting WAV to OGG", .{});
    const ffmpeg_argv = [_][]const u8{
        "ffmpeg",
        "-y",
        "-i",
        wav_path,
        "-c:a",
        "libopus",
        ogg_path,
    };
    var ffmpeg = std.process.Child.init(&ffmpeg_argv, allocator);
    ffmpeg.stdout_behavior = .Ignore;
    ffmpeg.stderr_behavior = .Ignore;
    switch (try ffmpeg.spawnAndWait()) {
        .Exited => |code| if (code != 0) {
            log.err("ffmpeg WAV→OGG failed with exit code {d}", .{code});
            return error.HttpRequestFailed;
        },
        else => {
            log.err("ffmpeg terminated abnormally", .{});
            return error.HttpRequestFailed;
        },
    }

    return ogg_path;
}
|
||||||
@@ -15,6 +15,7 @@ pub const VideoNote = struct {
|
|||||||
pub const Message = struct {
|
pub const Message = struct {
|
||||||
message_id: i64,
|
message_id: i64,
|
||||||
chat: struct { id: i64 },
|
chat: struct { id: i64 },
|
||||||
|
text: ?[]const u8 = null,
|
||||||
voice: ?Voice = null,
|
voice: ?Voice = null,
|
||||||
video_note: ?VideoNote = null,
|
video_note: ?VideoNote = null,
|
||||||
};
|
};
|
||||||
@@ -134,4 +135,35 @@ pub const TelegramBot = struct {
|
|||||||
const resp = http.httpPostJson(self.allocator, url, json_body) catch return;
|
const resp = http.httpPostJson(self.allocator, url, json_body) catch return;
|
||||||
self.allocator.free(resp);
|
self.allocator.free(resp);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Uploads the file at `ogg_path` to the bot API's `sendVoice` endpoint.
///
/// The file is posted through `http.httpPostMultipart` as the field `voice`
/// with the filename `voice.ogg`, together with a `chat_id` form field and,
/// when `reply_to` is non-null, a `reply_to_message_id` form field.
/// The returned response is freed without being inspected.
/// Allocation and request errors are propagated to the caller.
pub fn sendVoice(self: *TelegramBot, chat_id: i64, ogg_path: []const u8, reply_to: ?i64) !void {
    const gpa = self.allocator;

    const endpoint = try std.fmt.allocPrint(gpa, "{s}/sendVoice", .{self.api_base});
    defer gpa.free(endpoint);

    const chat_field = try std.fmt.allocPrint(gpa, "{d}", .{chat_id});
    defer gpa.free(chat_field);

    // Only allocated when the voice message is sent as a reply.
    const reply_field: ?[]u8 = if (reply_to) |id|
        try std.fmt.allocPrint(gpa, "{d}", .{id})
    else
        null;
    defer if (reply_field) |owned| gpa.free(owned);

    // Fixed-size storage for the form fields; `used` counts the filled slots.
    var form: [2][2][]const u8 = undefined;
    form[0] = .{ "chat_id", chat_field };
    var used: usize = 1;
    if (reply_field) |value| {
        form[1] = .{ "reply_to_message_id", value };
        used = 2;
    }

    const resp = try http.httpPostMultipart(gpa, endpoint, "voice", ogg_path, "voice.ogg", form[0..used]);
    gpa.free(resp);
}
|
||||||
};
|
};
|
||||||
|
|||||||
Reference in New Issue
Block a user