Initial commit: Telegram voice/video transcription bot in Zig

Long-polling bot that accepts voice messages and video notes,
sends them to a Whisper STT API, and replies with the transcription text.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
Mikhail Kilin
2026-02-18 15:32:04 +03:00
commit 819b28a672
9 changed files with 550 additions and 0 deletions

2
.gitignore vendored Normal file
View File

@@ -0,0 +1,2 @@
.zig-cache/
zig-out/

10
Dockerfile Normal file
View File

@@ -0,0 +1,10 @@
# Build stage: compile the bot with the Zig toolchain installed from the Alpine package repository.
# NOTE(review): build.zig.zon declares minimum_zig_version = "0.15.2" — confirm that the `zig`
# package available for alpine:3.21 is at least that version; otherwise pin an explicit toolchain.
FROM alpine:3.21 AS builder
RUN apk add --no-cache zig
COPY . /app
WORKDIR /app
# ReleaseSafe keeps runtime safety checks enabled in the shipped binary.
RUN zig build -Doptimize=ReleaseSafe
# Runtime stage: only the compiled binary plus its runtime dependencies.
FROM alpine:3.21
# ffmpeg is spawned by the bot to convert video notes to audio; ca-certificates is presumably
# needed for the HTTPS calls to api.telegram.org — verify against the TLS setup in use.
RUN apk add --no-cache ffmpeg ca-certificates
COPY --from=builder /app/zig-out/bin/transcribator /usr/local/bin/
CMD ["transcribator"]

26
build.zig Normal file
View File

@@ -0,0 +1,26 @@
const std = @import("std");
/// Declares the build graph: one `transcribator` executable rooted at
/// `src/main.zig`, installed by default, plus a `run` step that launches the
/// installed binary and forwards any arguments given after `--`.
pub fn build(b: *std.Build) void {
    const exe = b.addExecutable(.{
        .name = "transcribator",
        .root_module = b.createModule(.{
            .root_source_file = b.path("src/main.zig"),
            .target = b.standardTargetOptions(.{}),
            .optimize = b.standardOptimizeOption(.{}),
        }),
    });
    b.installArtifact(exe);

    // `zig build run [-- args...]`: install first, then execute the artifact.
    const run_artifact = b.addRunArtifact(exe);
    run_artifact.step.dependOn(b.getInstallStep());
    if (b.args) |forwarded| run_artifact.addArgs(forwarded);

    b.step("run", "Run the bot").dependOn(&run_artifact.step);
}

81
build.zig.zon Normal file
View File

@@ -0,0 +1,81 @@
.{
// This is the default name used by packages depending on this one. For
// example, when a user runs `zig fetch --save <url>`, this field is used
// as the key in the `dependencies` table. Although the user can choose a
// different name, most users will stick with this provided value.
//
// It is redundant to include "zig" in this name because it is already
// within the Zig package namespace.
.name = .transcribator,
// This is a [Semantic Version](https://semver.org/).
// In a future version of Zig it will be used for package deduplication.
.version = "0.0.0",
// Together with name, this represents a globally unique package
// identifier. This field is generated by the Zig toolchain when the
// package is first created, and then *never changes*. This allows
// unambiguous detection of one package being an updated version of
// another.
//
// When forking a Zig project, this id should be regenerated (delete the
// field and run `zig build`) if the upstream project is still maintained.
// Otherwise, the fork is *hostile*, attempting to take control over the
// original project's identity. Thus it is recommended to leave the comment
// on the following line intact, so that it shows up in code reviews that
// modify the field.
.fingerprint = 0x4f84e6a7e054cdf3, // Changing this has security and trust implications.
// Tracks the earliest Zig version that the package considers to be a
// supported use case.
.minimum_zig_version = "0.15.2",
// This field is optional.
// Each dependency must either provide a `url` and `hash`, or a `path`.
// `zig build --fetch` can be used to fetch all dependencies of a package, recursively.
// Once all dependencies are fetched, `zig build` no longer requires
// internet connectivity.
.dependencies = .{
// See `zig fetch --save <url>` for a command-line interface for adding dependencies.
//.example = .{
// // When updating this field to a new URL, be sure to delete the corresponding
// // `hash`, otherwise you are communicating that you expect to find the old hash at
// // the new URL. If the contents of a URL change this will result in a hash mismatch
// // which will prevent zig from using it.
// .url = "https://example.com/foo.tar.gz",
//
// // This is computed from the file contents of the directory of files that is
// // obtained after fetching `url` and applying the inclusion rules given by
// // `paths`.
// //
// // This field is the source of truth; packages do not come from a `url`; they
// // come from a `hash`. `url` is just one of many possible mirrors for how to
// // obtain a package matching this `hash`.
// //
// // Uses the [multihash](https://multiformats.io/multihash/) format.
// .hash = "...",
//
// // When this is provided, the package is found in a directory relative to the
// // build root. In this case the package's hash is irrelevant and therefore not
// // computed. This field and `url` are mutually exclusive.
// .path = "foo",
//
// // When this is set to `true`, a package is declared to be lazily
// // fetched. This makes the dependency only get fetched if it is
// // actually used.
// .lazy = false,
//},
},
// Specifies the set of files and directories that are included in this package.
// Only files and directories listed here are included in the `hash` that
// is computed for this package. Only files listed here will remain on disk
// when using the zig package manager. As a rule of thumb, one should list
// files required for compilation plus any license(s).
// Paths are relative to the build root. Use the empty string (`""`) to refer to
// the build root itself.
// A directory listed here means that all files within, recursively, are included.
.paths = .{
"build.zig",
"build.zig.zon",
"src",
// For example...
//"LICENSE",
//"README.md",
},
}

49
k8s/transcribator.yaml Normal file
View File

@@ -0,0 +1,49 @@
apiVersion: v1
kind: Namespace
metadata:
name: transcribator
---
apiVersion: v1
kind: Secret
metadata:
name: transcribator-secret
namespace: transcribator
type: Opaque
stringData:
TELEGRAM_BOT_TOKEN: "REPLACE_ME"
---
apiVersion: apps/v1
kind: Deployment
metadata:
name: transcribator
namespace: transcribator
spec:
replicas: 1
selector:
matchLabels:
app: transcribator
template:
metadata:
labels:
app: transcribator
spec:
containers:
- name: transcribator
image: gitea.mikhailkilin.ru/killingdruid/transcribator:latest
env:
- name: TELEGRAM_BOT_TOKEN
valueFrom:
secretKeyRef:
name: transcribator-secret
key: TELEGRAM_BOT_TOKEN
- name: WHISPER_URL
value: "http://whisper.whisper.svc:8000"
- name: WHISPER_LANGUAGE
value: "ru"
resources:
requests:
memory: "32Mi"
cpu: "10m"
limits:
memory: "128Mi"
cpu: "500m"

104
src/http.zig Normal file
View File

@@ -0,0 +1,104 @@
const std = @import("std");
const Allocator = std.mem.Allocator;
const log = std.log.scoped(.http);

/// Performs a single HTTP request with a short-lived client and returns the
/// response body. Caller owns the returned slice.
///
/// `request.response_writer` is overwritten; every other option is used as given.
/// Any transport failure or a status other than 200 OK is reported as
/// `error.HttpRequestFailed`; the underlying cause is logged at warn level so
/// the caller decides the final severity. The URL is deliberately NOT logged,
/// because callers in this project embed the bot token in it.
fn fetchBody(allocator: Allocator, request: std.http.Client.FetchOptions) ![]u8 {
    var client: std.http.Client = .{ .allocator = allocator };
    defer client.deinit();

    var response: std.Io.Writer.Allocating = .init(allocator);
    defer response.deinit();

    var options = request;
    options.response_writer = &response.writer;

    const result = client.fetch(options) catch |err| {
        log.warn("request failed: {s}", .{@errorName(err)});
        return error.HttpRequestFailed;
    };
    if (result.status != .ok) {
        log.warn("unexpected HTTP status {d}", .{@intFromEnum(result.status)});
        return error.HttpRequestFailed;
    }
    return response.toOwnedSlice() catch return error.OutOfMemory;
}

/// Issues a GET request to `url` and returns the response body.
/// Caller owns the returned slice. Fails with `error.HttpRequestFailed` on any
/// transport error or non-200 status.
pub fn httpGet(allocator: Allocator, url: []const u8) ![]u8 {
    return fetchBody(allocator, .{
        .location = .{ .url = url },
    });
}

/// POSTs `body` to `url` with `Content-Type: application/json` and returns the
/// response body. Caller owns the returned slice. Fails with
/// `error.HttpRequestFailed` on any transport error or non-200 status.
pub fn httpPostJson(allocator: Allocator, url: []const u8, body: []const u8) ![]u8 {
    return fetchBody(allocator, .{
        .location = .{ .url = url },
        .method = .POST,
        .payload = body,
        .headers = .{ .content_type = .{ .override = "application/json" } },
    });
}

/// POSTs a `multipart/form-data` request to `url` and returns the response body.
///
/// `fields` are emitted first as plain form fields (`.{ name, value }` pairs),
/// followed by one file part named `file_field` whose content is read from the
/// absolute path `file_path` (at most 100 MiB) and advertised as `file_name`.
/// Names and values are written verbatim — they are not escaped, so they must
/// not contain `"` or CR/LF.
///
/// Caller owns the returned slice. Returns `error.HttpRequestFailed` if the
/// file cannot be read or the request fails, `error.OutOfMemory` if the body
/// cannot be assembled.
pub fn httpPostMultipart(
    allocator: Allocator,
    url: []const u8,
    file_field: []const u8,
    file_path: []const u8,
    file_name: []const u8,
    fields: []const [2][]const u8,
) ![]u8 {
    const boundary = "----ZigMultipartBoundary9876543210";
    // Both parts are comptime-known, so no allocation is needed for the header value.
    const content_type = "multipart/form-data; boundary=" ++ boundary;

    var body: std.Io.Writer.Allocating = .init(allocator);
    defer body.deinit();
    const w = &body.writer;

    // Plain form fields.
    for (fields) |field| {
        w.print("--{s}\r\nContent-Disposition: form-data; name=\"{s}\"\r\n\r\n{s}\r\n", .{ boundary, field[0], field[1] }) catch return error.OutOfMemory;
    }

    // File part header.
    w.print("--{s}\r\nContent-Disposition: form-data; name=\"{s}\"; filename=\"{s}\"\r\nContent-Type: application/octet-stream\r\n\r\n", .{ boundary, file_field, file_name }) catch return error.OutOfMemory;

    // File part content. Scoped so the temporary copy of the file is released
    // before the request is sent instead of living alongside the full body.
    {
        const file = std.fs.openFileAbsolute(file_path, .{}) catch |err| {
            log.warn("cannot open upload file: {s}", .{@errorName(err)});
            return error.HttpRequestFailed;
        };
        defer file.close();

        const file_content = file.readToEndAlloc(allocator, 100 * 1024 * 1024) catch |err| {
            log.warn("cannot read upload file: {s}", .{@errorName(err)});
            return error.HttpRequestFailed;
        };
        defer allocator.free(file_content);

        w.writeAll(file_content) catch return error.OutOfMemory;
    }

    // Closing delimiter.
    w.print("\r\n--{s}--\r\n", .{boundary}) catch return error.OutOfMemory;

    return fetchBody(allocator, .{
        .location = .{ .url = url },
        .method = .POST,
        .payload = body.written(),
        .headers = .{ .content_type = .{ .override = content_type } },
    });
}
/// Downloads `url` into memory and writes the body to `dest_path`, which must
/// be an absolute path. An existing file at `dest_path` is overwritten.
///
/// Returns `error.HttpRequestFailed` if the download, file creation or write
/// fails. If the write fails, the partially written file is removed so no
/// truncated file is left behind.
pub fn downloadToFile(allocator: Allocator, url: []const u8, dest_path: []const u8) !void {
    const data = try httpGet(allocator, url);
    defer allocator.free(data);

    const file = std.fs.createFileAbsolute(dest_path, .{}) catch return error.HttpRequestFailed;
    // Registered before the close so that on failure the handle is closed first
    // (defers run in reverse order) and the partial file is deleted afterwards.
    // Best-effort: a failed delete must not mask the original error.
    errdefer std.fs.deleteFileAbsolute(dest_path) catch {};
    defer file.close();

    file.writeAll(data) catch return error.HttpRequestFailed;
}

131
src/main.zig Normal file
View File

@@ -0,0 +1,131 @@
const std = @import("std");
const telegram = @import("telegram.zig");
const whisper = @import("whisper.zig");
const log = std.log.scoped(.transcribator);
/// Entry point: reads configuration from the environment, creates the bot and
/// long-polls Telegram forever, handing every received update to `processUpdate`.
///
/// Environment:
/// - `TELEGRAM_BOT_TOKEN` (required) — startup fails with `error.MissingToken` if unset.
/// - `WHISPER_URL` (optional) — defaults to the in-cluster Whisper service.
/// - `WHISPER_LANGUAGE` (optional) — defaults to "ru".
pub fn main() !void {
    var debug_allocator: std.heap.GeneralPurposeAllocator(.{}) = .init;
    defer _ = debug_allocator.deinit();
    const gpa = debug_allocator.allocator();

    const bot_token = std.posix.getenv("TELEGRAM_BOT_TOKEN") orelse {
        log.err("TELEGRAM_BOT_TOKEN is not set", .{});
        return error.MissingToken;
    };
    const stt_endpoint = std.posix.getenv("WHISPER_URL") orelse "http://whisper.whisper.svc:8000";
    const stt_language = std.posix.getenv("WHISPER_LANGUAGE") orelse "ru";

    var bot = try telegram.TelegramBot.init(gpa, bot_token);
    defer bot.deinit();

    log.info("Bot started. Whisper: {s}, language: {s}", .{ stt_endpoint, stt_language });

    // Passed as the getUpdates offset; always one past the last update seen.
    var next_update_id: i64 = 0;
    while (true) {
        if (bot.getUpdates(next_update_id, 30)) |batch| {
            defer batch.deinit();
            for (batch.value.result) |update| {
                // Advance before processing so a failing update is never fetched again.
                next_update_id = update.update_id + 1;
                processUpdate(gpa, &bot, update, stt_endpoint, stt_language);
            }
        } else |err| {
            log.err("getUpdates failed: {s}", .{@errorName(err)});
            // Back off briefly instead of hammering the API while it is unreachable.
            std.Thread.sleep(5 * std.time.ns_per_s);
        }
    }
}
/// Handles one update: if it carries a voice message or a video note, the
/// attachment is transcribed and the text is sent back as a reply. Updates
/// without a message, or with neither attachment kind, are ignored.
///
/// Never fails: a transcription error is logged and reported to the chat with a
/// generic reply; if even that reply cannot be sent, the failure is logged.
fn processUpdate(
    allocator: std.mem.Allocator,
    bot: *telegram.TelegramBot,
    update: telegram.Update,
    whisper_url: []const u8,
    language: []const u8,
) void {
    const message = update.message orelse return;

    const Attachment = struct { file_id: []const u8, is_video: bool };
    // A voice attachment takes precedence when both are present.
    const attachment: Attachment = if (message.voice) |voice|
        .{ .file_id = voice.file_id, .is_video = false }
    else if (message.video_note) |note|
        .{ .file_id = note.file_id, .is_video = true }
    else
        return;

    const kind: []const u8 = if (attachment.is_video) "video_note" else "voice";
    log.info("Processing {s} message in chat {d}", .{ kind, message.chat.id });

    handleTranscription(
        allocator,
        bot,
        message,
        attachment.file_id,
        attachment.is_video,
        whisper_url,
        language,
    ) catch |err| {
        log.err("Transcription failed: {s}", .{@errorName(err)});
        // Best-effort notification; record the failure instead of dropping it silently.
        bot.sendMessage(message.chat.id, "Transcription failed.", message.message_id) catch |send_err| {
            log.warn("Could not notify chat {d} about the failure: {s}", .{ message.chat.id, @errorName(send_err) });
        };
    };
}
/// Downloads the attachment identified by `file_id`, converts it to Opus audio
/// with ffmpeg when it is a video note, sends the audio to Whisper and replies
/// to `message` with the resulting text (or "(empty transcription)").
///
/// Temporary files are created under `/tmp` as `tg_<message_id>.<ext>` and are
/// removed on every exit path, including failures.
///
/// Errors: whatever the Telegram/Whisper helpers return, process spawn errors,
/// and `error.FfmpegFailed` when ffmpeg exits non-zero or terminates abnormally.
fn handleTranscription(
    allocator: std.mem.Allocator,
    bot: *telegram.TelegramBot,
    message: telegram.Message,
    file_id: []const u8,
    is_video: bool,
    whisper_url: []const u8,
    language: []const u8,
) !void {
    // Resolve the server-side path of the attachment.
    const file_path = try bot.getFilePath(file_id);
    defer allocator.free(file_path);

    // Download it to a temporary file.
    const ext: []const u8 = if (is_video) ".mp4" else ".ogg";
    const tmp_input = try std.fmt.allocPrint(allocator, "/tmp/tg_{d}{s}", .{ message.message_id, ext });
    defer allocator.free(tmp_input);
    // Registered BEFORE the download so a partially written file is removed as
    // well. Best-effort: the file may not exist if the download never started.
    defer std.fs.deleteFileAbsolute(tmp_input) catch {};
    try bot.downloadFile(file_path, tmp_input);

    // Video notes need their audio track extracted first.
    const tmp_audio: ?[]u8 = if (is_video)
        try std.fmt.allocPrint(allocator, "/tmp/tg_{d}.ogg", .{message.message_id})
    else
        null;
    defer if (tmp_audio) |path| {
        // Delete first, free second: the path must still be valid for the delete call.
        std.fs.deleteFileAbsolute(path) catch {};
        allocator.free(path);
    };

    if (tmp_audio) |out_path| {
        var child = std.process.Child.init(
            &.{ "ffmpeg", "-y", "-i", tmp_input, "-vn", "-codec:a", "libopus", out_path },
            allocator,
        );
        child.stderr_behavior = .Ignore;
        child.stdout_behavior = .Ignore;

        // Inspect the active tag: ffmpeg may be killed by a signal, in which case
        // there is no exit code to read.
        const term = try child.spawnAndWait();
        switch (term) {
            .Exited => |code| if (code != 0) {
                log.err("ffmpeg failed with exit code {d}", .{code});
                return error.FfmpegFailed;
            },
            else => {
                log.err("ffmpeg terminated abnormally ({s})", .{@tagName(term)});
                return error.FfmpegFailed;
            },
        }
    }
    const audio_path: []const u8 = tmp_audio orelse tmp_input;

    // Transcribe and reply.
    const text = try whisper.transcribe(allocator, whisper_url, audio_path, language);
    defer allocator.free(text);

    const reply: []const u8 = if (text.len == 0) "(empty transcription)" else text;
    try bot.sendMessage(message.chat.id, reply, message.message_id);
}

114
src/telegram.zig Normal file
View File

@@ -0,0 +1,114 @@
const std = @import("std");
const http = @import("http.zig");
const Allocator = std.mem.Allocator;
/// Voice attachment of a message, as decoded from the getUpdates JSON.
pub const Voice = struct {
    /// Identifier that main.zig hands to `TelegramBot.getFilePath` to locate the file.
    file_id: []const u8,
    /// Optional; not read by any code in this project. Presumably seconds — confirm against the Bot API.
    duration: ?i64 = null,
};
/// Video-note attachment of a message, as decoded from the getUpdates JSON.
pub const VideoNote = struct {
    /// Identifier that main.zig hands to `TelegramBot.getFilePath` to locate the file.
    file_id: []const u8,
    /// Optional; not read by any code in this project. Presumably seconds — confirm against the Bot API.
    duration: ?i64 = null,
};
/// The subset of a Telegram message this bot reads.
pub const Message = struct {
    /// Used as the reply target and to name the temporary files in main.zig.
    message_id: i64,
    /// Chat the message came from; `chat.id` is where replies are sent.
    chat: struct { id: i64 },
    /// Present when the message is a voice message.
    voice: ?Voice = null,
    /// Present when the message is a video note.
    video_note: ?VideoNote = null,
};
/// One entry of a getUpdates result.
pub const Update = struct {
    /// main.zig requests the next batch with offset `update_id + 1`.
    update_id: i64,
    /// Absent for updates that carry no message; such updates are skipped by main.zig.
    message: ?Message = null,
};
/// JSON envelope returned by the getUpdates method.
pub const GetUpdatesResponse = struct {
    /// Decoded but not inspected by `TelegramBot.getUpdates`.
    ok: bool,
    /// Received updates; defaults to an empty slice when the field is missing.
    result: []Update = &.{},
};
/// JSON envelope returned by the getFile method.
pub const GetFileResponse = struct {
    /// Decoded but not inspected by `TelegramBot.getFilePath`.
    ok: bool,
    /// `getFilePath` fails with `error.HttpRequestFailed` when this or its
    /// `file_path` is missing.
    result: ?struct {
        /// Path used by `TelegramBot.downloadFile` to build the download URL.
        file_path: ?[]const u8 = null,
    } = null,
};
/// Request payload that `TelegramBot.sendMessage` serializes to JSON.
pub const SendMessageBody = struct {
    /// Destination chat.
    chat_id: i64,
    /// Message text.
    text: []const u8,
    /// When set, the sent message is a reply to this message id.
    reply_to_message_id: ?i64 = null,
};
/// Minimal Telegram Bot API client covering the calls this bot needs:
/// getUpdates, getFile, file download and sendMessage.
///
/// Every call performs its own HTTP request through `http.zig`; no connection
/// or state is kept between calls apart from the fields below.
pub const TelegramBot = struct {
    /// Allocator used for all temporary and returned memory.
    allocator: Allocator,
    /// Bot token. Borrowed, not copied: it must outlive the bot.
    token: []const u8,
    /// `https://api.telegram.org/bot<token>`; owned, released by `deinit`.
    api_base: []const u8,

    /// Creates a client for `token`. The token slice is stored as-is and must
    /// stay valid for the lifetime of the returned value.
    pub fn init(allocator: Allocator, token: []const u8) !TelegramBot {
        const api_base = try std.fmt.allocPrint(allocator, "https://api.telegram.org/bot{s}", .{token});
        return .{
            .allocator = allocator,
            .token = token,
            .api_base = api_base,
        };
    }

    /// Releases the memory allocated by `init`.
    pub fn deinit(self: *TelegramBot) void {
        self.allocator.free(self.api_base);
    }

    /// Long-polls for message updates with an id of at least `offset`, waiting
    /// up to `timeout` seconds on the server side.
    ///
    /// The caller owns the result and must call `deinit()` on it. All strings in
    /// the result are owned by the result's arena and stay valid until then.
    pub fn getUpdates(self: *TelegramBot, offset: i64, timeout: u32) !std.json.Parsed(GetUpdatesResponse) {
        // NOTE(review): the `allowed_updates` value is sent without percent-encoding;
        // `[`, `]` and `"` are not valid raw query characters — confirm the server
        // and the HTTP client both keep tolerating this.
        const url = try std.fmt.allocPrint(self.allocator, "{s}/getUpdates?offset={d}&timeout={d}&allowed_updates=[\"message\"]", .{ self.api_base, offset, timeout });
        defer self.allocator.free(url);

        const body = try http.httpGet(self.allocator, url);
        defer self.allocator.free(body);

        // `body` is freed when this function returns, so the parser must copy
        // every string into the result's arena. With the default for slice input
        // (`.alloc_if_needed`) strings such as `file_id` could point into `body`
        // and dangle once it is freed.
        return std.json.parseFromSlice(GetUpdatesResponse, self.allocator, body, .{
            .ignore_unknown_fields = true,
            .allocate = .alloc_always,
        });
    }

    /// Resolves `file_id` to the server-side file path via getFile.
    /// Caller owns the returned slice. Returns `error.HttpRequestFailed` when
    /// the request fails or the response carries no file path.
    pub fn getFilePath(self: *TelegramBot, file_id: []const u8) ![]u8 {
        const url = try std.fmt.allocPrint(self.allocator, "{s}/getFile?file_id={s}", .{ self.api_base, file_id });
        defer self.allocator.free(url);

        const body = try http.httpGet(self.allocator, url);
        defer self.allocator.free(body);

        const parsed = try std.json.parseFromSlice(GetFileResponse, self.allocator, body, .{ .ignore_unknown_fields = true });
        defer parsed.deinit();

        const result = parsed.value.result orelse return error.HttpRequestFailed;
        const remote_path = result.file_path orelse return error.HttpRequestFailed;
        // Copy before `parsed` and `body` are released by the defers above.
        return self.allocator.dupe(u8, remote_path);
    }

    /// Downloads the file at `file_path` (as returned by `getFilePath`) to the
    /// absolute local path `dest`.
    pub fn downloadFile(self: *TelegramBot, file_path: []const u8, dest: []const u8) !void {
        const url = try std.fmt.allocPrint(self.allocator, "https://api.telegram.org/file/bot{s}/{s}", .{ self.token, file_path });
        defer self.allocator.free(url);
        try http.downloadToFile(self.allocator, url, dest);
    }

    /// Sends `text` to `chat_id`, as a reply to `reply_to` when it is non-null.
    ///
    /// Serialization and HTTP failures are returned to the caller rather than
    /// dropped, so a reply that could not be delivered is visible to callers
    /// using `try` / `catch`. Callers wanting fire-and-forget semantics must
    /// discard the error explicitly.
    pub fn sendMessage(self: *TelegramBot, chat_id: i64, text: []const u8, reply_to: ?i64) !void {
        const url = try std.fmt.allocPrint(self.allocator, "{s}/sendMessage", .{self.api_base});
        defer self.allocator.free(url);

        const msg = SendMessageBody{
            .chat_id = chat_id,
            .text = text,
            .reply_to_message_id = reply_to,
        };
        const json_body = try std.json.Stringify.valueAlloc(self.allocator, msg, .{});
        defer self.allocator.free(json_body);

        // The response body carries nothing this bot needs; only success matters.
        const resp = try http.httpPostJson(self.allocator, url, json_body);
        self.allocator.free(resp);
    }
};

33
src/whisper.zig Normal file
View File

@@ -0,0 +1,33 @@
const std = @import("std");
const http = @import("http.zig");
const Allocator = std.mem.Allocator;
/// The part of the transcription endpoint's JSON response that `transcribe` reads.
const TranscriptionResponse = struct {
    /// Transcribed text; defaults to "" when the field is absent from the response.
    text: []const u8 = "",
};
/// Uploads the audio file at `audio_path` to `<whisper_url>/v1/audio/transcriptions`
/// as a multipart form (together with the model name and `language`) and
/// returns the `text` field of the JSON response.
///
/// Caller owns the returned slice. It is empty when the response has no `text`.
/// Fails with the errors of `http.httpPostMultipart` or of JSON parsing.
pub fn transcribe(allocator: Allocator, whisper_url: []const u8, audio_path: []const u8, language: []const u8) ![]u8 {
    const endpoint = try std.fmt.allocPrint(allocator, "{s}/v1/audio/transcriptions", .{whisper_url});
    defer allocator.free(endpoint);

    const form_fields = [_][2][]const u8{
        .{ "model", "Systran/faster-whisper-medium" },
        .{ "language", language },
    };

    const raw_response = try http.httpPostMultipart(
        allocator,
        endpoint,
        "file",
        audio_path,
        // The upload is advertised under the file's base name only.
        std.fs.path.basename(audio_path),
        &form_fields,
    );
    defer allocator.free(raw_response);

    const decoded = try std.json.parseFromSlice(
        TranscriptionResponse,
        allocator,
        raw_response,
        .{ .ignore_unknown_fields = true },
    );
    defer decoded.deinit();

    // Copy out of the parse result, which is released on return.
    return allocator.dupe(u8, decoded.value.text);
}