From 819b28a6724dea79f1eb703b38a8e11a2fa3b931 Mon Sep 17 00:00:00 2001
From: Mikhail Kilin
Date: Wed, 18 Feb 2026 15:32:04 +0300
Subject: [PATCH] Initial commit: Telegram voice/video transcription bot in Zig

Long-polling bot that accepts voice messages and video notes,
sends them to Whisper STT API, and replies with transcription text.

Co-Authored-By: Claude Opus 4.6
---
 .gitignore             |   2 +
 Dockerfile             |  12 ++++
 build.zig              |  28 ++++++++
 build.zig.zon          |  81 +++++++++++++++++++++
 k8s/transcribator.yaml |  49 +++++++++++++
 src/http.zig           | 125 ++++++++++++++++++++++++++++++++
 src/main.zig           | 157 +++++++++++++++++++++++++++++++++++++++++
 src/telegram.zig       | 143 +++++++++++++++++++++++++++++++++++++
 src/whisper.zig        |  40 +++++++++++
 9 files changed, 637 insertions(+)
 create mode 100644 .gitignore
 create mode 100644 Dockerfile
 create mode 100644 build.zig
 create mode 100644 build.zig.zon
 create mode 100644 k8s/transcribator.yaml
 create mode 100644 src/http.zig
 create mode 100644 src/main.zig
 create mode 100644 src/telegram.zig
 create mode 100644 src/whisper.zig

diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..3389c86
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,2 @@
+.zig-cache/
+zig-out/
diff --git a/Dockerfile b/Dockerfile
new file mode 100644
index 0000000..4001a5b
--- /dev/null
+++ b/Dockerfile
@@ -0,0 +1,12 @@
+# NOTE(review): build.zig.zon declares minimum_zig_version 0.15.2; confirm the
+# zig package shipped by this Alpine release is at least that new.
+FROM alpine:3.21 AS builder
+RUN apk add --no-cache zig
+COPY . /app
+WORKDIR /app
+RUN zig build -Doptimize=ReleaseSafe
+
+FROM alpine:3.21
+RUN apk add --no-cache ffmpeg ca-certificates
+COPY --from=builder /app/zig-out/bin/transcribator /usr/local/bin/
+CMD ["transcribator"]
diff --git a/build.zig b/build.zig
new file mode 100644
index 0000000..be8b269
--- /dev/null
+++ b/build.zig
@@ -0,0 +1,28 @@
+const std = @import("std");
+
+/// Declares the `transcribator` executable, installs it, and adds a `run`
+/// step that launches it with any arguments passed to `zig build run -- ...`.
+pub fn build(b: *std.Build) void {
+    const target = b.standardTargetOptions(.{});
+    const optimize = b.standardOptimizeOption(.{});
+
+    const exe = b.addExecutable(.{
+        .name = "transcribator",
+        .root_module = b.createModule(.{
+            .root_source_file = b.path("src/main.zig"),
+            .target = target,
+            .optimize = optimize,
+        }),
+    });
+
+    b.installArtifact(exe);
+
+    const run_step = b.step("run", "Run the bot");
+    const run_cmd = b.addRunArtifact(exe);
+    run_step.dependOn(&run_cmd.step);
+    run_cmd.step.dependOn(b.getInstallStep());
+
+    if (b.args) |args| {
+        run_cmd.addArgs(args);
+    }
+}
diff --git a/build.zig.zon b/build.zig.zon
new file mode 100644
index 0000000..778dd4a
--- /dev/null
+++ b/build.zig.zon
@@ -0,0 +1,81 @@
+.{
+    // This is the default name used by packages depending on this one. For
+    // example, when a user runs `zig fetch --save <url>`, this field is used
+    // as the key in the `dependencies` table. Although the user can choose a
+    // different name, most users will stick with this provided value.
+    //
+    // It is redundant to include "zig" in this name because it is already
+    // within the Zig package namespace.
+    .name = .transcribator,
+    // This is a [Semantic Version](https://semver.org/).
+    // In a future version of Zig it will be used for package deduplication.
+    .version = "0.0.0",
+    // Together with name, this represents a globally unique package
+    // identifier. This field is generated by the Zig toolchain when the
+    // package is first created, and then *never changes*. This allows
+    // unambiguous detection of one package being an updated version of
+    // another.
+    //
+    // When forking a Zig project, this id should be regenerated (delete the
+    // field and run `zig build`) if the upstream project is still maintained.
+    // Otherwise, the fork is *hostile*, attempting to take control over the
+    // original project's identity. Thus it is recommended to leave the comment
+    // on the following line intact, so that it shows up in code reviews that
+    // modify the field.
+    .fingerprint = 0x4f84e6a7e054cdf3, // Changing this has security and trust implications.
+    // Tracks the earliest Zig version that the package considers to be a
+    // supported use case.
+    .minimum_zig_version = "0.15.2",
+    // This field is optional.
+    // Each dependency must either provide a `url` and `hash`, or a `path`.
+    // `zig build --fetch` can be used to fetch all dependencies of a package, recursively.
+    // Once all dependencies are fetched, `zig build` no longer requires
+    // internet connectivity.
+    .dependencies = .{
+        // See `zig fetch --save <url>` for a command-line interface for adding dependencies.
+        //.example = .{
+        //    // When updating this field to a new URL, be sure to delete the corresponding
+        //    // `hash`, otherwise you are communicating that you expect to find the old hash at
+        //    // the new URL. If the contents of a URL change this will result in a hash mismatch
+        //    // which will prevent zig from using it.
+        //    .url = "https://example.com/foo.tar.gz",
+        //
+        //    // This is computed from the file contents of the directory of files that is
+        //    // obtained after fetching `url` and applying the inclusion rules given by
+        //    // `paths`.
+        //    //
+        //    // This field is the source of truth; packages do not come from a `url`; they
+        //    // come from a `hash`. `url` is just one of many possible mirrors for how to
+        //    // obtain a package matching this `hash`.
+        //    //
+        //    // Uses the [multihash](https://multiformats.io/multihash/) format.
+        //    .hash = "...",
+        //
+        //    // When this is provided, the package is found in a directory relative to the
+        //    // build root. In this case the package's hash is irrelevant and therefore not
+        //    // computed. This field and `url` are mutually exclusive.
+        //    .path = "foo",
+        //
+        //    // When this is set to `true`, a package is declared to be lazily
+        //    // fetched. This makes the dependency only get fetched if it is
+        //    // actually used.
+        //    .lazy = false,
+        //},
+    },
+    // Specifies the set of files and directories that are included in this package.
+    // Only files and directories listed here are included in the `hash` that
+    // is computed for this package. Only files listed here will remain on disk
+    // when using the zig package manager. As a rule of thumb, one should list
+    // files required for compilation plus any license(s).
+    // Paths are relative to the build root. Use the empty string (`""`) to refer to
+    // the build root itself.
+    // A directory listed here means that all files within, recursively, are included.
+    .paths = .{
+        "build.zig",
+        "build.zig.zon",
+        "src",
+        // For example...
+        //"LICENSE",
+        //"README.md",
+    },
+}
diff --git a/k8s/transcribator.yaml b/k8s/transcribator.yaml
new file mode 100644
index 0000000..9ca7137
--- /dev/null
+++ b/k8s/transcribator.yaml
@@ -0,0 +1,49 @@
+apiVersion: v1
+kind: Namespace
+metadata:
+  name: transcribator
+---
+apiVersion: v1
+kind: Secret
+metadata:
+  name: transcribator-secret
+  namespace: transcribator
+type: Opaque
+stringData:
+  TELEGRAM_BOT_TOKEN: "REPLACE_ME"
+---
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: transcribator
+  namespace: transcribator
+spec:
+  replicas: 1
+  selector:
+    matchLabels:
+      app: transcribator
+  template:
+    metadata:
+      labels:
+        app: transcribator
+    spec:
+      containers:
+        - name: transcribator
+          image: gitea.mikhailkilin.ru/killingdruid/transcribator:latest
+          env:
+            - name: TELEGRAM_BOT_TOKEN
+              valueFrom:
+                secretKeyRef:
+                  name: transcribator-secret
+                  key: TELEGRAM_BOT_TOKEN
+            - name: WHISPER_URL
+              value: "http://whisper.whisper.svc:8000"
+            - name: WHISPER_LANGUAGE
+              value: "ru"
+          resources:
+            requests:
+              memory: "32Mi"
+              cpu: "10m"
+            limits:
+              memory: "128Mi"
+              cpu: "500m"
diff --git a/src/http.zig b/src/http.zig
new file mode 100644
index 0000000..c973817
--- /dev/null
+++ b/src/http.zig
@@ -0,0 +1,125 @@
+//! Blocking HTTP helpers built on `std.http.Client`.
+//!
+//! Each request helper creates a short-lived client and buffers the whole
+//! response body in memory. A failed fetch or any status other than 200 OK is
+//! reported as `error.HttpRequestFailed`.
+
+const std = @import("std");
+const Allocator = std.mem.Allocator;
+
+/// Performs a GET request to `url` and returns the response body.
+/// Caller owns the returned slice and frees it with `allocator`.
+pub fn httpGet(allocator: Allocator, url: []const u8) ![]u8 {
+    var client: std.http.Client = .{ .allocator = allocator };
+    defer client.deinit();
+
+    var aw: std.Io.Writer.Allocating = .init(allocator);
+    defer aw.deinit();
+
+    const result = client.fetch(.{
+        .location = .{ .url = url },
+        .response_writer = &aw.writer,
+    }) catch return error.HttpRequestFailed;
+
+    if (result.status != .ok) return error.HttpRequestFailed;
+
+    return aw.toOwnedSlice() catch return error.OutOfMemory;
+}
+
+/// POSTs `body` to `url` with `Content-Type: application/json` and returns the
+/// response body. Caller owns the returned slice and frees it with `allocator`.
+pub fn httpPostJson(allocator: Allocator, url: []const u8, body: []const u8) ![]u8 {
+    var client: std.http.Client = .{ .allocator = allocator };
+    defer client.deinit();
+
+    var aw: std.Io.Writer.Allocating = .init(allocator);
+    defer aw.deinit();
+
+    const result = client.fetch(.{
+        .location = .{ .url = url },
+        .method = .POST,
+        .payload = body,
+        .headers = .{ .content_type = .{ .override = "application/json" } },
+        .response_writer = &aw.writer,
+    }) catch return error.HttpRequestFailed;
+
+    if (result.status != .ok) return error.HttpRequestFailed;
+
+    return aw.toOwnedSlice() catch return error.OutOfMemory;
+}
+
+/// POSTs a `multipart/form-data` request to `url` and returns the response body.
+///
+/// `fields` are `{ name, value }` pairs emitted as plain form fields. The file
+/// at the absolute path `file_path` (at most 100 MiB) is read fully into memory
+/// and attached as part `file_field` with filename `file_name`. Names and
+/// values are written verbatim, without escaping.
+/// Caller owns the returned slice and frees it with `allocator`.
+pub fn httpPostMultipart(
+    allocator: Allocator,
+    url: []const u8,
+    file_field: []const u8,
+    file_path: []const u8,
+    file_name: []const u8,
+    fields: []const [2][]const u8,
+) ![]u8 {
+    const boundary = "----ZigMultipartBoundary9876543210";
+
+    // Build multipart body
+    var body_writer: std.Io.Writer.Allocating = .init(allocator);
+    defer body_writer.deinit();
+
+    // Add form fields
+    for (fields) |field| {
+        body_writer.writer.print("--{s}\r\nContent-Disposition: form-data; name=\"{s}\"\r\n\r\n{s}\r\n", .{ boundary, field[0], field[1] }) catch return error.OutOfMemory;
+    }
+
+    // Add file field header
+    body_writer.writer.print("--{s}\r\nContent-Disposition: form-data; name=\"{s}\"; filename=\"{s}\"\r\nContent-Type: application/octet-stream\r\n\r\n", .{ boundary, file_field, file_name }) catch return error.OutOfMemory;
+
+    // Read and append file content
+    const file = std.fs.openFileAbsolute(file_path, .{}) catch return error.HttpRequestFailed;
+    defer file.close();
+    const file_content = file.readToEndAlloc(allocator, 100 * 1024 * 1024) catch return error.HttpRequestFailed;
+    defer allocator.free(file_content);
+
+    body_writer.writer.writeAll(file_content) catch return error.OutOfMemory;
+    body_writer.writer.print("\r\n--{s}--\r\n", .{boundary}) catch return error.OutOfMemory;
+
+    const body = body_writer.written();
+
+    // Send request
+    var client: std.http.Client = .{ .allocator = allocator };
+    defer client.deinit();
+
+    var response_aw: std.Io.Writer.Allocating = .init(allocator);
+    defer response_aw.deinit();
+
+    // The boundary is a compile-time constant, so the header needs no allocation.
+    const content_type = "multipart/form-data; boundary=" ++ boundary;
+
+    const result = client.fetch(.{
+        .location = .{ .url = url },
+        .method = .POST,
+        .payload = body,
+        .headers = .{ .content_type = .{ .override = content_type } },
+        .response_writer = &response_aw.writer,
+    }) catch return error.HttpRequestFailed;
+
+    if (result.status != .ok) return error.HttpRequestFailed;
+
+    return response_aw.toOwnedSlice() catch return error.OutOfMemory;
+}
+
+/// Downloads `url` and writes the body to the absolute path `dest_path`.
+/// If writing fails, the partially written file is removed again.
+pub fn downloadToFile(allocator: Allocator, url: []const u8, dest_path: []const u8) !void {
+    const data = try httpGet(allocator, url);
+    defer allocator.free(data);
+
+    const file = std.fs.createFileAbsolute(dest_path, .{}) catch return error.HttpRequestFailed;
+    // Registered before `close` so that on error the file is closed first.
+    errdefer std.fs.deleteFileAbsolute(dest_path) catch {};
+    defer file.close();
+    file.writeAll(data) catch return error.HttpRequestFailed;
+}
diff --git a/src/main.zig b/src/main.zig
new file mode 100644
index 0000000..112d6b1
--- /dev/null
+++ b/src/main.zig
@@ -0,0 +1,157 @@
+//! Telegram bot that transcribes voice messages and video notes via a
+//! Whisper-compatible speech-to-text HTTP API.
+//!
+//! Configuration comes from the environment: `TELEGRAM_BOT_TOKEN` (required),
+//! `WHISPER_URL` and `WHISPER_LANGUAGE` (both optional).
+
+const std = @import("std");
+const telegram = @import("telegram.zig");
+const whisper = @import("whisper.zig");
+
+const log = std.log.scoped(.transcribator);
+
+/// Reads the configuration, then long-polls Telegram forever and handles the
+/// received updates one at a time.
+pub fn main() !void {
+    var gpa: std.heap.GeneralPurposeAllocator(.{}) = .init;
+    defer _ = gpa.deinit();
+    const allocator = gpa.allocator();
+
+    const token = std.posix.getenv("TELEGRAM_BOT_TOKEN") orelse {
+        log.err("TELEGRAM_BOT_TOKEN is not set", .{});
+        return error.MissingToken;
+    };
+    const whisper_url = std.posix.getenv("WHISPER_URL") orelse "http://whisper.whisper.svc:8000";
+    const language = std.posix.getenv("WHISPER_LANGUAGE") orelse "ru";
+
+    var bot = try telegram.TelegramBot.init(allocator, token);
+    defer bot.deinit();
+
+    log.info("Bot started. Whisper: {s}, language: {s}", .{ whisper_url, language });
+
+    var offset: i64 = 0;
+
+    while (true) {
+        const updates = bot.getUpdates(offset, 30) catch |err| {
+            log.err("getUpdates failed: {s}", .{@errorName(err)});
+            std.Thread.sleep(5 * std.time.ns_per_s);
+            continue;
+        };
+        defer updates.deinit();
+
+        for (updates.value.result) |update| {
+            // The next poll asks only for updates after this one.
+            offset = update.update_id + 1;
+            processUpdate(allocator, &bot, update, whisper_url, language);
+        }
+    }
+}
+
+/// Transcribes the voice message or video note carried by `update`, if any.
+/// Failures are logged and reported to the chat; they never propagate.
+fn processUpdate(
+    allocator: std.mem.Allocator,
+    bot: *telegram.TelegramBot,
+    update: telegram.Update,
+    whisper_url: []const u8,
+    language: []const u8,
+) void {
+    const message = update.message orelse return;
+
+    var file_id: ?[]const u8 = null;
+    var is_video = false;
+
+    if (message.voice) |voice| {
+        file_id = voice.file_id;
+    } else if (message.video_note) |vn| {
+        file_id = vn.file_id;
+        is_video = true;
+    }
+
+    const fid = file_id orelse return;
+
+    log.info("Processing {s} message in chat {d}", .{
+        if (is_video) @as([]const u8, "video_note") else @as([]const u8, "voice"),
+        message.chat.id,
+    });
+
+    handleTranscription(allocator, bot, message, fid, is_video, whisper_url, language) catch |err| {
+        log.err("Transcription failed: {s}", .{@errorName(err)});
+        bot.sendMessage(message.chat.id, "Transcription failed.", message.message_id) catch |send_err| {
+            log.err("Could not report the failure to chat {d}: {s}", .{ message.chat.id, @errorName(send_err) });
+        };
+    };
+}
+
+/// Downloads the Telegram file `file_id`, converts it to Opus audio when it
+/// is a video note, sends it to Whisper and replies to `message` with the
+/// transcription. Temporary files under `/tmp` are removed on a best-effort
+/// basis before returning.
+fn handleTranscription(
+    allocator: std.mem.Allocator,
+    bot: *telegram.TelegramBot,
+    message: telegram.Message,
+    file_id: []const u8,
+    is_video: bool,
+    whisper_url: []const u8,
+    language: []const u8,
+) !void {
+    // Get file path from Telegram
+    const file_path = try bot.getFilePath(file_id);
+    defer allocator.free(file_path);
+
+    // Download file
+    const ext: []const u8 = if (is_video) ".mp4" else ".ogg";
+    const tmp_input = try std.fmt.allocPrint(allocator, "/tmp/tg_{d}{s}", .{ message.message_id, ext });
+    defer allocator.free(tmp_input);
+
+    try bot.downloadFile(file_path, tmp_input);
+    // Best-effort cleanup; runs before the `free` above (defers are LIFO).
+    defer std.fs.deleteFileAbsolute(tmp_input) catch {};
+
+    // Convert video to audio if needed
+    const tmp_audio: ?[]u8 = if (is_video)
+        try std.fmt.allocPrint(allocator, "/tmp/tg_{d}.ogg", .{message.message_id})
+    else
+        null;
+    defer if (tmp_audio) |path| {
+        // Delete first: the path bytes must still be alive for the unlink.
+        std.fs.deleteFileAbsolute(path) catch {};
+        allocator.free(path);
+    };
+
+    if (tmp_audio) |out_path| try convertToOpus(allocator, tmp_input, out_path);
+    const audio_path: []const u8 = tmp_audio orelse tmp_input;
+
+    // Transcribe
+    const text = try whisper.transcribe(allocator, whisper_url, audio_path, language);
+    defer allocator.free(text);
+
+    const reply: []const u8 = if (text.len == 0) "(empty transcription)" else text;
+    try bot.sendMessage(message.chat.id, reply, message.message_id);
+}
+
+/// Runs `ffmpeg` to write the audio track of `input_path` to `output_path`,
+/// encoded with libopus. Returns `error.FfmpegFailed` unless ffmpeg exits
+/// with status 0.
+fn convertToOpus(allocator: std.mem.Allocator, input_path: []const u8, output_path: []const u8) !void {
+    var child = std.process.Child.init(
+        &.{ "ffmpeg", "-y", "-i", input_path, "-vn", "-codec:a", "libopus", output_path },
+        allocator,
+    );
+    child.stderr_behavior = .Ignore;
+    child.stdout_behavior = .Ignore;
+
+    // `Term` is a tagged union; reading `.Exited` is only valid for a normal
+    // exit, so switch on the tag instead of assuming it.
+    const term = try child.spawnAndWait();
+    switch (term) {
+        .Exited => |code| if (code != 0) {
+            log.err("ffmpeg failed with exit code {d}", .{code});
+            return error.FfmpegFailed;
+        },
+        else => {
+            log.err("ffmpeg terminated abnormally ({s})", .{@tagName(term)});
+            return error.FfmpegFailed;
+        },
+    }
+}
diff --git a/src/telegram.zig b/src/telegram.zig
new file mode 100644
index 0000000..437f60f
--- /dev/null
+++ b/src/telegram.zig
@@ -0,0 +1,143 @@
+//! Minimal Telegram Bot API client: long polling, file download and replies.
+
+const std = @import("std");
+const http = @import("http.zig");
+const Allocator = std.mem.Allocator;
+
+/// Subset of the Telegram `Voice` object used by the bot.
+pub const Voice = struct {
+    file_id: []const u8,
+    duration: ?i64 = null,
+};
+
+/// Subset of the Telegram `VideoNote` object used by the bot.
+pub const VideoNote = struct {
+    file_id: []const u8,
+    duration: ?i64 = null,
+};
+
+/// Subset of the Telegram `Message` object used by the bot.
+pub const Message = struct {
+    message_id: i64,
+    chat: struct { id: i64 },
+    voice: ?Voice = null,
+    video_note: ?VideoNote = null,
+};
+
+/// Subset of the Telegram `Update` object used by the bot.
+pub const Update = struct {
+    update_id: i64,
+    message: ?Message = null,
+};
+
+/// Decoded response of the `getUpdates` method.
+pub const GetUpdatesResponse = struct {
+    ok: bool,
+    result: []Update = &.{},
+};
+
+/// Decoded response of the `getFile` method.
+pub const GetFileResponse = struct {
+    ok: bool,
+    result: ?struct {
+        file_path: ?[]const u8 = null,
+    } = null,
+};
+
+/// JSON request body of the `sendMessage` method.
+pub const SendMessageBody = struct {
+    chat_id: i64,
+    text: []const u8,
+    reply_to_message_id: ?i64 = null,
+};
+
+/// Telegram Bot API client bound to a single bot token.
+pub const TelegramBot = struct {
+    allocator: Allocator,
+    /// Borrowed from the caller of `init`; must outlive the bot.
+    token: []const u8,
+    /// `https://api.telegram.org/bot` followed by the token; freed by `deinit`.
+    api_base: []const u8,
+
+    /// Creates a client for `token`. Call `deinit` to release it.
+    pub fn init(allocator: Allocator, token: []const u8) !TelegramBot {
+        const api_base = try std.fmt.allocPrint(allocator, "https://api.telegram.org/bot{s}", .{token});
+        return .{
+            .allocator = allocator,
+            .token = token,
+            .api_base = api_base,
+        };
+    }
+
+    /// Frees the memory allocated by `init`.
+    pub fn deinit(self: *TelegramBot) void {
+        self.allocator.free(self.api_base);
+    }
+
+    /// Long-polls `getUpdates` for message updates starting at `offset`,
+    /// waiting up to `timeout` seconds. Caller must `deinit` the result.
+    pub fn getUpdates(self: *TelegramBot, offset: i64, timeout: u32) !std.json.Parsed(GetUpdatesResponse) {
+        const url = try std.fmt.allocPrint(self.allocator, "{s}/getUpdates?offset={d}&timeout={d}&allowed_updates=[\"message\"]", .{ self.api_base, offset, timeout });
+        defer self.allocator.free(url);
+
+        const body = try http.httpGet(self.allocator, url);
+        defer self.allocator.free(body);
+
+        // `body` is freed when this function returns, so every string must be
+        // copied into the result's arena. The `parseFromSlice` default
+        // (`.alloc_if_needed`) may instead return slices that point into `body`.
+        return std.json.parseFromSlice(GetUpdatesResponse, self.allocator, body, .{
+            .ignore_unknown_fields = true,
+            .allocate = .alloc_always,
+        });
+    }
+
+    /// Resolves `file_id` to the server-side path used for downloading.
+    /// Caller owns the returned slice. Returns `error.HttpRequestFailed`
+    /// when the response carries no `file_path`.
+    pub fn getFilePath(self: *TelegramBot, file_id: []const u8) ![]u8 {
+        const url = try std.fmt.allocPrint(self.allocator, "{s}/getFile?file_id={s}", .{ self.api_base, file_id });
+        defer self.allocator.free(url);
+
+        const body = try http.httpGet(self.allocator, url);
+        defer self.allocator.free(body);
+
+        const parsed = try std.json.parseFromSlice(GetFileResponse, self.allocator, body, .{ .ignore_unknown_fields = true });
+        defer parsed.deinit();
+
+        if (parsed.value.result) |result| {
+            if (result.file_path) |fp| {
+                return self.allocator.dupe(u8, fp);
+            }
+        }
+        return error.HttpRequestFailed;
+    }
+
+    /// Downloads the file at `file_path` (as returned by `getFilePath`) to
+    /// the absolute local path `dest`.
+    pub fn downloadFile(self: *TelegramBot, file_path: []const u8, dest: []const u8) !void {
+        const url = try std.fmt.allocPrint(self.allocator, "https://api.telegram.org/file/bot{s}/{s}", .{ self.token, file_path });
+        defer self.allocator.free(url);
+
+        try http.downloadToFile(self.allocator, url, dest);
+    }
+
+    /// Sends `text` to `chat_id`, optionally as a reply to message `reply_to`.
+    /// Serialization and HTTP errors are returned to the caller.
+    pub fn sendMessage(self: *TelegramBot, chat_id: i64, text: []const u8, reply_to: ?i64) !void {
+        const url = try std.fmt.allocPrint(self.allocator, "{s}/sendMessage", .{self.api_base});
+        defer self.allocator.free(url);
+
+        const msg = SendMessageBody{
+            .chat_id = chat_id,
+            .text = text,
+            .reply_to_message_id = reply_to,
+        };
+
+        const json_body = try std.json.Stringify.valueAlloc(self.allocator, msg, .{});
+        defer self.allocator.free(json_body);
+
+        const resp = try http.httpPostJson(self.allocator, url, json_body);
+        self.allocator.free(resp);
+    }
+};
diff --git a/src/whisper.zig b/src/whisper.zig
new file mode 100644
index 0000000..3dbfa46
--- /dev/null
+++ b/src/whisper.zig
@@ -0,0 +1,40 @@
+//! Client for a Whisper-compatible `/v1/audio/transcriptions` HTTP endpoint.
+
+const std = @import("std");
+const http = @import("http.zig");
+const Allocator = std.mem.Allocator;
+
+/// Part of the transcription response that the bot reads.
+const TranscriptionResponse = struct {
+    text: []const u8 = "",
+};
+
+/// Uploads the audio file at the absolute path `audio_path` to the server at
+/// `whisper_url` and returns the transcribed text (empty when the response
+/// has no `text` field). Caller owns the returned slice.
+pub fn transcribe(allocator: Allocator, whisper_url: []const u8, audio_path: []const u8, language: []const u8) ![]u8 {
+    const url = try std.fmt.allocPrint(allocator, "{s}/v1/audio/transcriptions", .{whisper_url});
+    defer allocator.free(url);
+
+    // Extract filename from path
+    const file_name = std.fs.path.basename(audio_path);
+
+    const body = try http.httpPostMultipart(
+        allocator,
+        url,
+        "file",
+        audio_path,
+        file_name,
+        &.{
+            .{ "model", "Systran/faster-whisper-medium" },
+            .{ "language", language },
+        },
+    );
+    defer allocator.free(body);
+
+    const parsed = try std.json.parseFromSlice(TranscriptionResponse, allocator, body, .{ .ignore_unknown_fields = true });
+    defer parsed.deinit();
+
+    // Copy out of `parsed`/`body` before the defers above release them.
+    return allocator.dupe(u8, parsed.value.text);
+}