Add transcribator page: audio recording + Whisper STT proxy
Some checks failed
ci/woodpecker/push/deploy Pipeline failed

Browser records audio via MediaRecorder API, bcard proxies it to
Whisper STT service and returns transcription as JSON.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
Mikhail Kilin
2026-02-18 20:23:10 +03:00
parent c1db6ae562
commit 22a16affa5
5 changed files with 1500 additions and 12 deletions

1292
Cargo.lock generated

File diff suppressed because it is too large Load Diff

View File

@@ -4,5 +4,8 @@ version = "0.1.0"
edition = "2021"

[dependencies]
axum = { version = "0.7", features = ["multipart"] }
reqwest = { version = "0.12", features = ["multipart", "json"] }
serde = { version = "1", features = ["derive"] }
serde_json = "1"
tokio = { version = "1", features = ["full"] }

View File

@@ -1,9 +1,14 @@
mod transcribe;
use axum::{response::Html, routing::{get, post}, Router};
use std::net::SocketAddr;
#[tokio::main]
async fn main() {
    let app = Router::new()
        .route("/", get(handler))
        .route("/transcribator", get(transcribator_page))
        .route("/api/transcribe", post(transcribe::transcribe));
    let addr = SocketAddr::from(([0, 0, 0, 0], 3000));
    println!("listening on {}", addr);
    let listener = tokio::net::TcpListener::bind(addr).await.unwrap();
@@ -14,6 +19,10 @@ async fn handler() -> Html<&'static str> {
    Html("<h1>Mikhail Kilin</h1>")
}
/// GET /transcribator — serves the recorder UI, embedded into the binary
/// at compile time via `include_str!`.
async fn transcribator_page() -> Html<&'static str> {
Html(include_str!("../static/transcribator.html"))
}
#[cfg(test)]
mod tests {
    use super::*;
@@ -23,4 +32,10 @@ mod tests {
    let response = handler().await;
    assert!(response.0.contains("Mikhail Kilin"));
}
// Smoke test: the embedded page is served and contains its title text.
#[tokio::test]
async fn test_transcribator_page() {
let response = transcribator_page().await;
assert!(response.0.contains("Transcribator"));
}
}

78
src/transcribe.rs Normal file
View File

@@ -0,0 +1,78 @@
use axum::{extract::Multipart, http::StatusCode, Json};
use serde::Serialize;
// In-cluster URL of the Whisper STT service (Kubernetes service DNS name).
const WHISPER_URL: &str = "http://whisper.whisper.svc:8000/v1/audio/transcriptions";
/// JSON body returned by `POST /api/transcribe`: the recognized text.
#[derive(Serialize)]
pub struct TranscribeResponse {
pub text: String,
}
/// POST /api/transcribe — reads the `audio` field from a multipart form,
/// forwards it to the Whisper STT service, and returns the transcription
/// as JSON.
///
/// # Errors
/// - `400 Bad Request` if the multipart body is malformed or has no
///   `audio` field.
/// - `500 Internal Server Error` if the MIME type cannot be attached to
///   the outgoing part.
/// - `502 Bad Gateway` if Whisper is unreachable, answers with a non-2xx
///   status, or returns a body that is not valid JSON.
pub async fn transcribe(
    mut multipart: Multipart,
) -> Result<Json<TranscribeResponse>, (StatusCode, String)> {
    // Extract the uploaded audio bytes and the client-supplied file name.
    let mut audio_data: Option<(Vec<u8>, String)> = None;
    while let Some(field) = multipart
        .next_field()
        .await
        .map_err(|e| (StatusCode::BAD_REQUEST, e.to_string()))?
    {
        if field.name() == Some("audio") {
            let file_name = field
                .file_name()
                .unwrap_or("recording.webm")
                .to_string();
            let data = field
                .bytes()
                .await
                .map_err(|e| (StatusCode::BAD_REQUEST, e.to_string()))?;
            audio_data = Some((data.to_vec(), file_name));
            // Only one audio field is expected; don't read further fields.
            break;
        }
    }
    let (data, file_name) = audio_data
        .ok_or_else(|| (StatusCode::BAD_REQUEST, "No audio field".to_string()))?;
    // The browser records WebM (Chrome/Firefox) or MP4 (Safari); derive
    // the MIME type from the extension of the uploaded file name.
    let mime = if file_name.ends_with(".m4a") || file_name.ends_with(".mp4") {
        "audio/mp4"
    } else {
        "audio/webm"
    };
    let part = reqwest::multipart::Part::bytes(data)
        .file_name(file_name)
        .mime_str(mime)
        .map_err(|e| (StatusCode::INTERNAL_SERVER_ERROR, e.to_string()))?;
    let form = reqwest::multipart::Form::new()
        .part("file", part)
        .text("model", "Systran/faster-whisper-medium")
        .text("language", "ru");
    // Reuse a single HTTP client across requests so connections to the
    // Whisper service are pooled instead of re-established on every call.
    static CLIENT: std::sync::OnceLock<reqwest::Client> = std::sync::OnceLock::new();
    let client = CLIENT.get_or_init(reqwest::Client::new);
    let resp = client
        .post(WHISPER_URL)
        .multipart(form)
        .send()
        .await
        .map_err(|e| (StatusCode::BAD_GATEWAY, format!("Whisper unavailable: {e}")))?;
    if !resp.status().is_success() {
        let status = resp.status();
        let body = resp.text().await.unwrap_or_default();
        return Err((StatusCode::BAD_GATEWAY, format!("Whisper {status}: {body}")));
    }
    let whisper_resp: serde_json::Value = resp
        .json()
        .await
        .map_err(|e| (StatusCode::BAD_GATEWAY, format!("Invalid Whisper response: {e}")))?;
    // Whisper responds with `{ "text": "..." }`; tolerate a missing field.
    let text = whisper_resp["text"]
        .as_str()
        .unwrap_or("")
        .to_string();
    Ok(Json(TranscribeResponse { text }))
}

118
static/transcribator.html Normal file
View File

@@ -0,0 +1,118 @@
<!DOCTYPE html>
<html lang="ru" data-theme="dark">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>Transcribator</title>
<link rel="stylesheet" href="https://cdn.jsdelivr.net/npm/@picocss/pico@2/css/pico.min.css">
<style>
:root { --pico-font-size: 16px; }
html, body { height: 100dvh; margin: 0; overflow: hidden; }
body { display: flex; flex-direction: column; }
main { flex: 1; display: flex; flex-direction: column; max-width: 640px; width: 100%; margin: 0 auto; padding: 0.5rem; overflow: hidden; }
h1 { text-align: center; margin: 0.5rem 0; font-size: 1.4rem; }
#chat { flex: 1; overflow-y: auto; display: flex; flex-direction: column; gap: 0.75rem; padding: 0.5rem 0; }
#chat:empty::before { content: "Tap the microphone to start recording"; display: block; text-align: center; opacity: 0.5; margin-top: 2rem; }
#chat article { margin: 0; padding: 0.75rem; }
#chat article audio { width: 100%; margin-bottom: 0.5rem; }
#chat article p { margin: 0; }
.input-area { padding: 0.5rem 0; display: flex; justify-content: center; }
#mic-btn {
width: 64px; height: 64px; border-radius: 50%; font-size: 1.8rem;
display: flex; align-items: center; justify-content: center;
cursor: pointer; border: none; padding: 0; min-height: 48px;
}
#mic-btn.recording { background: var(--pico-del-color); animation: pulse 1s infinite; }
@keyframes pulse { 0%, 100% { transform: scale(1); } 50% { transform: scale(1.1); } }
.spinner { display: inline-block; width: 1em; height: 1em; border: 2px solid currentColor; border-right-color: transparent; border-radius: 50%; animation: spin 0.6s linear infinite; vertical-align: middle; }
@keyframes spin { to { transform: rotate(360deg); } }
.error { color: var(--pico-del-color); }
</style>
</head>
<body>
<main>
<h1>Transcribator</h1>
<div id="chat"></div>
<div class="input-area">
<button id="mic-btn" aria-label="Record">&#127908;</button>
</div>
</main>
<script>
const chat = document.getElementById('chat');
const micBtn = document.getElementById('mic-btn');
let mediaRecorder = null;
let chunks = [];
// Choose a recording container the current browser can produce:
// Opus-in-WebM first (Chrome/Firefox), then MP4 (Safari), else let the
// browser pick its default by returning an empty string.
function getMimeType() {
    const candidates = [
        ['audio/webm;codecs=opus', 'audio/webm'],
        ['audio/mp4', 'audio/mp4'],
    ];
    for (const [probe, mime] of candidates) {
        if (MediaRecorder.isTypeSupported(probe)) return mime;
    }
    return '';
}
// Map a recording MIME type to the file extension used in the upload name.
function getExtension(mime) {
    if (mime.includes('webm')) return 'webm';
    return mime.includes('mp4') ? 'm4a' : 'bin';
}
// Append a chat bubble with a playable copy of the recording plus a
// "Transcribing..." placeholder; returns the placeholder element so the
// caller can replace it with the transcription (or an error) later.
function addMessage(audioBlob) {
    const bubble = document.createElement('article');

    const player = document.createElement('audio');
    player.controls = true;
    player.src = URL.createObjectURL(audioBlob);

    const placeholder = document.createElement('p');
    placeholder.innerHTML = '<span class="spinner"></span> Transcribing...';

    bubble.append(player, placeholder);
    chat.append(bubble);
    // Keep the newest message in view.
    chat.scrollTop = chat.scrollHeight;
    return placeholder;
}
// Upload a recording to /api/transcribe and write the result into textEl.
// Fix: error text (the server's response body or an exception message) was
// previously interpolated into innerHTML, so any markup in a proxied
// Whisper error would have been injected into the page. Errors are now
// rendered via textContent, which can never be parsed as HTML.
async function sendAudio(blob, textEl) {
    const mime = blob.type || 'audio/webm';
    const ext = getExtension(mime);
    const form = new FormData();
    form.append('audio', blob, `recording.${ext}`);
    const showError = (msg) => {
        const span = document.createElement('span');
        span.className = 'error';
        span.textContent = `Error: ${msg}`;
        textEl.replaceChildren(span);
    };
    try {
        const res = await fetch('/api/transcribe', { method: 'POST', body: form });
        if (!res.ok) {
            showError(await res.text());
            return;
        }
        const data = await res.json();
        textEl.textContent = data.text || '(empty)';
    } catch (e) {
        showError(e.message);
    }
}
// One button, two states: a tap while recording stops (and submits) the
// take; otherwise a new recording is started.
async function toggleRecording() {
    const isRecording = mediaRecorder && mediaRecorder.state === 'recording';
    if (isRecording) {
        mediaRecorder.stop();
        return;
    }
    try {
        const stream = await navigator.mediaDevices.getUserMedia({ audio: true });
        const mime = getMimeType();
        mediaRecorder = new MediaRecorder(stream, mime ? { mimeType: mime } : {});
        chunks = [];
        mediaRecorder.ondataavailable = (e) => {
            if (e.data.size > 0) chunks.push(e.data);
        };
        mediaRecorder.onstop = () => {
            // Release the microphone and restore the idle button state.
            stream.getTracks().forEach((t) => t.stop());
            micBtn.classList.remove('recording');
            const blob = new Blob(chunks, { type: mediaRecorder.mimeType });
            sendAudio(blob, addMessage(blob));
        };
        mediaRecorder.start();
        micBtn.classList.add('recording');
    } catch (e) {
        alert('Microphone access denied: ' + e.message);
    }
}
micBtn.addEventListener('click', toggleRecording);
</script>
</body>
</html>