Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
39 changes: 37 additions & 2 deletions packages/app/src/components/prompt-input.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,7 @@ import { PromptContextItems } from "./prompt-input/context-items"
import { PromptImageAttachments } from "./prompt-input/image-attachments"
import { PromptDragOverlay } from "./prompt-input/drag-overlay"
import { promptPlaceholder } from "./prompt-input/placeholder"
import { createVoiceInput } from "./prompt-input/voice"
import { ImagePreview } from "@opencode-ai/ui/image-preview"
import { useQueries } from "@tanstack/solid-query"
import { useQueryOptions } from "@/context/server-sync"
Expand Down Expand Up @@ -1010,6 +1011,21 @@ export const PromptInput: Component<PromptInputProps> = (props) => {
return true
}

// Voice dictation controller for this prompt input. It is driven by the
// voice button (voice.start) and read by voiceTip (voice.status / voice.error).
const voice = createVoiceInput({
// Passed as an accessor so the current locale is read when it is needed.
language: () => language.locale(),
onText: (text) => {
// Hand the recognized transcript to addPart as a text part, then call restoreFocus.
// NOTE(review): start/end are both 0 here; confirm how addPart interprets them.
addPart({ type: "text", content: text, start: 0, end: 0 })
restoreFocus()
},
})

/**
 * Resolves the localized label used for the voice button's tooltip and aria-label.
 * A pending error key wins over the status-based labels.
 */
const voiceTip = () => {
  const failure = voice.error()
  // The error signal holds an i18n key, so it is translated like any other label.
  if (failure) return language.t(failure as Parameters<typeof language.t>[0])

  switch (voice.status()) {
    case "recording":
      return language.t("prompt.voice.stop")
    case "transcribing":
      return language.t("prompt.voice.transcribing")
    default:
      return language.t("prompt.voice.start")
  }
}

const addToHistory = (prompt: Prompt, mode: "normal" | "shell") => {
const currentHistory = mode === "shell" ? shellHistory : history
const setCurrentHistory = mode === "shell" ? setShellHistory : setHistory
Expand Down Expand Up @@ -1639,7 +1655,11 @@ export const PromptInput: Component<PromptInputProps> = (props) => {
onMouseDown={(e) => {
const target = e.target
if (!(target instanceof HTMLElement)) return
if (target.closest('[data-action="prompt-attach"], [data-action="prompt-submit"]')) {
if (
target.closest(
'[data-action="prompt-attach"], [data-action="prompt-submit"], [data-action="prompt-voice"]',
)
) {
return
}
editorRef?.focus()
Expand Down Expand Up @@ -1733,7 +1753,7 @@ export const PromptInput: Component<PromptInputProps> = (props) => {
<div class="pointer-events-none absolute bottom-2 left-2">
<div
aria-hidden={store.mode !== "normal"}
class="pointer-events-auto"
class="pointer-events-auto flex items-center gap-1"
style={{
"pointer-events": buttonsSpring() > 0.5 ? "auto" : "none",
}}
Expand All @@ -1757,6 +1777,21 @@ export const PromptInput: Component<PromptInputProps> = (props) => {
<Icon name="plus" class="size-4.5" />
</Button>
</TooltipKeybind>
<Tooltip placement="top" value={voiceTip()}>
<Button
data-action="prompt-voice"
type="button"
variant="ghost"
class={`size-8 p-0 ${voice.status() === "recording" ? "text-red-500" : ""}`}
style={buttons()}
onClick={() => void voice.start()}
disabled={store.mode !== "normal" || voice.status() === "transcribing"}
tabIndex={store.mode === "normal" ? undefined : -1}
aria-label={voiceTip()}
>
<Icon name={voice.status() === "recording" ? "stop" : "speech-bubble"} class="size-4.5" />
</Button>
</Tooltip>
</div>
</div>
</div>
Expand Down
190 changes: 190 additions & 0 deletions packages/app/src/components/prompt-input/voice.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,190 @@
import { createSignal, onCleanup } from "solid-js"
import { getSpeechRecognitionCtor } from "@/utils/runtime-adapters"

// Model identifier handed to the `pipeline(...)` call in loadWhisper.
const WHISPER_MODEL = "onnx-community/whisper-large-v3-turbo"
// URL that loadWhisper dynamically imports at runtime to obtain the Transformers module.
const TRANSFORMERS_IMPORT = "https://cdn.jsdelivr.net/npm/@huggingface/transformers@4.2.0"

/** Lifecycle states exposed through the `status` signal of createVoiceInput. */
type VoiceInputStatus = "idle" | "recording" | "transcribing" | "error"

/**
 * Structural subset of a speech-recognition instance that this module uses:
 * the configuration fields it sets, the handlers it assigns, and start/stop.
 */
type SpeechRecognitionLike = {
continuous: boolean
interimResults: boolean
lang: string
onresult: ((event: SpeechRecognitionEventLike) => void) | null
onerror: ((event: SpeechRecognitionErrorEventLike) => void) | null
onend: (() => void) | null
start: () => void
stop: () => void
}

/** Result event shape: a list of results, each a list of alternatives carrying a transcript. */
type SpeechRecognitionEventLike = {
results: ArrayLike<ArrayLike<{ transcript: string; isFinal?: boolean }>>
}

/** Error event shape; the optional `error` field is not read by this module. */
type SpeechRecognitionErrorEventLike = {
error?: string
}

/**
 * Structural subset of the dynamically imported Transformers module that
 * loadWhisper relies on: the optional `env` settings object and `pipeline`.
 */
type TransformersModule = {
env?: {
allowLocalModels?: boolean
}
pipeline: (
task: "automatic-speech-recognition",
model: string,
options: { device?: "webgpu" | "wasm" },
) => Promise<WhisperPipeline>
}

/**
 * Callable returned by `pipeline(...)`: takes audio samples plus chunking and
 * task options and resolves to a WhisperResult.
 */
type WhisperPipeline = (
audio: Float32Array,
options: {
chunk_length_s: number
stride_length_s: number
language?: string
task: "transcribe"
},
) => Promise<WhisperResult>

/** Transcription result; `text` may be absent, so callers must handle undefined. */
type WhisperResult = {
text?: string
}

/** Options accepted by createVoiceInput. */
type VoiceInputOptions = {
// Accessor for the locale; it is assigned to `recognition.lang` in the speech-recognition path.
language: () => string
// Called with the trimmed, non-empty transcript produced by either recognition path.
onText: (text: string) => void
}

// Module-level cache of the pipeline promise, assigned by loadWhisper and shared by every createVoiceInput instance.
let whisperPipeline: Promise<WhisperPipeline> | undefined

/**
 * Creates a voice-dictation controller for the current reactive owner.
 *
 * `start()` toggles dictation: it first tries to record the microphone with
 * MediaRecorder and transcribe the recording through the Whisper pipeline; if
 * recording cannot be started it falls back to the browser speech-recognition
 * constructor returned by `getSpeechRecognitionCtor`. Calling `start()` while
 * recording stops the active session instead.
 *
 * @param options.language Accessor for the locale, used as `lang` in the
 *   speech-recognition fallback.
 * @param options.onText Receives each trimmed, non-empty transcript.
 * @returns `status` and `error` signals (the error holds an i18n key),
 *   a `supported()` probe for microphone capture, and the `start()` toggle.
 */
export function createVoiceInput(options: VoiceInputOptions) {
  const [status, setStatus] = createSignal<VoiceInputStatus>("idle")
  const [error, setError] = createSignal("")
  let recorder: MediaRecorder | undefined
  let recognition: SpeechRecognitionLike | undefined
  // True while start() is still awaiting microphone access. Without it a second
  // click would open a second stream and orphan the first recorder.
  let starting = false
  // Set on cleanup so late recorder/recognition callbacks cannot call
  // options.onText for an owner that no longer exists.
  let disposed = false

  const supported = () => typeof navigator !== "undefined" && !!navigator.mediaDevices?.getUserMedia

  // Publishes an i18n error key and moves to the "error" state.
  const fail = (key: string) => {
    setError(key)
    setStatus("error")
  }

  const stop = () => {
    if (recorder?.state === "recording") {
      recorder.stop()
      return
    }
    recognition?.stop()
  }

  const start = async () => {
    if (disposed || starting) return
    if (status() === "recording") {
      stop()
      return
    }
    if (status() === "transcribing") return

    setError("")
    starting = true
    try {
      await startWhisper()
    } catch (err) {
      console.warn("Local Whisper voice input unavailable, falling back to browser speech recognition.", err)
      if (!disposed) startSpeechRecognition()
    } finally {
      starting = false
    }
  }

  const startWhisper = async () => {
    if (!supported()) throw new Error("Microphone capture is unavailable.")
    const stream = await navigator.mediaDevices.getUserMedia({ audio: true })
    const release = () => stream.getTracks().forEach((track) => track.stop())

    // The owner may have been disposed while the permission prompt was open.
    if (disposed) {
      release()
      return
    }

    // Each recording collects into its own array so sessions cannot mix data.
    const recorded: Blob[] = []
    let failed = false
    try {
      const active = new MediaRecorder(stream)
      active.ondataavailable = (event) => {
        if (event.data.size === 0) return
        recorded.push(event.data)
      }
      active.onerror = () => {
        failed = true
        release()
        fail("prompt.voice.error")
      }
      active.onstop = () => {
        release()
        // A stop that follows an error or disposal must not start a transcription.
        if (failed || disposed) return
        void transcribe(new Blob(recorded, { type: active.mimeType || "audio/webm" }))
      }
      active.start()
      recorder = active
    } catch (err) {
      // Constructing or starting the recorder failed: release the microphone
      // before the caller falls back to speech recognition.
      release()
      throw err
    }
    setStatus("recording")
  }

  const transcribe = async (blob: Blob) => {
    setStatus("transcribing")
    try {
      const pipeline = await loadWhisper()
      const result = await pipeline(await blobToMonoAudio(blob), {
        chunk_length_s: 30,
        stride_length_s: 5,
        language: undefined,
        task: "transcribe",
      })
      if (disposed) return
      const text = result.text?.trim()
      if (text) options.onText(text)
      setStatus("idle")
    } catch (err) {
      console.warn("Local Whisper transcription failed.", err)
      if (disposed) return
      fail("prompt.voice.error")
    }
  }

  const startSpeechRecognition = () => {
    const SpeechRecognition = getSpeechRecognitionCtor<SpeechRecognitionLike>(globalThis)
    if (!SpeechRecognition) {
      fail("prompt.voice.unsupported")
      return
    }

    try {
      const session = new SpeechRecognition()
      session.continuous = false
      session.interimResults = false
      session.lang = options.language()
      session.onresult = (event) => {
        if (disposed) return
        const text = Array.from(event.results)
          .flatMap((result) => Array.from(result))
          .map((result) => result.transcript)
          .join(" ")
          .trim()
        if (text) options.onText(text)
      }
      session.onerror = () => fail("prompt.voice.error")
      session.onend = () => {
        // Only a still-recording session returns to idle; an error state is kept.
        if (status() === "recording") setStatus("idle")
      }
      session.start()
      recognition = session
    } catch (err) {
      // This runs inside start()'s catch fallback, so a throw here would
      // otherwise surface as an unhandled rejection of `void voice.start()`.
      console.warn("Browser speech recognition failed to start.", err)
      fail("prompt.voice.error")
      return
    }
    setStatus("recording")
  }

  onCleanup(() => {
    disposed = true
    if (recorder?.state === "recording") recorder.stop()
    recognition?.stop()
  })

  return { status, error, supported, start }
}

/**
 * Lazily imports the Transformers module and builds the speech-recognition
 * pipeline, caching the in-flight or resolved promise in `whisperPipeline` so
 * the module and model are requested once per page.
 *
 * A rejected load is removed from the cache, so a transient failure (for
 * example a network error while importing) can be retried by the next call
 * instead of disabling Whisper until reload.
 *
 * @returns The shared pipeline promise; rejects if the import or pipeline creation fails.
 */
async function loadWhisper() {
  if (!whisperPipeline) {
    const pending: Promise<WhisperPipeline> = import(/* @vite-ignore */ TRANSFORMERS_IMPORT).then((mod) => {
      const transformers = mod as TransformersModule
      if (transformers.env) transformers.env.allowLocalModels = false
      return transformers.pipeline("automatic-speech-recognition", WHISPER_MODEL, {
        // Use WebGPU when the navigator exposes a `gpu` property, WASM otherwise.
        device: typeof navigator !== "undefined" && "gpu" in navigator ? "webgpu" : "wasm",
      })
    })
    // Drop the cached promise on failure, but only if it is still the cached one.
    void pending.catch(() => {
      if (whisperPipeline === pending) whisperPipeline = undefined
    })
    whisperPipeline = pending
  }
  return whisperPipeline
}

/**
 * Decodes a recorded audio blob and returns the samples of its first channel.
 *
 * The AudioContext is created with a 16 kHz sample rate, so the decoded buffer
 * is at that rate (presumably what the Whisper pipeline expects; confirm
 * against the Transformers.js documentation). Only channel 0 is returned; any
 * other channels are ignored rather than mixed down.
 *
 * The context is closed in `finally`, so it is released even when reading or
 * decoding the blob fails.
 *
 * @param blob Encoded audio, as produced by MediaRecorder.
 * @returns Channel-0 samples of the decoded audio.
 * @throws Rejects when the blob cannot be read or decoded.
 */
async function blobToMonoAudio(blob: Blob) {
  const context = new AudioContext({ sampleRate: 16000 })
  try {
    const buffer = await context.decodeAudioData(await blob.arrayBuffer())
    return buffer.getChannelData(0)
  } finally {
    await context.close()
  }
}
5 changes: 5 additions & 0 deletions packages/app/src/i18n/ar.ts
Original file line number Diff line number Diff line change
Expand Up @@ -256,6 +256,11 @@ export const dict = {
"prompt.context.removeFile": "إزالة الملف من السياق",
"prompt.action.attachFile": "إرفاق ملف",
"prompt.attachment.remove": "إزالة المرفق",
"prompt.voice.start": "Dictate with local Whisper",
"prompt.voice.stop": "Stop dictation",
"prompt.voice.transcribing": "Transcribing locally...",
"prompt.voice.unsupported": "Voice input is not available in this browser.",
"prompt.voice.error": "Voice input failed.",
"prompt.action.send": "إرسال",
"prompt.action.stop": "توقف",
"prompt.toast.pasteUnsupported.title": "مرفق غير مدعوم",
Expand Down
5 changes: 5 additions & 0 deletions packages/app/src/i18n/br.ts
Original file line number Diff line number Diff line change
Expand Up @@ -256,6 +256,11 @@ export const dict = {
"prompt.context.removeFile": "Remover arquivo do contexto",
"prompt.action.attachFile": "Anexar arquivo",
"prompt.attachment.remove": "Remover anexo",
"prompt.voice.start": "Dictate with local Whisper",
"prompt.voice.stop": "Stop dictation",
"prompt.voice.transcribing": "Transcribing locally...",
"prompt.voice.unsupported": "Voice input is not available in this browser.",
"prompt.voice.error": "Voice input failed.",
"prompt.action.send": "Enviar",
"prompt.action.stop": "Parar",
"prompt.toast.pasteUnsupported.title": "Anexo não suportado",
Expand Down
5 changes: 5 additions & 0 deletions packages/app/src/i18n/bs.ts
Original file line number Diff line number Diff line change
Expand Up @@ -276,6 +276,11 @@ export const dict = {
"prompt.context.removeFile": "Ukloni datoteku iz konteksta",
"prompt.action.attachFile": "Priloži datoteku",
"prompt.attachment.remove": "Ukloni prilog",
"prompt.voice.start": "Dictate with local Whisper",
"prompt.voice.stop": "Stop dictation",
"prompt.voice.transcribing": "Transcribing locally...",
"prompt.voice.unsupported": "Voice input is not available in this browser.",
"prompt.voice.error": "Voice input failed.",
"prompt.action.send": "Pošalji",
"prompt.action.stop": "Zaustavi",

Expand Down
5 changes: 5 additions & 0 deletions packages/app/src/i18n/da.ts
Original file line number Diff line number Diff line change
Expand Up @@ -274,6 +274,11 @@ export const dict = {
"prompt.context.removeFile": "Fjern fil fra kontekst",
"prompt.action.attachFile": "Vedhæft fil",
"prompt.attachment.remove": "Fjern vedhæftning",
"prompt.voice.start": "Dictate with local Whisper",
"prompt.voice.stop": "Stop dictation",
"prompt.voice.transcribing": "Transcribing locally...",
"prompt.voice.unsupported": "Voice input is not available in this browser.",
"prompt.voice.error": "Voice input failed.",
"prompt.action.send": "Send",
"prompt.action.stop": "Stop",

Expand Down
5 changes: 5 additions & 0 deletions packages/app/src/i18n/de.ts
Original file line number Diff line number Diff line change
Expand Up @@ -261,6 +261,11 @@ export const dict = {
"prompt.context.removeFile": "Datei aus dem Kontext entfernen",
"prompt.action.attachFile": "Datei anhängen",
"prompt.attachment.remove": "Anhang entfernen",
"prompt.voice.start": "Dictate with local Whisper",
"prompt.voice.stop": "Stop dictation",
"prompt.voice.transcribing": "Transcribing locally...",
"prompt.voice.unsupported": "Voice input is not available in this browser.",
"prompt.voice.error": "Voice input failed.",
"prompt.action.send": "Senden",
"prompt.action.stop": "Stopp",
"prompt.toast.pasteUnsupported.title": "Nicht unterstützter Anhang",
Expand Down
5 changes: 5 additions & 0 deletions packages/app/src/i18n/en.ts
Original file line number Diff line number Diff line change
Expand Up @@ -282,6 +282,11 @@ export const dict = {
"prompt.context.removeFile": "Remove file from context",
"prompt.action.attachFile": "Add files",
"prompt.attachment.remove": "Remove attachment",
"prompt.voice.start": "Dictate with local Whisper",
"prompt.voice.stop": "Stop dictation",
"prompt.voice.transcribing": "Transcribing locally...",
"prompt.voice.unsupported": "Voice input is not available in this browser.",
"prompt.voice.error": "Voice input failed.",
"prompt.action.send": "Send",
"prompt.action.stop": "Stop",

Expand Down
5 changes: 5 additions & 0 deletions packages/app/src/i18n/es.ts
Original file line number Diff line number Diff line change
Expand Up @@ -275,6 +275,11 @@ export const dict = {
"prompt.context.removeFile": "Eliminar archivo del contexto",
"prompt.action.attachFile": "Adjuntar archivo",
"prompt.attachment.remove": "Eliminar adjunto",
"prompt.voice.start": "Dictate with local Whisper",
"prompt.voice.stop": "Stop dictation",
"prompt.voice.transcribing": "Transcribing locally...",
"prompt.voice.unsupported": "Voice input is not available in this browser.",
"prompt.voice.error": "Voice input failed.",
"prompt.action.send": "Enviar",
"prompt.action.stop": "Detener",

Expand Down
5 changes: 5 additions & 0 deletions packages/app/src/i18n/fr.ts
Original file line number Diff line number Diff line change
Expand Up @@ -256,6 +256,11 @@ export const dict = {
"prompt.context.removeFile": "Retirer le fichier du contexte",
"prompt.action.attachFile": "Joindre un fichier",
"prompt.attachment.remove": "Supprimer la pièce jointe",
"prompt.voice.start": "Dicter avec Whisper local",
"prompt.voice.stop": "Arrêter la dictée",
"prompt.voice.transcribing": "Transcription locale...",
"prompt.voice.unsupported": "La saisie vocale n'est pas disponible dans ce navigateur.",
"prompt.voice.error": "La saisie vocale a échoué.",
"prompt.action.send": "Envoyer",
"prompt.action.stop": "Arrêter",
"prompt.toast.pasteUnsupported.title": "Pièce jointe non prise en charge",
Expand Down
5 changes: 5 additions & 0 deletions packages/app/src/i18n/ja.ts
Original file line number Diff line number Diff line change
Expand Up @@ -255,6 +255,11 @@ export const dict = {
"prompt.context.removeFile": "コンテキストからファイルを削除",
"prompt.action.attachFile": "ファイルを添付",
"prompt.attachment.remove": "添付ファイルを削除",
"prompt.voice.start": "Dictate with local Whisper",
"prompt.voice.stop": "Stop dictation",
"prompt.voice.transcribing": "Transcribing locally...",
"prompt.voice.unsupported": "Voice input is not available in this browser.",
"prompt.voice.error": "Voice input failed.",
"prompt.action.send": "送信",
"prompt.action.stop": "停止",
"prompt.toast.pasteUnsupported.title": "サポートされていない添付ファイル",
Expand Down
5 changes: 5 additions & 0 deletions packages/app/src/i18n/ko.ts
Original file line number Diff line number Diff line change
Expand Up @@ -255,6 +255,11 @@ export const dict = {
"prompt.context.removeFile": "컨텍스트에서 파일 제거",
"prompt.action.attachFile": "파일 첨부",
"prompt.attachment.remove": "첨부 파일 제거",
"prompt.voice.start": "Dictate with local Whisper",
"prompt.voice.stop": "Stop dictation",
"prompt.voice.transcribing": "Transcribing locally...",
"prompt.voice.unsupported": "Voice input is not available in this browser.",
"prompt.voice.error": "Voice input failed.",
"prompt.action.send": "전송",
"prompt.action.stop": "중지",
"prompt.toast.pasteUnsupported.title": "지원되지 않는 첨부 파일",
Expand Down
5 changes: 5 additions & 0 deletions packages/app/src/i18n/no.ts
Original file line number Diff line number Diff line change
Expand Up @@ -278,6 +278,11 @@ export const dict = {
"prompt.context.removeFile": "Fjern fil fra kontekst",
"prompt.action.attachFile": "Legg ved fil",
"prompt.attachment.remove": "Fjern vedlegg",
"prompt.voice.start": "Dictate with local Whisper",
"prompt.voice.stop": "Stop dictation",
"prompt.voice.transcribing": "Transcribing locally...",
"prompt.voice.unsupported": "Voice input is not available in this browser.",
"prompt.voice.error": "Voice input failed.",
"prompt.action.send": "Send",
"prompt.action.stop": "Stopp",

Expand Down
Loading
Loading