Laravel AI SDK 音频处理实战
摘要
Laravel AI SDK 提供了统一的音频处理 API,支持语音合成(TTS)和语音识别(ASR)功能。本文将深入讲解如何使用 Laravel AI SDK 进行音频处理,包括:
- 语音合成(Text-to-Speech)基础用法
- 语音识别(Speech-to-Text)实现
- 支持的音频格式与模型
- 多语言与声音选择
- 实时流式音频处理
- 实战案例:构建语音助手
本文适合希望在 Laravel 应用中集成语音功能的开发者。
1. 音频处理概述
1.1 支持的功能
| 功能 | 描述 | 支持模型 |
|---|---|---|
| 语音合成 | 文本转语音 | OpenAI TTS、Google TTS、Azure TTS |
| 语音识别 | 语音转文本 | Whisper、Google Speech、Azure Speech |
| 语音翻译 | 多语言翻译 | Whisper |
| 声音克隆 | 自定义声音 | 部分提供商 |
1.2 基本用法
1 2 3 4 5 6 7
| use Laravel\Ai\Audio;
// Turn the text into speech using the SDK's default settings.
$audio = Audio::of('Hello, welcome to Laravel!')->generate();
// Write the generated audio bytes to greeting.mp3 through the Storage facade.
Storage::put('greeting.mp3', $audio->toBinary());
|
2. 语音合成(TTS)
2.1 基础用法
1 2 3 4 5 6
| use Laravel\Ai\Audio;
// Generate speech for the sentence.
$audio = Audio::of('I love coding with Laravel.') ->generate();
// The result is cast to a string here; presumably this yields the raw audio bytes — confirm against the SDK.
$rawContent = (string) $audio;
|
2.2 声音选择
1 2 3 4 5 6
| // Request the 'alloy' voice before generating the audio.
$audio = Audio::of('Hello, world!')
    ->voice('alloy')
    ->generate();
|
2.3 模型选择
1 2 3
| // Pin the synthesis model to 'tts-1' instead of relying on the default.
$audio = Audio::of('This is a test.')
    ->model('tts-1')
    ->generate();
|
2.4 输出格式
1 2 3
| // Ask for MP3 output.
$audio = Audio::of('Hello')
    ->format('mp3')
    ->generate();
|
2.5 语速控制
1 2 3
| // Pass a speed factor of 1.5 to the generator.
$audio = Audio::of('Fast speech example')
    ->speed(1.5)
    ->generate();
|
2.6 多语言支持
1 2 3 4 5 6 7
| // Chinese text, tagged with the zh-CN locale.
$audio = Audio::of('你好,欢迎使用 Laravel')
    ->language('zh-CN')
    ->generate();

// French text, tagged with the fr-FR locale.
$audio = Audio::of('Bonjour, bienvenue à Laravel')
    ->language('fr-FR')
    ->generate();
|
3. 语音识别(ASR)
3.1 基础用法
1 2 3 4 5 6 7
| use Laravel\Ai\Audio;
// Load the recording's contents from storage.
$audioFile = Storage::get('recording.mp3');
// Hand the audio contents to the transcriber.
$transcription = Audio::transcribe($audioFile);
// Print the recognized text.
echo $transcription->text;
|
3.2 模型选择
1 2 3
| // Pin the recognition model to 'whisper-1'.
$transcription = Audio::transcribe($audioFile)
    ->model('whisper-1')
    ->process();
|
3.3 语言指定
1 2 3
| // Tell the transcriber that the recording is in Chinese ('zh').
$transcription = Audio::transcribe($audioFile)
    ->language('zh')
    ->process();
|
3.4 输出格式
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17
| // Text only, via the text() shortcut.
$text = Audio::transcribe($audioFile)->text();

// JSON output.
$json = Audio::transcribe($audioFile)
    ->format('json')
    ->process();

// SRT subtitle output.
$srt = Audio::transcribe($audioFile)
    ->format('srt')
    ->process();

// WebVTT subtitle output.
$vtt = Audio::transcribe($audioFile)
    ->format('vtt')
    ->process();
|
3.5 时间戳
1 2 3 4 5 6 7
| // Request timestamps so the result exposes its segments.
$transcription = Audio::transcribe($audioFile)
    ->withTimestamps()
    ->process();

// Print one line per segment: "[start - end] text".
foreach ($transcription->segments as $segment) {
    echo "[{$segment->start} - {$segment->end}] {$segment->text}\n";
}
|
4. 语音翻译
4.1 基础用法
1 2 3 4 5 6 7
| // Load the French recording from storage.
$audioFile = Storage::get('french_recording.mp3');

// Translate the speech, with 'en' as the target language.
$translation = Audio::translate($audioFile)
    ->to('en')
    ->process();

echo $translation->text;
|
4.2 多语言翻译
1 2 3 4 5 6 7 8 9 10 11 12 13
| /**
 * Thin wrapper around the SDK's audio translation call.
 */
class TranslationService
{
    /**
     * Translate a stored audio file and return the translated text.
     *
     * @param string $audioPath  Path handed to Storage::get() to load the audio.
     * @param string $targetLang Language code passed to the translator's to() call.
     */
    public function translateAudio(string $audioPath, string $targetLang): string
    {
        $contents = Storage::get($audioPath);

        return Audio::translate($contents)
            ->to($targetLang)
            ->process()
            ->text;
    }
}
|
5. 流式音频处理
5.1 流式语音合成
1 2 3 4 5 6 7 8 9 10
| use Illuminate\Support\Facades\Response;
// Stream synthesized speech to the client chunk by chunk.
Route::get('/tts-stream', function () {
    // Echo and flush every chunk as soon as the SDK yields it.
    $emitAudio = function () {
        foreach (Audio::stream('Long text to synthesize...') as $chunk) {
            echo $chunk;
            flush();
        }
    };

    return Response::stream($emitAudio, 200, ['Content-Type' => 'audio/mpeg']);
});
|
5.2 实时转录
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26
| use Laravel\Ai\Audio\RealtimeTranscriber;
/**
 * Streams transcribed text back to the client while audio chunks arrive.
 */
class RealtimeTranscriptionController extends Controller
{
    /**
     * Return a streamed response that emits one JSON line per recognized piece of text.
     *
     * NOTE(review): getAudioChunk() is not defined in this snippet; it is expected
     * to return the next chunk, or null when the input is exhausted.
     */
    public function stream()
    {
        $transcriber = new RealtimeTranscriber();

        $emit = function () use ($transcriber) {
            // A null chunk marks the end of the input.
            while (($audioChunk = $this->getAudioChunk()) !== null) {
                $text = $transcriber->process($audioChunk);

                // Nothing recognized for this chunk yet: do not emit an empty line.
                if (! $text) {
                    continue;
                }

                echo json_encode(['text' => $text]) . "\n";
                flush();
            }
        };

        return response()->stream($emit);
    }
}
|
6. 实战案例:语音助手
6.1 控制器
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45
| <?php
namespace App\Http\Controllers;
use Laravel\Ai\Audio; use Laravel\Ai\Prompt; use Illuminate\Http\Request; use Illuminate\Support\Facades\Storage;
/**
 * HTTP endpoint for a single voice-assistant round trip.
 */
class VoiceAssistantController extends Controller
{
    /**
     * Transcribe the uploaded audio, generate a text answer, synthesize it,
     * and return the transcription, the answer, and the answer's audio URL as JSON.
     *
     * The uploaded file is stored temporarily and is always removed again,
     * including when transcription, prompting, or synthesis throws.
     */
    public function process(Request $request)
    {
        $validated = $request->validate([
            // For file rules, "max" is expressed in kilobytes.
            'audio' => 'required|file|max:10240',
            'language' => 'nullable|string|size:2',
        ]);

        $audioPath = $request->file('audio')->store('temp');

        try {
            $audioContent = Storage::get($audioPath);

            $transcription = Audio::transcribe($audioContent)
                ->language($validated['language'] ?? null)
                ->process();

            $response = Prompt::make($transcription->text)
                ->withSystemMessage('You are a helpful voice assistant.')
                ->generate();

            $audioResponse = Audio::of($response)
                ->voice('nova')
                ->generate();

            $responsePath = 'responses/' . uniqid() . '.mp3';
            Storage::put($responsePath, $audioResponse->toBinary());
        } finally {
            // Clean up the temporary upload on success and on failure alike.
            Storage::delete($audioPath);
        }

        return response()->json([
            'transcription' => $transcription->text,
            'response' => $response,
            'audio_url' => Storage::url($responsePath),
        ]);
    }
}
|
6.2 Livewire 组件
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56
| <?php
namespace App\Http\Livewire;
use Livewire\Component; use Livewire\WithFileUploads; use Laravel\Ai\Audio; use Laravel\Ai\Prompt; use Illuminate\Support\Facades\Storage;
/**
 * Livewire component: upload a recording, transcribe it, answer it, and
 * expose the spoken answer's URL to the view.
 */
class VoiceAssistant extends Component
{
    use WithFileUploads;

    public $audioFile;
    public string $transcription = '';
    public string $response = '';
    public ?string $audioUrl = null;
    public bool $processing = false;

    /**
     * Run the transcription -> prompt -> speech pipeline for the uploaded file.
     *
     * The temporary copy of the upload is deleted in the finally block, so it
     * is removed even when one of the SDK calls throws.
     */
    public function process()
    {
        $this->validate([
            // For file rules, "max" is expressed in kilobytes.
            'audioFile' => 'required|file|max:10240',
        ]);

        $this->processing = true;
        $audioPath = null;

        try {
            $audioPath = $this->audioFile->store('temp');
            $audioContent = Storage::get($audioPath);

            $transcription = Audio::transcribe($audioContent)->process();
            $this->transcription = $transcription->text;

            $response = Prompt::make($this->transcription)
                ->withSystemMessage('You are a helpful assistant.')
                ->generate();
            $this->response = $response;

            $audioResponse = Audio::of($response)->generate();
            $responsePath = 'responses/' . uniqid() . '.mp3';
            Storage::put($responsePath, $audioResponse->toBinary());
            $this->audioUrl = Storage::url($responsePath);
        } finally {
            // Only delete when the upload was actually stored.
            if (is_string($audioPath)) {
                Storage::delete($audioPath);
            }

            $this->processing = false;
        }
    }

    public function render()
    {
        return view('livewire.voice-assistant');
    }
}
|
6.3 视图模板
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28
| {{-- Voice assistant UI: pick a recording, process it, and show the results. --}}
<div class="voice-assistant">
    <div class="recorder">
        <input type="file" wire:model="audioFile" accept="audio/*">

        {{-- The button is disabled while a Livewire request is in flight. --}}
        <button
            wire:click="process"
            wire:loading.attr="disabled"
            class="btn btn-primary mt-3"
        >
            <span wire:loading.remove>Process Audio</span>
            <span wire:loading>Processing...</span>
        </button>
    </div>

    {{-- Results are rendered only once a transcription is available. --}}
    @if($transcription)
        <div class="result mt-4">
            <h4>You said:</h4>
            <p>{{ $transcription }}</p>

            <h4>Response:</h4>
            <p>{{ $response }}</p>

            @if($audioUrl)
                <audio controls src="{{ $audioUrl }}"></audio>
            @endif
        </div>
    @endif
</div>
|
7. 高级功能
7.1 批量转录
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16
| use Laravel\Ai\Jobs\TranscribeAudio;
/**
 * Queues one transcription job per file in a storage directory.
 */
class BatchTranscriptionService
{
    /**
     * Dispatch a TranscribeAudio job for every file in the directory.
     *
     * The transcriptions themselves are produced asynchronously by the jobs,
     * so the return value is the list of file paths that were queued
     * (previously the method always returned an empty array).
     *
     * @param string $directory Directory passed to Storage::files().
     * @return array List of queued file paths; empty when the directory has no files.
     */
    public function processDirectory(string $directory): array
    {
        $queued = [];

        foreach (Storage::files($directory) as $file) {
            TranscribeAudio::dispatch($file);
            $queued[] = $file;
        }

        return $queued;
    }
}
|
7.2 音频分割
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28
| use FFMpeg\FFMpeg;
/**
 * Splits an audio file into fixed-length MP3 segments with PHP-FFMpeg.
 */
class AudioSplitter
{
    /**
     * Cut the audio file into consecutive segments.
     *
     * @param string $audioPath     Path of the audio file to open with FFMpeg.
     * @param int    $segmentLength Length of each segment in seconds; must be positive.
     * @return array Paths of the written segment files, in order.
     *
     * @throws \InvalidArgumentException When $segmentLength is not positive.
     */
    public function split(string $audioPath, int $segmentLength = 300): array
    {
        // A zero or negative step would never advance the loop below.
        if ($segmentLength <= 0) {
            throw new \InvalidArgumentException('Segment length must be a positive number of seconds.');
        }

        $ffmpeg = FFMpeg::create();

        // The probed duration arrives as a string; a missing stream or value counts as 0.
        $duration = (float) $ffmpeg->open($audioPath)->getStreams()->first()?->get('duration');

        $segments = [];

        for ($offset = 0; $offset < $duration; $offset += $segmentLength) {
            $segmentPath = "segments/segment_{$offset}.mp3";

            // Open a fresh media object per segment: filters accumulate on an
            // instance, so reusing one would stack every earlier clip filter.
            $audio = $ffmpeg->open($audioPath);
            $audio->filters()->clip(
                \FFMpeg\Coordinate\TimeCode::fromSeconds($offset),
                \FFMpeg\Coordinate\TimeCode::fromSeconds($segmentLength)
            );
            $audio->save(new \FFMpeg\Format\Audio\Mp3(), $segmentPath);

            $segments[] = $segmentPath;
        }

        return $segments;
    }
}
|
7.3 声音克隆
1 2 3
| // Load the reference recording, then generate speech cloned from it.
$voiceSample = Storage::get('voice_sample.mp3');

$audio = Audio::of('Hello with custom voice')
    ->cloneFrom($voiceSample)
    ->generate();
|
8. 字幕生成
8.1 生成 SRT 字幕
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28
| /**
 * Generates SRT subtitles for a video by transcribing its audio track.
 */
class SubtitleGenerator
{
    /**
     * Extract the audio track, transcribe it with timestamps, and return the SRT text.
     *
     * The temporary audio file is removed in all cases, including when the
     * transcription call throws.
     *
     * @param string $videoPath Path of the video file to open with FFMpeg.
     */
    public function generate(string $videoPath): string
    {
        $audioPath = $this->extractAudio($videoPath);

        try {
            $transcription = Audio::transcribe(Storage::get($audioPath))
                ->withTimestamps()
                ->format('srt')
                ->process();

            // Make the conversion to the declared string return type explicit.
            return (string) $transcription;
        } finally {
            Storage::delete($audioPath);
        }
    }

    /**
     * Write the video's audio track to a temporary MP3 on the storage disk.
     *
     * FFMpeg needs an absolute filesystem path, while Storage::get()/delete()
     * work with disk-relative paths; Storage::path() bridges the two so both
     * sides refer to the same file.
     * NOTE(review): assumes the default disk is a local driver — confirm.
     *
     * @return string Disk-relative path of the extracted audio file.
     */
    private function extractAudio(string $videoPath): string
    {
        $audioPath = 'temp/' . uniqid() . '.mp3';

        // Make sure the target directory exists before FFMpeg writes into it.
        Storage::makeDirectory('temp');

        FFMpeg::create()
            ->open($videoPath)
            ->save(new \FFMpeg\Format\Audio\Mp3(), Storage::path($audioPath));

        return $audioPath;
    }
}
|
8.2 多语言字幕
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27
| /**
 * Generates SRT subtitles for a video in several languages.
 */
class MultiLanguageSubtitleGenerator
{
    /**
     * Build one SRT result per requested language.
     *
     * The special value 'original' transcribes the audio as spoken; any other
     * value is passed to the translator as the target language.
     *
     * @param string $videoPath Path of the video file to open with FFMpeg.
     * @param array  $languages Language codes, optionally including 'original'.
     * @return array Results keyed by the requested language value.
     */
    public function generate(string $videoPath, array $languages): array
    {
        $audioPath = $this->extractAudio($videoPath);

        try {
            $audioContent = Storage::get($audioPath);
            $subtitles = [];

            foreach ($languages as $lang) {
                if ($lang === 'original') {
                    $subtitles[$lang] = Audio::transcribe($audioContent)
                        ->format('srt')
                        ->process();

                    continue;
                }

                $subtitles[$lang] = Audio::translate($audioContent)
                    ->to($lang)
                    ->format('srt')
                    ->process();
            }

            return $subtitles;
        } finally {
            // Remove the temporary audio even when a transcription call throws.
            Storage::delete($audioPath);
        }
    }

    /**
     * Write the video's audio track to a temporary MP3 on the storage disk.
     *
     * Defined here because this class previously called an extractAudio()
     * method it did not have. Storage::path() gives FFMpeg the absolute
     * location of the same file that Storage::get()/delete() address.
     * NOTE(review): assumes the default disk is a local driver — confirm.
     *
     * @return string Disk-relative path of the extracted audio file.
     */
    private function extractAudio(string $videoPath): string
    {
        $audioPath = 'temp/' . uniqid() . '.mp3';

        // Make sure the target directory exists before FFMpeg writes into it.
        Storage::makeDirectory('temp');

        \FFMpeg\FFMpeg::create()
            ->open($videoPath)
            ->save(new \FFMpeg\Format\Audio\Mp3(), Storage::path($audioPath));

        return $audioPath;
    }
}
|
9. 成本控制
9.1 成本估算
1 2 3 4 5 6 7 8 9 10 11 12 13
| use Laravel\Ai\Cost;
// Estimate the cost of synthesizing 1000 characters with 'tts-1'.
$ttsCost = Cost::forTTS(
    characters: 1000,
    model: 'tts-1',
);

// Estimate the cost of transcribing 60 seconds of audio with 'whisper-1'.
$asrCost = Cost::forTranscription(
    duration: 60, // seconds
    model: 'whisper-1',
);
|
9.2 音频时长限制
1 2 3 4 5 6 7 8 9 10 11
| /**
 * Checks uploaded audio against a maximum duration.
 */
class AudioValidator
{
    /**
     * Tell whether the file's probed duration is within the allowed limit.
     *
     * A file with no stream, or with no numeric duration, is rejected: without
     * this guard a missing stream raised an error and a null duration compared
     * as "within the limit".
     *
     * @param mixed $file       Upload object exposing a path() method.
     * @param int   $maxMinutes Maximum allowed duration in minutes.
     */
    public function validateDuration($file, int $maxMinutes = 10): bool
    {
        $stream = FFMpeg::create()
            ->open($file->path())
            ->getStreams()
            ->first();

        $duration = $stream?->get('duration');

        if (! is_numeric($duration)) {
            return false;
        }

        return (float) $duration <= ($maxMinutes * 60);
    }
}
|
10. 最佳实践
10.1 音频格式建议
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27
| /**
 * Recommended audio settings and a helper to convert uploads to MP3.
 */
class AudioFormatHelper
{
    /**
     * Settings recommended for audio sent to the speech APIs.
     *
     * @return array Keys: format, bitrate, sample_rate, channels.
     */
    public static function recommendedSettings(): array
    {
        return [
            'format' => 'mp3',
            'bitrate' => '128k',
            'sample_rate' => 44100,
            'channels' => 1,
        ];
    }

    /**
     * Convert the given file to a 128 kbit/s MP3 and return the output path.
     *
     * @param mixed $file Upload object exposing a path() method.
     * @return string Path of the converted file.
     */
    public static function convert($file): string
    {
        $ffmpeg = FFMpeg::create();
        $audio = $ffmpeg->open($file->path());
        $outputPath = 'converted/' . uniqid() . '.mp3';

        // The Mp3 format takes no constructor arguments (it already selects
        // libmp3lame); the bitrate has to be set through the setter.
        $format = new \FFMpeg\Format\Audio\Mp3();
        $format->setAudioKiloBitrate(128);

        $audio->save($format, $outputPath);

        return $outputPath;
    }
}
|
10.2 错误处理
1 2 3 4 5 6 7 8 9
| // Map each failure to a user-facing flash message. The two specific
// exceptions are listed before AiException so they are matched first.
try {
    $transcription = Audio::transcribe($audio)->process();
} catch (AudioTooLongException $tooLong) {
    return back()->with('error', 'Audio file is too long');
} catch (UnsupportedFormatException $unsupported) {
    return back()->with('error', 'Audio format not supported');
} catch (AiException $failure) {
    return back()->with('error', 'Transcription failed');
}
|
10.3 缓存策略
1 2 3 4 5 6 7 8 9 10 11
| use Illuminate\Support\Facades\Cache;
/**
 * Return the transcription of a stored audio file, cached for 30 days.
 *
 * The cache key is derived from the file's contents as read through Storage,
 * so identical audio shares one entry. Hashing the loaded contents, rather
 * than calling md5_file() on the path, keeps the hash and the transcription
 * tied to the same file and avoids every unreadable path collapsing onto the
 * single key "transcription:".
 *
 * @param string $audioPath Path handed to Storage::get().
 *
 * @throws \RuntimeException When the file cannot be read from storage.
 */
function getCachedTranscription(string $audioPath): string
{
    $audio = Storage::get($audioPath);

    if ($audio === null) {
        throw new \RuntimeException("Audio file not found: {$audioPath}");
    }

    $key = 'transcription:' . md5($audio);

    return Cache::remember($key, now()->addDays(30), function () use ($audio) {
        return Audio::transcribe($audio)->text();
    });
}
|
11. 总结
Laravel AI SDK 的音频处理功能为 Laravel 应用提供了强大的语音能力:
- 语音合成:高质量文本转语音
- 语音识别:准确的语音转文本
- 语音翻译:多语言翻译支持
- 流式处理:实时音频处理
- 字幕生成:自动生成视频字幕
通过本指南,您已经掌握了 Laravel AI SDK 音频处理的核心功能,可以开始构建语音驱动的应用了。
参考资料