在 Web 项目中集成文本转语音功能
本文将通过一个完整的实战案例,演示如何在 Web 项目中集成文本转语音(TTS)功能。我们将涵盖从基础实现到高级优化的完整流程。
项目需求分析
功能需求
- ✅ 文本输入和语音播放
- ✅ 多种语音选择
- ✅ 语速、音调调节
- ✅ 播放控制(暂停、继续、停止)
- ✅ 进度显示
- ✅ 音频下载
技术选型
| 方案 | 优点 | 缺点 | 适用场景 |
|---|---|---|---|
| Web Speech API | 免费、简单 | 功能有限 | 基础需求 |
| 云服务 API | 功能强大 | 需付费 | 商业应用 |
| 自建服务 | 完全控制 | 维护成本高 | 定制需求 |
我们采用混合方案:基础功能使用 Web Speech API,高级功能集成云服务。
项目初始化
创建项目
bash
# 使用 Vite 创建项目
npm create vite@latest tts-demo -- --template vue
cd tts-demo
npm install
# 安装依赖
npm install axios element-plus
项目结构
tts-demo/
├── src/
│ ├── components/
│ │ ├── TTSPlayer.vue # 主播放器组件
│ │ ├── VoiceSelector.vue # 语音选择器
│ │ └── Controls.vue # 控制面板
│ ├── services/
│ │ ├── ttsService.js # TTS 服务封装
│ │ └── cloudTTSService.js # 云服务封装
│ ├── utils/
│ │ └── audioUtils.js # 音频工具函数
│ ├── App.vue
│ └── main.js
├── public/
└── package.json
核心服务实现
1. TTS 基础服务
javascript
// src/services/ttsService.js
/**
 * Wrapper around the browser Web Speech API (`window.speechSynthesis`).
 *
 * Keeps track of the utterance being spoken, exposes play / pause / resume /
 * stop controls and forwards utterance events to callbacks registered with
 * `on()`.
 */
class TTSService {
  constructor() {
    // Guarded so that importing this module where `window` or the speech API
    // is missing does not throw at construction time.
    this.synth = typeof window !== 'undefined' ? window.speechSynthesis : undefined;
    this.utterance = null;
    this.voices = [];
    this.isPlaying = false;
    this.isPaused = false;
    this.callbacks = {
      onStart: null,
      onEnd: null,
      onError: null,
      onBoundary: null
    };
    this.initVoices();
  }

  /**
   * Load the voice list into `this.voices`.
   *
   * @param {number} [timeoutMs=2000] How long to wait for `voiceschanged`
   *   before resolving with whatever voices are available.
   * @returns {Object|Promise<Object>} Voices grouped by primary language tag;
   *   returned synchronously when voices are already available, otherwise as
   *   a promise.
   */
  initVoices(timeoutMs = 2000) {
    if (!this.synth) {
      return {};
    }
    const loadVoices = () => {
      this.voices = this.synth.getVoices();
      // Group by the primary language subtag ("zh-CN" -> "zh").
      return this.voices.reduce((acc, voice) => {
        const lang = voice.lang.split('-')[0];
        if (!acc[lang]) acc[lang] = [];
        acc[lang].push(voice);
        return acc;
      }, {});
    };
    if (this.synth.getVoices().length > 0) {
      return loadVoices();
    }
    // Some browsers only populate the list after `voiceschanged`. The timer
    // keeps the promise from staying pending forever if the event never fires.
    return new Promise((resolve) => {
      const timer = setTimeout(() => resolve(loadVoices()), timeoutMs);
      this.synth.onvoiceschanged = () => {
        clearTimeout(timer);
        resolve(loadVoices());
      };
    });
  }

  /** @returns {Array} The voices loaded by the last `initVoices()` run. */
  getVoices() {
    return this.voices;
  }

  /** @returns {Array} Voices whose language tag starts with "zh" or contains "CN". */
  getChineseVoices() {
    return this.voices.filter(
      (voice) => voice.lang.startsWith('zh') || voice.lang.includes('CN')
    );
  }

  /**
   * Speak `text`, cancelling anything currently being spoken.
   *
   * @param {string} text Text to speak.
   * @param {Object} [options] Optional `voice`, `rate`, `pitch`, `volume`, `lang`.
   * @returns {Promise<void>} Resolves on the utterance `end` event; rejects
   *   with the error event on `error`, or with an Error when speech synthesis
   *   is unavailable.
   */
  speak(text, options = {}) {
    return new Promise((resolve, reject) => {
      if (!this.synth || typeof SpeechSynthesisUtterance === 'undefined') {
        reject(new Error('Speech synthesis is not supported in this environment'));
        return;
      }
      this.stop();
      const utterance = new SpeechSynthesisUtterance(text);
      this.utterance = utterance;
      if (options.voice) {
        utterance.voice = options.voice;
      }
      utterance.rate = options.rate || 1.0;
      // `??` rather than `||`: 0 is a valid pitch and a valid volume (mute).
      utterance.pitch = options.pitch ?? 1.0;
      utterance.volume = options.volume ?? 1.0;
      utterance.lang = options.lang || options.voice?.lang || 'zh-CN';

      // Events of an utterance that has since been replaced by a newer
      // speak() call must not overwrite the state of the current one.
      const isCurrent = () => this.utterance === utterance;

      utterance.onstart = () => {
        if (!isCurrent()) return;
        this.isPlaying = true;
        this.isPaused = false;
        if (this.callbacks.onStart) this.callbacks.onStart();
      };
      utterance.onend = () => {
        if (isCurrent()) {
          this.isPlaying = false;
          this.isPaused = false;
          if (this.callbacks.onEnd) this.callbacks.onEnd();
        }
        resolve();
      };
      utterance.onerror = (event) => {
        if (isCurrent()) {
          this.isPlaying = false;
          this.isPaused = false;
          if (this.callbacks.onError) this.callbacks.onError(event);
        }
        reject(event);
      };
      utterance.onboundary = (event) => {
        if (isCurrent() && this.callbacks.onBoundary) {
          this.callbacks.onBoundary(event);
        }
      };
      this.synth.speak(utterance);
    });
  }

  /** Pause playback if something is playing and not already paused. */
  pause() {
    if (this.isPlaying && !this.isPaused) {
      this.synth.pause();
      this.isPaused = true;
    }
  }

  /** Resume playback if it is currently paused. */
  resume() {
    if (this.isPlaying && this.isPaused) {
      this.synth.resume();
      this.isPaused = false;
    }
  }

  /** Cancel all queued and active speech and reset the state flags. */
  stop() {
    if (this.synth) {
      this.synth.cancel();
    }
    this.isPlaying = false;
    this.isPaused = false;
  }

  /**
   * Register a callback for 'start', 'end', 'error' or 'boundary'.
   * Unknown event names are ignored; a later call replaces an earlier one.
   */
  on(event, callback) {
    const eventMap = {
      start: 'onStart',
      end: 'onEnd',
      error: 'onError',
      boundary: 'onBoundary'
    };
    if (eventMap[event]) {
      this.callbacks[eventMap[event]] = callback;
    }
  }

  /** Stop playback and drop every registered callback. */
  destroy() {
    this.stop();
    this.callbacks = {
      onStart: null,
      onEnd: null,
      onError: null,
      onBoundary: null
    };
  }
}
// 导出单例
export default new TTSService();
2. 云服务集成
javascript
// src/services/cloudTTSService.js
import axios from 'axios';
/**
 * Client for cloud text-to-speech providers: Google Cloud TTS, Azure Speech,
 * or a custom backend endpoint.
 *
 * NOTE(review): the Google and Azure calls send `apiKey` straight from the
 * browser, which exposes the key to every user. Prefer the 'custom' provider
 * backed by your own server for production use.
 */
class CloudTTSService {
  /**
   * @param {Object} [config]
   * @param {string} [config.provider='google'] 'google' | 'azure' | 'custom'.
   * @param {string} [config.apiKey] Provider API key.
   * @param {string} [config.baseUrl='/api/tts'] Endpoint of the custom backend.
   */
  constructor(config = {}) {
    this.provider = config.provider || 'google';
    this.apiKey = config.apiKey;
    this.baseUrl = config.baseUrl || '/api/tts';
  }

  /** Escape the five XML special characters so text cannot break the SSML. */
  static escapeXml(value) {
    const entities = { '&': '&amp;', '<': '&lt;', '>': '&gt;', '"': '&quot;', "'": '&apos;' };
    return String(value).replace(/[&<>"']/g, (ch) => entities[ch]);
  }

  /**
   * Normalize synthesized audio to binary data. Google returns a base64
   * string, the other providers an ArrayBuffer; only the string is decoded.
   */
  static toBinary(audio) {
    if (typeof audio !== 'string') {
      return audio;
    }
    const decoded = atob(audio);
    const bytes = new Uint8Array(decoded.length);
    for (let i = 0; i < decoded.length; i += 1) {
      bytes[i] = decoded.charCodeAt(i);
    }
    return bytes;
  }

  /** Switch provider; every key of `config` is copied onto the instance. */
  setProvider(provider, config = {}) {
    this.provider = provider;
    Object.assign(this, config);
  }

  /**
   * Synthesize with Google Cloud TTS.
   * @returns {Promise<string>} Base64-encoded MP3 (`audioContent`).
   */
  async googleTTS(text, options = {}) {
    const response = await axios.post(
      `https://texttospeech.googleapis.com/v1/text:synthesize?key=${encodeURIComponent(this.apiKey)}`,
      {
        input: { text },
        voice: {
          languageCode: options.lang || 'zh-CN',
          name: options.voiceName || 'zh-CN-Wavenet-A'
        },
        audioConfig: {
          audioEncoding: 'MP3',
          speakingRate: options.rate || 1.0,
          pitch: options.pitch || 0
        }
      }
    );
    return response.data.audioContent;
  }

  /**
   * Synthesize with Azure Speech using an SSML request body.
   * @returns {Promise<ArrayBuffer>} MP3 bytes.
   */
  async azureTTS(text, options = {}) {
    const esc = CloudTTSService.escapeXml;
    // Every interpolated value is escaped: the text is user input and may
    // contain "<" or "&", which would otherwise produce invalid SSML.
    const ssml = `
<speak version="1.0" xmlns="http://www.w3.org/2001/10/synthesis" xml:lang="${esc(options.lang || 'zh-CN')}">
<voice name="${esc(options.voiceName || 'zh-CN-XiaoxiaoNeural')}">
<prosody rate="${esc(options.rate || '1.0')}" pitch="${esc(options.pitch || '0%')}">
${esc(text)}
</prosody>
</voice>
</speak>
`;
    const response = await axios.post(
      `https://${options.region || 'eastasia'}.tts.speech.microsoft.com/cognitiveservices/v1`,
      ssml,
      {
        headers: {
          'Ocp-Apim-Subscription-Key': this.apiKey,
          'Content-Type': 'application/ssml+xml',
          'X-Microsoft-OutputFormat': 'audio-16khz-128kbitrate-mono-mp3'
        },
        responseType: 'arraybuffer'
      }
    );
    return response.data;
  }

  /**
   * Synthesize through the custom backend at `baseUrl`.
   * @returns {Promise<ArrayBuffer>} Audio bytes returned by the backend.
   */
  async customBackend(text, options = {}) {
    const response = await axios.post(this.baseUrl, {
      text,
      lang: options.lang || 'zh-CN',
      voice: options.voice,
      rate: options.rate || 1.0,
      pitch: options.pitch || 1.0
    }, {
      responseType: 'arraybuffer'
    });
    return response.data;
  }

  /**
   * Dispatch to the configured provider.
   * @throws {Error} When `provider` is not 'google', 'azure' or 'custom'.
   */
  async synthesize(text, options = {}) {
    switch (this.provider) {
      case 'google':
        return this.googleTTS(text, options);
      case 'azure':
        return this.azureTTS(text, options);
      case 'custom':
        return this.customBackend(text, options);
      default:
        throw new Error(`Unknown provider: ${this.provider}`);
    }
  }

  /** Synthesize `text` and trigger a browser download of the result. */
  async downloadAudio(text, filename = 'speech.mp3', options = {}) {
    const audioData = await this.synthesize(text, options);
    // Base64 output must be decoded first; wrapping the string itself in a
    // Blob would save the base64 text instead of the MP3 bytes.
    const blob = new Blob([CloudTTSService.toBinary(audioData)], { type: 'audio/mpeg' });
    const url = URL.createObjectURL(blob);
    const link = document.createElement('a');
    link.href = url;
    link.download = filename;
    link.click();
    // Revoke after a short delay so the browser has started reading the URL.
    setTimeout(() => URL.revokeObjectURL(url), 100);
  }
}
export default CloudTTSService;
3. 音频工具函数
javascript
// src/utils/audioUtils.js
// Base64 转 Blob
/**
 * Decode a base64 string into a Blob.
 *
 * @param {string} base64 Base64-encoded payload.
 * @param {string} [mimeType='audio/mpeg'] MIME type assigned to the Blob.
 * @returns {Blob} Blob holding the decoded bytes.
 */
export function base64ToBlob(base64, mimeType = 'audio/mpeg') {
  const decoded = atob(base64);
  // atob yields one character per byte; copy the char codes into a typed array.
  const bytes = new Uint8Array(decoded.length);
  for (let index = 0; index < decoded.length; index += 1) {
    bytes[index] = decoded.charCodeAt(index);
  }
  return new Blob([bytes], { type: mimeType });
}
// 音频时长计算
/**
 * Estimate how long speaking `text` takes.
 *
 * @param {string} text Text to be spoken.
 * @param {number} [rate=150] Speaking rate in characters per minute.
 * @returns {number} Estimated duration in whole seconds, rounded up.
 */
export function estimateDuration(text, rate = 150) {
  const minutes = text.length / rate;
  const seconds = minutes * 60;
  return Math.ceil(seconds);
}
// 音频可视化
/**
 * Draw a live waveform of `audioSource` onto `canvas`.
 *
 * The source is routed through an AnalyserNode to `audioContext.destination`
 * and the time-domain data is redrawn on every animation frame.
 *
 * @param {AudioContext} audioContext Context used to create the analyser.
 * @param {HTMLCanvasElement} canvas Canvas to draw on.
 * @param {AudioNode} audioSource Node whose output is visualized.
 * @returns {Function} `stop()` - cancels the animation loop and disconnects
 *   the analyser. Callers that ignore the return value behave as before.
 */
export function visualizeAudio(audioContext, canvas, audioSource) {
  const analyser = audioContext.createAnalyser();
  audioSource.connect(analyser);
  analyser.connect(audioContext.destination);
  const canvasCtx = canvas.getContext('2d');
  const bufferLength = analyser.frequencyBinCount;
  const dataArray = new Uint8Array(bufferLength);
  let frameId = null;
  let running = true;

  function draw() {
    if (!running) return;
    frameId = requestAnimationFrame(draw);
    analyser.getByteTimeDomainData(dataArray);
    canvasCtx.fillStyle = 'rgb(200, 200, 200)';
    canvasCtx.fillRect(0, 0, canvas.width, canvas.height);
    canvasCtx.lineWidth = 2;
    canvasCtx.strokeStyle = 'rgb(0, 0, 0)';
    canvasCtx.beginPath();
    const sliceWidth = canvas.width / bufferLength;
    let x = 0;
    for (let i = 0; i < bufferLength; i++) {
      // Samples are unsigned bytes centred on 128; scale so that a sample
      // of 128 lands on the vertical middle of the canvas.
      const v = dataArray[i] / 128.0;
      const y = (v * canvas.height) / 2;
      if (i === 0) {
        canvasCtx.moveTo(x, y);
      } else {
        canvasCtx.lineTo(x, y);
      }
      x += sliceWidth;
    }
    canvasCtx.lineTo(canvas.width, canvas.height / 2);
    canvasCtx.stroke();
  }

  draw();

  // Without a way to stop, the frame loop and the audio graph connection
  // would outlive the component that started them.
  return function stop() {
    if (!running) return;
    running = false;
    if (frameId !== null) {
      cancelAnimationFrame(frameId);
    }
    audioSource.disconnect(analyser);
    analyser.disconnect();
  };
}
// 文本分段(长文本处理)
export function splitTextIntoChunks(text, maxLength = 200) {
const chunks = [];
let currentChunk = '';
// 按句子分割
const sentences = text.split(/[。!?;\n]/);
for (const sentence of sentences) {
if ((currentChunk + sentence).length > maxLength) {
if (currentChunk) {
chunks.push(currentChunk);
currentChunk = '';
}
// 超长句子强制分割
if (sentence.length > maxLength) {
for (let i = 0; i < sentence.length; i += maxLength) {
chunks.push(sentence.slice(i, i + maxLength));
}
} else {
currentChunk = sentence;
}
} else {
currentChunk += (currentChunk ? '。' : '') + sentence;
}
}
if (currentChunk) {
chunks.push(currentChunk);
}
return chunks;
}Vue 组件实现
1. 主播放器组件
vue
<!-- src/components/TTSPlayer.vue -->
<template>
<div class="tts-player">
<div class="input-section">
<el-input
v-model="text"
type="textarea"
:rows="6"
placeholder="请输入要转换的文本..."
maxlength="5000"
show-word-limit
/>
</div>
<div class="controls-section">
<VoiceSelector
v-model="selectedVoice"
:voices="voices"
/>
<div class="slider-group">
<div class="slider-item">
<span>语速: {{ rate.toFixed(1) }}</span>
<el-slider
v-model="rate"
:min="0.5"
:max="2"
:step="0.1"
/>
</div>
<div class="slider-item">
<span>音调: {{ pitch.toFixed(1) }}</span>
<el-slider
v-model="pitch"
:min="0.5"
:max="2"
:step="0.1"
/>
</div>
<div class="slider-item">
<span>音量: {{ volume.toFixed(1) }}</span>
<el-slider
v-model="volume"
:min="0"
:max="1"
:step="0.1"
/>
</div>
</div>
</div>
<div class="player-controls">
<el-button
type="primary"
@click="play"
:disabled="!text || isLoading"
:loading="isLoading"
>
{{ isPlaying ? '播放中...' : '播放' }}
</el-button>
<el-button
@click="togglePause"
:disabled="!isPlaying"
>
{{ isPaused ? '继续' : '暂停' }}
</el-button>
<el-button
@click="stop"
:disabled="!isPlaying && !isPaused"
>
停止
</el-button>
<el-button
@click="download"
:disabled="!text"
>
下载音频
</el-button>
</div>
<div class="progress-section" v-if="isPlaying || isPaused">
<el-progress
:percentage="progress"
:status="isPlaying ? '' : 'warning'"
/>
<p class="status-text">
{{ statusText }}
</p>
</div>
</div>
</template>
<script setup>
import { ref, computed, onMounted, onUnmounted } from 'vue';
import { ElMessage } from 'element-plus';
import ttsService from '../services/ttsService';
import cloudTTSService from '../services/cloudTTSService';
import VoiceSelector from './VoiceSelector.vue';
// Component state
const text = ref('');
const voices = ref([]);
const selectedVoice = ref(null);
const rate = ref(1.0);
const pitch = ref(1.0);
const volume = ref(1.0);
const isPlaying = ref(false);
const isPaused = ref(false);
const isLoading = ref(false);
const progress = ref(0);
const charIndex = ref(0);

// Cancelling speech (stop, or starting a new utterance) is reported by the
// Web Speech API as an error event whose `error` is 'interrupted' or
// 'canceled'. That is an expected outcome, not a failure to show the user.
function isInterruption(event) {
  return event?.error === 'interrupted' || event?.error === 'canceled';
}

// Status line shown under the progress bar
const statusText = computed(() => {
  if (isPaused.value) return '已暂停';
  if (isPlaying.value) return `正在播放... (${charIndex.value}/${text.value.length})`;
  return '';
});

// Load voices and register the playback callbacks once mounted
onMounted(async () => {
  await ttsService.initVoices();
  voices.value = ttsService.getVoices();
  // Preselect the first Chinese voice when one exists
  const chineseVoices = ttsService.getChineseVoices();
  if (chineseVoices.length > 0) {
    selectedVoice.value = chineseVoices[0];
  }
  ttsService.on('boundary', (event) => {
    charIndex.value = event.charIndex;
    // The input can be cleared while speech is running; avoid dividing by
    // zero and keep the percentage within the range el-progress accepts.
    const total = text.value.length;
    const percent = total > 0 ? Math.round((event.charIndex / total) * 100) : 0;
    progress.value = Math.min(100, Math.max(0, percent));
  });
  ttsService.on('end', () => {
    isPlaying.value = false;
    isPaused.value = false;
    progress.value = 100;
  });
  ttsService.on('error', (event) => {
    if (isInterruption(event)) return;
    ElMessage.error(`播放出错: ${event.error}`);
    isPlaying.value = false;
    isPaused.value = false;
  });
});

// Start speaking the current text
async function play() {
  if (!text.value.trim()) return;
  isLoading.value = true;
  progress.value = 0;
  charIndex.value = 0;
  try {
    isPlaying.value = true;
    await ttsService.speak(text.value, {
      voice: selectedVoice.value,
      rate: rate.value,
      pitch: pitch.value,
      volume: volume.value
    });
  } catch (error) {
    // stop() already reset the state; an interruption needs no message.
    if (!isInterruption(error)) {
      // Covers failures that never reach the 'error' callback, which would
      // otherwise leave the UI stuck in the playing state.
      isPlaying.value = false;
      isPaused.value = false;
      ElMessage.error('播放失败');
      console.error(error);
    }
  } finally {
    isLoading.value = false;
  }
}

// Toggle between paused and playing
function togglePause() {
  if (isPaused.value) {
    ttsService.resume();
    isPaused.value = false;
  } else {
    ttsService.pause();
    isPaused.value = true;
  }
}

// Stop playback and reset the progress display
function stop() {
  ttsService.stop();
  isPlaying.value = false;
  isPaused.value = false;
  progress.value = 0;
  charIndex.value = 0;
}

// Synthesize through the cloud service and download the audio file
async function download() {
  if (!text.value.trim()) return;
  isLoading.value = true;
  try {
    // `cloudTTSService` is the imported class (default export), hence `new`.
    const service = new cloudTTSService({
      provider: 'custom', // or 'google' / 'azure'
      baseUrl: '/api/tts'
    });
    await service.downloadAudio(text.value, `speech_${Date.now()}.mp3`, {
      voice: selectedVoice.value?.name,
      rate: rate.value,
      pitch: pitch.value
    });
    ElMessage.success('音频已下载');
  } catch (error) {
    ElMessage.error('下载失败,请检查服务配置');
    console.error(error);
  } finally {
    isLoading.value = false;
  }
}

// Release the speech service when the component goes away
onUnmounted(() => {
  ttsService.destroy();
});
</script>
<style scoped>
.tts-player {
max-width: 800px;
margin: 0 auto;
padding: 20px;
}
.input-section {
margin-bottom: 20px;
}
.controls-section {
margin-bottom: 20px;
}
.slider-group {
margin-top: 15px;
}
.slider-item {
margin-bottom: 15px;
}
.slider-item span {
display: block;
margin-bottom: 5px;
font-size: 14px;
color: #666;
}
.player-controls {
margin-bottom: 20px;
}
.progress-section {
padding: 15px;
background: #f5f7fa;
border-radius: 8px;
}
.status-text {
margin-top: 10px;
font-size: 14px;
color: #666;
text-align: center;
}
</style>
2. 语音选择器组件
vue
<!-- src/components/VoiceSelector.vue -->
<template>
<div class="voice-selector">
<el-select
v-model="localValue"
placeholder="选择语音"
filterable
@change="handleChange"
>
<el-option-group
v-for="(voices, lang) in groupedVoices"
:key="lang"
:label="getLanguageName(lang)"
>
<el-option
v-for="voice in voices"
:key="voice.voiceURI"
:label="voice.name"
:value="voice.voiceURI"
>
<div class="voice-option">
<span>{{ voice.name }}</span>
<el-tag v-if="voice.default" size="small" type="success">
默认
</el-tag>
</div>
</el-option>
</el-option-group>
</el-select>
<el-button
@click="playSample"
:disabled="!localValue"
size="small"
style="margin-left: 10px;"
>
试听
</el-button>
</div>
</template>
<script setup>
import { ref, computed, watch } from 'vue';
import { ElMessage } from 'element-plus';
const props = defineProps({
  // Currently selected voice object (v-model); null when nothing is selected.
  modelValue: {
    type: Object,
    default: null
  },
  // All voices offered in the dropdown.
  voices: {
    type: Array,
    default: () => []
  }
});
const emit = defineEmits(['update:modelValue']);

// The select works with voiceURI strings; the parent works with voice objects.
const localValue = ref(props.modelValue?.voiceURI || '');

// Voices grouped by primary language subtag ("zh-CN" -> "zh")
const groupedVoices = computed(() => {
  return props.voices.reduce((acc, voice) => {
    const lang = voice.lang.split('-')[0];
    if (!acc[lang]) acc[lang] = [];
    acc[lang].push(voice);
    return acc;
  }, {});
});

// Display names for the language groups
const languageNames = {
  zh: '中文',
  en: '英语',
  ja: '日语',
  ko: '韩语',
  fr: '法语',
  de: '德语',
  es: '西班牙语'
};

// Unknown codes fall back to the upper-cased code itself.
function getLanguageName(langCode) {
  return languageNames[langCode] || langCode.toUpperCase();
}

// Translate the selected voiceURI back into a voice object for the parent
function handleChange(voiceURI) {
  const voice = props.voices.find(v => v.voiceURI === voiceURI);
  emit('update:modelValue', voice);
}

// Play a short sample with the selected voice
function playSample() {
  const voice = props.voices.find(v => v.voiceURI === localValue.value);
  if (!voice) return;
  const utterance = new SpeechSynthesisUtterance(
    '你好,这是语音试听示例。Hello, this is a voice sample.'
  );
  utterance.voice = voice;
  utterance.lang = voice.lang;
  // speak() only appends to the queue; cancel first so repeated clicks or
  // ongoing playback do not delay or stack up samples.
  window.speechSynthesis.cancel();
  window.speechSynthesis.speak(utterance);
}

// Keep the select in sync when the parent changes the model value
watch(() => props.modelValue, (newVal) => {
  localValue.value = newVal?.voiceURI || '';
});
</script>
<style scoped>
.voice-selector {
display: flex;
align-items: center;
}
.voice-option {
display: flex;
justify-content: space-between;
align-items: center;
}
</style>
后端服务示例
Node.js 后端(Express)
javascript
// server.js
const express = require('express');
const multer = require('multer');
const path = require('path');
const fs = require('fs');
const app = express();
const port = 3000;

// Longest text accepted by /api/tts; mirrors the 5000-character limit of the
// text input in the player component.
const MAX_TEXT_LENGTH = 5000;
// Upper bound for uploaded audio files, in bytes.
const MAX_UPLOAD_BYTES = 10 * 1024 * 1024;

// Middleware
app.use(express.json());
app.use(express.static('public'));

// Mock TTS endpoint (wire in a real TTS API for production use)
app.post('/api/tts', async (req, res) => {
  const { text, voice, rate, pitch } = req.body ?? {};
  // Reject bad input up front instead of passing it on to the TTS provider.
  if (typeof text !== 'string' || text.trim() === '') {
    return res.status(400).json({ error: '文本不能为空' });
  }
  if (text.length > MAX_TEXT_LENGTH) {
    return res.status(400).json({ error: `文本长度不能超过 ${MAX_TEXT_LENGTH} 个字符` });
  }
  try {
    // Integrate the actual TTS API here (Google Cloud TTS, Azure, Baidu, ...),
    // forwarding `voice`, `rate` and `pitch` to it.
    // Placeholder audio until a provider is wired in.
    const audioBuffer = await generateAudio(text);
    res.setHeader('Content-Type', 'audio/mpeg');
    res.send(audioBuffer);
  } catch (error) {
    console.error('TTS Error:', error);
    res.status(500).json({ error: '音频生成失败' });
  }
});

// Audio file upload
const upload = multer({
  dest: 'uploads/',
  limits: { fileSize: MAX_UPLOAD_BYTES, files: 1 }
});
app.post('/api/upload-audio', upload.single('audio'), (req, res) => {
  // multer leaves req.file undefined when the "audio" field is missing.
  if (!req.file) {
    return res.status(400).json({ error: '缺少音频文件' });
  }
  // NOTE(review): no route visible here serves "/audio/..."; confirm that
  // the uploads directory is exposed under that path elsewhere.
  res.json({
    filename: req.file.filename,
    path: `/audio/${req.file.filename}`
  });
});

app.listen(port, () => {
  console.log(`TTS Server running at http://localhost:${port}`);
});
// 辅助函数:生成音频(示例)
async function generateAudio(text) {
// 实际实现需要调用真实的 TTS API
// 这里只是占位符
return Buffer.from('');
}高级功能扩展
1. 长文本处理
javascript
// 分段播放长文本
async function playLongText(text, options = {}) {
const chunks = splitTextIntoChunks(text, 200);
for (const chunk of chunks) {
await ttsService.speak(chunk, options);
// 段落间暂停
await new Promise(resolve => setTimeout(resolve, 500));
}
}2. 音频可视化
javascript
// 实时音频波形显示
import { visualizeAudio } from '../utils/audioUtils';
// Audio graph entry point for the visualizer.
const audioContext = new AudioContext();
// NOTE(review): `stream` is not defined in this snippet - presumably a
// MediaStream obtained elsewhere (e.g. from getUserMedia); confirm.
const audioSource = audioContext.createMediaStreamSource(stream);
visualizeAudio(audioContext, canvasElement, audioSource);
3. 语音识别结合
javascript
// 语音识别 + 语音合成 完整对话
/**
 * Voice conversation loop: speech recognition in, speech synthesis out.
 *
 * One recognition result is sent to `getAIResponse` and the reply is spoken
 * through the shared TTS service.
 */
class VoiceConversation {
  /** @throws {Error} When the browser offers no speech recognition API. */
  constructor() {
    // Prefer the unprefixed constructor and fall back to the webkit one.
    const Recognition = window.SpeechRecognition || window.webkitSpeechRecognition;
    if (!Recognition) {
      throw new Error('Speech recognition is not supported in this browser');
    }
    this.recognition = new Recognition();
    this.tts = ttsService;
    this.setupRecognition();
  }

  /** Configure single-shot zh-CN recognition and wire the result handler. */
  setupRecognition() {
    this.recognition.continuous = false;
    this.recognition.lang = 'zh-CN';
    this.recognition.onresult = async (event) => {
      // An async event handler has no caller to catch its rejection, so
      // failures are handled here instead of becoming unhandled rejections.
      try {
        const text = event.results[0][0].transcript;
        const response = await this.getAIResponse(text);
        await this.tts.speak(response);
      } catch (error) {
        console.error('Voice conversation failed:', error);
      }
    };
    this.recognition.onerror = (event) => {
      console.error('Speech recognition error:', event.error);
    };
  }

  /** Start listening for one utterance. */
  start() {
    this.recognition.start();
  }

  /**
   * Produce the reply for recognized input. Placeholder: returns a fixed
   * string; call the AI API here.
   */
  async getAIResponse(input) {
    return '这是回复内容';
  }
}
性能优化建议
1. 预加载语音
javascript
// 应用启动时预加载语音列表
window.speechSynthesis.getVoices();
2. 文本缓存
javascript
// 缓存已合成的音频
const audioCache = new Map();
async function getCachedAudio(text) {
const cacheKey = text.trim().toLowerCase();
if (audioCache.has(cacheKey)) {
return audioCache.get(cacheKey);
}
const audio = await synthesize(text);
audioCache.set(cacheKey, audio);
return audio;
}3. Web Worker 处理
javascript
// worker.js
// A worker has its own global scope, so the helper must be imported here.
// NOTE(review): the path assumes worker.js sits in src/ and is started as a
// module worker (`new Worker(url, { type: 'module' })`); adjust as needed.
import { splitTextIntoChunks } from './utils/audioUtils';

/**
 * Split the received text off the main thread.
 * Expects `{ text, options }` in the message data (`options` is currently
 * unused) and replies with `{ type: 'ready', chunks }`, or with
 * `{ type: 'error', message }` when splitting fails.
 */
self.onmessage = function (e) {
  const { text } = e.data;
  try {
    const chunks = splitTextIntoChunks(text);
    self.postMessage({ type: 'ready', chunks });
  } catch (error) {
    self.postMessage({ type: 'error', message: String(error?.message ?? error) });
  }
};
总结
本文介绍了在 Web 项目中集成文本转语音功能的完整方案:
- 服务层 - 封装 Web Speech API 和云服务
- 组件层 - 可复用的 Vue 组件
- 工具层 - 音频处理辅助函数
- 扩展功能 - 长文本处理、可视化等
通过这个方案,你可以快速在你的 Web 项目中实现强大的文本转语音功能。根据实际需求选择合适的技术栈和服务提供商,打造流畅的用户体验。
发布于 2025-06-28