Skip to content

文生音频及语音翻译

官方文档:https://docs.spring.io/spring-ai/reference/api/index.html#api/audio

java
// Speech model injected by Spring; the /tts endpoint calls it to turn text into audio bytes.
@Autowired
OpenAiAudioSpeechModel openAiAudioSpeechModel;

// Transcription model injected by Spring; the /audio2Text endpoint calls it to turn audio into text.
@Autowired
OpenAiAudioTranscriptionModel openAiTranscriptionModel;

TTS(文本转语音)

Fv9m4PJxdCLn9Pt8UJgriuHlPRiI
java
/**
 * Converts {@code msg} to speech with the OpenAI speech model and stores the result as
 * {@code <audioName>.mp3} in the audio directory.
 *
 * @param msg       text to synthesize
 * @param audioName base name (without extension) of the MP3 file to write; it is taken from the
 *                  request, so it is rejected when it could point outside the audio directory
 * @return 200 with the URL of the generated file; 400 when {@code audioName} is invalid or the
 *         provider reports an exhausted quota; 500 when synthesis or writing the file fails
 * @throws NonTransientAiException for provider errors other than an exhausted quota
 */
@GetMapping("/tts")
public ResponseEntity<String> tts(@RequestParam(value = "msg",
                               defaultValue = "欢迎朋友加入本社群") String msg,
                       @RequestParam(value = "audioName", defaultValue = "welcomeJoin") String audioName) {
    // audioName is untrusted request input that becomes part of a file path: refuse anything
    // that could navigate out of the audio directory.
    if (audioName == null || audioName.isBlank() || audioName.contains("..")
            || audioName.contains("/") || audioName.contains("\\")) {
        return ResponseEntity.status(HttpStatus.BAD_REQUEST).body("音频名称不合法。");
    }
    try {
        OpenAiAudioSpeechOptions speechOptions = OpenAiAudioSpeechOptions.builder()
                .withModel(OpenAiAudioApi.TtsModel.TTS_1.value)
                .withVoice(OpenAiAudioApi.SpeechRequest.Voice.ALLOY)
                .withResponseFormat(OpenAiAudioApi.SpeechRequest.AudioResponseFormat.MP3)
                .withSpeed(1.0f)
                .build();
        SpeechPrompt speechPrompt = new SpeechPrompt(msg, speechOptions);
        SpeechResponse response = openAiAudioSpeechModel.call(speechPrompt);
        byte[] outputBytes = response.getResult().getOutput();
        File output = getMp3OutputPath(audioName);
        // try-with-resources closes the stream even when write() throws.
        try (FileOutputStream fos = new FileOutputStream(output)) {
            fos.write(outputBytes);
        }
        // Only advertise the URL once the file has actually been written.
        return ResponseEntity.status(HttpStatus.OK).body("http://localhost:8080/audios/" + audioName + ".mp3");
    } catch (NonTransientAiException e) {
        // getMessage() may be null; guard so the handler itself cannot throw an NPE.
        String message = e.getMessage();
        if (message != null && message.contains("insufficient_user_quota")) {
            return ResponseEntity.status(HttpStatus.BAD_REQUEST).body("您的配额不足,请联系管理员或充值。");
        }
        throw e;
    } catch (Exception ex) {
        // No logger is visible in this part of the file, so the stack trace is still printed;
        // the important change is that a failure is no longer reported as 200 OK.
        ex.printStackTrace();
        return ResponseEntity.status(HttpStatus.INTERNAL_SERVER_ERROR).body("音频生成失败,请稍后重试。");
    }
}

/**
 * Resolves the file that an MP3 named {@code audioName} should be written to, inside the
 * {@code static/audios} classpath directory.
 *
 * <p>NOTE(review): {@code ClassPathResource#getFile()} needs the resource to be a real directory
 * on the file system; when the application runs from a packaged jar, or the directory is absent
 * from the classpath, it is expected to throw instead of returning a file — confirm against the
 * deployment layout.
 *
 * @param audioName base name of the audio file, without the {@code .mp3} extension
 * @return the target file located under the audio directory
 * @throws IOException              if the audio directory cannot be resolved or created
 * @throws IllegalArgumentException if {@code audioName} resolves outside the audio directory
 */
public File getMp3OutputPath(String audioName) throws IOException {
    File audioDir =  new ClassPathResource("static/audios").getFile();
    // Re-check after mkdirs() so a directory created concurrently is not treated as a failure.
    if (!audioDir.isDirectory() && !audioDir.mkdirs() && !audioDir.isDirectory()) {
        throw new IOException("Unable to create audio directory: " + audioDir);
    }
    File target = new File(audioDir, audioName + ".mp3");
    // Canonicalize both sides so "../" segments cannot place the file outside audioDir.
    if (!target.getCanonicalFile().toPath().startsWith(audioDir.getCanonicalFile().toPath())) {
        throw new IllegalArgumentException("audioName resolves outside the audio directory: " + audioName);
    }
    return target;
}

生成的音频文件: https://diyai.cn/audio/welcomeJoin.mp3

语音转文本(语音识别)

java
/**
 * Transcribes the classpath audio file {@code static/audios/<audioName>.mp3} with the OpenAI
 * transcription model and returns the text in VTT format, followed by the audio URL.
 *
 * @param audioName base name (without extension) of the MP3 file to transcribe; it is taken from
 *                  the request, so it is rejected when it could point outside the audio directory
 * @return 200 with the transcription and the audio URL; 400 when {@code audioName} is invalid;
 *         404 when the audio file does not exist
 */
@GetMapping("/audio2Text")
public ResponseEntity<String> transcription(@RequestParam(value = "audioName", defaultValue = "welcomeJoin") String audioName){
    // audioName is untrusted request input that becomes part of a resource path: refuse anything
    // that could navigate out of static/audios.
    if (audioName == null || audioName.isBlank() || audioName.contains("..")
            || audioName.contains("/") || audioName.contains("\\")) {
        return ResponseEntity.status(HttpStatus.BAD_REQUEST).body("音频名称不合法。");
    }
    Resource audioFile = new ClassPathResource("static/audios/" + audioName + ".mp3");
    // Fail fast with a clear status instead of letting the model call fail on a missing file.
    if (!audioFile.exists()) {
        return ResponseEntity.status(HttpStatus.NOT_FOUND).body("音频文件不存在。");
    }
    OpenAiAudioApi.TranscriptResponseFormat responseFormat = OpenAiAudioApi.TranscriptResponseFormat.VTT;
    // NOTE(review): the language hint is fixed to "en", while the default text of the /tts
    // endpoint is Chinese — confirm this is the intended input language.
    OpenAiAudioTranscriptionOptions transcriptionOptions = OpenAiAudioTranscriptionOptions.builder()
            .withLanguage("en")
            .withTemperature(0f)
            .withResponseFormat(responseFormat)
            .build();
    AudioTranscriptionPrompt transcriptionRequest = new AudioTranscriptionPrompt(audioFile, transcriptionOptions);
    AudioTranscriptionResponse response = openAiTranscriptionModel.call(transcriptionRequest);
    // NOTE(review): the body mixes HTML ("<br/>") with the request-supplied audioName; if it is
    // rendered as HTML by a client, audioName should be escaped — confirm the content type.
    return ResponseEntity.status(HttpStatus.OK).body(response.getResult().getOutput()+"<br/>http://localhost:8080/audios/" + audioName + ".mp3");
}