text2speech
This commit is contained in:
parent
cfa38e28a7
commit
1a255f1182
|
@ -0,0 +1,252 @@
|
||||||
|
using UnityEngine;
|
||||||
|
using UnityEngine.Networking;
|
||||||
|
using System;
|
||||||
|
using System.IO;
|
||||||
|
using System.Text;
|
||||||
|
using Newtonsoft.Json;
|
||||||
|
using System.Collections;
|
||||||
|
using OpenAI_API;
|
||||||
|
using OpenAI_API.Chat;
|
||||||
|
using OpenAI_API.Models;
|
||||||
|
using System.Threading.Tasks;
|
||||||
|
using Newtonsoft.Json.Linq;
|
||||||
|
using System.Collections.Generic;
|
||||||
|
|
||||||
|
/// <summary>
/// Deserialization target for the JSON body returned by the speech synthesis request.
/// </summary>
[System.Serializable]
public class TextToSpeechResponse
{
    /// <summary>
    /// Audio payload as text; this file decodes it with Convert.FromBase64String.
    /// </summary>
    public string audioContent;
}
|
||||||
|
|
||||||
|
|
||||||
|
public class Text2Speech : MonoBehaviour
|
||||||
|
{
|
||||||
|
// ---- Public settings (editable from outside this component) ----

// Bullet-point description of recent events; appended to the prompt sent to the chat model.
public string context = "going for a walk, falling, explosion, blood";
// Compared against each voice's "ssmlGender" value when a voice is chosen.
public string gender = "MALE";
// Voice entry used for synthesis; while null, a voice is looked up on the next generate request.
public JToken voice = null;
// Sent as audioConfig.speakingRate in the synthesize request.
public double speakingSpeed = 1.1;
// One-shot trigger: Update() resets it to false and plays the current clip.
public bool playSound = false;
// One-shot trigger: Update() resets it to false and starts a generate-and-speak run.
public bool generate = false;

// Cached in Start() via GetComponent.
private AudioSource _audioSource;

// SECURITY: API keys must never be committed to source control. The keys that were
// previously hard-coded here have been published with the repository history and
// should be revoked. They are now read from the environment; an unset variable
// yields an empty key and the remote services will reject the requests.
private readonly string _googelCloudApiKey = Environment.GetEnvironmentVariable("GOOGLE_CLOUD_API_KEY") ?? string.Empty;
private readonly string _google_CloudApiUrl = "https://texttospeech.googleapis.com/v1/text:synthesize";
// NOTE(review): machine-specific absolute path; only valid on the original developer's PC.
private readonly string _outputPath = "C:\\Users\\PC\\VoiceTest\\Assets\\Scirpts\\audio.wav";
// File name inside Application.temporaryCachePath; replaced with a per-instance name in Start().
private string _tmpPath = "tmp_audio.wav";

// Both are re-created on every GenerateText() call.
private OpenAIAPI _openAiApi;
private Conversation? _conversation;
private readonly string _openAiApiKey = Environment.GetEnvironmentVariable("OPENAI_API_KEY") ?? string.Empty;
// Instruction prefix; the context string is appended directly after it.
private readonly string _prompt = "Write a short text for an NPC in a game.The text should be based on the following bullet-point context, which describes the events of the last moments. Remember to only respond with the short text that the ONE NPC should speak! The context is: ";
|
||||||
|
|
||||||
|
/// <summary>
/// Unity start-up hook: caches the AudioSource found on this GameObject and
/// builds a temp file name that includes this object's instance ID.
/// </summary>
void Start()
{
    _audioSource = GetComponent<AudioSource>();
    _tmpPath = $"tmp_audio_{GetInstanceID()}.wav";
}
|
||||||
|
|
||||||
|
/// <summary>
/// Per-frame poll of the two one-shot flags.
/// <c>playSound</c> replays the current clip; <c>generate</c> starts a new
/// generate-and-speak run, looking up a voice first when none is set yet.
/// Each flag is cleared as soon as it is observed.
/// </summary>
public void Update()
{
    if (playSound)
    {
        playSound = false;
        if (_audioSource != null)
        {
            _audioSource.Play();
        }
        else
        {
            // Start() found no AudioSource on this GameObject.
            Debug.LogWarning("Text2Speech: No AudioSource attached; cannot play sound.");
        }
    }

    if (!generate)
    {
        return;
    }
    generate = false;

    if (voice != null)
    {
        StartCoroutine(GenerateAndSynthesizeText(context));
        return;
    }

    GetRandomGermanVoice(gender, selectedVoice =>
    {
        // The lookup reports failure by passing null. Without a voice the synthesis
        // step cannot build its request, so stop before spending a chat completion.
        if (selectedVoice == null)
        {
            Debug.LogError("GoogleCloud: No voice available; skipping text generation.");
            return;
        }

        voice = selectedVoice;
        StartCoroutine(GenerateAndSynthesizeText(context));
    });
}
|
||||||
|
|
||||||
|
/// <summary>
/// Starts the asynchronous voice lookup and returns immediately.
/// </summary>
/// <param name="gender">Value each voice's "ssmlGender" must equal.</param>
/// <param name="callback">Receives the chosen voice, or null when the lookup fails.</param>
/// <remarks>
/// NOTE(review): despite the name, the coroutine this starts filters on language
/// codes containing "en-", not German ones.
/// </remarks>
public void GetRandomGermanVoice(string gender, Action<JToken> callback) =>
    StartCoroutine(GetRandomGermanVoiceCoroutine(gender, callback));
|
||||||
|
|
||||||
|
/// <summary>
/// Downloads the voice list, keeps the entries matching the language/gender filter,
/// picks one at random, attaches a random "pitch" value to it and hands it to
/// <paramref name="callback"/>.
/// </summary>
/// <param name="gender">Value each voice's "ssmlGender" must equal.</param>
/// <param name="callback">
/// Invoked exactly once: with the chosen voice, or with null when the request
/// fails or no voice matches. May be null, in which case nothing is reported.
/// </param>
private IEnumerator GetRandomGermanVoiceCoroutine(string gender, Action<JToken> callback)
{
    string url = $"https://texttospeech.googleapis.com/v1beta1/voices?key={_googelCloudApiKey}";

    using (UnityWebRequest webRequest = UnityWebRequest.Get(url))
    {
        yield return webRequest.SendWebRequest();

        // Same result-based check the other requests in this file use; it replaces
        // the obsolete isNetworkError / isHttpError properties.
        if (webRequest.result != UnityWebRequest.Result.Success)
        {
            Debug.LogError(webRequest.error);
            callback?.Invoke(null);
            yield break;
        }

        List<JToken> candidates = FilterVoices(webRequest.downloadHandler.text, gender);
        if (candidates.Count == 0)
        {
            Debug.LogError("GoogleCloud: No matching voice found.");
            callback?.Invoke(null);
            yield break;
        }

        // Pick one of the filtered voices at random.
        JToken randomVoice = candidates[UnityEngine.Random.Range(0, candidates.Count)];
        randomVoice["pitch"] = SampleRandomPitch();

        callback?.Invoke(randomVoice);
    }
}

/// <summary>
/// Parses the voice-list JSON and returns the entries whose first language code
/// contains "en-", whose "ssmlGender" equals <paramref name="gender"/> and whose
/// name does not contain "Standard". Malformed input yields an empty list.
/// </summary>
/// <remarks>
/// NOTE(review): the surrounding method names say "German" but the filter has
/// always been "en-"; kept unchanged — confirm which language is intended.
/// </remarks>
private static List<JToken> FilterVoices(string voicesJson, string gender)
{
    var matches = new List<JToken>();

    JArray voices;
    try
    {
        voices = JObject.Parse(voicesJson)["voices"] as JArray;
    }
    catch (JsonException ex)
    {
        Debug.LogError($"GoogleCloud: Could not parse voice list: {ex.Message}");
        return matches;
    }

    if (voices == null)
    {
        return matches;
    }

    foreach (JToken entry in voices)
    {
        // Skip anything that is not a JSON object or lacks the fields we filter on.
        if (!(entry is JObject candidate))
        {
            continue;
        }

        string languageCode = (candidate["languageCodes"] as JArray)?.First?.ToString();
        string ssmlGender = candidate["ssmlGender"]?.ToString();
        string name = candidate["name"]?.ToString();
        if (languageCode == null || name == null)
        {
            continue;
        }

        if (languageCode.Contains("en-") && ssmlGender == gender && !name.Contains("Standard"))
        {
            matches.Add(candidate);
        }
    }

    return matches;
}

/// <summary>
/// Draws a normally distributed value (mean 0, standard deviation 4) using the
/// Box-Muller transform, clamps it to [-20, 20] and rounds it to an integer.
/// </summary>
private static int SampleRandomPitch()
{
    const double mean = 0;
    const double stdDev = 4;

    var random = new System.Random();
    // 1 - NextDouble() lies in (0, 1], which keeps Math.Log away from zero.
    double u1 = 1.0 - random.NextDouble();
    double u2 = 1.0 - random.NextDouble();
    double standardNormal = Math.Sqrt(-2.0 * Math.Log(u1)) * Math.Sin(2.0 * Math.PI * u2);

    double pitch = mean + stdDev * standardNormal;
    double clampedPitch = Math.Max(Math.Min(pitch, 20), -20);
    return (int)Math.Round(clampedPitch);
}
|
||||||
|
|
||||||
|
/// <summary>
/// Coroutine bridge between the Task-based text generation and the coroutine-based
/// speech synthesis: waits for the generated text, then starts synthesis with it.
/// </summary>
/// <param name="context">Bullet-point context forwarded to <c>GenerateText</c>.</param>
IEnumerator GenerateAndSynthesizeText(string context)
{
    Task<string> generateTextTask = GenerateText(context);

    // Poll once per frame until the task has finished in any state.
    yield return new WaitUntil(() => generateTextTask.IsCompleted);

    if (generateTextTask.IsFaulted)
    {
        Debug.LogError(generateTextTask.Exception.ToString());
        yield break;
    }

    // A canceled task (for example an HTTP timeout) is completed but not faulted;
    // reading Result would throw inside this coroutine.
    if (generateTextTask.IsCanceled)
    {
        Debug.LogError("ChatGPT: Text generation was canceled.");
        yield break;
    }

    string generatedText = generateTextTask.Result;
    if (string.IsNullOrWhiteSpace(generatedText))
    {
        Debug.LogError("ChatGPT: Received an empty response; nothing to synthesize.");
        yield break;
    }

    StartCoroutine(SynthesizeSpeech(generatedText));
}
|
||||||
|
|
||||||
|
/// <summary>
/// Asks the chat model for a short NPC line based on <paramref name="context"/>.
/// A new API client and a new conversation are created on every call and stored
/// in <c>_openAiApi</c> and <c>_conversation</c>.
/// </summary>
/// <param name="context">Text appended to the fixed prompt prefix.</param>
/// <returns>The chat model's reply.</returns>
async Task<string> GenerateText(string context)
{
    Model chatModel = Model.ChatGPTTurbo;

    _openAiApi = new OpenAIAPI(_openAiApiKey);

    var request = new ChatRequest
    {
        Temperature = 0.9,
        Model = chatModel
    };

    _conversation = _openAiApi.Chat.CreateConversation(request);
    _conversation.AppendUserInput(_prompt + context);

    return await _conversation.GetResponseFromChatbotAsync();
}
|
||||||
|
|
||||||
|
/// <summary>
/// Sends <paramref name="textToSynthesize"/> to the text:synthesize endpoint using
/// the currently selected <c>voice</c>, and passes the returned audio to
/// <c>SetAudioClip</c>. Failures are logged and end the coroutine.
/// </summary>
/// <param name="textToSynthesize">Plain text to be spoken.</param>
IEnumerator SynthesizeSpeech(string textToSynthesize)
{
    // The request below reads several properties of the voice; without one it
    // would fail with a NullReferenceException.
    if (voice == null)
    {
        Debug.LogError("GoogleCloud: No voice selected; cannot synthesize speech.");
        yield break;
    }

    var requestObject = new
    {
        input = new { text = textToSynthesize },
        voice = new
        {
            languageCode = ((JArray)voice["languageCodes"])[0].ToString(),
            name = voice["name"],
            ssmlGender = voice["ssmlGender"]
        },
        audioConfig = new
        {
            // LINEAR16 output is later loaded as a WAV file.
            audioEncoding = "LINEAR16",
            speakingRate = speakingSpeed,
            pitch = voice["pitch"]
        }
    };

    string jsonRequestBody = JsonConvert.SerializeObject(requestObject);
    byte[] requestBody = Encoding.UTF8.GetBytes(jsonRequestBody);

    using (UnityWebRequest www = new UnityWebRequest(_google_CloudApiUrl + "?key=" + _googelCloudApiKey, "POST"))
    {
        www.uploadHandler = new UploadHandlerRaw(requestBody);
        www.downloadHandler = new DownloadHandlerBuffer();
        www.SetRequestHeader("Content-Type", "application/json");

        yield return www.SendWebRequest();

        // Anything other than Success (connection, protocol or data-processing
        // error) means there is no usable response body.
        if (www.result != UnityWebRequest.Result.Success)
        {
            Debug.LogError("GoogleCloud: Error: " + www.error);
            Debug.LogError("GoogleCloud: Response: " + www.downloadHandler.text);
            yield break;
        }

        TextToSpeechResponse response = JsonConvert.DeserializeObject<TextToSpeechResponse>(www.downloadHandler.text);
        if (response == null || string.IsNullOrEmpty(response.audioContent))
        {
            // Decoding a missing payload would throw further down the chain.
            Debug.LogError("GoogleCloud: Response contained no audio content.");
            yield break;
        }

        SetAudioClip(response.audioContent);
    }
}
|
||||||
|
|
||||||
|
/// <summary>
/// Decodes the base64 audio, writes it to this instance's file inside
/// Application.temporaryCachePath, starts loading that file as an AudioClip,
/// and also stores a copy via <c>SaveAudioFile</c>.
/// </summary>
/// <param name="base64AudioContent">Base64-encoded audio data.</param>
private void SetAudioClip(string base64AudioContent)
{
    byte[] wavData = Convert.FromBase64String(base64AudioContent);
    string cacheFile = Path.Combine(Application.temporaryCachePath, _tmpPath);

    File.WriteAllBytes(cacheFile, wavData);
    StartCoroutine(LoadWavAudio(cacheFile));

    SaveAudioFile(base64AudioContent);
}
|
||||||
|
|
||||||
|
/// <summary>
/// Loads the WAV file at <paramref name="path"/> as an AudioClip, assigns it to
/// the cached AudioSource and plays it. Failures are logged and nothing is played.
/// </summary>
/// <param name="path">File system path of the WAV file; prefixed with "file:///".</param>
IEnumerator LoadWavAudio(string path)
{
    using (UnityWebRequest www = UnityWebRequestMultimedia.GetAudioClip("file:///" + path, AudioType.WAV))
    {
        yield return www.SendWebRequest();

        // Checking for Success also catches DataProcessingError, which is what an
        // undecodable file reports.
        if (www.result != UnityWebRequest.Result.Success)
        {
            Debug.LogError("Fehler beim Laden des AudioClips: " + www.error);
            yield break;
        }

        AudioClip loadedClip = DownloadHandlerAudioClip.GetContent(www);
        if (loadedClip == null)
        {
            Debug.LogError("GoogleCloud: Downloaded audio could not be turned into an AudioClip.");
            yield break;
        }

        _audioSource.clip = loadedClip;
        _audioSource.Play();
    }
}
|
||||||
|
|
||||||
|
|
||||||
|
/// <summary>
/// Decodes the base64 audio and writes it to the fixed <c>_outputPath</c>.
/// A failed write is logged instead of thrown, so a missing or unwritable
/// target directory does not abort the calling coroutine.
/// </summary>
/// <param name="base64AudioContent">Base64-encoded audio data.</param>
private void SaveAudioFile(string base64AudioContent)
{
    byte[] audioBytes = Convert.FromBase64String(base64AudioContent);

    try
    {
        File.WriteAllBytes(_outputPath, audioBytes);
    }
    catch (Exception ex) when (ex is IOException || ex is UnauthorizedAccessException)
    {
        // _outputPath is a machine-specific absolute path; on other machines the
        // directory usually does not exist.
        Debug.LogError($"GoogleCloud: Could not save WAV file as {_outputPath}: {ex.Message}");
        return;
    }

    Debug.Log($"GoogleCloud: Successfully saved WAV file as {_outputPath}");
}
|
||||||
|
}
|
|
@ -0,0 +1,11 @@
|
||||||
|
fileFormatVersion: 2
|
||||||
|
guid: ef5183dac70a54b4cbed3e05d617524f
|
||||||
|
MonoImporter:
|
||||||
|
externalObjects: {}
|
||||||
|
serializedVersion: 2
|
||||||
|
defaultReferences: []
|
||||||
|
executionOrder: 0
|
||||||
|
icon: {instanceID: 0}
|
||||||
|
userData:
|
||||||
|
assetBundleName:
|
||||||
|
assetBundleVariant:
|
Loading…
Reference in New Issue