using System;
using System.Collections;
using System.Collections.Generic;
using System.IO;
using System.Text;
using System.Threading.Tasks;
using Newtonsoft.Json;
using Newtonsoft.Json.Linq;
using OpenAI_API;
using OpenAI_API.Chat;
using OpenAI_API.Models;
using UnityEngine;
using UnityEngine.Networking;

/// <summary>
/// JSON body returned by the Google Cloud <c>text:synthesize</c> request issued in
/// <see cref="Text2Speech"/>.
/// </summary>
[Serializable]
public class TextToSpeechResponse
{
    /// <summary>Base64-encoded audio payload.</summary>
    public string audioContent;
}

/// <summary>
/// Generates a short NPC line with the OpenAI chat API from <see cref="context"/>,
/// synthesizes it with Google Cloud Text-to-Speech and plays the result on the
/// <see cref="AudioSource"/> attached to the same GameObject.
/// </summary>
/// <remarks>
/// API keys are intentionally NOT stored in this file. Set them in the Inspector or through the
/// <c>GOOGLE_CLOUD_API_KEY</c> / <c>OPENAI_API_KEY</c> environment variables. Any key that was
/// ever committed with an earlier revision of this script must be treated as leaked and revoked.
/// </remarks>
public class Text2Speech : MonoBehaviour
{
    /// <summary>Comma-separated bullet points describing what just happened to the NPC.</summary>
    public string context = "going for a walk, falling, explosion, blood";

    /// <summary>Value compared against a voice's <c>ssmlGender</c> when picking a random voice.</summary>
    public string gender = "MALE";

    /// <summary>
    /// Substring that a voice's first language code must contain to be eligible for random selection.
    /// </summary>
    public string languageFilter = "en-";

    /// <summary>Voice description used for synthesis; picked at random on first use when null.</summary>
    public JToken voice = null;

    /// <summary>Sent as <c>speakingRate</c> in the synthesis request.</summary>
    public double speakingSpeed = 1.1;

    /// <summary>Set to true to replay the current clip; reset automatically in <see cref="Update"/>.</summary>
    public bool playSound = false;

    /// <summary>Set to true to generate and speak a new line; reset automatically in <see cref="Update"/>.</summary>
    public bool generate = false;

    private const string GoogleCloudApiKeyEnvVar = "GOOGLE_CLOUD_API_KEY";
    private const string OpenAiApiKeyEnvVar = "OPENAI_API_KEY";

    // One shared generator: allocating a System.Random per call is wasteful and can repeat seeds.
    private static readonly System.Random PitchRandom = new System.Random();

    // Secrets must never be hard-coded; empty values fall back to the environment in Start().
    [SerializeField] private string _googleCloudApiKey = "";
    [SerializeField] private string _openAiApiKey = "";

    // Optional extra copy of the last generated WAV. Leave empty to disable the export.
    [SerializeField] private string _outputPath = "C:\\Users\\PC\\VoiceTest\\Assets\\Scirpts\\audio.wav";

    private readonly string _googleCloudApiUrl = "https://texttospeech.googleapis.com/v1/text:synthesize";
    private readonly string _prompt = "Write a short text for an NPC in a game.The text should be based on the following bullet-point context, which describes the events of the last moments. Remember to only respond with the short text that the ONE NPC should speak! \nThe context is: ";

    private AudioSource _audioSource;
    private string _tmpPath = "tmp_audio.wav";
    private OpenAIAPI _openAiApi;
    private Conversation _conversation;

    /// <summary>Caches the AudioSource, derives a per-instance temp file name and resolves the API keys.</summary>
    void Start()
    {
        // The instance id keeps several Text2Speech components from overwriting each other's temp file.
        _tmpPath = "tmp_audio_" + GetInstanceID().ToString() + ".wav";

        _audioSource = GetComponent<AudioSource>();
        if (_audioSource == null)
        {
            Debug.LogError("Text2Speech: No AudioSource found on this GameObject; audio cannot be played.");
        }

        _googleCloudApiKey = ResolveApiKey(_googleCloudApiKey, GoogleCloudApiKeyEnvVar);
        _openAiApiKey = ResolveApiKey(_openAiApiKey, OpenAiApiKeyEnvVar);
    }

    /// <summary>Polls the <see cref="playSound"/> and <see cref="generate"/> trigger flags.</summary>
    public void Update()
    {
        if (playSound)
        {
            playSound = false;
            if (_audioSource != null)
            {
                _audioSource.Play();
            }
        }

        if (!generate)
        {
            return;
        }

        generate = false;

        if (!HasApiKeys())
        {
            return;
        }

        if (voice != null)
        {
            StartCoroutine(GenerateAndSynthesizeText(context));
            return;
        }

        GetRandomGermanVoice(gender, v =>
        {
            if (v == null)
            {
                // Without a voice the synthesis request cannot be built, so do not spend an
                // OpenAI call on text that could never be spoken.
                Debug.LogError("GoogleCloud: No voice available; skipping text generation.");
                return;
            }

            voice = v;
            StartCoroutine(GenerateAndSynthesizeText(context));
        });
    }

    /// <summary>
    /// Asynchronously picks a random non-"Standard" voice matching <paramref name="gender"/> and
    /// <see cref="languageFilter"/>. Despite the historical method name, the language is determined by
    /// <see cref="languageFilter"/> (default "en-").
    /// </summary>
    /// <param name="gender">Required <c>ssmlGender</c> value of the voice.</param>
    /// <param name="callback">Receives the chosen voice (with a random "pitch" added) or null on failure.</param>
    public void GetRandomGermanVoice(string gender, Action<JToken> callback)
    {
        StartCoroutine(GetRandomGermanVoiceCoroutine(gender, callback));
    }

    /// <summary>Downloads the voice list, filters it and reports one random match through <paramref name="callback"/>.</summary>
    private IEnumerator GetRandomGermanVoiceCoroutine(string gender, Action<JToken> callback)
    {
        string url = $"https://texttospeech.googleapis.com/v1beta1/voices?key={_googleCloudApiKey}";
        using (UnityWebRequest webRequest = UnityWebRequest.Get(url))
        {
            yield return webRequest.SendWebRequest();

            if (webRequest.result != UnityWebRequest.Result.Success)
            {
                Debug.LogError(webRequest.error);
                callback?.Invoke(null);
                yield break;
            }

            List<JToken> filteredVoices = FilterVoices(webRequest.downloadHandler.text, gender);
            if (filteredVoices.Count == 0)
            {
                Debug.LogError("GoogleCloud: No matching voice found.");
                callback?.Invoke(null);
                yield break;
            }

            // Pick a random voice from the filtered results and give it a random pitch.
            JToken randomVoice = filteredVoices[UnityEngine.Random.Range(0, filteredVoices.Count)];
            randomVoice["pitch"] = SampleRandomPitch();
            callback?.Invoke(randomVoice);
        }
    }

    /// <summary>
    /// Parses the voice-list response and keeps the voices whose first language code contains
    /// <see cref="languageFilter"/>, whose <c>ssmlGender</c> equals <paramref name="gender"/> and whose
    /// name does not contain "Standard". Returns an empty list when the JSON cannot be used.
    /// </summary>
    private List<JToken> FilterVoices(string json, string gender)
    {
        var matches = new List<JToken>();

        JArray voices;
        try
        {
            voices = JObject.Parse(json)["voices"] as JArray;
        }
        catch (JsonException e)
        {
            Debug.LogError("GoogleCloud: Could not parse voice list: " + e.Message);
            return matches;
        }

        if (voices == null)
        {
            return matches;
        }

        string filter = languageFilter ?? string.Empty;
        foreach (JToken candidate in voices)
        {
            JArray languageCodes = candidate["languageCodes"] as JArray;
            string name = candidate["name"]?.ToString();
            if (languageCodes == null || languageCodes.Count == 0 || name == null)
            {
                continue;
            }

            string languageCode = languageCodes[0].ToString();
            string ssmlGender = candidate["ssmlGender"]?.ToString();
            if (languageCode.Contains(filter) && ssmlGender == gender && !name.Contains("Standard"))
            {
                matches.Add(candidate);
            }
        }

        return matches;
    }

    /// <summary>
    /// Draws an integer pitch from a normal distribution (mean 0, standard deviation 4) using the
    /// Box-Muller transform, clamped to [-20, 20].
    /// </summary>
    private static int SampleRandomPitch()
    {
        const double mean = 0;
        const double stdDev = 4;

        // 1 - NextDouble() lies in (0, 1], which keeps Math.Log away from zero.
        double u1 = 1.0 - PitchRandom.NextDouble();
        double u2 = 1.0 - PitchRandom.NextDouble();
        double randStdNormal = Math.Sqrt(-2.0 * Math.Log(u1)) * Math.Sin(2.0 * Math.PI * u2);
        double randNormal = mean + stdDev * randStdNormal;
        double clampedRandNormal = Math.Max(Math.Min(randNormal, 20), -20);
        return (int)Math.Round(clampedRandNormal);
    }

    /// <summary>Waits for the chat completion and, on success, starts speech synthesis for its text.</summary>
    IEnumerator GenerateAndSynthesizeText(string context)
    {
        Task<string> generateTextTask = GenerateText(context);
        yield return new WaitUntil(() => generateTextTask.IsCompleted);

        if (generateTextTask.IsFaulted)
        {
            Debug.LogError(generateTextTask.Exception.ToString());
            yield break;
        }

        if (generateTextTask.IsCanceled)
        {
            // Reading Result on a cancelled task would throw.
            Debug.LogError("ChatGPT: Text generation was cancelled.");
            yield break;
        }

        string chatGptResponse = generateTextTask.Result;
        if (string.IsNullOrEmpty(chatGptResponse))
        {
            Debug.LogError("ChatGPT: Received an empty response.");
            yield break;
        }

        StartCoroutine(SynthesizeSpeech(chatGptResponse));
    }

    /// <summary>Asks the chat model for a single NPC line based on <paramref name="context"/>.</summary>
    /// <returns>The text returned by the chat model.</returns>
    async Task<string> GenerateText(string context)
    {
        if (_openAiApi == null)
        {
            _openAiApi = new OpenAIAPI(_openAiApiKey);
        }

        ChatRequest chatRequest = new ChatRequest
        {
            Temperature = 0.9,
            Model = Model.ChatGPTTurbo
        };

        // A fresh conversation per call: every generated line depends only on the current context.
        _conversation = _openAiApi.Chat.CreateConversation(chatRequest);
        _conversation.AppendUserInput(_prompt + context);
        string response = await _conversation.GetResponseFromChatbotAsync();
        return response;
    }

    /// <summary>
    /// Sends <paramref name="textToSynthesize"/> to the text:synthesize endpoint using the current
    /// <see cref="voice"/> and hands the returned audio to <see cref="SetAudioClip"/>.
    /// </summary>
    IEnumerator SynthesizeSpeech(string textToSynthesize)
    {
        if (voice == null)
        {
            Debug.LogError("GoogleCloud: No voice selected; cannot synthesize speech.");
            yield break;
        }

        JArray languageCodes = voice["languageCodes"] as JArray;
        if (languageCodes == null || languageCodes.Count == 0)
        {
            Debug.LogError("GoogleCloud: Selected voice has no language code; cannot synthesize speech.");
            yield break;
        }

        var requestObject = new
        {
            input = new { text = textToSynthesize },
            voice = new
            {
                languageCode = languageCodes[0].ToString(),
                name = voice["name"],
                ssmlGender = voice["ssmlGender"]
            },
            audioConfig = new
            {
                audioEncoding = "LINEAR16",
                speakingRate = speakingSpeed,
                pitch = voice["pitch"]
            }
        };

        string jsonRequestBody = JsonConvert.SerializeObject(requestObject);
        byte[] requestBody = Encoding.UTF8.GetBytes(jsonRequestBody);

        using (UnityWebRequest www = new UnityWebRequest(_googleCloudApiUrl + "?key=" + _googleCloudApiKey, "POST"))
        {
            www.uploadHandler = new UploadHandlerRaw(requestBody);
            www.downloadHandler = new DownloadHandlerBuffer();
            www.SetRequestHeader("Content-Type", "application/json");

            yield return www.SendWebRequest();

            if (www.result != UnityWebRequest.Result.Success)
            {
                Debug.LogError("GoogleCloud: Error: " + www.error);
                Debug.LogError("GoogleCloud: Response: " + www.downloadHandler.text);
                yield break;
            }

            string audioContent = ExtractAudioContent(www.downloadHandler.text);
            if (string.IsNullOrEmpty(audioContent))
            {
                Debug.LogError("GoogleCloud: Response did not contain any audio content.");
                yield break;
            }

            SetAudioClip(audioContent);
        }
    }

    /// <summary>Returns the <c>audioContent</c> of a synthesis response, or null if it cannot be parsed.</summary>
    private static string ExtractAudioContent(string json)
    {
        try
        {
            TextToSpeechResponse response = JsonConvert.DeserializeObject<TextToSpeechResponse>(json);
            return response?.audioContent;
        }
        catch (JsonException e)
        {
            Debug.LogError("GoogleCloud: Could not parse synthesis response: " + e.Message);
            return null;
        }
    }

    /// <summary>
    /// Decodes the Base64 audio, writes it to a temp file, starts loading it as the AudioSource clip
    /// and additionally exports it via <see cref="SaveAudioFile"/>.
    /// </summary>
    private void SetAudioClip(string base64AudioContent)
    {
        byte[] audioBytes;
        try
        {
            audioBytes = Convert.FromBase64String(base64AudioContent);
        }
        catch (FormatException e)
        {
            Debug.LogError("GoogleCloud: Audio content is not valid Base64: " + e.Message);
            return;
        }

        string tempFilePath = Path.Combine(Application.temporaryCachePath, _tmpPath);
        try
        {
            File.WriteAllBytes(tempFilePath, audioBytes);
        }
        catch (Exception e) when (e is IOException || e is UnauthorizedAccessException)
        {
            Debug.LogError("GoogleCloud: Could not write temporary audio file: " + e.Message);
            return;
        }

        StartCoroutine(LoadWavAudio(tempFilePath));
        SaveAudioFile(base64AudioContent);
    }

    /// <summary>Loads the WAV file at <paramref name="path"/> into the AudioSource and plays it.</summary>
    IEnumerator LoadWavAudio(string path)
    {
        using (UnityWebRequest www = UnityWebRequestMultimedia.GetAudioClip("file:///" + path, AudioType.WAV))
        {
            yield return www.SendWebRequest();

            if (www.result != UnityWebRequest.Result.Success)
            {
                Debug.LogError("Fehler beim Laden des AudioClips: " + www.error);
                yield break;
            }

            if (_audioSource == null)
            {
                Debug.LogError("Text2Speech: No AudioSource available to play the generated clip.");
                yield break;
            }

            _audioSource.clip = DownloadHandlerAudioClip.GetContent(www);
            _audioSource.Play();
        }
    }

    /// <summary>
    /// Best-effort export of the audio to <c>_outputPath</c>. A failure is logged and does not
    /// interrupt playback, since the configured path may not exist on every machine.
    /// </summary>
    private void SaveAudioFile(string base64AudioContent)
    {
        if (string.IsNullOrEmpty(_outputPath))
        {
            return;
        }

        try
        {
            byte[] audioBytes = Convert.FromBase64String(base64AudioContent);
            File.WriteAllBytes(_outputPath, audioBytes);
            Debug.Log($"GoogleCloud: Successfully saved WAV file as {_outputPath}");
        }
        catch (Exception e) when (e is IOException || e is UnauthorizedAccessException || e is FormatException)
        {
            Debug.LogWarning($"GoogleCloud: Could not save WAV file as {_outputPath}: {e.Message}");
        }
    }

    /// <summary>Returns the Inspector value if set, otherwise the named environment variable (or "").</summary>
    private static string ResolveApiKey(string configured, string envVarName)
    {
        if (!string.IsNullOrWhiteSpace(configured))
        {
            return configured.Trim();
        }

        return Environment.GetEnvironmentVariable(envVarName) ?? string.Empty;
    }

    /// <summary>Checks that both API keys are configured and logs what is missing otherwise.</summary>
    private bool HasApiKeys()
    {
        bool ok = true;

        if (string.IsNullOrEmpty(_googleCloudApiKey))
        {
            Debug.LogError($"Text2Speech: Google Cloud API key missing. Set it in the Inspector or via {GoogleCloudApiKeyEnvVar}.");
            ok = false;
        }

        if (string.IsNullOrEmpty(_openAiApiKey))
        {
            Debug.LogError($"Text2Speech: OpenAI API key missing. Set it in the Inspector or via {OpenAiApiKeyEnvVar}.");
            ok = false;
        }

        return ok;
    }
}