using System;
using System.Collections;
using System.Collections.Generic;
using System.IO;
using System.Text;
using System.Threading.Tasks;
using Newtonsoft.Json;
using Newtonsoft.Json.Linq;
using OpenAI_API;
using OpenAI_API.Chat;
using OpenAI_API.Models;
using UnityEngine;
using UnityEngine.Networking;

/// <summary>
/// Shape of the JSON body returned by the speech synthesis request.
/// </summary>
[Serializable]
public class TextToSpeechResponse
{
    /// <summary>Base64-encoded audio data; decoded with <see cref="Convert.FromBase64String(string)"/>.</summary>
    public string audioContent;
}

/// <summary>
/// Generates a short NPC line with the OpenAI chat API from a bullet-point context,
/// synthesizes it with the Google Cloud Text-to-Speech REST API and plays the result
/// through the <see cref="AudioSource"/> on the same GameObject.
/// </summary>
/// <remarks>
/// The work is driven by the <see cref="generate"/> and <see cref="playSound"/> flags, which are
/// polled in <see cref="Update"/>. Every failure is logged and aborts the current run.
/// </remarks>
public class Text2Speech : MonoBehaviour
{
    /// <summary>Comma-separated description of recent events that is appended to the prompt.</summary>
    public string context = "going for a walk, falling, explosion, blood";

    /// <summary>Value compared against the <c>ssmlGender</c> field of each listed voice.</summary>
    public string gender = "MALE";

    /// <summary>Voice entry used for synthesis; chosen at random on first use when null.</summary>
    public JToken voice = null;

    /// <summary>Sent as <c>speakingRate</c> in the synthesis request.</summary>
    public double speakingSpeed = 1.1;

    /// <summary>Set to true to replay the current clip; reset on the next <see cref="Update"/>.</summary>
    public bool playSound = false;

    /// <summary>Set to true to start a generation run; reset on the next <see cref="Update"/>.</summary>
    public bool generate = false;

    // Pitch is drawn from a normal distribution and clamped to +/- PitchLimit.
    private const double PitchMean = 0;
    private const double PitchStdDev = 4;
    private const double PitchLimit = 20;

    // One shared generator: creating a new System.Random per call can hand identical
    // sequences to several instances that pick a voice at the same moment.
    private static readonly System.Random PitchRandom = new System.Random();

    private AudioSource _audioSource;

    // Clip created by this component, tracked so it can be destroyed when replaced.
    private AudioClip _generatedClip;

    // SECURITY(review): both API keys below are hard-coded in source. Anyone with access to the
    // repository or a build can read them. They should be rotated and loaded from configuration
    // that is kept out of version control. Left in place here so runtime behavior is unchanged.
    private readonly string _googelCloudApiKey = "AIzaSyCVfKH5YOH9gcPamTtbeX5nPj9wWkKB1y4";
    private readonly string _google_CloudApiUrl = "https://texttospeech.googleapis.com/v1/text:synthesize";
    private readonly string _outputPath = "path/to/audio.wav";
    private string _tmpPath = "tmp_audio.wav";

    private OpenAIAPI _openAiApi;
    private Conversation? _conversation;
    private readonly string _openAiApiKey = "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";
    private readonly string _prompt = "Write a short text for a Developer as an NPC in a game. The Developer works at a small gamedevelopement office and its manager is called Gottfried who is responsable for all the Developers needs. The text should be based on the following bullet-point context, which describes the events of the last moments. Remember to only respond with the short text that only this ONE Developer should speak and nothing else! The context is: ";

    /// <summary>Derives a per-instance temp file name and caches the AudioSource.</summary>
    void Start()
    {
        // The instance id keeps several components from writing to the same temp file.
        _tmpPath = "tmp_audio_" + GetInstanceID().ToString() + ".wav";
        _audioSource = GetComponent<AudioSource>();
        if (_audioSource == null)
        {
            Debug.LogError("Text2Speech: No AudioSource found on this GameObject.");
        }
    }

    /// <summary>Polls the <see cref="playSound"/> and <see cref="generate"/> flags.</summary>
    public void Update()
    {
        if (playSound)
        {
            playSound = false;
            _audioSource.Play();
        }

        if (!generate)
        {
            return;
        }

        generate = false;

        if (voice != null)
        {
            StartCoroutine(GenerateAndSynthesizeText(context));
            return;
        }

        GetRandomGermanVoice(gender, v =>
        {
            if (v == null)
            {
                // The lookup has already logged the cause. Without a voice the synthesis
                // request cannot be built, so stop before spending a text generation call.
                Debug.LogError("GoogleCloud: No voice available, speech generation aborted.");
                return;
            }

            voice = v;
            StartCoroutine(GenerateAndSynthesizeText(context));
        });
    }

    /// <summary>Stores <paramref name="c"/> as the context and requests a generation run.</summary>
    /// <param name="c">Context text appended to the prompt.</param>
    public void Generate(string c)
    {
        context = c;
        generate = true;
    }

    /// <summary>
    /// Looks up the available voices and passes a randomly chosen match to <paramref name="callback"/>.
    /// </summary>
    /// <remarks>
    /// NOTE(review): despite the name, the filter selects voices whose first language code
    /// contains "en-", not German ones. Confirm which language is intended.
    /// </remarks>
    /// <param name="gender">Required <c>ssmlGender</c> value of the voice.</param>
    /// <param name="callback">Receives the chosen voice, or null when the lookup failed or nothing matched.</param>
    public void GetRandomGermanVoice(string gender, Action<JToken> callback)
    {
        StartCoroutine(GetRandomGermanVoiceCoroutine(gender, callback));
    }

    /// <summary>Coroutine behind <see cref="GetRandomGermanVoice"/>.</summary>
    private IEnumerator GetRandomGermanVoiceCoroutine(string gender, Action<JToken> callback)
    {
        string url = $"https://texttospeech.googleapis.com/v1beta1/voices?key={_googelCloudApiKey}";
        using (UnityWebRequest webRequest = UnityWebRequest.Get(url))
        {
            yield return webRequest.SendWebRequest();

            if (webRequest.result != UnityWebRequest.Result.Success)
            {
                Debug.LogError(webRequest.error);
                callback(null);
                yield break;
            }

            List<JToken> filteredVoices = null;
            try
            {
                filteredVoices = FilterVoices(webRequest.downloadHandler.text, gender);
            }
            catch (JsonException ex)
            {
                Debug.LogError("GoogleCloud: Could not parse voice list: " + ex.Message);
            }

            if (filteredVoices == null)
            {
                callback(null);
                yield break;
            }

            if (filteredVoices.Count == 0)
            {
                Debug.LogError("GoogleCloud: No matching voice found.");
                callback(null);
                yield break;
            }

            // Pick a random voice from the filtered results and attach a random pitch to it.
            JToken randomVoice = filteredVoices[UnityEngine.Random.Range(0, filteredVoices.Count)];
            randomVoice["pitch"] = SampleRandomPitch();
            callback(randomVoice);
        }
    }

    /// <summary>
    /// Returns the voices from a voice-list response that match the gender, whose first
    /// language code contains "en-" and whose name does not contain "Standard".
    /// </summary>
    /// <param name="json">Raw JSON body of the voice-list response.</param>
    /// <param name="gender">Required <c>ssmlGender</c> value.</param>
    /// <returns>The matching entries; empty when there is no <c>voices</c> array or no match.</returns>
    private static List<JToken> FilterVoices(string json, string gender)
    {
        var matches = new List<JToken>();
        JArray voices = JObject.Parse(json)["voices"] as JArray;
        if (voices == null)
        {
            return matches;
        }

        foreach (JToken candidate in voices)
        {
            string languageCode = (candidate["languageCodes"] as JArray)?.First?.ToString();
            string ssmlGender = candidate["ssmlGender"]?.ToString();
            string name = candidate["name"]?.ToString();

            // Entries without the fields needed for filtering cannot match.
            if (languageCode == null || name == null)
            {
                continue;
            }

            if (languageCode.Contains("en-") && ssmlGender == gender && !name.Contains("Standard"))
            {
                matches.Add(candidate);
            }
        }

        return matches;
    }

    /// <summary>
    /// Draws a pitch from a normal distribution (mean 0, standard deviation 4) via the
    /// Box-Muller transform, clamps it to [-20, 20] and rounds it to an integer.
    /// </summary>
    private static int SampleRandomPitch()
    {
        // 1 - NextDouble() lies in (0, 1], so Math.Log never receives 0.
        double u1 = 1.0 - PitchRandom.NextDouble();
        double u2 = 1.0 - PitchRandom.NextDouble();
        double standardNormal = Math.Sqrt(-2.0 * Math.Log(u1)) * Math.Sin(2.0 * Math.PI * u2);
        double pitch = PitchMean + PitchStdDev * standardNormal;
        double clampedPitch = Math.Max(Math.Min(pitch, PitchLimit), -PitchLimit);
        return (int)Math.Round(clampedPitch);
    }

    /// <summary>Waits for the generated text and then starts the speech synthesis.</summary>
    /// <param name="context">Context text appended to the prompt.</param>
    IEnumerator GenerateAndSynthesizeText(string context)
    {
        Task<string> generateTextTask = GenerateText(context);
        yield return new WaitUntil(() => generateTextTask.IsCompleted);

        if (generateTextTask.IsFaulted)
        {
            Debug.LogError(generateTextTask.Exception.ToString());
            yield break;
        }

        if (generateTextTask.IsCanceled)
        {
            // Reading Result on a canceled task would throw.
            Debug.LogError("ChatGPT: Text generation was canceled.");
            yield break;
        }

        string chatPGTresponse = generateTextTask.Result;
        if (string.IsNullOrEmpty(chatPGTresponse))
        {
            Debug.LogError("ChatGPT: Received an empty response.");
            yield break;
        }

        StartCoroutine(SynthesizeSpeech(chatPGTresponse));
    }

    /// <summary>Sends the prompt followed by <paramref name="context"/> as user input to a new conversation.</summary>
    /// <param name="context">Context text appended to the prompt.</param>
    /// <returns>The text returned by the chat model.</returns>
    async Task<string> GenerateText(string context)
    {
        Model model = Model.ChatGPTTurbo;
        _openAiApi = new OpenAIAPI(_openAiApiKey);
        ChatRequest chatRequest = new ChatRequest
        {
            Temperature = 0.9,
            Model = model
        };
        _conversation = _openAiApi.Chat.CreateConversation(chatRequest);
        _conversation.AppendUserInput(_prompt + context);
        string response = await _conversation.GetResponseFromChatbotAsync();
        return response;
    }

    /// <summary>Requests LINEAR16 audio for the text using <see cref="voice"/> and loads it for playback.</summary>
    /// <param name="textToSynthesize">Text to be spoken.</param>
    IEnumerator SynthesizeSpeech(string textToSynthesize)
    {
        if (voice == null)
        {
            Debug.LogError("GoogleCloud: No voice selected, cannot synthesize speech.");
            yield break;
        }

        var requestObject = new
        {
            input = new { text = textToSynthesize },
            voice = new
            {
                languageCode = (voice["languageCodes"] as JArray)?.First?.ToString(),
                name = voice["name"],
                ssmlGender = voice["ssmlGender"]
            },
            audioConfig = new
            {
                audioEncoding = "LINEAR16",
                speakingRate = speakingSpeed,
                pitch = voice["pitch"]
            }
        };
        string jsonRequestBody = JsonConvert.SerializeObject(requestObject);
        byte[] requestBody = Encoding.UTF8.GetBytes(jsonRequestBody);

        using (UnityWebRequest www = new UnityWebRequest(_google_CloudApiUrl + "?key=" + _googelCloudApiKey, "POST"))
        {
            www.uploadHandler = new UploadHandlerRaw(requestBody);
            www.downloadHandler = new DownloadHandlerBuffer();
            www.SetRequestHeader("Content-Type", "application/json");
            yield return www.SendWebRequest();

            if (www.result != UnityWebRequest.Result.Success)
            {
                Debug.LogError("GoogleCloud: Error: " + www.error);
                Debug.LogError("GoogleCloud: Response: " + www.downloadHandler.text);
                yield break;
            }

            TextToSpeechResponse response = JsonConvert.DeserializeObject<TextToSpeechResponse>(www.downloadHandler.text);
            if (response == null || string.IsNullOrEmpty(response.audioContent))
            {
                Debug.LogError("GoogleCloud: Response did not contain any audio content.");
                yield break;
            }

            SetAudioClip(response.audioContent);
        }
    }

    /// <summary>Decodes the audio, writes it to the temp file in the cache directory and starts loading it.</summary>
    /// <param name="base64AudioContent">Base64-encoded WAV data.</param>
    private void SetAudioClip(string base64AudioContent)
    {
        byte[] audioBytes = Convert.FromBase64String(base64AudioContent);
        string tempFilePath = Path.Combine(Application.temporaryCachePath, _tmpPath);
        File.WriteAllBytes(tempFilePath, audioBytes);
        StartCoroutine(LoadWavAudio(tempFilePath));
    }

    /// <summary>Loads the WAV file as an AudioClip, assigns it to the AudioSource and plays it.</summary>
    /// <param name="path">Path of the WAV file.</param>
    IEnumerator LoadWavAudio(string path)
    {
        using (UnityWebRequest www = UnityWebRequestMultimedia.GetAudioClip("file:///" + path, AudioType.WAV))
        {
            yield return www.SendWebRequest();

            if (www.result != UnityWebRequest.Result.Success)
            {
                Debug.LogError("Fehler beim Laden des AudioClips: " + www.error);
                yield break;
            }

            AudioClip previousClip = _generatedClip;
            _generatedClip = DownloadHandlerAudioClip.GetContent(www);
            _audioSource.clip = _generatedClip;

            // Only clips created by this component are destroyed, so a clip assigned
            // from elsewhere is never touched.
            if (previousClip != null)
            {
                Destroy(previousClip);
            }

            _audioSource.Play();
        }
    }

    /// <summary>Releases the clip created by this component.</summary>
    private void OnDestroy()
    {
        if (_generatedClip != null)
        {
            Destroy(_generatedClip);
            _generatedClip = null;
        }
    }

    /// <summary>Decodes the audio and writes it to the configured output path.</summary>
    /// <param name="base64AudioContent">Base64-encoded WAV data.</param>
    private void SaveAudioFile(string base64AudioContent)
    {
        byte[] audioBytes = Convert.FromBase64String(base64AudioContent);

        // File.WriteAllBytes does not create missing directories.
        string directory = Path.GetDirectoryName(_outputPath);
        if (!string.IsNullOrEmpty(directory))
        {
            Directory.CreateDirectory(directory);
        }

        File.WriteAllBytes(_outputPath, audioBytes);
        Debug.Log($"GoogleCloud: Successfully saved WAV file as {_outputPath}");
    }
}