2024-04-04 11:10:57 +02:00
using UnityEngine ;
using UnityEngine.Networking ;
using System ;
using System.IO ;
using System.Text ;
using Newtonsoft.Json ;
using System.Collections ;
using OpenAI_API ;
using OpenAI_API.Chat ;
using OpenAI_API.Models ;
using System.Threading.Tasks ;
using Newtonsoft.Json.Linq ;
using System.Collections.Generic ;
/// <summary>
/// Shape of the JSON answer that <c>SynthesizeSpeech</c> deserializes from the
/// text-to-speech endpoint.
/// </summary>
[Serializable]
public class TextToSpeechResponse
{
    /// <summary>
    /// Synthesized audio as a Base64 string; callers decode it with
    /// <c>Convert.FromBase64String</c>.
    /// </summary>
    public string audioContent;
}
public class Text2Speech : MonoBehaviour
{
// Bullet-point description of recent events; Update() passes it to the text
// generation, where it is appended to _prompt.
public string context = "going for a walk, falling, explosion, blood" ;
// Compared against each voice's "ssmlGender" value when a voice is picked.
public string gender = "MALE" ;
// Voice entry used for synthesis. While it is null, the next generate request
// first fetches a random voice and stores it here.
public JToken voice = null ;
// Sent as audioConfig.speakingRate in the synthesize request.
public double speakingSpeed = 1.1 ;
// Trigger flag: when true, Update() plays the AudioSource and resets the flag.
public bool playSound = false ;
// Trigger flag: when true, Update() starts generation + synthesis and resets the flag.
public bool generate = false ;
// Assigned in Start() from the AudioSource component on this GameObject.
private AudioSource _audioSource ;
// NOTE(review): API key is hard-coded in source. It should be treated as
// compromised, rotated, and loaded from configuration kept out of version control.
private readonly string _googelCloudApiKey = "AIzaSyDNpkVyAUU4AvSwAErVMlZ1lSvGfpkEs0Q" ;
// Endpoint that SynthesizeSpeech posts to (the key is added as a query parameter).
private readonly string _google_CloudApiUrl = "https://texttospeech.googleapis.com/v1/text:synthesize" ;
2024-04-05 17:29:05 +02:00
// Destination file written by SaveAudioFile.
// NOTE(review): relative path that looks like a placeholder — confirm the intended location.
private readonly string _outputPath = "path/to/audio.wav" ;
2024-04-04 11:10:57 +02:00
// File name of the temporary WAV inside Application.temporaryCachePath;
// Start() replaces this default with a name containing the instance ID.
private string _tmpPath = "tmp_audio.wav" ;
// OpenAI client; a new instance is created on every GenerateText call.
private OpenAIAPI _openAiApi ;
// Conversation created by the most recent GenerateText call.
private Conversation ? _conversation ;
// NOTE(review): secret key is hard-coded in source. It should be treated as
// compromised, rotated, and loaded from configuration kept out of version control.
private readonly string _openAiApiKey = "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" ;
2024-04-05 17:29:05 +02:00
// Instruction prefix for the chat request; GenerateText appends the context string directly after it.
private readonly string _prompt = "Write a short text for an NPC in a game. The text should be based on the following bullet-point context, which describes the events of the last moments. Remember to only respond with the short text that the ONE NPC should speak! The context is: " ;
2024-04-04 11:10:57 +02:00
/// <summary>
/// Unity start hook: caches the AudioSource component and builds the
/// temporary WAV file name for this component.
/// </summary>
void Start()
{
    _audioSource = GetComponent<AudioSource>();

    // The file name embeds this component's instance ID.
    _tmpPath = $"tmp_audio_{GetInstanceID()}.wav";
}
/// <summary>
/// Polls the <c>playSound</c> and <c>generate</c> trigger flags once per frame.
/// </summary>
/// <remarks>
/// Each flag is reset before it is acted on, so a request runs once per time
/// the flag is raised. If no voice has been chosen yet, one is fetched first
/// and generation continues from the lookup callback.
/// </remarks>
public void Update()
{
    if (playSound)
    {
        playSound = false;
        _audioSource.Play();
    }

    if (!generate)
    {
        return;
    }

    generate = false;

    if (voice != null)
    {
        StartCoroutine(GenerateAndSynthesizeText(context));
        return;
    }

    GetRandomGermanVoice(gender, (v) =>
    {
        if (v == null)
        {
            // The lookup signals failure by passing null. Without a voice the
            // synthesize request cannot be built, so stop here instead of
            // spending a chat request whose result could never be spoken.
            Debug.LogError("GoogleCloud: No voice available, skipping text generation.");
            return;
        }

        voice = v;
        StartCoroutine(GenerateAndSynthesizeText(context));
    });
}
2024-04-05 17:29:05 +02:00
/// <summary>
/// Requests a new spoken line: raises the <c>generate</c> flag that
/// <c>Update()</c> consumes and stores the context to generate from.
/// </summary>
/// <param name="c">Context text stored in <c>context</c>.</param>
public void Generate(string c)
{
    generate = true;
    context = c;
}
2024-04-04 11:10:57 +02:00
/// <summary>
/// Starts the coroutine that picks a random voice for the given gender.
/// </summary>
/// <param name="gender">Value matched against each voice's "ssmlGender".</param>
/// <param name="callback">
/// Receives the chosen voice, or null when the request fails or no voice matches.
/// </param>
public void GetRandomGermanVoice(string gender, Action<JToken> callback)
    => StartCoroutine(GetRandomGermanVoiceCoroutine(gender, callback));
/// <summary>
/// Downloads the Google Cloud voice list and passes one randomly chosen
/// matching voice to <paramref name="callback"/>.
/// </summary>
/// <remarks>
/// Although the method name says "German", the filter keeps voices whose first
/// language code contains "en-", whose "ssmlGender" equals
/// <paramref name="gender"/>, and whose name does not contain "Standard".
/// The chosen voice receives an additional "pitch" entry (an integer in
/// [-20, 20]) before it is handed to the callback.
/// </remarks>
/// <param name="gender">Value compared (case-sensitively) with "ssmlGender".</param>
/// <param name="callback">
/// Invoked once with the chosen voice, or with null when the request fails,
/// the response cannot be parsed, or no voice matches.
/// </param>
private IEnumerator GetRandomGermanVoiceCoroutine(string gender, Action<JToken> callback)
{
    string url = $"https://texttospeech.googleapis.com/v1beta1/voices?key={_googelCloudApiKey}";
    using (UnityWebRequest webRequest = UnityWebRequest.Get(url))
    {
        yield return webRequest.SendWebRequest();

        // Same result-based check the other requests in this class use; the
        // isNetworkError / isHttpError properties are obsolete.
        if (webRequest.result != UnityWebRequest.Result.Success)
        {
            Debug.LogError(webRequest.error);
            callback?.Invoke(null);
            yield break;
        }

        List<JToken> filteredVoices = FilterVoices(webRequest.downloadHandler.text, gender);
        if (filteredVoices.Count == 0)
        {
            Debug.LogError("GoogleCloud: No matching voice found.");
            callback?.Invoke(null);
            yield break;
        }

        // Pick a random voice from the filtered results.
        JToken randomVoice = filteredVoices[UnityEngine.Random.Range(0, filteredVoices.Count)];
        randomVoice["pitch"] = SampleRandomPitch();
        callback?.Invoke(randomVoice);
    }
}

// Parses the voice-list JSON and returns the entries that pass the
// language / gender / name filter. Returns an empty list when the payload
// cannot be parsed or contains no "voices" array.
private static List<JToken> FilterVoices(string json, string gender)
{
    List<JToken> matches = new List<JToken>();

    JArray voices;
    try
    {
        voices = JObject.Parse(json)["voices"] as JArray;
    }
    catch (JsonException e)
    {
        Debug.LogError("GoogleCloud: Could not parse voice list: " + e.Message);
        return matches;
    }

    if (voices == null)
    {
        return matches;
    }

    foreach (JToken v in voices)
    {
        // Entries with missing fields are skipped instead of aborting the whole lookup.
        string languageCode = (v["languageCodes"] as JArray)?.First?.ToString();
        string ssmlGender = v["ssmlGender"]?.ToString();
        string name = v["name"]?.ToString();
        if (languageCode == null || ssmlGender == null || name == null)
        {
            continue;
        }

        if (languageCode.Contains("en-") && ssmlGender == gender && !name.Contains("Standard"))
        {
            matches.Add(v);
        }
    }

    return matches;
}

// Draws a pitch from a normal distribution (mean 0, standard deviation 4)
// using the Box-Muller transform, clamps it to [-20, 20] and rounds it to
// the nearest integer.
private static int SampleRandomPitch()
{
    const double mean = 0;
    const double stdDev = 4;

    System.Random random = new System.Random();
    // NextDouble() is in [0, 1), so 1 - NextDouble() is in (0, 1] and Math.Log never sees 0.
    double u1 = 1.0 - random.NextDouble();
    double u2 = 1.0 - random.NextDouble();
    double randStdNormal = Math.Sqrt(-2.0 * Math.Log(u1)) * Math.Sin(2.0 * Math.PI * u2);
    double randNormal = mean + stdDev * randStdNormal;
    double clampedRandNormal = Math.Max(Math.Min(randNormal, 20), -20);
    return (int)Math.Round(clampedRandNormal);
}
/// <summary>
/// Generates the NPC text for <paramref name="context"/> and, once the text
/// is available, starts speech synthesis for it.
/// </summary>
/// <param name="context">Context passed on to <c>GenerateText</c>.</param>
/// <returns>Coroutine enumerator; waits until the generation task has completed.</returns>
IEnumerator GenerateAndSynthesizeText(string context)
{
    var generateTextTask = GenerateText(context);
    yield return new WaitUntil(() => generateTextTask.IsCompleted);

    if (generateTextTask.IsFaulted)
    {
        Debug.LogError(generateTextTask.Exception.ToString());
        yield break;
    }

    if (generateTextTask.IsCanceled)
    {
        // Reading Result of a canceled task throws, so handle this state explicitly.
        Debug.LogError("ChatGPT: Text generation was canceled.");
        yield break;
    }

    string chatGptResponse = generateTextTask.Result;
    if (string.IsNullOrWhiteSpace(chatGptResponse))
    {
        // Nothing to speak; avoid sending an empty synthesize request.
        Debug.LogError("ChatGPT: Received an empty response, nothing to synthesize.");
        yield break;
    }

    StartCoroutine(SynthesizeSpeech(chatGptResponse));
}
/// <summary>
/// Sends <c>_prompt</c> followed by <paramref name="context"/> to the OpenAI
/// chat API and returns the reply.
/// </summary>
/// <param name="context">Text appended to <c>_prompt</c> to form the user message.</param>
/// <returns>The chatbot's response text.</returns>
async Task<string> GenerateText(string context)
{
    // A fresh client and conversation are created for every request.
    _openAiApi = new OpenAIAPI(_openAiApiKey);

    var conversation = _openAiApi.Chat.CreateConversation(new ChatRequest
    {
        Temperature = 0.9,
        Model = Model.ChatGPTTurbo
    });
    _conversation = conversation;

    conversation.AppendUserInput(_prompt + context);
    return await conversation.GetResponseFromChatbotAsync();
}
/// <summary>
/// Posts <paramref name="textToSynthesize"/> to the Google Cloud
/// text-to-speech endpoint using the currently selected <c>voice</c> and
/// forwards the returned audio to <c>SetAudioClip</c>.
/// </summary>
/// <remarks>
/// The request asks for LINEAR16 audio, uses <c>speakingSpeed</c> as the
/// speaking rate and the voice's "pitch" entry as pitch. Failures are logged
/// and end the coroutine without touching the audio clip.
/// </remarks>
/// <param name="textToSynthesize">Plain text to be spoken.</param>
/// <returns>Coroutine enumerator; waits for the web request to finish.</returns>
IEnumerator SynthesizeSpeech(string textToSynthesize)
{
    if (voice == null)
    {
        Debug.LogError("GoogleCloud: No voice selected, cannot synthesize speech.");
        yield break;
    }

    string voiceLanguageCode = (voice["languageCodes"] as JArray)?.First?.ToString();
    if (voiceLanguageCode == null)
    {
        Debug.LogError("GoogleCloud: Selected voice has no language code, cannot synthesize speech.");
        yield break;
    }

    var requestObject = new
    {
        input = new { text = textToSynthesize },
        voice = new
        {
            languageCode = voiceLanguageCode,
            name = voice["name"],
            ssmlGender = voice["ssmlGender"]
        },
        audioConfig = new
        {
            audioEncoding = "LINEAR16",
            speakingRate = speakingSpeed,
            pitch = voice["pitch"]
        }
    };
    string jsonRequestBody = JsonConvert.SerializeObject(requestObject);
    byte[] requestBody = Encoding.UTF8.GetBytes(jsonRequestBody);

    using (UnityWebRequest www = new UnityWebRequest(_google_CloudApiUrl + "?key=" + _googelCloudApiKey, "POST"))
    {
        www.uploadHandler = new UploadHandlerRaw(requestBody);
        www.downloadHandler = new DownloadHandlerBuffer();
        www.SetRequestHeader("Content-Type", "application/json");

        yield return www.SendWebRequest();

        // Anything other than Success (including data-processing errors) is a failure.
        if (www.result != UnityWebRequest.Result.Success)
        {
            Debug.LogError("GoogleCloud: Error: " + www.error);
            Debug.LogError("GoogleCloud: Response: " + www.downloadHandler.text);
            yield break;
        }

        string audioContent = null;
        try
        {
            TextToSpeechResponse response = JsonConvert.DeserializeObject<TextToSpeechResponse>(www.downloadHandler.text);
            audioContent = response?.audioContent;
        }
        catch (JsonException e)
        {
            Debug.LogError("GoogleCloud: Could not parse synthesize response: " + e.Message);
        }

        if (string.IsNullOrEmpty(audioContent))
        {
            // Decoding a missing payload would throw inside SetAudioClip.
            Debug.LogError("GoogleCloud: Response did not contain any audio content.");
            yield break;
        }

        SetAudioClip(audioContent);
    }
}
/// <summary>
/// Decodes the Base64 audio, writes it to the temporary WAV file and starts
/// loading that file as the AudioSource's clip.
/// </summary>
/// <param name="base64AudioContent">Base64-encoded audio data.</param>
private void SetAudioClip(string base64AudioContent)
{
    byte[] wavBytes = Convert.FromBase64String(base64AudioContent);

    // The temp file lives in Unity's temporary cache folder under the name held in _tmpPath.
    string wavFile = Path.Combine(Application.temporaryCachePath, _tmpPath);
    File.WriteAllBytes(wavFile, wavBytes);

    IEnumerator loadRoutine = LoadWavAudio(wavFile);
    StartCoroutine(loadRoutine);
}
/// <summary>
/// Loads the WAV file at <paramref name="path"/> as an AudioClip, assigns it
/// to the AudioSource and plays it.
/// </summary>
/// <param name="path">File system path of the WAV file to load.</param>
/// <returns>Coroutine enumerator; waits for the load request to finish.</returns>
IEnumerator LoadWavAudio(string path)
{
    using (UnityWebRequest www = UnityWebRequestMultimedia.GetAudioClip("file:///" + path, AudioType.WAV))
    {
        yield return www.SendWebRequest();

        // Treat every non-success result as a failure, including
        // DataProcessingError (file could be read but not decoded).
        if (www.result != UnityWebRequest.Result.Success)
        {
            Debug.LogError("Fehler beim Laden des AudioClips: " + www.error);
            yield break;
        }

        AudioClip clip = DownloadHandlerAudioClip.GetContent(www);
        if (clip == null)
        {
            Debug.LogError("GoogleCloud: Loaded audio data could not be turned into an AudioClip.");
            yield break;
        }

        if (_audioSource == null)
        {
            Debug.LogError("GoogleCloud: No AudioSource available to play the generated clip.");
            yield break;
        }

        _audioSource.clip = clip;
        _audioSource.Play();
    }
}
/// <summary>
/// Decodes the Base64 audio and writes it to <c>_outputPath</c>.
/// </summary>
/// <param name="base64AudioContent">Base64-encoded audio data.</param>
private void SaveAudioFile(string base64AudioContent)
{
    byte[] audioBytes = Convert.FromBase64String(base64AudioContent);

    // File.WriteAllBytes does not create missing folders, so make sure the
    // target directory exists first (no-op when it already does).
    string directory = Path.GetDirectoryName(_outputPath);
    if (!string.IsNullOrEmpty(directory))
    {
        Directory.CreateDirectory(directory);
    }

    File.WriteAllBytes(_outputPath, audioBytes);
    Debug.Log($"GoogleCloud: Successfully saved WAV file as {_outputPath}");
}
}