diff --git a/Assets/_Client/Scripts/API/ApiClient.cs b/Assets/_Client/Scripts/API/ApiClient.cs
index 598a4bd..2f6cbee 100644
--- a/Assets/_Client/Scripts/API/ApiClient.cs
+++ b/Assets/_Client/Scripts/API/ApiClient.cs
@@ -1,6 +1,6 @@
 using System;
-using System.Collections;
 using System.IO;
+using System.Collections;
 using System.Text;
 
 using UnityEngine;
@@ -56,12 +56,6 @@ namespace PPGIA.X540.Project3.API
 
                 request.downloadHandler = new DownloadHandlerBuffer();
 
-                // Debug.Log($"Sending {method} request to {url}");
-                // Debug.Log(
-                //     payload != null ?
-                //     $"Payload: {JsonUtility.ToJson(payload)}" :
-                //     "No payload.");
-
                 var op = request.SendWebRequest();
                 yield return WaitForTimeout(op, timeoutInSeconds, () =>
                 {
@@ -117,6 +111,195 @@ Response Body: {body}";
                 url, "DELETE", null, timeoutInSeconds, callbackOnSuccess);
         }
 
+        // Reads the file at filePath and sends it to url as the raw body of an
+        // HTTP PUT request (Content-Type: audio/wav). callbackOnSuccess receives
+        // the finished request on success; every failure is logged.
+        internal static IEnumerator UploadAudioDataCoroutine(
+            string url,
+            string filePath,
+            float timeoutInSeconds,
+            Action<UnityWebRequest> callbackOnSuccess)
+        {
+            // Load the payload before sending; an unreadable file is logged here
+            // instead of aborting the coroutine with an unhandled exception.
+            byte[] audioData = null;
+            try
+            {
+                audioData = File.ReadAllBytes(filePath);
+            }
+            catch (Exception ex)
+            {
+                Debug.LogError(
+                    $"Failed to read audio file '{filePath}': {ex.Message}");
+            }
+            if (audioData == null)
+            {
+                yield break;
+            }
+
+            using (UnityWebRequest request = UnityWebRequest.Put(url, audioData))
+            {
+                request.SetRequestHeader("Content-Type", "audio/wav");
+
+                var op = request.SendWebRequest();
+                yield return WaitForTimeout(op, timeoutInSeconds, () =>
+                {
+                    Debug.LogError("Request timed out.");
+                });
+
+                if (request.result == UnityWebRequest.Result.Success)
+                {
+                    callbackOnSuccess?.Invoke(request);
+                }
+                else
+                {
+                    var body = request.downloadHandler?.text ?? string.Empty;
+                    Debug.LogError($"Failed to upload audio data: {request.error} (HTTP {request.responseCode})\nBody: {body}");
+                }
+            }
+        }
+
+        // Encodes audioClip as a 16-bit PCM WAV file in memory and uploads it as a
+        // multipart form field named "file" through UploadFileCoroutine.
+        internal static IEnumerator UploadAudioCoroutine(
+            string url,
+            AudioClip audioClip,
+            float timeoutInSeconds,
+            Action<UnityWebRequest> callbackOnSuccess)
+        {
+            // Reading audioClip.name below would throw on a missing clip.
+            if (audioClip == null)
+            {
+                Debug.LogError("UploadAudioCoroutine: audioClip is null");
+                yield break;
+            }
+
+            byte[] audioData = AudioClipToWavBytes(audioClip);
+            string fileName = $"{audioClip.name}.wav";
+            string fieldName = "file";
+
+            yield return UploadFileCoroutine(
+                url, audioData, fileName, fieldName, timeoutInSeconds, callbackOnSuccess);
+        }
+
+        // Builds a WAV file (44-byte header + 16-bit little-endian PCM) from the
+        // provided AudioClip. Supports mono or multi-channel clips. Assumes the
+        // resulting file size fits in int32. Returns an empty array for a null clip.
+        private static byte[] AudioClipToWavBytes(AudioClip clip)
+        {
+            if (clip == null)
+            {
+                Debug.LogError("AudioClipToWavBytes: clip is null");
+                return Array.Empty<byte>();
+            }
+
+            const int HeaderSize = 44;
+            const int BytesPerSample = 2; // 16-bit PCM
+
+            int channels = clip.channels;
+            int sampleCount = clip.samples * channels; // total samples across channels
+            int sampleRate = clip.frequency;
+            int dataSize = sampleCount * BytesPerSample;
+
+            float[] floatData = new float[sampleCount];
+            clip.GetData(floatData, 0);
+
+            // Header and samples share one buffer so the PCM payload is never
+            // held in memory twice.
+            byte[] wav = new byte[HeaderSize + dataSize];
+
+            // Local helpers to write little-endian integers and ASCII chunk tags.
+            void WriteInt32LE(int offset, int value)
+            {
+                wav[offset] = (byte)(value & 0xFF);
+                wav[offset + 1] = (byte)((value >> 8) & 0xFF);
+                wav[offset + 2] = (byte)((value >> 16) & 0xFF);
+                wav[offset + 3] = (byte)((value >> 24) & 0xFF);
+            }
+            void WriteInt16LE(int offset, short value)
+            {
+                wav[offset] = (byte)(value & 0xFF);
+                wav[offset + 1] = (byte)((value >> 8) & 0xFF);
+            }
+            void WriteTag(int offset, string tag)
+            {
+                for (int i = 0; i < tag.Length; i++)
+                {
+                    wav[offset + i] = (byte)tag[i];
+                }
+            }
+
+            // ChunkID "RIFF", ChunkSize = 36 + Subchunk2Size, Format "WAVE"
+            WriteTag(0, "RIFF");
+            WriteInt32LE(4, 36 + dataSize);
+            WriteTag(8, "WAVE");
+            // Subchunk1ID "fmt ", Subchunk1Size (16 for PCM), AudioFormat (1 = PCM)
+            WriteTag(12, "fmt ");
+            WriteInt32LE(16, 16);
+            WriteInt16LE(20, 1);
+            // NumChannels, SampleRate
+            WriteInt16LE(22, (short)channels);
+            WriteInt32LE(24, sampleRate);
+            // ByteRate = SampleRate * NumChannels * BitsPerSample/8
+            WriteInt32LE(28, sampleRate * channels * BytesPerSample);
+            // BlockAlign = NumChannels * BitsPerSample/8, then BitsPerSample
+            WriteInt16LE(32, (short)(channels * BytesPerSample));
+            WriteInt16LE(34, 16);
+            // Subchunk2ID "data", Subchunk2Size = NumSamples * NumChannels * BitsPerSample/8
+            WriteTag(36, "data");
+            WriteInt32LE(40, dataSize);
+
+            // Convert each float sample to 16-bit PCM, written right after the header.
+            int cursor = HeaderSize;
+            for (int i = 0; i < sampleCount; i++)
+            {
+                // Clamp just in case
+                float f = Mathf.Clamp(floatData[i], -1f, 1f);
+                short s = (short)Mathf.RoundToInt(f * 32767f);
+                wav[cursor++] = (byte)(s & 0xFF); // little endian
+                wav[cursor++] = (byte)((s >> 8) & 0xFF);
+            }
+
+            return wav;
+        }
+
+        // POSTs fileData to url as a multipart form field named fieldName with the
+        // given fileName. callbackOnSuccess receives the finished request on
+        // success; failures are logged.
+        internal static IEnumerator UploadFileCoroutine(
+            string url,
+            byte[] fileData,
+            string fileName,
+            string fieldName,
+            float timeoutInSeconds,
+            Action<UnityWebRequest> callbackOnSuccess)
+        {
+            WWWForm form = new WWWForm();
+            form.AddBinaryData(fieldName, fileData, fileName);
+
+            using (UnityWebRequest request =
+                UnityWebRequest.Post(url, form))
+            {
+                var op = request.SendWebRequest();
+                yield return WaitForTimeout(op, timeoutInSeconds, () =>
+                {
+                    Debug.LogError("Request timed out.");
+                });
+
+                if (request.result == UnityWebRequest.Result.Success)
+                {
+                    callbackOnSuccess?.Invoke(request);
+                }
+                else
+                {
+                    // Same detail level as UploadAudioDataCoroutine's failure log.
+                    var body = request.downloadHandler?.text ?? string.Empty;
+                    Debug.LogError(
+                        $"Error uploading file: {request.error} (HTTP {request.responseCode})\nBody: {body}");
+                }
+            }
+        }
+
         internal static IEnumerator DownloadAudioCoroutine(
             string url,
             float timeoutInSeconds,
diff --git a/Assets/_Client/Scripts/API/ApiClientManager.cs b/Assets/_Client/Scripts/API/ApiClientManager.cs
index 95bb5e5..8a05ead 100644
--- a/Assets/_Client/Scripts/API/ApiClientManager.cs
+++ b/Assets/_Client/Scripts/API/ApiClientManager.cs
@@ -1,4 +1,5 @@
 using System;
+using System.IO;
 using System.Collections;
 using System.Linq;
 
@@ -7,7 +8,6 @@ using UnityEngine;
 
 namespace PPGIA.X540.Project3.API
 {
-    [RequireComponent(typeof(AudioSource))]
     public class ApiClientManager : MonoBehaviour
     {
         #region -- Inspector Fields -------------------------------------------
@@ -29,7 +29,7 @@ namespace PPGIA.X540.Project3.API
         private string _sessionCloseEndpoint = "/session/close";
 
         [SerializeField]
-        private string _chatEndpoint = "/chat/";
+        private string _chatEndpoint = "/chat";
 
         [SerializeField]
         private string _llmAgentEndpoint = "/agent/ask";
@@ -38,7 +38,13 @@ namespace PPGIA.X540.Project3.API
         private string _ttsEndpoint = "/tts/synthesize";
 
         [SerializeField]
-        private string _sttEndpoint = "/stt/upload";
+        private string _sttUploadEndpoint = "/transcript/get-upload-url";
+
+        [SerializeField]
+        private string _sttStartEndpoint = "/transcript/start";
+
+        [SerializeField]
+        private string _sttDownloadEndpoint = "/transcript/download";
 
         [Header("API Settings & Workload")]
         [SerializeField]
@@ -73,7 +79,10 @@ namespace PPGIA.X540.Project3.API
 
         void Awake()
         {
-            _audioSource = GetComponent<AudioSource>();
+            if (_audioSource == null)
+                _audioSource = GetComponent<AudioSource>();
+            if (_audioSource == null)
+                Debug.LogWarning("AudioSource component is missing.");
         }
 
         #region -- API Calls --------------------------------------------------
@@ -131,6 +140,166 @@ namespace PPGIA.X540.Project3.API
                 }));
         }
 
+        // Uploads the audio file at localFilePath in two steps: asks the API for
+        // an upload URL, then PUTs the file to that URL. uploadCompletedCallback
+        // receives the s3_key returned with the upload URL once the PUT succeeds.
+        public void UploadAudioClip(
+            string localFilePath, Action<string> uploadCompletedCallback = null)
+        {
+            if (_session == null)
+            {
+                Debug.LogWarning("No active session. Please initiate a session first.");
+                return;
+            }
+            if (string.IsNullOrEmpty(localFilePath))
+            {
+                Debug.LogWarning("No file path provided for upload.");
+                return;
+            }
+
+            StopAllCoroutines();
+
+            var url = EndpointUrl(_sttUploadEndpoint, _session.SessionId);
+            // A [Serializable] model replaces the anonymous object: Unity's
+            // JsonUtility cannot serialize anonymous types.
+            // NOTE(review): assumes the POST helper builds its body with JsonUtility.
+            var payload = new STTUploadRequest
+            {
+                filename = Path.GetFileName(localFilePath),
+                content_type = "audio/wav"
+            };
+
+            StartCoroutine(ApiClient.CallEndpointWithPostCoroutine(
+                url, _timeoutInSeconds, payload, (request) =>
+                {
+                    var body = request.downloadHandler?.text ?? string.Empty;
+                    // Parse the response once; both values come from the same JSON.
+                    var response = JsonUtility.FromJson<STTUploadResponse>(body);
+                    var uploadUrl = response?.UploadUrl;
+                    var s3Key = response?.S3Key;
+                    if (string.IsNullOrEmpty(uploadUrl))
+                    {
+                        Debug.LogWarning("Failed to get upload URL.");
+                        return;
+                    }
+
+                    StartCoroutine(ApiClient.UploadAudioDataCoroutine(
+                        uploadUrl, localFilePath, _timeoutInSeconds, (uploadRequest) =>
+                        {
+                            Debug.Log($"Audio upload complete: {uploadRequest.responseCode}");
+                            uploadCompletedCallback?.Invoke(s3Key);
+                        }));
+                }));
+        }
+
+        // Starts a transcription job for the uploaded object identified by s3Key.
+        // transcriptStartedCallback receives the job name reported by the API.
+        [ContextMenu("STT/Start Transcript")]
+        public void StartTranscript(string s3Key,
+            Action<string> transcriptStartedCallback = null)
+        {
+            // Ensure there is an active session
+            if (_session == null)
+            {
+                Debug.LogWarning("No active session. Please initiate a session first.");
+                return;
+            }
+            if (string.IsNullOrEmpty(s3Key))
+            {
+                Debug.LogWarning("No S3 key provided for transcription.");
+                return;
+            }
+
+            StopAllCoroutines();
+
+            // Build the endpoint URL
+            var url = EndpointUrl(_sttStartEndpoint);
+            var payload = new STTUploadResponse {
+                s3_key = s3Key
+            };
+
+            // Make the API call that starts the transcription job
+            StartCoroutine(ApiClient.CallEndpointWithPostCoroutine(
+                url, _timeoutInSeconds, payload, (request) =>
+                {
+                    var body = request.downloadHandler?.text ?? string.Empty;
+                    var response = ApiModel.FromJson<STTJobResponse>(body);
+                    var jobName = response?.JobName;
+                    if (string.IsNullOrEmpty(jobName))
+                    {
+                        // Without a job name there is nothing to poll for.
+                        Debug.LogWarning("Transcription start response has no job name.");
+                        return;
+                    }
+
+                    Debug.Log($"Transcription job started: {jobName}");
+                    transcriptStartedCallback?.Invoke(jobName);
+                }));
+        }
+
+        // Polls the download endpoint for jobName until the job finishes.
+        // transcriptionReceivedCallback receives the transcript, or null when the
+        // job reports FAILED.
+        [ContextMenu("STT/Download Transcription")]
+        public void DownloadTranscription(string jobName,
+            Action<string> transcriptionReceivedCallback = null)
+        {
+            // Ensure there is an active session
+            if (_session == null)
+            {
+                Debug.LogWarning("No active session. Please initiate a session first.");
+                return;
+            }
+            if (string.IsNullOrEmpty(jobName))
+            {
+                Debug.LogWarning("No transcription job name provided.");
+                return;
+            }
+
+            StopAllCoroutines();
+
+            StartCoroutine(KeepCallingCoroutine(
+                EndpointUrl(_sttDownloadEndpoint, jobName), .5f,
+                transcriptionReceivedCallback
+            ));
+        }
+
+        // Sends a GET to url every delayInSeconds until the response status is
+        // "COMPLETED" (callback gets the transcript) or "FAILED" (callback gets null).
+        private IEnumerator KeepCallingCoroutine(string url,
+            float delayInSeconds, Action<string> callback)
+        {
+            var wait = new WaitForSeconds(delayInSeconds);
+
+            bool keepCalling = true;
+            while (keepCalling)
+            {
+                yield return wait;
+                yield return ApiClient.CallEndpointWithGetCoroutine(
+                    url, _timeoutInSeconds, (request) =>
+                    {
+                        var body = request.downloadHandler?.text ?? string.Empty;
+                        var response = ApiModel.FromJson<STTJobResponse>(body);
+
+                        // A null response has no status: no case matches and
+                        // polling simply continues.
+                        switch (response?.Status)
+                        {
+                            case "FAILED":
+                                keepCalling = false;
+                                Debug.LogError("Transcription job failed.");
+                                callback?.Invoke(null);
+                                break;
+
+                            case "COMPLETED":
+                                keepCalling = false;
+                                callback?.Invoke(response.Transcript);
+                                break;
+                        }
+                    });
+            }
+        }
+
         [ContextMenu("Chat/Send Message")]
         public void SendChatMessage(string message = null,
             Action responseReceivedCallback = null,
diff --git a/Assets/_Client/Scripts/API/ApiModel.cs b/Assets/_Client/Scripts/API/ApiModel.cs
index 5f4431b..b073dc0 100644
--- a/Assets/_Client/Scripts/API/ApiModel.cs
+++ b/Assets/_Client/Scripts/API/ApiModel.cs
@@ -48,6 +48,38 @@ namespace PPGIA.X540.Project3.API
         public int ExpiresIn => expires_in;
     }
 
+    // Request body sent when asking for an audio upload URL.
+    [Serializable]
+    public class STTUploadRequest : ApiModel
+    {
+        public string filename;
+        public string content_type;
+    }
+
+    [Serializable]
+    public class STTUploadResponse : ApiModel
+    {
+        public string upload_url;
+        public string s3_key;
+
+        public string UploadUrl => upload_url;
+        public string S3Key => s3_key;
+    }
+
+    [Serializable]
+    public class STTJobResponse : ApiModel
+    {
+        public string job_name;
+        public string s3_uri;
+        public string status;
+        public string transcript;
+
+        public string JobName => job_name;
+        public string S3Uri => s3_uri;
+        public string Status => status;
+        public string Transcript => transcript;
+    }
+
     internal enum Environment
     {
         Development,
diff --git a/Assets/_Client/Scripts/Core/AppManager.cs b/Assets/_Client/Scripts/Core/AppManager.cs
index 77bd7fa..75bfcf6 100644
--- a/Assets/_Client/Scripts/Core/AppManager.cs
+++ b/Assets/_Client/Scripts/Core/AppManager.cs
@@ -7,6 +7,10 @@ namespace PPGIA.X540.Project3
 {
     public class AppManager : MonoBehaviour
     {
+        // Singleton instance
+        public static AppManager Instance { get; private set; }
+
+        #region -- Fields & Properties ----------------------------------------
         [Header("References")]
         [SerializeField]
         private UIController _uiController;
@@ -14,59 +18,161 @@ namespace PPGIA.X540.Project3
         [SerializeField]
         private ApiClientManager _apiManager;
 
+        [SerializeField]
+        private AudioCapture _audioCapture;
+        #endregion ------------------------------------------------------------
+
+        #region -- MonoBehaviour Methods --------------------------------------
         private void Awake()
         {
-            if (_uiController == null)
-                _uiController = GetComponent<UIController>();
-            if (_apiManager == null)
-                _apiManager = GetComponent<ApiClientManager>();
-        }
+            // Singleton: the first instance survives scene loads; any later
+            // duplicate is disabled and destroyed.
+            if (Instance != null && Instance != this)
+            {
+                // Disabling here keeps OnEnable from starting a session on an
+                // object that is about to be destroyed.
+                enabled = false;
+                Destroy(gameObject);
+                return;
+            }
+            Instance = this;
+            DontDestroyOnLoad(gameObject);
+
+            bool hasAllReferences = true;
+            if (_uiController == null)
+            {
+                Debug.LogError("UIController reference is missing in AppManager.");
+                hasAllReferences = false;
+            }
+            if (_apiManager == null)
+            {
+                Debug.LogError("ApiClientManager reference is missing in AppManager.");
+                hasAllReferences = false;
+            }
+            if (_audioCapture == null)
+            {
+                Debug.LogError("AudioCapture reference is missing in AppManager.");
+                hasAllReferences = false;
+            }
+            if (!hasAllReferences)
+            {
+                // Stay disabled instead of dereferencing a missing reference below.
+                enabled = false;
+                return;
+            }
+
+            _uiController.OnTalkButtonClicked += HandleTalkButtonClicked;
+            _audioCapture.OnRecordingSaved += HandleClipSaved;
 
-        void Start()
-        {
-            _apiManager.CloseSession(
-                () => _uiController.SessionActive = _apiManager.IsSessionActive);
         }
 
         private void OnEnable()
         {
-            _uiController.OnSessionButtonClicked += HandleSessionButtonClicked;
-            _uiController.OnSendChatButtonClicked += HandleSendChatButtonClicked;
+            _apiManager.InitiateSession(() =>
+            {
+                Debug.Log("API session initiated successfully.");
+            });
         }
 
         private void OnDisable()
         {
-            _uiController.OnSessionButtonClicked -= HandleSessionButtonClicked;
-            _uiController.OnSendChatButtonClicked -= HandleSendChatButtonClicked;
+            if (_apiManager != null && _apiManager.IsSessionActive)
+            {
+                _apiManager.CloseSession(() =>
+                {
+                    Debug.Log("API session closed successfully.");
+                });
+            }
         }
 
-        private void HandleSessionButtonClicked()
+        private void OnDestroy()
+        {
+            // Awake may have bailed out early (duplicate instance or missing
+            // references), so guard every reference before unsubscribing.
+            if (_uiController != null)
+            {
+                _uiController.OnTalkButtonClicked -= HandleTalkButtonClicked;
+            }
+            if (_audioCapture != null)
+            {
+                _audioCapture.OnRecordingSaved -= HandleClipSaved;
+            }
+            if (Instance == this)
+            {
+                Instance = null;
+            }
+        }
+        #endregion ------------------------------------------------------------
+
+        // Runs the speech pipeline for a saved recording: upload -> start
+        // transcription -> download transcript -> send it as a chat message.
+        private void HandleClipSaved(string filePath)
+        {
+            Debug.Log($"Audio clip saved at: {filePath}");
+
+            _apiManager.UploadAudioClip(
+                filePath,
+                (s3Key) =>
+                {
+                    Debug.Log($"Clip uploaded to: {s3Key}");
+                    _apiManager.StartTranscript(
+                        s3Key,
+                        (jobName) =>
+                        {
+                            Debug.Log($"Transcription job started: {jobName}");
+                            _apiManager.DownloadTranscription(jobName, (transcript) =>
+                            {
+                                // A failed job reports a null transcript; there is
+                                // nothing to show or send in that case.
+                                if (string.IsNullOrEmpty(transcript))
+                                {
+                                    Debug.LogWarning("No transcript received; skipping chat message.");
+                                    return;
+                                }
+
+                                Debug.Log($"Transcription completed: {transcript}");
+                                _uiController.AppendChatOutput($"\nUser: {transcript}\n");
+
+                                _apiManager.SendChatMessage(transcript,
+                                    (response) =>
+                                    {
+                                        _uiController.AppendChatOutput($"Bot: {response}\n");
+                                    }, () =>
+                                    {
+                                        // Speech synthesis finished.
+                                    });
+                            });
+                        });
+                });
+
+            // The pipeline above is asynchronous; this runs before it completes.
+            _uiController.CurrentState = UIController.UIState.Idle;
+        }
+
+        private void HandleTalkButtonClicked()
         {
             if (!_apiManager.IsSessionActive)
             {
-                _apiManager.InitiateSession(
-                    () => _uiController.SessionActive = _apiManager.IsSessionActive);
+                Debug.LogWarning("Session is not active. Cannot send message.");
+                return;
             }
-            else
-            {
-                _apiManager.CloseSession(
-                    () => _uiController.SessionActive = _apiManager.IsSessionActive
-                );
-            }
-        }
 
-        private void HandleSendChatButtonClicked(string message)
-        {
-            _apiManager.SendChatMessage(message,
-                (responseMessage) =>
+            switch (_uiController.CurrentState)
             {
-                _uiController.ChatOutput += $"User: {message}\n";
-                _uiController.ChatOutput += $"Bot: {responseMessage}\n";
-            },
-            () =>
-            {
-                // Speech finished callback (optional)
-            });
+                case UIController.UIState.Idle:
+                    _audioCapture.StartRecording();
+                    _uiController.CurrentState = UIController.UIState.Recording;
+                    break;
+
+                case UIController.UIState.Recording:
+                    _audioCapture.StopRecording();
+                    _uiController.CurrentState = UIController.UIState.Idle;
+                    break;
+
+                case UIController.UIState.Processing:
+                    Debug.Log("Currently processing. Please wait.");
+                    break;
+            }
         }
     }
 }
diff --git a/Assets/_Client/Scripts/AudioCapture.cs b/Assets/_Client/Scripts/Core/AudioCapture.cs
similarity index 96%
rename from Assets/_Client/Scripts/AudioCapture.cs
rename to Assets/_Client/Scripts/Core/AudioCapture.cs
index 7c7c926..97d53e9 100644
--- a/Assets/_Client/Scripts/AudioCapture.cs
+++ b/Assets/_Client/Scripts/Core/AudioCapture.cs
@@ -18,6 +18,7 @@ namespace PPGIA.X540.Project3
             Hz96000 = 96000
         }
 
+        #region -- Fields & Properties ----------------------------------------
         [Header("Audio Capture Settings")]
         [SerializeField]
         private SampleRate _sampleRateInHz = SampleRate.Hz44100;
@@ -54,9 +55,14 @@ namespace PPGIA.X540.Project3
         private List _capturedSamples = new List(1024 * 32); // filled only on Stop
         private int _channels = 1; // microphone channel count (Unity usually mono)
 
+        private AudioClip _recordingClip;
+        public AudioClip GetRecordedClip() => _recordingClip;
+
         private string _currentDevice;
+        #endregion ------------------------------------------------------------
 
+        #region -- MonoBehaviour Methods --------------------------------------
         private void Awake()
         {
             _audioSource = GetComponent();
@@ -67,6 +73,7 @@ namespace PPGIA.X540.Project3
         {
             StopRecording();
         }
+        #endregion ------------------------------------------------------------
 
         [ContextMenu("Start recording audio")]
         public void StartRecording()
@@ -215,7 +222,6 @@ namespace PPGIA.X540.Project3
                     writer.Write(dataBytes);
                 }
                 LastSavedFilePath = filePath;
-                Debug.Log($"Audio saved to: {filePath}");
                 OnRecordingSaved?.Invoke(filePath);
             }
             catch (Exception ex)
diff --git a/Assets/_Client/Scripts/AudioCapture.cs.meta b/Assets/_Client/Scripts/Core/AudioCapture.cs.meta
similarity index 100%
rename from Assets/_Client/Scripts/AudioCapture.cs.meta
rename to Assets/_Client/Scripts/Core/AudioCapture.cs.meta