Adds STT functionality with audio upload
Adds speech-to-text (STT) functionality by allowing users to upload audio clips, start transcription jobs, and download transcriptions. Introduces new API endpoints for STT upload, start, and download. Also, converts AudioClip to WAV byte array.
This commit is contained in:
parent
ea4535ebb6
commit
e0daa00205
|
|
@ -1,6 +1,6 @@
|
||||||
using System;
|
using System;
|
||||||
using System.Collections;
|
|
||||||
using System.IO;
|
using System.IO;
|
||||||
|
using System.Collections;
|
||||||
using System.Text;
|
using System.Text;
|
||||||
|
|
||||||
using UnityEngine;
|
using UnityEngine;
|
||||||
|
|
@ -56,12 +56,6 @@ namespace PPGIA.X540.Project3.API
|
||||||
|
|
||||||
request.downloadHandler = new DownloadHandlerBuffer();
|
request.downloadHandler = new DownloadHandlerBuffer();
|
||||||
|
|
||||||
// Debug.Log($"Sending {method} request to {url}");
|
|
||||||
// Debug.Log(
|
|
||||||
// payload != null ?
|
|
||||||
// $"Payload: {JsonUtility.ToJson(payload)}" :
|
|
||||||
// "No payload.");
|
|
||||||
|
|
||||||
var op = request.SendWebRequest();
|
var op = request.SendWebRequest();
|
||||||
yield return WaitForTimeout(op, timeoutInSeconds, () =>
|
yield return WaitForTimeout(op, timeoutInSeconds, () =>
|
||||||
{
|
{
|
||||||
|
|
@ -117,6 +111,170 @@ Response Body: {body}";
|
||||||
url, "DELETE", null, timeoutInSeconds, callbackOnSuccess);
|
url, "DELETE", null, timeoutInSeconds, callbackOnSuccess);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// <summary>
/// Reads the file at <paramref name="filePath"/> and uploads its raw bytes to
/// <paramref name="url"/> with an HTTP PUT, sent as <c>audio/wav</c>.
/// </summary>
/// <param name="url">Destination URL of the PUT request.</param>
/// <param name="filePath">Path of the local audio file to upload.</param>
/// <param name="timeoutInSeconds">Timeout handed to <c>WaitForTimeout</c>.</param>
/// <param name="callbackOnSuccess">
/// Invoked with the finished request when it reports
/// <c>UnityWebRequest.Result.Success</c>; not invoked otherwise.
/// </param>
internal static IEnumerator UploadAudioDataCoroutine(
    string url,
    string filePath,
    float timeoutInSeconds,
    Action<UnityWebRequest> callbackOnSuccess)
{
    // Read the file up front. The read is kept in its own try/catch with no
    // yield inside it, so an I/O failure is logged instead of escaping the
    // coroutine as an unhandled exception.
    byte[] audioData = null;
    try
    {
        audioData = File.ReadAllBytes(filePath);
    }
    catch (Exception ex)
    {
        Debug.LogError($"Failed to read audio file '{filePath}': {ex.Message}");
    }

    if (audioData == null)
    {
        yield break;
    }

    // PUT the audio data as binary
    using (UnityWebRequest request = UnityWebRequest.Put(url, audioData))
    {
        request.SetRequestHeader("Content-Type", "audio/wav");

        var op = request.SendWebRequest();
        yield return WaitForTimeout(op, timeoutInSeconds, () =>
        {
            Debug.LogError("Request timed out.");
        });

        if (request.result == UnityWebRequest.Result.Success)
        {
            callbackOnSuccess?.Invoke(request);
        }
        else
        {
            var body = request.downloadHandler?.text ?? string.Empty;
            Debug.LogError($"Failed to upload audio data: {request.error} (HTTP {request.responseCode})\nBody: {body}");
        }
    }
}
|
||||||
|
|
||||||
|
/// <summary>
/// Converts <paramref name="audioClip"/> to WAV bytes and uploads them through
/// <c>UploadFileCoroutine</c> as a form field named <c>file</c>, with the file
/// name <c>{clip name}.wav</c>.
/// </summary>
/// <param name="url">Destination URL of the upload.</param>
/// <param name="audioClip">Clip to convert and upload; a null clip aborts the upload.</param>
/// <param name="timeoutInSeconds">Timeout forwarded to <c>UploadFileCoroutine</c>.</param>
/// <param name="callbackOnSuccess">Forwarded to <c>UploadFileCoroutine</c>.</param>
internal static IEnumerator UploadAudioCoroutine(
    string url,
    AudioClip audioClip,
    float timeoutInSeconds,
    Action<UnityWebRequest> callbackOnSuccess)
{
    // Bail out before touching audioClip.name: the converter only logs a
    // null clip and returns an empty array, it does not stop this method.
    if (audioClip == null)
    {
        Debug.LogError("UploadAudioCoroutine: audioClip is null");
        yield break;
    }

    // Convert AudioClip to WAV (PCM 16-bit little endian) without external utility.
    byte[] audioData = AudioClipToWavBytes(audioClip);
    if (audioData == null || audioData.Length == 0)
    {
        Debug.LogError("UploadAudioCoroutine: no audio data to upload");
        yield break;
    }

    string fileName = $"{audioClip.name}.wav";
    string fieldName = "file";

    yield return UploadFileCoroutine(
        url, audioData, fileName, fieldName, timeoutInSeconds, callbackOnSuccess);
}
|
||||||
|
|
||||||
|
/// <summary>
/// Encodes an <c>AudioClip</c> as an in-memory WAV file: a 44-byte RIFF/WAVE
/// header followed by 16-bit little-endian PCM samples.
/// Supports mono or multi-channel clips. Assumes clip.samples * channels fits in int32.
/// </summary>
/// <param name="clip">Clip whose sample data is read with <c>GetData</c>.</param>
/// <returns>
/// The complete WAV file bytes, or an empty array when <paramref name="clip"/>
/// is null or its sample data cannot be read.
/// </returns>
private static byte[] AudioClipToWavBytes(AudioClip clip)
{
    if (clip == null)
    {
        Debug.LogError("AudioClipToWavBytes: clip is null");
        return Array.Empty<byte>();
    }

    const int HeaderSize = 44;     // canonical PCM WAV header
    const int BytesPerSample = 2;  // 16-bit samples

    int channels = clip.channels;
    int sampleRate = clip.frequency;
    int sampleCount = clip.samples * channels; // total samples across channels

    // Get float data. GetData reports failure through its return value;
    // encoding a buffer it never filled would produce a silent WAV.
    float[] floatData = new float[sampleCount];
    if (sampleCount > 0 && !clip.GetData(floatData, 0))
    {
        Debug.LogError("AudioClipToWavBytes: failed to read sample data from clip");
        return Array.Empty<byte>();
    }

    // Subchunk2Size = NumSamples * NumChannels * BitsPerSample/8
    int dataSize = sampleCount * BytesPerSample;
    byte[] wav = new byte[HeaderSize + dataSize];

    // Local helpers writing little-endian integers / ASCII tags into the buffer.
    void WriteInt32LE(int offset, int value)
    {
        wav[offset] = (byte)(value & 0xFF);
        wav[offset + 1] = (byte)((value >> 8) & 0xFF);
        wav[offset + 2] = (byte)((value >> 16) & 0xFF);
        wav[offset + 3] = (byte)((value >> 24) & 0xFF);
    }

    void WriteInt16LE(int offset, short value)
    {
        wav[offset] = (byte)(value & 0xFF);
        wav[offset + 1] = (byte)((value >> 8) & 0xFF);
    }

    void WriteTag(int offset, string tag)
    {
        for (int i = 0; i < tag.Length; i++)
        {
            wav[offset + i] = (byte)tag[i];
        }
    }

    WriteTag(0, "RIFF");                                     // ChunkID
    WriteInt32LE(4, 36 + dataSize);                          // ChunkSize = 36 + Subchunk2Size
    WriteTag(8, "WAVE");                                     // Format
    WriteTag(12, "fmt ");                                    // Subchunk1ID
    WriteInt32LE(16, 16);                                    // Subchunk1Size (16 for PCM)
    WriteInt16LE(20, 1);                                     // AudioFormat (1 = PCM)
    WriteInt16LE(22, (short)channels);                       // NumChannels
    WriteInt32LE(24, sampleRate);                            // SampleRate
    WriteInt32LE(28, sampleRate * channels * BytesPerSample); // ByteRate
    WriteInt16LE(32, (short)(channels * BytesPerSample));    // BlockAlign
    WriteInt16LE(34, 16);                                    // BitsPerSample
    WriteTag(36, "data");                                    // Subchunk2ID
    WriteInt32LE(40, dataSize);                              // Subchunk2Size

    // Convert to 16-bit PCM, writing straight after the header so no
    // intermediate PCM buffer (and copy) is needed.
    int writeOffset = HeaderSize;
    for (int i = 0; i < sampleCount; i++)
    {
        // Clamp just in case
        float clamped = Mathf.Clamp(floatData[i], -1f, 1f);
        short pcm = (short)Mathf.RoundToInt(clamped * 32767f);
        WriteInt16LE(writeOffset, pcm);
        writeOffset += BytesPerSample;
    }

    return wav;
}
|
||||||
|
|
||||||
|
/// <summary>
/// Uploads <paramref name="fileData"/> to <paramref name="url"/> with an HTTP
/// POST, as a binary form field built with <c>WWWForm.AddBinaryData</c>.
/// </summary>
/// <param name="url">Destination URL of the POST request.</param>
/// <param name="fileData">Bytes to upload; null or empty data aborts the upload.</param>
/// <param name="fileName">File name attached to the form field.</param>
/// <param name="fieldName">Name of the form field carrying the file.</param>
/// <param name="timeoutInSeconds">Timeout handed to <c>WaitForTimeout</c>.</param>
/// <param name="callbackOnSuccess">
/// Invoked with the finished request when it reports
/// <c>UnityWebRequest.Result.Success</c>; not invoked otherwise.
/// </param>
internal static IEnumerator UploadFileCoroutine(
    string url,
    byte[] fileData,
    string fileName,
    string fieldName,
    float timeoutInSeconds,
    Action<UnityWebRequest> callbackOnSuccess)
{
    // Nothing useful can be sent without data; stop before building the form.
    if (fileData == null || fileData.Length == 0)
    {
        Debug.LogError("Error uploading file: no file data provided.");
        yield break;
    }

    WWWForm form = new WWWForm();
    form.AddBinaryData(fieldName, fileData, fileName);

    using (UnityWebRequest request =
        UnityWebRequest.Post(url, form))
    {
        var op = request.SendWebRequest();
        yield return WaitForTimeout(op, timeoutInSeconds, () =>
        {
            Debug.LogError("Request timed out.");
        });

        if (request.result == UnityWebRequest.Result.Success)
        {
            callbackOnSuccess?.Invoke(request);
        }
        else
        {
            // Report the status code and body as well, matching the detail
            // logged by UploadAudioDataCoroutine.
            var body = request.downloadHandler?.text ?? string.Empty;
            Debug.LogError(
                $"Error uploading file: {request.error} (HTTP {request.responseCode})\nBody: {body}");
        }
    }
}
|
||||||
|
|
||||||
internal static IEnumerator DownloadAudioCoroutine(
|
internal static IEnumerator DownloadAudioCoroutine(
|
||||||
string url,
|
string url,
|
||||||
float timeoutInSeconds,
|
float timeoutInSeconds,
|
||||||
|
|
|
||||||
|
|
@ -1,4 +1,5 @@
|
||||||
using System;
|
using System;
|
||||||
|
using System.IO;
|
||||||
using System.Collections;
|
using System.Collections;
|
||||||
using System.Linq;
|
using System.Linq;
|
||||||
|
|
||||||
|
|
@ -7,7 +8,6 @@ using UnityEngine;
|
||||||
|
|
||||||
namespace PPGIA.X540.Project3.API
|
namespace PPGIA.X540.Project3.API
|
||||||
{
|
{
|
||||||
[RequireComponent(typeof(AudioSource))]
|
|
||||||
public class ApiClientManager : MonoBehaviour
|
public class ApiClientManager : MonoBehaviour
|
||||||
{
|
{
|
||||||
#region -- Inspector Fields -------------------------------------------
|
#region -- Inspector Fields -------------------------------------------
|
||||||
|
|
@ -29,7 +29,7 @@ namespace PPGIA.X540.Project3.API
|
||||||
private string _sessionCloseEndpoint = "/session/close";
|
private string _sessionCloseEndpoint = "/session/close";
|
||||||
|
|
||||||
[SerializeField]
|
[SerializeField]
|
||||||
private string _chatEndpoint = "/chat/";
|
private string _chatEndpoint = "/chat";
|
||||||
|
|
||||||
[SerializeField]
|
[SerializeField]
|
||||||
private string _llmAgentEndpoint = "/agent/ask";
|
private string _llmAgentEndpoint = "/agent/ask";
|
||||||
|
|
@ -38,7 +38,13 @@ namespace PPGIA.X540.Project3.API
|
||||||
private string _ttsEndpoint = "/tts/synthesize";
|
private string _ttsEndpoint = "/tts/synthesize";
|
||||||
|
|
||||||
[SerializeField]
|
[SerializeField]
|
||||||
private string _sttEndpoint = "/stt/upload";
|
private string _sttUploadEndpoint = "/transcript/get-upload-url";
|
||||||
|
|
||||||
|
[SerializeField]
|
||||||
|
private string _sttStartEndpoint = "/transcript/start";
|
||||||
|
|
||||||
|
[SerializeField]
|
||||||
|
private string _sttDownloadEndpoint = "/transcript/download";
|
||||||
|
|
||||||
[Header("API Settings & Workload")]
|
[Header("API Settings & Workload")]
|
||||||
[SerializeField]
|
[SerializeField]
|
||||||
|
|
@ -73,7 +79,10 @@ namespace PPGIA.X540.Project3.API
|
||||||
|
|
||||||
void Awake()
|
void Awake()
|
||||||
{
|
{
|
||||||
_audioSource = GetComponent<AudioSource>();
|
if (_audioSource == null)
|
||||||
|
_audioSource = GetComponent<AudioSource>();
|
||||||
|
if (_audioSource == null)
|
||||||
|
Debug.LogWarning("AudioSource component is missing.");
|
||||||
}
|
}
|
||||||
|
|
||||||
#region -- API Calls --------------------------------------------------
|
#region -- API Calls --------------------------------------------------
|
||||||
|
|
@ -131,6 +140,135 @@ namespace PPGIA.X540.Project3.API
|
||||||
}));
|
}));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// <summary>
/// Uploads a local audio file in two steps: POSTs the file name and content
/// type to the STT upload endpoint to obtain an upload URL and S3 key, then
/// uploads the file bytes to that URL.
/// </summary>
/// <param name="localFilePath">Path of the audio file to upload.</param>
/// <param name="uploadCompletedCallback">
/// Invoked with the S3 key once the file upload succeeds; not invoked on failure.
/// </param>
public void UploadAudioClip(
    string localFilePath, Action<string> uploadCompletedCallback = null)
{
    if (_session == null)
    {
        Debug.LogWarning("No active session. Please initiate a session first.");
        return;
    }
    // Fail early instead of requesting an upload URL for a file that cannot be read.
    if (string.IsNullOrEmpty(localFilePath) || !File.Exists(localFilePath))
    {
        Debug.LogWarning($"Audio file not found: '{localFilePath}'.");
        return;
    }

    StopAllCoroutines();

    var url = EndpointUrl(_sttUploadEndpoint, _session.SessionId);
    // A [Serializable] class is used rather than an anonymous type because
    // JsonUtility does not serialize anonymous types.
    // NOTE(review): assumes the POST helper serializes the payload with
    // JsonUtility — confirm against CallEndpointWithPostCoroutine.
    var payload = new STTUploadUrlRequest
    {
        filename = Path.GetFileName(localFilePath),
        content_type = "audio/wav"
    };

    StartCoroutine(ApiClient.CallEndpointWithPostCoroutine(
        url, _timeoutInSeconds, payload, (request) =>
        {
            var body = request.downloadHandler?.text ?? string.Empty;
            // Parse the response once and read both fields from it.
            var response = JsonUtility.FromJson<STTUploadResponse>(body);
            var uploadUrl = response?.UploadUrl;
            var s3Key = response?.S3Key;
            if (string.IsNullOrEmpty(uploadUrl))
            {
                Debug.LogWarning("Failed to get upload URL.");
                return;
            }

            StartCoroutine(ApiClient.UploadAudioDataCoroutine(
                uploadUrl, localFilePath, _timeoutInSeconds, (uploadRequest) =>
                {
                    Debug.Log($"Audio upload complete: {uploadRequest.responseCode}");
                    uploadCompletedCallback?.Invoke(s3Key);
                }));
        }));
}

/// <summary>
/// Request body sent to the STT upload endpoint. Field names are the JSON keys.
/// </summary>
[Serializable]
private class STTUploadUrlRequest
{
    public string filename;
    public string content_type;
}
|
||||||
|
|
||||||
|
|
||||||
|
/// <summary>
/// Starts a transcription job for a previously uploaded audio file by
/// POSTing its S3 key to the STT start endpoint.
/// </summary>
/// <param name="s3Key">S3 key of the uploaded audio.</param>
/// <param name="transcriptStartedCallback">
/// Invoked with the job name from the response; not invoked when the
/// response carries no job name.
/// </param>
[ContextMenu("STT/Start Transcript")]
public void StartTranscript(string s3Key,
    Action<string> transcriptStartedCallback = null)
{
    // Ensure there is an active session
    if (_session == null)
    {
        Debug.LogWarning("No active session. Please initiate a session first.");
        return;
    }
    if (string.IsNullOrEmpty(s3Key))
    {
        Debug.LogWarning("No S3 key provided for transcription.");
        return;
    }

    StopAllCoroutines();

    // Build the endpoint URL
    var url = EndpointUrl(_sttStartEndpoint);
    // STTUploadResponse is reused as the request body; only s3_key is set.
    var payload = new STTUploadResponse {
        s3_key = s3Key
    };

    // Make the API call to start the transcription job
    StartCoroutine(ApiClient.CallEndpointWithPostCoroutine(
        url, _timeoutInSeconds, payload, (request) =>
        {
            var body = request.downloadHandler?.text ?? string.Empty;
            var response = ApiModel.FromJson<STTJobResponse>(body);
            var jobName = response?.JobName;

            // Without a job name there is nothing to poll for; do not report
            // the job as started.
            if (string.IsNullOrEmpty(jobName))
            {
                Debug.LogError("Failed to start transcription job: response has no job name.");
                return;
            }

            Debug.Log($"Transcription job started: {jobName}");
            transcriptStartedCallback?.Invoke(jobName);
        }));
}
|
||||||
|
|
||||||
|
/// <summary>
/// Polls the STT download endpoint for <paramref name="jobName"/> until the
/// job reports a terminal status.
/// </summary>
/// <param name="jobName">Name of the transcription job to poll.</param>
/// <param name="transcriptionReceivedCallback">
/// Passed to <c>KeepCallingCoroutine</c>, which invokes it with the
/// transcript on completion or with null on failure.
/// </param>
[ContextMenu("STT/Download Transcription")]
public void DownloadTranscription(string jobName,
    Action<string> transcriptionReceivedCallback = null)
{
    // Delay between two polls of the download endpoint.
    const float PollIntervalInSeconds = .5f;

    // Ensure there is an active session
    if (_session == null)
    {
        Debug.LogWarning("No active session. Please initiate a session first.");
        return;
    }
    // Polling without a job name would never reach a terminal status.
    if (string.IsNullOrEmpty(jobName))
    {
        Debug.LogWarning("No job name provided for transcription download.");
        return;
    }

    StopAllCoroutines();

    StartCoroutine(KeepCallingCoroutine(
        EndpointUrl(_sttDownloadEndpoint, jobName), PollIntervalInSeconds,
        transcriptionReceivedCallback
    ));
}
|
||||||
|
|
||||||
|
/// <summary>
/// Repeatedly GETs <paramref name="url"/>, waiting
/// <paramref name="delayInSeconds"/> before each call, until the response
/// status is <c>COMPLETED</c> or <c>FAILED</c>.
/// </summary>
/// <param name="url">Endpoint to poll.</param>
/// <param name="delayInSeconds">Wait time before each request.</param>
/// <param name="callback">
/// Invoked with the transcript on <c>COMPLETED</c>, or with null on
/// <c>FAILED</c> or when the response cannot be parsed.
/// </param>
/// <remarks>
/// Any other status keeps the loop polling.
/// NOTE(review): there is no upper bound on the number of polls — confirm
/// whether a maximum wait is needed.
/// </remarks>
private IEnumerator KeepCallingCoroutine(string url,
    float delayInSeconds, Action<string> callback)
{
    // Make the API call to download the transcription
    var wait = new WaitForSeconds(delayInSeconds);

    bool keepCalling = true;
    while (keepCalling)
    {
        yield return wait;
        yield return ApiClient.CallEndpointWithGetCoroutine(
            url, _timeoutInSeconds, (request) =>
            {
                var body = request.downloadHandler?.text ?? string.Empty;
                var response = ApiModel.FromJson<STTJobResponse>(body);

                // An unparsable body is reported as a failure rather than
                // dereferencing a null response.
                if (response == null)
                {
                    keepCalling = false;
                    Debug.LogError("Transcription job returned an unreadable response.");
                    callback?.Invoke(null);
                    return;
                }

                if (response.Status == "FAILED")
                {
                    keepCalling = false;
                    Debug.LogError("Transcription job failed.");
                    callback?.Invoke(null);
                }
                else if (response.Status == "COMPLETED")
                {
                    keepCalling = false;
                    callback?.Invoke(response.Transcript);
                }
            });
    }
}
|
||||||
|
|
||||||
|
|
||||||
[ContextMenu("Chat/Send Message")]
|
[ContextMenu("Chat/Send Message")]
|
||||||
public void SendChatMessage(string message = null,
|
public void SendChatMessage(string message = null,
|
||||||
Action<string> responseReceivedCallback = null,
|
Action<string> responseReceivedCallback = null,
|
||||||
|
|
|
||||||
|
|
@ -48,6 +48,30 @@ namespace PPGIA.X540.Project3.API
|
||||||
public int ExpiresIn => expires_in;
|
public int ExpiresIn => expires_in;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// <summary>
/// Model carrying an upload URL and the S3 key of an audio upload.
/// The public fields use the JSON key names; the properties expose the same
/// values under PascalCase names.
/// </summary>
[Serializable]
public class STTUploadResponse : ApiModel
{
    // JSON-mapped fields.
    public string upload_url;
    public string s3_key;

    /// <summary>Value of <c>upload_url</c>.</summary>
    public string UploadUrl
    {
        get { return upload_url; }
    }

    /// <summary>Value of <c>s3_key</c>.</summary>
    public string S3Key
    {
        get { return s3_key; }
    }
}
|
||||||
|
|
||||||
|
/// <summary>
/// Model describing a transcription job: its name, S3 URI, status and
/// transcript. The public fields use the JSON key names; the properties
/// expose the same values under PascalCase names.
/// </summary>
[Serializable]
public class STTJobResponse : ApiModel
{
    // JSON-mapped fields.
    public string job_name;
    public string s3_uri;
    public string status;
    public string transcript;

    /// <summary>Value of <c>job_name</c>.</summary>
    public string JobName
    {
        get { return job_name; }
    }

    /// <summary>Value of <c>s3_uri</c>.</summary>
    public string S3Uri
    {
        get { return s3_uri; }
    }

    /// <summary>Value of <c>status</c>.</summary>
    public string Status
    {
        get { return status; }
    }

    /// <summary>Value of <c>transcript</c>.</summary>
    public string Transcript
    {
        get { return transcript; }
    }
}
|
||||||
|
|
||||||
internal enum Environment
|
internal enum Environment
|
||||||
{
|
{
|
||||||
Development,
|
Development,
|
||||||
|
|
|
||||||
|
|
@ -7,6 +7,10 @@ namespace PPGIA.X540.Project3
|
||||||
{
|
{
|
||||||
public class AppManager : MonoBehaviour
|
public class AppManager : MonoBehaviour
|
||||||
{
|
{
|
||||||
|
// Singleton instance
|
||||||
|
public static AppManager Instance { get; private set; }
|
||||||
|
|
||||||
|
#region -- Fields & Properties ----------------------------------------
|
||||||
[Header("References")]
|
[Header("References")]
|
||||||
[SerializeField]
|
[SerializeField]
|
||||||
private UIController _uiController;
|
private UIController _uiController;
|
||||||
|
|
@ -14,59 +18,129 @@ namespace PPGIA.X540.Project3
|
||||||
[SerializeField]
|
[SerializeField]
|
||||||
private ApiClientManager _apiManager;
|
private ApiClientManager _apiManager;
|
||||||
|
|
||||||
|
[SerializeField]
|
||||||
|
private AudioCapture _audioCapture;
|
||||||
|
|
||||||
|
private AudioClip _recordedClip;
|
||||||
|
#endregion ------------------------------------------------------------
|
||||||
|
|
||||||
|
#region -- MonoBehaviour Methods --------------------------------------
|
||||||
private void Awake()
|
private void Awake()
|
||||||
{
|
{
|
||||||
if (_uiController == null)
|
// Singleton pattern implementation
|
||||||
_uiController = GetComponent<UIController>();
|
if (Instance == null)
|
||||||
if (_apiManager == null)
|
{
|
||||||
_apiManager = GetComponent<ApiClientManager>();
|
Instance = this;
|
||||||
}
|
DontDestroyOnLoad(gameObject);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
Destroy(gameObject);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (_uiController == null)
|
||||||
|
{
|
||||||
|
Debug.LogError("UIController reference is missing in AppManager.");
|
||||||
|
}
|
||||||
|
if (_apiManager == null)
|
||||||
|
{
|
||||||
|
Debug.LogError("ApiClientManager reference is missing in AppManager.");
|
||||||
|
}
|
||||||
|
if (_audioCapture == null)
|
||||||
|
{
|
||||||
|
Debug.LogError("AudioCapture reference is missing in AppManager.");
|
||||||
|
}
|
||||||
|
|
||||||
|
_uiController.OnTalkButtonClicked += HandleTalkButtonClicked;
|
||||||
|
_audioCapture.OnRecordingSaved += HandleClipSaved;
|
||||||
|
|
||||||
void Start()
|
|
||||||
{
|
|
||||||
_apiManager.CloseSession(
|
|
||||||
() => _uiController.SessionActive = _apiManager.IsSessionActive);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
private void OnEnable()
|
private void OnEnable()
|
||||||
{
|
{
|
||||||
_uiController.OnSessionButtonClicked += HandleSessionButtonClicked;
|
_apiManager.InitiateSession(() =>
|
||||||
_uiController.OnSendChatButtonClicked += HandleSendChatButtonClicked;
|
{
|
||||||
|
Debug.Log("API session initiated successfully.");
|
||||||
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
private void OnDisable()
|
void OnDisable()
|
||||||
{
|
{
|
||||||
_uiController.OnSessionButtonClicked -= HandleSessionButtonClicked;
|
if (_apiManager != null && _apiManager.IsSessionActive)
|
||||||
_uiController.OnSendChatButtonClicked -= HandleSendChatButtonClicked;
|
{
|
||||||
|
_apiManager.CloseSession(() =>
|
||||||
|
{
|
||||||
|
Debug.Log("API session closed successfully.");
|
||||||
|
});
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
private void HandleSessionButtonClicked()
|
/// <summary>
/// Unsubscribes this object's handlers from the UI controller and audio
/// capture events.
/// </summary>
private void OnDestroy()
{
    // The references are inspector-assigned and may be missing; this also
    // runs for a duplicate instance destroyed in Awake before it subscribed.
    // Removing a handler that was never added is a no-op.
    if (_uiController != null)
    {
        _uiController.OnTalkButtonClicked -= HandleTalkButtonClicked;
    }
    if (_audioCapture != null)
    {
        _audioCapture.OnRecordingSaved -= HandleClipSaved;
    }
}
|
||||||
|
#endregion ------------------------------------------------------------
|
||||||
|
|
||||||
|
/// <summary>
/// Handles a saved recording: uploads the file, starts a transcription job,
/// waits for the transcript, then shows it in the chat output and sends it
/// as a chat message.
/// </summary>
/// <param name="filePath">Path of the saved audio file.</param>
private void HandleClipSaved(string filePath)
{
    Debug.Log($"Audio clip saved at: {filePath}");

    _apiManager.UploadAudioClip(
        filePath,
        (s3Key) =>
        {
            Debug.Log($"Clip uploaded to: {s3Key}");
            _apiManager.StartTranscript(
                s3Key,
                (jobName) =>
                {
                    Debug.Log($"Transcription job started: {jobName}");
                    _apiManager.DownloadTranscription(jobName, (transcript) =>
                    {
                        // A failed job invokes this callback with null; do
                        // not show or send an empty message.
                        if (string.IsNullOrWhiteSpace(transcript))
                        {
                            Debug.LogWarning("No transcript received; nothing to send.");
                            return;
                        }

                        Debug.Log($"Transcription completed: {transcript}");
                        _uiController.AppendChatOutput($"\nUser: {transcript}\n");

                        _apiManager.SendChatMessage(transcript,
                            (response) =>
                            {
                                _uiController.AppendChatOutput($"Bot: {response}\n");
                            }, () =>
                            {
                                // Speech synthesis finished.
                            });
                    });
                });
        });

    // Set right after the upload is kicked off, not when the chain finishes.
    _uiController.CurrentState = UIController.UIState.Idle;
}
|
||||||
|
|
||||||
|
private void HandleTalkButtonClicked()
|
||||||
{
|
{
|
||||||
if (!_apiManager.IsSessionActive)
|
if (!_apiManager.IsSessionActive)
|
||||||
{
|
{
|
||||||
_apiManager.InitiateSession(
|
Debug.LogWarning("Session is not active. Cannot send message.");
|
||||||
() => _uiController.SessionActive = _apiManager.IsSessionActive);
|
return;
|
||||||
}
|
}
|
||||||
else
|
|
||||||
{
|
|
||||||
_apiManager.CloseSession(
|
|
||||||
() => _uiController.SessionActive = _apiManager.IsSessionActive
|
|
||||||
);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
private void HandleSendChatButtonClicked(string message)
|
switch (_uiController.CurrentState)
|
||||||
{
|
|
||||||
_apiManager.SendChatMessage(message,
|
|
||||||
(responseMessage) =>
|
|
||||||
{
|
{
|
||||||
_uiController.ChatOutput += $"User: {message}\n";
|
case UIController.UIState.Idle:
|
||||||
_uiController.ChatOutput += $"Bot: {responseMessage}\n";
|
_audioCapture.StartRecording();
|
||||||
},
|
_uiController.CurrentState = UIController.UIState.Recording;
|
||||||
() =>
|
break;
|
||||||
{
|
|
||||||
// Speech finished callback (optional)
|
case UIController.UIState.Recording:
|
||||||
});
|
_audioCapture.StopRecording();
|
||||||
|
_uiController.CurrentState = UIController.UIState.Idle;
|
||||||
|
break;
|
||||||
|
|
||||||
|
case UIController.UIState.Processing:
|
||||||
|
Debug.Log("Currently processing. Please wait.");
|
||||||
|
break;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -18,6 +18,7 @@ namespace PPGIA.X540.Project3
|
||||||
Hz96000 = 96000
|
Hz96000 = 96000
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#region -- Fields & Properties ----------------------------------------
|
||||||
[Header("Audio Capture Settings")]
|
[Header("Audio Capture Settings")]
|
||||||
[SerializeField]
|
[SerializeField]
|
||||||
private SampleRate _sampleRateInHz = SampleRate.Hz44100;
|
private SampleRate _sampleRateInHz = SampleRate.Hz44100;
|
||||||
|
|
@ -54,9 +55,14 @@ namespace PPGIA.X540.Project3
|
||||||
private List<short> _capturedSamples =
|
private List<short> _capturedSamples =
|
||||||
new List<short>(1024 * 32); // filled only on Stop
|
new List<short>(1024 * 32); // filled only on Stop
|
||||||
private int _channels = 1; // microphone channel count (Unity usually mono)
|
private int _channels = 1; // microphone channel count (Unity usually mono)
|
||||||
private AudioClip _recordingClip;
|
|
||||||
private string _currentDevice;
|
|
||||||
|
|
||||||
|
private AudioClip _recordingClip;
|
||||||
|
public AudioClip GetRecordedClip() => _recordingClip;
|
||||||
|
|
||||||
|
private string _currentDevice;
|
||||||
|
#endregion ------------------------------------------------------------
|
||||||
|
|
||||||
|
#region -- MonoBehaviour Methods --------------------------------------
|
||||||
private void Awake()
|
private void Awake()
|
||||||
{
|
{
|
||||||
_audioSource = GetComponent<AudioSource>();
|
_audioSource = GetComponent<AudioSource>();
|
||||||
|
|
@ -67,6 +73,7 @@ namespace PPGIA.X540.Project3
|
||||||
{
|
{
|
||||||
StopRecording();
|
StopRecording();
|
||||||
}
|
}
|
||||||
|
#endregion ------------------------------------------------------------
|
||||||
|
|
||||||
[ContextMenu("Start recording audio")]
|
[ContextMenu("Start recording audio")]
|
||||||
public void StartRecording()
|
public void StartRecording()
|
||||||
|
|
@ -215,7 +222,6 @@ namespace PPGIA.X540.Project3
|
||||||
writer.Write(dataBytes);
|
writer.Write(dataBytes);
|
||||||
}
|
}
|
||||||
LastSavedFilePath = filePath;
|
LastSavedFilePath = filePath;
|
||||||
Debug.Log($"Audio saved to: {filePath}");
|
|
||||||
OnRecordingSaved?.Invoke(filePath);
|
OnRecordingSaved?.Invoke(filePath);
|
||||||
}
|
}
|
||||||
catch (Exception ex)
|
catch (Exception ex)
|
||||||
Loading…
Reference in New Issue