Compare commits

...

4 Commits

Author SHA1 Message Date
Jonas Luz Jr. 7a000ac9e2 Updates app main scene. 2025-11-26 19:31:51 -03:00
Jonas Luz Jr. e0daa00205 Adds STT functionality with audio upload
Adds speech-to-text (STT) functionality by allowing users to upload audio clips, start transcription jobs, and download transcriptions.

Introduces new API endpoints for STT upload, start, and download.

Also, converts AudioClip to WAV byte array.
2025-11-26 19:31:12 -03:00
Jonas Luz Jr. ea4535ebb6 Updates GUI for new working flow. 2025-11-26 19:30:37 -03:00
Jonas Luz Jr. 6c1ed05bd7 Refactors UI interaction and state management
Improves UI element handling by encapsulating UI logic.

Enhances state management for session activity and input
enablement through properties. Introduces event handlers for button
clicks to decouple UI interactions.
2025-11-26 07:21:44 -03:00
12 changed files with 721 additions and 203 deletions

View File

@ -2,6 +2,5 @@
}
.expandable {
flex-grow: 1;
white-space: normal;
}

View File

@ -1,11 +1,10 @@
<ui:UXML xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns:ui="UnityEngine.UIElements" xmlns:uie="UnityEditor.UIElements" noNamespaceSchemaLocation="../../../UIElementsSchema/UIElements.xsd" editor-extension-mode="False">
<Style src="project://database/Assets/_Client/GUI/Main.uss?fileID=7433441132597879392&amp;guid=fe550b4c3daa62b448a38ddf615910a6&amp;type=3#Main" />
<ui:VisualElement name="VE_Panel" style="flex-grow: 1;">
<ui:Button text="Iniciar sessão" name="B_Session" />
<ui:Label text="Chat:" />
<ui:TextField placeholder-text="Digite sua mensagem" name="TF_ChatInput" class="expandable" />
<ui:Button text="Enviar mensagem" name="B_SendChat" />
<ui:Button text="Falar..." name="B_Talk" />
<ui:ProgressBar value="0" title="Processando... Por favor, espere..." name="PB_Progress" enabled="true" />
<ui:TextField placeholder-text="response" multiline="true" readonly="true" name="TF_ChatOutput" class="expandable" />
<ui:ScrollView>
<ui:TextField placeholder-text="response" multiline="true" readonly="true" name="TF_Dialogue" class="expandable" />
</ui:ScrollView>
</ui:VisualElement>
</ui:UXML>

View File

@ -1,6 +1,6 @@
%YAML 1.1
%TAG !u! tag:unity3d.com,2011:
--- !u!1 &3050228793350555746
--- !u!1 &164669483031671566
GameObject:
m_ObjectHideFlags: 0
m_CorrespondingSourceObject: {fileID: 0}
@ -8,84 +8,32 @@ GameObject:
m_PrefabAsset: {fileID: 0}
serializedVersion: 6
m_Component:
- component: {fileID: 3228149243574310962}
- component: {fileID: 5034104198302478614}
- component: {fileID: 2870540532630303239}
- component: {fileID: 3085863286867829983}
- component: {fileID: 7970807069023951264}
m_Layer: 0
m_Name: -- GAME MANAGER --
m_Name: -- Audio Manager --
m_TagString: Untagged
m_Icon: {fileID: 0}
m_NavMeshLayer: 0
m_StaticEditorFlags: 0
m_IsActive: 1
--- !u!4 &3228149243574310962
--- !u!4 &7970807069023951264
Transform:
m_ObjectHideFlags: 0
m_CorrespondingSourceObject: {fileID: 0}
m_PrefabInstance: {fileID: 0}
m_PrefabAsset: {fileID: 0}
m_GameObject: {fileID: 3050228793350555746}
m_GameObject: {fileID: 164669483031671566}
serializedVersion: 2
m_LocalRotation: {x: 0, y: 0, z: 0, w: 1}
m_LocalPosition: {x: 0, y: 0, z: 0}
m_LocalScale: {x: 1, y: 1, z: 1}
m_ConstrainProportionsScale: 0
m_Children:
- {fileID: 7625920793580372814}
- {fileID: 8663494998515940154}
m_Father: {fileID: 0}
- {fileID: 4128215114116466198}
m_Father: {fileID: 3228149243574310962}
m_LocalEulerAnglesHint: {x: 0, y: 0, z: 0}
--- !u!114 &5034104198302478614
MonoBehaviour:
m_ObjectHideFlags: 0
m_CorrespondingSourceObject: {fileID: 0}
m_PrefabInstance: {fileID: 0}
m_PrefabAsset: {fileID: 0}
m_GameObject: {fileID: 3050228793350555746}
m_Enabled: 1
m_EditorHideFlags: 0
m_Script: {fileID: 19102, guid: 0000000000000000e000000000000000, type: 0}
m_Name:
m_EditorClassIdentifier: UnityEngine.dll::UnityEngine.UIElements.UIDocument
m_PanelSettings: {fileID: 11400000, guid: 08c29b4a94427e44598b2f1d4bc32893, type: 2}
m_ParentUI: {fileID: 0}
sourceAsset: {fileID: 9197481963319205126, guid: 394ce53ae03aab84c98352e044c8bed4, type: 3}
m_SortingOrder: 0
m_Position: 0
m_WorldSpaceSizeMode: 1
m_WorldSpaceWidth: 1920
m_WorldSpaceHeight: 1080
m_PivotReferenceSize: 0
m_Pivot: 0
m_WorldSpaceCollider: {fileID: 0}
--- !u!114 &2870540532630303239
MonoBehaviour:
m_ObjectHideFlags: 0
m_CorrespondingSourceObject: {fileID: 0}
m_PrefabInstance: {fileID: 0}
m_PrefabAsset: {fileID: 0}
m_GameObject: {fileID: 3050228793350555746}
m_Enabled: 1
m_EditorHideFlags: 0
m_Script: {fileID: 11500000, guid: 177140e4f5a62c145a88b74bb8f02f59, type: 3}
m_Name:
m_EditorClassIdentifier: PpgiaX540P3::PPGIA.X540.Project3.UIController
--- !u!114 &3085863286867829983
MonoBehaviour:
m_ObjectHideFlags: 0
m_CorrespondingSourceObject: {fileID: 0}
m_PrefabInstance: {fileID: 0}
m_PrefabAsset: {fileID: 0}
m_GameObject: {fileID: 3050228793350555746}
m_Enabled: 1
m_EditorHideFlags: 0
m_Script: {fileID: 11500000, guid: 7a1ca38af0a96524893a60d620fa5791, type: 3}
m_Name:
m_EditorClassIdentifier: PpgiaX540P3::PPGIA.X540.Project3.AppManager
_uiController: {fileID: 2870540532630303239}
_apiManager: {fileID: 4693018637014637836}
--- !u!1 &4601626619299515270
--- !u!1 &1759505303048597376
GameObject:
m_ObjectHideFlags: 0
m_CorrespondingSourceObject: {fileID: 0}
@ -93,47 +41,46 @@ GameObject:
m_PrefabAsset: {fileID: 0}
serializedVersion: 6
m_Component:
- component: {fileID: 7625920793580372814}
- component: {fileID: 1118623755729130604}
- component: {fileID: 4693018637014637836}
- component: {fileID: 4128215114116466198}
- component: {fileID: 7584847903250953468}
m_Layer: 0
m_Name: -- API Manager --
m_Name: Audio Output
m_TagString: Untagged
m_Icon: {fileID: 0}
m_NavMeshLayer: 0
m_StaticEditorFlags: 0
m_IsActive: 1
--- !u!4 &7625920793580372814
--- !u!4 &4128215114116466198
Transform:
m_ObjectHideFlags: 0
m_CorrespondingSourceObject: {fileID: 0}
m_PrefabInstance: {fileID: 0}
m_PrefabAsset: {fileID: 0}
m_GameObject: {fileID: 4601626619299515270}
m_GameObject: {fileID: 1759505303048597376}
serializedVersion: 2
m_LocalRotation: {x: 0, y: 0, z: 0, w: 1}
m_LocalRotation: {x: -0, y: -0, z: -0, w: 1}
m_LocalPosition: {x: 0, y: 0, z: 0}
m_LocalScale: {x: 1, y: 1, z: 1}
m_ConstrainProportionsScale: 0
m_Children: []
m_Father: {fileID: 3228149243574310962}
m_Father: {fileID: 7970807069023951264}
m_LocalEulerAnglesHint: {x: 0, y: 0, z: 0}
--- !u!82 &1118623755729130604
--- !u!82 &7584847903250953468
AudioSource:
m_ObjectHideFlags: 0
m_CorrespondingSourceObject: {fileID: 0}
m_PrefabInstance: {fileID: 0}
m_PrefabAsset: {fileID: 0}
m_GameObject: {fileID: 4601626619299515270}
m_GameObject: {fileID: 1759505303048597376}
m_Enabled: 1
serializedVersion: 4
OutputAudioMixerGroup: {fileID: 4597973050289367049, guid: b4346293b16e5254aadce52e891ff5e9, type: 2}
m_audioClip: {fileID: 0}
m_Resource: {fileID: 8300000, guid: 6613d523ef2496349a24abd07925f85a, type: 3}
m_Resource: {fileID: 0}
m_PlayOnAwake: 0
m_Volume: 1
m_Pitch: 1
Loop: 1
Loop: 0
Mute: 0
Spatialize: 0
SpatializePostEffects: 0
@ -215,6 +162,88 @@ AudioSource:
m_PreInfinity: 2
m_PostInfinity: 2
m_RotationOrder: 4
--- !u!1 &3050228793350555746
GameObject:
m_ObjectHideFlags: 0
m_CorrespondingSourceObject: {fileID: 0}
m_PrefabInstance: {fileID: 0}
m_PrefabAsset: {fileID: 0}
serializedVersion: 6
m_Component:
- component: {fileID: 3228149243574310962}
- component: {fileID: 3085863286867829983}
m_Layer: 0
m_Name: -- GAME MANAGER --
m_TagString: GameController
m_Icon: {fileID: 0}
m_NavMeshLayer: 0
m_StaticEditorFlags: 0
m_IsActive: 1
--- !u!4 &3228149243574310962
Transform:
m_ObjectHideFlags: 0
m_CorrespondingSourceObject: {fileID: 0}
m_PrefabInstance: {fileID: 0}
m_PrefabAsset: {fileID: 0}
m_GameObject: {fileID: 3050228793350555746}
serializedVersion: 2
m_LocalRotation: {x: 0, y: 0, z: 0, w: 1}
m_LocalPosition: {x: 0, y: 0, z: 0}
m_LocalScale: {x: 1, y: 1, z: 1}
m_ConstrainProportionsScale: 0
m_Children:
- {fileID: 7625920793580372814}
- {fileID: 7970807069023951264}
- {fileID: 5087449153445061889}
m_Father: {fileID: 0}
m_LocalEulerAnglesHint: {x: 0, y: 0, z: 0}
--- !u!114 &3085863286867829983
MonoBehaviour:
m_ObjectHideFlags: 0
m_CorrespondingSourceObject: {fileID: 0}
m_PrefabInstance: {fileID: 0}
m_PrefabAsset: {fileID: 0}
m_GameObject: {fileID: 3050228793350555746}
m_Enabled: 1
m_EditorHideFlags: 0
m_Script: {fileID: 11500000, guid: 7a1ca38af0a96524893a60d620fa5791, type: 3}
m_Name:
m_EditorClassIdentifier: PpgiaX540P3::PPGIA.X540.Project3.AppManager
_uiController: {fileID: 3705417928890449656}
_apiManager: {fileID: 4693018637014637836}
_audioCapture: {fileID: 1938294408165125026}
--- !u!1 &4601626619299515270
GameObject:
m_ObjectHideFlags: 0
m_CorrespondingSourceObject: {fileID: 0}
m_PrefabInstance: {fileID: 0}
m_PrefabAsset: {fileID: 0}
serializedVersion: 6
m_Component:
- component: {fileID: 7625920793580372814}
- component: {fileID: 4693018637014637836}
m_Layer: 0
m_Name: -- API Manager --
m_TagString: Untagged
m_Icon: {fileID: 0}
m_NavMeshLayer: 0
m_StaticEditorFlags: 0
m_IsActive: 1
--- !u!4 &7625920793580372814
Transform:
m_ObjectHideFlags: 0
m_CorrespondingSourceObject: {fileID: 0}
m_PrefabInstance: {fileID: 0}
m_PrefabAsset: {fileID: 0}
m_GameObject: {fileID: 4601626619299515270}
serializedVersion: 2
m_LocalRotation: {x: 0, y: 0, z: 0, w: 1}
m_LocalPosition: {x: 0, y: 0, z: 0}
m_LocalScale: {x: 1, y: 1, z: 1}
m_ConstrainProportionsScale: 0
m_Children: []
m_Father: {fileID: 3228149243574310962}
m_LocalEulerAnglesHint: {x: 0, y: 0, z: 0}
--- !u!114 &4693018637014637836
MonoBehaviour:
m_ObjectHideFlags: 0
@ -235,14 +264,16 @@ MonoBehaviour:
_chatEndpoint: /chat/
_llmAgentEndpoint: /agent/ask
_ttsEndpoint: /tts/synthesize
_sttEndpoint: /stt/upload
_sttUploadEndpoint: /transcript/get-upload-url
_sttStartEndpoint: /transcript/start
_sttDownloadEndpoint: /transcript/download
_clientId: unity-client
_timeoutInSeconds: 10
_query: "Ah, guru! Como \xE9 bom poder contar com suas orienta\xE7\xF5es!"
_session:
session_id: 9686ca2a-302a-4ac8-8709-a98067d37530
created_at: 1763908462
_audioSource: {fileID: 1118623755729130604}
_audioSource: {fileID: 7584847903250953468}
--- !u!1 &5836695571582163658
GameObject:
m_ObjectHideFlags: 0
@ -252,10 +283,10 @@ GameObject:
serializedVersion: 6
m_Component:
- component: {fileID: 8663494998515940154}
- component: {fileID: 1938294408165125026}
- component: {fileID: 796320736253214861}
- component: {fileID: 1938294408165125026}
m_Layer: 0
m_Name: -- AudioManager --
m_Name: Audio Input
m_TagString: Untagged
m_Icon: {fileID: 0}
m_NavMeshLayer: 0
@ -274,22 +305,8 @@ Transform:
m_LocalScale: {x: 1, y: 1, z: 1}
m_ConstrainProportionsScale: 0
m_Children: []
m_Father: {fileID: 3228149243574310962}
m_Father: {fileID: 7970807069023951264}
m_LocalEulerAnglesHint: {x: 0, y: 0, z: 0}
--- !u!114 &1938294408165125026
MonoBehaviour:
m_ObjectHideFlags: 0
m_CorrespondingSourceObject: {fileID: 0}
m_PrefabInstance: {fileID: 0}
m_PrefabAsset: {fileID: 0}
m_GameObject: {fileID: 5836695571582163658}
m_Enabled: 1
m_EditorHideFlags: 0
m_Script: {fileID: 11500000, guid: 9523fc2f4430eb549ab3da789eaf70c1, type: 3}
m_Name:
m_EditorClassIdentifier: Assembly-CSharp::PPGIA.X540.Project3.AudioCapture
_sampleRateInHz: 16000
_playingBack: 0
--- !u!82 &796320736253214861
AudioSource:
m_ObjectHideFlags: 0
@ -299,7 +316,7 @@ AudioSource:
m_GameObject: {fileID: 5836695571582163658}
m_Enabled: 1
serializedVersion: 4
OutputAudioMixerGroup: {fileID: 4597973050289367049, guid: b4346293b16e5254aadce52e891ff5e9, type: 2}
OutputAudioMixerGroup: {fileID: -7600268988427216071, guid: b4346293b16e5254aadce52e891ff5e9, type: 2}
m_audioClip: {fileID: 0}
m_Resource: {fileID: 0}
m_PlayOnAwake: 1
@ -387,3 +404,91 @@ AudioSource:
m_PreInfinity: 2
m_PostInfinity: 2
m_RotationOrder: 4
--- !u!114 &1938294408165125026
MonoBehaviour:
m_ObjectHideFlags: 0
m_CorrespondingSourceObject: {fileID: 0}
m_PrefabInstance: {fileID: 0}
m_PrefabAsset: {fileID: 0}
m_GameObject: {fileID: 5836695571582163658}
m_Enabled: 1
m_EditorHideFlags: 0
m_Script: {fileID: 11500000, guid: 9523fc2f4430eb549ab3da789eaf70c1, type: 3}
m_Name:
m_EditorClassIdentifier: Assembly-CSharp::PPGIA.X540.Project3.AudioCapture
_sampleRateInHz: 16000
_maxRecordingSeconds: 300
_fileName: RecordedAudio.wav
_playbackVolume: 1
_enableMonitoring: 0
_monitorLatencyMs: 80
_microphones: []
_selectedMicrophoneIndex: 0
--- !u!1 &7800601072168794597
GameObject:
m_ObjectHideFlags: 0
m_CorrespondingSourceObject: {fileID: 0}
m_PrefabInstance: {fileID: 0}
m_PrefabAsset: {fileID: 0}
serializedVersion: 6
m_Component:
- component: {fileID: 5087449153445061889}
- component: {fileID: 1529497273491449657}
- component: {fileID: 3705417928890449656}
m_Layer: 0
m_Name: -- UI Manager --
m_TagString: Untagged
m_Icon: {fileID: 0}
m_NavMeshLayer: 0
m_StaticEditorFlags: 0
m_IsActive: 1
--- !u!4 &5087449153445061889
Transform:
m_ObjectHideFlags: 0
m_CorrespondingSourceObject: {fileID: 0}
m_PrefabInstance: {fileID: 0}
m_PrefabAsset: {fileID: 0}
m_GameObject: {fileID: 7800601072168794597}
serializedVersion: 2
m_LocalRotation: {x: 0, y: 0, z: 0, w: 1}
m_LocalPosition: {x: 0, y: 0, z: 0}
m_LocalScale: {x: 1, y: 1, z: 1}
m_ConstrainProportionsScale: 0
m_Children: []
m_Father: {fileID: 3228149243574310962}
m_LocalEulerAnglesHint: {x: 0, y: 0, z: 0}
--- !u!114 &1529497273491449657
MonoBehaviour:
m_ObjectHideFlags: 0
m_CorrespondingSourceObject: {fileID: 0}
m_PrefabInstance: {fileID: 0}
m_PrefabAsset: {fileID: 0}
m_GameObject: {fileID: 7800601072168794597}
m_Enabled: 1
m_EditorHideFlags: 0
m_Script: {fileID: 19102, guid: 0000000000000000e000000000000000, type: 0}
m_Name:
m_EditorClassIdentifier: UnityEngine.dll::UnityEngine.UIElements.UIDocument
m_PanelSettings: {fileID: 11400000, guid: 08c29b4a94427e44598b2f1d4bc32893, type: 2}
m_ParentUI: {fileID: 0}
sourceAsset: {fileID: 9197481963319205126, guid: 394ce53ae03aab84c98352e044c8bed4, type: 3}
m_SortingOrder: 0
m_Position: 0
m_WorldSpaceSizeMode: 1
m_WorldSpaceWidth: 1920
m_WorldSpaceHeight: 1080
m_PivotReferenceSize: 0
m_Pivot: 0
m_WorldSpaceCollider: {fileID: 0}
--- !u!114 &3705417928890449656
MonoBehaviour:
m_ObjectHideFlags: 0
m_CorrespondingSourceObject: {fileID: 0}
m_PrefabInstance: {fileID: 0}
m_PrefabAsset: {fileID: 0}
m_GameObject: {fileID: 7800601072168794597}
m_Enabled: 1
m_EditorHideFlags: 0
m_Script: {fileID: 11500000, guid: 177140e4f5a62c145a88b74bb8f02f59, type: 3}
m_Name:
m_EditorClassIdentifier: PpgiaX540P3::PPGIA.X540.Project3.UIController

View File

@ -440,21 +440,9 @@ PrefabInstance:
serializedVersion: 3
m_TransformParent: {fileID: 0}
m_Modifications:
- target: {fileID: 796320736253214861, guid: 004ea84725223334994cf72d2115c180, type: 3}
propertyPath: OutputAudioMixerGroup
value:
objectReference: {fileID: -7600268988427216071, guid: b4346293b16e5254aadce52e891ff5e9, type: 2}
- target: {fileID: 1938294408165125026, guid: 004ea84725223334994cf72d2115c180, type: 3}
propertyPath: _fileName
value: RecordedAudio.wav
objectReference: {fileID: 0}
- target: {fileID: 1938294408165125026, guid: 004ea84725223334994cf72d2115c180, type: 3}
propertyPath: _selectedMicrophoneIndex
value: 0
objectReference: {fileID: 0}
- target: {fileID: 3050228793350555746, guid: 004ea84725223334994cf72d2115c180, type: 3}
propertyPath: m_Name
value: -- GAME MANAGER --
value: -- APP MANAGER --
objectReference: {fileID: 0}
- target: {fileID: 3228149243574310962, guid: 004ea84725223334994cf72d2115c180, type: 3}
propertyPath: m_LocalPosition.x

View File

@ -1,6 +1,6 @@
using System;
using System.Collections;
using System.IO;
using System.Collections;
using System.Text;
using UnityEngine;
@ -56,12 +56,6 @@ namespace PPGIA.X540.Project3.API
request.downloadHandler = new DownloadHandlerBuffer();
// Debug.Log($"Sending {method} request to {url}");
// Debug.Log(
// payload != null ?
// $"Payload: {JsonUtility.ToJson(payload)}" :
// "No payload.");
var op = request.SendWebRequest();
yield return WaitForTimeout(op, timeoutInSeconds, () =>
{
@ -117,6 +111,170 @@ Response Body: {body}";
url, "DELETE", null, timeoutInSeconds, callbackOnSuccess);
}
/// <summary>
/// Reads the file at <paramref name="filePath"/> and sends its bytes as the body
/// of an HTTP PUT request with a <c>Content-Type</c> header of <c>audio/wav</c>.
/// </summary>
/// <param name="url">Destination URL for the PUT request.</param>
/// <param name="filePath">Path of the local file whose bytes are uploaded.</param>
/// <param name="timeoutInSeconds">Timeout handed to <c>WaitForTimeout</c>.</param>
/// <param name="callbackOnSuccess">
/// Invoked with the finished request when its result is
/// <see cref="UnityWebRequest.Result.Success"/>; not invoked otherwise.
/// </param>
/// <returns>An enumerator intended to be run as a Unity coroutine.</returns>
internal static IEnumerator UploadAudioDataCoroutine(
    string url,
    string filePath,
    float timeoutInSeconds,
    Action<UnityWebRequest> callbackOnSuccess)
{
    // Report a missing file as an upload failure instead of letting
    // File.ReadAllBytes throw from inside the coroutine.
    if (string.IsNullOrEmpty(filePath) || !File.Exists(filePath))
    {
        Debug.LogError($"Failed to upload audio data: file not found at '{filePath}'.");
        yield break;
    }

    // PUT the audio data as binary
    byte[] audioData = File.ReadAllBytes(filePath);

    using (UnityWebRequest request = UnityWebRequest.Put(url, audioData))
    {
        request.SetRequestHeader("Content-Type", "audio/wav");

        var op = request.SendWebRequest();
        // NOTE(review): the timeout handler below only logs. Whether
        // WaitForTimeout also aborts the request is not visible here - confirm.
        yield return WaitForTimeout(op, timeoutInSeconds, () =>
        {
            Debug.LogError("Request timed out.");
        });

        if (request.result == UnityWebRequest.Result.Success)
        {
            callbackOnSuccess?.Invoke(request);
        }
        else
        {
            var body = request.downloadHandler?.text ?? string.Empty;
            Debug.LogError($"Failed to upload audio data: {request.error} (HTTP {request.responseCode})\nBody: {body}");
        }
    }
}
/// <summary>
/// Converts <paramref name="audioClip"/> to an in-memory 16-bit PCM WAV file and
/// uploads it through <c>UploadFileCoroutine</c> as a form field named
/// <c>file</c>, using <c>{clip name}.wav</c> as the file name.
/// </summary>
/// <param name="url">Destination URL for the upload.</param>
/// <param name="audioClip">Clip to convert and upload; must not be null.</param>
/// <param name="timeoutInSeconds">Timeout forwarded to the upload coroutine.</param>
/// <param name="callbackOnSuccess">Forwarded to the upload coroutine.</param>
/// <returns>An enumerator intended to be run as a Unity coroutine.</returns>
internal static IEnumerator UploadAudioCoroutine(
    string url,
    AudioClip audioClip,
    float timeoutInSeconds,
    Action<UnityWebRequest> callbackOnSuccess)
{
    // Without this guard a null clip would survive the conversion call (which
    // returns an empty array) and then throw on audioClip.name below.
    if (audioClip == null)
    {
        Debug.LogError("UploadAudioCoroutine: audioClip is null");
        yield break;
    }

    // Convert AudioClip to WAV (PCM 16-bit little endian) without external utility.
    byte[] audioData = AudioClipToWavBytes(audioClip);
    string fileName = $"{audioClip.name}.wav";
    string fieldName = "file";

    yield return UploadFileCoroutine(
        url, audioData, fileName, fieldName, timeoutInSeconds, callbackOnSuccess);
}
/// <summary>
/// Builds a complete WAV file in memory for <paramref name="clip"/>: a 44-byte
/// RIFF/WAVE header followed by the clip's samples as 16-bit little-endian PCM.
/// Supports mono or multi-channel clips. Assumes
/// <c>clip.samples * clip.channels * 2 + 44</c> fits in an <c>int</c>.
/// </summary>
/// <param name="clip">Clip whose sample data is read via <c>GetData</c>.</param>
/// <returns>
/// The WAV file bytes, or an empty array (after logging an error) when
/// <paramref name="clip"/> is null.
/// </returns>
private static byte[] AudioClipToWavBytes(AudioClip clip)
{
    if (clip == null)
    {
        Debug.LogError("AudioClipToWavBytes: clip is null");
        return Array.Empty<byte>();
    }

    const int headerSize = 44;      // canonical PCM WAV header
    const int bytesPerSample = 2;   // 16-bit PCM

    int channels = clip.channels;
    int sampleCount = clip.samples * channels; // total samples across channels
    int sampleRate = clip.frequency;
    int dataSize = sampleCount * bytesPerSample; // NumSamples * NumChannels * BitsPerSample/8

    // Get float data. GetData reports failure through its return value; surface
    // it, because the buffer then stays zeroed and the WAV would be silent.
    float[] floatData = new float[sampleCount];
    if (sampleCount > 0 && !clip.GetData(floatData, 0))
    {
        Debug.LogWarning(
            "AudioClipToWavBytes: AudioClip.GetData reported failure; the WAV payload may be silent.");
    }

    byte[] wav = new byte[headerSize + dataSize];

    // Helpers local to this method: little-endian writers and a 4-char tag writer.
    void WriteInt32LE(int offset, int value)
    {
        wav[offset] = (byte)(value & 0xFF);
        wav[offset + 1] = (byte)((value >> 8) & 0xFF);
        wav[offset + 2] = (byte)((value >> 16) & 0xFF);
        wav[offset + 3] = (byte)((value >> 24) & 0xFF);
    }

    void WriteInt16LE(int offset, short value)
    {
        wav[offset] = (byte)(value & 0xFF);
        wav[offset + 1] = (byte)((value >> 8) & 0xFF);
    }

    void WriteTag(int offset, string tag)
    {
        for (int i = 0; i < tag.Length; i++)
        {
            wav[offset + i] = (byte)tag[i];
        }
    }

    WriteTag(0, "RIFF");                                        // ChunkID
    WriteInt32LE(4, 36 + dataSize);                             // ChunkSize = 36 + Subchunk2Size
    WriteTag(8, "WAVE");                                        // Format
    WriteTag(12, "fmt ");                                       // Subchunk1ID
    WriteInt32LE(16, 16);                                       // Subchunk1Size (16 for PCM)
    WriteInt16LE(20, 1);                                        // AudioFormat (1 = PCM)
    WriteInt16LE(22, (short)channels);                          // NumChannels
    WriteInt32LE(24, sampleRate);                               // SampleRate
    WriteInt32LE(28, sampleRate * channels * bytesPerSample);   // ByteRate
    WriteInt16LE(32, (short)(channels * bytesPerSample));       // BlockAlign
    WriteInt16LE(34, 16);                                       // BitsPerSample
    WriteTag(36, "data");                                       // Subchunk2ID
    WriteInt32LE(40, dataSize);                                 // Subchunk2Size

    // Convert to 16-bit PCM directly into the output buffer; this avoids the
    // intermediate PCM array and the copy that followed it.
    int cursor = headerSize;
    for (int i = 0; i < sampleCount; i++)
    {
        // Clamp just in case
        float f = Mathf.Clamp(floatData[i], -1f, 1f);
        short s = (short)Mathf.RoundToInt(f * 32767f);
        wav[cursor++] = (byte)(s & 0xFF); // little endian
        wav[cursor++] = (byte)((s >> 8) & 0xFF);
    }

    return wav;
}
/// <summary>
/// Posts <paramref name="fileData"/> as a binary form field and reports the
/// outcome: the success callback on success, an error log otherwise.
/// </summary>
/// <param name="url">Destination URL for the POST request.</param>
/// <param name="fileData">Bytes placed in the form field.</param>
/// <param name="fileName">File name attached to the form field.</param>
/// <param name="fieldName">Name of the form field carrying the file.</param>
/// <param name="timeoutInSeconds">Timeout handed to <c>WaitForTimeout</c>.</param>
/// <param name="callbackOnSuccess">
/// Invoked with the finished request when its result is
/// <see cref="UnityWebRequest.Result.Success"/>.
/// </param>
/// <returns>An enumerator intended to be run as a Unity coroutine.</returns>
internal static IEnumerator UploadFileCoroutine(
    string url,
    byte[] fileData,
    string fileName,
    string fieldName,
    float timeoutInSeconds,
    Action<UnityWebRequest> callbackOnSuccess)
{
    var multipart = new WWWForm();
    multipart.AddBinaryData(fieldName, fileData, fileName);

    using (UnityWebRequest upload = UnityWebRequest.Post(url, multipart))
    {
        var pending = upload.SendWebRequest();
        yield return WaitForTimeout(pending, timeoutInSeconds, () =>
        {
            Debug.LogError("Request timed out.");
        });

        // Failure path first; the using block still disposes the request.
        if (upload.result != UnityWebRequest.Result.Success)
        {
            Debug.LogError(
                $"Error uploading file: {upload.error}");
            yield break;
        }

        callbackOnSuccess?.Invoke(upload);
    }
}
internal static IEnumerator DownloadAudioCoroutine(
string url,
float timeoutInSeconds,

View File

@ -1,4 +1,5 @@
using System;
using System.IO;
using System.Collections;
using System.Linq;
@ -7,7 +8,6 @@ using UnityEngine;
namespace PPGIA.X540.Project3.API
{
[RequireComponent(typeof(AudioSource))]
public class ApiClientManager : MonoBehaviour
{
#region -- Inspector Fields -------------------------------------------
@ -29,7 +29,7 @@ namespace PPGIA.X540.Project3.API
private string _sessionCloseEndpoint = "/session/close";
[SerializeField]
private string _chatEndpoint = "/chat/";
private string _chatEndpoint = "/chat";
[SerializeField]
private string _llmAgentEndpoint = "/agent/ask";
@ -38,7 +38,13 @@ namespace PPGIA.X540.Project3.API
private string _ttsEndpoint = "/tts/synthesize";
[SerializeField]
private string _sttEndpoint = "/stt/upload";
private string _sttUploadEndpoint = "/transcript/get-upload-url";
[SerializeField]
private string _sttStartEndpoint = "/transcript/start";
[SerializeField]
private string _sttDownloadEndpoint = "/transcript/download";
[Header("API Settings & Workload")]
[SerializeField]
@ -73,7 +79,10 @@ namespace PPGIA.X540.Project3.API
// Resolves the AudioSource reference: keeps an already-assigned one,
// otherwise looks it up on this GameObject and warns if none is found.
void Awake()
{
    if (_audioSource != null)
    {
        return;
    }

    // Explicit null comparisons are kept so Unity's overloaded equality on
    // UnityEngine.Object is used.
    _audioSource = GetComponent<AudioSource>();
    if (_audioSource == null)
    {
        Debug.LogWarning("AudioSource component is missing.");
    }
}
#region -- API Calls --------------------------------------------------
@ -131,6 +140,135 @@ namespace PPGIA.X540.Project3.API
}));
}
/// <summary>
/// Uploads a local audio file in two steps: first POSTs the file name and
/// content type to the STT upload endpoint to obtain an upload URL and S3 key,
/// then PUTs the file bytes to that URL.
/// </summary>
/// <param name="localFilePath">Path of the local audio file to upload.</param>
/// <param name="uploadCompletedCallback">
/// Invoked with the S3 key from the first response once the PUT succeeds.
/// Not invoked when there is no session, no file path, or no upload URL.
/// </param>
public void UploadAudioClip(
    string localFilePath, Action<string> uploadCompletedCallback = null)
{
    if (_session == null)
    {
        Debug.LogWarning("No active session. Please initiate a session first.");
        return;
    }

    if (string.IsNullOrEmpty(localFilePath))
    {
        Debug.LogWarning("No file path provided for upload.");
        return;
    }

    // Cancels any coroutine already running on this component before starting.
    StopAllCoroutines();

    var url = EndpointUrl(_sttUploadEndpoint, _session.SessionId);

    // NOTE(review): if CallEndpointWithPostCoroutine serializes the payload with
    // JsonUtility, an anonymous type would serialize as "{}" - confirm, and switch
    // to a [Serializable] request class if so.
    var payload = new
    {
        filename = Path.GetFileName(localFilePath),
        content_type = "audio/wav"
    };

    StartCoroutine(ApiClient.CallEndpointWithPostCoroutine(
        url, _timeoutInSeconds, payload, (request) =>
        {
            var body = request.downloadHandler?.text ?? string.Empty;

            // Parse the response once and read both values from the same object.
            var uploadInfo = JsonUtility.FromJson<STTUploadResponse>(body);
            var uploadUrl = uploadInfo?.UploadUrl;
            var s3Key = uploadInfo?.S3Key;

            // An empty URL is as unusable as a missing one.
            if (string.IsNullOrEmpty(uploadUrl))
            {
                Debug.LogWarning("Failed to get upload URL.");
                return;
            }

            StartCoroutine(ApiClient.UploadAudioDataCoroutine(
                uploadUrl, localFilePath, _timeoutInSeconds, (uploadRequest) =>
                {
                    Debug.Log($"Audio upload complete: {uploadRequest.responseCode}");
                    uploadCompletedCallback?.Invoke(s3Key);
                }));
        }));
}
/// <summary>
/// Starts a transcription job for an uploaded audio object by POSTing its S3 key
/// to the STT start endpoint.
/// </summary>
/// <param name="s3Key">S3 key of the uploaded audio; must be non-empty.</param>
/// <param name="transcriptStartedCallback">
/// Invoked with the job name parsed from the response (null when the response
/// carries none). Not invoked when there is no session or no key.
/// </param>
[ContextMenu("STT/Start Transcript")]
public void StartTranscript(string s3Key,
    Action<string> transcriptStartedCallback = null)
{
    // Ensure there is an active session
    if (_session == null)
    {
        Debug.LogWarning("No active session. Please initiate a session first.");
        return;
    }

    if (string.IsNullOrEmpty(s3Key))
    {
        Debug.LogWarning("No S3 key provided for transcription.");
        return;
    }

    // Cancels any coroutine already running on this component before starting.
    StopAllCoroutines();

    // Build the endpoint URL
    var url = EndpointUrl(_sttStartEndpoint);

    // The request body only needs the s3_key field, so the response model is
    // reused as the payload type.
    var payload = new STTUploadResponse {
        s3_key = s3Key
    };

    // Make the API call to start the transcription job
    StartCoroutine(ApiClient.CallEndpointWithPostCoroutine(
        url, _timeoutInSeconds, payload, (request) =>
        {
            var body = request.downloadHandler?.text ?? string.Empty;
            var response = ApiModel.FromJson<STTJobResponse>(body);
            var jobName = response?.JobName;

            if (string.IsNullOrEmpty(jobName))
            {
                // Do not report a start when the response carried no job name.
                Debug.LogError("Transcription start response did not include a job name.");
            }
            else
            {
                Debug.Log($"Transcription job started: {jobName}");
            }

            // The callback still fires so existing callers see the same flow.
            transcriptStartedCallback?.Invoke(jobName);
        }));
}
/// <summary>
/// Polls the STT download endpoint for <paramref name="jobName"/> every half
/// second until the polling coroutine reports a terminal status.
/// </summary>
/// <param name="jobName">Name of the transcription job; must be non-empty.</param>
/// <param name="transcriptionReceivedCallback">
/// Forwarded to the polling coroutine, which invokes it with the transcript on
/// completion or with null on failure. Not invoked when there is no session or
/// no job name.
/// </param>
[ContextMenu("STT/Download Transcription")]
public void DownloadTranscription(string jobName,
    Action<string> transcriptionReceivedCallback = null)
{
    // Ensure there is an active session
    if (_session == null)
    {
        Debug.LogWarning("No active session. Please initiate a session first.");
        return;
    }

    // Without a job name the poll URL cannot identify a job, so the loop could
    // never reach a terminal status.
    if (string.IsNullOrEmpty(jobName))
    {
        Debug.LogWarning("No transcription job name provided for download.");
        return;
    }

    const float pollIntervalInSeconds = .5f;

    // Cancels any coroutine already running on this component before starting.
    StopAllCoroutines();
    StartCoroutine(KeepCallingCoroutine(
        EndpointUrl(_sttDownloadEndpoint, jobName), pollIntervalInSeconds,
        transcriptionReceivedCallback
    ));
}
/// <summary>
/// Repeatedly issues a GET to <paramref name="url"/>, waiting
/// <paramref name="delayInSeconds"/> before each call, until the parsed job
/// status is <c>FAILED</c> or <c>COMPLETED</c>.
/// </summary>
/// <param name="url">Endpoint polled for the job status.</param>
/// <param name="delayInSeconds">Delay before each request.</param>
/// <param name="callback">
/// Invoked with the transcript on <c>COMPLETED</c>, or with null on <c>FAILED</c>.
/// </param>
/// <returns>An enumerator intended to be run as a Unity coroutine.</returns>
private IEnumerator KeepCallingCoroutine(string url,
    float delayInSeconds, Action<string> callback)
{
    // NOTE(review): there is no attempt cap. The loop only ends on a terminal
    // status or when the coroutine is stopped externally.
    var wait = new WaitForSeconds(delayInSeconds);
    bool keepCalling = true;

    while (keepCalling)
    {
        yield return wait;
        yield return ApiClient.CallEndpointWithGetCoroutine(
            url, _timeoutInSeconds, (request) =>
            {
                var body = request.downloadHandler?.text ?? string.Empty;
                var response = ApiModel.FromJson<STTJobResponse>(body);

                // An unparseable body is treated as "not ready yet" rather than
                // dereferencing a null response.
                if (response == null)
                {
                    Debug.LogWarning("Transcription status response could not be parsed; retrying.");
                    return;
                }

                switch (response.Status)
                {
                    case "FAILED":
                        keepCalling = false;
                        Debug.LogError("Transcription job failed.");
                        callback?.Invoke(null);
                        break;

                    case "COMPLETED":
                        keepCalling = false;
                        callback?.Invoke(response.Transcript);
                        break;

                    // Any other status: keep polling.
                }
            });
    }
}
[ContextMenu("Chat/Send Message")]
public void SendChatMessage(string message = null,
Action<string> responseReceivedCallback = null,

View File

@ -48,6 +48,30 @@ namespace PPGIA.X540.Project3.API
public int ExpiresIn => expires_in;
}
/// <summary>
/// Response model parsed from the body returned by the STT upload-URL request.
/// The snake_case public fields are what the JSON deserializer populates; the
/// PascalCase properties are read-only accessors over them.
/// </summary>
[Serializable]
public class STTUploadResponse : ApiModel
{
// URL the audio bytes are subsequently uploaded to.
public string upload_url;
// Storage key of the uploaded object; later sent when starting a transcript.
public string s3_key;
/// <summary>Read-only view of <see cref="upload_url"/>.</summary>
public string UploadUrl => upload_url;
/// <summary>Read-only view of <see cref="s3_key"/>.</summary>
public string S3Key => s3_key;
}
/// <summary>
/// Response model for transcription-job requests (starting a job and polling
/// for its result). The snake_case public fields are presumably populated by
/// name from the response JSON - verify against ApiModel.FromJson. The
/// PascalCase properties are read-only accessors over them.
/// </summary>
[Serializable]
public class STTJobResponse : ApiModel
{
// Name of the transcription job; used to build the download/poll URL.
public string job_name;
// NOTE(review): presumably the storage URI of the job's audio - not read by
// any code visible here; confirm against the API.
public string s3_uri;
// Job status; callers compare it against "FAILED" and "COMPLETED".
public string status;
// Transcribed text; read once the status is "COMPLETED".
public string transcript;
/// <summary>Read-only view of <see cref="job_name"/>.</summary>
public string JobName => job_name;
/// <summary>Read-only view of <see cref="s3_uri"/>.</summary>
public string S3Uri => s3_uri;
/// <summary>Read-only view of <see cref="status"/>.</summary>
public string Status => status;
/// <summary>Read-only view of <see cref="transcript"/>.</summary>
public string Transcript => transcript;
}
internal enum Environment
{
Development,

View File

@ -7,6 +7,10 @@ namespace PPGIA.X540.Project3
{
public class AppManager : MonoBehaviour
{
// Singleton instance
public static AppManager Instance { get; private set; }
#region -- Fields & Properties ----------------------------------------
[Header("References")]
[SerializeField]
private UIController _uiController;
@ -14,59 +18,129 @@ namespace PPGIA.X540.Project3
[SerializeField]
private ApiClientManager _apiManager;
[SerializeField]
private AudioCapture _audioCapture;
private AudioClip _recordedClip;
#endregion ------------------------------------------------------------
#region -- MonoBehaviour Methods --------------------------------------
private void Awake()
{
if (_uiController == null)
_uiController = GetComponent<UIController>();
if (_apiManager == null)
_apiManager = GetComponent<ApiClientManager>();
// Singleton pattern implementation
if (Instance == null)
{
Instance = this;
DontDestroyOnLoad(gameObject);
}
else
{
Destroy(gameObject);
return;
}
void Start()
if (_uiController == null)
{
_apiManager.CloseSession(
() => _uiController.SessionActive = _apiManager.IsSessionActive);
Debug.LogError("UIController reference is missing in AppManager.");
}
if (_apiManager == null)
{
Debug.LogError("ApiClientManager reference is missing in AppManager.");
}
if (_audioCapture == null)
{
Debug.LogError("AudioCapture reference is missing in AppManager.");
}
_uiController.OnTalkButtonClicked += HandleTalkButtonClicked;
_audioCapture.OnRecordingSaved += HandleClipSaved;
}
private void OnEnable()
{
_uiController.OnSessionButtonClicked += HandleSessionButtonClicked;
_uiController.OnSendChatButtonClicked += HandleSendChatButtonClicked;
_apiManager.InitiateSession(() =>
{
Debug.Log("API session initiated successfully.");
});
}
private void OnDisable()
void OnDisable()
{
_uiController.OnSessionButtonClicked -= HandleSessionButtonClicked;
_uiController.OnSendChatButtonClicked -= HandleSendChatButtonClicked;
}
private void HandleSessionButtonClicked()
if (_apiManager != null && _apiManager.IsSessionActive)
{
if (!_apiManager.IsSessionActive)
_apiManager.CloseSession(() =>
{
_apiManager.InitiateSession(
() => _uiController.SessionActive = _apiManager.IsSessionActive);
}
else
{
_apiManager.CloseSession(
() => _uiController.SessionActive = _apiManager.IsSessionActive
);
}
}
private void HandleSendChatButtonClicked(string message)
{
_apiManager.SendChatMessage(message,
(responseMessage) =>
{
_uiController.ChatOutput += $"User: {message}\n";
_uiController.ChatOutput += $"Bot: {responseMessage}\n";
},
() =>
{
// Speech finished callback (optional)
Debug.Log("API session closed successfully.");
});
}
}
private void OnDestroy()
{
_uiController.OnTalkButtonClicked -= HandleTalkButtonClicked;
_audioCapture.OnRecordingSaved -= HandleClipSaved;
}
#endregion ------------------------------------------------------------
/// <summary>
/// Handles a saved recording by running the speech pipeline: upload the clip,
/// start a transcription job, poll for the transcript, then append it to the
/// dialogue and send it as a chat message. The UI state is set back to Idle
/// as soon as the pipeline has been kicked off.
/// </summary>
/// <param name="filePath">Path of the saved audio file.</param>
private void HandleClipSaved(string filePath)
{
    Debug.Log($"Audio clip saved at: {filePath}");

    _apiManager.UploadAudioClip(filePath, OnClipUploaded);
    _uiController.CurrentState = UIController.UIState.Idle;

    // Step 2: the clip is stored remotely; request a transcription job.
    void OnClipUploaded(string s3Key)
    {
        Debug.Log($"Clip uploaded to: {s3Key}");
        _apiManager.StartTranscript(s3Key, OnTranscriptStarted);
    }

    // Step 3: the job exists; poll until a transcript (or a failure) arrives.
    void OnTranscriptStarted(string jobName)
    {
        Debug.Log($"Transcription job started: {jobName}");
        _apiManager.DownloadTranscription(jobName, OnTranscriptReady);
    }

    // Step 4: show the user's words and ask the chat endpoint for a reply.
    void OnTranscriptReady(string transcript)
    {
        // The polling coroutine passes null when the job failed. Skip the chat
        // request rather than printing an empty "User:" line and forwarding null
        // to SendChatMessage.
        if (string.IsNullOrEmpty(transcript))
        {
            Debug.LogWarning("Transcription unavailable; skipping chat request.");
            return;
        }

        Debug.Log($"Transcription completed: {transcript}");
        _uiController.AppendChatOutput($"\nUser: {transcript}\n");

        _apiManager.SendChatMessage(transcript,
            (response) =>
            {
                _uiController.AppendChatOutput($"Bot: {response}\n");
            }, () =>
            {
                // Speech synthesis finished.
            });
    }
}
/// <summary>
/// Toggles recording when the talk button is pressed: Idle starts a recording,
/// Recording stops it, and Processing only logs a notice. Does nothing beyond
/// a warning when no API session is active.
/// </summary>
private void HandleTalkButtonClicked()
{
    if (!_apiManager.IsSessionActive)
    {
        Debug.LogWarning("Session is not active. Cannot send message.");
        return;
    }

    var state = _uiController.CurrentState;

    if (state == UIController.UIState.Idle)
    {
        _audioCapture.StartRecording();
        _uiController.CurrentState = UIController.UIState.Recording;
    }
    else if (state == UIController.UIState.Recording)
    {
        _audioCapture.StopRecording();
        _uiController.CurrentState = UIController.UIState.Idle;
    }
    else if (state == UIController.UIState.Processing)
    {
        Debug.Log("Currently processing. Please wait.");
    }
}
}
}

View File

@ -18,6 +18,7 @@ namespace PPGIA.X540.Project3
Hz96000 = 96000
}
#region -- Fields & Properties ----------------------------------------
[Header("Audio Capture Settings")]
[SerializeField]
private SampleRate _sampleRateInHz = SampleRate.Hz44100;
@ -54,9 +55,14 @@ namespace PPGIA.X540.Project3
private List<short> _capturedSamples =
new List<short>(1024 * 32); // filled only on Stop
private int _channels = 1; // microphone channel count (Unity usually mono)
private AudioClip _recordingClip;
private string _currentDevice;
private AudioClip _recordingClip;
public AudioClip GetRecordedClip() => _recordingClip;
private string _currentDevice;
#endregion ------------------------------------------------------------
#region -- MonoBehaviour Methods --------------------------------------
private void Awake()
{
_audioSource = GetComponent<AudioSource>();
@ -67,6 +73,7 @@ namespace PPGIA.X540.Project3
{
StopRecording();
}
#endregion ------------------------------------------------------------
[ContextMenu("Start recording audio")]
public void StartRecording()
@ -215,7 +222,6 @@ namespace PPGIA.X540.Project3
writer.Write(dataBytes);
}
LastSavedFilePath = filePath;
Debug.Log($"Audio saved to: {filePath}");
OnRecordingSaved?.Invoke(filePath);
}
catch (Exception ex)

View File

@ -6,86 +6,113 @@ using UnityEngine.UIElements;
namespace PPGIA.X540.Project3
{
[RequireComponent(typeof(UIController))]
[RequireComponent(typeof(UIDocument))]
public class UIController : MonoBehaviour
{
public enum UIState
{
Idle = 0,
Recording = 1,
Processing = 2
}
private UIDocument _uiDocument;
private VisualElement _root;
private readonly string[] _sessionButtonLabels = {
"Iniciar Sessão",
"Encerrar Sessão"
#region -- Fields & Properties ----------------------------------------
private readonly string[] _sendChatButtonLabels = {
"Falar...",
"Enviar...",
"Processando... Aguarde..."
};
private Button _sessionButton;
private Button _sendChatButton;
private int _currentSessionState = 0;
private TextField _chatInputField;
// UI controls --------------------------------------------------------
private Button _talkButton;
private TextField _chatOutputField;
public string ChatOutput
{
get => _chatOutputField.value;
set => _chatOutputField.value = value;
}
public bool SessionActive {
get => _currentSessionState == 1;
private ProgressBar _progressBar;
public float Progress
{
get => _progressBar.value;
set => _progressBar.value = value;
}
// State management ---------------------------------------------------
private UIState _currentState = UIState.Idle;
public UIState CurrentState
{
get => _currentState;
set
{
_currentSessionState = value ? 1 : 0;
UpdateStateForSession();
_currentState = value;
_talkButton.text = _sendChatButtonLabels[(int)value];
if (value == UIState.Processing)
{
_talkButton.SetEnabled(false);
_progressBar.value = 0.5f;
}
else
{
_talkButton.SetEnabled(true);
_progressBar.value = 0f;
}
}
}
public Action OnSessionButtonClicked { get; set; }
public Action<string> OnSendChatButtonClicked { get; set; }
public float Progress { get; set; }
// Event Handlers -----------------------------------------------------
public event Action OnTalkButtonClicked;
#endregion ------------------------------------------------------------
#region -- MonoBehaviour Methods --------------------------------------
private void Awake()
{
_uiDocument = GetComponent<UIDocument>();
_root = _uiDocument.rootVisualElement;
_sessionButton = _root.Q<Button>("B_Session");
_sendChatButton = _root.Q<Button>("B_SendChat");
_chatInputField = _root.Q<TextField>("TF_ChatInput");
_chatOutputField = _root.Q<TextField>("TF_ChatOutput");
SessionActive = false;
}
void OnEnable()
_talkButton = _root.Q<Button>("B_Talk");
if (_talkButton == null)
{
_sessionButton.clicked += OnSessionButtonClickedInternal;
_sendChatButton.clicked += OnSendChatButtonClickedInternal;
Debug.LogError("Talk Button not found in UI.");
}
void OnDisable()
_progressBar = _root.Q<ProgressBar>("PB_Progress");
if (_progressBar == null)
{
_sessionButton.clicked -= OnSessionButtonClickedInternal;
_sendChatButton.clicked -= OnSendChatButtonClickedInternal;
Debug.LogError("Progress Bar not found in UI.");
}
private void UpdateStateForSession()
_chatOutputField = _root.Q<TextField>("TF_Dialogue");
if (_chatOutputField == null)
{
_sessionButton.text = _sessionButtonLabels[_currentSessionState];
var enable = _currentSessionState == 1;
_chatInputField.SetEnabled(enable);
_sendChatButton.SetEnabled(enable);
Debug.LogError("Chat Output Field not found in UI.");
}
private void OnSessionButtonClickedInternal()
CurrentState = UIState.Idle;
}
private void OnEnable()
{
OnSessionButtonClicked?.Invoke();
// SessionActive state will be updated externally
_talkButton.clicked += OnTalkButtonClickedInternal;
}
private void OnSendChatButtonClickedInternal()
private void OnDisable()
{
OnSendChatButtonClicked?.Invoke(_chatInputField.value);
_chatInputField.value = string.Empty;
_talkButton.clicked -= OnTalkButtonClickedInternal;
}
#endregion ------------------------------------------------------------
private void OnTalkButtonClickedInternal() => OnTalkButtonClicked?.Invoke();
public void AppendChatOutput(string newText)
{
ChatOutput += newText;
}
}
}

View File

@ -29,7 +29,7 @@ MonoBehaviour:
m_Match: 0
m_SortingOrder: 0
m_TargetDisplay: 0
m_BindingLogLevel: 0
m_BindingLogLevel: 2
m_ClearDepthStencil: 1
m_ClearColor: 0
m_ColorClearValue: {r: 0, g: 0, b: 0, a: 0}