About the websocket

To integrate your agent into your website or mobile app, use this endpoint. You can send and receive audio in real time, which allows for seamless and natural conversations.

Step 1. Create a widget agent

  1. In your Anunzi dashboard, create an agent of the type widget.

  2. Go to Agents and select the agent you have just created. Copy the ID next to its name. This is the ID you will use as the assistant_id parameter.

Step 2. Request a session

Make a GET request to the following endpoint:

curl -X GET "https://widgets.anunzi.net/websocket/token/{assistant_id}" \
     -H "Authorization: Bearer <your-anunzi-api-key>"

It will return a response of the form:

{
  "sessionURL": "wss://widgets.anunzi.net/websocket/start?token=..."
}

You can then use this session URL to connect to the websocket.

Step 3. Send and receive events

Send events

You can send two types of messages through the websocket:

  1. Binary messages containing the user’s speech (raw PCM16 audio, sample rate of 48000).

  2. JSON messages ({ "type": "status_client_ready" }) to signal that you are ready for the agent to start speaking.

Receive events

You will receive two types of messages:

  • Binary messages containing the agent’s speech (raw PCM16 audio, sample rate 16000).

  • JSON messages ({ "type": "status_agent_ready" }) that signal that the agent is ready to start receiving audio.

Writing a client

To use the websocket connection in a webpage, follow these steps:

  1. Write functions to record and play linear PCM audio:

index.ts
// Controls returned by startAudioOut for driving playback of agent audio.
type AudioOutControls = {
  // Queue a chunk of PCM16 samples for playback via the audio worklet.
  enqueueAudioChunk: (chunk: Int16Array) => void;
  // The playback AudioContext (created at 16 kHz to match the agent audio).
  audioContext: AudioContext;
  // Tear down the worklet node, gain node, and context.
  stop: () => void;
};

/**
 * Sets up playback of agent speech: a 16 kHz AudioContext feeding a gain node
 * through the "audio-out-worklet" processor (see audio-out-worklet.js).
 *
 * @returns controls to enqueue PCM16 chunks and to stop playback.
 */
async function startAudioOut(): Promise<AudioOutControls> {
  // Agent audio arrives as PCM16 at 16000 Hz, so pin the context to that rate.
  const audioContext = new AudioContext({ sampleRate: 16000 });
  const gain = audioContext.createGain();
  gain.connect(audioContext.destination);

  // The worklet script must be served from the web root.
  await audioContext.audioWorklet.addModule("/audio-out-worklet.js");
  const workletNode = new AudioWorkletNode(audioContext, "audio-out-worklet");
  workletNode.connect(gain);

  const enqueueAudioChunk = (chunk: Int16Array) => {
    // Hand the samples to the worklet's internal FIFO via its message port.
    workletNode.port.postMessage({ buffer: chunk });
  };

  const stop = () => {
    workletNode.disconnect();
    gain.disconnect();
    audioContext.close();
  };

  return { enqueueAudioChunk, audioContext, stop };
}

/**
 * Captures microphone audio and delivers it as raw PCM16 chunks.
 *
 * @param onAudioChunk - called with each PCM16 chunk (as bytes) to send upstream.
 * @param onRecordingStarted - invoked once, on the first processed audio frame.
 * @returns controls to stop capture and release the microphone.
 */
async function startRecording(
  onAudioChunk: (chunk: ArrayBufferLike) => void,
  onRecordingStarted: () => Promise<void>,
): Promise<{ stop: () => void }> {
  const stream = await navigator.mediaDevices.getUserMedia({ audio: true });
  // Request 48 kHz explicitly: the websocket expects PCM16 at 48000 Hz, and the
  // hardware default (e.g. 44100 Hz on some devices) would otherwise be sent
  // to the server unresampled.
  const audioContext = new window.AudioContext({ sampleRate: 48000 });
  const sourceNode = audioContext.createMediaStreamSource(stream);
  // NOTE: ScriptProcessorNode is deprecated in favor of AudioWorklet, but it is
  // still widely supported and keeps this example self-contained.
  const scriptProcessor = audioContext.createScriptProcessor(8192, 1, 1);

  let hasRecordingStarted = false;
  scriptProcessor.onaudioprocess = (
    audioProcessingEvent: AudioProcessingEvent
  ) => {
    if (!hasRecordingStarted) {
      hasRecordingStarted = true;
      // Fire-and-forget: nothing in this handler depends on the callback's result.
      void onRecordingStarted();
    }
    const floatData = audioProcessingEvent.inputBuffer.getChannelData(0);
    const int16Data = float32ToInt16(floatData);
    // Reinterpret the Int16Array's storage as bytes for the websocket.
    onAudioChunk(new Uint8Array(int16Data.buffer));
  };

  sourceNode.connect(scriptProcessor);
  // ScriptProcessorNode only fires onaudioprocess while connected to a destination.
  scriptProcessor.connect(audioContext.destination);
  return {
    stop: () => {
      scriptProcessor.disconnect();
      scriptProcessor.onaudioprocess = null;
      sourceNode.disconnect();
      void audioContext.close();
      // Release the microphone (turns off the browser's recording indicator).
      stream.getTracks().forEach((track) => track.stop());
    },
  };
}

/**
 * Converts Web Audio float samples to 16-bit signed PCM.
 *
 * Input values are clamped to [-1, 1]; the negative half scales to -32768 and
 * the positive half to 32767 (asymmetric, matching the int16 range).
 */
function float32ToInt16(float32Array: Float32Array): Int16Array {
  return Int16Array.from(float32Array, (sample) => {
    const clamped = Math.min(1, Math.max(-1, sample));
    // Typed-array storage truncates the fractional part toward zero.
    return clamped < 0 ? clamped * 0x8000 : clamped * 0x7fff;
  });
}

audio-out-worklet.js
// AudioWorklet processor that plays PCM16 samples pushed from the main thread.
// Chunks arrive over the worklet's message port and are queued in a FIFO
// buffer until the audio render thread consumes them in process().
class Processor extends AudioWorkletProcessor {
  constructor(options) {
    super();
    // FIFO sample queue shared between the port handler and process().
    this.buffer = getAudioBuffer();
    this.port.onmessage = (ev) => {
      // ev.data.buffer is the Int16Array posted by the main thread
      // (see startAudioOut); copy its samples into a fresh array and queue it.
      this.buffer.pushArray(new Int16Array(ev.data.buffer));
    };
  }

  process(inputs, outputs, parameters) {
    // Mono playback: write into the first channel of the first output.
    const output = outputs[0][0];
    for (let i = 0; i < output.length; i++) {
      let value = this.buffer.getSample();
      if (value === undefined) {
        // Underrun: leave the remainder of the (zero-initialized) frame silent.
        break;
      }
      // Scale PCM16 [-32768, 32767] into the Web Audio float range [-1, 1).
      output[i] = value / 32768;
    }
    // Returning true keeps the processor alive even while the queue is empty.
    return true;
  }
}

/**
 * Creates a FIFO buffer of PCM16 sample chunks.
 *
 * Chunks are appended whole via pushArray() and drained one sample at a time
 * via getSample(), which returns undefined when no audio is buffered.
 */
function getAudioBuffer() {
  /**
   * Queue of chunks waiting to be played.
   * @type {Array<Int16Array>}
   */
  const queue = [];

  /**
   * Chunk currently being drained, or undefined when empty.
   * @type {Int16Array | undefined}
   */
  let active = undefined;

  /** Read position within `active`. */
  let readIndex = 0;

  // Move to the next queued chunk (may leave `active` undefined).
  const advance = () => {
    active = queue.shift();
    readIndex = 0;
  };

  return {
    getSample: () => {
      if (active === undefined || readIndex >= active.length) {
        advance();
      }
      if (active === undefined) {
        return undefined;
      }
      return active[readIndex++];
    },
    pushArray: (array) => {
      queue.push(array);
    },
  };
}

// Register under the name startAudioOut uses when constructing the node.
registerProcessor("audio-out-worklet", Processor);
  2. Write a function to manage the websocket connection:

index.ts
// Wire-protocol message types exchanged as JSON over the websocket.
const MESSAGE_TYPE_STATUS_AGENT_READY = "status_agent_ready";
const MESSAGE_TYPE_STATUS_CLIENT_READY = "status_client_ready";

/**
 * Opens the websocket session and wires microphone capture to it.
 *
 * @param url - session URL obtained from the token endpoint (Step 2).
 * @param playAudioChunk - invoked with each PCM16 chunk of agent speech.
 * @returns controls to terminate the call (closing the socket also stops the mic).
 */
async function startSynthflow(
  url: string,
  playAudioChunk: (chunk: Int16Array) => void
): Promise<{
  stop: () => void;
}> {
  updateStatus("Connecting to server", "warning");

  const websocket = new WebSocket(url);
  // Receive binary frames as ArrayBuffer (the browser default is Blob).
  websocket.binaryType = "arraybuffer";

  websocket.onmessage = async (event) => {
    if (event.data instanceof ArrayBuffer) {
      // Binary frame: raw PCM16 agent speech.
      playAudioChunk(new Int16Array(event.data));
    } else {
      // Text frame: JSON control message.
      const data = JSON.parse(event.data);
      switch (data.type) {
        case MESSAGE_TYPE_STATUS_AGENT_READY:
          updateStatus("Connected to call", "success");
          console.log("Received agent ready message");
          break;
        default:
          console.log("Received unknown message from server", data);
          break;
      }
    }
  };

  // Register the close handler BEFORE awaiting startRecording: getUserMedia can
  // block on a permission prompt, and a socket that closes in the meantime must
  // still stop the microphone once it exists.
  let recordingControls: { stop: () => void } | undefined;
  websocket.onclose = () => {
    recordingControls?.stop();
    updateStatus("Disconnected from server", "error");
  };

  recordingControls = await startRecording(
    (audio) => {
      // Drop chunks until the socket is open; the server only needs live audio.
      if (websocket.readyState === WebSocket.OPEN) {
        websocket.send(audio);
      }
    },
    async () => {
      sendWhenReady(websocket, JSON.stringify({ type: MESSAGE_TYPE_STATUS_CLIENT_READY }))
      console.log("Scheduled send client ready message");
    }
  );

  // If the socket already closed while we were awaiting the microphone,
  // onclose fired with recordingControls still undefined — stop the mic now.
  if (websocket.readyState === WebSocket.CLOSED) {
    recordingControls.stop();
  }

  return {
    stop: () => {
      websocket.close();
    },
  };
}

/**
 * Decodes a base64 string into its raw bytes.
 *
 * Utility helper for base64-encoded payloads; the main example flow above
 * receives audio as binary frames and does not call it.
 */
function base64ToArrayBuffer(base64: string): ArrayBuffer {
  // atob yields a "binary string" whose char codes are all in 0..255.
  const binary = window.atob(base64);
  return Uint8Array.from(binary, (ch) => ch.charCodeAt(0)).buffer;
}

/**
 * Sends a text message once the websocket is OPEN.
 *
 * While the socket is still CONNECTING (or CLOSING), polls every 50 ms;
 * once it is CLOSED the message is dropped with a log line.
 */
function sendWhenReady(websocket: WebSocket, message: string) {
  switch (websocket.readyState) {
    case WebSocket.CLOSED:
      console.log("WebSocket is closed, not sending message");
      return;
    case WebSocket.OPEN:
      websocket.send(message);
      return;
    default:
      // Not open yet — try again shortly.
      setTimeout(() => sendWhenReady(websocket, message), 50);
  }
}

  3. Use the functions defined above to create a call:

index.ts
// Example wiring: fetch the session URL from your own backend (keep the Anunzi
// API key server-side — never embed it in the page), start playback, then
// connect and pipe agent audio into the playback queue.
async function makeCall() {
  const sessionURL = ... // make a call to your server to get a session URL

  const audioOutControls = await startAudioOut();
  const synthflowControls = await startSynthflow(sessionURL, (audio) => {
    audioOutControls.enqueueAudioChunk(audio);
  });

  // stop audio/call when needed:
  //   synthflowControls.stop();  // closes the socket and stops the mic
  //   audioOutControls.stop();   // tears down playback
}

Última actualización

¿Te fue útil?