Enterprise: Text to Audio Endpoint

Overview

The Text-to-Audio endpoint enables you to generate audio by providing a text input along with a valid audio URL or a pre-created voice using a voice_id. The output is an audio file that mimics the sound of the provided audio URL or the selected voice.

caution

Make sure you add your S3 details for the voice_cloning server so that you can receive the generated audio in your bucket. Audio files generated without S3 details will be deleted after 24 hours.

Open in Playground 🚀

Sample Generation

Init Audio Clip

Example 1

Prompt

In the ancient land of Eldoria, where the skies were painted with shades of mystic hues and the forests whispered secrets of old, there existed a dragon named Zephyros. Unlike the fearsome tales of dragons that plagued human hearts with terror, Zephyros was a creature of wonder and wisdom, revered by all who knew of his existence.

Generated Speech

Request

--request POST 'https://modelslab.com/api/v1/enterprise/voice/text_to_audio' \

Make a POST request to https://modelslab.com/api/v1/enterprise/voice/text_to_audio endpoint and pass the required parameters as a request body.

Body Attributes

Parameter	Description
key	Your API Key used for request authorization
prompt	Text prompt with description of the audio you want to generate
init_audio	A valid URL of the audio clip whose voice you want to clone. The clip must be between 4 and 30 seconds long
voice_id	Optional, A valid id from the lists of voices
language	The language of the voice. The supported languages include `english`, `arabic`, `chinese`, `spanish`, `german`, `czech`, `dutch`, `french`, `hindi`, `hungarian`, `italian`, `japanese`, `korean`, `polish`, `russian`, `turkish`. Defaults to `english`.
emotion	One of `neutral`, `happy`, `sad`, `angry`,`dull`. Defaults to `neutral`
base64	Whether the input sound clip is in base64 or not. Should be `true` or `false`. Defaults to `false`.
temp	Whether you want temporary links or not. This is useful if your country blocks access to our storage sites. Should be `true` or `false`. Defaults to `false`.
webhook	Set a URL to receive a POST API call once the audio generation is complete.
track_id	This ID is returned in the response to the webhook API call. This will be used to identify the webhook request.

Note: You can pass either init_audio or voice_id. If both are passed at the same time, init_audio takes precedence.

Example

Body

Body
{   
 "key": "",
 "prompt":"Narrative voices capable of pronouncing terminologies & acronyms in training and ai learning materials.",
 "init_audio":"https://pub-f3505056e06f40d6990886c8e14102b2.r2.dev/audio/tom_hanks_1.wav",
 "language":"english",
 "webhook": null,
 "track_id": null
}

Request

JS
PHP
NODE
PYTHON
JAVA

// Sends a text-to-audio generation request and prints the response body.

// The endpoint expects a JSON request body.
const myHeaders = new Headers();
myHeaders.append("Content-Type", "application/json");

// Request payload, serialized to JSON. Set "key" to your API key.
const raw = JSON.stringify({
  "key": "",
  "prompt":"Narrative voices capable of pronouncing terminologies & acronyms in training and ai learning materials.",
  "init_audio":"https://pub-f3505056e06f40d6990886c8e14102b2.r2.dev/audio/tom_hanks_1.wav",
  "language":"english",
  "webhook": null,
  "track_id": null
});

const requestOptions = {
  method: 'POST',
  headers: myHeaders,
  body: raw,
  redirect: 'follow'
};

fetch("https://modelslab.com/api/v1/enterprise/voice/text_to_audio", requestOptions)
  .then((response) =>
    // fetch() only rejects on network failures, so HTTP error statuses must be
    // checked explicitly. The body is read first so it can be included in the error.
    response.text().then((text) => {
      if (!response.ok) {
        throw new Error(`Request failed with HTTP ${response.status}: ${text}`);
      }
      return text;
    })
  )
  .then((result) => console.log(result))
  .catch((error) => console.error('error', error));

<?php

// Request payload. Set "key" to your API key.
$payload = [
  "key" => "",
  "prompt" => "Narrative voices capable of pronouncing terminologies & acronyms in training and ai learning materials.",
  "init_audio" => "https://pub-f3505056e06f40d6990886c8e14102b2.r2.dev/audio/tom_hanks_1.wav",
  "language" => "english",
  "webhook" => null, 
  "track_id" => null 
];

$curl = curl_init();

// Configure a JSON POST request whose response is returned as a string
// instead of being written directly to the output.
curl_setopt_array($curl, array(
  CURLOPT_URL => 'https://modelslab.com/api/v1/enterprise/voice/text_to_audio',
  CURLOPT_RETURNTRANSFER => true,
  CURLOPT_ENCODING => '',
  CURLOPT_MAXREDIRS => 10,
  CURLOPT_TIMEOUT => 0,
  CURLOPT_FOLLOWLOCATION => true,
  CURLOPT_HTTP_VERSION => CURL_HTTP_VERSION_1_1,
  CURLOPT_CUSTOMREQUEST => 'POST',
  CURLOPT_POSTFIELDS => json_encode($payload),
  CURLOPT_HTTPHEADER => array(
    'Content-Type: application/json'
  ),
));

$response = curl_exec($curl);

// curl_exec() returns false on failure; report the transport error instead of
// silently echoing an empty string. The error must be read before closing the handle.
if ($response === false) {
  echo 'cURL error: ' . curl_error($curl);
} else {
  echo $response;
}

curl_close($curl);

// Sends a text-to-audio generation request from Node.js and prints the response body.
// NOTE(review): the `request` package is deprecated upstream; consider migrating
// this example to the built-in fetch API.
const request = require('request');

// Request options. Set "key" in the body to your API key.
const options = {
  'method': 'POST',
  'url': 'https://modelslab.com/api/v1/enterprise/voice/text_to_audio',
  'headers': {
    'Content-Type': 'application/json'
  },
  body: JSON.stringify({
    "key": "",
    "prompt":"Narrative voices capable of pronouncing terminologies & acronyms in training and ai learning materials.",
    "init_audio":"https://pub-f3505056e06f40d6990886c8e14102b2.r2.dev/audio/tom_hanks_1.wav",
    "language":"english",
    "webhook": null,
    "track_id": null
  })
};

request(options, (error, response) => {
  // Rethrow the original error object so its stack trace and properties are
  // preserved rather than being flattened into a string by `new Error(error)`.
  if (error) throw error;
  console.log(response.body);
});

import requests
import json

# Endpoint that receives the text-to-audio generation request.
endpoint = "https://modelslab.com/api/v1/enterprise/voice/text_to_audio"

# Request fields; set "key" to your API key.
request_fields = {
  "key": "",
  "prompt":"Narrative voices capable of pronouncing terminologies & acronyms in training and ai learning materials.",
  "init_audio":"https://pub-f3505056e06f40d6990886c8e14102b2.r2.dev/audio/tom_hanks_1.wav",
  "language":"english",
  "webhook": None,
  "track_id": None
}

# The body is sent as a pre-serialized JSON string with an explicit content type.
response = requests.post(
  endpoint,
  headers={'Content-Type': 'application/json'},
  data=json.dumps(request_fields),
)

print(response.text)

OkHttpClient client = new OkHttpClient().newBuilder()
  .build();
MediaType mediaType = MediaType.parse("application/json");
RequestBody body = RequestBody.create(mediaType, "{\n    \"key\":\"\",\n    \"prompt\":\"Narrative voices capable of pronouncing terminologies & acronyms in training and ai learning materials.\",\n    \"init_audio\":\"https://pub-f3505056e06f40d6990886c8e14102b2.r2.dev/audio/tom_hanks_1.wav\",\n    \"language\":\"english\"\n}");
Request request = new Request.Builder()
  .url("https://modelslab.com/api/v1/enterprise/voice/text_to_audio")
  .method("POST", body)
  .addHeader("Content-Type", "application/json")
  .build();
Response response = client.newCall(request).execute();

Response

{
    "status": "success",
    "generationTime": 1.4285192489624,
    "id": 334166,
    "output": [
        "https://pub-3626123a908346a7a8be8d9295f44e26.r2.dev/generations/b2dff60e-4636-4178-9a72-04a10a309185.wav"
    ],
    "proxy_links": [
        "https://cdn2.stablediffusionapi.com/generations/b2dff60e-4636-4178-9a72-04a10a309185.wav"
    ],
    "meta": {
        "base64": "no",
        "emotion": "Neutral",
        "filename": "b2dff60e-4636-4178-9a72-04a10a309185.wav",
        "input_sound_clip": [
            "tmp/0-b2dff60e-4636-4178-9a72-04a10a309185.wav"
        ],
        "input_text": "Narrative voices capable of pronouncing terminologies & acronyms in training and ai learning materials.",
        "language": "english",
        "speed": 1,
        "temp": "no"
    }
}

Overview​

Sample Generation​

Init Audio Clip​

Example 1​

Prompt​

Generated Speech​

Request​

Body Attributes​

Example​

Body​

Request​

Response​

Overview

Sample Generation

Init Audio Clip

Example 1

Prompt

Generated Speech

Request

Body Attributes

Example

Body

Request

Response