Enterprise: Voice Cover Endpoint
Overview
The Voice Cover endpoint allows you to transform a song or audio file into a different voice using a provided model, creating new audio from an existing audio. Find all available voice models HERE.
caution
Make sure you add your s3 details for voice_cloning
server, so you can receive the generated audio in your bucket.
Audio generated without s3 details being added will be deleted after 24 hours.
Sample Music Generation Output
Input Audio
A YouTube video or music link provided for processing:
Generated Output
- Voice Model ID: arianagrande
- Processed Music Output:
Request
--request POST 'https://modelslab.com/api/v1/enterprise/voice/voice_cover' \
Make a POST
request to https://modelslab.com/api/v1/enterprise/voice/voice_cover endpoint and pass the required parameters as a request body.
Body Attributes
Parameter | Description |
---|---|
key | Your API Key used for request authorization |
init_audio | URL (Youtube links supported) or valid .wav file base64 data whose audio you want to clone with the model. |
model_id | ID of the voice cloning model. Get the model_id from here. |
pitch | One of ["m2f", "f2m", "none"]. If input voice is a male voice and model used is a trained female voice model, specify "m2f". Similar for the case of female-to-male. In other cases, specify "none". |
algorithm | One of rmvpe , mangio-crepe . Defaults to rmvpe |
rate | Rate of control for generated voice leakage. Higher values bias model towards training data. Defaults to 0.5 . Should be between 0 and 1 . |
seed | Seed is used to reproduce results; the same seed will give you the same audio in return again. Pass null for a random number. |
language | The language of the voice. The supported languages include english, arabic, brazilian, portuguese, chinese, dutch, french, hindi, hungarian, italian, japanese, korean, polish, russian, turkish. Default is english |
emotion | One of neutral, happy, sad, angry, dull. Defaults to neutral |
speed | Floating point value for speed of speaker. Defaults to 1.0 |
radius | Median filtering length to reduce breathiness and other minor voice artifacts. Defaults to 3. |
mix | A value between 0 and 1. A lower value leads to similar loudness to the original sound clip while a higher value leans towards fixed loudness. Defaults to 0.25. |
hop_length | How often to check for pitch changes when using mangio-crepe as algorithm. |
originality | Control how much similarity to maintain with the original vocals' voiceless consonants. Defaults to 0.33. |
lead_voice_volume_delta | A value between -5 and +5 controlling whether lead vocals should be decreased or increased. |
backup_voice_volume_delta | A value between -5 and +5 controlling whether backup vocals should be decreased or increased |
instrument_volume_delta | A value between -5 and +5 controlling whether instrumental volume should be decreased or increased. |
reverb_size | Reverb room size. Defaults to 0.15. Should be between 0 and 1. |
wetness | Reverb for generated vocals. Defaults to 0.2. Should be between 0 and 1. |
dryness | Reverb for original vocals. Defaults to 0.8. Should be between 0 and 1. |
damping | Damping factor for high frequencies in the reverb. Defaults to 0.7. Should be between 0 and 1. |
base64 | Whether the input sound clip is in base64 or not. Should be true or false . Defaults to false . |
temp | Whether you want the output to be auto-deleted from our server in a short amount of time. |
webhook | Set a URL to get a POST API call once the audio generation is complete. |
track_id | This ID is returned in the response to the webhook API call. This will be used to identify the webhook request. |
Example
Body
Body
{
  "key": "",
  "init_audio": "https://music.youtube.com/watch?v=aZ1hziFhj1o",
  "model_id": "zoro",
  "pitch": "none",
  "rate": 0.5,
  "radius": 3,
  "mix": 0.25,
  "algorithm": "rmvpe",
  "hop_length": 128,
  "originality": 0.5,
  "lead_voice_volume_delta": "+1",
  "backup_voice_volume_delta": "-2",
  "instrument_volume_delta": "+2",
  "reverb_size": 0.15,
  "wetness": 0.2,
  "dryness": 0.8,
  "damping": 0.7,
  "base64": false,
  "temp": false,
  "webhook": null,
  "track_id": null
}
Request
- JS
- PHP
- NODE
- PYTHON
- JAVA
// Browser example: POST the voice-cover parameters as JSON using fetch().

// The endpoint expects a JSON request body.
var jsonHeaders = new Headers();
jsonHeaders.append("Content-Type", "application/json");

// Request parameters; see "Body Attributes" for the meaning of each field.
var coverParams = {
  "key": "",
  "init_audio": "https://music.youtube.com/watch?v=aZ1hziFhj1o",
  "model_id": "zoro",
  "pitch": "none",
  "rate": 0.5,
  "radius": 3,
  "mix": 0.25,
  "algorithm": "rmvpe",
  "hop_length": 128,
  "originality": 0.5,
  "lead_voice_volume_delta": "+1",
  "backup_voice_volume_delta": "-2",
  "instrument_volume_delta": "+2",
  "reverb_size": 0.15,
  "wetness": 0.2,
  "dryness": 0.8,
  "damping": 0.7,
  "base64": false,
  "temp": false,
  "webhook": null,
  "track_id": null
};

var fetchConfig = {
  method: 'POST',
  headers: jsonHeaders,
  body: JSON.stringify(coverParams),
  redirect: 'follow'
};

// Send the request and print the raw response text (or the error).
fetch("https://modelslab.com/api/v1/enterprise/voice/voice_cover", fetchConfig)
  .then(function (response) {
    return response.text();
  })
  .then(function (bodyText) {
    console.log(bodyText);
  })
  .catch(function (err) {
    console.log('error', err);
  });
<?php
// PHP example: POST the voice-cover parameters as JSON using cURL.

// Request parameters; see "Body Attributes" for the meaning of each field.
// Every entry must use PHP's `=>` array syntax (a JSON-style `:` is a parse error).
$payload = [
  "key" => "",
  "init_audio" => "https://music.youtube.com/watch?v=aZ1hziFhj1o",
  "model_id" => "zoro",
  "pitch" => "none",
  "rate" => 0.5,
  "radius" => 3,
  "mix" => 0.25,
  "algorithm" => "rmvpe",
  "hop_length" => 128,
  "originality" => 0.5,
  "lead_voice_volume_delta" => "+1",
  "backup_voice_volume_delta" => "-2",
  "instrument_volume_delta" => "+2",
  "reverb_size" => 0.15,
  "wetness" => 0.2,
  "dryness" => 0.8,
  "damping" => 0.7,
  "base64" => false,
  "temp" => false,
  "webhook" => null,
  "track_id" => null
];

$curl = curl_init();

curl_setopt_array($curl, array(
  CURLOPT_URL => 'https://modelslab.com/api/v1/enterprise/voice/voice_cover',
  // Return the response body from curl_exec() instead of printing it directly.
  CURLOPT_RETURNTRANSFER => true,
  CURLOPT_ENCODING => '',
  CURLOPT_MAXREDIRS => 10,
  CURLOPT_TIMEOUT => 0,
  CURLOPT_FOLLOWLOCATION => true,
  CURLOPT_HTTP_VERSION => CURL_HTTP_VERSION_1_1,
  CURLOPT_CUSTOMREQUEST => 'POST',
  // The payload array is serialized to a JSON string for the request body.
  CURLOPT_POSTFIELDS => json_encode($payload),
  CURLOPT_HTTPHEADER => array(
    'Content-Type: application/json'
  ),
));

$response = curl_exec($curl);

curl_close($curl);
echo $response;
// Node.js example: POST the voice-cover parameters as JSON using the `request` package.
var request = require('request');

// JSON-encoded request parameters; see "Body Attributes" for each field.
var coverPayload = JSON.stringify({
  "key": "",
  "init_audio": "https://music.youtube.com/watch?v=aZ1hziFhj1o",
  "model_id": "zoro",
  "pitch": "none",
  "rate": 0.5,
  "radius": 3,
  "mix": 0.25,
  "algorithm": "rmvpe",
  "hop_length": 128,
  "originality": 0.5,
  "lead_voice_volume_delta": "+1",
  "backup_voice_volume_delta": "-2",
  "instrument_volume_delta": "+2",
  "reverb_size": 0.15,
  "wetness": 0.2,
  "dryness": 0.8,
  "damping": 0.7,
  "base64": false,
  "temp": false,
  "webhook": null,
  "track_id": null
});

var options = {
  method: 'POST',
  url: 'https://modelslab.com/api/v1/enterprise/voice/voice_cover',
  headers: { 'Content-Type': 'application/json' },
  body: coverPayload
};

// Print the response body, or rethrow a transport error.
request(options, function (error, response) {
  if (error) {
    throw new Error(error);
  }
  console.log(response.body);
});
# Python example: POST the voice-cover parameters as JSON using `requests`.
import json

import requests

url = "https://modelslab.com/api/v1/enterprise/voice/voice_cover"

# Request parameters; see "Body Attributes" for the meaning of each field.
cover_params = {
    "key": "",
    "init_audio": "https://music.youtube.com/watch?v=aZ1hziFhj1o",
    "model_id": "zoro",
    "pitch": "none",
    "rate": 0.5,
    "radius": 3,
    "mix": 0.25,
    "algorithm": "rmvpe",
    "hop_length": 128,
    "originality": 0.5,
    "lead_voice_volume_delta": "+1",
    "backup_voice_volume_delta": "-2",
    "instrument_volume_delta": "+2",
    "reverb_size": 0.15,
    "wetness": 0.2,
    "dryness": 0.8,
    "damping": 0.7,
    "base64": False,
    "temp": False,
    "webhook": None,
    "track_id": None,
}

# Serialize once so the body is sent exactly as JSON text.
payload = json.dumps(cover_params)
headers = {"Content-Type": "application/json"}

response = requests.post(url, headers=headers, data=payload)
print(response.text)
// Java example: POST the voice-cover parameters as JSON using OkHttp.
OkHttpClient client = new OkHttpClient().newBuilder()
.build();
// The request body is sent as JSON.
MediaType mediaType = MediaType.parse("application/json");
// JSON body written as one escaped string literal. NOTE(review): unlike the other
// language samples, this body does not include "webhook" or "track_id".
RequestBody body = RequestBody.create(mediaType, "{\n \"key\":\"\",\n \"init_audio\": \"https://music.youtube.com/watch?v=aZ1hziFhj1o\",\n \"model_id\": \"zoro\",\n \"pitch\": \"none\",\n \"rate\": 0.5,\n \"radius\": 3,\n \"mix\": 0.25,\n \"algorithm\": \"rmvpe\",\n \"hop_length\": 128,\n \"originality\": 0.5,\n \"lead_voice_volume_delta\": \"+1\",\n \"backup_voice_volume_delta\": \"-2\",\n \"instrument_volume_delta\":\"+2\",\n \"reverb_size\": 0.15,\n \"wetness\": 0.2,\n \"dryness\": 0.8,\n \"damping\": 0.7,\n \"base64\": false,\n \"temp\": false\n}");
// Build the POST request against the voice_cover endpoint.
Request request = new Request.Builder()
.url("https://modelslab.com/api/v1/enterprise/voice/voice_cover")
.method("POST", body)
.addHeader("Content-Type", "application/json")
.build();
// Execute the call synchronously; the result is held in `response`.
Response response = client.newCall(request).execute();
Response
{
  "generationTime": 1.5732920169830322,
  "id": 10,
  "links": [
    "https://cdn2.stablediffusionapi.com/generations/bc1e5025-b140-4af6-be24-183fa18c943a.wav"
  ],
  "proxy_links": [
    "https://cdn2.stablediffusionapi.com/generations/bc1e5025-b140-4af6-be24-183fa18c943a.wav"
  ],
  "meta": {
    "algorithm": "rmvpe",
    "backup_voice_volume_delta": -2,
    "base64": "no",
    "damping": 0.7,
    "dryness": 0.8,
    "filename": "bc1e5025-b140-4af6-be24-183fa18c943a.wav",
    "hop_length": 128,
    "input_sound_clip": "https://music.youtube.com/watch?v=aZ1hziFhj1o",
    "instrument_volume_delta": 2,
    "is_youtube": true,
    "lead_voice_volume_delta": 1,
    "mix": 0.25,
    "model_id": "zoro",
    "originality": 0.5,
    "pitch": "none",
    "radius": 3,
    "rate": 0.5,
    "reverb_size": 0.15,
    "seed": 1216247535,
    "temp": "no",
    "wetness": 0.2
  },
  "status": "success"
}