Skip to main content

Overview

The ModelsLab Python SDK provides a simple, type-safe interface to interact with all ModelsLab APIs. It handles authentication, request formatting, and response parsing automatically.

Installation

pip install modelslab_py
Requires Python 3.7 or higher.

Quick Start

Here’s a complete example to generate your first image:
from modelslab_py.core.client import Client
from modelslab_py.core.apis.realtime import Realtime
from modelslab_py.schemas.realtime import RealtimeText2ImageSchema

# Initialize the client
client = Client(api_key="your_api_key")

# Create the API instance
api = Realtime(client=client, enterprise=False)

# Define the generation parameters
schema = RealtimeText2ImageSchema(
    prompt="A majestic lion in a savanna at sunset, photorealistic, 8k",
    negative_prompt="blurry, low quality, distorted",
    width=512,
    height=512,
    samples=1,
    num_inference_steps=30,
    guidance_scale=7.5
)

# Generate the image
response = api.text_to_image(schema)

# Handle the response
if response.get("status") == "success":
    print(f"Image URL: {response['output'][0]}")
elif response.get("status") == "processing":
    print(f"Request ID: {response['id']} - Check back later")
else:
    print(f"Error: {response.get('message')}")

Client Configuration

Basic Setup

from modelslab_py.core.client import Client

# Standard API access
client = Client(api_key="your_api_key")

Enterprise Setup

from modelslab_py.core.client import Client
from modelslab_py.core.apis.realtime import Realtime

client = Client(api_key="your_enterprise_api_key")

# Set enterprise=True for enterprise endpoints
api = Realtime(client=client, enterprise=True)

Available APIs

  • Realtime - Fast image generation with FLUX and other models
  • Community - Access community fine-tuned models
  • Image_editing - Edit, enhance, and transform images
  • Video - Generate videos from text or images
  • Audio - Text-to-speech, voice cloning, music generation
  • DeepFake - Face swapping in images and videos
  • Interior - Interior design and room transformation
  • Three_D - Generate 3D models from text or images

Image Generation

Text to Image (Realtime)

Generate images from text prompts using the fastest models:
from modelslab_py.core.client import Client
from modelslab_py.core.apis.realtime import Realtime
from modelslab_py.schemas.realtime import RealtimeText2ImageSchema

client = Client(api_key="your_api_key")
api = Realtime(client=client, enterprise=False)

schema = RealtimeText2ImageSchema(
    prompt="A cyberpunk city at night, neon lights, rain, cinematic",
    negative_prompt="blurry, low quality, distorted, deformed",
    width=1024,
    height=1024,
    samples=1,
    num_inference_steps=30,
    guidance_scale=7.5,
    seed=12345  # Optional: for reproducible results
)

response = api.text_to_image(schema)
print(response)

Image to Image (Realtime)

Transform existing images with a text prompt:
from modelslab_py.core.apis.realtime import Realtime
from modelslab_py.schemas.realtime import RealtimeImage2ImageSchema

api = Realtime(client=client, enterprise=False)

schema = RealtimeImage2ImageSchema(
    init_image="https://example.com/your-image.jpg",
    prompt="Transform into a watercolor painting style",
    negative_prompt="photo, realistic",
    width=512,
    height=512,
    strength=0.7,  # How much to change the image (0-1)
    num_inference_steps=30,
    guidance_scale=7.5
)

response = api.image_to_image(schema)
print(response)

Community Models

Use fine-tuned community models for specific styles:
from modelslab_py.core.client import Client
from modelslab_py.core.apis.community import Community
from modelslab_py.schemas.community import Text2Image, Image2Image, Inpainting, ControlNet

client = Client(api_key="your_api_key")
api = Community(client=client, enterprise=False)

# Text to Image with a specific model
schema = Text2Image(
    model_id="flux",  # or any community model ID
    prompt="Portrait of a woman, oil painting style, renaissance",
    negative_prompt="modern, photo, blurry",
    width=512,
    height=768,
    samples=1,
    num_inference_steps=30,
    guidance_scale=7.5
)
response = api.text_to_image(schema)

# Image to Image
schema = Image2Image(
    model_id="flux",
    init_image="https://example.com/image.jpg",
    prompt="Add autumn colors to the scene",
    strength=0.6,
    width=512,
    height=512
)
response = api.image_to_image(schema)

# Inpainting (edit specific areas)
schema = Inpainting(
    model_id="flux",
    init_image="https://example.com/image.jpg",
    mask_image="https://example.com/mask.png",  # White = edit, Black = keep
    prompt="A red sports car",
    width=512,
    height=512
)
response = api.inpainting(schema)

# ControlNet (guided generation)
schema = ControlNet(
    model_id="flux",
    controlnet_model="canny",  # canny, depth, pose, etc.
    controlnet_image="https://example.com/control-image.jpg",
    prompt="A beautiful house, photorealistic",
    width=512,
    height=512
)
response = api.controlnet(schema)

Image Editing

Background Removal

from modelslab_py.core.client import Client
from modelslab_py.core.apis.image_editing import Image_editing
from modelslab_py.schemas.image_editing import BackgroundRemoverSchema

client = Client(api_key="your_api_key")
api = Image_editing(client=client, enterprise=False)

schema = BackgroundRemoverSchema(
    image="https://example.com/photo.jpg"
)

response = api.background_remover(schema)
print(f"Image without background: {response['output'][0]}")

Super Resolution (Upscale)

from modelslab_py.schemas.image_editing import SuperResolutionSchema

schema = SuperResolutionSchema(
    image="https://example.com/low-res-image.jpg",
    scale=4  # 2x or 4x upscale
)

response = api.super_resolution(schema)
print(f"Upscaled image: {response['output'][0]}")

Object Removal

from modelslab_py.schemas.image_editing import ObjectRemovalSchema

schema = ObjectRemovalSchema(
    image="https://example.com/photo.jpg",
    mask_image="https://example.com/mask.png"  # White areas will be removed
)

response = api.object_remover(schema)
print(response)

Outpainting (Extend Images)

from modelslab_py.schemas.image_editing import OutpaintingSchema

schema = OutpaintingSchema(
    image="https://example.com/photo.jpg",
    prompt="Continue the landscape with mountains and trees",
    width=1024,  # New width (larger than original)
    height=768   # New height
)

response = api.outpainting(schema)
print(response)

AI Headshots

from modelslab_py.schemas.image_editing import HeadshotSchema, FluxHeadshotSchema

# Standard headshot
schema = HeadshotSchema(
    image="https://example.com/selfie.jpg",
    prompt="Professional headshot, studio lighting, business attire"
)
response = api.headshot(schema)

# FLUX-powered headshot (higher quality)
schema = FluxHeadshotSchema(
    image="https://example.com/selfie.jpg",
    prompt="Professional LinkedIn headshot, neutral background"
)
response = api.flux_headshot(schema)

Face Generation

from modelslab_py.schemas.image_editing import FacegenSchema

schema = FacegenSchema(
    image="https://example.com/portrait.jpg",
    prompt="Make the person look 10 years younger"
)

response = api.facegen(schema)
print(response)

Fashion / Virtual Try-On

from modelslab_py.schemas.image_editing import FashionSchema

schema = FashionSchema(
    model_image="https://example.com/person.jpg",
    cloth_image="https://example.com/shirt.jpg"
)

response = api.fashion(schema)
print(response)

Video Generation

Text to Video

from modelslab_py.core.client import Client
from modelslab_py.core.apis.video import Video
from modelslab_py.schemas.video import Text2Video

client = Client(api_key="your_api_key")
api = Video(client=client, enterprise=False)

schema = Text2Video(
    model_id="cogvideox",
    prompt="A spaceship flying through an asteroid field, cinematic, 4K",
    negative_prompt="low quality, blurry, static",
    width=512,
    height=512,
    num_frames=25,
    num_inference_steps=20,
    guidance_scale=7
)

response = api.text_to_video(schema)

# Video generation is async - you'll get a request ID
if response.get("status") == "processing":
    print(f"Video processing, request ID: {response['id']}")
    print(f"ETA: {response.get('eta')} seconds")

Image to Video

Animate a static image:
from modelslab_py.schemas.video import Image2Video

schema = Image2Video(
    model_id="cogvideox",
    init_image="https://example.com/landscape.jpg",
    prompt="The clouds moving slowly, birds flying in the distance",
    num_frames=25,
    num_inference_steps=20
)

response = api.image_to_video(schema)
print(response)

Audio Generation

Text to Speech

from modelslab_py.core.client import Client
from modelslab_py.core.apis.audio import Audio
from modelslab_py.schemas.audio import Text2Speech

client = Client(api_key="your_api_key")
api = Audio(client=client, enterprise=False)

schema = Text2Speech(
    text="Hello, welcome to ModelsLab! This is a sample audio generation.",
    voice_id="alloy",  # Choose from available voices
    language="en"
)

response = api.text_to_speech(schema)
print(f"Audio URL: {response['output'][0]}")

Voice Cloning (Voice to Voice)

from modelslab_py.schemas.audio import Voice2Voice

schema = Voice2Voice(
    init_audio="https://example.com/source-voice.mp3",
    target_audio="https://example.com/target-voice.mp3"  # Voice to clone
)

response = api.voice2voice(schema)
print(response)

Music Generation

from modelslab_py.schemas.audio import MusicGenSchema

schema = MusicGenSchema(
    prompt="Upbeat electronic dance music with heavy bass drops",
    duration=30  # Duration in seconds
)

response = api.music_gen(schema)
print(response)

Song Generation

from modelslab_py.schemas.audio import SongGenerator

schema = SongGenerator(
    prompt="A pop song about summer love",
    lyrics="optional custom lyrics here"
)

response = api.song_generator(schema)
print(response)

Lyrics Generation

from modelslab_py.schemas.audio import LyricsGenerator

schema = LyricsGenerator(
    prompt="Write lyrics for a country song about road trips"
)

response = api.lyrics_gen(schema)
print(response)

Sound Effects (SFX)

from modelslab_py.schemas.audio import SFX

schema = SFX(
    prompt="Thunder rolling in the distance, heavy rain",
    duration=10
)

response = api.sfx_gen(schema)
print(response)

Speech to Text

from modelslab_py.schemas.audio import Speech2Text

schema = Speech2Text(
    audio="https://example.com/speech.mp3",
    language="en"
)

response = api.speech_to_text(schema)
print(f"Transcription: {response['text']}")

DeepFake / Face Swap

Single Face Swap (Image)

from modelslab_py.core.client import Client
from modelslab_py.core.apis.deepfake import DeepFake
from modelslab_py.schemas.deepfake import SpecificFaceSwap, MultipleFaceSwap

client = Client(api_key="your_api_key")
api = DeepFake(client=client, enterprise=False)

schema = SpecificFaceSwap(
    init_image="https://example.com/target-photo.jpg",
    target_image="https://example.com/face-to-swap.jpg"
)

response = api.specific_face_swap(schema)
print(response)

Multiple Face Swap

schema = MultipleFaceSwap(
    init_image="https://example.com/group-photo.jpg",
    target_image="https://example.com/new-face.jpg"
)

response = api.multiple_face_swap(schema)
print(response)

Video Face Swap

from modelslab_py.schemas.deepfake import SingleVideoSwap, SpecificVideoSwap

# Swap all faces in video
schema = SingleVideoSwap(
    init_video="https://example.com/video.mp4",
    target_image="https://example.com/face.jpg"
)
response = api.single_video_swap(schema)

# Swap specific face in video
schema = SpecificVideoSwap(
    init_video="https://example.com/video.mp4",
    target_image="https://example.com/face.jpg",
    source_image="https://example.com/face-to-replace.jpg"
)
response = api.multiple_video_swap(schema)

Interior Design

Interior Redesign

from modelslab_py.core.client import Client
from modelslab_py.core.apis.interior import Interior
from modelslab_py.schemas.interior import InteriorSchema

client = Client(api_key="your_api_key")
api = Interior(client=client, enterprise=False)

schema = InteriorSchema(
    init_image="https://example.com/room-photo.jpg",
    prompt="Modern minimalist living room with Scandinavian furniture"
)

response = api.interior(schema)
print(response)

Room Decorator

from modelslab_py.schemas.interior import RoomDecoratorSchema

schema = RoomDecoratorSchema(
    init_image="https://example.com/empty-room.jpg",
    prompt="Cozy bedroom with warm lighting and plants"
)

response = api.room_decorator(schema)
print(response)

Exterior Restoration

from modelslab_py.schemas.interior import ExteriorSchema

schema = ExteriorSchema(
    init_image="https://example.com/house-exterior.jpg",
    prompt="Modern exterior with landscaping and new paint"
)

response = api.exterior_restorer(schema)
print(response)

Floor Planning

from modelslab_py.schemas.interior import FloorSchema

schema = FloorSchema(
    init_image="https://example.com/floor-plan.jpg",
    prompt="Open concept kitchen and living area"
)

response = api.floor(schema)
print(response)

3D Model Generation

Text to 3D

from modelslab_py.core.client import Client
from modelslab_py.core.apis.three_d import Three_D
from modelslab_py.schemas.threed import Text23D, Image23D

client = Client(api_key="your_api_key")
api = Three_D(client=client, enterprise=False)

schema = Text23D(
    prompt="A medieval sword with ornate handle",
    num_inference_steps=50
)

response = api.text_to_3d(schema)
print(response)

Image to 3D

schema = Image23D(
    image="https://example.com/product-photo.jpg"
)

response = api.image_to_3d(schema)
print(response)

Working with Base64 Images

For local images, convert them to base64:
from modelslab_py.core.client import Client
from modelslab_py.core.apis.image_editing import Image_editing
from modelslab_py.schemas.image_editing import BackgroundRemoverSchema
from modelslab_py.utils.image_utils import read_image_from_file, image_to_base64

client = Client(api_key="your_api_key")
api = Image_editing(client=client, enterprise=False)

# Read local image and convert to base64
image_pil = read_image_from_file("local-image.png")
image_base64 = image_to_base64(image_pil)

schema = BackgroundRemoverSchema(
    image=image_base64,
    base64=True  # Important: set this to True for base64 images
)

response = api.background_remover(schema)
print(response)

Error Handling

Always handle potential errors in production code:
from modelslab_py.core.client import Client
from modelslab_py.core.apis.realtime import Realtime
from modelslab_py.schemas.realtime import RealtimeText2ImageSchema

def generate_image(prompt: str, api_key: str) -> str:
    """Generate an image and return the URL."""
    try:
        # Build the realtime API handle around a freshly configured client.
        realtime_api = Realtime(
            client=Client(api_key=api_key), enterprise=False
        )

        request = RealtimeText2ImageSchema(
            prompt=prompt,
            width=512,
            height=512,
            samples=1,
            num_inference_steps=30
        )

        response = realtime_api.text_to_image(request)
        status = response.get("status")

        # Guard-clause style: handle each terminal state and fall through to error.
        if status == "success":
            return response["output"][0]
        if status == "processing":
            # Request accepted but still rendering asynchronously.
            return f"Processing... Request ID: {response['id']}"
        raise Exception(f"API Error: {response.get('message', 'Unknown error')}")

    except Exception as e:
        print(f"Error generating image: {e}")
        raise

# Usage
try:
    image_url = generate_image("A sunset over mountains", "your_api_key")
    print(f"Generated: {image_url}")
except Exception as e:
    print(f"Failed: {e}")

Async Processing Pattern

For long-running operations (videos, training), poll for results:
import time
from modelslab_py.core.client import Client
from modelslab_py.core.apis.video import Video
from modelslab_py.schemas.video import Text2Video

def generate_video_with_polling(prompt: str, api_key: str, timeout: int = 300):
    """Generate a video and wait for completion."""
    video_api = Video(client=Client(api_key=api_key), enterprise=False)

    request = Text2Video(
        model_id="cogvideox",
        prompt=prompt,
        width=512,
        height=512,
        num_frames=25
    )

    response = video_api.text_to_video(request)
    status = response.get("status")

    # Fast path: some requests complete synchronously.
    if status == "success":
        return response["output"][0]

    # Anything other than "processing" at this point is a hard failure.
    if status != "processing":
        raise Exception(f"Error: {response.get('message')}")

    request_id = response["id"]
    deadline = time.time() + timeout

    while time.time() < deadline:
        # Use the fetch endpoint (implement based on your needs)
        # This is a simplified example
        time.sleep(5)

        # Check status...
        # If complete, return URL
        # If failed, raise exception

    raise Exception("Timeout waiting for video generation")

Next Steps