Z-Image API Integration Guide: Complete Manual for REST API and Batch Processing
Z-Image, an open-source 6B parameter image generation model from Alibaba's Tongyi Lab, excels not only in local deployment but also in API integration capabilities. Whether you're self-hosting or using third-party API providers, Z-Image offers standardized interfaces that allow developers to quickly integrate AI image generation into their applications. This guide provides a complete walkthrough from basic API calls to advanced batch processing.
Z-Image API Ecosystem Overview
There are three main approaches to integrating Z-Image via API:
- Self-hosted service: Deploy Z-Image locally, expose REST endpoints through ComfyUI API or SGLang Diffusion
- Third-party API platforms: Access Z-Image through providers like Replicate, Eachlabs, WaveSpeed
- Cloud API: Use official Z-Image API through Alibaba Cloud's platform
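Each approach has its own advantages. This guide walks through self-hosting (with ComfyUI and with SGLang Diffusion) and third-party API platforms in detail; the Alibaba Cloud API is not covered further here.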
Approach 1: ComfyUI API Self-Hosted Service
Starting the ComfyUI Service
cd ComfyUI
# Quote the wildcard: an unquoted * is expanded by the shell to the file names
# in the current directory before ComfyUI ever sees it.
# NOTE(review): --listen 0.0.0.0 makes the API reachable from other hosts —
# confirm that is intended before running this outside a trusted network.
python main.py --listen 0.0.0.0 --port 8188 --enable-cors-header "*"
Basic API Call
ComfyUI exposes a REST endpoint, POST /prompt, for submitting generation tasks; the result is then read back from /history/{prompt_id}:
import requests
import json
import time
def generate_image(prompt, negative_prompt="", steps=8, cfg=1.0, seed=None,
                   base_url="http://127.0.0.1:8188", timeout=300,
                   poll_interval=1.0):
    """Generate one image through the ComfyUI HTTP API and return its view URL.

    Submits a fixed text-to-image workflow to ``/prompt``, polls
    ``/history/{prompt_id}`` until the prompt shows up there, and builds a
    ``/view`` URL for the first image found in the outputs.

    Args:
        prompt: Positive text prompt.
        negative_prompt: Negative text prompt.
        steps: Sampler step count.
        cfg: Classifier-free guidance scale.
        seed: Sampler seed; when ``None`` a millisecond timestamp is used.
            An explicit ``0`` is honoured.
        base_url: Root URL of the ComfyUI server.
        timeout: Maximum number of seconds to wait for the result.
        poll_interval: Seconds to sleep between history polls.

    Returns:
        URL string from which the generated image can be downloaded.

    Raises:
        requests.exceptions.RequestException: On connection or HTTP errors.
        RuntimeError: If the history entry contains no images.
        TimeoutError: If no history entry appears within ``timeout`` seconds.
    """
    from urllib.parse import urlencode

    # `seed or <default>` would silently replace a deliberate seed of 0.
    if seed is None:
        seed = int(time.time() * 1000)

    # NOTE(review): ComfyUI's published Z-Image workflow loads this file with
    # separate model/text-encoder/VAE loader nodes — confirm that
    # CheckpointLoaderSimple can load it in your installation.
    payload = {
        "3": {
            "class_type": "KSampler",
            "inputs": {
                "seed": seed,
                "steps": steps,
                "cfg": cfg,
                "sampler_name": "res_multistep",
                "scheduler": "normal",
                "denoise": 1.0,
                "model": ["4", 0],
                "positive": ["6", 0],
                "negative": ["7", 0],
                "latent_image": ["5", 0]
            }
        },
        "4": {
            "class_type": "CheckpointLoaderSimple",
            "inputs": {
                "ckpt_name": "z_image_turbo_bf16.safetensors"
            }
        },
        "5": {
            "class_type": "EmptyLatentImage",
            "inputs": {
                "width": 1024,
                "height": 1024,
                "batch_size": 1
            }
        },
        "6": {
            "class_type": "CLIPTextEncode",
            "inputs": {
                "text": prompt,
                "clip": ["4", 1]
            }
        },
        "7": {
            "class_type": "CLIPTextEncode",
            "inputs": {
                "text": negative_prompt,
                "clip": ["4", 1]
            }
        },
        "8": {
            "class_type": "VAEDecode",
            "inputs": {
                "samples": ["3", 0],
                "vae": ["4", 2]
            }
        },
        "9": {
            "class_type": "SaveImage",
            "inputs": {
                "filename_prefix": "zimage_output",
                "images": ["8", 0]
            }
        }
    }

    # Submit task; fail loudly if the server rejects the workflow.
    response = requests.post(f"{base_url}/prompt", json={"prompt": payload}, timeout=30)
    response.raise_for_status()
    prompt_id = response.json()["prompt_id"]

    # Poll for results, but never forever.
    deadline = time.monotonic() + timeout
    while time.monotonic() < deadline:
        history = requests.get(f"{base_url}/history/{prompt_id}", timeout=30)
        history.raise_for_status()
        entry = history.json().get(prompt_id)  # parse the body once per poll
        if entry is not None:
            for node_output in entry.get("outputs", {}).values():
                images = node_output.get("images")
                if images:
                    image_info = images[0]
                    # Encode the file name and forward subfolder/type so images
                    # saved outside the default output folder still resolve.
                    query = urlencode({
                        "filename": image_info["filename"],
                        "subfolder": image_info.get("subfolder", ""),
                        "type": image_info.get("type", "output"),
                    })
                    return f"{base_url}/view?{query}"
            # Presumably the entry only appears once execution has finished, so
            # an entry without images will not gain any later — TODO confirm.
            raise RuntimeError(f"Prompt {prompt_id} finished without producing an image")
        time.sleep(poll_interval)
    raise TimeoutError(f"Prompt {prompt_id} did not finish within {timeout} seconds")
# Usage example: generate a single image and print the URL it can be fetched from.
sunset_prompt = "a beautiful sunset over the ocean, photorealistic, 4k"
url = generate_image(sunset_prompt)
print(f"Image URL: {url}")
Batch Processing
def batch_generate(prompts, output_dir="./output"):
    """Generate one image per prompt and save each as a PNG file.

    Files are written as ``{output_dir}/image_{index:04d}.png`` in prompt
    order. The directory is created when it does not exist yet.

    Args:
        prompts: Iterable of prompt strings.
        output_dir: Directory that receives the downloaded images.

    Returns:
        List of the file paths written, in prompt order.

    Raises:
        requests.exceptions.RequestException: If a download fails.
    """
    import os

    # Without this, the first open() fails when the directory is missing.
    os.makedirs(output_dir, exist_ok=True)

    saved_paths = []
    for i, prompt in enumerate(prompts):
        url = generate_image(prompt)
        # Download image; never write an HTTP error page to disk as a .png.
        response = requests.get(url, timeout=60)
        response.raise_for_status()
        path = f"{output_dir}/image_{i:04d}.png"
        with open(path, "wb") as f:
            f.write(response.content)
        print(f"Generated: {path}")
        saved_paths.append(path)
    return saved_paths
Approach 2: SGLang Diffusion High-Performance API
SGLang Diffusion provides an OpenAI-compatible API with significantly higher performance than ComfyUI.
Start the Service
python -m sglang.launch_server \
  --model-path Tongyi-MAI/Z-Image-Turbo \
  --port 30000 \
  --mem-fraction-static 0.85
API Call
import requests
import json
def generate_with_sglang(prompt, size="1024x1024", n=1,
                         base_url="http://127.0.0.1:30000", timeout=300):
    """Generate images through the SGLang Diffusion images endpoint.

    Args:
        prompt: Text prompt.
        size: Image size string in ``"WIDTHxHEIGHT"`` form.
        n: Number of images to request.
        base_url: Root URL of the SGLang server.
        timeout: Seconds to wait for the HTTP response.

    Returns:
        The decoded JSON response body.

    Raises:
        requests.exceptions.RequestException: On connection errors, timeouts,
            or a non-2xx HTTP status.
    """
    payload = {
        "model": "Tongyi-MAI/Z-Image-Turbo",
        "prompt": prompt,
        "n": n,
        "size": size
    }
    # `json=` already sets the Content-Type header, so no explicit header is needed.
    response = requests.post(
        f"{base_url}/v1/images/generations",
        json=payload,
        timeout=timeout
    )
    # Turn HTTP failures into exceptions so callers (and retry wrappers that
    # catch RequestException) see them instead of an error payload.
    response.raise_for_status()
    return response.json()
# Usage example: request four variations of one prompt.
result = generate_with_sglang(
    "a modern minimalist living room with natural lighting",
    size="1024x1024",
    n=4
)
for img in result["data"]:
    if img.get("url"):
        print(f"Image URL: {img['url']}")
    else:
        # NOTE(review): OpenAI-style image responses may carry the image inline
        # as "b64_json" instead of "url" — confirm which one your server returns.
        print(f"Image returned inline as base64 ({len(img.get('b64_json') or '')} characters)")
Concurrent Batch Processing
import concurrent.futures
def batch_generate_sglang(prompts, max_workers=10):
    """Generate one image per prompt concurrently using a thread pool.

    Args:
        prompts: Iterable of prompt strings.
        max_workers: Maximum number of concurrent requests.

    Returns:
        List of response dicts in the same order as ``prompts``, so
        ``results[i]`` always belongs to the i-th prompt.

    Raises:
        Whatever ``generate_with_sglang`` raises for the first failing prompt
        (in prompt order).
    """
    with concurrent.futures.ThreadPoolExecutor(max_workers=max_workers) as executor:
        futures = [executor.submit(generate_with_sglang, p, n=1) for p in prompts]
        # Collect in submission order; all requests still run concurrently, but
        # results are no longer shuffled into completion order.
        return [future.result() for future in futures]
Approach 3: Third-Party API Platform Integration
Replicate API
import replicate
# Run the model on Replicate with an 8-step, guidance-scale-1.0, 1024x1024 request.
# NOTE(review): presumably the client authenticates via the REPLICATE_API_TOKEN
# environment variable — confirm against the Replicate client docs.
# NOTE(review): Replicate references are normally "owner/name" or
# "owner/name:<version id>"; verify that this owner/name exists and that a
# ":latest" suffix is accepted.
output = replicate.run(
"tongyi-mai/z-image-turbo:latest",
input={
"prompt": "a serene landscape with mountains and a lake",
"width": 1024,
"height": 1024,
"num_inference_steps": 8,
"guidance_scale": 1.0
}
)
# NOTE(review): `output` may be a list or file-like object rather than a plain
# URL string, depending on the client version — confirm before relying on it.
print(f"Image URL: {output}")
Eachlabs API
import requests
def generate_eachlabs(prompt, model="z-image-turbo", api_key="YOUR_API_KEY", timeout=300):
    """Generate one 1024x1024 image through the Eachlabs images endpoint.

    Args:
        prompt: Text prompt.
        model: Model name; sent as ``zhipu-ai/{model}``.
        api_key: Bearer token for the Authorization header. The default is a
            placeholder and must be replaced with a real key.
        timeout: Seconds to wait for the HTTP response.

    Returns:
        The decoded JSON response body.

    Raises:
        requests.exceptions.RequestException: On connection errors, timeouts,
            or a non-2xx HTTP status (for example an invalid API key).
    """
    response = requests.post(
        "https://api.eachlabs.ai/v1/images/generations",
        headers={
            "Authorization": f"Bearer {api_key}",
            "Content-Type": "application/json"
        },
        json={
            # NOTE(review): the rest of this guide identifies the model as
            # "Tongyi-MAI/Z-Image-Turbo"; confirm the "zhipu-ai/" prefix
            # against the provider's model catalogue.
            "model": f"zhipu-ai/{model}",
            "prompt": prompt,
            "n": 1,
            "size": "1024x1024"
        },
        timeout=timeout
    )
    # Surface authentication and quota errors instead of returning an error payload.
    response.raise_for_status()
    return response.json()
WaveSpeed API
import requests
def generate_wavespeed(prompt, api_key="YOUR_API_KEY", timeout=300):
    """Generate one 1024x1024 image through the WaveSpeed Z-Image Turbo endpoint.

    Args:
        prompt: Text prompt.
        api_key: Bearer token for the Authorization header. The default is a
            placeholder and must be replaced with a real key.
        timeout: Seconds to wait for the HTTP response.

    Returns:
        The decoded JSON response body.

    Raises:
        requests.exceptions.RequestException: On connection errors, timeouts,
            or a non-2xx HTTP status.
    """
    response = requests.post(
        "https://api.wavespeed.ai/v1/z-image-turbo/generate",
        headers={
            "Authorization": f"Bearer {api_key}",
            "Content-Type": "application/json"
        },
        json={
            "prompt": prompt,
            "width": 1024,
            "height": 1024,
            "steps": 8,
            "cfg_scale": 1.0
        },
        timeout=timeout
    )
    # Surface authentication and quota errors instead of returning an error payload.
    response.raise_for_status()
    return response.json()
Advanced Techniques
Asynchronous Task Queue
For large-scale batch processing, send requests asynchronously and cap the number in flight with a semaphore:
import asyncio
import aiohttp
async def generate_async(session, prompt):
    """Asynchronously request one 1024x1024 image from the local SGLang server.

    Args:
        session: An open ``aiohttp.ClientSession`` used to send the request.
        prompt: Text prompt.

    Returns:
        The decoded JSON response body.

    Raises:
        aiohttp.ClientResponseError: If the server answers with an HTTP error status.
    """
    payload = {
        "model": "Tongyi-MAI/Z-Image-Turbo",
        "prompt": prompt,
        "n": 1,
        "size": "1024x1024"
    }
    async with session.post(
        "http://127.0.0.1:30000/v1/images/generations",
        json=payload
    ) as response:
        # Fail on HTTP errors rather than handing an error body back as a result.
        response.raise_for_status()
        return await response.json()
async def batch_async_generate(prompts, max_concurrent=20):
    """Generate one image per prompt with bounded asyncio concurrency.

    Args:
        prompts: Iterable of prompt strings.
        max_concurrent: Maximum number of requests in flight at once; must be
            at least 1.

    Returns:
        List of response dicts in the same order as ``prompts``.

    Raises:
        ValueError: If ``max_concurrent`` is smaller than 1.
    """
    # Semaphore(0) would make every request wait forever, so reject it up front.
    if max_concurrent < 1:
        raise ValueError("max_concurrent must be at least 1")

    prompts = list(prompts)
    if not prompts:
        # Nothing to do: skip opening an HTTP session.
        return []

    semaphore = asyncio.Semaphore(max_concurrent)

    async def limited_generate(session, prompt):
        # Hold a semaphore slot for the duration of one request.
        async with semaphore:
            return await generate_async(session, prompt)

    async with aiohttp.ClientSession() as session:
        tasks = [limited_generate(session, p) for p in prompts]
        return await asyncio.gather(*tasks)
Error Handling and Retry
import time
from tenacity import retry, stop_after_attempt, wait_exponential
@retry(stop=stop_after_attempt(3), wait=wait_exponential(multiplier=1, min=2, max=10))
def generate_with_retry(prompt):
    """Call ``generate_with_sglang`` under a retry policy.

    The decorator stops after three attempts and waits with exponential
    backoff bounded between 2 and 10 seconds. A ``RequestException`` is
    logged to stdout and re-raised so the decorator can act on it.
    """
    try:
        result = generate_with_sglang(prompt)
    except requests.exceptions.RequestException as exc:
        print(f"Error: {exc}, retrying...")
        raise
    return result
Result Caching
import hashlib
import os
import json
def cache_key(prompt, **kwargs):
    """Return a deterministic cache key for a prompt and its options.

    The prompt and keyword options are serialised to JSON with sorted keys,
    so the order in which options are passed does not affect the key, and
    the MD5 hex digest of that text is returned.

    Args:
        prompt: Text prompt.
        **kwargs: JSON-serialisable generation options that belong to the key.

    Returns:
        A 32-character hexadecimal string.
    """
    key_data = json.dumps({"prompt": prompt, **kwargs}, sort_keys=True)
    # MD5 is used only to derive a short file name, not for security.
    return hashlib.md5(key_data.encode()).hexdigest()
def generate_with_cache(prompt, cache_dir="./cache", **kwargs):
    """Generate an image response, reusing a cached JSON result when available.

    Args:
        prompt: Text prompt.
        cache_dir: Directory holding one ``<key>.json`` file per cached result.
        **kwargs: Extra options forwarded to ``generate_with_sglang`` and
            included in the cache key, so different options never share a
            cache entry.

    Returns:
        The response dict, either loaded from the cache or freshly generated.
    """
    ck = cache_key(prompt, **kwargs)
    cache_path = os.path.join(cache_dir, f"{ck}.json")

    try:
        with open(cache_path, "r", encoding="utf-8") as f:
            return json.load(f)
    except FileNotFoundError:
        pass  # cache miss
    except json.JSONDecodeError:
        # A truncated or corrupt entry would otherwise fail on every call;
        # fall through and regenerate it.
        pass

    result = generate_with_sglang(prompt, **kwargs)

    os.makedirs(cache_dir, exist_ok=True)
    # Write to a temporary file and rename it into place so an interrupted
    # write never leaves a half-written cache entry behind.
    tmp_path = f"{cache_path}.{os.getpid()}.tmp"
    with open(tmp_path, "w", encoding="utf-8") as f:
        json.dump(result, f)
    os.replace(tmp_path, cache_path)
    return result
Performance Comparison
| Approach | Per-Image Latency | Throughput (images/sec) | Deployment Cost | Use Case |
|---|---|---|---|---|
| ComfyUI API | 0.8-3s | 1-3 | Low (local) | Small scale, flexible customization |
| SGLang Diffusion | 0.2-0.5s | 10-30 | Medium (GPU server) | Large-scale batch, high concurrency |
| Replicate | 2-5s | Depends on provider limits | Pay-per-use (per call) | Quick prototyping, no maintenance |
| Eachlabs/WaveSpeed | 1-3s | Depends on provider limits | Pay-per-use (per call) | Production, no ops overhead |
Summary
Z-Image API integration offers multiple flexible options:
- Development phase: Use ComfyUI API for rapid prototyping
- Small-scale production: Self-host SGLang Diffusion for performance-cost balance
- Large-scale production: Third-party API platforms for elastic scaling
- Maximum performance: SGLang Diffusion + async queue + caching for high-throughput batch processing
Choose the right approach based on your specific needs — Z-Image delivers powerful AI image generation capabilities for any application.