Merge branch 'main' into space-txt2img
- README.md +7 -0
- app_init.py +18 -17
- frontend/src/lib/components/MediaListSwitcher.svelte +5 -3
- frontend/src/lib/components/PipelineOptions.svelte +5 -6
- frontend/src/lib/components/VideoInput.svelte +32 -27
- frontend/src/lib/types.ts +5 -0
- frontend/src/routes/+page.svelte +18 -29
- frontend/tailwind.config.js +1 -1
- pipelines/controlnelSD21Turbo.py +260 -0
- pipelines/controlnet.py +37 -6
- pipelines/controlnetLoraSD15.py +52 -19
- pipelines/controlnetLoraSDXL.py +44 -16
- pipelines/controlnetSDXLTurbo.py +268 -0
- pipelines/img2img.py +38 -6
- pipelines/img2imgSDXLTurbo.py +182 -0
- pipelines/txt2img.py +26 -3
- pipelines/txt2imgLora.py +30 -1
- pipelines/txt2imgLoraSDXL.py +33 -7
- requirements.txt +3 -2
README.md
CHANGED
@@ -127,6 +127,13 @@ docker build -t lcm-live .
 docker run -ti -p 7860:7860 --gpus all lcm-live
 ```
 
+Reuse model data from the host to avoid downloading it again. You can change `~/.cache/huggingface` to any other directory, but if you use huggingface-cli locally you can share the same cache:
+
+```bash
+docker run -ti -p 7860:7860 -e HF_HOME=/data -v ~/.cache/huggingface:/data --gpus all lcm-live
+```
+
+
 or with environment variables
 
 ```bash
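A minimal sketch of why the `-e HF_HOME=/data` flag works, assuming `huggingface_hub` is installed in the image; `HF_HOME` must be set before the library is imported, which is why the docker flag (rather than runtime code) is the reliable place to set it.

```python
# Minimal sketch: confirm the shared cache is used inside the container.
import os

os.environ.setdefault("HF_HOME", "/data")  # no-op if docker already set it

from huggingface_hub import snapshot_download

local_path = snapshot_download("madebyollin/taesd")  # reuses /data if the host cache already has it
print(local_path)
```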
app_init.py
CHANGED
@@ -3,6 +3,7 @@ from fastapi.responses import StreamingResponse, JSONResponse
 from fastapi.middleware.cors import CORSMiddleware
 from fastapi.staticfiles import StaticFiles
 from fastapi import Request
+import markdown2
 
 import logging
 import traceback
@@ -13,6 +14,7 @@ import time
 from types import SimpleNamespace
 from util import pil_to_frame, bytes_to_pil, is_firefox
 import asyncio
+import os
 
 
 def init_app(app: FastAPI, user_data: UserData, args: Args, pipeline):
@@ -41,11 +43,7 @@ def init_app(app: FastAPI, user_data: UserData, args: Args, pipeline):
             await websocket.send_json(
                 {"status": "connected", "message": "Connected", "userId": str(user_id)}
             )
-            await websocket.send_json(
-                {
-                    "status": "send_frame",
-                }
-            )
+            await websocket.send_json({"status": "send_frame"})
             await handle_websocket_data(user_id, websocket)
         except WebSocketDisconnect as e:
             logging.error(f"WebSocket Error: {e}, {user_id}")
@@ -71,13 +69,12 @@ def init_app(app: FastAPI, user_data: UserData, args: Args, pipeline):
                 params = SimpleNamespace(**params.dict())
                 if info.input_mode == "image":
                     image_data = await websocket.receive_bytes()
+                    if len(image_data) == 0:
+                        await websocket.send_json({"status": "send_frame"})
+                        continue
                     params.image = bytes_to_pil(image_data)
                 await user_data.update_data(user_id, params)
-                await websocket.send_json(
-                    {
-                        "status": "wait",
-                    }
-                )
+                await websocket.send_json({"status": "wait"})
                 if args.timeout > 0 and time.time() - last_time > args.timeout:
                     await websocket.send_json(
                         {
@@ -110,11 +107,7 @@ def init_app(app: FastAPI, user_data: UserData, args: Args, pipeline):
             while True:
                 params = await user_data.get_latest_data(user_id)
                 if not vars(params) or params.__dict__ == last_params.__dict__:
-                    await websocket.send_json(
-                        {
-                            "status": "send_frame",
-                        }
-                    )
+                    await websocket.send_json({"status": "send_frame"})
                     await asyncio.sleep(0.1)
                     continue
 
@@ -143,14 +136,22 @@ def init_app(app: FastAPI, user_data: UserData, args: Args, pipeline):
     # route to setup frontend
     @app.get("/settings")
     async def settings():
-        …
+        info_schema = pipeline.Info.schema()
+        info = pipeline.Info()
+        if info.page_content:
+            page_content = markdown2.markdown(info.page_content)
+
         input_params = pipeline.InputParams.schema()
         return JSONResponse(
             {
-                "info":…
+                "info": info_schema,
                 "input_params": input_params,
                 "max_queue_size": args.max_queue_size,
+                "page_content": page_content if info.page_content else "",
             }
         )
 
+    if not os.path.exists("public"):
+        os.makedirs("public")
+
     app.mount("/", StaticFiles(directory="public", html=True), name="public")
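A hypothetical client-side sketch of the handshake above: the server now sends `{"status": "send_frame"}` right after "connected", skips empty frames by re-sending "send_frame", and replies `{"status": "wait"}` after each accepted frame. The websocket URL and the frame helper are assumptions, not part of this diff.

```python
import asyncio
import json
import websockets

async def stream_frames(url: str, next_jpeg_frame) -> None:
    async with websockets.connect(url) as ws:
        while True:
            msg = json.loads(await ws.recv())
            status = msg.get("status")
            if status == "send_frame":
                await ws.send(next_jpeg_frame())  # bytes of a JPEG-encoded frame
            elif status in ("connected", "wait"):
                continue  # keep listening for the next instruction

# asyncio.run(stream_frames("ws://localhost:7860/ws/<user_id>", my_frame_source))  # path assumed
```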
frontend/src/lib/components/MediaListSwitcher.svelte
CHANGED
@@ -18,17 +18,19 @@
 <div class="flex items-center justify-center text-xs">
   <button
     title="Share your screen"
-    class="border-1 my-1…
+    class="border-1 my-1 flex cursor-pointer gap-1 rounded-md border-gray-500 border-opacity-50 bg-slate-100 bg-opacity-30 p-1 font-medium text-white"
     on:click={() => mediaStreamActions.startScreenCapture()}
   >
-    <…
+    <span>Share</span>
+
+    <Screen classList={''} />
   </button>
   {#if $mediaDevices}
     <select
       bind:value={deviceId}
       on:change={() => mediaStreamActions.switchCamera(deviceId)}
       id="devices-list"
-      class="border-1 block cursor-pointer rounded-md border-gray-800 border-opacity-50 bg-slate-100 bg-opacity-30 p-…
+      class="border-1 block cursor-pointer rounded-md border-gray-800 border-opacity-50 bg-slate-100 bg-opacity-30 p-1 font-medium text-white"
     >
       {#each $mediaDevices as device, i}
         <option value={device.deviceId}>{device.label}</option>
frontend/src/lib/components/PipelineOptions.svelte
CHANGED
@@ -1,6 +1,5 @@
 <script lang="ts">
-  import {…
-  import type { FieldProps } from '$lib/types';
+  import type { Fields } from '$lib/types';
   import { FieldType } from '$lib/types';
   import InputRange from './InputRange.svelte';
   import SeedInput from './SeedInput.svelte';
@@ -9,10 +8,10 @@
   import Selectlist from './Selectlist.svelte';
   import { pipelineValues } from '$lib/store';
 
-  export let pipelineParams:…
+  export let pipelineParams: Fields;
 
-  $: advanceOptions = pipelineParams?.filter((e) => e?.hide == true);
-  $: featuredOptions = pipelineParams?.filter((e) => e?.hide !== true);
+  $: advanceOptions = Object.values(pipelineParams)?.filter((e) => e?.hide == true);
+  $: featuredOptions = Object.values(pipelineParams)?.filter((e) => e?.hide !== true);
 </script>
 
 <div class="flex flex-col gap-3">
@@ -37,7 +36,7 @@
   <details>
     <summary class="cursor-pointer font-medium">Advanced Options</summary>
     <div
-      class="grid grid-cols-1 items-center gap-3 {pipelineParams.length > 5
+      class="grid grid-cols-1 items-center gap-3 {Object.values(pipelineParams).length > 5
         ? 'sm:grid-cols-2'
        : ''}"
    >
frontend/src/lib/components/VideoInput.svelte
CHANGED
@@ -10,21 +10,24 @@
   mediaDevices
 } from '$lib/mediaStream';
 import MediaListSwitcher from './MediaListSwitcher.svelte';
+export let width = 512;
+export let height = 512;
+const size = { width, height };
 
 let videoEl: HTMLVideoElement;
 let canvasEl: HTMLCanvasElement;
 let ctx: CanvasRenderingContext2D;
 let videoFrameCallbackId: number;
-
-const HEIGHT = 768;
+
 // ajust the throttle time to your needs
 const THROTTLE_TIME = 1000 / 15;
 let selectedDevice: string = '';
+let videoIsReady = false;
 
 onMount(() => {
   ctx = canvasEl.getContext('2d') as CanvasRenderingContext2D;
-  canvasEl.width =…
-  canvasEl.height =…
+  canvasEl.width = size.width;
+  canvasEl.height = size.height;
 });
 $: {
   console.log(selectedDevice);
@@ -44,35 +47,34 @@
   }
   const videoWidth = videoEl.videoWidth;
   const videoHeight = videoEl.videoHeight;
-…
+  let height0 = videoHeight;
+  let width0 = videoWidth;
+  let x0 = 0;
+  let y0 = 0;
+  if (videoWidth > videoHeight) {
+    width0 = videoHeight;
+    x0 = (videoWidth - videoHeight) / 2;
+  } else {
+    height0 = videoWidth;
+    y0 = (videoHeight - videoWidth) / 2;
+  }
+  ctx.drawImage(videoEl, x0, y0, width0, height0, 0, 0, size.width, size.height);
+  const blob = await new Promise<Blob>((resolve) => {
+    canvasEl.toBlob(
+      (blob) => {
+        resolve(blob as Blob);
+      },
+      'image/jpeg',
+      1
+    );
+  });
   onFrameChangeStore.set({ blob });
   videoFrameCallbackId = videoEl.requestVideoFrameCallback(onFrameChange);
 }
 
-$: if ($mediaStreamStatus == MediaStreamStatusEnum.CONNECTED) {
+$: if ($mediaStreamStatus == MediaStreamStatusEnum.CONNECTED && videoIsReady) {
   videoFrameCallbackId = videoEl.requestVideoFrameCallback(onFrameChange);
 }
-async function grapCropBlobImg(
-  video: HTMLVideoElement,
-  x: number,
-  y: number,
-  width: number,
-  height: number
-) {
-  const canvas = new OffscreenCanvas(width, height);
-
-  const ctx = canvas.getContext('2d') as OffscreenCanvasRenderingContext2D;
-  ctx.drawImage(video, x, y, width, height, 0, 0, width, height);
-  const blob = await canvas.convertToBlob({ type: 'image/jpeg', quality: 1 });
-  return blob;
-}
 </script>
 
 <div class="relative mx-auto max-w-lg overflow-hidden rounded-lg border border-slate-300">
@@ -85,6 +87,9 @@
   <video
     class="pointer-events-none aspect-square w-full object-cover"
     bind:this={videoEl}
+    on:loadeddata={() => {
+      videoIsReady = true;
+    }}
    playsinline
    autoplay
    muted
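The new frame-capture path center-crops the larger video dimension and resizes to the requested square. A PIL re-statement of the same math, as a minimal sketch (512 matches the new default width/height props):

```python
from PIL import Image

def center_crop_resize(frame: Image.Image, size: int = 512) -> Image.Image:
    w, h = frame.size
    side = min(w, h)
    x0 = (w - side) // 2  # crop offset when the frame is wider than tall
    y0 = (h - side) // 2  # crop offset when the frame is taller than wide
    return frame.crop((x0, y0, x0 + side, y0 + side)).resize((size, size))

print(center_crop_resize(Image.new("RGB", (1280, 720))).size)  # -> (512, 512)
```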
frontend/src/lib/types.ts
CHANGED
@@ -11,6 +11,11 @@ export const enum PipelineMode {
   TEXT = "text",
 }
 
+
+export interface Fields {
+  [key: string]: FieldProps;
+}
+
 export interface FieldProps {
   default: number | string;
   max?: number;
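The new `Fields` interface indexes the `properties` object of the pydantic JSON schema served by `/settings`. A sketch of where those properties come from (pydantic v1 style, matching the pipelines below): extra `Field` kwargs such as `field`, `id` and `hide` land directly in `schema()["properties"]`, which is what the frontend filters on.

```python
from pydantic import BaseModel, Field

class InputParams(BaseModel):
    steps: int = Field(4, min=1, max=15, title="Steps", field="range", hide=True, id="steps")

print(InputParams.schema()["properties"]["steps"])
# roughly: {'title': 'Steps', 'default': 4, 'min': 1, 'max': 15,
#           'field': 'range', 'hide': True, 'id': 'steps', 'type': 'integer'}
```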
frontend/src/routes/+page.svelte
CHANGED
@@ -1,6 +1,6 @@
 <script lang="ts">
   import { onMount } from 'svelte';
-  import type {…
+  import type { Fields, PipelineInfo } from '$lib/types';
   import { PipelineMode } from '$lib/types';
   import ImagePlayer from '$lib/components/ImagePlayer.svelte';
   import VideoInput from '$lib/components/VideoInput.svelte';
@@ -11,8 +11,9 @@
   import { mediaStreamActions, onFrameChangeStore } from '$lib/mediaStream';
   import { getPipelineValues, deboucedPipelineValues } from '$lib/store';
 
-  let pipelineParams:…
+  let pipelineParams: Fields;
   let pipelineInfo: PipelineInfo;
+  let pageContent: string;
   let isImageMode: boolean = false;
   let maxQueueSize: number = 0;
   let currentQueueSize: number = 0;
@@ -22,11 +23,12 @@
 
   async function getSettings() {
     const settings = await fetch('/settings').then((r) => r.json());
-    pipelineParams =…
+    pipelineParams = settings.input_params.properties;
     pipelineInfo = settings.info.properties;
     isImageMode = pipelineInfo.input_mode.default === PipelineMode.IMAGE;
     maxQueueSize = settings.max_queue_size;
-…
+    pageContent = settings.page_content;
+    console.log(pipelineParams);
     if (maxQueueSize > 0) {
       getQueueSize();
       setInterval(() => {
@@ -68,33 +70,17 @@
   }
 </script>
 
+<svelte:head>
+  <script
+    src="https://cdnjs.cloudflare.com/ajax/libs/iframe-resizer/4.3.9/iframeResizer.contentWindow.min.js"
+  ></script>
+</svelte:head>
+
 <main class="container mx-auto flex max-w-5xl flex-col gap-3 px-4 py-4">
   <article class="text-center">
-…
-    <h3 class="text-xl font-bold">{pipelineInfo?.title?.default}</h3>
+    {#if pageContent}
+      {@html pageContent}
     {/if}
-    <p class="text-sm">
-      This demo showcases
-      <a
-        href="https://huggingface.co/blog/lcm_lora"
-        target="_blank"
-        class="text-blue-500 underline hover:no-underline">LCM LoRA</a
-      >
-      Image to Image pipeline using
-      <a
-        href="https://huggingface.co/docs/diffusers/main/en/using-diffusers/lcm#performing-inference-with-lcm"
-        target="_blank"
-        class="text-blue-500 underline hover:no-underline">Diffusers</a
-      > with a MJPEG stream server.
-    </p>
-    <p class="text-sm text-gray-500">
-      Change the prompt to generate different images, accepts <a
-        href="https://github.com/damian0815/compel/blob/main/doc/syntax.md"
-        target="_blank"
-        class="text-blue-500 underline hover:no-underline">Compel</a
-      > syntax.
-    </p>
     {#if maxQueueSize > 0}
       <p class="text-sm">
         There are <span id="queue_size" class="font-bold">{currentQueueSize}</span>
@@ -111,7 +97,10 @@
   <article class="my-3 grid grid-cols-1 gap-3 sm:grid-cols-2">
     {#if isImageMode}
       <div class="sm:col-start-1">
-        <VideoInput…
+        <VideoInput
+          width={Number(pipelineParams.width.default)}
+          height={Number(pipelineParams.height.default)}
+        ></VideoInput>
      </div>
    {/if}
    <div class={isImageMode ? 'sm:col-start-2' : 'col-span-2'}>
frontend/tailwind.config.js
CHANGED
@@ -1,6 +1,6 @@
 /** @type {import('tailwindcss').Config} */
 export default {
-  content: ['./src/**/*.{html,js,svelte,ts}'],
+  content: ['./src/**/*.{html,js,svelte,ts}', '../pipelines/**/*.py'],
   theme: {
     extend: {}
   },
pipelines/controlnelSD21Turbo.py
ADDED
@@ -0,0 +1,260 @@
from diffusers import (
    StableDiffusionControlNetImg2ImgPipeline,
    ControlNetModel,
    LCMScheduler,
    AutoencoderTiny,
)
from compel import Compel
import torch
from pipelines.utils.canny_gpu import SobelOperator

try:
    import intel_extension_for_pytorch as ipex  # type: ignore
except:
    pass

import psutil
from config import Args
from pydantic import BaseModel, Field
from PIL import Image
import math
import time

#
taesd_model = "madebyollin/taesd"
controlnet_model = "thibaud/controlnet-sd21-canny-diffusers"
base_model = "stabilityai/sd-turbo"

default_prompt = "Portrait of The Joker halloween costume, face painting, with , glare pose, detailed, intricate, full of colour, cinematic lighting, trending on artstation, 8k, hyperrealistic, focused, extreme details, unreal engine 5 cinematic, masterpiece"
page_content = """
<h1 class="text-3xl font-bold">Real-Time SDv2.1 Turbo</h1>
<h3 class="text-xl font-bold">Image-to-Image ControlNet</h3>
<p class="text-sm">
    This demo showcases
    <a
    href="https://huggingface.co/stabilityai/sd-turbo"
    target="_blank"
    class="text-blue-500 underline hover:no-underline">SD Turbo</a>
    Image to Image pipeline using
    <a
    href="https://huggingface.co/docs/diffusers/main/en/using-diffusers/sdxl_turbo"
    target="_blank"
    class="text-blue-500 underline hover:no-underline">Diffusers</a
    > with a MJPEG stream server.
</p>
<p class="text-sm text-gray-500">
    Change the prompt to generate different images, accepts <a
    href="https://github.com/damian0815/compel/blob/main/doc/syntax.md"
    target="_blank"
    class="text-blue-500 underline hover:no-underline">Compel</a
    > syntax.
</p>
"""


class Pipeline:
    class Info(BaseModel):
        name: str = "controlnet+sd15Turbo"
        title: str = "SDv1.5 Turbo + Controlnet"
        description: str = "Generates an image from a text prompt"
        input_mode: str = "image"
        page_content: str = page_content

    class InputParams(BaseModel):
        prompt: str = Field(
            default_prompt,
            title="Prompt",
            field="textarea",
            id="prompt",
        )
        seed: int = Field(
            4402026899276587, min=0, title="Seed", field="seed", hide=True, id="seed"
        )
        steps: int = Field(
            1, min=1, max=15, title="Steps", field="range", hide=True, id="steps"
        )
        width: int = Field(
            512, min=2, max=15, title="Width", disabled=True, hide=True, id="width"
        )
        height: int = Field(
            512, min=2, max=15, title="Height", disabled=True, hide=True, id="height"
        )
        guidance_scale: float = Field(
            1.21,
            min=0,
            max=10,
            step=0.001,
            title="Guidance Scale",
            field="range",
            hide=True,
            id="guidance_scale",
        )
        strength: float = Field(
            0.8,
            min=0.10,
            max=1.0,
            step=0.001,
            title="Strength",
            field="range",
            hide=True,
            id="strength",
        )
        controlnet_scale: float = Field(
            0.2,
            min=0,
            max=1.0,
            step=0.001,
            title="Controlnet Scale",
            field="range",
            hide=True,
            id="controlnet_scale",
        )
        controlnet_start: float = Field(
            0.0,
            min=0,
            max=1.0,
            step=0.001,
            title="Controlnet Start",
            field="range",
            hide=True,
            id="controlnet_start",
        )
        controlnet_end: float = Field(
            1.0,
            min=0,
            max=1.0,
            step=0.001,
            title="Controlnet End",
            field="range",
            hide=True,
            id="controlnet_end",
        )
        canny_low_threshold: float = Field(
            0.31,
            min=0,
            max=1.0,
            step=0.001,
            title="Canny Low Threshold",
            field="range",
            hide=True,
            id="canny_low_threshold",
        )
        canny_high_threshold: float = Field(
            0.125,
            min=0,
            max=1.0,
            step=0.001,
            title="Canny High Threshold",
            field="range",
            hide=True,
            id="canny_high_threshold",
        )
        debug_canny: bool = Field(
            False,
            title="Debug Canny",
            field="checkbox",
            hide=True,
            id="debug_canny",
        )

    def __init__(self, args: Args, device: torch.device, torch_dtype: torch.dtype):
        controlnet_canny = ControlNetModel.from_pretrained(
            controlnet_model, torch_dtype=torch_dtype
        ).to(device)

        self.pipes = {}

        if args.safety_checker:
            self.pipe = StableDiffusionControlNetImg2ImgPipeline.from_pretrained(
                base_model,
                controlnet=controlnet_canny,
            )
        else:
            self.pipe = StableDiffusionControlNetImg2ImgPipeline.from_pretrained(
                base_model,
                controlnet=controlnet_canny,
                safety_checker=None,
            )

        if args.use_taesd:
            self.pipe.vae = AutoencoderTiny.from_pretrained(
                taesd_model, torch_dtype=torch_dtype, use_safetensors=True
            ).to(device)
        self.canny_torch = SobelOperator(device=device)

        self.pipe.scheduler = LCMScheduler.from_config(self.pipe.scheduler.config)
        self.pipe.set_progress_bar_config(disable=True)
        self.pipe.to(device=device, dtype=torch_dtype).to(device)
        if device.type != "mps":
            self.pipe.unet.to(memory_format=torch.channels_last)

        if psutil.virtual_memory().total < 64 * 1024**3:
            self.pipe.enable_attention_slicing()

        self.pipe.compel_proc = Compel(
            tokenizer=self.pipe.tokenizer,
            text_encoder=self.pipe.text_encoder,
            truncate_long_prompts=True,
        )
        if args.use_taesd:
            self.pipe.vae = AutoencoderTiny.from_pretrained(
                taesd_model, torch_dtype=torch_dtype, use_safetensors=True
            ).to(device)

        if args.torch_compile:
            self.pipe.unet = torch.compile(
                self.pipe.unet, mode="reduce-overhead", fullgraph=True
            )
            self.pipe.vae = torch.compile(
                self.pipe.vae, mode="reduce-overhead", fullgraph=True
            )
            self.pipe(
                prompt="warmup",
                image=[Image.new("RGB", (768, 768))],
                control_image=[Image.new("RGB", (768, 768))],
            )

    def predict(self, params: "Pipeline.InputParams") -> Image.Image:
        generator = torch.manual_seed(params.seed)
        prompt_embeds = self.pipe.compel_proc(params.prompt)
        control_image = self.canny_torch(
            params.image, params.canny_low_threshold, params.canny_high_threshold
        )
        steps = params.steps
        strength = params.strength
        if int(steps * strength) < 1:
            steps = math.ceil(1 / max(0.10, strength))
        last_time = time.time()
        results = self.pipe(
            image=params.image,
            control_image=control_image,
            prompt_embeds=prompt_embeds,
            generator=generator,
            strength=strength,
            num_inference_steps=steps,
            guidance_scale=params.guidance_scale,
            width=params.width,
            height=params.height,
            output_type="pil",
            controlnet_conditioning_scale=params.controlnet_scale,
            control_guidance_start=params.controlnet_start,
            control_guidance_end=params.controlnet_end,
        )
        print(f"Time taken: {time.time() - last_time}")

        nsfw_content_detected = (
            results.nsfw_content_detected[0]
            if "nsfw_content_detected" in results
            else False
        )
        if nsfw_content_detected:
            return None
        result_image = results.images[0]
        if params.debug_canny:
            # paste control_image on top of result_image
            w0, h0 = (200, 200)
            control_image = control_image.resize((w0, h0))
            w1, h1 = result_image.size
            result_image.paste(control_image, (w1 - w0, h1 - h0))

        return result_image
pipelines/controlnet.py
CHANGED
@@ -16,12 +16,38 @@ import psutil
 from config import Args
 from pydantic import BaseModel, Field
 from PIL import Image
+import math
 
 base_model = "SimianLuo/LCM_Dreamshaper_v7"
 taesd_model = "madebyollin/taesd"
 controlnet_model = "lllyasviel/control_v11p_sd15_canny"
 
 default_prompt = "Portrait of The Terminator with , glare pose, detailed, intricate, full of colour, cinematic lighting, trending on artstation, 8k, hyperrealistic, focused, extreme details, unreal engine 5 cinematic, masterpiece"
+page_content = """
+<h1 class="text-3xl font-bold">Real-Time Latent Consistency Model</h1>
+<h3 class="text-xl font-bold">LCM + Controlnet Canny</h3>
+<p class="text-sm">
+    This demo showcases
+    <a
+    href="https://huggingface.co/blog/lcm_lora"
+    target="_blank"
+    class="text-blue-500 underline hover:no-underline">LCM LoRA</a
+    >
+    ControlNet + Image to Image pipeline using
+    <a
+    href="https://huggingface.co/docs/diffusers/main/en/using-diffusers/lcm#performing-inference-with-lcm"
+    target="_blank"
+    class="text-blue-500 underline hover:no-underline">Diffusers</a
+    > with a MJPEG stream server.
+</p>
+<p class="text-sm text-gray-500">
+    Change the prompt to generate different images, accepts <a
+    href="https://github.com/damian0815/compel/blob/main/doc/syntax.md"
+    target="_blank"
+    class="text-blue-500 underline hover:no-underline">Compel</a
+    > syntax.
+</p>
+"""
 
 
 class Pipeline:
@@ -30,6 +56,7 @@ class Pipeline:
         title: str = "LCM + Controlnet"
         description: str = "Generates an image from a text prompt"
         input_mode: str = "image"
+        page_content: str = page_content
 
     class InputParams(BaseModel):
         prompt: str = Field(
@@ -42,13 +69,13 @@ class Pipeline:
             2159232, min=0, title="Seed", field="seed", hide=True, id="seed"
         )
         steps: int = Field(
-            4, min=…
+            4, min=1, max=15, title="Steps", field="range", hide=True, id="steps"
         )
         width: int = Field(
-…
+            768, min=2, max=15, title="Width", disabled=True, hide=True, id="width"
         )
         height: int = Field(
-…
+            768, min=2, max=15, title="Height", disabled=True, hide=True, id="height"
         )
         guidance_scale: float = Field(
             0.2,
@@ -145,7 +172,7 @@ class Pipeline:
         if args.use_taesd:
             self.pipe.vae = AutoencoderTiny.from_pretrained(
                 taesd_model, torch_dtype=torch_dtype, use_safetensors=True
-            )
+            ).to(device)
         self.canny_torch = SobelOperator(device=device)
         self.pipe.set_progress_bar_config(disable=True)
         self.pipe.to(device=device, dtype=torch_dtype)
@@ -182,14 +209,18 @@ class Pipeline:
         control_image = self.canny_torch(
             params.image, params.canny_low_threshold, params.canny_high_threshold
         )
+        steps = params.steps
+        strength = params.strength
+        if int(steps * strength) < 1:
+            steps = math.ceil(1 / max(0.10, strength))
 
         results = self.pipe(
             image=params.image,
             control_image=control_image,
             prompt_embeds=prompt_embeds,
             generator=generator,
-            strength=…
-            num_inference_steps=…
+            strength=strength,
+            num_inference_steps=steps,
             guidance_scale=params.guidance_scale,
             width=params.width,
             height=params.height,
pipelines/controlnetLoraSD15.py
CHANGED
@@ -2,6 +2,7 @@ from diffusers import (
     StableDiffusionControlNetImg2ImgPipeline,
     ControlNetModel,
     LCMScheduler,
+    AutoencoderTiny,
 )
 from compel import Compel
 import torch
@@ -16,6 +17,7 @@ import psutil
 from config import Args
 from pydantic import BaseModel, Field
 from PIL import Image
+import math
 
 taesd_model = "madebyollin/taesd"
 controlnet_model = "lllyasviel/control_v11p_sd15_canny"
@@ -26,17 +28,40 @@ base_models = {
     "nitrosocke/mo-di-diffusion": "modern disney style",
 }
 lcm_lora_id = "latent-consistency/lcm-lora-sdv1-5"
-
-
 default_prompt = "Portrait of The Terminator with , glare pose, detailed, intricate, full of colour, cinematic lighting, trending on artstation, 8k, hyperrealistic, focused, extreme details, unreal engine 5 cinematic, masterpiece"
+page_content = """
+<h1 class="text-3xl font-bold">Real-Time Latent Consistency Model SDv1.5</h1>
+<h3 class="text-xl font-bold">LCM + LoRA + Controlnet + Canny</h3>
+<p class="text-sm">
+    This demo showcases
+    <a
+    href="https://huggingface.co/blog/lcm_lora"
+    target="_blank"
+    class="text-blue-500 underline hover:no-underline">LCM LoRA</a>
+    + ControlNet + Image to Image pipeline using
+    <a
+    href="https://huggingface.co/docs/diffusers/main/en/using-diffusers/lcm#performing-inference-with-lcm"
+    target="_blank"
+    class="text-blue-500 underline hover:no-underline">Diffusers</a
+    > with a MJPEG stream server.
+</p>
+<p class="text-sm text-gray-500">
+    Change the prompt to generate different images, accepts <a
+    href="https://github.com/damian0815/compel/blob/main/doc/syntax.md"
+    target="_blank"
+    class="text-blue-500 underline hover:no-underline">Compel</a
+    > syntax.
+</p>
+"""
 
 
 class Pipeline:
     class Info(BaseModel):
         name: str = "controlnet+loras+sd15"
-        title: str = "LCM + LoRA + Controlnet…
+        title: str = "LCM + LoRA + Controlnet"
         description: str = "Generates an image from a text prompt"
         input_mode: str = "image"
+        page_content: str = page_content
 
     class InputParams(BaseModel):
         prompt: str = Field(
@@ -45,24 +70,24 @@ class Pipeline:
             field="textarea",
             id="prompt",
         )
-        …
+        base_model_id: str = Field(
             "plasmo/woolitize",
             title="Base Model",
             values=list(base_models.keys()),
             field="select",
-            id="…
+            id="base_model_id",
         )
         seed: int = Field(
             2159232, min=0, title="Seed", field="seed", hide=True, id="seed"
         )
         steps: int = Field(
-            4, min=…
+            4, min=1, max=15, title="Steps", field="range", hide=True, id="steps"
         )
         width: int = Field(
-…
+            768, min=2, max=15, title="Width", disabled=True, hide=True, id="width"
         )
         height: int = Field(
-…
+            768, min=2, max=15, title="Height", disabled=True, hide=True, id="height"
         )
         guidance_scale: float = Field(
             0.2,
@@ -150,20 +175,20 @@ class Pipeline:
         self.pipes = {}
 
         if args.safety_checker:
-            for…
+            for base_model_id in base_models.keys():
                 pipe = StableDiffusionControlNetImg2ImgPipeline.from_pretrained(
-…
+                    base_model_id,
                     controlnet=controlnet_canny,
                 )
-                self.pipes[…
+                self.pipes[base_model_id] = pipe
         else:
-            for…
+            for base_model_id in base_models.keys():
                 pipe = StableDiffusionControlNetImg2ImgPipeline.from_pretrained(
-…
+                    base_model_id,
                     safety_checker=None,
                     controlnet=controlnet_canny,
                 )
-                self.pipes[…
+                self.pipes[base_model_id] = pipe
 
         self.canny_torch = SobelOperator(device=device)
 
@@ -177,6 +202,11 @@ class Pipeline:
             if psutil.virtual_memory().total < 64 * 1024**3:
                 pipe.enable_attention_slicing()
 
+            if args.use_taesd:
+                pipe.vae = AutoencoderTiny.from_pretrained(
+                    taesd_model, torch_dtype=torch_dtype, use_safetensors=True
+                ).to(device)
+
             # Load LCM LoRA
             pipe.load_lora_weights(lcm_lora_id, adapter_name="lcm")
             pipe.compel_proc = Compel(
@@ -199,23 +229,26 @@ class Pipeline:
 
     def predict(self, params: "Pipeline.InputParams") -> Image.Image:
         generator = torch.manual_seed(params.seed)
-
-        pipe = self.pipes[params.model_id]
+        pipe = self.pipes[params.base_model_id]
 
-        activation_token = base_models[params.…
+        activation_token = base_models[params.base_model_id]
         prompt = f"{activation_token} {params.prompt}"
         prompt_embeds = pipe.compel_proc(prompt)
         control_image = self.canny_torch(
             params.image, params.canny_low_threshold, params.canny_high_threshold
         )
+        steps = params.steps
+        strength = params.strength
+        if int(steps * strength) < 1:
+            steps = math.ceil(1 / max(0.10, strength))
 
         results = pipe(
             image=params.image,
             control_image=control_image,
             prompt_embeds=prompt_embeds,
             generator=generator,
-            strength=…
-            num_inference_steps=…
+            strength=strength,
+            num_inference_steps=steps,
             guidance_scale=params.guidance_scale,
             width=params.width,
             height=params.height,
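A minimal restatement of the per-model selection this diff introduces: one img2img pipeline per entry in `base_models`, picked per request by the new `base_model_id` select field, with the activation token prepended to the user prompt (model ids and tokens copied from the diff above).

```python
base_models = {
    "plasmo/woolitize": "woolitize",
    "nitrosocke/Ghibli-Diffusion": "ghibli style",
    "nitrosocke/mo-di-diffusion": "modern disney style",
}

def build_prompt(base_model_id: str, user_prompt: str) -> str:
    activation_token = base_models[base_model_id]
    return f"{activation_token} {user_prompt}"

print(build_prompt("nitrosocke/Ghibli-Diffusion", "a cozy mountain village at dusk"))
# -> "ghibli style a cozy mountain village at dusk"
```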
pipelines/controlnetLoraSDXL.py
CHANGED
@@ -3,6 +3,7 @@ from diffusers import (
     ControlNetModel,
     LCMScheduler,
     AutoencoderKL,
+    AutoencoderTiny,
 )
 from compel import Compel, ReturnedEmbeddingsType
 import torch
@@ -17,30 +18,49 @@ import psutil
 from config import Args
 from pydantic import BaseModel, Field
 from PIL import Image
+import math
 
 controlnet_model = "diffusers/controlnet-canny-sdxl-1.0"
 model_id = "stabilityai/stable-diffusion-xl-base-1.0"
 lcm_lora_id = "latent-consistency/lcm-lora-sdxl"
-
-# # base model with activation token, it will prepend the prompt with the activation token
-base_models = {
-    "plasmo/woolitize": "woolitize",
-    "nitrosocke/Ghibli-Diffusion": "ghibli style",
-    "nitrosocke/mo-di-diffusion": "modern disney style",
-}
-# lcm_lora_id = "latent-consistency/lcm-lora-sdv1-5"
+taesd_model = "madebyollin/taesdxl"
 
 
 default_prompt = "Portrait of The Terminator with , glare pose, detailed, intricate, full of colour, cinematic lighting, trending on artstation, 8k, hyperrealistic, focused, extreme details, unreal engine 5 cinematic, masterpiece"
 default_negative_prompt = "blurry, low quality, render, 3D, oversaturated"
+page_content = """
+<h1 class="text-3xl font-bold">Real-Time Latent Consistency Model SDXL</h1>
+<h3 class="text-xl font-bold">SDXL + LCM + LoRA + Controlnet</h3>
+<p class="text-sm">
+    This demo showcases
+    <a
+    href="https://huggingface.co/blog/lcm_lora"
+    target="_blank"
+    class="text-blue-500 underline hover:no-underline">LCM LoRA</a>
+    + SDXL + Controlnet + Image to Image pipeline using
+    <a
+    href="https://huggingface.co/docs/diffusers/main/en/using-diffusers/lcm#performing-inference-with-lcm"
+    target="_blank"
+    class="text-blue-500 underline hover:no-underline">Diffusers</a
+    > with a MJPEG stream server.
+</p>
+<p class="text-sm text-gray-500">
+    Change the prompt to generate different images, accepts <a
+    href="https://github.com/damian0815/compel/blob/main/doc/syntax.md"
+    target="_blank"
+    class="text-blue-500 underline hover:no-underline">Compel</a
+    > syntax.
+</p>
+"""
 
 
 class Pipeline:
     class Info(BaseModel):
         name: str = "controlnet+loras+sdxl"
-        title: str = "SDXL + LCM + LoRA + Controlnet…
+        title: str = "SDXL + LCM + LoRA + Controlnet"
         description: str = "Generates an image from a text prompt"
         input_mode: str = "image"
+        page_content: str = page_content
 
     class InputParams(BaseModel):
         prompt: str = Field(
@@ -60,13 +80,13 @@ class Pipeline:
             2159232, min=0, title="Seed", field="seed", hide=True, id="seed"
         )
         steps: int = Field(
-…
+            2, min=1, max=15, title="Steps", field="range", hide=True, id="steps"
         )
         width: int = Field(
-…
+            1024, min=2, max=15, title="Width", disabled=True, hide=True, id="width"
         )
         height: int = Field(
-…
+            1024, min=2, max=15, title="Height", disabled=True, hide=True, id="height"
         )
         guidance_scale: float = Field(
             1.0,
@@ -79,10 +99,10 @@ class Pipeline:
             id="guidance_scale",
         )
         strength: float = Field(
-…
+            1,
             min=0.25,
             max=1.0,
-            step=0.…
+            step=0.0001,
             title="Strength",
             field="range",
             hide=True,
@@ -191,6 +211,10 @@ class Pipeline:
             returned_embeddings_type=ReturnedEmbeddingsType.PENULTIMATE_HIDDEN_STATES_NON_NORMALIZED,
             requires_pooled=[False, True],
         )
+        if args.use_taesd:
+            self.pipe.vae = AutoencoderTiny.from_pretrained(
+                taesd_model, torch_dtype=torch_dtype, use_safetensors=True
+            ).to(device)
 
         if args.torch_compile:
             self.pipe.unet = torch.compile(
@@ -214,6 +238,10 @@ class Pipeline:
         control_image = self.canny_torch(
             params.image, params.canny_low_threshold, params.canny_high_threshold
         )
+        steps = params.steps
+        strength = params.strength
+        if int(steps * strength) < 1:
+            steps = math.ceil(1 / max(0.10, strength))
 
         results = self.pipe(
             image=params.image,
@@ -223,8 +251,8 @@ class Pipeline:
             negative_prompt_embeds=prompt_embeds[1:2],
             negative_pooled_prompt_embeds=pooled_prompt_embeds[1:2],
             generator=generator,
-            strength=…
-            num_inference_steps=…
+            strength=strength,
+            num_inference_steps=steps,
             guidance_scale=params.guidance_scale,
             width=params.width,
             height=params.height,
pipelines/controlnetSDXLTurbo.py
ADDED
@@ -0,0 +1,268 @@
from diffusers import (
    StableDiffusionXLControlNetImg2ImgPipeline,
    ControlNetModel,
    AutoencoderKL,
    AutoencoderTiny,
)
from compel import Compel, ReturnedEmbeddingsType
import torch
from pipelines.utils.canny_gpu import SobelOperator

try:
    import intel_extension_for_pytorch as ipex  # type: ignore
except:
    pass

import psutil
from config import Args
from pydantic import BaseModel, Field
from PIL import Image
import math

controlnet_model = "diffusers/controlnet-canny-sdxl-1.0"
model_id = "stabilityai/sdxl-turbo"
taesd_model = "madebyollin/taesdxl"

default_prompt = "Portrait of The Terminator with , glare pose, detailed, intricate, full of colour, cinematic lighting, trending on artstation, 8k, hyperrealistic, focused, extreme details, unreal engine 5 cinematic, masterpiece"
default_negative_prompt = "blurry, low quality, render, 3D, oversaturated"
page_content = """
<h1 class="text-3xl font-bold">Real-Time SDXL Turbo</h1>
<h3 class="text-xl font-bold">Image-to-Image ControlNet</h3>
<p class="text-sm">
    This demo showcases
    <a
    href="https://huggingface.co/stabilityai/sdxl-turbo"
    target="_blank"
    class="text-blue-500 underline hover:no-underline">SDXL Turbo</a>
    Image to Image pipeline using
    <a
    href="https://huggingface.co/docs/diffusers/main/en/using-diffusers/sdxl_turbo"
    target="_blank"
    class="text-blue-500 underline hover:no-underline">Diffusers</a
    > with a MJPEG stream server.
</p>
<p class="text-sm text-gray-500">
    Change the prompt to generate different images, accepts <a
    href="https://github.com/damian0815/compel/blob/main/doc/syntax.md"
    target="_blank"
    class="text-blue-500 underline hover:no-underline">Compel</a
    > syntax.
</p>
"""


class Pipeline:
    class Info(BaseModel):
        name: str = "controlnet+SDXL+Turbo"
        title: str = "SDXL Turbo + Controlnet"
        description: str = "Generates an image from a text prompt"
        input_mode: str = "image"
        page_content: str = page_content

    class InputParams(BaseModel):
        prompt: str = Field(
            default_prompt,
            title="Prompt",
            field="textarea",
            id="prompt",
        )
        negative_prompt: str = Field(
            default_negative_prompt,
            title="Negative Prompt",
            field="textarea",
            id="negative_prompt",
            hide=True,
        )
        seed: int = Field(
            2159232, min=0, title="Seed", field="seed", hide=True, id="seed"
        )
        steps: int = Field(
            2, min=1, max=15, title="Steps", field="range", hide=True, id="steps"
        )
        width: int = Field(
            1024, min=2, max=15, title="Width", disabled=True, hide=True, id="width"
        )
        height: int = Field(
            1024, min=2, max=15, title="Height", disabled=True, hide=True, id="height"
        )
        guidance_scale: float = Field(
            1.0,
            min=0,
            max=10,
            step=0.001,
            title="Guidance Scale",
            field="range",
            hide=True,
            id="guidance_scale",
        )
        strength: float = Field(
            0.5,
            min=0.25,
            max=1.0,
            step=0.001,
            title="Strength",
            field="range",
            hide=True,
            id="strength",
        )
        controlnet_scale: float = Field(
            0.5,
            min=0,
            max=1.0,
            step=0.001,
            title="Controlnet Scale",
            field="range",
            hide=True,
            id="controlnet_scale",
        )
        controlnet_start: float = Field(
            0.0,
            min=0,
            max=1.0,
            step=0.001,
            title="Controlnet Start",
            field="range",
            hide=True,
            id="controlnet_start",
        )
        controlnet_end: float = Field(
            1.0,
            min=0,
            max=1.0,
            step=0.001,
            title="Controlnet End",
            field="range",
            hide=True,
            id="controlnet_end",
        )
        canny_low_threshold: float = Field(
            0.31,
            min=0,
            max=1.0,
            step=0.001,
            title="Canny Low Threshold",
            field="range",
            hide=True,
            id="canny_low_threshold",
        )
        canny_high_threshold: float = Field(
            0.125,
            min=0,
            max=1.0,
            step=0.001,
            title="Canny High Threshold",
            field="range",
            hide=True,
            id="canny_high_threshold",
        )
        debug_canny: bool = Field(
            False,
            title="Debug Canny",
            field="checkbox",
            hide=True,
            id="debug_canny",
        )

    def __init__(self, args: Args, device: torch.device, torch_dtype: torch.dtype):
        controlnet_canny = ControlNetModel.from_pretrained(
            controlnet_model, torch_dtype=torch_dtype
        ).to(device)
        vae = AutoencoderKL.from_pretrained(
            "madebyollin/sdxl-vae-fp16-fix", torch_dtype=torch_dtype
        )
        if args.safety_checker:
            self.pipe = StableDiffusionXLControlNetImg2ImgPipeline.from_pretrained(
                model_id,
                controlnet=controlnet_canny,
                vae=vae,
            )
        else:
            self.pipe = StableDiffusionXLControlNetImg2ImgPipeline.from_pretrained(
                model_id,
                safety_checker=None,
                controlnet=controlnet_canny,
                vae=vae,
            )
        self.canny_torch = SobelOperator(device=device)

        self.pipe.set_progress_bar_config(disable=True)
        self.pipe.to(device=device, dtype=torch_dtype).to(device)
        if device.type != "mps":
            self.pipe.unet.to(memory_format=torch.channels_last)

        if psutil.virtual_memory().total < 64 * 1024**3:
            self.pipe.enable_attention_slicing()

        self.pipe.compel_proc = Compel(
            tokenizer=[self.pipe.tokenizer, self.pipe.tokenizer_2],
            text_encoder=[self.pipe.text_encoder, self.pipe.text_encoder_2],
            returned_embeddings_type=ReturnedEmbeddingsType.PENULTIMATE_HIDDEN_STATES_NON_NORMALIZED,
            requires_pooled=[False, True],
        )
        if args.use_taesd:
            self.pipe.vae = AutoencoderTiny.from_pretrained(
                taesd_model, torch_dtype=torch_dtype, use_safetensors=True
            ).to(device)

        if args.torch_compile:
            self.pipe.unet = torch.compile(
                self.pipe.unet, mode="reduce-overhead", fullgraph=True
            )
            self.pipe.vae = torch.compile(
                self.pipe.vae, mode="reduce-overhead", fullgraph=True
            )
            self.pipe(
                prompt="warmup",
                image=[Image.new("RGB", (768, 768))],
                control_image=[Image.new("RGB", (768, 768))],
            )

    def predict(self, params: "Pipeline.InputParams") -> Image.Image:
        generator = torch.manual_seed(params.seed)

        prompt_embeds, pooled_prompt_embeds = self.pipe.compel_proc(
            [params.prompt, params.negative_prompt]
        )
        control_image = self.canny_torch(
            params.image, params.canny_low_threshold, params.canny_high_threshold
+
)
|
229 |
+
steps = params.steps
|
230 |
+
strength = params.strength
|
231 |
+
if int(steps * strength) < 1:
|
232 |
+
steps = math.ceil(1 / max(0.10, strength))
|
233 |
+
|
234 |
+
results = self.pipe(
|
235 |
+
image=params.image,
|
236 |
+
control_image=control_image,
|
237 |
+
prompt_embeds=prompt_embeds[0:1],
|
238 |
+
pooled_prompt_embeds=pooled_prompt_embeds[0:1],
|
239 |
+
negative_prompt_embeds=prompt_embeds[1:2],
|
240 |
+
negative_pooled_prompt_embeds=pooled_prompt_embeds[1:2],
|
241 |
+
generator=generator,
|
242 |
+
strength=strength,
|
243 |
+
num_inference_steps=steps,
|
244 |
+
guidance_scale=params.guidance_scale,
|
245 |
+
width=params.width,
|
246 |
+
height=params.height,
|
247 |
+
output_type="pil",
|
248 |
+
controlnet_conditioning_scale=params.controlnet_scale,
|
249 |
+
control_guidance_start=params.controlnet_start,
|
250 |
+
control_guidance_end=params.controlnet_end,
|
251 |
+
)
|
252 |
+
|
253 |
+
nsfw_content_detected = (
|
254 |
+
results.nsfw_content_detected[0]
|
255 |
+
if "nsfw_content_detected" in results
|
256 |
+
else False
|
257 |
+
)
|
258 |
+
if nsfw_content_detected:
|
259 |
+
return None
|
260 |
+
result_image = results.images[0]
|
261 |
+
if params.debug_canny:
|
262 |
+
# paste control_image on top of result_image
|
263 |
+
w0, h0 = (200, 200)
|
264 |
+
control_image = control_image.resize((w0, h0))
|
265 |
+
w1, h1 = result_image.size
|
266 |
+
result_image.paste(control_image, (w1 - w0, h1 - h0))
|
267 |
+
|
268 |
+
return result_image
|
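The `if int(steps * strength) < 1` guard in `predict` is there because diffusers img2img pipelines only execute roughly `int(num_inference_steps * strength)` denoising steps, so a small step count combined with a low strength can round down to zero steps. A minimal sketch of the same adjustment in isolation (the function and variable names here are illustrative, not part of the pipeline code):

```python
import math

def effective_steps(requested_steps: int, strength: float) -> int:
    # Hypothetical helper mirroring the guard used in the pipelines above:
    # diffusers img2img runs about int(steps * strength) denoising steps,
    # so make sure at least one step survives the rounding.
    if int(requested_steps * strength) < 1:
        return math.ceil(1 / max(0.10, strength))
    return requested_steps

# 2 steps at strength 0.3 would round down to 0 steps,
# so the request is bumped up to ceil(1 / 0.3) = 4 steps.
assert effective_steps(2, 0.3) == 4
assert effective_steps(4, 0.5) == 4
```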
pipelines/img2img.py
CHANGED
@@ -14,11 +14,36 @@ import psutil
|
|
14 |
from config import Args
|
15 |
from pydantic import BaseModel, Field
|
16 |
from PIL import Image
|
|
|
17 |
|
18 |
base_model = "SimianLuo/LCM_Dreamshaper_v7"
|
19 |
taesd_model = "madebyollin/taesd"
|
20 |
|
21 |
default_prompt = "Portrait of The Terminator with , glare pose, detailed, intricate, full of colour, cinematic lighting, trending on artstation, 8k, hyperrealistic, focused, extreme details, unreal engine 5 cinematic, masterpiece"
|
22 |
|
23 |
|
24 |
class Pipeline:
|
@@ -27,6 +52,7 @@ class Pipeline:
|
|
27 |
title: str = "Image-to-Image LCM"
|
28 |
description: str = "Generates an image from a text prompt"
|
29 |
input_mode: str = "image"
|
|
|
30 |
|
31 |
class InputParams(BaseModel):
|
32 |
prompt: str = Field(
|
@@ -39,13 +65,13 @@ class Pipeline:
|
|
39 |
2159232, min=0, title="Seed", field="seed", hide=True, id="seed"
|
40 |
)
|
41 |
steps: int = Field(
|
42 |
-
4, min=
|
43 |
)
|
44 |
width: int = Field(
|
45 |
-
|
46 |
)
|
47 |
height: int = Field(
|
48 |
-
|
49 |
)
|
50 |
guidance_scale: float = Field(
|
51 |
0.2,
|
@@ -79,7 +105,7 @@ class Pipeline:
|
|
79 |
if args.use_taesd:
|
80 |
self.pipe.vae = AutoencoderTiny.from_pretrained(
|
81 |
taesd_model, torch_dtype=torch_dtype, use_safetensors=True
|
82 |
-
)
|
83 |
|
84 |
self.pipe.set_progress_bar_config(disable=True)
|
85 |
self.pipe.to(device=device, dtype=torch_dtype)
|
@@ -113,12 +139,18 @@ class Pipeline:
|
|
113 |
def predict(self, params: "Pipeline.InputParams") -> Image.Image:
|
114 |
generator = torch.manual_seed(params.seed)
|
115 |
prompt_embeds = self.compel_proc(params.prompt)
|
116 |
results = self.pipe(
|
117 |
image=params.image,
|
118 |
prompt_embeds=prompt_embeds,
|
119 |
generator=generator,
|
120 |
-
strength=
|
121 |
-
num_inference_steps=
|
122 |
guidance_scale=params.guidance_scale,
|
123 |
width=params.width,
|
124 |
height=params.height,
|
|
|
14 |
from config import Args
|
15 |
from pydantic import BaseModel, Field
|
16 |
from PIL import Image
|
17 |
+
import math
|
18 |
|
19 |
base_model = "SimianLuo/LCM_Dreamshaper_v7"
|
20 |
taesd_model = "madebyollin/taesd"
|
21 |
|
22 |
default_prompt = "Portrait of The Terminator with , glare pose, detailed, intricate, full of colour, cinematic lighting, trending on artstation, 8k, hyperrealistic, focused, extreme details, unreal engine 5 cinematic, masterpiece"
|
23 |
+
page_content = """
|
24 |
+
<h1 class="text-3xl font-bold">Real-Time Latent Consistency Model</h1>
|
25 |
+
<h3 class="text-xl font-bold">Image-to-Image LCM</h3>
|
26 |
+
<p class="text-sm">
|
27 |
+
This demo showcases
|
28 |
+
<a
|
29 |
+
href="https://huggingface.co/blog/lcm_lora"
|
30 |
+
target="_blank"
|
31 |
+
class="text-blue-500 underline hover:no-underline">LCM</a>
|
32 |
+
Image to Image pipeline using
|
33 |
+
<a
|
34 |
+
href="https://huggingface.co/docs/diffusers/main/en/using-diffusers/lcm#performing-inference-with-lcm"
|
35 |
+
target="_blank"
|
36 |
+
class="text-blue-500 underline hover:no-underline">Diffusers</a
|
37 |
+
> with a MJPEG stream server.
|
38 |
+
</p>
|
39 |
+
<p class="text-sm text-gray-500">
|
40 |
+
Change the prompt to generate different images, accepts <a
|
41 |
+
href="https://github.com/damian0815/compel/blob/main/doc/syntax.md"
|
42 |
+
target="_blank"
|
43 |
+
class="text-blue-500 underline hover:no-underline">Compel</a
|
44 |
+
> syntax.
|
45 |
+
</p>
|
46 |
+
"""
|
47 |
|
48 |
|
49 |
class Pipeline:
|
|
|
52 |
title: str = "Image-to-Image LCM"
|
53 |
description: str = "Generates an image from a text prompt"
|
54 |
input_mode: str = "image"
|
55 |
+
page_content: str = page_content
|
56 |
|
57 |
class InputParams(BaseModel):
|
58 |
prompt: str = Field(
|
|
|
65 |
2159232, min=0, title="Seed", field="seed", hide=True, id="seed"
|
66 |
)
|
67 |
steps: int = Field(
|
68 |
+
4, min=1, max=15, title="Steps", field="range", hide=True, id="steps"
|
69 |
)
|
70 |
width: int = Field(
|
71 |
+
768, min=2, max=15, title="Width", disabled=True, hide=True, id="width"
|
72 |
)
|
73 |
height: int = Field(
|
74 |
+
768, min=2, max=15, title="Height", disabled=True, hide=True, id="height"
|
75 |
)
|
76 |
guidance_scale: float = Field(
|
77 |
0.2,
|
|
|
105 |
if args.use_taesd:
|
106 |
self.pipe.vae = AutoencoderTiny.from_pretrained(
|
107 |
taesd_model, torch_dtype=torch_dtype, use_safetensors=True
|
108 |
+
).to(device)
|
109 |
|
110 |
self.pipe.set_progress_bar_config(disable=True)
|
111 |
self.pipe.to(device=device, dtype=torch_dtype)
|
|
|
139 |
def predict(self, params: "Pipeline.InputParams") -> Image.Image:
|
140 |
generator = torch.manual_seed(params.seed)
|
141 |
prompt_embeds = self.compel_proc(params.prompt)
|
142 |
+
|
143 |
+
steps = params.steps
|
144 |
+
strength = params.strength
|
145 |
+
if int(steps * strength) < 1:
|
146 |
+
steps = math.ceil(1 / max(0.10, strength))
|
147 |
+
|
148 |
results = self.pipe(
|
149 |
image=params.image,
|
150 |
prompt_embeds=prompt_embeds,
|
151 |
generator=generator,
|
152 |
+
strength=strength,
|
153 |
+
num_inference_steps=steps,
|
154 |
guidance_scale=params.guidance_scale,
|
155 |
width=params.width,
|
156 |
height=params.height,
|
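When `use_taesd` is enabled, the pipelines above replace the full VAE with the tiny autoencoder so latent decoding keeps up with the MJPEG stream; the change in this diff also moves that autoencoder onto the target device explicitly. A minimal sketch of the swap on its own, assuming a CUDA device and the `SimianLuo/LCM_Dreamshaper_v7` and `madebyollin/taesd` checkpoints named above:

```python
import torch
from diffusers import AutoencoderTiny, DiffusionPipeline

device = torch.device("cuda")
torch_dtype = torch.float16

pipe = DiffusionPipeline.from_pretrained(
    "SimianLuo/LCM_Dreamshaper_v7", torch_dtype=torch_dtype
)
# Replace the full VAE with TAESD; moving it to the device explicitly matters,
# otherwise decoding can stay on CPU and dominate the per-frame latency.
pipe.vae = AutoencoderTiny.from_pretrained(
    "madebyollin/taesd", torch_dtype=torch_dtype, use_safetensors=True
).to(device)
pipe.to(device=device, dtype=torch_dtype)
```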
pipelines/img2imgSDXLTurbo.py
ADDED
@@ -0,0 +1,182 @@
|
|
1 |
+
from diffusers import (
|
2 |
+
AutoPipelineForImage2Image,
|
3 |
+
AutoencoderTiny,
|
4 |
+
)
|
5 |
+
from compel import Compel, ReturnedEmbeddingsType
|
6 |
+
import torch
|
7 |
+
|
8 |
+
try:
|
9 |
+
import intel_extension_for_pytorch as ipex # type: ignore
|
10 |
+
except:
|
11 |
+
pass
|
12 |
+
|
13 |
+
import psutil
|
14 |
+
from config import Args
|
15 |
+
from pydantic import BaseModel, Field
|
16 |
+
from PIL import Image
|
17 |
+
import math
|
18 |
+
|
19 |
+
base_model = "stabilityai/sdxl-turbo"
|
20 |
+
taesd_model = "madebyollin/taesdxl"
|
21 |
+
|
22 |
+
default_prompt = "close-up photography of old man standing in the rain at night, in a street lit by lamps, leica 35mm summilux"
|
23 |
+
default_negative_prompt = "blurry, low quality, render, 3D, oversaturated"
|
24 |
+
page_content = """
|
25 |
+
<h1 class="text-3xl font-bold">Real-Time SDXL Turbo</h1>
|
26 |
+
<h3 class="text-xl font-bold">Image-to-Image</h3>
|
27 |
+
<p class="text-sm">
|
28 |
+
This demo showcases
|
29 |
+
<a
|
30 |
+
href="https://huggingface.co/stabilityai/sdxl-turbo"
|
31 |
+
target="_blank"
|
32 |
+
class="text-blue-500 underline hover:no-underline">SDXL Turbo</a>
|
33 |
+
Image to Image pipeline using
|
34 |
+
<a
|
35 |
+
href="https://huggingface.co/docs/diffusers/main/en/using-diffusers/sdxl_turbo"
|
36 |
+
target="_blank"
|
37 |
+
class="text-blue-500 underline hover:no-underline">Diffusers</a
|
38 |
+
> with a MJPEG stream server.
|
39 |
+
</p>
|
40 |
+
<p class="text-sm text-gray-500">
|
41 |
+
Change the prompt to generate different images, accepts <a
|
42 |
+
href="https://github.com/damian0815/compel/blob/main/doc/syntax.md"
|
43 |
+
target="_blank"
|
44 |
+
class="text-blue-500 underline hover:no-underline">Compel</a
|
45 |
+
> syntax.
|
46 |
+
</p>
|
47 |
+
"""
|
48 |
+
|
49 |
+
|
50 |
+
class Pipeline:
|
51 |
+
class Info(BaseModel):
|
52 |
+
name: str = "img2img"
|
53 |
+
title: str = "Image-to-Image SDXL"
|
54 |
+
description: str = "Generates an image from a text prompt"
|
55 |
+
input_mode: str = "image"
|
56 |
+
page_content: str = page_content
|
57 |
+
|
58 |
+
class InputParams(BaseModel):
|
59 |
+
prompt: str = Field(
|
60 |
+
default_prompt,
|
61 |
+
title="Prompt",
|
62 |
+
field="textarea",
|
63 |
+
id="prompt",
|
64 |
+
)
|
65 |
+
negative_prompt: str = Field(
|
66 |
+
default_negative_prompt,
|
67 |
+
title="Negative Prompt",
|
68 |
+
field="textarea",
|
69 |
+
id="negative_prompt",
|
70 |
+
hide=True,
|
71 |
+
)
|
72 |
+
seed: int = Field(
|
73 |
+
2159232, min=0, title="Seed", field="seed", hide=True, id="seed"
|
74 |
+
)
|
75 |
+
steps: int = Field(
|
76 |
+
4, min=1, max=15, title="Steps", field="range", hide=True, id="steps"
|
77 |
+
)
|
78 |
+
width: int = Field(
|
79 |
+
512, min=2, max=15, title="Width", disabled=True, hide=True, id="width"
|
80 |
+
)
|
81 |
+
height: int = Field(
|
82 |
+
512, min=2, max=15, title="Height", disabled=True, hide=True, id="height"
|
83 |
+
)
|
84 |
+
guidance_scale: float = Field(
|
85 |
+
0.2,
|
86 |
+
min=0,
|
87 |
+
max=20,
|
88 |
+
step=0.001,
|
89 |
+
title="Guidance Scale",
|
90 |
+
field="range",
|
91 |
+
hide=True,
|
92 |
+
id="guidance_scale",
|
93 |
+
)
|
94 |
+
strength: float = Field(
|
95 |
+
0.5,
|
96 |
+
min=0.25,
|
97 |
+
max=1.0,
|
98 |
+
step=0.001,
|
99 |
+
title="Strength",
|
100 |
+
field="range",
|
101 |
+
hide=True,
|
102 |
+
id="strength",
|
103 |
+
)
|
104 |
+
|
105 |
+
def __init__(self, args: Args, device: torch.device, torch_dtype: torch.dtype):
|
106 |
+
if args.safety_checker:
|
107 |
+
self.pipe = AutoPipelineForImage2Image.from_pretrained(base_model)
|
108 |
+
else:
|
109 |
+
self.pipe = AutoPipelineForImage2Image.from_pretrained(
|
110 |
+
base_model,
|
111 |
+
safety_checker=None,
|
112 |
+
)
|
113 |
+
if args.use_taesd:
|
114 |
+
self.pipe.vae = AutoencoderTiny.from_pretrained(
|
115 |
+
taesd_model, torch_dtype=torch_dtype, use_safetensors=True
|
116 |
+
).to(device)
|
117 |
+
|
118 |
+
self.pipe.set_progress_bar_config(disable=True)
|
119 |
+
self.pipe.to(device=device, dtype=torch_dtype)
|
120 |
+
if device.type != "mps":
|
121 |
+
self.pipe.unet.to(memory_format=torch.channels_last)
|
122 |
+
|
123 |
+
# check if computer has less than 64GB of RAM using psutil
|
124 |
+
if psutil.virtual_memory().total < 64 * 1024**3:
|
125 |
+
self.pipe.enable_attention_slicing()
|
126 |
+
|
127 |
+
if args.torch_compile:
|
128 |
+
print("Running torch compile")
|
129 |
+
self.pipe.unet = torch.compile(
|
130 |
+
self.pipe.unet, mode="reduce-overhead", fullgraph=True
|
131 |
+
)
|
132 |
+
self.pipe.vae = torch.compile(
|
133 |
+
self.pipe.vae, mode="reduce-overhead", fullgraph=True
|
134 |
+
)
|
135 |
+
|
136 |
+
self.pipe(
|
137 |
+
prompt="warmup",
|
138 |
+
image=[Image.new("RGB", (768, 768))],
|
139 |
+
)
|
140 |
+
|
141 |
+
self.pipe.compel_proc = Compel(
|
142 |
+
tokenizer=[self.pipe.tokenizer, self.pipe.tokenizer_2],
|
143 |
+
text_encoder=[self.pipe.text_encoder, self.pipe.text_encoder_2],
|
144 |
+
returned_embeddings_type=ReturnedEmbeddingsType.PENULTIMATE_HIDDEN_STATES_NON_NORMALIZED,
|
145 |
+
requires_pooled=[False, True],
|
146 |
+
)
|
147 |
+
|
148 |
+
def predict(self, params: "Pipeline.InputParams") -> Image.Image:
|
149 |
+
generator = torch.manual_seed(params.seed)
|
150 |
+
prompt_embeds, pooled_prompt_embeds = self.pipe.compel_proc(
|
151 |
+
[params.prompt, params.negative_prompt]
|
152 |
+
)
|
153 |
+
steps = params.steps
|
154 |
+
strength = params.strength
|
155 |
+
if int(steps * strength) < 1:
|
156 |
+
steps = math.ceil(1 / max(0.10, strength))
|
157 |
+
|
158 |
+
results = self.pipe(
|
159 |
+
image=params.image,
|
160 |
+
prompt_embeds=prompt_embeds[0:1],
|
161 |
+
pooled_prompt_embeds=pooled_prompt_embeds[0:1],
|
162 |
+
negative_prompt_embeds=prompt_embeds[1:2],
|
163 |
+
negative_pooled_prompt_embeds=pooled_prompt_embeds[1:2],
|
164 |
+
generator=generator,
|
165 |
+
strength=strength,
|
166 |
+
num_inference_steps=steps,
|
167 |
+
guidance_scale=params.guidance_scale,
|
168 |
+
width=params.width,
|
169 |
+
height=params.height,
|
170 |
+
output_type="pil",
|
171 |
+
)
|
172 |
+
|
173 |
+
nsfw_content_detected = (
|
174 |
+
results.nsfw_content_detected[0]
|
175 |
+
if "nsfw_content_detected" in results
|
176 |
+
else False
|
177 |
+
)
|
178 |
+
if nsfw_content_detected:
|
179 |
+
return None
|
180 |
+
result_image = results.images[0]
|
181 |
+
|
182 |
+
return result_image
|
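The SDXL pipelines run the prompt and negative prompt through Compel as a single batch and then slice the returned embeddings, which is what the `[0:1]` / `[1:2]` indexing above does. A short sketch of that pattern following Compel's documented SDXL usage; the prompts and the `AutoPipelineForText2Image` loader here are only illustrative:

```python
import torch
from compel import Compel, ReturnedEmbeddingsType
from diffusers import AutoPipelineForText2Image

pipe = AutoPipelineForText2Image.from_pretrained(
    "stabilityai/sdxl-turbo", torch_dtype=torch.float16
).to("cuda")

compel_proc = Compel(
    tokenizer=[pipe.tokenizer, pipe.tokenizer_2],
    text_encoder=[pipe.text_encoder, pipe.text_encoder_2],
    returned_embeddings_type=ReturnedEmbeddingsType.PENULTIMATE_HIDDEN_STATES_NON_NORMALIZED,
    requires_pooled=[False, True],
)

# One Compel pass for both prompts: row 0 is the prompt, row 1 the negative prompt.
prompt_embeds, pooled_prompt_embeds = compel_proc(
    ["close-up photo of a cat", "blurry, low quality"]
)

image = pipe(
    prompt_embeds=prompt_embeds[0:1],
    pooled_prompt_embeds=pooled_prompt_embeds[0:1],
    negative_prompt_embeds=prompt_embeds[1:2],
    negative_pooled_prompt_embeds=pooled_prompt_embeds[1:2],
    num_inference_steps=4,
    guidance_scale=1.0,
).images[0]
```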
pipelines/txt2img.py
CHANGED
@@ -17,6 +17,28 @@ taesd_model = "madebyollin/taesd"
|
|
17 |
|
18 |
default_prompt = "Portrait of The Terminator with , glare pose, detailed, intricate, full of colour, cinematic lighting, trending on artstation, 8k, hyperrealistic, focused, extreme details, unreal engine 5 cinematic, masterpiece"
|
19 |
|
20 |
|
21 |
class Pipeline:
|
22 |
class Info(BaseModel):
|
@@ -24,6 +46,7 @@ class Pipeline:
|
|
24 |
title: str = "Text-to-Image LCM"
|
25 |
description: str = "Generates an image from a text prompt"
|
26 |
input_mode: str = "text"
|
|
|
27 |
|
28 |
class InputParams(BaseModel):
|
29 |
prompt: str = Field(
|
@@ -39,10 +62,10 @@ class Pipeline:
|
|
39 |
4, min=2, max=15, title="Steps", field="range", hide=True, id="steps"
|
40 |
)
|
41 |
width: int = Field(
|
42 |
-
|
43 |
)
|
44 |
height: int = Field(
|
45 |
-
|
46 |
)
|
47 |
guidance_scale: float = Field(
|
48 |
8.0,
|
@@ -65,7 +88,7 @@ class Pipeline:
|
|
65 |
if args.use_taesd:
|
66 |
self.pipe.vae = AutoencoderTiny.from_pretrained(
|
67 |
taesd_model, torch_dtype=torch_dtype, use_safetensors=True
|
68 |
-
)
|
69 |
|
70 |
self.pipe.set_progress_bar_config(disable=True)
|
71 |
self.pipe.to(device=device, dtype=torch_dtype)
|
|
|
17 |
|
18 |
default_prompt = "Portrait of The Terminator with , glare pose, detailed, intricate, full of colour, cinematic lighting, trending on artstation, 8k, hyperrealistic, focused, extreme details, unreal engine 5 cinematic, masterpiece"
|
19 |
|
20 |
+
page_content = """<h1 class="text-3xl font-bold">Real-Time Latent Consistency Model</h1>
|
21 |
+
<h3 class="text-xl font-bold">Text-to-Image</h3>
|
22 |
+
<p class="text-sm">
|
23 |
+
This demo showcases
|
24 |
+
<a
|
25 |
+
href="https://huggingface.co/SimianLuo/LCM_Dreamshaper_v7"
|
26 |
+
target="_blank"
|
27 |
+
class="text-blue-500 underline hover:no-underline">LCM</a>
|
28 |
+
Text to Image pipeline using
|
29 |
+
<a
|
30 |
+
href="https://huggingface.co/docs/diffusers/main/en/using-diffusers/lcm#performing-inference-with-lcm"
|
31 |
+
target="_blank"
|
32 |
+
class="text-blue-500 underline hover:no-underline">Diffusers</a> with a MJPEG stream server
|
33 |
+
</p>
|
34 |
+
<p class="text-sm text-gray-500">
|
35 |
+
Change the prompt to generate different images, accepts <a
|
36 |
+
href="https://github.com/damian0815/compel/blob/main/doc/syntax.md"
|
37 |
+
target="_blank"
|
38 |
+
class="text-blue-500 underline hover:no-underline">Compel</a
|
39 |
+
> syntax.
|
40 |
+
</p>"""
|
41 |
+
|
42 |
|
43 |
class Pipeline:
|
44 |
class Info(BaseModel):
|
|
|
46 |
title: str = "Text-to-Image LCM"
|
47 |
description: str = "Generates an image from a text prompt"
|
48 |
input_mode: str = "text"
|
49 |
+
page_content: str = page_content
|
50 |
|
51 |
class InputParams(BaseModel):
|
52 |
prompt: str = Field(
|
|
|
62 |
4, min=2, max=15, title="Steps", field="range", hide=True, id="steps"
|
63 |
)
|
64 |
width: int = Field(
|
65 |
+
768, min=2, max=15, title="Width", disabled=True, hide=True, id="width"
|
66 |
)
|
67 |
height: int = Field(
|
68 |
+
768, min=2, max=15, title="Height", disabled=True, hide=True, id="height"
|
69 |
)
|
70 |
guidance_scale: float = Field(
|
71 |
8.0,
|
|
|
88 |
if args.use_taesd:
|
89 |
self.pipe.vae = AutoencoderTiny.from_pretrained(
|
90 |
taesd_model, torch_dtype=torch_dtype, use_safetensors=True
|
91 |
+
).to(device)
|
92 |
|
93 |
self.pipe.set_progress_bar_config(disable=True)
|
94 |
self.pipe.to(device=device, dtype=torch_dtype)
|
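The extra keyword arguments on these `Field(...)` declarations (`field`, `id`, `hide`, `disabled`, `min`, `max`) are not standard pydantic validation options; under pydantic v1 they are carried through into the generated JSON schema, which is presumably how the frontend discovers which controls to render. A small sketch of that behaviour, using a made-up `Params` model rather than the real `InputParams`:

```python
from pydantic import BaseModel, Field


class Params(BaseModel):
    # Unrecognised Field kwargs are kept as schema extras under pydantic v1.
    steps: int = Field(4, min=2, max=15, title="Steps", field="range", hide=True, id="steps")
    width: int = Field(768, title="Width", disabled=True, hide=True, id="width")


props = Params.schema()["properties"]["steps"]
# Expected to contain the extras, e.g.:
# {'title': 'Steps', 'default': 4, 'min': 2, 'max': 15,
#  'field': 'range', 'hide': True, 'id': 'steps', 'type': 'integer'}
print(props)
```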
pipelines/txt2imgLora.py
CHANGED
@@ -18,6 +18,34 @@ taesd_model = "madebyollin/taesd"
|
|
18 |
|
19 |
default_prompt = "Analog style photograph of young Harrison Ford as Han Solo, star wars behind the scenes"
|
20 |
|
21 |
|
22 |
class Pipeline:
|
23 |
class Info(BaseModel):
|
@@ -25,6 +53,7 @@ class Pipeline:
|
|
25 |
title: str = "Text-to-Image LCM + LoRa"
|
26 |
description: str = "Generates an image from a text prompt"
|
27 |
input_mode: str = "text"
|
|
|
28 |
|
29 |
class InputParams(BaseModel):
|
30 |
prompt: str = Field(
|
@@ -66,7 +95,7 @@ class Pipeline:
|
|
66 |
if args.use_taesd:
|
67 |
self.pipe.vae = AutoencoderTiny.from_pretrained(
|
68 |
taesd_model, torch_dtype=torch_dtype, use_safetensors=True
|
69 |
-
)
|
70 |
self.pipe.scheduler = LCMScheduler.from_config(self.pipe.scheduler.config)
|
71 |
self.pipe.set_progress_bar_config(disable=True)
|
72 |
self.pipe.to(device=device, dtype=torch_dtype)
|
|
|
18 |
|
19 |
default_prompt = "Analog style photograph of young Harrison Ford as Han Solo, star wars behind the scenes"
|
20 |
|
21 |
+
page_content = """
|
22 |
+
<h1 class="text-3xl font-bold">Real-Time Latent Consistency Model SDv1.5</h1>
|
23 |
+
<h3 class="text-xl font-bold">Text-to-Image LCM + LoRa</h3>
|
24 |
+
<p class="text-sm">
|
25 |
+
This demo showcases
|
26 |
+
<a
|
27 |
+
href="https://huggingface.co/blog/lcm_lora"
|
28 |
+
target="_blank"
|
29 |
+
class="text-blue-500 underline hover:no-underline">LCM</a>
|
30 |
+
Text to Image pipeline using
|
31 |
+
<a
|
32 |
+
href="https://huggingface.co/docs/diffusers/main/en/using-diffusers/lcm#performing-inference-with-lcm"
|
33 |
+
target="_blank"
|
34 |
+
class="text-blue-500 underline hover:no-underline">Diffusers</a
|
35 |
+
> with a MJPEG stream server. Featuring <a
|
36 |
+
href="https://huggingface.co/wavymulder/Analog-Diffusion"
|
37 |
+
target="_blank"
|
38 |
+
class="text-blue-500 underline hover:no-underline">Analog-Diffusion</a>
|
39 |
+
</p>
|
40 |
+
<p class="text-sm text-gray-500">
|
41 |
+
Change the prompt to generate different images, accepts <a
|
42 |
+
href="https://github.com/damian0815/compel/blob/main/doc/syntax.md"
|
43 |
+
target="_blank"
|
44 |
+
class="text-blue-500 underline hover:no-underline">Compel</a
|
45 |
+
> syntax.
|
46 |
+
</p>
|
47 |
+
"""
|
48 |
+
|
49 |
|
50 |
class Pipeline:
|
51 |
class Info(BaseModel):
|
|
|
53 |
title: str = "Text-to-Image LCM + LoRa"
|
54 |
description: str = "Generates an image from a text prompt"
|
55 |
input_mode: str = "text"
|
56 |
+
page_content: str = page_content
|
57 |
|
58 |
class InputParams(BaseModel):
|
59 |
prompt: str = Field(
|
|
|
95 |
if args.use_taesd:
|
96 |
self.pipe.vae = AutoencoderTiny.from_pretrained(
|
97 |
taesd_model, torch_dtype=torch_dtype, use_safetensors=True
|
98 |
+
).to(device)
|
99 |
self.pipe.scheduler = LCMScheduler.from_config(self.pipe.scheduler.config)
|
100 |
self.pipe.set_progress_bar_config(disable=True)
|
101 |
self.pipe.to(device=device, dtype=torch_dtype)
|
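For the LoRA variants, the parts that differ from the plain pipelines are loading the LCM-LoRA weights and switching the scheduler to `LCMScheduler`. A minimal sketch of that setup for the SDXL case, using the `stabilityai/stable-diffusion-xl-base-1.0` and `latent-consistency/lcm-lora-sdxl` checkpoints named in this diff; the prompt and step count are just examples:

```python
import torch
from diffusers import DiffusionPipeline, LCMScheduler

pipe = DiffusionPipeline.from_pretrained(
    "stabilityai/stable-diffusion-xl-base-1.0", torch_dtype=torch.float16
).to("cuda")

# Attach the LCM LoRA and switch to the LCM scheduler so a handful of steps is enough.
pipe.load_lora_weights("latent-consistency/lcm-lora-sdxl")
pipe.scheduler = LCMScheduler.from_config(pipe.scheduler.config)

image = pipe(
    "close-up photography of old man standing in the rain at night",
    num_inference_steps=4,
    guidance_scale=1.0,
).images[0]
```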
pipelines/txt2imgLoraSDXL.py
CHANGED
@@ -1,8 +1,4 @@
|
|
1 |
-
from diffusers import
|
2 |
-
DiffusionPipeline,
|
3 |
-
LCMScheduler,
|
4 |
-
AutoencoderKL,
|
5 |
-
)
|
6 |
from compel import Compel, ReturnedEmbeddingsType
|
7 |
import torch
|
8 |
|
@@ -16,13 +12,38 @@ from config import Args
|
|
16 |
from pydantic import BaseModel, Field
|
17 |
from PIL import Image
|
18 |
|
19 |
-
controlnet_model = "diffusers/controlnet-canny-sdxl-1.0"
|
20 |
model_id = "stabilityai/stable-diffusion-xl-base-1.0"
|
21 |
lcm_lora_id = "latent-consistency/lcm-lora-sdxl"
|
|
|
22 |
|
23 |
|
24 |
default_prompt = "close-up photography of old man standing in the rain at night, in a street lit by lamps, leica 35mm summilux"
|
25 |
default_negative_prompt = "blurry, low quality, render, 3D, oversaturated"
|
26 |
|
27 |
|
28 |
class Pipeline:
|
@@ -30,6 +51,7 @@ class Pipeline:
|
|
30 |
name: str = "LCM+Lora+SDXL"
|
31 |
title: str = "Text-to-Image SDXL + LCM + LoRA"
|
32 |
description: str = "Generates an image from a text prompt"
|
|
|
33 |
input_mode: str = "text"
|
34 |
|
35 |
class InputParams(BaseModel):
|
@@ -50,7 +72,7 @@ class Pipeline:
|
|
50 |
2159232, min=0, title="Seed", field="seed", hide=True, id="seed"
|
51 |
)
|
52 |
steps: int = Field(
|
53 |
-
4, min=
|
54 |
)
|
55 |
width: int = Field(
|
56 |
1024, min=2, max=15, title="Width", disabled=True, hide=True, id="width"
|
@@ -101,6 +123,10 @@ class Pipeline:
|
|
101 |
returned_embeddings_type=ReturnedEmbeddingsType.PENULTIMATE_HIDDEN_STATES_NON_NORMALIZED,
|
102 |
requires_pooled=[False, True],
|
103 |
)
|
104 |
|
105 |
if args.torch_compile:
|
106 |
self.pipe.unet = torch.compile(
|
|
|
1 |
+
from diffusers import DiffusionPipeline, LCMScheduler, AutoencoderKL, AutoencoderTiny
|
2 |
from compel import Compel, ReturnedEmbeddingsType
|
3 |
import torch
|
4 |
|
|
|
12 |
from pydantic import BaseModel, Field
|
13 |
from PIL import Image
|
14 |
|
|
|
15 |
model_id = "stabilityai/stable-diffusion-xl-base-1.0"
|
16 |
lcm_lora_id = "latent-consistency/lcm-lora-sdxl"
|
17 |
+
taesd_model = "madebyollin/taesdxl"
|
18 |
|
19 |
|
20 |
default_prompt = "close-up photography of old man standing in the rain at night, in a street lit by lamps, leica 35mm summilux"
|
21 |
default_negative_prompt = "blurry, low quality, render, 3D, oversaturated"
|
22 |
+
page_content = """
|
23 |
+
<h1 class="text-3xl font-bold">Real-Time Latent Consistency Model</h1>
|
24 |
+
<h3 class="text-xl font-bold">Text-to-Image SDXL + LCM + LoRA</h3>
|
25 |
+
<p class="text-sm">
|
26 |
+
This demo showcases
|
27 |
+
<a
|
28 |
+
href="https://huggingface.co/blog/lcm_lora"
|
29 |
+
target="_blank"
|
30 |
+
class="text-blue-500 underline hover:no-underline">LCM LoRA</a
|
31 |
+
>
|
32 |
+
Text to Image pipeline using
|
33 |
+
<a
|
34 |
+
href="https://huggingface.co/docs/diffusers/main/en/using-diffusers/lcm#performing-inference-with-lcm"
|
35 |
+
target="_blank"
|
36 |
+
class="text-blue-500 underline hover:no-underline">Diffusers</a
|
37 |
+
> with a MJPEG stream server.
|
38 |
+
</p>
|
39 |
+
<p class="text-sm text-gray-500">
|
40 |
+
Change the prompt to generate different images, accepts <a
|
41 |
+
href="https://github.com/damian0815/compel/blob/main/doc/syntax.md"
|
42 |
+
target="_blank"
|
43 |
+
class="text-blue-500 underline hover:no-underline">Compel</a
|
44 |
+
> syntax.
|
45 |
+
</p>
|
46 |
+
"""
|
47 |
|
48 |
|
49 |
class Pipeline:
|
|
|
51 |
name: str = "LCM+Lora+SDXL"
|
52 |
title: str = "Text-to-Image SDXL + LCM + LoRA"
|
53 |
description: str = "Generates an image from a text prompt"
|
54 |
+
page_content: str = page_content
|
55 |
input_mode: str = "text"
|
56 |
|
57 |
class InputParams(BaseModel):
|
|
|
72 |
2159232, min=0, title="Seed", field="seed", hide=True, id="seed"
|
73 |
)
|
74 |
steps: int = Field(
|
75 |
+
4, min=1, max=15, title="Steps", field="range", hide=True, id="steps"
|
76 |
)
|
77 |
width: int = Field(
|
78 |
1024, min=2, max=15, title="Width", disabled=True, hide=True, id="width"
|
|
|
123 |
returned_embeddings_type=ReturnedEmbeddingsType.PENULTIMATE_HIDDEN_STATES_NON_NORMALIZED,
|
124 |
requires_pooled=[False, True],
|
125 |
)
|
126 |
+
if args.use_taesd:
|
127 |
+
self.pipe.vae = AutoencoderTiny.from_pretrained(
|
128 |
+
taesd_model, torch_dtype=torch_dtype, use_safetensors=True
|
129 |
+
).to(device)
|
130 |
|
131 |
if args.torch_compile:
|
132 |
self.pipe.unet = torch.compile(
|
requirements.txt
CHANGED
@@ -1,4 +1,4 @@
|
|
1 |
-
git+https://github.com/huggingface/diffusers@
|
2 |
transformers==4.35.2
|
3 |
--extra-index-url https://download.pytorch.org/whl/cu121;
|
4 |
torch==2.1.0
|
@@ -9,4 +9,5 @@ accelerate==0.24.0
|
|
9 |
compel==2.0.2
|
10 |
controlnet-aux==0.0.7
|
11 |
peft==0.6.0
|
12 |
-
xformers; sys_platform != 'darwin' or platform_machine != 'arm64'
|
|
|
|
1 |
+
git+https://github.com/huggingface/diffusers@dadd55fb36acc862254cf935826d54349b0fcd8c
|
2 |
transformers==4.35.2
|
3 |
--extra-index-url https://download.pytorch.org/whl/cu121;
|
4 |
torch==2.1.0
|
|
|
9 |
compel==2.0.2
|
10 |
controlnet-aux==0.0.7
|
11 |
peft==0.6.0
|
12 |
+
xformers; sys_platform != 'darwin' or platform_machine != 'arm64'
|
13 |
+
markdown2
|
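The new `markdown2` dependency suggests that a pipeline's `page_content` could also be authored as Markdown and converted to HTML on the server; that is an assumption rather than something shown in this diff. A minimal sketch of the conversion, with `render_page_content` as a hypothetical helper name:

```python
import markdown2

def render_page_content(text: str) -> str:
    # Convert Markdown (raw HTML passes through unchanged) for the demo's info panel.
    return markdown2.markdown(text)

print(render_page_content("## Real-Time Latent Consistency Model\n*MJPEG* stream demo"))
```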