|
#include "util.h" |
|
#include <stdarg.h>

#include <algorithm>
#include <cmath>
#include <codecvt>
#include <exception>
#include <fstream>
#include <locale>
#include <sstream>
#include <string>
#include <thread>
#include <unordered_set>
#include <vector>
|
#include "preprocessing.hpp" |
|
|
|
#include <inttypes.h> |
|
#include <cinttypes> |
|
|
|
#if defined(__APPLE__) && defined(__MACH__) |
|
#include <sys/sysctl.h> |
|
#include <sys/types.h> |
|
#endif |
|
|
|
#if !defined(_WIN32) |
|
#include <sys/ioctl.h> |
|
#include <unistd.h> |
|
#endif |
|
|
|
#include "ggml-cpu.h" |
|
#include "ggml.h" |
|
#include "stable-diffusion.h" |
|
|
|
|
|
#include "stb_image_resize.h" |
|
|
|
// Returns true when `str` finishes with `ending` (an empty `ending` always matches).
bool ends_with(const std::string& str, const std::string& ending) {
    const size_t suffix_len = ending.size();
    if (suffix_len > str.size()) {
        return false;
    }
    // Compare only the tail of `str` that has the same length as `ending`.
    return str.compare(str.size() - suffix_len, suffix_len, ending) == 0;
}
|
|
|
// Returns true when `str` begins with `start` (an empty `start` always matches).
//
// The comparison is anchored at position 0, so a mismatch is detected after at
// most `start.size()` characters instead of scanning the whole string.
bool starts_with(const std::string& str, const std::string& start) {
    if (start.size() > str.size()) {
        return false;
    }
    return str.compare(0, start.size(), start) == 0;
}
|
|
|
// Returns true when `substr` occurs anywhere inside `str`.
bool contains(const std::string& str, const std::string& substr) {
    const bool found = str.find(substr) != std::string::npos;
    return found;
}
|
|
|
// Replaces every occurrence of `target` in `str` with `replacement`, in place.
void replace_all_chars(std::string& str, char target, char replacement) {
    std::replace(str.begin(), str.end(), target, replacement);
}
|
|
|
#ifdef _WIN32 |
|
#include <windows.h> |
|
|
|
bool file_exists(const std::string& filename) { |
|
DWORD attributes = GetFileAttributesA(filename.c_str()); |
|
return (attributes != INVALID_FILE_ATTRIBUTES && !(attributes & FILE_ATTRIBUTE_DIRECTORY)); |
|
} |
|
|
|
bool is_directory(const std::string& path) { |
|
DWORD attributes = GetFileAttributesA(path.c_str()); |
|
return (attributes != INVALID_FILE_ATTRIBUTES && (attributes & FILE_ATTRIBUTE_DIRECTORY)); |
|
} |
|
|
|
std::string get_full_path(const std::string& dir, const std::string& filename) { |
|
std::string full_path = dir + "\\" + filename; |
|
|
|
WIN32_FIND_DATA find_file_data; |
|
HANDLE hFind = FindFirstFile(full_path.c_str(), &find_file_data); |
|
|
|
if (hFind != INVALID_HANDLE_VALUE) { |
|
FindClose(hFind); |
|
return full_path; |
|
} else { |
|
return ""; |
|
} |
|
} |
|
|
|
std::vector<std::string> get_files_from_dir(const std::string& dir) { |
|
std::vector<std::string> files; |
|
|
|
WIN32_FIND_DATA findFileData; |
|
HANDLE hFind; |
|
|
|
char currentDirectory[MAX_PATH]; |
|
GetCurrentDirectory(MAX_PATH, currentDirectory); |
|
|
|
char directoryPath[MAX_PATH]; |
|
sprintf(directoryPath, "%s\\%s\\*", currentDirectory, dir.c_str()); |
|
|
|
|
|
hFind = FindFirstFile(directoryPath, &findFileData); |
|
|
|
|
|
if (hFind == INVALID_HANDLE_VALUE) { |
|
printf("Unable to find directory.\n"); |
|
return files; |
|
} |
|
|
|
|
|
do { |
|
|
|
if (!(findFileData.dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY)) { |
|
files.push_back(std::string(currentDirectory) + "\\" + dir + "\\" + std::string(findFileData.cFileName)); |
|
} |
|
} while (FindNextFile(hFind, &findFileData) != 0); |
|
|
|
|
|
FindClose(hFind); |
|
|
|
sort(files.begin(), files.end()); |
|
|
|
return files; |
|
} |
|
|
|
#else |
|
#include <dirent.h> |
|
#include <sys/stat.h> |
|
|
|
// Returns true when `filename` can be stat()'ed and refers to a regular file.
bool file_exists(const std::string& filename) {
    struct stat info;
    if (stat(filename.c_str(), &info) != 0) {
        return false;
    }
    return S_ISREG(info.st_mode);
}
|
|
|
// Returns true when `path` can be stat()'ed and refers to a directory.
bool is_directory(const std::string& path) {
    struct stat info;
    if (stat(path.c_str(), &info) != 0) {
        return false;
    }
    return S_ISDIR(info.st_mode);
}
|
|
|
|
|
// Looks for an entry in `dir` whose name equals `filename` ignoring case.
//
// Returns "<dir>/<actual entry name>" for the first match, or "" when `dir`
// cannot be opened or no entry matches.
std::string get_full_path(const std::string& dir, const std::string& filename) {
    DIR* dp = opendir(dir.c_str());
    if (dp == nullptr) {
        return "";
    }

    std::string result;
    struct dirent* entry;
    while ((entry = readdir(dp)) != nullptr) {
        if (strcasecmp(entry->d_name, filename.c_str()) == 0) {
            // Copy the name BEFORE closedir(): the dirent storage belongs to
            // the directory stream and must not be read once it is closed.
            result = dir + "/" + entry->d_name;
            break;
        }
    }

    closedir(dp);
    return result;
}
|
|
|
std::vector<std::string> get_files_from_dir(const std::string& dir) { |
|
std::vector<std::string> files; |
|
|
|
DIR* dp = opendir(dir.c_str()); |
|
|
|
if (dp != nullptr) { |
|
struct dirent* entry; |
|
|
|
while ((entry = readdir(dp)) != nullptr) { |
|
std::string fname = dir + "/" + entry->d_name; |
|
if (!is_directory(fname)) |
|
files.push_back(fname); |
|
} |
|
closedir(dp); |
|
} |
|
|
|
sort(files.begin(), files.end()); |
|
|
|
return files; |
|
} |
|
|
|
#endif |
|
|
|
|
|
|
|
|
|
// Best-effort estimate of the number of physical CPU cores.
//
// Linux:  counts the distinct `thread_siblings` values exposed under
//         /sys/devices/system/cpu/cpuN/topology/ (one distinct value per core).
// macOS:  queries sysctl "hw.perflevel0.physicalcpu", then "hw.physicalcpu".
// Other platforms, or when the above yields nothing: derived from
// std::thread::hardware_concurrency() -- used as-is up to 4, halved above 4,
// and 4 when the value is unknown (0).
int32_t sd_get_num_physical_cores() {
#ifdef __linux__
    std::unordered_set<std::string> siblings;
    for (uint32_t cpu = 0; cpu < UINT32_MAX; ++cpu) {
        // Per-CPU topology lives under ".../cpu/cpu<N>/"; the second "cpu"
        // path component is required.
        std::ifstream thread_siblings("/sys/devices/system/cpu/cpu" + std::to_string(cpu) +
                                      "/topology/thread_siblings");
        if (!thread_siblings.is_open()) {
            break;  // no more CPUs
        }
        std::string line;
        if (std::getline(thread_siblings, line)) {
            siblings.insert(line);
        }
    }
    if (!siblings.empty()) {
        return static_cast<int32_t>(siblings.size());
    }
#elif defined(__APPLE__) && defined(__MACH__)
    int32_t num_physical_cores;
    size_t len = sizeof(num_physical_cores);
    int result = sysctlbyname("hw.perflevel0.physicalcpu", &num_physical_cores, &len, NULL, 0);
    if (result == 0) {
        return num_physical_cores;
    }
    result = sysctlbyname("hw.physicalcpu", &num_physical_cores, &len, NULL, 0);
    if (result == 0) {
        return num_physical_cores;
    }
#elif defined(_WIN32)
    // TODO: no Windows-specific query is implemented; falls through to the generic estimate.
#endif
    const unsigned int n_threads = std::thread::hardware_concurrency();
    return n_threads > 0 ? (n_threads <= 4 ? n_threads : n_threads / 2) : 4;
}
|
|
|
// Progress hook installed via sd_set_progress_callback(); when set,
// pretty_progress() forwards to it instead of drawing its own bar.
static sd_progress_cb_t sd_progress_cb = nullptr;
// Opaque pointer handed back to the progress hook on every call.
void* sd_progress_cb_data = nullptr;
|
|
|
// Decodes a UTF-8 byte string into UTF-32 code points via std::wstring_convert.
std::u32string utf8_to_utf32(const std::string& utf8_str) {
    using Utf8Codec = std::codecvt_utf8<char32_t>;
    std::wstring_convert<Utf8Codec, char32_t> decoder;
    std::u32string decoded = decoder.from_bytes(utf8_str);
    return decoded;
}
|
|
|
// Encodes UTF-32 code points as a UTF-8 byte string via std::wstring_convert.
std::string utf32_to_utf8(const std::u32string& utf32_str) {
    using Utf8Codec = std::codecvt_utf8<char32_t>;
    std::wstring_convert<Utf8Codec, char32_t> encoder;
    std::string encoded = encoder.to_bytes(utf32_str);
    return encoded;
}
|
|
|
// Wraps a single code point value in a one-character UTF-32 string.
std::u32string unicode_value_to_utf32(int unicode_value) {
    const char32_t code_point = static_cast<char32_t>(unicode_value);
    return std::u32string(1, code_point);
}
|
|
|
// Returns the part of `path` after its last directory separator.
//
// Both '/' and '\\' count as separators and the LAST one of either kind wins,
// so mixed-separator paths such as "src/dir\\file.cpp" yield "file.cpp".
// A path without any separator is returned unchanged.
static std::string sd_basename(const std::string& path) {
    const size_t pos = path.find_last_of("/\\");
    if (pos == std::string::npos) {
        return path;
    }
    return path.substr(pos + 1);
}
|
|
|
// Joins two path fragments with '/'.
// An empty fragment yields the other one unchanged, and no separator is added
// when `p1` already ends in '/' or '\\'.
std::string path_join(const std::string& p1, const std::string& p2) {
    if (p1.empty()) {
        return p2;
    }
    if (p2.empty()) {
        return p1;
    }

    const char tail          = p1.back();
    const bool has_separator = (tail == '/') || (tail == '\\');

    std::string joined = p1;
    if (!has_separator) {
        joined += "/";
    }
    joined += p2;
    return joined;
}
|
|
|
// Splits `str` at every `delimiter`. Empty fields are kept, so the result
// always has (number of delimiters + 1) elements -- one empty string for "".
std::vector<std::string> splitString(const std::string& str, char delimiter) {
    std::vector<std::string> pieces;
    size_t cursor = 0;

    for (size_t hit = str.find(delimiter); hit != std::string::npos; hit = str.find(delimiter, cursor)) {
        pieces.push_back(str.substr(cursor, hit - cursor));
        cursor = hit + 1;
    }

    // Whatever follows the last delimiter (possibly nothing) is the final field.
    pieces.push_back(str.substr(cursor));
    return pieces;
}
|
|
|
// Resizes `img` to a 224x224 square with stbir_resize_uint8.
//
// Returns a heap-allocated sd_image_t (created with `new`) whose `data` buffer
// is allocated with malloc(), or NULL when the input is unusable, memory
// cannot be allocated, or the resize fails.
//
// The output keeps the channel count of the input: the destination buffer is
// sized for `img->channel` channels because that is how many channels
// stbir_resize_uint8 is asked to write. For 3-channel input this is identical
// to the previous fixed 3-channel buffer; for other channel counts it avoids
// writing past (or under-filling) the buffer.
sd_image_t* preprocess_id_image(sd_image_t* img) {
    const int shortest_edge = 224;
    const int size          = shortest_edge;

    if (img == NULL || img->data == NULL || img->channel == 0) {
        fprintf(stderr, "%s: invalid input image \n ", __func__);
        return NULL;
    }

    const uint32_t w = img->width;
    const uint32_t h = img->height;
    const uint32_t c = img->channel;

    unsigned char* buf = (unsigned char*)malloc(sizeof(unsigned char) * c * size * size);
    if (buf == NULL) {
        fprintf(stderr, "%s: failed to allocate resize buffer \n ", __func__);
        return NULL;
    }

    if (!stbir_resize_uint8(img->data, w, h, 0,
                            buf, size, size, 0,
                            c)) {
        fprintf(stderr, "%s: resize operation failed \n ", __func__);
        free(buf);  // do not leak the destination buffer on failure
        return NULL;
    }

    return new sd_image_t{(uint32_t)shortest_edge,
                          (uint32_t)shortest_edge,
                          c,
                          buf};
}
|
|
|
// Verbosity switch: log_message() prints only when > 0, and pretty_progress()
// stays silent when < 0.
static int sdloglevel{0};
// When true, pretty_progress() does not draw its progress bar.
static bool sdquiet{false};
|
void pretty_progress(int step, int steps, float time) { |
|
if (sd_progress_cb) { |
|
sd_progress_cb(step, steps, time, sd_progress_cb_data); |
|
return; |
|
} |
|
if (step == 0) { |
|
return; |
|
} |
|
if(sdloglevel<0 || sdquiet) |
|
{ |
|
return; |
|
} |
|
std::string progress = " |"; |
|
int max_progress = 50; |
|
int32_t current = (int32_t)(step * 1.f * max_progress / steps); |
|
for (int i = 0; i < 50; i++) { |
|
if (i > current) { |
|
progress += " "; |
|
} else if (i == current && i != max_progress - 1) { |
|
progress += ">"; |
|
} else { |
|
progress += "="; |
|
} |
|
} |
|
progress += "|"; |
|
printf(time > 1.0f ? "\r%s %i/%i - %.2fs/it" : "\r%s %i/%i - %.2fit/s", |
|
progress.c_str(), step, steps, |
|
time > 1.0f || time == 0 ? time : (1.0f / time)); |
|
fflush(stdout); |
|
if (step == steps) { |
|
printf("\n"); |
|
} |
|
} |
|
|
|
// Returns a copy of `s` without leading whitespace (as classified by std::isspace).
std::string ltrim(const std::string& s) {
    // The predicate takes `unsigned char` so bytes >= 0x80 (e.g. UTF-8 text)
    // reach std::isspace as non-negative values; passing a negative `char`
    // value to std::isspace is undefined behaviour.
    auto it = std::find_if(s.begin(), s.end(), [](unsigned char ch) {
        return !std::isspace(ch);
    });
    return std::string(it, s.end());
}
|
|
|
// Returns a copy of `s` without trailing whitespace (as classified by std::isspace).
std::string rtrim(const std::string& s) {
    // The predicate takes `unsigned char` so bytes >= 0x80 (e.g. UTF-8 text)
    // reach std::isspace as non-negative values; passing a negative `char`
    // value to std::isspace is undefined behaviour.
    auto it = std::find_if(s.rbegin(), s.rend(), [](unsigned char ch) {
        return !std::isspace(ch);
    });
    // base() converts the reverse iterator into the forward position just past
    // the last non-space character.
    return std::string(s.begin(), it.base());
}
|
|
|
std::string trim(const std::string& s) { |
|
return rtrim(ltrim(s)); |
|
} |
|
|
|
// Log sink installed via sd_set_log_callback(); log_printf() delivers every
// formatted line to it.
static sd_log_cb_t sd_log_cb = nullptr;
// Opaque pointer handed back to the log sink on every call.
void* sd_log_cb_data = nullptr;

// Capacity (excluding the terminating NUL) of log_printf()'s line buffer.
#define LOG_BUFFER_SIZE 1024
|
|
|
void log_message(const char* format, ...) { |
|
if (sdloglevel>0) { |
|
printf("\n"); |
|
va_list args; |
|
va_start(args, format); |
|
vprintf(format, args); |
|
va_end(args); |
|
fflush(stdout); |
|
} |
|
} |
|
void set_sd_log_level(int log) |
|
{ |
|
sdloglevel = log; |
|
} |
|
bool get_sd_log_level() |
|
{ |
|
return sdloglevel; |
|
} |
|
void set_sd_quiet(bool quiet) |
|
{ |
|
sdquiet = quiet; |
|
} |
|
|
|
// Formats "<basename(file)>:<line> - <message>\n" and hands it to the
// registered log callback together with `level`.
//
// Nothing is formatted when no callback is registered, since the formatted
// line has no other consumer. Output longer than LOG_BUFFER_SIZE is truncated.
// The line is built in a function-local static buffer, so the pointer passed
// to the callback is only valid until the next call.
// NOTE(review): the shared static buffer means concurrent calls would
// interleave -- confirm callers serialize logging.
void log_printf(sd_log_level_t level, const char* file, int line, const char* format, ...) {
    if (!sd_log_cb) {
        return;
    }

    va_list args;
    va_start(args, format);

    static char log_buffer[LOG_BUFFER_SIZE + 1];
    // A null `file` would be undefined behaviour when converted to std::string.
    int written = snprintf(log_buffer, LOG_BUFFER_SIZE, "%s:%-4d - ", sd_basename(file ? file : "").c_str(), line);

    if (written < 0) {
        // Prefix formatting failed: start from an empty, terminated buffer so
        // the message itself can still be emitted.
        log_buffer[0] = '\0';
        written       = 0;
    }

    if (written < LOG_BUFFER_SIZE) {
        vsnprintf(log_buffer + written, LOG_BUFFER_SIZE - written, format, args);
    }
    // The buffer holds at most LOG_BUFFER_SIZE - 1 characters here, and it is
    // one byte larger than LOG_BUFFER_SIZE, so the newline always fits.
    strncat(log_buffer, "\n", LOG_BUFFER_SIZE - strlen(log_buffer));

    sd_log_cb(level, log_buffer, sd_log_cb_data);

    va_end(args);
}
|
|
|
// Installs (or, with a null `cb`, removes) the log sink used by log_printf().
// `data` is passed back to `cb` unchanged on every call.
void sd_set_log_callback(sd_log_cb_t cb, void* data) {
    sd_log_cb_data = data;
    sd_log_cb      = cb;
}
|
// Installs (or, with a null `cb`, removes) the hook used by pretty_progress().
// `data` is passed back to `cb` unchanged on every call.
void sd_set_progress_callback(sd_progress_cb_t cb, void* data) {
    sd_progress_cb_data = data;
    sd_progress_cb      = cb;
}
|
// Builds a multi-line summary of the CPU features reported by the
// ggml_cpu_has_*() queries.
//
// The text lives in a function-local static buffer of 1024 bytes: the returned
// pointer stays valid after the call, and each call overwrites the previous
// contents.
const char* sd_get_system_info() {
    static char buffer[1024];

    std::stringstream ss;
    const auto add_flag = [&ss](const char* label, int value) {
        ss << label << value << '\n';
    };

    ss << "System Info: \n";
    add_flag(" SSE3 = ", ggml_cpu_has_sse3());
    add_flag(" AVX = ", ggml_cpu_has_avx());
    add_flag(" AVX2 = ", ggml_cpu_has_avx2());
    add_flag(" AVX512 = ", ggml_cpu_has_avx512());
    add_flag(" AVX512_VBMI = ", ggml_cpu_has_avx512_vbmi());
    add_flag(" AVX512_VNNI = ", ggml_cpu_has_avx512_vnni());
    add_flag(" FMA = ", ggml_cpu_has_fma());
    add_flag(" NEON = ", ggml_cpu_has_neon());
    add_flag(" ARM_FMA = ", ggml_cpu_has_arm_fma());
    add_flag(" F16C = ", ggml_cpu_has_f16c());
    add_flag(" FP16_VA = ", ggml_cpu_has_fp16_va());
    add_flag(" WASM_SIMD = ", ggml_cpu_has_wasm_simd());
    add_flag(" VSX = ", ggml_cpu_has_vsx());

    const std::string summary = ss.str();
    snprintf(buffer, sizeof(buffer), "%s", summary.c_str());
    return buffer;
}
|
|
|
const char* sd_type_name(enum sd_type_t type) { |
|
return ggml_type_name((ggml_type)type); |
|
} |
|
|
|
// Converts an 8-bit image into a float image with the same width, height and
// channel count. Sample values are copied as-is (not rescaled).
//
// The returned `data` buffer is allocated with malloc() and owned by the
// caller. `data` is NULL when the allocation fails.
sd_image_f32_t sd_image_t_to_sd_image_f32_t(sd_image_t image) {
    sd_image_f32_t converted_image;
    converted_image.width   = image.width;
    converted_image.height  = image.height;
    converted_image.channel = image.channel;

    // Count elements in size_t so large images do not wrap in 32-bit arithmetic.
    const size_t count = (size_t)image.width * image.height * image.channel;

    converted_image.data = (float*)malloc(count * sizeof(float));
    if (converted_image.data == NULL) {
        return converted_image;
    }

    for (size_t i = 0; i < count; i++) {
        converted_image.data[i] = (float)image.data[i];
    }

    return converted_image;
}
|
|
|
|
|
// Bilinear blend of four samples.
// v1/v2 are the samples at x_ratio 0/1 for y_ratio 0; v3/v4 are the samples at
// x_ratio 0/1 for y_ratio 1. Both ratios act as weights in [0, 1].
float interpolate(float v1, float v2, float v3, float v4, float x_ratio, float y_ratio) {
    const float inv_x = 1 - x_ratio;
    const float inv_y = 1 - y_ratio;
    return v1 * inv_x * inv_y + v2 * x_ratio * inv_y + v3 * inv_x * y_ratio + v4 * x_ratio * y_ratio;
}
|
|
|
// Resizes a float image to target_width x target_height with bilinear sampling.
//
// The returned `data` buffer is allocated with malloc() and owned by the
// caller. `data` is NULL when the source image is empty, a target dimension is
// not positive, or the allocation fails.
//
// The right/bottom neighbours used for interpolation are clamped to the last
// column/row, so samples on the image border never read past the source buffer.
sd_image_f32_t resize_sd_image_f32_t(sd_image_f32_t image, int target_width, int target_height) {
    sd_image_f32_t resized_image;
    resized_image.width   = target_width;
    resized_image.height  = target_height;
    resized_image.channel = image.channel;
    resized_image.data    = NULL;

    if (image.data == NULL || image.width == 0 || image.height == 0 ||
        target_width <= 0 || target_height <= 0) {
        return resized_image;
    }

    const int src_w       = (int)image.width;
    const int src_h       = (int)image.height;
    const size_t channels = image.channel;
    const size_t src_row  = (size_t)src_w * channels;
    const size_t dst_row  = (size_t)target_width * channels;

    resized_image.data = (float*)malloc(dst_row * (size_t)target_height * sizeof(float));
    if (resized_image.data == NULL) {
        return resized_image;
    }

    for (int y = 0; y < target_height; y++) {
        for (int x = 0; x < target_width; x++) {
            // Position of this output pixel in source coordinates.
            const float original_x = (float)x * image.width / target_width;
            const float original_y = (float)y * image.height / target_height;

            const int x1 = std::min((int)original_x, src_w - 1);
            const int y1 = std::min((int)original_y, src_h - 1);
            const int x2 = std::min(x1 + 1, src_w - 1);
            const int y2 = std::min(y1 + 1, src_h - 1);

            const float x_ratio = original_x - x1;
            const float y_ratio = original_y - y1;

            for (size_t k = 0; k < channels; k++) {
                const float v1 = image.data[y1 * src_row + x1 * channels + k];
                const float v2 = image.data[y1 * src_row + x2 * channels + k];
                const float v3 = image.data[y2 * src_row + x1 * channels + k];
                const float v4 = image.data[y2 * src_row + x2 * channels + k];

                resized_image.data[y * dst_row + x * channels + k] =
                    interpolate(v1, v2, v3, v4, x_ratio, y_ratio);
            }
        }
    }

    return resized_image;
}
|
|
|
// Normalizes the image in place: every sample of channel k becomes
// (value - means[k]) / stds[k]. Samples are stored interleaved, pixel by pixel.
// NOTE(review): means/stds are declared with 3 entries; an image with more
// than 3 channels would index past them -- confirm callers pass <= 3 channels.
void normalize_sd_image_f32_t(sd_image_f32_t image, float means[3], float stds[3]) {
    const size_t channels    = image.channel;
    const size_t pixel_count = (size_t)image.width * image.height;

    float* sample = image.data;
    for (size_t pixel = 0; pixel < pixel_count; ++pixel) {
        for (size_t k = 0; k < channels; ++k, ++sample) {
            *sample = (*sample - means[k]) / stds[k];
        }
    }
}
|
|
|
|
|
// Per-channel mean subtracted by clip_preprocess() after scaling samples to [0, 1].
// NOTE(review): presumably the CLIP image-normalization constants -- confirm.
float means[3] = {
    0.48145466,
    0.4578275,
    0.40821073,
};
// Per-channel standard deviation clip_preprocess() divides by.
float stds[3] = {
    0.26862954,
    0.26130258,
    0.27577711,
};
|
|
|
|
|
// Prepares a float image with samples in [0, 255] for a `size` x `size` input:
//   1. bilinear resize so that the shorter edge becomes `size`,
//   2. centre crop to `size` x `size`,
//   3. clamp to [0, 255], scale to [0, 1], then apply (v - means[k]) / stds[k].
//
// The returned `data` buffer is allocated with malloc() and owned by the
// caller. `data` is NULL when the source image is empty, `size` is not
// positive, or an allocation fails.
sd_image_f32_t clip_preprocess(sd_image_f32_t image, int size) {
    sd_image_f32_t result;
    result.width   = size;
    result.height  = size;
    result.channel = image.channel;
    result.data    = NULL;

    if (image.data == NULL || image.width == 0 || image.height == 0 || image.channel == 0 || size <= 0) {
        return result;
    }

    const float scale = (float)size / fmin(image.width, image.height);

    const int src_w       = (int)image.width;
    const int src_h       = (int)image.height;
    const size_t channels = image.channel;

    // Truncation could leave the shorter edge one pixel short of `size`, which
    // would make the crop below read outside the resized image; never go
    // below the crop size.
    const int new_width  = std::max((int)(scale * image.width), size);
    const int new_height = std::max((int)(scale * image.height), size);

    const size_t src_row = (size_t)src_w * channels;
    const size_t mid_row = (size_t)new_width * channels;
    const size_t out_row = (size_t)size * channels;

    float* resized_data = (float*)malloc(mid_row * (size_t)new_height * sizeof(float));
    if (resized_data == NULL) {
        return result;
    }

    // Step 1: bilinear resize.
    for (int y = 0; y < new_height; y++) {
        for (int x = 0; x < new_width; x++) {
            const float original_x = (float)x * image.width / new_width;
            const float original_y = (float)y * image.height / new_height;

            // Neighbours are clamped to the last column/row so border samples
            // never read past the source buffer.
            const int x1 = std::min((int)original_x, src_w - 1);
            const int y1 = std::min((int)original_y, src_h - 1);
            const int x2 = std::min(x1 + 1, src_w - 1);
            const int y2 = std::min(y1 + 1, src_h - 1);

            const float x_ratio = original_x - x1;
            const float y_ratio = original_y - y1;

            for (size_t k = 0; k < channels; k++) {
                const float v1 = image.data[y1 * src_row + x1 * channels + k];
                const float v2 = image.data[y1 * src_row + x2 * channels + k];
                const float v3 = image.data[y2 * src_row + x1 * channels + k];
                const float v4 = image.data[y2 * src_row + x2 * channels + k];

                resized_data[y * mid_row + x * channels + k] =
                    interpolate(v1, v2, v3, v4, x_ratio, y_ratio);
            }
        }
    }

    result.data = (float*)malloc(out_row * (size_t)size * sizeof(float));
    if (result.data == NULL) {
        free(resized_data);
        return result;
    }

    // Top-left corner of the centred crop window inside the resized image.
    const int h = (new_height - size) / 2;
    const int w = (new_width - size) / 2;

    // Steps 2 and 3: crop, rescale to [0, 1] and normalize.
    for (int i = 0; i < size; i++) {
        for (int j = 0; j < size; j++) {
            for (size_t k = 0; k < channels; k++) {
                const float sample = resized_data[(size_t)(i + h) * mid_row + (size_t)(j + w) * channels + k];
                const float value  = fmin(fmax(sample, 0.0f), 255.0f) / 255.0f;
                result.data[i * out_row + j * channels + k] = (value - means[k]) / stds[k];
            }
        }
    }

    free(resized_data);

    return result;
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
// Splits a prompt into (text, weight) pieces according to its attention markup.
//
//   (abc)      multiplies the weight of "abc" by 1.1
//   [abc]      multiplies the weight of "abc" by 1 / 1.1
//   (abc:1.5)  multiplies the weight of "abc" by 1.5
//   \( \) \[ \] \\   produce the literal character without the backslash
//
// Brackets that are never closed are applied to everything after them. A
// ":<number>)" token that has no open "(" to close, or whose number cannot be
// parsed, is kept as ordinary text instead of being dropped or throwing.
// Adjacent pieces with equal weight are merged, and an empty prompt yields a
// single ("", 1.0) piece.
std::vector<std::pair<std::string, float>> parse_prompt_attention(const std::string& text) {
    std::vector<std::pair<std::string, float>> res;
    std::vector<size_t> round_brackets;   // res index at which each open "(" started
    std::vector<size_t> square_brackets;  // res index at which each open "[" started

    const float round_bracket_multiplier  = 1.1f;
    const float square_bracket_multiplier = 1 / 1.1f;

    // Compiled once; matching against a const std::regex does not modify it.
    static const std::regex re_attention(R"(\\\(|\\\)|\\\[|\\\]|\\\\|\\|\(|\[|:([+-]?[.\d]+)\)|\)|\]|[^\\()\[\]:]+|:)");

    // Scales the weight of every piece from start_position to the end.
    auto multiply_range = [&res](size_t start_position, float multiplier) {
        for (size_t p = start_position; p < res.size(); ++p) {
            res[p].second *= multiplier;
        }
    };

    const std::sregex_iterator no_more_tokens;
    for (std::sregex_iterator it(text.begin(), text.end(), re_attention); it != no_more_tokens; ++it) {
        const std::smatch& m     = *it;
        const std::string token  = m[0];
        const std::string weight = m[1];  // non-empty only for ":<number>)"

        if (token == "(") {
            round_brackets.push_back(res.size());
            continue;
        }
        if (token == "[") {
            square_brackets.push_back(res.size());
            continue;
        }

        if (!weight.empty() && !round_brackets.empty()) {
            // The pattern accepts strings such as "." or "1..2"; std::stof
            // throws when nothing can be converted or the value is out of range.
            bool parsed_ok     = true;
            float explicit_mul = 1.0f;
            try {
                explicit_mul = std::stof(weight);
            } catch (const std::exception&) {
                parsed_ok = false;
            }
            if (parsed_ok) {
                multiply_range(round_brackets.back(), explicit_mul);
                round_brackets.pop_back();
                continue;
            }
            // Unparsable weight: fall through and keep the token as text.
        }

        if (token == ")" && !round_brackets.empty()) {
            multiply_range(round_brackets.back(), round_bracket_multiplier);
            round_brackets.pop_back();
        } else if (token == "]" && !square_brackets.empty()) {
            multiply_range(square_brackets.back(), square_bracket_multiplier);
            square_brackets.pop_back();
        } else if (token.size() == 2 && token[0] == '\\') {
            // Escaped bracket or backslash: keep only the escaped character.
            res.push_back({token.substr(1), 1.0f});
        } else {
            res.push_back({token, 1.0f});
        }
    }

    // Unclosed brackets affect everything that followed them.
    for (size_t pos : round_brackets) {
        multiply_range(pos, round_bracket_multiplier);
    }
    for (size_t pos : square_brackets) {
        multiply_range(pos, square_bracket_multiplier);
    }

    if (res.empty()) {
        res.push_back({"", 1.0f});
    }

    // Merge neighbours that ended up with identical weights.
    std::vector<std::pair<std::string, float>> merged;
    merged.reserve(res.size());
    for (auto& piece : res) {
        if (!merged.empty() && merged.back().second == piece.second) {
            merged.back().first += piece.first;
        } else {
            merged.push_back(std::move(piece));
        }
    }

    return merged;
}