diff --git a/.gitattributes b/.gitattributes index 7178fd6dccceed56913f8bdabd035546c3daa15e..80567d1326fbd214debacdaa7d39498ba279d360 100644 --- a/.gitattributes +++ b/.gitattributes @@ -81,3 +81,4 @@ llama.cpp/build/bin/test-json-schema-to-grammar filter=lfs diff=lfs merge=lfs -t llama.cpp/build/bin/test-tokenizer-0 filter=lfs diff=lfs merge=lfs -text llama.cpp/build/bin/test-tokenizer-1-bpe filter=lfs diff=lfs merge=lfs -text llama.cpp/build/bin/test-tokenizer-1-spm filter=lfs diff=lfs merge=lfs -text +llama.cpp/models/*.gguf filter=lfs diff=lfs merge=lfs -text diff --git a/llama.cpp/.clang-format b/llama.cpp/.clang-format new file mode 100644 index 0000000000000000000000000000000000000000..45232b80ed8cd1c7ce3892a9d181d38b131240d7 --- /dev/null +++ b/llama.cpp/.clang-format @@ -0,0 +1,161 @@ +--- +Language: Cpp +AlignAfterOpenBracket: Align +AlignArrayOfStructures: Left +AlignConsecutiveAssignments: AcrossComments +AlignConsecutiveBitFields: AcrossComments +AlignConsecutiveDeclarations: AcrossComments +AlignConsecutiveMacros: AcrossComments +# AlignConsecutiveShortCaseStatements: AcrossComments +AlignEscapedNewlines: Left # LeftWithLastLine +AlignOperands: Align +AlignTrailingComments: + Kind: Always + OverEmptyLines: 1 +AllowAllArgumentsOnNextLine: true +AllowAllParametersOfDeclarationOnNextLine: false +# AllowBreakBeforeNoexceptSpecifier: OnlyWithParen +AllowShortBlocksOnASingleLine: Never +AllowShortCaseLabelsOnASingleLine: false +AllowShortFunctionsOnASingleLine: Inline +AllowShortIfStatementsOnASingleLine: Never +AllowShortLambdasOnASingleLine: Inline +AllowShortLoopsOnASingleLine: false +AlwaysBreakBeforeMultilineStrings: true +BinPackArguments: true +BinPackParameters: true # OnePerLine +BitFieldColonSpacing: Both +BreakBeforeBraces: Custom # Attach +BraceWrapping: + AfterCaseLabel: true + AfterClass: false + AfterControlStatement: false + AfterEnum: false + AfterFunction: false + AfterNamespace: false + AfterObjCDeclaration: false + AfterStruct: false + AfterUnion: false + AfterExternBlock: false + BeforeCatch: false + BeforeElse: false + BeforeLambdaBody: false + BeforeWhile: false + IndentBraces: false + SplitEmptyFunction: false + SplitEmptyRecord: false + SplitEmptyNamespace: false +# BreakAdjacentStringLiterals: true +BreakAfterAttributes: Never +BreakBeforeBinaryOperators: None +BreakBeforeInlineASMColon: OnlyMultiline +BreakBeforeTernaryOperators: false +# BreakBinaryOperations: Never +BreakConstructorInitializers: AfterColon +# BreakFunctionDefinitionParameters: false +BreakInheritanceList: AfterComma +BreakStringLiterals: true +# BreakTemplateDeclarations: Yes +ColumnLimit: 120 +CommentPragmas: '^ IWYU pragma:' +CompactNamespaces: false +ConstructorInitializerIndentWidth: 4 +ContinuationIndentWidth: 4 +Cpp11BracedListStyle: false +DerivePointerAlignment: false +DisableFormat: false +EmptyLineBeforeAccessModifier: Leave +EmptyLineAfterAccessModifier: Never +ExperimentalAutoDetectBinPacking: false +FixNamespaceComments: true +IncludeBlocks: Regroup +IncludeCategories: + - Regex: '^<.*\.h>' + Priority: 1 + SortPriority: 0 + - Regex: '^<.*' + Priority: 2 + SortPriority: 0 + - Regex: '.*' + Priority: 3 + SortPriority: 0 +IncludeIsMainRegex: '([-_](test|unittest))?$' +IncludeIsMainSourceRegex: '' +IndentAccessModifiers: false +IndentCaseBlocks: true +IndentCaseLabels: true +IndentExternBlock: NoIndent +IndentGotoLabels: false +IndentPPDirectives: AfterHash +IndentWidth: 4 +IndentWrappedFunctionNames: false +InsertBraces: true # NOTE: may lead to incorrect formatting 
+InsertNewlineAtEOF: true +JavaScriptQuotes: Leave +JavaScriptWrapImports: true +KeepEmptyLinesAtTheStartOfBlocks: false +LambdaBodyIndentation: Signature +LineEnding: LF +MacroBlockBegin: '' +MacroBlockEnd: '' +MaxEmptyLinesToKeep: 1 +NamespaceIndentation: None +ObjCBinPackProtocolList: Auto +ObjCBlockIndentWidth: 4 +ObjCSpaceAfterProperty: true +ObjCSpaceBeforeProtocolList: true +PPIndentWidth: -1 +PackConstructorInitializers: CurrentLine +PenaltyBreakAssignment: 2 +PenaltyBreakBeforeFirstCallParameter: 1 +PenaltyBreakComment: 300 +PenaltyBreakFirstLessLess: 120 +PenaltyBreakString: 1000 +PenaltyBreakTemplateDeclaration: 10 +PenaltyExcessCharacter: 1000000 +PenaltyReturnTypeOnItsOwnLine: 200 +PointerAlignment: Middle +QualifierAlignment: Left +#QualifierOrder: ['static', 'inline', 'friend', 'constexpr', 'const', 'volatile', 'type', 'restrict'] +RawStringFormats: + - Language: Cpp + Delimiters: + - cc + - CC + - cpp + - Cpp + - CPP + - 'c++' + - 'C++' + CanonicalDelimiter: '' +ReferenceAlignment: Middle +ReflowComments: false # IndentOnly +SeparateDefinitionBlocks: Always +SortIncludes: CaseInsensitive +SortUsingDeclarations: LexicographicNumeric +SpaceAfterCStyleCast: true +SpaceAfterLogicalNot: false +SpaceAfterTemplateKeyword: true +SpaceBeforeAssignmentOperators: true +SpaceBeforeCpp11BracedList: false +SpaceBeforeCtorInitializerColon: true +SpaceBeforeInheritanceColon: true +SpaceBeforeParens: ControlStatements +SpaceBeforeRangeBasedForLoopColon: true +SpaceInEmptyBlock: false +SpaceInEmptyParentheses: false +SpacesBeforeTrailingComments: 2 +SpacesInAngles: Never +SpacesInContainerLiterals: true +SpacesInLineCommentPrefix: + Minimum: 1 + Maximum: -1 +SpacesInParentheses: false +SpacesInSquareBrackets: false +SpaceBeforeSquareBrackets: false +Standard: c++17 +TabWidth: 4 +UseTab: Never +WhitespaceSensitiveMacros: ['STRINGIZE'] +... 
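A minimal usage sketch for the .clang-format added above (the source file path is illustrative, and a clang-format release recent enough for the options used here is assumed):

    # format a source file in place; clang-format picks up the nearest .clang-format automatically
    clang-format -i src/llama.cpp
    # verify formatting without modifying the file, failing on any would-be change
    clang-format --dry-run --Werror src/llama.cpp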
+ diff --git a/llama.cpp/.clang-tidy b/llama.cpp/.clang-tidy new file mode 100644 index 0000000000000000000000000000000000000000..310c3d182c8f2ed2c3bc0ddfa9f6fb74f86e39ae --- /dev/null +++ b/llama.cpp/.clang-tidy @@ -0,0 +1,26 @@ +--- +Checks: > + bugprone-*, + -bugprone-easily-swappable-parameters, + -bugprone-implicit-widening-of-multiplication-result, + -bugprone-misplaced-widening-cast, + -bugprone-narrowing-conversions, + readability-*, + -readability-avoid-unconditional-preprocessor-if, + -readability-function-cognitive-complexity, + -readability-identifier-length, + -readability-implicit-bool-conversion, + -readability-magic-numbers, + -readability-uppercase-literal-suffix, + -readability-simplify-boolean-expr, + clang-analyzer-*, + -clang-analyzer-security.insecureAPI.DeprecatedOrUnsafeBufferHandling, + performance-*, + portability-*, + -portability-simd-intrinsics, + misc-*, + -misc-const-correctness, + -misc-non-private-member-variables-in-classes, + -misc-no-recursion, + -misc-use-anonymous-namespace, +FormatStyle: none diff --git a/llama.cpp/.devops/cloud-v-pipeline b/llama.cpp/.devops/cloud-v-pipeline new file mode 100644 index 0000000000000000000000000000000000000000..af8c0cea6155c71c0c162e034715908ea16ee9cf --- /dev/null +++ b/llama.cpp/.devops/cloud-v-pipeline @@ -0,0 +1,22 @@ +node('x86_runner1'){ // Running on x86 runner containing latest vector qemu, latest vector gcc and all the necessary libraries + stage('Cleanup'){ + cleanWs() // Cleaning previous CI build in workspace + } + stage('checkout repo'){ + retry(5){ // Retry if the cloning fails due to some reason + checkout scm // Clone the repo on Runner + } + } + stage('Compiling llama.cpp'){ + sh'''#!/bin/bash + make RISCV=1 RISCV_CROSS_COMPILE=1 # Compiling llama for RISC-V + ''' + } + stage('Running llama.cpp'){ + sh'''#!/bin/bash + module load gnu-bin2/0.1 # loading latest versions of vector qemu and vector gcc + qemu-riscv64 -L /softwares/gnu-bin2/sysroot -cpu rv64,v=true,vlen=256,elen=64,vext_spec=v1.0 ./llama-cli -m /home/alitariq/codellama-7b.Q4_K_M.gguf -p "Anything" -n 9 > llama_log.txt # Running llama.cpp on vector qemu-riscv64 + cat llama_log.txt # Printing results + ''' + } +} diff --git a/llama.cpp/.devops/cpu.Dockerfile b/llama.cpp/.devops/cpu.Dockerfile new file mode 100644 index 0000000000000000000000000000000000000000..522ee8147d63a9e595dcd7c4d71d246705739e4a --- /dev/null +++ b/llama.cpp/.devops/cpu.Dockerfile @@ -0,0 +1,92 @@ +ARG UBUNTU_VERSION=22.04 + +FROM ubuntu:$UBUNTU_VERSION AS build + +ARG TARGETARCH + +ARG GGML_CPU_ARM_ARCH=armv8-a + +RUN apt-get update && \ + apt-get install -y build-essential git cmake libcurl4-openssl-dev + +WORKDIR /app + +COPY . . + +RUN if [ "$TARGETARCH" = "amd64" ]; then \ + cmake -S . -B build -DCMAKE_BUILD_TYPE=Release -DLLAMA_CURL=ON -DGGML_NATIVE=OFF -DGGML_BACKEND_DL=ON -DGGML_CPU_ALL_VARIANTS=ON; \ + elif [ "$TARGETARCH" = "arm64" ]; then \ + cmake -S . 
-B build -DCMAKE_BUILD_TYPE=Release -DLLAMA_CURL=ON -DGGML_NATIVE=OFF -DGGML_CPU_ARM_ARCH=${GGML_CPU_ARM_ARCH}; \ + else \ + echo "Unsupported architecture"; \ + exit 1; \ + fi && \ + cmake --build build -j $(nproc) + +RUN mkdir -p /app/lib && \ + find build -name "*.so" -exec cp {} /app/lib \; + +RUN mkdir -p /app/full \ + && cp build/bin/* /app/full \ + && cp *.py /app/full \ + && cp -r gguf-py /app/full \ + && cp -r requirements /app/full \ + && cp requirements.txt /app/full \ + && cp .devops/tools.sh /app/full/tools.sh + +## Base image +FROM ubuntu:$UBUNTU_VERSION AS base + +RUN apt-get update \ + && apt-get install -y libgomp1 curl\ + && apt autoremove -y \ + && apt clean -y \ + && rm -rf /tmp/* /var/tmp/* \ + && find /var/cache/apt/archives /var/lib/apt/lists -not -name lock -type f -delete \ + && find /var/cache -type f -delete + +COPY --from=build /app/lib/ /app + +### Full +FROM base AS full + +COPY --from=build /app/full /app + +WORKDIR /app + +RUN apt-get update \ + && apt-get install -y \ + git \ + python3 \ + python3-pip \ + && pip install --upgrade pip setuptools wheel \ + && pip install -r requirements.txt \ + && apt autoremove -y \ + && apt clean -y \ + && rm -rf /tmp/* /var/tmp/* \ + && find /var/cache/apt/archives /var/lib/apt/lists -not -name lock -type f -delete \ + && find /var/cache -type f -delete + +ENTRYPOINT ["/app/tools.sh"] + +### Light, CLI only +FROM base AS light + +COPY --from=build /app/full/llama-cli /app + +WORKDIR /app + +ENTRYPOINT [ "/app/llama-cli" ] + +### Server, Server only +FROM base AS server + +ENV LLAMA_ARG_HOST=0.0.0.0 + +COPY --from=build /app/full/llama-server /app + +WORKDIR /app + +HEALTHCHECK CMD [ "curl", "-f", "http://localhost:8080/health" ] + +ENTRYPOINT [ "/app/llama-server" ] diff --git a/llama.cpp/.devops/cuda.Dockerfile b/llama.cpp/.devops/cuda.Dockerfile new file mode 100644 index 0000000000000000000000000000000000000000..974dd78a8b08a779cc2263f0f59ba57a5a4298a8 --- /dev/null +++ b/llama.cpp/.devops/cuda.Dockerfile @@ -0,0 +1,94 @@ +ARG UBUNTU_VERSION=22.04 +# This needs to generally match the container host's environment. +ARG CUDA_VERSION=12.6.0 +# Target the CUDA build image +ARG BASE_CUDA_DEV_CONTAINER=nvidia/cuda:${CUDA_VERSION}-devel-ubuntu${UBUNTU_VERSION} + +ARG BASE_CUDA_RUN_CONTAINER=nvidia/cuda:${CUDA_VERSION}-runtime-ubuntu${UBUNTU_VERSION} + +FROM ${BASE_CUDA_DEV_CONTAINER} AS build + +# CUDA architecture to build for (defaults to all supported archs) +ARG CUDA_DOCKER_ARCH=default + +RUN apt-get update && \ + apt-get install -y build-essential cmake python3 python3-pip git libcurl4-openssl-dev libgomp1 + +WORKDIR /app + +COPY . . + +RUN if [ "${CUDA_DOCKER_ARCH}" != "default" ]; then \ + export CMAKE_ARGS="-DCMAKE_CUDA_ARCHITECTURES=${CUDA_DOCKER_ARCH}"; \ + fi && \ + cmake -B build -DGGML_NATIVE=OFF -DGGML_CUDA=ON -DLLAMA_CURL=ON ${CMAKE_ARGS} -DCMAKE_EXE_LINKER_FLAGS=-Wl,--allow-shlib-undefined . 
&& \ + cmake --build build --config Release -j$(nproc) + +RUN mkdir -p /app/lib && \ + find build -name "*.so" -exec cp {} /app/lib \; + +RUN mkdir -p /app/full \ + && cp build/bin/* /app/full \ + && cp *.py /app/full \ + && cp -r gguf-py /app/full \ + && cp -r requirements /app/full \ + && cp requirements.txt /app/full \ + && cp .devops/tools.sh /app/full/tools.sh + +## Base image +FROM ${BASE_CUDA_RUN_CONTAINER} AS base + +RUN apt-get update \ + && apt-get install -y libgomp1 curl\ + && apt autoremove -y \ + && apt clean -y \ + && rm -rf /tmp/* /var/tmp/* \ + && find /var/cache/apt/archives /var/lib/apt/lists -not -name lock -type f -delete \ + && find /var/cache -type f -delete + +COPY --from=build /app/lib/ /app + +### Full +FROM base AS full + +COPY --from=build /app/full /app + +WORKDIR /app + +RUN apt-get update \ + && apt-get install -y \ + git \ + python3 \ + python3-pip \ + && pip install --upgrade pip setuptools wheel \ + && pip install -r requirements.txt \ + && apt autoremove -y \ + && apt clean -y \ + && rm -rf /tmp/* /var/tmp/* \ + && find /var/cache/apt/archives /var/lib/apt/lists -not -name lock -type f -delete \ + && find /var/cache -type f -delete + + +ENTRYPOINT ["/app/tools.sh"] + +### Light, CLI only +FROM base AS light + +COPY --from=build /app/full/llama-cli /app + +WORKDIR /app + +ENTRYPOINT [ "/app/llama-cli" ] + +### Server, Server only +FROM base AS server + +ENV LLAMA_ARG_HOST=0.0.0.0 + +COPY --from=build /app/full/llama-server /app + +WORKDIR /app + +HEALTHCHECK CMD [ "curl", "-f", "http://localhost:8080/health" ] + +ENTRYPOINT [ "/app/llama-server" ] diff --git a/llama.cpp/.devops/intel.Dockerfile b/llama.cpp/.devops/intel.Dockerfile new file mode 100644 index 0000000000000000000000000000000000000000..af783f5e998ebac28965ed81798709d4f65ec3f1 --- /dev/null +++ b/llama.cpp/.devops/intel.Dockerfile @@ -0,0 +1,91 @@ +ARG ONEAPI_VERSION=2025.0.0-0-devel-ubuntu22.04 + +## Build Image + +FROM intel/oneapi-basekit:$ONEAPI_VERSION AS build + +ARG GGML_SYCL_F16=OFF +RUN apt-get update && \ + apt-get install -y git libcurl4-openssl-dev + +WORKDIR /app + +COPY . . 
+ +RUN if [ "${GGML_SYCL_F16}" = "ON" ]; then \ + echo "GGML_SYCL_F16 is set" \ + && export OPT_SYCL_F16="-DGGML_SYCL_F16=ON"; \ + fi && \ + echo "Building with dynamic libs" && \ + cmake -B build -DGGML_NATIVE=OFF -DGGML_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx -DLLAMA_CURL=ON ${OPT_SYCL_F16} && \ + cmake --build build --config Release -j$(nproc) + +RUN mkdir -p /app/lib && \ + find build -name "*.so" -exec cp {} /app/lib \; + +RUN mkdir -p /app/full \ + && cp build/bin/* /app/full \ + && cp *.py /app/full \ + && cp -r gguf-py /app/full \ + && cp -r requirements /app/full \ + && cp requirements.txt /app/full \ + && cp .devops/tools.sh /app/full/tools.sh + +FROM intel/oneapi-basekit:$ONEAPI_VERSION AS base + +RUN apt-get update \ + && apt-get install -y libgomp1 curl\ + && apt autoremove -y \ + && apt clean -y \ + && rm -rf /tmp/* /var/tmp/* \ + && find /var/cache/apt/archives /var/lib/apt/lists -not -name lock -type f -delete \ + && find /var/cache -type f -delete + +### Full +FROM base AS full + +COPY --from=build /app/lib/ /app +COPY --from=build /app/full /app + +WORKDIR /app + +RUN apt-get update \ + && apt-get install -y \ + git \ + python3 \ + python3-pip \ + && pip install --upgrade pip setuptools wheel \ + && pip install -r requirements.txt \ + && apt autoremove -y \ + && apt clean -y \ + && rm -rf /tmp/* /var/tmp/* \ + && find /var/cache/apt/archives /var/lib/apt/lists -not -name lock -type f -delete \ + && find /var/cache -type f -delete + + +ENTRYPOINT ["/app/tools.sh"] + +### Light, CLI only +FROM base AS light + +COPY --from=build /app/lib/ /app +COPY --from=build /app/full/llama-cli /app + +WORKDIR /app + +ENTRYPOINT [ "/app/llama-cli" ] + +### Server, Server only +FROM base AS server + +ENV LLAMA_ARG_HOST=0.0.0.0 + +COPY --from=build /app/lib/ /app +COPY --from=build /app/full/llama-server /app + +WORKDIR /app + +HEALTHCHECK CMD [ "curl", "-f", "http://localhost:8080/health" ] + +ENTRYPOINT [ "/app/llama-server" ] + diff --git a/llama.cpp/.devops/llama-cli-cann.Dockerfile b/llama.cpp/.devops/llama-cli-cann.Dockerfile new file mode 100644 index 0000000000000000000000000000000000000000..02dce501ce28629776e631f3f2e56bbf66df12a7 --- /dev/null +++ b/llama.cpp/.devops/llama-cli-cann.Dockerfile @@ -0,0 +1,44 @@ +ARG ASCEND_VERSION=8.0.rc2.alpha003-910b-openeuler22.03-py3.8 + +FROM ascendai/cann:$ASCEND_VERSION AS build + +WORKDIR /app + +COPY . . + +RUN yum install -y gcc g++ cmake make +ENV ASCEND_TOOLKIT_HOME=/usr/local/Ascend/ascend-toolkit/latest +ENV LIBRARY_PATH=${ASCEND_TOOLKIT_HOME}/lib64:$LIBRARY_PATH +ENV LD_LIBRARY_PATH=${ASCEND_TOOLKIT_HOME}/lib64:${ASCEND_TOOLKIT_HOME}/lib64/plugin/opskernel:${ASCEND_TOOLKIT_HOME}/lib64/plugin/nnengine:${ASCEND_TOOLKIT_HOME}/opp/built-in/op_impl/ai_core/tbe/op_tiling:${LD_LIBRARY_PATH} +ENV PYTHONPATH=${ASCEND_TOOLKIT_HOME}/python/site-packages:${ASCEND_TOOLKIT_HOME}/opp/built-in/op_impl/ai_core/tbe:${PYTHONPATH} +ENV PATH=${ASCEND_TOOLKIT_HOME}/bin:${ASCEND_TOOLKIT_HOME}/compiler/ccec_compiler/bin:${PATH} +ENV ASCEND_AICPU_PATH=${ASCEND_TOOLKIT_HOME} +ENV ASCEND_OPP_PATH=${ASCEND_TOOLKIT_HOME}/opp +ENV TOOLCHAIN_HOME=${ASCEND_TOOLKIT_HOME}/toolkit +ENV ASCEND_HOME_PATH=${ASCEND_TOOLKIT_HOME} + +# find libascend_hal.so, because the drive hasn`t been mounted. 
+ENV LD_LIBRARY_PATH=${ASCEND_TOOLKIT_HOME}/runtime/lib64/stub:$LD_LIBRARY_PATH + +RUN echo "Building with static libs" && \ + source /usr/local/Ascend/ascend-toolkit/set_env.sh --force && \ + cmake -B build -DGGML_NATIVE=OFF -DGGML_CANN=ON -DBUILD_SHARED_LIBS=OFF && \ + cmake --build build --config Release --target llama-cli + +# TODO: use image with NNRT +FROM ascendai/cann:$ASCEND_VERSION AS runtime +COPY --from=build /app/build/bin/llama-cli /llama-cli + +ENV LC_ALL=C.utf8 + +ENV ASCEND_TOOLKIT_HOME=/usr/local/Ascend/ascend-toolkit/latest +ENV LIBRARY_PATH=${ASCEND_TOOLKIT_HOME}/lib64:$LIBRARY_PATH +ENV LD_LIBRARY_PATH=${ASCEND_TOOLKIT_HOME}/lib64:${ASCEND_TOOLKIT_HOME}/lib64/plugin/opskernel:${ASCEND_TOOLKIT_HOME}/lib64/plugin/nnengine:${ASCEND_TOOLKIT_HOME}/opp/built-in/op_impl/ai_core/tbe/op_tiling:${LD_LIBRARY_PATH} +ENV PYTHONPATH=${ASCEND_TOOLKIT_HOME}/python/site-packages:${ASCEND_TOOLKIT_HOME}/opp/built-in/op_impl/ai_core/tbe:${PYTHONPATH} +ENV PATH=${ASCEND_TOOLKIT_HOME}/bin:${ASCEND_TOOLKIT_HOME}/compiler/ccec_compiler/bin:${PATH} +ENV ASCEND_AICPU_PATH=${ASCEND_TOOLKIT_HOME} +ENV ASCEND_OPP_PATH=${ASCEND_TOOLKIT_HOME}/opp +ENV TOOLCHAIN_HOME=${ASCEND_TOOLKIT_HOME}/toolkit +ENV ASCEND_HOME_PATH=${ASCEND_TOOLKIT_HOME} + +ENTRYPOINT ["/llama-cli" ] diff --git a/llama.cpp/.devops/llama-cpp-cuda.srpm.spec b/llama.cpp/.devops/llama-cpp-cuda.srpm.spec new file mode 100644 index 0000000000000000000000000000000000000000..7425d3a9d7a4027fcff13077ffec3caf6e66d02f --- /dev/null +++ b/llama.cpp/.devops/llama-cpp-cuda.srpm.spec @@ -0,0 +1,83 @@ +# SRPM for building from source and packaging an RPM for RPM-based distros. +# https://docs.fedoraproject.org/en-US/quick-docs/creating-rpm-packages +# Built and maintained by John Boero - boeroboy@gmail.com +# In honor of Seth Vidal https://www.redhat.com/it/blog/thank-you-seth-vidal + +# Notes for llama.cpp: +# 1. Tags are currently based on hash - which will not sort asciibetically. +# We need to declare standard versioning if people want to sort latest releases. +# 2. Builds for CUDA/OpenCL support are separate, with different depenedencies. +# 3. NVidia's developer repo must be enabled with nvcc, cublas, clblas, etc installed. +# Example: https://developer.download.nvidia.com/compute/cuda/repos/fedora37/x86_64/cuda-fedora37.repo +# 4. OpenCL/CLBLAST support simply requires the ICD loader and basic opencl libraries. +# It is up to the user to install the correct vendor-specific support. + +Name: llama.cpp-cuda +Version: %( date "+%%Y%%m%%d" ) +Release: 1%{?dist} +Summary: CPU Inference of LLaMA model in pure C/C++ (no CUDA/OpenCL) +License: MIT +Source0: https://github.com/ggerganov/llama.cpp/archive/refs/heads/master.tar.gz +BuildRequires: coreutils make gcc-c++ git cuda-toolkit +Requires: cuda-toolkit +URL: https://github.com/ggerganov/llama.cpp + +%define debug_package %{nil} +%define source_date_epoch_from_changelog 0 + +%description +CPU inference for Meta's Lllama2 models using default options. + +%prep +%setup -n llama.cpp-master + +%build +make -j GGML_CUDA=1 + +%install +mkdir -p %{buildroot}%{_bindir}/ +cp -p llama-cli %{buildroot}%{_bindir}/llama-cuda-cli +cp -p llama-server %{buildroot}%{_bindir}/llama-cuda-server +cp -p llama-simple %{buildroot}%{_bindir}/llama-cuda-simple + +mkdir -p %{buildroot}/usr/lib/systemd/system +%{__cat} < %{buildroot}/usr/lib/systemd/system/llamacuda.service +[Unit] +Description=Llama.cpp server, CPU only (no GPU support in this build). 
+After=syslog.target network.target local-fs.target remote-fs.target nss-lookup.target + +[Service] +Type=simple +EnvironmentFile=/etc/sysconfig/llama +ExecStart=/usr/bin/llama-cuda-server $LLAMA_ARGS +ExecReload=/bin/kill -s HUP $MAINPID +Restart=never + +[Install] +WantedBy=default.target +EOF + +mkdir -p %{buildroot}/etc/sysconfig +%{__cat} < %{buildroot}/etc/sysconfig/llama +LLAMA_ARGS="-m /opt/llama2/ggml-model-f32.bin" +EOF + +%clean +rm -rf %{buildroot} +rm -rf %{_builddir}/* + +%files +%{_bindir}/llama-cuda-cli +%{_bindir}/llama-cuda-server +%{_bindir}/llama-cuda-simple +/usr/lib/systemd/system/llamacuda.service +%config /etc/sysconfig/llama + +%pre + +%post + +%preun +%postun + +%changelog diff --git a/llama.cpp/.devops/llama-cpp.srpm.spec b/llama.cpp/.devops/llama-cpp.srpm.spec new file mode 100644 index 0000000000000000000000000000000000000000..4d5560089816c6679bd02b673dc57a2206ebfbe7 --- /dev/null +++ b/llama.cpp/.devops/llama-cpp.srpm.spec @@ -0,0 +1,85 @@ +# SRPM for building from source and packaging an RPM for RPM-based distros. +# https://docs.fedoraproject.org/en-US/quick-docs/creating-rpm-packages +# Built and maintained by John Boero - boeroboy@gmail.com +# In honor of Seth Vidal https://www.redhat.com/it/blog/thank-you-seth-vidal + +# Notes for llama.cpp: +# 1. Tags are currently based on hash - which will not sort asciibetically. +# We need to declare standard versioning if people want to sort latest releases. +# In the meantime, YYYYMMDD format will be used. +# 2. Builds for CUDA/OpenCL support are separate, with different depenedencies. +# 3. NVidia's developer repo must be enabled with nvcc, cublas, clblas, etc installed. +# Example: https://developer.download.nvidia.com/compute/cuda/repos/fedora37/x86_64/cuda-fedora37.repo +# 4. OpenCL/CLBLAST support simply requires the ICD loader and basic opencl libraries. +# It is up to the user to install the correct vendor-specific support. + +Name: llama.cpp +Version: %( date "+%%Y%%m%%d" ) +Release: 1%{?dist} +Summary: CPU Inference of LLaMA model in pure C/C++ (no CUDA/OpenCL) +License: MIT +Source0: https://github.com/ggerganov/llama.cpp/archive/refs/heads/master.tar.gz +BuildRequires: coreutils make gcc-c++ git libstdc++-devel +Requires: libstdc++ +URL: https://github.com/ggerganov/llama.cpp + +%define debug_package %{nil} +%define source_date_epoch_from_changelog 0 + +%description +CPU inference for Meta's Lllama2 models using default options. +Models are not included in this package and must be downloaded separately. + +%prep +%setup -n llama.cpp-master + +%build +make -j + +%install +mkdir -p %{buildroot}%{_bindir}/ +cp -p llama-cli %{buildroot}%{_bindir}/llama-cli +cp -p llama-server %{buildroot}%{_bindir}/llama-server +cp -p llama-simple %{buildroot}%{_bindir}/llama-simple + +mkdir -p %{buildroot}/usr/lib/systemd/system +%{__cat} < %{buildroot}/usr/lib/systemd/system/llama.service +[Unit] +Description=Llama.cpp server, CPU only (no GPU support in this build). 
+After=syslog.target network.target local-fs.target remote-fs.target nss-lookup.target + +[Service] +Type=simple +EnvironmentFile=/etc/sysconfig/llama +ExecStart=/usr/bin/llama-server $LLAMA_ARGS +ExecReload=/bin/kill -s HUP $MAINPID +Restart=never + +[Install] +WantedBy=default.target +EOF + +mkdir -p %{buildroot}/etc/sysconfig +%{__cat} < %{buildroot}/etc/sysconfig/llama +LLAMA_ARGS="-m /opt/llama2/ggml-model-f32.bin" +EOF + +%clean +rm -rf %{buildroot} +rm -rf %{_builddir}/* + +%files +%{_bindir}/llama-cli +%{_bindir}/llama-server +%{_bindir}/llama-simple +/usr/lib/systemd/system/llama.service +%config /etc/sysconfig/llama + +%pre + +%post + +%preun +%postun + +%changelog diff --git a/llama.cpp/.devops/musa.Dockerfile b/llama.cpp/.devops/musa.Dockerfile new file mode 100644 index 0000000000000000000000000000000000000000..bfd7fc1c1740fd5898a8576b5b7e13b6a79d68a0 --- /dev/null +++ b/llama.cpp/.devops/musa.Dockerfile @@ -0,0 +1,108 @@ +ARG UBUNTU_VERSION=22.04 +# This needs to generally match the container host's environment. +ARG MUSA_VERSION=rc3.1.0 +# Target the MUSA build image +ARG BASE_MUSA_DEV_CONTAINER=mthreads/musa:${MUSA_VERSION}-devel-ubuntu${UBUNTU_VERSION} + +ARG BASE_MUSA_RUN_CONTAINER=mthreads/musa:${MUSA_VERSION}-runtime-ubuntu${UBUNTU_VERSION} + +FROM ${BASE_MUSA_DEV_CONTAINER} AS build + +# MUSA architecture to build for (defaults to all supported archs) +ARG MUSA_DOCKER_ARCH=default + +RUN apt-get update && \ + apt-get install -y \ + build-essential \ + cmake \ + python3 \ + python3-pip \ + git \ + libcurl4-openssl-dev \ + libgomp1 + +COPY requirements.txt requirements.txt +COPY requirements requirements + +RUN pip install --upgrade pip setuptools wheel \ + && pip install -r requirements.txt + +WORKDIR /app + +COPY . . + +# Use the default MUSA archs if not specified +RUN if [ "${MUSA_DOCKER_ARCH}" != "default" ]; then \ + export CMAKE_ARGS="-DMUSA_ARCHITECTURES=${MUSA_DOCKER_ARCH}"; \ + fi && \ + cmake -B build -DGGML_NATIVE=OFF -DGGML_MUSA=ON -DLLAMA_CURL=ON ${CMAKE_ARGS} -DCMAKE_EXE_LINKER_FLAGS=-Wl,--allow-shlib-undefined . 
&& \ + cmake --build build --config Release -j$(nproc) + +RUN mkdir -p /app/lib && \ + find build -name "*.so" -exec cp {} /app/lib \; + +RUN mkdir -p /app/full \ + && cp build/bin/* /app/full \ + && cp *.py /app/full \ + && cp -r gguf-py /app/full \ + && cp -r requirements /app/full \ + && cp requirements.txt /app/full \ + && cp .devops/tools.sh /app/full/tools.sh + +## Base image +FROM ${BASE_MUSA_RUN_CONTAINER} AS base + +RUN apt-get update \ + && apt-get install -y libgomp1 curl\ + && apt autoremove -y \ + && apt clean -y \ + && rm -rf /tmp/* /var/tmp/* \ + && find /var/cache/apt/archives /var/lib/apt/lists -not -name lock -type f -delete \ + && find /var/cache -type f -delete + +COPY --from=build /app/lib/ /app + +### Full +FROM base AS full + +COPY --from=build /app/full /app + +WORKDIR /app + +RUN apt-get update \ + && apt-get install -y \ + git \ + python3 \ + python3-pip \ + && pip install --upgrade pip setuptools wheel \ + && pip install -r requirements.txt \ + && apt autoremove -y \ + && apt clean -y \ + && rm -rf /tmp/* /var/tmp/* \ + && find /var/cache/apt/archives /var/lib/apt/lists -not -name lock -type f -delete \ + && find /var/cache -type f -delete + + +ENTRYPOINT ["/app/tools.sh"] + +### Light, CLI only +FROM base AS light + +COPY --from=build /app/full/llama-cli /app + +WORKDIR /app + +ENTRYPOINT [ "/app/llama-cli" ] + +### Server, Server only +FROM base AS server + +ENV LLAMA_ARG_HOST=0.0.0.0 + +COPY --from=build /app/full/llama-server /app + +WORKDIR /app + +HEALTHCHECK CMD [ "curl", "-f", "http://localhost:8080/health" ] + +ENTRYPOINT [ "/app/llama-server" ] diff --git a/llama.cpp/.devops/nix/apps.nix b/llama.cpp/.devops/nix/apps.nix new file mode 100644 index 0000000000000000000000000000000000000000..0ecf19fc56d554c69aeef8a03b253fc15338688e --- /dev/null +++ b/llama.cpp/.devops/nix/apps.nix @@ -0,0 +1,21 @@ +{ + perSystem = + { config, lib, ... }: + { + apps = + let + inherit (config.packages) default; + binaries = [ + "llama-cli" + "llama-embedding" + "llama-server" + "llama-quantize" + ]; + mkApp = name: { + type = "app"; + program = "${default}/bin/${name}"; + }; + in + lib.genAttrs binaries mkApp; + }; +} diff --git a/llama.cpp/.devops/nix/devshells.nix b/llama.cpp/.devops/nix/devshells.nix new file mode 100644 index 0000000000000000000000000000000000000000..bfd304af14dcda59e284d30b9fc7491466bf71d1 --- /dev/null +++ b/llama.cpp/.devops/nix/devshells.nix @@ -0,0 +1,52 @@ +{ inputs, ... }: + +{ + perSystem = + { + config, + lib, + system, + ... 
+ }: + { + devShells = + let + pkgs = import inputs.nixpkgs { inherit system; }; + stdenv = pkgs.stdenv; + scripts = config.packages.python-scripts; + in + lib.pipe (config.packages) [ + (lib.concatMapAttrs ( + name: package: { + ${name} = pkgs.mkShell { + name = "${name}"; + inputsFrom = [ package ]; + shellHook = '' + echo "Entering ${name} devShell" + ''; + }; + "${name}-extra" = + if (name == "python-scripts") then + null + else + pkgs.mkShell { + name = "${name}-extra"; + inputsFrom = [ + package + scripts + ]; + # Extra packages that *may* be used by some scripts + packages = [ + pkgs.python3Packages.tiktoken + ]; + shellHook = '' + echo "Entering ${name} devShell" + addToSearchPath "LD_LIBRARY_PATH" "${lib.getLib stdenv.cc.cc}/lib" + ''; + }; + } + )) + (lib.filterAttrs (name: value: value != null)) + ]; + }; +} diff --git a/llama.cpp/.devops/nix/docker.nix b/llama.cpp/.devops/nix/docker.nix new file mode 100644 index 0000000000000000000000000000000000000000..d607b4575772c5330e962649ff6e14e5562ecfad --- /dev/null +++ b/llama.cpp/.devops/nix/docker.nix @@ -0,0 +1,37 @@ +{ + lib, + dockerTools, + buildEnv, + llama-cpp, + interactive ? true, + coreutils, +}: + +# A tar that can be fed into `docker load`: +# +# $ nix build .#llamaPackages.docker +# $ docker load < result + +# For details and variations cf. +# - https://nixos.org/manual/nixpkgs/unstable/#ssec-pkgs-dockerTools-buildLayeredImage +# - https://discourse.nixos.org/t/a-faster-dockertools-buildimage-prototype/16922 +# - https://nixery.dev/ + +# Approximate (compressed) sizes, at the time of writing, are: +# +# .#llamaPackages.docker: 125M; +# .#llamaPackagesCuda.docker: 537M; +# .#legacyPackages.aarch64-linux.llamaPackagesXavier.docker: 415M. + +dockerTools.buildLayeredImage { + name = llama-cpp.pname; + tag = "latest"; + + contents = + [ llama-cpp ] + ++ lib.optionals interactive [ + coreutils + dockerTools.binSh + dockerTools.caCertificates + ]; +} diff --git a/llama.cpp/.devops/nix/jetson-support.nix b/llama.cpp/.devops/nix/jetson-support.nix new file mode 100644 index 0000000000000000000000000000000000000000..78e2e40e03864e3df046389f7b751a1fd4575656 --- /dev/null +++ b/llama.cpp/.devops/nix/jetson-support.nix @@ -0,0 +1,39 @@ +{ inputs, ... }: +{ + perSystem = + { + config, + system, + lib, + pkgsCuda, + ... + }: + { + legacyPackages = + let + caps.llamaPackagesXavier = "7.2"; + caps.llamaPackagesOrin = "8.7"; + caps.llamaPackagesTX2 = "6.2"; + caps.llamaPackagesNano = "5.3"; + + pkgsFor = + cap: + import inputs.nixpkgs { + inherit system; + config = { + cudaSupport = true; + cudaCapabilities = [ cap ]; + cudaEnableForwardCompat = false; + inherit (pkgsCuda.config) allowUnfreePredicate; + }; + }; + in + builtins.mapAttrs (name: cap: (pkgsFor cap).callPackage ./scope.nix { }) caps; + + packages = lib.optionalAttrs (system == "aarch64-linux") { + jetson-xavier = config.legacyPackages.llamaPackagesXavier.llama-cpp; + jetson-orin = config.legacyPackages.llamaPackagesOrin.llama-cpp; + jetson-nano = config.legacyPackages.llamaPackagesNano.llama-cpp; + }; + }; +} diff --git a/llama.cpp/.devops/nix/nixpkgs-instances.nix b/llama.cpp/.devops/nix/nixpkgs-instances.nix new file mode 100644 index 0000000000000000000000000000000000000000..90d683a713aa1f6f7b7e263549afe5edef0a4033 --- /dev/null +++ b/llama.cpp/.devops/nix/nixpkgs-instances.nix @@ -0,0 +1,45 @@ +{ inputs, ... }: +{ + # The _module.args definitions are passed on to modules as arguments. E.g. + # the module `{ pkgs ... 
}: { /* config */ }` implicitly uses + # `_module.args.pkgs` (defined in this case by flake-parts). + perSystem = + { system, ... }: + { + _module.args = { + # Note: bringing up https://zimbatm.com/notes/1000-instances-of-nixpkgs + # again, the below creates several nixpkgs instances which the + # flake-centric CLI will be forced to evaluate e.g. on `nix flake show`. + # + # This is currently "slow" and "expensive", on a certain scale. + # This also isn't "right" in that this hinders dependency injection at + # the level of flake inputs. This might get removed in the foreseeable + # future. + # + # Note that you can use these expressions without Nix + # (`pkgs.callPackage ./devops/nix/scope.nix { }` is the entry point). + + pkgsCuda = import inputs.nixpkgs { + inherit system; + # Ensure dependencies use CUDA consistently (e.g. that openmpi, ucc, + # and ucx are built with CUDA support) + config.cudaSupport = true; + config.allowUnfreePredicate = + p: + builtins.all ( + license: + license.free + || builtins.elem license.shortName [ + "CUDA EULA" + "cuDNN EULA" + ] + ) (p.meta.licenses or [ p.meta.license ]); + }; + # Ensure dependencies use ROCm consistently + pkgsRocm = import inputs.nixpkgs { + inherit system; + config.rocmSupport = true; + }; + }; + }; +} diff --git a/llama.cpp/.devops/nix/package-gguf-py.nix b/llama.cpp/.devops/nix/package-gguf-py.nix new file mode 100644 index 0000000000000000000000000000000000000000..cca2f36a5bd4df214b3b7da84aa7a05d242fce0e --- /dev/null +++ b/llama.cpp/.devops/nix/package-gguf-py.nix @@ -0,0 +1,36 @@ +{ + lib, + llamaVersion, + numpy, + tqdm, + sentencepiece, + pyyaml, + poetry-core, + buildPythonPackage, + pytestCheckHook, +}: + +buildPythonPackage { + pname = "gguf"; + version = llamaVersion; + pyproject = true; + nativeBuildInputs = [ poetry-core ]; + propagatedBuildInputs = [ + numpy + tqdm + sentencepiece + pyyaml + ]; + src = lib.cleanSource ../../gguf-py; + pythonImportsCheck = [ + "numpy" + "gguf" + ]; + nativeCheckInputs = [ pytestCheckHook ]; + doCheck = true; + meta = with lib; { + description = "Python package for writing binary files in the GGUF format"; + license = licenses.mit; + maintainers = [ maintainers.ditsuke ]; + }; +} diff --git a/llama.cpp/.devops/nix/package.nix b/llama.cpp/.devops/nix/package.nix new file mode 100644 index 0000000000000000000000000000000000000000..043c4364b956ac30127e089216be545a499a687c --- /dev/null +++ b/llama.cpp/.devops/nix/package.nix @@ -0,0 +1,247 @@ +{ + lib, + glibc, + config, + stdenv, + runCommand, + cmake, + ninja, + pkg-config, + git, + mpi, + blas, + cudaPackages, + autoAddDriverRunpath, + darwin, + rocmPackages, + vulkan-headers, + vulkan-loader, + curl, + shaderc, + useBlas ? + builtins.all (x: !x) [ + useCuda + useMetalKit + useRocm + useVulkan + ] + && blas.meta.available, + useCuda ? config.cudaSupport, + useMetalKit ? stdenv.isAarch64 && stdenv.isDarwin, + # Increases the runtime closure size by ~700M + useMpi ? false, + useRocm ? config.rocmSupport, + rocmGpuTargets ? builtins.concatStringsSep ";" rocmPackages.clr.gpuTargets, + enableCurl ? true, + useVulkan ? false, + llamaVersion ? "0.0.0", # Arbitrary version, substituted by the flake + + # It's necessary to consistently use backendStdenv when building with CUDA support, + # otherwise we get libstdc++ errors downstream. + effectiveStdenv ? if useCuda then cudaPackages.backendStdenv else stdenv, + enableStatic ? effectiveStdenv.hostPlatform.isStatic, + precompileMetalShaders ? 
false, +}: + +let + inherit (lib) + cmakeBool + cmakeFeature + optionals + strings + ; + + stdenv = throw "Use effectiveStdenv instead"; + + suffices = + lib.optionals useBlas [ "BLAS" ] + ++ lib.optionals useCuda [ "CUDA" ] + ++ lib.optionals useMetalKit [ "MetalKit" ] + ++ lib.optionals useMpi [ "MPI" ] + ++ lib.optionals useRocm [ "ROCm" ] + ++ lib.optionals useVulkan [ "Vulkan" ]; + + pnameSuffix = + strings.optionalString (suffices != [ ]) + "-${strings.concatMapStringsSep "-" strings.toLower suffices}"; + descriptionSuffix = strings.optionalString ( + suffices != [ ] + ) ", accelerated with ${strings.concatStringsSep ", " suffices}"; + + xcrunHost = runCommand "xcrunHost" { } '' + mkdir -p $out/bin + ln -s /usr/bin/xcrun $out/bin + ''; + + # apple_sdk is supposed to choose sane defaults, no need to handle isAarch64 + # separately + darwinBuildInputs = + with darwin.apple_sdk.frameworks; + [ + Accelerate + CoreVideo + CoreGraphics + ] + ++ optionals useMetalKit [ MetalKit ]; + + cudaBuildInputs = with cudaPackages; [ + cuda_cudart + cuda_cccl # + libcublas + ]; + + rocmBuildInputs = with rocmPackages; [ + clr + hipblas + rocblas + ]; + + vulkanBuildInputs = [ + vulkan-headers + vulkan-loader + shaderc + ]; +in + +effectiveStdenv.mkDerivation (finalAttrs: { + pname = "llama-cpp${pnameSuffix}"; + version = llamaVersion; + + # Note: none of the files discarded here are visible in the sandbox or + # affect the output hash. This also means they can be modified without + # triggering a rebuild. + src = lib.cleanSourceWith { + filter = + name: type: + let + noneOf = builtins.all (x: !x); + baseName = baseNameOf name; + in + noneOf [ + (lib.hasSuffix ".nix" name) # Ignore *.nix files when computing outPaths + (lib.hasSuffix ".md" name) # Ignore *.md changes whe computing outPaths + (lib.hasPrefix "." baseName) # Skip hidden files and directories + (baseName == "flake.lock") + ]; + src = lib.cleanSource ../../.; + }; + + postPatch = '' + substituteInPlace ./ggml/src/ggml-metal/ggml-metal.m \ + --replace '[bundle pathForResource:@"ggml-metal" ofType:@"metal"];' "@\"$out/bin/ggml-metal.metal\";" + substituteInPlace ./ggml/src/ggml-metal/ggml-metal.m \ + --replace '[bundle pathForResource:@"default" ofType:@"metallib"];' "@\"$out/bin/default.metallib\";" + ''; + + # With PR#6015 https://github.com/ggerganov/llama.cpp/pull/6015, + # `default.metallib` may be compiled with Metal compiler from XCode + # and we need to escape sandbox on MacOS to access Metal compiler. 
+ # `xcrun` is used find the path of the Metal compiler, which is varible + # and not on $PATH + # see https://github.com/ggerganov/llama.cpp/pull/6118 for discussion + __noChroot = effectiveStdenv.isDarwin && useMetalKit && precompileMetalShaders; + + nativeBuildInputs = + [ + cmake + ninja + pkg-config + git + ] + ++ optionals useCuda [ + cudaPackages.cuda_nvcc + + autoAddDriverRunpath + ] + ++ optionals (effectiveStdenv.hostPlatform.isGnu && enableStatic) [ glibc.static ] + ++ optionals (effectiveStdenv.isDarwin && useMetalKit && precompileMetalShaders) [ xcrunHost ]; + + buildInputs = + optionals effectiveStdenv.isDarwin darwinBuildInputs + ++ optionals useCuda cudaBuildInputs + ++ optionals useMpi [ mpi ] + ++ optionals useRocm rocmBuildInputs + ++ optionals useBlas [ blas ] + ++ optionals useVulkan vulkanBuildInputs + ++ optionals enableCurl [ curl ]; + + cmakeFlags = + [ + (cmakeBool "LLAMA_BUILD_SERVER" true) + (cmakeBool "BUILD_SHARED_LIBS" (!enableStatic)) + (cmakeBool "CMAKE_SKIP_BUILD_RPATH" true) + (cmakeBool "LLAMA_CURL" enableCurl) + (cmakeBool "GGML_NATIVE" false) + (cmakeBool "GGML_BLAS" useBlas) + (cmakeBool "GGML_CUDA" useCuda) + (cmakeBool "GGML_HIP" useRocm) + (cmakeBool "GGML_METAL" useMetalKit) + (cmakeBool "GGML_VULKAN" useVulkan) + (cmakeBool "GGML_STATIC" enableStatic) + ] + ++ optionals useCuda [ + ( + with cudaPackages.flags; + cmakeFeature "CMAKE_CUDA_ARCHITECTURES" ( + builtins.concatStringsSep ";" (map dropDot cudaCapabilities) + ) + ) + ] + ++ optionals useRocm [ + (cmakeFeature "CMAKE_HIP_COMPILER" "${rocmPackages.llvm.clang}/bin/clang") + (cmakeFeature "CMAKE_HIP_ARCHITECTURES" rocmGpuTargets) + ] + ++ optionals useMetalKit [ + (lib.cmakeFeature "CMAKE_C_FLAGS" "-D__ARM_FEATURE_DOTPROD=1") + (cmakeBool "GGML_METAL_EMBED_LIBRARY" (!precompileMetalShaders)) + ]; + + # Environment variables needed for ROCm + env = optionals useRocm { + ROCM_PATH = "${rocmPackages.clr}"; + HIP_DEVICE_LIB_PATH = "${rocmPackages.rocm-device-libs}/amdgcn/bitcode"; + }; + + # TODO(SomeoneSerge): It's better to add proper install targets at the CMake level, + # if they haven't been added yet. + postInstall = '' + mkdir -p $out/include + cp $src/include/llama.h $out/include/ + ''; + + meta = { + # Configurations we don't want even the CI to evaluate. Results in the + # "unsupported platform" messages. This is mostly a no-op, because + # cudaPackages would've refused to evaluate anyway. + badPlatforms = optionals useCuda lib.platforms.darwin; + + # Configurations that are known to result in build failures. Can be + # overridden by importing Nixpkgs with `allowBroken = true`. + broken = (useMetalKit && !effectiveStdenv.isDarwin); + + description = "Inference of LLaMA model in pure C/C++${descriptionSuffix}"; + homepage = "https://github.com/ggerganov/llama.cpp/"; + license = lib.licenses.mit; + + # Accommodates `nix run` and `lib.getExe` + mainProgram = "llama-cli"; + + # These people might respond, on the best effort basis, if you ping them + # in case of Nix-specific regressions or for reviewing Nix-specific PRs. + # Consider adding yourself to this list if you want to ensure this flake + # stays maintained and you're willing to invest your time. Do not add + # other people without their consent. Consider removing people after + # they've been unreachable for long periods of time. 
+ + # Note that lib.maintainers is defined in Nixpkgs, but you may just add + # an attrset following the same format as in + # https://github.com/NixOS/nixpkgs/blob/f36a80e54da29775c78d7eff0e628c2b4e34d1d7/maintainers/maintainer-list.nix + maintainers = with lib.maintainers; [ + philiptaron + SomeoneSerge + ]; + + # Extend `badPlatforms` instead + platforms = lib.platforms.all; + }; +}) diff --git a/llama.cpp/.devops/nix/python-scripts.nix b/llama.cpp/.devops/nix/python-scripts.nix new file mode 100644 index 0000000000000000000000000000000000000000..56ea1827887646424fa08017fe91c4bdcc82c465 --- /dev/null +++ b/llama.cpp/.devops/nix/python-scripts.nix @@ -0,0 +1,66 @@ +{ + lib, + stdenv, + buildPythonPackage, + poetry-core, + mkShell, + python3Packages, + gguf-py, +}@inputs: + +let + llama-python-deps = with python3Packages; [ + numpy + sentencepiece + transformers + protobuf + torchWithoutCuda + gguf-py + tqdm + + # for scripts/compare-llama-bench.py + gitpython + tabulate + + # for examples/pydantic-models-to-grammar-examples.py + docstring-parser + pydantic + + ]; + + llama-python-test-deps = with python3Packages; [ + # Server bench + matplotlib + + # server tests + openai + pytest + prometheus-client + ]; +in + +buildPythonPackage ({ + pname = "llama-scripts"; + version = "0.0.0"; + pyproject = true; + + # NOTE: The files filtered out here are not visible in the build sandbox, neither + # do they affect the output hash. They can be modified without triggering a rebuild. + src = lib.cleanSourceWith { + filter = + name: type: + let + any = builtins.any (x: x); + baseName = builtins.baseNameOf name; + in + any [ + (lib.hasSuffix ".py" name) + (baseName == "README.md") + (baseName == "pyproject.toml") + ]; + src = lib.cleanSource ../../.; + }; + nativeBuildInputs = [ poetry-core ]; + nativeCheckInputs = llama-python-test-deps; + dependencies = llama-python-deps; +}) diff --git a/llama.cpp/.devops/nix/scope.nix b/llama.cpp/.devops/nix/scope.nix new file mode 100644 index 0000000000000000000000000000000000000000..478e8c4228afad449c3bd396d7ed4a12759a5de3 --- /dev/null +++ b/llama.cpp/.devops/nix/scope.nix @@ -0,0 +1,41 @@ +{ + lib, + newScope, + python3, + llamaVersion ? "0.0.0", +}: + +let + pythonPackages = python3.pkgs; + buildPythonPackage = pythonPackages.buildPythonPackage; + numpy = pythonPackages.numpy; + tqdm = pythonPackages.tqdm; + sentencepiece = pythonPackages.sentencepiece; + pyyaml = pythonPackages.pyyaml; + poetry-core = pythonPackages.poetry-core; + pytestCheckHook = pythonPackages.pytestCheckHook; +in + +# We're using `makeScope` instead of just writing out an attrset +# because it allows users to apply overlays later using `overrideScope'`. +# Cf. 
https://noogle.dev/f/lib/makeScope + +lib.makeScope newScope (self: { + inherit llamaVersion; + gguf-py = self.callPackage ./package-gguf-py.nix { + inherit + buildPythonPackage + numpy + tqdm + sentencepiece + poetry-core + pyyaml + pytestCheckHook + ; + }; + python-scripts = self.callPackage ./python-scripts.nix { inherit buildPythonPackage poetry-core; }; + llama-cpp = self.callPackage ./package.nix { }; + docker = self.callPackage ./docker.nix { }; + docker-min = self.callPackage ./docker.nix { interactive = false; }; + sif = self.callPackage ./sif.nix { }; +}) diff --git a/llama.cpp/.devops/nix/sif.nix b/llama.cpp/.devops/nix/sif.nix new file mode 100644 index 0000000000000000000000000000000000000000..7a5e1dd0ffc4c61e9b88b25d14a10afbd4f8cda9 --- /dev/null +++ b/llama.cpp/.devops/nix/sif.nix @@ -0,0 +1,27 @@ +{ + lib, + singularity-tools, + llama-cpp, + bashInteractive, + interactive ? false, +}: + +let + optionalInt = cond: x: if cond then x else 0; +in +singularity-tools.buildImage rec { + inherit (llama-cpp) name; + contents = [ llama-cpp ] ++ lib.optionals interactive [ bashInteractive ]; + + # These are excessive (but safe) for most variants. Building singularity + # images requires superuser privileges, so we build them inside a VM in a + # writable image of pre-determined size. + # + # ROCm is currently affected by https://github.com/NixOS/nixpkgs/issues/276846 + # + # Expected image sizes: + # - cpu/blas: 150M, + # - cuda, all gencodes: 560M, + diskSize = 4096 + optionalInt llama-cpp.useRocm 16384; + memSize = diskSize; +} diff --git a/llama.cpp/.devops/rocm.Dockerfile b/llama.cpp/.devops/rocm.Dockerfile new file mode 100644 index 0000000000000000000000000000000000000000..a8088ea00da5bb946f7a8a2a4830907290bdf7ff --- /dev/null +++ b/llama.cpp/.devops/rocm.Dockerfile @@ -0,0 +1,113 @@ +ARG UBUNTU_VERSION=24.04 + +# This needs to generally match the container host's environment. +ARG ROCM_VERSION=6.3 +ARG AMDGPU_VERSION=6.3 + +# Target the CUDA build image +ARG BASE_ROCM_DEV_CONTAINER=rocm/dev-ubuntu-${UBUNTU_VERSION}:${ROCM_VERSION}-complete + +### Build image +FROM ${BASE_ROCM_DEV_CONTAINER} AS build + +# Unless otherwise specified, we make a fat build. +# List from https://github.com/ggerganov/llama.cpp/pull/1087#issuecomment-1682807878 +# This is mostly tied to rocBLAS supported archs. +# gfx803, gfx900, gfx1032, gfx1101, gfx1102,not officialy supported +# gfx906 is deprecated +#check https://rocm.docs.amd.com/projects/install-on-linux/en/docs-6.2.4/reference/system-requirements.html + +#ARG ROCM_DOCKER_ARCH='gfx803,gfx900,gfx906,gfx908,gfx90a,gfx942,gfx1010,gfx1030,gfx1032,gfx1100,gfx1101,gfx1102' +ARG ROCM_DOCKER_ARCH=gfx1100 + +# Set nvcc architectured +ENV AMDGPU_TARGETS=${ROCM_DOCKER_ARCH} +# Enable ROCm +# ENV CC=/opt/rocm/llvm/bin/clang +# ENV CXX=/opt/rocm/llvm/bin/clang++ + +RUN apt-get update \ + && apt-get install -y \ + build-essential \ + cmake \ + git \ + libcurl4-openssl-dev \ + curl \ + libgomp1 + +WORKDIR /app + +COPY . . + +RUN HIPCXX="$(hipconfig -l)/clang" HIP_PATH="$(hipconfig -R)" \ + cmake -S . 
-B build -DGGML_HIP=ON -DAMDGPU_TARGETS=$ROCM_DOCKER_ARCH -DCMAKE_BUILD_TYPE=Release -DLLAMA_CURL=ON \ + && cmake --build build --config Release -j$(nproc) + +RUN mkdir -p /app/lib \ + && find build -name "*.so" -exec cp {} /app/lib \; + +RUN mkdir -p /app/full \ + && cp build/bin/* /app/full \ + && cp *.py /app/full \ + && cp -r gguf-py /app/full \ + && cp -r requirements /app/full \ + && cp requirements.txt /app/full \ + && cp .devops/tools.sh /app/full/tools.sh + +## Base image +FROM ${BASE_ROCM_DEV_CONTAINER} AS base + +RUN apt-get update \ + && apt-get install -y libgomp1 curl\ + && apt autoremove -y \ + && apt clean -y \ + && rm -rf /tmp/* /var/tmp/* \ + && find /var/cache/apt/archives /var/lib/apt/lists -not -name lock -type f -delete \ + && find /var/cache -type f -delete + +COPY --from=build /app/lib/ /app + +### Full +FROM base AS full + +COPY --from=build /app/full /app + +WORKDIR /app + +RUN apt-get update \ + && apt-get install -y \ + git \ + python3-pip \ + python3 \ + python3-wheel\ + && pip install --break-system-packages --upgrade setuptools \ + && pip install --break-system-packages -r requirements.txt \ + && apt autoremove -y \ + && apt clean -y \ + && rm -rf /tmp/* /var/tmp/* \ + && find /var/cache/apt/archives /var/lib/apt/lists -not -name lock -type f -delete \ + && find /var/cache -type f -delete + +ENTRYPOINT ["/app/tools.sh"] + +### Light, CLI only +FROM base AS light + +COPY --from=build /app/full/llama-cli /app + +WORKDIR /app + +ENTRYPOINT [ "/app/llama-cli" ] + +### Server, Server only +FROM base AS server + +ENV LLAMA_ARG_HOST=0.0.0.0 + +COPY --from=build /app/full/llama-server /app + +WORKDIR /app + +HEALTHCHECK CMD [ "curl", "-f", "http://localhost:8080/health" ] + +ENTRYPOINT [ "/app/llama-server" ] diff --git a/llama.cpp/.devops/tools.sh b/llama.cpp/.devops/tools.sh new file mode 100755 index 0000000000000000000000000000000000000000..41a6b1e55c7d295c9075a24eeaf2b8b0bc8eaa01 --- /dev/null +++ b/llama.cpp/.devops/tools.sh @@ -0,0 +1,49 @@ +#!/bin/bash +set -e + +# Read the first argument into a variable +arg1="$1" + +# Shift the arguments to remove the first one +shift + +if [[ "$arg1" == '--convert' || "$arg1" == '-c' ]]; then + exec python3 ./convert_hf_to_gguf.py "$@" +elif [[ "$arg1" == '--quantize' || "$arg1" == '-q' ]]; then + exec ./llama-quantize "$@" +elif [[ "$arg1" == '--run' || "$arg1" == '-r' ]]; then + exec ./llama-cli "$@" +elif [[ "$arg1" == '--bench' || "$arg1" == '-b' ]]; then + exec ./llama-bench "$@" +elif [[ "$arg1" == '--perplexity' || "$arg1" == '-p' ]]; then + exec ./llama-perplexity "$@" +elif [[ "$arg1" == '--all-in-one' || "$arg1" == '-a' ]]; then + echo "Converting PTH to GGML..." + for i in $(ls $1/$2/ggml-model-f16.bin*); do + if [ -f "${i/f16/q4_0}" ]; then + echo "Skip model quantization, it already exists: ${i/f16/q4_0}" + else + echo "Converting PTH to GGML: $i into ${i/f16/q4_0}..." + exec ./llama-quantize "$i" "${i/f16/q4_0}" q4_0 + fi + done +elif [[ "$arg1" == '--server' || "$arg1" == '-s' ]]; then + exec ./llama-server "$@" +else + echo "Unknown command: $arg1" + echo "Available commands: " + echo " --run (-r): Run a model previously converted into ggml" + echo " ex: -m /models/7B/ggml-model-q4_0.bin -p \"Building a website can be done in 10 simple steps:\" -n 512" + echo " --bench (-b): Benchmark the performance of the inference for various parameters." + echo " ex: -m model.gguf" + echo " --perplexity (-p): Measure the perplexity of a model over a given text." 
+ echo " ex: -m model.gguf -f file.txt" + echo " --convert (-c): Convert a llama model into ggml" + echo " ex: --outtype f16 \"/models/7B/\" " + echo " --quantize (-q): Optimize with quantization process ggml" + echo " ex: \"/models/7B/ggml-model-f16.bin\" \"/models/7B/ggml-model-q4_0.bin\" 2" + echo " --all-in-one (-a): Execute --convert & --quantize" + echo " ex: \"/models/\" 7B" + echo " --server (-s): Run a model on the server" + echo " ex: -m /models/7B/ggml-model-q4_0.bin -c 2048 -ngl 43 -mg 1 --port 8080" +fi diff --git a/llama.cpp/.devops/vulkan.Dockerfile b/llama.cpp/.devops/vulkan.Dockerfile new file mode 100644 index 0000000000000000000000000000000000000000..9064f383858fa592de84fc5f45af49341ffc9365 --- /dev/null +++ b/llama.cpp/.devops/vulkan.Dockerfile @@ -0,0 +1,89 @@ +ARG UBUNTU_VERSION=24.04 + +FROM ubuntu:$UBUNTU_VERSION AS build + +# Install build tools +RUN apt update && apt install -y git build-essential cmake wget + +# Install Vulkan SDK and cURL +RUN wget -qO - https://packages.lunarg.com/lunarg-signing-key-pub.asc | apt-key add - && \ + wget -qO /etc/apt/sources.list.d/lunarg-vulkan-noble.list https://packages.lunarg.com/vulkan/lunarg-vulkan-noble.list && \ + apt update -y && \ + apt-get install -y vulkan-sdk libcurl4-openssl-dev curl + +# Build it +WORKDIR /app + +COPY . . + +RUN cmake -B build -DGGML_NATIVE=OFF -DGGML_VULKAN=1 -DLLAMA_CURL=1 && \ + cmake --build build --config Release -j$(nproc) + +RUN mkdir -p /app/lib && \ + find build -name "*.so" -exec cp {} /app/lib \; + +RUN mkdir -p /app/full \ + && cp build/bin/* /app/full \ + && cp *.py /app/full \ + && cp -r gguf-py /app/full \ + && cp -r requirements /app/full \ + && cp requirements.txt /app/full \ + && cp .devops/tools.sh /app/full/tools.sh + +## Base image +FROM ubuntu:$UBUNTU_VERSION AS base + +RUN apt-get update \ + && apt-get install -y libgomp1 curl libvulkan-dev \ + && apt autoremove -y \ + && apt clean -y \ + && rm -rf /tmp/* /var/tmp/* \ + && find /var/cache/apt/archives /var/lib/apt/lists -not -name lock -type f -delete \ + && find /var/cache -type f -delete + +COPY --from=build /app/lib/ /app + +### Full +FROM base AS full + +COPY --from=build /app/full /app + +WORKDIR /app + +RUN apt-get update \ + && apt-get install -y \ + git \ + python3 \ + python3-pip \ + python3-wheel \ + && pip install --break-system-packages --upgrade setuptools \ + && pip install --break-system-packages -r requirements.txt \ + && apt autoremove -y \ + && apt clean -y \ + && rm -rf /tmp/* /var/tmp/* \ + && find /var/cache/apt/archives /var/lib/apt/lists -not -name lock -type f -delete \ + && find /var/cache -type f -delete + +ENTRYPOINT ["/app/tools.sh"] + +### Light, CLI only +FROM base AS light + +COPY --from=build /app/full/llama-cli /app + +WORKDIR /app + +ENTRYPOINT [ "/app/llama-cli" ] + +### Server, Server only +FROM base AS server + +ENV LLAMA_ARG_HOST=0.0.0.0 + +COPY --from=build /app/full/llama-server /app + +WORKDIR /app + +HEALTHCHECK CMD [ "curl", "-f", "http://localhost:8080/health" ] + +ENTRYPOINT [ "/app/llama-server" ] diff --git a/llama.cpp/.dockerignore b/llama.cpp/.dockerignore new file mode 100644 index 0000000000000000000000000000000000000000..064b7c7be86d02d7bbc17b17a0ca40f638d66611 --- /dev/null +++ b/llama.cpp/.dockerignore @@ -0,0 +1,20 @@ +*.o +*.a +.cache/ +# Do not ignore .git directory, otherwise the reported build number will always be 0 +.github/ +.gitignore +.vs/ +.vscode/ +.DS_Store + +build*/ + +models/* + +/llama-cli +/llama-quantize + +arm_neon.h +compile_commands.json +Dockerfile 
diff --git a/llama.cpp/.ecrc b/llama.cpp/.ecrc new file mode 100644 index 0000000000000000000000000000000000000000..c68877ec211f1c7f9458b8d59c278fc7e88f38c8 --- /dev/null +++ b/llama.cpp/.ecrc @@ -0,0 +1,6 @@ +{ + "Exclude": ["^\\.gitmodules$", "stb_image\\.h"], + "Disable": { + "IndentSize": true + } +} diff --git a/llama.cpp/.editorconfig b/llama.cpp/.editorconfig new file mode 100644 index 0000000000000000000000000000000000000000..5d63d0a51e46635f8d298eb0d07f3c4eb2021663 --- /dev/null +++ b/llama.cpp/.editorconfig @@ -0,0 +1,50 @@ +# https://EditorConfig.org + +# Top-most EditorConfig file +root = true + +# Unix-style newlines with a newline ending every file, utf-8 charset +[*] +end_of_line = lf +insert_final_newline = true +trim_trailing_whitespace = true +charset = utf-8 +indent_style = space +indent_size = 4 + +[Makefile] +indent_style = tab + +[scripts/*.mk] +indent_style = tab + +[prompts/*.txt] +insert_final_newline = unset + +[examples/server/public/*] +indent_size = 2 + +[examples/server/public/deps_*] +trim_trailing_whitespace = unset +indent_style = unset +indent_size = unset + +[examples/server/deps_*] +trim_trailing_whitespace = unset +indent_style = unset +indent_size = unset + +[examples/llama.swiftui/llama.swiftui.xcodeproj/*] +indent_style = tab + +[examples/cvector-generator/*.txt] +trim_trailing_whitespace = unset +insert_final_newline = unset + +[models/templates/*.jinja] +indent_style = unset +indent_size = unset +end_of_line = unset +charset = unset +trim_trailing_whitespace = unset +insert_final_newline = unset diff --git a/llama.cpp/.flake8 b/llama.cpp/.flake8 new file mode 100644 index 0000000000000000000000000000000000000000..d64c2564aca8f54ff000c93a44c505f65a76c07c --- /dev/null +++ b/llama.cpp/.flake8 @@ -0,0 +1,17 @@ +[flake8] +max-line-length = 125 +ignore = E203,E211,E221,E225,E231,E241,E251,E261,E266,E501,E701,E704,W503 +exclude = + # Do not traverse examples + examples, + # Do not include package initializers + __init__.py, + # No need to traverse our git directory + .git, + # There's no value in checking cache directories + __pycache__, + # No need to include the build path + build, + # This contains builds that we don't want to check + dist # This is generated with `python build .` for package releases +# max-complexity = 10 diff --git a/llama.cpp/.github/ISSUE_TEMPLATE/010-bug-compilation.yml b/llama.cpp/.github/ISSUE_TEMPLATE/010-bug-compilation.yml new file mode 100644 index 0000000000000000000000000000000000000000..b85bf5741e5a38de8f5b40153e720ded2fb07835 --- /dev/null +++ b/llama.cpp/.github/ISSUE_TEMPLATE/010-bug-compilation.yml @@ -0,0 +1,87 @@ +name: Bug (compilation) +description: Something goes wrong when trying to compile llama.cpp. +title: "Compile bug: " +labels: ["bug-unconfirmed", "compilation"] +body: + - type: markdown + attributes: + value: > + Thanks for taking the time to fill out this bug report! + This issue template is intended for bug reports where the compilation of llama.cpp fails. + Before opening an issue, please confirm that the compilation still fails with `-DGGML_CCACHE=OFF`. + If the compilation succeeds with ccache disabled you should be able to permanently fix the issue + by clearing `~/.cache/ccache` (on Linux). + - type: textarea + id: commit + attributes: + label: Git commit + description: Which commit are you trying to compile? 
+ placeholder: | + $git rev-parse HEAD + 84a07a17b1b08cf2b9747c633a2372782848a27f + validations: + required: true + - type: dropdown + id: operating-system + attributes: + label: Operating systems + description: Which operating systems do you know to be affected? + multiple: true + options: + - Linux + - Mac + - Windows + - BSD + - Other? (Please let us know in description) + validations: + required: true + - type: dropdown + id: backends + attributes: + label: GGML backends + description: Which GGML backends do you know to be affected? + options: [AMX, BLAS, CPU, CUDA, HIP, Kompute, Metal, Musa, RPC, SYCL, Vulkan] + multiple: true + validations: + required: true + - type: textarea + id: info + attributes: + label: Problem description & steps to reproduce + description: > + Please give us a summary of the problem and tell us how to reproduce it. + If you can narrow down the bug to specific compile flags, that information would be very much appreciated by us. + placeholder: > + I'm trying to compile llama.cpp with CUDA support on a fresh install of Ubuntu and get error XY. + Here are the exact commands that I used: ... + validations: + required: true + - type: textarea + id: first_bad_commit + attributes: + label: First Bad Commit + description: > + If the bug was not present on an earlier version: when did it start appearing? + If possible, please do a git bisect and identify the exact commit that introduced the bug. + validations: + required: false + - type: textarea + id: command + attributes: + label: Compile command + description: > + Please provide the exact command you used to compile llama.cpp. For example: `cmake -B ...`. + This will be automatically formatted into code, so no need for backticks. + render: shell + validations: + required: true + - type: textarea + id: logs + attributes: + label: Relevant log output + description: > + Please copy and paste any relevant log output, including any generated text. + This will be automatically formatted into code, so no need for backticks. + render: shell + validations: + required: true diff --git a/llama.cpp/.github/ISSUE_TEMPLATE/011-bug-results.yml b/llama.cpp/.github/ISSUE_TEMPLATE/011-bug-results.yml new file mode 100644 index 0000000000000000000000000000000000000000..1ccef0793d45e890195b104b837cb568ccb11dff --- /dev/null +++ b/llama.cpp/.github/ISSUE_TEMPLATE/011-bug-results.yml @@ -0,0 +1,101 @@ +name: Bug (model use) +description: Something goes wrong when using a model (in general, not specific to a single llama.cpp module). +title: "Eval bug: " +labels: ["bug-unconfirmed", "model evaluation"] +body: + - type: markdown + attributes: + value: > + Thanks for taking the time to fill out this bug report! + This issue template is intended for bug reports where the model evaluation results + (i.e. the generated text) are incorrect or llama.cpp crashes during model evaluation. + If you encountered the issue while using an external UI (e.g. ollama), + please reproduce your issue using one of the examples/binaries in this repository. + The `llama-cli` binary can be used for simple and reproducible model inference. + - type: textarea + id: version + attributes: + label: Name and Version + description: Which version of our software are you running? 
(use `--version` to get a version string) + placeholder: | + $./llama-cli --version + version: 2999 (42b4109e) + built with cc (Ubuntu 11.4.0-1ubuntu1~22.04) 11.4.0 for x86_64-linux-gnu + validations: + required: true + - type: dropdown + id: operating-system + attributes: + label: Operating systems + description: Which operating systems do you know to be affected? + multiple: true + options: + - Linux + - Mac + - Windows + - BSD + - Other? (Please let us know in description) + validations: + required: true + - type: dropdown + id: backends + attributes: + label: GGML backends + description: Which GGML backends do you know to be affected? + options: [AMX, BLAS, CPU, CUDA, HIP, Kompute, Metal, Musa, RPC, SYCL, Vulkan] + multiple: true + validations: + required: true + - type: textarea + id: hardware + attributes: + label: Hardware + description: Which CPUs/GPUs are you using? + placeholder: > + e.g. Ryzen 5950X + 2x RTX 4090 + validations: + required: true + - type: textarea + id: model + attributes: + label: Models + description: > + Which model(s) at which quantization were you using when encountering the bug? + If you downloaded a GGUF file off of Huggingface, please provide a link. + placeholder: > + e.g. Meta LLaMA 3.1 Instruct 8b q4_K_M + validations: + required: false + - type: textarea + id: info + attributes: + label: Problem description & steps to reproduce + description: > + Please give us a summary of the problem and tell us how to reproduce it. + If you can narrow down the bug to specific hardware, compile flags, or command line arguments, + that information would be very much appreciated by us. + placeholder: > + e.g. when I run llama-cli with -ngl 99 I get garbled outputs. + When I use -ngl 0 it works correctly. + Here are the exact commands that I used: ... + validations: + required: true + - type: textarea + id: first_bad_commit + attributes: + label: First Bad Commit + description: > + If the bug was not present on an earlier version: when did it start appearing? + If possible, please do a git bisect and identify the exact commit that introduced the bug. + validations: + required: false + - type: textarea + id: logs + attributes: + label: Relevant log output + description: > + Please copy and paste any relevant log output, including the command that you entered and any generated text. + This will be automatically formatted into code, so no need for backticks. + render: shell + validations: + required: true diff --git a/llama.cpp/.github/ISSUE_TEMPLATE/019-bug-misc.yml b/llama.cpp/.github/ISSUE_TEMPLATE/019-bug-misc.yml new file mode 100644 index 0000000000000000000000000000000000000000..1904e31fdc4362502ecaf0a6c16d1b30f720e50c --- /dev/null +++ b/llama.cpp/.github/ISSUE_TEMPLATE/019-bug-misc.yml @@ -0,0 +1,91 @@ +name: Bug (misc.) +description: Something is not working the way it should (and it's not covered by any of the above cases). +title: "Misc. bug: " +labels: ["bug-unconfirmed"] +body: + - type: markdown + attributes: + value: > + Thanks for taking the time to fill out this bug report! + This issue template is intended for miscellaneous bugs that don't fit into any other category. + If you encountered the issue while using an external UI (e.g. ollama), + please reproduce your issue using one of the examples/binaries in this repository. + - type: textarea + id: version + attributes: + label: Name and Version + description: Which version of our software is affected? (You can use `--version` to get a version string.) 
+ placeholder: | + $./llama-cli --version + version: 2999 (42b4109e) + built with cc (Ubuntu 11.4.0-1ubuntu1~22.04) 11.4.0 for x86_64-linux-gnu + validations: + required: true + - type: dropdown + id: operating-system + attributes: + label: Operating systems + description: Which operating systems do you know to be affected? + multiple: true + options: + - Linux + - Mac + - Windows + - BSD + - Other? (Please let us know in description) + validations: + required: false + - type: dropdown + id: module + attributes: + label: Which llama.cpp modules do you know to be affected? + multiple: true + options: + - Documentation/Github + - libllama (core library) + - llama-cli + - llama-server + - llama-bench + - llama-quantize + - Python/Bash scripts + - Test code + - Other (Please specify in the next section) + validations: + required: false + - type: textarea + id: command + attributes: + label: Command line + description: > + Please provide the exact commands you entered, if applicable. For example: `llama-server -m ... -c ...`, `llama-cli -m ...`, etc. + This will be automatically formatted into code, so no need for backticks. + render: shell + validations: + required: false + - type: textarea + id: info + attributes: + label: Problem description & steps to reproduce + description: > + Please give us a summary of the problem and tell us how to reproduce it (if applicable). + validations: + required: true + - type: textarea + id: first_bad_commit + attributes: + label: First Bad Commit + description: > + If the bug was not present on an earlier version and it's not trivial to track down: when did it start appearing? + If possible, please do a git bisect and identify the exact commit that introduced the bug. + validations: + required: false + - type: textarea + id: logs + attributes: + label: Relevant log output + description: > + If applicable, please copy and paste any relevant log output, including any generated text. + This will be automatically formatted into code, so no need for backticks. + render: shell + validations: + required: false diff --git a/llama.cpp/.github/ISSUE_TEMPLATE/020-enhancement.yml b/llama.cpp/.github/ISSUE_TEMPLATE/020-enhancement.yml new file mode 100644 index 0000000000000000000000000000000000000000..02dd4f575a686802a44e0ff66c5725daf7c7934e --- /dev/null +++ b/llama.cpp/.github/ISSUE_TEMPLATE/020-enhancement.yml @@ -0,0 +1,51 @@ +name: Enhancement +description: Used to request enhancements for llama.cpp. +title: "Feature Request: " +labels: ["enhancement"] +body: + - type: markdown + attributes: + value: | + [Please post your idea first in Discussion if there is not yet a consensus for this enhancement request. This will help to keep this issue tracker focused on enhancements that the community has agreed needs to be implemented.](https://github.com/ggerganov/llama.cpp/discussions/categories/ideas) + + - type: checkboxes + id: prerequisites + attributes: + label: Prerequisites + description: Please confirm the following before submitting your enhancement request. + options: + - label: I am running the latest code. Mention the version if possible as well. + required: true + - label: I carefully followed the [README.md](https://github.com/ggerganov/llama.cpp/blob/master/README.md). + required: true + - label: I searched using keywords relevant to my issue to make sure that I am creating a new issue that is not already open (or closed). 
+          required: true
+        - label: I reviewed the [Discussions](https://github.com/ggerganov/llama.cpp/discussions), and have a new and useful enhancement to share.
+          required: true
+
+  - type: textarea
+    id: feature-description
+    attributes:
+      label: Feature Description
+      description: Please provide a detailed written description of what you were trying to do, and what you expected `llama.cpp` to do as an enhancement.
+      placeholder: Detailed description of the enhancement
+    validations:
+      required: true
+
+  - type: textarea
+    id: motivation
+    attributes:
+      label: Motivation
+      description: Please provide a detailed written description of reasons why this feature is necessary and how it is useful to `llama.cpp` users.
+      placeholder: Explanation of why this feature is needed and its benefits
+    validations:
+      required: true
+
+  - type: textarea
+    id: possible-implementation
+    attributes:
+      label: Possible Implementation
+      description: If you have an idea as to how it can be implemented, please write a detailed description. Feel free to give links to external sources or share visuals that might be helpful to understand the details better.
+      placeholder: Detailed description of potential implementation
+    validations:
+      required: false
diff --git a/llama.cpp/.github/ISSUE_TEMPLATE/030-research.yml b/llama.cpp/.github/ISSUE_TEMPLATE/030-research.yml
new file mode 100644
index 0000000000000000000000000000000000000000..18975dbbfd0fe536b4aab5c7d36b965c732cee98
--- /dev/null
+++ b/llama.cpp/.github/ISSUE_TEMPLATE/030-research.yml
@@ -0,0 +1,52 @@
+name: Research
+description: Track new technical research area.
+title: "Research: "
+labels: ["research 🔬"]
+body:
+  - type: markdown
+    attributes:
+      value: |
+        Don't forget to check for any [duplicate research issue tickets](https://github.com/ggerganov/llama.cpp/issues?q=is%3Aopen+is%3Aissue+label%3A%22research+%F0%9F%94%AC%22)
+
+  - type: checkboxes
+    id: research-stage
+    attributes:
+      label: Research Stage
+      description: Track the general state of this research ticket
+      options:
+        - label: Background Research (Let's try to avoid reinventing the wheel)
+        - label: Hypothesis Formed (How do you think this will work, and what will its effect be?)
+        - label: Strategy / Implementation Forming
+        - label: Analysis of results
+        - label: Debrief / Documentation (So people in the future can learn from us)
+
+  - type: textarea
+    id: background
+    attributes:
+      label: Previous existing literature and research
+      description: What's the current state of the art, and what's the motivation for this research?
+
+  - type: textarea
+    id: hypothesis
+    attributes:
+      label: Hypothesis
+      description: How do you think this will work, and what will its effect be?
+
+  - type: textarea
+    id: implementation
+    attributes:
+      label: Implementation
+      description: Got an approach? e.g. a PR ready to go?
+
+  - type: textarea
+    id: analysis
+    attributes:
+      label: Analysis
+      description: How does the proposed implementation behave?
+
+  - type: textarea
+    id: logs
+    attributes:
+      label: Relevant log output
+      description: Please copy and paste any relevant log output. This will be automatically formatted into code, so no need for backticks.
+      render: shell
diff --git a/llama.cpp/.github/ISSUE_TEMPLATE/040-refactor.yml b/llama.cpp/.github/ISSUE_TEMPLATE/040-refactor.yml
new file mode 100644
index 0000000000000000000000000000000000000000..b6e6ab36defd6383b4dcbe1ee8628a4692edf7aa
--- /dev/null
+++ b/llama.cpp/.github/ISSUE_TEMPLATE/040-refactor.yml
@@ -0,0 +1,28 @@
+name: Refactor (Maintainers)
+description: Used to track refactoring opportunities.
+title: "Refactor: "
+labels: ["refactor"]
+body:
+  - type: markdown
+    attributes:
+      value: |
+        Don't forget to [check for existing refactor issue tickets](https://github.com/ggerganov/llama.cpp/issues?q=is%3Aopen+is%3Aissue+label%3Arefactoring) in case it's already covered.
+        You may also want to check the [refactor label on pull requests](https://github.com/ggerganov/llama.cpp/pulls?q=is%3Aopen+is%3Apr+label%3Arefactoring) for duplicates.
+
+  - type: textarea
+    id: background-description
+    attributes:
+      label: Background Description
+      description: Please provide a detailed written description of the pain points you are trying to solve.
+      placeholder: Detailed description of the motivation behind your refactor request
+    validations:
+      required: true
+
+  - type: textarea
+    id: possible-approaches
+    attributes:
+      label: Possible Refactor Approaches
+      description: If you have an idea of possible approaches to solve this problem, please describe it here. You may want to make it a todo list.
+      placeholder: Your idea of possible refactoring opportunities/approaches
+    validations:
+      required: false
diff --git a/llama.cpp/.github/ISSUE_TEMPLATE/config.yml b/llama.cpp/.github/ISSUE_TEMPLATE/config.yml
new file mode 100644
index 0000000000000000000000000000000000000000..eb8c4b472df4c4fb22e7190a320dbf800eea4807
--- /dev/null
+++ b/llama.cpp/.github/ISSUE_TEMPLATE/config.yml
@@ -0,0 +1,11 @@
+blank_issues_enabled: true
+contact_links:
+  - name: Got an idea?
+    url: https://github.com/ggerganov/llama.cpp/discussions/categories/ideas
+    about: Pop it there. It may then become an enhancement ticket.
+  - name: Got a question?
+    url: https://github.com/ggerganov/llama.cpp/discussions/categories/q-a
+    about: Ask a question there!
+  - name: Want to contribute?
+ url: https://github.com/ggerganov/llama.cpp/wiki/contribute + about: Head to the contribution guide page of the wiki for areas you can help with diff --git a/llama.cpp/.github/labeler.yml b/llama.cpp/.github/labeler.yml new file mode 100644 index 0000000000000000000000000000000000000000..1b47bc96885c410e519edd5aadd845cdc2a9a32f --- /dev/null +++ b/llama.cpp/.github/labeler.yml @@ -0,0 +1,86 @@ +# https://github.com/actions/labeler +Kompute: + - changed-files: + - any-glob-to-any-file: + - ggml/include/ggml-kompute.h + - ggml/src/ggml-kompute/** + - README-kompute.md +Apple Metal: + - changed-files: + - any-glob-to-any-file: + - ggml/include/ggml-metal.h + - ggml/src/ggml-metal/** + - README-metal.md +SYCL: + - changed-files: + - any-glob-to-any-file: + - ggml/include/ggml-sycl.h + - ggml/src/ggml-sycl/** + - docs/backend/SYCL.md + - examples/sycl/** +Nvidia GPU: + - changed-files: + - any-glob-to-any-file: + - ggml/include/ggml-cuda.h + - ggml/src/ggml-cuda/** +Vulkan: + - changed-files: + - any-glob-to-any-file: + - ggml/include/ggml-vulkan.h + - ggml/src/ggml-vulkan/** +documentation: + - changed-files: + - any-glob-to-any-file: + - docs/** + - media/** +testing: + - changed-files: + - any-glob-to-any-file: + - tests/** +build: + - changed-files: + - any-glob-to-any-file: + - cmake/** + - CMakeLists.txt + - CMakePresets.json +examples: + - changed-files: + - any-glob-to-any-file: examples/** +devops: + - changed-files: + - any-glob-to-any-file: + - .devops/** + - .github/** + - ci/** +python: + - changed-files: + - any-glob-to-any-file: + - "**/*.py" + - requirements/** + - gguf-py/** + - .flake8 +script: + - changed-files: + - any-glob-to-any-file: + - scripts/** +android: + - changed-files: + - any-glob-to-any-file: + - examples/llama.android/** +server: + - changed-files: + - any-glob-to-any-file: + - examples/server/** +ggml: + - changed-files: + - any-glob-to-any-file: + - ggml/** +nix: + - changed-files: + - any-glob-to-any-file: + - "**/*.nix" + - .github/workflows/nix-*.yml + - .devops/nix/nixpkgs-instances.nix +embedding: + - changed-files: + - any-glob-to-any-file: examples/embedding/ diff --git a/llama.cpp/.github/pull_request_template.md b/llama.cpp/.github/pull_request_template.md new file mode 100644 index 0000000000000000000000000000000000000000..d9f5bdc235a0024ba068e93b93b3c66255ccaae8 --- /dev/null +++ b/llama.cpp/.github/pull_request_template.md @@ -0,0 +1 @@ +*Make sure to read the [contributing guidelines](https://github.com/ggerganov/llama.cpp/blob/master/CONTRIBUTING.md) before submitting a PR* diff --git a/llama.cpp/.github/workflows/bench.yml.disabled b/llama.cpp/.github/workflows/bench.yml.disabled new file mode 100644 index 0000000000000000000000000000000000000000..1c8787ef78f7e43f991cf9a0d1fb4df40e7e8021 --- /dev/null +++ b/llama.cpp/.github/workflows/bench.yml.disabled @@ -0,0 +1,315 @@ +# TODO: there have been some issues with the workflow, so disabling for now +# https://github.com/ggerganov/llama.cpp/issues/7893 +# +# Benchmark +name: Benchmark + +on: + workflow_dispatch: + inputs: + gpu-series: + description: 'Azure GPU series to run with' + required: true + type: choice + options: + - Standard_NC4as_T4_v3 + - Standard_NC24ads_A100_v4 + - Standard_NC80adis_H100_v5 + sha: + description: 'Commit SHA1 to build' + required: false + type: string + duration: + description: 'Duration of the bench' + type: string + default: 10m + + push: + branches: + - master + paths: ['llama.cpp', 'ggml.c', 'ggml-backend.cpp', 'ggml-quants.c', '**/*.cu', 
'examples/server/*.h*', 'examples/server/*.cpp'] + pull_request_target: + types: [opened, synchronize, reopened] + paths: ['llama.cpp', 'ggml.c', 'ggml-backend.cpp', 'ggml-quants.c', '**/*.cu', 'examples/server/*.h*', 'examples/server/*.cpp'] + schedule: + - cron: '04 2 * * *' + +concurrency: + group: ${{ github.workflow }}-${{ github.ref }}-${{ github.head_ref || github.run_id }}-${{ github.event.inputs.sha }} + cancel-in-progress: true + +jobs: + bench-server-baseline: + runs-on: Standard_NC4as_T4_v3 + env: + RUNNER_LABEL: Standard_NC4as_T4_v3 # FIXME Do not find a way to not duplicate it + N_USERS: 8 + DURATION: 10m + + strategy: + matrix: + model: [phi-2] + ftype: [q4_0, q8_0, f16] + include: + - model: phi-2 + ftype: q4_0 + pr_comment_enabled: "true" + + if: | + inputs.gpu-series == 'Standard_NC4as_T4_v3' + || ( + github.event_name == 'schedule' + && github.ref_name == 'master' + && github.repository_owner == 'ggerganov' + ) + || github.event_name == 'pull_request_target' + || ( + github.event_name == 'push' + && github.event.ref == 'refs/heads/master' + && github.repository_owner == 'ggerganov' + ) + steps: + - name: Clone + id: checkout + uses: actions/checkout@v4 + with: + fetch-depth: 0 + ref: ${{ github.event.inputs.sha || github.event.pull_request.head.sha || github.sha || github.head_ref || github.ref_name }} + + - name: Install python env + id: pipenv + run: | + cd examples/server/bench + python3 -m venv venv + source venv/bin/activate + pip install -r requirements.txt + + - name: Prometheus + id: install_prometheus + run: | + wget --quiet https://github.com/prometheus/prometheus/releases/download/v2.51.0/prometheus-2.51.0.linux-amd64.tar.gz + tar xzf prometheus*.tar.gz --strip-components=1 + ./prometheus --config.file=examples/server/bench/prometheus.yml & + while ! 
nc -z localhost 9090; do
+            sleep 0.1
+          done
+
+      - name: Set up Go
+        uses: actions/setup-go@v5
+        with:
+          go-version: '1.21'
+
+      - name: Install k6 and xk6-sse
+        id: k6_installation
+        run: |
+          cd examples/server/bench
+          go install go.k6.io/xk6/cmd/xk6@latest
+          xk6 build master \
+            --with github.com/phymbert/xk6-sse
+
+      - name: Build
+        id: cmake_build
+        run: |
+          set -eux
+          cmake -B build \
+              -DGGML_NATIVE=OFF \
+              -DLLAMA_BUILD_SERVER=ON \
+              -DLLAMA_CURL=ON \
+              -DLLAMA_CUBLAS=ON \
+              -DCUDAToolkit_ROOT=/usr/local/cuda \
+              -DCMAKE_CUDA_COMPILER=/usr/local/cuda/bin/nvcc \
+              -DCMAKE_CUDA_ARCHITECTURES=75 \
+              -DLLAMA_FATAL_WARNINGS=OFF \
+              -DLLAMA_ALL_WARNINGS=OFF \
+              -DCMAKE_BUILD_TYPE=Release;
+          cmake --build build --config Release -j $(nproc) --target llama-server
+
+      - name: Download the dataset
+        id: download_dataset
+        run: |
+          cd examples/server/bench
+          wget --quiet https://huggingface.co/datasets/anon8231489123/ShareGPT_Vicuna_unfiltered/resolve/main/ShareGPT_V3_unfiltered_cleaned_split.json
+
+      - name: Server bench
+        id: server_bench
+        env:
+          HEAD_REF: ${{ github.head_ref || github.ref_name }}
+        run: |
+          set -eux
+
+          cd examples/server/bench
+          source venv/bin/activate
+          python bench.py \
+              --runner-label ${{ env.RUNNER_LABEL }} \
+              --name ${{ github.job }} \
+              --branch $HEAD_REF \
+              --commit ${{ github.event.inputs.sha || github.event.pull_request.head.sha || github.sha }} \
+              --scenario script.js \
+              --duration ${{ github.event.inputs.duration || env.DURATION }} \
+              --hf-repo ggml-org/models \
+              --hf-file ${{ matrix.model }}/ggml-model-${{ matrix.ftype }}.gguf \
+              --model-path-prefix /models \
+              --parallel ${{ env.N_USERS }} \
+              -ngl 33 \
+              --batch-size 2048 \
+              --ubatch-size 256 \
+              --ctx-size 16384 \
+              --n-prompts 1000 \
+              --max-prompt-tokens 1024 \
+              --max-tokens 2048
+
+          cat results.github.env >> $GITHUB_ENV
+
+          # Remove dataset as we do not want it in the artefact
+          rm ShareGPT_V3_unfiltered_cleaned_split.json
+
+      - uses: actions/upload-artifact@v4
+        with:
+          name: bench-server-${{ github.job }}-${{ env.RUNNER_LABEL }}-${{ matrix.model }}-${{ matrix.ftype }}
+          compression-level: 9
+          path: |
+            examples/server/bench/*.jpg
+            examples/server/bench/*.json
+            examples/server/bench/*.log
+
+      - name: Commit status
+        uses: Sibz/github-status-action@v1
+        with:
+          authToken: ${{secrets.GITHUB_TOKEN}}
+          sha: ${{ inputs.sha || github.event.pull_request.head.sha || github.sha }}
+          context: bench-server-${{ github.job }}-${{ env.RUNNER_LABEL }}-${{ matrix.model }}-${{ matrix.ftype }}
+          description: |
+            ${{ env.BENCH_RESULTS }}
+          state: 'success'
+
+      - name: Upload benchmark images
+        uses: devicons/public-upload-to-imgur@v2.2.2
+        continue-on-error: true # Important as it looks unstable: 503
+        id: imgur_step
+        with:
+          client_id: ${{secrets.IMGUR_CLIENT_ID}}
+          path: |
+            examples/server/bench/prompt_tokens_seconds.jpg
+            examples/server/bench/predicted_tokens_seconds.jpg
+            examples/server/bench/kv_cache_usage_ratio.jpg
+            examples/server/bench/requests_processing.jpg
+
+      - name: Extract mermaid
+        id: set_mermaid
+        run: |
+          set -eux
+
+          cd examples/server/bench
+          PROMPT_TOKENS_SECONDS=$(cat prompt_tokens_seconds.mermaid)
+          echo "PROMPT_TOKENS_SECONDS<<EOF" >> $GITHUB_ENV
+          echo "$PROMPT_TOKENS_SECONDS" >> $GITHUB_ENV
+          echo "EOF" >> $GITHUB_ENV
+
+          PREDICTED_TOKENS_SECONDS=$(cat predicted_tokens_seconds.mermaid)
+          echo "PREDICTED_TOKENS_SECONDS<<EOF" >> $GITHUB_ENV
+          echo "$PREDICTED_TOKENS_SECONDS" >> $GITHUB_ENV
+          echo "EOF" >> $GITHUB_ENV
+
+          KV_CACHE_USAGE_RATIO=$(cat kv_cache_usage_ratio.mermaid)
+          echo "KV_CACHE_USAGE_RATIO<<EOF" >> $GITHUB_ENV
+          echo "$KV_CACHE_USAGE_RATIO" >> $GITHUB_ENV
+          echo "EOF" >> $GITHUB_ENV
+
+          REQUESTS_PROCESSING=$(cat requests_processing.mermaid)
+          echo "REQUESTS_PROCESSING<<EOF" >> $GITHUB_ENV
+          echo "$REQUESTS_PROCESSING" >> $GITHUB_ENV
+          echo "EOF" >> $GITHUB_ENV
+
+      - name: Extract image url
+        id: extract_image_url
+        continue-on-error: true
+        run: |
+          set -eux
+
+          echo "IMAGE_O=${{ fromJSON(steps.imgur_step.outputs.imgur_urls)[0] }}" >> $GITHUB_ENV
+          echo "IMAGE_1=${{ fromJSON(steps.imgur_step.outputs.imgur_urls)[1] }}" >> $GITHUB_ENV
+          echo "IMAGE_2=${{ fromJSON(steps.imgur_step.outputs.imgur_urls)[2] }}" >> $GITHUB_ENV
+          echo "IMAGE_3=${{ fromJSON(steps.imgur_step.outputs.imgur_urls)[3] }}" >> $GITHUB_ENV
+
+      - name: Comment PR
+        uses: mshick/add-pr-comment@v2
+        id: comment_pr
+        if: ${{ github.event.pull_request != '' && matrix.pr_comment_enabled == 'true' }}
+        with:
+          message-id: bench-server-${{ github.job }}-${{ env.RUNNER_LABEL }}-${{ matrix.model }}-${{ matrix.ftype }}
+          message: |
+            <p align="center">
+
+            📈 **llama.cpp server** for _${{ github.job }}_ on _${{ env.RUNNER_LABEL }}_ for `${{ matrix.model }}`-`${{ matrix.ftype }}`: **${{ env.BENCH_ITERATIONS}} iterations** 🚀
+
+            </p>
+
+            <details>
+
+            <summary>Expand details for performance related PR only</summary>
+
+            - Concurrent users: ${{ env.N_USERS }}, duration: ${{ github.event.inputs.duration || env.DURATION }}
+            - HTTP request : avg=${{ env.HTTP_REQ_DURATION_AVG }}ms p(95)=${{ env.HTTP_REQ_DURATION_P_95_ }}ms fails=${{ env.HTTP_REQ_FAILED_PASSES }}, finish reason: stop=${{ env.LLAMACPP_COMPLETIONS_STOP_RATE_PASSES }} truncated=${{ env.LLAMACPP_COMPLETIONS_TRUNCATED_RATE_PASSES }}
+            - Prompt processing (pp): avg=${{ env.LLAMACPP_PROMPT_PROCESSING_SECOND_AVG }}tk/s p(95)=${{ env.LLAMACPP_PROMPT_PROCESSING_SECOND_P_95_ }}tk/s
+            - Token generation (tg): avg=${{ env.LLAMACPP_TOKENS_SECOND_AVG }}tk/s p(95)=${{ env.LLAMACPP_TOKENS_SECOND_P_95_ }}tk/s
+            - ${{ env.BENCH_GRAPH_XLABEL }}
+
+
+            <p align="center">
+
+            <img src="${{ env.IMAGE_O }}" alt="prompt_tokens_seconds" />
+
+            <details>
+
+            <summary>More</summary>
+
+            ```mermaid
+            ${{ env.PROMPT_TOKENS_SECONDS }}
+            ```
+
+            </details>
+
+            <img src="${{ env.IMAGE_1 }}" alt="predicted_tokens_seconds" />
+
+            <details>
+            <summary>More</summary>
+
+            ```mermaid
+            ${{ env.PREDICTED_TOKENS_SECONDS }}
+            ```
+
+            </details>
+
+            </p>
+
+            <details>
+
+            <summary>Details</summary>
+
+            <p align="center">
+
+            <img src="${{ env.IMAGE_2 }}" alt="kv_cache_usage_ratio" />
+
+            <details>
+            <summary>More</summary>
+
+            ```mermaid
+            ${{ env.KV_CACHE_USAGE_RATIO }}
+            ```
+
+            </details>
+
+            <img src="${{ env.IMAGE_3 }}" alt="requests_processing" />
+
+            <details>
+            <summary>More</summary>
+
+            ```mermaid
+            ${{ env.REQUESTS_PROCESSING }}
+            ```
+
+            </details>
+
+            </p>
+            </details>
+            </details>
diff --git a/llama.cpp/.github/workflows/build.yml b/llama.cpp/.github/workflows/build.yml new file mode 100644 index 0000000000000000000000000000000000000000..6841ba5897809a89fab24d80174106ddad7b54ca --- /dev/null +++ b/llama.cpp/.github/workflows/build.yml @@ -0,0 +1,1645 @@ +name: CI + +on: + workflow_dispatch: # allows manual triggering + inputs: + create_release: + description: 'Create new release' + required: true + type: boolean + push: + branches: + - master + paths: ['.github/workflows/build.yml', '**/CMakeLists.txt', '**/Makefile', '**/*.h', '**/*.hpp', '**/*.c', '**/*.cpp', '**/*.cu', '**/*.cuh', '**/*.swift', '**/*.m', '**/*.metal', '**/*.comp'] + pull_request: + types: [opened, synchronize, reopened] + paths: ['.github/workflows/build.yml', '**/CMakeLists.txt', '**/Makefile', '**/*.h', '**/*.hpp', '**/*.c', '**/*.cpp', '**/*.cu', '**/*.cuh', '**/*.swift', '**/*.m', '**/*.metal', '**/*.comp'] + +concurrency: + group: ${{ github.workflow }}-${{ github.head_ref && github.ref || github.run_id }} + cancel-in-progress: true + +# Fine-grant permission +# https://docs.github.com/en/actions/security-for-github-actions/security-guides/automatic-token-authentication#modifying-the-permissions-for-the-github_token +permissions: + contents: write # for creating release + +env: + BRANCH_NAME: ${{ github.head_ref || github.ref_name }} + GGML_NLOOP: 3 + GGML_N_THREADS: 1 + LLAMA_LOG_COLORS: 1 + LLAMA_LOG_PREFIX: 1 + LLAMA_LOG_TIMESTAMPS: 1 + +jobs: + macOS-latest-cmake-arm64: + runs-on: macos-14 + + steps: + - name: Clone + id: checkout + uses: actions/checkout@v4 + with: + fetch-depth: 0 + + - name: ccache + uses: hendrikmuhs/ccache-action@v1.2.16 + with: + key: macOS-latest-cmake-arm64 + evict-old-files: 1d + + - name: Dependencies + id: depends + continue-on-error: true + run: | + brew update + + - name: Build + id: cmake_build + run: | + sysctl -a + cmake -B build \ + -DCMAKE_BUILD_RPATH="@loader_path" \ + -DLLAMA_FATAL_WARNINGS=ON \ + -DLLAMA_CURL=ON \ + -DGGML_METAL_USE_BF16=ON \ + -DGGML_METAL_EMBED_LIBRARY=ON \ + -DGGML_RPC=ON + cmake --build build --config Release -j $(sysctl -n hw.logicalcpu) + + - name: Test + id: cmake_test + run: | + cd build + ctest -L 'main|curl' --verbose --timeout 900 + + - name: Determine tag name + id: tag + shell: bash + run: | + BUILD_NUMBER="$(git rev-list --count HEAD)" + SHORT_HASH="$(git rev-parse --short=7 HEAD)" + if [[ "${{ env.BRANCH_NAME }}" == "master" ]]; then + echo "name=b${BUILD_NUMBER}" >> $GITHUB_OUTPUT + else + SAFE_NAME=$(echo "${{ env.BRANCH_NAME }}" | tr '/' '-') + echo "name=${SAFE_NAME}-b${BUILD_NUMBER}-${SHORT_HASH}" >> $GITHUB_OUTPUT + fi + + - name: Pack artifacts + id: pack_artifacts + if: ${{ ( github.event_name == 'push' && github.ref == 'refs/heads/master' ) || github.event.inputs.create_release == 'true' }} + run: | + cp LICENSE ./build/bin/ + cp examples/run/linenoise.cpp/LICENSE ./build/bin/LICENSE.linenoise.cpp + zip -r llama-${{ steps.tag.outputs.name }}-bin-macos-arm64.zip ./build/bin/* + + - name: Upload artifacts + if: ${{ ( github.event_name == 'push' && github.ref == 'refs/heads/master' ) || github.event.inputs.create_release == 'true' }} + uses: actions/upload-artifact@v4 + with: + path: llama-${{ steps.tag.outputs.name }}-bin-macos-arm64.zip + name: llama-bin-macos-arm64.zip + + macOS-latest-cmake-x64: + runs-on: macos-13 + + steps: + - name: Clone + id: checkout + uses: actions/checkout@v4 + with: + fetch-depth: 0 + + - name: ccache + uses: hendrikmuhs/ccache-action@v1.2.16 + with: + key: macOS-latest-cmake-x64 + 
evict-old-files: 1d + + - name: Dependencies + id: depends + continue-on-error: true + run: | + brew update + + - name: Build + id: cmake_build + run: | + sysctl -a + # Metal is disabled due to intermittent failures with Github runners not having a GPU: + # https://github.com/ggerganov/llama.cpp/actions/runs/8635935781/job/23674807267#step:5:2313 + cmake -B build \ + -DCMAKE_BUILD_RPATH="@loader_path" \ + -DLLAMA_FATAL_WARNINGS=ON \ + -DLLAMA_CURL=ON \ + -DGGML_METAL=OFF \ + -DGGML_RPC=ON + cmake --build build --config Release -j $(sysctl -n hw.logicalcpu) + + - name: Test + id: cmake_test + run: | + cd build + ctest -L main --verbose --timeout 900 + + - name: Determine tag name + id: tag + shell: bash + run: | + BUILD_NUMBER="$(git rev-list --count HEAD)" + SHORT_HASH="$(git rev-parse --short=7 HEAD)" + if [[ "${{ env.BRANCH_NAME }}" == "master" ]]; then + echo "name=b${BUILD_NUMBER}" >> $GITHUB_OUTPUT + else + SAFE_NAME=$(echo "${{ env.BRANCH_NAME }}" | tr '/' '-') + echo "name=${SAFE_NAME}-b${BUILD_NUMBER}-${SHORT_HASH}" >> $GITHUB_OUTPUT + fi + + - name: Pack artifacts + id: pack_artifacts + if: ${{ ( github.event_name == 'push' && github.ref == 'refs/heads/master' ) || github.event.inputs.create_release == 'true' }} + run: | + cp LICENSE ./build/bin/ + cp examples/run/linenoise.cpp/LICENSE ./build/bin/LICENSE.linenoise.cpp + zip -r llama-${{ steps.tag.outputs.name }}-bin-macos-x64.zip ./build/bin/* + + - name: Upload artifacts + if: ${{ ( github.event_name == 'push' && github.ref == 'refs/heads/master' ) || github.event.inputs.create_release == 'true' }} + uses: actions/upload-artifact@v4 + with: + path: llama-${{ steps.tag.outputs.name }}-bin-macos-x64.zip + name: llama-bin-macos-x64.zip + + ubuntu-cpu-cmake: + runs-on: ubuntu-22.04 + + steps: + - name: Clone + id: checkout + uses: actions/checkout@v4 + with: + fetch-depth: 0 + + - name: ccache + uses: hendrikmuhs/ccache-action@v1.2.16 + with: + key: ubuntu-cpu-cmake + evict-old-files: 1d + + - name: Dependencies + id: depends + run: | + sudo apt-get update + sudo apt-get install build-essential libcurl4-openssl-dev + + - name: Build + id: cmake_build + run: | + cmake -B build \ + -DLLAMA_FATAL_WARNINGS=ON \ + -DLLAMA_CURL=ON \ + -DGGML_RPC=ON + cmake --build build --config Release -j $(nproc) + + - name: Test + id: cmake_test + run: | + cd build + ctest -L 'main|curl' --verbose --timeout 900 + + - name: Test llama2c conversion + id: llama2c_test + run: | + cd build + echo "Fetch tokenizer" + wget https://huggingface.co/karpathy/tinyllamas/resolve/main/stories260K/tok512.bin + echo "Fetch llama2c model" + wget https://huggingface.co/karpathy/tinyllamas/resolve/main/stories260K/stories260K.bin + ./bin/llama-convert-llama2c-to-ggml --copy-vocab-from-model ./tok512.bin --llama2c-model stories260K.bin --llama2c-output-model stories260K.gguf + ./bin/llama-cli -m stories260K.gguf -p "One day, Lily met a Shoggoth" -n 500 -c 256 + + - name: Determine tag name + id: tag + shell: bash + run: | + BUILD_NUMBER="$(git rev-list --count HEAD)" + SHORT_HASH="$(git rev-parse --short=7 HEAD)" + if [[ "${{ env.BRANCH_NAME }}" == "master" ]]; then + echo "name=b${BUILD_NUMBER}" >> $GITHUB_OUTPUT + else + SAFE_NAME=$(echo "${{ env.BRANCH_NAME }}" | tr '/' '-') + echo "name=${SAFE_NAME}-b${BUILD_NUMBER}-${SHORT_HASH}" >> $GITHUB_OUTPUT + fi + + - name: Pack artifacts + id: pack_artifacts + if: ${{ ( github.event_name == 'push' && github.ref == 'refs/heads/master' ) || github.event.inputs.create_release == 'true' }} + run: | + cp LICENSE ./build/bin/ + cp 
examples/run/linenoise.cpp/LICENSE ./build/bin/LICENSE.linenoise.cpp + zip -r llama-${{ steps.tag.outputs.name }}-bin-ubuntu-x64.zip ./build/bin/* + + - name: Upload artifacts + if: ${{ ( github.event_name == 'push' && github.ref == 'refs/heads/master' ) || github.event.inputs.create_release == 'true' }} + uses: actions/upload-artifact@v4 + with: + path: llama-${{ steps.tag.outputs.name }}-bin-ubuntu-x64.zip + name: llama-bin-ubuntu-x64.zip + + ubuntu-latest-cmake-sanitizer: + runs-on: ubuntu-latest + + continue-on-error: true + + strategy: + matrix: + sanitizer: [ADDRESS, THREAD, UNDEFINED] + build_type: [Debug] + + steps: + - name: Clone + id: checkout + uses: actions/checkout@v4 + + - name: ccache + uses: hendrikmuhs/ccache-action@v1.2.16 + with: + key: ubuntu-latest-cmake-sanitizer-${{ matrix.sanitizer }} + evict-old-files: 1d + + - name: Dependencies + id: depends + run: | + sudo apt-get update + sudo apt-get install build-essential + + - name: Build + id: cmake_build + if: ${{ matrix.sanitizer != 'THREAD' }} + run: | + cmake -B build \ + -DLLAMA_FATAL_WARNINGS=ON \ + -DLLAMA_SANITIZE_${{ matrix.sanitizer }}=ON \ + -DCMAKE_BUILD_TYPE=${{ matrix.build_type }} + cmake --build build --config ${{ matrix.build_type }} -j $(nproc) + + - name: Build (no OpenMP) + id: cmake_build_no_openmp + if: ${{ matrix.sanitizer == 'THREAD' }} + run: | + cmake -B build \ + -DLLAMA_FATAL_WARNINGS=ON \ + -DLLAMA_SANITIZE_${{ matrix.sanitizer }}=ON \ + -DCMAKE_BUILD_TYPE=${{ matrix.build_type }} \ + -DGGML_OPENMP=OFF + cmake --build build --config ${{ matrix.build_type }} -j $(nproc) + + - name: Test + id: cmake_test + run: | + cd build + ctest -L main --verbose --timeout 900 + + ubuntu-latest-llguidance: + runs-on: ubuntu-latest + + steps: + - name: Clone + id: checkout + uses: actions/checkout@v4 + + - name: Dependencies + id: depends + run: | + sudo apt-get update + sudo apt-get install build-essential + + - name: Build + id: cmake_build + run: | + mkdir build + cd build + cmake .. \ + -DLLAMA_FATAL_WARNINGS=ON \ + -DLLAMA_LLGUIDANCE=ON + cmake --build . 
--config Release -j $(nproc) + + - name: Test + id: cmake_test + run: | + cd build + ctest -L main --verbose --timeout 900 + + ubuntu-latest-cmake-rpc: + runs-on: ubuntu-latest + + continue-on-error: true + + steps: + - name: Clone + id: checkout + uses: actions/checkout@v4 + + - name: ccache + uses: hendrikmuhs/ccache-action@v1.2.16 + with: + key: ubuntu-latest-cmake-rpc + evict-old-files: 1d + + - name: Dependencies + id: depends + run: | + sudo apt-get update + sudo apt-get install build-essential + + - name: Build + id: cmake_build + run: | + cmake -B build \ + -DGGML_RPC=ON + cmake --build build --config Release -j $(nproc) + + - name: Test + id: cmake_test + run: | + cd build + ctest -L main --verbose + + ubuntu-22-cmake-vulkan: + runs-on: ubuntu-22.04 + + steps: + - name: Clone + id: checkout + uses: actions/checkout@v4 + + - name: ccache + uses: hendrikmuhs/ccache-action@v1.2.16 + with: + key: ubuntu-22-cmake-vulkan + evict-old-files: 1d + + - name: Dependencies + id: depends + run: | + wget -qO - https://packages.lunarg.com/lunarg-signing-key-pub.asc | sudo apt-key add - + sudo wget -qO /etc/apt/sources.list.d/lunarg-vulkan-jammy.list https://packages.lunarg.com/vulkan/lunarg-vulkan-jammy.list + sudo apt-get update -y + sudo apt-get install -y build-essential mesa-vulkan-drivers vulkan-sdk + + - name: Build + id: cmake_build + run: | + cmake -B build \ + -DGGML_VULKAN=ON + cmake --build build --config Release -j $(nproc) + + - name: Test + id: cmake_test + run: | + cd build + # This is using llvmpipe and runs slower than other backends + ctest -L main --verbose --timeout 1800 + + ubuntu-22-cmake-hip: + runs-on: ubuntu-22.04 + container: rocm/dev-ubuntu-22.04:6.0.2 + + steps: + - name: Clone + id: checkout + uses: actions/checkout@v4 + + - name: Dependencies + id: depends + run: | + sudo apt-get update + sudo apt-get install -y build-essential git cmake rocblas-dev hipblas-dev + + - name: ccache + uses: hendrikmuhs/ccache-action@v1.2.16 + with: + key: ubuntu-22-cmake-hip + evict-old-files: 1d + + - name: Build with native CMake HIP support + id: cmake_build + run: | + cmake -B build -S . \ + -DCMAKE_HIP_COMPILER="$(hipconfig -l)/clang" \ + -DGGML_HIP=ON + cmake --build build --config Release -j $(nproc) + + - name: Build with legacy HIP support + id: cmake_build_legacy_hip + run: | + cmake -B build2 -S . \ + -DCMAKE_C_COMPILER=hipcc \ + -DCMAKE_CXX_COMPILER=hipcc \ + -DGGML_HIP=ON + cmake --build build2 --config Release -j $(nproc) + + ubuntu-22-cmake-musa: + runs-on: ubuntu-22.04 + container: mthreads/musa:rc3.1.0-devel-ubuntu22.04 + + steps: + - name: Clone + id: checkout + uses: actions/checkout@v4 + + - name: Dependencies + id: depends + run: | + apt-get update + apt-get install -y build-essential git cmake libcurl4-openssl-dev + + - name: ccache + uses: hendrikmuhs/ccache-action@v1.2.16 + with: + key: ubuntu-22-cmake-musa + evict-old-files: 1d + + - name: Build with native CMake MUSA support + id: cmake_build + run: | + cmake -B build -S . 
\ + -DGGML_MUSA=ON + cmake --build build --config Release -j $(nproc) + + ubuntu-22-cmake-sycl: + runs-on: ubuntu-22.04 + + continue-on-error: true + + steps: + - uses: actions/checkout@v4 + + - name: add oneAPI to apt + shell: bash + run: | + cd /tmp + wget https://apt.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB + sudo apt-key add GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB + rm GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB + sudo add-apt-repository "deb https://apt.repos.intel.com/oneapi all main" + + - name: install oneAPI dpcpp compiler + shell: bash + run: | + sudo apt update + sudo apt install intel-oneapi-compiler-dpcpp-cpp + + - name: install oneAPI MKL library + shell: bash + run: | + sudo apt install intel-oneapi-mkl-devel + + - name: Clone + id: checkout + uses: actions/checkout@v4 + + - name: ccache + uses: hendrikmuhs/ccache-action@v1.2.16 + with: + key: ubuntu-22-cmake-sycl + evict-old-files: 1d + + - name: Build + id: cmake_build + run: | + source /opt/intel/oneapi/setvars.sh + cmake -B build \ + -DGGML_SYCL=ON \ + -DCMAKE_C_COMPILER=icx \ + -DCMAKE_CXX_COMPILER=icpx + cmake --build build --config Release -j $(nproc) + + ubuntu-22-cmake-sycl-fp16: + runs-on: ubuntu-22.04 + + continue-on-error: true + + steps: + - uses: actions/checkout@v4 + + - name: add oneAPI to apt + shell: bash + run: | + cd /tmp + wget https://apt.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB + sudo apt-key add GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB + rm GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB + sudo add-apt-repository "deb https://apt.repos.intel.com/oneapi all main" + + - name: install oneAPI dpcpp compiler + shell: bash + run: | + sudo apt update + sudo apt install intel-oneapi-compiler-dpcpp-cpp + + - name: install oneAPI MKL library + shell: bash + run: | + sudo apt install intel-oneapi-mkl-devel + + - name: Clone + id: checkout + uses: actions/checkout@v4 + + - name: ccache + uses: hendrikmuhs/ccache-action@v1.2.16 + with: + key: ubuntu-22-cmake-sycl-fp16 + evict-old-files: 1d + + - name: Build + id: cmake_build + run: | + source /opt/intel/oneapi/setvars.sh + cmake -B build \ + -DGGML_SYCL=ON \ + -DCMAKE_C_COMPILER=icx \ + -DCMAKE_CXX_COMPILER=icpx \ + -DGGML_SYCL_F16=ON + cmake --build build --config Release -j $(nproc) + + macOS-latest-cmake-ios: + runs-on: macos-latest + + steps: + - name: Clone + id: checkout + uses: actions/checkout@v4 + + - name: ccache + uses: hendrikmuhs/ccache-action@v1.2.16 + with: + key: macOS-latest-cmake-ios + evict-old-files: 1d + + - name: Dependencies + id: depends + continue-on-error: true + run: | + brew update + + - name: Build + id: cmake_build + run: | + sysctl -a + cmake -B build -G Xcode \ + -DGGML_METAL_USE_BF16=ON \ + -DGGML_METAL_EMBED_LIBRARY=ON \ + -DLLAMA_BUILD_EXAMPLES=OFF \ + -DLLAMA_BUILD_TESTS=OFF \ + -DLLAMA_BUILD_SERVER=OFF \ + -DCMAKE_SYSTEM_NAME=iOS \ + -DCMAKE_OSX_DEPLOYMENT_TARGET=14.0 \ + -DCMAKE_XCODE_ATTRIBUTE_DEVELOPMENT_TEAM=ggml + cmake --build build --config Release -j $(sysctl -n hw.logicalcpu) -- CODE_SIGNING_ALLOWED=NO + + macOS-latest-cmake-tvos: + runs-on: macos-latest + + steps: + - name: Clone + id: checkout + uses: actions/checkout@v4 + + - name: ccache + uses: hendrikmuhs/ccache-action@v1.2.16 + with: + key: macOS-latest-cmake-tvos + evict-old-files: 1d + + - name: Dependencies + id: depends + continue-on-error: true + run: | + brew update + + - name: Build + id: cmake_build + run: | + sysctl -a + cmake -B build -G Xcode \ + -DGGML_METAL_USE_BF16=ON \ + -DGGML_METAL_EMBED_LIBRARY=ON \ + -DLLAMA_BUILD_EXAMPLES=OFF \ + 
-DLLAMA_BUILD_TESTS=OFF \ + -DLLAMA_BUILD_SERVER=OFF \ + -DCMAKE_SYSTEM_NAME=tvOS \ + -DCMAKE_OSX_DEPLOYMENT_TARGET=14.0 \ + -DCMAKE_XCODE_ATTRIBUTE_DEVELOPMENT_TEAM=ggml + cmake --build build --config Release -j $(sysctl -n hw.logicalcpu) -- CODE_SIGNING_ALLOWED=NO + + macOS-latest-swift: + runs-on: macos-latest + + strategy: + matrix: + destination: ['generic/platform=macOS', 'generic/platform=iOS', 'generic/platform=tvOS'] + + steps: + - name: Clone + id: checkout + uses: actions/checkout@v4 + + - name: ccache + uses: hendrikmuhs/ccache-action@v1.2.16 + with: + key: macOS-latest-swift + evict-old-files: 1d + + - name: Dependencies + id: depends + continue-on-error: true + run: | + brew update + + - name: Build llama.cpp with CMake + id: cmake_build + run: | + sysctl -a + cmake -B build -G Xcode \ + -DGGML_METAL_USE_BF16=ON \ + -DGGML_METAL_EMBED_LIBRARY=ON \ + -DLLAMA_BUILD_EXAMPLES=OFF \ + -DLLAMA_BUILD_TESTS=OFF \ + -DLLAMA_BUILD_SERVER=OFF \ + -DCMAKE_OSX_ARCHITECTURES="arm64;x86_64" + cmake --build build --config Release -j $(sysctl -n hw.logicalcpu) + sudo cmake --install build --config Release + + - name: xcodebuild for swift package + id: xcodebuild + run: | + xcodebuild -scheme llama-Package -destination "${{ matrix.destination }}" + + windows-msys2: + runs-on: windows-latest + + strategy: + fail-fast: false + matrix: + include: + - { sys: UCRT64, env: ucrt-x86_64, build: Release } + - { sys: CLANG64, env: clang-x86_64, build: Release } + + steps: + - name: Clone + uses: actions/checkout@v4 + + - name: ccache + uses: hendrikmuhs/ccache-action@v1.2.16 + with: + key: windows-msys2 + variant: sccache + evict-old-files: 1d + + - name: Setup ${{ matrix.sys }} + uses: msys2/setup-msys2@v2 + with: + update: true + msystem: ${{matrix.sys}} + install: >- + base-devel + git + mingw-w64-${{matrix.env}}-toolchain + mingw-w64-${{matrix.env}}-cmake + mingw-w64-${{matrix.env}}-openblas + + - name: Build using CMake + shell: msys2 {0} + run: | + cmake -B build + cmake --build build --config ${{ matrix.build }} -j $(nproc) + + - name: Clean after building using CMake + shell: msys2 {0} + run: | + rm -rf build + + - name: Build using CMake w/ OpenBLAS + shell: msys2 {0} + run: | + cmake -B build -DGGML_BLAS=ON -DGGML_BLAS_VENDOR=OpenBLAS + cmake --build build --config ${{ matrix.build }} -j $(nproc) + + windows-latest-cmake: + runs-on: windows-latest + + env: + OPENBLAS_VERSION: 0.3.23 + SDE_VERSION: 9.33.0-2024-01-07 + VULKAN_VERSION: 1.3.261.1 + + strategy: + matrix: + include: + - build: 'noavx-x64' + defines: '-DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DGGML_RPC=ON -DGGML_AVX=OFF -DGGML_AVX2=OFF -DGGML_FMA=OFF' + - build: 'avx2-x64' + defines: '-DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DGGML_RPC=ON' + - build: 'avx-x64' + defines: '-DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DGGML_RPC=ON -DGGML_AVX2=OFF' + - build: 'avx512-x64' + defines: '-DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DGGML_RPC=ON -DGGML_AVX512=ON' + - build: 'openblas-x64' + defines: '-DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DGGML_RPC=ON -DGGML_BLAS=ON -DGGML_BLAS_VENDOR=OpenBLAS -DBLAS_INCLUDE_DIRS="$env:RUNNER_TEMP/openblas/include" -DBLAS_LIBRARIES="$env:RUNNER_TEMP/openblas/lib/openblas.lib"' + - build: 'kompute-x64' + defines: '-DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DGGML_RPC=ON -DGGML_KOMPUTE=ON -DKOMPUTE_OPT_DISABLE_VULKAN_VERSION_CHECK=ON' + - build: 'vulkan-x64' + defines: '-DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DGGML_RPC=ON -DGGML_VULKAN=ON' + - build: 'llvm-arm64' + defines: '-G "Ninja Multi-Config" -D 
CMAKE_TOOLCHAIN_FILE=cmake/arm64-windows-llvm.cmake -DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON' + - build: 'msvc-arm64' + defines: '-G "Ninja Multi-Config" -D CMAKE_TOOLCHAIN_FILE=cmake/arm64-windows-msvc.cmake -DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON' + - build: 'llvm-arm64-opencl-adreno' + defines: '-G "Ninja Multi-Config" -D CMAKE_TOOLCHAIN_FILE=cmake/arm64-windows-llvm.cmake -DCMAKE_PREFIX_PATH="$env:RUNNER_TEMP/opencl-arm64-release" -DGGML_OPENCL=ON -DGGML_OPENCL_USE_ADRENO_KERNELS=ON' + + steps: + - name: Clone + id: checkout + uses: actions/checkout@v4 + with: + fetch-depth: 0 + + - name: ccache + uses: hendrikmuhs/ccache-action@v1.2.16 + with: + key: windows-latest-cmake-${{ matrix.build }} + variant: sccache + evict-old-files: 1d + + - name: Clone Kompute submodule + id: clone_kompute + if: ${{ matrix.build == 'kompute-x64' }} + run: | + git submodule update --init ggml/src/ggml-kompute/kompute + + - name: Download OpenBLAS + id: get_openblas + if: ${{ matrix.build == 'openblas-x64' }} + run: | + curl.exe -o $env:RUNNER_TEMP/openblas.zip -L "https://github.com/xianyi/OpenBLAS/releases/download/v${env:OPENBLAS_VERSION}/OpenBLAS-${env:OPENBLAS_VERSION}-x64.zip" + curl.exe -o $env:RUNNER_TEMP/OpenBLAS.LICENSE.txt -L "https://github.com/xianyi/OpenBLAS/raw/v${env:OPENBLAS_VERSION}/LICENSE" + mkdir $env:RUNNER_TEMP/openblas + tar.exe -xvf $env:RUNNER_TEMP/openblas.zip -C $env:RUNNER_TEMP/openblas + $vcdir = $(vswhere -latest -products * -requires Microsoft.VisualStudio.Component.VC.Tools.x86.x64 -property installationPath) + $msvc = $(join-path $vcdir $('VC\Tools\MSVC\'+$(gc -raw $(join-path $vcdir 'VC\Auxiliary\Build\Microsoft.VCToolsVersion.default.txt')).Trim())) + $lib = $(join-path $msvc 'bin\Hostx64\x64\lib.exe') + & $lib /machine:x64 "/def:${env:RUNNER_TEMP}/openblas/lib/libopenblas.def" "/out:${env:RUNNER_TEMP}/openblas/lib/openblas.lib" /name:openblas.dll + + - name: Install Vulkan SDK + id: get_vulkan + if: ${{ matrix.build == 'kompute-x64' || matrix.build == 'vulkan-x64' }} + run: | + curl.exe -o $env:RUNNER_TEMP/VulkanSDK-Installer.exe -L "https://sdk.lunarg.com/sdk/download/${env:VULKAN_VERSION}/windows/VulkanSDK-${env:VULKAN_VERSION}-Installer.exe" + & "$env:RUNNER_TEMP\VulkanSDK-Installer.exe" --accept-licenses --default-answer --confirm-command install + Add-Content $env:GITHUB_ENV "VULKAN_SDK=C:\VulkanSDK\${env:VULKAN_VERSION}" + Add-Content $env:GITHUB_PATH "C:\VulkanSDK\${env:VULKAN_VERSION}\bin" + + - name: Install Ninja + id: install_ninja + run: | + choco install ninja + + - name: Install OpenCL Headers and Libs + id: install_opencl + if: ${{ matrix.build == 'llvm-arm64-opencl-adreno' }} + run: | + git clone https://github.com/KhronosGroup/OpenCL-Headers + cd OpenCL-Headers + cmake -B build ` + -DBUILD_TESTING=OFF ` + -DOPENCL_HEADERS_BUILD_TESTING=OFF ` + -DOPENCL_HEADERS_BUILD_CXX_TESTS=OFF ` + -DCMAKE_INSTALL_PREFIX="$env:RUNNER_TEMP/opencl-arm64-release" + cmake --build build --target install + git clone https://github.com/KhronosGroup/OpenCL-ICD-Loader + cd OpenCL-ICD-Loader + cmake -B build-arm64-release ` + -A arm64 ` + -DCMAKE_PREFIX_PATH="$env:RUNNER_TEMP/opencl-arm64-release" ` + -DCMAKE_INSTALL_PREFIX="$env:RUNNER_TEMP/opencl-arm64-release" + cmake --build build-arm64-release --target install --config release + + - name: Build + id: cmake_build + run: | + cmake -S . 
-B build ${{ matrix.defines }} + cmake --build build --config Release -j ${env:NUMBER_OF_PROCESSORS} + + - name: Add libopenblas.dll + id: add_libopenblas_dll + if: ${{ matrix.build == 'openblas-x64' }} + run: | + cp $env:RUNNER_TEMP/openblas/bin/libopenblas.dll ./build/bin/Release/openblas.dll + cp $env:RUNNER_TEMP/OpenBLAS.LICENSE.txt ./build/bin/Release/OpenBLAS-${env:OPENBLAS_VERSION}.txt + + - name: Check AVX512F support + id: check_avx512f + if: ${{ matrix.build == 'avx512-x64' }} + continue-on-error: true + run: | + cd build + $vcdir = $(vswhere -latest -products * -requires Microsoft.VisualStudio.Component.VC.Tools.x86.x64 -property installationPath) + $msvc = $(join-path $vcdir $('VC\Tools\MSVC\'+$(gc -raw $(join-path $vcdir 'VC\Auxiliary\Build\Microsoft.VCToolsVersion.default.txt')).Trim())) + $cl = $(join-path $msvc 'bin\Hostx64\x64\cl.exe') + echo 'int main(void){unsigned int a[4];__cpuid(a,7);return !(a[1]&65536);}' >> avx512f.c + & $cl /O2 /GS- /kernel avx512f.c /link /nodefaultlib /entry:main + .\avx512f.exe && echo "AVX512F: YES" && ( echo HAS_AVX512F=1 >> $env:GITHUB_ENV ) || echo "AVX512F: NO" + + - name: Test + id: cmake_test + # not all machines have native AVX-512 + if: ${{ matrix.build != 'msvc-arm64' && matrix.build != 'llvm-arm64' && matrix.build != 'llvm-arm64-opencl-adreno' && matrix.build != 'kompute-x64' && matrix.build != 'vulkan-x64' && (matrix.build != 'avx512-x64' || env.HAS_AVX512F == '1') }} + run: | + cd build + ctest -L main -C Release --verbose --timeout 900 + + - name: Test (Intel SDE) + id: cmake_test_sde + if: ${{ matrix.build == 'avx512-x64' && env.HAS_AVX512F == '0' }} # use Intel SDE for AVX-512 emulation + run: | + curl.exe -o $env:RUNNER_TEMP/sde.tar.xz -L "https://downloadmirror.intel.com/813591/sde-external-${env:SDE_VERSION}-win.tar.xz" + # for some weird reason windows tar doesn't like sde tar.xz + 7z x "-o${env:RUNNER_TEMP}" $env:RUNNER_TEMP/sde.tar.xz + 7z x "-o${env:RUNNER_TEMP}" $env:RUNNER_TEMP/sde.tar + $sde = $(join-path $env:RUNNER_TEMP sde-external-${env:SDE_VERSION}-win/sde.exe) + cd build + $env:LLAMA_SKIP_TESTS_SLOW_ON_EMULATOR = 1 + & $sde -future -- ctest -L main -C Release --verbose --timeout 900 + + - name: Determine tag name + id: tag + shell: bash + run: | + BUILD_NUMBER="$(git rev-list --count HEAD)" + SHORT_HASH="$(git rev-parse --short=7 HEAD)" + if [[ "${{ env.BRANCH_NAME }}" == "master" ]]; then + echo "name=b${BUILD_NUMBER}" >> $GITHUB_OUTPUT + else + SAFE_NAME=$(echo "${{ env.BRANCH_NAME }}" | tr '/' '-') + echo "name=${SAFE_NAME}-b${BUILD_NUMBER}-${SHORT_HASH}" >> $GITHUB_OUTPUT + fi + + - name: Pack artifacts + id: pack_artifacts + if: ${{ ( github.event_name == 'push' && github.ref == 'refs/heads/master' ) || github.event.inputs.create_release == 'true' }} + run: | + Copy-Item LICENSE .\build\bin\Release\llama.cpp.txt + Copy-Item .\examples\run\linenoise.cpp\LICENSE .\build\bin\Release\linenoise.cpp.txt + 7z a llama-${{ steps.tag.outputs.name }}-bin-win-${{ matrix.build }}.zip .\build\bin\Release\* + + - name: Upload artifacts + if: ${{ ( github.event_name == 'push' && github.ref == 'refs/heads/master' ) || github.event.inputs.create_release == 'true' }} + uses: actions/upload-artifact@v4 + with: + path: llama-${{ steps.tag.outputs.name }}-bin-win-${{ matrix.build }}.zip + name: llama-bin-win-${{ matrix.build }}.zip + + ubuntu-latest-cmake-cuda: + runs-on: ubuntu-latest + container: nvidia/cuda:12.6.2-devel-ubuntu24.04 + + steps: + - name: Clone + id: checkout + uses: actions/checkout@v4 + with: + fetch-depth: 0 + 
+ - name: Install dependencies + env: + DEBIAN_FRONTEND: noninteractive + run: | + apt update + apt install -y cmake build-essential ninja-build libgomp1 git + + - name: ccache + uses: hendrikmuhs/ccache-action@v1.2.16 + with: + key: ubuntu-latest-cmake-cuda + evict-old-files: 1d + + - name: Build with CMake + run: | + cmake -S . -B build -G Ninja \ + -DCMAKE_BUILD_TYPE=Release \ + -DCMAKE_CUDA_ARCHITECTURES=89-real \ + -DCMAKE_EXE_LINKER_FLAGS=-Wl,--allow-shlib-undefined \ + -DLLAMA_FATAL_WARNINGS=ON \ + -DGGML_NATIVE=OFF \ + -DGGML_CUDA=ON + cmake --build build + + windows-2019-cmake-cuda: + runs-on: windows-2019 + + strategy: + matrix: + cuda: ['12.4', '11.7'] + build: ['cuda'] + + steps: + - name: Clone + id: checkout + uses: actions/checkout@v4 + with: + fetch-depth: 0 + + - name: Install ccache + uses: hendrikmuhs/ccache-action@v1.2.16 + with: + key: ${{ github.job }}-${{ matrix.cuda }}-${{ matrix.build }} + variant: sccache + evict-old-files: 1d + + - name: Install Cuda Toolkit 11.7 + if: ${{ matrix.cuda == '11.7' }} + run: | + mkdir -p "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7" + choco install unzip -y + curl -O "https://developer.download.nvidia.com/compute/cuda/redist/cuda_cudart/windows-x86_64/cuda_cudart-windows-x86_64-11.7.99-archive.zip" + curl -O "https://developer.download.nvidia.com/compute/cuda/redist/cuda_nvcc/windows-x86_64/cuda_nvcc-windows-x86_64-11.7.99-archive.zip" + curl -O "https://developer.download.nvidia.com/compute/cuda/redist/cuda_nvrtc/windows-x86_64/cuda_nvrtc-windows-x86_64-11.7.99-archive.zip" + curl -O "https://developer.download.nvidia.com/compute/cuda/redist/libcublas/windows-x86_64/libcublas-windows-x86_64-11.7.4.6-archive.zip" + curl -O "https://developer.download.nvidia.com/compute/cuda/redist/cuda_nvtx/windows-x86_64/cuda_nvtx-windows-x86_64-11.7.91-archive.zip" + curl -O "https://developer.download.nvidia.com/compute/cuda/redist/visual_studio_integration/windows-x86_64/visual_studio_integration-windows-x86_64-11.7.91-archive.zip" + curl -O "https://developer.download.nvidia.com/compute/cuda/redist/cuda_nvprof/windows-x86_64/cuda_nvprof-windows-x86_64-11.7.101-archive.zip" + curl -O "https://developer.download.nvidia.com/compute/cuda/redist/cuda_cccl/windows-x86_64/cuda_cccl-windows-x86_64-11.7.91-archive.zip" + unzip '*.zip' -d "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7" + xcopy "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7\cuda_cudart-windows-x86_64-11.7.99-archive\*" "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7" /E /I /H /Y + xcopy "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7\cuda_nvcc-windows-x86_64-11.7.99-archive\*" "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7" /E /I /H /Y + xcopy "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7\cuda_nvrtc-windows-x86_64-11.7.99-archive\*" "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7" /E /I /H /Y + xcopy "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7\libcublas-windows-x86_64-11.7.4.6-archive\*" "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7" /E /I /H /Y + xcopy "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7\cuda_nvtx-windows-x86_64-11.7.91-archive\*" "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7" /E /I /H /Y + xcopy "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7\visual_studio_integration-windows-x86_64-11.7.91-archive\*" "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7" /E /I /H /Y + xcopy "C:\Program Files\NVIDIA GPU Computing 
Toolkit\CUDA\v11.7\cuda_nvprof-windows-x86_64-11.7.101-archive\*" "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7" /E /I /H /Y + xcopy "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7\cuda_cccl-windows-x86_64-11.7.91-archive\*" "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7" /E /I /H /Y + echo "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7\bin" | Out-File -FilePath $env:GITHUB_PATH -Encoding utf8 -Append + echo "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7\libnvvp" | Out-File -FilePath $env:GITHUB_PATH -Encoding utf8 -Append + echo "CUDA_PATH=C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7" | Out-File -FilePath $env:GITHUB_ENV -Append -Encoding utf8 + echo "CUDA_PATH_V11_7=C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7" | Out-File -FilePath $env:GITHUB_ENV -Append -Encoding utf8 + + - name: Install Cuda Toolkit 12.4 + if: ${{ matrix.cuda == '12.4' }} + run: | + mkdir -p "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4" + choco install unzip -y + curl -O "https://developer.download.nvidia.com/compute/cuda/redist/cuda_cudart/windows-x86_64/cuda_cudart-windows-x86_64-12.4.127-archive.zip" + curl -O "https://developer.download.nvidia.com/compute/cuda/redist/cuda_nvcc/windows-x86_64/cuda_nvcc-windows-x86_64-12.4.131-archive.zip" + curl -O "https://developer.download.nvidia.com/compute/cuda/redist/cuda_nvrtc/windows-x86_64/cuda_nvrtc-windows-x86_64-12.4.127-archive.zip" + curl -O "https://developer.download.nvidia.com/compute/cuda/redist/libcublas/windows-x86_64/libcublas-windows-x86_64-12.4.5.8-archive.zip" + curl -O "https://developer.download.nvidia.com/compute/cuda/redist/cuda_nvtx/windows-x86_64/cuda_nvtx-windows-x86_64-12.4.127-archive.zip" + curl -O "https://developer.download.nvidia.com/compute/cuda/redist/cuda_profiler_api/windows-x86_64/cuda_profiler_api-windows-x86_64-12.4.127-archive.zip" + curl -O "https://developer.download.nvidia.com/compute/cuda/redist/visual_studio_integration/windows-x86_64/visual_studio_integration-windows-x86_64-12.4.127-archive.zip" + curl -O "https://developer.download.nvidia.com/compute/cuda/redist/cuda_nvprof/windows-x86_64/cuda_nvprof-windows-x86_64-12.4.127-archive.zip" + curl -O "https://developer.download.nvidia.com/compute/cuda/redist/cuda_cccl/windows-x86_64/cuda_cccl-windows-x86_64-12.4.127-archive.zip" + unzip '*.zip' -d "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4" + xcopy "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4\cuda_cudart-windows-x86_64-12.4.127-archive\*" "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4" /E /I /H /Y + xcopy "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4\cuda_nvcc-windows-x86_64-12.4.131-archive\*" "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4" /E /I /H /Y + xcopy "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4\cuda_nvrtc-windows-x86_64-12.4.127-archive\*" "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4" /E /I /H /Y + xcopy "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4\libcublas-windows-x86_64-12.4.5.8-archive\*" "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4" /E /I /H /Y + xcopy "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4\cuda_nvtx-windows-x86_64-12.4.127-archive\*" "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4" /E /I /H /Y + xcopy "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4\cuda_profiler_api-windows-x86_64-12.4.127-archive\*" "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4" /E /I /H /Y + xcopy 
"C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4\visual_studio_integration-windows-x86_64-12.4.127-archive\*" "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4" /E /I /H /Y + xcopy "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4\cuda_nvprof-windows-x86_64-12.4.127-archive\*" "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4" /E /I /H /Y + xcopy "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4\cuda_cccl-windows-x86_64-12.4.127-archive\*" "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4" /E /I /H /Y + echo "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4\bin" | Out-File -FilePath $env:GITHUB_PATH -Encoding utf8 -Append + echo "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4\libnvvp" | Out-File -FilePath $env:GITHUB_PATH -Encoding utf8 -Append + echo "CUDA_PATH=C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4" | Out-File -FilePath $env:GITHUB_ENV -Append -Encoding utf8 + echo "CUDA_PATH_V12_4=C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4" | Out-File -FilePath $env:GITHUB_ENV -Append -Encoding utf8 + + - name: Install Ninja + id: install_ninja + run: | + choco install ninja + + - name: Build + id: cmake_build + shell: cmd + run: | + call "C:\Program Files (x86)\Microsoft Visual Studio\2019\Enterprise\VC\Auxiliary\Build\vcvars64.bat" + cmake -S . -B build -G "Ninja Multi-Config" ^ + -DLLAMA_BUILD_SERVER=ON ^ + -DGGML_NATIVE=OFF ^ + -DGGML_CUDA=ON ^ + -DGGML_RPC=ON + set /A NINJA_JOBS=%NUMBER_OF_PROCESSORS%-1 + cmake --build build --config Release -j %NINJA_JOBS% -t ggml + cmake --build build --config Release + + - name: Determine tag name + id: tag + shell: bash + run: | + BUILD_NUMBER="$(git rev-list --count HEAD)" + SHORT_HASH="$(git rev-parse --short=7 HEAD)" + if [[ "${{ env.BRANCH_NAME }}" == "master" ]]; then + echo "name=b${BUILD_NUMBER}" >> $GITHUB_OUTPUT + else + SAFE_NAME=$(echo "${{ env.BRANCH_NAME }}" | tr '/' '-') + echo "name=${SAFE_NAME}-b${BUILD_NUMBER}-${SHORT_HASH}" >> $GITHUB_OUTPUT + fi + + - name: Pack artifacts + id: pack_artifacts + if: ${{ ( github.event_name == 'push' && github.ref == 'refs/heads/master' ) || github.event.inputs.create_release == 'true' }} + run: | + 7z a llama-${{ steps.tag.outputs.name }}-bin-win-${{ matrix.build }}-cu${{ matrix.cuda }}-x64.zip .\build\bin\Release\* + + - name: Upload artifacts + if: ${{ ( github.event_name == 'push' && github.ref == 'refs/heads/master' ) || github.event.inputs.create_release == 'true' }} + uses: actions/upload-artifact@v4 + with: + path: llama-${{ steps.tag.outputs.name }}-bin-win-${{ matrix.build }}-cu${{ matrix.cuda }}-x64.zip + name: llama-bin-win-cu${{ matrix.cuda }}-x64.zip + + - name: Copy and pack Cuda runtime + if: ${{ github.event_name == 'push' && github.ref == 'refs/heads/master' }} + run: | + echo "Cuda install location: ${{ env.CUDA_PATH }}" + $dst='.\build\bin\cudart\' + robocopy "${{env.CUDA_PATH}}\bin" $dst cudart64_*.dll cublas64_*.dll cublasLt64_*.dll + robocopy "${{env.CUDA_PATH}}\lib" $dst cudart64_*.dll cublas64_*.dll cublasLt64_*.dll + 7z a cudart-llama-bin-win-cu${{ matrix.cuda }}-x64.zip $dst\* + + - name: Upload Cuda runtime + if: ${{ ( github.event_name == 'push' && github.ref == 'refs/heads/master' ) || github.event.inputs.create_release == 'true' }} + uses: actions/upload-artifact@v4 + with: + path: cudart-llama-bin-win-cu${{ matrix.cuda }}-x64.zip + name: cudart-llama-bin-win-cu${{ matrix.cuda }}-x64.zip + + windows-latest-cmake-sycl: + runs-on: windows-latest + + defaults: + run: + shell: bash + + 
env: + WINDOWS_BASEKIT_URL: https://registrationcenter-download.intel.com/akdlm/IRC_NAS/b380d914-366b-4b77-a74a-05e3c38b3514/intel-oneapi-base-toolkit-2025.0.0.882_offline.exe + WINDOWS_DPCPP_MKL: intel.oneapi.win.cpp-dpcpp-common:intel.oneapi.win.mkl.devel:intel.oneapi.win.dnnl:intel.oneapi.win.tbb.devel + ONEAPI_ROOT: "C:/Program Files (x86)/Intel/oneAPI" + steps: + - name: Clone + id: checkout + uses: actions/checkout@v4 + with: + fetch-depth: 0 + + - name: ccache + uses: hendrikmuhs/ccache-action@v1.2.16 + with: + key: windows-latest-cmake-sycl + variant: sccache + evict-old-files: 1d + + - name: Install + run: | + scripts/install-oneapi.bat $WINDOWS_BASEKIT_URL $WINDOWS_DPCPP_MKL + + - name: Build + id: cmake_build + run: examples/sycl/win-build-sycl.bat + + - name: Determine tag name + id: tag + shell: bash + run: | + BUILD_NUMBER="$(git rev-list --count HEAD)" + SHORT_HASH="$(git rev-parse --short=7 HEAD)" + if [[ "${{ env.BRANCH_NAME }}" == "master" ]]; then + echo "name=b${BUILD_NUMBER}" >> $GITHUB_OUTPUT + else + SAFE_NAME=$(echo "${{ env.BRANCH_NAME }}" | tr '/' '-') + echo "name=${SAFE_NAME}-b${BUILD_NUMBER}-${SHORT_HASH}" >> $GITHUB_OUTPUT + fi + + - name: Build the release package + id: pack_artifacts + if: ${{ ( github.event_name == 'push' && github.ref == 'refs/heads/master' ) || github.event.inputs.create_release == 'true' }} + run: | + echo "cp oneAPI running time dll files in ${{ env.ONEAPI_ROOT }} to ./build/bin" + + cp "${{ env.ONEAPI_ROOT }}/mkl/latest/bin/mkl_sycl_blas.5.dll" ./build/bin + cp "${{ env.ONEAPI_ROOT }}/mkl/latest/bin/mkl_core.2.dll" ./build/bin + cp "${{ env.ONEAPI_ROOT }}/mkl/latest/bin/mkl_tbb_thread.2.dll" ./build/bin + + cp "${{ env.ONEAPI_ROOT }}/compiler/latest/bin/ur_adapter_level_zero.dll" ./build/bin + cp "${{ env.ONEAPI_ROOT }}/compiler/latest/bin/ur_adapter_opencl.dll" ./build/bin + cp "${{ env.ONEAPI_ROOT }}/compiler/latest/bin/ur_loader.dll" ./build/bin + cp "${{ env.ONEAPI_ROOT }}/compiler/latest/bin/ur_win_proxy_loader.dll" ./build/bin + + cp "${{ env.ONEAPI_ROOT }}/compiler/latest/bin/sycl8.dll" ./build/bin + cp "${{ env.ONEAPI_ROOT }}/compiler/latest/bin/svml_dispmd.dll" ./build/bin + cp "${{ env.ONEAPI_ROOT }}/compiler/latest/bin/libmmd.dll" ./build/bin + cp "${{ env.ONEAPI_ROOT }}/compiler/latest/bin/libiomp5md.dll" ./build/bin + + cp "${{ env.ONEAPI_ROOT }}/dnnl/latest/bin/dnnl.dll" ./build/bin + cp "${{ env.ONEAPI_ROOT }}/tbb/latest/bin/tbb12.dll" ./build/bin + + echo "cp oneAPI running time dll files to ./build/bin done" + 7z a llama-${{ steps.tag.outputs.name }}-bin-win-sycl-x64.zip ./build/bin/* + + - name: Upload the release package + if: ${{ ( github.event_name == 'push' && github.ref == 'refs/heads/master' ) || github.event.inputs.create_release == 'true' }} + uses: actions/upload-artifact@v4 + with: + path: llama-${{ steps.tag.outputs.name }}-bin-win-sycl-x64.zip + name: llama-bin-win-sycl-x64.zip + + windows-latest-cmake-hip: + if: ${{ github.event.inputs.create_release != 'true' }} + runs-on: windows-latest + + steps: + - name: Clone + id: checkout + uses: actions/checkout@v4 + + - name: Install + id: depends + run: | + $ErrorActionPreference = "Stop" + write-host "Downloading AMD HIP SDK Installer" + Invoke-WebRequest -Uri "https://download.amd.com/developer/eula/rocm-hub/AMD-Software-PRO-Edition-24.Q3-WinSvr2022-For-HIP.exe" -OutFile "${env:RUNNER_TEMP}\rocm-install.exe" + write-host "Installing AMD HIP SDK" + Start-Process "${env:RUNNER_TEMP}\rocm-install.exe" -ArgumentList '-install' -NoNewWindow -Wait + write-host 
"Completed AMD HIP SDK installation" + + - name: Verify ROCm + id: verify + run: | + & 'C:\Program Files\AMD\ROCm\*\bin\clang.exe' --version + + - name: Install ccache + uses: hendrikmuhs/ccache-action@v1.2.16 + with: + key: ${{ github.job }} + evict-old-files: 1d + + - name: Build + id: cmake_build + run: | + $env:HIP_PATH=$(Resolve-Path 'C:\Program Files\AMD\ROCm\*\bin\clang.exe' | split-path | split-path) + $env:CMAKE_PREFIX_PATH="${env:HIP_PATH}" + cmake -G "Unix Makefiles" -B build -S . ` + -DCMAKE_C_COMPILER="${env:HIP_PATH}\bin\clang.exe" ` + -DCMAKE_CXX_COMPILER="${env:HIP_PATH}\bin\clang++.exe" ` + -DCMAKE_BUILD_TYPE=Release ` + -DGGML_HIP=ON ` + -DGGML_RPC=ON + cmake --build build -j ${env:NUMBER_OF_PROCESSORS} + + windows-latest-cmake-hip-release: + if: ${{ ( github.event_name == 'push' && github.ref == 'refs/heads/master' ) || github.event.inputs.create_release == 'true' }} + runs-on: windows-latest + + strategy: + matrix: + gpu_target: [gfx1100, gfx1101, gfx1030] + + steps: + - name: Clone + id: checkout + uses: actions/checkout@v4 + with: + fetch-depth: 0 + + - name: ccache + uses: hendrikmuhs/ccache-action@v1.2.16 + with: + key: windows-latest-cmake-hip-release + evict-old-files: 1d + + - name: Install + id: depends + run: | + $ErrorActionPreference = "Stop" + write-host "Downloading AMD HIP SDK Installer" + Invoke-WebRequest -Uri "https://download.amd.com/developer/eula/rocm-hub/AMD-Software-PRO-Edition-24.Q3-WinSvr2022-For-HIP.exe" -OutFile "${env:RUNNER_TEMP}\rocm-install.exe" + write-host "Installing AMD HIP SDK" + Start-Process "${env:RUNNER_TEMP}\rocm-install.exe" -ArgumentList '-install' -NoNewWindow -Wait + write-host "Completed AMD HIP SDK installation" + + - name: Verify ROCm + id: verify + run: | + & 'C:\Program Files\AMD\ROCm\*\bin\clang.exe' --version + + - name: Build + id: cmake_build + run: | + $env:HIP_PATH=$(Resolve-Path 'C:\Program Files\AMD\ROCm\*\bin\clang.exe' | split-path | split-path) + $env:CMAKE_PREFIX_PATH="${env:HIP_PATH}" + cmake -G "Unix Makefiles" -B build -S . 
` + -DCMAKE_C_COMPILER="${env:HIP_PATH}\bin\clang.exe" ` + -DCMAKE_CXX_COMPILER="${env:HIP_PATH}\bin\clang++.exe" ` + -DCMAKE_BUILD_TYPE=Release ` + -DAMDGPU_TARGETS=${{ matrix.gpu_target }} ` + -DGGML_HIP=ON ` + -DGGML_RPC=ON + cmake --build build -j ${env:NUMBER_OF_PROCESSORS} + md "build\bin\rocblas\library\" + cp "${env:HIP_PATH}\bin\hipblas.dll" "build\bin\" + cp "${env:HIP_PATH}\bin\rocblas.dll" "build\bin\" + cp "${env:HIP_PATH}\bin\rocblas\library\*" "build\bin\rocblas\library\" + + - name: Determine tag name + id: tag + shell: bash + run: | + BUILD_NUMBER="$(git rev-list --count HEAD)" + SHORT_HASH="$(git rev-parse --short=7 HEAD)" + if [[ "${{ env.BRANCH_NAME }}" == "master" ]]; then + echo "name=b${BUILD_NUMBER}" >> $GITHUB_OUTPUT + else + SAFE_NAME=$(echo "${{ env.BRANCH_NAME }}" | tr '/' '-') + echo "name=${SAFE_NAME}-b${BUILD_NUMBER}-${SHORT_HASH}" >> $GITHUB_OUTPUT + fi + + - name: Pack artifacts + id: pack_artifacts + run: | + 7z a llama-${{ steps.tag.outputs.name }}-bin-win-hip-x64-${{ matrix.gpu_target }}.zip .\build\bin\* + + - name: Upload artifacts + uses: actions/upload-artifact@v4 + with: + path: llama-${{ steps.tag.outputs.name }}-bin-win-hip-x64-${{ matrix.gpu_target }}.zip + name: llama-bin-win-hip-x64-${{ matrix.gpu_target }}.zip + + ios-xcode-build: + runs-on: macos-latest + + steps: + - name: Checkout code + uses: actions/checkout@v4 + + - name: Build + id: cmake_build + run: | + sysctl -a + cmake -B build -G Xcode \ + -DGGML_METAL_USE_BF16=ON \ + -DGGML_METAL_EMBED_LIBRARY=ON \ + -DLLAMA_BUILD_EXAMPLES=OFF \ + -DLLAMA_BUILD_TESTS=OFF \ + -DLLAMA_BUILD_SERVER=OFF \ + -DCMAKE_SYSTEM_NAME=iOS \ + -DCMAKE_OSX_DEPLOYMENT_TARGET=14.0 \ + -DCMAKE_XCODE_ATTRIBUTE_DEVELOPMENT_TEAM=ggml + cmake --build build --config Release -j $(sysctl -n hw.logicalcpu) -- CODE_SIGNING_ALLOWED=NO + sudo cmake --install build --config Release + + - name: xcodebuild for swift package + id: xcodebuild + run: | + xcodebuild -scheme llama-Package -destination 'generic/platform=iOS' + + - name: Build Xcode project + run: xcodebuild -project examples/llama.swiftui/llama.swiftui.xcodeproj -scheme llama.swiftui -sdk iphoneos CODE_SIGNING_REQUIRED=NO CODE_SIGN_IDENTITY= -destination 'generic/platform=iOS' build + + android-build: + runs-on: ubuntu-latest + + steps: + - name: Clone + uses: actions/checkout@v4 + + - name: ccache + uses: hendrikmuhs/ccache-action@v1.2.16 + with: + key: android-build + evict-old-files: 1d + + - name: Set up JDK + uses: actions/setup-java@v3 + with: + java-version: 17 + distribution: zulu + + - name: Setup Android SDK + uses: android-actions/setup-android@v3 + with: + log-accepted-android-sdk-licenses: false + + - name: Build + run: | + cd examples/llama.android + + ./gradlew build --no-daemon + + release: + if: ${{ ( github.event_name == 'push' && github.ref == 'refs/heads/master' ) || github.event.inputs.create_release == 'true' }} + + runs-on: ubuntu-latest + + needs: + - ubuntu-cpu-cmake + - windows-latest-cmake + - windows-2019-cmake-cuda + - windows-latest-cmake-hip-release + - macOS-latest-cmake-arm64 + - macOS-latest-cmake-x64 + + steps: + - name: Clone + id: checkout + uses: actions/checkout@v4 + with: + fetch-depth: 0 + + - name: ccache + uses: hendrikmuhs/ccache-action@v1.2.16 + with: + key: release + evict-old-files: 1d + + - name: Determine tag name + id: tag + shell: bash + run: | + BUILD_NUMBER="$(git rev-list --count HEAD)" + SHORT_HASH="$(git rev-parse --short=7 HEAD)" + if [[ "${{ env.BRANCH_NAME }}" == "master" ]]; then + echo 
"name=b${BUILD_NUMBER}" >> $GITHUB_OUTPUT + else + SAFE_NAME=$(echo "${{ env.BRANCH_NAME }}" | tr '/' '-') + echo "name=${SAFE_NAME}-b${BUILD_NUMBER}-${SHORT_HASH}" >> $GITHUB_OUTPUT + fi + + - name: Download artifacts + id: download-artifact + uses: actions/download-artifact@v4 + with: + path: ./artifact + + - name: Move artifacts + id: move_artifacts + run: mkdir -p ./artifact/release && mv ./artifact/*/*.zip ./artifact/release + + - name: Create release + id: create_release + uses: ggml-org/action-create-release@v1 + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + with: + tag_name: ${{ steps.tag.outputs.name }} + + - name: Upload release + id: upload_release + uses: actions/github-script@v3 + with: + github-token: ${{secrets.GITHUB_TOKEN}} + script: | + const path = require('path'); + const fs = require('fs'); + const release_id = '${{ steps.create_release.outputs.id }}'; + for (let file of await fs.readdirSync('./artifact/release')) { + if (path.extname(file) === '.zip') { + console.log('uploadReleaseAsset', file); + await github.repos.uploadReleaseAsset({ + owner: context.repo.owner, + repo: context.repo.repo, + release_id: release_id, + name: file, + data: await fs.readFileSync(`./artifact/release/${file}`) + }); + } + } + +# ubuntu-latest-gcc: +# runs-on: ubuntu-latest +# +# strategy: +# matrix: +# build: [Debug, Release] +# +# steps: +# - name: Clone +# uses: actions/checkout@v4 +# +# - name: Dependencies +# run: | +# sudo apt-get update +# sudo apt-get install build-essential +# sudo apt-get install cmake +# +# - name: Configure +# run: cmake . -DCMAKE_BUILD_TYPE=${{ matrix.build }} +# +# - name: Build +# run: | +# make +# +# ubuntu-latest-clang: +# runs-on: ubuntu-latest +# +# strategy: +# matrix: +# build: [Debug, Release] +# +# steps: +# - name: Clone +# uses: actions/checkout@v4 +# +# - name: Dependencies +# run: | +# sudo apt-get update +# sudo apt-get install build-essential +# sudo apt-get install cmake +# +# - name: Configure +# run: cmake . -DCMAKE_BUILD_TYPE=${{ matrix.build }} -DCMAKE_CXX_COMPILER=clang++ -DCMAKE_C_COMPILER=clang +# +# - name: Build +# run: | +# make +# +# ubuntu-latest-gcc-sanitized: +# runs-on: ubuntu-latest +# +# strategy: +# matrix: +# sanitizer: [ADDRESS, THREAD, UNDEFINED] +# +# steps: +# - name: Clone +# uses: actions/checkout@v4 +# +# - name: Dependencies +# run: | +# sudo apt-get update +# sudo apt-get install build-essential +# sudo apt-get install cmake +# +# - name: Configure +# run: cmake . -DCMAKE_BUILD_TYPE=Debug -DLLAMA_SANITIZE_${{ matrix.sanitizer }}=ON +# +# - name: Build +# run: | +# make +# +# windows: +# runs-on: windows-latest +# +# strategy: +# matrix: +# build: [Release] +# arch: [Win32, x64] +# include: +# - arch: Win32 +# s2arc: x86 +# - arch: x64 +# s2arc: x64 +# +# steps: +# - name: Clone +# uses: actions/checkout@v4 +# +# - name: Add msbuild to PATH +# uses: microsoft/setup-msbuild@v1 +# +# - name: Configure +# run: > +# cmake -S . 
-B ./build -A ${{ matrix.arch }} +# -DCMAKE_BUILD_TYPE=${{ matrix.build }} +# +# - name: Build +# run: | +# cd ./build +# msbuild ALL_BUILD.vcxproj -t:build -p:configuration=${{ matrix.build }} -p:platform=${{ matrix.arch }} +# +# - name: Upload binaries +# uses: actions/upload-artifact@v4 +# with: +# name: llama-bin-${{ matrix.arch }} +# path: build/bin/${{ matrix.build }} +# +# windows-blas: +# runs-on: windows-latest +# +# strategy: +# matrix: +# build: [Release] +# arch: [Win32, x64] +# blas: [ON] +# include: +# - arch: Win32 +# obzip: https://github.com/xianyi/OpenBLAS/releases/download/v0.3.21/OpenBLAS-0.3.21-x86.zip +# s2arc: x86 +# - arch: x64 +# obzip: https://github.com/xianyi/OpenBLAS/releases/download/v0.3.21/OpenBLAS-0.3.21-x64.zip +# s2arc: x64 +# +# steps: +# - name: Clone +# uses: actions/checkout@v4 +# +# - name: Add msbuild to PATH +# uses: microsoft/setup-msbuild@v1 +# +# - name: Fetch OpenBLAS +# if: matrix.blas == 'ON' +# run: | +# C:/msys64/usr/bin/wget.exe -qO blas.zip ${{ matrix.obzip }} +# 7z x blas.zip -oblas -y +# copy blas/include/cblas.h . +# copy blas/include/openblas_config.h . +# echo "blasdir=$env:GITHUB_WORKSPACE/blas" >> $env:GITHUB_ENV +# +# - name: Configure +# run: > +# cmake -S . -B ./build -A ${{ matrix.arch }} +# -DCMAKE_BUILD_TYPE=${{ matrix.build }} +# -DLLAMA_SUPPORT_OPENBLAS=${{ matrix.blas }} +# -DCMAKE_LIBRARY_PATH="$env:blasdir/lib" +# +# - name: Build +# run: | +# cd ./build +# msbuild ALL_BUILD.vcxproj -t:build -p:configuration=${{ matrix.build }} -p:platform=${{ matrix.arch }} +# +# - name: Copy libopenblas.dll +# if: matrix.blas == 'ON' +# run: copy "$env:blasdir/bin/libopenblas.dll" build/bin/${{ matrix.build }} +# +# - name: Upload binaries +# if: matrix.blas == 'ON' +# uses: actions/upload-artifact@v4 +# with: +# name: llama-blas-bin-${{ matrix.arch }} +# path: build/bin/${{ matrix.build }} +# +# emscripten: +# runs-on: ubuntu-latest +# +# strategy: +# matrix: +# build: [Release] +# +# steps: +# - name: Clone +# uses: actions/checkout@v4 +# +# - name: Dependencies +# run: | +# wget -q https://github.com/emscripten-core/emsdk/archive/master.tar.gz +# tar -xvf master.tar.gz +# emsdk-master/emsdk update +# emsdk-master/emsdk install latest +# emsdk-master/emsdk activate latest +# +# - name: Configure +# run: echo "tmp" +# +# - name: Build +# run: | +# pushd emsdk-master +# source ./emsdk_env.sh +# popd +# emcmake cmake . -DCMAKE_BUILD_TYPE=${{ matrix.build }} +# make + + openEuler-latest-cmake-cann: + if: ${{ github.event_name != 'pull_request' || contains(github.event.pull_request.labels.*.name, 'Ascend NPU') }} + defaults: + run: + shell: bash -el {0} + runs-on: ubuntu-24.04-arm + strategy: + matrix: + cann: + - '8.0.rc3.beta1-910b-openeuler22.03-py3.10' + device: + - 'ascend910b3' + build: + - 'Release' + container: ascendai/cann:${{ matrix.cann }} + steps: + - name: Checkout + uses: actions/checkout@v4 + + - name: Dependencies + run: | + yum update -y + yum install -y git gcc gcc-c++ make cmake + + - name: Build + run: | + export LD_LIBRARY_PATH=${ASCEND_TOOLKIT_HOME}/lib64:${ASCEND_TOOLKIT_HOME}/$(uname -m)-linux/devlib/:${LD_LIBRARY_PATH} + + cmake -S . 
-B build \ + -DCMAKE_BUILD_TYPE=${{ matrix.build }} \ + -DGGML_CANN=on \ + -DSOC_TYPE=${{ matrix.device }} + cmake --build build -j $(nproc) diff --git a/llama.cpp/.github/workflows/close-issue.yml b/llama.cpp/.github/workflows/close-issue.yml new file mode 100644 index 0000000000000000000000000000000000000000..276a217d4500523b8a951510a918de276f088cc3 --- /dev/null +++ b/llama.cpp/.github/workflows/close-issue.yml @@ -0,0 +1,28 @@ +name: Close inactive issues +on: + schedule: + - cron: "42 0 * * *" + +# Fine-grant permission +# https://docs.github.com/en/actions/security-for-github-actions/security-guides/automatic-token-authentication#modifying-the-permissions-for-the-github_token +permissions: + issues: write + +jobs: + close-issues: + runs-on: ubuntu-latest + permissions: + issues: write + pull-requests: write + steps: + - uses: actions/stale@v5 + with: + exempt-issue-labels: "refactor,help wanted,good first issue,research,bug,roadmap" + days-before-issue-stale: 30 + days-before-issue-close: 14 + stale-issue-label: "stale" + close-issue-message: "This issue was closed because it has been inactive for 14 days since being marked as stale." + days-before-pr-stale: -1 + days-before-pr-close: -1 + operations-per-run: 10000 + repo-token: ${{ secrets.GITHUB_TOKEN }} diff --git a/llama.cpp/.github/workflows/docker.yml b/llama.cpp/.github/workflows/docker.yml new file mode 100644 index 0000000000000000000000000000000000000000..6955a7dc8234db96df82961b2811c10ad6598af6 --- /dev/null +++ b/llama.cpp/.github/workflows/docker.yml @@ -0,0 +1,173 @@ +# This workflow uses actions that are not certified by GitHub. +# They are provided by a third-party and are governed by +# separate terms of service, privacy policy, and support +# documentation. + +# GitHub recommends pinning actions to a commit SHA. +# To get a newer version, you will need to update the SHA. +# You can also reference a tag or branch, but the action may change without warning. 
+ +name: Publish Docker image + +on: + workflow_dispatch: # allows manual triggering + schedule: + # Rebuild daily rather than on every push because it is expensive + - cron: '12 4 * * *' + +concurrency: + group: ${{ github.workflow }}-${{ github.head_ref && github.ref || github.run_id }} + cancel-in-progress: true + +# Fine-grant permission +# https://docs.github.com/en/actions/security-for-github-actions/security-guides/automatic-token-authentication#modifying-the-permissions-for-the-github_token +permissions: + packages: write + +jobs: + push_to_registry: + name: Push Docker image to Docker Hub + + runs-on: ubuntu-22.04 + env: + COMMIT_SHA: ${{ github.sha }} + strategy: + fail-fast: false + matrix: + config: + # Multi-stage build + - { tag: "cpu", dockerfile: ".devops/cpu.Dockerfile", platforms: "linux/amd64,linux/arm64", full: true, light: true, server: true, freediskspace: false} + - { tag: "cuda", dockerfile: ".devops/cuda.Dockerfile", platforms: "linux/amd64", full: true, light: true, server: true, freediskspace: false} + - { tag: "musa", dockerfile: ".devops/musa.Dockerfile", platforms: "linux/amd64", full: true, light: true, server: true, freediskspace: false} + - { tag: "intel", dockerfile: ".devops/intel.Dockerfile", platforms: "linux/amd64", full: true, light: true, server: true, freediskspace: false} + - { tag: "vulkan", dockerfile: ".devops/vulkan.Dockerfile", platforms: "linux/amd64", full: true, light: true, server: true, freediskspace: false} + # Note: the rocm images are failing due to a compiler error and are disabled until this is fixed to allow the workflow to complete + #- {tag: "rocm", dockerfile: ".devops/rocm.Dockerfile", platforms: "linux/amd64,linux/arm64", full: true, light: true, server: true, freediskspace: true } + steps: + - name: Check out the repo + uses: actions/checkout@v4 + with: + fetch-depth: 0 # preserve git history, so we can determine the build number + + - name: Set up QEMU + uses: docker/setup-qemu-action@v3 + + - name: Set up Docker Buildx + uses: docker/setup-buildx-action@v3 + + - name: Log in to Docker Hub + uses: docker/login-action@v2 + with: + registry: ghcr.io + username: ${{ github.repository_owner }} + password: ${{ secrets.GITHUB_TOKEN }} + + - name: Determine tag name + id: tag + shell: bash + run: | + BUILD_NUMBER="$(git rev-list --count HEAD)" + SHORT_HASH="$(git rev-parse --short=7 HEAD)" + REPO_OWNER="${GITHUB_REPOSITORY_OWNER@L}" # to lower case + REPO_NAME="${{ github.event.repository.name }}" + + # determine tag name postfix (build number, commit hash) + if [[ "${{ env.GITHUB_BRANCH_NAME }}" == "master" ]]; then + TAG_POSTFIX="-b${BUILD_NUMBER}" + else + SAFE_NAME=$(echo "${{ env.GITHUB_BRANCH_NAME }}" | tr '/' '-') + TAG_POSTFIX="-${SAFE_NAME}-${SHORT_HASH}" + fi + # list all tags possible + if [[ "${{ matrix.config.tag }}" == "cpu" ]]; then + TYPE="" + else + TYPE="-${{ matrix.config.tag }}" + fi + PREFIX="ghcr.io/${REPO_OWNER}/${REPO_NAME}:" + FULLTAGS="${PREFIX}full${TYPE},${PREFIX}full${TYPE}${TAG_POSTFIX}" + LIGHTTAGS="${PREFIX}light${TYPE},${PREFIX}light${TYPE}${TAG_POSTFIX}" + SERVERTAGS="${PREFIX}server${TYPE},${PREFIX}server${TYPE}${TAG_POSTFIX}" + echo "full_output_tags=$FULLTAGS" >> $GITHUB_OUTPUT + echo "light_output_tags=$LIGHTTAGS" >> $GITHUB_OUTPUT + echo "server_output_tags=$SERVERTAGS" >> $GITHUB_OUTPUT + echo "full_output_tags=$FULLTAGS" # print out for debugging + echo "light_output_tags=$LIGHTTAGS" # print out for debugging + echo "server_output_tags=$SERVERTAGS" # print out for debugging + env: + 
GITHUB_BRANCH_NAME: ${{ github.head_ref || github.ref_name }} + GITHUB_REPOSITORY_OWNER: '${{ github.repository_owner }}' + + - name: Free Disk Space (Ubuntu) + if: ${{ matrix.config.free_disk_space == true }} + uses: ggml-org/free-disk-space@v1.3.1 + with: + # this might remove tools that are actually needed, + # if set to "true" but frees about 6 GB + tool-cache: false + + # all of these default to true, but feel free to set to + # "false" if necessary for your workflow + android: true + dotnet: true + haskell: true + large-packages: true + docker-images: true + swap-storage: true + + - name: Build and push Full Docker image (tagged + versioned) + if: ${{ (github.event_name == 'push' || github.event_name == 'schedule' || github.event_name == 'workflow_dispatch') && matrix.config.full == true }} + uses: docker/build-push-action@v6 + with: + context: . + push: true + platforms: ${{ matrix.config.platforms }} + # tag list is generated from step above + tags: ${{ steps.tag.outputs.full_output_tags }} + file: ${{ matrix.config.dockerfile }} + target: full + provenance: false + # using github experimental cache + cache-from: type=gha + cache-to: type=gha,mode=max + # return to this if the experimental github cache is having issues + #cache-to: type=local,dest=/tmp/.buildx-cache + #cache-from: type=local,src=/tmp/.buildx-cache + + - name: Build and push Light Docker image (tagged + versioned) + if: ${{ (github.event_name == 'push' || github.event_name == 'schedule' || github.event_name == 'workflow_dispatch') && matrix.config.light == true }} + uses: docker/build-push-action@v6 + with: + context: . + push: true + platforms: ${{ matrix.config.platforms }} + # tag list is generated from step above + tags: ${{ steps.tag.outputs.light_output_tags }} + file: ${{ matrix.config.dockerfile }} + target: light + provenance: false + # using github experimental cache + cache-from: type=gha + cache-to: type=gha,mode=max + # return to this if the experimental github cache is having issues + #cache-to: type=local,dest=/tmp/.buildx-cache + #cache-from: type=local,src=/tmp/.buildx-cache + + - name: Build and push Server Docker image (tagged + versioned) + if: ${{ (github.event_name == 'push' || github.event_name == 'schedule' || github.event_name == 'workflow_dispatch') && matrix.config.server == true }} + uses: docker/build-push-action@v6 + with: + context: . 
+ push: true + platforms: ${{ matrix.config.platforms }} + # tag list is generated from step above + tags: ${{ steps.tag.outputs.server_output_tags }} + file: ${{ matrix.config.dockerfile }} + target: server + provenance: false + # using github experimental cache + cache-from: type=gha + cache-to: type=gha,mode=max + # return to this if the experimental github cache is having issues + #cache-to: type=local,dest=/tmp/.buildx-cache + #cache-from: type=local,src=/tmp/.buildx-cache diff --git a/llama.cpp/.github/workflows/editorconfig.yml b/llama.cpp/.github/workflows/editorconfig.yml new file mode 100644 index 0000000000000000000000000000000000000000..f02b7c2194bcf57568eb29e1d072ec9b5923f4cd --- /dev/null +++ b/llama.cpp/.github/workflows/editorconfig.yml @@ -0,0 +1,29 @@ +name: EditorConfig Checker + +on: + workflow_dispatch: # allows manual triggering + inputs: + create_release: + description: 'Create new release' + required: true + type: boolean + push: + branches: + - master + pull_request: + branches: + - master + +concurrency: + group: ${{ github.workflow }}-${{ github.head_ref && github.ref || github.run_id }} + cancel-in-progress: true + +jobs: + editorconfig: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + - uses: editorconfig-checker/action-editorconfig-checker@v2 + with: + version: v3.0.3 + - run: editorconfig-checker diff --git a/llama.cpp/.github/workflows/gguf-publish.yml b/llama.cpp/.github/workflows/gguf-publish.yml new file mode 100644 index 0000000000000000000000000000000000000000..3ca4d30581074cc1b69d7b7306eef170f0baa0d6 --- /dev/null +++ b/llama.cpp/.github/workflows/gguf-publish.yml @@ -0,0 +1,44 @@ +# This workflow will upload a Python Package using Twine when a GGUF release is created +# For more information see: https://help.github.com/en/actions/language-and-framework-guides/using-python-with-github-actions#publishing-to-package-registries + +# See `gguf-py/README.md` for how to make a release. + +# This workflow uses actions that are not certified by GitHub. +# They are provided by a third-party and are governed by +# separate terms of service, privacy policy, and support +# documentation. 
+ +name: Upload Python Package + +on: + workflow_dispatch: + push: + # Pattern matched against refs/tags + tags: + - 'gguf-v*' # Push events to every version tag + + +jobs: + deploy: + + runs-on: ubuntu-latest + + steps: + - uses: actions/checkout@v4 + - name: Set up Python + uses: actions/setup-python@v5 + with: + python-version: '3.9.x' + - name: Install dependencies + run: | + cd gguf-py + python -m pip install poetry + poetry install + + - name: Build package + run: cd gguf-py && poetry build + - name: Publish package + uses: pypa/gh-action-pypi-publish@release/v1 + with: + password: ${{ secrets.PYPI_API_TOKEN }} + packages-dir: gguf-py/dist diff --git a/llama.cpp/.github/workflows/labeler.yml b/llama.cpp/.github/workflows/labeler.yml new file mode 100644 index 0000000000000000000000000000000000000000..368dbdbe5dcccfff731c35b4ff1dc15bc9f85695 --- /dev/null +++ b/llama.cpp/.github/workflows/labeler.yml @@ -0,0 +1,17 @@ +name: "Pull Request Labeler" +on: +- pull_request_target + +jobs: + labeler: + permissions: + contents: read + pull-requests: write + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + with: + repository: "ggerganov/llama.cpp" + - uses: actions/labeler@v5 + with: + configuration-path: '.github/labeler.yml' diff --git a/llama.cpp/.github/workflows/python-check-requirements.yml b/llama.cpp/.github/workflows/python-check-requirements.yml new file mode 100644 index 0000000000000000000000000000000000000000..46e80aecd0a0cd9d44847d120e2bc9c0026dfc77 --- /dev/null +++ b/llama.cpp/.github/workflows/python-check-requirements.yml @@ -0,0 +1,33 @@ +name: Python check requirements.txt + +on: + push: + paths: + - '.github/workflows/python-check-requirements.yml' + - 'scripts/check-requirements.sh' + - 'convert*.py' + - '**/requirements*.txt' + pull_request: + paths: + - '.github/workflows/python-check-requirements.yml' + - 'scripts/check-requirements.sh' + - 'convert*.py' + - '**/requirements*.txt' + +concurrency: + group: ${{ github.workflow }}-${{ github.head_ref && github.ref || github.run_id }} + cancel-in-progress: true + +jobs: + python-check-requirements: + runs-on: ubuntu-latest + name: check-requirements + steps: + - name: Check out source repository + uses: actions/checkout@v4 + - name: Set up Python environment + uses: actions/setup-python@v5 + with: + python-version: "3.11" + - name: Run check-requirements.sh script + run: bash scripts/check-requirements.sh diff --git a/llama.cpp/.github/workflows/python-lint.yml b/llama.cpp/.github/workflows/python-lint.yml new file mode 100644 index 0000000000000000000000000000000000000000..ddfdf73b8fce2c12d413127eb24575dc81a8e64d --- /dev/null +++ b/llama.cpp/.github/workflows/python-lint.yml @@ -0,0 +1,30 @@ +name: flake8 Lint + +on: + push: + branches: + - master + paths: ['.github/workflows/python-lint.yml', '**/*.py'] + pull_request: + types: [opened, synchronize, reopened] + paths: ['.github/workflows/python-lint.yml', '**/*.py'] + +concurrency: + group: ${{ github.workflow }}-${{ github.head_ref && github.ref || github.run_id }} + cancel-in-progress: true + +jobs: + flake8-lint: + runs-on: ubuntu-latest + name: Lint + steps: + - name: Check out source repository + uses: actions/checkout@v4 + - name: Set up Python environment + uses: actions/setup-python@v5 + with: + python-version: "3.11" + - name: flake8 Lint + uses: py-actions/flake8@v2 + with: + plugins: "flake8-no-print" diff --git a/llama.cpp/.github/workflows/python-type-check.yml b/llama.cpp/.github/workflows/python-type-check.yml new file mode 100644 index 
0000000000000000000000000000000000000000..373bb601020b25597408efdb4bcbf31dc4562a18 --- /dev/null +++ b/llama.cpp/.github/workflows/python-type-check.yml @@ -0,0 +1,40 @@ +name: Python Type-Check + +on: + push: + paths: + - '.github/workflows/python-type-check.yml' + - 'pyrightconfig.json' + - '**.py' + - '**/requirements*.txt' + pull_request: + paths: + - '.github/workflows/python-type-check.yml' + - 'pyrightconfig.json' + - '**.py' + - '**/requirements*.txt' + +concurrency: + group: ${{ github.workflow }}-${{ github.head_ref && github.ref || github.run_id }} + cancel-in-progress: true + +jobs: + python-type-check: + runs-on: ubuntu-latest + name: pyright type-check + steps: + - name: Check out source repository + uses: actions/checkout@v4 + - name: Set up Python environment + uses: actions/setup-python@v5 + with: + python-version: "3.11" + - name: Install Python dependencies + # TODO: use a venv + run: pip install -r requirements/requirements-all.txt + - name: Type-check with Pyright + uses: jakebailey/pyright-action@v2 + with: + version: 1.1.382 + level: warning + warnings: true diff --git a/llama.cpp/.github/workflows/server.yml b/llama.cpp/.github/workflows/server.yml new file mode 100644 index 0000000000000000000000000000000000000000..3a29107d0ea5adada3d3fd1d34b637f256b40be0 --- /dev/null +++ b/llama.cpp/.github/workflows/server.yml @@ -0,0 +1,239 @@ +# Server build and tests +name: Server + +on: + workflow_dispatch: # allows manual triggering + inputs: + sha: + description: 'Commit SHA1 to build' + required: false + type: string + slow_tests: + description: 'Run slow tests' + required: true + type: boolean + push: + branches: + - master + paths: ['.github/workflows/server.yml', '**/CMakeLists.txt', '**/Makefile', '**/*.h', '**/*.hpp', '**/*.c', '**/*.cpp', '**/*.cu', '**/*.swift', '**/*.m', 'examples/server/**.*'] + pull_request: + types: [opened, synchronize, reopened] + paths: ['.github/workflows/server.yml', '**/CMakeLists.txt', '**/Makefile', '**/*.h', '**/*.hpp', '**/*.c', '**/*.cpp', '**/*.cu', '**/*.swift', '**/*.m', 'examples/server/**.*'] + +env: + LLAMA_LOG_COLORS: 1 + LLAMA_LOG_PREFIX: 1 + LLAMA_LOG_TIMESTAMPS: 1 + LLAMA_LOG_VERBOSITY: 10 + +concurrency: + group: ${{ github.workflow }}-${{ github.ref }}-${{ github.head_ref || github.run_id }} + cancel-in-progress: true + +jobs: + server: + runs-on: ubuntu-latest + + strategy: + matrix: + sanitizer: [ADDRESS, UNDEFINED] # THREAD is broken + build_type: [RelWithDebInfo] + include: + - build_type: Release + sanitizer: "" + fail-fast: false # While -DLLAMA_SANITIZE_THREAD=ON is broken + + steps: + - name: Dependencies + id: depends + run: | + sudo apt-get update + sudo apt-get -y install \ + build-essential \ + xxd \ + git \ + cmake \ + curl \ + wget \ + language-pack-en \ + libcurl4-openssl-dev + + - name: Clone + id: checkout + uses: actions/checkout@v4 + with: + fetch-depth: 0 + ref: ${{ github.event.inputs.sha || github.event.pull_request.head.sha || github.sha || github.head_ref || github.ref_name }} + + - name: Python setup + id: setup_python + uses: actions/setup-python@v5 + with: + python-version: '3.11' + + - name: Tests dependencies + id: test_dependencies + run: | + pip install -r examples/server/tests/requirements.txt + + # Setup nodejs (to be used for verifying bundled index.html) + - uses: actions/setup-node@v4 + with: + node-version: '22.11.0' + + - name: WebUI - Install dependencies + id: webui_lint + run: | + cd examples/server/webui + npm ci + + - name: WebUI - Check code format + id: webui_format + run: | + 
git config --global --add safe.directory $(realpath .) + cd examples/server/webui + git status + + npm run format + git status + modified_files="$(git status -s)" + echo "Modified files: ${modified_files}" + if [ -n "${modified_files}" ]; then + echo "Files do not follow coding style. To fix: npm run format" + echo "${modified_files}" + exit 1 + fi + + - name: Verify bundled index.html + id: verify_server_index_html + run: | + git config --global --add safe.directory $(realpath .) + cd examples/server/webui + git status + + npm run build + git status + modified_files="$(git status -s)" + echo "Modified files: ${modified_files}" + if [ -n "${modified_files}" ]; then + echo "Repository is dirty or server/webui is not built as expected" + echo "Hint: You may need to follow Web UI build guide in server/README.md" + echo "${modified_files}" + exit 1 + fi + + - name: Build (no OpenMP) + id: cmake_build_no_openmp + if: ${{ matrix.sanitizer == 'THREAD' }} + run: | + cmake -B build \ + -DGGML_NATIVE=OFF \ + -DLLAMA_BUILD_SERVER=ON \ + -DLLAMA_CURL=ON \ + -DCMAKE_BUILD_TYPE=${{ matrix.build_type }} \ + -DLLAMA_SANITIZE_${{ matrix.sanitizer }}=ON \ + -DGGML_OPENMP=OFF ; + cmake --build build --config ${{ matrix.build_type }} -j $(nproc) --target llama-server + + - name: Build (sanitizers) + id: cmake_build_sanitizers + if: ${{ matrix.sanitizer != '' && matrix.sanitizer != 'THREAD' }} + run: | + cmake -B build \ + -DGGML_NATIVE=OFF \ + -DLLAMA_BUILD_SERVER=ON \ + -DLLAMA_CURL=ON \ + -DCMAKE_BUILD_TYPE=${{ matrix.build_type }} \ + -DLLAMA_SANITIZE_${{ matrix.sanitizer }}=ON ; + cmake --build build --config ${{ matrix.build_type }} -j $(nproc) --target llama-server + + - name: Build (sanitizers) + id: cmake_build + if: ${{ matrix.sanitizer == '' }} + run: | + cmake -B build \ + -DGGML_NATIVE=OFF \ + -DLLAMA_BUILD_SERVER=ON \ + -DLLAMA_CURL=ON \ + -DCMAKE_BUILD_TYPE=${{ matrix.build_type }} ; + cmake --build build --config ${{ matrix.build_type }} -j $(nproc) --target llama-server + + - name: Tests + id: server_integration_tests + if: ${{ matrix.sanitizer == '' }} + run: | + cd examples/server/tests + ./tests.sh + + - name: Tests (sanitizers) + id: server_integration_tests_sanitizers + if: ${{ matrix.sanitizer != '' }} + run: | + cd examples/server/tests + LLAMA_SANITIZE=1 ./tests.sh + + - name: Slow tests + id: server_integration_tests_slow + if: ${{ (github.event.schedule || github.event.inputs.slow_tests == 'true') && matrix.build_type == 'Release' }} + run: | + cd examples/server/tests + SLOW_TESTS=1 ./tests.sh + + + server-windows: + runs-on: windows-2019 + + steps: + - name: Clone + id: checkout + uses: actions/checkout@v4 + with: + fetch-depth: 0 + ref: ${{ github.event.inputs.sha || github.event.pull_request.head.sha || github.sha || github.head_ref || github.ref_name }} + + - name: libCURL + id: get_libcurl + env: + CURL_VERSION: 8.6.0_6 + run: | + curl.exe -o $env:RUNNER_TEMP/curl.zip -L "https://curl.se/windows/dl-${env:CURL_VERSION}/curl-${env:CURL_VERSION}-win64-mingw.zip" + mkdir $env:RUNNER_TEMP/libcurl + tar.exe -xvf $env:RUNNER_TEMP/curl.zip --strip-components=1 -C $env:RUNNER_TEMP/libcurl + + - name: Build + id: cmake_build + run: | + cmake -B build -DLLAMA_CURL=ON -DCURL_LIBRARY="$env:RUNNER_TEMP/libcurl/lib/libcurl.dll.a" -DCURL_INCLUDE_DIR="$env:RUNNER_TEMP/libcurl/include" + cmake --build build --config Release -j ${env:NUMBER_OF_PROCESSORS} --target llama-server + + - name: Python setup + id: setup_python + uses: actions/setup-python@v5 + with: + python-version: '3.11' + + - name: 
Tests dependencies + id: test_dependencies + run: | + pip install -r examples/server/tests/requirements.txt + + - name: Copy Libcurl + id: prepare_libcurl + run: | + cp $env:RUNNER_TEMP/libcurl/bin/libcurl-x64.dll ./build/bin/Release/libcurl-x64.dll + + - name: Tests + id: server_integration_tests + if: ${{ !matrix.disabled_on_pr || !github.event.pull_request }} + run: | + cd examples/server/tests + $env:PYTHONIOENCODING = ":replace" + pytest -v -x -m "not slow" + + - name: Slow tests + id: server_integration_tests_slow + if: ${{ (github.event.schedule || github.event.inputs.slow_tests == 'true') && matrix.build_type == 'Release' }} + run: | + cd examples/server/tests + $env:SLOW_TESTS = "1" + pytest -v -x diff --git a/llama.cpp/.gitignore b/llama.cpp/.gitignore new file mode 100644 index 0000000000000000000000000000000000000000..694f36e042fb58db85cb8baac44308779537370b --- /dev/null +++ b/llama.cpp/.gitignore @@ -0,0 +1,145 @@ +# Extensions + +*.a +*.bat +*.bin +*.d +*.dll +*.dot +*.etag +*.exe +*.gcda +*.gcno +*.gcov +*.gguf +*.gguf.json +*.lastModified +*.log +*.metallib +*.o +*.so +*.swp +*.tmp + +# IDE / OS + +.cache/ +.ccls-cache/ +.direnv/ +.DS_Store +.envrc +.idea/ +.swiftpm +.vs/ +.vscode/ +nppBackup + + +# Coverage + +gcovr-report/ +lcov-report/ + +# Build Artifacts + +tags +.build/ +build* +!build-info.cmake +!build-info.cpp.in +!build-info.sh +!build.zig +!docs/build.md +/libllama.so +/llama-* +/vulkan-shaders-gen +android-ndk-* +arm_neon.h +cmake-build-* +CMakeSettings.json +compile_commands.json +ggml-metal-embed.metal +llama-batched-swift +/rpc-server +out/ +tmp/ +autogen-*.md + +# Deprecated + +/main +/server + +# CI + +!.github/workflows/*.yml + +# Models + +models/* +models-mnt +!models/.editorconfig +!models/ggml-vocab-*.gguf* + +# Zig +zig-out/ +zig-cache/ + +# Logs + +ppl-*.txt +qnt-*.txt +perf-*.txt + +# Examples + +examples/jeopardy/results.txt +examples/server/*.css.hpp +examples/server/*.html.hpp +examples/server/*.js.hpp +examples/server/*.mjs.hpp +!build_64.sh +!examples/*.bat +!examples/*/*.kts +!examples/*/*/*.kts +!examples/sycl/*.bat +!examples/sycl/*.sh + +# Server Web UI temporary files +node_modules +examples/server/webui/dist + +# Python + +/.venv +__pycache__/ +*/poetry.lock +poetry.toml + +# Nix +/result + +# Test binaries +/tests/test-backend-ops +/tests/test-double-float +/tests/test-grad0 +/tests/test-grammar-parser +/tests/test-llama-grammar +/tests/test-opt +/tests/test-quantize-fns +/tests/test-quantize-perf +/tests/test-rope +/tests/test-sampling +/tests/test-tokenizer-0 +/tests/test-tokenizer-1-bpe +/tests/test-tokenizer-1-spm + +# Scripts +!/scripts/install-oneapi.bat + +# Test models for lora adapters +/lora-tests + +# Local scripts +/run-vim.sh +/run-chat.sh diff --git a/llama.cpp/.gitmodules b/llama.cpp/.gitmodules new file mode 100644 index 0000000000000000000000000000000000000000..23ce5ff059b1bd9a25266c963d2e0ba271cce693 --- /dev/null +++ b/llama.cpp/.gitmodules @@ -0,0 +1,3 @@ +[submodule "kompute"] + path = ggml/src/ggml-kompute/kompute + url = https://github.com/nomic-ai/kompute.git diff --git a/llama.cpp/.pre-commit-config.yaml b/llama.cpp/.pre-commit-config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..91d7916285081aa14f5b801935cb35d031f2e601 --- /dev/null +++ b/llama.cpp/.pre-commit-config.yaml @@ -0,0 +1,16 @@ +# See https://pre-commit.com for more information +# See https://pre-commit.com/hooks.html for more hooks +exclude: prompts/.*.txt +repos: +- repo: https://github.com/pre-commit/pre-commit-hooks + 
rev: v4.6.0 + hooks: + - id: trailing-whitespace + - id: end-of-file-fixer + - id: check-yaml + - id: check-added-large-files +- repo: https://github.com/PyCQA/flake8 + rev: 7.0.0 + hooks: + - id: flake8 + additional_dependencies: [flake8-no-print] diff --git a/llama.cpp/AUTHORS b/llama.cpp/AUTHORS new file mode 100644 index 0000000000000000000000000000000000000000..6796b294135954613c8cd06ff98b7a8bbbeabec7 --- /dev/null +++ b/llama.cpp/AUTHORS @@ -0,0 +1,1047 @@ +# date: Tue Feb 4 13:04:05 EET 2025 +# this file is auto-generated by scripts/gen-authors.sh + +0cc4m +0xspringtime <110655352+0xspringtime@users.noreply.github.com> +20kdc +2f38b454 +3ooabkhxtn <31479382+3ooabkhxtn@users.noreply.github.com> +44670 <44670@users.noreply.github.com> +65a <10104049+65a@users.noreply.github.com> +AN Long +AT +Aarni Koskela +Aaron Miller +Aaryaman Vasishta +Abheek Gulati +Abhilash Majumder <30946547+abhilash1910@users.noreply.github.com> +Abhishek Gopinath K <31348521+overtunned@users.noreply.github.com> +Adithya Balaji +AdithyanI +Adrian +Adrian Hesketh +Adrien Gallouët +Adrien Gallouët +Ahmad Tameem <113388789+Tameem-10xE@users.noreply.github.com> +Ahmet Zeer +AidanBeltonS <87009434+AidanBeltonS@users.noreply.github.com> +AidanBeltonS +Aisuko +Akarshan Biswas +Akarshan Biswas +Al Mochkin <14274697+amochkin@users.noreply.github.com> +Albert Jin +Alberto <57916483+albbus-stack@users.noreply.github.com> +Alberto Cabrera Pérez +Alberto Cabrera Pérez +Alex +Alex Azarov +Alex Azarov +Alex Klinkhamer +Alex Klinkhamer +Alex Nguyen +Alex O'Connell <35843486+acon96@users.noreply.github.com> +Alex Petenchea +Alex Renda +Alex Tuddenham <61622354+AlexsCode@users.noreply.github.com> +Alex von Gluck IV +Alexey Parfenov +Ali Chraghi <63465728+alichraghi@users.noreply.github.com> +Ali Nehzat +Ali Tariq +Alon +AlpinDale <52078762+AlpinDale@users.noreply.github.com> +Amir +AmirAli Mirian <37371367+amiralimi@users.noreply.github.com> +Ananta Bastola +Anas Ahouzi <112881240+aahouzi@users.noreply.github.com> +András Salamon +Andreas (Andi) Kunar +Andreas Kieslinger <47689530+aendk@users.noreply.github.com> +Andrei +Andrew Canis +Andrew Downing +Andrew Duffy +Andrew Godfrey +Andrew Minh Nguyen <40281306+amqdn@users.noreply.github.com> +Andy Salerno +Andy Tai +Anthony Van de Gejuchte +Antonis Makropoulos +Arik Poznanski +Armen Kaleshian +Artem +Artem Zinnatullin +Artyom Lebedev +Asbjørn Olling +Ásgeir Bjarni Ingvarsson +Asghar Ghorbani +Ashish <1856117+ashishdatta@users.noreply.github.com> +Ashok Gelal <401055+ashokgelal@users.noreply.github.com> +Ashraful Islam +Atsushi Tatsuma +Austin <77757836+teleprint-me@users.noreply.github.com> +AustinMroz +BADR +Bach Le +Bailey Chittle <39804642+bachittle@users.noreply.github.com> +BarfingLemurs <128182951+BarfingLemurs@users.noreply.github.com> +Bartowski +Behnam M <58621210+ibehnam@users.noreply.github.com> +Ben Ashbaugh +Ben Garney +Ben Siraphob +Ben Williams +Benjamin Findley <39356821+Kartoffelsaft@users.noreply.github.com> +Benjamin Lecaillon <84293038+blecaillon@users.noreply.github.com> +Benson Wong +Bernat Vadell +Bernhard M. 
Wiedemann +Bert Wagner +Billel Mokeddem +Bingan <70050083+binganao@users.noreply.github.com> +Bjarke Viksøe <164612031+bviksoe@users.noreply.github.com> +Bodo Graumann +Bono Lv +Borislav Stanimirov +Borislav Stanimirov +Branden Butler +Brandon Squizzato <35474886+bsquizz@users.noreply.github.com> +Brian +Brian Cunnie +Bruce MacDonald +Bryan Honof +CJ Pais +CRD716 +Calvin Laurenson +Cameron +Cameron Kaiser +Carolinabanana <140120812+Carolinabanana@users.noreply.github.com> +CarryFun <76023481+CarryFun@users.noreply.github.com> +Carsten Kragelund Jørgensen +CarterLi999 <664681047@qq.com> +Casey Primozic +Casey Primozic +CausalLM <148736309+CausalLM@users.noreply.github.com> +Cebtenzzre +CentricStorm +Chad Brewbaker +Changyeon Kim +Chao Jiang +Charles Xu <63788048+chaxu01@users.noreply.github.com> +Charles Xu +Chen Xi +Chen Xi +Cheng Shao +Chenguang Li <87689256+noemotiovon@users.noreply.github.com> +Chris Elrod +Chris Kuehl +Christian Demsar +Christian Demsar +Christian Falch <875252+chrfalch@users.noreply.github.com> +Christian Kastner +Christian Kögler +Christian Köhnenkamp +Christian Zhou-Zheng <59622928+christianazinn@users.noreply.github.com> +Christopher Nielsen <62156882+mascguy@users.noreply.github.com> +Clark Saben <76020733+csaben@users.noreply.github.com> +Clint Herron +Conrad Kramer +Corentin REGAL +CrispStrobe <154636388+CrispStrobe@users.noreply.github.com> +Csaba Kecskemeti +Cuong Trinh Manh +DAN™ +Damian Stewart +Dan Johansson <164997844+eddnjjn@users.noreply.github.com> +Dan Johansson +Dane Madsen +DaniAndTheWeb <57776841+DaniAndTheWeb@users.noreply.github.com> +Daniel Bevenius +Daniel Drake +Daniel Hiltgen +Daniel Illescas Romero +Daniel Kleine <53251018+d-kleine@users.noreply.github.com> +Daniele <57776841+daniandtheweb@users.noreply.github.com> +DannyDaemonic +Dat Quoc Nguyen <2412555+datquocnguyen@users.noreply.github.com> +Dave +Dave Airlie +Dave Airlie +Dave Della Costa +David Friehs +David Kennedy +David Pflug +David Renshaw +David Sommers <12738+databyte@users.noreply.github.com> +David Yang +DavidKorczynski +Dawid Potocki +Dawid Wysocki <62249621+TortillaZHawaii@users.noreply.github.com> +Dean +Deins +Denis Spasyuk <34203011+dspasyuk@users.noreply.github.com> +Derrick T. Woolworth +Deven Mistry <31466137+deven367@users.noreply.github.com> +Dibakar Gope +Didzis Gosko +Diego Devesa +Diogo Teles Sant'Anna +Djip007 <3705339+Djip007@users.noreply.github.com> +Djip007 +Don Mahurin +DooWoong Lee (David) +Doomsdayrs <38189170+Doomsdayrs@users.noreply.github.com> +Dou Xinpeng <15529241576@163.com> +Dou Xinpeng <81913537+Dou-Git@users.noreply.github.com> +Douglas Hanley +Dr. Tom Murphy VII Ph.D <499244+tom7@users.noreply.github.com> +Ebey Abraham +Echo Nolan +Ed Lee +Ed Lepedus +Eddie-Wang +Edward Taylor +Elaine +Elbios <141279586+Elbios@users.noreply.github.com> +Elton Kola +Emreerdog <34742675+Emreerdog@users.noreply.github.com> +Engininja2 <139037756+Engininja2@users.noreply.github.com> +Equim +Eric Curtin +Eric Curtin +Eric Sommerlade +Eric Zhang <34133756+EZForever@users.noreply.github.com> +Erik Garrison +Erik Scholz +Esko Toivonen +Ettore Di Giacinto +Evan Jones +Evan Miller +Eve <139727413+netrunnereve@users.noreply.github.com> +Evgeny Kurnevsky +Ewout ter Hoeven +ExtReMLapin <3909752+ExtReMLapin@users.noreply.github.com> +FK +Fabian +Fabio R. 
Sluzala +Faez Shakil +Faisal Zaghloul +Faisal Zaghloul +Fan Shupei +FantasyGmm <16450052+FantasyGmm@users.noreply.github.com> +Farbod Bijary <110523279+farbodbj@users.noreply.github.com> +Fattire <528174+fat-tire@users.noreply.github.com> +Felix +Finn Voorhees +Firat +FirstTimeEZ <179362031+FirstTimeEZ@users.noreply.github.com> +Folko-Ven <71110216+Folko-Ven@users.noreply.github.com> +Foul-Tarnished <107711110+Foul-Tarnished@users.noreply.github.com> +Francisco Melo <43780565+francis2tm@users.noreply.github.com> +Frank Mai +FrankHB +Frankie Robertson +Fred Douglas <43351173+fredlas@users.noreply.github.com> +Frederik Vogel +Gabe Goodhart +Gabe Goodhart +Gaetan Bisson +GainLee +Galunid +Gary Linscott +Gary Mulder +Gavin Zhao +Genkagaku.GPT +Georgi Gerganov +Gilad S +Gilad S. <7817232+giladgd@users.noreply.github.com> +Giuseppe Scrivano +GiviMAD +Govlzkoy +Guillaume "Vermeille" Sanchez +Guillaume Wenzek +Guoliang Hua <32868157+nbcsm@users.noreply.github.com> +Guoteng <32697156+SolenoidWGT@users.noreply.github.com> +Guspan Tanadi <36249910+guspan-tanadi@users.noreply.github.com> +Gustavo Rocha Dias <91472747+gustrd@users.noreply.github.com> +Haggai Nuchi +Halalaluyafail3 <55773281+Halalaluyafail3@users.noreply.github.com> +Hamdoud Hakem <90524568+hamdoudhakem@users.noreply.github.com> +HanishKVC +Haohui Mai +Haoxiang Fei +Harald Fernengel +Hatsune Miku <129688334+at8u@users.noreply.github.com> +HatsuneMikuUwU33 <173229399+HatsuneMikuUwU33@users.noreply.github.com> +Haus1 +Henk Poley +Henri Vasserman +Henrik Forstén +Herman Semenov +Hesen Peng +HimariO +Hoang Nguyen +Hong Bo PENG +Hongyu Ouyang <96765450+casavaca@users.noreply.github.com> +Howard Su +Hua Jiang +Huang Qi +Huawei Lin +Hugo Roussel +Huifeng Ou <79071290+ho2103@users.noreply.github.com> +Ian Bull +Ian Bull +Ian Scrivener +Icecream95 +Ido S +IgnacioFDM +Igor Okulist +Ihar Hrachyshka +Ikko Eltociear Ashimine +Ilya Kurdyukov <59548320+ilyakurdyukov@users.noreply.github.com> +Ionoclast Laboratories +Isaac McFadyen +IsaacDynamo <61521674+IsaacDynamo@users.noreply.github.com> +Ivan +Ivan Filipov <159561759+vanaka11@users.noreply.github.com> +Ivan Komarov +Ivan Stepanov +JFLFY2255 +JH23X <165871467+JH23X@users.noreply.github.com> +Jack Mousseau +Jack Mousseau +JackJollimore <130917767+JackJollimore@users.noreply.github.com> +Jaeden Amero +Jaemin Son +Jafar Uruç +Jag Chadha +Jakub N +James A Capozzoli <157492257+jac-jim@users.noreply.github.com> +James Reynolds +Jan Boon +Jan Boon +Jan Ploski +Jannis Schönleber +Jared Van Bortel +Jared Van Bortel +Jason McCartney +Jason Stillerman +Jean-Christophe Hoelt +Jean-Michaël Celerier +Jed Fox +Jeff Bolz +Jeffrey Morgan +Jeffrey Quesnelle +Jeroen Mostert +Jesse Jojo Johnson +Jett Janiak +Jeximo +Jhen-Jie Hong +Jiahao Li +Jian Liao +JidongZhang-THU <1119708529@qq.com> +Jinwoo Jeong <33892306+williamjeong2@users.noreply.github.com> +Jiří Podivín <66251151+jpodivin@users.noreply.github.com> +Jiří Sejkora +Joan Fontanals +Joan Fontanals +João Dinis Ferreira +Joe Eli McIlvain +Joe Todd +Johan +Johannes Gäßler +Johannes Rudolph +John <78893154+cmp-nct@users.noreply.github.com> +John Balis +John Smith <67539080+kingsidelee@users.noreply.github.com> +JohnnyB +Jonas Wunderlich <32615971+jonas-w@users.noreply.github.com> +Jorge A <161275481+jorgealias@users.noreply.github.com> +Jose Maldonado <63384398+yukiteruamano@users.noreply.github.com> +Joseph Stahl <1269177+josephst@users.noreply.github.com> +Josh Ramer +Joyce +Juan Calderon-Perez <835733+gaby@users.noreply.github.com> +Judd +Juk Armstrong 
<69222624+jukofyork@users.noreply.github.com> +Julius Arkenberg +Jun Hee Yoo +Jun Jie <71215065+junnjiee16@users.noreply.github.com> +Junil Kim +Junyang Lin +Juraj Bednar +Justin Parker +Justin Suess +Justina Cho +Justine Tunney +Justine Tunney +Juuso Alasuutari +KASR +Kamil Tomšík +Karol Kontny <82021046+kkontny@users.noreply.github.com> +Karsten Weiss +Karthick +Karthik Kumar Viswanathan <195178+guilt@users.noreply.github.com> +Karthik Sethuraman +Kasumi <90275229+kasumi-1@users.noreply.github.com> +Kawrakow <48489457+ikawrakow@users.noreply.github.com> +Keiichi Tabata +Keke Han +Kenvix ⭐ +Kerfuffle <44031344+KerfuffleV2@users.noreply.github.com> +Kevin Gibbons +Kevin Ji <1146876+kevinji@users.noreply.github.com> +Kevin Kwok +Kevin Lo +Kevin Wang +Kolen Cheung +Konstantin Herud +Konstantin Zhuravlyov +Kunshang Ji +Kyle Bruene +Kyle Liang +Kyle Mistele +Kylin <56434533+KyL0N@users.noreply.github.com> +Lars Grammel +Laura +Lee <44310445+lx200916@users.noreply.github.com> +Lee Drake +Leng Yue +Leon Knauer +LeonEricsson <70749762+LeonEricsson@users.noreply.github.com> +Leonardo Neumann +Li Tan +Linwei Wang +Liu Jia <109258120+Septa2112@users.noreply.github.com> +Liu Jia +LoganDark +Loïc Carrère +LostRuins <39025047+LostRuins@users.noreply.github.com> +LostRuins Concedo <39025047+LostRuins@users.noreply.github.com> +Luciano +Luo Tian +Lyle Dean +M-A +M. Yusuf Sarıgöz +Ma Mingfei +Maarten ter Huurne +Mack Straight +Maël Kerbiriou +MaggotHATE +Mahesh Madhav <67384846+heshpdx@users.noreply.github.com> +Manuel <44313466+makuche@users.noreply.github.com> +Marc Köhlbrugge +Marco Matthies <71844+marcom@users.noreply.github.com> +Marcus Dunn <51931484+MarcusDunn@users.noreply.github.com> +Marian Cepok +Mark Fairbairn +Mark Zhuang +Marko Tasic +Markus Tavenrath +Martin Delille +Martin Krasser +Martin Schwaighofer +Marvin Gießing +Masaya, Kato <62578291+msy-kato@users.noreply.github.com> +MasterYi1024 <39848311+MasterYi1024@users.noreply.github.com> +Mateusz Charytoniuk +Matheus C. França +Matheus Gabriel Alves Silva +Mathieu Baudier +Mathieu Geli +Mathieu Nayrolles +Mathijs Henquet +Mathijs de Bruin +Matt Clayton <156335168+mattjcly@users.noreply.github.com> +Matt Pulver +Matt Stephenson +Matteo Boschini <12133566+mbosc@users.noreply.github.com> +Matteo Mortari +Mattheus Chediak +Matthew Tejo +Matvey Soloviev +Max Krasnyansky +Max Krasnyansky +Maxime <672982+maximegmd@users.noreply.github.com> +Maximilian Winter +Meng Zhang +Meng, Hengyu +Mengqing Cao +Merrick Christensen +Michael Coppola +Michael Engel +Michael Francis +Michael Hueschen +Michael Kesper +Michael Klimenko +Michael Podvitskiy +Michael Potter +Michael de Gans +Michaël de Vries +Michał Moskal +Michał Tuszyński +Michelle Tan <41475767+MichelleTanPY@users.noreply.github.com> +Mihai +Mike +Mikko Juola +Minsoo Cheong <54794500+mscheong01@users.noreply.github.com> +Minsoo Cheong +Mirko185 +Mirror Azure <54669636+MirrorAzure@users.noreply.github.com> +MistApproach <98988043+MistApproach@users.noreply.github.com> +Miwa / Ensan <63481257+ensan-hcl@users.noreply.github.com> +Mohammadreza Hendiani +Mohammadreza Hendiani +Molly Sophia +MorganRO8 <47795945+MorganRO8@users.noreply.github.com> +Murilo Santana +Musab Gultekin +Nam D. 
Tran <42194884+namtranase@users.noreply.github.com> +Nathan Epstein +Natsu +NawafAlansari <72708095+NawafAlansari@users.noreply.github.com> +Nebula +Neo Zhang <14088817+arthw@users.noreply.github.com> +Neo Zhang +Neo Zhang Jianyu +Neuman Vong +NeverLucky <92274250+nvrxq@users.noreply.github.com> +Nexes the Old <124105151+Nexesenex@users.noreply.github.com> +Nexesenex <124105151+Nexesenex@users.noreply.github.com> +Niall Coates <1349685+Niall-@users.noreply.github.com> +Nicholai Tukanov +Nico Bosshard +Nicolai Weitkemper +Nicolás Pérez +Nicolò Scipione +Nigel Bosch +Nikita Sarychev <42014488+sARY77@users.noreply.github.com> +Niklas Korz +NikolaiLyssogor <59844691+NikolaiLyssogor@users.noreply.github.com> +Nikolaos Pothitos +Nikolas <127742645+nneubacher@users.noreply.github.com> +Nindaleth +Nuno +OSecret <135510162+OLSecret@users.noreply.github.com> +Oleksandr Nikitin +Oleksii Maryshchenko +Olivier Chafik +Ondřej Čertík +Ouadie EL FAROUKI +PAB +Pablo Duboue +Pascal Patry +Patrice Ferlet +Paul Tsochantaris +Pavel Zloi +Pavol Rusnak +Paweł Wodnicki <151604+32bitmicro@users.noreply.github.com> +Pedro Cuenca +Peter +Peter Sugihara +Phil H <5756783+phiharri@users.noreply.github.com> +Philip Taron +Phillip Kravtsov +Pierre Alexandre SCHEMBRI +Pierrick Hymbert +Pieter Ouwerkerk +Plamen Minev +Prashant Vithule <119530321+Vithulep@users.noreply.github.com> +Przemysław Pawełczyk +Qin Yue Chen <71813199+chenqiny@users.noreply.github.com> +Qingyou Meng +Qu Zongfu <43257352+yancaoweidaode@users.noreply.github.com> +R0CKSTAR +R0CKSTAR +RJ Adriaansen +Radoslav Gerganov +Radosław Gryta +Rahul Vivek Nair <68507071+RahulVivekNair@users.noreply.github.com> +Raj Hammeer Singh Hada +Ralph Soika +Rand Xie +Randall Fitzgerald +Random Fly +Reinforce-II +Rémy Oudompheng +Ren Xuancheng +Rene Leonhardt <65483435+reneleonhardt@users.noreply.github.com> +Reza Kakhki +RhinoDevel +Riccardo Orlando +Riceball LEE +Rich Dougherty +Richard Kiss +Richard Roberson +Rick G <26732651+TheFlipbook@users.noreply.github.com> +Rickard Edén +Rickard Hallerbäck +Rickey Bowers Jr +Riley Stewart +Rinne +Rinne +Robert Brisita <986796+rbrisita@users.noreply.github.com> +Robert Collins +Robert Ormandi <52251610+ormandi@users.noreply.github.com> +Robert Sung-wook Shin +Robey Holderith +Robyn +Roger Meier +Roland <14355895+rbur0425@users.noreply.github.com> +Romain Biessy +Romain D <90720+Artefact2@users.noreply.github.com> +Romain Neutron +Roman Parykin +Ron Evans +Ron Jailall +Roni +Ronny Brendel +Ronsor +Rowan Hart +Ruan <47767371+ruanych@users.noreply.github.com> +Ruchira Hasaranga +Rudi Servo +Ruixin Huang <18860020911@163.com> +Rune <43761327+Rune-AI@users.noreply.github.com> +RunningLeon +RunningLeon +Ryan Landay +Ryder Wishart +Ryuei +Rőczey Barnabás <31726601+An0nie@users.noreply.github.com> +SRHMorris <69468379+SRHMorris@users.noreply.github.com> +SXX +SakuraUmi +Salvador E. 
Tropea +Salvatore Mesoraca +Sam Spilsbury +Sami Farin <3876865+Safari77@users.noreply.github.com> +Samuel Maynard +Sang-Kil Park +Seb C <47074056+Sebby37@users.noreply.github.com> +Sebastián A +SebastianApel <13675545+SebastianApel@users.noreply.github.com> +Senemu <10880819+Senemu@users.noreply.github.com> +Sergey Alirzaev +Sergio López +Sergio López +Sertaç Özercan <852750+sozercan@users.noreply.github.com> +SeungWon Jeong <65549245+redlion0929@users.noreply.github.com> +ShadovvBeast +Shakhar Dasgupta +Shane A +Shangning Xu <32517059+xushangning@users.noreply.github.com> +Shankar +Shanshan Shen <467638484@qq.com> +Shijie <821898965@qq.com> +Shintarou Okada +Shouzheng Liu <61452103+lshzh-ww@users.noreply.github.com> +Shouzheng Liu +Shuichi Tsutsumi +Shupei Fan +Sigbjørn Skjæret +Simon Willison +Siwen Yu +Sky Yan +Slaren <2141330+slaren@users.noreply.github.com> +Slava Primenko +Small Grass Forest +SoftwareRenderer <138734813+SoftwareRenderer@users.noreply.github.com> +Someone +Someone Serge +Sourab Mangrulkar <13534540+pacman100@users.noreply.github.com> +Spencer Sutton +Srihari-mcw <96763064+Srihari-mcw@users.noreply.github.com> +Srinivas Billa +Stefan Sydow +Steffen Röcker +Stephan Walter +Stephen Nichols +Steve Bonds +Steve Grubb +Steven Prichard +Steven Roussey +Steward Garcia <57494570+FSSRepo@users.noreply.github.com> +StrangeBytesDev <141275258+StrangeBytesDev@users.noreply.github.com> +Suaj Carrot <72162667+SuajCarrot@users.noreply.github.com> +Sukriti Sharma +SuperUserNameMan +Sutou Kouhei +Tai Duc Nguyen +Taikono-Himazin +Tameem <113388789+AhmadTameem@users.noreply.github.com> +Tamotsu Takahashi +Tei Home +Thái Hoàng Tâm <75922889+RoyalHeart@users.noreply.github.com> +Thatcher Chamberlin +Theia Vogel +Thérence <13496987+Royalphax@users.noreply.github.com> +Thibault Terrasson +Thomas Klausner +Thorsten Sommer +Tim Miller +Tim Wang +Timmy Knight +Timothy Cronin <40186632+4imothy@users.noreply.github.com> +Ting Lou +Ting Lou +Ting Sun +Tobias Lütke +Tom C +Tom Jobbins <784313+TheBloke@users.noreply.github.com> +Tomas +Tomáš Pazdiora +Tony Wasserka <4840017+neobrain@users.noreply.github.com> +Tristan Druyen +Tristan Ross +Trivikram Kamat <16024985+trivikr@users.noreply.github.com> +Tungsten842 <886724vf@anonaddy.me> +Tungsten842 +Tushar +UEXTM.com <84163508+uextm@users.noreply.github.com> +Ujjawal Panchal <31011628+Ujjawal-K-Panchal@users.noreply.github.com> +Ulrich Drepper +Uzo Nweke +Vaibhav Srivastav +Val Kharitonov +Valentin Konovalov +Valentin Mamedov <45292985+Inf1delis@users.noreply.github.com> +Valentyn Bezshapkin <61702053+valentynbez@users.noreply.github.com> +Vali Malinoiu <0x4139@gmail.com> +Victor Nogueira +Victor Z. 
Peng +Viet-Anh NGUYEN (Andrew) +Vinesh Janarthanan <36610342+VJHack@users.noreply.github.com> +Vlad +Vladimir +Vladimir Malyutin +Vladimir Zorin +VoidIsVoid <343750470@qq.com> +Volodymyr Vitvitskyi <72226+signalpillar@users.noreply.github.com> +Wang Qin <37098874+wangqin0@users.noreply.github.com> +Wang Ran (汪然) +WangHaoranRobin <56047610+WangHaoranRobin@users.noreply.github.com> +Weird Constructor +Welby Seely +Wentai Zhang +WillCorticesAI <150854901+WillCorticesAI@users.noreply.github.com> +William Tambellini +William Tambellini +Willy Tarreau +Woof Dog <197125663+woof-dog@users.noreply.github.com> +Wouter <9594229+DifferentialityDevelopment@users.noreply.github.com> +Wu Jian Ping +Wu Jian Ping +Xiake Sun +Xiang (Kevin) Li +Xiao-Yong Jin +XiaotaoChen +Xiaoyi Chen +Xie Yanbo +Xingchen Song(宋星辰) +Xinpeng Dou <81913537+Dou-Git@users.noreply.github.com> +Xuan Son Nguyen +Xuan-Son Nguyen +Yaiko +Yann Follet <131855179+YannFollet@users.noreply.github.com> +Yaroslav +Yazan Agha-Schrader +Yiming Cui +Yishuo Wang +Yoshi Suhara +Yoshi Suhara +Younes Belkada <49240599+younesbelkada@users.noreply.github.com> +Yueh-Po Peng <94939112+y10ab1@users.noreply.github.com> +Yüg +Yui +Yun Dou +Yuri Khrustalev +Yusuf Kağan Hanoğlu +Yuval Peled <31162840+Yuval-Peled@users.noreply.github.com> +ZHAOKAI WANG +Zane Shannon +Zay <95888118+isaiahbjork@users.noreply.github.com> +Zenix +Zhang Peiyuan +Zheng.Deng <32841220+dengzheng-cloud@users.noreply.github.com> +Zhenwei Jin <109658203+kylo5aby@users.noreply.github.com> +Zhiyuan Li +Zhiyuan Li +ZhouYuChen +Ziad Ben Hadj-Alouane +Ziang Wu <97337387+ZiangWu-77@users.noreply.github.com> +Zsapi +a-n-n-a-l-e-e <150648636+a-n-n-a-l-e-e@users.noreply.github.com> +a3sh <38979186+A3shTnT@users.noreply.github.com> +adel boussaken +afrideva <95653597+afrideva@users.noreply.github.com> +ag2s20150909 <19373730+ag2s20150909@users.noreply.github.com> +agray3 +akawrykow <142945436+akawrykow@users.noreply.github.com> +alek3y <44779186+alek3y@users.noreply.github.com> +alexpinel <93524949+alexpinel@users.noreply.github.com> +alonfaraj +alwqx +amd-dwang +amd-lalithnc +amritahs-ibm +andrijdavid +anon998 <131767832+anon998@users.noreply.github.com> +anzz1 +apaz +apcameron <37645737+apcameron@users.noreply.github.com> +arch-btw <57669023+arch-btw@users.noreply.github.com> +arcrank +ardfork <134447697+ardfork@users.noreply.github.com> +arlo-phoenix <140345165+arlo-phoenix@users.noreply.github.com> +aryantandon01 <80969509+aryantandon01@users.noreply.github.com> +at8u <129688334+at8u@users.noreply.github.com> +automaticcat +awatuna <23447591+awatuna@users.noreply.github.com> +b4b4o +bandoti <141645996+bandoti@users.noreply.github.com> +beiller +bhubbb <79117352+bhubbb@users.noreply.github.com> +bmwl +bobqianic <129547291+bobqianic@users.noreply.github.com> +brucepro +bryanSwk <93190252+bryanSwk@users.noreply.github.com> +bsilvereagle +bssrdf +byte-6174 <88070277+byte-6174@users.noreply.github.com> +cduk <19917266+cduk@users.noreply.github.com> +cebtenzzre +chaihahaha +chiranko <96988916+chiranko@users.noreply.github.com> +clibdev <52199778+clibdev@users.noreply.github.com> +clyang +cocktailpeanut <121128867+cocktailpeanut@users.noreply.github.com> +codezjx +coezbek +comex +compilade <113953597+compilade@users.noreply.github.com> +compilade +cpumaxx <163466046+cpumaxx@users.noreply.github.com> +crasm +crasm +daboe01 +daghanerdonmez <44506702+daghanerdonmez@users.noreply.github.com> +daminho <37615795+daminho@users.noreply.github.com> +david raistrick +ddh0 +ddpasa 
<112642920+ddpasa@users.noreply.github.com> +deepdiffuser <112834445+deepdiffuser@users.noreply.github.com> +devojony <61173062+devojony@users.noreply.github.com> +ditsuke +divinity76 +dm4 +dotpy314 <33351922+dotpy314@users.noreply.github.com> +drbh +ds5t5 <145942675+ds5t5@users.noreply.github.com> +dylan +eastriver +ebraminio +ebraminio +eiery <19350831+eiery@users.noreply.github.com> +eric8607242 +fairydreaming <166155368+fairydreaming@users.noreply.github.com> +fengerhu1 <2748250768@qq.com> +fj-y-saito <85871716+fj-y-saito@users.noreply.github.com> +fraxy-v <65565042+fraxy-v@users.noreply.github.com> +github-actions[bot] +gliptic +gn64 +goerch +grahameth <96447521+grahameth@users.noreply.github.com> +gtygo +gwjr <502526+gwjr@users.noreply.github.com> +h-h-h-h <13482553+h-h-h-h@users.noreply.github.com> +hankcs +haopeng <657407891@qq.com> +hipudding +hoangmit +hongbo.mo <352280764@qq.com> +hopkins385 <98618192+hopkins385@users.noreply.github.com> +howlger +howlger +hutli <6594598+hutli@users.noreply.github.com> +hutli +hutli +hxer7963 +hydai +iSma +iacore <74560659+iacore@users.noreply.github.com> +icppWorld <124377669+icppWorld@users.noreply.github.com> +igarnier +intelmatt <61025942+intelmatt@users.noreply.github.com> +iohub +issixx <46835150+issixx@users.noreply.github.com> +jacobi petrucciani <8117202+jpetrucciani@users.noreply.github.com> +jaime-m-p <167997752+jaime-m-p@users.noreply.github.com> +jameswu2014 <545426914@qq.com> +jdomke <28772296+jdomke@users.noreply.github.com> +jiahao su +jiez <373447296@qq.com> +jneem +joecryptotoo <80373433+joecryptotoo@users.noreply.github.com> +johnson442 <56517414+johnson442@users.noreply.github.com> +jojorne +jon-chuang <9093549+jon-chuang@users.noreply.github.com> +jp-x-g +jukofyork <69222624+jukofyork@users.noreply.github.com> +junchao-loongson <68935141+junchao-loongson@users.noreply.github.com> +jwj7140 <32943891+jwj7140@users.noreply.github.com> +k.h.lai +kaizau +kallewoof +kalomaze <66376113+kalomaze@users.noreply.github.com> +kang +katsu560 <118887472+katsu560@users.noreply.github.com> +kchro3 <62481661+kchro3@users.noreply.github.com> +khimaros +kiltyj +klosax <131523366+klosax@users.noreply.github.com> +krystiancha +kunal-vaishnavi <115581922+kunal-vaishnavi@users.noreply.github.com> +kunnis +kuronekosaiko +kustaaya <58045274+kustaaya@users.noreply.github.com> +kuvaus <22169537+kuvaus@users.noreply.github.com> +kwin1412 <42286931+kwin1412@users.noreply.github.com> +l3utterfly +laik +ldwang +le.chang +leejet +leo-pony +lexasub +lhez +limitedAtonement +liuwei-git <14815172+liuwei-git@users.noreply.github.com> +lon <114724657+longregen@users.noreply.github.com> +loonerin <132926317+loonerin@users.noreply.github.com> +ltoniazzi <61414566+ltoniazzi@users.noreply.github.com> +luoyu-intel +m3ndax +maddes8cht <55592906+maddes8cht@users.noreply.github.com> +mahorozte <41834471+mahorozte@users.noreply.github.com> +makomk +manikbhandari +maor-ps <154728172+maor-ps@users.noreply.github.com> +mashdragon <122402293+mashdragon@users.noreply.github.com> +matiaslin <45382001+matiaslin@users.noreply.github.com> +matt23654 +matteo +mdrokz +mgroeber9110 <45620825+mgroeber9110@users.noreply.github.com> +minarchist +mj-shifu <77107165+mj-shifu@users.noreply.github.com> +mmyjona +momonga <115213907+mmnga@users.noreply.github.com> +momonga <146910567+mmngays@users.noreply.github.com> +moritzbrantner <31051084+moritzbrantner@users.noreply.github.com> +musoles <135031143+musoles@users.noreply.github.com> +mzcu +nanahi 
<130121847+na-na-hi@users.noreply.github.com> +ngc92 <7938269+ngc92@users.noreply.github.com> +nhamanasu <45545786+nhamanasu@users.noreply.github.com> +niansa/tuxifan +niansa/tuxifan +nickp27 +ningshanwutuobang +nold +nopperl <54780682+nopperl@users.noreply.github.com> +nusu-github <29514220+nusu-github@users.noreply.github.com> +olexiyb +omahs <73983677+omahs@users.noreply.github.com> +oobabooga <112222186+oobabooga@users.noreply.github.com> +opparco +ostix360 <55257054+ostix360@users.noreply.github.com> +pculliton +peidaqi +pengxin99 +perserk +piDack <104877312+piDack@users.noreply.github.com> +pmysl +postmasters +pudepiedj +qingfengfenga <41416092+qingfengfenga@users.noreply.github.com> +qingy1337 +qouoq +qunash +rabidcopy +rankaiyx +redbeard +rhjdvsgsgks <26178113+rhjdvsgsgks@users.noreply.github.com> +rhuddleston +rimoliga <53384203+rimoliga@users.noreply.github.com> +runfuture +sandyiscool +sasha0552 +semidark +serhii-nakon <57632032+serhii-nakon@users.noreply.github.com> +sharpHL <132747147+sharpHL@users.noreply.github.com> +shibe2 +singularity <12184989+singularity-s0@users.noreply.github.com> +sjinzh +sjxx <63994076+ylsdamxssjxxdd@users.noreply.github.com> +slaren <2141330+slaren@users.noreply.github.com> +slaren +snadampal <87143774+snadampal@users.noreply.github.com> +someone13574 <81528246+someone13574@users.noreply.github.com> +standby24x7 +staviq +stduhpf +strawberrymelonpanda <152940198+strawberrymelonpanda@users.noreply.github.com> +swittk +takov751 <40316768+takov751@users.noreply.github.com> +tarcey +tc-mb <157115220+tc-mb@users.noreply.github.com> +texmex76 <40733439+texmex76@users.noreply.github.com> +thement <40525767+thement@users.noreply.github.com> +thewh1teagle <61390950+thewh1teagle@users.noreply.github.com> +tjohnman +toyer <2042519524@qq.com> +tslmy +ubik2 +uint256_t +uint256_t +unbounded +uvos +uvos +valiray <133289098+valiray@users.noreply.github.com> +vb +vik +viric +vodkaslime <646329483@qq.com> +vvhg1 <94630311+vvhg1@users.noreply.github.com> +vxiiduu <73044267+vxiiduu@users.noreply.github.com> +wangshuai09 <391746016@qq.com> +wbpxre150 <100937007+wbpxre150@users.noreply.github.com> +whoreson <139810751+whoreson@users.noreply.github.com> +woachk <24752637+woachk@users.noreply.github.com> +wonjun Jang +woodx <124784234+woodx9@users.noreply.github.com> +wwoodsTM <104587230+wwoodsTM@users.noreply.github.com> +wzy <32936898+Freed-Wu@users.noreply.github.com> +xaedes +xaedes +xctan +xloem <0xloem@gmail.com> +yangli2 +ymcki <84055651+ymcki@users.noreply.github.com> +yuiseki +yuri@FreeBSD +zakkor +zhangkaihuo +zhentaoyu +zhouwg <6889919+zhouwg@users.noreply.github.com> +zhouwg +zrm +Ștefan-Gabriel Muscalu +杨朱 · Kiki +源文雨 <41315874+fumiama@users.noreply.github.com> +蕭澧邦 <45505768+shou692199@users.noreply.github.com> +谢乃闻 +Нияз Гарифзянов <112617865+garrnizon@users.noreply.github.com> diff --git a/llama.cpp/CMakeLists.txt b/llama.cpp/CMakeLists.txt new file mode 100644 index 0000000000000000000000000000000000000000..7b2a1845e5c7c747124e5dbfd7132087791cd398 --- /dev/null +++ b/llama.cpp/CMakeLists.txt @@ -0,0 +1,236 @@ +cmake_minimum_required(VERSION 3.14) # for add_link_options and implicit target directories. 
+project("llama.cpp" C CXX) +include(CheckIncludeFileCXX) + +#set(CMAKE_WARN_DEPRECATED YES) +set(CMAKE_WARN_UNUSED_CLI YES) + +set(CMAKE_EXPORT_COMPILE_COMMANDS ON) + +if (NOT XCODE AND NOT MSVC AND NOT CMAKE_BUILD_TYPE) + set(CMAKE_BUILD_TYPE Release CACHE STRING "Build type" FORCE) + set_property(CACHE CMAKE_BUILD_TYPE PROPERTY STRINGS "Debug" "Release" "MinSizeRel" "RelWithDebInfo") +endif() + +# Add path to modules +list(APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/cmake/") + +set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/bin) +set(CMAKE_LIBRARY_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/bin) + +if (CMAKE_SOURCE_DIR STREQUAL CMAKE_CURRENT_SOURCE_DIR) + set(LLAMA_STANDALONE ON) + + include(git-vars) + + # configure project version + # TODO +else() + set(LLAMA_STANDALONE OFF) +endif() + +if (EMSCRIPTEN) + set(BUILD_SHARED_LIBS_DEFAULT OFF) + + option(LLAMA_WASM_SINGLE_FILE "llama: embed WASM inside the generated llama.js" ON) +else() + if (MINGW) + set(BUILD_SHARED_LIBS_DEFAULT OFF) + else() + set(BUILD_SHARED_LIBS_DEFAULT ON) + endif() +endif() + +option(BUILD_SHARED_LIBS "build shared libraries" ${BUILD_SHARED_LIBS_DEFAULT}) + +if (WIN32) + add_compile_definitions(_CRT_SECURE_NO_WARNINGS) +endif() + +if (MSVC) + add_compile_options("$<$:/utf-8>") + add_compile_options("$<$:/utf-8>") + add_compile_options("$<$:/bigobj>") + add_compile_options("$<$:/bigobj>") +endif() + +# +# option list +# + +# debug +option(LLAMA_ALL_WARNINGS "llama: enable all compiler warnings" ON) +option(LLAMA_ALL_WARNINGS_3RD_PARTY "llama: enable all compiler warnings in 3rd party libs" OFF) + +# build +option(LLAMA_FATAL_WARNINGS "llama: enable -Werror flag" OFF) + +# sanitizers +option(LLAMA_SANITIZE_THREAD "llama: enable thread sanitizer" OFF) +option(LLAMA_SANITIZE_ADDRESS "llama: enable address sanitizer" OFF) +option(LLAMA_SANITIZE_UNDEFINED "llama: enable undefined sanitizer" OFF) + +# utils +option(LLAMA_BUILD_COMMON "llama: build common utils library" ${LLAMA_STANDALONE}) + +# extra artifacts +option(LLAMA_BUILD_TESTS "llama: build tests" ${LLAMA_STANDALONE}) +option(LLAMA_BUILD_EXAMPLES "llama: build examples" ${LLAMA_STANDALONE}) +option(LLAMA_BUILD_SERVER "llama: build server example" ${LLAMA_STANDALONE}) + +# 3rd party libs +option(LLAMA_CURL "llama: use libcurl to download model from an URL" OFF) +option(LLAMA_LLGUIDANCE "llama-common: include LLGuidance library for structured output in common utils" OFF) + +# Required for relocatable CMake package +include(${CMAKE_CURRENT_SOURCE_DIR}/cmake/build-info.cmake) +include(${CMAKE_CURRENT_SOURCE_DIR}/cmake/common.cmake) + +# override ggml options +set(GGML_ALL_WARNINGS ${LLAMA_ALL_WARNINGS}) +set(GGML_FATAL_WARNINGS ${LLAMA_FATAL_WARNINGS}) + +# change the default for these ggml options +if (NOT DEFINED GGML_LLAMAFILE) + set(GGML_LLAMAFILE_DEFAULT ON) +endif() + +if (NOT DEFINED GGML_CUDA_GRAPHS) + set(GGML_CUDA_GRAPHS_DEFAULT ON) +endif() + +# transition helpers +function (llama_option_depr TYPE OLD NEW) + if (${OLD}) + message(${TYPE} "${OLD} is deprecated and will be removed in the future.\nUse ${NEW} instead\n") + set(${NEW} ON PARENT_SCOPE) + endif() +endfunction() + +llama_option_depr(FATAL_ERROR LLAMA_CUBLAS GGML_CUDA) +llama_option_depr(WARNING LLAMA_CUDA GGML_CUDA) +llama_option_depr(WARNING LLAMA_KOMPUTE GGML_KOMPUTE) +llama_option_depr(WARNING LLAMA_METAL GGML_METAL) +llama_option_depr(WARNING LLAMA_METAL_EMBED_LIBRARY GGML_METAL_EMBED_LIBRARY) +llama_option_depr(WARNING LLAMA_NATIVE GGML_NATIVE) +llama_option_depr(WARNING LLAMA_RPC 
GGML_RPC) +llama_option_depr(WARNING LLAMA_SYCL GGML_SYCL) +llama_option_depr(WARNING LLAMA_SYCL_F16 GGML_SYCL_F16) +llama_option_depr(WARNING LLAMA_CANN GGML_CANN) + +if (NOT MSVC) + if (LLAMA_SANITIZE_THREAD) + message(STATUS "Using -fsanitize=thread") + + add_compile_options(-fsanitize=thread) + link_libraries (-fsanitize=thread) + endif() + + if (LLAMA_SANITIZE_ADDRESS) + message(STATUS "Using -fsanitize=address") + + add_compile_options(-fsanitize=address -fno-omit-frame-pointer) + link_libraries (-fsanitize=address) + endif() + + if (LLAMA_SANITIZE_UNDEFINED) + message(STATUS "Using -fsanitize=undefined") + + add_compile_options(-fsanitize=undefined) + link_libraries (-fsanitize=undefined) + endif() +endif() + +# +# 3rd-party +# + +if (NOT TARGET ggml) + add_subdirectory(ggml) + # ... otherwise assume ggml is added by a parent CMakeLists.txt +endif() + +# +# build the library +# + +add_subdirectory(src) + +# +# utils, programs, examples and tests +# + +if (LLAMA_BUILD_COMMON) + add_subdirectory(common) +endif() + +if (LLAMA_BUILD_COMMON AND LLAMA_BUILD_TESTS AND NOT CMAKE_JS_VERSION) + include(CTest) + add_subdirectory(tests) +endif() + +if (LLAMA_BUILD_COMMON AND LLAMA_BUILD_EXAMPLES) + add_subdirectory(examples) + add_subdirectory(pocs) +endif() + +# +# install +# + +include(GNUInstallDirs) +include(CMakePackageConfigHelpers) + +set(LLAMA_BUILD_NUMBER ${BUILD_NUMBER}) +set(LLAMA_BUILD_COMMIT ${BUILD_COMMIT}) +set(LLAMA_INSTALL_VERSION 0.0.${BUILD_NUMBER}) + +set(LLAMA_INCLUDE_INSTALL_DIR ${CMAKE_INSTALL_INCLUDEDIR} CACHE PATH "Location of header files") +set(LLAMA_LIB_INSTALL_DIR ${CMAKE_INSTALL_LIBDIR} CACHE PATH "Location of library files") +set(LLAMA_BIN_INSTALL_DIR ${CMAKE_INSTALL_BINDIR} CACHE PATH "Location of binary files") + +set(LLAMA_PUBLIC_HEADERS + ${CMAKE_CURRENT_SOURCE_DIR}/include/llama.h + ${CMAKE_CURRENT_SOURCE_DIR}/include/llama-cpp.h) + +set_target_properties(llama + PROPERTIES + PUBLIC_HEADER "${LLAMA_PUBLIC_HEADERS}") + +install(TARGETS llama LIBRARY PUBLIC_HEADER) + +configure_package_config_file( + ${CMAKE_CURRENT_SOURCE_DIR}/cmake/llama-config.cmake.in + ${CMAKE_CURRENT_BINARY_DIR}/llama-config.cmake + INSTALL_DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/llama + PATH_VARS LLAMA_INCLUDE_INSTALL_DIR + LLAMA_LIB_INSTALL_DIR + LLAMA_BIN_INSTALL_DIR ) + +write_basic_package_version_file( + ${CMAKE_CURRENT_BINARY_DIR}/llama-version.cmake + VERSION ${LLAMA_INSTALL_VERSION} + COMPATIBILITY SameMajorVersion) + +install(FILES ${CMAKE_CURRENT_BINARY_DIR}/llama-config.cmake + ${CMAKE_CURRENT_BINARY_DIR}/llama-version.cmake + DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/llama) + +install( + FILES convert_hf_to_gguf.py + PERMISSIONS + OWNER_READ + OWNER_WRITE + OWNER_EXECUTE + GROUP_READ + GROUP_EXECUTE + WORLD_READ + WORLD_EXECUTE + DESTINATION ${CMAKE_INSTALL_BINDIR}) + +configure_file(cmake/llama.pc.in + "${CMAKE_CURRENT_BINARY_DIR}/llama.pc" + @ONLY) + +install(FILES "${CMAKE_CURRENT_BINARY_DIR}/llama.pc" + DESTINATION ${CMAKE_INSTALL_LIBDIR}/pkgconfig) diff --git a/llama.cpp/CMakePresets.json b/llama.cpp/CMakePresets.json new file mode 100644 index 0000000000000000000000000000000000000000..13bdd7907ab4085fafeb3179936e47ea95c64345 --- /dev/null +++ b/llama.cpp/CMakePresets.json @@ -0,0 +1,97 @@ +{ + "version": 4, + "configurePresets": [ + { + "name": "base", + "hidden": true, + "generator": "Ninja", + "binaryDir": "${sourceDir}/build-${presetName}", + "cacheVariables": { + "CMAKE_EXPORT_COMPILE_COMMANDS": "ON", + "CMAKE_INSTALL_RPATH": "$ORIGIN;$ORIGIN/.." 
+ } + }, + { + "name": "sycl-base", + "hidden": true, + "generator": "Ninja", + "binaryDir": "${sourceDir}/build-${presetName}", + "cacheVariables": { + "CMAKE_EXPORT_COMPILE_COMMANDS": "ON", + "CMAKE_CXX_COMPILER": "icx", + "CMAKE_C_COMPILER": "cl", + "GGML_SYCL": "ON", + "CMAKE_INSTALL_RPATH": "$ORIGIN;$ORIGIN/.." + } + }, + { "name": "debug", "hidden": true, "cacheVariables": { "CMAKE_BUILD_TYPE": "Debug" } }, + { "name": "release", "hidden": true, "cacheVariables": { "CMAKE_BUILD_TYPE": "Release" } }, + { "name": "reldbg", "hidden": true, "cacheVariables": { "CMAKE_BUILD_TYPE": "RelWithDebInfo" } }, + { "name": "static", "hidden": true, "cacheVariables": { "GGML_STATIC": "ON" } }, + { "name": "sycl_f16", "hidden": true, "cacheVariables": { "GGML_SYCL_F16": "ON" } }, + { "name": "vulkan", "hidden": true, "cacheVariables": { "GGML_VULKAN": "ON" } }, + + { + "name": "x64-windows-llvm", "hidden": true, + "cacheVariables": { + "CMAKE_TOOLCHAIN_FILE": "${sourceDir}/cmake/x64-windows-llvm.cmake" + } + }, + + { + "name": "arm64-windows-msvc", "hidden": true, + "architecture": { "value": "arm64", "strategy": "external" }, + "toolset": { "value": "host=x64", "strategy": "external" }, + "cacheVariables": { + "CMAKE_TOOLCHAIN_FILE": "${sourceDir}/cmake/arm64-windows-msvc.cmake" + } + }, + + { + "name": "arm64-windows-llvm", "hidden": true, + "architecture": { "value": "arm64", "strategy": "external" }, + "toolset": { "value": "host=x64", "strategy": "external" }, + "cacheVariables": { + "CMAKE_TOOLCHAIN_FILE": "${sourceDir}/cmake/arm64-windows-llvm.cmake" + } + }, + + { + "name": "arm64-apple-clang", "hidden": true, + "architecture": { "value": "arm64", "strategy": "external" }, + "toolset": { "value": "host=x64", "strategy": "external" }, + "cacheVariables": { + "CMAKE_TOOLCHAIN_FILE": "${sourceDir}/cmake/arm64-apple-clang.cmake" + } + }, + + { "name": "arm64-windows-llvm-debug", "inherits": [ "base", "arm64-windows-llvm", "debug" ] }, + { "name": "arm64-windows-llvm-release", "inherits": [ "base", "arm64-windows-llvm", "reldbg" ] }, + { "name": "arm64-windows-llvm+static-release", "inherits": [ "base", "arm64-windows-llvm", "reldbg", "static" ] }, + + { "name": "arm64-apple-clang-debug", "inherits": [ "base", "arm64-apple-clang", "debug" ] }, + { "name": "arm64-apple-clang-release", "inherits": [ "base", "arm64-apple-clang", "reldbg" ] }, + { "name": "arm64-apple-clang+static-release", "inherits": [ "base", "arm64-apple-clang", "reldbg", "static" ] }, + + { "name": "arm64-windows-msvc-debug", "inherits": [ "base", "arm64-windows-msvc", "debug" ] }, + { "name": "arm64-windows-msvc-release", "inherits": [ "base", "arm64-windows-msvc", "reldbg" ] }, + { "name": "arm64-windows-msvc+static-release", "inherits": [ "base", "arm64-windows-msvc", "reldbg", "static" ] }, + + { "name": "x64-windows-llvm-debug", "inherits": [ "base", "x64-windows-llvm", "debug" ] }, + { "name": "x64-windows-llvm-release", "inherits": [ "base", "x64-windows-llvm", "release" ] }, + { "name": "x64-windows-llvm-reldbg", "inherits": [ "base", "x64-windows-llvm", "reldbg" ] }, + { "name": "x64-windows-llvm+static-release", "inherits": [ "base", "x64-windows-llvm", "reldbg", "static" ] }, + + { "name": "x64-windows-msvc-debug", "inherits": [ "base", "debug" ] }, + { "name": "x64-windows-msvc-release", "inherits": [ "base", "reldbg" ] }, + { "name": "x64-windows-msvc+static-release", "inherits": [ "base", "reldbg", "static" ] }, + + { "name": "x64-windows-sycl-debug", "inherits": [ "sycl-base", "debug" ] }, + { "name": 
"x64-windows-sycl-debug-f16", "inherits": [ "sycl-base", "debug", "sycl_f16" ] }, + { "name": "x64-windows-sycl-release", "inherits": [ "sycl-base", "release" ] }, + { "name": "x64-windows-sycl-release-f16", "inherits": [ "sycl-base", "release", "sycl_f16" ] }, + + { "name": "x64-windows-vulkan-debug", "inherits": [ "base", "vulkan", "debug" ] }, + { "name": "x64-windows-vulkan-release", "inherits": [ "base", "vulkan", "release" ] } + ] +} diff --git a/llama.cpp/CODEOWNERS b/llama.cpp/CODEOWNERS new file mode 100644 index 0000000000000000000000000000000000000000..72d594b46e9111f27ee64515d971bbf016a70a82 --- /dev/null +++ b/llama.cpp/CODEOWNERS @@ -0,0 +1,11 @@ +# collaborators can optionally add themselves here to indicate their availability for reviewing related PRs + +/ci/ @ggerganov +/.devops/*.Dockerfile @ngxson +/examples/server/ @ngxson +/ggml/src/ggml-cuda/fattn* @JohannesGaessler +/ggml/src/ggml-cuda/mmq.* @JohannesGaessler +/ggml/src/ggml-cuda/mmv.* @JohannesGaessler +/ggml/src/ggml-cuda/mmvq.* @JohannesGaessler +/ggml/src/ggml-opt.cpp @JohannesGaessler +/ggml/src/gguf.cpp @JohannesGaessler diff --git a/llama.cpp/CONTRIBUTING.md b/llama.cpp/CONTRIBUTING.md new file mode 100644 index 0000000000000000000000000000000000000000..8d411982b4379ee9c9be7686809d0448f15131dc --- /dev/null +++ b/llama.cpp/CONTRIBUTING.md @@ -0,0 +1,125 @@ +# Pull requests (for contributors) + +- Test your changes: + - Execute [the full CI locally on your machine](ci/README.md) before publishing + - Verify that the perplexity and the performance are not affected negatively by your changes (use `llama-perplexity` and `llama-bench`) + - If you modified the `ggml` source, run the `test-backend-ops` tool to check whether different backend implementations of the `ggml` operators produce consistent results (this requires access to at least two different `ggml` backends) + - If you modified a `ggml` operator or added a new one, add the corresponding test cases to `test-backend-ops` +- Consider allowing write access to your branch for faster reviews, as reviewers can push commits directly +- If your PR becomes stale, don't hesitate to ping the maintainers in the comments + +# Pull requests (for collaborators) + +- Squash-merge PRs +- Use the following format for the squashed commit title: ` : (#)`. For example: `utils : fix typo in utils.py (#1234)` +- Optionally pick a `` from here: https://github.com/ggerganov/llama.cpp/wiki/Modules +- Consider adding yourself to [CODEOWNERS](CODEOWNERS) + +# Coding guidelines + +- Avoid adding third-party dependencies, extra files, extra headers, etc. +- Always consider cross-compatibility with other operating systems and architectures +- Avoid fancy-looking modern STL constructs, use basic `for` loops, avoid templates, keep it simple +- Vertical alignment makes things more readable and easier to batch edit +- Clean-up any trailing whitespaces, use 4 spaces for indentation, brackets on the same line, `void * ptr`, `int & a` +- Use sized integer types such as `int32_t` in the public API, e.g. `size_t` may also be appropriate for allocation sizes or byte offsets +- Declare structs with `struct foo {}` instead of `typedef struct foo {} foo` + - In C++ code omit optional `struct` and `enum` keyword whenever they are not necessary + ```cpp + // OK + llama_context * ctx; + const llama_rope_type rope_type; + + // not OK + struct llama_context * ctx; + const enum llama_rope_type rope_type; + ``` + + _(NOTE: this guideline is yet to be applied to the `llama.cpp` codebase. 
New code should follow this guideline.)_ + +- Try to follow the existing patterns in the code (indentation, spaces, etc.). In case of doubt use `clang-format` to format the added code +- For anything not covered in the current guidelines, refer to the [C++ Core Guidelines](https://isocpp.github.io/CppCoreGuidelines/CppCoreGuidelines) +- Tensors store data in row-major order. We refer to dimension 0 as columns, 1 as rows, 2 as matrices +- Matrix multiplication is unconventional: [`C = ggml_mul_mat(ctx, A, B)`](https://github.com/ggerganov/llama.cpp/blob/880e352277fc017df4d5794f0c21c44e1eae2b84/ggml.h#L1058-L1064) means $C^T = A B^T \Leftrightarrow C = B A^T.$ + +![matmul](media/matmul.png) + +# Naming guidelines + +- Use `snake_case` for function, variable and type names +- Naming usually optimizes for longest common prefix (see https://github.com/ggerganov/ggml/pull/302#discussion_r1243240963) + + ```cpp + // not OK + int small_number; + int big_number; + + // OK + int number_small; + int number_big; + ``` + +- Enum values are always in upper case and prefixed with the enum name + + ```cpp + enum llama_vocab_type { + LLAMA_VOCAB_TYPE_NONE = 0, + LLAMA_VOCAB_TYPE_SPM = 1, + LLAMA_VOCAB_TYPE_BPE = 2, + LLAMA_VOCAB_TYPE_WPM = 3, + LLAMA_VOCAB_TYPE_UGM = 4, + LLAMA_VOCAB_TYPE_RWKV = 5, + }; + ``` + +- The general naming pattern is `<class>_<method>`, with `<method>` being `<action>_<noun>` + + ```cpp + llama_model_init(); // class: "llama_model", method: "init" + llama_sampler_chain_remove(); // class: "llama_sampler_chain", method: "remove" + llama_sampler_get_seed(); // class: "llama_sampler", method: "get_seed" + llama_set_embeddings(); // class: "llama_context", method: "set_embeddings" + llama_n_threads(); // class: "llama_context", method: "n_threads" + llama_adapter_lora_free(); // class: "llama_adapter_lora", method: "free" + ``` + + - The `get` `<action>` can be omitted + - The `<noun>` can be omitted if not necessary + - The `_context` suffix of the `<class>` is optional. Use it to disambiguate symbols when needed + - Use `init`/`free` for constructor/destructor `<action>` + +- Use the `_t` suffix when a type is supposed to be opaque to the user - it's not relevant to them if it is a struct or anything else + + ```cpp + typedef struct llama_context * llama_context_t; + + enum llama_pooling_type llama_pooling_type(const llama_context_t ctx); + ``` + + _(NOTE: this guideline is yet to be applied to the `llama.cpp` codebase. New code should follow this guideline)_ + +- C/C++ filenames are all lowercase with dashes. Headers use the `.h` extension. Source files use the `.c` or `.cpp` extension +- Python filenames are all lowercase with underscores + +- _(TODO: abbreviations usage)_ + +# Preprocessor directives + +- _(TODO: add guidelines with examples and apply them to the codebase)_ + + ```cpp + #ifdef FOO + #endif // FOO + ``` + +# Documentation + +- Documentation is a community effort +- When you need to look into the source code to figure out how to use an API consider adding a short summary to the header file for future reference +- When you notice incorrect or outdated documentation, please update it + +# Resources + +The Github issues, PRs and discussions contain a lot of information that can be useful to get familiar with the codebase. 
For convenience, some of the more important information is referenced from Github projects: + +https://github.com/ggerganov/llama.cpp/projects diff --git a/llama.cpp/build/bin/LICENSE b/llama.cpp/LICENSE similarity index 100% rename from llama.cpp/build/bin/LICENSE rename to llama.cpp/LICENSE diff --git a/llama.cpp/Makefile b/llama.cpp/Makefile new file mode 100644 index 0000000000000000000000000000000000000000..dc3de3cb14e44d01d0ffdaaded39e071e3c1ad37 --- /dev/null +++ b/llama.cpp/Makefile @@ -0,0 +1,1605 @@ +ifndef LLAMA_MAKEFILE +$(error The Makefile build is deprecated. Use the CMake build instead. For more details, see https://github.com/ggerganov/llama.cpp/blob/master/docs/build.md) +endif + +# Define the default target now so that it is always the first target +BUILD_TARGETS = \ + libllava.a \ + llama-batched \ + llama-batched-bench \ + llama-bench \ + llama-cli \ + llama-convert-llama2c-to-ggml \ + llama-embedding \ + llama-eval-callback \ + llama-export-lora \ + llama-gbnf-validator \ + llama-gguf \ + llama-gguf-hash \ + llama-gguf-split \ + llama-gritlm \ + llama-imatrix \ + llama-infill \ + llama-llava-cli \ + llama-minicpmv-cli\ + llama-qwen2vl-cli\ + llama-lookahead \ + llama-lookup \ + llama-lookup-create \ + llama-lookup-merge \ + llama-lookup-stats \ + llama-parallel \ + llama-passkey \ + llama-perplexity \ + llama-q8dot \ + llama-quantize \ + llama-quantize-stats \ + llama-retrieval \ + llama-save-load-state \ + llama-server \ + llama-simple \ + llama-simple-chat \ + llama-run \ + llama-speculative \ + llama-tokenize \ + llama-vdot \ + llama-cvector-generator \ + llama-gen-docs \ + tests/test-c.o + +# Binaries only useful for tests +TEST_TARGETS = \ + tests/test-arg-parser \ + tests/test-autorelease \ + tests/test-backend-ops \ + tests/test-chat \ + tests/test-chat-template \ + tests/test-double-float \ + tests/test-grammar-integration \ + tests/test-grammar-parser \ + tests/test-json-schema-to-grammar \ + tests/test-llama-grammar \ + tests/test-log \ + tests/test-model-load-cancel \ + tests/test-quantize-fns \ + tests/test-quantize-perf \ + tests/test-rope \ + tests/test-sampling \ + tests/test-tokenizer-0 \ + tests/test-tokenizer-1-bpe \ + tests/test-tokenizer-1-spm +# tests/test-opt \ + +# Legacy build targets that were renamed in #7809, but should still be removed when the project is cleaned +LEGACY_TARGETS_CLEAN = main quantize quantize-stats perplexity imatrix embedding vdot q8dot convert-llama2c-to-ggml \ + simple batched batched-bench save-load-state server gguf gguf-split eval-callback llama-bench libllava.a llava-cli baby-llama \ + retrieval speculative infill tokenize parallel export-lora lookahead lookup passkey gritlm + +# Legacy build targets that were renamed in #7809, but we want to build binaries that for them that output a deprecation warning if people try to use them. +# We don't want to clutter things too much, so we only build replacements for the most commonly used binaries. +LEGACY_TARGETS_BUILD = main quantize perplexity embedding server + +# Deprecation aliases +ifdef LLAMA_CUBLAS +$(error LLAMA_CUBLAS is removed. Use GGML_CUDA instead.) 
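+# For example (assuming the usual `make VAR=1` style of invocation, which is not spelled out here):
+# `make GGML_CUDA=1` is the supported way to enable the CUDA backend, `make LLAMA_CUDA=1` still builds
+# but only as a deprecated alias that triggers the warning printed further below, and
+# `make LLAMA_CUBLAS=1` stops with the error above. The same pattern applies to the other
+# LLAMA_* aliases that follow.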
+endif + +ifdef LLAMA_CUDA +GGML_CUDA := 1 +DEPRECATE_WARNING := 1 +endif + +ifdef LLAMA_KOMPUTE +GGML_KOMPUTE := 1 +DEPRECATE_WARNING := 1 +endif + +ifdef LLAMA_METAL +GGML_METAL := 1 +DEPRECATE_WARNING := 1 +endif + +ifdef LLAMA_RPC +GGML_RPC := 1 +DEPRECATE_WARNING := 1 +endif + +ifdef LLAMA_SYCL +GGML_SYCL := 1 +DEPRECATE_WARNING := 1 +endif + +ifdef LLAMA_SYCL_F16 +GGML_SYCL_F16 := 1 +DEPRECATE_WARNING := 1 +endif + +ifdef LLAMA_OPENBLAS +GGML_OPENBLAS := 1 +DEPRECATE_WARNING := 1 +endif + +ifdef LLAMA_OPENBLAS64 +GGML_OPENBLAS64 := 1 +DEPRECATE_WARNING := 1 +endif + +ifdef LLAMA_BLIS +GGML_BLIS := 1 +DEPRECATE_WARNING := 1 +endif + +ifdef LLAMA_NO_LLAMAFILE +GGML_NO_LLAMAFILE := 1 +DEPRECATE_WARNING := 1 +endif + +ifdef LLAMA_NO_ACCELERATE +GGML_NO_ACCELERATE := 1 +DEPRECATE_WARNING := 1 +endif + +ifdef LLAMA_NO_OPENMP +GGML_NO_OPENMP := 1 +DEPRECATE_WARNING := 1 +endif + +ifdef LLAMA_NO_METAL +GGML_NO_METAL := 1 +DEPRECATE_WARNING := 1 +endif + +ifdef LLAMA_DISABLE_LOGS +REMOVE_WARNING := 1 +endif + +ifdef LLAMA_SERVER_VERBOSE +REMOVE_WARNING := 1 +endif + +ifndef UNAME_S +UNAME_S := $(shell uname -s) +endif + +ifndef UNAME_P +UNAME_P := $(shell uname -p) +endif + +ifndef UNAME_M +UNAME_M := $(shell uname -m) +endif + +# In GNU make default CXX is g++ instead of c++. Let's fix that so that users +# of non-gcc compilers don't have to provide g++ alias or wrapper. +DEFCC := cc +DEFCXX := c++ +ifeq ($(origin CC),default) +CC := $(DEFCC) +endif +ifeq ($(origin CXX),default) +CXX := $(DEFCXX) +endif + +# Mac OS + Arm can report x86_64 +# ref: https://github.com/ggerganov/whisper.cpp/issues/66#issuecomment-1282546789 +ifeq ($(UNAME_S),Darwin) + ifndef GGML_NO_METAL + GGML_METAL := 1 + endif + + GGML_NO_OPENMP := 1 + + ifneq ($(UNAME_P),arm) + SYSCTL_M := $(shell sysctl -n hw.optional.arm64 2>/dev/null) + ifeq ($(SYSCTL_M),1) + # UNAME_P := arm + # UNAME_M := arm64 + warn := $(warning Your arch is announced as x86_64, but it seems to actually be ARM64. Not fixing that can lead to bad performance. For more info see: https://github.com/ggerganov/whisper.cpp/issues/66\#issuecomment-1282546789) + endif + endif +endif + +ifdef GGML_METAL + GGML_METAL_EMBED_LIBRARY := 1 +endif + +ifdef GGML_RPC + BUILD_TARGETS += rpc-server +endif + +ifdef GGML_VULKAN + BUILD_TARGETS += vulkan-shaders-gen +endif + +default: $(BUILD_TARGETS) $(LEGACY_TARGETS_BUILD) + +test: $(TEST_TARGETS) + @failures=0; \ + for test_target in $(TEST_TARGETS); do \ + if [ "$$test_target" = "tests/test-tokenizer-0" ]; then \ + ./$$test_target $(CURDIR)/models/ggml-vocab-llama-spm.gguf; \ + ./$$test_target $(CURDIR)/models/ggml-vocab-llama-bpe.gguf; \ + ./$$test_target $(CURDIR)/models/ggml-vocab-phi-3.gguf; \ + ./$$test_target $(CURDIR)/models/ggml-vocab-falcon.gguf; \ + ./$$test_target $(CURDIR)/models/ggml-vocab-bert-bge.gguf; \ + ./$$test_target $(CURDIR)/models/ggml-vocab-starcoder.gguf; \ + ./$$test_target $(CURDIR)/models/ggml-vocab-gpt-2.gguf; \ + ./$$test_target $(CURDIR)/models/ggml-vocab-refact.gguf; \ + elif [ "$$test_target" = "tests/test-tokenizer-1-spm" ]; then \ + continue; \ + elif [ "$$test_target" = "tests/test-tokenizer-1-bpe" ]; then \ + continue; \ + else \ + echo "Running test $$test_target..."; \ + ./$$test_target; \ + fi; \ + if [ $$? 
-ne 0 ]; then \ + printf 'Test %s FAILED!\n\n' $$test_target; \ + failures=$$(( failures + 1 )); \ + else \ + printf 'Test %s passed.\n\n' $$test_target; \ + fi; \ + done; \ + if [ $$failures -gt 0 ]; then \ + printf '\n%s tests failed.\n' $$failures; \ + exit 1; \ + fi + @echo 'All tests passed.' + +all: $(BUILD_TARGETS) $(TEST_TARGETS) $(LEGACY_TARGETS_BUILD) + +ifdef RISCV_CROSS_COMPILE +CC := riscv64-unknown-linux-gnu-gcc +CXX := riscv64-unknown-linux-gnu-g++ +endif + +# +# Compile flags +# + +# keep standard at C11 and C++17 +MK_CPPFLAGS = -Iggml/include -Iggml/src -Iinclude -Isrc -Icommon -DGGML_USE_CPU +MK_CFLAGS = -std=c11 -fPIC +MK_CXXFLAGS = -std=c++17 -fPIC +MK_NVCCFLAGS = -std=c++17 + +ifdef LLAMA_NO_CCACHE +GGML_NO_CCACHE := 1 +DEPRECATE_WARNING := 1 +endif + +ifndef GGML_NO_CCACHE +CCACHE := $(shell which ccache) +ifdef CCACHE +export CCACHE_SLOPPINESS = time_macros +$(info I ccache found, compilation results will be cached. Disable with GGML_NO_CCACHE.) +CC := $(CCACHE) $(CC) +CXX := $(CCACHE) $(CXX) +else +$(info I ccache not found. Consider installing it for faster compilation.) +endif # CCACHE +endif # GGML_NO_CCACHE + +# clock_gettime came in POSIX.1b (1993) +# CLOCK_MONOTONIC came in POSIX.1-2001 / SUSv3 as optional +# posix_memalign came in POSIX.1-2001 / SUSv3 +# M_PI is an XSI extension since POSIX.1-2001 / SUSv3, came in XPG1 (1985) +MK_CPPFLAGS += -D_XOPEN_SOURCE=600 + +# Somehow in OpenBSD whenever POSIX conformance is specified +# some string functions rely on locale_t availability, +# which was introduced in POSIX.1-2008, forcing us to go higher +ifeq ($(UNAME_S),OpenBSD) + MK_CPPFLAGS += -U_XOPEN_SOURCE -D_XOPEN_SOURCE=700 +endif + +# Data types, macros and functions related to controlling CPU affinity and +# some memory allocation are available on Linux through GNU extensions in libc +ifeq ($(UNAME_S),Linux) + MK_CPPFLAGS += -D_GNU_SOURCE + MK_LDFLAGS += -ldl +endif + +# RLIMIT_MEMLOCK came in BSD, is not specified in POSIX.1, +# and on macOS its availability depends on enabling Darwin extensions +# similarly on DragonFly, enabling BSD extensions is necessary +ifeq ($(UNAME_S),Darwin) + MK_CPPFLAGS += -D_DARWIN_C_SOURCE +endif +ifeq ($(UNAME_S),DragonFly) + MK_CPPFLAGS += -D__BSD_VISIBLE +endif + +# alloca is a non-standard interface that is not visible on BSDs when +# POSIX conformance is specified, but not all of them provide a clean way +# to enable it in such cases +ifeq ($(UNAME_S),FreeBSD) + MK_CPPFLAGS += -D__BSD_VISIBLE +endif +ifeq ($(UNAME_S),NetBSD) + MK_CPPFLAGS += -D_NETBSD_SOURCE +endif +ifeq ($(UNAME_S),OpenBSD) + MK_CPPFLAGS += -D_BSD_SOURCE +endif + +ifdef GGML_SCHED_MAX_COPIES + MK_CPPFLAGS += -DGGML_SCHED_MAX_COPIES=$(GGML_SCHED_MAX_COPIES) +endif + +ifdef LLAMA_DEBUG + MK_CFLAGS += -O0 -g + MK_CXXFLAGS += -O0 -g + MK_LDFLAGS += -g + MK_NVCCFLAGS += -O0 -g + + ifeq ($(UNAME_S),Linux) + MK_CPPFLAGS += -D_GLIBCXX_ASSERTIONS + endif +else + MK_CPPFLAGS += -DNDEBUG + MK_CFLAGS += -O3 -g + MK_CXXFLAGS += -O3 -g + MK_NVCCFLAGS += -O3 -g +endif + +ifdef LLAMA_SANITIZE_THREAD + MK_CFLAGS += -fsanitize=thread -g + MK_CXXFLAGS += -fsanitize=thread -g + MK_LDFLAGS += -fsanitize=thread -g +endif + +ifdef LLAMA_SANITIZE_ADDRESS + MK_CFLAGS += -fsanitize=address -fno-omit-frame-pointer -g + MK_CXXFLAGS += -fsanitize=address -fno-omit-frame-pointer -g + MK_LDFLAGS += -fsanitize=address -fno-omit-frame-pointer -g +endif + +ifdef LLAMA_SANITIZE_UNDEFINED + MK_CFLAGS += -fsanitize=undefined -g + MK_CXXFLAGS += -fsanitize=undefined -g + MK_LDFLAGS += 
-fsanitize=undefined -g +endif + +ifdef LLAMA_SERVER_SSL + MK_CPPFLAGS += -DCPPHTTPLIB_OPENSSL_SUPPORT + MK_LDFLAGS += -lssl -lcrypto +endif + +ifndef GGML_NO_CPU_AARCH64 + MK_CPPFLAGS += -DGGML_USE_CPU_AARCH64 +endif + +# warnings +WARN_FLAGS = \ + -Wall \ + -Wextra \ + -Wpedantic \ + -Wcast-qual \ + -Wno-unused-function + +MK_CFLAGS += \ + $(WARN_FLAGS) \ + -Wshadow \ + -Wstrict-prototypes \ + -Wpointer-arith \ + -Wmissing-prototypes \ + -Werror=implicit-int \ + -Werror=implicit-function-declaration + +MK_CXXFLAGS += \ + $(WARN_FLAGS) \ + -Wmissing-declarations \ + -Wmissing-noreturn + +ifeq ($(LLAMA_FATAL_WARNINGS),1) + MK_CFLAGS += -Werror + MK_CXXFLAGS += -Werror +endif + +# this version of Apple ld64 is buggy +ifneq '' '$(findstring dyld-1015.7,$(shell $(CC) $(LDFLAGS) -Wl,-v 2>&1))' + MK_CPPFLAGS += -DHAVE_BUGGY_APPLE_LINKER +endif + +# OS specific +# TODO: support Windows +ifneq '' '$(filter $(UNAME_S),Linux Darwin FreeBSD NetBSD OpenBSD Haiku)' + MK_CFLAGS += -pthread + MK_CXXFLAGS += -pthread +endif + +# detect Windows +ifneq ($(findstring _NT,$(UNAME_S)),) + _WIN32 := 1 +endif + +# library name prefix +ifneq ($(_WIN32),1) + LIB_PRE := lib +endif + +# Dynamic Shared Object extension +ifneq ($(_WIN32),1) + DSO_EXT := .so +else + DSO_EXT := .dll +endif + +# Windows Sockets 2 (Winsock) for network-capable apps +ifeq ($(_WIN32),1) + LWINSOCK2 := -lws2_32 +endif + +ifdef LLAMA_GPROF + MK_CFLAGS += -pg + MK_CXXFLAGS += -pg +endif + +# Architecture specific +# TODO: probably these flags need to be tweaked on some architectures +# feel free to update the Makefile for your architecture and send a pull request or issue + +ifndef RISCV_CROSS_COMPILE + +ifeq ($(UNAME_M),$(filter $(UNAME_M),x86_64 i686 amd64)) + # Use all CPU extensions that are available: + MK_CFLAGS += -march=native -mtune=native + HOST_CXXFLAGS += -march=native -mtune=native + + # Usage AMX build test + #MK_CFLAGS += -march=graniterapids -mtune=graniterapids + #HOST_CXXFLAGS += -march=graniterapids -mtune=graniterapids + + # Usage AVX-only + #MK_CFLAGS += -mfma -mf16c -mavx + #MK_CXXFLAGS += -mfma -mf16c -mavx + + # Usage SSSE3-only (Not is SSE3!) + #MK_CFLAGS += -mssse3 + #MK_CXXFLAGS += -mssse3 +endif + +ifneq '' '$(findstring mingw,$(shell $(CC) -dumpmachine))' + # The stack is only 16-byte aligned on Windows, so don't let gcc emit aligned moves. + # https://gcc.gnu.org/bugzilla/show_bug.cgi?id=54412 + # https://github.com/ggerganov/llama.cpp/issues/2922 + MK_CFLAGS += -Xassembler -muse-unaligned-vector-move + MK_CXXFLAGS += -Xassembler -muse-unaligned-vector-move + + # Target Windows 8 for PrefetchVirtualMemory + MK_CPPFLAGS += -D_WIN32_WINNT=0x602 +endif + +ifneq ($(filter aarch64%,$(UNAME_M)),) + # Apple M1, M2, etc. 
+ # Raspberry Pi 3, 4, Zero 2 (64-bit) + # Nvidia Jetson + MK_CFLAGS += -mcpu=native + MK_CXXFLAGS += -mcpu=native + JETSON_RELEASE_INFO = $(shell jetson_release) + ifdef JETSON_RELEASE_INFO + ifneq ($(filter TX2%,$(JETSON_RELEASE_INFO)),) + JETSON_EOL_MODULE_DETECT = 1 + CC = aarch64-unknown-linux-gnu-gcc + cxx = aarch64-unknown-linux-gnu-g++ + endif + endif +endif + +ifneq ($(filter armv6%,$(UNAME_M)),) + # Raspberry Pi 1, Zero + MK_CFLAGS += -mfpu=neon-fp-armv8 -mfp16-format=ieee -mno-unaligned-access + MK_CXXFLAGS += -mfpu=neon-fp-armv8 -mfp16-format=ieee -mno-unaligned-access +endif + +ifneq ($(filter armv7%,$(UNAME_M)),) + # Raspberry Pi 2 + MK_CFLAGS += -mfpu=neon-fp-armv8 -mfp16-format=ieee -mno-unaligned-access -funsafe-math-optimizations + MK_CXXFLAGS += -mfpu=neon-fp-armv8 -mfp16-format=ieee -mno-unaligned-access -funsafe-math-optimizations +endif + +ifneq ($(filter armv8%,$(UNAME_M)),) + # Raspberry Pi 3, 4, Zero 2 (32-bit) + MK_CFLAGS += -mfp16-format=ieee -mno-unaligned-access + MK_CXXFLAGS += -mfp16-format=ieee -mno-unaligned-access +endif + +ifneq ($(filter ppc64%,$(UNAME_M)),) + POWER9_M := $(shell grep "POWER9" /proc/cpuinfo) + ifneq (,$(findstring POWER9,$(POWER9_M))) + MK_CFLAGS += -mcpu=power9 + MK_CXXFLAGS += -mcpu=power9 + endif +endif + +ifneq ($(filter ppc64le%,$(UNAME_M)),) + MK_CFLAGS += -mcpu=powerpc64le + MK_CXXFLAGS += -mcpu=powerpc64le + CUDA_POWER_ARCH = 1 +endif + +ifneq ($(filter loongarch64%,$(UNAME_M)),) + MK_CFLAGS += -mlasx + MK_CXXFLAGS += -mlasx +endif + +ifneq ($(filter riscv64%,$(UNAME_M)),) + MK_CFLAGS += -march=rv64gcv -mabi=lp64d + MK_CXXFLAGS += -march=rv64gcv -mabi=lp64d +endif + +else # RISC-V CROSS COMPILATION + MK_CFLAGS += -march=rv64gcv -mabi=lp64d + MK_CXXFLAGS += -march=rv64gcv -mabi=lp64d +endif + +ifndef GGML_NO_ACCELERATE + # Mac OS - include Accelerate framework. 
+ # `-framework Accelerate` works both with Apple Silicon and Mac Intel + ifeq ($(UNAME_S),Darwin) + MK_CPPFLAGS += -DGGML_USE_ACCELERATE -DGGML_USE_BLAS -DGGML_BLAS_USE_ACCELERATE + MK_CPPFLAGS += -DACCELERATE_NEW_LAPACK + MK_CPPFLAGS += -DACCELERATE_LAPACK_ILP64 + MK_LDFLAGS += -framework Accelerate + OBJ_GGML_EXT += ggml/src/ggml-blas/ggml-blas.o + endif +endif # GGML_NO_ACCELERATE + +ifndef GGML_NO_OPENMP + MK_CPPFLAGS += -DGGML_USE_OPENMP + MK_CFLAGS += -fopenmp + MK_CXXFLAGS += -fopenmp +endif # GGML_NO_OPENMP + +ifdef GGML_OPENBLAS + MK_CPPFLAGS += -DGGML_USE_BLAS $(shell pkg-config --cflags-only-I openblas) + MK_CFLAGS += $(shell pkg-config --cflags-only-other openblas) + MK_LDFLAGS += $(shell pkg-config --libs openblas) + OBJ_GGML_EXT += ggml/src/ggml-blas/ggml-blas.o +endif # GGML_OPENBLAS + +ifdef GGML_OPENBLAS64 + MK_CPPFLAGS += -DGGML_USE_BLAS $(shell pkg-config --cflags-only-I openblas64) + MK_CFLAGS += $(shell pkg-config --cflags-only-other openblas64) + MK_LDFLAGS += $(shell pkg-config --libs openblas64) + OBJ_GGML_EXT += ggml/src/ggml-blas/ggml-blas.o +endif # GGML_OPENBLAS64 + +ifdef GGML_BLIS + MK_CPPFLAGS += -DGGML_USE_BLAS -DGGML_BLAS_USE_BLIS -I/usr/local/include/blis -I/usr/include/blis + MK_LDFLAGS += -lblis -L/usr/local/lib + OBJ_GGML_EXT += ggml/src/ggml-blas/ggml-blas.o +endif # GGML_BLIS + +ifdef GGML_NVPL + MK_CPPFLAGS += -DGGML_USE_BLAS -DGGML_BLAS_USE_NVPL -DNVPL_ILP64 -I/usr/local/include/nvpl_blas -I/usr/include/nvpl_blas + MK_LDFLAGS += -L/usr/local/lib -lnvpl_blas_core -lnvpl_blas_ilp64_gomp + OBJ_GGML_EXT += ggml/src/ggml-blas/ggml-blas.o +endif # GGML_NVPL + +ifndef GGML_NO_LLAMAFILE + MK_CPPFLAGS += -DGGML_USE_LLAMAFILE + OBJ_GGML_EXT += ggml/src/ggml-cpu/llamafile/sgemm.o +endif + +ifndef GGML_NO_AMX + MK_CPPFLAGS += -DGGML_USE_AMX + OBJ_GGML_EXT += ggml/src/ggml-cpu/amx/amx.o ggml/src/ggml-cpu/amx/mmq.o +endif + +# only necessary for the CPU backend files +MK_CPPFLAGS += -Iggml/src/ggml-cpu + +ifdef GGML_RPC + MK_CPPFLAGS += -DGGML_USE_RPC + OBJ_GGML_EXT += ggml/src/ggml-rpc.o +endif # GGML_RPC + +OBJ_CUDA_TMPL = $(patsubst %.cu,%.o,$(wildcard ggml/src/ggml-cuda/template-instances/fattn-mma*.cu)) +OBJ_CUDA_TMPL += $(patsubst %.cu,%.o,$(wildcard ggml/src/ggml-cuda/template-instances/mmq*.cu)) + +ifdef GGML_CUDA_FA_ALL_QUANTS + OBJ_CUDA_TMPL += $(patsubst %.cu,%.o,$(wildcard ggml/src/ggml-cuda/template-instances/fattn-vec*.cu)) +else + OBJ_CUDA_TMPL += $(patsubst %.cu,%.o,$(wildcard ggml/src/ggml-cuda/template-instances/fattn-vec*q4_0-q4_0.cu)) + OBJ_CUDA_TMPL += $(patsubst %.cu,%.o,$(wildcard ggml/src/ggml-cuda/template-instances/fattn-vec*q8_0-q8_0.cu)) + OBJ_CUDA_TMPL += $(patsubst %.cu,%.o,$(wildcard ggml/src/ggml-cuda/template-instances/fattn-vec*f16-f16.cu)) +endif # GGML_CUDA_FA_ALL_QUANTS + +ifdef GGML_CUDA + ifneq ('', '$(wildcard /opt/cuda)') + CUDA_PATH ?= /opt/cuda + else + CUDA_PATH ?= /usr/local/cuda + endif + + MK_CPPFLAGS += -DGGML_USE_CUDA -DGGML_CUDA_USE_GRAPHS -I$(CUDA_PATH)/include -I$(CUDA_PATH)/targets/$(UNAME_M)-linux/include + MK_LDFLAGS += -lcuda -lcublas -lculibos -lcudart -lcublasLt -lpthread -ldl -lrt -L$(CUDA_PATH)/lib64 -L/usr/lib64 -L$(CUDA_PATH)/targets/$(UNAME_M)-linux/lib -L$(CUDA_PATH)/lib64/stubs -L/usr/lib/wsl/lib + MK_NVCCFLAGS += -use_fast_math + + OBJ_GGML_EXT += ggml/src/ggml-cuda/ggml-cuda.o + OBJ_GGML_EXT += $(patsubst %.cu,%.o,$(wildcard ggml/src/ggml-cuda/*.cu)) + OBJ_GGML_EXT += $(OBJ_CUDA_TMPL) + +ifdef LLAMA_FATAL_WARNINGS + MK_NVCCFLAGS += -Werror all-warnings +endif # LLAMA_FATAL_WARNINGS + +ifndef 
JETSON_EOL_MODULE_DETECT + MK_NVCCFLAGS += --forward-unknown-to-host-compiler +endif # JETSON_EOL_MODULE_DETECT + +ifdef LLAMA_DEBUG + MK_NVCCFLAGS += -lineinfo +endif # LLAMA_DEBUG + +ifdef GGML_CUDA_DEBUG + MK_NVCCFLAGS += --device-debug +endif # GGML_CUDA_DEBUG + +ifdef GGML_CUDA_NVCC + NVCC = $(CCACHE) $(GGML_CUDA_NVCC) +else + NVCC = $(CCACHE) nvcc +endif # GGML_CUDA_NVCC + +ifdef CUDA_DOCKER_ARCH + MK_NVCCFLAGS += -Wno-deprecated-gpu-targets -arch=$(CUDA_DOCKER_ARCH) +else ifndef CUDA_POWER_ARCH + MK_NVCCFLAGS += -arch=native +endif # CUDA_DOCKER_ARCH + +ifdef GGML_CUDA_FORCE_MMQ + MK_NVCCFLAGS += -DGGML_CUDA_FORCE_MMQ +endif # GGML_CUDA_FORCE_MMQ + +ifdef GGML_CUDA_FORCE_CUBLAS + MK_NVCCFLAGS += -DGGML_CUDA_FORCE_CUBLAS +endif # GGML_CUDA_FORCE_CUBLAS + +ifdef GGML_CUDA_F16 + MK_NVCCFLAGS += -DGGML_CUDA_F16 +endif # GGML_CUDA_F16 + +ifdef GGML_CUDA_DMMV_F16 + MK_NVCCFLAGS += -DGGML_CUDA_F16 +endif # GGML_CUDA_DMMV_F16 + +ifdef GGML_CUDA_PEER_MAX_BATCH_SIZE + MK_NVCCFLAGS += -DGGML_CUDA_PEER_MAX_BATCH_SIZE=$(GGML_CUDA_PEER_MAX_BATCH_SIZE) +else + MK_NVCCFLAGS += -DGGML_CUDA_PEER_MAX_BATCH_SIZE=128 +endif # GGML_CUDA_PEER_MAX_BATCH_SIZE + +ifdef GGML_CUDA_NO_PEER_COPY + MK_NVCCFLAGS += -DGGML_CUDA_NO_PEER_COPY +endif # GGML_CUDA_NO_PEER_COPY + +ifdef GGML_CUDA_CCBIN + MK_NVCCFLAGS += -ccbin $(GGML_CUDA_CCBIN) +endif # GGML_CUDA_CCBIN + +ifdef GGML_CUDA_FA_ALL_QUANTS + MK_NVCCFLAGS += -DGGML_CUDA_FA_ALL_QUANTS +endif # GGML_CUDA_FA_ALL_QUANTS + +ifdef JETSON_EOL_MODULE_DETECT +define NVCC_COMPILE + $(NVCC) -I. -Icommon -D_XOPEN_SOURCE=600 -D_GNU_SOURCE -DNDEBUG -DGGML_USE_CUDA -I/usr/local/cuda/include -I/opt/cuda/include -I/usr/local/cuda/targets/aarch64-linux/include -std=c++11 -O3 $(NVCCFLAGS) $(CPPFLAGS) -Xcompiler "$(CUDA_CXXFLAGS)" -c $< -o $@ +endef # NVCC_COMPILE +else +define NVCC_COMPILE + $(NVCC) $(NVCCFLAGS) $(CPPFLAGS) -Xcompiler "$(CUDA_CXXFLAGS)" -c $< -o $@ +endef # NVCC_COMPILE +endif # JETSON_EOL_MODULE_DETECT + +ggml/src/ggml-cuda/%.o: \ + ggml/src/ggml-cuda/%.cu \ + ggml/include/ggml.h \ + ggml/src/ggml-common.h \ + ggml/src/ggml-cuda/common.cuh + $(NVCC_COMPILE) + +ggml/src/ggml-cuda/ggml-cuda.o: \ + ggml/src/ggml-cuda/ggml-cuda.cu \ + ggml/include/ggml-cuda.h \ + ggml/include/ggml.h \ + ggml/include/ggml-backend.h \ + ggml/src/ggml-backend-impl.h \ + ggml/src/ggml-common.h \ + $(wildcard ggml/src/ggml-cuda/*.cuh) + $(NVCC_COMPILE) +endif # GGML_CUDA + +ifdef GGML_VULKAN + MK_CPPFLAGS += -DGGML_USE_VULKAN + MK_LDFLAGS += $(shell pkg-config --libs vulkan) + OBJ_GGML_EXT += ggml/src/ggml-vulkan.o ggml/src/ggml-vulkan-shaders.o + +ifdef GGML_VULKAN_CHECK_RESULTS + MK_CPPFLAGS += -DGGML_VULKAN_CHECK_RESULTS +endif + +ifdef GGML_VULKAN_DEBUG + MK_CPPFLAGS += -DGGML_VULKAN_DEBUG +endif + +ifdef GGML_VULKAN_MEMORY_DEBUG + MK_CPPFLAGS += -DGGML_VULKAN_MEMORY_DEBUG +endif + +ifdef GGML_VULKAN_PERF + MK_CPPFLAGS += -DGGML_VULKAN_PERF +endif + +ifdef GGML_VULKAN_VALIDATE + MK_CPPFLAGS += -DGGML_VULKAN_VALIDATE +endif + +ifdef GGML_VULKAN_RUN_TESTS + MK_CPPFLAGS += -DGGML_VULKAN_RUN_TESTS +endif + +GLSLC_CMD = glslc +_ggml_vk_genshaders_cmd = $(shell pwd)/vulkan-shaders-gen +_ggml_vk_header = ggml/src/ggml-vulkan-shaders.hpp +_ggml_vk_source = ggml/src/ggml-vulkan-shaders.cpp +_ggml_vk_input_dir = ggml/src/ggml-vulkan/vulkan-shaders +_ggml_vk_shader_deps = $(echo $(_ggml_vk_input_dir)/*.comp) + +ggml/src/ggml-vulkan.o: ggml/src/ggml-vulkan/ggml-vulkan.cpp ggml/include/ggml-vulkan.h $(_ggml_vk_header) $(_ggml_vk_source) + $(CXX) $(CXXFLAGS) $(shell pkg-config --cflags vulkan) 
-c $< -o $@ + +$(_ggml_vk_header): $(_ggml_vk_source) + +$(_ggml_vk_source): $(_ggml_vk_shader_deps) vulkan-shaders-gen + $(_ggml_vk_genshaders_cmd) \ + --glslc $(GLSLC_CMD) \ + --input-dir $(_ggml_vk_input_dir) \ + --target-hpp $(_ggml_vk_header) \ + --target-cpp $(_ggml_vk_source) + +vulkan-shaders-gen: ggml/src/ggml-vulkan/vulkan-shaders/vulkan-shaders-gen.cpp + $(CXX) $(CXXFLAGS) -o $@ $(LDFLAGS) ggml/src/ggml-vulkan/vulkan-shaders/vulkan-shaders-gen.cpp + +endif # GGML_VULKAN + +ifdef GGML_HIP + ifeq ($(wildcard /opt/rocm),) + ROCM_PATH ?= /usr + AMDGPU_TARGETS ?= $(shell $(shell which amdgpu-arch)) + else + ROCM_PATH ?= /opt/rocm + AMDGPU_TARGETS ?= $(shell $(ROCM_PATH)/llvm/bin/amdgpu-arch) + endif + + MK_CPPFLAGS += -DGGML_USE_HIP -DGGML_USE_CUDA + +ifdef GGML_HIP_UMA + MK_CPPFLAGS += -DGGML_HIP_UMA +endif # GGML_HIP_UMA + + MK_LDFLAGS += -L$(ROCM_PATH)/lib -Wl,-rpath=$(ROCM_PATH)/lib + MK_LDFLAGS += -L$(ROCM_PATH)/lib64 -Wl,-rpath=$(ROCM_PATH)/lib64 + MK_LDFLAGS += -lhipblas -lamdhip64 -lrocblas + + HIPCC ?= $(CCACHE) $(ROCM_PATH)/bin/hipcc + + HIPFLAGS += $(addprefix --offload-arch=,$(AMDGPU_TARGETS)) + +ifdef GGML_CUDA_FORCE_MMQ + HIPFLAGS += -DGGML_CUDA_FORCE_MMQ +endif # GGML_CUDA_FORCE_MMQ + +ifdef GGML_CUDA_FORCE_CUBLAS + HIPFLAGS += -DGGML_CUDA_FORCE_CUBLAS +endif # GGML_CUDA_FORCE_CUBLAS + +ifdef GGML_CUDA_NO_PEER_COPY + HIPFLAGS += -DGGML_CUDA_NO_PEER_COPY +endif # GGML_CUDA_NO_PEER_COPY + + OBJ_GGML_EXT += ggml/src/ggml-cuda/ggml-cuda.o + OBJ_GGML_EXT += $(patsubst %.cu,%.o,$(wildcard ggml/src/ggml-cuda/*.cu)) + OBJ_GGML_EXT += $(OBJ_CUDA_TMPL) + +ggml/src/ggml-cuda/ggml-cuda.o: \ + ggml/src/ggml-cuda/ggml-cuda.cu \ + ggml/include/ggml-cuda.h \ + ggml/include/ggml.h \ + ggml/include/ggml-backend.h \ + ggml/src/ggml-backend-impl.h \ + ggml/src/ggml-common.h \ + $(wildcard ggml/src/ggml-cuda/*.cuh) + $(HIPCC) $(CXXFLAGS) $(HIPFLAGS) -x hip -c -o $@ $< + +ggml/src/ggml-cuda/%.o: \ + ggml/src/ggml-cuda/%.cu \ + ggml/include/ggml.h \ + ggml/src/ggml-common.h \ + ggml/src/ggml-cuda/common.cuh + $(HIPCC) $(CXXFLAGS) $(HIPFLAGS) -x hip -c -o $@ $< +endif # GGML_HIP + +ifdef GGML_MUSA + ifeq ($(wildcard /opt/musa),) + MUSA_PATH ?= /usr/local/musa + else + MUSA_PATH ?= /opt/musa + endif + MUSA_ARCHITECTURES ?= 21;22 + + MK_CPPFLAGS += -DGGML_USE_MUSA -DGGML_USE_CUDA + MK_LDFLAGS += -L$(MUSA_PATH)/lib -Wl,-rpath=$(MUSA_PATH)/lib + MK_LDFLAGS += -lmusa -lmusart -lmublas + + ifndef GGML_NO_OPENMP + # For Ubuntu Focal + MK_CPPFLAGS += -I/usr/lib/llvm-10/include/openmp + MK_LDFLAGS += -L/usr/lib/llvm-10/lib + # For Ubuntu Jammy + MK_CPPFLAGS += -I/usr/lib/llvm-14/lib/clang/14.0.0/include + MK_LDFLAGS += -L/usr/lib/llvm-14/lib + endif # GGML_NO_OPENMP + + CC := $(MUSA_PATH)/bin/clang + CXX := $(MUSA_PATH)/bin/clang++ + MCC := $(CCACHE) $(MUSA_PATH)/bin/mcc + + MUSAFLAGS = -x musa -mtgpu + MUSAFLAGS += $(foreach arch,$(subst ;, ,$(MUSA_ARCHITECTURES)),--cuda-gpu-arch=mp_$(arch)) + +ifdef GGML_CUDA_FORCE_MMQ + MUSAFLAGS += -DGGML_CUDA_FORCE_MMQ +endif # GGML_CUDA_FORCE_MMQ + +ifdef GGML_CUDA_FORCE_CUBLAS + MUSAFLAGS += -DGGML_CUDA_FORCE_CUBLAS +endif # GGML_CUDA_FORCE_CUBLAS + +ifdef GGML_CUDA_F16 + MUSAFLAGS += -DGGML_CUDA_F16 +endif # GGML_CUDA_F16 + +ifdef GGML_CUDA_DMMV_F16 + MUSAFLAGS += -DGGML_CUDA_F16 +endif # GGML_CUDA_DMMV_F16 + +ifdef GGML_CUDA_PEER_MAX_BATCH_SIZE + MUSAFLAGS += -DGGML_CUDA_PEER_MAX_BATCH_SIZE=$(GGML_CUDA_PEER_MAX_BATCH_SIZE) +else + MUSAFLAGS += -DGGML_CUDA_PEER_MAX_BATCH_SIZE=128 +endif # GGML_CUDA_PEER_MAX_BATCH_SIZE + +ifdef GGML_CUDA_NO_PEER_COPY + 
MUSAFLAGS += -DGGML_CUDA_NO_PEER_COPY +endif # GGML_CUDA_NO_PEER_COPY + +ifdef GGML_CUDA_FA_ALL_QUANTS + MUSAFLAGS += -DGGML_CUDA_FA_ALL_QUANTS +endif # GGML_CUDA_FA_ALL_QUANTS + + OBJ_GGML_EXT += ggml/src/ggml-cuda/ggml-cuda.o + OBJ_GGML_EXT += $(patsubst %.cu,%.o,$(wildcard ggml/src/ggml-cuda/*.cu)) + OBJ_GGML_EXT += $(OBJ_CUDA_TMPL) + +ggml/src/ggml-cuda/ggml-cuda.o: \ + ggml/src/ggml-cuda/ggml-cuda.cu \ + ggml/include/ggml-cuda.h \ + ggml/include/ggml.h \ + ggml/include/ggml-backend.h \ + ggml/src/ggml-backend-impl.h \ + ggml/src/ggml-common.h \ + $(wildcard ggml/src/ggml-cuda/*.cuh) + $(MCC) $(CXXFLAGS) $(MUSAFLAGS) -c -o $@ $< + +ggml/src/ggml-cuda/%.o: \ + ggml/src/ggml-cuda/%.cu \ + ggml/include/ggml.h \ + ggml/src/ggml-common.h \ + ggml/src/ggml-cuda/common.cuh + $(MCC) $(CXXFLAGS) $(MUSAFLAGS) -c -o $@ $< +endif # GGML_MUSA + +ifdef GGML_METAL + MK_CPPFLAGS += -DGGML_USE_METAL + MK_LDFLAGS += -framework Foundation -framework Metal -framework MetalKit + OBJ_GGML_EXT += ggml/src/ggml-metal/ggml-metal.o + +ifdef GGML_METAL_USE_BF16 + MK_CPPFLAGS += -DGGML_METAL_USE_BF16 +endif # GGML_METAL_USE_BF16 +ifdef GGML_METAL_NDEBUG + MK_CPPFLAGS += -DGGML_METAL_NDEBUG +endif +ifdef GGML_METAL_EMBED_LIBRARY + MK_CPPFLAGS += -DGGML_METAL_EMBED_LIBRARY + OBJ_GGML_EXT += ggml/src/ggml-metal-embed.o +endif +endif # GGML_METAL + +ifdef GGML_METAL +ggml/src/ggml-metal/ggml-metal.o: \ + ggml/src/ggml-metal/ggml-metal.m \ + ggml/src/ggml-metal/ggml-metal-impl.h \ + ggml/include/ggml-metal.h \ + ggml/include/ggml.h + $(CC) $(CFLAGS) -c $< -o $@ + +ifdef GGML_METAL_EMBED_LIBRARY +ggml/src/ggml-metal-embed.o: \ + ggml/src/ggml-metal/ggml-metal.metal \ + ggml/src/ggml-metal/ggml-metal-impl.h \ + ggml/src/ggml-common.h + @echo "Embedding Metal library" + @sed -e '/__embed_ggml-common.h__/r ggml/src/ggml-common.h' -e '/__embed_ggml-common.h__/d' < ggml/src/ggml-metal/ggml-metal.metal > ggml/src/ggml-metal/ggml-metal-embed.metal.tmp + @sed -e '/#include "ggml-metal-impl.h"/r ggml/src/ggml-metal/ggml-metal-impl.h' -e '/#include "ggml-metal-impl.h"/d' < ggml/src/ggml-metal/ggml-metal-embed.metal.tmp > ggml/src/ggml-metal/ggml-metal-embed.metal + $(eval TEMP_ASSEMBLY=$(shell mktemp -d)) + @echo ".section __DATA, __ggml_metallib" > $(TEMP_ASSEMBLY)/ggml-metal-embed.s + @echo ".globl _ggml_metallib_start" >> $(TEMP_ASSEMBLY)/ggml-metal-embed.s + @echo "_ggml_metallib_start:" >> $(TEMP_ASSEMBLY)/ggml-metal-embed.s + @echo ".incbin \"ggml/src/ggml-metal/ggml-metal-embed.metal\"" >> $(TEMP_ASSEMBLY)/ggml-metal-embed.s + @echo ".globl _ggml_metallib_end" >> $(TEMP_ASSEMBLY)/ggml-metal-embed.s + @echo "_ggml_metallib_end:" >> $(TEMP_ASSEMBLY)/ggml-metal-embed.s + $(CC) $(CFLAGS) -c $(TEMP_ASSEMBLY)/ggml-metal-embed.s -o $@ + @rm -f ${TEMP_ASSEMBLY}/ggml-metal-embed.s + @rmdir ${TEMP_ASSEMBLY} +endif +endif # GGML_METAL + +DIR_GGML = ggml +DIR_LLAMA = src +DIR_COMMON = common + +OBJ_GGML = \ + $(DIR_GGML)/src/ggml.o \ + $(DIR_GGML)/src/ggml-alloc.o \ + $(DIR_GGML)/src/ggml-backend.o \ + $(DIR_GGML)/src/ggml-backend-reg.o \ + $(DIR_GGML)/src/ggml-opt.o \ + $(DIR_GGML)/src/ggml-quants.o \ + $(DIR_GGML)/src/ggml-threading.o \ + $(DIR_GGML)/src/ggml-cpu/ggml-cpu.o \ + $(DIR_GGML)/src/ggml-cpu/ggml-cpu_cpp.o \ + $(DIR_GGML)/src/ggml-cpu/ggml-cpu-aarch64.o \ + $(DIR_GGML)/src/ggml-cpu/ggml-cpu-hbm.o \ + $(DIR_GGML)/src/ggml-cpu/ggml-cpu-quants.o \ + $(DIR_GGML)/src/ggml-cpu/ggml-cpu-traits.o \ + $(OBJ_GGML_EXT) + +OBJ_LLAMA = \ + $(DIR_LLAMA)/llama.o \ + $(DIR_LLAMA)/llama-vocab.o \ + $(DIR_LLAMA)/llama-grammar.o \ + 
$(DIR_LLAMA)/llama-sampling.o \ + $(DIR_LLAMA)/unicode.o \ + $(DIR_LLAMA)/unicode-data.o + +OBJ_COMMON = \ + $(DIR_COMMON)/common.o \ + $(DIR_COMMON)/arg.o \ + $(DIR_COMMON)/log.o \ + $(DIR_COMMON)/console.o \ + $(DIR_COMMON)/ngram-cache.o \ + $(DIR_COMMON)/sampling.o \ + $(DIR_COMMON)/speculative.o \ + $(DIR_COMMON)/chat.o \ + $(DIR_COMMON)/build-info.o \ + $(DIR_COMMON)/json-schema-to-grammar.o + +OBJ_ALL = $(OBJ_GGML) $(OBJ_LLAMA) $(OBJ_COMMON) + +LIB_GGML = $(LIB_PRE)ggml$(DSO_EXT) +LIB_GGML_S = $(LIB_PRE)ggml.a + +LIB_LLAMA = $(LIB_PRE)llama$(DSO_EXT) +LIB_LLAMA_S = $(LIB_PRE)llama.a + +LIB_COMMON = $(LIB_PRE)common$(DSO_EXT) +LIB_COMMON_S = $(LIB_PRE)common.a + +LIB_ALL = $(LIB_GGML) $(LIB_LLAMA) $(LIB_COMMON) +LIB_ALL_S = $(LIB_GGML_S) $(LIB_LLAMA_S) $(LIB_COMMON_S) + +GF_CC := $(CC) +include scripts/get-flags.mk + +# combine build flags with cmdline overrides +override CPPFLAGS := $(MK_CPPFLAGS) $(CPPFLAGS) +override CFLAGS := $(CPPFLAGS) $(MK_CFLAGS) $(GF_CFLAGS) $(CFLAGS) +BASE_CXXFLAGS := $(MK_CXXFLAGS) $(CXXFLAGS) +override CXXFLAGS := $(BASE_CXXFLAGS) $(HOST_CXXFLAGS) $(GF_CXXFLAGS) $(CPPFLAGS) +override NVCCFLAGS := $(MK_NVCCFLAGS) $(NVCCFLAGS) +override LDFLAGS := $(MK_LDFLAGS) $(LDFLAGS) + +# identify CUDA host compiler +ifdef GGML_CUDA +GF_CC := $(NVCC) $(NVCCFLAGS) 2>/dev/null .c -Xcompiler +include scripts/get-flags.mk +CUDA_CXXFLAGS := $(BASE_CXXFLAGS) $(GF_CXXFLAGS) -Wno-pedantic +endif + +ifdef LLAMA_CURL +override CXXFLAGS := $(CXXFLAGS) -DLLAMA_USE_CURL +override LDFLAGS := $(LDFLAGS) -lcurl +endif + +# +# Print build information +# + +$(info I llama.cpp build info: ) +$(info I UNAME_S: $(UNAME_S)) +$(info I UNAME_P: $(UNAME_P)) +$(info I UNAME_M: $(UNAME_M)) +$(info I CFLAGS: $(CFLAGS)) +$(info I CXXFLAGS: $(CXXFLAGS)) +$(info I NVCCFLAGS: $(NVCCFLAGS)) +$(info I LDFLAGS: $(LDFLAGS)) +$(info I CC: $(shell $(CC) --version | head -n 1)) +$(info I CXX: $(shell $(CXX) --version | head -n 1)) +ifdef GGML_CUDA +$(info I NVCC: $(shell $(NVCC) --version | tail -n 1)) +CUDA_VERSION := $(shell $(NVCC) --version | grep -oP 'release (\K[0-9]+\.[0-9])') +ifeq ($(shell awk -v "v=$(CUDA_VERSION)" 'BEGIN { print (v < 11.7) }'),1) + +ifndef CUDA_DOCKER_ARCH +ifndef CUDA_POWER_ARCH +$(error I ERROR: For CUDA versions < 11.7 a target CUDA architecture must be explicitly provided via environment variable CUDA_DOCKER_ARCH, e.g. by running "export CUDA_DOCKER_ARCH=compute_XX" on Unix-like systems, where XX is the minimum compute capability that the code needs to run on. A list with compute capabilities can be found here: https://developer.nvidia.com/cuda-gpus ) +endif # CUDA_POWER_ARCH +endif # CUDA_DOCKER_ARCH + +endif # eq ($(shell echo "$(CUDA_VERSION) < 11.7" | bc),1) +endif # GGML_CUDA +$(info ) + +ifdef DEPRECATE_WARNING +$(info !!! DEPRECATION WARNING !!!) +$(info The following LLAMA_ options are deprecated and will be removed in the future. Use the GGML_ prefix instead) +$(info - LLAMA_CUDA) +$(info - LLAMA_METAL) +$(info - LLAMA_METAL_EMBED_LIBRARY) +$(info - LLAMA_OPENMP) +$(info - LLAMA_RPC) +$(info - LLAMA_SYCL) +$(info - LLAMA_SYCL_F16) +$(info - LLAMA_OPENBLAS) +$(info - LLAMA_OPENBLAS64) +$(info - LLAMA_BLIS) +$(info - LLAMA_NO_LLAMAFILE) +$(info - LLAMA_NO_ACCELERATE) +$(info - LLAMA_NO_OPENMP) +$(info - LLAMA_NO_METAL) +$(info - LLAMA_NO_CCACHE) +$(info ) +endif + +ifdef REMOVE_WARNING +$(info !!! REMOVAL WARNING !!!) 
+$(info The following LLAMA_ options have been removed and are no longer supported) +$(info - LLAMA_DISABLE_LOGS (https://github.com/ggerganov/llama.cpp/pull/9418)) +$(info - LLAMA_SERVER_VERBOSE (https://github.com/ggerganov/llama.cpp/pull/9418)) +$(info ) +endif + +# +# Build libraries +# + +# Libraries +LIB_GGML = libggml.so +LIB_GGML_S = libggml.a + +LIB_LLAMA = libllama.so +LIB_LLAMA_S = libllama.a + +LIB_COMMON = libcommon.so +LIB_COMMON_S = libcommon.a + +# Targets +BUILD_TARGETS += $(LIB_GGML) $(LIB_GGML_S) $(LIB_LLAMA) $(LIB_LLAMA_S) $(LIB_COMMON) $(LIB_COMMON_S) + +# Dependency files +DEP_FILES = $(OBJ_GGML:.o=.d) $(OBJ_LLAMA:.o=.d) $(OBJ_COMMON:.o=.d) + +# Default target +all: $(BUILD_TARGETS) + +# force c++ build for source file that have same name as c file +# Note: need this exception because `ggml-cpu.c` and `ggml-cpu.cpp` both produce the same obj/dep files +$(DIR_GGML)/%_cpp.o: $(DIR_GGML)/%.cpp + $(CXX) $(CXXFLAGS) -MMD -c $< -o $@ + +# Rules for building object files +$(DIR_GGML)/%.o: $(DIR_GGML)/%.c + $(CC) $(CFLAGS) -MMD -c $< -o $@ + +$(DIR_GGML)/%.o: $(DIR_GGML)/%.cpp + $(CXX) $(CXXFLAGS) -MMD -c $< -o $@ + +$(DIR_LLAMA)/%.o: $(DIR_LLAMA)/%.cpp + $(CXX) $(CXXFLAGS) -MMD -c $< -o $@ + +$(DIR_COMMON)/%.o: $(DIR_COMMON)/%.cpp + $(CXX) $(CXXFLAGS) -MMD -c $< -o $@ + +# Rules for building libraries +$(LIB_GGML): $(OBJ_GGML) + $(CXX) $(CXXFLAGS) -shared -fPIC -o $@ $^ $(LDFLAGS) + +$(LIB_GGML_S): $(OBJ_GGML) + ar rcs $(LIB_GGML_S) $^ + +$(LIB_LLAMA): $(OBJ_LLAMA) $(LIB_GGML) + $(CXX) $(CXXFLAGS) -shared -fPIC -o $@ $^ $(LDFLAGS) + +$(LIB_LLAMA_S): $(OBJ_LLAMA) + ar rcs $(LIB_LLAMA_S) $^ + +$(LIB_COMMON): $(OBJ_COMMON) $(LIB_LLAMA) $(LIB_GGML) + $(CXX) $(CXXFLAGS) -shared -fPIC -o $@ $^ $(LDFLAGS) + +$(LIB_COMMON_S): $(OBJ_COMMON) + ar rcs $(LIB_COMMON_S) $^ + +# Include dependency files +-include $(DEP_FILES) + +# Clean generated server assets +clean-server-assets: + find examples/server -type f -name "*.js.hpp" -delete + find examples/server -type f -name "*.mjs.hpp" -delete + find examples/server -type f -name "*.css.hpp" -delete + find examples/server -type f -name "*.html.hpp" -delete + +# Clean rule +clean: clean-server-assets + rm -vrf $(BUILD_TARGETS) $(TEST_TARGETS) + rm -rvf *.a *.dll *.so *.dot + find ggml src common tests examples pocs -type f -name "*.o" -delete + find ggml src common tests examples pocs -type f -name "*.d" -delete + +# +# Examples +# + +# $< is the first prerequisite, i.e. the source file. +# Explicitly compile this to an object file so that it can be cached with ccache. +# The source file is then filtered out from $^ (the list of all prerequisites) and the object file is added instead. + +# Helper function that replaces .c, .cpp, and .cu file endings with .o: +GET_OBJ_FILE = $(patsubst %.c,%.o,$(patsubst %.cpp,%.o,$(patsubst %.cu,%.o,$(1)))) + +llama-cli: examples/main/main.cpp \ + $(OBJ_ALL) + $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<) + $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS) + @echo + @echo '==== Run ./llama-cli -h for help. 
====' + @echo + +llama-infill: examples/infill/infill.cpp \ + $(OBJ_ALL) + $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<) + $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS) + +llama-run: examples/run/run.cpp \ + $(OBJ_ALL) + $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<) + $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS) + +llama-simple: examples/simple/simple.cpp \ + $(OBJ_ALL) + $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<) + $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS) + +llama-simple-chat: examples/simple-chat/simple-chat.cpp \ + $(OBJ_ALL) + $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<) + $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS) + +llama-tokenize: examples/tokenize/tokenize.cpp \ + $(OBJ_ALL) + $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<) + $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS) + +llama-batched: examples/batched/batched.cpp \ + $(OBJ_ALL) + $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<) + $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS) + +llama-batched-bench: examples/batched-bench/batched-bench.cpp \ + $(OBJ_ALL) + $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<) + $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS) + +llama-quantize: examples/quantize/quantize.cpp \ + $(OBJ_ALL) + $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<) + $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS) + +llama-quantize-stats: examples/quantize-stats/quantize-stats.cpp \ + $(OBJ_ALL) + $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<) + $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS) + +llama-perplexity: examples/perplexity/perplexity.cpp \ + $(OBJ_ALL) + $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<) + $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS) + +llama-imatrix: examples/imatrix/imatrix.cpp \ + $(OBJ_ALL) + $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<) + $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS) + +llama-embedding: examples/embedding/embedding.cpp \ + $(OBJ_ALL) + $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<) + $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS) + +llama-gritlm: examples/gritlm/gritlm.cpp \ + $(OBJ_ALL) + $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<) + $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS) + +llama-save-load-state: examples/save-load-state/save-load-state.cpp \ + $(OBJ_ALL) + $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<) + $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS) + +llama-gguf: examples/gguf/gguf.cpp \ + $(OBJ_GGML) + $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<) + $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS) + +examples/gguf-hash/deps/sha1/sha1.o: \ + examples/gguf-hash/deps/sha1/sha1.c + $(CC) $(CFLAGS) -Iexamples/gguf-hash/deps -c $< -o $@ + +examples/gguf-hash/deps/xxhash/xxhash.o: \ + examples/gguf-hash/deps/xxhash/xxhash.c + $(CC) $(CFLAGS) -Iexamples/gguf-hash/deps -c $< -o $@ + +examples/gguf-hash/deps/sha256/sha256.o: \ + examples/gguf-hash/deps/sha256/sha256.c + $(CC) $(CFLAGS) -Iexamples/gguf-hash/deps 
-c $< -o $@ + +llama-gguf-hash: examples/gguf-hash/gguf-hash.cpp examples/gguf-hash/deps/sha1/sha1.o examples/gguf-hash/deps/xxhash/xxhash.o examples/gguf-hash/deps/sha256/sha256.o\ + $(OBJ_ALL) + $(CXX) $(CXXFLAGS) -Iexamples/gguf-hash/deps -c $< -o $(call GET_OBJ_FILE, $<) + $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS) + +llama-gguf-split: examples/gguf-split/gguf-split.cpp \ + $(OBJ_ALL) + $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<) + $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS) + +llama-eval-callback: examples/eval-callback/eval-callback.cpp \ + $(OBJ_ALL) + $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<) + $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS) + +llama-cvector-generator: examples/cvector-generator/cvector-generator.cpp \ + $(OBJ_ALL) + $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<) + $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS) + +llama-convert-llama2c-to-ggml: examples/convert-llama2c-to-ggml/convert-llama2c-to-ggml.cpp \ + $(OBJ_ALL) + $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<) + $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS) + +llama-bench: examples/llama-bench/llama-bench.cpp \ + $(OBJ_ALL) + $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<) + $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS) + +llama-export-lora: examples/export-lora/export-lora.cpp \ + $(OBJ_ALL) + $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<) + $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS) + +llama-retrieval: examples/retrieval/retrieval.cpp \ + $(OBJ_ALL) + $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<) + $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS) + +llama-speculative: examples/speculative/speculative.cpp \ + $(OBJ_ALL) + $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<) + $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS) + +llama-parallel: examples/parallel/parallel.cpp \ + $(OBJ_ALL) + $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<) + $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS) + +llama-lookahead: examples/lookahead/lookahead.cpp \ + $(OBJ_ALL) + $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<) + $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS) + +llama-lookup: examples/lookup/lookup.cpp \ + $(OBJ_ALL) + $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<) + $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS) + +llama-lookup-create: examples/lookup/lookup-create.cpp \ + $(OBJ_ALL) + $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<) + $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS) + +llama-lookup-merge: examples/lookup/lookup-merge.cpp \ + $(OBJ_ALL) + $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<) + $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS) + +llama-lookup-stats: examples/lookup/lookup-stats.cpp \ + $(OBJ_ALL) + $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<) + $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS) + +llama-passkey: examples/passkey/passkey.cpp \ + $(OBJ_ALL) + $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<) + $(CXX) $(CXXFLAGS) $(filter-out %.h 
$<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS) + +llama-gbnf-validator: examples/gbnf-validator/gbnf-validator.cpp \ + $(OBJ_ALL) + $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<) + $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS) + +ifdef GGML_RPC +rpc-server: examples/rpc/rpc-server.cpp \ + $(OBJ_GGML) + $(CXX) $(CXXFLAGS) $^ -o $@ $(LDFLAGS) +endif # GGML_RPC + +llama-server: \ + examples/server/server.cpp \ + examples/server/utils.hpp \ + examples/server/httplib.h \ + examples/server/index.html.hpp \ + examples/server/loading.html.hpp \ + common/chat.cpp \ + common/chat.hpp \ + common/chat-template.hpp \ + common/json.hpp \ + common/minja.hpp \ + $(OBJ_ALL) + $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<) + $(CXX) $(CXXFLAGS) $(filter-out %.h %.hpp $<,$^) -Iexamples/server $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS) $(LWINSOCK2) + +# Portable equivalent of `cd examples/server/public && xxd -i $(notdir $<) ../$(notdir $<).hpp`: +examples/server/%.hpp: examples/server/public/% FORCE Makefile + @( export NAME=$(subst .,_,$(subst -,_,$(notdir $<))) && \ + echo "unsigned char $${NAME}[] = {" && \ + cat $< | od -v -t x1 -An | sed -E 's/([0-9a-fA-F]+)/0x\1, /g' && \ + echo "};" && \ + echo "unsigned int $${NAME}_len = $(shell cat $< | wc -c );" \ + ) > $@ + +llama-gen-docs: examples/gen-docs/gen-docs.cpp \ + $(OBJ_ALL) + $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<) + $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS) + +libllava.a: examples/llava/llava.cpp \ + examples/llava/llava.h \ + examples/llava/clip.cpp \ + examples/llava/clip.h \ + common/stb_image.h \ + common/base64.hpp \ + $(OBJ_ALL) + $(CXX) $(CXXFLAGS) -static -fPIC -c $< -o $@ -Wno-cast-qual + +llama-llava-cli: examples/llava/llava-cli.cpp \ + examples/llava/llava.cpp \ + examples/llava/llava.h \ + examples/llava/clip.cpp \ + examples/llava/clip.h \ + $(OBJ_ALL) + $(CXX) $(CXXFLAGS) $< $(filter-out %.h $<,$^) -o $@ $(LDFLAGS) -Wno-cast-qual + +llama-minicpmv-cli: examples/llava/minicpmv-cli.cpp \ + examples/llava/llava.cpp \ + examples/llava/llava.h \ + examples/llava/clip.cpp \ + examples/llava/clip.h \ + $(OBJ_ALL) + $(CXX) $(CXXFLAGS) $< $(filter-out %.h $<,$^) -o $@ $(LDFLAGS) -Wno-cast-qual + +llama-qwen2vl-cli: examples/llava/qwen2vl-cli.cpp \ + examples/llava/llava.cpp \ + examples/llava/llava.h \ + examples/llava/clip.cpp \ + examples/llava/clip.h \ + $(OBJ_ALL) + $(CXX) $(CXXFLAGS) $< $(filter-out %.h $<,$^) -o $@ $(LDFLAGS) -Wno-cast-qual + +ifeq ($(UNAME_S),Darwin) +swift: examples/batched.swift + (cd examples/batched.swift; make build) +endif + +common/build-info.cpp: $(wildcard .git/index) scripts/build-info.sh + @sh scripts/build-info.sh "$(CC)" > $@.tmp + @if ! 
cmp -s $@.tmp $@; then \ + mv $@.tmp $@; \ + else \ + rm $@.tmp; \ + fi + +common/build-info.o: common/build-info.cpp + $(CXX) $(CXXFLAGS) -c $(filter-out %.h,$^) -o $@ + +# +# Tests +# + +tests: $(TEST_TARGETS) + +tests/test-arg-parser: tests/test-arg-parser.cpp \ + $(OBJ_ALL) + $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<) + $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS) + +tests/test-llama-grammar: tests/test-llama-grammar.cpp \ + $(OBJ_ALL) + $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<) + $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS) + +tests/test-log: tests/test-log.cpp \ + $(OBJ_ALL) + $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<) + $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS) + +tests/test-grammar-parser: tests/test-grammar-parser.cpp \ + $(OBJ_ALL) + $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<) + $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS) + +tests/test-grammar-integration: tests/test-grammar-integration.cpp \ + $(OBJ_ALL) + $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<) + $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS) + +tests/test-double-float: tests/test-double-float.cpp + $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<) + $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS) + +tests/test-json-schema-to-grammar: tests/test-json-schema-to-grammar.cpp \ + $(OBJ_ALL) + $(CXX) $(CXXFLAGS) -Iexamples/server -c $< -o $(call GET_OBJ_FILE, $<) + $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS) + +tests/test-chat: tests/test-chat.cpp \ + $(OBJ_ALL) + $(CXX) $(CXXFLAGS) -Iexamples/server -c $< -o $(call GET_OBJ_FILE, $<) + $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS) + +tests/test-opt: tests/test-opt.cpp \ + $(OBJ_GGML) + $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<) + $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS) + +tests/test-quantize-fns: tests/test-quantize-fns.cpp \ + $(OBJ_GGML) + $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<) + $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS) + +tests/test-quantize-perf: tests/test-quantize-perf.cpp \ + $(OBJ_GGML) + $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<) + $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS) + +tests/test-sampling: tests/test-sampling.cpp \ + $(OBJ_ALL) + $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<) + $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS) + +tests/test-tokenizer-0: tests/test-tokenizer-0.cpp \ + $(OBJ_ALL) + $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<) + $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS) + +tests/test-tokenizer-1-bpe: tests/test-tokenizer-1-bpe.cpp \ + $(OBJ_ALL) + $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<) + $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS) + +tests/test-tokenizer-1-spm: tests/test-tokenizer-1-spm.cpp \ + $(OBJ_ALL) + $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<) + $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS) + +tests/test-rope: tests/test-rope.cpp ggml/src/ggml.o \ + $(OBJ_GGML) + $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<) + $(CXX) 
$(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS) + +tests/test-c.o: tests/test-c.c include/llama.h + $(CC) $(CFLAGS) -c $(filter-out %.h,$^) -o $@ + +tests/test-backend-ops: tests/test-backend-ops.cpp \ + $(OBJ_GGML) + $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<) + $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS) + +tests/test-model-load-cancel: tests/test-model-load-cancel.cpp tests/get-model.cpp \ + $(OBJ_ALL) + $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<) + $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS) + +tests/test-autorelease: tests/test-autorelease.cpp tests/get-model.cpp \ + $(OBJ_ALL) + $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<) + $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS) + +tests/test-chat-template: tests/test-chat-template.cpp \ + $(OBJ_ALL) + $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<) + $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS) + +# +# PoCs +# + +llama-vdot: pocs/vdot/vdot.cpp ggml/src/ggml.o \ + $(OBJ_GGML) + $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<) + $(CXX) $(CXXFLAGS) $(filter-out $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS) + +llama-q8dot: pocs/vdot/q8dot.cpp ggml/src/ggml.o \ + $(OBJ_GGML) + $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<) + $(CXX) $(CXXFLAGS) $(filter-out $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS) + +# +# Deprecated binaries that we want to keep around long enough for people to migrate to the new filenames, then these can be removed. +# +# Mark legacy binary targets as .PHONY so that they are always checked. +.PHONY: FORCE main quantize perplexity embedding server + +# Define the object file target +examples/deprecation-warning/deprecation-warning.o: examples/deprecation-warning/deprecation-warning.cpp + $(CXX) $(CXXFLAGS) -c $< -o $@ + +# NOTE: We currently will always build the deprecation-warning `main` and `server` binaries to help users migrate. +# Eventually we will want to remove these target from building all the time. +main: examples/deprecation-warning/deprecation-warning.o + $(CXX) $(CXXFLAGS) $< -o $@ $(LDFLAGS) + @echo "NOTICE: The 'main' binary is deprecated. Please use 'llama-cli' instead." + +server: examples/deprecation-warning/deprecation-warning.o + $(CXX) $(CXXFLAGS) $< -o $@ $(LDFLAGS) + @echo "NOTICE: The 'server' binary is deprecated. Please use 'llama-server' instead." + +quantize: examples/deprecation-warning/deprecation-warning.o +ifneq (,$(wildcard quantize)) + $(CXX) $(CXXFLAGS) $< -o $@ $(LDFLAGS) + @echo "#########" + @echo "WARNING: The 'quantize' binary is deprecated. Please use 'llama-quantize' instead." + @echo " Remove the 'quantize' binary to remove this warning." + @echo "#########" +endif + +perplexity: examples/deprecation-warning/deprecation-warning.o +ifneq (,$(wildcard perplexity)) + $(CXX) $(CXXFLAGS) $< -o $@ $(LDFLAGS) + @echo "#########" + @echo "WARNING: The 'perplexity' binary is deprecated. Please use 'llama-perplexity' instead." + @echo " Remove the 'perplexity' binary to remove this warning." + @echo "#########" +endif + +embedding: examples/deprecation-warning/deprecation-warning.o +ifneq (,$(wildcard embedding)) + $(CXX) $(CXXFLAGS) $< -o $@ $(LDFLAGS) + @echo "#########" + @echo "WARNING: The 'embedding' binary is deprecated. Please use 'llama-embedding' instead." + @echo " Remove the 'embedding' binary to remove this warning." 
+ @echo "#########" +endif diff --git a/llama.cpp/Package.swift b/llama.cpp/Package.swift new file mode 100644 index 0000000000000000000000000000000000000000..01c996d24203763dafc4bf7792306bef999be176 --- /dev/null +++ b/llama.cpp/Package.swift @@ -0,0 +1,19 @@ +// swift-tools-version:5.5 + +import PackageDescription + +let package = Package( + name: "llama", + platforms: [ + .macOS(.v12), + .iOS(.v14), + .watchOS(.v4), + .tvOS(.v14) + ], + products: [ + .library(name: "llama", targets: ["llama"]), + ], + targets: [ + .systemLibrary(name: "llama", pkgConfig: "llama"), + ] +) diff --git a/llama.cpp/README.md b/llama.cpp/README.md new file mode 100644 index 0000000000000000000000000000000000000000..43290f2c53a75d914b99646647fe2704ba4ab107 --- /dev/null +++ b/llama.cpp/README.md @@ -0,0 +1,523 @@ +# llama.cpp + +![llama](https://user-images.githubusercontent.com/1991296/230134379-7181e485-c521-4d23-a0d6-f7b3b61ba524.png) + +[![License: MIT](https://img.shields.io/badge/license-MIT-blue.svg)](https://opensource.org/licenses/MIT) +[![Server](https://github.com/ggerganov/llama.cpp/actions/workflows/server.yml/badge.svg)](https://github.com/ggerganov/llama.cpp/actions/workflows/server.yml) + +[Roadmap](https://github.com/users/ggerganov/projects/7) / [Project status](https://github.com/ggerganov/llama.cpp/discussions/3471) / [Manifesto](https://github.com/ggerganov/llama.cpp/discussions/205) / [ggml](https://github.com/ggerganov/ggml) + +Inference of Meta's [LLaMA](https://arxiv.org/abs/2302.13971) model (and others) in pure C/C++ + +## Recent API changes + +- [Changelog for `libllama` API](https://github.com/ggerganov/llama.cpp/issues/9289) +- [Changelog for `llama-server` REST API](https://github.com/ggerganov/llama.cpp/issues/9291) + +## Hot topics + +- **How to use [MTLResidencySet](https://developer.apple.com/documentation/metal/mtlresidencyset?language=objc) to keep the GPU memory active?** https://github.com/ggerganov/llama.cpp/pull/11427 +- **VS Code extension for FIM completions:** https://github.com/ggml-org/llama.vscode +- Universal tool call support in `llama-server`: https://github.com/ggerganov/llama.cpp/pull/9639 +- Vim/Neovim plugin for FIM completions: https://github.com/ggml-org/llama.vim +- Introducing GGUF-my-LoRA https://github.com/ggerganov/llama.cpp/discussions/10123 +- Hugging Face Inference Endpoints now support GGUF out of the box! https://github.com/ggerganov/llama.cpp/discussions/9669 +- Hugging Face GGUF editor: [discussion](https://github.com/ggerganov/llama.cpp/discussions/9268) | [tool](https://huggingface.co/spaces/CISCai/gguf-editor) + +---- + +## Description + +The main goal of `llama.cpp` is to enable LLM inference with minimal setup and state-of-the-art performance on a wide +range of hardware - locally and in the cloud. + +- Plain C/C++ implementation without any dependencies +- Apple silicon is a first-class citizen - optimized via ARM NEON, Accelerate and Metal frameworks +- AVX, AVX2, AVX512 and AMX support for x86 architectures +- 1.5-bit, 2-bit, 3-bit, 4-bit, 5-bit, 6-bit, and 8-bit integer quantization for faster inference and reduced memory use +- Custom CUDA kernels for running LLMs on NVIDIA GPUs (support for AMD GPUs via HIP and Moore Threads MTT GPUs via MUSA) +- Vulkan and SYCL backend support +- CPU+GPU hybrid inference to partially accelerate models larger than the total VRAM capacity + +The `llama.cpp` project is the main playground for developing new features for the [ggml](https://github.com/ggerganov/ggml) library. + +
+Models + +Typically finetunes of the base models below are supported as well. + +Instructions for adding support for new models: [HOWTO-add-model.md](docs/development/HOWTO-add-model.md) + +#### Text-only + +- [X] LLaMA 🦙 +- [x] LLaMA 2 🦙🦙 +- [x] LLaMA 3 🦙🦙🦙 +- [X] [Mistral 7B](https://huggingface.co/mistralai/Mistral-7B-v0.1) +- [x] [Mixtral MoE](https://huggingface.co/models?search=mistral-ai/Mixtral) +- [x] [DBRX](https://huggingface.co/databricks/dbrx-instruct) +- [X] [Falcon](https://huggingface.co/models?search=tiiuae/falcon) +- [X] [Chinese LLaMA / Alpaca](https://github.com/ymcui/Chinese-LLaMA-Alpaca) and [Chinese LLaMA-2 / Alpaca-2](https://github.com/ymcui/Chinese-LLaMA-Alpaca-2) +- [X] [Vigogne (French)](https://github.com/bofenghuang/vigogne) +- [X] [BERT](https://github.com/ggerganov/llama.cpp/pull/5423) +- [X] [Koala](https://bair.berkeley.edu/blog/2023/04/03/koala/) +- [X] [Baichuan 1 & 2](https://huggingface.co/models?search=baichuan-inc/Baichuan) + [derivations](https://huggingface.co/hiyouga/baichuan-7b-sft) +- [X] [Aquila 1 & 2](https://huggingface.co/models?search=BAAI/Aquila) +- [X] [Starcoder models](https://github.com/ggerganov/llama.cpp/pull/3187) +- [X] [Refact](https://huggingface.co/smallcloudai/Refact-1_6B-fim) +- [X] [MPT](https://github.com/ggerganov/llama.cpp/pull/3417) +- [X] [Bloom](https://github.com/ggerganov/llama.cpp/pull/3553) +- [x] [Yi models](https://huggingface.co/models?search=01-ai/Yi) +- [X] [StableLM models](https://huggingface.co/stabilityai) +- [x] [Deepseek models](https://huggingface.co/models?search=deepseek-ai/deepseek) +- [x] [Qwen models](https://huggingface.co/models?search=Qwen/Qwen) +- [x] [PLaMo-13B](https://github.com/ggerganov/llama.cpp/pull/3557) +- [x] [Phi models](https://huggingface.co/models?search=microsoft/phi) +- [x] [PhiMoE](https://github.com/ggerganov/llama.cpp/pull/11003) +- [x] [GPT-2](https://huggingface.co/gpt2) +- [x] [Orion 14B](https://github.com/ggerganov/llama.cpp/pull/5118) +- [x] [InternLM2](https://huggingface.co/models?search=internlm2) +- [x] [CodeShell](https://github.com/WisdomShell/codeshell) +- [x] [Gemma](https://ai.google.dev/gemma) +- [x] [Mamba](https://github.com/state-spaces/mamba) +- [x] [Grok-1](https://huggingface.co/keyfan/grok-1-hf) +- [x] [Xverse](https://huggingface.co/models?search=xverse) +- [x] [Command-R models](https://huggingface.co/models?search=CohereForAI/c4ai-command-r) +- [x] [SEA-LION](https://huggingface.co/models?search=sea-lion) +- [x] [GritLM-7B](https://huggingface.co/GritLM/GritLM-7B) + [GritLM-8x7B](https://huggingface.co/GritLM/GritLM-8x7B) +- [x] [OLMo](https://allenai.org/olmo) +- [x] [OLMo 2](https://allenai.org/olmo) +- [x] [OLMoE](https://huggingface.co/allenai/OLMoE-1B-7B-0924) +- [x] [Granite models](https://huggingface.co/collections/ibm-granite/granite-code-models-6624c5cec322e4c148c8b330) +- [x] [GPT-NeoX](https://github.com/EleutherAI/gpt-neox) + [Pythia](https://github.com/EleutherAI/pythia) +- [x] [Snowflake-Arctic MoE](https://huggingface.co/collections/Snowflake/arctic-66290090abe542894a5ac520) +- [x] [Smaug](https://huggingface.co/models?search=Smaug) +- [x] [Poro 34B](https://huggingface.co/LumiOpen/Poro-34B) +- [x] [Bitnet b1.58 models](https://huggingface.co/1bitLLM) +- [x] [Flan T5](https://huggingface.co/models?search=flan-t5) +- [x] [Open Elm models](https://huggingface.co/collections/apple/openelm-instruct-models-6619ad295d7ae9f868b759ca) +- [x] [ChatGLM3-6b](https://huggingface.co/THUDM/chatglm3-6b) + 
[ChatGLM4-9b](https://huggingface.co/THUDM/glm-4-9b) + [GLMEdge-1.5b](https://huggingface.co/THUDM/glm-edge-1.5b-chat) + [GLMEdge-4b](https://huggingface.co/THUDM/glm-edge-4b-chat) +- [x] [SmolLM](https://huggingface.co/collections/HuggingFaceTB/smollm-6695016cad7167254ce15966) +- [x] [EXAONE-3.0-7.8B-Instruct](https://huggingface.co/LGAI-EXAONE/EXAONE-3.0-7.8B-Instruct) +- [x] [FalconMamba Models](https://huggingface.co/collections/tiiuae/falconmamba-7b-66b9a580324dd1598b0f6d4a) +- [x] [Jais](https://huggingface.co/inceptionai/jais-13b-chat) +- [x] [Bielik-11B-v2.3](https://huggingface.co/collections/speakleash/bielik-11b-v23-66ee813238d9b526a072408a) +- [x] [RWKV-6](https://github.com/BlinkDL/RWKV-LM) +- [x] [QRWKV-6](https://huggingface.co/recursal/QRWKV6-32B-Instruct-Preview-v0.1) +- [x] [GigaChat-20B-A3B](https://huggingface.co/ai-sage/GigaChat-20B-A3B-instruct) + +#### Multimodal + +- [x] [LLaVA 1.5 models](https://huggingface.co/collections/liuhaotian/llava-15-653aac15d994e992e2677a7e), [LLaVA 1.6 models](https://huggingface.co/collections/liuhaotian/llava-16-65b9e40155f60fd046a5ccf2) +- [x] [BakLLaVA](https://huggingface.co/models?search=SkunkworksAI/Bakllava) +- [x] [Obsidian](https://huggingface.co/NousResearch/Obsidian-3B-V0.5) +- [x] [ShareGPT4V](https://huggingface.co/models?search=Lin-Chen/ShareGPT4V) +- [x] [MobileVLM 1.7B/3B models](https://huggingface.co/models?search=mobileVLM) +- [x] [Yi-VL](https://huggingface.co/models?search=Yi-VL) +- [x] [Mini CPM](https://huggingface.co/models?search=MiniCPM) +- [x] [Moondream](https://huggingface.co/vikhyatk/moondream2) +- [x] [Bunny](https://github.com/BAAI-DCAI/Bunny) +- [x] [GLM-EDGE](https://huggingface.co/models?search=glm-edge) +- [x] [Qwen2-VL](https://huggingface.co/collections/Qwen/qwen2-vl-66cee7455501d7126940800d) + +
+ +
+Bindings + +- Python: [abetlen/llama-cpp-python](https://github.com/abetlen/llama-cpp-python) +- Go: [go-skynet/go-llama.cpp](https://github.com/go-skynet/go-llama.cpp) +- Node.js: [withcatai/node-llama-cpp](https://github.com/withcatai/node-llama-cpp) +- JS/TS (llama.cpp server client): [lgrammel/modelfusion](https://modelfusion.dev/integration/model-provider/llamacpp) +- JS/TS (Programmable Prompt Engine CLI): [offline-ai/cli](https://github.com/offline-ai/cli) +- JavaScript/Wasm (works in browser): [tangledgroup/llama-cpp-wasm](https://github.com/tangledgroup/llama-cpp-wasm) +- Typescript/Wasm (nicer API, available on npm): [ngxson/wllama](https://github.com/ngxson/wllama) +- Ruby: [yoshoku/llama_cpp.rb](https://github.com/yoshoku/llama_cpp.rb) +- Rust (more features): [edgenai/llama_cpp-rs](https://github.com/edgenai/llama_cpp-rs) +- Rust (nicer API): [mdrokz/rust-llama.cpp](https://github.com/mdrokz/rust-llama.cpp) +- Rust (more direct bindings): [utilityai/llama-cpp-rs](https://github.com/utilityai/llama-cpp-rs) +- Rust (automated build from crates.io): [ShelbyJenkins/llm_client](https://github.com/ShelbyJenkins/llm_client) +- C#/.NET: [SciSharp/LLamaSharp](https://github.com/SciSharp/LLamaSharp) +- C#/VB.NET (more features - community license): [LM-Kit.NET](https://docs.lm-kit.com/lm-kit-net/index.html) +- Scala 3: [donderom/llm4s](https://github.com/donderom/llm4s) +- Clojure: [phronmophobic/llama.clj](https://github.com/phronmophobic/llama.clj) +- React Native: [mybigday/llama.rn](https://github.com/mybigday/llama.rn) +- Java: [kherud/java-llama.cpp](https://github.com/kherud/java-llama.cpp) +- Zig: [deins/llama.cpp.zig](https://github.com/Deins/llama.cpp.zig) +- Flutter/Dart: [netdur/llama_cpp_dart](https://github.com/netdur/llama_cpp_dart) +- Flutter: [xuegao-tzx/Fllama](https://github.com/xuegao-tzx/Fllama) +- PHP (API bindings and features built on top of llama.cpp): [distantmagic/resonance](https://github.com/distantmagic/resonance) [(more info)](https://github.com/ggerganov/llama.cpp/pull/6326) +- Guile Scheme: [guile_llama_cpp](https://savannah.nongnu.org/projects/guile-llama-cpp) +- Swift [srgtuszy/llama-cpp-swift](https://github.com/srgtuszy/llama-cpp-swift) +- Swift [ShenghaiWang/SwiftLlama](https://github.com/ShenghaiWang/SwiftLlama) + +
+ +
+UIs + +*(to have a project listed here, it should clearly state that it depends on `llama.cpp`)* + +- [AI Sublime Text plugin](https://github.com/yaroslavyaroslav/OpenAI-sublime-text) (MIT) +- [cztomsik/ava](https://github.com/cztomsik/ava) (MIT) +- [Dot](https://github.com/alexpinel/Dot) (GPL) +- [eva](https://github.com/ylsdamxssjxxdd/eva) (MIT) +- [iohub/collama](https://github.com/iohub/coLLaMA) (Apache-2.0) +- [janhq/jan](https://github.com/janhq/jan) (AGPL) +- [KanTV](https://github.com/zhouwg/kantv?tab=readme-ov-file) (Apache-2.0) +- [KodiBot](https://github.com/firatkiral/kodibot) (GPL) +- [llama.vim](https://github.com/ggml-org/llama.vim) (MIT) +- [LARS](https://github.com/abgulati/LARS) (AGPL) +- [Llama Assistant](https://github.com/vietanhdev/llama-assistant) (GPL) +- [LLMFarm](https://github.com/guinmoon/LLMFarm?tab=readme-ov-file) (MIT) +- [LLMUnity](https://github.com/undreamai/LLMUnity) (MIT) +- [LMStudio](https://lmstudio.ai/) (proprietary) +- [LocalAI](https://github.com/mudler/LocalAI) (MIT) +- [LostRuins/koboldcpp](https://github.com/LostRuins/koboldcpp) (AGPL) +- [MindMac](https://mindmac.app) (proprietary) +- [MindWorkAI/AI-Studio](https://github.com/MindWorkAI/AI-Studio) (FSL-1.1-MIT) +- [Mobile-Artificial-Intelligence/maid](https://github.com/Mobile-Artificial-Intelligence/maid) (MIT) +- [Mozilla-Ocho/llamafile](https://github.com/Mozilla-Ocho/llamafile) (Apache-2.0) +- [nat/openplayground](https://github.com/nat/openplayground) (MIT) +- [nomic-ai/gpt4all](https://github.com/nomic-ai/gpt4all) (MIT) +- [ollama/ollama](https://github.com/ollama/ollama) (MIT) +- [oobabooga/text-generation-webui](https://github.com/oobabooga/text-generation-webui) (AGPL) +- [PocketPal AI](https://github.com/a-ghorbani/pocketpal-ai) (MIT) +- [psugihara/FreeChat](https://github.com/psugihara/FreeChat) (MIT) +- [ptsochantaris/emeltal](https://github.com/ptsochantaris/emeltal) (MIT) +- [pythops/tenere](https://github.com/pythops/tenere) (AGPL) +- [ramalama](https://github.com/containers/ramalama) (MIT) +- [semperai/amica](https://github.com/semperai/amica) (MIT) +- [withcatai/catai](https://github.com/withcatai/catai) (MIT) +- [Autopen](https://github.com/blackhole89/autopen) (GPL) + +
+ +
+Tools + +- [akx/ggify](https://github.com/akx/ggify) – download PyTorch models from HuggingFace Hub and convert them to GGML +- [akx/ollama-dl](https://github.com/akx/ollama-dl) – download models from the Ollama library to be used directly with llama.cpp +- [crashr/gppm](https://github.com/crashr/gppm) – launch llama.cpp instances utilizing NVIDIA Tesla P40 or P100 GPUs with reduced idle power consumption +- [gpustack/gguf-parser](https://github.com/gpustack/gguf-parser-go/tree/main/cmd/gguf-parser) - review/check the GGUF file and estimate the memory usage +- [Styled Lines](https://marketplace.unity.com/packages/tools/generative-ai/styled-lines-llama-cpp-model-292902) (proprietary licensed, async wrapper of inference part for game development in Unity3d with pre-built Mobile and Web platform wrappers and a model example) + +
+ +
+Infrastructure + +- [Paddler](https://github.com/distantmagic/paddler) - Stateful load balancer custom-tailored for llama.cpp +- [GPUStack](https://github.com/gpustack/gpustack) - Manage GPU clusters for running LLMs +- [llama_cpp_canister](https://github.com/onicai/llama_cpp_canister) - llama.cpp as a smart contract on the Internet Computer, using WebAssembly +- [llama-swap](https://github.com/mostlygeek/llama-swap) - transparent proxy that adds automatic model switching with llama-server +- [Kalavai](https://github.com/kalavai-net/kalavai-client) - Crowdsource end to end LLM deployment at any scale + +
+ +
+Games + +- [Lucy's Labyrinth](https://github.com/MorganRO8/Lucys_Labyrinth) - A simple maze game where agents controlled by an AI model will try to trick you. + +
+
+## Supported backends
+
+| Backend | Target devices |
+| --- | --- |
+| [Metal](docs/build.md#metal-build) | Apple Silicon |
+| [BLAS](docs/build.md#blas-build) | All |
+| [BLIS](docs/backend/BLIS.md) | All |
+| [SYCL](docs/backend/SYCL.md) | Intel and Nvidia GPU |
+| [MUSA](docs/build.md#musa) | Moore Threads MTT GPU |
+| [CUDA](docs/build.md#cuda) | Nvidia GPU |
+| [HIP](docs/build.md#hip) | AMD GPU |
+| [Vulkan](docs/build.md#vulkan) | GPU |
+| [CANN](docs/build.md#cann) | Ascend NPU |
+| [OpenCL](docs/backend/OPENCL.md) | Adreno GPU |
+
+## Building the project
+
+The main product of this project is the `llama` library. Its C-style interface can be found in [include/llama.h](include/llama.h).
+The project also includes many example programs and tools using the `llama` library. The examples range from simple, minimal code snippets to sophisticated sub-projects such as an OpenAI-compatible HTTP server. Possible methods for obtaining the binaries:
+
+- Clone this repository and build locally, see [how to build](docs/build.md)
+- On macOS or Linux, install `llama.cpp` via [brew, flox or nix](docs/install.md)
+- Use a Docker image, see [documentation for Docker](docs/docker.md)
+- Download pre-built binaries from [releases](https://github.com/ggerganov/llama.cpp/releases)
+
+## Obtaining and quantizing models
+
+The [Hugging Face](https://huggingface.co) platform hosts a [number of LLMs](https://huggingface.co/models?library=gguf&sort=trending) compatible with `llama.cpp`:
+
+- [Trending](https://huggingface.co/models?library=gguf&sort=trending)
+- [LLaMA](https://huggingface.co/models?sort=trending&search=llama+gguf)
+
+You can either manually download the GGUF file or directly use any `llama.cpp`-compatible model from Hugging Face by using this CLI argument: `-hf <user>/<model>[:quant]`
+
+After downloading a model, use the CLI tools to run it locally - see below.
+
+`llama.cpp` requires the model to be stored in the [GGUF](https://github.com/ggerganov/ggml/blob/master/docs/gguf.md) file format. Models in other data formats can be converted to GGUF using the `convert_*.py` Python scripts in this repo.
+
+The Hugging Face platform provides a variety of online tools for converting, quantizing and hosting models with `llama.cpp`:
+
+- Use the [GGUF-my-repo space](https://huggingface.co/spaces/ggml-org/gguf-my-repo) to convert to GGUF format and quantize model weights to smaller sizes
+- Use the [GGUF-my-LoRA space](https://huggingface.co/spaces/ggml-org/gguf-my-lora) to convert LoRA adapters to GGUF format (more info: https://github.com/ggerganov/llama.cpp/discussions/10123)
+- Use the [GGUF-editor space](https://huggingface.co/spaces/CISCai/gguf-editor) to edit GGUF metadata in the browser (more info: https://github.com/ggerganov/llama.cpp/discussions/9268)
+- Use the [Inference Endpoints](https://ui.endpoints.huggingface.co/) to directly host `llama.cpp` in the cloud (more info: https://github.com/ggerganov/llama.cpp/discussions/9669)
+
+To learn more about model quantization, [read this documentation](examples/quantize/README.md).
+
+## [`llama-cli`](examples/main)
+
+#### A CLI tool for accessing and experimenting with most of `llama.cpp`'s functionality.
+
+-
+ Run in conversation mode + + Models with a built-in chat template will automatically activate conversation mode. If this doesn't occur, you can manually enable it by adding `-cnv` and specifying a suitable chat template with `--chat-template NAME` + + ```bash + llama-cli -m model.gguf + + # > hi, who are you? + # Hi there! I'm your helpful assistant! I'm an AI-powered chatbot designed to assist and provide information to users like you. I'm here to help answer your questions, provide guidance, and offer support on a wide range of topics. I'm a friendly and knowledgeable AI, and I'm always happy to help with anything you need. What's on your mind, and how can I assist you today? + # + # > what is 1+1? + # Easy peasy! The answer to 1+1 is... 2! + ``` + +
+ +-
+ Run in conversation mode with custom chat template + + ```bash + # use the "chatml" template (use -h to see the list of supported templates) + llama-cli -m model.gguf -cnv --chat-template chatml + + # use a custom template + llama-cli -m model.gguf -cnv --in-prefix 'User: ' --reverse-prompt 'User:' + ``` + +
+ +-
+ Run simple text completion + + To disable conversation mode explicitly, use `-no-cnv` + + ```bash + llama-cli -m model.gguf -p "I believe the meaning of life is" -n 128 -no-cnv + + # I believe the meaning of life is to find your own truth and to live in accordance with it. For me, this means being true to myself and following my passions, even if they don't align with societal expectations. I think that's what I love about yoga – it's not just a physical practice, but a spiritual one too. It's about connecting with yourself, listening to your inner voice, and honoring your own unique journey. + ``` + +
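+    Sampling can also be adjusted from the command line - a small sketch with illustrative values, using standard `llama-cli` sampling options:
+
+    ```bash
+    # same completion, but with explicit sampling settings and a fixed seed
+    llama-cli -m model.gguf -p "I believe the meaning of life is" -n 128 -no-cnv \
+        --temp 0.8 --top-k 40 --top-p 0.9 --seed 42
+    ```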
+ +-
+ Constrain the output with a custom grammar + + ```bash + llama-cli -m model.gguf -n 256 --grammar-file grammars/json.gbnf -p 'Request: schedule a call at 8pm; Command:' + + # {"appointmentTime": "8pm", "appointmentDetails": "schedule a a call"} + ``` + + The [grammars/](grammars/) folder contains a handful of sample grammars. To write your own, check out the [GBNF Guide](grammars/README.md). + + For authoring more complex JSON grammars, check out https://grammar.intrinsiclabs.ai/ + +
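+    If the output should follow a JSON schema rather than a hand-written grammar, a grammar can be derived from the schema on the fly - a sketch, assuming the `--json-schema` flag available in recent builds:
+
+    ```bash
+    # --json-schema is assumed to be available in this build
+    # constrain the output to a JSON object with a required "appointmentTime" field
+    llama-cli -m model.gguf -no-cnv \
+        --json-schema '{"type": "object", "properties": {"appointmentTime": {"type": "string"}}, "required": ["appointmentTime"]}' \
+        -p 'Request: schedule a call at 8pm; Command:'
+    ```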
+ + +## [`llama-server`](examples/server) + +#### A lightweight, [OpenAI API](https://github.com/openai/openai-openapi) compatible, HTTP server for serving LLMs. + +-
+ Start a local HTTP server with default configuration on port 8080 + + ```bash + llama-server -m model.gguf --port 8080 + + # Basic web UI can be accessed via browser: http://localhost:8080 + # Chat completion endpoint: http://localhost:8080/v1/chat/completions + ``` + +
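+    Once the server is running, the OpenAI-compatible endpoint can be exercised with `curl` - a minimal sketch (adjust host and port if you changed them):
+
+    ```bash
+    # send a single chat turn to the chat completion endpoint
+    curl -s http://localhost:8080/v1/chat/completions \
+        -H "Content-Type: application/json" \
+        -d '{"messages": [{"role": "user", "content": "Hello, what can you do?"}]}'
+    ```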
+ +-
+ Support multiple-users and parallel decoding + + ```bash + # up to 4 concurrent requests, each with 4096 max context + llama-server -m model.gguf -c 16384 -np 4 + ``` + +
+ +-
+ Enable speculative decoding + + ```bash + # the draft.gguf model should be a small variant of the target model.gguf + llama-server -m model.gguf -md draft.gguf + ``` + +
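+    Drafting behaviour can usually be tuned further - a sketch, assuming the `--draft-max`, `--draft-min` and `-ngld` (draft-model GPU layers) options present in recent builds:
+
+    ```bash
+    # --draft-max / --draft-min / -ngld are assumed to be available in this build
+    # cap drafted tokens per step and offload the draft model to the GPU
+    llama-server -m model.gguf -md draft.gguf --draft-max 16 --draft-min 4 -ngld 99
+    ```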
+ +-
+ Serve an embedding model + + ```bash + # use the /embedding endpoint + llama-server -m model.gguf --embedding --pooling cls -ub 8192 + ``` + +
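+    Embeddings can then be requested over HTTP - a sketch, assuming the native `/embedding` endpoint mentioned above:
+
+    ```bash
+    # the /embedding route and the "content" field follow the native (non-OpenAI) embedding API
+    curl -s http://localhost:8080/embedding \
+        -H "Content-Type: application/json" \
+        -d '{"content": "Hello, world!"}'
+    ```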
+ +-
+ Serve a reranking model + + ```bash + # use the /reranking endpoint + llama-server -m model.gguf --reranking + ``` + +
+ +-
+ Constrain all outputs with a grammar + + ```bash + # custom grammar + llama-server -m model.gguf --grammar-file grammar.gbnf + + # JSON + llama-server -m model.gguf --grammar-file grammars/json.gbnf + ``` + +
+ + +## [`llama-perplexity`](examples/perplexity) + +#### A tool for measuring the perplexity [^1][^2] (and other quality metrics) of a model over a given text. + +-
+ Measure the perplexity over a text file + + ```bash + llama-perplexity -m model.gguf -f file.txt + + # [1]15.2701,[2]5.4007,[3]5.3073,[4]6.2965,[5]5.8940,[6]5.6096,[7]5.7942,[8]4.9297, ... + # Final estimate: PPL = 5.4007 +/- 0.67339 + ``` + +
+ +-
+ Measure KL divergence + + ```bash + # TODO + ``` + +
+ +[^1]: [examples/perplexity/README.md](./examples/perplexity/README.md) +[^2]: [https://huggingface.co/docs/transformers/perplexity](https://huggingface.co/docs/transformers/perplexity) + +## [`llama-bench`](examples/llama-bench) + +#### Benchmark the performance of the inference for various parameters. + +-
+ Run default benchmark + + ```bash + llama-bench -m model.gguf + + # Output: + # | model | size | params | backend | threads | test | t/s | + # | ------------------- | ---------: | ---------: | ---------- | ------: | ------------: | -------------------: | + # | qwen2 1.5B Q4_0 | 885.97 MiB | 1.54 B | Metal,BLAS | 16 | pp512 | 5765.41 ± 20.55 | + # | qwen2 1.5B Q4_0 | 885.97 MiB | 1.54 B | Metal,BLAS | 16 | tg128 | 197.71 ± 0.81 | + # + # build: 3e0ba0e60 (4229) + ``` + +
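+    Prompt-processing and generation sizes, GPU offload and the output format can also be selected explicitly - a sketch with illustrative values:
+
+    ```bash
+    # benchmark 512-token prompt processing and 128-token generation,
+    # offloading all layers to the GPU and printing a Markdown table
+    llama-bench -m model.gguf -p 512 -n 128 -ngl 99 -o md
+    ```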
+ +## [`llama-run`](examples/run) + +#### A comprehensive example for running `llama.cpp` models. Useful for inferencing. Used with RamaLama [^3]. + +-
+ Run a model with a specific prompt (by default it's pulled from Ollama registry) + + ```bash + llama-run granite-code + ``` + +
+ +[^3]: [RamaLama](https://github.com/containers/ramalama) + +## [`llama-simple`](examples/simple) + +#### A minimal example for implementing apps with `llama.cpp`. Useful for developers. + +-
+ Basic text completion + + ```bash + llama-simple -m model.gguf + + # Hello my name is Kaitlyn and I am a 16 year old girl. I am a junior in high school and I am currently taking a class called "The Art of + ``` + +
+ + +## Contributing + +- Contributors can open PRs +- Collaborators can push to branches in the `llama.cpp` repo and merge PRs into the `master` branch +- Collaborators will be invited based on contributions +- Any help with managing issues, PRs and projects is very appreciated! +- See [good first issues](https://github.com/ggerganov/llama.cpp/issues?q=is%3Aissue+is%3Aopen+label%3A%22good+first+issue%22) for tasks suitable for first contributions +- Read the [CONTRIBUTING.md](CONTRIBUTING.md) for more information +- Make sure to read this: [Inference at the edge](https://github.com/ggerganov/llama.cpp/discussions/205) +- A bit of backstory for those who are interested: [Changelog podcast](https://changelog.com/podcast/532) + +## Other documentation + +- [main (cli)](examples/main/README.md) +- [server](examples/server/README.md) +- [GBNF grammars](grammars/README.md) + +#### Development documentation + +- [How to build](docs/build.md) +- [Running on Docker](docs/docker.md) +- [Build on Android](docs/android.md) +- [Performance troubleshooting](docs/development/token_generation_performance_tips.md) +- [GGML tips & tricks](https://github.com/ggerganov/llama.cpp/wiki/GGML-Tips-&-Tricks) + +#### Seminal papers and background on the models + +If your issue is with model generation quality, then please at least scan the following links and papers to understand the limitations of LLaMA models. This is especially important when choosing an appropriate model size and appreciating both the significant and subtle differences between LLaMA models and ChatGPT: +- LLaMA: + - [Introducing LLaMA: A foundational, 65-billion-parameter large language model](https://ai.facebook.com/blog/large-language-model-llama-meta-ai/) + - [LLaMA: Open and Efficient Foundation Language Models](https://arxiv.org/abs/2302.13971) +- GPT-3 + - [Language Models are Few-Shot Learners](https://arxiv.org/abs/2005.14165) +- GPT-3.5 / InstructGPT / ChatGPT: + - [Aligning language models to follow instructions](https://openai.com/research/instruction-following) + - [Training language models to follow instructions with human feedback](https://arxiv.org/abs/2203.02155) + +#### References + diff --git a/llama.cpp/SECURITY.md b/llama.cpp/SECURITY.md new file mode 100644 index 0000000000000000000000000000000000000000..f4322c6ee4d18c3fb9f694fcc01f2be8640adb50 --- /dev/null +++ b/llama.cpp/SECURITY.md @@ -0,0 +1,67 @@ +# Security Policy + + - [**Using llama.cpp securely**](#using-llamacpp-securely) + - [Untrusted models](#untrusted-models) + - [Untrusted inputs](#untrusted-inputs) + - [Data privacy](#data-privacy) + - [Untrusted environments or networks](#untrusted-environments-or-networks) + - [Multi-Tenant environments](#multi-tenant-environments) + - [**Reporting a vulnerability**](#reporting-a-vulnerability) + +## Using llama.cpp securely + +### Untrusted models +Be careful when running untrusted models. This classification includes models created by unknown developers or utilizing data obtained from unknown sources. + +*Always execute untrusted models within a secure, isolated environment such as a sandbox* (e.g., containers, virtual machines). This helps protect your system from potentially malicious code. + +> [!NOTE] +> The trustworthiness of a model is not binary. You must always determine the proper level of caution depending on the specific model and how it matches your use case and risk tolerance. + +### Untrusted inputs + +Some models accept various input formats (text, images, audio, etc.). 
The libraries converting these inputs have varying security levels, so it's crucial to isolate the model and carefully pre-process inputs to mitigate script injection risks.
+
+For maximum security when handling untrusted inputs, you may need to employ the following:
+
+* Sandboxing: Isolate the environment where the inference happens.
+* Pre-analysis: Check how the model performs by default when exposed to prompt injection (e.g. using [fuzzing for prompt injection](https://github.com/FonduAI/awesome-prompt-injection?tab=readme-ov-file#tools)). This will give you leads on how hard you will have to work on the next topics.
+* Updates: Keep both LLaMA C++ and your libraries updated with the latest security patches.
+* Input Sanitization: Before feeding data to the model, sanitize inputs rigorously. This involves techniques such as:
+  * Validation: Enforce strict rules on allowed characters and data types.
+  * Filtering: Remove potentially malicious scripts or code fragments.
+  * Encoding: Convert special characters into safe representations.
+  * Verification: Run tooling that identifies potential script injections (e.g. [models that detect prompt injection attempts](https://python.langchain.com/docs/guides/safety/hugging_face_prompt_injection)).
+
+### Data privacy
+
+To protect sensitive data from potential leaks or unauthorized access, it is crucial to sandbox the model execution. This means running the model in a secure, isolated environment, which helps mitigate many attack vectors.
+
+### Untrusted environments or networks
+
+If you can't run your models in a secure and isolated environment, or if they must be exposed to an untrusted network, make sure to take the following security precautions:
+* Confirm the hash of any downloaded artifact (e.g. pre-trained model weights) matches a known-good value.
+* Encrypt your data if sending it over the network.
+
+### Multi-Tenant environments
+
+If you intend to run multiple models in parallel with shared memory, it is your responsibility to ensure the models do not interact with or access each other's data. The primary areas of concern are tenant isolation, resource allocation, model sharing and hardware attacks.
+
+1. Tenant Isolation: Models should run separately with strong isolation methods to prevent unwanted data access. Separating networks is crucial for isolation, as it prevents unauthorized access to data or models and prevents malicious users from sending graphs to execute under another tenant's identity.
+
+2. Resource Allocation: A denial of service caused by one model can impact the overall system health. Implement safeguards like rate limits, access controls, and health monitoring.
+
+3. Model Sharing: In a multi-tenant model-sharing design, tenants and users must understand the security risks of running code provided by others. Since there are no reliable methods to detect malicious models, sandboxing the model execution is the recommended approach to mitigate the risk.
+
+4. Hardware Attacks: GPUs or TPUs can also be attacked. [Research](https://scholar.google.com/scholar?q=gpu+side+channel) has shown that side-channel attacks on GPUs are possible, which can leak data from other models or processes running on the same system at the same time.
+
+## Reporting a vulnerability
+
+Beware that none of the topics under [Using llama.cpp securely](#using-llamacpp-securely) are considered vulnerabilities of LLaMA C++.
+
+However, if you have discovered a security vulnerability in this project, please report it privately.
**Do not disclose it as a public issue.** This gives us time to work with you to fix the issue before public exposure, reducing the chance that the exploit will be used before a patch is released. + +Please disclose it as a private [security advisory](https://github.com/ggerganov/llama.cpp/security/advisories/new). + +A team of volunteers on a reasonable-effort basis maintains this project. As such, please give us at least 90 days to work on a fix before public exposure. diff --git a/llama.cpp/Sources/llama/llama.h b/llama.cpp/Sources/llama/llama.h new file mode 100644 index 0000000000000000000000000000000000000000..41725880ed8c06244c3ee7e050a9d89368a4b2a3 --- /dev/null +++ b/llama.cpp/Sources/llama/llama.h @@ -0,0 +1,4 @@ +#pragma once + +#include + diff --git a/llama.cpp/Sources/llama/module.modulemap b/llama.cpp/Sources/llama/module.modulemap new file mode 100644 index 0000000000000000000000000000000000000000..d010555b1cb658c433cf5709d4165f592c5aa202 --- /dev/null +++ b/llama.cpp/Sources/llama/module.modulemap @@ -0,0 +1,5 @@ +module llama [system] { + header "llama.h" + link "llama" + export * +} diff --git a/llama.cpp/build/bin/libggml-base.so b/llama.cpp/build/bin/libggml-base.so old mode 100644 new mode 100755 index 60b4a2eb3d1e3f043ae75131410fd1742499b836..0c80144756535837121dd458001f8d2d85b58008 --- a/llama.cpp/build/bin/libggml-base.so +++ b/llama.cpp/build/bin/libggml-base.so @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:031cf356a19145a270349ea25a0d085a59601e635a34ba8c131deb3bbdf00edd -size 653680 +oid sha256:d299db995126e56e8dfeebe02678652ffa3b4e3c34e6ff44c44ec7f6eb4f5e7a +size 636368 diff --git a/llama.cpp/build/bin/libggml-cpu.so b/llama.cpp/build/bin/libggml-cpu.so old mode 100644 new mode 100755 index 3f74af45bce906927c258e0f9223dde66a6db34b..7303313dccf8edd5d93c8973bfccb19a545e58c9 --- a/llama.cpp/build/bin/libggml-cpu.so +++ b/llama.cpp/build/bin/libggml-cpu.so @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:1fc72f4d2a6c76a4965037a8f4b602889db1f0c6f80b8c5f92367f71ad436f9b -size 510296 +oid sha256:a21f1c367fef2cfcd751de0b7f8d6c15be948a92e3b5297d39e92f8200db23b9 +size 575624 diff --git a/llama.cpp/build/bin/libggml-rpc.so b/llama.cpp/build/bin/libggml-rpc.so deleted file mode 100644 index b6434aa8dae4b92722f783861696de520b528386..0000000000000000000000000000000000000000 Binary files a/llama.cpp/build/bin/libggml-rpc.so and /dev/null differ diff --git a/llama.cpp/build/bin/libggml.so b/llama.cpp/build/bin/libggml.so old mode 100644 new mode 100755 index 42a9d7c2c2142ec9ddefa262adb19669ac4a0a74..06f4dcee8a3c596e6e9dcffb253bac593d3050f0 Binary files a/llama.cpp/build/bin/libggml.so and b/llama.cpp/build/bin/libggml.so differ diff --git a/llama.cpp/build/bin/libllama.so b/llama.cpp/build/bin/libllama.so old mode 100644 new mode 100755 index 7916e16168dc432fd17a2b8447efcaae00d4656d..868a868457f020b11b087325cae0a5ec1690d2db --- a/llama.cpp/build/bin/libllama.so +++ b/llama.cpp/build/bin/libllama.so @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:361bfcc7ac18a241a85c553a521f811fd6ca0a2cf76aae68b9b32564347d7925 -size 1803888 +oid sha256:5f14ec0dc11d1c5d94321ad04e8455896ad0d59020b08c6240eff14fc1d35a3f +size 1777560 diff --git a/llama.cpp/build/bin/libllava_shared.so b/llama.cpp/build/bin/libllava_shared.so old mode 100644 new mode 100755 index 89f9de9609faeed1449316beb4acaccc52a4592f..931a304808c6e9922786f84d86350fc16dcfb7f7 --- a/llama.cpp/build/bin/libllava_shared.so +++ b/llama.cpp/build/bin/libllava_shared.so 
@@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:0b88d638059a3ac7d8753ae485c0d4162d5db95e641bd0d8b6c373127e996148 -size 523160 +oid sha256:a921c4dcca76c7ecf428deda548f5844943e0ff0e15809a75eef44ab4dd94d95 +size 498736 diff --git a/llama.cpp/build/bin/llama-batched b/llama.cpp/build/bin/llama-batched old mode 100644 new mode 100755 index b3945e46c079341fb8f979a40ac8c0eb864c3b85..baa93a5203ede67fab1adc25f03e1db516ae965c --- a/llama.cpp/build/bin/llama-batched +++ b/llama.cpp/build/bin/llama-batched @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:f0653d74bd8b9fad846831f5e4c74a7465ad7d1ac181b062b9561b2b4881eae3 -size 1992952 +oid sha256:ee2ae9b0240a9001c646a177a1821c9b8af24a4b2cff5b4f0dd20537759a14c8 +size 1809144 diff --git a/llama.cpp/build/bin/llama-batched-bench b/llama.cpp/build/bin/llama-batched-bench old mode 100644 new mode 100755 index 1e803ed337b47437a3d297f155111e888b0d06da..8b13926fbdfc005eaca3ebe65ff59e1bc96d5f11 --- a/llama.cpp/build/bin/llama-batched-bench +++ b/llama.cpp/build/bin/llama-batched-bench @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:882716af6d0d263561c335dda360dd1e228213c6d6c3487815324037b69ca651 -size 1993016 +oid sha256:62c348e641ae4324d690c689e9b911233ddc82b122cbeef02d47c531dd5b1ba3 +size 1809184 diff --git a/llama.cpp/build/bin/llama-bench b/llama.cpp/build/bin/llama-bench old mode 100644 new mode 100755 index 7ca29e4f80f45af02ad66a888e99a7368f9686ac..36e49b7bc545eb9dcd0754cad01b3cd0128e2196 --- a/llama.cpp/build/bin/llama-bench +++ b/llama.cpp/build/bin/llama-bench @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:242879f186000bc29dc8dbd1952c8f7cfbdb19a5e558232560b9da2214ca94d8 -size 1788048 +oid sha256:671b9ec2803e0d5f3d3620628bd6bf21c8ce478614ecf12e523daa3322f44c0c +size 1602304 diff --git a/llama.cpp/build/bin/llama-cli b/llama.cpp/build/bin/llama-cli old mode 100644 new mode 100755 index 2063fcd4855bc2216421a7f78d73131d4e26e41f..7d983b38cc2514d1cfeb29e9e1b8806e756ad135 --- a/llama.cpp/build/bin/llama-cli +++ b/llama.cpp/build/bin/llama-cli @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:9f25ff967fb28553ffc42fb3feec4a7894adc3b3b5dc950113f7024da8849b4e -size 2025048 +oid sha256:916315b39d993dc4befd734eb10fffe5fc5e7a81973eab6d9d20e0d8344a037f +size 1841408 diff --git a/llama.cpp/build/bin/llama-convert-llama2c-to-ggml b/llama.cpp/build/bin/llama-convert-llama2c-to-ggml old mode 100644 new mode 100755 index 265b930b753538fd1337a028ff0c3793dbf797e9..865eb4a1427bcbe712072cbae5b0639002114d3f --- a/llama.cpp/build/bin/llama-convert-llama2c-to-ggml +++ b/llama.cpp/build/bin/llama-convert-llama2c-to-ggml @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:57987520e873bdaa365f7bb7cb934139a51d049521e78bcaaf9f9030194cfedd -size 1660640 +oid sha256:ec6a3137307a7d002084d02207edc2aa0343d0123900eba12d2ff4a9d7c3a37b +size 1486632 diff --git a/llama.cpp/build/bin/llama-cvector-generator b/llama.cpp/build/bin/llama-cvector-generator old mode 100644 new mode 100755 index 4d09e712e30cf0d7c775d1bd52370c67a676dfda..9c1d780fdb55832185e5cf15fe646af77813e5c1 --- a/llama.cpp/build/bin/llama-cvector-generator +++ b/llama.cpp/build/bin/llama-cvector-generator @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:3884e6b151f8474f92b794b4686b36fcf8b9fa324490e557833fb90313a69bcd -size 2026312 +oid sha256:c4ba39b4f139e7844d14437b8200605d18d25bc8a30315527dd42707bdd4a95d +size 1838936 diff --git a/llama.cpp/build/bin/llama-embedding 
b/llama.cpp/build/bin/llama-embedding old mode 100644 new mode 100755 index 3d015ad0ea7770a5fa0f17177b1d40d769da284a..d2e0cda5404eec21337b2db69c904de8abaf29a9 --- a/llama.cpp/build/bin/llama-embedding +++ b/llama.cpp/build/bin/llama-embedding @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:6e5a6811018cee9c27caead3e04d0cdb1e98b3256a1e5e5bf5c7e618df3fb4ef -size 1997400 +oid sha256:b897b4c92ac082c1bb8ec472dd1d0614a1ffdb5ac9cee4f65356a40f8083ba00 +size 1813592 diff --git a/llama.cpp/build/bin/llama-eval-callback b/llama.cpp/build/bin/llama-eval-callback old mode 100644 new mode 100755 index 95e97e7d6d95ae06d4a1455dabd300ae762038f1..6904e07e132faee1d9961a05e49d2ff682f0c31c --- a/llama.cpp/build/bin/llama-eval-callback +++ b/llama.cpp/build/bin/llama-eval-callback @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:58212d2a7261cb709313f304a8e4d56de64cec11e5afce7b740f2d76a199c13f -size 1993320 +oid sha256:06b4992fcded7ff58b7d60e190e03b4a8c512ce9f995c5f60803072c35c69e27 +size 1813616 diff --git a/llama.cpp/build/bin/llama-export-lora b/llama.cpp/build/bin/llama-export-lora old mode 100644 new mode 100755 index 707a879966cbbce07934ab1aa46b83c0e8834893..56f12bfb78d6c8bb0b9a6dc2066cb46f312101ca --- a/llama.cpp/build/bin/llama-export-lora +++ b/llama.cpp/build/bin/llama-export-lora @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:2a238163dd7f8cb8d0c3df4929729dc5fd9e2a548632290f152be0518fb60e6f -size 2030936 +oid sha256:89607fa63002ca69281e7a0d481f5965436509fdd59f81389b2bc1f0db0b5655 +size 1839880 diff --git a/llama.cpp/build/bin/llama-gbnf-validator b/llama.cpp/build/bin/llama-gbnf-validator old mode 100644 new mode 100755 index 4ff0168844dc51f3e1793fbeb42cbb884ba51a58..3dc7a5e6645952c0f2b2ff959730d4bb38e30101 Binary files a/llama.cpp/build/bin/llama-gbnf-validator and b/llama.cpp/build/bin/llama-gbnf-validator differ diff --git a/llama.cpp/build/bin/llama-gen-docs b/llama.cpp/build/bin/llama-gen-docs old mode 100644 new mode 100755 index ef85f713cd4884504d44a10c2709cd4135c776a1..7412f0bd626eb6879fbeba9d3ea8875468f1eea9 --- a/llama.cpp/build/bin/llama-gen-docs +++ b/llama.cpp/build/bin/llama-gen-docs @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:88d7d7b626a094c5a4edaf3ebb76e80f623526668e78d9be0e394285d0b22789 -size 1993056 +oid sha256:53d12b4bd8d9fbace4bd757dcdf36ad3135fc39be9e6b9631cdc8bf309454659 +size 1805408 diff --git a/llama.cpp/build/bin/llama-gguf b/llama.cpp/build/bin/llama-gguf old mode 100644 new mode 100755 index 995ff1b2a815866e9358a4bafafdeda0cc34dd00..f22cb634b3bda9975fc3cf76fb5a517869cb53a5 Binary files a/llama.cpp/build/bin/llama-gguf and b/llama.cpp/build/bin/llama-gguf differ diff --git a/llama.cpp/build/bin/llama-gguf-hash b/llama.cpp/build/bin/llama-gguf-hash old mode 100644 new mode 100755 index 74ac06720d1ad8476cb046222aa1a0879f5182e1..7b56bdee7db6d26f42f6c4bd3d5d28f980c2f992 --- a/llama.cpp/build/bin/llama-gguf-hash +++ b/llama.cpp/build/bin/llama-gguf-hash @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:c53a26ec24312c2197b6af0acacbd0dec9d8a352d877ae6bd2c2e7c46e5de843 -size 103448 +oid sha256:d180eec1738c65a1f65d2ecf07e9a876eec8687d242174b72c4ddec3b2a167a4 +size 95512 diff --git a/llama.cpp/build/bin/llama-gguf-split b/llama.cpp/build/bin/llama-gguf-split old mode 100644 new mode 100755 index 412b3b7a953ff2e23388c5975b006e0b29b5afe8..a484264d4cb7c453df0b694a91f17d39e5e2172a Binary files a/llama.cpp/build/bin/llama-gguf-split and b/llama.cpp/build/bin/llama-gguf-split 
differ diff --git a/llama.cpp/build/bin/llama-gritlm b/llama.cpp/build/bin/llama-gritlm old mode 100644 new mode 100755 index fcec44bc657c467b6ebff67bd786772db11f38c0..9e70973f8080a90f9b85dd8ee78dfafcb6d9bb60 --- a/llama.cpp/build/bin/llama-gritlm +++ b/llama.cpp/build/bin/llama-gritlm @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:732373ada5c46f47e01bad08323685b929884d72977817a3c9394a3e92aeb876 -size 1997840 +oid sha256:c14dab16990fa59e9362e8a302b4653a211b06c90fc52ee577001b24b474fc2b +size 1818168 diff --git a/llama.cpp/build/bin/llama-imatrix b/llama.cpp/build/bin/llama-imatrix old mode 100644 new mode 100755 index ac91ebd070222e1c36653d6ec4024934517a37fc..c82f2d5a7356a3dc8458b41bfd3a8d2a270ba6d0 --- a/llama.cpp/build/bin/llama-imatrix +++ b/llama.cpp/build/bin/llama-imatrix @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:36430fb60208097b37b3b3f75d3cb20bc03651255d3a30bfab1fcf405fe403c5 -size 2036120 +oid sha256:6762095dcc081165b0e3ea82b2af59defaf2e6b3a2fe96118eba2057f68a9d01 +size 1852600 diff --git a/llama.cpp/build/bin/llama-infill b/llama.cpp/build/bin/llama-infill old mode 100644 new mode 100755 index 88e3b60d8faa4298e5899edd6a1b0ca4bed40ccf..33bb01ea34d93aad5e4cc8f565087a965f4a1060 --- a/llama.cpp/build/bin/llama-infill +++ b/llama.cpp/build/bin/llama-infill @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:be54da473bd3ab8c5e8230052dfacb844dd8fda3b2b875c5910660008d0325c2 -size 2015728 +oid sha256:764e044ecd6c07e76fb4159e24506d12ae73a53f22f1dab403021c506439c0aa +size 1828024 diff --git a/llama.cpp/build/bin/llama-llava-cli b/llama.cpp/build/bin/llama-llava-cli old mode 100644 new mode 100755 index 26a86427596f8d7a6eece274f9c0cd4d3101c4bb..89dbc53214c1be5e888e89e1a59864ba7da1def8 --- a/llama.cpp/build/bin/llama-llava-cli +++ b/llama.cpp/build/bin/llama-llava-cli @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:331f1631863a63f1eca09b8182fafebfa644a841d6b1d6747aed679c161d78d0 -size 2301888 +oid sha256:ff55727538092d64ac2120f5b32802e30a774092c13411f0dd3dfc1f8d1959ea +size 2111248 diff --git a/llama.cpp/build/bin/llama-llava-clip-quantize-cli b/llama.cpp/build/bin/llama-llava-clip-quantize-cli old mode 100644 new mode 100755 index e6204c063f24fd6eaafeade6609ffaeb64696833..389418ed5fc14884ce31bf65445dd3b5a8fee063 --- a/llama.cpp/build/bin/llama-llava-clip-quantize-cli +++ b/llama.cpp/build/bin/llama-llava-clip-quantize-cli @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:326df701323805b7f43a67f4bede528eb278ad74c763808b277b517fa47b75fd -size 500544 +oid sha256:e88fde073b4f7b9218a64f68dec7fc9503db457d53f85010acaa5838bb64d708 +size 482104 diff --git a/llama.cpp/build/bin/llama-lookahead b/llama.cpp/build/bin/llama-lookahead old mode 100644 new mode 100755 index bb1ac2eb8256bc8c7d674f59b3d0bea8c19004b6..6bdbff64ac5f0a4da684a1807115d14802a1baa8 --- a/llama.cpp/build/bin/llama-lookahead +++ b/llama.cpp/build/bin/llama-lookahead @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:8abcb0139c15e4df4924511b54f27da19a17e69d931ac0d36b066c595f554d78 -size 1997816 +oid sha256:37c9624f30063f5c184a154cdade287a5b517611a27241e3ea5319c900dea271 +size 1818144 diff --git a/llama.cpp/build/bin/llama-lookup b/llama.cpp/build/bin/llama-lookup old mode 100644 new mode 100755 index d5e01231fd7591055aa318ab2ed3836d256146f6..6fc8e3f0fca9732f8c385b4a1246a37969239591 --- a/llama.cpp/build/bin/llama-lookup +++ b/llama.cpp/build/bin/llama-lookup @@ -1,3 +1,3 @@ version 
https://git-lfs.github.com/spec/v1 -oid sha256:c93952da9179c1ff3da8c13ca773335fbb5b2dad2d96895322b39c98587daa01 -size 2026912 +oid sha256:43b1b9d731ee72b0953073b8c2a9254e913e54e8ef1f051e1f6209d60e64a525 +size 1839960 diff --git a/llama.cpp/build/bin/llama-lookup-create b/llama.cpp/build/bin/llama-lookup-create old mode 100644 new mode 100755 index 61bb7ed9ff9f043a14bb948e265e95bdf47cdb71..91750707c9a6f6044c84959a0715dcd99ed1f2cf --- a/llama.cpp/build/bin/llama-lookup-create +++ b/llama.cpp/build/bin/llama-lookup-create @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:307f90f21698eed8bf06e616552f3b76790cf6bccbeb665f2835e5205d005fd9 -size 2013888 +oid sha256:269efb9a3787ea36642de390d84fe1e5a431f89020fda06617592d80bb5d2d49 +size 1835064 diff --git a/llama.cpp/build/bin/llama-lookup-merge b/llama.cpp/build/bin/llama-lookup-merge old mode 100644 new mode 100755 index a6acc593d8caa5aab8359e7626da05a2ad46e743..dcdd1900f38d9de45c4f8d772d1d0409ded8b81c Binary files a/llama.cpp/build/bin/llama-lookup-merge and b/llama.cpp/build/bin/llama-lookup-merge differ diff --git a/llama.cpp/build/bin/llama-lookup-stats b/llama.cpp/build/bin/llama-lookup-stats old mode 100644 new mode 100755 index e89b38165311a3c6206c7d67c5b59ee79fcc6b59..5d18992990fa0a2df21933bba33820a3e3773bbb --- a/llama.cpp/build/bin/llama-lookup-stats +++ b/llama.cpp/build/bin/llama-lookup-stats @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:97adfb3a4b98a9b752cd78829b6ac06ce9310592aa1f1d27c4ca067d12fe7dfd -size 2014720 +oid sha256:d89184a0571a335aaf4edb9fd69f283ca9715e5053ed345ca22fa1ff8911728b +size 1835928 diff --git a/llama.cpp/build/bin/llama-minicpmv-cli b/llama.cpp/build/bin/llama-minicpmv-cli old mode 100644 new mode 100755 index 383b16b3f8518b33156ba7852f0cf18ce00e037a..792bc56978fbab4959c1da28ee116d05fcf722b6 --- a/llama.cpp/build/bin/llama-minicpmv-cli +++ b/llama.cpp/build/bin/llama-minicpmv-cli @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:c60280869ffa808f7945357350e56c4d4685bbe745af33a39f5684ca370cfeb0 -size 2293240 +oid sha256:1d02b70b497f7d22c462cce7e01d8e31f70a2da3165d67fc5f351570256b306e +size 2106696 diff --git a/llama.cpp/build/bin/llama-parallel b/llama.cpp/build/bin/llama-parallel old mode 100644 new mode 100755 index 34cfb6d5fb00b280f2d1aa23eaeea5cfff74aa84..11ca49d3855fbc0ac2d7d30a76c73a26d1f3351b --- a/llama.cpp/build/bin/llama-parallel +++ b/llama.cpp/build/bin/llama-parallel @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:9e56bbd6e72179d6bdfbb871e35394446bcd1ddcfbdda63db2a271e91c22b032 -size 2002048 +oid sha256:892480418ccaca869bbf32b14acbe9cfdb4f25095a3580fe6b2c373591819db0 +size 1818280 diff --git a/llama.cpp/build/bin/llama-passkey b/llama.cpp/build/bin/llama-passkey old mode 100644 new mode 100755 index e0c87e110a967635c53b7824404a50699d2a049c..ce7221876e632f5ba47d6fcd7d0503070b5e5496 --- a/llama.cpp/build/bin/llama-passkey +++ b/llama.cpp/build/bin/llama-passkey @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:fc820e518e97ff91d791eb1dff35fc129278e3f4d171e56d9a22c4ce80cf3d89 -size 1993160 +oid sha256:fc31554ad16bcce8e41c6026aaad6fb765e25eec14ea5e511f1cc958d8910201 +size 1813488 diff --git a/llama.cpp/build/bin/llama-perplexity b/llama.cpp/build/bin/llama-perplexity old mode 100644 new mode 100755 index a99a9f99e597900164075ee424345decc3631010..b51b38f33bb1c2c967eb91a43c95fe5fad13788a --- a/llama.cpp/build/bin/llama-perplexity +++ b/llama.cpp/build/bin/llama-perplexity @@ -1,3 +1,3 @@ version 
https://git-lfs.github.com/spec/v1 -oid sha256:881ecf9cecedcda98bac9cfd63637e701941c35c65a7782d6707c4d51b1124b4 -size 2090840 +oid sha256:8e196ca134bf807b6be1f5d3fdbdfbd3f2d9b61727387de13fc22537f00df095 +size 1899024 diff --git a/llama.cpp/build/bin/llama-q8dot b/llama.cpp/build/bin/llama-q8dot old mode 100644 new mode 100755 index ddc711516f54c58001be6eb23e4f106a3cdf93b9..8ba025a5137ef2efedbacac1c3aec29784413c3d Binary files a/llama.cpp/build/bin/llama-q8dot and b/llama.cpp/build/bin/llama-q8dot differ diff --git a/llama.cpp/build/bin/llama-quantize b/llama.cpp/build/bin/llama-quantize old mode 100644 new mode 100755 index 77b4b5014bca08765cdc160d1bb4b1f0fb725c91..b2f61a87f29883e60ec93386033412842053b59a --- a/llama.cpp/build/bin/llama-quantize +++ b/llama.cpp/build/bin/llama-quantize @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:d71f796a2e5615453cd5921d23157b4a6e88dd4a87824541bdecd9ba3da0b652 -size 1653312 +oid sha256:480f2309fc990ac2d22e66edaa951d44f8555f702507c3dcae68e096bf7d6cf8 +size 1483296 diff --git a/llama.cpp/build/bin/llama-quantize-stats b/llama.cpp/build/bin/llama-quantize-stats old mode 100644 new mode 100755 index 8fe6e82ac7cfe830a47bd284ad287c97e56058ba..55be7bb95f1459b0723ea2e36a37d8b9565bfb0b --- a/llama.cpp/build/bin/llama-quantize-stats +++ b/llama.cpp/build/bin/llama-quantize-stats @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:bc78cd849d47f77f20db073eb43afd314d106ad10a63ea0594349bd3c30996ea -size 213272 +oid sha256:a8c4f23de5a73c8fec7fac17e44c5000d49944c83e68ec91cc86f88a423c6491 +size 205360 diff --git a/llama.cpp/build/bin/llama-qwen2vl-cli b/llama.cpp/build/bin/llama-qwen2vl-cli old mode 100644 new mode 100755 index 4908b7c797011fd8b555b43dd777ec239453fcd7..b3726effe71fdc425f6f7d14edd93e603d8619b0 --- a/llama.cpp/build/bin/llama-qwen2vl-cli +++ b/llama.cpp/build/bin/llama-qwen2vl-cli @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:3dbabcbcdc737896505d11be5139cb34347bb2002ed2cfdc7bf3db3e7674d822 -size 2301968 +oid sha256:a1d7779695fa91ef7646e9858d4abfab3bb52fb9e5f558ae9e426900ea41401b +size 2115424 diff --git a/llama.cpp/build/bin/llama-retrieval b/llama.cpp/build/bin/llama-retrieval old mode 100644 new mode 100755 index e398170490e045b51637ab851a805b426e6412a2..842545e65233d7e818fbdf181f97c20b5d5fc41d --- a/llama.cpp/build/bin/llama-retrieval +++ b/llama.cpp/build/bin/llama-retrieval @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:0b32851e2b5334f4435d53298832781fcb1ec0001af52ecf63d8f9e21e237bbf -size 2006952 +oid sha256:1f9449597d6835c109e7c990717af6a63bf3e8771bcc2d656631b77b29c5efd2 +size 1827168 diff --git a/llama.cpp/build/bin/llama-run b/llama.cpp/build/bin/llama-run old mode 100644 new mode 100755 index a889b4d8729416710e2a62eb4632a5e60b1ce4ad..0a68f1fd8cb5dd21b86a13f4ef7d4f9f3d30a80f --- a/llama.cpp/build/bin/llama-run +++ b/llama.cpp/build/bin/llama-run @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:bd110895af60b7f8ba5dec7bff013bf573005161914a39d0737525be3f790a70 -size 1758880 +oid sha256:24e5f5d3705ac35befa8409363c62ab935a4b34fd2a3ba70e4c3c51a3ed7fe87 +size 1542440 diff --git a/llama.cpp/build/bin/llama-save-load-state b/llama.cpp/build/bin/llama-save-load-state old mode 100644 new mode 100755 index ca6adc1fe5067be20be1beb3f83f60c8f441e79b..b540e670c45b47cf664b4240fe0ed098c6196abb --- a/llama.cpp/build/bin/llama-save-load-state +++ b/llama.cpp/build/bin/llama-save-load-state @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid 
sha256:de0bea0229e896bb0c33f5476da68e46b0b3d61644f04f788c35fdc87eb16b74 -size 1993512 +oid sha256:77529778489de266a20c184dbf4cc3235ffae5a68460f22cd8bbfd0f6d3b5ee7 +size 1813832 diff --git a/llama.cpp/build/bin/llama-server b/llama.cpp/build/bin/llama-server old mode 100644 new mode 100755 index 286830a047a94359ebb8dcfc572664d6aec6b865..aa2d9c4a8d5bac899afffde1b5977af5794f63e5 --- a/llama.cpp/build/bin/llama-server +++ b/llama.cpp/build/bin/llama-server @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:b85057dee5bbf9f4d10a196204f411ef010f541bc32f7cb1a01b50eecba1ab9c -size 4057544 +oid sha256:4858869e72d5735148f967bfa19b365fd3dfcacb78bcf242427be72a92c3f3f7 +size 3895192 diff --git a/llama.cpp/build/bin/llama-simple b/llama.cpp/build/bin/llama-simple old mode 100644 new mode 100755 index e306d03d6b4ffdbe4eb7980d91b1824736c21b23..63b34f33e94a8eff4a5f301b6ad2bd3cdffd6dbb Binary files a/llama.cpp/build/bin/llama-simple and b/llama.cpp/build/bin/llama-simple differ diff --git a/llama.cpp/build/bin/llama-simple-chat b/llama.cpp/build/bin/llama-simple-chat old mode 100644 new mode 100755 index 4858ea3f3901ba86b739ced3636af3cce02629ad..ac70ed96b93299f3503424d4291a062069e43b1f Binary files a/llama.cpp/build/bin/llama-simple-chat and b/llama.cpp/build/bin/llama-simple-chat differ diff --git a/llama.cpp/build/bin/llama-speculative b/llama.cpp/build/bin/llama-speculative old mode 100644 new mode 100755 index 42a9078c91fb293d9bb5954ee70ad5213eda3b7b..6a5e11787c25bad3e792ba18b08df5d4b17f3100 --- a/llama.cpp/build/bin/llama-speculative +++ b/llama.cpp/build/bin/llama-speculative @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:db53918452df1a89030b4dd33c946e87c3fb4a24b05102d51410871a71a3d65f -size 2025392 +oid sha256:b2181f15fcb5592001612bf60eb3c9eeb87f0665f9ae1bd58a2077ce7f1c4048 +size 1833576 diff --git a/llama.cpp/build/bin/llama-speculative-simple b/llama.cpp/build/bin/llama-speculative-simple old mode 100644 new mode 100755 index 803a11d0f5eae66551d2e396491b3cab587ec340..418b305f6c00e39144e2a6a0e7be2257d41aa4df --- a/llama.cpp/build/bin/llama-speculative-simple +++ b/llama.cpp/build/bin/llama-speculative-simple @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:09641bc4e2b8f46eb08056cbbe4d9626781b3cc210b5907080888663263e1ecd -size 2002192 +oid sha256:0eb6522ba6a6d522fa3bf29df3f75caff99bd7403d7088224248d1996297a394 +size 1818424 diff --git a/llama.cpp/build/bin/llama-tokenize b/llama.cpp/build/bin/llama-tokenize old mode 100644 new mode 100755 index 0dd511e8700018a041d6314a77be52ed064541d5..82779c906ac7c60f2883e1e4f0308cf14fac380b --- a/llama.cpp/build/bin/llama-tokenize +++ b/llama.cpp/build/bin/llama-tokenize @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:6f914c0771d537db92a8dd90a3a81bedf8d90c1dedca14e8e5d1adfc3f22450b -size 1631640 +oid sha256:4410ba6c1e1c5fa7a81ea08b36c6a818571c41d355528e2415d05bd06e08de3a +size 1457568 diff --git a/llama.cpp/build/bin/llama-tts b/llama.cpp/build/bin/llama-tts old mode 100644 new mode 100755 index 39fbb4afee6514159f6d6fc6b6b999d6827243b6..e6bcae6bc72d8656937e1d4b4a476acd20f2a1ce --- a/llama.cpp/build/bin/llama-tts +++ b/llama.cpp/build/bin/llama-tts @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:f547191398c108a4b7d4e6dc76c674f33e3a9a087a2f61822c2a81c6875d84cd -size 2043000 +oid sha256:31d10d44ad74354dce7ebeddbae1acc26bac26c8fa2b60b49295f868267decd1 +size 1855176 diff --git a/llama.cpp/build/bin/llama-vdot b/llama.cpp/build/bin/llama-vdot old mode 100644 new 
mode 100755 index 47ea70f7729fce58b09e3619889c0a0aaaf8318a..8220224282a64202a86b7d936f8f6e8ab1923ce9 Binary files a/llama.cpp/build/bin/llama-vdot and b/llama.cpp/build/bin/llama-vdot differ diff --git a/llama.cpp/build/bin/rpc-server b/llama.cpp/build/bin/rpc-server deleted file mode 100644 index fa4cc053bfc314dbf0428709b1c81c55c2665768..0000000000000000000000000000000000000000 Binary files a/llama.cpp/build/bin/rpc-server and /dev/null differ diff --git a/llama.cpp/build/bin/test-arg-parser b/llama.cpp/build/bin/test-arg-parser old mode 100644 new mode 100755 index 28c36f9bb3fe7ec7a9de8c4e537005749f083dd6..4701c1f4a509f23eea1776f2f6d3b02f908c5d73 --- a/llama.cpp/build/bin/test-arg-parser +++ b/llama.cpp/build/bin/test-arg-parser @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:c48d58635ae6d1de5bd8464f837b7349d6717dcd4317c40d5dec70a9cd0c39a2 -size 1998136 +oid sha256:f02b32de58ae0f2efe59d15de990be294565a75c23e80cc7c15e602fa7dd722c +size 1814392 diff --git a/llama.cpp/build/bin/test-autorelease b/llama.cpp/build/bin/test-autorelease old mode 100644 new mode 100755 index d6e485e7c8764481f70beb23a96dcfe23c81b36d..29c2303626b10c2333240fc802673f2ad6bf9c49 Binary files a/llama.cpp/build/bin/test-autorelease and b/llama.cpp/build/bin/test-autorelease differ diff --git a/llama.cpp/build/bin/test-backend-ops b/llama.cpp/build/bin/test-backend-ops old mode 100644 new mode 100755 index bc980fb24b179d376b480c140c44ba090a917c5a..81b339e8e0a083d17d6e621542da5dc52bd70e78 --- a/llama.cpp/build/bin/test-backend-ops +++ b/llama.cpp/build/bin/test-backend-ops @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:19e3f6d2076a28d5641918675f6176b15a92e8d59f5d352b0cab7dc09873d4da -size 448216 +oid sha256:fc78d75f7ed8d59744afd69584d002aed13ef0cdcace31b2dff8d9abd6e5fd4c +size 437288 diff --git a/llama.cpp/build/bin/test-barrier b/llama.cpp/build/bin/test-barrier old mode 100644 new mode 100755 index 43bb79a8df60aa8c37683f73ed920ed26469a290..c8cfeb714e73a3d572bceb9369ece4a0a8ee0687 Binary files a/llama.cpp/build/bin/test-barrier and b/llama.cpp/build/bin/test-barrier differ diff --git a/llama.cpp/build/bin/test-c b/llama.cpp/build/bin/test-c old mode 100644 new mode 100755 index 69142b6ba381fb4d237b5d480982c08d725411d0..453d2aa172c3f46e081b576697c4d5f6a9ceb158 Binary files a/llama.cpp/build/bin/test-c and b/llama.cpp/build/bin/test-c differ diff --git a/llama.cpp/build/bin/test-chat b/llama.cpp/build/bin/test-chat old mode 100644 new mode 100755 index a6b18d6dc32ae1af54f78de34adb9389e3f7653b..41f1089e14416c580239c1049098062d5a2c34fc --- a/llama.cpp/build/bin/test-chat +++ b/llama.cpp/build/bin/test-chat @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:e1d4fc7c710090d3c9d1d3d85c96a66c8a2a800b83334e81d1bc4f8fb5dcc0b6 -size 1796224 +oid sha256:635c00c87e832fa112559cced5ef7e3b966353e402523cb9d55518fbb88ac473 +size 1642256 diff --git a/llama.cpp/build/bin/test-chat-template b/llama.cpp/build/bin/test-chat-template old mode 100644 new mode 100755 index 38b896549272d6014e287b29b71a23291d7f15ec..316e4e5ab4e8a5418b593d7fe817e65c33d9d68a --- a/llama.cpp/build/bin/test-chat-template +++ b/llama.cpp/build/bin/test-chat-template @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:96fa77fbcc385c1373f34bc9842fc7b52514bc3a0359b831955d878f3d4deb5a -size 1745736 +oid sha256:670188cf20205177c86f2cc0a5592c1dc8def08d52854f1e8efe1d96190d66fb +size 1587288 diff --git a/llama.cpp/build/bin/test-gguf b/llama.cpp/build/bin/test-gguf old mode 100644 new mode 
100755 index 8939b01a1d9ba5cb89be2ac801557f9f0472869d..6d834dd7573ed41e96a7559ce21727a195e075aa Binary files a/llama.cpp/build/bin/test-gguf and b/llama.cpp/build/bin/test-gguf differ diff --git a/llama.cpp/build/bin/test-grammar-integration b/llama.cpp/build/bin/test-grammar-integration old mode 100644 new mode 100755 index a057c66ab39bcfa6165a2ba5f6f2e406f515497b..a6e17a0680dc540996b680216bd77d80f9493196 --- a/llama.cpp/build/bin/test-grammar-integration +++ b/llama.cpp/build/bin/test-grammar-integration @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:05231ff039ebc6656108d6c480e55d4a4b61ae56886abea87fa7ce7f53c56bff -size 1716792 +oid sha256:29ce883f3c98664c036e037181f2036714f53c442d34d0688f6d3628fe8d42c8 +size 1551472 diff --git a/llama.cpp/build/bin/test-grammar-parser b/llama.cpp/build/bin/test-grammar-parser old mode 100644 new mode 100755 index a534d32c6d98441fdb431d7a840f9b4fa55ef29a..b24fcc0b37d0d5cc085278bf140842fcac872b3f Binary files a/llama.cpp/build/bin/test-grammar-parser and b/llama.cpp/build/bin/test-grammar-parser differ diff --git a/llama.cpp/build/bin/test-json-schema-to-grammar b/llama.cpp/build/bin/test-json-schema-to-grammar old mode 100644 new mode 100755 index ed776f8ecd3d492199b080cbd57bd1a4cdafa350..2e8eee536fb94c2d95bd315863ab484849a40c01 --- a/llama.cpp/build/bin/test-json-schema-to-grammar +++ b/llama.cpp/build/bin/test-json-schema-to-grammar @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:7a17a51896fd8315838a9f9b1a0e293eb910bfdd0a2d5bfac4a18f3c43e3e2c9 -size 1709000 +oid sha256:a5842bd1b6a9f19a18258aa0e9c17898a7a6844d46b0565e35a252384efdb4f1 +size 1543512 diff --git a/llama.cpp/build/bin/test-llama-grammar b/llama.cpp/build/bin/test-llama-grammar old mode 100644 new mode 100755 index d8d15905eeb51d3b441192a271cc1e3e0bb3d357..52e097414df232134a533acde7dea41c03943bd4 Binary files a/llama.cpp/build/bin/test-llama-grammar and b/llama.cpp/build/bin/test-llama-grammar differ diff --git a/llama.cpp/build/bin/test-log b/llama.cpp/build/bin/test-log old mode 100644 new mode 100755 index 411d3c7baa56dd2e8e41db1090db21425ac8f30f..7a51b57a7414c4ebb10a18a3786fe789d371d41d Binary files a/llama.cpp/build/bin/test-log and b/llama.cpp/build/bin/test-log differ diff --git a/llama.cpp/build/bin/test-model-load-cancel b/llama.cpp/build/bin/test-model-load-cancel old mode 100644 new mode 100755 index b98caa2a3d69d2c4b23c9fa73b5e5b8c86bdb984..2483ce360d4a9ffb48c132f467c17a8d2ab6c2b7 Binary files a/llama.cpp/build/bin/test-model-load-cancel and b/llama.cpp/build/bin/test-model-load-cancel differ diff --git a/llama.cpp/build/bin/test-quantize-fns b/llama.cpp/build/bin/test-quantize-fns old mode 100644 new mode 100755 index 49b7d3731e04ac0162eacbbd7a79e439d01c6862..407e91cd4cc9a7bc1e289c072c3fa769d0146647 Binary files a/llama.cpp/build/bin/test-quantize-fns and b/llama.cpp/build/bin/test-quantize-fns differ diff --git a/llama.cpp/build/bin/test-quantize-perf b/llama.cpp/build/bin/test-quantize-perf old mode 100644 new mode 100755 index b991b65683a5f5c09d4516bd0a118a24d22f7854..1d07be0762ce3c1b533dae147315a96f485eef65 Binary files a/llama.cpp/build/bin/test-quantize-perf and b/llama.cpp/build/bin/test-quantize-perf differ diff --git a/llama.cpp/build/bin/test-rope b/llama.cpp/build/bin/test-rope old mode 100644 new mode 100755 index 19bd9df1bd36304374481b6f6e73cce57491487b..7816ca4bdc9b09c2572dcc8bf958206c3c60660c Binary files a/llama.cpp/build/bin/test-rope and b/llama.cpp/build/bin/test-rope differ diff --git 
a/llama.cpp/build/bin/test-sampling b/llama.cpp/build/bin/test-sampling old mode 100644 new mode 100755 index 8f1a9d0f77cdc4661b9530aa744d8a66cc141845..64970d9b5cfd2df9d00ce43989f17d5b2f6790cf Binary files a/llama.cpp/build/bin/test-sampling and b/llama.cpp/build/bin/test-sampling differ diff --git a/llama.cpp/build/bin/test-tokenizer-0 b/llama.cpp/build/bin/test-tokenizer-0 old mode 100644 new mode 100755 index 0c2582ca8446d8382fffaffb9daca9e565802a51..a50607743399ed3e843f39d4aa8a9fd3f3c37065 --- a/llama.cpp/build/bin/test-tokenizer-0 +++ b/llama.cpp/build/bin/test-tokenizer-0 @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:d3c44f223d427b66ddcda04d1fd56b5898e0c4c30905c1347861155f540fbcd7 -size 1642128 +oid sha256:ce2f30c0d174747442db103639cd1c732e33bc26ad7ed5143d943f8af0d516fb +size 1468456 diff --git a/llama.cpp/build/bin/test-tokenizer-1-bpe b/llama.cpp/build/bin/test-tokenizer-1-bpe old mode 100644 new mode 100755 index cee7eb26b3d220b475fe7a3f783e1c9ac2b77d89..5f8ef7cdc915fbf98da348d4cc85a3997902eb7d --- a/llama.cpp/build/bin/test-tokenizer-1-bpe +++ b/llama.cpp/build/bin/test-tokenizer-1-bpe @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:a16ccb6d521ffc5f0dccd8227533c5ee734d4fcd80667dd3aebd4800eae7e1b4 -size 1632240 +oid sha256:7c2fa1156d1a3664ff7cf6edd4fc00ed3bbcf782c12742f855a2f1a0e3204f70 +size 1454256 diff --git a/llama.cpp/build/bin/test-tokenizer-1-spm b/llama.cpp/build/bin/test-tokenizer-1-spm old mode 100644 new mode 100755 index 369b8c178623fe321d2e7ac2dcd4119ab5238326..b388e25ea73513148a6576f0e14b9a6f3bc6954a --- a/llama.cpp/build/bin/test-tokenizer-1-spm +++ b/llama.cpp/build/bin/test-tokenizer-1-spm @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:01551e4b5d997968836d5b6fde1b96f8cf081ed9e6a522576cbe0807ca9e53b7 -size 1628048 +oid sha256:3e5b7ffc69d953edf0c2b2833313fa532ca16b7fcc01a7b5be74a453f2b77512 +size 1449880 diff --git a/llama.cpp/ci/README.md b/llama.cpp/ci/README.md new file mode 100644 index 0000000000000000000000000000000000000000..4064705190697a7da430e1b85863afe3c71ea123 --- /dev/null +++ b/llama.cpp/ci/README.md @@ -0,0 +1,29 @@ +# CI + +In addition to [Github Actions](https://github.com/ggerganov/llama.cpp/actions) `llama.cpp` uses a custom CI framework: + +https://github.com/ggml-org/ci + +It monitors the `master` branch for new commits and runs the +[ci/run.sh](https://github.com/ggerganov/llama.cpp/blob/master/ci/run.sh) script on dedicated cloud instances. This allows us +to execute heavier workloads compared to just using Github Actions. Also with time, the cloud instances will be scaled +to cover various hardware architectures, including GPU and Apple Silicon instances. + +Collaborators can optionally trigger the CI run by adding the `ggml-ci` keyword to their commit message. +Only the branches of this repo are monitored for this keyword. 
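+
+For example, a collaborator could trigger the custom CI for a work-in-progress change like this (the branch name and commit message below are purely illustrative):
+
+```bash
+# hypothetical branch and commit message -- the only requirements are that the
+# commit message contains the "ggml-ci" keyword and the branch is pushed to this repo
+git checkout -b ci/test-my-change
+git commit -am "llama : tweak sampling (ggml-ci)"
+git push origin ci/test-my-change
+```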
+ +It is good practice, before publishing changes, to execute the full CI locally on your machine: + +```bash +mkdir tmp + +# CPU-only build +bash ./ci/run.sh ./tmp/results ./tmp/mnt + +# with CUDA support +GG_BUILD_CUDA=1 bash ./ci/run.sh ./tmp/results ./tmp/mnt + +# with SYCL support +source /opt/intel/oneapi/setvars.sh +GG_BUILD_SYCL=1 bash ./ci/run.sh ./tmp/results ./tmp/mnt +``` diff --git a/llama.cpp/ci/run.sh b/llama.cpp/ci/run.sh new file mode 100755 index 0000000000000000000000000000000000000000..77c32ce0050aa95ee2ad2c5d078f0766a9695945 --- /dev/null +++ b/llama.cpp/ci/run.sh @@ -0,0 +1,854 @@ +#!/bin/bash +# +# sample usage: +# +# mkdir tmp +# +# # CPU-only build +# bash ./ci/run.sh ./tmp/results ./tmp/mnt +# +# # with CUDA support +# GG_BUILD_CUDA=1 bash ./ci/run.sh ./tmp/results ./tmp/mnt +# +# # with SYCL support +# GG_BUILD_SYCL=1 bash ./ci/run.sh ./tmp/results ./tmp/mnt +# +# # with VULKAN support +# GG_BUILD_VULKAN=1 bash ./ci/run.sh ./tmp/results ./tmp/mnt +# + +if [ -z "$2" ]; then + echo "usage: $0 <output-dir> <mnt-dir>" + exit 1 +fi + +mkdir -p "$1" +mkdir -p "$2" + +OUT=$(realpath "$1") +MNT=$(realpath "$2") + +rm -f "$OUT/*.log" +rm -f "$OUT/*.exit" +rm -f "$OUT/*.md" + +sd=`dirname $0` +cd $sd/../ +SRC=`pwd` + +CMAKE_EXTRA="-DLLAMA_FATAL_WARNINGS=ON" + +if [ ! -z ${GG_BUILD_METAL} ]; then + CMAKE_EXTRA="${CMAKE_EXTRA} -DGGML_METAL=ON -DGGML_METAL_USE_BF16=ON" +fi + +if [ ! -z ${GG_BUILD_CUDA} ]; then + CMAKE_EXTRA="${CMAKE_EXTRA} -DGGML_CUDA=ON -DCMAKE_CUDA_ARCHITECTURES=native" +fi + +if [ ! -z ${GG_BUILD_SYCL} ]; then + if [ -z ${ONEAPI_ROOT} ]; then + echo "ONEAPI_ROOT not detected, please install oneAPI base toolkit and enable it by:" + echo "source /opt/intel/oneapi/setvars.sh" + exit 1 + fi + + CMAKE_EXTRA="${CMAKE_EXTRA} -DGGML_SYCL=1 -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx -DGGML_SYCL_F16=ON" +fi + +if [ ! -z ${GG_BUILD_VULKAN} ]; then + CMAKE_EXTRA="${CMAKE_EXTRA} -DGGML_VULKAN=1" +fi +## helpers + +# download a file if it does not exist or if it is outdated +function gg_wget { + local out=$1 + local url=$2 + + local cwd=`pwd` + + mkdir -p $out + cd $out + + # should not re-download if file is the same + wget -nv -N $url + + cd $cwd +} + +function gg_printf { + printf -- "$@" >> $OUT/README.md +} + +function gg_run { + ci=$1 + + set -o pipefail + set -x + + gg_run_$ci | tee $OUT/$ci.log + cur=$? + echo "$cur" > $OUT/$ci.exit + + set +x + set +o pipefail + + gg_sum_$ci + + ret=$((ret | cur)) +} + +## ci + +# ctest_debug + +function gg_run_ctest_debug { + cd ${SRC} + + rm -rf build-ci-debug && mkdir build-ci-debug && cd build-ci-debug + + set -e + + # Check cmake, make and ctest are installed + gg_check_build_requirements + + (time cmake -DCMAKE_BUILD_TYPE=Debug ${CMAKE_EXTRA} .. ) 2>&1 | tee -a $OUT/${ci}-cmake.log + (time make -j$(nproc) ) 2>&1 | tee -a $OUT/${ci}-make.log + + (time ctest --output-on-failure -L main -E test-opt ) 2>&1 | tee -a $OUT/${ci}-ctest.log + + set +e +} + +function gg_sum_ctest_debug { + gg_printf '### %s\n\n' "${ci}" + + gg_printf 'Runs ctest in debug mode\n' + gg_printf '- status: %s\n' "$(cat $OUT/${ci}.exit)" + gg_printf '```\n' + gg_printf '%s\n' "$(cat $OUT/${ci}-ctest.log)" + gg_printf '```\n' + gg_printf '\n' +} + +# ctest_release + +function gg_run_ctest_release { + cd ${SRC} + + rm -rf build-ci-release && mkdir build-ci-release && cd build-ci-release + + set -e + + # Check cmake, make and ctest are installed + gg_check_build_requirements + + (time cmake -DCMAKE_BUILD_TYPE=Release ${CMAKE_EXTRA} ..
) 2>&1 | tee -a $OUT/${ci}-cmake.log + (time make -j$(nproc) ) 2>&1 | tee -a $OUT/${ci}-make.log + + if [ -z ${GG_BUILD_LOW_PERF} ]; then + (time ctest --output-on-failure -L main ) 2>&1 | tee -a $OUT/${ci}-ctest.log + else + (time ctest --output-on-failure -L main -E test-opt ) 2>&1 | tee -a $OUT/${ci}-ctest.log + fi + + set +e +} + +function gg_sum_ctest_release { + gg_printf '### %s\n\n' "${ci}" + + gg_printf 'Runs ctest in release mode\n' + gg_printf '- status: %s\n' "$(cat $OUT/${ci}.exit)" + gg_printf '```\n' + gg_printf '%s\n' "$(cat $OUT/${ci}-ctest.log)" + gg_printf '```\n' +} + +# test_scripts_debug + +function gg_run_test_scripts_debug { + cd ${SRC} + + set -e + + (cd ./examples/gguf-split && time bash tests.sh "$SRC/build-ci-debug/bin" "$MNT/models") 2>&1 | tee -a $OUT/${ci}-scripts.log + (cd ./examples/quantize && time bash tests.sh "$SRC/build-ci-debug/bin" "$MNT/models") 2>&1 | tee -a $OUT/${ci}-scripts.log + + set +e +} + +function gg_sum_test_scripts_debug { + gg_printf '### %s\n\n' "${ci}" + + gg_printf 'Runs test scripts in debug mode\n' + gg_printf '- status: %s\n' "$(cat $OUT/${ci}.exit)" + gg_printf '```\n' + gg_printf '%s\n' "$(cat $OUT/${ci}-scripts.log)" + gg_printf '```\n' + gg_printf '\n' +} + +# test_scripts_release + +function gg_run_test_scripts_release { + cd ${SRC} + + set -e + + (cd ./examples/gguf-split && time bash tests.sh "$SRC/build-ci-release/bin" "$MNT/models") 2>&1 | tee -a $OUT/${ci}-scripts.log + (cd ./examples/quantize && time bash tests.sh "$SRC/build-ci-release/bin" "$MNT/models") 2>&1 | tee -a $OUT/${ci}-scripts.log + + set +e +} + +function gg_sum_test_scripts_release { + gg_printf '### %s\n\n' "${ci}" + + gg_printf 'Runs test scripts in release mode\n' + gg_printf '- status: %s\n' "$(cat $OUT/${ci}.exit)" + gg_printf '```\n' + gg_printf '%s\n' "$(cat $OUT/${ci}-scripts.log)" + gg_printf '```\n' + gg_printf '\n' +} + +function gg_get_model { + local gguf_0="$MNT/models/pythia/1.4B/ggml-model-f16.gguf" + local gguf_1="$MNT/models/pythia/2.8B/ggml-model-f16.gguf" + local gguf_2="$MNT/models/open-llama/7B-v2/ggml-model-f16.gguf" + if [[ -s $gguf_0 ]]; then + echo -n "$gguf_0" + elif [[ -s $gguf_1 ]]; then + echo -n "$gguf_1" + elif [[ -s $gguf_2 ]]; then + echo -n "$gguf_2" + else + echo >&2 "No model found. Can't run gg_run_ctest_with_model." + exit 1 + fi +} + +function gg_run_ctest_with_model_debug { + cd ${SRC} + + local model; model=$(gg_get_model) + cd build-ci-debug + set -e + (LLAMACPP_TEST_MODELFILE="$model" time ctest --output-on-failure -L model) 2>&1 | tee -a $OUT/${ci}-ctest.log + set +e + cd .. +} + +function gg_run_ctest_with_model_release { + cd ${SRC} + + local model; model=$(gg_get_model) + cd build-ci-release + set -e + (LLAMACPP_TEST_MODELFILE="$model" time ctest --output-on-failure -L model) 2>&1 | tee -a $OUT/${ci}-ctest.log + set +e + cd .. 
+} + +function gg_sum_ctest_with_model_debug { + gg_printf '### %s\n\n' "${ci}" + + gg_printf 'Runs ctest with model files in debug mode\n' + gg_printf '- status: %s\n' "$(cat $OUT/${ci}.exit)" + gg_printf '```\n' + gg_printf '%s\n' "$(cat $OUT/${ci}-ctest.log)" + gg_printf '```\n' +} + +function gg_sum_ctest_with_model_release { + gg_printf '### %s\n\n' "${ci}" + + gg_printf 'Runs ctest with model files in release mode\n' + gg_printf '- status: %s\n' "$(cat $OUT/${ci}.exit)" + gg_printf '```\n' + gg_printf '%s\n' "$(cat $OUT/${ci}-ctest.log)" + gg_printf '```\n' +} + +# open_llama_7b_v2 + +function gg_run_open_llama_7b_v2 { + cd ${SRC} + + gg_wget models-mnt/open-llama/7B-v2/ https://huggingface.co/openlm-research/open_llama_7b_v2/raw/main/config.json + gg_wget models-mnt/open-llama/7B-v2/ https://huggingface.co/openlm-research/open_llama_7b_v2/resolve/main/tokenizer.model + gg_wget models-mnt/open-llama/7B-v2/ https://huggingface.co/openlm-research/open_llama_7b_v2/raw/main/tokenizer_config.json + gg_wget models-mnt/open-llama/7B-v2/ https://huggingface.co/openlm-research/open_llama_7b_v2/raw/main/special_tokens_map.json + gg_wget models-mnt/open-llama/7B-v2/ https://huggingface.co/openlm-research/open_llama_7b_v2/raw/main/pytorch_model.bin.index.json + gg_wget models-mnt/open-llama/7B-v2/ https://huggingface.co/openlm-research/open_llama_7b_v2/resolve/main/pytorch_model-00001-of-00002.bin + gg_wget models-mnt/open-llama/7B-v2/ https://huggingface.co/openlm-research/open_llama_7b_v2/resolve/main/pytorch_model-00002-of-00002.bin + gg_wget models-mnt/open-llama/7B-v2/ https://huggingface.co/openlm-research/open_llama_7b_v2/raw/main/generation_config.json + + gg_wget models-mnt/wikitext/ https://huggingface.co/datasets/ggml-org/ci/resolve/main/wikitext-2-raw-v1.zip + unzip -o models-mnt/wikitext/wikitext-2-raw-v1.zip -d models-mnt/wikitext/ + + path_models="../models-mnt/open-llama/7B-v2" + path_wiki="../models-mnt/wikitext/wikitext-2-raw" + + rm -rf build-ci-release && mkdir build-ci-release && cd build-ci-release + + set -e + + (time cmake -DCMAKE_BUILD_TYPE=Release ${CMAKE_EXTRA} .. 
) 2>&1 | tee -a $OUT/${ci}-cmake.log + (time make -j$(nproc) ) 2>&1 | tee -a $OUT/${ci}-make.log + + python3 ../examples/convert_legacy_llama.py ${path_models} --outfile ${path_models}/ggml-model-f16.gguf + + model_f16="${path_models}/ggml-model-f16.gguf" + model_q8_0="${path_models}/ggml-model-q8_0.gguf" + model_q4_0="${path_models}/ggml-model-q4_0.gguf" + model_q4_1="${path_models}/ggml-model-q4_1.gguf" + model_q5_0="${path_models}/ggml-model-q5_0.gguf" + model_q5_1="${path_models}/ggml-model-q5_1.gguf" + model_q2_k="${path_models}/ggml-model-q2_k.gguf" + model_q3_k="${path_models}/ggml-model-q3_k.gguf" + model_q4_k="${path_models}/ggml-model-q4_k.gguf" + model_q5_k="${path_models}/ggml-model-q5_k.gguf" + model_q6_k="${path_models}/ggml-model-q6_k.gguf" + + wiki_test="${path_wiki}/wiki.test.raw" + + ./bin/llama-quantize ${model_f16} ${model_q8_0} q8_0 + ./bin/llama-quantize ${model_f16} ${model_q4_0} q4_0 + ./bin/llama-quantize ${model_f16} ${model_q4_1} q4_1 + ./bin/llama-quantize ${model_f16} ${model_q5_0} q5_0 + ./bin/llama-quantize ${model_f16} ${model_q5_1} q5_1 + ./bin/llama-quantize ${model_f16} ${model_q2_k} q2_k + ./bin/llama-quantize ${model_f16} ${model_q3_k} q3_k + ./bin/llama-quantize ${model_f16} ${model_q4_k} q4_k + ./bin/llama-quantize ${model_f16} ${model_q5_k} q5_k + ./bin/llama-quantize ${model_f16} ${model_q6_k} q6_k + + (time ./bin/llama-cli -no-cnv --model ${model_f16} -t 1 -ngl 99 -c 0 -s 1234 -n 256 --ignore-eos -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-f16.log + (time ./bin/llama-cli -no-cnv --model ${model_q8_0} -t 1 -ngl 99 -c 0 -s 1234 -n 256 --ignore-eos -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-q8_0.log + (time ./bin/llama-cli -no-cnv --model ${model_q4_0} -t 1 -ngl 99 -c 0 -s 1234 -n 256 --ignore-eos -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-q4_0.log + (time ./bin/llama-cli -no-cnv --model ${model_q4_1} -t 1 -ngl 99 -c 0 -s 1234 -n 256 --ignore-eos -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-q4_1.log + (time ./bin/llama-cli -no-cnv --model ${model_q5_0} -t 1 -ngl 99 -c 0 -s 1234 -n 256 --ignore-eos -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-q5_0.log + (time ./bin/llama-cli -no-cnv --model ${model_q5_1} -t 1 -ngl 99 -c 0 -s 1234 -n 256 --ignore-eos -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-q5_1.log + (time ./bin/llama-cli -no-cnv --model ${model_q2_k} -t 1 -ngl 99 -c 0 -s 1234 -n 256 --ignore-eos -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-q2_k.log + (time ./bin/llama-cli -no-cnv --model ${model_q3_k} -t 1 -ngl 99 -c 0 -s 1234 -n 256 --ignore-eos -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-q3_k.log + (time ./bin/llama-cli -no-cnv --model ${model_q4_k} -t 1 -ngl 99 -c 0 -s 1234 -n 256 --ignore-eos -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-q4_k.log + (time ./bin/llama-cli -no-cnv --model ${model_q5_k} -t 1 -ngl 99 -c 0 -s 1234 -n 256 --ignore-eos -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-q5_k.log + (time ./bin/llama-cli -no-cnv --model ${model_q6_k} -t 1 -ngl 99 -c 0 -s 1234 -n 256 --ignore-eos -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-q6_k.log + + (time ./bin/llama-perplexity --model ${model_f16} -f ${wiki_test} -t 1 -ngl 99 -c 2048 -b 512 --chunks 4 ) 2>&1 | tee -a $OUT/${ci}-tg-f16.log + (time ./bin/llama-perplexity --model ${model_q8_0} -f ${wiki_test} -t 1 -ngl 99 -c 2048 -b 512 
--chunks 4 ) 2>&1 | tee -a $OUT/${ci}-tg-q8_0.log + (time ./bin/llama-perplexity --model ${model_q4_0} -f ${wiki_test} -t 1 -ngl 99 -c 2048 -b 512 --chunks 4 ) 2>&1 | tee -a $OUT/${ci}-tg-q4_0.log + (time ./bin/llama-perplexity --model ${model_q4_1} -f ${wiki_test} -t 1 -ngl 99 -c 2048 -b 512 --chunks 4 ) 2>&1 | tee -a $OUT/${ci}-tg-q4_1.log + (time ./bin/llama-perplexity --model ${model_q5_0} -f ${wiki_test} -t 1 -ngl 99 -c 2048 -b 512 --chunks 4 ) 2>&1 | tee -a $OUT/${ci}-tg-q5_0.log + (time ./bin/llama-perplexity --model ${model_q5_1} -f ${wiki_test} -t 1 -ngl 99 -c 2048 -b 512 --chunks 4 ) 2>&1 | tee -a $OUT/${ci}-tg-q5_1.log + (time ./bin/llama-perplexity --model ${model_q2_k} -f ${wiki_test} -t 1 -ngl 99 -c 2048 -b 512 --chunks 4 ) 2>&1 | tee -a $OUT/${ci}-tg-q2_k.log + (time ./bin/llama-perplexity --model ${model_q3_k} -f ${wiki_test} -t 1 -ngl 99 -c 2048 -b 512 --chunks 4 ) 2>&1 | tee -a $OUT/${ci}-tg-q3_k.log + (time ./bin/llama-perplexity --model ${model_q4_k} -f ${wiki_test} -t 1 -ngl 99 -c 2048 -b 512 --chunks 4 ) 2>&1 | tee -a $OUT/${ci}-tg-q4_k.log + (time ./bin/llama-perplexity --model ${model_q5_k} -f ${wiki_test} -t 1 -ngl 99 -c 2048 -b 512 --chunks 4 ) 2>&1 | tee -a $OUT/${ci}-tg-q5_k.log + (time ./bin/llama-perplexity --model ${model_q6_k} -f ${wiki_test} -t 1 -ngl 99 -c 2048 -b 512 --chunks 4 ) 2>&1 | tee -a $OUT/${ci}-tg-q6_k.log + + (time ./bin/llama-imatrix --model ${model_f16} -f ${wiki_test} -t 1 -ngl 99 -c 2048 -b 512 --chunks 4 ) 2>&1 | tee -a $OUT/${ci}-imatrix.log + + (time ./bin/llama-save-load-state --model ${model_q4_0} -ngl 10 -c 0 ) 2>&1 | tee -a $OUT/${ci}-save-load-state.log + (time ./bin/llama-save-load-state --model ${model_q4_0} -ngl 10 -c 0 -fa ) 2>&1 | tee -a $OUT/${ci}-save-load-state.log + (time ./bin/llama-save-load-state --model ${model_q4_0} -ngl 99 -c 0 ) 2>&1 | tee -a $OUT/${ci}-save-load-state.log + (time ./bin/llama-save-load-state --model ${model_q4_0} -ngl 99 -c 0 -fa ) 2>&1 | tee -a $OUT/${ci}-save-load-state.log + + function check_ppl { + qnt="$1" + ppl=$(echo "$2" | grep -oE "[0-9]+\.[0-9]+" | tail -n 1) + + if [ $(echo "$ppl > 20.0" | bc) -eq 1 ]; then + printf ' - %s @ %s (FAIL: ppl > 20.0)\n' "$qnt" "$ppl" + return 20 + fi + + printf ' - %s @ %s OK\n' "$qnt" "$ppl" + return 0 + } + + check_ppl "f16" "$(cat $OUT/${ci}-tg-f16.log | grep "^\[1\]")" | tee -a $OUT/${ci}-ppl.log + check_ppl "q8_0" "$(cat $OUT/${ci}-tg-q8_0.log | grep "^\[1\]")" | tee -a $OUT/${ci}-ppl.log + check_ppl "q4_0" "$(cat $OUT/${ci}-tg-q4_0.log | grep "^\[1\]")" | tee -a $OUT/${ci}-ppl.log + check_ppl "q4_1" "$(cat $OUT/${ci}-tg-q4_1.log | grep "^\[1\]")" | tee -a $OUT/${ci}-ppl.log + check_ppl "q5_0" "$(cat $OUT/${ci}-tg-q5_0.log | grep "^\[1\]")" | tee -a $OUT/${ci}-ppl.log + check_ppl "q5_1" "$(cat $OUT/${ci}-tg-q5_1.log | grep "^\[1\]")" | tee -a $OUT/${ci}-ppl.log + check_ppl "q2_k" "$(cat $OUT/${ci}-tg-q2_k.log | grep "^\[1\]")" | tee -a $OUT/${ci}-ppl.log + check_ppl "q3_k" "$(cat $OUT/${ci}-tg-q3_k.log | grep "^\[1\]")" | tee -a $OUT/${ci}-ppl.log + check_ppl "q4_k" "$(cat $OUT/${ci}-tg-q4_k.log | grep "^\[1\]")" | tee -a $OUT/${ci}-ppl.log + check_ppl "q5_k" "$(cat $OUT/${ci}-tg-q5_k.log | grep "^\[1\]")" | tee -a $OUT/${ci}-ppl.log + check_ppl "q6_k" "$(cat $OUT/${ci}-tg-q6_k.log | grep "^\[1\]")" | tee -a $OUT/${ci}-ppl.log + + cat $OUT/${ci}-imatrix.log | grep "Final" >> $OUT/${ci}-imatrix-sum.log + + set +e +} + +function gg_sum_open_llama_7b_v2 { + gg_printf '### %s\n\n' "${ci}" + + gg_printf 'OpenLLaMA 7B-v2:\n' + gg_printf '- status: %s\n' "$(cat
$OUT/${ci}.exit)" + gg_printf '- perplexity:\n%s\n' "$(cat $OUT/${ci}-ppl.log)" + gg_printf '- imatrix:\n```\n%s\n```\n' "$(cat $OUT/${ci}-imatrix-sum.log)" + gg_printf '- f16: \n```\n%s\n```\n' "$(cat $OUT/${ci}-tg-f16.log)" + gg_printf '- q8_0:\n```\n%s\n```\n' "$(cat $OUT/${ci}-tg-q8_0.log)" + gg_printf '- q4_0:\n```\n%s\n```\n' "$(cat $OUT/${ci}-tg-q4_0.log)" + gg_printf '- q4_1:\n```\n%s\n```\n' "$(cat $OUT/${ci}-tg-q4_1.log)" + gg_printf '- q5_0:\n```\n%s\n```\n' "$(cat $OUT/${ci}-tg-q5_0.log)" + gg_printf '- q5_1:\n```\n%s\n```\n' "$(cat $OUT/${ci}-tg-q5_1.log)" + gg_printf '- q2_k:\n```\n%s\n```\n' "$(cat $OUT/${ci}-tg-q2_k.log)" + gg_printf '- q3_k:\n```\n%s\n```\n' "$(cat $OUT/${ci}-tg-q3_k.log)" + gg_printf '- q4_k:\n```\n%s\n```\n' "$(cat $OUT/${ci}-tg-q4_k.log)" + gg_printf '- q5_k:\n```\n%s\n```\n' "$(cat $OUT/${ci}-tg-q5_k.log)" + gg_printf '- q6_k:\n```\n%s\n```\n' "$(cat $OUT/${ci}-tg-q6_k.log)" + gg_printf '- save-load-state: \n```\n%s\n```\n' "$(cat $OUT/${ci}-save-load-state.log)" +} + +# pythia_1.4b + +function gg_run_pythia_1_4b { + cd ${SRC} + + gg_wget models-mnt/pythia/1.4B/ https://huggingface.co/EleutherAI/pythia-1.4b/raw/main/config.json + gg_wget models-mnt/pythia/1.4B/ https://huggingface.co/EleutherAI/pythia-1.4b/raw/main/tokenizer.json + gg_wget models-mnt/pythia/1.4B/ https://huggingface.co/EleutherAI/pythia-1.4b/raw/main/tokenizer_config.json + gg_wget models-mnt/pythia/1.4B/ https://huggingface.co/EleutherAI/pythia-1.4b/raw/main/special_tokens_map.json + gg_wget models-mnt/pythia/1.4B/ https://huggingface.co/EleutherAI/pythia-1.4b/resolve/main/pytorch_model.bin + + gg_wget models-mnt/wikitext/ https://huggingface.co/datasets/ggml-org/ci/resolve/main/wikitext-2-raw-v1.zip + unzip -o models-mnt/wikitext/wikitext-2-raw-v1.zip -d models-mnt/wikitext/ + head -n 60 models-mnt/wikitext/wikitext-2-raw/wiki.test.raw > models-mnt/wikitext/wikitext-2-raw/wiki.test-60.raw + + path_models="../models-mnt/pythia/1.4B" + path_wiki="../models-mnt/wikitext/wikitext-2-raw" + + rm -rf build-ci-release && mkdir build-ci-release && cd build-ci-release + + set -e + + (time cmake -DCMAKE_BUILD_TYPE=Release ${CMAKE_EXTRA} .. 
) 2>&1 | tee -a $OUT/${ci}-cmake.log + (time make -j$(nproc) ) 2>&1 | tee -a $OUT/${ci}-make.log + + python3 ../convert_hf_to_gguf.py ${path_models} --outfile ${path_models}/ggml-model-f16.gguf + + model_f16="${path_models}/ggml-model-f16.gguf" + model_q8_0="${path_models}/ggml-model-q8_0.gguf" + model_q4_0="${path_models}/ggml-model-q4_0.gguf" + model_q4_1="${path_models}/ggml-model-q4_1.gguf" + model_q5_0="${path_models}/ggml-model-q5_0.gguf" + model_q5_1="${path_models}/ggml-model-q5_1.gguf" + model_q2_k="${path_models}/ggml-model-q2_k.gguf" + model_q3_k="${path_models}/ggml-model-q3_k.gguf" + model_q4_k="${path_models}/ggml-model-q4_k.gguf" + model_q5_k="${path_models}/ggml-model-q5_k.gguf" + model_q6_k="${path_models}/ggml-model-q6_k.gguf" + + wiki_test_60="${path_wiki}/wiki.test-60.raw" + + ./bin/llama-quantize ${model_f16} ${model_q8_0} q8_0 + ./bin/llama-quantize ${model_f16} ${model_q4_0} q4_0 + ./bin/llama-quantize ${model_f16} ${model_q4_1} q4_1 + ./bin/llama-quantize ${model_f16} ${model_q5_0} q5_0 + ./bin/llama-quantize ${model_f16} ${model_q5_1} q5_1 + ./bin/llama-quantize ${model_f16} ${model_q2_k} q2_k + ./bin/llama-quantize ${model_f16} ${model_q3_k} q3_k + ./bin/llama-quantize ${model_f16} ${model_q4_k} q4_k + ./bin/llama-quantize ${model_f16} ${model_q5_k} q5_k + ./bin/llama-quantize ${model_f16} ${model_q6_k} q6_k + + (time ./bin/llama-cli -no-cnv --model ${model_f16} -ngl 99 -c 0 -s 1234 -n 64 --ignore-eos -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-f16.log + (time ./bin/llama-cli -no-cnv --model ${model_q8_0} -ngl 99 -c 0 -s 1234 -n 64 --ignore-eos -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-q8_0.log + (time ./bin/llama-cli -no-cnv --model ${model_q4_0} -ngl 99 -c 0 -s 1234 -n 64 --ignore-eos -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-q4_0.log + (time ./bin/llama-cli -no-cnv --model ${model_q4_1} -ngl 99 -c 0 -s 1234 -n 64 --ignore-eos -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-q4_1.log + (time ./bin/llama-cli -no-cnv --model ${model_q5_0} -ngl 99 -c 0 -s 1234 -n 64 --ignore-eos -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-q5_0.log + (time ./bin/llama-cli -no-cnv --model ${model_q5_1} -ngl 99 -c 0 -s 1234 -n 64 --ignore-eos -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-q5_1.log + (time ./bin/llama-cli -no-cnv --model ${model_q2_k} -ngl 99 -c 0 -s 1234 -n 64 --ignore-eos -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-q2_k.log + (time ./bin/llama-cli -no-cnv --model ${model_q3_k} -ngl 99 -c 0 -s 1234 -n 64 --ignore-eos -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-q3_k.log + (time ./bin/llama-cli -no-cnv --model ${model_q4_k} -ngl 99 -c 0 -s 1234 -n 64 --ignore-eos -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-q4_k.log + (time ./bin/llama-cli -no-cnv --model ${model_q5_k} -ngl 99 -c 0 -s 1234 -n 64 --ignore-eos -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-q5_k.log + (time ./bin/llama-cli -no-cnv --model ${model_q6_k} -ngl 99 -c 0 -s 1234 -n 64 --ignore-eos -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-q6_k.log + + (time ./bin/llama-perplexity --model ${model_f16} -f ${wiki_test_60} -ngl 99 -c 128 -b 128 --chunks 1 ) 2>&1 | tee -a $OUT/${ci}-tg-f16.log + (time ./bin/llama-perplexity --model ${model_q8_0} -f ${wiki_test_60} -ngl 99 -c 128 -b 128 --chunks 1 ) 2>&1 | tee -a $OUT/${ci}-tg-q8_0.log + (time 
./bin/llama-perplexity --model ${model_q4_0} -f ${wiki_test_60} -ngl 99 -c 128 -b 128 --chunks 1 ) 2>&1 | tee -a $OUT/${ci}-tg-q4_0.log + (time ./bin/llama-perplexity --model ${model_q4_1} -f ${wiki_test_60} -ngl 99 -c 128 -b 128 --chunks 1 ) 2>&1 | tee -a $OUT/${ci}-tg-q4_1.log + (time ./bin/llama-perplexity --model ${model_q5_0} -f ${wiki_test_60} -ngl 99 -c 128 -b 128 --chunks 1 ) 2>&1 | tee -a $OUT/${ci}-tg-q5_0.log + (time ./bin/llama-perplexity --model ${model_q5_1} -f ${wiki_test_60} -ngl 99 -c 128 -b 128 --chunks 1 ) 2>&1 | tee -a $OUT/${ci}-tg-q5_1.log + (time ./bin/llama-perplexity --model ${model_q2_k} -f ${wiki_test_60} -ngl 99 -c 128 -b 128 --chunks 1 ) 2>&1 | tee -a $OUT/${ci}-tg-q2_k.log + (time ./bin/llama-perplexity --model ${model_q3_k} -f ${wiki_test_60} -ngl 99 -c 128 -b 128 --chunks 1 ) 2>&1 | tee -a $OUT/${ci}-tg-q3_k.log + (time ./bin/llama-perplexity --model ${model_q4_k} -f ${wiki_test_60} -ngl 99 -c 128 -b 128 --chunks 1 ) 2>&1 | tee -a $OUT/${ci}-tg-q4_k.log + (time ./bin/llama-perplexity --model ${model_q5_k} -f ${wiki_test_60} -ngl 99 -c 128 -b 128 --chunks 1 ) 2>&1 | tee -a $OUT/${ci}-tg-q5_k.log + (time ./bin/llama-perplexity --model ${model_q6_k} -f ${wiki_test_60} -ngl 99 -c 128 -b 128 --chunks 1 ) 2>&1 | tee -a $OUT/${ci}-tg-q6_k.log + + (time ./bin/llama-imatrix --model ${model_f16} -f ${wiki_test_60} -ngl 99 -c 128 -b 128 --chunks 1 ) 2>&1 | tee -a $OUT/${ci}-imatrix.log + + (time ./bin/llama-save-load-state --model ${model_q4_0} -ngl 99 -c 0 ) 2>&1 | tee -a $OUT/${ci}-save-load-state.log + (time ./bin/llama-save-load-state --model ${model_q4_0} -ngl 99 -c 0 -fa ) 2>&1 | tee -a $OUT/${ci}-save-load-state.log + + function check_ppl { + qnt="$1" + ppl=$(echo "$2" | grep -oE "[0-9]+\.[0-9]+" | tail -n 1) + + if [ $(echo "$ppl > 20.0" | bc) -eq 1 ]; then + printf ' - %s @ %s (FAIL: ppl > 20.0)\n' "$qnt" "$ppl" + return 20 + fi + + printf ' - %s @ %s OK\n' "$qnt" "$ppl" + return 0 + } + + check_ppl "f16" "$(cat $OUT/${ci}-tg-f16.log | grep "^\[1\]")" | tee -a $OUT/${ci}-ppl.log + check_ppl "q8_0" "$(cat $OUT/${ci}-tg-q8_0.log | grep "^\[1\]")" | tee -a $OUT/${ci}-ppl.log + check_ppl "q4_0" "$(cat $OUT/${ci}-tg-q4_0.log | grep "^\[1\]")" | tee -a $OUT/${ci}-ppl.log + check_ppl "q4_1" "$(cat $OUT/${ci}-tg-q4_1.log | grep "^\[1\]")" | tee -a $OUT/${ci}-ppl.log + check_ppl "q5_0" "$(cat $OUT/${ci}-tg-q5_0.log | grep "^\[1\]")" | tee -a $OUT/${ci}-ppl.log + check_ppl "q5_1" "$(cat $OUT/${ci}-tg-q5_1.log | grep "^\[1\]")" | tee -a $OUT/${ci}-ppl.log + #check_ppl "q2_k" "$(cat $OUT/${ci}-tg-q2_k.log | grep "^\[1\]")" | tee -a $OUT/${ci}-ppl.log # note: ppl > 20.0 for this quant and model + check_ppl "q3_k" "$(cat $OUT/${ci}-tg-q3_k.log | grep "^\[1\]")" | tee -a $OUT/${ci}-ppl.log + check_ppl "q4_k" "$(cat $OUT/${ci}-tg-q4_k.log | grep "^\[1\]")" | tee -a $OUT/${ci}-ppl.log + check_ppl "q5_k" "$(cat $OUT/${ci}-tg-q5_k.log | grep "^\[1\]")" | tee -a $OUT/${ci}-ppl.log + check_ppl "q6_k" "$(cat $OUT/${ci}-tg-q6_k.log | grep "^\[1\]")" | tee -a $OUT/${ci}-ppl.log + + cat $OUT/${ci}-imatrix.log | grep "Final" >> $OUT/${ci}-imatrix-sum.log + + set +e +} + +function gg_sum_pythia_1_4b { + gg_printf '### %s\n\n' "${ci}" + + gg_printf 'Pythia 1.4B:\n' + gg_printf '- status: %s\n' "$(cat $OUT/${ci}.exit)" + gg_printf '- perplexity:\n%s\n' "$(cat $OUT/${ci}-ppl.log)" + gg_printf '- imatrix:\n```\n%s\n```\n' "$(cat $OUT/${ci}-imatrix-sum.log)" + gg_printf '- f16: \n```\n%s\n```\n' "$(cat $OUT/${ci}-tg-f16.log)" + gg_printf '- q8_0:\n```\n%s\n```\n' "$(cat 
$OUT/${ci}-tg-q8_0.log)" + gg_printf '- q4_0:\n```\n%s\n```\n' "$(cat $OUT/${ci}-tg-q4_0.log)" + gg_printf '- q4_1:\n```\n%s\n```\n' "$(cat $OUT/${ci}-tg-q4_1.log)" + gg_printf '- q5_0:\n```\n%s\n```\n' "$(cat $OUT/${ci}-tg-q5_0.log)" + gg_printf '- q5_1:\n```\n%s\n```\n' "$(cat $OUT/${ci}-tg-q5_1.log)" + gg_printf '- q2_k:\n```\n%s\n```\n' "$(cat $OUT/${ci}-tg-q2_k.log)" + gg_printf '- q3_k:\n```\n%s\n```\n' "$(cat $OUT/${ci}-tg-q3_k.log)" + gg_printf '- q4_k:\n```\n%s\n```\n' "$(cat $OUT/${ci}-tg-q4_k.log)" + gg_printf '- q5_k:\n```\n%s\n```\n' "$(cat $OUT/${ci}-tg-q5_k.log)" + gg_printf '- q6_k:\n```\n%s\n```\n' "$(cat $OUT/${ci}-tg-q6_k.log)" + gg_printf '- save-load-state: \n```\n%s\n```\n' "$(cat $OUT/${ci}-save-load-state.log)" +} + +# pythia_2_8b + +function gg_run_pythia_2_8b { + cd ${SRC} + + gg_wget models-mnt/pythia/2.8B/ https://huggingface.co/EleutherAI/pythia-2.8b/raw/main/config.json + gg_wget models-mnt/pythia/2.8B/ https://huggingface.co/EleutherAI/pythia-2.8b/raw/main/tokenizer.json + gg_wget models-mnt/pythia/2.8B/ https://huggingface.co/EleutherAI/pythia-2.8b/raw/main/tokenizer_config.json + gg_wget models-mnt/pythia/2.8B/ https://huggingface.co/EleutherAI/pythia-2.8b/raw/main/special_tokens_map.json + gg_wget models-mnt/pythia/2.8B/ https://huggingface.co/EleutherAI/pythia-2.8b/resolve/main/pytorch_model.bin + + gg_wget models-mnt/wikitext/ https://huggingface.co/datasets/ggml-org/ci/resolve/main/wikitext-2-raw-v1.zip + unzip -o models-mnt/wikitext/wikitext-2-raw-v1.zip -d models-mnt/wikitext/ + + path_models="../models-mnt/pythia/2.8B" + path_wiki="../models-mnt/wikitext/wikitext-2-raw" + + rm -rf build-ci-release && mkdir build-ci-release && cd build-ci-release + + set -e + + (time cmake -DCMAKE_BUILD_TYPE=Release ${CMAKE_EXTRA} .. 
) 2>&1 | tee -a $OUT/${ci}-cmake.log + (time make -j$(nproc) ) 2>&1 | tee -a $OUT/${ci}-make.log + + python3 ../convert_hf_to_gguf.py ${path_models} --outfile ${path_models}/ggml-model-f16.gguf + + model_f16="${path_models}/ggml-model-f16.gguf" + model_q8_0="${path_models}/ggml-model-q8_0.gguf" + model_q4_0="${path_models}/ggml-model-q4_0.gguf" + model_q4_1="${path_models}/ggml-model-q4_1.gguf" + model_q5_0="${path_models}/ggml-model-q5_0.gguf" + model_q5_1="${path_models}/ggml-model-q5_1.gguf" + model_q2_k="${path_models}/ggml-model-q2_k.gguf" + model_q3_k="${path_models}/ggml-model-q3_k.gguf" + model_q4_k="${path_models}/ggml-model-q4_k.gguf" + model_q5_k="${path_models}/ggml-model-q5_k.gguf" + model_q6_k="${path_models}/ggml-model-q6_k.gguf" + + wiki_test="${path_wiki}/wiki.test.raw" + + ./bin/llama-quantize ${model_f16} ${model_q8_0} q8_0 + ./bin/llama-quantize ${model_f16} ${model_q4_0} q4_0 + ./bin/llama-quantize ${model_f16} ${model_q4_1} q4_1 + ./bin/llama-quantize ${model_f16} ${model_q5_0} q5_0 + ./bin/llama-quantize ${model_f16} ${model_q5_1} q5_1 + ./bin/llama-quantize ${model_f16} ${model_q2_k} q2_k + ./bin/llama-quantize ${model_f16} ${model_q3_k} q3_k + ./bin/llama-quantize ${model_f16} ${model_q4_k} q4_k + ./bin/llama-quantize ${model_f16} ${model_q5_k} q5_k + ./bin/llama-quantize ${model_f16} ${model_q6_k} q6_k + + (time ./bin/llama-cli -no-cnv --model ${model_f16} -t 1 -ngl 99 -c 0 -s 1234 -n 256 --ignore-eos -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-f16.log + (time ./bin/llama-cli -no-cnv --model ${model_q8_0} -t 1 -ngl 99 -c 0 -s 1234 -n 256 --ignore-eos -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-q8_0.log + (time ./bin/llama-cli -no-cnv --model ${model_q4_0} -t 1 -ngl 99 -c 0 -s 1234 -n 256 --ignore-eos -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-q4_0.log + (time ./bin/llama-cli -no-cnv --model ${model_q4_1} -t 1 -ngl 99 -c 0 -s 1234 -n 256 --ignore-eos -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-q4_1.log + (time ./bin/llama-cli -no-cnv --model ${model_q5_0} -t 1 -ngl 99 -c 0 -s 1234 -n 256 --ignore-eos -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-q5_0.log + (time ./bin/llama-cli -no-cnv --model ${model_q5_1} -t 1 -ngl 99 -c 0 -s 1234 -n 256 --ignore-eos -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-q5_1.log + (time ./bin/llama-cli -no-cnv --model ${model_q2_k} -t 1 -ngl 99 -c 0 -s 1234 -n 256 --ignore-eos -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-q2_k.log + (time ./bin/llama-cli -no-cnv --model ${model_q3_k} -t 1 -ngl 99 -c 0 -s 1234 -n 256 --ignore-eos -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-q3_k.log + (time ./bin/llama-cli -no-cnv --model ${model_q4_k} -t 1 -ngl 99 -c 0 -s 1234 -n 256 --ignore-eos -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-q4_k.log + (time ./bin/llama-cli -no-cnv --model ${model_q5_k} -t 1 -ngl 99 -c 0 -s 1234 -n 256 --ignore-eos -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-q5_k.log + (time ./bin/llama-cli -no-cnv --model ${model_q6_k} -t 1 -ngl 99 -c 0 -s 1234 -n 256 --ignore-eos -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-q6_k.log + + (time ./bin/llama-perplexity --model ${model_f16} -f ${wiki_test} -t 1 -ngl 99 -c 2048 -b 512 --chunks 4 ) 2>&1 | tee -a $OUT/${ci}-tg-f16.log + (time ./bin/llama-perplexity --model ${model_q8_0} -f ${wiki_test} -t 1 -ngl 99 -c 2048 -b 512 --chunks 4 ) 
2>&1 | tee -a $OUT/${ci}-tg-q8_0.log + (time ./bin/llama-perplexity --model ${model_q4_0} -f ${wiki_test} -t 1 -ngl 99 -c 2048 -b 512 --chunks 4 ) 2>&1 | tee -a $OUT/${ci}-tg-q4_0.log + (time ./bin/llama-perplexity --model ${model_q4_1} -f ${wiki_test} -t 1 -ngl 99 -c 2048 -b 512 --chunks 4 ) 2>&1 | tee -a $OUT/${ci}-tg-q4_1.log + (time ./bin/llama-perplexity --model ${model_q5_0} -f ${wiki_test} -t 1 -ngl 99 -c 2048 -b 512 --chunks 4 ) 2>&1 | tee -a $OUT/${ci}-tg-q5_0.log + (time ./bin/llama-perplexity --model ${model_q5_1} -f ${wiki_test} -t 1 -ngl 99 -c 2048 -b 512 --chunks 4 ) 2>&1 | tee -a $OUT/${ci}-tg-q5_1.log + (time ./bin/llama-perplexity --model ${model_q2_k} -f ${wiki_test} -t 1 -ngl 99 -c 2048 -b 512 --chunks 4 ) 2>&1 | tee -a $OUT/${ci}-tg-q2_k.log + (time ./bin/llama-perplexity --model ${model_q3_k} -f ${wiki_test} -t 1 -ngl 99 -c 2048 -b 512 --chunks 4 ) 2>&1 | tee -a $OUT/${ci}-tg-q3_k.log + (time ./bin/llama-perplexity --model ${model_q4_k} -f ${wiki_test} -t 1 -ngl 99 -c 2048 -b 512 --chunks 4 ) 2>&1 | tee -a $OUT/${ci}-tg-q4_k.log + (time ./bin/llama-perplexity --model ${model_q5_k} -f ${wiki_test} -t 1 -ngl 99 -c 2048 -b 512 --chunks 4 ) 2>&1 | tee -a $OUT/${ci}-tg-q5_k.log + (time ./bin/llama-perplexity --model ${model_q6_k} -f ${wiki_test} -t 1 -ngl 99 -c 2048 -b 512 --chunks 4 ) 2>&1 | tee -a $OUT/${ci}-tg-q6_k.log + + (time ./bin/llama-imatrix --model ${model_f16} -f ${wiki_test} -t 1 -ngl 99 -c 2048 -b 512 --chunks 4 ) 2>&1 | tee -a $OUT/${ci}-imatrix.log + + (time ./bin/llama-save-load-state --model ${model_q4_0} -ngl 10 -c 0 ) 2>&1 | tee -a $OUT/${ci}-save-load-state.log + (time ./bin/llama-save-load-state --model ${model_q4_0} -ngl 10 -c 0 -fa ) 2>&1 | tee -a $OUT/${ci}-save-load-state.log + (time ./bin/llama-save-load-state --model ${model_q4_0} -ngl 99 -c 0 ) 2>&1 | tee -a $OUT/${ci}-save-load-state.log + (time ./bin/llama-save-load-state --model ${model_q4_0} -ngl 99 -c 0 -fa ) 2>&1 | tee -a $OUT/${ci}-save-load-state.log + + function check_ppl { + qnt="$1" + ppl=$(echo "$2" | grep -oE "[0-9]+\.[0-9]+" | tail -n 1) + + if [ $(echo "$ppl > 20.0" | bc) -eq 1 ]; then + printf ' - %s @ %s (FAIL: ppl > 20.0)\n' "$qnt" "$ppl" + return 20 + fi + + printf ' - %s @ %s OK\n' "$qnt" "$ppl" + return 0 + } + + check_ppl "f16" "$(cat $OUT/${ci}-tg-f16.log | grep "^\[1\]")" | tee -a $OUT/${ci}-ppl.log + check_ppl "q8_0" "$(cat $OUT/${ci}-tg-q8_0.log | grep "^\[1\]")" | tee -a $OUT/${ci}-ppl.log + check_ppl "q4_0" "$(cat $OUT/${ci}-tg-q4_0.log | grep "^\[1\]")" | tee -a $OUT/${ci}-ppl.log + check_ppl "q4_1" "$(cat $OUT/${ci}-tg-q4_1.log | grep "^\[1\]")" | tee -a $OUT/${ci}-ppl.log + check_ppl "q5_0" "$(cat $OUT/${ci}-tg-q5_0.log | grep "^\[1\]")" | tee -a $OUT/${ci}-ppl.log + check_ppl "q5_1" "$(cat $OUT/${ci}-tg-q5_1.log | grep "^\[1\]")" | tee -a $OUT/${ci}-ppl.log + #check_ppl "q2_k" "$(cat $OUT/${ci}-tg-q2_k.log | grep "^\[1\]")" | tee -a $OUT/${ci}-ppl.log # note: ppl > 20.0 for this quant and model + check_ppl "q3_k" "$(cat $OUT/${ci}-tg-q3_k.log | grep "^\[1\]")" | tee -a $OUT/${ci}-ppl.log + check_ppl "q4_k" "$(cat $OUT/${ci}-tg-q4_k.log | grep "^\[1\]")" | tee -a $OUT/${ci}-ppl.log + check_ppl "q5_k" "$(cat $OUT/${ci}-tg-q5_k.log | grep "^\[1\]")" | tee -a $OUT/${ci}-ppl.log + check_ppl "q6_k" "$(cat $OUT/${ci}-tg-q6_k.log | grep "^\[1\]")" | tee -a $OUT/${ci}-ppl.log + + cat $OUT/${ci}-imatrix.log | grep "Final" >> $OUT/${ci}-imatrix-sum.log + + set +e +} + +function gg_sum_pythia_2_8b { + gg_printf '### %s\n\n' "${ci}" + + gg_printf 'Pythia 2.8B:\n' + gg_printf 
'- status: %s\n' "$(cat $OUT/${ci}.exit)" + gg_printf '- perplexity:\n%s\n' "$(cat $OUT/${ci}-ppl.log)" + gg_printf '- imatrix:\n```\n%s\n```\n' "$(cat $OUT/${ci}-imatrix-sum.log)" + gg_printf '- f16: \n```\n%s\n```\n' "$(cat $OUT/${ci}-tg-f16.log)" + gg_printf '- q8_0:\n```\n%s\n```\n' "$(cat $OUT/${ci}-tg-q8_0.log)" + gg_printf '- q4_0:\n```\n%s\n```\n' "$(cat $OUT/${ci}-tg-q4_0.log)" + gg_printf '- q4_1:\n```\n%s\n```\n' "$(cat $OUT/${ci}-tg-q4_1.log)" + gg_printf '- q5_0:\n```\n%s\n```\n' "$(cat $OUT/${ci}-tg-q5_0.log)" + gg_printf '- q5_1:\n```\n%s\n```\n' "$(cat $OUT/${ci}-tg-q5_1.log)" + gg_printf '- q2_k:\n```\n%s\n```\n' "$(cat $OUT/${ci}-tg-q2_k.log)" + gg_printf '- q3_k:\n```\n%s\n```\n' "$(cat $OUT/${ci}-tg-q3_k.log)" + gg_printf '- q4_k:\n```\n%s\n```\n' "$(cat $OUT/${ci}-tg-q4_k.log)" + gg_printf '- q5_k:\n```\n%s\n```\n' "$(cat $OUT/${ci}-tg-q5_k.log)" + gg_printf '- q6_k:\n```\n%s\n```\n' "$(cat $OUT/${ci}-tg-q6_k.log)" + gg_printf '- save-load-state: \n```\n%s\n```\n' "$(cat $OUT/${ci}-save-load-state.log)" +} + +# bge-small + +function gg_run_embd_bge_small { + cd ${SRC} + + gg_wget models-mnt/bge-small/ https://huggingface.co/BAAI/bge-small-en-v1.5/raw/main/config.json + gg_wget models-mnt/bge-small/ https://huggingface.co/BAAI/bge-small-en-v1.5/raw/main/tokenizer.json + gg_wget models-mnt/bge-small/ https://huggingface.co/BAAI/bge-small-en-v1.5/raw/main/tokenizer_config.json + gg_wget models-mnt/bge-small/ https://huggingface.co/BAAI/bge-small-en-v1.5/raw/main/special_tokens_map.json + gg_wget models-mnt/bge-small/ https://huggingface.co/BAAI/bge-small-en-v1.5/resolve/main/pytorch_model.bin + gg_wget models-mnt/bge-small/ https://huggingface.co/BAAI/bge-small-en-v1.5/raw/main/sentence_bert_config.json + gg_wget models-mnt/bge-small/ https://huggingface.co/BAAI/bge-small-en-v1.5/raw/main/vocab.txt + gg_wget models-mnt/bge-small/ https://huggingface.co/BAAI/bge-small-en-v1.5/raw/main/modules.json + gg_wget models-mnt/bge-small/ https://huggingface.co/BAAI/bge-small-en-v1.5/raw/main/config.json + + gg_wget models-mnt/bge-small/1_Pooling https://huggingface.co/BAAI/bge-small-en-v1.5/raw/main/1_Pooling/config.json + + path_models="../models-mnt/bge-small" + + rm -rf build-ci-release && mkdir build-ci-release && cd build-ci-release + + set -e + + (time cmake -DCMAKE_BUILD_TYPE=Release ${CMAKE_EXTRA} .. 
) 2>&1 | tee -a $OUT/${ci}-cmake.log + (time make -j$(nproc) ) 2>&1 | tee -a $OUT/${ci}-make.log + + python3 ../convert_hf_to_gguf.py ${path_models} --outfile ${path_models}/ggml-model-f16.gguf + + model_f16="${path_models}/ggml-model-f16.gguf" + model_q8_0="${path_models}/ggml-model-q8_0.gguf" + + ./bin/llama-quantize ${model_f16} ${model_q8_0} q8_0 + + (time ./bin/llama-embedding --model ${model_f16} -p "I believe the meaning of life is" -ngl 99 -c 0 ) 2>&1 | tee -a $OUT/${ci}-tg-f16.log + (time ./bin/llama-embedding --model ${model_q8_0} -p "I believe the meaning of life is" -ngl 99 -c 0 ) 2>&1 | tee -a $OUT/${ci}-tg-q8_0.log + + set +e +} + +function gg_sum_embd_bge_small { + gg_printf '### %s\n\n' "${ci}" + + gg_printf 'BGE Small (BERT):\n' + gg_printf '- status: %s\n' "$(cat $OUT/${ci}.exit)" + gg_printf '- f16: \n```\n%s\n```\n' "$(cat $OUT/${ci}-tg-f16.log)" + gg_printf '- q8_0:\n```\n%s\n```\n' "$(cat $OUT/${ci}-tg-q8_0.log)" +} + +# rerank_tiny + +function gg_run_rerank_tiny { + cd ${SRC} + + gg_wget models-mnt/rerank-tiny/ https://huggingface.co/jinaai/jina-reranker-v1-tiny-en/raw/main/config.json + gg_wget models-mnt/rerank-tiny/ https://huggingface.co/jinaai/jina-reranker-v1-tiny-en/raw/main/tokenizer.json + gg_wget models-mnt/rerank-tiny/ https://huggingface.co/jinaai/jina-reranker-v1-tiny-en/raw/main/tokenizer_config.json + gg_wget models-mnt/rerank-tiny/ https://huggingface.co/jinaai/jina-reranker-v1-tiny-en/raw/main/special_tokens_map.json + gg_wget models-mnt/rerank-tiny/ https://huggingface.co/jinaai/jina-reranker-v1-tiny-en/resolve/main/pytorch_model.bin + gg_wget models-mnt/rerank-tiny/ https://huggingface.co/jinaai/jina-reranker-v1-tiny-en/raw/main/sentence_bert_config.json + gg_wget models-mnt/rerank-tiny/ https://huggingface.co/jinaai/jina-reranker-v1-tiny-en/raw/main/vocab.txt + gg_wget models-mnt/rerank-tiny/ https://huggingface.co/jinaai/jina-reranker-v1-tiny-en/raw/main/modules.json + gg_wget models-mnt/rerank-tiny/ https://huggingface.co/jinaai/jina-reranker-v1-tiny-en/raw/main/config.json + + gg_wget models-mnt/rerank-tiny/1_Pooling https://huggingface.co/jinaai/jina-reranker-v1-tiny-en/raw/main/1_Pooling/config.json + + path_models="../models-mnt/rerank-tiny" + + rm -rf build-ci-release && mkdir build-ci-release && cd build-ci-release + + set -e + + (time cmake -DCMAKE_BUILD_TYPE=Release ${CMAKE_EXTRA} .. ) 2>&1 | tee -a $OUT/${ci}-cmake.log + (time make -j$(nproc) ) 2>&1 | tee -a $OUT/${ci}-make.log + + python3 ../convert_hf_to_gguf.py ${path_models} --outfile ${path_models}/ggml-model-f16.gguf + + model_f16="${path_models}/ggml-model-f16.gguf" + + # for this model, the SEP token is "" + (time ./bin/llama-embedding --model ${model_f16} -p "what is panda?hi\nwhat is panda?it's a bear\nwhat is panda?The giant panda (Ailuropoda melanoleuca), sometimes called a panda bear or simply panda, is a bear species endemic to China." 
-ngl 99 -c 0 --pooling rank --embd-normalize -1 --verbose-prompt) 2>&1 | tee -a $OUT/${ci}-rk-f16.log + + # sample output + # rerank score 0: 0.029 + # rerank score 1: 0.029 + # rerank score 2: 0.135 + + # check that the score is in the range [$3, $4] + function check_score { + qnt="$1" + score=$(echo "$2" | grep -oE "[0-9]+\.[0-9]+" | tail -n 1) + + if [ $(echo "$score < $3" | bc) -eq 1 ] || [ $(echo "$score > $4" | bc) -eq 1 ]; then + printf ' - %s @ %s (FAIL: score not in range [%s, %s])\n' "$qnt" "$score" "$3" "$4" + return 20 + fi + + printf ' - %s @ %s OK\n' "$qnt" "$score" + return 0 + } + + check_score "rerank score 0" "$(cat $OUT/${ci}-rk-f16.log | grep "rerank score 0")" "0.00" "0.05" | tee -a $OUT/${ci}-rk-f16.log + check_score "rerank score 1" "$(cat $OUT/${ci}-rk-f16.log | grep "rerank score 1")" "0.00" "0.05" | tee -a $OUT/${ci}-rk-f16.log + check_score "rerank score 2" "$(cat $OUT/${ci}-rk-f16.log | grep "rerank score 2")" "0.10" "0.30" | tee -a $OUT/${ci}-rk-f16.log + + set +e +} + +function gg_sum_rerank_tiny { + gg_printf '### %s\n\n' "${ci}" + + gg_printf 'Rerank Tiny (Jina):\n' + gg_printf '- status: %s\n' "$(cat $OUT/${ci}.exit)" + gg_printf '- f16: \n```\n%s\n```\n' "$(cat $OUT/${ci}-rk-f16.log)" +} + +function gg_check_build_requirements { + if ! command -v cmake &> /dev/null; then + gg_printf 'cmake not found, please install' + fi + + if ! command -v make &> /dev/null; then + gg_printf 'make not found, please install' + fi + + if ! command -v ctest &> /dev/null; then + gg_printf 'ctest not found, please install' + fi +} + +## main + +export LLAMA_LOG_PREFIX=1 +export LLAMA_LOG_TIMESTAMPS=1 + +if [ -z ${GG_BUILD_LOW_PERF} ]; then + # Create symlink: ./llama.cpp/models-mnt -> $MNT/models/models-mnt + rm -rf ${SRC}/models-mnt + mnt_models=${MNT}/models + mkdir -p ${mnt_models} + ln -sfn ${mnt_models} ${SRC}/models-mnt + + # Create a fresh python3 venv and enter it + if ! python3 -m venv "$MNT/venv"; then + echo "Error: Failed to create Python virtual environment at $MNT/venv." 
+        exit 1
+    fi
+    source "$MNT/venv/bin/activate"
+
+    pip install -r ${SRC}/requirements.txt --disable-pip-version-check
+    pip install --editable gguf-py --disable-pip-version-check
+fi
+
+ret=0
+
+test $ret -eq 0 && gg_run ctest_debug
+test $ret -eq 0 && gg_run ctest_release
+
+if [ -z ${GG_BUILD_LOW_PERF} ]; then
+    test $ret -eq 0 && gg_run embd_bge_small
+    test $ret -eq 0 && gg_run rerank_tiny
+
+    if [ -z ${GG_BUILD_CLOUD} ] || [ ${GG_BUILD_EXTRA_TESTS_0} ]; then
+        test $ret -eq 0 && gg_run test_scripts_debug
+        test $ret -eq 0 && gg_run test_scripts_release
+    fi
+
+    if [ -z ${GG_BUILD_VRAM_GB} ] || [ ${GG_BUILD_VRAM_GB} -ge 8 ]; then
+        if [ -z ${GG_BUILD_CUDA} ] && [ -z ${GG_BUILD_VULKAN} ]; then
+            test $ret -eq 0 && gg_run pythia_1_4b
+        else
+            test $ret -eq 0 && gg_run pythia_2_8b
+            #test $ret -eq 0 && gg_run open_llama_7b_v2
+        fi
+        test $ret -eq 0 && gg_run ctest_with_model_debug
+        test $ret -eq 0 && gg_run ctest_with_model_release
+    fi
+fi
+
+exit $ret
diff --git a/llama.cpp/cmake/arm64-apple-clang.cmake b/llama.cpp/cmake/arm64-apple-clang.cmake
new file mode 100644
index 0000000000000000000000000000000000000000..5fcd2882afc9d96ccc1c28396b91e5e46c93f66c
--- /dev/null
+++ b/llama.cpp/cmake/arm64-apple-clang.cmake
@@ -0,0 +1,16 @@
+set( CMAKE_SYSTEM_NAME Darwin )
+set( CMAKE_SYSTEM_PROCESSOR arm64 )
+
+set( target arm64-apple-darwin-macho )
+
+set( CMAKE_C_COMPILER clang )
+set( CMAKE_CXX_COMPILER clang++ )
+
+set( CMAKE_C_COMPILER_TARGET ${target} )
+set( CMAKE_CXX_COMPILER_TARGET ${target} )
+
+set( arch_c_flags "-march=armv8.4-a -fvectorize -ffp-model=fast -fno-finite-math-only" )
+set( warn_c_flags "-Wno-format -Wno-unused-variable -Wno-unused-function" )
+
+set( CMAKE_C_FLAGS_INIT "${arch_c_flags} ${warn_c_flags}" )
+set( CMAKE_CXX_FLAGS_INIT "${arch_c_flags} ${warn_c_flags}" )
diff --git a/llama.cpp/cmake/arm64-windows-llvm.cmake b/llama.cpp/cmake/arm64-windows-llvm.cmake
new file mode 100644
index 0000000000000000000000000000000000000000..8023796800683240e32048180a1eb874ddab1963
--- /dev/null
+++ b/llama.cpp/cmake/arm64-windows-llvm.cmake
@@ -0,0 +1,16 @@
+set( CMAKE_SYSTEM_NAME Windows )
+set( CMAKE_SYSTEM_PROCESSOR arm64 )
+
+set( target arm64-pc-windows-msvc )
+
+set( CMAKE_C_COMPILER clang )
+set( CMAKE_CXX_COMPILER clang++ )
+
+set( CMAKE_C_COMPILER_TARGET ${target} )
+set( CMAKE_CXX_COMPILER_TARGET ${target} )
+
+set( arch_c_flags "-march=armv8.7-a -fvectorize -ffp-model=fast -fno-finite-math-only" )
+set( warn_c_flags "-Wno-format -Wno-unused-variable -Wno-unused-function -Wno-gnu-zero-variadic-macro-arguments" )
+
+set( CMAKE_C_FLAGS_INIT "${arch_c_flags} ${warn_c_flags}" )
+set( CMAKE_CXX_FLAGS_INIT "${arch_c_flags} ${warn_c_flags}" )
diff --git a/llama.cpp/cmake/arm64-windows-msvc.cmake b/llama.cpp/cmake/arm64-windows-msvc.cmake
new file mode 100644
index 0000000000000000000000000000000000000000..c77631420ce848d051eb4ef774313964dd5636ed
--- /dev/null
+++ b/llama.cpp/cmake/arm64-windows-msvc.cmake
@@ -0,0 +1,6 @@
+set( CMAKE_SYSTEM_NAME Windows )
+set( CMAKE_SYSTEM_PROCESSOR arm64 )
+
+set( target arm64-pc-windows-msvc )
+set( CMAKE_C_COMPILER_TARGET ${target} )
+set( CMAKE_CXX_COMPILER_TARGET ${target} )
diff --git a/llama.cpp/cmake/build-info.cmake b/llama.cpp/cmake/build-info.cmake
new file mode 100644
index 0000000000000000000000000000000000000000..c1a456e1798724eb3c4a160f98670f7e49653669
--- /dev/null
+++ b/llama.cpp/cmake/build-info.cmake
@@ -0,0 +1,58 @@
+set(BUILD_NUMBER 0)
+set(BUILD_COMMIT "unknown")
+set(BUILD_COMPILER "unknown")
+set(BUILD_TARGET "unknown")
+
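+# NOTE: the fallback values above are only used when the probes below fail;
+# otherwise they are overwritten with the short commit hash, the commit count,
+# and the detected compiler / target triple.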
+# Look for git
+find_package(Git)
+if(NOT Git_FOUND)
+    find_program(GIT_EXECUTABLE NAMES git git.exe)
+    if(GIT_EXECUTABLE)
+        set(Git_FOUND TRUE)
+        message(STATUS "Found Git: ${GIT_EXECUTABLE}")
+    else()
+        message(WARNING "Git not found. Build info will not be accurate.")
+    endif()
+endif()
+
+# Get the commit count and hash
+if(Git_FOUND)
+    execute_process(
+        COMMAND ${GIT_EXECUTABLE} rev-parse --short HEAD
+        WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}
+        OUTPUT_VARIABLE HEAD
+        OUTPUT_STRIP_TRAILING_WHITESPACE
+        RESULT_VARIABLE RES
+    )
+    if (RES EQUAL 0)
+        set(BUILD_COMMIT ${HEAD})
+    endif()
+    execute_process(
+        COMMAND ${GIT_EXECUTABLE} rev-list --count HEAD
+        WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}
+        OUTPUT_VARIABLE COUNT
+        OUTPUT_STRIP_TRAILING_WHITESPACE
+        RESULT_VARIABLE RES
+    )
+    if (RES EQUAL 0)
+        set(BUILD_NUMBER ${COUNT})
+    endif()
+endif()
+
+if(MSVC)
+    set(BUILD_COMPILER "${CMAKE_C_COMPILER_ID} ${CMAKE_C_COMPILER_VERSION}")
+    set(BUILD_TARGET ${CMAKE_VS_PLATFORM_NAME})
+else()
+    execute_process(
+        COMMAND sh -c "\"$@\" --version | head -1" _ ${CMAKE_C_COMPILER}
+        OUTPUT_VARIABLE OUT
+        OUTPUT_STRIP_TRAILING_WHITESPACE
+    )
+    set(BUILD_COMPILER ${OUT})
+    execute_process(
+        COMMAND ${CMAKE_C_COMPILER} -dumpmachine
+        OUTPUT_VARIABLE OUT
+        OUTPUT_STRIP_TRAILING_WHITESPACE
+    )
+    set(BUILD_TARGET ${OUT})
+endif()
diff --git a/llama.cpp/cmake/common.cmake b/llama.cpp/cmake/common.cmake
new file mode 100644
index 0000000000000000000000000000000000000000..0f54871e4143dcd7bbab3b6ae0cb19b8d1a80a13
--- /dev/null
+++ b/llama.cpp/cmake/common.cmake
@@ -0,0 +1,33 @@
+function(llama_add_compile_flags)
+    if (LLAMA_FATAL_WARNINGS)
+        if (CMAKE_CXX_COMPILER_ID MATCHES "GNU" OR CMAKE_CXX_COMPILER_ID MATCHES "Clang")
+            list(APPEND C_FLAGS -Werror)
+            list(APPEND CXX_FLAGS -Werror)
+        elseif (CMAKE_CXX_COMPILER_ID STREQUAL "MSVC")
+            add_compile_options(/WX)
+        endif()
+    endif()
+
+    if (LLAMA_ALL_WARNINGS)
+        if (NOT MSVC)
+            list(APPEND C_FLAGS -Wshadow -Wstrict-prototypes -Wpointer-arith -Wmissing-prototypes
+                                -Werror=implicit-int -Werror=implicit-function-declaration)
+
+            list(APPEND CXX_FLAGS -Wmissing-declarations -Wmissing-noreturn)
+
+            list(APPEND WARNING_FLAGS -Wall -Wextra -Wpedantic -Wcast-qual -Wno-unused-function)
+
+            list(APPEND C_FLAGS ${WARNING_FLAGS})
+            list(APPEND CXX_FLAGS ${WARNING_FLAGS})
+
+            ggml_get_flags(${CMAKE_CXX_COMPILER_ID} ${CMAKE_CXX_COMPILER_VERSION})
+
+            add_compile_options("$<$<COMPILE_LANGUAGE:C>:${C_FLAGS};${GF_C_FLAGS}>"
+                                "$<$<COMPILE_LANGUAGE:CXX>:${CXX_FLAGS};${GF_CXX_FLAGS}>")
+        else()
+            # todo : msvc
+            set(C_FLAGS "" PARENT_SCOPE)
+            set(CXX_FLAGS "" PARENT_SCOPE)
+        endif()
+    endif()
+endfunction()
diff --git a/llama.cpp/cmake/git-vars.cmake b/llama.cpp/cmake/git-vars.cmake
new file mode 100644
index 0000000000000000000000000000000000000000..1a4c24ebf6adeb1126e626f56de601621179353d
--- /dev/null
+++ b/llama.cpp/cmake/git-vars.cmake
@@ -0,0 +1,22 @@
+find_package(Git)
+
+# the commit's SHA1
+execute_process(COMMAND
+    "${GIT_EXECUTABLE}" describe --match=NeVeRmAtCh --always --abbrev=8
+    WORKING_DIRECTORY "${CMAKE_SOURCE_DIR}"
+    OUTPUT_VARIABLE GIT_SHA1
+    ERROR_QUIET OUTPUT_STRIP_TRAILING_WHITESPACE)
+
+# the date of the commit
+execute_process(COMMAND
+    "${GIT_EXECUTABLE}" log -1 --format=%ad --date=local
+    WORKING_DIRECTORY "${CMAKE_SOURCE_DIR}"
+    OUTPUT_VARIABLE GIT_DATE
+    ERROR_QUIET OUTPUT_STRIP_TRAILING_WHITESPACE)
+
+# the subject of the commit
+execute_process(COMMAND
+    "${GIT_EXECUTABLE}" log -1 --format=%s
+    WORKING_DIRECTORY "${CMAKE_SOURCE_DIR}"
+    OUTPUT_VARIABLE GIT_COMMIT_SUBJECT
ERROR_QUIET OUTPUT_STRIP_TRAILING_WHITESPACE) diff --git a/llama.cpp/cmake/llama-config.cmake.in b/llama.cpp/cmake/llama-config.cmake.in new file mode 100644 index 0000000000000000000000000000000000000000..90cbec5b6f1331b9149a5ae3aa03fea45d69246d --- /dev/null +++ b/llama.cpp/cmake/llama-config.cmake.in @@ -0,0 +1,30 @@ +set(LLAMA_VERSION @LLAMA_INSTALL_VERSION@) +set(LLAMA_BUILD_COMMIT @LLAMA_BUILD_COMMIT@) +set(LLAMA_BUILD_NUMBER @LLAMA_BUILD_NUMBER@) +set(LLAMA_SHARED_LIB @BUILD_SHARED_LIBS@) + +@PACKAGE_INIT@ + +set_and_check(LLAMA_INCLUDE_DIR "@PACKAGE_LLAMA_INCLUDE_INSTALL_DIR@") +set_and_check(LLAMA_LIB_DIR "@PACKAGE_LLAMA_LIB_INSTALL_DIR@") +set_and_check(LLAMA_BIN_DIR "@PACKAGE_LLAMA_BIN_INSTALL_DIR@") + +find_package(ggml REQUIRED HINTS ${LLAMA_LIB_DIR}/cmake) + +find_library(llama_LIBRARY llama + REQUIRED + HINTS ${LLAMA_LIB_DIR} + NO_CMAKE_FIND_ROOT_PATH +) + +add_library(llama UNKNOWN IMPORTED) +set_target_properties(llama + PROPERTIES + INTERFACE_INCLUDE_DIRECTORIES "${LLAMA_INCLUDE_DIR}" + INTERFACE_LINK_LIBRARIES "ggml::ggml;ggml::ggml-base;" + IMPORTED_LINK_INTERFACE_LANGUAGES "CXX" + IMPORTED_LOCATION "${llama_LIBRARY}" + INTERFACE_COMPILE_FEATURES c_std_90 + POSITION_INDEPENDENT_CODE ON) + +check_required_components(Llama) diff --git a/llama.cpp/cmake/llama.pc.in b/llama.cpp/cmake/llama.pc.in new file mode 100644 index 0000000000000000000000000000000000000000..6fb58b5f6881b7f204d9094a8f2d3ed1d93906c2 --- /dev/null +++ b/llama.cpp/cmake/llama.pc.in @@ -0,0 +1,10 @@ +prefix=@CMAKE_INSTALL_PREFIX@ +exec_prefix=@CMAKE_INSTALL_PREFIX@ +libdir=@CMAKE_INSTALL_FULL_LIBDIR@ +includedir=@CMAKE_INSTALL_FULL_INCLUDEDIR@ + +Name: llama +Description: Port of Facebook's LLaMA model in C/C++ +Version: @LLAMA_INSTALL_VERSION@ +Libs: -L${libdir} -lggml -lggml-base -lllama +Cflags: -I${includedir} diff --git a/llama.cpp/cmake/x64-windows-llvm.cmake b/llama.cpp/cmake/x64-windows-llvm.cmake new file mode 100644 index 0000000000000000000000000000000000000000..0603d738fbef06233a9d3e4665c8c75384d7478c --- /dev/null +++ b/llama.cpp/cmake/x64-windows-llvm.cmake @@ -0,0 +1,11 @@ +set( CMAKE_SYSTEM_NAME Windows ) +set( CMAKE_SYSTEM_PROCESSOR x86_64 ) + +set( CMAKE_C_COMPILER clang ) +set( CMAKE_CXX_COMPILER clang++ ) + +set( arch_c_flags "-march=native" ) + +set( CMAKE_C_FLAGS_INIT "${arch_c_flags}" ) +set( CMAKE_CXX_FLAGS_INIT "${arch_c_flags}" ) + diff --git a/llama.cpp/common/CMakeLists.txt b/llama.cpp/common/CMakeLists.txt new file mode 100644 index 0000000000000000000000000000000000000000..e61015d2ad7f9515ca8469f2fc1e48a847d0eea8 --- /dev/null +++ b/llama.cpp/common/CMakeLists.txt @@ -0,0 +1,124 @@ +# common + +find_package(Threads REQUIRED) + +llama_add_compile_flags() + +# Build info header +# + +if(EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/../.git") + set(GIT_DIR "${CMAKE_CURRENT_SOURCE_DIR}/../.git") + + # Is git submodule + if(NOT IS_DIRECTORY "${GIT_DIR}") + file(READ ${GIT_DIR} REAL_GIT_DIR_LINK) + string(REGEX REPLACE "gitdir: (.*)\n$" "\\1" REAL_GIT_DIR ${REAL_GIT_DIR_LINK}) + string(FIND "${REAL_GIT_DIR}" "/" SLASH_POS) + if (SLASH_POS EQUAL 0) + set(GIT_DIR "${REAL_GIT_DIR}") + else() + set(GIT_DIR "${CMAKE_CURRENT_SOURCE_DIR}/../${REAL_GIT_DIR}") + endif() + endif() + + if(EXISTS "${GIT_DIR}/index") + set(GIT_INDEX "${GIT_DIR}/index") + else() + message(WARNING "Git index not found in git repository.") + set(GIT_INDEX "") + endif() +else() + message(WARNING "Git repository not found; to enable automatic generation of build info, make sure Git is installed and the project is a Git 
repository.") + set(GIT_INDEX "") +endif() + +# Add a custom command to rebuild build-info.cpp when .git/index changes +add_custom_command( + OUTPUT "${CMAKE_CURRENT_SOURCE_DIR}/build-info.cpp" + COMMENT "Generating build details from Git" + COMMAND ${CMAKE_COMMAND} -DMSVC=${MSVC} -DCMAKE_C_COMPILER_VERSION=${CMAKE_C_COMPILER_VERSION} + -DCMAKE_C_COMPILER_ID=${CMAKE_C_COMPILER_ID} -DCMAKE_VS_PLATFORM_NAME=${CMAKE_VS_PLATFORM_NAME} + -DCMAKE_C_COMPILER=${CMAKE_C_COMPILER} -P "${CMAKE_CURRENT_SOURCE_DIR}/cmake/build-info-gen-cpp.cmake" + WORKING_DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}/.." + DEPENDS "${CMAKE_CURRENT_SOURCE_DIR}/build-info.cpp.in" ${GIT_INDEX} + VERBATIM +) +set(TARGET build_info) +add_library(${TARGET} OBJECT build-info.cpp) +if (BUILD_SHARED_LIBS) + set_target_properties(${TARGET} PROPERTIES POSITION_INDEPENDENT_CODE ON) +endif() + +set(TARGET common) + +add_library(${TARGET} STATIC + arg.cpp + arg.h + base64.hpp + chat.cpp + chat.hpp + chat-template.hpp + common.cpp + common.h + console.cpp + console.h + json-schema-to-grammar.cpp + json.hpp + llguidance.cpp + log.cpp + log.h + minja.hpp + ngram-cache.cpp + ngram-cache.h + sampling.cpp + sampling.h + speculative.cpp + speculative.h + ) + +if (BUILD_SHARED_LIBS) + set_target_properties(${TARGET} PROPERTIES POSITION_INDEPENDENT_CODE ON) +endif() + +set(LLAMA_COMMON_EXTRA_LIBS build_info) + +# Use curl to download model url +if (LLAMA_CURL) + find_package(CURL REQUIRED) + target_compile_definitions(${TARGET} PUBLIC LLAMA_USE_CURL) + include_directories(${CURL_INCLUDE_DIRS}) + find_library(CURL_LIBRARY curl REQUIRED) + set(LLAMA_COMMON_EXTRA_LIBS ${LLAMA_COMMON_EXTRA_LIBS} ${CURL_LIBRARY}) +endif () + +if (LLAMA_LLGUIDANCE) + include(ExternalProject) + set(LLGUIDANCE_SRC ${CMAKE_BINARY_DIR}/llguidance/source) + set(LLGUIDANCE_PATH ${LLGUIDANCE_SRC}/target/release) + ExternalProject_Add(llguidance_ext + GIT_REPOSITORY https://github.com/guidance-ai/llguidance + # v0.6.12: + GIT_TAG ced1c9023d47ec194fa977932d35ce65c2ebfc09 + PREFIX ${CMAKE_BINARY_DIR}/llguidance + SOURCE_DIR ${LLGUIDANCE_SRC} + BUILD_IN_SOURCE TRUE + CONFIGURE_COMMAND "" + BUILD_COMMAND cargo build --release + INSTALL_COMMAND "" + BUILD_BYPRODUCTS ${LLGUIDANCE_PATH}/libllguidance.a ${LLGUIDANCE_PATH}/llguidance.h + UPDATE_COMMAND "" + ) + target_compile_definitions(${TARGET} PUBLIC LLAMA_USE_LLGUIDANCE) + + add_library(llguidance STATIC IMPORTED) + set_target_properties(llguidance PROPERTIES IMPORTED_LOCATION ${LLGUIDANCE_PATH}/libllguidance.a) + add_dependencies(llguidance llguidance_ext) + + target_include_directories(${TARGET} PRIVATE ${LLGUIDANCE_PATH}) + set(LLAMA_COMMON_EXTRA_LIBS ${LLAMA_COMMON_EXTRA_LIBS} llguidance) +endif () + +target_include_directories(${TARGET} PUBLIC .) 
+target_compile_features (${TARGET} PUBLIC cxx_std_17) +target_link_libraries (${TARGET} PRIVATE ${LLAMA_COMMON_EXTRA_LIBS} PUBLIC llama Threads::Threads) diff --git a/llama.cpp/common/arg.cpp b/llama.cpp/common/arg.cpp new file mode 100644 index 0000000000000000000000000000000000000000..f3027af4db4d85cf28e6a0dc99a858e01e424b83 --- /dev/null +++ b/llama.cpp/common/arg.cpp @@ -0,0 +1,2377 @@ +#include "arg.h" + +#include "log.h" +#include "sampling.h" + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "json-schema-to-grammar.h" + +using json = nlohmann::ordered_json; + +common_arg & common_arg::set_examples(std::initializer_list examples) { + this->examples = std::move(examples); + return *this; +} + +common_arg & common_arg::set_excludes(std::initializer_list excludes) { + this->excludes = std::move(excludes); + return *this; +} + +common_arg & common_arg::set_env(const char * env) { + help = help + "\n(env: " + env + ")"; + this->env = env; + return *this; +} + +common_arg & common_arg::set_sparam() { + is_sparam = true; + return *this; +} + +bool common_arg::in_example(enum llama_example ex) { + return examples.find(ex) != examples.end(); +} + +bool common_arg::is_exclude(enum llama_example ex) { + return excludes.find(ex) != excludes.end(); +} + +bool common_arg::get_value_from_env(std::string & output) { + if (env == nullptr) return false; + char * value = std::getenv(env); + if (value) { + output = value; + return true; + } + return false; +} + +bool common_arg::has_value_from_env() { + return env != nullptr && std::getenv(env); +} + +static std::vector break_str_into_lines(std::string input, size_t max_char_per_line) { + std::vector result; + std::istringstream iss(input); + std::string line; + auto add_line = [&](const std::string& l) { + if (l.length() <= max_char_per_line) { + result.push_back(l); + } else { + std::istringstream line_stream(l); + std::string word, current_line; + while (line_stream >> word) { + if (current_line.length() + !current_line.empty() + word.length() > max_char_per_line) { + if (!current_line.empty()) result.push_back(current_line); + current_line = word; + } else { + current_line += (!current_line.empty() ? " " : "") + word; + } + } + if (!current_line.empty()) result.push_back(current_line); + } + }; + while (std::getline(iss, line)) { + add_line(line); + } + return result; +} + +std::string common_arg::to_string() { + // params for printing to console + const static int n_leading_spaces = 40; + const static int n_char_per_line_help = 70; // TODO: detect this based on current console + std::string leading_spaces(n_leading_spaces, ' '); + + std::ostringstream ss; + for (const auto arg : args) { + if (arg == args.front()) { + if (args.size() == 1) { + ss << arg; + } else { + // first arg is usually abbreviation, we need padding to make it more beautiful + auto tmp = std::string(arg) + ", "; + auto spaces = std::string(std::max(0, 7 - (int)tmp.size()), ' '); + ss << tmp << spaces; + } + } else { + ss << arg << (arg != args.back() ? 
", " : ""); + } + } + if (value_hint) ss << " " << value_hint; + if (value_hint_2) ss << " " << value_hint_2; + if (ss.tellp() > n_leading_spaces - 3) { + // current line is too long, add new line + ss << "\n" << leading_spaces; + } else { + // padding between arg and help, same line + ss << std::string(leading_spaces.size() - ss.tellp(), ' '); + } + const auto help_lines = break_str_into_lines(help, n_char_per_line_help); + for (const auto & line : help_lines) { + ss << (&line == &help_lines.front() ? "" : leading_spaces) << line << "\n"; + } + return ss.str(); +} + +// +// utils +// + +static void common_params_handle_model_default( + std::string & model, + const std::string & model_url, + std::string & hf_repo, + std::string & hf_file, + const std::string & hf_token, + const std::string & model_default) { + if (!hf_repo.empty()) { + // short-hand to avoid specifying --hf-file -> default it to --model + if (hf_file.empty()) { + if (model.empty()) { + auto auto_detected = common_get_hf_file(hf_repo, hf_token); + if (auto_detected.first.empty() || auto_detected.second.empty()) { + exit(1); // built without CURL, error message already printed + } + hf_repo = auto_detected.first; + hf_file = auto_detected.second; + } else { + hf_file = model; + } + } + // make sure model path is present (for caching purposes) + if (model.empty()) { + // this is to avoid different repo having same file name, or same file name in different subdirs + std::string filename = hf_repo + "_" + hf_file; + // to make sure we don't have any slashes in the filename + string_replace_all(filename, "/", "_"); + model = fs_get_cache_file(filename); + } + } else if (!model_url.empty()) { + if (model.empty()) { + auto f = string_split(model_url, '#').front(); + f = string_split(f, '?').front(); + model = fs_get_cache_file(string_split(f, '/').back()); + } + } else if (model.empty()) { + model = model_default; + } +} + +const std::vector kv_cache_types = { + GGML_TYPE_F32, + GGML_TYPE_F16, + GGML_TYPE_BF16, + GGML_TYPE_Q8_0, + GGML_TYPE_Q4_0, + GGML_TYPE_Q4_1, + GGML_TYPE_IQ4_NL, + GGML_TYPE_Q5_0, + GGML_TYPE_Q5_1, +}; + +static ggml_type kv_cache_type_from_str(const std::string & s) { + for (const auto & type : kv_cache_types) { + if (ggml_type_name(type) == s) { + return type; + } + } + throw std::runtime_error("Unsupported cache type: " + s); +} + +static std::string get_all_kv_cache_types() { + std::ostringstream msg; + for (const auto & type : kv_cache_types) { + msg << ggml_type_name(type) << (&type == &kv_cache_types.back() ? 
"" : ", "); + } + return msg.str(); +} + +// +// CLI argument parsing functions +// + +static bool common_params_parse_ex(int argc, char ** argv, common_params_context & ctx_arg) { + std::string arg; + const std::string arg_prefix = "--"; + common_params & params = ctx_arg.params; + + std::unordered_map arg_to_options; + for (auto & opt : ctx_arg.options) { + for (const auto & arg : opt.args) { + arg_to_options[arg] = &opt; + } + } + + // handle environment variables + for (auto & opt : ctx_arg.options) { + std::string value; + if (opt.get_value_from_env(value)) { + try { + if (opt.handler_void && (value == "1" || value == "true")) { + opt.handler_void(params); + } + if (opt.handler_int) { + opt.handler_int(params, std::stoi(value)); + } + if (opt.handler_string) { + opt.handler_string(params, value); + continue; + } + } catch (std::exception & e) { + throw std::invalid_argument(string_format( + "error while handling environment variable \"%s\": %s\n\n", opt.env, e.what())); + } + } + } + + // handle command line arguments + auto check_arg = [&](int i) { + if (i+1 >= argc) { + throw std::invalid_argument("expected value for argument"); + } + }; + + for (int i = 1; i < argc; i++) { + const std::string arg_prefix = "--"; + + std::string arg = argv[i]; + if (arg.compare(0, arg_prefix.size(), arg_prefix) == 0) { + std::replace(arg.begin(), arg.end(), '_', '-'); + } + if (arg_to_options.find(arg) == arg_to_options.end()) { + throw std::invalid_argument(string_format("error: invalid argument: %s", arg.c_str())); + } + auto opt = *arg_to_options[arg]; + if (opt.has_value_from_env()) { + fprintf(stderr, "warn: %s environment variable is set, but will be overwritten by command line argument %s\n", opt.env, arg.c_str()); + } + try { + if (opt.handler_void) { + opt.handler_void(params); + continue; + } + + // arg with single value + check_arg(i); + std::string val = argv[++i]; + if (opt.handler_int) { + opt.handler_int(params, std::stoi(val)); + continue; + } + if (opt.handler_string) { + opt.handler_string(params, val); + continue; + } + + // arg with 2 values + check_arg(i); + std::string val2 = argv[++i]; + if (opt.handler_str_str) { + opt.handler_str_str(params, val, val2); + continue; + } + } catch (std::exception & e) { + throw std::invalid_argument(string_format( + "error while handling argument \"%s\": %s\n\n" + "usage:\n%s\n\nto show complete usage, run with -h", + arg.c_str(), e.what(), arg_to_options[arg]->to_string().c_str())); + } + } + + postprocess_cpu_params(params.cpuparams, nullptr); + postprocess_cpu_params(params.cpuparams_batch, ¶ms.cpuparams); + + postprocess_cpu_params(params.speculative.cpuparams, ¶ms.cpuparams); + postprocess_cpu_params(params.speculative.cpuparams_batch, ¶ms.cpuparams_batch); + + if (params.prompt_cache_all && (params.interactive || params.interactive_first)) { + throw std::invalid_argument("error: --prompt-cache-all not supported in interactive mode yet\n"); + } + + // TODO: refactor model params in a common struct + common_params_handle_model_default(params.model, params.model_url, params.hf_repo, params.hf_file, params.hf_token, DEFAULT_MODEL_PATH); + common_params_handle_model_default(params.speculative.model, params.speculative.model_url, params.speculative.hf_repo, params.speculative.hf_file, params.hf_token, ""); + common_params_handle_model_default(params.vocoder.model, params.vocoder.model_url, params.vocoder.hf_repo, params.vocoder.hf_file, params.hf_token, ""); + + if (params.escape) { + string_process_escapes(params.prompt); + 
string_process_escapes(params.input_prefix); + string_process_escapes(params.input_suffix); + for (auto & antiprompt : params.antiprompt) { + string_process_escapes(antiprompt); + } + for (auto & seq_breaker : params.sampling.dry_sequence_breakers) { + string_process_escapes(seq_breaker); + } + } + + if (!params.kv_overrides.empty()) { + params.kv_overrides.emplace_back(); + params.kv_overrides.back().key[0] = 0; + } + + if (params.reranking && params.embedding) { + throw std::invalid_argument("error: either --embedding or --reranking can be specified, but not both"); + } + + if (!params.chat_template.empty() && !common_chat_verify_template(params.chat_template, params.use_jinja)) { + throw std::runtime_error(string_format( + "error: the supplied chat template is not supported: %s%s\n", + params.chat_template.c_str(), + params.use_jinja ? "" : "\nnote: llama.cpp was started without --jinja, we only support commonly used templates" + )); + } + + return true; +} + +static void common_params_print_usage(common_params_context & ctx_arg) { + auto print_options = [](std::vector & options) { + for (common_arg * opt : options) { + printf("%s", opt->to_string().c_str()); + } + }; + + std::vector common_options; + std::vector sparam_options; + std::vector specific_options; + for (auto & opt : ctx_arg.options) { + // in case multiple LLAMA_EXAMPLE_* are set, we prioritize the LLAMA_EXAMPLE_* matching current example + if (opt.is_sparam) { + sparam_options.push_back(&opt); + } else if (opt.in_example(ctx_arg.ex)) { + specific_options.push_back(&opt); + } else { + common_options.push_back(&opt); + } + } + printf("----- common params -----\n\n"); + print_options(common_options); + printf("\n\n----- sampling params -----\n\n"); + print_options(sparam_options); + // TODO: maybe convert enum llama_example to string + printf("\n\n----- example-specific params -----\n\n"); + print_options(specific_options); +} + +static std::vector parse_device_list(const std::string & value) { + std::vector devices; + auto dev_names = string_split(value, ','); + if (dev_names.empty()) { + throw std::invalid_argument("no devices specified"); + } + if (dev_names.size() == 1 && dev_names[0] == "none") { + devices.push_back(nullptr); + } else { + for (const auto & device : dev_names) { + auto * dev = ggml_backend_dev_by_name(device.c_str()); + if (!dev || ggml_backend_dev_type(dev) != GGML_BACKEND_DEVICE_TYPE_GPU) { + throw std::invalid_argument(string_format("invalid device: %s", device.c_str())); + } + devices.push_back(dev); + } + devices.push_back(nullptr); + } + return devices; +} + +static void add_rpc_devices(std::string servers) { + auto rpc_servers = string_split(servers, ','); + if (rpc_servers.empty()) { + throw std::invalid_argument("no RPC servers specified"); + } + ggml_backend_reg_t rpc_reg = ggml_backend_reg_by_name("RPC"); + if (!rpc_reg) { + throw std::invalid_argument("failed to find RPC backend"); + } + typedef ggml_backend_dev_t (*ggml_backend_rpc_add_device_t)(const char * endpoint); + ggml_backend_rpc_add_device_t ggml_backend_rpc_add_device_fn = (ggml_backend_rpc_add_device_t) ggml_backend_reg_get_proc_address(rpc_reg, "ggml_backend_rpc_add_device"); + if (!ggml_backend_rpc_add_device_fn) { + throw std::invalid_argument("failed to find RPC device add function"); + } + for (const auto & server : rpc_servers) { + ggml_backend_dev_t dev = ggml_backend_rpc_add_device_fn(server.c_str()); + if (dev) { + ggml_backend_device_register(dev); + } else { + throw std::invalid_argument("failed to register RPC 
device"); + } + } +} + +bool common_params_parse(int argc, char ** argv, common_params & params, llama_example ex, void(*print_usage)(int, char **)) { + auto ctx_arg = common_params_parser_init(params, ex, print_usage); + const common_params params_org = ctx_arg.params; // the example can modify the default params + + try { + if (!common_params_parse_ex(argc, argv, ctx_arg)) { + ctx_arg.params = params_org; + return false; + } + if (ctx_arg.params.usage) { + common_params_print_usage(ctx_arg); + if (ctx_arg.print_usage) { + ctx_arg.print_usage(argc, argv); + } + exit(0); + } + } catch (const std::invalid_argument & ex) { + fprintf(stderr, "%s\n", ex.what()); + ctx_arg.params = params_org; + return false; + } + + return true; +} + +static std::string list_builtin_chat_templates() { + std::vector supported_tmpl; + int32_t res = llama_chat_builtin_templates(nullptr, 0); + supported_tmpl.resize(res); + res = llama_chat_builtin_templates(supported_tmpl.data(), supported_tmpl.size()); + std::ostringstream msg; + for (auto & tmpl : supported_tmpl) { + msg << tmpl << (&tmpl == &supported_tmpl.back() ? "" : ", "); + } + return msg.str(); +} + +common_params_context common_params_parser_init(common_params & params, llama_example ex, void(*print_usage)(int, char **)) { + // load dynamic backends + ggml_backend_load_all(); + + common_params_context ctx_arg(params); + ctx_arg.print_usage = print_usage; + ctx_arg.ex = ex; + + std::string sampler_type_chars; + std::string sampler_type_names; + for (const auto & sampler : params.sampling.samplers) { + sampler_type_chars += common_sampler_type_to_chr(sampler); + sampler_type_names += common_sampler_type_to_str(sampler) + ";"; + } + sampler_type_names.pop_back(); + + + /** + * filter options by example + * rules: + * - all examples inherit options from LLAMA_EXAMPLE_COMMON + * - if LLAMA_EXAMPLE_* is set (other than COMMON), we only show the option in the corresponding example + * - if both {LLAMA_EXAMPLE_COMMON, LLAMA_EXAMPLE_*,} are set, we will prioritize the LLAMA_EXAMPLE_* matching current example + */ + auto add_opt = [&](common_arg arg) { + if ((arg.in_example(ex) || arg.in_example(LLAMA_EXAMPLE_COMMON)) && !arg.is_exclude(ex)) { + ctx_arg.options.push_back(std::move(arg)); + } + }; + + + add_opt(common_arg( + {"-h", "--help", "--usage"}, + "print usage and exit", + [](common_params & params) { + params.usage = true; + } + )); + add_opt(common_arg( + {"--version"}, + "show version and build info", + [](common_params &) { + fprintf(stderr, "version: %d (%s)\n", LLAMA_BUILD_NUMBER, LLAMA_COMMIT); + fprintf(stderr, "built with %s for %s\n", LLAMA_COMPILER, LLAMA_BUILD_TARGET); + exit(0); + } + )); + add_opt(common_arg( + {"--verbose-prompt"}, + string_format("print a verbose prompt before generation (default: %s)", params.verbose_prompt ? "true" : "false"), + [](common_params & params) { + params.verbose_prompt = true; + } + )); + add_opt(common_arg( + {"--no-display-prompt"}, + string_format("don't print prompt at generation (default: %s)", !params.display_prompt ? "true" : "false"), + [](common_params & params) { + params.display_prompt = false; + } + ).set_examples({LLAMA_EXAMPLE_MAIN})); + add_opt(common_arg( + {"-co", "--color"}, + string_format("colorise output to distinguish prompt and user input from generations (default: %s)", params.use_color ? 
"true" : "false"), + [](common_params & params) { + params.use_color = true; + } + ).set_examples({LLAMA_EXAMPLE_MAIN, LLAMA_EXAMPLE_INFILL, LLAMA_EXAMPLE_SPECULATIVE, LLAMA_EXAMPLE_LOOKUP})); + add_opt(common_arg( + {"-t", "--threads"}, "N", + string_format("number of threads to use during generation (default: %d)", params.cpuparams.n_threads), + [](common_params & params, int value) { + params.cpuparams.n_threads = value; + if (params.cpuparams.n_threads <= 0) { + params.cpuparams.n_threads = std::thread::hardware_concurrency(); + } + } + ).set_env("LLAMA_ARG_THREADS")); + add_opt(common_arg( + {"-tb", "--threads-batch"}, "N", + "number of threads to use during batch and prompt processing (default: same as --threads)", + [](common_params & params, int value) { + params.cpuparams_batch.n_threads = value; + if (params.cpuparams_batch.n_threads <= 0) { + params.cpuparams_batch.n_threads = std::thread::hardware_concurrency(); + } + } + )); + add_opt(common_arg( + {"-C", "--cpu-mask"}, "M", + "CPU affinity mask: arbitrarily long hex. Complements cpu-range (default: \"\")", + [](common_params & params, const std::string & mask) { + params.cpuparams.mask_valid = true; + if (!parse_cpu_mask(mask, params.cpuparams.cpumask)) { + throw std::invalid_argument("invalid cpumask"); + } + } + )); + add_opt(common_arg( + {"-Cr", "--cpu-range"}, "lo-hi", + "range of CPUs for affinity. Complements --cpu-mask", + [](common_params & params, const std::string & range) { + params.cpuparams.mask_valid = true; + if (!parse_cpu_range(range, params.cpuparams.cpumask)) { + throw std::invalid_argument("invalid range"); + } + } + )); + add_opt(common_arg( + {"--cpu-strict"}, "<0|1>", + string_format("use strict CPU placement (default: %u)\n", (unsigned) params.cpuparams.strict_cpu), + [](common_params & params, const std::string & value) { + params.cpuparams.strict_cpu = std::stoul(value); + } + )); + add_opt(common_arg( + {"--prio"}, "N", + string_format("set process/thread priority : 0-normal, 1-medium, 2-high, 3-realtime (default: %d)\n", params.cpuparams.priority), + [](common_params & params, int prio) { + if (prio < 0 || prio > 3) { + throw std::invalid_argument("invalid value"); + } + params.cpuparams.priority = (enum ggml_sched_priority) prio; + } + )); + add_opt(common_arg( + {"--poll"}, "<0...100>", + string_format("use polling level to wait for work (0 - no polling, default: %u)\n", (unsigned) params.cpuparams.poll), + [](common_params & params, const std::string & value) { + params.cpuparams.poll = std::stoul(value); + } + )); + add_opt(common_arg( + {"-Cb", "--cpu-mask-batch"}, "M", + "CPU affinity mask: arbitrarily long hex. Complements cpu-range-batch (default: same as --cpu-mask)", + [](common_params & params, const std::string & mask) { + params.cpuparams_batch.mask_valid = true; + if (!parse_cpu_mask(mask, params.cpuparams_batch.cpumask)) { + throw std::invalid_argument("invalid cpumask"); + } + } + )); + add_opt(common_arg( + {"-Crb", "--cpu-range-batch"}, "lo-hi", + "ranges of CPUs for affinity. 
Complements --cpu-mask-batch", + [](common_params & params, const std::string & range) { + params.cpuparams_batch.mask_valid = true; + if (!parse_cpu_range(range, params.cpuparams_batch.cpumask)) { + throw std::invalid_argument("invalid range"); + } + } + )); + add_opt(common_arg( + {"--cpu-strict-batch"}, "<0|1>", + "use strict CPU placement (default: same as --cpu-strict)", + [](common_params & params, int value) { + params.cpuparams_batch.strict_cpu = value; + } + )); + add_opt(common_arg( + {"--prio-batch"}, "N", + string_format("set process/thread priority : 0-normal, 1-medium, 2-high, 3-realtime (default: %d)\n", params.cpuparams_batch.priority), + [](common_params & params, int prio) { + if (prio < 0 || prio > 3) { + throw std::invalid_argument("invalid value"); + } + params.cpuparams_batch.priority = (enum ggml_sched_priority) prio; + } + )); + add_opt(common_arg( + {"--poll-batch"}, "<0|1>", + "use polling to wait for work (default: same as --poll)", + [](common_params & params, int value) { + params.cpuparams_batch.poll = value; + } + )); + add_opt(common_arg( + {"-lcs", "--lookup-cache-static"}, "FNAME", + "path to static lookup cache to use for lookup decoding (not updated by generation)", + [](common_params & params, const std::string & value) { + params.lookup_cache_static = value; + } + ).set_examples({LLAMA_EXAMPLE_LOOKUP})); + add_opt(common_arg( + {"-lcd", "--lookup-cache-dynamic"}, "FNAME", + "path to dynamic lookup cache to use for lookup decoding (updated by generation)", + [](common_params & params, const std::string & value) { + params.lookup_cache_dynamic = value; + } + ).set_examples({LLAMA_EXAMPLE_LOOKUP})); + add_opt(common_arg( + {"-c", "--ctx-size"}, "N", + string_format("size of the prompt context (default: %d, 0 = loaded from model)", params.n_ctx), + [](common_params & params, int value) { + params.n_ctx = value; + } + ).set_env("LLAMA_ARG_CTX_SIZE")); + add_opt(common_arg( + {"-n", "--predict", "--n-predict"}, "N", + string_format("number of tokens to predict (default: %d, -1 = infinity, -2 = until context filled)", params.n_predict), + [](common_params & params, int value) { + params.n_predict = value; + } + ).set_env("LLAMA_ARG_N_PREDICT")); + add_opt(common_arg( + {"-b", "--batch-size"}, "N", + string_format("logical maximum batch size (default: %d)", params.n_batch), + [](common_params & params, int value) { + params.n_batch = value; + } + ).set_env("LLAMA_ARG_BATCH")); + add_opt(common_arg( + {"-ub", "--ubatch-size"}, "N", + string_format("physical maximum batch size (default: %d)", params.n_ubatch), + [](common_params & params, int value) { + params.n_ubatch = value; + } + ).set_env("LLAMA_ARG_UBATCH")); + add_opt(common_arg( + {"--keep"}, "N", + string_format("number of tokens to keep from the initial prompt (default: %d, -1 = all)", params.n_keep), + [](common_params & params, int value) { + params.n_keep = value; + } + )); + add_opt(common_arg( + {"--no-context-shift"}, + string_format("disables context shift on infinite text generation (default: %s)", params.ctx_shift ? 
"disabled" : "enabled"), + [](common_params & params) { + params.ctx_shift = false; + } + ).set_examples({LLAMA_EXAMPLE_MAIN, LLAMA_EXAMPLE_SERVER, LLAMA_EXAMPLE_IMATRIX, LLAMA_EXAMPLE_PERPLEXITY}).set_env("LLAMA_ARG_NO_CONTEXT_SHIFT")); + add_opt(common_arg( + {"--chunks"}, "N", + string_format("max number of chunks to process (default: %d, -1 = all)", params.n_chunks), + [](common_params & params, int value) { + params.n_chunks = value; + } + ).set_examples({LLAMA_EXAMPLE_IMATRIX, LLAMA_EXAMPLE_PERPLEXITY, LLAMA_EXAMPLE_RETRIEVAL})); + add_opt(common_arg( + {"-fa", "--flash-attn"}, + string_format("enable Flash Attention (default: %s)", params.flash_attn ? "enabled" : "disabled"), + [](common_params & params) { + params.flash_attn = true; + } + ).set_env("LLAMA_ARG_FLASH_ATTN")); + add_opt(common_arg( + {"-p", "--prompt"}, "PROMPT", + ex == LLAMA_EXAMPLE_MAIN + ? "prompt to start generation with\nif -cnv is set, this will be used as system prompt" + : "prompt to start generation with", + [](common_params & params, const std::string & value) { + params.prompt = value; + } + ).set_excludes({LLAMA_EXAMPLE_SERVER})); + add_opt(common_arg( + {"--no-perf"}, + string_format("disable internal libllama performance timings (default: %s)", params.no_perf ? "true" : "false"), + [](common_params & params) { + params.no_perf = true; + params.sampling.no_perf = true; + } + ).set_env("LLAMA_ARG_NO_PERF")); + add_opt(common_arg( + {"-f", "--file"}, "FNAME", + "a file containing the prompt (default: none)", + [](common_params & params, const std::string & value) { + std::ifstream file(value); + if (!file) { + throw std::runtime_error(string_format("error: failed to open file '%s'\n", value.c_str())); + } + // store the external file name in params + params.prompt_file = value; + std::copy(std::istreambuf_iterator(file), std::istreambuf_iterator(), back_inserter(params.prompt)); + if (!params.prompt.empty() && params.prompt.back() == '\n') { + params.prompt.pop_back(); + } + } + ).set_excludes({LLAMA_EXAMPLE_SERVER})); + add_opt(common_arg( + {"--in-file"}, "FNAME", + "an input file (repeat to specify multiple files)", + [](common_params & params, const std::string & value) { + std::ifstream file(value); + if (!file) { + throw std::runtime_error(string_format("error: failed to open file '%s'\n", value.c_str())); + } + params.in_files.push_back(value); + } + ).set_examples({LLAMA_EXAMPLE_IMATRIX})); + add_opt(common_arg( + {"-bf", "--binary-file"}, "FNAME", + "binary file containing the prompt (default: none)", + [](common_params & params, const std::string & value) { + std::ifstream file(value, std::ios::binary); + if (!file) { + throw std::runtime_error(string_format("error: failed to open file '%s'\n", value.c_str())); + } + // store the external file name in params + params.prompt_file = value; + std::ostringstream ss; + ss << file.rdbuf(); + params.prompt = ss.str(); + fprintf(stderr, "Read %zu bytes from binary file %s\n", params.prompt.size(), value.c_str()); + } + ).set_excludes({LLAMA_EXAMPLE_SERVER})); + add_opt(common_arg( + {"-e", "--escape"}, + string_format("process escapes sequences (\\n, \\r, \\t, \\', \\\", \\\\) (default: %s)", params.escape ? 
"true" : "false"), + [](common_params & params) { + params.escape = true; + } + )); + add_opt(common_arg( + {"--no-escape"}, + "do not process escape sequences", + [](common_params & params) { + params.escape = false; + } + )); + add_opt(common_arg( + {"-ptc", "--print-token-count"}, "N", + string_format("print token count every N tokens (default: %d)", params.n_print), + [](common_params & params, int value) { + params.n_print = value; + } + ).set_examples({LLAMA_EXAMPLE_MAIN})); + add_opt(common_arg( + {"--prompt-cache"}, "FNAME", + "file to cache prompt state for faster startup (default: none)", + [](common_params & params, const std::string & value) { + params.path_prompt_cache = value; + } + ).set_examples({LLAMA_EXAMPLE_MAIN})); + add_opt(common_arg( + {"--prompt-cache-all"}, + "if specified, saves user input and generations to cache as well\n", + [](common_params & params) { + params.prompt_cache_all = true; + } + ).set_examples({LLAMA_EXAMPLE_MAIN})); + add_opt(common_arg( + {"--prompt-cache-ro"}, + "if specified, uses the prompt cache but does not update it", + [](common_params & params) { + params.prompt_cache_ro = true; + } + ).set_examples({LLAMA_EXAMPLE_MAIN})); + add_opt(common_arg( + {"-r", "--reverse-prompt"}, "PROMPT", + "halt generation at PROMPT, return control in interactive mode\n", + [](common_params & params, const std::string & value) { + params.antiprompt.emplace_back(value); + } + ).set_examples({LLAMA_EXAMPLE_MAIN})); + add_opt(common_arg( + {"-sp", "--special"}, + string_format("special tokens output enabled (default: %s)", params.special ? "true" : "false"), + [](common_params & params) { + params.special = true; + } + ).set_examples({LLAMA_EXAMPLE_MAIN, LLAMA_EXAMPLE_SERVER})); + add_opt(common_arg( + {"-cnv", "--conversation"}, + "run in conversation mode:\n" + "- does not print special tokens and suffix/prefix\n" + "- interactive mode is also enabled\n" + "(default: auto enabled if chat template is available)", + [](common_params & params) { + params.conversation_mode = COMMON_CONVERSATION_MODE_ENABLED; + } + ).set_examples({LLAMA_EXAMPLE_MAIN})); + add_opt(common_arg( + {"-no-cnv", "--no-conversation"}, + "force disable conversation mode (default: false)", + [](common_params & params) { + params.conversation_mode = COMMON_CONVERSATION_MODE_DISABLED; + } + ).set_examples({LLAMA_EXAMPLE_MAIN})); + add_opt(common_arg( + {"-i", "--interactive"}, + string_format("run in interactive mode (default: %s)", params.interactive ? "true" : "false"), + [](common_params & params) { + params.interactive = true; + } + ).set_examples({LLAMA_EXAMPLE_MAIN})); + add_opt(common_arg( + {"-if", "--interactive-first"}, + string_format("run in interactive mode and wait for input right away (default: %s)", params.interactive_first ? 
"true" : "false"), + [](common_params & params) { + params.interactive_first = true; + } + ).set_examples({LLAMA_EXAMPLE_MAIN})); + add_opt(common_arg( + {"-mli", "--multiline-input"}, + "allows you to write or paste multiple lines without ending each in '\\'", + [](common_params & params) { + params.multiline_input = true; + } + ).set_examples({LLAMA_EXAMPLE_MAIN})); + add_opt(common_arg( + {"--in-prefix-bos"}, + "prefix BOS to user inputs, preceding the `--in-prefix` string", + [](common_params & params) { + params.input_prefix_bos = true; + params.enable_chat_template = false; + } + ).set_examples({LLAMA_EXAMPLE_MAIN})); + add_opt(common_arg( + {"--in-prefix"}, "STRING", + "string to prefix user inputs with (default: empty)", + [](common_params & params, const std::string & value) { + params.input_prefix = value; + params.enable_chat_template = false; + } + ).set_examples({LLAMA_EXAMPLE_MAIN, LLAMA_EXAMPLE_INFILL})); + add_opt(common_arg( + {"--in-suffix"}, "STRING", + "string to suffix after user inputs with (default: empty)", + [](common_params & params, const std::string & value) { + params.input_suffix = value; + params.enable_chat_template = false; + } + ).set_examples({LLAMA_EXAMPLE_MAIN, LLAMA_EXAMPLE_INFILL})); + add_opt(common_arg( + {"--no-warmup"}, + "skip warming up the model with an empty run", + [](common_params & params) { + params.warmup = false; + } + ).set_examples({LLAMA_EXAMPLE_MAIN, LLAMA_EXAMPLE_SERVER, LLAMA_EXAMPLE_EMBEDDING})); + add_opt(common_arg( + {"--spm-infill"}, + string_format( + "use Suffix/Prefix/Middle pattern for infill (instead of Prefix/Suffix/Middle) as some models prefer this. (default: %s)", + params.spm_infill ? "enabled" : "disabled" + ), + [](common_params & params) { + params.spm_infill = true; + } + ).set_examples({LLAMA_EXAMPLE_SERVER, LLAMA_EXAMPLE_INFILL})); + add_opt(common_arg( + {"--samplers"}, "SAMPLERS", + string_format("samplers that will be used for generation in the order, separated by \';\'\n(default: %s)", sampler_type_names.c_str()), + [](common_params & params, const std::string & value) { + const auto sampler_names = string_split(value, ';'); + params.sampling.samplers = common_sampler_types_from_names(sampler_names, true); + } + ).set_sparam()); + add_opt(common_arg( + {"-s", "--seed"}, "SEED", + string_format("RNG seed (default: %d, use random seed for %d)", params.sampling.seed, LLAMA_DEFAULT_SEED), + [](common_params & params, const std::string & value) { + params.sampling.seed = std::stoul(value); + } + ).set_sparam()); + add_opt(common_arg( + {"--sampling-seq", "--sampler-seq"}, "SEQUENCE", + string_format("simplified sequence for samplers that will be used (default: %s)", sampler_type_chars.c_str()), + [](common_params & params, const std::string & value) { + params.sampling.samplers = common_sampler_types_from_chars(value); + } + ).set_sparam()); + add_opt(common_arg( + {"--ignore-eos"}, + "ignore end of stream token and continue generating (implies --logit-bias EOS-inf)", + [](common_params & params) { + params.sampling.ignore_eos = true; + } + ).set_sparam()); + add_opt(common_arg( + {"--temp"}, "N", + string_format("temperature (default: %.1f)", (double)params.sampling.temp), + [](common_params & params, const std::string & value) { + params.sampling.temp = std::stof(value); + params.sampling.temp = std::max(params.sampling.temp, 0.0f); + } + ).set_sparam()); + add_opt(common_arg( + {"--top-k"}, "N", + string_format("top-k sampling (default: %d, 0 = disabled)", params.sampling.top_k), + [](common_params & params, 
int value) { + params.sampling.top_k = value; + } + ).set_sparam()); + add_opt(common_arg( + {"--top-p"}, "N", + string_format("top-p sampling (default: %.1f, 1.0 = disabled)", (double)params.sampling.top_p), + [](common_params & params, const std::string & value) { + params.sampling.top_p = std::stof(value); + } + ).set_sparam()); + add_opt(common_arg( + {"--min-p"}, "N", + string_format("min-p sampling (default: %.1f, 0.0 = disabled)", (double)params.sampling.min_p), + [](common_params & params, const std::string & value) { + params.sampling.min_p = std::stof(value); + } + ).set_sparam()); + add_opt(common_arg( + {"--top-nsigma"}, "N", + string_format("top-n-sigma sampling (default: %.1f, -1.0 = disabled)", params.sampling.top_n_sigma), + [](common_params & params, const std::string & value) { + params.sampling.top_n_sigma = std::stof(value); + } + ).set_examples({LLAMA_EXAMPLE_MAIN}).set_sparam()); + add_opt(common_arg( + {"--xtc-probability"}, "N", + string_format("xtc probability (default: %.1f, 0.0 = disabled)", (double)params.sampling.xtc_probability), + [](common_params & params, const std::string & value) { + params.sampling.xtc_probability = std::stof(value); + } + ).set_sparam()); + add_opt(common_arg( + {"--xtc-threshold"}, "N", + string_format("xtc threshold (default: %.1f, 1.0 = disabled)", (double)params.sampling.xtc_threshold), + [](common_params & params, const std::string & value) { + params.sampling.xtc_threshold = std::stof(value); + } + ).set_sparam()); + add_opt(common_arg( + {"--typical"}, "N", + string_format("locally typical sampling, parameter p (default: %.1f, 1.0 = disabled)", (double)params.sampling.typ_p), + [](common_params & params, const std::string & value) { + params.sampling.typ_p = std::stof(value); + } + ).set_sparam()); + add_opt(common_arg( + {"--repeat-last-n"}, "N", + string_format("last n tokens to consider for penalize (default: %d, 0 = disabled, -1 = ctx_size)", params.sampling.penalty_last_n), + [](common_params & params, int value) { + if (value < -1) { + throw std::runtime_error(string_format("error: invalid repeat-last-n = %d\n", value)); + } + params.sampling.penalty_last_n = value; + params.sampling.n_prev = std::max(params.sampling.n_prev, params.sampling.penalty_last_n); + } + ).set_sparam()); + add_opt(common_arg( + {"--repeat-penalty"}, "N", + string_format("penalize repeat sequence of tokens (default: %.1f, 1.0 = disabled)", (double)params.sampling.penalty_repeat), + [](common_params & params, const std::string & value) { + params.sampling.penalty_repeat = std::stof(value); + } + ).set_sparam()); + add_opt(common_arg( + {"--presence-penalty"}, "N", + string_format("repeat alpha presence penalty (default: %.1f, 0.0 = disabled)", (double)params.sampling.penalty_present), + [](common_params & params, const std::string & value) { + params.sampling.penalty_present = std::stof(value); + } + ).set_sparam()); + add_opt(common_arg( + {"--frequency-penalty"}, "N", + string_format("repeat alpha frequency penalty (default: %.1f, 0.0 = disabled)", (double)params.sampling.penalty_freq), + [](common_params & params, const std::string & value) { + params.sampling.penalty_freq = std::stof(value); + } + ).set_sparam()); + add_opt(common_arg( + {"--dry-multiplier"}, "N", + string_format("set DRY sampling multiplier (default: %.1f, 0.0 = disabled)", (double)params.sampling.dry_multiplier), + [](common_params & params, const std::string & value) { + params.sampling.dry_multiplier = std::stof(value); + } + ).set_sparam()); + add_opt(common_arg( + 
{"--dry-base"}, "N", + string_format("set DRY sampling base value (default: %.2f)", (double)params.sampling.dry_base), + [](common_params & params, const std::string & value) { + float potential_base = std::stof(value); + if (potential_base >= 1.0f) + { + params.sampling.dry_base = potential_base; + } + } + ).set_sparam()); + add_opt(common_arg( + {"--dry-allowed-length"}, "N", + string_format("set allowed length for DRY sampling (default: %d)", params.sampling.dry_allowed_length), + [](common_params & params, int value) { + params.sampling.dry_allowed_length = value; + } + ).set_sparam()); + add_opt(common_arg( + {"--dry-penalty-last-n"}, "N", + string_format("set DRY penalty for the last n tokens (default: %d, 0 = disable, -1 = context size)", params.sampling.dry_penalty_last_n), + [](common_params & params, int value) { + if (value < -1) { + throw std::runtime_error(string_format("error: invalid dry-penalty-last-n = %d\n", value)); + } + params.sampling.dry_penalty_last_n = value; + } + ).set_sparam()); + add_opt(common_arg( + {"--dry-sequence-breaker"}, "STRING", + string_format("add sequence breaker for DRY sampling, clearing out default breakers (%s) in the process; use \"none\" to not use any sequence breakers\n", + params.sampling.dry_sequence_breakers.empty() ? "none" : + std::accumulate(std::next(params.sampling.dry_sequence_breakers.begin()), + params.sampling.dry_sequence_breakers.end(), + std::string("'") + (params.sampling.dry_sequence_breakers[0] == "\n" ? "\\n" : params.sampling.dry_sequence_breakers[0]) + "'", + [](const std::string& a, const std::string& b) { + std::string formatted_b = (b == "\n") ? "\\n" : b; + return a + ", '" + formatted_b + "'"; + }).c_str()), + [](common_params & params, const std::string & value) { + static bool defaults_cleared = false; + + if (!defaults_cleared) { + params.sampling.dry_sequence_breakers.clear(); + defaults_cleared = true; + } + + if (value == "none") { + params.sampling.dry_sequence_breakers.clear(); + } else { + params.sampling.dry_sequence_breakers.emplace_back(value); + } + } + ).set_sparam()); + add_opt(common_arg( + {"--dynatemp-range"}, "N", + string_format("dynamic temperature range (default: %.1f, 0.0 = disabled)", (double)params.sampling.dynatemp_range), + [](common_params & params, const std::string & value) { + params.sampling.dynatemp_range = std::stof(value); + } + ).set_sparam()); + add_opt(common_arg( + {"--dynatemp-exp"}, "N", + string_format("dynamic temperature exponent (default: %.1f)", (double)params.sampling.dynatemp_exponent), + [](common_params & params, const std::string & value) { + params.sampling.dynatemp_exponent = std::stof(value); + } + ).set_sparam()); + add_opt(common_arg( + {"--mirostat"}, "N", + string_format("use Mirostat sampling.\nTop K, Nucleus and Locally Typical samplers are ignored if used.\n" + "(default: %d, 0 = disabled, 1 = Mirostat, 2 = Mirostat 2.0)", params.sampling.mirostat), + [](common_params & params, int value) { + params.sampling.mirostat = value; + } + ).set_sparam()); + add_opt(common_arg( + {"--mirostat-lr"}, "N", + string_format("Mirostat learning rate, parameter eta (default: %.1f)", (double)params.sampling.mirostat_eta), + [](common_params & params, const std::string & value) { + params.sampling.mirostat_eta = std::stof(value); + } + ).set_sparam()); + add_opt(common_arg( + {"--mirostat-ent"}, "N", + string_format("Mirostat target entropy, parameter tau (default: %.1f)", (double)params.sampling.mirostat_tau), + [](common_params & params, const std::string & value) { + 
params.sampling.mirostat_tau = std::stof(value); + } + ).set_sparam()); + add_opt(common_arg( + {"-l", "--logit-bias"}, "TOKEN_ID(+/-)BIAS", + "modifies the likelihood of token appearing in the completion,\n" + "i.e. `--logit-bias 15043+1` to increase likelihood of token ' Hello',\n" + "or `--logit-bias 15043-1` to decrease likelihood of token ' Hello'", + [](common_params & params, const std::string & value) { + std::stringstream ss(value); + llama_token key; + char sign; + std::string value_str; + try { + if (ss >> key && ss >> sign && std::getline(ss, value_str) && (sign == '+' || sign == '-')) { + const float bias = std::stof(value_str) * ((sign == '-') ? -1.0f : 1.0f); + params.sampling.logit_bias.push_back({key, bias}); + } else { + throw std::invalid_argument("invalid input format"); + } + } catch (const std::exception&) { + throw std::invalid_argument("invalid input format"); + } + } + ).set_sparam()); + add_opt(common_arg( + {"--grammar"}, "GRAMMAR", + string_format("BNF-like grammar to constrain generations (see samples in grammars/ dir) (default: '%s')", params.sampling.grammar.c_str()), + [](common_params & params, const std::string & value) { + params.sampling.grammar = value; + } + ).set_sparam()); + add_opt(common_arg( + {"--grammar-file"}, "FNAME", + "file to read grammar from", + [](common_params & params, const std::string & value) { + std::ifstream file(value); + if (!file) { + throw std::runtime_error(string_format("error: failed to open file '%s'\n", value.c_str())); + } + std::copy( + std::istreambuf_iterator(file), + std::istreambuf_iterator(), + std::back_inserter(params.sampling.grammar) + ); + } + ).set_sparam()); + add_opt(common_arg( + {"-j", "--json-schema"}, "SCHEMA", + "JSON schema to constrain generations (https://json-schema.org/), e.g. 
`{}` for any JSON object\nFor schemas w/ external $refs, use --grammar + example/json_schema_to_grammar.py instead", + [](common_params & params, const std::string & value) { + params.sampling.grammar = json_schema_to_grammar(json::parse(value)); + } + ).set_sparam()); + add_opt(common_arg( + {"--pooling"}, "{none,mean,cls,last,rank}", + "pooling type for embeddings, use model default if unspecified", + [](common_params & params, const std::string & value) { + /**/ if (value == "none") { params.pooling_type = LLAMA_POOLING_TYPE_NONE; } + else if (value == "mean") { params.pooling_type = LLAMA_POOLING_TYPE_MEAN; } + else if (value == "cls") { params.pooling_type = LLAMA_POOLING_TYPE_CLS; } + else if (value == "last") { params.pooling_type = LLAMA_POOLING_TYPE_LAST; } + else if (value == "rank") { params.pooling_type = LLAMA_POOLING_TYPE_RANK; } + else { throw std::invalid_argument("invalid value"); } + } + ).set_examples({LLAMA_EXAMPLE_EMBEDDING, LLAMA_EXAMPLE_RETRIEVAL, LLAMA_EXAMPLE_SERVER}).set_env("LLAMA_ARG_POOLING")); + add_opt(common_arg( + {"--attention"}, "{causal,non-causal}", + "attention type for embeddings, use model default if unspecified", + [](common_params & params, const std::string & value) { + /**/ if (value == "causal") { params.attention_type = LLAMA_ATTENTION_TYPE_CAUSAL; } + else if (value == "non-causal") { params.attention_type = LLAMA_ATTENTION_TYPE_NON_CAUSAL; } + else { throw std::invalid_argument("invalid value"); } + } + ).set_examples({LLAMA_EXAMPLE_EMBEDDING})); + add_opt(common_arg( + {"--rope-scaling"}, "{none,linear,yarn}", + "RoPE frequency scaling method, defaults to linear unless specified by the model", + [](common_params & params, const std::string & value) { + /**/ if (value == "none") { params.rope_scaling_type = LLAMA_ROPE_SCALING_TYPE_NONE; } + else if (value == "linear") { params.rope_scaling_type = LLAMA_ROPE_SCALING_TYPE_LINEAR; } + else if (value == "yarn") { params.rope_scaling_type = LLAMA_ROPE_SCALING_TYPE_YARN; } + else { throw std::invalid_argument("invalid value"); } + } + ).set_env("LLAMA_ARG_ROPE_SCALING_TYPE")); + add_opt(common_arg( + {"--rope-scale"}, "N", + "RoPE context scaling factor, expands context by a factor of N", + [](common_params & params, const std::string & value) { + params.rope_freq_scale = 1.0f / std::stof(value); + } + ).set_env("LLAMA_ARG_ROPE_SCALE")); + add_opt(common_arg( + {"--rope-freq-base"}, "N", + "RoPE base frequency, used by NTK-aware scaling (default: loaded from model)", + [](common_params & params, const std::string & value) { + params.rope_freq_base = std::stof(value); + } + ).set_env("LLAMA_ARG_ROPE_FREQ_BASE")); + add_opt(common_arg( + {"--rope-freq-scale"}, "N", + "RoPE frequency scaling factor, expands context by a factor of 1/N", + [](common_params & params, const std::string & value) { + params.rope_freq_scale = std::stof(value); + } + ).set_env("LLAMA_ARG_ROPE_FREQ_SCALE")); + add_opt(common_arg( + {"--yarn-orig-ctx"}, "N", + string_format("YaRN: original context size of model (default: %d = model training context size)", params.yarn_orig_ctx), + [](common_params & params, int value) { + params.yarn_orig_ctx = value; + } + ).set_env("LLAMA_ARG_YARN_ORIG_CTX")); + add_opt(common_arg( + {"--yarn-ext-factor"}, "N", + string_format("YaRN: extrapolation mix factor (default: %.1f, 0.0 = full interpolation)", (double)params.yarn_ext_factor), + [](common_params & params, const std::string & value) { + params.yarn_ext_factor = std::stof(value); + } + ).set_env("LLAMA_ARG_YARN_EXT_FACTOR")); + 
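Note on the two RoPE scaling flags above: --rope-scale N is stored internally as rope_freq_scale = 1/N, so, as a purely illustrative example, --rope-scale 4 stores rope_freq_scale = 0.25 and has the same effect as passing --rope-freq-scale 0.25 directly.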
add_opt(common_arg( + {"--yarn-attn-factor"}, "N", + string_format("YaRN: scale sqrt(t) or attention magnitude (default: %.1f)", (double)params.yarn_attn_factor), + [](common_params & params, const std::string & value) { + params.yarn_attn_factor = std::stof(value); + } + ).set_env("LLAMA_ARG_YARN_ATTN_FACTOR")); + add_opt(common_arg( + {"--yarn-beta-slow"}, "N", + string_format("YaRN: high correction dim or alpha (default: %.1f)", (double)params.yarn_beta_slow), + [](common_params & params, const std::string & value) { + params.yarn_beta_slow = std::stof(value); + } + ).set_env("LLAMA_ARG_YARN_BETA_SLOW")); + add_opt(common_arg( + {"--yarn-beta-fast"}, "N", + string_format("YaRN: low correction dim or beta (default: %.1f)", (double)params.yarn_beta_fast), + [](common_params & params, const std::string & value) { + params.yarn_beta_fast = std::stof(value); + } + ).set_env("LLAMA_ARG_YARN_BETA_FAST")); + add_opt(common_arg( + {"-gan", "--grp-attn-n"}, "N", + string_format("group-attention factor (default: %d)", params.grp_attn_n), + [](common_params & params, int value) { + params.grp_attn_n = value; + } + ).set_env("LLAMA_ARG_GRP_ATTN_N").set_examples({LLAMA_EXAMPLE_MAIN, LLAMA_EXAMPLE_PASSKEY})); + add_opt(common_arg( + {"-gaw", "--grp-attn-w"}, "N", + string_format("group-attention width (default: %d)", params.grp_attn_w), + [](common_params & params, int value) { + params.grp_attn_w = value; + } + ).set_env("LLAMA_ARG_GRP_ATTN_W").set_examples({LLAMA_EXAMPLE_MAIN})); + add_opt(common_arg( + {"-dkvc", "--dump-kv-cache"}, + "verbose print of the KV cache", + [](common_params & params) { + params.dump_kv_cache = true; + } + )); + add_opt(common_arg( + {"-nkvo", "--no-kv-offload"}, + "disable KV offload", + [](common_params & params) { + params.no_kv_offload = true; + } + ).set_env("LLAMA_ARG_NO_KV_OFFLOAD")); + add_opt(common_arg( + {"-ctk", "--cache-type-k"}, "TYPE", + string_format( + "KV cache data type for K\n" + "allowed values: %s\n" + "(default: %s)", + get_all_kv_cache_types().c_str(), + ggml_type_name(params.cache_type_k) + ), + [](common_params & params, const std::string & value) { + params.cache_type_k = kv_cache_type_from_str(value); + } + ).set_env("LLAMA_ARG_CACHE_TYPE_K")); + add_opt(common_arg( + {"-ctv", "--cache-type-v"}, "TYPE", + string_format( + "KV cache data type for V\n" + "allowed values: %s\n" + "(default: %s)", + get_all_kv_cache_types().c_str(), + ggml_type_name(params.cache_type_v) + ), + [](common_params & params, const std::string & value) { + params.cache_type_v = kv_cache_type_from_str(value); + } + ).set_env("LLAMA_ARG_CACHE_TYPE_V")); + add_opt(common_arg( + {"--perplexity", "--all-logits"}, + string_format("return logits for all tokens in the batch (default: %s)", params.logits_all ? 
"true" : "false"), + [](common_params & params) { + params.logits_all = true; + } + ).set_examples({LLAMA_EXAMPLE_PERPLEXITY})); + add_opt(common_arg( + {"--hellaswag"}, + "compute HellaSwag score over random tasks from datafile supplied with -f", + [](common_params & params) { + params.hellaswag = true; + } + ).set_examples({LLAMA_EXAMPLE_PERPLEXITY})); + add_opt(common_arg( + {"--hellaswag-tasks"}, "N", + string_format("number of tasks to use when computing the HellaSwag score (default: %zu)", params.hellaswag_tasks), + [](common_params & params, int value) { + params.hellaswag_tasks = value; + } + ).set_examples({LLAMA_EXAMPLE_PERPLEXITY})); + add_opt(common_arg( + {"--winogrande"}, + "compute Winogrande score over random tasks from datafile supplied with -f", + [](common_params & params) { + params.winogrande = true; + } + ).set_examples({LLAMA_EXAMPLE_PERPLEXITY})); + add_opt(common_arg( + {"--winogrande-tasks"}, "N", + string_format("number of tasks to use when computing the Winogrande score (default: %zu)", params.winogrande_tasks), + [](common_params & params, int value) { + params.winogrande_tasks = value; + } + ).set_examples({LLAMA_EXAMPLE_PERPLEXITY})); + add_opt(common_arg( + {"--multiple-choice"}, + "compute multiple choice score over random tasks from datafile supplied with -f", + [](common_params & params) { + params.multiple_choice = true; + } + ).set_examples({LLAMA_EXAMPLE_PERPLEXITY})); + add_opt(common_arg( + {"--multiple-choice-tasks"}, "N", + string_format("number of tasks to use when computing the multiple choice score (default: %zu)", params.multiple_choice_tasks), + [](common_params & params, int value) { + params.multiple_choice_tasks = value; + } + ).set_examples({LLAMA_EXAMPLE_PERPLEXITY})); + add_opt(common_arg( + {"--kl-divergence"}, + "computes KL-divergence to logits provided via --kl-divergence-base", + [](common_params & params) { + params.kl_divergence = true; + } + ).set_examples({LLAMA_EXAMPLE_PERPLEXITY})); + add_opt(common_arg( + {"--save-all-logits", "--kl-divergence-base"}, "FNAME", + "set logits file", + [](common_params & params, const std::string & value) { + params.logits_file = value; + } + ).set_examples({LLAMA_EXAMPLE_PERPLEXITY})); + add_opt(common_arg( + {"--ppl-stride"}, "N", + string_format("stride for perplexity calculation (default: %d)", params.ppl_stride), + [](common_params & params, int value) { + params.ppl_stride = value; + } + ).set_examples({LLAMA_EXAMPLE_PERPLEXITY})); + add_opt(common_arg( + {"--ppl-output-type"}, "<0|1>", + string_format("output type for perplexity calculation (default: %d)", params.ppl_output_type), + [](common_params & params, int value) { + params.ppl_output_type = value; + } + ).set_examples({LLAMA_EXAMPLE_PERPLEXITY})); + add_opt(common_arg( + {"-dt", "--defrag-thold"}, "N", + string_format("KV cache defragmentation threshold (default: %.1f, < 0 - disabled)", (double)params.defrag_thold), + [](common_params & params, const std::string & value) { + params.defrag_thold = std::stof(value); + } + ).set_env("LLAMA_ARG_DEFRAG_THOLD")); + add_opt(common_arg( + {"-np", "--parallel"}, "N", + string_format("number of parallel sequences to decode (default: %d)", params.n_parallel), + [](common_params & params, int value) { + params.n_parallel = value; + } + ).set_env("LLAMA_ARG_N_PARALLEL")); + add_opt(common_arg( + {"-ns", "--sequences"}, "N", + string_format("number of sequences to decode (default: %d)", params.n_sequences), + [](common_params & params, int value) { + params.n_sequences = value; + } + 
).set_examples({LLAMA_EXAMPLE_PARALLEL})); + add_opt(common_arg( + {"-cb", "--cont-batching"}, + string_format("enable continuous batching (a.k.a dynamic batching) (default: %s)", params.cont_batching ? "enabled" : "disabled"), + [](common_params & params) { + params.cont_batching = true; + } + ).set_examples({LLAMA_EXAMPLE_SERVER}).set_env("LLAMA_ARG_CONT_BATCHING")); + add_opt(common_arg( + {"-nocb", "--no-cont-batching"}, + "disable continuous batching", + [](common_params & params) { + params.cont_batching = false; + } + ).set_examples({LLAMA_EXAMPLE_SERVER}).set_env("LLAMA_ARG_NO_CONT_BATCHING")); + add_opt(common_arg( + {"--mmproj"}, "FILE", + "path to a multimodal projector file for LLaVA. see examples/llava/README.md", + [](common_params & params, const std::string & value) { + params.mmproj = value; + } + ).set_examples({LLAMA_EXAMPLE_LLAVA})); + add_opt(common_arg( + {"--image"}, "FILE", + "path to an image file. use with multimodal models. Specify multiple times for batching", + [](common_params & params, const std::string & value) { + params.image.emplace_back(value); + } + ).set_examples({LLAMA_EXAMPLE_LLAVA})); + if (llama_supports_rpc()) { + add_opt(common_arg( + {"--rpc"}, "SERVERS", + "comma separated list of RPC servers", + [](common_params & params, const std::string & value) { + add_rpc_devices(value); + GGML_UNUSED(params); + } + ).set_env("LLAMA_ARG_RPC")); + } + add_opt(common_arg( + {"--mlock"}, + "force system to keep model in RAM rather than swapping or compressing", + [](common_params & params) { + params.use_mlock = true; + } + ).set_env("LLAMA_ARG_MLOCK")); + add_opt(common_arg( + {"--no-mmap"}, + "do not memory-map model (slower load but may reduce pageouts if not using mlock)", + [](common_params & params) { + params.use_mmap = false; + } + ).set_env("LLAMA_ARG_NO_MMAP")); + add_opt(common_arg( + {"--numa"}, "TYPE", + "attempt optimizations that help on some NUMA systems\n" + "- distribute: spread execution evenly over all nodes\n" + "- isolate: only spawn threads on CPUs on the node that execution started on\n" + "- numactl: use the CPU map provided by numactl\n" + "if run without this previously, it is recommended to drop the system page cache before using this\n" + "see https://github.com/ggerganov/llama.cpp/issues/1437", + [](common_params & params, const std::string & value) { + /**/ if (value == "distribute" || value == "") { params.numa = GGML_NUMA_STRATEGY_DISTRIBUTE; } + else if (value == "isolate") { params.numa = GGML_NUMA_STRATEGY_ISOLATE; } + else if (value == "numactl") { params.numa = GGML_NUMA_STRATEGY_NUMACTL; } + else { throw std::invalid_argument("invalid value"); } + } + ).set_env("LLAMA_ARG_NUMA")); + add_opt(common_arg( + {"-dev", "--device"}, "", + "comma-separated list of devices to use for offloading (none = don't offload)\n" + "use --list-devices to see a list of available devices", + [](common_params & params, const std::string & value) { + params.devices = parse_device_list(value); + } + ).set_env("LLAMA_ARG_DEVICE")); + add_opt(common_arg( + {"--list-devices"}, + "print list of available devices and exit", + [](common_params &) { + std::vector rpc_devices; + std::vector all_devices; + for (size_t i = 0; i < ggml_backend_dev_count(); ++i) { + auto * dev = ggml_backend_dev_get(i); + if (ggml_backend_dev_type(dev) == GGML_BACKEND_DEVICE_TYPE_GPU) { + ggml_backend_reg_t reg = ggml_backend_dev_backend_reg(dev); + if (ggml_backend_reg_name(reg) == std::string("RPC")) { + rpc_devices.push_back(dev); + } else { + 
all_devices.push_back(dev); + } + } + } + // insert RPC devices in front + all_devices.insert(all_devices.begin(), rpc_devices.begin(), rpc_devices.end()); + printf("Available devices:\n"); + for (size_t i = 0; i < all_devices.size(); ++i) { + auto * dev = all_devices[i]; + size_t free, total; + ggml_backend_dev_memory(dev, &free, &total); + printf(" %s: %s (%zu MiB, %zu MiB free)\n", ggml_backend_dev_name(dev), ggml_backend_dev_description(dev), total / 1024 / 1024, free / 1024 / 1024); + } + exit(0); + } + )); + add_opt(common_arg( + {"-ngl", "--gpu-layers", "--n-gpu-layers"}, "N", + "number of layers to store in VRAM", + [](common_params & params, int value) { + params.n_gpu_layers = value; + if (!llama_supports_gpu_offload()) { + fprintf(stderr, "warning: no usable GPU found, --gpu-layers option will be ignored\n"); + fprintf(stderr, "warning: one possible reason is that llama.cpp was compiled without GPU support\n"); + fprintf(stderr, "warning: consult docs/build.md for compilation instructions\n"); + } + } + ).set_env("LLAMA_ARG_N_GPU_LAYERS")); + add_opt(common_arg( + {"-sm", "--split-mode"}, "{none,layer,row}", + "how to split the model across multiple GPUs, one of:\n" + "- none: use one GPU only\n" + "- layer (default): split layers and KV across GPUs\n" + "- row: split rows across GPUs", + [](common_params & params, const std::string & value) { + std::string arg_next = value; + if (arg_next == "none") { + params.split_mode = LLAMA_SPLIT_MODE_NONE; + } else if (arg_next == "layer") { + params.split_mode = LLAMA_SPLIT_MODE_LAYER; + } else if (arg_next == "row") { + params.split_mode = LLAMA_SPLIT_MODE_ROW; + } else { + throw std::invalid_argument("invalid value"); + } + if (!llama_supports_gpu_offload()) { + fprintf(stderr, "warning: llama.cpp was compiled without support for GPU offload. Setting the split mode has no effect.\n"); + } + } + ).set_env("LLAMA_ARG_SPLIT_MODE")); + add_opt(common_arg( + {"-ts", "--tensor-split"}, "N0,N1,N2,...", + "fraction of the model to offload to each GPU, comma-separated list of proportions, e.g. 3,1", + [](common_params & params, const std::string & value) { + std::string arg_next = value; + + // split string by , and / + const std::regex regex{ R"([,/]+)" }; + std::sregex_token_iterator it{ arg_next.begin(), arg_next.end(), regex, -1 }; + std::vector split_arg{ it, {} }; + if (split_arg.size() >= llama_max_devices()) { + throw std::invalid_argument( + string_format("got %d input configs, but system only has %d devices", (int)split_arg.size(), (int)llama_max_devices()) + ); + } + for (size_t i = 0; i < llama_max_devices(); ++i) { + if (i < split_arg.size()) { + params.tensor_split[i] = std::stof(split_arg[i]); + } else { + params.tensor_split[i] = 0.0f; + } + } + if (!llama_supports_gpu_offload()) { + fprintf(stderr, "warning: llama.cpp was compiled without support for GPU offload. Setting a tensor split has no effect.\n"); + } + } + ).set_env("LLAMA_ARG_TENSOR_SPLIT")); + add_opt(common_arg( + {"-mg", "--main-gpu"}, "INDEX", + string_format("the GPU to use for the model (with split-mode = none), or for intermediate results and KV (with split-mode = row) (default: %d)", params.main_gpu), + [](common_params & params, int value) { + params.main_gpu = value; + if (!llama_supports_gpu_offload()) { + fprintf(stderr, "warning: llama.cpp was compiled without support for GPU offload. 
Setting the main GPU has no effect.\n"); + } + } + ).set_env("LLAMA_ARG_MAIN_GPU")); + add_opt(common_arg( + {"--check-tensors"}, + string_format("check model tensor data for invalid values (default: %s)", params.check_tensors ? "true" : "false"), + [](common_params & params) { + params.check_tensors = true; + } + )); + add_opt(common_arg( + {"--override-kv"}, "KEY=TYPE:VALUE", + "advanced option to override model metadata by key. may be specified multiple times.\n" + "types: int, float, bool, str. example: --override-kv tokenizer.ggml.add_bos_token=bool:false", + [](common_params & params, const std::string & value) { + if (!string_parse_kv_override(value.c_str(), params.kv_overrides)) { + throw std::runtime_error(string_format("error: Invalid type for KV override: %s\n", value.c_str())); + } + } + )); + add_opt(common_arg( + {"--lora"}, "FNAME", + "path to LoRA adapter (can be repeated to use multiple adapters)", + [](common_params & params, const std::string & value) { + params.lora_adapters.push_back({ std::string(value), 1.0, nullptr }); + } + // we define this arg on both COMMON and EXPORT_LORA, so when showing help message of export-lora, it will be categorized as "example-specific" arg + ).set_examples({LLAMA_EXAMPLE_COMMON, LLAMA_EXAMPLE_EXPORT_LORA})); + add_opt(common_arg( + {"--lora-scaled"}, "FNAME", "SCALE", + "path to LoRA adapter with user defined scaling (can be repeated to use multiple adapters)", + [](common_params & params, const std::string & fname, const std::string & scale) { + params.lora_adapters.push_back({ fname, std::stof(scale), nullptr }); + } + // we define this arg on both COMMON and EXPORT_LORA, so when showing help message of export-lora, it will be categorized as "example-specific" arg + ).set_examples({LLAMA_EXAMPLE_COMMON, LLAMA_EXAMPLE_EXPORT_LORA})); + add_opt(common_arg( + {"--control-vector"}, "FNAME", + "add a control vector\nnote: this argument can be repeated to add multiple control vectors", + [](common_params & params, const std::string & value) { + params.control_vectors.push_back({ 1.0f, value, }); + } + )); + add_opt(common_arg( + {"--control-vector-scaled"}, "FNAME", "SCALE", + "add a control vector with user defined scaling SCALE\n" + "note: this argument can be repeated to add multiple scaled control vectors", + [](common_params & params, const std::string & fname, const std::string & scale) { + params.control_vectors.push_back({ std::stof(scale), fname }); + } + )); + add_opt(common_arg( + {"--control-vector-layer-range"}, "START", "END", + "layer range to apply the control vector(s) to, start and end inclusive", + [](common_params & params, const std::string & start, const std::string & end) { + params.control_vector_layer_start = std::stoi(start); + params.control_vector_layer_end = std::stoi(end); + } + )); + add_opt(common_arg( + {"-a", "--alias"}, "STRING", + "set alias for model name (to be used by REST API)", + [](common_params & params, const std::string & value) { + params.model_alias = value; + } + ).set_examples({LLAMA_EXAMPLE_SERVER}).set_env("LLAMA_ARG_ALIAS")); + add_opt(common_arg( + {"-m", "--model"}, "FNAME", + ex == LLAMA_EXAMPLE_EXPORT_LORA + ? 
std::string("model path from which to load base model") + : string_format( + "model path (default: `models/$filename` with filename from `--hf-file` " + "or `--model-url` if set, otherwise %s)", DEFAULT_MODEL_PATH + ), + [](common_params & params, const std::string & value) { + params.model = value; + } + ).set_examples({LLAMA_EXAMPLE_COMMON, LLAMA_EXAMPLE_EXPORT_LORA}).set_env("LLAMA_ARG_MODEL")); + add_opt(common_arg( + {"-mu", "--model-url"}, "MODEL_URL", + "model download url (default: unused)", + [](common_params & params, const std::string & value) { + params.model_url = value; + } + ).set_env("LLAMA_ARG_MODEL_URL")); + add_opt(common_arg( + {"-hf", "-hfr", "--hf-repo"}, "/[:quant]", + "Hugging Face model repository; quant is optional, case-insensitive, default to Q4_K_M, or falls back to the first file in the repo if Q4_K_M doesn't exist.\n" + "example: unsloth/phi-4-GGUF:q4_k_m\n" + "(default: unused)", + [](common_params & params, const std::string & value) { + params.hf_repo = value; + } + ).set_env("LLAMA_ARG_HF_REPO")); + add_opt(common_arg( + {"-hfd", "-hfrd", "--hf-repo-draft"}, "/[:quant]", + "Same as --hf-repo, but for the draft model (default: unused)", + [](common_params & params, const std::string & value) { + params.speculative.hf_repo = value; + } + ).set_env("LLAMA_ARG_HFD_REPO")); + add_opt(common_arg( + {"-hff", "--hf-file"}, "FILE", + "Hugging Face model file. If specified, it will override the quant in --hf-repo (default: unused)", + [](common_params & params, const std::string & value) { + params.hf_file = value; + } + ).set_env("LLAMA_ARG_HF_FILE")); + add_opt(common_arg( + {"-hfv", "-hfrv", "--hf-repo-v"}, "/[:quant]", + "Hugging Face model repository for the vocoder model (default: unused)", + [](common_params & params, const std::string & value) { + params.vocoder.hf_repo = value; + } + ).set_env("LLAMA_ARG_HF_REPO_V")); + add_opt(common_arg( + {"-hffv", "--hf-file-v"}, "FILE", + "Hugging Face model file for the vocoder model (default: unused)", + [](common_params & params, const std::string & value) { + params.vocoder.hf_file = value; + } + ).set_env("LLAMA_ARG_HF_FILE_V")); + add_opt(common_arg( + {"-hft", "--hf-token"}, "TOKEN", + "Hugging Face access token (default: value from HF_TOKEN environment variable)", + [](common_params & params, const std::string & value) { + params.hf_token = value; + } + ).set_env("HF_TOKEN")); + add_opt(common_arg( + {"--context-file"}, "FNAME", + "file to load context from (repeat to specify multiple files)", + [](common_params & params, const std::string & value) { + std::ifstream file(value, std::ios::binary); + if (!file) { + throw std::runtime_error(string_format("error: failed to open file '%s'\n", value.c_str())); + } + params.context_files.push_back(value); + } + ).set_examples({LLAMA_EXAMPLE_RETRIEVAL})); + add_opt(common_arg( + {"--chunk-size"}, "N", + string_format("minimum length of embedded text chunks (default: %d)", params.chunk_size), + [](common_params & params, int value) { + params.chunk_size = value; + } + ).set_examples({LLAMA_EXAMPLE_RETRIEVAL})); + add_opt(common_arg( + {"--chunk-separator"}, "STRING", + string_format("separator between chunks (default: '%s')", params.chunk_separator.c_str()), + [](common_params & params, const std::string & value) { + params.chunk_separator = value; + } + ).set_examples({LLAMA_EXAMPLE_RETRIEVAL})); + add_opt(common_arg( + {"--junk"}, "N", + string_format("number of times to repeat the junk text (default: %d)", params.n_junk), + [](common_params & params, int value) { + 
params.n_junk = value; + } + ).set_examples({LLAMA_EXAMPLE_PASSKEY})); + add_opt(common_arg( + {"--pos"}, "N", + string_format("position of the passkey in the junk text (default: %d)", params.i_pos), + [](common_params & params, int value) { + params.i_pos = value; + } + ).set_examples({LLAMA_EXAMPLE_PASSKEY})); + add_opt(common_arg( + {"-o", "--output", "--output-file"}, "FNAME", + string_format("output file (default: '%s')", + ex == LLAMA_EXAMPLE_EXPORT_LORA + ? params.lora_outfile.c_str() + : ex == LLAMA_EXAMPLE_CVECTOR_GENERATOR + ? params.cvector_outfile.c_str() + : params.out_file.c_str()), + [](common_params & params, const std::string & value) { + params.out_file = value; + params.cvector_outfile = value; + params.lora_outfile = value; + } + ).set_examples({LLAMA_EXAMPLE_IMATRIX, LLAMA_EXAMPLE_CVECTOR_GENERATOR, LLAMA_EXAMPLE_EXPORT_LORA})); + add_opt(common_arg( + {"-ofreq", "--output-frequency"}, "N", + string_format("output the imatrix every N iterations (default: %d)", params.n_out_freq), + [](common_params & params, int value) { + params.n_out_freq = value; + } + ).set_examples({LLAMA_EXAMPLE_IMATRIX})); + add_opt(common_arg( + {"--save-frequency"}, "N", + string_format("save an imatrix copy every N iterations (default: %d)", params.n_save_freq), + [](common_params & params, int value) { + params.n_save_freq = value; + } + ).set_examples({LLAMA_EXAMPLE_IMATRIX})); + add_opt(common_arg( + {"--process-output"}, + string_format("collect data for the output tensor (default: %s)", params.process_output ? "true" : "false"), + [](common_params & params) { + params.process_output = true; + } + ).set_examples({LLAMA_EXAMPLE_IMATRIX})); + add_opt(common_arg( + {"--no-ppl"}, + string_format("do not compute perplexity (default: %s)", params.compute_ppl ? "true" : "false"), + [](common_params & params) { + params.compute_ppl = false; + } + ).set_examples({LLAMA_EXAMPLE_IMATRIX})); + add_opt(common_arg( + {"--chunk", "--from-chunk"}, "N", + string_format("start processing the input from chunk N (default: %d)", params.i_chunk), + [](common_params & params, int value) { + params.i_chunk = value; + } + ).set_examples({LLAMA_EXAMPLE_IMATRIX})); + add_opt(common_arg( + {"-pps"}, + string_format("is the prompt shared across parallel sequences (default: %s)", params.is_pp_shared ? 
"true" : "false"), + [](common_params & params) { + params.is_pp_shared = true; + } + ).set_examples({LLAMA_EXAMPLE_BENCH})); + add_opt(common_arg( + {"-npp"}, "n0,n1,...", + "number of prompt tokens", + [](common_params & params, const std::string & value) { + auto p = string_split(value, ','); + params.n_pp.insert(params.n_pp.end(), p.begin(), p.end()); + } + ).set_examples({LLAMA_EXAMPLE_BENCH})); + add_opt(common_arg( + {"-ntg"}, "n0,n1,...", + "number of text generation tokens", + [](common_params & params, const std::string & value) { + auto p = string_split(value, ','); + params.n_tg.insert(params.n_tg.end(), p.begin(), p.end()); + } + ).set_examples({LLAMA_EXAMPLE_BENCH})); + add_opt(common_arg( + {"-npl"}, "n0,n1,...", + "number of parallel prompts", + [](common_params & params, const std::string & value) { + auto p = string_split(value, ','); + params.n_pl.insert(params.n_pl.end(), p.begin(), p.end()); + } + ).set_examples({LLAMA_EXAMPLE_BENCH})); + add_opt(common_arg( + {"--embd-normalize"}, "N", + string_format("normalisation for embeddings (default: %d) (-1=none, 0=max absolute int16, 1=taxicab, 2=euclidean, >2=p-norm)", params.embd_normalize), + [](common_params & params, int value) { + params.embd_normalize = value; + } + ).set_examples({LLAMA_EXAMPLE_EMBEDDING})); + add_opt(common_arg( + {"--embd-output-format"}, "FORMAT", + "empty = default, \"array\" = [[],[]...], \"json\" = openai style, \"json+\" = same \"json\" + cosine similarity matrix", + [](common_params & params, const std::string & value) { + params.embd_out = value; + } + ).set_examples({LLAMA_EXAMPLE_EMBEDDING})); + add_opt(common_arg( + {"--embd-separator"}, "STRING", + "separator of embeddings (default \\n) for example \"<#sep#>\"", + [](common_params & params, const std::string & value) { + params.embd_sep = value; + } + ).set_examples({LLAMA_EXAMPLE_EMBEDDING})); + add_opt(common_arg( + {"--host"}, "HOST", + string_format("ip address to listen (default: %s)", params.hostname.c_str()), + [](common_params & params, const std::string & value) { + params.hostname = value; + } + ).set_examples({LLAMA_EXAMPLE_SERVER}).set_env("LLAMA_ARG_HOST")); + add_opt(common_arg( + {"--port"}, "PORT", + string_format("port to listen (default: %d)", params.port), + [](common_params & params, int value) { + params.port = value; + } + ).set_examples({LLAMA_EXAMPLE_SERVER}).set_env("LLAMA_ARG_PORT")); + add_opt(common_arg( + {"--path"}, "PATH", + string_format("path to serve static files from (default: %s)", params.public_path.c_str()), + [](common_params & params, const std::string & value) { + params.public_path = value; + } + ).set_examples({LLAMA_EXAMPLE_SERVER}).set_env("LLAMA_ARG_STATIC_PATH")); + add_opt(common_arg( + {"--no-webui"}, + string_format("Disable the Web UI (default: %s)", params.webui ? "enabled" : "disabled"), + [](common_params & params) { + params.webui = false; + } + ).set_examples({LLAMA_EXAMPLE_SERVER}).set_env("LLAMA_ARG_NO_WEBUI")); + add_opt(common_arg( + {"--embedding", "--embeddings"}, + string_format("restrict to only support embedding use case; use only with dedicated embedding models (default: %s)", params.embedding ? "enabled" : "disabled"), + [](common_params & params) { + params.embedding = true; + } + ).set_examples({LLAMA_EXAMPLE_SERVER}).set_env("LLAMA_ARG_EMBEDDINGS")); + add_opt(common_arg( + {"--reranking", "--rerank"}, + string_format("enable reranking endpoint on server (default: %s)", params.reranking ? 
"enabled" : "disabled"), + [](common_params & params) { + params.reranking = true; + } + ).set_examples({LLAMA_EXAMPLE_SERVER}).set_env("LLAMA_ARG_RERANKING")); + add_opt(common_arg( + {"--api-key"}, "KEY", + "API key to use for authentication (default: none)", + [](common_params & params, const std::string & value) { + params.api_keys.push_back(value); + } + ).set_examples({LLAMA_EXAMPLE_SERVER}).set_env("LLAMA_API_KEY")); + add_opt(common_arg( + {"--api-key-file"}, "FNAME", + "path to file containing API keys (default: none)", + [](common_params & params, const std::string & value) { + std::ifstream key_file(value); + if (!key_file) { + throw std::runtime_error(string_format("error: failed to open file '%s'\n", value.c_str())); + } + std::string key; + while (std::getline(key_file, key)) { + if (!key.empty()) { + params.api_keys.push_back(key); + } + } + key_file.close(); + } + ).set_examples({LLAMA_EXAMPLE_SERVER})); + add_opt(common_arg( + {"--ssl-key-file"}, "FNAME", + "path to file a PEM-encoded SSL private key", + [](common_params & params, const std::string & value) { + params.ssl_file_key = value; + } + ).set_examples({LLAMA_EXAMPLE_SERVER}).set_env("LLAMA_ARG_SSL_KEY_FILE")); + add_opt(common_arg( + {"--ssl-cert-file"}, "FNAME", + "path to file a PEM-encoded SSL certificate", + [](common_params & params, const std::string & value) { + params.ssl_file_cert = value; + } + ).set_examples({LLAMA_EXAMPLE_SERVER}).set_env("LLAMA_ARG_SSL_CERT_FILE")); + add_opt(common_arg( + {"-to", "--timeout"}, "N", + string_format("server read/write timeout in seconds (default: %d)", params.timeout_read), + [](common_params & params, int value) { + params.timeout_read = value; + params.timeout_write = value; + } + ).set_examples({LLAMA_EXAMPLE_SERVER}).set_env("LLAMA_ARG_TIMEOUT")); + add_opt(common_arg( + {"--threads-http"}, "N", + string_format("number of threads used to process HTTP requests (default: %d)", params.n_threads_http), + [](common_params & params, int value) { + params.n_threads_http = value; + } + ).set_examples({LLAMA_EXAMPLE_SERVER}).set_env("LLAMA_ARG_THREADS_HTTP")); + add_opt(common_arg( + {"--cache-reuse"}, "N", + string_format("min chunk size to attempt reusing from the cache via KV shifting (default: %d)", params.n_cache_reuse), + [](common_params & params, int value) { + params.n_cache_reuse = value; + } + ).set_examples({LLAMA_EXAMPLE_SERVER}).set_env("LLAMA_ARG_CACHE_REUSE")); + add_opt(common_arg( + {"--metrics"}, + string_format("enable prometheus compatible metrics endpoint (default: %s)", params.endpoint_metrics ? "enabled" : "disabled"), + [](common_params & params) { + params.endpoint_metrics = true; + } + ).set_examples({LLAMA_EXAMPLE_SERVER}).set_env("LLAMA_ARG_ENDPOINT_METRICS")); + add_opt(common_arg( + {"--slots"}, + string_format("enable slots monitoring endpoint (default: %s)", params.endpoint_slots ? "enabled" : "disabled"), + [](common_params & params) { + params.endpoint_slots = true; + } + ).set_examples({LLAMA_EXAMPLE_SERVER}).set_env("LLAMA_ARG_ENDPOINT_SLOTS")); + add_opt(common_arg( + {"--props"}, + string_format("enable changing global properties via POST /props (default: %s)", params.endpoint_props ? 
"enabled" : "disabled"), + [](common_params & params) { + params.endpoint_props = true; + } + ).set_examples({LLAMA_EXAMPLE_SERVER}).set_env("LLAMA_ARG_ENDPOINT_PROPS")); + add_opt(common_arg( + {"--no-slots"}, + "disables slots monitoring endpoint", + [](common_params & params) { + params.endpoint_slots = false; + } + ).set_examples({LLAMA_EXAMPLE_SERVER}).set_env("LLAMA_ARG_NO_ENDPOINT_SLOTS")); + add_opt(common_arg( + {"--slot-save-path"}, "PATH", + "path to save slot kv cache (default: disabled)", + [](common_params & params, const std::string & value) { + params.slot_save_path = value; + // if doesn't end with DIRECTORY_SEPARATOR, add it + if (!params.slot_save_path.empty() && params.slot_save_path[params.slot_save_path.size() - 1] != DIRECTORY_SEPARATOR) { + params.slot_save_path += DIRECTORY_SEPARATOR; + } + } + ).set_examples({LLAMA_EXAMPLE_SERVER})); + add_opt(common_arg( + {"--jinja"}, + "use jinja template for chat (default: disabled)", + [](common_params & params) { + params.use_jinja = true; + } + ).set_examples({LLAMA_EXAMPLE_SERVER, LLAMA_EXAMPLE_MAIN}).set_env("LLAMA_ARG_JINJA")); + add_opt(common_arg( + {"--chat-template"}, "JINJA_TEMPLATE", + string_format( + "set custom jinja chat template (default: template taken from model's metadata)\n" + "if suffix/prefix are specified, template will be disabled\n" + "only commonly used templates are accepted (unless --jinja is set before this flag):\n" + "list of built-in templates:\n%s", list_builtin_chat_templates().c_str() + ), + [](common_params & params, const std::string & value) { + params.chat_template = value; + } + ).set_examples({LLAMA_EXAMPLE_MAIN, LLAMA_EXAMPLE_SERVER}).set_env("LLAMA_ARG_CHAT_TEMPLATE")); + add_opt(common_arg( + {"--chat-template-file"}, "JINJA_TEMPLATE_FILE", + string_format( + "set custom jinja chat template file (default: template taken from model's metadata)\n" + "if suffix/prefix are specified, template will be disabled\n" + "only commonly used templates are accepted (unless --jinja is set before this flag):\n" + "list of built-in templates:\n%s", list_builtin_chat_templates().c_str() + ), + [](common_params & params, const std::string & value) { + std::ifstream file(value); + if (!file) { + throw std::runtime_error(string_format("error: failed to open file '%s'\n", value.c_str())); + } + std::copy( + std::istreambuf_iterator(file), + std::istreambuf_iterator(), + std::back_inserter(params.chat_template)); + } + ).set_examples({LLAMA_EXAMPLE_MAIN, LLAMA_EXAMPLE_SERVER}).set_env("LLAMA_ARG_CHAT_TEMPLATE_FILE")); + add_opt(common_arg( + {"-sps", "--slot-prompt-similarity"}, "SIMILARITY", + string_format("how much the prompt of a request must match the prompt of a slot in order to use that slot (default: %.2f, 0.0 = disabled)\n", params.slot_prompt_similarity), + [](common_params & params, const std::string & value) { + params.slot_prompt_similarity = std::stof(value); + } + ).set_examples({LLAMA_EXAMPLE_SERVER})); + add_opt(common_arg( + {"--lora-init-without-apply"}, + string_format("load LoRA adapters without applying them (apply later via POST /lora-adapters) (default: %s)", params.lora_init_without_apply ? 
"enabled" : "disabled"), + [](common_params & params) { + params.lora_init_without_apply = true; + } + ).set_examples({LLAMA_EXAMPLE_SERVER})); + add_opt(common_arg( + {"--simple-io"}, + "use basic IO for better compatibility in subprocesses and limited consoles", + [](common_params & params) { + params.simple_io = true; + } + ).set_examples({LLAMA_EXAMPLE_MAIN, LLAMA_EXAMPLE_INFILL})); + add_opt(common_arg( + {"--positive-file"}, "FNAME", + string_format("positive prompts file, one prompt per line (default: '%s')", params.cvector_positive_file.c_str()), + [](common_params & params, const std::string & value) { + params.cvector_positive_file = value; + } + ).set_examples({LLAMA_EXAMPLE_CVECTOR_GENERATOR})); + add_opt(common_arg( + {"--negative-file"}, "FNAME", + string_format("negative prompts file, one prompt per line (default: '%s')", params.cvector_negative_file.c_str()), + [](common_params & params, const std::string & value) { + params.cvector_negative_file = value; + } + ).set_examples({LLAMA_EXAMPLE_CVECTOR_GENERATOR})); + add_opt(common_arg( + {"--pca-batch"}, "N", + string_format("batch size used for PCA. Larger batch runs faster, but uses more memory (default: %d)", params.n_pca_batch), + [](common_params & params, int value) { + params.n_pca_batch = value; + } + ).set_examples({LLAMA_EXAMPLE_CVECTOR_GENERATOR})); + add_opt(common_arg( + {"--pca-iter"}, "N", + string_format("number of iterations used for PCA (default: %d)", params.n_pca_iterations), + [](common_params & params, int value) { + params.n_pca_iterations = value; + } + ).set_examples({LLAMA_EXAMPLE_CVECTOR_GENERATOR})); + add_opt(common_arg( + {"--method"}, "{pca, mean}", + "dimensionality reduction method to be used (default: pca)", + [](common_params & params, const std::string & value) { + /**/ if (value == "pca") { params.cvector_dimre_method = DIMRE_METHOD_PCA; } + else if (value == "mean") { params.cvector_dimre_method = DIMRE_METHOD_MEAN; } + else { throw std::invalid_argument("invalid value"); } + } + ).set_examples({LLAMA_EXAMPLE_CVECTOR_GENERATOR})); + add_opt(common_arg( + {"--output-format"}, "{md,jsonl}", + "output format for batched-bench results (default: md)", + [](common_params & params, const std::string & value) { + /**/ if (value == "jsonl") { params.batched_bench_output_jsonl = true; } + else if (value == "md") { params.batched_bench_output_jsonl = false; } + else { std::invalid_argument("invalid value"); } + } + ).set_examples({LLAMA_EXAMPLE_BENCH})); + add_opt(common_arg( + {"--log-disable"}, + "Log disable", + [](common_params &) { + common_log_pause(common_log_main()); + } + )); + add_opt(common_arg( + {"--log-file"}, "FNAME", + "Log to file", + [](common_params &, const std::string & value) { + common_log_set_file(common_log_main(), value.c_str()); + } + )); + add_opt(common_arg( + {"--log-colors"}, + "Enable colored logging", + [](common_params &) { + common_log_set_colors(common_log_main(), true); + } + ).set_env("LLAMA_LOG_COLORS")); + add_opt(common_arg( + {"-v", "--verbose", "--log-verbose"}, + "Set verbosity level to infinity (i.e. log all messages, useful for debugging)", + [](common_params & params) { + params.verbosity = INT_MAX; + common_log_set_verbosity_thold(INT_MAX); + } + )); + add_opt(common_arg( + {"-lv", "--verbosity", "--log-verbosity"}, "N", + "Set the verbosity threshold. 
Messages with a higher verbosity will be ignored.",
+        [](common_params & params, int value) {
+            params.verbosity = value;
+            common_log_set_verbosity_thold(value);
+        }
+    ).set_env("LLAMA_LOG_VERBOSITY"));
+    add_opt(common_arg(
+        {"--log-prefix"},
+        "Enable prefix in log messages",
+        [](common_params &) {
+            common_log_set_prefix(common_log_main(), true);
+        }
+    ).set_env("LLAMA_LOG_PREFIX"));
+    add_opt(common_arg(
+        {"--log-timestamps"},
+        "Enable timestamps in log messages",
+        [](common_params &) {
+            common_log_set_timestamps(common_log_main(), true);
+        }
+    ).set_env("LLAMA_LOG_TIMESTAMPS"));
+
+    // speculative parameters
+    add_opt(common_arg(
+        {"-td", "--threads-draft"}, "N",
+        "number of threads to use during generation (default: same as --threads)",
+        [](common_params & params, int value) {
+            params.speculative.cpuparams.n_threads = value;
+            if (params.speculative.cpuparams.n_threads <= 0) {
+                params.speculative.cpuparams.n_threads = std::thread::hardware_concurrency();
+            }
+        }
+    ).set_examples({LLAMA_EXAMPLE_SPECULATIVE}));
+    add_opt(common_arg(
+        {"-tbd", "--threads-batch-draft"}, "N",
+        "number of threads to use during batch and prompt processing (default: same as --threads-draft)",
+        [](common_params & params, int value) {
+            params.speculative.cpuparams_batch.n_threads = value;
+            if (params.speculative.cpuparams_batch.n_threads <= 0) {
+                params.speculative.cpuparams_batch.n_threads = std::thread::hardware_concurrency();
+            }
+        }
+    ).set_examples({LLAMA_EXAMPLE_SPECULATIVE}));
+    add_opt(common_arg(
+        {"-Cd", "--cpu-mask-draft"}, "M",
+        "Draft model CPU affinity mask. Complements cpu-range-draft (default: same as --cpu-mask)",
+        [](common_params & params, const std::string & mask) {
+            params.speculative.cpuparams.mask_valid = true;
+            if (!parse_cpu_mask(mask, params.speculative.cpuparams.cpumask)) {
+                throw std::invalid_argument("invalid cpumask");
+            }
+        }
+    ).set_examples({LLAMA_EXAMPLE_SPECULATIVE}));
+    add_opt(common_arg(
+        {"-Crd", "--cpu-range-draft"}, "lo-hi",
+        "Ranges of CPUs for affinity. Complements --cpu-mask-draft",
+        [](common_params & params, const std::string & range) {
+            params.speculative.cpuparams.mask_valid = true;
+            if (!parse_cpu_range(range, params.speculative.cpuparams.cpumask)) {
+                throw std::invalid_argument("invalid range");
+            }
+        }
+    ).set_examples({LLAMA_EXAMPLE_SPECULATIVE}));
+    add_opt(common_arg(
+        {"--cpu-strict-draft"}, "<0|1>",
+        "Use strict CPU placement for draft model (default: same as --cpu-strict)",
+        [](common_params & params, int value) {
+            params.speculative.cpuparams.strict_cpu = value;
+        }
+    ).set_examples({LLAMA_EXAMPLE_SPECULATIVE}));
+    add_opt(common_arg(
+        {"--prio-draft"}, "N",
+        string_format("set draft process/thread priority : 0-normal, 1-medium, 2-high, 3-realtime (default: %d)\n", params.speculative.cpuparams.priority),
+        [](common_params & params, int prio) {
+            if (prio < 0 || prio > 3) {
+                throw std::invalid_argument("invalid value");
+            }
+            params.speculative.cpuparams.priority = (enum ggml_sched_priority) prio;
+        }
+    ).set_examples({LLAMA_EXAMPLE_SPECULATIVE}));
+    add_opt(common_arg(
+        {"--poll-draft"}, "<0|1>",
+        "Use polling to wait for draft model work (default: same as --poll)",
+        [](common_params & params, int value) {
+            params.speculative.cpuparams.poll = value;
+        }
+    ).set_examples({LLAMA_EXAMPLE_SPECULATIVE}));
+    add_opt(common_arg(
+        {"-Cbd", "--cpu-mask-batch-draft"}, "M",
+        "Draft model CPU affinity mask. 
Complements cpu-range-draft (default: same as --cpu-mask)", + [](common_params & params, const std::string & mask) { + params.speculative.cpuparams_batch.mask_valid = true; + if (!parse_cpu_mask(mask, params.speculative.cpuparams_batch.cpumask)) { + throw std::invalid_argument("invalid cpumask"); + } + } + ).set_examples({LLAMA_EXAMPLE_SPECULATIVE})); + add_opt(common_arg( + {"-Crbd", "--cpu-range-batch-draft"}, "lo-hi", + "Ranges of CPUs for affinity. Complements --cpu-mask-draft-batch)", + [](common_params & params, const std::string & range) { + params.speculative.cpuparams_batch.mask_valid = true; + if (!parse_cpu_range(range, params.speculative.cpuparams_batch.cpumask)) { + throw std::invalid_argument("invalid cpumask"); + } + } + ).set_examples({LLAMA_EXAMPLE_SPECULATIVE})); + add_opt(common_arg( + {"--cpu-strict-batch-draft"}, "<0|1>", + "Use strict CPU placement for draft model (default: --cpu-strict-draft)", + [](common_params & params, int value) { + params.speculative.cpuparams_batch.strict_cpu = value; + } + ).set_examples({LLAMA_EXAMPLE_SPECULATIVE})); + add_opt(common_arg( + {"--prio-batch-draft"}, "N", + string_format("set draft process/thread priority : 0-normal, 1-medium, 2-high, 3-realtime (default: %d)\n", params.speculative.cpuparams_batch.priority), + [](common_params & params, int prio) { + if (prio < 0 || prio > 3) { + throw std::invalid_argument("invalid value"); + } + params.speculative.cpuparams_batch.priority = (enum ggml_sched_priority) prio; + } + ).set_examples({LLAMA_EXAMPLE_SPECULATIVE})); + add_opt(common_arg( + {"--poll-batch-draft"}, "<0|1>", + "Use polling to wait for draft model work (default: --poll-draft)", + [](common_params & params, int value) { + params.speculative.cpuparams_batch.poll = value; + } + ).set_examples({LLAMA_EXAMPLE_SPECULATIVE})); + add_opt(common_arg( + {"--draft-max", "--draft", "--draft-n"}, "N", + string_format("number of tokens to draft for speculative decoding (default: %d)", params.speculative.n_max), + [](common_params & params, int value) { + params.speculative.n_max = value; + } + ).set_examples({LLAMA_EXAMPLE_SPECULATIVE, LLAMA_EXAMPLE_LOOKUP, LLAMA_EXAMPLE_SERVER}).set_env("LLAMA_ARG_DRAFT_MAX")); + add_opt(common_arg( + {"--draft-min", "--draft-n-min"}, "N", + string_format("minimum number of draft tokens to use for speculative decoding (default: %d)", params.speculative.n_min), + [](common_params & params, int value) { + params.speculative.n_min = value; + } + ).set_examples({LLAMA_EXAMPLE_SPECULATIVE, LLAMA_EXAMPLE_LOOKUP, LLAMA_EXAMPLE_SERVER}).set_env("LLAMA_ARG_DRAFT_MIN")); + add_opt(common_arg( + {"--draft-p-split"}, "P", + string_format("speculative decoding split probability (default: %.1f)", (double)params.speculative.p_split), + [](common_params & params, const std::string & value) { + params.speculative.p_split = std::stof(value); + } + ).set_examples({LLAMA_EXAMPLE_SPECULATIVE}).set_env("LLAMA_ARG_DRAFT_P_SPLIT")); + add_opt(common_arg( + {"--draft-p-min"}, "P", + string_format("minimum speculative decoding probability (greedy) (default: %.1f)", (double)params.speculative.p_min), + [](common_params & params, const std::string & value) { + params.speculative.p_min = std::stof(value); + } + ).set_examples({LLAMA_EXAMPLE_SPECULATIVE, LLAMA_EXAMPLE_SERVER}).set_env("LLAMA_ARG_DRAFT_P_MIN")); + add_opt(common_arg( + {"-cd", "--ctx-size-draft"}, "N", + string_format("size of the prompt context for the draft model (default: %d, 0 = loaded from model)", params.speculative.n_ctx), + [](common_params & params, int 
value) { + params.speculative.n_ctx = value; + } + ).set_examples({LLAMA_EXAMPLE_SPECULATIVE, LLAMA_EXAMPLE_SERVER}).set_env("LLAMA_ARG_CTX_SIZE_DRAFT")); + add_opt(common_arg( + {"-devd", "--device-draft"}, "", + "comma-separated list of devices to use for offloading the draft model (none = don't offload)\n" + "use --list-devices to see a list of available devices", + [](common_params & params, const std::string & value) { + params.speculative.devices = parse_device_list(value); + } + ).set_examples({LLAMA_EXAMPLE_SPECULATIVE, LLAMA_EXAMPLE_SERVER})); + add_opt(common_arg( + {"-ngld", "--gpu-layers-draft", "--n-gpu-layers-draft"}, "N", + "number of layers to store in VRAM for the draft model", + [](common_params & params, int value) { + params.speculative.n_gpu_layers = value; + if (!llama_supports_gpu_offload()) { + fprintf(stderr, "warning: no usable GPU found, --gpu-layers-draft option will be ignored\n"); + fprintf(stderr, "warning: one possible reason is that llama.cpp was compiled without GPU support\n"); + fprintf(stderr, "warning: consult docs/build.md for compilation instructions\n"); + } + } + ).set_examples({LLAMA_EXAMPLE_SPECULATIVE, LLAMA_EXAMPLE_SERVER}).set_env("LLAMA_ARG_N_GPU_LAYERS_DRAFT")); + add_opt(common_arg( + {"-md", "--model-draft"}, "FNAME", + "draft model for speculative decoding (default: unused)", + [](common_params & params, const std::string & value) { + params.speculative.model = value; + } + ).set_examples({LLAMA_EXAMPLE_SPECULATIVE, LLAMA_EXAMPLE_SERVER}).set_env("LLAMA_ARG_MODEL_DRAFT")); + + add_opt(common_arg( + {"-mv", "--model-vocoder"}, "FNAME", + "vocoder model for audio generation (default: unused)", + [](common_params & params, const std::string & value) { + params.vocoder.model = value; + } + ).set_examples({LLAMA_EXAMPLE_TTS, LLAMA_EXAMPLE_SERVER})); + add_opt(common_arg( + {"--tts-use-guide-tokens"}, + "Use guide tokens to improve TTS word recall", + [](common_params & params) { + params.vocoder.use_guide_tokens = true; + } + ).set_examples({LLAMA_EXAMPLE_TTS, LLAMA_EXAMPLE_SERVER})); + + // model-specific + add_opt(common_arg( + {"--tts-oute-default"}, + string_format("use default OuteTTS models (note: can download weights from the internet)"), + [](common_params & params) { + params.hf_repo = "OuteAI/OuteTTS-0.2-500M-GGUF"; + params.hf_file = "OuteTTS-0.2-500M-Q8_0.gguf"; + params.vocoder.hf_repo = "ggml-org/WavTokenizer"; + params.vocoder.hf_file = "WavTokenizer-Large-75-F16.gguf"; + } + ).set_examples({LLAMA_EXAMPLE_TTS})); + + add_opt(common_arg( + {"--embd-bge-small-en-default"}, + string_format("use default bge-small-en-v1.5 model (note: can download weights from the internet)"), + [](common_params & params) { + params.hf_repo = "ggml-org/bge-small-en-v1.5-Q8_0-GGUF"; + params.hf_file = "bge-small-en-v1.5-q8_0.gguf"; + params.pooling_type = LLAMA_POOLING_TYPE_NONE; + params.embd_normalize = 2; + params.n_ctx = 512; + params.verbose_prompt = true; + params.embedding = true; + } + ).set_examples({LLAMA_EXAMPLE_EMBEDDING, LLAMA_EXAMPLE_SERVER})); + + add_opt(common_arg( + {"--embd-e5-small-en-default"}, + string_format("use default e5-small-v2 model (note: can download weights from the internet)"), + [](common_params & params) { + params.hf_repo = "ggml-org/e5-small-v2-Q8_0-GGUF"; + params.hf_file = "e5-small-v2-q8_0.gguf"; + params.pooling_type = LLAMA_POOLING_TYPE_NONE; + params.embd_normalize = 2; + params.n_ctx = 512; + params.verbose_prompt = true; + params.embedding = true; + } + ).set_examples({LLAMA_EXAMPLE_EMBEDDING, 
LLAMA_EXAMPLE_SERVER}));
+
+    add_opt(common_arg(
+        {"--embd-gte-small-default"},
+        string_format("use default gte-small model (note: can download weights from the internet)"),
+        [](common_params & params) {
+            params.hf_repo = "ggml-org/gte-small-Q8_0-GGUF";
+            params.hf_file = "gte-small-q8_0.gguf";
+            params.pooling_type = LLAMA_POOLING_TYPE_NONE;
+            params.embd_normalize = 2;
+            params.n_ctx = 512;
+            params.verbose_prompt = true;
+            params.embedding = true;
+        }
+    ).set_examples({LLAMA_EXAMPLE_EMBEDDING, LLAMA_EXAMPLE_SERVER}));
+
+    return ctx_arg;
+}
diff --git a/llama.cpp/common/arg.h b/llama.cpp/common/arg.h
new file mode 100644
index 0000000000000000000000000000000000000000..49ab8667b10527c05d1c6aed3593ad78170518c0
--- /dev/null
+++ b/llama.cpp/common/arg.h
@@ -0,0 +1,80 @@
+#pragma once
+
+#include "common.h"
+
+#include <set>
+#include <string>
+#include <vector>
+
+//
+// CLI argument parsing
+//
+
+struct common_arg {
+    std::set<enum llama_example> examples = {LLAMA_EXAMPLE_COMMON};
+    std::set<enum llama_example> excludes = {};
+    std::vector<const char *> args;
+    const char * value_hint   = nullptr; // help text or example for arg value
+    const char * value_hint_2 = nullptr; // for second arg value
+    const char * env          = nullptr;
+    std::string help;
+    bool is_sparam = false; // is current arg a sampling param?
+    void (*handler_void)   (common_params & params) = nullptr;
+    void (*handler_string) (common_params & params, const std::string &) = nullptr;
+    void (*handler_str_str)(common_params & params, const std::string &, const std::string &) = nullptr;
+    void (*handler_int)    (common_params & params, int) = nullptr;
+
+    common_arg(
+        const std::initializer_list<const char *> & args,
+        const char * value_hint,
+        const std::string & help,
+        void (*handler)(common_params & params, const std::string &)
+    ) : args(args), value_hint(value_hint), help(help), handler_string(handler) {}
+
+    common_arg(
+        const std::initializer_list<const char *> & args,
+        const char * value_hint,
+        const std::string & help,
+        void (*handler)(common_params & params, int)
+    ) : args(args), value_hint(value_hint), help(help), handler_int(handler) {}
+
+    common_arg(
+        const std::initializer_list<const char *> & args,
+        const std::string & help,
+        void (*handler)(common_params & params)
+    ) : args(args), help(help), handler_void(handler) {}
+
+    // support 2 values for arg
+    common_arg(
+        const std::initializer_list<const char *> & args,
+        const char * value_hint,
+        const char * value_hint_2,
+        const std::string & help,
+        void (*handler)(common_params & params, const std::string &, const std::string &)
+    ) : args(args), value_hint(value_hint), value_hint_2(value_hint_2), help(help), handler_str_str(handler) {}
+
+    common_arg & set_examples(std::initializer_list<enum llama_example> examples);
+    common_arg & set_excludes(std::initializer_list<enum llama_example> excludes);
+    common_arg & set_env(const char * env);
+    common_arg & set_sparam();
+    bool in_example(enum llama_example ex);
+    bool is_exclude(enum llama_example ex);
+    bool get_value_from_env(std::string & output);
+    bool has_value_from_env();
+    std::string to_string();
+};
+
+struct common_params_context {
+    enum llama_example ex = LLAMA_EXAMPLE_COMMON;
+    common_params & params;
+    std::vector<common_arg> options;
+    void(*print_usage)(int, char **) = nullptr;
+    common_params_context(common_params & params) : params(params) {}
+};
+
+// parse input arguments from CLI
+// if one argument has invalid value, it will automatically display usage of the specific argument (and not the full usage message)
+bool common_params_parse(int argc, char ** argv, common_params & params, llama_example ex, void(*print_usage)(int, char **) = nullptr);
+
+// function to be used by test-arg-parser
+common_params_context common_params_parser_init(common_params & params, llama_example ex, void(*print_usage)(int, char **) = nullptr);
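A minimal sketch (not part of the patch) of how a tool might drive this parser: the main() below is hypothetical and only illustrative; common_params, LLAMA_EXAMPLE_MAIN and common_params_parse come from the common headers above.

    #include "arg.h"
    #include "common.h"

    int main(int argc, char ** argv) {
        common_params params;
        // on an invalid argument value, the parser prints usage for that argument and returns false
        if (!common_params_parse(argc, argv, params, LLAMA_EXAMPLE_MAIN)) {
            return 1;
        }
        // fields filled in by the handlers registered in arg.cpp, e.g. params.sampling.temp or params.n_gpu_layers
        return 0;
    }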
used by test-arg-parser +common_params_context common_params_parser_init(common_params & params, llama_example ex, void(*print_usage)(int, char **) = nullptr); diff --git a/llama.cpp/common/base64.hpp b/llama.cpp/common/base64.hpp new file mode 100644 index 0000000000000000000000000000000000000000..563247a6e5f7dba837c07a509026d8b36e61387c --- /dev/null +++ b/llama.cpp/common/base64.hpp @@ -0,0 +1,392 @@ +/* +This is free and unencumbered software released into the public domain. + +Anyone is free to copy, modify, publish, use, compile, sell, or +distribute this software, either in source code form or as a compiled +binary, for any purpose, commercial or non-commercial, and by any +means. + +In jurisdictions that recognize copyright laws, the author or authors +of this software dedicate any and all copyright interest in the +software to the public domain. We make this dedication for the benefit +of the public at large and to the detriment of our heirs and +successors. We intend this dedication to be an overt act of +relinquishment in perpetuity of all present and future rights to this +software under copyright law. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR +OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, +ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +OTHER DEALINGS IN THE SOFTWARE. + +For more information, please refer to +*/ + +#ifndef PUBLIC_DOMAIN_BASE64_HPP_ +#define PUBLIC_DOMAIN_BASE64_HPP_ + +#include +#include +#include +#include + +class base64_error : public std::runtime_error +{ +public: + using std::runtime_error::runtime_error; +}; + +class base64 +{ +public: + enum class alphabet + { + /** the alphabet is detected automatically */ + auto_, + /** the standard base64 alphabet is used */ + standard, + /** like `standard` except that the characters `+` and `/` are replaced by `-` and `_` respectively*/ + url_filename_safe + }; + + enum class decoding_behavior + { + /** if the input is not padded, the remaining bits are ignored */ + moderate, + /** if a padding character is encounter decoding is finished */ + loose + }; + + /** + Encodes all the elements from `in_begin` to `in_end` to `out`. + + @warning The source and destination cannot overlap. The destination must be able to hold at least + `required_encode_size(std::distance(in_begin, in_end))`, otherwise the behavior depends on the output iterator. + + @tparam Input_iterator the source; the returned elements are cast to `std::uint8_t` and should not be greater than + 8 bits + @tparam Output_iterator the destination; the elements written to it are from the type `char` + @param in_begin the beginning of the source + @param in_end the ending of the source + @param out the destination iterator + @param alphabet which alphabet should be used + @returns the iterator to the next element past the last element copied + @throws see `Input_iterator` and `Output_iterator` + */ + template + static Output_iterator encode(Input_iterator in_begin, Input_iterator in_end, Output_iterator out, + alphabet alphabet = alphabet::standard) + { + constexpr auto pad = '='; + const char* alpha = alphabet == alphabet::url_filename_safe + ? 
"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_" + : "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"; + + while (in_begin != in_end) { + std::uint8_t i0 = 0, i1 = 0, i2 = 0; + + // first character + i0 = static_cast(*in_begin); + ++in_begin; + + *out = alpha[i0 >> 2 & 0x3f]; + ++out; + + // part of first character and second + if (in_begin != in_end) { + i1 = static_cast(*in_begin); + ++in_begin; + + *out = alpha[((i0 & 0x3) << 4) | (i1 >> 4 & 0x0f)]; + ++out; + } else { + *out = alpha[(i0 & 0x3) << 4]; + ++out; + + // last padding + *out = pad; + ++out; + + // last padding + *out = pad; + ++out; + + break; + } + + // part of second character and third + if (in_begin != in_end) { + i2 = static_cast(*in_begin); + ++in_begin; + + *out = alpha[((i1 & 0xf) << 2) | (i2 >> 6 & 0x03)]; + ++out; + } else { + *out = alpha[(i1 & 0xf) << 2]; + ++out; + + // last padding + *out = pad; + ++out; + + break; + } + + // rest of third + *out = alpha[i2 & 0x3f]; + ++out; + } + + return out; + } + /** + Encodes a string. + + @param str the string that should be encoded + @param alphabet which alphabet should be used + @returns the encoded base64 string + @throws see base64::encode() + */ + static std::string encode(const std::string& str, alphabet alphabet = alphabet::standard) + { + std::string result; + + result.reserve(required_encode_size(str.length()) + 1); + + encode(str.begin(), str.end(), std::back_inserter(result), alphabet); + + return result; + } + /** + Encodes a char array. + + @param buffer the char array + @param size the size of the array + @param alphabet which alphabet should be used + @returns the encoded string + */ + static std::string encode(const char* buffer, std::size_t size, alphabet alphabet = alphabet::standard) + { + std::string result; + + result.reserve(required_encode_size(size) + 1); + + encode(buffer, buffer + size, std::back_inserter(result), alphabet); + + return result; + } + /** + Decodes all the elements from `in_begin` to `in_end` to `out`. `in_begin` may point to the same location as `out`, + in other words: inplace decoding is possible. + + @warning The destination must be able to hold at least `required_decode_size(std::distance(in_begin, in_end))`, + otherwise the behavior depends on the output iterator. 
+ + @tparam Input_iterator the source; the returned elements are cast to `char` + @tparam Output_iterator the destination; the elements written to it are from the type `std::uint8_t` + @param in_begin the beginning of the source + @param in_end the ending of the source + @param out the destination iterator + @param alphabet which alphabet should be used + @param behavior the behavior when an error was detected + @returns the iterator to the next element past the last element copied + @throws base64_error depending on the set behavior + @throws see `Input_iterator` and `Output_iterator` + */ + template + static Output_iterator decode(Input_iterator in_begin, Input_iterator in_end, Output_iterator out, + alphabet alphabet = alphabet::auto_, + decoding_behavior behavior = decoding_behavior::moderate) + { + //constexpr auto pad = '='; + std::uint8_t last = 0; + auto bits = 0; + + while (in_begin != in_end) { + auto c = *in_begin; + ++in_begin; + + if (c == '=') { + break; + } + + auto part = _base64_value(alphabet, c); + + // enough bits for one byte + if (bits + 6 >= 8) { + *out = (last << (8 - bits)) | (part >> (bits - 2)); + ++out; + + bits -= 2; + } else { + bits += 6; + } + + last = part; + } + + // check padding + if (behavior != decoding_behavior::loose) { + while (in_begin != in_end) { + auto c = *in_begin; + ++in_begin; + + if (c != '=') { + throw base64_error("invalid base64 character."); + } + } + } + + return out; + } + /** + Decodes a string. + + @param str the base64 encoded string + @param alphabet which alphabet should be used + @param behavior the behavior when an error was detected + @returns the decoded string + @throws see base64::decode() + */ + static std::string decode(const std::string& str, alphabet alphabet = alphabet::auto_, + decoding_behavior behavior = decoding_behavior::moderate) + { + std::string result; + + result.reserve(max_decode_size(str.length())); + + decode(str.begin(), str.end(), std::back_inserter(result), alphabet, behavior); + + return result; + } + /** + Decodes a string. + + @param buffer the base64 encoded buffer + @param size the size of the buffer + @param alphabet which alphabet should be used + @param behavior the behavior when an error was detected + @returns the decoded string + @throws see base64::decode() + */ + static std::string decode(const char* buffer, std::size_t size, alphabet alphabet = alphabet::auto_, + decoding_behavior behavior = decoding_behavior::moderate) + { + std::string result; + + result.reserve(max_decode_size(size)); + + decode(buffer, buffer + size, std::back_inserter(result), alphabet, behavior); + + return result; + } + /** + Decodes a string inplace. + + @param[in,out] str the base64 encoded string + @param alphabet which alphabet should be used + @param behavior the behavior when an error was detected + @throws base64::decode_inplace() + */ + static void decode_inplace(std::string& str, alphabet alphabet = alphabet::auto_, + decoding_behavior behavior = decoding_behavior::moderate) + { + str.resize(decode(str.begin(), str.end(), str.begin(), alphabet, behavior) - str.begin()); + } + /** + Decodes a char array inplace. 
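+
+    In-place decoding works because every four input characters yield at most three output bytes,
+    so the write position never overtakes the read position. Hedged sketch using the std::string
+    overload declared above (illustrative only):
+
+        std::string s = "Zm9v";
+        base64::decode_inplace(s);  // s == "foo"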
+ + @param[in,out] str the string array + @param size the length of the array + @param alphabet which alphabet should be used + @param behavior the behavior when an error was detected + @returns the pointer to the next element past the last element decoded + @throws base64::decode_inplace() + */ + static char* decode_inplace(char* str, std::size_t size, alphabet alphabet = alphabet::auto_, + decoding_behavior behavior = decoding_behavior::moderate) + { + return decode(str, str + size, str, alphabet, behavior); + } + /** + Returns the required decoding size for a given size. The value is calculated with the following formula: + + $$ + \lceil \frac{size}{4} \rceil \cdot 3 + $$ + + @param size the size of the encoded input + @returns the size of the resulting decoded buffer; this the absolute maximum + */ + static std::size_t max_decode_size(std::size_t size) noexcept + { + return (size / 4 + (size % 4 ? 1 : 0)) * 3; + } + /** + Returns the required encoding size for a given size. The value is calculated with the following formula: + + $$ + \lceil \frac{size}{3} \rceil \cdot 4 + $$ + + @param size the size of the decoded input + @returns the size of the resulting encoded buffer + */ + static std::size_t required_encode_size(std::size_t size) noexcept + { + return (size / 3 + (size % 3 ? 1 : 0)) * 4; + } + +private: + static std::uint8_t _base64_value(alphabet& alphabet, char c) + { + if (c >= 'A' && c <= 'Z') { + return c - 'A'; + } else if (c >= 'a' && c <= 'z') { + return c - 'a' + 26; + } else if (c >= '0' && c <= '9') { + return c - '0' + 52; + } + + // comes down to alphabet + if (alphabet == alphabet::standard) { + if (c == '+') { + return 62; + } else if (c == '/') { + return 63; + } + } else if (alphabet == alphabet::url_filename_safe) { + if (c == '-') { + return 62; + } else if (c == '_') { + return 63; + } + } // auto detect + else { + if (c == '+') { + alphabet = alphabet::standard; + + return 62; + } else if (c == '/') { + alphabet = alphabet::standard; + + return 63; + } else if (c == '-') { + alphabet = alphabet::url_filename_safe; + + return 62; + } else if (c == '_') { + alphabet = alphabet::url_filename_safe; + + return 63; + } + } + + throw base64_error("invalid base64 character."); + } +}; + +#endif // !PUBLIC_DOMAIN_BASE64_HPP_ diff --git a/llama.cpp/common/build-info.cpp.in b/llama.cpp/common/build-info.cpp.in new file mode 100644 index 0000000000000000000000000000000000000000..0b945aa68fff3e77b881328e4fd63f6e2f9017ee --- /dev/null +++ b/llama.cpp/common/build-info.cpp.in @@ -0,0 +1,4 @@ +int LLAMA_BUILD_NUMBER = @BUILD_NUMBER@; +char const *LLAMA_COMMIT = "@BUILD_COMMIT@"; +char const *LLAMA_COMPILER = "@BUILD_COMPILER@"; +char const *LLAMA_BUILD_TARGET = "@BUILD_TARGET@"; diff --git a/llama.cpp/common/chat-template.hpp b/llama.cpp/common/chat-template.hpp new file mode 100644 index 0000000000000000000000000000000000000000..882ba41bd1bb0fc5cc22bdf5c258b2b68f60a23e --- /dev/null +++ b/llama.cpp/common/chat-template.hpp @@ -0,0 +1,529 @@ +/* + Copyright 2024 Google LLC + + Use of this source code is governed by an MIT-style + license that can be found in the LICENSE file or at + https://opensource.org/licenses/MIT. 
+*/ +// SPDX-License-Identifier: MIT +#pragma once + +#include "minja.hpp" +#include +#include +#include + +using json = nlohmann::ordered_json; + +namespace minja { + +struct chat_template_caps { + bool supports_tools = false; + bool supports_tool_calls = false; + bool supports_tool_responses = false; + bool supports_system_role = false; + bool supports_parallel_tool_calls = false; + bool supports_tool_call_id = false; + // meta-llama/Llama-3.1-8B-Instruct expects arguments to be an object. + // Most other templates (and OpenAI's API) expect the arguments object to be stringified. + bool requires_object_arguments = false; + // CohereForAI/c4ai-command-r-plus simple variant + bool requires_non_null_content = false; + // MiniMaxAI/MiniMax-Text-01 special + bool requires_typed_content = false; +}; + +struct chat_template_inputs { + nlohmann::ordered_json messages; + nlohmann::ordered_json tools; + bool add_generation_prompt = true; + nlohmann::ordered_json extra_context; + std::chrono::system_clock::time_point now = std::chrono::system_clock::now(); +}; + +struct chat_template_options { + bool apply_polyfills = true; + bool use_bos_token = true; + bool use_eos_token = true; + bool define_strftime_now = true; + + bool polyfill_tools = true; + bool polyfill_tool_call_examples = true; + bool polyfill_tool_calls = true; + bool polyfill_tool_responses = true; + bool polyfill_system_role = true; + bool polyfill_object_arguments = true; + bool polyfill_typed_content = true; +}; + +class chat_template { + + private: + chat_template_caps caps_; + std::string source_; + std::string bos_token_; + std::string eos_token_; + std::shared_ptr template_root_; + std::string tool_call_example_; + + std::string try_raw_render( + const nlohmann::ordered_json & messages, + const nlohmann::ordered_json & tools, + bool add_generation_prompt, + const nlohmann::ordered_json & extra_context = nlohmann::ordered_json()) const + { + try { + chat_template_inputs inputs; + inputs.messages = messages; + inputs.tools = tools; + inputs.add_generation_prompt = add_generation_prompt; + inputs.extra_context = extra_context; + // Use fixed date for tests + inputs.now = std::chrono::system_clock::from_time_t(0); + + chat_template_options opts; + opts.apply_polyfills = false; + + auto prompt = apply(inputs, opts); + // fprintf(stderr, "try_raw_render: %s\n", prompt.c_str()); + return prompt; + } catch (const std::exception & e) { + // fprintf(stderr, "try_raw_render error: %s\n", e.what()); + return ""; + } + } + + public: + + chat_template(const std::string & source, const std::string & bos_token, const std::string & eos_token) + : source_(source), bos_token_(bos_token), eos_token_(eos_token) + { + template_root_ = minja::Parser::parse(source_, { + /* .trim_blocks = */ true, + /* .lstrip_blocks = */ true, + /* .keep_trailing_newline = */ false, + }); + + auto contains = [](const std::string & haystack, const std::string & needle) { + return haystack.find(needle) != std::string::npos; + }; + + const std::string user_needle = ""; + const std::string sys_needle = ""; + const json dummy_str_user_msg = {{"role", "user"}, {"content", user_needle}}; + const json dummy_typed_user_msg = {{"role", "user"}, {"content", json::array({{{"type", "text"}, {"text", user_needle}}})}}; + + caps_.requires_typed_content = + !contains(try_raw_render(json::array({dummy_str_user_msg}), {}, false), user_needle) + && contains(try_raw_render(json::array({dummy_typed_user_msg}), {}, false), user_needle); + + const auto dummy_user_msg = 
caps_.requires_typed_content + ? dummy_typed_user_msg + : dummy_str_user_msg; + const json needle_system_msg = { + {"role", "system"}, + {"content", caps_.requires_typed_content ? json::array({{{"type", "text"}, {"text", sys_needle}}}) : json(sys_needle)}, + }; + + caps_.supports_system_role = contains(try_raw_render({needle_system_msg, dummy_user_msg,}, {}, false), sys_needle); + + auto out = try_raw_render(json::array({ + dummy_user_msg + }), json::array({ + { + {"name", "some_tool"}, + {"type", "function"}, + {"function", { + {"name", "some_tool"}, + {"description", "Some tool."}, + {"parameters", { + {"type", "object"}, + {"properties", { + {"arg", { + {"type", "string"}, + {"description", "Some argument."}, + }}, + }}, + {"required", json::array({ "arg" })}, + }}, + }}, + }, + }), false); + caps_.supports_tools = contains(out, "some_tool"); + + auto make_tool_calls_msg = [&](const json & tool_calls) { + return json { + {"role", "assistant"}, + {"content", nullptr}, + {"tool_calls", tool_calls}, + }; + }; + auto make_tool_call = [](const std::string & tool_name, const json & arguments) { + return json { + {"id", "call_1___"}, + {"type", "function"}, + {"function", { + {"arguments", arguments}, + {"name", tool_name}, + }}, + }; + }; + const json dummy_args_obj {{"argument_needle", "print('Hello, World!')"}}; + + // Note: the arguments are rendered in both cases, but may be double-escaped, which we don't want. + out = try_raw_render(json::array({ + dummy_user_msg, + make_tool_calls_msg(json::array({make_tool_call("ipython", dummy_args_obj.dump())})), + }), {}, false); + auto tool_call_renders_str_arguments = contains(out, "\"argument_needle\":") || contains(out, "'argument_needle':"); + out = try_raw_render(json::array({ + dummy_user_msg, + make_tool_calls_msg(json::array({make_tool_call("ipython", dummy_args_obj)})), + }), {}, false); + auto tool_call_renders_obj_arguments = contains(out, "\"argument_needle\":") || contains(out, "'argument_needle':"); + + caps_.supports_tool_calls = tool_call_renders_str_arguments || tool_call_renders_obj_arguments; + caps_.requires_object_arguments = !tool_call_renders_str_arguments && tool_call_renders_obj_arguments; + auto out_empty = try_raw_render(json::array({dummy_user_msg, {{"role", "assistant"}, {"content", ""}}}), {}, false); + auto out_null = try_raw_render(json::array({dummy_user_msg, {{"role", "assistant"}, {"content", nullptr}}}), {}, false); + caps_.requires_non_null_content = contains(out_empty, user_needle) && !contains(out_null, user_needle); + + if (caps_.supports_tool_calls) { + auto dummy_args = caps_.requires_object_arguments ? 
dummy_args_obj : json(dummy_args_obj.dump()); + auto tc1 = make_tool_call("test_tool1", dummy_args); + auto tc2 = make_tool_call("test_tool2", dummy_args); + auto out = try_raw_render(json::array({ + dummy_user_msg, + make_tool_calls_msg(json::array({tc1, tc2})), + }), {}, false); + caps_.supports_parallel_tool_calls = contains(out, "test_tool1") && contains(out, "test_tool2"); + + out = try_raw_render(json::array({ + dummy_user_msg, + make_tool_calls_msg(json::array({tc1})), + { + {"role", "tool"}, + {"name", "test_tool1"}, + {"content", "Some response!"}, + {"tool_call_id", "call_911_"}, + } + }), {}, false); + caps_.supports_tool_responses = contains(out, "Some response!"); + caps_.supports_tool_call_id = contains(out, "call_911_"); + } + + try { + if (!caps_.supports_tools) { + const json user_msg { + {"role", "user"}, + {"content", "Hey"}, + }; + const json args { + {"arg1", "some_value"}, + }; + const json tool_call_msg { + {"role", "assistant"}, + {"content", nullptr}, + {"tool_calls", json::array({ + { + // TODO: detect if requires numerical id or fixed length == 6 like Nemo + {"id", "call_1___"}, + {"type", "function"}, + {"function", { + {"name", "tool_name"}, + {"arguments", (caps_.requires_object_arguments ? args : json(minja::Value(args).dump(-1, /* to_json= */ true)))}, + }}, + }, + })}, + }; + std::string prefix, full; + { + chat_template_inputs inputs; + inputs.messages = json::array({user_msg}); + inputs.add_generation_prompt = true; + prefix = apply(inputs); + } + { + chat_template_inputs inputs; + inputs.messages = json::array({user_msg, tool_call_msg}); + inputs.add_generation_prompt = false; + full = apply(inputs); + } + auto eos_pos_last = full.rfind(eos_token_); + if (eos_pos_last == prefix.size() - eos_token_.size() || + (full[full.size() - 1] == '\n' && (eos_pos_last == full.size() - eos_token_.size() - 1))) { + full = full.substr(0, eos_pos_last); + } + size_t common_prefix_length = 0; + for (size_t i = 0; i < prefix.size() && i < full.size(); ++i) { + if (prefix[i] != full[i]) { + break; + } + if (prefix[i] == '<') { + // DeepSeek R1's template (as of 20250209) adds a trailing if add_generation_prompt, + // but it removes thinking tags for past messages. + // The prefix and full strings diverge at vs. <|tool▁calls▁begin|>, we avoid consuming the leading <. 
+ continue; + } + common_prefix_length = i + 1; + } + auto example = full.substr(common_prefix_length); + if (example.find("tool_name") == std::string::npos && example.find("some_value") == std::string::npos) { + fprintf(stderr, "Failed to infer a tool call example (possible template bug)\n"); + } else { + tool_call_example_ = example; + } + } + } catch (const std::exception & e) { + fprintf(stderr, "Failed to generate tool call example: %s\n", e.what()); + } + } + + const std::string & source() const { return source_; } + const std::string & bos_token() const { return bos_token_; } + const std::string & eos_token() const { return eos_token_; } + const chat_template_caps & original_caps() const { return caps_; } + + // Deprecated, please use the form with chat_template_inputs and chat_template_options + std::string apply( + const nlohmann::ordered_json & messages, + const nlohmann::ordered_json & tools, + bool add_generation_prompt, + const nlohmann::ordered_json & extra_context = nlohmann::ordered_json(), + bool apply_polyfills = true) + { + fprintf(stderr, "[%s] Deprecated!\n", __func__); + chat_template_inputs inputs; + inputs.messages = messages; + inputs.tools = tools; + inputs.add_generation_prompt = add_generation_prompt; + inputs.extra_context = extra_context; + inputs.now = std::chrono::system_clock::now(); + + chat_template_options opts; + opts.apply_polyfills = apply_polyfills; + + return apply(inputs, opts); + } + + std::string apply( + const chat_template_inputs & inputs, + const chat_template_options & opts = chat_template_options()) const + { + json actual_messages; + + auto has_tools = inputs.tools.is_array() && !inputs.tools.empty(); + auto has_tool_calls = false; + auto has_tool_responses = false; + auto has_string_content = false; + for (const auto & message : inputs.messages) { + if (message.contains("tool_calls") && !message["tool_calls"].is_null()) { + has_tool_calls = true; + } + if (message.contains("role") && message["role"] == "tool") { + has_tool_responses = true; + } + if (message.contains("content") && message["content"].is_string()) { + has_string_content = true; + } + } + + auto polyfill_system_role = opts.polyfill_system_role && !caps_.supports_system_role; + auto polyfill_tools = opts.polyfill_tools && has_tools && !caps_.supports_tools; + auto polyfill_tool_call_example = polyfill_tools && opts.polyfill_tool_call_examples; + auto polyfill_tool_calls = opts.polyfill_tool_calls && has_tool_calls && !caps_.supports_tool_calls; + auto polyfill_tool_responses = opts.polyfill_tool_responses && has_tool_responses && !caps_.supports_tool_responses; + auto polyfill_object_arguments = opts.polyfill_object_arguments && has_tool_calls && caps_.requires_object_arguments; + auto polyfill_typed_content = opts.polyfill_typed_content && has_string_content && caps_.requires_typed_content; + + auto needs_polyfills = opts.apply_polyfills && (false + || polyfill_system_role + || polyfill_tools + || polyfill_tool_calls + || polyfill_tool_responses + || polyfill_object_arguments + || polyfill_typed_content + ); + + if (needs_polyfills) { + actual_messages = json::array(); + + auto add_message = [&](const json & msg) { + if (polyfill_typed_content && msg.contains("content") && !msg.at("content").is_null() && msg.at("content").is_string()) { + actual_messages.push_back({ + {"role", msg.at("role")}, + {"content", {{ + {"type", "text"}, + {"text", msg.at("content")}, + }}}, + }); + } else { + actual_messages.push_back(msg); + } + }; + + std::string pending_system; + auto flush_sys = 
[&]() { + if (!pending_system.empty()) { + add_message({ + {"role", "user"}, + {"content", pending_system}, + }); + pending_system.clear(); + } + }; + + json adjusted_messages; + if (polyfill_tools) { + adjusted_messages = add_system(inputs.messages, + "You can call any of the following tools to satisfy the user's requests: " + minja::Value(inputs.tools).dump(2, /* to_json= */ true) + + (!polyfill_tool_call_example || tool_call_example_.empty() ? "" : "\n\nExample tool call syntax:\n\n" + tool_call_example_ + "\n\n")); + } else { + adjusted_messages = inputs.messages; + } + + for (const auto & message_ : adjusted_messages) { + auto message = message_; + if (!message.contains("role") || !message.contains("content")) { + throw std::runtime_error("message must have 'role' and 'content' fields: " + message.dump()); + } + std::string role = message.at("role"); + + if (message.contains("tool_calls")) { + if (polyfill_object_arguments || polyfill_tool_calls) { + for (auto & tool_call : message.at("tool_calls")) { + if (tool_call["type"] == "function") { + auto & function = tool_call.at("function"); + auto & arguments = function.at("arguments"); + if (arguments.is_string()) { + try { + arguments = json::parse(arguments.get()); + } catch (const std::exception & ecvt) { + fprintf(stderr, "Failed to parse arguments: %s\n", ecvt.what()); + } + } + } + } + } + if (polyfill_tool_calls) { + auto content = message.at("content"); + auto tool_calls = json::array(); + for (const auto & tool_call : message.at("tool_calls")) { + if (tool_call.at("type") != "function") { + continue; + } + const auto & function = tool_call.at("function"); + auto tc = json { + {"name", function.at("name")}, + {"arguments", function.at("arguments")}, + }; + if (tool_call.contains("id")) { + tc["id"] = tool_call["id"]; + } + tool_calls.push_back(tc); + } + auto obj = json { + {"tool_calls", tool_calls}, + }; + if (!content.is_null() && content != "") { + obj["content"] = content; + } + message["content"] = obj.dump(2); + message.erase("tool_calls"); + } + } + if (polyfill_tool_responses && role == "tool") { + message["role"] = "user"; + auto obj = json { + {"tool_response", { + {"content", message.at("content")}, + }}, + }; + if (message.contains("name")) { + obj["tool_response"]["name"] = message.at("name"); + } + if (message.contains("tool_call_id")) { + obj["tool_response"]["tool_call_id"] = message.at("tool_call_id"); + } + message["content"] = obj.dump(2); + message.erase("name"); + } + + if (!message["content"].is_null() && polyfill_system_role) { + std::string content = message.at("content"); + if (role == "system") { + if (!pending_system.empty()) pending_system += "\n"; + pending_system += content; + continue; + } else { + if (role == "user") { + if (!pending_system.empty()) { + message["content"] = pending_system + (content.empty() ? "" : "\n" + content); + pending_system.clear(); + } + } else { + flush_sys(); + } + } + } + add_message(message); + } + flush_sys(); + } else { + actual_messages = inputs.messages; + } + + auto context = minja::Context::make(json({ + {"messages", actual_messages}, + {"add_generation_prompt", inputs.add_generation_prompt}, + })); + context->set("bos_token", opts.use_bos_token ? bos_token_ : ""); + context->set("eos_token", opts.use_eos_token ? 
eos_token_ : ""); + if (opts.define_strftime_now) { + auto now = inputs.now; + context->set("strftime_now", Value::callable([now](const std::shared_ptr &, minja::ArgumentsValue & args) { + args.expectArgs("strftime_now", {1, 1}, {0, 0}); + auto format = args.args[0].get(); + + auto time = std::chrono::system_clock::to_time_t(now); + auto local_time = *std::localtime(&time); + std::ostringstream ss; + ss << std::put_time(&local_time, format.c_str()); + return ss.str(); + })); + } + if (!inputs.tools.is_null()) { + context->set("tools", minja::Value(inputs.tools)); + } + if (!inputs.extra_context.is_null()) { + for (auto & kv : inputs.extra_context.items()) { + context->set(kv.key(), minja::Value(kv.value())); + } + } + + auto ret = template_root_->render(context); + // fprintf(stderr, "actual_messages: %s\n", actual_messages.dump(2).c_str()); + // fprintf(stderr, "apply: %s\n\n", ret.c_str()); + return ret; + } + + static nlohmann::ordered_json add_system(const nlohmann::ordered_json & messages, const std::string & system_prompt) { + json messages_with_system = messages; + + if (messages_with_system.size() > 0 && messages_with_system[0].at("role") == "system") { + std::string existing_system = messages_with_system.at(0).at("content"); + messages_with_system[0] = json { + {"role", "system"}, + {"content", existing_system + "\n\n" + system_prompt}, + }; + } else { + messages_with_system.insert(messages_with_system.begin(), json { + {"role", "system"}, + {"content", system_prompt}, + }); + } + return messages_with_system; + } +}; + +} // namespace minja diff --git a/llama.cpp/common/chat.cpp b/llama.cpp/common/chat.cpp new file mode 100644 index 0000000000000000000000000000000000000000..ef1c6fb3d3978adea68939b1202108074ccc5763 --- /dev/null +++ b/llama.cpp/common/chat.cpp @@ -0,0 +1,966 @@ +#include "chat.hpp" +#include "chat-template.hpp" +#include "json-schema-to-grammar.h" +#include "log.h" +#include "minja.hpp" + +std::string common_chat_format_name(common_chat_format format) { + switch (format) { + case COMMON_CHAT_FORMAT_CONTENT_ONLY: return "Content-only"; + case COMMON_CHAT_FORMAT_GENERIC: return "Generic"; + case COMMON_CHAT_FORMAT_MISTRAL_NEMO: return "Mistral Nemo"; + case COMMON_CHAT_FORMAT_LLAMA_3_X: return "Llama 3.x"; + case COMMON_CHAT_FORMAT_LLAMA_3_X_WITH_BUILTIN_TOOLS: return "Llama 3.x with builtin tools"; + case COMMON_CHAT_FORMAT_DEEPSEEK_R1: return "DeepSeek R1"; + case COMMON_CHAT_FORMAT_FIREFUNCTION_V2: return "FireFunction v2"; + case COMMON_CHAT_FORMAT_FUNCTIONARY_V3_2: return "Functionary v3.2"; + case COMMON_CHAT_FORMAT_FUNCTIONARY_V3_1_LLAMA_3_1: return "Functionary v3.1 Llama 3.1"; + case COMMON_CHAT_FORMAT_HERMES_2_PRO: return "Hermes 2 Pro"; + case COMMON_CHAT_FORMAT_COMMAND_R7B: return "Command R7B"; + default: + throw std::runtime_error("Unknown chat format"); + } +} + +const common_grammar_options grammar_options { + /* .dotall = */ false, + /* .compact_spaces = */ false, + // /* .compact_spaces = */ true, +}; + +static bool parse_json(std::string::const_iterator & it, const std::string::const_iterator & end, json & out) { + // // https://json.nlohmann.me/features/parsing/sax_interface/ + struct json_error_locator : public nlohmann::json_sax { + std::size_t position; + bool found_error; + + json_error_locator() : position(0), found_error(false) {} + + bool parse_error(std::size_t position, const std::string &, const json::exception &) override { + this->position = position - 1; + this->found_error = true; + return false; + } + bool null() override { return 
true; } + bool boolean(bool) override { return true; } + bool number_integer(number_integer_t) override { return true; } + bool number_unsigned(number_unsigned_t) override { return true; } + bool number_float(number_float_t, const string_t &) override { return true; } + bool string(string_t &) override { return true; } + bool binary(binary_t &) override { return true; } + bool start_object(std::size_t) override { return true; } + bool key(string_t &) override { return true; } + bool end_object() override { return true; } + bool start_array(std::size_t) override { return true; } + bool end_array() override { return true; } + }; + json_error_locator err_loc; + json::sax_parse(it, end, &err_loc); + + std::string::const_iterator temptative_end; + if (err_loc.found_error) { + temptative_end = it + err_loc.position; + } else { + temptative_end = end; + } + std::string json_sub {it, temptative_end}; + try { + out = json::parse(json_sub); + it = temptative_end; + return true; + } catch (const std::exception &) { + return false; + } +} + + +/** + * Takes a prefix regex that must have 1 group to capture the function name, a closing suffix, and expects json parameters in between. + * Aggregates the prefix, suffix and in-between text into the content. + */ +static common_chat_msg parse_json_tool_calls( + const std::string& input, + const std::optional & trigger_opt, + const std::regex & function_regex, + const std::regex & close_regex) { + std::smatch match; + + common_chat_msg result; + result.role = "assistant"; + + + auto end = input.end(); + auto it = input.begin(); + + if (trigger_opt) { + if (!std::regex_search(it, end, match, *trigger_opt)) { + result.content = input; + return result; + } + result.content = match.prefix().str(); + it = match.suffix().first; + } + + while (it != end) { + std::sregex_iterator rend; + std::sregex_iterator rit(it, end, function_regex); + if (rit == rend) { + fprintf(stderr, "No more tool calls found\n"); + result.content += std::string(it, end); + break; + } + auto name = rit->str(1); + result.content += std::string(it, rit->prefix().second); + it = rit->suffix().first; + + json arguments; + if (!parse_json(it, end, arguments)) { + throw std::runtime_error("Failed to parse json tool call arguments"); + } + if (!std::regex_search(it, end, match, close_regex)) { + throw std::runtime_error("Malformed input, missing closing pattern"); + } + it = match.suffix().first; + result.tool_calls.push_back({name, arguments.is_string() ? arguments.get() : arguments.dump(), /* id= */ ""}); + } + return result; +} + +static common_chat_msg parse_prefixed_json_tool_call_array(const std::string& input, const std::string & prefix, size_t rstrip_prefix = 0) { + auto content_end = input.find(prefix); + size_t tc_start = std::string::npos; + + common_chat_msg result; + result.role = "assistant"; + const auto process_tool_calls = [&](const json & tool_calls) { + for (const auto & tool_call : tool_calls) { + const auto & arguments = tool_call["arguments"]; + result.tool_calls.push_back({ + tool_call["name"], + arguments.is_string() ? arguments.get() : arguments.dump(), + tool_call.contains("id") ? 
tool_call["id"] : "", + }); + } + }; + if (content_end == std::string::npos) { + result.content = input; + } else { + tc_start = content_end + prefix.size() - rstrip_prefix; + result.content = input.substr(0, content_end); + auto tool_calls = json::parse(input.substr(tc_start)); + process_tool_calls(tool_calls); + } + return result; +} + +static void foreach_function(const json & tools, const std::function & fn) { + for (const auto & tool : tools) { + if (!tool.contains("type") || tool["type"] != "function" || !tool.contains("function")) { + LOG_INF("Skipping tool without function: %s", tool.dump(2).c_str()); + continue; + } + fn(tool); + } +} + +static std::string apply( + const common_chat_template & tmpl, + const nlohmann::ordered_json & messages, + const nlohmann::ordered_json & tools, + bool add_generation_prompt, + const nlohmann::ordered_json & extra_context = nlohmann::ordered_json()) +{ + minja::chat_template_inputs tmpl_inputs; + tmpl_inputs.messages = messages; + tmpl_inputs.tools = tools; + tmpl_inputs.add_generation_prompt = add_generation_prompt; + tmpl_inputs.extra_context = extra_context; + // TODO: add flag to control date/time, if only for testing purposes. + // tmpl_inputs.now = std::chrono::system_clock::now(); + + minja::chat_template_options tmpl_opts; + tmpl_opts.use_bos_token = false; + tmpl_opts.use_eos_token = false; + + return tmpl.apply(tmpl_inputs, tmpl_opts); +} + +static common_chat_params common_chat_params_init_generic(const common_chat_template & tmpl, const struct common_chat_inputs & inputs) { + common_chat_params data; + + auto tool_call_schemas = json::array(); + foreach_function(inputs.tools, [&](const json & tool) { + const auto & function = tool["function"]; + auto tool_schema = json { + {"type", "object"}, + {"properties", { + {"name", { + {"type", "string"}, + {"const", function["name"]}, + }}, + {"arguments", function["parameters"]}, + }}, + {"required", json::array({"name", "arguments"})}, + }; + if (function.contains("description")) { + tool_schema["description"] = function["description"]; + } + if (inputs.parallel_tool_calls) { + tool_schema["properties"]["id"] = { + {"type", "string"}, + {"minLength", 4}, + }; + tool_schema["required"].push_back("id"); + } + tool_call_schemas.emplace_back(tool_schema); + }); + const auto tool_call = + inputs.parallel_tool_calls + ? json { + {"type", "object"}, + {"properties", { + {"tool_calls", { + {"type", "array"}, + {"items", tool_call_schemas.size() == 1 ? tool_call_schemas[0] : json { + {"anyOf", tool_call_schemas}, + }}, + {"minItems", 1}, + }}, + }}, + {"required", json::array({"tool_calls"})}, + } + : json { + {"type", "object"}, + {"properties", { + {"tool_call", tool_call_schemas.size() == 1 ? tool_call_schemas[0] : json { + {"anyOf", tool_call_schemas}, + }}, + }}, + {"required", json::array({"tool_call"})}, + }; + const auto schema = + inputs.tool_choice != "required" + ? json { + {"anyOf", json::array({ + tool_call, + { + {"type", "object"}, + {"properties", { + {"response", inputs.json_schema.is_null() + ? 
json {{"type", "string"}} + : inputs.json_schema + }, + }}, + {"required", json::array({"response"})}, + }, + })} + } + : tool_call; + + data.grammar_lazy = false; + data.grammar = build_grammar([&](const common_grammar_builder & builder) { + builder.add_schema("root", schema); + }, grammar_options); + + auto tweaked_messages = common_chat_template::add_system( + inputs.messages, + "Respond in JSON format, either with `tool_call` (a request to call tools) or with `response` reply to the user's request"); + + data.prompt = apply(tmpl, tweaked_messages, inputs.tools.empty() ? json() : inputs.tools, inputs.add_generation_prompt); + data.format = COMMON_CHAT_FORMAT_GENERIC; + return data; +} +static common_chat_msg common_chat_parse_generic(const std::string & input) { + json data = json::parse(input); + common_chat_msg result; + result.role = "assistant"; + if (data.contains("tool_calls")) { + for (const auto & tool_call : data["tool_calls"]) { + result.tool_calls.push_back({ + tool_call["name"], + tool_call["arguments"].dump(), + tool_call.contains("id") ? tool_call["id"] : "", + }); + } + } else if (data.contains("tool_call")) { + result.tool_calls.push_back({ + data["tool_call"]["name"], + data["tool_call"]["arguments"].dump(), + /* id= */ "", + }); + } else if (data.contains("response")) { + const auto & response = data["response"]; + result.content = response.is_string() ? response.get() : response.dump(2); + } + return result; +} + +static common_chat_params common_chat_params_init_mistral_nemo(const common_chat_template & tmpl, const struct common_chat_inputs & inputs) { + common_chat_params data; + data.grammar_lazy = inputs.tool_choice != "required"; + data.grammar = build_grammar([&](const common_grammar_builder & builder) { + auto schemas = json::array(); + foreach_function(inputs.tools, [&](const json & tool) { + const auto & function = tool["function"]; + schemas.push_back({ + {"type", "object"}, + {"properties", { + // Important note: the model is probably trained to take a JSON stringified arguments value. + // It's hard to constrain that for now (while reusing the JSON schema conversion), so we're just expecting a plain object. + {"name", { + {"type", "string"}, + {"const", function["name"]}, + }}, + {"arguments", function["parameters"]}, + {"id", { + {"type", "string"}, + // Nemo's template expects a 9-character alphanumeric ID. + {"pattern", "^[a-zA-Z0-9]{9}$"}, + }}, + }}, + {"required", json::array({"name", "arguments", "id"})}, + }); + }); + auto schema = json { + {"type", "array"}, + {"items", schemas.size() == 1 ? schemas[0] : json {{"anyOf", schemas}}}, + {"minItems", 1}, + }; + if (!inputs.parallel_tool_calls) { + schema["maxItems"] = 1; + } + builder.add_rule("root", "\"[TOOL_CALLS]\" " + builder.add_schema("tool_calls", schema)); + }, grammar_options); + data.grammar_triggers.push_back({"[TOOL_CALLS]", /* .at_start = */ true}); + data.prompt = apply(tmpl, inputs.messages, inputs.tools.empty() ? 
json() : inputs.tools, inputs.add_generation_prompt); + data.format = COMMON_CHAT_FORMAT_MISTRAL_NEMO; + return data; +} +static common_chat_msg common_chat_parse_mistral_nemo(const std::string & input) { + return parse_prefixed_json_tool_call_array(input, "[TOOL_CALLS]"); +} + +static common_chat_params common_chat_params_init_command_r7b(const common_chat_template & tmpl, const struct common_chat_inputs & inputs) { + common_chat_params data; + data.grammar_lazy = inputs.tool_choice != "required"; + data.grammar = build_grammar([&](const common_grammar_builder & builder) { + auto schemas = json::array(); + foreach_function(inputs.tools, [&](const json & tool) { + const auto & function = tool["function"]; + schemas.push_back({ + {"type", "object"}, + {"properties", { + {"tool_call_id", { + {"type", "string"}, + // Command-R's template expects an integer string. + {"pattern", "^[0-9]{1,10}$"}, + }}, + {"tool_name", { + {"type", "string"}, + {"const", function["name"]}, + }}, + {"parameters", function["parameters"]}, + }}, + {"required", json::array({"tool_call_id", "tool_name", "parameters"})}, + }); + }); + auto schema = json { + {"type", "array"}, + {"items", schemas.size() == 1 ? schemas[0] : json {{"anyOf", schemas}}}, + {"minItems", 1}, + }; + if (!inputs.parallel_tool_calls) { + schema["maxItems"] = 1; + } + builder.add_rule("root", "\"<|START_ACTION|>\" " + builder.add_schema("tool_calls", schema) + " \"<|END_ACTION|>\""); + }, grammar_options); + data.grammar_triggers.push_back({"<|START_ACTION|>", /* .at_start = */ false}); + data.preserved_tokens = { + "<|START_RESPONSE|>", + "<|END_RESPONSE|>", + "<|START_THINKING|>", + "<|END_THINKING|>", + "<|END_ACTION|>", + }; + data.prompt = apply(tmpl, inputs.messages, inputs.tools.empty() ? json() : inputs.tools, inputs.add_generation_prompt); + data.format = COMMON_CHAT_FORMAT_COMMAND_R7B; + return data; +} +static common_chat_msg common_chat_parse_command_r7b(const std::string & input) { + static std::regex response_regex("<\\|START_RESPONSE\\|>([\\s\\S\\n\\r]*?)<\\|END_RESPONSE\\|>"); + static std::regex thought_action_regex("<\\|START_THINKING\\|>([\\s\\S\\n\\r]*?)<\\|END_THINKING\\|><\\|START_ACTION\\|>([\\s\\S\\n\\r]*?)<\\|END_ACTION\\|>"); + std::smatch match; + + common_chat_msg result; + result.role = "assistant"; + if (std::regex_match(input, match, response_regex)) { + result.content = match[1].str(); + } else if (std::regex_match(input, match, thought_action_regex)) { + result.tool_plan = match[1].str(); + auto actions_str = match[2].str(); + auto actions = json::parse(actions_str); + for (const auto & action : actions) { + result.tool_calls.push_back({ + /* .name = */ action["tool_name"], + /* .arguments = */ action["parameters"].dump(), + /* .id = */ action["tool_call_id"], + }); + } + } else { + LOG_ERR("Failed to parse command_r output"); + result.content = input; + } + return result; +} + +static void expect_tool_parameters(const std::string & name, const json & parameters, const std::vector & expected_properties) { + if (!parameters.is_object() || !parameters.contains("type") || parameters["type"] != "object" || !parameters.contains("properties") || !parameters.contains("required")) { + throw std::runtime_error("Parameters of tool " + name + " must be an object w/ required properties"); + } + const auto & parameters_properties = parameters.at("properties"); + const auto & parameters_required = parameters.at("required"); + for (const auto & prop : expected_properties) { + if (!parameters_properties.contains(prop)) { + 
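+            // (Illustrative) e.g. a builtin "web_search" tool whose schema omits its expected "query"
+            // property is rejected here before any grammar rule is emitted for it.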
throw std::runtime_error("Parameters of tool " + name + " is missing property: " + prop); + } + if (std::find(parameters_required.begin(), parameters_required.end(), json(prop)) == parameters_required.end()) { + throw std::runtime_error("Parameters of tool " + name + " must have property marked as required: " + prop); + } + } + if (parameters_properties.size() != expected_properties.size()) { + throw std::runtime_error("Parameters of tool " + name + " must only have these properties:" + string_join(expected_properties, ", ")); + } +} + +static common_chat_params common_chat_params_init_llama_3_1_tool_calls(const common_chat_template & tmpl, const struct common_chat_inputs & inputs, bool allow_python_tag_builtin_tools) { + auto builtin_tools = json::array(); + common_chat_params data; + data.grammar_lazy = inputs.tool_choice != "required"; + data.grammar = build_grammar([&](const common_grammar_builder & builder) { + std::vector tool_rules; + + auto handle_builtin_tool = [&](const std::string & name, const json & parameters) { + if (name == "wolfram_alpha") { + // https://github.com/meta-llama/llama-stack/blob/main/llama_stack/providers/remote/tool_runtime/wolfram_alpha/wolfram_alpha.py + expect_tool_parameters(name, parameters, {"query"}); + } else if (name == "web_search" || name == "brave_search") { + // https://github.com/meta-llama/llama-stack/blob/main/llama_stack/providers/remote/tool_runtime/brave_search/brave_search.py + expect_tool_parameters(name, parameters, {"query"}); + } else if (name == "python" || name == "code_interpreter") { + // https://github.com/meta-llama/llama-stack/blob/main/llama_stack/providers/inline/tool_runtime/code_interpreter/code_interpreter.py + expect_tool_parameters(name, parameters, {"code"}); + } else { + return false; + } + + std::vector kvs; + for (const auto & [key, value] : parameters.at("properties").items()) { + kvs.push_back("\"" + key + "=\" " + builder.add_schema(name + "-args-" + key, value)); + } + + tool_rules.push_back( + builder.add_rule( + name + "-call", + "\"<|python_tag|>" + name + ".call(\" " + string_join(kvs, " \", \" ") + " \")\"")); + builtin_tools.push_back(name); + + return true; + }; + + foreach_function(inputs.tools, [&](const json & tool) { + const auto & function = tool["function"]; + std::string name = function["name"]; + auto parameters = function["parameters"]; + builder.resolve_refs(parameters); + + // https://github.com/meta-llama/llama-stack/tree/main/llama_stack/providers/remote/tool_runtime + if (allow_python_tag_builtin_tools) { + handle_builtin_tool(name, parameters); + } + tool_rules.push_back( + builder.add_rule( + name + "-call", + "\"{\" space " + "( \"\\\"type\\\":\" space \"\\\"function\\\",\" space )? 
" + "\"\\\"name\\\": \\\"" + name + "\\\", \\\"parameters\\\": \" " + + builder.add_schema(name + "-args", parameters) + + " \"}\"")); + data.grammar_triggers.push_back({"{\"name\": \"" + name + "\"", /* .at_start = */ true}); + }); + data.grammar_triggers.push_back({"{\"name\":", /* .at_start = */ true}); + data.grammar_triggers.push_back({"{\n \"name\":", /* .at_start = */ true}); + data.grammar_triggers.push_back({"{\n \"name\":", /* .at_start = */ true}); + data.grammar_triggers.push_back({"{\"type\": \"function\"", /* .at_start = */ true}); + data.grammar_triggers.push_back({"{\n \"type\": \"function\"", /* .at_start = */ true}); + data.grammar_triggers.push_back({"{\n \"type\": \"function\"", /* .at_start = */ true}); + if (!builtin_tools.empty()) { + data.grammar_triggers.push_back({"<|python_tag|>", /* .at_start = */ false}); + } + builder.add_rule("root", string_join(tool_rules, " | ")); + }, grammar_options); + data.additional_stops.push_back("<|eom_id|>"); + data.prompt = apply(tmpl, inputs.messages, inputs.tools.empty() ? json() : inputs.tools, inputs.add_generation_prompt, { + {"tools_in_user_message", false}, + {"builtin_tools", builtin_tools.empty() ? json() : builtin_tools}, + }); + data.format = allow_python_tag_builtin_tools && !builtin_tools.empty() + ? COMMON_CHAT_FORMAT_LLAMA_3_X_WITH_BUILTIN_TOOLS + : COMMON_CHAT_FORMAT_LLAMA_3_X; + return data; +} +static common_chat_msg common_chat_parse_llama_3_1(const std::string & input, bool with_builtin_tools = false) { + // TODO: tighten & simplify the parser, don't accept leading text context. + static std::regex function_regex("\\{[\\s\\n\\r]*(?:\"type\"[\\s\\n\\r]*:[\\s\\n\\r]*\"function\"[\\s\\n\\r]*,[\\s\\n\\r]*|[\\s\\n\\r]*)\"name\"[\\s\\n\\r]*:[\\s\\n\\r]*\"([^\"]+)\"[\\s\\n\\r]*,[\\s\\n\\r]*\"parameters\": "); + static std::regex close_regex("\\}"); + static std::regex builtin_call_regex("<\\|python_tag\\|>([^.(]+)\\.call\\((.*)\\)"); + + if (with_builtin_tools) { + std::smatch match; + if (std::regex_match(input, match, builtin_call_regex)) { + auto name = match[1].str(); + auto raw_args = match[2].str(); + + // TODO: if/when builtin tools start accepting more than 1 argument, use parse_json for real parsing. 
+ auto it_eq = raw_args.find('='); + auto arg_name = raw_args.substr(0, it_eq); + auto arg_value_str = raw_args.substr(it_eq + 1); + auto arg_value = json::parse(arg_value_str); + + return { + /* .role = */ "assistant", + /* .content = */ match.prefix().str(), + /* .tool_calls = */ { + { + /* .name = */ match[1], + /* .arguments = */ (json { + {arg_name, arg_value}, + }).dump(), + /* .id = */ "", + }, + }, + }; + } + } + return parse_json_tool_calls(input, std::nullopt, function_regex, close_regex); +} + +static common_chat_params common_chat_params_init_deepseek_r1(const common_chat_template & tmpl, const struct common_chat_inputs & inputs) { + common_chat_params data; + data.grammar_lazy = inputs.tool_choice != "required"; + data.grammar = build_grammar([&](const common_grammar_builder & builder) { + std::vector tool_rules; + foreach_function(inputs.tools, [&](const json & tool) { + const auto & function = tool["function"]; + std::string name = function["name"]; + auto parameters = function["parameters"]; + auto args_rule = builder.add_schema(name + "-args", parameters); + tool_rules.push_back(builder.add_rule(name + "-call", + "\"<|tool▁call▁begin|>function<|tool▁sep|>" + name + "\\n```json\\n\" " + args_rule + " \"```<|tool▁call▁end|>\"")); + }); + data.grammar_triggers.push_back({"<|tool▁calls▁begin|>", /* .at_start = */ false}); + data.preserved_tokens = { + "<|tool▁sep|>", + "<|tool▁call▁end|>", + }; + builder.add_rule("root", "\"<|tool▁calls▁begin|>\" (" + string_join(tool_rules, " | ") + ")" + (inputs.parallel_tool_calls ? "*" : "") + " space"); + }, grammar_options); + auto prompt = apply(tmpl, inputs.messages, inputs.tools.empty() ? json() : inputs.tools, inputs.add_generation_prompt); + data.prompt = prompt; + data.format = COMMON_CHAT_FORMAT_DEEPSEEK_R1; + return data; +} +static common_chat_msg common_chat_parse_deepseek_r1(const std::string & input) { + static std::regex trigger_regex("<|tool▁calls▁begin|>"); + static std::regex function_regex("<|tool▁call▁begin|>function<|tool▁sep|>([^\n]+)\n```json\n"); + static std::regex close_regex("```<|tool▁call▁end|>"); + return parse_json_tool_calls(input, trigger_regex, function_regex, close_regex); +} + +static common_chat_params common_chat_params_init_firefunction_v2(const common_chat_template & tmpl, const struct common_chat_inputs & inputs) { + fprintf(stderr, "%s\n", __func__); + common_chat_params data; + data.prompt = apply(tmpl, inputs.messages, /* tools= */ nullptr, inputs.add_generation_prompt, { + {"datetime", "Jan 29 2025 13:00:00 GMT"}, + {"functions", json(inputs.tools.empty() ? "" : inputs.tools.dump(2))}, + }); + if (!inputs.tools.is_null() && !inputs.tools.empty()) { + data.grammar_lazy = inputs.tool_choice != "required"; + data.grammar = build_grammar([&](const common_grammar_builder & builder) { + auto schemas = json::array(); + foreach_function(inputs.tools, [&](const json & tool) { + const auto & function = tool["function"]; + schemas.push_back({ + {"type", "object"}, + {"properties", { + {"name", { + {"type", "string"}, + {"const", function["name"]}, + }}, + {"arguments", function["parameters"]}, + }}, + {"required", json::array({"name", "arguments", "id"})}, + }); + }); + auto schema = json { + {"type", "array"}, + {"items", schemas.size() == 1 ? schemas[0] : json {{"anyOf", schemas}}}, + {"minItems", 1}, + }; + if (!inputs.parallel_tool_calls) { + schema["maxItems"] = 1; + } + builder.add_rule("root", "\" functools\"? 
" + builder.add_schema("tool_calls", schema)); + }, grammar_options); + data.grammar_triggers.push_back({" functools[", /* .at_start = */ false}); + data.format = COMMON_CHAT_FORMAT_FIREFUNCTION_V2; + } else { + data.format = COMMON_CHAT_FORMAT_CONTENT_ONLY; + } + return data; +} +static common_chat_msg common_chat_parse_firefunction_v2(const std::string & input) { + return parse_prefixed_json_tool_call_array(input, " functools[", /* rstrip_prefix= */ 1); +} + +static common_chat_params common_chat_params_init_functionary_v3_2(const common_chat_template & tmpl, const struct common_chat_inputs & inputs) { + // >>>all\nlet's call functions>>>fn1\n{"arg1": 1...}\n>>>fn2\n{"arg1": 1...}... + // Using ">>>f1\n", ">>>f2\n"... as trigger words for the grammar + common_chat_params data; + data.prompt = apply(tmpl, inputs.messages, inputs.tools.empty() ? json() : inputs.tools, inputs.add_generation_prompt); + data.format = COMMON_CHAT_FORMAT_FUNCTIONARY_V3_2; + if (!inputs.tools.is_null() && !inputs.tools.empty()) { + data.grammar_lazy = inputs.tool_choice != "required"; + data.grammar = build_grammar([&](const common_grammar_builder & builder) { + std::vector first_tool_rules; + std::vector subsequent_tool_rules; + foreach_function(inputs.tools, [&](const json & tool) { + const auto & function = tool["function"]; + std::string name = function["name"]; + auto parameters = function["parameters"]; + auto args_rule = builder.add_schema(name + "-args", parameters); + first_tool_rules.push_back(builder.add_rule(name + "-call", "\"" + name + "\\n\" " + args_rule)); + subsequent_tool_rules.push_back(builder.add_rule(name + "-call2", "\">>>" + name + "\\n\" " + args_rule)); + data.grammar_triggers.push_back({name, /* .at_start = */ true}); + data.grammar_triggers.push_back({">>>" + name, /* .at_start = */ false}); + }); + auto first_rule = first_tool_rules.empty() ? "" : builder.add_rule("first_tool_call", string_join(first_tool_rules, " | ")) + " space"; + if (inputs.parallel_tool_calls) { + auto subsequent_rule = builder.add_rule("subsequent_tool_call", string_join(subsequent_tool_rules, " | ")) + " space"; + builder.add_rule("root", first_rule + " (" + subsequent_rule + ")*"); + } else { + builder.add_rule("root", first_rule); + } + + }, grammar_options); + } + return data; +} + +static bool consume(std::string::const_iterator & it, const std::string::const_iterator & end, const std::string & expected) { + auto expected_it = expected.begin(); + auto tmp_it = it; + while (tmp_it != end && expected_it != expected.end() && *tmp_it == *expected_it) { + ++tmp_it; + ++expected_it; + } + if (expected_it == expected.end()) { + it = tmp_it; + return true; + } + return false; +} + +static common_chat_msg common_chat_parse_functionary_v3_2(const std::string & input) { + static std::regex function_regex(R"((?:>>>)?(\w+)\n)"); + static std::regex close_regex(R"($|(?=>>>))"); + + std::string content; + auto it = input.begin(); + const auto end = input.end(); + + if (consume(it, end, "all\n")) { + std::smatch match; + if (std::regex_search(it, end, match, function_regex)) { + auto fun_it = match.prefix().second; + content = std::string(it, fun_it); + it = fun_it; + } else { + common_chat_msg res; + res.role = "assistant"; + res.content = std::string(it, end); + return res; + } + } + // TODO: tighten & simplify. 
+ try { + auto res = parse_json_tool_calls(std::string(it, end), std::nullopt, function_regex, close_regex); + res.content = content + res.content; + return res; + } catch (const std::exception & e) { + LOG_ERR("Failed to parse functionary v3.2 input: %s\n", e.what()); + common_chat_msg res; + res.role = "assistant"; + res.content = input; + return res; + } +} + +static common_chat_params common_chat_params_init_functionary_v3_1_llama_3_1(const common_chat_template & tmpl, const struct common_chat_inputs & inputs) { + // https://github.com/MeetKai/functionary/blob/main/tests/prompt_test_v3-llama3.1.txt + common_chat_params data; + json tools = inputs.tools.is_null() ? inputs.tools : json::array(); + std::string python_code_argument_name; + auto has_raw_python = false; + + data.grammar_lazy = inputs.tool_choice != "required"; + data.grammar = build_grammar([&](const common_grammar_builder & builder) { + std::vector tool_rules; + foreach_function(inputs.tools, [&](const json & tool) { + const auto & function = tool["function"]; + const auto & parameters = function["parameters"]; + std::string name = function["name"]; + if (name == "python" || name == "ipython") { + if (!parameters.contains("type")) { + throw std::runtime_error("Missing type in python tool"); + } + has_raw_python = true; + auto type = parameters.at("type"); + if (type == "object") { + auto properties = parameters.at("properties"); + for (auto it = properties.begin(); it != properties.end(); ++it) { + if (it.value().at("type") == "string") { + if (!python_code_argument_name.empty()) { + throw std::runtime_error("Multiple string arguments found in python tool"); + } + python_code_argument_name = it.key(); + } + } + if (python_code_argument_name.empty()) { + throw std::runtime_error("No string argument found in python tool"); + } + } else if (type != "string") { + throw std::runtime_error("Invalid type in python tool: " + type.dump()); + } + } + tool_rules.push_back(builder.add_rule(name + "-call", "\"\" " + builder.add_schema(name + "-args", parameters) + " \"\" space")); + }); + if (has_raw_python) { + tool_rules.push_back(builder.add_rule("python-call", "\"<|python_tag|>\" .*")); + data.grammar_triggers.push_back({"<|python_tag|>", /* .at_start = */ false}); + } + auto tool_call = builder.add_rule("tool_call", string_join(tool_rules, " | ")) + " space"; + builder.add_rule("root", inputs.parallel_tool_calls ? "(" + tool_call + ")+" : tool_call); + data.grammar_triggers.push_back({"([\s\S\n]*)$)"); + std::smatch match; + if (std::regex_search(input, match, python_tag_regex)) { + auto code = match[1].str(); + return { + /* .role = */ "assistant", + /* .content = */ match.prefix().str(), + /* .tool_calls = */ { + { + /* .name = */ "python", + /* .arguments = */ (json {{"code", code}}).dump(), + /* .id = */ "", + }, + } + }; + } + static std::regex function_regex(R"()"); + static std::regex close_regex(R"()"); + // TODO: tighten & simplify. 
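+    // The expected call shape for this format is roughly (illustrative; tool name and arguments are hypothetical):
+    //   <function=get_weather>{"location": "Paris"}</function>
+    // with raw python code optionally emitted after a <|python_tag|> marker, as handled above.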
+    return parse_json_tool_calls(input, std::nullopt, function_regex, close_regex);
+}
+
+static common_chat_params common_chat_params_init_hermes_2_pro(const common_chat_template & tmpl, const struct common_chat_inputs & inputs) {
+    common_chat_params data;
+    // (content)?(<tool_call>{"name": "foo", "arguments": {"a": 1}}</tool_call>)*
+    data.grammar_lazy = inputs.tool_choice != "required";
+    data.grammar = build_grammar([&](const common_grammar_builder & builder) {
+        std::vector<std::string> tool_rules;
+        foreach_function(inputs.tools, [&](const json & tool) {
+            const auto & function = tool["function"];
+            std::string name = function["name"];
+            auto parameters = function["parameters"];
+            builder.resolve_refs(parameters);
+            tool_rules.push_back(builder.add_schema(name + "-call", {
+                {"type", "object"},
+                {"properties", json {
+                    {"name", json {{"const", name}}},
+                    {"arguments", parameters},
+                }},
+                {"required", json::array({"name", "arguments"})},
+            }));
+        });
+        auto tool_call = "\"<tool_call>\" space " + builder.add_rule("tool_call", string_join(tool_rules, " | ")) + " \"</tool_call>\" space";
+        builder.add_rule("root", inputs.parallel_tool_calls ? "(" + tool_call + ")+" : tool_call);
+        data.grammar_triggers.push_back({"<tool_call>", /* .at_start = */ false});
+        data.preserved_tokens = { "</tool_call>" };
+    }, grammar_options);
+
+    data.prompt = apply(tmpl, inputs.messages, inputs.tools.empty() ? json() : inputs.tools, inputs.add_generation_prompt);
+    data.format = COMMON_CHAT_FORMAT_HERMES_2_PRO;
+    return data;
+}
+static common_chat_msg common_chat_parse_hermes_2_pro(const std::string & input) {
+    try {
+        std::regex start_pattern(R"([\n\s]*<tool_call>)");
+        std::regex middle_pattern(R"([\n\s]*</tool_call>[\n\s]*<tool_call>)");
+        std::regex end_pattern(R"([\n\s]*</tool_call>[\n\s]*$)");
+
+        auto end = input.end();
+        std::sregex_iterator rend;
+        std::sregex_iterator rit(input.begin(), end, start_pattern);
+        if (rit == rend) {
+            return {
+                /* .role = */ "assistant",
+                /* .content = */ input,
+                /* .tool_calls = */ {},
+            };
+        }
+
+        common_chat_msg result;
+        result.role = "assistant";
+        result.content = rit->prefix();
+
+        auto it = rit->suffix().first;
+        while (it != end) {
+            json call;
+            if (!parse_json(it, end, call)) {
+                throw std::runtime_error("Failed to parse json tool call");
+            }
+            const auto & arguments = call["arguments"];
+            result.tool_calls.push_back({
+                call["name"],
+                arguments.dump(),
+                // arguments.is_string() ? arguments.get<std::string>() : arguments.dump(),
+                /* id= */ "",
+            });
+            rit = {it, end, middle_pattern};
+            if (rit != rend) {
+                it = rit->suffix().first;
+            } else {
+                rit = {it, end, end_pattern};
+                if (rit == rend) {
+                    throw std::runtime_error("Malformed input, missing </tool_call>");
+                }
+                break;
+            }
+        }
+        return result;
+    } catch (const std::exception & e) {
+        return {
+            /* .role = */ "assistant",
+            /* .content = */ input,
+            /* .tool_calls = */ {},
+        };
+    }
+}
+
+static common_chat_params common_chat_params_init_without_tools(const common_chat_template & tmpl, const struct common_chat_inputs & inputs) {
+    common_chat_params data;
+    data.prompt = apply(tmpl, inputs.messages, inputs.tools.empty() ? json() : inputs.tools, inputs.add_generation_prompt);
+    data.format = COMMON_CHAT_FORMAT_CONTENT_ONLY;
+    data.grammar_lazy = false;
+    if (!inputs.json_schema.is_null()) {
+        if (!inputs.grammar.empty()) {
+            throw std::runtime_error("Either \"json_schema\" or \"grammar\" can be specified, but not both");
+        }
+        data.grammar = json_schema_to_grammar(inputs.json_schema);
+    } else {
+        data.grammar = inputs.grammar;
+    }
+    return data;
+}
+
+common_chat_params common_chat_params_init(const common_chat_template & tmpl, const struct common_chat_inputs & inputs) {
+    auto has_tools = !inputs.tools.is_null() && inputs.tool_choice != "none";
+    LOG_DBG("[%s] has_tools=%s\n", __func__, has_tools ? "true" : "false");
+
+    if (has_tools && !inputs.grammar.empty()) {
+        throw std::runtime_error("Cannot specify grammar with tools");
+    }
+
+    const auto & src = tmpl.source();
+    if (src.find(">>>all") != std::string::npos) {
+        // Functionary prepends "all\n" to plain content outputs, so we use its parser whether or not tools are present.
+        return common_chat_params_init_functionary_v3_2(tmpl, inputs);
+    }
+    if (src.find(" functools[") != std::string::npos) {
+        // Firefunction v2 requires datetime and functions in the context, even w/o tools.
+        return common_chat_params_init_firefunction_v2(tmpl, inputs);
+    }
+
+    if (!has_tools) {
+        return common_chat_params_init_without_tools(tmpl, inputs);
+    }
+
+    if (src.find("<tool_call>") != std::string::npos) {
+        return common_chat_params_init_hermes_2_pro(tmpl, inputs);
+    }
+    if (src.find("<|start_header_id|>") != std::string::npos
+        && src.find("ipython<|end_header_id|>") != std::string::npos) {
+        auto allow_python_tag_builtin_tools = src.find("<|python_tag|>") != std::string::npos;
+        return common_chat_params_init_llama_3_1_tool_calls(tmpl, inputs, allow_python_tag_builtin_tools);
+    }
+    if (src.find("<|tool▁calls▁begin|>") != std::string::npos) {
+        return common_chat_params_init_deepseek_r1(tmpl, inputs);
+    }
+    if (src.find("[TOOL_CALLS]") != std::string::npos) {
+        return common_chat_params_init_mistral_nemo(tmpl, inputs);
+    }
+    if (src.find("<|END_THINKING|><|START_ACTION|>") != std::string::npos) {
+        return common_chat_params_init_command_r7b(tmpl, inputs);
+    }
+    return common_chat_params_init_generic(tmpl, inputs);
+}
+
+static common_chat_msg common_chat_parse_content_only(const std::string & input) {
+    return {
+        /* .role = */ "assistant",
+        /* .content = */ input,
+        /* .tool_calls = */ {},
+    };
+}
+
+common_chat_msg common_chat_parse(const std::string & input, common_chat_format format) {
+    switch (format) {
+        case COMMON_CHAT_FORMAT_CONTENT_ONLY:
+            return common_chat_parse_content_only(input);
+        case COMMON_CHAT_FORMAT_GENERIC:
+            return common_chat_parse_generic(input);
+        case COMMON_CHAT_FORMAT_MISTRAL_NEMO:
+            return common_chat_parse_mistral_nemo(input);
+        case COMMON_CHAT_FORMAT_LLAMA_3_X:
+            return common_chat_parse_llama_3_1(input);
+        case COMMON_CHAT_FORMAT_LLAMA_3_X_WITH_BUILTIN_TOOLS:
+            return common_chat_parse_llama_3_1(input, /* with_builtin_tools= */ true);
+        case COMMON_CHAT_FORMAT_DEEPSEEK_R1:
+            return common_chat_parse_deepseek_r1(input);
+        case COMMON_CHAT_FORMAT_FUNCTIONARY_V3_2:
+            return common_chat_parse_functionary_v3_2(input);
+        case COMMON_CHAT_FORMAT_FUNCTIONARY_V3_1_LLAMA_3_1:
+            return common_chat_parse_functionary_v3_1_llama_3_1(input);
+        case COMMON_CHAT_FORMAT_HERMES_2_PRO:
+            return common_chat_parse_hermes_2_pro(input);
+        case COMMON_CHAT_FORMAT_FIREFUNCTION_V2:
+            return common_chat_parse_firefunction_v2(input);
+        case COMMON_CHAT_FORMAT_COMMAND_R7B:
+            return common_chat_parse_command_r7b(input);
+        default:
+            throw std::runtime_error("Unsupported format: " + common_chat_format_name(format));
+    }
+}
diff --git a/llama.cpp/common/chat.hpp b/llama.cpp/common/chat.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..33e64a430d51e879645d6a231d94e24aa236a1df
--- /dev/null
+++ b/llama.cpp/common/chat.hpp
@@ -0,0 +1,52 @@
+// Chat support (incl. tool call grammar constraining & output parsing) w/ generic & custom template handlers.
+
+#pragma once
+
+#include "common.h"
+#include <json.hpp>
+#include <optional>
+#include <string>
+#include <vector>
+
+using json = nlohmann::ordered_json;
+
+struct common_chat_inputs {
+    json messages;
+    json tools;
+    json tool_choice;
+    json json_schema;
+    bool parallel_tool_calls;
+    bool stream;
+    std::string grammar;
+    bool add_generation_prompt = true;
+};
+
+enum common_chat_format {
+    COMMON_CHAT_FORMAT_CONTENT_ONLY,
+    COMMON_CHAT_FORMAT_GENERIC,
+    COMMON_CHAT_FORMAT_MISTRAL_NEMO,
+    COMMON_CHAT_FORMAT_LLAMA_3_X,
+    COMMON_CHAT_FORMAT_LLAMA_3_X_WITH_BUILTIN_TOOLS,
+    COMMON_CHAT_FORMAT_DEEPSEEK_R1,
+    COMMON_CHAT_FORMAT_FIREFUNCTION_V2,
+    COMMON_CHAT_FORMAT_FUNCTIONARY_V3_2,
+    COMMON_CHAT_FORMAT_FUNCTIONARY_V3_1_LLAMA_3_1,
+    COMMON_CHAT_FORMAT_HERMES_2_PRO,
+    COMMON_CHAT_FORMAT_COMMAND_R7B,
+
+    COMMON_CHAT_FORMAT_COUNT, // Not a format, just the # formats
+};
+
+struct common_chat_params {
+    common_chat_format format = COMMON_CHAT_FORMAT_CONTENT_ONLY;
+    json prompt;
+    std::string grammar;
+    bool grammar_lazy = false;
+    std::vector<common_grammar_trigger> grammar_triggers;
+    std::vector<std::string> preserved_tokens;
+    std::vector<std::string> additional_stops;
+};
+
+struct common_chat_params common_chat_params_init(const common_chat_template & tmpl, const struct common_chat_inputs & params);
+std::string common_chat_format_name(common_chat_format format);
+common_chat_msg common_chat_parse(const std::string & input, common_chat_format format);
diff --git a/llama.cpp/common/common.cpp b/llama.cpp/common/common.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..8661e164ada6bdba72b9f5222444aa0bb3b7701b
--- /dev/null
+++ b/llama.cpp/common/common.cpp
@@ -0,0 +1,2198 @@
+#if defined(_MSC_VER)
+#define _SILENCE_CXX17_CODECVT_HEADER_DEPRECATION_WARNING
+#endif
+
+#include "ggml.h"
+#include "gguf.h"
+
+#include "common.h"
+#include "log.h"
+// Change JSON_ASSERT from assert() to GGML_ASSERT:
+#define JSON_ASSERT GGML_ASSERT
+#include "json.hpp"
+#include "json-schema-to-grammar.h"
+#include "llama.h"
+#include "chat.hpp"
+#include "chat-template.hpp"
+
+#include <algorithm>
+#include <cinttypes>
+#include <climits>
+#include <cmath>
+#include <codecvt>
+#include <cstdarg>
+#include <cstring>
+#include <ctime>
+#include <filesystem>
+#include <fstream>
+#include <iostream>
+#include <iterator>
+#include <regex>
+#include <sstream>
+#include <string>
+#include <thread>
+#include <unordered_map>
+#include <unordered_set>
+#include <vector>
+
+#if defined(__APPLE__) && defined(__MACH__)
+#include <sys/types.h>
+#include <sys/sysctl.h>
+#endif
+
+#if defined(_WIN32)
+#define WIN32_LEAN_AND_MEAN
+#ifndef NOMINMAX
+#   define NOMINMAX
+#endif
+#include <locale>
+#include <windows.h>
+#include <fcntl.h>
+#include <io.h>
+#else
+#include <sys/ioctl.h>
+#include <sys/stat.h>
+#include <unistd.h>
+#endif
+#if defined(LLAMA_USE_CURL)
+#include <curl/curl.h>
+#include <curl/easy.h>
+#include <future>
+#endif
+
+#if defined(_MSC_VER)
+#pragma warning(disable: 4244 4267) // possible loss of data
+#endif
+
+#if defined(LLAMA_USE_CURL)
+#ifdef __linux__
+#include <linux/limits.h>
+#elif defined(_WIN32)
+#   if !defined(PATH_MAX)
+#   define PATH_MAX MAX_PATH
+#   endif
+#else
+#include <sys/syslimits.h>
+#endif
+#define LLAMA_CURL_MAX_URL_LENGTH 2084 // Maximum URL Length in Chrome: 2083
+
+//
+// CURL utils
+//
+
+using curl_ptr = std::unique_ptr<CURL, decltype(&curl_easy_cleanup)>;
+
+// cannot use unique_ptr for curl_slist, because we cannot update without destroying the old one
+struct
curl_slist_ptr { + struct curl_slist * ptr = nullptr; + ~curl_slist_ptr() { + if (ptr) { + curl_slist_free_all(ptr); + } + } +}; +#endif // LLAMA_USE_CURL + +using json = nlohmann::ordered_json; + +// +// CPU utils +// + +int32_t cpu_get_num_physical_cores() { +#ifdef __linux__ + // enumerate the set of thread siblings, num entries is num cores + std::unordered_set siblings; + for (uint32_t cpu=0; cpu < UINT32_MAX; ++cpu) { + std::ifstream thread_siblings("/sys/devices/system/cpu/cpu" + + std::to_string(cpu) + "/topology/thread_siblings"); + if (!thread_siblings.is_open()) { + break; // no more cpus + } + std::string line; + if (std::getline(thread_siblings, line)) { + siblings.insert(line); + } + } + if (!siblings.empty()) { + return static_cast(siblings.size()); + } +#elif defined(__APPLE__) && defined(__MACH__) + int32_t num_physical_cores; + size_t len = sizeof(num_physical_cores); + int result = sysctlbyname("hw.perflevel0.physicalcpu", &num_physical_cores, &len, NULL, 0); + if (result == 0) { + return num_physical_cores; + } + result = sysctlbyname("hw.physicalcpu", &num_physical_cores, &len, NULL, 0); + if (result == 0) { + return num_physical_cores; + } +#elif defined(_WIN32) && (_WIN32_WINNT >= 0x0601) && !defined(__MINGW64__) // windows 7 and later + // TODO: windows + arm64 + mingw64 + unsigned int n_threads_win = std::thread::hardware_concurrency(); + unsigned int default_threads = n_threads_win > 0 ? (n_threads_win <= 4 ? n_threads_win : n_threads_win / 2) : 4; + + DWORD buffer_size = 0; + if (!GetLogicalProcessorInformationEx(RelationProcessorCore, nullptr, &buffer_size)) { + if (GetLastError() != ERROR_INSUFFICIENT_BUFFER) { + return default_threads; + } + } + + std::vector buffer(buffer_size); + if (!GetLogicalProcessorInformationEx(RelationProcessorCore, reinterpret_cast(buffer.data()), &buffer_size)) { + return default_threads; + } + + int32_t num_physical_cores = 0; + PSYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX info = reinterpret_cast(buffer.data()); + while (buffer_size > 0) { + if (info->Relationship == RelationProcessorCore) { + num_physical_cores += info->Processor.GroupCount; + } + buffer_size -= info->Size; + info = reinterpret_cast(reinterpret_cast(info) + info->Size); + } + + return num_physical_cores > 0 ? num_physical_cores : default_threads; +#endif + unsigned int n_threads = std::thread::hardware_concurrency(); + return n_threads > 0 ? (n_threads <= 4 ? 
n_threads : n_threads / 2) : 4; +} + +#if defined(__x86_64__) && defined(__linux__) && !defined(__ANDROID__) +#include + +static void cpuid(unsigned leaf, unsigned subleaf, + unsigned *eax, unsigned *ebx, unsigned *ecx, unsigned *edx) { + __asm__("movq\t%%rbx,%%rsi\n\t" + "cpuid\n\t" + "xchgq\t%%rbx,%%rsi" + : "=a"(*eax), "=S"(*ebx), "=c"(*ecx), "=d"(*edx) + : "0"(leaf), "2"(subleaf)); +} + +static int pin_cpu(int cpu) { + cpu_set_t mask; + CPU_ZERO(&mask); + CPU_SET(cpu, &mask); + return pthread_setaffinity_np(pthread_self(), sizeof(mask), &mask); +} + +static bool is_hybrid_cpu(void) { + unsigned eax, ebx, ecx, edx; + cpuid(7, 0, &eax, &ebx, &ecx, &edx); + return !!(edx & (1u << 15)); +} + +static bool is_running_on_efficiency_core(void) { + unsigned eax, ebx, ecx, edx; + cpuid(0x1a, 0, &eax, &ebx, &ecx, &edx); + int intel_atom = 0x20; + int core_type = (eax & 0xff000000u) >> 24; + return core_type == intel_atom; +} + +static int cpu_count_math_cpus(int n_cpu) { + int result = 0; + for (int cpu = 0; cpu < n_cpu; ++cpu) { + if (pin_cpu(cpu)) { + return -1; + } + if (is_running_on_efficiency_core()) { + continue; // efficiency cores harm lockstep threading + } + ++cpu; // hyperthreading isn't useful for linear algebra + ++result; + } + return result; +} + +#endif // __x86_64__ && __linux__ + +/** + * Returns number of CPUs on system that are useful for math. + */ +int32_t cpu_get_num_math() { +#if defined(__x86_64__) && defined(__linux__) && !defined(__ANDROID__) + int n_cpu = sysconf(_SC_NPROCESSORS_ONLN); + if (n_cpu < 1) { + return cpu_get_num_physical_cores(); + } + if (is_hybrid_cpu()) { + cpu_set_t affinity; + if (!pthread_getaffinity_np(pthread_self(), sizeof(affinity), &affinity)) { + int result = cpu_count_math_cpus(n_cpu); + pthread_setaffinity_np(pthread_self(), sizeof(affinity), &affinity); + if (result > 0) { + return result; + } + } + } +#endif + return cpu_get_num_physical_cores(); +} + +// Helper for setting process priority + +#if defined(_WIN32) + +bool set_process_priority(enum ggml_sched_priority prio) { + if (prio == GGML_SCHED_PRIO_NORMAL) { + return true; + } + + DWORD p = NORMAL_PRIORITY_CLASS; + switch (prio) { + case GGML_SCHED_PRIO_NORMAL: p = NORMAL_PRIORITY_CLASS; break; + case GGML_SCHED_PRIO_MEDIUM: p = ABOVE_NORMAL_PRIORITY_CLASS; break; + case GGML_SCHED_PRIO_HIGH: p = HIGH_PRIORITY_CLASS; break; + case GGML_SCHED_PRIO_REALTIME: p = REALTIME_PRIORITY_CLASS; break; + } + + if (!SetPriorityClass(GetCurrentProcess(), p)) { + LOG_WRN("failed to set process priority class %d : (%d)\n", prio, (int) GetLastError()); + return false; + } + + return true; +} + +#else // MacOS and POSIX +#include +#include + +bool set_process_priority(enum ggml_sched_priority prio) { + if (prio == GGML_SCHED_PRIO_NORMAL) { + return true; + } + + int p = 0; + switch (prio) { + case GGML_SCHED_PRIO_NORMAL: p = 0; break; + case GGML_SCHED_PRIO_MEDIUM: p = -5; break; + case GGML_SCHED_PRIO_HIGH: p = -10; break; + case GGML_SCHED_PRIO_REALTIME: p = -20; break; + } + + if (!setpriority(PRIO_PROCESS, 0, p)) { + LOG_WRN("failed to set process priority %d : %s (%d)\n", prio, strerror(errno), errno); + return false; + } + return true; +} + +#endif + +// +// CLI argument parsing +// + + +void postprocess_cpu_params(cpu_params& cpuparams, const cpu_params* role_model) { + int32_t n_set = 0; + + if (cpuparams.n_threads < 0) { + // Assuming everything about cpuparams is invalid + if (role_model != nullptr) { + cpuparams = *role_model; + } else { + cpuparams.n_threads = cpu_get_num_math(); + } + } + 
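+    // Count how many CPUs the affinity mask enables so we can warn below when it cannot cover the
+    // requested thread count (illustrative: a mask parsed from "0x3" enables CPUs 0-1, i.e. n_set == 2).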
+ for (int32_t i = 0; i < GGML_MAX_N_THREADS; i++) { + if (cpuparams.cpumask[i]) { + n_set++; + } + } + + if (n_set && n_set < cpuparams.n_threads) { + // Not enough set bits, may experience performance issues. + LOG_WRN("Not enough set bits in CPU mask (%d) to satisfy requested thread count: %d\n", n_set, cpuparams.n_threads); + } +} + +bool parse_cpu_range(const std::string & range, bool (&boolmask)[GGML_MAX_N_THREADS]) { + size_t dash_loc = range.find('-'); + if (dash_loc == std::string::npos) { + LOG_ERR("Format of CPU range is invalid! Expected []-[].\n"); + return false; + } + + size_t start_i; + size_t end_i; + + if (dash_loc == 0) { + start_i = 0; + } else { + start_i = std::stoull(range.substr(0, dash_loc)); + if (start_i >= GGML_MAX_N_THREADS) { + LOG_ERR("Start index out of bounds!\n"); + return false; + } + } + + if (dash_loc == range.length() - 1) { + end_i = GGML_MAX_N_THREADS - 1; + } else { + end_i = std::stoull(range.substr(dash_loc + 1)); + if (end_i >= GGML_MAX_N_THREADS) { + LOG_ERR("End index out of bounds!\n"); + return false; + } + } + + for (size_t i = start_i; i <= end_i; i++) { + boolmask[i] = true; + } + + return true; +} + +bool parse_cpu_mask(const std::string & mask, bool (&boolmask)[GGML_MAX_N_THREADS]) { + // Discard potential 0x prefix + size_t start_i = 0; + if (mask.length() >= 2 && mask.substr(0, 2) == "0x") { + start_i = 2; + } + + size_t num_digits = mask.length() - start_i; + if (num_digits > 128) num_digits = 128; + + size_t end_i = num_digits + start_i; + + for (size_t i = start_i, n = (num_digits*4 - 1); i < end_i; i++, n-=4) { + char c = mask.at(i); + int8_t id = c; + + if ((c >= '0' && c <= '9')) { + id -= '0'; + } else if (c >= 'a' && c <= 'f') { + id -= 'a' - 10; + } else if (c >= 'A' && c <= 'F') { + id -= 'A' - 10; + } else { + LOG_ERR("Invalid hex character '%c' at position %d\n", c, int32_t(i)); + return false; + } + + boolmask[ n ] = boolmask[ n ] || ((id & 8) != 0); + boolmask[n - 1] = boolmask[n - 1] || ((id & 4) != 0); + boolmask[n - 2] = boolmask[n - 2] || ((id & 2) != 0); + boolmask[n - 3] = boolmask[n - 3] || ((id & 1) != 0); + } + + return true; +} + +void common_init() { + llama_log_set([](ggml_log_level level, const char * text, void * /*user_data*/) { + if (LOG_DEFAULT_LLAMA <= common_log_verbosity_thold) { + common_log_add(common_log_main(), level, "%s", text); + } + }, NULL); + +#ifdef NDEBUG + const char * build_type = ""; +#else + const char * build_type = " (debug)"; +#endif + + LOG_INF("build: %d (%s) with %s for %s%s\n", LLAMA_BUILD_NUMBER, LLAMA_COMMIT, LLAMA_COMPILER, LLAMA_BUILD_TARGET, build_type); +} + +std::string common_params_get_system_info(const common_params & params) { + std::ostringstream os; + + os << "system_info: n_threads = " << params.cpuparams.n_threads; + if (params.cpuparams_batch.n_threads != -1) { + os << " (n_threads_batch = " << params.cpuparams_batch.n_threads << ")"; + } +#if defined(_WIN32) && (_WIN32_WINNT >= 0x0601) && !defined(__MINGW64__) // windows 7 and later + // TODO: windows + arm64 + mingw64 + DWORD logicalProcessorCount = GetActiveProcessorCount(ALL_PROCESSOR_GROUPS); + os << " / " << logicalProcessorCount << " | " << llama_print_system_info(); +#else + os << " / " << std::thread::hardware_concurrency() << " | " << llama_print_system_info(); +#endif + + return os.str(); +} + +// +// String utils +// + +std::string string_format(const char * fmt, ...) 
{ + va_list ap; + va_list ap2; + va_start(ap, fmt); + va_copy(ap2, ap); + int size = vsnprintf(NULL, 0, fmt, ap); + GGML_ASSERT(size >= 0 && size < INT_MAX); // NOLINT + std::vector buf(size + 1); + int size2 = vsnprintf(buf.data(), size + 1, fmt, ap2); + GGML_ASSERT(size2 == size); + va_end(ap2); + va_end(ap); + return std::string(buf.data(), size); +} + +std::string string_strip(const std::string & str) { + size_t start = 0; + size_t end = str.size(); + while (start < end && std::isspace(str[start])) { + start++; + } + while (end > start && std::isspace(str[end - 1])) { + end--; + } + return str.substr(start, end - start); +} + +std::string string_get_sortable_timestamp() { + using clock = std::chrono::system_clock; + + const clock::time_point current_time = clock::now(); + const time_t as_time_t = clock::to_time_t(current_time); + char timestamp_no_ns[100]; + std::strftime(timestamp_no_ns, 100, "%Y_%m_%d-%H_%M_%S", std::localtime(&as_time_t)); + + const int64_t ns = std::chrono::duration_cast( + current_time.time_since_epoch() % 1000000000).count(); + char timestamp_ns[11]; + snprintf(timestamp_ns, 11, "%09" PRId64, ns); + + return std::string(timestamp_no_ns) + "." + std::string(timestamp_ns); +} + +void string_replace_all(std::string & s, const std::string & search, const std::string & replace) { + if (search.empty()) { + return; + } + std::string builder; + builder.reserve(s.length()); + size_t pos = 0; + size_t last_pos = 0; + while ((pos = s.find(search, last_pos)) != std::string::npos) { + builder.append(s, last_pos, pos - last_pos); + builder.append(replace); + last_pos = pos + search.length(); + } + builder.append(s, last_pos, std::string::npos); + s = std::move(builder); +} + +std::string string_join(const std::vector & values, const std::string & separator) { + std::ostringstream result; + for (size_t i = 0; i < values.size(); ++i) { + if (i > 0) { + result << separator; + } + result << values[i]; + } + return result.str(); +} + +std::vector string_split(const std::string & str, const std::string & delimiter) { + std::vector parts; + size_t start = 0; + size_t end = str.find(delimiter); + + while (end != std::string::npos) { + parts.push_back(str.substr(start, end - start)); + start = end + delimiter.length(); + end = str.find(delimiter, start); + } + + parts.push_back(str.substr(start)); + + return parts; +} + +std::string string_repeat(const std::string & str, size_t n) { + if (n == 0) { + return ""; + } + + std::string result; + result.reserve(str.length() * n); + + for (size_t i = 0; i < n; ++i) { + result += str; + } + + return result; +} + +std::string string_from(bool value) { + return value ? 
"true" : "false"; +} + +std::string string_from(const std::vector & values) { + std::stringstream buf; + + buf << "[ "; + bool first = true; + for (auto e : values) { + if (first) { + first = false; + } else { + buf << ", "; + } + buf << std::to_string(e); + } + buf << " ]"; + + return buf.str(); +} + +std::string string_from(const struct llama_context * ctx, const std::vector & tokens) { + std::stringstream buf; + + buf << "[ "; + + bool first = true; + for (const auto & token : tokens) { + if (!first) { + buf << ", "; + } else { + first = false; + } + + auto detokenized = common_token_to_piece(ctx, token); + + detokenized.erase( + std::remove_if( + detokenized.begin(), + detokenized.end(), + [](const unsigned char c) { return !std::isprint(c); }), + detokenized.end()); + + buf << "'" << detokenized << "'" + << ":" << std::to_string(token); + } + + buf << " ]"; + + return buf.str(); +} + +std::string string_from(const struct llama_context * ctx, const struct llama_batch & batch) { + std::stringstream buf; + + buf << "[ "; + + bool first = true; + for (int i = 0; i < batch.n_tokens; ++i) { + if (!first) { + buf << ", "; + } else { + first = false; + } + + auto detokenized = common_token_to_piece(ctx, batch.token[i]); + + detokenized.erase( + std::remove_if( + detokenized.begin(), + detokenized.end(), + [](const unsigned char c) { return !std::isprint(c); }), + detokenized.end()); + + buf << "\n" << std::to_string(i) + << ", token '" << detokenized << "'" + << ", pos " << std::to_string(batch.pos[i]) + << ", n_seq_id " << std::to_string(batch.n_seq_id[i]) + << ", seq_id " << std::to_string(batch.seq_id[i][0]) + << ", logits " << std::to_string(batch.logits[i]); + } + + buf << " ]"; + + return buf.str(); +} + +void string_process_escapes(std::string & input) { + std::size_t input_len = input.length(); + std::size_t output_idx = 0; + + for (std::size_t input_idx = 0; input_idx < input_len; ++input_idx) { + if (input[input_idx] == '\\' && input_idx + 1 < input_len) { + switch (input[++input_idx]) { + case 'n': input[output_idx++] = '\n'; break; + case 'r': input[output_idx++] = '\r'; break; + case 't': input[output_idx++] = '\t'; break; + case '\'': input[output_idx++] = '\''; break; + case '\"': input[output_idx++] = '\"'; break; + case '\\': input[output_idx++] = '\\'; break; + case 'x': + // Handle \x12, etc + if (input_idx + 2 < input_len) { + const char x[3] = { input[input_idx + 1], input[input_idx + 2], 0 }; + char *err_p = nullptr; + const long val = std::strtol(x, &err_p, 16); + if (err_p == x + 2) { + input_idx += 2; + input[output_idx++] = char(val); + break; + } + } + // fall through + default: input[output_idx++] = '\\'; + input[output_idx++] = input[input_idx]; break; + } + } else { + input[output_idx++] = input[input_idx]; + } + } + + input.resize(output_idx); +} + +bool string_parse_kv_override(const char * data, std::vector & overrides) { + const char * sep = strchr(data, '='); + if (sep == nullptr || sep - data >= 128) { + LOG_ERR("%s: malformed KV override '%s'\n", __func__, data); + return false; + } + llama_model_kv_override kvo; + std::strncpy(kvo.key, data, sep - data); + kvo.key[sep - data] = 0; + sep++; + if (strncmp(sep, "int:", 4) == 0) { + sep += 4; + kvo.tag = LLAMA_KV_OVERRIDE_TYPE_INT; + kvo.val_i64 = std::atol(sep); + } else if (strncmp(sep, "float:", 6) == 0) { + sep += 6; + kvo.tag = LLAMA_KV_OVERRIDE_TYPE_FLOAT; + kvo.val_f64 = std::atof(sep); + } else if (strncmp(sep, "bool:", 5) == 0) { + sep += 5; + kvo.tag = LLAMA_KV_OVERRIDE_TYPE_BOOL; + if 
(std::strcmp(sep, "true") == 0) { + kvo.val_bool = true; + } else if (std::strcmp(sep, "false") == 0) { + kvo.val_bool = false; + } else { + LOG_ERR("%s: invalid boolean value for KV override '%s'\n", __func__, data); + return false; + } + } else if (strncmp(sep, "str:", 4) == 0) { + sep += 4; + kvo.tag = LLAMA_KV_OVERRIDE_TYPE_STR; + if (strlen(sep) > 127) { + LOG_ERR("%s: malformed KV override '%s', value cannot exceed 127 chars\n", __func__, data); + return false; + } + strncpy(kvo.val_str, sep, 127); + kvo.val_str[127] = '\0'; + } else { + LOG_ERR("%s: invalid type for KV override '%s'\n", __func__, data); + return false; + } + overrides.emplace_back(std::move(kvo)); + return true; +} + +// +// Filesystem utils +// + +// Validate if a filename is safe to use +// To validate a full path, split the path by the OS-specific path separator, and validate each part with this function +bool fs_validate_filename(const std::string & filename) { + if (!filename.length()) { + // Empty filename invalid + return false; + } + if (filename.length() > 255) { + // Limit at common largest possible filename on Linux filesystems + // to avoid unnecessary further validation + // (On systems with smaller limits it will be caught by the OS) + return false; + } + + std::u32string filename_utf32; + try { +#if defined(__clang__) + // disable C++17 deprecation warning for std::codecvt_utf8 +# pragma clang diagnostic push +# pragma clang diagnostic ignored "-Wdeprecated-declarations" +#endif + std::wstring_convert, char32_t> converter; + +#if defined(__clang__) +# pragma clang diagnostic pop +#endif + + filename_utf32 = converter.from_bytes(filename); + + // If the reverse conversion mismatches, it means overlong UTF-8 sequences were used, + // or invalid encodings were encountered. Reject such attempts + std::string filename_reencoded = converter.to_bytes(filename_utf32); + if (filename_reencoded != filename) { + return false; + } + } catch (const std::exception &) { + return false; + } + + // Check for forbidden codepoints: + // - Control characters + // - Unicode equivalents of illegal characters + // - UTF-16 surrogate pairs + // - UTF-8 replacement character + // - Byte order mark (BOM) + // - Illegal characters: / \ : * ? " < > | + for (char32_t c : filename_utf32) { + if (c <= 0x1F // Control characters (C0) + || c == 0x7F // Control characters (DEL) + || (c >= 0x80 && c <= 0x9F) // Control characters (C1) + || c == 0xFF0E // Fullwidth Full Stop (period equivalent) + || c == 0x2215 // Division Slash (forward slash equivalent) + || c == 0x2216 // Set Minus (backslash equivalent) + || (c >= 0xD800 && c <= 0xDFFF) // UTF-16 surrogate pairs + || c == 0xFFFD // Replacement Character (UTF-8) + || c == 0xFEFF // Byte Order Mark (BOM) + || c == '/' || c == '\\' || c == ':' || c == '*' // Illegal characters + || c == '?' || c == '"' || c == '<' || c == '>' || c == '|') { + return false; + } + } + + // Reject any leading or trailing ' ', or any trailing '.', these are stripped on Windows and will cause a different filename + // Unicode and other whitespace is not affected, only 0x20 space + if (filename.front() == ' ' || filename.back() == ' ' || filename.back() == '.') { + return false; + } + + // Reject any ".." (currently stricter than necessary, it should be fine to just check for == ".." instead) + if (filename.find("..") != std::string::npos) { + return false; + } + + // Reject "." 
+ if (filename == ".") { + return false; + } + + return true; +} + +// returns true if successful, false otherwise +bool fs_create_directory_with_parents(const std::string & path) { +#ifdef _WIN32 + std::wstring_convert> converter; + std::wstring wpath = converter.from_bytes(path); + + // if the path already exists, check whether it's a directory + const DWORD attributes = GetFileAttributesW(wpath.c_str()); + if ((attributes != INVALID_FILE_ATTRIBUTES) && (attributes & FILE_ATTRIBUTE_DIRECTORY)) { + return true; + } + + size_t pos_slash = 0; + + // process path from front to back, procedurally creating directories + while ((pos_slash = path.find('\\', pos_slash)) != std::string::npos) { + const std::wstring subpath = wpath.substr(0, pos_slash); + const wchar_t * test = subpath.c_str(); + + const bool success = CreateDirectoryW(test, NULL); + if (!success) { + const DWORD error = GetLastError(); + + // if the path already exists, ensure that it's a directory + if (error == ERROR_ALREADY_EXISTS) { + const DWORD attributes = GetFileAttributesW(subpath.c_str()); + if (attributes == INVALID_FILE_ATTRIBUTES || !(attributes & FILE_ATTRIBUTE_DIRECTORY)) { + return false; + } + } else { + return false; + } + } + + pos_slash += 1; + } + + return true; +#else + // if the path already exists, check whether it's a directory + struct stat info; + if (stat(path.c_str(), &info) == 0) { + return S_ISDIR(info.st_mode); + } + + size_t pos_slash = 1; // skip leading slashes for directory creation + + // process path from front to back, procedurally creating directories + while ((pos_slash = path.find('/', pos_slash)) != std::string::npos) { + const std::string subpath = path.substr(0, pos_slash); + struct stat info; + + // if the path already exists, ensure that it's a directory + if (stat(subpath.c_str(), &info) == 0) { + if (!S_ISDIR(info.st_mode)) { + return false; + } + } else { + // create parent directories + const int ret = mkdir(subpath.c_str(), 0755); + if (ret != 0) { + return false; + } + } + + pos_slash += 1; + } + + return true; +#endif // _WIN32 +} + +std::string fs_get_cache_directory() { + std::string cache_directory = ""; + auto ensure_trailing_slash = [](std::string p) { + // Make sure to add trailing slash + if (p.back() != DIRECTORY_SEPARATOR) { + p += DIRECTORY_SEPARATOR; + } + return p; + }; + if (getenv("LLAMA_CACHE")) { + cache_directory = std::getenv("LLAMA_CACHE"); + } else { +#ifdef __linux__ + if (std::getenv("XDG_CACHE_HOME")) { + cache_directory = std::getenv("XDG_CACHE_HOME"); + } else { + cache_directory = std::getenv("HOME") + std::string("/.cache/"); + } +#elif defined(__APPLE__) + cache_directory = std::getenv("HOME") + std::string("/Library/Caches/"); +#elif defined(_WIN32) + cache_directory = std::getenv("LOCALAPPDATA"); +#endif // __linux__ + cache_directory = ensure_trailing_slash(cache_directory); + cache_directory += "llama.cpp"; + } + return ensure_trailing_slash(cache_directory); +} + +std::string fs_get_cache_file(const std::string & filename) { + GGML_ASSERT(filename.find(DIRECTORY_SEPARATOR) == std::string::npos); + std::string cache_directory = fs_get_cache_directory(); + const bool success = fs_create_directory_with_parents(cache_directory); + if (!success) { + throw std::runtime_error("failed to create cache directory: " + cache_directory); + } + return cache_directory + filename; +} + + +// +// Model utils +// +struct common_init_result common_init_from_params(common_params & params) { + common_init_result iparams; + auto mparams = 
common_model_params_to_llama(params); + + llama_model * model = nullptr; + + if (!params.hf_repo.empty() && !params.hf_file.empty()) { + model = common_load_model_from_hf(params.hf_repo, params.hf_file, params.model, params.hf_token, mparams); + } else if (!params.model_url.empty()) { + model = common_load_model_from_url(params.model_url, params.model, params.hf_token, mparams); + } else { + model = llama_model_load_from_file(params.model.c_str(), mparams); + } + + if (model == NULL) { + LOG_ERR("%s: failed to load model '%s'\n", __func__, params.model.c_str()); + return iparams; + } + + const llama_vocab * vocab = llama_model_get_vocab(model); + + if (params.reranking) { + bool ok = true; + + if (llama_vocab_bos(vocab) == LLAMA_TOKEN_NULL) { + LOG_WRN("%s: warning: vocab does not have a BOS token, reranking will not work\n", __func__); + ok = false; + } + + if (llama_vocab_eos(vocab) == LLAMA_TOKEN_NULL) { + LOG_WRN("%s: warning: vocab does not have an EOS token, reranking will not work\n", __func__); + ok = false; + } + + if (llama_vocab_sep(vocab) == LLAMA_TOKEN_NULL) { + LOG_WRN("%s: warning: vocab does not have a SEP token, reranking will not work\n", __func__); + ok = false; + } + + if (!ok) { + llama_model_free(model); + + return iparams; + } + } + + auto cparams = common_context_params_to_llama(params); + + llama_context * lctx = llama_init_from_model(model, cparams); + if (lctx == NULL) { + LOG_ERR("%s: failed to create context with model '%s'\n", __func__, params.model.c_str()); + llama_model_free(model); + return iparams; + } + + if (params.ctx_shift && !llama_kv_cache_can_shift(lctx)) { + LOG_WRN("%s: KV cache shifting is not supported for this model, disabling KV cache shifting\n", __func__); + params.ctx_shift = false; + } + + if (!params.control_vectors.empty()) { + if (params.control_vector_layer_start <= 0) params.control_vector_layer_start = 1; + if (params.control_vector_layer_end <= 0) params.control_vector_layer_end = llama_model_n_layer(model); + + const auto cvec = common_control_vector_load(params.control_vectors); + if (cvec.n_embd == -1) { + llama_free(lctx); + llama_model_free(model); + + return iparams; + } + + int err = llama_apply_adapter_cvec( + lctx, + cvec.data.data(), + cvec.data.size(), + cvec.n_embd, + params.control_vector_layer_start, + params.control_vector_layer_end); + if (err) { + llama_free(lctx); + llama_model_free(model); + + return iparams; + } + } + + // load and optionally apply lora adapters + for (auto & la : params.lora_adapters) { + llama_adapter_lora_ptr lora; + lora.reset(llama_adapter_lora_init(model, la.path.c_str())); + if (lora == nullptr) { + LOG_ERR("%s: failed to apply lora adapter '%s'\n", __func__, la.path.c_str()); + llama_free(lctx); + llama_model_free(model); + return iparams; + } + + la.ptr = lora.get(); + iparams.lora.emplace_back(std::move(lora)); // copy to list of loaded adapters + } + + if (!params.lora_init_without_apply) { + common_set_adapter_lora(lctx, params.lora_adapters); + } + + if (params.sampling.ignore_eos && llama_vocab_eos(vocab) == LLAMA_TOKEN_NULL) { + LOG_WRN("%s: warning: vocab does not have an EOS token, ignoring --ignore-eos\n", __func__); + params.sampling.ignore_eos = false; + } + + if (params.sampling.ignore_eos) { + for (llama_token i = 0; i < llama_vocab_n_tokens(vocab); i++) { + if (llama_vocab_is_eog(vocab, i)) { + LOG_INF("%s: added %s logit bias = %f\n", __func__, common_token_to_piece(lctx, i).c_str(), -INFINITY); + params.sampling.logit_bias.push_back({i, -INFINITY}); + } + } + } + + if 
(params.sampling.penalty_last_n == -1) { + LOG_INF("%s: setting penalty_last_n to ctx_size = %d\n", __func__, llama_n_ctx(lctx)); + params.sampling.penalty_last_n = llama_n_ctx(lctx); + } + + if (params.sampling.dry_penalty_last_n == -1) { + LOG_INF("%s: setting dry_penalty_last_n to ctx_size = %d\n", __func__, llama_n_ctx(lctx)); + params.sampling.dry_penalty_last_n = llama_n_ctx(lctx); + } + + if (params.warmup) { + LOG_WRN("%s: warming up the model with an empty run - please wait ... (--no-warmup to disable)\n", __func__); + + std::vector tmp; + llama_token bos = llama_vocab_bos(vocab); + llama_token eos = llama_vocab_eos(vocab); + + // some models (e.g. T5) don't have a BOS token + if (bos != LLAMA_TOKEN_NULL) { + tmp.push_back(bos); + } + if (eos != LLAMA_TOKEN_NULL) { + tmp.push_back(eos); + } + if (tmp.empty()) { + tmp.push_back(0); + } + + if (llama_model_has_encoder(model)) { + llama_encode(lctx, llama_batch_get_one(tmp.data(), tmp.size())); + llama_token decoder_start_token_id = llama_model_decoder_start_token(model); + if (decoder_start_token_id == LLAMA_TOKEN_NULL) { + decoder_start_token_id = bos; + } + tmp.clear(); + tmp.push_back(decoder_start_token_id); + } + if (llama_model_has_decoder(model)) { + llama_decode(lctx, llama_batch_get_one(tmp.data(), std::min(tmp.size(), (size_t) params.n_batch))); + } + llama_kv_cache_clear(lctx); + llama_synchronize(lctx); + llama_perf_context_reset(lctx); + } + + iparams.model.reset(model); + iparams.context.reset(lctx); + + return iparams; +} + +void common_set_adapter_lora(struct llama_context * ctx, std::vector & lora) { + llama_clear_adapter_lora(ctx); + for (auto & la : lora) { + if (la.scale != 0.0f) { + llama_set_adapter_lora(ctx, la.ptr, la.scale); + } + } +} + +struct llama_model_params common_model_params_to_llama(common_params & params) { + auto mparams = llama_model_default_params(); + + if (!params.devices.empty()) { + mparams.devices = params.devices.data(); + } + if (params.n_gpu_layers != -1) { + mparams.n_gpu_layers = params.n_gpu_layers; + } + mparams.main_gpu = params.main_gpu; + mparams.split_mode = params.split_mode; + mparams.tensor_split = params.tensor_split; + mparams.use_mmap = params.use_mmap; + mparams.use_mlock = params.use_mlock; + mparams.check_tensors = params.check_tensors; + if (params.kv_overrides.empty()) { + mparams.kv_overrides = NULL; + } else { + GGML_ASSERT(params.kv_overrides.back().key[0] == 0 && "KV overrides not terminated with empty key"); + mparams.kv_overrides = params.kv_overrides.data(); + } + + return mparams; +} + +struct llama_context_params common_context_params_to_llama(const common_params & params) { + auto cparams = llama_context_default_params(); + + cparams.n_ctx = params.n_ctx; + cparams.n_seq_max = params.n_parallel; + cparams.n_batch = params.n_batch; + cparams.n_ubatch = params.n_ubatch; + cparams.n_threads = params.cpuparams.n_threads; + cparams.n_threads_batch = params.cpuparams_batch.n_threads == -1 ? 
+ params.cpuparams.n_threads : params.cpuparams_batch.n_threads; + cparams.logits_all = params.logits_all; + cparams.embeddings = params.embedding; + cparams.rope_scaling_type = params.rope_scaling_type; + cparams.rope_freq_base = params.rope_freq_base; + cparams.rope_freq_scale = params.rope_freq_scale; + cparams.yarn_ext_factor = params.yarn_ext_factor; + cparams.yarn_attn_factor = params.yarn_attn_factor; + cparams.yarn_beta_fast = params.yarn_beta_fast; + cparams.yarn_beta_slow = params.yarn_beta_slow; + cparams.yarn_orig_ctx = params.yarn_orig_ctx; + cparams.pooling_type = params.pooling_type; + cparams.attention_type = params.attention_type; + cparams.defrag_thold = params.defrag_thold; + cparams.cb_eval = params.cb_eval; + cparams.cb_eval_user_data = params.cb_eval_user_data; + cparams.offload_kqv = !params.no_kv_offload; + cparams.flash_attn = params.flash_attn; + cparams.no_perf = params.no_perf; + + if (params.reranking) { + cparams.embeddings = true; + cparams.pooling_type = LLAMA_POOLING_TYPE_RANK; + } + + cparams.type_k = params.cache_type_k; + cparams.type_v = params.cache_type_v; + + return cparams; +} + +struct ggml_threadpool_params ggml_threadpool_params_from_cpu_params(const cpu_params & params) { + struct ggml_threadpool_params tpp; + + ggml_threadpool_params_init(&tpp, params.n_threads); // setup the defaults + + if (params.mask_valid) { + std::memcpy(&tpp.cpumask, ¶ms.cpumask, GGML_MAX_N_THREADS); + } + + tpp.prio = params.priority; + tpp.poll = params.poll; + tpp.strict_cpu = params.strict_cpu; + + return tpp; +} + +#ifdef LLAMA_USE_CURL + +#define CURL_MAX_RETRY 3 +#define CURL_RETRY_DELAY_SECONDS 2 + +static bool curl_perform_with_retry(const std::string & url, CURL * curl, int max_attempts, int retry_delay_seconds) { + int remaining_attempts = max_attempts; + + while (remaining_attempts > 0) { + LOG_INF("%s: Trying to download from %s (attempt %d of %d)...\n", __func__ , url.c_str(), max_attempts - remaining_attempts + 1, max_attempts); + + CURLcode res = curl_easy_perform(curl); + if (res == CURLE_OK) { + return true; + } + + int exponential_backoff_delay = std::pow(retry_delay_seconds, max_attempts - remaining_attempts) * 1000; + LOG_WRN("%s: curl_easy_perform() failed: %s, retrying after %d milliseconds...\n", __func__, curl_easy_strerror(res), exponential_backoff_delay); + + remaining_attempts--; + std::this_thread::sleep_for(std::chrono::milliseconds(exponential_backoff_delay)); + } + + LOG_ERR("%s: curl_easy_perform() failed after %d attempts\n", __func__, max_attempts); + + return false; +} + +static bool common_download_file(const std::string & url, const std::string & path, const std::string & hf_token) { + // Initialize libcurl + curl_ptr curl(curl_easy_init(), &curl_easy_cleanup); + curl_slist_ptr http_headers; + if (!curl) { + LOG_ERR("%s: error initializing libcurl\n", __func__); + return false; + } + + bool force_download = false; + + // Set the URL, allow to follow http redirection + curl_easy_setopt(curl.get(), CURLOPT_URL, url.c_str()); + curl_easy_setopt(curl.get(), CURLOPT_FOLLOWLOCATION, 1L); + + // Check if hf-token or bearer-token was specified + if (!hf_token.empty()) { + std::string auth_header = "Authorization: Bearer " + hf_token; + http_headers.ptr = curl_slist_append(http_headers.ptr, auth_header.c_str()); + curl_easy_setopt(curl.get(), CURLOPT_HTTPHEADER, http_headers.ptr); + } + +#if defined(_WIN32) + // CURLSSLOPT_NATIVE_CA tells libcurl to use standard certificate store of + // operating system. 
Currently implemented under MS-Windows. + curl_easy_setopt(curl.get(), CURLOPT_SSL_OPTIONS, CURLSSLOPT_NATIVE_CA); +#endif + + // Check if the file already exists locally + auto file_exists = std::filesystem::exists(path); + + // If the file exists, check its JSON metadata companion file. + std::string metadata_path = path + ".json"; + nlohmann::json metadata; + std::string etag; + std::string last_modified; + + if (file_exists) { + // Try and read the JSON metadata file (note: stream autoclosed upon exiting this block). + std::ifstream metadata_in(metadata_path); + if (metadata_in.good()) { + try { + metadata_in >> metadata; + LOG_INF("%s: previous metadata file found %s: %s\n", __func__, metadata_path.c_str(), metadata.dump().c_str()); + if (metadata.contains("url") && metadata.at("url").is_string()) { + auto previous_url = metadata.at("url").get(); + if (previous_url != url) { + LOG_ERR("%s: Model URL mismatch: %s != %s\n", __func__, url.c_str(), previous_url.c_str()); + return false; + } + } + if (metadata.contains("etag") && metadata.at("etag").is_string()) { + etag = metadata.at("etag"); + } + if (metadata.contains("lastModified") && metadata.at("lastModified").is_string()) { + last_modified = metadata.at("lastModified"); + } + } catch (const nlohmann::json::exception & e) { + LOG_ERR("%s: error reading metadata file %s: %s\n", __func__, metadata_path.c_str(), e.what()); + return false; + } + } + } else { + LOG_INF("%s: no previous model file found %s\n", __func__, path.c_str()); + } + + // Send a HEAD request to retrieve the etag and last-modified headers + struct common_load_model_from_url_headers { + std::string etag; + std::string last_modified; + }; + + common_load_model_from_url_headers headers; + + { + typedef size_t(*CURLOPT_HEADERFUNCTION_PTR)(char *, size_t, size_t, void *); + auto header_callback = [](char * buffer, size_t /*size*/, size_t n_items, void * userdata) -> size_t { + common_load_model_from_url_headers * headers = (common_load_model_from_url_headers *) userdata; + + static std::regex header_regex("([^:]+): (.*)\r\n"); + static std::regex etag_regex("ETag", std::regex_constants::icase); + static std::regex last_modified_regex("Last-Modified", std::regex_constants::icase); + + std::string header(buffer, n_items); + std::smatch match; + if (std::regex_match(header, match, header_regex)) { + const std::string & key = match[1]; + const std::string & value = match[2]; + if (std::regex_match(key, match, etag_regex)) { + headers->etag = value; + } else if (std::regex_match(key, match, last_modified_regex)) { + headers->last_modified = value; + } + } + return n_items; + }; + + curl_easy_setopt(curl.get(), CURLOPT_NOBODY, 1L); // will trigger the HEAD verb + curl_easy_setopt(curl.get(), CURLOPT_NOPROGRESS, 1L); // hide head request progress + curl_easy_setopt(curl.get(), CURLOPT_HEADERFUNCTION, static_cast(header_callback)); + curl_easy_setopt(curl.get(), CURLOPT_HEADERDATA, &headers); + + bool was_perform_successful = curl_perform_with_retry(url, curl.get(), CURL_MAX_RETRY, CURL_RETRY_DELAY_SECONDS); + if (!was_perform_successful) { + return false; + } + + long http_code = 0; + curl_easy_getinfo(curl.get(), CURLINFO_RESPONSE_CODE, &http_code); + if (http_code != 200) { + // HEAD not supported, we don't know if the file has changed + // force trigger downloading + force_download = true; + LOG_ERR("%s: HEAD invalid http status code received: %ld\n", __func__, http_code); + } + } + + bool should_download = !file_exists || force_download; + if (!should_download) { + if 
(!etag.empty() && etag != headers.etag) { + LOG_WRN("%s: ETag header is different (%s != %s): triggering a new download\n", __func__, etag.c_str(), headers.etag.c_str()); + should_download = true; + } else if (!last_modified.empty() && last_modified != headers.last_modified) { + LOG_WRN("%s: Last-Modified header is different (%s != %s): triggering a new download\n", __func__, last_modified.c_str(), headers.last_modified.c_str()); + should_download = true; + } + } + if (should_download) { + std::string path_temporary = path + ".downloadInProgress"; + if (file_exists) { + LOG_WRN("%s: deleting previous downloaded file: %s\n", __func__, path.c_str()); + if (remove(path.c_str()) != 0) { + LOG_ERR("%s: unable to delete file: %s\n", __func__, path.c_str()); + return false; + } + } + + // Set the output file + + struct FILE_deleter { + void operator()(FILE * f) const { + fclose(f); + } + }; + + std::unique_ptr outfile(fopen(path_temporary.c_str(), "wb")); + if (!outfile) { + LOG_ERR("%s: error opening local file for writing: %s\n", __func__, path.c_str()); + return false; + } + + typedef size_t(*CURLOPT_WRITEFUNCTION_PTR)(void * data, size_t size, size_t nmemb, void * fd); + auto write_callback = [](void * data, size_t size, size_t nmemb, void * fd) -> size_t { + return fwrite(data, size, nmemb, (FILE *)fd); + }; + curl_easy_setopt(curl.get(), CURLOPT_NOBODY, 0L); + curl_easy_setopt(curl.get(), CURLOPT_WRITEFUNCTION, static_cast(write_callback)); + curl_easy_setopt(curl.get(), CURLOPT_WRITEDATA, outfile.get()); + + // display download progress + curl_easy_setopt(curl.get(), CURLOPT_NOPROGRESS, 0L); + + // helper function to hide password in URL + auto llama_download_hide_password_in_url = [](const std::string & url) -> std::string { + std::size_t protocol_pos = url.find("://"); + if (protocol_pos == std::string::npos) { + return url; // Malformed URL + } + + std::size_t at_pos = url.find('@', protocol_pos + 3); + if (at_pos == std::string::npos) { + return url; // No password in URL + } + + return url.substr(0, protocol_pos + 3) + "********" + url.substr(at_pos); + }; + + // start the download + LOG_INF("%s: trying to download model from %s to %s (server_etag:%s, server_last_modified:%s)...\n", __func__, + llama_download_hide_password_in_url(url).c_str(), path.c_str(), headers.etag.c_str(), headers.last_modified.c_str()); + bool was_perform_successful = curl_perform_with_retry(url, curl.get(), CURL_MAX_RETRY, CURL_RETRY_DELAY_SECONDS); + if (!was_perform_successful) { + return false; + } + + long http_code = 0; + curl_easy_getinfo (curl.get(), CURLINFO_RESPONSE_CODE, &http_code); + if (http_code < 200 || http_code >= 400) { + LOG_ERR("%s: invalid http status code received: %ld\n", __func__, http_code); + return false; + } + + // Causes file to be closed explicitly here before we rename it. + outfile.reset(); + + // Write the updated JSON metadata file. 
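+        // Illustrative shape of the companion "<path>.json" file written here (values are examples only):
+        //   { "url": "https://...", "etag": "\"abc123\"", "lastModified": "Tue, 01 Jan 2025 00:00:00 GMT" }
+        // It is what the earlier ETag / Last-Modified comparison reads to decide whether to re-download.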
+ metadata.update({ + {"url", url}, + {"etag", headers.etag}, + {"lastModified", headers.last_modified} + }); + std::ofstream(metadata_path) << metadata.dump(4); + LOG_INF("%s: file metadata saved: %s\n", __func__, metadata_path.c_str()); + + if (rename(path_temporary.c_str(), path.c_str()) != 0) { + LOG_ERR("%s: unable to rename file: %s to %s\n", __func__, path_temporary.c_str(), path.c_str()); + return false; + } + } + + return true; +} + +struct llama_model * common_load_model_from_url( + const std::string & model_url, + const std::string & local_path, + const std::string & hf_token, + const struct llama_model_params & params) { + // Basic validation of the model_url + if (model_url.empty()) { + LOG_ERR("%s: invalid model_url\n", __func__); + return NULL; + } + + if (!common_download_file(model_url, local_path, hf_token)) { + return NULL; + } + + // check for additional GGUFs split to download + int n_split = 0; + { + struct gguf_init_params gguf_params = { + /*.no_alloc = */ true, + /*.ctx = */ NULL, + }; + auto * ctx_gguf = gguf_init_from_file(local_path.c_str(), gguf_params); + if (!ctx_gguf) { + LOG_ERR("\n%s: failed to load input GGUF from %s\n", __func__, local_path.c_str()); + return NULL; + } + + auto key_n_split = gguf_find_key(ctx_gguf, LLM_KV_SPLIT_COUNT); + if (key_n_split >= 0) { + n_split = gguf_get_val_u16(ctx_gguf, key_n_split); + } + + gguf_free(ctx_gguf); + } + + if (n_split > 1) { + char split_prefix[PATH_MAX] = {0}; + char split_url_prefix[LLAMA_CURL_MAX_URL_LENGTH] = {0}; + + // Verify the first split file format + // and extract split URL and PATH prefixes + { + if (!llama_split_prefix(split_prefix, sizeof(split_prefix), local_path.c_str(), 0, n_split)) { + LOG_ERR("\n%s: unexpected model file name: %s n_split=%d\n", __func__, local_path.c_str(), n_split); + return NULL; + } + + if (!llama_split_prefix(split_url_prefix, sizeof(split_url_prefix), model_url.c_str(), 0, n_split)) { + LOG_ERR("\n%s: unexpected model url: %s n_split=%d\n", __func__, model_url.c_str(), n_split); + return NULL; + } + } + + // Prepare download in parallel + std::vector> futures_download; + for (int idx = 1; idx < n_split; idx++) { + futures_download.push_back(std::async(std::launch::async, [&split_prefix, &split_url_prefix, &n_split, hf_token](int download_idx) -> bool { + char split_path[PATH_MAX] = {0}; + llama_split_path(split_path, sizeof(split_path), split_prefix, download_idx, n_split); + + char split_url[LLAMA_CURL_MAX_URL_LENGTH] = {0}; + llama_split_path(split_url, sizeof(split_url), split_url_prefix, download_idx, n_split); + + return common_download_file(split_url, split_path, hf_token); + }, idx)); + } + + // Wait for all downloads to complete + for (auto & f : futures_download) { + if (!f.get()) { + return NULL; + } + } + } + + return llama_model_load_from_file(local_path.c_str(), params); +} + +struct llama_model * common_load_model_from_hf( + const std::string & repo, + const std::string & remote_path, + const std::string & local_path, + const std::string & hf_token, + const struct llama_model_params & params) { + // construct hugging face model url: + // + // --repo ggml-org/models --file tinyllama-1.1b/ggml-model-f16.gguf + // https://huggingface.co/ggml-org/models/resolve/main/tinyllama-1.1b/ggml-model-f16.gguf + // + // --repo TheBloke/Mixtral-8x7B-v0.1-GGUF --file mixtral-8x7b-v0.1.Q4_K_M.gguf + // https://huggingface.co/TheBloke/Mixtral-8x7B-v0.1-GGUF/resolve/main/mixtral-8x7b-v0.1.Q4_K_M.gguf + // + + std::string model_url = "https://huggingface.co/"; + model_url += 
repo; + model_url += "/resolve/main/"; + model_url += remote_path; + + return common_load_model_from_url(model_url, local_path, hf_token, params); +} + +/** + * Allow getting the HF file from the HF repo with tag (like ollama), for example: + * - bartowski/Llama-3.2-3B-Instruct-GGUF:q4 + * - bartowski/Llama-3.2-3B-Instruct-GGUF:Q4_K_M + * - bartowski/Llama-3.2-3B-Instruct-GGUF:q5_k_s + * Tag is optional, default to "latest" (meaning it checks for Q4_K_M first, then Q4, then if not found, return the first GGUF file in repo) + * + * Return pair of (with "repo" already having tag removed) + * + * Note: we use the Ollama-compatible HF API, but not using the blobId. Instead, we use the special "ggufFile" field which returns the value for "hf_file". This is done to be backward-compatible with existing cache files. + */ +std::pair common_get_hf_file(const std::string & hf_repo_with_tag, const std::string & hf_token) { + auto parts = string_split(hf_repo_with_tag, ':'); + std::string tag = parts.size() > 1 ? parts.back() : "latest"; + std::string hf_repo = parts[0]; + if (string_split(hf_repo, '/').size() != 2) { + throw std::invalid_argument("error: invalid HF repo format, expected /[:quant]\n"); + } + + // fetch model info from Hugging Face Hub API + json model_info; + curl_ptr curl(curl_easy_init(), &curl_easy_cleanup); + curl_slist_ptr http_headers; + std::string res_str; + std::string url = "https://huggingface.co/v2/" + hf_repo + "/manifests/" + tag; + curl_easy_setopt(curl.get(), CURLOPT_URL, url.c_str()); + curl_easy_setopt(curl.get(), CURLOPT_NOPROGRESS, 1L); + typedef size_t(*CURLOPT_WRITEFUNCTION_PTR)(void * ptr, size_t size, size_t nmemb, void * data); + auto write_callback = [](void * ptr, size_t size, size_t nmemb, void * data) -> size_t { + static_cast(data)->append((char * ) ptr, size * nmemb); + return size * nmemb; + }; + curl_easy_setopt(curl.get(), CURLOPT_WRITEFUNCTION, static_cast(write_callback)); + curl_easy_setopt(curl.get(), CURLOPT_WRITEDATA, &res_str); +#if defined(_WIN32) + curl_easy_setopt(curl.get(), CURLOPT_SSL_OPTIONS, CURLSSLOPT_NATIVE_CA); +#endif + if (!hf_token.empty()) { + std::string auth_header = "Authorization: Bearer " + hf_token; + http_headers.ptr = curl_slist_append(http_headers.ptr, auth_header.c_str()); + } + // Important: the User-Agent must be "llama-cpp" to get the "ggufFile" field in the response + http_headers.ptr = curl_slist_append(http_headers.ptr, "User-Agent: llama-cpp"); + http_headers.ptr = curl_slist_append(http_headers.ptr, "Accept: application/json"); + curl_easy_setopt(curl.get(), CURLOPT_HTTPHEADER, http_headers.ptr); + + CURLcode res = curl_easy_perform(curl.get()); + + if (res != CURLE_OK) { + throw std::runtime_error("error: cannot make GET request to HF API"); + } + + long res_code; + curl_easy_getinfo(curl.get(), CURLINFO_RESPONSE_CODE, &res_code); + if (res_code == 200) { + model_info = json::parse(res_str); + } else if (res_code == 401) { + throw std::runtime_error("error: model is private or does not exist; if you are accessing a gated model, please provide a valid HF token"); + } else { + throw std::runtime_error(string_format("error from HF API, response code: %ld, data: %s", res_code, res_str.c_str())); + } + + // check response + if (!model_info.contains("ggufFile")) { + throw std::runtime_error("error: model does not have ggufFile"); + } + json & gguf_file = model_info.at("ggufFile"); + if (!gguf_file.contains("rfilename")) { + throw std::runtime_error("error: ggufFile does not have rfilename"); + } + + return 
std::make_pair(hf_repo, gguf_file.at("rfilename"));
+}
+
+#else
+
+struct llama_model * common_load_model_from_url(
+        const std::string & /*model_url*/,
+        const std::string & /*local_path*/,
+        const std::string & /*hf_token*/,
+        const struct llama_model_params & /*params*/) {
+    LOG_WRN("%s: llama.cpp built without libcurl, downloading from an url not supported.\n", __func__);
+    return nullptr;
+}
+
+struct llama_model * common_load_model_from_hf(
+        const std::string & /*repo*/,
+        const std::string & /*remote_path*/,
+        const std::string & /*local_path*/,
+        const std::string & /*hf_token*/,
+        const struct llama_model_params & /*params*/) {
+    LOG_WRN("%s: llama.cpp built without libcurl, downloading from Hugging Face not supported.\n", __func__);
+    return nullptr;
+}
+
+std::pair<std::string, std::string> common_get_hf_file(const std::string &, const std::string &) {
+    LOG_WRN("%s: llama.cpp built without libcurl, downloading from Hugging Face not supported.\n", __func__);
+    return std::make_pair("", "");
+}
+
+#endif // LLAMA_USE_CURL
+
+//
+// Batch utils
+//
+
+void common_batch_clear(struct llama_batch & batch) {
+    batch.n_tokens = 0;
+}
+
+void common_batch_add(
+        struct llama_batch & batch,
+        llama_token id,
+        llama_pos pos,
+        const std::vector<llama_seq_id> & seq_ids,
+        bool logits) {
+    GGML_ASSERT(batch.seq_id[batch.n_tokens] && "llama_batch size exceeded");
+
+    batch.token   [batch.n_tokens] = id;
+    batch.pos     [batch.n_tokens] = pos;
+    batch.n_seq_id[batch.n_tokens] = seq_ids.size();
+    for (size_t i = 0; i < seq_ids.size(); ++i) {
+        batch.seq_id[batch.n_tokens][i] = seq_ids[i];
+    }
+    batch.logits  [batch.n_tokens] = logits;
+
+    batch.n_tokens++;
+}
+
+//
+// Token utils
+//
+
+size_t common_lcp(const llama_tokens & a, const llama_tokens & b) {
+    size_t i;
+    for (i = 0; i < a.size() && i < b.size() && a[i] == b[i]; i++) {}
+
+    return i;
+}
+
+size_t common_lcs(const llama_tokens & a, const llama_tokens & b) {
+    // check for empty sequences
+    if (a.empty() || b.empty()) {
+        return 0;
+    }
+
+    // get the lengths of the input sequences
+    size_t a_len = a.size();
+    size_t b_len = b.size();
+
+    // initialize the maximum length of the longest common subsequence (LCS)
+    size_t max_length = 0;
+
+    // use two rows instead of a 2D matrix to optimize space
+    std::vector<size_t> prev_row(b_len + 1, 0);
+    std::vector<size_t> curr_row(b_len + 1, 0);
+
+    // iterate through the elements of a
+    for (size_t i = 1; i <= a_len; i++) {
+        // iterate through the elements of b
+        for (size_t j = 1; j <= b_len; j++) {
+            // if elements at the current positions match
+            if (a[i - 1] == b[j - 1]) {
+                // if it's the first element of either sequence, set LCS length to 1
+                if (i == 1 || j == 1) {
+                    curr_row[j] = 1;
+                } else {
+                    // increment LCS length by 1 compared to the previous element
+                    curr_row[j] = prev_row[j - 1] + 1;
+                }
+
+                // update max_length if necessary
+                if (curr_row[j] > max_length) {
+                    max_length = curr_row[j];
+                }
+            } else {
+                // reset LCS length if elements don't match
+                curr_row[j] = 0;
+            }
+        }
+
+        // update the previous row for the next iteration
+        prev_row = curr_row;
+    }
+
+    // return the maximum length of the LCS
+    return max_length;
+}
+
+//
+// Vocab utils
+//
+
+std::vector<llama_token> common_tokenize(
+        const struct llama_context * ctx,
+        const std::string & text,
+        bool add_special,
+        bool parse_special) {
+    const llama_model * model = llama_get_model(ctx);
+    const llama_vocab * vocab = llama_model_get_vocab(model);
+    return common_tokenize(vocab, text, add_special, parse_special);
+}
+
+std::vector<llama_token> common_tokenize(
+        const struct llama_vocab * vocab,
+        const std::string & text,
+        bool add_special,
+        bool parse_special) {
+    // upper limit for the number of tokens
+    int n_tokens = text.length() + 2 * add_special;
+    std::vector<llama_token> result(n_tokens);
+    n_tokens = llama_tokenize(vocab, text.data(), text.length(), result.data(), result.size(), add_special, parse_special);
+    if (n_tokens < 0) {
+        result.resize(-n_tokens);
+        int check = llama_tokenize(vocab, text.data(), text.length(), result.data(), result.size(), add_special, parse_special);
+        GGML_ASSERT(check == -n_tokens);
+    } else {
+        result.resize(n_tokens);
+    }
+    return result;
+}
+
+std::string common_token_to_piece(const struct llama_context * ctx, llama_token token, bool special) {
+    const llama_model * model = llama_get_model(ctx);
+    const llama_vocab * vocab = llama_model_get_vocab(model);
+    return common_token_to_piece(vocab, token, special);
+}
+
+std::string common_token_to_piece(const struct llama_vocab * vocab, llama_token token, bool special) {
+    std::string piece;
+    piece.resize(piece.capacity());  // using string internal cache, 15 bytes + '\n'
+    const int n_chars = llama_token_to_piece(vocab, token, &piece[0], piece.size(), 0, special);
+    if (n_chars < 0) {
+        piece.resize(-n_chars);
+        int check = llama_token_to_piece(vocab, token, &piece[0], piece.size(), 0, special);
+        GGML_ASSERT(check == -n_chars);
+    } else {
+        piece.resize(n_chars);
+    }
+
+    return piece;
+}
+
+std::string common_detokenize(const struct llama_context * ctx, const std::vector<llama_token> & tokens, bool special) {
+    const llama_model * model = llama_get_model(ctx);
+    const llama_vocab * vocab = llama_model_get_vocab(model);
+    return common_detokenize(vocab, tokens, special);
+}
+
+std::string common_detokenize(const struct llama_vocab * vocab, const std::vector<llama_token> & tokens, bool special) {
+    std::string text;
+    text.resize(std::max(text.capacity(), tokens.size()));
+    int32_t n_chars = llama_detokenize(vocab, tokens.data(), (int32_t)tokens.size(), &text[0], (int32_t)text.size(), false, special);
+    if (n_chars < 0) {
+        text.resize(-n_chars);
+        n_chars = llama_detokenize(vocab, tokens.data(), (int32_t)tokens.size(), &text[0], (int32_t)text.size(), false, special);
+        GGML_ASSERT(n_chars <= (int32_t)text.size());  // whitespace trimming is performed after per-token detokenization
+    }
+
+    text.resize(n_chars);
+
+    // NOTE: the original tokenizer decodes bytes after collecting the pieces.
+ return text; +} + +// +// Chat template utils +// + +bool common_chat_verify_template(const std::string & tmpl, bool use_jinja) { + if (use_jinja) { + try { + auto chat_template = common_chat_template(tmpl, "", ""); + common_chat_inputs inputs; + inputs.messages = json::array({{ + {"role", "user"}, + {"content", "test"}, + }}); + common_chat_params_init(chat_template, inputs); + return true; + } catch (const std::exception & e) { + LOG_ERR("%s: failed to apply template: %s\n", __func__, e.what()); + return false; + } + } + llama_chat_message chat[] = {{"user", "test"}}; + const int res = llama_chat_apply_template(tmpl.c_str(), chat, 1, true, nullptr, 0); + return res >= 0; +} + +std::string common_chat_apply_template( + const common_chat_template & tmpl, + const std::vector & msgs, + bool add_ass, + bool use_jinja) { + if (use_jinja) { + auto messages = json::array(); + for (const auto & msg : msgs) { + messages.push_back({{"role", msg.role}, {"content", msg.content}}); + } + common_chat_inputs inputs; + inputs.messages = messages; + inputs.add_generation_prompt = add_ass; + return common_chat_params_init(tmpl, inputs).prompt; + } + + int alloc_size = 0; + std::vector chat; + for (const auto & msg : msgs) { + chat.push_back({msg.role.c_str(), msg.content.c_str()}); + alloc_size += (msg.role.size() + msg.content.size()) * 1.25; + } + + std::vector buf(alloc_size); + + // run the first time to get the total output length + int32_t res = llama_chat_apply_template(tmpl.source().c_str(), chat.data(), chat.size(), add_ass, buf.data(), buf.size()); + + // error: chat template is not supported + if (res < 0) { + // if the custom "tmpl" is not supported, we throw an error + // this is a bit redundant (for good), since we're not sure if user validated the custom template with llama_chat_verify_template() + throw std::runtime_error("this custom template is not supported"); + } + + // if it turns out that our buffer is too small, we resize it + if ((size_t) res > buf.size()) { + buf.resize(res); + res = llama_chat_apply_template(tmpl.source().c_str(), chat.data(), chat.size(), add_ass, buf.data(), buf.size()); + } + + std::string formatted_chat(buf.data(), res); + return formatted_chat; +} + +std::string common_chat_format_single( + const common_chat_template & tmpl, + const std::vector & past_msg, + const common_chat_msg & new_msg, + bool add_ass, + bool use_jinja) { + std::ostringstream ss; + auto fmt_past_msg = past_msg.empty() ? 
"" : common_chat_apply_template(tmpl, past_msg, false, use_jinja); + std::vector chat_new(past_msg); + // if the past_msg ends with a newline, we must preserve it in the formatted version + if (add_ass && !fmt_past_msg.empty() && fmt_past_msg.back() == '\n') { + ss << "\n"; + }; + // format chat with new_msg + chat_new.push_back(new_msg); + auto fmt_new_msg = common_chat_apply_template(tmpl, chat_new, add_ass, use_jinja); + // get the diff part + ss << fmt_new_msg.substr(fmt_past_msg.size(), fmt_new_msg.size() - fmt_past_msg.size()); + return ss.str(); +} + +std::string common_chat_format_example(const common_chat_template & tmpl, bool use_jinja) { + std::vector msgs = { + {"system", "You are a helpful assistant", {}}, + {"user", "Hello", {}}, + {"assistant", "Hi there", {}}, + {"user", "How are you?", {}}, + }; + return common_chat_apply_template(tmpl, msgs, true, use_jinja); +} + +#define CHATML_TEMPLATE_SRC \ + "{%- for message in messages -%}\n" \ + " {{- '<|im_start|>' + message.role + '\n' + message.content + '<|im_end|>\n' -}}\n" \ + "{%- endfor -%}\n" \ + "{%- if add_generation_prompt -%}\n" \ + " {{- '<|im_start|>assistant\n' -}}\n" \ + "{%- endif -%}" + +common_chat_templates common_chat_templates_from_model(const struct llama_model * model, const std::string & chat_template_override) +{ + std::string default_template_src; + std::string template_tool_use_src; + + bool has_explicit_template = !chat_template_override.empty(); + if (chat_template_override.empty()) { + auto str = llama_model_chat_template(model, /* name */ nullptr); + if (str) { + default_template_src = str; + has_explicit_template = true; + } + str = llama_model_chat_template(model, /* name */ "tool_use"); + if (str) { + template_tool_use_src = str; + has_explicit_template = true; + } + } else { + default_template_src = chat_template_override; + } + if (default_template_src.empty() || default_template_src == "chatml") { + if (!template_tool_use_src.empty()) { + default_template_src = template_tool_use_src; + } else { + default_template_src = CHATML_TEMPLATE_SRC; + } + } + auto vocab = llama_model_get_vocab(model); + const auto get_token = [&](llama_token token, const char * name, const char * jinja_variable_name) { + if (token == LLAMA_TOKEN_NULL) { + if (default_template_src.find(jinja_variable_name) != std::string::npos + || template_tool_use_src.find(jinja_variable_name) != std::string::npos) { + LOG_WRN("%s: warning: vocab does not have a %s token, jinja template won't work as intended.\n", __func__, name); + } + return std::string(); + } else { + return common_token_to_piece(vocab, token, true); + } + }; + auto token_bos = get_token(llama_vocab_bos(vocab), "BOS", "bos_token"); + auto token_eos = get_token(llama_vocab_eos(vocab), "EOS", "eos_token"); + try { + return { + has_explicit_template, + std::make_unique(default_template_src, token_bos, token_eos), + template_tool_use_src.empty() + ? nullptr + : std::make_unique(template_tool_use_src, token_bos, token_eos), + }; + } catch (const std::exception & e) { + LOG_ERR("%s: failed to parse chat template: %s\n", __func__, e.what()); + return { + has_explicit_template, + std::make_unique(CHATML_TEMPLATE_SRC, token_bos, token_eos), + nullptr, + }; + } +} + +// +// KV cache utils +// + +void common_kv_cache_dump_view(const llama_kv_cache_view & view, int row_size) { + static const char slot_chars[] = ".123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz+"; + + printf("=== Dumping KV cache. 
total cells %d, max sequences per cell %d, populated cells %d, total tokens in cache %d, largest empty slot=%d @ %d", + view.n_cells, view.n_seq_max, view.used_cells, view.token_count, view.max_contiguous, view.max_contiguous_idx); + + llama_kv_cache_view_cell * c_curr = view.cells; + llama_seq_id * cs_curr = view.cells_sequences; + + for (int i = 0; i < view.n_cells; i++, c_curr++, cs_curr += view.n_seq_max) { + if (i % row_size == 0) { + printf("\n%5d: ", i); + } + int seq_count = 0; + for (int j = 0; j < view.n_seq_max; j++) { + if (cs_curr[j] >= 0) { seq_count++; } + } + putchar(slot_chars[std::min(sizeof(slot_chars) - 2, size_t(seq_count))]); + } + + printf("\n=== Done dumping\n"); +} + +void common_kv_cache_dump_view_seqs(const llama_kv_cache_view & view, int row_size) { + static const char slot_chars[] = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"; + + printf("=== Dumping KV cache. total cells %d, max sequences per cell %d, populated cells %d, total tokens in cache %d, largest empty slot=%d @ %d\n", + view.n_cells, view.n_seq_max, view.used_cells, view.token_count, view.max_contiguous, view.max_contiguous_idx); + + std::unordered_map seqs; + llama_kv_cache_view_cell * c_curr = view.cells; + llama_seq_id * cs_curr = view.cells_sequences; + + for (int i = 0; i < view.n_cells; i++, c_curr++, cs_curr += view.n_seq_max) { + for (int j = 0; j < view.n_seq_max; j++) { + if (cs_curr[j] < 0) { continue; } + if (seqs.find(cs_curr[j]) == seqs.end()) { + if (seqs.size() + 1 >= sizeof(slot_chars)) { break; } + const size_t sz = seqs.size(); + seqs[cs_curr[j]] = sz; + } + } + if (seqs.size() + 1 >= sizeof(slot_chars)) { break; } + } + + printf("=== Sequence legend: "); + for (const auto & it : seqs) { + printf("%zu=%d, ", it.second, it.first); + } + printf("'+'=other sequence ids"); + + c_curr = view.cells; + cs_curr = view.cells_sequences; + for (int i = 0; i < view.n_cells; i++, c_curr++, cs_curr += view.n_seq_max) { + if (i % row_size == 0) { + printf("\n%5d: ", i); + } + for (int j = 0; j < view.n_seq_max; j++) { + if (cs_curr[j] >= 0) { + const auto & it = seqs.find(cs_curr[j]); + putchar(it != seqs.end() ? int(slot_chars[it->second]) : '+'); + } else { + putchar('.'); + } + } + putchar(' '); + } + + printf("\n=== Done dumping\n"); +} + +// +// Embedding utils +// + +void common_embd_normalize(const float * inp, float * out, int n, int embd_norm) { + double sum = 0.0; + + switch (embd_norm) { + case -1: // no normalisation + sum = 1.0; + break; + case 0: // max absolute + for (int i = 0; i < n; i++) { + if (sum < std::abs(inp[i])) { + sum = std::abs(inp[i]); + } + } + sum /= 32760.0; // make an int16 range + break; + case 2: // euclidean + for (int i = 0; i < n; i++) { + sum += inp[i] * inp[i]; + } + sum = std::sqrt(sum); + break; + default: // p-norm (euclidean is p-norm p=2) + for (int i = 0; i < n; i++) { + sum += std::pow(std::abs(inp[i]), embd_norm); + } + sum = std::pow(sum, 1.0 / embd_norm); + break; + } + + const float norm = sum > 0.0 ? 
1.0 / sum : 0.0f; + + for (int i = 0; i < n; i++) { + out[i] = inp[i] * norm; + } +} + +float common_embd_similarity_cos(const float * embd1, const float * embd2, int n){ + double sum = 0.0; + double sum1 = 0.0; + double sum2 = 0.0; + + for (int i = 0; i < n; i++) { + sum += embd1[i] * embd2[i]; + sum1 += embd1[i] * embd1[i]; + sum2 += embd2[i] * embd2[i]; + } + + // Handle the case where one or both vectors are zero vectors + if (sum1 == 0.0 || sum2 == 0.0) { + if (sum1 == 0.0 && sum2 == 0.0) { + return 1.0f; // two zero vectors are similar + } + return 0.0f; + } + + return sum / (sqrt(sum1) * sqrt(sum2)); +} + +// +// Control vector utils +// + +static common_control_vector_data common_control_vector_load_one(const common_control_vector_load_info & load_info) { + common_control_vector_data result = { -1, {} }; + + ggml_context * ctx = nullptr; + struct gguf_init_params meta_gguf_params = { + /* .no_alloc = */ false, + /* .ctx = */ &ctx, + }; + struct gguf_context * ctx_gguf = gguf_init_from_file(load_info.fname.c_str(), meta_gguf_params); + if (!ctx_gguf) { + LOG_ERR("%s: failed to load control vector file from %s\n", __func__, load_info.fname.c_str()); + return result; + } + + int32_t n_tensors = gguf_get_n_tensors(ctx_gguf); + if (n_tensors == 0) { + LOG_WRN("%s: no direction tensors found in %s\n", __func__, load_info.fname.c_str()); + } + + for (int i = 0; i < n_tensors; i++) { + std::string name = gguf_get_tensor_name(ctx_gguf, i); + + int layer_idx = -1; + + // split on '.' + size_t dotpos = name.find('.'); + if (dotpos != std::string::npos && name.substr(0, dotpos) == "direction") { + try { + layer_idx = std::stoi(name.substr(dotpos + 1)); + } catch (...) { + layer_idx = -1; + } + } + if (layer_idx < 0) { + LOG_ERR("%s: invalid/unparsable direction tensor layer index in %s\n", __func__, load_info.fname.c_str()); + result.n_embd = -1; + break; + } else if (layer_idx == 0) { + LOG_ERR("%s: invalid (zero) direction tensor layer index in %s\n", __func__, load_info.fname.c_str()); + result.n_embd = -1; + break; + } + + struct ggml_tensor * tensor = ggml_get_tensor(ctx, name.c_str()); + if (tensor->type != GGML_TYPE_F32) { + LOG_ERR("%s: invalid (non-F32) direction tensor type in %s\n", __func__, load_info.fname.c_str()); + result.n_embd = -1; + break; + } + if (ggml_n_dims(tensor) != 1) { + LOG_ERR("%s: invalid (non-1D) direction tensor shape in %s\n", __func__, load_info.fname.c_str()); + result.n_embd = -1; + break; + } + + if (result.n_embd == -1) { + result.n_embd = ggml_nelements(tensor); + } else if (ggml_nelements(tensor) != result.n_embd) { + LOG_ERR("%s: direction tensor in %s does not match previous dimensions\n", __func__, load_info.fname.c_str()); + result.n_embd = -1; + break; + } + + // extend if necessary - do not store data for layer 0 (it's not used) + result.data.resize(std::max(result.data.size(), static_cast(result.n_embd * layer_idx)), 0.0f); + + const float * src = (const float *) tensor->data; + float * dst = result.data.data() + result.n_embd * (layer_idx - 1); // layer 1 at [0] + for (int j = 0; j < result.n_embd; j++) { + dst[j] += src[j] * load_info.strength; // allows multiple directions for same layer in same file + } + + } + + if (result.n_embd == -1) { + LOG_WRN("%s: skipping %s due to invalid direction tensors\n", __func__, load_info.fname.c_str()); + result.data.clear(); + } + + gguf_free(ctx_gguf); + ggml_free(ctx); + + return result; +} + +common_control_vector_data common_control_vector_load(const std::vector & load_infos) { + 
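    // merge the scaled control vectors from every file; a load failure or dimension mismatch (n_embd == -1) aborts the whole merge +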
common_control_vector_data result = { -1, {} }; + + for (const auto & info : load_infos) { + auto cur = common_control_vector_load_one(info); + + if (cur.n_embd == -1) { + result.n_embd = -1; + break; + } + if (result.n_embd != -1 && result.n_embd != cur.n_embd) { + LOG_ERR("%s: control vectors in %s does not match previous dimensions\n", __func__, info.fname.c_str()); + result.n_embd = -1; + break; + } + + if (result.n_embd == -1) { + result = std::move(cur); + } else { + result.data.resize(std::max(result.data.size(), cur.data.size()), 0.0f); // extend if necessary + for (size_t i = 0; i < cur.data.size(); i++) { + result.data[i] += cur.data[i]; + } + } + } + + if (result.n_embd == -1) { + LOG_ERR("%s: no valid control vector files passed\n", __func__); + result.data.clear(); + } + + return result; +} + diff --git a/llama.cpp/common/common.h b/llama.cpp/common/common.h new file mode 100644 index 0000000000000000000000000000000000000000..17f5ea8683d690554e1ce01f86c7da8067caf8db --- /dev/null +++ b/llama.cpp/common/common.h @@ -0,0 +1,718 @@ +// Various helper functions and utilities + +#pragma once + +#include "llama-cpp.h" + +#include +#include +#include +#include + +#ifdef _WIN32 +#define DIRECTORY_SEPARATOR '\\' +#else +#define DIRECTORY_SEPARATOR '/' +#endif // _WIN32 + +#define die(msg) do { fputs("error: " msg "\n", stderr); exit(1); } while (0) +#define die_fmt(fmt, ...) do { fprintf(stderr, "error: " fmt "\n", __VA_ARGS__); exit(1); } while (0) + +#define print_build_info() do { \ + fprintf(stderr, "%s: build = %d (%s)\n", __func__, LLAMA_BUILD_NUMBER, LLAMA_COMMIT); \ + fprintf(stderr, "%s: built with %s for %s\n", __func__, LLAMA_COMPILER, LLAMA_BUILD_TARGET); \ +} while(0) + +#define DEFAULT_MODEL_PATH "models/7B/ggml-model-f16.gguf" + +struct common_adapter_lora_info { + std::string path; + float scale; + + struct llama_adapter_lora * ptr; +}; + +using llama_tokens = std::vector; + +// build info +extern int LLAMA_BUILD_NUMBER; +extern const char * LLAMA_COMMIT; +extern const char * LLAMA_COMPILER; +extern const char * LLAMA_BUILD_TARGET; + +struct common_control_vector_load_info; + +// +// CPU utils +// + +struct cpu_params { + int n_threads = -1; + bool cpumask[GGML_MAX_N_THREADS] = {false}; // CPU affinity mask. 
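+    // set to true when the cpumask above has been filled in explicitly; otherwise the mask is ignored and threads may run on any CPU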
+ bool mask_valid = false; // Default: any CPU + enum ggml_sched_priority priority = GGML_SCHED_PRIO_NORMAL; // Scheduling prio : (0 - normal, 1 - medium, 2 - high, 3 - realtime) + bool strict_cpu = false; // Use strict CPU placement + uint32_t poll = 50; // Polling (busywait) level (0 - no polling, 100 - mostly polling) +}; + +int32_t cpu_get_num_physical_cores(); +int32_t cpu_get_num_math(); + +// +// Common params +// + +enum llama_example { + LLAMA_EXAMPLE_COMMON, + LLAMA_EXAMPLE_SPECULATIVE, + LLAMA_EXAMPLE_MAIN, + LLAMA_EXAMPLE_INFILL, + LLAMA_EXAMPLE_EMBEDDING, + LLAMA_EXAMPLE_PERPLEXITY, + LLAMA_EXAMPLE_RETRIEVAL, + LLAMA_EXAMPLE_PASSKEY, + LLAMA_EXAMPLE_IMATRIX, + LLAMA_EXAMPLE_BENCH, + LLAMA_EXAMPLE_SERVER, + LLAMA_EXAMPLE_CVECTOR_GENERATOR, + LLAMA_EXAMPLE_EXPORT_LORA, + LLAMA_EXAMPLE_LLAVA, + LLAMA_EXAMPLE_LOOKUP, + LLAMA_EXAMPLE_PARALLEL, + LLAMA_EXAMPLE_TTS, + + LLAMA_EXAMPLE_COUNT, +}; + +enum common_sampler_type { + COMMON_SAMPLER_TYPE_NONE = 0, + COMMON_SAMPLER_TYPE_DRY = 1, + COMMON_SAMPLER_TYPE_TOP_K = 2, + COMMON_SAMPLER_TYPE_TOP_P = 3, + COMMON_SAMPLER_TYPE_MIN_P = 4, + //COMMON_SAMPLER_TYPE_TFS_Z = 5, + COMMON_SAMPLER_TYPE_TYPICAL_P = 6, + COMMON_SAMPLER_TYPE_TEMPERATURE = 7, + COMMON_SAMPLER_TYPE_XTC = 8, + COMMON_SAMPLER_TYPE_INFILL = 9, + COMMON_SAMPLER_TYPE_PENALTIES = 10, +}; + +// dimensionality reduction methods, used by cvector-generator +enum dimre_method { + DIMRE_METHOD_PCA, + DIMRE_METHOD_MEAN, +}; + +enum common_conversation_mode { + COMMON_CONVERSATION_MODE_DISABLED = 0, + COMMON_CONVERSATION_MODE_ENABLED = 1, + COMMON_CONVERSATION_MODE_AUTO = 2, +}; + +struct common_grammar_trigger { + std::string word; + bool at_start; +}; + +// sampling parameters +struct common_params_sampling { + uint32_t seed = LLAMA_DEFAULT_SEED; // the seed used to initialize llama_sampler + + int32_t n_prev = 64; // number of previous tokens to remember + int32_t n_probs = 0; // if greater than 0, output the probabilities of top n_probs tokens. 
+ int32_t min_keep = 0; // 0 = disabled, otherwise samplers should return at least min_keep tokens + int32_t top_k = 40; // <= 0 to use vocab size + float top_p = 0.95f; // 1.0 = disabled + float min_p = 0.05f; // 0.0 = disabled + float xtc_probability = 0.00f; // 0.0 = disabled + float xtc_threshold = 0.10f; // > 0.5 disables XTC + float typ_p = 1.00f; // typical_p, 1.0 = disabled + float temp = 0.80f; // <= 0.0 to sample greedily, 0.0 to not output probabilities + float dynatemp_range = 0.00f; // 0.0 = disabled + float dynatemp_exponent = 1.00f; // controls how entropy maps to temperature in dynamic temperature sampler + int32_t penalty_last_n = 64; // last n tokens to penalize (0 = disable penalty, -1 = context size) + float penalty_repeat = 1.00f; // 1.0 = disabled + float penalty_freq = 0.00f; // 0.0 = disabled + float penalty_present = 0.00f; // 0.0 = disabled + float dry_multiplier = 0.0f; // 0.0 = disabled; DRY repetition penalty for tokens extending repetition: + float dry_base = 1.75f; // 0.0 = disabled; multiplier * base ^ (length of sequence before token - allowed length) + int32_t dry_allowed_length = 2; // tokens extending repetitions beyond this receive penalty + int32_t dry_penalty_last_n = -1; // how many tokens to scan for repetitions (0 = disable penalty, -1 = context size) + int32_t mirostat = 0; // 0 = disabled, 1 = mirostat, 2 = mirostat 2.0 + float top_n_sigma = -1.00f;// -1.0 = disabled + float mirostat_tau = 5.00f; // target entropy + float mirostat_eta = 0.10f; // learning rate + bool ignore_eos = false; + bool no_perf = false; // disable performance metrics + bool timing_per_token = false; + + std::vector dry_sequence_breakers = {"\n", ":", "\"", "*"}; // default sequence breakers for DRY + + + std::vector samplers = { + COMMON_SAMPLER_TYPE_PENALTIES, + COMMON_SAMPLER_TYPE_DRY, + COMMON_SAMPLER_TYPE_TOP_K, + COMMON_SAMPLER_TYPE_TYPICAL_P, + COMMON_SAMPLER_TYPE_TOP_P, + COMMON_SAMPLER_TYPE_MIN_P, + COMMON_SAMPLER_TYPE_XTC, + COMMON_SAMPLER_TYPE_TEMPERATURE, + }; + + std::string grammar; // optional BNF-like grammar to constrain sampling + bool grammar_lazy = false; + std::vector grammar_trigger_words; // optional trigger words to trigger lazy grammar + std::vector grammar_trigger_tokens; // optional trigger tokens to trigger lazy grammar and print trigger special tokens. 
+ std::set preserved_tokens; + + std::vector logit_bias; // logit biases to apply + + // print the parameters into a string + std::string print() const; +}; + +struct common_params_speculative { + std::vector devices; // devices to use for offloading + + int32_t n_ctx = 0; // draft context size + int32_t n_max = 16; // maximum number of tokens to draft during speculative decoding + int32_t n_min = 5; // minimum number of draft tokens to use for speculative decoding + int32_t n_gpu_layers = -1; // number of layers to store in VRAM for the draft model (-1 - use default) + float p_split = 0.1f; // speculative decoding split probability + float p_min = 0.9f; // minimum speculative decoding probability (greedy) + + struct cpu_params cpuparams; + struct cpu_params cpuparams_batch; + + std::string hf_repo = ""; // HF repo // NOLINT + std::string hf_file = ""; // HF file // NOLINT + + std::string model = ""; // draft model for speculative decoding // NOLINT + std::string model_url = ""; // model url to download // NOLINT +}; + +struct common_params_vocoder { + std::string hf_repo = ""; // HF repo // NOLINT + std::string hf_file = ""; // HF file // NOLINT + + std::string model = ""; // model path // NOLINT + std::string model_url = ""; // model url to download // NOLINT + + bool use_guide_tokens = false; // enable guide tokens to improve TTS accuracy // NOLINT +}; + +struct common_params { + int32_t n_predict = -1; // new tokens to predict + int32_t n_ctx = 4096; // context size + int32_t n_batch = 2048; // logical batch size for prompt processing (must be >=32 to use BLAS) + int32_t n_ubatch = 512; // physical batch size for prompt processing (must be >=32 to use BLAS) + int32_t n_keep = 0; // number of tokens to keep from initial prompt + int32_t n_chunks = -1; // max number of chunks to process (-1 = unlimited) + int32_t n_parallel = 1; // number of parallel sequences to decode + int32_t n_sequences = 1; // number of sequences to decode + int32_t grp_attn_n = 1; // group-attention factor + int32_t grp_attn_w = 512; // group-attention width + int32_t n_print = -1; // print token count every n tokens (-1 = disabled) + float rope_freq_base = 0.0f; // RoPE base frequency + float rope_freq_scale = 0.0f; // RoPE frequency scaling factor + float yarn_ext_factor = -1.0f; // YaRN extrapolation mix factor + float yarn_attn_factor = 1.0f; // YaRN magnitude scaling factor + float yarn_beta_fast = 32.0f; // YaRN low correction dim + float yarn_beta_slow = 1.0f; // YaRN high correction dim + int32_t yarn_orig_ctx = 0; // YaRN original context length + float defrag_thold = 0.1f; // KV cache defragmentation threshold + + // offload params + std::vector devices; // devices to use for offloading + + int32_t n_gpu_layers = -1; // number of layers to store in VRAM (-1 - use default) + int32_t main_gpu = 0; // the GPU that is used for scratch and small tensors + float tensor_split[128] = {0}; // how split tensors should be distributed across GPUs + + enum llama_split_mode split_mode = LLAMA_SPLIT_MODE_LAYER; // how to split the model across GPUs + + struct cpu_params cpuparams; + struct cpu_params cpuparams_batch; + + ggml_backend_sched_eval_callback cb_eval = nullptr; + void * cb_eval_user_data = nullptr; + + ggml_numa_strategy numa = GGML_NUMA_STRATEGY_DISABLED; + + enum llama_rope_scaling_type rope_scaling_type = LLAMA_ROPE_SCALING_TYPE_UNSPECIFIED; + enum llama_pooling_type pooling_type = LLAMA_POOLING_TYPE_UNSPECIFIED; // pooling type for embeddings + enum llama_attention_type attention_type = 
LLAMA_ATTENTION_TYPE_UNSPECIFIED; // attention type for embeddings + + struct common_params_sampling sampling; + struct common_params_speculative speculative; + struct common_params_vocoder vocoder; + + std::string model = ""; // model path // NOLINT + std::string model_alias = ""; // model alias // NOLINT + std::string model_url = ""; // model url to download // NOLINT + std::string hf_token = ""; // HF token // NOLINT + std::string hf_repo = ""; // HF repo // NOLINT + std::string hf_file = ""; // HF file // NOLINT + std::string prompt = ""; // NOLINT + std::string prompt_file = ""; // store the external prompt file name // NOLINT + std::string path_prompt_cache = ""; // path to file for saving/loading prompt eval state // NOLINT + std::string input_prefix = ""; // string to prefix user inputs with // NOLINT + std::string input_suffix = ""; // string to suffix user inputs with // NOLINT + std::string lookup_cache_static = ""; // path of static ngram cache file for lookup decoding // NOLINT + std::string lookup_cache_dynamic = ""; // path of dynamic ngram cache file for lookup decoding // NOLINT + std::string logits_file = ""; // file for saving *all* logits // NOLINT + + std::vector in_files; // all input files + std::vector antiprompt; // strings upon which more user input is prompted (a.k.a. reverse prompts) + std::vector kv_overrides; + + bool lora_init_without_apply = false; // only load lora to memory, but do not apply it to ctx (user can manually apply lora later using llama_adapter_lora_apply) + std::vector lora_adapters; // lora adapter path with user defined scale + + std::vector control_vectors; // control vector with user defined scale + + int32_t verbosity = 0; + int32_t control_vector_layer_start = -1; // layer range for control vector + int32_t control_vector_layer_end = -1; // layer range for control vector + + int32_t ppl_stride = 0; // stride for perplexity calculations. If left at 0, the pre-existing approach will be used. + int32_t ppl_output_type = 0; // = 0 -> ppl output is as usual, = 1 -> ppl output is num_tokens, ppl, one per line + // (which is more convenient to use for plotting) + // + bool hellaswag = false; // compute HellaSwag score over random tasks from datafile supplied in prompt + size_t hellaswag_tasks = 400; // number of tasks to use when computing the HellaSwag score + + bool winogrande = false; // compute Winogrande score over random tasks from datafile supplied in prompt + size_t winogrande_tasks = 0; // number of tasks to use when computing the Winogrande score. If 0, all tasks will be computed + + bool multiple_choice = false; // compute TruthfulQA score over random tasks from datafile supplied in prompt + size_t multiple_choice_tasks = 0; // number of tasks to use when computing the TruthfulQA score. 
If 0, all tasks will be computed + + bool kl_divergence = false; // compute KL divergence + + bool usage = false; // print usage + bool use_color = false; // use color to distinguish generations and inputs + bool special = false; // enable special token output + bool interactive = false; // interactive mode + bool interactive_first = false; // wait for user input immediately + bool prompt_cache_all = false; // save user input and generations to prompt cache + bool prompt_cache_ro = false; // open the prompt cache read-only and do not update it + + bool escape = true; // escape "\n", "\r", "\t", "\'", "\"", and "\\" + bool multiline_input = false; // reverse the usage of `\` + bool simple_io = false; // improves compatibility with subprocesses and limited consoles + bool cont_batching = true; // insert new sequences for decoding on-the-fly + bool flash_attn = false; // flash attention + bool no_perf = false; // disable performance metrics + bool ctx_shift = true; // context shift on inifinite text generation + + bool input_prefix_bos = false; // prefix BOS to user inputs, preceding input_prefix + bool logits_all = false; // return logits for all tokens in the batch + bool use_mmap = true; // use mmap for faster loads + bool use_mlock = false; // use mlock to keep model in memory + bool verbose_prompt = false; // print prompt tokens before generation + bool display_prompt = true; // print prompt before generation + bool dump_kv_cache = false; // dump the KV cache contents for debugging purposes + bool no_kv_offload = false; // disable KV offloading + bool warmup = true; // warmup run + bool check_tensors = false; // validate tensor data + + ggml_type cache_type_k = GGML_TYPE_F16; // KV cache data type for the K + ggml_type cache_type_v = GGML_TYPE_F16; // KV cache data type for the V + + common_conversation_mode conversation_mode = COMMON_CONVERSATION_MODE_AUTO; + + // multimodal models (see examples/llava) + std::string mmproj = ""; // path to multimodal projector // NOLINT + std::vector image; // path to image file(s) + + // embedding + bool embedding = false; // get only sentence embedding + int32_t embd_normalize = 2; // normalisation for embeddings (-1=none, 0=max absolute int16, 1=taxicab, 2=euclidean, >2=p-norm) + std::string embd_out = ""; // empty = default, "array" = [[],[]...], "json" = openai style, "json+" = same "json" + cosine similarity matrix + std::string embd_sep = "\n"; // separator of embeddings + bool reranking = false; // enable reranking support on server + + // server params + int32_t port = 8080; // server listens on this network port + int32_t timeout_read = 600; // http read timeout in seconds + int32_t timeout_write = timeout_read; // http write timeout in seconds + int32_t n_threads_http = -1; // number of threads to process HTTP requests (TODO: support threadpool) + int32_t n_cache_reuse = 0; // min chunk size to reuse from the cache via KV shifting + + std::string hostname = "127.0.0.1"; + std::string public_path = ""; // NOLINT + std::string chat_template = ""; // NOLINT + bool use_jinja = false; // NOLINT + bool enable_chat_template = true; + + std::vector api_keys; + + std::string ssl_file_key = ""; // NOLINT + std::string ssl_file_cert = ""; // NOLINT + + // "advanced" endpoints are disabled by default for better security + bool webui = true; + bool endpoint_slots = false; + bool endpoint_props = false; // only control POST requests, not GET + bool endpoint_metrics = false; + + bool log_json = false; + + std::string slot_save_path; + + float 
slot_prompt_similarity = 0.5f; + + // batched-bench params + bool is_pp_shared = false; + + std::vector n_pp; + std::vector n_tg; + std::vector n_pl; + + // retrieval params + std::vector context_files; // context files to embed + + int32_t chunk_size = 64; // chunk size for context embedding + + std::string chunk_separator = "\n"; // chunk separator for context embedding + + // passkey params + int32_t n_junk = 250; // number of times to repeat the junk text + int32_t i_pos = -1; // position of the passkey in the junk text + + // imatrix params + std::string out_file = "imatrix.dat"; // save the resulting imatrix to this file + + int32_t n_out_freq = 10; // output the imatrix every n_out_freq iterations + int32_t n_save_freq = 0; // save the imatrix every n_save_freq iterations + int32_t i_chunk = 0; // start processing from this chunk + + bool process_output = false; // collect data for the output tensor + bool compute_ppl = true; // whether to compute perplexity + + // cvector-generator params + int n_pca_batch = 100; + int n_pca_iterations = 1000; + dimre_method cvector_dimre_method = DIMRE_METHOD_PCA; + std::string cvector_outfile = "control_vector.gguf"; + std::string cvector_positive_file = "examples/cvector-generator/positive.txt"; + std::string cvector_negative_file = "examples/cvector-generator/negative.txt"; + + bool spm_infill = false; // suffix/prefix/middle pattern for infill + + std::string lora_outfile = "ggml-lora-merged-f16.gguf"; + + // batched-bench params + bool batched_bench_output_jsonl = false; +}; + +// call once at the start of a program if it uses libcommon +// initializes the logging system and prints info about the build +void common_init(); + +std::string common_params_get_system_info(const common_params & params); + +bool parse_cpu_range(const std::string & range, bool(&boolmask)[GGML_MAX_N_THREADS]); +bool parse_cpu_mask(const std::string & mask, bool(&boolmask)[GGML_MAX_N_THREADS]); +void postprocess_cpu_params(cpu_params & cpuparams, const cpu_params * role_model = nullptr); +bool set_process_priority(enum ggml_sched_priority prio); + +// +// String utils +// + +#ifdef __GNUC__ +# if defined(__MINGW32__) && !defined(__clang__) +# define LLAMA_COMMON_ATTRIBUTE_FORMAT(...) __attribute__((format(gnu_printf, __VA_ARGS__))) +# else +# define LLAMA_COMMON_ATTRIBUTE_FORMAT(...) __attribute__((format(printf, __VA_ARGS__))) +# endif +#else +# define LLAMA_COMMON_ATTRIBUTE_FORMAT(...) 
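+// the fallback expands to nothing, so compilers without the printf-format attribute simply skip format-string checking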
+#endif + +LLAMA_COMMON_ATTRIBUTE_FORMAT(1, 2) +std::string string_format(const char * fmt, ...); + +std::string string_strip(const std::string & str); +std::string string_get_sortable_timestamp(); + +std::string string_join(const std::vector & values, const std::string & separator); +std::vector string_split(const std::string & str, const std::string & delimiter); +std::string string_repeat(const std::string & str, size_t n); + +void string_replace_all(std::string & s, const std::string & search, const std::string & replace); + +template +static std::vector string_split(const std::string & str, char delim) { + static_assert(!std::is_same::value, "Please use the specialized version for std::string"); + std::vector values; + std::istringstream str_stream(str); + std::string token; + while (std::getline(str_stream, token, delim)) { + T value; + std::istringstream token_stream(token); + token_stream >> value; + values.push_back(value); + } + return values; +} + +template<> +std::vector string_split(const std::string & input, char separator) +{ + std::vector parts; + size_t begin_pos = 0; + size_t separator_pos = input.find(separator); + while (separator_pos != std::string::npos) { + std::string part = input.substr(begin_pos, separator_pos - begin_pos); + parts.emplace_back(part); + begin_pos = separator_pos + 1; + separator_pos = input.find(separator, begin_pos); + } + parts.emplace_back(input.substr(begin_pos, separator_pos - begin_pos)); + return parts; +} + +static bool string_starts_with(const std::string & str, + const std::string & prefix) { // While we wait for C++20's std::string::starts_with... + return str.rfind(prefix, 0) == 0; +} + +static bool string_ends_with(const std::string & str, + const std::string & suffix) { // While we wait for C++20's std::string::ends_with... 
+ return str.size() >= suffix.size() && str.compare(str.size()-suffix.size(), suffix.size(), suffix) == 0; +} + +bool string_parse_kv_override(const char * data, std::vector & overrides); +void string_process_escapes(std::string & input); + +std::string string_from(bool value); +std::string string_from(const std::vector & values); +std::string string_from(const struct llama_context * ctx, const std::vector & tokens); +std::string string_from(const struct llama_context * ctx, const struct llama_batch & batch); + +// +// Filesystem utils +// + +bool fs_validate_filename(const std::string & filename); +bool fs_create_directory_with_parents(const std::string & path); + +std::string fs_get_cache_directory(); +std::string fs_get_cache_file(const std::string & filename); + +// +// Model utils +// + +// note: defines object's lifetime +struct common_init_result { + llama_model_ptr model; + llama_context_ptr context; + + std::vector lora; +}; + +struct common_init_result common_init_from_params(common_params & params); + +struct llama_model_params common_model_params_to_llama ( common_params & params); +struct llama_context_params common_context_params_to_llama(const common_params & params); +struct ggml_threadpool_params ggml_threadpool_params_from_cpu_params(const cpu_params & params); + +struct llama_model * common_load_model_from_url( + const std::string & model_url, + const std::string & local_path, + const std::string & hf_token, + const struct llama_model_params & params); + +struct llama_model * common_load_model_from_hf( + const std::string & repo, + const std::string & remote_path, + const std::string & local_path, + const std::string & hf_token, + const struct llama_model_params & params); + +std::pair common_get_hf_file( + const std::string & hf_repo_with_tag, + const std::string & hf_token); + +// clear LoRA adapters from context, then apply new list of adapters +void common_set_adapter_lora(struct llama_context * ctx, std::vector & lora); + +// +// Batch utils +// + +void common_batch_clear(struct llama_batch & batch); + +void common_batch_add( + struct llama_batch & batch, + llama_token id, + llama_pos pos, + const std::vector & seq_ids, + bool logits); + +// +// Token utils +// + +// longest common prefix +size_t common_lcp(const llama_tokens & a, const llama_tokens & b); + +// longet common subsequence +size_t common_lcs(const llama_tokens & a, const llama_tokens & b); + +// +// Vocab utils +// + +// tokenizes a string into a vector of tokens +// should work similar to Python's `tokenizer.encode` +std::vector common_tokenize( + const struct llama_context * ctx, + const std::string & text, + bool add_special, + bool parse_special = false); + +std::vector common_tokenize( + const struct llama_vocab * vocab, + const std::string & text, + bool add_special, + bool parse_special = false); + +// tokenizes a token into a piece, optionally renders special/control tokens +// should work similar to Python's `tokenizer.id_to_piece` +std::string common_token_to_piece( + const struct llama_context * ctx, + llama_token token, + bool special = true); + +std::string common_token_to_piece( + const struct llama_vocab * vocab, + llama_token token, + bool special = true); + +// detokenizes a vector of tokens into a string +// should work similar to Python's `tokenizer.decode` +// optionally renders special/control tokens +std::string common_detokenize( + const struct llama_context * ctx, + const std::vector & tokens, + bool special = true); + +std::string common_detokenize( + const struct llama_vocab * 
vocab, + const std::vector & tokens, + bool special = true); + +// +// Chat template utils +// + +struct common_tool_call { + std::string name; + std::string arguments; + std::string id; +}; + +// same with llama_chat_message, but uses std::string +struct common_chat_msg { + std::string role; + std::string content; + std::vector tool_calls; + std::string tool_plan = ""; +}; + +// Check if the template supplied via "--chat-template" is supported or not. Returns true if it's valid +bool common_chat_verify_template(const std::string & tmpl, bool use_jinja); + +namespace minja { + class chat_template; +} + +typedef minja::chat_template common_chat_template; + +struct common_chat_templates { + bool has_explicit_template; // Model had builtin template or template overridde was specified. + std::unique_ptr template_default; // always set (defaults to chatml) + std::unique_ptr template_tool_use; +}; + +// CPP wrapper for llama_chat_apply_template +// If the built-in template is not supported, we default to chatml +// If the custom "tmpl" is not supported, we throw an error +std::string common_chat_apply_template( + const common_chat_template & tmpl, + const std::vector & chat, + bool add_ass, + bool use_jinja); + +// Format single message, while taking into account the position of that message in chat history +std::string common_chat_format_single( + const common_chat_template & tmpl, + const std::vector & past_msg, + const common_chat_msg & new_msg, + bool add_ass, + bool use_jinja); + +// Returns an example of formatted chat +std::string common_chat_format_example( + const common_chat_template & tmpl, bool use_jinja); + +common_chat_templates common_chat_templates_from_model(const struct llama_model * model, const std::string & chat_template_override); + +// +// KV cache utils +// + +// Dump the KV cache view with the number of sequences per cell. +void common_kv_cache_dump_view(const llama_kv_cache_view & view, int row_size = 80); + +// Dump the KV cache view showing individual sequences in each cell (long output). +void common_kv_cache_dump_view_seqs(const llama_kv_cache_view & view, int row_size = 40); + +// +// Embedding utils +// + +// TODO: repace embd_norm with an enum +void common_embd_normalize(const float * inp, float * out, int n, int embd_norm); + +float common_embd_similarity_cos(const float * embd1, const float * embd2, int n); + +// +// Control vector utils +// + +struct common_control_vector_data { + int n_embd; + + // stores data for layers [1, n_layer] where n_layer = data.size() / n_embd + std::vector data; +}; + +struct common_control_vector_load_info { + float strength; + + std::string fname; +}; + +// Load control vectors, scale each by strength, and add them together. 
+// On error, returns {-1, empty} +common_control_vector_data common_control_vector_load(const std::vector & load_infos); + +// +// Split utils +// + +namespace { + +const char * const LLM_KV_SPLIT_NO = "split.no"; +const char * const LLM_KV_SPLIT_COUNT = "split.count"; +const char * const LLM_KV_SPLIT_TENSORS_COUNT = "split.tensors.count"; + +} diff --git a/llama.cpp/common/console.cpp b/llama.cpp/common/console.cpp new file mode 100644 index 0000000000000000000000000000000000000000..078a8d678d933bec79eda408ff45858758c16f37 --- /dev/null +++ b/llama.cpp/common/console.cpp @@ -0,0 +1,504 @@ +#include "console.h" +#include +#include + +#if defined(_WIN32) +#define WIN32_LEAN_AND_MEAN +#ifndef NOMINMAX +#define NOMINMAX +#endif +#include +#include +#include +#ifndef ENABLE_VIRTUAL_TERMINAL_PROCESSING +#define ENABLE_VIRTUAL_TERMINAL_PROCESSING 0x0004 +#endif +#else +#include +#include +#include +#include +#include +#include +#include +#include +#endif + +#define ANSI_COLOR_RED "\x1b[31m" +#define ANSI_COLOR_GREEN "\x1b[32m" +#define ANSI_COLOR_YELLOW "\x1b[33m" +#define ANSI_COLOR_BLUE "\x1b[34m" +#define ANSI_COLOR_MAGENTA "\x1b[35m" +#define ANSI_COLOR_CYAN "\x1b[36m" +#define ANSI_COLOR_RESET "\x1b[0m" +#define ANSI_BOLD "\x1b[1m" + +namespace console { + + // + // Console state + // + + static bool advanced_display = false; + static bool simple_io = true; + static display_t current_display = reset; + + static FILE* out = stdout; + +#if defined (_WIN32) + static void* hConsole; +#else + static FILE* tty = nullptr; + static termios initial_state; +#endif + + // + // Init and cleanup + // + + void init(bool use_simple_io, bool use_advanced_display) { + advanced_display = use_advanced_display; + simple_io = use_simple_io; +#if defined(_WIN32) + // Windows-specific console initialization + DWORD dwMode = 0; + hConsole = GetStdHandle(STD_OUTPUT_HANDLE); + if (hConsole == INVALID_HANDLE_VALUE || !GetConsoleMode(hConsole, &dwMode)) { + hConsole = GetStdHandle(STD_ERROR_HANDLE); + if (hConsole != INVALID_HANDLE_VALUE && (!GetConsoleMode(hConsole, &dwMode))) { + hConsole = nullptr; + simple_io = true; + } + } + if (hConsole) { + // Check conditions combined to reduce nesting + if (advanced_display && !(dwMode & ENABLE_VIRTUAL_TERMINAL_PROCESSING) && + !SetConsoleMode(hConsole, dwMode | ENABLE_VIRTUAL_TERMINAL_PROCESSING)) { + advanced_display = false; + } + // Set console output codepage to UTF8 + SetConsoleOutputCP(CP_UTF8); + } + HANDLE hConIn = GetStdHandle(STD_INPUT_HANDLE); + if (hConIn != INVALID_HANDLE_VALUE && GetConsoleMode(hConIn, &dwMode)) { + // Set console input codepage to UTF16 + _setmode(_fileno(stdin), _O_WTEXT); + + // Set ICANON (ENABLE_LINE_INPUT) and ECHO (ENABLE_ECHO_INPUT) + if (simple_io) { + dwMode |= ENABLE_LINE_INPUT | ENABLE_ECHO_INPUT; + } else { + dwMode &= ~(ENABLE_LINE_INPUT | ENABLE_ECHO_INPUT); + } + if (!SetConsoleMode(hConIn, dwMode)) { + simple_io = true; + } + } + if (simple_io) { + _setmode(_fileno(stdin), _O_U8TEXT); + } +#else + // POSIX-specific console initialization + if (!simple_io) { + struct termios new_termios; + tcgetattr(STDIN_FILENO, &initial_state); + new_termios = initial_state; + new_termios.c_lflag &= ~(ICANON | ECHO); + new_termios.c_cc[VMIN] = 1; + new_termios.c_cc[VTIME] = 0; + tcsetattr(STDIN_FILENO, TCSANOW, &new_termios); + + tty = fopen("/dev/tty", "w+"); + if (tty != nullptr) { + out = tty; + } + } + + setlocale(LC_ALL, ""); +#endif + } + + void cleanup() { + // Reset console display + set_display(reset); + +#if !defined(_WIN32) + // 
Restore settings on POSIX systems + if (!simple_io) { + if (tty != nullptr) { + out = stdout; + fclose(tty); + tty = nullptr; + } + tcsetattr(STDIN_FILENO, TCSANOW, &initial_state); + } +#endif + } + + // + // Display and IO + // + + // Keep track of current display and only emit ANSI code if it changes + void set_display(display_t display) { + if (advanced_display && current_display != display) { + fflush(stdout); + switch(display) { + case reset: + fprintf(out, ANSI_COLOR_RESET); + break; + case prompt: + fprintf(out, ANSI_COLOR_YELLOW); + break; + case user_input: + fprintf(out, ANSI_BOLD ANSI_COLOR_GREEN); + break; + case error: + fprintf(out, ANSI_BOLD ANSI_COLOR_RED); + } + current_display = display; + fflush(out); + } + } + + static char32_t getchar32() { +#if defined(_WIN32) + HANDLE hConsole = GetStdHandle(STD_INPUT_HANDLE); + wchar_t high_surrogate = 0; + + while (true) { + INPUT_RECORD record; + DWORD count; + if (!ReadConsoleInputW(hConsole, &record, 1, &count) || count == 0) { + return WEOF; + } + + if (record.EventType == KEY_EVENT && record.Event.KeyEvent.bKeyDown) { + wchar_t wc = record.Event.KeyEvent.uChar.UnicodeChar; + if (wc == 0) { + continue; + } + + if ((wc >= 0xD800) && (wc <= 0xDBFF)) { // Check if wc is a high surrogate + high_surrogate = wc; + continue; + } + if ((wc >= 0xDC00) && (wc <= 0xDFFF)) { // Check if wc is a low surrogate + if (high_surrogate != 0) { // Check if we have a high surrogate + return ((high_surrogate - 0xD800) << 10) + (wc - 0xDC00) + 0x10000; + } + } + + high_surrogate = 0; // Reset the high surrogate + return static_cast(wc); + } + } +#else + wchar_t wc = getwchar(); + if (static_cast(wc) == WEOF) { + return WEOF; + } + +#if WCHAR_MAX == 0xFFFF + if ((wc >= 0xD800) && (wc <= 0xDBFF)) { // Check if wc is a high surrogate + wchar_t low_surrogate = getwchar(); + if ((low_surrogate >= 0xDC00) && (low_surrogate <= 0xDFFF)) { // Check if the next wchar is a low surrogate + return (static_cast(wc & 0x03FF) << 10) + (low_surrogate & 0x03FF) + 0x10000; + } + } + if ((wc >= 0xD800) && (wc <= 0xDFFF)) { // Invalid surrogate pair + return 0xFFFD; // Return the replacement character U+FFFD + } +#endif + + return static_cast(wc); +#endif + } + + static void pop_cursor() { +#if defined(_WIN32) + if (hConsole != NULL) { + CONSOLE_SCREEN_BUFFER_INFO bufferInfo; + GetConsoleScreenBufferInfo(hConsole, &bufferInfo); + + COORD newCursorPosition = bufferInfo.dwCursorPosition; + if (newCursorPosition.X == 0) { + newCursorPosition.X = bufferInfo.dwSize.X - 1; + newCursorPosition.Y -= 1; + } else { + newCursorPosition.X -= 1; + } + + SetConsoleCursorPosition(hConsole, newCursorPosition); + return; + } +#endif + putc('\b', out); + } + + static int estimateWidth(char32_t codepoint) { +#if defined(_WIN32) + (void)codepoint; + return 1; +#else + return wcwidth(codepoint); +#endif + } + + static int put_codepoint(const char* utf8_codepoint, size_t length, int expectedWidth) { +#if defined(_WIN32) + CONSOLE_SCREEN_BUFFER_INFO bufferInfo; + if (!GetConsoleScreenBufferInfo(hConsole, &bufferInfo)) { + // go with the default + return expectedWidth; + } + COORD initialPosition = bufferInfo.dwCursorPosition; + DWORD nNumberOfChars = length; + WriteConsole(hConsole, utf8_codepoint, nNumberOfChars, &nNumberOfChars, NULL); + + CONSOLE_SCREEN_BUFFER_INFO newBufferInfo; + GetConsoleScreenBufferInfo(hConsole, &newBufferInfo); + + // Figure out our real position if we're in the last column + if (utf8_codepoint[0] != 0x09 && initialPosition.X == newBufferInfo.dwSize.X - 1) { + DWORD 
nNumberOfChars; + WriteConsole(hConsole, &" \b", 2, &nNumberOfChars, NULL); + GetConsoleScreenBufferInfo(hConsole, &newBufferInfo); + } + + int width = newBufferInfo.dwCursorPosition.X - initialPosition.X; + if (width < 0) { + width += newBufferInfo.dwSize.X; + } + return width; +#else + // We can trust expectedWidth if we've got one + if (expectedWidth >= 0 || tty == nullptr) { + fwrite(utf8_codepoint, length, 1, out); + return expectedWidth; + } + + fputs("\033[6n", tty); // Query cursor position + int x1; + int y1; + int x2; + int y2; + int results = 0; + results = fscanf(tty, "\033[%d;%dR", &y1, &x1); + + fwrite(utf8_codepoint, length, 1, tty); + + fputs("\033[6n", tty); // Query cursor position + results += fscanf(tty, "\033[%d;%dR", &y2, &x2); + + if (results != 4) { + return expectedWidth; + } + + int width = x2 - x1; + if (width < 0) { + // Calculate the width considering text wrapping + struct winsize w; + ioctl(STDOUT_FILENO, TIOCGWINSZ, &w); + width += w.ws_col; + } + return width; +#endif + } + + static void replace_last(char ch) { +#if defined(_WIN32) + pop_cursor(); + put_codepoint(&ch, 1, 1); +#else + fprintf(out, "\b%c", ch); +#endif + } + + static void append_utf8(char32_t ch, std::string & out) { + if (ch <= 0x7F) { + out.push_back(static_cast(ch)); + } else if (ch <= 0x7FF) { + out.push_back(static_cast(0xC0 | ((ch >> 6) & 0x1F))); + out.push_back(static_cast(0x80 | (ch & 0x3F))); + } else if (ch <= 0xFFFF) { + out.push_back(static_cast(0xE0 | ((ch >> 12) & 0x0F))); + out.push_back(static_cast(0x80 | ((ch >> 6) & 0x3F))); + out.push_back(static_cast(0x80 | (ch & 0x3F))); + } else if (ch <= 0x10FFFF) { + out.push_back(static_cast(0xF0 | ((ch >> 18) & 0x07))); + out.push_back(static_cast(0x80 | ((ch >> 12) & 0x3F))); + out.push_back(static_cast(0x80 | ((ch >> 6) & 0x3F))); + out.push_back(static_cast(0x80 | (ch & 0x3F))); + } else { + // Invalid Unicode code point + } + } + + // Helper function to remove the last UTF-8 character from a string + static void pop_back_utf8_char(std::string & line) { + if (line.empty()) { + return; + } + + size_t pos = line.length() - 1; + + // Find the start of the last UTF-8 character (checking up to 4 bytes back) + for (size_t i = 0; i < 3 && pos > 0; ++i, --pos) { + if ((line[pos] & 0xC0) != 0x80) { + break; // Found the start of the character + } + } + line.erase(pos); + } + + static bool readline_advanced(std::string & line, bool multiline_input) { + if (out != stdout) { + fflush(stdout); + } + + line.clear(); + std::vector widths; + bool is_special_char = false; + bool end_of_stream = false; + + char32_t input_char; + while (true) { + fflush(out); // Ensure all output is displayed before waiting for input + input_char = getchar32(); + + if (input_char == '\r' || input_char == '\n') { + break; + } + + if (input_char == (char32_t) WEOF || input_char == 0x04 /* Ctrl+D*/) { + end_of_stream = true; + break; + } + + if (is_special_char) { + set_display(user_input); + replace_last(line.back()); + is_special_char = false; + } + + if (input_char == '\033') { // Escape sequence + char32_t code = getchar32(); + if (code == '[' || code == 0x1B) { + // Discard the rest of the escape sequence + while ((code = getchar32()) != (char32_t) WEOF) { + if ((code >= 'A' && code <= 'Z') || (code >= 'a' && code <= 'z') || code == '~') { + break; + } + } + } + } else if (input_char == 0x08 || input_char == 0x7F) { // Backspace + if (!widths.empty()) { + int count; + do { + count = widths.back(); + widths.pop_back(); + // Move cursor back, print space, and move 
cursor back again + for (int i = 0; i < count; i++) { + replace_last(' '); + pop_cursor(); + } + pop_back_utf8_char(line); + } while (count == 0 && !widths.empty()); + } + } else { + int offset = line.length(); + append_utf8(input_char, line); + int width = put_codepoint(line.c_str() + offset, line.length() - offset, estimateWidth(input_char)); + if (width < 0) { + width = 0; + } + widths.push_back(width); + } + + if (!line.empty() && (line.back() == '\\' || line.back() == '/')) { + set_display(prompt); + replace_last(line.back()); + is_special_char = true; + } + } + + bool has_more = multiline_input; + if (is_special_char) { + replace_last(' '); + pop_cursor(); + + char last = line.back(); + line.pop_back(); + if (last == '\\') { + line += '\n'; + fputc('\n', out); + has_more = !has_more; + } else { + // llama will just eat the single space, it won't act as a space + if (line.length() == 1 && line.back() == ' ') { + line.clear(); + pop_cursor(); + } + has_more = false; + } + } else { + if (end_of_stream) { + has_more = false; + } else { + line += '\n'; + fputc('\n', out); + } + } + + fflush(out); + return has_more; + } + + static bool readline_simple(std::string & line, bool multiline_input) { +#if defined(_WIN32) + std::wstring wline; + if (!std::getline(std::wcin, wline)) { + // Input stream is bad or EOF received + line.clear(); + GenerateConsoleCtrlEvent(CTRL_C_EVENT, 0); + return false; + } + + int size_needed = WideCharToMultiByte(CP_UTF8, 0, &wline[0], (int)wline.size(), NULL, 0, NULL, NULL); + line.resize(size_needed); + WideCharToMultiByte(CP_UTF8, 0, &wline[0], (int)wline.size(), &line[0], size_needed, NULL, NULL); +#else + if (!std::getline(std::cin, line)) { + // Input stream is bad or EOF received + line.clear(); + return false; + } +#endif + if (!line.empty()) { + char last = line.back(); + if (last == '/') { // Always return control on '/' symbol + line.pop_back(); + return false; + } + if (last == '\\') { // '\\' changes the default action + line.pop_back(); + multiline_input = !multiline_input; + } + } + line += '\n'; + + // By default, continue input if multiline_input is set + return multiline_input; + } + + bool readline(std::string & line, bool multiline_input) { + set_display(user_input); + + if (simple_io) { + return readline_simple(line, multiline_input); + } + return readline_advanced(line, multiline_input); + } + +} diff --git a/llama.cpp/common/console.h b/llama.cpp/common/console.h new file mode 100644 index 0000000000000000000000000000000000000000..ec175269b9d8af48803d0b6e618d008a9ab99b4d --- /dev/null +++ b/llama.cpp/common/console.h @@ -0,0 +1,19 @@ +// Console functions + +#pragma once + +#include + +namespace console { + enum display_t { + reset = 0, + prompt, + user_input, + error + }; + + void init(bool use_simple_io, bool use_advanced_display); + void cleanup(); + void set_display(display_t display); + bool readline(std::string & line, bool multiline_input); +} diff --git a/llama.cpp/common/json-schema-to-grammar.cpp b/llama.cpp/common/json-schema-to-grammar.cpp new file mode 100644 index 0000000000000000000000000000000000000000..3ebcc3d9fbbf8fe718c66a4f8a348b54bbae8f1a --- /dev/null +++ b/llama.cpp/common/json-schema-to-grammar.cpp @@ -0,0 +1,1025 @@ +#include "json-schema-to-grammar.h" +#include "common.h" + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +using json = nlohmann::ordered_json; + +static std::string build_repetition(const std::string & item_rule, int min_items, int max_items, const std::string 
& separator_rule = "") { + auto has_max = max_items != std::numeric_limits::max(); + + if (min_items == 0 && max_items == 1) { + return item_rule + "?"; + } + + if (separator_rule.empty()) { + if (min_items == 1 && !has_max) { + return item_rule + "+"; + } else if (min_items == 0 && !has_max) { + return item_rule + "*"; + } else { + return item_rule + "{" + std::to_string(min_items) + "," + (has_max ? std::to_string(max_items) : "") + "}"; + } + } + + auto result = item_rule + " " + build_repetition("(" + separator_rule + " " + item_rule + ")", min_items == 0 ? 0 : min_items - 1, has_max ? max_items - 1 : max_items); + if (min_items == 0) { + result = "(" + result + ")?"; + } + return result; +} + +/* Minimalistic replacement for std::string_view, which is only available from C++17 onwards */ +class string_view { + const std::string & _str; + const size_t _start; + const size_t _end; +public: + string_view(const std::string & str, size_t start = 0, size_t end = std::string::npos) : _str(str), _start(start), _end(end == std::string::npos ? str.length() : end) {} + + size_t size() const { + return _end - _start; + } + + size_t length() const { + return size(); + } + + operator std::string() const { + return str(); + } + + std::string str() const { + return _str.substr(_start, _end - _start); + } + + string_view substr(size_t pos, size_t len = std::string::npos) const { + return string_view(_str, _start + pos, len == std::string::npos ? _end : _start + pos + len); + } + + char operator[](size_t pos) const { + auto index = _start + pos; + if (index >= _end) { + throw std::out_of_range("string_view index out of range"); + } + return _str[_start + pos]; + } + + bool operator==(const string_view & other) const { + std::string this_str = *this; + std::string other_str = other; + return this_str == other_str; + } +}; + +static void _build_min_max_int(int min_value, int max_value, std::stringstream & out, int decimals_left = 16, bool top_level = true) { + auto has_min = min_value != std::numeric_limits::min(); + auto has_max = max_value != std::numeric_limits::max(); + + auto digit_range = [&](char from, char to) { + out << "["; + if (from == to) { + out << from; + } else { + out << from << "-" << to; + } + out << "]"; + }; + auto more_digits = [&](int min_digits, int max_digits) { + out << "[0-9]"; + if (min_digits == max_digits && min_digits == 1) { + return; + } + out << "{"; + out << min_digits; + if (max_digits != min_digits) { + out << ","; + if (max_digits != std::numeric_limits::max()) { + out << max_digits; + } + } + out << "}"; + }; + std::function uniform_range = + [&](const string_view & from, const string_view & to) { + size_t i = 0; + while (i < from.length() && i < to.length() && from[i] == to[i]) { + i++; + } + if (i > 0) { + out << "\"" << from.substr(0, i).str() << "\""; + } + if (i < from.length() && i < to.length()) { + if (i > 0) { + out << " "; + } + auto sub_len = from.length() - i - 1; + if (sub_len > 0) { + auto from_sub = from.substr(i + 1); + auto to_sub = to.substr(i + 1); + auto sub_zeros = string_repeat("0", sub_len); + auto sub_nines = string_repeat("9", sub_len); + + auto to_reached = false; + out << "("; + if (from_sub == sub_zeros) { + digit_range(from[i], to[i] - 1); + out << " "; + more_digits(sub_len, sub_len); + } else { + out << "[" << from[i] << "] "; + out << "("; + uniform_range(from_sub, sub_nines); + out << ")"; + if (from[i] < to[i] - 1) { + out << " | "; + if (to_sub == sub_nines) { + digit_range(from[i] + 1, to[i]); + to_reached = true; + } else { + 
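    // the upper suffix is not all 9s, so leading digits can only run up to to[i] - 1 here; the exact to[i] prefix is emitted in the !to_reached branch below +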
digit_range(from[i] + 1, to[i] - 1); + } + out << " "; + more_digits(sub_len, sub_len); + } + } + if (!to_reached) { + out << " | "; + digit_range(to[i], to[i]); + out << " "; + uniform_range(sub_zeros, to_sub); + } + out << ")"; + } else { + out << "[" << from[i] << "-" << to[i] << "]"; + } + } + }; + + if (has_min && has_max) { + if (min_value < 0 && max_value < 0) { + out << "\"-\" ("; + _build_min_max_int(-max_value, -min_value, out, decimals_left, /* top_level= */ true); + out << ")"; + return; + } + + if (min_value < 0) { + out << "\"-\" ("; + _build_min_max_int(0, -min_value, out, decimals_left, /* top_level= */ true); + out << ") | "; + min_value = 0; + } + + auto min_s = std::to_string(min_value); + auto max_s = std::to_string(max_value); + auto min_digits = min_s.length(); + auto max_digits = max_s.length(); + + for (auto digits = min_digits; digits < max_digits; digits++) { + uniform_range(min_s, string_repeat("9", digits)); + min_s = "1" + string_repeat("0", digits); + out << " | "; + } + uniform_range(min_s, max_s); + return; + } + + auto less_decimals = std::max(decimals_left - 1, 1); + + if (has_min) { + if (min_value < 0) { + out << "\"-\" ("; + _build_min_max_int(std::numeric_limits::min(), -min_value, out, decimals_left, /* top_level= */ false); + out << ") | [0] | [1-9] "; + more_digits(0, decimals_left - 1); + } else if (min_value == 0) { + if (top_level) { + out << "[0] | [1-9] "; + more_digits(0, less_decimals); + } else { + more_digits(1, decimals_left); + } + } else if (min_value <= 9) { + char c = '0' + min_value; + auto range_start = top_level ? '1' : '0'; + if (c > range_start) { + digit_range(range_start, c - 1); + out << " "; + more_digits(1, less_decimals); + out << " | "; + } + digit_range(c, '9'); + out << " "; + more_digits(0, less_decimals); + } else { + auto min_s = std::to_string(min_value); + auto len = min_s.length(); + auto c = min_s[0]; + + if (c > '1') { + digit_range(top_level ? '1' : '0', c - 1); + out << " "; + more_digits(len, less_decimals); + out << " | "; + } + digit_range(c, c); + out << " ("; + _build_min_max_int(std::stoi(min_s.substr(1)), std::numeric_limits::max(), out, less_decimals, /* top_level= */ false); + out << ")"; + if (c < '9') { + out << " | "; + digit_range(c + 1, '9'); + out << " "; + more_digits(len - 1, less_decimals); + } + } + return; + } + + if (has_max) { + if (max_value >= 0) { + if (top_level) { + out << "\"-\" [1-9] "; + more_digits(0, less_decimals); + out << " | "; + } + _build_min_max_int(0, max_value, out, decimals_left, /* top_level= */ true); + } else { + out << "\"-\" ("; + _build_min_max_int(-max_value, std::numeric_limits::max(), out, decimals_left, /* top_level= */ false); + out << ")"; + } + return; + } + + throw std::runtime_error("At least one of min_value or max_value must be set"); +} + +const std::string SPACE_RULE = "| \" \" | \"\\n\" [ \\t]{0,20}"; + +struct BuiltinRule { + std::string content; + std::vector deps; +}; + +std::unordered_map PRIMITIVE_RULES = { + {"boolean", {"(\"true\" | \"false\") space", {}}}, + {"decimal-part", {"[0-9]{1,16}", {}}}, + {"integral-part", {"[0] | [1-9] [0-9]{0,15}", {}}}, + {"number", {"(\"-\"? integral-part) (\".\" decimal-part)? ([eE] [-+]? integral-part)? space", {"integral-part", "decimal-part"}}}, + {"integer", {"(\"-\"? 
integral-part) space", {"integral-part"}}}, + {"value", {"object | array | string | number | boolean | null", {"object", "array", "string", "number", "boolean", "null"}}}, + {"object", {"\"{\" space ( string \":\" space value (\",\" space string \":\" space value)* )? \"}\" space", {"string", "value"}}}, + {"array", {"\"[\" space ( value (\",\" space value)* )? \"]\" space", {"value"}}}, + {"uuid", {"\"\\\"\" [0-9a-fA-F]{8} \"-\" [0-9a-fA-F]{4} \"-\" [0-9a-fA-F]{4} \"-\" [0-9a-fA-F]{4} \"-\" [0-9a-fA-F]{12} \"\\\"\" space", {}}}, + {"char", {"[^\"\\\\\\x7F\\x00-\\x1F] | [\\\\] ([\"\\\\bfnrt] | \"u\" [0-9a-fA-F]{4})", {}}}, + {"string", {"\"\\\"\" char* \"\\\"\" space", {"char"}}}, + {"null", {"\"null\" space", {}}}, +}; + +std::unordered_map STRING_FORMAT_RULES = { + {"date", {"[0-9]{4} \"-\" ( \"0\" [1-9] | \"1\" [0-2] ) \"-\" ( \"0\" [1-9] | [1-2] [0-9] | \"3\" [0-1] )", {}}}, + {"time", {"([01] [0-9] | \"2\" [0-3]) \":\" [0-5] [0-9] \":\" [0-5] [0-9] ( \".\" [0-9]{3} )? ( \"Z\" | ( \"+\" | \"-\" ) ( [01] [0-9] | \"2\" [0-3] ) \":\" [0-5] [0-9] )", {}}}, + {"date-time", {"date \"T\" time", {"date", "time"}}}, + {"date-string", {"\"\\\"\" date \"\\\"\" space", {"date"}}}, + {"time-string", {"\"\\\"\" time \"\\\"\" space", {"time"}}}, + {"date-time-string", {"\"\\\"\" date-time \"\\\"\" space", {"date-time"}}} +}; + +static bool is_reserved_name(const std::string & name) { + static std::unordered_set RESERVED_NAMES; + if (RESERVED_NAMES.empty()) { + RESERVED_NAMES.insert("root"); + for (const auto &p : PRIMITIVE_RULES) RESERVED_NAMES.insert(p.first); + for (const auto &p : STRING_FORMAT_RULES) RESERVED_NAMES.insert(p.first); + } + return RESERVED_NAMES.find(name) != RESERVED_NAMES.end(); +} + +std::regex INVALID_RULE_CHARS_RE("[^a-zA-Z0-9-]+"); +std::regex GRAMMAR_LITERAL_ESCAPE_RE("[\r\n\"]"); +std::regex GRAMMAR_RANGE_LITERAL_ESCAPE_RE("[\r\n\"\\]\\-\\\\]"); +std::unordered_map GRAMMAR_LITERAL_ESCAPES = { + {'\r', "\\r"}, {'\n', "\\n"}, {'"', "\\\""}, {'-', "\\-"}, {']', "\\]"} +}; + +std::unordered_set NON_LITERAL_SET = {'|', '.', '(', ')', '[', ']', '{', '}', '*', '+', '?'}; +std::unordered_set ESCAPED_IN_REGEXPS_BUT_NOT_IN_LITERALS = {'^', '$', '.', '[', ']', '(', ')', '|', '{', '}', '*', '+', '?'}; + +static std::string replacePattern(const std::string & input, const std::regex & regex, const std::function & replacement) { + std::smatch match; + std::string result; + + std::string::const_iterator searchStart(input.cbegin()); + std::string::const_iterator searchEnd(input.cend()); + + while (std::regex_search(searchStart, searchEnd, match, regex)) { + result.append(searchStart, searchStart + match.position()); + result.append(replacement(match)); + searchStart = match.suffix().first; + } + + result.append(searchStart, searchEnd); + + return result; +} + +static std::string format_literal(const std::string & literal) { + std::string escaped = replacePattern(literal, GRAMMAR_LITERAL_ESCAPE_RE, [&](const std::smatch & match) { + char c = match.str()[0]; + return GRAMMAR_LITERAL_ESCAPES.at(c); + }); + return "\"" + escaped + "\""; +} + +class SchemaConverter { +private: + friend std::string build_grammar(const std::function & cb, const common_grammar_options & options); + std::function _fetch_json; + bool _dotall; + std::map _rules; + std::unordered_map _refs; + std::unordered_set _refs_being_resolved; + std::vector _errors; + std::vector _warnings; + + std::string _add_rule(const std::string & name, const std::string & rule) { + std::string esc_name = regex_replace(name, 
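/*
 * Worked example (illustrative, derived from GRAMMAR_LITERAL_ESCAPES and
 * format_literal() above): format_literal() wraps its argument in double
 * quotes and escapes carriage returns, newlines and embedded quotes so the
 * result is a valid GBNF terminal. For the C++ string
 *
 *   std::string s = "say \"hi\"\n";
 *
 * format_literal(s) produces the grammar terminal
 *
 *   "say \"hi\"\n"
 *
 * i.e. the embedded quotes become \" and the newline becomes the two-character
 * sequence \n, with enclosing double quotes added. replacePattern() is the
 * regex-with-callback substitution helper used to apply those escapes.
 */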
INVALID_RULE_CHARS_RE, "-"); + if (_rules.find(esc_name) == _rules.end() || _rules[esc_name] == rule) { + _rules[esc_name] = rule; + return esc_name; + } else { + int i = 0; + while (_rules.find(esc_name + std::to_string(i)) != _rules.end() && _rules[esc_name + std::to_string(i)] != rule) { + i++; + } + std::string key = esc_name + std::to_string(i); + _rules[key] = rule; + return key; + } + } + + std::string _generate_union_rule(const std::string & name, const std::vector & alt_schemas) { + std::vector rules; + for (size_t i = 0; i < alt_schemas.size(); i++) { + rules.push_back(visit(alt_schemas[i], name + (name.empty() ? "alternative-" : "-") + std::to_string(i))); + } + return string_join(rules, " | "); + } + + std::string _visit_pattern(const std::string & pattern, const std::string & name) { + if (!(pattern.front() == '^' && pattern.back() == '$')) { + _errors.push_back("Pattern must start with '^' and end with '$'"); + return ""; + } + std::string sub_pattern = pattern.substr(1, pattern.length() - 2); + std::unordered_map sub_rule_ids; + + size_t i = 0; + size_t length = sub_pattern.length(); + + using literal_or_rule = std::pair; + auto to_rule = [&](const literal_or_rule & ls) { + auto is_literal = ls.second; + auto s = ls.first; + return is_literal ? "\"" + s + "\"" : s; + }; + std::function transform = [&]() -> literal_or_rule { + size_t start = i; + std::vector seq; + + auto get_dot = [&]() { + std::string rule; + if (_dotall) { + rule = "[\\U00000000-\\U0010FFFF]"; + } else { + rule = "[^\\x0A\\x0D]"; + } + return _add_rule("dot", rule); + }; + + // Joins the sequence, merging consecutive literals together. + auto join_seq = [&]() { + std::vector ret; + + std::string literal; + auto flush_literal = [&]() { + if (literal.empty()) { + return false; + } + ret.emplace_back(literal, true); + literal.clear(); + return true; + }; + + for (const auto & item : seq) { + auto is_literal = item.second; + if (is_literal) { + literal += item.first; + } else { + flush_literal(); + ret.push_back(item); + } + } + flush_literal(); + + std::vector results; + for (const auto & item : ret) { + results.push_back(to_rule(item)); + } + return std::make_pair(string_join(results, " "), false); + }; + + while (i < length) { + char c = sub_pattern[i]; + if (c == '.') { + seq.emplace_back(get_dot(), false); + i++; + } else if (c == '(') { + i++; + if (i < length) { + if (sub_pattern[i] == '?') { + _warnings.push_back("Unsupported pattern syntax"); + } + } + seq.emplace_back("(" + to_rule(transform()) + ")", false); + } else if (c == ')') { + i++; + if (start > 0 && sub_pattern[start - 1] != '(') { + _errors.push_back("Unbalanced parentheses"); + } + return join_seq(); + } else if (c == '[') { + std::string square_brackets = std::string(1, c); + i++; + while (i < length && sub_pattern[i] != ']') { + if (sub_pattern[i] == '\\') { + square_brackets += sub_pattern.substr(i, 2); + i += 2; + } else { + square_brackets += sub_pattern[i]; + i++; + } + } + if (i >= length) { + _errors.push_back("Unbalanced square brackets"); + } + square_brackets += ']'; + i++; + seq.emplace_back(square_brackets, false); + } else if (c == '|') { + seq.emplace_back("|", false); + i++; + } else if (c == '*' || c == '+' || c == '?') { + seq.back() = std::make_pair(to_rule(seq.back()) + c, false); + i++; + } else if (c == '{') { + std::string curly_brackets = std::string(1, c); + i++; + while (i < length && sub_pattern[i] != '}') { + curly_brackets += sub_pattern[i]; + i++; + } + if (i >= length) { + _errors.push_back("Unbalanced curly 
brackets"); + } + curly_brackets += '}'; + i++; + auto nums = string_split(curly_brackets.substr(1, curly_brackets.length() - 2), ","); + int min_times = 0; + int max_times = std::numeric_limits::max(); + try { + if (nums.size() == 1) { + min_times = max_times = std::stoi(nums[0]); + } else if (nums.size() != 2) { + _errors.push_back("Wrong number of values in curly brackets"); + } else { + if (!nums[0].empty()) { + min_times = std::stoi(nums[0]); + } + if (!nums[1].empty()) { + max_times = std::stoi(nums[1]); + } + } + } catch (const std::invalid_argument & e) { + _errors.push_back("Invalid number in curly brackets"); + return std::make_pair("", false); + } + auto &last = seq.back(); + auto &sub = last.first; + auto sub_is_literal = last.second; + + if (!sub_is_literal) { + std::string & sub_id = sub_rule_ids[sub]; + if (sub_id.empty()) { + sub_id = _add_rule(name + "-" + std::to_string(sub_rule_ids.size()), sub); + } + sub = sub_id; + } + seq.back().first = build_repetition( + sub_is_literal ? "\"" + sub + "\"" : sub, + min_times, + max_times, + "" + ); + seq.back().second = false; + } else { + std::string literal; + auto is_non_literal = [&](char c) { + return NON_LITERAL_SET.find(c) != NON_LITERAL_SET.end(); + }; + while (i < length) { + if (sub_pattern[i] == '\\' && i < length - 1) { + char next = sub_pattern[i + 1]; + if (ESCAPED_IN_REGEXPS_BUT_NOT_IN_LITERALS.find(next) != ESCAPED_IN_REGEXPS_BUT_NOT_IN_LITERALS.end()) { + i++; + literal += sub_pattern[i]; + i++; + } else { + literal += sub_pattern.substr(i, 2); + i += 2; + } + } else if (sub_pattern[i] == '"') { + literal += "\\\""; + i++; + } else if (!is_non_literal(sub_pattern[i]) && + (i == length - 1 || literal.empty() || sub_pattern[i + 1] == '.' || !is_non_literal(sub_pattern[i + 1]))) { + literal += sub_pattern[i]; + i++; + } else { + break; + } + } + if (!literal.empty()) { + seq.emplace_back(literal, true); + } + } + } + return join_seq(); + }; + return _add_rule(name, "\"\\\"\" (" + to_rule(transform()) + ") \"\\\"\" space"); + } + + /* + Returns a rule that matches a JSON string that is none of the provided strings + + not_strings({"a"}) + -> ["] ( [a] char+ | [^"a] char* )? ["] space + not_strings({"and", "also"}) + -> ["] ( [a] ([l] ([s] ([o] char+ | [^"o] char*) | [^"s] char*) | [n] ([d] char+ | [^"d] char*) | [^"ln] char*) | [^"a] char* )? 
["] space + */ + std::string _not_strings(const std::vector & strings) { + + struct TrieNode { + std::map children; + bool is_end_of_string; + + TrieNode() : is_end_of_string(false) {} + + void insert(const std::string & string) { + auto node = this; + for (char c : string) { + node = &node->children[c]; + } + node->is_end_of_string = true; + } + }; + + TrieNode trie; + for (const auto & s : strings) { + trie.insert(s); + } + + std::string char_rule = _add_primitive("char", PRIMITIVE_RULES.at("char")); + std::ostringstream out; + out << "[\"] ( "; + std::function visit = [&](const TrieNode & node) { + std::ostringstream rejects; + auto first = true; + for (const auto & kv : node.children) { + rejects << kv.first; + if (first) { + first = false; + } else { + out << " | "; + } + out << "[" << kv.first << "]"; + if (!kv.second.children.empty()) { + out << " ("; + visit(kv.second); + out << ")"; + } else if (kv.second.is_end_of_string) { + out << " " << char_rule << "+"; + } + } + if (!node.children.empty()) { + if (!first) { + out << " | "; + } + out << "[^\"" << rejects.str() << "] " << char_rule << "*"; + } + }; + visit(trie); + + out << " )"; + if (!trie.is_end_of_string) { + out << "?"; + } + out << " [\"] space"; + return out.str(); + } + + std::string _resolve_ref(const std::string & ref) { + std::string ref_name = ref.substr(ref.find_last_of('/') + 1); + if (_rules.find(ref_name) == _rules.end() && _refs_being_resolved.find(ref) == _refs_being_resolved.end()) { + _refs_being_resolved.insert(ref); + json resolved = _refs[ref]; + ref_name = visit(resolved, ref_name); + _refs_being_resolved.erase(ref); + } + return ref_name; + } + + std::string _build_object_rule( + const std::vector> & properties, + const std::unordered_set & required, + const std::string & name, + const json & additional_properties) + { + std::vector required_props; + std::vector optional_props; + std::unordered_map prop_kv_rule_names; + std::vector prop_names; + for (const auto & kv : properties) { + const auto &prop_name = kv.first; + const auto &prop_schema = kv.second; + + std::string prop_rule_name = visit(prop_schema, name + (name.empty() ? "" : "-") + prop_name); + prop_kv_rule_names[prop_name] = _add_rule( + name + (name.empty() ? "" : "-") + prop_name + "-kv", + format_literal(json(prop_name).dump()) + " space \":\" space " + prop_rule_name + ); + if (required.find(prop_name) != required.end()) { + required_props.push_back(prop_name); + } else { + optional_props.push_back(prop_name); + } + prop_names.push_back(prop_name); + } + if ((additional_properties.is_boolean() && additional_properties.get()) || additional_properties.is_object()) { + std::string sub_name = name + (name.empty() ? "" : "-") + "additional"; + std::string value_rule = + additional_properties.is_object() ? visit(additional_properties, sub_name + "-value") + : _add_primitive("value", PRIMITIVE_RULES.at("value")); + + auto key_rule = + prop_names.empty() ? 
_add_primitive("string", PRIMITIVE_RULES.at("string")) + : _add_rule(sub_name + "-k", _not_strings(prop_names)); + std::string kv_rule = _add_rule(sub_name + "-kv", key_rule + " \":\" space " + value_rule); + prop_kv_rule_names["*"] = kv_rule; + optional_props.push_back("*"); + } + + std::string rule = "\"{\" space "; + for (size_t i = 0; i < required_props.size(); i++) { + if (i > 0) { + rule += " \",\" space "; + } + rule += prop_kv_rule_names[required_props[i]]; + } + + if (!optional_props.empty()) { + rule += " ("; + if (!required_props.empty()) { + rule += " \",\" space ( "; + } + + std::function &, bool)> get_recursive_refs = [&](const std::vector & ks, bool first_is_optional) { + std::string res; + if (ks.empty()) { + return res; + } + std::string k = ks[0]; + std::string kv_rule_name = prop_kv_rule_names[k]; + std::string comma_ref = "( \",\" space " + kv_rule_name + " )"; + if (first_is_optional) { + res = comma_ref + (k == "*" ? "*" : "?"); + } else { + res = kv_rule_name + (k == "*" ? " " + comma_ref + "*" : ""); + } + if (ks.size() > 1) { + res += " " + _add_rule( + name + (name.empty() ? "" : "-") + k + "-rest", + get_recursive_refs(std::vector(ks.begin() + 1, ks.end()), true) + ); + } + return res; + }; + + for (size_t i = 0; i < optional_props.size(); i++) { + if (i > 0) { + rule += " | "; + } + rule += get_recursive_refs(std::vector(optional_props.begin() + i, optional_props.end()), false); + } + if (!required_props.empty()) { + rule += " )"; + } + rule += " )?"; + } + + rule += " \"}\" space"; + + return rule; + } + + std::string _add_primitive(const std::string & name, const BuiltinRule & rule) { + auto n = _add_rule(name, rule.content); + for (const auto & dep : rule.deps) { + BuiltinRule dep_rule; + auto it = PRIMITIVE_RULES.find(dep); + if (it == PRIMITIVE_RULES.end()) { + it = STRING_FORMAT_RULES.find(dep); + if (it == STRING_FORMAT_RULES.end()) { + _errors.push_back("Rule " + dep + " not known"); + continue; + } + } + if (_rules.find(dep) == _rules.end()) { + _add_primitive(dep, it->second); + } + } + return n; + } + +public: + SchemaConverter( + const std::function & fetch_json, + bool dotall, + bool compact_spaces) + : _fetch_json(fetch_json), _dotall(dotall) + { + _rules["space"] = compact_spaces ? "\" \"?" : SPACE_RULE; + } + + void resolve_refs(json & schema, const std::string & url) { + /* + * Resolves all $ref fields in the given schema, fetching any remote schemas, + * replacing each $ref with absolute reference URL and populates _refs with the + * respective referenced (sub)schema dictionaries. 
+ */ + std::function visit_refs = [&](json & n) { + if (n.is_array()) { + for (auto & x : n) { + visit_refs(x); + } + } else if (n.is_object()) { + if (n.contains("$ref")) { + std::string ref = n["$ref"]; + if (_refs.find(ref) == _refs.end()) { + json target; + if (ref.find("https://") == 0) { + std::string base_url = ref.substr(0, ref.find('#')); + auto it = _refs.find(base_url); + if (it != _refs.end()) { + target = it->second; + } else { + // Fetch the referenced schema and resolve its refs + auto referenced = _fetch_json(ref); + resolve_refs(referenced, base_url); + _refs[base_url] = referenced; + } + if (ref.find('#') == std::string::npos || ref.substr(ref.find('#') + 1).empty()) { + return; + } + } else if (ref.find("#/") == 0) { + target = schema; + n["$ref"] = url + ref; + ref = url + ref; + } else { + _errors.push_back("Unsupported ref: " + ref); + return; + } + std::string pointer = ref.substr(ref.find('#') + 1); + std::vector tokens = string_split(pointer, "/"); + for (size_t i = 1; i < tokens.size(); ++i) { + std::string sel = tokens[i]; + if (target.is_null() || !target.contains(sel)) { + _errors.push_back("Error resolving ref " + ref + ": " + sel + " not in " + target.dump()); + return; + } + target = target[sel]; + } + _refs[ref] = target; + } + } else { + for (auto & kv : n.items()) { + visit_refs(kv.value()); + } + } + } + }; + + visit_refs(schema); + } + + std::string _generate_constant_rule(const json & value) { + return format_literal(value.dump()); + } + + std::string visit(const json & schema, const std::string & name) { + json schema_type = schema.contains("type") ? schema["type"] : json(); + std::string schema_format = schema.contains("format") ? schema["format"].get() : ""; + std::string rule_name = is_reserved_name(name) ? name + "-" : name.empty() ? "root" : name; + + if (schema.contains("$ref")) { + return _add_rule(rule_name, _resolve_ref(schema["$ref"])); + } else if (schema.contains("oneOf") || schema.contains("anyOf")) { + std::vector alt_schemas = schema.contains("oneOf") ? schema["oneOf"].get>() : schema["anyOf"].get>(); + return _add_rule(rule_name, _generate_union_rule(name, alt_schemas)); + } else if (schema_type.is_array()) { + std::vector schema_types; + for (const auto & t : schema_type) { + json schema_copy(schema); + schema_copy["type"] = t; + schema_types.push_back(schema_copy); + } + return _add_rule(rule_name, _generate_union_rule(name, schema_types)); + } else if (schema.contains("const")) { + return _add_rule(rule_name, _generate_constant_rule(schema["const"]) + " space"); + } else if (schema.contains("enum")) { + std::vector enum_values; + for (const auto & v : schema["enum"]) { + enum_values.push_back(_generate_constant_rule(v)); + } + return _add_rule(rule_name, "(" + string_join(enum_values, " | ") + ") space"); + } else if ((schema_type.is_null() || schema_type == "object") + && (schema.contains("properties") || + (schema.contains("additionalProperties") && schema["additionalProperties"] != true))) { + std::unordered_set required; + if (schema.contains("required") && schema["required"].is_array()) { + for (const auto & item : schema["required"]) { + if (item.is_string()) { + required.insert(item.get()); + } + } + } + std::vector> properties; + if (schema.contains("properties")) { + for (const auto & prop : schema["properties"].items()) { + properties.emplace_back(prop.key(), prop.value()); + } + } + return _add_rule(rule_name, + _build_object_rule( + properties, required, name, + schema.contains("additionalProperties") ? 
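/*
 * Worked example (schematic, not the verbatim output of this code): for a
 * root schema such as
 *
 *   { "type": "object",
 *     "properties": { "a": { "type": "integer" }, "b": { "type": "string" } },
 *     "required": ["a"] }
 *
 * visit() takes the object branch above and _build_object_rule() emits one kv
 * rule per property plus a root rule of roughly this shape:
 *
 *   a-kv ::= "\"a\"" space ":" space integer
 *   b-kv ::= "\"b\"" space ":" space string
 *   root ::= "{" space a-kv ( "," space ( b-kv ) )? "}" space
 *
 * Required properties appear unconditionally; optional ones sit inside the
 * trailing ( "," space ( ... ) )? group. Rule names and exact spacing are
 * illustrative.
 */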
schema["additionalProperties"] : json())); + } else if ((schema_type.is_null() || schema_type == "object") && schema.contains("allOf")) { + std::unordered_set required; + std::vector> properties; + std::string hybrid_name = name; + std::function add_component = [&](const json & comp_schema, bool is_required) { + if (comp_schema.contains("$ref")) { + add_component(_refs[comp_schema["$ref"]], is_required); + } else if (comp_schema.contains("properties")) { + for (const auto & prop : comp_schema["properties"].items()) { + properties.emplace_back(prop.key(), prop.value()); + if (is_required) { + required.insert(prop.key()); + } + } + } else { + // todo warning + } + }; + for (auto & t : schema["allOf"]) { + if (t.contains("anyOf")) { + for (auto & tt : t["anyOf"]) { + add_component(tt, false); + } + } else { + add_component(t, true); + } + } + return _add_rule(rule_name, _build_object_rule(properties, required, hybrid_name, json())); + } else if ((schema_type.is_null() || schema_type == "array") && (schema.contains("items") || schema.contains("prefixItems"))) { + json items = schema.contains("items") ? schema["items"] : schema["prefixItems"]; + if (items.is_array()) { + std::string rule = "\"[\" space "; + for (size_t i = 0; i < items.size(); i++) { + if (i > 0) { + rule += " \",\" space "; + } + rule += visit(items[i], name + (name.empty() ? "" : "-") + "tuple-" + std::to_string(i)); + } + rule += " \"]\" space"; + return _add_rule(rule_name, rule); + } else { + std::string item_rule_name = visit(items, name + (name.empty() ? "" : "-") + "item"); + int min_items = schema.contains("minItems") ? schema["minItems"].get() : 0; + json max_items_json = schema.contains("maxItems") ? schema["maxItems"] : json(); + int max_items = max_items_json.is_number_integer() ? max_items_json.get() : std::numeric_limits::max(); + + return _add_rule(rule_name, "\"[\" space " + build_repetition(item_rule_name, min_items, max_items, "\",\" space") + " \"]\" space"); + } + } else if ((schema_type.is_null() || schema_type == "string") && schema.contains("pattern")) { + return _visit_pattern(schema["pattern"], rule_name); + } else if ((schema_type.is_null() || schema_type == "string") && std::regex_match(schema_format, std::regex("^uuid[1-5]?$"))) { + return _add_primitive(rule_name == "root" ? "root" : schema_format, PRIMITIVE_RULES.at("uuid")); + } else if ((schema_type.is_null() || schema_type == "string") && STRING_FORMAT_RULES.find(schema_format + "-string") != STRING_FORMAT_RULES.end()) { + auto prim_name = schema_format + "-string"; + return _add_rule(rule_name, _add_primitive(prim_name, STRING_FORMAT_RULES.at(prim_name))); + } else if (schema_type == "string" && (schema.contains("minLength") || schema.contains("maxLength"))) { + std::string char_rule = _add_primitive("char", PRIMITIVE_RULES.at("char")); + int min_len = schema.contains("minLength") ? schema["minLength"].get() : 0; + int max_len = schema.contains("maxLength") ? 
schema["maxLength"].get() : std::numeric_limits::max(); + return _add_rule(rule_name, "\"\\\"\" " + build_repetition(char_rule, min_len, max_len) + " \"\\\"\" space"); + } else if (schema_type == "integer" && (schema.contains("minimum") || schema.contains("exclusiveMinimum") || schema.contains("maximum") || schema.contains("exclusiveMaximum"))) { + int min_value = std::numeric_limits::min(); + int max_value = std::numeric_limits::max(); + if (schema.contains("minimum")) { + min_value = schema["minimum"].get(); + } else if (schema.contains("exclusiveMinimum")) { + min_value = schema["exclusiveMinimum"].get() + 1; + } + if (schema.contains("maximum")) { + max_value = schema["maximum"].get(); + } else if (schema.contains("exclusiveMaximum")) { + max_value = schema["exclusiveMaximum"].get() - 1; + } + std::stringstream out; + out << "("; + _build_min_max_int(min_value, max_value, out); + out << ") space"; + return _add_rule(rule_name, out.str()); + } else if (schema.empty() || schema_type == "object") { + return _add_rule(rule_name, _add_primitive("object", PRIMITIVE_RULES.at("object"))); + } else { + if (!schema_type.is_string() || PRIMITIVE_RULES.find(schema_type.get()) == PRIMITIVE_RULES.end()) { + _errors.push_back("Unrecognized schema: " + schema.dump()); + return ""; + } + // TODO: support minimum, maximum, exclusiveMinimum, exclusiveMaximum at least for zero + return _add_primitive(rule_name == "root" ? "root" : schema_type.get(), PRIMITIVE_RULES.at(schema_type.get())); + } + } + + void check_errors() { + if (!_errors.empty()) { + throw std::runtime_error("JSON schema conversion failed:\n" + string_join(_errors, "\n")); + } + if (!_warnings.empty()) { + fprintf(stderr, "WARNING: JSON schema conversion was incomplete: %s\n", string_join(_warnings, "; ").c_str()); + } + } + + std::string format_grammar() { + std::stringstream ss; + for (const auto & kv : _rules) { + ss << kv.first << " ::= " << kv.second << std::endl; + } + return ss.str(); + } +}; + +std::string json_schema_to_grammar(const json & schema, bool force_gbnf) { +#ifdef LLAMA_USE_LLGUIDANCE + if (!force_gbnf) { + return "%llguidance {}\nstart: %json " + schema.dump(); + } +#else + (void)force_gbnf; +#endif // LLAMA_USE_LLGUIDANCE + return build_grammar([&](const common_grammar_builder & callbacks) { + auto copy = schema; + callbacks.resolve_refs(copy); + callbacks.add_schema("", copy); + }); +} + +std::string build_grammar(const std::function & cb, const common_grammar_options & options) { + SchemaConverter converter([&](const std::string &) { return json(); }, options.dotall, options.compact_spaces); + common_grammar_builder builder { + /* .add_rule = */ [&](const std::string & name, const std::string & rule) { + return converter._add_rule(name, rule); + }, + /* .add_schema = */ [&](const std::string & name, const nlohmann::ordered_json & schema) { + return converter.visit(schema, name == "root" ? 
"" : name); + }, + /* .resolve_refs = */ [&](nlohmann::ordered_json & schema) { + converter.resolve_refs(schema, ""); + } + }; + cb(builder); + converter.check_errors(); + return converter.format_grammar(); +} diff --git a/llama.cpp/common/json-schema-to-grammar.h b/llama.cpp/common/json-schema-to-grammar.h new file mode 100644 index 0000000000000000000000000000000000000000..62a3b0a4477cca0f9558183cec9832ed2c421973 --- /dev/null +++ b/llama.cpp/common/json-schema-to-grammar.h @@ -0,0 +1,22 @@ +#pragma once + +#include "ggml.h" +// Change JSON_ASSERT from assert() to GGML_ASSERT: +#define JSON_ASSERT GGML_ASSERT +#include "json.hpp" + +std::string json_schema_to_grammar(const nlohmann::ordered_json & schema, + bool force_gbnf = false); + +struct common_grammar_builder { + std::function add_rule; + std::function add_schema; + std::function resolve_refs; +}; + +struct common_grammar_options { + bool dotall = false; + bool compact_spaces = false; +}; + +std::string build_grammar(const std::function & cb, const common_grammar_options & options = {}); diff --git a/llama.cpp/common/json.hpp b/llama.cpp/common/json.hpp new file mode 100644 index 0000000000000000000000000000000000000000..a858728c4ceb8a7048da422a96d38d61d3062d62 --- /dev/null +++ b/llama.cpp/common/json.hpp @@ -0,0 +1,24766 @@ +// __ _____ _____ _____ +// __| | __| | | | JSON for Modern C++ +// | | |__ | | | | | | version 3.11.3 +// |_____|_____|_____|_|___| https://github.com/nlohmann/json +// +// SPDX-FileCopyrightText: 2013-2023 Niels Lohmann +// SPDX-License-Identifier: MIT + +/****************************************************************************\ + * Note on documentation: The source files contain links to the online * + * documentation of the public API at https://json.nlohmann.me. This URL * + * contains the most recent documentation and should also be applicable to * + * previous versions; documentation for deprecated functions is not * + * removed, but marked deprecated. See "Generate documentation" section in * + * file docs/README.md. 
* +\****************************************************************************/ + +#ifndef INCLUDE_NLOHMANN_JSON_HPP_ +#define INCLUDE_NLOHMANN_JSON_HPP_ + +#include // all_of, find, for_each +#include // nullptr_t, ptrdiff_t, size_t +#include // hash, less +#include // initializer_list +#ifndef JSON_NO_IO + #include // istream, ostream +#endif // JSON_NO_IO +#include // random_access_iterator_tag +#include // unique_ptr +#include // string, stoi, to_string +#include // declval, forward, move, pair, swap +#include // vector + +// #include +// __ _____ _____ _____ +// __| | __| | | | JSON for Modern C++ +// | | |__ | | | | | | version 3.11.3 +// |_____|_____|_____|_|___| https://github.com/nlohmann/json +// +// SPDX-FileCopyrightText: 2013-2023 Niels Lohmann +// SPDX-License-Identifier: MIT + + + +#include + +// #include +// __ _____ _____ _____ +// __| | __| | | | JSON for Modern C++ +// | | |__ | | | | | | version 3.11.3 +// |_____|_____|_____|_|___| https://github.com/nlohmann/json +// +// SPDX-FileCopyrightText: 2013-2023 Niels Lohmann +// SPDX-License-Identifier: MIT + + + +// This file contains all macro definitions affecting or depending on the ABI + +#ifndef JSON_SKIP_LIBRARY_VERSION_CHECK + #if defined(NLOHMANN_JSON_VERSION_MAJOR) && defined(NLOHMANN_JSON_VERSION_MINOR) && defined(NLOHMANN_JSON_VERSION_PATCH) + #if NLOHMANN_JSON_VERSION_MAJOR != 3 || NLOHMANN_JSON_VERSION_MINOR != 11 || NLOHMANN_JSON_VERSION_PATCH != 3 + #warning "Already included a different version of the library!" + #endif + #endif +#endif + +#define NLOHMANN_JSON_VERSION_MAJOR 3 // NOLINT(modernize-macro-to-enum) +#define NLOHMANN_JSON_VERSION_MINOR 11 // NOLINT(modernize-macro-to-enum) +#define NLOHMANN_JSON_VERSION_PATCH 3 // NOLINT(modernize-macro-to-enum) + +#ifndef JSON_DIAGNOSTICS + #define JSON_DIAGNOSTICS 0 +#endif + +#ifndef JSON_USE_LEGACY_DISCARDED_VALUE_COMPARISON + #define JSON_USE_LEGACY_DISCARDED_VALUE_COMPARISON 0 +#endif + +#if JSON_DIAGNOSTICS + #define NLOHMANN_JSON_ABI_TAG_DIAGNOSTICS _diag +#else + #define NLOHMANN_JSON_ABI_TAG_DIAGNOSTICS +#endif + +#if JSON_USE_LEGACY_DISCARDED_VALUE_COMPARISON + #define NLOHMANN_JSON_ABI_TAG_LEGACY_DISCARDED_VALUE_COMPARISON _ldvcmp +#else + #define NLOHMANN_JSON_ABI_TAG_LEGACY_DISCARDED_VALUE_COMPARISON +#endif + +#ifndef NLOHMANN_JSON_NAMESPACE_NO_VERSION + #define NLOHMANN_JSON_NAMESPACE_NO_VERSION 0 +#endif + +// Construct the namespace ABI tags component +#define NLOHMANN_JSON_ABI_TAGS_CONCAT_EX(a, b) json_abi ## a ## b +#define NLOHMANN_JSON_ABI_TAGS_CONCAT(a, b) \ + NLOHMANN_JSON_ABI_TAGS_CONCAT_EX(a, b) + +#define NLOHMANN_JSON_ABI_TAGS \ + NLOHMANN_JSON_ABI_TAGS_CONCAT( \ + NLOHMANN_JSON_ABI_TAG_DIAGNOSTICS, \ + NLOHMANN_JSON_ABI_TAG_LEGACY_DISCARDED_VALUE_COMPARISON) + +// Construct the namespace version component +#define NLOHMANN_JSON_NAMESPACE_VERSION_CONCAT_EX(major, minor, patch) \ + _v ## major ## _ ## minor ## _ ## patch +#define NLOHMANN_JSON_NAMESPACE_VERSION_CONCAT(major, minor, patch) \ + NLOHMANN_JSON_NAMESPACE_VERSION_CONCAT_EX(major, minor, patch) + +#if NLOHMANN_JSON_NAMESPACE_NO_VERSION +#define NLOHMANN_JSON_NAMESPACE_VERSION +#else +#define NLOHMANN_JSON_NAMESPACE_VERSION \ + NLOHMANN_JSON_NAMESPACE_VERSION_CONCAT(NLOHMANN_JSON_VERSION_MAJOR, \ + NLOHMANN_JSON_VERSION_MINOR, \ + NLOHMANN_JSON_VERSION_PATCH) +#endif + +// Combine namespace components +#define NLOHMANN_JSON_NAMESPACE_CONCAT_EX(a, b) a ## b +#define NLOHMANN_JSON_NAMESPACE_CONCAT(a, b) \ + NLOHMANN_JSON_NAMESPACE_CONCAT_EX(a, b) + +#ifndef 
NLOHMANN_JSON_NAMESPACE +#define NLOHMANN_JSON_NAMESPACE \ + nlohmann::NLOHMANN_JSON_NAMESPACE_CONCAT( \ + NLOHMANN_JSON_ABI_TAGS, \ + NLOHMANN_JSON_NAMESPACE_VERSION) +#endif + +#ifndef NLOHMANN_JSON_NAMESPACE_BEGIN +#define NLOHMANN_JSON_NAMESPACE_BEGIN \ + namespace nlohmann \ + { \ + inline namespace NLOHMANN_JSON_NAMESPACE_CONCAT( \ + NLOHMANN_JSON_ABI_TAGS, \ + NLOHMANN_JSON_NAMESPACE_VERSION) \ + { +#endif + +#ifndef NLOHMANN_JSON_NAMESPACE_END +#define NLOHMANN_JSON_NAMESPACE_END \ + } /* namespace (inline namespace) NOLINT(readability/namespace) */ \ + } // namespace nlohmann +#endif + +// #include +// __ _____ _____ _____ +// __| | __| | | | JSON for Modern C++ +// | | |__ | | | | | | version 3.11.3 +// |_____|_____|_____|_|___| https://github.com/nlohmann/json +// +// SPDX-FileCopyrightText: 2013-2023 Niels Lohmann +// SPDX-License-Identifier: MIT + + + +#include // transform +#include // array +#include // forward_list +#include // inserter, front_inserter, end +#include // map +#include // string +#include // tuple, make_tuple +#include // is_arithmetic, is_same, is_enum, underlying_type, is_convertible +#include // unordered_map +#include // pair, declval +#include // valarray + +// #include +// __ _____ _____ _____ +// __| | __| | | | JSON for Modern C++ +// | | |__ | | | | | | version 3.11.3 +// |_____|_____|_____|_|___| https://github.com/nlohmann/json +// +// SPDX-FileCopyrightText: 2013-2023 Niels Lohmann +// SPDX-License-Identifier: MIT + + + +#include // nullptr_t +#include // exception +#if JSON_DIAGNOSTICS + #include // accumulate +#endif +#include // runtime_error +#include // to_string +#include // vector + +// #include +// __ _____ _____ _____ +// __| | __| | | | JSON for Modern C++ +// | | |__ | | | | | | version 3.11.3 +// |_____|_____|_____|_|___| https://github.com/nlohmann/json +// +// SPDX-FileCopyrightText: 2013-2023 Niels Lohmann +// SPDX-License-Identifier: MIT + + + +#include // array +#include // size_t +#include // uint8_t +#include // string + +// #include +// __ _____ _____ _____ +// __| | __| | | | JSON for Modern C++ +// | | |__ | | | | | | version 3.11.3 +// |_____|_____|_____|_|___| https://github.com/nlohmann/json +// +// SPDX-FileCopyrightText: 2013-2023 Niels Lohmann +// SPDX-License-Identifier: MIT + + + +#include // declval, pair +// #include +// __ _____ _____ _____ +// __| | __| | | | JSON for Modern C++ +// | | |__ | | | | | | version 3.11.3 +// |_____|_____|_____|_|___| https://github.com/nlohmann/json +// +// SPDX-FileCopyrightText: 2013-2023 Niels Lohmann +// SPDX-License-Identifier: MIT + + + +#include + +// #include +// __ _____ _____ _____ +// __| | __| | | | JSON for Modern C++ +// | | |__ | | | | | | version 3.11.3 +// |_____|_____|_____|_|___| https://github.com/nlohmann/json +// +// SPDX-FileCopyrightText: 2013-2023 Niels Lohmann +// SPDX-License-Identifier: MIT + + + +// #include + + +NLOHMANN_JSON_NAMESPACE_BEGIN +namespace detail +{ + +template struct make_void +{ + using type = void; +}; +template using void_t = typename make_void::type; + +} // namespace detail +NLOHMANN_JSON_NAMESPACE_END + + +NLOHMANN_JSON_NAMESPACE_BEGIN +namespace detail +{ + +// https://en.cppreference.com/w/cpp/experimental/is_detected +struct nonesuch +{ + nonesuch() = delete; + ~nonesuch() = delete; + nonesuch(nonesuch const&) = delete; + nonesuch(nonesuch const&&) = delete; + void operator=(nonesuch const&) = delete; + void operator=(nonesuch&&) = delete; +}; + +template class Op, + class... 
Args> +struct detector +{ + using value_t = std::false_type; + using type = Default; +}; + +template class Op, class... Args> +struct detector>, Op, Args...> +{ + using value_t = std::true_type; + using type = Op; +}; + +template class Op, class... Args> +using is_detected = typename detector::value_t; + +template class Op, class... Args> +struct is_detected_lazy : is_detected { }; + +template class Op, class... Args> +using detected_t = typename detector::type; + +template class Op, class... Args> +using detected_or = detector; + +template class Op, class... Args> +using detected_or_t = typename detected_or::type; + +template class Op, class... Args> +using is_detected_exact = std::is_same>; + +template class Op, class... Args> +using is_detected_convertible = + std::is_convertible, To>; + +} // namespace detail +NLOHMANN_JSON_NAMESPACE_END + +// #include + + +// __ _____ _____ _____ +// __| | __| | | | JSON for Modern C++ +// | | |__ | | | | | | version 3.11.3 +// |_____|_____|_____|_|___| https://github.com/nlohmann/json +// +// SPDX-FileCopyrightText: 2013-2023 Niels Lohmann +// SPDX-FileCopyrightText: 2016-2021 Evan Nemerson +// SPDX-License-Identifier: MIT + +/* Hedley - https://nemequ.github.io/hedley + * Created by Evan Nemerson + */ + +#if !defined(JSON_HEDLEY_VERSION) || (JSON_HEDLEY_VERSION < 15) +#if defined(JSON_HEDLEY_VERSION) + #undef JSON_HEDLEY_VERSION +#endif +#define JSON_HEDLEY_VERSION 15 + +#if defined(JSON_HEDLEY_STRINGIFY_EX) + #undef JSON_HEDLEY_STRINGIFY_EX +#endif +#define JSON_HEDLEY_STRINGIFY_EX(x) #x + +#if defined(JSON_HEDLEY_STRINGIFY) + #undef JSON_HEDLEY_STRINGIFY +#endif +#define JSON_HEDLEY_STRINGIFY(x) JSON_HEDLEY_STRINGIFY_EX(x) + +#if defined(JSON_HEDLEY_CONCAT_EX) + #undef JSON_HEDLEY_CONCAT_EX +#endif +#define JSON_HEDLEY_CONCAT_EX(a,b) a##b + +#if defined(JSON_HEDLEY_CONCAT) + #undef JSON_HEDLEY_CONCAT +#endif +#define JSON_HEDLEY_CONCAT(a,b) JSON_HEDLEY_CONCAT_EX(a,b) + +#if defined(JSON_HEDLEY_CONCAT3_EX) + #undef JSON_HEDLEY_CONCAT3_EX +#endif +#define JSON_HEDLEY_CONCAT3_EX(a,b,c) a##b##c + +#if defined(JSON_HEDLEY_CONCAT3) + #undef JSON_HEDLEY_CONCAT3 +#endif +#define JSON_HEDLEY_CONCAT3(a,b,c) JSON_HEDLEY_CONCAT3_EX(a,b,c) + +#if defined(JSON_HEDLEY_VERSION_ENCODE) + #undef JSON_HEDLEY_VERSION_ENCODE +#endif +#define JSON_HEDLEY_VERSION_ENCODE(major,minor,revision) (((major) * 1000000) + ((minor) * 1000) + (revision)) + +#if defined(JSON_HEDLEY_VERSION_DECODE_MAJOR) + #undef JSON_HEDLEY_VERSION_DECODE_MAJOR +#endif +#define JSON_HEDLEY_VERSION_DECODE_MAJOR(version) ((version) / 1000000) + +#if defined(JSON_HEDLEY_VERSION_DECODE_MINOR) + #undef JSON_HEDLEY_VERSION_DECODE_MINOR +#endif +#define JSON_HEDLEY_VERSION_DECODE_MINOR(version) (((version) % 1000000) / 1000) + +#if defined(JSON_HEDLEY_VERSION_DECODE_REVISION) + #undef JSON_HEDLEY_VERSION_DECODE_REVISION +#endif +#define JSON_HEDLEY_VERSION_DECODE_REVISION(version) ((version) % 1000) + +#if defined(JSON_HEDLEY_GNUC_VERSION) + #undef JSON_HEDLEY_GNUC_VERSION +#endif +#if defined(__GNUC__) && defined(__GNUC_PATCHLEVEL__) + #define JSON_HEDLEY_GNUC_VERSION JSON_HEDLEY_VERSION_ENCODE(__GNUC__, __GNUC_MINOR__, __GNUC_PATCHLEVEL__) +#elif defined(__GNUC__) + #define JSON_HEDLEY_GNUC_VERSION JSON_HEDLEY_VERSION_ENCODE(__GNUC__, __GNUC_MINOR__, 0) +#endif + +#if defined(JSON_HEDLEY_GNUC_VERSION_CHECK) + #undef JSON_HEDLEY_GNUC_VERSION_CHECK +#endif +#if defined(JSON_HEDLEY_GNUC_VERSION) + #define JSON_HEDLEY_GNUC_VERSION_CHECK(major,minor,patch) (JSON_HEDLEY_GNUC_VERSION >= 
JSON_HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else + #define JSON_HEDLEY_GNUC_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(JSON_HEDLEY_MSVC_VERSION) + #undef JSON_HEDLEY_MSVC_VERSION +#endif +#if defined(_MSC_FULL_VER) && (_MSC_FULL_VER >= 140000000) && !defined(__ICL) + #define JSON_HEDLEY_MSVC_VERSION JSON_HEDLEY_VERSION_ENCODE(_MSC_FULL_VER / 10000000, (_MSC_FULL_VER % 10000000) / 100000, (_MSC_FULL_VER % 100000) / 100) +#elif defined(_MSC_FULL_VER) && !defined(__ICL) + #define JSON_HEDLEY_MSVC_VERSION JSON_HEDLEY_VERSION_ENCODE(_MSC_FULL_VER / 1000000, (_MSC_FULL_VER % 1000000) / 10000, (_MSC_FULL_VER % 10000) / 10) +#elif defined(_MSC_VER) && !defined(__ICL) + #define JSON_HEDLEY_MSVC_VERSION JSON_HEDLEY_VERSION_ENCODE(_MSC_VER / 100, _MSC_VER % 100, 0) +#endif + +#if defined(JSON_HEDLEY_MSVC_VERSION_CHECK) + #undef JSON_HEDLEY_MSVC_VERSION_CHECK +#endif +#if !defined(JSON_HEDLEY_MSVC_VERSION) + #define JSON_HEDLEY_MSVC_VERSION_CHECK(major,minor,patch) (0) +#elif defined(_MSC_VER) && (_MSC_VER >= 1400) + #define JSON_HEDLEY_MSVC_VERSION_CHECK(major,minor,patch) (_MSC_FULL_VER >= ((major * 10000000) + (minor * 100000) + (patch))) +#elif defined(_MSC_VER) && (_MSC_VER >= 1200) + #define JSON_HEDLEY_MSVC_VERSION_CHECK(major,minor,patch) (_MSC_FULL_VER >= ((major * 1000000) + (minor * 10000) + (patch))) +#else + #define JSON_HEDLEY_MSVC_VERSION_CHECK(major,minor,patch) (_MSC_VER >= ((major * 100) + (minor))) +#endif + +#if defined(JSON_HEDLEY_INTEL_VERSION) + #undef JSON_HEDLEY_INTEL_VERSION +#endif +#if defined(__INTEL_COMPILER) && defined(__INTEL_COMPILER_UPDATE) && !defined(__ICL) + #define JSON_HEDLEY_INTEL_VERSION JSON_HEDLEY_VERSION_ENCODE(__INTEL_COMPILER / 100, __INTEL_COMPILER % 100, __INTEL_COMPILER_UPDATE) +#elif defined(__INTEL_COMPILER) && !defined(__ICL) + #define JSON_HEDLEY_INTEL_VERSION JSON_HEDLEY_VERSION_ENCODE(__INTEL_COMPILER / 100, __INTEL_COMPILER % 100, 0) +#endif + +#if defined(JSON_HEDLEY_INTEL_VERSION_CHECK) + #undef JSON_HEDLEY_INTEL_VERSION_CHECK +#endif +#if defined(JSON_HEDLEY_INTEL_VERSION) + #define JSON_HEDLEY_INTEL_VERSION_CHECK(major,minor,patch) (JSON_HEDLEY_INTEL_VERSION >= JSON_HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else + #define JSON_HEDLEY_INTEL_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(JSON_HEDLEY_INTEL_CL_VERSION) + #undef JSON_HEDLEY_INTEL_CL_VERSION +#endif +#if defined(__INTEL_COMPILER) && defined(__INTEL_COMPILER_UPDATE) && defined(__ICL) + #define JSON_HEDLEY_INTEL_CL_VERSION JSON_HEDLEY_VERSION_ENCODE(__INTEL_COMPILER, __INTEL_COMPILER_UPDATE, 0) +#endif + +#if defined(JSON_HEDLEY_INTEL_CL_VERSION_CHECK) + #undef JSON_HEDLEY_INTEL_CL_VERSION_CHECK +#endif +#if defined(JSON_HEDLEY_INTEL_CL_VERSION) + #define JSON_HEDLEY_INTEL_CL_VERSION_CHECK(major,minor,patch) (JSON_HEDLEY_INTEL_CL_VERSION >= JSON_HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else + #define JSON_HEDLEY_INTEL_CL_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(JSON_HEDLEY_PGI_VERSION) + #undef JSON_HEDLEY_PGI_VERSION +#endif +#if defined(__PGI) && defined(__PGIC__) && defined(__PGIC_MINOR__) && defined(__PGIC_PATCHLEVEL__) + #define JSON_HEDLEY_PGI_VERSION JSON_HEDLEY_VERSION_ENCODE(__PGIC__, __PGIC_MINOR__, __PGIC_PATCHLEVEL__) +#endif + +#if defined(JSON_HEDLEY_PGI_VERSION_CHECK) + #undef JSON_HEDLEY_PGI_VERSION_CHECK +#endif +#if defined(JSON_HEDLEY_PGI_VERSION) + #define JSON_HEDLEY_PGI_VERSION_CHECK(major,minor,patch) (JSON_HEDLEY_PGI_VERSION >= JSON_HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else + #define 
JSON_HEDLEY_PGI_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(JSON_HEDLEY_SUNPRO_VERSION) + #undef JSON_HEDLEY_SUNPRO_VERSION +#endif +#if defined(__SUNPRO_C) && (__SUNPRO_C > 0x1000) + #define JSON_HEDLEY_SUNPRO_VERSION JSON_HEDLEY_VERSION_ENCODE((((__SUNPRO_C >> 16) & 0xf) * 10) + ((__SUNPRO_C >> 12) & 0xf), (((__SUNPRO_C >> 8) & 0xf) * 10) + ((__SUNPRO_C >> 4) & 0xf), (__SUNPRO_C & 0xf) * 10) +#elif defined(__SUNPRO_C) + #define JSON_HEDLEY_SUNPRO_VERSION JSON_HEDLEY_VERSION_ENCODE((__SUNPRO_C >> 8) & 0xf, (__SUNPRO_C >> 4) & 0xf, (__SUNPRO_C) & 0xf) +#elif defined(__SUNPRO_CC) && (__SUNPRO_CC > 0x1000) + #define JSON_HEDLEY_SUNPRO_VERSION JSON_HEDLEY_VERSION_ENCODE((((__SUNPRO_CC >> 16) & 0xf) * 10) + ((__SUNPRO_CC >> 12) & 0xf), (((__SUNPRO_CC >> 8) & 0xf) * 10) + ((__SUNPRO_CC >> 4) & 0xf), (__SUNPRO_CC & 0xf) * 10) +#elif defined(__SUNPRO_CC) + #define JSON_HEDLEY_SUNPRO_VERSION JSON_HEDLEY_VERSION_ENCODE((__SUNPRO_CC >> 8) & 0xf, (__SUNPRO_CC >> 4) & 0xf, (__SUNPRO_CC) & 0xf) +#endif + +#if defined(JSON_HEDLEY_SUNPRO_VERSION_CHECK) + #undef JSON_HEDLEY_SUNPRO_VERSION_CHECK +#endif +#if defined(JSON_HEDLEY_SUNPRO_VERSION) + #define JSON_HEDLEY_SUNPRO_VERSION_CHECK(major,minor,patch) (JSON_HEDLEY_SUNPRO_VERSION >= JSON_HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else + #define JSON_HEDLEY_SUNPRO_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(JSON_HEDLEY_EMSCRIPTEN_VERSION) + #undef JSON_HEDLEY_EMSCRIPTEN_VERSION +#endif +#if defined(__EMSCRIPTEN__) + #define JSON_HEDLEY_EMSCRIPTEN_VERSION JSON_HEDLEY_VERSION_ENCODE(__EMSCRIPTEN_major__, __EMSCRIPTEN_minor__, __EMSCRIPTEN_tiny__) +#endif + +#if defined(JSON_HEDLEY_EMSCRIPTEN_VERSION_CHECK) + #undef JSON_HEDLEY_EMSCRIPTEN_VERSION_CHECK +#endif +#if defined(JSON_HEDLEY_EMSCRIPTEN_VERSION) + #define JSON_HEDLEY_EMSCRIPTEN_VERSION_CHECK(major,minor,patch) (JSON_HEDLEY_EMSCRIPTEN_VERSION >= JSON_HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else + #define JSON_HEDLEY_EMSCRIPTEN_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(JSON_HEDLEY_ARM_VERSION) + #undef JSON_HEDLEY_ARM_VERSION +#endif +#if defined(__CC_ARM) && defined(__ARMCOMPILER_VERSION) + #define JSON_HEDLEY_ARM_VERSION JSON_HEDLEY_VERSION_ENCODE(__ARMCOMPILER_VERSION / 1000000, (__ARMCOMPILER_VERSION % 1000000) / 10000, (__ARMCOMPILER_VERSION % 10000) / 100) +#elif defined(__CC_ARM) && defined(__ARMCC_VERSION) + #define JSON_HEDLEY_ARM_VERSION JSON_HEDLEY_VERSION_ENCODE(__ARMCC_VERSION / 1000000, (__ARMCC_VERSION % 1000000) / 10000, (__ARMCC_VERSION % 10000) / 100) +#endif + +#if defined(JSON_HEDLEY_ARM_VERSION_CHECK) + #undef JSON_HEDLEY_ARM_VERSION_CHECK +#endif +#if defined(JSON_HEDLEY_ARM_VERSION) + #define JSON_HEDLEY_ARM_VERSION_CHECK(major,minor,patch) (JSON_HEDLEY_ARM_VERSION >= JSON_HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else + #define JSON_HEDLEY_ARM_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(JSON_HEDLEY_IBM_VERSION) + #undef JSON_HEDLEY_IBM_VERSION +#endif +#if defined(__ibmxl__) + #define JSON_HEDLEY_IBM_VERSION JSON_HEDLEY_VERSION_ENCODE(__ibmxl_version__, __ibmxl_release__, __ibmxl_modification__) +#elif defined(__xlC__) && defined(__xlC_ver__) + #define JSON_HEDLEY_IBM_VERSION JSON_HEDLEY_VERSION_ENCODE(__xlC__ >> 8, __xlC__ & 0xff, (__xlC_ver__ >> 8) & 0xff) +#elif defined(__xlC__) + #define JSON_HEDLEY_IBM_VERSION JSON_HEDLEY_VERSION_ENCODE(__xlC__ >> 8, __xlC__ & 0xff, 0) +#endif + +#if defined(JSON_HEDLEY_IBM_VERSION_CHECK) + #undef JSON_HEDLEY_IBM_VERSION_CHECK +#endif +#if 
defined(JSON_HEDLEY_IBM_VERSION) + #define JSON_HEDLEY_IBM_VERSION_CHECK(major,minor,patch) (JSON_HEDLEY_IBM_VERSION >= JSON_HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else + #define JSON_HEDLEY_IBM_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(JSON_HEDLEY_TI_VERSION) + #undef JSON_HEDLEY_TI_VERSION +#endif +#if \ + defined(__TI_COMPILER_VERSION__) && \ + ( \ + defined(__TMS470__) || defined(__TI_ARM__) || \ + defined(__MSP430__) || \ + defined(__TMS320C2000__) \ + ) +#if (__TI_COMPILER_VERSION__ >= 16000000) + #define JSON_HEDLEY_TI_VERSION JSON_HEDLEY_VERSION_ENCODE(__TI_COMPILER_VERSION__ / 1000000, (__TI_COMPILER_VERSION__ % 1000000) / 1000, (__TI_COMPILER_VERSION__ % 1000)) +#endif +#endif + +#if defined(JSON_HEDLEY_TI_VERSION_CHECK) + #undef JSON_HEDLEY_TI_VERSION_CHECK +#endif +#if defined(JSON_HEDLEY_TI_VERSION) + #define JSON_HEDLEY_TI_VERSION_CHECK(major,minor,patch) (JSON_HEDLEY_TI_VERSION >= JSON_HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else + #define JSON_HEDLEY_TI_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(JSON_HEDLEY_TI_CL2000_VERSION) + #undef JSON_HEDLEY_TI_CL2000_VERSION +#endif +#if defined(__TI_COMPILER_VERSION__) && defined(__TMS320C2000__) + #define JSON_HEDLEY_TI_CL2000_VERSION JSON_HEDLEY_VERSION_ENCODE(__TI_COMPILER_VERSION__ / 1000000, (__TI_COMPILER_VERSION__ % 1000000) / 1000, (__TI_COMPILER_VERSION__ % 1000)) +#endif + +#if defined(JSON_HEDLEY_TI_CL2000_VERSION_CHECK) + #undef JSON_HEDLEY_TI_CL2000_VERSION_CHECK +#endif +#if defined(JSON_HEDLEY_TI_CL2000_VERSION) + #define JSON_HEDLEY_TI_CL2000_VERSION_CHECK(major,minor,patch) (JSON_HEDLEY_TI_CL2000_VERSION >= JSON_HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else + #define JSON_HEDLEY_TI_CL2000_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(JSON_HEDLEY_TI_CL430_VERSION) + #undef JSON_HEDLEY_TI_CL430_VERSION +#endif +#if defined(__TI_COMPILER_VERSION__) && defined(__MSP430__) + #define JSON_HEDLEY_TI_CL430_VERSION JSON_HEDLEY_VERSION_ENCODE(__TI_COMPILER_VERSION__ / 1000000, (__TI_COMPILER_VERSION__ % 1000000) / 1000, (__TI_COMPILER_VERSION__ % 1000)) +#endif + +#if defined(JSON_HEDLEY_TI_CL430_VERSION_CHECK) + #undef JSON_HEDLEY_TI_CL430_VERSION_CHECK +#endif +#if defined(JSON_HEDLEY_TI_CL430_VERSION) + #define JSON_HEDLEY_TI_CL430_VERSION_CHECK(major,minor,patch) (JSON_HEDLEY_TI_CL430_VERSION >= JSON_HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else + #define JSON_HEDLEY_TI_CL430_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(JSON_HEDLEY_TI_ARMCL_VERSION) + #undef JSON_HEDLEY_TI_ARMCL_VERSION +#endif +#if defined(__TI_COMPILER_VERSION__) && (defined(__TMS470__) || defined(__TI_ARM__)) + #define JSON_HEDLEY_TI_ARMCL_VERSION JSON_HEDLEY_VERSION_ENCODE(__TI_COMPILER_VERSION__ / 1000000, (__TI_COMPILER_VERSION__ % 1000000) / 1000, (__TI_COMPILER_VERSION__ % 1000)) +#endif + +#if defined(JSON_HEDLEY_TI_ARMCL_VERSION_CHECK) + #undef JSON_HEDLEY_TI_ARMCL_VERSION_CHECK +#endif +#if defined(JSON_HEDLEY_TI_ARMCL_VERSION) + #define JSON_HEDLEY_TI_ARMCL_VERSION_CHECK(major,minor,patch) (JSON_HEDLEY_TI_ARMCL_VERSION >= JSON_HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else + #define JSON_HEDLEY_TI_ARMCL_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(JSON_HEDLEY_TI_CL6X_VERSION) + #undef JSON_HEDLEY_TI_CL6X_VERSION +#endif +#if defined(__TI_COMPILER_VERSION__) && defined(__TMS320C6X__) + #define JSON_HEDLEY_TI_CL6X_VERSION JSON_HEDLEY_VERSION_ENCODE(__TI_COMPILER_VERSION__ / 1000000, (__TI_COMPILER_VERSION__ % 1000000) / 1000, 
(__TI_COMPILER_VERSION__ % 1000)) +#endif + +#if defined(JSON_HEDLEY_TI_CL6X_VERSION_CHECK) + #undef JSON_HEDLEY_TI_CL6X_VERSION_CHECK +#endif +#if defined(JSON_HEDLEY_TI_CL6X_VERSION) + #define JSON_HEDLEY_TI_CL6X_VERSION_CHECK(major,minor,patch) (JSON_HEDLEY_TI_CL6X_VERSION >= JSON_HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else + #define JSON_HEDLEY_TI_CL6X_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(JSON_HEDLEY_TI_CL7X_VERSION) + #undef JSON_HEDLEY_TI_CL7X_VERSION +#endif +#if defined(__TI_COMPILER_VERSION__) && defined(__C7000__) + #define JSON_HEDLEY_TI_CL7X_VERSION JSON_HEDLEY_VERSION_ENCODE(__TI_COMPILER_VERSION__ / 1000000, (__TI_COMPILER_VERSION__ % 1000000) / 1000, (__TI_COMPILER_VERSION__ % 1000)) +#endif + +#if defined(JSON_HEDLEY_TI_CL7X_VERSION_CHECK) + #undef JSON_HEDLEY_TI_CL7X_VERSION_CHECK +#endif +#if defined(JSON_HEDLEY_TI_CL7X_VERSION) + #define JSON_HEDLEY_TI_CL7X_VERSION_CHECK(major,minor,patch) (JSON_HEDLEY_TI_CL7X_VERSION >= JSON_HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else + #define JSON_HEDLEY_TI_CL7X_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(JSON_HEDLEY_TI_CLPRU_VERSION) + #undef JSON_HEDLEY_TI_CLPRU_VERSION +#endif +#if defined(__TI_COMPILER_VERSION__) && defined(__PRU__) + #define JSON_HEDLEY_TI_CLPRU_VERSION JSON_HEDLEY_VERSION_ENCODE(__TI_COMPILER_VERSION__ / 1000000, (__TI_COMPILER_VERSION__ % 1000000) / 1000, (__TI_COMPILER_VERSION__ % 1000)) +#endif + +#if defined(JSON_HEDLEY_TI_CLPRU_VERSION_CHECK) + #undef JSON_HEDLEY_TI_CLPRU_VERSION_CHECK +#endif +#if defined(JSON_HEDLEY_TI_CLPRU_VERSION) + #define JSON_HEDLEY_TI_CLPRU_VERSION_CHECK(major,minor,patch) (JSON_HEDLEY_TI_CLPRU_VERSION >= JSON_HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else + #define JSON_HEDLEY_TI_CLPRU_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(JSON_HEDLEY_CRAY_VERSION) + #undef JSON_HEDLEY_CRAY_VERSION +#endif +#if defined(_CRAYC) + #if defined(_RELEASE_PATCHLEVEL) + #define JSON_HEDLEY_CRAY_VERSION JSON_HEDLEY_VERSION_ENCODE(_RELEASE_MAJOR, _RELEASE_MINOR, _RELEASE_PATCHLEVEL) + #else + #define JSON_HEDLEY_CRAY_VERSION JSON_HEDLEY_VERSION_ENCODE(_RELEASE_MAJOR, _RELEASE_MINOR, 0) + #endif +#endif + +#if defined(JSON_HEDLEY_CRAY_VERSION_CHECK) + #undef JSON_HEDLEY_CRAY_VERSION_CHECK +#endif +#if defined(JSON_HEDLEY_CRAY_VERSION) + #define JSON_HEDLEY_CRAY_VERSION_CHECK(major,minor,patch) (JSON_HEDLEY_CRAY_VERSION >= JSON_HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else + #define JSON_HEDLEY_CRAY_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(JSON_HEDLEY_IAR_VERSION) + #undef JSON_HEDLEY_IAR_VERSION +#endif +#if defined(__IAR_SYSTEMS_ICC__) + #if __VER__ > 1000 + #define JSON_HEDLEY_IAR_VERSION JSON_HEDLEY_VERSION_ENCODE((__VER__ / 1000000), ((__VER__ / 1000) % 1000), (__VER__ % 1000)) + #else + #define JSON_HEDLEY_IAR_VERSION JSON_HEDLEY_VERSION_ENCODE(__VER__ / 100, __VER__ % 100, 0) + #endif +#endif + +#if defined(JSON_HEDLEY_IAR_VERSION_CHECK) + #undef JSON_HEDLEY_IAR_VERSION_CHECK +#endif +#if defined(JSON_HEDLEY_IAR_VERSION) + #define JSON_HEDLEY_IAR_VERSION_CHECK(major,minor,patch) (JSON_HEDLEY_IAR_VERSION >= JSON_HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else + #define JSON_HEDLEY_IAR_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(JSON_HEDLEY_TINYC_VERSION) + #undef JSON_HEDLEY_TINYC_VERSION +#endif +#if defined(__TINYC__) + #define JSON_HEDLEY_TINYC_VERSION JSON_HEDLEY_VERSION_ENCODE(__TINYC__ / 1000, (__TINYC__ / 100) % 10, __TINYC__ % 100) +#endif + +#if 
defined(JSON_HEDLEY_TINYC_VERSION_CHECK) + #undef JSON_HEDLEY_TINYC_VERSION_CHECK +#endif +#if defined(JSON_HEDLEY_TINYC_VERSION) + #define JSON_HEDLEY_TINYC_VERSION_CHECK(major,minor,patch) (JSON_HEDLEY_TINYC_VERSION >= JSON_HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else + #define JSON_HEDLEY_TINYC_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(JSON_HEDLEY_DMC_VERSION) + #undef JSON_HEDLEY_DMC_VERSION +#endif +#if defined(__DMC__) + #define JSON_HEDLEY_DMC_VERSION JSON_HEDLEY_VERSION_ENCODE(__DMC__ >> 8, (__DMC__ >> 4) & 0xf, __DMC__ & 0xf) +#endif + +#if defined(JSON_HEDLEY_DMC_VERSION_CHECK) + #undef JSON_HEDLEY_DMC_VERSION_CHECK +#endif +#if defined(JSON_HEDLEY_DMC_VERSION) + #define JSON_HEDLEY_DMC_VERSION_CHECK(major,minor,patch) (JSON_HEDLEY_DMC_VERSION >= JSON_HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else + #define JSON_HEDLEY_DMC_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(JSON_HEDLEY_COMPCERT_VERSION) + #undef JSON_HEDLEY_COMPCERT_VERSION +#endif +#if defined(__COMPCERT_VERSION__) + #define JSON_HEDLEY_COMPCERT_VERSION JSON_HEDLEY_VERSION_ENCODE(__COMPCERT_VERSION__ / 10000, (__COMPCERT_VERSION__ / 100) % 100, __COMPCERT_VERSION__ % 100) +#endif + +#if defined(JSON_HEDLEY_COMPCERT_VERSION_CHECK) + #undef JSON_HEDLEY_COMPCERT_VERSION_CHECK +#endif +#if defined(JSON_HEDLEY_COMPCERT_VERSION) + #define JSON_HEDLEY_COMPCERT_VERSION_CHECK(major,minor,patch) (JSON_HEDLEY_COMPCERT_VERSION >= JSON_HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else + #define JSON_HEDLEY_COMPCERT_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(JSON_HEDLEY_PELLES_VERSION) + #undef JSON_HEDLEY_PELLES_VERSION +#endif +#if defined(__POCC__) + #define JSON_HEDLEY_PELLES_VERSION JSON_HEDLEY_VERSION_ENCODE(__POCC__ / 100, __POCC__ % 100, 0) +#endif + +#if defined(JSON_HEDLEY_PELLES_VERSION_CHECK) + #undef JSON_HEDLEY_PELLES_VERSION_CHECK +#endif +#if defined(JSON_HEDLEY_PELLES_VERSION) + #define JSON_HEDLEY_PELLES_VERSION_CHECK(major,minor,patch) (JSON_HEDLEY_PELLES_VERSION >= JSON_HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else + #define JSON_HEDLEY_PELLES_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(JSON_HEDLEY_MCST_LCC_VERSION) + #undef JSON_HEDLEY_MCST_LCC_VERSION +#endif +#if defined(__LCC__) && defined(__LCC_MINOR__) + #define JSON_HEDLEY_MCST_LCC_VERSION JSON_HEDLEY_VERSION_ENCODE(__LCC__ / 100, __LCC__ % 100, __LCC_MINOR__) +#endif + +#if defined(JSON_HEDLEY_MCST_LCC_VERSION_CHECK) + #undef JSON_HEDLEY_MCST_LCC_VERSION_CHECK +#endif +#if defined(JSON_HEDLEY_MCST_LCC_VERSION) + #define JSON_HEDLEY_MCST_LCC_VERSION_CHECK(major,minor,patch) (JSON_HEDLEY_MCST_LCC_VERSION >= JSON_HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else + #define JSON_HEDLEY_MCST_LCC_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(JSON_HEDLEY_GCC_VERSION) + #undef JSON_HEDLEY_GCC_VERSION +#endif +#if \ + defined(JSON_HEDLEY_GNUC_VERSION) && \ + !defined(__clang__) && \ + !defined(JSON_HEDLEY_INTEL_VERSION) && \ + !defined(JSON_HEDLEY_PGI_VERSION) && \ + !defined(JSON_HEDLEY_ARM_VERSION) && \ + !defined(JSON_HEDLEY_CRAY_VERSION) && \ + !defined(JSON_HEDLEY_TI_VERSION) && \ + !defined(JSON_HEDLEY_TI_ARMCL_VERSION) && \ + !defined(JSON_HEDLEY_TI_CL430_VERSION) && \ + !defined(JSON_HEDLEY_TI_CL2000_VERSION) && \ + !defined(JSON_HEDLEY_TI_CL6X_VERSION) && \ + !defined(JSON_HEDLEY_TI_CL7X_VERSION) && \ + !defined(JSON_HEDLEY_TI_CLPRU_VERSION) && \ + !defined(__COMPCERT__) && \ + !defined(JSON_HEDLEY_MCST_LCC_VERSION) + #define JSON_HEDLEY_GCC_VERSION 
JSON_HEDLEY_GNUC_VERSION +#endif + +#if defined(JSON_HEDLEY_GCC_VERSION_CHECK) + #undef JSON_HEDLEY_GCC_VERSION_CHECK +#endif +#if defined(JSON_HEDLEY_GCC_VERSION) + #define JSON_HEDLEY_GCC_VERSION_CHECK(major,minor,patch) (JSON_HEDLEY_GCC_VERSION >= JSON_HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else + #define JSON_HEDLEY_GCC_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(JSON_HEDLEY_HAS_ATTRIBUTE) + #undef JSON_HEDLEY_HAS_ATTRIBUTE +#endif +#if \ + defined(__has_attribute) && \ + ( \ + (!defined(JSON_HEDLEY_IAR_VERSION) || JSON_HEDLEY_IAR_VERSION_CHECK(8,5,9)) \ + ) +# define JSON_HEDLEY_HAS_ATTRIBUTE(attribute) __has_attribute(attribute) +#else +# define JSON_HEDLEY_HAS_ATTRIBUTE(attribute) (0) +#endif + +#if defined(JSON_HEDLEY_GNUC_HAS_ATTRIBUTE) + #undef JSON_HEDLEY_GNUC_HAS_ATTRIBUTE +#endif +#if defined(__has_attribute) + #define JSON_HEDLEY_GNUC_HAS_ATTRIBUTE(attribute,major,minor,patch) JSON_HEDLEY_HAS_ATTRIBUTE(attribute) +#else + #define JSON_HEDLEY_GNUC_HAS_ATTRIBUTE(attribute,major,minor,patch) JSON_HEDLEY_GNUC_VERSION_CHECK(major,minor,patch) +#endif + +#if defined(JSON_HEDLEY_GCC_HAS_ATTRIBUTE) + #undef JSON_HEDLEY_GCC_HAS_ATTRIBUTE +#endif +#if defined(__has_attribute) + #define JSON_HEDLEY_GCC_HAS_ATTRIBUTE(attribute,major,minor,patch) JSON_HEDLEY_HAS_ATTRIBUTE(attribute) +#else + #define JSON_HEDLEY_GCC_HAS_ATTRIBUTE(attribute,major,minor,patch) JSON_HEDLEY_GCC_VERSION_CHECK(major,minor,patch) +#endif + +#if defined(JSON_HEDLEY_HAS_CPP_ATTRIBUTE) + #undef JSON_HEDLEY_HAS_CPP_ATTRIBUTE +#endif +#if \ + defined(__has_cpp_attribute) && \ + defined(__cplusplus) && \ + (!defined(JSON_HEDLEY_SUNPRO_VERSION) || JSON_HEDLEY_SUNPRO_VERSION_CHECK(5,15,0)) + #define JSON_HEDLEY_HAS_CPP_ATTRIBUTE(attribute) __has_cpp_attribute(attribute) +#else + #define JSON_HEDLEY_HAS_CPP_ATTRIBUTE(attribute) (0) +#endif + +#if defined(JSON_HEDLEY_HAS_CPP_ATTRIBUTE_NS) + #undef JSON_HEDLEY_HAS_CPP_ATTRIBUTE_NS +#endif +#if !defined(__cplusplus) || !defined(__has_cpp_attribute) + #define JSON_HEDLEY_HAS_CPP_ATTRIBUTE_NS(ns,attribute) (0) +#elif \ + !defined(JSON_HEDLEY_PGI_VERSION) && \ + !defined(JSON_HEDLEY_IAR_VERSION) && \ + (!defined(JSON_HEDLEY_SUNPRO_VERSION) || JSON_HEDLEY_SUNPRO_VERSION_CHECK(5,15,0)) && \ + (!defined(JSON_HEDLEY_MSVC_VERSION) || JSON_HEDLEY_MSVC_VERSION_CHECK(19,20,0)) + #define JSON_HEDLEY_HAS_CPP_ATTRIBUTE_NS(ns,attribute) JSON_HEDLEY_HAS_CPP_ATTRIBUTE(ns::attribute) +#else + #define JSON_HEDLEY_HAS_CPP_ATTRIBUTE_NS(ns,attribute) (0) +#endif + +#if defined(JSON_HEDLEY_GNUC_HAS_CPP_ATTRIBUTE) + #undef JSON_HEDLEY_GNUC_HAS_CPP_ATTRIBUTE +#endif +#if defined(__has_cpp_attribute) && defined(__cplusplus) + #define JSON_HEDLEY_GNUC_HAS_CPP_ATTRIBUTE(attribute,major,minor,patch) __has_cpp_attribute(attribute) +#else + #define JSON_HEDLEY_GNUC_HAS_CPP_ATTRIBUTE(attribute,major,minor,patch) JSON_HEDLEY_GNUC_VERSION_CHECK(major,minor,patch) +#endif + +#if defined(JSON_HEDLEY_GCC_HAS_CPP_ATTRIBUTE) + #undef JSON_HEDLEY_GCC_HAS_CPP_ATTRIBUTE +#endif +#if defined(__has_cpp_attribute) && defined(__cplusplus) + #define JSON_HEDLEY_GCC_HAS_CPP_ATTRIBUTE(attribute,major,minor,patch) __has_cpp_attribute(attribute) +#else + #define JSON_HEDLEY_GCC_HAS_CPP_ATTRIBUTE(attribute,major,minor,patch) JSON_HEDLEY_GCC_VERSION_CHECK(major,minor,patch) +#endif + +#if defined(JSON_HEDLEY_HAS_BUILTIN) + #undef JSON_HEDLEY_HAS_BUILTIN +#endif +#if defined(__has_builtin) + #define JSON_HEDLEY_HAS_BUILTIN(builtin) __has_builtin(builtin) +#else + #define JSON_HEDLEY_HAS_BUILTIN(builtin) 
(0) +#endif + +#if defined(JSON_HEDLEY_GNUC_HAS_BUILTIN) + #undef JSON_HEDLEY_GNUC_HAS_BUILTIN +#endif +#if defined(__has_builtin) + #define JSON_HEDLEY_GNUC_HAS_BUILTIN(builtin,major,minor,patch) __has_builtin(builtin) +#else + #define JSON_HEDLEY_GNUC_HAS_BUILTIN(builtin,major,minor,patch) JSON_HEDLEY_GNUC_VERSION_CHECK(major,minor,patch) +#endif + +#if defined(JSON_HEDLEY_GCC_HAS_BUILTIN) + #undef JSON_HEDLEY_GCC_HAS_BUILTIN +#endif +#if defined(__has_builtin) + #define JSON_HEDLEY_GCC_HAS_BUILTIN(builtin,major,minor,patch) __has_builtin(builtin) +#else + #define JSON_HEDLEY_GCC_HAS_BUILTIN(builtin,major,minor,patch) JSON_HEDLEY_GCC_VERSION_CHECK(major,minor,patch) +#endif + +#if defined(JSON_HEDLEY_HAS_FEATURE) + #undef JSON_HEDLEY_HAS_FEATURE +#endif +#if defined(__has_feature) + #define JSON_HEDLEY_HAS_FEATURE(feature) __has_feature(feature) +#else + #define JSON_HEDLEY_HAS_FEATURE(feature) (0) +#endif + +#if defined(JSON_HEDLEY_GNUC_HAS_FEATURE) + #undef JSON_HEDLEY_GNUC_HAS_FEATURE +#endif +#if defined(__has_feature) + #define JSON_HEDLEY_GNUC_HAS_FEATURE(feature,major,minor,patch) __has_feature(feature) +#else + #define JSON_HEDLEY_GNUC_HAS_FEATURE(feature,major,minor,patch) JSON_HEDLEY_GNUC_VERSION_CHECK(major,minor,patch) +#endif + +#if defined(JSON_HEDLEY_GCC_HAS_FEATURE) + #undef JSON_HEDLEY_GCC_HAS_FEATURE +#endif +#if defined(__has_feature) + #define JSON_HEDLEY_GCC_HAS_FEATURE(feature,major,minor,patch) __has_feature(feature) +#else + #define JSON_HEDLEY_GCC_HAS_FEATURE(feature,major,minor,patch) JSON_HEDLEY_GCC_VERSION_CHECK(major,minor,patch) +#endif + +#if defined(JSON_HEDLEY_HAS_EXTENSION) + #undef JSON_HEDLEY_HAS_EXTENSION +#endif +#if defined(__has_extension) + #define JSON_HEDLEY_HAS_EXTENSION(extension) __has_extension(extension) +#else + #define JSON_HEDLEY_HAS_EXTENSION(extension) (0) +#endif + +#if defined(JSON_HEDLEY_GNUC_HAS_EXTENSION) + #undef JSON_HEDLEY_GNUC_HAS_EXTENSION +#endif +#if defined(__has_extension) + #define JSON_HEDLEY_GNUC_HAS_EXTENSION(extension,major,minor,patch) __has_extension(extension) +#else + #define JSON_HEDLEY_GNUC_HAS_EXTENSION(extension,major,minor,patch) JSON_HEDLEY_GNUC_VERSION_CHECK(major,minor,patch) +#endif + +#if defined(JSON_HEDLEY_GCC_HAS_EXTENSION) + #undef JSON_HEDLEY_GCC_HAS_EXTENSION +#endif +#if defined(__has_extension) + #define JSON_HEDLEY_GCC_HAS_EXTENSION(extension,major,minor,patch) __has_extension(extension) +#else + #define JSON_HEDLEY_GCC_HAS_EXTENSION(extension,major,minor,patch) JSON_HEDLEY_GCC_VERSION_CHECK(major,minor,patch) +#endif + +#if defined(JSON_HEDLEY_HAS_DECLSPEC_ATTRIBUTE) + #undef JSON_HEDLEY_HAS_DECLSPEC_ATTRIBUTE +#endif +#if defined(__has_declspec_attribute) + #define JSON_HEDLEY_HAS_DECLSPEC_ATTRIBUTE(attribute) __has_declspec_attribute(attribute) +#else + #define JSON_HEDLEY_HAS_DECLSPEC_ATTRIBUTE(attribute) (0) +#endif + +#if defined(JSON_HEDLEY_GNUC_HAS_DECLSPEC_ATTRIBUTE) + #undef JSON_HEDLEY_GNUC_HAS_DECLSPEC_ATTRIBUTE +#endif +#if defined(__has_declspec_attribute) + #define JSON_HEDLEY_GNUC_HAS_DECLSPEC_ATTRIBUTE(attribute,major,minor,patch) __has_declspec_attribute(attribute) +#else + #define JSON_HEDLEY_GNUC_HAS_DECLSPEC_ATTRIBUTE(attribute,major,minor,patch) JSON_HEDLEY_GNUC_VERSION_CHECK(major,minor,patch) +#endif + +#if defined(JSON_HEDLEY_GCC_HAS_DECLSPEC_ATTRIBUTE) + #undef JSON_HEDLEY_GCC_HAS_DECLSPEC_ATTRIBUTE +#endif +#if defined(__has_declspec_attribute) + #define JSON_HEDLEY_GCC_HAS_DECLSPEC_ATTRIBUTE(attribute,major,minor,patch) __has_declspec_attribute(attribute) +#else + 
#define JSON_HEDLEY_GCC_HAS_DECLSPEC_ATTRIBUTE(attribute,major,minor,patch) JSON_HEDLEY_GCC_VERSION_CHECK(major,minor,patch) +#endif + +#if defined(JSON_HEDLEY_HAS_WARNING) + #undef JSON_HEDLEY_HAS_WARNING +#endif +#if defined(__has_warning) + #define JSON_HEDLEY_HAS_WARNING(warning) __has_warning(warning) +#else + #define JSON_HEDLEY_HAS_WARNING(warning) (0) +#endif + +#if defined(JSON_HEDLEY_GNUC_HAS_WARNING) + #undef JSON_HEDLEY_GNUC_HAS_WARNING +#endif +#if defined(__has_warning) + #define JSON_HEDLEY_GNUC_HAS_WARNING(warning,major,minor,patch) __has_warning(warning) +#else + #define JSON_HEDLEY_GNUC_HAS_WARNING(warning,major,minor,patch) JSON_HEDLEY_GNUC_VERSION_CHECK(major,minor,patch) +#endif + +#if defined(JSON_HEDLEY_GCC_HAS_WARNING) + #undef JSON_HEDLEY_GCC_HAS_WARNING +#endif +#if defined(__has_warning) + #define JSON_HEDLEY_GCC_HAS_WARNING(warning,major,minor,patch) __has_warning(warning) +#else + #define JSON_HEDLEY_GCC_HAS_WARNING(warning,major,minor,patch) JSON_HEDLEY_GCC_VERSION_CHECK(major,minor,patch) +#endif + +#if \ + (defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L)) || \ + defined(__clang__) || \ + JSON_HEDLEY_GCC_VERSION_CHECK(3,0,0) || \ + JSON_HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + JSON_HEDLEY_IAR_VERSION_CHECK(8,0,0) || \ + JSON_HEDLEY_PGI_VERSION_CHECK(18,4,0) || \ + JSON_HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + JSON_HEDLEY_TI_VERSION_CHECK(15,12,0) || \ + JSON_HEDLEY_TI_ARMCL_VERSION_CHECK(4,7,0) || \ + JSON_HEDLEY_TI_CL430_VERSION_CHECK(2,0,1) || \ + JSON_HEDLEY_TI_CL2000_VERSION_CHECK(6,1,0) || \ + JSON_HEDLEY_TI_CL6X_VERSION_CHECK(7,0,0) || \ + JSON_HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + JSON_HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ + JSON_HEDLEY_CRAY_VERSION_CHECK(5,0,0) || \ + JSON_HEDLEY_TINYC_VERSION_CHECK(0,9,17) || \ + JSON_HEDLEY_SUNPRO_VERSION_CHECK(8,0,0) || \ + (JSON_HEDLEY_IBM_VERSION_CHECK(10,1,0) && defined(__C99_PRAGMA_OPERATOR)) + #define JSON_HEDLEY_PRAGMA(value) _Pragma(#value) +#elif JSON_HEDLEY_MSVC_VERSION_CHECK(15,0,0) + #define JSON_HEDLEY_PRAGMA(value) __pragma(value) +#else + #define JSON_HEDLEY_PRAGMA(value) +#endif + +#if defined(JSON_HEDLEY_DIAGNOSTIC_PUSH) + #undef JSON_HEDLEY_DIAGNOSTIC_PUSH +#endif +#if defined(JSON_HEDLEY_DIAGNOSTIC_POP) + #undef JSON_HEDLEY_DIAGNOSTIC_POP +#endif +#if defined(__clang__) + #define JSON_HEDLEY_DIAGNOSTIC_PUSH _Pragma("clang diagnostic push") + #define JSON_HEDLEY_DIAGNOSTIC_POP _Pragma("clang diagnostic pop") +#elif JSON_HEDLEY_INTEL_VERSION_CHECK(13,0,0) + #define JSON_HEDLEY_DIAGNOSTIC_PUSH _Pragma("warning(push)") + #define JSON_HEDLEY_DIAGNOSTIC_POP _Pragma("warning(pop)") +#elif JSON_HEDLEY_GCC_VERSION_CHECK(4,6,0) + #define JSON_HEDLEY_DIAGNOSTIC_PUSH _Pragma("GCC diagnostic push") + #define JSON_HEDLEY_DIAGNOSTIC_POP _Pragma("GCC diagnostic pop") +#elif \ + JSON_HEDLEY_MSVC_VERSION_CHECK(15,0,0) || \ + JSON_HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) + #define JSON_HEDLEY_DIAGNOSTIC_PUSH __pragma(warning(push)) + #define JSON_HEDLEY_DIAGNOSTIC_POP __pragma(warning(pop)) +#elif JSON_HEDLEY_ARM_VERSION_CHECK(5,6,0) + #define JSON_HEDLEY_DIAGNOSTIC_PUSH _Pragma("push") + #define JSON_HEDLEY_DIAGNOSTIC_POP _Pragma("pop") +#elif \ + JSON_HEDLEY_TI_VERSION_CHECK(15,12,0) || \ + JSON_HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ + JSON_HEDLEY_TI_CL430_VERSION_CHECK(4,4,0) || \ + JSON_HEDLEY_TI_CL6X_VERSION_CHECK(8,1,0) || \ + JSON_HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + JSON_HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) + #define JSON_HEDLEY_DIAGNOSTIC_PUSH _Pragma("diag_push") + #define 
JSON_HEDLEY_DIAGNOSTIC_POP _Pragma("diag_pop") +#elif JSON_HEDLEY_PELLES_VERSION_CHECK(2,90,0) + #define JSON_HEDLEY_DIAGNOSTIC_PUSH _Pragma("warning(push)") + #define JSON_HEDLEY_DIAGNOSTIC_POP _Pragma("warning(pop)") +#else + #define JSON_HEDLEY_DIAGNOSTIC_PUSH + #define JSON_HEDLEY_DIAGNOSTIC_POP +#endif + +/* JSON_HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_ is for + HEDLEY INTERNAL USE ONLY. API subject to change without notice. */ +#if defined(JSON_HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_) + #undef JSON_HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_ +#endif +#if defined(__cplusplus) +# if JSON_HEDLEY_HAS_WARNING("-Wc++98-compat") +# if JSON_HEDLEY_HAS_WARNING("-Wc++17-extensions") +# if JSON_HEDLEY_HAS_WARNING("-Wc++1z-extensions") +# define JSON_HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_(xpr) \ + JSON_HEDLEY_DIAGNOSTIC_PUSH \ + _Pragma("clang diagnostic ignored \"-Wc++98-compat\"") \ + _Pragma("clang diagnostic ignored \"-Wc++17-extensions\"") \ + _Pragma("clang diagnostic ignored \"-Wc++1z-extensions\"") \ + xpr \ + JSON_HEDLEY_DIAGNOSTIC_POP +# else +# define JSON_HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_(xpr) \ + JSON_HEDLEY_DIAGNOSTIC_PUSH \ + _Pragma("clang diagnostic ignored \"-Wc++98-compat\"") \ + _Pragma("clang diagnostic ignored \"-Wc++17-extensions\"") \ + xpr \ + JSON_HEDLEY_DIAGNOSTIC_POP +# endif +# else +# define JSON_HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_(xpr) \ + JSON_HEDLEY_DIAGNOSTIC_PUSH \ + _Pragma("clang diagnostic ignored \"-Wc++98-compat\"") \ + xpr \ + JSON_HEDLEY_DIAGNOSTIC_POP +# endif +# endif +#endif +#if !defined(JSON_HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_) + #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_(x) x +#endif + +#if defined(JSON_HEDLEY_CONST_CAST) + #undef JSON_HEDLEY_CONST_CAST +#endif +#if defined(__cplusplus) +# define JSON_HEDLEY_CONST_CAST(T, expr) (const_cast<T>(expr)) +#elif \ + JSON_HEDLEY_HAS_WARNING("-Wcast-qual") || \ + JSON_HEDLEY_GCC_VERSION_CHECK(4,6,0) || \ + JSON_HEDLEY_INTEL_VERSION_CHECK(13,0,0) +# define JSON_HEDLEY_CONST_CAST(T, expr) (__extension__ ({ \ + JSON_HEDLEY_DIAGNOSTIC_PUSH \ + JSON_HEDLEY_DIAGNOSTIC_DISABLE_CAST_QUAL \ + ((T) (expr)); \ + JSON_HEDLEY_DIAGNOSTIC_POP \ + })) +#else +# define JSON_HEDLEY_CONST_CAST(T, expr) ((T) (expr)) +#endif + +#if defined(JSON_HEDLEY_REINTERPRET_CAST) + #undef JSON_HEDLEY_REINTERPRET_CAST +#endif +#if defined(__cplusplus) + #define JSON_HEDLEY_REINTERPRET_CAST(T, expr) (reinterpret_cast<T>(expr)) +#else + #define JSON_HEDLEY_REINTERPRET_CAST(T, expr) ((T) (expr)) +#endif + +#if defined(JSON_HEDLEY_STATIC_CAST) + #undef JSON_HEDLEY_STATIC_CAST +#endif +#if defined(__cplusplus) + #define JSON_HEDLEY_STATIC_CAST(T, expr) (static_cast<T>(expr)) +#else + #define JSON_HEDLEY_STATIC_CAST(T, expr) ((T) (expr)) +#endif + +#if defined(JSON_HEDLEY_CPP_CAST) + #undef JSON_HEDLEY_CPP_CAST +#endif +#if defined(__cplusplus) +# if JSON_HEDLEY_HAS_WARNING("-Wold-style-cast") +# define JSON_HEDLEY_CPP_CAST(T, expr) \ + JSON_HEDLEY_DIAGNOSTIC_PUSH \ + _Pragma("clang diagnostic ignored \"-Wold-style-cast\"") \ + ((T) (expr)) \ + JSON_HEDLEY_DIAGNOSTIC_POP +# elif JSON_HEDLEY_IAR_VERSION_CHECK(8,3,0) +# define JSON_HEDLEY_CPP_CAST(T, expr) \ + JSON_HEDLEY_DIAGNOSTIC_PUSH \ + _Pragma("diag_suppress=Pe137") \ + JSON_HEDLEY_DIAGNOSTIC_POP +# else +# define JSON_HEDLEY_CPP_CAST(T, expr) ((T) (expr)) +# endif +#else +# define JSON_HEDLEY_CPP_CAST(T, expr) (expr) +#endif + +#if defined(JSON_HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED) + #undef JSON_HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED +#endif 
+#if JSON_HEDLEY_HAS_WARNING("-Wdeprecated-declarations") + #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("clang diagnostic ignored \"-Wdeprecated-declarations\"") +#elif JSON_HEDLEY_INTEL_VERSION_CHECK(13,0,0) + #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("warning(disable:1478 1786)") +#elif JSON_HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) + #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED __pragma(warning(disable:1478 1786)) +#elif JSON_HEDLEY_PGI_VERSION_CHECK(20,7,0) + #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("diag_suppress 1215,1216,1444,1445") +#elif JSON_HEDLEY_PGI_VERSION_CHECK(17,10,0) + #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("diag_suppress 1215,1444") +#elif JSON_HEDLEY_GCC_VERSION_CHECK(4,3,0) + #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("GCC diagnostic ignored \"-Wdeprecated-declarations\"") +#elif JSON_HEDLEY_MSVC_VERSION_CHECK(15,0,0) + #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED __pragma(warning(disable:4996)) +#elif JSON_HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) + #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("diag_suppress 1215,1444") +#elif \ + JSON_HEDLEY_TI_VERSION_CHECK(15,12,0) || \ + (JSON_HEDLEY_TI_ARMCL_VERSION_CHECK(4,8,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + JSON_HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ + (JSON_HEDLEY_TI_CL2000_VERSION_CHECK(6,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + JSON_HEDLEY_TI_CL2000_VERSION_CHECK(6,4,0) || \ + (JSON_HEDLEY_TI_CL430_VERSION_CHECK(4,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + JSON_HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ + (JSON_HEDLEY_TI_CL6X_VERSION_CHECK(7,2,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + JSON_HEDLEY_TI_CL6X_VERSION_CHECK(7,5,0) || \ + JSON_HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + JSON_HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) + #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("diag_suppress 1291,1718") +#elif JSON_HEDLEY_SUNPRO_VERSION_CHECK(5,13,0) && !defined(__cplusplus) + #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("error_messages(off,E_DEPRECATED_ATT,E_DEPRECATED_ATT_MESS)") +#elif JSON_HEDLEY_SUNPRO_VERSION_CHECK(5,13,0) && defined(__cplusplus) + #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("error_messages(off,symdeprecated,symdeprecated2)") +#elif JSON_HEDLEY_IAR_VERSION_CHECK(8,0,0) + #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("diag_suppress=Pe1444,Pe1215") +#elif JSON_HEDLEY_PELLES_VERSION_CHECK(2,90,0) + #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("warn(disable:2241)") +#else + #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED +#endif + +#if defined(JSON_HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS) + #undef JSON_HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS +#endif +#if JSON_HEDLEY_HAS_WARNING("-Wunknown-pragmas") + #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS _Pragma("clang diagnostic ignored \"-Wunknown-pragmas\"") +#elif JSON_HEDLEY_INTEL_VERSION_CHECK(13,0,0) + #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS _Pragma("warning(disable:161)") +#elif JSON_HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) + #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS __pragma(warning(disable:161)) +#elif JSON_HEDLEY_PGI_VERSION_CHECK(17,10,0) + #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS _Pragma("diag_suppress 1675") +#elif JSON_HEDLEY_GCC_VERSION_CHECK(4,3,0) + #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS _Pragma("GCC diagnostic ignored \"-Wunknown-pragmas\"") +#elif 
JSON_HEDLEY_MSVC_VERSION_CHECK(15,0,0) + #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS __pragma(warning(disable:4068)) +#elif \ + JSON_HEDLEY_TI_VERSION_CHECK(16,9,0) || \ + JSON_HEDLEY_TI_CL6X_VERSION_CHECK(8,0,0) || \ + JSON_HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + JSON_HEDLEY_TI_CLPRU_VERSION_CHECK(2,3,0) + #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS _Pragma("diag_suppress 163") +#elif JSON_HEDLEY_TI_CL6X_VERSION_CHECK(8,0,0) + #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS _Pragma("diag_suppress 163") +#elif JSON_HEDLEY_IAR_VERSION_CHECK(8,0,0) + #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS _Pragma("diag_suppress=Pe161") +#elif JSON_HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) + #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS _Pragma("diag_suppress 161") +#else + #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS +#endif + +#if defined(JSON_HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES) + #undef JSON_HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES +#endif +#if JSON_HEDLEY_HAS_WARNING("-Wunknown-attributes") + #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES _Pragma("clang diagnostic ignored \"-Wunknown-attributes\"") +#elif JSON_HEDLEY_GCC_VERSION_CHECK(4,6,0) + #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES _Pragma("GCC diagnostic ignored \"-Wdeprecated-declarations\"") +#elif JSON_HEDLEY_INTEL_VERSION_CHECK(17,0,0) + #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES _Pragma("warning(disable:1292)") +#elif JSON_HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) + #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES __pragma(warning(disable:1292)) +#elif JSON_HEDLEY_MSVC_VERSION_CHECK(19,0,0) + #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES __pragma(warning(disable:5030)) +#elif JSON_HEDLEY_PGI_VERSION_CHECK(20,7,0) + #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES _Pragma("diag_suppress 1097,1098") +#elif JSON_HEDLEY_PGI_VERSION_CHECK(17,10,0) + #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES _Pragma("diag_suppress 1097") +#elif JSON_HEDLEY_SUNPRO_VERSION_CHECK(5,14,0) && defined(__cplusplus) + #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES _Pragma("error_messages(off,attrskipunsup)") +#elif \ + JSON_HEDLEY_TI_VERSION_CHECK(18,1,0) || \ + JSON_HEDLEY_TI_CL6X_VERSION_CHECK(8,3,0) || \ + JSON_HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) + #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES _Pragma("diag_suppress 1173") +#elif JSON_HEDLEY_IAR_VERSION_CHECK(8,0,0) + #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES _Pragma("diag_suppress=Pe1097") +#elif JSON_HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) + #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES _Pragma("diag_suppress 1097") +#else + #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES +#endif + +#if defined(JSON_HEDLEY_DIAGNOSTIC_DISABLE_CAST_QUAL) + #undef JSON_HEDLEY_DIAGNOSTIC_DISABLE_CAST_QUAL +#endif +#if JSON_HEDLEY_HAS_WARNING("-Wcast-qual") + #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_CAST_QUAL _Pragma("clang diagnostic ignored \"-Wcast-qual\"") +#elif JSON_HEDLEY_INTEL_VERSION_CHECK(13,0,0) + #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_CAST_QUAL _Pragma("warning(disable:2203 2331)") +#elif JSON_HEDLEY_GCC_VERSION_CHECK(3,0,0) + #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_CAST_QUAL _Pragma("GCC diagnostic ignored \"-Wcast-qual\"") +#else + #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_CAST_QUAL +#endif + +#if defined(JSON_HEDLEY_DIAGNOSTIC_DISABLE_UNUSED_FUNCTION) + #undef 
JSON_HEDLEY_DIAGNOSTIC_DISABLE_UNUSED_FUNCTION +#endif +#if JSON_HEDLEY_HAS_WARNING("-Wunused-function") + #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_UNUSED_FUNCTION _Pragma("clang diagnostic ignored \"-Wunused-function\"") +#elif JSON_HEDLEY_GCC_VERSION_CHECK(3,4,0) + #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_UNUSED_FUNCTION _Pragma("GCC diagnostic ignored \"-Wunused-function\"") +#elif JSON_HEDLEY_MSVC_VERSION_CHECK(1,0,0) + #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_UNUSED_FUNCTION __pragma(warning(disable:4505)) +#elif JSON_HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) + #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_UNUSED_FUNCTION _Pragma("diag_suppress 3142") +#else + #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_UNUSED_FUNCTION +#endif + +#if defined(JSON_HEDLEY_DEPRECATED) + #undef JSON_HEDLEY_DEPRECATED +#endif +#if defined(JSON_HEDLEY_DEPRECATED_FOR) + #undef JSON_HEDLEY_DEPRECATED_FOR +#endif +#if \ + JSON_HEDLEY_MSVC_VERSION_CHECK(14,0,0) || \ + JSON_HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) + #define JSON_HEDLEY_DEPRECATED(since) __declspec(deprecated("Since " # since)) + #define JSON_HEDLEY_DEPRECATED_FOR(since, replacement) __declspec(deprecated("Since " #since "; use " #replacement)) +#elif \ + (JSON_HEDLEY_HAS_EXTENSION(attribute_deprecated_with_message) && !defined(JSON_HEDLEY_IAR_VERSION)) || \ + JSON_HEDLEY_GCC_VERSION_CHECK(4,5,0) || \ + JSON_HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + JSON_HEDLEY_ARM_VERSION_CHECK(5,6,0) || \ + JSON_HEDLEY_SUNPRO_VERSION_CHECK(5,13,0) || \ + JSON_HEDLEY_PGI_VERSION_CHECK(17,10,0) || \ + JSON_HEDLEY_TI_VERSION_CHECK(18,1,0) || \ + JSON_HEDLEY_TI_ARMCL_VERSION_CHECK(18,1,0) || \ + JSON_HEDLEY_TI_CL6X_VERSION_CHECK(8,3,0) || \ + JSON_HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + JSON_HEDLEY_TI_CLPRU_VERSION_CHECK(2,3,0) || \ + JSON_HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) + #define JSON_HEDLEY_DEPRECATED(since) __attribute__((__deprecated__("Since " #since))) + #define JSON_HEDLEY_DEPRECATED_FOR(since, replacement) __attribute__((__deprecated__("Since " #since "; use " #replacement))) +#elif defined(__cplusplus) && (__cplusplus >= 201402L) + #define JSON_HEDLEY_DEPRECATED(since) JSON_HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_([[deprecated("Since " #since)]]) + #define JSON_HEDLEY_DEPRECATED_FOR(since, replacement) JSON_HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_([[deprecated("Since " #since "; use " #replacement)]]) +#elif \ + JSON_HEDLEY_HAS_ATTRIBUTE(deprecated) || \ + JSON_HEDLEY_GCC_VERSION_CHECK(3,1,0) || \ + JSON_HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + JSON_HEDLEY_TI_VERSION_CHECK(15,12,0) || \ + (JSON_HEDLEY_TI_ARMCL_VERSION_CHECK(4,8,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + JSON_HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ + (JSON_HEDLEY_TI_CL2000_VERSION_CHECK(6,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + JSON_HEDLEY_TI_CL2000_VERSION_CHECK(6,4,0) || \ + (JSON_HEDLEY_TI_CL430_VERSION_CHECK(4,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + JSON_HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ + (JSON_HEDLEY_TI_CL6X_VERSION_CHECK(7,2,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + JSON_HEDLEY_TI_CL6X_VERSION_CHECK(7,5,0) || \ + JSON_HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + JSON_HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ + JSON_HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) || \ + JSON_HEDLEY_IAR_VERSION_CHECK(8,10,0) + #define JSON_HEDLEY_DEPRECATED(since) __attribute__((__deprecated__)) + #define JSON_HEDLEY_DEPRECATED_FOR(since, replacement) __attribute__((__deprecated__)) +#elif \ + JSON_HEDLEY_MSVC_VERSION_CHECK(13,10,0) || \ + 
JSON_HEDLEY_PELLES_VERSION_CHECK(6,50,0) || \ + JSON_HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) + #define JSON_HEDLEY_DEPRECATED(since) __declspec(deprecated) + #define JSON_HEDLEY_DEPRECATED_FOR(since, replacement) __declspec(deprecated) +#elif JSON_HEDLEY_IAR_VERSION_CHECK(8,0,0) + #define JSON_HEDLEY_DEPRECATED(since) _Pragma("deprecated") + #define JSON_HEDLEY_DEPRECATED_FOR(since, replacement) _Pragma("deprecated") +#else + #define JSON_HEDLEY_DEPRECATED(since) + #define JSON_HEDLEY_DEPRECATED_FOR(since, replacement) +#endif + +#if defined(JSON_HEDLEY_UNAVAILABLE) + #undef JSON_HEDLEY_UNAVAILABLE +#endif +#if \ + JSON_HEDLEY_HAS_ATTRIBUTE(warning) || \ + JSON_HEDLEY_GCC_VERSION_CHECK(4,3,0) || \ + JSON_HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + JSON_HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) + #define JSON_HEDLEY_UNAVAILABLE(available_since) __attribute__((__warning__("Not available until " #available_since))) +#else + #define JSON_HEDLEY_UNAVAILABLE(available_since) +#endif + +#if defined(JSON_HEDLEY_WARN_UNUSED_RESULT) + #undef JSON_HEDLEY_WARN_UNUSED_RESULT +#endif +#if defined(JSON_HEDLEY_WARN_UNUSED_RESULT_MSG) + #undef JSON_HEDLEY_WARN_UNUSED_RESULT_MSG +#endif +#if \ + JSON_HEDLEY_HAS_ATTRIBUTE(warn_unused_result) || \ + JSON_HEDLEY_GCC_VERSION_CHECK(3,4,0) || \ + JSON_HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + JSON_HEDLEY_TI_VERSION_CHECK(15,12,0) || \ + (JSON_HEDLEY_TI_ARMCL_VERSION_CHECK(4,8,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + JSON_HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ + (JSON_HEDLEY_TI_CL2000_VERSION_CHECK(6,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + JSON_HEDLEY_TI_CL2000_VERSION_CHECK(6,4,0) || \ + (JSON_HEDLEY_TI_CL430_VERSION_CHECK(4,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + JSON_HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ + (JSON_HEDLEY_TI_CL6X_VERSION_CHECK(7,2,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + JSON_HEDLEY_TI_CL6X_VERSION_CHECK(7,5,0) || \ + JSON_HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + JSON_HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ + (JSON_HEDLEY_SUNPRO_VERSION_CHECK(5,15,0) && defined(__cplusplus)) || \ + JSON_HEDLEY_PGI_VERSION_CHECK(17,10,0) || \ + JSON_HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) + #define JSON_HEDLEY_WARN_UNUSED_RESULT __attribute__((__warn_unused_result__)) + #define JSON_HEDLEY_WARN_UNUSED_RESULT_MSG(msg) __attribute__((__warn_unused_result__)) +#elif (JSON_HEDLEY_HAS_CPP_ATTRIBUTE(nodiscard) >= 201907L) + #define JSON_HEDLEY_WARN_UNUSED_RESULT JSON_HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_([[nodiscard]]) + #define JSON_HEDLEY_WARN_UNUSED_RESULT_MSG(msg) JSON_HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_([[nodiscard(msg)]]) +#elif JSON_HEDLEY_HAS_CPP_ATTRIBUTE(nodiscard) + #define JSON_HEDLEY_WARN_UNUSED_RESULT JSON_HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_([[nodiscard]]) + #define JSON_HEDLEY_WARN_UNUSED_RESULT_MSG(msg) JSON_HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_([[nodiscard]]) +#elif defined(_Check_return_) /* SAL */ + #define JSON_HEDLEY_WARN_UNUSED_RESULT _Check_return_ + #define JSON_HEDLEY_WARN_UNUSED_RESULT_MSG(msg) _Check_return_ +#else + #define JSON_HEDLEY_WARN_UNUSED_RESULT + #define JSON_HEDLEY_WARN_UNUSED_RESULT_MSG(msg) +#endif + +#if defined(JSON_HEDLEY_SENTINEL) + #undef JSON_HEDLEY_SENTINEL +#endif +#if \ + JSON_HEDLEY_HAS_ATTRIBUTE(sentinel) || \ + JSON_HEDLEY_GCC_VERSION_CHECK(4,0,0) || \ + JSON_HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + JSON_HEDLEY_ARM_VERSION_CHECK(5,4,0) || \ + JSON_HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) + #define JSON_HEDLEY_SENTINEL(position) 
__attribute__((__sentinel__(position))) +#else + #define JSON_HEDLEY_SENTINEL(position) +#endif + +#if defined(JSON_HEDLEY_NO_RETURN) + #undef JSON_HEDLEY_NO_RETURN +#endif +#if JSON_HEDLEY_IAR_VERSION_CHECK(8,0,0) + #define JSON_HEDLEY_NO_RETURN __noreturn +#elif \ + JSON_HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + JSON_HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) + #define JSON_HEDLEY_NO_RETURN __attribute__((__noreturn__)) +#elif defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L + #define JSON_HEDLEY_NO_RETURN _Noreturn +#elif defined(__cplusplus) && (__cplusplus >= 201103L) + #define JSON_HEDLEY_NO_RETURN JSON_HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_([[noreturn]]) +#elif \ + JSON_HEDLEY_HAS_ATTRIBUTE(noreturn) || \ + JSON_HEDLEY_GCC_VERSION_CHECK(3,2,0) || \ + JSON_HEDLEY_SUNPRO_VERSION_CHECK(5,11,0) || \ + JSON_HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + JSON_HEDLEY_IBM_VERSION_CHECK(10,1,0) || \ + JSON_HEDLEY_TI_VERSION_CHECK(15,12,0) || \ + (JSON_HEDLEY_TI_ARMCL_VERSION_CHECK(4,8,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + JSON_HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ + (JSON_HEDLEY_TI_CL2000_VERSION_CHECK(6,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + JSON_HEDLEY_TI_CL2000_VERSION_CHECK(6,4,0) || \ + (JSON_HEDLEY_TI_CL430_VERSION_CHECK(4,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + JSON_HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ + (JSON_HEDLEY_TI_CL6X_VERSION_CHECK(7,2,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + JSON_HEDLEY_TI_CL6X_VERSION_CHECK(7,5,0) || \ + JSON_HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + JSON_HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ + JSON_HEDLEY_IAR_VERSION_CHECK(8,10,0) + #define JSON_HEDLEY_NO_RETURN __attribute__((__noreturn__)) +#elif JSON_HEDLEY_SUNPRO_VERSION_CHECK(5,10,0) + #define JSON_HEDLEY_NO_RETURN _Pragma("does_not_return") +#elif \ + JSON_HEDLEY_MSVC_VERSION_CHECK(13,10,0) || \ + JSON_HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) + #define JSON_HEDLEY_NO_RETURN __declspec(noreturn) +#elif JSON_HEDLEY_TI_CL6X_VERSION_CHECK(6,0,0) && defined(__cplusplus) + #define JSON_HEDLEY_NO_RETURN _Pragma("FUNC_NEVER_RETURNS;") +#elif JSON_HEDLEY_COMPCERT_VERSION_CHECK(3,2,0) + #define JSON_HEDLEY_NO_RETURN __attribute((noreturn)) +#elif JSON_HEDLEY_PELLES_VERSION_CHECK(9,0,0) + #define JSON_HEDLEY_NO_RETURN __declspec(noreturn) +#else + #define JSON_HEDLEY_NO_RETURN +#endif + +#if defined(JSON_HEDLEY_NO_ESCAPE) + #undef JSON_HEDLEY_NO_ESCAPE +#endif +#if JSON_HEDLEY_HAS_ATTRIBUTE(noescape) + #define JSON_HEDLEY_NO_ESCAPE __attribute__((__noescape__)) +#else + #define JSON_HEDLEY_NO_ESCAPE +#endif + +#if defined(JSON_HEDLEY_UNREACHABLE) + #undef JSON_HEDLEY_UNREACHABLE +#endif +#if defined(JSON_HEDLEY_UNREACHABLE_RETURN) + #undef JSON_HEDLEY_UNREACHABLE_RETURN +#endif +#if defined(JSON_HEDLEY_ASSUME) + #undef JSON_HEDLEY_ASSUME +#endif +#if \ + JSON_HEDLEY_MSVC_VERSION_CHECK(13,10,0) || \ + JSON_HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + JSON_HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) + #define JSON_HEDLEY_ASSUME(expr) __assume(expr) +#elif JSON_HEDLEY_HAS_BUILTIN(__builtin_assume) + #define JSON_HEDLEY_ASSUME(expr) __builtin_assume(expr) +#elif \ + JSON_HEDLEY_TI_CL2000_VERSION_CHECK(6,2,0) || \ + JSON_HEDLEY_TI_CL6X_VERSION_CHECK(4,0,0) + #if defined(__cplusplus) + #define JSON_HEDLEY_ASSUME(expr) std::_nassert(expr) + #else + #define JSON_HEDLEY_ASSUME(expr) _nassert(expr) + #endif +#endif +#if \ + (JSON_HEDLEY_HAS_BUILTIN(__builtin_unreachable) && (!defined(JSON_HEDLEY_ARM_VERSION))) || \ + JSON_HEDLEY_GCC_VERSION_CHECK(4,5,0) || \ + 
JSON_HEDLEY_PGI_VERSION_CHECK(18,10,0) || \ + JSON_HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + JSON_HEDLEY_IBM_VERSION_CHECK(13,1,5) || \ + JSON_HEDLEY_CRAY_VERSION_CHECK(10,0,0) || \ + JSON_HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) + #define JSON_HEDLEY_UNREACHABLE() __builtin_unreachable() +#elif defined(JSON_HEDLEY_ASSUME) + #define JSON_HEDLEY_UNREACHABLE() JSON_HEDLEY_ASSUME(0) +#endif +#if !defined(JSON_HEDLEY_ASSUME) + #if defined(JSON_HEDLEY_UNREACHABLE) + #define JSON_HEDLEY_ASSUME(expr) JSON_HEDLEY_STATIC_CAST(void, ((expr) ? 1 : (JSON_HEDLEY_UNREACHABLE(), 1))) + #else + #define JSON_HEDLEY_ASSUME(expr) JSON_HEDLEY_STATIC_CAST(void, expr) + #endif +#endif +#if defined(JSON_HEDLEY_UNREACHABLE) + #if \ + JSON_HEDLEY_TI_CL2000_VERSION_CHECK(6,2,0) || \ + JSON_HEDLEY_TI_CL6X_VERSION_CHECK(4,0,0) + #define JSON_HEDLEY_UNREACHABLE_RETURN(value) return (JSON_HEDLEY_STATIC_CAST(void, JSON_HEDLEY_ASSUME(0)), (value)) + #else + #define JSON_HEDLEY_UNREACHABLE_RETURN(value) JSON_HEDLEY_UNREACHABLE() + #endif +#else + #define JSON_HEDLEY_UNREACHABLE_RETURN(value) return (value) +#endif +#if !defined(JSON_HEDLEY_UNREACHABLE) + #define JSON_HEDLEY_UNREACHABLE() JSON_HEDLEY_ASSUME(0) +#endif + +JSON_HEDLEY_DIAGNOSTIC_PUSH +#if JSON_HEDLEY_HAS_WARNING("-Wpedantic") + #pragma clang diagnostic ignored "-Wpedantic" +#endif +#if JSON_HEDLEY_HAS_WARNING("-Wc++98-compat-pedantic") && defined(__cplusplus) + #pragma clang diagnostic ignored "-Wc++98-compat-pedantic" +#endif +#if JSON_HEDLEY_GCC_HAS_WARNING("-Wvariadic-macros",4,0,0) + #if defined(__clang__) + #pragma clang diagnostic ignored "-Wvariadic-macros" + #elif defined(JSON_HEDLEY_GCC_VERSION) + #pragma GCC diagnostic ignored "-Wvariadic-macros" + #endif +#endif +#if defined(JSON_HEDLEY_NON_NULL) + #undef JSON_HEDLEY_NON_NULL +#endif +#if \ + JSON_HEDLEY_HAS_ATTRIBUTE(nonnull) || \ + JSON_HEDLEY_GCC_VERSION_CHECK(3,3,0) || \ + JSON_HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + JSON_HEDLEY_ARM_VERSION_CHECK(4,1,0) + #define JSON_HEDLEY_NON_NULL(...) __attribute__((__nonnull__(__VA_ARGS__))) +#else + #define JSON_HEDLEY_NON_NULL(...) 
+#endif +JSON_HEDLEY_DIAGNOSTIC_POP + +#if defined(JSON_HEDLEY_PRINTF_FORMAT) + #undef JSON_HEDLEY_PRINTF_FORMAT +#endif +#if defined(__MINGW32__) && JSON_HEDLEY_GCC_HAS_ATTRIBUTE(format,4,4,0) && !defined(__USE_MINGW_ANSI_STDIO) + #define JSON_HEDLEY_PRINTF_FORMAT(string_idx,first_to_check) __attribute__((__format__(ms_printf, string_idx, first_to_check))) +#elif defined(__MINGW32__) && JSON_HEDLEY_GCC_HAS_ATTRIBUTE(format,4,4,0) && defined(__USE_MINGW_ANSI_STDIO) + #define JSON_HEDLEY_PRINTF_FORMAT(string_idx,first_to_check) __attribute__((__format__(gnu_printf, string_idx, first_to_check))) +#elif \ + JSON_HEDLEY_HAS_ATTRIBUTE(format) || \ + JSON_HEDLEY_GCC_VERSION_CHECK(3,1,0) || \ + JSON_HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + JSON_HEDLEY_ARM_VERSION_CHECK(5,6,0) || \ + JSON_HEDLEY_IBM_VERSION_CHECK(10,1,0) || \ + JSON_HEDLEY_TI_VERSION_CHECK(15,12,0) || \ + (JSON_HEDLEY_TI_ARMCL_VERSION_CHECK(4,8,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + JSON_HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ + (JSON_HEDLEY_TI_CL2000_VERSION_CHECK(6,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + JSON_HEDLEY_TI_CL2000_VERSION_CHECK(6,4,0) || \ + (JSON_HEDLEY_TI_CL430_VERSION_CHECK(4,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + JSON_HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ + (JSON_HEDLEY_TI_CL6X_VERSION_CHECK(7,2,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + JSON_HEDLEY_TI_CL6X_VERSION_CHECK(7,5,0) || \ + JSON_HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + JSON_HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ + JSON_HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) + #define JSON_HEDLEY_PRINTF_FORMAT(string_idx,first_to_check) __attribute__((__format__(__printf__, string_idx, first_to_check))) +#elif JSON_HEDLEY_PELLES_VERSION_CHECK(6,0,0) + #define JSON_HEDLEY_PRINTF_FORMAT(string_idx,first_to_check) __declspec(vaformat(printf,string_idx,first_to_check)) +#else + #define JSON_HEDLEY_PRINTF_FORMAT(string_idx,first_to_check) +#endif + +#if defined(JSON_HEDLEY_CONSTEXPR) + #undef JSON_HEDLEY_CONSTEXPR +#endif +#if defined(__cplusplus) + #if __cplusplus >= 201103L + #define JSON_HEDLEY_CONSTEXPR JSON_HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_(constexpr) + #endif +#endif +#if !defined(JSON_HEDLEY_CONSTEXPR) + #define JSON_HEDLEY_CONSTEXPR +#endif + +#if defined(JSON_HEDLEY_PREDICT) + #undef JSON_HEDLEY_PREDICT +#endif +#if defined(JSON_HEDLEY_LIKELY) + #undef JSON_HEDLEY_LIKELY +#endif +#if defined(JSON_HEDLEY_UNLIKELY) + #undef JSON_HEDLEY_UNLIKELY +#endif +#if defined(JSON_HEDLEY_UNPREDICTABLE) + #undef JSON_HEDLEY_UNPREDICTABLE +#endif +#if JSON_HEDLEY_HAS_BUILTIN(__builtin_unpredictable) + #define JSON_HEDLEY_UNPREDICTABLE(expr) __builtin_unpredictable((expr)) +#endif +#if \ + (JSON_HEDLEY_HAS_BUILTIN(__builtin_expect_with_probability) && !defined(JSON_HEDLEY_PGI_VERSION)) || \ + JSON_HEDLEY_GCC_VERSION_CHECK(9,0,0) || \ + JSON_HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define JSON_HEDLEY_PREDICT(expr, value, probability) __builtin_expect_with_probability( (expr), (value), (probability)) +# define JSON_HEDLEY_PREDICT_TRUE(expr, probability) __builtin_expect_with_probability(!!(expr), 1 , (probability)) +# define JSON_HEDLEY_PREDICT_FALSE(expr, probability) __builtin_expect_with_probability(!!(expr), 0 , (probability)) +# define JSON_HEDLEY_LIKELY(expr) __builtin_expect (!!(expr), 1 ) +# define JSON_HEDLEY_UNLIKELY(expr) __builtin_expect (!!(expr), 0 ) +#elif \ + (JSON_HEDLEY_HAS_BUILTIN(__builtin_expect) && !defined(JSON_HEDLEY_INTEL_CL_VERSION)) || \ + JSON_HEDLEY_GCC_VERSION_CHECK(3,0,0) || \ + 
JSON_HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + (JSON_HEDLEY_SUNPRO_VERSION_CHECK(5,15,0) && defined(__cplusplus)) || \ + JSON_HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + JSON_HEDLEY_IBM_VERSION_CHECK(10,1,0) || \ + JSON_HEDLEY_TI_VERSION_CHECK(15,12,0) || \ + JSON_HEDLEY_TI_ARMCL_VERSION_CHECK(4,7,0) || \ + JSON_HEDLEY_TI_CL430_VERSION_CHECK(3,1,0) || \ + JSON_HEDLEY_TI_CL2000_VERSION_CHECK(6,1,0) || \ + JSON_HEDLEY_TI_CL6X_VERSION_CHECK(6,1,0) || \ + JSON_HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + JSON_HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ + JSON_HEDLEY_TINYC_VERSION_CHECK(0,9,27) || \ + JSON_HEDLEY_CRAY_VERSION_CHECK(8,1,0) || \ + JSON_HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define JSON_HEDLEY_PREDICT(expr, expected, probability) \ + (((probability) >= 0.9) ? __builtin_expect((expr), (expected)) : (JSON_HEDLEY_STATIC_CAST(void, expected), (expr))) +# define JSON_HEDLEY_PREDICT_TRUE(expr, probability) \ + (__extension__ ({ \ + double hedley_probability_ = (probability); \ + ((hedley_probability_ >= 0.9) ? __builtin_expect(!!(expr), 1) : ((hedley_probability_ <= 0.1) ? __builtin_expect(!!(expr), 0) : !!(expr))); \ + })) +# define JSON_HEDLEY_PREDICT_FALSE(expr, probability) \ + (__extension__ ({ \ + double hedley_probability_ = (probability); \ + ((hedley_probability_ >= 0.9) ? __builtin_expect(!!(expr), 0) : ((hedley_probability_ <= 0.1) ? __builtin_expect(!!(expr), 1) : !!(expr))); \ + })) +# define JSON_HEDLEY_LIKELY(expr) __builtin_expect(!!(expr), 1) +# define JSON_HEDLEY_UNLIKELY(expr) __builtin_expect(!!(expr), 0) +#else +# define JSON_HEDLEY_PREDICT(expr, expected, probability) (JSON_HEDLEY_STATIC_CAST(void, expected), (expr)) +# define JSON_HEDLEY_PREDICT_TRUE(expr, probability) (!!(expr)) +# define JSON_HEDLEY_PREDICT_FALSE(expr, probability) (!!(expr)) +# define JSON_HEDLEY_LIKELY(expr) (!!(expr)) +# define JSON_HEDLEY_UNLIKELY(expr) (!!(expr)) +#endif +#if !defined(JSON_HEDLEY_UNPREDICTABLE) + #define JSON_HEDLEY_UNPREDICTABLE(expr) JSON_HEDLEY_PREDICT(expr, 1, 0.5) +#endif + +#if defined(JSON_HEDLEY_MALLOC) + #undef JSON_HEDLEY_MALLOC +#endif +#if \ + JSON_HEDLEY_HAS_ATTRIBUTE(malloc) || \ + JSON_HEDLEY_GCC_VERSION_CHECK(3,1,0) || \ + JSON_HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + JSON_HEDLEY_SUNPRO_VERSION_CHECK(5,11,0) || \ + JSON_HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + JSON_HEDLEY_IBM_VERSION_CHECK(12,1,0) || \ + JSON_HEDLEY_TI_VERSION_CHECK(15,12,0) || \ + (JSON_HEDLEY_TI_ARMCL_VERSION_CHECK(4,8,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + JSON_HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ + (JSON_HEDLEY_TI_CL2000_VERSION_CHECK(6,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + JSON_HEDLEY_TI_CL2000_VERSION_CHECK(6,4,0) || \ + (JSON_HEDLEY_TI_CL430_VERSION_CHECK(4,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + JSON_HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ + (JSON_HEDLEY_TI_CL6X_VERSION_CHECK(7,2,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + JSON_HEDLEY_TI_CL6X_VERSION_CHECK(7,5,0) || \ + JSON_HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + JSON_HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ + JSON_HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) + #define JSON_HEDLEY_MALLOC __attribute__((__malloc__)) +#elif JSON_HEDLEY_SUNPRO_VERSION_CHECK(5,10,0) + #define JSON_HEDLEY_MALLOC _Pragma("returns_new_memory") +#elif \ + JSON_HEDLEY_MSVC_VERSION_CHECK(14,0,0) || \ + JSON_HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) + #define JSON_HEDLEY_MALLOC __declspec(restrict) +#else + #define JSON_HEDLEY_MALLOC +#endif + +#if defined(JSON_HEDLEY_PURE) + #undef JSON_HEDLEY_PURE +#endif +#if \ 
+ JSON_HEDLEY_HAS_ATTRIBUTE(pure) || \ + JSON_HEDLEY_GCC_VERSION_CHECK(2,96,0) || \ + JSON_HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + JSON_HEDLEY_SUNPRO_VERSION_CHECK(5,11,0) || \ + JSON_HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + JSON_HEDLEY_IBM_VERSION_CHECK(10,1,0) || \ + JSON_HEDLEY_TI_VERSION_CHECK(15,12,0) || \ + (JSON_HEDLEY_TI_ARMCL_VERSION_CHECK(4,8,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + JSON_HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ + (JSON_HEDLEY_TI_CL2000_VERSION_CHECK(6,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + JSON_HEDLEY_TI_CL2000_VERSION_CHECK(6,4,0) || \ + (JSON_HEDLEY_TI_CL430_VERSION_CHECK(4,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + JSON_HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ + (JSON_HEDLEY_TI_CL6X_VERSION_CHECK(7,2,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + JSON_HEDLEY_TI_CL6X_VERSION_CHECK(7,5,0) || \ + JSON_HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + JSON_HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ + JSON_HEDLEY_PGI_VERSION_CHECK(17,10,0) || \ + JSON_HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define JSON_HEDLEY_PURE __attribute__((__pure__)) +#elif JSON_HEDLEY_SUNPRO_VERSION_CHECK(5,10,0) +# define JSON_HEDLEY_PURE _Pragma("does_not_write_global_data") +#elif defined(__cplusplus) && \ + ( \ + JSON_HEDLEY_TI_CL430_VERSION_CHECK(2,0,1) || \ + JSON_HEDLEY_TI_CL6X_VERSION_CHECK(4,0,0) || \ + JSON_HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) \ + ) +# define JSON_HEDLEY_PURE _Pragma("FUNC_IS_PURE;") +#else +# define JSON_HEDLEY_PURE +#endif + +#if defined(JSON_HEDLEY_CONST) + #undef JSON_HEDLEY_CONST +#endif +#if \ + JSON_HEDLEY_HAS_ATTRIBUTE(const) || \ + JSON_HEDLEY_GCC_VERSION_CHECK(2,5,0) || \ + JSON_HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + JSON_HEDLEY_SUNPRO_VERSION_CHECK(5,11,0) || \ + JSON_HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + JSON_HEDLEY_IBM_VERSION_CHECK(10,1,0) || \ + JSON_HEDLEY_TI_VERSION_CHECK(15,12,0) || \ + (JSON_HEDLEY_TI_ARMCL_VERSION_CHECK(4,8,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + JSON_HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ + (JSON_HEDLEY_TI_CL2000_VERSION_CHECK(6,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + JSON_HEDLEY_TI_CL2000_VERSION_CHECK(6,4,0) || \ + (JSON_HEDLEY_TI_CL430_VERSION_CHECK(4,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + JSON_HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ + (JSON_HEDLEY_TI_CL6X_VERSION_CHECK(7,2,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + JSON_HEDLEY_TI_CL6X_VERSION_CHECK(7,5,0) || \ + JSON_HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + JSON_HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ + JSON_HEDLEY_PGI_VERSION_CHECK(17,10,0) || \ + JSON_HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) + #define JSON_HEDLEY_CONST __attribute__((__const__)) +#elif \ + JSON_HEDLEY_SUNPRO_VERSION_CHECK(5,10,0) + #define JSON_HEDLEY_CONST _Pragma("no_side_effect") +#else + #define JSON_HEDLEY_CONST JSON_HEDLEY_PURE +#endif + +#if defined(JSON_HEDLEY_RESTRICT) + #undef JSON_HEDLEY_RESTRICT +#endif +#if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) && !defined(__cplusplus) + #define JSON_HEDLEY_RESTRICT restrict +#elif \ + JSON_HEDLEY_GCC_VERSION_CHECK(3,1,0) || \ + JSON_HEDLEY_MSVC_VERSION_CHECK(14,0,0) || \ + JSON_HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + JSON_HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) || \ + JSON_HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + JSON_HEDLEY_IBM_VERSION_CHECK(10,1,0) || \ + JSON_HEDLEY_PGI_VERSION_CHECK(17,10,0) || \ + JSON_HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ + JSON_HEDLEY_TI_CL2000_VERSION_CHECK(6,2,4) || \ + JSON_HEDLEY_TI_CL6X_VERSION_CHECK(8,1,0) 
|| \ + JSON_HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + (JSON_HEDLEY_SUNPRO_VERSION_CHECK(5,14,0) && defined(__cplusplus)) || \ + JSON_HEDLEY_IAR_VERSION_CHECK(8,0,0) || \ + defined(__clang__) || \ + JSON_HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) + #define JSON_HEDLEY_RESTRICT __restrict +#elif JSON_HEDLEY_SUNPRO_VERSION_CHECK(5,3,0) && !defined(__cplusplus) + #define JSON_HEDLEY_RESTRICT _Restrict +#else + #define JSON_HEDLEY_RESTRICT +#endif + +#if defined(JSON_HEDLEY_INLINE) + #undef JSON_HEDLEY_INLINE +#endif +#if \ + (defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L)) || \ + (defined(__cplusplus) && (__cplusplus >= 199711L)) + #define JSON_HEDLEY_INLINE inline +#elif \ + defined(JSON_HEDLEY_GCC_VERSION) || \ + JSON_HEDLEY_ARM_VERSION_CHECK(6,2,0) + #define JSON_HEDLEY_INLINE __inline__ +#elif \ + JSON_HEDLEY_MSVC_VERSION_CHECK(12,0,0) || \ + JSON_HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) || \ + JSON_HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + JSON_HEDLEY_TI_ARMCL_VERSION_CHECK(5,1,0) || \ + JSON_HEDLEY_TI_CL430_VERSION_CHECK(3,1,0) || \ + JSON_HEDLEY_TI_CL2000_VERSION_CHECK(6,2,0) || \ + JSON_HEDLEY_TI_CL6X_VERSION_CHECK(8,0,0) || \ + JSON_HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + JSON_HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ + JSON_HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) + #define JSON_HEDLEY_INLINE __inline +#else + #define JSON_HEDLEY_INLINE +#endif + +#if defined(JSON_HEDLEY_ALWAYS_INLINE) + #undef JSON_HEDLEY_ALWAYS_INLINE +#endif +#if \ + JSON_HEDLEY_HAS_ATTRIBUTE(always_inline) || \ + JSON_HEDLEY_GCC_VERSION_CHECK(4,0,0) || \ + JSON_HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + JSON_HEDLEY_SUNPRO_VERSION_CHECK(5,11,0) || \ + JSON_HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + JSON_HEDLEY_IBM_VERSION_CHECK(10,1,0) || \ + JSON_HEDLEY_TI_VERSION_CHECK(15,12,0) || \ + (JSON_HEDLEY_TI_ARMCL_VERSION_CHECK(4,8,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + JSON_HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ + (JSON_HEDLEY_TI_CL2000_VERSION_CHECK(6,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + JSON_HEDLEY_TI_CL2000_VERSION_CHECK(6,4,0) || \ + (JSON_HEDLEY_TI_CL430_VERSION_CHECK(4,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + JSON_HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ + (JSON_HEDLEY_TI_CL6X_VERSION_CHECK(7,2,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + JSON_HEDLEY_TI_CL6X_VERSION_CHECK(7,5,0) || \ + JSON_HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + JSON_HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ + JSON_HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) || \ + JSON_HEDLEY_IAR_VERSION_CHECK(8,10,0) +# define JSON_HEDLEY_ALWAYS_INLINE __attribute__((__always_inline__)) JSON_HEDLEY_INLINE +#elif \ + JSON_HEDLEY_MSVC_VERSION_CHECK(12,0,0) || \ + JSON_HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) +# define JSON_HEDLEY_ALWAYS_INLINE __forceinline +#elif defined(__cplusplus) && \ + ( \ + JSON_HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ + JSON_HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ + JSON_HEDLEY_TI_CL2000_VERSION_CHECK(6,4,0) || \ + JSON_HEDLEY_TI_CL6X_VERSION_CHECK(6,1,0) || \ + JSON_HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + JSON_HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) \ + ) +# define JSON_HEDLEY_ALWAYS_INLINE _Pragma("FUNC_ALWAYS_INLINE;") +#elif JSON_HEDLEY_IAR_VERSION_CHECK(8,0,0) +# define JSON_HEDLEY_ALWAYS_INLINE _Pragma("inline=forced") +#else +# define JSON_HEDLEY_ALWAYS_INLINE JSON_HEDLEY_INLINE +#endif + +#if defined(JSON_HEDLEY_NEVER_INLINE) + #undef JSON_HEDLEY_NEVER_INLINE +#endif +#if \ + JSON_HEDLEY_HAS_ATTRIBUTE(noinline) || \ + JSON_HEDLEY_GCC_VERSION_CHECK(4,0,0) || \ + 
JSON_HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + JSON_HEDLEY_SUNPRO_VERSION_CHECK(5,11,0) || \ + JSON_HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + JSON_HEDLEY_IBM_VERSION_CHECK(10,1,0) || \ + JSON_HEDLEY_TI_VERSION_CHECK(15,12,0) || \ + (JSON_HEDLEY_TI_ARMCL_VERSION_CHECK(4,8,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + JSON_HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ + (JSON_HEDLEY_TI_CL2000_VERSION_CHECK(6,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + JSON_HEDLEY_TI_CL2000_VERSION_CHECK(6,4,0) || \ + (JSON_HEDLEY_TI_CL430_VERSION_CHECK(4,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + JSON_HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ + (JSON_HEDLEY_TI_CL6X_VERSION_CHECK(7,2,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + JSON_HEDLEY_TI_CL6X_VERSION_CHECK(7,5,0) || \ + JSON_HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + JSON_HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ + JSON_HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) || \ + JSON_HEDLEY_IAR_VERSION_CHECK(8,10,0) + #define JSON_HEDLEY_NEVER_INLINE __attribute__((__noinline__)) +#elif \ + JSON_HEDLEY_MSVC_VERSION_CHECK(13,10,0) || \ + JSON_HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) + #define JSON_HEDLEY_NEVER_INLINE __declspec(noinline) +#elif JSON_HEDLEY_PGI_VERSION_CHECK(10,2,0) + #define JSON_HEDLEY_NEVER_INLINE _Pragma("noinline") +#elif JSON_HEDLEY_TI_CL6X_VERSION_CHECK(6,0,0) && defined(__cplusplus) + #define JSON_HEDLEY_NEVER_INLINE _Pragma("FUNC_CANNOT_INLINE;") +#elif JSON_HEDLEY_IAR_VERSION_CHECK(8,0,0) + #define JSON_HEDLEY_NEVER_INLINE _Pragma("inline=never") +#elif JSON_HEDLEY_COMPCERT_VERSION_CHECK(3,2,0) + #define JSON_HEDLEY_NEVER_INLINE __attribute((noinline)) +#elif JSON_HEDLEY_PELLES_VERSION_CHECK(9,0,0) + #define JSON_HEDLEY_NEVER_INLINE __declspec(noinline) +#else + #define JSON_HEDLEY_NEVER_INLINE +#endif + +#if defined(JSON_HEDLEY_PRIVATE) + #undef JSON_HEDLEY_PRIVATE +#endif +#if defined(JSON_HEDLEY_PUBLIC) + #undef JSON_HEDLEY_PUBLIC +#endif +#if defined(JSON_HEDLEY_IMPORT) + #undef JSON_HEDLEY_IMPORT +#endif +#if defined(_WIN32) || defined(__CYGWIN__) +# define JSON_HEDLEY_PRIVATE +# define JSON_HEDLEY_PUBLIC __declspec(dllexport) +# define JSON_HEDLEY_IMPORT __declspec(dllimport) +#else +# if \ + JSON_HEDLEY_HAS_ATTRIBUTE(visibility) || \ + JSON_HEDLEY_GCC_VERSION_CHECK(3,3,0) || \ + JSON_HEDLEY_SUNPRO_VERSION_CHECK(5,11,0) || \ + JSON_HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + JSON_HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + JSON_HEDLEY_IBM_VERSION_CHECK(13,1,0) || \ + ( \ + defined(__TI_EABI__) && \ + ( \ + (JSON_HEDLEY_TI_CL6X_VERSION_CHECK(7,2,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + JSON_HEDLEY_TI_CL6X_VERSION_CHECK(7,5,0) \ + ) \ + ) || \ + JSON_HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define JSON_HEDLEY_PRIVATE __attribute__((__visibility__("hidden"))) +# define JSON_HEDLEY_PUBLIC __attribute__((__visibility__("default"))) +# else +# define JSON_HEDLEY_PRIVATE +# define JSON_HEDLEY_PUBLIC +# endif +# define JSON_HEDLEY_IMPORT extern +#endif + +#if defined(JSON_HEDLEY_NO_THROW) + #undef JSON_HEDLEY_NO_THROW +#endif +#if \ + JSON_HEDLEY_HAS_ATTRIBUTE(nothrow) || \ + JSON_HEDLEY_GCC_VERSION_CHECK(3,3,0) || \ + JSON_HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + JSON_HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) + #define JSON_HEDLEY_NO_THROW __attribute__((__nothrow__)) +#elif \ + JSON_HEDLEY_MSVC_VERSION_CHECK(13,1,0) || \ + JSON_HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) || \ + JSON_HEDLEY_ARM_VERSION_CHECK(4,1,0) + #define JSON_HEDLEY_NO_THROW __declspec(nothrow) +#else + #define JSON_HEDLEY_NO_THROW +#endif + +#if 
defined(JSON_HEDLEY_FALL_THROUGH) + #undef JSON_HEDLEY_FALL_THROUGH +#endif +#if \ + JSON_HEDLEY_HAS_ATTRIBUTE(fallthrough) || \ + JSON_HEDLEY_GCC_VERSION_CHECK(7,0,0) || \ + JSON_HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) + #define JSON_HEDLEY_FALL_THROUGH __attribute__((__fallthrough__)) +#elif JSON_HEDLEY_HAS_CPP_ATTRIBUTE_NS(clang,fallthrough) + #define JSON_HEDLEY_FALL_THROUGH JSON_HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_([[clang::fallthrough]]) +#elif JSON_HEDLEY_HAS_CPP_ATTRIBUTE(fallthrough) + #define JSON_HEDLEY_FALL_THROUGH JSON_HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_([[fallthrough]]) +#elif defined(__fallthrough) /* SAL */ + #define JSON_HEDLEY_FALL_THROUGH __fallthrough +#else + #define JSON_HEDLEY_FALL_THROUGH +#endif + +#if defined(JSON_HEDLEY_RETURNS_NON_NULL) + #undef JSON_HEDLEY_RETURNS_NON_NULL +#endif +#if \ + JSON_HEDLEY_HAS_ATTRIBUTE(returns_nonnull) || \ + JSON_HEDLEY_GCC_VERSION_CHECK(4,9,0) || \ + JSON_HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) + #define JSON_HEDLEY_RETURNS_NON_NULL __attribute__((__returns_nonnull__)) +#elif defined(_Ret_notnull_) /* SAL */ + #define JSON_HEDLEY_RETURNS_NON_NULL _Ret_notnull_ +#else + #define JSON_HEDLEY_RETURNS_NON_NULL +#endif + +#if defined(JSON_HEDLEY_ARRAY_PARAM) + #undef JSON_HEDLEY_ARRAY_PARAM +#endif +#if \ + defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) && \ + !defined(__STDC_NO_VLA__) && \ + !defined(__cplusplus) && \ + !defined(JSON_HEDLEY_PGI_VERSION) && \ + !defined(JSON_HEDLEY_TINYC_VERSION) + #define JSON_HEDLEY_ARRAY_PARAM(name) (name) +#else + #define JSON_HEDLEY_ARRAY_PARAM(name) +#endif + +#if defined(JSON_HEDLEY_IS_CONSTANT) + #undef JSON_HEDLEY_IS_CONSTANT +#endif +#if defined(JSON_HEDLEY_REQUIRE_CONSTEXPR) + #undef JSON_HEDLEY_REQUIRE_CONSTEXPR +#endif +/* JSON_HEDLEY_IS_CONSTEXPR_ is for + HEDLEY INTERNAL USE ONLY. API subject to change without notice. */ +#if defined(JSON_HEDLEY_IS_CONSTEXPR_) + #undef JSON_HEDLEY_IS_CONSTEXPR_ +#endif +#if \ + JSON_HEDLEY_HAS_BUILTIN(__builtin_constant_p) || \ + JSON_HEDLEY_GCC_VERSION_CHECK(3,4,0) || \ + JSON_HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + JSON_HEDLEY_TINYC_VERSION_CHECK(0,9,19) || \ + JSON_HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + JSON_HEDLEY_IBM_VERSION_CHECK(13,1,0) || \ + JSON_HEDLEY_TI_CL6X_VERSION_CHECK(6,1,0) || \ + (JSON_HEDLEY_SUNPRO_VERSION_CHECK(5,10,0) && !defined(__cplusplus)) || \ + JSON_HEDLEY_CRAY_VERSION_CHECK(8,1,0) || \ + JSON_HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) + #define JSON_HEDLEY_IS_CONSTANT(expr) __builtin_constant_p(expr) +#endif +#if !defined(__cplusplus) +# if \ + JSON_HEDLEY_HAS_BUILTIN(__builtin_types_compatible_p) || \ + JSON_HEDLEY_GCC_VERSION_CHECK(3,4,0) || \ + JSON_HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + JSON_HEDLEY_IBM_VERSION_CHECK(13,1,0) || \ + JSON_HEDLEY_CRAY_VERSION_CHECK(8,1,0) || \ + JSON_HEDLEY_ARM_VERSION_CHECK(5,4,0) || \ + JSON_HEDLEY_TINYC_VERSION_CHECK(0,9,24) +#if defined(__INTPTR_TYPE__) + #define JSON_HEDLEY_IS_CONSTEXPR_(expr) __builtin_types_compatible_p(__typeof__((1 ? (void*) ((__INTPTR_TYPE__) ((expr) * 0)) : (int*) 0)), int*) +#else + #include <stdint.h> + #define JSON_HEDLEY_IS_CONSTEXPR_(expr) __builtin_types_compatible_p(__typeof__((1 ? 
(void*) ((intptr_t) ((expr) * 0)) : (int*) 0)), int*) +#endif +# elif \ + ( \ + defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L) && \ + !defined(JSON_HEDLEY_SUNPRO_VERSION) && \ + !defined(JSON_HEDLEY_PGI_VERSION) && \ + !defined(JSON_HEDLEY_IAR_VERSION)) || \ + (JSON_HEDLEY_HAS_EXTENSION(c_generic_selections) && !defined(JSON_HEDLEY_IAR_VERSION)) || \ + JSON_HEDLEY_GCC_VERSION_CHECK(4,9,0) || \ + JSON_HEDLEY_INTEL_VERSION_CHECK(17,0,0) || \ + JSON_HEDLEY_IBM_VERSION_CHECK(12,1,0) || \ + JSON_HEDLEY_ARM_VERSION_CHECK(5,3,0) +#if defined(__INTPTR_TYPE__) + #define JSON_HEDLEY_IS_CONSTEXPR_(expr) _Generic((1 ? (void*) ((__INTPTR_TYPE__) ((expr) * 0)) : (int*) 0), int*: 1, void*: 0) +#else + #include <stdint.h> + #define JSON_HEDLEY_IS_CONSTEXPR_(expr) _Generic((1 ? (void*) ((intptr_t) * 0) : (int*) 0), int*: 1, void*: 0) +#endif +# elif \ + defined(JSON_HEDLEY_GCC_VERSION) || \ + defined(JSON_HEDLEY_INTEL_VERSION) || \ + defined(JSON_HEDLEY_TINYC_VERSION) || \ + defined(JSON_HEDLEY_TI_ARMCL_VERSION) || \ + JSON_HEDLEY_TI_CL430_VERSION_CHECK(18,12,0) || \ + defined(JSON_HEDLEY_TI_CL2000_VERSION) || \ + defined(JSON_HEDLEY_TI_CL6X_VERSION) || \ + defined(JSON_HEDLEY_TI_CL7X_VERSION) || \ + defined(JSON_HEDLEY_TI_CLPRU_VERSION) || \ + defined(__clang__) +# define JSON_HEDLEY_IS_CONSTEXPR_(expr) ( \ + sizeof(void) != \ + sizeof(*( \ + 1 ? \ + ((void*) ((expr) * 0L) ) : \ +((struct { char v[sizeof(void) * 2]; } *) 1) \ + ) \ + ) \ + ) +# endif +#endif +#if defined(JSON_HEDLEY_IS_CONSTEXPR_) + #if !defined(JSON_HEDLEY_IS_CONSTANT) + #define JSON_HEDLEY_IS_CONSTANT(expr) JSON_HEDLEY_IS_CONSTEXPR_(expr) + #endif + #define JSON_HEDLEY_REQUIRE_CONSTEXPR(expr) (JSON_HEDLEY_IS_CONSTEXPR_(expr) ? (expr) : (-1)) +#else + #if !defined(JSON_HEDLEY_IS_CONSTANT) + #define JSON_HEDLEY_IS_CONSTANT(expr) (0) + #endif + #define JSON_HEDLEY_REQUIRE_CONSTEXPR(expr) (expr) +#endif + +#if defined(JSON_HEDLEY_BEGIN_C_DECLS) + #undef JSON_HEDLEY_BEGIN_C_DECLS +#endif +#if defined(JSON_HEDLEY_END_C_DECLS) + #undef JSON_HEDLEY_END_C_DECLS +#endif +#if defined(JSON_HEDLEY_C_DECL) + #undef JSON_HEDLEY_C_DECL +#endif +#if defined(__cplusplus) + #define JSON_HEDLEY_BEGIN_C_DECLS extern "C" { + #define JSON_HEDLEY_END_C_DECLS } + #define JSON_HEDLEY_C_DECL extern "C" +#else + #define JSON_HEDLEY_BEGIN_C_DECLS + #define JSON_HEDLEY_END_C_DECLS + #define JSON_HEDLEY_C_DECL +#endif + +#if defined(JSON_HEDLEY_STATIC_ASSERT) + #undef JSON_HEDLEY_STATIC_ASSERT +#endif +#if \ + !defined(__cplusplus) && ( \ + (defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L)) || \ + (JSON_HEDLEY_HAS_FEATURE(c_static_assert) && !defined(JSON_HEDLEY_INTEL_CL_VERSION)) || \ + JSON_HEDLEY_GCC_VERSION_CHECK(6,0,0) || \ + JSON_HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + defined(_Static_assert) \ + ) +# define JSON_HEDLEY_STATIC_ASSERT(expr, message) _Static_assert(expr, message) +#elif \ + (defined(__cplusplus) && (__cplusplus >= 201103L)) || \ + JSON_HEDLEY_MSVC_VERSION_CHECK(16,0,0) || \ + JSON_HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) +# define JSON_HEDLEY_STATIC_ASSERT(expr, message) JSON_HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_(static_assert(expr, message)) +#else +# define JSON_HEDLEY_STATIC_ASSERT(expr, message) +#endif + +#if defined(JSON_HEDLEY_NULL) + #undef JSON_HEDLEY_NULL +#endif +#if defined(__cplusplus) + #if __cplusplus >= 201103L + #define JSON_HEDLEY_NULL JSON_HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_(nullptr) + #elif defined(NULL) + #define JSON_HEDLEY_NULL NULL + #else + #define JSON_HEDLEY_NULL 
JSON_HEDLEY_STATIC_CAST(void*, 0) + #endif +#elif defined(NULL) + #define JSON_HEDLEY_NULL NULL +#else + #define JSON_HEDLEY_NULL ((void*) 0) +#endif + +#if defined(JSON_HEDLEY_MESSAGE) + #undef JSON_HEDLEY_MESSAGE +#endif +#if JSON_HEDLEY_HAS_WARNING("-Wunknown-pragmas") +# define JSON_HEDLEY_MESSAGE(msg) \ + JSON_HEDLEY_DIAGNOSTIC_PUSH \ + JSON_HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS \ + JSON_HEDLEY_PRAGMA(message msg) \ + JSON_HEDLEY_DIAGNOSTIC_POP +#elif \ + JSON_HEDLEY_GCC_VERSION_CHECK(4,4,0) || \ + JSON_HEDLEY_INTEL_VERSION_CHECK(13,0,0) +# define JSON_HEDLEY_MESSAGE(msg) JSON_HEDLEY_PRAGMA(message msg) +#elif JSON_HEDLEY_CRAY_VERSION_CHECK(5,0,0) +# define JSON_HEDLEY_MESSAGE(msg) JSON_HEDLEY_PRAGMA(_CRI message msg) +#elif JSON_HEDLEY_IAR_VERSION_CHECK(8,0,0) +# define JSON_HEDLEY_MESSAGE(msg) JSON_HEDLEY_PRAGMA(message(msg)) +#elif JSON_HEDLEY_PELLES_VERSION_CHECK(2,0,0) +# define JSON_HEDLEY_MESSAGE(msg) JSON_HEDLEY_PRAGMA(message(msg)) +#else +# define JSON_HEDLEY_MESSAGE(msg) +#endif + +#if defined(JSON_HEDLEY_WARNING) + #undef JSON_HEDLEY_WARNING +#endif +#if JSON_HEDLEY_HAS_WARNING("-Wunknown-pragmas") +# define JSON_HEDLEY_WARNING(msg) \ + JSON_HEDLEY_DIAGNOSTIC_PUSH \ + JSON_HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS \ + JSON_HEDLEY_PRAGMA(clang warning msg) \ + JSON_HEDLEY_DIAGNOSTIC_POP +#elif \ + JSON_HEDLEY_GCC_VERSION_CHECK(4,8,0) || \ + JSON_HEDLEY_PGI_VERSION_CHECK(18,4,0) || \ + JSON_HEDLEY_INTEL_VERSION_CHECK(13,0,0) +# define JSON_HEDLEY_WARNING(msg) JSON_HEDLEY_PRAGMA(GCC warning msg) +#elif \ + JSON_HEDLEY_MSVC_VERSION_CHECK(15,0,0) || \ + JSON_HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) +# define JSON_HEDLEY_WARNING(msg) JSON_HEDLEY_PRAGMA(message(msg)) +#else +# define JSON_HEDLEY_WARNING(msg) JSON_HEDLEY_MESSAGE(msg) +#endif + +#if defined(JSON_HEDLEY_REQUIRE) + #undef JSON_HEDLEY_REQUIRE +#endif +#if defined(JSON_HEDLEY_REQUIRE_MSG) + #undef JSON_HEDLEY_REQUIRE_MSG +#endif +#if JSON_HEDLEY_HAS_ATTRIBUTE(diagnose_if) +# if JSON_HEDLEY_HAS_WARNING("-Wgcc-compat") +# define JSON_HEDLEY_REQUIRE(expr) \ + JSON_HEDLEY_DIAGNOSTIC_PUSH \ + _Pragma("clang diagnostic ignored \"-Wgcc-compat\"") \ + __attribute__((diagnose_if(!(expr), #expr, "error"))) \ + JSON_HEDLEY_DIAGNOSTIC_POP +# define JSON_HEDLEY_REQUIRE_MSG(expr,msg) \ + JSON_HEDLEY_DIAGNOSTIC_PUSH \ + _Pragma("clang diagnostic ignored \"-Wgcc-compat\"") \ + __attribute__((diagnose_if(!(expr), msg, "error"))) \ + JSON_HEDLEY_DIAGNOSTIC_POP +# else +# define JSON_HEDLEY_REQUIRE(expr) __attribute__((diagnose_if(!(expr), #expr, "error"))) +# define JSON_HEDLEY_REQUIRE_MSG(expr,msg) __attribute__((diagnose_if(!(expr), msg, "error"))) +# endif +#else +# define JSON_HEDLEY_REQUIRE(expr) +# define JSON_HEDLEY_REQUIRE_MSG(expr,msg) +#endif + +#if defined(JSON_HEDLEY_FLAGS) + #undef JSON_HEDLEY_FLAGS +#endif +#if JSON_HEDLEY_HAS_ATTRIBUTE(flag_enum) && (!defined(__cplusplus) || JSON_HEDLEY_HAS_WARNING("-Wbitfield-enum-conversion")) + #define JSON_HEDLEY_FLAGS __attribute__((__flag_enum__)) +#else + #define JSON_HEDLEY_FLAGS +#endif + +#if defined(JSON_HEDLEY_FLAGS_CAST) + #undef JSON_HEDLEY_FLAGS_CAST +#endif +#if JSON_HEDLEY_INTEL_VERSION_CHECK(19,0,0) +# define JSON_HEDLEY_FLAGS_CAST(T, expr) (__extension__ ({ \ + JSON_HEDLEY_DIAGNOSTIC_PUSH \ + _Pragma("warning(disable:188)") \ + ((T) (expr)); \ + JSON_HEDLEY_DIAGNOSTIC_POP \ + })) +#else +# define JSON_HEDLEY_FLAGS_CAST(T, expr) JSON_HEDLEY_STATIC_CAST(T, expr) +#endif + +#if defined(JSON_HEDLEY_EMPTY_BASES) + #undef JSON_HEDLEY_EMPTY_BASES +#endif +#if \ + 
(JSON_HEDLEY_MSVC_VERSION_CHECK(19,0,23918) && !JSON_HEDLEY_MSVC_VERSION_CHECK(20,0,0)) || \ + JSON_HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) + #define JSON_HEDLEY_EMPTY_BASES __declspec(empty_bases) +#else + #define JSON_HEDLEY_EMPTY_BASES +#endif + +/* Remaining macros are deprecated. */ + +#if defined(JSON_HEDLEY_GCC_NOT_CLANG_VERSION_CHECK) + #undef JSON_HEDLEY_GCC_NOT_CLANG_VERSION_CHECK +#endif +#if defined(__clang__) + #define JSON_HEDLEY_GCC_NOT_CLANG_VERSION_CHECK(major,minor,patch) (0) +#else + #define JSON_HEDLEY_GCC_NOT_CLANG_VERSION_CHECK(major,minor,patch) JSON_HEDLEY_GCC_VERSION_CHECK(major,minor,patch) +#endif + +#if defined(JSON_HEDLEY_CLANG_HAS_ATTRIBUTE) + #undef JSON_HEDLEY_CLANG_HAS_ATTRIBUTE +#endif +#define JSON_HEDLEY_CLANG_HAS_ATTRIBUTE(attribute) JSON_HEDLEY_HAS_ATTRIBUTE(attribute) + +#if defined(JSON_HEDLEY_CLANG_HAS_CPP_ATTRIBUTE) + #undef JSON_HEDLEY_CLANG_HAS_CPP_ATTRIBUTE +#endif +#define JSON_HEDLEY_CLANG_HAS_CPP_ATTRIBUTE(attribute) JSON_HEDLEY_HAS_CPP_ATTRIBUTE(attribute) + +#if defined(JSON_HEDLEY_CLANG_HAS_BUILTIN) + #undef JSON_HEDLEY_CLANG_HAS_BUILTIN +#endif +#define JSON_HEDLEY_CLANG_HAS_BUILTIN(builtin) JSON_HEDLEY_HAS_BUILTIN(builtin) + +#if defined(JSON_HEDLEY_CLANG_HAS_FEATURE) + #undef JSON_HEDLEY_CLANG_HAS_FEATURE +#endif +#define JSON_HEDLEY_CLANG_HAS_FEATURE(feature) JSON_HEDLEY_HAS_FEATURE(feature) + +#if defined(JSON_HEDLEY_CLANG_HAS_EXTENSION) + #undef JSON_HEDLEY_CLANG_HAS_EXTENSION +#endif +#define JSON_HEDLEY_CLANG_HAS_EXTENSION(extension) JSON_HEDLEY_HAS_EXTENSION(extension) + +#if defined(JSON_HEDLEY_CLANG_HAS_DECLSPEC_DECLSPEC_ATTRIBUTE) + #undef JSON_HEDLEY_CLANG_HAS_DECLSPEC_DECLSPEC_ATTRIBUTE +#endif +#define JSON_HEDLEY_CLANG_HAS_DECLSPEC_ATTRIBUTE(attribute) JSON_HEDLEY_HAS_DECLSPEC_ATTRIBUTE(attribute) + +#if defined(JSON_HEDLEY_CLANG_HAS_WARNING) + #undef JSON_HEDLEY_CLANG_HAS_WARNING +#endif +#define JSON_HEDLEY_CLANG_HAS_WARNING(warning) JSON_HEDLEY_HAS_WARNING(warning) + +#endif /* !defined(JSON_HEDLEY_VERSION) || (JSON_HEDLEY_VERSION < X) */ + + +// This file contains all internal macro definitions (except those affecting ABI) +// You MUST include macro_unscope.hpp at the end of json.hpp to undef all of them + +// #include + + +// exclude unsupported compilers +#if !defined(JSON_SKIP_UNSUPPORTED_COMPILER_CHECK) + #if defined(__clang__) + #if (__clang_major__ * 10000 + __clang_minor__ * 100 + __clang_patchlevel__) < 30400 + #error "unsupported Clang version - see https://github.com/nlohmann/json#supported-compilers" + #endif + #elif defined(__GNUC__) && !(defined(__ICC) || defined(__INTEL_COMPILER)) + #if (__GNUC__ * 10000 + __GNUC_MINOR__ * 100 + __GNUC_PATCHLEVEL__) < 40800 + #error "unsupported GCC version - see https://github.com/nlohmann/json#supported-compilers" + #endif + #endif +#endif + +// C++ language standard detection +// if the user manually specified the used c++ version this is skipped +#if !defined(JSON_HAS_CPP_20) && !defined(JSON_HAS_CPP_17) && !defined(JSON_HAS_CPP_14) && !defined(JSON_HAS_CPP_11) + #if (defined(__cplusplus) && __cplusplus >= 202002L) || (defined(_MSVC_LANG) && _MSVC_LANG >= 202002L) + #define JSON_HAS_CPP_20 + #define JSON_HAS_CPP_17 + #define JSON_HAS_CPP_14 + #elif (defined(__cplusplus) && __cplusplus >= 201703L) || (defined(_HAS_CXX17) && _HAS_CXX17 == 1) // fix for issue #464 + #define JSON_HAS_CPP_17 + #define JSON_HAS_CPP_14 + #elif (defined(__cplusplus) && __cplusplus >= 201402L) || (defined(_HAS_CXX14) && _HAS_CXX14 == 1) + #define JSON_HAS_CPP_14 + #endif + // the cpp 11 flag is 
always specified because it is the minimal required version + #define JSON_HAS_CPP_11 +#endif + +#ifdef __has_include + #if __has_include() + #include + #endif +#endif + +#if !defined(JSON_HAS_FILESYSTEM) && !defined(JSON_HAS_EXPERIMENTAL_FILESYSTEM) + #ifdef JSON_HAS_CPP_17 + #if defined(__cpp_lib_filesystem) + #define JSON_HAS_FILESYSTEM 1 + #elif defined(__cpp_lib_experimental_filesystem) + #define JSON_HAS_EXPERIMENTAL_FILESYSTEM 1 + #elif !defined(__has_include) + #define JSON_HAS_EXPERIMENTAL_FILESYSTEM 1 + #elif __has_include() + #define JSON_HAS_FILESYSTEM 1 + #elif __has_include() + #define JSON_HAS_EXPERIMENTAL_FILESYSTEM 1 + #endif + + // std::filesystem does not work on MinGW GCC 8: https://sourceforge.net/p/mingw-w64/bugs/737/ + #if defined(__MINGW32__) && defined(__GNUC__) && __GNUC__ == 8 + #undef JSON_HAS_FILESYSTEM + #undef JSON_HAS_EXPERIMENTAL_FILESYSTEM + #endif + + // no filesystem support before GCC 8: https://en.cppreference.com/w/cpp/compiler_support + #if defined(__GNUC__) && !defined(__clang__) && __GNUC__ < 8 + #undef JSON_HAS_FILESYSTEM + #undef JSON_HAS_EXPERIMENTAL_FILESYSTEM + #endif + + // no filesystem support before Clang 7: https://en.cppreference.com/w/cpp/compiler_support + #if defined(__clang_major__) && __clang_major__ < 7 + #undef JSON_HAS_FILESYSTEM + #undef JSON_HAS_EXPERIMENTAL_FILESYSTEM + #endif + + // no filesystem support before MSVC 19.14: https://en.cppreference.com/w/cpp/compiler_support + #if defined(_MSC_VER) && _MSC_VER < 1914 + #undef JSON_HAS_FILESYSTEM + #undef JSON_HAS_EXPERIMENTAL_FILESYSTEM + #endif + + // no filesystem support before iOS 13 + #if defined(__IPHONE_OS_VERSION_MIN_REQUIRED) && __IPHONE_OS_VERSION_MIN_REQUIRED < 130000 + #undef JSON_HAS_FILESYSTEM + #undef JSON_HAS_EXPERIMENTAL_FILESYSTEM + #endif + + // no filesystem support before macOS Catalina + #if defined(__MAC_OS_X_VERSION_MIN_REQUIRED) && __MAC_OS_X_VERSION_MIN_REQUIRED < 101500 + #undef JSON_HAS_FILESYSTEM + #undef JSON_HAS_EXPERIMENTAL_FILESYSTEM + #endif + #endif +#endif + +#ifndef JSON_HAS_EXPERIMENTAL_FILESYSTEM + #define JSON_HAS_EXPERIMENTAL_FILESYSTEM 0 +#endif + +#ifndef JSON_HAS_FILESYSTEM + #define JSON_HAS_FILESYSTEM 0 +#endif + +#ifndef JSON_HAS_THREE_WAY_COMPARISON + #if defined(__cpp_impl_three_way_comparison) && __cpp_impl_three_way_comparison >= 201907L \ + && defined(__cpp_lib_three_way_comparison) && __cpp_lib_three_way_comparison >= 201907L + #define JSON_HAS_THREE_WAY_COMPARISON 1 + #else + #define JSON_HAS_THREE_WAY_COMPARISON 0 + #endif +#endif + +#ifndef JSON_HAS_RANGES + // ranges header shipping in GCC 11.1.0 (released 2021-04-27) has syntax error + #if defined(__GLIBCXX__) && __GLIBCXX__ == 20210427 + #define JSON_HAS_RANGES 0 + #elif defined(__cpp_lib_ranges) + #define JSON_HAS_RANGES 1 + #else + #define JSON_HAS_RANGES 0 + #endif +#endif + +#ifndef JSON_HAS_STATIC_RTTI + #if !defined(_HAS_STATIC_RTTI) || _HAS_STATIC_RTTI != 0 + #define JSON_HAS_STATIC_RTTI 1 + #else + #define JSON_HAS_STATIC_RTTI 0 + #endif +#endif + +#ifdef JSON_HAS_CPP_17 + #define JSON_INLINE_VARIABLE inline +#else + #define JSON_INLINE_VARIABLE +#endif + +#if JSON_HEDLEY_HAS_ATTRIBUTE(no_unique_address) + #define JSON_NO_UNIQUE_ADDRESS [[no_unique_address]] +#else + #define JSON_NO_UNIQUE_ADDRESS +#endif + +// disable documentation warnings on clang +#if defined(__clang__) + #pragma clang diagnostic push + #pragma clang diagnostic ignored "-Wdocumentation" + #pragma clang diagnostic ignored "-Wdocumentation-unknown-command" +#endif + +// allow disabling 
exceptions +#if (defined(__cpp_exceptions) || defined(__EXCEPTIONS) || defined(_CPPUNWIND)) && !defined(JSON_NOEXCEPTION) + #define JSON_THROW(exception) throw exception + #define JSON_TRY try + #define JSON_CATCH(exception) catch(exception) + #define JSON_INTERNAL_CATCH(exception) catch(exception) +#else + #include + #define JSON_THROW(exception) std::abort() + #define JSON_TRY if(true) + #define JSON_CATCH(exception) if(false) + #define JSON_INTERNAL_CATCH(exception) if(false) +#endif + +// override exception macros +#if defined(JSON_THROW_USER) + #undef JSON_THROW + #define JSON_THROW JSON_THROW_USER +#endif +#if defined(JSON_TRY_USER) + #undef JSON_TRY + #define JSON_TRY JSON_TRY_USER +#endif +#if defined(JSON_CATCH_USER) + #undef JSON_CATCH + #define JSON_CATCH JSON_CATCH_USER + #undef JSON_INTERNAL_CATCH + #define JSON_INTERNAL_CATCH JSON_CATCH_USER +#endif +#if defined(JSON_INTERNAL_CATCH_USER) + #undef JSON_INTERNAL_CATCH + #define JSON_INTERNAL_CATCH JSON_INTERNAL_CATCH_USER +#endif + +// allow overriding assert +#if !defined(JSON_ASSERT) + #include // assert + #define JSON_ASSERT(x) assert(x) +#endif + +// allow to access some private functions (needed by the test suite) +#if defined(JSON_TESTS_PRIVATE) + #define JSON_PRIVATE_UNLESS_TESTED public +#else + #define JSON_PRIVATE_UNLESS_TESTED private +#endif + +/*! +@brief macro to briefly define a mapping between an enum and JSON +@def NLOHMANN_JSON_SERIALIZE_ENUM +@since version 3.4.0 +*/ +#define NLOHMANN_JSON_SERIALIZE_ENUM(ENUM_TYPE, ...) \ + template \ + inline void to_json(BasicJsonType& j, const ENUM_TYPE& e) \ + { \ + static_assert(std::is_enum::value, #ENUM_TYPE " must be an enum!"); \ + static const std::pair m[] = __VA_ARGS__; \ + auto it = std::find_if(std::begin(m), std::end(m), \ + [e](const std::pair& ej_pair) -> bool \ + { \ + return ej_pair.first == e; \ + }); \ + j = ((it != std::end(m)) ? it : std::begin(m))->second; \ + } \ + template \ + inline void from_json(const BasicJsonType& j, ENUM_TYPE& e) \ + { \ + static_assert(std::is_enum::value, #ENUM_TYPE " must be an enum!"); \ + static const std::pair m[] = __VA_ARGS__; \ + auto it = std::find_if(std::begin(m), std::end(m), \ + [&j](const std::pair& ej_pair) -> bool \ + { \ + return ej_pair.second == j; \ + }); \ + e = ((it != std::end(m)) ? it : std::begin(m))->first; \ + } + +// Ugly macros to avoid uglier copy-paste when specializing basic_json. They +// may be removed in the future once the class is split. + +#define NLOHMANN_BASIC_JSON_TPL_DECLARATION \ + template class ObjectType, \ + template class ArrayType, \ + class StringType, class BooleanType, class NumberIntegerType, \ + class NumberUnsignedType, class NumberFloatType, \ + template class AllocatorType, \ + template class JSONSerializer, \ + class BinaryType, \ + class CustomBaseClass> + +#define NLOHMANN_BASIC_JSON_TPL \ + basic_json + +// Macros to simplify conversion from/to types + +#define NLOHMANN_JSON_EXPAND( x ) x +#define NLOHMANN_JSON_GET_MACRO(_1, _2, _3, _4, _5, _6, _7, _8, _9, _10, _11, _12, _13, _14, _15, _16, _17, _18, _19, _20, _21, _22, _23, _24, _25, _26, _27, _28, _29, _30, _31, _32, _33, _34, _35, _36, _37, _38, _39, _40, _41, _42, _43, _44, _45, _46, _47, _48, _49, _50, _51, _52, _53, _54, _55, _56, _57, _58, _59, _60, _61, _62, _63, _64, NAME,...) NAME +#define NLOHMANN_JSON_PASTE(...) 
NLOHMANN_JSON_EXPAND(NLOHMANN_JSON_GET_MACRO(__VA_ARGS__, \ + NLOHMANN_JSON_PASTE64, \ + NLOHMANN_JSON_PASTE63, \ + NLOHMANN_JSON_PASTE62, \ + NLOHMANN_JSON_PASTE61, \ + NLOHMANN_JSON_PASTE60, \ + NLOHMANN_JSON_PASTE59, \ + NLOHMANN_JSON_PASTE58, \ + NLOHMANN_JSON_PASTE57, \ + NLOHMANN_JSON_PASTE56, \ + NLOHMANN_JSON_PASTE55, \ + NLOHMANN_JSON_PASTE54, \ + NLOHMANN_JSON_PASTE53, \ + NLOHMANN_JSON_PASTE52, \ + NLOHMANN_JSON_PASTE51, \ + NLOHMANN_JSON_PASTE50, \ + NLOHMANN_JSON_PASTE49, \ + NLOHMANN_JSON_PASTE48, \ + NLOHMANN_JSON_PASTE47, \ + NLOHMANN_JSON_PASTE46, \ + NLOHMANN_JSON_PASTE45, \ + NLOHMANN_JSON_PASTE44, \ + NLOHMANN_JSON_PASTE43, \ + NLOHMANN_JSON_PASTE42, \ + NLOHMANN_JSON_PASTE41, \ + NLOHMANN_JSON_PASTE40, \ + NLOHMANN_JSON_PASTE39, \ + NLOHMANN_JSON_PASTE38, \ + NLOHMANN_JSON_PASTE37, \ + NLOHMANN_JSON_PASTE36, \ + NLOHMANN_JSON_PASTE35, \ + NLOHMANN_JSON_PASTE34, \ + NLOHMANN_JSON_PASTE33, \ + NLOHMANN_JSON_PASTE32, \ + NLOHMANN_JSON_PASTE31, \ + NLOHMANN_JSON_PASTE30, \ + NLOHMANN_JSON_PASTE29, \ + NLOHMANN_JSON_PASTE28, \ + NLOHMANN_JSON_PASTE27, \ + NLOHMANN_JSON_PASTE26, \ + NLOHMANN_JSON_PASTE25, \ + NLOHMANN_JSON_PASTE24, \ + NLOHMANN_JSON_PASTE23, \ + NLOHMANN_JSON_PASTE22, \ + NLOHMANN_JSON_PASTE21, \ + NLOHMANN_JSON_PASTE20, \ + NLOHMANN_JSON_PASTE19, \ + NLOHMANN_JSON_PASTE18, \ + NLOHMANN_JSON_PASTE17, \ + NLOHMANN_JSON_PASTE16, \ + NLOHMANN_JSON_PASTE15, \ + NLOHMANN_JSON_PASTE14, \ + NLOHMANN_JSON_PASTE13, \ + NLOHMANN_JSON_PASTE12, \ + NLOHMANN_JSON_PASTE11, \ + NLOHMANN_JSON_PASTE10, \ + NLOHMANN_JSON_PASTE9, \ + NLOHMANN_JSON_PASTE8, \ + NLOHMANN_JSON_PASTE7, \ + NLOHMANN_JSON_PASTE6, \ + NLOHMANN_JSON_PASTE5, \ + NLOHMANN_JSON_PASTE4, \ + NLOHMANN_JSON_PASTE3, \ + NLOHMANN_JSON_PASTE2, \ + NLOHMANN_JSON_PASTE1)(__VA_ARGS__)) +#define NLOHMANN_JSON_PASTE2(func, v1) func(v1) +#define NLOHMANN_JSON_PASTE3(func, v1, v2) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE2(func, v2) +#define NLOHMANN_JSON_PASTE4(func, v1, v2, v3) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE3(func, v2, v3) +#define NLOHMANN_JSON_PASTE5(func, v1, v2, v3, v4) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE4(func, v2, v3, v4) +#define NLOHMANN_JSON_PASTE6(func, v1, v2, v3, v4, v5) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE5(func, v2, v3, v4, v5) +#define NLOHMANN_JSON_PASTE7(func, v1, v2, v3, v4, v5, v6) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE6(func, v2, v3, v4, v5, v6) +#define NLOHMANN_JSON_PASTE8(func, v1, v2, v3, v4, v5, v6, v7) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE7(func, v2, v3, v4, v5, v6, v7) +#define NLOHMANN_JSON_PASTE9(func, v1, v2, v3, v4, v5, v6, v7, v8) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE8(func, v2, v3, v4, v5, v6, v7, v8) +#define NLOHMANN_JSON_PASTE10(func, v1, v2, v3, v4, v5, v6, v7, v8, v9) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE9(func, v2, v3, v4, v5, v6, v7, v8, v9) +#define NLOHMANN_JSON_PASTE11(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE10(func, v2, v3, v4, v5, v6, v7, v8, v9, v10) +#define NLOHMANN_JSON_PASTE12(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE11(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11) +#define NLOHMANN_JSON_PASTE13(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE12(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12) +#define NLOHMANN_JSON_PASTE14(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, 
v13) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE13(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13) +#define NLOHMANN_JSON_PASTE15(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE14(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14) +#define NLOHMANN_JSON_PASTE16(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE15(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15) +#define NLOHMANN_JSON_PASTE17(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE16(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16) +#define NLOHMANN_JSON_PASTE18(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE17(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17) +#define NLOHMANN_JSON_PASTE19(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE18(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18) +#define NLOHMANN_JSON_PASTE20(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE19(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19) +#define NLOHMANN_JSON_PASTE21(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE20(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20) +#define NLOHMANN_JSON_PASTE22(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE21(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21) +#define NLOHMANN_JSON_PASTE23(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE22(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22) +#define NLOHMANN_JSON_PASTE24(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE23(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23) +#define NLOHMANN_JSON_PASTE25(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE24(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24) +#define NLOHMANN_JSON_PASTE26(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE25(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25) +#define NLOHMANN_JSON_PASTE27(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26) NLOHMANN_JSON_PASTE2(func, v1) 
NLOHMANN_JSON_PASTE26(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26) +#define NLOHMANN_JSON_PASTE28(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE27(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27) +#define NLOHMANN_JSON_PASTE29(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE28(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28) +#define NLOHMANN_JSON_PASTE30(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE29(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29) +#define NLOHMANN_JSON_PASTE31(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE30(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30) +#define NLOHMANN_JSON_PASTE32(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE31(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31) +#define NLOHMANN_JSON_PASTE33(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE32(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32) +#define NLOHMANN_JSON_PASTE34(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE33(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33) +#define NLOHMANN_JSON_PASTE35(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE34(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34) +#define NLOHMANN_JSON_PASTE36(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE35(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, 
v26, v27, v28, v29, v30, v31, v32, v33, v34, v35) +#define NLOHMANN_JSON_PASTE37(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE36(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36) +#define NLOHMANN_JSON_PASTE38(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE37(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37) +#define NLOHMANN_JSON_PASTE39(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE38(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38) +#define NLOHMANN_JSON_PASTE40(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE39(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39) +#define NLOHMANN_JSON_PASTE41(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE40(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40) +#define NLOHMANN_JSON_PASTE42(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE41(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41) +#define NLOHMANN_JSON_PASTE43(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41, v42) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE42(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41, v42) +#define NLOHMANN_JSON_PASTE44(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41, v42, v43) 
NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE43(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41, v42, v43) +#define NLOHMANN_JSON_PASTE45(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41, v42, v43, v44) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE44(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41, v42, v43, v44) +#define NLOHMANN_JSON_PASTE46(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41, v42, v43, v44, v45) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE45(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41, v42, v43, v44, v45) +#define NLOHMANN_JSON_PASTE47(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41, v42, v43, v44, v45, v46) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE46(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41, v42, v43, v44, v45, v46) +#define NLOHMANN_JSON_PASTE48(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41, v42, v43, v44, v45, v46, v47) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE47(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41, v42, v43, v44, v45, v46, v47) +#define NLOHMANN_JSON_PASTE49(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41, v42, v43, v44, v45, v46, v47, v48) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE48(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41, v42, v43, v44, v45, v46, v47, v48) +#define NLOHMANN_JSON_PASTE50(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41, v42, v43, v44, v45, v46, v47, v48, v49) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE49(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41, v42, v43, v44, v45, v46, v47, v48, v49) +#define NLOHMANN_JSON_PASTE51(func, v1, v2, v3, v4, v5, 
v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41, v42, v43, v44, v45, v46, v47, v48, v49, v50) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE50(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41, v42, v43, v44, v45, v46, v47, v48, v49, v50) +#define NLOHMANN_JSON_PASTE52(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41, v42, v43, v44, v45, v46, v47, v48, v49, v50, v51) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE51(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41, v42, v43, v44, v45, v46, v47, v48, v49, v50, v51) +#define NLOHMANN_JSON_PASTE53(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41, v42, v43, v44, v45, v46, v47, v48, v49, v50, v51, v52) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE52(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41, v42, v43, v44, v45, v46, v47, v48, v49, v50, v51, v52) +#define NLOHMANN_JSON_PASTE54(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41, v42, v43, v44, v45, v46, v47, v48, v49, v50, v51, v52, v53) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE53(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41, v42, v43, v44, v45, v46, v47, v48, v49, v50, v51, v52, v53) +#define NLOHMANN_JSON_PASTE55(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41, v42, v43, v44, v45, v46, v47, v48, v49, v50, v51, v52, v53, v54) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE54(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41, v42, v43, v44, v45, v46, v47, v48, v49, v50, v51, v52, v53, v54) +#define NLOHMANN_JSON_PASTE56(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41, v42, v43, v44, v45, v46, v47, v48, v49, v50, v51, v52, v53, v54, v55) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE55(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41, v42, v43, v44, v45, v46, v47, v48, v49, v50, v51, v52, v53, v54, v55) +#define 
NLOHMANN_JSON_PASTE57(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41, v42, v43, v44, v45, v46, v47, v48, v49, v50, v51, v52, v53, v54, v55, v56) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE56(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41, v42, v43, v44, v45, v46, v47, v48, v49, v50, v51, v52, v53, v54, v55, v56) +#define NLOHMANN_JSON_PASTE58(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41, v42, v43, v44, v45, v46, v47, v48, v49, v50, v51, v52, v53, v54, v55, v56, v57) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE57(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41, v42, v43, v44, v45, v46, v47, v48, v49, v50, v51, v52, v53, v54, v55, v56, v57) +#define NLOHMANN_JSON_PASTE59(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41, v42, v43, v44, v45, v46, v47, v48, v49, v50, v51, v52, v53, v54, v55, v56, v57, v58) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE58(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41, v42, v43, v44, v45, v46, v47, v48, v49, v50, v51, v52, v53, v54, v55, v56, v57, v58) +#define NLOHMANN_JSON_PASTE60(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41, v42, v43, v44, v45, v46, v47, v48, v49, v50, v51, v52, v53, v54, v55, v56, v57, v58, v59) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE59(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41, v42, v43, v44, v45, v46, v47, v48, v49, v50, v51, v52, v53, v54, v55, v56, v57, v58, v59) +#define NLOHMANN_JSON_PASTE61(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41, v42, v43, v44, v45, v46, v47, v48, v49, v50, v51, v52, v53, v54, v55, v56, v57, v58, v59, v60) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE60(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41, v42, v43, v44, v45, v46, v47, v48, v49, v50, v51, v52, v53, v54, v55, v56, v57, v58, v59, v60) +#define NLOHMANN_JSON_PASTE62(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41, v42, v43, v44, v45, v46, v47, v48, v49, v50, v51, v52, v53, 
v54, v55, v56, v57, v58, v59, v60, v61) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE61(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41, v42, v43, v44, v45, v46, v47, v48, v49, v50, v51, v52, v53, v54, v55, v56, v57, v58, v59, v60, v61) +#define NLOHMANN_JSON_PASTE63(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41, v42, v43, v44, v45, v46, v47, v48, v49, v50, v51, v52, v53, v54, v55, v56, v57, v58, v59, v60, v61, v62) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE62(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41, v42, v43, v44, v45, v46, v47, v48, v49, v50, v51, v52, v53, v54, v55, v56, v57, v58, v59, v60, v61, v62) +#define NLOHMANN_JSON_PASTE64(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41, v42, v43, v44, v45, v46, v47, v48, v49, v50, v51, v52, v53, v54, v55, v56, v57, v58, v59, v60, v61, v62, v63) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE63(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41, v42, v43, v44, v45, v46, v47, v48, v49, v50, v51, v52, v53, v54, v55, v56, v57, v58, v59, v60, v61, v62, v63) + +#define NLOHMANN_JSON_TO(v1) nlohmann_json_j[#v1] = nlohmann_json_t.v1; +#define NLOHMANN_JSON_FROM(v1) nlohmann_json_j.at(#v1).get_to(nlohmann_json_t.v1); +#define NLOHMANN_JSON_FROM_WITH_DEFAULT(v1) nlohmann_json_t.v1 = nlohmann_json_j.value(#v1, nlohmann_json_default_obj.v1); + +/*! +@brief macro +@def NLOHMANN_DEFINE_TYPE_INTRUSIVE +@since version 3.9.0 +*/ +#define NLOHMANN_DEFINE_TYPE_INTRUSIVE(Type, ...) \ + friend void to_json(nlohmann::json& nlohmann_json_j, const Type& nlohmann_json_t) { NLOHMANN_JSON_EXPAND(NLOHMANN_JSON_PASTE(NLOHMANN_JSON_TO, __VA_ARGS__)) } \ + friend void from_json(const nlohmann::json& nlohmann_json_j, Type& nlohmann_json_t) { NLOHMANN_JSON_EXPAND(NLOHMANN_JSON_PASTE(NLOHMANN_JSON_FROM, __VA_ARGS__)) } + +#define NLOHMANN_DEFINE_TYPE_INTRUSIVE_WITH_DEFAULT(Type, ...) \ + friend void to_json(nlohmann::json& nlohmann_json_j, const Type& nlohmann_json_t) { NLOHMANN_JSON_EXPAND(NLOHMANN_JSON_PASTE(NLOHMANN_JSON_TO, __VA_ARGS__)) } \ + friend void from_json(const nlohmann::json& nlohmann_json_j, Type& nlohmann_json_t) { const Type nlohmann_json_default_obj{}; NLOHMANN_JSON_EXPAND(NLOHMANN_JSON_PASTE(NLOHMANN_JSON_FROM_WITH_DEFAULT, __VA_ARGS__)) } + +#define NLOHMANN_DEFINE_TYPE_INTRUSIVE_ONLY_SERIALIZE(Type, ...) \ + friend void to_json(nlohmann::json& nlohmann_json_j, const Type& nlohmann_json_t) { NLOHMANN_JSON_EXPAND(NLOHMANN_JSON_PASTE(NLOHMANN_JSON_TO, __VA_ARGS__)) } + +/*! +@brief macro +@def NLOHMANN_DEFINE_TYPE_NON_INTRUSIVE +@since version 3.9.0 +*/ +#define NLOHMANN_DEFINE_TYPE_NON_INTRUSIVE(Type, ...) 
\ + inline void to_json(nlohmann::json& nlohmann_json_j, const Type& nlohmann_json_t) { NLOHMANN_JSON_EXPAND(NLOHMANN_JSON_PASTE(NLOHMANN_JSON_TO, __VA_ARGS__)) } \ + inline void from_json(const nlohmann::json& nlohmann_json_j, Type& nlohmann_json_t) { NLOHMANN_JSON_EXPAND(NLOHMANN_JSON_PASTE(NLOHMANN_JSON_FROM, __VA_ARGS__)) } + +#define NLOHMANN_DEFINE_TYPE_NON_INTRUSIVE_ONLY_SERIALIZE(Type, ...) \ + inline void to_json(nlohmann::json& nlohmann_json_j, const Type& nlohmann_json_t) { NLOHMANN_JSON_EXPAND(NLOHMANN_JSON_PASTE(NLOHMANN_JSON_TO, __VA_ARGS__)) } + +#define NLOHMANN_DEFINE_TYPE_NON_INTRUSIVE_WITH_DEFAULT(Type, ...) \ + inline void to_json(nlohmann::json& nlohmann_json_j, const Type& nlohmann_json_t) { NLOHMANN_JSON_EXPAND(NLOHMANN_JSON_PASTE(NLOHMANN_JSON_TO, __VA_ARGS__)) } \ + inline void from_json(const nlohmann::json& nlohmann_json_j, Type& nlohmann_json_t) { const Type nlohmann_json_default_obj{}; NLOHMANN_JSON_EXPAND(NLOHMANN_JSON_PASTE(NLOHMANN_JSON_FROM_WITH_DEFAULT, __VA_ARGS__)) } + +// inspired from https://stackoverflow.com/a/26745591 +// allows to call any std function as if (e.g. with begin): +// using std::begin; begin(x); +// +// it allows using the detected idiom to retrieve the return type +// of such an expression +#define NLOHMANN_CAN_CALL_STD_FUNC_IMPL(std_name) \ + namespace detail { \ + using std::std_name; \ + \ + template \ + using result_of_##std_name = decltype(std_name(std::declval()...)); \ + } \ + \ + namespace detail2 { \ + struct std_name##_tag \ + { \ + }; \ + \ + template \ + std_name##_tag std_name(T&&...); \ + \ + template \ + using result_of_##std_name = decltype(std_name(std::declval()...)); \ + \ + template \ + struct would_call_std_##std_name \ + { \ + static constexpr auto const value = ::nlohmann::detail:: \ + is_detected_exact::value; \ + }; \ + } /* namespace detail2 */ \ + \ + template \ + struct would_call_std_##std_name : detail2::would_call_std_##std_name \ + { \ + } + +#ifndef JSON_USE_IMPLICIT_CONVERSIONS + #define JSON_USE_IMPLICIT_CONVERSIONS 1 +#endif + +#if JSON_USE_IMPLICIT_CONVERSIONS + #define JSON_EXPLICIT +#else + #define JSON_EXPLICIT explicit +#endif + +#ifndef JSON_DISABLE_ENUM_SERIALIZATION + #define JSON_DISABLE_ENUM_SERIALIZATION 0 +#endif + +#ifndef JSON_USE_GLOBAL_UDLS + #define JSON_USE_GLOBAL_UDLS 1 +#endif + +#if JSON_HAS_THREE_WAY_COMPARISON + #include // partial_ordering +#endif + +NLOHMANN_JSON_NAMESPACE_BEGIN +namespace detail +{ + +/////////////////////////// +// JSON type enumeration // +/////////////////////////// + +/*! +@brief the JSON type enumeration + +This enumeration collects the different JSON types. It is internally used to +distinguish the stored values, and the functions @ref basic_json::is_null(), +@ref basic_json::is_object(), @ref basic_json::is_array(), +@ref basic_json::is_string(), @ref basic_json::is_boolean(), +@ref basic_json::is_number() (with @ref basic_json::is_number_integer(), +@ref basic_json::is_number_unsigned(), and @ref basic_json::is_number_float()), +@ref basic_json::is_discarded(), @ref basic_json::is_primitive(), and +@ref basic_json::is_structured() rely on it. 
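+
+A minimal illustration (not part of the upstream documentation; it assumes the
+default `nlohmann::json` specialization and only sketches how the enumeration
+relates to the predicates listed above):
+
+@code{.cpp}
+nlohmann::json j = {1, 2, 3};
+// the stored type can be inspected through the enumeration ...
+assert(j.type() == nlohmann::json::value_t::array);
+// ... or through the convenience predicates that rely on it
+assert(j.is_array() && j.is_structured() && !j.is_primitive());
+@endcode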
+ +@note There are three enumeration entries (number_integer, number_unsigned, and +number_float), because the library distinguishes these three types for numbers: +@ref basic_json::number_unsigned_t is used for unsigned integers, +@ref basic_json::number_integer_t is used for signed integers, and +@ref basic_json::number_float_t is used for floating-point numbers or to +approximate integers which do not fit in the limits of their respective type. + +@sa see @ref basic_json::basic_json(const value_t value_type) -- create a JSON +value with the default value for a given type + +@since version 1.0.0 +*/ +enum class value_t : std::uint8_t +{ + null, ///< null value + object, ///< object (unordered set of name/value pairs) + array, ///< array (ordered collection of values) + string, ///< string value + boolean, ///< boolean value + number_integer, ///< number value (signed integer) + number_unsigned, ///< number value (unsigned integer) + number_float, ///< number value (floating-point) + binary, ///< binary array (ordered collection of bytes) + discarded ///< discarded by the parser callback function +}; + +/*! +@brief comparison operator for JSON types + +Returns an ordering that is similar to Python: +- order: null < boolean < number < object < array < string < binary +- furthermore, each type is not smaller than itself +- discarded values are not comparable +- binary is represented as a b"" string in python and directly comparable to a + string; however, making a binary array directly comparable with a string would + be surprising behavior in a JSON file. + +@since version 1.0.0 +*/ +#if JSON_HAS_THREE_WAY_COMPARISON + inline std::partial_ordering operator<=>(const value_t lhs, const value_t rhs) noexcept // *NOPAD* +#else + inline bool operator<(const value_t lhs, const value_t rhs) noexcept +#endif +{ + static constexpr std::array order = {{ + 0 /* null */, 3 /* object */, 4 /* array */, 5 /* string */, + 1 /* boolean */, 2 /* integer */, 2 /* unsigned */, 2 /* float */, + 6 /* binary */ + } + }; + + const auto l_index = static_cast(lhs); + const auto r_index = static_cast(rhs); +#if JSON_HAS_THREE_WAY_COMPARISON + if (l_index < order.size() && r_index < order.size()) + { + return order[l_index] <=> order[r_index]; // *NOPAD* + } + return std::partial_ordering::unordered; +#else + return l_index < order.size() && r_index < order.size() && order[l_index] < order[r_index]; +#endif +} + +// GCC selects the built-in operator< over an operator rewritten from +// a user-defined spaceship operator +// Clang, MSVC, and ICC select the rewritten candidate +// (see GCC bug https://gcc.gnu.org/bugzilla/show_bug.cgi?id=105200) +#if JSON_HAS_THREE_WAY_COMPARISON && defined(__GNUC__) +inline bool operator<(const value_t lhs, const value_t rhs) noexcept +{ + return std::is_lt(lhs <=> rhs); // *NOPAD* +} +#endif + +} // namespace detail +NLOHMANN_JSON_NAMESPACE_END + +// #include +// __ _____ _____ _____ +// __| | __| | | | JSON for Modern C++ +// | | |__ | | | | | | version 3.11.3 +// |_____|_____|_____|_|___| https://github.com/nlohmann/json +// +// SPDX-FileCopyrightText: 2013-2023 Niels Lohmann +// SPDX-License-Identifier: MIT + + + +// #include + + +NLOHMANN_JSON_NAMESPACE_BEGIN +namespace detail +{ + +/*! 
+@brief replace all occurrences of a substring by another string + +@param[in,out] s the string to manipulate; changed so that all + occurrences of @a f are replaced with @a t +@param[in] f the substring to replace with @a t +@param[in] t the string to replace @a f + +@pre The search string @a f must not be empty. **This precondition is +enforced with an assertion.** + +@since version 2.0.0 +*/ +template +inline void replace_substring(StringType& s, const StringType& f, + const StringType& t) +{ + JSON_ASSERT(!f.empty()); + for (auto pos = s.find(f); // find first occurrence of f + pos != StringType::npos; // make sure f was found + s.replace(pos, f.size(), t), // replace with t, and + pos = s.find(f, pos + t.size())) // find next occurrence of f + {} +} + +/*! + * @brief string escaping as described in RFC 6901 (Sect. 4) + * @param[in] s string to escape + * @return escaped string + * + * Note the order of escaping "~" to "~0" and "/" to "~1" is important. + */ +template +inline StringType escape(StringType s) +{ + replace_substring(s, StringType{"~"}, StringType{"~0"}); + replace_substring(s, StringType{"/"}, StringType{"~1"}); + return s; +} + +/*! + * @brief string unescaping as described in RFC 6901 (Sect. 4) + * @param[in] s string to unescape + * @return unescaped string + * + * Note the order of escaping "~1" to "/" and "~0" to "~" is important. + */ +template +static void unescape(StringType& s) +{ + replace_substring(s, StringType{"~1"}, StringType{"/"}); + replace_substring(s, StringType{"~0"}, StringType{"~"}); +} + +} // namespace detail +NLOHMANN_JSON_NAMESPACE_END + +// #include +// __ _____ _____ _____ +// __| | __| | | | JSON for Modern C++ +// | | |__ | | | | | | version 3.11.3 +// |_____|_____|_____|_|___| https://github.com/nlohmann/json +// +// SPDX-FileCopyrightText: 2013-2023 Niels Lohmann +// SPDX-License-Identifier: MIT + + + +#include // size_t + +// #include + + +NLOHMANN_JSON_NAMESPACE_BEGIN +namespace detail +{ + +/// struct to capture the start position of the current token +struct position_t +{ + /// the total number of characters read + std::size_t chars_read_total = 0; + /// the number of characters read in the current line + std::size_t chars_read_current_line = 0; + /// the number of lines read + std::size_t lines_read = 0; + + /// conversion to size_t to preserve SAX interface + constexpr operator size_t() const + { + return chars_read_total; + } +}; + +} // namespace detail +NLOHMANN_JSON_NAMESPACE_END + +// #include + +// #include +// __ _____ _____ _____ +// __| | __| | | | JSON for Modern C++ +// | | |__ | | | | | | version 3.11.3 +// |_____|_____|_____|_|___| https://github.com/nlohmann/json +// +// SPDX-FileCopyrightText: 2013-2023 Niels Lohmann +// SPDX-FileCopyrightText: 2018 The Abseil Authors +// SPDX-License-Identifier: MIT + + + +#include // array +#include // size_t +#include // conditional, enable_if, false_type, integral_constant, is_constructible, is_integral, is_same, remove_cv, remove_reference, true_type +#include // index_sequence, make_index_sequence, index_sequence_for + +// #include + + +NLOHMANN_JSON_NAMESPACE_BEGIN +namespace detail +{ + +template +using uncvref_t = typename std::remove_cv::type>::type; + +#ifdef JSON_HAS_CPP_14 + +// the following utilities are natively available in C++14 +using std::enable_if_t; +using std::index_sequence; +using std::make_index_sequence; +using std::index_sequence_for; + +#else + +// alias templates to reduce boilerplate +template +using enable_if_t = typename std::enable_if::type; + +// The 
following code is taken from https://github.com/abseil/abseil-cpp/blob/10cb35e459f5ecca5b2ff107635da0bfa41011b4/absl/utility/utility.h +// which is part of Google Abseil (https://github.com/abseil/abseil-cpp), licensed under the Apache License 2.0. + +//// START OF CODE FROM GOOGLE ABSEIL + +// integer_sequence +// +// Class template representing a compile-time integer sequence. An instantiation +// of `integer_sequence` has a sequence of integers encoded in its +// type through its template arguments (which is a common need when +// working with C++11 variadic templates). `absl::integer_sequence` is designed +// to be a drop-in replacement for C++14's `std::integer_sequence`. +// +// Example: +// +// template< class T, T... Ints > +// void user_function(integer_sequence); +// +// int main() +// { +// // user_function's `T` will be deduced to `int` and `Ints...` +// // will be deduced to `0, 1, 2, 3, 4`. +// user_function(make_integer_sequence()); +// } +template +struct integer_sequence +{ + using value_type = T; + static constexpr std::size_t size() noexcept + { + return sizeof...(Ints); + } +}; + +// index_sequence +// +// A helper template for an `integer_sequence` of `size_t`, +// `absl::index_sequence` is designed to be a drop-in replacement for C++14's +// `std::index_sequence`. +template +using index_sequence = integer_sequence; + +namespace utility_internal +{ + +template +struct Extend; + +// Note that SeqSize == sizeof...(Ints). It's passed explicitly for efficiency. +template +struct Extend, SeqSize, 0> +{ + using type = integer_sequence < T, Ints..., (Ints + SeqSize)... >; +}; + +template +struct Extend, SeqSize, 1> +{ + using type = integer_sequence < T, Ints..., (Ints + SeqSize)..., 2 * SeqSize >; +}; + +// Recursion helper for 'make_integer_sequence'. +// 'Gen::type' is an alias for 'integer_sequence'. +template +struct Gen +{ + using type = + typename Extend < typename Gen < T, N / 2 >::type, N / 2, N % 2 >::type; +}; + +template +struct Gen +{ + using type = integer_sequence; +}; + +} // namespace utility_internal + +// Compile-time sequences of integers + +// make_integer_sequence +// +// This template alias is equivalent to +// `integer_sequence`, and is designed to be a drop-in +// replacement for C++14's `std::make_integer_sequence`. +template +using make_integer_sequence = typename utility_internal::Gen::type; + +// make_index_sequence +// +// This template alias is equivalent to `index_sequence<0, 1, ..., N-1>`, +// and is designed to be a drop-in replacement for C++14's +// `std::make_index_sequence`. +template +using make_index_sequence = make_integer_sequence; + +// index_sequence_for +// +// Converts a typename pack into an index sequence of the same length, and +// is designed to be a drop-in replacement for C++14's +// `std::index_sequence_for()` +template +using index_sequence_for = make_index_sequence; + +//// END OF CODE FROM GOOGLE ABSEIL + +#endif + +// dispatch utility (taken from ranges-v3) +template struct priority_tag : priority_tag < N - 1 > {}; +template<> struct priority_tag<0> {}; + +// taken from ranges-v3 +template +struct static_const +{ + static JSON_INLINE_VARIABLE constexpr T value{}; +}; + +#ifndef JSON_HAS_CPP_17 + template + constexpr T static_const::value; +#endif + +template +inline constexpr std::array make_array(Args&& ... 
args) +{ + return std::array {{static_cast(std::forward(args))...}}; +} + +} // namespace detail +NLOHMANN_JSON_NAMESPACE_END + +// #include +// __ _____ _____ _____ +// __| | __| | | | JSON for Modern C++ +// | | |__ | | | | | | version 3.11.3 +// |_____|_____|_____|_|___| https://github.com/nlohmann/json +// +// SPDX-FileCopyrightText: 2013-2023 Niels Lohmann +// SPDX-License-Identifier: MIT + + + +#include // numeric_limits +#include // false_type, is_constructible, is_integral, is_same, true_type +#include // declval +#include // tuple +#include // char_traits + +// #include +// __ _____ _____ _____ +// __| | __| | | | JSON for Modern C++ +// | | |__ | | | | | | version 3.11.3 +// |_____|_____|_____|_|___| https://github.com/nlohmann/json +// +// SPDX-FileCopyrightText: 2013-2023 Niels Lohmann +// SPDX-License-Identifier: MIT + + + +#include // random_access_iterator_tag + +// #include + +// #include + +// #include + + +NLOHMANN_JSON_NAMESPACE_BEGIN +namespace detail +{ + +template +struct iterator_types {}; + +template +struct iterator_types < + It, + void_t> +{ + using difference_type = typename It::difference_type; + using value_type = typename It::value_type; + using pointer = typename It::pointer; + using reference = typename It::reference; + using iterator_category = typename It::iterator_category; +}; + +// This is required as some compilers implement std::iterator_traits in a way that +// doesn't work with SFINAE. See https://github.com/nlohmann/json/issues/1341. +template +struct iterator_traits +{ +}; + +template +struct iterator_traits < T, enable_if_t < !std::is_pointer::value >> + : iterator_types +{ +}; + +template +struct iterator_traits::value>> +{ + using iterator_category = std::random_access_iterator_tag; + using value_type = T; + using difference_type = ptrdiff_t; + using pointer = T*; + using reference = T&; +}; + +} // namespace detail +NLOHMANN_JSON_NAMESPACE_END + +// #include + +// #include +// __ _____ _____ _____ +// __| | __| | | | JSON for Modern C++ +// | | |__ | | | | | | version 3.11.3 +// |_____|_____|_____|_|___| https://github.com/nlohmann/json +// +// SPDX-FileCopyrightText: 2013-2023 Niels Lohmann +// SPDX-License-Identifier: MIT + + + +// #include + + +NLOHMANN_JSON_NAMESPACE_BEGIN + +NLOHMANN_CAN_CALL_STD_FUNC_IMPL(begin); + +NLOHMANN_JSON_NAMESPACE_END + +// #include +// __ _____ _____ _____ +// __| | __| | | | JSON for Modern C++ +// | | |__ | | | | | | version 3.11.3 +// |_____|_____|_____|_|___| https://github.com/nlohmann/json +// +// SPDX-FileCopyrightText: 2013-2023 Niels Lohmann +// SPDX-License-Identifier: MIT + + + +// #include + + +NLOHMANN_JSON_NAMESPACE_BEGIN + +NLOHMANN_CAN_CALL_STD_FUNC_IMPL(end); + +NLOHMANN_JSON_NAMESPACE_END + +// #include + +// #include + +// #include +// __ _____ _____ _____ +// __| | __| | | | JSON for Modern C++ +// | | |__ | | | | | | version 3.11.3 +// |_____|_____|_____|_|___| https://github.com/nlohmann/json +// +// SPDX-FileCopyrightText: 2013-2023 Niels Lohmann +// SPDX-License-Identifier: MIT + +#ifndef INCLUDE_NLOHMANN_JSON_FWD_HPP_ + #define INCLUDE_NLOHMANN_JSON_FWD_HPP_ + + #include // int64_t, uint64_t + #include // map + #include // allocator + #include // string + #include // vector + + // #include + + + /*! + @brief namespace for Niels Lohmann + @see https://github.com/nlohmann + @since version 1.0.0 + */ + NLOHMANN_JSON_NAMESPACE_BEGIN + + /*! 
+
+// #include <nlohmann/detail/macro_scope.hpp>
+
+// #include <nlohmann/detail/meta/call_std/begin.hpp>
+//     __ _____ _____ _____
+//  __|  |   __|     |   | |  JSON for Modern C++
+// |  |  |__   |  |  | | | |  version 3.11.3
+// |_____|_____|_____|_|___|  https://github.com/nlohmann/json
+//
+// SPDX-FileCopyrightText: 2013-2023 Niels Lohmann <https://nlohmann.me>
+// SPDX-License-Identifier: MIT
+
+
+// #include <nlohmann/detail/macro_scope.hpp>
+
+
+NLOHMANN_JSON_NAMESPACE_BEGIN
+
+NLOHMANN_CAN_CALL_STD_FUNC_IMPL(begin);
+
+NLOHMANN_JSON_NAMESPACE_END
+
+// #include <nlohmann/detail/meta/call_std/end.hpp>
+//     __ _____ _____ _____
+//  __|  |   __|     |   | |  JSON for Modern C++
+// |  |  |__   |  |  | | | |  version 3.11.3
+// |_____|_____|_____|_|___|  https://github.com/nlohmann/json
+//
+// SPDX-FileCopyrightText: 2013-2023 Niels Lohmann <https://nlohmann.me>
+// SPDX-License-Identifier: MIT
+
+
+// #include <nlohmann/detail/macro_scope.hpp>
+
+
+NLOHMANN_JSON_NAMESPACE_BEGIN
+
+NLOHMANN_CAN_CALL_STD_FUNC_IMPL(end);
+
+NLOHMANN_JSON_NAMESPACE_END
+
+// #include <nlohmann/detail/meta/cpp_future.hpp>
+
+// #include <nlohmann/detail/meta/detected.hpp>
+
+// #include <nlohmann/json_fwd.hpp>
+//     __ _____ _____ _____
+//  __|  |   __|     |   | |  JSON for Modern C++
+// |  |  |__   |  |  | | | |  version 3.11.3
+// |_____|_____|_____|_|___|  https://github.com/nlohmann/json
+//
+// SPDX-FileCopyrightText: 2013-2023 Niels Lohmann <https://nlohmann.me>
+// SPDX-License-Identifier: MIT
+
+#ifndef INCLUDE_NLOHMANN_JSON_FWD_HPP_
+    #define INCLUDE_NLOHMANN_JSON_FWD_HPP_
+
+    #include <cstdint> // int64_t, uint64_t
+    #include <map> // map
+    #include <memory> // allocator
+    #include <string> // string
+    #include <vector> // vector
+
+    // #include <nlohmann/detail/abi_macros.hpp>
+
+
+    /*!
+    @brief namespace for Niels Lohmann
+    @see https://github.com/nlohmann
+    @since version 1.0.0
+    */
+    NLOHMANN_JSON_NAMESPACE_BEGIN
+
+    /*!
+    @brief default JSONSerializer template argument
+
+    This serializer ignores the template arguments and uses ADL
+    ([argument-dependent lookup](https://en.cppreference.com/w/cpp/language/adl))
+    for serialization.
+    */
+    template<typename T = void, typename SFINAE = void>
+    struct adl_serializer;
+
+    /// a class to store JSON values
+    /// @sa https://json.nlohmann.me/api/basic_json/
+    template<template<typename U, typename V, typename... Args> class ObjectType =
+             std::map,
+             template<typename U, typename... Args> class ArrayType = std::vector,
+             class StringType = std::string, class BooleanType = bool,
+             class NumberIntegerType = std::int64_t,
+             class NumberUnsignedType = std::uint64_t,
+             class NumberFloatType = double,
+             template<typename U> class AllocatorType = std::allocator,
+             template<typename T, typename SFINAE = void> class JSONSerializer =
+             adl_serializer,
+             class BinaryType = std::vector<std::uint8_t>, // cppcheck-suppress syntaxError
+             class CustomBaseClass = void>
+    class basic_json;
+
+    /// @brief JSON Pointer defines a string syntax for identifying a specific value within a JSON document
+    /// @sa https://json.nlohmann.me/api/json_pointer/
+    template<typename RefStringType>
+    class json_pointer;
+
+    /*!
+    @brief default specialization
+    @sa https://json.nlohmann.me/api/json/
+    */
+    using json = basic_json<>;
+
+    /// @brief a minimal map-like container that preserves insertion order
+    /// @sa https://json.nlohmann.me/api/ordered_map/
+    template<class Key, class T, class IgnoredLess, class Allocator>
+    struct ordered_map;
+
+    /// @brief specialization that maintains the insertion order of object keys
+    /// @sa https://json.nlohmann.me/api/ordered_json/
+    using ordered_json = basic_json<nlohmann::ordered_map>;
+
+    NLOHMANN_JSON_NAMESPACE_END
+
+#endif  // INCLUDE_NLOHMANN_JSON_FWD_HPP_
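+
+// Usage sketch (illustrative, not part of the upstream nlohmann/json sources):
+// the two aliases declared above differ only in object key ordering once the
+// full definitions later in this file are available, e.g.
+//
+//   nlohmann::json j = {{"name", "llama"}, {"layers", 32}};          // keys sorted
+//   nlohmann::ordered_json oj = {{"name", "llama"}, {"layers", 32}}; // insertion order kept
+//   j.dump();   // {"layers":32,"name":"llama"}
+//   oj.dump();  // {"name":"llama","layers":32}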
+
+
+NLOHMANN_JSON_NAMESPACE_BEGIN
+/*!
+@brief detail namespace with internal helper functions
+
+This namespace collects functions that should not be exposed,
+implementations of some @ref basic_json methods, and meta-programming helpers.
+
+@since version 2.1.0
+*/
+namespace detail
+{
+
+/////////////
+// helpers //
+/////////////
+
+// Note to maintainers:
+//
+// Every trait in this file expects a non CV-qualified type.
+// The only exceptions are in the 'aliases for detected' section
+// (i.e. those of the form: decltype(T::member_function(std::declval<T>())))
+//
+// In this case, T has to be properly CV-qualified to constraint the function arguments
+// (e.g. to_json(BasicJsonType&, const T&))
+
+template<typename> struct is_basic_json : std::false_type {};
+
+NLOHMANN_BASIC_JSON_TPL_DECLARATION
+struct is_basic_json<NLOHMANN_BASIC_JSON_TPL> : std::true_type {};
+
+// used by exceptions create() member functions
+// true_type for pointer to possibly cv-qualified basic_json or std::nullptr_t
+// false_type otherwise
+template<typename BasicJsonContext>
+struct is_basic_json_context :
+    std::integral_constant < bool,
+    is_basic_json<typename std::remove_cv<typename std::remove_pointer<BasicJsonContext>::type>::type>::value
+    || std::is_same<BasicJsonContext, std::nullptr_t>::value >
+{};
+
+//////////////////////
+// json_ref helpers //
+//////////////////////
+
+template<typename>
+class json_ref;
+
+template<typename>
+struct is_json_ref : std::false_type {};
+
+template<typename T>
+struct is_json_ref<json_ref<T>> : std::true_type {};
+
+//////////////////////////
+// aliases for detected //
+//////////////////////////
+
+template<typename T>
+using mapped_type_t = typename T::mapped_type;
+
+template<typename T>
+using key_type_t = typename T::key_type;
+
+template<typename T>
+using value_type_t = typename T::value_type;
+
+template<typename T>
+using difference_type_t = typename T::difference_type;
+
+template<typename T>
+using pointer_t = typename T::pointer;
+
+template<typename T>
+using reference_t = typename T::reference;
+
+template<typename T>
+using iterator_category_t = typename T::iterator_category;
+
+template<typename T, typename... Args>
+using to_json_function = decltype(T::to_json(std::declval<Args>()...));
+
+template<typename T, typename... Args>
+using from_json_function = decltype(T::from_json(std::declval<Args>()...));
+
+template<typename T, typename U>
+using get_template_function = decltype(std::declval<T>().template get<U>());
+
+// trait checking if JSONSerializer<T>::from_json(json const&, udt&) exists
+template<typename BasicJsonType, typename T, typename = void>
+struct has_from_json : std::false_type {};
+
+// trait checking if j.get<T> is valid
+// use this trait instead of std::is_constructible or std::is_convertible,
+// both rely on, or make use of implicit conversions, and thus fail when T
+// has several constructors/operator= (see https://github.com/nlohmann/json/issues/958)
+template <typename BasicJsonType, typename T>
+struct is_getable
+{
+    static constexpr bool value = is_detected<get_template_function, const BasicJsonType&, T>::value;
+};
+
+template<typename BasicJsonType, typename T>
+struct has_from_json < BasicJsonType, T, enable_if_t < !is_basic_json<T>::value >>
+{
+    using serializer = typename BasicJsonType::template json_serializer<T, void>;
+
+    static constexpr bool value =
+        is_detected_exact<void, from_json_function, serializer,
+        const BasicJsonType&, T&>::value;
+};
+
+// This trait checks if JSONSerializer<T>::from_json(json const&) exists
+// this overload is used for non-default-constructible user-defined-types
+template<typename BasicJsonType, typename T, typename = void>
+struct has_non_default_from_json : std::false_type {};
+
+template<typename BasicJsonType, typename T>
+struct has_non_default_from_json < BasicJsonType, T, enable_if_t < !is_basic_json<T>::value >>
+{
+    using serializer = typename BasicJsonType::template json_serializer<T, void>;
+
+    static constexpr bool value =
+        is_detected_exact<T, from_json_function, serializer,
+        const BasicJsonType&>::value;
+};
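+
+// Usage sketch (illustrative, not part of the upstream nlohmann/json sources):
+// for a hypothetical default-constructible user type `point` with a free
+// `from_json(const nlohmann::json&, point&)` reachable via ADL, both traits hold:
+//
+//   static_assert(detail::has_from_json<nlohmann::json, point>::value, "");
+//   static_assert(detail::is_getable<nlohmann::json, point>::value, "");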
+
+// This trait checks if BasicJsonType::json_serializer<T>::to_json exists
+// Do not evaluate the trait when T is a basic_json type, to avoid template instantiation infinite recursion.
+template<typename BasicJsonType, typename T, typename = void>
+struct has_to_json : std::false_type {};
+
+template<typename BasicJsonType, typename T>
+struct has_to_json < BasicJsonType, T, enable_if_t < !is_basic_json<T>::value >>
+{
+    using serializer = typename BasicJsonType::template json_serializer<T, void>;
+
+    static constexpr bool value =
+        is_detected_exact<void, to_json_function, serializer, BasicJsonType&,
+        T>::value;
+};
+
+template<typename T>
+using detect_key_compare = typename T::key_compare;
+
+template<typename T>
+struct has_key_compare : std::integral_constant<bool, is_detected<detect_key_compare, T>::value> {};
+
+// obtains the actual object key comparator
+template<typename BasicJsonType>
+struct actual_object_comparator
+{
+    using object_t = typename BasicJsonType::object_t;
+    using object_comparator_t = typename BasicJsonType::default_object_comparator_t;
+    using type = typename std::conditional < has_key_compare<object_t>::value,
+          typename object_t::key_compare, object_comparator_t>::type;
+};
+
+template<typename BasicJsonType>
+using actual_object_comparator_t = typename actual_object_comparator<BasicJsonType>::type;
+
+/////////////////
+// char_traits //
+/////////////////
+
+// Primary template of char_traits calls std char_traits
+template<typename T>
+struct char_traits : std::char_traits<T>
+{};
+
+// Explicitly define char traits for unsigned char since it is not standard
+template<>
+struct char_traits<unsigned char> : std::char_traits<char>
+{
+    using char_type = unsigned char;
+    using int_type = uint64_t;
+
+    // Redefine to_int_type function
+    static int_type to_int_type(char_type c) noexcept
+    {
+        return static_cast<int_type>(c);
+    }
+
+    static char_type to_char_type(int_type i) noexcept
+    {
+        return static_cast<char_type>(i);
+    }
+
+    static constexpr int_type eof() noexcept
+    {
+        return static_cast<int_type>(EOF);
+    }
+};
+
+// Explicitly define char traits for signed char since it is not standard
+template<>
+struct char_traits<signed char> : std::char_traits<char>
+{
+    using char_type = signed char;
+    using int_type = uint64_t;
+
+    // Redefine to_int_type function
+    static int_type to_int_type(char_type c) noexcept
+    {
+        return static_cast<int_type>(c);
+    }
+
+    static char_type to_char_type(int_type i) noexcept
+    {
+        return static_cast<char_type>(i);
+    }
+
+    static constexpr int_type eof() noexcept
+    {
+        return static_cast<int_type>(EOF);
+    }
+};
+
+///////////////////
+// is_ functions //
+///////////////////
+
+// https://en.cppreference.com/w/cpp/types/conjunction
+template<class...> struct conjunction : std::true_type { };
+template<class B> struct conjunction<B> : B { };
+template<class B, class... Bn>
+struct conjunction<B, Bn...>
+: std::conditional<static_cast<bool>(B::value), conjunction<Bn...>, B>::type {};
+
+// https://en.cppreference.com/w/cpp/types/negation
+template<class B> struct negation : std::integral_constant < bool, !B::value > { };
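+
+// Usage sketch (illustrative, not part of the upstream nlohmann/json sources):
+// conjunction/negation mirror their C++17 counterparts, e.g.
+//
+//   static_assert(conjunction<std::true_type, std::true_type>::value, "");
+//   static_assert(!conjunction<std::true_type, std::false_type>::value, "");
+//   static_assert(negation<std::false_type>::value, "");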
+
+// Reimplementation of is_constructible and is_default_constructible, due to them being broken for
+// std::pair and std::tuple until LWG 2367 fix (see https://cplusplus.github.io/LWG/lwg-defects.html#2367).
+// This causes compile errors in e.g. clang 3.5 or gcc 4.9.
+template <typename T>
+struct is_default_constructible : std::is_default_constructible<T> {};
+
+template <typename T1, typename T2>
+struct is_default_constructible<std::pair<T1, T2>>
+            : conjunction<is_default_constructible<T1>, is_default_constructible<T2>> {};
+
+template <typename T1, typename T2>
+struct is_default_constructible<const std::pair<T1, T2>>
+            : conjunction<is_default_constructible<T1>, is_default_constructible<T2>> {};
+
+template <typename... Ts>
+struct is_default_constructible<std::tuple<Ts...>>
+            : conjunction<is_default_constructible<Ts>...> {};
+
+template <typename... Ts>
+struct is_default_constructible<const std::tuple<Ts...>>
+            : conjunction<is_default_constructible<Ts>...> {};
+
+template <typename T, typename... Args>
+struct is_constructible : std::is_constructible<T, Args...> {};
+
+template <typename T1, typename T2>
+struct is_constructible<std::pair<T1, T2>> : is_default_constructible<std::pair<T1, T2>> {};
+
+template <typename T1, typename T2>
+struct is_constructible<const std::pair<T1, T2>> : is_default_constructible<const std::pair<T1, T2>> {};
+
+template <typename... Ts>
+struct is_constructible<std::tuple<Ts...>> : is_default_constructible<std::tuple<Ts...>> {};
+
+template <typename... Ts>
+struct is_constructible<const std::tuple<Ts...>> : is_default_constructible<const std::tuple<Ts...>> {};
+
+template<typename T, typename = void>
+struct is_iterator_traits : std::false_type {};
+
+template<typename T>
+struct is_iterator_traits<iterator_traits<T>>
+{
+  private:
+    using traits = iterator_traits<T>;
+
+  public:
+    static constexpr auto value =
+        is_detected<value_type_t, traits>::value &&
+        is_detected<difference_type_t, traits>::value &&
+        is_detected<pointer_t, traits>::value &&
+        is_detected<iterator_category_t, traits>::value &&
+        is_detected<reference_t, traits>::value;
+};
+
+template<typename T>
+struct is_range
+{
+  private:
+    using t_ref = typename std::add_lvalue_reference<T>::type;
+
+    using iterator = detected_t<result_of_begin, t_ref>;
+    using sentinel = detected_t<result_of_end, t_ref>;
+
+    // to be 100% correct, it should use https://en.cppreference.com/w/cpp/iterator/input_or_output_iterator
+    // and https://en.cppreference.com/w/cpp/iterator/sentinel_for
+    // but reimplementing these would be too much work, as a lot of other concepts are used underneath
+    static constexpr auto is_iterator_begin =
+        is_iterator_traits<iterator_traits<iterator>>::value;
+
+  public:
+    static constexpr bool value = !std::is_same<iterator, nonesuch>::value && !std::is_same<sentinel, nonesuch>::value && is_iterator_begin;
+};
+
+template<typename R>
+using iterator_t = enable_if_t<is_range<R>::value, result_of_begin<decltype(std::declval<R&>())>>;
+
+template<typename T>
+using range_value_t = value_type_t<iterator_traits<iterator_t<T>>>;
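+
+// Usage sketch (illustrative, not part of the upstream nlohmann/json sources):
+// any type with ADL-reachable begin()/end() and valid iterator traits is a
+// range, and range_value_t extracts its element type, e.g.
+//
+//   static_assert(is_range<std::vector<int>>::value, "");
+//   static_assert(std::is_same<range_value_t<std::vector<int>>, int>::value, "");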
+
+// The following implementation of is_complete_type is taken from
+// https://blogs.msdn.microsoft.com/vcblog/2015/12/02/partial-support-for-expression-sfinae-in-vs-2015-update-1/
+// and is written by Xiang Fan who agreed to using it in this library.
+
+template<typename T, typename = void>
+struct is_complete_type : std::false_type {};
+
+template<typename T>
+struct is_complete_type<T, decltype(void(sizeof(T)))> : std::true_type {};
+
+template<typename BasicJsonType, typename CompatibleObjectType,
+         typename = void>
+struct is_compatible_object_type_impl : std::false_type {};
+
+template<typename BasicJsonType, typename CompatibleObjectType>
+struct is_compatible_object_type_impl <
+    BasicJsonType, CompatibleObjectType,
+    enable_if_t < is_detected<mapped_type_t, CompatibleObjectType>::value&&
+    is_detected<key_type_t, CompatibleObjectType>::value >>
+{
+    using object_t = typename BasicJsonType::object_t;
+
+    // macOS's is_constructible does not play well with nonesuch...
+    static constexpr bool value =
+        is_constructible<typename object_t::key_type,
+        typename CompatibleObjectType::key_type>::value &&
+        is_constructible<typename object_t::mapped_type,
+        typename CompatibleObjectType::mapped_type>::value;
+};
+
+template<typename BasicJsonType, typename CompatibleObjectType>
+struct is_compatible_object_type
+    : is_compatible_object_type_impl<BasicJsonType, CompatibleObjectType> {};
+
+template<typename BasicJsonType, typename ConstructibleObjectType,
+         typename = void>
+struct is_constructible_object_type_impl : std::false_type {};
+
+template<typename BasicJsonType, typename ConstructibleObjectType>
+struct is_constructible_object_type_impl <
+    BasicJsonType, ConstructibleObjectType,
+    enable_if_t < is_detected<mapped_type_t, ConstructibleObjectType>::value&&
+    is_detected<key_type_t, ConstructibleObjectType>::value >>
+{
+    using object_t = typename BasicJsonType::object_t;
+
+    static constexpr bool value =
+        (is_default_constructible<ConstructibleObjectType>::value &&
+         (std::is_move_assignable<ConstructibleObjectType>::value ||
+          std::is_copy_assignable<ConstructibleObjectType>::value) &&
+         (is_constructible<typename ConstructibleObjectType::key_type,
+          typename object_t::key_type>::value &&
+          std::is_same <
+          typename object_t::mapped_type,
+          typename ConstructibleObjectType::mapped_type >::value)) ||
+        (has_from_json<BasicJsonType,
+         typename ConstructibleObjectType::mapped_type>::value ||
+         has_non_default_from_json <
+         BasicJsonType,
+         typename ConstructibleObjectType::mapped_type >::value);
+};
+
+template<typename BasicJsonType, typename ConstructibleObjectType>
+struct is_constructible_object_type
+    : is_constructible_object_type_impl<BasicJsonType,
+      ConstructibleObjectType> {};
+
+template<typename BasicJsonType, typename CompatibleStringType>
+struct is_compatible_string_type
+{
+    static constexpr auto value =
+        is_constructible<typename BasicJsonType::string_t, CompatibleStringType>::value;
+};
+
+template<typename BasicJsonType, typename ConstructibleStringType>
+struct is_constructible_string_type
+{
+    // launder type through decltype() to fix compilation failure on ICPC
+#ifdef __INTEL_COMPILER
+    using laundered_type = decltype(std::declval<ConstructibleStringType>());
+#else
+    using laundered_type = ConstructibleStringType;
+#endif
+
+    static constexpr auto value =
+        conjunction <
+        is_constructible<laundered_type, typename BasicJsonType::string_t>,
+        is_detected_exact<typename BasicJsonType::string_t::value_type,
+        value_type_t, laundered_type >>::value;
+};
+
+template<typename BasicJsonType, typename CompatibleArrayType, typename = void>
+struct is_compatible_array_type_impl : std::false_type {};
+
+template<typename BasicJsonType, typename CompatibleArrayType>
+struct is_compatible_array_type_impl <
+    BasicJsonType, CompatibleArrayType,
+    enable_if_t <
+    is_detected<iterator_t, CompatibleArrayType>::value&&
+    is_iterator_traits<iterator_traits<detected_t<iterator_t, CompatibleArrayType>>>::value&&
+// special case for types like std::filesystem::path whose iterator's value_type are themselves
+// c.f. https://github.com/nlohmann/json/pull/3073
+    !std::is_same<CompatibleArrayType, detected_t<range_value_t, CompatibleArrayType>>::value >>
+{
+    static constexpr bool value =
+        is_constructible<BasicJsonType,
+        range_value_t<CompatibleArrayType>>::value;
+};
+
+template<typename BasicJsonType, typename CompatibleArrayType>
+struct is_compatible_array_type
+    : is_compatible_array_type_impl<BasicJsonType, CompatibleArrayType> {};
+
+template<typename BasicJsonType, typename ConstructibleArrayType, typename = void>
+struct is_constructible_array_type_impl : std::false_type {};
+
+template<typename BasicJsonType, typename ConstructibleArrayType>
+struct is_constructible_array_type_impl <
+    BasicJsonType, ConstructibleArrayType,
+    enable_if_t<std::is_same<ConstructibleArrayType,
+    typename BasicJsonType::value_type>::value >>
+            : std::true_type {};
+
+template<typename BasicJsonType, typename ConstructibleArrayType>
+struct is_constructible_array_type_impl <
+    BasicJsonType, ConstructibleArrayType,
+    enable_if_t < !std::is_same<ConstructibleArrayType,
+    typename BasicJsonType::value_type>::value&&
+    !is_compatible_string_type<BasicJsonType, ConstructibleArrayType>::value&&
+    is_default_constructible<ConstructibleArrayType>::value&&
+(std::is_move_assignable<ConstructibleArrayType>::value ||
+ std::is_copy_assignable<ConstructibleArrayType>::value)&&
+is_detected<iterator_t, ConstructibleArrayType>::value&&
+is_iterator_traits<iterator_traits<detected_t<iterator_t, ConstructibleArrayType>>>::value&&
+is_detected<range_value_t, ConstructibleArrayType>::value&&
+// special case for types like std::filesystem::path whose iterator's value_type are themselves
+// c.f. https://github.com/nlohmann/json/pull/3073
+!std::is_same<ConstructibleArrayType, detected_t<range_value_t, ConstructibleArrayType>>::value&&
+        is_complete_type <
+        detected_t<range_value_t, ConstructibleArrayType >>::value >>
+{
+    using value_type = range_value_t<ConstructibleArrayType>;
+
+    static constexpr bool value =
+        std::is_same<value_type,
+        typename BasicJsonType::array_t::value_type>::value ||
+        has_from_json<BasicJsonType,
+        value_type>::value ||
+        has_non_default_from_json <
+        BasicJsonType,
+        value_type >::value;
+};
+
+template<typename BasicJsonType, typename ConstructibleArrayType>
+struct is_constructible_array_type
+    : is_constructible_array_type_impl<BasicJsonType, ConstructibleArrayType> {};
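+
+// Usage sketch (illustrative, not part of the upstream nlohmann/json sources):
+// an iterable whose elements convert to/from a JSON value qualifies, e.g.
+//
+//   static_assert(is_compatible_array_type<nlohmann::json, std::list<int>>::value, "");
+//   static_assert(is_constructible_array_type<nlohmann::json, std::vector<double>>::value, "");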
+
+template<typename RealIntegerType, typename CompatibleNumberIntegerType,
+         typename = void>
+struct is_compatible_integer_type_impl : std::false_type {};
+
+template<typename RealIntegerType, typename CompatibleNumberIntegerType>
+struct is_compatible_integer_type_impl <
+    RealIntegerType, CompatibleNumberIntegerType,
+    enable_if_t < std::is_integral<RealIntegerType>::value&&
+    std::is_integral<CompatibleNumberIntegerType>::value&&
+    !std::is_same<bool, CompatibleNumberIntegerType>::value >>
+{
+    // is there an assert somewhere on overflows?
+    using RealLimits = std::numeric_limits<RealIntegerType>;
+    using CompatibleLimits = std::numeric_limits<CompatibleNumberIntegerType>;
+
+    static constexpr auto value =
+        is_constructible<RealIntegerType,
+        CompatibleNumberIntegerType>::value &&
+        CompatibleLimits::is_integer &&
+        RealLimits::is_signed == CompatibleLimits::is_signed;
+};
+
+template<typename RealIntegerType, typename CompatibleNumberIntegerType>
+struct is_compatible_integer_type
+    : is_compatible_integer_type_impl<RealIntegerType,
+      CompatibleNumberIntegerType> {};
+
+template<typename BasicJsonType, typename CompatibleType, typename = void>
+struct is_compatible_type_impl: std::false_type {};
+
+template<typename BasicJsonType, typename CompatibleType>
+struct is_compatible_type_impl <
+    BasicJsonType, CompatibleType,
+    enable_if_t<is_complete_type<CompatibleType>::value >>
+{
+    static constexpr bool value =
+        has_to_json<BasicJsonType, CompatibleType>::value;
+};
+
+template<typename BasicJsonType, typename CompatibleType>
+struct is_compatible_type
+    : is_compatible_type_impl<BasicJsonType, CompatibleType> {};
+
+template<typename T1, typename T2>
+struct is_constructible_tuple : std::false_type {};
+
+template<typename T1, typename... Args>
+struct is_constructible_tuple<T1, std::tuple<Args...>> : conjunction<is_constructible<T1, Args>...> {};
+
+template<typename BasicJsonType, typename T>
+struct is_json_iterator_of : std::false_type {};
+
+template<typename BasicJsonType>
+struct is_json_iterator_of<BasicJsonType, typename BasicJsonType::iterator> : std::true_type {};
+
+template<typename BasicJsonType>
+struct is_json_iterator_of<BasicJsonType, typename BasicJsonType::const_iterator> : std::true_type
+{};
+
+// checks if a given type T is a template specialization of Primary
+template<template <typename...> class Primary, typename T>