temnick commited on 8 days ago

Commit

f724cf3

1 Parent(s): 1c86d18

Initial content

Browse files

Files changed (37) hide show

.gitattributes +7 -0
.gitignore +2 -0
Arm_AI_Model_Community_License_v1_0_PRE-1154.pdf +0 -0
LICENSE +2 -0
README.md +150 -0
bin/windows-x86_64/VkLayer_Graph.dll +3 -0
bin/windows-x86_64/VkLayer_Graph.json +29 -0
bin/windows-x86_64/VkLayer_Tensor.dll +3 -0
bin/windows-x86_64/VkLayer_Tensor.json +31 -0
bin/windows-x86_64/scenario-runner.exe +3 -0
nss_v0.1.0_fp32.pt +3 -0
nss_v0.1.0_int8.pt +3 -0
nss_v0.1.0_int8.vgf +3 -0
nss_v0.1.0_int8_metadata.json +79 -0
resources/Enchanted_Castle_NSS_Demo.mp4 +3 -0
resources/model-explorer-screenshot.png +3 -0
scenario/0_pre_process.comp +572 -0
scenario/0_pre_process.spv +3 -0
scenario/0_pre_process_push_consts.npy +3 -0
scenario/1_nss.vgf +3 -0
scenario/2_post_process.comp +361 -0
scenario/2_post_process.spv +3 -0
scenario/2_post_process_push_consts.npy +3 -0
scenario/common.h +160 -0
scenario/in_colour.dds +3 -0
scenario/in_depth.dds +3 -0
scenario/in_depth_tm1.dds +3 -0
scenario/in_derivative_tm1.dds +3 -0
scenario/in_feedback_tm1.dds +3 -0
scenario/in_history.dds +3 -0
scenario/in_motion.dds +3 -0
scenario/in_nearest_offset_tm1.dds +3 -0
scenario/kernel_lut.h +83 -0
scenario/parameters.json +79 -0
scenario/scenario.json +821 -0
scenario/typedefs.h +86 -0
third_party_licenses_and_copyright_notices.txt +15 -0

.gitattributes CHANGED Viewed

@@ -33,3 +33,10 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text

 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+*.mp4 filter=lfs diff=lfs merge=lfs -text
+*.dds filter=lfs diff=lfs merge=lfs -text
+*.vgf filter=lfs diff=lfs merge=lfs -text
+*.dll filter=lfs diff=lfs merge=lfs -text
+*.exe filter=lfs diff=lfs merge=lfs -text
+*.spv filter=lfs diff=lfs merge=lfs -text
+*.png filter=lfs diff=lfs merge=lfs -text

.gitignore ADDED Viewed

	@@ -0,0 +1,2 @@


1	+ out/
2	+ bin/linux-x86_64/

Arm_AI_Model_Community_License_v1_0_PRE-1154.pdf ADDED Viewed

Binary file (49.6 kB). View file

LICENSE ADDED Viewed

	@@ -0,0 +1,2 @@


1	+ The license for the model source code can be found at https://github.com/arm/neural-graphics-model-gym/blob/main/LICENSES/Apache-2.0.txt
2	+ The license for the content of this repository can be found in Arm_AI_Model_Community_License_v1_0_PRE-1154.pdf

README.md ADDED Viewed

	@@ -0,0 +1,150 @@

+---
+license: other
+pipeline_tag: image-to-image
+tags:
+- android
+- neural-graphics
+- gaming
+- graphics
+language:
+- en
+---
+# Neural Super Sampling (NSS)
+Neural Super Sampling (NSS) is an innovative, efficient network for temporal super sampling on mobile devices. Content rendered at 540p can be upscaled to 1080p, resulting in up to 50% GPU savings. With our retraining tools content creators and game studios can build derivatives of the model suited to artwork style and performance requirements.
+### 🎥 Neural Super Sampling Demo
+<video controls width="100%">
+  <source src="https://huggingface.co/Arm/neural-super-sampling/resolve/main/resources/Enchanted_Castle_NSS_Demo.mp4" type="video/mp4">
+  Your browser does not support the video tag.
+</video>
+## Model Details
+Neural Super Sampling (NSS) is a parameter prediction model for real-time temporal super sampling developed by Arm, optimized for execution on Neural Accelerators (NX) in mobile GPUs. It enables high-resolution rendering at a lower compute cost by reconstructing high-quality output frames from low-resolution temporal inputs. NSS is particularly suited for mobile gaming, XR, and other power-constrained graphics use cases.
+- **Developed by:** Arm Limited
+- **Model type:** Temporal image super sampling
+- **License:** Other
+- **Repository:** [Neural Graphics Model Gym](https://github.com/arm/neural-graphics-model-gym)
+- **Paper:** [How Neural Super Sampling Works](https://community.arm.com/arm-community-blogs/b/mobile-graphics-and-gaming-blog/posts/how-arm-neural-super-sampling-works)
+- **Quickstart with ML extensions for Vulkan®**: [ML extensions for Vulkan® Quickstart Guide](https://learn.arm.com/learning-paths/mobile-graphics-and-gaming/vulkan-ml-sample/)
+- **Quickstart with Unreal**: [Neural Super Sampling Quickstart Guide](https://learn.arm.com/learning-paths/mobile-graphics-and-gaming/nss-unreal/) for NSS integration into Unreal Engine
+NSS is under active development with regular updates planned. It should not be considered production-grade at this stage. As we increase the size and diversity of the training dataset we expect to see significant quality improvements. Follow Arm to stay up to date on the latest releases.
+The model is released under Arm's [AI Model Community License](https://huggingface.co/Arm/neural-super-sampling/blob/main/Arm_AI_Model_Community_License_v1_0_PRE-1154.pdf), which allows NSS to be retrained on datasets captured from your own content. Future releases of the [Neural Graphics Model Gym](https://github.com/arm/neural-graphics-model-gym) will provide the tools to capture and convert content for use in (re)training.
+## Uses
+NSS can be directly integrated into graphics pipelines using ML extensions for Vulkan®. See included ML SDK for Vulkan [scenario](https://huggingface.co/Arm/neural-super-sampling/tree/main/scenario) for the simplest way to evaluate the model. The scenario includes the necessary pre- and post-processing compute shaders along with a single frame worth of input data.
+The recommended way of integrating the model into a graphics pipeline is by using the [VGF Library](https://github.com/arm/ai-ml-sdk-vgf-library/tree/main) from the ML SDK for Vulkan.
+NSS is released under a [permissive license](https://huggingface.co/Arm/neural-super-sampling/blob/main/Arm_AI_Model_Community_License_v1_0_PRE-1154.pdf) designed to foster innovation in the graphics industry and provide differentiation to content creators.
+### Direct Use
+NSS has been integrated into Unreal Engine via two plugins, the [NSS Plugin for Unreal Engine](https://github.com/arm/neural-graphics-for-unreal/) and [Unreal NNE Plugin for ML extensions for Vulkan](https://github.com/arm/ml-extensions-for-vulkan-unreal-plugin/). See our [quick start guide](https://learn.arm.com/learning-paths/mobile-graphics-and-gaming/nss-unreal/) for step-by-step instructions on how to use NSS in Unreal® Engine.
+### Out-of-Scope Use
+- Not suited for non-temporal tasks such as standalone image upsampling
+## Bias, Risks, and Limitations
+- Requires accurate motion vectors and frame history for stable output
+- May underperform in extremely low framerate scenarios (<10 FPS) with fast camera movement
+- Padding of the input is needed if input dimensions are not divisible by 8
+### Recommendations
+For ultra-low-FPS use cases, reduce the camera speed, acceleration, or both so that the relative motion between frames mimics the
+application running at a higher frame rate.
+## How to Get Started with the Model
+This repository contains pre-trained weights and a compiled NSS model in VGF format, ready for integration with Vulkan applications.
+The included Scenario demonstrates full execution of the model on a Vulkan compute-capable system. An Emulation Layer is provided to implement ML Extensions for Vulkan where it is not supported by the native Vulkan driver.
+### Download and Prepare the Scenario
+Clone the NSS model repository from Hugging Face
+```powershell
+git clone https://huggingface.co/Arm/neural-super-sampling
+cd neural-super-sampling
+```
+### Run the Scenario
+The NSS Hugging Face repository includes pre-built Windows® binaries for ML Emulation Layer for Vulkan and Scenario Runner. For other platforms,
+- build from source following the instructions for [Building the Emulation Layer from source](https://github.com/arm/ai-ml-emulation-layer-for-vulkan/blob/main/README.md#building-the-emulation-layer-from-source) and [Building the Scenario Runner from source](https://github.com/arm/ai-ml-sdk-scenario-runner/blob/main/README.md#building-scenario-runner-from-source)
+- adapt the instructions below accordingly
+1. Set the required environment variables:
+   On Windows:
+   ```powershell
+   $env:VK_LAYER_PATH="$PWD\bin\windows-x86_64"
+   $env:VK_INSTANCE_LAYERS="VK_LAYER_ML_Graph_Emulation;VK_LAYER_ML_Tensor_Emulation"
+   ```
+   On Linux (assuming the Emulation Layer binaries and JSON files and Scenario Runner executable are copied to `bin/linux-x86_64`):
+   ```bash
+   export LD_LIBRARY_PATH=$PWD/bin/linux-x86_64:$LD_LIBRARY_PATH
+   export VK_LAYER_PATH=$PWD/bin/linux-x86_64
+   export VK_INSTANCE_LAYERS=VK_LAYER_ML_Graph_Emulation:VK_LAYER_ML_Tensor_Emulation
+   ```
+2. Execute the scenario:
+   On Windows:
+   ```powershell
+   bin\windows-x86_64\scenario-runner.exe --scenario scenario\scenario.json --output out
+   ```
+   On Linux:
+   ```bash
+   bin/linux-x86_64/scenario-runner --scenario scenario/scenario.json --output out
+   ```
+- Output images are encoded as `B10G11R11_UFLOAT`. This format is common for framebuffers but not widely supported by image viewers. Use [RenderDoc](https://renderdoc.org/) to view these images.
+## Training and Evaluation
+For background on NSS architecture and training read our blog: [How Neural Super Sampling Works](https://community.arm.com/arm-community-blogs/b/mobile-graphics-and-gaming-blog/posts/how-arm-neural-super-sampling-works)
+Training and evaluation details, including model architecture code, training pipeline, and test configurations, are available at:
+- Model training code: https://github.com/arm/neural-graphics-model-gym
+- Examples and tutorials: https://github.com/arm/neural-graphics-model-gym-examples
+- Sample dataset: https://huggingface.co/datasets/Arm/neural-graphics-dataset
+### 🔎 Model Explorer VGF extension
+The [VGF extension to Model Explorer](https://github.com/arm/vgf-adapter-model-explorer) provides a simple interface to visualize model and analyse VGF composition.
+![Model Explorer screenshot](resources/model-explorer-screenshot.png)
+## License
+- The license for the model source code can be found [here](https://github.com/arm/neural-graphics-model-gym/blob/main/LICENSES/Apache-2.0.txt).
+- The license for the content of this repository can be found [here](https://huggingface.co/Arm/neural-super-sampling/blob/main/Arm_AI_Model_Community_License_v1_0_PRE-1154.pdf).
+## More Information
+🧑‍🔬 More technical details about the model can be found in the [NSS Guide](https://developer.arm.com/documentation/111009/latest/).
+👩🏽‍💻 Our [Neural Graphics Development Kit](https://developer.arm.com/mobile-graphics-and-gaming/neural-graphics) contains engine plugins, model training tools, code examples and extensive developer documentation.
+🙋🏻‍♀️ For questions or feedback please [start a discussion](https://huggingface.co/Arm/neural-super-sampling/discussions)
+## Trademark notice
+Arm® is a registered trademark of Arm Limited (or its subsidiaries) in the US and/or elsewhere.
+Windows® is a trademark of the Microsoft group of companies.
+Vulkan® is a registered trademark of the [Khronos® Group](https://www.khronos.org/legal/trademarks).

bin/windows-x86_64/VkLayer_Graph.dll ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:0a2fd54f62bef850685bcc4331714e0590a0a3aef28ea2dd6aa8c9e6f68f4da0
+size 7437312

bin/windows-x86_64/VkLayer_Graph.json ADDED Viewed

	@@ -0,0 +1,29 @@

+{
+    "file_format_version": "1.0.0",
+    "layer": {
+        "name": "VK_LAYER_ML_Graph_Emulation",
+        "type": "INSTANCE",
+        "library_path": ".\\VkLayer_Graph.dll",
+        "api_version": "1.3.0",
+        "implementation_version": "1",
+        "description": "ML Graph Emulation Layer",
+        "functions": {
+            "vkGetInstanceProcAddr": "graphGetInstanceProcAddr",
+            "vkGetDeviceProcAddr": "graphGetDeviceProcAddr"
+        },
+        "device_extensions": [
+            {
+                "name": "VK_ARM_data_graph",
+                "spec_version": "1",
+                "entrypoints": [
+                    "vkGetPhysicalDeviceGraphInstructionSetsARM",
+                    "vkCreateGraphPipelinesARM",
+                    "vkCreateGraphPipelineSessionARM",
+                    "vkGetGraphPipelineSessionMemoryRequirementsARM",
+                    "vkBindGraphPipelineSessionMemoryARM",
+                    "vkDestroyGraphPipelineSessionARM",
+                    "vkCmdDispatchGraphARM"
+                ]
+            }]
+    }
+}

bin/windows-x86_64/VkLayer_Tensor.dll ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:15a7e7f9b4ff74f530a550669740f1c8b5295bdbe9a0474da3e3b9e906c4ce76
+size 5689344

bin/windows-x86_64/VkLayer_Tensor.json ADDED Viewed

	@@ -0,0 +1,31 @@

+{
+    "file_format_version": "1.0.0",
+    "layer": {
+        "name": "VK_LAYER_ML_Tensor_Emulation",
+        "type": "INSTANCE",
+        "library_path": ".\\VkLayer_Tensor.dll",
+        "api_version": "1.3.0",
+        "implementation_version": "1",
+        "description": "ML Tensor Emulation Layer",
+        "functions": {
+            "vkGetInstanceProcAddr": "tensorGetInstanceProcAddr",
+            "vkGetDeviceProcAddr": "tensorGetDeviceProcAddr"
+        },
+        "device_extensions": [
+            {
+                "name": "VK_ARM_tensors",
+                "spec_version": "1",
+                "entrypoints": [
+                    "vkCreateTensorARM",
+                    "vkDestroyTensorARM",
+                    "vkCreateTensorViewARM",
+                    "vkDestroyTensorViewARM",
+                    "vkGetTensorMemoryRequirementsARM",
+                    "vkBindTensorMemoryARM",
+                    "vkGetDeviceTensorMemoryRequirementsARM",
+                    "vkCmdCopyTensorARM"
+                ]
+            }
+        ]
+    }
+}

bin/windows-x86_64/scenario-runner.exe ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:5a99f1076dec1f504d64387969a3d18dc687c7f95c505dca812e4fcbca60f3d2
+size 5285376

nss_v0.1.0_fp32.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:391ad31f72783175afcb94180e0d8ffad7a34a6d848edfdea3409677236fc1da
+size 553364

nss_v0.1.0_int8.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:57ebcd81596ca7720015fc42b8c3d509c45d0c031244b024c7d1056b671dce9d
+size 665897

nss_v0.1.0_int8.vgf ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:f2bb554b54f186111150cbe3f80b258300d22e6e6e23610a6c519abe1962d8f9
+size 162900

nss_v0.1.0_int8_metadata.json ADDED Viewed

	@@ -0,0 +1,79 @@

+{
+    "dm_scale_on_no_motion": [
+        0.617464542388916
+    ],
+    "inputs": {
+        "x": {
+            "SINT": {
+                "scale": 0.003921568859368563,
+                "zero_point": -128
+            },
+            "SNORM": {
+                "scale": 0.49803924513980746,
+                "zero_point": -1.0078740157480315
+            }
+        }
+    },
+    "outputs": {
+        "activation_post_process_45": {
+            "SINT": {
+                "scale": 0.003937007859349251,
+                "zero_point": -127
+            },
+            "SNORM": {
+                "scale": 0.49999999813735485,
+                "zero_point": -1.0
+            }
+        },
+        "activation_post_process_50": {
+            "SINT": {
+                "scale": 0.003937007859349251,
+                "zero_point": -127
+            },
+            "SNORM": {
+                "scale": 0.49999999813735485,
+                "zero_point": -1.0
+            }
+        },
+        "activation_post_process_55": {
+            "SINT": {
+                "scale": 0.003937007859349251,
+                "zero_point": -127
+            },
+            "SNORM": {
+                "scale": 0.49999999813735485,
+                "zero_point": -1.0
+            }
+        },
+        "activation_post_process_60": {
+            "SINT": {
+                "scale": 0.003937007859349251,
+                "zero_point": -127
+            },
+            "SNORM": {
+                "scale": 0.49999999813735485,
+                "zero_point": -1.0
+            }
+        },
+        "activation_post_process_65": {
+            "SINT": {
+                "scale": 0.003937007859349251,
+                "zero_point": -127
+            },
+            "SNORM": {
+                "scale": 0.49999999813735485,
+                "zero_point": -1.0
+            }
+        },
+        "activation_post_process_70": {
+            "SINT": {
+                "scale": 0.003937007859349251,
+                "zero_point": -127
+            },
+            "SNORM": {
+                "scale": 0.49999999813735485,
+                "zero_point": -1.0
+            }
+        }
+    }
+}

resources/Enchanted_Castle_NSS_Demo.mp4 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:13cc07b5829e7335b94b548314dd189add180ae4b6fd4d1529db37de72a9c3d8
+size 96767057

resources/model-explorer-screenshot.png ADDED Viewed

Git LFS Details

SHA256: 38bd4531f68626059c16e656f4d6a2df017ef074c26b3bce71b68405539f63da
Pointer size: 131 Bytes
Size of remote file: 153 kB

scenario/0_pre_process.comp ADDED Viewed

	@@ -0,0 +1,572 @@

+//
+// -----------------------------------------------------------------------------
+// The proprietary software and information contained in this file is
+// confidential and may only be used by an authorized person under a valid
+// licensing agreement from Arm Limited or its affiliates.
+//
+// Copyright (C) 2025. Arm Limited or its affiliates. All rights reserved.
+//
+// This entire notice must be reproduced on all copies of this file and
+// copies of this file may only be made by an authorized person under a valid
+// licensing agreement from Arm Limited or its affiliates.
+// -----------------------------------------------------------------------------
+//
+#version 460
+#extension GL_EXT_shader_8bit_storage : require
+#extension GL_EXT_shader_16bit_storage : require
+#extension GL_EXT_shader_explicit_arithmetic_types : require
+#extension GL_EXT_shader_explicit_arithmetic_types_int8 : require
+#extension GL_EXT_shader_explicit_arithmetic_types_float16 : require
+#extension GL_EXT_shader_explicit_arithmetic_types_float32 : require
+#extension GL_GOOGLE_include_directive : enable
+#extension GL_ARM_tensors : require
+// includes
+#include "typedefs.h"
+#include "common.h"
+// types
+// Three packed int8x4 words (12 signed 8-bit channels in total); the per-field
+// comments list which channels each word carries.
+// NOTE(review): presumably mirrors the 12-channel layout of `_PreprocessTensor` - confirm.
+// The struct is not referenced in the visible part of this shader.
+struct TensorElement
+{
+    int8_t4 wh_rgb_col_r;      // warped_history.rgb, jittered_colour.r
+    int8_t4 col_gb_dm_fback_r; // jittered_colour.gb, disocclusion mask, feedback.r
+    int8_t4 fback_gba_ld;      // feedback.gba, luma derivative
+};
+// inputs
+// Descriptor set 0: read-only textures sampled by this pass. The trailing comment on
+// each line records the expected resolution and texel format of the bound resource.
+// Names containing `Tm1` are paired with non-`Tm1` counterparts.
+// NOTE(review): presumably `Tm1` means "time minus one", i.e. the previous frame - confirm.
+layout (set=0, binding=0) uniform mediump   sampler2D _ColourTex;               // 540p  | R11G11B10 32bpp
+layout (set=0, binding=1) uniform highp     sampler2D _DepthTex;                // 540p  | R32_FLOAT 32bpp
+layout (set=0, binding=2) uniform mediump   sampler2D _MotionVectorTex;         // 540p  | RG_16 32bpp
+layout (set=0, binding=3) uniform mediump   sampler2D _HistoryTex;              // 1080p | R11G11B10 32bpp
+layout (set=0, binding=4) uniform lowp      sampler2D _FeedbackTensor;          // 1080p | R8G8B8A8_SNORM 32bpp | Tensor->Texture Alias (Linear)
+layout (set=0, binding=5) uniform highp     sampler2D _DepthTm1Tex;             // 540p  | R32_FLOAT 32bpp
+layout (set=0, binding=6) uniform lowp      sampler2D _LumaDerivTm1Tex;         // 540p  | R8G8_UNORM 16bpp
+layout (set=0, binding=7) uniform lowp      sampler2D _NearestDepthCoordTm1Tex; // 540p  | R8_UNORM 8bpp
+// outputs
+// Descriptor set 1: write-only resources produced by this pass.
+// NOTE(review): set 1 declares bindings 0, 1 and 3 only; binding 2 is not declared in
+// this file - confirm against the resource bindings in scenario.json that the gap is intended.
+layout (set=1, binding=0)       uniform writeonly tensorARM<int8_t, 4> _PreprocessTensor;         // 540p  | 12ch 96bpp
+layout (set=1, binding=1, rg8)  uniform writeonly lowp  image2D        _PreProcessLumaDerivOut;   // 540p  | R8G8 16bpp
+layout (set=1, binding=3, r8)   uniform writeonly lowp image2D         _NearestDepthCoordOut;     // 540p  | R8 8bpp
+// push-constants
+// Every member carries an explicit byte offset, so the block layout is fixed by this
+// declaration; members are grouped by alignment and the block totals 128 bytes.
+layout(push_constant, std430) uniform PushConstants {
+    // ─────────────── 16-byte aligned ───────────────
+    layout(offset =  0)  float4  _DeviceToViewDepth;   //  16 B
+    layout(offset = 16)  float4  _JitterOffset;        //  16 B (.xy = pixels, .zw = uvs)
+    layout(offset = 32)  float4  _JitterOffsetTm1;     //  16 B (.xy = pixels, .zw = uvs)
+    layout(offset = 48)  float4  _ScaleFactor;         //  16 B (.xy = scale, .zw = inv scale)
+    // ───────────────  8-byte aligned ───────────────
+    layout(offset = 64)  int32_t2 _OutputDims;         //   8 B
+    layout(offset = 72)  int32_t2 _InputDims;          //   8 B
+    layout(offset = 80)  float2   _InvOutputDims;      //   8 B
+    layout(offset = 88)  float2   _InvInputDims;       //   8 B
+    layout(offset = 96)  half4    _QuantParams;        //   8 B  (.xy SINT, .zw SNORM)
+    layout(offset = 104) half4    _MotionDisThreshPad; //   8 B  (.xyzw = motion/disocclusion thresholds)
+    // ───────────────  4-byte aligned ───────────────
+    // Named `_ExposurePair` (not `_Exposure`) so that it cannot collide with the
+    // `_Exposure` convenience macro below. Offset, type and size are unchanged.
+    layout(offset = 112) half2    _ExposurePair;       //   4 B  (.x = exposure, .y = 1/exp)
+    layout(offset = 116) half2    _HistoryPad;         //   4 B
+    // ─────────────── padding to 16-byte struct size ────
+    layout(offset = 120) int32_t2 _Padding;            //   8 B
+                                                       // Total: **128 bytes**
+};
+// Convenience mapping for accessing push constants
+// `_Exposure` / `_InvExposure` must not expand to an expression that contains the
+// `_Exposure` token itself: when the member was also called `_Exposure`, the macro
+// `_InvExposure` (-> `_Exposure.y`) was rescanned into `_Exposure.x.y`, i.e. a `.y`
+// swizzle on a scalar, which is a compile error as soon as `_InvExposure` is used.
+#define _Scale              _ScaleFactor.xy
+#define _InvScale           _ScaleFactor.zw
+#define _Exposure           _ExposurePair.x
+#define _InvExposure        _ExposurePair.y
+#define _JitterOffsetPix    _JitterOffset.xy
+#define _JitterOffsetUv     _JitterOffset.zw
+#define _JitterOffsetTm1Pix _JitterOffsetTm1.xy
+#define _JitterOffsetTm1Uv  _JitterOffsetTm1.zw
+#define _MotionWarpThresh   _MotionDisThreshPad.x
+#define _MotionDisThresh    _MotionDisThreshPad.y
+#define _DisocclusionScale  _MotionDisThreshPad.z
+#define _NotHistoryReset    _HistoryPad.x
+// Quantization Parameters
+// inside: `./parameters.json`
+// these values are embedded inside the TOSA file and learnt during QAT
+// Each macro below is only defined when no definition exists yet, so an earlier
+// `#define` of the same name takes precedence over the push-constant value.
+#ifndef _InputQuantParams
+    // inputs - x["SINT"]
+    #define _InputQuantParams _QuantParams.xy
+#endif
+#ifndef _FeedbackQuantParams
+    // outputs - activation_post_process_70["SNORM"]
+    #define _FeedbackQuantParams _QuantParams.zw
+#endif
+// constants
+// MAX_DEPTH is 0 when INVERTED_DEPTH is defined and 1 otherwise.
+// NOTE(review): presumably the device-depth value of the far plane for the active
+// depth convention - confirm; it is not referenced in the visible part of this file.
+#ifdef INVERTED_DEPTH
+    #define MAX_DEPTH 0.f
+#else
+    #define MAX_DEPTH 1.f
+#endif
+// methods
+// Returns true when `pos` is inside [0, size) on both axes (for non-negative `size`).
+// Both operands are reinterpreted as unsigned, so a negative coordinate becomes a very
+// large value and fails the same `<` comparison that rejects too-large coordinates.
+bool IsOnScreen(int32_t2 pos, int32_t2 size)
+{
+    const uint32_t2 upos  = uint32_t2(pos);
+    const uint32_t2 usize = uint32_t2(size);
+    return (upos.x < usize.x) && (upos.y < usize.y);
+}
+// Fetches the texel at `pixel` (mip 0) from the motion-vector texture and returns
+// its .rg channels converted to half precision.
+half2 LoadMotion(int32_t2 pixel)
+{
+    const half2 motion = half2(texelFetch(_MotionVectorTex, pixel, 0).rg);
+    return motion;
+}
+// Fetches the colour texel at `pixel` (mip 0), scales it by the exposure push
+// constant, then passes it through SafeColour() and Tonemap() in that order.
+half3 LoadColour(int32_t2 pixel)
+{
+    const half3 raw     = half3(texelFetch(_ColourTex, pixel, 0).rgb);
+    const half3 exposed = raw * _Exposure;
+    return Tonemap(SafeColour(exposed));
+}
+// Reads the nearest-depth offset code stored at `pixel` and decodes it via
+// DecodeNearestDepthCoord(). The result is forced to (0, 0) when `pixel` lies
+// outside the input; the fetch itself always uses a clamped, in-range coordinate.
+int32_t2 LoadDepthNearestDepthOffsetTm1(int32_t2 pixel)
+{
+    // (1, 1) when the requested pixel is on screen, (0, 0) otherwise; this must be
+    // evaluated before the coordinate is clamped.
+    const int32_t2 on_screen_mask = int32_t2(IsOnScreen(pixel, _InputDims));
+    const int32_t2 clamped = clamp(pixel, int32_t2(0), _InputDims - int32_t2(1));
+    // The texture value is normalised; scale by 255 and round to recover the integer code.
+    const half encNorm = half(texelFetch(_NearestDepthCoordTm1Tex, clamped, 0).r);
+    const int32_t code = int32_t(encNorm * 255.0 + 0.5);
+    // map back to {-1,0,1}², zeroed for off-screen requests
+    return DecodeNearestDepthCoord(code) * on_screen_mask;
+}
+// Gathers a 2x2 quad of depth values from `_DepthTm1Tex` around `fUV`, after shifting
+// the UV by the stored nearest-depth offset for that location. The gathered components
+// are written to `depthQuad` reordered as .wzxy.
+void GatherReconstructedPreviousDepthRQuad(float2 fUV, inout float4 depthQuad)
+{
+    const int32_t2 texel = int32_t2(fUV * _InputDims);
+    const int32_t2 nearest_offset = LoadDepthNearestDepthOffsetTm1(texel);
+    // Convert the texel-space offset into UV space before applying it.
+    const float2 offset_uv = float2(nearest_offset) * _InvInputDims;
+    const float2 gather_uv = fUV + offset_uv;
+    depthQuad = textureGather(_DepthTm1Tex, gather_uv, 0).wzxy;
+}
+// Samples the history texture at `uv` (LOD 0), scales it by the exposure push
+// constant, then passes it through SafeColour() and Tonemap() in that order.
+half3 WarpHistory(float2 uv)
+{
+    const half3 history = half3(textureLod(_HistoryTex, uv, 0).rgb);
+    const half3 exposed = history * _Exposure;
+    return Tonemap(SafeColour(exposed));
+}
+// Samples the feedback tensor (bound as a texture) at `uv` (LOD 0) and dequantizes
+// all four channels with `_FeedbackQuantParams`.
+half4 WarpFeedback(float2 uv)
+{
+    const half4 quantized = half4(textureLod(_FeedbackTensor, uv, 0));
+    return Dequantize(quantized, _FeedbackQuantParams);
+}
+// Samples the previous luma/derivative texture at `uv` (LOD 0) and returns its
+// .rg channels converted to half precision.
+half2 WarpLumaDerivative(float2 uv)
+{
+    const half2 luma_deriv = half2(textureLod(_LumaDerivTm1Tex, uv, 0).rg);
+    return luma_deriv;
+}
+// Computes the temporally accumulated luma derivative for the current pixel.
+//   reproj_uv         - UV used to sample the previous luma/derivative texture
+//   jittered_colour   - current-frame colour whose luminance is compared with history
+//   disocclusion_mask - the derivative is zeroed when this exceeds DIS_THRESH
+// Returns half2(derivative for the current frame, luma of the current frame).
+half2 CalculateLumaDerivative(float2 reproj_uv, half3 jittered_colour, half disocclusion_mask)
+{
+    // Tuning constants
+    const half  DIS_THRESH  = 0.01HF;
+    const half  DERIV_MIN   = 0.05HF;
+    const half  DERIV_MAX   = 0.3HF;
+    const half  DERIV_POW   = 1.5HF;
+    const half  DERIV_ALPHA = 0.1HF;
+    const half  DERIV_MAX_R = rcp(DERIV_MAX);
+    const half  DERIV_MAX_POW_R = rcp(pow(DERIV_MAX, DERIV_POW));
+    // History sample: .x carries the accumulated derivative, .y the previous luma
+    const half2 history         = WarpLumaDerivative(reproj_uv);
+    const half  prev_derivative = history.x;
+    const half  prev_luma       = history.y;
+    // Current luma and its absolute change against the history luma
+    const half luma_t    = Luminance(jittered_colour);
+    const half raw_delta = abs(luma_t - prev_luma);
+    // Clip to DERIV_MAX (roughly the typical maximum, so the normalised range is used
+    // well) and drop changes below DERIV_MIN to reduce ghosting.
+    const half clipped = min(raw_delta, DERIV_MAX) * step(DERIV_MIN, raw_delta);
+    // Normalised soft-clip: x^1.5 is evaluated as x * sqrt(x), which is only valid
+    // while DERIV_POW stays at 1.5.
+    const half curved = clipped * sqrt(clipped) * DERIV_MAX_POW_R;
+    // Adaptive blend factor: the larger the accumulated derivative already is, the
+    // smaller the step towards the new value, which keeps convergence temporally stable.
+    const half history_strength = clamp(prev_derivative, 0.HF, DERIV_MAX) * DERIV_MAX_R;
+    const half alpha_scale = mix(DERIV_ALPHA, DERIV_ALPHA * 0.1HF, history_strength);
+    half derivative = mix(prev_derivative, curved, alpha_scale);
+    // Zero the result for disoccluded pixels (mask above DIS_THRESH)
+    derivative *= step(disocclusion_mask, DIS_THRESH);
+    // .x -> derivative for current frame, .y -> luma of current frame
+    return half2(derivative, luma_t);
+}
+// Searches the 3x3 neighbourhood of `iPxPos` in `_DepthTex` for the nearest depth.
+//   iPxPos              - centre pixel of the search
+//   iPxSize             - bounds used to reject neighbours that fall off screen
+//   fNearestDepth       - (out) nearest depth found; starts as the centre sample
+//   fNearestDepthOffset - (out) offset of that sample relative to `iPxPos`
+// "Nearest" means the larger value when INVERTED_DEPTH is defined, the smaller otherwise.
+void FindNearestDepth(int32_t2 iPxPos, int32_t2 iPxSize, out float fNearestDepth, out int32_t2 fNearestDepthOffset)
+{
+    /*
+        Closely based on:
+        https://github.com/arm/accuracy-super-resolution-generic-library/blob/38697a58a6e7818ec9d28774bc073f537abb9178/
+        include/gpu/fsr2/ffxm_fsr2_reconstruct_dilated_velocity_and_previous_depth.h#L59
+    */
+    const int32_t kTapCount = 9;
+    // Written as (x, y) and swizzled with .yx to the component order used for addressing
+    const int32_t2 kTapOffsets[kTapCount] = {
+        int32_t2(+0, +0).yx,
+        int32_t2(+1, +0).yx,
+        int32_t2(+0, +1).yx,
+        int32_t2(+0, -1).yx,
+        int32_t2(-1, +0).yx,
+        int32_t2(-1, +1).yx,
+        int32_t2(+1, +1).yx,
+        int32_t2(-1, -1).yx,
+        int32_t2(+1, -1).yx,
+    };
+    // All nine loads are issued up front so the shader compiler can batch them;
+    // texelFetchOffset needs a constant offset, hence the repeated literals.
+    float tapDepth[9];
+    tapDepth[0] = float(texelFetchOffset(_DepthTex, iPxPos, 0, int32_t2(+0, +0).yx).r);
+    tapDepth[1] = float(texelFetchOffset(_DepthTex, iPxPos, 0, int32_t2(+1, +0).yx).r);
+    tapDepth[2] = float(texelFetchOffset(_DepthTex, iPxPos, 0, int32_t2(+0, +1).yx).r);
+    tapDepth[3] = float(texelFetchOffset(_DepthTex, iPxPos, 0, int32_t2(+0, -1).yx).r);
+    tapDepth[4] = float(texelFetchOffset(_DepthTex, iPxPos, 0, int32_t2(-1, +0).yx).r);
+    tapDepth[5] = float(texelFetchOffset(_DepthTex, iPxPos, 0, int32_t2(-1, +1).yx).r);
+    tapDepth[6] = float(texelFetchOffset(_DepthTex, iPxPos, 0, int32_t2(+1, +1).yx).r);
+    tapDepth[7] = float(texelFetchOffset(_DepthTex, iPxPos, 0, int32_t2(-1, -1).yx).r);
+    tapDepth[8] = float(texelFetchOffset(_DepthTex, iPxPos, 0, int32_t2(+1, -1).yx).r);
+    // The centre tap seeds the search; it is accepted without an on-screen test
+    fNearestDepth = tapDepth[0];
+    fNearestDepthOffset = kTapOffsets[0];
+    #pragma unroll
+    for (int32_t tap = 1; tap < kTapCount; ++tap) {
+        // Values fetched for off-screen neighbours are never compared
+        if (IsOnScreen(iPxPos + kTapOffsets[tap], iPxSize)) {
+            const float candidate = tapDepth[tap];
+#ifdef INVERTED_DEPTH
+            const bool isCloser = candidate > fNearestDepth;
+#else
+            const bool isCloser = candidate < fNearestDepth;
+#endif
+            if (isCloser) {
+                fNearestDepth = candidate;
+                fNearestDepthOffset = kTapOffsets[tap];
+            }
+        }
+    }
+}
+// Returns the render (input) resolution taken from the `_InputDims` push constant.
+int32_t2 RenderSize()
+{
+    const int32_t2 renderDims = int32_t2(_InputDims);
+    return renderDims;
+}
+// Maps a pixel position to normalised device coordinates. Both arguments are swizzled
+// with .yx first; the first result component then runs from -1 to +1 and the second
+// from +1 to -1 as the swizzled position goes from 0 to the swizzled size.
+float2 ComputeNdc(float2 fPxPos, int32_t2 iSize)
+{
+    /*
+        Closely based on:
+        https://github.com/arm/accuracy-super-resolution-generic-library/blob/
+        38697a58a6e7818ec9d28774bc073f537abb9178/include/gpu/fsr2/ffxm_fsr2_common.h#L457
+    */
+    const float2 fNormalised = fPxPos.yx / float2(iSize.yx);
+    const float2 fNdcScale   = float2(2.0f, -2.0f);
+    const float2 fNdcBias    = float2(-1.0f, 1.0f);
+    return fNormalised * fNdcScale + fNdcBias;
+}
+// Converts a device depth value to view-space depth using components [0] and [1]
+// of the `_DeviceToViewDepth` push constant:
+//   view = _DeviceToViewDepth[1] / (device - _DeviceToViewDepth[0])
+float GetViewSpaceDepth(float fDeviceDepth)
+{
+    /*
+        Closely based on:
+        https://github.com/arm/accuracy-super-resolution-generic-library/blob/
+        38697a58a6e7818ec9d28774bc073f537abb9178/include/gpu/fsr2/ffxm_fsr2_common.h#L462
+        `fDeviceToViewDepth` / `_DeviceToViewDepth` details found in:
+        https://github.com/arm/accuracy-super-resolution-generic-library/blob/
+        0501f490bd9946a2e1806b5363d7ab8a9a6a5e0a/src/components/fsr2/ffxm_fsr2.cpp#L829
+    */
+    const float fNumerator   = _DeviceToViewDepth[1];
+    const float fDenominator = fDeviceDepth - _DeviceToViewDepth[0];
+    return (fNumerator / fDenominator);
+}
+// Reconstructs a view-space position from a viewport pixel and its device depth.
+// Z comes from GetViewSpaceDepth(); X and Y are the NDC coordinates scaled by
+// components [2] and [3] of `_DeviceToViewDepth` and by Z.
+float3 GetViewSpacePosition(int32_t2 iViewportPos, int32_t2 iViewportSize, float fDeviceDepth)
+{
+    /*
+        Closely based on:
+        https://github.com/arm/accuracy-super-resolution-generic-library/blob/
+        38697a58a6e7818ec9d28774bc073f537abb9178/include/gpu/fsr2/ffxm_fsr2_common.h#L475
+    */
+    const float fViewZ = GetViewSpaceDepth(fDeviceDepth);
+    const float2 fNdc = ComputeNdc(iViewportPos, iViewportSize);
+    const float fViewX = _DeviceToViewDepth[2] * fNdc.x * fViewZ;
+    const float fViewY = _DeviceToViewDepth[3] * fNdc.y * fViewZ;
+    return float3(fViewX, fViewY, fViewZ);
+}
+// Result of GetBilinearSamplingData(): the four samples of a 2x2 bilinear footprint.
+struct BilinearSamplingData
+{
+    // Integer offset of each of the four samples, relative to iBasePos
+    int32_t2 iOffsets[4];
+    // Bilinear weight of each sample, built from the fractional sample position
+    float fWeights[4];
+    // floor() of the sample position after the half-texel shift
+    int32_t2 iBasePos;
+    // Sample position converted back to UV; ComputeDepthClip passes it to the depth gather
+    float2 fQuadCenterUv;
+};
+// Builds the 2x2 bilinear footprint for `fUv` on a grid of `iSize` texels: base texel,
+// per-sample offsets, per-sample weights and the UV of the quad centre.
+BilinearSamplingData GetBilinearSamplingData(float2 fUv, int32_t2 iSize)
+{
+    /*
+        Closely based on:
+        https://github.com/arm/accuracy-super-resolution-generic-library/blob/
+        38697a58a6e7818ec9d28774bc073f537abb9178/include/gpu/fsr2/ffxm_fsr2_common.h#L548
+    */
+    // Shift by half a texel so that floor()/fract() are taken relative to texel centres
+    const float2 fPxSample = (fUv * iSize) - float2(0.5f, 0.5f);
+    const float2 fPxFrac = fract(fPxSample);
+    const float fFracX = fPxFrac.x;
+    const float fFracY = fPxFrac.y;
+    BilinearSamplingData data;
+    data.iBasePos = int32_t2(floor(fPxSample));
+    data.fQuadCenterUv = (fPxSample + 0.5f) / float2(iSize);
+    // NOTE(review): offsets [1] and [2] are (0, 1) and (1, 0), while weights [1] and [2]
+    // are frac.x-major and frac.y-major respectively. Presumably the offsets follow the
+    // swizzled (.yx) component order used elsewhere in this shader - confirm that each
+    // offset is paired with the intended weight.
+    data.iOffsets[0] = int32_t2(0, 0);
+    data.iOffsets[1] = int32_t2(0, 1);
+    data.iOffsets[2] = int32_t2(1, 0);
+    data.iOffsets[3] = int32_t2(1, 1);
+    data.fWeights[0] = (1.f - fFracX) * (1.f - fFracY);
+    data.fWeights[1] = (fFracX) * (1.f - fFracY);
+    data.fWeights[2] = (1.f - fFracX) * (fFracY);
+    data.fWeights[3] = (fFracX) * (fFracY);
+    return data;
+}
+float ComputeDepthClip(float2 fUvSample, float fCurrentDepthSample)
+{
+    /*
+        Compares the current depth sample with the 2x2 footprint of previous-frame depth
+        around fUvSample and returns a saturated weight. The result is 0 when no tap
+        accumulated any weight, and grows as the current view-space depth exceeds the
+        previous one by more than the required separation, or as taps fall off screen.
+        Closely based on:
+        https://github.com/arm/accuracy-super-resolution-generic-library/blob/
+        38697a58a6e7818ec9d28774bc073f537abb9178/include/gpu/fsr2/ffxm_fsr2_depth_clip.h#L36
+    */
+    const float fReconstructedDepthBilinearWeightThreshold = 0.1f; // on-screen taps at or below this weight are skipped
+    float fCurrentDepthViewSpace = GetViewSpaceDepth(fCurrentDepthSample);
+    BilinearSamplingData bilinearInfo = GetBilinearSamplingData(fUvSample, RenderSize());
+    float fDepth = 0.0f;
+    float fWeightSum = 0.0f;
+    float4 fPrevDepthSamples;
+    GatherReconstructedPreviousDepthRQuad(bilinearInfo.fQuadCenterUv, fPrevDepthSamples); // all four previous-depth taps in one call
+    for (int32_t iSampleIndex = 0; iSampleIndex < 4; iSampleIndex++)
+    {
+        const int32_t2 iOffset = bilinearInfo.iOffsets[iSampleIndex];
+        const int32_t2 iSamplePos = bilinearInfo.iBasePos + iOffset;
+        const float fWeight = bilinearInfo.fWeights[iSampleIndex];
+        const bool onscreen = IsOnScreen(iSamplePos, RenderSize());
+        fWeightSum += onscreen ? 0.f : fWeight; // off-screen taps add weight but no depth term, pushing the result towards 1
+        if (onscreen)
+        {
+            if (fWeight > fReconstructedDepthBilinearWeightThreshold)
+            {
+                const float fPrevDepthSample = fPrevDepthSamples[iSampleIndex];
+                const float fPrevNearestDepthViewSpace = GetViewSpaceDepth(fPrevDepthSample);
+                const float fDepthDiff = fCurrentDepthViewSpace - fPrevNearestDepthViewSpace;
+                if (fDepthDiff > 0.0f) { // only taps where the current view-space depth exceeds the previous one contribute
+#ifdef INVERTED_DEPTH
+                    const float fPlaneDepth = min(fPrevDepthSample, fCurrentDepthSample);
+#else
+                    const float fPlaneDepth = max(fPrevDepthSample, fCurrentDepthSample);
+#endif
+                    const float3 fCenter = GetViewSpacePosition(int32_t2(RenderSize() * 0.5f), RenderSize(), fPlaneDepth);
+                    const float3 fCorner = GetViewSpacePosition(int32_t2(0, 0), RenderSize(), fPlaneDepth);
+                    const float fHalfViewportWidth = length(float2(RenderSize())); // NOTE(review): despite the name, this is the length of the full RenderSize() diagonal
+                    const float fDepthThreshold = max(fCurrentDepthViewSpace, fPrevNearestDepthViewSpace);
+                    const float Ksep = 1.37e-05f;
+                    const float Kfov = length(fCorner) / length(fCenter);
+                    const float fRequiredDepthSeparation = Ksep * Kfov * fHalfViewportWidth * fDepthThreshold;
+                    const float fResolutionFactor = saturate(length(float2(RenderSize())) / length(float2(1920.0f, 1080.0f))); // render diagonal relative to 1920x1080
+                    const float fPower = lerp(1.0f, 3.0f, fResolutionFactor);
+                    fDepth += pow(saturate(float(fRequiredDepthSeparation / fDepthDiff)), fPower) * fWeight;
+                    fWeightSum += fWeight;
+                }
+            }
+        }
+    }
+    return (fWeightSum > 0) ? saturate(1.0f - fDepth / fWeightSum) : 0.0f; // no accumulated weight -> 0
+}
+void WriteLumaDerivative(int32_t2 pixel, half2 derivative)
+{
+    // Derivative goes into .xy; .z is written as 0 and .w as 1.
+    const half4 texel = half4(derivative, half2(0.f, 1.f));
+    imageStore(_PreProcessLumaDerivOut, pixel, texel);
+}
+void WriteNearestDepthOffset(int32_t2 pixel, uint8_t offset)
+{
+    // The code is stored normalised by 255 in the red channel; the rest is (0, 0, 1).
+    const half4 texel = half4(half(offset) / 255.HF, 0.HF, 0.HF, 1.HF);
+    imageStore(_NearestDepthCoordOut, pixel, texel);
+}
+void WriteToTensor(int32_t2 outputPixel, half3 input_colour, half3 history, half disocclusion_mask, half luma_derivative, half4 temporal_feedback)
+{
+    // Quantize the 12 channels in three groups of four, all with the input quantization parameters.
+    TensorElement packed;
+    packed.wh_rgb_col_r      = Quantize(half4(history.rgb, input_colour.r), _InputQuantParams);
+    packed.col_gb_dm_fback_r = Quantize(half4(input_colour.gb, disocclusion_mask, temporal_feedback.r), _InputQuantParams);
+    packed.fback_gba_ld      = Quantize(half4(temporal_feedback.gba, luma_derivative), _InputQuantParams);
+    // Channel order: history.rgb, colour.rgb, disocclusion mask, feedback.rgba, luma derivative.
+    int8_t channels[12] =
+    {
+        packed.wh_rgb_col_r.x,      packed.wh_rgb_col_r.y,      packed.wh_rgb_col_r.z,      packed.wh_rgb_col_r.w,
+        packed.col_gb_dm_fback_r.x, packed.col_gb_dm_fback_r.y, packed.col_gb_dm_fback_r.z, packed.col_gb_dm_fback_r.w,
+        packed.fback_gba_ld.x,      packed.fback_gba_ld.y,      packed.fback_gba_ld.z,      packed.fback_gba_ld.w
+    };
+    // Tensor coordinate is (0, y, x, 0).
+    tensorWriteARM(_PreprocessTensor, uint[](0, outputPixel.y, outputPixel.x, 0), channels);
+}
+// entry-point: one invocation per input pixel, 16x16 invocations per workgroup
+layout(local_size_x = 16, local_size_y = 16) in;
+void main()
+{
+    int32_t2 input_pixel = int32_t2(gl_GlobalInvocationID.xy);
+    if (any(greaterThanEqual(input_pixel, _InputDims))) return; // invocations outside the input image do nothing
+    float2 uv = (float2(input_pixel) + 0.5f) * _InvInputDims; // UV of the pixel centre
+    //-------------------------------------------------------------------------
+    // 1) Dilate depth, find nearest pixel coordinate
+    //-------------------------------------------------------------------------
+    float depth_dilated = float(0.f);
+    int32_t2 nearest_pixel_offset = int32_t2(0);
+    FindNearestDepth(input_pixel, RenderSize(), depth_dilated, nearest_pixel_offset);
+    //-------------------------------------------------------------------------
+    // 2) Load motion vectors
+    //-------------------------------------------------------------------------
+    half2 motion = LoadMotion(input_pixel + nearest_pixel_offset);
+    // Suppress very small motion - no value in resampling here
+    half2  motion_pix = motion * half2(RenderSize());
+    motion *= half(dot(motion_pix, motion_pix) > _MotionWarpThresh); // comparison converts to 0 or 1
+    // Calculate sample position(s) for everything in `tm1` frame
+    float2 reproj_uv = uv - float2(motion);
+    float2 unjitter_tm1_uv = reproj_uv - _JitterOffsetTm1Uv;
+    //-------------------------------------------------------------------------
+    // 3) Calculate depth-based disocclusion mask
+    //-------------------------------------------------------------------------
+    half disocclusion_mask = half(ComputeDepthClip(unjitter_tm1_uv, depth_dilated));
+    // Scale disocclusion mask on static frames to let network know this is happening under
+    // static conditions, reduces jitter differences across frames causing false flags
+    half dm_scale =  dot(motion_pix, motion_pix) > _MotionDisThresh ? half(1.0f) : _DisocclusionScale; // motion_pix was computed before the suppression above
+    disocclusion_mask = disocclusion_mask * dm_scale;
+    //-------------------------------------------------------------------------
+    // 4) Downsample + warp history buffer
+    //-------------------------------------------------------------------------
+    half3 warped_history = WarpHistory(reproj_uv);
+    //-------------------------------------------------------------------------
+    // 5) Read current low-res / jittered / aliased colour
+    //-------------------------------------------------------------------------
+    half3 jittered_colour = LoadColour(input_pixel);
+    //-------------------------------------------------------------------------
+    // 6) Calculate derivative of `luma`
+    //    helps identifying high-frequency flicker due to jitter
+    //-------------------------------------------------------------------------
+    half2 luma_derivative = CalculateLumaDerivative(reproj_uv, jittered_colour, disocclusion_mask);
+    //-------------------------------------------------------------------------
+    // 7) Warp temporal feedback
+    //-------------------------------------------------------------------------
+    half4 temporal_feedback = WarpFeedback(reproj_uv);
+    //-------------------------------------------------------------------------
+    // 8) Convert dilated depth coord to a position offset
+    //-------------------------------------------------------------------------
+    uint8_t enc_depth_offset = EncodeNearestDepthCoord(nearest_pixel_offset);
+    //-------------------------------------------------------------------------
+    // 9) Write Outputs
+    //-------------------------------------------------------------------------
+    // Consumed by NE
+    WriteToTensor(
+        input_pixel,
+        jittered_colour,     // 3ch
+        warped_history,      // 3ch
+        disocclusion_mask,   // 1ch
+        luma_derivative.x,   // 1ch (only .x goes to the tensor)
+        temporal_feedback    // 4ch
+    );                       // total: 12ch
+    // Consumed by post process and frame t+1
+    WriteNearestDepthOffset(input_pixel, enc_depth_offset);
+    // Consumed at frame t+1
+    WriteLumaDerivative(input_pixel, luma_derivative); // both components are stored
+}

scenario/0_pre_process.spv ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:b03bcb283b73870daa0a540cfb8f1e8ec9c4842b38a711f52d31517569e79b87
+size 29476

scenario/0_pre_process_push_consts.npy ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:6319b912dd9ee3e1ce44794067ea57fd9eb01ff0e38b3f8a55ceea7be18e6412
+size 256

scenario/1_nss.vgf ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:fd2a1bd13f156fcfa7a0cf132220ca39c2c2498f4af2c7c7da10a42ef4e555a7
+size 163860

scenario/2_post_process.comp ADDED Viewed

	@@ -0,0 +1,361 @@

+//
+// -----------------------------------------------------------------------------
+// The proprietary software and information contained in this file is
+// confidential and may only be used by an authorized person under a valid
+// licensing agreement from Arm Limited or its affiliates.
+//
+// Copyright (C) 2025. Arm Limited or its affiliates. All rights reserved.
+//
+// This entire notice must be reproduced on all copies of this file and
+// copies of this file may only be made by an authorized person under a valid
+// licensing agreement from Arm Limited or its affiliates.
+// -----------------------------------------------------------------------------
+//
+#version 460
+#extension GL_EXT_shader_8bit_storage : require
+#extension GL_EXT_shader_16bit_storage : require
+#extension GL_EXT_shader_explicit_arithmetic_types : require
+#extension GL_EXT_shader_explicit_arithmetic_types_int8 : require
+#extension GL_EXT_shader_explicit_arithmetic_types_float16 : require
+#extension GL_EXT_shader_explicit_arithmetic_types_float32 : require
+#extension GL_GOOGLE_include_directive : enable
+// defines
+#define SCALE_1_0X 0
+#define SCALE_1_3X 1
+#define SCALE_1_5X 2
+#define SCALE_2_0X 3
+// settings
+#define HISTORY_CATMULL
+#define SCALE_MODE SCALE_2_0X
+// includes
+#include "typedefs.h"
+#include "common.h"
+#include "kernel_lut.h"
+// inputs
+layout (set=0, binding=0) uniform mediump   sampler2D _ColourTex;               // 540p  | R11G11B10 32bpp
+layout (set=0, binding=1) uniform mediump   sampler2D _MotionVectorTex;         // 540p  | RG16_FLOAT 32bpp
+layout (set=0, binding=2) uniform mediump   sampler2D _HistoryTex;              // 1080p | R11G11B10 32bpp
+layout (set=0, binding=3) uniform lowp      sampler2D _K0Tensor;                // 540p  | R8G8B8A8_SNORM 32bpp | Tensor->Texture Alias (Linear)
+layout (set=0, binding=4) uniform lowp      sampler2D _K1Tensor;                // 540p  | R8G8B8A8_SNORM 32bpp | Tensor->Texture Alias (Linear)
+layout (set=0, binding=5) uniform lowp      sampler2D _K2Tensor;                // 540p  | R8G8B8A8_SNORM 32bpp | Tensor->Texture Alias (Linear)
+layout (set=0, binding=6) uniform lowp      sampler2D _K3Tensor;                // 540p  | R8G8B8A8_SNORM 32bpp | Tensor->Texture Alias (Linear)
+layout (set=0, binding=7) uniform lowp      sampler2D _TemporalTensor;          // 540p  | R8G8B8A8_SNORM 32bpp | Tensor->Texture Alias (Linear)
+layout (set=0, binding=8) uniform lowp      sampler2D _NearestDepthCoordTex;    // 540p  | R8_UNORM 8bpp
+// outputs
+layout (set=1, binding=0, r11f_g11f_b10f) uniform writeonly mediump image2D _UpsampledColourOut; // 1080p | R11G11B10 32bpp
+// push-constants
+layout(push_constant, std430) uniform PushConstants {
+    // ─────────────── 8-byte aligned ───────────────
+    layout(offset =  0) int32_t2 _OutputDims;        //  8 B
+    layout(offset =  8) int32_t2 _InputDims;         //  8 B
+    layout(offset = 16) float2   _InvOutputDims;     //  8 B
+    layout(offset = 24) float2   _InvInputDims;      //  8 B
+    layout(offset = 32) float2   _Scale;             //  8 B
+    layout(offset = 40) float2   _InvScale;          //  8 B
+    // ─────────────── 4-byte aligned ───────────────
+    layout(offset = 48) int16_t2 _IndexModulo;       //  4 B
+    layout(offset = 52) half2    _QuantParams;       //  4 B (.x = scale, .y = zero point; default for all five output tensors)
+    layout(offset = 56) int16_t2 _LutOffset;         //  4 B
+    layout(offset = 60) half2    _ExposurePair;      //  4 B (.x = exposure, .y = inverse exposure)
+    layout(offset = 64) half2    _HistoryPad;        //  4 B (.x = not-history-reset factor)
+    layout(offset = 68) half2    _MotionThreshPad;   //  4 B (.x = motion, .y = unused)
+    layout(offset = 72) int32_t  _Padding0;          //  4 B (explicit pad for alignment)
+                                                     // Total: **76 bytes**
+};
+// Convenience mapping for accessing push constants
+#define _Exposure        _ExposurePair.x
+#define _InvExposure     _ExposurePair.y
+#define _NotHistoryReset _HistoryPad.x
+#define _MotionThresh    _MotionThreshPad.x
+// Quantization Parameters
+// inside: `./parameters.json`
+// these values are embedded inside the TOSA file and learnt during QAT
+#ifndef _K0QuantParams
+    // outputs - activation_post_process_45["SNORM"]
+    #define _K0QuantParams _QuantParams.xy
+#endif
+#ifndef _K1QuantParams
+    // outputs - activation_post_process_50["SNORM"]
+    #define _K1QuantParams _QuantParams.xy
+#endif
+#ifndef _K2QuantParams
+    // outputs - activation_post_process_55["SNORM"]
+    #define _K2QuantParams _QuantParams.xy
+#endif
+#ifndef _K3QuantParams
+    // outputs - activation_post_process_60["SNORM"]
+    #define _K3QuantParams _QuantParams.xy
+#endif
+#ifndef _TemporalQuantParams
+    // outputs - activation_post_process_65["SNORM"]
+    #define _TemporalQuantParams _QuantParams.xy
+#endif
+// methods
+half2 LoadMotion(int32_t2 pixel)
+{
+    // Unfiltered fetch of the first two channels of the motion texture at mip 0.
+    const half2 motion = half2(texelFetch(_MotionVectorTex, pixel, 0).xy);
+    return motion;
+}
+half3 LoadHistory(float2 uv)
+{
+    // Sample the history texture at mip 0 and keep the colour channels.
+    const half3 colour = half3(textureLod(_HistoryTex, uv, 0).xyz);
+    return colour;
+}
+half3 LoadHistoryCatmull(float2 uv) // 5-tap (cross-shaped) Catmull-Rom resample of the history texture at uv
+{
+    //------------------------------------------------------------------------------------
+    // 1) Compute Catmull–Rom weights
+    //------------------------------------------------------------------------------------
+    float2 scaledUV = uv * _OutputDims;
+    float2 baseFloor = floor(scaledUV - 0.5) + 0.5; // texel centre at or just below the sample position
+    half2 f  = half2(scaledUV - baseFloor); // fractional distance from that centre
+    half2 f2 = f * f;
+    half2 f3 = f2 * f;
+    // Catmull–Rom basis
+    half2 w0 = f2 - 0.5HF * (f3 + f);
+    half2 w1 = 1.5HF * f3 - 2.5HF * f2 + 1.0HF;
+    half2 w3 = 0.5HF * (f3 - f2);
+    half2 w2 = (1.0HF - w0) - w1 - w3; // = 1 - (w0 + w1 + w3)
+    // Combine w1 and w2 for center axis
+    half2 w12 = w1 + w2;
+    half wx0  = w0.x, wy0  = w0.y;
+    half wx1  = w12.x, wy1 = w12.y;
+    half wx2  = w3.x, wy2  = w3.y;
+    // Final weights for the cross sample layout
+    half wUp     = wx1 * wy0;   // center in X, up in Y
+    half wDown   = wx1 * wy2;   // center in X, down in Y
+    half wLeft   = wx0 * wy1;   // left   in X, center in Y
+    half wRight  = wx2 * wy1;   // right  in X, center in Y
+    half wCenter = wx1 * wy1;   // center in X, center in Y
+    // Fractional offsets for the center (assumes _HistoryTex is sampled with linear filtering so one fetch blends the w1/w2 texels - TODO confirm)
+    half dx = w2.x / wx1;
+    half dy = w2.y / wy1;
+    //------------------------------------------------------------------------------------
+    // 2) Gather the 5 taps
+    //------------------------------------------------------------------------------------
+    half4 left   = half4(LoadHistory((baseFloor + float2(-1.0, dy))  * _InvOutputDims ), 1.HF);
+    half4 up     = half4(LoadHistory((baseFloor + float2(dx,  -1.0)) * _InvOutputDims ), 1.HF);
+    half4 center = half4(LoadHistory((baseFloor + float2(dx,  dy))   * _InvOutputDims ), 1.HF);
+    half4 right  = half4(LoadHistory((baseFloor + float2(2.0, dy))   * _InvOutputDims ), 1.HF);
+    half4 down   = half4(LoadHistory((baseFloor + float2(dx,  2.0))  * _InvOutputDims ), 1.HF);
+    //------------------------------------------------------------------------------------
+    // 3) Accumulate and track min/max
+    //------------------------------------------------------------------------------------
+    half4 accum = up    * wUp     +
+                  left  * wLeft   +
+                  center* wCenter +
+                  right * wRight  +
+                  down  * wDown;
+    half3 cmin3 = min(up.rgb,
+                  min(left.rgb,
+                  min(center.rgb,
+                  min(right.rgb, down.rgb))));
+    half3 cmax3 = max(up.rgb,
+                  max(left.rgb,
+                  max(center.rgb,
+                  max(right.rgb, down.rgb))));
+    //------------------------------------------------------------------------------------
+    // 4) Final color
+    //------------------------------------------------------------------------------------
+    half3 color = accum.rgb * rcp(accum.w); // accum.w is the sum of the five tap weights (every tap carries w = 1)
+    // dering in the case where we have negative values, we don't do this all the time
+    // as it can impose unnecessary blurring on the output
+    return any(lessThan(color, half3(0.HF)))
+         ? clamp(color, cmin3, cmax3)
+         : color;
+}
+int32_t2 LoadNearestDepthOffset(int32_t2 pixel)
+{
+    // The red channel holds the offset code normalised by 255.
+    const half normalised = half(texelFetch(_NearestDepthCoordTex, pixel, 0).r);
+    // Scale back to an integer code; +0.5 before the integer conversion rounds to nearest.
+    const int32_t code = int32_t(normalised * 255.0 + 0.5);
+    // Unpack the code into a per-axis offset.
+    return DecodeNearestDepthCoord(code);
+}
+half3 LoadWarpedHistory(float2 uv, int32_t2 input_pixel, out half onscreen)
+{
+    // Motion dilation: read the motion vector at the stored nearest-depth neighbour
+    const int32_t2 dilated_pixel = input_pixel + LoadNearestDepthOffset(input_pixel);
+    half2 motion = LoadMotion(dilated_pixel);
+    // Zero the motion when its squared length (scaled by the output size) does not exceed the threshold
+    const half2 motion_px = motion * half2(_OutputDims);
+    const half  motion_sq = dot(motion_px, motion_px);
+    motion *= half(motion_sq > _MotionThresh);
+    // Previous-frame UV from which history is resampled
+    const float2 prev_uv = uv - float2(motion);
+    // 1 when prev_uv lies inside [0, 1) on both axes, 0 otherwise
+    const bool inside = all(greaterThanEqual(prev_uv, float2(0.0))) &&
+                        all(lessThan(prev_uv, float2(1.0)));
+    onscreen = half(inside);
+#ifdef HISTORY_CATMULL
+    const half3 resampled = LoadHistoryCatmull(prev_uv);
+#else
+    const half3 resampled = LoadHistory(prev_uv);
+#endif
+    // Apply exposure, then clamp to the safe colour range
+    return SafeColour(resampled * _Exposure);
+}
+#if SCALE_MODE == SCALE_2_0X
+/*
+    Optimised special case pattern for applying 4x4 kernel to
+    sparse jitter-aware 2x2 upsampled image
+*/
+half4 LoadKPNWeight(float2 uv, int16_t lut_idx)
+{
+    // Fetch and dequantize the four kernel slices (four weights each)
+    const half4 slice0 = Dequantize(half4(textureLod(_K0Tensor, uv, 0)), _K0QuantParams);
+    const half4 slice1 = Dequantize(half4(textureLod(_K1Tensor, uv, 0)), _K1QuantParams);
+    const half4 slice2 = Dequantize(half4(textureLod(_K2Tensor, uv, 0)), _K2QuantParams);
+    const half4 slice3 = Dequantize(half4(textureLod(_K3Tensor, uv, 0)), _K3QuantParams);
+    // Select the four weights that belong to the requested pattern;
+    // any index other than 0, 1 or 2 falls through to the last pattern
+    if (lut_idx == 0)
+    {
+        return half4(slice0.x, slice2.x, slice0.z, slice2.z);
+    }
+    if (lut_idx == 1)
+    {
+        return half4(slice1.x, slice3.x, slice1.z, slice3.z);
+    }
+    if (lut_idx == 2)
+    {
+        return half4(slice0.y, slice2.y, slice0.w, slice2.w);
+    }
+    return half4(slice1.y, slice3.y, slice1.w, slice3.w);
+}
+half3 LoadAndFilterColour(int32_t2 output_pixel, float2 uv, out half4 col_to_accum)
+{
+    //-------------------------------------------------------------------
+    // 1. Pick the tap pattern for this output pixel
+    //-------------------------------------------------------------------
+    const float2 pixel_centre = float2(output_pixel) + 0.5f;
+    // Position inside the repeating tile, flattened row-major into a LUT index
+    const int16_t2 tile_pos = (int16_t2(output_pixel) + _LutOffset) % int16_t2(_IndexModulo);
+    const int16_t pattern_idx = tile_pos.y * int16_t(_IndexModulo) + tile_pos.x;
+    KernelTile tile = kernelLUT[pattern_idx];
+    //------------------------------------------------------------------
+    // 2. Apply KPN
+    //------------------------------------------------------------------
+    // Dequantized kernel weights, clamped to [EPS, 1]
+    const half4 weights = clamp(LoadKPNWeight(uv, pattern_idx), half4(EPS), half4(1.HF));
+    // Tap coordinates: offset the pixel centre, scale by _InvScale, clamp to the input image
+    const int16_t4 tap_x = clamp(int16_t4(floor((float4(pixel_centre.x) + float4(tile.dx)) * _InvScale.x)), int16_t4(0), int16_t4(_InputDims.x - 1));
+    const int16_t4 tap_y = clamp(int16_t4(floor((float4(pixel_centre.y) + float4(tile.dy)) * _InvScale.y)), int16_t4(0), int16_t4(_InputDims.y - 1));
+    // One matrix column per tap: exposed, clamped colour in .rgb and 1 in .w
+    f16mat4x4 taps;
+    for (int i = 0; i < 4; ++i)
+    {
+        const half3 texel = half3(texelFetch(_ColourTex, int16_t2(tap_x[i], tap_y[i]), 0).rgb);
+        taps[i] = half4(SafeColour(texel * half3(_Exposure)), 1.HF);
+    }
+    // The accumulation sample is only valid when the centre tap has a zero offset on both axes
+    const bool centre_is_self = tile.dx[CENTER_TAP] == 0 && tile.dy[CENTER_TAP] == 0;
+    col_to_accum = taps[CENTER_TAP] * half(centre_is_self);
+    // Weighted sum of the columns; .w ends up as the sum of the weights
+    const half4 filtered = taps * weights;
+    return half3(filtered.rgb * rcp(filtered.w));
+}
+#else
+    #error "Unsupported SCALE_MODE"
+#endif // SCALE_MODE == SCALE_2_0X
+void LoadTemporalParameters(float2 uv, out half theta, out half alpha)
+{
+    // The first two channels of the temporal tensor carry the two parameters
+    const half2 raw    = half2(textureLod(_TemporalTensor, uv, 0).xy);
+    const half2 params = Dequantize(raw, _TemporalQuantParams);
+    // theta is multiplied by the not-history-reset factor
+    theta = params.x * _NotHistoryReset;
+    // alpha is an affine remap: x -> 0.35 * x + 0.05
+    alpha = params.y * 0.35HF + 0.05HF;
+}
+void WriteUpsampledColour(int32_t2 pixel, half3 colour)
+{
+    // Clamp to the safe colour range, then store with alpha = 1.0
+    const half4 texel = half4(SafeColour(colour), 1.0);
+    imageStore(_UpsampledColourOut, pixel, texel);
+}
+// entry-point: one invocation per output pixel, 16x16 invocations per workgroup
+layout(local_size_x = 16, local_size_y = 16) in;
+void main()
+{
+    int32_t2 output_pixel = int32_t2(gl_GlobalInvocationID.xy);
+    if (any(greaterThanEqual(output_pixel, _OutputDims))) return; // invocations outside the output image do nothing
+    float2 uv = (float2(output_pixel) + 0.5) * _InvOutputDims; // UV of the output pixel centre
+    int32_t2 input_pixel = int32_t2(uv * _InputDims); // input pixel containing that UV
+    //-------------------------------------------------------------------------
+    // 1) Warp history
+    //-------------------------------------------------------------------------
+    half  onscreen;
+    half3 history = LoadWarpedHistory(uv, input_pixel, onscreen);
+    //-------------------------------------------------------------------------
+    // 2) KPN filter → col
+    //-------------------------------------------------------------------------
+    half4 col_to_accum;
+    half3 colour = LoadAndFilterColour(output_pixel, uv, col_to_accum);
+    //-------------------------------------------------------------------------
+    // 3) Load temporal parameters
+    //-------------------------------------------------------------------------
+    half theta, alpha;
+    LoadTemporalParameters(uv, theta, alpha);
+    //-------------------------------------------------------------------------
+    // 4) Rectify history, force reset when offscreen
+    //-------------------------------------------------------------------------
+    half3 rectified = lerp(colour, history, theta * onscreen); // onscreen == 0 selects the filtered colour
+    //-------------------------------------------------------------------------
+    // 5) Accumulate new sample
+    //-------------------------------------------------------------------------
+    half3 accumulated = lerp(Tonemap(rectified), Tonemap(col_to_accum.rgb), alpha * col_to_accum.a); // col_to_accum.a == 0 leaves the rectified value untouched
+    //-------------------------------------------------------------------------
+    // 6) Inverse tonemap + exposure and write output
+    //-------------------------------------------------------------------------
+    half3 out_linear = InverseTonemap(accumulated) * _InvExposure;
+    WriteUpsampledColour(output_pixel, out_linear);
+}

scenario/2_post_process.spv ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:d15c811db716f90606bb42710f5093bfd3dcfedb674ab7223b27909d8c3467a5
+size 25780

scenario/2_post_process_push_consts.npy ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:4fe5029783bc6bb2adaa1f8bafc9ab8fe73340bb0a3055d18f80ca3e6a99862a
+size 204

scenario/common.h ADDED Viewed

	@@ -0,0 +1,160 @@

+//
+// -----------------------------------------------------------------------------
+// The proprietary software and information contained in this file is
+// confidential and may only be used by an authorized person under a valid
+// licensing agreement from Arm Limited or its affiliates.
+//
+// Copyright (C) 2025. Arm Limited or its affiliates. All rights reserved.
+//
+// This entire notice must be reproduced on all copies of this file and
+// copies of this file may only be made by an authorized person under a valid
+// licensing agreement from Arm Limited or its affiliates.
+// -----------------------------------------------------------------------------
+//
+#ifndef NSS_COMMON
+#define NSS_COMMON
+#include "typedefs.h"
+#define MAX_FP16 65504.HF // largest finite FP16 value
+#define EPS 1e-7HF // NOTE(review): smaller than the smallest normal FP16 value (~6.1e-5), so this is a subnormal; an implementation that flushes FP16 denormals may treat it as 0 - confirm on target hardware
+// Activation Functions
+// ──────────────────────────────────────────────────────────────────────────────────────────
+half Sigmoid(half x)
+{
+    // Logistic function: 1 / (1 + e^-x)
+    const half denom = half(1.0) + exp(-x);
+    return rcp(denom);
+}
+half2 Sigmoid(half2 x)
+{
+    // Component-wise logistic function
+    const half2 denom = half2(1.0) + exp(-x);
+    return rcp(denom);
+}
+half3 Sigmoid(half3 x)
+{
+    // Component-wise logistic function
+    const half3 denom = half3(1.0) + exp(-x);
+    return rcp(denom);
+}
+half4 Sigmoid(half4 x)
+{
+    // Component-wise logistic function
+    const half4 denom = half4(1.0) + exp(-x);
+    return rcp(denom);
+}
+// Quantize/Dequantize
+// ──────────────────────────────────────────────────────────────────────────────────────────
+// all expect .x = scale, .y = zero point, quantize methods expect to receive: .x = rcp(scale)
+half Dequantize(half i, half2 quant_params)
+{
+    // real = (quantized - zero_point) * scale
+    const half scale      = quant_params.x;
+    const half zero_point = quant_params.y;
+    return (i - zero_point) * scale;
+}
+half2 Dequantize(half2 i, half2 quant_params)
+{
+    // Component-wise: (quantized - zero_point) * scale
+    const half scale      = quant_params.x;
+    const half zero_point = quant_params.y;
+    return (i - zero_point) * scale;
+}
+half3 Dequantize(half3 i, half2 quant_params)
+{
+    // Component-wise: (quantized - zero_point) * scale
+    const half scale      = quant_params.x;
+    const half zero_point = quant_params.y;
+    return (i - zero_point) * scale;
+}
+half4 Dequantize(half4 i, half2 quant_params)
+{
+    // Component-wise: (quantized - zero_point) * scale
+    const half scale      = quant_params.x;
+    const half zero_point = quant_params.y;
+    return (i - zero_point) * scale;
+}
+int8_t Quantize(half f, half2 quant_params)
+{
+    // Scale by .x, add .y, round, then clamp to the int8 range before converting
+    const half scaled = f * quant_params.x + quant_params.y;
+    return int8_t(clamp(round(scaled), -128.HF, 127.HF));
+}
+int8_t2 Quantize(half2 f, half2 quant_params)
+{
+    // Component-wise version of the scalar overload
+    const half2 scaled = f * quant_params.x + quant_params.y;
+    return int8_t2(clamp(round(scaled), -128.HF, 127.HF));
+}
+int8_t3 Quantize(half3 f, half2 quant_params)
+{
+    // Component-wise version of the scalar overload
+    const half3 scaled = f * quant_params.x + quant_params.y;
+    return int8_t3(clamp(round(scaled), -128.HF, 127.HF));
+}
+int8_t4 Quantize(half4 f, half2 quant_params)
+{
+    // Component-wise version of the scalar overload
+    const half4 scaled = f * quant_params.x + quant_params.y;
+    return int8_t4(clamp(round(scaled), -128.HF, 127.HF));
+}
+// Encode/Decode
+// ──────────────────────────────────────────────────────────────────────────────────────────
+// Note: both encode/decode methods are currently bound to 3x3 windows, they should be
+//       expandable in future if needed. The most likely to need this would be the jitter
+//       encoding, where 3x3 may not be enough for larger than 3x3 scale factors.
+uint8_t EncodeNearestDepthCoord(int32_t2 o)
+{
+    // o ∈ {-1, 0, 1}² (components outside that range are clamped into it)
+    o  = clamp(o, ivec2(-1), ivec2( 1));
+    return uint8_t((o.y + 1) << 2 | (o.x + 1));  // bits 2-3 = y + 1, bits 0-1 = x + 1; fits in 4 bits (0-15), largest code actually produced is 10
+}
+int32_t2 DecodeNearestDepthCoord(int32_t code)
+{
+    // Two 2-bit fields, each stored with a +1 bias: x in bits 0-1, y in bits 2-3
+    const int32_t biased_x = int32_t(code & 0x3);
+    const int32_t biased_y = int32_t((code >> 2) & 0x3);
+    return int32_t2(biased_x - 1, biased_y - 1);
+}
+// Image Operations
+// ──────────────────────────────────────────────────────────────────────────────────────────
+half Luminance(half3 rgb)
+{
+    // ITU-R BT.709 luma weights for R, G, B
+    const half3 bt709 = half3(0.2126, 0.7152, 0.0722);
+    return dot(rgb, bt709);
+}
+half3 Tonemap(half3 x)
+{
+    // Karis tonemapper
+    // http://graphicrants.blogspot.com/2013/12/tone-mapping.html
+    // Negative components are clamped to zero, then the colour is divided by (1 + largest component)
+    x = max(x, half3(0.HF));
+    const half peak = max(max(x.r, x.g), x.b);
+    return x * rcp(half3(1.HF) + peak);
+}
+half3 InverseTonemap(half3 x)
+{
+    // Karis tonemapper inverse
+    // http://graphicrants.blogspot.com/2013/12/tone-mapping.html
+    // Restrict the input to what the forward tonemapper yields for [0, MAX_FP16],
+    // then divide by (1 - largest component)
+    x = clamp(x, half3(0.HF), Tonemap(half3(MAX_FP16)));
+    const half peak = max(max(x.r, x.g), x.b);
+    return x * rcp(half3(1.HF) - peak);
+}
+half3 SafeColour(half3 x)
+{
+    // Clamp every component to [0, MAX_FP16]
+    const half3 lower = half3(0.HF);
+    const half3 upper = half3(MAX_FP16);
+    return clamp(x, lower, upper);
+}
+#endif // NSS_COMMON

scenario/in_colour.dds ADDED Viewed

Git LFS Details

SHA256: 06ee236dd66b3a6843af2a0617a8186bc78dc0310cff4c3a21b44803ce742ecb
Pointer size: 132 Bytes
Size of remote file: 2.09 MB

scenario/in_depth.dds ADDED Viewed

Git LFS Details

SHA256: d68696b2e29f63999a65f9125b9788a7a68a89adfe105efe5b817dc71ab6137a
Pointer size: 132 Bytes
Size of remote file: 2.09 MB

scenario/in_depth_tm1.dds ADDED Viewed

Git LFS Details

SHA256: 49bc41de4eaee7b2fe0f486419bb5bcd65cca46b76c86003f9bcaf3f3e3fe6e4
Pointer size: 132 Bytes
Size of remote file: 2.09 MB

scenario/in_derivative_tm1.dds ADDED Viewed

Git LFS Details

SHA256: 993558f23469464d8cd517d695c6e85693c2580466d1c1d1779dddaecbc450e4
Pointer size: 132 Bytes
Size of remote file: 1.04 MB

scenario/in_feedback_tm1.dds ADDED Viewed

Git LFS Details

SHA256: a37afdcdbb350ebbe0b5c3a1fa610c84aafed2d31195e77e8cc77598625239d3
Pointer size: 132 Bytes
Size of remote file: 2.09 MB

scenario/in_history.dds ADDED Viewed

Git LFS Details

SHA256: cf6e0a5805a7abeb497eced1c22adff5ecb5e249ac66f6419fcf9e0b64418197
Pointer size: 132 Bytes
Size of remote file: 8.36 MB

scenario/in_motion.dds ADDED Viewed

Git LFS Details

SHA256: 7653e886a7a12b7c9ced950cba304144433bf755fa5010bb53a010ee56ae78ab
Pointer size: 132 Bytes
Size of remote file: 2.09 MB

scenario/in_nearest_offset_tm1.dds ADDED Viewed

Git LFS Details

SHA256: f94dccb9315cee17e25e98d822eb3234e44308c7838e8a6aafe57e38194e57e7
Pointer size: 131 Bytes
Size of remote file: 522 kB

scenario/kernel_lut.h ADDED Viewed

	@@ -0,0 +1,83 @@

+//
+// -----------------------------------------------------------------------------
+// The proprietary software and information contained in this file is
+// confidential and may only be used by an authorized person under a valid
+// licensing agreement from Arm Limited or its affiliates.
+//
+// Copyright (C) 2025. Arm Limited or its affiliates. All rights reserved.
+//
+// This entire notice must be reproduced on all copies of this file and
+// copies of this file may only be made by an authorized person under a valid
+// licensing agreement from Arm Limited or its affiliates.
+// -----------------------------------------------------------------------------
+//
+#ifndef NSS_KERNEL_LUT
+#define NSS_KERNEL_LUT
+#include "typedefs.h"
+struct KernelTile { // tap offsets of one kernel pattern, four taps per pattern
+    int16_t4 dy;    // per-tap y offsets (first initializer in kernelLUT entries)
+    int16_t4 dx;    // per-tap x offsets (second initializer in kernelLUT entries)
+};
+// Define actual scale value based on mode
+#if SCALE_MODE == SCALE_2_0X
+#define CENTER_TAP 0
+#define NUM_PATTERNS 4
+const KernelTile kernelLUT[NUM_PATTERNS] = {
+    {
+        // Pattern 0:
+        // Taps:  0,  2,  8, 10
+        // Grid:
+        //   [●  ·  ●  ·]
+        //   [·  ·  ·  ·]
+        //   [●  ·  ●  ·]
+        //   [·  ·  ·  ·]
+        int16_t4(-1, -1, +1, +1), // dy
+        int16_t4(-1, +1, -1, +1)  // dx
+    },
+    {
+        // Pattern 1:
+        // Taps:  4,  6, 12, 14
+        // Grid (NOTE(review): drawing appears transposed relative to the dy/dx values below - confirm):
+        //   [·  ·  ·  ·]
+        //   [●  ·  ●  ·]
+        //   [·  ·  ·  ·]
+        //   [●  ·  ●  ·]
+        int16_t4(-1, -1, +1, +1), // dy
+        int16_t4(+0, +2, +0, +2)  // dx
+    },
+    {
+        // Pattern 2:
+        // Taps:  1,  3,  9, 11
+        // Grid (NOTE(review): drawing appears transposed relative to the dy/dx values below - confirm):
+        //   [·  ●  ·  ●]
+        //   [·  ·  ·  ·]
+        //   [·  ●  ·  ●]
+        //   [·  ·  ·  ·]
+        int16_t4(+0, +0, +2, +2), // dy
+        int16_t4(-1, +1, -1, +1)  // dx
+    },
+    {
+        // Pattern 3:
+        // Taps:  5,  7, 13, 15
+        // Grid:
+        //   [·  ·  ·  ·]
+        //   [·  ●  ·  ●]
+        //   [·  ·  ·  ·]
+        //   [·  ●  ·  ●]
+        int16_t4( 0, +0, +2, +2), // dy; center-aligned (tap 0 has zero offset on both axes)
+        int16_t4( 0, +2, +0, +2)  // dx
+    }
+};
+#else
+    #error "Unsupported SCALE_MODE"
+#endif
+#endif //NSS_KERNEL_LUT

scenario/parameters.json ADDED Viewed

	@@ -0,0 +1,79 @@

+{
+    "inputs": {
+        "x": {
+            "SINT": {
+                "scale": 0.003921568859368563,
+                "zero_point": -128
+            },
+            "SNORM": {
+                "scale": 0.49803924513980746,
+                "zero_point": -1.0078740157480315
+            }
+        }
+    },
+    "outputs": {
+        "activation_post_process_45": {
+            "SINT": {
+                "scale": 0.003937007859349251,
+                "zero_point": -127
+            },
+            "SNORM": {
+                "scale": 0.49999999813735485,
+                "zero_point": -1.0
+            }
+        },
+        "activation_post_process_50": {
+            "SINT": {
+                "scale": 0.003937007859349251,
+                "zero_point": -127
+            },
+            "SNORM": {
+                "scale": 0.49999999813735485,
+                "zero_point": -1.0
+            }
+        },
+        "activation_post_process_55": {
+            "SINT": {
+                "scale": 0.003937007859349251,
+                "zero_point": -127
+            },
+            "SNORM": {
+                "scale": 0.49999999813735485,
+                "zero_point": -1.0
+            }
+        },
+        "activation_post_process_60": {
+            "SINT": {
+                "scale": 0.003937007859349251,
+                "zero_point": -127
+            },
+            "SNORM": {
+                "scale": 0.49999999813735485,
+                "zero_point": -1.0
+            }
+        },
+        "activation_post_process_65": {
+            "SINT": {
+                "scale": 0.003937007859349251,
+                "zero_point": -127
+            },
+            "SNORM": {
+                "scale": 0.49999999813735485,
+                "zero_point": -1.0
+            }
+        },
+        "activation_post_process_70": {
+            "SINT": {
+                "scale": 0.003937007859349251,
+                "zero_point": -127
+            },
+            "SNORM": {
+                "scale": 0.49999999813735485,
+                "zero_point": -1.0
+            }
+        }
+    },
+    "learnt_constants": {
+        "dm_scale": 0.617464542388916
+    }
+}

scenario/scenario.json ADDED Viewed

	@@ -0,0 +1,821 @@

+{
+    "commands": [
+        {
+            "mark_boundary": {
+                "frame_id": "0",
+                "resources": []
+            }
+        },
+        {
+            "dispatch_compute": {
+                "shader_ref": "0_pre_process",
+                "push_data_ref": "push_data_1",
+                "rangeND": [
+                    60,
+                    34,
+                    1
+                ],
+                "implicit_barrier": false,
+                "bindings": [
+                    {
+                        "set": 0,
+                        "id": 2,
+                        "resource_ref": "in_motion"
+                    },
+                    {
+                        "set": 0,
+                        "id": 0,
+                        "resource_ref": "in_colour"
+                    },
+                    {
+                        "set": 0,
+                        "id": 7,
+                        "resource_ref": "in_nearest_offset_tm1"
+                    },
+                    {
+                        "set": 0,
+                        "id": 5,
+                        "resource_ref": "in_depth_tm1"
+                    },
+                    {
+                        "set": 0,
+                        "id": 3,
+                        "resource_ref": "in_history"
+                    },
+                    {
+                        "set": 0,
+                        "id": 4,
+                        "resource_ref": "in_feedback_tm1"
+                    },
+                    {
+                        "set": 0,
+                        "id": 6,
+                        "resource_ref": "in_derivative_tm1"
+                    },
+                    {
+                        "set": 0,
+                        "id": 1,
+                        "resource_ref": "in_depth"
+                    },
+                    {
+                        "set": 1,
+                        "id": 1,
+                        "resource_ref": "out_derivative",
+                        "descriptor_type": "VK_DESCRIPTOR_TYPE_STORAGE_IMAGE"
+                    },
+                    {
+                        "set": 1,
+                        "id": 3,
+                        "resource_ref": "out_nearest_offset",
+                        "descriptor_type": "VK_DESCRIPTOR_TYPE_STORAGE_IMAGE"
+                    },
+                    {
+                        "set": 1,
+                        "id": 0,
+                        "resource_ref": "out_input_tensor"
+                    }
+                ]
+            }
+        },
+        {
+            "dispatch_barrier": {
+                "image_barrier_refs": [],
+                "tensor_barrier_refs": [
+                    "barrier_14"
+                ],
+                "memory_barrier_refs": [],
+                "buffer_barrier_refs": []
+            }
+        },
+        {
+            "dispatch_graph": {
+                "graph_ref": "1_nss",
+                "implicit_barrier": false,
+                "bindings": [
+                    {
+                        "set": 0,
+                        "id": 0,
+                        "resource_ref": "out_input_tensor"
+                    },
+                    {
+                        "set": 0,
+                        "id": 1,
+                        "resource_ref": "out_feedback"
+                    },
+                    {
+                        "set": 0,
+                        "id": 2,
+                        "resource_ref": "out_tp_aliaser"
+                    },
+                    {
+                        "set": 0,
+                        "id": 3,
+                        "resource_ref": "out_k3_aliaser"
+                    },
+                    {
+                        "set": 0,
+                        "id": 4,
+                        "resource_ref": "out_k2_aliaser"
+                    },
+                    {
+                        "set": 0,
+                        "id": 5,
+                        "resource_ref": "out_k1_aliaser"
+                    },
+                    {
+                        "set": 0,
+                        "id": 6,
+                        "resource_ref": "out_k0_aliaser"
+                    }
+                ]
+            }
+        },
+        {
+            "dispatch_barrier": {
+                "image_barrier_refs": [
+                    "barrier_23",
+                    "barrier_25",
+                    "barrier_27",
+                    "barrier_29",
+                    "barrier_31",
+                    "barrier_33"
+                ],
+                "tensor_barrier_refs": [],
+                "memory_barrier_refs": [],
+                "buffer_barrier_refs": []
+            }
+        },
+        {
+            "dispatch_compute": {
+                "shader_ref": "2_post_process",
+                "push_data_ref": "push_data_22",
+                "rangeND": [
+                    120,
+                    68,
+                    1
+                ],
+                "implicit_barrier": false,
+                "bindings": [
+                    {
+                        "set": 0,
+                        "id": 1,
+                        "resource_ref": "in_motion"
+                    },
+                    {
+                        "set": 0,
+                        "id": 2,
+                        "resource_ref": "in_history"
+                    },
+                    {
+                        "set": 0,
+                        "id": 8,
+                        "resource_ref": "out_nearest_offset"
+                    },
+                    {
+                        "set": 0,
+                        "id": 3,
+                        "resource_ref": "out_k0"
+                    },
+                    {
+                        "set": 0,
+                        "id": 4,
+                        "resource_ref": "out_k1"
+                    },
+                    {
+                        "set": 0,
+                        "id": 5,
+                        "resource_ref": "out_k2"
+                    },
+                    {
+                        "set": 0,
+                        "id": 6,
+                        "resource_ref": "out_k3"
+                    },
+                    {
+                        "set": 0,
+                        "id": 0,
+                        "resource_ref": "in_colour"
+                    },
+                    {
+                        "set": 0,
+                        "id": 7,
+                        "resource_ref": "out_tp"
+                    },
+                    {
+                        "set": 1,
+                        "id": 0,
+                        "resource_ref": "out_colour",
+                        "descriptor_type": "VK_DESCRIPTOR_TYPE_STORAGE_IMAGE"
+                    }
+                ]
+            }
+        },
+        {
+            "mark_boundary": {
+                "frame_id": "1",
+                "resources": [
+                    "out_colour"
+                ]
+            }
+        }
+    ],
+    "resources": [
+        {
+            "shader": {
+                "uid": "0_pre_process",
+                "src": "./0_pre_process.spv",
+                "entry": "main",
+                "type": "SPIR-V",
+                "push_constants_size": 128,
+                "specialization_constants": []
+            }
+        },
+        {
+            "raw_data": {
+                "uid": "push_data_1",
+                "src": "./0_pre_process_push_consts.npy"
+            }
+        },
+        {
+            "image": {
+                "uid": "in_motion",
+                "dims": [
+                    1,
+                    960,
+                    544,
+                    1
+                ],
+                "src": "./in_motion.dds",
+                "format": "VK_FORMAT_R16G16_SFLOAT",
+                "shader_access": "readonly",
+                "mips": 1,
+                "min_filter": "LINEAR",
+                "mag_filter": "LINEAR",
+                "mip_filter": "NEAREST",
+                "border_address_mode": "CLAMP_BORDER",
+                "border_color": "FLOAT_TRANSPARENT_BLACK",
+                "tiling": "OPTIMAL"
+            }
+        },
+        {
+            "image": {
+                "uid": "in_colour",
+                "dims": [
+                    1,
+                    960,
+                    544,
+                    1
+                ],
+                "src": "./in_colour.dds",
+                "format": "VK_FORMAT_B10G11R11_UFLOAT_PACK32",
+                "shader_access": "readonly",
+                "mips": 1,
+                "min_filter": "LINEAR",
+                "mag_filter": "LINEAR",
+                "mip_filter": "NEAREST",
+                "border_address_mode": "CLAMP_BORDER",
+                "border_color": "FLOAT_TRANSPARENT_BLACK",
+                "tiling": "OPTIMAL"
+            }
+        },
+        {
+            "image": {
+                "uid": "in_nearest_offset_tm1",
+                "dims": [
+                    1,
+                    960,
+                    544,
+                    1
+                ],
+                "src": "./in_nearest_offset_tm1.dds",
+                "format": "VK_FORMAT_R8_UNORM",
+                "shader_access": "readonly",
+                "mips": 1,
+                "min_filter": "LINEAR",
+                "mag_filter": "LINEAR",
+                "mip_filter": "NEAREST",
+                "border_address_mode": "CLAMP_BORDER",
+                "border_color": "FLOAT_CUSTOM_EXT",
+                "custom_border_color": [
+                    0.0,
+                    0.0,
+                    0.0,
+                    0.0
+                ],
+                "tiling": "OPTIMAL"
+            }
+        },
+        {
+            "image": {
+                "uid": "in_depth_tm1",
+                "dims": [
+                    1,
+                    960,
+                    544,
+                    1
+                ],
+                "src": "./in_depth_tm1.dds",
+                "format": "VK_FORMAT_R32_SFLOAT",
+                "shader_access": "readonly",
+                "mips": 1,
+                "min_filter": "LINEAR",
+                "mag_filter": "LINEAR",
+                "mip_filter": "NEAREST",
+                "border_address_mode": "CLAMP_BORDER",
+                "border_color": "FLOAT_CUSTOM_EXT",
+                "custom_border_color": [
+                    0.0,
+                    0.0,
+                    0.0,
+                    0.0
+                ],
+                "tiling": "OPTIMAL"
+            }
+        },
+        {
+            "image": {
+                "uid": "in_history",
+                "dims": [
+                    1,
+                    1920,
+                    1088,
+                    1
+                ],
+                "src": "./in_history.dds",
+                "format": "VK_FORMAT_B10G11R11_UFLOAT_PACK32",
+                "shader_access": "readonly",
+                "mips": 1,
+                "min_filter": "LINEAR",
+                "mag_filter": "LINEAR",
+                "mip_filter": "NEAREST",
+                "border_address_mode": "CLAMP_EDGE",
+                "tiling": "OPTIMAL"
+            }
+        },
+        {
+            "image": {
+                "uid": "in_feedback_tm1",
+                "dims": [
+                    1,
+                    960,
+                    544,
+                    1
+                ],
+                "src": "./in_feedback_tm1.dds",
+                "format": "VK_FORMAT_R8G8B8A8_SNORM",
+                "shader_access": "readonly",
+                "mips": 1,
+                "min_filter": "LINEAR",
+                "mag_filter": "LINEAR",
+                "mip_filter": "NEAREST",
+                "border_address_mode": "CLAMP_BORDER",
+                "border_color": "FLOAT_CUSTOM_EXT",
+                "custom_border_color": [
+                    -1.0,
+                    -1.0,
+                    -1.0,
+                    -1.0
+                ],
+                "tiling": "OPTIMAL"
+            }
+        },
+        {
+            "image": {
+                "uid": "in_derivative_tm1",
+                "dims": [
+                    1,
+                    960,
+                    544,
+                    1
+                ],
+                "src": "./in_derivative_tm1.dds",
+                "format": "VK_FORMAT_R8G8_UNORM",
+                "shader_access": "readonly",
+                "mips": 1,
+                "min_filter": "LINEAR",
+                "mag_filter": "LINEAR",
+                "mip_filter": "NEAREST",
+                "border_address_mode": "CLAMP_BORDER",
+                "border_color": "FLOAT_TRANSPARENT_BLACK",
+                "tiling": "OPTIMAL"
+            }
+        },
+        {
+            "image": {
+                "uid": "in_depth",
+                "dims": [
+                    1,
+                    960,
+                    544,
+                    1
+                ],
+                "src": "./in_depth.dds",
+                "format": "VK_FORMAT_R32_SFLOAT",
+                "shader_access": "readonly",
+                "mips": 1,
+                "min_filter": "LINEAR",
+                "mag_filter": "LINEAR",
+                "mip_filter": "NEAREST",
+                "border_address_mode": "CLAMP_BORDER",
+                "border_color": "FLOAT_TRANSPARENT_BLACK",
+                "tiling": "OPTIMAL"
+            }
+        },
+        {
+            "image": {
+                "uid": "out_derivative",
+                "dims": [
+                    1,
+                    960,
+                    544,
+                    1
+                ],
+                "dst": "./out_derivative.dds",
+                "format": "VK_FORMAT_R8G8_UNORM",
+                "shader_access": "writeonly",
+                "mips": 1,
+                "tiling": "LINEAR"
+            }
+        },
+        {
+            "image": {
+                "uid": "out_nearest_offset",
+                "dims": [
+                    1,
+                    960,
+                    544,
+                    1
+                ],
+                "dst": "./out_nearest_offset.dds",
+                "format": "VK_FORMAT_R8_UNORM",
+                "shader_access": "readwrite",
+                "mips": 1,
+                "min_filter": "LINEAR",
+                "mag_filter": "LINEAR",
+                "mip_filter": "NEAREST",
+                "border_address_mode": "CLAMP_BORDER",
+                "border_color": "FLOAT_TRANSPARENT_BLACK",
+                "tiling": "LINEAR"
+            }
+        },
+        {
+            "tensor": {
+                "uid": "out_input_tensor",
+                "dims": [
+                    1,
+                    544,
+                    960,
+                    12
+                ],
+                "dst": "./out_input_tensor.npy",
+                "format": "VK_FORMAT_R8_SINT",
+                "shader_access": "readwrite",
+                "tiling": "LINEAR"
+            }
+        },
+        {
+            "graph": {
+                "uid": "1_nss",
+                "src": "./1_nss.vgf"
+            }
+        },
+        {
+            "tensor_barrier": {
+                "uid": "barrier_14",
+                "src_access": "compute_shader_write",
+                "dst_access": "graph_read",
+                "src_stage": [
+                    "compute"
+                ],
+                "dst_stage": [
+                    "graph"
+                ],
+                "tensor_resource": "out_input_tensor"
+            }
+        },
+        {
+            "tensor": {
+                "uid": "out_feedback",
+                "dims": [
+                    1,
+                    544,
+                    960,
+                    4
+                ],
+                "dst": "./out_feedback.npy",
+                "format": "VK_FORMAT_R8_SINT",
+                "shader_access": "writeonly",
+                "tiling": "LINEAR"
+            }
+        },
+        {
+            "image": {
+                "uid": "out_tp",
+                "dims": [
+                    1,
+                    960,
+                    544,
+                    1
+                ],
+                "format": "VK_FORMAT_R8G8B8A8_SNORM",
+                "shader_access": "readonly",
+                "mips": 1,
+                "min_filter": "LINEAR",
+                "mag_filter": "LINEAR",
+                "mip_filter": "NEAREST",
+                "border_address_mode": "CLAMP_BORDER",
+                "border_color": "FLOAT_TRANSPARENT_BLACK",
+                "tiling": "LINEAR"
+            }
+        },
+        {
+            "tensor": {
+                "uid": "out_tp_aliaser",
+                "dims": [
+                    1,
+                    544,
+                    960,
+                    4
+                ],
+                "format": "VK_FORMAT_R8_SINT",
+                "shader_access": "readwrite",
+                "alias_target": {
+                    "resource_ref": "out_tp"
+                },
+                "tiling": "LINEAR"
+            }
+        },
+        {
+            "image": {
+                "uid": "out_k3",
+                "dims": [
+                    1,
+                    960,
+                    544,
+                    1
+                ],
+                "format": "VK_FORMAT_R8G8B8A8_SNORM",
+                "shader_access": "readonly",
+                "mips": 1,
+                "min_filter": "LINEAR",
+                "mag_filter": "LINEAR",
+                "mip_filter": "NEAREST",
+                "border_address_mode": "CLAMP_EDGE",
+                "tiling": "LINEAR"
+            }
+        },
+        {
+            "tensor": {
+                "uid": "out_k3_aliaser",
+                "dims": [
+                    1,
+                    544,
+                    960,
+                    4
+                ],
+                "format": "VK_FORMAT_R8_SINT",
+                "shader_access": "readwrite",
+                "alias_target": {
+                    "resource_ref": "out_k3"
+                },
+                "tiling": "LINEAR"
+            }
+        },
+        {
+            "image": {
+                "uid": "out_k2",
+                "dims": [
+                    1,
+                    960,
+                    544,
+                    1
+                ],
+                "format": "VK_FORMAT_R8G8B8A8_SNORM",
+                "shader_access": "readonly",
+                "mips": 1,
+                "min_filter": "LINEAR",
+                "mag_filter": "LINEAR",
+                "mip_filter": "NEAREST",
+                "border_address_mode": "CLAMP_EDGE",
+                "tiling": "LINEAR"
+            }
+        },
+        {
+            "tensor": {
+                "uid": "out_k2_aliaser",
+                "dims": [
+                    1,
+                    544,
+                    960,
+                    4
+                ],
+                "format": "VK_FORMAT_R8_SINT",
+                "shader_access": "readwrite",
+                "alias_target": {
+                    "resource_ref": "out_k2"
+                },
+                "tiling": "LINEAR"
+            }
+        },
+        {
+            "image": {
+                "uid": "out_k1",
+                "dims": [
+                    1,
+                    960,
+                    544,
+                    1
+                ],
+                "format": "VK_FORMAT_R8G8B8A8_SNORM",
+                "shader_access": "readonly",
+                "mips": 1,
+                "min_filter": "LINEAR",
+                "mag_filter": "LINEAR",
+                "mip_filter": "NEAREST",
+                "border_address_mode": "CLAMP_EDGE",
+                "tiling": "LINEAR"
+            }
+        },
+        {
+            "tensor": {
+                "uid": "out_k1_aliaser",
+                "dims": [
+                    1,
+                    544,
+                    960,
+                    4
+                ],
+                "format": "VK_FORMAT_R8_SINT",
+                "shader_access": "readwrite",
+                "alias_target": {
+                    "resource_ref": "out_k1"
+                },
+                "tiling": "LINEAR"
+            }
+        },
+        {
+            "image": {
+                "uid": "out_k0",
+                "dims": [
+                    1,
+                    960,
+                    544,
+                    1
+                ],
+                "format": "VK_FORMAT_R8G8B8A8_SNORM",
+                "shader_access": "readonly",
+                "mips": 1,
+                "min_filter": "LINEAR",
+                "mag_filter": "LINEAR",
+                "mip_filter": "NEAREST",
+                "border_address_mode": "CLAMP_EDGE",
+                "tiling": "LINEAR"
+            }
+        },
+        {
+            "tensor": {
+                "uid": "out_k0_aliaser",
+                "dims": [
+                    1,
+                    544,
+                    960,
+                    4
+                ],
+                "format": "VK_FORMAT_R8_SINT",
+                "shader_access": "readwrite",
+                "alias_target": {
+                    "resource_ref": "out_k0"
+                },
+                "tiling": "LINEAR"
+            }
+        },
+        {
+            "shader": {
+                "uid": "2_post_process",
+                "src": "./2_post_process.spv",
+                "entry": "main",
+                "type": "SPIR-V",
+                "push_constants_size": 76,
+                "specialization_constants": []
+            }
+        },
+        {
+            "raw_data": {
+                "uid": "push_data_22",
+                "src": "./2_post_process_push_consts.npy"
+            }
+        },
+        {
+            "image_barrier": {
+                "uid": "barrier_23",
+                "src_access": "compute_shader_write",
+                "dst_access": "compute_shader_read",
+                "old_layout": "general",
+                "new_layout": "general",
+                "src_stage": [
+                    "compute"
+                ],
+                "dst_stage": [
+                    "compute"
+                ],
+                "image_resource": "out_nearest_offset"
+            }
+        },
+        {
+            "image_barrier": {
+                "uid": "barrier_25",
+                "src_access": "graph_write",
+                "dst_access": "compute_shader_read",
+                "old_layout": "general",
+                "new_layout": "general",
+                "src_stage": [
+                    "graph"
+                ],
+                "dst_stage": [
+                    "compute"
+                ],
+                "image_resource": "out_k0"
+            }
+        },
+        {
+            "image_barrier": {
+                "uid": "barrier_27",
+                "src_access": "graph_write",
+                "dst_access": "compute_shader_read",
+                "old_layout": "general",
+                "new_layout": "general",
+                "src_stage": [
+                    "graph"
+                ],
+                "dst_stage": [
+                    "compute"
+                ],
+                "image_resource": "out_k1"
+            }
+        },
+        {
+            "image_barrier": {
+                "uid": "barrier_29",
+                "src_access": "graph_write",
+                "dst_access": "compute_shader_read",
+                "old_layout": "general",
+                "new_layout": "general",
+                "src_stage": [
+                    "graph"
+                ],
+                "dst_stage": [
+                    "compute"
+                ],
+                "image_resource": "out_k2"
+            }
+        },
+        {
+            "image_barrier": {
+                "uid": "barrier_31",
+                "src_access": "graph_write",
+                "dst_access": "compute_shader_read",
+                "old_layout": "general",
+                "new_layout": "general",
+                "src_stage": [
+                    "graph"
+                ],
+                "dst_stage": [
+                    "compute"
+                ],
+                "image_resource": "out_k3"
+            }
+        },
+        {
+            "image_barrier": {
+                "uid": "barrier_33",
+                "src_access": "graph_write",
+                "dst_access": "compute_shader_read",
+                "old_layout": "general",
+                "new_layout": "general",
+                "src_stage": [
+                    "graph"
+                ],
+                "dst_stage": [
+                    "compute"
+                ],
+                "image_resource": "out_tp"
+            }
+        },
+        {
+            "image": {
+                "uid": "out_colour",
+                "dims": [
+                    1,
+                    1920,
+                    1088,
+                    1
+                ],
+                "dst": "./out_colour.dds",
+                "format": "VK_FORMAT_B10G11R11_UFLOAT_PACK32",
+                "shader_access": "writeonly",
+                "mips": 1,
+                "tiling": "LINEAR"
+            }
+        }
+    ]
+}

scenario/typedefs.h ADDED Viewed

	@@ -0,0 +1,86 @@

+//
+// -----------------------------------------------------------------------------
+// The proprietary software and information contained in this file is
+// confidential and may only be used by an authorized person under a valid
+// licensing agreement from Arm Limited or its affiliates.
+//
+// Copyright (C) 2025. Arm Limited or its affiliates. All rights reserved.
+//
+// This entire notice must be reproduced on all copies of this file and
+// copies of this file may only be made by an authorized person under a valid
+// licensing agreement from Arm Limited or its affiliates.
+// -----------------------------------------------------------------------------
+//
+#ifndef NSS_TYPEDEFS
+#define NSS_TYPEDEFS
+// Type and intrinsic aliases: each macro textually replaces an HLSL-like
+// spelling (half4, float2, lerp, ...) with the GLSL name on its right.
+// NOTE(review): the explicitly sized target types (float16_t, i8vec*, i16vec*,
+// ...) presumably require the matching explicit-arithmetic-types extension to
+// be enabled by the including shader — not visible here; confirm.
+// fp16 types
+#define half  float16_t
+#define half2 f16vec2
+#define half3 f16vec3
+#define half4 f16vec4
+// fp32 types
+// Note: this redefines the built-in `float` keyword, so every `float` token
+// that follows this header is substituted with float32_t.
+#define float  float32_t
+#define float2 f32vec2
+#define float3 f32vec3
+#define float4 f32vec4
+// int8 types
+// The scalar macro maps the name to itself (no textual change); only the
+// vector spellings below are real aliases. The same holds for the other
+// self-named scalar macros in this header.
+#define int8_t int8_t
+#define int8_t2 i8vec2
+#define int8_t3 i8vec3
+#define int8_t4 i8vec4
+// int16 types
+#define int16_t  int16_t
+#define int16_t2 i16vec2
+#define int16_t3 i16vec3
+#define int16_t4 i16vec4
+// uint16 types
+#define uint16_t  uint16_t
+#define uint16_t2 u16vec2
+#define uint16_t3 u16vec3
+#define uint16_t4 u16vec4
+// int32 types
+#define int32_t  int32_t
+#define int32_t2 i32vec2
+#define int32_t3 i32vec3
+#define int32_t4 i32vec4
+// uint32 types
+#define uint32_t  uint32_t
+#define uint32_t2 u32vec2
+#define uint32_t3 u32vec3
+#define uint32_t4 u32vec4
+// methods
+// lerp(a, b, t) is spelled mix(a, b, t) in GLSL.
+#define lerp mix
+// --- Reciprocal (1 / x) for float16 types; vector overloads divide per component ---
+half rcp(half x)
+{
+    const half one = half(1.HF);
+    return one / x;
+}
+half2 rcp(half2 x)
+{
+    const half2 one = half2(1.HF);
+    return one / x;
+}
+half3 rcp(half3 x)
+{
+    const half3 one = half3(1.HF);
+    return one / x;
+}
+half4 rcp(half4 x)
+{
+    const half4 one = half4(1.HF);
+    return one / x;
+}
+// --- Reciprocal (1 / x) for float32 types; vector overloads divide per component ---
+float rcp(float x)
+{
+    const float one = float(1.0f);
+    return one / x;
+}
+float2 rcp(float2 x)
+{
+    const float2 one = float2(1.0f);
+    return one / x;
+}
+float3 rcp(float3 x)
+{
+    const float3 one = float3(1.0f);
+    return one / x;
+}
+float4 rcp(float4 x)
+{
+    const float4 one = float4(1.0f);
+    return one / x;
+}
+// --- Clamp to the [0, 1] range for float16 types; vector overloads clamp per component ---
+half saturate(half x)
+{
+    const half lo = half(0.HF);
+    const half hi = half(1.HF);
+    return clamp(x, lo, hi);
+}
+half2 saturate(half2 x)
+{
+    const half2 lo = half2(0.HF);
+    const half2 hi = half2(1.HF);
+    return clamp(x, lo, hi);
+}
+half3 saturate(half3 x)
+{
+    const half3 lo = half3(0.HF);
+    const half3 hi = half3(1.HF);
+    return clamp(x, lo, hi);
+}
+half4 saturate(half4 x)
+{
+    const half4 lo = half4(0.HF);
+    const half4 hi = half4(1.HF);
+    return clamp(x, lo, hi);
+}
+// --- Clamp to the [0, 1] range for float32 types; vector overloads clamp per component ---
+float saturate(float x)
+{
+    const float lo = 0.f;
+    const float hi = 1.f;
+    return clamp(x, lo, hi);
+}
+float2 saturate(float2 x)
+{
+    const float2 lo = float2(0.f);
+    const float2 hi = float2(1.f);
+    return clamp(x, lo, hi);
+}
+float3 saturate(float3 x)
+{
+    const float3 lo = float3(0.f);
+    const float3 hi = float3(1.f);
+    return clamp(x, lo, hi);
+}
+float4 saturate(float4 x)
+{
+    const float4 lo = float4(0.f);
+    const float4 hi = float4(1.f);
+    return clamp(x, lo, hi);
+}
+#endif  // NSS_TYPEDEFS

third_party_licenses_and_copyright_notices.txt ADDED Viewed

	@@ -0,0 +1,15 @@

+ML SDK Scenario Runner - revision 197a36e
+Source Code: https://github.com/arm/ai-ml-sdk-scenario-runner
+License: Apache-2.0 (https://github.com/arm/ai-ml-sdk-scenario-runner/blob/main/LICENSES/Apache-2.0.txt)
+Copyright Notice: "Copyright 2022-2025 Arm Limited and/or its affiliates <open-source-office@arm.com>"
+ML Emulation Layer for Vulkan® - revision 788ac99
+Source Code: https://github.com/arm/ai-ml-emulation-layer-for-vulkan
+License: Apache-2.0 (https://github.com/arm/ai-ml-emulation-layer-for-vulkan/blob/main/LICENSES/Apache-2.0.txt)
+Copyright Notice: "Copyright 2022-2025 Arm Limited and/or its affiliates <open-source-office@arm.com>"
+Amazon Lumberyard Bistro
+Asset page: http://developer.nvidia.com/orca/amazon-lumberyard-bistro
+Download page: https://casual-effects.com/g3d/data10/research/model/bistro/Exterior.zip
+License: CC BY 4.0 (https://creativecommons.org/licenses/by/4.0/)
+Copyright Notice: "Copyright 2017 Amazon Lumberyard"