File size: 4,623 Bytes
f724cf3 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 |
//
// -----------------------------------------------------------------------------
// The proprietary software and information contained in this file is
// confidential and may only be used by an authorized person under a valid
// licensing agreement from Arm Limited or its affiliates.
//
// Copyright (C) 2025. Arm Limited or its affiliates. All rights reserved.
//
// This entire notice must be reproduced on all copies of this file and
// copies of this file may only be made by an authorized person under a valid
// licensing agreement from Arm Limited or its affiliates.
// -----------------------------------------------------------------------------
//
#ifndef NSS_COMMON
#define NSS_COMMON
#include "typedefs.h"
// Largest finite value representable in IEEE 754 half precision (fp16).
#define MAX_FP16 65504.HF
// Small epsilon written as a half literal.
// NOTE(review): 1e-7 is below the smallest normal fp16 value (~6.1e-5), so it
// can only be held as a subnormal and may flush to zero on targets without
// fp16 denormal support - confirm this is the intended magnitude.
#define EPS 1e-7HF
// Activation Functions
// ──────────────────────────────────────────────────────────────────────────────────────────
// Scalar sigmoid: returns rcp(1 + exp(-x)).
// rcp is not defined in this file; presumably the reciprocal (1 / v).
half Sigmoid(half x)
{
    half denom = half(1.0) + exp(-x);
    return rcp(denom);
}
// Two-component sigmoid: returns rcp(1 + exp(-x)), applied per component.
// rcp is not defined in this file; presumably the reciprocal (1 / v).
half2 Sigmoid(half2 x)
{
    half2 denom = half2(1.0) + exp(-x);
    return rcp(denom);
}
// Three-component sigmoid: returns rcp(1 + exp(-x)), applied per component.
// rcp is not defined in this file; presumably the reciprocal (1 / v).
half3 Sigmoid(half3 x)
{
    half3 denom = half3(1.0) + exp(-x);
    return rcp(denom);
}
// Four-component sigmoid: returns rcp(1 + exp(-x)), applied per component.
// rcp is not defined in this file; presumably the reciprocal (1 / v).
half4 Sigmoid(half4 x)
{
    half4 denom = half4(1.0) + exp(-x);
    return rcp(denom);
}
// Quantize/Dequantize
// ──────────────────────────────────────────────────────────────────────────────────────────
// Dequantize methods expect quant_params.x = scale and quant_params.y = zero point.
// Quantize methods expect quant_params.x = rcp(scale) and quant_params.y = zero point.
// Maps a quantized scalar back to real range: (i - zero_point) * scale.
//   i            - quantized value, already converted to half
//   quant_params - .x = scale, .y = zero point
half Dequantize(half i, half2 quant_params)
{
    half scale = quant_params.x;
    half zero_point = quant_params.y;
    return (i - zero_point) * scale;
}
// Maps two quantized values back to real range: (i - zero_point) * scale.
// The same scalar scale and zero point are applied to every component.
//   i            - quantized values, already converted to half
//   quant_params - .x = scale, .y = zero point
half2 Dequantize(half2 i, half2 quant_params)
{
    half scale = quant_params.x;
    half zero_point = quant_params.y;
    return (i - zero_point) * scale;
}
// Maps three quantized values back to real range: (i - zero_point) * scale.
// The same scalar scale and zero point are applied to every component.
//   i            - quantized values, already converted to half
//   quant_params - .x = scale, .y = zero point
half3 Dequantize(half3 i, half2 quant_params)
{
    half scale = quant_params.x;
    half zero_point = quant_params.y;
    return (i - zero_point) * scale;
}
// Maps four quantized values back to real range: (i - zero_point) * scale.
// The same scalar scale and zero point are applied to every component.
//   i            - quantized values, already converted to half
//   quant_params - .x = scale, .y = zero point
half4 Dequantize(half4 i, half2 quant_params)
{
    half scale = quant_params.x;
    half zero_point = quant_params.y;
    return (i - zero_point) * scale;
}
// Quantizes a scalar to signed 8-bit: round(f * x + y), clamped to [-128, 127].
//   f            - value to quantize
//   quant_params - .x = rcp(scale) (reciprocal of the scale), .y = zero point
int8_t Quantize(half f, half2 quant_params)
{
    const half q_min = -128.HF;
    const half q_max = 127.HF;
    half q = round(f * quant_params.x + quant_params.y);
    // Clamp before the narrowing conversion so the value fits in int8.
    return int8_t(clamp(q, q_min, q_max));
}
// Quantizes two values to signed 8-bit: round(f * x + y), clamped to [-128, 127].
//   f            - values to quantize
//   quant_params - .x = rcp(scale) (reciprocal of the scale), .y = zero point
int8_t2 Quantize(half2 f, half2 quant_params)
{
    const half q_min = -128.HF;
    const half q_max = 127.HF;
    half2 q = round(f * quant_params.x + quant_params.y);
    // Clamp before the narrowing conversion so every component fits in int8.
    return int8_t2(clamp(q, q_min, q_max));
}
// Quantizes three values to signed 8-bit: round(f * x + y), clamped to [-128, 127].
//   f            - values to quantize
//   quant_params - .x = rcp(scale) (reciprocal of the scale), .y = zero point
int8_t3 Quantize(half3 f, half2 quant_params)
{
    const half q_min = -128.HF;
    const half q_max = 127.HF;
    half3 q = round(f * quant_params.x + quant_params.y);
    // Clamp before the narrowing conversion so every component fits in int8.
    return int8_t3(clamp(q, q_min, q_max));
}
// Quantizes four values to signed 8-bit: round(f * x + y), clamped to [-128, 127].
//   f            - values to quantize
//   quant_params - .x = rcp(scale) (reciprocal of the scale), .y = zero point
int8_t4 Quantize(half4 f, half2 quant_params)
{
    const half q_min = -128.HF;
    const half q_max = 127.HF;
    half4 q = round(f * quant_params.x + quant_params.y);
    // Clamp before the narrowing conversion so every component fits in int8.
    return int8_t4(clamp(q, q_min, q_max));
}
// Encode/Decode
// ──────────────────────────────────────────────────────────────────────────────────────────
// Note: both encode/decode methods are currently bound to 3x3 windows, they should be
// expandable in future if needed. The most likely to need this would be the jitter
// encoding, where 3x3 may not be enough for larger than 3x3 scale factors.
// Packs a 2D offset within a 3x3 window into a 4-bit code.
//   o - offset; each component is clamped to {-1, 0, 1} before encoding
// Returns: bits 0-1 hold (o.x + 1), bits 2-3 hold (o.y + 1).
uint8_t EncodeNearestDepthCoord(int32_t2 o)
{
    // Force o into {-1, 0, 1} x {-1, 0, 1} so each biased component fits in 2 bits.
    o = clamp(o, int32_t2(-1), int32_t2(1));
    // Each biased component is in 0..2, so the packed code lies in 0..10
    // (a 2-bit field never takes the value 3).
    return uint8_t(((o.y + 1) << 2) | (o.x + 1));
}
// Unpacks a 4-bit code into a 2D offset by removing the +1 bias per field.
//   code - bits 0-1 hold the biased x component, bits 2-3 the biased y component
// Returns: (x, y), each equal to its 2-bit field minus 1.
int32_t2 DecodeNearestDepthCoord(int32_t code)
{
    int32_t biased_x = int32_t(code & 0x3);        // bits 0-1
    int32_t biased_y = int32_t((code >> 2) & 0x3); // bits 2-3
    return int32_t2(biased_x - 1, biased_y - 1);
}
// Image Operations
// ──────────────────────────────────────────────────────────────────────────────────────────
// Weighted sum of the RGB channels using the ITU-R BT.709 coefficients:
// 0.2126 * R + 0.7152 * G + 0.0722 * B.
half Luminance(half3 rgb)
{
    const half3 bt709_weights = half3(0.2126, 0.7152, 0.0722);
    return dot(rgb, bt709_weights);
}
// Karis tonemapper: scales the colour by rcp(1 + max(r, g, b)).
// http://graphicrants.blogspot.com/2013/12/tone-mapping.html
// Negative components are clamped to zero first.
half3 Tonemap(half3 x)
{
    half3 colour = max(x, half3(0.HF));
    half peak = max(max(colour.r, colour.g), colour.b);
    return colour * rcp(half3(1.HF) + peak);
}
// Inverse of the Karis tonemapper: scales the colour by rcp(1 - max(r, g, b)).
// http://graphicrants.blogspot.com/2013/12/tone-mapping.html
// The input is first clamped to [0, Tonemap(MAX_FP16)].
half3 InverseTonemap(half3 x)
{
    // NOTE(review): the upper bound depends on how Tonemap(MAX_FP16) evaluates
    // in fp16 on the target (its reciprocal is in the subnormal range) - confirm
    // it stays just below 1 so that (1 - peak) is never zero.
    half3 upper = Tonemap(half3(MAX_FP16));
    half3 colour = clamp(x, half3(0.HF), upper);
    half peak = max(max(colour.r, colour.g), colour.b);
    return colour * rcp(half3(1.HF) - peak);
}
// Clamps every component of the colour to [0, MAX_FP16].
half3 SafeColour(half3 x)
{
    const half3 lower = half3(0.HF);
    const half3 upper = half3(MAX_FP16);
    return clamp(x, lower, upper);
}
#endif // NSS_COMMON
|