Browse Source

Update gguf.js

Lutz Roeder 2 weeks ago
parent
commit
92895f3bd5
1 changed file with 50 additions and 38 deletions
  1. 50 38
      source/gguf.js

+ 50 - 38
source/gguf.js

@@ -206,42 +206,48 @@ gguf.Reader = class {
     constructor(context) {
         this.context = context;
         const QK_K = 256;
+        // https://github.com/ggml-org/llama.cpp/blob/master/gguf-py/gguf/constants.py
         gguf.Reader.GGML_QUANT_SIZES = gguf.Reader.GGML_QUANT_SIZES || new Map([
-            [gguf.QuantizationType.F32,      [1, 4, 'float32']],
-            [gguf.QuantizationType.F16,      [1, 2, 'float16']],
-            [gguf.QuantizationType.Q4_0,     [32, 2 + 16, '']],
-            [gguf.QuantizationType.Q4_1,     [32, 2 + 2 + 16, '']],
-            [gguf.QuantizationType.Q5_0,     [32, 2 + 4 + 16, '']],
-            [gguf.QuantizationType.Q5_1,     [32, 2 + 2 + 4 + 16, '']],
-            [gguf.QuantizationType.Q8_0,     [32, 2 + 32, 'q8_0']],
-            [gguf.QuantizationType.Q8_1,     [32, 4 + 4 + 32, '']],
-            [gguf.QuantizationType.Q2_K,     [256, 2 + 2 + Math.floor(QK_K / 16) + Math.floor(QK_K / 4), '']],
-            [gguf.QuantizationType.Q3_K,     [256, 2 + Math.floor(QK_K / 4) + Math.floor(QK_K / 8) + 12, '']],
-            [gguf.QuantizationType.Q4_K,     [256, 2 + 2 + Math.floor(QK_K / 2) + 12, '']],
-            [gguf.QuantizationType.Q5_K,     [256, 2 + 2 + Math.floor(QK_K / 2) + Math.floor(QK_K / 8) + 12, '']],
-            [gguf.QuantizationType.Q6_K,     [256, 2 + Math.floor(QK_K / 2) + Math.floor(QK_K / 4) + Math.floor(QK_K / 16), '']],
-            [gguf.QuantizationType.Q8_K,     [256, 4 + QK_K + Math.floor(QK_K / 8), '']],
-            [gguf.QuantizationType.IQ2_XXS,  [256, 2 + Math.floor(QK_K / 4), '']],
-            [gguf.QuantizationType.IQ2_XS,   [256, 2 + Math.floor(QK_K / 4) + Math.floor(QK_K / 32), '']],
-            [gguf.QuantizationType.IQ3_XXS,  [256, 2 + Math.floor(QK_K / 4) + Math.floor(QK_K / 8), '']],
-            [gguf.QuantizationType.IQ1_S,    [256, 2 + Math.floor(QK_K / 8) + Math.floor(QK_K / 16), '']],
-            [gguf.QuantizationType.IQ4_NL,   [32, 2 + 16, '']],
-            [gguf.QuantizationType.IQ3_S,    [256, 2 + Math.floor(QK_K / 4) + Math.floor(QK_K / 8) + Math.floor(QK_K / 32) + 4, '']],
-            [gguf.QuantizationType.IQ2_S,    [256, 2 + Math.floor(QK_K / 4) + Math.floor(QK_K / 16), '']],
-            [gguf.QuantizationType.IQ4_XS,   [256, 2 + 2 + Math.floor(QK_K / 2) + Math.floor(QK_K / 64), '']],
-            [gguf.QuantizationType.I8,       [1, 1, 'int8']],
-            [gguf.QuantizationType.I16,      [1, 2, 'int16']],
-            [gguf.QuantizationType.I32,      [1, 4, 'int32']],
-            [gguf.QuantizationType.I64,      [1, 8, 'int64']],
-            [gguf.QuantizationType.F64,      [1, 8, 'float64']],
-            [gguf.QuantizationType.IQ1_M,    [256, Math.floor(QK_K / 8) + Math.floor(QK_K / 16)  + Math.floor(QK_K / 32)]],
-            [gguf.QuantizationType.BF16,     [1, 2, 'bfloat16']],
-            [gguf.QuantizationType.Q4_0_4_4, [32, 2 + 16, '']],
-            [gguf.QuantizationType.Q4_0_4_8, [32, 2 + 16, '']],
-            [gguf.QuantizationType.Q4_0_8_8, [32, 2 + 16, '']],
-            [gguf.QuantizationType.TQ1_0,    [256, 2 + 4 * 13, '']],
-            [gguf.QuantizationType.TQ2_0,    [256, 2 + 64, '']],
-            [gguf.QuantizationType.MXFP4,    [32, 1 + 16, 'mxfp4']]
+            [gguf.QuantizationType.F32,        [1, 4, 'float32']],
+            [gguf.QuantizationType.F16,        [1, 2, 'float16']],
+            [gguf.QuantizationType.Q4_0,       [32, 2 + 16, '']],
+            [gguf.QuantizationType.Q4_1,       [32, 2 + 2 + 16, '']],
+            [gguf.QuantizationType.Q4_2,       [16, 2 + 8, '']],
+            [gguf.QuantizationType.Q4_3,       [16, 2 + 2 + 8, '']],
+            [gguf.QuantizationType.Q5_0,       [32, 2 + 4 + 16, '']],
+            [gguf.QuantizationType.Q5_1,       [32, 2 + 2 + 4 + 16, '']],
+            [gguf.QuantizationType.Q8_0,       [32, 2 + 32, 'q8_0']],
+            [gguf.QuantizationType.Q8_1,       [32, 4 + 4 + 32, '']],
+            [gguf.QuantizationType.Q2_K,       [256, 2 + 2 + Math.floor(QK_K / 16) + Math.floor(QK_K / 4), '']],
+            [gguf.QuantizationType.Q3_K,       [256, 2 + Math.floor(QK_K / 4) + Math.floor(QK_K / 8) + 12, '']],
+            [gguf.QuantizationType.Q4_K,       [256, 2 + 2 + Math.floor(QK_K / 2) + 12, '']],
+            [gguf.QuantizationType.Q5_K,       [256, 2 + 2 + Math.floor(QK_K / 2) + Math.floor(QK_K / 8) + 12, '']],
+            [gguf.QuantizationType.Q6_K,       [256, 2 + Math.floor(QK_K / 2) + Math.floor(QK_K / 4) + Math.floor(QK_K / 16), '']],
+            [gguf.QuantizationType.Q8_K,       [256, 4 + QK_K + Math.floor(QK_K / 8), '']],
+            [gguf.QuantizationType.IQ2_XXS,    [256, 2 + Math.floor(QK_K / 4), '']],
+            [gguf.QuantizationType.IQ2_XS,     [256, 2 + Math.floor(QK_K / 4) + Math.floor(QK_K / 32), '']],
+            [gguf.QuantizationType.IQ3_XXS,    [256, 2 + Math.floor(QK_K / 4) + Math.floor(QK_K / 8), '']],
+            [gguf.QuantizationType.IQ1_S,      [256, 2 + Math.floor(QK_K / 8) + Math.floor(QK_K / 16), '']],
+            [gguf.QuantizationType.IQ4_NL,     [32, 2 + 16, '']],
+            [gguf.QuantizationType.IQ3_S,      [256, 2 + Math.floor(QK_K / 4) + Math.floor(QK_K / 8) + Math.floor(QK_K / 32) + 4, '']],
+            [gguf.QuantizationType.IQ2_S,      [256, 2 + Math.floor(QK_K / 4) + Math.floor(QK_K / 16), '']],
+            [gguf.QuantizationType.IQ4_XS,     [256, 2 + 2 + Math.floor(QK_K / 2) + Math.floor(QK_K / 64), '']],
+            [gguf.QuantizationType.I8,         [1, 1, 'int8']],
+            [gguf.QuantizationType.I16,        [1, 2, 'int16']],
+            [gguf.QuantizationType.I32,        [1, 4, 'int32']],
+            [gguf.QuantizationType.I64,        [1, 8, 'int64']],
+            [gguf.QuantizationType.F64,        [1, 8, 'float64']],
+            [gguf.QuantizationType.IQ1_M,      [256, Math.floor(QK_K / 8) + Math.floor(QK_K / 16)  + Math.floor(QK_K / 32)]],
+            [gguf.QuantizationType.BF16,       [1, 2, 'bfloat16']],
+            [gguf.QuantizationType.Q4_0_4_4,   [32, 2 + 16, '']],
+            [gguf.QuantizationType.Q4_0_4_8,   [32, 2 + 16, '']],
+            [gguf.QuantizationType.Q4_0_8_8,   [32, 2 + 16, '']],
+            [gguf.QuantizationType.TQ1_0,      [256, 2 + 4 * 13, '']],
+            [gguf.QuantizationType.TQ2_0,      [256, 2 + 64, '']],
+            [gguf.QuantizationType.IQ4_NL_4_4, [32, 2 + 16, '']],
+            [gguf.QuantizationType.IQ4_NL_4_8, [32, 2 + 16, '']],
+            [gguf.QuantizationType.IQ4_NL_8_8, [32, 2 + 16, '']],
+            [gguf.QuantizationType.MXFP4,      [32, 1 + 16, 'mxfp4']]
         ]);
     }
 
@@ -434,11 +440,14 @@ gguf.Type = {
     FLOAT64: 12,
 };
 
+// https://github.com/ggml-org/llama.cpp/blob/master/ggml/include/ggml.h
 gguf.QuantizationType = {
     F32: 0,
     F16: 1,
     Q4_0: 2,
     Q4_1: 3,
+    Q4_2: 4, // deprecated
+    Q4_3: 5, // deprecated
     Q5_0: 6,
     Q5_1: 7,
     Q8_0: 8,
@@ -464,11 +473,14 @@ gguf.QuantizationType = {
     F64: 28,
     IQ1_M: 29,
     BF16: 30,
-    Q4_0_4_4: 31,
-    Q4_0_4_8: 32,
-    Q4_0_8_8: 33,
+    Q4_0_4_4: 31, // deprecated
+    Q4_0_4_8: 32, // deprecated
+    Q4_0_8_8: 33, // deprecated
     TQ1_0: 34,
     TQ2_0: 35,
+    IQ4_NL_4_4: 36, // deprecated
+    IQ4_NL_4_8: 37, // deprecated
+    IQ4_NL_8_8: 38, // deprecated
     MXFP4: 39
 };