// gguf.js — reader for the GGUF (ggml / llama.cpp) model file format.
// Namespace object holding all GGUF model-format classes defined below.
const gguf = {};
  2. gguf.ModelFactory = class {
  3. async match(context) {
  4. const reader = gguf.Reader.open(context);
  5. if (reader) {
  6. return context.set('gguf', reader);
  7. }
  8. return null;
  9. }
  10. async open(context) {
  11. const target = context.value;
  12. await target.read();
  13. return new gguf.Model(target);
  14. }
  15. };
  16. gguf.Model = class {
  17. constructor(target) {
  18. this.format = target.format;
  19. this.metadata = [];
  20. const layers = new Map();
  21. for (const [name, tensor] of target.tensors) {
  22. const parts = name.split('.');
  23. const param = parts.pop();
  24. const key = parts.join('.');
  25. if (!layers.has(key)) {
  26. layers.set(key, { name: key, type: 'weights', metadata: new Map(), weights: new Map() });
  27. }
  28. const layer = layers.get(key);
  29. layer.weights.set(param, tensor);
  30. }
  31. const metadata = new Map();
  32. const graph = {};
  33. if (target.metadata.size === 0) {
  34. graph.layers = Array.from(layers.values());
  35. } else {
  36. let architecture = '?';
  37. for (const [name, value] of target.metadata) {
  38. switch (name) {
  39. case 'general.name': this.name = value; break;
  40. case 'general.architecture': architecture = value; break;
  41. case 'general.description': this.description = value; break;
  42. case 'general.author': this.metadata.push(new gguf.Argument('author', value)); break;
  43. case 'general.license': this.metadata.push(new gguf.Argument('license', value)); break;
  44. case 'general.file_type':
  45. case 'general.quantization_version':
  46. break;
  47. default:
  48. metadata.set(name, value);
  49. break;
  50. }
  51. }
  52. const tokenizer = { type: 'tokenizer', metadata: new Map(), layers: [] };
  53. const model = { type: architecture, metadata: new Map(), layers: Array.from(layers.values()) };
  54. for (const [name, value] of metadata) {
  55. if (name.startsWith('tokenizer.')) {
  56. const match = name.match(/^(.*)\.(.*?)$/);
  57. if (match) {
  58. const [, param] = match.slice(1);
  59. tokenizer.metadata.set(param, value);
  60. }
  61. } else if (architecture && name.startsWith(`${architecture}.`)) {
  62. model.metadata.set(name, value);
  63. } else {
  64. this.metadata.push(new gguf.Argument(name, value));
  65. }
  66. }
  67. graph.layers = [model];
  68. if (tokenizer.metadata.size > 0) {
  69. graph.layers.push(tokenizer);
  70. }
  71. }
  72. this.modules = [new gguf.Graph(graph)];
  73. }
  74. };
  75. gguf.Graph = class {
  76. constructor(graph, type = '') {
  77. this.name = graph.type;
  78. this.type = type;
  79. this.nodes = [];
  80. this.inputs = [];
  81. this.outputs = [];
  82. for (const layer of graph.layers) {
  83. const node = new gguf.Node(layer);
  84. this.nodes.push(node);
  85. }
  86. }
  87. };
  88. gguf.Argument = class {
  89. constructor(name, value) {
  90. this.name = name;
  91. this.value = value;
  92. }
  93. };
  94. gguf.Value = class {
  95. constructor(name, tensor) {
  96. this.name = name;
  97. this.type = tensor.type;
  98. this.quantization = tensor.quantization || null;
  99. this.initializer = tensor;
  100. }
  101. };
  102. gguf.Node = class {
  103. constructor(layer) {
  104. if (Array.isArray(layer.layers) && layer.layers.length > 0) {
  105. this.type = new gguf.Graph(layer, 'weights');
  106. } else {
  107. this.type = { name: layer.type };
  108. }
  109. this.name = layer.name || '';
  110. this.inputs = [];
  111. this.outputs = [];
  112. this.attributes = [];
  113. if (layer.weights) {
  114. for (const [name, weight] of layer.weights) {
  115. const tensor = new gguf.Tensor(weight);
  116. const value = new gguf.Value(weight.name, tensor);
  117. const argument = new gguf.Argument(name, [value]);
  118. this.inputs.push(argument);
  119. }
  120. }
  121. if (layer.metadata) {
  122. for (const [name, value] of layer.metadata) {
  123. const attribute = new gguf.Argument(name, value);
  124. this.attributes.push(attribute);
  125. }
  126. }
  127. }
  128. };
  129. gguf.TensorType = class {
  130. constructor(dataType, shape) {
  131. this.dataType = dataType;
  132. this.shape = shape;
  133. }
  134. toString() {
  135. return (this.dataType || '?') + this.shape.toString();
  136. }
  137. };
  138. gguf.TensorShape = class {
  139. constructor(dimensions) {
  140. this.dimensions = dimensions;
  141. }
  142. toString() {
  143. return `[${this.dimensions.map((dimension) => dimension.toString()).join(',')}]`;
  144. }
  145. };
  146. gguf.Tensor = class {
  147. constructor(tensor) {
  148. const shape = new gguf.TensorShape(tensor.ne);
  149. this.type = new gguf.TensorType(tensor.dtype, shape);
  150. if (tensor.type !== gguf.QuantizationType.F32 && tensor.type !== gguf.QuantizationType.F16) {
  151. this.quantization = {
  152. type: gguf.Utility.enum(gguf.QuantizationType, tensor.type).toLowerCase()
  153. };
  154. }
  155. if (tensor.dtype === 'float32' || tensor.dtype === 'float16' ||
  156. tensor.dtype === 'int8' || tensor.dtype === 'int16' || tensor.dtype === 'int32') {
  157. this.encoding = '<';
  158. this._data = tensor.data;
  159. }
  160. }
  161. get values() {
  162. if (this._data) {
  163. return this._data.peek();
  164. }
  165. return null;
  166. }
  167. };
// Parses the GGUF container: header, metadata key/value entries, tensor
// descriptors and (when present in the stream) the tensor data payloads.
gguf.Reader = class {
    // Returns a Reader when the stream begins with the ASCII magic 'GGUF',
    // otherwise null.
    static open(context) {
        const stream = context.stream;
        if (stream && stream.length > 4) {
            const buffer = stream.peek(4);
            const signature = String.fromCharCode.apply(null, buffer);
            if (signature === 'GGUF') {
                return new gguf.Reader(context);
            }
        }
        return null;
    }
    constructor(context) {
        this.context = context;
        // Super-block element count used by the k-quant size formulas below.
        const QK_K = 256;
        // Lazily built, shared across instances:
        // quantization type -> [block_size (elements), type_size (bytes per block), dtype name].
        // An empty dtype string means the element type has no plain decoding here.
        gguf.Reader.GGML_QUANT_SIZES = gguf.Reader.GGML_QUANT_SIZES || new Map([
            [gguf.QuantizationType.F32, [1, 4, 'float32']],
            [gguf.QuantizationType.F16, [1, 2, 'float16']],
            [gguf.QuantizationType.Q4_0, [32, 2 + 16, '']],
            [gguf.QuantizationType.Q4_1, [32, 2 + 2 + 16, '']],
            [gguf.QuantizationType.Q5_0, [32, 2 + 4 + 16, '']],
            [gguf.QuantizationType.Q5_1, [32, 2 + 2 + 4 + 16, '']],
            [gguf.QuantizationType.Q8_0, [32, 2 + 32, 'q8_0']],
            [gguf.QuantizationType.Q8_1, [32, 4 + 4 + 32, '']],
            [gguf.QuantizationType.Q2_K, [256, 2 + 2 + Math.floor(QK_K / 16) + Math.floor(QK_K / 4), '']],
            [gguf.QuantizationType.Q3_K, [256, 2 + Math.floor(QK_K / 4) + Math.floor(QK_K / 8) + 12, '']],
            [gguf.QuantizationType.Q4_K, [256, 2 + 2 + Math.floor(QK_K / 2) + 12, '']],
            [gguf.QuantizationType.Q5_K, [256, 2 + 2 + Math.floor(QK_K / 2) + Math.floor(QK_K / 8) + 12, '']],
            [gguf.QuantizationType.Q6_K, [256, 2 + Math.floor(QK_K / 2) + Math.floor(QK_K / 4) + Math.floor(QK_K / 16), '']],
            [gguf.QuantizationType.Q8_K, [256, 4 + QK_K + Math.floor(QK_K / 8), '']],
            [gguf.QuantizationType.IQ2_XXS, [256, 2 + Math.floor(QK_K / 4), '']],
            [gguf.QuantizationType.IQ2_XS, [256, 2 + Math.floor(QK_K / 4) + Math.floor(QK_K / 32), '']],
            [gguf.QuantizationType.IQ3_XXS, [256, 2 + Math.floor(QK_K / 4) + Math.floor(QK_K / 8), '']],
            [gguf.QuantizationType.IQ1_S, [256, 2 + Math.floor(QK_K / 8) + Math.floor(QK_K / 16), '']],
            [gguf.QuantizationType.IQ4_NL, [32, 2 + 16, '']],
            [gguf.QuantizationType.IQ3_S, [256, 2 + Math.floor(QK_K / 4) + Math.floor(QK_K / 8) + Math.floor(QK_K / 32) + 4, '']],
            [gguf.QuantizationType.IQ2_S, [256, 2 + Math.floor(QK_K / 4) + Math.floor(QK_K / 16), '']],
            [gguf.QuantizationType.IQ4_XS, [256, 2 + 2 + Math.floor(QK_K / 2) + Math.floor(QK_K / 64), '']],
            [gguf.QuantizationType.I8, [1, 1, 'int8']],
            [gguf.QuantizationType.I16, [1, 2, 'int16']],
            [gguf.QuantizationType.I32, [1, 4, 'int32']],
            [gguf.QuantizationType.I64, [1, 8, 'int64']],
            [gguf.QuantizationType.F64, [1, 8, 'float64']],
            // NOTE(review): IQ1_M has no third (dtype) element — the
            // destructure below yields undefined and dtype falls back to '?'.
            [gguf.QuantizationType.IQ1_M, [256, Math.floor(QK_K / 8) + Math.floor(QK_K / 16) + Math.floor(QK_K / 32)]],
            [gguf.QuantizationType.BF16, [1, 2, 'bfloat16']],
            [gguf.QuantizationType.Q4_0_4_4, [32, 2 + 16, '']],
            [gguf.QuantizationType.Q4_0_4_8, [32, 2 + 16, '']],
            [gguf.QuantizationType.Q4_0_8_8, [32, 2 + 16, '']],
            [gguf.QuantizationType.TQ1_0, [256, 2 + 4 * 13, '']],
            [gguf.QuantizationType.TQ2_0, [256, 2 + 64, '']],
            [gguf.QuantizationType.MXFP4, [32, 1 + 16, 'mxfp4']]
        ]);
    }
    // Reads the whole container in file order. Populates this.header,
    // this.metadata (Map name -> value), this.tensors (Map name -> record)
    // and this.format. Throws gguf.Error on unsupported quantization types.
    async read() {
        const context = this.context;
        const stream = context.stream;
        let reader = await context.read('binary');
        reader = new gguf.BinaryReader(reader);
        this.tensors = new Map();
        this.metadata = new Map();
        this.header = {};
        this.header.magic = String.fromCharCode.apply(null, reader.read(4));
        this.header.version = reader.uint32();
        this.format = `GGUF v${this.header.version}`;
        // Only version >= 2 headers (64-bit counts) are parsed; older files
        // yield empty metadata/tensor maps.
        if (this.header.version >= 2) {
            this.header.n_tensors = reader.uint64().toNumber();
            this.header.n_kv = reader.uint64().toNumber();
            // Metadata key/value section precedes the tensor descriptors.
            for (let i = 0; i < this.header.n_kv; i++) {
                const entry = reader.entry();
                this.metadata.set(entry.name, entry.value);
            }
            const tensors = this.header.n_tensors;
            if (tensors > 0) {
                for (let i = 0; i < tensors; i++) {
                    const tensor = reader.tensor();
                    this.tensors.set(tensor.name, tensor);
                }
                // Tensor data starts at the next aligned offset after the
                // descriptors; default alignment is 32 bytes.
                this.alignment = this.metadata.get('general.alignment') || 32;
                const offset_pad = reader.position % this.alignment;
                if (offset_pad !== 0) {
                    reader.skip(this.alignment - offset_pad);
                }
                const offset = reader.position;
                for (const tensor of this.tensors.values()) {
                    if (!gguf.Reader.GGML_QUANT_SIZES.has(tensor.type)) {
                        throw new gguf.Error(`Unsupported tensor quantization type '${tensor.type}'.`);
                    }
                    const [block_size, type_size, dtype] = gguf.Reader.GGML_QUANT_SIZES.get(tensor.type);
                    tensor.block_size = block_size;
                    tensor.type_size = type_size;
                    tensor.dtype = dtype || '?';
                    // Slice out the payload only when it lies within the
                    // stream (tensor.offset is relative to the data section).
                    if (offset < reader.length) {
                        const n_elems = tensor.ne.reduce((a, b) => a * b, 1);
                        const n_bytes = Math.floor((n_elems * type_size) / block_size);
                        reader.seek(offset + tensor.offset);
                        tensor.data = reader.stream(n_bytes);
                    }
                }
            }
        }
        // Rewind for any other consumer of the stream and drop the context
        // reference once parsing is complete.
        stream.seek(0);
        delete this.context;
    }
};
  272. gguf.BinaryReader = class {
  273. constructor(reader) {
  274. this._reader = reader;
  275. }
  276. get length() {
  277. return this._reader.length;
  278. }
  279. get position() {
  280. return this._reader.position;
  281. }
  282. seek(position) {
  283. this._reader.seek(position);
  284. }
  285. skip(offset) {
  286. this._reader.skip(offset);
  287. }
  288. stream(length) {
  289. return this._reader.stream(length);
  290. }
  291. read(length) {
  292. return this._reader.read(length);
  293. }
  294. byte() {
  295. return this._reader.byte();
  296. }
  297. int8() {
  298. return this._reader.int8();
  299. }
  300. uint16() {
  301. return this._reader.uint16();
  302. }
  303. int16() {
  304. return this._reader.int16();
  305. }
  306. uint32() {
  307. return this._reader.uint32();
  308. }
  309. int32() {
  310. return this._reader.int32();
  311. }
  312. uint64() {
  313. return this._reader.uint64();
  314. }
  315. int64() {
  316. return this._reader.int64();
  317. }
  318. float32() {
  319. return this._reader.float32();
  320. }
  321. string() {
  322. const size = this.uint64().toNumber();
  323. const buffer = this.read(size);
  324. return String.fromCharCode.apply(null, buffer);
  325. }
  326. value(type) {
  327. switch (type) {
  328. case gguf.Type.UINT8: return this.byte();
  329. case gguf.Type.INT8: return this.int8();
  330. case gguf.Type.UINT16: return this.uint16();
  331. case gguf.Type.INT16: return this.int16();
  332. case gguf.Type.UINT32: return this.uint32();
  333. case gguf.Type.INT32: return this.int32();
  334. case gguf.Type.UINT64: return this.uint64();
  335. case gguf.Type.INT64: return this.int64();
  336. case gguf.Type.FLOAT32: return this.float32();
  337. case gguf.Type.BOOL: return this.byte() !== 0;
  338. case gguf.Type.STRING: return this.string();
  339. case gguf.Type.ARRAY: {
  340. const type = this.uint32();
  341. const size = this.uint64().toNumber();
  342. const value = new Array(size);
  343. for (let i = 0; i < size; i++) {
  344. value[i] = this.value(type);
  345. }
  346. return value;
  347. }
  348. default: {
  349. throw new gguf.Error(`Unsupported GGUF type '${type}'.`);
  350. }
  351. }
  352. }
  353. entry() {
  354. const name = this.string();
  355. const type = this.uint32();
  356. const value = this.value(type);
  357. return { name, value, type };
  358. }
  359. tensor() {
  360. const tensor = {};
  361. tensor.name = this.string();
  362. const n_dims = this.uint32();
  363. tensor.ne = new Array(n_dims);
  364. for (let i = 0; i < n_dims; i++) {
  365. tensor.ne[i] = this.uint64().toNumber();
  366. }
  367. tensor.type = this.uint32();
  368. tensor.offset = this.uint64().toNumber();
  369. return tensor;
  370. }
  371. };
// Metadata value type tags read from GGUF key/value entries and dispatched
// in gguf.BinaryReader.value().
gguf.Type = {
    UINT8: 0,
    INT8: 1,
    UINT16: 2,
    INT16: 3,
    UINT32: 4,
    INT32: 5,
    FLOAT32: 6,
    BOOL: 7,
    STRING: 8,
    ARRAY: 9,
    UINT64: 10,
    INT64: 11,
    // NOTE(review): BinaryReader.value() has no FLOAT64 case, so entries
    // with this tag would currently throw 'Unsupported GGUF type'.
    FLOAT64: 12,
};
// Tensor storage/quantization type identifiers, keyed by the uint32 value
// read from each tensor descriptor. Numbering gaps (4-5, 36-38) correspond
// to identifiers this reader does not define; tensors using an identifier
// absent from gguf.Reader.GGML_QUANT_SIZES are rejected during read().
gguf.QuantizationType = {
    F32: 0,
    F16: 1,
    Q4_0: 2,
    Q4_1: 3,
    Q5_0: 6,
    Q5_1: 7,
    Q8_0: 8,
    Q8_1: 9,
    Q2_K: 10,
    Q3_K: 11,
    Q4_K: 12,
    Q5_K: 13,
    Q6_K: 14,
    Q8_K: 15,
    IQ2_XXS: 16,
    IQ2_XS: 17,
    IQ3_XXS: 18,
    IQ1_S: 19,
    IQ4_NL: 20,
    IQ3_S: 21,
    IQ2_S: 22,
    IQ4_XS: 23,
    I8: 24,
    I16: 25,
    I32: 26,
    I64: 27,
    F64: 28,
    IQ1_M: 29,
    BF16: 30,
    Q4_0_4_4: 31,
    Q4_0_4_8: 32,
    Q4_0_8_8: 33,
    TQ1_0: 34,
    TQ2_0: 35,
    MXFP4: 39
};
  424. gguf.Utility = class {
  425. static enum(type, value) {
  426. gguf.Utility._enums = gguf.Utility._enums || new Map();
  427. if (!gguf.Utility._enums.has(type)) {
  428. const entries = new Map(Object.entries(type).map(([key, value]) => [value, key]));
  429. gguf.Utility._enums.set(type, entries);
  430. }
  431. const entries = gguf.Utility._enums.get(type);
  432. if (entries.has(value)) {
  433. return entries.get(value);
  434. }
  435. return value;
  436. }
  437. };
// Error type thrown for malformed or unsupported GGUF content.
gguf.Error = class extends Error {
    constructor(message) {
        super(message);
        this.name = 'GGML Error';
    }
};
// Public entry point consumed by the host application's format registry.
export const ModelFactory = gguf.ModelFactory;