| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499 |
const gguf = {};

gguf.ModelFactory = class {

    // Probe the context's stream; when it carries the GGUF signature,
    // register a reader under the 'gguf' key, otherwise decline.
    async match(context) {
        const reader = gguf.Reader.open(context);
        return reader ? context.set('gguf', reader) : null;
    }

    // Fully parse the previously matched reader and wrap it in a model.
    async open(context) {
        const reader = context.value;
        await reader.read();
        return new gguf.Model(reader);
    }
};
gguf.Model = class {

    // Builds the displayable model: groups tensors into layers by their
    // dotted name prefix and routes metadata entries to the model,
    // tokenizer, or top-level metadata list.
    constructor(target) {
        this.format = target.format;
        this.metadata = [];
        // Layer key = tensor name minus its final '.'-separated segment;
        // the final segment becomes the weight (parameter) name.
        const layers = new Map();
        for (const [name, tensor] of target.tensors) {
            const index = name.lastIndexOf('.');
            const key = index === -1 ? '' : name.substring(0, index);
            const param = index === -1 ? name : name.substring(index + 1);
            let layer = layers.get(key);
            if (!layer) {
                layer = { name: key, type: 'weights', metadata: new Map(), weights: new Map() };
                layers.set(key, layer);
            }
            layer.weights.set(param, tensor);
        }
        const graph = {};
        if (target.metadata.size === 0) {
            // No metadata at all: expose the raw weight layers directly.
            graph.layers = [...layers.values()];
        } else {
            const remaining = new Map();
            let architecture = '?';
            for (const [name, value] of target.metadata) {
                if (name === 'general.name') {
                    this.name = value;
                } else if (name === 'general.architecture') {
                    architecture = value;
                } else if (name === 'general.description') {
                    this.description = value;
                } else if (name === 'general.author') {
                    this.metadata.push(new gguf.Argument('author', value));
                } else if (name === 'general.license') {
                    this.metadata.push(new gguf.Argument('license', value));
                } else if (name !== 'general.file_type' && name !== 'general.quantization_version') {
                    // Everything else is classified in the second pass below.
                    remaining.set(name, value);
                }
            }
            const tokenizer = { type: 'tokenizer', metadata: new Map(), layers: [] };
            const model = { type: architecture, metadata: new Map(), layers: [...layers.values()] };
            for (const [name, value] of remaining) {
                if (name.startsWith('tokenizer.')) {
                    // Keep only the last dotted segment as the tokenizer key.
                    const match = name.match(/^(.*)\.(.*?)$/);
                    if (match) {
                        tokenizer.metadata.set(match[2], value);
                    }
                } else if (architecture && name.startsWith(`${architecture}.`)) {
                    model.metadata.set(name, value);
                } else {
                    this.metadata.push(new gguf.Argument(name, value));
                }
            }
            graph.layers = [model];
            if (tokenizer.metadata.size > 0) {
                graph.layers.push(tokenizer);
            }
        }
        this.modules = [new gguf.Graph(graph)];
    }
};
gguf.Graph = class {

    // Wraps a layer group as a graph; the group's 'type' string doubles as
    // the graph's display name.
    constructor(graph, type = '') {
        this.name = graph.type;
        this.type = type;
        this.nodes = graph.layers.map((layer) => new gguf.Node(layer));
        this.inputs = [];
        this.outputs = [];
    }
};
gguf.Argument = class {

    // A simple name/value pair, used for metadata entries, node inputs,
    // and node attributes alike.
    constructor(name, value) {
        Object.assign(this, { name, value });
    }
};
gguf.Value = class {

    // A graph value backed by a tensor initializer.
    constructor(name, tensor) {
        this.name = name;
        this.type = tensor.type;
        this.quantization = tensor.quantization ? tensor.quantization : null;
        this.initializer = tensor;
    }
};
gguf.Node = class {

    // A graph node; a layer that itself contains sub-layers becomes a
    // nested 'weights' graph, otherwise the layer's type string is used.
    constructor(layer) {
        const sublayers = layer.layers;
        this.type = Array.isArray(sublayers) && sublayers.length > 0 ? new gguf.Graph(layer, 'weights') : { name: layer.type };
        this.name = layer.name || '';
        this.inputs = [];
        this.outputs = [];
        this.attributes = [];
        if (layer.weights) {
            // Each weight becomes an input argument carrying one tensor value.
            for (const [name, weight] of layer.weights) {
                const value = new gguf.Value(weight.name, new gguf.Tensor(weight));
                this.inputs.push(new gguf.Argument(name, [value]));
            }
        }
        if (layer.metadata) {
            for (const [name, value] of layer.metadata) {
                this.attributes.push(new gguf.Argument(name, value));
            }
        }
    }
};
gguf.TensorType = class {

    // Pairs a dtype string with a tensor shape.
    constructor(dataType, shape) {
        this.dataType = dataType;
        this.shape = shape;
    }

    toString() {
        // Empty/unknown dtype renders as '?'.
        return `${this.dataType || '?'}${this.shape}`;
    }
};
gguf.TensorShape = class {

    // Dimension list with '[d0,d1,...]' rendering.
    constructor(dimensions) {
        this.dimensions = dimensions;
    }

    toString() {
        const parts = this.dimensions.map((dimension) => dimension.toString());
        return `[${parts.join(',')}]`;
    }
};
gguf.Tensor = class {

    // Adapts a raw reader tensor record (ne/dtype/type/data) for display.
    constructor(tensor) {
        this.type = new gguf.TensorType(tensor.dtype, new gguf.TensorShape(tensor.ne));
        const { F32, F16 } = gguf.QuantizationType;
        if (tensor.type !== F32 && tensor.type !== F16) {
            // Any non-float32/float16 type is reported as quantized.
            this.quantization = {
                type: gguf.Utility.enum(gguf.QuantizationType, tensor.type).toLowerCase()
            };
        }
        // Only plain little-endian layouts expose their raw bytes here.
        const plain = new Set(['float32', 'float16', 'int8', 'int16', 'int32']);
        if (plain.has(tensor.dtype)) {
            this.encoding = '<';
            this._data = tensor.data;
        }
    }

    get values() {
        return this._data ? this._data.peek() : null;
    }
};
gguf.Reader = class {

    // Returns a reader when the stream starts with the 'GGUF' magic bytes,
    // otherwise null.
    static open(context) {
        const stream = context.stream;
        if (stream && stream.length > 4) {
            const buffer = stream.peek(4);
            const signature = String.fromCharCode.apply(null, buffer);
            if (signature === 'GGUF') {
                return new gguf.Reader(context);
            }
        }
        return null;
    }

    constructor(context) {
        this.context = context;
        const QK_K = 256; // ggml 'K' super-block element count used by the k-quant entries below
        // Lazily built once and shared on the class:
        // quantization type -> [elements per block, bytes per block, dtype string]
        // (dtype is '' when the packed layout is not decoded by this reader).
        gguf.Reader.GGML_QUANT_SIZES = gguf.Reader.GGML_QUANT_SIZES || new Map([
            [gguf.QuantizationType.F32, [1, 4, 'float32']],
            [gguf.QuantizationType.F16, [1, 2, 'float16']],
            [gguf.QuantizationType.Q4_0, [32, 2 + 16, '']],
            [gguf.QuantizationType.Q4_1, [32, 2 + 2 + 16, '']],
            [gguf.QuantizationType.Q5_0, [32, 2 + 4 + 16, '']],
            [gguf.QuantizationType.Q5_1, [32, 2 + 2 + 4 + 16, '']],
            [gguf.QuantizationType.Q8_0, [32, 2 + 32, 'q8_0']],
            [gguf.QuantizationType.Q8_1, [32, 4 + 4 + 32, '']],
            [gguf.QuantizationType.Q2_K, [256, 2 + 2 + Math.floor(QK_K / 16) + Math.floor(QK_K / 4), '']],
            [gguf.QuantizationType.Q3_K, [256, 2 + Math.floor(QK_K / 4) + Math.floor(QK_K / 8) + 12, '']],
            [gguf.QuantizationType.Q4_K, [256, 2 + 2 + Math.floor(QK_K / 2) + 12, '']],
            [gguf.QuantizationType.Q5_K, [256, 2 + 2 + Math.floor(QK_K / 2) + Math.floor(QK_K / 8) + 12, '']],
            [gguf.QuantizationType.Q6_K, [256, 2 + Math.floor(QK_K / 2) + Math.floor(QK_K / 4) + Math.floor(QK_K / 16), '']],
            [gguf.QuantizationType.Q8_K, [256, 4 + QK_K + Math.floor(QK_K / 8), '']],
            [gguf.QuantizationType.IQ2_XXS, [256, 2 + Math.floor(QK_K / 4), '']],
            [gguf.QuantizationType.IQ2_XS, [256, 2 + Math.floor(QK_K / 4) + Math.floor(QK_K / 32), '']],
            [gguf.QuantizationType.IQ3_XXS, [256, 2 + Math.floor(QK_K / 4) + Math.floor(QK_K / 8), '']],
            [gguf.QuantizationType.IQ1_S, [256, 2 + Math.floor(QK_K / 8) + Math.floor(QK_K / 16), '']],
            [gguf.QuantizationType.IQ4_NL, [32, 2 + 16, '']],
            [gguf.QuantizationType.IQ3_S, [256, 2 + Math.floor(QK_K / 4) + Math.floor(QK_K / 8) + Math.floor(QK_K / 32) + 4, '']],
            [gguf.QuantizationType.IQ2_S, [256, 2 + Math.floor(QK_K / 4) + Math.floor(QK_K / 16), '']],
            [gguf.QuantizationType.IQ4_XS, [256, 2 + 2 + Math.floor(QK_K / 2) + Math.floor(QK_K / 64), '']],
            [gguf.QuantizationType.I8, [1, 1, 'int8']],
            [gguf.QuantizationType.I16, [1, 2, 'int16']],
            [gguf.QuantizationType.I32, [1, 4, 'int32']],
            [gguf.QuantizationType.I64, [1, 8, 'int64']],
            [gguf.QuantizationType.F64, [1, 8, 'float64']],
            // NOTE(review): entry has no third (dtype) element; destructuring in
            // read() yields undefined, which falls back to '?' there.
            [gguf.QuantizationType.IQ1_M, [256, Math.floor(QK_K / 8) + Math.floor(QK_K / 16) + Math.floor(QK_K / 32)]],
            [gguf.QuantizationType.BF16, [1, 2, 'bfloat16']],
            [gguf.QuantizationType.Q4_0_4_4, [32, 2 + 16, '']],
            [gguf.QuantizationType.Q4_0_4_8, [32, 2 + 16, '']],
            [gguf.QuantizationType.Q4_0_8_8, [32, 2 + 16, '']],
            [gguf.QuantizationType.TQ1_0, [256, 2 + 4 * 13, '']],
            [gguf.QuantizationType.TQ2_0, [256, 2 + 64, '']],
            [gguf.QuantizationType.MXFP4, [32, 1 + 16, 'mxfp4']]
        ]);
    }

    // Parse the GGUF container: header, key/value metadata, tensor info
    // records, then per-tensor data slices at the aligned data offset.
    async read() {
        const context = this.context;
        const stream = context.stream;
        let reader = await context.read('binary');
        reader = new gguf.BinaryReader(reader);
        this.tensors = new Map();
        this.metadata = new Map();
        this.header = {};
        this.header.magic = String.fromCharCode.apply(null, reader.read(4)); // 'GGUF'
        this.header.version = reader.uint32();
        this.format = `GGUF v${this.header.version}`;
        // NOTE(review): only version >= 2 is parsed; older files leave
        // metadata and tensors empty.
        if (this.header.version >= 2) {
            // v2+: tensor and key/value counts are 64-bit.
            this.header.n_tensors = reader.uint64().toNumber();
            this.header.n_kv = reader.uint64().toNumber();
            for (let i = 0; i < this.header.n_kv; i++) {
                const entry = reader.entry();
                this.metadata.set(entry.name, entry.value);
            }
            const tensors = this.header.n_tensors;
            if (tensors > 0) {
                for (let i = 0; i < tensors; i++) {
                    const tensor = reader.tensor();
                    this.tensors.set(tensor.name, tensor);
                }
                // Tensor data begins at the next aligned boundary; 32 when
                // 'general.alignment' metadata is absent.
                this.alignment = this.metadata.get('general.alignment') || 32;
                const offset_pad = reader.position % this.alignment;
                if (offset_pad !== 0) {
                    reader.skip(this.alignment - offset_pad);
                }
                const offset = reader.position;
                for (const tensor of this.tensors.values()) {
                    if (!gguf.Reader.GGML_QUANT_SIZES.has(tensor.type)) {
                        throw new gguf.Error(`Unsupported tensor quantization type '${tensor.type}'.`);
                    }
                    const [block_size, type_size, dtype] = gguf.Reader.GGML_QUANT_SIZES.get(tensor.type);
                    tensor.block_size = block_size;
                    tensor.type_size = type_size;
                    tensor.dtype = dtype || '?';
                    // Data is sliced only when the data section is present in the stream.
                    if (offset < reader.length) {
                        // bytes = element count * bytes-per-block / elements-per-block
                        const n_elems = tensor.ne.reduce((a, b) => a * b, 1);
                        const n_bytes = Math.floor((n_elems * type_size) / block_size);
                        reader.seek(offset + tensor.offset);
                        tensor.data = reader.stream(n_bytes);
                    }
                }
            }
        }
        stream.seek(0); // rewind so the stream can be re-read elsewhere
        delete this.context; // parsing is done; drop the reference
    }
};
gguf.BinaryReader = class {

    // Thin wrapper around a positioned binary reader that adds the
    // GGUF-specific string, typed-value, metadata-entry and tensor-info
    // record decoders.
    constructor(reader) {
        this._source = reader;
    }

    get length() {
        return this._source.length;
    }

    get position() {
        return this._source.position;
    }

    seek(position) {
        this._source.seek(position);
    }

    skip(offset) {
        this._source.skip(offset);
    }

    stream(length) {
        return this._source.stream(length);
    }

    read(length) {
        return this._source.read(length);
    }

    byte() {
        return this._source.byte();
    }

    int8() {
        return this._source.int8();
    }

    uint16() {
        return this._source.uint16();
    }

    int16() {
        return this._source.int16();
    }

    uint32() {
        return this._source.uint32();
    }

    int32() {
        return this._source.int32();
    }

    uint64() {
        return this._source.uint64();
    }

    int64() {
        return this._source.int64();
    }

    float32() {
        return this._source.float32();
    }

    // Length-prefixed string: uint64 byte count followed by raw characters.
    string() {
        const length = this.uint64().toNumber();
        const buffer = this.read(length);
        return String.fromCharCode.apply(null, buffer);
    }

    // Decode one value for the given gguf.Type tag; arrays carry an element
    // type and count prefix and recurse per element.
    value(type) {
        if (type === gguf.Type.ARRAY) {
            const itemType = this.uint32();
            const count = this.uint64().toNumber();
            const values = [];
            for (let i = 0; i < count; i++) {
                values.push(this.value(itemType));
            }
            return values;
        }
        switch (type) {
            case gguf.Type.UINT8: return this.byte();
            case gguf.Type.INT8: return this.int8();
            case gguf.Type.UINT16: return this.uint16();
            case gguf.Type.INT16: return this.int16();
            case gguf.Type.UINT32: return this.uint32();
            case gguf.Type.INT32: return this.int32();
            case gguf.Type.UINT64: return this.uint64();
            case gguf.Type.INT64: return this.int64();
            case gguf.Type.FLOAT32: return this.float32();
            case gguf.Type.BOOL: return this.byte() !== 0;
            case gguf.Type.STRING: return this.string();
            default: throw new gguf.Error(`Unsupported GGUF type '${type}'.`);
        }
    }

    // One metadata entry: name string, uint32 type tag, typed value.
    entry() {
        const name = this.string();
        const type = this.uint32();
        return { name, value: this.value(type), type };
    }

    // One tensor info record: name, dimension list, type id, data offset.
    tensor() {
        const name = this.string();
        const n_dims = this.uint32();
        const ne = [];
        for (let i = 0; i < n_dims; i++) {
            ne.push(this.uint64().toNumber());
        }
        const type = this.uint32();
        const offset = this.uint64().toNumber();
        return { name, ne, type, offset };
    }
};
// GGUF metadata value-type tags: the uint32 tag that precedes each metadata
// value, dispatched in gguf.BinaryReader.value().
gguf.Type = {
    UINT8: 0,
    INT8: 1,
    UINT16: 2,
    INT16: 3,
    UINT32: 4,
    INT32: 5,
    FLOAT32: 6,
    BOOL: 7,
    STRING: 8,
    ARRAY: 9, // element type + count prefix, then packed element values
    UINT64: 10,
    INT64: 11,
    // NOTE(review): BinaryReader.value() has no FLOAT64 case, so entries of
    // this type currently throw gguf.Error.
    FLOAT64: 12,
};
// ggml tensor data/quantization type identifiers, as stored in GGUF tensor
// info records and used as keys into gguf.Reader.GGML_QUANT_SIZES.
// Values 4-5 and 36-38 are not assigned in this table.
gguf.QuantizationType = {
    F32: 0,
    F16: 1,
    Q4_0: 2,
    Q4_1: 3,
    Q5_0: 6,
    Q5_1: 7,
    Q8_0: 8,
    Q8_1: 9,
    Q2_K: 10,
    Q3_K: 11,
    Q4_K: 12,
    Q5_K: 13,
    Q6_K: 14,
    Q8_K: 15,
    IQ2_XXS: 16,
    IQ2_XS: 17,
    IQ3_XXS: 18,
    IQ1_S: 19,
    IQ4_NL: 20,
    IQ3_S: 21,
    IQ2_S: 22,
    IQ4_XS: 23,
    I8: 24,
    I16: 25,
    I32: 26,
    I64: 27,
    F64: 28,
    IQ1_M: 29,
    BF16: 30,
    Q4_0_4_4: 31,
    Q4_0_4_8: 32,
    Q4_0_8_8: 33,
    TQ1_0: 34,
    TQ2_0: 35,
    MXFP4: 39
};
gguf.Utility = class {

    // Reverse-lookup an enum object: maps a numeric value back to its key
    // name, falling back to the value itself when no key matches. The
    // inverted table is built once per enum object and cached.
    static enum(type, value) {
        gguf.Utility._enums = gguf.Utility._enums || new Map();
        let entries = gguf.Utility._enums.get(type);
        if (!entries) {
            entries = new Map(Object.entries(type).map(([name, code]) => [code, name]));
            gguf.Utility._enums.set(type, entries);
        }
        return entries.has(value) ? entries.get(value) : value;
    }
};
gguf.Error = class extends Error {

    // Error type raised for malformed or unsupported GGUF content.
    constructor(message) {
        super(message);
        this.name = 'GGML Error';
    }
};
- export const ModelFactory = gguf.ModelFactory;
|