// gguf.js — reader for the GGUF (ggml / llama.cpp) model file format.
// Namespace object holding all GGUF model-format classes defined below.
const gguf = {};
  2. gguf.ModelFactory = class {
  3. async match(context) {
  4. const reader = gguf.Reader.open(context);
  5. if (reader) {
  6. return context.set('gguf', reader);
  7. }
  8. return null;
  9. }
  10. async open(context) {
  11. const target = context.value;
  12. await target.read();
  13. return new gguf.Model(target);
  14. }
  15. };
  16. gguf.Model = class {
  17. constructor(target) {
  18. this.format = target.format;
  19. this.metadata = [];
  20. const layers = new Map();
  21. for (const [name, tensor] of target.tensors) {
  22. const parts = name.split('.');
  23. const param = parts.pop();
  24. const key = parts.join('.');
  25. if (!layers.has(key)) {
  26. layers.set(key, { name: key, type: 'weights', metadata: new Map(), weights: new Map() });
  27. }
  28. const layer = layers.get(key);
  29. layer.weights.set(param, tensor);
  30. }
  31. const metadata = new Map();
  32. const graph = {};
  33. if (target.metadata.size === 0) {
  34. graph.layers = Array.from(layers.values());
  35. } else {
  36. let architecture = '?';
  37. for (const [name, value] of target.metadata) {
  38. switch (name) {
  39. case 'general.name': this.name = value; break;
  40. case 'general.architecture': architecture = value; break;
  41. case 'general.description': this.description = value; break;
  42. case 'general.author': this.metadata.push(new gguf.Argument('author', value)); break;
  43. case 'general.license': this.metadata.push(new gguf.Argument('license', value)); break;
  44. case 'general.file_type':
  45. case 'general.quantization_version':
  46. break;
  47. default:
  48. metadata.set(name, value);
  49. break;
  50. }
  51. }
  52. const tokenizer = { type: 'tokenizer', metadata: new Map(), layers: [] };
  53. const model = { type: architecture, metadata: new Map(), layers: Array.from(layers.values()) };
  54. for (const [name, value] of metadata) {
  55. if (name.startsWith('tokenizer.')) {
  56. const match = name.match(/^(.*)\.(.*?)$/);
  57. if (match) {
  58. const [, param] = match.slice(1);
  59. tokenizer.metadata.set(param, value);
  60. }
  61. } else if (architecture && name.startsWith(`${architecture}.`)) {
  62. model.metadata.set(name, value);
  63. } else {
  64. this.metadata.push(new gguf.Argument(name, value));
  65. }
  66. }
  67. graph.layers = [model];
  68. if (tokenizer.metadata.size > 0) {
  69. graph.layers.push(tokenizer);
  70. }
  71. }
  72. this.modules = [new gguf.Graph(graph)];
  73. }
  74. };
  75. gguf.Graph = class {
  76. constructor(graph, type = '') {
  77. this.name = graph.type;
  78. this.type = type;
  79. this.nodes = [];
  80. this.inputs = [];
  81. this.outputs = [];
  82. for (const layer of graph.layers) {
  83. const node = new gguf.Node(layer);
  84. this.nodes.push(node);
  85. }
  86. }
  87. };
  88. gguf.Argument = class {
  89. constructor(name, value) {
  90. this.name = name;
  91. this.value = value;
  92. }
  93. };
  94. gguf.Value = class {
  95. constructor(name, tensor) {
  96. this.name = name;
  97. this.type = tensor.type;
  98. this.quantization = tensor.quantization || null;
  99. this.initializer = tensor;
  100. }
  101. };
  102. gguf.Node = class {
  103. constructor(layer) {
  104. if (Array.isArray(layer.layers) && layer.layers.length > 0) {
  105. this.type = new gguf.Graph(layer, 'weights');
  106. } else {
  107. this.type = { name: layer.type };
  108. }
  109. this.name = layer.name || '';
  110. this.inputs = [];
  111. this.outputs = [];
  112. this.attributes = [];
  113. if (layer.weights) {
  114. for (const [name, weight] of layer.weights) {
  115. const tensor = new gguf.Tensor(weight);
  116. const value = new gguf.Value(weight.name, tensor);
  117. const argument = new gguf.Argument(name, [value]);
  118. this.inputs.push(argument);
  119. }
  120. }
  121. if (layer.metadata) {
  122. for (const [name, value] of layer.metadata) {
  123. const attribute = new gguf.Argument(name, value);
  124. this.attributes.push(attribute);
  125. }
  126. }
  127. }
  128. };
  129. gguf.TensorType = class {
  130. constructor(dataType, shape) {
  131. this.dataType = dataType;
  132. this.shape = shape;
  133. }
  134. toString() {
  135. return (this.dataType || '?') + this.shape.toString();
  136. }
  137. };
  138. gguf.TensorShape = class {
  139. constructor(dimensions) {
  140. this.dimensions = dimensions;
  141. }
  142. toString() {
  143. return `[${this.dimensions.map((dimension) => dimension.toString()).join(',')}]`;
  144. }
  145. };
  146. gguf.Tensor = class {
  147. constructor(tensor) {
  148. const shape = new gguf.TensorShape(tensor.ne);
  149. this.type = new gguf.TensorType(tensor.dtype, shape);
  150. if (tensor.type !== gguf.QuantizationType.F32 && tensor.type !== gguf.QuantizationType.F16) {
  151. this.quantization = {
  152. type: gguf.Utility.enum(gguf.QuantizationType, tensor.type).toLowerCase()
  153. };
  154. }
  155. if (tensor.dtype === 'float32' || tensor.dtype === 'float16' ||
  156. tensor.dtype === 'int8' || tensor.dtype === 'int16' || tensor.dtype === 'int32') {
  157. this.encoding = '<';
  158. this._data = tensor.data;
  159. }
  160. }
  161. get values() {
  162. if (this._data) {
  163. return this._data.peek();
  164. }
  165. return null;
  166. }
  167. };
// Parses the GGUF container: header, metadata key/value entries, tensor
// descriptors and (when present in the stream) the tensor data payloads.
gguf.Reader = class {
    // Returns a Reader when the stream begins with the ASCII magic 'GGUF',
    // otherwise null.
    static open(context) {
        const stream = context.stream;
        if (stream && stream.length > 4) {
            const buffer = stream.peek(4);
            const signature = String.fromCharCode.apply(null, buffer);
            if (signature === 'GGUF') {
                return new gguf.Reader(context);
            }
        }
        return null;
    }
    constructor(context) {
        this.context = context;
        // Super-block element count used by the k-quant size formulas below.
        const QK_K = 256;
        // Lazily built, shared across instances:
        // quantization type -> [block_size (elements), type_size (bytes per block), dtype name].
        // An empty dtype string means the element type has no plain decoding here.
        gguf.Reader.GGML_QUANT_SIZES = gguf.Reader.GGML_QUANT_SIZES || new Map([
            [gguf.QuantizationType.F32, [1, 4, 'float32']],
            [gguf.QuantizationType.F16, [1, 2, 'float16']],
            [gguf.QuantizationType.Q4_0, [32, 2 + 16, '']],
            [gguf.QuantizationType.Q4_1, [32, 2 + 2 + 16, '']],
            [gguf.QuantizationType.Q5_0, [32, 2 + 4 + 16, '']],
            [gguf.QuantizationType.Q5_1, [32, 2 + 2 + 4 + 16, '']],
            [gguf.QuantizationType.Q8_0, [32, 2 + 32, 'q8_0']],
            [gguf.QuantizationType.Q8_1, [32, 4 + 4 + 32, '']],
            [gguf.QuantizationType.Q2_K, [256, 2 + 2 + Math.floor(QK_K / 16) + Math.floor(QK_K / 4), '']],
            [gguf.QuantizationType.Q3_K, [256, 2 + Math.floor(QK_K / 4) + Math.floor(QK_K / 8) + 12, '']],
            [gguf.QuantizationType.Q4_K, [256, 2 + 2 + Math.floor(QK_K / 2) + 12, '']],
            [gguf.QuantizationType.Q5_K, [256, 2 + 2 + Math.floor(QK_K / 2) + Math.floor(QK_K / 8) + 12, '']],
            [gguf.QuantizationType.Q6_K, [256, 2 + Math.floor(QK_K / 2) + Math.floor(QK_K / 4) + Math.floor(QK_K / 16), '']],
            [gguf.QuantizationType.Q8_K, [256, 4 + QK_K + Math.floor(QK_K / 8), '']],
            [gguf.QuantizationType.IQ2_XXS, [256, 2 + Math.floor(QK_K / 4), '']],
            [gguf.QuantizationType.IQ2_XS, [256, 2 + Math.floor(QK_K / 4) + Math.floor(QK_K / 32), '']],
            [gguf.QuantizationType.IQ3_XXS, [256, 2 + Math.floor(QK_K / 4) + Math.floor(QK_K / 8), '']],
            [gguf.QuantizationType.IQ1_S, [256, 2 + Math.floor(QK_K / 8) + Math.floor(QK_K / 16), '']],
            [gguf.QuantizationType.IQ4_NL, [32, 2 + 16, '']],
            [gguf.QuantizationType.IQ3_S, [256, 2 + Math.floor(QK_K / 4) + Math.floor(QK_K / 8) + Math.floor(QK_K / 32) + 4, '']],
            [gguf.QuantizationType.IQ2_S, [256, 2 + Math.floor(QK_K / 4) + Math.floor(QK_K / 16), '']],
            [gguf.QuantizationType.IQ4_XS, [256, 2 + 2 + Math.floor(QK_K / 2) + Math.floor(QK_K / 64), '']],
            [gguf.QuantizationType.I8, [1, 1, 'int8']],
            [gguf.QuantizationType.I16, [1, 2, 'int16']],
            [gguf.QuantizationType.I32, [1, 4, 'int32']],
            [gguf.QuantizationType.I64, [1, 8, 'int64']],
            [gguf.QuantizationType.F64, [1, 8, 'float64']],
            // NOTE(review): IQ1_M has no third (dtype) element — the
            // destructure below yields undefined and dtype falls back to '?'.
            [gguf.QuantizationType.IQ1_M, [256, Math.floor(QK_K / 8) + Math.floor(QK_K / 16) + Math.floor(QK_K / 32)]],
            [gguf.QuantizationType.BF16, [1, 2, 'bfloat16']],
            [gguf.QuantizationType.Q4_0_4_4, [32, 2 + 16, '']],
            [gguf.QuantizationType.Q4_0_4_8, [32, 2 + 16, '']],
            [gguf.QuantizationType.Q4_0_8_8, [32, 2 + 16, '']],
            [gguf.QuantizationType.TQ1_0, [256, 2 + 4 * 13, '']],
            [gguf.QuantizationType.TQ2_0, [256, 2 + 64, '']],
            [gguf.QuantizationType.MXFP4, [32, 1 + 16, 'mxfp4']]
        ]);
    }
    // Reads the whole container in file order. Populates this.header,
    // this.metadata (Map name -> value), this.tensors (Map name -> record)
    // and this.format. Throws gguf.Error on unsupported quantization types.
    async read() {
        const context = this.context;
        const stream = context.stream;
        let reader = await context.read('binary');
        reader = new gguf.BinaryReader(reader);
        this.tensors = new Map();
        this.metadata = new Map();
        this.header = {};
        this.header.magic = String.fromCharCode.apply(null, reader.read(4));
        this.header.version = reader.uint32();
        this.format = `GGUF v${this.header.version}`;
        // Only version >= 2 headers (64-bit counts) are parsed; older files
        // yield empty metadata/tensor maps.
        if (this.header.version >= 2) {
            this.header.n_tensors = reader.uint64().toNumber();
            this.header.n_kv = reader.uint64().toNumber();
            // Metadata key/value section precedes the tensor descriptors.
            for (let i = 0; i < this.header.n_kv; i++) {
                const entry = reader.entry();
                this.metadata.set(entry.name, entry.value);
            }
            const tensors = this.header.n_tensors;
            if (tensors > 0) {
                for (let i = 0; i < tensors; i++) {
                    const tensor = reader.tensor();
                    this.tensors.set(tensor.name, tensor);
                }
                // Tensor data starts at the next aligned offset after the
                // descriptors; default alignment is 32 bytes.
                this.alignment = this.metadata.get('general.alignment') || 32;
                const offset_pad = reader.position % this.alignment;
                if (offset_pad !== 0) {
                    reader.skip(this.alignment - offset_pad);
                }
                const offset = reader.position;
                for (const tensor of this.tensors.values()) {
                    if (!gguf.Reader.GGML_QUANT_SIZES.has(tensor.type)) {
                        throw new gguf.Error(`Unsupported tensor quantization type '${tensor.type}'.`);
                    }
                    const [block_size, type_size, dtype] = gguf.Reader.GGML_QUANT_SIZES.get(tensor.type);
                    tensor.block_size = block_size;
                    tensor.type_size = type_size;
                    tensor.dtype = dtype || '?';
                    // Slice out the payload only when it lies within the
                    // stream (tensor.offset is relative to the data section).
                    if (offset < reader.length) {
                        const n_elems = tensor.ne.reduce((a, b) => a * b, 1);
                        const n_bytes = Math.floor((n_elems * type_size) / block_size);
                        reader.seek(offset + tensor.offset);
                        tensor.data = reader.stream(n_bytes);
                    }
                }
            }
        }
        // Rewind for any other consumer of the stream and drop the context
        // reference once parsing is complete.
        stream.seek(0);
        delete this.context;
    }
};
  272. gguf.BinaryReader = class {
  273. constructor(reader) {
  274. this._reader = reader;
  275. }
  276. get length() {
  277. return this._reader.length;
  278. }
  279. get position() {
  280. return this._reader.position;
  281. }
  282. seek(position) {
  283. this._reader.seek(position);
  284. }
  285. skip(offset) {
  286. this._reader.skip(offset);
  287. }
  288. stream(length) {
  289. return this._reader.stream(length);
  290. }
  291. read(length) {
  292. return this._reader.read(length);
  293. }
  294. byte() {
  295. return this._reader.byte();
  296. }
  297. int8() {
  298. return this._reader.int8();
  299. }
  300. uint16() {
  301. return this._reader.uint16();
  302. }
  303. int16() {
  304. return this._reader.int16();
  305. }
  306. uint32() {
  307. return this._reader.uint32();
  308. }
  309. int32() {
  310. return this._reader.int32();
  311. }
  312. uint64() {
  313. return this._reader.uint64();
  314. }
  315. int64() {
  316. return this._reader.int64();
  317. }
  318. float32() {
  319. return this._reader.float32();
  320. }
  321. string() {
  322. const size = this.uint64().toNumber();
  323. const buffer = this.read(size);
  324. return String.fromCharCode.apply(null, buffer);
  325. }
  326. value(type) {
  327. switch (type) {
  328. case gguf.Type.UINT8: return this.byte();
  329. case gguf.Type.INT8: return this.int8();
  330. case gguf.Type.UINT16: return this.uint16();
  331. case gguf.Type.INT16: return this.int16();
  332. case gguf.Type.UINT32: return this.uint32();
  333. case gguf.Type.INT32: return this.int32();
  334. case gguf.Type.UINT64: return this.uint64();
  335. case gguf.Type.INT64: return this.int64();
  336. case gguf.Type.FLOAT32: return this.float32();
  337. case gguf.Type.BOOL: return this.byte() !== 0;
  338. case gguf.Type.STRING: return this.string();
  339. case gguf.Type.ARRAY: {
  340. const type = this.uint32();
  341. const size = this.uint64().toNumber();
  342. const value = new Array(size);
  343. for (let i = 0; i < size; i++) {
  344. value[i] = this.value(type);
  345. }
  346. return value;
  347. }
  348. default: {
  349. throw new gguf.Error(`Unsupported GGUF type '${type}'.`);
  350. }
  351. }
  352. }
  353. entry() {
  354. const name = this.string();
  355. const type = this.uint32();
  356. const value = this.value(type);
  357. return { name, value, type };
  358. }
  359. tensor() {
  360. const tensor = {};
  361. tensor.name = this.string();
  362. const n_dims = this.uint32();
  363. tensor.ne = new Array(n_dims);
  364. for (let i = 0; i < n_dims; i++) {
  365. tensor.ne[i] = this.uint64().toNumber();
  366. }
  367. tensor.type = this.uint32();
  368. tensor.offset = this.uint64().toNumber();
  369. return tensor;
  370. }
  371. };
// Metadata value type tags read from GGUF key/value entries and dispatched
// in gguf.BinaryReader.value().
gguf.Type = {
    UINT8: 0,
    INT8: 1,
    UINT16: 2,
    INT16: 3,
    UINT32: 4,
    INT32: 5,
    FLOAT32: 6,
    BOOL: 7,
    STRING: 8,
    ARRAY: 9,
    UINT64: 10,
    INT64: 11,
    // NOTE(review): BinaryReader.value() has no FLOAT64 case, so entries
    // with this tag would currently throw 'Unsupported GGUF type'.
    FLOAT64: 12,
};
// Tensor storage/quantization type identifiers, keyed by the uint32 value
// read from each tensor descriptor. Numbering gaps (4-5, 36-38) correspond
// to identifiers this reader does not define; tensors using an identifier
// absent from gguf.Reader.GGML_QUANT_SIZES are rejected during read().
gguf.QuantizationType = {
    F32: 0,
    F16: 1,
    Q4_0: 2,
    Q4_1: 3,
    Q5_0: 6,
    Q5_1: 7,
    Q8_0: 8,
    Q8_1: 9,
    Q2_K: 10,
    Q3_K: 11,
    Q4_K: 12,
    Q5_K: 13,
    Q6_K: 14,
    Q8_K: 15,
    IQ2_XXS: 16,
    IQ2_XS: 17,
    IQ3_XXS: 18,
    IQ1_S: 19,
    IQ4_NL: 20,
    IQ3_S: 21,
    IQ2_S: 22,
    IQ4_XS: 23,
    I8: 24,
    I16: 25,
    I32: 26,
    I64: 27,
    F64: 28,
    IQ1_M: 29,
    BF16: 30,
    Q4_0_4_4: 31,
    Q4_0_4_8: 32,
    Q4_0_8_8: 33,
    TQ1_0: 34,
    TQ2_0: 35,
    MXFP4: 39
};
  424. gguf.Utility = class {
  425. static enum(type, value) {
  426. gguf.Utility._enums = gguf.Utility._enums || new Map();
  427. if (!gguf.Utility._enums.has(type)) {
  428. const entries = new Map(Object.entries(type).map(([key, value]) => [value, key]));
  429. gguf.Utility._enums.set(type, entries);
  430. }
  431. const entries = gguf.Utility._enums.get(type);
  432. if (entries.has(value)) {
  433. return entries.get(value);
  434. }
  435. return value;
  436. }
  437. };
// Error type thrown for malformed or unsupported GGUF content.
gguf.Error = class extends Error {
    constructor(message) {
        super(message);
        this.name = 'GGML Error';
    }
};
// Public entry point consumed by the host application's format registry.
export const ModelFactory = gguf.ModelFactory;