Lutz Roeder 6 лет назад
Родитель
Commit
d124febf70
5 измененных файлов с 151 добавлено и 76 удалено
  1. 2 2
      src/chainer.js
  2. 0 1
      src/gzip.js
  3. 140 62
      src/hdf5.js
  4. 1 1
      src/keras.js
  5. 8 10
      src/zip.js

+ 2 - 2
src/chainer.js

@@ -13,7 +13,7 @@ chainer.ModelFactory = class {
         const extension = identifier.split('.').pop().toLowerCase();
         switch (extension) {
             case 'npz':
-                return context.entries.length > 0;
+                return context.entries.length > 0 && context.entries.every((entry) => entry.name.indexOf('/') !== -1);
             case 'h5':
             case 'hdf5':
                 return true;
@@ -253,7 +253,7 @@ chainer.ModelFactory = class {
                             dataType: variable.type,
                             byteOrder: variable.littleEndian ? '<' : '>',
                             shape: variable.shape, 
-                            data: variable.rawData });
+                            data: variable.data });
                     }
                 }
 

+ 0 - 1
src/gzip.js

@@ -5,7 +5,6 @@ var gzip = gzip || {};
 
 gzip.Archive = class {
 
-    // inflate (optional): optimized inflater callback like require('zlib').inflateRawSync or pako.inflateRaw
     constructor(buffer) {
         this._entries = [];
         if (buffer.length < 18 || buffer[0] != 0x1f || buffer[1] != 0x8b) {

+ 140 - 62
src/hdf5.js

@@ -1,14 +1,16 @@
 /* jshint esversion: 6 */
 /* eslint "indent": [ "error", 4, { "SwitchCase": 1 } ] */
 
-// Experimental H5/HDF5 JavaScript reader
+// Experimental HDF5 JavaScript reader
 
 var hdf5 = hdf5 || {};
 var long = long || { Long: require('long') };
+var zip = zip || require('./zip');
 
 hdf5.File = class {
 
     constructor(buffer) {
+        // https://support.hdfgroup.org/HDF5/doc/H5.format.html
         const reader = new hdf5.Reader(buffer, 0);
         this._globalHeap = new hdf5.GlobalHeap(reader);
         if (!reader.match('\x89HDF\r\n\x1A\n')) {
@@ -135,8 +137,9 @@ hdf5.Group = class {
             const datatype = this._dataObjectHeader.datatype;
             const dataspace = this._dataObjectHeader.dataspace;
             const dataLayout = this._dataObjectHeader.dataLayout;
+            const filterPipeline = this._dataObjectHeader.filterPipeline;
             if (datatype && dataspace && dataLayout) {
-                this._value = new hdf5.Variable(this._reader, this._globalHeap, datatype, dataspace, dataLayout);
+                this._value = new hdf5.Variable(this._reader, this._globalHeap, datatype, dataspace, dataLayout, filterPipeline);
             }
         }
     }
@@ -177,12 +180,13 @@ hdf5.Group = class {
 
 hdf5.Variable = class {
 
-    constructor(reader, globalHeap, datatype, dataspace, dataLayout) {
+    constructor(reader, globalHeap, datatype, dataspace, dataLayout, filterPipeline) {
         this._reader = reader;
         this._globalHeap = globalHeap;
         this._datatype = datatype;
         this._dataspace = dataspace;
         this._dataLayout = dataLayout;
+        this._filterPipeline = filterPipeline;
     }
 
     get type () {
@@ -198,20 +202,55 @@ hdf5.Variable = class {
     }
 
     get value() {
-        if (this._dataLayout.address) {
-            const reader = this._reader.at(this._dataLayout.address);
-            const data = this._dataspace.read(this._datatype, reader);
-            const value = this._dataspace.decode(this._datatype, data, data, this._globalHeap);
+        var data = this.data;
+        if (data) {
+            const reader = new hdf5.Reader(data);
+            const array = this._dataspace.read(this._datatype, reader);
+            const value = this._dataspace.decode(this._datatype, array, array, this._globalHeap);
             return value;
         }
         return null;
     }
 
-    get rawData() {
-        if (this._dataLayout.address) {
-            const reader = this._reader.at(this._dataLayout.address);
-            return reader.bytes(this._dataLayout.size);
-        }
+    get data() {
+        switch (this._dataLayout.layoutClass) {
+            case 1: // Contiguous
+                if (this._dataLayout.address) {
+                    return this._reader.at(this._dataLayout.address).bytes(this._dataLayout.size);
+                }
+                break;
+            case 2: // Chunked
+                var tree = new hdf5.Tree(this._reader.at(this._dataLayout.address), this._dataLayout.dimensionality);
+                if (this._dataLayout.dimensionality == 2 && this._dataspace.shape.length == 1) {
+                    var size = this._dataLayout.datasetElementSize;
+                    for (let i = 0; i < this._dataspace.shape.length; i++) {
+                        size *= this._dataspace.shape[i];
+                    }
+                    var data = new Uint8Array(size);
+                    for (let node of tree.nodes) {
+                        if (node.fields.length !== 2 || node.fields[1] !== 0) {
+                            return null;
+                        }
+                        if (node.filterMask !== 0) {
+                            return null;
+                        }
+                        const start = node.fields[0] * this._dataLayout.datasetElementSize;
+                        var chunk = node.data;
+                        if (this._filterPipeline) {
+                            for (let filter of this._filterPipeline.filters) {
+                                chunk = filter.decode(chunk);
+                            }
+                        }
+                        for (let i = 0; i < chunk.length; i++) {
+                            data[start + i] = chunk[i];
+                        }
+                    }
+                    return data;
+                }
+                break;
+            default:
+                throw new hdf5.Error("Unknown data layout class '" + this.layoutClass + "'.");
+        } 
         return null;
     }
 };
@@ -360,9 +399,18 @@ hdf5.Reader = class {
     offset() { 
         switch (this._offsetSize) {
             case 8:
-                return this.uint64();
+                var lo = this.uint32();
+                var hi = this.uint32();
+                if (lo === 0xffffffff && hi === 0xffffffff) {
+                    return undefined;
+                }
+                return new long.Long(lo, hi, true).toNumber();
             case 4:
-                return this.uint32(); 
+                var value = this.uint32();
+                if (value === 0xffffffff) {
+                    return undefined;
+                }
+                return value;
         }
         throw new hdf5.Error('Unsupported offset size \'' + this._offsetSize + '\'.');
     }
@@ -370,9 +418,18 @@ hdf5.Reader = class {
     length() {
         switch (this._lengthSize) {
             case 8:
-                return this.uint64();
+                var lo = this.uint32();
+                var hi = this.uint32();
+                if (lo === 0xffffffff && hi === 0xffffffff) {
+                    return undefined;
+                }
+                return new long.Long(lo, hi, true).toNumber();
             case 4:
-                return this.uint32(); 
+                var value = this.uint32();
+                if (value === 0xffffffff) {
+                    return undefined;
+                }
+                return value;
         }
         throw new hdf5.Error('Unsupported length size \'' + this._lengthSize + '\'.');
     }
@@ -503,7 +560,7 @@ hdf5.DataObjectHeader = class {
             const messageFlags = reader.byte();
             reader.seek(3);
             reader.align(8);
-            let next = this.readMessage(reader, messageType, messageSize, messageFlags);
+            let next = this._readMessage(reader, messageType, messageSize, messageFlags);
             if ((!next || reader.position >= end) && this.continuations.length > 0) {
                 const continuation = this.continuations.shift();
                 reader = reader.at(continuation.offset);
@@ -538,7 +595,7 @@ hdf5.DataObjectHeader = class {
                 if ((flags & 0x04) != 0) {
                     reader.uint16();
                 }
-                next = this.readMessage(reader, messageType, messageSize, messageFlags);
+                next = this._readMessage(reader, messageType, messageSize, messageFlags);
             } 
             if ((!next || reader.position >= end) && this.continuations.length > 0) {
                 const continuation = this.continuations.shift();
@@ -552,7 +609,7 @@ hdf5.DataObjectHeader = class {
         }
     }
 
-    readMessage(reader, type, size, flags) {
+    _readMessage(reader, type, size, flags) {
         switch(type) {
             case 0x0000: // NIL
                 return false;
@@ -819,10 +876,10 @@ hdf5.Dataspace = class {
         if (this._dimensions == 0) {
             return datatype.read(reader);
         }
-        return this.readArray(datatype, reader, this._sizes, 0);
+        return this._readArray(datatype, reader, this._sizes, 0);
     }
 
-    readArray(datatype, reader, shape, dimension) {
+    _readArray(datatype, reader, shape, dimension) {
         const array = [];
         const size = shape[dimension];
         if (dimension == shape.length - 1) {
@@ -832,7 +889,7 @@ hdf5.Dataspace = class {
         }
         else {
             for (let j = 0; j < size; j++) {
-                array.push(this.readArray(datatype, reader, shape, dimension + 1));
+                array.push(this._readArray(datatype, reader, shape, dimension + 1));
             }
         }     
         return array;
@@ -842,10 +899,10 @@ hdf5.Dataspace = class {
         if (this._dimensions == 0) {
             return datatype.decode(data, globalHeap);
         }
-        return this.decodeArray(datatype, data, globalHeap, this._sizes, 0);
+        return this._decodeArray(datatype, data, globalHeap, this._sizes, 0);
     }
 
-    decodeArray(datatype, data, globalHeap, shape, dimension) {
+    _decodeArray(datatype, data, globalHeap, shape, dimension) {
         const size = shape[dimension];
         if (dimension == shape.length - 1) {
             for (let i = 0; i < size; i++) {
@@ -854,7 +911,7 @@ hdf5.Dataspace = class {
         }
         else {
             for (let j = 0; j < size; j++) {
-                data[j] = this.decodeArray(datatype, data[j], shape, dimension + 1);
+                data[j] = this._decodeArray(datatype, data[j], shape, dimension + 1);
             }
         }
         return data;
@@ -898,10 +955,6 @@ hdf5.FillValue = class {
                 }
                 break;
             case 3:
-                // let flags = reader.byte();
-                // if ((flags & 0x20) != 0) {
-                // }
-                break;
             default:
                 throw new hdf5.Error('Unsupported fill value version \'' + version + '\'.');
         }
@@ -942,26 +995,24 @@ hdf5.DataLayout = class {
         const version = reader.byte();
         switch (version) {
             case 3:
-                var layoutClass = reader.byte();
-                switch (layoutClass) {
-                    case 1: // Contiguous Storage
+                this.layoutClass = reader.byte();
+                switch (this.layoutClass) {
+                    case 1: // Contiguous
                         this.address = reader.offset();
                         this.size = reader.length();
                         break;
                     case 2: // Chunked
-                        // debugger;
-                        var dimensionality = reader.byte();
+                        this.dimensionality = reader.byte();
                         this.address = reader.offset();
-                        // const tree = new hdf5.Tree(reader.at(this.address));
                         this.dimensionSizes = [];
-                        for (let i = 0; i < dimensionality; i++) {
+                        for (let i = 0; i < this.dimensionality - 1; i++) {
                             this.dimensionSizes.push(reader.int32());
                         }
                         this.datasetElementSize = reader.int32();
                         break;
-                    case 0: // Compact Storage
+                    case 0: // Compact
                     default:
-                        throw new hdf5.Error('Unsupported data layout class \'' + layoutClass + '\'.');
+                        throw new hdf5.Error('Unsupported data layout class \'' + this.layoutClass + '\'.');
                 }
                 break;
             default:
@@ -999,18 +1050,13 @@ hdf5.FilterPipeline = class {
         const version = reader.byte();
         switch (version) {
             case 1:
-                var numberOfFilters = reader.byte();
-                reader.seek(2 + 6);
                 this.filters = [];
+                var numberOfFilters = reader.byte();
+                reader.seek(2);
+                reader.seek(4);
                 for (let i = 0; i < numberOfFilters; i++) {
-                    const id = reader.int16();
-                    const nameLength = reader.int16();
-                    const flags = reader.int16();
-                    const numberOfClientDataValues = reader.int16();
-                    const name = reader.string(nameLength, 'ascii');
-                    const clientData = reader.bytes(numberOfClientDataValues * 4);
+                    this.filters.push(new hdf5.Filter(reader));
                     reader.align(8);
-                    this.filters.push({ id: id, name: name, flags: flags, clientData: clientData });
                 }
                 break;
             default:
@@ -1019,6 +1065,29 @@ hdf5.FilterPipeline = class {
     }
 };
 
+hdf5.Filter = class {
+
+    constructor(reader) {
+        this.id = reader.int16();
+        const nameLength = reader.int16();
+        this.flags = reader.int16();
+        const clientDataSize = reader.int16();
+        this.name = reader.string(nameLength, 'ascii');
+        this.clientData = reader.bytes(clientDataSize * 4);
+    }
+
+    decode(data) {
+        switch (this.id) {
+            case 1: // gzip
+                var rawData = data.subarray(2, data.length); // skip zlib header
+                var uncompressed = new zip.Inflater().inflateRaw(rawData);
+                return uncompressed;
+            default:
+                throw hdf5.Error("Unsupported filter '" + this.name + "'.");
+        }
+    }
+}
+
 hdf5.Attribute = class {
 
     constructor(reader) {
@@ -1113,43 +1182,52 @@ hdf5.AttributeInfo = class {
 
 hdf5.Tree = class {
 
-    constructor(reader) {
+    constructor(reader, dimensionality) {
+        // https://support.hdfgroup.org/HDF5/doc/H5.format.html#V1Btrees
         if (!reader.match('TREE')) {
             throw new hdf5.Error("Not a valid 'TREE' block.");
         }
-        const type = reader.byte();
-        const level = reader.byte();
+        this.type = reader.byte();
+        this.level = reader.byte();
         const entriesUsed = reader.uint16();
         reader.offset(); // address of left sibling
         reader.offset(); // address of right sibling
         this.nodes = [];
-        switch (type) {
+        switch (this.type) {
             case 0: // Group nodes
                 for (let i = 0; i < entriesUsed; i++) {
                     reader.length();
-                    const childReader = reader.at(reader.offset());
-                    if (level == 0) {
-                        this.nodes.push(new hdf5.SymbolTableNode(childReader));
+                    const childPointer = reader.offset();
+                    if (this.level == 0) {
+                        this.nodes.push(new hdf5.SymbolTableNode(reader.at(childPointer)));
                     }
                     else {
-                        const tree = new hdf5.Tree(childReader);
+                        const tree = new hdf5.Tree(reader.at(childPointer));
                         this.nodes = this.nodes.concat(tree.nodes);
                     }
                 }
                 break;
             case 1: // Raw data chunk nodes
-                /*
                 for (let i = 0; i < entriesUsed; i++) {
                     const size = reader.int32();
                     const filterMask = reader.int32();
-                    const fields = reader.bytes(8 * (2));
-                    const offset = reader.offset();
+                    const fields = [];
+                    for (let j = 0; j < dimensionality; j++) {
+                        fields.push(reader.uint64())
+                    }
+                    const childPointer = reader.offset();
+                    if (this.level == 0) {
+                        const data = reader.at(childPointer).bytes(size);
+                        this.nodes.push({ data: data, fields: fields, filterMask: filterMask });
+                    }
+                    else {
+                        const tree = new hdf5.Tree(reader.at(childPointer), dimensionality);
+                        this.nodes = this.nodes.concat(tree.nodes);
+                    }
                 }
-                */
-                throw new hdf5.Error('Unsupported B-Tree node type \'' + type + '\'.');
-                // break;
+                break;  
             default:
-                throw new hdf5.Error('Unsupported B-Tree node type \'' + type + '\'.');
+                throw new hdf5.Error('Unsupported B-Tree node type \'' + this.type + '\'.');
         }
     }
 };

+ 1 - 1
src/keras.js

@@ -163,7 +163,7 @@ keras.Model = class {
                                 if (variable) {
                                     let parts = weight_name.split('/');
                                     parts.pop();
-                                    let initializer = new keras.Tensor(weight_name, variable.type, variable.shape, variable.littleEndian, variable.rawData, '');
+                                    let initializer = new keras.Tensor(weight_name, variable.type, variable.shape, variable.littleEndian, variable.data, '');
                                     let match = false;
                                     while (parts.length > 0) {
                                         let name = parts.join('/');

+ 8 - 10
src/zip.js

@@ -91,15 +91,7 @@ zip.Entry = class {
                     this._data.set(this._compressedData);
                     break;
                 case 8: // Deflate
-                    if (typeof process === 'object' && typeof process.versions == 'object' && typeof process.versions.node !== 'undefined') {
-                        this._data = require('zlib').inflateRawSync(this._compressedData);
-                    }
-                    else if (typeof pako !== 'undefined') {
-                        this._data = pako.inflateRaw(this._compressedData);
-                    }
-                    else {
-                        this._data = new zip.Inflater().inflateRaw(this._compressedData);
-                    }
+                    this._data = new zip.Inflater().inflateRaw(this._compressedData);
                     if (this._size != this._data.length) {
                         throw new zip.Error('Invalid uncompressed size.');
                     }
@@ -109,7 +101,6 @@ zip.Entry = class {
             }
 
             delete this._size;
-            delete this._inflateRaw;
             delete this._compressedData;
         }
         return this._data;
@@ -172,6 +163,13 @@ zip.Inflater = class {
 
     inflateRaw(data) {
 
+        if (typeof process === 'object' && typeof process.versions == 'object' && typeof process.versions.node !== 'undefined') {
+            return require('zlib').inflateRawSync(data);
+        }
+        if (typeof pako !== 'undefined') {
+            return pako.inflateRaw(data);
+        }
+
         zip.Inflater.initilize();
         zip.HuffmanTree.initialize();