sklearn.js 20 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634
  1. // Experimental
  2. var sklearn = sklearn || {};
  3. sklearn.ModelFactory = class {
  4. match(context) {
  5. const obj = context.open('pkl');
  6. const validate = (obj, name) => {
  7. if (obj && obj.__class__ && obj.__class__.__module__ && obj.__class__.__name__) {
  8. const key = obj.__class__.__module__ + '.' + obj.__class__.__name__;
  9. return key.startsWith(name);
  10. }
  11. return false;
  12. };
  13. const formats = [
  14. { name: 'sklearn.', format: 'sklearn' },
  15. { name: 'xgboost.sklearn.', format: 'sklearn' },
  16. { name: 'lightgbm.sklearn.', format: 'sklearn' },
  17. { name: 'scipy.', format: 'scipy' },
  18. { name: 'hmmlearn.', format: 'hmmlearn' }
  19. ];
  20. for (const format of formats) {
  21. if (validate(obj, format.name)) {
  22. return format.format;
  23. }
  24. if (Array.isArray(obj) && obj.every((item) => validate(item, format.name))) {
  25. return format.format + '.list';
  26. }
  27. if ((Object(obj) === obj) && Object.entries(obj).every((entry) => validate(entry[1], format.name))) {
  28. return format.format + '.map';
  29. }
  30. }
  31. return undefined;
  32. }
  33. open(context, match) {
  34. return context.metadata('sklearn-metadata.json').then((metadata) => {
  35. const obj = context.open('pkl');
  36. return new sklearn.Model(metadata, match, obj);
  37. });
  38. }
  39. };
  40. sklearn.Model = class {
  41. constructor(metadata, match, obj) {
  42. const formats = new Map([ [ 'sklearn', 'scikit-learn' ], [ 'scipy', 'SciPy' ], [ 'hmmlearn', 'hmmlearn' ] ]);
  43. this._format = formats.get(match.split('.').shift());
  44. this._graphs = [];
  45. const version = [];
  46. switch (match) {
  47. case 'sklearn':
  48. case 'scipy':
  49. case 'hmmlearn': {
  50. version.push(obj._sklearn_version ? ' v' + obj._sklearn_version.toString() : '');
  51. this._graphs.push(new sklearn.Graph(metadata, '', obj));
  52. break;
  53. }
  54. case 'sklearn.list':
  55. case 'scipy.list': {
  56. const list = obj;
  57. for (let i = 0; i < list.length; i++) {
  58. const obj = list[i];
  59. this._graphs.push(new sklearn.Graph(metadata, i.toString(), obj));
  60. version.push(obj._sklearn_version ? ' v' + obj._sklearn_version.toString() : '');
  61. }
  62. break;
  63. }
  64. case 'sklearn.map':
  65. case 'scipy.map': {
  66. for (const entry of Object.entries(obj)) {
  67. const obj = entry[1];
  68. this._graphs.push(new sklearn.Graph(metadata, entry[0], obj));
  69. version.push(obj._sklearn_version ? ' v' + obj._sklearn_version.toString() : '');
  70. }
  71. break;
  72. }
  73. default: {
  74. throw new sklearn.Error("Unsupported scikit-learn format '" + match + "'.");
  75. }
  76. }
  77. if (version.every((value) => value === version[0])) {
  78. this._format += version[0];
  79. }
  80. }
  81. get format() {
  82. return this._format;
  83. }
  84. get graphs() {
  85. return this._graphs;
  86. }
  87. };
  88. sklearn.Graph = class {
  89. constructor(metadata, name, obj) {
  90. this._name = name || '';
  91. this._metadata = metadata;
  92. this._nodes = [];
  93. this._groups = false;
  94. this._process('', '', obj, ['data']);
  95. }
  96. _process(group, name, obj, inputs) {
  97. const type = obj.__class__.__module__ + '.' + obj.__class__.__name__;
  98. switch (type) {
  99. case 'sklearn.pipeline.Pipeline': {
  100. this._groups = true;
  101. name = name || 'pipeline';
  102. const childGroup = this._concat(group, name);
  103. for (const step of obj.steps) {
  104. inputs = this._process(childGroup, step[0], step[1], inputs);
  105. }
  106. return inputs;
  107. }
  108. case 'sklearn.pipeline.FeatureUnion': {
  109. this._groups = true;
  110. const outputs = [];
  111. name = name || 'union';
  112. const output = this._concat(group, name);
  113. const subgroup = this._concat(group, name);
  114. this._nodes.push(new sklearn.Node(this._metadata, subgroup, output, obj, inputs, [ output ]));
  115. for (const transformer of obj.transformer_list){
  116. outputs.push(...this._process(subgroup, transformer[0], transformer[1], [ output ]));
  117. }
  118. return outputs;
  119. }
  120. case 'sklearn.compose._column_transformer.ColumnTransformer': {
  121. this._groups = true;
  122. name = name || 'transformer';
  123. const output = this._concat(group, name);
  124. const subgroup = this._concat(group, name);
  125. const outputs = [];
  126. this._nodes.push(new sklearn.Node(this._metadata, subgroup, output, obj, inputs, [ output ]));
  127. for (const transformer of obj.transformers){
  128. if (transformer[1] !== 'passthrough') {
  129. outputs.push(...this._process(subgroup, transformer[0], transformer[1], [ output ]));
  130. }
  131. }
  132. return outputs;
  133. }
  134. default: {
  135. const output = this._concat(group, name);
  136. this._nodes.push(new sklearn.Node(this._metadata, group, output, obj, inputs, output === '' ? [] : [ output ]));
  137. return [ output ];
  138. }
  139. }
  140. }
  141. _concat(parent, name){
  142. return (parent === '' ? name : `${parent}/${name}`);
  143. }
  144. get name() {
  145. return this._name;
  146. }
  147. get groups() {
  148. return this._groups;
  149. }
  150. get inputs() {
  151. return [];
  152. }
  153. get outputs() {
  154. return [];
  155. }
  156. get nodes() {
  157. return this._nodes;
  158. }
  159. };
  160. sklearn.Parameter = class {
  161. constructor(name, args) {
  162. this._name = name;
  163. this._arguments = args;
  164. }
  165. get name() {
  166. return this._name;
  167. }
  168. get visible() {
  169. return true;
  170. }
  171. get arguments() {
  172. return this._arguments;
  173. }
  174. };
  175. sklearn.Argument = class {
  176. constructor(name, type, initializer) {
  177. if (typeof name !== 'string') {
  178. throw new sklearn.Error("Invalid argument identifier '" + JSON.stringify(name) + "'.");
  179. }
  180. this._name = name;
  181. this._type = type || null;
  182. this._initializer = initializer || null;
  183. }
  184. get name() {
  185. return this._name;
  186. }
  187. get type() {
  188. if (this._initializer) {
  189. return this._initializer.type;
  190. }
  191. return this._type;
  192. }
  193. get initializer() {
  194. return this._initializer;
  195. }
  196. };
  197. sklearn.Node = class {
  198. constructor(metadata, group, name, obj, inputs, outputs) {
  199. this._group = group || '';
  200. this._name = name || '';
  201. const type = obj.__class__ ? obj.__class__.__module__ + '.' + obj.__class__.__name__ : 'Object';
  202. this._type = metadata.type(type) || { name: type };
  203. this._inputs = inputs.map((input) => new sklearn.Parameter(input, [ new sklearn.Argument(input, null, null) ]));
  204. this._outputs = outputs.map((output) => new sklearn.Parameter(output, [ new sklearn.Argument(output, null, null) ]));
  205. this._attributes = [];
  206. for (const entry of Object.entries(obj)) {
  207. const name = entry[0];
  208. const value = entry[1];
  209. if (value && sklearn.Utility.isTensor(value)) {
  210. const argument = new sklearn.Argument('', null, new sklearn.Tensor(value));
  211. const paramter = new sklearn.Parameter(name, [ argument ]);
  212. this._inputs.push(paramter);
  213. }
  214. else if (Array.isArray(value) && value.every((obj) => sklearn.Utility.isTensor(obj))) {
  215. const args = value.map((obj) => new sklearn.Argument('', null, new sklearn.Tensor(obj)));
  216. const paramter = new sklearn.Parameter(name, args);
  217. this._inputs.push(paramter);
  218. }
  219. else if (!name.startsWith('_')) {
  220. const attribute = new sklearn.Attribute(metadata.attribute(type, name), name, value);
  221. this._attributes.push(attribute);
  222. }
  223. }
  224. }
  225. get type() {
  226. return this._type; // .split('.').pop();
  227. }
  228. get name() {
  229. return this._name;
  230. }
  231. get group() {
  232. return this._group ? this._group : null;
  233. }
  234. get inputs() {
  235. return this._inputs;
  236. }
  237. get outputs() {
  238. return this._outputs;
  239. }
  240. get attributes() {
  241. return this._attributes;
  242. }
  243. };
  244. sklearn.Attribute = class {
  245. constructor(metadata, name, value) {
  246. this._name = name;
  247. this._value = value;
  248. if (metadata) {
  249. if (metadata.optional && this._value == null) {
  250. this._visible = false;
  251. }
  252. else if (metadata.visible === false) {
  253. this._visible = false;
  254. }
  255. else if (metadata.default !== undefined) {
  256. if (Array.isArray(value)) {
  257. if (Array.isArray(metadata.default)) {
  258. this._visible = value.length !== metadata.default || !this.value.every((item, index) => item == metadata.default[index]);
  259. }
  260. else {
  261. this._visible = !this.value.every((item) => item == metadata.default);
  262. }
  263. }
  264. else {
  265. this._visible = this.value !== metadata.default;
  266. }
  267. }
  268. }
  269. if (value) {
  270. if (Array.isArray(value) && value.length > 0 && value.every((obj) => obj.__class__ && obj.__class__.__module__ === value[0].__class__.__module__ && obj.__class__.__name__ === value[0].__class__.__name__)) {
  271. this._type = value[0].__class__.__module__ + '.' + value[0].__class__.__name__ + '[]';
  272. }
  273. else if (value.__class__) {
  274. this._type = value.__class__.__module__ + '.' + value.__class__.__name__;
  275. }
  276. }
  277. }
  278. get name() {
  279. return this._name;
  280. }
  281. get value() {
  282. return this._value;
  283. }
  284. get type() {
  285. return this._type;
  286. }
  287. get visible() {
  288. return this._visible == false ? false : true;
  289. }
  290. };
  291. sklearn.Tensor = class {
  292. constructor(value) {
  293. if (!sklearn.Utility.isTensor(value)) {
  294. const type = value.__class__.__module__ + '.' + value.__class__.__name__;
  295. throw new sklearn.Error("Unsupported tensor type '" + type + "'.");
  296. }
  297. this._type = new sklearn.TensorType(value.dtype.__name__, new sklearn.TensorShape(value.shape));
  298. this._data = value.data;
  299. if (this._type.dataType === 'string') {
  300. this._itemsize = value.dtype.itemsize;
  301. }
  302. }
  303. get type() {
  304. return this._type;
  305. }
  306. get kind() {
  307. return 'NumPy Array';
  308. }
  309. get state() {
  310. return this._context().state || null;
  311. }
  312. get value() {
  313. const context = this._context();
  314. if (context.state) {
  315. return null;
  316. }
  317. context.limit = Number.MAX_SAFE_INTEGER;
  318. return this._decode(context, 0);
  319. }
  320. toString() {
  321. const context = this._context();
  322. if (context.state) {
  323. return '';
  324. }
  325. context.limit = 10000;
  326. const value = this._decode(context, 0);
  327. switch (this._type.dataType) {
  328. case 'int64':
  329. case 'uint64':
  330. return sklearn.Tensor._stringify(value, '', ' ');
  331. default:
  332. break;
  333. }
  334. return JSON.stringify(value, null, 4);
  335. }
  336. _context() {
  337. const context = {};
  338. context.index = 0;
  339. context.count = 0;
  340. context.state = null;
  341. if (!this._type) {
  342. context.state = 'Tensor has no data type.';
  343. return context;
  344. }
  345. if (!this._data) {
  346. context.state = 'Tensor is data is empty.';
  347. return context;
  348. }
  349. context.dataType = this._type.dataType;
  350. context.dimensions = this._type.shape.dimensions;
  351. switch (context.dataType) {
  352. case 'float32':
  353. case 'float64':
  354. case 'uint32':
  355. case 'int8':
  356. case 'int16':
  357. case 'int32':
  358. case 'int64':
  359. case 'uint64':
  360. context.view = new DataView(this._data.buffer, this._data.byteOffset, this._data.byteLength);
  361. break;
  362. case 'string':
  363. context.data = this._data;
  364. context.itemsize = this._itemsize;
  365. context.decoder = new TextDecoder('utf-8');
  366. break;
  367. case 'object':
  368. context.data = this._data;
  369. break;
  370. default:
  371. context.state = "Tensor data type '" + context.dataType + "' is not implemented.";
  372. return context;
  373. }
  374. return context;
  375. }
  376. _decode(context, dimension) {
  377. const results = [];
  378. const size = context.dimensions[dimension];
  379. if (dimension == context.dimensions.length - 1) {
  380. for (let i = 0; i < size; i++) {
  381. if (context.count > context.limit) {
  382. results.push('...');
  383. return results;
  384. }
  385. switch (context.dataType) {
  386. case 'float32': {
  387. results.push(context.view.getFloat32(context.index, true));
  388. context.index += 4;
  389. context.count++;
  390. break;
  391. }
  392. case 'float64': {
  393. results.push(context.view.getFloat64(context.index, true));
  394. context.index += 8;
  395. context.count++;
  396. break;
  397. }
  398. case 'int8': {
  399. results.push(context.view.getInt8(context.index, true));
  400. context.index += 1;
  401. context.count++;
  402. break;
  403. }
  404. case 'int16': {
  405. results.push(context.view.getInt16(context.index, true));
  406. context.index += 2;
  407. context.count++;
  408. break;
  409. }
  410. case 'int32': {
  411. results.push(context.view.getInt32(context.index, true));
  412. context.index += 4;
  413. context.count++;
  414. break;
  415. }
  416. case 'int64': {
  417. results.push(context.view.getInt64(context.index, true));
  418. context.index += 8;
  419. context.count++;
  420. break;
  421. }
  422. case 'uint32': {
  423. results.push(context.view.getUint32(context.index, true));
  424. context.index += 4;
  425. context.count++;
  426. break;
  427. }
  428. case 'uint64': {
  429. results.push(context.view.getUint64(context.index, true));
  430. context.index += 8;
  431. context.count++;
  432. break;
  433. }
  434. case 'string': {
  435. const buffer = context.data.subarray(context.index, context.index + context.itemsize);
  436. const index = buffer.indexOf(0);
  437. const content = context.decoder.decode(index >= 0 ? buffer.subarray(0, index) : buffer);
  438. results.push(content);
  439. context.index += context.itemsize;
  440. context.count++;
  441. break;
  442. }
  443. case 'object': {
  444. results.push(context.data[context.index++]);
  445. context.count++;
  446. break;
  447. }
  448. default: {
  449. throw new sklearn.Error("Unsupported tensor data type '" + context.dataType + "'.");
  450. }
  451. }
  452. }
  453. }
  454. else {
  455. for (let j = 0; j < size; j++) {
  456. if (context.count > context.limit) {
  457. results.push('...');
  458. return results;
  459. }
  460. results.push(this._decode(context, dimension + 1));
  461. }
  462. }
  463. return results;
  464. }
  465. static _stringify(value, indentation, indent) {
  466. if (Array.isArray(value)) {
  467. const result = [];
  468. result.push('[');
  469. const items = value.map((item) => sklearn.Tensor._stringify(item, indentation + indent, indent));
  470. if (items.length > 0) {
  471. result.push(items.join(',\n'));
  472. }
  473. result.push(']');
  474. return result.join('\n');
  475. }
  476. return indentation + value.toString();
  477. }
  478. };
  479. sklearn.TensorType = class {
  480. constructor(dataType, shape) {
  481. this._dataType = dataType;
  482. this._shape = shape;
  483. }
  484. get dataType() {
  485. return this._dataType;
  486. }
  487. get shape() {
  488. return this._shape;
  489. }
  490. toString() {
  491. return this.dataType + this._shape.toString();
  492. }
  493. };
  494. sklearn.TensorShape = class {
  495. constructor(dimensions) {
  496. this._dimensions = dimensions;
  497. }
  498. get dimensions() {
  499. return this._dimensions;
  500. }
  501. toString() {
  502. return this._dimensions ? ('[' + this._dimensions.map((dimension) => dimension.toString()).join(',') + ']') : '';
  503. }
  504. };
  505. sklearn.Utility = class {
  506. static isTensor(obj) {
  507. return obj && obj.__class__ && obj.__class__.__module__ === 'numpy' && obj.__class__.__name__ === 'ndarray';
  508. }
  509. static findWeights(obj) {
  510. const keys = [ '', 'blobs' ];
  511. for (const key of keys) {
  512. const dict = key === '' ? obj : obj[key];
  513. if (dict) {
  514. const weights = new Map();
  515. if (dict instanceof Map) {
  516. for (const pair of dict) {
  517. if (!sklearn.Utility.isTensor(pair[1])) {
  518. return null;
  519. }
  520. weights.set(pair[0], pair[1]);
  521. }
  522. return weights;
  523. }
  524. else if (!Array.isArray(dict)) {
  525. for (const key in dict) {
  526. const value = dict[key];
  527. if (key != 'weight_order' && key != 'lr') {
  528. if (!key || !sklearn.Utility.isTensor(value)) {
  529. return null;
  530. }
  531. weights.set(key, value);
  532. }
  533. }
  534. return weights;
  535. }
  536. }
  537. }
  538. for (const key of keys) {
  539. const list = key === '' ? obj : obj[key];
  540. if (list && Array.isArray(list)) {
  541. const weights = new Map();
  542. for (let i = 0; i < list.length; i++) {
  543. const value = list[i];
  544. if (!sklearn.Utility.isTensor(value, 'numpy.ndarray')) {
  545. return null;
  546. }
  547. weights.set(i.toString(), value);
  548. }
  549. return weights;
  550. }
  551. }
  552. return null;
  553. }
  554. };
  555. sklearn.Error = class extends Error {
  556. constructor(message) {
  557. super(message);
  558. this.name = 'Error loading scikit-learn model.';
  559. }
  560. };
  561. if (typeof module !== 'undefined' && typeof module.exports === 'object') {
  562. module.exports.ModelFactory = sklearn.ModelFactory;
  563. }