sklearn.js 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461
  1. // Experimental
  // Namespace object for everything in this file: keep `sklearn` when it already holds a truthy value,
  // otherwise start from an empty object. The classes below are attached to it as properties.
  var sklearn = sklearn || {};
  /**
   * Factory that recognizes the object returned by `context.open('pkl')` and wraps it in a sklearn.Model.
   */
  sklearn.ModelFactory = class {

      /**
       * Classifies the object obtained from `context.open('pkl')`.
       *
       * An object "belongs" to a format when its `__class__.__module__ + '.' + __class__.__name__`
       * starts with that format's prefix.
       *
       * @param {object} context - Provides `open('pkl')`.
       * @returns {?string} '<format>' for a single matching object, '<format>.list' for a non-empty
       *     array whose items all match, '<format>.map' for an object with at least one entry whose
       *     values all match, or null when nothing matches.
       */
      match(context) {
          const root = context.open('pkl');
          // True when `candidate` carries complete class information whose qualified name begins with `prefix`.
          const hasPrefix = (candidate, prefix) => {
              if (!candidate || !candidate.__class__) {
                  return false;
              }
              const cls = candidate.__class__;
              if (!cls.__module__ || !cls.__name__) {
                  return false;
              }
              const qualified = cls.__module__ + '.' + cls.__name__;
              return qualified.startsWith(prefix);
          };
          // [ qualified-name prefix, reported format ] pairs, tried in this order.
          const table = [
              [ 'sklearn.', 'sklearn' ],
              [ 'xgboost.sklearn.', 'sklearn' ],
              [ 'lightgbm.sklearn.', 'sklearn' ],
              [ 'scipy.', 'scipy' ],
              [ 'hmmlearn.', 'hmmlearn' ]
          ];
          for (const [ prefix, format ] of table) {
              const belongs = (item) => hasPrefix(item, prefix);
              if (belongs(root)) {
                  return format;
              }
              if (Array.isArray(root) && root.length > 0 && root.every((item) => belongs(item))) {
                  return format + '.list';
              }
              if (Object(root) === root) {
                  const members = Object.values(root);
                  if (members.length > 0 && members.every((member) => belongs(member))) {
                      return format + '.map';
                  }
              }
          }
          return null;
      }

      /**
       * Loads 'sklearn-metadata.json' through the context, then builds the model.
       *
       * @param {object} context - Provides `metadata(name)` and `open('pkl')`.
       * @param {string} target - Format identifier as returned by `match`.
       * @returns {Promise<sklearn.Model>} The constructed model.
       */
      async open(context, target) {
          const metadata = await context.metadata('sklearn-metadata.json');
          const root = context.open('pkl');
          return new sklearn.Model(metadata, target, root);
      }
  };
  /**
   * Model wrapper: turns the loaded object(s) into sklearn.Graph instances and derives a display
   * format string such as 'scikit-learn v1.0'.
   */
  sklearn.Model = class {

      /**
       * @param {object} metadata - Forwarded unchanged to every sklearn.Graph.
       * @param {string} target - '<base>', '<base>.list' or '<base>.map', where <base> is
       *     'sklearn', 'scipy' or 'hmmlearn'.
       * @param {*} obj - A single object, an array of objects ('.list') or a keyed object ('.map').
       * @throws {sklearn.Error} When `target` is not one of the identifiers above.
       */
      constructor(metadata, target, obj) {
          const formats = new Map([ [ 'sklearn', 'scikit-learn' ], [ 'scipy', 'SciPy' ], [ 'hmmlearn', 'hmmlearn' ] ]);
          // Split at the first '.', so the base and the layout suffix are validated independently.
          // Every known base (including 'hmmlearn') therefore supports the list and map layouts.
          const separator = target.indexOf('.');
          const base = separator === -1 ? target : target.substring(0, separator);
          const layout = separator === -1 ? null : target.substring(separator + 1);
          this._format = formats.get(base);
          this._graphs = [];
          // [ graph name, object ] pairs to turn into graphs; stays null for unsupported targets.
          let entries = null;
          if (formats.has(base)) {
              switch (layout) {
                  case null: {
                      entries = [ [ '', obj ] ];
                      break;
                  }
                  case 'list': {
                      entries = [];
                      for (let i = 0; i < obj.length; i++) {
                          entries.push([ i.toString(), obj[i] ]);
                      }
                      break;
                  }
                  case 'map': {
                      entries = Object.entries(obj);
                      break;
                  }
                  default: {
                      break;
                  }
              }
          }
          if (entries === null) {
              throw new sklearn.Error("Unsupported scikit-learn format '" + target + "'.");
          }
          const versions = [];
          for (const [ name, item ] of entries) {
              this._graphs.push(new sklearn.Graph(metadata, name, item));
              if (item._sklearn_version) {
                  versions.push(' v' + item._sklearn_version.toString());
              }
          }
          // Append the version only when every object that reports one reports the same one.
          if (versions.length > 0 && versions.every((version) => version === versions[0])) {
              this._format += versions[0];
          }
      }

      /** @returns {string} Display name of the format, optionally followed by ' v<version>'. */
      get format() {
          return this._format;
      }

      /** @returns {sklearn.Graph[]} One graph per loaded object. */
      get graphs() {
          return this._graphs;
      }
  };
  /**
   * Graph built from one loaded estimator object. Pipelines, feature unions and column transformers
   * are expanded recursively; every other object becomes a single sklearn.Node.
   */
  sklearn.Graph = class {

      /**
       * @param {object} metadata - Forwarded unchanged to every sklearn.Node.
       * @param {string} name - Graph name; falsy values become ''.
       * @param {object} obj - Root object; must expose `__class__.__module__` and `__class__.__name__`.
       */
      constructor(metadata, name, obj) {
          this._name = name || '';
          this._metadata = metadata;
          this._nodes = [];
          this._groups = false;
          const values = new Map();
          // Returns the one sklearn.Value shared by every argument that refers to `name`.
          const value = (name) => {
              if (!values.has(name)) {
                  values.set(name, new sklearn.Value(name, null, null));
              }
              return values.get(name);
          };
          const concat = (parent, name) => {
              return (parent === '' ? name : `${parent}/${name}`);
          };
          // scikit-learn allows placeholder entries ('drop', 'passthrough', None) where an estimator
          // is expected. They carry no `__class__`, so they are skipped instead of being dereferenced.
          const isPlaceholder = (item) => {
              return item === null || item === undefined || typeof item === 'string';
          };
          // Emits one node for a container (FeatureUnion / ColumnTransformer) and feeds its output to
          // every child, returning the concatenated child outputs.
          const fanOut = (group, name, obj, inputs, children) => {
              this._groups = true;
              const output = concat(group, name);
              this._nodes.push(new sklearn.Node(this._metadata, output, output, obj, inputs, [ output ], value));
              const outputs = [];
              for (const child of children) {
                  if (!isPlaceholder(child[1])) {
                      outputs.push(...process(output, child[0], child[1], [ output ]));
                  }
              }
              return outputs;
          };
          // Adds the node(s) for `obj` and returns the names of the values it produces.
          const process = (group, name, obj, inputs) => {
              const type = obj.__class__.__module__ + '.' + obj.__class__.__name__;
              switch (type) {
                  case 'sklearn.pipeline.Pipeline': {
                      this._groups = true;
                      const childGroup = concat(group, name || 'pipeline');
                      // Steps are chained: each step consumes what the previous one produced.
                      // A placeholder step leaves the current values untouched.
                      let current = inputs;
                      for (const step of obj.steps) {
                          if (!isPlaceholder(step[1])) {
                              current = process(childGroup, step[0], step[1], current);
                          }
                      }
                      return current;
                  }
                  case 'sklearn.pipeline.FeatureUnion': {
                      return fanOut(group, name || 'union', obj, inputs, obj.transformer_list);
                  }
                  case 'sklearn.compose._column_transformer.ColumnTransformer': {
                      return fanOut(group, name || 'transformer', obj, inputs, obj.transformers);
                  }
                  default: {
                      const output = concat(group, name);
                      // An unnamed root object gets no output argument, but '' is still returned.
                      this._nodes.push(new sklearn.Node(this._metadata, group, output, obj, inputs, output === '' ? [] : [ output ], value));
                      return [ output ];
                  }
              }
          };
          process('', '', obj, ['data']);
      }

      /** @returns {string} Graph name. */
      get name() {
          return this._name;
      }

      /** @returns {boolean} True once a Pipeline, FeatureUnion or ColumnTransformer was expanded. */
      get groups() {
          return this._groups;
      }

      /** @returns {Array} Always empty. */
      get inputs() {
          return [];
      }

      /** @returns {Array} Always empty. */
      get outputs() {
          return [];
      }

      /** @returns {sklearn.Node[]} Nodes in creation order. */
      get nodes() {
          return this._nodes;
      }
  };
  /**
   * Named argument of a node, holding the list of values bound to it.
   */
  sklearn.Argument = class {

      /**
       * @param {string} name - Argument name.
       * @param {Array} value - Values bound to the argument; stored as given.
       */
      constructor(name, value) {
          Object.assign(this, { _name: name, _value: value });
      }

      /** @returns {string} The name passed to the constructor. */
      get name() {
          const { _name: label } = this;
          return label;
      }

      /** @returns {Array} The values passed to the constructor. */
      get value() {
          const { _value: bound } = this;
          return bound;
      }
  };
  /**
   * A value flowing between nodes, optionally backed by an initializer.
   */
  sklearn.Value = class {

      /**
       * @param {string} name - Identifier of the value.
       * @param {?object} type - Type used when there is no initializer; falsy values become null.
       * @param {?object} initializer - Backing initializer; falsy values become null.
       * @throws {sklearn.Error} When `name` is not a string.
       */
      constructor(name, type, initializer) {
          const isIdentifier = typeof name === 'string';
          if (!isIdentifier) {
              throw new sklearn.Error(`Invalid value identifier '${JSON.stringify(name)}'.`);
          }
          this._name = name;
          this._type = type ? type : null;
          this._initializer = initializer ? initializer : null;
      }

      /** @returns {string} The identifier. */
      get name() {
          return this._name;
      }

      /** @returns {?object} The initializer's `type` when an initializer is present, else the stored type. */
      get type() {
          return this._initializer ? this._initializer.type : this._type;
      }

      /** @returns {?object} The initializer, or null. */
      get initializer() {
          return this._initializer;
      }
  };
  /**
   * Graph node built from one object: own properties holding numpy arrays become extra inputs,
   * the remaining properties whose names do not start with '_' become attributes.
   */
  sklearn.Node = class {

      /**
       * @param {object} metadata - Provides `type(name)` and `attribute(type, name)`.
       * @param {string} group - Group name; falsy values become ''.
       * @param {string} name - Node name; falsy values become ''.
       * @param {object} obj - Object whose own enumerable properties are inspected.
       * @param {string[]} inputs - Names of incoming values.
       * @param {string[]} outputs - Names of produced values.
       * @param {function(string): sklearn.Value} value - Maps a value name to its sklearn.Value.
       */
      constructor(metadata, group, name, obj, inputs, outputs, value) {
          this._group = group || '';
          this._name = name || '';
          let type = 'Object';
          if (obj.__class__) {
              type = obj.__class__.__module__ + '.' + obj.__class__.__name__;
          }
          this._type = metadata.type(type) || { name: type };
          // One argument per connection, named after the value it carries.
          const connect = (identifier) => new sklearn.Argument(identifier, [ value(identifier) ]);
          this._inputs = inputs.map((identifier) => connect(identifier));
          this._outputs = outputs.map((identifier) => connect(identifier));
          this._attributes = [];
          // Wraps a numpy array in an anonymous value backed by a tensor initializer.
          const constant = (array) => new sklearn.Value('', null, new sklearn.Tensor(array));
          for (const [ key, content ] of Object.entries(obj)) {
              if (content && sklearn.Utility.isTensor(content)) {
                  this._inputs.push(new sklearn.Argument(key, [ constant(content) ]));
                  continue;
              }
              if (Array.isArray(content) && content.every((item) => sklearn.Utility.isTensor(item))) {
                  this._inputs.push(new sklearn.Argument(key, content.map((item) => constant(item))));
                  continue;
              }
              if (key.startsWith('_')) {
                  continue;
              }
              this._attributes.push(new sklearn.Attribute(metadata.attribute(type, key), key, content));
          }
      }

      /** @returns {object} Result of `metadata.type(...)`, or `{ name }` holding the qualified class name. */
      get type() {
          return this._type; // .split('.').pop();
      }

      /** @returns {string} Node name. */
      get name() {
          return this._name;
      }

      /** @returns {?string} Group name, or null when it is empty. */
      get group() {
          return this._group || null;
      }

      /** @returns {sklearn.Argument[]} Connection inputs followed by tensor inputs. */
      get inputs() {
          return this._inputs;
      }

      /** @returns {sklearn.Argument[]} Output arguments. */
      get outputs() {
          return this._outputs;
      }

      /** @returns {sklearn.Attribute[]} Attributes in property order. */
      get attributes() {
          return this._attributes;
      }
  };
  /**
   * Attribute of a node: a name, a value, an inferred type name and a visibility flag derived
   * from the attribute's metadata.
   */
  sklearn.Attribute = class {

      /**
       * @param {?object} metadata - Optional descriptor; `optional`, `visible` and `default` are read.
       * @param {string} name - Attribute name.
       * @param {*} value - Attribute value.
       */
      constructor(metadata, name, value) {
          this._name = name;
          this._value = value;
          if (metadata) {
              if (metadata.optional && value == null) {
                  this._visible = false;
              } else if (metadata.visible === false) {
                  this._visible = false;
              } else if (metadata.default !== undefined) {
                  // Hide attributes that still hold their default value.
                  this._visible = !sklearn.Attribute._isDefault(value, metadata.default);
              }
          }
          if (value) {
              // Elements are checked for null/undefined first, so arrays such as [ null, x ] do not throw.
              const hasClass = (item) => item !== null && item !== undefined && Boolean(item.__class__);
              if (Array.isArray(value) && value.length > 0 && value.every((item) => hasClass(item) &&
                  item.__class__.__module__ === value[0].__class__.__module__ &&
                  item.__class__.__name__ === value[0].__class__.__name__)) {
                  this._type = value[0].__class__.__module__ + '.' + value[0].__class__.__name__ + '[]';
              } else if (value.__class__) {
                  this._type = value.__class__.__module__ + '.' + value.__class__.__name__;
              }
          }
      }

      /**
       * Tells whether `value` equals the declared default.
       * Array vs array: same length and element-wise loosely equal.
       * Array vs scalar: every element loosely equal to the scalar.
       * Otherwise: strict equality.
       *
       * @param {*} value - Attribute value.
       * @param {*} fallback - Declared default.
       * @returns {boolean} True when the value matches the default.
       */
      static _isDefault(value, fallback) {
          if (!Array.isArray(value)) {
              return value === fallback;
          }
          if (Array.isArray(fallback)) {
              // Compare against the default's length; comparing against the array itself never matches.
              // Loose equality is kept on purpose to preserve the existing element comparison.
              return value.length === fallback.length && value.every((item, index) => item == fallback[index]);
          }
          return value.every((item) => item == fallback);
      }

      /** @returns {string} Attribute name. */
      get name() {
          return this._name;
      }

      /** @returns {*} Attribute value. */
      get value() {
          return this._value;
      }

      /** @returns {string|undefined} Qualified class name ('[]'-suffixed for homogeneous arrays), if any. */
      get type() {
          return this._type;
      }

      /** @returns {boolean} False only when the constructor decided to hide the attribute. */
      get visible() {
          return this._visible == false ? false : true;
      }
  };
  /**
   * Tensor view over a numpy ndarray object.
   */
  sklearn.Tensor = class {

      /**
       * @param {object} array - Object accepted by sklearn.Utility.isTensor; its `dtype`, `shape`,
       *     `tolist()` and `tobytes()` members are used.
       * @throws {sklearn.Error} When `array` is not recognized as a tensor.
       */
      constructor(array) {
          const supported = sklearn.Utility.isTensor(array);
          if (!supported) {
              const type = array.__class__.__module__ + '.' + array.__class__.__name__;
              throw new sklearn.Error("Unsupported tensor type '" + type + "'.");
          }
          const dataType = array.dtype.__name__;
          const shape = new sklearn.TensorShape(array.shape);
          this._type = new sklearn.TensorType(dataType, shape);
          this._byteorder = array.dtype.byteorder;
          // 'string' and 'object' data is kept as a list; everything else as raw bytes.
          if (this._type.dataType == 'string' || this._type.dataType == 'object') {
              this._data = array.tolist();
          } else {
              this._data = array.tobytes();
          }
      }

      /** @returns {sklearn.TensorType} Data type and shape. */
      get type() {
          return this._type;
      }

      /** @returns {string} Fixed category label. */
      get category() {
          return 'NumPy Array';
      }

      /** @returns {string} '|' for 'string' and 'object' data, otherwise the dtype's byte order. */
      get layout() {
          if (this._type.dataType == 'string' || this._type.dataType == 'object') {
              return '|';
          }
          return this._byteorder;
      }

      /** @returns {*} Result of `tolist()` or `tobytes()` captured in the constructor. */
      get values() {
          return this._data;
      }
  };
  /**
   * Pairs a data type name with a tensor shape.
   */
  sklearn.TensorType = class {

      /**
       * @param {string} dataType - Data type name.
       * @param {sklearn.TensorShape} shape - Shape object; its `toString()` is used for display.
       */
      constructor(dataType, shape) {
          Object.assign(this, { _dataType: dataType, _shape: shape });
      }

      /** @returns {string} Data type name. */
      get dataType() {
          return this._dataType;
      }

      /** @returns {sklearn.TensorShape} Shape object. */
      get shape() {
          return this._shape;
      }

      /** @returns {string} Data type name immediately followed by the shape's string form. */
      toString() {
          const suffix = this._shape.toString();
          return this.dataType + suffix;
      }
  };
  /**
   * Shape of a tensor as a list of dimensions.
   */
  sklearn.TensorShape = class {

      /**
       * @param {Array} dimensions - Dimension list; stored as given.
       */
      constructor(dimensions) {
          this._dimensions = dimensions;
      }

      /** @returns {Array} The dimension list passed to the constructor. */
      get dimensions() {
          return this._dimensions;
      }

      /** @returns {string} '[d0,d1,...]', or '' when no dimension list is set. */
      toString() {
          if (!this._dimensions) {
              return '';
          }
          const parts = this._dimensions.map((dimension) => dimension.toString());
          return '[' + parts.join(',') + ']';
      }
  };
  /**
   * Static helpers for recognizing numpy arrays and collections of them.
   */
  sklearn.Utility = class {

      /**
       * Checks for an object whose class is module 'numpy', name 'ndarray'.
       *
       * @param {*} obj - Candidate.
       * @returns {*} A falsy operand (`obj` or `obj.__class__`) when one is missing, otherwise a boolean.
       */
      static isTensor(obj) {
          if (!obj) {
              return obj;
          }
          const cls = obj.__class__;
          if (!cls) {
              return cls;
          }
          return cls.__module__ === 'numpy' && cls.__name__ === 'ndarray';
      }

      /**
       * Collects tensors from `obj` itself or from `obj.blobs`.
       *
       * Maps and plain objects are tried first (keys 'weight_order' and 'lr' of plain objects are
       * ignored), then arrays, whose tensors are keyed by their index as a string.
       *
       * @param {*} obj - Container to inspect.
       * @returns {?Map} Tensors by key, or null when a non-tensor entry is found or nothing applies.
       */
      static findWeights(obj) {
          const keys = [ '', 'blobs' ];
          // '' selects the object itself, any other key selects that property.
          const select = (key) => (key === '' ? obj : obj[key]);
          for (const key of keys) {
              const dict = select(key);
              if (!dict) {
                  continue;
              }
              if (dict instanceof Map) {
                  const weights = new Map();
                  for (const [ name, tensor ] of dict) {
                      if (!sklearn.Utility.isTensor(tensor)) {
                          return null;
                      }
                      weights.set(name, tensor);
                  }
                  return weights;
              }
              if (Array.isArray(dict)) {
                  continue;
              }
              const weights = new Map();
              for (const name in dict) {
                  const tensor = dict[name];
                  if (name === 'weight_order' || name === 'lr') {
                      continue;
                  }
                  if (!name || !sklearn.Utility.isTensor(tensor)) {
                      return null;
                  }
                  weights.set(name, tensor);
              }
              return weights;
          }
          for (const key of keys) {
              const list = select(key);
              if (!Array.isArray(list)) {
                  continue;
              }
              const weights = new Map();
              for (let index = 0; index < list.length; index++) {
                  const tensor = list[index];
                  if (!sklearn.Utility.isTensor(tensor)) {
                      return null;
                  }
                  weights.set(index.toString(), tensor);
              }
              return weights;
          }
          return null;
      }
  };
  /**
   * Error type thrown by this module; its `name` is a fixed descriptive label.
   */
  sklearn.Error = class extends Error {

      /**
       * @param {string} message - Description of the failure.
       */
      constructor(message) {
          super(message);
          const label = 'Error loading scikit-learn model.';
          this.name = label;
      }
  };
  // CommonJS export: runs only when a `module` binding exists and `module.exports` is an object.
  if (typeof module !== 'undefined') {
      if (typeof module.exports === 'object') {
          module.exports.ModelFactory = sklearn.ModelFactory;
      }
  }