// transformers.js

// import * as python from './python.js';
// import * as safetensors from './safetensors.js';
  3. const transformers = {};
  4. transformers.ModelFactory = class {
  5. async match(context) {
  6. const obj = await context.peek('json');
  7. if (obj) {
  8. if (obj.architectures && (obj.model_type || obj.transformers_version)) {
  9. return context.set('transformers.config', obj);
  10. }
  11. if (obj.version && obj.added_tokens && obj.model) {
  12. return context.set('transformers.tokenizer', obj);
  13. }
  14. if (obj.tokenizer_class ||
  15. (obj.bos_token && obj.eos_token && obj.unk_token) ||
  16. (obj.pad_token && obj.additional_special_tokens) ||
  17. obj.special_tokens_map_file || obj.full_tokenizer_file) {
  18. return context.set('transformers.tokenizer.config', obj);
  19. }
  20. if (obj.transformers_version && obj.do_sample !== undefined && obj.temperature !== undefined) {
  21. return context.set('transformers.generation_config', obj);
  22. }
  23. if (obj.transformers_version && obj._from_model_config !== undefined) {
  24. return context.set('transformers.generation_config', obj);
  25. }
  26. if (obj.crop_size !== undefined && obj.do_center_crop !== undefined && obj.image_mean !== undefined && obj.image_std !== undefined && obj.do_resize !== undefined) {
  27. return context.set('transformers.preprocessor_config.json', obj);
  28. }
  29. if (!Array.isArray(obj) && typeof obj === 'object') {
  30. const entries = Object.entries(obj);
  31. if (entries.every(([key, value]) => typeof key === 'string' && key.length < 256 && Number.isInteger(value) && value < 0x80000)) {
  32. if (obj["<|im_start|>"] || obj["<|endoftext|>"]) {
  33. return context.set('transformers.vocab', obj);
  34. }
  35. }
  36. const dtypes = new Set(['BF16', 'FP4', 'UE8']);
  37. if (entries.every(([key, value]) => typeof key === 'string' && dtypes.has(value))) {
  38. return context.set('transformers.dtypes', obj);
  39. }
  40. }
  41. }
  42. return null;
  43. }
  44. async open(context) {
  45. const fetch = async (name) => {
  46. try {
  47. const content = await context.fetch(name);
  48. await this.match(content);
  49. if (content.value) {
  50. return content;
  51. }
  52. } catch {
  53. // continue regardless of error
  54. }
  55. return null;
  56. };
  57. const type = context.type;
  58. const config = type === 'transformers.config' ? context : await fetch('config.json');
  59. const tokenizer = type === 'transformers.tokenizer' ? context : await fetch('tokenizer.json');
  60. const tokenizer_config = type === 'transformers.tokenizer.config' ? context : await fetch('tokenizer_config.json');
  61. const vocab = type === 'transformers.vocab' ? context : await fetch('vocab.json');
  62. const generation_config = type === 'transformers.generation_config' ? context : await fetch('generation_config.json');
  63. const preprocessor_config = type === 'transformers.preprocessor_config.json' ? context : await fetch('preprocessor_config.json');
  64. return new transformers.Model(config, tokenizer, tokenizer_config, vocab, generation_config, preprocessor_config);
  65. }
  66. filter(context, match) {
  67. const priority = new Map([
  68. ['transformers.config', 7],
  69. ['transformers.tokenizer', 6],
  70. ['transformers.tokenizer.config', 5],
  71. ['transformers.vocab', 4],
  72. ['transformers.generation_config', 3],
  73. ['transformers.preprocessor_config.json', 2],
  74. ['transformers.dtypes', 1],
  75. ['safetensors.json', 0],
  76. ['safetensors', 0]
  77. ]);
  78. const a = priority.has(context.type) ? priority.get(context.type) : -1; // current
  79. const b = priority.has(match.type) ? priority.get(match.type) : -1;
  80. if (a !== -1 && b !== -1) {
  81. return a < b;
  82. }
  83. return true;
  84. }
  85. };
  86. transformers.Model = class {
  87. constructor(config, tokenizer, tokenizer_config, vocab) {
  88. this.format = 'Transformers';
  89. this.metadata = [];
  90. this.modules = [new transformers.Graph(config, tokenizer, tokenizer_config, vocab)];
  91. }
  92. };
  93. transformers.Graph = class {
  94. constructor(config, tokenizer, tokenizer_config, vocab) {
  95. this.type = 'graph';
  96. this.nodes = [];
  97. this.inputs = [];
  98. this.outputs = [];
  99. this.metadata = [];
  100. if (config) {
  101. for (const [key, value] of Object.entries(config.value)) {
  102. const argument = new transformers.Argument(key, value);
  103. this.metadata.push(argument);
  104. }
  105. }
  106. if (tokenizer || tokenizer_config) {
  107. const node = new transformers.Tokenizer(tokenizer, tokenizer_config, vocab);
  108. this.nodes.push(node);
  109. }
  110. }
  111. };
  112. transformers.Tokenizer = class {
  113. constructor(tokenizer, tokenizer_config) {
  114. this.type = { name: 'Tokenizer' };
  115. this.name = (tokenizer || tokenizer_config).identifier;
  116. this.attributes = [];
  117. if (tokenizer) {
  118. const obj = tokenizer.value;
  119. const keys = new Set(['decoder', 'post_processor', 'pre_tokenizer']);
  120. for (const [key, value] of Object.entries(tokenizer.value)) {
  121. if (!keys.has(key)) {
  122. const argument = new transformers.Argument(key, value);
  123. this.attributes.push(argument);
  124. }
  125. }
  126. for (const key of keys) {
  127. const value = obj[key];
  128. if (value) {
  129. const module = new transformers.Object(value);
  130. const argument = new transformers.Argument(key, module, 'object');
  131. this.attributes.push(argument);
  132. }
  133. }
  134. }
  135. }
  136. };
  137. transformers.Object = class {
  138. constructor(obj, type) {
  139. this.type = { name: type || obj.type };
  140. this.attributes = [];
  141. for (const [key, value] of Object.entries(obj)) {
  142. if (key !== 'type') {
  143. let argument = null;
  144. if (Array.isArray(value) && value.every((item) => typeof item === 'object' && Object.keys(item).length === 1 && typeof Object.entries(item)[0][1] === 'object')) {
  145. const values = value.map((item) => new transformers.Object(Object.entries(item)[0][1], Object.entries(item)[0][0]));
  146. argument = new transformers.Argument(key, values, 'object[]');
  147. } else if (Array.isArray(value) && value.every((item) => typeof item === 'object')) {
  148. const values = value.map((item) => new transformers.Object(item));
  149. argument = new transformers.Argument(key, values, 'object[]');
  150. } else {
  151. argument = new transformers.Argument(key, value);
  152. }
  153. this.attributes.push(argument);
  154. }
  155. }
  156. }
  157. };
  158. transformers.Argument = class {
  159. constructor(name, value, type = null) {
  160. this.name = name;
  161. this.value = value;
  162. this.type = type;
  163. }
  164. };
  165. transformers.Error = class extends Error {
  166. constructor(message) {
  167. super(message);
  168. this.name = 'Error loading Transformers model.';
  169. }
  170. };
// Module entry point: re-exports the factory class under a named export.
export const ModelFactory = transformers.ModelFactory;