transformers.js 7.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187
  1. // import * as python from './python.js';
  2. // import * as safetensors from './safetensors.js';
  3. const transformers = {};
  4. transformers.ModelFactory = class {
  5. async match(context) {
  6. const obj = await context.peek('json');
  7. if (obj) {
  8. if (obj.architectures && (obj.model_type || obj.transformers_version)) {
  9. return context.set('transformers.config', obj);
  10. }
  11. if (obj.version && obj.added_tokens && obj.model) {
  12. return context.set('transformers.tokenizer', obj);
  13. }
  14. if (obj.tokenizer_class ||
  15. (obj.bos_token && obj.eos_token && obj.unk_token) ||
  16. (obj.pad_token && obj.additional_special_tokens) ||
  17. obj.special_tokens_map_file || obj.full_tokenizer_file) {
  18. return context.set('transformers.tokenizer.config', obj);
  19. }
  20. if (obj.transformers_version && obj.do_sample !== undefined && obj.temperature !== undefined) {
  21. return context.set('transformers.generation_config', obj);
  22. }
  23. if (obj.crop_size !== undefined && obj.do_center_crop !== undefined && obj.image_mean !== undefined && obj.image_std !== undefined && obj.do_resize !== undefined) {
  24. return context.set('transformers.preprocessor_config.json', obj);
  25. }
  26. if (!Array.isArray(obj) && typeof obj === 'object') {
  27. const entries = Object.entries(obj);
  28. if (entries.every(([key, value]) => typeof key === 'string' && key.length < 256 && Number.isInteger(value) && value < 0x80000)) {
  29. if (obj["<|im_start|>"] || obj["<|endoftext|>"]) {
  30. return context.set('transformers.vocab', obj);
  31. }
  32. }
  33. const dtypes = new Set(['BF16', 'FP4', 'UE8']);
  34. if (entries.every(([key, value]) => typeof key === 'string' && dtypes.has(value))) {
  35. return context.set('transformers.dtypes', obj);
  36. }
  37. }
  38. }
  39. return null;
  40. }
  41. async open(context) {
  42. const fetch = async (name) => {
  43. try {
  44. const content = await context.fetch(name);
  45. await this.match(content);
  46. if (content.value) {
  47. return content;
  48. }
  49. } catch {
  50. // continue regardless of error
  51. }
  52. return null;
  53. };
  54. const type = context.type;
  55. const config = type === 'transformers.config' ? context : await fetch('config.json');
  56. const tokenizer = type === 'transformers.tokenizer' ? context : await fetch('tokenizer.json');
  57. const tokenizer_config = type === 'transformers.tokenizer.config' ? context : await fetch('tokenizer_config.json');
  58. const vocab = type === 'transformers.vocab' ? context : await fetch('vocab.json');
  59. const generation_config = type === 'transformers.generation_config' ? context : await fetch('generation_config.json');
  60. const preprocessor_config = type === 'transformers.preprocessor_config.json' ? context : await fetch('preprocessor_config.json');
  61. return new transformers.Model(config, tokenizer, tokenizer_config, vocab, generation_config, preprocessor_config);
  62. }
  63. filter(context, match) {
  64. const priority = new Map([
  65. ['transformers.config', 7],
  66. ['transformers.tokenizer', 6],
  67. ['transformers.tokenizer.config', 5],
  68. ['transformers.vocab', 4],
  69. ['transformers.generation_config', 3],
  70. ['transformers.preprocessor_config.json', 2],
  71. ['transformers.dtypes', 1],
  72. ['safetensors.json', 0],
  73. ['safetensors', 0]
  74. ]);
  75. const a = priority.has(context.type) ? priority.get(context.type) : -1; // current
  76. const b = priority.has(match.type) ? priority.get(match.type) : -1;
  77. if (a !== -1 && b !== -1) {
  78. return a < b;
  79. }
  80. return true;
  81. }
  82. };
  83. transformers.Model = class {
  84. constructor(config, tokenizer, tokenizer_config, vocab) {
  85. this.format = 'Transformers';
  86. this.metadata = [];
  87. this.modules = [new transformers.Graph(config, tokenizer, tokenizer_config, vocab)];
  88. }
  89. };
  90. transformers.Graph = class {
  91. constructor(config, tokenizer, tokenizer_config, vocab) {
  92. this.type = 'graph';
  93. this.nodes = [];
  94. this.inputs = [];
  95. this.outputs = [];
  96. this.metadata = [];
  97. if (config) {
  98. for (const [key, value] of Object.entries(config.value)) {
  99. const argument = new transformers.Argument(key, value);
  100. this.metadata.push(argument);
  101. }
  102. }
  103. if (tokenizer || tokenizer_config) {
  104. const node = new transformers.Tokenizer(tokenizer, tokenizer_config, vocab);
  105. this.nodes.push(node);
  106. }
  107. }
  108. };
  109. transformers.Tokenizer = class {
  110. constructor(tokenizer, tokenizer_config) {
  111. this.type = { name: 'Tokenizer' };
  112. this.name = (tokenizer || tokenizer_config).identifier;
  113. this.attributes = [];
  114. if (tokenizer) {
  115. const obj = tokenizer.value;
  116. const keys = new Set(['decoder', 'model', 'post_processor', 'pre_tokenizer']);
  117. for (const [key, value] of Object.entries(tokenizer.value)) {
  118. if (!keys.has(key)) {
  119. const argument = new transformers.Argument(key, value);
  120. this.attributes.push(argument);
  121. }
  122. }
  123. for (const key of keys) {
  124. const value = obj[key];
  125. if (value) {
  126. const module = new transformers.Object(value);
  127. const argument = new transformers.Argument(key, module, 'object');
  128. this.attributes.push(argument);
  129. }
  130. }
  131. }
  132. }
  133. };
  134. transformers.Object = class {
  135. constructor(obj, type) {
  136. this.type = { name: type || obj.type };
  137. this.attributes = [];
  138. for (const [key, value] of Object.entries(obj)) {
  139. if (key !== 'type') {
  140. let argument = null;
  141. if (Array.isArray(value) && value.every((item) => typeof item === 'object' && Object.keys(item).length === 1 && typeof Object.entries(item)[0][1] === 'object')) {
  142. const values = value.map((item) => new transformers.Object(Object.entries(item)[0][1], Object.entries(item)[0][0]));
  143. argument = new transformers.Argument(key, values, 'object[]');
  144. } else if (Array.isArray(value) && value.every((item) => typeof item === 'object')) {
  145. const values = value.map((item) => new transformers.Object(item));
  146. argument = new transformers.Argument(key, values, 'object[]');
  147. } else {
  148. argument = new transformers.Argument(key, value);
  149. }
  150. this.attributes.push(argument);
  151. }
  152. }
  153. }
  154. };
  155. transformers.Argument = class {
  156. constructor(name, value, type) {
  157. this.name = name;
  158. this.value = value;
  159. this.type = type || null;
  160. }
  161. };
  162. transformers.Error = class extends Error {
  163. constructor(message) {
  164. super(message);
  165. this.name = 'Error loading Transformers model.';
  166. }
  167. };
  168. export const ModelFactory = transformers.ModelFactory;