[
  {
    "name": "llama",
    "family": "decoder",
    "graph": {
      "input": [
        { "name": "token_embd", "type": "EMBEDDING", "category": "Transform" }
      ],
      "blocks": [
        { "name": "attn_norm", "type": "RMS_NORM", "category": "Normalization" },
        { "name": "attention", "type": "MULTI_HEAD_ATTENTION", "category": "Attention", "tensors": ["attn_q", "attn_k", "attn_v", "attn_output"], "position_encoding": "rope" },
        { "name": "ffn_norm", "type": "RMS_NORM", "category": "Normalization" },
        { "name": "ffn_gate_inp", "type": "MUL_MAT" },
        { "name": "ffn_gate", "type": "MUL_MAT" },
        { "name": "ffn_up", "type": "MUL_MAT" },
        { "name": "ffn_down", "type": "MUL_MAT" },
        { "name": "ffn_gate_exps", "type": "MUL_MAT_ID" },
        { "name": "ffn_up_exps", "type": "MUL_MAT_ID" },
        { "name": "ffn_down_exps", "type": "MUL_MAT_ID" }
      ],
      "output": [
        { "name": "output_norm", "type": "RMS_NORM", "category": "Normalization" },
        { "name": "output", "type": "MUL_MAT" }
      ]
    }
  },
  {
    "name": "llama4",
    "family": "decoder-moe",
    "graph": {
      "input": [
        { "name": "token_embd", "type": "EMBEDDING", "category": "Transform" }
      ],
      "blocks": [
        { "name": "attn_norm", "type": "RMS_NORM", "category": "Normalization" },
        { "name": "attention", "type": "MULTI_HEAD_ATTENTION", "category": "Attention", "tensors": ["attn_q", "attn_k", "attn_v", "attn_output"], "position_encoding": "rope" },
        { "name": "ffn_norm", "type": "RMS_NORM", "category": "Normalization" },
        { "name": "ffn_gate_inp", "type": "MUL_MAT" },
        { "name": "ffn_gate_exps", "type": "MUL_MAT_ID" },
        { "name": "ffn_up_exps", "type": "MUL_MAT_ID" },
        { "name": "ffn_down_exps", "type": "MUL_MAT_ID" }
      ],
      "output": [
        { "name": "output_norm", "type": "RMS_NORM", "category": "Normalization" },
        { "name": "output", "type": "MUL_MAT" }
      ]
    }
  },
  {
    "name": "deci",
    "family": "decoder",
    "graph": {
      "input": [
        { "name": "token_embd", "type": "EMBEDDING", "category": "Transform" }
      ],
      "blocks": [
        { "name": "attn_norm", "type": "RMS_NORM", "category": "Normalization" },
        { "name": "attention", "type": "MULTI_HEAD_ATTENTION", "category": "Attention", "tensors": ["attn_q", "attn_k", "attn_v", "attn_output"], "position_encoding": "rope" },
        { "name": "ffn_norm", "type": "RMS_NORM", "category": "Normalization" },
        { "name": "ffn_gate", "type": "MUL_MAT" },
        { "name": "ffn_up", "type": "MUL_MAT" },
        { "name": "ffn_down", "type": "MUL_MAT" }
      ],
      "output": [
        { "name": "output_norm", "type": "RMS_NORM", "category": "Normalization" },
        { "name": "output", "type": "MUL_MAT" }
      ]
    }
  },
  {
    "name": "qwen2",
    "family": "decoder",
    "graph": {
      "input": [
        { "name": "token_embd", "type": "EMBEDDING", "category": "Transform" }
      ],
      "blocks": [
        { "name": "attn_norm", "type": "RMS_NORM", "category": "Normalization" },
        { "name": "attention", "type": "MULTI_HEAD_ATTENTION", "category": "Attention", "tensors": ["attn_q", "attn_k", "attn_v", "attn_output"], "position_encoding": "rope", "has_bias": true },
        { "name": "ffn_norm", "type": "RMS_NORM", "category": "Normalization" },
        { "name": "ffn_gate", "type": "MUL_MAT" },
        { "name": "ffn_up", "type": "MUL_MAT" },
        { "name": "ffn_down", "type": "MUL_MAT" }
      ],
      "output": [
        { "name": "output_norm", "type": "RMS_NORM", "category": "Normalization" },
        { "name": "output", "type": "MUL_MAT" }
      ]
    }
  },
  {
    "name": "qwen2vl",
    "family": "decoder",
    "graph": {
      "input": [
        { "name": "token_embd", "type": "EMBEDDING", "category": "Transform" }
      ],
      "blocks": [
        { "name": "attn_norm", "type": "RMS_NORM", "category": "Normalization" },
        { "name": "attention", "type": "MULTI_HEAD_ATTENTION", "category": "Attention", "tensors": ["attn_q", "attn_k", "attn_v", "attn_output"], "position_encoding": "rope", "has_bias": true },
        { "name": "ffn_norm", "type": "RMS_NORM", "category": "Normalization" },
        { "name": "ffn_gate", "type": "MUL_MAT" },
        { "name": "ffn_up", "type": "MUL_MAT" },
        { "name": "ffn_down", "type": "MUL_MAT" }
      ],
      "output": [
        { "name": "output_norm", "type": "RMS_NORM", "category": "Normalization" },
        { "name": "output", "type": "MUL_MAT" }
      ]
    }
  },
  {
    "name": "qwen3",
    "family": "decoder",
    "graph": {
      "input": [
        { "name": "token_embd", "type": "EMBEDDING", "category": "Transform" }
      ],
      "blocks": [
        { "name": "attn_norm", "type": "RMS_NORM", "category": "Normalization" },
        { "name": "attention", "type": "MULTI_HEAD_ATTENTION", "category": "Attention", "tensors": ["attn_q", "attn_k", "attn_v", "attn_output"], "position_encoding": "rope", "has_bias": true, "has_qk_norm": true },
        { "name": "ffn_norm", "type": "RMS_NORM", "category": "Normalization" },
        { "name": "ffn_gate", "type": "MUL_MAT" },
        { "name": "ffn_up", "type": "MUL_MAT" },
        { "name": "ffn_down", "type": "MUL_MAT" }
      ],
      "output": [
        { "name": "output_norm", "type": "RMS_NORM", "category": "Normalization" },
        { "name": "output", "type": "MUL_MAT" }
      ]
    }
  },
  {
    "name": "qwen3vl",
    "family": "decoder",
    "graph": {
      "input": [
        { "name": "token_embd", "type": "EMBEDDING", "category": "Transform" }
      ],
      "blocks": [
        { "name": "attn_norm", "type": "RMS_NORM", "category": "Normalization" },
        { "name": "attention", "type": "MULTI_HEAD_ATTENTION", "category": "Attention", "tensors": ["attn_q", "attn_k", "attn_v", "attn_output"], "position_encoding": "rope", "has_bias": true, "has_qk_norm": true },
        { "name": "ffn_norm", "type": "RMS_NORM", "category": "Normalization" },
        { "name": "ffn_gate", "type": "MUL_MAT" },
        { "name": "ffn_up", "type": "MUL_MAT" },
        { "name": "ffn_down", "type": "MUL_MAT" }
      ],
      "output": [
        { "name": "output_norm", "type": "RMS_NORM", "category": "Normalization" },
        { "name": "output", "type": "MUL_MAT" }
      ]
    }
  },
  {
    "name": "dream",
    "family": "decoder",
    "graph": {
      "input": [
        { "name": "token_embd", "type": "EMBEDDING", "category": "Transform" }
      ],
      "blocks": [
        { "name": "attn_norm", "type": "RMS_NORM", "category": "Normalization" },
        { "name": "attention", "type": "MULTI_HEAD_ATTENTION", "category": "Attention", "tensors": ["attn_q", "attn_k", "attn_v", "attn_output"], "position_encoding": "rope", "has_bias": true },
        { "name": "ffn_norm", "type": "RMS_NORM", "category": "Normalization" },
        { "name": "ffn_gate", "type": "MUL_MAT" },
        { "name": "ffn_up", "type": "MUL_MAT" },
        { "name": "ffn_down", "type": "MUL_MAT" }
      ],
      "output": [
        { "name": "output_norm", "type": "RMS_NORM", "category": "Normalization" },
        { "name": "output", "type": "MUL_MAT" }
      ]
    }
  },
  {
    "name": "llada",
    "family": "decoder",
    "graph": {
      "input": [
        { "name": "token_embd", "type": "EMBEDDING", "category": "Transform" }
      ],
      "blocks": [
        { "name": "attn_norm", "type": "RMS_NORM", "category": "Normalization" },
        { "name": "attention", "type": "MULTI_HEAD_ATTENTION", "category": "Attention", "tensors": ["attn_q", "attn_k", "attn_v", "attn_output"], "position_encoding": "rope" },
        { "name": "ffn_norm", "type": "RMS_NORM", "category": "Normalization" },
        { "name": "ffn_gate", "type": "MUL_MAT" },
        { "name": "ffn_up", "type": "MUL_MAT" },
        { "name": "ffn_down", "type": "MUL_MAT" }
      ],
      "output": [
        { "name": "output_norm", "type": "RMS_NORM", "category": "Normalization" },
        { "name": "output", "type": "MUL_MAT" }
      ]
    }
  },
  {
    "name": "gemma",
    "family": "decoder",
    "graph": {
      "input": [
        { "name": "token_embd", "type": "EMBEDDING", "category": "Transform" }
      ],
      "blocks": [
        { "name": "attn_norm", "type": "RMS_NORM", "category": "Normalization" },
        { "name": "attention", "type": "MULTI_HEAD_ATTENTION", "category": "Attention", "tensors": ["attn_q", "attn_k", "attn_v", "attn_output"], "position_encoding": "rope" },
        { "name": "ffn_norm", "type": "RMS_NORM", "category": "Normalization" },
        { "name": "ffn_gate", "type": "MUL_MAT" },
        { "name": "ffn_up", "type": "MUL_MAT" },
        { "name": "ffn_down", "type": "MUL_MAT" }
      ],
      "output": [
        { "name": "output_norm", "type": "RMS_NORM", "category": "Normalization" }
      ]
    }
  },
  {
    "name": "gemma2",
    "family": "decoder",
    "graph": {
      "input": [
        { "name": "token_embd", "type": "EMBEDDING", "category": "Transform" }
      ],
      "blocks": [
        { "name": "attn_norm", "type": "RMS_NORM", "category": "Normalization" },
        { "name": "attention", "type": "MULTI_HEAD_ATTENTION", "category": "Attention", "tensors": ["attn_q", "attn_k", "attn_v", "attn_output"], "position_encoding": "rope" },
        { "name": "attn_post_norm", "type": "RMS_NORM", "category": "Normalization" },
        { "name": "ffn_norm", "type": "RMS_NORM", "category": "Normalization" },
        { "name": "ffn_gate", "type": "MUL_MAT" },
        { "name": "ffn_up", "type": "MUL_MAT" },
        { "name": "ffn_down", "type": "MUL_MAT" },
        { "name": "ffn_post_norm", "type": "RMS_NORM", "category": "Normalization" }
      ],
      "output": [
        { "name": "output_norm", "type": "RMS_NORM", "category": "Normalization" }
      ]
    }
  },
  {
    "name": "gemma3",
    "family": "decoder",
    "graph": {
      "input": [
        { "name": "token_embd", "type": "EMBEDDING", "category": "Transform" }
      ],
      "blocks": [
        { "name": "attn_norm", "type": "RMS_NORM", "category": "Normalization" },
        { "name": "attention", "type": "MULTI_HEAD_ATTENTION", "category": "Attention", "tensors": ["attn_q", "attn_k", "attn_v", "attn_output"], "position_encoding": "rope", "has_qk_norm": true },
        { "name": "attn_post_norm", "type": "RMS_NORM", "category": "Normalization" },
        { "name": "ffn_norm", "type": "RMS_NORM", "category": "Normalization" },
        { "name": "ffn_gate", "type": "MUL_MAT" },
        { "name": "ffn_up", "type": "MUL_MAT" },
        { "name": "ffn_down", "type": "MUL_MAT" },
        { "name": "ffn_post_norm", "type": "RMS_NORM", "category": "Normalization" }
      ],
      "output": [
        { "name": "output_norm", "type": "RMS_NORM", "category": "Normalization" },
        { "name": "output", "type": "MUL_MAT" }
      ]
    }
  },
  {
    "name": "gemma3n",
    "family": "decoder",
    "graph": {
      "input": [
        { "name": "token_embd", "type": "EMBEDDING", "category": "Transform" }
      ],
      "blocks": [
        { "name": "attn_norm", "type": "RMS_NORM", "category": "Normalization" },
        { "name": "attention", "type": "MULTI_HEAD_ATTENTION", "category": "Attention", "tensors": ["attn_q", "attn_k", "attn_v", "attn_output"], "position_encoding": "rope", "has_qk_norm": true },
        { "name": "attn_post_norm", "type": "RMS_NORM", "category": "Normalization" },
        { "name": "ffn_norm", "type": "RMS_NORM", "category": "Normalization" },
        { "name": "ffn_gate", "type": "MUL_MAT" },
        { "name": "ffn_up", "type": "MUL_MAT" },
        { "name": "ffn_down", "type": "MUL_MAT" },
        { "name": "ffn_post_norm", "type": "RMS_NORM", "category": "Normalization" }
      ],
      "output": [
        { "name": "output_norm", "type": "RMS_NORM", "category": "Normalization" },
        { "name": "output", "type": "MUL_MAT" }
      ]
    }
  },
  {
    "name": "gemma-embedding",
    "family": "encoder",
    "graph": {
      "input": [
        { "name": "token_embd", "type": "EMBEDDING", "category": "Transform" }
      ],
      "blocks": [
        { "name": "attn_norm", "type": "RMS_NORM", "category": "Normalization" },
        { "name": "attention", "type": "MULTI_HEAD_ATTENTION", "category": "Attention", "tensors": ["attn_q", "attn_k", "attn_v", "attn_output"], "position_encoding": "rope", "has_qk_norm": true },
        { "name": "attn_post_norm", "type": "RMS_NORM", "category": "Normalization" },
        { "name": "ffn_norm", "type": "RMS_NORM", "category": "Normalization" },
        { "name": "ffn_gate", "type": "MUL_MAT" },
        { "name": "ffn_up", "type": "MUL_MAT" },
        { "name": "ffn_down", "type": "MUL_MAT" },
        { "name": "ffn_post_norm", "type": "RMS_NORM", "category": "Normalization" }
      ],
      "output": [
        { "name": "output_norm", "type": "RMS_NORM", "category": "Normalization" },
        { "name": "output", "type": "MUL_MAT" }
      ]
    }
  },
  {
    "name": "falcon",
    "family": "decoder",
    "graph": {
      "input": [
        { "name": "token_embd", "type": "EMBEDDING", "category": "Transform" }
      ],
      "blocks": [
        { "name": "attn_norm", "type": "LAYER_NORM", "category": "Normalization" },
        { "name": "attention", "type": "MULTI_HEAD_ATTENTION", "category": "Attention", "tensors": ["attn_qkv", "attn_output"], "position_encoding": "rope" },
        { "name": "ffn_norm", "type": "LAYER_NORM", "category": "Normalization" },
        { "name": "ffn_up", "type": "MUL_MAT" },
        { "name": "ffn_down", "type": "MUL_MAT" }
      ],
      "output": [
        { "name": "output_norm", "type": "LAYER_NORM", "category": "Normalization" },
        { "name": "output", "type": "MUL_MAT" }
      ],
      "parallel_ffn": true
    }
  },
  {
    "name": "gpt2",
    "family": "decoder",
    "graph": {
      "input": [
        { "name": "token_embd", "type": "EMBEDDING", "category": "Transform" }
      ],
      "blocks": [
        { "name": "attn_norm", "type": "LAYER_NORM", "category": "Normalization" },
        { "name": "attention", "type": "MULTI_HEAD_ATTENTION", "category": "Attention", "tensors": ["attn_qkv", "attn_output"], "position_encoding": "learned", "has_bias": true },
        { "name": "ffn_norm", "type": "LAYER_NORM", "category": "Normalization" },
        { "name": "ffn_up", "type": "MUL_MAT" },
        { "name": "ffn_down", "type": "MUL_MAT" }
      ],
      "output": [
        { "name": "output_norm", "type": "LAYER_NORM", "category": "Normalization" },
        { "name": "output", "type": "MUL_MAT" }
      ]
    }
  },
  {
    "name": "gptneox",
    "family": "decoder",
    "graph": {
      "input": [
        { "name": "token_embd", "type": "EMBEDDING", "category": "Transform" }
      ],
      "blocks": [
        { "name": "attn_norm", "type": "LAYER_NORM", "category": "Normalization" },
        { "name": "attention", "type": "MULTI_HEAD_ATTENTION", "category": "Attention", "tensors": ["attn_qkv", "attn_output"], "position_encoding": "rope", "has_bias": true },
        { "name": "ffn_norm", "type": "LAYER_NORM", "category": "Normalization" },
        { "name": "ffn_up", "type": "MUL_MAT" },
        { "name": "ffn_down", "type": "MUL_MAT" }
      ],
      "output": [
        { "name": "output_norm", "type": "LAYER_NORM", "category": "Normalization" },
        { "name": "output", "type": "MUL_MAT" }
      ],
      "parallel_ffn": true
    }
  },
  {
    "name": "gptj",
    "family": "decoder",
    "graph": {
      "input": [
        { "name": "token_embd", "type": "EMBEDDING", "category": "Transform" }
      ],
      "blocks": [
        { "name": "attn_norm", "type": "LAYER_NORM", "category": "Normalization" },
        { "name": "attention", "type": "MULTI_HEAD_ATTENTION", "category": "Attention", "tensors": ["attn_q", "attn_k", "attn_v", "attn_output"], "position_encoding": "rope" },
        { "name": "ffn_norm", "type": "LAYER_NORM", "category": "Normalization" },
        { "name": "ffn_up", "type": "MUL_MAT" },
        { "name": "ffn_down", "type": "MUL_MAT" }
      ],
      "output": [
        { "name": "output_norm", "type": "LAYER_NORM", "category": "Normalization" },
        { "name": "output", "type": "MUL_MAT" }
      ],
      "parallel_ffn": true
    }
  },
  {
    "name": "bloom",
    "family": "decoder",
    "graph": {
      "input": [
        { "name": "token_embd", "type": "EMBEDDING", "category": "Transform" },
        { "name": "token_embd_norm", "type": "LAYER_NORM", "category": "Normalization" }
      ],
      "blocks": [
        { "name": "attn_norm", "type": "LAYER_NORM", "category": "Normalization" },
        { "name": "attention", "type": "MULTI_HEAD_ATTENTION", "category": "Attention", "tensors": ["attn_qkv", "attn_output"], "position_encoding": "alibi", "has_bias": true },
        { "name": "ffn_norm", "type": "LAYER_NORM", "category": "Normalization" },
        { "name": "ffn_up", "type": "MUL_MAT" },
        { "name": "ffn_down", "type": "MUL_MAT" }
      ],
      "output": [
        { "name": "output_norm", "type": "LAYER_NORM", "category": "Normalization" },
        { "name": "output", "type": "MUL_MAT" }
      ]
    }
  },
  {
    "name": "starcoder",
    "family": "decoder",
    "graph": {
      "input": [
        { "name": "token_embd", "type": "EMBEDDING", "category": "Transform" }
      ],
      "blocks": [
        { "name": "attn_norm", "type": "LAYER_NORM", "category": "Normalization" },
        { "name": "attention", "type": "MULTI_HEAD_ATTENTION", "category": "Attention", "tensors": ["attn_qkv", "attn_output"], "position_encoding": "learned", "has_bias": true },
        { "name": "ffn_norm", "type": "LAYER_NORM", "category": "Normalization" },
        { "name": "ffn_up", "type": "MUL_MAT" },
        { "name": "ffn_down", "type": "MUL_MAT" }
      ],
      "output": [
        { "name": "output_norm", "type": "LAYER_NORM", "category": "Normalization" },
        { "name": "output", "type": "MUL_MAT" }
      ]
    }
  },
  {
    "name": "starcoder2",
    "family": "decoder",
    "graph": {
      "input": [
        { "name": "token_embd", "type": "EMBEDDING", "category": "Transform" }
      ],
      "blocks": [
        { "name": "attn_norm", "type": "RMS_NORM", "category": "Normalization" },
        { "name": "attention", "type": "MULTI_HEAD_ATTENTION", "category": "Attention", "tensors": ["attn_q", "attn_k", "attn_v", "attn_output"], "position_encoding": "rope", "has_bias": true },
        { "name": "ffn_norm", "type": "RMS_NORM", "category": "Normalization" },
        { "name": "ffn_gate", "type": "MUL_MAT" },
        { "name": "ffn_up", "type": "MUL_MAT" },
        { "name": "ffn_down", "type": "MUL_MAT" }
      ],
      "output": [
        { "name": "output_norm", "type": "RMS_NORM", "category": "Normalization" },
        { "name": "output", "type": "MUL_MAT" }
      ]
    }
  },
  {
    "name": "baichuan",
    "family": "decoder",
    "graph": {
      "input": [
        { "name": "token_embd", "type": "EMBEDDING", "category": "Transform" }
      ],
      "blocks": [
        { "name": "attn_norm", "type": "RMS_NORM", "category": "Normalization" },
        { "name": "attention", "type": "MULTI_HEAD_ATTENTION", "category": "Attention", "tensors": ["attn_q", "attn_k", "attn_v", "attn_output"], "position_encoding": "rope" },
        { "name": "ffn_norm", "type": "RMS_NORM", "category": "Normalization" },
        { "name": "ffn_gate", "type": "MUL_MAT" },
        { "name": "ffn_up", "type": "MUL_MAT" },
        { "name": "ffn_down", "type": "MUL_MAT" }
      ],
      "output": [
        { "name": "output_norm", "type": "RMS_NORM", "category": "Normalization" },
        { "name": "output", "type": "MUL_MAT" }
      ]
    }
  },
  {
    "name": "internlm2",
    "family": "decoder",
    "graph": {
      "input": [
        { "name": "token_embd", "type": "EMBEDDING", "category": "Transform" }
      ],
      "blocks": [
        { "name": "attn_norm", "type": "RMS_NORM", "category": "Normalization" },
        { "name": "attention", "type": "MULTI_HEAD_ATTENTION", "category": "Attention", "tensors": ["attn_q", "attn_k", "attn_v", "attn_output"], "position_encoding": "rope" },
        { "name": "ffn_norm", "type": "RMS_NORM", "category": "Normalization" },
        { "name": "ffn_gate", "type": "MUL_MAT" },
        { "name": "ffn_up", "type": "MUL_MAT" },
        { "name": "ffn_down", "type": "MUL_MAT" }
      ],
      "output": [
        { "name": "output_norm", "type": "RMS_NORM", "category": "Normalization" },
        { "name": "output", "type": "MUL_MAT" }
      ]
    }
  },
  {
    "name": "codeshell",
    "family": "decoder",
    "graph": {
      "input": [
        { "name": "token_embd", "type": "EMBEDDING", "category": "Transform" }
      ],
      "blocks": [
        { "name": "attn_norm", "type": "RMS_NORM", "category": "Normalization" },
        { "name": "attention", "type": "MULTI_HEAD_ATTENTION", "category": "Attention", "tensors": ["attn_qkv", "attn_output"], "position_encoding": "rope" },
        { "name": "ffn_norm", "type": "RMS_NORM", "category": "Normalization" },
        { "name": "ffn_gate", "type": "MUL_MAT" },
        { "name": "ffn_up", "type": "MUL_MAT" },
        { "name": "ffn_down", "type": "MUL_MAT" }
      ],
      "output": [
        { "name": "output_norm", "type": "RMS_NORM", "category": "Normalization" },
        { "name": "output", "type": "MUL_MAT" }
      ]
    }
  },
  {
    "name": "orion",
    "family": "decoder",
    "graph": {
      "input": [
        { "name": "token_embd", "type": "EMBEDDING", "category": "Transform" }
      ],
      "blocks": [
        { "name": "attn_norm", "type": "LAYER_NORM", "category": "Normalization" },
        { "name": "attention", "type": "MULTI_HEAD_ATTENTION", "category": "Attention", "tensors": ["attn_q", "attn_k", "attn_v", "attn_output"], "position_encoding": "rope", "has_bias": true },
        { "name": "ffn_norm", "type": "LAYER_NORM", "category": "Normalization" },
        { "name": "ffn_gate", "type": "MUL_MAT" },
        { "name": "ffn_up", "type": "MUL_MAT" },
        { "name": "ffn_down", "type": "MUL_MAT" }
      ],
      "output": [
        { "name": "output_norm", "type": "LAYER_NORM", "category": "Normalization" },
        { "name": "output", "type": "MUL_MAT" }
      ]
    }
  },
  536. {
  537. "name": "refact",
  538. "family": "decoder",
  539. "graph": {
  540. "input": [
  541. { "name": "token_embd", "type": "EMBEDDING", "category": "Transform" }
  542. ],
  543. "blocks": [
  544. { "name": "attn_norm", "type": "RMS_NORM", "category": "Normalization" },
  545. { "name": "attention", "type": "MULTI_HEAD_ATTENTION", "category": "Attention", "tensors": ["attn_q", "attn_k", "attn_v", "attn_output"], "position_encoding": "rope" },
  546. { "name": "ffn_norm", "type": "RMS_NORM", "category": "Normalization" },
  547. { "name": "ffn_gate", "type": "MUL_MAT" },
  548. { "name": "ffn_up", "type": "MUL_MAT" },
  549. { "name": "ffn_down", "type": "MUL_MAT" }
  550. ],
  551. "output": [
  552. { "name": "output_norm", "type": "RMS_NORM", "category": "Normalization" },
  553. { "name": "output", "type": "MUL_MAT" }
  554. ]
  555. }
  556. },
  557. {
  558. "name": "xverse",
  559. "family": "decoder",
  560. "graph": {
  561. "input": [
  562. { "name": "token_embd", "type": "EMBEDDING", "category": "Transform" }
  563. ],
  564. "blocks": [
  565. { "name": "attn_norm", "type": "RMS_NORM", "category": "Normalization" },
  566. { "name": "attention", "type": "MULTI_HEAD_ATTENTION", "category": "Attention", "tensors": ["attn_q", "attn_k", "attn_v", "attn_output"], "position_encoding": "rope" },
  567. { "name": "ffn_norm", "type": "RMS_NORM", "category": "Normalization" },
  568. { "name": "ffn_gate", "type": "MUL_MAT" },
  569. { "name": "ffn_up", "type": "MUL_MAT" },
  570. { "name": "ffn_down", "type": "MUL_MAT" }
  571. ],
  572. "output": [
  573. { "name": "output_norm", "type": "RMS_NORM", "category": "Normalization" },
  574. { "name": "output", "type": "MUL_MAT" }
  575. ]
  576. }
  577. },
  578. {
  579. "name": "stablelm",
  580. "family": "decoder",
  581. "graph": {
  582. "input": [
  583. { "name": "token_embd", "type": "EMBEDDING", "category": "Transform" }
  584. ],
  585. "blocks": [
  586. { "name": "attn_norm", "type": "LAYER_NORM", "category": "Normalization" },
  587. { "name": "attention", "type": "MULTI_HEAD_ATTENTION", "category": "Attention", "tensors": ["attn_q", "attn_k", "attn_v", "attn_output"], "position_encoding": "rope", "has_bias": true, "has_qk_norm": true },
  588. { "name": "ffn_norm", "type": "LAYER_NORM", "category": "Normalization" },
  589. { "name": "ffn_gate", "type": "MUL_MAT" },
  590. { "name": "ffn_up", "type": "MUL_MAT" },
  591. { "name": "ffn_down", "type": "MUL_MAT" }
  592. ],
  593. "output": [
  594. { "name": "output_norm", "type": "LAYER_NORM", "category": "Normalization" },
  595. { "name": "output", "type": "MUL_MAT" }
  596. ]
  597. }
  598. },
  599. {
  600. "name": "phi2",
  601. "family": "decoder",
  602. "graph": {
  603. "input": [
  604. { "name": "token_embd", "type": "EMBEDDING", "category": "Transform" }
  605. ],
  606. "blocks": [
  607. { "name": "attn_norm", "type": "LAYER_NORM", "category": "Normalization" },
  608. { "name": "attention", "type": "MULTI_HEAD_ATTENTION", "category": "Attention", "tensors": ["attn_q", "attn_k", "attn_v", "attn_output"], "position_encoding": "rope", "has_bias": true },
  609. { "name": "ffn_norm", "type": "LAYER_NORM", "category": "Normalization" },
  610. { "name": "ffn_up", "type": "MUL_MAT" },
  611. { "name": "ffn_down", "type": "MUL_MAT" }
  612. ],
  613. "output": [
  614. { "name": "output_norm", "type": "LAYER_NORM", "category": "Normalization" },
  615. { "name": "output", "type": "MUL_MAT" }
  616. ],
  617. "parallel_ffn": true
  618. }
  619. },
  620. {
  621. "name": "phi3",
  622. "family": "decoder",
  623. "graph": {
  624. "input": [
  625. { "name": "token_embd", "type": "EMBEDDING", "category": "Transform" }
  626. ],
  627. "blocks": [
  628. { "name": "attn_norm", "type": "RMS_NORM", "category": "Normalization" },
  629. { "name": "attention", "type": "MULTI_HEAD_ATTENTION", "category": "Attention", "tensors": ["attn_q", "attn_k", "attn_v", "attn_output"], "position_encoding": "rope" },
  630. { "name": "ffn_norm", "type": "RMS_NORM", "category": "Normalization" },
  631. { "name": "ffn_gate", "type": "MUL_MAT" },
  632. { "name": "ffn_up", "type": "MUL_MAT" },
  633. { "name": "ffn_down", "type": "MUL_MAT" }
  634. ],
  635. "output": [
  636. { "name": "output_norm", "type": "RMS_NORM", "category": "Normalization" },
  637. { "name": "output", "type": "MUL_MAT" }
  638. ]
  639. }
  640. },
  641. {
  642. "name": "phimoe",
  643. "family": "decoder-moe",
  644. "graph": {
  645. "input": [
  646. { "name": "token_embd", "type": "EMBEDDING", "category": "Transform" }
  647. ],
  648. "blocks": [
  649. { "name": "attn_norm", "type": "RMS_NORM", "category": "Normalization" },
  650. { "name": "attention", "type": "MULTI_HEAD_ATTENTION", "category": "Attention", "tensors": ["attn_q", "attn_k", "attn_v", "attn_output"], "position_encoding": "rope" },
  651. { "name": "ffn_norm", "type": "RMS_NORM", "category": "Normalization" },
  652. { "name": "ffn_gate_inp", "type": "MUL_MAT" },
  653. { "name": "ffn_gate_exps", "type": "MUL_MAT_ID" },
  654. { "name": "ffn_up_exps", "type": "MUL_MAT_ID" },
  655. { "name": "ffn_down_exps", "type": "MUL_MAT_ID" }
  656. ],
  657. "output": [
  658. { "name": "output_norm", "type": "RMS_NORM", "category": "Normalization" },
  659. { "name": "output", "type": "MUL_MAT" }
  660. ]
  661. }
  662. },
  663. {
  664. "name": "olmo",
  665. "family": "decoder",
  666. "graph": {
  667. "input": [
  668. { "name": "token_embd", "type": "EMBEDDING", "category": "Transform" }
  669. ],
  670. "blocks": [
  671. { "name": "attn_norm", "type": "RMS_NORM", "category": "Normalization" },
  672. { "name": "attention", "type": "MULTI_HEAD_ATTENTION", "category": "Attention", "tensors": ["attn_q", "attn_k", "attn_v", "attn_output"], "position_encoding": "rope" },
  673. { "name": "ffn_norm", "type": "RMS_NORM", "category": "Normalization" },
  674. { "name": "ffn_gate", "type": "MUL_MAT" },
  675. { "name": "ffn_up", "type": "MUL_MAT" },
  676. { "name": "ffn_down", "type": "MUL_MAT" }
  677. ],
  678. "output": [
  679. { "name": "output", "type": "MUL_MAT" }
  680. ]
  681. }
  682. },
  683. {
  684. "name": "olmo2",
  685. "family": "decoder",
  686. "graph": {
  687. "input": [
  688. { "name": "token_embd", "type": "EMBEDDING", "category": "Transform" }
  689. ],
  690. "blocks": [
  691. { "name": "attn_norm", "type": "RMS_NORM", "category": "Normalization" },
  692. { "name": "attention", "type": "MULTI_HEAD_ATTENTION", "category": "Attention", "tensors": ["attn_q", "attn_k", "attn_v", "attn_output"], "position_encoding": "rope", "has_qk_norm": true },
  693. { "name": "attn_post_norm", "type": "RMS_NORM", "category": "Normalization" },
  694. { "name": "ffn_norm", "type": "RMS_NORM", "category": "Normalization" },
  695. { "name": "ffn_gate", "type": "MUL_MAT" },
  696. { "name": "ffn_up", "type": "MUL_MAT" },
  697. { "name": "ffn_down", "type": "MUL_MAT" },
  698. { "name": "ffn_post_norm", "type": "RMS_NORM", "category": "Normalization" }
  699. ],
  700. "output": [
  701. { "name": "output_norm", "type": "RMS_NORM", "category": "Normalization" },
  702. { "name": "output", "type": "MUL_MAT" }
  703. ]
  704. }
  705. },
  706. {
  707. "name": "olmoe",
  708. "family": "decoder-moe",
  709. "graph": {
  710. "input": [
  711. { "name": "token_embd", "type": "EMBEDDING", "category": "Transform" }
  712. ],
  713. "blocks": [
  714. { "name": "attn_norm", "type": "RMS_NORM", "category": "Normalization" },
  715. { "name": "attention", "type": "MULTI_HEAD_ATTENTION", "category": "Attention", "tensors": ["attn_q", "attn_k", "attn_v", "attn_output"], "position_encoding": "rope", "has_qk_norm": true },
  716. { "name": "ffn_norm", "type": "RMS_NORM", "category": "Normalization" },
  717. { "name": "ffn_gate_inp", "type": "MUL_MAT" },
  718. { "name": "ffn_gate_exps", "type": "MUL_MAT_ID" },
  719. { "name": "ffn_up_exps", "type": "MUL_MAT_ID" },
  720. { "name": "ffn_down_exps", "type": "MUL_MAT_ID" }
  721. ],
  722. "output": [
  723. { "name": "output_norm", "type": "RMS_NORM", "category": "Normalization" },
  724. { "name": "output", "type": "MUL_MAT" }
  725. ]
  726. }
  727. },
  728. {
  729. "name": "exaone",
  730. "family": "decoder",
  731. "graph": {
  732. "input": [
  733. { "name": "token_embd", "type": "EMBEDDING", "category": "Transform" }
  734. ],
  735. "blocks": [
  736. { "name": "attn_norm", "type": "RMS_NORM", "category": "Normalization" },
  737. { "name": "attention", "type": "MULTI_HEAD_ATTENTION", "category": "Attention", "tensors": ["attn_q", "attn_k", "attn_v", "attn_output"], "position_encoding": "rope" },
  738. { "name": "ffn_norm", "type": "RMS_NORM", "category": "Normalization" },
  739. { "name": "ffn_gate", "type": "MUL_MAT" },
  740. { "name": "ffn_up", "type": "MUL_MAT" },
  741. { "name": "ffn_down", "type": "MUL_MAT" }
  742. ],
  743. "output": [
  744. { "name": "output_norm", "type": "RMS_NORM", "category": "Normalization" },
  745. { "name": "output", "type": "MUL_MAT" }
  746. ]
  747. }
  748. },
  749. {
  750. "name": "exaone4",
  751. "family": "decoder",
  752. "graph": {
  753. "input": [
  754. { "name": "token_embd", "type": "EMBEDDING", "category": "Transform" }
  755. ],
  756. "blocks": [
  757. { "name": "attn_norm", "type": "RMS_NORM", "category": "Normalization" },
  758. { "name": "attention", "type": "MULTI_HEAD_ATTENTION", "category": "Attention", "tensors": ["attn_q", "attn_k", "attn_v", "attn_output"], "position_encoding": "rope", "has_qk_norm": true },
  759. { "name": "attn_post_norm", "type": "RMS_NORM", "category": "Normalization" },
  760. { "name": "ffn_norm", "type": "RMS_NORM", "category": "Normalization" },
  761. { "name": "ffn_gate", "type": "MUL_MAT" },
  762. { "name": "ffn_up", "type": "MUL_MAT" },
  763. { "name": "ffn_down", "type": "MUL_MAT" },
  764. { "name": "ffn_post_norm", "type": "RMS_NORM", "category": "Normalization" }
  765. ],
  766. "output": [
  767. { "name": "output_norm", "type": "RMS_NORM", "category": "Normalization" },
  768. { "name": "output", "type": "MUL_MAT" }
  769. ]
  770. }
  771. },
  772. {
  773. "name": "exaone-moe",
  774. "family": "decoder-moe",
  775. "graph": {
  776. "input": [
  777. { "name": "token_embd", "type": "EMBEDDING", "category": "Transform" }
  778. ],
  779. "blocks": [
  780. { "name": "attn_norm", "type": "RMS_NORM", "category": "Normalization" },
  781. { "name": "attention", "type": "MULTI_HEAD_ATTENTION", "category": "Attention", "tensors": ["attn_q", "attn_k", "attn_v", "attn_output"], "position_encoding": "rope", "has_qk_norm": true },
  782. { "name": "ffn_norm", "type": "RMS_NORM", "category": "Normalization" },
  783. { "name": "ffn_gate_inp", "type": "MUL_MAT" },
  784. { "name": "ffn_gate_exps", "type": "MUL_MAT_ID" },
  785. { "name": "ffn_up_exps", "type": "MUL_MAT_ID" },
  786. { "name": "ffn_down_exps", "type": "MUL_MAT_ID" }
  787. ],
  788. "output": [
  789. { "name": "output_norm", "type": "RMS_NORM", "category": "Normalization" },
  790. { "name": "output", "type": "MUL_MAT" }
  791. ]
  792. }
  793. },
  794. {
  795. "name": "chameleon",
  796. "family": "decoder",
  797. "graph": {
  798. "input": [
  799. { "name": "token_embd", "type": "EMBEDDING", "category": "Transform" }
  800. ],
  801. "blocks": [
  802. { "name": "attn_norm", "type": "RMS_NORM", "category": "Normalization" },
  803. { "name": "attention", "type": "MULTI_HEAD_ATTENTION", "category": "Attention", "tensors": ["attn_q", "attn_k", "attn_v", "attn_output"], "position_encoding": "rope", "has_qk_norm": true },
  804. { "name": "ffn_norm", "type": "RMS_NORM", "category": "Normalization" },
  805. { "name": "ffn_gate", "type": "MUL_MAT" },
  806. { "name": "ffn_up", "type": "MUL_MAT" },
  807. { "name": "ffn_down", "type": "MUL_MAT" }
  808. ],
  809. "output": [
  810. { "name": "output_norm", "type": "RMS_NORM", "category": "Normalization" },
  811. { "name": "output", "type": "MUL_MAT" }
  812. ]
  813. }
  814. },
  815. {
  816. "name": "chatglm",
  817. "family": "decoder",
  818. "graph": {
  819. "input": [
  820. { "name": "token_embd", "type": "EMBEDDING", "category": "Transform" }
  821. ],
  822. "blocks": [
  823. { "name": "attn_norm", "type": "RMS_NORM", "category": "Normalization" },
  824. { "name": "attention", "type": "MULTI_HEAD_ATTENTION", "category": "Attention", "tensors": ["attn_qkv", "attn_output"], "position_encoding": "rope" },
  825. { "name": "ffn_norm", "type": "RMS_NORM", "category": "Normalization" },
  826. { "name": "ffn_up", "type": "MUL_MAT" },
  827. { "name": "ffn_down", "type": "MUL_MAT" }
  828. ],
  829. "output": [
  830. { "name": "output_norm", "type": "RMS_NORM", "category": "Normalization" },
  831. { "name": "output", "type": "MUL_MAT" }
  832. ]
  833. }
  834. },
  835. {
  836. "name": "glm4",
  837. "family": "decoder",
  838. "graph": {
  839. "input": [
  840. { "name": "token_embd", "type": "EMBEDDING", "category": "Transform" }
  841. ],
  842. "blocks": [
  843. { "name": "attn_norm", "type": "RMS_NORM", "category": "Normalization" },
  844. { "name": "attention", "type": "MULTI_HEAD_ATTENTION", "category": "Attention", "tensors": ["attn_q", "attn_k", "attn_v", "attn_output"], "position_encoding": "rope", "has_bias": true },
  845. { "name": "attn_post_norm", "type": "RMS_NORM", "category": "Normalization" },
  846. { "name": "ffn_norm", "type": "RMS_NORM", "category": "Normalization" },
  847. { "name": "ffn_gate", "type": "MUL_MAT" },
  848. { "name": "ffn_up", "type": "MUL_MAT" },
  849. { "name": "ffn_down", "type": "MUL_MAT" },
  850. { "name": "ffn_post_norm", "type": "RMS_NORM", "category": "Normalization" }
  851. ],
  852. "output": [
  853. { "name": "output_norm", "type": "RMS_NORM", "category": "Normalization" },
  854. { "name": "output", "type": "MUL_MAT" }
  855. ]
  856. }
  857. },
  858. {
  859. "name": "glm4moe",
  860. "family": "decoder-moe",
  861. "graph": {
  862. "input": [
  863. { "name": "token_embd", "type": "EMBEDDING", "category": "Transform" }
  864. ],
  865. "blocks": [
  866. { "name": "attn_norm", "type": "RMS_NORM", "category": "Normalization" },
  867. { "name": "attention", "type": "MULTI_HEAD_ATTENTION", "category": "Attention", "tensors": ["attn_q", "attn_k", "attn_v", "attn_output"], "position_encoding": "rope", "has_qk_norm": true },
  868. { "name": "attn_post_norm", "type": "RMS_NORM", "category": "Normalization" },
  869. { "name": "ffn_norm", "type": "RMS_NORM", "category": "Normalization" },
  870. { "name": "ffn_gate_inp", "type": "MUL_MAT" },
  871. { "name": "ffn_gate_exps", "type": "MUL_MAT_ID" },
  872. { "name": "ffn_up_exps", "type": "MUL_MAT_ID" },
  873. { "name": "ffn_down_exps", "type": "MUL_MAT_ID" },
  874. { "name": "ffn_post_norm", "type": "RMS_NORM", "category": "Normalization" }
  875. ],
  876. "output": [
  877. { "name": "output_norm", "type": "RMS_NORM", "category": "Normalization" },
  878. { "name": "output", "type": "MUL_MAT" }
  879. ]
  880. }
  881. },
  882. {
  883. "name": "glm-dsa",
  884. "family": "mla",
  885. "graph": {
  886. "input": [
  887. { "name": "token_embd", "type": "EMBEDDING", "category": "Transform" }
  888. ],
  889. "blocks": [
  890. { "name": "attn_norm", "type": "RMS_NORM", "category": "Normalization" },
  891. { "name": "attention", "type": "MULTI_LATENT_ATTENTION", "category": "Attention", "tensors": ["attn_q_a", "attn_q_b", "attn_kv_a_mqa", "attn_kv_b", "attn_output"], "position_encoding": "rope", "has_qk_norm": true },
  892. { "name": "ffn_norm", "type": "RMS_NORM", "category": "Normalization" },
  893. { "name": "ffn_gate_inp", "type": "MUL_MAT" },
  894. { "name": "ffn_gate_exps", "type": "MUL_MAT_ID" },
  895. { "name": "ffn_up_exps", "type": "MUL_MAT_ID" },
  896. { "name": "ffn_down_exps", "type": "MUL_MAT_ID" }
  897. ],
  898. "output": [
  899. { "name": "output_norm", "type": "RMS_NORM", "category": "Normalization" },
  900. { "name": "output", "type": "MUL_MAT" }
  901. ]
  902. }
  903. },
  904. {
  905. "name": "command-r",
  906. "family": "decoder",
  907. "graph": {
  908. "input": [
  909. { "name": "token_embd", "type": "EMBEDDING", "category": "Transform" }
  910. ],
  911. "blocks": [
  912. { "name": "attn_norm", "type": "LAYER_NORM", "category": "Normalization" },
  913. { "name": "attention", "type": "MULTI_HEAD_ATTENTION", "category": "Attention", "tensors": ["attn_q", "attn_k", "attn_v", "attn_output"], "position_encoding": "rope", "has_qk_norm": true },
  914. { "name": "ffn_norm", "type": "LAYER_NORM", "category": "Normalization" },
  915. { "name": "ffn_gate", "type": "MUL_MAT" },
  916. { "name": "ffn_up", "type": "MUL_MAT" },
  917. { "name": "ffn_down", "type": "MUL_MAT" }
  918. ],
  919. "output": [
  920. { "name": "output_norm", "type": "LAYER_NORM", "category": "Normalization" }
  921. ]
  922. }
  923. },
  924. {
  925. "name": "cohere2",
  926. "family": "decoder",
  927. "graph": {
  928. "input": [
  929. { "name": "token_embd", "type": "EMBEDDING", "category": "Transform" }
  930. ],
  931. "blocks": [
  932. { "name": "attn_norm", "type": "LAYER_NORM", "category": "Normalization" },
  933. { "name": "attention", "type": "MULTI_HEAD_ATTENTION", "category": "Attention", "tensors": ["attn_q", "attn_k", "attn_v", "attn_output"], "position_encoding": "rope" },
  934. { "name": "ffn_norm", "type": "LAYER_NORM", "category": "Normalization" },
  935. { "name": "ffn_gate", "type": "MUL_MAT" },
  936. { "name": "ffn_up", "type": "MUL_MAT" },
  937. { "name": "ffn_down", "type": "MUL_MAT" }
  938. ],
  939. "output": [
  940. { "name": "output_norm", "type": "LAYER_NORM", "category": "Normalization" }
  941. ]
  942. }
  943. },
  944. {
  945. "name": "plamo",
  946. "family": "decoder",
  947. "graph": {
  948. "input": [
  949. { "name": "token_embd", "type": "EMBEDDING", "category": "Transform" }
  950. ],
  951. "blocks": [
  952. { "name": "attn_norm", "type": "RMS_NORM", "category": "Normalization" },
  953. { "name": "attention", "type": "MULTI_HEAD_ATTENTION", "category": "Attention", "tensors": ["attn_q", "attn_k", "attn_v", "attn_output"], "position_encoding": "rope" },
  954. { "name": "ffn_norm", "type": "RMS_NORM", "category": "Normalization" },
  955. { "name": "ffn_gate", "type": "MUL_MAT" },
  956. { "name": "ffn_up", "type": "MUL_MAT" },
  957. { "name": "ffn_down", "type": "MUL_MAT" }
  958. ],
  959. "output": [
  960. { "name": "output_norm", "type": "RMS_NORM", "category": "Normalization" },
  961. { "name": "output", "type": "MUL_MAT" }
  962. ]
  963. }
  964. },
  965. {
  966. "name": "plamo3",
  967. "family": "decoder",
  968. "graph": {
  969. "input": [
  970. { "name": "token_embd", "type": "EMBEDDING", "category": "Transform" }
  971. ],
  972. "blocks": [
  973. { "name": "attn_norm", "type": "RMS_NORM", "category": "Normalization" },
  974. { "name": "attention", "type": "MULTI_HEAD_ATTENTION", "category": "Attention", "tensors": ["attn_qkv", "attn_output"], "position_encoding": "rope", "has_qk_norm": true },
  975. { "name": "attn_post_norm", "type": "RMS_NORM", "category": "Normalization" },
  976. { "name": "ffn_norm", "type": "RMS_NORM", "category": "Normalization" },
  977. { "name": "ffn_up", "type": "MUL_MAT" },
  978. { "name": "ffn_down", "type": "MUL_MAT" },
  979. { "name": "ffn_post_norm", "type": "RMS_NORM", "category": "Normalization" }
  980. ],
  981. "output": [
  982. { "name": "output_norm", "type": "RMS_NORM", "category": "Normalization" },
  983. { "name": "output", "type": "MUL_MAT" }
  984. ]
  985. }
  986. },
  987. {
  988. "name": "mpt",
  989. "family": "decoder",
  990. "graph": {
  991. "input": [
  992. { "name": "token_embd", "type": "EMBEDDING", "category": "Transform" }
  993. ],
  994. "blocks": [
  995. { "name": "attn_norm", "type": "LAYER_NORM", "category": "Normalization" },
  996. { "name": "attention", "type": "MULTI_HEAD_ATTENTION", "category": "Attention", "tensors": ["attn_qkv", "attn_output"], "position_encoding": "alibi", "has_qk_norm": true },
  997. { "name": "ffn_norm", "type": "LAYER_NORM", "category": "Normalization" },
  998. { "name": "ffn_up", "type": "MUL_MAT" },
  999. { "name": "ffn_down", "type": "MUL_MAT" }
  1000. ],
  1001. "output": [
  1002. { "name": "output_norm", "type": "LAYER_NORM", "category": "Normalization" },
  1003. { "name": "output", "type": "MUL_MAT" }
  1004. ]
  1005. }
  1006. },
  1007. {
  1008. "name": "nemotron",
  1009. "family": "decoder",
  1010. "graph": {
  1011. "input": [
  1012. { "name": "token_embd", "type": "EMBEDDING", "category": "Transform" }
  1013. ],
  1014. "blocks": [
  1015. { "name": "attn_norm", "type": "RMS_NORM", "category": "Normalization" },
  1016. { "name": "attention", "type": "MULTI_HEAD_ATTENTION", "category": "Attention", "tensors": ["attn_q", "attn_k", "attn_v", "attn_output"], "position_encoding": "rope" },
  1017. { "name": "ffn_norm", "type": "RMS_NORM", "category": "Normalization" },
  1018. { "name": "ffn_up", "type": "MUL_MAT" },
  1019. { "name": "ffn_down", "type": "MUL_MAT" }
  1020. ],
  1021. "output": [
  1022. { "name": "output_norm", "type": "RMS_NORM", "category": "Normalization" },
  1023. { "name": "output", "type": "MUL_MAT" }
  1024. ]
  1025. }
  1026. },
  1027. {
  1028. "name": "openelm",
  1029. "family": "decoder",
  1030. "graph": {
  1031. "input": [
  1032. { "name": "token_embd", "type": "EMBEDDING", "category": "Transform" }
  1033. ],
  1034. "blocks": [
  1035. { "name": "attn_norm", "type": "RMS_NORM", "category": "Normalization" },
  1036. { "name": "attention", "type": "MULTI_HEAD_ATTENTION", "category": "Attention", "tensors": ["attn_qkv", "attn_output"], "position_encoding": "rope", "has_qk_norm": true },
  1037. { "name": "ffn_norm", "type": "RMS_NORM", "category": "Normalization" },
  1038. { "name": "ffn_gate", "type": "MUL_MAT" },
  1039. { "name": "ffn_up", "type": "MUL_MAT" },
  1040. { "name": "ffn_down", "type": "MUL_MAT" }
  1041. ],
  1042. "output": [
  1043. { "name": "output_norm", "type": "RMS_NORM", "category": "Normalization" }
  1044. ]
  1045. }
  1046. },
  1047. {
  1048. "name": "maincoder",
  1049. "family": "decoder",
  1050. "graph": {
  1051. "input": [
  1052. { "name": "token_embd", "type": "EMBEDDING", "category": "Transform" }
  1053. ],
  1054. "blocks": [
  1055. { "name": "attn_norm", "type": "RMS_NORM", "category": "Normalization" },
  1056. { "name": "attention", "type": "MULTI_HEAD_ATTENTION", "category": "Attention", "tensors": ["attn_q", "attn_k", "attn_v", "attn_output"], "position_encoding": "rope", "has_qk_norm": true },
  1057. { "name": "ffn_norm", "type": "RMS_NORM", "category": "Normalization" },
  1058. { "name": "ffn_gate", "type": "MUL_MAT" },
  1059. { "name": "ffn_up", "type": "MUL_MAT" },
  1060. { "name": "ffn_down", "type": "MUL_MAT" }
  1061. ],
  1062. "output": [
  1063. { "name": "output_norm", "type": "RMS_NORM", "category": "Normalization" },
  1064. { "name": "output", "type": "MUL_MAT" }
  1065. ]
  1066. }
  1067. },
  1068. {
  1069. "name": "arcee",
  1070. "family": "decoder",
  1071. "graph": {
  1072. "input": [
  1073. { "name": "token_embd", "type": "EMBEDDING", "category": "Transform" }
  1074. ],
  1075. "blocks": [
  1076. { "name": "attn_norm", "type": "RMS_NORM", "category": "Normalization" },
  1077. { "name": "attention", "type": "MULTI_HEAD_ATTENTION", "category": "Attention", "tensors": ["attn_q", "attn_k", "attn_v", "attn_output"], "position_encoding": "rope" },
  1078. { "name": "ffn_norm", "type": "RMS_NORM", "category": "Normalization" },
  1079. { "name": "ffn_up", "type": "MUL_MAT" },
  1080. { "name": "ffn_down", "type": "MUL_MAT" }
  1081. ],
  1082. "output": [
  1083. { "name": "output_norm", "type": "RMS_NORM", "category": "Normalization" },
  1084. { "name": "output", "type": "MUL_MAT" }
  1085. ]
  1086. }
  1087. },
  1088. {
  1089. "name": "mistral3",
  1090. "family": "decoder",
  1091. "graph": {
  1092. "input": [
  1093. { "name": "token_embd", "type": "EMBEDDING", "category": "Transform" }
  1094. ],
  1095. "blocks": [
  1096. { "name": "attn_norm", "type": "RMS_NORM", "category": "Normalization" },
  1097. { "name": "attention", "type": "MULTI_HEAD_ATTENTION", "category": "Attention", "tensors": ["attn_q", "attn_k", "attn_v", "attn_output"], "position_encoding": "rope" },
  1098. { "name": "ffn_norm", "type": "RMS_NORM", "category": "Normalization" },
  1099. { "name": "ffn_gate", "type": "MUL_MAT" },
  1100. { "name": "ffn_up", "type": "MUL_MAT" },
  1101. { "name": "ffn_down", "type": "MUL_MAT" }
  1102. ],
  1103. "output": [
  1104. { "name": "output_norm", "type": "RMS_NORM", "category": "Normalization" },
  1105. { "name": "output", "type": "MUL_MAT" }
  1106. ]
  1107. }
  1108. },
  1109. {
  1110. "name": "hunyuan-dense",
  1111. "family": "decoder",
  1112. "graph": {
  1113. "input": [
  1114. { "name": "token_embd", "type": "EMBEDDING", "category": "Transform" }
  1115. ],
  1116. "blocks": [
  1117. { "name": "attn_norm", "type": "RMS_NORM", "category": "Normalization" },
  1118. { "name": "attention", "type": "MULTI_HEAD_ATTENTION", "category": "Attention", "tensors": ["attn_q", "attn_k", "attn_v", "attn_output"], "position_encoding": "rope", "has_qk_norm": true },
  1119. { "name": "ffn_norm", "type": "RMS_NORM", "category": "Normalization" },
  1120. { "name": "ffn_gate", "type": "MUL_MAT" },
  1121. { "name": "ffn_up", "type": "MUL_MAT" },
  1122. { "name": "ffn_down", "type": "MUL_MAT" }
  1123. ],
  1124. "output": [
  1125. { "name": "output_norm", "type": "RMS_NORM", "category": "Normalization" },
  1126. { "name": "output", "type": "MUL_MAT" }
  1127. ]
  1128. }
  1129. },
  1130. {
  1131. "name": "pangu-embedded",
  1132. "family": "decoder",
  1133. "graph": {
  1134. "input": [
  1135. { "name": "token_embd", "type": "EMBEDDING", "category": "Transform" }
  1136. ],
  1137. "blocks": [
  1138. { "name": "attn_norm", "type": "LAYER_NORM", "category": "Normalization" },
  1139. { "name": "attention", "type": "MULTI_HEAD_ATTENTION", "category": "Attention", "tensors": ["attn_q", "attn_k", "attn_v", "attn_output"], "position_encoding": "rope", "has_bias": true },
  1140. { "name": "ffn_norm", "type": "LAYER_NORM", "category": "Normalization" },
  1141. { "name": "ffn_gate", "type": "MUL_MAT" },
  1142. { "name": "ffn_up", "type": "MUL_MAT" },
  1143. { "name": "ffn_down", "type": "MUL_MAT" }
  1144. ],
  1145. "output": [
  1146. { "name": "output_norm", "type": "LAYER_NORM", "category": "Normalization" },
  1147. { "name": "output", "type": "MUL_MAT" }
  1148. ]
  1149. }
  1150. },
  1151. {
  1152. "name": "smollm3",
  1153. "family": "decoder",
  1154. "graph": {
  1155. "input": [
  1156. { "name": "token_embd", "type": "EMBEDDING", "category": "Transform" }
  1157. ],
  1158. "blocks": [
  1159. { "name": "attn_norm", "type": "RMS_NORM", "category": "Normalization" },
  1160. { "name": "attention", "type": "MULTI_HEAD_ATTENTION", "category": "Attention", "tensors": ["attn_q", "attn_k", "attn_v", "attn_output"], "position_encoding": "rope" },
  1161. { "name": "ffn_norm", "type": "RMS_NORM", "category": "Normalization" },
  1162. { "name": "ffn_gate", "type": "MUL_MAT" },
  1163. { "name": "ffn_up", "type": "MUL_MAT" },
  1164. { "name": "ffn_down", "type": "MUL_MAT" }
  1165. ],
  1166. "output": [
  1167. { "name": "output_norm", "type": "RMS_NORM", "category": "Normalization" },
  1168. { "name": "output", "type": "MUL_MAT" }
  1169. ]
  1170. }
  1171. },
  1172. {
  1173. "name": "dots1",
  1174. "family": "decoder-moe",
  1175. "graph": {
  1176. "input": [
  1177. { "name": "token_embd", "type": "EMBEDDING", "category": "Transform" }
  1178. ],
  1179. "blocks": [
  1180. { "name": "attn_norm", "type": "RMS_NORM", "category": "Normalization" },
  1181. { "name": "attention", "type": "MULTI_HEAD_ATTENTION", "category": "Attention", "tensors": ["attn_q", "attn_k", "attn_v", "attn_output"], "position_encoding": "rope", "has_qk_norm": true },
  1182. { "name": "ffn_norm", "type": "RMS_NORM", "category": "Normalization" },
  1183. { "name": "ffn_gate_inp", "type": "MUL_MAT" },
  1184. { "name": "ffn_gate_exps", "type": "MUL_MAT_ID" },
  1185. { "name": "ffn_up_exps", "type": "MUL_MAT_ID" },
  1186. { "name": "ffn_down_exps", "type": "MUL_MAT_ID" },
  1187. { "name": "ffn_gate", "type": "MUL_MAT" },
  1188. { "name": "ffn_up", "type": "MUL_MAT" },
  1189. { "name": "ffn_down", "type": "MUL_MAT" }
  1190. ],
  1191. "output": [
  1192. { "name": "output_norm", "type": "RMS_NORM", "category": "Normalization" },
  1193. { "name": "output", "type": "MUL_MAT" }
  1194. ]
  1195. }
  1196. },
  1197. {
  1198. "name": "ernie4_5",
  1199. "family": "decoder",
  1200. "graph": {
  1201. "input": [
  1202. { "name": "token_embd", "type": "EMBEDDING", "category": "Transform" }
  1203. ],
  1204. "blocks": [
  1205. { "name": "attn_norm", "type": "RMS_NORM", "category": "Normalization" },
  1206. { "name": "attention", "type": "MULTI_HEAD_ATTENTION", "category": "Attention", "tensors": ["attn_q", "attn_k", "attn_v", "attn_output"], "position_encoding": "rope" },
  1207. { "name": "ffn_norm", "type": "RMS_NORM", "category": "Normalization" },
  1208. { "name": "ffn_gate", "type": "MUL_MAT" },
  1209. { "name": "ffn_up", "type": "MUL_MAT" },
  1210. { "name": "ffn_down", "type": "MUL_MAT" }
  1211. ],
  1212. "output": [
  1213. { "name": "output_norm", "type": "RMS_NORM", "category": "Normalization" },
  1214. { "name": "output", "type": "MUL_MAT" }
  1215. ]
  1216. }
  1217. },
  1218. {
  1219. "name": "apertus",
  1220. "family": "decoder",
  1221. "graph": {
  1222. "input": [
  1223. { "name": "token_embd", "type": "EMBEDDING", "category": "Transform" }
  1224. ],
  1225. "blocks": [
  1226. { "name": "attn_norm", "type": "RMS_NORM", "category": "Normalization" },
  1227. { "name": "attention", "type": "MULTI_HEAD_ATTENTION", "category": "Attention", "tensors": ["attn_q", "attn_k", "attn_v", "attn_output"], "position_encoding": "rope", "has_qk_norm": true },
  1228. { "name": "ffn_norm", "type": "RMS_NORM", "category": "Normalization" },
  1229. { "name": "ffn_gate", "type": "MUL_MAT" },
  1230. { "name": "ffn_up", "type": "MUL_MAT" },
  1231. { "name": "ffn_down", "type": "MUL_MAT" }
  1232. ],
  1233. "output": [
  1234. { "name": "output_norm", "type": "RMS_NORM", "category": "Normalization" },
  1235. { "name": "output", "type": "MUL_MAT" }
  1236. ]
  1237. }
  1238. },
  1239. {
  1240. "name": "granite",
  1241. "family": "decoder",
  1242. "graph": {
  1243. "input": [
  1244. { "name": "token_embd", "type": "EMBEDDING", "category": "Transform" }
  1245. ],
  1246. "blocks": [
  1247. { "name": "attn_norm", "type": "RMS_NORM", "category": "Normalization" },
  1248. { "name": "attention", "type": "MULTI_HEAD_ATTENTION", "category": "Attention", "tensors": ["attn_q", "attn_k", "attn_v", "attn_output"], "position_encoding": "rope" },
  1249. { "name": "ffn_norm", "type": "RMS_NORM", "category": "Normalization" },
  1250. { "name": "ffn_gate", "type": "MUL_MAT" },
  1251. { "name": "ffn_up", "type": "MUL_MAT" },
  1252. { "name": "ffn_down", "type": "MUL_MAT" }
  1253. ],
  1254. "output": [
  1255. { "name": "output_norm", "type": "RMS_NORM", "category": "Normalization" },
  1256. { "name": "output", "type": "MUL_MAT" }
  1257. ]
  1258. }
  1259. },
  1260. {
  1261. "name": "granitemoe",
  1262. "family": "decoder-moe",
  1263. "graph": {
  1264. "input": [
  1265. { "name": "token_embd", "type": "EMBEDDING", "category": "Transform" }
  1266. ],
  1267. "blocks": [
  1268. { "name": "attn_norm", "type": "RMS_NORM", "category": "Normalization" },
  1269. { "name": "attention", "type": "MULTI_HEAD_ATTENTION", "category": "Attention", "tensors": ["attn_q", "attn_k", "attn_v", "attn_output"], "position_encoding": "rope" },
  1270. { "name": "ffn_norm", "type": "RMS_NORM", "category": "Normalization" },
  1271. { "name": "ffn_gate_inp", "type": "MUL_MAT" },
  1272. { "name": "ffn_gate_exps", "type": "MUL_MAT_ID" },
  1273. { "name": "ffn_up_exps", "type": "MUL_MAT_ID" },
  1274. { "name": "ffn_down_exps", "type": "MUL_MAT_ID" }
  1275. ],
  1276. "output": [
  1277. { "name": "output_norm", "type": "RMS_NORM", "category": "Normalization" },
  1278. { "name": "output", "type": "MUL_MAT" }
  1279. ]
  1280. }
  1281. },
  1282. {
  1283. "name": "jais",
  1284. "family": "decoder",
  1285. "graph": {
  1286. "input": [
  1287. { "name": "token_embd", "type": "EMBEDDING", "category": "Transform" }
  1288. ],
  1289. "blocks": [
  1290. { "name": "attn_norm", "type": "LAYER_NORM", "category": "Normalization" },
  1291. { "name": "attention", "type": "MULTI_HEAD_ATTENTION", "category": "Attention", "tensors": ["attn_qkv", "attn_output"], "position_encoding": "alibi", "has_bias": true },
  1292. { "name": "ffn_norm", "type": "LAYER_NORM", "category": "Normalization" },
  1293. { "name": "ffn_gate", "type": "MUL_MAT" },
  1294. { "name": "ffn_up", "type": "MUL_MAT" },
  1295. { "name": "ffn_down", "type": "MUL_MAT" }
  1296. ],
  1297. "output": [
  1298. { "name": "output_norm", "type": "LAYER_NORM", "category": "Normalization" },
  1299. { "name": "output", "type": "MUL_MAT" }
  1300. ]
  1301. }
  1302. },
  1303. {
  1304. "name": "jais2",
  1305. "family": "decoder",
  1306. "graph": {
  1307. "input": [
  1308. { "name": "token_embd", "type": "EMBEDDING", "category": "Transform" }
  1309. ],
  1310. "blocks": [
  1311. { "name": "attn_norm", "type": "LAYER_NORM", "category": "Normalization" },
  1312. { "name": "attention", "type": "MULTI_HEAD_ATTENTION", "category": "Attention", "tensors": ["attn_q", "attn_k", "attn_v", "attn_output"], "position_encoding": "rope", "has_bias": true },
  1313. { "name": "ffn_norm", "type": "LAYER_NORM", "category": "Normalization" },
  1314. { "name": "ffn_up", "type": "MUL_MAT" },
  1315. { "name": "ffn_down", "type": "MUL_MAT" }
  1316. ],
  1317. "output": [
  1318. { "name": "output_norm", "type": "LAYER_NORM", "category": "Normalization" },
  1319. { "name": "output", "type": "MUL_MAT" }
  1320. ]
  1321. }
  1322. },
  1323. {
  1324. "name": "seed_oss",
  1325. "family": "decoder",
  1326. "graph": {
  1327. "input": [
  1328. { "name": "token_embd", "type": "EMBEDDING", "category": "Transform" }
  1329. ],
  1330. "blocks": [
  1331. { "name": "attn_norm", "type": "RMS_NORM", "category": "Normalization" },
  1332. { "name": "attention", "type": "MULTI_HEAD_ATTENTION", "category": "Attention", "tensors": ["attn_q", "attn_k", "attn_v", "attn_output"], "position_encoding": "rope" },
  1333. { "name": "attn_post_norm", "type": "RMS_NORM", "category": "Normalization" },
  1334. { "name": "ffn_norm", "type": "RMS_NORM", "category": "Normalization" },
  1335. { "name": "ffn_gate", "type": "MUL_MAT" },
  1336. { "name": "ffn_up", "type": "MUL_MAT" },
  1337. { "name": "ffn_down", "type": "MUL_MAT" },
  1338. { "name": "ffn_post_norm", "type": "RMS_NORM", "category": "Normalization" }
  1339. ],
  1340. "output": [
  1341. { "name": "output_norm", "type": "RMS_NORM", "category": "Normalization" },
  1342. { "name": "output", "type": "MUL_MAT" }
  1343. ]
  1344. }
  1345. },
  1346. {
  1347. "name": "rnd1",
  1348. "family": "decoder-moe",
  1349. "graph": {
  1350. "input": [
  1351. { "name": "token_embd", "type": "EMBEDDING", "category": "Transform" }
  1352. ],
  1353. "blocks": [
  1354. { "name": "attn_norm", "type": "RMS_NORM", "category": "Normalization" },
  1355. { "name": "attention", "type": "MULTI_HEAD_ATTENTION", "category": "Attention", "tensors": ["attn_q", "attn_k", "attn_v", "attn_output"], "position_encoding": "rope", "has_qk_norm": true },
  1356. { "name": "ffn_norm", "type": "RMS_NORM", "category": "Normalization" },
  1357. { "name": "ffn_gate_inp", "type": "MUL_MAT" },
  1358. { "name": "ffn_gate_exps", "type": "MUL_MAT_ID" },
  1359. { "name": "ffn_up_exps", "type": "MUL_MAT_ID" },
  1360. { "name": "ffn_down_exps", "type": "MUL_MAT_ID" }
  1361. ],
  1362. "output": [
  1363. { "name": "output_norm", "type": "RMS_NORM", "category": "Normalization" },
  1364. { "name": "output", "type": "MUL_MAT" }
  1365. ]
  1366. }
  1367. },
  1368. {
  1369. "name": "cogvlm",
  1370. "family": "decoder",
  1371. "graph": {
  1372. "input": [
  1373. { "name": "token_embd", "type": "EMBEDDING", "category": "Transform" }
  1374. ],
  1375. "blocks": [
  1376. { "name": "attn_norm", "type": "RMS_NORM", "category": "Normalization" },
  1377. { "name": "attention", "type": "MULTI_HEAD_ATTENTION", "category": "Attention", "tensors": ["attn_qkv", "attn_output"], "position_encoding": "rope" },
  1378. { "name": "ffn_norm", "type": "RMS_NORM", "category": "Normalization" },
  1379. { "name": "ffn_gate", "type": "MUL_MAT" },
  1380. { "name": "ffn_up", "type": "MUL_MAT" },
  1381. { "name": "ffn_down", "type": "MUL_MAT" }
  1382. ],
  1383. "output": [
  1384. { "name": "output_norm", "type": "RMS_NORM", "category": "Normalization" },
  1385. { "name": "output", "type": "MUL_MAT" }
  1386. ]
  1387. }
  1388. },
  1389. {
  1390. "name": "minimax-m2",
  1391. "family": "decoder-moe",
  1392. "graph": {
  1393. "input": [
  1394. { "name": "token_embd", "type": "EMBEDDING", "category": "Transform" }
  1395. ],
  1396. "blocks": [
  1397. { "name": "attn_norm", "type": "RMS_NORM", "category": "Normalization" },
  1398. { "name": "attention", "type": "MULTI_HEAD_ATTENTION", "category": "Attention", "tensors": ["attn_q", "attn_k", "attn_v", "attn_output"], "position_encoding": "rope", "has_qk_norm": true },
  1399. { "name": "ffn_norm", "type": "RMS_NORM", "category": "Normalization" },
  1400. { "name": "ffn_gate_inp", "type": "MUL_MAT" },
  1401. { "name": "ffn_gate_exps", "type": "MUL_MAT_ID" },
  1402. { "name": "ffn_up_exps", "type": "MUL_MAT_ID" },
  1403. { "name": "ffn_down_exps", "type": "MUL_MAT_ID" }
  1404. ],
  1405. "output": [
  1406. { "name": "output_norm", "type": "RMS_NORM", "category": "Normalization" },
  1407. { "name": "output", "type": "MUL_MAT" }
  1408. ]
  1409. }
  1410. },
  1411. {
  1412. "name": "minicpm",
  1413. "family": "decoder",
  1414. "graph": {
  1415. "input": [
  1416. { "name": "token_embd", "type": "EMBEDDING", "category": "Transform" }
  1417. ],
  1418. "blocks": [
  1419. { "name": "attn_norm", "type": "RMS_NORM", "category": "Normalization" },
  1420. { "name": "attention", "type": "MULTI_HEAD_ATTENTION", "category": "Attention", "tensors": ["attn_q", "attn_k", "attn_v", "attn_output"], "position_encoding": "rope" },
  1421. { "name": "ffn_norm", "type": "RMS_NORM", "category": "Normalization" },
  1422. { "name": "ffn_gate", "type": "MUL_MAT" },
  1423. { "name": "ffn_up", "type": "MUL_MAT" },
  1424. { "name": "ffn_down", "type": "MUL_MAT" }
  1425. ],
  1426. "output": [
  1427. { "name": "output_norm", "type": "RMS_NORM", "category": "Normalization" },
  1428. { "name": "output", "type": "MUL_MAT" }
  1429. ]
  1430. }
  1431. },
  1432. {
  1433. "name": "step35",
  1434. "family": "decoder-moe",
  1435. "graph": {
  1436. "input": [
  1437. { "name": "token_embd", "type": "EMBEDDING", "category": "Transform" }
  1438. ],
  1439. "blocks": [
  1440. { "name": "attn_norm", "type": "RMS_NORM", "category": "Normalization" },
  1441. { "name": "attention", "type": "MULTI_HEAD_ATTENTION", "category": "Attention", "tensors": ["attn_q", "attn_k", "attn_v", "attn_output"], "position_encoding": "rope", "has_qk_norm": true, "has_gate": true },
  1442. { "name": "ffn_norm", "type": "RMS_NORM", "category": "Normalization" },
  1443. { "name": "ffn_gate_inp", "type": "MUL_MAT" },
  1444. { "name": "ffn_gate_exps", "type": "MUL_MAT_ID" },
  1445. { "name": "ffn_up_exps", "type": "MUL_MAT_ID" },
  1446. { "name": "ffn_down_exps", "type": "MUL_MAT_ID" }
  1447. ],
  1448. "output": [
  1449. { "name": "output_norm", "type": "RMS_NORM", "category": "Normalization" },
  1450. { "name": "output", "type": "MUL_MAT" }
  1451. ]
  1452. }
  1453. },
  1454. {
  1455. "name": "llama-embed",
  1456. "family": "decoder",
  1457. "graph": {
  1458. "input": [
  1459. { "name": "token_embd", "type": "EMBEDDING", "category": "Transform" }
  1460. ],
  1461. "blocks": [
  1462. { "name": "attn_norm", "type": "RMS_NORM", "category": "Normalization" },
  1463. { "name": "attention", "type": "MULTI_HEAD_ATTENTION", "category": "Attention", "tensors": ["attn_q", "attn_k", "attn_v", "attn_output"], "position_encoding": "rope" },
  1464. { "name": "ffn_norm", "type": "RMS_NORM", "category": "Normalization" },
  1465. { "name": "ffn_gate", "type": "MUL_MAT" },
  1466. { "name": "ffn_up", "type": "MUL_MAT" },
  1467. { "name": "ffn_down", "type": "MUL_MAT" }
  1468. ],
  1469. "output": [
  1470. { "name": "output_norm", "type": "RMS_NORM", "category": "Normalization" },
  1471. { "name": "output", "type": "MUL_MAT" }
  1472. ]
  1473. }
  1474. },
  1475. {
  1476. "name": "smallthinker",
  1477. "family": "decoder",
  1478. "graph": {
  1479. "input": [
  1480. { "name": "token_embd", "type": "EMBEDDING", "category": "Transform" }
  1481. ],
  1482. "blocks": [
  1483. { "name": "attn_norm", "type": "RMS_NORM", "category": "Normalization" },
  1484. { "name": "attention", "type": "MULTI_HEAD_ATTENTION", "category": "Attention", "tensors": ["attn_q", "attn_k", "attn_v", "attn_output"], "position_encoding": "rope" },
  1485. { "name": "ffn_norm", "type": "RMS_NORM", "category": "Normalization" },
  1486. { "name": "ffn_gate", "type": "MUL_MAT" },
  1487. { "name": "ffn_up", "type": "MUL_MAT" },
  1488. { "name": "ffn_down", "type": "MUL_MAT" }
  1489. ],
  1490. "output": [
  1491. { "name": "output_norm", "type": "RMS_NORM", "category": "Normalization" },
  1492. { "name": "output", "type": "MUL_MAT" }
  1493. ]
  1494. }
  1495. },
  1496. {
  1497. "name": "mimo2",
  1498. "family": "decoder-moe",
  1499. "graph": {
  1500. "input": [
  1501. { "name": "token_embd", "type": "EMBEDDING", "category": "Transform" }
  1502. ],
  1503. "blocks": [
  1504. { "name": "attn_norm", "type": "RMS_NORM", "category": "Normalization" },
  1505. { "name": "attention", "type": "MULTI_HEAD_ATTENTION", "category": "Attention", "tensors": ["attn_q", "attn_k", "attn_v", "attn_output"], "position_encoding": "rope" },
  1506. { "name": "ffn_norm", "type": "RMS_NORM", "category": "Normalization" },
  1507. { "name": "ffn_gate_inp", "type": "MUL_MAT" },
  1508. { "name": "ffn_gate_exps", "type": "MUL_MAT_ID" },
  1509. { "name": "ffn_up_exps", "type": "MUL_MAT_ID" },
  1510. { "name": "ffn_down_exps", "type": "MUL_MAT_ID" }
  1511. ],
  1512. "output": [
  1513. { "name": "output_norm", "type": "RMS_NORM", "category": "Normalization" },
  1514. { "name": "output", "type": "MUL_MAT" }
  1515. ]
  1516. }
  1517. },
  1518. {
  1519. "name": "lfm2",
  1520. "family": "decoder",
  1521. "graph": {
  1522. "input": [
  1523. { "name": "token_embd", "type": "EMBEDDING", "category": "Transform" },
  1524. { "name": "token_embd_norm", "type": "LAYER_NORM", "category": "Normalization" }
  1525. ],
  1526. "blocks": [
  1527. { "name": "attn_norm", "type": "RMS_NORM", "category": "Normalization" },
  1528. { "name": "attention", "type": "MULTI_HEAD_ATTENTION", "category": "Attention", "tensors": ["attn_q", "attn_k", "attn_v", "attn_output"], "position_encoding": "rope", "has_qk_norm": true },
  1529. { "name": "ffn_norm", "type": "RMS_NORM", "category": "Normalization" },
  1530. { "name": "ffn_gate", "type": "MUL_MAT" },
  1531. { "name": "ffn_up", "type": "MUL_MAT" },
  1532. { "name": "ffn_down", "type": "MUL_MAT" }
  1533. ],
  1534. "output": [
  1535. { "name": "output_norm", "type": "RMS_NORM", "category": "Normalization" },
  1536. { "name": "output", "type": "MUL_MAT" }
  1537. ]
  1538. }
  1539. },
  1540. {
  1541. "name": "lfm2moe",
  1542. "family": "decoder-moe",
  1543. "graph": {
  1544. "input": [
  1545. { "name": "token_embd", "type": "EMBEDDING", "category": "Transform" },
  1546. { "name": "token_embd_norm", "type": "LAYER_NORM", "category": "Normalization" }
  1547. ],
  1548. "blocks": [
  1549. { "name": "attn_norm", "type": "RMS_NORM", "category": "Normalization" },
  1550. { "name": "attention", "type": "MULTI_HEAD_ATTENTION", "category": "Attention", "tensors": ["attn_q", "attn_k", "attn_v", "attn_output"], "position_encoding": "rope", "has_qk_norm": true },
  1551. { "name": "ffn_norm", "type": "RMS_NORM", "category": "Normalization" },
  1552. { "name": "ffn_gate_inp", "type": "MUL_MAT" },
  1553. { "name": "ffn_gate_exps", "type": "MUL_MAT_ID" },
  1554. { "name": "ffn_up_exps", "type": "MUL_MAT_ID" },
  1555. { "name": "ffn_down_exps", "type": "MUL_MAT_ID" }
  1556. ],
  1557. "output": [
  1558. { "name": "output_norm", "type": "RMS_NORM", "category": "Normalization" },
  1559. { "name": "output", "type": "MUL_MAT" }
  1560. ]
  1561. }
  1562. },
  1563. {
  1564. "name": "gpt-oss",
  1565. "family": "decoder-moe",
  1566. "graph": {
  1567. "input": [
  1568. { "name": "token_embd", "type": "EMBEDDING", "category": "Transform" }
  1569. ],
  1570. "blocks": [
  1571. { "name": "attn_norm", "type": "RMS_NORM", "category": "Normalization" },
  1572. { "name": "attention", "type": "MULTI_HEAD_ATTENTION", "category": "Attention", "tensors": ["attn_q", "attn_k", "attn_v", "attn_output"], "position_encoding": "rope" },
  1573. { "name": "attn_post_norm", "type": "RMS_NORM", "category": "Normalization" },
  1574. { "name": "ffn_gate_inp", "type": "MUL_MAT" },
  1575. { "name": "ffn_gate_exps", "type": "MUL_MAT_ID" },
  1576. { "name": "ffn_up_exps", "type": "MUL_MAT_ID" },
  1577. { "name": "ffn_down_exps", "type": "MUL_MAT_ID" }
  1578. ],
  1579. "output": [
  1580. { "name": "output_norm", "type": "RMS_NORM", "category": "Normalization" },
  1581. { "name": "output", "type": "MUL_MAT" }
  1582. ]
  1583. }
  1584. },
  1585. {
  1586. "name": "qwen2moe",
  1587. "family": "decoder-moe",
  1588. "graph": {
  1589. "input": [
  1590. { "name": "token_embd", "type": "EMBEDDING", "category": "Transform" }
  1591. ],
  1592. "blocks": [
  1593. { "name": "attn_norm", "type": "RMS_NORM", "category": "Normalization" },
  1594. { "name": "attention", "type": "MULTI_HEAD_ATTENTION", "category": "Attention", "tensors": ["attn_q", "attn_k", "attn_v", "attn_output"], "position_encoding": "rope", "has_bias": true },
  1595. { "name": "ffn_norm", "type": "RMS_NORM", "category": "Normalization" },
  1596. { "name": "ffn_gate_inp", "type": "MUL_MAT" },
  1597. { "name": "ffn_gate_exps", "type": "MUL_MAT_ID" },
  1598. { "name": "ffn_up_exps", "type": "MUL_MAT_ID" },
  1599. { "name": "ffn_down_exps", "type": "MUL_MAT_ID" }
  1600. ],
  1601. "output": [
  1602. { "name": "output_norm", "type": "RMS_NORM", "category": "Normalization" },
  1603. { "name": "output", "type": "MUL_MAT" }
  1604. ]
  1605. }
  1606. },
  1607. {
  1608. "name": "qwen3moe",
  1609. "family": "decoder-moe",
  1610. "graph": {
  1611. "input": [
  1612. { "name": "token_embd", "type": "EMBEDDING", "category": "Transform" }
  1613. ],
  1614. "blocks": [
  1615. { "name": "attn_norm", "type": "RMS_NORM", "category": "Normalization" },
  1616. { "name": "attention", "type": "MULTI_HEAD_ATTENTION", "category": "Attention", "tensors": ["attn_q", "attn_k", "attn_v", "attn_output"], "position_encoding": "rope", "has_qk_norm": true },
  1617. { "name": "ffn_norm", "type": "RMS_NORM", "category": "Normalization" },
  1618. { "name": "ffn_gate_inp", "type": "MUL_MAT" },
  1619. { "name": "ffn_gate_exps", "type": "MUL_MAT_ID" },
  1620. { "name": "ffn_up_exps", "type": "MUL_MAT_ID" },
  1621. { "name": "ffn_down_exps", "type": "MUL_MAT_ID" }
  1622. ],
  1623. "output": [
  1624. { "name": "output_norm", "type": "RMS_NORM", "category": "Normalization" },
  1625. { "name": "output", "type": "MUL_MAT" }
  1626. ]
  1627. }
  1628. },
  1629. {
  1630. "name": "qwen3vlmoe",
  1631. "family": "decoder-moe",
  1632. "graph": {
  1633. "input": [
  1634. { "name": "token_embd", "type": "EMBEDDING", "category": "Transform" }
  1635. ],
  1636. "blocks": [
  1637. { "name": "attn_norm", "type": "RMS_NORM", "category": "Normalization" },
  1638. { "name": "attention", "type": "MULTI_HEAD_ATTENTION", "category": "Attention", "tensors": ["attn_q", "attn_k", "attn_v", "attn_output"], "position_encoding": "rope", "has_qk_norm": true },
  1639. { "name": "ffn_norm", "type": "RMS_NORM", "category": "Normalization" },
  1640. { "name": "ffn_gate_inp", "type": "MUL_MAT" },
  1641. { "name": "ffn_gate_exps", "type": "MUL_MAT_ID" },
  1642. { "name": "ffn_up_exps", "type": "MUL_MAT_ID" },
  1643. { "name": "ffn_down_exps", "type": "MUL_MAT_ID" }
  1644. ],
  1645. "output": [
  1646. { "name": "output_norm", "type": "RMS_NORM", "category": "Normalization" },
  1647. { "name": "output", "type": "MUL_MAT" }
  1648. ]
  1649. }
  1650. },
  1651. {
  1652. "name": "grok",
  1653. "family": "decoder-moe",
  1654. "graph": {
  1655. "input": [
  1656. { "name": "token_embd", "type": "EMBEDDING", "category": "Transform" }
  1657. ],
  1658. "blocks": [
  1659. { "name": "attn_norm", "type": "RMS_NORM", "category": "Normalization" },
  1660. { "name": "attention", "type": "MULTI_HEAD_ATTENTION", "category": "Attention", "tensors": ["attn_q", "attn_k", "attn_v", "attn_output"], "position_encoding": "rope" },
  1661. { "name": "attn_post_norm", "type": "RMS_NORM", "category": "Normalization" },
  1662. { "name": "ffn_norm", "type": "RMS_NORM", "category": "Normalization" },
  1663. { "name": "ffn_gate_inp", "type": "MUL_MAT" },
  1664. { "name": "ffn_gate_exps", "type": "MUL_MAT_ID" },
  1665. { "name": "ffn_up_exps", "type": "MUL_MAT_ID" },
  1666. { "name": "ffn_down_exps", "type": "MUL_MAT_ID" },
  1667. { "name": "ffn_post_norm", "type": "RMS_NORM", "category": "Normalization" }
  1668. ],
  1669. "output": [
  1670. { "name": "output_norm", "type": "RMS_NORM", "category": "Normalization" },
  1671. { "name": "output", "type": "MUL_MAT" }
  1672. ]
  1673. }
  1674. },
  1675. {
  1676. "name": "arctic",
  1677. "family": "decoder-moe",
  1678. "graph": {
  1679. "input": [
  1680. { "name": "token_embd", "type": "EMBEDDING", "category": "Transform" }
  1681. ],
  1682. "blocks": [
  1683. { "name": "attn_norm", "type": "RMS_NORM", "category": "Normalization" },
  1684. { "name": "attention", "type": "MULTI_HEAD_ATTENTION", "category": "Attention", "tensors": ["attn_q", "attn_k", "attn_v", "attn_output"], "position_encoding": "rope" },
  1685. { "name": "ffn_norm", "type": "RMS_NORM", "category": "Normalization" },
  1686. { "name": "ffn_gate_inp", "type": "MUL_MAT" },
  1687. { "name": "ffn_gate_exps", "type": "MUL_MAT_ID" },
  1688. { "name": "ffn_up_exps", "type": "MUL_MAT_ID" },
  1689. { "name": "ffn_down_exps", "type": "MUL_MAT_ID" },
  1690. { "name": "ffn_gate", "type": "MUL_MAT" },
  1691. { "name": "ffn_up", "type": "MUL_MAT" },
  1692. { "name": "ffn_down", "type": "MUL_MAT" }
  1693. ],
  1694. "output": [
  1695. { "name": "output_norm", "type": "RMS_NORM", "category": "Normalization" },
  1696. { "name": "output", "type": "MUL_MAT" }
  1697. ]
  1698. }
  1699. },
  1700. {
  1701. "name": "dbrx",
  1702. "family": "decoder-moe",
  1703. "graph": {
  1704. "input": [
  1705. { "name": "token_embd", "type": "EMBEDDING", "category": "Transform" }
  1706. ],
  1707. "blocks": [
  1708. { "name": "attn_norm", "type": "RMS_NORM", "category": "Normalization" },
  1709. { "name": "attention", "type": "MULTI_HEAD_ATTENTION", "category": "Attention", "tensors": ["attn_qkv", "attn_output"], "position_encoding": "rope" },
  1710. { "name": "ffn_norm", "type": "RMS_NORM", "category": "Normalization" },
  1711. { "name": "ffn_gate_inp", "type": "MUL_MAT" },
  1712. { "name": "ffn_gate_exps", "type": "MUL_MAT_ID" },
  1713. { "name": "ffn_up_exps", "type": "MUL_MAT_ID" },
  1714. { "name": "ffn_down_exps", "type": "MUL_MAT_ID" }
  1715. ],
  1716. "output": [
  1717. { "name": "output_norm", "type": "RMS_NORM", "category": "Normalization" },
  1718. { "name": "output", "type": "MUL_MAT" }
  1719. ]
  1720. }
  1721. },
  1722. {
  1723. "name": "deepseek",
  1724. "family": "decoder-moe",
  1725. "graph": {
  1726. "input": [
  1727. { "name": "token_embd", "type": "EMBEDDING", "category": "Transform" }
  1728. ],
  1729. "blocks": [
  1730. { "name": "attn_norm", "type": "RMS_NORM", "category": "Normalization" },
  1731. { "name": "attention", "type": "MULTI_HEAD_ATTENTION", "category": "Attention", "tensors": ["attn_q", "attn_k", "attn_v", "attn_output"], "position_encoding": "rope" },
  1732. { "name": "ffn_norm", "type": "RMS_NORM", "category": "Normalization" },
  1733. { "name": "ffn_gate_inp", "type": "MUL_MAT" },
  1734. { "name": "ffn_gate_exps", "type": "MUL_MAT_ID" },
  1735. { "name": "ffn_up_exps", "type": "MUL_MAT_ID" },
  1736. { "name": "ffn_down_exps", "type": "MUL_MAT_ID" }
  1737. ],
  1738. "output": [
  1739. { "name": "output_norm", "type": "RMS_NORM", "category": "Normalization" },
  1740. { "name": "output", "type": "MUL_MAT" }
  1741. ]
  1742. }
  1743. },
  1744. {
  1745. "name": "ernie4_5-moe",
  1746. "family": "decoder-moe",
  1747. "graph": {
  1748. "input": [
  1749. { "name": "token_embd", "type": "EMBEDDING", "category": "Transform" }
  1750. ],
  1751. "blocks": [
  1752. { "name": "attn_norm", "type": "RMS_NORM", "category": "Normalization" },
  1753. { "name": "attention", "type": "MULTI_HEAD_ATTENTION", "category": "Attention", "tensors": ["attn_q", "attn_k", "attn_v", "attn_output"], "position_encoding": "rope" },
  1754. { "name": "ffn_norm", "type": "RMS_NORM", "category": "Normalization" },
  1755. { "name": "ffn_gate_inp", "type": "MUL_MAT" },
  1756. { "name": "ffn_gate_exps", "type": "MUL_MAT_ID" },
  1757. { "name": "ffn_up_exps", "type": "MUL_MAT_ID" },
  1758. { "name": "ffn_down_exps", "type": "MUL_MAT_ID" },
  1759. { "name": "ffn_gate", "type": "MUL_MAT" },
  1760. { "name": "ffn_up", "type": "MUL_MAT" },
  1761. { "name": "ffn_down", "type": "MUL_MAT" }
  1762. ],
  1763. "output": [
  1764. { "name": "output_norm", "type": "RMS_NORM", "category": "Normalization" },
  1765. { "name": "output", "type": "MUL_MAT" }
  1766. ]
  1767. }
  1768. },
  1769. {
  1770. "name": "hunyuan-moe",
  1771. "family": "decoder-moe",
  1772. "graph": {
  1773. "input": [
  1774. { "name": "token_embd", "type": "EMBEDDING", "category": "Transform" }
  1775. ],
  1776. "blocks": [
  1777. { "name": "attn_norm", "type": "RMS_NORM", "category": "Normalization" },
  1778. { "name": "attention", "type": "MULTI_HEAD_ATTENTION", "category": "Attention", "tensors": ["attn_q", "attn_k", "attn_v", "attn_output"], "position_encoding": "rope", "has_qk_norm": true },
  1779. { "name": "ffn_norm", "type": "RMS_NORM", "category": "Normalization" },
  1780. { "name": "ffn_gate_inp", "type": "MUL_MAT" },
  1781. { "name": "ffn_gate_exps", "type": "MUL_MAT_ID" },
  1782. { "name": "ffn_up_exps", "type": "MUL_MAT_ID" },
  1783. { "name": "ffn_down_exps", "type": "MUL_MAT_ID" }
  1784. ],
  1785. "output": [
  1786. { "name": "output_norm", "type": "RMS_NORM", "category": "Normalization" },
  1787. { "name": "output", "type": "MUL_MAT" }
  1788. ]
  1789. }
  1790. },
  1791. {
  1792. "name": "bailingmoe",
  1793. "family": "decoder-moe",
  1794. "graph": {
  1795. "input": [
  1796. { "name": "token_embd", "type": "EMBEDDING", "category": "Transform" }
  1797. ],
  1798. "blocks": [
  1799. { "name": "attn_norm", "type": "RMS_NORM", "category": "Normalization" },
  1800. { "name": "attention", "type": "MULTI_HEAD_ATTENTION", "category": "Attention", "tensors": ["attn_q", "attn_k", "attn_v", "attn_output"], "position_encoding": "rope" },
  1801. { "name": "ffn_norm", "type": "RMS_NORM", "category": "Normalization" },
  1802. { "name": "ffn_gate_inp", "type": "MUL_MAT" },
  1803. { "name": "ffn_gate_exps", "type": "MUL_MAT_ID" },
  1804. { "name": "ffn_up_exps", "type": "MUL_MAT_ID" },
  1805. { "name": "ffn_down_exps", "type": "MUL_MAT_ID" }
  1806. ],
  1807. "output": [
  1808. { "name": "output_norm", "type": "RMS_NORM", "category": "Normalization" },
  1809. { "name": "output", "type": "MUL_MAT" }
  1810. ]
  1811. }
  1812. },
  1813. {
  1814. "name": "bailingmoe2",
  1815. "family": "decoder-moe",
  1816. "graph": {
  1817. "input": [
  1818. { "name": "token_embd", "type": "EMBEDDING", "category": "Transform" }
  1819. ],
  1820. "blocks": [
  1821. { "name": "attn_norm", "type": "RMS_NORM", "category": "Normalization" },
  1822. { "name": "attention", "type": "MULTI_HEAD_ATTENTION", "category": "Attention", "tensors": ["attn_qkv", "attn_output"], "position_encoding": "rope", "has_qk_norm": true },
  1823. { "name": "ffn_norm", "type": "RMS_NORM", "category": "Normalization" },
  1824. { "name": "ffn_gate_inp", "type": "MUL_MAT" },
  1825. { "name": "ffn_gate_exps", "type": "MUL_MAT_ID" },
  1826. { "name": "ffn_up_exps", "type": "MUL_MAT_ID" },
  1827. { "name": "ffn_down_exps", "type": "MUL_MAT_ID" }
  1828. ],
  1829. "output": [
  1830. { "name": "output_norm", "type": "RMS_NORM", "category": "Normalization" },
  1831. { "name": "output", "type": "MUL_MAT" }
  1832. ]
  1833. }
  1834. },
  1835. {
  1836. "name": "afmoe",
  1837. "family": "decoder-moe",
  1838. "graph": {
  1839. "input": [
  1840. { "name": "token_embd", "type": "EMBEDDING", "category": "Transform" }
  1841. ],
  1842. "blocks": [
  1843. { "name": "attn_norm", "type": "RMS_NORM", "category": "Normalization" },
  1844. { "name": "attention", "type": "MULTI_HEAD_ATTENTION", "category": "Attention", "tensors": ["attn_q", "attn_k", "attn_v", "attn_output"], "position_encoding": "rope", "has_qk_norm": true, "has_gate": true },
  1845. { "name": "attn_post_norm", "type": "RMS_NORM", "category": "Normalization" },
  1846. { "name": "ffn_norm", "type": "RMS_NORM", "category": "Normalization" },
  1847. { "name": "ffn_gate_inp", "type": "MUL_MAT" },
  1848. { "name": "ffn_gate_exps", "type": "MUL_MAT_ID" },
  1849. { "name": "ffn_up_exps", "type": "MUL_MAT_ID" },
  1850. { "name": "ffn_down_exps", "type": "MUL_MAT_ID" },
  1851. { "name": "ffn_post_norm", "type": "RMS_NORM", "category": "Normalization" }
  1852. ],
  1853. "output": [
  1854. { "name": "output_norm", "type": "RMS_NORM", "category": "Normalization" },
  1855. { "name": "output", "type": "MUL_MAT" }
  1856. ]
  1857. }
  1858. },
  1859. {
  1860. "name": "llada-moe",
  1861. "family": "decoder-moe",
  1862. "graph": {
  1863. "input": [
  1864. { "name": "token_embd", "type": "EMBEDDING", "category": "Transform" }
  1865. ],
  1866. "blocks": [
  1867. { "name": "attn_norm", "type": "RMS_NORM", "category": "Normalization" },
  1868. { "name": "attention", "type": "MULTI_HEAD_ATTENTION", "category": "Attention", "tensors": ["attn_q", "attn_k", "attn_v", "attn_output"], "position_encoding": "rope", "has_qk_norm": true },
  1869. { "name": "ffn_norm", "type": "RMS_NORM", "category": "Normalization" },
  1870. { "name": "ffn_gate_inp", "type": "MUL_MAT" },
  1871. { "name": "ffn_gate_exps", "type": "MUL_MAT_ID" },
  1872. { "name": "ffn_up_exps", "type": "MUL_MAT_ID" },
  1873. { "name": "ffn_down_exps", "type": "MUL_MAT_ID" }
  1874. ],
  1875. "output": [
  1876. { "name": "output_norm", "type": "RMS_NORM", "category": "Normalization" },
  1877. { "name": "output", "type": "MUL_MAT" }
  1878. ]
  1879. }
  1880. },
  1881. {
  1882. "name": "grovemoe",
  1883. "family": "decoder-moe",
  1884. "graph": {
  1885. "input": [
  1886. { "name": "token_embd", "type": "EMBEDDING", "category": "Transform" }
  1887. ],
  1888. "blocks": [
  1889. { "name": "attn_norm", "type": "RMS_NORM", "category": "Normalization" },
  1890. { "name": "attention", "type": "MULTI_HEAD_ATTENTION", "category": "Attention", "tensors": ["attn_q", "attn_k", "attn_v", "attn_output"], "position_encoding": "rope", "has_qk_norm": true },
  1891. { "name": "ffn_norm", "type": "RMS_NORM", "category": "Normalization" },
  1892. { "name": "ffn_gate_inp", "type": "MUL_MAT" },
  1893. { "name": "ffn_gate_exps", "type": "MUL_MAT_ID" },
  1894. { "name": "ffn_up_exps", "type": "MUL_MAT_ID" },
  1895. { "name": "ffn_down_exps", "type": "MUL_MAT_ID" }
  1896. ],
  1897. "output": [
  1898. { "name": "output_norm", "type": "RMS_NORM", "category": "Normalization" },
  1899. { "name": "output", "type": "MUL_MAT" }
  1900. ]
  1901. }
  1902. },
  1903. {
  1904. "name": "deepseek2",
  1905. "family": "mla",
  1906. "graph": {
  1907. "input": [
  1908. { "name": "token_embd", "type": "EMBEDDING", "category": "Transform" }
  1909. ],
  1910. "blocks": [
  1911. { "name": "attn_norm", "type": "RMS_NORM", "category": "Normalization" },
  1912. { "name": "attention", "type": "MULTI_LATENT_ATTENTION", "category": "Attention", "tensors": ["attn_q_a", "attn_q_b", "attn_kv_a_mqa", "attn_kv_b", "attn_output"], "position_encoding": "rope" },
  1913. { "name": "ffn_norm", "type": "RMS_NORM", "category": "Normalization" },
  1914. { "name": "ffn_gate_inp", "type": "MUL_MAT" },
  1915. { "name": "ffn_gate_exps", "type": "MUL_MAT_ID" },
  1916. { "name": "ffn_up_exps", "type": "MUL_MAT_ID" },
  1917. { "name": "ffn_down_exps", "type": "MUL_MAT_ID" }
  1918. ],
  1919. "output": [
  1920. { "name": "output_norm", "type": "RMS_NORM", "category": "Normalization" },
  1921. { "name": "output", "type": "MUL_MAT" }
  1922. ]
  1923. }
  1924. },
  1925. {
  1926. "name": "minicpm3",
  1927. "family": "mla",
  1928. "graph": {
  1929. "input": [
  1930. { "name": "token_embd", "type": "EMBEDDING", "category": "Transform" }
  1931. ],
  1932. "blocks": [
  1933. { "name": "attn_norm", "type": "RMS_NORM", "category": "Normalization" },
  1934. { "name": "attention", "type": "MULTI_LATENT_ATTENTION", "category": "Attention", "tensors": ["attn_q_a", "attn_q_b", "attn_kv_a_mqa", "attn_kv_b", "attn_output"], "position_encoding": "rope" },
  1935. { "name": "ffn_norm", "type": "RMS_NORM", "category": "Normalization" },
  1936. { "name": "ffn_gate", "type": "MUL_MAT" },
  1937. { "name": "ffn_up", "type": "MUL_MAT" },
  1938. { "name": "ffn_down", "type": "MUL_MAT" }
  1939. ],
  1940. "output": [
  1941. { "name": "output_norm", "type": "RMS_NORM", "category": "Normalization" },
  1942. { "name": "output", "type": "MUL_MAT" }
  1943. ]
  1944. }
  1945. },
  1946. {
  1947. "name": "plm",
  1948. "family": "mla",
  1949. "graph": {
  1950. "input": [
  1951. { "name": "token_embd", "type": "EMBEDDING", "category": "Transform" }
  1952. ],
  1953. "blocks": [
  1954. { "name": "attn_norm", "type": "RMS_NORM", "category": "Normalization" },
  1955. { "name": "attention", "type": "MULTI_LATENT_ATTENTION", "category": "Attention", "tensors": ["attn_q", "attn_kv_a_mqa", "attn_kv_b", "attn_output"], "position_encoding": "rope" },
  1956. { "name": "ffn_norm", "type": "RMS_NORM", "category": "Normalization" },
  1957. { "name": "ffn_up", "type": "MUL_MAT" },
  1958. { "name": "ffn_down", "type": "MUL_MAT" }
  1959. ],
  1960. "output": [
  1961. { "name": "output_norm", "type": "RMS_NORM", "category": "Normalization" },
  1962. { "name": "output", "type": "MUL_MAT" }
  1963. ]
  1964. }
  1965. },
  1966. {
  1967. "name": "mamba",
  1968. "family": "ssm",
  1969. "graph": {
  1970. "input": [
  1971. { "name": "token_embd", "type": "EMBEDDING", "category": "Transform" }
  1972. ],
  1973. "blocks": [
  1974. { "name": "attn_norm", "type": "RMS_NORM", "category": "Normalization" },
  1975. { "name": "ssm", "type": "MAMBA", "tensors": ["ssm_in", "ssm_conv1d", "ssm_x", "ssm_dt", "ssm_a", "ssm_d", "ssm_out"] }
  1976. ],
  1977. "output": [
  1978. { "name": "output_norm", "type": "RMS_NORM", "category": "Normalization" },
  1979. { "name": "output", "type": "MUL_MAT" }
  1980. ]
  1981. }
  1982. },
  1983. {
  1984. "name": "mamba2",
  1985. "family": "ssm",
  1986. "graph": {
  1987. "input": [
  1988. { "name": "token_embd", "type": "EMBEDDING", "category": "Transform" }
  1989. ],
  1990. "blocks": [
  1991. { "name": "attn_norm", "type": "RMS_NORM", "category": "Normalization" },
  1992. { "name": "ssm", "type": "MAMBA2", "tensors": ["ssm_in", "ssm_conv1d", "ssm_dt", "ssm_a", "ssm_d", "ssm_norm", "ssm_out"] }
  1993. ],
  1994. "output": [
  1995. { "name": "output_norm", "type": "RMS_NORM", "category": "Normalization" },
  1996. { "name": "output", "type": "MUL_MAT" }
  1997. ]
  1998. }
  1999. },
  2000. {
  2001. "name": "rwkv6",
  2002. "family": "rwkv",
  2003. "graph": {
  2004. "input": [
  2005. { "name": "token_embd", "type": "EMBEDDING", "category": "Transform" },
  2006. { "name": "token_embd_norm", "type": "LAYER_NORM", "category": "Normalization" }
  2007. ],
  2008. "blocks": [
  2009. { "name": "attn_norm", "type": "LAYER_NORM", "category": "Normalization" },
  2010. { "name": "time_mix", "type": "RWKV", "category": "Layer", "tensors": ["time_mix_key", "time_mix_value", "time_mix_receptance", "time_mix_gate", "time_mix_first", "time_mix_decay", "time_mix_lerp_x", "time_mix_lerp_k", "time_mix_lerp_v", "time_mix_lerp_r", "time_mix_lerp_g", "time_mix_lerp_w", "time_mix_w1", "time_mix_w2", "time_mix_decay_w1", "time_mix_decay_w2", "time_mix_ln", "time_mix_output"] },
  2011. { "name": "ffn_norm", "type": "LAYER_NORM", "category": "Normalization" },
  2012. { "name": "channel_mix", "type": "RWKV", "category": "Layer", "tensors": ["channel_mix_key", "channel_mix_receptance", "channel_mix_value", "channel_mix_lerp_k", "channel_mix_lerp_r"] }
  2013. ],
  2014. "output": [
  2015. { "name": "output_norm", "type": "LAYER_NORM", "category": "Normalization" },
  2016. { "name": "output", "type": "MUL_MAT" }
  2017. ]
  2018. }
  2019. },
  2020. {
  2021. "name": "rwkv6qwen2",
  2022. "family": "rwkv",
  2023. "graph": {
  2024. "input": [
  2025. { "name": "token_embd", "type": "EMBEDDING", "category": "Transform" }
  2026. ],
  2027. "blocks": [
  2028. { "name": "attn_norm", "type": "RMS_NORM", "category": "Normalization" },
  2029. { "name": "time_mix", "type": "RWKV", "category": "Layer", "tensors": ["time_mix_key", "time_mix_value", "time_mix_receptance", "time_mix_gate", "time_mix_first", "time_mix_decay", "time_mix_lerp_x", "time_mix_lerp_k", "time_mix_lerp_v", "time_mix_lerp_r", "time_mix_lerp_g", "time_mix_lerp_w", "time_mix_w1", "time_mix_w2", "time_mix_decay_w1", "time_mix_decay_w2", "time_mix_ln", "time_mix_output"] },
  2030. { "name": "ffn_norm", "type": "RMS_NORM", "category": "Normalization" },
  2031. { "name": "ffn_gate", "type": "MUL_MAT" },
  2032. { "name": "ffn_up", "type": "MUL_MAT" },
  2033. { "name": "ffn_down", "type": "MUL_MAT" }
  2034. ],
  2035. "output": [
  2036. { "name": "output_norm", "type": "RMS_NORM", "category": "Normalization" },
  2037. { "name": "output", "type": "MUL_MAT" }
  2038. ]
  2039. }
  2040. },
  2041. {
  2042. "name": "rwkv7",
  2043. "family": "rwkv",
  2044. "graph": {
  2045. "input": [
  2046. { "name": "token_embd", "type": "EMBEDDING", "category": "Transform" },
  2047. { "name": "token_embd_norm", "type": "LAYER_NORM", "category": "Normalization" }
  2048. ],
  2049. "blocks": [
  2050. { "name": "attn_norm", "type": "LAYER_NORM", "category": "Normalization" },
  2051. { "name": "time_mix", "type": "RWKV", "category": "Layer", "tensors": ["time_mix_key", "time_mix_value", "time_mix_receptance", "time_mix_lerp_fused", "time_mix_w0", "time_mix_w1", "time_mix_w2", "time_mix_a0", "time_mix_a1", "time_mix_a2", "time_mix_v0", "time_mix_v1", "time_mix_v2", "time_mix_g1", "time_mix_g2", "time_mix_k_k", "time_mix_k_a", "time_mix_r_k", "time_mix_ln", "time_mix_output"] },
  2052. { "name": "ffn_norm", "type": "LAYER_NORM", "category": "Normalization" },
  2053. { "name": "channel_mix", "type": "RWKV", "category": "Layer", "tensors": ["channel_mix_key", "channel_mix_value", "channel_mix_lerp_k"] }
  2054. ],
  2055. "output": [
  2056. { "name": "output_norm", "type": "LAYER_NORM", "category": "Normalization" },
  2057. { "name": "output", "type": "MUL_MAT" }
  2058. ]
  2059. }
  2060. },
  2061. {
  2062. "name": "arwkv7",
  2063. "family": "rwkv",
  2064. "graph": {
  2065. "input": [
  2066. { "name": "token_embd", "type": "EMBEDDING", "category": "Transform" },
  2067. { "name": "token_embd_norm", "type": "LAYER_NORM", "category": "Normalization" }
  2068. ],
  2069. "blocks": [
  2070. { "name": "attn_norm", "type": "RMS_NORM", "category": "Normalization" },
  2071. { "name": "time_mix", "type": "RWKV", "category": "Layer", "tensors": ["time_mix_key", "time_mix_value", "time_mix_receptance", "time_mix_lerp_fused", "time_mix_w0", "time_mix_w1", "time_mix_w2", "time_mix_a0", "time_mix_a1", "time_mix_a2", "time_mix_v0", "time_mix_v1", "time_mix_v2", "time_mix_g1", "time_mix_g2", "time_mix_k_k", "time_mix_k_a", "time_mix_r_k", "time_mix_ln", "time_mix_output"] },
  2072. { "name": "ffn_norm", "type": "RMS_NORM", "category": "Normalization" },
  2073. { "name": "ffn_gate", "type": "MUL_MAT" },
  2074. { "name": "ffn_up", "type": "MUL_MAT" },
  2075. { "name": "ffn_down", "type": "MUL_MAT" }
  2076. ],
  2077. "output": [
  2078. { "name": "output_norm", "type": "RMS_NORM", "category": "Normalization" },
  2079. { "name": "output", "type": "MUL_MAT" }
  2080. ]
  2081. }
  2082. },
  2083. {
  2084. "name": "jamba",
  2085. "family": "hybrid",
  2086. "graph": {
  2087. "input": [
  2088. { "name": "token_embd", "type": "EMBEDDING", "category": "Transform" }
  2089. ],
  2090. "blocks": [
  2091. { "name": "attn_norm", "type": "RMS_NORM", "category": "Normalization" },
  2092. {
  2093. "name": "attention_or_ssm",
  2094. "type": "HYBRID",
  2095. "category": "Layer",
  2096. "attention": { "type": "attention", "subtype": "mha", "tensors": ["attn_q", "attn_k", "attn_v", "attn_output"], "position_encoding": "none" },
  2097. "ssm": { "type": "ssm", "subtype": "mamba", "tensors": ["ssm_in", "ssm_conv1d", "ssm_x", "ssm_dt", "ssm_a", "ssm_d", "ssm_out"] }
  2098. },
  2099. { "name": "ffn_norm", "type": "RMS_NORM", "category": "Normalization" },
  2100. {
  2101. "name": "ffn_or_moe",
  2102. "type": "HYBRID_FFN",
  2103. "category": "Layer",
  2104. "dense": { "type": "ffn", "subtype": "swiglu", "tensors": ["ffn_gate", "ffn_up", "ffn_down"] },
  2105. "moe": { "type": "moe", "subtype": "swiglu", "tensors": ["ffn_gate_inp", "ffn_gate_exps", "ffn_up_exps", "ffn_down_exps"] }
  2106. }
  2107. ],
  2108. "output": [
  2109. { "name": "output_norm", "type": "RMS_NORM", "category": "Normalization" },
  2110. { "name": "output", "type": "MUL_MAT" }
  2111. ]
  2112. }
  2113. },
  2114. {
  2115. "name": "falcon-h1",
  2116. "family": "hybrid",
  2117. "graph": {
  2118. "input": [
  2119. { "name": "token_embd", "type": "EMBEDDING", "category": "Transform" }
  2120. ],
  2121. "blocks": [
  2122. { "name": "attn_norm", "type": "RMS_NORM", "category": "Normalization" },
  2123. { "name": "attention", "type": "MULTI_HEAD_ATTENTION", "category": "Attention", "tensors": ["attn_q", "attn_k", "attn_v", "attn_output"], "position_encoding": "rope" },
  2124. { "name": "ssm", "type": "MAMBA2", "tensors": ["ssm_in", "ssm_conv1d", "ssm_dt", "ssm_a", "ssm_d", "ssm_norm", "ssm_out"], "parallel": true },
  2125. { "name": "ffn_norm", "type": "RMS_NORM", "category": "Normalization" },
  2126. { "name": "ffn_gate", "type": "MUL_MAT" },
  2127. { "name": "ffn_up", "type": "MUL_MAT" },
  2128. { "name": "ffn_down", "type": "MUL_MAT" }
  2129. ],
  2130. "output": [
  2131. { "name": "output_norm", "type": "RMS_NORM", "category": "Normalization" },
  2132. { "name": "output", "type": "MUL_MAT" }
  2133. ]
  2134. }
  2135. },
  2136. {
  2137. "name": "granitehybrid",
  2138. "family": "hybrid",
  2139. "graph": {
  2140. "input": [
  2141. { "name": "token_embd", "type": "EMBEDDING", "category": "Transform" }
  2142. ],
  2143. "blocks": [
  2144. { "name": "attn_norm", "type": "RMS_NORM", "category": "Normalization" },
  2145. {
  2146. "name": "attention_or_ssm",
  2147. "type": "HYBRID",
  2148. "category": "Layer",
  2149. "attention": { "type": "attention", "subtype": "mha", "tensors": ["attn_q", "attn_k", "attn_v", "attn_output"], "position_encoding": "rope" },
  2150. "ssm": { "type": "ssm", "subtype": "mamba2", "tensors": ["ssm_in", "ssm_conv1d", "ssm_dt", "ssm_a", "ssm_d", "ssm_norm", "ssm_out"] }
  2151. },
  2152. { "name": "ffn_norm", "type": "RMS_NORM", "category": "Normalization" },
  2153. {
  2154. "name": "ffn_or_moe",
  2155. "type": "HYBRID_FFN",
  2156. "category": "Layer",
  2157. "dense": { "type": "ffn", "subtype": "swiglu", "tensors": ["ffn_gate", "ffn_up", "ffn_down"] },
  2158. "moe": { "type": "moe", "subtype": "swiglu", "tensors": ["ffn_gate_inp", "ffn_gate_exps", "ffn_up_exps", "ffn_down_exps"], "has_shared_expert": true }
  2159. }
  2160. ],
  2161. "output": [
  2162. { "name": "output_norm", "type": "RMS_NORM", "category": "Normalization" },
  2163. { "name": "output", "type": "MUL_MAT" }
  2164. ]
  2165. }
  2166. },
  2167. {
  2168. "name": "nemotron_h",
  2169. "family": "hybrid",
  2170. "graph": {
  2171. "input": [
  2172. { "name": "token_embd", "type": "EMBEDDING", "category": "Transform" }
  2173. ],
  2174. "blocks": [
  2175. { "name": "attn_norm", "type": "RMS_NORM", "category": "Normalization" },
  2176. {
  2177. "name": "attention_or_ssm",
  2178. "type": "HYBRID",
  2179. "category": "Layer",
  2180. "attention": { "type": "attention", "subtype": "mha", "tensors": ["attn_q", "attn_k", "attn_v", "attn_output"], "position_encoding": "rope" },
  2181. "ssm": { "type": "ssm", "subtype": "mamba2", "tensors": ["ssm_in", "ssm_conv1d", "ssm_dt", "ssm_a", "ssm_d", "ssm_norm", "ssm_out"] }
  2182. },
  2183. { "name": "ffn_up", "type": "MUL_MAT" },
  2184. { "name": "ffn_down", "type": "MUL_MAT" }
  2185. ],
  2186. "output": [
  2187. { "name": "output_norm", "type": "RMS_NORM", "category": "Normalization" },
  2188. { "name": "output", "type": "MUL_MAT" }
  2189. ]
  2190. }
  2191. },
  2192. {
  2193. "name": "nemotron_h_moe",
  2194. "family": "hybrid",
  2195. "graph": {
  2196. "input": [
  2197. { "name": "token_embd", "type": "EMBEDDING", "category": "Transform" }
  2198. ],
  2199. "blocks": [
  2200. { "name": "attn_norm", "type": "RMS_NORM", "category": "Normalization" },
  2201. {
  2202. "name": "attention_or_ssm",
  2203. "type": "HYBRID",
  2204. "category": "Layer",
  2205. "attention": { "type": "attention", "subtype": "mha", "tensors": ["attn_q", "attn_k", "attn_v", "attn_output"], "position_encoding": "rope" },
  2206. "ssm": { "type": "ssm", "subtype": "mamba2", "tensors": ["ssm_in", "ssm_conv1d", "ssm_dt", "ssm_a", "ssm_d", "ssm_norm", "ssm_out"] }
  2207. },
  2208. {
  2209. "name": "ffn_or_moe",
  2210. "type": "HYBRID_FFN",
  2211. "category": "Layer",
  2212. "dense": { "type": "ffn", "subtype": "gelu", "tensors": ["ffn_up", "ffn_down"] },
  2213. "moe": { "type": "moe", "subtype": "gelu", "tensors": ["ffn_gate_inp", "ffn_up_exps", "ffn_down_exps"], "has_shared_expert": true }
  2214. }
  2215. ],
  2216. "output": [
  2217. { "name": "output_norm", "type": "RMS_NORM", "category": "Normalization" },
  2218. { "name": "output", "type": "MUL_MAT" }
  2219. ]
  2220. }
  2221. },
  2222. {
  2223. "name": "plamo2",
  2224. "family": "hybrid",
  2225. "graph": {
  2226. "input": [
  2227. { "name": "token_embd", "type": "EMBEDDING", "category": "Transform" }
  2228. ],
  2229. "blocks": [
  2230. { "name": "attn_norm", "type": "RMS_NORM", "category": "Normalization" },
  2231. {
  2232. "name": "attention_or_ssm",
  2233. "type": "HYBRID",
  2234. "category": "Layer",
  2235. "attention": { "type": "attention", "subtype": "mha", "tensors": ["attn_q", "attn_k", "attn_v", "attn_output"], "position_encoding": "rope", "has_qk_norm": true },
  2236. "ssm": { "type": "ssm", "subtype": "mamba2", "tensors": ["ssm_in", "ssm_conv1d", "ssm_x", "ssm_dt", "ssm_a", "ssm_d", "ssm_out"] }
  2237. },
  2238. { "name": "attn_post_norm", "type": "RMS_NORM", "category": "Normalization" },
  2239. { "name": "ffn_norm", "type": "RMS_NORM", "category": "Normalization" },
  2240. { "name": "ffn_gate", "type": "MUL_MAT" },
  2241. { "name": "ffn_up", "type": "MUL_MAT" },
  2242. { "name": "ffn_down", "type": "MUL_MAT" },
  2243. { "name": "ffn_post_norm", "type": "RMS_NORM", "category": "Normalization" }
  2244. ],
  2245. "output": [
  2246. { "name": "output_norm", "type": "RMS_NORM", "category": "Normalization" },
  2247. { "name": "output", "type": "MUL_MAT" }
  2248. ]
  2249. }
  2250. },
  2251. {
  2252. "name": "bert",
  2253. "family": "encoder",
  2254. "graph": {
  2255. "input": [
  2256. { "name": "token_embd", "type": "EMBEDDING", "category": "Transform" },
  2257. { "name": "token_embd_norm", "type": "LAYER_NORM", "category": "Normalization" },
  2258. { "name": "token_types", "type": "EMBEDDING", "category": "Transform" },
  2259. { "name": "position_embd", "type": "EMBEDDING", "category": "Transform" }
  2260. ],
  2261. "blocks": [
  2262. { "name": "attention", "type": "MULTI_HEAD_ATTENTION", "category": "Attention", "tensors": ["attn_qkv", "attn_output"], "position_encoding": "learned", "has_bias": true },
  2263. { "name": "attn_output_norm", "type": "LAYER_NORM", "category": "Normalization" },
  2264. { "name": "ffn_up", "type": "MUL_MAT" },
  2265. { "name": "ffn_down", "type": "MUL_MAT" },
  2266. { "name": "layer_output_norm", "type": "LAYER_NORM", "category": "Normalization" }
  2267. ],
  2268. "output": [
  2269. { "name": "output_norm", "type": "LAYER_NORM", "category": "Normalization" }
  2270. ]
  2271. }
  2272. },
  2273. {
  2274. "name": "nomic-bert",
  2275. "family": "encoder",
  2276. "graph": {
  2277. "input": [
  2278. { "name": "token_embd", "type": "EMBEDDING", "category": "Transform" },
  2279. { "name": "token_embd_norm", "type": "LAYER_NORM", "category": "Normalization" },
  2280. { "name": "token_types", "type": "EMBEDDING", "category": "Transform" }
  2281. ],
  2282. "blocks": [
  2283. { "name": "attention", "type": "MULTI_HEAD_ATTENTION", "category": "Attention", "tensors": ["attn_qkv", "attn_output"], "position_encoding": "rope" },
  2284. { "name": "attn_output_norm", "type": "LAYER_NORM", "category": "Normalization" },
  2285. { "name": "ffn_gate", "type": "MUL_MAT" },
  2286. { "name": "ffn_up", "type": "MUL_MAT" },
  2287. { "name": "ffn_down", "type": "MUL_MAT" },
  2288. { "name": "layer_output_norm", "type": "LAYER_NORM", "category": "Normalization" }
  2289. ],
  2290. "output": [
  2291. { "name": "output_norm", "type": "LAYER_NORM", "category": "Normalization" }
  2292. ]
  2293. }
  2294. },
  2295. {
  2296. "name": "nomic-bert-moe",
  2297. "family": "encoder",
  2298. "graph": {
  2299. "input": [
  2300. { "name": "token_embd", "type": "EMBEDDING", "category": "Transform" },
  2301. { "name": "token_embd_norm", "type": "LAYER_NORM", "category": "Normalization" },
  2302. { "name": "token_types", "type": "EMBEDDING", "category": "Transform" }
  2303. ],
  2304. "blocks": [
  2305. { "name": "attention", "type": "MULTI_HEAD_ATTENTION", "category": "Attention", "tensors": ["attn_qkv", "attn_output"], "position_encoding": "rope" },
  2306. { "name": "attn_output_norm", "type": "LAYER_NORM", "category": "Normalization" },
  2307. { "name": "ffn_gate_inp", "type": "MUL_MAT" },
  2308. { "name": "ffn_up_exps", "type": "MUL_MAT_ID" },
  2309. { "name": "ffn_down_exps", "type": "MUL_MAT_ID" },
  2310. { "name": "layer_output_norm", "type": "LAYER_NORM", "category": "Normalization" }
  2311. ],
  2312. "output": [
  2313. { "name": "output_norm", "type": "LAYER_NORM", "category": "Normalization" }
  2314. ]
  2315. }
  2316. },
  2317. {
  2318. "name": "jina-bert-v2",
  2319. "family": "encoder",
  2320. "graph": {
  2321. "input": [
  2322. { "name": "token_embd", "type": "EMBEDDING", "category": "Transform" },
  2323. { "name": "token_embd_norm", "type": "LAYER_NORM", "category": "Normalization" },
  2324. { "name": "token_types", "type": "EMBEDDING", "category": "Transform" }
  2325. ],
  2326. "blocks": [
  2327. { "name": "attention", "type": "MULTI_HEAD_ATTENTION", "category": "Attention", "tensors": ["attn_q", "attn_k", "attn_v", "attn_output"], "position_encoding": "alibi", "has_qk_norm": true },
  2328. { "name": "attn_output_norm", "type": "LAYER_NORM", "category": "Normalization" },
  2329. { "name": "ffn_gate", "type": "MUL_MAT" },
  2330. { "name": "ffn_up", "type": "MUL_MAT" },
  2331. { "name": "ffn_down", "type": "MUL_MAT" },
  2332. { "name": "layer_output_norm", "type": "LAYER_NORM", "category": "Normalization" }
  2333. ],
  2334. "output": []
  2335. }
  2336. },
  2337. {
  2338. "name": "jina-bert-v3",
  2339. "family": "encoder",
  2340. "graph": {
  2341. "input": [
  2342. { "name": "token_embd", "type": "EMBEDDING", "category": "Transform" },
  2343. { "name": "token_embd_norm", "type": "LAYER_NORM", "category": "Normalization" },
  2344. { "name": "token_types", "type": "EMBEDDING", "category": "Transform" }
  2345. ],
  2346. "blocks": [
  2347. { "name": "attention", "type": "MULTI_HEAD_ATTENTION", "category": "Attention", "tensors": ["attn_qkv", "attn_output"], "position_encoding": "learned", "has_bias": true },
  2348. { "name": "attn_output_norm", "type": "LAYER_NORM", "category": "Normalization" },
  2349. { "name": "ffn_up", "type": "MUL_MAT" },
  2350. { "name": "ffn_down", "type": "MUL_MAT" },
  2351. { "name": "layer_output_norm", "type": "LAYER_NORM", "category": "Normalization" }
  2352. ],
  2353. "output": [
  2354. { "name": "output_norm", "type": "LAYER_NORM", "category": "Normalization" }
  2355. ]
  2356. }
  2357. },
  2358. {
  2359. "name": "modern-bert",
  2360. "family": "encoder",
  2361. "graph": {
  2362. "input": [
  2363. { "name": "token_embd", "type": "EMBEDDING", "category": "Transform" },
  2364. { "name": "token_embd_norm", "type": "LAYER_NORM", "category": "Normalization" }
  2365. ],
  2366. "blocks": [
  2367. { "name": "attn_norm", "type": "LAYER_NORM", "category": "Normalization" },
  2368. { "name": "attention", "type": "MULTI_HEAD_ATTENTION", "category": "Attention", "tensors": ["attn_qkv", "attn_output"], "position_encoding": "rope" },
  2369. { "name": "ffn_norm", "type": "LAYER_NORM", "category": "Normalization" },
  2370. { "name": "ffn_up", "type": "MUL_MAT" },
  2371. { "name": "ffn_down", "type": "MUL_MAT" }
  2372. ],
  2373. "output": [
  2374. { "name": "output_norm", "type": "LAYER_NORM", "category": "Normalization" }
  2375. ]
  2376. }
  2377. },
  2378. {
  2379. "name": "neo-bert",
  2380. "family": "encoder",
  2381. "graph": {
  2382. "input": [
  2383. { "name": "token_embd", "type": "EMBEDDING", "category": "Transform" }
  2384. ],
  2385. "blocks": [
  2386. { "name": "attn_norm", "type": "LAYER_NORM", "category": "Normalization" },
  2387. { "name": "attention", "type": "MULTI_HEAD_ATTENTION", "category": "Attention", "tensors": ["attn_qkv", "attn_output"], "position_encoding": "rope" },
  2388. { "name": "ffn_norm", "type": "LAYER_NORM", "category": "Normalization" },
  2389. { "name": "ffn_up", "type": "MUL_MAT" },
  2390. { "name": "ffn_down", "type": "MUL_MAT" }
  2391. ],
  2392. "output": [
  2393. { "name": "output_norm", "type": "LAYER_NORM", "category": "Normalization" }
  2394. ]
  2395. }
  2396. },
  2397. {
  2398. "name": "paddleocr",
  2399. "family": "encoder",
  2400. "graph": {
  2401. "input": [
  2402. { "name": "token_embd", "type": "EMBEDDING", "category": "Transform" }
  2403. ],
  2404. "blocks": [
  2405. { "name": "attn_norm", "type": "RMS_NORM", "category": "Normalization" },
  2406. { "name": "attention", "type": "MULTI_HEAD_ATTENTION", "category": "Attention", "tensors": ["attn_q", "attn_k", "attn_v", "attn_output"], "position_encoding": "rope" },
  2407. { "name": "ffn_norm", "type": "RMS_NORM", "category": "Normalization" },
  2408. { "name": "ffn_gate", "type": "MUL_MAT" },
  2409. { "name": "ffn_up", "type": "MUL_MAT" },
  2410. { "name": "ffn_down", "type": "MUL_MAT" }
  2411. ],
  2412. "output": [
  2413. { "name": "output_norm", "type": "RMS_NORM", "category": "Normalization" },
  2414. { "name": "output", "type": "MUL_MAT" }
  2415. ]
  2416. }
  2417. },
  2418. {
  2419. "name": "t5",
  2420. "family": "encoder-decoder",
  2421. "graph": {
  2422. "encoder": {
  2423. "input": [
  2424. { "name": "token_embd", "type": "EMBEDDING", "category": "Transform" }
  2425. ],
  2426. "blocks": [
  2427. { "name": "attn_norm", "type": "RMS_NORM", "category": "Normalization" },
  2428. { "name": "attention", "type": "MULTI_HEAD_ATTENTION", "category": "Attention", "tensors": ["enc.attn_q", "enc.attn_k", "enc.attn_v", "enc.attn_o"], "position_encoding": "relative_bias" },
  2429. { "name": "ffn_norm", "type": "RMS_NORM", "category": "Normalization" },
  2430. { "name": "ffn_up", "type": "MUL_MAT" },
  2431. { "name": "ffn_down", "type": "MUL_MAT" }
  2432. ],
  2433. "output": [
  2434. { "name": "output_norm", "type": "RMS_NORM", "category": "Normalization" }
  2435. ]
  2436. },
  2437. "decoder": {
  2438. "input": [
  2439. { "name": "token_embd", "type": "EMBEDDING", "category": "Transform" }
  2440. ],
  2441. "blocks": [
  2442. { "name": "attn_norm", "type": "RMS_NORM", "category": "Normalization" },
  2443. { "name": "self_attention", "type": "MULTI_HEAD_ATTENTION", "category": "Attention", "tensors": ["dec.attn_q", "dec.attn_k", "dec.attn_v", "dec.attn_o"], "position_encoding": "relative_bias" },
  2444. { "name": "cross_attn_norm", "type": "RMS_NORM", "category": "Normalization" },
  2445. { "name": "cross_attention", "type": "CROSS_ATTENTION", "category": "Attention", "tensors": ["dec.cross_attn_q", "dec.cross_attn_k", "dec.cross_attn_v", "dec.cross_attn_o"] },
  2446. { "name": "ffn_norm", "type": "RMS_NORM", "category": "Normalization" },
  2447. { "name": "ffn_up", "type": "MUL_MAT" },
  2448. { "name": "ffn_down", "type": "MUL_MAT" }
  2449. ],
  2450. "output": [
  2451. { "name": "output_norm", "type": "RMS_NORM", "category": "Normalization" },
  2452. { "name": "output", "type": "MUL_MAT" }
  2453. ]
  2454. }
  2455. }
  2456. },
  2457. {
  2458. "name": "t5encoder",
  2459. "family": "encoder",
  2460. "graph": {
  2461. "input": [
  2462. { "name": "token_embd", "type": "EMBEDDING", "category": "Transform" }
  2463. ],
  2464. "blocks": [
  2465. { "name": "attn_norm", "type": "RMS_NORM", "category": "Normalization" },
  2466. { "name": "attention", "type": "MULTI_HEAD_ATTENTION", "category": "Attention", "tensors": ["enc.attn_q", "enc.attn_k", "enc.attn_v", "enc.attn_o"], "position_encoding": "relative_bias" },
  2467. { "name": "ffn_norm", "type": "RMS_NORM", "category": "Normalization" },
  2468. { "name": "ffn_up", "type": "MUL_MAT" },
  2469. { "name": "ffn_down", "type": "MUL_MAT" }
  2470. ],
  2471. "output": [
  2472. { "name": "output_norm", "type": "RMS_NORM", "category": "Normalization" },
  2473. { "name": "output", "type": "MUL_MAT" }
  2474. ]
  2475. }
  2476. },
  2477. {
  2478. "name": "qwen3next",
  2479. "family": "delta-hybrid",
  2480. "graph": {
  2481. "input": [
  2482. { "name": "token_embd", "type": "EMBEDDING", "category": "Transform" }
  2483. ],
  2484. "blocks": [
  2485. { "name": "attn_norm", "type": "RMS_NORM", "category": "Normalization" },
  2486. {
  2487. "name": "attention_or_delta",
  2488. "type": "HYBRID",
  2489. "category": "Layer",
  2490. "attention": { "type": "attention", "subtype": "mha", "tensors": ["attn_q", "attn_k", "attn_v", "attn_output"], "position_encoding": "rope", "has_qk_norm": true },
  2491. "delta_net": { "type": "delta_net", "tensors": ["ssm_in", "ssm_conv1d", "ssm_dt", "ssm_a", "ssm_norm", "ssm_ba", "ssm_out"] }
  2492. },
  2493. { "name": "ffn_norm", "type": "RMS_NORM", "category": "Normalization" },
  2494. { "name": "ffn_gate_inp", "type": "MUL_MAT" },
  2495. { "name": "ffn_gate_exps", "type": "MUL_MAT_ID" },
  2496. { "name": "ffn_up_exps", "type": "MUL_MAT_ID" },
  2497. { "name": "ffn_down_exps", "type": "MUL_MAT_ID" }
  2498. ],
  2499. "output": [
  2500. { "name": "output_norm", "type": "RMS_NORM", "category": "Normalization" },
  2501. { "name": "output", "type": "MUL_MAT" }
  2502. ]
  2503. }
  2504. },
  2505. {
  2506. "name": "qwen35",
  2507. "family": "delta-hybrid",
  2508. "graph": {
  2509. "input": [
  2510. { "name": "token_embd", "type": "EMBEDDING", "category": "Transform" }
  2511. ],
  2512. "blocks": [
  2513. { "name": "attn_norm", "type": "RMS_NORM", "category": "Normalization" },
  2514. {
  2515. "name": "attention_or_delta",
  2516. "type": "HYBRID",
  2517. "category": "Layer",
  2518. "attention": { "type": "attention", "subtype": "mha", "tensors": ["attn_q", "attn_k", "attn_v", "attn_output"], "position_encoding": "rope", "has_qk_norm": true },
  2519. "delta_net": { "type": "delta_net", "tensors": ["ssm_a", "ssm_conv1d", "ssm_dt", "ssm_norm", "ssm_beta", "ssm_alpha", "ssm_out"] }
  2520. },
  2521. { "name": "attn_post_norm", "type": "RMS_NORM", "category": "Normalization" },
  2522. { "name": "ffn_gate", "type": "MUL_MAT" },
  2523. { "name": "ffn_up", "type": "MUL_MAT" },
  2524. { "name": "ffn_down", "type": "MUL_MAT" }
  2525. ],
  2526. "output": [
  2527. { "name": "output_norm", "type": "RMS_NORM", "category": "Normalization" },
  2528. { "name": "output", "type": "MUL_MAT" }
  2529. ]
  2530. }
  2531. },
  2532. {
  2533. "name": "qwen35moe",
  2534. "family": "delta-hybrid",
  2535. "graph": {
  2536. "input": [
  2537. { "name": "token_embd", "type": "EMBEDDING", "category": "Transform" }
  2538. ],
  2539. "blocks": [
  2540. { "name": "attn_norm", "type": "RMS_NORM", "category": "Normalization" },
  2541. {
  2542. "name": "attention_or_delta",
  2543. "type": "HYBRID",
  2544. "category": "Layer",
  2545. "attention": { "type": "attention", "subtype": "mha", "tensors": ["attn_q", "attn_k", "attn_v", "attn_output"], "position_encoding": "rope", "has_qk_norm": true },
  2546. "delta_net": { "type": "delta_net", "tensors": ["ssm_a", "ssm_conv1d", "ssm_dt", "ssm_norm", "ssm_beta", "ssm_alpha", "ssm_out"] }
  2547. },
  2548. { "name": "attn_post_norm", "type": "RMS_NORM", "category": "Normalization" },
  2549. { "name": "ffn_gate_inp", "type": "MUL_MAT" },
  2550. { "name": "ffn_gate_exps", "type": "MUL_MAT_ID" },
  2551. { "name": "ffn_up_exps", "type": "MUL_MAT_ID" },
  2552. { "name": "ffn_down_exps", "type": "MUL_MAT_ID" }
  2553. ],
  2554. "output": [
  2555. { "name": "output_norm", "type": "RMS_NORM", "category": "Normalization" },
  2556. { "name": "output", "type": "MUL_MAT" }
  2557. ]
  2558. }
  2559. },
  2560. {
  2561. "name": "kimi-linear",
  2562. "family": "delta-hybrid",
  2563. "graph": {
  2564. "input": [
  2565. { "name": "token_embd", "type": "EMBEDDING", "category": "Transform" }
  2566. ],
  2567. "blocks": [
  2568. { "name": "attn_norm", "type": "RMS_NORM", "category": "Normalization" },
  2569. {
  2570. "name": "attention_or_delta",
  2571. "type": "HYBRID",
  2572. "category": "Layer",
  2573. "attention": { "type": "attention", "subtype": "mla", "tensors": ["attn_q_a", "attn_q_b", "attn_kv_a_mqa", "attn_kv_b", "attn_output"], "position_encoding": "rope" },
  2574. "delta_net": { "type": "delta_net", "subtype": "kda", "tensors": ["ssm_conv1d_q", "ssm_conv1d_k", "ssm_conv1d_v", "ssm_f_a", "ssm_f_b", "ssm_beta", "ssm_a", "ssm_g_a", "ssm_g_b", "ssm_dt", "ssm_norm"] }
  2575. },
  2576. { "name": "ffn_norm", "type": "RMS_NORM", "category": "Normalization" },
  2577. { "name": "ffn_gate_inp", "type": "MUL_MAT" },
  2578. { "name": "ffn_gate_exps", "type": "MUL_MAT_ID" },
  2579. { "name": "ffn_up_exps", "type": "MUL_MAT_ID" },
  2580. { "name": "ffn_down_exps", "type": "MUL_MAT_ID" }
  2581. ],
  2582. "output": [
  2583. { "name": "output_norm", "type": "RMS_NORM", "category": "Normalization" },
  2584. { "name": "output", "type": "MUL_MAT" }
  2585. ]
  2586. }
  2587. },
  2588. {
  2589. "name": "bitnet",
  2590. "family": "bitnet",
  2591. "graph": {
  2592. "input": [
  2593. { "name": "token_embd", "type": "EMBEDDING", "category": "Transform" }
  2594. ],
  2595. "blocks": [
  2596. { "name": "attn_norm", "type": "RMS_NORM", "category": "Normalization" },
  2597. { "name": "attention", "type": "MULTI_HEAD_ATTENTION", "category": "Attention", "tensors": ["attn_q", "attn_k", "attn_v", "attn_output"], "position_encoding": "rope", "has_sub_norm": true },
  2598. { "name": "ffn_norm", "type": "RMS_NORM", "category": "Normalization" },
  2599. { "name": "ffn_gate", "type": "MUL_MAT" },
  2600. { "name": "ffn_up", "type": "MUL_MAT" },
  2601. { "name": "ffn_down", "type": "MUL_MAT" }
  2602. ],
  2603. "output": [
  2604. { "name": "output_norm", "type": "RMS_NORM", "category": "Normalization" }
  2605. ]
  2606. }
  2607. },
  2608. {
  2609. "name": "clip",
  2610. "family": "vision-encoder",
  2611. "graph": {
  2612. "input": [
  2613. { "name": "v.patch_embd", "type": "EMBEDDING", "category": "Transform" },
  2614. { "name": "v.position_embd", "type": "EMBEDDING", "category": "Transform" },
  2615. { "name": "v.class_embd", "type": "EMBEDDING", "category": "Transform" }
  2616. ],
  2617. "blocks": [
  2618. { "name": "input_norm", "type": "LAYER_NORM", "category": "Normalization" },
  2619. { "name": "attention", "type": "MULTI_HEAD_ATTENTION", "category": "Attention", "tensors": ["v.attn_qkv", "v.attn_out"], "position_encoding": "learned" },
  2620. { "name": "post_attn_norm", "type": "LAYER_NORM", "category": "Normalization" },
  2621. { "name": "ffn_up", "type": "MUL_MAT" },
  2622. { "name": "ffn_down", "type": "MUL_MAT" }
  2623. ],
  2624. "output": [
  2625. { "name": "v.post_ln", "type": "LAYER_NORM", "category": "Normalization" }
  2626. ]
  2627. }
  2628. },
  2629. {
  2630. "name": "wavtokenizer-dec",
  2631. "family": "audio-decoder",
  2632. "graph": {
  2633. "input": [
  2634. { "name": "token_embd", "type": "EMBEDDING", "category": "Transform" },
  2635. { "name": "token_embd_norm", "type": "LAYER_NORM", "category": "Normalization" },
  2636. { "name": "conv1d", "type": "CONV_1D", "category": "Layer" }
  2637. ],
  2638. "blocks": [
  2639. { "name": "convnext", "type": "CONV_NEXT", "category": "Layer", "tensors": ["convnext.dw", "convnext.norm", "convnext.pw1", "convnext.pw2", "convnext.gamma"] },
  2640. { "name": "posnet", "type": "POS_NET", "category": "Layer", "tensors": ["posnet.conv1", "posnet.conv2", "posnet.norm", "posnet.norm1", "posnet.norm2", "posnet.attn_norm", "posnet.attn_q", "posnet.attn_k", "posnet.attn_v", "posnet.attn_output"] }
  2641. ],
  2642. "output": [
  2643. { "name": "output_norm", "type": "LAYER_NORM", "category": "Normalization" },
  2644. { "name": "output", "type": "MUL_MAT" }
  2645. ]
  2646. }
  2647. },
  2648. {
  2649. "name": "qwen",
  2650. "family": "decoder",
  2651. "graph": {
  2652. "input": [
  2653. { "name": "token_embd", "type": "EMBEDDING", "category": "Transform" }
  2654. ],
  2655. "blocks": [
  2656. { "name": "attn_norm", "type": "RMS_NORM", "category": "Normalization" },
  2657. { "name": "attention", "type": "MULTI_HEAD_ATTENTION", "category": "Attention", "tensors": ["attn_qkv", "attn_output"], "position_encoding": "rope" },
  2658. { "name": "ffn_norm", "type": "RMS_NORM", "category": "Normalization" },
  2659. { "name": "ffn_gate", "type": "MUL_MAT" },
  2660. { "name": "ffn_up", "type": "MUL_MAT" },
  2661. { "name": "ffn_down", "type": "MUL_MAT" }
  2662. ],
  2663. "output": [
  2664. { "name": "output_norm", "type": "RMS_NORM", "category": "Normalization" },
  2665. { "name": "output", "type": "MUL_MAT" }
  2666. ]
  2667. }
  2668. }
  2669. ]