ParseTreeComparer.h 20 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567
  1. //-------------------------------------------------------------------------------------------------------
  2. // Copyright (C) Microsoft. All rights reserved.
  3. // Licensed under the MIT license. See LICENSE.txt file in the project root for full license information.
  4. //-------------------------------------------------------------------------------------------------------
  5. #pragma once
  6. #ifdef EDIT_AND_CONTINUE
  7. namespace Js
  8. {
  9. class SyntaxEquivalenceBase;
  10. template <class Allocator> class SyntaxEquivalence;
  11. //-----------------------------------------------------------------------------
  12. // TreeComparer for ParseNode TreeMatch.
  13. //-----------------------------------------------------------------------------
  14. template <class SubClass, class Allocator>
  15. class ParseTreeComparer : public TreeComparerBase<SubClass, ParseNode>
  16. {
  17. public:
  18. using PNode = TreeComparerBase<SubClass, ParseNode>::PNode; // make the type explicit for /permissive-
  19. private:
  20. static const int TOKENLIST_MAXDIFF_SHIFT = 3; // Used to detect lists of significantly different lengths
  21. SyntaxEquivalence<Allocator> syntaxEquivalence;
  22. // 2 lists used in GetDistance. (Can mark isLeaf because they don't own the nodes.)
  23. typedef JsUtil::List<PNode, Allocator, /*isLeaf*/true> NodeList;
  24. NodeList leftList, rightList;
  25. public:
  26. ParseTreeComparer(Allocator* alloc) :
  27. syntaxEquivalence(alloc), leftList(alloc), rightList(alloc)
  28. {}
  29. ParseTreeComparer(const ParseTreeComparer& other) :
  30. syntaxEquivalence(other.GetAllocator()), leftList(other.GetAllocator()), rightList(other.GetAllocator())
  31. {}
  32. Allocator* GetAllocator() const
  33. {
  34. return leftList.GetAllocator();
  35. }
  36. int LabelCount() const
  37. {
  38. return ::OpCode::knopLim;
  39. }
  40. int GetLabel(PNode x) const
  41. {
  42. return x->nop;
  43. }
  44. PNode GetParent(PNode x) const
  45. {
  46. return x->parent;
  47. }
  48. template <class Func>
  49. void MapChildren(PNode x, const Func& func) const
  50. {
  51. Js::MapChildren(x, func);
  52. }
  53. // Map (sub)tree nodes to compute distance. Child class can re-implement to control which nodes participate in
  54. // distance computing.
  55. template <class Func>
  56. void MapTreeToComputeDistance(PNode x, const Func& func) const
  57. {
  58. pThis()->MapTree(x, func);
  59. }
  60. double GetDistance(PNode left, PNode right)
  61. {
  62. Assert(pThis()->GetLabel(left) == pThis()->GetLabel(right)); // Only called for nodes of same label
  63. return ComputeValueDistance(left, right);
  64. }
  65. bool ValuesEqual(PNode oldNode, PNode newNode)
  66. {
  67. // This determines if we emit Update edit for matched nodes. If ValuesEqual, don't need update edit.
  68. return !(syntaxEquivalence.IsToken(oldNode) || syntaxEquivalence.HasToken(oldNode))
  69. || syntaxEquivalence.AreEquivalent(oldNode, newNode);
  70. }
  71. private:
  72. double ComputeValueDistance(PNode left, PNode right)
  73. {
  74. // If 2 nodes are equivalent trees, consider them exact match.
  75. if (syntaxEquivalence.AreEquivalent(left, right))
  76. {
  77. return ExactMatchDistance;
  78. }
  79. double distance = ComputeDistance(left, right);
  80. // We don't want to return an exact match, because there
  81. // must be something different, since we got here
  82. return (distance == ExactMatchDistance) ? EpsilonDistance : distance;
  83. }
  84. //
  85. // Computer distance the same as Roslyn:
  86. // * For token nodes, use their string LCS distance.
  87. // * Otherwise, flatten the tree to get all tokens, use token list LCS distance.
  88. //
  89. // However, our parser are significantly different to Roslyn. Roslyn uses "full fidelity" parser,
  90. // keeping every token scanned from source. e.g., "var a = 1" -> "var","a","=","1". Our parser keeps
  91. // much less tokens. Thus our LCS distance will be quite different, which may affect diff accuracy.
  92. //
  93. double ComputeDistance(PNode left, PNode right)
  94. {
  95. // For token nodes, use their string LCS distance
  96. if (syntaxEquivalence.IsToken(left))
  97. {
  98. return ComputeTokenDistance(left, right);
  99. }
  100. // Otherwise, flatten the tree to get all tokens, use token list LCS distance
  101. Flatten(left, leftList);
  102. Flatten(right, rightList);
  103. // If token list lengths are significantly different, consider they are quite different.
  104. {
  105. int leftLen = leftList.Count();
  106. int rightLen = rightList.Count();
  107. int minLen = min(leftLen, rightLen);
  108. int maxLen = max(leftLen, rightLen);
  109. if (minLen < (maxLen >> TOKENLIST_MAXDIFF_SHIFT))
  110. {
  111. // Assuming minLen are all matched, distance > 0.875 (7/8). These two nodes shouldn't be a match.
  112. return 1.0 - (double)minLen / (double)maxLen;
  113. }
  114. }
  115. return ComputeLongestCommonSubsequenceDistance(GetAllocator(), leftList.Count(), rightList.Count(), [this](int indexA, int indexB)
  116. {
  117. return AreNodesTokenEquivalent(leftList.Item(indexA), rightList.Item(indexB));
  118. });
  119. }
  120. // Flatten IsToken/HasToken nodes in the (sub)tree into given list to compute distance.
  121. void Flatten(PNode root, NodeList& list)
  122. {
  123. list.Clear();
  124. pThis()->MapTreeToComputeDistance(root, [&](PNode child)
  125. {
  126. if (syntaxEquivalence.IsToken(child) || syntaxEquivalence.HasToken(child))
  127. {
  128. list.Add(child);
  129. }
  130. });
  131. }
  132. // Check if IsToken/HasToken nodes are equivalent
  133. bool AreNodesTokenEquivalent(PNode left, PNode right)
  134. {
  135. if (left->nop == right->nop)
  136. {
  137. return syntaxEquivalence.IsToken(left) ?
  138. syntaxEquivalence.AreTokensEquivalent(left, right) : syntaxEquivalence.HaveEquivalentTokens(left, right);
  139. }
  140. return false;
  141. }
  142. double ComputeTokenDistance(PNode left, PNode right) const
  143. {
  144. Assert(syntaxEquivalence.IsToken(left));
  145. switch (left->nop)
  146. {
  147. case knopName:
  148. return ComputeDistance(left->AsParseNodeName()->pid, right->AsParseNodeName()->pid);
  149. case knopStr:
  150. return ComputeDistance(left->AsParseNodeStr()->pid, right->AsParseNodeStr()->pid);
  151. case knopInt:
  152. return left->AsParseNodeInt()->lw == right->AsParseNodeInt()->lw ? ExactMatchDistance : 1.0;
  153. case knopFlt:
  154. return left->AsParseNodeFloat()->dbl == right->AsParseNodeFloat()->dbl ? ExactMatchDistance : 1.0;
  155. case knopRegExp: //TODO: AsParseNodeRegExp()->regexPattern
  156. break;
  157. }
  158. // Other token nodes with fixed strings, e.g. "true", "null", always match exactly
  159. return ExactMatchDistance;
  160. }
  161. // Compute distance of 2 PIDs as their string LCS distance
  162. double ComputeDistance(IdentPtr left, IdentPtr right) const
  163. {
  164. Allocator* alloc = leftList.GetAllocator();
  165. return ComputeLongestCommonSubsequenceDistance(alloc, left->Cch(), right->Cch(), [=](int indexA, int indexB)
  166. {
  167. return left->Psz()[indexA] == right->Psz()[indexB];
  168. });
  169. }
  170. };
  171. //-----------------------------------------------------------------------------
  172. // Function TreeComparer for TreeMatch at function level. View the parse tree as a hierarchy of functions.
  173. // Ignore statement details.
  174. //-----------------------------------------------------------------------------
  175. template <class Allocator>
  176. class FunctionTreeComparer : public ParseTreeComparer<FunctionTreeComparer<Allocator>, Allocator>
  177. {
  178. public:
  179. using PNode = ParseTreeComparer<FunctionTreeComparer<Allocator>, Allocator>::PNode;
  180. FunctionTreeComparer(Allocator* alloc) : ParseTreeComparer(alloc) {}
  181. FunctionTreeComparer(const FunctionTreeComparer& other) : ParseTreeComparer(other) {}
  182. // We only have 1 kind of node in this view -- FuncDecl
  183. int LabelCount() const { return 1; }
  184. int GetLabel(PNode x) const { return 0; }
  185. PNode GetParent(PNode x) const
  186. {
  187. while (true)
  188. {
  189. x = __super::GetParent(x);
  190. if (!x || x->nop == knopFncDecl || x->nop == knopProg)
  191. {
  192. break;
  193. }
  194. }
  195. return x;
  196. }
  197. template <class Func>
  198. void MapChildren(PNode x, const Func& func) const
  199. {
  200. __super::MapChildren(x, [&](PNode child)
  201. {
  202. if (child->nop == knopFncDecl)
  203. {
  204. func(child);
  205. }
  206. else
  207. {
  208. pThis()->MapChildren(child, func);
  209. }
  210. });
  211. }
  212. // To compute function node distance, only use their direct child nodes. Do not include descendant nodes
  213. // under nested child functions.
  214. template <class Func>
  215. void MapTreeToComputeDistance(PNode x, const Func& func) const
  216. {
  217. func(x);
  218. __super::MapChildren(x, [&](PNode child)
  219. {
  220. if (child->nop == knopFncDecl)
  221. {
  222. func(child); // For child func, output the node itself but don't map its descendants
  223. }
  224. else
  225. {
  226. pThis()->MapTreeToComputeDistance(child, func); // recursive into other nodes
  227. }
  228. });
  229. }
  230. };
  231. //-----------------------------------------------------------------------------
  232. // Full TreeComparer for TreeMatch full parse tree. Used for test only.
  233. //-----------------------------------------------------------------------------
  234. template <class Allocator>
  235. class FullTreeComparer : public ParseTreeComparer<FullTreeComparer<Allocator>, Allocator>
  236. {
  237. public:
  238. FullTreeComparer(Allocator* alloc) : ParseTreeComparer(alloc) {}
  239. FullTreeComparer(const FullTreeComparer& other) : ParseTreeComparer(other) {}
  240. };
  241. //-----------------------------------------------------------------------------
  242. // Visit every node of a parse (sub)tree in preorder. Delegates to Preorder/Postorder of PreorderContext.
  243. //-----------------------------------------------------------------------------
  244. template <class PreorderContext>
  245. void ParseTreePreorder(ParseNode* root, PreorderContext* context)
  246. {
  247. class ParseTreePreorderVisitorPolicy : public VisitorPolicyBase<PreorderContext*>
  248. {
  249. protected:
  250. bool Preorder(ParseNode* pnode, Context context) { context->Preorder(pnode); return true; }
  251. void Postorder(ParseNode* pnode, Context context) { context->Postorder(pnode); }
  252. };
  253. ParseNodeVisitor<ParseTreePreorderVisitorPolicy> visitor;
  254. visitor.Visit(root, context);
  255. }
  256. template <class Func>
  257. void ParseTreePreorder(ParseNode* root, const Func& func)
  258. {
  259. class PreorderContext
  260. {
  261. private:
  262. const Func& func;
  263. public:
  264. PreorderContext(const Func& func) : func(func) {}
  265. void Preorder(ParseNode* pnode) { func(pnode); }
  266. void Postorder(ParseNode* pnode) {}
  267. };
  268. PreorderContext context(func);
  269. ParseTreePreorder(root, &context);
  270. }
  271. // TEMP: Consider setting parent at parse time. Temporarily traverse the whole tree to fix parent links.
  272. template <class Allocator>
  273. void FixParentLinks(ParseNodePtr root, Allocator* alloc)
  274. {
  275. class FixAstParentVisitorContext
  276. {
  277. private:
  278. JsUtil::Stack<ParseNodePtr, Allocator, /*isLeaf*/true> stack;
  279. public:
  280. FixAstParentVisitorContext(Allocator* alloc) : stack(alloc) {};
  281. void Preorder(ParseNode* pnode)
  282. {
  283. pnode->parent = !stack.Empty() ? stack.Top() : nullptr;
  284. stack.Push(pnode);
  285. }
  286. void Postorder(ParseNode* pnode)
  287. {
  288. Assert(pnode == stack.Peek());
  289. stack.Pop();
  290. }
  291. };
  292. FixAstParentVisitorContext fixAstParentVisitorContext(alloc);
  293. ParseTreePreorder(root, &fixAstParentVisitorContext);
  294. }
  295. //-----------------------------------------------------------------------------
  296. // Map child nodes of a parse node.
  297. //-----------------------------------------------------------------------------
  298. template <class Func>
  299. void MapChildren(ParseNode* pnode, const Func& func)
  300. {
  301. struct ChildrenWalkerPolicy : public WalkerPolicyBase<bool, const Func&>
  302. {
  303. ResultType WalkChildChecked(ParseNode *pnode, Context context)
  304. {
  305. // Some of Walker code calls with null ParseNode. e.g., a for loop with null init child.
  306. if (pnode)
  307. {
  308. context(pnode);
  309. }
  310. return true;
  311. }
  312. ResultType WalkFirstChild(ParseNode *pnode, Context context) { return WalkChildChecked(pnode, context); }
  313. ResultType WalkSecondChild(ParseNode *pnode, Context context) { return WalkChildChecked(pnode, context); }
  314. ResultType WalkNthChild(ParseNode *pparentnode, ParseNode *pnode, Context context) { return WalkChildChecked(pnode, context); }
  315. };
  316. ParseNodeWalker<ChildrenWalkerPolicy> walker;
  317. walker.Walk(pnode, func);
  318. }
  319. //-----------------------------------------------------------------------------
  320. // Helpers for testing ParseNode equivalence
  321. //-----------------------------------------------------------------------------
  322. class SyntaxEquivalenceBase
  323. {
  324. public:
  325. //
  326. // Check if a node is a token node (leaf only, can never have child nodes). e.g., "123" (number literal).
  327. //
  328. static bool IsToken(ParseNode* pnode)
  329. {
  330. // TODO: We may use a new flag fnopToken
  331. return (pnode->Grfnop() & fnopLeaf)
  332. && pnode->nop != knopFncDecl
  333. && pnode->nop != knopClassDecl;
  334. }
  335. //
  336. // Check if a node has token (node type owning an implicit token, e.g. "var x" (var declaration)).
  337. //
  338. static bool HasToken(ParseNode* pnode)
  339. {
  340. // TODO: We may use a new flag fnopHasToken
  341. return pnode->nop == knopVarDecl
  342. || pnode->nop == knopFncDecl; // TODO: other nodes with data
  343. }
  344. //
  345. // Check if 2 IsToken nodes (of the same type) are equivalent.
  346. //
  347. static bool AreTokensEquivalent(ParseNodePtr left, ParseNodePtr right)
  348. {
  349. Assert(IsToken(left) && left->nop == right->nop);
  350. switch (left->nop)
  351. {
  352. case knopName:
  353. return AreEquivalent(left->AsParseNodeName()->pid, right->AsParseNodeName()->pid);
  354. case knopStr:
  355. return AreEquivalent(left->AsParseNodeStr()->pid, right->AsParseNodeStr()->pid);
  356. case knopInt:
  357. return left->AsParseNodeInt()->lw == right->AsParseNodeInt()->lw;
  358. case knopFlt:
  359. return left->AsParseNodeFloat()->dbl == right->AsParseNodeFloat()->dbl;
  360. case knopRegExp:
  361. //TODO: AsParseNodePid()->regexPattern
  362. break;
  363. }
  364. // Other tokens have fixed strings and are always equivalent, e.g. "true", "null"
  365. return true;
  366. }
  367. //
  368. // Check if 2 HasToken nodes (of the same type) have equivalent tokens.
  369. //
  370. static bool HaveEquivalentTokens(ParseNodePtr left, ParseNodePtr right)
  371. {
  372. Assert(HasToken(left) && left->nop == right->nop);
  373. switch (left->nop)
  374. {
  375. case knopVarDecl:
  376. return AreEquivalent(left->AsParseNodeVar()->pid, right->AsParseNodeVar()->pid);
  377. case knopFncDecl:
  378. return AreEquivalent(left->AsParseNodeFnc()->pid, right->AsParseNodeFnc()->pid);
  379. //TODO: other nodes with data
  380. }
  381. Assert(false);
  382. return false;
  383. }
  384. private:
  385. // Test if 2 PIDs refer to the same text.
  386. static bool AreEquivalent(IdentPtr pid1, IdentPtr pid2)
  387. {
  388. if (pid1 && pid2)
  389. {
  390. // Optimize: If we can have both trees (scanner/parser) share Ident dictionary, this can become pid1 == pid2.
  391. return pid1->Hash() == pid2->Hash()
  392. && pid1->Cch() == pid2->Cch()
  393. && wcsncmp(pid1->Psz(), pid2->Psz(), pid1->Cch()) == 0;
  394. }
  395. // PIDs may be null, e.g. anonymous function declarations
  396. return pid1 == pid2;
  397. }
  398. };
  399. template <class Allocator>
  400. class SyntaxEquivalence : public SyntaxEquivalenceBase
  401. {
  402. private:
  403. // 2 stacks used during equivalence test. (Can mark isLeaf because they don't own the nodes.)
  404. JsUtil::Stack<ParseNode*, Allocator, /*isLeaf*/true> leftStack, rightStack;
  405. public:
  406. SyntaxEquivalence(Allocator* alloc) : leftStack(alloc), rightStack(alloc)
  407. {}
  408. //
  409. // Tests if 2 parse (sub)trees are equivalent.
  410. //
  411. bool AreEquivalent(ParseNode* left, ParseNode* right)
  412. {
  413. bool result;
  414. if (TryTestEquivalenceFast(left, right, &result))
  415. {
  416. return result;
  417. }
  418. Reset(); // Clear possible remaining nodes in leftStack/rightStack
  419. PushChildren(left, right);
  420. while (!leftStack.Empty() && leftStack.Count() == rightStack.Count())
  421. {
  422. left = leftStack.Pop();
  423. right = rightStack.Pop();
  424. if (TryTestEquivalenceFast(left, right, &result))
  425. {
  426. if (!result)
  427. {
  428. return false;
  429. }
  430. }
  431. else
  432. {
  433. PushChildren(left, right); // Sub-pair is ok, but need to compare children
  434. }
  435. }
  436. return leftStack.Empty() && rightStack.Empty();
  437. }
  438. private:
  439. void Reset()
  440. {
  441. leftStack.Clear();
  442. rightStack.Clear();
  443. }
  444. void PushChildren(ParseNode* left, ParseNode* right)
  445. {
  446. Assert(leftStack.Count() == rightStack.Count());
  447. MapChildren(left, [&](ParseNode* child) { leftStack.Push(child); });
  448. MapChildren(right, [&](ParseNode* child) { rightStack.Push(child); });
  449. }
  450. //
  451. // Try to test 2 nodes for equivalence. Return true if we can determine the pair equivalence.
  452. // Otherwise return false, which means the pair test is ok but we need further child nodes comparison.
  453. //
  454. static bool TryTestEquivalenceFast(ParseNode* left, ParseNode* right, _Out_ bool* result)
  455. {
  456. Assert(left && right);
  457. if (left == right)
  458. {
  459. *result = true; // Same node
  460. return true;
  461. }
  462. if (left->nop != right->nop)
  463. {
  464. *result = false; // Different node type
  465. return true;
  466. }
  467. if (IsToken(left))
  468. {
  469. *result = AreTokensEquivalent(left, right); // Token comparison suffices
  470. return true;
  471. }
  472. if (HasToken(left) && !HaveEquivalentTokens(left, right))
  473. {
  474. *result = false; // Different implicit tokens, e.g. "var x" vs "var y"
  475. return true;
  476. }
  477. return false; // This pair is ok, but not sure about children
  478. }
  479. };
  480. } // namespace Js
  481. #endif