ParseTreeComparer.h 20 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558
  1. //-------------------------------------------------------------------------------------------------------
  2. // Copyright (C) Microsoft. All rights reserved.
  3. // Licensed under the MIT license. See LICENSE.txt file in the project root for full license information.
  4. //-------------------------------------------------------------------------------------------------------
  5. #pragma once
  6. #ifdef EDIT_AND_CONTINUE
  7. namespace Js
  8. {
  9. class SyntaxEquivalenceBase;
  10. template <class Allocator> class SyntaxEquivalence;
  11. //-----------------------------------------------------------------------------
  12. // TreeComparer for ParseNode TreeMatch.
  13. //-----------------------------------------------------------------------------
  14. template <class SubClass, class Allocator>
  15. class ParseTreeComparer : public TreeComparerBase<SubClass, ParseNode>
  16. {
  17. private:
  18. static const int TOKENLIST_MAXDIFF_SHIFT = 3; // Used to detect lists of significantly different lengths
  19. SyntaxEquivalence<Allocator> syntaxEquivalence;
  20. // 2 lists used in GetDistance. (Can mark isLeaf because they don't own the nodes.)
  21. typedef JsUtil::List<PNode, Allocator, /*isLeaf*/true> NodeList;
  22. NodeList leftList, rightList;
  23. public:
  24. ParseTreeComparer(Allocator* alloc) :
  25. syntaxEquivalence(alloc), leftList(alloc), rightList(alloc)
  26. {}
  27. ParseTreeComparer(const ParseTreeComparer& other) :
  28. syntaxEquivalence(other.GetAllocator()), leftList(other.GetAllocator()), rightList(other.GetAllocator())
  29. {}
  30. Allocator* GetAllocator() const
  31. {
  32. return leftList.GetAllocator();
  33. }
  34. int LabelCount() const
  35. {
  36. return ::OpCode::knopLim;
  37. }
  38. int GetLabel(PNode x) const
  39. {
  40. return x->nop;
  41. }
  42. PNode GetParent(PNode x) const
  43. {
  44. return x->parent;
  45. }
  46. template <class Func>
  47. void MapChildren(PNode x, const Func& func) const
  48. {
  49. Js::MapChildren(x, func);
  50. }
  51. // Map (sub)tree nodes to compute distance. Child class can re-implement to control which nodes participate in
  52. // distance computing.
  53. template <class Func>
  54. void MapTreeToComputeDistance(PNode x, const Func& func) const
  55. {
  56. pThis()->MapTree(x, func);
  57. }
  58. double GetDistance(PNode left, PNode right)
  59. {
  60. Assert(pThis()->GetLabel(left) == pThis()->GetLabel(right)); // Only called for nodes of same label
  61. return ComputeValueDistance(left, right);
  62. }
  63. bool ValuesEqual(PNode oldNode, PNode newNode)
  64. {
  65. // This determines if we emit Update edit for matched nodes. If ValuesEqual, don't need update edit.
  66. return !(syntaxEquivalence.IsToken(oldNode) || syntaxEquivalence.HasToken(oldNode))
  67. || syntaxEquivalence.AreEquivalent(oldNode, newNode);
  68. }
  69. private:
  70. double ComputeValueDistance(PNode left, PNode right)
  71. {
  72. // If 2 nodes are equivalent trees, consider them exact match.
  73. if (syntaxEquivalence.AreEquivalent(left, right))
  74. {
  75. return ExactMatchDistance;
  76. }
  77. double distance = ComputeDistance(left, right);
  78. // We don't want to return an exact match, because there
  79. // must be something different, since we got here
  80. return (distance == ExactMatchDistance) ? EpsilonDistance : distance;
  81. }
  82. //
  83. // Computer distance the same as Roslyn:
  84. // * For token nodes, use their string LCS distance.
  85. // * Otherwise, flatten the tree to get all tokens, use token list LCS distance.
  86. //
  87. // However, our parser are significantly different to Roslyn. Roslyn uses "full fidelity" parser,
  88. // keeping every token scanned from source. e.g., "var a = 1" -> "var","a","=","1". Our parser keeps
  89. // much less tokens. Thus our LCS distance will be quite different, which may affect diff accuracy.
  90. //
  91. double ComputeDistance(PNode left, PNode right)
  92. {
  93. // For token nodes, use their string LCS distance
  94. if (syntaxEquivalence.IsToken(left))
  95. {
  96. return ComputeTokenDistance(left, right);
  97. }
  98. // Otherwise, flatten the tree to get all tokens, use token list LCS distance
  99. Flatten(left, leftList);
  100. Flatten(right, rightList);
  101. // If token list lengths are significantly different, consider they are quite different.
  102. {
  103. int leftLen = leftList.Count();
  104. int rightLen = rightList.Count();
  105. int minLen = min(leftLen, rightLen);
  106. int maxLen = max(leftLen, rightLen);
  107. if (minLen < (maxLen >> TOKENLIST_MAXDIFF_SHIFT))
  108. {
  109. // Assuming minLen are all matched, distance > 0.875 (7/8). These two nodes shouldn't be a match.
  110. return 1.0 - (double)minLen / (double)maxLen;
  111. }
  112. }
  113. return ComputeLongestCommonSubsequenceDistance(GetAllocator(), leftList.Count(), rightList.Count(), [this](int indexA, int indexB)
  114. {
  115. return AreNodesTokenEquivalent(leftList.Item(indexA), rightList.Item(indexB));
  116. });
  117. }
  118. // Flatten IsToken/HasToken nodes in the (sub)tree into given list to compute distance.
  119. void Flatten(PNode root, NodeList& list)
  120. {
  121. list.Clear();
  122. pThis()->MapTreeToComputeDistance(root, [&](PNode child)
  123. {
  124. if (syntaxEquivalence.IsToken(child) || syntaxEquivalence.HasToken(child))
  125. {
  126. list.Add(child);
  127. }
  128. });
  129. }
  130. // Check if IsToken/HasToken nodes are equivalent
  131. bool AreNodesTokenEquivalent(PNode left, PNode right)
  132. {
  133. if (left->nop == right->nop)
  134. {
  135. return syntaxEquivalence.IsToken(left) ?
  136. syntaxEquivalence.AreTokensEquivalent(left, right) : syntaxEquivalence.HaveEquivalentTokens(left, right);
  137. }
  138. return false;
  139. }
  140. double ComputeTokenDistance(PNode left, PNode right) const
  141. {
  142. Assert(syntaxEquivalence.IsToken(left));
  143. switch (left->nop)
  144. {
  145. case knopName:
  146. case knopStr:
  147. return ComputeDistance(left->sxPid.pid, right->sxPid.pid);
  148. case knopInt:
  149. return left->sxInt.lw == right->sxInt.lw ? ExactMatchDistance : 1.0;
  150. case knopFlt:
  151. return left->sxFlt.dbl == right->sxFlt.dbl ? ExactMatchDistance : 1.0;
  152. case knopRegExp: //TODO: sxPid.regexPattern
  153. break;
  154. }
  155. // Other token nodes with fixed strings, e.g. "true", "null", always match exactly
  156. return ExactMatchDistance;
  157. }
  158. // Compute distance of 2 PIDs as their string LCS distance
  159. double ComputeDistance(IdentPtr left, IdentPtr right) const
  160. {
  161. Allocator* alloc = leftList.GetAllocator();
  162. return ComputeLongestCommonSubsequenceDistance(alloc, left->Cch(), right->Cch(), [=](int indexA, int indexB)
  163. {
  164. return left->Psz()[indexA] == right->Psz()[indexB];
  165. });
  166. }
  167. };
  168. //-----------------------------------------------------------------------------
  169. // Function TreeComparer for TreeMatch at function level. View the parse tree as a hierarchy of functions.
  170. // Ignore statement details.
  171. //-----------------------------------------------------------------------------
  172. template <class Allocator>
  173. class FunctionTreeComparer : public ParseTreeComparer<FunctionTreeComparer<Allocator>, Allocator>
  174. {
  175. public:
  176. FunctionTreeComparer(Allocator* alloc) : ParseTreeComparer(alloc) {}
  177. FunctionTreeComparer(const FunctionTreeComparer& other) : ParseTreeComparer(other) {}
  178. // We only have 1 kind of node in this view -- FuncDecl
  179. int LabelCount() const { return 1; }
  180. int GetLabel(PNode x) const { return 0; }
  181. PNode GetParent(PNode x) const
  182. {
  183. while (true)
  184. {
  185. x = __super::GetParent(x);
  186. if (!x || x->nop == knopFncDecl || x->nop == knopProg)
  187. {
  188. break;
  189. }
  190. }
  191. return x;
  192. }
  193. template <class Func>
  194. void MapChildren(PNode x, const Func& func) const
  195. {
  196. __super::MapChildren(x, [&](PNode child)
  197. {
  198. if (child->nop == knopFncDecl)
  199. {
  200. func(child);
  201. }
  202. else
  203. {
  204. pThis()->MapChildren(child, func);
  205. }
  206. });
  207. }
  208. // To compute function node distance, only use their direct child nodes. Do not include descendant nodes
  209. // under nested child functions.
  210. template <class Func>
  211. void MapTreeToComputeDistance(PNode x, const Func& func) const
  212. {
  213. func(x);
  214. __super::MapChildren(x, [&](PNode child)
  215. {
  216. if (child->nop == knopFncDecl)
  217. {
  218. func(child); // For child func, output the node itself but don't map its descendants
  219. }
  220. else
  221. {
  222. pThis()->MapTreeToComputeDistance(child, func); // recursive into other nodes
  223. }
  224. });
  225. }
  226. };
  227. //-----------------------------------------------------------------------------
  228. // Full TreeComparer for TreeMatch full parse tree. Used for test only.
  229. //-----------------------------------------------------------------------------
  230. template <class Allocator>
  231. class FullTreeComparer : public ParseTreeComparer<FullTreeComparer<Allocator>, Allocator>
  232. {
  233. public:
  234. FullTreeComparer(Allocator* alloc) : ParseTreeComparer(alloc) {}
  235. FullTreeComparer(const FullTreeComparer& other) : ParseTreeComparer(other) {}
  236. };
  237. //-----------------------------------------------------------------------------
  238. // Visit every node of a parse (sub)tree in preorder. Delegates to Preorder/Postorder of PreorderContext.
  239. //-----------------------------------------------------------------------------
  240. template <class PreorderContext>
  241. void ParseTreePreorder(ParseNode* root, PreorderContext* context)
  242. {
  243. class ParseTreePreorderVisitorPolicy : public VisitorPolicyBase<PreorderContext*>
  244. {
  245. protected:
  246. bool Preorder(ParseNode* pnode, Context context) { context->Preorder(pnode); return true; }
  247. void Postorder(ParseNode* pnode, Context context) { context->Postorder(pnode); }
  248. };
  249. ParseNodeVisitor<ParseTreePreorderVisitorPolicy> visitor;
  250. visitor.Visit(root, context);
  251. }
  252. template <class Func>
  253. void ParseTreePreorder(ParseNode* root, const Func& func)
  254. {
  255. class PreorderContext
  256. {
  257. private:
  258. const Func& func;
  259. public:
  260. PreorderContext(const Func& func) : func(func) {}
  261. void Preorder(ParseNode* pnode) { func(pnode); }
  262. void Postorder(ParseNode* pnode) {}
  263. };
  264. PreorderContext context(func);
  265. ParseTreePreorder(root, &context);
  266. }
  267. // TEMP: Consider setting parent at parse time. Temporarily traverse the whole tree to fix parent links.
  268. template <class Allocator>
  269. void FixParentLinks(ParseNodePtr root, Allocator* alloc)
  270. {
  271. class FixAstParentVisitorContext
  272. {
  273. private:
  274. JsUtil::Stack<ParseNodePtr, Allocator, /*isLeaf*/true> stack;
  275. public:
  276. FixAstParentVisitorContext(Allocator* alloc) : stack(alloc) {};
  277. void Preorder(ParseNode* pnode)
  278. {
  279. pnode->parent = !stack.Empty() ? stack.Top() : nullptr;
  280. stack.Push(pnode);
  281. }
  282. void Postorder(ParseNode* pnode)
  283. {
  284. Assert(pnode == stack.Peek());
  285. stack.Pop();
  286. }
  287. };
  288. FixAstParentVisitorContext fixAstParentVisitorContext(alloc);
  289. ParseTreePreorder(root, &fixAstParentVisitorContext);
  290. }
  291. //-----------------------------------------------------------------------------
  292. // Map child nodes of a parse node.
  293. //-----------------------------------------------------------------------------
  294. template <class Func>
  295. void MapChildren(ParseNode* pnode, const Func& func)
  296. {
  297. struct ChildrenWalkerPolicy : public WalkerPolicyBase<bool, const Func&>
  298. {
  299. ResultType WalkChildChecked(ParseNode *pnode, Context context)
  300. {
  301. // Some of Walker code calls with null ParseNode. e.g., a for loop with null init child.
  302. if (pnode)
  303. {
  304. context(pnode);
  305. }
  306. return true;
  307. }
  308. ResultType WalkFirstChild(ParseNode *pnode, Context context) { return WalkChildChecked(pnode, context); }
  309. ResultType WalkSecondChild(ParseNode *pnode, Context context) { return WalkChildChecked(pnode, context); }
  310. ResultType WalkNthChild(ParseNode *pparentnode, ParseNode *pnode, Context context) { return WalkChildChecked(pnode, context); }
  311. };
  312. ParseNodeWalker<ChildrenWalkerPolicy> walker;
  313. walker.Walk(pnode, func);
  314. }
  315. //-----------------------------------------------------------------------------
  316. // Helpers for testing ParseNode equivalence
  317. //-----------------------------------------------------------------------------
  318. class SyntaxEquivalenceBase
  319. {
  320. public:
  321. //
  322. // Check if a node is a token node (leaf only, can never have child nodes). e.g., "123" (number literal).
  323. //
  324. static bool IsToken(ParseNode* pnode)
  325. {
  326. // TODO: We may use a new flag fnopToken
  327. return (ParseNode::Grfnop(pnode->nop) & fnopLeaf)
  328. && pnode->nop != knopFncDecl
  329. && pnode->nop != knopClassDecl;
  330. }
  331. //
  332. // Check if a node has token (node type owning an implicit token, e.g. "var x" (var declaration)).
  333. //
  334. static bool HasToken(ParseNode* pnode)
  335. {
  336. // TODO: We may use a new flag fnopHasToken
  337. return pnode->nop == knopVarDecl
  338. || pnode->nop == knopFncDecl; // TODO: other nodes with data
  339. }
  340. //
  341. // Check if 2 IsToken nodes (of the same type) are equivalent.
  342. //
  343. static bool AreTokensEquivalent(ParseNodePtr left, ParseNodePtr right)
  344. {
  345. Assert(IsToken(left) && left->nop == right->nop);
  346. switch (left->nop)
  347. {
  348. case knopName:
  349. case knopStr:
  350. return AreEquivalent(left->sxPid.pid, right->sxPid.pid);
  351. case knopInt:
  352. return left->sxInt.lw == right->sxInt.lw;
  353. case knopFlt:
  354. return left->sxFlt.dbl == right->sxFlt.dbl;
  355. case knopRegExp:
  356. //TODO: sxPid.regexPattern
  357. break;
  358. }
  359. // Other tokens have fixed strings and are always equivalent, e.g. "true", "null"
  360. return true;
  361. }
  362. //
  363. // Check if 2 HasToken nodes (of the same type) have equivalent tokens.
  364. //
  365. static bool HaveEquivalentTokens(ParseNodePtr left, ParseNodePtr right)
  366. {
  367. Assert(HasToken(left) && left->nop == right->nop);
  368. switch (left->nop)
  369. {
  370. case knopVarDecl:
  371. return AreEquivalent(left->sxVar.pid, right->sxVar.pid);
  372. case knopFncDecl:
  373. return AreEquivalent(left->sxFnc.pid, right->sxFnc.pid);
  374. //TODO: other nodes with data
  375. }
  376. Assert(false);
  377. return false;
  378. }
  379. private:
  380. // Test if 2 PIDs refer to the same text.
  381. static bool AreEquivalent(IdentPtr pid1, IdentPtr pid2)
  382. {
  383. if (pid1 && pid2)
  384. {
  385. // Optimize: If we can have both trees (scanner/parser) share Ident dictionary, this can become pid1 == pid2.
  386. return pid1->Hash() == pid2->Hash()
  387. && pid1->Cch() == pid2->Cch()
  388. && wcsncmp(pid1->Psz(), pid2->Psz(), pid1->Cch()) == 0;
  389. }
  390. // PIDs may be null, e.g. anonymous function declarations
  391. return pid1 == pid2;
  392. }
  393. };
  394. template <class Allocator>
  395. class SyntaxEquivalence : public SyntaxEquivalenceBase
  396. {
  397. private:
  398. // 2 stacks used during equivalence test. (Can mark isLeaf because they don't own the nodes.)
  399. JsUtil::Stack<ParseNode*, Allocator, /*isLeaf*/true> leftStack, rightStack;
  400. public:
  401. SyntaxEquivalence(Allocator* alloc) : leftStack(alloc), rightStack(alloc)
  402. {}
  403. //
  404. // Tests if 2 parse (sub)trees are equivalent.
  405. //
  406. bool AreEquivalent(ParseNode* left, ParseNode* right)
  407. {
  408. bool result;
  409. if (TryTestEquivalenceFast(left, right, &result))
  410. {
  411. return result;
  412. }
  413. Reset(); // Clear possible remaining nodes in leftStack/rightStack
  414. PushChildren(left, right);
  415. while (!leftStack.Empty() && leftStack.Count() == rightStack.Count())
  416. {
  417. left = leftStack.Pop();
  418. right = rightStack.Pop();
  419. if (TryTestEquivalenceFast(left, right, &result))
  420. {
  421. if (!result)
  422. {
  423. return false;
  424. }
  425. }
  426. else
  427. {
  428. PushChildren(left, right); // Sub-pair is ok, but need to compare children
  429. }
  430. }
  431. return leftStack.Empty() && rightStack.Empty();
  432. }
  433. private:
  434. void Reset()
  435. {
  436. leftStack.Clear();
  437. rightStack.Clear();
  438. }
  439. void PushChildren(ParseNode* left, ParseNode* right)
  440. {
  441. Assert(leftStack.Count() == rightStack.Count());
  442. MapChildren(left, [&](ParseNode* child) { leftStack.Push(child); });
  443. MapChildren(right, [&](ParseNode* child) { rightStack.Push(child); });
  444. }
  445. //
  446. // Try to test 2 nodes for equivalence. Return true if we can determine the pair equivalence.
  447. // Otherwise return false, which means the pair test is ok but we need further child nodes comparison.
  448. //
  449. static bool TryTestEquivalenceFast(ParseNode* left, ParseNode* right, _Out_ bool* result)
  450. {
  451. Assert(left && right);
  452. if (left == right)
  453. {
  454. *result = true; // Same node
  455. return true;
  456. }
  457. if (left->nop != right->nop)
  458. {
  459. *result = false; // Different node type
  460. return true;
  461. }
  462. if (IsToken(left))
  463. {
  464. *result = AreTokensEquivalent(left, right); // Token comparison suffices
  465. return true;
  466. }
  467. if (HasToken(left) && !HaveEquivalentTokens(left, right))
  468. {
  469. *result = false; // Different implicit tokens, e.g. "var x" vs "var y"
  470. return true;
  471. }
  472. return false; // This pair is ok, but not sure about children
  473. }
  474. };
  475. } // namespace Js
  476. #endif