difflib.js 11 KB


/***
This is part of jsdifflib v1.0. <http://snowtide.com/jsdifflib>
Copyright (c) 2007, Snowtide Informatics Systems, Inc.
All rights reserved.
Redistribution and use in source and binary forms, with or without modification,
are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright notice, this
list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright notice,
this list of conditions and the following disclaimer in the documentation
and/or other materials provided with the distribution.
* Neither the name of the Snowtide Informatics Systems nor the names of its
contributors may be used to endorse or promote products derived from this
software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY
EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT
SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
DAMAGE.
***/
/* Author: Chas Emerick <cemerick@snowtide.com> */
  27. __whitespace = {" ":true, "\t":true, "\n":true, "\f":true, "\r":true};
  28. difflib = {
  29. defaultJunkFunction: function (c) {
  30. return c in __whitespace;
  31. },
  32. stripLinebreaks: function (str) { return str.replace(/^[\n\r]*|[\n\r]*$/g, ""); },
  33. stringAsLines: function (str) {
  34. var lfpos = str.indexOf("\n");
  35. var crpos = str.indexOf("\r");
  36. var linebreak = ((lfpos > -1 && crpos > -1) || crpos < 0) ? "\n" : "\r";
  37. var lines = str.split(linebreak);
  38. for (var i = 0; i < lines.length; i++) {
  39. lines[i] = difflib.stripLinebreaks(lines[i]);
  40. }
  41. return lines;
  42. },
  43. // iteration-based reduce implementation
  44. __reduce: function (func, list, initial) {
  45. if (initial != null) {
  46. var value = initial;
  47. var idx = 0;
  48. } else if (list) {
  49. var value = list[0];
  50. var idx = 1;
  51. } else {
  52. return null;
  53. }
  54. for (; idx < list.length; idx++) {
  55. value = func(value, list[idx]);
  56. }
  57. return value;
  58. },
  59. // comparison function for sorting lists of numeric tuples
  60. __ntuplecomp: function (a, b) {
  61. var mlen = Math.max(a.length, b.length);
  62. for (var i = 0; i < mlen; i++) {
  63. if (a[i] < b[i]) return -1;
  64. if (a[i] > b[i]) return 1;
  65. }
  66. return a.length == b.length ? 0 : (a.length < b.length ? -1 : 1);
  67. },
  68. __calculate_ratio: function (matches, length) {
  69. return length ? 2.0 * matches / length : 1.0;
  70. },
  71. // returns a function that returns true if a key passed to the returned function
  72. // is in the dict (js object) provided to this function; replaces being able to
  73. // carry around dict.has_key in python...
  74. __isindict: function (dict) {
  75. return function (key) { return key in dict; };
  76. },
  77. // replacement for python's dict.get function -- need easy default values
  78. __dictget: function (dict, key, defaultValue) {
  79. return key in dict ? dict[key] : defaultValue;
  80. },
  81. SequenceMatcher: function (a, b, isjunk) {
  82. this.set_seqs = function (a, b) {
  83. this.set_seq1(a);
  84. this.set_seq2(b);
  85. }
  86. this.set_seq1 = function (a) {
  87. if (a == this.a) return;
  88. this.a = a;
  89. this.matching_blocks = this.opcodes = null;
  90. }
  91. this.set_seq2 = function (b) {
  92. if (b == this.b) return;
  93. this.b = b;
  94. this.matching_blocks = this.opcodes = this.fullbcount = null;
  95. this.__chain_b();
  96. }
  97. this.__chain_b = function () {
  98. var b = this.b;
  99. var n = b.length;
  100. var b2j = this.b2j = {};
  101. var populardict = {};
  102. for (var i = 0; i < b.length; i++) {
  103. var elt = b[i];
  104. if (elt in b2j) {
  105. var indices = b2j[elt];
  106. if (n >= 200 && indices.length * 100 > n) {
  107. populardict[elt] = 1;
  108. delete b2j[elt];
  109. } else {
  110. indices.push(i);
  111. }
  112. } else {
  113. b2j[elt] = [i];
  114. }
  115. }
  116. for (var elt in populardict)
  117. delete b2j[elt];
  118. var isjunk = this.isjunk;
  119. var junkdict = {};
  120. if (isjunk) {
  121. for (var elt in populardict) {
  122. if (isjunk(elt)) {
  123. junkdict[elt] = 1;
  124. delete populardict[elt];
  125. }
  126. }
  127. for (var elt in b2j) {
  128. if (isjunk(elt)) {
  129. junkdict[elt] = 1;
  130. delete b2j[elt];
  131. }
  132. }
  133. }
  134. this.isbjunk = difflib.__isindict(junkdict);
  135. this.isbpopular = difflib.__isindict(populardict);
  136. }
  137. this.find_longest_match = function (alo, ahi, blo, bhi) {
  138. var a = this.a;
  139. var b = this.b;
  140. var b2j = this.b2j;
  141. var isbjunk = this.isbjunk;
  142. var besti = alo;
  143. var bestj = blo;
  144. var bestsize = 0;
  145. var j = null;
  146. var j2len = {};
  147. var nothing = [];
  148. for (var i = alo; i < ahi; i++) {
  149. var newj2len = {};
  150. var jdict = difflib.__dictget(b2j, a[i], nothing);
  151. for (var jkey in jdict) {
  152. j = jdict[jkey];
  153. if (j < blo) continue;
  154. if (j >= bhi) break;
  155. newj2len[j] = k = difflib.__dictget(j2len, j - 1, 0) + 1;
  156. if (k > bestsize) {
  157. besti = i - k + 1;
  158. bestj = j - k + 1;
  159. bestsize = k;
  160. }
  161. }
  162. j2len = newj2len;
  163. }
  164. while (besti > alo && bestj > blo && !isbjunk(b[bestj - 1]) && a[besti - 1] == b[bestj - 1]) {
  165. besti--;
  166. bestj--;
  167. bestsize++;
  168. }
  169. while (besti + bestsize < ahi && bestj + bestsize < bhi &&
  170. !isbjunk(b[bestj + bestsize]) &&
  171. a[besti + bestsize] == b[bestj + bestsize]) {
  172. bestsize++;
  173. }
  174. while (besti > alo && bestj > blo && isbjunk(b[bestj - 1]) && a[besti - 1] == b[bestj - 1]) {
  175. besti--;
  176. bestj--;
  177. bestsize++;
  178. }
  179. while (besti + bestsize < ahi && bestj + bestsize < bhi && isbjunk(b[bestj + bestsize]) &&
  180. a[besti + bestsize] == b[bestj + bestsize]) {
  181. bestsize++;
  182. }
  183. return [besti, bestj, bestsize];
  184. }
  185. this.get_matching_blocks = function () {
  186. if (this.matching_blocks != null) return this.matching_blocks;
  187. var la = this.a.length;
  188. var lb = this.b.length;
  189. var queue = [[0, la, 0, lb]];
  190. var matching_blocks = [];
  191. var alo, ahi, blo, bhi, qi, i, j, k, x;
  192. while (queue.length) {
  193. qi = queue.pop();
  194. alo = qi[0];
  195. ahi = qi[1];
  196. blo = qi[2];
  197. bhi = qi[3];
  198. x = this.find_longest_match(alo, ahi, blo, bhi);
  199. i = x[0];
  200. j = x[1];
  201. k = x[2];
  202. if (k) {
  203. matching_blocks.push(x);
  204. if (alo < i && blo < j)
  205. queue.push([alo, i, blo, j]);
  206. if (i+k < ahi && j+k < bhi)
  207. queue.push([i + k, ahi, j + k, bhi]);
  208. }
  209. }
  210. matching_blocks.sort(difflib.__ntuplecomp);
  211. var i1 = j1 = k1 = block = 0;
  212. var non_adjacent = [];
  213. for (var idx in matching_blocks) {
  214. block = matching_blocks[idx];
  215. i2 = block[0];
  216. j2 = block[1];
  217. k2 = block[2];
  218. if (i1 + k1 == i2 && j1 + k1 == j2) {
  219. k1 += k2;
  220. } else {
  221. if (k1) non_adjacent.push([i1, j1, k1]);
  222. i1 = i2;
  223. j1 = j2;
  224. k1 = k2;
  225. }
  226. }
  227. if (k1) non_adjacent.push([i1, j1, k1]);
  228. non_adjacent.push([la, lb, 0]);
  229. this.matching_blocks = non_adjacent;
  230. return this.matching_blocks;
  231. }
  232. this.get_opcodes = function () {
  233. if (this.opcodes != null) return this.opcodes;
  234. var i = 0;
  235. var j = 0;
  236. var answer = [];
  237. this.opcodes = answer;
  238. var block, ai, bj, size, tag;
  239. var blocks = this.get_matching_blocks();
  240. for (var idx in blocks) {
  241. block = blocks[idx];
  242. ai = block[0];
  243. bj = block[1];
  244. size = block[2];
  245. tag = '';
  246. if (i < ai && j < bj) {
  247. tag = 'replace';
  248. } else if (i < ai) {
  249. tag = 'delete';
  250. } else if (j < bj) {
  251. tag = 'insert';
  252. }
  253. if (tag) answer.push([tag, i, ai, j, bj]);
  254. i = ai + size;
  255. j = bj + size;
  256. if (size) answer.push(['equal', ai, i, bj, j]);
  257. }
  258. return answer;
  259. }
  260. // this is a generator function in the python lib, which of course is not supported in javascript
  261. // the reimplementation builds up the grouped opcodes into a list in their entirety and returns that.
  262. this.get_grouped_opcodes = function (n) {
  263. if (!n) n = 3;
  264. var codes = this.get_opcodes();
  265. if (!codes) codes = [["equal", 0, 1, 0, 1]];
  266. var code, tag, i1, i2, j1, j2;
  267. if (codes[0][0] == 'equal') {
  268. code = codes[0];
  269. tag = code[0];
  270. i1 = code[1];
  271. i2 = code[2];
  272. j1 = code[3];
  273. j2 = code[4];
  274. codes[0] = [tag, Math.max(i1, i2 - n), i2, Math.max(j1, j2 - n), j2];
  275. }
  276. if (codes[codes.length - 1][0] == 'equal') {
  277. code = codes[codes.length - 1];
  278. tag = code[0];
  279. i1 = code[1];
  280. i2 = code[2];
  281. j1 = code[3];
  282. j2 = code[4];
  283. codes[codes.length - 1] = [tag, i1, Math.min(i2, i1 + n), j1, Math.min(j2, j1 + n)];
  284. }
  285. var nn = n + n;
  286. var groups = [];
  287. for (var idx in codes) {
  288. code = codes[idx];
  289. tag = code[0];
  290. i1 = code[1];
  291. i2 = code[2];
  292. j1 = code[3];
  293. j2 = code[4];
  294. if (tag == 'equal' && i2 - i1 > nn) {
  295. groups.push([tag, i1, Math.min(i2, i1 + n), j1, Math.min(j2, j1 + n)]);
  296. i1 = Math.max(i1, i2-n);
  297. j1 = Math.max(j1, j2-n);
  298. }
  299. groups.push([tag, i1, i2, j1, j2]);
  300. }
  301. if (groups && groups[groups.length - 1][0] == 'equal') groups.pop();
  302. return groups;
  303. }
  304. this.ratio = function () {
  305. matches = difflib.__reduce(
  306. function (sum, triple) { return sum + triple[triple.length - 1]; },
  307. this.get_matching_blocks(), 0);
  308. return difflib.__calculate_ratio(matches, this.a.length + this.b.length);
  309. }
  310. this.quick_ratio = function () {
  311. var fullbcount, elt;
  312. if (this.fullbcount == null) {
  313. this.fullbcount = fullbcount = {};
  314. for (var i = 0; i < this.b.length; i++) {
  315. elt = this.b[i];
  316. fullbcount[elt] = difflib.__dictget(fullbcount, elt, 0) + 1;
  317. }
  318. }
  319. fullbcount = this.fullbcount;
  320. var avail = {};
  321. var availhas = difflib.__isindict(avail);
  322. var matches = numb = 0;
  323. for (var i = 0; i < this.a.length; i++) {
  324. elt = this.a[i];
  325. if (availhas(elt)) {
  326. numb = avail[elt];
  327. } else {
  328. numb = difflib.__dictget(fullbcount, elt, 0);
  329. }
  330. avail[elt] = numb - 1;
  331. if (numb > 0) matches++;
  332. }
  333. return difflib.__calculate_ratio(matches, this.a.length + this.b.length);
  334. }
  335. this.real_quick_ratio = function () {
  336. var la = this.a.length;
  337. var lb = this.b.length;
  338. return _calculate_ratio(Math.min(la, lb), la + lb);
  339. }
  340. this.isjunk = isjunk ? isjunk : difflib.defaultJunkFunction;
  341. this.a = this.b = null;
  342. this.set_seqs(a, b);
  343. }
  344. }