/* ** Copyright (c) 2007 D. Richard Hipp ** ** This program is free software; you can redistribute it and/or ** modify it under the terms of the Simplified BSD License (also ** known as the "2-Clause License" or "FreeBSD License".) ** This program is distributed in the hope that it will be useful, ** but without any warranty; without even the implied warranty of ** merchantability or fitness for a particular purpose. ** ** Author contact information: ** drh@hwaci.com ** http://www.hwaci.com/drh/ ** ******************************************************************************* ** ** This file contains code used to compute a "diff" between two ** text files. */ #include "config.h" #include "diff.h" #include #if INTERFACE /* ** Allowed flag parameters to the text_diff() and html_sbsdiff() funtions: */ #define DIFF_CONTEXT_MASK 0x0000fff /* Lines of context. Default if 0 */ #define DIFF_WIDTH_MASK 0x00ff000 /* side-by-side column width */ #define DIFF_IGNORE_EOLWS 0x0100000 /* Ignore end-of-line whitespace */ #define DIFF_SIDEBYSIDE 0x0200000 /* Generate a side-by-side diff */ #define DIFF_NEWFILE 0x0400000 /* Missing files are as empty files */ #endif /* INTERFACE */ /* ** Maximum length of a line in a text file. (8192) */ #define LENGTH_MASK_SZ 13 #define LENGTH_MASK ((1<LENGTH_MASK ){ return 0; } j = 0; } } if( j>LENGTH_MASK ){ return 0; } a = fossil_malloc( nLine*sizeof(a[0]) ); memset(a, 0, nLine*sizeof(a[0]) ); if( n==0 ){ *pnLine = 0; return a; } /* Fill in the array */ for(i=0; i0 && fossil_isspace(z[k-1]) ){ k--; } for(h=0, x=0; xh==pB->h && memcmp(pA->z,pB->z,pA->h & LENGTH_MASK)==0; } /* ** Append a single line of "diff" output to pOut. */ static void appendDiffLine(Blob *pOut, char *zPrefix, DLine *pLine){ blob_append(pOut, zPrefix, 1); blob_append(pOut, pLine->z, pLine->h & LENGTH_MASK); blob_append(pOut, "\n", 1); } /* ** Expand the size of aEdit[] array to hold nEdit elements. */ static void expandEdit(DContext *p, int nEdit){ p->aEdit = fossil_realloc(p->aEdit, nEdit*sizeof(int)); p->nEditAlloc = nEdit; } /* ** Append a new COPY/DELETE/INSERT triple. */ static void appendTriple(DContext *p, int nCopy, int nDel, int nIns){ /* printf("APPEND %d/%d/%d\n", nCopy, nDel, nIns); */ if( p->nEdit>=3 ){ if( p->aEdit[p->nEdit-1]==0 ){ if( p->aEdit[p->nEdit-2]==0 ){ p->aEdit[p->nEdit-3] += nCopy; p->aEdit[p->nEdit-2] += nDel; p->aEdit[p->nEdit-1] += nIns; return; } if( nCopy==0 ){ p->aEdit[p->nEdit-2] += nDel; p->aEdit[p->nEdit-1] += nIns; return; } } if( nCopy==0 && nDel==0 ){ p->aEdit[p->nEdit-1] += nIns; return; } } if( p->nEdit+3>p->nEditAlloc ){ expandEdit(p, p->nEdit*2 + 15); if( p->aEdit==0 ) return; } p->aEdit[p->nEdit++] = nCopy; p->aEdit[p->nEdit++] = nDel; p->aEdit[p->nEdit++] = nIns; } /* ** Given a diff context in which the aEdit[] array has been filled ** in, compute a context diff into pOut. */ static void contextDiff(DContext *p, Blob *pOut, int nContext){ DLine *A; /* Left side of the diff */ DLine *B; /* Right side of the diff */ int a = 0; /* Index of next line in A[] */ int b = 0; /* Index of next line in B[] */ int *R; /* Array of COPY/DELETE/INSERT triples */ int r; /* Index into R[] */ int nr; /* Number of COPY/DELETE/INSERT triples to process */ int mxr; /* Maximum value for r */ int na, nb; /* Number of lines shown from A and B */ int i, j; /* Loop counters */ int m; /* Number of lines to output */ int skip; /* Number of lines to skip */ A = p->aFrom; B = p->aTo; R = p->aEdit; mxr = p->nEdit; while( mxr>2 && R[mxr-1]==0 && R[mxr-2]==0 ){ mxr -= 3; } for(r=0; r0 && R[r+nr*3]nContext ){ na = nb = nContext; skip = R[r] - nContext; }else{ na = nb = R[r]; skip = 0; } for(i=0; inContext ){ na += nContext; nb += nContext; }else{ na += R[r+nr*3]; nb += R[r+nr*3]; } for(i=1; inContext ) m = nContext; for(j=0; j100 ){ blob_append(pOut, z100, 100); n -= 100; } if( n>0 ){ blob_append(pOut, z100, n); } } /* ** Append text to a sbs diff output */ static void appendSbsLine(Blob *pOut, DLine *pLine, int width, int pad){ int sz = pLine->h & LENGTH_MASK; if( szz, sz); if( pad ) appendSpace(pOut, width-sz); }else{ blob_append(pOut, pLine->z, width); } } /* ** Given a diff context in which the aEdit[] array has been filled ** in, compute a side-by-side diff into pOut. */ static void sbsDiff(DContext *p, Blob *pOut, int nContext, int width){ DLine *A; /* Left side of the diff */ DLine *B; /* Right side of the diff */ int a = 0; /* Index of next line in A[] */ int b = 0; /* Index of next line in B[] */ int *R; /* Array of COPY/DELETE/INSERT triples */ int r; /* Index into R[] */ int nr; /* Number of COPY/DELETE/INSERT triples to process */ int mxr; /* Maximum value for r */ int na, nb; /* Number of lines shown from A and B */ int i, j; /* Loop counters */ int m, ma, mb;/* Number of lines to output */ int skip; /* Number of lines to skip */ char zFormat[50]; /* Output format */ sqlite3_snprintf(sizeof(zFormat), zFormat, "%%4d %%%d.%ds %%c %%4d %%.%ds\n", width, width, width); A = p->aFrom; B = p->aTo; R = p->aEdit; mxr = p->nEdit; while( mxr>2 && R[mxr-1]==0 && R[mxr-2]==0 ){ mxr -= 3; } for(r=0; r0 && R[r+nr*3]nContext ){ na = nb = nContext; skip = R[r] - nContext; }else{ na = nb = R[r]; skip = 0; } for(i=0; inContext ){ na += nContext; nb += nContext; }else{ na += R[r+nr*3]; nb += R[r+nr*3]; } for(i=1; i %6d ", b+j); appendSbsLine(pOut, &B[b+j], width, 0); blob_append(pOut, "\n", 1); } b += mb; if( inContext ) m = nContext; for(j=0; jaFrom[] */ int iS2, int iE2, /* Range of lines in p->aTo[] */ int *piSX, int *piEX, /* Write p->aFrom[] common segment here */ int *piSY, int *piEY /* Write p->aTo[] common segment here */ ){ int mxLength = 0; /* Length of longest common subsequence */ int i, j; /* Loop counters */ int k; /* Length of a candidate subsequence */ int iSXb = iS1; /* Best match so far */ int iSYb = iS2; /* Best match so far */ for(i=iS1; iaFrom[i], &p->aTo[j]) ) continue; if( mxLength && !same_dline(&p->aFrom[i+mxLength], &p->aTo[j+mxLength]) ){ continue; } k = 1; while( i+kaFrom[i+k],&p->aTo[j+k]) ){ k++; } if( k>mxLength ){ iSXb = i; iSYb = j; mxLength = k; } } } *piSX = iSXb; *piEX = iSXb + mxLength; *piSY = iSYb; *piEY = iSYb + mxLength; } /* ** Compare two blocks of text on lines iS1 through iE1-1 of the aFrom[] ** file and lines iS2 through iE2-1 of the aTo[] file. Locate a sequence ** of lines in these two blocks that are exactly the same. Return ** the bounds of the matching sequence. ** ** If there are two or more possible answers of the same length, the ** returned sequence should be the one closest to the center of the ** input range. ** ** Ideally, the common sequence should be the longest possible common ** sequence. However, an exact computation of LCS is O(N*N) which is ** way too slow for larger files. So this routine uses an O(N) ** heuristic approximation based on hashing that usually works about ** as well. But if the O(N) algorithm doesn't get a good solution ** and N is not too large, we fall back to an exact solution by ** calling optimalLCS(). */ static void longestCommonSequence( DContext *p, /* Two files being compared */ int iS1, int iE1, /* Range of lines in p->aFrom[] */ int iS2, int iE2, /* Range of lines in p->aTo[] */ int *piSX, int *piEX, /* Write p->aFrom[] common segment here */ int *piSY, int *piEY /* Write p->aTo[] common segment here */ ){ double bestScore = -1e30; /* Best score seen so far */ int i, j; /* Loop counters */ int iSX, iSY, iEX, iEY; /* Current match */ double score; /* Current score */ int skew; /* How lopsided is the match */ int dist; /* Distance of match from center */ int mid; /* Center of the span */ int iSXb, iSYb, iEXb, iEYb; /* Best match so far */ int iSXp, iSYp, iEXp, iEYp; /* Previous match */ iSXb = iSXp = iS1; iEXb = iEXp = iS1; iSYb = iSYp = iS2; iEYb = iEYp = iS2; mid = (iE1 + iS1)/2; for(i=iS1; iaTo[p->aFrom[i].h % p->nTo].iHash; while( j>0 && (j-1=iE2 || !same_dline(&p->aFrom[i], &p->aTo[j-1])) ){ if( limit++ > 10 ){ j = 0; break; } j = p->aTo[j-1].iNext; } if( j==0 ) continue; assert( i>=iSXb && i>=iSXp ); if( i=iSYb && j=iSYp && jiS1 && iSY>iS2 && same_dline(&p->aFrom[iSX-1],&p->aTo[iSY-1]) ){ iSX--; iSY--; } iEX = i+1; iEY = j; while( iEXaFrom[iEX],&p->aTo[iEY]) ){ iEX++; iEY++; } skew = (iSX-iS1) - (iSY-iS2); if( skew<0 ) skew = -skew; dist = (iSX+iEX)/2 - mid; if( dist<0 ) dist = -dist; score = (iEX - iSX) - 0.05*skew - 0.05*dist; if( score>bestScore ){ bestScore = score; iSXb = iSX; iSYb = iSY; iEXb = iEX; iEYb = iEY; }else{ iSXp = iSX; iSYp = iSY; iEXp = iEX; iEYp = iEY; } } if( iSXb==iEXb && (iE1-iS1)*(iE2-iS2)<400 ){ /* If no common sequence is found using the hashing heuristic and ** the input is not too big, use the expensive exact solution */ optimalLCS(p, iS1, iE1, iS2, iE2, piSX, piEX, piSY, piEY); }else{ *piSX = iSXb; *piSY = iSYb; *piEX = iEXb; *piEY = iEYb; } /* printf("LCS(%d..%d/%d..%d) = %d..%d/%d..%d\n", iS1, iE1, iS2, iE2, *piSX, *piEX, *piSY, *piEY); */ } /* ** Do a single step in the difference. Compute a sequence of ** copy/delete/insert steps that will convert lines iS1 through iE1-1 of ** the input into lines iS2 through iE2-1 of the output and write ** that sequence into the difference context. ** ** The algorithm is to find a block of common text near the middle ** of the two segments being diffed. Then recursively compute ** differences on the blocks before and after that common segment. ** Special cases apply if either input segment is empty or if the ** two segments have no text in common. */ static void diff_step(DContext *p, int iS1, int iE1, int iS2, int iE2){ int iSX, iEX, iSY, iEY; if( iE1<=iS1 ){ /* The first segment is empty */ if( iE2>iS2 ){ appendTriple(p, 0, 0, iE2-iS2); } return; } if( iE2<=iS2 ){ /* The second segment is empty */ appendTriple(p, 0, iE1-iS1, 0); return; } /* Find the longest matching segment between the two sequences */ longestCommonSequence(p, iS1, iE1, iS2, iE2, &iSX, &iEX, &iSY, &iEY); if( iEX>iSX ){ /* A common segement has been found. ** Recursively diff either side of the matching segment */ diff_step(p, iS1, iSX, iS2, iSY); if( iEX>iSX ){ appendTriple(p, iEX - iSX, 0, 0); } diff_step(p, iEX, iE1, iEY, iE2); }else{ /* The two segments have nothing in common. Delete the first then ** insert the second. */ appendTriple(p, 0, iE1-iS1, iE2-iS2); } } /* ** Compute the differences between two files already loaded into ** the DContext structure. ** ** A divide and conquer technique is used. We look for a large ** block of common text that is in the middle of both files. Then ** compute the difference on those parts of the file before and ** after the common block. This technique is fast, but it does ** not necessarily generate the minimum difference set. On the ** other hand, we do not need a minimum difference set, only one ** that makes sense to human readers, which this algorithm does. ** ** Any common text at the beginning and end of the two files is ** removed before starting the divide-and-conquer algorithm. */ static void diff_all(DContext *p){ int mnE, iS, iE1, iE2; /* Carve off the common header and footer */ iE1 = p->nFrom; iE2 = p->nTo; while( iE1>0 && iE2>0 && same_dline(&p->aFrom[iE1-1], &p->aTo[iE2-1]) ){ iE1--; iE2--; } mnE = iE1aFrom[iS],&p->aTo[iS]); iS++){} /* do the difference */ if( iS>0 ){ appendTriple(p, iS, 0, 0); } diff_step(p, iS, iE1, iS, iE2); if( iE1nFrom ){ appendTriple(p, p->nFrom - iE1, 0, 0); } /* Terminate the COPY/DELETE/INSERT triples with three zeros */ expandEdit(p, p->nEdit+3); if( p->aEdit ){ p->aEdit[p->nEdit++] = 0; p->aEdit[p->nEdit++] = 0; p->aEdit[p->nEdit++] = 0; } } /* ** Generate a report of the differences between files pA and pB. ** If pOut is not NULL then a unified diff is appended there. It ** is assumed that pOut has already been initialized. If pOut is ** NULL, then a pointer to an array of integers is returned. ** The integers come in triples. For each triple, ** the elements are the number of lines copied, the number of ** lines deleted, and the number of lines inserted. The vector ** is terminated by a triple of all zeros. ** ** This diff utility does not work on binary files. If a binary ** file is encountered, 0 is returned and pOut is written with ** text "cannot compute difference between binary files". */ int *text_diff( Blob *pA_Blob, /* FROM file */ Blob *pB_Blob, /* TO file */ Blob *pOut, /* Write diff here if not NULL */ int diffFlags /* DIFF_* flags defined above */ ){ int ignoreEolWs; /* Ignore whitespace at the end of lines */ int nContext; /* Amount of context to display */ DContext c; nContext = diffFlags & DIFF_CONTEXT_MASK; if( nContext==0 ) nContext = 5; ignoreEolWs = (diffFlags & DIFF_IGNORE_EOLWS)!=0; /* Prepare the input files */ memset(&c, 0, sizeof(c)); c.aFrom = break_into_lines(blob_str(pA_Blob), blob_size(pA_Blob), &c.nFrom, ignoreEolWs); c.aTo = break_into_lines(blob_str(pB_Blob), blob_size(pB_Blob), &c.nTo, ignoreEolWs); if( c.aFrom==0 || c.aTo==0 ){ free(c.aFrom); free(c.aTo); if( pOut ){ blob_appendf(pOut, "cannot compute difference between binary files\n"); } return 0; } /* Compute the difference */ diff_all(&c); if( pOut ){ /* Compute a context or side-by-side diff into pOut */ if( diffFlags & DIFF_SIDEBYSIDE ){ int width = (diffFlags & DIFF_WIDTH_MASK)/(DIFF_CONTEXT_MASK+1); if( width==0 ) width = 80; sbsDiff(&c, pOut, nContext, width); }else{ contextDiff(&c, pOut, nContext); } free(c.aFrom); free(c.aTo); free(c.aEdit); return 0; }else{ /* If a context diff is not requested, then return the ** array of COPY/DELETE/INSERT triples. */ free(c.aFrom); free(c.aTo); return c.aEdit; } } /* ** Copy a line with a limit. Used for side-by-side diffs to enforce a maximum ** line length limit. */ static char *copylimline(char *out, DLine *dl, int lim){ int len; len = dl->h & LENGTH_MASK; if( lim && len > lim ){ memcpy(out, dl->z, lim-3); memcpy(&out[lim-3], "...", 4); }else{ memcpy(out, dl->z, len); out[len] = '\0'; } return out; } /* ** Output table body of a side-by-side diff. Prior to the call, the caller ** should have output: ** ** ** ** And after the call, it should output: **
Old title ** New title
** ** Some good reference diffs in the fossil repository for testing: ** /vdiff?from=080d27a&to=4b0f813&detail=1 ** /vdiff?from=636804745b&to=c1d78e0556&detail=1 ** /vdiff?from=c0b6c28d29&to=25169506b7&detail=1 ** /vdiff?from=e3d022dffa&to=48bcfbd47b&detail=1 */ int html_sbsdiff( Blob *pA_Blob, /* FROM file */ Blob *pB_Blob, /* TO file */ int nContext, /* Amount of context to unified diff */ int ignoreEolWs /* Ignore whitespace at the end of lines */ ){ DContext c; int i; int iFrom, iTo; char *linebuf; int collim=0; /* Currently not settable; allows a column limit for diffs */ int allowExp=0; /* Currently not settable; (dis)allow expansion of rows */ /* Prepare the input files */ memset(&c, 0, sizeof(c)); c.aFrom = break_into_lines(blob_str(pA_Blob), blob_size(pA_Blob), &c.nFrom, ignoreEolWs); c.aTo = break_into_lines(blob_str(pB_Blob), blob_size(pB_Blob), &c.nTo, ignoreEolWs); if( c.aFrom==0 || c.aTo==0 ){ free(c.aFrom); free(c.aTo); /* Note: This would be generated within a table. */ @

cannot compute @ difference between binary files

return 0; } collim = collim < 4 ? 0 : collim; /* Compute the difference */ diff_all(&c); linebuf = fossil_malloc(LENGTH_MASK+1); if( !linebuf ){ free(c.aFrom); free(c.aTo); free(c.aEdit); return 0; } iFrom=iTo=0; i=0; while( i