/* ** Copyright (c) 2007 D. Richard Hipp ** ** This program is free software; you can redistribute it and/or ** modify it under the terms of the Simplified BSD License (also ** known as the "2-Clause License" or "FreeBSD License".) ** ** This program is distributed in the hope that it will be useful, ** but without any warranty; without even the implied warranty of ** merchantability or fitness for a particular purpose. ** ** Author contact information: ** drh@hwaci.com ** http://www.hwaci.com/drh/ ** ******************************************************************************* ** ** This file contains code used to compute a "diff" between two ** text files. */ #include "config.h" #include "diff.h" #include #if INTERFACE /* ** Flag parameters to the text_diff() routine used to control the formatting ** of the diff output. */ #define DIFF_IGNORE_EOLWS 0x00000001 /* Ignore end-of-line whitespace */ #define DIFF_IGNORE_ALLWS 0x00000003 /* Ignore all whitespace */ #define DIFF_SIDEBYSIDE 0x00000004 /* Generate a side-by-side diff */ #define DIFF_VERBOSE 0x00000008 /* Missing shown as empty files */ #define DIFF_BRIEF 0x00000010 /* Show filenames only */ #define DIFF_HTML 0x00000020 /* Render for HTML */ #define DIFF_LINENO 0x00000040 /* Show line numbers */ #define DIFF_NUMSTAT 0x00000080 /* Show line count of changes */ #define DIFF_NOOPT 0x00000100 /* Suppress optimizations (debug) */ #define DIFF_INVERT 0x00000200 /* Invert the diff (debug) */ #define DIFF_CONTEXT_EX 0x00000400 /* Use context even if zero */ #define DIFF_NOTTOOBIG 0x00000800 /* Only display if not too big */ #define DIFF_STRIP_EOLCR 0x00001000 /* Strip trailing CR */ #define DIFF_SLOW_SBS 0x00002000 /* Better but slower side-by-side */ #define DIFF_WEBPAGE 0x00004000 /* Complete webpage */ #define DIFF_BROWSER 0x00008000 /* The --browser option */ #define DIFF_JSON 0x00010000 /* JSON output */ #define DIFF_DEBUG 0x00020000 /* Debugging diff output */ #define DIFF_RAW 0x00040000 /* Raw triples - for debugging */ #define DIFF_TCL 0x00080000 /* For the --tk option */ #define DIFF_INCBINARY 0x00100000 /* The --diff-binary option */ #define DIFF_SHOW_VERS 0x00200000 /* Show compared versions */ #define DIFF_DARKMODE 0x00400000 /* Use dark mode for HTML */ /* ** These error messages are shared in multiple locations. They are defined ** here for consistency. */ #define DIFF_CANNOT_COMPUTE_BINARY \ "cannot compute difference between binary files\n" #define DIFF_CANNOT_COMPUTE_SYMLINK \ "cannot compute difference between symlink and regular file\n" #define DIFF_TOO_MANY_CHANGES \ "more than 10,000 changes\n" #define DIFF_WHITESPACE_ONLY \ "whitespace changes only\n" /* ** Maximum length of a line in a text file, in bytes. (2**15 = 32768 bytes) */ #define LENGTH_MASK_SZ 15 #define LENGTH_MASK ((1<diffFlags = diffFlags; return pCfg; } /* ** Information about each line of a file being diffed. ** ** The lower LENGTH_MASK_SZ bits of the hash (DLine.h) are the length ** of the line. If any line is longer than LENGTH_MASK characters, ** the file is considered binary. */ typedef struct DLine DLine; struct DLine { const char *z; /* The text of the line */ u64 h; /* Hash of the line */ unsigned short indent; /* Index of first non-space */ unsigned short n; /* number of bytes */ unsigned short nw; /* number of bytes without leading/trailing space */ unsigned int iNext; /* 1+(Index of next line with same the same hash) */ /* an array of DLine elements serves two purposes. The fields ** above are one per line of input text. But each entry is also ** a bucket in a hash table, as follows: */ unsigned int iHash; /* 1+(first entry in the hash chain) */ }; /* ** Length of a dline */ #define LENGTH(X) ((X)->n) /* ** Number of diff chunks generated */ static int nChunk = 0; /* ** A context for running a raw diff. ** ** The aEdit[] array describes the raw diff. Each triple of integers in ** aEdit[] means: ** ** (1) COPY: Number of lines aFrom and aTo have in common ** (2) DELETE: Number of lines found only in aFrom ** (3) INSERT: Number of lines found only in aTo ** ** The triples repeat until all lines of both aFrom and aTo are accounted ** for. */ typedef struct DContext DContext; struct DContext { int *aEdit; /* Array of copy/delete/insert triples */ int nEdit; /* Number of integers (3x num of triples) in aEdit[] */ int nEditAlloc; /* Space allocated for aEdit[] */ DLine *aFrom; /* File on left side of the diff */ int nFrom; /* Number of lines in aFrom[] */ DLine *aTo; /* File on right side of the diff */ int nTo; /* Number of lines in aTo[] */ int (*xDiffer)(const DLine *,const DLine *); /* comparison function */ }; /* Fast isspace for use by diff */ static const char diffIsSpace[] = { 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; #define diff_isspace(X) (diffIsSpace[(unsigned char)(X)]) /* ** Count the number of lines in the input string. Include the last line ** in the count even if it lacks the \n terminator. If an empty string ** is specified, the number of lines is zero. For the purposes of this ** function, a string is considered empty if it contains no characters ** -OR- it contains only NUL characters. ** ** Returns true if the input seems to be plain text input, else false. ** If it returns false, pnLine is not modified, else it is set to the ** number of lines in z. */ int count_lines( const char *z, int n, int *pnLine ){ int nLine; const char *zNL, *z2; for(nLine=0, z2=z; (zNL = strchr(z2,'\n'))!=0; z2=zNL+1, nLine++){} if( z2[0]!='\0' ){ nLine++; do{ z2++; }while( z2[0]!='\0' ); } if( n!=(int)(z2-z) ) return 0; if( pnLine ) *pnLine = nLine; return 1; } /* ** Return an array of DLine objects containing a pointer to the ** start of each line and a hash of that line. The lower ** bits of the hash store the length of each line. ** ** Trailing whitespace is removed from each line. 2010-08-20: Not any ** more. If trailing whitespace is ignored, the "patch" command gets ** confused by the diff output. Ticket [a9f7b23c2e376af5b0e5b] ** ** Return 0 if the file is binary or contains a line that is ** too long. ** ** Profiling show that in most cases this routine consumes the bulk of ** the CPU time on a diff. */ static DLine *break_into_lines( const char *z, int n, int *pnLine, u64 diffFlags ){ int nLine, i, k, nn, s, x; u64 h, h2; DLine *a; const char *zNL; if( count_lines(z, n, &nLine)==0 ){ return 0; } assert( nLine>0 || z[0]=='\0' ); a = fossil_malloc( sizeof(a[0])*nLine ); memset(a, 0, sizeof(a[0])*nLine); if( nLine==0 ){ *pnLine = 0; return a; } i = 0; do{ zNL = strchr(z,'\n'); if( zNL==0 ) zNL = z+n; nn = (int)(zNL - z); if( nn>LENGTH_MASK ){ fossil_free(a); return 0; } a[i].z = z; k = nn; if( diffFlags & DIFF_STRIP_EOLCR ){ if( k>0 && z[k-1]=='\r' ){ k--; } } a[i].n = k; if( diffFlags & DIFF_IGNORE_EOLWS ){ while( k>0 && diff_isspace(z[k-1]) ){ k--; } } if( (diffFlags & DIFF_IGNORE_ALLWS)==DIFF_IGNORE_ALLWS ){ int numws = 0; for(s=0; sh!=pB->h ) return 1; return memcmp(pA->z,pB->z, pA->h&LENGTH_MASK); } /* ** Return zero if two DLine elements are identical, ignoring ** all whitespace. The indent field of pA/pB already points ** to the first non-space character in the string. */ static int compare_dline_ignore_allws(const DLine *pA, const DLine *pB){ if( pA->h==pB->h ){ int a, b; if( memcmp(pA->z, pB->z, pA->h&LENGTH_MASK)==0 ) return 0; a = pA->indent; b = pB->indent; while( an || bn ){ if( an && bn && pA->z[a++] != pB->z[b++] ) return 1; while( an && diff_isspace(pA->z[a])) ++a; while( bn && diff_isspace(pB->z[b])) ++b; } return pA->n-a != pB->n-b; } return 1; } /* ** Return true if the regular expression *pRe matches any of the ** N dlines */ static int re_dline_match( ReCompiled *pRe, /* The regular expression to be matched */ const DLine *aDLine, /* First of N DLines to compare against */ int N /* Number of DLines to check */ ){ while( N-- ){ if( re_match(pRe, (const unsigned char *)aDLine->z, LENGTH(aDLine)) ){ return 1; } aDLine++; } return 0; } /* ** Append a single line of context-diff output to pOut. */ static void appendDiffLine( Blob *pOut, /* Where to write the line of output */ char cPrefix, /* One of " ", "+", or "-" */ const DLine *pLine /* The line to be output */ ){ blob_append_char(pOut, cPrefix); blob_append(pOut, pLine->z, pLine->n); blob_append_char(pOut, '\n'); } /* ** Add two line numbers to the beginning of an output line for a context ** diff. One or the other of the two numbers might be zero, which means ** to leave that number field blank. */ static void appendDiffLineno(Blob *pOut, int lnA, int lnB){ if( lnA>0 ){ blob_appendf(pOut, "%6d ", lnA); }else{ blob_append(pOut, " ", 7); } if( lnB>0 ){ blob_appendf(pOut, "%6d ", lnB); }else{ blob_append(pOut, " ", 8); } } /* ** Output a patch-style text diff. */ static void contextDiff( DContext *p, /* The difference */ Blob *pOut, /* Output a context diff to here */ DiffConfig *pCfg /* Configuration options */ ){ const DLine *A; /* Left side of the diff */ const DLine *B; /* Right side of the diff */ int a = 0; /* Index of next line in A[] */ int b = 0; /* Index of next line in B[] */ int *R; /* Array of COPY/DELETE/INSERT triples */ int r; /* Index into R[] */ int nr; /* Number of COPY/DELETE/INSERT triples to process */ int mxr; /* Maximum value for r */ int na, nb; /* Number of lines shown from A and B */ int i, j; /* Loop counters */ int m; /* Number of lines to output */ int skip; /* Number of lines to skip */ int nContext; /* Number of lines of context */ int showLn; /* Show line numbers */ int showDivider = 0; /* True to show the divider between diff blocks */ nContext = diff_context_lines(pCfg); showLn = (pCfg->diffFlags & DIFF_LINENO)!=0; A = p->aFrom; B = p->aTo; R = p->aEdit; mxr = p->nEdit; while( mxr>2 && R[mxr-1]==0 && R[mxr-2]==0 ){ mxr -= 3; } for(r=0; r0 && R[r+nr*3]nContext ){ na = nb = nContext; skip = R[r] - nContext; }else{ na = nb = R[r]; skip = 0; } for(i=0; inContext ){ na += nContext; nb += nContext; }else{ na += R[r+nr*3]; nb += R[r+nr*3]; } for(i=1; inContext ) m = nContext; for(j=0; jlenBest ){ lenBest = k; aLCS[0] = i; aLCS[1] = i+k; aLCS[2] = j; aLCS[3] = j+k; } } } } return lenBest>0; } /* ** Find the smallest spans that are different between two text strings that ** are known to be different on both ends. */ static int textLineChanges( const char *zLeft, int nA, /* String on the left */ const char *zRight, int nB, /* String on the right */ LineChange *p /* Write results here */ ){ p->n = 1; p->a[0].iStart1 = 0; p->a[0].iLen1 = nA; p->a[0].iStart2 = 0; p->a[0].iLen2 = nB; p->a[0].isMin = 0; while( p->nn; i++){ if( p->a[i].isMin ) continue; x = p->a[i].iLen1; if( p->a[i].iLen2a[i].iLen2; if( x>mxLen ){ mxLen = x; mxi = i; } } if( mxLen<6 ) break; x = textLCS(zLeft + p->a[mxi].iStart1, p->a[mxi].iLen1, zRight + p->a[mxi].iStart2, p->a[mxi].iLen2, aLCS); if( x==0 ){ p->a[mxi].isMin = 1; continue; } a = p->a+mxi; b = a+1; if( mxin-1 ){ memmove(b+1, b, sizeof(*b)*(p->n-mxi-1)); } p->n++; b->iStart1 = a->iStart1 + aLCS[1]; b->iLen1 = a->iLen1 - aLCS[1]; a->iLen1 = aLCS[0]; b->iStart2 = a->iStart2 + aLCS[3]; b->iLen2 = a->iLen2 - aLCS[3]; a->iLen2 = aLCS[2]; b->isMin = 0; } return p->n; } /* ** Return true if the string starts with n spaces */ static int allSpaces(const char *z, int n){ int i; for(i=0; in<1 ) return; /* (1) Attempt to move indentation changes to the left margin */ if( p->a[0].iLen1==0 && (len = p->a[0].iLen2)>0 && (j = p->a[0].iStart2)>0 && zB[0]==zB[j] && allSpaces(zB, j) ){ for(n=1; na[1], &p->a[0], sizeof(p->a[0])*p->n); p->n++; p->a[0] = p->a[1]; p->a[1].iStart2 += n; p->a[1].iLen2 -= n; p->a[0].iLen2 = n; } p->a[0].iStart1 = 0; p->a[0].iStart2 = 0; }else if( p->a[0].iLen2==0 && (len = p->a[0].iLen1)>0 && (j = p->a[0].iStart1)>0 && zA[0]==zA[j] && allSpaces(zA, j) ){ for(n=1; na[1], &p->a[0], sizeof(p->a[0])*p->n); p->n++; p->a[0] = p->a[1]; p->a[1].iStart1 += n; p->a[1].iLen1 -= n; p->a[0].iLen1 = n; } p->a[0].iStart1 = 0; p->a[0].iStart2 = 0; } /* (2) Try to shift changes so that they begin or end with a ** space. (TBD) */ } /* ** Given two lines of text, pFrom and pTo, compute a set of changes ** between those two lines, for enhanced display purposes. ** ** The result is written into the LineChange object given by the ** third parameter. */ static void oneLineChange( const DLine *pLeft, /* Left line of the change */ const DLine *pRight, /* Right line of the change */ LineChange *p /* OUTPUT: Write the results here */ ){ int nLeft; /* Length of left line in bytes */ int nRight; /* Length of right line in bytes */ int nShort; /* Shortest of left and right */ int nPrefix; /* Length of common prefix */ int nSuffix; /* Length of common suffix */ int nCommon; /* Total byte length of suffix and prefix */ const char *zLeft; /* Text of the left line */ const char *zRight; /* Text of the right line */ int nLeftDiff; /* nLeft - nPrefix - nSuffix */ int nRightDiff; /* nRight - nPrefix - nSuffix */ nLeft = pLeft->n; zLeft = pLeft->z; nRight = pRight->n; zRight = pRight->z; nShort = nLeft0 && (zLeft[nPrefix]&0xc0)==0x80 ) nPrefix--; } nSuffix = 0; if( nPrefix0 && (zLeft[nLeft-nSuffix]&0xc0)==0x80 ) nSuffix--; } if( nSuffix==nLeft || nSuffix==nRight ) nPrefix = 0; } nCommon = nPrefix + nSuffix; /* If the prefix and suffix overlap, that means that we are dealing with ** a pure insertion or deletion of text that can have multiple alignments. ** Try to find an alignment to begins and ends on whitespace, or on ** punctuation, rather than in the middle of a name or number. */ if( nCommon > nShort ){ int iBest = -1; int iBestVal = -1; int i; int nLong = nLeftiBestVal ){ iBestVal = iVal; iBest = i; } } nPrefix = iBest; nSuffix = nShort - nPrefix; nCommon = nPrefix + nSuffix; } /* A single chunk of text inserted */ if( nCommon==nLeft ){ p->n = 1; p->a[0].iStart1 = nPrefix; p->a[0].iLen1 = 0; p->a[0].iStart2 = nPrefix; p->a[0].iLen2 = nRight - nCommon; improveReadability(zLeft, zRight, p); return; } /* A single chunk of text deleted */ if( nCommon==nRight ){ p->n = 1; p->a[0].iStart1 = nPrefix; p->a[0].iLen1 = nLeft - nCommon; p->a[0].iStart2 = nPrefix; p->a[0].iLen2 = 0; improveReadability(zLeft, zRight, p); return; } /* At this point we know that there is a chunk of text that has ** changed between the left and the right. Check to see if there ** is a large unchanged section in the middle of that changed block. */ nLeftDiff = nLeft - nCommon; nRightDiff = nRight - nCommon; if( nLeftDiff >= 4 && nRightDiff >= 4 && textLineChanges(&zLeft[nPrefix], nLeftDiff, &zRight[nPrefix], nRightDiff, p)>1 ){ int i; for(i=0; in; i++){ p->a[i].iStart1 += nPrefix; p->a[i].iStart2 += nPrefix; } improveReadability(zLeft, zRight, p); return; } /* If all else fails, show a single big change between left and right */ p->n = 1; p->a[0].iStart1 = nPrefix; p->a[0].iLen1 = nLeft - nCommon; p->a[0].iStart2 = nPrefix; p->a[0].iLen2 = nRight - nCommon; improveReadability(zLeft, zRight, p); } /* ** COMMAND: test-line-diff ** Usage: %fossil% test-line-diff STRING1 STRING2 ** ** Show the differences between the two strings. Used for testing ** the oneLineChange() routine in the diff logic. */ void test_line_diff(void){ DLine a, b; LineChange chng; int i, j, x; if( g.argc!=4 ) usage("STRING1 STRING2"); a.z = g.argv[2]; a.n = (int)strlen(a.z); b.z = g.argv[3]; b.n = (int)strlen(b.z); oneLineChange(&a, &b, &chng); fossil_print("left: [%s]\n", a.z); for(i=x=0; i x ){ if( (a.z[x]&0xc0)!=0x80 ) fossil_print(" "); x++; } for(j=0; j x ){ if( (b.z[x]&0xc0)!=0x80 ) fossil_print(" "); x++; } for(j=0; jpOut, "SKIP %d (%d..%d left and %d..%d right)%s\n", n, p->lnLeft+1, p->lnLeft+n, p->lnRight+1, p->lnRight+n, isFinal ? " FINAL" : ""); p->lnLeft += n; p->lnRight += n; } static void dfdebugCommon(DiffBuilder *p, const DLine *pLine){ p->lnLeft++; p->lnRight++; blob_appendf(p->pOut, "COMMON %8u %8u %.*s\n", p->lnLeft, p->lnRight, (int)pLine->n, pLine->z); } static void dfdebugInsert(DiffBuilder *p, const DLine *pLine){ p->lnRight++; blob_appendf(p->pOut, "INSERT %8d %.*s\n", p->lnRight, (int)pLine->n, pLine->z); } static void dfdebugDelete(DiffBuilder *p, const DLine *pLine){ p->lnLeft++; blob_appendf(p->pOut, "DELETE %8u %.*s\n", p->lnLeft, (int)pLine->n, pLine->z); } static void dfdebugReplace(DiffBuilder *p, const DLine *pX, const DLine *pY){ p->lnLeft++; p->lnRight++; blob_appendf(p->pOut, "REPLACE %8u %.*s\n", p->lnLeft, (int)pX->n, pX->z); blob_appendf(p->pOut, " %8u %.*s\n", p->lnRight, (int)pY->n, pY->z); } static void dfdebugEdit(DiffBuilder *p, const DLine *pX, const DLine *pY){ int i, j; int x; LineChange chng; p->lnLeft++; p->lnRight++; blob_appendf(p->pOut, "EDIT %8u %.*s\n", p->lnLeft, (int)pX->n, pX->z); oneLineChange(pX, pY, &chng); for(i=x=0; ipOut, "%*s", 26, ""); } while( ofst > x ){ if( (pX->z[x]&0xc0)!=0x80 ) blob_append_char(p->pOut, ' '); x++; } for(j=0; jz[x]&0xc0)!=0x80 ) blob_append_char(p->pOut, c); } } } if( x ) blob_append_char(p->pOut, '\n'); blob_appendf(p->pOut, " %8u %.*s\n", p->lnRight, (int)pY->n, pY->z); for(i=x=0; ipOut, "%*s", 26, ""); } while( ofst > x ){ if( (pY->z[x]&0xc0)!=0x80 ) blob_append_char(p->pOut, ' '); x++; } for(j=0; jz[x]&0xc0)!=0x80 ) blob_append_char(p->pOut, c); } } } if( x ) blob_append_char(p->pOut, '\n'); } static void dfdebugEnd(DiffBuilder *p){ blob_appendf(p->pOut, "END with %u lines left and %u lines right\n", p->lnLeft, p->lnRight); fossil_free(p); } static DiffBuilder *dfdebugNew(Blob *pOut){ DiffBuilder *p = fossil_malloc(sizeof(*p)); p->xSkip = dfdebugSkip; p->xCommon = dfdebugCommon; p->xInsert = dfdebugInsert; p->xDelete = dfdebugDelete; p->xReplace = dfdebugReplace; p->xEdit = dfdebugEdit; p->xEnd = dfdebugEnd; p->lnLeft = p->lnRight = 0; p->pOut = pOut; return p; } /************************* DiffBuilderTcl ********************************/ /* ** This formatter outputs a description of the diff formatted as TCL, for ** use by the --tk option to "diff". See also the "diff.tcl" file. The ** output can be viewed directly using the --tcl option. ** ** There is one line per method call: ** ** SKIP n -- Skip "n" lines of input ** COM string -- "string" is an unchanged context line ** INS string -- "string" is in the right file only ** DEL string -- "string" is in the left file only ** EDIT string .... -- Complex edit between left and right ** ** The EDIT verb will be followed by 3*N or 3*N+1 strings. The triples ** each show: ** ** 1. Common text ** 2. Text from the left side ** 3. Text on the right that replaces (2) from the left ** ** For inserted text (2) will be an empty string. For deleted text, (3) ** will be an empty string. (1) might be empty for the first triple if ** the line begins with an edit. After all triples, there might be one ** additional string which is a common suffix. */ static void dftclSkip(DiffBuilder *p, unsigned int n, int isFinal){ blob_appendf(p->pOut, "SKIP %u\n", n); } static void dftclCommon(DiffBuilder *p, const DLine *pLine){ blob_appendf(p->pOut, "COM "); blob_append_tcl_literal(p->pOut, pLine->z, pLine->n); blob_append_char(p->pOut, '\n'); } static void dftclInsert(DiffBuilder *p, const DLine *pLine){ blob_append(p->pOut, "INS ", -1); blob_append_tcl_literal(p->pOut, pLine->z, pLine->n); blob_append_char(p->pOut, '\n'); } static void dftclDelete(DiffBuilder *p, const DLine *pLine){ blob_append(p->pOut, "DEL ", -1); blob_append_tcl_literal(p->pOut, pLine->z, pLine->n); blob_append_char(p->pOut, '\n'); } static void dftclReplace(DiffBuilder *p, const DLine *pX, const DLine *pY){ blob_append(p->pOut, "EDIT \"\" ", -1); blob_append_tcl_literal(p->pOut, pX->z, pX->n); blob_append_char(p->pOut, ' '); blob_append_tcl_literal(p->pOut, pY->z, pY->n); blob_append_char(p->pOut, '\n'); } static void dftclEdit(DiffBuilder *p, const DLine *pX, const DLine *pY){ int i, x; LineChange chng; blob_append(p->pOut, "EDIT", 4); oneLineChange(pX, pY, &chng); for(i=x=0; ipOut, ' '); blob_append_tcl_literal(p->pOut, pX->z + x, chng.a[i].iStart1 - x); x = chng.a[i].iStart1; blob_append_char(p->pOut, ' '); blob_append_tcl_literal(p->pOut, pX->z + x, chng.a[i].iLen1); x += chng.a[i].iLen1; blob_append_char(p->pOut, ' '); blob_append_tcl_literal(p->pOut, pY->z + chng.a[i].iStart2, chng.a[i].iLen2); } if( xn ){ blob_append_char(p->pOut, ' '); blob_append_tcl_literal(p->pOut, pX->z + x, pX->n - x); } blob_append_char(p->pOut, '\n'); } static void dftclEnd(DiffBuilder *p){ fossil_free(p); } static DiffBuilder *dftclNew(Blob *pOut){ DiffBuilder *p = fossil_malloc(sizeof(*p)); p->xSkip = dftclSkip; p->xCommon = dftclCommon; p->xInsert = dftclInsert; p->xDelete = dftclDelete; p->xReplace = dftclReplace; p->xEdit = dftclEdit; p->xEnd = dftclEnd; p->pOut = pOut; return p; } /************************* DiffBuilderJson ********************************/ /* ** This formatter generates a JSON array that describes the difference. ** ** The Json array consists of integer opcodes with each opcode followed ** by zero or more arguments: ** ** Syntax Mnemonic Description ** ----------- -------- -------------------------- ** 0 END This is the end of the diff ** 1 INTEGER SKIP Skip N lines from both files ** 2 STRING COMMON The line shown by STRING is in both files ** 3 STRING INSERT The line STRING is in only the right file ** 4 STRING DELETE The STRING line is in only the left file ** 5 SUBARRAY EDIT One line is different on left and right. ** ** The SUBARRAY is an array of 3*N+1 strings with N>=0. The triples ** represent common-text, left-text, and right-text. The last string ** in SUBARRAY is the common-suffix. Any string can be empty if it does ** not apply. */ static void dfjsonSkip(DiffBuilder *p, unsigned int n, int isFinal){ blob_appendf(p->pOut, "1,%u,\n", n); } static void dfjsonCommon(DiffBuilder *p, const DLine *pLine){ blob_append(p->pOut, "2,",2); blob_append_json_literal(p->pOut, pLine->z, (int)pLine->n); blob_append(p->pOut, ",\n",2); } static void dfjsonInsert(DiffBuilder *p, const DLine *pLine){ blob_append(p->pOut, "3,",2); blob_append_json_literal(p->pOut, pLine->z, (int)pLine->n); blob_append(p->pOut, ",\n",2); } static void dfjsonDelete(DiffBuilder *p, const DLine *pLine){ blob_append(p->pOut, "4,",2); blob_append_json_literal(p->pOut, pLine->z, (int)pLine->n); blob_append(p->pOut, ",\n",2); } static void dfjsonReplace(DiffBuilder *p, const DLine *pX, const DLine *pY){ blob_append(p->pOut, "5,[\"\",",-1); blob_append_json_literal(p->pOut, pX->z, (int)pX->n); blob_append(p->pOut, ",",1); blob_append_json_literal(p->pOut, pY->z, (int)pY->n); blob_append(p->pOut, ",\"\"],\n",-1); } static void dfjsonEdit(DiffBuilder *p, const DLine *pX, const DLine *pY){ int i, x; LineChange chng; blob_append(p->pOut, "5,[", 3); oneLineChange(pX, pY, &chng); for(i=x=0; i0){ blob_append_char(p->pOut, ','); } blob_append_json_literal(p->pOut, pX->z + x, chng.a[i].iStart1 - x); x = chng.a[i].iStart1; blob_append_char(p->pOut, ','); blob_append_json_literal(p->pOut, pX->z + x, chng.a[i].iLen1); x += chng.a[i].iLen1; blob_append_char(p->pOut, ','); blob_append_json_literal(p->pOut, pY->z + chng.a[i].iStart2, chng.a[i].iLen2); } blob_append_char(p->pOut, ','); blob_append_json_literal(p->pOut, pX->z + x, pX->n - x); blob_append(p->pOut, "],\n",3); } static void dfjsonEnd(DiffBuilder *p){ blob_append(p->pOut, "0]}", 3); fossil_free(p); } static DiffBuilder *dfjsonNew(Blob *pOut){ DiffBuilder *p = fossil_malloc(sizeof(*p)); p->xSkip = dfjsonSkip; p->xCommon = dfjsonCommon; p->xInsert = dfjsonInsert; p->xDelete = dfjsonDelete; p->xReplace = dfjsonReplace; p->xEdit = dfjsonEdit; p->xEnd = dfjsonEnd; p->lnLeft = p->lnRight = 0; p->pOut = pOut; blob_append_char(pOut, '['); return p; } /************************* DiffBuilderUnified********************************/ /* This formatter generates a unified diff for HTML. ** ** The result is a with four columns. The four columns hold: ** ** 1. The line numbers for the first file. ** 2. The line numbers for the second file. ** 3. The "diff mark": "+" or "-" or just a space ** 4. Text of the line ** ** Inserted lines are marked with and deleted lines are marked ** with . The whole line is marked this way, not just the part that ** changed. The part that change has an additional nested or . ** The CSS needs to be set up such that a single or gives a ** light background and a nested or gives a darker background. ** Additional attributes (like bold font) might also be added to nested ** and since those are the characters that have actually ** changed. ** ** Accumulator strategy: ** ** * Delete line numbers are output directly to p->pOut ** * Insert line numbers accumulate in p->aCol[0]. ** * Separator marks accumulate in p->aCol[1]. ** * Change text accumulates in p->aCol[2]. ** * Pending insert line numbers go into p->aCol[3]. ** * Pending insert text goes into p->aCol[4]. ** ** eState is 1 if text has an open */ static void dfunifiedFinishDelete(DiffBuilder *p){ if( p->eState==0 ) return; blob_append(p->pOut, "", 6); blob_append(&p->aCol[2], "", 6); p->eState = 0; } static void dfunifiedFinishInsert(DiffBuilder *p){ unsigned int i; if( p->nPending==0 ) return; dfunifiedFinishDelete(p); /* Blank lines for delete line numbers for each inserted line */ for(i=0; inPending; i++) blob_append_char(p->pOut, '\n'); /* Insert line numbers */ blob_append(&p->aCol[0], "", 5); blob_append_xfer(&p->aCol[0], &p->aCol[3]); blob_append(&p->aCol[0], "", 6); /* "+" marks for the separator on inserted lines */ for(i=0; inPending; i++) blob_append(&p->aCol[1], "+\n", 2); /* Text of the inserted lines */ blob_append(&p->aCol[2], "", 5); blob_append_xfer(&p->aCol[2], &p->aCol[4]); blob_append(&p->aCol[2], "", 6); p->nPending = 0; } static void dfunifiedFinishRow(DiffBuilder *p){ dfunifiedFinishDelete(p); dfunifiedFinishInsert(p); if( blob_size(&p->aCol[0])==0 ) return; blob_append(p->pOut, "\n", -1); } static void dfunifiedStartRow(DiffBuilder *p){ if( blob_size(&p->aCol[0])>0 ) return; blob_appendf(p->pOut,"" "\n", p->lnLeft+1, p->lnLeft+n, nChunk, p->lnLeft, n); }else{ blob_append(p->pOut, "", 4); } blob_append(p->pOut, "\n", -1); p->lnLeft += n; p->lnRight += n; } static void dfunifiedCommon(DiffBuilder *p, const DLine *pLine){ dfunifiedStartRow(p); dfunifiedFinishDelete(p); dfunifiedFinishInsert(p); p->lnLeft++; p->lnRight++; blob_appendf(p->pOut,"%d\n", p->lnLeft); blob_appendf(&p->aCol[0],"%d\n",p->lnRight); blob_append_char(&p->aCol[1], '\n'); htmlize_to_blob(&p->aCol[2], pLine->z, (int)pLine->n); blob_append_char(&p->aCol[2], '\n'); } static void dfunifiedInsert(DiffBuilder *p, const DLine *pLine){ dfunifiedStartRow(p); p->lnRight++; blob_appendf(&p->aCol[3],"%d\n", p->lnRight); blob_append(&p->aCol[4], "", 5); htmlize_to_blob(&p->aCol[4], pLine->z, (int)pLine->n); blob_append(&p->aCol[4], "\n", 7); p->nPending++; } static void dfunifiedDelete(DiffBuilder *p, const DLine *pLine){ dfunifiedStartRow(p); dfunifiedFinishInsert(p); if( p->eState==0 ){ dfunifiedFinishInsert(p); blob_append(p->pOut, "", 5); blob_append(&p->aCol[2], "", 5); p->eState = 1; } p->lnLeft++; blob_appendf(p->pOut,"%d\n", p->lnLeft); blob_append_char(&p->aCol[0],'\n'); blob_append(&p->aCol[1],"-\n",2); blob_append(&p->aCol[2], "", 5); htmlize_to_blob(&p->aCol[2], pLine->z, (int)pLine->n); blob_append(&p->aCol[2], "\n", 7); } static void dfunifiedReplace(DiffBuilder *p, const DLine *pX, const DLine *pY){ dfunifiedStartRow(p); if( p->eState==0 ){ dfunifiedFinishInsert(p); blob_append(p->pOut, "", 5); blob_append(&p->aCol[2], "", 5); p->eState = 1; } p->lnLeft++; p->lnRight++; blob_appendf(p->pOut,"%d\n", p->lnLeft); blob_append_char(&p->aCol[0], '\n'); blob_append(&p->aCol[1], "-\n", 2); htmlize_to_blob(&p->aCol[2], pX->z, pX->n); blob_append_char(&p->aCol[2], '\n'); blob_appendf(&p->aCol[3],"%d\n", p->lnRight); htmlize_to_blob(&p->aCol[4], pY->z, pY->n); blob_append_char(&p->aCol[4], '\n'); p->nPending++; } static void dfunifiedEdit(DiffBuilder *p, const DLine *pX, const DLine *pY){ int i; int x; LineChange chng; oneLineChange(pX, pY, &chng); dfunifiedStartRow(p); if( p->eState==0 ){ dfunifiedFinishInsert(p); blob_append(p->pOut, "", 5); blob_append(&p->aCol[2], "", 5); p->eState = 1; } p->lnLeft++; p->lnRight++; blob_appendf(p->pOut,"%d\n", p->lnLeft); blob_append_char(&p->aCol[0], '\n'); blob_append(&p->aCol[1], "-\n", 2); for(i=x=0; iaCol[2], pX->z+x, ofst - x); x = ofst; blob_append(&p->aCol[2], "", 5); htmlize_to_blob(&p->aCol[2], pX->z+x, len); x += len; blob_append(&p->aCol[2], "", 6); } } htmlize_to_blob(&p->aCol[2], pX->z+x, pX->n - x); blob_append_char(&p->aCol[2], '\n'); blob_appendf(&p->aCol[3],"%d\n", p->lnRight); for(i=x=0; iaCol[4], pY->z+x, ofst - x); x = ofst; blob_append(&p->aCol[4], "", 5); htmlize_to_blob(&p->aCol[4], pY->z+x, len); x += len; blob_append(&p->aCol[4], "", 6); } } htmlize_to_blob(&p->aCol[4], pY->z+x, pY->n - x); blob_append_char(&p->aCol[4], '\n'); p->nPending++; } static void dfunifiedEnd(DiffBuilder *p){ dfunifiedFinishRow(p); blob_append(p->pOut, "
\n", -1);
  blob_append_xfer(p->pOut, &p->aCol[0]);
  blob_append(p->pOut, "
\n", -1);
  blob_append_xfer(p->pOut, &p->aCol[1]);
  blob_append(p->pOut, "
\n",-1);
  blob_append_xfer(p->pOut, &p->aCol[2]);
  blob_append(p->pOut, "
\n", ++nChunk);
}
static void dfunifiedSkip(DiffBuilder *p, unsigned int n, int isFinal){
  dfunifiedFinishRow(p);
  if( p->pCfg && p->pCfg->zLeftHash ){
    blob_appendf(p->pOut,
       "
" "︙
\n",-1); fossil_free(p); } static DiffBuilder *dfunifiedNew(Blob *pOut, DiffConfig *pCfg){ DiffBuilder *p = fossil_malloc(sizeof(*p)); p->xSkip = dfunifiedSkip; p->xCommon = dfunifiedCommon; p->xInsert = dfunifiedInsert; p->xDelete = dfunifiedDelete; p->xReplace = dfunifiedReplace; p->xEdit = dfunifiedEdit; p->xEnd = dfunifiedEnd; p->lnLeft = p->lnRight = 0; p->eState = 0; p->nPending = 0; p->pOut = pOut; if( pCfg->zLeftHash ){ blob_appendf(pOut, "\n", pCfg->zLeftHash); }else{ blob_append(pOut, "
\n", -1); } blob_init(&p->aCol[0], 0, 0); blob_init(&p->aCol[1], 0, 0); blob_init(&p->aCol[2], 0, 0); blob_init(&p->aCol[3], 0, 0); blob_init(&p->aCol[4], 0, 0); p->pCfg = pCfg; return p; } /************************* DiffBuilderSplit ******************************/ /* This formatter creates a side-by-side diff in HTML. The output is a **
with 5 columns: ** ** 1. Line numbers for the first file. ** 2. Text for the first file. ** 3. The difference mark. "<", ">", "|" or blank ** 4. Line numbers for the second file. ** 5. Text for the second file. ** ** The and strategy is the same as for unified diff above. ** In fact, the same CSS can be used for both. ** ** Accumulator strategy: ** ** * Left line numbers are output directly to p->pOut ** * Left text accumulates in p->aCol[0]. ** * Edit marks accumulates in p->aCol[1]. ** * Right line numbers accumulate in p->aCol[2]. ** * Right text accumulates in p->aCol[3]. ** ** eState: ** 0 In common block ** 1 Have on the left ** 2 Have on the right ** 3 Have on left and on the right */ static void dfsplitChangeState(DiffBuilder *p, int newState){ if( p->eState == newState ) return; if( (p->eState&1)==0 && (newState & 1)!=0 ){ blob_append(p->pOut, "", 5); blob_append(&p->aCol[0], "", 5); p->eState |= 1; }else if( (p->eState&1)!=0 && (newState & 1)==0 ){ blob_append(p->pOut, "", 6); blob_append(&p->aCol[0], "", 6); p->eState &= ~1; } if( (p->eState&2)==0 && (newState & 2)!=0 ){ blob_append(&p->aCol[2], "", 5); blob_append(&p->aCol[3], "", 5); p->eState |= 2; }else if( (p->eState&2)!=0 && (newState & 2)==0 ){ blob_append(&p->aCol[2], "", 6); blob_append(&p->aCol[3], "", 6); p->eState &= ~2; } } static void dfsplitFinishRow(DiffBuilder *p){ if( blob_size(&p->aCol[0])==0 ) return; dfsplitChangeState(p, 0); blob_append(p->pOut, "\n", -1); } static void dfsplitStartRow(DiffBuilder *p){ if( blob_size(&p->aCol[0])>0 ) return; blob_appendf(p->pOut,"" "\n", p->lnLeft+1, p->lnLeft+n, nChunk,p->lnLeft,n); }else{ blob_append(p->pOut, "", 4); } blob_append(p->pOut, "" "" "" "\n", -1); p->lnLeft += n; p->lnRight += n; } static void dfsplitCommon(DiffBuilder *p, const DLine *pLine){ dfsplitStartRow(p); dfsplitChangeState(p, 0); p->lnLeft++; p->lnRight++; blob_appendf(p->pOut,"%d\n", p->lnLeft); htmlize_to_blob(&p->aCol[0], pLine->z, (int)pLine->n); blob_append_char(&p->aCol[0], '\n'); blob_append_char(&p->aCol[1], '\n'); blob_appendf(&p->aCol[2],"%d\n",p->lnRight); htmlize_to_blob(&p->aCol[3], pLine->z, (int)pLine->n); blob_append_char(&p->aCol[3], '\n'); } static void dfsplitInsert(DiffBuilder *p, const DLine *pLine){ dfsplitStartRow(p); dfsplitChangeState(p, 2); p->lnRight++; blob_append_char(p->pOut, '\n'); blob_append_char(&p->aCol[0], '\n'); blob_append(&p->aCol[1], ">\n", -1); blob_appendf(&p->aCol[2],"%d\n", p->lnRight); blob_append(&p->aCol[3], "", 5); htmlize_to_blob(&p->aCol[3], pLine->z, (int)pLine->n); blob_append(&p->aCol[3], "\n", 7); } static void dfsplitDelete(DiffBuilder *p, const DLine *pLine){ dfsplitStartRow(p); dfsplitChangeState(p, 1); p->lnLeft++; blob_appendf(p->pOut,"%d\n", p->lnLeft); blob_append(&p->aCol[0], "", 5); htmlize_to_blob(&p->aCol[0], pLine->z, (int)pLine->n); blob_append(&p->aCol[0], "\n", 7); blob_append(&p->aCol[1], "<\n", -1); blob_append_char(&p->aCol[2],'\n'); blob_append_char(&p->aCol[3],'\n'); } static void dfsplitReplace(DiffBuilder *p, const DLine *pX, const DLine *pY){ dfsplitStartRow(p); dfsplitChangeState(p, 3); p->lnLeft++; p->lnRight++; blob_appendf(p->pOut,"%d\n", p->lnLeft); htmlize_to_blob(&p->aCol[0], pX->z, pX->n); blob_append_char(&p->aCol[0], '\n'); blob_append(&p->aCol[1], "|\n", 2); blob_appendf(&p->aCol[2],"%d\n", p->lnRight); htmlize_to_blob(&p->aCol[3], pY->z, pY->n); blob_append_char(&p->aCol[3], '\n'); } static void dfsplitEdit(DiffBuilder *p, const DLine *pX, const DLine *pY){ int i; int x; LineChange chng; oneLineChange(pX, pY, &chng); dfsplitStartRow(p); dfsplitChangeState(p, 3); p->lnLeft++; p->lnRight++; blob_appendf(p->pOut,"%d\n", p->lnLeft); for(i=x=0; iaCol[0], pX->z+x, ofst - x); x = ofst; if( chng.a[i].iLen2 ){ blob_append(&p->aCol[0], "", -1); }else{ blob_append(&p->aCol[0], "", 5); } htmlize_to_blob(&p->aCol[0], pX->z+x, len); x += len; blob_append(&p->aCol[0], "", 6); } } htmlize_to_blob(&p->aCol[0], pX->z+x, pX->n - x); blob_append_char(&p->aCol[0], '\n'); blob_append(&p->aCol[1], "|\n", 2); blob_appendf(&p->aCol[2],"%d\n", p->lnRight); for(i=x=0; iaCol[3], pY->z+x, ofst - x); x = ofst; if( chng.a[i].iLen1 ){ blob_append(&p->aCol[3], "", -1); }else{ blob_append(&p->aCol[3], "", 5); } htmlize_to_blob(&p->aCol[3], pY->z+x, len); x += len; blob_append(&p->aCol[3], "", 6); } } htmlize_to_blob(&p->aCol[3], pY->z+x, pY->n - x); blob_append_char(&p->aCol[3], '\n'); } static void dfsplitEnd(DiffBuilder *p){ dfsplitFinishRow(p); blob_append(p->pOut, "
\n",-1);
  blob_append_xfer(p->pOut, &p->aCol[0]);
  blob_append(p->pOut, "
\n", -1);
  blob_append_xfer(p->pOut, &p->aCol[1]);
  blob_append(p->pOut, "
\n",-1);
  blob_append_xfer(p->pOut, &p->aCol[2]);
  blob_append(p->pOut, "
\n",-1);
  blob_append_xfer(p->pOut, &p->aCol[3]);
  blob_append(p->pOut, "
\n", ++nChunk);
  p->eState = 0;
}
static void dfsplitSkip(DiffBuilder *p, unsigned int n, int isFinal){
  dfsplitFinishRow(p);
  if( p->pCfg && p->pCfg->zLeftHash ){
    blob_appendf(p->pOut,
       "
\n",-1); fossil_free(p); } static DiffBuilder *dfsplitNew(Blob *pOut, DiffConfig *pCfg){ DiffBuilder *p = fossil_malloc(sizeof(*p)); p->xSkip = dfsplitSkip; p->xCommon = dfsplitCommon; p->xInsert = dfsplitInsert; p->xDelete = dfsplitDelete; p->xReplace = dfsplitReplace; p->xEdit = dfsplitEdit; p->xEnd = dfsplitEnd; p->lnLeft = p->lnRight = 0; p->eState = 0; p->pOut = pOut; if( pCfg->zLeftHash ){ blob_appendf(pOut, "\n", pCfg->zLeftHash); }else{ blob_append(pOut, "
\n", -1); } blob_init(&p->aCol[0], 0, 0); blob_init(&p->aCol[1], 0, 0); blob_init(&p->aCol[2], 0, 0); blob_init(&p->aCol[3], 0, 0); blob_init(&p->aCol[4], 0, 0); p->pCfg = pCfg; return p; } /************************* DiffBuilderSbs ******************************/ /* This formatter creates a side-by-side diff in text. */ static void dfsbsSkip(DiffBuilder *p, unsigned int n, int isFinal){ if( (p->lnLeft || p->lnRight) && !isFinal ){ blob_appendf(p->pOut, "%.*c\n", p->width*2 + 16, '.'); } p->lnLeft += n; p->lnRight += n; } /* ** Append at least iMin characters (not bytes) and at most iMax characters ** from pX onto the into of p. ** ** This comment contains multibyte unicode characters (ü, Æ, ð) in order ** to test the ability of the diff code to handle such characters. */ static void sbs_append_chars(Blob *p, int iMin, int iMax, const DLine *pX){ int i; const char *z = pX->z; for(i=0; in; i++){ char c = z[i]; blob_append_char(p, c); if( (c&0xc0)==0x80 ){ iMin++; iMax++; } } while( ilnLeft++; p->lnRight++; blob_appendf(p->pOut,"%6u ", p->lnLeft); sbs_append_chars(p->pOut, p->width, p->width, pLine); blob_appendf(p->pOut," %6u ", p->lnRight); sbs_append_chars(p->pOut, 0, p->width, pLine); blob_append_char(p->pOut, '\n'); } static void dfsbsInsert(DiffBuilder *p, const DLine *pLine){ p->lnRight++; blob_appendf(p->pOut,"%6s %*s > %6u ", "", p->width, "", p->lnRight); sbs_append_chars(p->pOut, 0, p->width, pLine); blob_append_char(p->pOut, '\n'); } static void dfsbsDelete(DiffBuilder *p, const DLine *pLine){ p->lnLeft++; blob_appendf(p->pOut,"%6u ", p->lnLeft); sbs_append_chars(p->pOut, p->width, p->width, pLine); blob_append(p->pOut," <\n", 3); } static void dfsbsEdit(DiffBuilder *p, const DLine *pX, const DLine *pY){ p->lnLeft++; p->lnRight++; blob_appendf(p->pOut,"%6u ", p->lnLeft); sbs_append_chars(p->pOut, p->width, p->width, pX); blob_appendf(p->pOut, " | %6u ", p->lnRight); sbs_append_chars(p->pOut, 0, p->width, pY); blob_append_char(p->pOut, '\n'); } static void dfsbsEnd(DiffBuilder *p){ fossil_free(p); } static DiffBuilder *dfsbsNew(Blob *pOut, DiffConfig *pCfg){ DiffBuilder *p = fossil_malloc(sizeof(*p)); p->xSkip = dfsbsSkip; p->xCommon = dfsbsCommon; p->xInsert = dfsbsInsert; p->xDelete = dfsbsDelete; p->xReplace = dfsbsEdit; p->xEdit = dfsbsEdit; p->xEnd = dfsbsEnd; p->lnLeft = p->lnRight = 0; p->width = diff_width(pCfg); p->pOut = pOut; return p; } /****************************************************************************/ /* ** Return the number between 0 and 100 that is smaller the closer pA and ** pB match. Return 0 for a perfect match. Return 100 if pA and pB are ** completely different. ** ** The current algorithm is as follows: ** ** (1) Remove leading and trailing whitespace. ** (2) Truncate both strings to at most 250 characters ** (3) If the two strings have a common prefix, measure that prefix ** (4) Find the length of the longest common subsequence that is ** at least 150% longer than the common prefix. ** (5) Longer common subsequences yield lower scores. */ static int match_dline(DLine *pA, DLine *pB){ const char *zA; /* Left string */ const char *zB; /* right string */ int nA; /* Bytes in zA[] */ int nB; /* Bytes in zB[] */ int nMin; int nPrefix; int avg; /* Average length of A and B */ int i, j, k; /* Loop counters */ int best = 0; /* Longest match found so far */ int score; /* Final score. 0..100 */ unsigned char c; /* Character being examined */ unsigned char aFirst[256]; /* aFirst[X] = index in zB[] of first char X */ unsigned char aNext[252]; /* aNext[i] = index in zB[] of next zB[i] char */ zA = pA->z; if( pA->nw==0 && pA->n ){ for(i=0; in && diff_isspace(zA[i]); i++){} pA->indent = i; for(j=pA->n-1; j>i && diff_isspace(zA[j]); j--){} pA->nw = j - i + 1; } zA += pA->indent; nA = pA->nw; zB = pB->z; if( pB->nw==0 && pB->n ){ for(i=0; in && diff_isspace(zB[i]); i++){} pB->indent = i; for(j=pB->n-1; j>i && diff_isspace(zB[j]); j--){} pB->nw = j - i + 1; } zB += pB->indent; nB = pB->nw; if( nA>250 ) nA = 250; if( nB>250 ) nB = 250; avg = (nA+nB)/2; if( avg==0 ) return 0; nMin = nA; if( nB5 && nPrefix>nMin/2 ){ best = nPrefix*3/2; if( best>=avg - 2 ) best = avg - 2; } if( nA==nB && memcmp(zA, zB, nA)==0 ) return 0; memset(aFirst, 0xff, sizeof(aFirst)); zA--; zB--; /* Make both zA[] and zB[] 1-indexed */ for(i=nB; i>0; i--){ c = (unsigned char)zB[i]; aNext[i] = aFirst[c]; aFirst[c] = i; } for(i=1; i<=nA-best; i++){ c = (unsigned char)zA[i]; for(j=aFirst[c]; jbest ) best = k; } } score = 5 + ((best>=avg) ? 0 : (avg - best)*95/avg); #if 0 fprintf(stderr, "A: [%.*s]\nB: [%.*s]\nbest=%d avg=%d score=%d\n", nA, zA+1, nB, zB+1, best, avg, score); #endif /* Return the result */ return score; } /* ** COMMAND: test-line-match ** Usage: %fossil test-line-match STRING1 STRING2 ** ** Return a score from 0 to 100 that is how similar STRING1 is to ** STRING2. Smaller numbers mean more similar. 0 is an exact match. ** ** This command is used to test to match_dline() function in the ** internal Fossil diff logic. */ void test_dline_match(void){ DLine a, b; int x; if( g.argc!=4 ) usage("STRING1 STRING2"); a.z = g.argv[2]; a.n = (int)strlen(a.z); b.z = g.argv[3]; b.n = (int)strlen(b.z); x = match_dline(&a, &b); fossil_print("%d\n", x); } /* Forward declarations for recursion */ static unsigned char *diffBlockAlignment( DLine *aLeft, int nLeft, /* Text on the left */ DLine *aRight, int nRight, /* Text on the right */ DiffConfig *pCfg, /* Configuration options */ int *pNResult /* OUTPUT: Bytes of result */ ); static void longestCommonSequence( DContext *p, /* Two files being compared */ int iS1, int iE1, /* Range of lines in p->aFrom[] */ int iS2, int iE2, /* Range of lines in p->aTo[] */ int *piSX, int *piEX, /* Write p->aFrom[] common segment here */ int *piSY, int *piEY /* Write p->aTo[] common segment here */ ); /* ** Make a copy of a list of nLine DLine objects from one array to ** another. Hash the new array to ignore whitespace. */ static void diffDLineXfer( DLine *aTo, const DLine *aFrom, int nLine ){ int i, j, k; u64 h, h2; for(i=0; ij && diff_isspace(z[k-1]); k--){} aTo[i].n = n = k-j; aTo[i].indent = 0; aTo[i].nw = 0; for(h=0; jdiffFlags & DIFF_DEBUG ){ fossil_print(" LCS size=%d\n" " [%.*s]\n" " [%.*s]\n", nLCS, aLeft[iSX].n, aLeft[iSX].z, aLeft[iEX-1].n, aLeft[iEX-1].z); } a1 = diffBlockAlignment(aLeft,iSX,aRight,iSY,pCfg,&n1); a2 = diffBlockAlignment(aLeft+iEX, nLeft-iEX, aRight+iEY, nRight-iEY, pCfg, &n2); a1 = fossil_realloc(a1, n1+nLCS+n2); memcpy(a1+n1+nLCS,a2,n2); memset(a1+n1,3,nLCS); fossil_free(a2); *pNResult = n1+n2+nLCS; return a1; } /* ** This is a helper route for diffBlockAlignment(). In this case, ** a very large block is encountered that might be too expensive to ** use the O(N*N) Wagner edit distance algorithm. So instead, this ** block implements a less-precise but faster O(N*logN) divide-and-conquer ** approach. */ static unsigned char *diffBlockAlignmentDivideAndConquer( DLine *aLeft, int nLeft, /* Text on the left */ DLine *aRight, int nRight, /* Text on the right */ DiffConfig *pCfg, /* Configuration options */ int *pNResult /* OUTPUT: Bytes of result */ ){ DLine *aSmall; /* The smaller of aLeft and aRight */ DLine *aBig; /* The larger of aLeft and aRight */ int nSmall, nBig; /* Size of aSmall and aBig. nSmall<=nBig */ int iDivSmall, iDivBig; /* Divider point for aSmall and aBig */ int iDivLeft, iDivRight; /* Divider point for aLeft and aRight */ unsigned char *a1, *a2; /* Results of the alignments on two halves */ int n1, n2; /* Number of entries in a1 and a2 */ int score, bestScore; /* Score and best score seen so far */ int i; /* Loop counter */ if( nLeft>nRight ){ aSmall = aRight; nSmall = nRight; aBig = aLeft; nBig = nLeft; }else{ aSmall = aLeft; nSmall = nLeft; aBig = aRight; nBig = nRight; } iDivBig = nBig/2; iDivSmall = nSmall/2; if( pCfg->diffFlags & DIFF_DEBUG ){ fossil_print(" Divide at [%.*s]\n", aBig[iDivBig].n, aBig[iDivBig].z); } bestScore = 10000; for(i=0; idiffFlags & DIFF_DEBUG ){ fossil_print("BlockAlignment:\n [%.*s] + %d\n [%.*s] + %d\n", aLeft[0].n, aLeft[0].z, nLeft, aRight[0].n, aRight[0].z, nRight); } /* For large alignments, try to use alternative algorithms that are ** faster than the O(N*N) Wagner edit distance. */ if( (i64)nLeft*(i64)nRight>DIFF_ALIGN_MX && (pCfg->diffFlags & DIFF_SLOW_SBS)==0 ){ if( (pCfg->diffFlags & DIFF_IGNORE_ALLWS)==0 ){ unsigned char *aRes; aRes = diffBlockAlignmentIgnoreSpace( aLeft, nLeft,aRight, nRight,pCfg,pNResult); if( aRes ) return aRes; } return diffBlockAlignmentDivideAndConquer( aLeft, nLeft,aRight, nRight,pCfg,pNResult); } /* If we reach this point, we will be doing an O(N*N) Wagner minimum ** edit distance to compute the alignment. */ if( nRight < count(aBuf)-1 ){ pToFree = 0; a = aBuf; }else{ a = pToFree = fossil_malloc( sizeof(a[0])*(nRight+1) ); } aM = fossil_malloc( (nLeft+1)*(nRight+1) ); /* Compute the best alignment */ for(i=0; i<=nRight; i++){ aM[i] = 2; a[i] = i*50; } aM[0] = 0; for(j=1; j<=nLeft; j++){ int p = a[0]; a[0] = p+50; aM[j*(nRight+1)] = 1; for(i=1; i<=nRight; i++){ int m = a[i-1]+50; int d = 2; if( m>a[i]+50 ){ m = a[i]+50; d = 1; } if( m>p ){ int score = match_dline(&aLeft[j-1], &aRight[i-1]); if( (score<=90 || (ij-1)) && m>p+score ){ m = p+score; d = 3 | score*4; } } p = a[i]; a[i] = m; aM[j*(nRight+1)+i] = d; } } /* Compute the lowest-cost path back through the matrix */ i = nRight; j = nLeft; k = (nRight+1)*(nLeft+1)-1; while( i+j>0 ){ unsigned char c = aM[k]; if( c>=3 ){ assert( i>0 && j>0 ); i--; j--; aM[k] = 3; }else if( c==2 ){ assert( i>0 ); i--; }else{ assert( j>0 ); j--; } k--; aM[k] = aM[j*(nRight+1)+i]; } k++; i = (nRight+1)*(nLeft+1) - k; memmove(aM, &aM[k], i); *pNResult = i; /* Return the result */ fossil_free(pToFree); return aM; } /* ** R[] is an array of six integer, two COPY/DELETE/INSERT triples for a ** pair of adjacent differences. Return true if the gap between these ** two differences is so small that they should be rendered as a single ** edit. */ static int smallGap(const int *R, int ma, int mb){ int m = R[3]; ma += R[4] + m; mb += R[5] + m; if( ma*mb>DIFF_ALIGN_MX ) return 0; return m<=2 || m<=(R[1]+R[2]+R[4]+R[5])/8; } /* ** Format a diff using a DiffBuilder object */ static void formatDiff( DContext *p, /* The computed diff */ DiffConfig *pCfg, /* Configuration options */ DiffBuilder *pBuilder /* The formatter object */ ){ DLine *A; /* Left side of the diff */ DLine *B; /* Right side of the diff */ unsigned int a = 0; /* Index of next line in A[] */ unsigned int b = 0; /* Index of next line in B[] */ const int *R; /* Array of COPY/DELETE/INSERT triples */ unsigned int r; /* Index into R[] */ unsigned int nr; /* Number of COPY/DELETE/INSERT triples to process */ unsigned int mxr; /* Maximum value for r */ unsigned int i, j; /* Loop counters */ unsigned int m, ma, mb;/* Number of lines to output */ signed int skip = 0; /* Number of lines to skip */ unsigned int nContext; /* Lines of context above and below each change */ nContext = diff_context_lines(pCfg); A = p->aFrom; B = p->aTo; R = p->aEdit; mxr = p->nEdit; while( mxr>2 && R[mxr-1]==0 && R[mxr-2]==0 ){ mxr -= 3; } for(r=0; r0 && R[r+nr*3]<(int)nContext*2; nr++){} /* If there is a regex, skip this block (generate no diff output) ** if the regex matches or does not match both insert and delete. ** Only display the block if one side matches but the other side does ** not. */ if( pCfg->pRe ){ int hideBlock = 1; int xa = a, xb = b; for(i=0; hideBlock && ipRe, &A[xa], R[r+i*3+1]); c2 = re_dline_match(pCfg->pRe, &B[xb], R[r+i*3+2]); hideBlock = c1==c2; xa += R[r+i*3+1]; xb += R[r+i*3+2]; } if( hideBlock ){ a = xa; b = xb; continue; } } /* Figure out how many lines of A and B are to be displayed ** for this change block. */ if( R[r]>(int)nContext ){ skip = R[r] - nContext; }else{ skip = 0; } /* Show the initial common area */ a += skip; b += skip; m = R[r] - skip; if( r ) skip -= nContext; if( skip>0 ){ if( skip<(int)nContext ){ /* If the amount to skip is less that the context band, then ** go ahead and show the skip band as it is not worth eliding */ for(j=0; (int)jxCommon(pBuilder, &A[a+j-skip]); } }else{ pBuilder->xSkip(pBuilder, skip, 0); } } for(j=0; jxCommon(pBuilder, &A[a+j]); } a += m; b += m; /* Show the differences */ for(i=0; i0; j++){ assert( (int)jxDelete(pBuilder, &A[a]); ma--; a++; break; } case 2: { /* Insert one line on the right */ pBuilder->xInsert(pBuilder, &B[b]); assert( mb>0 ); mb--; b++; break; } case 3: { /* The left line is changed into the right line */ if( p->xDiffer(&A[a], &B[b])==0 ){ pBuilder->xCommon(pBuilder, &A[a]); }else{ pBuilder->xEdit(pBuilder, &A[a], &B[b]); } assert( ma>0 && mb>0 ); ma--; mb--; a++; b++; break; } case 4: { /* Delete from left then separately insert on the right */ pBuilder->xReplace(pBuilder, &A[a], &B[b]); ma--; a++; mb--; b++; break; } } } assert( nAlign==(int)j ); fossil_free(alignment); if( ixCommon(pBuilder, &A[a+j]); } b += m; a += m; } } /* Show the final common area */ assert( nr==i ); m = R[r+nr*3]; if( m>nContext ) m = nContext; for(j=0; jxCommon(pBuilder, &A[a+j]); } } if( R[r]>(int)nContext ){ pBuilder->xSkip(pBuilder, R[r] - nContext, 1); } pBuilder->xEnd(pBuilder); } /* ** Compute the optimal longest common subsequence (LCS) using an ** exhaustive search. This version of the LCS is only used for ** shorter input strings since runtime is O(N*N) where N is the ** input string length. */ static void optimalLCS( DContext *p, /* Two files being compared */ int iS1, int iE1, /* Range of lines in p->aFrom[] */ int iS2, int iE2, /* Range of lines in p->aTo[] */ int *piSX, int *piEX, /* Write p->aFrom[] common segment here */ int *piSY, int *piEY /* Write p->aTo[] common segment here */ ){ int mxLength = 0; /* Length of longest common subsequence */ int i, j; /* Loop counters */ int k; /* Length of a candidate subsequence */ int iSXb = iS1; /* Best match so far */ int iSYb = iS2; /* Best match so far */ for(i=iS1; ixDiffer(&p->aFrom[i], &p->aTo[j]) ) continue; if( mxLength && p->xDiffer(&p->aFrom[i+mxLength], &p->aTo[j+mxLength]) ){ continue; } k = 1; while( i+kxDiffer(&p->aFrom[i+k],&p->aTo[j+k])==0 ){ k++; } if( k>mxLength ){ iSXb = i; iSYb = j; mxLength = k; } } } *piSX = iSXb; *piEX = iSXb + mxLength; *piSY = iSYb; *piEY = iSYb + mxLength; } /* ** Compare two blocks of text on lines iS1 through iE1-1 of the aFrom[] ** file and lines iS2 through iE2-1 of the aTo[] file. Locate a sequence ** of lines in these two blocks that are exactly the same. Return ** the bounds of the matching sequence. ** ** If there are two or more possible answers of the same length, the ** returned sequence should be the one closest to the center of the ** input range. ** ** Ideally, the common sequence should be the longest possible common ** sequence. However, an exact computation of LCS is O(N*N) which is ** way too slow for larger files. So this routine uses an O(N) ** heuristic approximation based on hashing that usually works about ** as well. But if the O(N) algorithm doesn't get a good solution ** and N is not too large, we fall back to an exact solution by ** calling optimalLCS(). */ static void longestCommonSequence( DContext *p, /* Two files being compared */ int iS1, int iE1, /* Range of lines in p->aFrom[] */ int iS2, int iE2, /* Range of lines in p->aTo[] */ int *piSX, int *piEX, /* Write p->aFrom[] common segment here */ int *piSY, int *piEY /* Write p->aTo[] common segment here */ ){ int i, j, k; /* Loop counters */ int n; /* Loop limit */ DLine *pA, *pB; /* Pointers to lines */ int iSX, iSY, iEX, iEY; /* Current match */ int skew = 0; /* How lopsided is the match */ int dist = 0; /* Distance of match from center */ int mid; /* Center of the chng */ int iSXb, iSYb, iEXb, iEYb; /* Best match so far */ int iSXp, iSYp, iEXp, iEYp; /* Previous match */ sqlite3_int64 bestScore; /* Best score so far */ sqlite3_int64 score; /* Score for current candidate LCS */ int span; /* combined width of the input sequences */ int cutoff = 4; /* Max hash chain entries to follow */ int nextCutoff = -1; /* Value of cutoff for next iteration */ span = (iE1 - iS1) + (iE2 - iS2); bestScore = -10000; score = 0; iSXb = iSXp = iS1; iEXb = iEXp = iS1; iSYb = iSYp = iS2; iEYb = iEYp = iS2; mid = (iE1 + iS1)/2; do{ nextCutoff = 0; for(i=iS1; iaTo[p->aFrom[i].h % p->nTo].iHash; while( j>0 && (j-1=iE2 || p->xDiffer(&p->aFrom[i], &p->aTo[j-1])) ){ if( limit++ > cutoff ){ j = 0; nextCutoff = cutoff*4; break; } j = p->aTo[j-1].iNext; } if( j==0 ) continue; assert( i>=iSXb && i>=iSXp ); if( i=iSYb && j=iSYp && jaFrom[iSX-1]; pB = &p->aTo[iSY-1]; n = minInt(iSX-iS1, iSY-iS2); for(k=0; kxDiffer(pA,pB)==0; k++, pA--, pB--){} iSX -= k; iSY -= k; iEX = i+1; iEY = j; pA = &p->aFrom[iEX]; pB = &p->aTo[iEY]; n = minInt(iE1-iEX, iE2-iEY); for(k=0; kxDiffer(pA,pB)==0; k++, pA++, pB++){} iEX += k; iEY += k; skew = (iSX-iS1) - (iSY-iS2); if( skew<0 ) skew = -skew; dist = (iSX+iEX)/2 - mid; if( dist<0 ) dist = -dist; score = (iEX - iSX)*(sqlite3_int64)span - (skew + dist); if( score>bestScore ){ bestScore = score; iSXb = iSX; iSYb = iSY; iEXb = iEX; iEYb = iEY; }else if( iEX>iEXp ){ iSXp = iSX; iSYp = iSY; iEXp = iEX; iEYp = iEY; } } }while( iSXb==iEXb && nextCutoff && (cutoff=nextCutoff)<=64 ); if( iSXb==iEXb && (sqlite3_int64)(iE1-iS1)*(iE2-iS2)<2500 ){ /* If no common sequence is found using the hashing heuristic and ** the input is not too big, use the expensive exact solution */ optimalLCS(p, iS1, iE1, iS2, iE2, piSX, piEX, piSY, piEY); }else{ *piSX = iSXb; *piSY = iSYb; *piEX = iEXb; *piEY = iEYb; } } /* ** Expand the size of aEdit[] array to hold at least nEdit elements. */ static void expandEdit(DContext *p, int nEdit){ p->aEdit = fossil_realloc(p->aEdit, nEdit*sizeof(int)); p->nEditAlloc = nEdit; } /* ** Append a new COPY/DELETE/INSERT triple. */ static void appendTriple(DContext *p, int nCopy, int nDel, int nIns){ /* printf("APPEND %d/%d/%d\n", nCopy, nDel, nIns); */ if( p->nEdit>=3 ){ if( p->aEdit[p->nEdit-1]==0 ){ if( p->aEdit[p->nEdit-2]==0 ){ p->aEdit[p->nEdit-3] += nCopy; p->aEdit[p->nEdit-2] += nDel; p->aEdit[p->nEdit-1] += nIns; return; } if( nCopy==0 ){ p->aEdit[p->nEdit-2] += nDel; p->aEdit[p->nEdit-1] += nIns; return; } } if( nCopy==0 && nDel==0 ){ p->aEdit[p->nEdit-1] += nIns; return; } } if( p->nEdit+3>p->nEditAlloc ){ expandEdit(p, p->nEdit*2 + 15); if( p->aEdit==0 ) return; } p->aEdit[p->nEdit++] = nCopy; p->aEdit[p->nEdit++] = nDel; p->aEdit[p->nEdit++] = nIns; } /* ** A common subsequence between p->aFrom and p->aTo has been found. ** This routine tries to judge if the subsequence really is a valid ** match or rather is just an artifact of an indentation change. ** ** Return non-zero if the subsequence is valid. Return zero if the ** subsequence seems likely to be an editing artifact and should be ** ignored. ** ** This routine is a heuristic optimization intended to give more ** intuitive diff results following an indentation change it code that ** is formatted similarly to C/C++, Javascript, Go, TCL, and similar ** languages that use {...} for nesting. A correct diff is computed ** even if this routine always returns true (non-zero). But sometimes ** a more intuitive diff can result if this routine returns false. ** ** The subsequences consists of the rows iSX through iEX-1 (inclusive) ** in p->aFrom[]. The total sequences is iS1 through iE1-1 (inclusive) ** of p->aFrom[]. ** ** Example where this heuristic is useful, see the diff at ** https://www.sqlite.org/src/fdiff?v1=0e79dd15cbdb4f48&v2=33955a6fd874dd97 ** ** See also discussion at https://fossil-scm.org/forum/forumpost/9ba3284295 ** ** ALGORITHM (subject to change and refinement): ** ** 1. If the subsequence is larger than 1/7th of the original span, ** then consider it valid. --> return 1 ** ** 2. If no lines of the subsequence contains more than one ** non-whitespace character, --> return 0 ** ** 3. If any line of the subsequence contains more than one non-whitespace ** character and is unique across the entire sequence after ignoring ** leading and trailing whitespace --> return 1 ** ** 4. Otherwise, it is potentially an artifact of an indentation ** change. --> return 0 */ static int likelyNotIndentChngArtifact( DContext *p, /* The complete diff context */ int iS1, /* Start of the main segment */ int iSX, /* Start of the subsequence */ int iEX, /* First row past the end of the subsequence */ int iE1 /* First row past the end of the main segment */ ){ int i, j, n; /* Rule (1) */ if( (iEX-iSX)*7 >= (iE1-iS1) ) return 1; /* Compute DLine.indent and DLine.nw for all lines of the subsequence. ** If no lines contain more than one non-whitespace character return ** 0 because the subsequence could be due to an indentation change. ** Rule (2). */ n = 0; for(i=iSX; iaFrom[i]; if( pA->nw==0 && pA->n ){ const char *zA = pA->z; const int nn = pA->n; int ii, jj; for(ii=0; iiindent = ii; for(jj=nn-1; jj>ii && diff_isspace(zA[jj]); jj--){} pA->nw = jj - ii + 1; } if( pA->nw>1 ) n++; } if( n==0 ) return 0; /* Compute DLine.indent and DLine.nw for the entire sequence */ for(i=iS1; i=iE1 ) break; } pA = &p->aFrom[i]; if( pA->nw==0 && pA->n ){ const char *zA = pA->z; const int nn = pA->n; int ii, jj; for(ii=0; iiindent = ii; for(jj=nn-1; jj>ii && diff_isspace(zA[jj]); jj--){} pA->nw = jj - ii + 1; } } /* Check to see if any subsequence line that has more than one ** non-whitespace character is unique across the entire sequence. ** Rule (3) */ for(i=iSX; iaFrom[i].z + p->aFrom[i].indent; const int nw = p->aFrom[i].nw; if( nw<=1 ) continue; for(j=iS1; jaFrom[j].nw!=nw ) continue; if( memcmp(p->aFrom[j].z+p->aFrom[j].indent,z,nw)==0 ) break; } if( jaFrom[j].nw!=nw ) continue; if( memcmp(p->aFrom[j].z+p->aFrom[j].indent,z,nw)==0 ) break; } if( j>=iE1 ) break; } return iiS2 ){ appendTriple(p, 0, 0, iE2-iS2); } return; } if( iE2<=iS2 ){ /* The second segment is empty */ appendTriple(p, 0, iE1-iS1, 0); return; } /* Find the longest matching segment between the two sequences */ longestCommonSequence(p, iS1, iE1, iS2, iE2, &iSX, &iEX, &iSY, &iEY); if( iEX>iSX+5 || (iEX>iSX && likelyNotIndentChngArtifact(p,iS1,iSX,iEX,iE1) ) ){ /* A common segment has been found. ** Recursively diff either side of the matching segment */ diff_step(p, iS1, iSX, iS2, iSY); if( iEX>iSX ){ appendTriple(p, iEX - iSX, 0, 0); } diff_step(p, iEX, iE1, iEY, iE2); }else{ /* The two segments have nothing in common. Delete the first then ** insert the second. */ appendTriple(p, 0, iE1-iS1, iE2-iS2); } } /* ** Compute the differences between two files already loaded into ** the DContext structure. ** ** A divide and conquer technique is used. We look for a large ** block of common text that is in the middle of both files. Then ** compute the difference on those parts of the file before and ** after the common block. This technique is fast, but it does ** not necessarily generate the minimum difference set. On the ** other hand, we do not need a minimum difference set, only one ** that makes sense to human readers, which this algorithm does. ** ** Any common text at the beginning and end of the two files is ** removed before starting the divide-and-conquer algorithm. */ static void diff_all(DContext *p){ int mnE, iS, iE1, iE2; /* Carve off the common header and footer */ iE1 = p->nFrom; iE2 = p->nTo; while( iE1>0 && iE2>0 && p->xDiffer(&p->aFrom[iE1-1], &p->aTo[iE2-1])==0 ){ iE1--; iE2--; } mnE = iE1xDiffer(&p->aFrom[iS],&p->aTo[iS])==0; iS++){} /* do the difference */ if( iS>0 ){ appendTriple(p, iS, 0, 0); } diff_step(p, iS, iE1, iS, iE2); if( iE1nFrom ){ appendTriple(p, p->nFrom - iE1, 0, 0); } /* Terminate the COPY/DELETE/INSERT triples with three zeros */ expandEdit(p, p->nEdit+3); if( p->aEdit ){ p->aEdit[p->nEdit++] = 0; p->aEdit[p->nEdit++] = 0; p->aEdit[p->nEdit++] = 0; } } /* ** Attempt to shift insertion or deletion blocks so that they begin and ** end on lines that are pure whitespace. In other words, try to transform ** this: ** ** int func1(int x){ ** return x*10; ** +} ** + ** +int func2(int x){ ** + return x*20; ** } ** ** int func3(int x){ ** return x/5; ** } ** ** Into one of these: ** ** int func1(int x){ int func1(int x){ ** return x*10; return x*10; ** } } ** + ** +int func2(int x){ +int func2(int x){ ** + return x*20; + return x*20; ** +} +} ** + ** int func3(int x){ int func3(int x){ ** return x/5; return x/5; ** } } */ static void diff_optimize(DContext *p){ int r; /* Index of current triple */ int lnFrom; /* Line number in p->aFrom */ int lnTo; /* Line number in p->aTo */ int cpy, del, ins; lnFrom = lnTo = 0; for(r=0; rnEdit; r += 3){ cpy = p->aEdit[r]; del = p->aEdit[r+1]; ins = p->aEdit[r+2]; lnFrom += cpy; lnTo += cpy; /* Shift insertions toward the beginning of the file */ while( cpy>0 && del==0 && ins>0 ){ DLine *pTop = &p->aFrom[lnFrom-1]; /* Line before start of insert */ DLine *pBtm = &p->aTo[lnTo+ins-1]; /* Last line inserted */ if( p->xDiffer(pTop, pBtm) ) break; if( LENGTH(pTop+1)+LENGTH(pBtm)<=LENGTH(pTop)+LENGTH(pBtm-1) ) break; lnFrom--; lnTo--; p->aEdit[r]--; p->aEdit[r+3]++; cpy--; } /* Shift insertions toward the end of the file */ while( r+3nEdit && p->aEdit[r+3]>0 && del==0 && ins>0 ){ DLine *pTop = &p->aTo[lnTo]; /* First line inserted */ DLine *pBtm = &p->aTo[lnTo+ins]; /* First line past end of insert */ if( p->xDiffer(pTop, pBtm) ) break; if( LENGTH(pTop)+LENGTH(pBtm-1)<=LENGTH(pTop+1)+LENGTH(pBtm) ) break; lnFrom++; lnTo++; p->aEdit[r]++; p->aEdit[r+3]--; cpy++; } /* Shift deletions toward the beginning of the file */ while( cpy>0 && del>0 && ins==0 ){ DLine *pTop = &p->aFrom[lnFrom-1]; /* Line before start of delete */ DLine *pBtm = &p->aFrom[lnFrom+del-1]; /* Last line deleted */ if( p->xDiffer(pTop, pBtm) ) break; if( LENGTH(pTop+1)+LENGTH(pBtm)<=LENGTH(pTop)+LENGTH(pBtm-1) ) break; lnFrom--; lnTo--; p->aEdit[r]--; p->aEdit[r+3]++; cpy--; } /* Shift deletions toward the end of the file */ while( r+3nEdit && p->aEdit[r+3]>0 && del>0 && ins==0 ){ DLine *pTop = &p->aFrom[lnFrom]; /* First line deleted */ DLine *pBtm = &p->aFrom[lnFrom+del]; /* First line past end of delete */ if( p->xDiffer(pTop, pBtm) ) break; if( LENGTH(pTop)+LENGTH(pBtm-1)<=LENGTH(pTop)+LENGTH(pBtm) ) break; lnFrom++; lnTo++; p->aEdit[r]++; p->aEdit[r+3]--; cpy++; } lnFrom += del; lnTo += ins; } } /* ** Extract the number of lines of context from diffFlags. Supply an ** appropriate default if no context width is specified. If pCfg is ** NULL then the compile-time default is used (which gets propagated ** to JS-side state by certain pages). */ int diff_context_lines(DiffConfig *pCfg){ const int dflt = 5; if(pCfg!=0){ int n = pCfg->nContext; if( n==0 && (pCfg->diffFlags & DIFF_CONTEXT_EX)==0 ) n = dflt; return n<0 ? 0x7ffffff : n; }else{ return dflt; } } /* ** Extract the width of columns for side-by-side diff. Supply an ** appropriate default if no width is given. ** ** Calculate the default automatically, based on terminal's current width: ** term-width = 2*diff-col + diff-marker + 1 ** diff-col = lineno + lmargin + text-width + rmargin ** ** text-width = (term-width - diff-marker - 1)/2 - lineno - lmargin - rmargin */ int diff_width(DiffConfig *pCfg){ int w = pCfg->wColumn; if( w==0 ){ static struct { unsigned int lineno, lmargin, text, rmargin, marker; } sbsW = { 5, 2, 0, 0, 3 }; const unsigned int wMin = 24, wMax = 132; unsigned int tw = terminal_get_width(80); unsigned int twMin = (wMin + sbsW.lineno + sbsW.lmargin + sbsW.rmargin)*2 + sbsW.marker + 1; unsigned int twMax = (wMax + sbsW.lineno + sbsW.lmargin + sbsW.rmargin)*2 + sbsW.marker + 1; if( twtwMax ){ tw = twMax; } sbsW.text = (tw - sbsW.marker - 1)/2 - sbsW.lineno - sbsW.lmargin - sbsW.rmargin; w = sbsW.text; } return w; } /* ** Append the error message to pOut. */ void diff_errmsg(Blob *pOut, const char *msg, int diffFlags){ if( diffFlags & DIFF_HTML ){ blob_appendf(pOut, "

%s

", msg); }else{ blob_append(pOut, msg, -1); } } /* ** Generate a report of the differences between files pA_Blob and pB_Blob. ** ** If pOut!=NULL then append text to pOut that will be the difference, ** formatted according to flags in diffFlags. The pOut Blob must have ** already been initialized. ** ** If pOut==NULL then no formatting occurs. Instead, this routine ** returns a pointer to an array of integers. The integers come in ** triples. The elements of each triple are: ** ** 1. The number of lines to copy ** 2. The number of lines to delete ** 3. The number of lines to insert ** ** The return vector is terminated by a triple of all zeros. The caller ** should free the returned vector using fossil_free(). ** ** This diff utility does not work on binary files. If a binary ** file is encountered, 0 is returned and pOut is written with ** text "cannot compute difference between binary files". */ int *text_diff( Blob *pA_Blob, /* FROM file */ Blob *pB_Blob, /* TO file */ Blob *pOut, /* Write diff here if not NULL */ DiffConfig *pCfg /* Configuration options */ ){ int ignoreWs; /* Ignore whitespace */ DContext c; if( pCfg->diffFlags & DIFF_INVERT ){ Blob *pTemp = pA_Blob; pA_Blob = pB_Blob; pB_Blob = pTemp; } ignoreWs = (pCfg->diffFlags & DIFF_IGNORE_ALLWS)!=0; blob_to_utf8_no_bom(pA_Blob, 0); blob_to_utf8_no_bom(pB_Blob, 0); /* Prepare the input files */ memset(&c, 0, sizeof(c)); if( (pCfg->diffFlags & DIFF_IGNORE_ALLWS)==DIFF_IGNORE_ALLWS ){ c.xDiffer = compare_dline_ignore_allws; }else{ c.xDiffer = compare_dline; } c.aFrom = break_into_lines(blob_str(pA_Blob), blob_size(pA_Blob), &c.nFrom, pCfg->diffFlags); c.aTo = break_into_lines(blob_str(pB_Blob), blob_size(pB_Blob), &c.nTo, pCfg->diffFlags); if( c.aFrom==0 || c.aTo==0 ){ fossil_free(c.aFrom); fossil_free(c.aTo); if( pOut ){ diff_errmsg(pOut, DIFF_CANNOT_COMPUTE_BINARY, pCfg->diffFlags); } return 0; } /* Compute the difference */ diff_all(&c); if( ignoreWs && c.nEdit==6 && c.aEdit[1]==0 && c.aEdit[2]==0 ){ fossil_free(c.aFrom); fossil_free(c.aTo); fossil_free(c.aEdit); if( pOut ) diff_errmsg(pOut, DIFF_WHITESPACE_ONLY, pCfg->diffFlags); return 0; } if( (pCfg->diffFlags & DIFF_NOTTOOBIG)!=0 ){ int i, m, n; int *a = c.aEdit; int mx = c.nEdit; for(i=m=n=0; i10000 ){ fossil_free(c.aFrom); fossil_free(c.aTo); fossil_free(c.aEdit); if( pOut ) diff_errmsg(pOut, DIFF_TOO_MANY_CHANGES, pCfg->diffFlags); return 0; } } if( (pCfg->diffFlags & DIFF_NOOPT)==0 ){ diff_optimize(&c); } if( pOut ){ if( pCfg->diffFlags & DIFF_NUMSTAT ){ int nDel = 0, nIns = 0, i; for(i=0; c.aEdit[i] || c.aEdit[i+1] || c.aEdit[i+2]; i+=3){ nDel += c.aEdit[i+1]; nIns += c.aEdit[i+2]; } g.diffCnt[1] += nIns; g.diffCnt[2] += nDel; if( nIns+nDel ){ g.diffCnt[0]++; blob_appendf(pOut, "%10d %10d", nIns, nDel); } }else if( pCfg->diffFlags & DIFF_RAW ){ const int *R = c.aEdit; unsigned int r; for(r=0; R[r] || R[r+1] || R[r+2]; r += 3){ blob_appendf(pOut, " copy %6d delete %6d insert %6d\n", R[r], R[r+1], R[r+2]); } }else if( pCfg->diffFlags & DIFF_JSON ){ DiffBuilder *pBuilder = dfjsonNew(pOut); formatDiff(&c, pCfg, pBuilder); blob_append_char(pOut, '\n'); }else if( pCfg->diffFlags & DIFF_TCL ){ DiffBuilder *pBuilder = dftclNew(pOut); formatDiff(&c, pCfg, pBuilder); }else if( pCfg->diffFlags & DIFF_SIDEBYSIDE ){ DiffBuilder *pBuilder; if( pCfg->diffFlags & DIFF_HTML ){ pBuilder = dfsplitNew(pOut, pCfg); }else{ pBuilder = dfsbsNew(pOut, pCfg); } formatDiff(&c, pCfg, pBuilder); }else if( pCfg->diffFlags & DIFF_DEBUG ){ DiffBuilder *pBuilder = dfdebugNew(pOut); formatDiff(&c, pCfg, pBuilder); }else if( pCfg->diffFlags & DIFF_HTML ){ DiffBuilder *pBuilder = dfunifiedNew(pOut, pCfg); formatDiff(&c, pCfg, pBuilder); }else{ contextDiff(&c, pOut, pCfg); } fossil_free(c.aFrom); fossil_free(c.aTo); fossil_free(c.aEdit); return 0; }else{ /* If a context diff is not requested, then return the ** array of COPY/DELETE/INSERT triples. */ free(c.aFrom); free(c.aTo); return c.aEdit; } } /* ** Initialize the DiffConfig object using command-line options. ** ** Process diff-related command-line options and return an appropriate ** "diffFlags" integer. ** ** --brief Show filenames only DIFF_BRIEF ** -c|--context N N lines of context. nContext ** --html Format for HTML DIFF_HTML ** --invert Invert the diff DIFF_INVERT ** -n|--linenum Show line numbers DIFF_LINENO ** --noopt Disable optimization DIFF_NOOPT ** --numstat Show change counts DIFF_NUMSTAT ** --strip-trailing-cr Strip trailing CR DIFF_STRIP_EOLCR ** --unified Unified diff. ~DIFF_SIDEBYSIDE ** -w|--ignore-all-space Ignore all whitespaces DIFF_IGNORE_ALLWS ** -W|--width N N character lines. wColumn ** -y|--side-by-side Side-by-side diff. DIFF_SIDEBYSIDE ** -Z|--ignore-trailing-space Ignore eol-whitespaces DIFF_IGNORE_EOLWS */ void diff_options(DiffConfig *pCfg, int isGDiff, int bUnifiedTextOnly){ u64 diffFlags = 0; const char *z; int f; memset(pCfg, 0, sizeof(*pCfg)); if( find_option("ignore-trailing-space","Z",0)!=0 ){ diffFlags = DIFF_IGNORE_EOLWS; } if( find_option("ignore-all-space","w",0)!=0 ){ diffFlags = DIFF_IGNORE_ALLWS; /* stronger than DIFF_IGNORE_EOLWS */ } if( find_option("strip-trailing-cr",0,0)!=0 ){ diffFlags |= DIFF_STRIP_EOLCR; } if( !bUnifiedTextOnly ){ if( find_option("side-by-side","y",0)!=0 ) diffFlags |= DIFF_SIDEBYSIDE; if( find_option("yy",0,0)!=0 ){ diffFlags |= DIFF_SIDEBYSIDE | DIFF_SLOW_SBS; } if( find_option("html",0,0)!=0 ) diffFlags |= DIFF_HTML; if( find_option("unified",0,0)!=0 ) diffFlags &= ~DIFF_SIDEBYSIDE; if( find_option("webpage",0,0)!=0 ){ diffFlags |= DIFF_HTML|DIFF_WEBPAGE|DIFF_LINENO; } if( find_option("browser","b",0)!=0 ){ diffFlags |= DIFF_HTML|DIFF_WEBPAGE|DIFF_LINENO|DIFF_BROWSER; } if( find_option("by",0,0)!=0 ){ diffFlags |= DIFF_HTML|DIFF_WEBPAGE|DIFF_LINENO|DIFF_BROWSER |DIFF_SIDEBYSIDE; } if( find_option("json",0,0)!=0 ){ diffFlags |= DIFF_JSON; } if( find_option("tcl",0,0)!=0 ){ diffFlags |= DIFF_TCL; } /* Undocumented and unsupported flags used for development ** debugging and analysis: */ if( find_option("debug",0,0)!=0 ) diffFlags |= DIFF_DEBUG; if( find_option("raw",0,0)!=0 ) diffFlags |= DIFF_RAW; } if( (z = find_option("context","c",1))!=0 && (f = atoi(z))!=0 ){ pCfg->nContext = f; diffFlags |= DIFF_CONTEXT_EX; } if( (z = find_option("width","W",1))!=0 && (f = atoi(z))>0 ){ pCfg->wColumn = f; } if( find_option("linenum","n",0)!=0 ) diffFlags |= DIFF_LINENO; if( find_option("noopt",0,0)!=0 ) diffFlags |= DIFF_NOOPT; if( find_option("numstat",0,0)!=0 ) diffFlags |= DIFF_NUMSTAT; if( find_option("versions","h",0)!=0 ) diffFlags |= DIFF_SHOW_VERS; if( find_option("dark",0,0)!=0 ) diffFlags |= DIFF_DARKMODE; if( find_option("invert",0,0)!=0 ) diffFlags |= DIFF_INVERT; if( find_option("brief",0,0)!=0 ) diffFlags |= DIFF_BRIEF; if( find_option("internal","i",0)==0 && (diffFlags & (DIFF_HTML|DIFF_TCL|DIFF_DEBUG|DIFF_JSON))==0 ){ pCfg->zDiffCmd = find_option("command", 0, 1); if( pCfg->zDiffCmd==0 ) pCfg->zDiffCmd = diff_command_external(isGDiff); if( pCfg->zDiffCmd ){ const char *zDiffBinary; pCfg->zBinGlob = diff_get_binary_glob(); zDiffBinary = find_option("diff-binary", 0, 1); if( zDiffBinary ){ if( is_truth(zDiffBinary) ) diffFlags |= DIFF_INCBINARY; }else if( db_get_boolean("diff-binary", 1) ){ diffFlags |= DIFF_INCBINARY; } } } if( find_option("verbose","v",0)!=0 ) diffFlags |= DIFF_VERBOSE; /* Deprecated, but retained for script compatibility. */ else if( find_option("new-file","N",0)!=0 ) diffFlags |= DIFF_VERBOSE; pCfg->diffFlags = diffFlags; } /* ** COMMAND: test-diff ** COMMAND: xdiff ** ** Usage: %fossil xdiff [options] FILE1 FILE2 ** ** Compute an "external diff" between two files. By "external diff" we mean ** a diff between two disk files that are not necessarily under management. ** In other words, this command provides a mechanism to use Fossil's file ** difference engine on arbitrary disk files. See the "diff" command for ** computing differences between files that are under management. ** ** This command prints the differences between the two files FILE1 and FILE2. ** all of the usual diff formatting options (--tk, --by, -c N, etc.) apply. ** See the "diff" command for a full list of command-line options. ** ** This command used to be called "test-diff". The older "test-diff" spelling ** still works, for compatibility. */ void xdiff_cmd(void){ Blob a, b, out; const char *zRe; /* Regex filter for diff output */ DiffConfig DCfg; if( find_option("tk",0,0)!=0 ){ diff_tk("xdiff", 2); return; } find_option("i",0,0); find_option("v",0,0); diff_options(&DCfg, 0, 0); zRe = find_option("regexp","e",1); if( zRe ){ const char *zErr = re_compile(&DCfg.pRe, zRe, 0); if( zErr ) fossil_fatal("regex error: %s", zErr); } verify_all_options(); if( g.argc!=4 ) usage("FILE1 FILE2"); blob_zero(&out); diff_begin(&DCfg); diff_print_filenames(g.argv[2], g.argv[3], &DCfg, &out); blob_read_from_file(&a, g.argv[2], ExtFILE); blob_read_from_file(&b, g.argv[3], ExtFILE); text_diff(&a, &b, &out, &DCfg); blob_write_to_file(&out, "-"); diff_end(&DCfg, 0); re_free(DCfg.pRe); } /************************************************************************** ** The basic difference engine is above. What follows is the annotation ** engine. Both are in the same file since they share many components. */ /* ** The status of an annotation operation is recorded by an instance ** of the following structure. */ typedef struct Annotator Annotator; struct Annotator { DContext c; /* The diff-engine context */ struct AnnLine { /* Lines of the original files... */ const char *z; /* The text of the line */ short int n; /* Number of bytes (omitting trailing \n) */ short int iVers; /* Level at which tag was set */ } *aOrig; int nOrig; /* Number of elements in aOrig[] */ int nVers; /* Number of versions analyzed */ int bMoreToDo; /* True if the limit was reached */ int origId; /* RID for the zOrigin version */ int showId; /* RID for the version being analyzed */ struct AnnVers { const char *zFUuid; /* File being analyzed */ const char *zMUuid; /* Check-in containing the file */ const char *zDate; /* Date of the check-in */ const char *zBgColor; /* Suggested background color */ const char *zUser; /* Name of user who did the check-in */ unsigned cnt; /* Number of lines contributed by this check-in */ } *aVers; /* For each check-in analyzed */ char **azVers; /* Names of versions analyzed */ }; /* ** Initialize the annotation process by specifying the file that is ** to be annotated. The annotator takes control of the input Blob and ** will release it when it is finished with it. */ static int annotation_start(Annotator *p, Blob *pInput, u64 diffFlags){ int i; memset(p, 0, sizeof(*p)); if( (diffFlags & DIFF_IGNORE_ALLWS)==DIFF_IGNORE_ALLWS ){ p->c.xDiffer = compare_dline_ignore_allws; }else{ p->c.xDiffer = compare_dline; } p->c.aTo = break_into_lines(blob_str(pInput), blob_size(pInput),&p->c.nTo, diffFlags); if( p->c.aTo==0 ){ return 1; } p->aOrig = fossil_malloc( sizeof(p->aOrig[0])*p->c.nTo ); for(i=0; ic.nTo; i++){ p->aOrig[i].z = p->c.aTo[i].z; p->aOrig[i].n = p->c.aTo[i].n; p->aOrig[i].iVers = -1; } p->nOrig = p->c.nTo; return 0; } /* ** The input pParent is the next most recent ancestor of the file ** being annotated. Do another step of the annotation. Return true ** if additional annotation is required. */ static int annotation_step( Annotator *p, Blob *pParent, int iVers, u64 diffFlags ){ int i, j; int lnTo; /* Prepare the parent file to be diffed */ p->c.aFrom = break_into_lines(blob_str(pParent), blob_size(pParent), &p->c.nFrom, diffFlags); if( p->c.aFrom==0 ){ return 1; } /* Compute the differences going from pParent to the file being ** annotated. */ diff_all(&p->c); /* Where new lines are inserted on this difference, record the ** iVers as the source of the new line. */ for(i=lnTo=0; ic.nEdit; i+=3){ int nCopy = p->c.aEdit[i]; int nIns = p->c.aEdit[i+2]; lnTo += nCopy; for(j=0; jaOrig[lnTo].iVers<0 ){ p->aOrig[lnTo].iVers = iVers; } } } /* Clear out the diff results */ fossil_free(p->c.aEdit); p->c.aEdit = 0; p->c.nEdit = 0; p->c.nEditAlloc = 0; /* Clear out the from file */ free(p->c.aFrom); /* Return no errors */ return 0; } /* Return the current time as milliseconds since the Julian epoch */ static sqlite3_int64 current_time_in_milliseconds(void){ static sqlite3_vfs *clockVfs = 0; sqlite3_int64 t; if( clockVfs==0 ) clockVfs = sqlite3_vfs_find(0); if( clockVfs->iVersion>=2 && clockVfs->xCurrentTimeInt64!=0 ){ clockVfs->xCurrentTimeInt64(clockVfs, &t); }else{ double r; clockVfs->xCurrentTime(clockVfs, &r); t = (sqlite3_int64)(r*86400000.0); } return t; } /* ** Compute a complete annotation on a file. The file is identified by its ** filename and check-in name (NULL for current check-in). */ static void annotate_file( Annotator *p, /* The annotator */ const char *zFilename, /* The name of the file to be annotated */ const char *zRevision, /* Use the version of the file in this check-in */ const char *zLimit, /* Limit the number of versions analyzed */ const char *zOrigin, /* The origin check-in, or NULL for root-of-tree */ u64 annFlags /* Flags to alter the annotation */ ){ Blob toAnnotate; /* Text of the final (mid) version of the file */ Blob step; /* Text of previous revision */ int cid; /* Selected check-in ID */ int origid = 0; /* The origin ID or zero */ int rid; /* Artifact ID of the file being annotated */ int fnid; /* Filename ID */ Stmt q; /* Query returning all ancestor versions */ int cnt = 0; /* Number of versions analyzed */ int iLimit; /* Maximum number of versions to analyze */ sqlite3_int64 mxTime; /* Halt at this time if not already complete */ memset(p, 0, sizeof(*p)); if( zLimit ){ if( strcmp(zLimit,"none")==0 ){ iLimit = 0; mxTime = 0; }else if( sqlite3_strglob("*[0-9]s", zLimit)==0 ){ iLimit = 0; mxTime = (sqlite3_int64)(current_time_in_milliseconds() + 1000.0*atof(zLimit)); }else{ iLimit = atoi(zLimit); if( iLimit<=0 ) iLimit = 30; mxTime = 0; } }else{ /* Default limit is as much as we can do in 1.000 seconds */ iLimit = 0; mxTime = current_time_in_milliseconds()+1000; } db_begin_transaction(); /* Get the artifact ID for the check-in begin analyzed */ if( zRevision ){ cid = name_to_typed_rid(zRevision, "ci"); }else{ db_must_be_within_tree(); cid = db_lget_int("checkout", 0); } origid = zOrigin ? name_to_typed_rid(zOrigin, "ci") : 0; /* Compute all direct ancestors of the check-in being analyzed into ** the "ancestor" table. */ if( origid ){ path_shortest_stored_in_ancestor_table(origid, cid); }else{ compute_direct_ancestors(cid); } /* Get filename ID */ fnid = db_int(0, "SELECT fnid FROM filename WHERE name=%Q", zFilename); if( fnid==0 ){ fossil_fatal("no such file: %Q", zFilename); } db_prepare(&q, "SELECT DISTINCT" " (SELECT uuid FROM blob WHERE rid=mlink.fid)," " (SELECT uuid FROM blob WHERE rid=mlink.mid)," " date(event.mtime)," " coalesce(event.euser,event.user)," " mlink.fid" " FROM mlink, event, ancestor" " WHERE mlink.fnid=%d" " AND ancestor.rid=mlink.mid" " AND event.objid=mlink.mid" " AND mlink.mid!=mlink.pid" " ORDER BY ancestor.generation;", fnid ); while( db_step(&q)==SQLITE_ROW ){ if( cnt>=3 ){ /* Process at least 3 rows before imposing limits */ if( (iLimit>0 && cnt>=iLimit) || (cnt>0 && mxTime>0 && current_time_in_milliseconds()>mxTime) ){ p->bMoreToDo = 1; break; } } rid = db_column_int(&q, 4); if( cnt==0 ){ if( !content_get(rid, &toAnnotate) ){ fossil_fatal("unable to retrieve content of artifact #%d", rid); } blob_to_utf8_no_bom(&toAnnotate, 0); annotation_start(p, &toAnnotate, annFlags); p->bMoreToDo = origid!=0; p->origId = origid; p->showId = cid; } p->aVers = fossil_realloc(p->aVers, (p->nVers+1)*sizeof(p->aVers[0])); p->aVers[p->nVers].zFUuid = fossil_strdup(db_column_text(&q, 0)); p->aVers[p->nVers].zMUuid = fossil_strdup(db_column_text(&q, 1)); p->aVers[p->nVers].zDate = fossil_strdup(db_column_text(&q, 2)); p->aVers[p->nVers].zUser = fossil_strdup(db_column_text(&q, 3)); if( cnt>0 ){ content_get(rid, &step); blob_to_utf8_no_bom(&step, 0); annotation_step(p, &step, p->nVers-1, annFlags); blob_reset(&step); } p->nVers++; cnt++; } if( p->nVers==0 ){ if( zRevision ){ fossil_fatal("file %s does not exist in check-in %s", zFilename, zRevision); }else{ fossil_fatal("no history for file: %s", zFilename); } } db_finalize(&q); db_end_transaction(0); } /* ** Return a color from a gradient. */ unsigned gradient_color(unsigned c1, unsigned c2, int n, int i){ unsigned c; /* Result color */ unsigned x1, x2; if( i==0 || n==0 ) return c1; else if(i>=n) return c2; x1 = (c1>>16)&0xff; x2 = (c2>>16)&0xff; c = (x1*(n-i) + x2*i)/n<<16 & 0xff0000; x1 = (c1>>8)&0xff; x2 = (c2>>8)&0xff; c |= (x1*(n-i) + x2*i)/n<<8 & 0xff00; x1 = c1&0xff; x2 = c2&0xff; c |= (x1*(n-i) + x2*i)/n & 0xff; return c; } /* ** WEBPAGE: annotate ** WEBPAGE: blame ** WEBPAGE: praise ** ** URL: /annotate?checkin=ID&filename=FILENAME ** URL: /blame?checkin=ID&filename=FILENAME ** URL: /praise?checkin=ID&filename=FILENAME ** ** Show the most recent change to each line of a text file. /annotate shows ** the date of the changes and the check-in hash (with a link to the ** check-in). /blame and /praise also show the user who made the check-in. ** ** Reverse Annotations: Normally, these web pages look at versions of ** FILENAME moving backwards in time back toward the root check-in. However, ** if the origin= query parameter is used to specify some future check-in ** (example: "origin=trunk") then these pages show changes moving towards ** that alternative origin. Thus using "origin=trunk" on an historical ** version of the file shows the first time each line in the file was changed ** or removed by any subsequent check-in. ** ** Query parameters: ** ** checkin=ID The check-in at which to start the annotation ** filename=FILENAME The filename. ** filevers=BOOLEAN Show file versions rather than check-in versions ** limit=LIMIT Limit the amount of analysis. LIMIT can be one of: ** none No limit ** Xs As much as can be computed in X seconds ** N N versions ** log=BOOLEAN Show a log of versions analyzed ** origin=ID The origin check-in. If unspecified, the root ** check-in over the entire repository is used. ** Specify "origin=trunk" or similar for a reverse ** annotation ** w=BOOLEAN Ignore whitespace */ void annotation_page(void){ int i; const char *zLimit; /* Depth limit */ u64 annFlags = DIFF_STRIP_EOLCR; int showLog; /* True to display the log */ int fileVers; /* Show file version instead of check-in versions */ int ignoreWs; /* Ignore whitespace */ const char *zFilename; /* Name of file to annotate */ const char *zRevision; /* Name of check-in from which to start annotation */ const char *zCI; /* The check-in containing zFilename */ const char *zOrigin; /* The origin of the analysis */ int szHash; /* Number of characters in %S display */ char *zLink; Annotator ann; HQuery url; struct AnnVers *p; unsigned clr1, clr2, clr; int bBlame = g.zPath[0]!='a';/* True for BLAME output. False for ANNOTATE. */ /* Gather query parameters */ login_check_credentials(); if( !g.perm.Read ){ login_needed(g.anon.Read); return; } if( exclude_spiders() ) return; fossil_nice_default(); zFilename = P("filename"); zRevision = PD("checkin",0); zOrigin = P("origin"); zLimit = P("limit"); showLog = PB("log"); fileVers = PB("filevers"); ignoreWs = PB("w"); if( ignoreWs ) annFlags |= DIFF_IGNORE_ALLWS; cgi_check_for_malice(); /* compute the annotation */ annotate_file(&ann, zFilename, zRevision, zLimit, zOrigin, annFlags); zCI = ann.aVers[0].zMUuid; /* generate the web page */ style_set_current_feature("annotate"); style_header("Annotation For %h", zFilename); if( bBlame ){ url_initialize(&url, "blame"); }else{ url_initialize(&url, "annotate"); } url_add_parameter(&url, "checkin", P("checkin")); url_add_parameter(&url, "filename", zFilename); if( zLimit ){ url_add_parameter(&url, "limit", zLimit); } url_add_parameter(&url, "w", ignoreWs ? "1" : "0"); url_add_parameter(&url, "log", showLog ? "1" : "0"); url_add_parameter(&url, "filevers", fileVers ? "1" : "0"); style_submenu_checkbox("w", "Ignore Whitespace", 0, 0); style_submenu_checkbox("log", "Log", 0, "toggle_annotation_log"); style_submenu_checkbox("filevers", "Link to Files", 0, 0); if( ann.bMoreToDo ){ style_submenu_element("All Ancestors", "%s", url_render(&url, "limit", "none", 0, 0)); } if( skin_detail_boolean("white-foreground") ){ clr1 = 0xa04040; clr2 = 0x4059a0; }else{ clr1 = 0xffb5b5; /* Recent changes: red (hot) */ clr2 = 0xb5e0ff; /* Older changes: blue (cold) */ } for(p=ann.aVers, i=0; i if( zOrigin ){ zLink = href("%R/finfo?name=%t&from=%!S&to=%!S",zFilename,zCI,zOrigin); }else{ zLink = href("%R/finfo?name=%t&from=%!S",zFilename,zCI); } @

Versions of %z(zLink)%h(zFilename) analyzed:

@
    for(p=ann.aVers, i=0; i%s(p->zDate) @ check-in %z(href("%R/info/%!S",p->zMUuid))%S(p->zMUuid) @ artifact %z(href("%R/artifact/%!S",p->zFUuid))%S(p->zFUuid) @ } @
@
@ if( !ann.bMoreToDo ){ assert( ann.origId==0 ); /* bMoreToDo always set for a point-to-point */ @

Origin for each line in @ %z(href("%R/finfo?name=%h&from=%!S", zFilename, zCI))%h(zFilename) @ from check-in %z(href("%R/info/%!S",zCI))%S(zCI):

}else if( ann.origId>0 ){ @

Lines of @ %z(href("%R/finfo?name=%h&from=%!S", zFilename, zCI))%h(zFilename) @ from check-in %z(href("%R/info/%!S",zCI))%S(zCI) @ that are changed by the sequence of edits moving toward @ check-in %z(href("%R/info/%!S",zOrigin))%S(zOrigin):

}else{ @

Lines added by the %d(ann.nVers) most recent ancestors of @ %z(href("%R/finfo?name=%h&from=%!S", zFilename, zCI))%h(zFilename) @ from check-in %z(href("%R/info/%!S",zCI))%S(zCI):

} @
  szHash = 10;
  for(i=0; i=0 ){
        struct AnnVers *p = ann.aVers+iVers;
        const char *zUuid = fileVers ? p->zFUuid : p->zMUuid;
        char *zLink = xhref("target='infowindow'", "%R/info/%!S", zUuid);
        sqlite3_snprintf(sizeof(zPrefix), zPrefix,
             ""
             "%s%.10s %s %13.13s:",
             p->zBgColor, zLink, zUuid, p->zDate, p->zUser);
        fossil_free(zLink);
      }else{
        sqlite3_snprintf(sizeof(zPrefix), zPrefix, "%*s", szHash+26, "");
      }
    }else{
      if( iVers>=0 ){
        struct AnnVers *p = ann.aVers+iVers;
        const char *zUuid = fileVers ? p->zFUuid : p->zMUuid;
        char *zLink = xhref("target='infowindow'", "%R/info/%!S", zUuid);
        sqlite3_snprintf(sizeof(zPrefix), zPrefix,
             ""
             "%s%.10s %s %4d:",
             p->zBgColor, zLink, zUuid, p->zDate, i+1);
        fossil_free(zLink);
      }else{
        sqlite3_snprintf(sizeof(zPrefix), zPrefix, "%*s%4d:",szHash+12,"",i+1);
      }
    }
    @ %s(zPrefix) %h(z)

  }
  @ 
style_finish_page(); } /* ** COMMAND: annotate ** COMMAND: blame ** COMMAND: praise* ** ** Usage: %fossil annotate|blame|praise ?OPTIONS? FILENAME ** ** Output the text of a file with markings to show when each line of the file ** was last modified. The version currently checked out is shown by default. ** Other versions may be specified using the -r option. The "annotate" command ** shows line numbers and omits the username. The "blame" and "praise" commands ** show the user who made each check-in. ** ** Reverse Annotations: Normally, these commands look at versions of ** FILENAME moving backwards in time back toward the root check-in, and ** thus the output shows the most recent change to each line. However, ** if the -o|--origin option is used to specify some future check-in ** (example: "-o trunk") then these commands show changes moving towards ** that alternative origin. Thus using "-o trunk" on an historical version ** of the file shows the first time each line in the file was changed or ** removed by any subsequent check-in. ** ** Options: ** --filevers Show file version numbers rather than ** check-in versions ** -r|--revision VERSION The specific check-in containing the file ** -l|--log List all versions analyzed ** -n|--limit LIMIT LIMIT can be one of: ** N Up to N versions ** Xs As much as possible in X seconds ** none No limit ** -o|--origin VERSION The origin check-in. By default this is the ** root of the repository. Set to "trunk" or ** similar for a reverse annotation. ** -w|--ignore-all-space Ignore white space when comparing lines ** -Z|--ignore-trailing-space Ignore whitespace at line end ** ** See also: [[info]], [[finfo]], [[timeline]] */ void annotate_cmd(void){ const char *zRevision; /* Revision name, or NULL for current check-in */ Annotator ann; /* The annotation of the file */ int i; /* Loop counter */ const char *zLimit; /* The value to the -n|--limit option */ const char *zOrig; /* The value for -o|--origin */ int showLog; /* True to show the log */ int fileVers; /* Show file version instead of check-in versions */ u64 annFlags = 0; /* Flags to control annotation properties */ int bBlame = 0; /* True for BLAME output. False for ANNOTATE. */ int szHash; /* Display size of a version hash */ Blob treename; /* Name of file to be annotated */ char *zFilename; /* Name of file to be annotated */ bBlame = g.argv[1][0]!='a'; zRevision = find_option("r","revision",1); zLimit = find_option("limit","n",1); zOrig = find_option("origin","o",1); showLog = find_option("log","l",0)!=0; if( find_option("ignore-trailing-space","Z",0)!=0 ){ annFlags = DIFF_IGNORE_EOLWS; } if( find_option("ignore-all-space","w",0)!=0 ){ annFlags = DIFF_IGNORE_ALLWS; /* stronger than DIFF_IGNORE_EOLWS */ } fileVers = find_option("filevers",0,0)!=0; db_must_be_within_tree(); /* We should be done with options.. */ verify_all_options(); if( g.argc<3 ) { usage("FILENAME"); } annFlags |= DIFF_STRIP_EOLCR; file_tree_name(g.argv[2], &treename, 0, 1); zFilename = blob_str(&treename); annotate_file(&ann, zFilename, zRevision, zLimit, zOrig, annFlags); if( showLog ){ struct AnnVers *p; for(p=ann.aVers, i=0; izDate, p->zMUuid, p->zFUuid); } fossil_print("---------------------------------------------------\n"); } szHash = length_of_S_display(); for(i=0; i=0 ){ p = ann.aVers + iVers; fossil_print("%S %s %13.13s: %.*s\n", fileVers ? p->zFUuid : p->zMUuid, p->zDate, p->zUser, n, z); }else{ fossil_print("%*s %.*s\n", szHash+26, "", n, z); } }else{ if( iVers>=0 ){ p = ann.aVers + iVers; fossil_print("%S %s %5d: %.*s\n", fileVers ? p->zFUuid : p->zMUuid, p->zDate, i+1, n, z); }else{ fossil_print("%*s %5d: %.*s\n", szHash+11, "", i+1, n, z); } } } }