Many hyperlinks are disabled.
Use anonymous login
to enable hyperlinks.
Changes In Branch commitWarningV2 Excluding Merge-Ins
This is equivalent to a diff from ef6c243ed9 to 7d3a06b89a
2012-11-02
| ||
02:27 | Merge commit warning and file content type detection changes to trunk. ... (check-in: 0c7c61447f user: mistachkin tags: trunk) | |
2012-11-01
| ||
20:19 | Add detection of binary data with no leading UTF-16 byte-order-mark. ... (Closed-Leaf check-in: 7d3a06b89a user: mistachkin tags: commitWarningV2) | |
20:09 | Improve detection of UTF-8, UTF-16, binary data, and carriage returns during commit operations. ... (check-in: c837e44445 user: mistachkin tags: commitWarningV2) | |
12:32 | merge trunk ... (check-in: 9e97de3410 user: jan.nijtmans tags: use-blob_strip_bom) | |
11:48 | merge trunk; let looks_like_text() give different values for UTF-16 BE/LE. Not used yet. ... (check-in: 348637dedf user: jan.nijtmans tags: improve_looks_like_binary) | |
10:20 | Restore Style fix, which got lost by [618258421767778c] ... (check-in: ef6c243ed9 user: jan.nijtmans tags: trunk) | |
07:40 | don't check for same BOM twice ... (check-in: 8c32e6f0dd user: jan.nijtmans tags: trunk) | |
Changes to src/checkin.c.
︙ | ︙ | |||
884 885 886 887 888 889 890 | /* ** Issue a warning and give the user an opportunity to abandon out ** if a Unicode (UTF-16) byte-order-mark (BOM) or a \r\n line ending ** is seen in a text file. */ static void commit_warning(const Blob *p, int crnlOk, const char *zFilename){ | | < > | > > | 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 | /* ** Issue a warning and give the user an opportunity to abandon out ** if a Unicode (UTF-16) byte-order-mark (BOM) or a \r\n line ending ** is seen in a text file. */ static void commit_warning(const Blob *p, int crnlOk, const char *zFilename){ int eType; /* return value of looks_like_utf8/utf16() */ int fUnicode; /* return value of starts_with_utf16_bom() */ char *zMsg; /* Warning message */ Blob fname; /* Relative pathname of the file */ static int allOk = 0; /* Set to true to disable this routine */ if( allOk ) return; fUnicode = starts_with_utf16_bom(p); eType = fUnicode ? looks_like_utf16(p) : looks_like_utf8(p); if( eType==0 || eType==-1 || fUnicode ){ const char *zWarning; Blob ans; char cReply; if( eType==-1 && fUnicode ){ zWarning = "Unicode and CR/NL line endings"; }else if( eType==-1 ){ if( crnlOk ){ return; /* We don't want CR/NL warnings for this file. */ } zWarning = "CR/NL line endings"; }else if( eType==0 ){ zWarning = "binary data"; }else{ zWarning = "Unicode"; } file_relative_name(zFilename, &fname, 0); blob_zero(&ans); zMsg = mprintf( "%s contains %s. commit anyhow (a=all/y/N)? ", |
︙ | ︙ |
Changes to src/diff.c.
︙ | ︙ | |||
46 47 48 49 50 51 52 | */ #define DIFF_CANNOT_COMPUTE_BINARY \ "cannot compute difference between binary files\n" #define DIFF_CANNOT_COMPUTE_SYMLINK \ "cannot compute difference between symlink and regular file\n" | | | | 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 | */ #define DIFF_CANNOT_COMPUTE_BINARY \ "cannot compute difference between binary files\n" #define DIFF_CANNOT_COMPUTE_SYMLINK \ "cannot compute difference between symlink and regular file\n" #define looks_like_binary(blob) (looks_like_utf8((blob)) == 0) #endif /* INTERFACE */ /* ** Maximum length of a line in a text file, in bytes. (8192) */ #define LENGTH_MASK_SZ 13 #define LENGTH_MASK ((1<<LENGTH_MASK_SZ)-1) /* ** Information about each line of a file being diffed. ** |
︙ | ︙ | |||
177 178 179 180 181 182 183 | ** values are: ** ** (1) -- The content appears to consist entirely of text, with lines ** delimited by line-feed characters; however, the encoding may ** not be UTF-8. ** ** (0) -- The content appears to be binary because it contains embedded | | | | | | | | > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > | 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 | ** values are: ** ** (1) -- The content appears to consist entirely of text, with lines ** delimited by line-feed characters; however, the encoding may ** not be UTF-8. ** ** (0) -- The content appears to be binary because it contains embedded ** NUL characters or an extremely long line. Since this function ** does not understand UTF-16, it may falsely consider UTF-16 text ** to be binary. ** ** (-1) -- The content appears to consist entirely of text, with lines ** delimited by carriage-return, line-feed pairs; however, the ** encoding may not be UTF-8. ** */ int looks_like_utf8(const Blob *pContent){ const char *z = blob_buffer(pContent); unsigned int n = blob_size(pContent); int j, c; int result = 1; /* Assume UTF-8 text with no CR/NL */ /* Check individual lines. 
*/ if( n==0 ) return result; /* Empty file -> text */ c = *z; if( c==0 ) return 0; /* Zero byte in a file -> binary */ j = (c!='\n'); while( --n>0 ){ c = *++z; ++j; if( c==0 ) return 0; /* Zero byte in a file -> binary */ if( c=='\n' ){ if( z[-1]=='\r' ){ result = -1; /* Contains CR/NL, continue */ } if( j>LENGTH_MASK ){ return 0; /* Very long line -> binary */ } j = 0; } } if( j>LENGTH_MASK ){ return 0; /* Very long line -> binary */ } return result; /* No problems seen -> not binary */ } /* ** Maximum length of a line in a text file, in UTF-16 characters. (4096) ** The number of bytes represented by this value cannot exceed LENGTH_MASK ** bytes, because that is the line buffer size by the diff engine. */ #define UTF16_LENGTH_MASK_SZ (LENGTH_MASK_SZ-1) #define UTF16_LENGTH_MASK ((1<<UTF16_LENGTH_MASK_SZ)-1) /* ** The carriage-return / line-feed characters in the UTF-16be and UTF-16le ** encodings. */ #define UTF16BE_CR ((wchar_t)'\r') #define UTF16BE_LF ((wchar_t)'\n') #define UTF16LE_CR (((wchar_t)'\r')<<(sizeof(wchar_t)<<2)) #define UTF16LE_LF (((wchar_t)'\n')<<(sizeof(wchar_t)<<2)) /* ** This function attempts to scan each logical line within the blob to ** determine the type of content it appears to contain. Possible return ** values are: ** ** (1) -- The content appears to consist entirely of text, with lines ** delimited by line-feed characters; however, the encoding may ** not be UTF-16. ** ** (0) -- The content appears to be binary because it contains embedded ** NUL characters or an extremely long line. Since this function ** does not understand UTF-8, it may falsely consider UTF-8 text ** to be binary. ** ** (-1) -- The content appears to consist entirely of text, with lines ** delimited by carriage-return, line-feed pairs; however, the ** encoding may not be UTF-16. 
** */ int looks_like_utf16(const Blob *pContent){ const wchar_t *z = (wchar_t *)blob_buffer(pContent); unsigned int n = blob_size(pContent); int j, c; int result = 1; /* Assume UTF-16 text with no CR/NL */ /* Check individual lines. */ if( n==0 ) return result; /* Empty file -> text */ if( n%2 ) return 0; /* Odd number of bytes -> binary (or UTF-8) */ c = *z; if( c==0 ) return 0; /* NUL character in a file -> binary */ j = ((c!=UTF16BE_LF) && (c!=UTF16LE_LF)); while( (n-=2)>0 ){ c = *++z; ++j; if( c==0 ) return 0; /* NUL character in a file -> binary */ if( c==UTF16BE_LF || c==UTF16LE_LF ){ if( z[-1]==UTF16BE_CR || z[-1]==UTF16LE_CR ){ result = -1; /* Contains CR/NL, continue */ } if( j>UTF16_LENGTH_MASK ){ return 0; /* Very long line -> binary */ } j = 0; } } if( j>UTF16_LENGTH_MASK ){ return 0; /* Very long line -> binary */ } return result; /* No problems seen -> not binary */ } /* ** This function returns non-zero if the blob starts with a UTF-16le or ** UTF-16be byte-order-mark (BOM). |
︙ | ︙ |