/* ** Copyright (c) 2006 D. Richard Hipp ** ** This program is free software; you can redistribute it and/or ** modify it under the terms of the GNU General Public ** License version 2 as published by the Free Software Foundation. ** ** This program is distributed in the hope that it will be useful, ** but WITHOUT ANY WARRANTY; without even the implied warranty of ** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ** General Public License for more details. ** ** You should have received a copy of the GNU General Public ** License along with this library; if not, write to the ** Free Software Foundation, Inc., 59 Temple Place - Suite 330, ** Boston, MA 02111-1307, USA. ** ** Author contact information: ** drh@hwaci.com ** http://www.hwaci.com/drh/ ** ******************************************************************************* ** ** Procedures to store and retrieve records from the repository */ #include "config.h" #include "content.h" #include /* NOTE(review): this third #include has no header name. assert() is called further down in this file, so the standard assert header is presumably what was intended -- confirm and restore. */ /* ** Macros for debugging. CONTENT_TRACE(X) expands to a printf of X when the ** #if below is enabled and to nothing otherwise. */ #if 0 # define CONTENT_TRACE(X) printf X; #else # define CONTENT_TRACE(X) #endif /* ** The artifact retrieval cache */ #define MX_CACHE_CNT 50 /* Maximum number of positive cache entries */ #define EXPELL_INTERVAL 5 /* How often to expel from a full cache */ static struct { int n; /* Current number of positive cache entries */ int nextAge; /* Age counter for implementing LRU */ int skipCnt; /* Used to limit entries expelled from cache */ struct { /* One instance of this for each cache entry */ int rid; /* Artifact id */ int age; /* Age. Newer is larger */ Blob content; /* Content of the artifact */ } a[MX_CACHE_CNT]; /* The positive cache */ /* ** The missing artifact cache. ** ** Artifacts whose record IDs are in contentCache.missing cannot be retrieved ** either because they are phantoms or because they are a delta that ** depends on a phantom. Artifacts whose content we are certain is ** available are in contentCache.available. 
If an artifact is in neither cache ** then its current availability is unknown. */ Bag missing; /* Cache of artifacts that are incomplete */ Bag available; /* Cache of artifacts that are complete */ } contentCache; /* ** Clear the content cache. ** ** NOTE(review): the text of this function looks truncated. The loop header ** below stops right after its initializer and is followed directly by the ** tail of a SQL string and by code that uses rid, pBlob, src, srcid, q and ** rc, none of which are declared in this function. That code ends with ** "return rc" and records rid in contentCache.missing or ** contentCache.available, so it presumably belongs to the artifact ** retrieval routine -- restore the missing text before relying on it. */ void content_clear_cache(void){ int i; for(i=0; i=0", rid); if( db_step(&q)==SQLITE_ROW ){ Blob delta; db_ephemeral_blob(&q, 0, &delta); blob_uncompress(&delta, &delta); blob_init(pBlob,0,0); blob_delta_apply(&src, &delta, pBlob); blob_reset(&delta); rc = 1; } db_finalize(&q); /* Save the srcid artifact in the cache. The src Blob is stored in the ** cache entry by struct assignment; on the other branch it is reset ** instead. NOTE(review): the condition below reads contentCache.n=0, ** which is an assignment; part of the original comparison appears to be ** missing -- confirm against version control. */ if( contentCache.n=0 ){ contentCache.a[i].content = src; contentCache.a[i].age = contentCache.nextAge++; contentCache.a[i].rid = srcid; CONTENT_TRACE(("%*sadd %d to cache\n", bag_count(&inProcess), "", srcid)) }else{ blob_reset(&src); } } bag_remove(&inProcess, srcid); }else{ /* No delta required. Read content directly from the database */ db_prepare(&q, "SELECT content FROM blob WHERE rid=%d AND size>=0", rid); if( db_step(&q)==SQLITE_ROW ){ db_ephemeral_blob(&q, 0, pBlob); blob_uncompress(pBlob, pBlob); rc = 1; } db_finalize(&q); } if( rc==0 ){ bag_insert(&contentCache.missing, rid); }else{ bag_insert(&contentCache.available, rid); } return rc; } /* ** Get the contents of a file within a given baseline. ** ** The revision name is resolved with name_to_rid() and the resulting ** artifact is fetched and handed to manifest_parse(). ** ** NOTE(review): the loop over the parsed manifest is cut off right after ** its initializer. The text that follows tests pBlob, which is not a ** parameter of this function, so it presumably belongs to a different ** routine whose beginning is missing -- restore before relying on it. */ int content_get_historical_file( const char *revision, /* Name of the baseline containing the file */ const char *file, /* Name of the file */ Blob *content /* Write file content here */ ){ Blob mfile; Manifest m; int i, rid=0; rid = name_to_rid(revision); content_get(rid, &mfile); if( manifest_parse(&m, &mfile) ){ for(i=0; i=0 || pBlob==0 ){ /* Either the entry is not a phantom or it is a phantom but we ** have no data with which to dephantomize it. In either case, ** there is nothing for us to do other than return the RID. 
*/ db_finalize(&s1); db_end_transaction(0); return rid; } }else{ rid = 0; /* No entry with the same UUID currently exists */ markAsUnclustered = 1; } db_finalize(&s1); /* Construct a received-from ID if we do not already have one */ if( g.rcvid==0 ){ db_multi_exec( "INSERT INTO rcvfrom(uid, mtime, nonce, ipaddr)" "VALUES(%d, julianday('now'), %Q, %Q)", g.userUid, g.zNonce, g.zIpAddr ); g.rcvid = db_last_insert_rowid(); } /* Compress the incoming content into cmpr; cmpr is what gets bound to the ** :data parameter below. NOTE(review): pBlob is used here unconditionally, ** yet the new-entry branch below tests !pBlob -- confirm whether pBlob can ** be NULL at this point. */ blob_compress(pBlob, &cmpr); if( rid>0 ){ /* We are just adding data to a phantom. The row is updated in place, ** the rid is removed from the phantom table, and the artifact is marked ** available when it has no delta source or its source is available. */ db_prepare(&s1, "UPDATE blob SET rcvid=%d, size=%d, content=:data WHERE rid=%d", g.rcvid, size, rid ); db_bind_blob(&s1, ":data", &cmpr); db_exec(&s1); db_multi_exec("DELETE FROM phantom WHERE rid=%d", rid); if( srcId==0 || content_is_available(srcId) ){ isDephantomize = 1; content_mark_available(rid); } }else{ /* We are creating a new entry */ db_prepare(&s1, "INSERT INTO blob(rcvid,size,uuid,content)" "VALUES(%d,%d,'%b',:data)", g.rcvid, size, &hash ); db_bind_blob(&s1, ":data", &cmpr); db_exec(&s1); rid = db_last_insert_rowid(); if( !pBlob ){ db_multi_exec("INSERT OR IGNORE INTO phantom VALUES(%d)", rid); } } blob_reset(&cmpr); /* If the srcId is specified, then the data we just added is ** really a delta. Record this fact in the delta table. */ if( srcId ){ db_multi_exec("REPLACE INTO delta(rid,srcid) VALUES(%d,%d)", rid, srcId); } /* If rid was previously recorded in contentCache.missing and it either ** has no delta source or its source is available, mark it available. ** This is skipped when the dephantomize path above already did so. */ if( !isDephantomize && bag_find(&contentCache.missing, rid) && (srcId==0 || content_is_available(srcId)) ){ content_mark_available(rid); } if( isDephantomize ){ after_dephantomize(rid, 0); } /* Add the element to the unclustered table if it has never been ** previously seen. 
*/ if( markAsUnclustered ){ db_multi_exec("INSERT OR IGNORE INTO unclustered VALUES(%d)", rid); } /* Finish the transaction and cleanup */ db_finalize(&s1); db_end_transaction(0); blob_reset(&hash); /* Make arrangements to verify that the data can be recovered ** before we commit */ verify_before_commit(rid); return rid; } /* ** Create a new phantom with the given UUID and return its artifact ID. */ int content_new(const char *zUuid){ int rid; static Stmt s1, s2; assert( g.repositoryOpen ); db_begin_transaction(); if( uuid_is_shunned(zUuid) ){ return 0; } db_static_prepare(&s1, "INSERT INTO blob(rcvid,size,uuid,content)" "VALUES(0,-1,:uuid,NULL)" ); db_bind_text(&s1, ":uuid", zUuid); db_exec(&s1); rid = db_last_insert_rowid(); db_static_prepare(&s2, "INSERT INTO phantom VALUES(:rid)" ); db_bind_int(&s2, ":rid", rid); db_exec(&s2); bag_insert(&contentCache.missing, rid); db_end_transaction(0); return rid; } /* ** COMMAND: test-content-put ** ** Extract a blob from the database and write it into a file. */ void test_content_put_cmd(void){ int rid; Blob content; if( g.argc!=3 ) usage("FILENAME"); db_must_be_within_tree(); user_select(); blob_read_from_file(&content, g.argv[2]); rid = content_put(&content, 0, 0); printf("inserted as record %d\n", rid); } /* ** Make sure the content at rid is the original content and is not a ** delta. 
*/ void content_undelta(int rid){ if( findSrcid(rid)>0 ){ Blob x; if( content_get(rid, &x) ){ Stmt s; db_prepare(&s, "UPDATE blob SET content=:c, size=%d WHERE rid=%d", blob_size(&x), rid); blob_compress(&x, &x); db_bind_blob(&s, ":c", &x); db_exec(&s); db_finalize(&s); blob_reset(&x); db_multi_exec("DELETE FROM delta WHERE rid=%d", rid); } } } /* ** COMMAND: test-content-undelta ** ** Make sure the content at RECORDID is not a delta */ void test_content_undelta_cmd(void){ int rid; if( g.argc!=2 ) usage("RECORDID"); db_must_be_within_tree(); rid = atoi(g.argv[2]); content_undelta(rid); } /* ** Change the storage of rid so that it is a delta of srcid. ** ** If rid is already a delta from some other place then no ** conversion occurs and this is a no-op unless force==1. ** ** If srcid is a delta that depends on rid, then srcid is ** converted to undeltaed text. ** ** If either rid or srcid contain less than 50 bytes, or if the ** resulting delta does not achieve a compression of at least 25% on ** its own the rid is left untouched. ** ** NOTE: IMHO the creation of the delta should be defered until after ** the blob sizes have been checked. Doing it before the check as is ** done now the code will generate a delta just to immediately throw ** it away, wasting space and time. 
*/
void content_deltify(int rid, int srcid, int force){
  int s;
  Blob data, src, delta;
  Stmt s1, s2;

  if( srcid==rid ) return;
  if( !force && findSrcid(rid)>0 ) return;

  /* Walk the delta chain starting at srcid.  If it reaches rid, then
  ** srcid depends on rid, so srcid is converted to full content first. */
  s = srcid;
  while( (s = findSrcid(s))>0 ){
    if( s==rid ){
      content_undelta(srcid);
      break;
    }
  }

  /* Give up if either artifact cannot be retrieved.  A blob whose fetch
  ** failed is not touched again; only a successfully fetched one is
  ** reset. */
  if( !content_get(srcid, &src) ) return;
  if( !content_get(rid, &data) ){
    blob_reset(&src);
    return;
  }

  /* Test the 50-byte thresholds before building the delta so that no
  ** delta is computed only to be discarded.  (This is the deferral
  ** suggested by the NOTE in the header comment.) */
  if( blob_size(&src)>=50 && blob_size(&data)>=50 ){
    blob_delta_create(&src, &data, &delta);
    /* Store the delta only if it is smaller than 75% of the full text. */
    if( blob_size(&delta) < blob_size(&data)*0.75 ){
      blob_compress(&delta, &delta);
      db_prepare(&s1, "UPDATE blob SET content=:data WHERE rid=%d", rid);
      db_prepare(&s2, "REPLACE INTO delta(rid,srcid)VALUES(%d,%d)", rid, srcid);
      db_bind_blob(&s1, ":data", &delta);
      /* Update the blob and the delta table in the same transaction. */
      db_begin_transaction();
      db_exec(&s1);
      db_exec(&s2);
      db_end_transaction(0);
      db_finalize(&s1);
      db_finalize(&s2);
      verify_before_commit(rid);
    }
    blob_reset(&delta);
  }
  blob_reset(&src);
  blob_reset(&data);
}

/*
** COMMAND: test-content-deltify
**
** Convert the content at RID into a delta from SRCID.  FORCE is passed
** through as the force argument of content_deltify().
*/
void test_content_deltify_cmd(void){
  if( g.argc!=5 ) usage("RID SRCID FORCE");
  db_must_be_within_tree();
  content_deltify(atoi(g.argv[2]), atoi(g.argv[3]), atoi(g.argv[4]));
}