/* ** Copyright (c) 2006 D. Richard Hipp ** ** This program is free software; you can redistribute it and/or ** modify it under the terms of the Simplified BSD License (also ** known as the "2-Clause License" or "FreeBSD License".) ** This program is distributed in the hope that it will be useful, ** but without any warranty; without even the implied warranty of ** merchantability or fitness for a particular purpose. ** ** Author contact information: ** drh@hwaci.com ** http://www.hwaci.com/drh/ ** ******************************************************************************* ** ** Procedures store and retrieve records from the repository */ #include "config.h" #include "content.h" #include /* ** The artifact retrival cache */ static struct { i64 szTotal; /* Total size of all entries in the cache */ int n; /* Current number of eache entries */ int nAlloc; /* Number of slots allocated in a[] */ int nextAge; /* Age counter for implementing LRU */ int skipCnt; /* Used to limit entries expelled from cache */ struct cacheLine { /* One instance of this for each cache entry */ int rid; /* Artifact id */ int age; /* Age. Newer is larger */ Blob content; /* Content of the artifact */ } *a; /* The positive cache */ Bag inCache; /* Set of artifacts currently in cache */ /* ** The missing artifact cache. ** ** Artifacts whose record ID are in missingCache cannot be retrieved ** either because they are phantoms or because they are a delta that ** depends on a phantom. Artifacts whose content we are certain is ** available are in availableCache. If an artifact is in neither cache ** then its current availablity is unknown. */ Bag missing; /* Cache of artifacts that are incomplete */ Bag available; /* Cache of artifacts that are complete */ } contentCache; /* ** Remove the oldest element from the content cache */ static void content_cache_expire_oldest(void){ int i; int mnAge = contentCache.nextAge; int mn = -1; for(i=0; i=0 ){ bag_remove(&contentCache.inCache, contentCache.a[mn].rid); contentCache.szTotal -= blob_size(&contentCache.a[mn].content); blob_reset(&contentCache.a[mn].content); contentCache.n--; contentCache.a[mn] = contentCache.a[contentCache.n]; } } /* ** Add an entry to the content cache */ void content_cache_insert(int rid, Blob *pBlob){ struct cacheLine *p; if( contentCache.n>500 || contentCache.szTotal>50000000 ){ content_cache_expire_oldest(); } if( contentCache.n>=contentCache.nAlloc ){ contentCache.nAlloc = contentCache.nAlloc*2 + 10; contentCache.a = fossil_realloc(contentCache.a, contentCache.nAlloc*sizeof(contentCache.a[0])); } p = &contentCache.a[contentCache.n++]; p->rid = rid; p->age = contentCache.nextAge++; contentCache.szTotal += blob_size(pBlob); p->content = *pBlob; blob_zero(pBlob); bag_insert(&contentCache.inCache, rid); } /* ** Clear the content cache. */ void content_clear_cache(void){ int i; for(i=0; i=0"); db_bind_int(&q, ":rid", rid); if( db_step(&q)==SQLITE_ROW ){ db_ephemeral_blob(&q, 0, pBlob); blob_uncompress(pBlob, pBlob); rc = 1; } db_reset(&q); return rc; } /* ** Extract the content for ID rid and put it into the ** uninitialized blob. Return 1 on success. If the record ** is a phantom, zero pBlob and return 0. */ int content_get(int rid, Blob *pBlob){ int rc; int i; int nextRid; assert( g.repositoryOpen ); blob_zero(pBlob); if( rid==0 ) return 0; /* Early out if we know the content is not available */ if( bag_find(&contentCache.missing, rid) ){ return 0; } /* Look for the artifact in the cache first */ if( bag_find(&contentCache.inCache, rid) ){ for(i=0; i0 ){ n++; if( n>=nAlloc ){ nAlloc = nAlloc*2 + 10; a = fossil_realloc(a, nAlloc*sizeof(a[0])); } a[n] = nextRid; } mx = n; rc = content_get(a[n], pBlob); n--; while( rc && n>=0 ){ rc = content_of_blob(a[n], &delta); if( rc ){ blob_delta_apply(pBlob, &delta, &next); blob_reset(&delta); if( (mx-n)%8==0 ){ content_cache_insert(a[n+1], pBlob); }else{ blob_reset(pBlob); } *pBlob = next; } n--; } free(a); if( !rc ) blob_reset(pBlob); } if( rc==0 ){ bag_insert(&contentCache.missing, rid); }else{ bag_insert(&contentCache.available, rid); } return rc; } /* ** COMMAND: artifact ** ** Usage: %fossil artifact ARTIFACT-ID ?OUTPUT-FILENAME? ?OPTIONS? ** ** Extract an artifact by its SHA1 hash and write the results on ** standard output, or if the optional 4th argument is given, in ** the named output file. ** ** Options: ** ** -R|--repository FILE Extract artifacts from repository FILE */ void artifact_cmd(void){ int rid; Blob content; const char *zFile; db_find_and_open_repository(OPEN_ANY_SCHEMA, 0); if( g.argc!=4 && g.argc!=3 ) usage("ARTIFACT-ID ?FILENAME? ?OPTIONS?"); zFile = g.argc==4 ? g.argv[3] : "-"; rid = name_to_rid(g.argv[2]); content_get(rid, &content); blob_write_to_file(&content, zFile); } /* ** COMMAND: test-content-rawget ** ** Extract a blob from the database and write it into a file. This ** version does not expand the delta. */ void test_content_rawget_cmd(void){ int rid; Blob content; const char *zFile; if( g.argc!=4 && g.argc!=3 ) usage("RECORDID ?FILENAME?"); zFile = g.argc==4 ? g.argv[3] : "-"; db_must_be_within_tree(); rid = name_to_rid(g.argv[2]); blob_zero(&content); db_blob(&content, "SELECT content FROM blob WHERE rid=%d", rid); blob_uncompress(&content, &content); blob_write_to_file(&content, zFile); } /* ** The following flag is set to disable the automatic calls to ** manifest_crosslink() when a record is dephantomized. This ** flag can be set (for example) when doing a clone when we know ** that rebuild will be run over all records at the conclusion ** of the operation. */ static int ignoreDephantomizations = 0; /* ** When a record is converted from a phantom to a real record, ** if that record has other records that are derived by delta, ** then call manifest_crosslink() on those other records. ** ** If the formerly phantom record or any of the other records ** derived by delta from the former phantom are a baseline manifest, ** then also invoke manifest_crosslink() on the delta-manifests ** associated with that baseline. ** ** Tail recursion is used to minimize stack depth. */ void after_dephantomize(int rid, int linkFlag){ Stmt q; int nChildAlloc = 0; int *aChild = 0; Blob content; if( ignoreDephantomizations ) return; while( rid ){ int nChildUsed = 0; int i; /* Parse the object rid itself */ if( linkFlag ){ content_get(rid, &content); manifest_crosslink(rid, &content); blob_reset(&content); } /* Parse all delta-manifests that depend on baseline-manifest rid */ db_prepare(&q, "SELECT rid FROM orphan WHERE baseline=%d", rid); while( db_step(&q)==SQLITE_ROW ){ int child = db_column_int(&q, 0); if( nChildUsed>=nChildAlloc ){ nChildAlloc = nChildAlloc*2 + 10; aChild = fossil_realloc(aChild, nChildAlloc*sizeof(aChild)); } aChild[nChildUsed++] = child; } db_finalize(&q); for(i=0; i=nChildAlloc ){ nChildAlloc = nChildAlloc*2 + 10; aChild = fossil_realloc(aChild, nChildAlloc*sizeof(aChild)); } aChild[nChildUsed++] = child; } db_finalize(&q); for(i=1; i0 ? aChild[0] : 0; linkFlag = 1; } free(aChild); } /* ** Turn dephantomization processing on or off. */ void content_enable_dephantomize(int onoff){ ignoreDephantomizations = !onoff; } /* ** Write content into the database. Return the record ID. If the ** content is already in the database, just return the record ID. ** ** If srcId is specified, then pBlob is delta content from ** the srcId record. srcId might be a phantom. If nBlob>0 then the ** pBlob value has already been compressed and nBlob is its uncompressed ** size. If nBlob>0 then zUuid must be valid. ** ** zUuid is the UUID of the artifact, if it is specified. When srcId is ** specified then zUuid must always be specified. If srcId is zero, ** and zUuid is zero then the correct zUuid is computed from pBlob. ** ** If the record already exists but is a phantom, the pBlob content ** is inserted and the phatom becomes a real record. */ int content_put(Blob *pBlob, const char *zUuid, int srcId, int nBlob){ int size; int rid; Stmt s1; Blob cmpr; Blob hash; int markAsUnclustered = 0; int isDephantomize = 0; assert( g.repositoryOpen ); assert( pBlob!=0 ); assert( srcId==0 || zUuid!=0 ); if( zUuid==0 ){ assert( pBlob!=0 ); assert( nBlob==0 ); sha1sum_blob(pBlob, &hash); }else{ blob_init(&hash, zUuid, -1); } size = nBlob ? nBlob : blob_size(pBlob); db_begin_transaction(); /* Check to see if the entry already exists and if it does whether ** or not the entry is a phantom */ db_prepare(&s1, "SELECT rid, size FROM blob WHERE uuid=%B", &hash); if( db_step(&s1)==SQLITE_ROW ){ rid = db_column_int(&s1, 0); if( db_column_int(&s1, 1)>=0 || pBlob==0 ){ /* Either the entry is not a phantom or it is a phantom but we ** have no data with which to dephantomize it. In either case, ** there is nothing for us to do other than return the RID. */ db_finalize(&s1); db_end_transaction(0); return rid; } }else{ rid = 0; /* No entry with the same UUID currently exists */ markAsUnclustered = 1; } db_finalize(&s1); /* Construct a received-from ID if we do not already have one */ if( g.rcvid==0 ){ db_multi_exec( "INSERT INTO rcvfrom(uid, mtime, nonce, ipaddr)" "VALUES(%d, julianday('now'), %Q, %Q)", g.userUid, g.zNonce, g.zIpAddr ); g.rcvid = db_last_insert_rowid(); } if( nBlob ){ cmpr = pBlob[0]; }else{ blob_compress(pBlob, &cmpr); } if( rid>0 ){ /* We are just adding data to a phantom */ db_prepare(&s1, "UPDATE blob SET rcvid=%d, size=%d, content=:data WHERE rid=%d", g.rcvid, size, rid ); db_bind_blob(&s1, ":data", &cmpr); db_exec(&s1); db_multi_exec("DELETE FROM phantom WHERE rid=%d", rid); if( srcId==0 || content_is_available(srcId) ){ isDephantomize = 1; content_mark_available(rid); } }else{ /* We are creating a new entry */ db_prepare(&s1, "INSERT INTO blob(rcvid,size,uuid,content)" "VALUES(%d,%d,'%b',:data)", g.rcvid, size, &hash ); db_bind_blob(&s1, ":data", &cmpr); db_exec(&s1); rid = db_last_insert_rowid(); if( !pBlob ){ db_multi_exec("INSERT OR IGNORE INTO phantom VALUES(%d)", rid); } if( g.markPrivate ){ db_multi_exec("INSERT INTO private VALUES(%d)", rid); markAsUnclustered = 0; } } if( nBlob==0 ) blob_reset(&cmpr); /* If the srcId is specified, then the data we just added is ** really a delta. Record this fact in the delta table. */ if( srcId ){ db_multi_exec("REPLACE INTO delta(rid,srcid) VALUES(%d,%d)", rid, srcId); } if( !isDephantomize && bag_find(&contentCache.missing, rid) && (srcId==0 || content_is_available(srcId)) ){ content_mark_available(rid); } if( isDephantomize ){ after_dephantomize(rid, 0); } /* Add the element to the unclustered table if has never been ** previously seen. */ if( markAsUnclustered ){ db_multi_exec("INSERT OR IGNORE INTO unclustered VALUES(%d)", rid); } /* Finish the transaction and cleanup */ db_finalize(&s1); db_end_transaction(0); blob_reset(&hash); /* Make arrangements to verify that the data can be recovered ** before we commit */ verify_before_commit(rid); return rid; } /* ** Create a new phantom with the given UUID and return its artifact ID. */ int content_new(const char *zUuid){ int rid; static Stmt s1, s2, s3; assert( g.repositoryOpen ); db_begin_transaction(); if( uuid_is_shunned(zUuid) ){ db_end_transaction(0); return 0; } db_static_prepare(&s1, "INSERT INTO blob(rcvid,size,uuid,content)" "VALUES(0,-1,:uuid,NULL)" ); db_bind_text(&s1, ":uuid", zUuid); db_exec(&s1); rid = db_last_insert_rowid(); db_static_prepare(&s2, "INSERT INTO phantom VALUES(:rid)" ); db_bind_int(&s2, ":rid", rid); db_exec(&s2); if( g.markPrivate ){ db_multi_exec("INSERT INTO private VALUES(%d)", rid); }else{ db_static_prepare(&s3, "INSERT INTO unclustered VALUES(:rid)" ); db_bind_int(&s3, ":rid", rid); db_exec(&s3); } bag_insert(&contentCache.missing, rid); db_end_transaction(0); return rid; } /* ** COMMAND: test-content-put ** ** Extract a blob from a file and write it into the database */ void test_content_put_cmd(void){ int rid; Blob content; if( g.argc!=3 ) usage("FILENAME"); db_must_be_within_tree(); user_select(); blob_read_from_file(&content, g.argv[2]); rid = content_put(&content, 0, 0, 0); printf("inserted as record %d\n", rid); } /* ** Make sure the content at rid is the original content and is not a ** delta. */ void content_undelta(int rid){ if( findSrcid(rid)>0 ){ Blob x; if( content_get(rid, &x) ){ Stmt s; db_prepare(&s, "UPDATE blob SET content=:c, size=%d WHERE rid=%d", blob_size(&x), rid); blob_compress(&x, &x); db_bind_blob(&s, ":c", &x); db_exec(&s); db_finalize(&s); blob_reset(&x); db_multi_exec("DELETE FROM delta WHERE rid=%d", rid); } } } /* ** COMMAND: test-content-undelta ** ** Make sure the content at RECORDID is not a delta */ void test_content_undelta_cmd(void){ int rid; if( g.argc!=2 ) usage("RECORDID"); db_must_be_within_tree(); rid = atoi(g.argv[2]); content_undelta(rid); } /* ** Return true if the given RID is marked as PRIVATE. */ int content_is_private(int rid){ static Stmt s1; int rc; db_static_prepare(&s1, "SELECT 1 FROM private WHERE rid=:rid" ); db_bind_int(&s1, ":rid", rid); rc = db_step(&s1); db_reset(&s1); return rc==SQLITE_ROW; } /* ** Make sure an artifact is public. */ void content_make_public(int rid){ static Stmt s1; db_static_prepare(&s1, "DELETE FROM private WHERE rid=:rid" ); db_bind_int(&s1, ":rid", rid); db_exec(&s1); } /* ** Change the storage of rid so that it is a delta of srcid. ** ** If rid is already a delta from some other place then no ** conversion occurs and this is a no-op unless force==1. ** ** Never generate a delta that carries a private artifact into a public ** artifact. Otherwise, when we go to send the public artifact on a ** sync operation, the other end of the sync will never be able to receive ** the source of the delta. It is OK to delta private->private and ** public->private and public->public. Just no private->public delta. ** ** If srcid is a delta that depends on rid, then srcid is ** converted to undeltaed text. ** ** If either rid or srcid contain less than 50 bytes, or if the ** resulting delta does not achieve a compression of at least 25% on ** its own the rid is left untouched. */ void content_deltify(int rid, int srcid, int force){ int s; Blob data, src, delta; Stmt s1, s2; if( srcid==rid ) return; if( !force && findSrcid(rid)>0 ) return; if( content_is_private(srcid) && !content_is_private(rid) ){ return; } s = srcid; while( (s = findSrcid(s))>0 ){ if( s==rid ){ content_undelta(srcid); break; } } content_get(srcid, &src); if( blob_size(&src)<50 ){ blob_reset(&src); return; } content_get(rid, &data); if( blob_size(&data)<50 ){ blob_reset(&src); blob_reset(&data); return; } blob_delta_create(&src, &data, &delta); if( blob_size(&delta) < blob_size(&data)*0.75 ){ blob_compress(&delta, &delta); db_prepare(&s1, "UPDATE blob SET content=:data WHERE rid=%d", rid); db_prepare(&s2, "REPLACE INTO delta(rid,srcid)VALUES(%d,%d)", rid, srcid); db_bind_blob(&s1, ":data", &delta); db_begin_transaction(); db_exec(&s1); db_exec(&s2); db_end_transaction(0); db_finalize(&s1); db_finalize(&s2); verify_before_commit(rid); } blob_reset(&src); blob_reset(&data); blob_reset(&delta); } /* ** COMMAND: test-content-deltify ** ** Convert the content at RID into a delta from SRCID. */ void test_content_deltify_cmd(void){ if( g.argc!=5 ) usage("RID SRCID FORCE"); db_must_be_within_tree(); content_deltify(atoi(g.argv[2]), atoi(g.argv[3]), atoi(g.argv[4])); }