sm_base.h

Go to the documentation of this file.
00001 /* -*- mode:C++; c-basic-offset:4 -*-
00002      Shore-MT -- Multi-threaded port of the SHORE storage manager
00003    
00004                        Copyright (c) 2007-2009
00005       Data Intensive Applications and Systems Labaratory (DIAS)
00006                Ecole Polytechnique Federale de Lausanne
00007    
00008                          All Rights Reserved.
00009    
00010    Permission to use, copy, modify and distribute this software and
00011    its documentation is hereby granted, provided that both the
00012    copyright notice and this permission notice appear in all copies of
00013    the software, derivative works or modified versions, and any
00014    portions thereof, and that both notices appear in supporting
00015    documentation.
00016    
00017    This code is distributed in the hope that it will be useful, but
00018    WITHOUT ANY WARRANTY; without even the implied warranty of
00019    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. THE AUTHORS
00020    DISCLAIM ANY LIABILITY OF ANY KIND FOR ANY DAMAGES WHATSOEVER
00021    RESULTING FROM THE USE OF THIS SOFTWARE.
00022 */
00023 
00024 /*<std-header orig-src='shore' incl-file-exclusion='SM_BASE_H'>
00025 
00026  $Id: sm_base.h,v 1.157 2010/10/27 17:04:23 nhall Exp $
00027 
00028 SHORE -- Scalable Heterogeneous Object REpository
00029 
00030 Copyright (c) 1994-99 Computer Sciences Department, University of
00031                       Wisconsin -- Madison
00032 All Rights Reserved.
00033 
00034 Permission to use, copy, modify and distribute this software and its
00035 documentation is hereby granted, provided that both the copyright
00036 notice and this permission notice appear in all copies of the
00037 software, derivative works or modified versions, and any portions
00038 thereof, and that both notices appear in supporting documentation.
00039 
00040 THE AUTHORS AND THE COMPUTER SCIENCES DEPARTMENT OF THE UNIVERSITY
00041 OF WISCONSIN - MADISON ALLOW FREE USE OF THIS SOFTWARE IN ITS
00042 "AS IS" CONDITION, AND THEY DISCLAIM ANY LIABILITY OF ANY KIND
00043 FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
00044 
00045 This software was developed with support by the Advanced Research
00046 Project Agency, ARPA order number 018 (formerly 8230), monitored by
00047 the U.S. Army Research Laboratory under contract DAAB07-91-C-Q518.
00048 Further funding for this work was provided by DARPA through
00049 Rome Research Laboratory Contract No. F30602-97-2-0247.
00050 
00051 */
00052 
00053 #ifndef SM_BASE_H
00054 #define SM_BASE_H
00055 
00056 #include "w_defines.h"
00057 
00058 /*  -- do not edit anything above this line --   </std-header>*/
00059 
00060 /**\file sm_base.h
00061  * \ingroup Macros
00062  */
00063 
00064 #ifdef __GNUG__
00065 #pragma interface
00066 #endif
00067 
00068 #include <climits>
00069 #ifndef OPTION_H
00070 #include "option.h"
00071 #endif
00072 #ifndef __opt_error_def_gen_h__
00073 #include "opt_error_def_gen.h"
00074 #endif
00075 
00076 
00077 class ErrLog;
00078 class sm_stats_info_t;
00079 class xct_t;
00080 class xct_i;
00081 
00082 class device_m;
00083 class io_m;
00084 class bf_m;
00085 class comm_m;
00086 class log_m;
00087 class lock_m;
00088 
00089 class tid_t;
00090 class option_t;
00091 
00092 #ifndef        SM_EXTENTSIZE
00093 #define        SM_EXTENTSIZE        8
00094 #endif
00095 #ifndef        SM_LOG_PARTITIONS
00096 #define        SM_LOG_PARTITIONS        8
00097 #endif
00098 
00099 typedef   w_rc_t        rc_t;
00100 
00101 
00102 /**\cond skip
00103  * This structure collects the depth on construction
00104  * and checks that it matches the depth on destruction; this
00105  * is to ensure that we haven't forgotten to release
00106  * an anchor somewhere.
00107  * It's been extended to check the # times
00108  * we have acquired the 1thread_log_mutex. 
00109  *
00110  * We're defining the CHECK_NESTING_VARIABLES macro b/c
00111  * this work is spread out and we want to have 1 place to
00112  * determine whether it's turned on or off; don't want to 
00113  * make the mistake of changing the debug level (on which
00114  * it depends) in only one of several places.
00115  *
00116  * NOTE: this doesn't work in a multi-threaded xct context.
00117  * That's b/c the check is too late -- once the count goes
00118  * to zero, another thread can change it and throw off all the
00119  * counts. To be sure, we'd have to use a TLS copy as well
00120  * as the common copy of these counts.
00121  */
00122 #if W_DEBUG_LEVEL > 0
00123 #define CHECK_NESTING_VARIABLES 1
00124 #else
00125 #define CHECK_NESTING_VARIABLES 0
00126 #endif
00127 struct check_compensated_op_nesting {
00128 #if CHECK_NESTING_VARIABLES
00129     xct_t* _xd;
00130     int _depth;
00131     int _depth_of_acquires;
00132     int _line;
00133     const char *const _file;
00134     // static methods are so we can avoid having to
00135     // include xct.h here.
00136     static int compensated_op_depth(xct_t* xd, int dflt);
00137     static int acquire_1thread_log_depth(xct_t* xd, int dflt);
00138 
00139     check_compensated_op_nesting(xct_t* xd, int line, const char *const file)
00140     : _xd(xd), 
00141     _depth(_xd? compensated_op_depth(_xd, 0) : 0), 
00142     _depth_of_acquires(_xd? acquire_1thread_log_depth(_xd, 0) : 0), 
00143     _line(line),
00144     _file(file)
00145     {
00146     }
00147 
00148     ~check_compensated_op_nesting() {
00149         if(_xd) {
00150             if( _depth != compensated_op_depth(_xd, _depth) ) {
00151                 fprintf(stderr, 
00152                     "th.%d check_compensated_op_nesting(%d,%s) depth was %d is %d\n",
00153                     sthread_t::me()->id,
00154                     _line, _file, _depth, compensated_op_depth(_xd, _depth));
00155             }
00156 
00157             if(_depth_of_acquires != acquire_1thread_log_depth(_xd, _depth)) {
00158                 fprintf(stderr, 
00159                 "th.%d check_acquire_1thread_log_depth (%d,%s) depth was %d is %d\n",
00160                     sthread_t::me()->id,
00161                     _line, _file, _depth_of_acquires, 
00162                     acquire_1thread_log_depth(_xd, _depth));
00163             }
00164 
00165             w_assert0(_depth == compensated_op_depth(_xd, _depth));
00166             w_assert0(_depth_of_acquires == acquire_1thread_log_depth(_xd, _depth));
00167         }
00168     }
00169 #else
00170     check_compensated_op_nesting(xct_t*, int, const char *const) { }
00171 #endif
00172 };
00173 
00174 
00175 /**\brief Encapsulates a few types uses in the API */
00176 class smlevel_0 : public w_base_t {
00177 public:
00178     // Give these enums names for doxygen purposes:
00179     enum error_constant_t { eNOERROR = 0, eFAILURE = -1 };
00180     enum sm_constant_t { 
00181         page_sz = SM_PAGESIZE,        // page size (SM_PAGESIZE is set by makemake)
00182         ext_sz = SM_EXTENTSIZE,        // extent size
00183         max_exts = max_int4,        // max no. extents, must fit extnum_t
00184 #if defined(_POSIX_PATH_MAX)
00185         max_devname = _POSIX_PATH_MAX,        // max length of unix path name
00186     // BEWARE: this might be larger than you want.  Array sizes depend on it.
00187     // The default might be small enough, e.g., 256; getconf() yields the upper
00188     // bound on this value.
00189 #elif defined(MAXPATHLEN)
00190         max_devname = MAXPATHLEN,
00191 #else
00192         max_devname = 1024,        
00193 #endif
00194         max_vols = 20,                // max mounted volumes
00195         max_xct_thread = 20,        // max threads in a xct
00196         max_servers = 15,       // max servers to be connected with
00197         max_keycomp = 20,        // max key component (for btree)
00198         max_openlog = SM_LOG_PARTITIONS,        // max # log partitions
00199         max_dir_cache = max_vols * 10,
00200 
00201         /* XXX I want to propogate sthread_t::iovec_max here, but
00202            it doesn't work because of sm_app.h not including
00203            the thread package. */
00204         max_many_pages = 8,
00205 
00206         srvid_map_sz = (max_servers - 1) / 8 + 1,
00207         ext_map_sz_in_bytes = ((ext_sz + 7) / 8),
00208 
00209         dummy = 0
00210     };
00211 
00212     enum {
00213         max_rec_len = max_uint4
00214     };
00215 
00216     typedef sthread_base_t::fileoff_t fileoff_t;
00217     /*
00218      * Sizes-in-Kbytes for for things like volumes and devices.
00219      * A KB is assumes to be 1024 bytes.
00220      * Note: a different type was used for added type checking.
00221      */
00222     typedef sthread_t::fileoff_t smksize_t;
00223     typedef w_base_t::base_stat_t base_stat_t; 
00224 
00225     /**\endcond skip */
00226 
00227     /*
00228      * rather than automatically aborting the transaction, when the
00229      * _log_warn_percent is exceeded, this callback is made, with a
00230      * pointer to the xct that did the writing, and with the
00231      * expectation that the result will be one of:
00232      * - return value == RCOK --> proceed
00233      * - return value == eUSERABORT --> victim to abort is given in the argument
00234      *
00235      * The server has the responsibility for choosing a victim and 
00236      * for aborting the victim transaction. 
00237      *
00238      */
00239 
00240     /**\brief Log space warning callback function type.  
00241      *
00242      * For more details of how this is used, see the constructor ss_m::ss_m().
00243      *
00244      * Storage manager methods check the available log space. 
00245      * If the log is in danger of filling to the point that it will be
00246      * impossible to abort a transaction, a
00247      * callback is made to the server.  The callback function is of this type.
00248      * The danger point is a threshold determined by the option sm_log_warn. 
00249      *
00250      * The callback
00251      * function is meant to choose a victim xct and 
00252      * tell if the xct should be
00253      * aborted by returning RC(eUSERABORT).  
00254      *
00255      * Any other RC value is returned to the server through the call stack.
00256      *
00257      * The arguments:
00258      * @param[in] iter    Pointer to an iterator over all xcts.
00259      * @param[out] victim    Victim will be returned here. This is an in/out
00260      * paramter and is initially populated with the transaction that is
00261      * attached to the running thread.
00262      * @param[in] curr    Bytes of log consumed by active transactions.
00263      * @param[in] thresh   Threshhold just exceeded. 
00264      * @param[in] logfile   Character string name of oldest file to archive.
00265      *                     
00266      *  This function must be careful not to return the same victim more
00267      *  than once, even though the callback may be called many 
00268      *  times before the victim is completely aborted.
00269      *
00270      *  When this function has archived the given log file, it needs
00271      *  to notify the storage manager of that fact by calling
00272      *  ss_m::log_file_was_archived(logfile)
00273      */
00274     typedef w_rc_t (*LOG_WARN_CALLBACK_FUNC) (
00275             xct_i*      iter,     
00276             xct_t *&    victim, 
00277             fileoff_t   curr, 
00278             fileoff_t   thresh, 
00279             const char *logfile
00280         );
00281     /**\brief Callback function type for restoring an archived log file.
00282      *
00283      * @param[in] fname   Original file name (with path).
00284      * @param[in] needed   Partition number of the file needed.
00285      *
00286      *  An alternative to aborting a transaction (when the log fills)
00287      *  is to archive log files.
00288      *  The server can use the log directory name to locate these files,
00289      *  and may use the iterator and the static methods of xct_t to 
00290      *  determine which log file(s) to archive.
00291      *
00292      *  Archiving and removing the older log files will work only if
00293      *  the server also provides a LOG_ARCHIVED_CALLBACK_FUNCTION 
00294      *  to restore the
00295      *  archived log files when the storage manager needs them for
00296      *  rollback.
00297      *  This is the function type used for that purpose.
00298      *
00299      *  The function must locate the archived log file containing for the
00300      *  partition number \a num, which was a suffix of the original log file's
00301      *  name.
00302      *  The log file must be restored with its original name.  
00303      */
00304     typedef    w_base_t::uint4_t partition_number_t; 
00305     typedef w_rc_t (*LOG_ARCHIVED_CALLBACK_FUNC) (
00306             const char *fname,
00307             partition_number_t num
00308         );
00309 
00310 /**\cond skip */
00311     enum switch_t {
00312         ON = 1,
00313         OFF = 0
00314     };
00315 /**\endcond skip */
00316 
00317     /**\brief Comparison types used in scan_index_i
00318      * \enum cmp_t
00319      * Shorthand for CompareOp.
00320      */
00321     enum cmp_t { bad_cmp_t=badOp, eq=eqOp,
00322                  gt=gtOp, ge=geOp, lt=ltOp, le=leOp };
00323 
00324 
00325     /* used by lock escalation routines */
00326     enum escalation_options {
00327         dontEscalate        = max_int4_minus1,
00328         dontEscalateDontPassOn,
00329         dontModifyThreshold        = -1
00330     };
00331 
00332     /**\brief Types of stores.
00333      * \enum store_t
00334      */
00335     enum store_t { 
00336         t_bad_store_t, 
00337         /// a b-tree or r-tree index
00338         t_index, 
00339         /// a file of records
00340         t_file, 
00341         /// t_lgrec is used for storing large record pages 
00342         /// and is always associated with some t_file store
00343         t_lgrec 
00344     };
00345     
00346     // types of indexes
00347 
00348     /**\brief Index types */
00349     enum ndx_t { 
00350         t_bad_ndx_t,             // illegal value
00351         t_btree,                 // B+tree with duplicates
00352         t_uni_btree,             // Unique-key btree
00353         t_rtree                  // R*tree
00354     };
00355 
00356     /**\enum concurrency_t 
00357      * \brief 
00358      * Lock granularities 
00359      * \details
00360      * - t_cc_bad Illegal
00361      * - t_cc_none No locking
00362      * - t_cc_record Record-level locking for files & records
00363      * - t_cc_page Page-level locking for files & records 
00364      * - t_cc_file File-level locking for files & records 
00365      * - t_cc_vol Volume-level locking for files and indexes 
00366      * - t_cc_kvl Key-value locking for B+-Tree indexes
00367      * - t_cc_im Aries IM locking for B+-Tree indexes : experimental
00368      * - t_cc_modkvl Modified key-value locking: experimental
00369      * - t_cc_append Used internally \todo true?
00370      */
00371     enum concurrency_t {
00372         t_cc_bad,                // this is an illegal value
00373         t_cc_none,                // no locking
00374         t_cc_record,                // record-level
00375         t_cc_page,                // page-level
00376         t_cc_file,                // file-level
00377         t_cc_vol,
00378         t_cc_kvl,                // key-value
00379         t_cc_im,                 // ARIES IM, not supported yet
00380         t_cc_modkvl,                 // modified ARIES KVL, for paradise use
00381         t_cc_append                 // append-only with scan_file_i
00382     };
00383 
00384     /**\enum pg_policy_t 
00385      * \brief 
00386      * File-compaction policy for creating records.
00387      * \details
00388      * - t_append : append new record to file (preserve order)
00389      * - t_cache  : look in cache for pages with space for new record (does
00390      *              not preserve order)
00391      * - t_compact: keep file compact even if it means searching the file
00392      *              for space in which to create the file (does not preserve
00393      *              order)
00394      *
00395      * These are masks - the following combinations are sensible:
00396      *
00397      * - t_append                        -- preserve sort order
00398      * - t_cache | t_append              -- check the cache first, 
00399      *                                      append if no luck
00400      * - t_cache | t_compact | t_append  -- append to file as a last resort
00401      */
00402     enum pg_policy_t {
00403         t_append        = 0x01, // retain sort order (cache 0 pages)
00404         t_cache        = 0x02, // look in n cached pgs 
00405         t_compact        = 0x04 // scan file for space in pages 
00406         
00407     };
00408 
00409 /**\cond skip */
00410 
00411     /* 
00412      * smlevel_0::operating_mode is always set to 
00413      * ONE of these, but the function in_recovery() tests for
00414      * any of them, so we'll give them bit-mask values
00415      */
00416     enum operating_mode_t {
00417         t_not_started = 0, 
00418         t_in_analysis = 0x1,
00419         t_in_redo = 0x2,
00420         t_in_undo = 0x4,
00421         t_forward_processing = 0x8
00422     };
00423 
00424     static concurrency_t cc_alg;        // concurrency control algorithm
00425     static bool          cc_adaptive;        // is PS-AA (adaptive) algorithm used?
00426 
00427 #include "e_error_enum_gen.h"
00428 
00429     static const w_error_info_t error_info[];
00430     static void init_errorcodes();
00431 
00432     static void  add_to_global_stats(const sm_stats_info_t &from);
00433     static void  add_from_global_stats(sm_stats_info_t &to);
00434 
00435     static device_m* dev;
00436     static io_m* io;
00437     static bf_m* bf;
00438     static lock_m* lm;
00439 
00440     static log_m* log;
00441     static tid_t* redo_tid;
00442 
00443     static LOG_WARN_CALLBACK_FUNC log_warn_callback;
00444     static LOG_ARCHIVED_CALLBACK_FUNC log_archived_callback;
00445     static fileoff_t              log_warn_trigger; 
00446     static int                    log_warn_exceed_percent; 
00447 
00448     static int    dcommit_timeout; // to convey option to coordinator,
00449                                    // if it is created by VAS
00450 
00451     static ErrLog* errlog;
00452 
00453     static bool        shutdown_clean;
00454     static bool        shutting_down;
00455     static bool        logging_enabled;
00456     static bool        lock_caching_default;
00457     static bool        do_prefetch;
00458 
00459     static operating_mode_t operating_mode;
00460     static bool in_recovery() { 
00461         return ((operating_mode & 
00462                 (t_in_redo | t_in_undo | t_in_analysis)) !=0); }
00463     static bool in_recovery_analysis() { 
00464         return ((operating_mode & t_in_analysis) !=0); }
00465     static bool in_recovery_undo() { 
00466         return ((operating_mode & t_in_undo ) !=0); }
00467     static bool in_recovery_redo() { 
00468         return ((operating_mode & t_in_redo ) !=0); }
00469 
00470     // these variable are the default values for lock escalation counts
00471     static w_base_t::int4_t defaultLockEscalateToPageThreshold;
00472     static w_base_t::int4_t defaultLockEscalateToStoreThreshold;
00473     static w_base_t::int4_t defaultLockEscalateToVolumeThreshold;
00474 
00475     // These variables control the size of the log.
00476     static fileoff_t max_logsz; // max log file size
00477 
00478     // This variable controls checkpoint frequency.
00479     // Checkpoints are taken every chkpt_displacement bytes
00480     // written to the log.
00481     static fileoff_t chkpt_displacement;
00482 
00483     // The volume_format_version is used to test compatability
00484     // of software with a volume.  Whenever a change is made
00485     // to the SM software that makes it incompatible with
00486     // previouly formatted volumes, this volume number should
00487     // be incremented.  The value is set in sm.cpp.
00488     static w_base_t::uint4_t volume_format_version;
00489 
00490     // This is a zeroed page for use wherever initialized memory
00491     // is needed.
00492     static char zero_page[page_sz];
00493 
00494     // option for controlling background buffer flush thread
00495     static option_t* _backgroundflush;
00496 
00497 
00498     /*
00499      * Pre-defined store IDs -- see also vol.h
00500      * 0 -- is reserved for the extent map and the store map
00501      * 1 -- directory (see dir.cpp)
00502      * 2 -- root index (see sm.cpp)
00503      */
00504     enum {
00505         store_id_extentmap = 0,
00506         store_id_directory = 1,
00507         store_id_root_index = 2 
00508     };
00509 
00510     enum {
00511             eINTERNAL = fcINTERNAL,
00512             eOS = fcOS,
00513             eOUTOFMEMORY = fcOUTOFMEMORY,
00514             eNOTFOUND = fcNOTFOUND,
00515             eNOTIMPLEMENTED = fcNOTIMPLEMENTED
00516     };
00517 
00518     enum store_flag_t {
00519         // NB: this had better match sm_store_property_t (sm_int_3.h) !!!
00520         // or at least be convted properly every time we come through the API
00521         st_bad            = 0x0,
00522         st_regular        = 0x01, // fully logged
00523         st_tmp            = 0x02, // space logging only, 
00524                                   // file destroy on dismount/restart
00525         st_load_file      = 0x04, // not stored in the stnode_t, 
00526                             // only passed down to
00527                             // io_m and then converted to tmp and added to the
00528                             // list of load files for the xct.
00529                             // no longer needed
00530         st_insert_file     = 0x08,        // stored in stnode, but not on page.
00531                             // new pages are saved as tmp, old pages as regular.
00532         st_empty           = 0x100 // store might be empty - used ONLY
00533                             // as a function argument, NOT stored
00534                             // persistently.  Nevertheless, it's
00535                             // defined here to be sure that if other
00536                             // store flags are added, this doesn't
00537                             // conflict with them.
00538     };
00539 
00540     /* 
00541      * for use by set_store_deleting_log; 
00542      * type of operation to perform on the stnode 
00543      */
00544     enum store_operation_t {
00545             t_delete_store, 
00546             t_create_store, 
00547             t_set_deleting, 
00548             t_set_store_flags, 
00549             t_set_first_ext};
00550 
00551     enum store_deleting_t  {
00552             t_not_deleting_store = 0,  // must be 0: code assumes it
00553             t_deleting_store, 
00554             t_store_freeing_exts, 
00555             t_unknown_deleting};
00556 /**\endcond skip */
00557 };
00558 
00559 /**\cond skip */
00560 ostream&
00561 operator<<(ostream& o, smlevel_0::store_flag_t flag);
00562 
00563 ostream&
00564 operator<<(ostream& o, const smlevel_0::store_operation_t op);
00565 
00566 ostream&
00567 operator<<(ostream& o, const smlevel_0::store_deleting_t value);
00568 
00569 /**\endcond skip */
00570 
00571 /*<std-footer incl-file-exclusion='SM_BASE_H'>  -- do not edit anything below this line -- */
00572 
00573 #endif          /*</std-footer>*/

Generated on Mon Nov 8 11:12:38 2010 for Shore Storage Manager by  doxygen 1.4.7