/*
 *   Copyright (C) International Business Machines Corp., 2000-2003
 *   Portions Copyright (C) Christoph Hellwig, 2001-2002
 *
 *   This program is free software;  you can redistribute it and/or modify
 *   it under the terms of the GNU General Public License as published by
 *   the Free Software Foundation; either version 2 of the License, or
 *   (at your option) any later version.
 *
 *   This program is distributed in the hope that it will be useful,
 *   but WITHOUT ANY WARRANTY;  without even the implied warranty of
 *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See
 *   the GNU General Public License for more details.
 *
 *   You should have received a copy of the GNU General Public License
 *   along with this program;  if not, write to the Free Software
 *   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 */

#include <linux/fs.h>
#include <linux/init.h>
#include <linux/buffer_head.h>
#include <linux/mempool.h>
#include <linux/delay.h>
#include "jfs_incore.h"
#include "jfs_superblock.h"
#include "jfs_filsys.h"
#include "jfs_metapage.h"
#include "jfs_txnmgr.h"
#include "jfs_debug.h"

static DEFINE_SPINLOCK(meta_lock);

#ifdef CONFIG_JFS_STATISTICS
static struct {
	uint	pagealloc;	/* # of page allocations */
	uint	pagefree;	/* # of page frees */
	uint	lockwait;	/* # of sleeping lock_metapage() calls */
} mpStat;
#endif

#define HASH_BITS 10		/* This makes hash_table 1 4K page */
#define HASH_SIZE (1 << HASH_BITS)
static struct metapage **hash_table = NULL;
static unsigned long hash_order;
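
/*
 * Two levels of locking protect metapages: the meta_lock spinlock
 * serializes the hash table and the per-metapage reference counts,
 * while the META_locked bit in mp->flag acts as a sleeping lock that
 * serializes use of an individual metapage (waiters sleep on mp->wait).
 */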

static inline int metapage_locked(struct metapage *mp)
{
	return test_bit(META_locked, &mp->flag);
}

static inline int trylock_metapage(struct metapage *mp)
{
	return test_and_set_bit(META_locked, &mp->flag);
}

static inline void unlock_metapage(struct metapage *mp)
{
	clear_bit(META_locked, &mp->flag);
	wake_up(&mp->wait);
}
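
/*
 * __lock_metapage: sleep until the metapage is unlocked.  Entered and
 * exited with meta_lock held; the spinlock is dropped around schedule()
 * so the holder can make progress and wake us via mp->wait.
 */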
static void __lock_metapage(struct metapage *mp)
{
	DECLARE_WAITQUEUE(wait, current);

	INCREMENT(mpStat.lockwait);

	add_wait_queue_exclusive(&mp->wait, &wait);
	do {
		set_current_state(TASK_UNINTERRUPTIBLE);
		if (metapage_locked(mp)) {
			spin_unlock(&meta_lock);
			schedule();
			spin_lock(&meta_lock);
		}
	} while (trylock_metapage(mp));
	__set_current_state(TASK_RUNNING);
	remove_wait_queue(&mp->wait, &wait);
}

/* needs meta_lock */
static inline void lock_metapage(struct metapage *mp)
{
	if (trylock_metapage(mp))
		__lock_metapage(mp);
}
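
/*
 * Metapage structures come from a private slab backed by a mempool:
 * metadata is allocated in the write and commit paths, where failing
 * an allocation under memory pressure is not an option, so a reserve
 * of METAPOOL_MIN_PAGES elements is kept on hand.
 */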
#define METAPOOL_MIN_PAGES 32
static kmem_cache_t *metapage_cache;
static mempool_t *metapage_mempool;

static void init_once(void *foo, kmem_cache_t *cachep, unsigned long flags)
{
	struct metapage *mp = (struct metapage *)foo;

	if ((flags & (SLAB_CTOR_VERIFY|SLAB_CTOR_CONSTRUCTOR)) ==
	    SLAB_CTOR_CONSTRUCTOR) {
		mp->lid = 0;
		mp->lsn = 0;
		mp->flag = 0;
		mp->data = NULL;
		mp->clsn = 0;
		mp->log = NULL;
		set_bit(META_free, &mp->flag);
		init_waitqueue_head(&mp->wait);
	}
}

static inline struct metapage *alloc_metapage(int gfp_mask)
{
	return mempool_alloc(metapage_mempool, gfp_mask);
}

static inline void free_metapage(struct metapage *mp)
{
	mp->flag = 0;
	set_bit(META_free, &mp->flag);

	mempool_free(mp, metapage_mempool);
}
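
/*
 * A worked example for the hash_order computation in metapage_init()
 * below: the loop picks the smallest order such that PAGE_SIZE << order
 * holds HASH_SIZE (1024) pointers.  With 4K pages that is order 0 on a
 * 32-bit build (1024 * 4 bytes = one page, as the HASH_BITS comment
 * says) and order 1 on a 64-bit build (1024 * 8 bytes = two pages).
 */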
int __init metapage_init(void)
{
	/*
	 * Allocate the metapage structures
	 */
	metapage_cache = kmem_cache_create("jfs_mp", sizeof(struct metapage),
					   0, 0, init_once, NULL);
	if (metapage_cache == NULL)
		return -ENOMEM;

	metapage_mempool = mempool_create(METAPOOL_MIN_PAGES, mempool_alloc_slab,
					  mempool_free_slab, metapage_cache);

	if (metapage_mempool == NULL) {
		kmem_cache_destroy(metapage_cache);
		return -ENOMEM;
	}
	/*
	 * Now the hash list
	 */
	for (hash_order = 0;
	     ((PAGE_SIZE << hash_order) / sizeof(void *)) < HASH_SIZE;
	     hash_order++);
	hash_table =
	    (struct metapage **) __get_free_pages(GFP_KERNEL, hash_order);
	assert(hash_table);
	memset(hash_table, 0, PAGE_SIZE << hash_order);

	return 0;
}

void metapage_exit(void)
{
	mempool_destroy(metapage_mempool);
	kmem_cache_destroy(metapage_cache);
}
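
/*
 * Notes on the hash below: the "i" macro divides the mapping address by
 * the largest power of two that divides sizeof(struct inode), stripping
 * low-order alignment bits that carry no information, and s() folds the
 * bits above HASH_BITS back into the low bits before masking with
 * HASH_SIZE - 1.
 */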
/*
 * Basically same hash as in pagemap.h, but using our hash table
 */
static struct metapage **meta_hash(struct address_space *mapping,
				   unsigned long index)
{
#define i (((unsigned long)mapping)/ \
	   (sizeof(struct inode) & ~(sizeof(struct inode) -1 )))
#define s(x) ((x) + ((x) >> HASH_BITS))
	return hash_table + (s(i + index) & (HASH_SIZE - 1));
#undef i
#undef s
}

static struct metapage *search_hash(struct metapage ** hash_ptr,
				    struct address_space *mapping,
				    unsigned long index)
{
	struct metapage *ptr;

	for (ptr = *hash_ptr; ptr; ptr = ptr->hash_next) {
		if ((ptr->mapping == mapping) && (ptr->index == index))
			return ptr;
	}

	return NULL;
}

static void add_to_hash(struct metapage * mp, struct metapage ** hash_ptr)
{
	if (*hash_ptr)
		(*hash_ptr)->hash_prev = mp;

	mp->hash_prev = NULL;
	mp->hash_next = *hash_ptr;
	*hash_ptr = mp;
}

static void remove_from_hash(struct metapage * mp, struct metapage ** hash_ptr)
{
	if (mp->hash_prev)
		mp->hash_prev->hash_next = mp->hash_next;
	else {
		assert(*hash_ptr == mp);
		*hash_ptr = mp->hash_next;
	}

	if (mp->hash_next)
		mp->hash_next->hash_prev = mp->hash_prev;
}
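
/*
 * __get_metapage: find or create the metapage covering lblock of an
 * inode.  "absolute" selects the block device's mapping rather than the
 * inode's own; "new" grabs a fresh page and zeroes the data instead of
 * reading it from disk.  On success the metapage is returned locked
 * with its reference count raised; drop it with release_metapage().
 *
 * A usage sketch via the wrappers in jfs_metapage.h (read_metapage()
 * passes new = FALSE, get_metapage() passes new = TRUE):
 *
 *	mp = read_metapage(ip, lblkno, PSIZE, 0);
 *	if (mp == NULL)
 *		return -EIO;
 *	... modify the metadata through mp->data ...
 *	mark_metapage_dirty(mp);
 *	release_metapage(mp);
 */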
struct metapage *__get_metapage(struct inode *inode, unsigned long lblock,
				unsigned int size, int absolute,
				unsigned long new)
{
	struct metapage **hash_ptr;
	int l2BlocksPerPage;
	int l2bsize;
	struct address_space *mapping;
	struct metapage *mp;
	unsigned long page_index;
	unsigned long page_offset;

	jfs_info("__get_metapage: inode = 0x%p, lblock = 0x%lx", inode, lblock);

	if (absolute)
		mapping = inode->i_sb->s_bdev->bd_inode->i_mapping;
	else {
		/*
		 * If an nfs client tries to read an inode that is larger
		 * than any existing inodes, we may try to read past the
		 * end of the inode map
		 */
		if ((lblock << inode->i_blkbits) >= inode->i_size)
			return NULL;
		mapping = inode->i_mapping;
	}

	hash_ptr = meta_hash(mapping, lblock);
again:
	spin_lock(&meta_lock);
	mp = search_hash(hash_ptr, mapping, lblock);
	if (mp) {
	      page_found:
		if (test_bit(META_stale, &mp->flag)) {
			spin_unlock(&meta_lock);
			msleep(1);
			goto again;
		}
		mp->count++;
		lock_metapage(mp);
		spin_unlock(&meta_lock);
		if (test_bit(META_discard, &mp->flag)) {
			if (!new) {
				jfs_error(inode->i_sb,
					  "__get_metapage: using a "
					  "discarded metapage");
				release_metapage(mp);
				return NULL;
			}
			clear_bit(META_discard, &mp->flag);
		}
		jfs_info("__get_metapage: found 0x%p, in hash", mp);
		if (mp->logical_size != size) {
			jfs_error(inode->i_sb,
				  "__get_metapage: mp->logical_size != size");
			release_metapage(mp);
			return NULL;
		}
	} else {
		l2bsize = inode->i_blkbits;
		l2BlocksPerPage = PAGE_CACHE_SHIFT - l2bsize;
		page_index = lblock >> l2BlocksPerPage;
		page_offset = (lblock - (page_index << l2BlocksPerPage)) <<
		    l2bsize;
		if ((page_offset + size) > PAGE_CACHE_SIZE) {
			spin_unlock(&meta_lock);
			jfs_err("MetaData crosses page boundary!!");
			return NULL;
		}

		/*
		 * Locks held on aggregate inode pages are usually
		 * not held long, and they are taken in critical code
		 * paths (committing dirty inodes, txCommit thread)
		 *
		 * Attempt to get metapage without blocking, tapping into
		 * reserves if necessary.
		 */
		mp = NULL;
		if (JFS_IP(inode)->fileset == AGGREGATE_I) {
			mp = alloc_metapage(GFP_ATOMIC);
			if (!mp) {
				/*
				 * mempool is supposed to protect us from
				 * failing here.  We will try a blocking
				 * call, but a deadlock is possible here
				 */
				printk(KERN_WARNING
				       "__get_metapage: atomic call to mempool_alloc failed.\n");
				printk(KERN_WARNING
				       "Will attempt blocking call\n");
			}
		}
		if (!mp) {
			struct metapage *mp2;

			spin_unlock(&meta_lock);
			mp = alloc_metapage(GFP_NOFS);
			spin_lock(&meta_lock);

			/* we dropped the meta_lock, we need to search the
			 * hash again.
			 */
			mp2 = search_hash(hash_ptr, mapping, lblock);
			if (mp2) {
				free_metapage(mp);
				mp = mp2;
				goto page_found;
			}
		}

		mp->flag = 0;
		lock_metapage(mp);
		if (absolute)
			set_bit(META_absolute, &mp->flag);
		mp->xflag = COMMIT_PAGE;
		mp->count = 1;
		atomic_set(&mp->nohomeok, 0);
		mp->mapping = mapping;
		mp->index = lblock;
		mp->page = NULL;
		mp->logical_size = size;
		add_to_hash(mp, hash_ptr);
		spin_unlock(&meta_lock);

		if (new) {
			jfs_info("__get_metapage: Calling grab_cache_page");
			mp->page = grab_cache_page(mapping, page_index);
			if (!mp->page) {
				jfs_err("grab_cache_page failed!");
				goto freeit;
			} else {
				INCREMENT(mpStat.pagealloc);
				unlock_page(mp->page);
			}
		} else {
			jfs_info("__get_metapage: Calling read_cache_page");
			mp->page = read_cache_page(mapping, lblock,
				    (filler_t *)mapping->a_ops->readpage, NULL);
			if (IS_ERR(mp->page)) {
				jfs_err("read_cache_page failed!");
				goto freeit;
			} else
				INCREMENT(mpStat.pagealloc);
		}
		mp->data = kmap(mp->page) + page_offset;
	}

	if (new)
		memset(mp->data, 0, PSIZE);

	jfs_info("__get_metapage: returning = 0x%p", mp);
	return mp;

freeit:
	spin_lock(&meta_lock);
	remove_from_hash(mp, hash_ptr);
	free_metapage(mp);
	spin_unlock(&meta_lock);
	return NULL;
}
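
/*
 * hold_metapage: take an extra reference and lock the metapage.  With
 * force set, a busy lock is not waited for; the META_forced bit records
 * that this holder never owned the lock, so release_metapage() just
 * drops the reference without unlocking.
 */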
void hold_metapage(struct metapage * mp, int force)
{
	spin_lock(&meta_lock);

	mp->count++;

	if (force) {
		ASSERT (!(test_bit(META_forced, &mp->flag)));
		if (trylock_metapage(mp))
			set_bit(META_forced, &mp->flag);
	} else
		lock_metapage(mp);

	spin_unlock(&meta_lock);
}
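
/*
 * __write_metapage: push a dirty metapage into the page cache proper.
 * The prepare_write()/commit_write() address_space operations map
 * buffers to disk blocks and mark the page dirty; the actual I/O is
 * left to the normal writeback paths.
 */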
static void __write_metapage(struct metapage * mp)
{
	int l2bsize = mp->mapping->host->i_blkbits;
	int l2BlocksPerPage = PAGE_CACHE_SHIFT - l2bsize;
	unsigned long page_index;
	unsigned long page_offset;
	int rc;

	jfs_info("__write_metapage: mp = 0x%p", mp);

	page_index = mp->page->index;
	page_offset =
	    (mp->index - (page_index << l2BlocksPerPage)) << l2bsize;

	lock_page(mp->page);
	rc = mp->mapping->a_ops->prepare_write(NULL, mp->page, page_offset,
					       page_offset + PSIZE);
	if (rc) {
		jfs_err("prepare_write return %d!", rc);
		ClearPageUptodate(mp->page);
		unlock_page(mp->page);
		clear_bit(META_dirty, &mp->flag);
		return;
	}
	rc = mp->mapping->a_ops->commit_write(NULL, mp->page, page_offset,
					      page_offset + PSIZE);
	if (rc) {
		jfs_err("commit_write returned %d", rc);
	}

	unlock_page(mp->page);
	clear_bit(META_dirty, &mp->flag);

	jfs_info("__write_metapage done");
}
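
/*
 * sync_metapage: write the metapage's page out immediately rather than
 * waiting for writeback; used when META_sync is set.  write_one_page()
 * with wait = 1 blocks until the I/O completes.
 */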
static inline void sync_metapage(struct metapage *mp)
{
	struct page *page = mp->page;

	page_cache_get(page);
	lock_page(page);

	/* we're done with this page - no need to check for errors */
	if (page_has_buffers(page))
		write_one_page(page, 1);
	else
		unlock_page(page);
	page_cache_release(page);
}
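
/*
 * release_metapage: drop a reference.  When the last reference goes
 * away, the page is written back if dirty (synchronously if META_sync),
 * invalidated if META_discard, unhooked from the log sync list and the
 * hash, and the metapage is returned to the mempool.  META_stale keeps
 * lookups away during the window where meta_lock is dropped for the
 * page operations.
 */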
void release_metapage(struct metapage * mp)
{
	struct jfs_log *log;

	jfs_info("release_metapage: mp = 0x%p, flag = 0x%lx", mp, mp->flag);

	spin_lock(&meta_lock);
	if (test_bit(META_forced, &mp->flag)) {
		clear_bit(META_forced, &mp->flag);
		mp->count--;
		spin_unlock(&meta_lock);
		return;
	}

	assert(mp->count);
	if (--mp->count || atomic_read(&mp->nohomeok)) {
		unlock_metapage(mp);
		spin_unlock(&meta_lock);
		return;
	}

	if (mp->page) {
		set_bit(META_stale, &mp->flag);
		spin_unlock(&meta_lock);
		kunmap(mp->page);
		mp->data = NULL;
		if (test_bit(META_dirty, &mp->flag))
			__write_metapage(mp);
		if (test_bit(META_sync, &mp->flag)) {
			sync_metapage(mp);
			clear_bit(META_sync, &mp->flag);
		}

		if (test_bit(META_discard, &mp->flag)) {
			lock_page(mp->page);
			block_invalidatepage(mp->page, 0);
			unlock_page(mp->page);
		}

		page_cache_release(mp->page);
		mp->page = NULL;
		INCREMENT(mpStat.pagefree);
		spin_lock(&meta_lock);
	}

	if (mp->lsn) {
		/*
		 * Remove metapage from logsynclist.
		 */
		log = mp->log;
		LOGSYNC_LOCK(log);
		mp->log = NULL;
		mp->lsn = 0;
		mp->clsn = 0;
		log->count--;
		list_del(&mp->synclist);
		LOGSYNC_UNLOCK(log);
	}
	remove_from_hash(mp, meta_hash(mp->mapping, mp->index));
	spin_unlock(&meta_lock);

	free_metapage(mp);
}
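
/*
 * __invalidate_metapages: called when metadata blocks are freed, so
 * stale contents are neither written back nor returned by later
 * lookups.  Cached metapages are flagged META_discard with their dirty
 * bits cleared; bare page cache pages in the range are invalidated
 * directly.
 */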
void __invalidate_metapages(struct inode *ip, s64 addr, int len)
{
	struct metapage **hash_ptr;
	unsigned long lblock;
	int l2BlocksPerPage = PAGE_CACHE_SHIFT - ip->i_blkbits;
	/* All callers are interested in block device's mapping */
	struct address_space *mapping = ip->i_sb->s_bdev->bd_inode->i_mapping;
	struct metapage *mp;
	struct page *page;

	/*
	 * First, mark metapages to discard.  They will eventually be
	 * released, but should not be written.
	 */
	for (lblock = addr; lblock < addr + len;
	     lblock += 1 << l2BlocksPerPage) {
		hash_ptr = meta_hash(mapping, lblock);
again:
		spin_lock(&meta_lock);
		mp = search_hash(hash_ptr, mapping, lblock);
		if (mp) {
			if (test_bit(META_stale, &mp->flag)) {
				spin_unlock(&meta_lock);
				msleep(1);
				goto again;
			}

			clear_bit(META_dirty, &mp->flag);
			set_bit(META_discard, &mp->flag);
			spin_unlock(&meta_lock);
		} else {
			spin_unlock(&meta_lock);
			page = find_lock_page(mapping, lblock >> l2BlocksPerPage);
			if (page) {
				block_invalidatepage(page, 0);
				unlock_page(page);
				page_cache_release(page);
			}
		}
	}
}
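
/*
 * The counters in mpStat are exported through /proc; jfs_mpstat_read()
 * implements the old read_proc interface (buffer/start/offset/length).
 */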
#ifdef CONFIG_JFS_STATISTICS
int jfs_mpstat_read(char *buffer, char **start, off_t offset, int length,
		    int *eof, void *data)
{
	int len = 0;
	off_t begin;

	len += sprintf(buffer,
		       "JFS Metapage statistics\n"
		       "=======================\n"
		       "page allocations = %d\n"
		       "page frees = %d\n"
		       "lock waits = %d\n",
		       mpStat.pagealloc,
		       mpStat.pagefree,
		       mpStat.lockwait);

	begin = offset;
	*start = buffer + begin;
	len -= begin;

	if (len > length)
		len = length;
	else
		*eof = 1;

	if (len < 0)
		len = 0;

	return len;
}
#endif