fs/orangefs/waitqueue.c

   1 /*
   2  * (C) 2001 Clemson University and The University of Chicago
   3  * (C) 2011 Omnibond Systems
   4  *
   5  * Changes by Acxiom Corporation to implement generic service_operation()
   6  * function, Copyright Acxiom Corporation, 2005.
   7  *
   8  * See COPYING in top-level directory.
   9  */
  10
  11 /*
  12  *  In-kernel waitqueue operations.
  13  */
  14
  15 #include "protocol.h"
  16 #include "orangefs-kernel.h"
  17 #include "orangefs-bufmap.h"
  18
  19 static int wait_for_cancellation_downcall(struct orangefs_kernel_op_s *);
  20 static int wait_for_matching_downcall(struct orangefs_kernel_op_s *);
  21
  22 /*
  23  * What we do in this function is to walk the list of operations that are
  24  * present in the request queue and mark them as purged.
  25  * NOTE: This is called from the device close after client-core has
  26  * guaranteed that no new operations could appear on the list since the
  27  * client-core is anyway going to exit.
  28  */
  29 void purge_waiting_ops(void)
  30 {
  31         struct orangefs_kernel_op_s *op;
  32
  33         spin_lock(&orangefs_request_list_lock);
  34         list_for_each_entry(op, &orangefs_request_list, list) {
  35                 gossip_debug(GOSSIP_WAIT_DEBUG,
  36                              "pvfs2-client-core: purging op tag %llu %s\n",
  37                              llu(op->tag),
  38                              get_opname_string(op));
  39                 spin_lock(&op->lock);
  40                 set_op_state_purged(op);
  41                 spin_unlock(&op->lock);
  42         }
  43         spin_unlock(&orangefs_request_list_lock);
  44 }
  45
  46 static inline void
  47 add_op_to_request_list(struct orangefs_kernel_op_s *op)
  48 {
  49         spin_lock(&orangefs_request_list_lock);
  50         spin_lock(&op->lock);
  51         set_op_state_waiting(op);
  52         list_add_tail(&op->list, &orangefs_request_list);
  53         spin_unlock(&orangefs_request_list_lock);
  54         spin_unlock(&op->lock);
  55         wake_up_interruptible(&orangefs_request_list_waitq);
  56 }
  57
  58 static inline
  59 void add_priority_op_to_request_list(struct orangefs_kernel_op_s *op)
  60 {
  61         spin_lock(&orangefs_request_list_lock);
  62         spin_lock(&op->lock);
  63         set_op_state_waiting(op);
  64
  65         list_add(&op->list, &orangefs_request_list);
  66         spin_unlock(&orangefs_request_list_lock);
  67         spin_unlock(&op->lock);
  68         wake_up_interruptible(&orangefs_request_list_waitq);
  69 }
  70
  71 /*
  72  * submits a ORANGEFS operation and waits for it to complete
  73  *
  74  * Note op->downcall.status will contain the status of the operation (in
  75  * errno format), whether provided by pvfs2-client or a result of failure to
  76  * service the operation.  If the caller wishes to distinguish, then
  77  * op->state can be checked to see if it was serviced or not.
  78  *
  79  * Returns contents of op->downcall.status for convenience
  80  */
  81 int service_operation(struct orangefs_kernel_op_s *op,
  82                       const char *op_name,
  83                       int flags)
  84 {
  85         /* flags to modify behavior */
  86         sigset_t orig_sigset;
  87         int ret = 0;
  88
  89         DEFINE_WAIT(wait_entry);
  90
  91         op->upcall.tgid = current->tgid;
  92         op->upcall.pid = current->pid;
  93
  94 retry_servicing:
  95         op->downcall.status = 0;
  96         gossip_debug(GOSSIP_WAIT_DEBUG,
  97                      "orangefs: service_operation: %s %p\n",
  98                      op_name,
  99                      op);
 100         gossip_debug(GOSSIP_WAIT_DEBUG,
 101                      "orangefs: operation posted by process: %s, pid: %i\n",
 102                      current->comm,
 103                      current->pid);
 104
 105         /* mask out signals if this operation is not to be interrupted */
 106         if (!(flags & ORANGEFS_OP_INTERRUPTIBLE))
 107                 orangefs_block_signals(&orig_sigset);
 108
 109         if (!(flags & ORANGEFS_OP_NO_SEMAPHORE)) {
 110                 ret = mutex_lock_interruptible(&request_mutex);
 111                 /*
 112                  * check to see if we were interrupted while waiting for
 113                  * semaphore
 114                  */
 115                 if (ret < 0) {
 116                         if (!(flags & ORANGEFS_OP_INTERRUPTIBLE))
 117                                 orangefs_set_signals(&orig_sigset);
 118                         op->downcall.status = ret;
 119                         gossip_debug(GOSSIP_WAIT_DEBUG,
 120                                      "orangefs: service_operation interrupted.\n");
 121                         return ret;
 122                 }
 123         }
 124
 125         gossip_debug(GOSSIP_WAIT_DEBUG,
 126                      "%s:About to call is_daemon_in_service().\n",
 127                      __func__);
 128
 129         if (is_daemon_in_service() < 0) {
 130                 /*
 131                  * By incrementing the per-operation attempt counter, we
 132                  * directly go into the timeout logic while waiting for
 133                  * the matching downcall to be read
 134                  */
 135                 gossip_debug(GOSSIP_WAIT_DEBUG,
 136                              "%s:client core is NOT in service(%d).\n",
 137                              __func__,
 138                              is_daemon_in_service());
 139                 op->attempts++;
 140         }
 141
 142         /* queue up the operation */
 143         if (flags & ORANGEFS_OP_PRIORITY) {
 144                 add_priority_op_to_request_list(op);
 145         } else {
 146                 gossip_debug(GOSSIP_WAIT_DEBUG,
 147                              "%s:About to call add_op_to_request_list().\n",
 148                              __func__);
 149                 add_op_to_request_list(op);
 150         }
 151
 152         if (!(flags & ORANGEFS_OP_NO_SEMAPHORE))
 153                 mutex_unlock(&request_mutex);
 154
 155         /*
 156          * If we are asked to service an asynchronous operation from
 157          * VFS perspective, we are done.
 158          */
 159         if (flags & ORANGEFS_OP_ASYNC)
 160                 return 0;
 161
 162         if (flags & ORANGEFS_OP_CANCELLATION) {
 163                 gossip_debug(GOSSIP_WAIT_DEBUG,
 164                              "%s:"
 165                              "About to call wait_for_cancellation_downcall.\n",
 166                              __func__);
 167                 ret = wait_for_cancellation_downcall(op);
 168         } else {
 169                 ret = wait_for_matching_downcall(op);
 170         }
 171
 172         if (ret < 0) {
 173                 /* failed to get matching downcall */
 174                 if (ret == -ETIMEDOUT) {
 175                         gossip_err("orangefs: %s -- wait timed out; aborting attempt.\n",
 176                                    op_name);
 177                 }
 178                 op->downcall.status = ret;
 179         } else {
 180                 /* got matching downcall; make sure status is in errno format */
 181                 op->downcall.status =
 182                     orangefs_normalize_to_errno(op->downcall.status);
 183                 ret = op->downcall.status;
 184         }
 185
 186         if (!(flags & ORANGEFS_OP_INTERRUPTIBLE))
 187                 orangefs_set_signals(&orig_sigset);
 188
 189         BUG_ON(ret != op->downcall.status);
 190         /* retry if operation has not been serviced and if requested */
 191         if (!op_state_serviced(op) && op->downcall.status == -EAGAIN) {
 192                 gossip_debug(GOSSIP_WAIT_DEBUG,
 193                              "orangefs: tag %llu (%s)"
 194                              " -- operation to be retried (%d attempt)\n",
 195                              llu(op->tag),
 196                              op_name,
 197                              op->attempts + 1);
 198
 199                 if (!op->uses_shared_memory)
 200                         /*
 201                          * this operation doesn't use the shared memory
 202                          * system
 203                          */
 204                         goto retry_servicing;
 205
 206                 /* op uses shared memory */
 207                 if (orangefs_get_bufmap_init() == 0) {
 208                         WARN_ON(1);
 209                         /*
 210                          * This operation uses the shared memory system AND
 211                          * the system is not yet ready. This situation occurs
 212                          * when the client-core is restarted AND there were
 213                          * operations waiting to be processed or were already
 214                          * in process.
 215                          */
 216                         gossip_debug(GOSSIP_WAIT_DEBUG,
 217                                      "uses_shared_memory is true.\n");
 218                         gossip_debug(GOSSIP_WAIT_DEBUG,
 219                                      "Client core in-service status(%d).\n",
 220                                      is_daemon_in_service());
 221                         gossip_debug(GOSSIP_WAIT_DEBUG, "bufmap_init:%d.\n",
 222                                      orangefs_get_bufmap_init());
 223                         gossip_debug(GOSSIP_WAIT_DEBUG,
 224                                      "operation's status is 0x%0x.\n",
 225                                      op->op_state);
 226
 227                         /*
 228                          * let process sleep for a few seconds so shared
 229                          * memory system can be initialized.
 230                          */
 231                         prepare_to_wait(&orangefs_bufmap_init_waitq,
 232                                         &wait_entry,
 233                                         TASK_INTERRUPTIBLE);
 234
 235                         /*
 236                          * Wait for orangefs_bufmap_initialize() to wake me up
 237                          * within the allotted time.
 238                          */
 239                         ret = schedule_timeout(
 240                                 ORANGEFS_BUFMAP_WAIT_TIMEOUT_SECS * HZ);
 241
 242                         gossip_debug(GOSSIP_WAIT_DEBUG,
 243                                      "Value returned from schedule_timeout:"
 244                                      "%d.\n",
 245                                      ret);
 246                         gossip_debug(GOSSIP_WAIT_DEBUG,
 247                                      "Is shared memory available? (%d).\n",
 248                                      orangefs_get_bufmap_init());
 249
 250                         finish_wait(&orangefs_bufmap_init_waitq, &wait_entry);
 251
 252                         if (orangefs_get_bufmap_init() == 0) {
 253                                 gossip_err("%s:The shared memory system has not started in %d seconds after the client core restarted.  Aborting user's request(%s).\n",
 254                                            __func__,
 255                                            ORANGEFS_BUFMAP_WAIT_TIMEOUT_SECS,
 256                                            get_opname_string(op));
 257                                 return -EIO;
 258                         }
 259
 260                         /*
 261                          * Return to the calling function and re-populate a
 262                          * shared memory buffer.
 263                          */
 264                         return -EAGAIN;
 265                 }
 266         }
 267
 268         gossip_debug(GOSSIP_WAIT_DEBUG,
 269                      "orangefs: service_operation %s returning: %d for %p.\n",
 270                      op_name,
 271                      ret,
 272                      op);
 273         return ret;
 274 }
 275
 276 static void orangefs_clean_up_interrupted_operation(struct orangefs_kernel_op_s *op)
 277 {
 278         /*
 279          * handle interrupted cases depending on what state we were in when
 280          * the interruption is detected.  there is a coarse grained lock
 281          * across the operation.
 282          *
 283          * Called with op->lock held.
 284          */
 285         op->op_state |= OP_VFS_STATE_GIVEN_UP;
 286
 287         if (op_state_waiting(op)) {
 288                 /*
 289                  * upcall hasn't been read; remove op from upcall request
 290                  * list.
 291                  */
 292                 spin_unlock(&op->lock);
 293                 spin_lock(&orangefs_request_list_lock);
 294                 list_del(&op->list);
 295                 spin_unlock(&orangefs_request_list_lock);
 296                 gossip_debug(GOSSIP_WAIT_DEBUG,
 297                              "Interrupted: Removed op %p from request_list\n",
 298                              op);
 299         } else if (op_state_in_progress(op)) {
 300                 /* op must be removed from the in progress htable */
 301                 spin_unlock(&op->lock);
 302                 spin_lock(&htable_ops_in_progress_lock);
 303                 list_del(&op->list);
 304                 spin_unlock(&htable_ops_in_progress_lock);
 305                 gossip_debug(GOSSIP_WAIT_DEBUG,
 306                              "Interrupted: Removed op %p"
 307                              " from htable_ops_in_progress\n",
 308                              op);
 309         } else if (!op_state_serviced(op)) {
 310                 spin_unlock(&op->lock);
 311                 gossip_err("interrupted operation is in a weird state 0x%x\n",
 312                            op->op_state);
 313         } else {
 314                 /*
 315                  * It is not intended for execution to flow here,
 316                  * but having this unlock here makes sparse happy.
 317                  */
 318                 gossip_err("%s: can't get here.\n", __func__);
 319                 spin_unlock(&op->lock);
 320         }
 321 }
 322
 323 /*
 324  * sleeps on waitqueue waiting for matching downcall.
 325  * if client-core finishes servicing, then we are good to go.
 326  * else if client-core exits, we get woken up here, and retry with a timeout
 327  *
 328  * Post when this call returns to the caller, the specified op will no
 329  * longer be on any list or htable.
 330  *
 331  * Returns 0 on success and -errno on failure
 332  * Errors are:
 333  * EAGAIN in case we want the caller to requeue and try again..
 334  * EINTR/EIO/ETIMEDOUT indicating we are done trying to service this
 335  * operation since client-core seems to be exiting too often
 336  * or if we were interrupted.
 337  */
 338 static int wait_for_matching_downcall(struct orangefs_kernel_op_s *op)
 339 {
 340         int ret = -EINVAL;
 341         DEFINE_WAIT(wait_entry);
 342
 343         while (1) {
 344                 spin_lock(&op->lock);
 345                 prepare_to_wait(&op->waitq, &wait_entry, TASK_INTERRUPTIBLE);
 346                 if (op_state_serviced(op)) {
 347                         spin_unlock(&op->lock);
 348                         ret = 0;
 349                         break;
 350                 }
 351
 352                 if (unlikely(signal_pending(current))) {
 353                         gossip_debug(GOSSIP_WAIT_DEBUG,
 354                                      "*** %s:"
 355                                      " operation interrupted by a signal (tag "
 356                                      "%llu, op %p)\n",
 357                                      __func__,
 358                                      llu(op->tag),
 359                                      op);
 360                         orangefs_clean_up_interrupted_operation(op);
 361                         ret = -EINTR;
 362                         break;
 363                 }
 364
 365                 /*
 366                  * if this was our first attempt and client-core
 367                  * has not purged our operation, we are happy to
 368                  * simply wait
 369                  */
 370                 if (op->attempts == 0 && !op_state_purged(op)) {
 371                         spin_unlock(&op->lock);
 372                         schedule();
 373                 } else {
 374                         spin_unlock(&op->lock);
 375                         /*
 376                          * subsequent attempts, we retry exactly once
 377                          * with timeouts
 378                          */
 379                         if (!schedule_timeout(op_timeout_secs * HZ)) {
 380                                 gossip_debug(GOSSIP_WAIT_DEBUG,
 381                                              "*** %s:"
 382                                              " operation timed out (tag"
 383                                              " %llu, %p, att %d)\n",
 384                                              __func__,
 385                                              llu(op->tag),
 386                                              op,
 387                                              op->attempts);
 388                                 ret = -ETIMEDOUT;
 389                                 spin_lock(&op->lock);
 390                                 orangefs_clean_up_interrupted_operation(op);
 391                                 break;
 392                         }
 393                 }
 394                 spin_lock(&op->lock);
 395                 op->attempts++;
 396                 /*
 397                  * if the operation was purged in the meantime, it
 398                  * is better to requeue it afresh but ensure that
 399                  * we have not been purged repeatedly. This could
 400                  * happen if client-core crashes when an op
 401                  * is being serviced, so we requeue the op, client
 402                  * core crashes again so we requeue the op, client
 403                  * core starts, and so on...
 404                  */
 405                 if (op_state_purged(op)) {
 406                         ret = (op->attempts < ORANGEFS_PURGE_RETRY_COUNT) ?
 407                                  -EAGAIN :
 408                                  -EIO;
 409                         gossip_debug(GOSSIP_WAIT_DEBUG,
 410                                      "*** %s:"
 411                                      " operation purged (tag "
 412                                      "%llu, %p, att %d)\n",
 413                                      __func__,
 414                                      llu(op->tag),
 415                                      op,
 416                                      op->attempts);
 417                         orangefs_clean_up_interrupted_operation(op);
 418                         break;
 419                 }
 420                 spin_unlock(&op->lock);
 421         }
 422
 423         spin_lock(&op->lock);
 424         finish_wait(&op->waitq, &wait_entry);
 425         spin_unlock(&op->lock);
 426
 427         return ret;
 428 }
 429
 430 /*
 431  * similar to wait_for_matching_downcall(), but used in the special case
 432  * of I/O cancellations.
 433  *
 434  * Note we need a special wait function because if this is called we already
 435  *      know that a signal is pending in current and need to service the
 436  *      cancellation upcall anyway.  the only way to exit this is to either
 437  *      timeout or have the cancellation be serviced properly.
 438  */
 439 static int wait_for_cancellation_downcall(struct orangefs_kernel_op_s *op)
 440 {
 441         int ret = -EINVAL;
 442         DEFINE_WAIT(wait_entry);
 443
 444         while (1) {
 445                 spin_lock(&op->lock);
 446                 prepare_to_wait(&op->waitq, &wait_entry, TASK_INTERRUPTIBLE);
 447                 if (op_state_serviced(op)) {
 448                         gossip_debug(GOSSIP_WAIT_DEBUG,
 449                                      "%s:op-state is SERVICED.\n",
 450                                      __func__);
 451                         spin_unlock(&op->lock);
 452                         ret = 0;
 453                         break;
 454                 }
 455
 456                 if (signal_pending(current)) {
 457                         gossip_debug(GOSSIP_WAIT_DEBUG,
 458                                      "%s:operation interrupted by a signal (tag"
 459                                      " %llu, op %p)\n",
 460                                      __func__,
 461                                      llu(op->tag),
 462                                      op);
 463                         orangefs_clean_up_interrupted_operation(op);
 464                         ret = -EINTR;
 465                         break;
 466                 }
 467
 468                 gossip_debug(GOSSIP_WAIT_DEBUG,
 469                              "%s:About to call schedule_timeout.\n",
 470                              __func__);
 471                 spin_unlock(&op->lock);
 472                 ret = schedule_timeout(op_timeout_secs * HZ);
 473
 474                 gossip_debug(GOSSIP_WAIT_DEBUG,
 475                              "%s:Value returned from schedule_timeout(%d).\n",
 476                              __func__,
 477                              ret);
 478                 if (!ret) {
 479                         gossip_debug(GOSSIP_WAIT_DEBUG,
 480                                      "%s:*** operation timed out: %p\n",
 481                                      __func__,
 482                                      op);
 483                         spin_lock(&op->lock);
 484                         orangefs_clean_up_interrupted_operation(op);
 485                         ret = -ETIMEDOUT;
 486                         break;
 487                 }
 488
 489                 gossip_debug(GOSSIP_WAIT_DEBUG,
 490                              "%s:Breaking out of loop, regardless of value returned by schedule_timeout.\n",
 491                              __func__);
 492                 ret = -ETIMEDOUT;
 493                 break;
 494         }
 495
 496         spin_lock(&op->lock);
 497         finish_wait(&op->waitq, &wait_entry);
 498         spin_unlock(&op->lock);
 499
 500         gossip_debug(GOSSIP_WAIT_DEBUG,
 501                      "%s:returning ret(%d)\n",
 502                      __func__,
 503                      ret);
 504
 505         return ret;
 506 }