]> git.karo-electronics.de Git - karo-tx-linux.git/blob - fs/orangefs/orangefs-utils.c
Merge branch 'akpm-current/current'
[karo-tx-linux.git] / fs / orangefs / orangefs-utils.c
1 /*
2  * (C) 2001 Clemson University and The University of Chicago
3  *
4  * See COPYING in top-level directory.
5  */
6 #include "protocol.h"
7 #include "orangefs-kernel.h"
8 #include "orangefs-dev-proto.h"
9 #include "orangefs-bufmap.h"
10
11 __s32 fsid_of_op(struct orangefs_kernel_op_s *op)
12 {
13         __s32 fsid = ORANGEFS_FS_ID_NULL;
14
15         if (op) {
16                 switch (op->upcall.type) {
17                 case ORANGEFS_VFS_OP_FILE_IO:
18                         fsid = op->upcall.req.io.refn.fs_id;
19                         break;
20                 case ORANGEFS_VFS_OP_LOOKUP:
21                         fsid = op->upcall.req.lookup.parent_refn.fs_id;
22                         break;
23                 case ORANGEFS_VFS_OP_CREATE:
24                         fsid = op->upcall.req.create.parent_refn.fs_id;
25                         break;
26                 case ORANGEFS_VFS_OP_GETATTR:
27                         fsid = op->upcall.req.getattr.refn.fs_id;
28                         break;
29                 case ORANGEFS_VFS_OP_REMOVE:
30                         fsid = op->upcall.req.remove.parent_refn.fs_id;
31                         break;
32                 case ORANGEFS_VFS_OP_MKDIR:
33                         fsid = op->upcall.req.mkdir.parent_refn.fs_id;
34                         break;
35                 case ORANGEFS_VFS_OP_READDIR:
36                         fsid = op->upcall.req.readdir.refn.fs_id;
37                         break;
38                 case ORANGEFS_VFS_OP_SETATTR:
39                         fsid = op->upcall.req.setattr.refn.fs_id;
40                         break;
41                 case ORANGEFS_VFS_OP_SYMLINK:
42                         fsid = op->upcall.req.sym.parent_refn.fs_id;
43                         break;
44                 case ORANGEFS_VFS_OP_RENAME:
45                         fsid = op->upcall.req.rename.old_parent_refn.fs_id;
46                         break;
47                 case ORANGEFS_VFS_OP_STATFS:
48                         fsid = op->upcall.req.statfs.fs_id;
49                         break;
50                 case ORANGEFS_VFS_OP_TRUNCATE:
51                         fsid = op->upcall.req.truncate.refn.fs_id;
52                         break;
53                 case ORANGEFS_VFS_OP_MMAP_RA_FLUSH:
54                         fsid = op->upcall.req.ra_cache_flush.refn.fs_id;
55                         break;
56                 case ORANGEFS_VFS_OP_FS_UMOUNT:
57                         fsid = op->upcall.req.fs_umount.fs_id;
58                         break;
59                 case ORANGEFS_VFS_OP_GETXATTR:
60                         fsid = op->upcall.req.getxattr.refn.fs_id;
61                         break;
62                 case ORANGEFS_VFS_OP_SETXATTR:
63                         fsid = op->upcall.req.setxattr.refn.fs_id;
64                         break;
65                 case ORANGEFS_VFS_OP_LISTXATTR:
66                         fsid = op->upcall.req.listxattr.refn.fs_id;
67                         break;
68                 case ORANGEFS_VFS_OP_REMOVEXATTR:
69                         fsid = op->upcall.req.removexattr.refn.fs_id;
70                         break;
71                 case ORANGEFS_VFS_OP_FSYNC:
72                         fsid = op->upcall.req.fsync.refn.fs_id;
73                         break;
74                 default:
75                         break;
76                 }
77         }
78         return fsid;
79 }
80
81 static int orangefs_inode_flags(struct ORANGEFS_sys_attr_s *attrs)
82 {
83         int flags = 0;
84         if (attrs->flags & ORANGEFS_IMMUTABLE_FL)
85                 flags |= S_IMMUTABLE;
86         else
87                 flags &= ~S_IMMUTABLE;
88         if (attrs->flags & ORANGEFS_APPEND_FL)
89                 flags |= S_APPEND;
90         else
91                 flags &= ~S_APPEND;
92         if (attrs->flags & ORANGEFS_NOATIME_FL)
93                 flags |= S_NOATIME;
94         else
95                 flags &= ~S_NOATIME;
96         return flags;
97 }
98
99 static int orangefs_inode_perms(struct ORANGEFS_sys_attr_s *attrs)
100 {
101         int perm_mode = 0;
102
103         if (attrs->perms & ORANGEFS_O_EXECUTE)
104                 perm_mode |= S_IXOTH;
105         if (attrs->perms & ORANGEFS_O_WRITE)
106                 perm_mode |= S_IWOTH;
107         if (attrs->perms & ORANGEFS_O_READ)
108                 perm_mode |= S_IROTH;
109
110         if (attrs->perms & ORANGEFS_G_EXECUTE)
111                 perm_mode |= S_IXGRP;
112         if (attrs->perms & ORANGEFS_G_WRITE)
113                 perm_mode |= S_IWGRP;
114         if (attrs->perms & ORANGEFS_G_READ)
115                 perm_mode |= S_IRGRP;
116
117         if (attrs->perms & ORANGEFS_U_EXECUTE)
118                 perm_mode |= S_IXUSR;
119         if (attrs->perms & ORANGEFS_U_WRITE)
120                 perm_mode |= S_IWUSR;
121         if (attrs->perms & ORANGEFS_U_READ)
122                 perm_mode |= S_IRUSR;
123
124         if (attrs->perms & ORANGEFS_G_SGID)
125                 perm_mode |= S_ISGID;
126         if (attrs->perms & ORANGEFS_U_SUID)
127                 perm_mode |= S_ISUID;
128
129         return perm_mode;
130 }
131
132 /* NOTE: symname is ignored unless the inode is a sym link */
133 static int copy_attributes_to_inode(struct inode *inode,
134                                     struct ORANGEFS_sys_attr_s *attrs,
135                                     char *symname)
136 {
137         int ret = -1;
138         struct orangefs_inode_s *orangefs_inode = ORANGEFS_I(inode);
139         loff_t inode_size = 0;
140         loff_t rounded_up_size = 0;
141
142
143         /*
144          * arbitrarily set the inode block size; FIXME: we need to
145          * resolve the difference between the reported inode blocksize
146          * and the PAGE_CACHE_SIZE, since our block count will always
147          * be wrong.
148          *
149          * For now, we're setting the block count to be the proper
150          * number assuming the block size is 512 bytes, and the size is
151          * rounded up to the nearest 4K.  This is apparently required
152          * to get proper size reports from the 'du' shell utility.
153          *
154          * changing the inode->i_blkbits to something other than
155          * PAGE_CACHE_SHIFT breaks mmap/execution as we depend on that.
156          */
157         gossip_debug(GOSSIP_UTILS_DEBUG,
158                      "attrs->mask = %x (objtype = %s)\n",
159                      attrs->mask,
160                      attrs->objtype == ORANGEFS_TYPE_METAFILE ? "file" :
161                      attrs->objtype == ORANGEFS_TYPE_DIRECTORY ? "directory" :
162                      attrs->objtype == ORANGEFS_TYPE_SYMLINK ? "symlink" :
163                         "invalid/unknown");
164
165         switch (attrs->objtype) {
166         case ORANGEFS_TYPE_METAFILE:
167                 inode->i_flags = orangefs_inode_flags(attrs);
168                 if (attrs->mask & ORANGEFS_ATTR_SYS_SIZE) {
169                         inode_size = (loff_t) attrs->size;
170                         rounded_up_size =
171                             (inode_size + (4096 - (inode_size % 4096)));
172
173                         orangefs_lock_inode(inode);
174                         inode->i_bytes = inode_size;
175                         inode->i_blocks =
176                             (unsigned long)(rounded_up_size / 512);
177                         orangefs_unlock_inode(inode);
178
179                         /*
180                          * NOTE: make sure all the places we're called
181                          * from have the inode->i_sem lock. We're fine
182                          * in 99% of the cases since we're mostly
183                          * called from a lookup.
184                          */
185                         inode->i_size = inode_size;
186                 }
187                 break;
188         case ORANGEFS_TYPE_SYMLINK:
189                 if (symname != NULL) {
190                         inode->i_size = (loff_t) strlen(symname);
191                         break;
192                 }
193                 /*FALLTHRU*/
194         default:
195                 inode->i_size = PAGE_CACHE_SIZE;
196
197                 orangefs_lock_inode(inode);
198                 inode_set_bytes(inode, inode->i_size);
199                 orangefs_unlock_inode(inode);
200                 break;
201         }
202
203         inode->i_uid = make_kuid(&init_user_ns, attrs->owner);
204         inode->i_gid = make_kgid(&init_user_ns, attrs->group);
205         inode->i_atime.tv_sec = (time_t) attrs->atime;
206         inode->i_mtime.tv_sec = (time_t) attrs->mtime;
207         inode->i_ctime.tv_sec = (time_t) attrs->ctime;
208         inode->i_atime.tv_nsec = 0;
209         inode->i_mtime.tv_nsec = 0;
210         inode->i_ctime.tv_nsec = 0;
211
212         inode->i_mode = orangefs_inode_perms(attrs);
213
214         if (is_root_handle(inode)) {
215                 /* special case: mark the root inode as sticky */
216                 inode->i_mode |= S_ISVTX;
217                 gossip_debug(GOSSIP_UTILS_DEBUG,
218                              "Marking inode %pU as sticky\n",
219                              get_khandle_from_ino(inode));
220         }
221
222         switch (attrs->objtype) {
223         case ORANGEFS_TYPE_METAFILE:
224                 inode->i_mode |= S_IFREG;
225                 ret = 0;
226                 break;
227         case ORANGEFS_TYPE_DIRECTORY:
228                 inode->i_mode |= S_IFDIR;
229                 /* NOTE: we have no good way to keep nlink consistent
230                  * for directories across clients; keep constant at 1.
231                  * Why 1?  If we go with 2, then find(1) gets confused
232                  * and won't work properly withouth the -noleaf option
233                  */
234                 set_nlink(inode, 1);
235                 ret = 0;
236                 break;
237         case ORANGEFS_TYPE_SYMLINK:
238                 inode->i_mode |= S_IFLNK;
239
240                 /* copy link target to inode private data */
241                 if (orangefs_inode && symname) {
242                         strncpy(orangefs_inode->link_target,
243                                 symname,
244                                 ORANGEFS_NAME_MAX);
245                         gossip_debug(GOSSIP_UTILS_DEBUG,
246                                      "Copied attr link target %s\n",
247                                      orangefs_inode->link_target);
248                 }
249                 gossip_debug(GOSSIP_UTILS_DEBUG,
250                              "symlink mode %o\n",
251                              inode->i_mode);
252                 ret = 0;
253                 break;
254         default:
255                 gossip_err("orangefs: copy_attributes_to_inode: got invalid attribute type %x\n",
256                         attrs->objtype);
257         }
258
259         gossip_debug(GOSSIP_UTILS_DEBUG,
260                      "orangefs: copy_attributes_to_inode: setting i_mode to %o, i_size to %lu\n",
261                      inode->i_mode,
262                      (unsigned long)i_size_read(inode));
263
264         return ret;
265 }
266
267 /*
268  * NOTE: in kernel land, we never use the sys_attr->link_target for
269  * anything, so don't bother copying it into the sys_attr object here.
270  */
271 static inline int copy_attributes_from_inode(struct inode *inode,
272                                              struct ORANGEFS_sys_attr_s *attrs,
273                                              struct iattr *iattr)
274 {
275         umode_t tmp_mode;
276
277         if (!iattr || !inode || !attrs) {
278                 gossip_err("NULL iattr (%p), inode (%p), attrs (%p) "
279                            "in copy_attributes_from_inode!\n",
280                            iattr,
281                            inode,
282                            attrs);
283                 return -EINVAL;
284         }
285         /*
286          * We need to be careful to only copy the attributes out of the
287          * iattr object that we know are valid.
288          */
289         attrs->mask = 0;
290         if (iattr->ia_valid & ATTR_UID) {
291                 attrs->owner = from_kuid(current_user_ns(), iattr->ia_uid);
292                 attrs->mask |= ORANGEFS_ATTR_SYS_UID;
293                 gossip_debug(GOSSIP_UTILS_DEBUG, "(UID) %d\n", attrs->owner);
294         }
295         if (iattr->ia_valid & ATTR_GID) {
296                 attrs->group = from_kgid(current_user_ns(), iattr->ia_gid);
297                 attrs->mask |= ORANGEFS_ATTR_SYS_GID;
298                 gossip_debug(GOSSIP_UTILS_DEBUG, "(GID) %d\n", attrs->group);
299         }
300
301         if (iattr->ia_valid & ATTR_ATIME) {
302                 attrs->mask |= ORANGEFS_ATTR_SYS_ATIME;
303                 if (iattr->ia_valid & ATTR_ATIME_SET) {
304                         attrs->atime =
305                             orangefs_convert_time_field(&iattr->ia_atime);
306                         attrs->mask |= ORANGEFS_ATTR_SYS_ATIME_SET;
307                 }
308         }
309         if (iattr->ia_valid & ATTR_MTIME) {
310                 attrs->mask |= ORANGEFS_ATTR_SYS_MTIME;
311                 if (iattr->ia_valid & ATTR_MTIME_SET) {
312                         attrs->mtime =
313                             orangefs_convert_time_field(&iattr->ia_mtime);
314                         attrs->mask |= ORANGEFS_ATTR_SYS_MTIME_SET;
315                 }
316         }
317         if (iattr->ia_valid & ATTR_CTIME)
318                 attrs->mask |= ORANGEFS_ATTR_SYS_CTIME;
319
320         /*
321          * ORANGEFS cannot set size with a setattr operation.  Probably not likely
322          * to be requested through the VFS, but just in case, don't worry about
323          * ATTR_SIZE
324          */
325
326         if (iattr->ia_valid & ATTR_MODE) {
327                 tmp_mode = iattr->ia_mode;
328                 if (tmp_mode & (S_ISVTX)) {
329                         if (is_root_handle(inode)) {
330                                 /*
331                                  * allow sticky bit to be set on root (since
332                                  * it shows up that way by default anyhow),
333                                  * but don't show it to the server
334                                  */
335                                 tmp_mode -= S_ISVTX;
336                         } else {
337                                 gossip_debug(GOSSIP_UTILS_DEBUG,
338                                              "User attempted to set sticky bit on non-root directory; returning EINVAL.\n");
339                                 return -EINVAL;
340                         }
341                 }
342
343                 if (tmp_mode & (S_ISUID)) {
344                         gossip_debug(GOSSIP_UTILS_DEBUG,
345                                      "Attempting to set setuid bit (not supported); returning EINVAL.\n");
346                         return -EINVAL;
347                 }
348
349                 attrs->perms = ORANGEFS_util_translate_mode(tmp_mode);
350                 attrs->mask |= ORANGEFS_ATTR_SYS_PERM;
351         }
352
353         return 0;
354 }
355
356 static int compare_attributes_to_inode(struct inode *inode,
357                                        struct ORANGEFS_sys_attr_s *attrs,
358                                        char *symname,
359                                        int mask)
360 {
361         struct orangefs_inode_s *orangefs_inode = ORANGEFS_I(inode);
362         loff_t inode_size, rounded_up_size;
363
364         /* Much of what happens below relies on the type being around. */
365         if (!(mask & ORANGEFS_ATTR_SYS_TYPE))
366                 return 0;
367
368         if (attrs->objtype == ORANGEFS_TYPE_METAFILE &&
369             inode->i_flags != orangefs_inode_flags(attrs))
370                 return 0;
371
372         /* Compare file size. */
373
374         switch (attrs->objtype) {
375         case ORANGEFS_TYPE_METAFILE:
376                 if (mask & ORANGEFS_ATTR_SYS_SIZE) {
377                         inode_size = attrs->size;
378                         rounded_up_size = inode_size +
379                             (4096 - (inode_size % 4096));
380                         if (inode->i_bytes != inode_size ||
381                             inode->i_blocks != rounded_up_size/512)
382                                 return 0;
383                 }
384                 break;
385         case ORANGEFS_TYPE_SYMLINK:
386                 if (mask & ORANGEFS_ATTR_SYS_SIZE)
387                         if (symname && strlen(symname) != inode->i_size)
388                                 return 0;
389                 break;
390         default:
391                 if (inode->i_size != PAGE_CACHE_SIZE &&
392                     inode_get_bytes(inode) != PAGE_CACHE_SIZE)
393                         return 0;
394         }
395
396         /* Compare general attributes. */
397
398         if (mask & ORANGEFS_ATTR_SYS_UID &&
399             !uid_eq(inode->i_uid, make_kuid(&init_user_ns, attrs->owner)))
400                 return 0;
401         if (mask & ORANGEFS_ATTR_SYS_GID &&
402             !gid_eq(inode->i_gid, make_kgid(&init_user_ns, attrs->group)))
403                 return 0;
404         if (mask & ORANGEFS_ATTR_SYS_ATIME &&
405             inode->i_atime.tv_sec != attrs->atime)
406                 return 0;
407         if (mask & ORANGEFS_ATTR_SYS_MTIME &&
408             inode->i_atime.tv_sec != attrs->mtime)
409                 return 0;
410         if (mask & ORANGEFS_ATTR_SYS_CTIME &&
411             inode->i_atime.tv_sec != attrs->ctime)
412                 return 0;
413         if (inode->i_atime.tv_nsec != 0 ||
414             inode->i_mtime.tv_nsec != 0 ||
415             inode->i_ctime.tv_nsec != 0)
416                 return 0;
417
418         if (mask & ORANGEFS_ATTR_SYS_PERM &&
419             (inode->i_mode & ~(S_ISVTX|S_IFREG|S_IFDIR|S_IFLNK)) !=
420             orangefs_inode_perms(attrs))
421                 return 0;
422
423         if (is_root_handle(inode))
424                 if (!(inode->i_mode & S_ISVTX))
425                         return 0;
426
427         /* Compare file type. */
428
429         switch (attrs->objtype) {
430         case ORANGEFS_TYPE_METAFILE:
431                 if (!(inode->i_mode & S_IFREG))
432                         return 0;
433                 break;
434         case ORANGEFS_TYPE_DIRECTORY:
435                 if (!(inode->i_mode & S_IFDIR))
436                         return 0;
437                 if (inode->i_nlink != 1)
438                         return 0;
439                 break;
440         case ORANGEFS_TYPE_SYMLINK:
441                 if (!(inode->i_mode & S_IFLNK))
442                         return 0;
443                 if (orangefs_inode && symname &&
444                     mask & ORANGEFS_ATTR_SYS_LNK_TARGET)
445                         if (strcmp(orangefs_inode->link_target, symname))
446                                 return 0;
447                 break;
448         default:
449                 gossip_err("orangefs: compare_attributes_to_inode: got invalid attribute type %x\n",
450                     attrs->objtype);
451
452         }
453
454         return 1;
455 }
456
457 /*
458  * Issues a orangefs getattr request and fills in the appropriate inode
459  * attributes if successful. When check is 0, returns 0 on success and -errno
460  * otherwise. When check is 1, returns 1 on success where the inode is valid
461  * and 0 on success where the inode is stale and -errno otherwise.
462  */
463 int orangefs_inode_getattr(struct inode *inode, __u32 getattr_mask, int check)
464 {
465         struct orangefs_inode_s *orangefs_inode = ORANGEFS_I(inode);
466         struct orangefs_kernel_op_s *new_op;
467         int ret = -EINVAL;
468
469         gossip_debug(GOSSIP_UTILS_DEBUG,
470                      "%s: called on inode %pU\n",
471                      __func__,
472                      get_khandle_from_ino(inode));
473
474         new_op = op_alloc(ORANGEFS_VFS_OP_GETATTR);
475         if (!new_op)
476                 return -ENOMEM;
477         new_op->upcall.req.getattr.refn = orangefs_inode->refn;
478         new_op->upcall.req.getattr.mask = getattr_mask;
479
480         ret = service_operation(new_op, __func__,
481                                 get_interruptible_flag(inode));
482         if (ret != 0)
483                 goto out;
484
485         if (check) {
486                 ret = compare_attributes_to_inode(inode,
487                     &new_op->downcall.resp.getattr.attributes,
488                     new_op->downcall.resp.getattr.link_target,
489                     getattr_mask);
490
491                 if (new_op->downcall.resp.getattr.attributes.objtype ==
492                     ORANGEFS_TYPE_METAFILE) {
493                         if (orangefs_inode->blksize !=
494                             new_op->downcall.resp.getattr.attributes.blksize)
495                                 ret = 0;
496                 } else {
497                         if (orangefs_inode->blksize != 1 << inode->i_blkbits)
498                                 ret = 0;
499                 }
500         } else {
501                 if (copy_attributes_to_inode(inode,
502                                 &new_op->downcall.resp.getattr.attributes,
503                                 new_op->downcall.resp.getattr.link_target)) {
504                         gossip_err("%s: failed to copy attributes\n", __func__);
505                         ret = -ENOENT;
506                         goto out;
507                 }
508
509                 /*
510                  * Store blksize in orangefs specific part of inode structure;
511                  * we are only going to use this to report to stat to make sure
512                  * it doesn't perturb any inode related code paths.
513                  */
514                 if (new_op->downcall.resp.getattr.attributes.objtype ==
515                                 ORANGEFS_TYPE_METAFILE) {
516                         orangefs_inode->blksize = new_op->downcall.resp.
517                             getattr.attributes.blksize;
518                 } else {
519                         /*
520                          * mimic behavior of generic_fillattr() for other file
521                          * types.
522                          */
523                         orangefs_inode->blksize = (1 << inode->i_blkbits);
524
525                 }
526         }
527
528 out:
529         gossip_debug(GOSSIP_UTILS_DEBUG,
530                      "Getattr on handle %pU, "
531                      "fsid %d\n  (inode ct = %d) returned %d\n",
532                      &orangefs_inode->refn.khandle,
533                      orangefs_inode->refn.fs_id,
534                      (int)atomic_read(&inode->i_count),
535                      ret);
536
537         op_release(new_op);
538         return ret;
539 }
540
541 /*
542  * issues a orangefs setattr request to make sure the new attribute values
543  * take effect if successful.  returns 0 on success; -errno otherwise
544  */
545 int orangefs_inode_setattr(struct inode *inode, struct iattr *iattr)
546 {
547         struct orangefs_inode_s *orangefs_inode = ORANGEFS_I(inode);
548         struct orangefs_kernel_op_s *new_op;
549         int ret;
550
551         new_op = op_alloc(ORANGEFS_VFS_OP_SETATTR);
552         if (!new_op)
553                 return -ENOMEM;
554
555         new_op->upcall.req.setattr.refn = orangefs_inode->refn;
556         ret = copy_attributes_from_inode(inode,
557                        &new_op->upcall.req.setattr.attributes,
558                        iattr);
559         if (ret >= 0) {
560                 ret = service_operation(new_op, __func__,
561                                 get_interruptible_flag(inode));
562
563                 gossip_debug(GOSSIP_UTILS_DEBUG,
564                              "orangefs_inode_setattr: returning %d\n",
565                              ret);
566         }
567
568         op_release(new_op);
569
570         /*
571          * successful setattr should clear the atime, mtime and
572          * ctime flags.
573          */
574         if (ret == 0) {
575                 ClearAtimeFlag(orangefs_inode);
576                 ClearMtimeFlag(orangefs_inode);
577                 ClearCtimeFlag(orangefs_inode);
578                 ClearModeFlag(orangefs_inode);
579         }
580
581         return ret;
582 }
583
584 int orangefs_flush_inode(struct inode *inode)
585 {
586         /*
587          * If it is a dirty inode, this function gets called.
588          * Gather all the information that needs to be setattr'ed
589          * Right now, this will only be used for mode, atime, mtime
590          * and/or ctime.
591          */
592         struct iattr wbattr;
593         int ret;
594         int mtime_flag;
595         int ctime_flag;
596         int atime_flag;
597         int mode_flag;
598         struct orangefs_inode_s *orangefs_inode = ORANGEFS_I(inode);
599
600         memset(&wbattr, 0, sizeof(wbattr));
601
602         /*
603          * check inode flags up front, and clear them if they are set.  This
604          * will prevent multiple processes from all trying to flush the same
605          * inode if they call close() simultaneously
606          */
607         mtime_flag = MtimeFlag(orangefs_inode);
608         ClearMtimeFlag(orangefs_inode);
609         ctime_flag = CtimeFlag(orangefs_inode);
610         ClearCtimeFlag(orangefs_inode);
611         atime_flag = AtimeFlag(orangefs_inode);
612         ClearAtimeFlag(orangefs_inode);
613         mode_flag = ModeFlag(orangefs_inode);
614         ClearModeFlag(orangefs_inode);
615
616         /*  -- Lazy atime,mtime and ctime update --
617          * Note: all times are dictated by server in the new scheme
618          * and not by the clients
619          *
620          * Also mode updates are being handled now..
621          */
622
623         if (mtime_flag)
624                 wbattr.ia_valid |= ATTR_MTIME;
625         if (ctime_flag)
626                 wbattr.ia_valid |= ATTR_CTIME;
627         if (atime_flag)
628                 wbattr.ia_valid |= ATTR_ATIME;
629
630         if (mode_flag) {
631                 wbattr.ia_mode = inode->i_mode;
632                 wbattr.ia_valid |= ATTR_MODE;
633         }
634
635         gossip_debug(GOSSIP_UTILS_DEBUG,
636                      "*********** orangefs_flush_inode: %pU "
637                      "(ia_valid %d)\n",
638                      get_khandle_from_ino(inode),
639                      wbattr.ia_valid);
640         if (wbattr.ia_valid == 0) {
641                 gossip_debug(GOSSIP_UTILS_DEBUG,
642                              "orangefs_flush_inode skipping setattr()\n");
643                 return 0;
644         }
645
646         gossip_debug(GOSSIP_UTILS_DEBUG,
647                      "orangefs_flush_inode (%pU) writing mode %o\n",
648                      get_khandle_from_ino(inode),
649                      inode->i_mode);
650
651         ret = orangefs_inode_setattr(inode, &wbattr);
652
653         return ret;
654 }
655
656 int orangefs_unmount_sb(struct super_block *sb)
657 {
658         int ret = -EINVAL;
659         struct orangefs_kernel_op_s *new_op = NULL;
660
661         gossip_debug(GOSSIP_UTILS_DEBUG,
662                      "orangefs_unmount_sb called on sb %p\n",
663                      sb);
664
665         new_op = op_alloc(ORANGEFS_VFS_OP_FS_UMOUNT);
666         if (!new_op)
667                 return -ENOMEM;
668         new_op->upcall.req.fs_umount.id = ORANGEFS_SB(sb)->id;
669         new_op->upcall.req.fs_umount.fs_id = ORANGEFS_SB(sb)->fs_id;
670         strncpy(new_op->upcall.req.fs_umount.orangefs_config_server,
671                 ORANGEFS_SB(sb)->devname,
672                 ORANGEFS_MAX_SERVER_ADDR_LEN);
673
674         gossip_debug(GOSSIP_UTILS_DEBUG,
675                      "Attempting ORANGEFS Unmount via host %s\n",
676                      new_op->upcall.req.fs_umount.orangefs_config_server);
677
678         ret = service_operation(new_op, "orangefs_fs_umount", 0);
679
680         gossip_debug(GOSSIP_UTILS_DEBUG,
681                      "orangefs_unmount: got return value of %d\n", ret);
682         if (ret)
683                 sb = ERR_PTR(ret);
684         else
685                 ORANGEFS_SB(sb)->mount_pending = 1;
686
687         op_release(new_op);
688         return ret;
689 }
690
691 /*
692  * NOTE: on successful cancellation, be sure to return -EINTR, as
693  * that's the return value the caller expects
694  */
695 int orangefs_cancel_op_in_progress(__u64 tag)
696 {
697         int ret = -EINVAL;
698         struct orangefs_kernel_op_s *new_op = NULL;
699
700         gossip_debug(GOSSIP_UTILS_DEBUG,
701                      "orangefs_cancel_op_in_progress called on tag %llu\n",
702                      llu(tag));
703
704         new_op = op_alloc(ORANGEFS_VFS_OP_CANCEL);
705         if (!new_op)
706                 return -ENOMEM;
707         new_op->upcall.req.cancel.op_tag = tag;
708
709         gossip_debug(GOSSIP_UTILS_DEBUG,
710                      "Attempting ORANGEFS operation cancellation of tag %llu\n",
711                      llu(new_op->upcall.req.cancel.op_tag));
712
713         ret = service_operation(new_op, "orangefs_cancel", ORANGEFS_OP_CANCELLATION);
714
715         gossip_debug(GOSSIP_UTILS_DEBUG,
716                      "orangefs_cancel_op_in_progress: got return value of %d\n",
717                      ret);
718
719         op_release(new_op);
720         return ret;
721 }
722
723 void orangefs_make_bad_inode(struct inode *inode)
724 {
725         if (is_root_handle(inode)) {
726                 /*
727                  * if this occurs, the pvfs2-client-core was killed but we
728                  * can't afford to lose the inode operations and such
729                  * associated with the root handle in any case.
730                  */
731                 gossip_debug(GOSSIP_UTILS_DEBUG,
732                              "*** NOT making bad root inode %pU\n",
733                              get_khandle_from_ino(inode));
734         } else {
735                 gossip_debug(GOSSIP_UTILS_DEBUG,
736                              "*** making bad inode %pU\n",
737                              get_khandle_from_ino(inode));
738                 make_bad_inode(inode);
739         }
740 }
741
742 /* Block all blockable signals... */
743 void orangefs_block_signals(sigset_t *orig_sigset)
744 {
745         sigset_t mask;
746
747         /*
748          * Initialize all entries in the signal set to the
749          * inverse of the given mask.
750          */
751         siginitsetinv(&mask, sigmask(SIGKILL));
752
753         /* Block 'em Danno... */
754         sigprocmask(SIG_BLOCK, &mask, orig_sigset);
755 }
756
757 /* set the signal mask to the given template... */
758 void orangefs_set_signals(sigset_t *sigset)
759 {
760         sigprocmask(SIG_SETMASK, sigset, NULL);
761 }
762
763 /*
764  * The following is a very dirty hack that is now a permanent part of the
765  * ORANGEFS protocol. See protocol.h for more error definitions.
766  */
767
/*
 * errno values listed in the order used by include/orangefs-types.h in
 * the OrangeFS source.  The position of each entry is significant: do
 * not reorder or insert in the middle.  The leading comment on each row
 * is the index of its first entry.
 */
static int PINT_errno_mapping[] = {
        /*  0 */ 0, EPERM, ENOENT, EINTR, EIO,
        /*  5 */ ENXIO, EBADF, EAGAIN, ENOMEM, EFAULT,
        /* 10 */ EBUSY, EEXIST, ENODEV, ENOTDIR, EISDIR,
        /* 15 */ EINVAL, EMFILE, EFBIG, ENOSPC, EROFS,
        /* 20 */ EMLINK, EPIPE, EDEADLK, ENAMETOOLONG, ENOLCK,
        /* 25 */ ENOSYS, ENOTEMPTY, ELOOP, EWOULDBLOCK, ENOMSG,
        /* 30 */ EUNATCH, EBADR, EDEADLOCK, ENODATA, ETIME,
        /* 35 */ ENONET, EREMOTE, ECOMM, EPROTO, EBADMSG,
        /* 40 */ EOVERFLOW, ERESTART, EMSGSIZE, EPROTOTYPE, ENOPROTOOPT,
        /* 45 */ EPROTONOSUPPORT, EOPNOTSUPP, EADDRINUSE, EADDRNOTAVAIL,
                 ENETDOWN,
        /* 50 */ ENETUNREACH, ENETRESET, ENOBUFS, ETIMEDOUT, ECONNREFUSED,
        /* 55 */ EHOSTDOWN, EHOSTUNREACH, EALREADY, EACCES, ECONNRESET,
        /* 60 */ ERANGE
};
781
782 int orangefs_normalize_to_errno(__s32 error_code)
783 {
784         __u32 i;
785
786         /* Success */
787         if (error_code == 0) {
788                 return 0;
789         /*
790          * This shouldn't ever happen. If it does it should be fixed on the
791          * server.
792          */
793         } else if (error_code > 0) {
794                 gossip_err("orangefs: error status receieved.\n");
795                 gossip_err("orangefs: assuming error code is inverted.\n");
796                 error_code = -error_code;
797         }
798
799         /*
800          * XXX: This is very bad since error codes from ORANGEFS may not be
801          * suitable for return into userspace.
802          */
803
804         /*
805          * Convert ORANGEFS error values into errno values suitable for return
806          * from the kernel.
807          */
808         if ((-error_code) & ORANGEFS_NON_ERRNO_ERROR_BIT) {
809                 if (((-error_code) &
810                     (ORANGEFS_ERROR_NUMBER_BITS|ORANGEFS_NON_ERRNO_ERROR_BIT|
811                     ORANGEFS_ERROR_BIT)) == ORANGEFS_ECANCEL) {
812                         /*
813                          * cancellation error codes generally correspond to
814                          * a timeout from the client's perspective
815                          */
816                         error_code = -ETIMEDOUT;
817                 } else {
818                         /* assume a default error code */
819                         gossip_err("orangefs: warning: got error code without errno equivalent: %d.\n", error_code);
820                         error_code = -EINVAL;
821                 }
822
823         /* Convert ORANGEFS encoded errno values into regular errno values. */
824         } else if ((-error_code) & ORANGEFS_ERROR_BIT) {
825                 i = (-error_code) & ~(ORANGEFS_ERROR_BIT|ORANGEFS_ERROR_CLASS_BITS);
826                 if (i < sizeof(PINT_errno_mapping)/sizeof(*PINT_errno_mapping))
827                         error_code = -PINT_errno_mapping[i];
828                 else
829                         error_code = -EINVAL;
830
831         /*
832          * Only ORANGEFS protocol error codes should ever come here. Otherwise
833          * there is a bug somewhere.
834          */
835         } else {
836                 gossip_err("orangefs: orangefs_normalize_to_errno: got error code which is not from ORANGEFS.\n");
837         }
838         return error_code;
839 }
840
841 #define NUM_MODES 11
842 __s32 ORANGEFS_util_translate_mode(int mode)
843 {
844         int ret = 0;
845         int i = 0;
846         static int modes[NUM_MODES] = {
847                 S_IXOTH, S_IWOTH, S_IROTH,
848                 S_IXGRP, S_IWGRP, S_IRGRP,
849                 S_IXUSR, S_IWUSR, S_IRUSR,
850                 S_ISGID, S_ISUID
851         };
852         static int orangefs_modes[NUM_MODES] = {
853                 ORANGEFS_O_EXECUTE, ORANGEFS_O_WRITE, ORANGEFS_O_READ,
854                 ORANGEFS_G_EXECUTE, ORANGEFS_G_WRITE, ORANGEFS_G_READ,
855                 ORANGEFS_U_EXECUTE, ORANGEFS_U_WRITE, ORANGEFS_U_READ,
856                 ORANGEFS_G_SGID, ORANGEFS_U_SUID
857         };
858
859         for (i = 0; i < NUM_MODES; i++)
860                 if (mode & modes[i])
861                         ret |= orangefs_modes[i];
862
863         return ret;
864 }
865 #undef NUM_MODES
866
867 /*
868  * After obtaining a string representation of the client's debug
869  * keywords and their associated masks, this function is called to build an
870  * array of these values.
871  */
872 int orangefs_prepare_cdm_array(char *debug_array_string)
873 {
874         int i;
875         int rc = -EINVAL;
876         char *cds_head = NULL;
877         char *cds_delimiter = NULL;
878         int keyword_len = 0;
879
880         gossip_debug(GOSSIP_UTILS_DEBUG, "%s: start\n", __func__);
881
882         /*
883          * figure out how many elements the cdm_array needs.
884          */
885         for (i = 0; i < strlen(debug_array_string); i++)
886                 if (debug_array_string[i] == '\n')
887                         cdm_element_count++;
888
889         if (!cdm_element_count) {
890                 pr_info("No elements in client debug array string!\n");
891                 goto out;
892         }
893
894         cdm_array =
895                 kzalloc(cdm_element_count * sizeof(struct client_debug_mask),
896                         GFP_KERNEL);
897         if (!cdm_array) {
898                 pr_info("malloc failed for cdm_array!\n");
899                 rc = -ENOMEM;
900                 goto out;
901         }
902
903         cds_head = debug_array_string;
904
905         for (i = 0; i < cdm_element_count; i++) {
906                 cds_delimiter = strchr(cds_head, '\n');
907                 *cds_delimiter = '\0';
908
909                 keyword_len = strcspn(cds_head, " ");
910
911                 cdm_array[i].keyword = kzalloc(keyword_len + 1, GFP_KERNEL);
912                 if (!cdm_array[i].keyword) {
913                         rc = -ENOMEM;
914                         goto out;
915                 }
916
917                 sscanf(cds_head,
918                        "%s %llx %llx",
919                        cdm_array[i].keyword,
920                        (unsigned long long *)&(cdm_array[i].mask1),
921                        (unsigned long long *)&(cdm_array[i].mask2));
922
923                 if (!strcmp(cdm_array[i].keyword, ORANGEFS_VERBOSE))
924                         client_verbose_index = i;
925
926                 if (!strcmp(cdm_array[i].keyword, ORANGEFS_ALL))
927                         client_all_index = i;
928
929                 cds_head = cds_delimiter + 1;
930         }
931
932         rc = cdm_element_count;
933
934         gossip_debug(GOSSIP_UTILS_DEBUG, "%s: rc:%d:\n", __func__, rc);
935
936 out:
937
938         return rc;
939
940 }
941
942 /*
943  * /sys/kernel/debug/orangefs/debug-help can be catted to
944  * see all the available kernel and client debug keywords.
945  *
946  * When the kernel boots, we have no idea what keywords the
947  * client supports, nor their associated masks.
948  *
949  * We pass through this function once at boot and stamp a
950  * boilerplate "we don't know" message for the client in the
951  * debug-help file. We pass through here again when the client
952  * starts and then we can fill out the debug-help file fully.
953  *
954  * The client might be restarted any number of times between
955  * reboots, we only build the debug-help file the first time.
956  */
957 int orangefs_prepare_debugfs_help_string(int at_boot)
958 {
959         int rc = -EINVAL;
960         int i;
961         int byte_count = 0;
962         char *client_title = "Client Debug Keywords:\n";
963         char *kernel_title = "Kernel Debug Keywords:\n";
964
965         gossip_debug(GOSSIP_UTILS_DEBUG, "%s: start\n", __func__);
966
967         if (at_boot) {
968                 byte_count += strlen(HELP_STRING_UNINITIALIZED);
969                 client_title = HELP_STRING_UNINITIALIZED;
970         } else {
971                 /*
972                  * fill the client keyword/mask array and remember
973                  * how many elements there were.
974                  */
975                 cdm_element_count =
976                         orangefs_prepare_cdm_array(client_debug_array_string);
977                 if (cdm_element_count <= 0)
978                         goto out;
979
980                 /* Count the bytes destined for debug_help_string. */
981                 byte_count += strlen(client_title);
982
983                 for (i = 0; i < cdm_element_count; i++) {
984                         byte_count += strlen(cdm_array[i].keyword + 2);
985                         if (byte_count >= DEBUG_HELP_STRING_SIZE) {
986                                 pr_info("%s: overflow 1!\n", __func__);
987                                 goto out;
988                         }
989                 }
990
991                 gossip_debug(GOSSIP_UTILS_DEBUG,
992                              "%s: cdm_element_count:%d:\n",
993                              __func__,
994                              cdm_element_count);
995         }
996
997         byte_count += strlen(kernel_title);
998         for (i = 0; i < num_kmod_keyword_mask_map; i++) {
999                 byte_count +=
1000                         strlen(s_kmod_keyword_mask_map[i].keyword + 2);
1001                 if (byte_count >= DEBUG_HELP_STRING_SIZE) {
1002                         pr_info("%s: overflow 2!\n", __func__);
1003                         goto out;
1004                 }
1005         }
1006
1007         /* build debug_help_string. */
1008         debug_help_string = kzalloc(DEBUG_HELP_STRING_SIZE, GFP_KERNEL);
1009         if (!debug_help_string) {
1010                 rc = -ENOMEM;
1011                 goto out;
1012         }
1013
1014         strcat(debug_help_string, client_title);
1015
1016         if (!at_boot) {
1017                 for (i = 0; i < cdm_element_count; i++) {
1018                         strcat(debug_help_string, "\t");
1019                         strcat(debug_help_string, cdm_array[i].keyword);
1020                         strcat(debug_help_string, "\n");
1021                 }
1022         }
1023
1024         strcat(debug_help_string, "\n");
1025         strcat(debug_help_string, kernel_title);
1026
1027         for (i = 0; i < num_kmod_keyword_mask_map; i++) {
1028                 strcat(debug_help_string, "\t");
1029                 strcat(debug_help_string, s_kmod_keyword_mask_map[i].keyword);
1030                 strcat(debug_help_string, "\n");
1031         }
1032
1033         rc = 0;
1034
1035 out:
1036
1037         return rc;
1038
1039 }
1040
1041 /*
1042  * kernel = type 0
1043  * client = type 1
1044  */
1045 void debug_mask_to_string(void *mask, int type)
1046 {
1047         int i;
1048         int len = 0;
1049         char *debug_string;
1050         int element_count = 0;
1051
1052         gossip_debug(GOSSIP_UTILS_DEBUG, "%s: start\n", __func__);
1053
1054         if (type) {
1055                 debug_string = client_debug_string;
1056                 element_count = cdm_element_count;
1057         } else {
1058                 debug_string = kernel_debug_string;
1059                 element_count = num_kmod_keyword_mask_map;
1060         }
1061
1062         memset(debug_string, 0, ORANGEFS_MAX_DEBUG_STRING_LEN);
1063
1064         /*
1065          * Some keywords, like "all" or "verbose", are amalgams of
1066          * numerous other keywords. Make a special check for those
1067          * before grinding through the whole mask only to find out
1068          * later...
1069          */
1070         if (check_amalgam_keyword(mask, type))
1071                 goto out;
1072
1073         /* Build the debug string. */
1074         for (i = 0; i < element_count; i++)
1075                 if (type)
1076                         do_c_string(mask, i);
1077                 else
1078                         do_k_string(mask, i);
1079
1080         len = strlen(debug_string);
1081
1082         if ((len) && (type))
1083                 client_debug_string[len - 1] = '\0';
1084         else if (len)
1085                 kernel_debug_string[len - 1] = '\0';
1086         else if (type)
1087                 strcpy(client_debug_string, "none");
1088         else
1089                 strcpy(kernel_debug_string, "none");
1090
1091 out:
1092 gossip_debug(GOSSIP_UTILS_DEBUG, "%s: string:%s:\n", __func__, debug_string);
1093
1094         return;
1095
1096 }
1097
1098 void do_k_string(void *k_mask, int index)
1099 {
1100         __u64 *mask = (__u64 *) k_mask;
1101
1102         if (keyword_is_amalgam((char *) s_kmod_keyword_mask_map[index].keyword))
1103                 goto out;
1104
1105         if (*mask & s_kmod_keyword_mask_map[index].mask_val) {
1106                 if ((strlen(kernel_debug_string) +
1107                      strlen(s_kmod_keyword_mask_map[index].keyword))
1108                         < ORANGEFS_MAX_DEBUG_STRING_LEN - 1) {
1109                                 strcat(kernel_debug_string,
1110                                        s_kmod_keyword_mask_map[index].keyword);
1111                                 strcat(kernel_debug_string, ",");
1112                         } else {
1113                                 gossip_err("%s: overflow!\n", __func__);
1114                                 strcpy(kernel_debug_string, ORANGEFS_ALL);
1115                                 goto out;
1116                         }
1117         }
1118
1119 out:
1120
1121         return;
1122 }
1123
1124 void do_c_string(void *c_mask, int index)
1125 {
1126         struct client_debug_mask *mask = (struct client_debug_mask *) c_mask;
1127
1128         if (keyword_is_amalgam(cdm_array[index].keyword))
1129                 goto out;
1130
1131         if ((mask->mask1 & cdm_array[index].mask1) ||
1132             (mask->mask2 & cdm_array[index].mask2)) {
1133                 if ((strlen(client_debug_string) +
1134                      strlen(cdm_array[index].keyword) + 1)
1135                         < ORANGEFS_MAX_DEBUG_STRING_LEN - 2) {
1136                                 strcat(client_debug_string,
1137                                        cdm_array[index].keyword);
1138                                 strcat(client_debug_string, ",");
1139                         } else {
1140                                 gossip_err("%s: overflow!\n", __func__);
1141                                 strcpy(client_debug_string, ORANGEFS_ALL);
1142                                 goto out;
1143                         }
1144         }
1145 out:
1146         return;
1147 }
1148
1149 int keyword_is_amalgam(char *keyword)
1150 {
1151         int rc = 0;
1152
1153         if ((!strcmp(keyword, ORANGEFS_ALL)) || (!strcmp(keyword, ORANGEFS_VERBOSE)))
1154                 rc = 1;
1155
1156         return rc;
1157 }
1158
1159 /*
1160  * kernel = type 0
1161  * client = type 1
1162  *
1163  * return 1 if we found an amalgam.
1164  */
1165 int check_amalgam_keyword(void *mask, int type)
1166 {
1167         __u64 *k_mask;
1168         struct client_debug_mask *c_mask;
1169         int k_all_index = num_kmod_keyword_mask_map - 1;
1170         int rc = 0;
1171
1172         if (type) {
1173                 c_mask = (struct client_debug_mask *) mask;
1174
1175                 if ((c_mask->mask1 == cdm_array[client_all_index].mask1) &&
1176                     (c_mask->mask2 == cdm_array[client_all_index].mask2)) {
1177                         strcpy(client_debug_string, ORANGEFS_ALL);
1178                         rc = 1;
1179                         goto out;
1180                 }
1181
1182                 if ((c_mask->mask1 == cdm_array[client_verbose_index].mask1) &&
1183                     (c_mask->mask2 == cdm_array[client_verbose_index].mask2)) {
1184                         strcpy(client_debug_string, ORANGEFS_VERBOSE);
1185                         rc = 1;
1186                         goto out;
1187                 }
1188
1189         } else {
1190                 k_mask = (__u64 *) mask;
1191
1192                 if (*k_mask >= s_kmod_keyword_mask_map[k_all_index].mask_val) {
1193                         strcpy(kernel_debug_string, ORANGEFS_ALL);
1194                         rc = 1;
1195                         goto out;
1196                 }
1197         }
1198
1199 out:
1200
1201         return rc;
1202 }
1203
1204 /*
1205  * kernel = type 0
1206  * client = type 1
1207  */
1208 void debug_string_to_mask(char *debug_string, void *mask, int type)
1209 {
1210         char *unchecked_keyword;
1211         int i;
1212         char *strsep_fodder = kstrdup(debug_string, GFP_KERNEL);
1213         char *original_pointer;
1214         int element_count = 0;
1215         struct client_debug_mask *c_mask;
1216         __u64 *k_mask;
1217
1218         gossip_debug(GOSSIP_UTILS_DEBUG, "%s: start\n", __func__);
1219
1220         if (type) {
1221                 c_mask = (struct client_debug_mask *)mask;
1222                 element_count = cdm_element_count;
1223         } else {
1224                 k_mask = (__u64 *)mask;
1225                 *k_mask = 0;
1226                 element_count = num_kmod_keyword_mask_map;
1227         }
1228
1229         original_pointer = strsep_fodder;
1230         while ((unchecked_keyword = strsep(&strsep_fodder, ",")))
1231                 if (strlen(unchecked_keyword)) {
1232                         for (i = 0; i < element_count; i++)
1233                                 if (type)
1234                                         do_c_mask(i,
1235                                                   unchecked_keyword,
1236                                                   &c_mask);
1237                                 else
1238                                         do_k_mask(i,
1239                                                   unchecked_keyword,
1240                                                   &k_mask);
1241                 }
1242
1243         kfree(original_pointer);
1244 }
1245
1246 void do_c_mask(int i,
1247                char *unchecked_keyword,
1248                struct client_debug_mask **sane_mask)
1249 {
1250
1251         if (!strcmp(cdm_array[i].keyword, unchecked_keyword)) {
1252                 (**sane_mask).mask1 = (**sane_mask).mask1 | cdm_array[i].mask1;
1253                 (**sane_mask).mask2 = (**sane_mask).mask2 | cdm_array[i].mask2;
1254         }
1255 }
1256
1257 void do_k_mask(int i, char *unchecked_keyword, __u64 **sane_mask)
1258 {
1259
1260         if (!strcmp(s_kmod_keyword_mask_map[i].keyword, unchecked_keyword))
1261                 **sane_mask = (**sane_mask) |
1262                                 s_kmod_keyword_mask_map[i].mask_val;
1263 }