]> git.karo-electronics.de Git - karo-tx-linux.git/blob - fs/reiserfs/stree.c
arm: imx6: defconfig: update tx6 defconfigs
[karo-tx-linux.git] / fs / reiserfs / stree.c
1 /*
2  *  Copyright 2000 by Hans Reiser, licensing governed by reiserfs/README
3  */
4
5 /*
6  *  Written by Anatoly P. Pinchuk pap@namesys.botik.ru
7  *  Programm System Institute
8  *  Pereslavl-Zalessky Russia
9  */
10
11 /*
12  *  This file contains functions dealing with S+tree
13  *
14  * B_IS_IN_TREE
15  * copy_item_head
16  * comp_short_keys
17  * comp_keys
18  * comp_short_le_keys
19  * le_key2cpu_key
20  * comp_le_keys
21  * bin_search
22  * get_lkey
23  * get_rkey
24  * key_in_buffer
25  * decrement_bcount
26  * reiserfs_check_path
27  * pathrelse_and_restore
28  * pathrelse
29  * search_by_key_reada
30  * search_by_key
31  * search_for_position_by_key
32  * comp_items
33  * prepare_for_direct_item
34  * prepare_for_direntry_item
35  * prepare_for_delete_or_cut
36  * calc_deleted_bytes_number
37  * init_tb_struct
38  * padd_item
39  * reiserfs_delete_item
40  * reiserfs_delete_solid_item
41  * reiserfs_delete_object
42  * maybe_indirect_to_direct
43  * indirect_to_direct_roll_back
44  * reiserfs_cut_from_item
45  * truncate_directory
46  * reiserfs_do_truncate
47  * reiserfs_paste_into_item
48  * reiserfs_insert_item
49  */
50
51 #include <linux/time.h>
52 #include <linux/string.h>
53 #include <linux/pagemap.h>
54 #include "reiserfs.h"
55 #include <linux/buffer_head.h>
56 #include <linux/quotaops.h>
57
58 /* Does the buffer contain a disk block which is in the tree. */
59 inline int B_IS_IN_TREE(const struct buffer_head *bh)
60 {
61
62         RFALSE(B_LEVEL(bh) > MAX_HEIGHT,
63                "PAP-1010: block (%b) has too big level (%z)", bh, bh);
64
65         return (B_LEVEL(bh) != FREE_LEVEL);
66 }
67
68 //
69 // to gets item head in le form
70 //
71 inline void copy_item_head(struct item_head *to,
72                            const struct item_head *from)
73 {
74         memcpy(to, from, IH_SIZE);
75 }
76
77 /* k1 is pointer to on-disk structure which is stored in little-endian
78    form. k2 is pointer to cpu variable. For key of items of the same
79    object this returns 0.
80    Returns: -1 if key1 < key2
81    0 if key1 == key2
82    1 if key1 > key2 */
83 inline int comp_short_keys(const struct reiserfs_key *le_key,
84                            const struct cpu_key *cpu_key)
85 {
86         __u32 n;
87         n = le32_to_cpu(le_key->k_dir_id);
88         if (n < cpu_key->on_disk_key.k_dir_id)
89                 return -1;
90         if (n > cpu_key->on_disk_key.k_dir_id)
91                 return 1;
92         n = le32_to_cpu(le_key->k_objectid);
93         if (n < cpu_key->on_disk_key.k_objectid)
94                 return -1;
95         if (n > cpu_key->on_disk_key.k_objectid)
96                 return 1;
97         return 0;
98 }
99
100 /* k1 is pointer to on-disk structure which is stored in little-endian
101    form. k2 is pointer to cpu variable.
102    Compare keys using all 4 key fields.
103    Returns: -1 if key1 < key2 0
104    if key1 = key2 1 if key1 > key2 */
105 static inline int comp_keys(const struct reiserfs_key *le_key,
106                             const struct cpu_key *cpu_key)
107 {
108         int retval;
109
110         retval = comp_short_keys(le_key, cpu_key);
111         if (retval)
112                 return retval;
113         if (le_key_k_offset(le_key_version(le_key), le_key) <
114             cpu_key_k_offset(cpu_key))
115                 return -1;
116         if (le_key_k_offset(le_key_version(le_key), le_key) >
117             cpu_key_k_offset(cpu_key))
118                 return 1;
119
120         if (cpu_key->key_length == 3)
121                 return 0;
122
123         /* this part is needed only when tail conversion is in progress */
124         if (le_key_k_type(le_key_version(le_key), le_key) <
125             cpu_key_k_type(cpu_key))
126                 return -1;
127
128         if (le_key_k_type(le_key_version(le_key), le_key) >
129             cpu_key_k_type(cpu_key))
130                 return 1;
131
132         return 0;
133 }
134
135 inline int comp_short_le_keys(const struct reiserfs_key *key1,
136                               const struct reiserfs_key *key2)
137 {
138         __u32 *k1_u32, *k2_u32;
139         int key_length = REISERFS_SHORT_KEY_LEN;
140
141         k1_u32 = (__u32 *) key1;
142         k2_u32 = (__u32 *) key2;
143         for (; key_length--; ++k1_u32, ++k2_u32) {
144                 if (le32_to_cpu(*k1_u32) < le32_to_cpu(*k2_u32))
145                         return -1;
146                 if (le32_to_cpu(*k1_u32) > le32_to_cpu(*k2_u32))
147                         return 1;
148         }
149         return 0;
150 }
151
152 inline void le_key2cpu_key(struct cpu_key *to, const struct reiserfs_key *from)
153 {
154         int version;
155         to->on_disk_key.k_dir_id = le32_to_cpu(from->k_dir_id);
156         to->on_disk_key.k_objectid = le32_to_cpu(from->k_objectid);
157
158         // find out version of the key
159         version = le_key_version(from);
160         to->version = version;
161         to->on_disk_key.k_offset = le_key_k_offset(version, from);
162         to->on_disk_key.k_type = le_key_k_type(version, from);
163 }
164
165 // this does not say which one is bigger, it only returns 1 if keys
166 // are not equal, 0 otherwise
167 inline int comp_le_keys(const struct reiserfs_key *k1,
168                         const struct reiserfs_key *k2)
169 {
170         return memcmp(k1, k2, sizeof(struct reiserfs_key));
171 }
172
173 /**************************************************************************
174  *  Binary search toolkit function                                        *
175  *  Search for an item in the array by the item key                       *
176  *  Returns:    1 if found,  0 if not found;                              *
177  *        *pos = number of the searched element if found, else the        *
178  *        number of the first element that is larger than key.            *
179  **************************************************************************/
180 /* For those not familiar with binary search: lbound is the leftmost item that it
181  could be, rbound the rightmost item that it could be.  We examine the item
182  halfway between lbound and rbound, and that tells us either that we can increase
183  lbound, or decrease rbound, or that we have found it, or if lbound <= rbound that
184  there are no possible items, and we have not found it. With each examination we
185  cut the number of possible items it could be by one more than half rounded down,
186  or we find it. */
187 static inline int bin_search(const void *key,   /* Key to search for. */
188                              const void *base,  /* First item in the array. */
189                              int num,   /* Number of items in the array. */
190                              int width, /* Item size in the array.
191                                            searched. Lest the reader be
192                                            confused, note that this is crafted
193                                            as a general function, and when it
194                                            is applied specifically to the array
195                                            of item headers in a node, width
196                                            is actually the item header size not
197                                            the item size. */
198                              int *pos /* Number of the searched for element. */
199     )
200 {
201         int rbound, lbound, j;
202
203         for (j = ((rbound = num - 1) + (lbound = 0)) / 2;
204              lbound <= rbound; j = (rbound + lbound) / 2)
205                 switch (comp_keys
206                         ((struct reiserfs_key *)((char *)base + j * width),
207                          (struct cpu_key *)key)) {
208                 case -1:
209                         lbound = j + 1;
210                         continue;
211                 case 1:
212                         rbound = j - 1;
213                         continue;
214                 case 0:
215                         *pos = j;
216                         return ITEM_FOUND;      /* Key found in the array.  */
217                 }
218
219         /* bin_search did not find given key, it returns position of key,
220            that is minimal and greater than the given one. */
221         *pos = lbound;
222         return ITEM_NOT_FOUND;
223 }
224
225
226 /* Minimal possible key. It is never in the tree. */
227 const struct reiserfs_key MIN_KEY = { 0, 0, {{0, 0},} };
228
229 /* Maximal possible key. It is never in the tree. */
230 static const struct reiserfs_key MAX_KEY = {
231         __constant_cpu_to_le32(0xffffffff),
232         __constant_cpu_to_le32(0xffffffff),
233         {{__constant_cpu_to_le32(0xffffffff),
234           __constant_cpu_to_le32(0xffffffff)},}
235 };
236
237 /* Get delimiting key of the buffer by looking for it in the buffers in the path, starting from the bottom
238    of the path, and going upwards.  We must check the path's validity at each step.  If the key is not in
239    the path, there is no delimiting key in the tree (buffer is first or last buffer in tree), and in this
240    case we return a special key, either MIN_KEY or MAX_KEY. */
241 static inline const struct reiserfs_key *get_lkey(const struct treepath *chk_path,
242                                                   const struct super_block *sb)
243 {
244         int position, path_offset = chk_path->path_length;
245         struct buffer_head *parent;
246
247         RFALSE(path_offset < FIRST_PATH_ELEMENT_OFFSET,
248                "PAP-5010: invalid offset in the path");
249
250         /* While not higher in path than first element. */
251         while (path_offset-- > FIRST_PATH_ELEMENT_OFFSET) {
252
253                 RFALSE(!buffer_uptodate
254                        (PATH_OFFSET_PBUFFER(chk_path, path_offset)),
255                        "PAP-5020: parent is not uptodate");
256
257                 /* Parent at the path is not in the tree now. */
258                 if (!B_IS_IN_TREE
259                     (parent =
260                      PATH_OFFSET_PBUFFER(chk_path, path_offset)))
261                         return &MAX_KEY;
262                 /* Check whether position in the parent is correct. */
263                 if ((position =
264                      PATH_OFFSET_POSITION(chk_path,
265                                           path_offset)) >
266                     B_NR_ITEMS(parent))
267                         return &MAX_KEY;
268                 /* Check whether parent at the path really points to the child. */
269                 if (B_N_CHILD_NUM(parent, position) !=
270                     PATH_OFFSET_PBUFFER(chk_path,
271                                         path_offset + 1)->b_blocknr)
272                         return &MAX_KEY;
273                 /* Return delimiting key if position in the parent is not equal to zero. */
274                 if (position)
275                         return B_N_PDELIM_KEY(parent, position - 1);
276         }
277         /* Return MIN_KEY if we are in the root of the buffer tree. */
278         if (PATH_OFFSET_PBUFFER(chk_path, FIRST_PATH_ELEMENT_OFFSET)->
279             b_blocknr == SB_ROOT_BLOCK(sb))
280                 return &MIN_KEY;
281         return &MAX_KEY;
282 }
283
284 /* Get delimiting key of the buffer at the path and its right neighbor. */
285 inline const struct reiserfs_key *get_rkey(const struct treepath *chk_path,
286                                            const struct super_block *sb)
287 {
288         int position, path_offset = chk_path->path_length;
289         struct buffer_head *parent;
290
291         RFALSE(path_offset < FIRST_PATH_ELEMENT_OFFSET,
292                "PAP-5030: invalid offset in the path");
293
294         while (path_offset-- > FIRST_PATH_ELEMENT_OFFSET) {
295
296                 RFALSE(!buffer_uptodate
297                        (PATH_OFFSET_PBUFFER(chk_path, path_offset)),
298                        "PAP-5040: parent is not uptodate");
299
300                 /* Parent at the path is not in the tree now. */
301                 if (!B_IS_IN_TREE
302                     (parent =
303                      PATH_OFFSET_PBUFFER(chk_path, path_offset)))
304                         return &MIN_KEY;
305                 /* Check whether position in the parent is correct. */
306                 if ((position =
307                      PATH_OFFSET_POSITION(chk_path,
308                                           path_offset)) >
309                     B_NR_ITEMS(parent))
310                         return &MIN_KEY;
311                 /* Check whether parent at the path really points to the child. */
312                 if (B_N_CHILD_NUM(parent, position) !=
313                     PATH_OFFSET_PBUFFER(chk_path,
314                                         path_offset + 1)->b_blocknr)
315                         return &MIN_KEY;
316                 /* Return delimiting key if position in the parent is not the last one. */
317                 if (position != B_NR_ITEMS(parent))
318                         return B_N_PDELIM_KEY(parent, position);
319         }
320         /* Return MAX_KEY if we are in the root of the buffer tree. */
321         if (PATH_OFFSET_PBUFFER(chk_path, FIRST_PATH_ELEMENT_OFFSET)->
322             b_blocknr == SB_ROOT_BLOCK(sb))
323                 return &MAX_KEY;
324         return &MIN_KEY;
325 }
326
327 /* Check whether a key is contained in the tree rooted from a buffer at a path. */
328 /* This works by looking at the left and right delimiting keys for the buffer in the last path_element in
329    the path.  These delimiting keys are stored at least one level above that buffer in the tree. If the
330    buffer is the first or last node in the tree order then one of the delimiting keys may be absent, and in
331    this case get_lkey and get_rkey return a special key which is MIN_KEY or MAX_KEY. */
332 static inline int key_in_buffer(struct treepath *chk_path,      /* Path which should be checked.  */
333                                 const struct cpu_key *key,      /* Key which should be checked.   */
334                                 struct super_block *sb
335     )
336 {
337
338         RFALSE(!key || chk_path->path_length < FIRST_PATH_ELEMENT_OFFSET
339                || chk_path->path_length > MAX_HEIGHT,
340                "PAP-5050: pointer to the key(%p) is NULL or invalid path length(%d)",
341                key, chk_path->path_length);
342         RFALSE(!PATH_PLAST_BUFFER(chk_path)->b_bdev,
343                "PAP-5060: device must not be NODEV");
344
345         if (comp_keys(get_lkey(chk_path, sb), key) == 1)
346                 /* left delimiting key is bigger, that the key we look for */
347                 return 0;
348         /*  if ( comp_keys(key, get_rkey(chk_path, sb)) != -1 ) */
349         if (comp_keys(get_rkey(chk_path, sb), key) != 1)
350                 /* key must be less than right delimitiing key */
351                 return 0;
352         return 1;
353 }
354
355 int reiserfs_check_path(struct treepath *p)
356 {
357         RFALSE(p->path_length != ILLEGAL_PATH_ELEMENT_OFFSET,
358                "path not properly relsed");
359         return 0;
360 }
361
362 /* Drop the reference to each buffer in a path and restore
363  * dirty bits clean when preparing the buffer for the log.
364  * This version should only be called from fix_nodes() */
365 void pathrelse_and_restore(struct super_block *sb,
366                            struct treepath *search_path)
367 {
368         int path_offset = search_path->path_length;
369
370         RFALSE(path_offset < ILLEGAL_PATH_ELEMENT_OFFSET,
371                "clm-4000: invalid path offset");
372
373         while (path_offset > ILLEGAL_PATH_ELEMENT_OFFSET) {
374                 struct buffer_head *bh;
375                 bh = PATH_OFFSET_PBUFFER(search_path, path_offset--);
376                 reiserfs_restore_prepared_buffer(sb, bh);
377                 brelse(bh);
378         }
379         search_path->path_length = ILLEGAL_PATH_ELEMENT_OFFSET;
380 }
381
382 /* Drop the reference to each buffer in a path */
383 void pathrelse(struct treepath *search_path)
384 {
385         int path_offset = search_path->path_length;
386
387         RFALSE(path_offset < ILLEGAL_PATH_ELEMENT_OFFSET,
388                "PAP-5090: invalid path offset");
389
390         while (path_offset > ILLEGAL_PATH_ELEMENT_OFFSET)
391                 brelse(PATH_OFFSET_PBUFFER(search_path, path_offset--));
392
393         search_path->path_length = ILLEGAL_PATH_ELEMENT_OFFSET;
394 }
395
396 static int is_leaf(char *buf, int blocksize, struct buffer_head *bh)
397 {
398         struct block_head *blkh;
399         struct item_head *ih;
400         int used_space;
401         int prev_location;
402         int i;
403         int nr;
404
405         blkh = (struct block_head *)buf;
406         if (blkh_level(blkh) != DISK_LEAF_NODE_LEVEL) {
407                 reiserfs_warning(NULL, "reiserfs-5080",
408                                  "this should be caught earlier");
409                 return 0;
410         }
411
412         nr = blkh_nr_item(blkh);
413         if (nr < 1 || nr > ((blocksize - BLKH_SIZE) / (IH_SIZE + MIN_ITEM_LEN))) {
414                 /* item number is too big or too small */
415                 reiserfs_warning(NULL, "reiserfs-5081",
416                                  "nr_item seems wrong: %z", bh);
417                 return 0;
418         }
419         ih = (struct item_head *)(buf + BLKH_SIZE) + nr - 1;
420         used_space = BLKH_SIZE + IH_SIZE * nr + (blocksize - ih_location(ih));
421         if (used_space != blocksize - blkh_free_space(blkh)) {
422                 /* free space does not match to calculated amount of use space */
423                 reiserfs_warning(NULL, "reiserfs-5082",
424                                  "free space seems wrong: %z", bh);
425                 return 0;
426         }
427         // FIXME: it is_leaf will hit performance too much - we may have
428         // return 1 here
429
430         /* check tables of item heads */
431         ih = (struct item_head *)(buf + BLKH_SIZE);
432         prev_location = blocksize;
433         for (i = 0; i < nr; i++, ih++) {
434                 if (le_ih_k_type(ih) == TYPE_ANY) {
435                         reiserfs_warning(NULL, "reiserfs-5083",
436                                          "wrong item type for item %h",
437                                          ih);
438                         return 0;
439                 }
440                 if (ih_location(ih) >= blocksize
441                     || ih_location(ih) < IH_SIZE * nr) {
442                         reiserfs_warning(NULL, "reiserfs-5084",
443                                          "item location seems wrong: %h",
444                                          ih);
445                         return 0;
446                 }
447                 if (ih_item_len(ih) < 1
448                     || ih_item_len(ih) > MAX_ITEM_LEN(blocksize)) {
449                         reiserfs_warning(NULL, "reiserfs-5085",
450                                          "item length seems wrong: %h",
451                                          ih);
452                         return 0;
453                 }
454                 if (prev_location - ih_location(ih) != ih_item_len(ih)) {
455                         reiserfs_warning(NULL, "reiserfs-5086",
456                                          "item location seems wrong "
457                                          "(second one): %h", ih);
458                         return 0;
459                 }
460                 prev_location = ih_location(ih);
461         }
462
463         // one may imagine much more checks
464         return 1;
465 }
466
467 /* returns 1 if buf looks like an internal node, 0 otherwise */
468 static int is_internal(char *buf, int blocksize, struct buffer_head *bh)
469 {
470         struct block_head *blkh;
471         int nr;
472         int used_space;
473
474         blkh = (struct block_head *)buf;
475         nr = blkh_level(blkh);
476         if (nr <= DISK_LEAF_NODE_LEVEL || nr > MAX_HEIGHT) {
477                 /* this level is not possible for internal nodes */
478                 reiserfs_warning(NULL, "reiserfs-5087",
479                                  "this should be caught earlier");
480                 return 0;
481         }
482
483         nr = blkh_nr_item(blkh);
484         if (nr > (blocksize - BLKH_SIZE - DC_SIZE) / (KEY_SIZE + DC_SIZE)) {
485                 /* for internal which is not root we might check min number of keys */
486                 reiserfs_warning(NULL, "reiserfs-5088",
487                                  "number of key seems wrong: %z", bh);
488                 return 0;
489         }
490
491         used_space = BLKH_SIZE + KEY_SIZE * nr + DC_SIZE * (nr + 1);
492         if (used_space != blocksize - blkh_free_space(blkh)) {
493                 reiserfs_warning(NULL, "reiserfs-5089",
494                                  "free space seems wrong: %z", bh);
495                 return 0;
496         }
497         // one may imagine much more checks
498         return 1;
499 }
500
501 // make sure that bh contains formatted node of reiserfs tree of
502 // 'level'-th level
503 static int is_tree_node(struct buffer_head *bh, int level)
504 {
505         if (B_LEVEL(bh) != level) {
506                 reiserfs_warning(NULL, "reiserfs-5090", "node level %d does "
507                                  "not match to the expected one %d",
508                                  B_LEVEL(bh), level);
509                 return 0;
510         }
511         if (level == DISK_LEAF_NODE_LEVEL)
512                 return is_leaf(bh->b_data, bh->b_size, bh);
513
514         return is_internal(bh->b_data, bh->b_size, bh);
515 }
516
517 #define SEARCH_BY_KEY_READA 16
518
519 /*
520  * The function is NOT SCHEDULE-SAFE!
521  * It might unlock the write lock if we needed to wait for a block
522  * to be read. Note that in this case it won't recover the lock to avoid
523  * high contention resulting from too much lock requests, especially
524  * the caller (search_by_key) will perform other schedule-unsafe
525  * operations just after calling this function.
526  *
527  * @return depth of lock to be restored after read completes
528  */
529 static int search_by_key_reada(struct super_block *s,
530                                 struct buffer_head **bh,
531                                 b_blocknr_t *b, int num)
532 {
533         int i, j;
534         int depth = -1;
535
536         for (i = 0; i < num; i++) {
537                 bh[i] = sb_getblk(s, b[i]);
538         }
539         /*
540          * We are going to read some blocks on which we
541          * have a reference. It's safe, though we might be
542          * reading blocks concurrently changed if we release
543          * the lock. But it's still fine because we check later
544          * if the tree changed
545          */
546         for (j = 0; j < i; j++) {
547                 /*
548                  * note, this needs attention if we are getting rid of the BKL
549                  * you have to make sure the prepared bit isn't set on this buffer
550                  */
551                 if (!buffer_uptodate(bh[j])) {
552                         if (depth == -1)
553                                 depth = reiserfs_write_unlock_nested(s);
554                         ll_rw_block(READA, 1, bh + j);
555                 }
556                 brelse(bh[j]);
557         }
558         return depth;
559 }
560
561 /**************************************************************************
562  * Algorithm   SearchByKey                                                *
563  *             look for item in the Disk S+Tree by its key                *
564  * Input:  sb   -  super block                                            *
565  *         key  - pointer to the key to search                            *
566  * Output: ITEM_FOUND, ITEM_NOT_FOUND or IO_ERROR                         *
567  *         search_path - path from the root to the needed leaf            *
568  **************************************************************************/
569
570 /* This function fills up the path from the root to the leaf as it
571    descends the tree looking for the key.  It uses reiserfs_bread to
572    try to find buffers in the cache given their block number.  If it
573    does not find them in the cache it reads them from disk.  For each
574    node search_by_key finds using reiserfs_bread it then uses
575    bin_search to look through that node.  bin_search will find the
576    position of the block_number of the next node if it is looking
577    through an internal node.  If it is looking through a leaf node
578    bin_search will find the position of the item which has key either
579    equal to given key, or which is the maximal key less than the given
580    key.  search_by_key returns a path that must be checked for the
581    correctness of the top of the path but need not be checked for the
582    correctness of the bottom of the path */
583 /* The function is NOT SCHEDULE-SAFE! */
584 int search_by_key(struct super_block *sb, const struct cpu_key *key,    /* Key to search. */
585                   struct treepath *search_path,/* This structure was
586                                                    allocated and initialized
587                                                    by the calling
588                                                    function. It is filled up
589                                                    by this function.  */
590                   int stop_level        /* How far down the tree to search. To
591                                            stop at leaf level - set to
592                                            DISK_LEAF_NODE_LEVEL */
593     )
594 {
595         b_blocknr_t block_number;
596         int expected_level;
597         struct buffer_head *bh;
598         struct path_element *last_element;
599         int node_level, retval;
600         int right_neighbor_of_leaf_node;
601         int fs_gen;
602         struct buffer_head *reada_bh[SEARCH_BY_KEY_READA];
603         b_blocknr_t reada_blocks[SEARCH_BY_KEY_READA];
604         int reada_count = 0;
605
606 #ifdef CONFIG_REISERFS_CHECK
607         int repeat_counter = 0;
608 #endif
609
610         PROC_INFO_INC(sb, search_by_key);
611
612         /* As we add each node to a path we increase its count.  This means that
613            we must be careful to release all nodes in a path before we either
614            discard the path struct or re-use the path struct, as we do here. */
615
616         pathrelse(search_path);
617
618         right_neighbor_of_leaf_node = 0;
619
620         /* With each iteration of this loop we search through the items in the
621            current node, and calculate the next current node(next path element)
622            for the next iteration of this loop.. */
623         block_number = SB_ROOT_BLOCK(sb);
624         expected_level = -1;
625         while (1) {
626
627 #ifdef CONFIG_REISERFS_CHECK
628                 if (!(++repeat_counter % 50000))
629                         reiserfs_warning(sb, "PAP-5100",
630                                          "%s: there were %d iterations of "
631                                          "while loop looking for key %K",
632                                          current->comm, repeat_counter,
633                                          key);
634 #endif
635
636                 /* prep path to have another element added to it. */
637                 last_element =
638                     PATH_OFFSET_PELEMENT(search_path,
639                                          ++search_path->path_length);
640                 fs_gen = get_generation(sb);
641
642                 /* Read the next tree node, and set the last element in the path to
643                    have a pointer to it. */
644                 if ((bh = last_element->pe_buffer =
645                      sb_getblk(sb, block_number))) {
646
647                         /*
648                          * We'll need to drop the lock if we encounter any
649                          * buffers that need to be read. If all of them are
650                          * already up to date, we don't need to drop the lock.
651                          */
652                         int depth = -1;
653
654                         if (!buffer_uptodate(bh) && reada_count > 1)
655                                 depth = search_by_key_reada(sb, reada_bh,
656                                                     reada_blocks, reada_count);
657
658                         if (!buffer_uptodate(bh) && depth == -1)
659                                 depth = reiserfs_write_unlock_nested(sb);
660
661                         ll_rw_block(READ, 1, &bh);
662                         wait_on_buffer(bh);
663
664                         if (depth != -1)
665                                 reiserfs_write_lock_nested(sb, depth);
666                         if (!buffer_uptodate(bh))
667                                 goto io_error;
668                 } else {
669                       io_error:
670                         search_path->path_length--;
671                         pathrelse(search_path);
672                         return IO_ERROR;
673                 }
674                 reada_count = 0;
675                 if (expected_level == -1)
676                         expected_level = SB_TREE_HEIGHT(sb);
677                 expected_level--;
678
679                 /* It is possible that schedule occurred. We must check whether the key
680                    to search is still in the tree rooted from the current buffer. If
681                    not then repeat search from the root. */
682                 if (fs_changed(fs_gen, sb) &&
683                     (!B_IS_IN_TREE(bh) ||
684                      B_LEVEL(bh) != expected_level ||
685                      !key_in_buffer(search_path, key, sb))) {
686                         PROC_INFO_INC(sb, search_by_key_fs_changed);
687                         PROC_INFO_INC(sb, search_by_key_restarted);
688                         PROC_INFO_INC(sb,
689                                       sbk_restarted[expected_level - 1]);
690                         pathrelse(search_path);
691
692                         /* Get the root block number so that we can repeat the search
693                            starting from the root. */
694                         block_number = SB_ROOT_BLOCK(sb);
695                         expected_level = -1;
696                         right_neighbor_of_leaf_node = 0;
697
698                         /* repeat search from the root */
699                         continue;
700                 }
701
702                 /* only check that the key is in the buffer if key is not
703                    equal to the MAX_KEY. Latter case is only possible in
704                    "finish_unfinished()" processing during mount. */
705                 RFALSE(comp_keys(&MAX_KEY, key) &&
706                        !key_in_buffer(search_path, key, sb),
707                        "PAP-5130: key is not in the buffer");
708 #ifdef CONFIG_REISERFS_CHECK
709                 if (REISERFS_SB(sb)->cur_tb) {
710                         print_cur_tb("5140");
711                         reiserfs_panic(sb, "PAP-5140",
712                                        "schedule occurred in do_balance!");
713                 }
714 #endif
715
716                 // make sure, that the node contents look like a node of
717                 // certain level
718                 if (!is_tree_node(bh, expected_level)) {
719                         reiserfs_error(sb, "vs-5150",
720                                        "invalid format found in block %ld. "
721                                        "Fsck?", bh->b_blocknr);
722                         pathrelse(search_path);
723                         return IO_ERROR;
724                 }
725
726                 /* ok, we have acquired next formatted node in the tree */
727                 node_level = B_LEVEL(bh);
728
729                 PROC_INFO_BH_STAT(sb, bh, node_level - 1);
730
731                 RFALSE(node_level < stop_level,
732                        "vs-5152: tree level (%d) is less than stop level (%d)",
733                        node_level, stop_level);
734
735                 retval = bin_search(key, B_N_PITEM_HEAD(bh, 0),
736                                       B_NR_ITEMS(bh),
737                                       (node_level ==
738                                        DISK_LEAF_NODE_LEVEL) ? IH_SIZE :
739                                       KEY_SIZE,
740                                       &(last_element->pe_position));
741                 if (node_level == stop_level) {
742                         return retval;
743                 }
744
745                 /* we are not in the stop level */
746                 if (retval == ITEM_FOUND)
747                         /* item has been found, so we choose the pointer which is to the right of the found one */
748                         last_element->pe_position++;
749
750                 /* if item was not found we choose the position which is to
751                    the left of the found item. This requires no code,
752                    bin_search did it already. */
753
754                 /* So we have chosen a position in the current node which is
755                    an internal node.  Now we calculate child block number by
756                    position in the node. */
757                 block_number =
758                     B_N_CHILD_NUM(bh, last_element->pe_position);
759
760                 /* if we are going to read leaf nodes, try for read ahead as well */
761                 if ((search_path->reada & PATH_READA) &&
762                     node_level == DISK_LEAF_NODE_LEVEL + 1) {
763                         int pos = last_element->pe_position;
764                         int limit = B_NR_ITEMS(bh);
765                         struct reiserfs_key *le_key;
766
767                         if (search_path->reada & PATH_READA_BACK)
768                                 limit = 0;
769                         while (reada_count < SEARCH_BY_KEY_READA) {
770                                 if (pos == limit)
771                                         break;
772                                 reada_blocks[reada_count++] =
773                                     B_N_CHILD_NUM(bh, pos);
774                                 if (search_path->reada & PATH_READA_BACK)
775                                         pos--;
776                                 else
777                                         pos++;
778
779                                 /*
780                                  * check to make sure we're in the same object
781                                  */
782                                 le_key = B_N_PDELIM_KEY(bh, pos);
783                                 if (le32_to_cpu(le_key->k_objectid) !=
784                                     key->on_disk_key.k_objectid) {
785                                         break;
786                                 }
787                         }
788                 }
789         }
790 }
791
792 /* Form the path to an item and position in this item which contains
793    file byte defined by key. If there is no such item
794    corresponding to the key, we point the path to the item with
795    maximal key less than key, and *pos_in_item is set to one
796    past the last entry/byte in the item.  If searching for entry in a
797    directory item, and it is not found, *pos_in_item is set to one
798    entry more than the entry with maximal key which is less than the
799    sought key.
800
801    Note that if there is no entry in this same node which is one more,
802    then we point to an imaginary entry.  for direct items, the
803    position is in units of bytes, for indirect items the position is
804    in units of blocknr entries, for directory items the position is in
805    units of directory entries.  */
806
807 /* The function is NOT SCHEDULE-SAFE! */
808 int search_for_position_by_key(struct super_block *sb,  /* Pointer to the super block.          */
809                                const struct cpu_key *p_cpu_key, /* Key to search (cpu variable)         */
810                                struct treepath *search_path     /* Filled up by this function.          */
811     )
812 {
813         struct item_head *p_le_ih;      /* pointer to on-disk structure */
814         int blk_size;
815         loff_t item_offset, offset;
816         struct reiserfs_dir_entry de;
817         int retval;
818
819         /* If searching for directory entry. */
820         if (is_direntry_cpu_key(p_cpu_key))
821                 return search_by_entry_key(sb, p_cpu_key, search_path,
822                                            &de);
823
824         /* If not searching for directory entry. */
825
826         /* If item is found. */
827         retval = search_item(sb, p_cpu_key, search_path);
828         if (retval == IO_ERROR)
829                 return retval;
830         if (retval == ITEM_FOUND) {
831
832                 RFALSE(!ih_item_len
833                        (B_N_PITEM_HEAD
834                         (PATH_PLAST_BUFFER(search_path),
835                          PATH_LAST_POSITION(search_path))),
836                        "PAP-5165: item length equals zero");
837
838                 pos_in_item(search_path) = 0;
839                 return POSITION_FOUND;
840         }
841
842         RFALSE(!PATH_LAST_POSITION(search_path),
843                "PAP-5170: position equals zero");
844
845         /* Item is not found. Set path to the previous item. */
846         p_le_ih =
847             B_N_PITEM_HEAD(PATH_PLAST_BUFFER(search_path),
848                            --PATH_LAST_POSITION(search_path));
849         blk_size = sb->s_blocksize;
850
851         if (comp_short_keys(&(p_le_ih->ih_key), p_cpu_key)) {
852                 return FILE_NOT_FOUND;
853         }
854         // FIXME: quite ugly this far
855
856         item_offset = le_ih_k_offset(p_le_ih);
857         offset = cpu_key_k_offset(p_cpu_key);
858
859         /* Needed byte is contained in the item pointed to by the path. */
860         if (item_offset <= offset &&
861             item_offset + op_bytes_number(p_le_ih, blk_size) > offset) {
862                 pos_in_item(search_path) = offset - item_offset;
863                 if (is_indirect_le_ih(p_le_ih)) {
864                         pos_in_item(search_path) /= blk_size;
865                 }
866                 return POSITION_FOUND;
867         }
868
869         /* Needed byte is not contained in the item pointed to by the
870            path. Set pos_in_item out of the item. */
871         if (is_indirect_le_ih(p_le_ih))
872                 pos_in_item(search_path) =
873                     ih_item_len(p_le_ih) / UNFM_P_SIZE;
874         else
875                 pos_in_item(search_path) = ih_item_len(p_le_ih);
876
877         return POSITION_NOT_FOUND;
878 }
879
880 /* Compare given item and item pointed to by the path. */
881 int comp_items(const struct item_head *stored_ih, const struct treepath *path)
882 {
883         struct buffer_head *bh = PATH_PLAST_BUFFER(path);
884         struct item_head *ih;
885
886         /* Last buffer at the path is not in the tree. */
887         if (!B_IS_IN_TREE(bh))
888                 return 1;
889
890         /* Last path position is invalid. */
891         if (PATH_LAST_POSITION(path) >= B_NR_ITEMS(bh))
892                 return 1;
893
894         /* we need only to know, whether it is the same item */
895         ih = get_ih(path);
896         return memcmp(stored_ih, ih, IH_SIZE);
897 }
898
899 /* unformatted nodes are not logged anymore, ever.  This is safe
900 ** now
901 */
902 #define held_by_others(bh) (atomic_read(&(bh)->b_count) > 1)
903
904 // block can not be forgotten as it is in I/O or held by someone
905 #define block_in_use(bh) (buffer_locked(bh) || (held_by_others(bh)))
906
907 // prepare for delete or cut of direct item
908 static inline int prepare_for_direct_item(struct treepath *path,
909                                           struct item_head *le_ih,
910                                           struct inode *inode,
911                                           loff_t new_file_length, int *cut_size)
912 {
913         loff_t round_len;
914
915         if (new_file_length == max_reiserfs_offset(inode)) {
916                 /* item has to be deleted */
917                 *cut_size = -(IH_SIZE + ih_item_len(le_ih));
918                 return M_DELETE;
919         }
920         // new file gets truncated
921         if (get_inode_item_key_version(inode) == KEY_FORMAT_3_6) {
922                 //
923                 round_len = ROUND_UP(new_file_length);
924                 /* this was new_file_length < le_ih ... */
925                 if (round_len < le_ih_k_offset(le_ih)) {
926                         *cut_size = -(IH_SIZE + ih_item_len(le_ih));
927                         return M_DELETE;        /* Delete this item. */
928                 }
929                 /* Calculate first position and size for cutting from item. */
930                 pos_in_item(path) = round_len - (le_ih_k_offset(le_ih) - 1);
931                 *cut_size = -(ih_item_len(le_ih) - pos_in_item(path));
932
933                 return M_CUT;   /* Cut from this item. */
934         }
935
936         // old file: items may have any length
937
938         if (new_file_length < le_ih_k_offset(le_ih)) {
939                 *cut_size = -(IH_SIZE + ih_item_len(le_ih));
940                 return M_DELETE;        /* Delete this item. */
941         }
942         /* Calculate first position and size for cutting from item. */
943         *cut_size = -(ih_item_len(le_ih) -
944                       (pos_in_item(path) =
945                        new_file_length + 1 - le_ih_k_offset(le_ih)));
946         return M_CUT;           /* Cut from this item. */
947 }
948
949 static inline int prepare_for_direntry_item(struct treepath *path,
950                                             struct item_head *le_ih,
951                                             struct inode *inode,
952                                             loff_t new_file_length,
953                                             int *cut_size)
954 {
955         if (le_ih_k_offset(le_ih) == DOT_OFFSET &&
956             new_file_length == max_reiserfs_offset(inode)) {
957                 RFALSE(ih_entry_count(le_ih) != 2,
958                        "PAP-5220: incorrect empty directory item (%h)", le_ih);
959                 *cut_size = -(IH_SIZE + ih_item_len(le_ih));
960                 return M_DELETE;        /* Delete the directory item containing "." and ".." entry. */
961         }
962
963         if (ih_entry_count(le_ih) == 1) {
964                 /* Delete the directory item such as there is one record only
965                    in this item */
966                 *cut_size = -(IH_SIZE + ih_item_len(le_ih));
967                 return M_DELETE;
968         }
969
970         /* Cut one record from the directory item. */
971         *cut_size =
972             -(DEH_SIZE +
973               entry_length(get_last_bh(path), le_ih, pos_in_item(path)));
974         return M_CUT;
975 }
976
977 #define JOURNAL_FOR_FREE_BLOCK_AND_UPDATE_SD (2 * JOURNAL_PER_BALANCE_CNT + 1)
978
979 /*  If the path points to a directory or direct item, calculate mode and the size cut, for balance.
980     If the path points to an indirect item, remove some number of its unformatted nodes.
981     In case of file truncate calculate whether this item must be deleted/truncated or last
982     unformatted node of this item will be converted to a direct item.
983     This function returns a determination of what balance mode the calling function should employ. */
984 static char prepare_for_delete_or_cut(struct reiserfs_transaction_handle *th, struct inode *inode, struct treepath *path, const struct cpu_key *item_key, int *removed, /* Number of unformatted nodes which were removed
985                                                                                                                                                                                    from end of the file. */
986                                       int *cut_size, unsigned long long new_file_length /* MAX_KEY_OFFSET in case of delete. */
987     )
988 {
989         struct super_block *sb = inode->i_sb;
990         struct item_head *p_le_ih = PATH_PITEM_HEAD(path);
991         struct buffer_head *bh = PATH_PLAST_BUFFER(path);
992
993         BUG_ON(!th->t_trans_id);
994
995         /* Stat_data item. */
996         if (is_statdata_le_ih(p_le_ih)) {
997
998                 RFALSE(new_file_length != max_reiserfs_offset(inode),
999                        "PAP-5210: mode must be M_DELETE");
1000
1001                 *cut_size = -(IH_SIZE + ih_item_len(p_le_ih));
1002                 return M_DELETE;
1003         }
1004
1005         /* Directory item. */
1006         if (is_direntry_le_ih(p_le_ih))
1007                 return prepare_for_direntry_item(path, p_le_ih, inode,
1008                                                  new_file_length,
1009                                                  cut_size);
1010
1011         /* Direct item. */
1012         if (is_direct_le_ih(p_le_ih))
1013                 return prepare_for_direct_item(path, p_le_ih, inode,
1014                                                new_file_length, cut_size);
1015
1016         /* Case of an indirect item. */
1017         {
1018             int blk_size = sb->s_blocksize;
1019             struct item_head s_ih;
1020             int need_re_search;
1021             int delete = 0;
1022             int result = M_CUT;
1023             int pos = 0;
1024
1025             if ( new_file_length == max_reiserfs_offset (inode) ) {
1026                 /* prepare_for_delete_or_cut() is called by
1027                  * reiserfs_delete_item() */
1028                 new_file_length = 0;
1029                 delete = 1;
1030             }
1031
1032             do {
1033                 need_re_search = 0;
1034                 *cut_size = 0;
1035                 bh = PATH_PLAST_BUFFER(path);
1036                 copy_item_head(&s_ih, PATH_PITEM_HEAD(path));
1037                 pos = I_UNFM_NUM(&s_ih);
1038
1039                 while (le_ih_k_offset (&s_ih) + (pos - 1) * blk_size > new_file_length) {
1040                     __le32 *unfm;
1041                     __u32 block;
1042
1043                     /* Each unformatted block deletion may involve one additional
1044                      * bitmap block into the transaction, thereby the initial
1045                      * journal space reservation might not be enough. */
1046                     if (!delete && (*cut_size) != 0 &&
1047                         reiserfs_transaction_free_space(th) < JOURNAL_FOR_FREE_BLOCK_AND_UPDATE_SD)
1048                         break;
1049
1050                     unfm = (__le32 *)B_I_PITEM(bh, &s_ih) + pos - 1;
1051                     block = get_block_num(unfm, 0);
1052
1053                     if (block != 0) {
1054                         reiserfs_prepare_for_journal(sb, bh, 1);
1055                         put_block_num(unfm, 0, 0);
1056                         journal_mark_dirty(th, sb, bh);
1057                         reiserfs_free_block(th, inode, block, 1);
1058                     }
1059
1060                     reiserfs_cond_resched(sb);
1061
1062                     if (item_moved (&s_ih, path))  {
1063                         need_re_search = 1;
1064                         break;
1065                     }
1066
1067                     pos --;
1068                     (*removed)++;
1069                     (*cut_size) -= UNFM_P_SIZE;
1070
1071                     if (pos == 0) {
1072                         (*cut_size) -= IH_SIZE;
1073                         result = M_DELETE;
1074                         break;
1075                     }
1076                 }
1077                 /* a trick.  If the buffer has been logged, this will do nothing.  If
1078                 ** we've broken the loop without logging it, it will restore the
1079                 ** buffer */
1080                 reiserfs_restore_prepared_buffer(sb, bh);
1081             } while (need_re_search &&
1082                      search_for_position_by_key(sb, item_key, path) == POSITION_FOUND);
1083             pos_in_item(path) = pos * UNFM_P_SIZE;
1084
1085             if (*cut_size == 0) {
1086                 /* Nothing were cut. maybe convert last unformatted node to the
1087                  * direct item? */
1088                 result = M_CONVERT;
1089             }
1090             return result;
1091         }
1092 }
1093
1094 /* Calculate number of bytes which will be deleted or cut during balance */
1095 static int calc_deleted_bytes_number(struct tree_balance *tb, char mode)
1096 {
1097         int del_size;
1098         struct item_head *p_le_ih = PATH_PITEM_HEAD(tb->tb_path);
1099
1100         if (is_statdata_le_ih(p_le_ih))
1101                 return 0;
1102
1103         del_size =
1104             (mode ==
1105              M_DELETE) ? ih_item_len(p_le_ih) : -tb->insert_size[0];
1106         if (is_direntry_le_ih(p_le_ih)) {
1107                 /* return EMPTY_DIR_SIZE; We delete emty directoris only.
1108                  * we can't use EMPTY_DIR_SIZE, as old format dirs have a different
1109                  * empty size.  ick. FIXME, is this right? */
1110                 return del_size;
1111         }
1112
1113         if (is_indirect_le_ih(p_le_ih))
1114                 del_size = (del_size / UNFM_P_SIZE) *
1115                                 (PATH_PLAST_BUFFER(tb->tb_path)->b_size);
1116         return del_size;
1117 }
1118
1119 static void init_tb_struct(struct reiserfs_transaction_handle *th,
1120                            struct tree_balance *tb,
1121                            struct super_block *sb,
1122                            struct treepath *path, int size)
1123 {
1124
1125         BUG_ON(!th->t_trans_id);
1126
1127         memset(tb, '\0', sizeof(struct tree_balance));
1128         tb->transaction_handle = th;
1129         tb->tb_sb = sb;
1130         tb->tb_path = path;
1131         PATH_OFFSET_PBUFFER(path, ILLEGAL_PATH_ELEMENT_OFFSET) = NULL;
1132         PATH_OFFSET_POSITION(path, ILLEGAL_PATH_ELEMENT_OFFSET) = 0;
1133         tb->insert_size[0] = size;
1134 }
1135
1136 void padd_item(char *item, int total_length, int length)
1137 {
1138         int i;
1139
1140         for (i = total_length; i > length;)
1141                 item[--i] = 0;
1142 }
1143
1144 #ifdef REISERQUOTA_DEBUG
1145 char key2type(struct reiserfs_key *ih)
1146 {
1147         if (is_direntry_le_key(2, ih))
1148                 return 'd';
1149         if (is_direct_le_key(2, ih))
1150                 return 'D';
1151         if (is_indirect_le_key(2, ih))
1152                 return 'i';
1153         if (is_statdata_le_key(2, ih))
1154                 return 's';
1155         return 'u';
1156 }
1157
1158 char head2type(struct item_head *ih)
1159 {
1160         if (is_direntry_le_ih(ih))
1161                 return 'd';
1162         if (is_direct_le_ih(ih))
1163                 return 'D';
1164         if (is_indirect_le_ih(ih))
1165                 return 'i';
1166         if (is_statdata_le_ih(ih))
1167                 return 's';
1168         return 'u';
1169 }
1170 #endif
1171
1172 /* Delete object item.
1173  * th       - active transaction handle
1174  * path     - path to the deleted item
1175  * item_key - key to search for the deleted item
1176  * indode   - used for updating i_blocks and quotas
1177  * un_bh    - NULL or unformatted node pointer
1178  */
1179 int reiserfs_delete_item(struct reiserfs_transaction_handle *th,
1180                          struct treepath *path, const struct cpu_key *item_key,
1181                          struct inode *inode, struct buffer_head *un_bh)
1182 {
1183         struct super_block *sb = inode->i_sb;
1184         struct tree_balance s_del_balance;
1185         struct item_head s_ih;
1186         struct item_head *q_ih;
1187         int quota_cut_bytes;
1188         int ret_value, del_size, removed;
1189         int depth;
1190
1191 #ifdef CONFIG_REISERFS_CHECK
1192         char mode;
1193         int iter = 0;
1194 #endif
1195
1196         BUG_ON(!th->t_trans_id);
1197
1198         init_tb_struct(th, &s_del_balance, sb, path,
1199                        0 /*size is unknown */ );
1200
1201         while (1) {
1202                 removed = 0;
1203
1204 #ifdef CONFIG_REISERFS_CHECK
1205                 iter++;
1206                 mode =
1207 #endif
1208                     prepare_for_delete_or_cut(th, inode, path,
1209                                               item_key, &removed,
1210                                               &del_size,
1211                                               max_reiserfs_offset(inode));
1212
1213                 RFALSE(mode != M_DELETE, "PAP-5320: mode must be M_DELETE");
1214
1215                 copy_item_head(&s_ih, PATH_PITEM_HEAD(path));
1216                 s_del_balance.insert_size[0] = del_size;
1217
1218                 ret_value = fix_nodes(M_DELETE, &s_del_balance, NULL, NULL);
1219                 if (ret_value != REPEAT_SEARCH)
1220                         break;
1221
1222                 PROC_INFO_INC(sb, delete_item_restarted);
1223
1224                 // file system changed, repeat search
1225                 ret_value =
1226                     search_for_position_by_key(sb, item_key, path);
1227                 if (ret_value == IO_ERROR)
1228                         break;
1229                 if (ret_value == FILE_NOT_FOUND) {
1230                         reiserfs_warning(sb, "vs-5340",
1231                                          "no items of the file %K found",
1232                                          item_key);
1233                         break;
1234                 }
1235         }                       /* while (1) */
1236
1237         if (ret_value != CARRY_ON) {
1238                 unfix_nodes(&s_del_balance);
1239                 return 0;
1240         }
1241         // reiserfs_delete_item returns item length when success
1242         ret_value = calc_deleted_bytes_number(&s_del_balance, M_DELETE);
1243         q_ih = get_ih(path);
1244         quota_cut_bytes = ih_item_len(q_ih);
1245
1246         /* hack so the quota code doesn't have to guess if the file
1247          ** has a tail.  On tail insert, we allocate quota for 1 unformatted node.
1248          ** We test the offset because the tail might have been
1249          ** split into multiple items, and we only want to decrement for
1250          ** the unfm node once
1251          */
1252         if (!S_ISLNK(inode->i_mode) && is_direct_le_ih(q_ih)) {
1253                 if ((le_ih_k_offset(q_ih) & (sb->s_blocksize - 1)) == 1) {
1254                         quota_cut_bytes = sb->s_blocksize + UNFM_P_SIZE;
1255                 } else {
1256                         quota_cut_bytes = 0;
1257                 }
1258         }
1259
1260         if (un_bh) {
1261                 int off;
1262                 char *data;
1263
1264                 /* We are in direct2indirect conversion, so move tail contents
1265                    to the unformatted node */
1266                 /* note, we do the copy before preparing the buffer because we
1267                  ** don't care about the contents of the unformatted node yet.
1268                  ** the only thing we really care about is the direct item's data
1269                  ** is in the unformatted node.
1270                  **
1271                  ** Otherwise, we would have to call reiserfs_prepare_for_journal on
1272                  ** the unformatted node, which might schedule, meaning we'd have to
1273                  ** loop all the way back up to the start of the while loop.
1274                  **
1275                  ** The unformatted node must be dirtied later on.  We can't be
1276                  ** sure here if the entire tail has been deleted yet.
1277                  **
1278                  ** un_bh is from the page cache (all unformatted nodes are
1279                  ** from the page cache) and might be a highmem page.  So, we
1280                  ** can't use un_bh->b_data.
1281                  ** -clm
1282                  */
1283
1284                 data = kmap_atomic(un_bh->b_page);
1285                 off = ((le_ih_k_offset(&s_ih) - 1) & (PAGE_CACHE_SIZE - 1));
1286                 memcpy(data + off,
1287                        B_I_PITEM(PATH_PLAST_BUFFER(path), &s_ih),
1288                        ret_value);
1289                 kunmap_atomic(data);
1290         }
1291         /* Perform balancing after all resources have been collected at once. */
1292         do_balance(&s_del_balance, NULL, NULL, M_DELETE);
1293
1294 #ifdef REISERQUOTA_DEBUG
1295         reiserfs_debug(sb, REISERFS_DEBUG_CODE,
1296                        "reiserquota delete_item(): freeing %u, id=%u type=%c",
1297                        quota_cut_bytes, inode->i_uid, head2type(&s_ih));
1298 #endif
1299         depth = reiserfs_write_unlock_nested(inode->i_sb);
1300         dquot_free_space_nodirty(inode, quota_cut_bytes);
1301         reiserfs_write_lock_nested(inode->i_sb, depth);
1302
1303         /* Return deleted body length */
1304         return ret_value;
1305 }
1306
1307 /* Summary Of Mechanisms For Handling Collisions Between Processes:
1308
1309  deletion of the body of the object is performed by iput(), with the
1310  result that if multiple processes are operating on a file, the
1311  deletion of the body of the file is deferred until the last process
1312  that has an open inode performs its iput().
1313
1314  writes and truncates are protected from collisions by use of
1315  semaphores.
1316
1317  creates, linking, and mknod are protected from collisions with other
1318  processes by making the reiserfs_add_entry() the last step in the
1319  creation, and then rolling back all changes if there was a collision.
1320  - Hans
1321 */
1322
1323 /* this deletes item which never gets split */
1324 void reiserfs_delete_solid_item(struct reiserfs_transaction_handle *th,
1325                                 struct inode *inode, struct reiserfs_key *key)
1326 {
1327         struct super_block *sb = th->t_super;
1328         struct tree_balance tb;
1329         INITIALIZE_PATH(path);
1330         int item_len = 0;
1331         int tb_init = 0;
1332         struct cpu_key cpu_key;
1333         int retval;
1334         int quota_cut_bytes = 0;
1335
1336         BUG_ON(!th->t_trans_id);
1337
1338         le_key2cpu_key(&cpu_key, key);
1339
1340         while (1) {
1341                 retval = search_item(th->t_super, &cpu_key, &path);
1342                 if (retval == IO_ERROR) {
1343                         reiserfs_error(th->t_super, "vs-5350",
1344                                        "i/o failure occurred trying "
1345                                        "to delete %K", &cpu_key);
1346                         break;
1347                 }
1348                 if (retval != ITEM_FOUND) {
1349                         pathrelse(&path);
1350                         // No need for a warning, if there is just no free space to insert '..' item into the newly-created subdir
1351                         if (!
1352                             ((unsigned long long)
1353                              GET_HASH_VALUE(le_key_k_offset
1354                                             (le_key_version(key), key)) == 0
1355                              && (unsigned long long)
1356                              GET_GENERATION_NUMBER(le_key_k_offset
1357                                                    (le_key_version(key),
1358                                                     key)) == 1))
1359                                 reiserfs_warning(th->t_super, "vs-5355",
1360                                                  "%k not found", key);
1361                         break;
1362                 }
1363                 if (!tb_init) {
1364                         tb_init = 1;
1365                         item_len = ih_item_len(PATH_PITEM_HEAD(&path));
1366                         init_tb_struct(th, &tb, th->t_super, &path,
1367                                        -(IH_SIZE + item_len));
1368                 }
1369                 quota_cut_bytes = ih_item_len(PATH_PITEM_HEAD(&path));
1370
1371                 retval = fix_nodes(M_DELETE, &tb, NULL, NULL);
1372                 if (retval == REPEAT_SEARCH) {
1373                         PROC_INFO_INC(th->t_super, delete_solid_item_restarted);
1374                         continue;
1375                 }
1376
1377                 if (retval == CARRY_ON) {
1378                         do_balance(&tb, NULL, NULL, M_DELETE);
1379                         if (inode) {    /* Should we count quota for item? (we don't count quotas for save-links) */
1380                                 int depth;
1381 #ifdef REISERQUOTA_DEBUG
1382                                 reiserfs_debug(th->t_super, REISERFS_DEBUG_CODE,
1383                                                "reiserquota delete_solid_item(): freeing %u id=%u type=%c",
1384                                                quota_cut_bytes, inode->i_uid,
1385                                                key2type(key));
1386 #endif
1387                                 depth = reiserfs_write_unlock_nested(sb);
1388                                 dquot_free_space_nodirty(inode,
1389                                                          quota_cut_bytes);
1390                                 reiserfs_write_lock_nested(sb, depth);
1391                         }
1392                         break;
1393                 }
1394                 // IO_ERROR, NO_DISK_SPACE, etc
1395                 reiserfs_warning(th->t_super, "vs-5360",
1396                                  "could not delete %K due to fix_nodes failure",
1397                                  &cpu_key);
1398                 unfix_nodes(&tb);
1399                 break;
1400         }
1401
1402         reiserfs_check_path(&path);
1403 }
1404
1405 int reiserfs_delete_object(struct reiserfs_transaction_handle *th,
1406                            struct inode *inode)
1407 {
1408         int err;
1409         inode->i_size = 0;
1410         BUG_ON(!th->t_trans_id);
1411
1412         /* for directory this deletes item containing "." and ".." */
1413         err =
1414             reiserfs_do_truncate(th, inode, NULL, 0 /*no timestamp updates */ );
1415         if (err)
1416                 return err;
1417
1418 #if defined( USE_INODE_GENERATION_COUNTER )
1419         if (!old_format_only(th->t_super)) {
1420                 __le32 *inode_generation;
1421
1422                 inode_generation =
1423                     &REISERFS_SB(th->t_super)->s_rs->s_inode_generation;
1424                 le32_add_cpu(inode_generation, 1);
1425         }
1426 /* USE_INODE_GENERATION_COUNTER */
1427 #endif
1428         reiserfs_delete_solid_item(th, inode, INODE_PKEY(inode));
1429
1430         return err;
1431 }
1432
1433 static void unmap_buffers(struct page *page, loff_t pos)
1434 {
1435         struct buffer_head *bh;
1436         struct buffer_head *head;
1437         struct buffer_head *next;
1438         unsigned long tail_index;
1439         unsigned long cur_index;
1440
1441         if (page) {
1442                 if (page_has_buffers(page)) {
1443                         tail_index = pos & (PAGE_CACHE_SIZE - 1);
1444                         cur_index = 0;
1445                         head = page_buffers(page);
1446                         bh = head;
1447                         do {
1448                                 next = bh->b_this_page;
1449
1450                                 /* we want to unmap the buffers that contain the tail, and
1451                                  ** all the buffers after it (since the tail must be at the
1452                                  ** end of the file).  We don't want to unmap file data
1453                                  ** before the tail, since it might be dirty and waiting to
1454                                  ** reach disk
1455                                  */
1456                                 cur_index += bh->b_size;
1457                                 if (cur_index > tail_index) {
1458                                         reiserfs_unmap_buffer(bh);
1459                                 }
1460                                 bh = next;
1461                         } while (bh != head);
1462                 }
1463         }
1464 }
1465
1466 static int maybe_indirect_to_direct(struct reiserfs_transaction_handle *th,
1467                                     struct inode *inode,
1468                                     struct page *page,
1469                                     struct treepath *path,
1470                                     const struct cpu_key *item_key,
1471                                     loff_t new_file_size, char *mode)
1472 {
1473         struct super_block *sb = inode->i_sb;
1474         int block_size = sb->s_blocksize;
1475         int cut_bytes;
1476         BUG_ON(!th->t_trans_id);
1477         BUG_ON(new_file_size != inode->i_size);
1478
1479         /* the page being sent in could be NULL if there was an i/o error
1480          ** reading in the last block.  The user will hit problems trying to
1481          ** read the file, but for now we just skip the indirect2direct
1482          */
1483         if (atomic_read(&inode->i_count) > 1 ||
1484             !tail_has_to_be_packed(inode) ||
1485             !page || (REISERFS_I(inode)->i_flags & i_nopack_mask)) {
1486                 /* leave tail in an unformatted node */
1487                 *mode = M_SKIP_BALANCING;
1488                 cut_bytes =
1489                     block_size - (new_file_size & (block_size - 1));
1490                 pathrelse(path);
1491                 return cut_bytes;
1492         }
1493         /* Perform the conversion to a direct_item. */
1494         /* return indirect_to_direct(inode, path, item_key,
1495                                   new_file_size, mode); */
1496         return indirect2direct(th, inode, page, path, item_key,
1497                                new_file_size, mode);
1498 }
1499
1500 /* we did indirect_to_direct conversion. And we have inserted direct
1501    item successesfully, but there were no disk space to cut unfm
1502    pointer being converted. Therefore we have to delete inserted
1503    direct item(s) */
1504 static void indirect_to_direct_roll_back(struct reiserfs_transaction_handle *th,
1505                                          struct inode *inode, struct treepath *path)
1506 {
1507         struct cpu_key tail_key;
1508         int tail_len;
1509         int removed;
1510         BUG_ON(!th->t_trans_id);
1511
1512         make_cpu_key(&tail_key, inode, inode->i_size + 1, TYPE_DIRECT, 4);      // !!!!
1513         tail_key.key_length = 4;
1514
1515         tail_len =
1516             (cpu_key_k_offset(&tail_key) & (inode->i_sb->s_blocksize - 1)) - 1;
1517         while (tail_len) {
1518                 /* look for the last byte of the tail */
1519                 if (search_for_position_by_key(inode->i_sb, &tail_key, path) ==
1520                     POSITION_NOT_FOUND)
1521                         reiserfs_panic(inode->i_sb, "vs-5615",
1522                                        "found invalid item");
1523                 RFALSE(path->pos_in_item !=
1524                        ih_item_len(PATH_PITEM_HEAD(path)) - 1,
1525                        "vs-5616: appended bytes found");
1526                 PATH_LAST_POSITION(path)--;
1527
1528                 removed =
1529                     reiserfs_delete_item(th, path, &tail_key, inode,
1530                                          NULL /*unbh not needed */ );
1531                 RFALSE(removed <= 0
1532                        || removed > tail_len,
1533                        "vs-5617: there was tail %d bytes, removed item length %d bytes",
1534                        tail_len, removed);
1535                 tail_len -= removed;
1536                 set_cpu_key_k_offset(&tail_key,
1537                                      cpu_key_k_offset(&tail_key) - removed);
1538         }
1539         reiserfs_warning(inode->i_sb, "reiserfs-5091", "indirect_to_direct "
1540                          "conversion has been rolled back due to "
1541                          "lack of disk space");
1542         //mark_file_without_tail (inode);
1543         mark_inode_dirty(inode);
1544 }
1545
1546 /* (Truncate or cut entry) or delete object item. Returns < 0 on failure */
1547 int reiserfs_cut_from_item(struct reiserfs_transaction_handle *th,
1548                            struct treepath *path,
1549                            struct cpu_key *item_key,
1550                            struct inode *inode,
1551                            struct page *page, loff_t new_file_size)
1552 {
1553         struct super_block *sb = inode->i_sb;
1554         /* Every function which is going to call do_balance must first
1555            create a tree_balance structure.  Then it must fill up this
1556            structure by using the init_tb_struct and fix_nodes functions.
1557            After that we can make tree balancing. */
1558         struct tree_balance s_cut_balance;
1559         struct item_head *p_le_ih;
1560         int cut_size = 0,       /* Amount to be cut. */
1561             ret_value = CARRY_ON, removed = 0,  /* Number of the removed unformatted nodes. */
1562             is_inode_locked = 0;
1563         char mode;              /* Mode of the balance. */
1564         int retval2 = -1;
1565         int quota_cut_bytes;
1566         loff_t tail_pos = 0;
1567         int depth;
1568
1569         BUG_ON(!th->t_trans_id);
1570
1571         init_tb_struct(th, &s_cut_balance, inode->i_sb, path,
1572                        cut_size);
1573
1574         /* Repeat this loop until we either cut the item without needing
1575            to balance, or we fix_nodes without schedule occurring */
1576         while (1) {
1577                 /* Determine the balance mode, position of the first byte to
1578                    be cut, and size to be cut.  In case of the indirect item
1579                    free unformatted nodes which are pointed to by the cut
1580                    pointers. */
1581
1582                 mode =
1583                     prepare_for_delete_or_cut(th, inode, path,
1584                                               item_key, &removed,
1585                                               &cut_size, new_file_size);
1586                 if (mode == M_CONVERT) {
1587                         /* convert last unformatted node to direct item or leave
1588                            tail in the unformatted node */
1589                         RFALSE(ret_value != CARRY_ON,
1590                                "PAP-5570: can not convert twice");
1591
1592                         ret_value =
1593                             maybe_indirect_to_direct(th, inode, page,
1594                                                      path, item_key,
1595                                                      new_file_size, &mode);
1596                         if (mode == M_SKIP_BALANCING)
1597                                 /* tail has been left in the unformatted node */
1598                                 return ret_value;
1599
1600                         is_inode_locked = 1;
1601
1602                         /* removing of last unformatted node will change value we
1603                            have to return to truncate. Save it */
1604                         retval2 = ret_value;
1605                         /*retval2 = sb->s_blocksize - (new_file_size & (sb->s_blocksize - 1)); */
1606
1607                         /* So, we have performed the first part of the conversion:
1608                            inserting the new direct item.  Now we are removing the
1609                            last unformatted node pointer. Set key to search for
1610                            it. */
1611                         set_cpu_key_k_type(item_key, TYPE_INDIRECT);
1612                         item_key->key_length = 4;
1613                         new_file_size -=
1614                             (new_file_size & (sb->s_blocksize - 1));
1615                         tail_pos = new_file_size;
1616                         set_cpu_key_k_offset(item_key, new_file_size + 1);
1617                         if (search_for_position_by_key
1618                             (sb, item_key,
1619                              path) == POSITION_NOT_FOUND) {
1620                                 print_block(PATH_PLAST_BUFFER(path), 3,
1621                                             PATH_LAST_POSITION(path) - 1,
1622                                             PATH_LAST_POSITION(path) + 1);
1623                                 reiserfs_panic(sb, "PAP-5580", "item to "
1624                                                "convert does not exist (%K)",
1625                                                item_key);
1626                         }
1627                         continue;
1628                 }
1629                 if (cut_size == 0) {
1630                         pathrelse(path);
1631                         return 0;
1632                 }
1633
1634                 s_cut_balance.insert_size[0] = cut_size;
1635
1636                 ret_value = fix_nodes(mode, &s_cut_balance, NULL, NULL);
1637                 if (ret_value != REPEAT_SEARCH)
1638                         break;
1639
1640                 PROC_INFO_INC(sb, cut_from_item_restarted);
1641
1642                 ret_value =
1643                     search_for_position_by_key(sb, item_key, path);
1644                 if (ret_value == POSITION_FOUND)
1645                         continue;
1646
1647                 reiserfs_warning(sb, "PAP-5610", "item %K not found",
1648                                  item_key);
1649                 unfix_nodes(&s_cut_balance);
1650                 return (ret_value == IO_ERROR) ? -EIO : -ENOENT;
1651         }                       /* while */
1652
1653         // check fix_nodes results (IO_ERROR or NO_DISK_SPACE)
1654         if (ret_value != CARRY_ON) {
1655                 if (is_inode_locked) {
1656                         // FIXME: this seems to be not needed: we are always able
1657                         // to cut item
1658                         indirect_to_direct_roll_back(th, inode, path);
1659                 }
1660                 if (ret_value == NO_DISK_SPACE)
1661                         reiserfs_warning(sb, "reiserfs-5092",
1662                                          "NO_DISK_SPACE");
1663                 unfix_nodes(&s_cut_balance);
1664                 return -EIO;
1665         }
1666
1667         /* go ahead and perform balancing */
1668
1669         RFALSE(mode == M_PASTE || mode == M_INSERT, "invalid mode");
1670
1671         /* Calculate number of bytes that need to be cut from the item. */
1672         quota_cut_bytes =
1673             (mode ==
1674              M_DELETE) ? ih_item_len(get_ih(path)) : -s_cut_balance.
1675             insert_size[0];
1676         if (retval2 == -1)
1677                 ret_value = calc_deleted_bytes_number(&s_cut_balance, mode);
1678         else
1679                 ret_value = retval2;
1680
1681         /* For direct items, we only change the quota when deleting the last
1682          ** item.
1683          */
1684         p_le_ih = PATH_PITEM_HEAD(s_cut_balance.tb_path);
1685         if (!S_ISLNK(inode->i_mode) && is_direct_le_ih(p_le_ih)) {
1686                 if (mode == M_DELETE &&
1687                     (le_ih_k_offset(p_le_ih) & (sb->s_blocksize - 1)) ==
1688                     1) {
1689                         // FIXME: this is to keep 3.5 happy
1690                         REISERFS_I(inode)->i_first_direct_byte = U32_MAX;
1691                         quota_cut_bytes = sb->s_blocksize + UNFM_P_SIZE;
1692                 } else {
1693                         quota_cut_bytes = 0;
1694                 }
1695         }
1696 #ifdef CONFIG_REISERFS_CHECK
1697         if (is_inode_locked) {
1698                 struct item_head *le_ih =
1699                     PATH_PITEM_HEAD(s_cut_balance.tb_path);
1700                 /* we are going to complete indirect2direct conversion. Make
1701                    sure, that we exactly remove last unformatted node pointer
1702                    of the item */
1703                 if (!is_indirect_le_ih(le_ih))
1704                         reiserfs_panic(sb, "vs-5652",
1705                                        "item must be indirect %h", le_ih);
1706
1707                 if (mode == M_DELETE && ih_item_len(le_ih) != UNFM_P_SIZE)
1708                         reiserfs_panic(sb, "vs-5653", "completing "
1709                                        "indirect2direct conversion indirect "
1710                                        "item %h being deleted must be of "
1711                                        "4 byte long", le_ih);
1712
1713                 if (mode == M_CUT
1714                     && s_cut_balance.insert_size[0] != -UNFM_P_SIZE) {
1715                         reiserfs_panic(sb, "vs-5654", "can not complete "
1716                                        "indirect2direct conversion of %h "
1717                                        "(CUT, insert_size==%d)",
1718                                        le_ih, s_cut_balance.insert_size[0]);
1719                 }
1720                 /* it would be useful to make sure, that right neighboring
1721                    item is direct item of this file */
1722         }
1723 #endif
1724
1725         do_balance(&s_cut_balance, NULL, NULL, mode);
1726         if (is_inode_locked) {
1727                 /* we've done an indirect->direct conversion.  when the data block
1728                  ** was freed, it was removed from the list of blocks that must
1729                  ** be flushed before the transaction commits, make sure to
1730                  ** unmap and invalidate it
1731                  */
1732                 unmap_buffers(page, tail_pos);
1733                 REISERFS_I(inode)->i_flags &= ~i_pack_on_close_mask;
1734         }
1735 #ifdef REISERQUOTA_DEBUG
1736         reiserfs_debug(inode->i_sb, REISERFS_DEBUG_CODE,
1737                        "reiserquota cut_from_item(): freeing %u id=%u type=%c",
1738                        quota_cut_bytes, inode->i_uid, '?');
1739 #endif
1740         depth = reiserfs_write_unlock_nested(sb);
1741         dquot_free_space_nodirty(inode, quota_cut_bytes);
1742         reiserfs_write_lock_nested(sb, depth);
1743         return ret_value;
1744 }
1745
1746 static void truncate_directory(struct reiserfs_transaction_handle *th,
1747                                struct inode *inode)
1748 {
1749         BUG_ON(!th->t_trans_id);
1750         if (inode->i_nlink)
1751                 reiserfs_error(inode->i_sb, "vs-5655", "link count != 0");
1752
1753         set_le_key_k_offset(KEY_FORMAT_3_5, INODE_PKEY(inode), DOT_OFFSET);
1754         set_le_key_k_type(KEY_FORMAT_3_5, INODE_PKEY(inode), TYPE_DIRENTRY);
1755         reiserfs_delete_solid_item(th, inode, INODE_PKEY(inode));
1756         reiserfs_update_sd(th, inode);
1757         set_le_key_k_offset(KEY_FORMAT_3_5, INODE_PKEY(inode), SD_OFFSET);
1758         set_le_key_k_type(KEY_FORMAT_3_5, INODE_PKEY(inode), TYPE_STAT_DATA);
1759 }
1760
1761 /* Truncate file to the new size. Note, this must be called with a transaction
1762    already started */
1763 int reiserfs_do_truncate(struct reiserfs_transaction_handle *th,
1764                           struct inode *inode,  /* ->i_size contains new size */
1765                          struct page *page,     /* up to date for last block */
1766                          int update_timestamps  /* when it is called by
1767                                                    file_release to convert
1768                                                    the tail - no timestamps
1769                                                    should be updated */
1770     )
1771 {
1772         INITIALIZE_PATH(s_search_path); /* Path to the current object item. */
1773         struct item_head *p_le_ih;      /* Pointer to an item header. */
1774         struct cpu_key s_item_key;      /* Key to search for a previous file item. */
1775         loff_t file_size,       /* Old file size. */
1776          new_file_size; /* New file size. */
1777         int deleted;            /* Number of deleted or truncated bytes. */
1778         int retval;
1779         int err = 0;
1780
1781         BUG_ON(!th->t_trans_id);
1782         if (!
1783             (S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode)
1784              || S_ISLNK(inode->i_mode)))
1785                 return 0;
1786
1787         if (S_ISDIR(inode->i_mode)) {
1788                 // deletion of directory - no need to update timestamps
1789                 truncate_directory(th, inode);
1790                 return 0;
1791         }
1792
1793         /* Get new file size. */
1794         new_file_size = inode->i_size;
1795
1796         // FIXME: note, that key type is unimportant here
1797         make_cpu_key(&s_item_key, inode, max_reiserfs_offset(inode),
1798                      TYPE_DIRECT, 3);
1799
1800         retval =
1801             search_for_position_by_key(inode->i_sb, &s_item_key,
1802                                        &s_search_path);
1803         if (retval == IO_ERROR) {
1804                 reiserfs_error(inode->i_sb, "vs-5657",
1805                                "i/o failure occurred trying to truncate %K",
1806                                &s_item_key);
1807                 err = -EIO;
1808                 goto out;
1809         }
1810         if (retval == POSITION_FOUND || retval == FILE_NOT_FOUND) {
1811                 reiserfs_error(inode->i_sb, "PAP-5660",
1812                                "wrong result %d of search for %K", retval,
1813                                &s_item_key);
1814
1815                 err = -EIO;
1816                 goto out;
1817         }
1818
1819         s_search_path.pos_in_item--;
1820
1821         /* Get real file size (total length of all file items) */
1822         p_le_ih = PATH_PITEM_HEAD(&s_search_path);
1823         if (is_statdata_le_ih(p_le_ih))
1824                 file_size = 0;
1825         else {
1826                 loff_t offset = le_ih_k_offset(p_le_ih);
1827                 int bytes =
1828                     op_bytes_number(p_le_ih, inode->i_sb->s_blocksize);
1829
1830                 /* this may mismatch with real file size: if last direct item
1831                    had no padding zeros and last unformatted node had no free
1832                    space, this file would have this file size */
1833                 file_size = offset + bytes - 1;
1834         }
1835         /*
1836          * are we doing a full truncate or delete, if so
1837          * kick in the reada code
1838          */
1839         if (new_file_size == 0)
1840                 s_search_path.reada = PATH_READA | PATH_READA_BACK;
1841
1842         if (file_size == 0 || file_size < new_file_size) {
1843                 goto update_and_out;
1844         }
1845
1846         /* Update key to search for the last file item. */
1847         set_cpu_key_k_offset(&s_item_key, file_size);
1848
1849         do {
1850                 /* Cut or delete file item. */
1851                 deleted =
1852                     reiserfs_cut_from_item(th, &s_search_path, &s_item_key,
1853                                            inode, page, new_file_size);
1854                 if (deleted < 0) {
1855                         reiserfs_warning(inode->i_sb, "vs-5665",
1856                                          "reiserfs_cut_from_item failed");
1857                         reiserfs_check_path(&s_search_path);
1858                         return 0;
1859                 }
1860
1861                 RFALSE(deleted > file_size,
1862                        "PAP-5670: reiserfs_cut_from_item: too many bytes deleted: deleted %d, file_size %lu, item_key %K",
1863                        deleted, file_size, &s_item_key);
1864
1865                 /* Change key to search the last file item. */
1866                 file_size -= deleted;
1867
1868                 set_cpu_key_k_offset(&s_item_key, file_size);
1869
1870                 /* While there are bytes to truncate and previous file item is presented in the tree. */
1871
1872                 /*
1873                  ** This loop could take a really long time, and could log
1874                  ** many more blocks than a transaction can hold.  So, we do a polite
1875                  ** journal end here, and if the transaction needs ending, we make
1876                  ** sure the file is consistent before ending the current trans
1877                  ** and starting a new one
1878                  */
1879                 if (journal_transaction_should_end(th, 0) ||
1880                     reiserfs_transaction_free_space(th) <= JOURNAL_FOR_FREE_BLOCK_AND_UPDATE_SD) {
1881                         int orig_len_alloc = th->t_blocks_allocated;
1882                         pathrelse(&s_search_path);
1883
1884                         if (update_timestamps) {
1885                                 inode->i_mtime = CURRENT_TIME_SEC;
1886                                 inode->i_ctime = CURRENT_TIME_SEC;
1887                         }
1888                         reiserfs_update_sd(th, inode);
1889
1890                         err = journal_end(th, inode->i_sb, orig_len_alloc);
1891                         if (err)
1892                                 goto out;
1893                         err = journal_begin(th, inode->i_sb,
1894                                             JOURNAL_FOR_FREE_BLOCK_AND_UPDATE_SD + JOURNAL_PER_BALANCE_CNT * 4) ;
1895                         if (err)
1896                                 goto out;
1897                         reiserfs_update_inode_transaction(inode);
1898                 }
1899         } while (file_size > ROUND_UP(new_file_size) &&
1900                  search_for_position_by_key(inode->i_sb, &s_item_key,
1901                                             &s_search_path) == POSITION_FOUND);
1902
1903         RFALSE(file_size > ROUND_UP(new_file_size),
1904                "PAP-5680: truncate did not finish: new_file_size %Ld, current %Ld, oid %d",
1905                new_file_size, file_size, s_item_key.on_disk_key.k_objectid);
1906
1907       update_and_out:
1908         if (update_timestamps) {
1909                 // this is truncate, not file closing
1910                 inode->i_mtime = CURRENT_TIME_SEC;
1911                 inode->i_ctime = CURRENT_TIME_SEC;
1912         }
1913         reiserfs_update_sd(th, inode);
1914
1915       out:
1916         pathrelse(&s_search_path);
1917         return err;
1918 }
1919
1920 #ifdef CONFIG_REISERFS_CHECK
1921 // this makes sure, that we __append__, not overwrite or add holes
1922 static void check_research_for_paste(struct treepath *path,
1923                                      const struct cpu_key *key)
1924 {
1925         struct item_head *found_ih = get_ih(path);
1926
1927         if (is_direct_le_ih(found_ih)) {
1928                 if (le_ih_k_offset(found_ih) +
1929                     op_bytes_number(found_ih,
1930                                     get_last_bh(path)->b_size) !=
1931                     cpu_key_k_offset(key)
1932                     || op_bytes_number(found_ih,
1933                                        get_last_bh(path)->b_size) !=
1934                     pos_in_item(path))
1935                         reiserfs_panic(NULL, "PAP-5720", "found direct item "
1936                                        "%h or position (%d) does not match "
1937                                        "to key %K", found_ih,
1938                                        pos_in_item(path), key);
1939         }
1940         if (is_indirect_le_ih(found_ih)) {
1941                 if (le_ih_k_offset(found_ih) +
1942                     op_bytes_number(found_ih,
1943                                     get_last_bh(path)->b_size) !=
1944                     cpu_key_k_offset(key)
1945                     || I_UNFM_NUM(found_ih) != pos_in_item(path)
1946                     || get_ih_free_space(found_ih) != 0)
1947                         reiserfs_panic(NULL, "PAP-5730", "found indirect "
1948                                        "item (%h) or position (%d) does not "
1949                                        "match to key (%K)",
1950                                        found_ih, pos_in_item(path), key);
1951         }
1952 }
1953 #endif                          /* config reiserfs check */
1954
1955 /* Paste bytes to the existing item. Returns bytes number pasted into the item. */
1956 int reiserfs_paste_into_item(struct reiserfs_transaction_handle *th, struct treepath *search_path,      /* Path to the pasted item.       */
1957                              const struct cpu_key *key, /* Key to search for the needed item. */
1958                              struct inode *inode,       /* Inode item belongs to */
1959                              const char *body,  /* Pointer to the bytes to paste.    */
1960                              int pasted_size)
1961 {                               /* Size of pasted bytes.             */
1962         struct super_block *sb = inode->i_sb;
1963         struct tree_balance s_paste_balance;
1964         int retval;
1965         int fs_gen;
1966         int depth;
1967
1968         BUG_ON(!th->t_trans_id);
1969
1970         fs_gen = get_generation(inode->i_sb);
1971
1972 #ifdef REISERQUOTA_DEBUG
1973         reiserfs_debug(inode->i_sb, REISERFS_DEBUG_CODE,
1974                        "reiserquota paste_into_item(): allocating %u id=%u type=%c",
1975                        pasted_size, inode->i_uid,
1976                        key2type(&(key->on_disk_key)));
1977 #endif
1978
1979         depth = reiserfs_write_unlock_nested(sb);
1980         retval = dquot_alloc_space_nodirty(inode, pasted_size);
1981         reiserfs_write_lock_nested(sb, depth);
1982         if (retval) {
1983                 pathrelse(search_path);
1984                 return retval;
1985         }
1986         init_tb_struct(th, &s_paste_balance, th->t_super, search_path,
1987                        pasted_size);
1988 #ifdef DISPLACE_NEW_PACKING_LOCALITIES
1989         s_paste_balance.key = key->on_disk_key;
1990 #endif
1991
1992         /* DQUOT_* can schedule, must check before the fix_nodes */
1993         if (fs_changed(fs_gen, inode->i_sb)) {
1994                 goto search_again;
1995         }
1996
1997         while ((retval =
1998                 fix_nodes(M_PASTE, &s_paste_balance, NULL,
1999                           body)) == REPEAT_SEARCH) {
2000               search_again:
2001                 /* file system changed while we were in the fix_nodes */
2002                 PROC_INFO_INC(th->t_super, paste_into_item_restarted);
2003                 retval =
2004                     search_for_position_by_key(th->t_super, key,
2005                                                search_path);
2006                 if (retval == IO_ERROR) {
2007                         retval = -EIO;
2008                         goto error_out;
2009                 }
2010                 if (retval == POSITION_FOUND) {
2011                         reiserfs_warning(inode->i_sb, "PAP-5710",
2012                                          "entry or pasted byte (%K) exists",
2013                                          key);
2014                         retval = -EEXIST;
2015                         goto error_out;
2016                 }
2017 #ifdef CONFIG_REISERFS_CHECK
2018                 check_research_for_paste(search_path, key);
2019 #endif
2020         }
2021
2022         /* Perform balancing after all resources are collected by fix_nodes, and
2023            accessing them will not risk triggering schedule. */
2024         if (retval == CARRY_ON) {
2025                 do_balance(&s_paste_balance, NULL /*ih */ , body, M_PASTE);
2026                 return 0;
2027         }
2028         retval = (retval == NO_DISK_SPACE) ? -ENOSPC : -EIO;
2029       error_out:
2030         /* this also releases the path */
2031         unfix_nodes(&s_paste_balance);
2032 #ifdef REISERQUOTA_DEBUG
2033         reiserfs_debug(inode->i_sb, REISERFS_DEBUG_CODE,
2034                        "reiserquota paste_into_item(): freeing %u id=%u type=%c",
2035                        pasted_size, inode->i_uid,
2036                        key2type(&(key->on_disk_key)));
2037 #endif
2038         depth = reiserfs_write_unlock_nested(sb);
2039         dquot_free_space_nodirty(inode, pasted_size);
2040         reiserfs_write_lock_nested(sb, depth);
2041         return retval;
2042 }
2043
2044 /* Insert new item into the buffer at the path.
2045  * th   - active transaction handle
2046  * path - path to the inserted item
2047  * ih   - pointer to the item header to insert
2048  * body - pointer to the bytes to insert
2049  */
2050 int reiserfs_insert_item(struct reiserfs_transaction_handle *th,
2051                          struct treepath *path, const struct cpu_key *key,
2052                          struct item_head *ih, struct inode *inode,
2053                          const char *body)
2054 {
2055         struct tree_balance s_ins_balance;
2056         int retval;
2057         int fs_gen = 0;
2058         int quota_bytes = 0;
2059
2060         BUG_ON(!th->t_trans_id);
2061
2062         if (inode) {            /* Do we count quotas for item? */
2063                 int depth;
2064                 fs_gen = get_generation(inode->i_sb);
2065                 quota_bytes = ih_item_len(ih);
2066
2067                 /* hack so the quota code doesn't have to guess if the file has
2068                  ** a tail, links are always tails, so there's no guessing needed
2069                  */
2070                 if (!S_ISLNK(inode->i_mode) && is_direct_le_ih(ih))
2071                         quota_bytes = inode->i_sb->s_blocksize + UNFM_P_SIZE;
2072 #ifdef REISERQUOTA_DEBUG
2073                 reiserfs_debug(inode->i_sb, REISERFS_DEBUG_CODE,
2074                                "reiserquota insert_item(): allocating %u id=%u type=%c",
2075                                quota_bytes, inode->i_uid, head2type(ih));
2076 #endif
2077                 /* We can't dirty inode here. It would be immediately written but
2078                  * appropriate stat item isn't inserted yet... */
2079                 depth = reiserfs_write_unlock_nested(inode->i_sb);
2080                 retval = dquot_alloc_space_nodirty(inode, quota_bytes);
2081                 reiserfs_write_lock_nested(inode->i_sb, depth);
2082                 if (retval) {
2083                         pathrelse(path);
2084                         return retval;
2085                 }
2086         }
2087         init_tb_struct(th, &s_ins_balance, th->t_super, path,
2088                        IH_SIZE + ih_item_len(ih));
2089 #ifdef DISPLACE_NEW_PACKING_LOCALITIES
2090         s_ins_balance.key = key->on_disk_key;
2091 #endif
2092         /* DQUOT_* can schedule, must check to be sure calling fix_nodes is safe */
2093         if (inode && fs_changed(fs_gen, inode->i_sb)) {
2094                 goto search_again;
2095         }
2096
2097         while ((retval =
2098                 fix_nodes(M_INSERT, &s_ins_balance, ih,
2099                           body)) == REPEAT_SEARCH) {
2100               search_again:
2101                 /* file system changed while we were in the fix_nodes */
2102                 PROC_INFO_INC(th->t_super, insert_item_restarted);
2103                 retval = search_item(th->t_super, key, path);
2104                 if (retval == IO_ERROR) {
2105                         retval = -EIO;
2106                         goto error_out;
2107                 }
2108                 if (retval == ITEM_FOUND) {
2109                         reiserfs_warning(th->t_super, "PAP-5760",
2110                                          "key %K already exists in the tree",
2111                                          key);
2112                         retval = -EEXIST;
2113                         goto error_out;
2114                 }
2115         }
2116
2117         /* make balancing after all resources will be collected at a time */
2118         if (retval == CARRY_ON) {
2119                 do_balance(&s_ins_balance, ih, body, M_INSERT);
2120                 return 0;
2121         }
2122
2123         retval = (retval == NO_DISK_SPACE) ? -ENOSPC : -EIO;
2124       error_out:
2125         /* also releases the path */
2126         unfix_nodes(&s_ins_balance);
2127 #ifdef REISERQUOTA_DEBUG
2128         reiserfs_debug(th->t_super, REISERFS_DEBUG_CODE,
2129                        "reiserquota insert_item(): freeing %u id=%u type=%c",
2130                        quota_bytes, inode->i_uid, head2type(ih));
2131 #endif
2132         if (inode) {
2133                 int depth = reiserfs_write_unlock_nested(inode->i_sb);
2134                 dquot_free_space_nodirty(inode, quota_bytes);
2135                 reiserfs_write_lock_nested(inode->i_sb, depth);
2136         }
2137         return retval;
2138 }