Linux Kernel  3.7.1
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Groups Pages
jfs_xtree.c
Go to the documentation of this file.
1 /*
2  * Copyright (C) International Business Machines Corp., 2000-2005
3  *
4  * This program is free software; you can redistribute it and/or modify
5  * it under the terms of the GNU General Public License as published by
6  * the Free Software Foundation; either version 2 of the License, or
7  * (at your option) any later version.
8  *
9  * This program is distributed in the hope that it will be useful,
10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See
12  * the GNU General Public License for more details.
13  *
14  * You should have received a copy of the GNU General Public License
15  * along with this program; if not, write to the Free Software
16  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
17  */
18 /*
19  * jfs_xtree.c: extent allocation descriptor B+-tree manager
20  */
21 
22 #include <linux/fs.h>
23 #include <linux/module.h>
24 #include <linux/quotaops.h>
25 #include <linux/seq_file.h>
26 #include "jfs_incore.h"
27 #include "jfs_filsys.h"
28 #include "jfs_metapage.h"
29 #include "jfs_dmap.h"
30 #include "jfs_dinode.h"
31 #include "jfs_superblock.h"
32 #include "jfs_debug.h"
33 
/*
 * xtree local flag
 */
#define XT_INSERT	0x00000001	/* search is on behalf of an insert:
					 * xtSearch() also counts the pages
					 * that would need to split (nsplit) */
38 
/*
 * xtree key/entry comparison: extent offset
 *
 * CMP receives:
 *	-1: K < start of extent
 *	 0: start_of_extent <= K <= end_of_extent
 *	 1: K > end_of_extent
 * OFFSET64 is clobbered with offsetXAD(X) as a side effect.
 *
 * Wrapped in do { } while (0) so the macro behaves as a single
 * statement in unbraced if/else bodies (the bare-brace form would
 * leave a stray ';' and break an else branch).
 */
#define XT_CMP(CMP, K, X, OFFSET64)\
do {\
	OFFSET64 = offsetXAD(X);\
	(CMP) = ((K) >= OFFSET64 + lengthXAD(X)) ? 1 :\
		((K) < OFFSET64) ? -1 : 0;\
} while (0)
53 
/*
 * write a xad entry: store flag, offset, length and address into *XAD.
 *
 * do { } while (0) makes the multi-statement macro safe as a single
 * statement inside unbraced if/else bodies.
 */
#define XT_PUTENTRY(XAD, FLAG, OFF, LEN, ADDR)\
do {\
	(XAD)->flag = (FLAG);\
	XADoffset((XAD), (OFF));\
	XADlength((XAD), (LEN));\
	XADaddress((XAD), (ADDR));\
} while (0)
62 
/* resolve the xtree page for a metapage: the in-inode root (i_xtroot)
 * when MP is the root, else the buffer's data area
 */
#define XT_PAGE(IP, MP) BT_PAGE(IP, MP, xtpage_t, i_xtroot)

/* get page buffer for specified block address */
/* ToDo: Replace this ugly macro with a function */
#define XT_GETPAGE(IP, BN, MP, SIZE, P, RC)\
{\
	BT_GETPAGE(IP, BN, MP, xtpage_t, SIZE, P, RC, i_xtroot)\
	if (!(RC))\
	{\
		/* on-disk sanity check: nextindex must lie within */\
		/* [XTENTRYSTART, maxentry], and maxentry must not */\
		/* exceed the slot capacity of a root (BN == 0) or */\
		/* regular page; otherwise unpin and fail with -EIO */\
		if ((le16_to_cpu((P)->header.nextindex) < XTENTRYSTART) ||\
		(le16_to_cpu((P)->header.nextindex) > le16_to_cpu((P)->header.maxentry)) ||\
		(le16_to_cpu((P)->header.maxentry) > (((BN)==0)?XTROOTMAXSLOT:PSIZE>>L2XTSLOTSIZE)))\
		{\
			jfs_error((IP)->i_sb, "XT_GETPAGE: xtree page corrupt");\
			BT_PUTPAGE(MP);\
			MP = NULL;\
			RC = -EIO;\
		}\
	}\
}

/* for consistency */
#define XT_PUTPAGE(MP) BT_PUTPAGE(MP)

/* retrieve (BN, pinned MP, page P, INDEX) from the btframe LEAF that
 * xtSearch() left on top of the traverse stack
 */
#define XT_GETSEARCH(IP, LEAF, BN, MP, P, INDEX) \
	BT_GETSEARCH(IP, LEAF, BN, MP, xtpage_t, P, INDEX, i_xtroot)
89 /* xtree entry parameter descriptor */
90 struct xtsplit {
91  struct metapage *mp;
96  int len;
97  struct pxdlist *pxdlist;
98 };
99 
100 
/*
 * statistics
 *
 * counters are bumped via INCREMENT() and compiled out entirely when
 * CONFIG_JFS_STATISTICS is not set.
 */
#ifdef CONFIG_JFS_STATISTICS
static struct {
	uint search;		/* total xtSearch() calls */
	uint fastSearch;	/* searches resolved by the sequential-access
				 * heuristic (no binary search needed) */
	uint split;		/* page splits performed by xtSplitPage() */
} xtStat;
#endif
111 
112 
/*
 * forward references
 */
static int xtSearch(struct inode *ip, s64 xoff, s64 *next, int *cmpp,
		    struct btstack * btstack, int flag);

static int xtSplitUp(tid_t tid,
		     struct inode *ip,
		     struct xtsplit * split, struct btstack * btstack);

static int xtSplitPage(tid_t tid, struct inode *ip, struct xtsplit * split,
		       struct metapage ** rmpp, s64 * rbnp);

static int xtSplitRoot(tid_t tid, struct inode *ip,
		       struct xtsplit * split, struct metapage ** rmpp);

/* prototypes below belong to routines not yet ported from OS/2 JFS;
 * they are compiled out unless _STILL_TO_PORT is defined
 */
#ifdef _STILL_TO_PORT
static int xtDeleteUp(tid_t tid, struct inode *ip, struct metapage * fmp,
		      xtpage_t * fp, struct btstack * btstack);

static int xtSearchNode(struct inode *ip,
			xad_t * xad,
			int *cmpp, struct btstack * btstack, int flag);

static int xtRelink(tid_t tid, struct inode *ip, xtpage_t * fp);
#endif				/* _STILL_TO_PORT */
139 
140 /*
141  * xtLookup()
142  *
143  * function: map a single page into a physical extent;
144  */
145 int xtLookup(struct inode *ip, s64 lstart,
146  s64 llen, int *pflag, s64 * paddr, s32 * plen, int no_check)
147 {
148  int rc = 0;
149  struct btstack btstack;
150  int cmp;
151  s64 bn;
152  struct metapage *mp;
153  xtpage_t *p;
154  int index;
155  xad_t *xad;
156  s64 next, size, xoff, xend;
157  int xlen;
158  s64 xaddr;
159 
160  *paddr = 0;
161  *plen = llen;
162 
163  if (!no_check) {
164  /* is lookup offset beyond eof ? */
165  size = ((u64) ip->i_size + (JFS_SBI(ip->i_sb)->bsize - 1)) >>
166  JFS_SBI(ip->i_sb)->l2bsize;
167  if (lstart >= size)
168  return 0;
169  }
170 
171  /*
172  * search for the xad entry covering the logical extent
173  */
174 //search:
175  if ((rc = xtSearch(ip, lstart, &next, &cmp, &btstack, 0))) {
176  jfs_err("xtLookup: xtSearch returned %d", rc);
177  return rc;
178  }
179 
180  /*
181  * compute the physical extent covering logical extent
182  *
183  * N.B. search may have failed (e.g., hole in sparse file),
184  * and returned the index of the next entry.
185  */
186  /* retrieve search result */
187  XT_GETSEARCH(ip, btstack.top, bn, mp, p, index);
188 
189  /* is xad found covering start of logical extent ?
190  * lstart is a page start address,
191  * i.e., lstart cannot start in a hole;
192  */
193  if (cmp) {
194  if (next)
195  *plen = min(next - lstart, llen);
196  goto out;
197  }
198 
199  /*
200  * lxd covered by xad
201  */
202  xad = &p->xad[index];
203  xoff = offsetXAD(xad);
204  xlen = lengthXAD(xad);
205  xend = xoff + xlen;
206  xaddr = addressXAD(xad);
207 
208  /* initialize new pxd */
209  *pflag = xad->flag;
210  *paddr = xaddr + (lstart - xoff);
211  /* a page must be fully covered by an xad */
212  *plen = min(xend - lstart, llen);
213 
214  out:
215  XT_PUTPAGE(mp);
216 
217  return rc;
218 }
219 
/*
 * xtSearch()
 *
 * function: search for the xad entry covering specified offset.
 *
 * parameters:
 *	ip	- file object;
 *	xoff	- extent offset;
 *	nextp	- address of next extent (if any) for search miss
 *	cmpp	- comparison result: 0 on hit, non-zero on miss;
 *	btstack	- traverse stack;
 *	flag	- search process flag (XT_INSERT);
 *
 * returns:
 *	btstack contains (bn, index) of search path traversed to the entry.
 *	*cmpp is set to result of comparison with the entry returned.
 *	the page containing the entry is pinned at exit.
 */
static int xtSearch(struct inode *ip, s64 xoff, s64 *nextp,
		    int *cmpp, struct btstack * btstack, int flag)
{
	struct jfs_inode_info *jfs_ip = JFS_IP(ip);
	int rc = 0;
	int cmp = 1;		/* init for empty page */
	s64 bn;			/* block number */
	struct metapage *mp;	/* page buffer */
	xtpage_t *p;		/* page */
	xad_t *xad;
	int base, index, lim, btindex;
	struct btframe *btsp;
	int nsplit = 0;		/* number of pages to split */
	s64 t64;
	s64 next = 0;

	INCREMENT(xtStat.search);

	BT_CLR(btstack);

	btstack->nsplit = 0;

	/*
	 * search down tree from root:
	 *
	 * between two consecutive entries of <Ki, Pi> and <Kj, Pj> of
	 * internal page, child page Pi contains entry with k, Ki <= K < Kj.
	 *
	 * if entry with search key K is not found
	 * internal page search find the entry with largest key Ki
	 * less than K which point to the child page to search;
	 * leaf page search find the entry with smallest key Kj
	 * greater than K so that the returned index is the position of
	 * the entry to be shifted right for insertion of new entry.
	 * for empty tree, search key is greater than any key of the tree.
	 *
	 * by convention, root bn = 0.
	 */
	for (bn = 0;;) {
		/* get/pin the page to search */
		XT_GETPAGE(ip, bn, mp, PSIZE, p, rc);
		if (rc)
			return rc;

		/* try sequential access heuristics with the previous
		 * access entry in target leaf page:
		 * once search narrowed down into the target leaf,
		 * key must either match an entry in the leaf or
		 * key entry does not exist in the tree;
		 */
//fastSearch:
		if ((jfs_ip->btorder & BT_SEQUENTIAL) &&
		    (p->header.flag & BT_LEAF) &&
		    (index = jfs_ip->btindex) <
		    le16_to_cpu(p->header.nextindex)) {
			xad = &p->xad[index];
			t64 = offsetXAD(xad);
			if (xoff < t64 + lengthXAD(xad)) {
				if (xoff >= t64) {
					/* hit on the cached entry */
					*cmpp = 0;
					goto out;
				}

				/* stop sequential access heuristics */
				goto binarySearch;
			} else {	/* (t64 + lengthXAD(xad)) <= xoff */

				/* try next sequential entry */
				index++;
				if (index <
				    le16_to_cpu(p->header.nextindex)) {
					xad++;
					t64 = offsetXAD(xad);
					if (xoff < t64 + lengthXAD(xad)) {
						if (xoff >= t64) {
							*cmpp = 0;
							goto out;
						}

						/* miss: key falls between
						 * previous and this entry
						 */
						*cmpp = 1;
						next = t64;
						goto out;
					}

					/* (xoff >= t64 + lengthXAD(xad));
					 * matching entry may be further out:
					 * stop heuristic search
					 */
					/* stop sequential access heuristics */
					goto binarySearch;
				}

				/* (index == p->header.nextindex);
				 * miss: key entry does not exist in
				 * the target leaf/tree
				 */
				*cmpp = 1;
				goto out;
			}

			/*
			 * if hit, return index of the entry found, and
			 * if miss, where new entry with search key is
			 * to be inserted;
			 */
		      out:
			/* compute number of pages to split */
			if (flag & XT_INSERT) {
				/* equality compare is endian-safe */
				if (p->header.nextindex ==	/* little-endian */
				    p->header.maxentry)
					nsplit++;
				else
					nsplit = 0;
				btstack->nsplit = nsplit;
			}

			/* save search result: leaf frame left pinned for
			 * the caller (see XT_GETSEARCH)
			 */
			btsp = btstack->top;
			btsp->bn = bn;
			btsp->index = index;
			btsp->mp = mp;

			/* update sequential access heuristics */
			jfs_ip->btindex = index;

			if (nextp)
				*nextp = next;

			INCREMENT(xtStat.fastSearch);
			return 0;
		}

		/* well, ... full search now */
	      binarySearch:
		lim = le16_to_cpu(p->header.nextindex) - XTENTRYSTART;

		/*
		 * binary search with search key K on the current page
		 */
		for (base = XTENTRYSTART; lim; lim >>= 1) {
			index = base + (lim >> 1);

			XT_CMP(cmp, xoff, &p->xad[index], t64);
			if (cmp == 0) {
				/*
				 * search hit
				 */
				/* search hit - leaf page:
				 * return the entry found
				 */
				if (p->header.flag & BT_LEAF) {
					*cmpp = cmp;

					/* compute number of pages to split */
					if (flag & XT_INSERT) {
						if (p->header.nextindex ==
						    p->header.maxentry)
							nsplit++;
						else
							nsplit = 0;
						btstack->nsplit = nsplit;
					}

					/* save search result */
					btsp = btstack->top;
					btsp->bn = bn;
					btsp->index = index;
					btsp->mp = mp;

					/* init sequential access heuristics */
					btindex = jfs_ip->btindex;
					if (index == btindex ||
					    index == btindex + 1)
						jfs_ip->btorder = BT_SEQUENTIAL;
					else
						jfs_ip->btorder = BT_RANDOM;
					jfs_ip->btindex = index;

					return 0;
				}
				/* search hit - internal page:
				 * descend/search its child page
				 */
				if (index < le16_to_cpu(p->header.nextindex)-1)
					next = offsetXAD(&p->xad[index + 1]);
				goto next;
			}

			if (cmp > 0) {
				base = index + 1;
				--lim;
			}
		}

		/*
		 * search miss
		 *
		 * base is the smallest index with key (Kj) greater than
		 * search key (K) and may be zero or maxentry index.
		 */
		if (base < le16_to_cpu(p->header.nextindex))
			next = offsetXAD(&p->xad[base]);
		/*
		 * search miss - leaf page:
		 *
		 * return location of entry (base) where new entry with
		 * search key K is to be inserted.
		 */
		if (p->header.flag & BT_LEAF) {
			*cmpp = cmp;

			/* compute number of pages to split */
			if (flag & XT_INSERT) {
				if (p->header.nextindex ==
				    p->header.maxentry)
					nsplit++;
				else
					nsplit = 0;
				btstack->nsplit = nsplit;
			}

			/* save search result */
			btsp = btstack->top;
			btsp->bn = bn;
			btsp->index = base;
			btsp->mp = mp;

			/* init sequential access heuristics */
			btindex = jfs_ip->btindex;
			if (base == btindex || base == btindex + 1)
				jfs_ip->btorder = BT_SEQUENTIAL;
			else
				jfs_ip->btorder = BT_RANDOM;
			jfs_ip->btindex = base;

			if (nextp)
				*nextp = next;

			return 0;
		}

		/*
		 * search miss - non-leaf page:
		 *
		 * if base is non-zero, decrement base by one to get the parent
		 * entry of the child page to search.
		 */
		index = base ? base - 1 : base;

		/*
		 * go down to child page
		 */
	      next:
		/* update number of pages to split: nsplit counts the run of
		 * consecutive full pages ending at the current level
		 */
		if (p->header.nextindex == p->header.maxentry)
			nsplit++;
		else
			nsplit = 0;

		/* push (bn, index) of the parent page/entry */
		if (BT_STACK_FULL(btstack)) {
			jfs_error(ip->i_sb, "stack overrun in xtSearch!");
			XT_PUTPAGE(mp);
			return -EIO;
		}
		BT_PUSH(btstack, bn, index);

		/* get the child page block number */
		bn = addressXAD(&p->xad[index]);

		/* unpin the parent page */
		XT_PUTPAGE(mp);
	}
}
515 
/*
 * xtInsert()
 *
 * function: insert an extent entry for [xoff, xoff + xlen) into the xtree,
 *	allocating the data extent if the caller did not.
 *
 * parameter:
 *	tid	- transaction id;
 *	ip	- file object;
 *	xflag	- extent flag (XAD_NOTRECORDED):
 *	xoff	- extent offset;
 *	xlen	- extent length;
 *	xaddrp	- extent address pointer (in/out):
 *		if (*xaddrp)
 *			caller allocated data extent at *xaddrp;
 *		else
 *			allocate data extent and return its xaddr;
 *	flag	-
 *
 * return:
 *	0 on success; -EEXIST if the range overlaps an existing extent;
 *	errno from search/allocation/split otherwise.
 */
int xtInsert(tid_t tid,		/* transaction id */
	     struct inode *ip, int xflag, s64 xoff, s32 xlen, s64 * xaddrp,
	     int flag)
{
	int rc = 0;
	s64 xaddr, hint;
	struct metapage *mp;	/* meta-page buffer */
	xtpage_t *p;		/* base B+-tree index page */
	s64 bn;
	int index, nextindex;
	struct btstack btstack;	/* traverse stack */
	struct xtsplit split;	/* split information */
	xad_t *xad;
	int cmp;
	s64 next;
	struct tlock *tlck;
	struct xtlock *xtlck;

	jfs_info("xtInsert: nxoff:0x%lx nxlen:0x%x", (ulong) xoff, xlen);

	/*
	 * search for the entry location at which to insert:
	 *
	 * xtFastSearch() and xtSearch() both returns (leaf page
	 * pinned, index at which to insert).
	 * n.b. xtSearch() may return index of maxentry of
	 * the full page.
	 */
	if ((rc = xtSearch(ip, xoff, &next, &cmp, &btstack, XT_INSERT)))
		return rc;

	/* retrieve search result */
	XT_GETSEARCH(ip, btstack.top, bn, mp, p, index);

	/* This test must follow XT_GETSEARCH since mp must be valid if
	 * we branch to out: */
	if ((cmp == 0) || (next && (xlen > next - xoff))) {
		/* exact hit, or the new extent would overlap the next one */
		rc = -EEXIST;
		goto out;
	}

	/*
	 * allocate data extent requested
	 *
	 * allocation hint: last xad
	 */
	if ((xaddr = *xaddrp) == 0) {
		if (index > XTENTRYSTART) {
			xad = &p->xad[index - 1];
			hint = addressXAD(xad) + lengthXAD(xad) - 1;
		} else
			hint = 0;
		/* charge quota before allocating; release it if dbAlloc fails */
		if ((rc = dquot_alloc_block(ip, xlen)))
			goto out;
		if ((rc = dbAlloc(ip, hint, (s64) xlen, &xaddr))) {
			dquot_free_block(ip, xlen);
			goto out;
		}
	}

	/*
	 * insert entry for new extent
	 */
	xflag |= XAD_NEW;

	/*
	 * if the leaf page is full, split the page and
	 * propagate up the router entry for the new page from split
	 *
	 * The xtSplitUp() will insert the entry and unpin the leaf page.
	 */
	nextindex = le16_to_cpu(p->header.nextindex);
	if (nextindex == le16_to_cpu(p->header.maxentry)) {
		split.mp = mp;
		split.index = index;
		split.flag = xflag;
		split.off = xoff;
		split.len = xlen;
		split.addr = xaddr;
		split.pxdlist = NULL;
		if ((rc = xtSplitUp(tid, ip, &split, &btstack))) {
			/* undo data extent allocation (only if allocated here) */
			if (*xaddrp == 0) {
				dbFree(ip, xaddr, (s64) xlen);
				dquot_free_block(ip, xlen);
			}
			return rc;
		}

		*xaddrp = xaddr;
		return 0;
	}

	/*
	 * insert the new entry into the leaf page
	 */
	/*
	 * acquire a transaction lock on the leaf page;
	 *
	 * action: xad insertion/extension;
	 */
	BT_MARK_DIRTY(mp, ip);

	/* if insert into middle, shift right remaining entries. */
	if (index < nextindex)
		memmove(&p->xad[index + 1], &p->xad[index],
			(nextindex - index) * sizeof(xad_t));

	/* insert the new entry: mark the entry NEW */
	xad = &p->xad[index];
	XT_PUTENTRY(xad, xflag, xoff, xlen, xaddr);

	/* advance next available entry index */
	le16_add_cpu(&p->header.nextindex, 1);

	/* Don't log it if there are no links to the file */
	if (!test_cflag(COMMIT_Nolink, ip)) {
		tlck = txLock(tid, ip, mp, tlckXTREE | tlckGROW);
		xtlck = (struct xtlock *) & tlck->lock;
		/* widen the logged low-water-mark range to cover the insert */
		xtlck->lwm.offset =
		    (xtlck->lwm.offset) ? min(index,
					      (int)xtlck->lwm.offset) : index;
		xtlck->lwm.length =
		    le16_to_cpu(p->header.nextindex) - xtlck->lwm.offset;
	}

	*xaddrp = xaddr;

      out:
	/* unpin the leaf page */
	XT_PUTPAGE(mp);

	return rc;
}
670 
671 
/*
 * xtSplitUp()
 *
 * function:
 *	split full pages as propagating insertion up the tree
 *
 * parameter:
 *	tid	- transaction id;
 *	ip	- file object;
 *	split	- entry parameter descriptor;
 *	btstack	- traverse stack from xtSearch()
 *
 * return:
 *	0 on success; -EIO or errno from allocation/split on failure.
 *	On success the split leaf page has been unpinned.
 */
static int
xtSplitUp(tid_t tid,
	  struct inode *ip, struct xtsplit * split, struct btstack * btstack)
{
	int rc = 0;
	struct metapage *smp;
	xtpage_t *sp;		/* split page */
	struct metapage *rmp;
	s64 rbn;		/* new right page block number */
	struct metapage *rcmp;
	xtpage_t *rcp;		/* right child page */
	s64 rcbn;		/* right child page block number */
	int skip;		/* index of entry of insertion */
	int nextindex;		/* next available entry index of p */
	struct btframe *parent;	/* parent page entry on traverse stack */
	xad_t *xad;
	s64 xaddr;
	int xlen;
	int nsplit;		/* number of pages split */
	struct pxdlist pxdlist;
	pxd_t *pxd;
	struct tlock *tlck;
	struct xtlock *xtlck;

	smp = split->mp;
	sp = XT_PAGE(ip, smp);

	/* is inode xtree root extension/inline EA area free ?
	 * if so, grow the in-inode root instead of splitting at all
	 */
	if ((sp->header.flag & BT_ROOT) && (!S_ISDIR(ip->i_mode)) &&
	    (le16_to_cpu(sp->header.maxentry) < XTROOTMAXSLOT) &&
	    (JFS_IP(ip)->mode2 & INLINEEA)) {
		sp->header.maxentry = cpu_to_le16(XTROOTMAXSLOT);
		JFS_IP(ip)->mode2 &= ~INLINEEA;

		BT_MARK_DIRTY(smp, ip);
		/*
		 * acquire a transaction lock on the leaf page;
		 *
		 * action: xad insertion/extension;
		 */

		/* if insert into middle, shift right remaining entries. */
		skip = split->index;
		nextindex = le16_to_cpu(sp->header.nextindex);
		if (skip < nextindex)
			memmove(&sp->xad[skip + 1], &sp->xad[skip],
				(nextindex - skip) * sizeof(xad_t));

		/* insert the new entry: mark the entry NEW */
		xad = &sp->xad[skip];
		XT_PUTENTRY(xad, split->flag, split->off, split->len,
			    split->addr);

		/* advance next available entry index */
		le16_add_cpu(&sp->header.nextindex, 1);

		/* Don't log it if there are no links to the file */
		if (!test_cflag(COMMIT_Nolink, ip)) {
			tlck = txLock(tid, ip, smp, tlckXTREE | tlckGROW);
			xtlck = (struct xtlock *) & tlck->lock;
			xtlck->lwm.offset = (xtlck->lwm.offset) ?
			    min(skip, (int)xtlck->lwm.offset) : skip;
			xtlck->lwm.length =
			    le16_to_cpu(sp->header.nextindex) -
			    xtlck->lwm.offset;
		}

		return 0;
	}

	/*
	 * allocate new index blocks to cover index page split(s)
	 * (nsplit was pre-computed by xtSearch(..., XT_INSERT))
	 *
	 * allocation hint: ?
	 */
	if (split->pxdlist == NULL) {
		nsplit = btstack->nsplit;
		split->pxdlist = &pxdlist;
		pxdlist.maxnpxd = pxdlist.npxd = 0;
		pxd = &pxdlist.pxd[0];
		xlen = JFS_SBI(ip->i_sb)->nbperpage;
		for (; nsplit > 0; nsplit--, pxd++) {
			if ((rc = dbAlloc(ip, (s64) 0, (s64) xlen, &xaddr))
			    == 0) {
				PXDaddress(pxd, xaddr);
				PXDlength(pxd, xlen);

				pxdlist.maxnpxd++;

				continue;
			}

			/* undo allocation */

			XT_PUTPAGE(smp);
			return rc;
		}
	}

	/*
	 * Split leaf page <sp> into <sp> and a new right page <rp>.
	 *
	 * The split routines insert the new entry into the leaf page,
	 * and acquire txLock as appropriate.
	 * return <rp> pinned and its block number <rpbn>.
	 */
	rc = (sp->header.flag & BT_ROOT) ?
	    xtSplitRoot(tid, ip, split, &rmp) :
	    xtSplitPage(tid, ip, split, &rmp, &rbn);

	XT_PUTPAGE(smp);

	if (rc)
		return -EIO;
	/*
	 * propagate up the router entry for the leaf page just split
	 *
	 * insert a router entry for the new page into the parent page,
	 * propagate the insert/split up the tree by walking back the stack
	 * of (bn of parent page, index of child page entry in parent page)
	 * that were traversed during the search for the page that split.
	 *
	 * the propagation of insert/split up the tree stops if the root
	 * splits or the page inserted into doesn't have to split to hold
	 * the new entry.
	 *
	 * the parent entry for the split page remains the same, and
	 * a new entry is inserted at its right with the first key and
	 * block number of the new right page.
	 *
	 * There are a maximum of 3 pages pinned at any time:
	 * right child, left parent and right parent (when the parent splits)
	 * to keep the child page pinned while working on the parent.
	 * make sure that all pins are released at exit.
	 */
	while ((parent = BT_POP(btstack)) != NULL) {
		/* parent page specified by stack frame <parent> */

		/* keep current child pages <rcp> pinned */
		rcmp = rmp;
		rcbn = rbn;
		rcp = XT_PAGE(ip, rcmp);

		/*
		 * insert router entry in parent for new right child page <rp>
		 */
		/* get/pin the parent page <sp> */
		XT_GETPAGE(ip, parent->bn, smp, PSIZE, sp, rc);
		if (rc) {
			XT_PUTPAGE(rcmp);
			return rc;
		}

		/*
		 * The new key entry goes ONE AFTER the index of parent entry,
		 * because the split was to the right.
		 */
		skip = parent->index + 1;

		/*
		 * split or shift right remaining entries of the parent page
		 */
		nextindex = le16_to_cpu(sp->header.nextindex);
		/*
		 * parent page is full - split the parent page
		 */
		if (nextindex == le16_to_cpu(sp->header.maxentry)) {
			/* init for parent page split */
			split->mp = smp;
			split->index = skip;	/* index at insert */
			split->flag = XAD_NEW;
			split->off = offsetXAD(&rcp->xad[XTENTRYSTART]);
			split->len = JFS_SBI(ip->i_sb)->nbperpage;
			split->addr = rcbn;

			/* unpin previous right child page */
			XT_PUTPAGE(rcmp);

			/* The split routines insert the new entry,
			 * and acquire txLock as appropriate.
			 * return <rp> pinned and its block number <rpbn>.
			 */
			rc = (sp->header.flag & BT_ROOT) ?
			    xtSplitRoot(tid, ip, split, &rmp) :
			    xtSplitPage(tid, ip, split, &rmp, &rbn);
			if (rc) {
				XT_PUTPAGE(smp);
				return rc;
			}

			XT_PUTPAGE(smp);
			/* keep new child page <rp> pinned */
		}
		/*
		 * parent page is not full - insert in parent page
		 */
		else {
			/*
			 * insert router entry in parent for the right child
			 * page from the first entry of the right child page:
			 */
			/*
			 * acquire a transaction lock on the parent page;
			 *
			 * action: router xad insertion;
			 */
			BT_MARK_DIRTY(smp, ip);

			/*
			 * if insert into middle, shift right remaining entries
			 */
			if (skip < nextindex)
				memmove(&sp->xad[skip + 1], &sp->xad[skip],
					(nextindex -
					 skip) << L2XTSLOTSIZE);

			/* insert the router entry */
			xad = &sp->xad[skip];
			XT_PUTENTRY(xad, XAD_NEW,
				    offsetXAD(&rcp->xad[XTENTRYSTART]),
				    JFS_SBI(ip->i_sb)->nbperpage, rcbn);

			/* advance next available entry index. */
			le16_add_cpu(&sp->header.nextindex, 1);

			/* Don't log it if there are no links to the file */
			if (!test_cflag(COMMIT_Nolink, ip)) {
				tlck = txLock(tid, ip, smp,
					      tlckXTREE | tlckGROW);
				xtlck = (struct xtlock *) & tlck->lock;
				xtlck->lwm.offset = (xtlck->lwm.offset) ?
				    min(skip, (int)xtlck->lwm.offset) : skip;
				xtlck->lwm.length =
				    le16_to_cpu(sp->header.nextindex) -
				    xtlck->lwm.offset;
			}

			/* unpin parent page */
			XT_PUTPAGE(smp);

			/* exit propagate up */
			break;
		}
	}

	/* unpin current right page */
	XT_PUTPAGE(rmp);

	return 0;
}
936 
937 
/*
 * xtSplitPage()
 *
 * function:
 *	split a full non-root page into
 *	original/split/left page and new right page
 *	i.e., the original/split page remains as left page.
 *
 * parameter:
 *	int		tid,
 *	struct inode	*ip,
 *	struct xtsplit	*split,
 *	struct metapage	**rmpp,
 *	u64		*rbnp,
 *
 * return:
 *	0 on success with *rmpp/*rbnp set to the new right page (pinned)
 *	and its block number; negative errno on failure (quota rolled back).
 */
static int
xtSplitPage(tid_t tid, struct inode *ip,
	    struct xtsplit * split, struct metapage ** rmpp, s64 * rbnp)
{
	int rc = 0;
	struct metapage *smp;
	xtpage_t *sp;
	struct metapage *rmp;
	xtpage_t *rp;		/* new right page allocated */
	s64 rbn;		/* new right page block number */
	struct metapage *mp;
	xtpage_t *p;
	s64 nextbn;
	int skip, maxentry, middle, righthalf, n;
	xad_t *xad;
	struct pxdlist *pxdlist;
	pxd_t *pxd;
	struct tlock *tlck;
	struct xtlock *sxtlck = NULL, *rxtlck = NULL;
	int quota_allocation = 0;

	smp = split->mp;
	sp = XT_PAGE(ip, smp);

	INCREMENT(xtStat.split);

	/* take the next pre-allocated extent from the caller's pxdlist */
	pxdlist = split->pxdlist;
	pxd = &pxdlist->pxd[pxdlist->npxd];
	pxdlist->npxd++;
	rbn = addressPXD(pxd);

	/* Allocate blocks to quota. */
	rc = dquot_alloc_block(ip, lengthPXD(pxd));
	if (rc)
		goto clean_up;

	quota_allocation += lengthPXD(pxd);

	/*
	 * allocate the new right page for the split
	 */
	rmp = get_metapage(ip, rbn, PSIZE, 1);
	if (rmp == NULL) {
		rc = -EIO;
		goto clean_up;
	}

	jfs_info("xtSplitPage: ip:0x%p smp:0x%p rmp:0x%p", ip, smp, rmp);

	BT_MARK_DIRTY(rmp, ip);
	/*
	 * action: new page;
	 */

	rp = (xtpage_t *) rmp->data;
	rp->header.self = *pxd;
	rp->header.flag = sp->header.flag & BT_TYPE;
	rp->header.maxentry = sp->header.maxentry;	/* little-endian */
	rp->header.nextindex = cpu_to_le16(XTENTRYSTART);

	BT_MARK_DIRTY(smp, ip);
	/* Don't log it if there are no links to the file */
	if (!test_cflag(COMMIT_Nolink, ip)) {
		/*
		 * acquire a transaction lock on the new right page;
		 */
		tlck = txLock(tid, ip, rmp, tlckXTREE | tlckNEW);
		rxtlck = (struct xtlock *) & tlck->lock;
		rxtlck->lwm.offset = XTENTRYSTART;
		/*
		 * acquire a transaction lock on the split page
		 */
		tlck = txLock(tid, ip, smp, tlckXTREE | tlckGROW);
		sxtlck = (struct xtlock *) & tlck->lock;
	}

	/*
	 * initialize/update sibling pointers of <sp> and <rp>
	 */
	nextbn = le64_to_cpu(sp->header.next);
	rp->header.next = cpu_to_le64(nextbn);
	rp->header.prev = cpu_to_le64(addressPXD(&sp->header.self));
	sp->header.next = cpu_to_le64(rbn);

	skip = split->index;

	/*
	 * sequential append at tail (after last entry of last page)
	 *
	 * if splitting the last page on a level because of appending
	 * a entry to it (skip is maxentry), it's likely that the access is
	 * sequential. adding an empty page on the side of the level is less
	 * work and can push the fill factor much higher than normal.
	 * if we're wrong it's no big deal - we will do the split the right
	 * way next time.
	 * (it may look like it's equally easy to do a similar hack for
	 * reverse sorted data, that is, split the tree left, but it's not.
	 * Be my guest.)
	 */
	if (nextbn == 0 && skip == le16_to_cpu(sp->header.maxentry)) {
		/*
		 * acquire a transaction lock on the new/right page;
		 *
		 * action: xad insertion;
		 */
		/* insert entry at the first entry of the new right page */
		xad = &rp->xad[XTENTRYSTART];
		XT_PUTENTRY(xad, split->flag, split->off, split->len,
			    split->addr);

		rp->header.nextindex = cpu_to_le16(XTENTRYSTART + 1);

		if (!test_cflag(COMMIT_Nolink, ip)) {
			/* rxtlck->lwm.offset = XTENTRYSTART; */
			rxtlck->lwm.length = 1;
		}

		*rmpp = rmp;
		*rbnp = rbn;

		jfs_info("xtSplitPage: sp:0x%p rp:0x%p", sp, rp);
		return 0;
	}

	/*
	 * non-sequential insert (at possibly middle page)
	 */

	/*
	 * update previous pointer of old next/right page of <sp>
	 */
	if (nextbn != 0) {
		XT_GETPAGE(ip, nextbn, mp, PSIZE, p, rc);
		if (rc) {
			XT_PUTPAGE(rmp);
			goto clean_up;
		}

		BT_MARK_DIRTY(mp, ip);
		/*
		 * acquire a transaction lock on the next page;
		 *
		 * action:sibling pointer update;
		 */
		if (!test_cflag(COMMIT_Nolink, ip))
			tlck = txLock(tid, ip, mp, tlckXTREE | tlckRELINK);

		p->header.prev = cpu_to_le64(rbn);

		/* sibling page may have been updated previously, or
		 * it may be updated later;
		 */

		XT_PUTPAGE(mp);
	}

	/*
	 * split the data between the split and new/right pages
	 */
	maxentry = le16_to_cpu(sp->header.maxentry);
	middle = maxentry >> 1;
	righthalf = maxentry - middle;

	/*
	 * skip index in old split/left page - insert into left page:
	 */
	if (skip <= middle) {
		/* move right half of split page to the new right page */
		memmove(&rp->xad[XTENTRYSTART], &sp->xad[middle],
			righthalf << L2XTSLOTSIZE);

		/* shift right tail of left half to make room for new entry */
		if (skip < middle)
			memmove(&sp->xad[skip + 1], &sp->xad[skip],
				(middle - skip) << L2XTSLOTSIZE);

		/* insert new entry */
		xad = &sp->xad[skip];
		XT_PUTENTRY(xad, split->flag, split->off, split->len,
			    split->addr);

		/* update page header */
		sp->header.nextindex = cpu_to_le16(middle + 1);
		if (!test_cflag(COMMIT_Nolink, ip)) {
			sxtlck->lwm.offset = (sxtlck->lwm.offset) ?
			    min(skip, (int)sxtlck->lwm.offset) : skip;
		}

		rp->header.nextindex =
		    cpu_to_le16(XTENTRYSTART + righthalf);
	}
	/*
	 * skip index in new right page - insert into right page:
	 */
	else {
		/* move left head of right half to right page */
		n = skip - middle;
		memmove(&rp->xad[XTENTRYSTART], &sp->xad[middle],
			n << L2XTSLOTSIZE);

		/* insert new entry */
		n += XTENTRYSTART;
		xad = &rp->xad[n];
		XT_PUTENTRY(xad, split->flag, split->off, split->len,
			    split->addr);

		/* move right tail of right half to right page */
		if (skip < maxentry)
			memmove(&rp->xad[n + 1], &sp->xad[skip],
				(maxentry - skip) << L2XTSLOTSIZE);

		/* update page header */
		sp->header.nextindex = cpu_to_le16(middle);
		if (!test_cflag(COMMIT_Nolink, ip)) {
			sxtlck->lwm.offset = (sxtlck->lwm.offset) ?
			    min(middle, (int)sxtlck->lwm.offset) : middle;
		}

		/* right page holds (skip - middle) head entries + 1 new
		 * + (maxentry - skip) tail entries = righthalf + 1
		 */
		rp->header.nextindex = cpu_to_le16(XTENTRYSTART +
						   righthalf + 1);
	}

	if (!test_cflag(COMMIT_Nolink, ip)) {
		sxtlck->lwm.length = le16_to_cpu(sp->header.nextindex) -
		    sxtlck->lwm.offset;

		/* rxtlck->lwm.offset = XTENTRYSTART; */
		rxtlck->lwm.length = le16_to_cpu(rp->header.nextindex) -
		    XTENTRYSTART;
	}

	*rmpp = rmp;
	*rbnp = rbn;

	jfs_info("xtSplitPage: sp:0x%p rp:0x%p", sp, rp);
	return rc;

      clean_up:

	/* Rollback quota allocation. */
	if (quota_allocation)
		dquot_free_block(ip, quota_allocation);

	return (rc);
}
1201 
1202 
1203 /*
1204  * xtSplitRoot()
1205  *
1206  * function:
1207  * split the full root page into original/root/split page and new
1208  * right page
1209  * i.e., root remains fixed in tree anchor (inode) and the root is
1210  * copied to a single new right child page since root page <<
1211  * non-root page, and the split root page contains a single entry
1212  * for the new right child page.
1213  *
1214  * parameter:
1215  * int tid,
1216  * struct inode *ip,
1217  * struct xtsplit *split,
1218  * struct metapage **rmpp)
1219  *
1220  * return:
1221  * Pointer to page in which to insert or NULL on error.
1222  */
1223 static int
1224 xtSplitRoot(tid_t tid,
1225  struct inode *ip, struct xtsplit * split, struct metapage ** rmpp)
1226 {
1227  xtpage_t *sp;
1228  struct metapage *rmp;
1229  xtpage_t *rp;
1230  s64 rbn;
1231  int skip, nextindex;
1232  xad_t *xad;
1233  pxd_t *pxd;
1234  struct pxdlist *pxdlist;
1235  struct tlock *tlck;
1236  struct xtlock *xtlck;
1237  int rc;
1238 
1239  sp = &JFS_IP(ip)->i_xtroot;
1240 
1241  INCREMENT(xtStat.split);
1242 
1243  /*
1244  * allocate a single (right) child page
1245  */
1246  pxdlist = split->pxdlist;
1247  pxd = &pxdlist->pxd[pxdlist->npxd];
1248  pxdlist->npxd++;
1249  rbn = addressPXD(pxd);
1250  rmp = get_metapage(ip, rbn, PSIZE, 1);
1251  if (rmp == NULL)
1252  return -EIO;
1253 
1254  /* Allocate blocks to quota. */
1255  rc = dquot_alloc_block(ip, lengthPXD(pxd));
1256  if (rc) {
1257  release_metapage(rmp);
1258  return rc;
1259  }
1260 
1261  jfs_info("xtSplitRoot: ip:0x%p rmp:0x%p", ip, rmp);
1262 
1263  /*
1264  * acquire a transaction lock on the new right page;
1265  *
1266  * action: new page;
1267  */
1268  BT_MARK_DIRTY(rmp, ip);
1269 
1270  rp = (xtpage_t *) rmp->data;
1271  rp->header.flag =
1272  (sp->header.flag & BT_LEAF) ? BT_LEAF : BT_INTERNAL;
1273  rp->header.self = *pxd;
1274  rp->header.nextindex = cpu_to_le16(XTENTRYSTART);
1275  rp->header.maxentry = cpu_to_le16(PSIZE >> L2XTSLOTSIZE);
1276 
1277  /* initialize sibling pointers */
1278  rp->header.next = 0;
1279  rp->header.prev = 0;
1280 
1281  /*
1282  * copy the in-line root page into new right page extent
1283  */
1284  nextindex = le16_to_cpu(sp->header.maxentry);
1285  memmove(&rp->xad[XTENTRYSTART], &sp->xad[XTENTRYSTART],
1286  (nextindex - XTENTRYSTART) << L2XTSLOTSIZE);
1287 
1288  /*
1289  * insert the new entry into the new right/child page
1290  * (skip index in the new right page will not change)
1291  */
1292  skip = split->index;
1293  /* if insert into middle, shift right remaining entries */
1294  if (skip != nextindex)
1295  memmove(&rp->xad[skip + 1], &rp->xad[skip],
1296  (nextindex - skip) * sizeof(xad_t));
1297 
1298  xad = &rp->xad[skip];
1299  XT_PUTENTRY(xad, split->flag, split->off, split->len, split->addr);
1300 
1301  /* update page header */
1302  rp->header.nextindex = cpu_to_le16(nextindex + 1);
1303 
1304  if (!test_cflag(COMMIT_Nolink, ip)) {
1305  tlck = txLock(tid, ip, rmp, tlckXTREE | tlckNEW);
1306  xtlck = (struct xtlock *) & tlck->lock;
1307  xtlck->lwm.offset = XTENTRYSTART;
1308  xtlck->lwm.length = le16_to_cpu(rp->header.nextindex) -
1309  XTENTRYSTART;
1310  }
1311 
1312  /*
1313  * reset the root
1314  *
1315  * init root with the single entry for the new right page
1316  * set the 1st entry offset to 0, which force the left-most key
1317  * at any level of the tree to be less than any search key.
1318  */
1319  /*
1320  * acquire a transaction lock on the root page (in-memory inode);
1321  *
1322  * action: root split;
1323  */
1324  BT_MARK_DIRTY(split->mp, ip);
1325 
1326  xad = &sp->xad[XTENTRYSTART];
1327  XT_PUTENTRY(xad, XAD_NEW, 0, JFS_SBI(ip->i_sb)->nbperpage, rbn);
1328 
1329  /* update page header of root */
1330  sp->header.flag &= ~BT_LEAF;
1331  sp->header.flag |= BT_INTERNAL;
1332 
1333  sp->header.nextindex = cpu_to_le16(XTENTRYSTART + 1);
1334 
1335  if (!test_cflag(COMMIT_Nolink, ip)) {
1336  tlck = txLock(tid, ip, split->mp, tlckXTREE | tlckGROW);
1337  xtlck = (struct xtlock *) & tlck->lock;
1338  xtlck->lwm.offset = XTENTRYSTART;
1339  xtlck->lwm.length = 1;
1340  }
1341 
1342  *rmpp = rmp;
1343 
1344  jfs_info("xtSplitRoot: sp:0x%p rp:0x%p", sp, rp);
1345  return 0;
1346 }
1347 
1348 
1349 /*
1350  * xtExtend()
1351  *
1352  * function: extend in-place;
1353  *
1354  * note: existing extent may or may not have been committed.
1355  * caller is responsible for pager buffer cache update, and
1356  * working block allocation map update;
1357  * update pmap: alloc whole extended extent;
1358  */
1359 int xtExtend(tid_t tid, /* transaction id */
1360  struct inode *ip, s64 xoff, /* delta extent offset */
1361  s32 xlen, /* delta extent length */
1362  int flag)
1363 {
1364  int rc = 0;
1365  int cmp;
1366  struct metapage *mp; /* meta-page buffer */
1367  xtpage_t *p; /* base B+-tree index page */
1368  s64 bn;
1369  int index, nextindex, len;
1370  struct btstack btstack; /* traverse stack */
1371  struct xtsplit split; /* split information */
1372  xad_t *xad;
1373  s64 xaddr;
1374  struct tlock *tlck;
1375  struct xtlock *xtlck = NULL;
1376 
1377  jfs_info("xtExtend: nxoff:0x%lx nxlen:0x%x", (ulong) xoff, xlen);
1378 
1379  /* there must exist extent to be extended */
1380  if ((rc = xtSearch(ip, xoff - 1, NULL, &cmp, &btstack, XT_INSERT)))
1381  return rc;
1382 
1383  /* retrieve search result */
1384  XT_GETSEARCH(ip, btstack.top, bn, mp, p, index);
1385 
1386  if (cmp != 0) {
1387  XT_PUTPAGE(mp);
1388  jfs_error(ip->i_sb, "xtExtend: xtSearch did not find extent");
1389  return -EIO;
1390  }
1391 
1392  /* extension must be contiguous */
1393  xad = &p->xad[index];
1394  if ((offsetXAD(xad) + lengthXAD(xad)) != xoff) {
1395  XT_PUTPAGE(mp);
1396  jfs_error(ip->i_sb, "xtExtend: extension is not contiguous");
1397  return -EIO;
1398  }
1399 
1400  /*
1401  * acquire a transaction lock on the leaf page;
1402  *
1403  * action: xad insertion/extension;
1404  */
1405  BT_MARK_DIRTY(mp, ip);
1406  if (!test_cflag(COMMIT_Nolink, ip)) {
1407  tlck = txLock(tid, ip, mp, tlckXTREE | tlckGROW);
1408  xtlck = (struct xtlock *) & tlck->lock;
1409  }
1410 
1411  /* extend will overflow extent ? */
1412  xlen = lengthXAD(xad) + xlen;
1413  if ((len = xlen - MAXXLEN) <= 0)
1414  goto extendOld;
1415 
1416  /*
1417  * extent overflow: insert entry for new extent
1418  */
1419 //insertNew:
1420  xoff = offsetXAD(xad) + MAXXLEN;
1421  xaddr = addressXAD(xad) + MAXXLEN;
1422  nextindex = le16_to_cpu(p->header.nextindex);
1423 
1424  /*
1425  * if the leaf page is full, insert the new entry and
1426  * propagate up the router entry for the new page from split
1427  *
1428  * The xtSplitUp() will insert the entry and unpin the leaf page.
1429  */
1430  if (nextindex == le16_to_cpu(p->header.maxentry)) {
1431  /* xtSpliUp() unpins leaf pages */
1432  split.mp = mp;
1433  split.index = index + 1;
1434  split.flag = XAD_NEW;
1435  split.off = xoff; /* split offset */
1436  split.len = len;
1437  split.addr = xaddr;
1438  split.pxdlist = NULL;
1439  if ((rc = xtSplitUp(tid, ip, &split, &btstack)))
1440  return rc;
1441 
1442  /* get back old page */
1443  XT_GETPAGE(ip, bn, mp, PSIZE, p, rc);
1444  if (rc)
1445  return rc;
1446  /*
1447  * if leaf root has been split, original root has been
1448  * copied to new child page, i.e., original entry now
1449  * resides on the new child page;
1450  */
1451  if (p->header.flag & BT_INTERNAL) {
1452  ASSERT(p->header.nextindex ==
1453  cpu_to_le16(XTENTRYSTART + 1));
1454  xad = &p->xad[XTENTRYSTART];
1455  bn = addressXAD(xad);
1456  XT_PUTPAGE(mp);
1457 
1458  /* get new child page */
1459  XT_GETPAGE(ip, bn, mp, PSIZE, p, rc);
1460  if (rc)
1461  return rc;
1462 
1463  BT_MARK_DIRTY(mp, ip);
1464  if (!test_cflag(COMMIT_Nolink, ip)) {
1465  tlck = txLock(tid, ip, mp, tlckXTREE|tlckGROW);
1466  xtlck = (struct xtlock *) & tlck->lock;
1467  }
1468  }
1469  }
1470  /*
1471  * insert the new entry into the leaf page
1472  */
1473  else {
1474  /* insert the new entry: mark the entry NEW */
1475  xad = &p->xad[index + 1];
1476  XT_PUTENTRY(xad, XAD_NEW, xoff, len, xaddr);
1477 
1478  /* advance next available entry index */
1479  le16_add_cpu(&p->header.nextindex, 1);
1480  }
1481 
1482  /* get back old entry */
1483  xad = &p->xad[index];
1484  xlen = MAXXLEN;
1485 
1486  /*
1487  * extend old extent
1488  */
1489  extendOld:
1490  XADlength(xad, xlen);
1491  if (!(xad->flag & XAD_NEW))
1492  xad->flag |= XAD_EXTENDED;
1493 
1494  if (!test_cflag(COMMIT_Nolink, ip)) {
1495  xtlck->lwm.offset =
1496  (xtlck->lwm.offset) ? min(index,
1497  (int)xtlck->lwm.offset) : index;
1498  xtlck->lwm.length =
1499  le16_to_cpu(p->header.nextindex) - xtlck->lwm.offset;
1500  }
1501 
1502  /* unpin the leaf page */
1503  XT_PUTPAGE(mp);
1504 
1505  return rc;
1506 }
1507 
#ifdef _NOTYET
/*
 *	xtTailgate()
 *
 * function: split existing 'tail' extent
 *	(split offset >= start offset of tail extent), and
 *	relocate and extend the split tail half;
 *
 * note: existing extent may or may not have been committed.
 * caller is responsible for pager buffer cache update, and
 * working block allocation map update;
 * update pmap: free old split tail extent, alloc new extent;
 */
int xtTailgate(tid_t tid,		/* transaction id */
	       struct inode *ip, s64 xoff,	/* split/new extent offset */
	       s32 xlen,	/* new extent length */
	       s64 xaddr,	/* new extent address */
	       int flag)
{
	int rc = 0;
	int cmp;
	struct metapage *mp;	/* meta-page buffer */
	xtpage_t *p;		/* base B+-tree index page */
	s64 bn;
	int index, nextindex, llen, rlen;
	struct btstack btstack;	/* traverse stack */
	struct xtsplit split;	/* split information */
	xad_t *xad;
	struct tlock *tlck;
	struct xtlock *xtlck = NULL;	/* NULL, not 0: pointer constant */
	struct tlock *mtlck;
	struct maplock *pxdlock;

/*
printf("xtTailgate: nxoff:0x%lx nxlen:0x%x nxaddr:0x%lx\n",
	(ulong)xoff, xlen, (ulong)xaddr);
*/

	/* there must exist extent to be tailgated */
	if ((rc = xtSearch(ip, xoff, NULL, &cmp, &btstack, XT_INSERT)))
		return rc;

	/* retrieve search result */
	XT_GETSEARCH(ip, btstack.top, bn, mp, p, index);

	if (cmp != 0) {
		XT_PUTPAGE(mp);
		jfs_error(ip->i_sb, "xtTailgate: couldn't find extent");
		return -EIO;
	}

	/* entry found must be last entry */
	nextindex = le16_to_cpu(p->header.nextindex);
	if (index != nextindex - 1) {
		XT_PUTPAGE(mp);
		jfs_error(ip->i_sb,
			  "xtTailgate: the entry found is not the last entry");
		return -EIO;
	}

	BT_MARK_DIRTY(mp, ip);
	/*
	 * acquire tlock of the leaf page containing original entry
	 */
	if (!test_cflag(COMMIT_Nolink, ip)) {
		tlck = txLock(tid, ip, mp, tlckXTREE | tlckGROW);
		xtlck = (struct xtlock *) & tlck->lock;
	}

	/* completely replace extent ? */
	xad = &p->xad[index];
/*
printf("xtTailgate: xoff:0x%lx xlen:0x%x xaddr:0x%lx\n",
	(ulong)offsetXAD(xad), lengthXAD(xad), (ulong)addressXAD(xad));
*/
	if ((llen = xoff - offsetXAD(xad)) == 0)
		goto updateOld;

	/*
	 *	partially replace extent: insert entry for new extent
	 */
//insertNew:
	/*
	 *	if the leaf page is full, insert the new entry and
	 *	propagate up the router entry for the new page from split
	 *
	 * The xtSplitUp() will insert the entry and unpin the leaf page.
	 */
	if (nextindex == le16_to_cpu(p->header.maxentry)) {
		/* xtSplitUp() unpins leaf pages */
		split.mp = mp;
		split.index = index + 1;
		split.flag = XAD_NEW;
		split.off = xoff;	/* split offset */
		split.len = xlen;
		split.addr = xaddr;
		split.pxdlist = NULL;
		if ((rc = xtSplitUp(tid, ip, &split, &btstack)))
			return rc;

		/* get back old page */
		XT_GETPAGE(ip, bn, mp, PSIZE, p, rc);
		if (rc)
			return rc;
		/*
		 * if leaf root has been split, original root has been
		 * copied to new child page, i.e., original entry now
		 * resides on the new child page;
		 */
		if (p->header.flag & BT_INTERNAL) {
			ASSERT(p->header.nextindex ==
			       cpu_to_le16(XTENTRYSTART + 1));
			xad = &p->xad[XTENTRYSTART];
			bn = addressXAD(xad);
			XT_PUTPAGE(mp);

			/* get new child page */
			XT_GETPAGE(ip, bn, mp, PSIZE, p, rc);
			if (rc)
				return rc;

			BT_MARK_DIRTY(mp, ip);
			if (!test_cflag(COMMIT_Nolink, ip)) {
				tlck = txLock(tid, ip, mp, tlckXTREE|tlckGROW);
				xtlck = (struct xtlock *) & tlck->lock;
			}
		}
	}
	/*
	 *	insert the new entry into the leaf page
	 */
	else {
		/* insert the new entry: mark the entry NEW */
		xad = &p->xad[index + 1];
		XT_PUTENTRY(xad, XAD_NEW, xoff, xlen, xaddr);

		/* advance next available entry index */
		le16_add_cpu(&p->header.nextindex, 1);
	}

	/* get back old XAD */
	xad = &p->xad[index];

	/*
	 * truncate/relocate old extent at split offset
	 */
      updateOld:
	/* update dmap for old/committed/truncated extent */
	rlen = lengthXAD(xad) - llen;
	if (!(xad->flag & XAD_NEW)) {
		/* free from PWMAP at commit */
		if (!test_cflag(COMMIT_Nolink, ip)) {
			mtlck = txMaplock(tid, ip, tlckMAP);
			pxdlock = (struct maplock *) & mtlck->lock;
			pxdlock->flag = mlckFREEPXD;
			PXDaddress(&pxdlock->pxd, addressXAD(xad) + llen);
			PXDlength(&pxdlock->pxd, rlen);
			pxdlock->index = 1;
		}
	} else
		/* free from WMAP */
		dbFree(ip, addressXAD(xad) + llen, (s64) rlen);

	if (llen)
		/* truncate */
		XADlength(xad, llen);
	else
		/* replace */
		XT_PUTENTRY(xad, XAD_NEW, xoff, xlen, xaddr);

	/* record low-water mark of modified entries for logging */
	if (!test_cflag(COMMIT_Nolink, ip)) {
		xtlck->lwm.offset = (xtlck->lwm.offset) ?
		    min(index, (int)xtlck->lwm.offset) : index;
		xtlck->lwm.length = le16_to_cpu(p->header.nextindex) -
		    xtlck->lwm.offset;
	}

	/* unpin the leaf page */
	XT_PUTPAGE(mp);

	return rc;
}
#endif /* _NOTYET */
1691 
1692 /*
1693  * xtUpdate()
1694  *
1695  * function: update XAD;
1696  *
1697  * update extent for allocated_but_not_recorded or
1698  * compressed extent;
1699  *
1700  * parameter:
1701  * nxad - new XAD;
1702  * logical extent of the specified XAD must be completely
1703  * contained by an existing XAD;
1704  */
1705 int xtUpdate(tid_t tid, struct inode *ip, xad_t * nxad)
1706 { /* new XAD */
1707  int rc = 0;
1708  int cmp;
1709  struct metapage *mp; /* meta-page buffer */
1710  xtpage_t *p; /* base B+-tree index page */
1711  s64 bn;
1712  int index0, index, newindex, nextindex;
1713  struct btstack btstack; /* traverse stack */
1714  struct xtsplit split; /* split information */
1715  xad_t *xad, *lxad, *rxad;
1716  int xflag;
1717  s64 nxoff, xoff;
1718  int nxlen, xlen, lxlen, rxlen;
1719  s64 nxaddr, xaddr;
1720  struct tlock *tlck;
1721  struct xtlock *xtlck = NULL;
1722  int newpage = 0;
1723 
1724  /* there must exist extent to be tailgated */
1725  nxoff = offsetXAD(nxad);
1726  nxlen = lengthXAD(nxad);
1727  nxaddr = addressXAD(nxad);
1728 
1729  if ((rc = xtSearch(ip, nxoff, NULL, &cmp, &btstack, XT_INSERT)))
1730  return rc;
1731 
1732  /* retrieve search result */
1733  XT_GETSEARCH(ip, btstack.top, bn, mp, p, index0);
1734 
1735  if (cmp != 0) {
1736  XT_PUTPAGE(mp);
1737  jfs_error(ip->i_sb, "xtUpdate: Could not find extent");
1738  return -EIO;
1739  }
1740 
1741  BT_MARK_DIRTY(mp, ip);
1742  /*
1743  * acquire tlock of the leaf page containing original entry
1744  */
1745  if (!test_cflag(COMMIT_Nolink, ip)) {
1746  tlck = txLock(tid, ip, mp, tlckXTREE | tlckGROW);
1747  xtlck = (struct xtlock *) & tlck->lock;
1748  }
1749 
1750  xad = &p->xad[index0];
1751  xflag = xad->flag;
1752  xoff = offsetXAD(xad);
1753  xlen = lengthXAD(xad);
1754  xaddr = addressXAD(xad);
1755 
1756  /* nXAD must be completely contained within XAD */
1757  if ((xoff > nxoff) ||
1758  (nxoff + nxlen > xoff + xlen)) {
1759  XT_PUTPAGE(mp);
1760  jfs_error(ip->i_sb,
1761  "xtUpdate: nXAD in not completely contained within XAD");
1762  return -EIO;
1763  }
1764 
1765  index = index0;
1766  newindex = index + 1;
1767  nextindex = le16_to_cpu(p->header.nextindex);
1768 
1769 #ifdef _JFS_WIP_NOCOALESCE
1770  if (xoff < nxoff)
1771  goto updateRight;
1772 
1773  /*
1774  * replace XAD with nXAD
1775  */
1776  replace: /* (nxoff == xoff) */
1777  if (nxlen == xlen) {
1778  /* replace XAD with nXAD:recorded */
1779  *xad = *nxad;
1780  xad->flag = xflag & ~XAD_NOTRECORDED;
1781 
1782  goto out;
1783  } else /* (nxlen < xlen) */
1784  goto updateLeft;
1785 #endif /* _JFS_WIP_NOCOALESCE */
1786 
1787 /* #ifdef _JFS_WIP_COALESCE */
1788  if (xoff < nxoff)
1789  goto coalesceRight;
1790 
1791  /*
1792  * coalesce with left XAD
1793  */
1794 //coalesceLeft: /* (xoff == nxoff) */
1795  /* is XAD first entry of page ? */
1796  if (index == XTENTRYSTART)
1797  goto replace;
1798 
1799  /* is nXAD logically and physically contiguous with lXAD ? */
1800  lxad = &p->xad[index - 1];
1801  lxlen = lengthXAD(lxad);
1802  if (!(lxad->flag & XAD_NOTRECORDED) &&
1803  (nxoff == offsetXAD(lxad) + lxlen) &&
1804  (nxaddr == addressXAD(lxad) + lxlen) &&
1805  (lxlen + nxlen < MAXXLEN)) {
1806  /* extend right lXAD */
1807  index0 = index - 1;
1808  XADlength(lxad, lxlen + nxlen);
1809 
1810  /* If we just merged two extents together, need to make sure the
1811  * right extent gets logged. If the left one is marked XAD_NEW,
1812  * then we know it will be logged. Otherwise, mark as
1813  * XAD_EXTENDED
1814  */
1815  if (!(lxad->flag & XAD_NEW))
1816  lxad->flag |= XAD_EXTENDED;
1817 
1818  if (xlen > nxlen) {
1819  /* truncate XAD */
1820  XADoffset(xad, xoff + nxlen);
1821  XADlength(xad, xlen - nxlen);
1822  XADaddress(xad, xaddr + nxlen);
1823  goto out;
1824  } else { /* (xlen == nxlen) */
1825 
1826  /* remove XAD */
1827  if (index < nextindex - 1)
1828  memmove(&p->xad[index], &p->xad[index + 1],
1829  (nextindex - index -
1830  1) << L2XTSLOTSIZE);
1831 
1832  p->header.nextindex =
1833  cpu_to_le16(le16_to_cpu(p->header.nextindex) -
1834  1);
1835 
1836  index = index0;
1837  newindex = index + 1;
1838  nextindex = le16_to_cpu(p->header.nextindex);
1839  xoff = nxoff = offsetXAD(lxad);
1840  xlen = nxlen = lxlen + nxlen;
1841  xaddr = nxaddr = addressXAD(lxad);
1842  goto coalesceRight;
1843  }
1844  }
1845 
1846  /*
1847  * replace XAD with nXAD
1848  */
1849  replace: /* (nxoff == xoff) */
1850  if (nxlen == xlen) {
1851  /* replace XAD with nXAD:recorded */
1852  *xad = *nxad;
1853  xad->flag = xflag & ~XAD_NOTRECORDED;
1854 
1855  goto coalesceRight;
1856  } else /* (nxlen < xlen) */
1857  goto updateLeft;
1858 
1859  /*
1860  * coalesce with right XAD
1861  */
1862  coalesceRight: /* (xoff <= nxoff) */
1863  /* is XAD last entry of page ? */
1864  if (newindex == nextindex) {
1865  if (xoff == nxoff)
1866  goto out;
1867  goto updateRight;
1868  }
1869 
1870  /* is nXAD logically and physically contiguous with rXAD ? */
1871  rxad = &p->xad[index + 1];
1872  rxlen = lengthXAD(rxad);
1873  if (!(rxad->flag & XAD_NOTRECORDED) &&
1874  (nxoff + nxlen == offsetXAD(rxad)) &&
1875  (nxaddr + nxlen == addressXAD(rxad)) &&
1876  (rxlen + nxlen < MAXXLEN)) {
1877  /* extend left rXAD */
1878  XADoffset(rxad, nxoff);
1879  XADlength(rxad, rxlen + nxlen);
1880  XADaddress(rxad, nxaddr);
1881 
1882  /* If we just merged two extents together, need to make sure
1883  * the left extent gets logged. If the right one is marked
1884  * XAD_NEW, then we know it will be logged. Otherwise, mark as
1885  * XAD_EXTENDED
1886  */
1887  if (!(rxad->flag & XAD_NEW))
1888  rxad->flag |= XAD_EXTENDED;
1889 
1890  if (xlen > nxlen)
1891  /* truncate XAD */
1892  XADlength(xad, xlen - nxlen);
1893  else { /* (xlen == nxlen) */
1894 
1895  /* remove XAD */
1896  memmove(&p->xad[index], &p->xad[index + 1],
1897  (nextindex - index - 1) << L2XTSLOTSIZE);
1898 
1899  p->header.nextindex =
1900  cpu_to_le16(le16_to_cpu(p->header.nextindex) -
1901  1);
1902  }
1903 
1904  goto out;
1905  } else if (xoff == nxoff)
1906  goto out;
1907 
1908  if (xoff >= nxoff) {
1909  XT_PUTPAGE(mp);
1910  jfs_error(ip->i_sb, "xtUpdate: xoff >= nxoff");
1911  return -EIO;
1912  }
1913 /* #endif _JFS_WIP_COALESCE */
1914 
1915  /*
1916  * split XAD into (lXAD, nXAD):
1917  *
1918  * |---nXAD--->
1919  * --|----------XAD----------|--
1920  * |-lXAD-|
1921  */
1922  updateRight: /* (xoff < nxoff) */
1923  /* truncate old XAD as lXAD:not_recorded */
1924  xad = &p->xad[index];
1925  XADlength(xad, nxoff - xoff);
1926 
1927  /* insert nXAD:recorded */
1928  if (nextindex == le16_to_cpu(p->header.maxentry)) {
1929 
1930  /* xtSpliUp() unpins leaf pages */
1931  split.mp = mp;
1932  split.index = newindex;
1933  split.flag = xflag & ~XAD_NOTRECORDED;
1934  split.off = nxoff;
1935  split.len = nxlen;
1936  split.addr = nxaddr;
1937  split.pxdlist = NULL;
1938  if ((rc = xtSplitUp(tid, ip, &split, &btstack)))
1939  return rc;
1940 
1941  /* get back old page */
1942  XT_GETPAGE(ip, bn, mp, PSIZE, p, rc);
1943  if (rc)
1944  return rc;
1945  /*
1946  * if leaf root has been split, original root has been
1947  * copied to new child page, i.e., original entry now
1948  * resides on the new child page;
1949  */
1950  if (p->header.flag & BT_INTERNAL) {
1951  ASSERT(p->header.nextindex ==
1952  cpu_to_le16(XTENTRYSTART + 1));
1953  xad = &p->xad[XTENTRYSTART];
1954  bn = addressXAD(xad);
1955  XT_PUTPAGE(mp);
1956 
1957  /* get new child page */
1958  XT_GETPAGE(ip, bn, mp, PSIZE, p, rc);
1959  if (rc)
1960  return rc;
1961 
1962  BT_MARK_DIRTY(mp, ip);
1963  if (!test_cflag(COMMIT_Nolink, ip)) {
1964  tlck = txLock(tid, ip, mp, tlckXTREE|tlckGROW);
1965  xtlck = (struct xtlock *) & tlck->lock;
1966  }
1967  } else {
1968  /* is nXAD on new page ? */
1969  if (newindex >
1970  (le16_to_cpu(p->header.maxentry) >> 1)) {
1971  newindex =
1972  newindex -
1973  le16_to_cpu(p->header.nextindex) +
1974  XTENTRYSTART;
1975  newpage = 1;
1976  }
1977  }
1978  } else {
1979  /* if insert into middle, shift right remaining entries */
1980  if (newindex < nextindex)
1981  memmove(&p->xad[newindex + 1], &p->xad[newindex],
1982  (nextindex - newindex) << L2XTSLOTSIZE);
1983 
1984  /* insert the entry */
1985  xad = &p->xad[newindex];
1986  *xad = *nxad;
1987  xad->flag = xflag & ~XAD_NOTRECORDED;
1988 
1989  /* advance next available entry index. */
1990  p->header.nextindex =
1991  cpu_to_le16(le16_to_cpu(p->header.nextindex) + 1);
1992  }
1993 
1994  /*
1995  * does nXAD force 3-way split ?
1996  *
1997  * |---nXAD--->|
1998  * --|----------XAD-------------|--
1999  * |-lXAD-| |-rXAD -|
2000  */
2001  if (nxoff + nxlen == xoff + xlen)
2002  goto out;
2003 
2004  /* reorient nXAD as XAD for further split XAD into (nXAD, rXAD) */
2005  if (newpage) {
2006  /* close out old page */
2007  if (!test_cflag(COMMIT_Nolink, ip)) {
2008  xtlck->lwm.offset = (xtlck->lwm.offset) ?
2009  min(index0, (int)xtlck->lwm.offset) : index0;
2010  xtlck->lwm.length =
2011  le16_to_cpu(p->header.nextindex) -
2012  xtlck->lwm.offset;
2013  }
2014 
2015  bn = le64_to_cpu(p->header.next);
2016  XT_PUTPAGE(mp);
2017 
2018  /* get new right page */
2019  XT_GETPAGE(ip, bn, mp, PSIZE, p, rc);
2020  if (rc)
2021  return rc;
2022 
2023  BT_MARK_DIRTY(mp, ip);
2024  if (!test_cflag(COMMIT_Nolink, ip)) {
2025  tlck = txLock(tid, ip, mp, tlckXTREE | tlckGROW);
2026  xtlck = (struct xtlock *) & tlck->lock;
2027  }
2028 
2029  index0 = index = newindex;
2030  } else
2031  index++;
2032 
2033  newindex = index + 1;
2034  nextindex = le16_to_cpu(p->header.nextindex);
2035  xlen = xlen - (nxoff - xoff);
2036  xoff = nxoff;
2037  xaddr = nxaddr;
2038 
2039  /* recompute split pages */
2040  if (nextindex == le16_to_cpu(p->header.maxentry)) {
2041  XT_PUTPAGE(mp);
2042 
2043  if ((rc = xtSearch(ip, nxoff, NULL, &cmp, &btstack, XT_INSERT)))
2044  return rc;
2045 
2046  /* retrieve search result */
2047  XT_GETSEARCH(ip, btstack.top, bn, mp, p, index0);
2048 
2049  if (cmp != 0) {
2050  XT_PUTPAGE(mp);
2051  jfs_error(ip->i_sb, "xtUpdate: xtSearch failed");
2052  return -EIO;
2053  }
2054 
2055  if (index0 != index) {
2056  XT_PUTPAGE(mp);
2057  jfs_error(ip->i_sb,
2058  "xtUpdate: unexpected value of index");
2059  return -EIO;
2060  }
2061  }
2062 
2063  /*
2064  * split XAD into (nXAD, rXAD)
2065  *
2066  * ---nXAD---|
2067  * --|----------XAD----------|--
2068  * |-rXAD-|
2069  */
2070  updateLeft: /* (nxoff == xoff) && (nxlen < xlen) */
2071  /* update old XAD with nXAD:recorded */
2072  xad = &p->xad[index];
2073  *xad = *nxad;
2074  xad->flag = xflag & ~XAD_NOTRECORDED;
2075 
2076  /* insert rXAD:not_recorded */
2077  xoff = xoff + nxlen;
2078  xlen = xlen - nxlen;
2079  xaddr = xaddr + nxlen;
2080  if (nextindex == le16_to_cpu(p->header.maxentry)) {
2081 /*
2082 printf("xtUpdate.updateLeft.split p:0x%p\n", p);
2083 */
2084  /* xtSpliUp() unpins leaf pages */
2085  split.mp = mp;
2086  split.index = newindex;
2087  split.flag = xflag;
2088  split.off = xoff;
2089  split.len = xlen;
2090  split.addr = xaddr;
2091  split.pxdlist = NULL;
2092  if ((rc = xtSplitUp(tid, ip, &split, &btstack)))
2093  return rc;
2094 
2095  /* get back old page */
2096  XT_GETPAGE(ip, bn, mp, PSIZE, p, rc);
2097  if (rc)
2098  return rc;
2099 
2100  /*
2101  * if leaf root has been split, original root has been
2102  * copied to new child page, i.e., original entry now
2103  * resides on the new child page;
2104  */
2105  if (p->header.flag & BT_INTERNAL) {
2106  ASSERT(p->header.nextindex ==
2107  cpu_to_le16(XTENTRYSTART + 1));
2108  xad = &p->xad[XTENTRYSTART];
2109  bn = addressXAD(xad);
2110  XT_PUTPAGE(mp);
2111 
2112  /* get new child page */
2113  XT_GETPAGE(ip, bn, mp, PSIZE, p, rc);
2114  if (rc)
2115  return rc;
2116 
2117  BT_MARK_DIRTY(mp, ip);
2118  if (!test_cflag(COMMIT_Nolink, ip)) {
2119  tlck = txLock(tid, ip, mp, tlckXTREE|tlckGROW);
2120  xtlck = (struct xtlock *) & tlck->lock;
2121  }
2122  }
2123  } else {
2124  /* if insert into middle, shift right remaining entries */
2125  if (newindex < nextindex)
2126  memmove(&p->xad[newindex + 1], &p->xad[newindex],
2127  (nextindex - newindex) << L2XTSLOTSIZE);
2128 
2129  /* insert the entry */
2130  xad = &p->xad[newindex];
2131  XT_PUTENTRY(xad, xflag, xoff, xlen, xaddr);
2132 
2133  /* advance next available entry index. */
2134  p->header.nextindex =
2135  cpu_to_le16(le16_to_cpu(p->header.nextindex) + 1);
2136  }
2137 
2138  out:
2139  if (!test_cflag(COMMIT_Nolink, ip)) {
2140  xtlck->lwm.offset = (xtlck->lwm.offset) ?
2141  min(index0, (int)xtlck->lwm.offset) : index0;
2142  xtlck->lwm.length = le16_to_cpu(p->header.nextindex) -
2143  xtlck->lwm.offset;
2144  }
2145 
2146  /* unpin the leaf page */
2147  XT_PUTPAGE(mp);
2148 
2149  return rc;
2150 }
2151 
2152 
2153 /*
2154  * xtAppend()
2155  *
2156  * function: grow in append mode from contiguous region specified ;
2157  *
2158  * parameter:
2159  * tid - transaction id;
2160  * ip - file object;
2161  * xflag - extent flag:
2162  * xoff - extent offset;
2163  * maxblocks - max extent length;
2164  * xlen - extent length (in/out);
2165  * xaddrp - extent address pointer (in/out):
2166  * flag -
2167  *
2168  * return:
2169  */
2170 int xtAppend(tid_t tid, /* transaction id */
2171  struct inode *ip, int xflag, s64 xoff, s32 maxblocks,
2172  s32 * xlenp, /* (in/out) */
2173  s64 * xaddrp, /* (in/out) */
2174  int flag)
2175 {
2176  int rc = 0;
2177  struct metapage *mp; /* meta-page buffer */
2178  xtpage_t *p; /* base B+-tree index page */
2179  s64 bn, xaddr;
2180  int index, nextindex;
2181  struct btstack btstack; /* traverse stack */
2182  struct xtsplit split; /* split information */
2183  xad_t *xad;
2184  int cmp;
2185  struct tlock *tlck;
2186  struct xtlock *xtlck;
2187  int nsplit, nblocks, xlen;
2188  struct pxdlist pxdlist;
2189  pxd_t *pxd;
2190  s64 next;
2191 
2192  xaddr = *xaddrp;
2193  xlen = *xlenp;
2194  jfs_info("xtAppend: xoff:0x%lx maxblocks:%d xlen:%d xaddr:0x%lx",
2195  (ulong) xoff, maxblocks, xlen, (ulong) xaddr);
2196 
2197  /*
2198  * search for the entry location at which to insert:
2199  *
2200  * xtFastSearch() and xtSearch() both returns (leaf page
2201  * pinned, index at which to insert).
2202  * n.b. xtSearch() may return index of maxentry of
2203  * the full page.
2204  */
2205  if ((rc = xtSearch(ip, xoff, &next, &cmp, &btstack, XT_INSERT)))
2206  return rc;
2207 
2208  /* retrieve search result */
2209  XT_GETSEARCH(ip, btstack.top, bn, mp, p, index);
2210 
2211  if (cmp == 0) {
2212  rc = -EEXIST;
2213  goto out;
2214  }
2215 
2216  if (next)
2217  xlen = min(xlen, (int)(next - xoff));
2218 //insert:
2219  /*
2220  * insert entry for new extent
2221  */
2222  xflag |= XAD_NEW;
2223 
2224  /*
2225  * if the leaf page is full, split the page and
2226  * propagate up the router entry for the new page from split
2227  *
2228  * The xtSplitUp() will insert the entry and unpin the leaf page.
2229  */
2230  nextindex = le16_to_cpu(p->header.nextindex);
2231  if (nextindex < le16_to_cpu(p->header.maxentry))
2232  goto insertLeaf;
2233 
2234  /*
2235  * allocate new index blocks to cover index page split(s)
2236  */
2237  nsplit = btstack.nsplit;
2238  split.pxdlist = &pxdlist;
2239  pxdlist.maxnpxd = pxdlist.npxd = 0;
2240  pxd = &pxdlist.pxd[0];
2241  nblocks = JFS_SBI(ip->i_sb)->nbperpage;
2242  for (; nsplit > 0; nsplit--, pxd++, xaddr += nblocks, maxblocks -= nblocks) {
2243  if ((rc = dbAllocBottomUp(ip, xaddr, (s64) nblocks)) == 0) {
2244  PXDaddress(pxd, xaddr);
2245  PXDlength(pxd, nblocks);
2246 
2247  pxdlist.maxnpxd++;
2248 
2249  continue;
2250  }
2251 
2252  /* undo allocation */
2253 
2254  goto out;
2255  }
2256 
2257  xlen = min(xlen, maxblocks);
2258 
2259  /*
2260  * allocate data extent requested
2261  */
2262  if ((rc = dbAllocBottomUp(ip, xaddr, (s64) xlen)))
2263  goto out;
2264 
2265  split.mp = mp;
2266  split.index = index;
2267  split.flag = xflag;
2268  split.off = xoff;
2269  split.len = xlen;
2270  split.addr = xaddr;
2271  if ((rc = xtSplitUp(tid, ip, &split, &btstack))) {
2272  /* undo data extent allocation */
2273  dbFree(ip, *xaddrp, (s64) * xlenp);
2274 
2275  return rc;
2276  }
2277 
2278  *xaddrp = xaddr;
2279  *xlenp = xlen;
2280  return 0;
2281 
2282  /*
2283  * insert the new entry into the leaf page
2284  */
2285  insertLeaf:
2286  /*
2287  * allocate data extent requested
2288  */
2289  if ((rc = dbAllocBottomUp(ip, xaddr, (s64) xlen)))
2290  goto out;
2291 
2292  BT_MARK_DIRTY(mp, ip);
2293  /*
2294  * acquire a transaction lock on the leaf page;
2295  *
2296  * action: xad insertion/extension;
2297  */
2298  tlck = txLock(tid, ip, mp, tlckXTREE | tlckGROW);
2299  xtlck = (struct xtlock *) & tlck->lock;
2300 
2301  /* insert the new entry: mark the entry NEW */
2302  xad = &p->xad[index];
2303  XT_PUTENTRY(xad, xflag, xoff, xlen, xaddr);
2304 
2305  /* advance next available entry index */
2306  le16_add_cpu(&p->header.nextindex, 1);
2307 
2308  xtlck->lwm.offset =
2309  (xtlck->lwm.offset) ? min(index,(int) xtlck->lwm.offset) : index;
2310  xtlck->lwm.length = le16_to_cpu(p->header.nextindex) -
2311  xtlck->lwm.offset;
2312 
2313  *xaddrp = xaddr;
2314  *xlenp = xlen;
2315 
2316  out:
2317  /* unpin the leaf page */
2318  XT_PUTPAGE(mp);
2319 
2320  return rc;
2321 }
2322 #ifdef _STILL_TO_PORT
2323 
/* - TBD for defragmentation/reorganization -
 *
 * xtDelete()
 *
 * function:
 *	delete the entry with the specified key.
 *
 *	N.B.: whole extent of the entry is assumed to be deleted.
 *
 * parameter:
 *	tid	- transaction id;
 *	ip	- file object;
 *	xoff	- extent offset (search key);
 *	xlen	- extent length (NOTE(review): currently unused in the body);
 *	flag	- NOTE(review): currently unused in the body;
 *
 * return:
 *	ENOENT: if the entry is not found.
 *
 * exception:
 */
int xtDelete(tid_t tid, struct inode *ip, s64 xoff, s32 xlen, int flag)
{
	int rc = 0;
	struct btstack btstack;
	int cmp;
	s64 bn;
	struct metapage *mp;
	xtpage_t *p;
	int index, nextindex;
	struct tlock *tlck;
	struct xtlock *xtlck;

	/*
	 * find the matching entry; xtSearch() pins the page
	 */
	if ((rc = xtSearch(ip, xoff, NULL, &cmp, &btstack, 0)))
		return rc;

	XT_GETSEARCH(ip, btstack.top, bn, mp, p, index);
	if (cmp) {
		/* no exact-offset match: unpin the leaf page */
		XT_PUTPAGE(mp);
		return -ENOENT;
	}

	/*
	 * delete the entry from the leaf page:
	 * nextindex keeps the pre-decrement entry count for the
	 * shift-left below; the header count is decremented in place.
	 */
	nextindex = le16_to_cpu(p->header.nextindex);
	le16_add_cpu(&p->header.nextindex, -1);

	/*
	 * if the leaf page become empty, free the page
	 * (xtDeleteUp() consumes/unpins mp on all of its paths)
	 */
	if (p->header.nextindex == cpu_to_le16(XTENTRYSTART))
		return (xtDeleteUp(tid, ip, mp, p, &btstack));

	BT_MARK_DIRTY(mp, ip);
	/*
	 * acquire a transaction lock on the leaf page;
	 *
	 * action:xad deletion;
	 */
	tlck = txLock(tid, ip, mp, tlckXTREE);
	xtlck = (struct xtlock *) & tlck->lock;
	/* extend the low-water mark down to the deleted slot if needed */
	xtlck->lwm.offset =
	    (xtlck->lwm.offset) ? min(index, xtlck->lwm.offset) : index;

	/* if delete from middle, shift left/compact the remaining entries */
	if (index < nextindex - 1)
		memmove(&p->xad[index], &p->xad[index + 1],
			(nextindex - index - 1) * sizeof(xad_t));

	XT_PUTPAGE(mp);

	return 0;
}
2397 
2398 
2399 /* - TBD for defragmentaion/reorganization -
2400  *
2401  * xtDeleteUp()
2402  *
2403  * function:
2404  * free empty pages as propagating deletion up the tree
2405  *
2406  * parameter:
2407  *
2408  * return:
2409  */
2410 static int
2411 xtDeleteUp(tid_t tid, struct inode *ip,
2412  struct metapage * fmp, xtpage_t * fp, struct btstack * btstack)
2413 {
2414  int rc = 0;
2415  struct metapage *mp;
2416  xtpage_t *p;
2417  int index, nextindex;
2418  s64 xaddr;
2419  int xlen;
2420  struct btframe *parent;
2421  struct tlock *tlck;
2422  struct xtlock *xtlck;
2423 
2424  /*
2425  * keep root leaf page which has become empty
2426  */
2427  if (fp->header.flag & BT_ROOT) {
2428  /* keep the root page */
2429  fp->header.flag &= ~BT_INTERNAL;
2430  fp->header.flag |= BT_LEAF;
2431  fp->header.nextindex = cpu_to_le16(XTENTRYSTART);
2432 
2433  /* XT_PUTPAGE(fmp); */
2434 
2435  return 0;
2436  }
2437 
2438  /*
2439  * free non-root leaf page
2440  */
2441  if ((rc = xtRelink(tid, ip, fp))) {
2442  XT_PUTPAGE(fmp);
2443  return rc;
2444  }
2445 
2446  xaddr = addressPXD(&fp->header.self);
2447  xlen = lengthPXD(&fp->header.self);
2448  /* free the page extent */
2449  dbFree(ip, xaddr, (s64) xlen);
2450 
2451  /* free the buffer page */
2452  discard_metapage(fmp);
2453 
2454  /*
2455  * propagate page deletion up the index tree
2456  *
2457  * If the delete from the parent page makes it empty,
2458  * continue all the way up the tree.
2459  * stop if the root page is reached (which is never deleted) or
2460  * if the entry deletion does not empty the page.
2461  */
2462  while ((parent = BT_POP(btstack)) != NULL) {
2463  /* get/pin the parent page <sp> */
2464  XT_GETPAGE(ip, parent->bn, mp, PSIZE, p, rc);
2465  if (rc)
2466  return rc;
2467 
2468  index = parent->index;
2469 
2470  /* delete the entry for the freed child page from parent.
2471  */
2472  nextindex = le16_to_cpu(p->header.nextindex);
2473 
2474  /*
2475  * the parent has the single entry being deleted:
2476  * free the parent page which has become empty.
2477  */
2478  if (nextindex == 1) {
2479  if (p->header.flag & BT_ROOT) {
2480  /* keep the root page */
2481  p->header.flag &= ~BT_INTERNAL;
2482  p->header.flag |= BT_LEAF;
2483  p->header.nextindex =
2485 
2486  /* XT_PUTPAGE(mp); */
2487 
2488  break;
2489  } else {
2490  /* free the parent page */
2491  if ((rc = xtRelink(tid, ip, p)))
2492  return rc;
2493 
2494  xaddr = addressPXD(&p->header.self);
2495  /* free the page extent */
2496  dbFree(ip, xaddr,
2497  (s64) JFS_SBI(ip->i_sb)->nbperpage);
2498 
2499  /* unpin/free the buffer page */
2500  discard_metapage(mp);
2501 
2502  /* propagate up */
2503  continue;
2504  }
2505  }
2506  /*
2507  * the parent has other entries remaining:
2508  * delete the router entry from the parent page.
2509  */
2510  else {
2511  BT_MARK_DIRTY(mp, ip);
2512  /*
2513  * acquire a transaction lock on the leaf page;
2514  *
2515  * action:xad deletion;
2516  */
2517  tlck = txLock(tid, ip, mp, tlckXTREE);
2518  xtlck = (struct xtlock *) & tlck->lock;
2519  xtlck->lwm.offset =
2520  (xtlck->lwm.offset) ? min(index,
2521  xtlck->lwm.
2522  offset) : index;
2523 
2524  /* if delete from middle,
2525  * shift left/compact the remaining entries in the page
2526  */
2527  if (index < nextindex - 1)
2528  memmove(&p->xad[index], &p->xad[index + 1],
2529  (nextindex - index -
2530  1) << L2XTSLOTSIZE);
2531 
2532  le16_add_cpu(&p->header.nextindex, -1);
2533  jfs_info("xtDeleteUp(entry): 0x%lx[%d]",
2534  (ulong) parent->bn, index);
2535  }
2536 
2537  /* unpin the parent page */
2538  XT_PUTPAGE(mp);
2539 
2540  /* exit propagation up */
2541  break;
2542  }
2543 
2544  return 0;
2545 }
2546 
2547 
2548 /*
2549  * NAME: xtRelocate()
2550  *
2551  * FUNCTION: relocate xtpage or data extent of regular file;
2552  * This function is mainly used by defragfs utility.
2553  *
2554  * NOTE: This routine does not have the logic to handle
2555  * uncommitted allocated extent. The caller should call
2556  * txCommit() to commit all the allocation before call
2557  * this routine.
2558  */
2559 int
2560 xtRelocate(tid_t tid, struct inode * ip, xad_t * oxad, /* old XAD */
2561  s64 nxaddr, /* new xaddr */
2562  int xtype)
2563 { /* extent type: XTPAGE or DATAEXT */
2564  int rc = 0;
2565  struct tblock *tblk;
2566  struct tlock *tlck;
2567  struct xtlock *xtlck;
2568  struct metapage *mp, *pmp, *lmp, *rmp; /* meta-page buffer */
2569  xtpage_t *p, *pp, *rp, *lp; /* base B+-tree index page */
2570  xad_t *xad;
2571  pxd_t *pxd;
2572  s64 xoff, xsize;
2573  int xlen;
2574  s64 oxaddr, sxaddr, dxaddr, nextbn, prevbn;
2575  cbuf_t *cp;
2576  s64 offset, nbytes, nbrd, pno;
2577  int nb, npages, nblks;
2578  s64 bn;
2579  int cmp;
2580  int index;
2581  struct pxd_lock *pxdlock;
2582  struct btstack btstack; /* traverse stack */
2583 
2584  xtype = xtype & EXTENT_TYPE;
2585 
2586  xoff = offsetXAD(oxad);
2587  oxaddr = addressXAD(oxad);
2588  xlen = lengthXAD(oxad);
2589 
2590  /* validate extent offset */
2591  offset = xoff << JFS_SBI(ip->i_sb)->l2bsize;
2592  if (offset >= ip->i_size)
2593  return -ESTALE; /* stale extent */
2594 
2595  jfs_info("xtRelocate: xtype:%d xoff:0x%lx xlen:0x%x xaddr:0x%lx:0x%lx",
2596  xtype, (ulong) xoff, xlen, (ulong) oxaddr, (ulong) nxaddr);
2597 
2598  /*
2599  * 1. get and validate the parent xtpage/xad entry
2600  * covering the source extent to be relocated;
2601  */
2602  if (xtype == DATAEXT) {
2603  /* search in leaf entry */
2604  rc = xtSearch(ip, xoff, NULL, &cmp, &btstack, 0);
2605  if (rc)
2606  return rc;
2607 
2608  /* retrieve search result */
2609  XT_GETSEARCH(ip, btstack.top, bn, pmp, pp, index);
2610 
2611  if (cmp) {
2612  XT_PUTPAGE(pmp);
2613  return -ESTALE;
2614  }
2615 
2616  /* validate for exact match with a single entry */
2617  xad = &pp->xad[index];
2618  if (addressXAD(xad) != oxaddr || lengthXAD(xad) != xlen) {
2619  XT_PUTPAGE(pmp);
2620  return -ESTALE;
2621  }
2622  } else { /* (xtype == XTPAGE) */
2623 
2624  /* search in internal entry */
2625  rc = xtSearchNode(ip, oxad, &cmp, &btstack, 0);
2626  if (rc)
2627  return rc;
2628 
2629  /* retrieve search result */
2630  XT_GETSEARCH(ip, btstack.top, bn, pmp, pp, index);
2631 
2632  if (cmp) {
2633  XT_PUTPAGE(pmp);
2634  return -ESTALE;
2635  }
2636 
2637  /* xtSearchNode() validated for exact match with a single entry
2638  */
2639  xad = &pp->xad[index];
2640  }
2641  jfs_info("xtRelocate: parent xad entry validated.");
2642 
2643  /*
2644  * 2. relocate the extent
2645  */
2646  if (xtype == DATAEXT) {
2647  /* if the extent is allocated-but-not-recorded
2648  * there is no real data to be moved in this extent,
2649  */
2650  if (xad->flag & XAD_NOTRECORDED)
2651  goto out;
2652  else
2653  /* release xtpage for cmRead()/xtLookup() */
2654  XT_PUTPAGE(pmp);
2655 
2656  /*
2657  * cmRelocate()
2658  *
2659  * copy target data pages to be relocated;
2660  *
2661  * data extent must start at page boundary and
2662  * multiple of page size (except the last data extent);
2663  * read in each page of the source data extent into cbuf,
2664  * update the cbuf extent descriptor of the page to be
2665  * homeward bound to new dst data extent
2666  * copy the data from the old extent to new extent.
2667  * copy is essential for compressed files to avoid problems
2668  * that can arise if there was a change in compression
2669  * algorithms.
2670  * it is a good strategy because it may disrupt cache
2671  * policy to keep the pages in memory afterwards.
2672  */
2673  offset = xoff << JFS_SBI(ip->i_sb)->l2bsize;
2674  assert((offset & CM_OFFSET) == 0);
2675  nbytes = xlen << JFS_SBI(ip->i_sb)->l2bsize;
2676  pno = offset >> CM_L2BSIZE;
2677  npages = (nbytes + (CM_BSIZE - 1)) >> CM_L2BSIZE;
2678 /*
2679  npages = ((offset + nbytes - 1) >> CM_L2BSIZE) -
2680  (offset >> CM_L2BSIZE) + 1;
2681 */
2682  sxaddr = oxaddr;
2683  dxaddr = nxaddr;
2684 
2685  /* process the request one cache buffer at a time */
2686  for (nbrd = 0; nbrd < nbytes; nbrd += nb,
2687  offset += nb, pno++, npages--) {
2688  /* compute page size */
2689  nb = min(nbytes - nbrd, CM_BSIZE);
2690 
2691  /* get the cache buffer of the page */
2692  if (rc = cmRead(ip, offset, npages, &cp))
2693  break;
2694 
2695  assert(addressPXD(&cp->cm_pxd) == sxaddr);
2696  assert(!cp->cm_modified);
2697 
2698  /* bind buffer with the new extent address */
2699  nblks = nb >> JFS_IP(ip->i_sb)->l2bsize;
2700  cmSetXD(ip, cp, pno, dxaddr, nblks);
2701 
2702  /* release the cbuf, mark it as modified */
2703  cmPut(cp, true);
2704 
2705  dxaddr += nblks;
2706  sxaddr += nblks;
2707  }
2708 
2709  /* get back parent page */
2710  if ((rc = xtSearch(ip, xoff, NULL, &cmp, &btstack, 0)))
2711  return rc;
2712 
2713  XT_GETSEARCH(ip, btstack.top, bn, pmp, pp, index);
2714  jfs_info("xtRelocate: target data extent relocated.");
2715  } else { /* (xtype == XTPAGE) */
2716 
2717  /*
2718  * read in the target xtpage from the source extent;
2719  */
2720  XT_GETPAGE(ip, oxaddr, mp, PSIZE, p, rc);
2721  if (rc) {
2722  XT_PUTPAGE(pmp);
2723  return rc;
2724  }
2725 
2726  /*
2727  * read in sibling pages if any to update sibling pointers;
2728  */
2729  rmp = NULL;
2730  if (p->header.next) {
2731  nextbn = le64_to_cpu(p->header.next);
2732  XT_GETPAGE(ip, nextbn, rmp, PSIZE, rp, rc);
2733  if (rc) {
2734  XT_PUTPAGE(pmp);
2735  XT_PUTPAGE(mp);
2736  return (rc);
2737  }
2738  }
2739 
2740  lmp = NULL;
2741  if (p->header.prev) {
2742  prevbn = le64_to_cpu(p->header.prev);
2743  XT_GETPAGE(ip, prevbn, lmp, PSIZE, lp, rc);
2744  if (rc) {
2745  XT_PUTPAGE(pmp);
2746  XT_PUTPAGE(mp);
2747  if (rmp)
2748  XT_PUTPAGE(rmp);
2749  return (rc);
2750  }
2751  }
2752 
2753  /* at this point, all xtpages to be updated are in memory */
2754 
2755  /*
2756  * update sibling pointers of sibling xtpages if any;
2757  */
2758  if (lmp) {
2759  BT_MARK_DIRTY(lmp, ip);
2760  tlck = txLock(tid, ip, lmp, tlckXTREE | tlckRELINK);
2761  lp->header.next = cpu_to_le64(nxaddr);
2762  XT_PUTPAGE(lmp);
2763  }
2764 
2765  if (rmp) {
2766  BT_MARK_DIRTY(rmp, ip);
2767  tlck = txLock(tid, ip, rmp, tlckXTREE | tlckRELINK);
2768  rp->header.prev = cpu_to_le64(nxaddr);
2769  XT_PUTPAGE(rmp);
2770  }
2771 
2772  /*
2773  * update the target xtpage to be relocated
2774  *
2775  * update the self address of the target page
2776  * and write to destination extent;
2777  * redo image covers the whole xtpage since it is new page
2778  * to the destination extent;
2779  * update of bmap for the free of source extent
2780  * of the target xtpage itself:
2781  * update of bmap for the allocation of destination extent
2782  * of the target xtpage itself:
2783  * update of bmap for the extents covered by xad entries in
2784  * the target xtpage is not necessary since they are not
2785  * updated;
2786  * if not committed before this relocation,
2787  * target page may contain XAD_NEW entries which must
2788  * be scanned for bmap update (logredo() always
2789  * scan xtpage REDOPAGE image for bmap update);
2790  * if committed before this relocation (tlckRELOCATE),
2791  * scan may be skipped by commit() and logredo();
2792  */
2793  BT_MARK_DIRTY(mp, ip);
2794  /* tlckNEW init xtlck->lwm.offset = XTENTRYSTART; */
2795  tlck = txLock(tid, ip, mp, tlckXTREE | tlckNEW);
2796  xtlck = (struct xtlock *) & tlck->lock;
2797 
2798  /* update the self address in the xtpage header */
2799  pxd = &p->header.self;
2800  PXDaddress(pxd, nxaddr);
2801 
2802  /* linelock for the after image of the whole page */
2803  xtlck->lwm.length =
2804  le16_to_cpu(p->header.nextindex) - xtlck->lwm.offset;
2805 
2806  /* update the buffer extent descriptor of target xtpage */
2807  xsize = xlen << JFS_SBI(ip->i_sb)->l2bsize;
2808  bmSetXD(mp, nxaddr, xsize);
2809 
2810  /* unpin the target page to new homeward bound */
2811  XT_PUTPAGE(mp);
2812  jfs_info("xtRelocate: target xtpage relocated.");
2813  }
2814 
2815  /*
2816  * 3. acquire maplock for the source extent to be freed;
2817  *
2818  * acquire a maplock saving the src relocated extent address;
2819  * to free of the extent at commit time;
2820  */
2821  out:
2822  /* if DATAEXT relocation, write a LOG_UPDATEMAP record for
2823  * free PXD of the source data extent (logredo() will update
2824  * bmap for free of source data extent), and update bmap for
2825  * free of the source data extent;
2826  */
2827  if (xtype == DATAEXT)
2828  tlck = txMaplock(tid, ip, tlckMAP);
2829  /* if XTPAGE relocation, write a LOG_NOREDOPAGE record
2830  * for the source xtpage (logredo() will init NoRedoPage
2831  * filter and will also update bmap for free of the source
2832  * xtpage), and update bmap for free of the source xtpage;
2833  * N.B. We use tlckMAP instead of tlkcXTREE because there
2834  * is no buffer associated with this lock since the buffer
2835  * has been redirected to the target location.
2836  */
2837  else /* (xtype == XTPAGE) */
2838  tlck = txMaplock(tid, ip, tlckMAP | tlckRELOCATE);
2839 
2840  pxdlock = (struct pxd_lock *) & tlck->lock;
2841  pxdlock->flag = mlckFREEPXD;
2842  PXDaddress(&pxdlock->pxd, oxaddr);
2843  PXDlength(&pxdlock->pxd, xlen);
2844  pxdlock->index = 1;
2845 
2846  /*
2847  * 4. update the parent xad entry for relocation;
2848  *
2849  * acquire tlck for the parent entry with XAD_NEW as entry
2850  * update which will write LOG_REDOPAGE and update bmap for
2851  * allocation of XAD_NEW destination extent;
2852  */
2853  jfs_info("xtRelocate: update parent xad entry.");
2854  BT_MARK_DIRTY(pmp, ip);
2855  tlck = txLock(tid, ip, pmp, tlckXTREE | tlckGROW);
2856  xtlck = (struct xtlock *) & tlck->lock;
2857 
2858  /* update the XAD with the new destination extent; */
2859  xad = &pp->xad[index];
2860  xad->flag |= XAD_NEW;
2861  XADaddress(xad, nxaddr);
2862 
2863  xtlck->lwm.offset = min(index, xtlck->lwm.offset);
2864  xtlck->lwm.length = le16_to_cpu(pp->header.nextindex) -
2865  xtlck->lwm.offset;
2866 
2867  /* unpin the parent xtpage */
2868  XT_PUTPAGE(pmp);
2869 
2870  return rc;
2871 }
2872 
2873 
2874 /*
2875  * xtSearchNode()
2876  *
2877  * function: search for the internal xad entry covering specified extent.
2878  * This function is mainly used by defragfs utility.
2879  *
2880  * parameters:
2881  * ip - file object;
2882  * xad - extent to find;
2883  * cmpp - comparison result:
2884  * btstack - traverse stack;
2885  * flag - search process flag;
2886  *
2887  * returns:
2888  * btstack contains (bn, index) of search path traversed to the entry.
2889  * *cmpp is set to result of comparison with the entry returned.
2890  * the page containing the entry is pinned at exit.
2891  */
2892 static int xtSearchNode(struct inode *ip, xad_t * xad, /* required XAD entry */
2893  int *cmpp, struct btstack * btstack, int flag)
2894 {
2895  int rc = 0;
2896  s64 xoff, xaddr;
2897  int xlen;
2898  int cmp = 1; /* init for empty page */
2899  s64 bn; /* block number */
2900  struct metapage *mp; /* meta-page buffer */
2901  xtpage_t *p; /* page */
2902  int base, index, lim;
2903  struct btframe *btsp;
2904  s64 t64;
2905 
2906  BT_CLR(btstack);
2907 
2908  xoff = offsetXAD(xad);
2909  xlen = lengthXAD(xad);
2910  xaddr = addressXAD(xad);
2911 
2912  /*
2913  * search down tree from root:
2914  *
2915  * between two consecutive entries of <Ki, Pi> and <Kj, Pj> of
2916  * internal page, child page Pi contains entry with k, Ki <= K < Kj.
2917  *
2918  * if entry with search key K is not found
2919  * internal page search find the entry with largest key Ki
2920  * less than K which point to the child page to search;
2921  * leaf page search find the entry with smallest key Kj
2922  * greater than K so that the returned index is the position of
2923  * the entry to be shifted right for insertion of new entry.
2924  * for empty tree, search key is greater than any key of the tree.
2925  *
2926  * by convention, root bn = 0.
2927  */
2928  for (bn = 0;;) {
2929  /* get/pin the page to search */
2930  XT_GETPAGE(ip, bn, mp, PSIZE, p, rc);
2931  if (rc)
2932  return rc;
2933  if (p->header.flag & BT_LEAF) {
2934  XT_PUTPAGE(mp);
2935  return -ESTALE;
2936  }
2937 
2938  lim = le16_to_cpu(p->header.nextindex) - XTENTRYSTART;
2939 
2940  /*
2941  * binary search with search key K on the current page
2942  */
2943  for (base = XTENTRYSTART; lim; lim >>= 1) {
2944  index = base + (lim >> 1);
2945 
2946  XT_CMP(cmp, xoff, &p->xad[index], t64);
2947  if (cmp == 0) {
2948  /*
2949  * search hit
2950  *
2951  * verify for exact match;
2952  */
2953  if (xaddr == addressXAD(&p->xad[index]) &&
2954  xoff == offsetXAD(&p->xad[index])) {
2955  *cmpp = cmp;
2956 
2957  /* save search result */
2958  btsp = btstack->top;
2959  btsp->bn = bn;
2960  btsp->index = index;
2961  btsp->mp = mp;
2962 
2963  return 0;
2964  }
2965 
2966  /* descend/search its child page */
2967  goto next;
2968  }
2969 
2970  if (cmp > 0) {
2971  base = index + 1;
2972  --lim;
2973  }
2974  }
2975 
2976  /*
2977  * search miss - non-leaf page:
2978  *
2979  * base is the smallest index with key (Kj) greater than
2980  * search key (K) and may be zero or maxentry index.
2981  * if base is non-zero, decrement base by one to get the parent
2982  * entry of the child page to search.
2983  */
2984  index = base ? base - 1 : base;
2985 
2986  /*
2987  * go down to child page
2988  */
2989  next:
2990  /* get the child page block number */
2991  bn = addressXAD(&p->xad[index]);
2992 
2993  /* unpin the parent page */
2994  XT_PUTPAGE(mp);
2995  }
2996 }
2997 
2998 
2999 /*
3000  * xtRelink()
3001  *
3002  * function:
3003  * link around a freed page.
3004  *
3005  * Parameter:
3006  * int tid,
3007  * struct inode *ip,
3008  * xtpage_t *p)
3009  *
3010  * returns:
3011  */
3012 static int xtRelink(tid_t tid, struct inode *ip, xtpage_t * p)
3013 {
3014  int rc = 0;
3015  struct metapage *mp;
3016  s64 nextbn, prevbn;
3017  struct tlock *tlck;
3018 
3019  nextbn = le64_to_cpu(p->header.next);
3020  prevbn = le64_to_cpu(p->header.prev);
3021 
3022  /* update prev pointer of the next page */
3023  if (nextbn != 0) {
3024  XT_GETPAGE(ip, nextbn, mp, PSIZE, p, rc);
3025  if (rc)
3026  return rc;
3027 
3028  /*
3029  * acquire a transaction lock on the page;
3030  *
3031  * action: update prev pointer;
3032  */
3033  BT_MARK_DIRTY(mp, ip);
3034  tlck = txLock(tid, ip, mp, tlckXTREE | tlckRELINK);
3035 
3036  /* the page may already have been tlock'd */
3037 
3038  p->header.prev = cpu_to_le64(prevbn);
3039 
3040  XT_PUTPAGE(mp);
3041  }
3042 
3043  /* update next pointer of the previous page */
3044  if (prevbn != 0) {
3045  XT_GETPAGE(ip, prevbn, mp, PSIZE, p, rc);
3046  if (rc)
3047  return rc;
3048 
3049  /*
3050  * acquire a transaction lock on the page;
3051  *
3052  * action: update next pointer;
3053  */
3054  BT_MARK_DIRTY(mp, ip);
3055  tlck = txLock(tid, ip, mp, tlckXTREE | tlckRELINK);
3056 
3057  /* the page may already have been tlock'd */
3058 
3059  p->header.next = le64_to_cpu(nextbn);
3060 
3061  XT_PUTPAGE(mp);
3062  }
3063 
3064  return 0;
3065 }
3066 #endif /* _STILL_TO_PORT */
3067 
3068 
3069 /*
3070  * xtInitRoot()
3071  *
3072  * initialize file root (inline in inode)
3073  */
3074 void xtInitRoot(tid_t tid, struct inode *ip)
3075 {
3076  xtpage_t *p;
3077 
3078  /*
3079  * acquire a transaction lock on the root
3080  *
3081  * action:
3082  */
3083  txLock(tid, ip, (struct metapage *) &JFS_IP(ip)->bxflag,
3084  tlckXTREE | tlckNEW);
3085  p = &JFS_IP(ip)->i_xtroot;
3086 
3087  p->header.flag = DXD_INDEX | BT_ROOT | BT_LEAF;
3088  p->header.nextindex = cpu_to_le16(XTENTRYSTART);
3089 
3090  if (S_ISDIR(ip->i_mode))
3091  p->header.maxentry = cpu_to_le16(XTROOTINITSLOT_DIR);
3092  else {
3093  p->header.maxentry = cpu_to_le16(XTROOTINITSLOT);
3094  ip->i_size = 0;
3095  }
3096 
3097 
3098  return;
3099 }
3100 
3101 
3102 /*
3103  * We can run into a deadlock truncating a file with a large number of
3104  * xtree pages (large fragmented file). A robust fix would entail a
3105  * reservation system where we would reserve a number of metadata pages
3106  * and tlocks which we would be guaranteed without a deadlock. Without
3107  * this, a partial fix is to limit number of metadata pages we will lock
3108  * in a single transaction. Currently we will truncate the file so that
3109  * no more than 50 leaf pages will be locked. The caller of xtTruncate
3110  * will be responsible for ensuring that the current transaction gets
3111  * committed, and that subsequent transactions are created to truncate
3112  * the file further if needed.
3113  */
3114 #define MAX_TRUNCATE_LEAVES 50
3115 
3116 /*
3117  * xtTruncate()
3118  *
3119  * function:
3120  * traverse for truncation logging backward bottom up;
3121  * terminate at the last extent entry at the current subtree
3122  * root page covering new down size.
3123  * truncation may occur within the last extent entry.
3124  *
3125  * parameter:
3126  * int tid,
3127  * struct inode *ip,
3128  * s64 newsize,
3129  * int type) {PWMAP, PMAP, WMAP; DELETE, TRUNCATE}
3130  *
3131  * return:
3132  *
3133  * note:
3134  * PWMAP:
3135  * 1. truncate (non-COMMIT_NOLINK file)
3136  * by jfs_truncate() or jfs_open(O_TRUNC):
3137  * xtree is updated;
3138  * 2. truncate index table of directory when last entry removed
3139  * map update via tlock at commit time;
3140  * PMAP:
3141  * Call xtTruncate_pmap instead
3142  * WMAP:
3143  * 1. remove (free zero link count) on last reference release
3144  * (pmap has been freed at commit zero link count);
3145  * 2. truncate (COMMIT_NOLINK file, i.e., tmp file):
3146  * xtree is updated;
3147  * map update directly at truncation time;
3148  *
3149  * if (DELETE)
3150  * no LOG_NOREDOPAGE is required (NOREDOFILE is sufficient);
3151  * else if (TRUNCATE)
3152  * must write LOG_NOREDOPAGE for deleted index page;
3153  *
3154  * pages may already have been tlocked by anonymous transactions
3155  * during file growth (i.e., write) before truncation;
3156  *
3157  * except last truncated entry, deleted entries remains as is
3158  * in the page (nextindex is updated) for other use
3159  * (e.g., log/update allocation map): this avoid copying the page
3160  * info but delay free of pages;
3161  *
3162  */
3163 s64 xtTruncate(tid_t tid, struct inode *ip, s64 newsize, int flag)
3164 {
3165  int rc = 0;
3166  s64 teof;
3167  struct metapage *mp;
3168  xtpage_t *p;
3169  s64 bn;
3170  int index, nextindex;
3171  xad_t *xad;
3172  s64 xoff, xaddr;
3173  int xlen, len, freexlen;
3174  struct btstack btstack;
3175  struct btframe *parent;
3176  struct tblock *tblk = NULL;
3177  struct tlock *tlck = NULL;
3178  struct xtlock *xtlck = NULL;
3179  struct xdlistlock xadlock; /* maplock for COMMIT_WMAP */
3180  struct pxd_lock *pxdlock; /* maplock for COMMIT_WMAP */
3181  s64 nfreed;
3182  int freed, log;
3183  int locked_leaves = 0;
3184 
3185  /* save object truncation type */
3186  if (tid) {
3187  tblk = tid_to_tblock(tid);
3188  tblk->xflag |= flag;
3189  }
3190 
3191  nfreed = 0;
3192 
3193  flag &= COMMIT_MAP;
3194  assert(flag != COMMIT_PMAP);
3195 
3196  if (flag == COMMIT_PWMAP)
3197  log = 1;
3198  else {
3199  log = 0;
3200  xadlock.flag = mlckFREEXADLIST;
3201  xadlock.index = 1;
3202  }
3203 
3204  /*
3205  * if the newsize is not an integral number of pages,
3206  * the file between newsize and next page boundary will
3207  * be cleared.
3208  * if truncating into a file hole, it will cause
3209  * a full block to be allocated for the logical block.
3210  */
3211 
3212  /*
3213  * release page blocks of truncated region <teof, eof>
3214  *
3215  * free the data blocks from the leaf index blocks.
3216  * delete the parent index entries corresponding to
3217  * the freed child data/index blocks.
3218  * free the index blocks themselves which aren't needed
3219  * in new sized file.
3220  *
3221  * index blocks are updated only if the blocks are to be
3222  * retained in the new sized file.
3223  * if type is PMAP, the data and index pages are NOT
3224  * freed, and the data and index blocks are NOT freed
3225  * from working map.
3226  * (this will allow continued access of data/index of
3227  * temporary file (zerolink count file truncated to zero-length)).
3228  */
3229  teof = (newsize + (JFS_SBI(ip->i_sb)->bsize - 1)) >>
3230  JFS_SBI(ip->i_sb)->l2bsize;
3231 
3232  /* clear stack */
3233  BT_CLR(&btstack);
3234 
3235  /*
3236  * start with root
3237  *
3238  * root resides in the inode
3239  */
3240  bn = 0;
3241 
3242  /*
3243  * first access of each page:
3244  */
3245  getPage:
3246  XT_GETPAGE(ip, bn, mp, PSIZE, p, rc);
3247  if (rc)
3248  return rc;
3249 
3250  /* process entries backward from last index */
3251  index = le16_to_cpu(p->header.nextindex) - 1;
3252 
3253 
3254  /* Since this is the rightmost page at this level, and we may have
3255  * already freed a page that was formerly to the right, let's make
3256  * sure that the next pointer is zero.
3257  */
3258  if (p->header.next) {
3259  if (log)
3260  /*
3261  * Make sure this change to the header is logged.
3262  * If we really truncate this leaf, the flag
3263  * will be changed to tlckTRUNCATE
3264  */
3265  tlck = txLock(tid, ip, mp, tlckXTREE|tlckGROW);
3266  BT_MARK_DIRTY(mp, ip);
3267  p->header.next = 0;
3268  }
3269 
3270  if (p->header.flag & BT_INTERNAL)
3271  goto getChild;
3272 
3273  /*
3274  * leaf page
3275  */
3276  freed = 0;
3277 
3278  /* does region covered by leaf page precede Teof ? */
3279  xad = &p->xad[index];
3280  xoff = offsetXAD(xad);
3281  xlen = lengthXAD(xad);
3282  if (teof >= xoff + xlen) {
3283  XT_PUTPAGE(mp);
3284  goto getParent;
3285  }
3286 
3287  /* (re)acquire tlock of the leaf page */
3288  if (log) {
3289  if (++locked_leaves > MAX_TRUNCATE_LEAVES) {
3290  /*
3291  * We need to limit the size of the transaction
3292  * to avoid exhausting pagecache & tlocks
3293  */
3294  XT_PUTPAGE(mp);
3295  newsize = (xoff + xlen) << JFS_SBI(ip->i_sb)->l2bsize;
3296  goto getParent;
3297  }
3298  tlck = txLock(tid, ip, mp, tlckXTREE);
3299  tlck->type = tlckXTREE | tlckTRUNCATE;
3300  xtlck = (struct xtlock *) & tlck->lock;
3301  xtlck->hwm.offset = le16_to_cpu(p->header.nextindex) - 1;
3302  }
3303  BT_MARK_DIRTY(mp, ip);
3304 
3305  /*
3306  * scan backward leaf page entries
3307  */
3308  for (; index >= XTENTRYSTART; index--) {
3309  xad = &p->xad[index];
3310  xoff = offsetXAD(xad);
3311  xlen = lengthXAD(xad);
3312  xaddr = addressXAD(xad);
3313 
3314  /*
3315  * The "data" for a directory is indexed by the block
3316  * device's address space. This metadata must be invalidated
3317  * here
3318  */
3319  if (S_ISDIR(ip->i_mode) && (teof == 0))
3320  invalidate_xad_metapages(ip, *xad);
3321  /*
3322  * entry beyond eof: continue scan of current page
3323  * xad
3324  * ---|---=======------->
3325  * eof
3326  */
3327  if (teof < xoff) {
3328  nfreed += xlen;
3329  continue;
3330  }
3331 
3332  /*
3333  * (xoff <= teof): last entry to be deleted from page;
3334  * If other entries remain in page: keep and update the page.
3335  */
3336 
3337  /*
3338  * eof == entry_start: delete the entry
3339  * xad
3340  * -------|=======------->
3341  * eof
3342  *
3343  */
3344  if (teof == xoff) {
3345  nfreed += xlen;
3346 
3347  if (index == XTENTRYSTART)
3348  break;
3349 
3350  nextindex = index;
3351  }
3352  /*
3353  * eof within the entry: truncate the entry.
3354  * xad
3355  * -------===|===------->
3356  * eof
3357  */
3358  else if (teof < xoff + xlen) {
3359  /* update truncated entry */
3360  len = teof - xoff;
3361  freexlen = xlen - len;
3362  XADlength(xad, len);
3363 
3364  /* save pxd of truncated extent in tlck */
3365  xaddr += len;
3366  if (log) { /* COMMIT_PWMAP */
3367  xtlck->lwm.offset = (xtlck->lwm.offset) ?
3368  min(index, (int)xtlck->lwm.offset) : index;
3369  xtlck->lwm.length = index + 1 -
3370  xtlck->lwm.offset;
3371  xtlck->twm.offset = index;
3372  pxdlock = (struct pxd_lock *) & xtlck->pxdlock;
3373  pxdlock->flag = mlckFREEPXD;
3374  PXDaddress(&pxdlock->pxd, xaddr);
3375  PXDlength(&pxdlock->pxd, freexlen);
3376  }
3377  /* free truncated extent */
3378  else { /* COMMIT_WMAP */
3379 
3380  pxdlock = (struct pxd_lock *) & xadlock;
3381  pxdlock->flag = mlckFREEPXD;
3382  PXDaddress(&pxdlock->pxd, xaddr);
3383  PXDlength(&pxdlock->pxd, freexlen);
3384  txFreeMap(ip, pxdlock, NULL, COMMIT_WMAP);
3385 
3386  /* reset map lock */
3387  xadlock.flag = mlckFREEXADLIST;
3388  }
3389 
3390  /* current entry is new last entry; */
3391  nextindex = index + 1;
3392 
3393  nfreed += freexlen;
3394  }
3395  /*
3396  * eof beyond the entry:
3397  * xad
3398  * -------=======---|--->
3399  * eof
3400  */
3401  else { /* (xoff + xlen < teof) */
3402 
3403  nextindex = index + 1;
3404  }
3405 
3406  if (nextindex < le16_to_cpu(p->header.nextindex)) {
3407  if (!log) { /* COMMIT_WAMP */
3408  xadlock.xdlist = &p->xad[nextindex];
3409  xadlock.count =
3410  le16_to_cpu(p->header.nextindex) -
3411  nextindex;
3412  txFreeMap(ip, (struct maplock *) & xadlock,
3413  NULL, COMMIT_WMAP);
3414  }
3415  p->header.nextindex = cpu_to_le16(nextindex);
3416  }
3417 
3418  XT_PUTPAGE(mp);
3419 
3420  /* assert(freed == 0); */
3421  goto getParent;
3422  } /* end scan of leaf page entries */
3423 
3424  freed = 1;
3425 
3426  /*
3427  * leaf page become empty: free the page if type != PMAP
3428  */
3429  if (log) { /* COMMIT_PWMAP */
3430  /* txCommit() with tlckFREE:
3431  * free data extents covered by leaf [XTENTRYSTART:hwm);
3432  * invalidate leaf if COMMIT_PWMAP;
3433  * if (TRUNCATE), will write LOG_NOREDOPAGE;
3434  */
3435  tlck->type = tlckXTREE | tlckFREE;
3436  } else { /* COMMIT_WAMP */
3437 
3438  /* free data extents covered by leaf */
3439  xadlock.xdlist = &p->xad[XTENTRYSTART];
3440  xadlock.count =
3441  le16_to_cpu(p->header.nextindex) - XTENTRYSTART;
3442  txFreeMap(ip, (struct maplock *) & xadlock, NULL, COMMIT_WMAP);
3443  }
3444 
3445  if (p->header.flag & BT_ROOT) {
3446  p->header.flag &= ~BT_INTERNAL;
3447  p->header.flag |= BT_LEAF;
3448  p->header.nextindex = cpu_to_le16(XTENTRYSTART);
3449 
3450  XT_PUTPAGE(mp); /* debug */
3451  goto out;
3452  } else {
3453  if (log) { /* COMMIT_PWMAP */
3454  /* page will be invalidated at tx completion
3455  */
3456  XT_PUTPAGE(mp);
3457  } else { /* COMMIT_WMAP */
3458 
3459  if (mp->lid)
3460  lid_to_tlock(mp->lid)->flag |= tlckFREELOCK;
3461 
3462  /* invalidate empty leaf page */
3463  discard_metapage(mp);
3464  }
3465  }
3466 
3467  /*
3468  * the leaf page become empty: delete the parent entry
3469  * for the leaf page if the parent page is to be kept
3470  * in the new sized file.
3471  */
3472 
3473  /*
3474  * go back up to the parent page
3475  */
3476  getParent:
3477  /* pop/restore parent entry for the current child page */
3478  if ((parent = BT_POP(&btstack)) == NULL)
3479  /* current page must have been root */
3480  goto out;
3481 
3482  /* get back the parent page */
3483  bn = parent->bn;
3484  XT_GETPAGE(ip, bn, mp, PSIZE, p, rc);
3485  if (rc)
3486  return rc;
3487 
3488  index = parent->index;
3489 
3490  /*
3491  * child page was not empty:
3492  */
3493  if (freed == 0) {
3494  /* has any entry deleted from parent ? */
3495  if (index < le16_to_cpu(p->header.nextindex) - 1) {
3496  /* (re)acquire tlock on the parent page */
3497  if (log) { /* COMMIT_PWMAP */
3498  /* txCommit() with tlckTRUNCATE:
3499  * free child extents covered by parent [);
3500  */
3501  tlck = txLock(tid, ip, mp, tlckXTREE);
3502  xtlck = (struct xtlock *) & tlck->lock;
3503  if (!(tlck->type & tlckTRUNCATE)) {
3504  xtlck->hwm.offset =
3505  le16_to_cpu(p->header.
3506  nextindex) - 1;
3507  tlck->type =
3509  }
3510  } else { /* COMMIT_WMAP */
3511 
3512  /* free child extents covered by parent */
3513  xadlock.xdlist = &p->xad[index + 1];
3514  xadlock.count =
3515  le16_to_cpu(p->header.nextindex) -
3516  index - 1;
3517  txFreeMap(ip, (struct maplock *) & xadlock,
3518  NULL, COMMIT_WMAP);
3519  }
3520  BT_MARK_DIRTY(mp, ip);
3521 
3522  p->header.nextindex = cpu_to_le16(index + 1);
3523  }
3524  XT_PUTPAGE(mp);
3525  goto getParent;
3526  }
3527 
3528  /*
3529  * child page was empty:
3530  */
3531  nfreed += lengthXAD(&p->xad[index]);
3532 
3533  /*
3534  * During working map update, child page's tlock must be handled
3535  * before parent's. This is because the parent's tlock will cause
3536  * the child's disk space to be marked available in the wmap, so
3537  * it's important that the child page be released by that time.
3538  *
3539  * ToDo: tlocks should be on doubly-linked list, so we can
3540  * quickly remove it and add it to the end.
3541  */
3542 
3543  /*
3544  * Move parent page's tlock to the end of the tid's tlock list
3545  */
3546  if (log && mp->lid && (tblk->last != mp->lid) &&
3547  lid_to_tlock(mp->lid)->tid) {
3548  lid_t lid = mp->lid;
3549  struct tlock *prev;
3550 
3551  tlck = lid_to_tlock(lid);
3552 
3553  if (tblk->next == lid)
3554  tblk->next = tlck->next;
3555  else {
3556  for (prev = lid_to_tlock(tblk->next);
3557  prev->next != lid;
3558  prev = lid_to_tlock(prev->next)) {
3559  assert(prev->next);
3560  }
3561  prev->next = tlck->next;
3562  }
3563  lid_to_tlock(tblk->last)->next = lid;
3564  tlck->next = 0;
3565  tblk->last = lid;
3566  }
3567 
3568  /*
3569  * parent page become empty: free the page
3570  */
3571  if (index == XTENTRYSTART) {
3572  if (log) { /* COMMIT_PWMAP */
3573  /* txCommit() with tlckFREE:
3574  * free child extents covered by parent;
3575  * invalidate parent if COMMIT_PWMAP;
3576  */
3577  tlck = txLock(tid, ip, mp, tlckXTREE);
3578  xtlck = (struct xtlock *) & tlck->lock;
3579  xtlck->hwm.offset =
3580  le16_to_cpu(p->header.nextindex) - 1;
3581  tlck->type = tlckXTREE | tlckFREE;
3582  } else { /* COMMIT_WMAP */
3583 
3584  /* free child extents covered by parent */
3585  xadlock.xdlist = &p->xad[XTENTRYSTART];
3586  xadlock.count =
3587  le16_to_cpu(p->header.nextindex) -
3588  XTENTRYSTART;
3589  txFreeMap(ip, (struct maplock *) & xadlock, NULL,
3590  COMMIT_WMAP);
3591  }
3592  BT_MARK_DIRTY(mp, ip);
3593 
3594  if (p->header.flag & BT_ROOT) {
3595  p->header.flag &= ~BT_INTERNAL;
3596  p->header.flag |= BT_LEAF;
3597  p->header.nextindex = cpu_to_le16(XTENTRYSTART);
3598  if (le16_to_cpu(p->header.maxentry) == XTROOTMAXSLOT) {
3599  /*
3600  * Shrink root down to allow inline
3601  * EA (otherwise fsck complains)
3602  */
3603  p->header.maxentry =
3605  JFS_IP(ip)->mode2 |= INLINEEA;
3606  }
3607 
3608  XT_PUTPAGE(mp); /* debug */
3609  goto out;
3610  } else {
3611  if (log) { /* COMMIT_PWMAP */
3612  /* page will be invalidated at tx completion
3613  */
3614  XT_PUTPAGE(mp);
3615  } else { /* COMMIT_WMAP */
3616 
3617  if (mp->lid)
3618  lid_to_tlock(mp->lid)->flag |=
3619  tlckFREELOCK;
3620 
3621  /* invalidate parent page */
3622  discard_metapage(mp);
3623  }
3624 
3625  /* parent has become empty and freed:
3626  * go back up to its parent page
3627  */
3628  /* freed = 1; */
3629  goto getParent;
3630  }
3631  }
3632  /*
3633  * parent page still has entries for front region;
3634  */
3635  else {
3636  /* try truncate region covered by preceding entry
3637  * (process backward)
3638  */
3639  index--;
3640 
3641  /* go back down to the child page corresponding
3642  * to the entry
3643  */
3644  goto getChild;
3645  }
3646 
3647  /*
3648  * internal page: go down to child page of current entry
3649  */
3650  getChild:
3651  /* save current parent entry for the child page */
3652  if (BT_STACK_FULL(&btstack)) {
3653  jfs_error(ip->i_sb, "stack overrun in xtTruncate!");
3654  XT_PUTPAGE(mp);
3655  return -EIO;
3656  }
3657  BT_PUSH(&btstack, bn, index);
3658 
3659  /* get child page */
3660  xad = &p->xad[index];
3661  bn = addressXAD(xad);
3662 
3663  /*
3664  * first access of each internal entry:
3665  */
3666  /* release parent page */
3667  XT_PUTPAGE(mp);
3668 
3669  /* process the child page */
3670  goto getPage;
3671 
3672  out:
3673  /*
3674  * update file resource stat
3675  */
3676  /* set size
3677  */
3678  if (S_ISDIR(ip->i_mode) && !newsize)
3679  ip->i_size = 1; /* fsck hates zero-length directories */
3680  else
3681  ip->i_size = newsize;
3682 
3683  /* update quota allocation to reflect freed blocks */
3684  dquot_free_block(ip, nfreed);
3685 
3686  /*
3687  * free tlock of invalidated pages
3688  */
3689  if (flag == COMMIT_WMAP)
3690  txFreelock(ip);
3691 
3692  return newsize;
3693 }
3694 
3695 
3696 /*
3697  * xtTruncate_pmap()
3698  *
3699  * function:
 3700  * Perform truncate to zero length for deleted file, leaving
 3701  * the xtree and working map untouched. This allows the file to
3702  * be accessed via open file handles, while the delete of the file
3703  * is committed to disk.
3704  *
3705  * parameter:
3706  * tid_t tid,
3707  * struct inode *ip,
3708  * s64 committed_size)
3709  *
3710  * return: new committed size
3711  *
3712  * note:
3713  *
3714  * To avoid deadlock by holding too many transaction locks, the
3715  * truncation may be broken up into multiple transactions.
 3716  * The committed_size keeps track of how much of the file has been
 3717  * freed from the pmaps.
3718  */
 3719 s64 xtTruncate_pmap(tid_t tid, struct inode *ip, s64 committed_size)
 3720 {
 3721  s64 bn;
 3722  struct btstack btstack;
 3723  int cmp;
 3724  int index;
 3725  int locked_leaves = 0;
 3726  struct metapage *mp;
 3727  xtpage_t *p;
 3728  struct btframe *parent;
 3729  int rc;
 3730  struct tblock *tblk;
 3731  struct tlock *tlck = NULL;
 3732  xad_t *xad;
 3733  int xlen;
 3734  s64 xoff;
 3735  struct xtlock *xtlck = NULL;
 3736 
 3737  /* save object truncation type */
 3738  tblk = tid_to_tblock(tid);
 3739  tblk->xflag |= COMMIT_PMAP;
 3740 
 3741  /* clear stack */
 3742  BT_CLR(&btstack);
 3743 
 /* resuming a partially-committed truncate: re-locate the extent
  * containing the last block already freed from the pmap and
  * continue the backward scan from there
  */
 3744  if (committed_size) {
 3745  xoff = (committed_size >> JFS_SBI(ip->i_sb)->l2bsize) - 1;
 3746  rc = xtSearch(ip, xoff, NULL, &cmp, &btstack, 0);
 3747  if (rc)
 3748  return rc;
 3749 
 3750  XT_GETSEARCH(ip, btstack.top, bn, mp, p, index);
 3751 
 3752  if (cmp != 0) {
 3753  XT_PUTPAGE(mp);
 3754  jfs_error(ip->i_sb,
 3755  "xtTruncate_pmap: did not find extent");
 3756  return -EIO;
 3757  }
 3758  } else {
 3759  /*
 3760  * start with root
 3761  *
 3762  * root resides in the inode
 3763  */
 3764  bn = 0;
 3765 
 3766  /*
 3767  * first access of each page:
 3768  */
 3769  getPage:
 3770  XT_GETPAGE(ip, bn, mp, PSIZE, p, rc);
 3771  if (rc)
 3772  return rc;
 3773 
 3774  /* process entries backward from last index */
 3775  index = le16_to_cpu(p->header.nextindex) - 1;
 3776 
 3777  if (p->header.flag & BT_INTERNAL)
 3778  goto getChild;
 3779  }
 3780 
 3781  /*
 3782  * leaf page
 3783  */
 3784 
 3785  if (++locked_leaves > MAX_TRUNCATE_LEAVES) {
 3786  /*
 3787  * We need to limit the size of the transaction
 3788  * to avoid exhausting pagecache & tlocks
 3789  */
 3790  xad = &p->xad[index];
 3791  xoff = offsetXAD(xad);
 3792  xlen = lengthXAD(xad);
 3793  XT_PUTPAGE(mp);
 /* return the byte offset just past this extent as the new
  * committed_size; the caller commits this transaction and
  * invokes us again to continue the truncate
  */
 3794  return (xoff + xlen) << JFS_SBI(ip->i_sb)->l2bsize;
 3795  }
 /* lock the whole leaf for pmap-free at commit: hwm covers entries
  * [XTENTRYSTART .. index]
  */
 3796  tlck = txLock(tid, ip, mp, tlckXTREE);
 3797  tlck->type = tlckXTREE | tlckFREE;
 3798  xtlck = (struct xtlock *) & tlck->lock;
 3799  xtlck->hwm.offset = index;
 3800 
 3801 
 3802  XT_PUTPAGE(mp);
 3803 
 3804  /*
 3805  * go back up to the parent page
 3806  */
 3807  getParent:
 3808  /* pop/restore parent entry for the current child page */
 3809  if ((parent = BT_POP(&btstack)) == NULL)
 3810  /* current page must have been root */
 3811  goto out;
 3812 
 3813  /* get back the parent page */
 3814  bn = parent->bn;
 3815  XT_GETPAGE(ip, bn, mp, PSIZE, p, rc);
 3816  if (rc)
 3817  return rc;
 3818 
 3819  index = parent->index;
 3820 
 3821  /*
 3822  * parent page become empty: free the page
 3823  */
 3824  if (index == XTENTRYSTART) {
 3825  /* txCommit() with tlckFREE:
 3826  * free child extents covered by parent;
 3827  * invalidate parent if COMMIT_PWMAP;
 3828  */
 3829  tlck = txLock(tid, ip, mp, tlckXTREE);
 3830  xtlck = (struct xtlock *) & tlck->lock;
 3831  xtlck->hwm.offset = le16_to_cpu(p->header.nextindex) - 1;
 3832  tlck->type = tlckXTREE | tlckFREE;
 3833 
 3834  XT_PUTPAGE(mp);
 3835 
 /* NOTE(review): p->header.flag is read after XT_PUTPAGE releases
  * mp above — appears to rely on the page mapping staying valid
  * until then; confirm against metapage lifetime rules
  */
 3836  if (p->header.flag & BT_ROOT) {
 3837 
 3838  goto out;
 3839  } else {
 3840  goto getParent;
 3841  }
 3842  }
 3843  /*
 3844  * parent page still has entries for front region;
 3845  */
 3846  else
 3847  index--;
 3848  /*
 3849  * internal page: go down to child page of current entry
 3850  */
 3851  getChild:
 3852  /* save current parent entry for the child page */
 3853  if (BT_STACK_FULL(&btstack)) {
 3854  jfs_error(ip->i_sb, "stack overrun in xtTruncate_pmap!");
 3855  XT_PUTPAGE(mp);
 3856  return -EIO;
 3857  }
 3858  BT_PUSH(&btstack, bn, index);
 3859 
 3860  /* get child page */
 3861  xad = &p->xad[index];
 3862  bn = addressXAD(xad);
 3863 
 3864  /*
 3865  * first access of each internal entry:
 3866  */
 3867  /* release parent page */
 3868  XT_PUTPAGE(mp);
 3869 
 3870  /* process the child page */
 3871  goto getPage;
 3872 
 3873  out:
 3874 
 /* entire tree processed in this transaction: truncate complete,
  * new committed size is zero
  */
 3875  return 0;
 3876 }
3877 
3878 #ifdef CONFIG_JFS_STATISTICS
3879 static int jfs_xtstat_proc_show(struct seq_file *m, void *v)
3880 {
3881  seq_printf(m,
3882  "JFS Xtree statistics\n"
3883  "====================\n"
3884  "searches = %d\n"
3885  "fast searches = %d\n"
3886  "splits = %d\n",
3887  xtStat.search,
3888  xtStat.fastSearch,
3889  xtStat.split);
3890  return 0;
3891 }
3892 
/*
 * seq_file open hook: bind jfs_xtstat_proc_show as the show callback;
 * no per-open private data is needed (NULL).
 */
 3893 static int jfs_xtstat_proc_open(struct inode *inode, struct file *file)
 3894 {
 3895  return single_open(file, jfs_xtstat_proc_show, NULL);
 3896 }
3897 
/*
 * File operations for the xtree statistics /proc entry: standard
 * single-record seq_file plumbing (open/read/llseek/release).
 */
 3898 const struct file_operations jfs_xtstat_proc_fops = {
 3899  .owner = THIS_MODULE,
 3900  .open = jfs_xtstat_proc_open,
 3901  .read = seq_read,
 3902  .llseek = seq_lseek,
 3903  .release = single_release,
 3904 };
3905 #endif