Linux Kernel  3.7.1
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Groups Pages
jfs_imap.c
Go to the documentation of this file.
1 /*
2  * Copyright (C) International Business Machines Corp., 2000-2004
3  *
4  * This program is free software; you can redistribute it and/or modify
5  * it under the terms of the GNU General Public License as published by
6  * the Free Software Foundation; either version 2 of the License, or
7  * (at your option) any later version.
8  *
9  * This program is distributed in the hope that it will be useful,
10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See
12  * the GNU General Public License for more details.
13  *
14  * You should have received a copy of the GNU General Public License
15  * along with this program; if not, write to the Free Software
16  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
17  */
18 
19 /*
20  * jfs_imap.c: inode allocation map manager
21  *
22  * Serialization:
23  * Each AG has a simple lock which is used to control the serialization of
24  * the AG level lists. This lock should be taken first whenever an AG
25  * level list will be modified or accessed.
26  *
27  * Each IAG is locked by obtaining the buffer for the IAG page.
28  *
29  * There is also an inode lock for the inode map inode. A read lock needs to
30  * be taken whenever an IAG is read from the map or the global level
31  * information is read. A write lock needs to be taken whenever the global
32  * level information is modified or an atomic operation needs to be used.
33  *
34  * If more than one IAG is read at one time, the read lock may not
35  * be given up until all of the IAG's are read. Otherwise, a deadlock
36  * may occur when trying to obtain the read lock while another thread
37  * holding the read lock is waiting on the IAG already being held.
38  *
39  * The control page of the inode map is read into memory by diMount().
40  * Thereafter it should only be modified in memory and then it will be
41  * written out when the filesystem is unmounted by diUnmount().
42  */
43 
44 #include <linux/fs.h>
45 #include <linux/buffer_head.h>
46 #include <linux/pagemap.h>
47 #include <linux/quotaops.h>
48 #include <linux/slab.h>
49 
50 #include "jfs_incore.h"
51 #include "jfs_inode.h"
52 #include "jfs_filsys.h"
53 #include "jfs_dinode.h"
54 #include "jfs_dmap.h"
55 #include "jfs_imap.h"
56 #include "jfs_metapage.h"
57 #include "jfs_superblock.h"
58 #include "jfs_debug.h"
59 
60 /*
61  * imap locks
    *
    * Thin wrappers around mutex_init()/mutex_lock()/mutex_unlock() for the
    * lock fields of struct inomap. The imap argument is parenthesized in
    * every expansion so that any pointer expression can be passed without
    * operator-precedence surprises.
62  */
63 /* iag free list lock */
64 #define IAGFREE_LOCK_INIT(imap) mutex_init(&(imap)->im_freelock)
65 #define IAGFREE_LOCK(imap) mutex_lock(&(imap)->im_freelock)
66 #define IAGFREE_UNLOCK(imap) mutex_unlock(&(imap)->im_freelock)
67 
68 /* per ag iag list locks */
69 #define AG_LOCK_INIT(imap,index) mutex_init(&((imap)->im_aglock[index]))
70 #define AG_LOCK(imap,agno) mutex_lock(&(imap)->im_aglock[agno])
71 #define AG_UNLOCK(imap,agno) mutex_unlock(&(imap)->im_aglock[agno])
72 
73 /*
74  * forward references
    *
    * Prototypes for static (file-local) helpers, declared here so they can
    * be called before their definitions appear.
75  */
76 static int diAllocAG(struct inomap *, int, bool, struct inode *);
77 static int diAllocAny(struct inomap *, int, bool, struct inode *);
78 static int diAllocBit(struct inomap *, struct iag *, int);
79 static int diAllocExt(struct inomap *, int, struct inode *);
80 static int diAllocIno(struct inomap *, int, struct inode *);
81 static int diFindFree(u32, int);
82 static int diNewExt(struct inomap *, struct iag *, int);
83 static int diNewIAG(struct inomap *, int *, int, struct metapage **);
84 static void duplicateIXtree(struct super_block *, s64, int, s64 *);
85 
   /* IAG read helper and dinode <-> incore inode copy helpers */
86 static int diIAGRead(struct inomap * imap, int, struct metapage **);
87 static int copy_from_dinode(struct dinode *, struct inode *);
88 static void copy_to_dinode(struct dinode *, struct inode *);
89 
90 /*
91  * NAME: diMount()
92  *
93  * FUNCTION: initialize the incore inode map control structures for
94  * a fileset or aggregate at init time.
95  *
96  * the inode map's control structure (dinomap) is
97  * brought in from disk and placed in virtual memory.
    * the imap locks are then initialized and the new
    * structure is linked to ipimap in both directions.
98  *
99  * PARAMETERS:
100  * ipimap - pointer to inode map inode for the aggregate or fileset.
101  *
102  * RETURN VALUES:
103  * 0 - success
104  * -ENOMEM - insufficient free virtual memory.
105  * -EIO - i/o error (the control page could not be read).
106  */
107 int diMount(struct inode *ipimap)
108 {
109  struct inomap *imap;
110  struct metapage *mp;
111  int index;
112  struct dinomap_disk *dinom_le;
113 
114  /*
115  * allocate/initialize the in-memory inode map control structure
116  */
117  /* allocate the in-memory inode map control structure. */
118  imap = kmalloc(sizeof(struct inomap), GFP_KERNEL);
119  if (imap == NULL) {
120  jfs_err("diMount: kmalloc returned NULL!");
121  return -ENOMEM;
122  }
123 
124  /* read the on-disk inode map control structure. */
125 
126  mp = read_metapage(ipimap,
127  IMAPBLKNO << JFS_SBI(ipimap->i_sb)->l2nbperpage,
128  PSIZE, 0);
129  if (mp == NULL) {
130  kfree(imap); /* nothing references imap yet, so just free it */
131  return -EIO;
132  }
133 
134  /* copy the on-disk version to the in-memory version. */
     /* every on-disk field is little-endian and converted to cpu order */
135  dinom_le = (struct dinomap_disk *) mp->data;
136  imap->im_freeiag = le32_to_cpu(dinom_le->in_freeiag);
137  imap->im_nextiag = le32_to_cpu(dinom_le->in_nextiag);
138  atomic_set(&imap->im_numinos, le32_to_cpu(dinom_le->in_numinos));
139  atomic_set(&imap->im_numfree, le32_to_cpu(dinom_le->in_numfree));
140  imap->im_nbperiext = le32_to_cpu(dinom_le->in_nbperiext);
141  imap->im_l2nbperiext = le32_to_cpu(dinom_le->in_l2nbperiext);
     /* per-AG control entries: free-list heads and inode counts */
142  for (index = 0; index < MAXAG; index++) {
143  imap->im_agctl[index].inofree =
144  le32_to_cpu(dinom_le->in_agctl[index].inofree);
145  imap->im_agctl[index].extfree =
146  le32_to_cpu(dinom_le->in_agctl[index].extfree);
147  imap->im_agctl[index].numinos =
148  le32_to_cpu(dinom_le->in_agctl[index].numinos);
149  imap->im_agctl[index].numfree =
150  le32_to_cpu(dinom_le->in_agctl[index].numfree);
151  }
152 
153  /* release the buffer. */
154  release_metapage(mp);
155 
156  /*
157  * initialize inode allocation map locks
158  */
159  /* init iag free list lock */
160  IAGFREE_LOCK_INIT(imap);
161 
162  /* init ag list locks, one per allocation group */
163  for (index = 0; index < MAXAG; index++) {
164  AG_LOCK_INIT(imap, index);
165  }
166 
167  /* bind the inode map inode and inode map control structure
168  * to each other.
169  */
170  imap->im_ipimap = ipimap;
171  JFS_IP(ipimap)->i_imap = imap;
172 
173  return (0);
174 }
175 
176 
177 /*
178  * NAME: diUnmount()
179  *
180  * FUNCTION: write to disk the incore inode map control structures for
181  * a fileset or aggregate at unmount time, drop the map inode's
     * cached pages and free the in-memory control structure.
182  *
183  * PARAMETERS:
184  * ipimap - pointer to inode map inode for the aggregate or fileset.
     * mounterror - non-zero to skip the diSync() write-back.
185  *
186  * RETURN VALUES:
187  * 0 - always; the return value of diSync() is not checked,
188  * so no error is reported to the caller from here.
189  *
190  */
191 int diUnmount(struct inode *ipimap, int mounterror)
192 {
193  struct inomap *imap = JFS_IP(ipimap)->i_imap;
194 
195  /*
196  * update the on-disk inode map control structure
     * (skipped when mounterror is set or the map inode is read-only)
197  */
198 
199  if (!(mounterror || isReadOnly(ipimap)))
200  diSync(ipimap);
201 
202  /*
203  * Invalidate the page cache buffers
204  */
205  truncate_inode_pages(ipimap->i_mapping, 0);
206 
207  /*
208  * free in-memory control structure
     * NOTE(review): JFS_IP(ipimap)->i_imap is left pointing at the
     * freed structure - confirm nothing dereferences it afterwards.
209  */
210  kfree(imap);
211 
212  return (0);
213 }
214 
215 
216 /*
217  * diSync()
     *
     * FUNCTION: copy the in-memory inode map control structure into the
     * imap control page, write that page, then write the map inode
     * itself with diWriteSpecial(ipimap, 0).
     *
     * PARAMETERS:
     * ipimap - pointer to inode map inode for the aggregate or fileset.
     *
     * RETURN VALUES:
     * 0 - success
     * -EIO - get_metapage() returned NULL for the control page.
218  */
219 int diSync(struct inode *ipimap)
220 {
221  struct dinomap_disk *dinom_le;
222  struct inomap *imp = JFS_IP(ipimap)->i_imap;
223  struct metapage *mp;
224  int index;
225 
226  /*
227  * write imap global control page
228  */
229  /* get the metapage for the on-disk inode map control structure */
230  mp = get_metapage(ipimap,
231  IMAPBLKNO << JFS_SBI(ipimap->i_sb)->l2nbperpage,
232  PSIZE, 0);
233  if (mp == NULL) {
234  jfs_err("diSync: get_metapage failed!");
235  return -EIO;
236  }
237 
238  /* copy the in-memory version to the on-disk version */
     /* (the field-by-field inverse of the copy done in diMount()) */
239  dinom_le = (struct dinomap_disk *) mp->data;
240  dinom_le->in_freeiag = cpu_to_le32(imp->im_freeiag);
241  dinom_le->in_nextiag = cpu_to_le32(imp->im_nextiag);
242  dinom_le->in_numinos = cpu_to_le32(atomic_read(&imp->im_numinos));
243  dinom_le->in_numfree = cpu_to_le32(atomic_read(&imp->im_numfree));
244  dinom_le->in_nbperiext = cpu_to_le32(imp->im_nbperiext);
245  dinom_le->in_l2nbperiext = cpu_to_le32(imp->im_l2nbperiext);
246  for (index = 0; index < MAXAG; index++) {
247  dinom_le->in_agctl[index].inofree =
248  cpu_to_le32(imp->im_agctl[index].inofree);
249  dinom_le->in_agctl[index].extfree =
250  cpu_to_le32(imp->im_agctl[index].extfree);
251  dinom_le->in_agctl[index].numinos =
252  cpu_to_le32(imp->im_agctl[index].numinos);
253  dinom_le->in_agctl[index].numfree =
254  cpu_to_le32(imp->im_agctl[index].numfree);
255  }
256 
257  /* write out the control structure */
258  write_metapage(mp);
259 
260  /*
261  * write out dirty pages of imap
262  */
     /*
     * NOTE(review): listing line 263 is missing here, so the statement
     * that actually flushes the dirty pages is not visible in this copy.
     * Restore it from the upstream source before building.
     */
264 
265  diWriteSpecial(ipimap, 0);
266 
267  return (0);
268 }
269 
270 
271 /*
272  * NAME: diRead()
273  *
274  * FUNCTION: initialize an incore inode from disk.
275  *
276  * on entry, the specified incore inode should itself
277  * specify the disk inode number corresponding to the
278  * incore inode (i.e. ip->i_ino should be initialized).
279  *
280  * this routine handles incore inode initialization for
281  * both "special" and "regular" inodes. special inodes
282  * are those required early in the mount process and
283  * require special handling since much of the file system
284  * is not yet initialized. these "special" inodes are
285  * identified by a NULL inode map inode pointer and are
286  * actually initialized by a call to diReadSpecial().
     * NOTE(review): the body below contains no such branch;
     * it always uses sbi->ipimap and never calls
     * diReadSpecial(). this paragraph looks stale.
287  *
288  * for regular inodes, the iag describing the disk inode
289  * is read from disk to determine the inode extent address
290  * for the disk inode. with the inode extent address in
291  * hand, the page of the extent that contains the disk
292  * inode is read and the disk inode is copied to the
293  * incore inode.
294  *
295  * PARAMETERS:
296  * ip - pointer to incore inode to be initialized from disk.
297  *
298  * RETURN VALUES:
299  * 0 - success
300  * -EIO - i/o error, or di_number on disk does not match ip->i_ino.
301  * -ENOMEM - insufficient memory
     * -ESTALE - the inode extent is not backed as expected, or the
     * disk inode has a zero link count.
     * other - any error returned by diIAGRead() or copy_from_dinode().
302  *
303  */
304 int diRead(struct inode *ip)
305 {
306  struct jfs_sb_info *sbi = JFS_SBI(ip->i_sb);
307  int iagno, ino, extno, rc;
308  struct inode *ipimap;
309  struct dinode *dp;
310  struct iag *iagp;
311  struct metapage *mp;
312  s64 blkno, agstart;
313  struct inomap *imap;
314  int block_offset;
315  int inodes_left;
316  unsigned long pageno;
317  int rel_inode;
318 
319  jfs_info("diRead: ino = %ld", ip->i_ino);
320 
321  ipimap = sbi->ipimap;
322  JFS_IP(ip)->ipimap = ipimap;
323 
324  /* determine the iag number for this inode (number) */
325  iagno = INOTOIAG(ip->i_ino);
326 
327  /* read the iag; the imap read lock is held only across the read */
328  imap = JFS_IP(ipimap)->i_imap;
329  IREAD_LOCK(ipimap, RDWRLOCK_IMAP);
330  rc = diIAGRead(imap, iagno, &mp);
331  IREAD_UNLOCK(ipimap);
332  if (rc) {
333  jfs_err("diRead: diIAGRead returned %d", rc);
334  return (rc);
335  }
336 
337  iagp = (struct iag *) mp->data;
338 
339  /* determine inode extent that holds the disk inode */
340  ino = ip->i_ino & (INOSPERIAG - 1);
341  extno = ino >> L2INOSPEREXT;
342 
     /* the extent must have the expected length and a non-zero address */
343  if ((lengthPXD(&iagp->inoext[extno]) != imap->im_nbperiext) ||
344  (addressPXD(&iagp->inoext[extno]) == 0)) {
345  release_metapage(mp);
346  return -ESTALE;
347  }
348 
349  /* get disk block number of the page within the inode extent
350  * that holds the disk inode.
351  */
352  blkno = INOPBLK(&iagp->inoext[extno], ino, sbi->l2nbperpage);
353 
354  /* get the ag for the iag */
355  agstart = le64_to_cpu(iagp->agstart);
356 
     /* everything needed from the iag has been copied out; drop it */
357  release_metapage(mp);
358 
     /* index of the inode within its page, and the page number */
359  rel_inode = (ino & (INOSPERPAGE - 1));
360  pageno = blkno >> sbi->l2nbperpage;
361 
362  if ((block_offset = ((u32) blkno & (sbi->nbperpage - 1)))) {
363  /*
364  * OS/2 didn't always align inode extents on page boundaries
     *
     * the extent starts block_offset blocks into the page, so
     * only inodes_left inodes fit before the next page begins.
365  */
366  inodes_left =
367  (sbi->nbperpage - block_offset) << sbi->l2niperblk;
368 
369  if (rel_inode < inodes_left)
370  rel_inode += block_offset << sbi->l2niperblk;
371  else {
372  pageno += 1;
373  rel_inode -= inodes_left;
374  }
375  }
376 
377  /* read the page of disk inode */
378  mp = read_metapage(ipimap, pageno << sbi->l2nbperpage, PSIZE, 1);
379  if (!mp) {
380  jfs_err("diRead: read_metapage failed");
381  return -EIO;
382  }
383 
384  /* locate the disk inode requested */
385  dp = (struct dinode *) mp->data;
386  dp += rel_inode;
387 
     /* sanity-check the disk inode before copying it in */
388  if (ip->i_ino != le32_to_cpu(dp->di_number)) {
389  jfs_error(ip->i_sb, "diRead: i_ino != di_number");
390  rc = -EIO;
391  } else if (le32_to_cpu(dp->di_nlink) == 0)
392  rc = -ESTALE;
393  else
394  /* copy the disk inode to the in-memory inode */
395  rc = copy_from_dinode(dp, ip);
396 
397  release_metapage(mp);
398 
399  /* set the ag for the inode (done even when rc is an error) */
400  JFS_IP(ip)->agstart = agstart;
401  JFS_IP(ip)->active_ag = -1;
402 
403  return (rc);
404 }
405 
406 
407 /*
408  * NAME: diReadSpecial()
409  *
410  * FUNCTION: initialize a 'special' inode from disk.
411  *
412  * this routine handles aggregate level inodes. The
413  * inode cache cannot differentiate between the
414  * aggregate inodes and the filesystem inodes, so we
415  * handle these here. We don't actually use the aggregate
416  * inode map, since these inodes are at a fixed location
417  * and in some cases the aggregate inode map isn't initialized
418  * yet.
419  *
420  * PARAMETERS:
421  * sb - filesystem superblock
422  * inum - aggregate inode number (asserted to be < INOSPEREXT)
423  * secondary - 1 if secondary aggregate inode table
424  *
425  * RETURN VALUES:
426  * new inode - success
427  * NULL - new_inode() failed, i/o error, or copy_from_dinode() failed.
428  */
429 struct inode *diReadSpecial(struct super_block *sb, ino_t inum, int secondary)
430 {
431  struct jfs_sb_info *sbi = JFS_SBI(sb);
432  uint address;
433  struct dinode *dp;
434  struct inode *ip;
435  struct metapage *mp;
436 
437  ip = new_inode(sb);
438  if (ip == NULL) {
439  jfs_err("diReadSpecial: new_inode returned NULL!");
440  return ip;
441  }
442 
     /* pick the inode table (page address) and the matching map inode */
443  if (secondary) {
444  address = addressPXD(&sbi->ait2) >> sbi->l2nbperpage;
445  JFS_IP(ip)->ipimap = sbi->ipaimap2;
446  } else {
447  address = AITBL_OFF >> L2PSIZE;
448  JFS_IP(ip)->ipimap = sbi->ipaimap;
449  }
450 
451  ASSERT(inum < INOSPEREXT);
452 
453  ip->i_ino = inum;
454 
455  address += inum >> 3; /* 8 inodes per 4K page */
456 
457  /* read the page of fixed disk inode (AIT) in raw mode */
458  mp = read_metapage(ip, address << sbi->l2nbperpage, PSIZE, 1);
459  if (mp == NULL) {
460  set_nlink(ip, 1); /* Don't want iput() deleting it */
461  iput(ip);
462  return (NULL);
463  }
464 
465  /* get the pointer to the disk inode of interest */
466  dp = (struct dinode *) (mp->data);
467  dp += inum % 8; /* 8 inodes per 4K page */
468 
469  /* copy on-disk inode to in-memory inode */
470  if ((copy_from_dinode(dp, ip)) != 0) {
471  /* handle bad return by returning NULL for ip */
472  set_nlink(ip, 1); /* Don't want iput() deleting it */
473  iput(ip);
474  /* release the page */
475  release_metapage(mp);
476  return (NULL);
477 
478  }
479 
     /* special inodes use the metapage address space operations */
480  ip->i_mapping->a_ops = &jfs_metapage_aops;
481  mapping_set_gfp_mask(ip->i_mapping, GFP_NOFS);
482 
483  /* Allocations to metadata inodes should not affect quotas */
484  ip->i_flags |= S_NOQUOTA;
485 
     /* the primary-table fileset inode also supplies gengen and inostamp */
486  if ((inum == FILESYSTEM_I) && (JFS_IP(ip)->ipimap == sbi->ipaimap)) {
487  sbi->gengen = le32_to_cpu(dp->di_gengen);
488  sbi->inostamp = le32_to_cpu(dp->di_inostamp);
489  }
490 
491  /* release the page */
492  release_metapage(mp);
493 
494  /*
495  * __mark_inode_dirty expects inodes to be hashed. Since we don't
496  * want special inodes in the fileset inode space, we make them
497  * appear hashed, but do not put on any lists. hlist_del()
498  * will work fine and require no locking.
499  */
500  hlist_add_fake(&ip->i_hash);
501 
502  return (ip);
503 }
504 
505 /*
506  * NAME: diWriteSpecial()
507  *
508  * FUNCTION: Write the special inode to disk
     *
     * the page of the aggregate inode table holding the inode
     * is read, the in-memory inode (including its xtree root)
     * is copied into the disk inode, and the page is written.
509  *
510  * PARAMETERS:
511  * ip - special inode (ip->i_ino is asserted to be < INOSPEREXT)
512  * secondary - 1 if secondary aggregate inode table
513  *
514  * RETURN VALUES: none; if the page cannot be read, the failure is
     * logged and nothing is written.
515  */
516 
517 void diWriteSpecial(struct inode *ip, int secondary)
518 {
519  struct jfs_sb_info *sbi = JFS_SBI(ip->i_sb);
520  uint address;
521  struct dinode *dp;
522  ino_t inum = ip->i_ino;
523  struct metapage *mp;
524 
     /* page address of the selected aggregate inode table */
525  if (secondary)
526  address = addressPXD(&sbi->ait2) >> sbi->l2nbperpage;
527  else
528  address = AITBL_OFF >> L2PSIZE;
529 
530  ASSERT(inum < INOSPEREXT);
531 
532  address += inum >> 3; /* 8 inodes per 4K page */
533 
534  /* read the page of fixed disk inode (AIT) in raw mode */
535  mp = read_metapage(ip, address << sbi->l2nbperpage, PSIZE, 1);
536  if (mp == NULL) {
537  jfs_err("diWriteSpecial: failed to read aggregate inode "
538  "extent!");
539  return;
540  }
541 
542  /* get the pointer to the disk inode of interest */
543  dp = (struct dinode *) (mp->data);
544  dp += inum % 8; /* 8 inodes per 4K page */
545 
546  /* copy in-memory inode to on-disk inode */
547  copy_to_dinode(dp, ip);
     /* NOTE(review): 288 is presumably the size of the xtree root - confirm */
548  memcpy(&dp->di_xtroot, &JFS_IP(ip)->i_xtroot, 288);
549 
550  if (inum == FILESYSTEM_I)
551  dp->di_gengen = cpu_to_le32(sbi->gengen);
552 
553  /* write the page */
554  write_metapage(mp);
555 }
556 
557 /*
558  * NAME: diFreeSpecial()
559  *
560  * FUNCTION: Free allocated space for special inode
     *
     * PARAMETERS:
     * ip - special inode to release; a NULL ip is logged and ignored.
     *
     * RETURN VALUES: none
561  */
562 void diFreeSpecial(struct inode *ip)
563 {
564  if (ip == NULL) {
565  jfs_err("diFreeSpecial called with NULL ip!");
566  return;
567  }
     /*
     * NOTE(review): listing lines 568-569 are missing from this copy;
     * whatever is done to the inode before the final iput() is not
     * visible here. Restore from the upstream source before building.
     */
570  iput(ip);
571 }
572 
573 
574 
575 /*
576  * NAME: diWrite()
577  *
578  * FUNCTION: write the on-disk inode portion of the in-memory inode
579  * to its corresponding on-disk inode.
580  *
581  * on entry, the specified incore inode should itself
582  * specify the disk inode number corresponding to the
583  * incore inode (i.e. ip->i_ino should be initialized).
584  *
585  * the inode contains the inode extent address for the disk
586  * inode. with the inode extent address in hand, the
587  * page of the extent that contains the disk inode is
588  * read and the disk inode portion of the incore inode
589  * is copied to the disk inode.
     *
     * each region copied into the disk inode is also recorded
     * as an lv (offset, length) entry in a linelock.
590  *
591  * PARAMETERS:
592  * tid - transaction id
593  * ip - pointer to incore inode to be written to the inode extent.
594  *
595  * RETURN VALUES:
596  * 0 - success
597  * -EIO - i/o error, or the inode's ixpxd is invalid.
598  */
599 int diWrite(tid_t tid, struct inode *ip)
600 {
601  struct jfs_sb_info *sbi = JFS_SBI(ip->i_sb);
602  struct jfs_inode_info *jfs_ip = JFS_IP(ip);
603  int rc = 0;
604  s32 ino;
605  struct dinode *dp;
606  s64 blkno;
607  int block_offset;
608  int inodes_left;
609  struct metapage *mp;
610  unsigned long pageno;
611  int rel_inode;
612  int dioffset;
613  struct inode *ipimap;
614  uint type;
615  lid_t lid;
616  struct tlock *ditlck, *tlck;
617  struct linelock *dilinelock, *ilinelock;
618  struct lv *lv;
619  int n;
620 
621  ipimap = jfs_ip->ipimap;
622 
     /* inode index within its iag */
623  ino = ip->i_ino & (INOSPERIAG - 1);
624 
     /* the cached extent descriptor must be backed and of the expected length */
625  if (!addressPXD(&(jfs_ip->ixpxd)) ||
626  (lengthPXD(&(jfs_ip->ixpxd)) !=
627  JFS_IP(ipimap)->i_imap->im_nbperiext)) {
628  jfs_error(ip->i_sb, "diWrite: ixpxd invalid");
629  return -EIO;
630  }
631 
632  /*
633  * read the page of disk inode containing the specified inode:
634  */
635  /* compute the block address of the page */
636  blkno = INOPBLK(&(jfs_ip->ixpxd), ino, sbi->l2nbperpage);
637 
638  rel_inode = (ino & (INOSPERPAGE - 1));
639  pageno = blkno >> sbi->l2nbperpage;
640 
641  if ((block_offset = ((u32) blkno & (sbi->nbperpage - 1)))) {
642  /*
643  * OS/2 didn't always align inode extents on page boundaries
     * (same page/index adjustment as in diRead())
644  */
645  inodes_left =
646  (sbi->nbperpage - block_offset) << sbi->l2niperblk;
647 
648  if (rel_inode < inodes_left)
649  rel_inode += block_offset << sbi->l2niperblk;
650  else {
651  pageno += 1;
652  rel_inode -= inodes_left;
653  }
654  }
655  /* read the page of disk inode */
656  retry:
657  mp = read_metapage(ipimap, pageno << sbi->l2nbperpage, PSIZE, 1);
658  if (!mp)
659  return -EIO;
660 
661  /* get the pointer to the disk inode */
662  dp = (struct dinode *) mp->data;
663  dp += rel_inode;
664 
     /*
     * byte offset of the disk inode within the page.
     * NOTE(review): this is derived from ino, not from the adjusted
     * rel_inode used for dp above; the two differ for unaligned
     * extents - confirm this is intended.
     */
665  dioffset = (ino & (INOSPERPAGE - 1)) << L2DISIZE;
666 
667  /*
668  * acquire transaction lock on the on-disk inode;
669  * N.B. tlock is acquired on ipimap not ip;
     *
     * a NULL return re-reads the page and tries again.
     * NOTE(review): mp is not released here before retrying;
     * presumably txLock() has dropped it on failure - confirm.
670  */
671  if ((ditlck =
672  txLock(tid, ipimap, mp, tlckINODE | tlckENTRY)) == NULL)
673  goto retry;
674  dilinelock = (struct linelock *) & ditlck->lock;
675 
676  /*
677  * copy btree root from in-memory inode to on-disk inode
678  *
679  * (tlock is taken from inline B+-tree root in in-memory
680  * inode when the B+-tree root is updated, which is pointed
681  * by jfs_ip->blid as well as being on tx tlock list)
682  *
683  * further processing of btree root is based on the copy
684  * in in-memory inode, where txLog() will log from, and,
685  * for xtree root, txUpdateMap() will update map and reset
686  * XAD_NEW bit;
687  */
688 
689  if (S_ISDIR(ip->i_mode) && (lid = jfs_ip->xtlid)) {
690  /*
691  * This is the special xtree inside the directory for storing
692  * the directory table
693  */
694  xtpage_t *p, *xp;
695  xad_t *xad;
696 
697  jfs_ip->xtlid = 0;
698  tlck = lid_to_tlock(lid);
699  assert(tlck->type & tlckXTREE);
700  tlck->type |= tlckBTROOT;
701  tlck->mp = mp;
702  ilinelock = (struct linelock *) & tlck->lock;
703 
704  /*
705  * copy xtree root from inode to dinode:
     * only the slot ranges recorded in the linelock are copied
706  */
707  p = &jfs_ip->i_xtroot;
708  xp = (xtpage_t *) &dp->di_dirtable;
709  lv = ilinelock->lv;
710  for (n = 0; n < ilinelock->index; n++, lv++) {
711  memcpy(&xp->xad[lv->offset], &p->xad[lv->offset],
712  lv->length << L2XTSLOTSIZE);
713  }
714 
715  /* reset on-disk (metadata page) xtree XAD_NEW bit */
716  xad = &xp->xad[XTENTRYSTART];
717  for (n = XTENTRYSTART;
718  n < le16_to_cpu(xp->header.nextindex); n++, xad++)
719  if (xad->flag & (XAD_NEW | XAD_EXTENDED))
720  xad->flag &= ~(XAD_NEW | XAD_EXTENDED);
721  }
722 
     /* no tlock on the inline btree root: skip to the inline data */
723  if ((lid = jfs_ip->blid) == 0)
724  goto inlineData;
725  jfs_ip->blid = 0;
726 
727  tlck = lid_to_tlock(lid);
728  type = tlck->type;
729  tlck->type |= tlckBTROOT;
730  tlck->mp = mp;
731  ilinelock = (struct linelock *) & tlck->lock;
732 
733  /*
734  * regular file: 16 byte (XAD slot) granularity
735  */
736  if (type & tlckXTREE) {
737  xtpage_t *p, *xp;
738  xad_t *xad;
739 
740  /*
741  * copy xtree root from inode to dinode:
742  */
743  p = &jfs_ip->i_xtroot;
744  xp = &dp->di_xtroot;
745  lv = ilinelock->lv;
746  for (n = 0; n < ilinelock->index; n++, lv++) {
747  memcpy(&xp->xad[lv->offset], &p->xad[lv->offset],
748  lv->length << L2XTSLOTSIZE);
749  }
750 
751  /* reset on-disk (metadata page) xtree XAD_NEW bit */
752  xad = &xp->xad[XTENTRYSTART];
753  for (n = XTENTRYSTART;
754  n < le16_to_cpu(xp->header.nextindex); n++, xad++)
755  if (xad->flag & (XAD_NEW | XAD_EXTENDED))
756  xad->flag &= ~(XAD_NEW | XAD_EXTENDED);
757  }
758  /*
759  * directory: 32 byte (directory entry slot) granularity
760  */
761  else if (type & tlckDTREE) {
762  dtpage_t *p, *xp;
763 
764  /*
765  * copy dtree root from inode to dinode:
766  */
767  p = (dtpage_t *) &jfs_ip->i_dtroot;
768  xp = (dtpage_t *) & dp->di_dtroot;
769  lv = ilinelock->lv;
770  for (n = 0; n < ilinelock->index; n++, lv++) {
771  memcpy(&xp->slot[lv->offset], &p->slot[lv->offset],
772  lv->length << L2DTSLOTSIZE);
773  }
774  } else {
     /* neither xtree nor dtree: only logged, the write continues */
775  jfs_err("diWrite: UFO tlock");
776  }
777 
778  inlineData:
779  /*
780  * copy inline symlink from in-memory inode to on-disk inode
     * (logged as two slots starting at byte offset 2 * 128 of the inode)
781  */
782  if (S_ISLNK(ip->i_mode) && ip->i_size < IDATASIZE) {
783  lv = & dilinelock->lv[dilinelock->index];
784  lv->offset = (dioffset + 2 * 128) >> L2INODESLOTSIZE;
785  lv->length = 2;
786  memcpy(&dp->di_fastsymlink, jfs_ip->i_inline, IDATASIZE);
787  dilinelock->index++;
788  }
789  /*
790  * copy inline data from in-memory inode to on-disk inode:
791  * 128 byte slot granularity
792  */
793  if (test_cflag(COMMIT_Inlineea, ip)) {
794  lv = & dilinelock->lv[dilinelock->index];
795  lv->offset = (dioffset + 3 * 128) >> L2INODESLOTSIZE;
796  lv->length = 1;
797  memcpy(&dp->di_inlineea, jfs_ip->i_inline_ea, INODESLOTSIZE);
798  dilinelock->index++;
799 
     /*
     * NOTE(review): listing line 800 is missing from this copy
     * (presumably the statement that clears the flag tested
     * above - verify against the upstream source).
     */
801  }
802 
803  /*
804  * lock/copy inode base: 128 byte slot granularity
805  */
806  lv = & dilinelock->lv[dilinelock->index];
807  lv->offset = dioffset >> L2INODESLOTSIZE;
808  copy_to_dinode(dp, ip);
     /*
     * NOTE(review): listing line 809 is missing from this copy. The
     * "} else" below has no visible matching "if (...) {", so the
     * condition that chooses between logging two slots (with the
     * dirtable copy) and one slot is lost. Restore it from the
     * upstream source before building.
     */
810  lv->length = 2;
811  memcpy(&dp->di_dirtable, &jfs_ip->i_dirtable, 96);
812  } else
813  lv->length = 1;
814  dilinelock->index++;
815 
816  /* release the buffer holding the updated on-disk inode.
817  * the buffer will be later written by commit processing.
818  */
819  write_metapage(mp);
820 
     /* rc is never changed after initialization, so this returns 0 */
821  return (rc);
822 }
823 
824 
825 /*
826  * NAME: diFree(ip)
827  *
828  * FUNCTION: free a specified inode from the inode working map
829  * for a fileset or aggregate.
830  *
831  * if the inode to be freed represents the first (only)
832  * free inode within the iag, the iag will be placed on
833  * the ag free inode list.
834  *
835  * freeing the inode will cause the inode extent to be
836  * freed if the inode is the only allocated inode within
837  * the extent. in this case all the disk resource backing
838  * up the inode extent will be freed. in addition, the iag
839  * will be placed on the ag extent free list if the extent
840  * is the first free extent in the iag. if freeing the
841  * extent also means that no free inodes will exist for
842  * the iag, the iag will also be removed from the ag free
843  * inode list.
844  *
845  * the iag describing the inode will be freed if the extent
846  * is to be freed and it is the only backed extent within
847  * the iag. in this case, the iag will be removed from the
848  * ag free extent list and ag free inode list and placed on
849  * the inode map's free iag list.
850  *
851  * a careful update approach is used to provide consistency
852  * in the face of updates to multiple buffers. under this
853  * approach, all required buffers are obtained before making
854  * any updates and are held until all updates are complete.
855  *
856  * PARAMETERS:
857  * ip - inode to be freed.
858  *
859  * RETURN VALUES:
860  * 0 - success
861  * -EIO - i/o error.
862  */
863 int diFree(struct inode *ip)
864 {
865  int rc;
866  ino_t inum = ip->i_ino;
867  struct iag *iagp, *aiagp, *biagp, *ciagp, *diagp;
868  struct metapage *mp, *amp, *bmp, *cmp, *dmp;
869  int iagno, ino, extno, bitno, sword, agno;
870  int back, fwd;
871  u32 bitmap, mask;
872  struct inode *ipimap = JFS_SBI(ip->i_sb)->ipimap;
873  struct inomap *imap = JFS_IP(ipimap)->i_imap;
874  pxd_t freepxd;
875  tid_t tid;
876  struct inode *iplist[3];
877  struct tlock *tlck;
878  struct pxd_lock *pxdlock;
879 
880  /*
881  * This is just to suppress compiler warnings. The same logic that
882  * references these variables is used to initialize them.
883  */
884  aiagp = biagp = ciagp = diagp = NULL;
885 
886  /* get the iag number containing the inode.
887  */
888  iagno = INOTOIAG(inum);
889 
890  /* make sure that the iag is contained within
891  * the map.
892  */
893  if (iagno >= imap->im_nextiag) {
894  print_hex_dump(KERN_ERR, "imap: ", DUMP_PREFIX_ADDRESS, 16, 4,
895  imap, 32, 0);
896  jfs_error(ip->i_sb,
897  "diFree: inum = %d, iagno = %d, nextiag = %d",
898  (uint) inum, iagno, imap->im_nextiag);
899  return -EIO;
900  }
901 
902  /* get the allocation group for this ino.
903  */
904  agno = BLKTOAG(JFS_IP(ip)->agstart, JFS_SBI(ip->i_sb));
905 
906  /* Lock the AG specific inode map information
907  */
908  AG_LOCK(imap, agno);
909 
910  /* Obtain read lock in imap inode. Don't release it until we have
911  * read all of the IAG's that we are going to.
912  */
913  IREAD_LOCK(ipimap, RDWRLOCK_IMAP);
914 
915  /* read the iag.
916  */
917  if ((rc = diIAGRead(imap, iagno, &mp))) {
918  IREAD_UNLOCK(ipimap);
919  AG_UNLOCK(imap, agno);
920  return (rc);
921  }
922  iagp = (struct iag *) mp->data;
923 
924  /* get the inode number and extent number of the inode within
925  * the iag and the inode number within the extent.
926  */
927  ino = inum & (INOSPERIAG - 1);
928  extno = ino >> L2INOSPEREXT;
929  bitno = ino & (INOSPEREXT - 1);
930  mask = HIGHORDER >> bitno;
931 
932  if (!(le32_to_cpu(iagp->wmap[extno]) & mask)) {
933  jfs_error(ip->i_sb,
934  "diFree: wmap shows inode already free");
935  }
936 
937  if (!addressPXD(&iagp->inoext[extno])) {
938  release_metapage(mp);
939  IREAD_UNLOCK(ipimap);
940  AG_UNLOCK(imap, agno);
941  jfs_error(ip->i_sb, "diFree: invalid inoext");
942  return -EIO;
943  }
944 
945  /* compute the bitmap for the extent reflecting the freed inode.
946  */
947  bitmap = le32_to_cpu(iagp->wmap[extno]) & ~mask;
948 
949  if (imap->im_agctl[agno].numfree > imap->im_agctl[agno].numinos) {
950  release_metapage(mp);
951  IREAD_UNLOCK(ipimap);
952  AG_UNLOCK(imap, agno);
953  jfs_error(ip->i_sb, "diFree: numfree > numinos");
954  return -EIO;
955  }
956  /*
957  * inode extent still has some inodes or below low water mark:
958  * keep the inode extent;
959  */
960  if (bitmap ||
961  imap->im_agctl[agno].numfree < 96 ||
962  (imap->im_agctl[agno].numfree < 288 &&
963  (((imap->im_agctl[agno].numfree * 100) /
964  imap->im_agctl[agno].numinos) <= 25))) {
965  /* if the iag currently has no free inodes (i.e.,
966  * the inode being freed is the first free inode of iag),
967  * insert the iag at head of the inode free list for the ag.
968  */
969  if (iagp->nfreeinos == 0) {
970  /* check if there are any iags on the ag inode
971  * free list. if so, read the first one so that
972  * we can link the current iag onto the list at
973  * the head.
974  */
975  if ((fwd = imap->im_agctl[agno].inofree) >= 0) {
976  /* read the iag that currently is the head
977  * of the list.
978  */
979  if ((rc = diIAGRead(imap, fwd, &amp))) {
980  IREAD_UNLOCK(ipimap);
981  AG_UNLOCK(imap, agno);
982  release_metapage(mp);
983  return (rc);
984  }
985  aiagp = (struct iag *) amp->data;
986 
987  /* make current head point back to the iag.
988  */
989  aiagp->inofreeback = cpu_to_le32(iagno);
990 
991  write_metapage(amp);
992  }
993 
994  /* iag points forward to current head and iag
995  * becomes the new head of the list.
996  */
997  iagp->inofreefwd =
998  cpu_to_le32(imap->im_agctl[agno].inofree);
999  iagp->inofreeback = cpu_to_le32(-1);
1000  imap->im_agctl[agno].inofree = iagno;
1001  }
1002  IREAD_UNLOCK(ipimap);
1003 
1004  /* update the free inode summary map for the extent if
1005  * freeing the inode means the extent will now have free
1006  * inodes (i.e., the inode being freed is the first free
1007  * inode of extent),
1008  */
1009  if (iagp->wmap[extno] == cpu_to_le32(ONES)) {
1010  sword = extno >> L2EXTSPERSUM;
1011  bitno = extno & (EXTSPERSUM - 1);
1012  iagp->inosmap[sword] &=
1013  cpu_to_le32(~(HIGHORDER >> bitno));
1014  }
1015 
1016  /* update the bitmap.
1017  */
1018  iagp->wmap[extno] = cpu_to_le32(bitmap);
1019 
1020  /* update the free inode counts at the iag, ag and
1021  * map level.
1022  */
1023  le32_add_cpu(&iagp->nfreeinos, 1);
1024  imap->im_agctl[agno].numfree += 1;
1025  atomic_inc(&imap->im_numfree);
1026 
1027  /* release the AG inode map lock
1028  */
1029  AG_UNLOCK(imap, agno);
1030 
1031  /* write the iag */
1032  write_metapage(mp);
1033 
1034  return (0);
1035  }
1036 
1037 
1038  /*
1039  * inode extent has become free and above low water mark:
1040  * free the inode extent;
1041  */
1042 
1043  /*
1044  * prepare to update iag list(s) (careful update step 1)
1045  */
1046  amp = bmp = cmp = dmp = NULL;
1047  fwd = back = -1;
1048 
1049  /* check if the iag currently has no free extents. if so,
1050  * it will be placed on the head of the ag extent free list.
1051  */
1052  if (iagp->nfreeexts == 0) {
1053  /* check if the ag extent free list has any iags.
1054  * if so, read the iag at the head of the list now.
1055  * this (head) iag will be updated later to reflect
1056  * the addition of the current iag at the head of
1057  * the list.
1058  */
1059  if ((fwd = imap->im_agctl[agno].extfree) >= 0) {
1060  if ((rc = diIAGRead(imap, fwd, &amp)))
1061  goto error_out;
1062  aiagp = (struct iag *) amp->data;
1063  }
1064  } else {
1065  /* iag has free extents. check if the addition of a free
1066  * extent will cause all extents to be free within this
1067  * iag. if so, the iag will be removed from the ag extent
1068  * free list and placed on the inode map's free iag list.
1069  */
1070  if (iagp->nfreeexts == cpu_to_le32(EXTSPERIAG - 1)) {
1071  /* in preparation for removing the iag from the
1072  * ag extent free list, read the iags preceding
1073  * and following the iag on the ag extent free
1074  * list.
1075  */
1076  if ((fwd = le32_to_cpu(iagp->extfreefwd)) >= 0) {
1077  if ((rc = diIAGRead(imap, fwd, &amp)))
1078  goto error_out;
1079  aiagp = (struct iag *) amp->data;
1080  }
1081 
1082  if ((back = le32_to_cpu(iagp->extfreeback)) >= 0) {
1083  if ((rc = diIAGRead(imap, back, &bmp)))
1084  goto error_out;
1085  biagp = (struct iag *) bmp->data;
1086  }
1087  }
1088  }
1089 
1090  /* remove the iag from the ag inode free list if freeing
1091  * this extent cause the iag to have no free inodes.
1092  */
1093  if (iagp->nfreeinos == cpu_to_le32(INOSPEREXT - 1)) {
1094  int inofreeback = le32_to_cpu(iagp->inofreeback);
1095  int inofreefwd = le32_to_cpu(iagp->inofreefwd);
1096 
1097  /* in preparation for removing the iag from the
1098  * ag inode free list, read the iags preceding
1099  * and following the iag on the ag inode free
1100  * list. before reading these iags, we must make
1101  * sure that we already don't have them in hand
1102  * from up above, since re-reading an iag (buffer)
1103  * we are currently holding would cause a deadlock.
1104  */
1105  if (inofreefwd >= 0) {
1106 
1107  if (inofreefwd == fwd)
1108  ciagp = (struct iag *) amp->data;
1109  else if (inofreefwd == back)
1110  ciagp = (struct iag *) bmp->data;
1111  else {
1112  if ((rc =
1113  diIAGRead(imap, inofreefwd, &cmp)))
1114  goto error_out;
1115  ciagp = (struct iag *) cmp->data;
1116  }
1117  assert(ciagp != NULL);
1118  }
1119 
1120  if (inofreeback >= 0) {
1121  if (inofreeback == fwd)
1122  diagp = (struct iag *) amp->data;
1123  else if (inofreeback == back)
1124  diagp = (struct iag *) bmp->data;
1125  else {
1126  if ((rc =
1127  diIAGRead(imap, inofreeback, &dmp)))
1128  goto error_out;
1129  diagp = (struct iag *) dmp->data;
1130  }
1131  assert(diagp != NULL);
1132  }
1133  }
1134 
1135  IREAD_UNLOCK(ipimap);
1136 
1137  /*
1138  * invalidate any page of the inode extent freed from buffer cache;
1139  */
1140  freepxd = iagp->inoext[extno];
1141  invalidate_pxd_metapages(ip, freepxd);
1142 
1143  /*
1144  * update iag list(s) (careful update step 2)
1145  */
1146  /* add the iag to the ag extent free list if this is the
1147  * first free extent for the iag.
1148  */
1149  if (iagp->nfreeexts == 0) {
1150  if (fwd >= 0)
1151  aiagp->extfreeback = cpu_to_le32(iagno);
1152 
1153  iagp->extfreefwd =
1154  cpu_to_le32(imap->im_agctl[agno].extfree);
1155  iagp->extfreeback = cpu_to_le32(-1);
1156  imap->im_agctl[agno].extfree = iagno;
1157  } else {
1158  /* remove the iag from the ag extent list if all extents
1159  * are now free and place it on the inode map iag free list.
1160  */
1161  if (iagp->nfreeexts == cpu_to_le32(EXTSPERIAG - 1)) {
1162  if (fwd >= 0)
1163  aiagp->extfreeback = iagp->extfreeback;
1164 
1165  if (back >= 0)
1166  biagp->extfreefwd = iagp->extfreefwd;
1167  else
1168  imap->im_agctl[agno].extfree =
1169  le32_to_cpu(iagp->extfreefwd);
1170 
1171  iagp->extfreefwd = iagp->extfreeback = cpu_to_le32(-1);
1172 
1173  IAGFREE_LOCK(imap);
1174  iagp->iagfree = cpu_to_le32(imap->im_freeiag);
1175  imap->im_freeiag = iagno;
1176  IAGFREE_UNLOCK(imap);
1177  }
1178  }
1179 
1180  /* remove the iag from the ag inode free list if freeing
1181  * this extent causes the iag to have no free inodes.
1182  */
1183  if (iagp->nfreeinos == cpu_to_le32(INOSPEREXT - 1)) {
1184  if ((int) le32_to_cpu(iagp->inofreefwd) >= 0)
1185  ciagp->inofreeback = iagp->inofreeback;
1186 
1187  if ((int) le32_to_cpu(iagp->inofreeback) >= 0)
1188  diagp->inofreefwd = iagp->inofreefwd;
1189  else
1190  imap->im_agctl[agno].inofree =
1191  le32_to_cpu(iagp->inofreefwd);
1192 
1193  iagp->inofreefwd = iagp->inofreeback = cpu_to_le32(-1);
1194  }
1195 
1196  /* update the inode extent address and working map
1197  * to reflect the free extent.
1198  * the permanent map should have been updated already
1199  * for the inode being freed.
1200  */
1201  if (iagp->pmap[extno] != 0) {
1202  jfs_error(ip->i_sb, "diFree: the pmap does not show inode free");
1203  }
1204  iagp->wmap[extno] = 0;
1205  PXDlength(&iagp->inoext[extno], 0);
1206  PXDaddress(&iagp->inoext[extno], 0);
1207 
1208  /* update the free extent and free inode summary maps
1209  * to reflect the freed extent.
1210  * the inode summary map is marked to indicate no inodes
1211  * available for the freed extent.
1212  */
1213  sword = extno >> L2EXTSPERSUM;
1214  bitno = extno & (EXTSPERSUM - 1);
1215  mask = HIGHORDER >> bitno;
1216  iagp->inosmap[sword] |= cpu_to_le32(mask);
1217  iagp->extsmap[sword] &= cpu_to_le32(~mask);
1218 
1219  /* update the number of free inodes and number of free extents
1220  * for the iag.
1221  */
1222  le32_add_cpu(&iagp->nfreeinos, -(INOSPEREXT - 1));
1223  le32_add_cpu(&iagp->nfreeexts, 1);
1224 
1225  /* update the number of free inodes and backed inodes
1226  * at the ag and inode map level.
1227  */
1228  imap->im_agctl[agno].numfree -= (INOSPEREXT - 1);
1229  imap->im_agctl[agno].numinos -= INOSPEREXT;
1230  atomic_sub(INOSPEREXT - 1, &imap->im_numfree);
1231  atomic_sub(INOSPEREXT, &imap->im_numinos);
1232 
1233  if (amp)
1234  write_metapage(amp);
1235  if (bmp)
1236  write_metapage(bmp);
1237  if (cmp)
1238  write_metapage(cmp);
1239  if (dmp)
1240  write_metapage(dmp);
1241 
1242  /*
1243  * start transaction to update block allocation map
1244  * for the inode extent freed;
1245  *
1246  * N.B. AG_LOCK is released and iag will be released below, and
1247  * other thread may allocate inode from/reusing the ixad freed
1248  * BUT with new/different backing inode extent from the extent
1249  * to be freed by the transaction;
1250  */
1251  tid = txBegin(ipimap->i_sb, COMMIT_FORCE);
1252  mutex_lock(&JFS_IP(ipimap)->commit_mutex);
1253 
1254  /* acquire tlock of the iag page of the freed ixad
1255  * to force the page NOHOMEOK (even though no data is
1256  * logged from the iag page) until NOREDOPAGE|FREEXTENT log
1257  * for the free of the extent is committed;
1258  * write FREEXTENT|NOREDOPAGE log record
1259  * N.B. linelock is overlaid as freed extent descriptor;
1260  */
1261  tlck = txLock(tid, ipimap, mp, tlckINODE | tlckFREE);
1262  pxdlock = (struct pxd_lock *) & tlck->lock;
1263  pxdlock->flag = mlckFREEPXD;
1264  pxdlock->pxd = freepxd;
1265  pxdlock->index = 1;
1266 
1267  write_metapage(mp);
1268 
1269  iplist[0] = ipimap;
1270 
1271  /*
1272  * logredo needs the IAG number and IAG extent index in order
1273  * to ensure that the IMap is consistent. The least disruptive
1274  * way to pass these values through to the transaction manager
1275  * is in the iplist array.
1276  *
1277  * It's not pretty, but it works.
1278  */
1279  iplist[1] = (struct inode *) (size_t)iagno;
1280  iplist[2] = (struct inode *) (size_t)extno;
1281 
1282  rc = txCommit(tid, 1, &iplist[0], COMMIT_FORCE);
1283 
1284  txEnd(tid);
1285  mutex_unlock(&JFS_IP(ipimap)->commit_mutex);
1286 
1287  /* unlock the AG inode map information */
1288  AG_UNLOCK(imap, agno);
1289 
1290  return (0);
1291 
1292  error_out:
1293  IREAD_UNLOCK(ipimap);
1294 
1295  if (amp)
1296  release_metapage(amp);
1297  if (bmp)
1298  release_metapage(bmp);
1299  if (cmp)
1300  release_metapage(cmp);
1301  if (dmp)
1302  release_metapage(dmp);
1303 
1304  AG_UNLOCK(imap, agno);
1305 
1307 
1308  return (rc);
1309 }
1310 
1311 /*
1312  * There are several places in the diAlloc* routines where we initialize
1313  * the inode.
1314  */
1315 static inline void
1316 diInitInode(struct inode *ip, int iagno, int ino, int extno, struct iag * iagp)
1317 {
1318  struct jfs_inode_info *jfs_ip = JFS_IP(ip);
1319 
1320  ip->i_ino = (iagno << L2INOSPERIAG) + ino;
1321  jfs_ip->ixpxd = iagp->inoext[extno];
1322  jfs_ip->agstart = le64_to_cpu(iagp->agstart);
1323  jfs_ip->active_ag = -1;
1324 }
1325 
1326 
1327 /*
1328  * NAME: diAlloc(pip,dir,ip)
1329  *
1330  * FUNCTION: allocate a disk inode from the inode working map
1331  * for a fileset or aggregate.
1332  *
1333  * PARAMETERS:
1334  * pip - pointer to incore inode for the parent inode.
1335  * dir - 'true' if the new disk inode is for a directory.
1336  * ip - pointer to a new inode
1337  *
1338  * RETURN VALUES:
1339  * 0 - success.
1340  * -ENOSPC - insufficient disk resources.
1341  * -EIO - i/o error.
1342  */
1343 int diAlloc(struct inode *pip, bool dir, struct inode *ip)
1344 {
1345  int rc, ino, iagno, addext, extno, bitno, sword;
1346  int nwords, rem, i, agno;
1347  u32 mask, inosmap, extsmap;
1348  struct inode *ipimap;
1349  struct metapage *mp;
1350  ino_t inum;
1351  struct iag *iagp;
1352  struct inomap *imap;
1353 
1354  /* get the pointers to the inode map inode and the
1355  * corresponding imap control structure.
1356  */
1357  ipimap = JFS_SBI(pip->i_sb)->ipimap;
1358  imap = JFS_IP(ipimap)->i_imap;
1359  JFS_IP(ip)->ipimap = ipimap;
1360  JFS_IP(ip)->fileset = FILESYSTEM_I;
1361 
1362  /* for a directory, the allocation policy is to start
1363  * at the ag level using the preferred ag.
1364  */
1365  if (dir) {
1366  agno = dbNextAG(JFS_SBI(pip->i_sb)->ipbmap);
1367  AG_LOCK(imap, agno);
1368  goto tryag;
1369  }
1370 
1371  /* for files, the policy starts off by trying to allocate from
1372  * the same iag containing the parent disk inode:
1373  * try to allocate the new disk inode close to the parent disk
1374  * inode, using parent disk inode number + 1 as the allocation
1375  * hint. (we use a left-to-right policy to attempt to avoid
1376  * moving backward on the disk.) compute the hint within the
1377  * file system and the iag.
1378  */
1379 
1380  /* get the ag number of this iag */
1381  agno = BLKTOAG(JFS_IP(pip)->agstart, JFS_SBI(pip->i_sb));
1382 
1383  if (atomic_read(&JFS_SBI(pip->i_sb)->bmap->db_active[agno])) {
1384  /*
1385  * There is an open file actively growing. We want to
1386  * allocate new inodes from a different ag to avoid
1387  * fragmentation problems.
1388  */
1389  agno = dbNextAG(JFS_SBI(pip->i_sb)->ipbmap);
1390  AG_LOCK(imap, agno);
1391  goto tryag;
1392  }
1393 
1394  inum = pip->i_ino + 1;
1395  ino = inum & (INOSPERIAG - 1);
1396 
1397  /* back off the hint if it is outside of the iag */
1398  if (ino == 0)
1399  inum = pip->i_ino;
1400 
1401  /* lock the AG inode map information */
1402  AG_LOCK(imap, agno);
1403 
1404  /* Get read lock on imap inode */
1405  IREAD_LOCK(ipimap, RDWRLOCK_IMAP);
1406 
1407  /* get the iag number and read the iag */
1408  iagno = INOTOIAG(inum);
1409  if ((rc = diIAGRead(imap, iagno, &mp))) {
1410  IREAD_UNLOCK(ipimap);
1411  AG_UNLOCK(imap, agno);
1412  return (rc);
1413  }
1414  iagp = (struct iag *) mp->data;
1415 
1416  /* determine if new inode extent is allowed to be added to the iag.
1417  * new inode extent can be added to the iag if the ag
1418  * has less than 32 free disk inodes and the iag has free extents.
1419  */
1420  addext = (imap->im_agctl[agno].numfree < 32 && iagp->nfreeexts);
1421 
1422  /*
1423  * try to allocate from the IAG
1424  */
1425  /* check if the inode may be allocated from the iag
1426  * (i.e. the inode has free inodes or new extent can be added).
1427  */
1428  if (iagp->nfreeinos || addext) {
1429  /* determine the extent number of the hint.
1430  */
1431  extno = ino >> L2INOSPEREXT;
1432 
1433  /* check if the extent containing the hint has backed
1434  * inodes. if so, try to allocate within this extent.
1435  */
1436  if (addressPXD(&iagp->inoext[extno])) {
1437  bitno = ino & (INOSPEREXT - 1);
1438  if ((bitno =
1439  diFindFree(le32_to_cpu(iagp->wmap[extno]),
1440  bitno))
1441  < INOSPEREXT) {
1442  ino = (extno << L2INOSPEREXT) + bitno;
1443 
1444  /* a free inode (bit) was found within this
1445  * extent, so allocate it.
1446  */
1447  rc = diAllocBit(imap, iagp, ino);
1448  IREAD_UNLOCK(ipimap);
1449  if (rc) {
1450  assert(rc == -EIO);
1451  } else {
1452  /* set the results of the allocation
1453  * and write the iag.
1454  */
1455  diInitInode(ip, iagno, ino, extno,
1456  iagp);
1457  mark_metapage_dirty(mp);
1458  }
1459  release_metapage(mp);
1460 
1461  /* free the AG lock and return.
1462  */
1463  AG_UNLOCK(imap, agno);
1464  return (rc);
1465  }
1466 
1467  if (!addext)
1468  extno =
1469  (extno ==
1470  EXTSPERIAG - 1) ? 0 : extno + 1;
1471  }
1472 
1473  /*
1474  * no free inodes within the extent containing the hint.
1475  *
1476  * try to allocate from the backed extents following
1477  * hint or, if appropriate (i.e. addext is true), allocate
1478  * an extent of free inodes at or following the extent
1479  * containing the hint.
1480  *
1481  * the free inode and free extent summary maps are used
1482  * here, so determine the starting summary map position
1483  * and the number of words we'll have to examine. again,
1484  * the approach is to allocate following the hint, so we
1485  * might have to initially ignore prior bits of the summary
1486  * map that represent extents prior to the extent containing
1487  * the hint and later revisit these bits.
1488  */
1489  bitno = extno & (EXTSPERSUM - 1);
1490  nwords = (bitno == 0) ? SMAPSZ : SMAPSZ + 1;
1491  sword = extno >> L2EXTSPERSUM;
1492 
1493  /* mask any prior bits for the starting words of the
1494  * summary map.
1495  */
1496  mask = ONES << (EXTSPERSUM - bitno);
1497  inosmap = le32_to_cpu(iagp->inosmap[sword]) | mask;
1498  extsmap = le32_to_cpu(iagp->extsmap[sword]) | mask;
1499 
1500  /* scan the free inode and free extent summary maps for
1501  * free resources.
1502  */
1503  for (i = 0; i < nwords; i++) {
1504  /* check if this word of the free inode summary
1505  * map describes an extent with free inodes.
1506  */
1507  if (~inosmap) {
1508  /* an extent with free inodes has been
1509  * found. determine the extent number
1510  * and the inode number within the extent.
1511  */
1512  rem = diFindFree(inosmap, 0);
1513  extno = (sword << L2EXTSPERSUM) + rem;
1514  rem = diFindFree(le32_to_cpu(iagp->wmap[extno]),
1515  0);
1516  if (rem >= INOSPEREXT) {
1517  IREAD_UNLOCK(ipimap);
1518  release_metapage(mp);
1519  AG_UNLOCK(imap, agno);
1520  jfs_error(ip->i_sb,
1521  "diAlloc: can't find free bit "
1522  "in wmap");
1523  return -EIO;
1524  }
1525 
1526  /* determine the inode number within the
1527  * iag and allocate the inode from the
1528  * map.
1529  */
1530  ino = (extno << L2INOSPEREXT) + rem;
1531  rc = diAllocBit(imap, iagp, ino);
1532  IREAD_UNLOCK(ipimap);
1533  if (rc)
1534  assert(rc == -EIO);
1535  else {
1536  /* set the results of the allocation
1537  * and write the iag.
1538  */
1539  diInitInode(ip, iagno, ino, extno,
1540  iagp);
1541  mark_metapage_dirty(mp);
1542  }
1543  release_metapage(mp);
1544 
1545  /* free the AG lock and return.
1546  */
1547  AG_UNLOCK(imap, agno);
1548  return (rc);
1549 
1550  }
1551 
1552  /* check if we may allocate an extent of free
1553  * inodes and whether this word of the free
1554  * extents summary map describes a free extent.
1555  */
1556  if (addext && ~extsmap) {
1557  /* a free extent has been found. determine
1558  * the extent number.
1559  */
1560  rem = diFindFree(extsmap, 0);
1561  extno = (sword << L2EXTSPERSUM) + rem;
1562 
1563  /* allocate an extent of free inodes.
1564  */
1565  if ((rc = diNewExt(imap, iagp, extno))) {
1566  /* if there is no disk space for a
1567  * new extent, try to allocate the
1568  * disk inode from somewhere else.
1569  */
1570  if (rc == -ENOSPC)
1571  break;
1572 
1573  assert(rc == -EIO);
1574  } else {
1575  /* set the results of the allocation
1576  * and write the iag.
1577  */
1578  diInitInode(ip, iagno,
1579  extno << L2INOSPEREXT,
1580  extno, iagp);
1581  mark_metapage_dirty(mp);
1582  }
1583  release_metapage(mp);
1584  /* free the imap inode & the AG lock & return.
1585  */
1586  IREAD_UNLOCK(ipimap);
1587  AG_UNLOCK(imap, agno);
1588  return (rc);
1589  }
1590 
1591  /* move on to the next set of summary map words.
1592  */
1593  sword = (sword == SMAPSZ - 1) ? 0 : sword + 1;
1594  inosmap = le32_to_cpu(iagp->inosmap[sword]);
1595  extsmap = le32_to_cpu(iagp->extsmap[sword]);
1596  }
1597  }
1598  /* unlock imap inode */
1599  IREAD_UNLOCK(ipimap);
1600 
1601  /* nothing doing in this iag, so release it. */
1602  release_metapage(mp);
1603 
1604  tryag:
1605  /*
1606  * try to allocate anywhere within the same AG as the parent inode.
1607  */
1608  rc = diAllocAG(imap, agno, dir, ip);
1609 
1610  AG_UNLOCK(imap, agno);
1611 
1612  if (rc != -ENOSPC)
1613  return (rc);
1614 
1615  /*
1616  * try to allocate in any AG.
1617  */
1618  return (diAllocAny(imap, agno, dir, ip));
1619 }
1620 
1621 
1622 /*
1623  * NAME: diAllocAG(imap,agno,dir,ip)
1624  *
1625  * FUNCTION: allocate a disk inode from the allocation group.
1626  *
1627  * this routine first determines if a new extent of free
1628  * inodes should be added for the allocation group, with
1629  * the current request satisfied from this extent. if this
1630  * is the case, an attempt will be made to do just that. if
1631  * this attempt fails or it has been determined that a new
1632  * extent should not be added, an attempt is made to satisfy
1633  * the request by allocating an existing (backed) free inode
1634  * from the allocation group.
1635  *
1636  * PRE CONDITION: Already have the AG lock for this AG.
1637  *
1638  * PARAMETERS:
1639  * imap - pointer to inode map control structure.
1640  * agno - allocation group to allocate from.
1641  * dir - 'true' if the new disk inode is for a directory.
1642  * ip - pointer to the new inode to be filled in on successful return
1643  * with the disk inode number allocated, its extent address
1644  * and the start of the ag.
1645  *
1646  * RETURN VALUES:
1647  * 0 - success.
1648  * -ENOSPC - insufficient disk resources.
1649  * -EIO - i/o error.
1650  */
1651 static int
1652 diAllocAG(struct inomap * imap, int agno, bool dir, struct inode *ip)
1653 {
1654  int rc, addext, numfree, numinos;
1655 
1656  /* get the number of free and the number of backed disk
1657  * inodes currently within the ag.
1658  */
1659  numfree = imap->im_agctl[agno].numfree;
1660  numinos = imap->im_agctl[agno].numinos;
1661 
1662  if (numfree > numinos) {
1663  jfs_error(ip->i_sb, "diAllocAG: numfree > numinos");
1664  return -EIO;
1665  }
1666 
1667  /* determine if we should allocate a new extent of free inodes
1668  * within the ag: for directory inodes, add a new extent
1669  * if there are a small number of free inodes or number of free
1670  * inodes is a small percentage of the number of backed inodes.
1671  */
1672  if (dir)
1673  addext = (numfree < 64 ||
1674  (numfree < 256
1675  && ((numfree * 100) / numinos) <= 20));
1676  else
1677  addext = (numfree == 0);
1678 
1679  /*
1680  * try to allocate a new extent of free inodes.
1681  */
1682  if (addext) {
1683  /* if free space is not available for this new extent, try
1684  * below to allocate a free and existing (already backed)
1685  * inode from the ag.
1686  */
1687  if ((rc = diAllocExt(imap, agno, ip)) != -ENOSPC)
1688  return (rc);
1689  }
1690 
1691  /*
1692  * try to allocate an existing free inode from the ag.
1693  */
1694  return (diAllocIno(imap, agno, ip));
1695 }
1696 
1697 
1698 /*
1699  * NAME: diAllocAny(imap,agno,dir,iap)
1700  *
1701  * FUNCTION: allocate a disk inode from any other allocation group.
1702  *
1703  * this routine is called when an allocation attempt within
1704  * the primary allocation group has failed. if attempts to
1705  * allocate an inode from any allocation group other than the
1706  * specified primary group.
1707  *
1708  * PARAMETERS:
1709  * imap - pointer to inode map control structure.
1710  * agno - primary allocation group (to avoid).
1711  * dir - 'true' if the new disk inode is for a directory.
1712  * ip - pointer to a new inode to be filled in on successful return
1713  * with the disk inode number allocated, its extent address
1714  * and the start of the ag.
1715  *
1716  * RETURN VALUES:
1717  * 0 - success.
1718  * -ENOSPC - insufficient disk resources.
1719  * -EIO - i/o error.
1720  */
1721 static int
1722 diAllocAny(struct inomap * imap, int agno, bool dir, struct inode *ip)
1723 {
1724  int ag, rc;
1725  int maxag = JFS_SBI(imap->im_ipimap->i_sb)->bmap->db_maxag;
1726 
1727 
1728  /* try to allocate from the ags following agno up to
1729  * the maximum ag number.
1730  */
1731  for (ag = agno + 1; ag <= maxag; ag++) {
1732  AG_LOCK(imap, ag);
1733 
1734  rc = diAllocAG(imap, ag, dir, ip);
1735 
1736  AG_UNLOCK(imap, ag);
1737 
1738  if (rc != -ENOSPC)
1739  return (rc);
1740  }
1741 
1742  /* try to allocate from the ags in front of agno.
1743  */
1744  for (ag = 0; ag < agno; ag++) {
1745  AG_LOCK(imap, ag);
1746 
1747  rc = diAllocAG(imap, ag, dir, ip);
1748 
1749  AG_UNLOCK(imap, ag);
1750 
1751  if (rc != -ENOSPC)
1752  return (rc);
1753  }
1754 
1755  /* no free disk inodes.
1756  */
1757  return -ENOSPC;
1758 }
1759 
1760 
1761 /*
1762  * NAME: diAllocIno(imap,agno,ip)
1763  *
1764  * FUNCTION: allocate a disk inode from the allocation group's free
1765  * inode list, returning an error if this free list is
1766  * empty (i.e. no iags on the list).
1767  *
1768  * allocation occurs from the first iag on the list using
1769  * the iag's free inode summary map to find the leftmost
1770  * free inode in the iag.
1771  *
1772  * PRE CONDITION: Already have AG lock for this AG.
1773  *
1774  * PARAMETERS:
1775  * imap - pointer to inode map control structure.
1776  * agno - allocation group.
1777  * ip - pointer to new inode to be filled in on successful return
1778  * with the disk inode number allocated, its extent address
1779  * and the start of the ag.
1780  *
1781  * RETURN VALUES:
1782  * 0 - success.
1783  * -ENOSPC - insufficient disk resources.
1784  * -EIO - i/o error.
1785  */
1786 static int diAllocIno(struct inomap * imap, int agno, struct inode *ip)
1787 {
1788  int iagno, ino, rc, rem, extno, sword;
1789  struct metapage *mp;
1790  struct iag *iagp;
1791 
1792  /* check if there are iags on the ag's free inode list.
1793  */
1794  if ((iagno = imap->im_agctl[agno].inofree) < 0)
1795  return -ENOSPC;
1796 
1797  /* obtain read lock on imap inode */
1799 
1800  /* read the iag at the head of the list.
1801  */
1802  if ((rc = diIAGRead(imap, iagno, &mp))) {
1803  IREAD_UNLOCK(imap->im_ipimap);
1804  return (rc);
1805  }
1806  iagp = (struct iag *) mp->data;
1807 
1808  /* better be free inodes in this iag if it is on the
1809  * list.
1810  */
1811  if (!iagp->nfreeinos) {
1812  IREAD_UNLOCK(imap->im_ipimap);
1813  release_metapage(mp);
1814  jfs_error(ip->i_sb,
1815  "diAllocIno: nfreeinos = 0, but iag on freelist");
1816  return -EIO;
1817  }
1818 
1819  /* scan the free inode summary map to find an extent
1820  * with free inodes.
1821  */
1822  for (sword = 0;; sword++) {
1823  if (sword >= SMAPSZ) {
1824  IREAD_UNLOCK(imap->im_ipimap);
1825  release_metapage(mp);
1826  jfs_error(ip->i_sb,
1827  "diAllocIno: free inode not found in summary map");
1828  return -EIO;
1829  }
1830 
1831  if (~iagp->inosmap[sword])
1832  break;
1833  }
1834 
1835  /* found a extent with free inodes. determine
1836  * the extent number.
1837  */
1838  rem = diFindFree(le32_to_cpu(iagp->inosmap[sword]), 0);
1839  if (rem >= EXTSPERSUM) {
1840  IREAD_UNLOCK(imap->im_ipimap);
1841  release_metapage(mp);
1842  jfs_error(ip->i_sb, "diAllocIno: no free extent found");
1843  return -EIO;
1844  }
1845  extno = (sword << L2EXTSPERSUM) + rem;
1846 
1847  /* find the first free inode in the extent.
1848  */
1849  rem = diFindFree(le32_to_cpu(iagp->wmap[extno]), 0);
1850  if (rem >= INOSPEREXT) {
1851  IREAD_UNLOCK(imap->im_ipimap);
1852  release_metapage(mp);
1853  jfs_error(ip->i_sb, "diAllocIno: free inode not found");
1854  return -EIO;
1855  }
1856 
1857  /* compute the inode number within the iag.
1858  */
1859  ino = (extno << L2INOSPEREXT) + rem;
1860 
1861  /* allocate the inode.
1862  */
1863  rc = diAllocBit(imap, iagp, ino);
1864  IREAD_UNLOCK(imap->im_ipimap);
1865  if (rc) {
1866  release_metapage(mp);
1867  return (rc);
1868  }
1869 
1870  /* set the results of the allocation and write the iag.
1871  */
1872  diInitInode(ip, iagno, ino, extno, iagp);
1873  write_metapage(mp);
1874 
1875  return (0);
1876 }
1877 
1878 
1879 /*
1880  * NAME: diAllocExt(imap,agno,ip)
1881  *
1882  * FUNCTION: add a new extent of free inodes to an iag, allocating
1883  * an inode from this extent to satisfy the current allocation
1884  * request.
1885  *
1886  * this routine first tries to find an existing iag with free
1887  * extents through the ag free extent list. if list is not
1888  * empty, the head of the list will be selected as the home
1889  * of the new extent of free inodes. otherwise (the list is
1890  * empty), a new iag will be allocated for the ag to contain
1891  * the extent.
1892  *
1893  * once an iag has been selected, the free extent summary map
1894  * is used to locate a free extent within the iag and diNewExt()
1895  * is called to initialize the extent, with initialization
1896  * including the allocation of the first inode of the extent
1897  * for the purpose of satisfying this request.
1898  *
1899  * PARAMETERS:
1900  * imap - pointer to inode map control structure.
1901  * agno - allocation group number.
1902  * ip - pointer to new inode to be filled in on successful return
1903  * with the disk inode number allocated, its extent address
1904  * and the start of the ag.
1905  *
1906  * RETURN VALUES:
1907  * 0 - success.
1908  * -ENOSPC - insufficient disk resources.
1909  * -EIO - i/o error.
1910  */
1911 static int diAllocExt(struct inomap * imap, int agno, struct inode *ip)
1912 {
1913  int rem, iagno, sword, extno, rc;
1914  struct metapage *mp;
1915  struct iag *iagp;
1916 
1917  /* check if the ag has any iags with free extents. if not,
1918  * allocate a new iag for the ag.
1919  */
1920  if ((iagno = imap->im_agctl[agno].extfree) < 0) {
1921  /* If successful, diNewIAG will obtain the read lock on the
1922  * imap inode.
1923  */
1924  if ((rc = diNewIAG(imap, &iagno, agno, &mp))) {
1925  return (rc);
1926  }
1927  iagp = (struct iag *) mp->data;
1928 
1929  /* set the ag number if this a brand new iag
1930  */
1931  iagp->agstart =
1932  cpu_to_le64(AGTOBLK(agno, imap->im_ipimap));
1933  } else {
1934  /* read the iag.
1935  */
1937  if ((rc = diIAGRead(imap, iagno, &mp))) {
1938  IREAD_UNLOCK(imap->im_ipimap);
1939  jfs_error(ip->i_sb, "diAllocExt: error reading iag");
1940  return rc;
1941  }
1942  iagp = (struct iag *) mp->data;
1943  }
1944 
1945  /* using the free extent summary map, find a free extent.
1946  */
1947  for (sword = 0;; sword++) {
1948  if (sword >= SMAPSZ) {
1949  release_metapage(mp);
1950  IREAD_UNLOCK(imap->im_ipimap);
1951  jfs_error(ip->i_sb,
1952  "diAllocExt: free ext summary map not found");
1953  return -EIO;
1954  }
1955  if (~iagp->extsmap[sword])
1956  break;
1957  }
1958 
1959  /* determine the extent number of the free extent.
1960  */
1961  rem = diFindFree(le32_to_cpu(iagp->extsmap[sword]), 0);
1962  if (rem >= EXTSPERSUM) {
1963  release_metapage(mp);
1964  IREAD_UNLOCK(imap->im_ipimap);
1965  jfs_error(ip->i_sb, "diAllocExt: free extent not found");
1966  return -EIO;
1967  }
1968  extno = (sword << L2EXTSPERSUM) + rem;
1969 
1970  /* initialize the new extent.
1971  */
1972  rc = diNewExt(imap, iagp, extno);
1973  IREAD_UNLOCK(imap->im_ipimap);
1974  if (rc) {
1975  /* something bad happened. if a new iag was allocated,
1976  * place it back on the inode map's iag free list, and
1977  * clear the ag number information.
1978  */
1979  if (iagp->nfreeexts == cpu_to_le32(EXTSPERIAG)) {
1980  IAGFREE_LOCK(imap);
1981  iagp->iagfree = cpu_to_le32(imap->im_freeiag);
1982  imap->im_freeiag = iagno;
1983  IAGFREE_UNLOCK(imap);
1984  }
1985  write_metapage(mp);
1986  return (rc);
1987  }
1988 
1989  /* set the results of the allocation and write the iag.
1990  */
1991  diInitInode(ip, iagno, extno << L2INOSPEREXT, extno, iagp);
1992 
1993  write_metapage(mp);
1994 
1995  return (0);
1996 }
1997 
1998 
1999 /*
2000  * NAME: diAllocBit(imap,iagp,ino)
2001  *
2002  * FUNCTION: allocate a backed inode from an iag.
2003  *
2004  * this routine performs the mechanics of allocating a
2005  * specified inode from a backed extent.
2006  *
2007  * if the inode to be allocated represents the last free
2008  * inode within the iag, the iag will be removed from the
2009  * ag free inode list.
2010  *
2011  * a careful update approach is used to provide consistency
2012  * in the face of updates to multiple buffers. under this
2013  * approach, all required buffers are obtained before making
2014  * any updates and are held all are updates are complete.
2015  *
2016  * PRE CONDITION: Already have buffer lock on iagp. Already have AG lock on
2017  * this AG. Must have read lock on imap inode.
2018  *
2019  * PARAMETERS:
2020  * imap - pointer to inode map control structure.
2021  * iagp - pointer to iag.
2022  * ino - inode number to be allocated within the iag.
2023  *
2024  * RETURN VALUES:
2025  * 0 - success.
2026  * -ENOSPC - insufficient disk resources.
2027  * -EIO - i/o error.
2028  */
2029 static int diAllocBit(struct inomap * imap, struct iag * iagp, int ino)
2030 {
2031  int extno, bitno, agno, sword, rc;
2032  struct metapage *amp = NULL, *bmp = NULL;
2033  struct iag *aiagp = NULL, *biagp = NULL;
2034  u32 mask;
2035 
2036  /* check if this is the last free inode within the iag.
2037  * if so, it will have to be removed from the ag free
2038  * inode list, so get the iags preceding and following
2039  * it on the list.
2040  */
2041  if (iagp->nfreeinos == cpu_to_le32(1)) {
2042  if ((int) le32_to_cpu(iagp->inofreefwd) >= 0) {
2043  if ((rc =
2044  diIAGRead(imap, le32_to_cpu(iagp->inofreefwd),
2045  &amp)))
2046  return (rc);
2047  aiagp = (struct iag *) amp->data;
2048  }
2049 
2050  if ((int) le32_to_cpu(iagp->inofreeback) >= 0) {
2051  if ((rc =
2052  diIAGRead(imap,
2053  le32_to_cpu(iagp->inofreeback),
2054  &bmp))) {
2055  if (amp)
2056  release_metapage(amp);
2057  return (rc);
2058  }
2059  biagp = (struct iag *) bmp->data;
2060  }
2061  }
2062 
2063  /* get the ag number, extent number, inode number within
2064  * the extent.
2065  */
2066  agno = BLKTOAG(le64_to_cpu(iagp->agstart), JFS_SBI(imap->im_ipimap->i_sb));
2067  extno = ino >> L2INOSPEREXT;
2068  bitno = ino & (INOSPEREXT - 1);
2069 
2070  /* compute the mask for setting the map.
2071  */
2072  mask = HIGHORDER >> bitno;
2073 
2074  /* the inode should be free and backed.
2075  */
2076  if (((le32_to_cpu(iagp->pmap[extno]) & mask) != 0) ||
2077  ((le32_to_cpu(iagp->wmap[extno]) & mask) != 0) ||
2078  (addressPXD(&iagp->inoext[extno]) == 0)) {
2079  if (amp)
2080  release_metapage(amp);
2081  if (bmp)
2082  release_metapage(bmp);
2083 
2084  jfs_error(imap->im_ipimap->i_sb,
2085  "diAllocBit: iag inconsistent");
2086  return -EIO;
2087  }
2088 
2089  /* mark the inode as allocated in the working map.
2090  */
2091  iagp->wmap[extno] |= cpu_to_le32(mask);
2092 
2093  /* check if all inodes within the extent are now
2094  * allocated. if so, update the free inode summary
2095  * map to reflect this.
2096  */
2097  if (iagp->wmap[extno] == cpu_to_le32(ONES)) {
2098  sword = extno >> L2EXTSPERSUM;
2099  bitno = extno & (EXTSPERSUM - 1);
2100  iagp->inosmap[sword] |= cpu_to_le32(HIGHORDER >> bitno);
2101  }
2102 
2103  /* if this was the last free inode in the iag, remove the
2104  * iag from the ag free inode list.
2105  */
2106  if (iagp->nfreeinos == cpu_to_le32(1)) {
2107  if (amp) {
2108  aiagp->inofreeback = iagp->inofreeback;
2109  write_metapage(amp);
2110  }
2111 
2112  if (bmp) {
2113  biagp->inofreefwd = iagp->inofreefwd;
2114  write_metapage(bmp);
2115  } else {
2116  imap->im_agctl[agno].inofree =
2117  le32_to_cpu(iagp->inofreefwd);
2118  }
2119  iagp->inofreefwd = iagp->inofreeback = cpu_to_le32(-1);
2120  }
2121 
2122  /* update the free inode count at the iag, ag, inode
2123  * map levels.
2124  */
2125  le32_add_cpu(&iagp->nfreeinos, -1);
2126  imap->im_agctl[agno].numfree -= 1;
2127  atomic_dec(&imap->im_numfree);
2128 
2129  return (0);
2130 }
2131 
2132 
2133 /*
2134  * NAME: diNewExt(imap,iagp,extno)
2135  *
2136  * FUNCTION: initialize a new extent of inodes for an iag, allocating
2137  * the first inode of the extent for use for the current
2138  * allocation request.
2139  *
2140  * disk resources are allocated for the new extent of inodes
2141  * and the inodes themselves are initialized to reflect their
2142  * existence within the extent (i.e. their inode numbers and
2143  * inode extent addresses are set) and their initial state
2144  * (mode and link count are set to zero).
2145  *
2146  * if the iag is new, it is not yet on an ag extent free list
2147  * but will now be placed on this list.
2148  *
2149  * if the allocation of the new extent causes the iag to
2150  * have no free extent, the iag will be removed from the
2151  * ag extent free list.
2152  *
2153  * if the iag has no free backed inodes, it will be placed
2154  * on the ag free inode list, since the addition of the new
2155  * extent will now cause it to have free inodes.
2156  *
2157  * a careful update approach is used to provide consistency
2158  * (i.e. list consistency) in the face of updates to multiple
2159  * buffers. under this approach, all required buffers are
2160  * obtained before making any updates and are held until all
2161  * updates are complete.
2162  *
2163  * PRE CONDITION: Already have buffer lock on iagp. Already have AG lock on
2164  * this AG. Must have read lock on imap inode.
2165  *
2166  * PARAMETERS:
2167  * imap - pointer to inode map control structure.
2168  * iagp - pointer to iag.
2169  * extno - extent number.
2170  *
2171  * RETURN VALUES:
2172  * 0 - success.
2173  * -ENOSPC - insufficient disk resources.
2174  * -EIO - i/o error; also returned when the iag has no free
2175  *        extents or the free inode list head cannot be resolved.
 *
 * NOTE(review): once dbAlloc() has succeeded, a later get_metapage()
 * failure jumps to error_out, which only releases the iag buffers;
 * the newly allocated blocks are not handed back on that path.
2175  */
2176 static int diNewExt(struct inomap * imap, struct iag * iagp, int extno)
2177 {
2178  int agno, iagno, fwd, back, freei = 0, sword, rc;
2179  struct iag *aiagp = NULL, *biagp = NULL, *ciagp = NULL;
2180  struct metapage *amp, *bmp, *cmp, *dmp;
2181  struct inode *ipimap;
2182  s64 blkno, hint;
2183  int i, j;
2184  u32 mask;
2185  ino_t ino;
2186  struct dinode *dp;
2187  struct jfs_sb_info *sbi;
2188 
2189  /* better have free extents.
2190  */
2191  if (!iagp->nfreeexts) {
2192  jfs_error(imap->im_ipimap->i_sb, "diNewExt: no free extents");
2193  return -EIO;
2194  }
2195 
2196  /* get the inode map inode.
2197  */
2198  ipimap = imap->im_ipimap;
2199  sbi = JFS_SBI(ipimap->i_sb);
2200 
 /* no iag buffers are held yet; error_out releases only the
  * non-NULL ones.
  */
2201  amp = bmp = cmp = NULL;
2202 
2203  /* get the ag and iag numbers for this iag.
2204  */
2205  agno = BLKTOAG(le64_to_cpu(iagp->agstart), sbi);
2206  iagno = le32_to_cpu(iagp->iagnum);
2207 
2208  /* check if this is the last free extent within the
2209  * iag. if so, the iag must be removed from the ag
2210  * free extent list, so get the iags preceding and
2211  * following the iag on this list.
2212  */
2213  if (iagp->nfreeexts == cpu_to_le32(1)) {
2214  if ((fwd = le32_to_cpu(iagp->extfreefwd)) >= 0) {
 /* first buffer requested: nothing to release on failure. */
2215  if ((rc = diIAGRead(imap, fwd, &amp)))
2216  return (rc);
2217  aiagp = (struct iag *) amp->data;
2218  }
2219 
2220  if ((back = le32_to_cpu(iagp->extfreeback)) >= 0) {
2221  if ((rc = diIAGRead(imap, back, &bmp)))
2222  goto error_out;
2223  biagp = (struct iag *) bmp->data;
2224  }
2225  } else {
2226  /* the iag has free extents. if all extents are free
2227  * (as is the case for a newly allocated iag), the iag
2228  * must be added to the ag free extent list, so get
2229  * the iag at the head of the list in preparation for
2230  * adding this iag to this list.
2231  */
2232  fwd = back = -1;
2233  if (iagp->nfreeexts == cpu_to_le32(EXTSPERIAG)) {
2234  if ((fwd = imap->im_agctl[agno].extfree) >= 0) {
2235  if ((rc = diIAGRead(imap, fwd, &amp)))
2236  goto error_out;
2237  aiagp = (struct iag *) amp->data;
2238  }
2239  }
2240  }
2241 
2242  /* check if the iag has no free inodes. if so, the iag
2243  * will have to be added to the ag free inode list, so get
2244  * the iag at the head of the list in preparation for
2245  * adding this iag to this list. in doing this, we must
2246  * check if we already have the iag at the head of
2247  * the list in hand.
2248  */
2249  if (iagp->nfreeinos == 0) {
2250  freei = imap->im_agctl[agno].inofree;
2251 
2252  if (freei >= 0) {
 /* reuse a buffer already read for the extent list when
  * it is the same iag; cmp then stays NULL and the iag
  * is written through amp or bmp.
  */
2253  if (freei == fwd) {
2254  ciagp = aiagp;
2255  } else if (freei == back) {
2256  ciagp = biagp;
2257  } else {
2258  if ((rc = diIAGRead(imap, freei, &cmp)))
2259  goto error_out;
2260  ciagp = (struct iag *) cmp->data;
2261  }
 /* defensive: a matching fwd/back whose iag was never read
  * would leave ciagp NULL.
  */
2262  if (ciagp == NULL) {
2263  jfs_error(imap->im_ipimap->i_sb,
2264  "diNewExt: ciagp == NULL");
2265  rc = -EIO;
2266  goto error_out;
2267  }
2268  }
2269  }
2270 
2271  /* allocate disk space for the inode extent. the hint given to
  * dbAlloc() is (agno << db_agl2size) - 1 when this iag has no
  * preceding backed extent, otherwise the last block of the
  * preceding extent.
2272  */
2273  if ((extno == 0) || (addressPXD(&iagp->inoext[extno - 1]) == 0))
2274  hint = ((s64) agno << sbi->bmap->db_agl2size) - 1;
2275  else
2276  hint = addressPXD(&iagp->inoext[extno - 1]) +
2277  lengthPXD(&iagp->inoext[extno - 1]) - 1;
2278 
2279  if ((rc = dbAlloc(ipimap, hint, (s64) imap->im_nbperiext, &blkno)))
2280  goto error_out;
2281 
2282  /* compute the inode number of the first inode within the
2283  * extent.
2284  */
2285  ino = (iagno << L2INOSPERIAG) + (extno << L2INOSPEREXT);
2286 
2287  /* initialize the inodes within the newly allocated extent a
2288  * page at a time.
2289  */
2290  for (i = 0; i < imap->im_nbperiext; i += sbi->nbperpage) {
2291  /* get a buffer for this page of disk inodes.
2292  */
2293  dmp = get_metapage(ipimap, blkno + i, PSIZE, 1);
2294  if (dmp == NULL) {
 /* NOTE(review): the blocks obtained from dbAlloc() above
  * are not freed before bailing out here.
  */
2295  rc = -EIO;
2296  goto error_out;
2297  }
2298  dp = (struct dinode *) dmp->data;
2299 
2300  /* initialize the inode number, mode, link count and
2301  * inode extent address. ino keeps counting across pages.
2302  */
2303  for (j = 0; j < INOSPERPAGE; j++, dp++, ino++) {
2304  dp->di_inostamp = cpu_to_le32(sbi->inostamp);
2305  dp->di_number = cpu_to_le32(ino);
2306  dp->di_fileset = cpu_to_le32(FILESYSTEM_I);
2307  dp->di_mode = 0;
2308  dp->di_nlink = 0;
2309  PXDaddress(&(dp->di_ixpxd), blkno);
2310  PXDlength(&(dp->di_ixpxd), imap->im_nbperiext);
2311  }
2312  write_metapage(dmp);
2313  }
2314 
 /* from here on nothing can fail: all buffers are in hand and
  * only in-memory updates follow.
  */
2315  /* if this is the last free extent within the iag, remove the
2316  * iag from the ag free extent list.
2317  */
2318  if (iagp->nfreeexts == cpu_to_le32(1)) {
2319  if (fwd >= 0)
2320  aiagp->extfreeback = iagp->extfreeback;
2321 
2322  if (back >= 0)
2323  biagp->extfreefwd = iagp->extfreefwd;
2324  else
2325  imap->im_agctl[agno].extfree =
2326  le32_to_cpu(iagp->extfreefwd);
2327 
2328  iagp->extfreefwd = iagp->extfreeback = cpu_to_le32(-1);
2329  } else {
2330  /* if the iag has all free extents (newly allocated iag),
2331  * add the iag to the head of the ag free extent list.
2332  */
2333  if (iagp->nfreeexts == cpu_to_le32(EXTSPERIAG)) {
2334  if (fwd >= 0)
2335  aiagp->extfreeback = cpu_to_le32(iagno);
2336 
2337  iagp->extfreefwd = cpu_to_le32(fwd);
2338  iagp->extfreeback = cpu_to_le32(-1);
2339  imap->im_agctl[agno].extfree = iagno;
2340  }
2341  }
2342 
2343  /* if the iag has no free inodes, add the iag to the head of
2344  * the ag free inode list.
2345  */
2346  if (iagp->nfreeinos == 0) {
2347  if (freei >= 0)
2348  ciagp->inofreeback = cpu_to_le32(iagno);
2349 
2350  iagp->inofreefwd =
2351  cpu_to_le32(imap->im_agctl[agno].inofree);
2352  iagp->inofreeback = cpu_to_le32(-1);
2353  imap->im_agctl[agno].inofree = iagno;
2354  }
2355 
2356  /* initialize the extent descriptor of the extent. */
2357  PXDlength(&iagp->inoext[extno], imap->im_nbperiext);
2358  PXDaddress(&iagp->inoext[extno], blkno);
2359 
2360  /* initialize the working and persistent map of the extent.
2361  * the working map will be initialized such that
2362  * it indicates the first inode of the extent is allocated.
2363  */
2364  iagp->wmap[extno] = cpu_to_le32(HIGHORDER);
2365  iagp->pmap[extno] = 0;
2366 
2367  /* update the free inode and free extent summary maps
2368  * for the extent to indicate the extent has free inodes
2369  * and no longer represents a free extent.
2370  */
2371  sword = extno >> L2EXTSPERSUM;
2372  mask = HIGHORDER >> (extno & (EXTSPERSUM - 1));
2373  iagp->extsmap[sword] |= cpu_to_le32(mask);
2374  iagp->inosmap[sword] &= cpu_to_le32(~mask);
2375 
2376  /* update the free inode and free extent counts for the
2377  * iag. one inode of the new extent is already taken, hence
  * INOSPEREXT - 1 free inodes.
2378  */
2379  le32_add_cpu(&iagp->nfreeinos, (INOSPEREXT - 1));
2380  le32_add_cpu(&iagp->nfreeexts, -1);
2381 
2382  /* update the free and backed inode counts for the ag.
2383  */
2384  imap->im_agctl[agno].numfree += (INOSPEREXT - 1);
2385  imap->im_agctl[agno].numinos += INOSPEREXT;
2386 
2387  /* update the free and backed inode counts for the inode map.
2388  */
2389  atomic_add(INOSPEREXT - 1, &imap->im_numfree);
2390  atomic_add(INOSPEREXT, &imap->im_numinos);
2391 
2392  /* write the neighbouring iags that were read above.
2393  */
2394  if (amp)
2395  write_metapage(amp);
2396  if (bmp)
2397  write_metapage(bmp);
2398  if (cmp)
2399  write_metapage(cmp);
2400 
2401  return (0);
2402 
2403  error_out:
2404 
2405  /* release the iags that were read before the failure.
2406  */
2407  if (amp)
2408  release_metapage(amp);
2409  if (bmp)
2410  release_metapage(bmp);
2411  if (cmp)
2412  release_metapage(cmp);
2413 
2414  return (rc);
2415 }
2416 
2417 
2418 /*
2419  * NAME: diNewIAG(imap,iagnop,agno,mpp)
2420  *
2421  * FUNCTION: allocate a new iag for an allocation group.
2422  *
2423  * first tries to allocate the iag from the inode map
2424  * iagfree list:
2425  * if the list has free iags, the head of the list is removed
2426  * and returned to satisfy the request.
2427  * if the inode map's iag free list is empty, the inode map
2428  * is extended to hold a new iag. this new iag is initialized
2429  * and returned to satisfy the request.
2430  *
2431  * PARAMETERS:
2432  * imap - pointer to inode map control structure.
2433  * iagnop - pointer to an iag number set with the number of the
2434  * newly allocated iag upon successful return.
2435  * agno - allocation group number (not referenced in the body).
2436  * mpp - Buffer pointer to be filled in with new IAG's buffer
2437  *
2438  * RETURN VALUES:
2439  * 0 - success.
2440  * -ENOSPC - insufficient disk resources.
2441  * -EIO - i/o error.
2442  *
2443  * serialization:
2444  * AG lock held on entry/exit;
2445  * write lock on the map is held inside;
2446  * read lock on the map is held on successful completion;
2447  *
2448  * note: new iag transaction:
2449  * . synchronously write iag;
2450  * . write log of xtree and inode of imap;
2451  * . commit;
2452  * . synchronous write of xtree (right to left, bottom to top);
2453  * . at start of logredo(): init in-memory imap with one additional iag page;
2454  * . at end of logredo(): re-read imap inode to determine
2455  * new imap size;
2456  */
2457 static int
2458 diNewIAG(struct inomap * imap, int *iagnop, int agno, struct metapage ** mpp)
2459 {
2460  int rc;
2461  int iagno, i, xlen;
2462  struct inode *ipimap;
2463  struct super_block *sb;
2464  struct jfs_sb_info *sbi;
2465  struct metapage *mp;
2466  struct iag *iagp;
2467  s64 xaddr = 0;
2468  s64 blkno;
2469  tid_t tid;
2470  struct inode *iplist[1];
2471 
2472  /* pick up pointers to the inode map and mount inodes */
2473  ipimap = imap->im_ipimap;
2474  sb = ipimap->i_sb;
2475  sbi = JFS_SBI(sb);
2476 
2477  /* acquire the free iag lock; it is dropped at the out: label
  * (or explicitly on the one early return below).
  */
2478  IAGFREE_LOCK(imap);
2479 
2480  /* if there are any iags on the inode map free iag list,
2481  * allocate the iag from the head of the list.
2482  */
2483  if (imap->im_freeiag >= 0) {
2484  /* pick up the iag number at the head of the list */
2485  iagno = imap->im_freeiag;
2486 
2487  /* determine the logical block number of the iag */
2488  blkno = IAGTOLBLK(iagno, sbi->l2nbperpage);
2489  } else {
2490  /* no free iags. the inode map will have to be extended
2491  * to include a new iag.
2492  */
2493 
2494  /* acquire inode map lock */
2495  IWRITE_LOCK(ipimap, RDWRLOCK_IMAP);
2496 
 /* the map size in pages must equal im_nextiag + 1 */
2497  if (ipimap->i_size >> L2PSIZE != imap->im_nextiag + 1) {
2498  IWRITE_UNLOCK(ipimap);
2499  IAGFREE_UNLOCK(imap);
2500  jfs_error(imap->im_ipimap->i_sb,
2501  "diNewIAG: ipimap->i_size is wrong");
2502  return -EIO;
2503  }
2504 
2505 
2506  /* get the next available iag number */
2507  iagno = imap->im_nextiag;
2508 
2509  /* make sure that we have not exceeded the maximum inode
2510  * number limit.
2511  */
2512  if (iagno > (MAXIAGS - 1)) {
2513  /* release the inode map lock */
2514  IWRITE_UNLOCK(ipimap);
2515 
2516  rc = -ENOSPC;
2517  goto out;
2518  }
2519 
2520  /*
2521  * synchronously append new iag page.
2522  */
2523  /* determine the logical address of iag page to append */
2524  blkno = IAGTOLBLK(iagno, sbi->l2nbperpage);
2525 
2526  /* Allocate extent for new iag page */
2527  xlen = sbi->nbperpage;
2528  if ((rc = dbAlloc(ipimap, 0, (s64) xlen, &xaddr))) {
2529  /* release the inode map lock */
2530  IWRITE_UNLOCK(ipimap);
2531 
2532  goto out;
2533  }
2534 
2535  /*
2536  * start transaction of update of the inode map
2537  * addressing structure pointing to the new iag page;
2538  */
2539  tid = txBegin(sb, COMMIT_FORCE);
2540  mutex_lock(&JFS_IP(ipimap)->commit_mutex);
2541 
2542  /* update the inode map addressing structure to point to it */
2543  if ((rc =
2544  xtInsert(tid, ipimap, 0, blkno, xlen, &xaddr, 0))) {
2545  txEnd(tid);
2546  mutex_unlock(&JFS_IP(ipimap)->commit_mutex);
2547  /* Free the blocks allocated for the iag since it was
2548  * not successfully added to the inode map
2549  */
2550  dbFree(ipimap, xaddr, (s64) xlen);
2551 
2552  /* release the inode map lock */
2553  IWRITE_UNLOCK(ipimap);
2554 
2555  goto out;
2556  }
2557 
2558  /* update the inode map's inode to reflect the extension */
2559  ipimap->i_size += PSIZE;
2560  inode_add_bytes(ipimap, PSIZE);
2561 
2562  /* assign a buffer for the page */
2563  mp = get_metapage(ipimap, blkno, PSIZE, 0);
2564  if (!mp) {
2565  /*
2566  * This is very unlikely since we just created the
2567  * extent, but let's try to handle it correctly
2568  */
2569  xtTruncate(tid, ipimap, ipimap->i_size - PSIZE,
2570  COMMIT_PWMAP);
2571 
2572  txAbort(tid, 0);
2573  txEnd(tid);
2574  mutex_unlock(&JFS_IP(ipimap)->commit_mutex);
2575 
2576  /* release the inode map lock */
2577  IWRITE_UNLOCK(ipimap);
2578 
2579  rc = -EIO;
2580  goto out;
2581  }
2582  iagp = (struct iag *) mp->data;
2583 
2584  /* init the iag: not on any list, no backed inodes, all
  * extents free.
  */
2585  memset(iagp, 0, sizeof(struct iag));
2586  iagp->iagnum = cpu_to_le32(iagno);
2587  iagp->inofreefwd = iagp->inofreeback = cpu_to_le32(-1);
2588  iagp->extfreefwd = iagp->extfreeback = cpu_to_le32(-1);
2589  iagp->iagfree = cpu_to_le32(-1);
2590  iagp->nfreeinos = 0;
2591  iagp->nfreeexts = cpu_to_le32(EXTSPERIAG);
2592 
2593  /* initialize the free inode summary map (free extent
2594  * summary map initialization handled by the memset above).
2595  */
2596  for (i = 0; i < SMAPSZ; i++)
2597  iagp->inosmap[i] = cpu_to_le32(ONES);
2598 
2599  /*
2600  * Write and sync the metapage
2601  */
2602  flush_metapage(mp);
2603 
2604  /*
2605  * txCommit(COMMIT_FORCE) will synchronously write address
2606  * index pages and inode after commit in careful update order
2607  * of address index pages (right to left, bottom up);
2608  */
 /* NOTE(review): the txCommit() result stored in rc is not
  * examined; rc is overwritten by the diIAGRead() call below.
  */
2609  iplist[0] = ipimap;
2610  rc = txCommit(tid, 1, &iplist[0], COMMIT_FORCE);
2611 
2612  txEnd(tid);
2613  mutex_unlock(&JFS_IP(ipimap)->commit_mutex);
2614 
2615  duplicateIXtree(sb, blkno, xlen, &xaddr);
2616 
2617  /* update the next available iag number */
2618  imap->im_nextiag += 1;
2619 
2620  /* Add the iag to the iag free list so we don't lose the iag
2621  * if a failure happens now.
2622  */
2623  imap->im_freeiag = iagno;
2624 
2625  /* Until we have logredo working, we want the imap inode &
2626  * control page to be up to date.
2627  */
2628  diSync(ipimap);
2629 
2630  /* release the inode map lock */
2631  IWRITE_UNLOCK(ipimap);
2632  }
2633 
2634  /* obtain read lock on map; it stays held on the success path */
2635  IREAD_LOCK(ipimap, RDWRLOCK_IMAP);
2636 
2637  /* read the iag; any read failure is reported as -EIO */
2638  if ((rc = diIAGRead(imap, iagno, &mp))) {
2639  IREAD_UNLOCK(ipimap);
2640  rc = -EIO;
2641  goto out;
2642  }
2643  iagp = (struct iag *) mp->data;
2644 
2645  /* remove the iag from the iag free list */
2646  imap->im_freeiag = le32_to_cpu(iagp->iagfree);
2647  iagp->iagfree = cpu_to_le32(-1);
2648 
2649  /* set the return iag number and buffer pointer */
2650  *iagnop = iagno;
2651  *mpp = mp;
2652 
2653  out:
2654  /* release the iag free lock */
2655  IAGFREE_UNLOCK(imap);
2656 
2657  return (rc);
2658 }
2659 
2660 /*
2661  * NAME: diIAGRead()
2662  *
2663  * FUNCTION: get the buffer for the specified iag within a fileset
2664  * or aggregate inode map.
2665  *
2666  * PARAMETERS:
2667  * imap - pointer to inode map control structure.
2668  * iagno - iag number.
2669  * bpp - point to buffer pointer to be filled in on successful
2670  * exit.
2671  *
2672  * SERIALIZATION:
2673  * must have read lock on imap inode
2674  * (When called by diExtendFS, the filesystem is quiesced, therefore
2675  * the read lock is unnecessary.)
2676  *
2677  * RETURN VALUES:
2678  * 0 - success.
2679  * -EIO - i/o error.
2680  */
2681 static int diIAGRead(struct inomap * imap, int iagno, struct metapage ** mpp)
2682 {
2683  struct inode *ipimap = imap->im_ipimap;
2684  s64 blkno;
2685 
2686  /* compute the logical block number of the iag. */
2687  blkno = IAGTOLBLK(iagno, JFS_SBI(ipimap->i_sb)->l2nbperpage);
2688 
2689  /* read the iag. */
2690  *mpp = read_metapage(ipimap, blkno, PSIZE, 0);
2691  if (*mpp == NULL) {
2692  return -EIO;
2693  }
2694 
2695  return (0);
2696 }
2697 
2698 /*
2699  * NAME: diFindFree()
2700  *
2701  * FUNCTION: find the first free bit in a word starting at
2702  * the specified bit position.
2703  *
2704  * PARAMETERS:
2705  * word - word to be examined.
2706  * start - starting bit position.
2707  *
2708  * RETURN VALUES:
2709  * bit position of first free bit in the word or 32 if
2710  * no free bits were found.
2711  */
2712 static int diFindFree(u32 word, int start)
2713 {
2714  int bitno;
2715  assert(start < 32);
2716  /* scan the word for the first free bit. */
2717  for (word <<= start, bitno = start; bitno < 32;
2718  bitno++, word <<= 1) {
2719  if ((word & HIGHORDER) == 0)
2720  break;
2721  }
2722  return (bitno);
2723 }
2724 
2725 /*
2726  * NAME: diUpdatePMap()
2727  *
2728  * FUNCTION: Update the persistent map in an IAG for the allocation or
2729  * freeing of the specified inode.
2730  *
2731  * PRE CONDITIONS: Working map has already been updated for allocate.
2732  *
2733  * PARAMETERS:
2734  * ipimap - Incore inode map inode
2735  * inum - Number of inode to mark in permanent map
2736  * is_free - If 'true' indicates inode should be marked freed, otherwise
2737  * indicates inode should be marked allocated.
2738  *
2739  * RETURN VALUES:
2740  * 0 for success
2741  */
2742 int
2743 diUpdatePMap(struct inode *ipimap,
2744  unsigned long inum, bool is_free, struct tblock * tblk)
2745 {
2746  int rc;
2747  struct iag *iagp;
2748  struct metapage *mp;
2749  int iagno, ino, extno, bitno;
2750  struct inomap *imap;
2751  u32 mask;
2752  struct jfs_log *log;
2753  int lsn, difft, diffp;
2754  unsigned long flags;
2755 
2756  imap = JFS_IP(ipimap)->i_imap;
2757  /* get the iag number containing the inode */
2758  iagno = INOTOIAG(inum);
2759  /* make sure that the iag is contained within the map */
2760  if (iagno >= imap->im_nextiag) {
2761  jfs_error(ipimap->i_sb,
2762  "diUpdatePMap: the iag is outside the map");
2763  return -EIO;
2764  }
2765  /* read the iag */
2766  IREAD_LOCK(ipimap, RDWRLOCK_IMAP);
2767  rc = diIAGRead(imap, iagno, &mp);
2768  IREAD_UNLOCK(ipimap);
2769  if (rc)
2770  return (rc);
2771  metapage_wait_for_io(mp);
2772  iagp = (struct iag *) mp->data;
2773  /* get the inode number and extent number of the inode within
2774  * the iag and the inode number within the extent.
2775  */
2776  ino = inum & (INOSPERIAG - 1);
2777  extno = ino >> L2INOSPEREXT;
2778  bitno = ino & (INOSPEREXT - 1);
2779  mask = HIGHORDER >> bitno;
2780  /*
2781  * mark the inode free in persistent map:
2782  */
2783  if (is_free) {
2784  /* The inode should have been allocated both in working
2785  * map and in persistent map;
2786  * the inode will be freed from working map at the release
2787  * of last reference release;
2788  */
2789  if (!(le32_to_cpu(iagp->wmap[extno]) & mask)) {
2790  jfs_error(ipimap->i_sb,
2791  "diUpdatePMap: inode %ld not marked as "
2792  "allocated in wmap!", inum);
2793  }
2794  if (!(le32_to_cpu(iagp->pmap[extno]) & mask)) {
2795  jfs_error(ipimap->i_sb,
2796  "diUpdatePMap: inode %ld not marked as "
2797  "allocated in pmap!", inum);
2798  }
2799  /* update the bitmap for the extent of the freed inode */
2800  iagp->pmap[extno] &= cpu_to_le32(~mask);
2801  }
2802  /*
2803  * mark the inode allocated in persistent map:
2804  */
2805  else {
2806  /* The inode should be already allocated in the working map
2807  * and should be free in persistent map;
2808  */
2809  if (!(le32_to_cpu(iagp->wmap[extno]) & mask)) {
2810  release_metapage(mp);
2811  jfs_error(ipimap->i_sb,
2812  "diUpdatePMap: the inode is not allocated in "
2813  "the working map");
2814  return -EIO;
2815  }
2816  if ((le32_to_cpu(iagp->pmap[extno]) & mask) != 0) {
2817  release_metapage(mp);
2818  jfs_error(ipimap->i_sb,
2819  "diUpdatePMap: the inode is not free in the "
2820  "persistent map");
2821  return -EIO;
2822  }
2823  /* update the bitmap for the extent of the allocated inode */
2824  iagp->pmap[extno] |= cpu_to_le32(mask);
2825  }
2826  /*
2827  * update iag lsn
2828  */
2829  lsn = tblk->lsn;
2830  log = JFS_SBI(tblk->sb)->log;
2831  LOGSYNC_LOCK(log, flags);
2832  if (mp->lsn != 0) {
2833  /* inherit older/smaller lsn */
2834  logdiff(difft, lsn, log);
2835  logdiff(diffp, mp->lsn, log);
2836  if (difft < diffp) {
2837  mp->lsn = lsn;
2838  /* move mp after tblock in logsync list */
2839  list_move(&mp->synclist, &tblk->synclist);
2840  }
2841  /* inherit younger/larger clsn */
2842  assert(mp->clsn);
2843  logdiff(difft, tblk->clsn, log);
2844  logdiff(diffp, mp->clsn, log);
2845  if (difft > diffp)
2846  mp->clsn = tblk->clsn;
2847  } else {
2848  mp->log = log;
2849  mp->lsn = lsn;
2850  /* insert mp after tblock in logsync list */
2851  log->count++;
2852  list_add(&mp->synclist, &tblk->synclist);
2853  mp->clsn = tblk->clsn;
2854  }
2855  LOGSYNC_UNLOCK(log, flags);
2856  write_metapage(mp);
2857  return (0);
2858 }
2859 
2860 /*
2861  * diExtendFS()
2862  *
2863  * function: update imap for extendfs();
2864  *
2865  * note: AG size has been increased s.t. each k old contiguous AGs are
2866  * coalesced into a new AG;
2867  */
2868 int diExtendFS(struct inode *ipimap, struct inode *ipbmap)
2869 {
2870  int rc, rcx = 0;
2871  struct inomap *imap = JFS_IP(ipimap)->i_imap;
2872  struct iag *iagp = NULL, *hiagp = NULL;
2873  struct bmap *mp = JFS_SBI(ipbmap->i_sb)->bmap;
2874  struct metapage *bp, *hbp;
2875  int i, n, head;
2876  int numinos, xnuminos = 0, xnumfree = 0;
2877  s64 agstart;
2878 
2879  jfs_info("diExtendFS: nextiag:%d numinos:%d numfree:%d",
2880  imap->im_nextiag, atomic_read(&imap->im_numinos),
2881  atomic_read(&imap->im_numfree));
2882 
2883  /*
2884  * reconstruct imap
2885  *
2886  * coalesce contiguous k (newAGSize/oldAGSize) AGs;
2887  * i.e., (AGi, ..., AGj) where i = k*n and j = k*(n+1) - 1 to AGn;
2888  * note: new AG size = old AG size * (2**x).
2889  */
2890 
2891  /* init per AG control information im_agctl[] */
2892  for (i = 0; i < MAXAG; i++) {
2893  imap->im_agctl[i].inofree = -1;
2894  imap->im_agctl[i].extfree = -1;
2895  imap->im_agctl[i].numinos = 0; /* number of backed inodes */
2896  imap->im_agctl[i].numfree = 0; /* number of free backed inodes */
2897  }
2898 
2899  /*
2900  * process each iag page of the map.
2901  *
2902  * rebuild AG Free Inode List, AG Free Inode Extent List;
2903  */
2904  for (i = 0; i < imap->im_nextiag; i++) {
2905  if ((rc = diIAGRead(imap, i, &bp))) {
2906  rcx = rc;
2907  continue;
2908  }
2909  iagp = (struct iag *) bp->data;
2910  if (le32_to_cpu(iagp->iagnum) != i) {
2911  release_metapage(bp);
2912  jfs_error(ipimap->i_sb,
2913  "diExtendFs: unexpected value of iagnum");
2914  return -EIO;
2915  }
2916 
2917  /* leave free iag in the free iag list */
2918  if (iagp->nfreeexts == cpu_to_le32(EXTSPERIAG)) {
2919  release_metapage(bp);
2920  continue;
2921  }
2922 
2923  agstart = le64_to_cpu(iagp->agstart);
2924  n = agstart >> mp->db_agl2size;
2925  iagp->agstart = cpu_to_le64((s64)n << mp->db_agl2size);
2926 
2927  /* compute backed inodes */
2928  numinos = (EXTSPERIAG - le32_to_cpu(iagp->nfreeexts))
2929  << L2INOSPEREXT;
2930  if (numinos > 0) {
2931  /* merge AG backed inodes */
2932  imap->im_agctl[n].numinos += numinos;
2933  xnuminos += numinos;
2934  }
2935 
2936  /* if any backed free inodes, insert at AG free inode list */
2937  if ((int) le32_to_cpu(iagp->nfreeinos) > 0) {
2938  if ((head = imap->im_agctl[n].inofree) == -1) {
2939  iagp->inofreefwd = cpu_to_le32(-1);
2940  iagp->inofreeback = cpu_to_le32(-1);
2941  } else {
2942  if ((rc = diIAGRead(imap, head, &hbp))) {
2943  rcx = rc;
2944  goto nextiag;
2945  }
2946  hiagp = (struct iag *) hbp->data;
2947  hiagp->inofreeback = iagp->iagnum;
2948  iagp->inofreefwd = cpu_to_le32(head);
2949  iagp->inofreeback = cpu_to_le32(-1);
2950  write_metapage(hbp);
2951  }
2952 
2953  imap->im_agctl[n].inofree =
2954  le32_to_cpu(iagp->iagnum);
2955 
2956  /* merge AG backed free inodes */
2957  imap->im_agctl[n].numfree +=
2958  le32_to_cpu(iagp->nfreeinos);
2959  xnumfree += le32_to_cpu(iagp->nfreeinos);
2960  }
2961 
2962  /* if any free extents, insert at AG free extent list */
2963  if (le32_to_cpu(iagp->nfreeexts) > 0) {
2964  if ((head = imap->im_agctl[n].extfree) == -1) {
2965  iagp->extfreefwd = cpu_to_le32(-1);
2966  iagp->extfreeback = cpu_to_le32(-1);
2967  } else {
2968  if ((rc = diIAGRead(imap, head, &hbp))) {
2969  rcx = rc;
2970  goto nextiag;
2971  }
2972  hiagp = (struct iag *) hbp->data;
2973  hiagp->extfreeback = iagp->iagnum;
2974  iagp->extfreefwd = cpu_to_le32(head);
2975  iagp->extfreeback = cpu_to_le32(-1);
2976  write_metapage(hbp);
2977  }
2978 
2979  imap->im_agctl[n].extfree =
2980  le32_to_cpu(iagp->iagnum);
2981  }
2982 
2983  nextiag:
2984  write_metapage(bp);
2985  }
2986 
2987  if (xnuminos != atomic_read(&imap->im_numinos) ||
2988  xnumfree != atomic_read(&imap->im_numfree)) {
2989  jfs_error(ipimap->i_sb,
2990  "diExtendFs: numinos or numfree incorrect");
2991  return -EIO;
2992  }
2993 
2994  return rcx;
2995 }
2996 
2997 
2998 /*
2999  * duplicateIXtree()
3000  *
3001  * serialization: IWRITE_LOCK held on entry/exit
3002  *
3003  * note: shadow page with regular inode (rel.2);
3004  */
3005 static void duplicateIXtree(struct super_block *sb, s64 blkno,
3006  int xlen, s64 *xaddr)
3007 {
3008  struct jfs_superblock *j_sb;
3009  struct buffer_head *bh;
3010  struct inode *ip;
3011  tid_t tid;
3012 
3013  /* if AIT2 ipmap2 is bad, do not try to update it */
3014  if (JFS_SBI(sb)->mntflag & JFS_BAD_SAIT) /* s_flag */
3015  return;
3016  ip = diReadSpecial(sb, FILESYSTEM_I, 1);
3017  if (ip == NULL) {
3018  JFS_SBI(sb)->mntflag |= JFS_BAD_SAIT;
3019  if (readSuper(sb, &bh))
3020  return;
3021  j_sb = (struct jfs_superblock *)bh->b_data;
3022  j_sb->s_flag |= cpu_to_le32(JFS_BAD_SAIT);
3023 
3024  mark_buffer_dirty(bh);
3025  sync_dirty_buffer(bh);
3026  brelse(bh);
3027  return;
3028  }
3029 
3030  /* start transaction */
3031  tid = txBegin(sb, COMMIT_FORCE);
3032  /* update the inode map addressing structure to point to it */
3033  if (xtInsert(tid, ip, 0, blkno, xlen, xaddr, 0)) {
3034  JFS_SBI(sb)->mntflag |= JFS_BAD_SAIT;
3035  txAbort(tid, 1);
3036  goto cleanup;
3037 
3038  }
3039  /* update the inode map's inode to reflect the extension */
3040  ip->i_size += PSIZE;
3041  inode_add_bytes(ip, PSIZE);
3042  txCommit(tid, 1, &ip, COMMIT_FORCE);
3043  cleanup:
3044  txEnd(tid);
3045  diFreeSpecial(ip);
3046 }
3047 
3048 /*
3049  * NAME: copy_from_dinode()
3050  *
3051  * FUNCTION: Copies inode info from disk inode to in-memory inode
3052  *
3053  * RETURN VALUES:
3054  * 0 - success
3055  * -ENOMEM - insufficient memory
3056  */
3057 static int copy_from_dinode(struct dinode * dip, struct inode *ip)
3058 {
3059  struct jfs_inode_info *jfs_ip = JFS_IP(ip);
3060  struct jfs_sb_info *sbi = JFS_SBI(ip->i_sb);
3061 
3062  jfs_ip->fileset = le32_to_cpu(dip->di_fileset);
3063  jfs_ip->mode2 = le32_to_cpu(dip->di_mode);
3064  jfs_set_inode_flags(ip);
3065 
3066  ip->i_mode = le32_to_cpu(dip->di_mode) & 0xffff;
3067  if (sbi->umask != -1) {
3068  ip->i_mode = (ip->i_mode & ~0777) | (0777 & ~sbi->umask);
3069  /* For directories, add x permission if r is allowed by umask */
3070  if (S_ISDIR(ip->i_mode)) {
3071  if (ip->i_mode & 0400)
3072  ip->i_mode |= 0100;
3073  if (ip->i_mode & 0040)
3074  ip->i_mode |= 0010;
3075  if (ip->i_mode & 0004)
3076  ip->i_mode |= 0001;
3077  }
3078  }
3079  set_nlink(ip, le32_to_cpu(dip->di_nlink));
3080 
3081  jfs_ip->saved_uid = make_kuid(&init_user_ns, le32_to_cpu(dip->di_uid));
3082  if (!uid_valid(sbi->uid))
3083  ip->i_uid = jfs_ip->saved_uid;
3084  else {
3085  ip->i_uid = sbi->uid;
3086  }
3087 
3088  jfs_ip->saved_gid = make_kgid(&init_user_ns, le32_to_cpu(dip->di_gid));
3089  if (!gid_valid(sbi->gid))
3090  ip->i_gid = jfs_ip->saved_gid;
3091  else {
3092  ip->i_gid = sbi->gid;
3093  }
3094 
3095  ip->i_size = le64_to_cpu(dip->di_size);
3096  ip->i_atime.tv_sec = le32_to_cpu(dip->di_atime.tv_sec);
3097  ip->i_atime.tv_nsec = le32_to_cpu(dip->di_atime.tv_nsec);
3098  ip->i_mtime.tv_sec = le32_to_cpu(dip->di_mtime.tv_sec);
3099  ip->i_mtime.tv_nsec = le32_to_cpu(dip->di_mtime.tv_nsec);
3100  ip->i_ctime.tv_sec = le32_to_cpu(dip->di_ctime.tv_sec);
3101  ip->i_ctime.tv_nsec = le32_to_cpu(dip->di_ctime.tv_nsec);
3102  ip->i_blocks = LBLK2PBLK(ip->i_sb, le64_to_cpu(dip->di_nblocks));
3103  ip->i_generation = le32_to_cpu(dip->di_gen);
3104 
3105  jfs_ip->ixpxd = dip->di_ixpxd; /* in-memory pxd's are little-endian */
3106  jfs_ip->acl = dip->di_acl; /* as are dxd's */
3107  jfs_ip->ea = dip->di_ea;
3108  jfs_ip->next_index = le32_to_cpu(dip->di_next_index);
3109  jfs_ip->otime = le32_to_cpu(dip->di_otime.tv_sec);
3110  jfs_ip->acltype = le32_to_cpu(dip->di_acltype);
3111 
3112  if (S_ISCHR(ip->i_mode) || S_ISBLK(ip->i_mode)) {
3113  jfs_ip->dev = le32_to_cpu(dip->di_rdev);
3114  ip->i_rdev = new_decode_dev(jfs_ip->dev);
3115  }
3116 
3117  if (S_ISDIR(ip->i_mode)) {
3118  memcpy(&jfs_ip->i_dirtable, &dip->di_dirtable, 384);
3119  } else if (S_ISREG(ip->i_mode) || S_ISLNK(ip->i_mode)) {
3120  memcpy(&jfs_ip->i_xtroot, &dip->di_xtroot, 288);
3121  } else
3122  memcpy(&jfs_ip->i_inline_ea, &dip->di_inlineea, 128);
3123 
3124  /* Zero the in-memory-only stuff */
3125  jfs_ip->cflag = 0;
3126  jfs_ip->btindex = 0;
3127  jfs_ip->btorder = 0;
3128  jfs_ip->bxflag = 0;
3129  jfs_ip->blid = 0;
3130  jfs_ip->atlhead = 0;
3131  jfs_ip->atltail = 0;
3132  jfs_ip->xtlid = 0;
3133  return (0);
3134 }
3135 
3136 /*
3137  * NAME: copy_to_dinode()
3138  *
3139  * FUNCTION: Copies inode info from in-memory inode to disk inode
3140  */
3141 static void copy_to_dinode(struct dinode * dip, struct inode *ip)
3142 {
3143  struct jfs_inode_info *jfs_ip = JFS_IP(ip);
3144  struct jfs_sb_info *sbi = JFS_SBI(ip->i_sb);
3145 
3146  dip->di_fileset = cpu_to_le32(jfs_ip->fileset);
3147  dip->di_inostamp = cpu_to_le32(sbi->inostamp);
3148  dip->di_number = cpu_to_le32(ip->i_ino);
3149  dip->di_gen = cpu_to_le32(ip->i_generation);
3150  dip->di_size = cpu_to_le64(ip->i_size);
3151  dip->di_nblocks = cpu_to_le64(PBLK2LBLK(ip->i_sb, ip->i_blocks));
3152  dip->di_nlink = cpu_to_le32(ip->i_nlink);
3153  if (!uid_valid(sbi->uid))
3154  dip->di_uid = cpu_to_le32(i_uid_read(ip));
3155  else
3157  jfs_ip->saved_uid));
3158  if (!gid_valid(sbi->gid))
3159  dip->di_gid = cpu_to_le32(i_gid_read(ip));
3160  else
3162  jfs_ip->saved_gid));
3163  jfs_get_inode_flags(jfs_ip);
3164  /*
3165  * mode2 is only needed for storing the higher order bits.
3166  * Trust i_mode for the lower order ones
3167  */
3168  if (sbi->umask == -1)
3169  dip->di_mode = cpu_to_le32((jfs_ip->mode2 & 0xffff0000) |
3170  ip->i_mode);
3171  else /* Leave the original permissions alone */
3172  dip->di_mode = cpu_to_le32(jfs_ip->mode2);
3173 
3174  dip->di_atime.tv_sec = cpu_to_le32(ip->i_atime.tv_sec);
3175  dip->di_atime.tv_nsec = cpu_to_le32(ip->i_atime.tv_nsec);
3176  dip->di_ctime.tv_sec = cpu_to_le32(ip->i_ctime.tv_sec);
3177  dip->di_ctime.tv_nsec = cpu_to_le32(ip->i_ctime.tv_nsec);
3178  dip->di_mtime.tv_sec = cpu_to_le32(ip->i_mtime.tv_sec);
3179  dip->di_mtime.tv_nsec = cpu_to_le32(ip->i_mtime.tv_nsec);
3180  dip->di_ixpxd = jfs_ip->ixpxd; /* in-memory pxd's are little-endian */
3181  dip->di_acl = jfs_ip->acl; /* as are dxd's */
3182  dip->di_ea = jfs_ip->ea;
3183  dip->di_next_index = cpu_to_le32(jfs_ip->next_index);
3184  dip->di_otime.tv_sec = cpu_to_le32(jfs_ip->otime);
3185  dip->di_otime.tv_nsec = 0;
3186  dip->di_acltype = cpu_to_le32(jfs_ip->acltype);
3187  if (S_ISCHR(ip->i_mode) || S_ISBLK(ip->i_mode))
3188  dip->di_rdev = cpu_to_le32(jfs_ip->dev);
3189 }