Linux Kernel  3.7.1
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Groups Pages
nx-842.c
Go to the documentation of this file.
1 /*
2  * Driver for IBM Power 842 compression accelerator
3  *
4  * This program is free software; you can redistribute it and/or modify
5  * it under the terms of the GNU General Public License as published by
6  * the Free Software Foundation; either version 2 of the License, or
7  * (at your option) any later version.
8  *
9  * This program is distributed in the hope that it will be useful,
10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12  * GNU General Public License for more details.
13  *
14  * You should have received a copy of the GNU General Public License
15  * along with this program; if not, write to the Free Software
16  * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
17  *
18  * Copyright (C) IBM Corporation, 2012
19  *
20  * Authors: Robert Jennings <[email protected]>
21  * Seth Jennings <[email protected]>
22  */
23 
24 #include <linux/kernel.h>
25 #include <linux/module.h>
26 #include <linux/nx842.h>
27 #include <linux/of.h>
28 #include <linux/slab.h>
29 
30 #include <asm/page.h>
31 #include <asm/pSeries_reconfig.h>
32 #include <asm/vio.h>
33 
34 #include "nx_csbcpb.h" /* struct nx_csbcpb */
35 
36 #define MODULE_NAME "nx-compress"
37 MODULE_LICENSE("GPL");
38 MODULE_AUTHOR("Robert Jennings <[email protected]>");
39 MODULE_DESCRIPTION("842 H/W Compression driver for IBM Power processors");
40 
/* log2 of the two buffer granularities used by this driver */
#define SHIFT_4K	12
#define SHIFT_64K	16

/* the same granularities expressed in bytes */
#define SIZE_4K		(1UL << SHIFT_4K)
#define SIZE_64K	(1UL << SHIFT_64K)

/* IO buffers handed to the coprocessor must be 128 byte aligned */
#define IO_BUFFER_ALIGN	128
48 
/*
 * Header placed at the start of every compressed buffer produced by
 * nx842_compress() and parsed by nx842_decompress().
 *
 * sizes[] holds one entry per block; a negative entry marks a block that
 * was stored uncompressed (its magnitude is the stored length).
 */
struct nx842_header {
	int blocks_nr; /* number of compressed blocks */
	int offset; /* offset of the first block (from beginning of header) */
	int sizes[]; /* size of compressed blocks (flexible array member) */
};
54 
55 static inline int nx842_header_size(const struct nx842_header *hdr)
56 {
57  return sizeof(struct nx842_header) +
58  hdr->blocks_nr * sizeof(hdr->sizes[0]);
59 }
60 
/* Macros for fields within nx_csbcpb */
/* Check the valid bit within the csbcpb valid field */
#define NX842_CSBCBP_VALID_CHK(x) ((x) & BIT_MASK(7))

/* CE macros operate on the completion_extension field bits in the csbcpb.
 * CE0 0=full completion, 1=partial completion
 * CE1 0=CE0 indicates completion, 1=termination (output may be modified)
 * CE2 0=processed_bytes is source bytes, 1=processed_bytes is target bytes
 *
 * The argument is parenthesized so that an expression argument
 * (e.g. "a | b") is masked as a whole rather than only its last operand. */
#define NX842_CSBCPB_CE0(x) ((x) & BIT_MASK(7))
#define NX842_CSBCPB_CE1(x) ((x) & BIT_MASK(6))
#define NX842_CSBCPB_CE2(x) ((x) & BIT_MASK(5))
72 
73 /* The NX unit accepts data only on 4K page boundaries */
74 #define NX842_HW_PAGE_SHIFT SHIFT_4K
75 #define NX842_HW_PAGE_SIZE (ASM_CONST(1) << NX842_HW_PAGE_SHIFT)
76 #define NX842_HW_PAGE_MASK (~(NX842_HW_PAGE_SIZE-1))
77 
81 };
82 
91 };
92 
93 static struct nx842_devdata {
94  struct vio_dev *vdev;
95  struct device *dev;
96  struct ibm_nx842_counters *counters;
97  unsigned int max_sg_len;
98  unsigned int max_sync_size;
99  unsigned int max_sync_sg;
100  enum nx842_status status;
101 } __rcu *devdata;
102 static DEFINE_SPINLOCK(devdata_mutex);
103 
/*
 * Generate nx842_inc_<counter>(): atomically bump one statistics counter.
 *
 * Both the device data and its counters block are checked: the placeholder
 * devdata installed by nx842_init() is zero-filled and therefore has no
 * counters, so incrementing through it must be a no-op, not a NULL
 * dereference.
 */
#define NX842_COUNTER_INC(_x) \
static inline void nx842_inc_##_x( \
	const struct nx842_devdata *dev) { \
	if (dev && dev->counters) \
		atomic64_inc(&dev->counters->_x); \
}
115 
116 #define NX842_HIST_SLOTS 16
117 
118 static void ibm_nx842_incr_hist(atomic64_t *times, unsigned int time)
119 {
120  int bucket = fls(time);
121 
122  if (bucket)
123  bucket = min((NX842_HIST_SLOTS - 1), bucket - 1);
124 
125  atomic64_inc(&times[bucket]);
126 }
127 
/* NX unit operation flags */
#define NX842_OP_COMPRESS 0x0
#define NX842_OP_CRC 0x1
#define NX842_OP_DECOMPRESS 0x2
#define NX842_OP_COMPRESS_CRC (NX842_OP_COMPRESS | NX842_OP_CRC)
#define NX842_OP_DECOMPRESS_CRC (NX842_OP_DECOMPRESS | NX842_OP_CRC)
#define NX842_OP_ASYNC (1<<23)
#define NX842_OP_NOTIFY (1<<22)
/*
 * Place the low 8 bits of x in bits 8..15.  The argument is parenthesized
 * so an expression such as "a | b" is masked as a whole before shifting.
 */
#define NX842_OP_NOTIFY_INT(x) (((x) & 0xff) << 8)
137 
/*
 * get_desired_dma callback of the VIO driver: this driver sets up no DMA
 * mappings of its own, so it never asks for any.
 */
static unsigned long nx842_get_desired_dma(struct vio_dev *viodev)
{
	const unsigned long none = 0;

	return none;
}
143 
/* one contiguous piece of a buffer, as described to pHyp */
struct nx842_slentry {
	unsigned long ptr; /* Real address (use __pa()) */
	unsigned long len;
};

/* pHyp scatterlist entry */
struct nx842_scatterlist {
	int entry_nr; /* number of slentries */
	struct nx842_slentry *entries; /* ptr to array of slentries */
};
154 
155 /* Does not include sizeof(entry_nr) in the size */
156 static inline unsigned long nx842_get_scatterlist_size(
157  struct nx842_scatterlist *sl)
158 {
159  return sl->entry_nr * sizeof(struct nx842_slentry);
160 }
161 
162 static int nx842_build_scatterlist(unsigned long buf, int len,
163  struct nx842_scatterlist *sl)
164 {
165  unsigned long nextpage;
166  struct nx842_slentry *entry;
167 
168  sl->entry_nr = 0;
169 
170  entry = sl->entries;
171  while (len) {
172  entry->ptr = __pa(buf);
173  nextpage = ALIGN(buf + 1, NX842_HW_PAGE_SIZE);
174  if (nextpage < buf + len) {
175  /* we aren't at the end yet */
176  if (IS_ALIGNED(buf, NX842_HW_PAGE_SIZE))
177  /* we are in the middle (or beginning) */
178  entry->len = NX842_HW_PAGE_SIZE;
179  else
180  /* we are at the beginning */
181  entry->len = nextpage - buf;
182  } else {
183  /* at the end */
184  entry->len = len;
185  }
186 
187  len -= entry->len;
188  buf += entry->len;
189  sl->entry_nr++;
190  entry++;
191  }
192 
193  return 0;
194 }
195 
/*
 * Working memory for software decompression.
 *
 * NOTE(review): presumably f8/f4/f2 are histories of recently seen 8-, 4-
 * and 2-byte values, with f8 and f4 sharing storage; confirm against the
 * software decompressor further down the file.
 */
struct sw842_fifo {
	union {
		char f8[256][8];	/* 256 entries of 8 bytes ... */
		char f4[512][4];	/* ... overlaid by 512 entries of 4 */
	};
	char f2[256][2];		/* 256 entries of 2 bytes */
	unsigned char f84_full;
	unsigned char f2_full;
	unsigned char f8_count;
	unsigned char f2_count;
	unsigned int f4_count;
};
211 
212 /*
213  * Working memory for crypto API
214  */
216  char bounce[PAGE_SIZE]; /* bounce buffer for decompression input */
217  union {
218  /* hardware working memory */
219  struct {
220  /* scatterlist */
221  char slin[SIZE_4K];
222  char slout[SIZE_4K];
223  /* coprocessor status/parameter block */
225  };
226  /* software working memory */
227  struct sw842_fifo swfifo; /* software decompression fifo */
228  };
229 };
230 
232 {
233  return sizeof(struct nx842_workmem) + NX842_HW_PAGE_SIZE;
234 }
236 
238 {
239  return sizeof(struct nx842_workmem);
240 }
242 
243 static int nx842_validate_result(struct device *dev,
244  struct cop_status_block *csb)
245 {
246  /* The csb must be valid after returning from vio_h_cop_sync */
247  if (!NX842_CSBCBP_VALID_CHK(csb->valid)) {
248  dev_err(dev, "%s: cspcbp not valid upon completion.\n",
249  __func__);
250  dev_dbg(dev, "valid:0x%02x cs:0x%02x cc:0x%02x ce:0x%02x\n",
251  csb->valid,
252  csb->crb_seq_number,
253  csb->completion_code,
254  csb->completion_extension);
255  dev_dbg(dev, "processed_bytes:%d address:0x%016lx\n",
257  (unsigned long)csb->address);
258  return -EIO;
259  }
260 
261  /* Check return values from the hardware in the CSB */
262  switch (csb->completion_code) {
263  case 0: /* Completed without error */
264  break;
265  case 64: /* Target bytes > Source bytes during compression */
266  case 13: /* Output buffer too small */
267  dev_dbg(dev, "%s: Compression output larger than input\n",
268  __func__);
269  return -ENOSPC;
270  case 66: /* Input data contains an illegal template field */
271  case 67: /* Template indicates data past the end of the input stream */
272  dev_dbg(dev, "%s: Bad data for decompression (code:%d)\n",
273  __func__, csb->completion_code);
274  return -EINVAL;
275  default:
276  dev_dbg(dev, "%s: Unspecified error (code:%d)\n",
277  __func__, csb->completion_code);
278  return -EIO;
279  }
280 
281  /* Hardware sanity check */
283  dev_err(dev, "%s: No error returned by hardware, but "
284  "data returned is unusable, contact support.\n"
285  "(Additional info: csbcbp->processed bytes "
286  "does not specify processed bytes for the "
287  "target buffer.)\n", __func__);
288  return -EIO;
289  }
290 
291  return 0;
292 }
293 
320 int nx842_compress(const unsigned char *in, unsigned int inlen,
321  unsigned char *out, unsigned int *outlen, void *wmem)
322 {
323  struct nx842_header *hdr;
324  struct nx842_devdata *local_devdata;
325  struct device *dev = NULL;
326  struct nx842_workmem *workmem;
327  struct nx842_scatterlist slin, slout;
328  struct nx_csbcpb *csbcpb;
329  int ret = 0, max_sync_size, i, bytesleft, size, hdrsize;
330  unsigned long inbuf, outbuf, padding;
331  struct vio_pfo_op op = {
332  .done = NULL,
333  .handle = 0,
334  .timeout = 0,
335  };
336  unsigned long start_time = get_tb();
337 
338  /*
339  * Make sure input buffer is 64k page aligned. This is assumed since
340  * this driver is designed for page compression only (for now). This
341  * is very nice since we can now use direct DDE(s) for the input and
342  * the alignment is guaranteed.
343  */
344  inbuf = (unsigned long)in;
345  if (!IS_ALIGNED(inbuf, PAGE_SIZE) || inlen != PAGE_SIZE)
346  return -EINVAL;
347 
348  rcu_read_lock();
349  local_devdata = rcu_dereference(devdata);
350  if (!local_devdata || !local_devdata->dev) {
351  rcu_read_unlock();
352  return -ENODEV;
353  }
354  max_sync_size = local_devdata->max_sync_size;
355  dev = local_devdata->dev;
356 
357  /* Create the header */
358  hdr = (struct nx842_header *)out;
359  hdr->blocks_nr = PAGE_SIZE / max_sync_size;
360  hdrsize = nx842_header_size(hdr);
361  outbuf = (unsigned long)out + hdrsize;
362  bytesleft = *outlen - hdrsize;
363 
364  /* Init scatterlist */
365  workmem = (struct nx842_workmem *)ALIGN((unsigned long)wmem,
367  slin.entries = (struct nx842_slentry *)workmem->slin;
368  slout.entries = (struct nx842_slentry *)workmem->slout;
369 
370  /* Init operation */
371  op.flags = NX842_OP_COMPRESS;
372  csbcpb = &workmem->csbcpb;
373  memset(csbcpb, 0, sizeof(*csbcpb));
374  op.csbcpb = __pa(csbcpb);
375  op.out = __pa(slout.entries);
376 
377  for (i = 0; i < hdr->blocks_nr; i++) {
378  /*
379  * Aligning the output blocks to 128 bytes does waste space,
380  * but it prevents the need for bounce buffers and memory
381  * copies. It also simplifies the code a lot. In the worst
382  * case (64k page, 4k max_sync_size), you lose up to
383  * (128*16)/64k = ~3% the compression factor. For 64k
384  * max_sync_size, the loss would be at most 128/64k = ~0.2%.
385  */
386  padding = ALIGN(outbuf, IO_BUFFER_ALIGN) - outbuf;
387  outbuf += padding;
388  bytesleft -= padding;
389  if (i == 0)
390  /* save offset into first block in header */
391  hdr->offset = padding + hdrsize;
392 
393  if (bytesleft <= 0) {
394  ret = -ENOSPC;
395  goto unlock;
396  }
397 
398  /*
399  * NOTE: If the default max_sync_size is changed from 4k
400  * to 64k, remove the "likely" case below, since a
401  * scatterlist will always be needed.
402  */
403  if (likely(max_sync_size == NX842_HW_PAGE_SIZE)) {
404  /* Create direct DDE */
405  op.in = __pa(inbuf);
406  op.inlen = max_sync_size;
407 
408  } else {
409  /* Create indirect DDE (scatterlist) */
410  nx842_build_scatterlist(inbuf, max_sync_size, &slin);
411  op.in = __pa(slin.entries);
412  op.inlen = -nx842_get_scatterlist_size(&slin);
413  }
414 
415  /*
416  * If max_sync_size != NX842_HW_PAGE_SIZE, an indirect
417  * DDE is required for the outbuf.
418  * If max_sync_size == NX842_HW_PAGE_SIZE, outbuf must
419  * also be page aligned (1 in 128/4k=32 chance) in order
420  * to use a direct DDE.
421  * This is unlikely, just use an indirect DDE always.
422  */
423  nx842_build_scatterlist(outbuf,
424  min(bytesleft, max_sync_size), &slout);
425  /* op.out set before loop */
426  op.outlen = -nx842_get_scatterlist_size(&slout);
427 
428  /* Send request to pHyp */
429  ret = vio_h_cop_sync(local_devdata->vdev, &op);
430 
431  /* Check for pHyp error */
432  if (ret) {
433  dev_dbg(dev, "%s: vio_h_cop_sync error (ret=%d, hret=%ld)\n",
434  __func__, ret, op.hcall_err);
435  ret = -EIO;
436  goto unlock;
437  }
438 
439  /* Check for hardware error */
440  ret = nx842_validate_result(dev, &csbcpb->csb);
441  if (ret && ret != -ENOSPC)
442  goto unlock;
443 
444  /* Handle incompressible data */
445  if (unlikely(ret == -ENOSPC)) {
446  if (bytesleft < max_sync_size) {
447  /*
448  * Not enough space left in the output buffer
449  * to store uncompressed block
450  */
451  goto unlock;
452  } else {
453  /* Store incompressible block */
454  memcpy((void *)outbuf, (void *)inbuf,
455  max_sync_size);
456  hdr->sizes[i] = -max_sync_size;
457  outbuf += max_sync_size;
458  bytesleft -= max_sync_size;
459  /* Reset ret, incompressible data handled */
460  ret = 0;
461  }
462  } else {
463  /* Normal case, compression was successful */
464  size = csbcpb->csb.processed_byte_count;
465  dev_dbg(dev, "%s: processed_bytes=%d\n",
466  __func__, size);
467  hdr->sizes[i] = size;
468  outbuf += size;
469  bytesleft -= size;
470  }
471 
472  inbuf += max_sync_size;
473  }
474 
475  *outlen = (unsigned int)(outbuf - (unsigned long)out);
476 
477 unlock:
478  if (ret)
479  nx842_inc_comp_failed(local_devdata);
480  else {
481  nx842_inc_comp_complete(local_devdata);
482  ibm_nx842_incr_hist(local_devdata->counters->comp_times,
483  (get_tb() - start_time) / tb_ticks_per_usec);
484  }
485  rcu_read_unlock();
486  return ret;
487 }
489 
490 static int sw842_decompress(const unsigned char *, int, unsigned char *, int *,
491  const void *);
492 
520 int nx842_decompress(const unsigned char *in, unsigned int inlen,
521  unsigned char *out, unsigned int *outlen, void *wmem)
522 {
523  struct nx842_header *hdr;
524  struct nx842_devdata *local_devdata;
525  struct device *dev = NULL;
526  struct nx842_workmem *workmem;
527  struct nx842_scatterlist slin, slout;
528  struct nx_csbcpb *csbcpb;
529  int ret = 0, i, size, max_sync_size;
530  unsigned long inbuf, outbuf;
531  struct vio_pfo_op op = {
532  .done = NULL,
533  .handle = 0,
534  .timeout = 0,
535  };
536  unsigned long start_time = get_tb();
537 
538  /* Ensure page alignment and size */
539  outbuf = (unsigned long)out;
540  if (!IS_ALIGNED(outbuf, PAGE_SIZE) || *outlen != PAGE_SIZE)
541  return -EINVAL;
542 
543  rcu_read_lock();
544  local_devdata = rcu_dereference(devdata);
545  if (local_devdata)
546  dev = local_devdata->dev;
547 
548  /* Get header */
549  hdr = (struct nx842_header *)in;
550 
551  workmem = (struct nx842_workmem *)ALIGN((unsigned long)wmem,
553 
554  inbuf = (unsigned long)in + hdr->offset;
555  if (likely(!IS_ALIGNED(inbuf, IO_BUFFER_ALIGN))) {
556  /* Copy block(s) into bounce buffer for alignment */
557  memcpy(workmem->bounce, in + hdr->offset, inlen - hdr->offset);
558  inbuf = (unsigned long)workmem->bounce;
559  }
560 
561  /* Init scatterlist */
562  slin.entries = (struct nx842_slentry *)workmem->slin;
563  slout.entries = (struct nx842_slentry *)workmem->slout;
564 
565  /* Init operation */
566  op.flags = NX842_OP_DECOMPRESS;
567  csbcpb = &workmem->csbcpb;
568  memset(csbcpb, 0, sizeof(*csbcpb));
569  op.csbcpb = __pa(csbcpb);
570 
571  /*
572  * max_sync_size may have changed since compression,
573  * so we can't read it from the device info. We need
574  * to derive it from hdr->blocks_nr.
575  */
576  max_sync_size = PAGE_SIZE / hdr->blocks_nr;
577 
578  for (i = 0; i < hdr->blocks_nr; i++) {
579  /* Skip padding */
580  inbuf = ALIGN(inbuf, IO_BUFFER_ALIGN);
581 
582  if (hdr->sizes[i] < 0) {
583  /* Negative sizes indicate uncompressed data blocks */
584  size = abs(hdr->sizes[i]);
585  memcpy((void *)outbuf, (void *)inbuf, size);
586  outbuf += size;
587  inbuf += size;
588  continue;
589  }
590 
591  if (!dev)
592  goto sw;
593 
594  /*
595  * The better the compression, the more likely the "likely"
596  * case becomes.
597  */
598  if (likely((inbuf & NX842_HW_PAGE_MASK) ==
599  ((inbuf + hdr->sizes[i] - 1) & NX842_HW_PAGE_MASK))) {
600  /* Create direct DDE */
601  op.in = __pa(inbuf);
602  op.inlen = hdr->sizes[i];
603  } else {
604  /* Create indirect DDE (scatterlist) */
605  nx842_build_scatterlist(inbuf, hdr->sizes[i] , &slin);
606  op.in = __pa(slin.entries);
607  op.inlen = -nx842_get_scatterlist_size(&slin);
608  }
609 
610  /*
611  * NOTE: If the default max_sync_size is changed from 4k
612  * to 64k, remove the "likely" case below, since a
613  * scatterlist will always be needed.
614  */
615  if (likely(max_sync_size == NX842_HW_PAGE_SIZE)) {
616  /* Create direct DDE */
617  op.out = __pa(outbuf);
618  op.outlen = max_sync_size;
619  } else {
620  /* Create indirect DDE (scatterlist) */
621  nx842_build_scatterlist(outbuf, max_sync_size, &slout);
622  op.out = __pa(slout.entries);
623  op.outlen = -nx842_get_scatterlist_size(&slout);
624  }
625 
626  /* Send request to pHyp */
627  ret = vio_h_cop_sync(local_devdata->vdev, &op);
628 
629  /* Check for pHyp error */
630  if (ret) {
631  dev_dbg(dev, "%s: vio_h_cop_sync error (ret=%d, hret=%ld)\n",
632  __func__, ret, op.hcall_err);
633  dev = NULL;
634  goto sw;
635  }
636 
637  /* Check for hardware error */
638  ret = nx842_validate_result(dev, &csbcpb->csb);
639  if (ret) {
640  dev = NULL;
641  goto sw;
642  }
643 
644  /* HW decompression success */
645  inbuf += hdr->sizes[i];
646  outbuf += csbcpb->csb.processed_byte_count;
647  continue;
648 
649 sw:
650  /* software decompression */
651  size = max_sync_size;
652  ret = sw842_decompress(
653  (unsigned char *)inbuf, hdr->sizes[i],
654  (unsigned char *)outbuf, &size, wmem);
655  if (ret)
656  pr_debug("%s: sw842_decompress failed with %d\n",
657  __func__, ret);
658 
659  if (ret) {
660  if (ret != -ENOSPC && ret != -EINVAL &&
661  ret != -EMSGSIZE)
662  ret = -EIO;
663  goto unlock;
664  }
665 
666  /* SW decompression success */
667  inbuf += hdr->sizes[i];
668  outbuf += size;
669  }
670 
671  *outlen = (unsigned int)(outbuf - (unsigned long)out);
672 
673 unlock:
674  if (ret)
675  /* decompress fail */
676  nx842_inc_decomp_failed(local_devdata);
677  else {
678  if (!dev)
679  /* software decompress */
680  nx842_inc_swdecomp(local_devdata);
681  nx842_inc_decomp_complete(local_devdata);
682  ibm_nx842_incr_hist(local_devdata->counters->decomp_times,
683  (get_tb() - start_time) / tb_ticks_per_usec);
684  }
685 
686  rcu_read_unlock();
687  return ret;
688 }
690 
700 static int nx842_OF_set_defaults(struct nx842_devdata *devdata)
701 {
702  if (devdata) {
703  devdata->max_sync_size = 0;
704  devdata->max_sync_sg = 0;
705  devdata->max_sg_len = 0;
706  devdata->status = UNAVAILABLE;
707  return 0;
708  } else
709  return -ENOENT;
710 }
711 
727 static int nx842_OF_upd_status(struct nx842_devdata *devdata,
728  struct property *prop) {
729  int ret = 0;
730  const char *status = (const char *)prop->value;
731 
732  if (!strncmp(status, "okay", (size_t)prop->length)) {
733  devdata->status = AVAILABLE;
734  } else {
735  dev_info(devdata->dev, "%s: status '%s' is not 'okay'\n",
736  __func__, status);
737  devdata->status = UNAVAILABLE;
738  }
739 
740  return ret;
741 }
742 
764 static int nx842_OF_upd_maxsglen(struct nx842_devdata *devdata,
765  struct property *prop) {
766  int ret = 0;
767  const int *maxsglen = prop->value;
768 
769  if (prop->length != sizeof(*maxsglen)) {
770  dev_err(devdata->dev, "%s: unexpected format for ibm,max-sg-len property\n", __func__);
771  dev_dbg(devdata->dev, "%s: ibm,max-sg-len is %d bytes long, expected %lu bytes\n", __func__,
772  prop->length, sizeof(*maxsglen));
773  ret = -EINVAL;
774  } else {
775  devdata->max_sg_len = (unsigned int)min(*maxsglen,
776  (int)NX842_HW_PAGE_SIZE);
777  }
778 
779  return ret;
780 }
781 
812 static int nx842_OF_upd_maxsyncop(struct nx842_devdata *devdata,
813  struct property *prop) {
814  int ret = 0;
815  const struct maxsynccop_t {
816  int comp_elements;
817  int comp_data_limit;
818  int comp_sg_limit;
819  int decomp_elements;
820  int decomp_data_limit;
821  int decomp_sg_limit;
822  } *maxsynccop;
823 
824  if (prop->length != sizeof(*maxsynccop)) {
825  dev_err(devdata->dev, "%s: unexpected format for ibm,max-sync-cop property\n", __func__);
826  dev_dbg(devdata->dev, "%s: ibm,max-sync-cop is %d bytes long, expected %lu bytes\n", __func__, prop->length,
827  sizeof(*maxsynccop));
828  ret = -EINVAL;
829  goto out;
830  }
831 
832  maxsynccop = (const struct maxsynccop_t *)prop->value;
833 
834  /* Use one limit rather than separate limits for compression and
835  * decompression. Set a maximum for this so as not to exceed the
836  * size that the header can support and round the value down to
837  * the hardware page size (4K) */
838  devdata->max_sync_size =
839  (unsigned int)min(maxsynccop->comp_data_limit,
840  maxsynccop->decomp_data_limit);
841 
842  devdata->max_sync_size = min_t(unsigned int, devdata->max_sync_size,
843  SIZE_64K);
844 
845  if (devdata->max_sync_size < SIZE_4K) {
846  dev_err(devdata->dev, "%s: hardware max data size (%u) is "
847  "less than the driver minimum, unable to use "
848  "the hardware device\n",
849  __func__, devdata->max_sync_size);
850  ret = -EINVAL;
851  goto out;
852  }
853 
854  devdata->max_sync_sg = (unsigned int)min(maxsynccop->comp_sg_limit,
855  maxsynccop->decomp_sg_limit);
856  if (devdata->max_sync_sg < 1) {
857  dev_err(devdata->dev, "%s: hardware max sg size (%u) is "
858  "less than the driver minimum, unable to use "
859  "the hardware device\n",
860  __func__, devdata->max_sync_sg);
861  ret = -EINVAL;
862  goto out;
863  }
864 
865 out:
866  return ret;
867 }
868 
888 static int nx842_OF_upd(struct property *new_prop)
889 {
890  struct nx842_devdata *old_devdata = NULL;
891  struct nx842_devdata *new_devdata = NULL;
892  struct device_node *of_node = NULL;
893  struct property *status = NULL;
894  struct property *maxsglen = NULL;
895  struct property *maxsyncop = NULL;
896  int ret = 0;
897  unsigned long flags;
898 
899  spin_lock_irqsave(&devdata_mutex, flags);
900  old_devdata = rcu_dereference_check(devdata,
901  lockdep_is_held(&devdata_mutex));
902  if (old_devdata)
903  of_node = old_devdata->dev->of_node;
904 
905  if (!old_devdata || !of_node) {
906  pr_err("%s: device is not available\n", __func__);
907  spin_unlock_irqrestore(&devdata_mutex, flags);
908  return -ENODEV;
909  }
910 
911  new_devdata = kzalloc(sizeof(*new_devdata), GFP_NOFS);
912  if (!new_devdata) {
913  dev_err(old_devdata->dev, "%s: Could not allocate memory for device data\n", __func__);
914  ret = -ENOMEM;
915  goto error_out;
916  }
917 
918  memcpy(new_devdata, old_devdata, sizeof(*old_devdata));
919  new_devdata->counters = old_devdata->counters;
920 
921  /* Set ptrs for existing properties */
922  status = of_find_property(of_node, "status", NULL);
923  maxsglen = of_find_property(of_node, "ibm,max-sg-len", NULL);
924  maxsyncop = of_find_property(of_node, "ibm,max-sync-cop", NULL);
925  if (!status || !maxsglen || !maxsyncop) {
926  dev_err(old_devdata->dev, "%s: Could not locate device properties\n", __func__);
927  ret = -EINVAL;
928  goto error_out;
929  }
930 
931  /* Set ptr to new property if provided */
932  if (new_prop) {
933  /* Single property */
934  if (!strncmp(new_prop->name, "status", new_prop->length)) {
935  status = new_prop;
936 
937  } else if (!strncmp(new_prop->name, "ibm,max-sg-len",
938  new_prop->length)) {
939  maxsglen = new_prop;
940 
941  } else if (!strncmp(new_prop->name, "ibm,max-sync-cop",
942  new_prop->length)) {
943  maxsyncop = new_prop;
944 
945  } else {
946  /*
947  * Skip the update, the property being updated
948  * has no impact.
949  */
950  goto out;
951  }
952  }
953 
954  /* Perform property updates */
955  ret = nx842_OF_upd_status(new_devdata, status);
956  if (ret)
957  goto error_out;
958 
959  ret = nx842_OF_upd_maxsglen(new_devdata, maxsglen);
960  if (ret)
961  goto error_out;
962 
963  ret = nx842_OF_upd_maxsyncop(new_devdata, maxsyncop);
964  if (ret)
965  goto error_out;
966 
967 out:
968  dev_info(old_devdata->dev, "%s: max_sync_size new:%u old:%u\n",
969  __func__, new_devdata->max_sync_size,
970  old_devdata->max_sync_size);
971  dev_info(old_devdata->dev, "%s: max_sync_sg new:%u old:%u\n",
972  __func__, new_devdata->max_sync_sg,
973  old_devdata->max_sync_sg);
974  dev_info(old_devdata->dev, "%s: max_sg_len new:%u old:%u\n",
975  __func__, new_devdata->max_sg_len,
976  old_devdata->max_sg_len);
977 
978  rcu_assign_pointer(devdata, new_devdata);
979  spin_unlock_irqrestore(&devdata_mutex, flags);
980  synchronize_rcu();
981  dev_set_drvdata(new_devdata->dev, new_devdata);
982  kfree(old_devdata);
983  return 0;
984 
985 error_out:
986  if (new_devdata) {
987  dev_info(old_devdata->dev, "%s: device disabled\n", __func__);
988  nx842_OF_set_defaults(new_devdata);
989  rcu_assign_pointer(devdata, new_devdata);
990  spin_unlock_irqrestore(&devdata_mutex, flags);
991  synchronize_rcu();
992  dev_set_drvdata(new_devdata->dev, new_devdata);
993  kfree(old_devdata);
994  } else {
995  dev_err(old_devdata->dev, "%s: could not update driver from hardware\n", __func__);
996  spin_unlock_irqrestore(&devdata_mutex, flags);
997  }
998 
999  if (!ret)
1000  ret = -EINVAL;
1001  return ret;
1002 }
1003 
1017 static int nx842_OF_notifier(struct notifier_block *np,
1018  unsigned long action,
1019  void *update)
1020 {
1021  struct pSeries_reconfig_prop_update *upd;
1022  struct nx842_devdata *local_devdata;
1023  struct device_node *node = NULL;
1024 
1025  upd = (struct pSeries_reconfig_prop_update *)update;
1026 
1027  rcu_read_lock();
1028  local_devdata = rcu_dereference(devdata);
1029  if (local_devdata)
1030  node = local_devdata->dev->of_node;
1031 
1032  if (local_devdata &&
1033  action == PSERIES_UPDATE_PROPERTY &&
1034  !strcmp(upd->node->name, node->name)) {
1035  rcu_read_unlock();
1036  nx842_OF_upd(upd->property);
1037  } else
1038  rcu_read_unlock();
1039 
1040  return NOTIFY_OK;
1041 }
1042 
1043 static struct notifier_block nx842_of_nb = {
1044  .notifier_call = nx842_OF_notifier,
1045 };
1046 
/*
 * Generate nx842_<name>_show(): sysfs "show" handler that prints the
 * current value of one statistics counter followed by a newline.  Nothing
 * is printed (0 is returned) when there is no device data.
 */
#define nx842_counter_read(_name)					\
static ssize_t nx842_##_name##_show(struct device *dev,		\
		struct device_attribute *attr,				\
		char *buf) {						\
	struct nx842_devdata *cur;					\
	int written = 0;						\
									\
	rcu_read_lock();						\
	cur = rcu_dereference(devdata);					\
	if (cur)							\
		written = snprintf(buf, PAGE_SIZE, "%ld\n",		\
			atomic64_read(&cur->counters->_name));		\
	rcu_read_unlock();						\
	return written;							\
}
1061 
1062 #define NX842DEV_COUNTER_ATTR_RO(_name) \
1063  nx842_counter_read(_name); \
1064  static struct device_attribute dev_attr_##_name = __ATTR(_name, \
1065  0444, \
1066  nx842_##_name##_show,\
1067  NULL);
1068 
1069 NX842DEV_COUNTER_ATTR_RO(comp_complete);
1070 NX842DEV_COUNTER_ATTR_RO(comp_failed);
1071 NX842DEV_COUNTER_ATTR_RO(decomp_complete);
1072 NX842DEV_COUNTER_ATTR_RO(decomp_failed);
1073 NX842DEV_COUNTER_ATTR_RO(swdecomp);
1074 
1075 static ssize_t nx842_timehist_show(struct device *,
1076  struct device_attribute *, char *);
1077 
1078 static struct device_attribute dev_attr_comp_times = __ATTR(comp_times, 0444,
1079  nx842_timehist_show, NULL);
1080 static struct device_attribute dev_attr_decomp_times = __ATTR(decomp_times,
1081  0444, nx842_timehist_show, NULL);
1082 
1083 static ssize_t nx842_timehist_show(struct device *dev,
1084  struct device_attribute *attr, char *buf) {
1085  char *p = buf;
1086  struct nx842_devdata *local_devdata;
1087  atomic64_t *times;
1088  int bytes_remain = PAGE_SIZE;
1089  int bytes;
1090  int i;
1091 
1092  rcu_read_lock();
1093  local_devdata = rcu_dereference(devdata);
1094  if (!local_devdata) {
1095  rcu_read_unlock();
1096  return 0;
1097  }
1098 
1099  if (attr == &dev_attr_comp_times)
1100  times = local_devdata->counters->comp_times;
1101  else if (attr == &dev_attr_decomp_times)
1102  times = local_devdata->counters->decomp_times;
1103  else {
1104  rcu_read_unlock();
1105  return 0;
1106  }
1107 
1108  for (i = 0; i < (NX842_HIST_SLOTS - 2); i++) {
1109  bytes = snprintf(p, bytes_remain, "%u-%uus:\t%ld\n",
1110  i ? (2<<(i-1)) : 0, (2<<i)-1,
1111  atomic64_read(&times[i]));
1112  bytes_remain -= bytes;
1113  p += bytes;
1114  }
1115  /* The last bucket holds everything over
1116  * 2<<(NX842_HIST_SLOTS - 2) us */
1117  bytes = snprintf(p, bytes_remain, "%uus - :\t%ld\n",
1118  2<<(NX842_HIST_SLOTS - 2),
1119  atomic64_read(&times[(NX842_HIST_SLOTS - 1)]));
1120  p += bytes;
1121 
1122  rcu_read_unlock();
1123  return p - buf;
1124 }
1125 
1126 static struct attribute *nx842_sysfs_entries[] = {
1127  &dev_attr_comp_complete.attr,
1128  &dev_attr_comp_failed.attr,
1129  &dev_attr_decomp_complete.attr,
1130  &dev_attr_decomp_failed.attr,
1131  &dev_attr_swdecomp.attr,
1132  &dev_attr_comp_times.attr,
1133  &dev_attr_decomp_times.attr,
1134  NULL,
1135 };
1136 
1137 static struct attribute_group nx842_attribute_group = {
1138  .name = NULL, /* put in device directory */
1139  .attrs = nx842_sysfs_entries,
1140 };
1141 
1142 static int __init nx842_probe(struct vio_dev *viodev,
1143  const struct vio_device_id *id)
1144 {
1145  struct nx842_devdata *old_devdata, *new_devdata = NULL;
1146  unsigned long flags;
1147  int ret = 0;
1148 
1149  spin_lock_irqsave(&devdata_mutex, flags);
1150  old_devdata = rcu_dereference_check(devdata,
1151  lockdep_is_held(&devdata_mutex));
1152 
1153  if (old_devdata && old_devdata->vdev != NULL) {
1154  dev_err(&viodev->dev, "%s: Attempt to register more than one instance of the hardware\n", __func__);
1155  ret = -1;
1156  goto error_unlock;
1157  }
1158 
1159  dev_set_drvdata(&viodev->dev, NULL);
1160 
1161  new_devdata = kzalloc(sizeof(*new_devdata), GFP_NOFS);
1162  if (!new_devdata) {
1163  dev_err(&viodev->dev, "%s: Could not allocate memory for device data\n", __func__);
1164  ret = -ENOMEM;
1165  goto error_unlock;
1166  }
1167 
1168  new_devdata->counters = kzalloc(sizeof(*new_devdata->counters),
1169  GFP_NOFS);
1170  if (!new_devdata->counters) {
1171  dev_err(&viodev->dev, "%s: Could not allocate memory for performance counters\n", __func__);
1172  ret = -ENOMEM;
1173  goto error_unlock;
1174  }
1175 
1176  new_devdata->vdev = viodev;
1177  new_devdata->dev = &viodev->dev;
1178  nx842_OF_set_defaults(new_devdata);
1179 
1180  rcu_assign_pointer(devdata, new_devdata);
1181  spin_unlock_irqrestore(&devdata_mutex, flags);
1182  synchronize_rcu();
1183  kfree(old_devdata);
1184 
1185  pSeries_reconfig_notifier_register(&nx842_of_nb);
1186 
1187  ret = nx842_OF_upd(NULL);
1188  if (ret && ret != -ENODEV) {
1189  dev_err(&viodev->dev, "could not parse device tree. %d\n", ret);
1190  ret = -1;
1191  goto error;
1192  }
1193 
1194  rcu_read_lock();
1195  if (dev_set_drvdata(&viodev->dev, rcu_dereference(devdata))) {
1196  rcu_read_unlock();
1197  dev_err(&viodev->dev, "failed to set driver data for device\n");
1198  ret = -1;
1199  goto error;
1200  }
1201  rcu_read_unlock();
1202 
1203  if (sysfs_create_group(&viodev->dev.kobj, &nx842_attribute_group)) {
1204  dev_err(&viodev->dev, "could not create sysfs device attributes\n");
1205  ret = -1;
1206  goto error;
1207  }
1208 
1209  return 0;
1210 
1211 error_unlock:
1212  spin_unlock_irqrestore(&devdata_mutex, flags);
1213  if (new_devdata)
1214  kfree(new_devdata->counters);
1215  kfree(new_devdata);
1216 error:
1217  return ret;
1218 }
1219 
1220 static int __exit nx842_remove(struct vio_dev *viodev)
1221 {
1222  struct nx842_devdata *old_devdata;
1223  unsigned long flags;
1224 
1225  pr_info("Removing IBM Power 842 compression device\n");
1226  sysfs_remove_group(&viodev->dev.kobj, &nx842_attribute_group);
1227 
1228  spin_lock_irqsave(&devdata_mutex, flags);
1229  old_devdata = rcu_dereference_check(devdata,
1230  lockdep_is_held(&devdata_mutex));
1232  rcu_assign_pointer(devdata, NULL);
1233  spin_unlock_irqrestore(&devdata_mutex, flags);
1234  synchronize_rcu();
1235  dev_set_drvdata(&viodev->dev, NULL);
1236  if (old_devdata)
1237  kfree(old_devdata->counters);
1238  kfree(old_devdata);
1239  return 0;
1240 }
1241 
1242 static struct vio_device_id nx842_driver_ids[] = {
1243  {"ibm,compression-v1", "ibm,compression"},
1244  {"", ""},
1245 };
1246 
1247 static struct vio_driver nx842_driver = {
1248  .name = MODULE_NAME,
1249  .probe = nx842_probe,
1250  .remove = nx842_remove,
1251  .get_desired_dma = nx842_get_desired_dma,
1252  .id_table = nx842_driver_ids,
1253 };
1254 
1255 static int __init nx842_init(void)
1256 {
1257  struct nx842_devdata *new_devdata;
1258  pr_info("Registering IBM Power 842 compression driver\n");
1259 
1260  RCU_INIT_POINTER(devdata, NULL);
1261  new_devdata = kzalloc(sizeof(*new_devdata), GFP_KERNEL);
1262  if (!new_devdata) {
1263  pr_err("Could not allocate memory for device data\n");
1264  return -ENOMEM;
1265  }
1266  new_devdata->status = UNAVAILABLE;
1267  RCU_INIT_POINTER(devdata, new_devdata);
1268 
1269  return vio_register_driver(&nx842_driver);
1270 }
1271 
1272 module_init(nx842_init);
1273 
1274 static void __exit nx842_exit(void)
1275 {
1276  struct nx842_devdata *old_devdata;
1277  unsigned long flags;
1278 
1279  pr_info("Exiting IBM Power 842 compression driver\n");
1280  spin_lock_irqsave(&devdata_mutex, flags);
1281  old_devdata = rcu_dereference_check(devdata,
1282  lockdep_is_held(&devdata_mutex));
1283  rcu_assign_pointer(devdata, NULL);
1284  spin_unlock_irqrestore(&devdata_mutex, flags);
1285  synchronize_rcu();
1286  if (old_devdata)
1287  dev_set_drvdata(old_devdata->dev, NULL);
1288  kfree(old_devdata);
1289  vio_unregister_driver(&nx842_driver);
1290 }
1291 
1292 module_exit(nx842_exit);
1293 
1294 /*********************************
1295  * 842 software decompressor
1296 *********************************/
/*
 * Signature shared by every template operation.  Each one works on an
 * input cursor (buffer pointer plus bit offset), an output cursor and the
 * decompressor FIFO, and returns 0 on success or non-zero on failure.
 */
typedef int (*sw842_template_op)(const char **inbuf, int *inbit,
				 unsigned char **outbuf,
				 struct sw842_fifo *fifo);

/* Forward declarations needed by the template table that follows. */
static int sw842_data8(const char **inbuf, int *inbit,
		       unsigned char **outbuf, struct sw842_fifo *fifo);
static int sw842_data4(const char **inbuf, int *inbit,
		       unsigned char **outbuf, struct sw842_fifo *fifo);
static int sw842_data2(const char **inbuf, int *inbit,
		       unsigned char **outbuf, struct sw842_fifo *fifo);
static int sw842_ptr8(const char **inbuf, int *inbit,
		      unsigned char **outbuf, struct sw842_fifo *fifo);
static int sw842_ptr4(const char **inbuf, int *inbit,
		      unsigned char **outbuf, struct sw842_fifo *fifo);
static int sw842_ptr2(const char **inbuf, int *inbit,
		      unsigned char **outbuf, struct sw842_fifo *fifo);
/* Template codes the decoder loop handles itself instead of via the op table */
#define SW842_TMPL_REPEAT	0x1B	/* replay the previous 8-byte group */
#define SW842_TMPL_ZEROS	0x1C	/* emit eight zero bytes */
#define SW842_TMPL_EOF		0x1E	/* end of the compressed stream */
1317 
1318 static sw842_template_op sw842_tmpl_ops[26][4] = {
1319  { sw842_data8, NULL}, /* 0 (00000) */
1320  { sw842_data4, sw842_data2, sw842_ptr2, NULL},
1321  { sw842_data4, sw842_ptr2, sw842_data2, NULL},
1322  { sw842_data4, sw842_ptr2, sw842_ptr2, NULL},
1323  { sw842_data4, sw842_ptr4, NULL},
1324  { sw842_data2, sw842_ptr2, sw842_data4, NULL},
1325  { sw842_data2, sw842_ptr2, sw842_data2, sw842_ptr2},
1326  { sw842_data2, sw842_ptr2, sw842_ptr2, sw842_data2},
1327  { sw842_data2, sw842_ptr2, sw842_ptr2, sw842_ptr2,},
1328  { sw842_data2, sw842_ptr2, sw842_ptr4, NULL},
1329  { sw842_ptr2, sw842_data2, sw842_data4, NULL}, /* 10 (01010) */
1330  { sw842_ptr2, sw842_data4, sw842_ptr2, NULL},
1331  { sw842_ptr2, sw842_data2, sw842_ptr2, sw842_data2},
1332  { sw842_ptr2, sw842_data2, sw842_ptr2, sw842_ptr2},
1333  { sw842_ptr2, sw842_data2, sw842_ptr4, NULL},
1334  { sw842_ptr2, sw842_ptr2, sw842_data4, NULL},
1335  { sw842_ptr2, sw842_ptr2, sw842_data2, sw842_ptr2},
1336  { sw842_ptr2, sw842_ptr2, sw842_ptr2, sw842_data2},
1337  { sw842_ptr2, sw842_ptr2, sw842_ptr2, sw842_ptr2},
1338  { sw842_ptr2, sw842_ptr2, sw842_ptr4, NULL},
1339  { sw842_ptr4, sw842_data4, NULL}, /* 20 (10100) */
1340  { sw842_ptr4, sw842_data2, sw842_ptr2, NULL},
1341  { sw842_ptr4, sw842_ptr2, sw842_data2, NULL},
1342  { sw842_ptr4, sw842_ptr2, sw842_ptr2, NULL},
1343  { sw842_ptr4, sw842_ptr4, NULL},
1344  { sw842_ptr8, NULL}
1345 };
1346 
1347 /* Software decompress helpers */
1348 
/*
 * Return the 8 bits that start @bit bits into @buf (most significant bit
 * first).  Callers pass a bit offset of 0..7.
 *
 * The bytes are combined with plain byte accesses, so the buffer needs no
 * particular alignment and is never reinterpreted as a uint16_t.  The
 * second byte is only read when the field actually straddles a byte
 * boundary, so a byte-aligned read of the last input byte stays in bounds.
 */
static uint8_t sw842_get_byte(const char *buf, int bit)
{
	const unsigned char *p = (const unsigned char *)buf;
	unsigned int window = (unsigned int)p[0] << 8;

	if (bit)
		window |= p[1];

	/* Shift the wanted bits into the high byte of the 16-bit window. */
	return (uint8_t)((window << bit) >> 8);
}
1359 
1360 static uint8_t sw842_get_template(const char **buf, int *bit)
1361 {
1362  uint8_t byte;
1363  byte = sw842_get_byte(*buf, *bit);
1364  byte = byte >> 3;
1365  byte &= 0x1F;
1366  *buf += (*bit + 5) / 8;
1367  *bit = (*bit + 5) % 8;
1368  return byte;
1369 }
1370 
1371 /* repeat_count happens to be 5-bit too (like the template) */
1372 static uint8_t sw842_get_repeat_count(const char **buf, int *bit)
1373 {
1374  uint8_t byte;
1375  byte = sw842_get_byte(*buf, *bit);
1376  byte = byte >> 2;
1377  byte &= 0x3F;
1378  *buf += (*bit + 6) / 8;
1379  *bit = (*bit + 6) % 8;
1380  return byte;
1381 }
1382 
1383 static uint8_t sw842_get_ptr2(const char **buf, int *bit)
1384 {
1385  uint8_t ptr;
1386  ptr = sw842_get_byte(*buf, *bit);
1387  (*buf)++;
1388  return ptr;
1389 }
1390 
/*
 * Read a 9-bit index at the input cursor and advance the cursor
 * (*buf, *bit) by nine bits.  @fifo is not used.
 *
 * The two input bytes are combined with plain byte accesses (most
 * significant byte first), so the buffer needs no particular alignment and
 * is never reinterpreted as a uint16_t.
 */
static uint16_t sw842_get_ptr4(const char **buf, int *bit,
			       struct sw842_fifo *fifo)
{
	const unsigned char *p = (const unsigned char *)*buf;
	unsigned int window = ((unsigned int)p[0] << 8) | p[1];
	int next = *bit + 9;
	uint16_t ptr;

	/* Drop the already consumed bits, then keep the top nine. */
	ptr = (uint16_t)(window << *bit);
	ptr = (ptr >> 7) & 0x01FF;

	*buf += next / 8;
	*bit = next % 8;
	return ptr;
}
1403 
1404 static uint8_t sw842_get_ptr8(const char **buf, int *bit,
1405  struct sw842_fifo *fifo)
1406 {
1407  return sw842_get_ptr2(buf, bit);
1408 }
1409 
1410 /* Software decompress template ops */
1411 
/*
 * Copy eight literal bytes from the input to the output by running
 * sw842_data4() twice.  Stops at, and returns, the first non-zero result.
 */
static int sw842_data8(const char **inbuf, int *inbit,
		       unsigned char **outbuf, struct sw842_fifo *fifo)
{
	int half;
	int ret = 0;

	for (half = 0; half < 2 && !ret; half++)
		ret = sw842_data4(inbuf, inbit, outbuf, fifo);

	return ret;
}
1423 
/*
 * Copy four literal bytes from the input to the output by running
 * sw842_data2() twice.  Stops at, and returns, the first non-zero result.
 */
static int sw842_data4(const char **inbuf, int *inbit,
		       unsigned char **outbuf, struct sw842_fifo *fifo)
{
	int half;
	int ret = 0;

	for (half = 0; half < 2 && !ret; half++)
		ret = sw842_data2(inbuf, inbit, outbuf, fifo);

	return ret;
}
1435 
/*
 * Copy two literal bytes from the input (read at the current bit offset)
 * to the output, advancing both buffer pointers by two.  The bit offset is
 * left unchanged.  Always returns 0; @fifo is not used.
 */
static int sw842_data2(const char **inbuf, int *inbit,
		       unsigned char **outbuf, struct sw842_fifo *fifo)
{
	int n;

	for (n = 0; n < 2; n++) {
		**outbuf = sw842_get_byte(*inbuf, *inbit);
		(*inbuf)++;
		(*outbuf)++;
	}

	return 0;
}
1447 
1448 static int sw842_ptr8(const char **inbuf, int *inbit,
1449  unsigned char **outbuf, struct sw842_fifo *fifo)
1450 {
1451  uint8_t ptr;
1452  ptr = sw842_get_ptr8(inbuf, inbit, fifo);
1453  if (!fifo->f84_full && (ptr >= fifo->f8_count))
1454  return 1;
1455  memcpy(*outbuf, fifo->f8[ptr], 8);
1456  *outbuf += 8;
1457  return 0;
1458 }
1459 
1460 static int sw842_ptr4(const char **inbuf, int *inbit,
1461  unsigned char **outbuf, struct sw842_fifo *fifo)
1462 {
1463  uint16_t ptr;
1464  ptr = sw842_get_ptr4(inbuf, inbit, fifo);
1465  if (!fifo->f84_full && (ptr >= fifo->f4_count))
1466  return 1;
1467  memcpy(*outbuf, fifo->f4[ptr], 4);
1468  *outbuf += 4;
1469  return 0;
1470 }
1471 
1472 static int sw842_ptr2(const char **inbuf, int *inbit,
1473  unsigned char **outbuf, struct sw842_fifo *fifo)
1474 {
1475  uint8_t ptr;
1476  ptr = sw842_get_ptr2(inbuf, inbit);
1477  if (!fifo->f2_full && (ptr >= fifo->f2_count))
1478  return 1;
1479  memcpy(*outbuf, fifo->f2[ptr], 2);
1480  *outbuf += 2;
1481  return 0;
1482 }
1483 
1484 static void sw842_copy_to_fifo(const char *buf, struct sw842_fifo *fifo)
1485 {
1486  unsigned char initial_f2count = fifo->f2_count;
1487 
1488  memcpy(fifo->f8[fifo->f8_count], buf, 8);
1489  fifo->f4_count += 2;
1490  fifo->f8_count += 1;
1491 
1492  if (!fifo->f84_full && fifo->f4_count >= 512) {
1493  fifo->f84_full = 1;
1494  fifo->f4_count /= 512;
1495  }
1496 
1497  memcpy(fifo->f2[fifo->f2_count++], buf, 2);
1498  memcpy(fifo->f2[fifo->f2_count++], buf + 2, 2);
1499  memcpy(fifo->f2[fifo->f2_count++], buf + 4, 2);
1500  memcpy(fifo->f2[fifo->f2_count++], buf + 6, 2);
1501  if (fifo->f2_count < initial_f2count)
1502  fifo->f2_full = 1;
1503 }
1504 
1505 static int sw842_decompress(const unsigned char *src, int srclen,
1506  unsigned char *dst, int *destlen,
1507  const void *wrkmem)
1508 {
1509  uint8_t tmpl;
1510  const char *inbuf;
1511  int inbit = 0;
1512  unsigned char *outbuf, *outbuf_end, *origbuf, *prevbuf;
1513  const char *inbuf_end;
1515  int opindex;
1516  int i, repeat_count;
1517  struct sw842_fifo *fifo;
1518  int ret = 0;
1519 
1520  fifo = &((struct nx842_workmem *)(wrkmem))->swfifo;
1521  memset(fifo, 0, sizeof(*fifo));
1522 
1523  origbuf = NULL;
1524  inbuf = src;
1525  inbuf_end = src + srclen;
1526  outbuf = dst;
1527  outbuf_end = dst + *destlen;
1528 
1529  while ((tmpl = sw842_get_template(&inbuf, &inbit)) != SW842_TMPL_EOF) {
1530  if (inbuf >= inbuf_end) {
1531  ret = -EINVAL;
1532  goto out;
1533  }
1534 
1535  opindex = 0;
1536  prevbuf = origbuf;
1537  origbuf = outbuf;
1538  switch (tmpl) {
1539  case SW842_TMPL_REPEAT:
1540  if (prevbuf == NULL) {
1541  ret = -EINVAL;
1542  goto out;
1543  }
1544 
1545  repeat_count = sw842_get_repeat_count(&inbuf,
1546  &inbit) + 1;
1547 
1548  /* Did the repeat count advance past the end of input */
1549  if (inbuf > inbuf_end) {
1550  ret = -EINVAL;
1551  goto out;
1552  }
1553 
1554  for (i = 0; i < repeat_count; i++) {
1555  /* Would this overflow the output buffer */
1556  if ((outbuf + 8) > outbuf_end) {
1557  ret = -ENOSPC;
1558  goto out;
1559  }
1560 
1561  memcpy(outbuf, prevbuf, 8);
1562  sw842_copy_to_fifo(outbuf, fifo);
1563  outbuf += 8;
1564  }
1565  break;
1566 
1567  case SW842_TMPL_ZEROS:
1568  /* Would this overflow the output buffer */
1569  if ((outbuf + 8) > outbuf_end) {
1570  ret = -ENOSPC;
1571  goto out;
1572  }
1573 
1574  memset(outbuf, 0, 8);
1575  sw842_copy_to_fifo(outbuf, fifo);
1576  outbuf += 8;
1577  break;
1578 
1579  default:
1580  if (tmpl > 25) {
1581  ret = -EINVAL;
1582  goto out;
1583  }
1584 
1585  /* Does this go past the end of the input buffer */
1586  if ((inbuf + 2) > inbuf_end) {
1587  ret = -EINVAL;
1588  goto out;
1589  }
1590 
1591  /* Would this overflow the output buffer */
1592  if ((outbuf + 8) > outbuf_end) {
1593  ret = -ENOSPC;
1594  goto out;
1595  }
1596 
1597  while (opindex < 4 &&
1598  (op = sw842_tmpl_ops[tmpl][opindex++])
1599  != NULL) {
1600  ret = (*op)(&inbuf, &inbit, &outbuf, fifo);
1601  if (ret) {
1602  ret = -EINVAL;
1603  goto out;
1604  }
1605  sw842_copy_to_fifo(origbuf, fifo);
1606  }
1607  }
1608  }
1609 
1610 out:
1611  if (!ret)
1612  *destlen = (unsigned int)(outbuf - dst);
1613  else
1614  *destlen = 0;
1615 
1616  return ret;
1617 }