24 #include <linux/kernel.h>
25 #include <linux/module.h>
28 #include <linux/slab.h>
36 #define MODULE_NAME "nx-compress"
43 #define SIZE_4K (1UL << SHIFT_4K)
44 #define SIZE_64K (1UL << SHIFT_64K)
47 #define IO_BUFFER_ALIGN 128
58 hdr->blocks_nr *
sizeof(hdr->
sizes[0]);
/*
 * Single-bit tests on a status byte.  Each macro masks its argument with
 * one BIT_MASK() bit and evaluates to non-zero when that bit is set:
 *   NX842_CSBCBP_VALID_CHK - bit 7
 *   NX842_CSBCPB_CE0       - bit 7
 *   NX842_CSBCPB_CE1       - bit 6
 *   NX842_CSBCPB_CE2       - bit 5
 *
 * The argument is parenthesized so that a compound expression (for
 * example "a | b") is masked as a whole; without the parentheses the
 * higher precedence of '&' would mask only the last operand.
 *
 * NOTE(review): "CSBCBP" in the first macro name looks like a misspelling
 * of "CSBCPB"; the name is kept unchanged because callers reference it.
 */
#define NX842_CSBCBP_VALID_CHK(x) ((x) & BIT_MASK(7))
#define NX842_CSBCPB_CE0(x) ((x) & BIT_MASK(7))
#define NX842_CSBCPB_CE1(x) ((x) & BIT_MASK(6))
#define NX842_CSBCPB_CE2(x) ((x) & BIT_MASK(5))
74 #define NX842_HW_PAGE_SHIFT SHIFT_4K
75 #define NX842_HW_PAGE_SIZE (ASM_CONST(1) << NX842_HW_PAGE_SHIFT)
76 #define NX842_HW_PAGE_MASK (~(NX842_HW_PAGE_SIZE-1))
93 static struct nx842_devdata {
97 unsigned int max_sg_len;
98 unsigned int max_sync_size;
99 unsigned int max_sync_sg;
104 #define NX842_COUNTER_INC(_x) \
105 static inline void nx842_inc_##_x( \
106 const struct nx842_devdata *dev) { \
108 atomic64_inc(&dev->counters->_x); \
116 #define NX842_HIST_SLOTS 16
120 int bucket = fls(time);
/*
 * Operation codes and modifier flags.
 *
 * The low bits select the operation: COMPRESS (0x0) or DECOMPRESS (0x2),
 * optionally OR-ed with CRC (0x1) to form the *_CRC variants.
 * ASYNC (bit 23) and NOTIFY (bit 22) are separate flag bits.
 */
#define NX842_OP_COMPRESS 0x0
#define NX842_OP_CRC 0x1
#define NX842_OP_DECOMPRESS 0x2
#define NX842_OP_COMPRESS_CRC (NX842_OP_COMPRESS | NX842_OP_CRC)
#define NX842_OP_DECOMPRESS_CRC (NX842_OP_DECOMPRESS | NX842_OP_CRC)
#define NX842_OP_ASYNC (1<<23)
#define NX842_OP_NOTIFY (1<<22)
/*
 * Place the low 8 bits of x into bits 8..15.  The argument is
 * parenthesized so that a compound expression is truncated to 8 bits as a
 * whole before shifting; without the parentheses "a | b" would expand to
 * "a | (b & 0xff)" and bits above the low byte could leak into the result.
 */
#define NX842_OP_NOTIFY_INT(x) (((x) & 0xff)<<8)
138 static unsigned long nx842_get_desired_dma(
struct vio_dev *viodev)
156 static inline unsigned long nx842_get_scatterlist_size(
162 static int nx842_build_scatterlist(
unsigned long buf,
int len,
165 unsigned long nextpage;
174 if (nextpage < buf + len) {
181 entry->
len = nextpage -
buf;
243 static int nx842_validate_result(
struct device *
dev,
248 dev_err(dev,
"%s: cspcbp not valid upon completion.\n",
250 dev_dbg(dev,
"valid:0x%02x cs:0x%02x cc:0x%02x ce:0x%02x\n",
255 dev_dbg(dev,
"processed_bytes:%d address:0x%016lx\n",
267 dev_dbg(dev,
"%s: Compression output larger than input\n",
272 dev_dbg(dev,
"%s: Bad data for decompression (code:%d)\n",
276 dev_dbg(dev,
"%s: Unspecified error (code:%d)\n",
283 dev_err(dev,
"%s: No error returned by hardware, but "
284 "data returned is unusable, contact support.\n"
285 "(Additional info: csbcbp->processed bytes "
286 "does not specify processed bytes for the "
287 "target buffer.)\n", __func__);
321 unsigned char *
out,
unsigned int *
outlen,
void *wmem)
324 struct nx842_devdata *local_devdata;
329 int ret = 0, max_sync_size,
i, bytesleft,
size, hdrsize;
331 struct vio_pfo_op op = {
344 inbuf = (
unsigned long)in;
350 if (!local_devdata || !local_devdata->dev) {
354 max_sync_size = local_devdata->max_sync_size;
355 dev = local_devdata->dev;
360 hdrsize = nx842_header_size(hdr);
361 outbuf = (
unsigned long)out + hdrsize;
362 bytesleft = *outlen - hdrsize;
372 csbcpb = &workmem->
csbcpb;
373 memset(csbcpb, 0,
sizeof(*csbcpb));
374 op.csbcpb =
__pa(csbcpb);
391 hdr->
offset = padding + hdrsize;
393 if (bytesleft <= 0) {
406 op.inlen = max_sync_size;
410 nx842_build_scatterlist(inbuf, max_sync_size, &slin);
412 op.inlen = -nx842_get_scatterlist_size(&slin);
423 nx842_build_scatterlist(outbuf,
424 min(bytesleft, max_sync_size), &slout);
426 op.outlen = -nx842_get_scatterlist_size(&slout);
433 dev_dbg(dev,
"%s: vio_h_cop_sync error (ret=%d, hret=%ld)\n",
434 __func__, ret, op.hcall_err);
440 ret = nx842_validate_result(dev, &csbcpb->
csb);
441 if (ret && ret != -
ENOSPC)
446 if (bytesleft < max_sync_size) {
454 memcpy((
void *)outbuf, (
void *)inbuf,
456 hdr->
sizes[
i] = -max_sync_size;
457 outbuf += max_sync_size;
458 bytesleft -= max_sync_size;
464 size = csbcpb->
csb.processed_byte_count;
465 dev_dbg(dev,
"%s: processed_bytes=%d\n",
472 inbuf += max_sync_size;
475 *outlen = (
unsigned int)(outbuf - (
unsigned long)
out);
479 nx842_inc_comp_failed(local_devdata);
481 nx842_inc_comp_complete(local_devdata);
482 ibm_nx842_incr_hist(local_devdata->counters->comp_times,
490 static int sw842_decompress(
const unsigned char *,
int,
unsigned char *,
int *,
521 unsigned char *
out,
unsigned int *
outlen,
void *wmem)
524 struct nx842_devdata *local_devdata;
529 int ret = 0,
i,
size, max_sync_size;
530 unsigned long inbuf, outbuf;
531 struct vio_pfo_op op = {
539 outbuf = (
unsigned long)out;
546 dev = local_devdata->dev;
567 csbcpb = &workmem->
csbcpb;
568 memset(csbcpb, 0,
sizeof(*csbcpb));
569 op.csbcpb =
__pa(csbcpb);
585 memcpy((
void *)outbuf, (
void *)inbuf, size);
599 ((inbuf + hdr->
sizes[
i] - 1) & NX842_HW_PAGE_MASK))) {
605 nx842_build_scatterlist(inbuf, hdr->
sizes[
i] , &slin);
607 op.inlen = -nx842_get_scatterlist_size(&slin);
617 op.out =
__pa(outbuf);
618 op.outlen = max_sync_size;
621 nx842_build_scatterlist(outbuf, max_sync_size, &slout);
623 op.outlen = -nx842_get_scatterlist_size(&slout);
631 dev_dbg(dev,
"%s: vio_h_cop_sync error (ret=%d, hret=%ld)\n",
632 __func__, ret, op.hcall_err);
638 ret = nx842_validate_result(dev, &csbcpb->
csb);
646 outbuf += csbcpb->
csb.processed_byte_count;
651 size = max_sync_size;
652 ret = sw842_decompress(
653 (
unsigned char *)inbuf, hdr->
sizes[
i],
654 (
unsigned char *)outbuf, &size, wmem);
656 pr_debug(
"%s: sw842_decompress failed with %d\n",
671 *outlen = (
unsigned int)(outbuf - (
unsigned long)
out);
676 nx842_inc_decomp_failed(local_devdata);
680 nx842_inc_swdecomp(local_devdata);
681 nx842_inc_decomp_complete(local_devdata);
682 ibm_nx842_incr_hist(local_devdata->counters->decomp_times,
700 static int nx842_OF_set_defaults(
struct nx842_devdata *devdata)
703 devdata->max_sync_size = 0;
704 devdata->max_sync_sg = 0;
705 devdata->max_sg_len = 0;
727 static int nx842_OF_upd_status(
struct nx842_devdata *devdata,
735 dev_info(devdata->dev,
"%s: status '%s' is not 'okay'\n",
764 static int nx842_OF_upd_maxsglen(
struct nx842_devdata *devdata,
767 const int *maxsglen = prop->
value;
769 if (prop->
length !=
sizeof(*maxsglen)) {
770 dev_err(devdata->dev,
"%s: unexpected format for ibm,max-sg-len property\n", __func__);
771 dev_dbg(devdata->dev,
"%s: ibm,max-sg-len is %d bytes long, expected %lu bytes\n", __func__,
772 prop->
length,
sizeof(*maxsglen));
775 devdata->max_sg_len = (
unsigned int)
min(*maxsglen,
812 static int nx842_OF_upd_maxsyncop(
struct nx842_devdata *devdata,
815 const struct maxsynccop_t {
820 int decomp_data_limit;
824 if (prop->
length !=
sizeof(*maxsynccop)) {
825 dev_err(devdata->dev,
"%s: unexpected format for ibm,max-sync-cop property\n", __func__);
826 dev_dbg(devdata->dev,
"%s: ibm,max-sync-cop is %d bytes long, expected %lu bytes\n", __func__, prop->
length,
827 sizeof(*maxsynccop));
832 maxsynccop = (
const struct maxsynccop_t *)prop->
value;
838 devdata->max_sync_size =
839 (
unsigned int)
min(maxsynccop->comp_data_limit,
840 maxsynccop->decomp_data_limit);
842 devdata->max_sync_size =
min_t(
unsigned int, devdata->max_sync_size,
845 if (devdata->max_sync_size <
SIZE_4K) {
846 dev_err(devdata->dev,
"%s: hardware max data size (%u) is "
847 "less than the driver minimum, unable to use "
848 "the hardware device\n",
849 __func__, devdata->max_sync_size);
854 devdata->max_sync_sg = (
unsigned int)
min(maxsynccop->comp_sg_limit,
855 maxsynccop->decomp_sg_limit);
856 if (devdata->max_sync_sg < 1) {
857 dev_err(devdata->dev,
"%s: hardware max sg size (%u) is "
858 "less than the driver minimum, unable to use "
859 "the hardware device\n",
860 __func__, devdata->max_sync_sg);
888 static int nx842_OF_upd(
struct property *new_prop)
890 struct nx842_devdata *old_devdata =
NULL;
891 struct nx842_devdata *new_devdata =
NULL;
901 lockdep_is_held(&devdata_mutex));
903 of_node = old_devdata->dev->of_node;
905 if (!old_devdata || !of_node) {
906 pr_err(
"%s: device is not available\n", __func__);
907 spin_unlock_irqrestore(&devdata_mutex, flags);
911 new_devdata = kzalloc(
sizeof(*new_devdata),
GFP_NOFS);
913 dev_err(old_devdata->dev,
"%s: Could not allocate memory for device data\n", __func__);
918 memcpy(new_devdata, old_devdata,
sizeof(*old_devdata));
919 new_devdata->counters = old_devdata->counters;
925 if (!status || !maxsglen || !maxsyncop) {
926 dev_err(old_devdata->dev,
"%s: Could not locate device properties\n", __func__);
937 }
else if (!
strncmp(new_prop->
name,
"ibm,max-sg-len",
941 }
else if (!
strncmp(new_prop->
name,
"ibm,max-sync-cop",
943 maxsyncop = new_prop;
955 ret = nx842_OF_upd_status(new_devdata, status);
959 ret = nx842_OF_upd_maxsglen(new_devdata, maxsglen);
963 ret = nx842_OF_upd_maxsyncop(new_devdata, maxsyncop);
968 dev_info(old_devdata->dev,
"%s: max_sync_size new:%u old:%u\n",
969 __func__, new_devdata->max_sync_size,
970 old_devdata->max_sync_size);
971 dev_info(old_devdata->dev,
"%s: max_sync_sg new:%u old:%u\n",
972 __func__, new_devdata->max_sync_sg,
973 old_devdata->max_sync_sg);
974 dev_info(old_devdata->dev,
"%s: max_sg_len new:%u old:%u\n",
975 __func__, new_devdata->max_sg_len,
976 old_devdata->max_sg_len);
979 spin_unlock_irqrestore(&devdata_mutex, flags);
987 dev_info(old_devdata->dev,
"%s: device disabled\n", __func__);
988 nx842_OF_set_defaults(new_devdata);
990 spin_unlock_irqrestore(&devdata_mutex, flags);
995 dev_err(old_devdata->dev,
"%s: could not update driver from hardware\n", __func__);
996 spin_unlock_irqrestore(&devdata_mutex, flags);
1021 struct pSeries_reconfig_prop_update *upd;
1022 struct nx842_devdata *local_devdata;
1025 upd = (
struct pSeries_reconfig_prop_update *)update;
1030 node = local_devdata->dev->of_node;
1032 if (local_devdata &&
1033 action == PSERIES_UPDATE_PROPERTY &&
1036 nx842_OF_upd(upd->property);
1044 .notifier_call = nx842_OF_notifier,
1047 #define nx842_counter_read(_name) \
1048 static ssize_t nx842_##_name##_show(struct device *dev, \
1049 struct device_attribute *attr, \
1051 struct nx842_devdata *local_devdata; \
1054 local_devdata = rcu_dereference(devdata); \
1055 if (local_devdata) \
1056 p = snprintf(buf, PAGE_SIZE, "%ld\n", \
1057 atomic64_read(&local_devdata->counters->_name)); \
1058 rcu_read_unlock(); \
1062 #define NX842DEV_COUNTER_ATTR_RO(_name) \
1063 nx842_counter_read(_name); \
1064 static struct device_attribute dev_attr_##_name = __ATTR(_name, \
1066 nx842_##_name##_show,\
1079 nx842_timehist_show,
NULL);
1081 0444, nx842_timehist_show,
NULL);
1086 struct nx842_devdata *local_devdata;
1094 if (!local_devdata) {
1099 if (attr == &dev_attr_comp_times)
1100 times = local_devdata->counters->comp_times;
1101 else if (attr == &dev_attr_decomp_times)
1102 times = local_devdata->counters->decomp_times;
1109 bytes =
snprintf(p, bytes_remain,
"%u-%uus:\t%ld\n",
1110 i ? (2<<(i-1)) : 0, (2<<i)-1,
1112 bytes_remain -=
bytes;
1117 bytes =
snprintf(p, bytes_remain,
"%uus - :\t%ld\n",
1126 static struct attribute *nx842_sysfs_entries[] = {
1127 &dev_attr_comp_complete.attr,
1128 &dev_attr_comp_failed.attr,
1129 &dev_attr_decomp_complete.attr,
1130 &dev_attr_decomp_failed.attr,
1131 &dev_attr_swdecomp.attr,
1132 &dev_attr_comp_times.
attr,
1133 &dev_attr_decomp_times.
attr,
1139 .attrs = nx842_sysfs_entries,
1145 struct nx842_devdata *old_devdata, *new_devdata =
NULL;
1146 unsigned long flags;
1151 lockdep_is_held(&devdata_mutex));
1153 if (old_devdata && old_devdata->vdev !=
NULL) {
1154 dev_err(&viodev->
dev,
"%s: Attempt to register more than one instance of the hardware\n", __func__);
1161 new_devdata = kzalloc(
sizeof(*new_devdata),
GFP_NOFS);
1163 dev_err(&viodev->
dev,
"%s: Could not allocate memory for device data\n", __func__);
1168 new_devdata->counters = kzalloc(
sizeof(*new_devdata->counters),
1170 if (!new_devdata->counters) {
1171 dev_err(&viodev->
dev,
"%s: Could not allocate memory for performance counters\n", __func__);
1176 new_devdata->vdev = viodev;
1177 new_devdata->dev = &viodev->
dev;
1178 nx842_OF_set_defaults(new_devdata);
1181 spin_unlock_irqrestore(&devdata_mutex, flags);
1187 ret = nx842_OF_upd(
NULL);
1188 if (ret && ret != -
ENODEV) {
1189 dev_err(&viodev->
dev,
"could not parse device tree. %d\n", ret);
1197 dev_err(&viodev->
dev,
"failed to set driver data for device\n");
1204 dev_err(&viodev->
dev,
"could not create sysfs device attributes\n");
1212 spin_unlock_irqrestore(&devdata_mutex, flags);
1214 kfree(new_devdata->counters);
1222 struct nx842_devdata *old_devdata;
1223 unsigned long flags;
1225 pr_info(
"Removing IBM Power 842 compression device\n");
1230 lockdep_is_held(&devdata_mutex));
1233 spin_unlock_irqrestore(&devdata_mutex, flags);
1237 kfree(old_devdata->counters);
1243 {
"ibm,compression-v1",
"ibm,compression"},
1249 .probe = nx842_probe,
1250 .remove = nx842_remove,
1251 .get_desired_dma = nx842_get_desired_dma,
1252 .id_table = nx842_driver_ids,
1255 static int __init nx842_init(
void)
1257 struct nx842_devdata *new_devdata;
1258 pr_info(
"Registering IBM Power 842 compression driver\n");
1261 new_devdata = kzalloc(
sizeof(*new_devdata),
GFP_KERNEL);
1263 pr_err(
"Could not allocate memory for device data\n");
1274 static void __exit nx842_exit(
void)
1276 struct nx842_devdata *old_devdata;
1277 unsigned long flags;
1279 pr_info(
"Exiting IBM Power 842 compression driver\n");
1282 lockdep_is_held(&devdata_mutex));
1284 spin_unlock_irqrestore(&devdata_mutex, flags);
1300 static int sw842_data8(
const char **,
int *,
unsigned char **,
1302 static int sw842_data4(
const char **,
int *,
unsigned char **,
1304 static int sw842_data2(
const char **,
int *,
unsigned char **,
1306 static int sw842_ptr8(
const char **,
int *,
unsigned char **,
1308 static int sw842_ptr4(
const char **,
int *,
unsigned char **,
1310 static int sw842_ptr2(
const char **,
int *,
unsigned char **,
1314 #define SW842_TMPL_REPEAT 0x1B
1315 #define SW842_TMPL_ZEROS 0x1C
1316 #define SW842_TMPL_EOF 0x1E
1319 { sw842_data8,
NULL},
1320 { sw842_data4, sw842_data2, sw842_ptr2,
NULL},
1321 { sw842_data4, sw842_ptr2, sw842_data2,
NULL},
1322 { sw842_data4, sw842_ptr2, sw842_ptr2,
NULL},
1323 { sw842_data4, sw842_ptr4,
NULL},
1324 { sw842_data2, sw842_ptr2, sw842_data4,
NULL},
1325 { sw842_data2, sw842_ptr2, sw842_data2, sw842_ptr2},
1326 { sw842_data2, sw842_ptr2, sw842_ptr2, sw842_data2},
1327 { sw842_data2, sw842_ptr2, sw842_ptr2, sw842_ptr2,},
1328 { sw842_data2, sw842_ptr2, sw842_ptr4,
NULL},
1329 { sw842_ptr2, sw842_data2, sw842_data4,
NULL},
1330 { sw842_ptr2, sw842_data4, sw842_ptr2,
NULL},
1331 { sw842_ptr2, sw842_data2, sw842_ptr2, sw842_data2},
1332 { sw842_ptr2, sw842_data2, sw842_ptr2, sw842_ptr2},
1333 { sw842_ptr2, sw842_data2, sw842_ptr4,
NULL},
1334 { sw842_ptr2, sw842_ptr2, sw842_data4,
NULL},
1335 { sw842_ptr2, sw842_ptr2, sw842_data2, sw842_ptr2},
1336 { sw842_ptr2, sw842_ptr2, sw842_ptr2, sw842_data2},
1337 { sw842_ptr2, sw842_ptr2, sw842_ptr2, sw842_ptr2},
1338 { sw842_ptr2, sw842_ptr2, sw842_ptr4,
NULL},
1339 { sw842_ptr4, sw842_data4,
NULL},
1340 { sw842_ptr4, sw842_data2, sw842_ptr2,
NULL},
1341 { sw842_ptr4, sw842_ptr2, sw842_data2,
NULL},
1342 { sw842_ptr4, sw842_ptr2, sw842_ptr2,
NULL},
1343 { sw842_ptr4, sw842_ptr4,
NULL},
1349 static uint8_t sw842_get_byte(
const char *buf,
int bit)
1360 static uint8_t sw842_get_template(
const char **buf,
int *bit)
1363 byte = sw842_get_byte(*buf, *bit);
1366 *buf += (*bit + 5) / 8;
1367 *bit = (*bit + 5) % 8;
1372 static uint8_t sw842_get_repeat_count(
const char **buf,
int *bit)
1375 byte = sw842_get_byte(*buf, *bit);
1378 *buf += (*bit + 6) / 8;
1379 *bit = (*bit + 6) % 8;
1383 static uint8_t sw842_get_ptr2(
const char **buf,
int *bit)
1386 ptr = sw842_get_byte(*buf, *bit);
1391 static uint16_t sw842_get_ptr4(
const char **buf,
int *bit,
1399 *buf += (*bit + 9) / 8;
1400 *bit = (*bit + 9) % 8;
1404 static uint8_t sw842_get_ptr8(
const char **buf,
int *bit,
1407 return sw842_get_ptr2(buf, bit);
1412 static int sw842_data8(
const char **
inbuf,
int *inbit,
1413 unsigned char **outbuf,
struct sw842_fifo *fifo)
1417 ret = sw842_data4(inbuf, inbit, outbuf, fifo);
1420 ret = sw842_data4(inbuf, inbit, outbuf, fifo);
1424 static int sw842_data4(
const char **inbuf,
int *inbit,
1425 unsigned char **outbuf,
struct sw842_fifo *fifo)
1429 ret = sw842_data2(inbuf, inbit, outbuf, fifo);
1432 ret = sw842_data2(inbuf, inbit, outbuf, fifo);
1436 static int sw842_data2(
const char **inbuf,
int *inbit,
1437 unsigned char **outbuf,
struct sw842_fifo *fifo)
1439 **outbuf = sw842_get_byte(*inbuf, *inbit);
1442 **outbuf = sw842_get_byte(*inbuf, *inbit);
1448 static int sw842_ptr8(
const char **inbuf,
int *inbit,
1449 unsigned char **outbuf,
struct sw842_fifo *fifo)
1452 ptr = sw842_get_ptr8(inbuf, inbit, fifo);
1460 static int sw842_ptr4(
const char **inbuf,
int *inbit,
1461 unsigned char **outbuf,
struct sw842_fifo *fifo)
1464 ptr = sw842_get_ptr4(inbuf, inbit, fifo);
1472 static int sw842_ptr2(
const char **inbuf,
int *inbit,
1473 unsigned char **outbuf,
struct sw842_fifo *fifo)
1476 ptr = sw842_get_ptr2(inbuf, inbit);
1484 static void sw842_copy_to_fifo(
const char *buf,
struct sw842_fifo *fifo)
1486 unsigned char initial_f2count = fifo->
f2_count;
1501 if (fifo->
f2_count < initial_f2count)
1505 static int sw842_decompress(
const unsigned char *
src,
int srclen,
1506 unsigned char *
dst,
int *destlen,
1512 unsigned char *outbuf, *outbuf_end, *origbuf, *prevbuf;
1513 const char *inbuf_end;
1516 int i, repeat_count;
1521 memset(fifo, 0,
sizeof(*fifo));
1525 inbuf_end = src + srclen;
1527 outbuf_end = dst + *destlen;
1529 while ((tmpl = sw842_get_template(&inbuf, &inbit)) !=
SW842_TMPL_EOF) {
1530 if (inbuf >= inbuf_end) {
1540 if (prevbuf ==
NULL) {
1545 repeat_count = sw842_get_repeat_count(&inbuf,
1549 if (inbuf > inbuf_end) {
1554 for (i = 0; i < repeat_count; i++) {
1556 if ((outbuf + 8) > outbuf_end) {
1561 memcpy(outbuf, prevbuf, 8);
1562 sw842_copy_to_fifo(outbuf, fifo);
1569 if ((outbuf + 8) > outbuf_end) {
1575 sw842_copy_to_fifo(outbuf, fifo);
1586 if ((inbuf + 2) > inbuf_end) {
1592 if ((outbuf + 8) > outbuf_end) {
1597 while (opindex < 4 &&
1598 (op = sw842_tmpl_ops[tmpl][opindex++])
1600 ret = (*op)(&inbuf, &inbit, &outbuf, fifo);
1605 sw842_copy_to_fifo(origbuf, fifo);
1612 *destlen = (
unsigned int)(outbuf - dst);