ore: Make ore_striping_info and ore_calc_stripe_info public
[pandora-kernel.git] / fs / exofs / ore.c
1 /*
2  * Copyright (C) 2005, 2006
3  * Avishay Traeger (avishay@gmail.com)
4  * Copyright (C) 2008, 2009
5  * Boaz Harrosh <bharrosh@panasas.com>
6  *
7  * This file is part of exofs.
8  *
9  * exofs is free software; you can redistribute it and/or modify
10  * it under the terms of the GNU General Public License as published by
11  * the Free Software Foundation.  Since it is based on ext2, and the only
12  * valid version of GPL for the Linux kernel is version 2, the only valid
13  * version of GPL for exofs is version 2.
14  *
15  * exofs is distributed in the hope that it will be useful,
16  * but WITHOUT ANY WARRANTY; without even the implied warranty of
17  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
18  * GNU General Public License for more details.
19  *
20  * You should have received a copy of the GNU General Public License
21  * along with exofs; if not, write to the Free Software
22  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
23  */
24
25 #include <linux/slab.h>
26 #include <asm/div64.h>
27
28 #include <scsi/osd_ore.h>
29
/* Error messages are always emitted, prefixed with "ore: ". */
#define ORE_ERR(fmt, a...) printk(KERN_ERR "ore: " fmt, ##a)

/*
 * Debug messages: printed with function name and line number when
 * CONFIG_EXOFS_DEBUG is set; otherwise reduced to a dead "if (0)" call
 * that still references the format and its arguments.
 */
#ifdef CONFIG_EXOFS_DEBUG
#define ORE_DBGMSG(fmt, a...) \
	printk(KERN_NOTICE "ore @%s:%d: " fmt, __func__, __LINE__, ##a)
#else
#define ORE_DBGMSG(fmt, a...) \
	do { if (0) printk(fmt, ##a); } while (0)
#endif

/*
 * u64 has problems with printk; this casts it to unsigned long long.
 * The whole expansion is parenthesised so the macro behaves as a single
 * primary expression wherever it is used (e.g. as a sizeof operand).
 */
#define _LLU(x) ((unsigned long long)(x))

/* Very chatty messages are compiled out entirely by default. */
#define ORE_DBGMSG2(M...) do {} while (0)
/* #define ORE_DBGMSG2 ORE_DBGMSG */
45
46 MODULE_AUTHOR("Boaz Harrosh <bharrosh@panasas.com>");
47 MODULE_DESCRIPTION("Objects Raid Engine ore.ko");
48 MODULE_LICENSE("GPL");
49
50 static u8 *_ios_cred(struct ore_io_state *ios, unsigned index)
51 {
52         return ios->oc->comps[index & ios->oc->single_comp].cred;
53 }
54
55 static struct osd_obj_id *_ios_obj(struct ore_io_state *ios, unsigned index)
56 {
57         return &ios->oc->comps[index & ios->oc->single_comp].obj;
58 }
59
60 static struct osd_dev *_ios_od(struct ore_io_state *ios, unsigned index)
61 {
62         return ios->oc->ods[index];
63 }
64
65 int  ore_get_rw_state(struct ore_layout *layout, struct ore_components *oc,
66                       bool is_reading, u64 offset, u64 length,
67                       struct ore_io_state **pios)
68 {
69         struct ore_io_state *ios;
70
71         /*TODO: Maybe use kmem_cach per sbi of size
72          * exofs_io_state_size(layout->s_numdevs)
73          */
74         ios = kzalloc(ore_io_state_size(oc->numdevs), GFP_KERNEL);
75         if (unlikely(!ios)) {
76                 ORE_DBGMSG("Failed kzalloc bytes=%d\n",
77                              ore_io_state_size(oc->numdevs));
78                 *pios = NULL;
79                 return -ENOMEM;
80         }
81
82         ios->layout = layout;
83         ios->oc = oc;
84         ios->offset = offset;
85         ios->length = length;
86         ios->reading = is_reading;
87
88         *pios = ios;
89         return 0;
90 }
91 EXPORT_SYMBOL(ore_get_rw_state);
92
93 int  ore_get_io_state(struct ore_layout *layout, struct ore_components *oc,
94                       struct ore_io_state **ios)
95 {
96         return ore_get_rw_state(layout, oc, true, 0, 0, ios);
97 }
98 EXPORT_SYMBOL(ore_get_io_state);
99
100 void ore_put_io_state(struct ore_io_state *ios)
101 {
102         if (ios) {
103                 unsigned i;
104
105                 for (i = 0; i < ios->numdevs; i++) {
106                         struct ore_per_dev_state *per_dev = &ios->per_dev[i];
107
108                         if (per_dev->or)
109                                 osd_end_request(per_dev->or);
110                         if (per_dev->bio)
111                                 bio_put(per_dev->bio);
112                 }
113
114                 kfree(ios);
115         }
116 }
117 EXPORT_SYMBOL(ore_put_io_state);
118
/*
 * "done" callback installed by ore_io_execute() for synchronous IO:
 * @p is the completion the caller is waiting on; @ios is unused.
 */
static void _sync_done(struct ore_io_state *ios, void *p)
{
	complete((struct completion *)p);
}
125
126 static void _last_io(struct kref *kref)
127 {
128         struct ore_io_state *ios = container_of(
129                                         kref, struct ore_io_state, kref);
130
131         ios->done(ios, ios->private);
132 }
133
134 static void _done_io(struct osd_request *or, void *p)
135 {
136         struct ore_io_state *ios = p;
137
138         kref_put(&ios->kref, _last_io);
139 }
140
141 static int ore_io_execute(struct ore_io_state *ios)
142 {
143         DECLARE_COMPLETION_ONSTACK(wait);
144         bool sync = (ios->done == NULL);
145         int i, ret;
146
147         if (sync) {
148                 ios->done = _sync_done;
149                 ios->private = &wait;
150         }
151
152         for (i = 0; i < ios->numdevs; i++) {
153                 struct osd_request *or = ios->per_dev[i].or;
154                 if (unlikely(!or))
155                         continue;
156
157                 ret = osd_finalize_request(or, 0, _ios_cred(ios, i), NULL);
158                 if (unlikely(ret)) {
159                         ORE_DBGMSG("Failed to osd_finalize_request() => %d\n",
160                                      ret);
161                         return ret;
162                 }
163         }
164
165         kref_init(&ios->kref);
166
167         for (i = 0; i < ios->numdevs; i++) {
168                 struct osd_request *or = ios->per_dev[i].or;
169                 if (unlikely(!or))
170                         continue;
171
172                 kref_get(&ios->kref);
173                 osd_execute_request_async(or, _done_io, ios);
174         }
175
176         kref_put(&ios->kref, _last_io);
177         ret = 0;
178
179         if (sync) {
180                 wait_for_completion(&wait);
181                 ret = ore_check_io(ios, NULL);
182         }
183         return ret;
184 }
185
186 static void _clear_bio(struct bio *bio)
187 {
188         struct bio_vec *bv;
189         unsigned i;
190
191         __bio_for_each_segment(bv, bio, i, 0) {
192                 unsigned this_count = bv->bv_len;
193
194                 if (likely(PAGE_SIZE == this_count))
195                         clear_highpage(bv->bv_page);
196                 else
197                         zero_user(bv->bv_page, bv->bv_offset, this_count);
198         }
199 }
200
201 int ore_check_io(struct ore_io_state *ios, u64 *resid)
202 {
203         enum osd_err_priority acumulated_osd_err = 0;
204         int acumulated_lin_err = 0;
205         int i;
206
207         for (i = 0; i < ios->numdevs; i++) {
208                 struct osd_sense_info osi;
209                 struct osd_request *or = ios->per_dev[i].or;
210                 int ret;
211
212                 if (unlikely(!or))
213                         continue;
214
215                 ret = osd_req_decode_sense(or, &osi);
216                 if (likely(!ret))
217                         continue;
218
219                 if (OSD_ERR_PRI_CLEAR_PAGES == osi.osd_err_pri) {
220                         /* start read offset passed endof file */
221                         _clear_bio(ios->per_dev[i].bio);
222                         ORE_DBGMSG("start read offset passed end of file "
223                                 "offset=0x%llx, length=0x%llx\n",
224                                 _LLU(ios->per_dev[i].offset),
225                                 _LLU(ios->per_dev[i].length));
226
227                         continue; /* we recovered */
228                 }
229
230                 if (osi.osd_err_pri >= acumulated_osd_err) {
231                         acumulated_osd_err = osi.osd_err_pri;
232                         acumulated_lin_err = ret;
233                 }
234         }
235
236         /* TODO: raid specific residual calculations */
237         if (resid) {
238                 if (likely(!acumulated_lin_err))
239                         *resid = 0;
240                 else
241                         *resid = ios->length;
242         }
243
244         return acumulated_lin_err;
245 }
246 EXPORT_SYMBOL(ore_check_io);
247
248 /*
249  * L - logical offset into the file
250  *
251  * U - The number of bytes in a stripe within a group
252  *
253  *      U = stripe_unit * group_width
254  *
255  * T - The number of bytes striped within a group of component objects
256  *     (before advancing to the next group)
257  *
258  *      T = stripe_unit * group_width * group_depth
259  *
260  * S - The number of bytes striped across all component objects
261  *     before the pattern repeats
262  *
263  *      S = stripe_unit * group_width * group_depth * group_count
264  *
265  * M - The "major" (i.e., across all components) stripe number
266  *
267  *      M = L / S
268  *
269  * G - Counts the groups from the beginning of the major stripe
270  *
271  *      G = (L - (M * S)) / T   [or (L % S) / T]
272  *
273  * H - The byte offset within the group
274  *
275  *      H = (L - (M * S)) % T   [or (L % S) % T]
276  *
277  * N - The "minor" (i.e., across the group) stripe number
278  *
279  *      N = H / U
280  *
281  * C - The component index coresponding to L
282  *
283  *      C = (H - (N * U)) / stripe_unit + G * group_width
284  *      [or (L % U) / stripe_unit + G * group_width]
285  *
286  * O - The component offset coresponding to L
287  *
288  *      O = L % stripe_unit + N * stripe_unit + M * group_depth * stripe_unit
289  */
290 static void ore_calc_stripe_info(struct ore_layout *layout, u64 file_offset,
291                                  struct ore_striping_info *si)
292 {
293         u32     stripe_unit = layout->stripe_unit;
294         u32     group_width = layout->group_width;
295         u64     group_depth = layout->group_depth;
296
297         u32     U = stripe_unit * group_width;
298         u64     T = U * group_depth;
299         u64     S = T * layout->group_count;
300         u64     M = div64_u64(file_offset, S);
301
302         /*
303         G = (L - (M * S)) / T
304         H = (L - (M * S)) % T
305         */
306         u64     LmodS = file_offset - M * S;
307         u32     G = div64_u64(LmodS, T);
308         u64     H = LmodS - G * T;
309
310         u32     N = div_u64(H, U);
311
312         /* "H - (N * U)" is just "H % U" so it's bound to u32 */
313         si->dev = (u32)(H - (N * U)) / stripe_unit + G * group_width;
314         si->dev *= layout->mirrors_p1;
315
316         div_u64_rem(file_offset, stripe_unit, &si->unit_off);
317
318         si->obj_offset = si->unit_off + (N * stripe_unit) +
319                                   (M * group_depth * stripe_unit);
320
321         si->group_length = T - H;
322         si->M = M;
323 }
324
325 static int _add_stripe_unit(struct ore_io_state *ios,  unsigned *cur_pg,
326                 unsigned pgbase, struct ore_per_dev_state *per_dev,
327                 int cur_len)
328 {
329         unsigned pg = *cur_pg;
330         struct request_queue *q =
331                         osd_request_queue(_ios_od(ios, per_dev->dev));
332
333         per_dev->length += cur_len;
334
335         if (per_dev->bio == NULL) {
336                 unsigned pages_in_stripe = ios->layout->group_width *
337                                         (ios->layout->stripe_unit / PAGE_SIZE);
338                 unsigned bio_size = (ios->nr_pages + pages_in_stripe) /
339                                                 ios->layout->group_width;
340
341                 per_dev->bio = bio_kmalloc(GFP_KERNEL, bio_size);
342                 if (unlikely(!per_dev->bio)) {
343                         ORE_DBGMSG("Failed to allocate BIO size=%u\n",
344                                      bio_size);
345                         return -ENOMEM;
346                 }
347         }
348
349         while (cur_len > 0) {
350                 unsigned pglen = min_t(unsigned, PAGE_SIZE - pgbase, cur_len);
351                 unsigned added_len;
352
353                 BUG_ON(ios->nr_pages <= pg);
354                 cur_len -= pglen;
355
356                 added_len = bio_add_pc_page(q, per_dev->bio, ios->pages[pg],
357                                             pglen, pgbase);
358                 if (unlikely(pglen != added_len))
359                         return -ENOMEM;
360                 pgbase = 0;
361                 ++pg;
362         }
363         BUG_ON(cur_len);
364
365         *cur_pg = pg;
366         return 0;
367 }
368
369 static int _prepare_one_group(struct ore_io_state *ios, u64 length,
370                               struct ore_striping_info *si)
371 {
372         unsigned stripe_unit = ios->layout->stripe_unit;
373         unsigned mirrors_p1 = ios->layout->mirrors_p1;
374         unsigned devs_in_group = ios->layout->group_width * mirrors_p1;
375         unsigned dev = si->dev;
376         unsigned first_dev = dev - (dev % devs_in_group);
377         unsigned max_comp = ios->numdevs ? ios->numdevs - mirrors_p1 : 0;
378         unsigned cur_pg = ios->pages_consumed;
379         int ret = 0;
380
381         while (length) {
382                 struct ore_per_dev_state *per_dev = &ios->per_dev[dev];
383                 unsigned cur_len, page_off = 0;
384
385                 if (!per_dev->length) {
386                         per_dev->dev = dev;
387                         if (dev < si->dev) {
388                                 per_dev->offset = si->obj_offset + stripe_unit -
389                                                                    si->unit_off;
390                                 cur_len = stripe_unit;
391                         } else if (dev == si->dev) {
392                                 per_dev->offset = si->obj_offset;
393                                 cur_len = stripe_unit - si->unit_off;
394                                 page_off = si->unit_off & ~PAGE_MASK;
395                                 BUG_ON(page_off && (page_off != ios->pgbase));
396                         } else { /* dev > si->dev */
397                                 per_dev->offset = si->obj_offset - si->unit_off;
398                                 cur_len = stripe_unit;
399                         }
400
401                         if (max_comp < dev)
402                                 max_comp = dev;
403                 } else {
404                         cur_len = stripe_unit;
405                 }
406                 if (cur_len >= length)
407                         cur_len = length;
408
409                 ret = _add_stripe_unit(ios, &cur_pg, page_off , per_dev,
410                                        cur_len);
411                 if (unlikely(ret))
412                         goto out;
413
414                 dev += mirrors_p1;
415                 dev = (dev % devs_in_group) + first_dev;
416
417                 length -= cur_len;
418         }
419 out:
420         ios->numdevs = max_comp + mirrors_p1;
421         ios->pages_consumed = cur_pg;
422         return ret;
423 }
424
425 static int _prepare_for_striping(struct ore_io_state *ios)
426 {
427         u64 length = ios->length;
428         u64 offset = ios->offset;
429         struct ore_striping_info si;
430         int ret = 0;
431
432         if (!ios->pages) {
433                 if (ios->kern_buff) {
434                         struct ore_per_dev_state *per_dev = &ios->per_dev[0];
435
436                         ore_calc_stripe_info(ios->layout, ios->offset, &si);
437                         per_dev->offset = si.obj_offset;
438                         per_dev->dev = si.dev;
439
440                         /* no cross device without page array */
441                         BUG_ON((ios->layout->group_width > 1) &&
442                                (si.unit_off + ios->length >
443                                 ios->layout->stripe_unit));
444                 }
445                 ios->numdevs = ios->layout->mirrors_p1;
446                 return 0;
447         }
448
449         while (length) {
450                 ore_calc_stripe_info(ios->layout, offset, &si);
451
452                 if (length < si.group_length)
453                         si.group_length = length;
454
455                 ret = _prepare_one_group(ios, si.group_length, &si);
456                 if (unlikely(ret))
457                         goto out;
458
459                 offset += si.group_length;
460                 length -= si.group_length;
461         }
462
463 out:
464         return ret;
465 }
466
467 int ore_create(struct ore_io_state *ios)
468 {
469         int i, ret;
470
471         for (i = 0; i < ios->oc->numdevs; i++) {
472                 struct osd_request *or;
473
474                 or = osd_start_request(_ios_od(ios, i), GFP_KERNEL);
475                 if (unlikely(!or)) {
476                         ORE_ERR("%s: osd_start_request failed\n", __func__);
477                         ret = -ENOMEM;
478                         goto out;
479                 }
480                 ios->per_dev[i].or = or;
481                 ios->numdevs++;
482
483                 osd_req_create_object(or, _ios_obj(ios, i));
484         }
485         ret = ore_io_execute(ios);
486
487 out:
488         return ret;
489 }
490 EXPORT_SYMBOL(ore_create);
491
492 int ore_remove(struct ore_io_state *ios)
493 {
494         int i, ret;
495
496         for (i = 0; i < ios->oc->numdevs; i++) {
497                 struct osd_request *or;
498
499                 or = osd_start_request(_ios_od(ios, i), GFP_KERNEL);
500                 if (unlikely(!or)) {
501                         ORE_ERR("%s: osd_start_request failed\n", __func__);
502                         ret = -ENOMEM;
503                         goto out;
504                 }
505                 ios->per_dev[i].or = or;
506                 ios->numdevs++;
507
508                 osd_req_remove_object(or, _ios_obj(ios, i));
509         }
510         ret = ore_io_execute(ios);
511
512 out:
513         return ret;
514 }
515 EXPORT_SYMBOL(ore_remove);
516
517 static int _write_mirror(struct ore_io_state *ios, int cur_comp)
518 {
519         struct ore_per_dev_state *master_dev = &ios->per_dev[cur_comp];
520         unsigned dev = ios->per_dev[cur_comp].dev;
521         unsigned last_comp = cur_comp + ios->layout->mirrors_p1;
522         int ret = 0;
523
524         if (ios->pages && !master_dev->length)
525                 return 0; /* Just an empty slot */
526
527         for (; cur_comp < last_comp; ++cur_comp, ++dev) {
528                 struct ore_per_dev_state *per_dev = &ios->per_dev[cur_comp];
529                 struct osd_request *or;
530
531                 or = osd_start_request(_ios_od(ios, dev), GFP_KERNEL);
532                 if (unlikely(!or)) {
533                         ORE_ERR("%s: osd_start_request failed\n", __func__);
534                         ret = -ENOMEM;
535                         goto out;
536                 }
537                 per_dev->or = or;
538                 per_dev->offset = master_dev->offset;
539
540                 if (ios->pages) {
541                         struct bio *bio;
542
543                         if (per_dev != master_dev) {
544                                 bio = bio_kmalloc(GFP_KERNEL,
545                                                   master_dev->bio->bi_max_vecs);
546                                 if (unlikely(!bio)) {
547                                         ORE_DBGMSG(
548                                               "Failed to allocate BIO size=%u\n",
549                                               master_dev->bio->bi_max_vecs);
550                                         ret = -ENOMEM;
551                                         goto out;
552                                 }
553
554                                 __bio_clone(bio, master_dev->bio);
555                                 bio->bi_bdev = NULL;
556                                 bio->bi_next = NULL;
557                                 per_dev->length = master_dev->length;
558                                 per_dev->bio =  bio;
559                                 per_dev->dev = dev;
560                         } else {
561                                 bio = master_dev->bio;
562                                 /* FIXME: bio_set_dir() */
563                                 bio->bi_rw |= REQ_WRITE;
564                         }
565
566                         osd_req_write(or, _ios_obj(ios, dev), per_dev->offset,
567                                       bio, per_dev->length);
568                         ORE_DBGMSG("write(0x%llx) offset=0x%llx "
569                                       "length=0x%llx dev=%d\n",
570                                      _LLU(_ios_obj(ios, dev)->id),
571                                      _LLU(per_dev->offset),
572                                      _LLU(per_dev->length), dev);
573                 } else if (ios->kern_buff) {
574                         ret = osd_req_write_kern(or, _ios_obj(ios, dev),
575                                                  per_dev->offset,
576                                                  ios->kern_buff, ios->length);
577                         if (unlikely(ret))
578                                 goto out;
579                         ORE_DBGMSG2("write_kern(0x%llx) offset=0x%llx "
580                                       "length=0x%llx dev=%d\n",
581                                      _LLU(_ios_obj(ios, dev)->id),
582                                      _LLU(per_dev->offset),
583                                      _LLU(ios->length), dev);
584                 } else {
585                         osd_req_set_attributes(or, _ios_obj(ios, dev));
586                         ORE_DBGMSG2("obj(0x%llx) set_attributes=%d dev=%d\n",
587                                      _LLU(_ios_obj(ios, dev)->id),
588                                      ios->out_attr_len, dev);
589                 }
590
591                 if (ios->out_attr)
592                         osd_req_add_set_attr_list(or, ios->out_attr,
593                                                   ios->out_attr_len);
594
595                 if (ios->in_attr)
596                         osd_req_add_get_attr_list(or, ios->in_attr,
597                                                   ios->in_attr_len);
598         }
599
600 out:
601         return ret;
602 }
603
604 int ore_write(struct ore_io_state *ios)
605 {
606         int i;
607         int ret;
608
609         ret = _prepare_for_striping(ios);
610         if (unlikely(ret))
611                 return ret;
612
613         for (i = 0; i < ios->numdevs; i += ios->layout->mirrors_p1) {
614                 ret = _write_mirror(ios, i);
615                 if (unlikely(ret))
616                         return ret;
617         }
618
619         ret = ore_io_execute(ios);
620         return ret;
621 }
622 EXPORT_SYMBOL(ore_write);
623
624 static int _read_mirror(struct ore_io_state *ios, unsigned cur_comp)
625 {
626         struct osd_request *or;
627         struct ore_per_dev_state *per_dev = &ios->per_dev[cur_comp];
628         struct osd_obj_id *obj = _ios_obj(ios, cur_comp);
629         unsigned first_dev = (unsigned)obj->id;
630
631         if (ios->pages && !per_dev->length)
632                 return 0; /* Just an empty slot */
633
634         first_dev = per_dev->dev + first_dev % ios->layout->mirrors_p1;
635         or = osd_start_request(_ios_od(ios, first_dev), GFP_KERNEL);
636         if (unlikely(!or)) {
637                 ORE_ERR("%s: osd_start_request failed\n", __func__);
638                 return -ENOMEM;
639         }
640         per_dev->or = or;
641
642         if (ios->pages) {
643                 osd_req_read(or, obj, per_dev->offset,
644                                 per_dev->bio, per_dev->length);
645                 ORE_DBGMSG("read(0x%llx) offset=0x%llx length=0x%llx"
646                              " dev=%d\n", _LLU(obj->id),
647                              _LLU(per_dev->offset), _LLU(per_dev->length),
648                              first_dev);
649         } else if (ios->kern_buff) {
650                 int ret = osd_req_read_kern(or, obj, per_dev->offset,
651                                             ios->kern_buff, ios->length);
652                 ORE_DBGMSG2("read_kern(0x%llx) offset=0x%llx "
653                               "length=0x%llx dev=%d ret=>%d\n",
654                               _LLU(obj->id), _LLU(per_dev->offset),
655                               _LLU(ios->length), first_dev, ret);
656                 if (unlikely(ret))
657                         return ret;
658         } else {
659                 osd_req_get_attributes(or, obj);
660                 ORE_DBGMSG2("obj(0x%llx) get_attributes=%d dev=%d\n",
661                               _LLU(obj->id),
662                               ios->in_attr_len, first_dev);
663         }
664         if (ios->out_attr)
665                 osd_req_add_set_attr_list(or, ios->out_attr, ios->out_attr_len);
666
667         if (ios->in_attr)
668                 osd_req_add_get_attr_list(or, ios->in_attr, ios->in_attr_len);
669
670         return 0;
671 }
672
673 int ore_read(struct ore_io_state *ios)
674 {
675         int i;
676         int ret;
677
678         ret = _prepare_for_striping(ios);
679         if (unlikely(ret))
680                 return ret;
681
682         for (i = 0; i < ios->numdevs; i += ios->layout->mirrors_p1) {
683                 ret = _read_mirror(ios, i);
684                 if (unlikely(ret))
685                         return ret;
686         }
687
688         ret = ore_io_execute(ios);
689         return ret;
690 }
691 EXPORT_SYMBOL(ore_read);
692
693 int extract_attr_from_ios(struct ore_io_state *ios, struct osd_attr *attr)
694 {
695         struct osd_attr cur_attr = {.attr_page = 0}; /* start with zeros */
696         void *iter = NULL;
697         int nelem;
698
699         do {
700                 nelem = 1;
701                 osd_req_decode_get_attr_list(ios->per_dev[0].or,
702                                              &cur_attr, &nelem, &iter);
703                 if ((cur_attr.attr_page == attr->attr_page) &&
704                     (cur_attr.attr_id == attr->attr_id)) {
705                         attr->len = cur_attr.len;
706                         attr->val_ptr = cur_attr.val_ptr;
707                         return 0;
708                 }
709         } while (iter);
710
711         return -EIO;
712 }
713 EXPORT_SYMBOL(extract_attr_from_ios);
714
715 static int _truncate_mirrors(struct ore_io_state *ios, unsigned cur_comp,
716                              struct osd_attr *attr)
717 {
718         int last_comp = cur_comp + ios->layout->mirrors_p1;
719
720         for (; cur_comp < last_comp; ++cur_comp) {
721                 struct ore_per_dev_state *per_dev = &ios->per_dev[cur_comp];
722                 struct osd_request *or;
723
724                 or = osd_start_request(_ios_od(ios, cur_comp), GFP_KERNEL);
725                 if (unlikely(!or)) {
726                         ORE_ERR("%s: osd_start_request failed\n", __func__);
727                         return -ENOMEM;
728                 }
729                 per_dev->or = or;
730
731                 osd_req_set_attributes(or, _ios_obj(ios, cur_comp));
732                 osd_req_add_set_attr_list(or, attr, 1);
733         }
734
735         return 0;
736 }
737
738 struct _trunc_info {
739         struct ore_striping_info si;
740         u64 prev_group_obj_off;
741         u64 next_group_obj_off;
742
743         unsigned first_group_dev;
744         unsigned nex_group_dev;
745         unsigned max_devs;
746 };
747
748 static void _calc_trunk_info(struct ore_layout *layout, u64 file_offset,
749                              struct _trunc_info *ti)
750 {
751         unsigned stripe_unit = layout->stripe_unit;
752
753         ore_calc_stripe_info(layout, file_offset, &ti->si);
754
755         ti->prev_group_obj_off = ti->si.M * stripe_unit;
756         ti->next_group_obj_off = ti->si.M ? (ti->si.M - 1) * stripe_unit : 0;
757
758         ti->first_group_dev = ti->si.dev - (ti->si.dev % layout->group_width);
759         ti->nex_group_dev = ti->first_group_dev + layout->group_width;
760         ti->max_devs = layout->group_width * layout->group_count;
761 }
762
763 int ore_truncate(struct ore_layout *layout, struct ore_components *oc,
764                    u64 size)
765 {
766         struct ore_io_state *ios;
767         struct exofs_trunc_attr {
768                 struct osd_attr attr;
769                 __be64 newsize;
770         } *size_attrs;
771         struct _trunc_info ti;
772         int i, ret;
773
774         ret = ore_get_io_state(layout, oc, &ios);
775         if (unlikely(ret))
776                 return ret;
777
778         _calc_trunk_info(ios->layout, size, &ti);
779
780         size_attrs = kcalloc(ti.max_devs, sizeof(*size_attrs),
781                              GFP_KERNEL);
782         if (unlikely(!size_attrs)) {
783                 ret = -ENOMEM;
784                 goto out;
785         }
786
787         ios->numdevs = ios->oc->numdevs;
788
789         for (i = 0; i < ti.max_devs; ++i) {
790                 struct exofs_trunc_attr *size_attr = &size_attrs[i];
791                 u64 obj_size;
792
793                 if (i < ti.first_group_dev)
794                         obj_size = ti.prev_group_obj_off;
795                 else if (i >= ti.nex_group_dev)
796                         obj_size = ti.next_group_obj_off;
797                 else if (i < ti.si.dev) /* dev within this group */
798                         obj_size = ti.si.obj_offset +
799                                       ios->layout->stripe_unit - ti.si.unit_off;
800                 else if (i == ti.si.dev)
801                         obj_size = ti.si.obj_offset;
802                 else /* i > ti.dev */
803                         obj_size = ti.si.obj_offset - ti.si.unit_off;
804
805                 size_attr->newsize = cpu_to_be64(obj_size);
806                 size_attr->attr = g_attr_logical_length;
807                 size_attr->attr.val_ptr = &size_attr->newsize;
808
809                 ORE_DBGMSG("trunc(0x%llx) obj_offset=0x%llx dev=%d\n",
810                              _LLU(oc->comps->obj.id), _LLU(obj_size), i);
811                 ret = _truncate_mirrors(ios, i * ios->layout->mirrors_p1,
812                                         &size_attr->attr);
813                 if (unlikely(ret))
814                         goto out;
815         }
816         ret = ore_io_execute(ios);
817
818 out:
819         kfree(size_attrs);
820         ore_put_io_state(ios);
821         return ret;
822 }
823 EXPORT_SYMBOL(ore_truncate);
824
825 const struct osd_attr g_attr_logical_length = ATTR_DEF(
826         OSD_APAGE_OBJECT_INFORMATION, OSD_ATTR_OI_LOGICAL_LENGTH, 8);
827 EXPORT_SYMBOL(g_attr_logical_length);