Btrfs: make things static and include the right headers
[pandora-kernel.git] / fs / btrfs / zlib.c
1 /*
2  * Copyright (C) 2008 Oracle.  All rights reserved.
3  *
4  * This program is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU General Public
6  * License v2 as published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope that it will be useful,
9  * but WITHOUT ANY WARRANTY; without even the implied warranty of
10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
11  * General Public License for more details.
12  *
13  * You should have received a copy of the GNU General Public
14  * License along with this program; if not, write to the
15  * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16  * Boston, MA 021110-1307, USA.
17  *
18  * Based on jffs2 zlib code:
19  * Copyright © 2001-2007 Red Hat, Inc.
20  * Created by David Woodhouse <dwmw2@infradead.org>
21  */
22
23 #include <linux/kernel.h>
24 #include <linux/slab.h>
25 #include <linux/zlib.h>
26 #include <linux/zutil.h>
27 #include <linux/vmalloc.h>
28 #include <linux/init.h>
29 #include <linux/err.h>
30 #include <linux/sched.h>
31 #include <linux/pagemap.h>
32 #include <linux/bio.h>
33 #include "compression.h"
34
35 /* Plan: call deflate() with avail_in == *sourcelen,
36         avail_out = *dstlen - 12 and flush == Z_FINISH.
37         If it doesn't manage to finish, call it again with
38         avail_in == 0 and avail_out set to the remaining 12
39         bytes for it to clean up.
40    Q: Is 12 bytes sufficient?
41 */
42 #define STREAM_END_SPACE 12
43
44 struct workspace {
45         z_stream inf_strm;
46         z_stream def_strm;
47         char *buf;
48         struct list_head list;
49 };
50
51 static LIST_HEAD(idle_workspace);
52 static DEFINE_SPINLOCK(workspace_lock);
53 static unsigned long num_workspace;
54 static atomic_t alloc_workspace = ATOMIC_INIT(0);
55 static DECLARE_WAIT_QUEUE_HEAD(workspace_wait);
56
57 /*
58  * this finds an available zlib workspace or allocates a new one
59  * NULL or an ERR_PTR is returned if things go bad.
60  */
61 static struct workspace *find_zlib_workspace(void)
62 {
63         struct workspace *workspace;
64         int ret;
65         int cpus = num_online_cpus();
66
67 again:
68         spin_lock(&workspace_lock);
69         if (!list_empty(&idle_workspace)) {
70                 workspace = list_entry(idle_workspace.next, struct workspace,
71                                        list);
72                 list_del(&workspace->list);
73                 num_workspace--;
74                 spin_unlock(&workspace_lock);
75                 return workspace;
76
77         }
78         spin_unlock(&workspace_lock);
79         if (atomic_read(&alloc_workspace) > cpus) {
80                 DEFINE_WAIT(wait);
81                 prepare_to_wait(&workspace_wait, &wait, TASK_UNINTERRUPTIBLE);
82                 if (atomic_read(&alloc_workspace) > cpus)
83                         schedule();
84                 finish_wait(&workspace_wait, &wait);
85                 goto again;
86         }
87         atomic_inc(&alloc_workspace);
88         workspace = kzalloc(sizeof(*workspace), GFP_NOFS);
89         if (!workspace) {
90                 ret = -ENOMEM;
91                 goto fail;
92         }
93
94         workspace->def_strm.workspace = vmalloc(zlib_deflate_workspacesize());
95         if (!workspace->def_strm.workspace) {
96                 ret = -ENOMEM;
97                 goto fail;
98         }
99         workspace->inf_strm.workspace = vmalloc(zlib_inflate_workspacesize());
100         if (!workspace->inf_strm.workspace) {
101                 ret = -ENOMEM;
102                 goto fail_inflate;
103         }
104         workspace->buf = kmalloc(PAGE_CACHE_SIZE, GFP_NOFS);
105         if (!workspace->buf) {
106                 ret = -ENOMEM;
107                 goto fail_kmalloc;
108         }
109         return workspace;
110
111 fail_kmalloc:
112         vfree(workspace->inf_strm.workspace);
113 fail_inflate:
114         vfree(workspace->def_strm.workspace);
115 fail:
116         kfree(workspace);
117         atomic_dec(&alloc_workspace);
118         wake_up(&workspace_wait);
119         return ERR_PTR(ret);
120 }
121
122 /*
123  * put a workspace struct back on the list or free it if we have enough
124  * idle ones sitting around
125  */
126 static int free_workspace(struct workspace *workspace)
127 {
128         spin_lock(&workspace_lock);
129         if (num_workspace < num_online_cpus()) {
130                 list_add_tail(&workspace->list, &idle_workspace);
131                 num_workspace++;
132                 spin_unlock(&workspace_lock);
133                 if (waitqueue_active(&workspace_wait))
134                         wake_up(&workspace_wait);
135                 return 0;
136         }
137         spin_unlock(&workspace_lock);
138         vfree(workspace->def_strm.workspace);
139         vfree(workspace->inf_strm.workspace);
140         kfree(workspace->buf);
141         kfree(workspace);
142
143         atomic_dec(&alloc_workspace);
144         if (waitqueue_active(&workspace_wait))
145                 wake_up(&workspace_wait);
146         return 0;
147 }
148
149 /*
150  * cleanup function for module exit
151  */
152 static void free_workspaces(void)
153 {
154         struct workspace *workspace;
155         while(!list_empty(&idle_workspace)) {
156                 workspace = list_entry(idle_workspace.next, struct workspace,
157                                        list);
158                 list_del(&workspace->list);
159                 vfree(workspace->def_strm.workspace);
160                 vfree(workspace->inf_strm.workspace);
161                 kfree(workspace->buf);
162                 kfree(workspace);
163                 atomic_dec(&alloc_workspace);
164         }
165 }
166
167 /*
168  * given an address space and start/len, compress the bytes.
169  *
170  * pages are allocated to hold the compressed result and stored
171  * in 'pages'
172  *
173  * out_pages is used to return the number of pages allocated.  There
174  * may be pages allocated even if we return an error
175  *
176  * total_in is used to return the number of bytes actually read.  It
177  * may be smaller then len if we had to exit early because we
178  * ran out of room in the pages array or because we cross the
179  * max_out threshold.
180  *
181  * total_out is used to return the total number of compressed bytes
182  *
183  * max_out tells us the max number of bytes that we're allowed to
184  * stuff into pages
185  */
186 int btrfs_zlib_compress_pages(struct address_space *mapping,
187                               u64 start, unsigned long len,
188                               struct page **pages,
189                               unsigned long nr_dest_pages,
190                               unsigned long *out_pages,
191                               unsigned long *total_in,
192                               unsigned long *total_out,
193                               unsigned long max_out)
194 {
195         int ret;
196         struct workspace *workspace;
197         char *data_in;
198         char *cpage_out;
199         int nr_pages = 0;
200         struct page *in_page = NULL;
201         struct page *out_page = NULL;
202         int out_written = 0;
203         int in_read = 0;
204         unsigned long bytes_left;
205
206         *out_pages = 0;
207         *total_out = 0;
208         *total_in = 0;
209
210         workspace = find_zlib_workspace();
211         if (!workspace)
212                 return -1;
213
214         if (Z_OK != zlib_deflateInit(&workspace->def_strm, 3)) {
215                 printk(KERN_WARNING "deflateInit failed\n");
216                 ret = -1;
217                 goto out;
218         }
219
220         workspace->def_strm.total_in = 0;
221         workspace->def_strm.total_out = 0;
222
223         in_page = find_get_page(mapping, start >> PAGE_CACHE_SHIFT);
224         data_in = kmap(in_page);
225
226         out_page = alloc_page(GFP_NOFS | __GFP_HIGHMEM);
227         cpage_out = kmap(out_page);
228         pages[0] = out_page;
229         nr_pages = 1;
230
231         workspace->def_strm.next_in = data_in;
232         workspace->def_strm.next_out = cpage_out;
233         workspace->def_strm.avail_out = PAGE_CACHE_SIZE;
234         workspace->def_strm.avail_in = min(len, PAGE_CACHE_SIZE);
235
236         out_written = 0;
237         in_read = 0;
238
239         while (workspace->def_strm.total_in < len) {
240                 ret = zlib_deflate(&workspace->def_strm, Z_SYNC_FLUSH);
241                 if (ret != Z_OK) {
242                         printk(KERN_DEBUG "btrfs deflate in loop returned %d\n",
243                                ret);
244                         zlib_deflateEnd(&workspace->def_strm);
245                         ret = -1;
246                         goto out;
247                 }
248
249                 /* we're making it bigger, give up */
250                 if (workspace->def_strm.total_in > 8192 &&
251                     workspace->def_strm.total_in <
252                     workspace->def_strm.total_out) {
253                         ret = -1;
254                         goto out;
255                 }
256                 /* we need another page for writing out.  Test this
257                  * before the total_in so we will pull in a new page for
258                  * the stream end if required
259                  */
260                 if (workspace->def_strm.avail_out == 0) {
261                         kunmap(out_page);
262                         if (nr_pages == nr_dest_pages) {
263                                 out_page = NULL;
264                                 ret = -1;
265                                 goto out;
266                         }
267                         out_page = alloc_page(GFP_NOFS | __GFP_HIGHMEM);
268                         cpage_out = kmap(out_page);
269                         pages[nr_pages] = out_page;
270                         nr_pages++;
271                         workspace->def_strm.avail_out = PAGE_CACHE_SIZE;
272                         workspace->def_strm.next_out = cpage_out;
273                 }
274                 /* we're all done */
275                 if (workspace->def_strm.total_in >= len)
276                         break;
277
278                 /* we've read in a full page, get a new one */
279                 if (workspace->def_strm.avail_in == 0) {
280                         if (workspace->def_strm.total_out > max_out)
281                                 break;
282
283                         bytes_left = len - workspace->def_strm.total_in;
284                         kunmap(in_page);
285                         page_cache_release(in_page);
286
287                         start += PAGE_CACHE_SIZE;
288                         in_page = find_get_page(mapping,
289                                                 start >> PAGE_CACHE_SHIFT);
290                         data_in = kmap(in_page);
291                         workspace->def_strm.avail_in = min(bytes_left,
292                                                            PAGE_CACHE_SIZE);
293                         workspace->def_strm.next_in = data_in;
294                 }
295         }
296         workspace->def_strm.avail_in = 0;
297         ret = zlib_deflate(&workspace->def_strm, Z_FINISH);
298         zlib_deflateEnd(&workspace->def_strm);
299
300         if (ret != Z_STREAM_END) {
301                 ret = -1;
302                 goto out;
303         }
304
305         if (workspace->def_strm.total_out >= workspace->def_strm.total_in) {
306                 ret = -1;
307                 goto out;
308         }
309
310         ret = 0;
311         *total_out = workspace->def_strm.total_out;
312         *total_in = workspace->def_strm.total_in;
313 out:
314         *out_pages = nr_pages;
315         if (out_page)
316                 kunmap(out_page);
317
318         if (in_page) {
319                 kunmap(in_page);
320                 page_cache_release(in_page);
321         }
322         free_workspace(workspace);
323         return ret;
324 }
325
326 /*
327  * pages_in is an array of pages with compressed data.
328  *
329  * disk_start is the starting logical offset of this array in the file
330  *
331  * bvec is a bio_vec of pages from the file that we want to decompress into
332  *
333  * vcnt is the count of pages in the biovec
334  *
335  * srclen is the number of bytes in pages_in
336  *
337  * The basic idea is that we have a bio that was created by readpages.
338  * The pages in the bio are for the uncompressed data, and they may not
339  * be contiguous.  They all correspond to the range of bytes covered by
340  * the compressed extent.
341  */
342 int btrfs_zlib_decompress_biovec(struct page **pages_in,
343                               u64 disk_start,
344                               struct bio_vec *bvec,
345                               int vcnt,
346                               size_t srclen)
347 {
348         int ret = 0;
349         int wbits = MAX_WBITS;
350         struct workspace *workspace;
351         char *data_in;
352         size_t total_out = 0;
353         unsigned long page_bytes_left;
354         unsigned long page_in_index = 0;
355         unsigned long page_out_index = 0;
356         struct page *page_out;
357         unsigned long total_pages_in = (srclen + PAGE_CACHE_SIZE - 1) /
358                                         PAGE_CACHE_SIZE;
359         unsigned long buf_start;
360         unsigned long buf_offset;
361         unsigned long bytes;
362         unsigned long working_bytes;
363         unsigned long pg_offset;
364         unsigned long start_byte;
365         unsigned long current_buf_start;
366         char *kaddr;
367
368         workspace = find_zlib_workspace();
369         if (!workspace)
370                 return -ENOMEM;
371
372         data_in = kmap(pages_in[page_in_index]);
373         workspace->inf_strm.next_in = data_in;
374         workspace->inf_strm.avail_in = min_t(size_t, srclen, PAGE_CACHE_SIZE);
375         workspace->inf_strm.total_in = 0;
376
377         workspace->inf_strm.total_out = 0;
378         workspace->inf_strm.next_out = workspace->buf;
379         workspace->inf_strm.avail_out = PAGE_CACHE_SIZE;
380         page_out = bvec[page_out_index].bv_page;
381         page_bytes_left = PAGE_CACHE_SIZE;
382         pg_offset = 0;
383
384         /* If it's deflate, and it's got no preset dictionary, then
385            we can tell zlib to skip the adler32 check. */
386         if (srclen > 2 && !(data_in[1] & PRESET_DICT) &&
387             ((data_in[0] & 0x0f) == Z_DEFLATED) &&
388             !(((data_in[0]<<8) + data_in[1]) % 31)) {
389
390                 wbits = -((data_in[0] >> 4) + 8);
391                 workspace->inf_strm.next_in += 2;
392                 workspace->inf_strm.avail_in -= 2;
393         }
394
395         if (Z_OK != zlib_inflateInit2(&workspace->inf_strm, wbits)) {
396                 printk(KERN_WARNING "inflateInit failed\n");
397                 ret = -1;
398                 goto out;
399         }
400         while(workspace->inf_strm.total_in < srclen) {
401                 ret = zlib_inflate(&workspace->inf_strm, Z_NO_FLUSH);
402                 if (ret != Z_OK && ret != Z_STREAM_END) {
403                         break;
404                 }
405
406                 /*
407                  * buf start is the byte offset we're of the start of
408                  * our workspace buffer
409                  */
410                 buf_start = total_out;
411
412                 /* total_out is the last byte of the workspace buffer */
413                 total_out = workspace->inf_strm.total_out;
414
415                 working_bytes = total_out - buf_start;
416
417                 /*
418                  * start byte is the first byte of the page we're currently
419                  * copying into relative to the start of the compressed data.
420                  */
421                 start_byte = page_offset(page_out) - disk_start;
422
423                 if (working_bytes == 0) {
424                         /* we didn't make progress in this inflate
425                          * call, we're done
426                          */
427                         if (ret != Z_STREAM_END) {
428                                 ret = -1;
429                         }
430                         break;
431                 }
432
433                 /* we haven't yet hit data corresponding to this page */
434                 if (total_out <= start_byte) {
435                         goto next;
436                 }
437
438                 /*
439                  * the start of the data we care about is offset into
440                  * the middle of our working buffer
441                  */
442                 if (total_out > start_byte && buf_start < start_byte) {
443                         buf_offset = start_byte - buf_start;
444                         working_bytes -= buf_offset;
445                 } else {
446                         buf_offset = 0;
447                 }
448                 current_buf_start = buf_start;
449
450                 /* copy bytes from the working buffer into the pages */
451                 while(working_bytes > 0) {
452                         bytes = min(PAGE_CACHE_SIZE - pg_offset,
453                                     PAGE_CACHE_SIZE - buf_offset);
454                         bytes = min(bytes, working_bytes);
455                         kaddr = kmap_atomic(page_out, KM_USER0);
456                         memcpy(kaddr + pg_offset, workspace->buf + buf_offset,
457                                bytes);
458                         kunmap_atomic(kaddr, KM_USER0);
459                         flush_dcache_page(page_out);
460
461                         pg_offset += bytes;
462                         page_bytes_left -= bytes;
463                         buf_offset += bytes;
464                         working_bytes -= bytes;
465                         current_buf_start += bytes;
466
467                         /* check if we need to pick another page */
468                         if (page_bytes_left == 0) {
469                                 page_out_index++;
470                                 if (page_out_index >= vcnt) {
471                                         ret = 0;
472                                         goto done;
473                                 }
474                                 page_out = bvec[page_out_index].bv_page;
475                                 pg_offset = 0;
476                                 page_bytes_left = PAGE_CACHE_SIZE;
477                                 start_byte = page_offset(page_out) - disk_start;
478
479                                 /*
480                                  * make sure our new page is covered by this
481                                  * working buffer
482                                  */
483                                 if (total_out <= start_byte) {
484                                         goto next;
485                                 }
486
487                                 /* the next page in the biovec might not
488                                  * be adjacent to the last page, but it
489                                  * might still be found inside this working
490                                  * buffer.  bump our offset pointer
491                                  */
492                                 if (total_out > start_byte &&
493                                     current_buf_start < start_byte) {
494                                         buf_offset = start_byte - buf_start;
495                                         working_bytes = total_out - start_byte;
496                                         current_buf_start = buf_start +
497                                                 buf_offset;
498                                 }
499                         }
500                 }
501 next:
502                 workspace->inf_strm.next_out = workspace->buf;
503                 workspace->inf_strm.avail_out = PAGE_CACHE_SIZE;
504
505                 if (workspace->inf_strm.avail_in == 0) {
506                         unsigned long tmp;
507                         kunmap(pages_in[page_in_index]);
508                         page_in_index++;
509                         if (page_in_index >= total_pages_in) {
510                                 data_in = NULL;
511                                 break;
512                         }
513                         data_in = kmap(pages_in[page_in_index]);
514                         workspace->inf_strm.next_in = data_in;
515                         tmp = srclen - workspace->inf_strm.total_in;
516                         workspace->inf_strm.avail_in = min(tmp,
517                                                            PAGE_CACHE_SIZE);
518                 }
519         }
520         if (ret != Z_STREAM_END) {
521                 ret = -1;
522         } else {
523                 ret = 0;
524         }
525 done:
526         zlib_inflateEnd(&workspace->inf_strm);
527         if (data_in)
528                 kunmap(pages_in[page_in_index]);
529 out:
530         free_workspace(workspace);
531         return ret;
532 }
533
534 /*
535  * a less complex decompression routine.  Our compressed data fits in a
536  * single page, and we want to read a single page out of it.
537  * start_byte tells us the offset into the compressed data we're interested in
538  */
539 int btrfs_zlib_decompress(unsigned char *data_in,
540                           struct page *dest_page,
541                           unsigned long start_byte,
542                           size_t srclen, size_t destlen)
543 {
544         int ret = 0;
545         int wbits = MAX_WBITS;
546         struct workspace *workspace;
547         unsigned long bytes_left = destlen;
548         unsigned long total_out = 0;
549         char *kaddr;
550
551         if (destlen > PAGE_CACHE_SIZE)
552                 return -ENOMEM;
553
554         workspace = find_zlib_workspace();
555         if (!workspace)
556                 return -ENOMEM;
557
558         workspace->inf_strm.next_in = data_in;
559         workspace->inf_strm.avail_in = srclen;
560         workspace->inf_strm.total_in = 0;
561
562         workspace->inf_strm.next_out = workspace->buf;
563         workspace->inf_strm.avail_out = PAGE_CACHE_SIZE;
564         workspace->inf_strm.total_out = 0;
565         /* If it's deflate, and it's got no preset dictionary, then
566            we can tell zlib to skip the adler32 check. */
567         if (srclen > 2 && !(data_in[1] & PRESET_DICT) &&
568             ((data_in[0] & 0x0f) == Z_DEFLATED) &&
569             !(((data_in[0]<<8) + data_in[1]) % 31)) {
570
571                 wbits = -((data_in[0] >> 4) + 8);
572                 workspace->inf_strm.next_in += 2;
573                 workspace->inf_strm.avail_in -= 2;
574         }
575
576         if (Z_OK != zlib_inflateInit2(&workspace->inf_strm, wbits)) {
577                 printk(KERN_WARNING "inflateInit failed\n");
578                 ret = -1;
579                 goto out;
580         }
581
582         while(bytes_left > 0) {
583                 unsigned long buf_start;
584                 unsigned long buf_offset;
585                 unsigned long bytes;
586                 unsigned long pg_offset = 0;
587
588                 ret = zlib_inflate(&workspace->inf_strm, Z_NO_FLUSH);
589                 if (ret != Z_OK && ret != Z_STREAM_END) {
590                         break;
591                 }
592
593                 buf_start = total_out;
594                 total_out = workspace->inf_strm.total_out;
595
596                 if (total_out == buf_start) {
597                         ret = -1;
598                         break;
599                 }
600
601                 if (total_out <= start_byte) {
602                         goto next;
603                 }
604
605                 if (total_out > start_byte && buf_start < start_byte) {
606                         buf_offset = start_byte - buf_start;
607                 } else {
608                         buf_offset = 0;
609                 }
610
611                 bytes = min(PAGE_CACHE_SIZE - pg_offset,
612                             PAGE_CACHE_SIZE - buf_offset);
613                 bytes = min(bytes, bytes_left);
614
615                 kaddr = kmap_atomic(dest_page, KM_USER0);
616                 memcpy(kaddr + pg_offset, workspace->buf + buf_offset, bytes);
617                 kunmap_atomic(kaddr, KM_USER0);
618
619                 pg_offset += bytes;
620                 bytes_left -= bytes;
621 next:
622                 workspace->inf_strm.next_out = workspace->buf;
623                 workspace->inf_strm.avail_out = PAGE_CACHE_SIZE;
624         }
625         if (ret != Z_STREAM_END && bytes_left != 0) {
626                 ret = -1;
627         } else {
628                 ret = 0;
629         }
630         zlib_inflateEnd(&workspace->inf_strm);
631 out:
632         free_workspace(workspace);
633         return ret;
634 }
635
636 void btrfs_zlib_exit(void)
637 {
638     free_workspaces();
639 }